Ejemplo n.º 1
0
	def startThreads(self, url, bundle_url):
		startTime = time.time()		
		ADFBundle.grabBundleKeysByURL(bundle_url)

		print "processing... ", url 
		new_agent = HttpAgent(url)
		response = new_agent.RequestResponse()

		#Read only the first 1MB of data
		snippet = response.read(50000)

		soup = BeautifulSoup(snippet)
		links = soup.findAll('a', {"target" : "_source"})

		#Create an instance for each search results
		parse_url = urlparse(response.geturl())
		hostname = parse_url.scheme + '://' + parse_url.netloc 

		counter = 0
		#populate queue with hosts
		for link in links:			
			counter += 1

			try:
				new_searchResult = SearchResult(hostname, link['href'], None)				
				newTags, newAttrs = new_searchResult.exploreSource() #Get the CSet variables		
				
				Explore.allTags = Explore.allTags.union(newTags)
				Explore.allAttrs = Explore.allAttrs.union(newAttrs)
			except:
				print "link unexplored"
				pass

		elapsedTime = (time.time() - startTime)
		print "Elapsed Time: %s" % elapsedTime
Ejemplo n.º 2
0
	def grabBundleSource(self, mf):
		"""The <c:set> points to an adfBundle.  This function go directly to the source of these bundles"""

		#adfBundle precompile a list of adfBundle keys pointing to their respective URLs		
		valueHashPattern = re.compile('^#\{(\w+)\[\'([\w.]+)\'\]\}')
		
		for key, value in self.bundlePathDict.items():
			#Transform the bundle value to a directory path
			match = re.match(valueHashPattern, value)
			if match:
				adfBundleDictName = match.group(1) #adfBundle
				adfBundleKey = match.group(2) #oracle.apps.atk.essMeta.resource.AtkEssMetaEHBundle   	
				
				try:
					if ADFBundle.bundleSoup[adfBundleKey] != None:  #grab bundle source if already defined						
						self.bundlePathURL[(key)] = ADFBundle.bundleAddress[adfBundleKey]
						self.bundlePathSoup[(key)] = ADFBundle.bundleSoup[(adfBundleKey)]							
					else:
						print "bundleSoup does not exist"
				except:						
					try:
						if ADFBundle.bundleAddress[adfBundleKey]:
							#url = ADFBundle.bundleAddress[adfBundleKey] + '&content=1'
							self.bundlePathURL[(key)] = ADFBundle.bundleAddress[adfBundleKey]
							try:
								ADFBundle.grabBundleSource(adfBundleKey)
								#Save the soup object for all the dialogs within the JSF
								self.bundlePathSoup[(key)] = ADFBundle.bundleSoup[(adfBundleKey)]
							except:
								print "ADFBundle cannot be created"
					except:
						print "Sorry, this bundle key is not recognized"
						print >>mf, adfBundleKey	
				finally:
					print adfBundleKey, " scanned"	
Ejemplo n.º 3
0
def startThreads(url, bundle_url, filename, param):
	elapsedTime = 0
	startTime = time.time()
	ADFBundle.grabBundleKeysByURL(bundle_url)

	print "processing... ", url 
	new_agent = HttpAgent(url)
	response = new_agent.RequestResponse()

	soup = BeautifulSoup(response)
	links = soup.findAll('a', {"target" : "_source"})

	mf = open("outputs/" + param['container'] + "MissingBundle_" + today_date_label + "_" + filename + ".txt", 'w')
	f = open("outputs/" + param['container'] + "SearchResults_" + today_date_label + "_" + filename + ".txt", 'w')
	bf = None

	if param['container'] == 'dialog':
		headerOutputLn = ["Page Link", "Product Family", "Dialog Number", "Dialog Title", "Dialog ID", "Dialog Modal", "Dialog Parents", "Button Group Name", "# of Command Buttons", "# of CANCEL", "# of OK", "# of DONE", "# of SAVE and CLOSE", "Component Name", "Component Attributes"]
		print >>f, '\t'.join(headerOutputLn)
	elif param['container'] == 'explore':
		headerOutputLn = ["Page Link", "Product Family", "Tag Number", "Tag Name", "Tag Parents", "Tag Attributes"]
		print >>f, '\t'.join(headerOutputLn)
	elif param['container'] == 'icon':
		headerOutputLn = ["Page Link", "Product Family", "Tag Number", "Tag Name", "Tag Parents", "Attribute Name", "File Extension", "Image Source", "Original Attribute Value"]
		print >>f, '\t'.join(headerOutputLn)

	#Create an instance for each search results
	parse_url = urlparse(response.geturl())
	hostname = parse_url.scheme + '://' + parse_url.netloc 

	for i in range(5):
		t = ThreadUrl(hostname, queue, mf, f, bf, param)
		t.setDaemon(True)
		t.start()

	counter = 0
	#populate queue with hosts
	for link in links:
		try:
			if param['processSize'] != 'All' and counter == int(param['processSize']):  #iterate over only a few for testing purpose
				break
		except:
			pass	

		counter += 1
		#Add to queue			
		queue.put(link)

	#wait on queue until everything has been processed
	queue.join()

	f.close()
	mf.close()	
	elapsedTime = (time.time() - startTime)
	print "Elapsed Time: %s" % Icon.elapsedTime
	return elapsedTime
Ejemplo n.º 4
0
    def grabBundleSource(self, mf):
        """The <c:set> points to an adfBundle.  This function go directly to the source of these bundles"""

        #adfBundle precompile a list of adfBundle keys pointing to their respective URLs
        valueHashPattern = re.compile('^#\{(\w+)\[\'([\w.]+)\'\]\}')

        for key, value in self.bundlePathDict.items():
            #Transform the bundle value to a directory path
            match = re.match(valueHashPattern, value)
            if match:
                adfBundleDictName = match.group(1)  #adfBundle
                adfBundleKey = match.group(
                    2)  #oracle.apps.atk.essMeta.resource.AtkEssMetaEHBundle

                try:
                    if ADFBundle.bundleSoup[
                            adfBundleKey] != None:  #grab bundle source if already defined
                        self.bundlePathURL[(
                            key)] = ADFBundle.bundleAddress[adfBundleKey]
                        self.bundlePathSoup[(key)] = ADFBundle.bundleSoup[(
                            adfBundleKey)]
                    else:
                        print "bundleSoup does not exist"
                except:
                    try:
                        if ADFBundle.bundleAddress[adfBundleKey]:
                            #url = ADFBundle.bundleAddress[adfBundleKey] + '&content=1'
                            self.bundlePathURL[(
                                key)] = ADFBundle.bundleAddress[adfBundleKey]
                            try:
                                ADFBundle.grabBundleSource(adfBundleKey)
                                #Save the soup object for all the dialogs within the JSF
                                self.bundlePathSoup[(
                                    key)] = ADFBundle.bundleSoup[(
                                        adfBundleKey)]
                            except:
                                print "ADFBundle cannot be created"
                    except:
                        print "Sorry, this bundle key is not recognized"
                        print >> mf, adfBundleKey
                finally:
                    print adfBundleKey, " scanned"
Ejemplo n.º 5
0
    def startThreads(self, url, bundle_url):
        startTime = time.time()
        ADFBundle.grabBundleKeysByURL(bundle_url)

        print "processing... ", url
        new_agent = HttpAgent(url)
        response = new_agent.RequestResponse()

        #Read only the first 1MB of data
        snippet = response.read(50000)

        soup = BeautifulSoup(snippet)
        links = soup.findAll('a', {"target": "_source"})

        #Create an instance for each search results
        parse_url = urlparse(response.geturl())
        hostname = parse_url.scheme + '://' + parse_url.netloc

        counter = 0
        #populate queue with hosts
        for link in links:
            counter += 1

            try:
                new_searchResult = SearchResult(hostname, link['href'], None)
                newTags, newAttrs = new_searchResult.exploreSource(
                )  #Get the CSet variables

                Explore.allTags = Explore.allTags.union(newTags)
                Explore.allAttrs = Explore.allAttrs.union(newAttrs)
            except:
                print "link unexplored"
                pass

        elapsedTime = (time.time() - startTime)
        print "Elapsed Time: %s" % elapsedTime
Ejemplo n.º 6
0
def startThreads(url, bundle_url, filename, param):
    elapsedTime = 0
    startTime = time.time()
    ADFBundle.grabBundleKeysByURL(bundle_url)

    print "processing... ", url
    new_agent = HttpAgent(url)
    response = new_agent.RequestResponse()

    soup = BeautifulSoup(response)
    links = soup.findAll('a', {"target": "_source"})

    mf = open(
        "outputs/" + param['container'] + "MissingBundle_" + today_date_label +
        "_" + filename + ".txt", 'w')
    f = open(
        "outputs/" + param['container'] + "SearchResults_" + today_date_label +
        "_" + filename + ".txt", 'w')
    bf = None

    if param['container'] == 'dialog':
        headerOutputLn = [
            "Page Link", "Product Family", "Dialog Number", "Dialog Title",
            "Dialog ID", "Dialog Modal", "Dialog Parents", "Button Group Name",
            "# of Command Buttons", "# of CANCEL", "# of OK", "# of DONE",
            "# of SAVE and CLOSE", "Component Name", "Component Attributes"
        ]
        print >> f, '\t'.join(headerOutputLn)
    elif param['container'] == 'explore':
        headerOutputLn = [
            "Page Link", "Product Family", "Tag Number", "Tag Name",
            "Tag Parents", "Tag Attributes"
        ]
        print >> f, '\t'.join(headerOutputLn)
    elif param['container'] == 'icon':
        headerOutputLn = [
            "Page Link", "Product Family", "Tag Number", "Tag Name",
            "Tag Parents", "Attribute Name", "File Extension", "Image Source",
            "Original Attribute Value"
        ]
        print >> f, '\t'.join(headerOutputLn)

    #Create an instance for each search results
    parse_url = urlparse(response.geturl())
    hostname = parse_url.scheme + '://' + parse_url.netloc

    for i in range(5):
        t = ThreadUrl(hostname, queue, mf, f, bf, param)
        t.setDaemon(True)
        t.start()

    counter = 0
    #populate queue with hosts
    for link in links:
        try:
            if param['processSize'] != 'All' and counter == int(
                    param['processSize']
            ):  #iterate over only a few for testing purpose
                break
        except:
            pass

        counter += 1
        #Add to queue
        queue.put(link)

    #wait on queue until everything has been processed
    queue.join()

    f.close()
    mf.close()
    elapsedTime = (time.time() - startTime)
    print "Elapsed Time: %s" % Icon.elapsedTime
    return elapsedTime