def startThreads(self, url, bundle_url): startTime = time.time() ADFBundle.grabBundleKeysByURL(bundle_url) print "processing... ", url new_agent = HttpAgent(url) response = new_agent.RequestResponse() #Read only the first 1MB of data snippet = response.read(50000) soup = BeautifulSoup(snippet) links = soup.findAll('a', {"target" : "_source"}) #Create an instance for each search results parse_url = urlparse(response.geturl()) hostname = parse_url.scheme + '://' + parse_url.netloc counter = 0 #populate queue with hosts for link in links: counter += 1 try: new_searchResult = SearchResult(hostname, link['href'], None) newTags, newAttrs = new_searchResult.exploreSource() #Get the CSet variables Explore.allTags = Explore.allTags.union(newTags) Explore.allAttrs = Explore.allAttrs.union(newAttrs) except: print "link unexplored" pass elapsedTime = (time.time() - startTime) print "Elapsed Time: %s" % elapsedTime
def grabBundleSource(self, mf): """The <c:set> points to an adfBundle. This function go directly to the source of these bundles""" #adfBundle precompile a list of adfBundle keys pointing to their respective URLs valueHashPattern = re.compile('^#\{(\w+)\[\'([\w.]+)\'\]\}') for key, value in self.bundlePathDict.items(): #Transform the bundle value to a directory path match = re.match(valueHashPattern, value) if match: adfBundleDictName = match.group(1) #adfBundle adfBundleKey = match.group(2) #oracle.apps.atk.essMeta.resource.AtkEssMetaEHBundle try: if ADFBundle.bundleSoup[adfBundleKey] != None: #grab bundle source if already defined self.bundlePathURL[(key)] = ADFBundle.bundleAddress[adfBundleKey] self.bundlePathSoup[(key)] = ADFBundle.bundleSoup[(adfBundleKey)] else: print "bundleSoup does not exist" except: try: if ADFBundle.bundleAddress[adfBundleKey]: #url = ADFBundle.bundleAddress[adfBundleKey] + '&content=1' self.bundlePathURL[(key)] = ADFBundle.bundleAddress[adfBundleKey] try: ADFBundle.grabBundleSource(adfBundleKey) #Save the soup object for all the dialogs within the JSF self.bundlePathSoup[(key)] = ADFBundle.bundleSoup[(adfBundleKey)] except: print "ADFBundle cannot be created" except: print "Sorry, this bundle key is not recognized" print >>mf, adfBundleKey finally: print adfBundleKey, " scanned"
def startThreads(url, bundle_url, filename, param): elapsedTime = 0 startTime = time.time() ADFBundle.grabBundleKeysByURL(bundle_url) print "processing... ", url new_agent = HttpAgent(url) response = new_agent.RequestResponse() soup = BeautifulSoup(response) links = soup.findAll('a', {"target" : "_source"}) mf = open("outputs/" + param['container'] + "MissingBundle_" + today_date_label + "_" + filename + ".txt", 'w') f = open("outputs/" + param['container'] + "SearchResults_" + today_date_label + "_" + filename + ".txt", 'w') bf = None if param['container'] == 'dialog': headerOutputLn = ["Page Link", "Product Family", "Dialog Number", "Dialog Title", "Dialog ID", "Dialog Modal", "Dialog Parents", "Button Group Name", "# of Command Buttons", "# of CANCEL", "# of OK", "# of DONE", "# of SAVE and CLOSE", "Component Name", "Component Attributes"] print >>f, '\t'.join(headerOutputLn) elif param['container'] == 'explore': headerOutputLn = ["Page Link", "Product Family", "Tag Number", "Tag Name", "Tag Parents", "Tag Attributes"] print >>f, '\t'.join(headerOutputLn) elif param['container'] == 'icon': headerOutputLn = ["Page Link", "Product Family", "Tag Number", "Tag Name", "Tag Parents", "Attribute Name", "File Extension", "Image Source", "Original Attribute Value"] print >>f, '\t'.join(headerOutputLn) #Create an instance for each search results parse_url = urlparse(response.geturl()) hostname = parse_url.scheme + '://' + parse_url.netloc for i in range(5): t = ThreadUrl(hostname, queue, mf, f, bf, param) t.setDaemon(True) t.start() counter = 0 #populate queue with hosts for link in links: try: if param['processSize'] != 'All' and counter == int(param['processSize']): #iterate over only a few for testing purpose break except: pass counter += 1 #Add to queue queue.put(link) #wait on queue until everything has been processed queue.join() f.close() mf.close() elapsedTime = (time.time() - startTime) print "Elapsed Time: %s" % Icon.elapsedTime return elapsedTime
def grabBundleSource(self, mf): """The <c:set> points to an adfBundle. This function go directly to the source of these bundles""" #adfBundle precompile a list of adfBundle keys pointing to their respective URLs valueHashPattern = re.compile('^#\{(\w+)\[\'([\w.]+)\'\]\}') for key, value in self.bundlePathDict.items(): #Transform the bundle value to a directory path match = re.match(valueHashPattern, value) if match: adfBundleDictName = match.group(1) #adfBundle adfBundleKey = match.group( 2) #oracle.apps.atk.essMeta.resource.AtkEssMetaEHBundle try: if ADFBundle.bundleSoup[ adfBundleKey] != None: #grab bundle source if already defined self.bundlePathURL[( key)] = ADFBundle.bundleAddress[adfBundleKey] self.bundlePathSoup[(key)] = ADFBundle.bundleSoup[( adfBundleKey)] else: print "bundleSoup does not exist" except: try: if ADFBundle.bundleAddress[adfBundleKey]: #url = ADFBundle.bundleAddress[adfBundleKey] + '&content=1' self.bundlePathURL[( key)] = ADFBundle.bundleAddress[adfBundleKey] try: ADFBundle.grabBundleSource(adfBundleKey) #Save the soup object for all the dialogs within the JSF self.bundlePathSoup[( key)] = ADFBundle.bundleSoup[( adfBundleKey)] except: print "ADFBundle cannot be created" except: print "Sorry, this bundle key is not recognized" print >> mf, adfBundleKey finally: print adfBundleKey, " scanned"
def startThreads(self, url, bundle_url): startTime = time.time() ADFBundle.grabBundleKeysByURL(bundle_url) print "processing... ", url new_agent = HttpAgent(url) response = new_agent.RequestResponse() #Read only the first 1MB of data snippet = response.read(50000) soup = BeautifulSoup(snippet) links = soup.findAll('a', {"target": "_source"}) #Create an instance for each search results parse_url = urlparse(response.geturl()) hostname = parse_url.scheme + '://' + parse_url.netloc counter = 0 #populate queue with hosts for link in links: counter += 1 try: new_searchResult = SearchResult(hostname, link['href'], None) newTags, newAttrs = new_searchResult.exploreSource( ) #Get the CSet variables Explore.allTags = Explore.allTags.union(newTags) Explore.allAttrs = Explore.allAttrs.union(newAttrs) except: print "link unexplored" pass elapsedTime = (time.time() - startTime) print "Elapsed Time: %s" % elapsedTime
def startThreads(url, bundle_url, filename, param): elapsedTime = 0 startTime = time.time() ADFBundle.grabBundleKeysByURL(bundle_url) print "processing... ", url new_agent = HttpAgent(url) response = new_agent.RequestResponse() soup = BeautifulSoup(response) links = soup.findAll('a', {"target": "_source"}) mf = open( "outputs/" + param['container'] + "MissingBundle_" + today_date_label + "_" + filename + ".txt", 'w') f = open( "outputs/" + param['container'] + "SearchResults_" + today_date_label + "_" + filename + ".txt", 'w') bf = None if param['container'] == 'dialog': headerOutputLn = [ "Page Link", "Product Family", "Dialog Number", "Dialog Title", "Dialog ID", "Dialog Modal", "Dialog Parents", "Button Group Name", "# of Command Buttons", "# of CANCEL", "# of OK", "# of DONE", "# of SAVE and CLOSE", "Component Name", "Component Attributes" ] print >> f, '\t'.join(headerOutputLn) elif param['container'] == 'explore': headerOutputLn = [ "Page Link", "Product Family", "Tag Number", "Tag Name", "Tag Parents", "Tag Attributes" ] print >> f, '\t'.join(headerOutputLn) elif param['container'] == 'icon': headerOutputLn = [ "Page Link", "Product Family", "Tag Number", "Tag Name", "Tag Parents", "Attribute Name", "File Extension", "Image Source", "Original Attribute Value" ] print >> f, '\t'.join(headerOutputLn) #Create an instance for each search results parse_url = urlparse(response.geturl()) hostname = parse_url.scheme + '://' + parse_url.netloc for i in range(5): t = ThreadUrl(hostname, queue, mf, f, bf, param) t.setDaemon(True) t.start() counter = 0 #populate queue with hosts for link in links: try: if param['processSize'] != 'All' and counter == int( param['processSize'] ): #iterate over only a few for testing purpose break except: pass counter += 1 #Add to queue queue.put(link) #wait on queue until everything has been processed queue.join() f.close() mf.close() elapsedTime = (time.time() - startTime) print "Elapsed Time: %s" % Icon.elapsedTime return elapsedTime