def grabBundleSource(key):
    """Fetch the XLIFF source for a bundle key and cache it in bundleSoup."""
    if key not in ADFBundle.bundleSoup:
        print "key:", key
        try:
            if ADFBundle.bundleAddress[key] is not None:
                url = ADFBundle.bundleAddress[key] + '&content=1'
                response = None
                try:
                    new_agent = HttpAgent(url)
                    if new_agent:
                        response = new_agent.RequestResponse()
                    else:
                        print "no agent is established"
                except:
                    print "Error: Cannot establish http agent"
                #From the source file, convert it to soup
                xliff_soup = None
                try:
                    xliff_soup = Soup.HTMLToStoneSoup(response, None)
                except:
                    print "Error: Cannot convert http response into soup"
                #Save the soup object for all the dialogs within the JSF
                ADFBundle.bundleSoup[key] = xliff_soup
        except KeyError:
            print "key is not found in bundleAddress"
    else:
        print "key is already in bundleSoup"
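# A hedged usage sketch; the bundle key below is hypothetical and simply
# follows the dotted-key format built by grabBundleKeysByURL:
#
#   grabBundleKeysByURL(bundle_url)    # populates ADFBundle.bundleAddress
#   grabBundleSource('oracle.apps.fnd.applcore.SomeBundle')
#   soup = ADFBundle.bundleSoup.get('oracle.apps.fnd.applcore.SomeBundle')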
def exploreSource(self):
    """
    @Output: A Tuple with unique lists of Tag Names and Tag Attributes
    """
    url = ''
    try:
        url = self.getTargetFullPath()
    except:
        print "No Download Link Provided"
        return False
    response = None
    try:
        new_agent = HttpAgent(url)
        if new_agent:
            print "new agent created"
            response = new_agent.RequestResponse()
        else:
            print "no agent is established"
    except:
        print "Error: Cannot establish http agent"
    #From the source file, convert it to soup
    if response:
        selfClosingTags = ['c:set', 'f:facet', 'af:popup',
                           'af:panelGroupLayout', 'af:spacer',
                           'af:panelHeader']
        try:
            self.resultSoup = Soup.HTMLToStoneSoup(response, selfClosingTags)
            attrSet = Set()
            tagSet = Set()
            #Collect every distinct tag name and attribute name on the page
            allTags = self.resultSoup.findAll(True)
            for tag in allTags:
                tagSet.add(tag.name)
                for attr in tag.attrs:
                    attrSet.add(attr[0])
            return (tagSet, attrSet)
        except:
            print "Error: Cannot convert http response into soup"
    else:
        print "no soup"
def grabBundleKeysByURL(url):
    """Scrape .xlf bundle links from a listing page into bundleAddress."""
    new_agent = HttpAgent(url)
    response = new_agent.RequestResponse()
    htmlBody = response.read()
    parse_url = urlparse(response.geturl())
    hostname = parse_url.scheme + '://' + parse_url.netloc
    linksBody = htmlBody.split('<a', 1)
    if len(linksBody) > 1:
        links = linksBody[1].split('+')
        hrefPattern = re.compile(
            r'_source"\s?href="([\w.:/&\-_?;=%]+\.xlf)">([\w./&\-_]+)')
        for link in links:
            if re.search('_source', link):
                found = re.search(hrefPattern, link)
                if not found:
                    continue
                bundleKey = found.group(2)
                bundleKey = re.sub('</a>', '', bundleKey)
                bundleKey = re.sub(r'\.xlf$', '', bundleKey)
                bundleLink = hostname + found.group(1)
                #Build the hash key from "oracle/apps/" to the end
                if re.search('oracle/apps', bundleKey):
                    key = bundleKey.split('oracle/apps')[1]
                    key = re.sub('/', '.', key)
                    key = 'oracle.apps' + key
                else:
                    key = re.sub('/', '.', bundleKey)
                ADFBundle.bundleAddress[key] = bundleLink
        print str(len(links))
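# A worked example of the key transformation above (the path is hypothetical):
#
#   'fusionapps/src/oracle/apps/fnd/applcore/Messages'
#     -> split on 'oracle/apps'  -> '/fnd/applcore/Messages'
#     -> slashes to dots         -> '.fnd.applcore.Messages'
#     -> prefix 'oracle.apps'    -> 'oracle.apps.fnd.applcore.Messages'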
def searchSource(self):
    url = ''
    try:
        url = self.getTargetFullPath()
    except:
        print "No Download Link Provided"
        return False
    response = None
    try:
        new_agent = HttpAgent(url)
        if new_agent:
            print "new agent created"
            response = new_agent.RequestResponse()
        else:
            print "no agent is established"
    except:
        print "Error: Cannot establish http agent"
    #From the source file, convert it to soup
    if response:
        selfClosingTags = ['c:set', 'f:facet', 'af:popup',
                           'af:panelGroupLayout', 'af:spacer',
                           'af:panelHeader']
        try:
            self.resultSoup = Soup.HTMLToStoneSoup(response, selfClosingTags)
        except:
            print "Error: Cannot convert http response into soup"
        #Search for all C:SET tags;
        #C:SET tags are hash keys into the resource bundles
        self.searchCSet()
    else:
        print "no soup"
def startThreads(self, url, bundle_url):
    startTime = time.time()
    ADFBundle.grabBundleKeysByURL(bundle_url)
    print "processing... ", url
    new_agent = HttpAgent(url)
    response = new_agent.RequestResponse()
    #Read only the first 50,000 bytes of data
    snippet = response.read(50000)
    soup = BeautifulSoup(snippet)
    links = soup.findAll('a', {"target": "_source"})
    parse_url = urlparse(response.geturl())
    hostname = parse_url.scheme + '://' + parse_url.netloc
    counter = 0
    #Create an instance for each search result and merge its tags/attrs
    for link in links:
        counter += 1
        try:
            new_searchResult = SearchResult(hostname, link['href'], None)
            #Get the tag names and attribute names from this page
            newTags, newAttrs = new_searchResult.exploreSource()
            Explore.allTags = Explore.allTags.union(newTags)
            Explore.allAttrs = Explore.allAttrs.union(newAttrs)
        except:
            print "link unexplored"
    elapsedTime = time.time() - startTime
    print "Elapsed Time: %s" % elapsedTime
def retrieveFormParam():
    option_dict = dict()
    new_agent = HttpAgent(grepSource_url)
    if new_agent:
        response = new_agent.RequestResponse()
        soup = BeautifulSoup(response)
        #Find top selection
        app = soup.find('select', attrs={"name": "top"})
        app_options = getFormSelection(app)
        option_dict['app_options'] = app_options
        #Find series selection
        series = soup.find('select', attrs={"name": "series"})
        series_options = getFormSelection(series)
        option_dict['series_options'] = series_options
        #Find label selection
        """
        #Failed to link this dropdown list to the series selection
        label = soup.find('select', attrs={"name": "label"})
        label_options = getFormSelection(label)
        option_dict['label_options'] = label_options
        """
        label_options = ['LATEST']
        option_dict['label_options'] = label_options
        #Find cmd selection
        cmd = soup.find('select', attrs={"name": "cmd"})
        cmd_options = getFormSelection(cmd)
        option_dict['cmd_options'] = cmd_options
    else:
        print "no agent is established"
    return option_dict
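# getFormSelection is used above but defined elsewhere; a minimal sketch of
# what it presumably does, assuming BeautifulSoup 3 (the name is real, the
# body is an assumption, kept commented out to avoid shadowing the real one):
#
# def getFormSelection(select_tag):
#     options = []
#     if select_tag:
#         for option in select_tag.findAll('option'):
#             options.append(option.get('value', ''))
#     return options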
def startThreads(url, bundle_url, filename, param):
    startTime = time.time()
    ADFBundle.grabBundleKeysByURL(bundle_url)
    print "processing... ", url
    new_agent = HttpAgent(url)
    response = new_agent.RequestResponse()
    soup = BeautifulSoup(response)
    links = soup.findAll('a', {"target": "_source"})
    #One output file for missing bundles, one for the search results
    mf = open("outputs/" + param['container'] + "MissingBundle_" +
              today_date_label + "_" + filename + ".txt", 'w')
    f = open("outputs/" + param['container'] + "SearchResults_" +
             today_date_label + "_" + filename + ".txt", 'w')
    bf = None
    #Write the tab-separated header row for the chosen container type
    if param['container'] == 'dialog':
        headerOutputLn = ["Page Link", "Product Family", "Dialog Number",
                          "Dialog Title", "Dialog ID", "Dialog Modal",
                          "Dialog Parents", "Button Group Name",
                          "# of Command Buttons", "# of CANCEL", "# of OK",
                          "# of DONE", "# of SAVE and CLOSE",
                          "Component Name", "Component Attributes"]
        print >> f, '\t'.join(headerOutputLn)
    elif param['container'] == 'explore':
        headerOutputLn = ["Page Link", "Product Family", "Tag Number",
                          "Tag Name", "Tag Parents", "Tag Attributes"]
        print >> f, '\t'.join(headerOutputLn)
    elif param['container'] == 'icon':
        headerOutputLn = ["Page Link", "Product Family", "Tag Number",
                          "Tag Name", "Tag Parents", "Attribute Name",
                          "File Extension", "Image Source",
                          "Original Attribute Value"]
        print >> f, '\t'.join(headerOutputLn)
    parse_url = urlparse(response.geturl())
    hostname = parse_url.scheme + '://' + parse_url.netloc
    #Start the worker threads
    for i in range(5):
        t = ThreadUrl(hostname, queue, mf, f, bf, param)
        t.setDaemon(True)
        t.start()
    counter = 0
    #Populate queue with links
    for link in links:
        try:
            #Iterate over only a few links for testing purposes
            if param['processSize'] != 'All' and counter == int(param['processSize']):
                break
        except:
            pass
        counter += 1
        #Add to queue
        queue.put(link)
    #Wait on queue until everything has been processed
    queue.join()
    f.close()
    mf.close()
    elapsedTime = time.time() - startTime
    print "Elapsed Time: %s" % elapsedTime
    return elapsedTime
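# A hedged usage sketch; the URLs are placeholders, and the param keys follow
# how startThreads reads them above:
#
#   param = {'container': 'explore', 'processSize': '10'}
#   elapsed = startThreads(search_url, bundle_url, 'run1', param)
#   print "done in %s seconds" % elapsed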