def LoadImagesFromBrowser():
    """Scroll the results page once and cache the images found on it.

    The cache entry for the current query is first reset to ``None`` and
    then replaced by a dict that maps each element's ``data-idx`` (as a
    string) to whatever ``ProcessBingImageElement`` returns for it.
    Elements that fail to process are reported through
    ``view_traceback`` and skipped.

    Returns:
        The number of cached images as a string, or ``"-1"`` when
        collecting the results failed as a whole.
    """
    global bingSearchCache, bgCurrentQuery

    print("Loading browser results..")
    ScrollBingPage(1)
    time.sleep(scroll_wait)

    try:
        # Invalidate the entry up front; it stays None if collection fails.
        bingSearchCache[bgCurrentQuery] = None

        images_by_index = {}
        for element in GetBingResultElements():
            try:
                processed = ProcessBingImageElement(element)
                images_by_index[str(element["data-idx"])] = processed
            except Exception:
                # One bad element must not abort the whole page.
                view_traceback()

        # Save images to the in-memory cache.
        # TODO : Convert to simple storage using sqlalchemy and sqlite3
        bingSearchCache[bgCurrentQuery] = images_by_index

        loaded = str(len(images_by_index))
        print(loaded + " no. of images loaded onto cache for : " + bgCurrentQuery)
        return loaded
    except Exception:
        view_traceback()
        return "-1"
def ScrollBingPage(nPages):
    """Scroll a Bing Image search by a defined number of pages.

    The page is always scrolled to the bottom once. Only when
    ``nPages > 2`` does the loop below run, ``nPages - 2`` times; each
    iteration either scrolls again or clicks the "more results" button.

    Returns:
        True when no exception was raised, False otherwise (the error is
        reported through ``view_traceback``).
    """
    print("Scrolling target page.")
    global bgMoreResultsContainer, bgMoreResultsButton
    showMoreContainer = None
    showMoreButton = None
    try:
        # Need to scroll once to ensure presence of the indicating element;
        # quirk observed in the Bing page.
        browser.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_wait)
        if nPages > 2:
            # The scroll above is not counted in the loop below.
            nPages = nPages - 2
            for i in range(nPages):
                showMoreContainer = browser.find_element_by_css_selector(
                    bgMoreResultsContainer)
                cls = showMoreContainer.get_attribute("class")
                if "b_hide" in cls or cls == "":
                    # Button is hidden. Scrolling can be made directly.
                    browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);")
                else:
                    # Button is visible: results load only after a click.
                    showMoreButton = browser.find_element_by_css_selector(
                        bgMoreResultsButton)
                    showMoreButton.click()
                # NOTE(review): there is no wait between iterations here,
                # unlike after the initial scroll -- confirm that is intended.
            return True
        else:
            # Nothing more to do for one or two pages beyond the scroll above.
            return True
    except Exception as e:
        view_traceback()
        return False
def GetBingSearchImages(n, offset):
    """Return cached images for the current query as a JSON object string.

    The response maps each image id to an object with the keys
    ``base64``, ``url``, ``width`` and ``height``. Images are taken from
    the cache entries whose key is ``str(i)`` for ``i`` in
    ``n .. n + offset`` (inclusive); missing keys are skipped.

    NOTE(review): the parameter names suggest "count" and "start offset",
    but the code uses ``n`` as the first index and ``offset`` as the span.
    That behaviour is kept as-is here -- confirm against the callers.

    Args:
        n: First cache index to return; values <= 0 are replaced by 1.
        offset: Span added to ``n``; values <= 0 or >= the number of
            cached images are clamped to that number minus one.

    Returns:
        The JSON text on success, otherwise a status string: ``"404"``
        when the cache lookup (or the page load it triggers) raised,
        ``"500"`` when the cache is empty, ``"501"`` when building the
        response raised.
    """
    global bingSearchCache, bgCurrentQuery

    try:
        imageCount = len(bingSearchCache[bgCurrentQuery])
        if imageCount == 0:
            LoadBingSearchPage(bgCurrentQuery)
    except Exception:
        return "404"

    # Cleaning the input
    if n <= 0:
        n = 1
    if offset <= 0:
        offset = imageCount
    if offset >= imageCount:
        # If asking for more than available images, fall back to a default.
        offset = imageCount - 1

    if n + offset > imageCount:
        # Not enough cached images: scroll and refresh the cache entry.
        ScrollBingPage(2)
        LoadBingSearchResults(bgCurrentQuery)
        imageCount = len(bingSearchCache[bgCurrentQuery])

    try:
        if not bingSearchCache:
            return "500"

        results = bingSearchCache[bgCurrentQuery]
        encoder = JSONEncoder()
        entries = []
        for key in range(n, n + offset + 1):
            # Note: sometimes keys may be missing.
            try:
                image = results[str(key)]
            except KeyError:
                continue
            # Encode the id through the JSON encoder so quotes/backslashes
            # in it are escaped instead of corrupting the response.
            entries.append(
                encoder.encode(str(image.id)) + ": " + encoder.encode({
                    "base64": image.base64,
                    "url": image.url,
                    "width": image.width,
                    "height": image.height,
                })
            )

        if not entries:
            return "{}\n"
        return "{\n" + ",".join(entries) + "}\n"
    except Exception:
        view_traceback()
        return "501"
def InitializeBrowser():
    """Start the Selenium Firefox driver and store it in the global ``browser``.

    The geckodriver executable is looked up as ``geckodriver.exe`` inside
    the working directory ``wd``; the log path and Firefox profile come
    from the module-level ``logpath`` and ``ffprofilepath``. Start-up
    failures are reported through ``view_traceback`` and not re-raised,
    in which case ``browser`` is left unassigned by this call.
    """
    global browser, logpath, pjspath, wd

    geckopath = os.path.join(wd, "geckodriver.exe")
    try:
        browser = webdriver.Firefox(
            log_path=logpath,
            executable_path=geckopath,
            firefox_profile=ffprofilepath,
        )
        # Let element lookups wait up to 3 seconds for elements to appear.
        browser.implicitly_wait(3)
    except Exception:
        print("Error starting Firefox")
        view_traceback()
def LoadBingSearchResults(query):
    """Rebuild the cache entry for ``query`` from the current results page.

    Every element returned by ``GetBingResultElements`` is processed with
    ``ProcessBingImageElement`` and stored under its ``data-idx`` (as a
    string). Elements that fail are logged and skipped. This function
    does not scroll the page itself.

    Args:
        query: Cache key under which the results are stored.

    Returns:
        The number of cached images as a string.
    """
    results = {}
    for element in GetBingResultElements():
        try:
            image = ProcessBingImageElement(element)
            results[str(element["data-idx"])] = image
        except Exception as err:
            # str() is required: concatenating the exception object itself
            # raises TypeError and would abort the whole load.
            print("LoadBingSearchResults Error : " + str(err))
            view_traceback()

    # Save images to the in-memory cache.
    # TODO : Convert to simple storage using sqlalchemy and sqlite3
    bingSearchCache[query] = results
    return str(len(results))