def write_text():
    """Search Bing for each riot name listed in ``riots_file`` and dump the
    top five result titles/descriptions to ``riots_bing/riot_NN/MM.txt``.

    Relies on module globals ``riots_file`` and ``api_key``, plus ``time``,
    ``shutil``, ``os`` and ``PyBingWebSearch`` being in scope.
    """
    with open(riots_file) as listofriots:
        for i, l in enumerate(listofriots):
            # Resume knob: raise the threshold to skip riots already fetched.
            if i < 0:
                continue
            try:
                print("looking at riot %d" % i)
                name = l.strip()
                bing_news = PyBingWebSearch(api_key, name)
                news = bing_news.search(limit=5, format='json')
                time.sleep(3)  # crude rate limiting between API calls
                # Recreate this riot's output directory from scratch.
                try:
                    shutil.rmtree("riots_bing/riot_%02d" % i)
                except OSError:
                    pass  # directory did not exist yet
                os.mkdir("riots_bing/riot_%02d" % i)
                for j, new in enumerate(news):
                    with open('riots_bing/riot_%02d/%02d.txt' % (i, j), 'w') as riot_file:
                        riot_file.write(
                            new.title.encode('utf-8').strip() + '\n')
                        riot_file.write(
                            new.description.encode('utf-8').strip())
            except Exception:
                # Best-effort: skip riots whose search/download fails.
                # FIX: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit.
                continue
def __init__(self, config):
    """Remember the scanner configuration and prepare the Bing client."""
    self.config = config
    self.bs = PyBingWebSearch("YOUR ID")
    # Delay between queries and page size both come from the config dict.
    self.cooldown = self.config["p_googlesleep"]
    self.results_per_page = int(self.config["p_results_per_query"])
    pages_to_skip = self.config["p_skippages"]
    if pages_to_skip > 0:
        print("Bing Scanner will skip the first %d pages..." % pages_to_skip)
def getSeedPagesFromBing(self):
    """Run the configured Bing query and return up to 10 JSON results.

    On failure the error is logged and ``None`` is returned implicitly.
    """
    try:
        client = PyBingWebSearch(self.apiKey, self.searchTerm)
        return client.search(limit=10, format="json")
    except Exception as e:
        crawlerLogger.error("Failed to get seeds. Error:" + str(e))
def searchBing(): search_term = "site:linkedin.com instreamset:(url):\"/pub/\" -instreamset:(url):\"/dir/\" && (\"at %s\" || \"at %s\")" % ( orgname, orgname.lower()) bing_web = PyBingWebSearch(B_API_KEY, search_term) result = bing_web.search(limit=50, format='json') counter = 0 while counter < 50: try: regex_string = "'\.\s([\w\s]*\sat\s%s)'" % (orgname) if debug: print "DEBUG: Bing[", counter, "] first regex_string is: ", regex_string m = re.search('\.\s([\w\s]*\sat\sReliaQuest)', result[counter].description, re.IGNORECASE) if debug: print "DEBUG: Bing[", counter, "] raw results:" print "title: ", result[ counter].title, " description: ", result[ counter].description if m == None: if debug: print "DEBUG: Bing[", counter, "] first regex returned 'None'" regex_string = "'^.*at\s%s\.'" % (orgname) if debug: print "DEBUG: Bing[", counter, "] second regex_string is: ", regex_string m = re.search('^.*at\sReliaQuest\.', result[counter].description, re.IGNORECASE) if m == None: if debug: print "DEBUG: Bing[", counter, "] second regex returned 'None'" counter += 1 continue else: pass if debug: print "DEBUG: Bing [", counter, "] full regex match: ", str( m.group()) stdout.write('Name: ') stdout.write( str(re.sub(' \| LinkedIn', ',', result[counter].title))) stdout.write(' Role: ') try: stdout.write(str(m.group(1))) except IndexError: stdout.write(str(m.group())) pass if verbose: stdout.write(' VERBOSE_URL: ') stdout.write(result[counter].url) stdout.write("\n") counter += 1 except IndexError as e: if verbose: print "INFO: No additional Bing Search Results available" break except Exception as e: print "Bing_ERROR: Something strange happened, printing error: " print e exit()
def extract_snippet(prompt, api_key="0nNf/RGQhw/62syJrJGDRbm4BUx4fwkyDYpiFLBobCo"):
    """Return the descriptions of the first 50 Bing web results for *prompt*.

    ``api_key`` was previously hard-coded in the body; it is now an
    overridable parameter whose default preserves the old behavior.
    NOTE(review): a secret committed to source should be rotated and
    loaded from configuration instead.
    """
    bing_web = PyBingWebSearch(api_key, prompt, web_only=False)
    first_fifty_result = bing_web.search(limit=50, format='json')
    return [result.description for result in first_fifty_result]
def get_results_ids_fb(query):
    """Collect facebook.com/events page ids from Bing results for *query*."""
    bing_web = PyBingWebSearch(API_KEY, query + " site:facebook.com/events")
    results = bing_web.search(limit=50, format='json')
    set_ids = set()
    while results:
        print(len(results))
        set_ids = extract_pages(set_ids, results)
        # A short page means the results are exhausted; otherwise ask the
        # (stateful) client for the next batch of up to 50.
        if len(results) < 50:
            results = []
        else:
            results = bing_web.search(limit=50, format='json')
    return list(set_ids)
def webQuery(self, query, result_num=10):
    """Run *query* against Bing and return the first *result_num* JSON results."""
    # Bing expects '+' as the token separator in the raw query string.
    full_query = query.replace(' ', '+')
    logging.debug('Sending following URL query: ' + full_query)
    print('%-20s= %s' % ("URL", full_query))
    searcher = PyBingWebSearch(self.__i_accountKey, full_query, web_only=False)
    return searcher.search(limit=result_num, format='json')
def getfrombing(self, apikey, text, limit, operation):
    """Dispatch *operation* to the matching py_bing_search client and return
    up to *limit* JSON-formatted results.

    Supported operations: moderateimagesearch, strictimagesearch,
    adultimagesearch, websearch, videosearch, newssearch.

    Raises:
        ValueError: for an unknown *operation* (previously fell through and
        crashed with a NameError on ``bing_obj``).
    """
    if operation == 'moderateimagesearch':
        bing_obj = PyBingImageSearch(apikey, text,
                                     custom_params="&Adult='Moderate'")
    elif operation == 'strictimagesearch':
        bing_obj = PyBingImageSearch(apikey, text,
                                     custom_params="&Adult='Strict'")
    elif operation == 'adultimagesearch':
        bing_obj = PyBingImageSearch(apikey, text,
                                     custom_params="&Adult='Off'")
    elif operation == 'websearch':
        bing_obj = PyBingWebSearch(apikey, text, web_only=False)
    elif operation == 'videosearch':
        bing_obj = PyBingVideoSearch(apikey, text)
    elif operation == 'newssearch':
        bing_obj = PyBingNewsSearch(apikey, text)
    else:
        raise ValueError("unsupported operation: %r" % (operation,))
    return bing_obj.search(limit=limit, format='json')
def BingSearch(keyword):
    """Query Bing for *keyword* and return deduplicated result URLs, each
    wrapped as ``{"Bing": url}``."""
    client = PyBingWebSearch(GetTheConfig('bing', 'Key'), keyword,
                             web_only=False)
    quantity = int(GetTheConfig('bing', 'QUANTITY'))
    hits = client.search(limit=quantity, format='json')
    raw_urls = [hit.url for hit in hits]
    unique_urls = DeduplicateValue(raw_urls)
    return [{"Bing": u} for u in unique_urls]
def bing_search(search_term):
    """Search Bing for *search_term* using market/geo parameters taken from
    ``settings['bing']``; returns the JSON result list."""
    cfg = settings['bing']
    lat = cfg.get('latitude')
    lon = cfg.get('longitude')
    params = {
        # 'Sources': "'" + cfg['sources'] + "'",
        'Market': "'" + cfg['market'] + "'",
    }
    if lat is not None and lon is not None:
        params['Latitude'] = lat
        params['Longitude'] = lon
    # Flatten into the "&Key=Value..." fragment the client appends verbatim.
    params_str = "".join(["&" + k + "=" + v for k, v in iter(params.items())])
    # web_only is optional, but should be true to use your web only quota
    # instead of your all purpose quota
    client = PyBingWebSearch(
        cfg['api_key'],
        search_term,
        web_only=False,
        custom_params=params_str,
    )
    return client.search(limit=int(cfg['results_limit']), format='json')
def compare(request):
    """Django view: search Bing for a product and show the lowest Flipkart
    and Snapdeal prices found by scraping the result pages.

    GET renders an empty form; POST with a valid form runs the comparison.
    """
    if request.method == 'POST':
        form = search(request.POST)
        if form.is_valid():
            product = form.cleaned_data['querry']
            # NOTE(review): API key committed in source — rotate and move
            # to settings/configuration.
            API_KEY = "8eFYvQ0mCr06A3YoUZV9XK7867AgLLDeLuBdhILm+3c"
            querry = "buy " + product
            bing_web = PyBingWebSearch(API_KEY, querry, web_only=False)
            results = bing_web.search(limit=50, format='json')
            fkart_urls = []
            fkart_price_ar = []
            sdeal_urls = []
            sdeal_price_ar = []
            min_fkart = 0
            min_sdeal = 0
            # Keep only Flipkart product pages (/p/) and Snapdeal product
            # pages (/product/).
            for result in results:
                comp = result.url.split('.')[1]
                if comp == 'flipkart':
                    try:
                        p = result.url.split('/')[4]
                    except IndexError:
                        continue
                    if p == 'p':
                        fkart_urls.append(result.url)
                if comp == 'snapdeal':
                    try:
                        p = result.url.split('/')[3]
                    except IndexError:
                        continue
                    if p == 'product':
                        sdeal_urls.append(result.url)
            if len(fkart_urls) == 0 and len(sdeal_urls) == 0:
                result = 'Search Failed!'
                context = RequestContext(request, {'result': result})
                return render_to_response('home.html', context)
            else:
                # Flipkart: the price follows the word 'for'/'For' in the
                # page's meta Description content.
                for url in fkart_urls:
                    fkart_page = requests.get(url)
                    fkart_soup = bs(fkart_page.text, 'html.parser')
                    meta_desc = fkart_soup.findAll(
                        attrs={"name": "Description"})
                    meta_desc_content_split = meta_desc[0]['content'].split(
                        " ")
                    for_bool = 0
                    For_bool = 0
                    try:
                        for_index = meta_desc_content_split.index('for')
                    except ValueError:
                        for_bool = 1
                    try:
                        for_index = meta_desc_content_split.index('For')
                    except ValueError:
                        For_bool = 1
                    if for_bool == 0 or For_bool == 0:
                        str_price = meta_desc_content_split[for_index + 1]
                        if (str_price == 'Rs.'):
                            fkart_price = meta_desc_content_split[for_index + 2]
                        else:
                            # Strip the leading currency marker (e.g. "Rs.").
                            fkart_price = str_price[3:]
                        fkart_price_ar.append(fkart_price)
                # Snapdeal: price sits in a hidden input#productPrice.
                for url in sdeal_urls:
                    sdeal_page = requests.get(url)
                    sdeal_soup = bs(sdeal_page.text, 'html.parser')
                    input_tag = sdeal_soup.find_all('input', id='productPrice')
                    try:
                        sdeal_price_ar.append(input_tag[0]['value'])
                    except IndexError:
                        pass  # page had no price input; skip it
                # BUG FIX: the minimum was previously computed with '>'
                # (which finds the MAXIMUM) and compared strings
                # lexicographically. Compare numerically instead.
                # NOTE(review): assumes prices parse as floats (no
                # thousands separators) — confirm against live pages.
                if (len(fkart_price_ar) > 0):
                    min_fkart = min(fkart_price_ar, key=float)
                if (len(sdeal_price_ar) > 0):
                    min_sdeal = min(sdeal_price_ar, key=float)
                result = 'Search Succesful!'
                context = RequestContext(
                    request, {
                        'form': form,
                        'result': result,
                        'flipkart_price': str(min_fkart),
                        'snapdeal_price': str(min_sdeal)
                    })
                return render_to_response('home.html', context)
    else:
        form = search()
        context = RequestContext(request, {'form': form})
        return render_to_response('home.html', context)
def get_top_bing_goodreads_search(search_term):
    """Return Bing result URLs that point at Goodreads book pages for
    *search_term*."""
    query = "site:goodreads.com {0}".format(search_term)
    client = PyBingWebSearch(BING_SEARCH_API_KEY, query, web_only=False)
    matching_urls = []
    for hit in client.search(limit=50, format='json'):
        if 'goodreads.com/book/show/' in hit.url:
            matching_urls.append(hit.url)
    return matching_urls
def test_search_all(self):
    """search_all with limit=60 yields exactly 60 Python-related results."""
    searcher = PyBingWebSearch(SECRET_KEY, "Python Software Foundation")
    hits = searcher.search_all(limit=60)
    self.assertTrue(len(hits) == 60)
    self.assertTrue("Python" in hits[0].title)
def _GetMovieResearch(self, term, limit=50, format='json'):
    """Fetch up to *limit* Bing web results for *term* in the given format."""
    searcher = PyBingWebSearch(self.BING_API_KEY, term, web_only=False)
    return searcher.search(limit, format)
# Read the captured text, spell-correct it, then search Bing for both web
# results and medium/monochrome images matching the corrected text.
# FIX: the file is now closed via a context manager (was left open) and the
# Windows path is a raw string; the dead commented-out example was removed.
with open(r"D:\Projects\hackathon\content2.txt", "r") as f:
    s = f.read()

from py_bing_search import PyBingWebSearch
from correct import *

s = corrections(s)  # spell-correction from the local `correct` module
search_term = s
bing_web = PyBingWebSearch('1jJ4jrq6jGMZl9fWXwVT9DiaIAZjDdNiByM/hdcstfI',
                           search_term, web_only=False)
x = bing_web.search(limit=6, format='json')
results = [(hit.title, hit.description, hit.url) for hit in x]

from py_bing_search import PyBingImageSearch

# image_filters is optional
bing_image = PyBingImageSearch(
    '1jJ4jrq6jGMZl9fWXwVT9DiaIAZjDdNiByM/hdcstfI', s,
    image_filters='Size:medium+Color:Monochrome')
photos = bing_image.search(limit=6, format='json')