def get_results_ids_fb(query):
    """Collect Facebook event page ids for *query* via paged Bing searches.

    Repeatedly pulls 50-result batches from Bing (restricted to
    facebook.com/events), feeds each batch through extract_pages(), and
    stops when a batch comes back short.
    """
    searcher = PyBingWebSearch(API_KEY, query + " site:facebook.com/events")
    found_ids = set()
    batch = searcher.search(limit=50, format='json')  # results 1-50
    while batch:
        print(len(batch))
        found_ids = extract_pages(found_ids, batch)
        # A short batch means Bing has run out of results; otherwise page on.
        if len(batch) < 50:
            batch = []
        else:
            batch = searcher.search(limit=50, format='json')
    return list(found_ids)
def getSeedPagesFromBing(self):
    """Fetch the first ten Bing web results for the crawler's search term.

    Returns the result list on success; on any failure, logs the error via
    crawlerLogger and implicitly returns None.
    """
    try:
        searcher = PyBingWebSearch(self.apiKey, self.searchTerm)
        return searcher.search(limit=10, format="json")
    except Exception as e:
        crawlerLogger.error("Failed to get seeds. Error:" + str(e))
def searchBing(text):
    """Return the top ten Bing hits for *text* as Result(title, url) objects."""
    auth_key = '1KN2M8IdjS+AsXS7+s9NXFRw1vIcHO/awnbyF1+WjEs'
    searcher = PyBingWebSearch(auth_key, text)
    hits = searcher.search(limit=10, format='json')  # results 1-10
    return [Result(hit.title, hit.url) for hit in hits]
def write_text(): with open(riots_file) as listofriots: for i, l in enumerate(listofriots): if i < 0: # to pass the ones we already have continue try: print "looking at riot %d" % i name = l.strip() # name = l.split("-")[1].strip().split('(')[0].strip() bing_news = PyBingWebSearch(api_key, name) news = bing_news.search(limit=5, format='json') time.sleep(3) try: shutil.rmtree("riots_bing/riot_%02d" % i) except OSError: pass os.mkdir("riots_bing/riot_%02d" % i) for j, new in enumerate(news): with open('riots_bing/riot_%02d/%02d.txt' % (i, j), 'w') as riot_file: riot_file.write( new.title.encode('utf-8').strip() + '\n') riot_file.write( new.description.encode('utf-8').strip()) except: continue
def searchBing(): search_term = "site:linkedin.com instreamset:(url):\"/pub/\" -instreamset:(url):\"/dir/\" && (\"at %s\" || \"at %s\")" % ( orgname, orgname.lower()) bing_web = PyBingWebSearch(B_API_KEY, search_term) result = bing_web.search(limit=50, format='json') counter = 0 while counter < 50: try: regex_string = "'\.\s([\w\s]*\sat\s%s)'" % (orgname) if debug: print "DEBUG: Bing[", counter, "] first regex_string is: ", regex_string m = re.search('\.\s([\w\s]*\sat\sReliaQuest)', result[counter].description, re.IGNORECASE) if debug: print "DEBUG: Bing[", counter, "] raw results:" print "title: ", result[ counter].title, " description: ", result[ counter].description if m == None: if debug: print "DEBUG: Bing[", counter, "] first regex returned 'None'" regex_string = "'^.*at\s%s\.'" % (orgname) if debug: print "DEBUG: Bing[", counter, "] second regex_string is: ", regex_string m = re.search('^.*at\sReliaQuest\.', result[counter].description, re.IGNORECASE) if m == None: if debug: print "DEBUG: Bing[", counter, "] second regex returned 'None'" counter += 1 continue else: pass if debug: print "DEBUG: Bing [", counter, "] full regex match: ", str( m.group()) stdout.write('Name: ') stdout.write( str(re.sub(' \| LinkedIn', ',', result[counter].title))) stdout.write(' Role: ') try: stdout.write(str(m.group(1))) except IndexError: stdout.write(str(m.group())) pass if verbose: stdout.write(' VERBOSE_URL: ') stdout.write(result[counter].url) stdout.write("\n") counter += 1 except IndexError as e: if verbose: print "INFO: No additional Bing Search Results available" break except Exception as e: print "Bing_ERROR: Something strange happened, printing error: " print e exit()
def extract_snippet(prompt):
    """Return the descriptions of the first fifty Bing results for *prompt*."""
    API_KEY = "0nNf/RGQhw/62syJrJGDRbm4BUx4fwkyDYpiFLBobCo"
    searcher = PyBingWebSearch(API_KEY, prompt, web_only=False)
    hits = searcher.search(limit=50, format='json')
    return [hit.description for hit in hits]
def searchBing(): search_term = "site:linkedin.com instreamset:(url):\"/pub/\" -instreamset:(url):\"/dir/\" && (\"at %s\" || \"at %s\")" % (orgname,orgname.lower()) bing_web = PyBingWebSearch(B_API_KEY, search_term) result = bing_web.search(limit=50, format='json') counter = 0 while counter < 50: try: regex_string = "'\.\s([\w\s]*\sat\s%s)'" % (orgname) if debug: print "DEBUG: Bing[", counter,"] first regex_string is: ", regex_string m = re.search('\.\s([\w\s]*\sat\sReliaQuest)', result[counter].description, re.IGNORECASE) if debug: print "DEBUG: Bing[", counter,"] raw results:" print "title: ", result[counter].title, " description: ", result[counter].description if m == None: if debug: print "DEBUG: Bing[", counter,"] first regex returned 'None'" regex_string = "'^.*at\s%s\.'" % (orgname) if debug: print "DEBUG: Bing[", counter,"] second regex_string is: ", regex_string m = re.search('^.*at\sReliaQuest\.', result[counter].description, re.IGNORECASE) if m == None: if debug: print "DEBUG: Bing[", counter,"] second regex returned 'None'" counter+=1 continue else: pass if debug: print "DEBUG: Bing [", counter, "] full regex match: ", str(m.group()) stdout.write('Name: ') stdout.write(str(re.sub(' \| LinkedIn', ',', result[counter].title))) stdout.write(' Role: ') try: stdout.write(str(m.group(1))) except IndexError: stdout.write(str(m.group())) pass if verbose: stdout.write(' VERBOSE_URL: ') stdout.write(result[counter].url) stdout.write("\n") counter+=1 except IndexError as e: if verbose: print "INFO: No additional Bing Search Results available" break except Exception as e: print "Bing_ERROR: Something strange happened, printing error: " print e exit()
def getTopTen(query):
    """Return up to the first ten acceptable URLs Bing yields for *query*.

    Each candidate URL is passed through checkUrl(), which records accepted
    ones into top_ten_urls (assumption — TODO confirm against checkUrl's
    definition).
    """
    top_ten_urls = []  # holds the initial urls
    bing = PyBingWebSearch('mMlCxUd5qmU5uDJ1w1VLbDkobVK905A9cZZhYkfqGHg=',
                           query, web_only=False)
    first_results = bing.search(limit=20, format='json')
    for result in first_results:
        checkUrl(result.url, top_ten_urls)
        # BUGFIX: the old guard was `pages - counter >= 10` where counter was
        # initialized to pages and never changed, so it could never fire.
        # Cap on the number of URLs actually collected instead.
        if len(top_ten_urls) >= 10:  # only care about top 10
            break
    return top_ten_urls
def webQuery(self, query, result_num=10):
    """Run *query* against Bing and return the first *result_num* results.

    Spaces are replaced with '+' (OData-style query formatting) before the
    query is logged, echoed to stdout, and submitted.
    """
    full_query = query.replace(' ', '+')
    logging.debug('Sending following URL query: ' + full_query)
    print('%-20s= %s' % ("URL", full_query))
    searcher = PyBingWebSearch(self.__i_accountKey, full_query, web_only=False)
    return searcher.search(limit=result_num, format='json')
def BingSearch(keyword):
    """Search Bing for *keyword* and return deduplicated {"Bing": url} dicts.

    API key and result quantity come from the [bing] section of the config
    via GetTheConfig(); DeduplicateValue() removes repeated URLs.
    """
    searcher = PyBingWebSearch(GetTheConfig('bing', 'Key'), keyword,
                               web_only=False)
    hits = searcher.search(limit=int(GetTheConfig('bing', 'QUANTITY')),
                           format='json')
    unique_urls = DeduplicateValue([hit.url for hit in hits])
    return [{"Bing": url} for url in unique_urls]
def bing_search(search_term):
    """Query Bing for *search_term* using market/geo settings from config.

    Market (and latitude/longitude when both are configured) are passed as
    extra query-string parameters. Returns the raw result list, capped at
    settings['bing']['results_limit'].
    """
    lat = settings['bing'].get('latitude')
    lon = settings['bing'].get('longitude')
    extra_params = {
        # 'Sources': "'" + settings['bing']['sources'] + "'",
        'Market': "'" + settings['bing']['market'] + "'",
    }
    if lat is not None and lon is not None:
        extra_params['Latitude'] = lat
        extra_params['Longitude'] = lon
    param_str = "".join(["&" + k + "=" + v for k, v in extra_params.items()])
    # web_only is optional, but should be true to use the web-only quota
    # instead of the all-purpose quota.
    searcher = PyBingWebSearch(
        settings['bing']['api_key'],
        search_term,
        web_only=False,
        custom_params=param_str,
    )
    return searcher.search(limit=int(settings['bing']['results_limit']),
                           format='json')
def _numeric_price(price_str):
    """Interpret a scraped price string ("1,299" or "1299") as a float."""
    return float(price_str.replace(',', ''))


def _flipkart_price(url):
    """Scrape a price string from a Flipkart product page's meta Description.

    The description is scanned for the word 'for'/'For'; the price follows it,
    either as a separate "Rs." token or fused like "Rs.1299". Returns None
    when neither marker word is present.
    """
    page = requests.get(url)
    soup = bs(page.text, 'html.parser')
    meta_desc = soup.findAll(attrs={"name": "Description"})
    words = meta_desc[0]['content'].split(" ")
    for_index = None
    for marker in ('for', 'For'):  # 'For' overrides 'for', as before
        try:
            for_index = words.index(marker)
        except ValueError:
            continue
    if for_index is None:
        return None
    str_price = words[for_index + 1]
    if str_price == 'Rs.':
        return words[for_index + 2]
    return str_price[3:]  # drop a fused "Rs." prefix


def _snapdeal_price(url):
    """Scrape the price from a Snapdeal page's productPrice input, or None."""
    page = requests.get(url)
    soup = bs(page.text, 'html.parser')
    input_tag = soup.find_all('input', id='productPrice')
    try:
        return input_tag[0]['value']
    except (IndexError, KeyError):
        return None


def compare(request):
    """Django view: price-compare a product across Flipkart and Snapdeal.

    A valid POST runs a Bing search for "buy <product>", collects Flipkart
    ("/p/") and Snapdeal ("/product/") URLs from the results, scrapes each
    page's price, and renders home.html with the cheapest price per site.
    GET (or an invalid POST) renders the form.
    """
    if request.method == 'POST':
        form = search(request.POST)
        if form.is_valid():
            product = form.cleaned_data['querry']
            API_KEY = "8eFYvQ0mCr06A3YoUZV9XK7867AgLLDeLuBdhILm+3c"
            querry = "buy " + product
            bing_web = PyBingWebSearch(API_KEY, querry, web_only=False)
            results = bing_web.search(limit=50, format='json')
            fkart_urls = []
            sdeal_urls = []
            for result in results:
                comp = result.url.split('.')[1]
                parts = result.url.split('/')
                if comp == 'flipkart' and len(parts) > 4 and parts[4] == 'p':
                    fkart_urls.append(result.url)
                if comp == 'snapdeal' and len(parts) > 3 and parts[3] == 'product':
                    sdeal_urls.append(result.url)
            if not fkart_urls and not sdeal_urls:
                context = RequestContext(request, {'result': 'Search Failed!'})
                return render_to_response('home.html', context)
            fkart_price_ar = [p for p in (_flipkart_price(u) for u in fkart_urls)
                              if p is not None]
            sdeal_price_ar = [p for p in (_snapdeal_price(u) for u in sdeal_urls)
                              if p is not None]
            # BUGFIX: the old code compared price *strings* with ">" — that
            # found the lexicographic maximum, not the lowest price. Compare
            # numerically and keep 0 as the "no prices found" placeholder.
            min_fkart = (min(fkart_price_ar, key=_numeric_price)
                         if fkart_price_ar else 0)
            min_sdeal = (min(sdeal_price_ar, key=_numeric_price)
                         if sdeal_price_ar else 0)
            context = RequestContext(
                request, {
                    'form': form,
                    'result': 'Search Succesful!',  # (sic — template text)
                    'flipkart_price': str(min_fkart),
                    'snapdeal_price': str(min_sdeal)
                })
            return render_to_response('home.html', context)
        # BUGFIX: an invalid POST previously fell off the end of the view and
        # returned None (a Django error); re-render the bound form instead.
        context = RequestContext(request, {'form': form})
        return render_to_response('home.html', context)
    else:
        form = search()
        context = RequestContext(request, {'form': form})
        return render_to_response('home.html', context)
def get_top_bing_goodreads_search(search_term):
    """Return Goodreads book-page URLs from a site-restricted Bing search."""
    query = "site:goodreads.com {0}".format(search_term)
    searcher = PyBingWebSearch(BING_SEARCH_API_KEY, query, web_only=False)
    urls = []
    for hit in searcher.search(limit=50, format='json'):
        if 'goodreads.com/book/show/' in hit.url:
            urls.append(hit.url)
    return urls
#coding=utf-8 from py_bing_search import PyBingWebSearch search_term = "site:cert.org.cn" bing_web = PyBingWebSearch('6I7UKjtX4bFiCDO0eQr4N4ErGG1+10BSWTmt0/aQ9QE', search_term, web_only=False) # web_only is optional, but should be true to use your web only quota instead of your all purpose quota first_fifty_result= bing_web.search(limit=50, format='json') #1-50 second_fifty_result= bing_web.search(limit=50, format='json') #51-100 # 显示标题 second_fifty_result[0].description) # 显示url second_fifty_result[0].url) '''for x in xrange(1,int(len(second_fifty_result))): print second_fifty_result[x].url ''' '''for x in xrange(1,int(len(first_fifty_result))): print first_fifty_result[x].url,first_fifty_result[x].title ''' for y in (first_fifty_result,second_fifty_result): for x in xrange(1,int(len(y))): print y[x].url pass
def search(search_term):
    """Return the first ten Bing web results for *search_term*."""
    searcher = PyBingWebSearch(s5, search_term, web_only=False)
    return searcher.search(limit=10, format='json')
from py_bing_search import PyBingWebSearch

# Spell-correct the query, then run a six-result Bing web search followed by
# a six-result monochrome image search for the same term.
from correct import *

# `s` is presumably exported by the `correct` module — verify against it.
s = corrections(s)
search_term = s
bing_web = PyBingWebSearch('1jJ4jrq6jGMZl9fWXwVT9DiaIAZjDdNiByM/hdcstfI',
                           search_term, web_only=False)
x = bing_web.search(limit=6, format='json')
results = [(hit.title, hit.description, hit.url) for hit in x]

from py_bing_search import PyBingImageSearch

bing_image = PyBingImageSearch(
    '1jJ4jrq6jGMZl9fWXwVT9DiaIAZjDdNiByM/hdcstfI', s,
    image_filters='Size:medium+Color:Monochrome')  # image_filters is optional
photos = bing_image.search(limit=6, format='json')
images = []
def test_can_search(self):
    """Smoke-test a live Bing query: expect 50 results, first title mentions Python."""
    web_bing = PyBingWebSearch(SECRET_KEY, "Python Software Foundation")
    result_one = web_bing.search(limit=50)
    self.assertTrue(len(result_one) == 50)
    self.assertTrue("Python" in result_one[0].title)
    # BUGFIX: `time.sleep` was a bare attribute reference, never called, so it
    # did nothing. Actually pause between live-API tests. 1s chosen as a
    # minimal rate-limit delay — TODO confirm the appropriate duration.
    time.sleep(1)
def _GetMovieResearch(self, term, limit=50, format='json'):
    """Return up to *limit* Bing web results for *term* in *format*."""
    searcher = PyBingWebSearch(self.BING_API_KEY, term, web_only=False)
    return searcher.search(limit, format)