def run(self, keywords=[]): if not keywords: # Check if file exists if not os.path.isfile(self.default_keyword_file): return False else: keywords = [] fp = open(self.default_keyword_file, "r") for line in fp.readlines(): keywords.append(line.strip()) fp.close() self.keywords = keywords print "Using Keywords:{0}".format(self.keywords) try: # Get the hits for the given keywords bing = PyBingSearch(BING_API_KEY) for keyword in self.keywords: print "KEYWORD:{0}".format(keyword) result_list, next_uri = bing.search(keyword, limit=self.maxResuts, format='json') for result in result_list: url = result.url print "Found URL:{0}".format(url) self.urls.append(url) except: print "Something went wrong querying Bing." pass return True
def bing_search(query):
    """Return all result URLs of a Bing web search for *query*.

    Commas in the query are replaced with spaces before searching.
    """
    bing = PyBingSearch('rLSasvRW9cvlU5fG9hoSGjJG2M1eIjR+Ld27nFC9Pj8=')
    buildquery = query.replace(',', ' ')
    # BUG FIX: the sanitized query was built but never used -- the raw
    # query was searched instead.  Search the sanitized form.
    result_list = bing.search_all(buildquery, limit=10, format='json')
    return [result.url for result in result_list]
def bingSearch(self, numresult=10):
    """Return up to *numresult* Bing result URLs for ``self.query``.

    BUG FIX: the original indexed results[0..numresult), which raises
    IndexError whenever Bing returns fewer than *numresult* hits.
    Slicing is bounds-safe and returns whatever is available.
    """
    bing = PyBingSearch(self.bing_api_key)
    results, next_uri = bing.search(self.query, limit=numresult, format='json')
    return [hit.url for hit in results[:numresult]]
def GetLinksForQueryBing(query):
    """Return up to 20 Bing result URLs for *query*, excluding any URL
    containing "youtube".  Returns [] when the Bing query fails.
    """
    bing = PyBingSearch('3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY')
    try:
        result_list = bing.search_all(query, limit=20, format='json')
    except PyBingException:
        return []
    # Slicing past the end is already safe, so the original
    # min(20, len(results)) bound was redundant.
    results = [result.url for result in result_list][:20]
    # 'in' is the idiomatic substring test (was: r.find(...) == -1).
    return [r for r in results if "youtube" not in r]
def bingSearch(link, limit=4):
    """Best-effort Bing search for *link*; returns up to *limit* URLs
    (UTF-8 encoded byte strings).

    On total failure the function never raises: it falls back to
    returning a one-element list holding the query itself with any
    trailing " news" removed.
    """
    try:
        # Sanitize input: '^' is replaced by '|' before querying.
        linkfile = link.replace("^", "|")
        bing = PyBingSearch('MsYC/eW39AiaY9EYFIC8mlX8C7HPRRooagMKRwVZx7Q')
        try:
            result_list, next_uri = bing.search(linkfile, limit, format='json')
        except Exception:
            # Retry once without a " news" suffix if the query failed.
            result_list, next_uri = bing.search(linkfile.replace(" news", ""), limit, format='json')
        returning = []
        # Slice instead of indexing until IndexError; still stop early
        # (returning partial results) if a URL cannot be encoded.
        for result in result_list[:limit]:
            try:
                returning.append(result.url.encode('utf8'))
            except Exception:
                break
        return returning
    except Exception:
        # Deliberate best-effort: never propagate to the caller.
        return [link.replace(" news", "")]
def getTopTen(): global query global pagesToBeCrawled global fb bing = PyBingSearch('mMlCxUd5qmU5uDJ1w1VLbDkobVK905A9cZZhYkfqGHg=') query = raw_input("Enter a search query ") pagesToBeCrawled = input("Enter the number of pages you would like to be crawled? ") fp.write('****************************The query searched for is:' + query + ", pages to be crawled: " + str(pagesToBeCrawled) + '\n') urlList, next_uri = bing.search(query, limit=10, format='json') # get the results for result in urlList: #initialUrls.append(result); # Add the initial lists to the list if (pages > pagesToBeCrawled): print 'We have successfully crawled',pagesToBeCrawled,'pages' break checkUrl(result.url)
def bingSearch(linkfile): print "\nCalling bingSearch with arguments linkfile: {}:".format(str(linkfile)) #Sanitize input linkfile = linkfile.replace("^", "|") bing=PyBingSearch('XXXXX') #Get from bing: result_list, next_uri = bing.search(linkfile, limit=5, format='json') #result_list, next_uri = bing.search("Python Software Foundation", limit=50, format='json') result_list[0].description #first bing result file = open( 'bingResults.txt', 'w') for res in result_list: file.write('"' + res.url + '" ') break print "\nbingSearch complete" return str(result_list[0].url)
def GetLinksForQueryBing(query):
    """Return up to 20 Bing result URLs for *query*, or None when the
    search fails for any reason.
    """
    # (Removed the dead, commented-out Google Custom Search variant.)
    try:
        bing = PyBingSearch('3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY')
        result_list = bing.search_all(query, limit=20, format='json')
        results = [result.url for result in result_list]
    except Exception:
        # Narrowed from a bare except; callers treat None as "no links".
        return None
    # Slicing past the end is safe, so min(20, len(results)) was redundant.
    return results[:20]
def get_improved_term(query):
    """Try to canonicalize *query* via Wikipedia.

    Bing-searches "<query> wikipedia" and, when an English Wikipedia
    article that is not a disambiguation page is found, returns the
    URL-decoded article title; otherwise returns *query* unchanged.
    """
    bing = PyBingSearch('')  # Add your bing-api key here
    result_list, next_url = bing.search("%s wikipedia" % query, limit=3, format='json')
    for result in result_list:
        wiki_url = result.url
        wiki_desc = result.description
        if "en.wikipedia" in wiki_url:
            # BUG FIX: the original used 'or', which is true unless the
            # description contains BOTH phrases, so disambiguation pages
            # were effectively never skipped.  'and' takes the title
            # only when NEITHER disambiguation phrase appears.
            if ("may refer to" not in wiki_desc) and ("may also refer to" not in wiki_desc):
                wiki_corr_term = wiki_url.split("/")[-1]
                try:
                    return str(urllib.unquote(wiki_corr_term).decode('utf-8'))
                except Exception:
                    # Undecodable title: fall through to the next hit.
                    pass
    return query
soup = BeautifulSoup(r) body = soup.find('body').text body = unicodedata.normalize('NFKD', body).encode('ascii', 'ignore') body = body.splitlines() body = [i for i in body if i != ''] body = [x for x in body if len(x) > 70] body = map(cut, body) if len(body) < 5: indexes = range(0, len(body)) else: indexes = [randint(0, len(body) - 1) for i in range(0, 5)] return ['"' + body[i] + '"' for i in indexes] #Now the request bing = PyBingSearch('1lQ7z/Ye5Qo/vuWoEuznwGUDQX841pfEkLC77SBTNCs') #Function def request_urls(url): statements = rand_statements(url) list_duplicates = [] for statement in statements: result_list, next_uri = bing.search(statement, limit=50, format='json') results = [ unicodedata.normalize('NFKD', result_list[i].url).encode( 'ascii', 'ignore') for i in range(0, len(result_list)) ] list_duplicates = list_duplicates + results #Get the frequencies of each url we get return Counter(list_duplicates).most_common()
def bingWikiSearch(self):
    """Return the URL of the top Bing hit for '<first query word> :wiki'."""
    first_word = self.query.split(" ")[0]
    wiki_query = "{0} :wiki".format(first_word)
    engine = PyBingSearch(self.bing_api_key)
    hits, _next_uri = engine.search(wiki_query, limit=1, format='json')
    return hits[0].url
from py_bing_search import PyBingSearch import urllib import urllib2 import json import os import socket socket.setdefaulttimeout(5) key = '4axpjG94pE8x9yUZqveY2LObcgNLVfX5oTW6+s5JbR0' bing = PyBingSearch('4axpjG94pE8x9yUZqveY2LObcgNLVfX5oTW6+s5JbR0') credentialBing = 'Basic ' + (':%s' % key).encode( 'base64')[:-1] # the "-1" is to remove the trailing "\n" which encode adds photo_directory = 'bingBad' if not os.path.exists(photo_directory): os.makedirs(photo_directory) for offset in range(0, 50000, 50): bing_search_url = "https://api.datamarket.azure.com/Bing/Search/v1/Image?Query=%27bad%20photography%27&$format=json&$top=200&$skip=" + str( offset) request = urllib2.Request(bing_search_url) request.add_header('Authorization', credentialBing) requestOpener = urllib2.build_opener() response = requestOpener.open(request) results = json.load(response) for i, image in enumerate(results['d']['results']):
def GetLinksForQueryBing(query):
    """Return the URLs of at most the first 20 Bing hits for *query*."""
    engine = PyBingSearch('3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY')
    hits = engine.search_all(query, limit=20, format='json')
    urls = []
    for hit in hits:
        urls.append(hit.url)
    return urls[:20]
import json import os import requests import time from collections import defaultdict from functools import reduce from py_bing_search import PyBingSearch import sys apikey = 'wtprucmwrgk6bd92rq7tun97' edmund_url = 'http://api.edmunds.com/api/vehicle/v2/' end_url = '?fmt=json&view=full&api_key=' + apikey bing = PyBingSearch('Np5rmrL6fIPP3jpDqVi+Li/rJ1Joih4Q6wP69HrjQro=') model_id = 1 make_id = 1 engine_id = 1 models_list = [] makes_list = [] engines_list = [] make_ids = {} makes_models_dict = defaultdict(list) makes_json = requests.get(edmund_url + 'makes' + end_url).json() def add_engines(engine, model_id): global engine_id global engines_list global models_list
from py_bing_search import PyBingSearch bing = PyBingSearch('3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY') result_list = bing.search_all( "(yawn) AND (other OR early) AND (people) AND (contagious OR catching) AND (room)", limit=50, format='json') print[result.url for result in result_list][:10]
# -*- coding: utf-8 -*- __author__ = 'lufo' from py_bing_search import PyBingSearch bing = PyBingSearch('QkcWAM6VJ/S0LJI9wvVGN4UNQUwikMb4zY/kUVe/hAw') result_list, next_uri = bing.search("Python Software Foundation", limit=50, format='json') for result in result_list: print result.url