예제 #1
0
    def run(self, keywords=None):
        """Collect Bing result URLs for every keyword into ``self.urls``.

        Args:
            keywords: Search terms to query.  ``None`` or an empty sequence
                means "read them, one per line, from
                ``self.default_keyword_file``".

        Returns:
            ``False`` when no keywords were passed and the default keyword
            file does not exist.  ``True`` otherwise, even if querying Bing
            failed (the failure is only reported on stdout).
        """
        if not keywords:
            if not os.path.isfile(self.default_keyword_file):
                return False
            # The context manager closes the file even if reading fails.
            with open(self.default_keyword_file, "r") as fp:
                keywords = [line.strip() for line in fp]

        self.keywords = keywords
        # Single-argument print(...) produces the same output as the
        # Python 2 print statement.
        print("Using Keywords:{0}".format(self.keywords))

        try:
            # Get the hits for the given keywords
            bing = PyBingSearch(BING_API_KEY)
            for keyword in self.keywords:
                print("KEYWORD:{0}".format(keyword))
                result_list, _next_uri = bing.search(keyword,
                                                     limit=self.maxResuts,
                                                     format='json')
                for result in result_list:
                    url = result.url
                    print("Found URL:{0}".format(url))
                    self.urls.append(url)
        except Exception:
            # Best effort: keep whatever URLs were gathered so far.  Catching
            # Exception (not a bare except) lets Ctrl-C and SystemExit through.
            print("Something went wrong querying Bing.")

        return True
예제 #2
0
def bing_search(query):
    """Return the URLs of up to 10 Bing results for *query*.

    Commas in *query* are replaced by spaces before the search is issued.
    """
    bing = PyBingSearch('rLSasvRW9cvlU5fG9hoSGjJG2M1eIjR+Ld27nFC9Pj8=')
    search_terms = query.replace(',', ' ')
    # Search with the comma-free terms; passing the raw query here would make
    # the replacement above a no-op.
    result_list = bing.search_all(search_terms, limit=10, format='json')
    return [result.url for result in result_list]
 def bingSearch(self, numresult=10):
     """Return the URLs of at most *numresult* Bing hits for ``self.query``.

     The returned list is shorter than *numresult* when the search yields
     fewer hits.
     """
     bing = PyBingSearch(self.bing_api_key)
     results, _next_uri = bing.search(self.query,
                                      limit=numresult,
                                      format='json')
     # Slice instead of indexing 0..numresult-1: indexing raises IndexError
     # as soon as the result list is shorter than numresult.  max() keeps a
     # negative numresult meaning "no results", as range() did.
     return [result.url for result in results[:max(numresult, 0)]]
예제 #4
0
def GetLinksForQueryBing(query):
    """Return Bing result URLs for *query*, leaving out YouTube links.

    At most the first 20 result URLs are considered.  An empty list is
    returned when the search raises ``PyBingException``.
    """
    client = PyBingSearch('3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY')
    try:
        hits = client.search_all(query, limit=20, format='json')
    except PyBingException:
        return []

    urls = [hit.url for hit in hits]
    links = []
    for url in urls[:20]:
        # Keep only URLs that do not mention "youtube" anywhere.
        if url.find("youtube") < 0:
            links.append(url)
    return links
def bingSearch(link, limit=4):
    """Return up to *limit* UTF-8 encoded Bing result URLs for *link*.

    ``^`` in *link* is replaced by ``|`` before searching.  If the first
    search raises, it is retried once with " news" removed from the query.

    Returns:
        The list of encoded result URLs, or, if anything else fails, a
        one-element list holding *link* with " news" removed.
    """
    try:
        # Sanitize input
        query = link.replace("^", "|")
        bing = PyBingSearch('MsYC/eW39AiaY9EYFIC8mlX8C7HPRRooagMKRwVZx7Q')
        try:
            result_list, _next_uri = bing.search(query, limit, format='json')
        except Exception:
            # Retry once with the " news" suffix dropped from the query.
            result_list, _next_uri = bing.search(query.replace(" news", ""),
                                                 limit, format='json')
        urls = []
        # max() keeps a negative limit meaning "no results".
        for result in result_list[:max(limit, 0)]:
            try:
                urls.append(result.url.encode('utf8'))
            except Exception:
                # Stop at the first result whose URL cannot be read/encoded.
                break
        return urls
    except Exception:
        # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
        # propagate while keeping the best-effort fallback.
        return [link.replace(" news", "")]
예제 #6
0
def getTopTen():
	global query
	global pagesToBeCrawled
	global fb
	bing = PyBingSearch('mMlCxUd5qmU5uDJ1w1VLbDkobVK905A9cZZhYkfqGHg=')
	query = raw_input("Enter a search query ")
	pagesToBeCrawled = input("Enter the number of pages you would like to be crawled? ")
	fp.write('****************************The query searched for is:' + query + ", pages to be crawled: " + str(pagesToBeCrawled) + '\n')
	urlList, next_uri = bing.search(query, limit=10, format='json') # get the results
	for result in urlList:
		#initialUrls.append(result); # Add the initial lists to the list
		if (pages > pagesToBeCrawled):
				print 'We have successfully crawled',pagesToBeCrawled,'pages'
				break
		checkUrl(result.url)
예제 #7
0
def bingSearch(linkfile):
    """Search Bing for *linkfile* and return the URL of the first hit.

    ``^`` in *linkfile* is replaced by ``|`` before searching.  The first
    hit's URL is also written, wrapped in double quotes and followed by a
    space, to ``bingResults.txt`` (any previous content is overwritten).

    Raises:
        IndexError: if the search returned no results.
    """
    print("\nCalling bingSearch with arguments linkfile: {}:".format(str(linkfile)))
    # Sanitize input
    linkfile = linkfile.replace("^", "|")

    bing = PyBingSearch('XXXXX')
    # Get from bing:
    result_list, _next_uri = bing.search(linkfile, limit=5, format='json')
    # Look up the first hit before opening the output file, so an empty
    # result list fails without truncating bingResults.txt.
    first_url = result_list[0].url
    # The context manager closes (and thereby flushes) the file.
    with open('bingResults.txt', 'w') as out:
        out.write('"' + first_url + '" ')
    print("\nbingSearch complete")
    return str(first_url)
예제 #8
0
def GetLinksForQueryBing(query):
    """Return up to 20 Bing result URLs for *query*.

    Returns:
        A list of URLs, or ``None`` when creating the client or running the
        search raised an exception.
    """
    try:
        bing = PyBingSearch('3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY')
        result_list = bing.search_all(query, limit=20, format='json')
        results = [result.url for result in result_list]
    except Exception:
        # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
        # propagate; callers still get None for any search failure.
        return None
    return results[:20]
예제 #9
0
def get_improved_term(query):
    """Return a Wikipedia-derived form of *query*, or *query* unchanged.

    Searches Bing for "<query> wikipedia" (3 results).  For the first hit
    whose URL contains "en.wikipedia" and whose description does not look
    like a disambiguation page, the last path segment of the URL is
    URL-unquoted, UTF-8 decoded and returned as ``str``.  If no hit
    qualifies, *query* itself is returned.
    """
    bing = PyBingSearch('')  # Add your bing-api key here
    result_list, _next_url = bing.search("%s wikipedia" % query,
                                         limit=3,
                                         format='json')
    for result in result_list:
        wiki_url = result.url
        wiki_desc = result.description
        if "en.wikipedia" not in wiki_url:
            continue
        # Reject the hit when EITHER disambiguation phrase occurs.  Joining
        # the two "not in" tests with `or` rejected a page only when both
        # phrases occurred.
        if "may refer to" in wiki_desc or "may also refer to" in wiki_desc:
            continue
        wiki_corr_term = wiki_url.split("/")[-1]
        try:
            return str(urllib.unquote(wiki_corr_term).decode('utf-8'))
        except UnicodeError:
            # The title could not be decoded or converted to str; try the
            # next hit.
            continue
    return query
    soup = BeautifulSoup(r)
    body = soup.find('body').text
    body = unicodedata.normalize('NFKD', body).encode('ascii', 'ignore')
    body = body.splitlines()
    body = [i for i in body if i != '']
    body = [x for x in body if len(x) > 70]
    body = map(cut, body)
    if len(body) < 5:
        indexes = range(0, len(body))
    else:
        indexes = [randint(0, len(body) - 1) for i in range(0, 5)]
    return ['"' + body[i] + '"' for i in indexes]


# Module-level Bing client; request_urls() issues its searches through it.
bing = PyBingSearch('1lQ7z/Ye5Qo/vuWoEuznwGUDQX841pfEkLC77SBTNCs')


#Function
def request_urls(url):
    """Count how often each URL shows up when searching statements from *url*.

    Every statement returned by ``rand_statements(url)`` is searched on Bing
    (up to 50 results each).  Each result URL is NFKD-normalised and reduced
    to ASCII, dropping characters that cannot be represented.

    Returns:
        The ``Counter.most_common()`` list of ``(url, count)`` pairs over all
        collected result URLs.
    """
    collected = []
    for statement in rand_statements(url):
        hits, _next_uri = bing.search(statement, limit=50, format='json')
        for hit in hits:
            normalised = unicodedata.normalize('NFKD', hit.url)
            collected.append(normalised.encode('ascii', 'ignore'))
    # Get the frequencies of each url we get
    return Counter(collected).most_common()
 def bingWikiSearch(self):
     """Return the URL of the top Bing hit for the query's first word plus " :wiki".

     Only the text before the first space of ``self.query`` is used.
     """
     first_word = self.query.split(" ")[0]
     wiki_query = first_word + " :wiki"
     client = PyBingSearch(self.bing_api_key)
     hits, _next_uri = client.search(wiki_query, limit=1, format='json')
     top_hit = hits[0]
     return top_hit.url
예제 #12
0
from py_bing_search import PyBingSearch
import urllib
import urllib2
import json
import os
import socket

# Default timeout, in seconds, for sockets created from here on.
socket.setdefaulttimeout(5)

key = '4axpjG94pE8x9yUZqveY2LObcgNLVfX5oTW6+s5JbR0'
bing = PyBingSearch(key)

# Value for the Authorization header: "Basic " followed by base64 of
# ":<key>".  Python 2's 'base64' codec appends a trailing "\n", which the
# [:-1] slice removes.
_encoded_key = (':%s' % key).encode('base64')
credentialBing = 'Basic ' + _encoded_key[:-1]

# Create the photo directory on first run.
photo_directory = 'bingBad'
if not os.path.exists(photo_directory):
    os.makedirs(photo_directory)

for offset in range(0, 50000, 50):
    bing_search_url = "https://api.datamarket.azure.com/Bing/Search/v1/Image?Query=%27bad%20photography%27&$format=json&$top=200&$skip=" + str(
        offset)

    request = urllib2.Request(bing_search_url)
    request.add_header('Authorization', credentialBing)
    requestOpener = urllib2.build_opener()
    response = requestOpener.open(request)

    results = json.load(response)

    for i, image in enumerate(results['d']['results']):
예제 #13
0
def GetLinksForQueryBing(query):
    """Return the URLs of the first 20 Bing results for *query*."""
    client = PyBingSearch('3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY')
    hits = client.search_all(query, limit=20, format='json')

    urls = []
    for hit in hits:
        urls.append(hit.url)
    # Cap the list at 20 entries regardless of how many hits came back.
    return urls[:20]
예제 #14
0
import json
import os
import requests
import time
from collections import defaultdict
from functools import reduce
from py_bing_search import PyBingSearch
import sys

# API key appended to every Edmunds request (see end_url).
apikey = 'wtprucmwrgk6bd92rq7tun97'
# Base URL of the Edmunds vehicle API, version 2.
edmund_url = 'http://api.edmunds.com/api/vehicle/v2/'
# Query-string suffix: JSON format, full view, plus the API key.
end_url = '?fmt=json&view=full&api_key=' + apikey
# Bing search client.
# NOTE(review): not used in the visible part of this script.
bing = PyBingSearch('Np5rmrL6fIPP3jpDqVi+Li/rJ1Joih4Q6wP69HrjQro=')

# Id counters, all starting at 1.  Presumably the next id to assign to each
# record type (add_engines declares engine_id global) - TODO confirm.
model_id = 1
make_id = 1
engine_id = 1

# Module-level accumulators, all initially empty.
# NOTE(review): what they hold is not visible from this part of the script.
models_list = []
makes_list = []
engines_list = []
make_ids = {}

# Missing keys map to a fresh empty list.
makes_models_dict = defaultdict(list)
# Fetched at import time: decoded JSON response of the "makes" endpoint.
makes_json = requests.get(edmund_url + 'makes' + end_url).json()


def add_engines(engine, model_id):
    global engine_id
    global engines_list
    global models_list
예제 #15
0
from py_bing_search import PyBingSearch

# Run one fixed boolean query (up to 50 results) and print the first ten
# result URLs as a list.
bing = PyBingSearch('3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY')
query = "(yawn) AND (other OR early) AND (people) AND (contagious OR catching) AND (room)"
result_list = bing.search_all(query, limit=50, format='json')

urls = [result.url for result in result_list]
# A single parenthesised argument prints the same as the Python 2 statement.
print(urls[:10])
예제 #16
0
# -*- coding: utf-8 -*-
__author__ = 'lufo'

from py_bing_search import PyBingSearch

# Fetch up to 50 hits for a fixed demo query and print each hit's URL.
bing = PyBingSearch('QkcWAM6VJ/S0LJI9wvVGN4UNQUwikMb4zY/kUVe/hAw')
search_outcome = bing.search("Python Software Foundation", limit=50, format='json')
# search() hands back the hits together with the URI of the next page.
result_list, next_uri = search_outcome

for result in result_list:
    # A single parenthesised argument prints the same as the Python 2 statement.
    print(result.url)