Example #1
def download_raw_results(infile, outfile):
    params = {"ImageFilters": '"Face:Face"', "$format": "json", "$top": 50, "$skip": 0}
    lines = tuple(open(infile, "r"))
    raw_search_results = []
    websites_list = []
    i = 0
    for line in lines:
        # rotate through the available API keys so no single key exhausts its quota
        bing = BingSearchAPI(bing_keys[i % len(bing_keys)])
        i += 1
        print line
        r = bing.search("web", line, params)
        if r.status_code == 200:
            raw_search_results.append(r.json())

    for result in raw_search_results:
        for elem in result["d"]["results"][0]["Web"]:
            websites_list.append(elem["DisplayUrl"])
    # Extract the links and write them to a file so we don't have to re-query Bing
    search_results = open(outfile, "w+")
    for link in websites_list:
        search_results.write("%s\n" % link.encode("utf-8"))
    search_results.close()
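Note: bing_keys is not defined in the snippet; the function assumes a module-level pool of API keys to rotate through, along these lines:

# Assumed setup, not part of the original snippet
bing_keys = ["key-1", "key-2", "key-3", "key-4", "key-5", "key-6", "key-7"]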
Example #2
import os
import random
import urlparse

from bing_search_api import BingSearchAPI


class BingImageFetcher:

    NUM_IMGS = 10
    TIMEOUT = 10.0
    IMG_FILES = 'img'

    def __init__(self, keypath):
        with open(keypath, 'r') as keyfile:
            key = keyfile.readline().strip()
        self.bing = BingSearchAPI(key)
        self.params = {
                        #'ImageFilters':'"Face:Face"',
                        '$format': 'json',
                        '$top': self.NUM_IMGS,
                        '$skip': 0}

    def create_request(self, word):
        # note: raises ConnectionError if the fetch fails
        resp = self.bing.search('image', word, self.params).json()
        image_results = resp['d']['results'][0]['Image']
        if len(image_results) == 0:
            raise Exception('Failed to find any images for query ' + word)
        # pick a random result, indexing on the actual result count since the
        # API may return fewer than NUM_IMGS images
        image_url = random.choice(image_results)['MediaUrl']
        up = urlparse.urlparse(image_url)
        destfile = os.path.basename(up.path)
        destpath = os.path.join(BingImageFetcher.IMG_FILES, destfile)
        if not os.path.isdir(BingImageFetcher.IMG_FILES):
            os.mkdir(BingImageFetcher.IMG_FILES)
        # if we already have that image then just use the cached version
        is_cached = os.path.isfile(destpath)
        return is_cached, image_url, destpath
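create_request only resolves the image URL and cache path; the actual download is not shown. A minimal sketch of that step, assuming Python 2's urllib2 and reusing the class's TIMEOUT constant:

def fetch_image(fetcher, word):
    # Hypothetical helper, not part of the original class: resolve a query to a
    # local file, downloading only on a cache miss.
    import urllib2
    is_cached, image_url, destpath = fetcher.create_request(word)
    if not is_cached:
        resp = urllib2.urlopen(image_url, timeout=BingImageFetcher.TIMEOUT)
        with open(destpath, 'wb') as f:
            f.write(resp.read())
    return destpath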
Example #4
def query(query_string):
    bing = BingSearchAPI(my_key)
    params = {
        'ImageFilters': '"Face:Face"',
        '$format': 'json',
        '$top': 10,
        '$skip': 0
    }
    results = bing.search('web', query_string, params).json()  # requests 1.0+

    return [result['Url'] for result in results['d']['results'][0]['Web']]
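A quick usage sketch, assuming my_key holds a valid Bing API key:

# Hypothetical call: returns the result URLs for the query
for url in query("python requests tutorial"):
    print url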
Example #5
def get_actor_url(actor_name):
    bing = BingSearchAPI(BING_KEY)
    params = {
        'ImageFilters': '"Face:Face"',
        '$format': 'json',
        '$top': 1,
        '$skip': 0
    }
    actor_name = actor_name.encode('utf-8')
    data = bing.search('image', actor_name, params).json()
    return data['d']['results'][0]['Image'][0]['Thumbnail']['MediaUrl']
Example #6
def search_bing(query, per_page=10, offset=0):
    try:
        my_key = ""
        bing = BingSearchAPI(my_key)
        params = {'$format': 'json',
                  '$top': per_page,
                  '$skip': offset}
        results = bing.search('image+web', query, params).json()
        results = results['d']['results'][0]['Web']
        return results
    except Exception as e:
        print e
        return []
Example #7
def crawl_from_bing(search_query):
    my_key = read_bing_key()
    # search_query = "nba jumpshot"
    bing = BingSearchAPI(my_key)
    for i in range(20):
        params = {
              '$format': 'json',
              '$top': 50,
              '$skip': i * 50}
        result_list = bing.search('image', search_query, params).json()
        print(len(result_list['d']['results'][0]['Image']))
        for result in result_list['d']['results'][0]['Image']:
            image_url = result['MediaUrl']
            # drop characters that cannot be encoded so the title is safe to use as a file name
            title_name = result['Title'].encode('utf-8', 'ignore').decode('utf-8', 'ignore')
            title_name = title_name.replace('... ', '')
            download_single_image(image_url, search_query, title_name)
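download_single_image is not shown in the original; a plausible sketch, assuming Python 2's urllib and a per-query output directory:

def download_single_image(image_url, search_query, title_name):
    # Hypothetical helper: save one image under a directory named after the query
    import os
    import urllib
    dest_dir = os.path.join('images', search_query.replace(' ', '_'))
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    ext = os.path.splitext(image_url)[1] or '.jpg'
    try:
        urllib.urlretrieve(image_url, os.path.join(dest_dir, title_name[:50] + ext))
    except IOError:
        pass  # skip images that fail to download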
Example #8
 def get_text(self):
     if self.conf_lyrics_done:
         print 'Lyrics are already done'
         return self.conf_lyrics_done
     bing = BingSearchAPI()
     tags = self.conf_tags
     search = '%s lyrics %s' % (tags['title'], tags['performer'])
     print 'Searching for lyrics. Search string: %s' % search
     lyrics_search = bing.search('web', search.encode('utf-8'), {'$format': 'json'})
     #print 'Lyrics search result: %s' % pformat(lyrics_search)
     for result in lyrics_search.get('d', {}).get('results', [{}])[0].get('Web', []):
         url = result['Url']
         print 'lyrics in %s?' % url
         for match, (good_attr, bad_part) in lyrics_matches.items():
             if match in url:
                 # Good! We have a known site with lyrics - let's extract them.
                 print 'yes, lyrics are probably here'
                 browser = Browser()
                 browser.set_handle_robots(None)
                 browser.open(url)
                 text = browser.response().read()
                 soup = BeautifulSoup(text, convertEntities=BeautifulSoup.HTML_ENTITIES)
                 lyrics_el = soup.find(attrs=good_attr)
                 if not lyrics_el:
                     #print 'Not found lyrics in %s' % text
                     continue
                 #print 'full text: %s' % text
                 #print 'Found something like this: %s' % lyrics_el
                 parts = list(self.extract_text_parts(lyrics_el.contents, bad_part))
                 lyrics = '\n'.join(parts)
                 #print 'Found lyrics: \n%s' % lyrics
                 print 'Found lyrics: %s...' % lyrics[:150]
                 self.conf_lyrics = lyrics
                 self.conf_lyrics_done = True
                 return self.conf_lyrics_done
         print 'Unsupported lyrics source: %s' % url
     if not self.conf_lyrics_done:
         print 'ERROR: lyrics not found! %s' % self.conf_tags['title']
     return self.conf_lyrics_done
Example #9
def bing_search_total(_verbose, _search_phrase, _bing_api_key):

    # %22 acts as quotes, turning the query into an exact phrase search
    _search_phrase_parsed = "%22" + _search_phrase.replace(' ', '+').strip(' ') + "%22"
    _bing_search = BingSearchAPI(_bing_api_key)
    _bing_parameters = {'$format': 'json', '$top': 2}

    try:
        res = _bing_search.search('web', _search_phrase_parsed,
                                  _bing_parameters).json()
        total = int(res["d"]["results"][0]["WebTotal"])
        if _verbose:
            print('\t' + _search_phrase_parsed.replace('+', ' ').replace('%22', '') + str(total))
        return total
    except Exception as e:
        if _verbose:
            print('\tERROR: in bing.search() - search total\n\t' + str(e))
        return 0
Example #11
 def get_pics(self):
     if self.conf_pics_done:
         print 'Pics are already done'
         return self.conf_pics_done
     imgdir = self.imgdir
     if len(glob.glob1(imgdir, "*.png")) > REQUIRED_IMAGE_COUNT:
         self.conf_pics_done = True
         return self.conf_pics_done
     bing = BingSearchAPI()
     tags = self.conf_tags
     search = '%s %s' % (tags['title'], tags['performer'])
     print 'Searching for images. Search string: %s' % search
     img_search = bing.search('image', search.encode('utf-8'), {'$format': 'json'})
     print 'Images: %s' % pformat(img_search)
     registry = processed_image_urls.setdefault(imgdir, set())
     if not os.path.exists(imgdir):
         os.makedirs(imgdir)
     for result in img_search.get('d', {}).get('results', [{}])[0].get('Image', []):
         if result['MediaUrl'] not in registry:
             browser = Browser()
             browser.set_handle_robots(None)
             registry.add(result['MediaUrl'])
             log.debug('%s images in %s' % (len(glob.glob1(imgdir, "*.png")), imgdir))
             try:
                 #log.debug('Opening %s' % result['SourceUrl'])
                 browser.open(result['SourceUrl'])
                 #log.debug('Opening %s' % result['MediaUrl'])
                 img = Image.open(browser.open(result['MediaUrl']))
                 if img.size[0] >= DEFAULT_VIDEO_RESOLUTION[0] and img.size[1] >= DEFAULT_VIDEO_RESOLUTION[1]:
                     print 'Found image: %s' % result['MediaUrl']
                     img.save(os.path.join(imgdir, 'image%03d.png'
                         % (len(glob.glob1(imgdir, "*.png")) + 1)))
                     if len(glob.glob1(imgdir, "*.png")) > REQUIRED_IMAGE_COUNT:
                         self.conf_pics_done = True
                         break
             except:
                 print_exc()
     if len(glob.glob1(imgdir, "*.png")) < REQUIRED_IMAGE_COUNT:
         search = tags['performer']
         print 'Searching for images. Search string: %s' % search
         img_search = bing.search('image', search.encode('utf-8'), {'$format': 'json'})
         for result in img_search.get('d', {}).get('results', [{}])[0].get('Image', []):
             if result['MediaUrl'] not in registry:
                 browser = Browser()
                 browser.set_handle_robots(None)
                 registry.add(result['MediaUrl'])
                 log.debug('%s images in %s' % (len(glob.glob1(imgdir, "*.png")), imgdir))
                 try:
                     #log.debug('Opening %s' % result['SourceUrl'])
                     browser.open(result['SourceUrl'])
                     #log.debug('Opening %s' % result['MediaUrl'])
                     img = Image.open(browser.open(result['MediaUrl']))
                     if img.size[0] >= DEFAULT_VIDEO_RESOLUTION[0] and img.size[1] >= DEFAULT_VIDEO_RESOLUTION[1]:
                         print 'Found image: %s' % result['MediaUrl']
                         img.save(os.path.join(imgdir, 'image%03d.png'
                             % (len(glob.glob1(imgdir, "*.png")) + 1)))
                         if len(glob.glob1(imgdir, "*.png")) > REQUIRED_IMAGE_COUNT:
                             self.conf_pics_done = True
                             break
                 except:
                     print_exc()
     return self.conf_pics_done
Example #12
from bing_search_api import BingSearchAPI
import json

my_key = "8jhH8TwVCHdDiWxXYgC5KqyEmChYTKW0kkFngbVYnH8"
query_string = "Sony"
bing = BingSearchAPI(my_key)
params = {'$format': 'json', '$top': 10, '$skip': 0}
news = bing.search('news', query_string, params).json()
for i in range(10):
    print(news['d']['results'][0]['News'][i])
Example #13
from bing_search_api import BingSearchAPI
import json

my_key = "dWls875YJyXwh7dmX3LdIaETO9IDjfkdG4g8533M9zs"
query_string = raw_input("What is your query? ")
bing = BingSearchAPI(my_key)
params = {'$format': 'json',
          '$top': 10,
          '$skip': 0}
searchJSON = bing.search('news', query_string, params).json()
# searchJSON is a dict, so index into the parsed results rather than the dict itself
print searchJSON['d']['results'][0]['News']

Example #14
def Collocations_Method_2(_bing_api_key, _n_grams_from_input_text_file, _input_file_path, _apply_POS_restrictions,
                          _verbose):
    if _verbose:
        # A file to save the verbose output of the program
        _output_file_verbose = str(_input_file_path).replace(_input_file_path.split('/')[-1], 'verbose.txt')
        _output_file_verbose = open(_output_file_verbose, 'a')
        print("\n--------------------------------------------------------------------------", file=_output_file_verbose)
        print("\tMethod-2: Title-Url - Extracting collocations:", file=_output_file_verbose)
        print("--------------------------------------------------------------------------\n\n",
              file=_output_file_verbose)
        print("\tMethod-2: Title-Url - Extracting collocations ...")

    # A list to store n-gram phrases that are collocations
    title_url_collocations = []
    # A list to store n-gram phrases that are not collocations
    n_grams_not_collocations = []

    # Snowball stemmer is used to stem words
    stemmer = snowballstemmer.stemmer('english')
    # Call to Bing search API
    _bing_search = BingSearchAPI(_bing_api_key)
    _bing_search_parameters = {'$format': 'json', '$top': 10}  # Top 10 search results
    # Python list with words synonymous to 'Wikipedia', 'dictionary', 'definition'
    # (commas added between all items; the original's implicit string concatenation
    # silently fused 'gazetteer'+'spellchecker' and 'investorwords'+'investopedia')
    _list_of_synonymous_words = ['dictionary', 'lexicon', 'definition', 'meaning', 'unabridged',
                                 'gazetteer', 'spellchecker', 'spellingchecker', 'thesaurus',
                                 'synonymfinder', 'wordfinder', 'wikipedia', 'investorwords',
                                 'investopedia', 'wiktionary']

    for _n_gram in _n_grams_from_input_text_file:
        if _verbose:
            print("\n%s:" % (_n_gram), file=_output_file_verbose)
        if _n_gram in title_url_collocations or _n_gram in n_grams_not_collocations:
            # If this n-gram phrase has already been checked, it is present in one of
            # the lists, title_url_collocations OR n_grams_not_collocations.
            # Hence, we move on to the next n-gram / phrase
            continue
        else:
            # Before checking whether the n-gram is a collocation, we check that at least one
            # POS tag is from the valid POS tag list {Noun, Verb, Adverb, Adjective} when
            # _apply_POS_restrictions is set to True
            if _apply_POS_restrictions:
                valid_POS_tags = ['NN', 'VB', 'RB', 'JJ']
                _valid_POS_tag_counter = 0  # A counter to count the number of valid POS tags in n-gram
                for _pos_tag in valid_POS_tags:
                    if _pos_tag in _n_gram:
                        _valid_POS_tag_counter += 1
                if _valid_POS_tag_counter == 0:
                    # If no valid POS tag is present in the n-gram, it is not a collocation
                    # when POS restrictions are applied
                    n_grams_not_collocations.append(_n_gram)
                    if _verbose:
                        print("\t'%s' does not have valid POS tags\n\tMoving on to the next phrase ..." % (_n_gram),
                              file=_output_file_verbose)
                    continue  # We move on to the next phrase

            # If POS restrictions are not to be applied on the n-gram
            _n_gram_lower = _n_gram.lower() + ' '  # Lower case
            _n_gram_lower = re.sub(r'_.*? ', ' ', _n_gram_lower).rstrip(' ')
            _n_gram_lower_search_phrase = 'define "%s"' % (_n_gram_lower)  # Bing - Phrase search
            try:
                _search_results = _bing_search.search('web', _n_gram_lower_search_phrase,
                                                      _bing_search_parameters).json()
                _search_result_count = len(_search_results["d"]["results"][0]["Web"])
            except Exception as e:
                if _verbose:
                    print("\tERROR: Method-2 - Bing search - Title-Url\n%s" % (str(e)), file=_output_file_verbose)
                    print("\tERROR: Method-2 - Bing search - Title-Url\n%s" % (str(e)))
                _search_result_count = 0
                continue
            # List to save top 10 search Titles
            _search_titles = []
            # List to store top 10 search Urls
            _search_urls = []
            # We iterate through each of the search results and append search titles and Urls to their respective lists
            for x in xrange(0, _search_result_count):
                _url = _search_results["d"]["results"][0]["Web"][x]["Url"]
                _title = _search_results["d"]["results"][0]["Web"][x]["Title"]
                _title = unicodedata.normalize('NFKD', _title).encode('ascii', 'ignore')
                _url = unicodedata.normalize('NFKD', _url).encode('ascii', 'ignore')
                _search_titles.append(_title)
                _search_urls.append(_url)
            # removing punctuation, special characters and spaces from the keyword
            _n_gram_lower_no_spaces = ''.join(_char for _char in _n_gram_lower if _char.isalnum())
            _n_gram_lower_no_spaces = _n_gram_lower_no_spaces.replace(' ', '')
            _number_of_search_results_returned = len(_search_urls)  # No. of search urls = titles
            # Variable to store the count of titles and urls that have valid keywords and match with the search phrase
            _number_of_valid_titles = 0
            _number_of_valid_urls = 0
            for x in xrange(0, _number_of_search_results_returned):
                _search_title = ""
                _search_title = _search_titles[x]
                _search_title_lower_case = _search_title.lower()
                _search_title_lower_case_no_spaces = "".join(
                    _char for _char in _search_title_lower_case if _char.isalnum())
                _search_url = ""
                _search_url = _search_urls[x]
                _search_url_lower_case = _search_url.lower()
                _search_url_lower_case_no_spaces = "".join(_char for _char in _search_url_lower_case if _char.isalnum())
                if _verbose:
                    print("\t%d:\n\tSearch title: %s\n\tSearch Url: %s" % (x + 1, _search_title, _search_url),
                          file=_output_file_verbose)
                for _synonym in _list_of_synonymous_words:
                    _synonym_match = False
                    # Check if _synonym is present in the title
                    _title_match = re.search(_synonym, _search_title_lower_case_no_spaces)
                    # check if _synonym is present in the url
                    _url_match = re.search(_synonym, _search_url_lower_case_no_spaces)
                    # If a match is found either in title or the url, open the link and check if the
                    # <title> </title> tag from the html has a match with the keyword
                    if _title_match:
                        _synonym_match = True
                    elif _url_match:
                        _synonym_match = True
                    else:
                        continue
                    if _synonym_match:
                        # Reading HTML from url
                        try:
                            # replace: _url_response = urllib2.urlopen(_search_url)
                            # _url_response = urllib2.urlopen(_search_url)
                            http = httplib2.Http(".cache")
                            resp, _url_response = http.request(_search_url, "GET")
                            _html = _url_response
                            # print(_html)
                            _beautiful_html = BeautifulSoup(_html, "lxml")
                        except Exception as e:
                            if _verbose:
                                print("\tException - Method-2 - Reading HTML\n%s" % (str(e)), file=_output_file_verbose)
                                print("\tException - Method-2 - Reading HTML\n%s" % (str(e)))
                                # print(e.fp.read())
                                print("-----------------\n" + _search_url + "\n---------------\n")
                        # Extracting text in between <h1> tag
                        try:
                            # Comments are to be excluded; this part is still to be coded

                            # _text_from_title = _beautiful_html.find('h1').text
                            # print(_beautiful_html.find('h1').text + "\n")
                            # print("sss" + _beautiful_html.title.string + '\n')
                            _text_from_title = _beautiful_html.title.string
                            # Remove any non-ascii characters from the text extracted
                            _text_from_title_ascii_only = "".join(
                                _char for _char in _text_from_title if ord(_char) < 128)
                            _text_from_title_ascii_only = _text_from_title_ascii_only.lower()
                        except:
                            # If failed to extract text from <h1>
                            _text_from_title_ascii_only = ""

                        """
						# ------- FOR Stemmed match ------------
						# Stem the title text extracted and the n-gram phrase
						# If the stemmed n-gram phrase is present in the stemmed title, 
						# that n-gram phrase is a collocation
						_n_gram_lower_stemmed = ""
						for _word in _n_gram_lower.split(' '):
							_n_gram_lower_stemmed = " " + stemmer.stemWord(_word)
						_text_from_title_ascii_only_stemmed = ""
						for _word in _text_from_title_ascii_only.split(' '):
							_text_from_title_ascii_only_stemmed = " " + stemmer.stemWord(_word)
						if _verbose:
							print "\t\tStemmed search title: %s\n\t\tStemmed phrase: %s" %(_text_from_title_ascii_only_stemmed, _n_gram_lower_stemmed)
						if _n_gram_lower_stemmed in _text_from_title_ascii_only_stemmed:
							_number_of_valid_titles += 1
							if _verbose:
								print "\t\t\tMatch"
						else:
							if _verbose:
								print "\t\t\tNot a match"
						# ---------------------------------------
						"""
                        # ------------ FOR Exact title match -------------
                        if _verbose:
                            print("\t\tSearch TITLE processed: %s\n\t\tPhrase processed: %s" % (
                                _text_from_title_ascii_only, _n_gram_lower), file=_output_file_verbose)
                        if _n_gram_lower in _text_from_title_ascii_only:
                            _number_of_valid_titles += 1
                            if _verbose:
                                print("\t\t\tMatch", file=_output_file_verbose)
                        else:
                            if _verbose:
                                print("\t\t\tNot a match", file=_output_file_verbose)
                        # ------------------------------------------------
                        # Remove digits from the (already punctuation-free) Url and see if the n-gram / phrase is present in it
                        # If yes, then that n-gram is a collocation
                        _search_url_lower_case_no_spaces_no_punctuation = "".join(
                            [_char for _char in _search_url_lower_case_no_spaces if not _char.isdigit()])
                        if _verbose:
                            print("\t\tSearch URL processed: %s\n\t\tPhrase processed: %s" % (
                                _search_url_lower_case_no_spaces_no_punctuation, _n_gram_lower_no_spaces),
                                  file=_output_file_verbose)
                        if _n_gram_lower_no_spaces in _search_url_lower_case_no_spaces_no_punctuation:
                            _number_of_valid_urls += 1
                            if _verbose:
                                print("\t\t\tMatch", file=_output_file_verbose)
                        else:
                            if _verbose:
                                print("\t\t\tNot a match", file=_output_file_verbose)
                        break
                    else:
                        continue
        if _number_of_valid_titles > 0 or _number_of_valid_urls > 0:
            title_url_collocations.append(_n_gram)
            if _verbose:
                print("\n\tTotal number of valid titles: %d\n\tTotal number of valid urls: %d\n\t- Collocation -\n" \
                      % (_number_of_valid_titles, _number_of_valid_urls), file=_output_file_verbose)
        else:
            n_grams_not_collocations.append(_n_gram)
            if _verbose:
                print("\t- Not a collocation -\n", file=_output_file_verbose)

    # Output text file to save collocations
    _output_file_path_title_url_collocations = str(_input_file_path).replace(_input_file_path.split('/')[-1],
                                                                             'collocations_title_url.txt')
    _output_file_title_url_collocations = open(_output_file_path_title_url_collocations, 'w')
    for _collocation in title_url_collocations:
        _output_file_title_url_collocations.write(_collocation + '\n')
    _output_file_title_url_collocations.close()
    if _verbose:
        print("\nMethod-2: Title-Url - Collocations are written to the file:\n%s" % (
            _output_file_path_title_url_collocations), file=_output_file_verbose)

    # Output text file to save n-grams that are not collocations
    _output_file_path_title_url_not_collocations = str(_input_file_path).replace(_input_file_path.split('/')[-1],
                                                                                 'not_collocations_title_url.txt')
    _output_file_title_url_not_collocations = open(_output_file_path_title_url_not_collocations, 'w')
    for _n_gram in n_grams_not_collocations:
        _output_file_title_url_not_collocations.write(_n_gram + '\n')
    _output_file_title_url_not_collocations.close()
    if _verbose:
        print("Method-2: Title-Url - N-grams that are not collocations are written to the file:\n%s" % (
            _output_file_path_title_url_not_collocations), file=_output_file_verbose)

    if _verbose:
        print("\n--------------------------------------------------------------------------", file=_output_file_verbose)
        print("\tMethod-2: Title-Url - Extracting collocations - Complete", file=_output_file_verbose)
        print("--------------------------------------------------------------------------\n\n",
              file=_output_file_verbose)

    # Returning n-grams that are collocations and n-grams that are not
    if _verbose:
        print("\t\tMethod-2: Collocation extraction successful")
    return title_url_collocations, n_grams_not_collocations
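A sketch of how the function might be invoked; the key, path, and n-grams below are placeholders, assuming n-grams carry underscore-joined POS tags as the body expects:

# Hypothetical call with placeholder inputs
collocations, non_collocations = Collocations_Method_2(
    _bing_api_key='YOUR_BING_KEY',
    _n_grams_from_input_text_file=['machine_NN learning_NN', 'of_IN the_DT'],
    _input_file_path='data/input.txt',
    _apply_POS_restrictions=True,
    _verbose=False)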
Example #15
import sys
from bing_search_api import BingSearchAPI

#INFO FOR BING API
my_key = "insert_API_key"	#replace with Bing API Key
query_string = sys.argv[1]	#query string from the command line via sys.argv; wrap multi-word queries in " "
bing = BingSearchAPI(my_key)

#parameters for image searching -- more documentation on params and image filters here http://goo.gl/xG0v0O
params = {'ImageFilters':'"Style:Photo"',
          '$format': 'json',	#specifies format of data response
          '$top': 400,			#specifies number of results to return, default is 50
          '$skip': 0}			#specifies starting point offset for the results
          
#bing.search() requires sources first (images, web, video, etc.), then query string, then rest of params (above)
#full schema documentation for bing API is here http://goo.gl/xG0v0O
results = bing.search('image', query_string, params).json()	#requests 1.0+

image_list = results['d']['results'][0]['Image']	#this gets us to the list of all the images

#create a new list of all the image source URLs using a list comprehension
image_urls = [image['MediaUrl'] for image in image_list if len(image['MediaUrl']) > 0]
for url in image_urls:	#print the list of image urls
	print url
#download all those images to a directory (so i have them) -- only do this if you need the images, takes a lot of time
#for url in image_urls:
#	file_name = url.rsplit('/',1)[1]
#	urllib.urlretrieve(url, file_name)

#for each image, get the 5-sentence image description from Toronto Deep Learning using the response_for_image function and captions
Example #16
from bing_search_api import BingSearchAPI

k = []
for line in f:  #f is the input keyword file, opened for reading earlier (not shown)
    k.append(line.strip())
s = ' '.join(k)

n = 1000  #search result limit

my_key = "uAZ6dYNEodLuQxx1W3UKkLegY+Uj8y7e1E3AxPwqtmM"  #API key
query_string = s  #the query string. currently only has keyword parameter.
bing = BingSearchAPI(my_key)  #initialize search request
params = {'$format': 'json'}  #response format as json

#output file
f = open("bingresults.txt", "w")

#get first 50 results from Bing
for obj in bing.search('web', query_string, params).json()['d']['results']:
    for lnk in obj['Web']:
        f.write(lnk['Url'])
        f.write('\n')

i = 50

#get the rest results
while i < n:
    params = {'$format': 'json', '$skip': i}  #skip first i results
    for obj in bing.search('web', query_string, params).json()['d']['results']:
        for lnk in obj['Web']:
            f.write(lnk['Url'])
            f.write('\n')
    i += 50