コード例 #1
0
 def search(self, query, numResultsRequested=50, offset=0):
     """Run a Bing image search and record the outcome on this instance.

     The request carries the ``color=ColorOnly`` and ``imageType=Photo``
     filters. On success ``self._results`` holds the result list,
     ``self._numResultsReceived`` its length and
     ``self._numResultsAvailable`` the ``totalEstimatedMatches`` value of
     the most recent response. On failure the error is written to stderr,
     ``self._offset`` and ``self._numResultsReceived`` are reset to 0 and
     the method returns early.

     :param query: search terms passed to the image search service.
     :param numResultsRequested: number of results to ask for.
     :param offset: offset assigned to the service before searching.
     :return: None
     """
     # TODO: Replace the x's with the Primary Account Key of your
     # Microsoft Account.
     bingKey = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'

     self._query = query
     self._numResultsRequested = numResultsRequested
     self._offset = offset

     searchService = PyMsCognitiveImageSearch(
             bingKey, query,
             custom_params='?color=ColorOnly&imageType=Photo')
     searchService.current_offset = offset

     try:
         self._results = searchService.search(numResultsRequested, 'json')
     except Exception as e:
         # str(e) instead of e.message: many exceptions carry no
         # ``message`` attribute, which would raise inside this handler.
         sys.stderr.write(
             'Error when requesting Bing image search for "%s":\n' % query)
         sys.stderr.write('%s\n' % str(e))
         self._offset = 0
         self._numResultsReceived = 0
         return

     # Named ``response`` so the ``json`` module name is not shadowed.
     response = searchService.most_recent_json
     self._numResultsReceived = len(self._results)
     if self._numResultsRequested < self._numResultsReceived:
         # py_ms_cognitive modified the request to get more results.
         self._numResultsRequested = self._numResultsReceived
     self._numResultsAvailable = int(response[u'totalEstimatedMatches'])

     if self.verbose:
         print('Received results of Bing image search for "%s":' % query)
         pprint.pprint(response)
コード例 #2
0
def request_images(times, search_term):
    '''
    Download Bing image-search results for ``search_term`` into a directory.

    The directory is named after the search term with spaces replaced by
    underscores and is created when it does not exist yet. Files are
    numbered consecutively, starting after the number of entries already
    present in the directory.

    :param times: how many searches of up to 50 results each to issue
    :param search_term: what images you want to search
    :return: search_term dir
    '''
    save_path = search_term.replace(" ", "_")
    if not os.path.exists(save_path):
        print("creating: {} dir.".format(save_path))
        os.makedirs(save_path)
    else:
        print('{} : in directory'.format(save_path))

    pic_num = len(os.listdir(save_path)) + 1
    search_service = PyMsCognitiveImageSearch(api_key,
                                              search_term,
                                              silent_fail=True)
    for _ in range(times):
        page = search_service.search(limit=50, format='json')  # 1-50
        for image in page:
            file_name = '{}.{}'.format(pic_num, image.json['encodingFormat'])
            save_loc = os.path.join(save_path, file_name)
            try:
                urllib.request.urlretrieve(image.content_url, save_loc)
            except Exception as e:
                # Best effort: a failed download must not stop the batch.
                print("we have an error: {}".format(e))
            else:
                # Report the number of the file just written, then advance.
                print('found an image {}'.format(pic_num))
                pic_num += 1
    return save_path
コード例 #3
0
 def test_can_silent_fail_image_search(self):
     """An image search with ``silent_fail=True`` returns 50 results.

     Also checks that the name of the first result mentions "python".
     """
     web_bing = PyMsCognitiveImageSearch(SECRET_KEY,
                                         "Python Software Foundation",
                                         silent_fail=True)
     result_one = web_bing.search(limit=50)
     # assertEqual / assertIn report the offending values on failure.
     self.assertEqual(len(result_one), 50)
     self.assertIn("python", result_one[0].name.lower())
コード例 #4
0
 def test_search_all(self):
     """``search_all`` with a quota of 60 returns exactly 60 results.

     Also checks that the name of the first result mentions "python".
     """
     web_bing = PyMsCognitiveImageSearch(SECRET_KEY,
                                         "Python Software Foundation")
     result_one = web_bing.search_all(quota=60)
     # assertEqual / assertIn report the offending values on failure.
     self.assertEqual(len(result_one), 60)
     self.assertIn("python", result_one[0].name.lower())
コード例 #5
0
def get_search_results(search_term, key, num_results):
    """Fetch image search results for ``search_term``.

    Up to 50 results are fetched with a single ``search`` call; larger
    requests go through ``search_all``.

    :param search_term: query passed to the image search service.
    :param key: API key passed to the image search service.
    :param num_results: number of results wanted.
    :return: the result list returned by the service.
    """
    service = PyMsCognitiveImageSearch(key, search_term)
    if num_results > 50:
        return service.search_all(quota=num_results, format='json')
    return service.search(limit=num_results, format='json')
コード例 #6
0
def get_search_results(search_term, key, num_results):
    """Return image search results for ``search_term``.

    Requests of at most 50 results use ``search``; anything larger uses
    ``search_all`` with ``num_results`` as the quota.

    :param search_term: query passed to the image search service.
    :param key: API key passed to the image search service.
    :param num_results: number of results wanted.
    :return: the result list returned by the service.
    """
    service = PyMsCognitiveImageSearch(key, search_term)
    wants_single_call = num_results <= 50
    if wants_single_call:
        hits = service.search(limit=num_results, format='json')
    else:
        hits = service.search_all(quota=num_results, format='json')
    return hits
コード例 #7
0
ファイル: views.py プロジェクト: anam123/NewsMaker
def keyword_finder(ques):
    """Return an image URL for the most frequent proper nouns in ``ques``.

    English stop words and basic punctuation are removed from
    ``ques.content``, the remaining words are POS-tagged and the proper
    nouns (tag ``NNP``) are counted. Of the five most frequent ones, those
    occurring at least twice are joined into the image search query.

    :param ques: object whose ``content`` attribute holds the text.
    :return: ``content_url`` of the first image result, or ``None`` when
        the search returns no results.
    """
    stop = set(stopwords.words('english'))
    cleaned = ' '.join(w for w in ques.content.split() if w not in stop)

    # u"" prefix: without it "\u2019s" is not a unicode escape on Python 2
    # and the typographic possessive would never be matched.
    for junk in (".", ",", ";", ":", "!", "?", "'s", u"\u2019s"):
        cleaned = cleaned.replace(junk, "")

    # Drop accents and other non-ASCII characters. Decoding afterwards keeps
    # the words as text (not bytes) on Python 3.
    cleaned = unicodedata.normalize('NFKD', cleaned).encode(
        'ascii', 'ignore').decode('ascii')

    tagged_sent = pos_tag(cleaned.split())
    proper_dict = {}
    for word, pos in tagged_sent:
        if pos == 'NNP':
            proper_dict[word] = proper_dict.get(word, 0) + 1
    print(proper_dict)

    # Ascending sort followed by reverse puts the most frequent first.
    sorted_x = sorted(proper_dict.items(), key=operator.itemgetter(1))
    sorted_x.reverse()
    print(sorted_x)

    search = ""
    for word, freq in sorted_x[:5]:
        if freq >= 2:
            search = search + " " + word
    print(search)

    # NOTE(review): the API key is hard-coded here; consider loading it
    # from configuration instead of source control.
    search_service = PyMsCognitiveImageSearch(
        'a99a009ec03c47b49389194014f65663', search)
    first_fifty_result = search_service.search(limit=20, format='json')
    if not first_fifty_result:
        # No hit at all: signal it instead of raising IndexError.
        return None
    return first_fifty_result[0].content_url
コード例 #8
0
def get_image():
    '''
    Hits the Bing image API to get an image of the college searched for
    Example call route: localhost:8082/image?name=carleton_college&state=minnesota

    Underscores in ``name`` are turned into spaces before searching. On
    success the response body is a JSON array
    ``[content_url, host_page_url]`` of the first result. A missing
    ``name`` yields a JSON error with status 400; an empty result list
    yields a JSON error with status 404.
    '''
    school_name = request.args.get('name')
    if not school_name:
        # Without a name there is nothing to search for.
        return json.dumps({'error': 'missing "name" query parameter'}), 400
    # A missing state no longer raises TypeError; it is treated as empty.
    school_state_abbrev = request.args.get('state') or ''
    # Desired_aspect_ratio = 4 / 3
    school_name = school_name.replace('_', ' ')
    # NOTE(review): the API key is hard-coded here; consider loading it
    # from configuration instead of source control.
    search_service = PyMsCognitiveImageSearch(
        'e38d51f4b70944d0aa04339ddf1467f4',
        school_name + ' ' + school_state_abbrev)
    results = search_service.search(limit=1, format='json')
    if not results:
        return json.dumps({'error': 'no image found'}), 404
    result = results[0]
    return json.dumps([result.content_url, result.host_page_url])
コード例 #9
0
ファイル: imggrab.py プロジェクト: wbh1/slackimgboot-public
def risky(criteria):
    """Return the URL of a randomly chosen image among the top search hits.

    Up to ten image results are requested for ``criteria`` and one of them
    is picked uniformly at random, including the first hit and the case of
    a single result.

    :param criteria: search terms passed to the image search service.
    :return: ``content_url`` of the chosen result, or the dict
        ``{'text': 'No Results.'}`` when the search returns nothing.
    """
    bing_image = PyMsCognitiveImageSearch(creds.ms, criteria)
    first_few_results = bing_image.search(limit=10, format='json')  # 1-10
    if not first_few_results:
        return {'text': 'No Results.'}
    # random.choice covers every index; the previous code skipped index 0
    # and indexed past the end when exactly one result came back.
    return random.choice(first_few_results).content_url
コード例 #10
0
def index():
    """Render ``index.html`` for the restaurants matching the submitted form.

    Reads form fields ``q1``..``q5``, builds a query with ``makequery`` and
    looks restaurants up with ``query_restaurant``. A word cloud is created
    from each restaurant's reviews and up to three image links are fetched
    per restaurant name. If the image search raises, three empty strings
    are used as that restaurant's links.
    """
    ##query for getting restaurant details from mongodb
    a1 = request.form['q1']
    a2 = request.form['q2']
    a3 = request.form['q3']
    a4 = request.form['q4']
    a5 = request.form['q5']
    print(a1)
    qry = makequery(a1, a2, a3, a4, a5)
    print(qry)
    ##{"attributes.Ambience.trendy":True}
    res = query_restaurant(qry)
    rstrnt = res.get("names")
    stars = res.get("stars")
    reviews = res.get("reviews")
    rnge = range(len(stars))
    # Sample word-cloud data formerly kept here as a comment had the shape
    # [{"text": "study", "size": 40}, ...] -- presumably what
    # create_wordcloud returns; TODO confirm.
    print('**************************************************************************')
    extractor = PosWordCloudGenerator()
    lt = [extractor.create_wordcloud(reviews[i]) for i in rnge]

    imgs = []
    for r in rstrnt:
        query = r + ' Restaurant,Pittsburg,'
        try:
            bimg = PyMsCognitiveImageSearch('Api Key', query)
            hits = bimg.search(limit=3, format='json')
        except Exception:
            # Best effort: a failed image lookup must not break the page.
            # ``Exception`` rather than a bare except, so KeyboardInterrupt
            # and SystemExit still propagate.
            imgs.append(['', '', ''])
            continue
        imgs.append([hit.content_url for hit in hits])

    return render_template("index.html", freqs=lt, rname=rstrnt, rnge=rnge,
                           images=imgs, stars=stars)
    def search(self, query, numResultsRequested=50, offset=0):
        """Run a Bing image search and record the outcome on this instance.

        The API key is read from the ``BING_SEARCH_KEY`` environment
        variable; when it is undefined a message is written to stderr and
        the method returns without touching the instance. The request
        carries the ``color=ColorOnly`` and ``imageType=Photo`` parameters.

        On success ``self._results`` holds the result list,
        ``self._numResultsReceived`` its length and
        ``self._numResultsAvailable`` the ``totalEstimatedMatches`` value of
        the most recent response. If the search raises, the error is written
        to stderr and ``self._offset`` / ``self._numResultsReceived`` are
        reset to 0.

        :param query: search terms passed to the image search service.
        :param numResultsRequested: number of results to ask for.
        :param offset: offset assigned to the service before searching.
        :return: None
        """
        bingKey = os.environ.get('BING_SEARCH_KEY')
        if bingKey is None:
            sys.stderr.write(
                    'Environment variable BING_SEARCH_KEY is undefined. '
                    'Please define it, equal to your Bing Search API key.\n')
            return

        self._query = query
        self._numResultsRequested = numResultsRequested
        self._offset = offset

        service = PyMsCognitiveImageSearch(
                bingKey, query,
                custom_params={'color': 'ColorOnly', 'imageType': 'Photo'})
        service.current_offset = offset

        try:
            self._results = service.search(numResultsRequested, 'json')
        except Exception as err:
            sys.stderr.write(
                    'Error when requesting Bing image search for "%s":\n'
                    % query)
            sys.stderr.write('%s\n' % str(err))
            self._offset = 0
            self._numResultsReceived = 0
            return

        response = service.most_recent_json
        self._numResultsReceived = len(self._results)
        # py_ms_cognitive may have modified the request to get more results
        # than were asked for; never report fewer requested than received.
        self._numResultsRequested = max(self._numResultsRequested,
                                        self._numResultsReceived)
        self._numResultsAvailable = int(response[u'totalEstimatedMatches'])

        if not self.verbose:
            return
        print('Received results of Bing image search for "%s":' % query)
        pprint.pprint(response)
コード例 #12
0
import json
import attr
import sys
from py_ms_cognitive import PyMsCognitiveImageSearch
# One list per result attribute; entries at the same index belong to the
# same search result.
values = []
cont_sz = []
thumb = []
token = []
host_page = []
search_url = []
jsn = []
image_id = []
cont_url = []
cont_name = []
search_term = "body painting"
# NOTE(review): PyMsCognitiveImageSearch is normally constructed as
# (api_key, query); no API key is passed here -- TODO confirm and supply one.
search_service = PyMsCognitiveImageSearch(search_term)
first_fifty_result = search_service.search_all(quota=50000, format='json')

# Iterate over what was actually returned: indexing a fixed 0..49999 range
# raises IndexError as soon as fewer results come back than the quota.
for item in first_fifty_result:
    cont_sz.append(item.content_size)
    thumb.append(item.thumbnail_url)
    token.append(item.image_insights_token)
    host_page.append(item.host_page_url)
    search_url.append(item.web_search_url)
    jsn.append(item.json)
    image_id.append(item.image_id)
    cont_url.append(item.content_url)
    cont_name.append(item.name)

result = {
    'content_size': cont_sz,
コード例 #13
0
ファイル: bingTest.py プロジェクト: caitlinstanton/seventhson
def search(query):
    """Return the content URL of the first image search hit for ``query``.

    A web search for the same query is issued as well; its results are not
    used.

    :param query: search terms passed to both services.
    :return: ``content_url`` of the first image result.
    """
    web_service = PyMsCognitiveWebSearch(key, query)
    image_service = PyMsCognitiveImageSearch(key, query)
    web_service.search(limit=10, format='json')  # 1-10, results unused
    image_hits = image_service.search(limit=10, format='json')  # 1-10
    top_hit = image_hits[0]
    return top_hit.content_url
コード例 #14
0
def get_thumb(search_term, bing_api_key):
    """Return the thumbnail URL of the first image hit for ``search_term``.

    :param search_term: query passed to the image search service.
    :param bing_api_key: API key passed to the image search service.
    :return: ``thumbnail_url`` of the first of up to three results.
    """
    hits = PyMsCognitiveImageSearch(bing_api_key, search_term).search(
        limit=3, format='json')
    return hits[0].thumbnail_url
コード例 #15
0
ファイル: seedcrawler_bing.py プロジェクト: fgsect/fexm
    def try_filetype_crawl(self):
        """
        Try to find download links to files of the given file format.

        Several strategies are tried in order: a ``filetype:`` web search,
        an image search when ``self.filetype`` is listed in ``image_list``,
        two ``website_crawl`` queries, and finally a search for "index of"
        directory listings whose anchors are scanned for matching links.

        :return: A generator of candidate URLs; it is exhausted when no more links can/should be found.
        """

        # First: Try a simple  "filetype:" query - works for some, but not all filetypes
        query = "filetype:" + self.filetype
        PyMsCognitiveWebSearch.SEARCH_WEB_BASE = "https://api.cognitive.microsoft.com/bing/v7.0/search"
        self.search_service = PyMsCognitiveWebSearch(self.ms_key, query)
        results = self.search_service.search_all(format="json",
                                                 quota=LIMIT_RESULTS + 20)
        for item in results:
            try:
                r = requests.get(
                    item.url, timeout=MAX_TIMEOUT,
                    headers=headers)  # Request the url to resolve the redirect
            except Exception as e:  # requests.exceptions.ConnectTimeout:
                print("Skipping ", item.url, "because of Exception", str(e))
                # Then just skip
                continue
            if self.is_valid_file(self.filetype, r.url):
                print("Yielding ", r.url)
                yield r.url
        # If this fails, maybe the requested filetype is an image? Then perform an image search
        if self.filetype in image_list:  # Perform an image Search
            query = self.filetype + " sample"
            PyMsCognitiveImageSearch.SEARCH_IMAGE_BASE = "https://api.cognitive.microsoft.com/bing/v7.0/images/search"
            self.search_service = PyMsCognitiveImageSearch(self.ms_key, query)

            results = self.search_service._search(
                limit=LIMIT_RESULTS,
                format="json")  # TODO: Class does not implement pagination? :(
            for item in results:
                utils.temp_print("Checking item", item.content_url)
                try:
                    r = requests.get(item.content_url,
                                     timeout=MAX_TIMEOUT,
                                     headers=headers)
                except Exception as e:
                    # Log the URL that was actually requested (content_url).
                    print("Skipping ", item.content_url,
                          "because of Exception", str(e))
                    continue

                print("Url is", r.url)
                if self.is_valid_file(self.filetype, r.url):
                    print("Yielding ", r.url)
                    yield r.url

        for result in self.website_crawl("." + self.filetype +
                                         " example file"):
            print("Yielding", result)
            yield result
        for result in self.website_crawl("." + self.filetype + " sample file"):
            print("Yielding", result)
            yield result

        # Last Resort: The index of trick. Note that this can yield some undesired file samples, use with caution!
        query = "inurl:(" + self.filetype + ") intitle:\"index of:\""
        self.search_service = PyMsCognitiveWebSearch(self.ms_key, query)
        results = self.search_service.search_all(format="json",
                                                 quota=LIMIT_RESULTS)
        print(len(results))
        for item in results:
            try:
                r = requests.get(item.url, timeout=MAX_TIMEOUT)
            except Exception as e:
                print("Skipping ", item.url, "because of Exception", str(e))
                continue

            parsed_uri = urlparse(r.url)
            domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
            try:
                # ``domain`` already ends with "/", so append the bare file
                # name; the timeout keeps an unresponsive host from stalling
                # the crawl.
                robots = requests.head(domain + "robots.txt",
                                       timeout=MAX_TIMEOUT)
                if robots.status_code == 404:
                    # No Robots TXT - Skip
                    print("Skipping", domain,
                          "because it does not contain a robots.txt")
                    continue
            except Exception as e:
                print("Skipping", domain, "because of exception", str(e))
                continue
            print("Now scanning through", r.url)
            html_text = r.text
            if "index of" not in html_text:
                # We probably did not reach a file repository
                continue
            soup = BeautifulSoup(html_text, "html.parser")
            hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
            links = [
                href for href in hrefs
                if href is not None and href.lower().endswith(self.filetype)
            ]  # type: [str]
            for link in links:
                # Join the two urls. Urljoin handles every case: path is relative and path is absolute
                filelink = urljoin(r.url, link)
                if self.is_valid_file(self.filetype, filelink):
                    print("Yielding", filelink)
                    yield filelink
コード例 #16
0
from py_ms_cognitive import PyMsCognitiveWebSearch, PyMsCognitiveImageSearch

# Demo: run a web search and an image search for the same query and print
# the name of the first image hit.
# NOTE(review): the API key is hard-coded; consider loading it from
# configuration instead of source control.
key = '82af3a845a3640318879cf8d6db7320a'
query = "New York City"

# Web search, results 1-10.
bing_web = PyMsCognitiveWebSearch(key, query)
first_ten_result = bing_web.search(limit=10, format='json')

# Image search, results 1-10.
bing_Image = PyMsCognitiveImageSearch(key, query)
first_ten_image = bing_Image.search(limit=10, format='json')

top_image = first_ten_image[0]
print(top_image.name)