예제 #1
0
 def __init__(self, config):
     """Initialize the Bing scanner from a config mapping.

     Expects keys: "p_googlesleep" (cooldown seconds between queries),
     "p_results_per_query" (results page size) and "p_skippages"
     (number of leading result pages to skip).
     """
     self.config = config
     # NOTE(review): PyBingWebSearch elsewhere in this file takes
     # (api_key, query); "YOUR ID" is a placeholder key -- confirm.
     self.bs = PyBingWebSearch("YOUR ID")
     # Dropped stray trailing semicolons and redundant parens (PEP 8).
     self.cooldown = self.config["p_googlesleep"]
     self.results_per_page = int(self.config["p_results_per_query"])
     if self.config["p_skippages"] > 0:
         print("Bing Scanner will skip the first %d pages..."
               % self.config["p_skippages"])
예제 #2
0
def write_text():
    with open(riots_file) as listofriots:
        for i, l in enumerate(listofriots):
            if i < 0:  # to pass the ones we already have
                continue
            try:
                print "looking at riot %d" % i
                name = l.strip()
                # name  = l.split("-")[1].strip().split('(')[0].strip()
                bing_news = PyBingWebSearch(api_key, name)
                news = bing_news.search(limit=5, format='json')
                time.sleep(3)
                try:
                    shutil.rmtree("riots_bing/riot_%02d" % i)
                except OSError:
                    pass
                os.mkdir("riots_bing/riot_%02d" % i)
                for j, new in enumerate(news):
                    with open('riots_bing/riot_%02d/%02d.txt' % (i, j),
                              'w') as riot_file:
                        riot_file.write(
                            new.title.encode('utf-8').strip() + '\n')
                        riot_file.write(
                            new.description.encode('utf-8').strip())
            except:
                continue
예제 #3
0
파일: views.py 프로젝트: agoops/bing10
def searchBing(text):
	"""Run a Bing web search for *text* and return the top ten hits
	wrapped as Result(title, url) objects."""
	auth_key = '1KN2M8IdjS+AsXS7+s9NXFRw1vIcHO/awnbyF1+WjEs'
	client = PyBingWebSearch(auth_key, text)
	top_hits = client.search(limit=10, format='json')  # results 1-10
	return [Result(hit.title, hit.url) for hit in top_hits]
예제 #4
0
 def getSeedPagesFromBing(self):
     """Query Bing for the configured search term and return up to ten
     seed results; on failure, log the error and return None."""
     try:
         client = PyBingWebSearch(self.apiKey, self.searchTerm)
         return client.search(limit=10, format="json")
     except Exception as err:
         crawlerLogger.error("Failed to get seeds. Error:" + str(err))
예제 #5
0
def searchBing():
    """Search Bing for LinkedIn /pub/ profile pages mentioning
    "at <orgname>" and print Name/Role pairs to stdout.

    Depends on module globals: orgname, B_API_KEY, debug, verbose,
    stdout (plus imported re / PyBingWebSearch).
    NOTE(review): the regex_string values built from orgname are only
    printed for debugging; the actual re.search calls hard-code
    'ReliaQuest' -- confirm whether orgname should be interpolated.
    """
    search_term = "site:linkedin.com instreamset:(url):\"/pub/\" -instreamset:(url):\"/dir/\" && (\"at %s\" || \"at %s\")" % (
        orgname, orgname.lower())
    bing_web = PyBingWebSearch(B_API_KEY, search_term)
    result = bing_web.search(limit=50, format='json')

    counter = 0
    while counter < 50:
        try:
            # First pattern: ". <words> at <org>" in mid-description.
            regex_string = "'\.\s([\w\s]*\sat\s%s)'" % (orgname)
            if debug:
                print "DEBUG: Bing[", counter, "] first regex_string is: ", regex_string
            m = re.search('\.\s([\w\s]*\sat\sReliaQuest)',
                          result[counter].description, re.IGNORECASE)
            if debug:
                print "DEBUG: Bing[", counter, "] raw results:"
                print "title: ", result[
                    counter].title, " description: ", result[
                        counter].description
            if m == None:
                if debug:
                    print "DEBUG: Bing[", counter, "] first regex returned 'None'"
                # Fallback pattern: description ending "... at <org>."
                regex_string = "'^.*at\s%s\.'" % (orgname)
                if debug:
                    print "DEBUG: Bing[", counter, "] second regex_string is: ", regex_string
                m = re.search('^.*at\sReliaQuest\.',
                              result[counter].description, re.IGNORECASE)
                if m == None:
                    if debug:
                        print "DEBUG: Bing[", counter, "] second regex returned 'None'"
                    counter += 1
                    continue
                else:
                    pass
            if debug:
                print "DEBUG: Bing [", counter, "] full regex match: ", str(
                    m.group())
            # Title looks like "<Name> | LinkedIn"; swap suffix for a comma.
            stdout.write('Name: ')
            stdout.write(
                str(re.sub(' \| LinkedIn', ',', result[counter].title)))
            stdout.write(' Role: ')
            try:
                # Group 1 only exists for the first pattern; the
                # fallback pattern has no capture group.
                stdout.write(str(m.group(1)))
            except IndexError:
                stdout.write(str(m.group()))
                pass
            if verbose:
                stdout.write(' VERBOSE_URL: ')
                stdout.write(result[counter].url)
            stdout.write("\n")
            counter += 1
        except IndexError as e:
            # Fewer than 50 results: result[counter] ran off the end.
            if verbose:
                print "INFO: No additional Bing Search Results available"
            break
        except Exception as e:
            print "Bing_ERROR: Something strange happened, printing error: "
            print e
            exit()
예제 #6
0
def extract_snippet(prompt):
	"""Search Bing for *prompt* and return the descriptions of the
	first fifty results as a list of strings."""
	API_KEY = "0nNf/RGQhw/62syJrJGDRbm4BUx4fwkyDYpiFLBobCo"
	bing_web = PyBingWebSearch(API_KEY, prompt, web_only=False)
	first_fifty_result = bing_web.search(limit=50, format='json')
	# Build the snippet list directly instead of append-in-a-loop.
	return [result.description for result in first_fifty_result]
예제 #7
0
def searchBing():
	"""Search Bing for LinkedIn /pub/ profile pages mentioning
	"at <orgname>" and print Name/Role pairs to stdout.

	Depends on module globals: orgname, B_API_KEY, debug, verbose,
	stdout (plus imported re / PyBingWebSearch).
	NOTE(review): regex_string is built from orgname but only printed;
	the actual re.search calls hard-code 'ReliaQuest' -- confirm.
	"""
	search_term = "site:linkedin.com instreamset:(url):\"/pub/\" -instreamset:(url):\"/dir/\" && (\"at %s\" || \"at %s\")" % (orgname,orgname.lower())
	bing_web = PyBingWebSearch(B_API_KEY, search_term)
	result = bing_web.search(limit=50, format='json')

	counter = 0
	while counter < 50: 	
		try:
			# First pattern: ". <words> at <org>" in mid-description.
			regex_string = "'\.\s([\w\s]*\sat\s%s)'" % (orgname)
			if debug:
				 print "DEBUG: Bing[", counter,"] first regex_string is: ", regex_string
			m = re.search('\.\s([\w\s]*\sat\sReliaQuest)', result[counter].description, re.IGNORECASE)
			if debug:
				print "DEBUG: Bing[", counter,"] raw results:"
				print "title: ", result[counter].title, " description: ", result[counter].description
			if m == None:
				if debug:
					print "DEBUG: Bing[", counter,"] first regex returned 'None'"
				# Fallback pattern: description ending "... at <org>."
				regex_string = "'^.*at\s%s\.'" % (orgname)
				if debug:
					print "DEBUG: Bing[", counter,"] second regex_string is: ", regex_string
				m = re.search('^.*at\sReliaQuest\.', result[counter].description, re.IGNORECASE)
				if m == None:
					if debug:
						print "DEBUG: Bing[", counter,"] second regex returned 'None'"
					counter+=1
					continue
				else:
					pass
			if debug:
				print "DEBUG: Bing [", counter, "] full regex match: ", str(m.group())
			# Title looks like "<Name> | LinkedIn"; swap suffix for a comma.
			stdout.write('Name: ')
			stdout.write(str(re.sub(' \| LinkedIn', ',', result[counter].title)))
			stdout.write(' Role: ')
			try:
				# Group 1 only exists for the first pattern; the
				# fallback pattern has no capture group.
				stdout.write(str(m.group(1)))
			except IndexError:
				stdout.write(str(m.group()))
				pass
			if verbose:
				stdout.write(' VERBOSE_URL: ')
				stdout.write(result[counter].url)
			stdout.write("\n")
			counter+=1
		except IndexError as e:
			# Fewer than 50 results: result[counter] ran off the end.
			if verbose:
				print "INFO: No additional Bing Search Results available"
			break
		except Exception as e:
			print "Bing_ERROR: Something strange happened, printing error: "
			print e
			exit()
예제 #8
0
def getTopTen(query):
	"""Collect top search-result URLs for *query* via the project helper
	checkUrl (which presumably filters/appends into the list -- confirm
	in its definition).

	Relies on module global `pages` and helper `checkUrl`.
	NOTE(review): `counter` is initialised from `pages` and never
	changed, so `pages - counter` is always 0 and the early break never
	fires -- all 20 fetched results are processed despite the
	"top 10" intent.
	"""
	# print 'length', len (encounteredUrls)
	# print 'pages = ',pages
	top_ten_urls = [] # hold the initial urls
	bing = PyBingWebSearch('mMlCxUd5qmU5uDJ1w1VLbDkobVK905A9cZZhYkfqGHg=',query,web_only=False)
	first_ten_results = bing.search(limit=20, format='json') #1-50
	#urlList, next_uri = bing.search(query, limit=10, format='json') # get the results
	counter = pages # count number of urls
	for result in first_ten_results:
		checkUrl(result.url,top_ten_urls)
		if (pages - counter >=10): # only care about top 10
			break
	return top_ten_urls
예제 #9
0
def get_results_ids_fb(query):
    """Search Bing for Facebook event pages matching *query* and return
    the de-duplicated event ids gathered across all result pages.

    extract_pages() (project helper) folds ids from a result batch into
    the accumulating set.
    NOTE(review): paginating by calling .search() repeatedly on the same
    PyBingWebSearch object assumes the library continues from its last
    offset -- confirm against the py_bing_search version in use.
    """
    search_term = query + " site:facebook.com/events"
    bing_web = PyBingWebSearch(API_KEY, search_term)
    results = bing_web.search(limit=50, format='json')
    set_ids = set([])
    while len(results) > 0:
        print(len(results))
        set_ids = extract_pages(set_ids, results)
        # A short page means Bing ran out of results; stop paginating.
        if len(results) < 50:
            results = []
        else:
            results = bing_web.search(limit=50, format='json')  #1-50
    ids_list = list(set_ids)
    return (ids_list)
예제 #10
0
    def webQuery(self, query, result_num=10):
        """Submit *query* to Bing Web Search and return the first
        *result_num* results parsed from the JSON response."""
        # Spaces are '+'-encoded for the URL query string.
        full_query = query.replace(' ', '+')
        logging.debug('Sending following URL query: ' + full_query)
        print('%-20s= %s' % ("URL", full_query))

        client = PyBingWebSearch(self.__i_accountKey,
                                 full_query,
                                 web_only=False)
        return client.search(limit=result_num, format='json')
예제 #11
0
def BingSearch(keyword):
    """Search Bing for *keyword* and return its result URLs, deduplicated
    by the project helper, each wrapped as {"Bing": url}.

    API key and result quota come from the config via
    GetTheConfig('bing', ...).
    """
    bing_web = PyBingWebSearch(GetTheConfig('bing', 'Key'),
                               keyword,
                               web_only=False)
    results = bing_web.search(limit=int(GetTheConfig('bing', 'QUANTITY')),
                              format='json')
    # Comprehensions replace the two manual append loops.
    urls = [result.url for result in results]
    unique_urls = DeduplicateValue(urls)
    return [{"Bing": url} for url in unique_urls]
예제 #12
0
	def getfrombing(self, apikey, text, limit, operation):
		"""Dispatch a Bing search of the requested kind and return up to
		*limit* results parsed from JSON.

		operation selects the client: 'moderateimagesearch' /
		'strictimagesearch' / 'adultimagesearch' (Adult = Moderate /
		Strict / Off), 'websearch', 'videosearch' or 'newssearch'.

		Raises:
			ValueError: for an unrecognised *operation* (previously the
			fall-through crashed with an UnboundLocalError on bing_obj).
		"""
		if operation == 'moderateimagesearch':
			bing_obj = PyBingImageSearch(apikey, text, custom_params="&Adult='Moderate'")
		elif operation == 'strictimagesearch':
			bing_obj = PyBingImageSearch(apikey, text, custom_params="&Adult='Strict'")
		elif operation == 'adultimagesearch':
			bing_obj = PyBingImageSearch(apikey, text, custom_params="&Adult='Off'")
		elif operation == 'websearch':
			bing_obj = PyBingWebSearch(apikey, text, web_only=False)
		elif operation == 'videosearch':
			bing_obj = PyBingVideoSearch(apikey, text)
		elif operation == 'newssearch':
			bing_obj = PyBingNewsSearch(apikey, text)
		else:
			raise ValueError("Unsupported operation: %r" % (operation,))
		return bing_obj.search(limit=limit, format='json')
예제 #13
0
def bing_search(search_term):
    """Query Bing Web Search for *search_term*.

    Market (and, when both are configured, Latitude/Longitude) come from
    the module-level ``settings['bing']`` mapping; up to
    ``results_limit`` results are returned as parsed JSON objects.
    """
    latitude = settings['bing'].get('latitude')
    longitude = settings['bing'].get('longitude')

    custom_params = {'Market': "'" + settings['bing']['market'] + "'"}
    # ('Sources' could be added to custom_params the same way if needed.)
    if latitude is not None and longitude is not None:
        custom_params['Latitude'] = latitude
        custom_params['Longitude'] = longitude

    # Serialise as "&Key=value" pairs appended to the request URL.
    custom_params_str = "".join(
        "&" + key + "=" + value for key, value in custom_params.items())

    client = PyBingWebSearch(
        settings['bing']['api_key'],
        search_term,
        web_only=False,  # use the web-only quota, not the all-purpose one
        custom_params=custom_params_str,
    )
    return client.search(limit=int(settings['bing']['results_limit']),
                         format='json')
예제 #14
0
def compare(request):
    """Django view: search Bing for "buy <product>", scrape Flipkart and
    Snapdeal product pages from the results, and render home.html with
    one price per site.

    NOTE(review): both `min_*` loops update on `price > min_*`, which
    keeps the LARGEST value despite the name, and the prices compared
    are strings (lexicographic order) -- both look like bugs; confirm
    before relying on the displayed prices.
    """
    if request.method == 'POST':
        form = search(request.POST)
        if form.is_valid():
            product = form.cleaned_data['querry']
            API_KEY = "8eFYvQ0mCr06A3YoUZV9XK7867AgLLDeLuBdhILm+3c"

            querry = "buy " + product

            bing_web = PyBingWebSearch(API_KEY, querry, web_only=False)

            results = bing_web.search(limit=50, format='json')

            fkart_urls = []
            fkart_price_ar = []
            sdeal_urls = []
            sdeal_price_ar = []
            min_fkart = 0
            min_sdeal = 0

            # Bucket result URLs by shop, keeping only product pages
            # (flipkart .../p/... and snapdeal .../product/...).
            for result in results:
                comp = result.url.split('.')[1]

                if comp == 'flipkart':
                    p = ' '

                    try:
                        p = result.url.split('/')[4]
                    except:
                        continue

                    if p == 'p':
                        fkart_urls.append(result.url)
                        fkart_flag = 1  # NOTE(review): never read

                if comp == 'snapdeal':
                    p = ' '

                    try:
                        p = result.url.split('/')[3]
                    except:
                        continue

                    if p == 'product':
                        sdeal_urls.append(result.url)
                        sdeal_flag = 1  # NOTE(review): never read

            if len(fkart_urls) == 0 and len(sdeal_urls) == 0:
                result = 'Search Failed!'
                context = RequestContext(request, {'result': result})
                return render_to_response('home.html', context)

            else:
                # Scrape each Flipkart page's meta Description, which is
                # expected to read "... for Rs. <price>" or "... for Rs<price>".
                for url in fkart_urls:
                    fkart_url = url
                    fkart_page = requests.get(fkart_url)
                    fkart_html = fkart_page.text
                    fkart_soup = bs(fkart_html, 'html.parser')
                    meta_desc = fkart_soup.findAll(
                        attrs={"name": "Description"})
                    meta_desc_content_split = meta_desc[0]['content'].split(
                        " ")
                    for_bool = 0
                    For_bool = 0

                    try:
                        for_index = meta_desc_content_split.index('for')
                    except:
                        for_bool = 1

                    try:
                        for_index = meta_desc_content_split.index('For')
                    except:
                        For_bool = 1

                    # NOTE(review): if neither 'for' nor 'For' is found,
                    # for_index is unbound; also 'For' overrides 'for'.
                    if for_bool == 0 or For_bool == 0:
                        str_price = meta_desc_content_split[for_index + 1]
                        if (str_price == 'Rs.'):
                            fkart_price = meta_desc_content_split[for_index +
                                                                  2]
                            fkart_price_ar.append(fkart_price)
                        else:
                            # Price glued to the currency, e.g. "Rs.1234".
                            fkart_price = str_price[3:]
                            fkart_price_ar.append(fkart_price)

                # Snapdeal exposes the price as <input id="productPrice">.
                for url in sdeal_urls:
                    sdeal_url = url
                    sdeal_page = requests.get(sdeal_url)
                    sdeal_html = sdeal_page.text
                    sdeal_soup = bs(sdeal_html, 'html.parser')
                    input_tag = sdeal_soup.find_all('input', id='productPrice')
                    ex = 0
                    try:
                        str_price = input_tag[0]['value']
                    except:
                        ex = 1
                    if (ex != 1):
                        sdeal_price_ar.append(str_price)

                # NOTE(review): `>` keeps the max, and values are strings.
                if (len(fkart_price_ar) > 0):
                    min_fkart = fkart_price_ar[0]
                    for price in fkart_price_ar:
                        if (price > min_fkart):
                            min_fkart = price

                if (len(sdeal_price_ar) > 0):
                    min_sdeal = sdeal_price_ar[0]
                    for price in sdeal_price_ar:
                        if (price > min_sdeal):
                            min_sdeal = price

                result = 'Search Succesful!'
                context = RequestContext(
                    request, {
                        'form': form,
                        'result': result,
                        'flipkart_price': str(min_fkart),
                        'snapdeal_price': str(min_sdeal)
                    })
                return render_to_response('home.html', context)
    else:
        # GET: render an empty search form.
        form = search()
        context = RequestContext(request, {'form': form})
        return render_to_response('home.html', context)
예제 #15
0
파일: app.py 프로젝트: akash-attri/pec_hack
# Script: read raw text, spell-correct it, then run Bing web and image
# searches on the corrected text.
# NOTE(review): the file handle is never closed and the absolute
# Windows path is machine-specific.
f = open("D:\Projects\hackathon\content2.txt", "r")
s = f.read()
from py_bing_search import PyBingWebSearch
'''
search_term = "Python Software Foundation"
bing_web = PyBingWebSearch('1jJ4jrq6jGMZl9fWXwVT9DiaIAZjDdNiByM/hdcstfI', search_term, web_only=False) 
first_fifty_result= bing_web.search(limit=6, format='json') #1-50
#second_fifty_result= bing_web.search(limit=50, format='json') #51-100
'''
##s
from correct import *
# Spell-correct the whole blob (corrections() comes from correct.py).
s = corrections(s)

search_term = s
bing_web = PyBingWebSearch('1jJ4jrq6jGMZl9fWXwVT9DiaIAZjDdNiByM/hdcstfI',
                           search_term,
                           web_only=False)
x = bing_web.search(limit=6, format='json')

# Collect (title, description, url) triples for the six web hits.
results = []
for i in range(len(x)):
    results += [(x[i].title, x[i].description, x[i].url)]

from py_bing_search import PyBingImageSearch

# Medium, monochrome images only for the same corrected text.
bing_image = PyBingImageSearch(
    '1jJ4jrq6jGMZl9fWXwVT9DiaIAZjDdNiByM/hdcstfI',
    s,
    image_filters='Size:medium+Color:Monochrome')  #image_filters is optional

photos = bing_image.search(limit=6, format='json')
예제 #16
0
def get_top_bing_goodreads_search(search_term):
    """Bing-search goodreads.com for *search_term* and return the URLs
    of results that point at a book page."""
    site_query = "site:goodreads.com {0}".format(search_term)
    client = PyBingWebSearch(BING_SEARCH_API_KEY, site_query, web_only=False)
    hits = client.search(limit=50, format='json')
    book_urls = []
    for hit in hits:
        if 'goodreads.com/book/show/' in hit.url:
            book_urls.append(hit.url)
    return book_urls
예제 #17
0
def search(search_term):
    """Return the first ten Bing web results for *search_term*, using
    the module-level API key ``s5``."""
    client = PyBingWebSearch(s5, search_term, web_only=False)
    return client.search(limit=10, format='json')
예제 #18
0
파일: bing.py 프로젝트: RASSec/python_pen
#coding=utf-8
# Script: enumerate URLs indexed by Bing under site:cert.org.cn by
# fetching two consecutive batches of 50 results and printing each URL.
from py_bing_search import PyBingWebSearch
search_term = "site:cert.org.cn"
bing_web = PyBingWebSearch('6I7UKjtX4bFiCDO0eQr4N4ErGG1+10BSWTmt0/aQ9QE', search_term, web_only=False) 
# web_only is optional, but should be true to use your web only quota instead of your all purpose quota
first_fifty_result= bing_web.search(limit=50, format='json') #1-50
second_fifty_result= bing_web.search(limit=50, format='json') #51-100

# show title: second_fifty_result[0].description)
# show url: second_fifty_result[0].url)
'''for x in xrange(1,int(len(second_fifty_result))):
	print second_fifty_result[x].url
'''

'''for x in xrange(1,int(len(first_fifty_result))):
	print first_fifty_result[x].url,first_fifty_result[x].title
'''
# Print every collected URL.
# NOTE(review): xrange starts at 1, so index 0 of each batch is
# skipped -- confirm that is intended.
for y in (first_fifty_result,second_fifty_result):
	for x in xrange(1,int(len(y))):
		print y[x].url
		pass
예제 #19
0
 def test_search_all(self):
     """search_all(limit=60) should return exactly 60 results, the
     first of whose titles mentions "Python"."""
     web_bing = PyBingWebSearch(SECRET_KEY, "Python Software Foundation")
     result_one = web_bing.search_all(limit=60)
     # assertEqual/assertIn report the actual values on failure,
     # unlike assertTrue on an already-evaluated boolean.
     self.assertEqual(len(result_one), 60)
     self.assertIn("Python", result_one[0].title)
예제 #20
0
    def _GetMovieResearch(self, term, limit=50, format='json'):
        """Run a Bing web search for *term* and return up to *limit*
        results in the requested *format*."""
        searcher = PyBingWebSearch(self.BING_API_KEY, term, web_only=False)
        return searcher.search(limit=limit, format=format)
예제 #21
0
class bingScan:
    """Drive page-by-page Bing queries and feed each result URL to the
    project's singleScan vulnerability scanner."""

    def __init__(self, config):
        # Config keys used: p_googlesleep (cooldown seconds),
        # p_results_per_query, p_skippages, p_query, p_pages, p_maxtries.
        self.config = config
        # NOTE(review): "YOUR ID" is a placeholder API key -- confirm.
        self.bs = PyBingWebSearch("YOUR ID")
        self.cooldown = self.config["p_googlesleep"];
        self.results_per_page = int(self.config["p_results_per_query"]);
        if (self.config["p_skippages"] > 0):
            print("Bing Scanner will skip the first %d pages..."%(self.config["p_skippages"]))


    def startGoogleScan(self):
        """Loop over up to p_pages result pages, honouring the cooldown
        between requests, and scan every returned URL.

        NOTE(review): the bare `raise` in the generic except clause makes
        the whole retry block below it (redo/curtry/p_maxtries handling)
        unreachable -- it looks like a debugging leftover. Also, if the
        first request raises, `results` is referenced while unbound.
        """
        print("Querying Bing Search: '%s' with max pages %d..."%(self.config["p_query"], self.config["p_pages"]))

        pagecnt = 0
        curtry = 0
        
        last_request_time = datetime.datetime.now()

        while(pagecnt < self.config["p_pages"]):
            pagecnt = pagecnt +1
            redo = True
            while (redo):
              try:
                # Enforce the configured cooldown between requests.
                current_time = datetime.datetime.now()
                diff = current_time - last_request_time
                diff = int(diff.seconds)

                if (diff <= self.cooldown):
                    if (diff > 0): 
                        print("Commencing %ds bing cooldown..." %(self.cooldown - diff))
                        time.sleep(self.cooldown - diff)
                    
                last_request_time = datetime.datetime.now()
                # NOTE(review): search_web with Web.Count/Web.Offset is
                # the old Bing API shape; other snippets use
                # PyBingWebSearch(key, query).search() -- confirm.
                resp = self.bs.search_web(self.config["p_query"], {'Web.Count':50,'Web.Offset':(pagecnt-1)*self.results_per_page})
                results = resp['SearchResponse']['Web']['Results']
                redo = False
              except KeyboardInterrupt:
                raise
              except Exception as err:
                raise
                # Unreachable while the `raise` above stands (see class
                # method note).
                redo = True
                sys.stderr.write("[RETRYING PAGE %d]\n" %(pagecnt))
                curtry = curtry +1
                if (curtry > self.config["p_maxtries"]):
                    print("MAXIMUM COUNT OF (RE)TRIES REACHED!")
                    sys.exit(1)
            
              
            curtry = 0
              

            # An empty page means no more results; stop early.
            if (len(results) == 0): break
            sys.stderr.write("[PAGE %d]\n" %(pagecnt))
            try:
                for r in results:
                    single = singleScan(self.config)
                    single.setURL(r["Url"])
                    single.setQuite(True)
                    single.scan()
            except KeyboardInterrupt:
                raise
            time.sleep(1)
        print("Bing Scan completed.")
예제 #22
0
 def test_search_all(self):
     """search_all(limit=60) should return exactly 60 results, the
     first of whose titles mentions "Python"."""
     web_bing = PyBingWebSearch(SECRET_KEY, "Python Software Foundation")
     result_one = web_bing.search_all(limit=60)
     # assertEqual/assertIn report the actual values on failure,
     # unlike assertTrue on an already-evaluated boolean.
     self.assertEqual(len(result_one), 60)
     self.assertIn("Python", result_one[0].title)
예제 #23
0
def _getData(data, name=1):
    """Search Bing (search_all) for a LinkedIn profile matching the
    person described by *data* and return scored CSV lines.

    data keys used: 'siteid', 'title', 'site', 'company', 'location',
    'name'. When name=0 the recovered first/last name are blanked.
    Returns a list of UTF-8 encoded CSV line strings.
    """
    srch_title = ""
    if data['title'] != u"":
        srch_title = "\"%s\"" % data['title']

    # Default to LinkedIn profile pages when no site is given.
    if data['site'] == u"":
        data['site'] = "www.linkedin.com/in/"

    com_name = data["company"]
    if com_name != "":
        com_name = "Current: %s" % com_name

    srch_location = ""
    if data['location'] != u"":
        srch_location = "Location %s" % data['location']

    search_term = "site:%s %s %s %s" % (data['site'], com_name, srch_title, srch_location)
    print search_term

    # get data using bing api
    bing_web = PyBingWebSearch(settings.BING_API_KEY, search_term.strip())
    result = bing_web.search_all(limit=100, format='json') #1-50

    # Split the expected "First Last" query name; single-token names
    # get an empty last name.
    name_tp = data['name'].split(" ")
    first_name = name_tp[0]
    last_name = ""
    if(len(name_tp) > 1):
        last_name = name_tp[1]

    res = []

    index = 0
    for item in result:
        index += 1
        '''title_tp = item.title
        if title != u"":
            title_tp = title'''

        # Strip CSV-hostile characters from the snippet.
        description = item.description.replace(",", " ")
        description = description.replace("\"", " ")

        # Prefer the name embedded in a "<First ... Last> | LinkedIn" title.
        if "| LinkedIn" in item.title:
            temp = item.title
            temp = temp.split("|")[0].strip().split(" ")
            first_name = temp[0]
            last_name = temp[-1]
            item.title = ""

        if name == 0:
            first_name = ""
            last_name = ""

        # get title and company from a search result.
        ps_des = parse_str(description)
        res_company = ""
        if ps_des is not None:
            res_company = ps_des[1].strip()
            item.title = ps_des[0].strip()

        # get location and industry from a result
        res_location = ""
        industry_str = ""
        ps_ind = parse_str(description, "Industry")
        if ps_ind is not None:
            res_location = ps_ind[0].replace("Location", "").strip()
            industry_str = ps_ind[1].strip()

        # get score of location
        full_location = res_location
        res_location = res_location.replace("Area", "")
        res_location = res_location.replace("Industry", "")
        res_location = res_location.replace("Greater", "").strip()

        # Loose substring match in either direction counts as a hit.
        score_location = "No"
        if res_location != "" and (res_location == data['location'] or res_location in data['location'] or data['location'] in res_location):
            score_location = "Yes"

        # get score of company
        # NOTE(review): the trailing `and res_company` is redundant --
        # res_company != "" already guarantees truthiness.
        score_company = "No"
        if res_company != "" and res_company == data['company'] and res_company:
            score_company = "Yes"


        # Rank score: 1 for the first three results, 2 for the next
        # two, 3 for the rest.
        score = 3
        if index < 4:
            score = 1
        elif index < 6:
            score = 2

        # get current company
        curr_com = parse_curr_company(description)

        # get education
        education = parse_curr_company(description, "Education:")

        # get date and timestamp
        # NOTE(review): time_stamp is computed but never used.
        time_stamp = time.time()
        date = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')

        # Emit the row only when the query name is empty or matches the
        # name recovered from the result.
        if data['name'] == "" or (data['name'] != "" and data['name'] == "%s %s" % (first_name, last_name)):
            line = '"%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%d","%s","%s","%s"\n' % \
                   (data['siteid'], data['company'], data['title'], data['location'], first_name, last_name, item.title, \
                    res_company, res_location, full_location, industry_str, item.url, curr_com, education, date, \
                    score, score_location, score_company, description)

            line = line.encode("utf8")
            # NOTE(review): this targets a mojibake right-single-quote
            # sequence but is applied to a byte string in py2 -- verify
            # it actually matches.
            line = line.replace("\u00E2\u20AC\u2122", "")
            res.append(line)

    return res