def getGoogleResults(self, pluginname, latest, cve): try: gs = GoogleSearch("inurl:'wp-content/plugins/" + pluginname + "'", random_agent=True) gs.results_per_page = 100 numberOfprocessed = 0 self.all_run = [] for i in range(int(limitForSearch)): results = gs.get_results() if not results: break # Semaphore for write in order to screen self.checkSimultaneus = threading.Semaphore(int(NumThreats)) # Semaphore for write to file self.writeFile = threading.Semaphore(int(NumThreats) - 1) for res in results: self.checkSimultaneus.acquire() host_name = urlparse(res.url.encode()).hostname # Create thread t = threading.Thread(target=self.__getGoogleResults, args=(host_name, latest, pluginname, cve)) self.all_run.append(t) # run thread self.all_run[len(self.all_run) - 1].start() except SearchError, e: print "Search failed: %s" % e
def google(self, text): try: print "Trying to search for " + text g1 = GoogleSearch(text) g1.results_per_page = 25 results = g1.get_results() if len(results) == 0: print "No search result!!" else: print "Results FOund!!" print type(results) print len(results) for res in results[:2]: time.sleep(1) url = res.url.encode("utf8") response = self.search(url) if response == "Kgpian": self.close() break except SearchError, e: print "Failed Once"
def __init__(self, config): self.config = config self.gs = GoogleSearch(self.config["p_query"], page=self.config["p_skippages"], random_agent=True) self.gs.results_per_page = self.config["p_results_per_query"]; self.cooldown = self.config["p_googlesleep"]; if (self.config["p_skippages"] > 0): print "Google Scanner will skip the first %d pages..."%(self.config["p_skippages"])
def go(self, query, pages):
    """Scrape every Google result for *query* across the first *pages* pages."""
    search = GoogleSearch(query)
    search.results_per_page = 10
    for page_index in range(pages):
        search.page = page_index
        for hit in search.get_results():
            self.scrape(hit)
def google_search_results(search_query, wait=40, number_of_results=10, encode=True, max_fail_count=5, current_fail_count=1, random_text=None): ''' DO NOT MESS WITH THIS IT IS PERFECT FOR NOW''' # gets AT LEAST number_of_results results # don't query too fast or Google will block your IP temporarily # for this purpose, I have added the variable max_result_size # if your IP does get blocked, try later in the day or wait a day or two try: max_result_size = 10 #don't change it from this: the standard of 10 seems the least suspicious to google gs = GoogleSearch(search_query, random_agent=True) # does not actually search gs.results_per_page = max_result_size gs.page = 0 times_tried = 0 results = [] prev = 0 # print "getting results:" while len(results) < number_of_results: prev = len(results) times_tried += 1 time.sleep(random.uniform(0.5 * wait, 1.5 * wait)) results += gs.get_results( ) # Actual search and extraction of results. print "\rtimes_tried: %s\tlen(results): %s\tpage_number: %s" % ( times_tried, len(results), gs.page), print "\n" # We now have a list of SearchResult objects, called 'results'. # A SearchResult object has three attributes -- "title", "desc", and "url". # They are Unicode strings, so do a proper encoding before outputting them. (done below) if encode: for i in range(0, len(results)): results[i].title = results[i].title.encode("utf8", "ignore") results[i].desc = results[i].desc.encode("utf8", "ignore") results[i].url = results[i].url # random.shuffle(results) except SearchError, e: print "Google Try #%s: Search failed on this url:\t%s" % ( current_fail_count, e) google_search_redirect(random_text) if current_fail_count != max_fail_count: return google_search_results( search_query, wait=wait, number_of_results=wait, encode=encode, max_fail_count=max_fail_count, current_fail_count=current_fail_count + 1)
def search_google(term, domain):
    """Google *term* on the given TLD; return up to ten Url objects, or
    None when the search raises."""
    try:
        log.debug('Performing Google search for "{}"'.format(term))
        searcher = GoogleSearch(term, tld=domain)
        searcher.results_per_page = 10
        hits = searcher.get_results()
        log.debug('Got {} results'.format(len(hits)))
        return [Url(hit.url) for hit in hits[:10]]
    except SearchError as exc:
        log.exception(exc)
        return None
def get_number_of_results(term, ajax=False, verbose=True):
    """Return Google's estimated hit count for *term*.

    Scrapes the HTML results page unless *ajax* is set; when scraping
    fails to find the count it pauses for inspection and then falls
    through to the AJAX API.
    """
    if not ajax:
        page = str(GoogleSearch(term)._get_results_page())
        match = reg.search(page)
        if match is not None:
            count = match.groups()[0]
            if verbose:
                print(term, count)
            return int(count.replace(',', ''))
        # Count not found on the page — let the operator look at it.
        raw_input((term, page))
    return int(search(term)['responseData']['cursor']['estimatedResultCount'])
def get(self, params=None):
    """
    gets the answer from the answer template
    :param params: msg = params[0], func = params[1]
    :return: returns the first template if is_random is false, otherwise returns random template
    """
    # Section names from ynet's navigation; result titles equal to one of
    # these are category pages, not articles, and are skipped below.
    ynet_sections = [
        u"חדשות", u"כלכלה", u"ספורט", u"תרבות", u"רכילות", u"דיגיטל",
        u"בריאות", u"יהדות", u"חופש", u"רכב", u"אוכל", u"צרכנות", u"יחסים",
        u"mynet", u"מדע", u"לימודים", u"קניות", u"קהילות",
        u"חדשות תוכן ועדכונים"
    ]
    # Restrict the Google query to ynet.co.il plus the caller's text.
    msg = ('ynet.co.il:' + params[0]).encode('utf-8')
    try:
        b = Browser()
        gs = GoogleSearch(msg, lang='he', tld="co.il")
        gs.results_per_page = 50
        results = gs.get_results()
        for res in results:
            try:
                if (res.url is not None):
                    # Fetch each hit and extract its <title>.
                    page = b.get_page(res.url)
                    soup = BeautifulSoup(page)
                    title = soup.find("title")
                    if (title is not None):
                        # Prefer a quoted phrase inside the title, if any.
                        if (' "' in title.text and '" ' in title.text):
                            return self.find_between(
                                title.text, ' "', '" ')
                        # Otherwise take the part before the first dash and
                        # strip site branding/punctuation.
                        # NOTE: `res` is rebound from the SearchResult to a
                        # plain string from here on.
                        res = title.text.split('-')[0].replace(
                            'ynet', '').strip().strip('"')
                        if ':' in res:
                            res = res.split(':')[1].strip().strip('"')
                        res = res.strip()
                        # Skip empty titles and bare section names.
                        if res == u'' or res in ynet_sections:
                            continue
                        else:
                            return res
            except:
                # Best effort: any per-result failure moves to the next hit.
                continue
        return "?"
    except SearchError, e:
        # Search itself failed — same "unknown" sentinel as no-match.
        return "?"

#a = avoiding_msg_ynet(None,None)
# a.get(["ynet.co.il:האם טביב ימכור את הקבוצה?"])
# res = a.get(["ynet.co.il:האם ביבי ימכור את המדינה?"])
#Sa.get(["ynet.co.il:מה יהיה עם הגז?"])
#a.get(["seret.co.il:המרגלת"])
#a = avoiding_msg_ynet()
#a.test_browser()
# a.get(["האם אלי טביב ימכור את הקבוצה?"])
#a.get(["ynet.co.il:איזה גרוע ביבי הא?"])
def searchInSeretil():
    # Kodi/XBMC handler: prompt the user for a query, google it restricted
    # to seretil.me and add one directory entry per cleaned-up result.
    search_entered = ''
    keyboard = xbmc.Keyboard(search_entered, 'הכנס מילות חיפוש כאן')
    keyboard.doModal()
    if keyboard.isConfirmed():
        search_entered = keyboard.getText()
        if search_entered != '':
            try:
                gs = GoogleSearch("site:seretil.me " + search_entered)
                gs.results_per_page = 100
                results = gs.get_results()
                for res in results:
                    title = res.title.encode('utf8')
                    url = res.url.encode('utf8')
                    # Strip site boilerplate tokens from the result title.
                    title = title.replace('SERETIL.ME', '')
                    title = title.replace('לצפייה ישירה', '')
                    title = title.replace('וסדרות', '')
                    title = title.replace('תרגום מובנה', '')
                    title = title.replace('|', '')
                    title = title.replace('.', '')
                    title = title.replace('סרטים', '')
                    title = title.replace('עם', '')
                    title = title.replace('לצפיה', '')
                    # "עונה" ("season") in the title marks a series page.
                    if 'עונה' in title:
                        # Skip pagination/tag/search URLs.
                        if not 'page' in url and not 'tag' in url and not '?s' in url and not 'search' in url:
                            addDir(title, url, 211, '')
                    else:
                        if not 'page' in url and not 'tag' in url and not '?s' in url and not 'search' in url:
                            image = ''
                            # Fetch the page to scrape a poster image.
                            req = urllib2.Request(url)
                            req.add_header(
                                'User-Agent',
                                ' Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
                            )
                            response = urllib2.urlopen(req)
                            link3 = response.read()
                            response.close()
                            # Isolate the post body, then the first jpg in it.
                            block = re.compile(
                                '<div class="post-wrap post-wrap-single">(.*?)linkwithin_hook',
                                re.M + re.I + re.S).findall(link3)
                            image = ''
                            images = re.compile('src="http(.*?).?jpg').findall(
                                block[0])
                            if images:
                                image = 'http' + images[0] + '.jpg'
                            addDir(title, url, 5, image)
            except SearchError, e:
                print "Search failed: %s" % e
    xbmcplugin.setContent(int(sys.argv[1]), 'tvshows')
def scrape(self, keyword, pages=2): try: gs = GoogleSearch(keyword) gs.results_per_page = 10 gs.page = 0 results = gs.get_results() for res in results: url = res.url.encode('utf8') Title = res.title self.urls.append((url, Title)) except SearchError, e: print "Search failed: %s" % e
def goggle(self, word): """Get results from google """ try: results = [] gs = GoogleSearch(word, random_agent=True) gs.results_per_page = 50 hits = gs.get_results() for hit in hits: results.append(hit.url.encode('utf8')) return results except SearchError, e: print "Search failed: %s" % e
def search_by_filename(args): args_e = args.encode('utf8') try: gs = GoogleSearch('"' + args_e + '"') gs.results_per_page = 50 results = gs.get_results() for res in results: if re_math_sites(allow_sites, res.url.encode('utf8')): if re_math_sites(args_e, res.desc.encode('utf8')): return clean_result(res.title.encode('utf8')) except SearchError, e: print "Search failed: %s" % e
def Search_YTonGoogle(self,search): # import Google Search from xgoogle.search import GoogleSearch # search on google gs = GoogleSearch(search+' site:http://www.youtube.com ') gs.results_per_page = 25 gs.page = 0 # return result or None try: results = gs.get_results() return results except Exception, e: print 'getTrailer --> Error: %s' % e return None
def googledefault(termtosearch, lookspam): try: gs = GoogleSearch(termtosearch) gs.results_per_page = 50 results = gs.get_results() if lookspam: for res in results: print '\033[1;34mLooking for SPAM in........%s\033[1;m' % ( res.url.encode('utf8')) spam_detect(res.url.encode('utf8')) else: for res in results: print res.url.encode('utf8') except SearchError, e: print "Search failed: %s" % e
def searchDocuments(self, terms): ''' This function search terms in google and store the textual content in DomainKnowledgeDocument objects @param terms: list of string terms to be searched through internet ''' try: sentence = ' '.join(terms) gs = GoogleSearch(sentence) results = gs.get_results() for result in results: self.documentsURLs.append(result.get_URL()) print gs.num_results except SearchError, e: print "Search failed: %s" % e
def main(): gs = GoogleSearch('intitle:道德黑客技术论坛内部专版WEBSHELL') gs.results_per_page = 100 for index in range(4): gs.page = index + 1 results = gs.get_results() for result in results: url = result.getURL() print result ret = exploit(url) if ret == '': continue open('result.txt', 'a').write(ret)
def perform_search(self): url_list = list() try: gs = GoogleSearch(self.object) gs.results_per_page = 50 results = gs.get_results() for res in results: url_list.append(res.url.encode("utf8")) return url_list except SearchError, e: print("Search failed: %s" % e)
def run(self, string): query = "site:ReverseIndexSite %s" % string #if not thread: # say("Querying Google: '%s'" % query) gs = GoogleSearch(query) gs.results_per_page = 10 results = gs.get_results() if len(results) >= 1: result = None #At the end result must be a string containing the decoded md5 hash result = ["ReverseIndexSite", result] if thread: say(result) return result
def __init__(self, query, filetypes, site, resultsperpage, maxresults, repeat):
    """Build the final Google query (filetype/site filters appended) and
    the search object that drives pagination."""
    if filetypes:
        # First filetype uses the plain operator, the rest are OR'ed in.
        wanted = re.split(",", filetypes)
        query += " filetype:" + wanted.pop(0)
        for extension in wanted:
            query += " OR filetype:" + extension
    if site:
        query += " site:" + site
    print(query)
    self.gs = GoogleSearch(query, random_agent=True, repeat=repeat)
    self.gs.results_per_page = int(resultsperpage)
    self.maxresults = int(maxresults)
    self.lastpage = False
def get_ranks(self):
    # For every campaign keyword, walk Google's result pages and record the
    # 1-based rank of each campaign URL, stopping when all URLs are found
    # or the configured search depth is exceeded.
    for keyword, urls in campaigns.get_keywords().iteritems():
        gs = GoogleSearch(keyword)
        gs.results_per_page = self.config['limits']['results_per_page']
        sys.stderr.write('\n\nChecking keyword: %s\n' % keyword)
        results = self.get_results(gs)
        offset = 1  # rank of the first result on the current page (1-based)
        query_count = 0
        while len(urls) > 0 and results:
            # Display a period for every hit we make to Google
            if query_count % 5 == 0:
                sys.stderr.write(' ')
            sys.stderr.write('.')
            for rank, row in enumerate(results):
                if (len(urls) > 0):
                    # Find results containing one of our sites
                    found = filter(lambda x: row.url.find(x) != -1, urls)
                    for entry in found:
                        campaigns.set_rank(entry, keyword, rank + offset)
                    # Using sets to get remaining sites to check for
                    urls = list(set(urls) - set(found))
                else:
                    break
            # Don't collect another time if no more URLs are left to check
            offset += len(results)
            results = None
            # We want to sleep here regardless because we might scrape
            # really fast if all the results are on the first page
            time.sleep(self.config['limits']['delay'])
            # Only check if there are sites remaining and we have not
            # surpassed our maximum configured depth
            if (len(urls) > 0 and
                    offset <= self.config['limits']['search_depth'] + 1):
                results = self.get_results(gs)
                query_count += 1
            elif verbose:
                sys.stderr.write('Not retrieving more results\n')
        if verbose:
            sys.stderr.write('URLs: %s\n' % ', '.join(urls))
            if results:
                sys.stderr.write('Results: %s\n' % len(results))
def searchHandler(user, command, args, mess): try: if len(args) < 2: return "Please Provide your search Query" else: gs = GoogleSearch(args) gs.results_per_page = 10 gs.page = 1 results = gs.get_results() if len(results) > 0: for res in results: return res.title.encode("utf8") + "\n" + res.desc.encode( "utf8") + "\n" + res.url.encode("utf8") else: return "No Search Result Found for your query." except SearchError, e: return "Search failed: %s" % e
def run(self, string):
    """Look *string* up on md5-database.org through Google; return the
    decoded value, or '' when no snippet contains it."""
    query = "site:http://md5-database.org/md5 %s" % string
    #if not thread:
    #    say("Querying Google: '%s'" % query)
    gs = GoogleSearch(query)
    gs.results_per_page = 10
    page = gs._get_results_page()
    # Flatten the parsed results page into one text blob.
    text = ''.join(page.findAll(text=True))
    # Scan each "MD5}...MD5" snippet for the queried hash.
    for snippet in re.findall(re.compile('MD5\}.*?MD5'), text):
        if string in snippet:
            return snippet[(snippet.find(',') + 1):snippet.find('.')].strip()
    return ''
def google_search(query): try: list = Set() for i in range(0, 15): print "Step: " + str(i) + " for " + query gs = GoogleSearch(query) gs.results_per_page = 100 gs.page = i results = gs.get_results() for res in results: url = res.url.encode('utf8') url = url[url.find(".") + 1:find_nth(url, "/", 3)] if url.count('.', 0, len(url)) > 1: url = url[url.find(".") + 1:len(url)] list.add(url) return list except SearchError, e: print "Search failed: %s" % e
def google(text): response = "" time.sleep(0.5) count = 0 try: print "Trying to search for " + text g1 = GoogleSearch(text) g1.results_per_page = 25 results = g1.get_results() for res in results[:2]: time.sleep(0.5) response = search(res.url.encode("utf8")) return response except SearchError, e: print "Failed Once"
def GetSearchResults(query=None, type=None, imdb_id=None, exact=False):
    # Google site-search icefilms for the title and return MediaInfo items.
    # TV and movie queries use different markers: "Episode List" pages are
    # TV shows, so movie searches exclude them.
    if (type == "movies"):
        # This a google search. The -tv will ommit all TV shows.
        search = 'intitle:%s -"Episode List" -"Series Rating" site:%s' % (query, ICEFILMS_URL)
    else:
        search = 'allintitle:%s "Episode List" site:%s' % (query, ICEFILMS_URL)
    gs = GoogleSearch(search)
    gs.results_per_page = 25
    gs.page = 0
    results = gs.get_results()
    items = []
    for res in results:
        # Strip markup and site boilerplate from the result title.
        name = re.sub(
            '(<em>|</em>|<a>|</a>|DivX|-|icefilms(\.info)?|<b>\.\.\.</b>|Episode List|links)',
            '', res.title.encode('utf8')).strip()
        url = res.url
        # Keep only the path portion of the icefilms URL as the video id.
        video_url = re.search("icefilms\.info(/.*)", url).group(1)
        # NOTE: `res` is rebound from the SearchResult to a MediaInfo here.
        res = MediaInfo()
        res.type = type
        res.title = name
        # Pull a trailing "(year)" out of the title when present.
        match = re.search("(.*)\((\d*)\)", res.title)
        if (match):
            res.title = match.group(1).strip()
            res.year = int(match.group(2).strip())
        res.id = video_url
        items.append(res)
    return items
def google_search(query): try: results = [] resultg = [] gs = GoogleSearch(query) gs.results_per_page = 30 while True: tmp = gs.get_results() if not tmp: # no more results were found break results.extend(tmp) #f.write(res.title.encode('utf8')) #f.write("\n<br><br>") #f.write(res.desc.encode('utf8')) #f.write("\n<br><br>") f = open("final.txt", "w") for res in results: f.write('\n <a href=' + res.url.encode('utf8') + '>' + '<h1>' + res.title.encode('utf8') + '</h1>\n' + '</a>\n') resultg.extend(res.url.encode('utf8')) f.close() except SearchError, e: print "Search failed: %s" % e
def searchFor(text):
    # Sublime Text command: google *text* and present the top 10 titles in
    # a quick panel; fall back to opening the browser when no panel exists.
    gs = GoogleSearch(text)
    gs.results_per_page = 32
    page = 1
    results = []
    titles = []
    # NOTE(review): `page` is counted up but never assigned to gs.page —
    # presumably get_results() advances pagination internally; confirm
    # against the xgoogle version in use, otherwise the same page is
    # fetched four times.
    while page < 5:
        results.extend(gs.get_results())
        page += 1
    results = results[:10]
    for res in results:
        titles.append(str(res.title.encode("utf-8")))
        # `urls` is not local — it is a name defined at module level.
        urls.append(str(res.url.encode("utf-8")))
    print len(results)
    print titles
    try:
        sublime.active_window().show_quick_panel(titles, onSelection,
                                                 sublime.MONOSPACE_FONT)
    except:
        # No UI available — open the equivalent search in a browser tab.
        webbrowser.open_new_tab("https://www.google.com/search?q=" +
                                text.replace(" ", "+"))
def google(termtosearch, action): #action = spam or phis try: gs = GoogleSearch(termtosearch) gs.results_per_page = 100 results = [] while True: tmp = gs.get_results() if not tmp: break results.extend(tmp) #TODO switch in this code block if action == 'mal': for res in results: checkAgainstGoogle(res.url.encode('utf8')) else: if action == 'spam': for res in results: print '\033[1;34mLooking for SPAM in ......%s\033[1;m' % ( res.url.encode('utf8')) spam_detect(res.url.encode('utf8')) elif action == 'phis': for res in results: print '\033[1;34mLooking for PHISHING in ......%s\033[1;m' % ( res.url.encode('utf8')) phishing_detect(res.url.encode('utf8')) else: for res in results: print res.url.encode('utf8') except SearchError, e: print "Search failed: %s" % e
def scan(url, wordlist): fname = wordlist with open(fname, 'r') as f: dorks = f.readlines() f.close() for dork in dorks: if len(dork) < 2: continue try: rnd = random_int(2, 5) time.sleep(rnd) g = GoogleSearch("site:" + url + " " + dork, random_agent=True) g.results_per_page = 10 print("."), results = g.get_results() if len(results) > 0: msg = "[+] Found " + results + " results with dork: " + dork logger.info(msg) for res in results: print res.title.encode('utf8') print res.url.encode("utf8") except SearchError, e: print "Search failed: %s" % e
for item, group in itertools.groupby(sorted(results))) def print_results(results): sr = sorted(results.iteritems(), key=operator.itemgetter(0), reverse=True) for r in sr: print "%d\t%s" % r if __name__ == "__main__": args = sys.argv[1:] if not args: print 'Error: usage %s "phrase with * in it"' % sys.argv[0] sys.exit(1) if args > 1: args = ' '.join(args) query = '"%s"' % args gs = GoogleSearch(query) gs.results_per_page = 100 results = gs.get_results() args_re = args.replace('*', '.*?') filter_re = re.compile(args_re, re.I) frf = filter_re.findall nomnomnom = [] for r in results: descs = filter_re.findall(r.desc) titles = filter_re.findall(r.title) nomnomnom += [x.lower().encode('utf-8') for x in descs + titles] results = count_results(nomnomnom) print_results(results)