Example #1
def checkSnortRule(cve_id: str, good_snort_link: list, pause_sec_list) -> bool:
    '''
        Search Google for "<cve_id> snort rule" and append the result to good_snort_link
        when its URL starts with 'https://www.snort.org' or 'https://blog.snort.org'.
    '''
    # [*] Oddly, the results here can differ from a manual Google search: a URL may match
    # here even when a manual search would not surface it, but when it matches here it
    # usually matches in a manual search too, so the result is still worth considering.

    query = cve_id + ' snort rule'
    agent = googlesearch.get_random_user_agent()
    para = {
        'oq': cve_id + '+',
        'aqs': 'chrome.1.69i59l2.1406j0j1',
        'sourceid': 'chrome',
        'ie': 'UTF-8'
    }  # I don't know whether this is useful
    pause_sec = random.choice(pause_sec_list)  # To avoid being blocked by Google

    for entry in googlesearch.search(
            query, stop=5, user_agent=agent, extra_params=para,
            pause=pause_sec):  # The match mostly appears in the top five results
        # startswith() avoids relying on hard-coded prefix lengths (21 and 22 characters)
        if entry.startswith('https://www.snort.org') or entry.startswith('https://blog.snort.org'):
            good_snort_link.append(entry)
            return True

    return False
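A minimal driver sketch for the function above; the CVE id and the pause candidates are illustrative placeholders, not values from the original source.

# Hypothetical driver; the CVE id and pause values are placeholders.
good_links = []
if checkSnortRule('CVE-2014-0160', good_links, [10, 20, 30]):
    print('Snort rule page found:', good_links[0])
else:
    print('No snort.org result in the top five hits.')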
Example #2
    def searchInGoogle(self, message: dict, maxCountOfLinks: int = 5) -> list:
        """
        Выполняет поиск в Google по запросу с параметрами, которые пришли от клиента (от веб-сайта MAS).
        
        Параметры:
        --------
        message: dict
            словарь с критериями поиска вакансий от клиента
        
        maxCountOfLinks : int, optional
            максимальное количество страниц для поиска. Чем больше это число, тем дольше будет обрабатываться запрос.
            Для тестовых целей есть смысл сделать его маленьким (в пределах 10)
        
        Возвращаемое значение:
        --------
        array
            список ссылок, которые удалось достать из Google
        -------
        """

        user_agent = googlesearch.get_random_user_agent()
        response = googlesearch.search('Вакансии %s Гродно' %  # "Vacancies <position> Grodno"
                                       (message['position']),
                                       country=message['country'],
                                       stop=maxCountOfLinks,
                                       user_agent=user_agent)

        urls = [x for x in response]
        self.log_info("> Parsed %s items. Sending to viewer..." % len(urls))
        return urls
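A minimal call sketch; the owning class is not shown in this excerpt, so the instance name below is hypothetical, and the message keys ('position', 'country') are the ones read in the method body.

# 'searcher' stands for an instance of the (unshown) owning class.
message = {'position': 'Python developer', 'country': 'BY'}
urls = searcher.searchInGoogle(message, maxCountOfLinks=5)
for url in urls:
    print(url)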
Example #3
def search_google(search, args):
    ''' The function where the googlesearch package by Mario Vilas
        is called.
    '''

    s = search.split(',')
    search_stop = args.search_stop

    query = 'filetype:pdf'
    #query = 'site:%s filetype:pdf' % search
    # print(query)
    urls = []

    try:
        for url in gs.search(query,
                             num=20,
                             domains=s,
                             stop=search_stop,
                             user_agent=gs.get_random_user_agent()):
            #print(url)
            urls.append(url)

    except urllib.error.HTTPError as e:
        #print('Error: %s' % e)
        return False, e

    except urllib.error.URLError as e:
        return False, e

    return True, urls
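An illustrative call; `args` stands in for an argparse-style namespace carrying a `search_stop` attribute, as read in the function body, and the domains are placeholders.

import argparse

args = argparse.Namespace(search_stop=10)
ok, result = search_google('example.com,example.org', args)
if ok:
    for url in result:
        print(url)
else:
    print('search failed:', result)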
Example #4
    def __init__(self, seq: str = None):
        self.__isvalid = True
        self.__number = 339960

        if seq is not None:
            seq = str(int(seq))
            if not 0 < int(seq) < self.__number:
                self.__isvalid = False
            self.__seq = 'A' + '0' * (6 - len(seq)) + seq
            self.__isrand = False
        else:
            self.__seq = str(choice(range(1, self.__number)))
            self.__seq = 'A' + '0' * (6 - len(self.__seq)) + self.__seq
            self.__isrand = True

        self.__url = f'https://oeis.org/search?q={self.__seq}'
        self.__html = bs(
            get(self.__url,
                headers={
                    'User-Agent': str(get_random_user_agent())
                }).text, 'lxml')
        self.__elem = self.__html.find(
            'p', {
                'style':
                'text-indent: -1em; margin-left: 1em; margin-top: 0; margin-bottom: 0;'
            })
        self.__table = self.__html.findAll('table')[9]
Example #5
    def _search():
        time_since_last_use = 0
        prush("Selecting an engine...")
        engine_name = ""
        while True:
            engine = random.choice(search_engines)()
            engine_name = engine.__class__.__name__
            if engine_name not in engine_times:
                break
            time_since_last_use = (datetime.now() -
                                   engine_times[engine_name]).total_seconds()
            if time_since_last_use < ENGINE_COOLDOWN_TIME:
                prush(
                    "Engine '{}' used too recently. Trying another...".format(
                        engine_name))
            else:
                break

        engine.set_headers({'User-Agent': get_random_user_agent()})
        # Internally interpreted as sleep(random_uniform(*self._delay)).
        # This value is set low (zero here) since we already pause between
        # uses of each engine (above).
        engine._delay = (0, 0)
        subject = random.choice(subjects) + " news"
        prush("Searching for subject '{}'...".format(subject))
        search_results = engine.search(subject, pages=SEARCH_PAGES).links()
        engine_times[engine_name] = datetime.now()
        prush("Found {} results for subject '{}'.".format(
            len(search_results), subject))
        return search_results
Example #6
    def __scrape(self):
        if self.__html == '':
            self.__html = bs(
                get(self.__url,
                    headers={
                        'User-Agent': str(get_random_user_agent())
                    }).text, 'lxml')
Example #7
def search_pdf(search, args):
    ''' The function where the googlesearch package by Mario Vilas
        is called.
    '''

    search_stop = args.search_stop

    query = '%s filetype:pdf' % search
    #print(query)
    urls = []

    try:
        for url in gs.search(query,
                             num=20,
                             stop=search_stop,
                             user_agent=gs.get_random_user_agent()):
            #print(url)
            # parse out the name of the file in the url
            filename = find_name(url)
            # add the file to queue
            process_queue_data(filename, url, 'url')
            urls.append(url)

    except urllib.error.HTTPError as e:
        print('Error: %s' % e)
        return -1
Example #8
    def google_search_query(self, query, lang="id", maxSearch=10, **kwargs):
        dt = {
            "google_search_query": {
                "urls": [],
                "title": []
            },
        }
        for i in search(query,
                        lang=lang,
                        start=0,
                        stop=maxSearch,
                        tld="com",
                        safe="off",
                        tbs="0",
                        num=10,
                        country=None,
                        pause=1.5,
                        domains=None,
                        tpe="",
                        user_agent=get_random_user_agent()):
            dt["google_search_query"]["urls"].append(i)
            tl = BeautifulSoup(requests.get(i).content, "html.parser")
            for title in tl.findAll("title"):
                dq = title.text.strip().replace("\n", "")
                dt["google_search_query"]["title"].append(dq)
        ggle = json.dumps(dt, indent=2, sort_keys=True)
        return json.loads(ggle)
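An illustrative call; the enclosing class is not shown in this excerpt, so 'bot' below is a hypothetical instance of it, and the query is a placeholder.

# 'bot' stands for an instance of the (unshown) owning class.
data = bot.google_search_query('python web scraping tutorial', lang='en', maxSearch=3)
print(data['google_search_query']['urls'])
print(data['google_search_query']['title'])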
Example #9
def google_search(query, resultsToRetrieve, userAgent, sleepDelay=1):
    """
	perform google search and handle exceptions
	"""
    try:
        search_results = search(query,
                                stop=resultsToRetrieve,
                                user_agent=get_random_user_agent())
        #search_results = ["test result 1","test result 2"]
        # search() returns a lazy generator; iterating it here forces the HTTP
        # requests, so any HTTPError is raised inside this try block.
        if search_results is not None:
            for item in search_results:
                pass
    except urllib.error.HTTPError as e:
        if e.code == 429 or str(e) == "HTTP Error 429: Too Many Requests":
            print(
                colored(
                    '\tRejected, sleeping for ' + str(sleepDelay) + ' min..',
                    'red'))
            sleep((sleepDelay * 60))
            print('\tRetrying..')
            return google_search(query, resultsToRetrieve, userAgent,
                                 sleepDelay * 2)
    else:
        return search_results
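An illustrative call; the query and count are placeholders. Note a design consequence of the error handling above: the generator returned by search() is already iterated inside google_search() to surface HTTP errors, so it is exhausted by the time it is returned.

urls = google_search('site:example.com filetype:pdf', 10, get_random_user_agent())
# 'urls' is an exhausted generator at this point; collect the links into a list
# inside google_search() if the caller actually needs them.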
Example #10
def findSites(query, qnt=None, site=''):
    #site = site if site != '' else 'gov.br'
    site = [site if site else 'gov.br']
    for result in search(
            query, lang='pt', num=20, start=0, stop=int(qnt) if qnt else None, pause=1,
            domains=site,
            user_agent=get_random_user_agent()):
        yield result
Example #11
def title_to_paper_link(title):
    try:
        paper_gen = googlesearch.search(title, num=10, stop=1, domains=PAPER_DOMAINS, user_agent=googlesearch.get_random_user_agent())
        paper_link = next(itertools.islice(paper_gen, 0, None))
    except Exception:  # fall back to an unrestricted search
        paper_gen = googlesearch.search(title, num=10, stop=1, user_agent=googlesearch.get_random_user_agent())
        paper_link = next(itertools.islice(paper_gen, 0, None))
    return paper_link
Example #12
def title_to_code_link(title):
    try:
        code_gen = googlesearch.search(title + ' github', num=10, stop=1, domains=CODE_DOMAINS, user_agent=googlesearch.get_random_user_agent())
        code_link = next(itertools.islice(code_gen, 0, None))
    except Exception:  # fall back to an unrestricted search
        code_gen = googlesearch.search(title + ' github', num=10, stop=1, user_agent=googlesearch.get_random_user_agent())
        code_link = next(itertools.islice(code_gen, 0, None))
    return code_link
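A usage sketch for the two helpers above; PAPER_DOMAINS and CODE_DOMAINS are defined elsewhere in the original source, so the values below are only placeholders for a quick test.

PAPER_DOMAINS = ['arxiv.org']   # placeholder; the real list lives elsewhere in the source module
CODE_DOMAINS = ['github.com']   # placeholder
title = 'Attention Is All You Need'
print(title_to_paper_link(title))
print(title_to_code_link(title))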
Example #13
    def go(self):
        i = 1
        for dork in self.google_dorks:
            try:
                dork = dork.strip()
                self.links = []  # Stores URLs with files; cleared out for each dork.

                # Search for the links to collect.
                if self.domain:
                    query = dork + " site:" + self.domain
                else:
                    query = dork

                pause_time = self.delay + random.choice(self.jitter)
                print("[*] Search ( " + str(i) + " / " +
                      str(len(self.google_dorks)) + " ) for Google dork [ " +
                      query + " ] and waiting " + str(pause_time) +
                      " seconds between searches")

                for url in googlesearch.search(
                        query,
                        start=0,
                        stop=self.search_max,
                        num=100,
                        pause=pause_time,
                        extra_params={'filter': '0'},
                        user_agent=googlesearch.get_random_user_agent()):
                    self.links.append(url)

                # Since googlesearch.search retrieves URLs in batches of 100, ensure the list only contains the requested amount.
                if len(self.links) > self.search_max:
                    self.links = self.links[:self.search_max]

                print("[*] Results: " + str(len(self.links)) +
                      " sites found for Google dork: " + dork)
                for foundDork in self.links:
                    print(foundDork)

                self.total_dorks += len(self.links)

                # Only save links with valid results to an output file.
                if self.save_links and self.links:
                    with open(self.log_file, 'a') as f:
                        f.write('#: ' + dork + "\n")
                        for link in self.links:
                            f.write(link + "\n")
                        f.write("=" * 50 + "\n")

            except Exception as e:
                print("[-] ERROR with dork: " + dork + " (" + str(e) + ")")

            i += 1

        self.fp.close()
        print("[*] Total dorks found: " + str(self.total_dorks))
def get_category_for_archive_element(product_name):
    user_agent = get_random_user_agent()
    response = search("xkom " + product_name, start=0,
                      stop=5, num=5, user_agent=user_agent)

    try:
        result = ''
        while 'x-kom' not in result or '.html' not in result:
            result = next(response)
        print(result)
        category = get_category(result)
    except StopIteration:
        category = "Brak"
    return category
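An illustrative call; get_category() is defined elsewhere in the original source, and the product name below is a placeholder.

category = get_category_for_archive_element('Logitech MX Master 3')
print(category)  # a category parsed from an x-kom product page, or "Brak" if none was found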
Example #15
def go_gle(query):
    my_results_list = []
    for i in search(
            query,  # Expression
            tld='com',  # TL domain
            lang='en',  # Set lang en
            num=10,  # Number of results / page
            start=0,  # First result to retrieve
            stop=None,  # Last result to retrieve (None = keep fetching until results run out)
            pause=0,  # No lapse between HTTP requests (risks being blocked by Google)
            user_agent=get_random_user_agent(),
    ):
        my_results_list.append(i)

    return '\n'.join(my_results_list)
Example #16
def get_value(companyname):
    accurateset = OrderedSet()
    multipleset = OrderedSet()
    try:
        user_agent_str = googlesearch.get_random_user_agent()
        logger.info('google search for %s', companyname)
        for j in googlesearch.search(companyname,
                                     tld="co.in",
                                     num=3,
                                     stop=3,
                                     pause=1,
                                     user_agent=user_agent_str):
            #logger.info('google search result %s', companyname)
            #print(j)
            correct = True
            for item in bannedlist:
                if item in j:
                    correct = False
                    break
            if correct:
                try:
                    newstr = re.split("www.", j)[1]
                except IndexError as e:
                    newstr = re.split("//", j)[1]
                finamdomain = re.split("/", newstr)[0]

                if "." == finamdomain[0]:
                    finamdomain.replace(".", "", 1)

                multipleset.add(finamdomain)

                pattern_str = re.compile(r'([a-z]+)')
                firstname = re.search(pattern_str,
                                      companyname.lower()).group(0)

                if firstname in finamdomain:
                    accurateset.add(finamdomain)

    except urllib.error.HTTPError as httperr:
        #print(httperr.headers,httperr.read())  # Dump the headers to see if there's more information
        return {'error': 'captcha'}

    data = {
        'companyname': companyname,
        'accurate': list(accurateset),
        'multiple': list(multipleset)
    }
    return data
Example #17
def google_search(domain):
    target_url = ''
    kw = '日常生活用具'  # Japanese: "daily living equipment"
    query = "{} {}".format(domain, kw)
    print('domain: ' + domain)
    try:
        for url in search(query,
                          lang='ja',
                          stop=1,
                          pause=3.0,
                          user_agent=get_random_user_agent()):
            print(url)
            target_url = url
    except Exception as e:
        print(e)
        target_url = None
    return target_url
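An illustrative call; the domain below is a placeholder. The query combines the domain with the Japanese keyword defined above.

url = google_search('example.co.jp')
print(url if url else 'no result')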
Example #18
    def __init__(self, query):
        self.__query = query
        self.__url = f'https://scholar.google.it/scholar?hl=en&as_sdt=0%2C5&q={parse.quote_plus(query)}&btnG=&oq=we'
        self.__html = bs(
            get(self.__url,
                headers={
                    'User-Agent': str(get_random_user_agent())
                }).text, 'lxml')
        ids = (i['data-cid'] for i in self.__html.findAll(
            'div', {'class': 'gs_r gs_or gs_scl'}))
        gs = self.__html.findAll('div', {'class': 'gs_fl'})
        # Materialise the list so the two generator expressions below do not
        # compete over a single shared iterator.
        a = [i.findAll('a') for i in gs]
        self.__cit = (i[2].text.split()[-1] for i in a if len(i) > 2)
        self.__rel = ('https://scholar.google.it' + i[3]['href'] for i in a
                      if len(i) > 3)
        self.__results = [
            self.__res(i['href'], i.text, next(self.__rel))
            for i in (self.__html.find('a', {'id': i}) for i in ids)
        ]
Example #19
def search_google(word, stp=5):
    # Search query
    query = str(word)

    query_result = search(query=query,
                          tld='com',
                          lang='en',
                          num=5,
                          start=0,
                          stop=stp)

    results = []
    for res in query_result:
        res = filter_result(res)
        html = get_page(res, get_random_user_agent())

        results.append({'link': res, 'page': html})

    return results
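An illustrative call; the query is a placeholder, and filter_result() / get_page() are assumed importable exactly as in the excerpt above.

for item in search_google('python unit testing', stp=3):
    print(item['link'], len(item['page']))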
Example #20
def hits_google(search, args):
    ''' The function where the googlesearch package by Mario Vilas
        is called.
    '''
    s = search.split(',')
    query = 'filetype:pdf'

    try:
        hits = gs.hits(query, domains=s, user_agent=gs.get_random_user_agent())

    except urllib.error.HTTPError as e:
        return False, e

    except urllib.error.URLError as e:
        return False, e

    except IndexError as e:
        return False, e

    return True, hits
Example #21
def gdork(domain):
    ''' Run google dorks against domain '''
    try:
        dorks = {
            'Login portal': [
                f'site:{domain} intext:login',
            ],
            'Files': [f'site:{domain} filetype:pdf'],
        }
        result = ''

        for dork_type in dorks:
            user_agent = get_random_user_agent()
            result += f'=================\n{dork_type.upper()}\n=================\n'
            for dork in dorks[dork_type]:
                data = search(dork, user_agent=user_agent)
                for link in data:
                    print(link)
                    result += (link + "\n")
                result += '\n'

        return result
    except (gaierror, URLError):
        return 0
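An illustrative call; a falsy return value signals that a DNS or URL error was caught, per the except clause above.

report = gdork('example.com')
if report:
    print(report)
else:
    print('search failed (DNS or URL error)')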
Example #22
def get_random_agent():
    return gs.get_random_user_agent()
Example #23
def get_user_agent():
    return googlesearch.get_random_user_agent()
Example #24
highSeverity = ['.conf', '.cnf', '.cfg', '.env', '.sql', '.dbf', '.mdb', '.log', '.bak', '.htaccess']

mediumSeverity = ['.txt', '.csv', 'admin', 'git', 'svn', 'ini']

testingUrl = str(results.url)
resultsnumber = int(results.resultcount)
verbose = results.verbose
waitTime = results.wait

print("Searching....\n")
try:
    for dork in dorkDict:
        if verbose:
            print("[Testing " + dork + "]")
        results = []
        finishedDork = "site:" + testingUrl + " " + dorkDict[dork]
        for x in googlesearch.search(finishedDork, lang='en', num=resultsnumber, start=0, stop=resultsnumber, pause=waitTime, user_agent=googlesearch.get_random_user_agent()):
            results.append(x)
        if len(results) > 0:
            print(colours.OKGREEN + "[" + dork + "]" + colours.ENDC)
            for x in results:
                if any(ele in x for ele in highSeverity):
                    print(colours.FAIL + x + colours.ENDC)
                elif any(ele in x for ele in mediumSeverity):
                    print(colours.WARNING + x + colours.ENDC)
                else:
                    print(x)
except Exception as e:
    print(e)
Example #25
myLowPause = 5
myHighPause = 15
myDate = date.today()
nbTrials = 0
myTLD = "com"  #Google tld   -> we search in google.com
myHl = "en"  #in english
#this may be long
while myStart < myMaxStart:
    print("PASSAGE NUMBER :" + str(myStart))
    print("Query:" + myKeyword)
    #change user-agent and pause to avoid blocking by Google
    myPause = random.randint(myLowPause, myHighPause)  #long pause
    print("Pause:" + str(myPause))
    #change user_agent  and provide local language in the User Agent
    #myUserAgent =  getRandomUserAgent(myconfig.userAgentsList, myUserAgentLanguage)
    myUserAgent = googlesearch.get_random_user_agent()
    print("UserAgent:" + str(myUserAgent))
    #myPause=myPause*(nbTrials+1)  #increase the pause if a trial returns nothing
    #print("Pause:"+str(myPause))
    try:
        urls = googlesearch.search(query=myKeyword,
                                   tld=myTLD,
                                   lang=myHl,
                                   safe='off',
                                   num=myNum,
                                   start=myStart,
                                   stop=myStop,
                                   domains=None,
                                   pause=myPause,
                                   tpe='',
                                   user_agent=myUserAgent)
Example #26
from googlesearch import search, get_random_user_agent
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('-v', dest='vuln', action='store', help='The vulnerability for which you want to gather HackerOne (h1) reports', required=True)
parser.add_argument('-n', dest='numberOfReports', action='store', type=int, default=100, help='How many reports you want to gather. DEFAULT: 100')
parser.add_argument('-p', dest='seconds', action='store', type=float, default=2.0, help='How many seconds to wait between HTTP requests; setting this is highly recommended so we can avoid getting banned by Google. DEFAULT: 2')
args = parser.parse_args()
numberOfReports = args.numberOfReports
vuln = args.vuln
seconds = args.seconds
user_agent = get_random_user_agent()
query = f"site:hackerone.com inurl:/reports/ intext:{vuln}"

def banner():
	print("""
  ____ ____  ___ _____ _   _ 
 / ___|  _ \|_ _|_   _| | | |
| |  _| |_) || |  | | | |_| |
| |_| |  _ < | |  | | |  _  |
 \____|_| \_\___| |_| |_| |_|
 	a tool created by 0xShin
		""")

def search_links(query):
	reports = []
	links = search(query, stop=numberOfReports, pause=seconds, user_agent=user_agent)
	print('[+] Currently gathering links [+]')
	for link in links:
		reports.append(link)
	return reports
def outputToFile(reports):
Example #27
def user_agent():
    # Print a randomly selected user agent
    print(">> User agent:")
    print(googlesearch.get_random_user_agent())
Example #28
def main():
    with open('county_list.csv', newline='') as f:
        engine_times = dict()
        r = csv.reader(f, delimiter=',')
        for row in r:
            county, state = row[0], row[1]
            prush("{}, {}...".format(county, state))

            time_since_last_use = 0
            engine_name = ""
            while True:
                # This does basically constitute a busy loop if all engines are
                # in a cooldown period, but since this is single threaded, I'm
                # not too concerned.
                engine = random.choice(search_engines)()
                engine_name = engine.__class__.__name__
                if engine_name not in engine_times:
                    break
                time_since_last_use = (
                    datetime.now() -
                    engine_times[engine_name]).total_seconds()
                if time_since_last_use >= ENGINE_MIN_COOLDOWN_SECS:
                    break

            engine.set_headers({'User-Agent': get_random_user_agent()})
            subject = PREFERRED_SEARCH_TEMPLATE.format(county, state)
            search_results = engine.search(subject, pages=SEARCH_PAGES).links()
            engine_times[engine_name] = datetime.now()

            if len(search_results) == 0:
                subject = ALTERNATE_SEARCH_TEMPLATE.format(county, state)

                # Random-uniform wait period between successive calls to same
                # engine adds some delay and jitter to the calls, making it
                # just slightly harder to get rate-limited.
                time.sleep(
                    random.uniform(ALTERNATE_SEARCH_MIN_WAIT_SECS,
                                   ALTERNATE_SEARCH_MAX_WAIT_SECS))

                search_results = engine.search(subject,
                                               pages=SEARCH_PAGES).links()

            title = fmt_title(engine_name, subject)
            access_time = fmt_access_time()

            markdown = ""
            with open(state + "/" + county + ".md", "r") as county_file:
                markdown = county_file.read()

            if len(search_results) == 0 or search_results[0] in markdown:
                continue

            uri = select_best_search_result(search_results)

            if len(markdown.strip()) == 0 or NO_TIPS_PLACEHOLDER.lower() in markdown.lower():
                markdown = fmt_page_heading(county, state)

            markdown = markdown + fmt_entry(title, uri, access_time)

            with open(state + "/" + county + ".md", "w") as county_file:
                county_file.write(markdown)
Example #29
#myTbs= "qdr:m"   #recherche sur le dernier mois. pas utilisé.
#tbs=myTbs,
#pause assez importante pour ne pas bloquer affiner les valeurs si besoin
myLowPause = 15
myHighPause = 45

# loop over the queries (this can take several hours - run it overnight :-) !!!)
while i < len(myQueries):
    myQuery = myQueries[i]
    print("PASSAGE NUMERO :" + str(i))
    print("Query:" + myQuery)
    # vary the user_agent and the pause so we do not get blocked
    myPause = random.randint(
        myLowPause, myHighPause)  # pause long enough to avoid being blocked.
    print("Pause:" + str(myPause))
    myUserAgent = googlesearch.get_random_user_agent()  # change the user_agent to avoid being blocked
    print("UserAgent:" + str(myUserAgent))
    df = pd.DataFrame(columns=['query', 'page', 'position',
                               'source'])  # working dataframe
    try:
        urls = googlesearch.search(query=myQuery,
                                   tld='fr',
                                   lang='fr',
                                   safe='off',
                                   num=myNum,
                                   start=myStart,
                                   stop=myStop,
                                   domains=None,
                                   pause=myPause,
                                   only_standard=False,
                                   extra_params={},
Example #30
# -*- coding: UTF-8 -*-
__title__ = 'potato'
__version__ = '1.0.1'
__author__ = '@gyscordia'
__license__ = 'MIT'
__copyright__ = 'Copyright 2020 by Me'

try:
    from googlesearch import search
except ImportError:
    print("dammmm.. the googlesearch package is not installed")


import googlesearch
import os
import sys
from datetime import datetime
agente = googlesearch.get_random_user_agent()
data = str(datetime.today())
horario = data[0:19]
print("getting urls... {}".format(horario))


try:
    for j in search(query=sys.argv[1], tld=sys.argv[2], lang=sys.argv[3], num=int(sys.argv[4]), stop=int(sys.argv[4]), pause=5, user_agent=agente):
        print(j)
except IndexError:
    print('usage: ' + __file__ + ' <query> <top level domain, e.g. com> <language, e.g. pt-br or en> <number of urls, e.g. 10>')
    print('example: ' + __file__ + ' noticias com pt-br 50')