Example #1
0
def get_number_of_results(term, ajax=False, verbose=True):
    """Return the estimated number of search results for ``term`` as an int.

    With ``ajax`` false (the default) a results page is fetched through
    ``GoogleSearch`` and the count is taken from the first group of the
    module-level pattern ``reg`` (defined outside this block), with thousands
    separators (``,``) stripped.  If the pattern does not match, the term and
    the page are shown as an interactive prompt and, once the user confirms,
    the lookup falls back to the ``search`` helper.

    With ``ajax`` true the page scrape is skipped and the count is read
    directly from the ``search`` helper's response
    (``responseData -> cursor -> estimatedResultCount``).

    When ``verbose`` is true the term and the raw matched count are printed
    on a successful page scrape.
    """
    if not ajax:
        html = str(GoogleSearch(term)._get_results_page())
        found = reg.search(html)
        if found:
            count_text = found.groups()[0]
            if verbose:
                print(term, count_text)
            return int(count_text.replace(',', ''))
        # No count on the page: pause with the term and page contents shown
        # as the prompt, then fall through to the search() based lookup.
        raw_input((term, html))
    cursor = search(term)['responseData']['cursor']
    return int(cursor['estimatedResultCount'])
Example #2
0
        def run(self, string):
            """Search md5-database.org through Google for ``string``.

            One results page (10 results per page) is fetched for the query
            ``site:http://md5-database.org/md5 <string>``.  All text nodes of
            the page are concatenated and scanned for segments that start
            with ``MD5}`` and end at the next ``MD5`` on the same line.  For
            the first segment containing ``string``, the text between its
            first ``,`` and its first ``.`` is returned, stripped of
            surrounding whitespace.

            ``string`` is presumably an MD5 hash to look up; verify against
            the caller.

            Returns:
                The extracted text, or ``''`` when no segment contains
                ``string``.
            """
            query = "site:http://md5-database.org/md5 %s" % string

            gs = GoogleSearch(query)
            gs.results_per_page = 10
            page = gs._get_results_page()
            page_text = ''.join(page.findAll(text=True))

            # Raw string: '\}' is not a valid Python string escape, so the
            # non-raw literal triggers an invalid-escape warning on modern
            # interpreters.  The resulting pattern is unchanged.
            segments = re.findall(r'MD5\}.*?MD5', page_text)
            for line in segments:
                if string in line:
                    # str.find() returns -1 when the delimiter is missing, so
                    # a missing ',' starts the slice at 0 and a missing '.'
                    # ends it just before the last character.
                    result = line[(line.find(',') + 1):line.find('.')].strip()
                    return result

            return ''
Example #3
0
def _failedScoring(reason, pageStrings):
    """Build, log and return the result list for a failed baseline query.

    The list holds a 0 for the baseline hit count, a 0 for every entry in
    ``domains``, the ``reason`` string and the collected page text decoded
    as UTF-8.
    """
    hitScores = [0]
    hitScores.extend(0 for _ in domains)
    hitScores.append(reason)
    hitScores.append(unicode(pageStrings, 'utf-8', "strict"))
    logging.info(hitScores)
    return hitScores


def augmentedScoring(phrase) :
    """Score ``phrase`` by its Google hit count, overall and per domain term.

    First the quoted phrase alone is searched; its hit count is the
    denominator.  Then the quoted phrase is searched together with each
    quoted entry of the module-level ``domains``, and every such hit count is
    divided by the denominator.  Each query is preceded by a
    ``sleep(searchSleepTime)``.

    Returns:
        On success a list
        ``[baseline hits (int), ratio per domain (float)..., matched
        snippets (str), all fetched pages (unicode)]``.
        If the baseline query raises ``SearchError`` or no hit count can be
        found on its page, a list
        ``[0, 0 per domain..., reason (str), fetched pages (unicode)]``.

    A baseline hit count of 0 yields a ratio of 0.0 for every domain instead
    of raising ``ZeroDivisionError``.
    """
    # TODO: the hit count is not fully reliable: the greedy
    # "about .* results" pattern can span the whole page
    # => investigate the regex / the Google result page.
    hitScores = []
    # Phrase without any domain term -> denominator.
    searchStr = "\"" + phrase + "\""
    gs = GoogleSearch(searchStr)
    gs.results_per_page = 50
    sleep(searchSleepTime)
    score = 0
    matchStrings = ""
    pageStrings = ""
    try:
        page = gs._get_results_page()
        logging.info(gs.last_search_url)
        pageStr = str(page)
        pageStrings += pageStr
        if "resultStats\">" not in pageStr and "No results found for" not in pageStr:
            logging.warning("No google hits! (" + searchStr + ")")
            return _failedScoring("first: no match", pageStrings)
        m = re.search(r'resultStats\">.*bout (.*) results</div', pageStr)
        if m is None:
            logging.warning("No match! .. no google hits? (" + searchStr + ")")
            return _failedScoring("first: no match", pageStrings)
        score = m.group(1)
        matchStrings += m.group(0) + " - "
        logging.info("score (" + searchStr + "): " + score)
        score = score.replace(',','')
        hitScores.append(int(score))
    except SearchError as se:
        logging.warning("Search Error on: " + searchStr + " no results? " + str(se))
        return _failedScoring("first: search error: " + str(se), pageStrings)
    denominationScore = float(score)
    logging.info("denominator (" + searchStr + "): " + str(denominationScore))

    # Phrase combined with each domain term.
    for domain in domains :
        searchStr = "\"" + phrase + "\" \"" + domain + "\""
        gs = GoogleSearch(searchStr)
        gs.results_per_page = 50
        sleep(searchSleepTime)
        # Stays 0 when the query fails or no count is found on the page.
        score = 0
        try:
            page = gs._get_results_page()
            logging.info(gs.last_search_url)
            pageStr = str(page)
            pageStrings += pageStr
            if "resultStats\">" in pageStr:
                m = re.search(r'resultStats">.*bout (.*) results</div', pageStr)
                if m is not None:
                    score = m.group(1)
                    matchStrings += m.group(0) + " - "
                    logging.info("score (" + searchStr + "): " + score)
                    score = score.replace(',','')
                else:
                    logging.warning("No match! .. no google hits? (" + searchStr + ")")
            else:
                logging.warning("No google hits! (" + searchStr + ")")
        except SearchError as se:
            logging.warning("Search Error on: " + searchStr + " no results? " + str(se))

        # Normalise by the overall hit count; a baseline of 0 hits would
        # otherwise raise ZeroDivisionError.
        if denominationScore:
            hitScores.append(float(score) / denominationScore)
        else:
            hitScores.append(0.0)

    hitScores.append(matchStrings)
    hitScores.append(unicode(pageStrings, 'utf-8', "strict"))
    logging.info(hitScores)
    return hitScores