Ejemplo n.º 1
0
    def search_ngram(self, ngram, index):
        """Search F6S for ``ngram`` and record the startups it matches.

        The response body is expected to be a JSON list.  Its first element
        carries a summary in its ``'text'`` field of the form
        "<count> match...", and the remaining elements are the hits.  Hits
        whose ``'rightText'`` equals ``'Startup'`` and whose ``'text'``
        begins with ``ngram`` are passed to ``extract_and_write``.  When 20
        or more such hits come back, the search is first repeated for
        ``ngram`` extended by each character of ``self.all_valid_chars``.

        Malformed responses (not JSON, empty, missing summary, unreadable
        count) are reported on stdout and skipped so that one bad reply does
        not abort an entire recursive crawl.

        Args:
            ngram: Prefix to search for.  It is compared against lower-cased
                names, so it is presumably expected to be lower-case
                already (TODO confirm against callers).
            index: Depth counter; it is only forwarded, incremented by one,
                to the recursive calls.

        Returns:
            None in every case; results are reported via ``print`` and
            ``extract_and_write``.
        """
        print("%s: Searching for [%s]" %
              (AcureRateUtils.get_now_as_str(), ngram))

        search_url = F6SEngager.F6S_SEARCH_URL % ngram
        rc, response = self.perform_request(search_url)
        if rc != 200:
            print(">>> ERROR: %s: %s." % (rc, response))
            return

        # json.loads raises ValueError (JSONDecodeError) on a non-JSON body
        # and TypeError on a non-string body; neither should kill the crawl.
        try:
            results = json.loads(response)
        except (TypeError, ValueError) as err:
            print(">>> ERROR: Unparsable response for [%s]: %s." %
                  (ngram, err))
            return

        if not isinstance(results, list) or not results:
            print("F6S Scraper: Empty or unexpected response when searching "
                  "for %s." % ngram)
            return

        # The first element is the summary line, e.g. "<count> match...".
        header = results[0]
        summary = header.get('text') if isinstance(header, dict) else None
        if not summary or summary.find(' match') < 1:
            print("F6S Scraper: No mention of match(es) - %s." % summary)
            return

        # A summary such as "No matches" passes the check above but has no
        # leading integer, so guard the conversion.
        try:
            num_matches = int(summary.split(' ')[0])
        except ValueError:
            print("F6S Scraper: Could not read match count from - %s." %
                  summary)
            return
        if num_matches == 0:
            print("F6S Scraper: No hits returned when searching for %s." %
                  ngram)
            return

        # Keep only the startups whose name begins with the ngram.
        startups_only = [
            res for res in results[1:]
            if res.get('rightText') == 'Startup'
            and res.get('text')
            and res['text'].lower().startswith(ngram)
        ]
        if not startups_only:
            return

        # 20 or more hits triggers a narrower search per extra character
        # (presumably 20 is the endpoint's result cap - TODO confirm).
        # NOTE(review): the hits for this ngram are still written below
        # after the recursion, so entries may be recorded more than once -
        # confirm that this is intended.
        if len(startups_only) >= 20:
            for char in self.all_valid_chars:
                self.search_ngram(ngram + char, index + 1)

        print("%s: Found %s results for [%s]. Writing:" %
              (AcureRateUtils.get_now_as_str(), len(startups_only), ngram))

        # Write to file
        self.extract_and_write(startups_only, ngram)
Ejemplo n.º 2
0
    def extract_and_write(self, startups, ngram):
        """Write the entries whose name begins with ``ngram`` to the file.

        Each entry must provide the keys ``'text'``, ``'type'`` and
        ``'value'``.  A matching entry is written to ``self.companies_file``
        as one ``text; type; value`` line and echoed to stdout with a
        timestamp.  The file is flushed once after all entries are handled.

        Args:
            startups: Iterable of dict-like search hits.
            ngram: Prefix that the lower-cased name must start with.
        """
        for entry in startups:
            # All three keys are read up front, so an entry missing any of
            # them raises KeyError even if it would not have matched.
            name, kind, ident = entry['text'], entry['type'], entry['value']

            # NOTE(review): only a leading ';' triggers quoting; a ';'
            # elsewhere in the name is written as-is.  Quoting happens
            # before the prefix test below, so the test then sees the
            # quoted form - confirm both are intended.
            if name.startswith(';'):
                name = "'%s'" % name

            if not name.lower().startswith(ngram):
                continue

            self.companies_file.write('%s; %s; %s\n' % (name, kind, ident))
            print('%s: %s, %s, %s' %
                  (AcureRateUtils.get_now_as_str(), name, kind, ident))

        self.companies_file.flush()