Example #1
0
    def _get_parse_links(self,
                         data,
                         kw,
                         only_results=False,
                         page_num=1,
                         ip='127.0.0.1'):
        """Parse a result page and hand back its contents as plain row values.

        Nothing is written to a database and no query is built here; the
        caller receives the values and decides what to do with them.

        Args:
            data: Raw page content passed to ``GoogleParser``.
            kw: Search keyword, copied into the page row.
            only_results: When true, return the ``GoogleParser`` instance
                itself and skip building rows.
            page_num: Page number, copied into the page row.
            ip: Requesting address, copied into the page row.

        Returns:
            The parser object if ``only_results`` is set. Otherwise a pair
            ``(page_row, link_rows)``: ``page_row`` is the tuple
            ``(page_num, timestamp, link count, reported result count or '',
            kw, ip)`` and ``link_rows`` is a list with one
            ``[title, url, snippet, position, hostname]`` list per link.

        Note:
            Link rows carry no id of the page row; a caller inserting them
            has to supply that itself. The values are taken from the parsed
            page as-is, without any sanitising.
        """
        serp = GoogleParser(data)
        if only_results:
            return serp

        links = serp.links

        # Evaluated in the same order as they appear in the page row.
        requested_at = time.asctime()
        link_count = len(links)
        reported_total = serp.num_results() or ''
        page_row = (page_num, requested_at, link_count, reported_total, kw, ip)

        link_rows = [
            [
                entry.link_title,
                entry.link_url.geturl(),
                entry.link_snippet,
                entry.link_position,
                entry.link_url.hostname,
            ]
            for entry in links
        ]

        return page_row, link_rows
Example #2
0
    def _get_parse_links(self, data, kw, only_results=False, page_num = 1, ip='127.0.0.1'):
        """Return the row values for a parsed result page without touching a database.

        ``data`` is fed to ``GoogleParser``. With ``only_results`` set, that
        parser object is returned directly. Otherwise the result is a 2-tuple:

        * the page row ``(page_num, time.asctime(), number of links,
          parser.num_results() or '', kw, ip)``
        * a list of link rows, each a list
          ``[title, url string, snippet, position, hostname]``

        The link rows do not include an id for the page row, so whoever
        inserts them must add it. Values are passed through exactly as parsed;
        no escaping or sanitising is done here.
        """

        def as_row(hit):
            # Column order: title, url, snippet, rank/position, domain.
            return [
                hit.link_title,
                hit.link_url.geturl(),
                hit.link_snippet,
                hit.link_position,
                hit.link_url.hostname,
            ]

        parsed = GoogleParser(data)
        if only_results:
            return parsed

        hits = parsed.links
        serp_row = (
            page_num,
            time.asctime(),
            len(hits),
            parsed.num_results() or '',
            kw,
            ip,
        )

        return (serp_row, [as_row(hit) for hit in hits])
Example #3
0
def parse_links(data, conn, kw, page_num=1, ip='127.0.0.1'):
    """Insert parsed data into the database. High level parsing function.

    One row describing the result page is inserted into ``serp_page``; then
    one row per parsed link is inserted into ``link``, each carrying the id
    of that page row in its ``serp_id`` column. Nothing is committed here;
    that is left to the caller.

    Args:
        data: Raw page content passed to ``GoogleParser``.
        conn: Either a sqlite3 cursor or connection object. If called in
            threads, make sure to wrap this function in some kind of
            synchronization functionality.
        kw: Search query, stored in ``serp_page.search_query``.
        page_num: Stored in ``serp_page.page_number``.
        ip: Stored in ``serp_page.requested_by``.
    """
    parser = GoogleParser(data)
    results = parser.links

    cursor = conn.execute('''
        INSERT INTO serp_page
         (page_number, requested_at,
         num_results, num_results_for_kw_google,
         search_query, requested_by)
         VALUES(?, ?, ?, ?, ?, ?)''',
           (page_num, time.asctime(), len(results), parser.num_results() or '',  kw, ip))

    # A sqlite3 Connection has no ``lastrowid``; only cursors do. Both
    # Connection.execute() and Cursor.execute() return the cursor that ran
    # the statement, so read the id from there. Fall back to ``conn`` itself
    # for cursor-like objects whose execute() returns nothing.
    if cursor is None:
        cursor = conn
    lastrowid = cursor.lastrowid

    conn.executemany('''INSERT INTO link
    ( title,
     url,
     snippet,
     rank,
     domain,
     serp_id) VALUES(?, ?, ?, ?, ?, ?)''',
    [(
      result.link_title,
      result.link_url.geturl(),
      result.link_snippet,
      result.link_position,
      result.link_url.hostname,
      lastrowid) for result in results])