def _get_parse_links(self, data, kw, only_results=False, page_num=1, ip='127.0.0.1'):
    """Parse ``data`` and return the database rows instead of inserting them.

    Like _parse_links, but no connection is touched and no query is built;
    the values that would be stored are handed back to the caller.

    Args:
        data: Raw input passed straight to ``GoogleParser``.
        kw: Search query, stored verbatim in the page row.
        only_results: When truthy, return the ``GoogleParser`` instance
            itself and skip building any rows.
        page_num: Page number stored in the page row.
        ip: Requester address stored in the page row.

    Returns:
        The parser object when ``only_results`` is truthy; otherwise a
        ``(page_row, link_rows)`` pair. ``page_row`` is the tuple
        ``(page_num, timestamp, link count, reported result count or '',
        kw, ip)`` and ``link_rows`` is a list of
        ``[title, url, snippet, position, hostname]`` lists.

    The link rows carry no row id of the page they belong to; the caller
    has to add the last row id from the database when inserting. Values
    come from the parsed page unescaped, so insert them only through
    parameterized queries.

    NOTE(review): an identical definition of this method follows directly
    after this one; if both live in the same scope the later one wins.
    """
    parser = GoogleParser(data)
    if only_results:
        return parser

    links = parser.links
    page_row = (
        page_num,
        time.asctime(),
        len(links),
        parser.num_results() or '',
        kw,
        ip,
    )
    link_rows = [
        [
            link.link_title,
            link.link_url.geturl(),
            link.link_snippet,
            link.link_position,
            link.link_url.hostname,
        ]
        for link in links
    ]
    return (page_row, link_rows)
def _get_parse_links(self, data, kw, only_results=False, page_num = 1, ip='127.0.0.1'):
    """Return the parsed page data as plain rows, without any database access.

    Behaves like _parse_links except that nothing is inserted and no SQL
    is assembled.

    Args:
        data: Raw input given to ``GoogleParser``.
        kw: Search query recorded in the summary row.
        only_results: If truthy, the ``GoogleParser`` object is returned
            directly and no rows are produced.
        page_num: Page number recorded in the summary row.
        ip: Requester address recorded in the summary row.

    Returns:
        Either the parser object (``only_results`` truthy) or a 2-tuple
        ``(summary, rows)``: ``summary`` is ``(page_num, time.asctime(),
        number of links, parser.num_results() or '', kw, ip)`` and ``rows``
        holds one ``[title, url, snippet, position, hostname]`` list per
        parsed link.

    No database row id is part of ``rows``; whoever inserts them must
    supply the last row id themselves. The values are not escaped in any
    way, so use parameterized statements when storing them.

    NOTE(review): an identical definition of this method appears directly
    before this one; if both share a scope, this later one is the one that
    takes effect.
    """
    def _as_row(entry):
        # Field order matches the columns: title, url, snippet, rank, domain.
        return [
            entry.link_title,
            entry.link_url.geturl(),
            entry.link_snippet,
            entry.link_position,
            entry.link_url.hostname,
        ]

    google_parser = GoogleParser(data)
    if not only_results:
        found = google_parser.links
        requested_at = time.asctime()
        summary = (
            page_num,
            requested_at,
            len(found),
            google_parser.num_results() or '',
            kw,
            ip,
        )
        rows = [_as_row(entry) for entry in found]
        return (summary, rows)
    return google_parser
def parse_links(data, conn, kw, page_num=1, ip='127.0.0.1'):
    """Insert parsed data into the database. High level parsing function.

    One row is written to ``serp_page`` describing the page, followed by
    one row per parsed link in ``link``; each link row references the page
    row through ``serp_id``.

    Args:
        data: Raw input passed to ``GoogleParser``.
        conn: Either a sqlite3 cursor or connection object. If called in
            threads, make sure to wrap this function in some kind of
            synchronization functionality.
        kw: Search query stored with the page row.
        page_num: Page number stored with the page row.
        ip: Requester address stored with the page row.

    Returns:
        None. Nothing is committed here; that is left to the caller.
    """
    parser = GoogleParser(data)
    results = parser.links

    cursor = conn.execute(
        '''
        INSERT INTO serp_page
         (page_number, requested_at,
         num_results, num_results_for_kw_google,
         search_query, requested_by)
         VALUES(?, ?, ?, ?, ?, ?)''',
        (page_num, time.asctime(), len(results),
         parser.num_results() or '', kw, ip))

    # A sqlite3 Connection has no ``lastrowid``; only the cursor returned by
    # execute() does (a Cursor's execute() returns the cursor itself). Fall
    # back to ``conn.lastrowid`` for objects whose execute() returns
    # something without that attribute.
    if hasattr(cursor, 'lastrowid'):
        lastrowid = cursor.lastrowid
    else:
        lastrowid = conn.lastrowid

    conn.executemany(
        '''INSERT INTO link
        ( title,
         url,
         snippet,
         rank,
         domain,
         serp_id) VALUES(?, ?, ?, ?, ?, ?)''',
        [
            (
                result.link_title,
                result.link_url.geturl(),
                result.link_snippet,
                result.link_position,
                result.link_url.hostname,
                lastrowid,
            )
            for result in results
        ])