def update_db(self, live_events):
    """Fill in detail fields (online_ts, cover, avatar, ...) for already-crawled
    YouTube upcoming events.

    Each event dict in *live_events* is augmented with the detail-crawler
    bookkeeping columns plus freshly fetched cover/avatar blobs, then written
    back to ``crawled_lives`` keyed on ``web_url``.

    Args:
        live_events: iterable of dicts; each must contain at least the keys
            referenced by the UPDATE statement (``web_url``, ``online_ts``,
            ``cover_url``, ``avatar_url``, ``category``, ``description``,
            ``more_info``).

    Best-effort: any database/fetch error is logged (with the offending SQL
    statement) and swallowed; the connection is always closed.
    """
    logger.info('Update youtube upcoming events with (online_ts and cover)')
    cnx = mysql.connector.connect(**db)
    cursor = cnx.cursor(buffered=True)
    update_sql = (
        "UPDATE crawled_lives "
        " SET "
        "   detail_crawler=%(detail_crawler)s, "
        "   detail_crawler_ts=%(detail_crawler_ts)s, "
        "   online_ts=%(online_ts)s, "
        "   cover_url=%(cover_url)s, "
        "   cover=%(cover)s, "
        "   owner_avatar = %(avatar)s, "
        "   category = %(category)s, "
        "   description = %(description)s, "
        "   more_info = %(more_info)s "
        " WHERE web_url = %(web_url)s "
    )
    # Bookkeeping columns shared by every updated row.
    to_update = {
        "detail_crawler": "youtube_detail",
        "detail_crawler_ts": get_query_ts(),
    }
    try:
        for event in live_events:
            event.update(to_update)
            # Download the binary payloads only when a URL is present.
            event['cover'] = fetch_image(event['cover_url']) if event['cover_url'] else None
            event['avatar'] = fetch_image(event['avatar_url']) if event['avatar_url'] else None
            cursor.execute(update_sql, event)
            # Lazy %-args: the message is only formatted if DEBUG is enabled.
            logger.debug('event(%s) is updated.', event['web_url'])
        # Single commit for the whole batch (matches the sibling save methods).
        cnx.commit()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; DB errors remain best-effort and are logged.
        logger.error("error! SQL=%s", cursor.statement, exc_info=True)
    finally:
        cursor.close()
        cnx.close()
def _save_upcoming_to_db(self, event_list):
    """Save aggregate parsed upcoming events to database.

    Inserts every event into ``crawled_lives`` (or, on duplicate key, bumps
    ``list_crawler_ts`` and refreshes ``more_info``). Cover, avatar, and
    thumbnail image blobs are downloaded here when their URLs are present.

    Args:
        event_list: list of event dicts providing all keys named in the
            INSERT statement (``site``, ``web_url``, ``title``,
            ``cover_url``, ``owner_avatar_url``, ``thumbnail_url``, ...).

    Best-effort: errors are logged with the failing SQL and swallowed;
    the connection is always closed.
    """
    logger.info("Save parsed livestream upcoming event to DB")
    cnx = mysql.connector.connect(**db)
    cursor = cnx.cursor(buffered=True)
    insert_sql = (
        "INSERT INTO crawled_lives "
        " (site, referer, list_crawler, detail_crawler, detail_crawler_ts, web_url, title,"
        " cover_thumbnail_url, cover_thumbnail, cover_url, cover, owner, owner_url, owner_avatar, online_ts, category, description, more_info) "
        " VALUES "
        " (%(site)s, %(referer)s, %(list_crawler)s, %(detail_crawler)s, %(detail_crawler_ts)s, %(web_url)s, "
        " %(title)s, %(thumbnail_url)s, %(thumbnail)s, %(cover_url)s, %(cover)s, %(owner)s, %(owner_url)s, "
        " %(owner_avatar)s, %(online_ts)s, %(category)s, %(description)s, %(more_info)s) "
        " ON DUPLICATE KEY UPDATE "
        " `list_crawler_ts` = NOW(), "
        " more_info = %(more_info)s"
    )
    try:
        for v in event_list:
            v["cover"] = fetch_image(v["cover_url"]) if v["cover_url"] else None
            # BUGFIX: the original used `v["cover"].__sizeof__()`, which
            # measures Python object overhead (and "worked" on None only by
            # accident) — use the actual payload length to enforce the cap.
            if v["cover"] is not None and len(v["cover"]) > 100000:
                # Oversized covers are dropped rather than stored.
                v["cover"] = None
            v["owner_avatar"] = fetch_image(v["owner_avatar_url"]) if v["owner_avatar_url"] else None
            v["thumbnail"] = fetch_image(v["thumbnail_url"]) if v["thumbnail_url"] else None
            cursor.execute(insert_sql, v)
        cnx.commit()
        logger.info("Successfully save parsed %d livestream upcoming event to DB", len(event_list))
    except Exception:
        # Narrowed from bare `except:` so interpreter-exit signals propagate.
        logger.error("Error while executing SQL=%s", cursor.statement, exc_info=True)
    finally:
        cursor.close()
        cnx.close()
def save_to_db(self, event_list):
    """Store pre-crawled YouTube upcoming events (no ``online_ts`` yet).

    Inserts each event into ``crawled_lives``; if the row already exists
    (duplicate key on ``web_url``), only ``list_crawler_ts`` is refreshed.
    Thumbnail blobs are downloaded here when a thumbnail URL is present.

    Args:
        event_list: list of event dicts with the keys named in the INSERT
            statement (``site``, ``referer``, ``list_crawler``, ``web_url``,
            ``title``, ``thumbnail_url``, ``cover_url``, ``owner``,
            ``owner_url``).

    Best-effort: errors are logged with the failing SQL and swallowed;
    the connection is always closed.
    """
    logger.info('Store %d pre_crawled youtube upcoming events without online_ts.', len(event_list))
    cnx = mysql.connector.connect(**db)
    cursor = cnx.cursor(buffered=True)
    insert_sql = (
        "INSERT INTO crawled_lives "
        " (site, referer, list_crawler, web_url, title, cover_thumbnail_url, cover_thumbnail, cover_url, owner, owner_url) "
        " VALUES "
        " (%(site)s, %(referer)s, %(list_crawler)s, %(web_url)s, "
        " %(title)s, %(thumbnail_url)s,%(thumbnail)s, %(cover_url)s, %(owner)s, %(owner_url)s) "
        " ON DUPLICATE KEY UPDATE "
        " `list_crawler_ts` = NOW() "
    )
    try:
        for v in event_list:
            # Download the thumbnail blob only when a URL is present.
            v['thumbnail'] = fetch_image(v['thumbnail_url']) if v['thumbnail_url'] else None
            cursor.execute(insert_sql, v)
        cnx.commit()
    except Exception:
        # Narrowed from bare `except:`; lazy %-args avoid eager string concat.
        logger.error("error! SQL=%s", cursor.statement, exc_info=True)
    finally:
        cursor.close()
        cnx.close()