def insert_feeditems(feed_id, url):
    """Parse the feed at ``url`` and store each of its entries in ``feed_item``.

    For every entry a row is inserted into the ``feed_item`` table, the row is
    looked up again by its guid to obtain its id, and that id is handed to
    ``autotagger.insert_feeditem_tags``.

    Args:
        feed_id: id of the owning feed; stored in the ``feed_id`` column.
        url: location of the feed, passed to ``feedparser.parse``.

    NOTE(review): entries are still expected to carry ``title``,
    ``description``, ``link`` and ``guid``; a missing one raises, as before.
    """
    feed = feedparser.parse(url)

    # Feed-level metadata is optional, so read it with .get() instead of
    # attribute access, which raises when the element is absent.
    meta = feed.feed
    print("Feed title :" + meta.get('title', '') + "\n")
    print("Feed description: " + meta.get('description', '') + "\n")
    print("Feed link: " + meta.get('link', '') + "\n")
    print("Language: " + meta.get('language', '') + "\n")

    insert_sql = (
        "INSERT INTO feed_item "
        "(title, description, link, guid, pub_date, date, feed_id) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s)"
    )
    select_sql = "SELECT * FROM feed_item WHERE guid = %s"

    for f in feed.entries:
        print(f)

        # TODO: parse the entry's own publication date (f.updated) into a
        # datetime and store it as pub_date; the current time is used as a
        # placeholder for both date columns for now.
        now = datetime.datetime.now()

        conn = mdb.connect(mysql_conn['host'], mysql_conn['username'],
                           mysql_conn['password'], mysql_conn['db'],
                           charset="utf8")
        try:
            cursor = conn.cursor()
            # Values are passed as query parameters so the driver escapes them.
            cursor.execute(
                insert_sql,
                (f.title, f.description, f.link, f.guid, now, now, feed_id),
            )
            # Fetch the stored row again to learn its id for the tagger.
            cursor.execute(select_sql, (f.guid,))
            feeditem = cursor.fetchone()
            conn.commit()
        finally:
            # Release the connection even when one of the statements fails.
            conn.close()

        if feeditem is None:
            # Nothing matched the guid, so there is no id to tag.
            continue

        # Autotag this feed item and update the database; the id is taken
        # from the first column of the fetched row.
        autotagger.insert_feeditem_tags(feeditem[0])
def _sha512_hex(data):
    """Return the hexadecimal SHA-512 digest of the byte string ``data``."""
    return hashlib.sha512(data).hexdigest()


def _store_feeditem(feed_id, title, description, link, link_hash, guid,
                    title_hash, pub_date):
    """Insert one feed item unless its title hash or link hash is already stored.

    Returns:
        The id of the newly inserted row, or 0 when nothing was inserted
        (a matching row already exists, or a RuntimeError was reported).
    """
    new_id = 0
    conn = mdb.connect(dbc.host, dbc.user, dbc.passwrd, dbc.db, charset="utf8")
    try:
        cursor = conn.cursor()
        try:
            try:
                cursor.execute(
                    'SELECT id FROM feeditem '
                    'WHERE title_hash = %s or link_hash = %s',
                    (title_hash, link_hash),
                )
                if not cursor.fetchall():
                    cursor.execute(
                        "INSERT feeditem (title, description, link, link_hash, "
                        "guid, title_hash, pub_date, feed_id) "
                        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
                        (title, description, link, link_hash, guid,
                         title_hash, pub_date, feed_id),
                    )
                    # Id of the row that was just inserted.
                    new_id = cursor.lastrowid or 0
            except RuntimeError as error:
                print(error)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Close exactly once, whatever happened above.
        conn.close()
    return new_id


def insert_feeditems(feed_id, url):
    """Load the feed at ``url`` and store its new entries in ``feeditem``.

    Each entry needs a non-empty title and link. The SHA-512 hashes of both
    are used to detect entries that are already stored; only entries that are
    actually inserted are passed on to ``autotagger.insert_feeditem_tags``.

    Args:
        feed_id: id of the owning feed; stored in the ``feed_id`` column.
        url: location of the feed, passed to ``parseUrl``.
    """
    if params.output:
        print('Loading %s' % url)

    feed = parseUrl(url)
    if not feed.feed:
        # An empty feed object is treated as "url not available / timed out".
        print('WARNING feed url timed out %s ' % (url,))
        return

    for f in feed.entries:
        title = f.title.encode('utf-8') if 'title' in f else ''
        link = f.link if 'link' in f else ''

        if not title or not link:
            # Without both values no hashes can be computed, so skip the
            # entry before opening a database connection.
            print('feeditem ignored: no guid, no link!')
            continue

        # The hashes of title and link are considered unique per feed item.
        title_hash = _sha512_hex(title)
        # Encode explicitly so links with non-ASCII characters hash cleanly.
        link_hash = _sha512_hex(link.encode('utf-8'))

        description = f.description.encode('utf-8') if 'description' in f else ''
        # Value of the entry's publication date, stored as given by the feed.
        # TODO: convert it to a proper datetime before storing.
        pub_date = f.published.encode('utf-8') if 'published' in f else ''
        guid = f.guid if 'guid' in f else ''

        feeditem_id = _store_feeditem(feed_id, title, description, link,
                                      link_hash, guid, title_hash, pub_date)

        # Autotag this feed item only if it was newly inserted.
        if feeditem_id > 0:
            autotagger.insert_feeditem_tags(feeditem_id)