def process_email(server, msg_id):
    """Fetch the given message(s), back up their text and store their articles.

    For every ``(msgid, data)`` pair returned by ``server.fetch`` (requested
    with the ``RFC822`` data item):

    * the message is parsed with ``parse_message``; messages for which it
      yields a falsy ``email_info`` are skipped;
    * the plain text is parsed with ``parse_plain`` into a topic title and a
      list of article entries;
    * the UTF-8 encoded plain text is written to
      ``<common.DB_DIR>/email_backup/<msgid>_<topic title>.txt``;
    * the topic is registered through ``db.ensure_topic_exists`` and the
      value it returns is used as the topic key from then on;
    * each article entry is registered through ``db.ensure_article_exists``
      and linked to the topic, together with its brief, through
      ``db.insert_or_update_t_a_rel``.

    :param server: object exposing ``fetch(msg_id, data_items)`` that returns
        a mapping of message id to message data (presumably an IMAP client
        -- TODO confirm against the caller).
    :param msg_id: message id (or ids) handed straight to ``server.fetch``.
    :return: None.
    """
    fetched = server.fetch(msg_id, ['RFC822'])

    for msgid, data in fetched.iteritems():
        email_info, plain = parse_message(msgid, data)
        if not email_info:
            continue

        topic_title, result = parse_plain(plain)

        # Keep a copy of the message text on disk before touching the DB.
        backup_name = '/email_backup/%s_%s.txt' % (msgid, topic_title)
        with open(common.DB_DIR + backup_name, 'w') as backup_file:
            backup_file.write(plain.encode('utf8'))

        message = 'parsing topic: %s' % topic_title
        log.info(message)

        # From here on use whatever key the DB layer hands back for the topic.
        topic_title = db.ensure_topic_exists(topic_title)

        for entry in result:
            article = {
                'url': entry['url'],
                'title': entry['title'],
                'source': entry['source'],
                'url_date': email_info[RECEIVE_TIME],
            }
            article_id = db.ensure_article_exists(article)
            db.insert_or_update_t_a_rel(topic_title, article_id,
                                        entry['brief'])
# Maintenance pass over topic_article_rel: every stored brief longer than
# 200 characters is shortened and written back through
# db.insert_or_update_t_a_rel.
#
# * If the topic title occurs in the brief (case-insensitive), keep a window
#   of 75 characters either side of the first occurrence; when that window
#   would start before the beginning of the brief, it is clamped to 0 and
#   its end is pushed out by another 75 characters.
# * Otherwise keep the first 150 characters.
#
# `c` and `db` come from the surrounding file; `c` is presumably an open
# database cursor -- TODO confirm.
sql = ('select article_id, topic_title, brief'
       ' from topic_article_rel')
c.execute(sql)
fetch = c.fetchall()

count = 0    # rows whose brief was shortened and rewritten
t_count = 0  # rows examined
try:
    for aid, t, brief in fetch:
        if len(brief) > 200:
            lt = t.lower()
            l_cached = brief.lower()
            # A single search: find() returns -1 when the title is absent.
            idx = l_cached.find(lt)
            if idx != -1:
                s = idx - 75
                e = idx + 75
                if s < 0:
                    # Window clipped at the start: extend it at the end.
                    s = 0
                    e += 75
                brief = brief[s:e]
            else:
                # Fixed: this used to assign to the misspelled name `biref`,
                # so the untruncated brief was written back unchanged.
                brief = brief[:150]
            db.insert_or_update_t_a_rel(t, aid, brief)
            count += 1
        t_count += 1
    # NOTE(review): no commit is issued here (a `conn.commit()` call was
    # commented out in the original); presumably the db helper commits on
    # its own -- TODO confirm.
finally:
    # Report progress even if the loop was interrupted by an exception.
    # The single-argument parenthesised form prints the same text under
    # Python 2's print statement and Python 3's print function.
    print('total %s' % t_count)
    print('do %s' % count)