Ejemplo n.º 1
0
def process_email(server, msg_id):
    """Fetch the message(s) ``msg_id`` from ``server`` and store their articles.

    Only the ``RFC822`` item is requested.  For every fetched message that
    ``parse_message`` returns truthy email info for:

    * the plain text is parsed with ``parse_plain`` into a topic title and a
      list of entries,
    * the UTF-8 encoded plain text is written to a backup file under
      ``common.DB_DIR + '/email_backup/'``,
    * the topic is registered via ``db.ensure_topic_exists``, and
    * each entry is stored with ``db.ensure_article_exists`` and linked to
      the topic, together with its brief, via ``db.insert_or_update_t_a_rel``.

    Messages for which ``parse_message`` yields no email info are skipped.
    """
    fetched = server.fetch(msg_id, ['RFC822'])

    for msgid, payload in fetched.iteritems():
        email_info, plain = parse_message(msgid, payload)
        if not email_info:
            # Nothing usable was parsed out of this message.
            continue

        topic_title, entries = parse_plain(plain)

        # Keep a copy of the plain text on disk before touching the database.
        backup_name = '/email_backup/%s_%s.txt' % (msgid, topic_title)
        backup_path = common.DB_DIR + backup_name
        with open(backup_path, 'w') as backup:
            backup.write(plain.encode('utf8'))

        log.info('parsing topic: %s' % topic_title)
        # ensure_topic_exists returns the value used from here on to
        # reference the topic in the relation table.
        topic_key = db.ensure_topic_exists(topic_title)

        for entry in entries:
            article = {
                'url': entry['url'],
                'title': entry['title'],
                'source': entry['source'],
                'url_date': email_info[RECEIVE_TIME],
            }
            article_id = db.ensure_article_exists(article)
            db.insert_or_update_t_a_rel(topic_key, article_id, entry['brief'])
Ejemplo n.º 2
0
# Shorten over-long briefs stored in topic_article_rel and write them back.
#
# Every row whose brief is longer than 200 characters is cut down: if the
# topic title occurs in the brief (case-insensitively) a window around the
# first occurrence is kept, otherwise the first 150 characters are kept.
# Rows with shorter briefs are left untouched.
sql = 'select article_id, topic_title, brief'\
      ' from topic_article_rel'

c.execute(sql)
fetch = c.fetchall()

count = 0    # rows whose brief was shortened and written back
t_count = 0  # rows processed in total
try:
    for aid, t, brief in fetch:
        if len(brief) > 200:
            # Single case-insensitive search; -1 means the title is absent.
            idx = brief.lower().find(t.lower())
            if idx != -1:
                s = idx - 75
                e = idx + 75
                if s < 0:
                    # Match is close to the start: clamp the window to the
                    # beginning and extend its end by 75 characters.
                    s = 0
                    e += 75
                brief = brief[s:e]
            else:
                # Fixed: the original assigned to the misspelled name
                # ``biref``, so the untruncated brief was written back.
                brief = brief[:150]
            db.insert_or_update_t_a_rel(t, aid, brief)
            count += 1
        t_count += 1
        # NOTE(review): the original has ``conn.commit()`` commented out at
        # this point; whether insert_or_update_t_a_rel commits on its own is
        # not visible here - confirm.
finally:
    # Report progress even if the loop aborted with an exception.
    # Single-argument parenthesised form prints the same text as the
    # original ``print 'total', t_count`` statement.
    print('total %s' % t_count)
    print('do %s' % count)