def parse(env, limit=1000, offset=0, where=None):
    """Re-parse stored raw emails and write the parsed fields back.

    Rows of ``emails`` matching ``where`` are walked in pages of ``limit``
    ids (ordered by ``created DESC``), starting at ``offset``.  For each id
    the raw message is loaded, passed to ``syncer.get_parsed`` and the
    result is stored with ``syncer.update_email``; the transaction is
    committed after every single email.

    :param env: environment object providing ``sql()``, ``db`` and
        ``username``.
    :param limit: page size, i.e. how many ids are selected per query.
    :param offset: number of leading rows (in ``created DESC`` order) to
        skip before parsing starts.
    :param where: SQL condition selecting the emails to parse; defaults to
        ``'raw IS NOT NULL'``.  It is interpolated into the statement
        verbatim, so it must come from trusted code only.
    :returns: ``None``.
    """
    # Imported locally, as in the original code
    # (presumably to avoid an import cycle -- TODO confirm).
    from core import syncer
    from core.helpers import Timer

    where = where or 'raw IS NOT NULL'
    sql = 'SELECT count(id) FROM emails WHERE {where}'.format(where=where)
    total = env.sql(sql).fetchone()[0]

    # Number of rows left to process once the first ``offset`` are skipped.
    count = total - offset
    if count <= 0:
        return

    log.info('Parse %s emails for %r', count, env.username)
    timer, done = Timer(), 0

    # The stop bound must be the total number of matching rows, not the
    # remaining count: using ``count`` here dropped the last ``offset``
    # rows whenever a non-zero offset was requested.
    for page_offset in range(offset, total, limit):
        # NOTE(review): OFFSET paging assumes the set of rows matching
        # ``where`` does not change while emails are being updated -- confirm
        # for custom ``where`` conditions.
        rows = env.sql('''
        SELECT id FROM emails
        WHERE {where}
        ORDER BY created DESC
        LIMIT %s OFFSET %s
        '''.format(where=where), (limit, page_offset))
        for row in rows:
            # Load the raw body one row at a time instead of selecting it
            # together with the ids, so only one message is held at once.
            raw = env.sql('SELECT raw FROM emails WHERE id=%s', [row['id']])
            raw = raw.fetchone()[0].tobytes()
            data = syncer.get_parsed(env, raw, row['id'])
            syncer.update_email(env, dict(data), 'id=%s', [row['id']])
            env.db.commit()
            done += 1
        log.info(' - done %s for %.2f', done, timer.duration)
def test_emails(env, path, expected):
    """Parse a fixture email and compare the result with ``expected``.

    The file ``path`` is read from the ``files_parser`` fixture directory
    and run through ``syncer.get_parsed``.  The subject is always compared;
    bodies, attachments, sender and references are compared only when
    ``expected`` provides a truthy value for them.
    """
    source = read_file('files_parser', path)
    parsed = dict(syncer.get_parsed(env, source, 'test'))
    assert expected['subject'] == parsed['subj']

    # Each expected body fragment must occur exactly once in its part.
    for kind in ['html', 'text']:
        fragment = expected.get(kind)
        if not fragment:
            continue
        assert kind in parsed
        body = parsed[kind]
        assert fragment in body
        assert body.count(fragment) == 1

    # Attachments are stored as a JSON string in the parsed result.
    wanted_files = expected.get('attachments')
    if wanted_files:
        assert 'attachments' in parsed
        actual_files = json.loads(parsed['attachments'])
        assert len(wanted_files) == len(actual_files)
        assert wanted_files == actual_files

    wanted_sender = expected.get('from')
    if wanted_sender:
        assert 'fr' in parsed
        assert wanted_sender == list(parsed['fr'])

    wanted_refs = expected.get('refs')
    if wanted_refs:
        assert 'refs' in parsed
        assert wanted_refs == parsed['refs']