Esempio n. 1
0
def parse(env, limit=1000, offset=0, where=None):
    """Re-parse stored raw emails and write the parsed fields back.

    Rows of ``emails`` matching ``where`` are walked page by page (newest
    ``created`` first). For each row the ``raw`` column is fetched, passed to
    ``syncer.get_parsed`` and the result is stored via ``syncer.update_email``;
    the transaction is committed after every row.

    :param env: object providing ``sql()``, ``db.commit()`` and ``username``
    :param limit: number of rows fetched per page
    :param offset: number of leading rows (in ``created DESC`` order) to skip
    :param where: SQL condition selecting the rows; defaults to
        ``'raw IS NOT NULL'``. It is interpolated into the query verbatim,
        so it must come from trusted code, never from user input.
    :returns: ``None``
    """
    from core import syncer
    from core.helpers import Timer

    where = where or 'raw IS NOT NULL'

    sql = 'SELECT count(id) FROM emails WHERE {where}'.format(where=where)
    total = env.sql(sql).fetchone()[0]
    # Number of rows actually left to process once ``offset`` rows are skipped.
    count = total - offset
    if count <= 0:
        return

    log.info('Parse %s emails for %r', count, env.username)

    timer, done = Timer(), 0
    # Page up to ``total`` (not ``count``): the range starts at ``offset``, so
    # stopping at ``total - offset`` would leave the last ``offset`` rows
    # unvisited whenever a non-zero offset is given.
    for page_start in range(offset, total, limit):
        # NOTE(review): OFFSET paging assumes the set matched by ``where`` does
        # not shrink as rows get updated - confirm for custom conditions.
        i = env.sql('''
        SELECT id FROM emails
        WHERE {where}
        ORDER BY created DESC
        LIMIT %s OFFSET %s
        '''.format(where=where), (limit, page_start))
        for row in i:
            # ``raw`` is loaded one row at a time rather than with the page.
            raw = env.sql('SELECT raw FROM emails WHERE id=%s', [row['id']])
            raw = raw.fetchone()[0].tobytes()
            data = syncer.get_parsed(env, raw, row['id'])
            syncer.update_email(env, dict(data), 'id=%s', [row['id']])
            env.db.commit()
            done += 1
        log.info('  - done %s for %.2f', done, timer.duration)
Esempio n. 2
0
def parse(env, limit=1000, offset=0, where=None):
    """Re-parse stored raw emails and write the parsed fields back.

    Rows of ``emails`` matching ``where`` are walked page by page (newest
    ``created`` first). For each row the ``raw`` column is fetched, passed to
    ``syncer.get_parsed`` and the result is stored via ``syncer.update_email``;
    the transaction is committed after every row.

    :param env: object providing ``sql()``, ``db.commit()`` and ``username``
    :param limit: number of rows fetched per page
    :param offset: number of leading rows (in ``created DESC`` order) to skip
    :param where: SQL condition selecting the rows; defaults to
        ``'raw IS NOT NULL'``. It is interpolated into the query verbatim,
        so it must come from trusted code, never from user input.
    :returns: ``None``
    """
    from core import syncer
    from core.helpers import Timer

    where = where or 'raw IS NOT NULL'

    sql = 'SELECT count(id) FROM emails WHERE {where}'.format(where=where)
    total = env.sql(sql).fetchone()[0]
    # Number of rows actually left to process once ``offset`` rows are skipped.
    count = total - offset
    if count <= 0:
        return

    log.info('Parse %s emails for %r', count, env.username)

    timer, done = Timer(), 0
    # Page up to ``total`` (not ``count``): the range starts at ``offset``, so
    # stopping at ``total - offset`` would leave the last ``offset`` rows
    # unvisited whenever a non-zero offset is given.
    for page_start in range(offset, total, limit):
        # NOTE(review): OFFSET paging assumes the set matched by ``where`` does
        # not shrink as rows get updated - confirm for custom conditions.
        i = env.sql('''
        SELECT id FROM emails
        WHERE {where}
        ORDER BY created DESC
        LIMIT %s OFFSET %s
        '''.format(where=where), (limit, page_start))
        for row in i:
            # ``raw`` is loaded one row at a time rather than with the page.
            raw = env.sql('SELECT raw FROM emails WHERE id=%s', [row['id']])
            raw = raw.fetchone()[0].tobytes()
            data = syncer.get_parsed(env, raw, row['id'])
            syncer.update_email(env, dict(data), 'id=%s', [row['id']])
            env.db.commit()
            done += 1
        log.info('  - done %s for %.2f', done, timer.duration)
Esempio n. 3
0
def test_emails(env, path, expected):
    """Parse a fixture email and compare the result with ``expected``.

    The subject is always checked. Body parts (``html``/``text``),
    attachments, sender and references are checked only when ``expected``
    carries a truthy value for them.
    """
    source = read_file('files_parser', path)
    result = dict(syncer.get_parsed(env, source, 'test'))
    assert expected['subject'] == result['subj']

    for type_ in ('html', 'text'):
        fragment = expected.get(type_)
        if not fragment:
            continue
        assert type_ in result
        body = result[type_]
        assert fragment in body
        # The expected fragment must appear exactly once in the parsed body.
        assert body.count(fragment) == 1

    wanted_files = expected.get('attachments')
    if wanted_files:
        assert 'attachments' in result
        # Attachments are stored JSON-encoded in the parsed result.
        parsed_files = json.loads(result['attachments'])
        assert len(wanted_files) == len(parsed_files)
        assert wanted_files == parsed_files

    wanted_sender = expected.get('from')
    if wanted_sender:
        assert 'fr' in result
        assert wanted_sender == list(result['fr'])

    wanted_refs = expected.get('refs')
    if wanted_refs:
        assert 'refs' in result
        assert wanted_refs == result['refs']