Esempio n. 1
0
def test_write(id):
    """Format one project as a '||||'-delimited record for /tmp/proj.txt.

    The project is fetched with ``trDM().get_project(id)``. Its text comes
    from ``<PROCESSED_DIRECTORY>/<project id>.txt`` when that file exists,
    otherwise from the project's ``content`` field. The record handed to
    ``write_utf_8_file`` has the form::

        tr-<project id>||||<stripped title>||||<url>||||<prepared text>\\n

    Args:
        id: Project identifier passed to ``get_project``. The name shadows
            the ``id`` builtin; it is kept so existing callers still work.
    """
    dbm = trDM()
    project = dbm.get_project(id)
    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    print(project['id'])
    doc_id = 'tr-%s' % project['id']
    src_path = '%s/%s.txt' % (PROCESSED_DIRECTORY, project['id'])
    # Prefer the file on disk; fall back to the content stored on the project.
    if os.path.exists(src_path):
        print('from file')
        data = read_utf_8_file(src_path)
    else:
        print('from db')
        data = project['content']
    record = "%s||||%s||||%s||||%s\n" % (
        doc_id,
        project['title'].strip(),
        project['url'],
        prepare_document(data),
    )
    write_utf_8_file("/tmp/proj.txt", record)
Esempio n. 2
0
def main():
    """Create an ``mlDM`` and a ``trDM`` instance and pass both to ``build_corpus``."""
    # Arguments are evaluated left to right, so mlDM() is still constructed
    # before trDM().
    build_corpus(mlDM(), trDM())