def test_write(id):
    """Write a single project's corpus record to ``/tmp/proj.txt``.

    The project is fetched with ``trDM().get_project(id)`` and its id is
    printed. The document text is read from
    ``PROCESSED_DIRECTORY/<project id>.txt`` when that file exists, otherwise
    it is taken from the project's ``content`` field; a short message is
    printed saying which source was used. The text is passed through
    ``prepare_document`` and written as one ``||||``-delimited line:
    ``tr-<id>||||<stripped title>||||<url>||||<prepared text>``.

    Args:
        id: Project identifier handed to ``get_project``. The name shadows
            the builtin ``id``; it is kept so existing callers (including
            keyword callers) keep working.
    """
    project = trDM().get_project(id)
    project_id = project['id']
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    print(project_id)

    doc_id = 'tr-%s' % project_id
    src_path = '%s/%s.txt' % (PROCESSED_DIRECTORY, project_id)

    # Prefer the processed file on disk; fall back to the content stored on
    # the project record.
    if os.path.exists(src_path):
        print('from file')
        data = read_utf_8_file(src_path)
    else:
        print('from db')
        data = project['content']

    record = "%s||||%s||||%s||||%s\n" % (
        doc_id,
        project['title'].strip(),
        project['url'],
        prepare_document(data),
    )
    # NOTE(review): the destination is a fixed scratch path, not a path under
    # CORPUS_DIRECTORY -- presumably intentional for this test helper; confirm.
    write_utf_8_file("/tmp/proj.txt", record)
def main():
    """Build the corpus from freshly constructed ``mlDM`` and ``trDM`` managers."""
    # Arguments are evaluated left to right, so mlDM() is still constructed
    # before trDM().
    build_corpus(mlDM(), trDM())