예제 #1
0
파일: db.py 프로젝트: EruditePanda/nerus
def db_insert_(name, offset, count, chunk):
    """Load the records of the source called ``name`` and insert them.

    The loaded records are wrapped with ``log_progress`` (``total=count``),
    ``offset`` is handed to ``skip`` and ``count`` to ``head``, and the
    ``as_bson`` attribute of every remaining record is passed to
    ``chunk_insert`` together with ``chunk``.
    """
    log('Inserting source')
    source = Source.find(name)
    location = source.get()
    loaded = source.load(location)
    # NOTE(review): the progress wrapper is applied before ``skip``, so it
    # presumably also sees the skipped records while ``total`` is ``count``
    # -- confirm this is intended.
    tracked = log_progress(loaded, total=count)
    selected = head(skip(tracked, offset), count)

    database = get_db(host=WORKER_HOST)
    documents = (record.as_bson for record in selected)
    chunk_insert(database[SOURCE], documents, chunk)
예제 #2
0
def queue_retry_(chunk):
    """Re-enqueue the jobs currently sitting in the ``FAILED`` queue.

    Ids are gathered from the failed queue's jobs via ``annotators_ids``,
    the failed queue is emptied, and then, for each annotator key, the ids
    are grouped with ``group_chunks`` (``size=chunk``) and every group is
    enqueued with ``task`` on the queue obtained for that annotator.
    """
    log('Retrying')
    connection = get_connection(host=WORKER_HOST)
    failed_queue = get_queue(FAILED, connection=connection)

    # Collect the ids first: the failed queue is emptied right afterwards.
    ids_by_annotator = annotators_ids(failed_queue.jobs)
    failed_queue.empty()

    for annotator in ids_by_annotator:
        tracked_ids = log_progress(
            ids_by_annotator[annotator],
            prefix=annotator
        )
        batches = group_chunks(tracked_ids, size=chunk)
        annotator_queue = get_queue(annotator, connection=connection)
        for batch in batches:
            enqueue(annotator_queue, task, batch)
예제 #3
0
def queue_insert_(annotators, offset, count, chunk):
    """Enqueue chunks of source ids for every given annotator.

    Ids are read with ``read_index`` from the ``SOURCE`` collection using
    ``offset``, wrapped with ``log_progress``, limited by ``head`` to
    ``count`` and grouped with ``group_chunks`` (``size=chunk``). Every
    group is then enqueued with ``task`` once per annotator, on the queue
    that ``get_queues`` associates with that annotator.
    """
    annotator_names = ', '.join(annotators)
    # A falsy ``count`` is logged as -1.
    logged_count = count or -1
    log('Annotators: %s; offset: %d, count: %d, chunk: %d',
        annotator_names, offset, logged_count, chunk)

    database = get_db(host=WORKER_HOST)
    index = read_index(database[SOURCE], offset)
    tracked = log_progress(index, total=count)
    batches = group_chunks(head(tracked, count), size=chunk)

    connection = get_connection(host=WORKER_HOST)
    queue_by_annotator = dict(get_queues(annotators, connection))
    for batch in batches:
        for annotator in annotators:
            enqueue(queue_by_annotator[annotator], task, batch)
예제 #4
0
def dump_norm_(source, target):
    """Pass ``source`` through ``load_raw`` and ``norm_raw``, then dump it.

    The normalised records are wrapped with ``log_progress`` and handed to
    ``dump_norm__`` together with ``target``.
    """
    normalized = norm_raw(load_raw(source))
    dump_norm__(log_progress(normalized), target)
예제 #5
0
def dump_raw_(path, annotators, count, chunk):
    """Read raw records for ``annotators`` from the db and dump them.

    Records come from ``read_raw`` (called with the db, ``annotators``,
    ``count`` and ``chunk``), are wrapped with ``log_progress``
    (``total=count``) and are handed to ``dump_raw__`` together with
    ``path``.
    """
    annotator_names = ', '.join(annotators)
    log('Dumping %s', annotator_names)
    database = get_db(host=WORKER_HOST)
    tracked = log_progress(
        read_raw(database, annotators, count, chunk),
        total=count
    )
    dump_raw__(tracked, path)