예제 #1
0
def test_serve_continuation(reader, writer, ledger, pre_served_size):
    """Items already recorded in the ledger are skipped when serving."""
    job = Job('somejob', reader, writer, ledger)
    # mark the first ``pre_served_size`` items as already handled
    for idx in range(pre_served_size):
        ledger.set(job.ledger_id(idx), 'fake value')
    served = job.serve(50)
    # only the unmarked remainder of the CSV should come back
    assert len(served) == CSV_SIZE - pre_served_size
예제 #2
0
def test_receive_cont(job):
    """A continuation job serves only what the original job left behind.

    Half of the served items are received, the original job is dropped,
    and a new job over the same ledger is created with ``cont=True``.
    After serving and receiving the remaining half, every ledger record
    for the job must be RECEIVED and none left in the SERVED state.
    """
    n_served = 10
    n_received = 5
    job_name = job.name
    ledger = job.ledger
    items = job.serve(n_served)
    job.receive(items[:n_received], False)
    # fresh reader/writer over the same files so the continuation job
    # shares no in-memory I/O state with the original job
    reader = CsvReader({'input_file_path': job.reader.file_path})
    writer = CsvWriter({'output_file_path': job.writer.file_path})
    del job
    # we processed half the served items and we are making a new job
    cont_job = Job(job_name, reader, writer, ledger, cont=True)
    # requesting the second half of the items
    # (removed leftover debug print('serving'))
    items = cont_job.serve(n_served - n_received)
    cont_job.receive(items, False)
    # counting in the ledger
    received = [
        x for x in ledger.scan_iter(f'{job_name}*')
        if ledger.get(x).decode('utf8') == RECEIVED
    ]
    served = [
        x for x in ledger.scan_iter(f'{job_name}*')
        if ledger.get(x).decode('utf8') == SERVED
    ]
    assert len(received) == n_served
    assert len(cont_job.received) == n_served
    assert len(served) == 0
예제 #3
0
def test_restart(reader, writer, ledger):
    """Restarting a job wipes its ledger records and in-memory state."""
    jobname = 'somejob'
    n_skips = 10
    # pre-populate the ledger with received records for this job name
    for record_idx in range(n_skips):
        ledger.set(f'{jobname}:{record_idx}', RECEIVED)
    assert len(list(ledger.scan_iter(f'{jobname}*'))) == n_skips
    job = Job(jobname, reader, writer, ledger)
    job.restart()
    # everything for this job is gone, both persisted and in memory
    remaining = list(ledger.scan_iter(f'{jobname}*'))
    assert len(remaining) == 0
    assert not job.served
    assert not job.received
예제 #4
0
def test_restore_records(reader, writer, ledger):
    """Creating a job over a pre-populated ledger restores its record sets."""
    job_name = 'somejob'
    n_skips = 10
    for idx in range(n_skips):
        ledger.set(f'{job_name}:{idx}', RECEIVED)
    # flip one record back to SERVED so both sets are exercised
    ledger.set(f'{job_name}:1', SERVED)
    job = Job(job_name, reader, writer, ledger)
    assert len(job.received) == n_skips - 1
    assert len(job.served) == 1
    # the restored records must be excluded from subsequent serving
    remaining = job.serve(100)
    assert len(remaining) == CSV_SIZE - n_skips
예제 #5
0
파일: app.py 프로젝트: savkov/planchet
def _load_jobs(ledger) -> Dict:
    """Rebuild the in-memory job map from every ``JOB:*`` key in the ledger.

    Jobs whose metadata cannot be parsed or whose files are missing are
    logged and skipped; all others are registered and their output paths
    recorded in the output registry.

    :param ledger: ledger client exposing ``scan_iter``
    :return: mapping of job name to restored ``Job``
    """
    jobs: Dict = {}
    for job_key in ledger.scan_iter('JOB:*'):
        # keys come back as bytes laid out as ``JOB:<name>``
        job_name: str = job_key.decode('utf8').split(':', 1)[1]
        try:
            restored = Job.restore_job(job_name, job_key, ledger)
        except json.JSONDecodeError:
            logging.error(f'Could not restore job: {job_key}')
        except FileNotFoundError as e:
            logging.error(f'Could not restore job: {job_key}; {e}')
        else:
            jobs[job_name] = restored
            OUTPUT_REGISTRY.add(restored.writer.file_path)
    return jobs
예제 #6
0
def test_status(reader, writer, ledger):
    """Status stays IN_PROGRESS until every item has been received."""
    job = Job('somejob', reader, writer, ledger)
    assert job.status == IN_PROGRESS
    served = job.serve(5)
    # partially served: still in progress
    assert job.status == IN_PROGRESS
    # drain everything that is left, then hand it all back
    served += job.serve(100)
    job.receive(served, True)
    assert job.status == COMPLETE
예제 #7
0
def test_restore_job(reader, writer, ledger):
    """``Job.restore_job`` rebuilds a job from its ``JOB:*`` metadata entry."""
    job_name = 'testjob'
    job_key = 'JOB:testjob'
    # persist the job metadata the same way the app would on creation
    ledger.set(job_key, json.dumps({
        'metadata': {
            'input_file_path': reader.file_path,
            'output_file_path': writer.file_path
        },
        'reader_name': type(reader).__name__,
        'writer_name': type(writer).__name__,
        'mode': READ_WRITE
    }))
    n_skips = 10
    for idx in range(n_skips):
        ledger.set(f'{job_name}:{idx}', RECEIVED)
    # one record flipped back to SERVED to exercise both sets
    ledger.set(f'{job_name}:1', SERVED)
    job = Job.restore_job(job_name, job_key, ledger)
    assert len(job.received) == n_skips - 1
    assert len(job.served) == 1
    records = job.serve(100)
    assert len(records) == CSV_SIZE - n_skips
예제 #8
0
def writing_job(writer, ledger):
    """Yield a write-only job with no reader attached."""
    yield Job('writing-job', None, writer, ledger, WRITE_ONLY)
예제 #9
0
def reading_job(reader, ledger):
    """Yield a read-only job with no writer attached."""
    yield Job('reading-job', reader, None, ledger, READ_ONLY)
예제 #10
0
def job(reader, writer, ledger):
    """Yield a standard read/write job over the test reader and writer."""
    yield Job('somejob', reader, writer, ledger)
예제 #11
0
def test_restore_unknown_job(ledger):
    """Restoring a job with no ``JOB:*`` metadata entry yields ``None``.

    The previous annotation ``job: NoneType`` referenced an undefined
    name (``NoneType`` is not a builtin and was never imported), so it
    is dropped here.
    """
    job_name: str = 'testjob'
    job_key: str = 'JOB:testjob'
    job = Job.restore_job(job_name, job_key, ledger)
    assert job is None
예제 #12
0
def test_serve_batch(reader, writer, ledger, batch_size):
    """Serving returns at most ``batch_size`` items, capped at 30.

    NOTE(review): the literal 30 presumably equals ``CSV_SIZE`` for the
    test fixture — confirm against the fixture file.
    """
    job = Job('somejob', reader, writer, ledger)
    batch = job.serve(batch_size)
    assert len(batch) == min(batch_size, 30)
예제 #13
0
파일: app.py 프로젝트: savkov/planchet
def scramble(job_name: str,
             metadata: Dict,
             reader_name: str,
             writer_name: str,
             token: Union[str, None] = None,
             clean_start: bool = False,
             mode: str = READ_WRITE,
             cont: bool = False,
             force_overwrite: bool = False):
    """
    Start a new job.

    Builds the I/O objects, guards against clobbering an existing job or
    a registered output file, then registers the new job in ``JOB_LOG``,
    ``OUTPUT_REGISTRY`` and the ledger (under ``JOB:<job_name>``).

    :param job_name: job name
    :param metadata: I/O classes configuration
    :param reader_name: class reader name
    :param writer_name: class writer name
    :param token: authentication token for the job; default no authentication
    :param clean_start: clean the items before you start
    :param mode: I/O mode
    :param cont: start a repair job
    :param force_overwrite: allow writing to an output path that is
        already present in the output registry
    :raises HTTPException: 400 when the output path is already registered
        (and ``force_overwrite`` is off), or when the job already exists
        and neither ``clean_start`` nor ``cont`` is set
    """
    logging.info(
        util.pink(f'SCRAMBLING: name->{job_name}; metadata->{metadata}; '
                  f'reader_name->{reader_name}; writer_name->{writer_name}; '
                  f'clean_start->{clean_start}'))
    reader, writer = _make_io(reader_name, writer_name, metadata)

    # checking output file validity
    if not force_overwrite and writer and writer.file_path in OUTPUT_REGISTRY:
        msg = f'Output path not allowed `{writer.file_path}`.'
        raise HTTPException(status_code=400, detail=msg)

    # get job with that name from the ledger
    existing_job = LEDGER.get(f'JOB:{job_name}')

    # trying to re-create an existing job
    if existing_job and (not clean_start and not cont):
        msg = f'Job {job_name} already exists.'
        raise HTTPException(status_code=400, detail=msg)

    # continue where you left off if the job exists
    if existing_job and cont:
        job = JOB_LOG[job_name]
        # don't delete the output file only the records
        job.clean(output=False)
        del job
        del JOB_LOG[job_name]
    new_job: Job = Job(job_name, reader, writer, LEDGER, mode, cont)

    # clean ledger before starting
    if clean_start:
        new_job.restart()

    # log the new job
    JOB_LOG[job_name] = new_job
    if writer:
        OUTPUT_REGISTRY.add(writer.file_path)
    # persist enough metadata for _load_jobs / Job.restore_job to
    # reconstruct this job after a restart
    LEDGER.set(
        f'JOB:{job_name}',
        json.dumps({
            'metadata': metadata,
            'reader_name': reader_name,
            'writer_name': writer_name,
            'mode': mode
        }))
    _add_token(job_name, LEDGER, token)