Beispiel #1
0
def extract_fileobj(job, dataset, fh, file_name=None):
    """ Upload contents of an opened fh to the data repository.

    The original file name (if any) is recorded in the artifact's
    metadata, and the ingested artifact becomes the job's source. """
    ingested = job.package.ingest(fh, meta={'source_file': file_name},
                                  overwrite=False)
    ingested.save()
    job.set_source(ingested)
    return ingested
Beispiel #2
0
def extract_fileobj(job, dataset, fh, file_name=None):
    """ Upload contents of an opened fh to the data repository.

    Ingests the file handle (never overwriting an existing artifact),
    persists it, and registers it as the job's source. """
    source_meta = {'source_file': file_name}
    artifact = job.package.ingest(fh, meta=source_meta, overwrite=False)
    artifact.save()
    job.set_source(artifact)
    return artifact
Beispiel #3
0
def extract_url(job, dataset, url):
    """ Upload contents of a URL to the data repository.

    Returns the ingested source, or None when the package reports that
    nothing could be ingested (in which case the job's source is left
    untouched). """
    fetched = job.package.ingest(url, overwrite=False)
    if fetched is not None:
        fetched.save()
        job.set_source(fetched)
        return fetched
Beispiel #4
0
def transform_source(job, dataset, source_name):
    """ Transform the contents of an uploaded source dataset to a
    well-understood file format.

    Validates the named source table; if any rows failed validation the
    job is marked failed, otherwise the validated source is returned. """
    src = Source(job.package, source_name)
    job.set_source(src)
    validated = validate_table(src)
    if validated.meta.get('num_failed') > 0:
        return job.failed()
    return validated
Beispiel #5
0
def transform_source(job, dataset, source_name):
    """ Transform the contents of an uploaded source dataset to a
    well-understood file format.

    Creates the table artifact, carries over the source metadata, and
    extracts the source rows into it. """
    src = Source(job.package, source_name)
    job.set_source(src)
    artifact = Table(job.package, ARTIFACT_NAME)
    # Propagate source-level metadata onto the new artifact.
    artifact.meta.update(src.meta)
    return extract_table(src, artifact)
Beispiel #6
0
def transform_source(job, dataset, source_name):
    """ Transform the contents of an uploaded source dataset to a
    well-understood file format.

    Looks up the named source, registers it on the job, then extracts
    its rows into the standard table artifact (inheriting the source's
    metadata). """
    the_source = Source(job.package, source_name)
    job.set_source(the_source)
    destination = Table(job.package, ARTIFACT_NAME)
    destination.meta.update(the_source.meta)
    destination = extract_table(the_source, destination)
    return destination
Beispiel #7
0
def load(job, dataset, source_name):
    """ Load the table artifact for this dataset into the fact
    table.

    Resets the dataset's cached data, adopts the field definitions from
    the source metadata, and rebuilds the fact table from scratch.
    Raises ValueError when the source metadata declares no fields. """
    src = Source(job.package, source_name)
    job.set_source(src)
    dataset.data = {}
    dataset.fields = src.meta.get('fields', {})
    if not dataset.fields:
        raise ValueError('No columns recognized in source data.')

    # Persist the field mapping before touching the fact table.
    db.session.commit()
    facts = dataset.fact_table
    facts.drop()
    facts.create()
    facts.load_iter(load_table(src))
Beispiel #8
0
def load(job, dataset, source_name):
    """ Load the table artifact for this dataset into the fact
    table.

    Adopts field and sample metadata from the table artifact, then
    drops and recreates the fact table before streaming the artifact's
    records into it. Raises ValueError when no fields were recognized. """
    src = Source(job.package, source_name)
    job.set_source(src)
    artifact = Table(job.package, ARTIFACT_NAME)
    dataset.fields = artifact.meta.get('fields', {})
    dataset.samples = artifact.meta.get('samples', {})
    if not dataset.fields:
        raise ValueError('No columns recognized in source data.')

    # Persist metadata before rebuilding the physical table.
    db.session.commit()
    facts = dataset.fact_table
    facts.drop()
    facts.create()
    facts.load_iter(artifact.records())