Beispiel #1
0
def stream_mapping(infile: Path,
                   outfile: Path,
                   mapping_yaml: Path,
                   sign: bool = True) -> None:
    """Map CSV records read from ``infile`` to entities written to ``outfile``.

    Every query found in the mapping file is applied to every record whose
    source filters accept it.

    Args:
        infile: Source of the CSV records.
        outfile: Destination the generated entities are written to.
        mapping_yaml: Mapping file that defines the datasets and their queries.
        sign: When true, each entity is passed through the namespace of its
            dataset before being written.

    Raises:
        click.Abort: If the output pipe is closed while writing.
    """
    # One Namespace per dataset is enough: it depends only on the dataset
    # name, so it is built here rather than once per record and query.
    queries: List[Tuple[Namespace, QueryMapping]] = []
    config = load_mapping_file(mapping_yaml)
    for dataset, meta in config.items():
        ns = Namespace(dataset)
        for data in keys_values(meta, "queries", "query"):
            # Records are taken from ``infile`` below, so the source declared
            # in the mapping is dropped / replaced with a placeholder
            # (presumably only needed to make the mapping a CSV mapping).
            data.pop("database", None)
            data["csv_url"] = "/dev/null"
            query = model.make_mapping(data, key_prefix=dataset)
            queries.append((ns, query))

    try:
        with path_writer(outfile) as outfh:
            with input_file(infile) as fh:
                for record in CSVSource.read_csv(fh):
                    for (ns, query) in queries:
                        if query.source.check_filters(record):  # type: ignore
                            entities = query.map(record)
                            for entity in entities.values():
                                if sign:
                                    entity = ns.apply(entity)
                                write_entity(outfh, entity)
    except BrokenPipeError:
        raise click.Abort()
Beispiel #2
0
def sign(infile: Path, outfile: Path, signature: Optional[str]) -> None:
    """Apply the namespace built from ``signature`` to each entity in ``infile``.

    The resulting entities are written to ``outfile`` in the order they are
    read. A closed output pipe is reported as ``click.Abort``.
    """
    namespace = Namespace(signature)
    try:
        with path_writer(outfile) as sink:
            for proxy in path_entities(infile, EntityProxy):
                write_entity(sink, namespace.apply(proxy))
    except BrokenPipeError:
        raise click.Abort()
Beispiel #3
0
def import_vis(infile: Path, outfile: Path) -> None:
    """Extract the entities stored in a JSON document and write them out.

    The entity list is taken from ``layout.entities`` when that key exists,
    and from the top-level ``entities`` key otherwise.

    Args:
        infile: Path of the JSON document to read.
        outfile: Destination the entities are written to.

    Raises:
        click.ClickException: If the document holds no entity list under
            either of the two locations.
    """
    # Parse the input before opening the output, so an unreadable or invalid
    # document fails before the writer is touched.
    with open(infile, "r", encoding="utf-8") as infh:
        data = json.load(infh)

    if not isinstance(data, dict):
        raise click.ClickException("No entities found in: %s" % infile)

    layout = data.get("layout")
    if isinstance(layout, dict) and "entities" in layout:
        entities = layout["entities"]
    elif "entities" in data:
        # A layout without its own entity list must not hide the top-level
        # one (and the whole document is never itself an entity).
        entities = data["entities"]
    else:
        raise click.ClickException("No entities found in: %s" % infile)

    with path_writer(outfile) as outfh:
        for entity_data in ensure_list(entities):
            entity = EntityProxy.from_dict(model, entity_data)
            write_entity(outfh, entity)
Beispiel #4
0
def validate(infile: Path, outfile: Path) -> None:
    """Rebuild every entity from ``infile`` value by value and write it out.

    Entities are read with ``cleaned=False``; for each one a fresh entity of
    the same schema and ID is created and every (property, value) pair is
    re-added through ``add()`` (presumably so the values get checked again).
    A closed output pipe is reported as ``click.Abort``.
    """
    try:
        with path_writer(outfile) as sink:
            for raw in path_entities(infile, EntityProxy, cleaned=False):
                rebuilt = model.make_entity(raw.schema)
                rebuilt.id = raw.id
                for prop, value in raw.itervalues():
                    rebuilt.add(prop, value)
                write_entity(sink, rebuilt)
    except BrokenPipeError:
        raise click.Abort()
Beispiel #5
0
def sieve(
    infile: Path,
    outfile: Path,
    schema: Iterable[str],
    property: Iterable[str],
    type: Iterable[str],
) -> None:
    """Run ``sieve_entity`` over each entity in ``infile`` and write the rest.

    ``schema``, ``property`` and ``type`` are handed to ``sieve_entity``
    unchanged; entities for which it returns ``None`` are skipped. A closed
    output pipe is reported as ``click.Abort``.
    """
    try:
        with path_writer(outfile) as sink:
            for proxy in path_entities(infile, EntityProxy):
                kept = sieve_entity(proxy, schema, property, type)
                if kept is None:
                    continue
                write_entity(sink, kept)
    except BrokenPipeError:
        raise click.Abort()
Beispiel #6
0
def aggregate(infile: Path, outfile: Path) -> None:
    """Merge all entities in ``infile`` that share an ID and write the result.

    Every entity is first passed through ``Namespace(None)``; entities are
    then collected in memory keyed by ID, merging later occurrences into the
    first one seen. Output is only written once the whole input has been
    read. A closed output pipe is reported as ``click.Abort``.
    """
    merged: Dict[str, EntityProxy] = {}
    signer = Namespace(None)
    try:
        with path_writer(outfile) as sink:
            for proxy in path_entities(infile, EntityProxy):
                proxy = signer.apply(proxy)
                key = proxy.id
                if key not in merged:
                    merged[key] = proxy
                    continue
                merged[key].merge(proxy)

            for combined in merged.values():
                write_entity(sink, combined)
    except BrokenPipeError:
        raise click.Abort()
Beispiel #7
0
def run_mapping(outfile: Path, mapping_yaml: Path, sign: bool = True) -> None:
    """Execute every query of a mapping file and write the mapped entities.

    For each dataset in the mapping, all of its queries are run through
    ``model.map_entities``. When ``sign`` is true, each entity is passed
    through the dataset's namespace before it is written to ``outfile``.

    A closed output pipe is reported as ``click.Abort``; any other exception
    is re-raised as ``click.ClickException`` carrying its message.
    """
    config = load_mapping_file(mapping_yaml)
    try:
        with path_writer(outfile) as sink:
            for dataset_name, dataset_meta in config.items():
                signer = Namespace(dataset_name)
                query_configs = keys_values(dataset_meta, "queries", "query")
                for query_config in query_configs:
                    for mapped in model.map_entities(
                        query_config, key_prefix=dataset_name
                    ):
                        result = signer.apply(mapped) if sign else mapped
                        write_entity(sink, result)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as exc:
        raise click.ClickException(str(exc))
Beispiel #8
0
def sorted_aggregate(path: Path, outpath: Path, entity_type: Type[E]) -> None:
    """Merge runs of consecutive entities with the same ID and write them out.

    This relies on the fragments in the source stream being sorted by ID:
    only neighbouring entities are compared, and a merged entity is written
    as soon as an entity with a different ID shows up.
    """
    current: Optional[E] = None
    with path_writer(outpath) as sink:
        for fragment in path_entities(path, entity_type=entity_type):
            if current is not None and fragment.id == current.id:
                # Same ID as the run in progress: fold the fragment into it.
                current = current.merge(fragment)
                continue
            if current is not None:
                # A new ID starts here, so the previous run is complete.
                write_entity(sink, current)
            current = fragment

        # Flush the final run (absent only when the input was empty).
        if current is not None:
            write_entity(sink, current)