예제 #1
0
def _build_row_types_processor(types, headers_inverse):
    type_processors = []
    if not types:
        return type_processors
    saw_header = False
    for i, col_type in enumerate(types):
        if isinstance(col_type, parser.TypeDefData):
            if saw_header:
                raise ProcessException(
                    "Cannot specify types for indexed columns after named columns"
                )
            type_def = col_type
            optional_type = type_def.optional
            optional_ref = optional_type
            default = type_def.default
        elif isinstance(col_type, parser.KeyToValueData):
            saw_header = True
            type_def = col_type.value
            optional_type = type_def.optional
            default = type_def.default
            if isinstance(col_type.key, parser.LocData):
                optional_ref = col_type.key.optional
                i = col_type.key.value
                if i > 0:
                    i -= 1
            elif isinstance(col_type.key, parser.IdData):
                optional_ref = col_type.key.optional
                header = col_type.key.value
                i = headers_inverse[header]
            else:
                raise ProcessException(f"Unknown key type: {col_type.key}")
        else:
            raise ProcessException(f"Unknown col_type: {col_type}")
        row_type_processor = _build_type_processor(type_def.type)
        if default is not NO_DEFAULT and not (optional_ref or optional_type):
            optional_ref = True
            optional_type = True
        if default is NO_DEFAULT:
            default = None
        else:
            try:
                default = ast.literal_eval(default)
            except Exception as e:
                raise ProcessException(
                    f"Invalid default literal value: {default}") from e
        type_processors.append(
            (i, row_type_processor, optional_ref, optional_type, default))
    return type_processors
예제 #2
0
def _build_type_processor(t):
    if t in {"_", "s"}:
        return str
    elif t == "f":
        return float
    elif t == "i":
        return int
    elif t == "b":
        return lambda v: str(v).lower() in {"true", "yes", "y", "on", "1"}
    elif t == "j":
        return lambda v: json.loads(str(v))
    elif t == "l":
        return lambda v: ast.literal_eval(str(v))
    elif t == "d":
        raise ProcessException("Dates are not supported yet")
    else:
        raise ProcessException(f"Unsupported column type: {t}")
예제 #3
0
def run_jq(text, args):
    """Run ``jq -c`` over ``text`` and return its output as a string.

    Args:
        text: Input document. When falsy, nothing is passed as ``input`` to
            the subprocess.
        args: The jq filter; a falsy value falls back to the identity
            filter ``"."``.

    Returns:
        The decoded combined stdout/stderr of jq.

    Raises:
        ProcessException: If jq exits with a non-zero status; the message is
            jq's own output and the original ``CalledProcessError`` is kept
            as the cause.
    """
    command = ["jq", "-c", args or "."]
    payload = bytes(text, encoding="utf-8") if text else None
    try:
        output = subprocess.check_output(
            command,
            # Merge stderr into stdout so jq's diagnostics are captured.
            stderr=subprocess.STDOUT,
            input=payload,
        )
    except subprocess.CalledProcessError as e:
        # Chain explicitly, matching the other ProcessException sites.
        raise ProcessException(e.stdout.decode()) from e
    return output.decode()
예제 #4
0
def _extract_structure_field_name(field: parser.ParseData, headers):
    """Derive the output key name for a structure field.

    Args:
        field: A ``parser.KeyToValueData`` (its key's value is the name) or
            a ``parser.RefData`` whose ``value`` is a path of parts.
        headers: Container checked with ``in`` and indexed by column
            position to obtain a header name.

    Returns:
        The explicit key for ``KeyToValueData``; for ``RefData`` the path
        parts joined with ``"."``, where a leading position is replaced by
        the matching header name when one exists.

    Raises:
        ProcessException: If ``field`` or its first path part has an
            unexpected type.
    """
    if isinstance(field, parser.KeyToValueData):
        return field.key.value
    if not isinstance(field, parser.RefData):
        raise ProcessException(f"Unexpected field: {field}")

    path = field.value
    first = path[0]
    remainder = path[1:]

    if isinstance(first, parser.LocData):
        position = first.value
        # Positive positions are shifted down by one; zero and negative
        # ones are offset by the number of headers.
        position = position - 1 if position > 0 else position + len(headers)
        # Fall back to the position as written when no header matches.
        leading = headers[position] if position in headers else str(first.value)
    elif isinstance(first, parser.IdData):
        leading = first.value
    else:
        raise ProcessException(f"Unexpected first field: {first}")

    segments = [leading]
    segments.extend(str(part.value) for part in remainder)
    return ".".join(segments)
예제 #5
0
 def get(self, processed_cmd: ProcessedCommand, safe=False):
     """Resolve the processors configured on ``processed_cmd``.

     Args:
         processed_cmd: Command whose attribute named by ``self.attr``
             holds the processor configurations (objects with ``alias``
             and ``args``), and whose ``raw`` flag selects the default.
         safe: When true, an unknown alias yields the default list instead
             of raising.

     Returns:
         A list of processors. Registry entries that are subclasses of
         ``self.type`` are instantiated with the configured ``args``; any
         other entry is returned as stored. When nothing is configured the
         result is a one-element list with the default processor.

     Raises:
         ProcessException: For an unregistered alias when ``safe`` is
             false; the ``KeyError`` is kept as the cause.
     """
     default_alias = self.default_raw if processed_cmd.raw else self.default_alias
     default = [self.data[default_alias]]

     configs = getattr(processed_cmd, self.attr)
     if not configs:
         return default

     result = []
     for config in configs:
         alias = config.alias
         args = config.args
         try:
             registered = self.data[alias]
         except KeyError as e:
             if safe:
                 # Best effort: discard partial results and use the default.
                 return default
             raise ProcessException(
                 f"Unregistered {self.type.__name__.lower()}: {alias}") from e
         if isinstance(registered, type) and issubclass(registered, self.type):
             result.append(registered(args))
         else:
             result.append(registered)
     return result
예제 #6
0
def get(alias):
    """Return the entry registered in ``macros`` under ``alias``.

    Raises:
        ProcessException: If ``alias`` is not registered; the ``KeyError``
            is kept as the cause.
    """
    try:
        return macros[alias]
    except KeyError as e:
        # Chain explicitly, matching the other ProcessException sites.
        raise ProcessException(f"Unregistered macro {alias}") from e
예제 #7
0
def _process_cmd(ctx, cmd) -> "tuple[ProcessedCommand, set]":
    """Parse a command string into a ``ProcessedCommand``.

    A command starting with ``@`` is first expanded through the macro
    registry. The resulting text is a list of expressions separated by
    ``;``, or by a custom separator given as ``:<sep>`` at the start. Each
    expression is either a bare flag (``h`` for header, ``r`` for raw) or
    ``<type>:<body>`` with type ``d`` (delimiter), ``t`` (types),
    ``s`` (structure), ``i`` (inputs) or ``o`` (outputs). An empty body
    resets that setting to the value from ``DEFAULT_CMD``.

    Args:
        ctx: Context object holding the previous ``processed_command``;
            it is updated with the new one.
        cmd: The raw command text.

    Returns:
        ``(processed_command, changed)`` where ``changed`` is the set of
        attribute names that differ from the previous command. For ``":"``
        or an unchanged command, the previous command and an empty set.

    Raises:
        ProcessException: For an unsupported expression type.
    """
    cmd = cmd.strip()

    if cmd.startswith("@"):
        alias, *args = cmd.split(" ", 1)
        alias = alias[1:]
        macro = macros.get(alias)
        if args:
            args = macro["split"](args[0].strip())
        cmd = macro["fn"](*args)

    changed = set()
    if cmd == ":" or (ctx.processed_command
                      and ctx.processed_command.cmd == cmd):
        return ctx.processed_command, changed

    previous_command = ctx.processed_command
    result = ProcessedCommand(cmd)

    if cmd.startswith(":") and len(cmd) >= 2:
        cmd_separator = cmd[1]
        cmd = cmd[2:]
    else:
        cmd_separator = ";"

    for expression in cmd.split(cmd_separator):
        expression = expression.strip()
        if not expression:
            continue
        if expression == "h":
            result.has_header = True
            continue
        if expression == "r":
            result.raw = True
            continue

        expression_type, colon, expression_body = expression.partition(":")
        if not colon:
            # Bare words other than the flags above are ignored.
            continue
        expression_type = expression_type.strip()
        expression_body = expression_body.strip()

        # Exact comparisons on purpose: a substring test against "dhtsio"
        # would accept "", "dh" or "h" and fail later with an opaque error.
        if expression_type == "d":
            if expression_body.startswith("\\"):
                # Interpret escapes such as \t by evaluating the body as a
                # double-quoted string literal.
                expression_body = ast.literal_eval(f'"{expression_body}"')
            result.delimiter = expression_body or DEFAULT_CMD.delimiter
        elif expression_type == "t":
            if expression_body:
                result.types = parser.parse_types(expression_body)
            else:
                result.types = DEFAULT_CMD.types
        elif expression_type == "s":
            if expression_body:
                result.structure = parser.parse_structure(expression_body)
            else:
                result.structure = DEFAULT_CMD.structure
        elif expression_type == "i":
            if expression_body:
                result.inputs = parser.parse_processors(expression_body)
            else:
                # Previously assigned to a misspelled ``input`` attribute,
                # so an empty ``i:`` never reset the inputs.
                result.inputs = DEFAULT_CMD.inputs
        elif expression_type == "o":
            if expression_body:
                result.outputs = parser.parse_processors(expression_body)
            else:
                result.outputs = DEFAULT_CMD.outputs
        else:
            raise ProcessException(
                f"Unsupported command type: {expression_type}")

    # NOTE(review): previous_command is dereferenced unconditionally even
    # though the early-return guard above tolerates a falsy
    # ctx.processed_command; this also overwrites the previous command's
    # ``cmd`` with the separator-stripped text before diffing. Confirm both
    # are intended.
    previous_command.cmd = cmd
    ctx.processed_command = result
    for attr in [
            "cmd", "delimiter", "outputs", "inputs", "structure", "types",
            "has_header", "raw"
    ]:
        if getattr(previous_command, attr) != getattr(result, attr):
            changed.add(attr)
    return result, changed
예제 #8
0
    def impl(data: parser.ParseData, values):
        """Compile a parsed structure node into a row transformer or a key name.

        With ``values`` true the result is a callable taking a row; with
        ``values`` false it is the name to use as a dictionary key for that
        node. The function recurses over ``data.fields`` for structures and
        relies on names from the enclosing scope (``headers``,
        ``headers_inverse``, ``replace_missing``, ``replace_missing_from_row``,
        ``get``, ``getsafe``).

        Raises:
            ProcessException: For unsupported node or container types, an
                unexpected path part, or a default that is not a valid literal.
            AssertionError: If a ``StructureData`` node is compiled with
                ``values`` false.
        """
        if isinstance(data, parser.StructureData):
            # A structure can only be compiled as a value, never as a key name.
            assert values
            if not data.fields:
                # No explicit fields: convert the whole row into the container
                # named by data.type.
                if data.type == "[]":
                    return lambda r: replace_missing_from_row(r)
                elif data.type == "()":
                    return lambda r: tuple(replace_missing(i) for i in r)
                elif data.type == "s()":
                    return lambda r: set(replace_missing(i) for i in r)
                elif data.type in {"{}", "d()"}:
                    # Dictionary keys are taken from headers.values().
                    return lambda r: dict(
                        zip(headers.values(), replace_missing_from_row(r)))
                else:
                    raise ProcessException(
                        f"Unsupported data type: {data.type}")
            else:
                # Rebinds the ``values`` parameter to the list of compiled
                # per-field extractors; the lambdas below close over it.
                values = [impl(f, values=True) for f in data.fields]
                if data.type == "[]":
                    return lambda row: [v(row) for v in values]
                elif data.type == "()":
                    return lambda row: tuple(v(row) for v in values)
                elif data.type == "s()":
                    return lambda row: set(v(row) for v in values)
                elif data.type in {"{}", "d()"}:
                    # Key names are computed once, at compile time.
                    keys = [impl(f, values=False) for f in data.fields]
                    return lambda row: dict(zip(keys, [v(row)
                                                       for v in values]))
                else:
                    raise ProcessException(
                        f"Unsupported data type: {data.type}")
        elif isinstance(data, parser.KeyToValueData):
            if values:
                return impl(data.value, values=True)
            else:
                return data.key.value
        elif isinstance(data, parser.RefData):
            if values:
                path = data.value
                default = data.default
                has_default = default is not NO_DEFAULT
                if has_default:
                    try:
                        default = ast.literal_eval(default)
                    except Exception as e:
                        raise ProcessException(
                            f"Invalid default literal value: {default}") from e
                else:
                    default = None
                # True when at least one path part is explicitly optional.
                has_optional = any(
                    (isinstance(p, parser.IdData) and p.optional) or (
                        isinstance(p, parser.LocData) and p.optional)
                    for p in path)
                # One accessor per path part, applied in order by ``fn`` below.
                value_processor = []
                for part in path:
                    # getsafe replaces get when this part is optional, or when
                    # a default exists and no part of the path is explicitly
                    # optional. NOTE(review): presumably getsafe yields
                    # ``default`` instead of raising; it is defined outside
                    # this block, so confirm.
                    getter = get
                    if isinstance(part, parser.IdData):
                        if part.optional or (has_default and not has_optional):
                            getter = getsafe
                        key = part.value
                        if not value_processor:
                            # First part: a header name is translated to its
                            # column index. NOTE(review): .get() passes None on
                            # for an unknown name (assuming a dict); confirm
                            # the getters handle that.
                            index = headers_inverse.get(key)
                            value_processor.append(getter(index, default))
                        else:
                            value_processor.append(getter(key, default))
                    elif isinstance(part, parser.LocData):
                        if part.optional or (has_default and not has_optional):
                            getter = getsafe
                        index = part.value
                        if not value_processor:
                            # Only the first part has positive indices
                            # decremented by one; later parts are used as-is.
                            if index > 0:
                                index -= 1
                        value_processor.append(getter(index, default))
                    else:
                        raise ProcessException(f"Unexpected part {part}")

                def fn(row):
                    """Walk the row through each accessor of the path."""
                    result = row
                    for p in value_processor:
                        result = p(result)
                        # Identity check: stop once a step yields the default
                        # object. Without an explicit default this is None, so
                        # a None intermediate value also ends the walk.
                        if result is default:
                            break
                    return result

                return fn
            else:
                return _extract_structure_field_name(data, headers)
        else:
            raise ProcessException(f"Unsupported parser data: {data}")
예제 #9
0
def _parse(element, expr):
    try:
        parsed = element.parseString(expr, parseAll=True)
    except Exception:
        raise ProcessException(f"Not a valid expresion: {expr}")
    return parsed.asList()[0]