Ejemplos de join_field en Python, ejemplos de pyLibrary.dot.join_field en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: meta.py Proyecto: klahnakoski/MoDevETL

    def _get_columns(self, table=None):
        # TODO: HANDLE MORE THEN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
        alias_done = set()
        index = split_field(table)[0]
        query_path = split_field(table)[1:]
        metadata = self.default_es.get_metadata(index=index)
        for index, meta in qb.sort(metadata.indices.items(), {"value": 0, "sort": -1}):
            for _, properties in meta.mappings.items():
                columns = _elasticsearch.parse_properties(index, None, properties.properties)
                columns = columns.filter(lambda r: not r.abs_name.startswith("other.") and not r.abs_name.startswith("previous_values.cf_"))  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
                with Timer("upserting {{num}} columns", {"num": len(columns)}, debug=DEBUG):
                    with self.columns.locker:
                        for c in columns:
                            # ABSOLUTE
                            c.table = join_field([index]+query_path)
                            self.upsert_column(c)

                            for alias in meta.aliases:
                                # ONLY THE LATEST ALIAS IS CHOSEN TO GET COLUMNS
                                if alias in alias_done:
                                    continue
                                alias_done.add(alias)
                                c = copy(c)
                                c.table = join_field([alias]+query_path)
                                self.upsert_column(c)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: expressions.py Proyecto: davehunt/ActiveData

    def getFrameVariables(self, body):
        contextVariables = []
        columns = self.fromData.columns

        parentVarNames = set()  # ALL PARENTS OF VARIABLES WITH "." IN NAME
        body = body.replace(".?", ".")

        for i, c in enumerate(columns):
            j = body.find(c.name, 0)
            while j >= 0:
                s = j
                j = body.find(c.name, s + 1)

                test0 = body[s - 1:s + len(c.name) + 1:]
                test3 = body[s - 8:s + len(c.name):]

                if test0[:-1] == "\"" + c.name:
                    continue
                if test3 == "_source." + c.name:
                    continue

                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name +
                                                " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")

                body = body.replace(c.name, "-" * len(c.name))

                if self.isLean or c.useSource:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name +
                                                " = getSourceValue(\"" +
                                                c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = _source[\"" +
                                                c.name + "\"];\n")
                else:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getDocValue(\"" +
                                                c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = getDocValue(\"" +
                                                c.name + "\");\n")
                break

        return "".join(contextVariables)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: meta.py Proyecto: klahnakoski/TestFailures

            def add_column(c, query_path):
                c.last_updated = Date.now()
                c.table = join_field([c.es_index]+split_field(query_path[0]))

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    for alias in meta.aliases:
                        c = copy(c)
                        c.table = join_field([alias]+split_field(query_path[0]))
                        self._upsert_column(c)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: query.py Proyecto: klahnakoski/intermittents

def _get_nested_path(field, schema):
    if not INDEX_CACHE:
        _late_import()

    if is_keyword(field):
        field = join_field([schema.es.alias] + split_field(field))
        for i, f in reverse(enumerate(split_field(field))):
            path = join_field(split_field(field)[0:i + 1:])
            if path in INDEX_CACHE:
                return join_field(split_field(path)[1::])
    return None

Ejemplo n.º 5

0

Mostrar archivo

Archivo: list_usingPythonList.py Proyecto: klahnakoski/TestFailures

def _get_schema_from_list(frum, columns, prefix, nested_path, name_to_column):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            full_name = join_field(prefix)
            column = name_to_column.get(full_name)
            if not column:
                column = Column(
                    name=full_name,
                    table=".",
                    es_column=full_name,
                    es_index=".",
                    type="undefined",
                    nested_path=nested_path
                )
                columns[full_name] = column
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix + [name])
                column = name_to_column.get(full_name)
                if not column:
                    column = Column(
                        name=full_name,
                        table=".",
                        es_column=full_name,
                        es_index=".",
                        type="undefined",
                        nested_path=nested_path
                    )
                columns[full_name] = column
                if isinstance(value, list):
                    if len(value)==0:
                        this_type = "undefined"
                    elif len(value)==1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name], nested_path, name_to_column)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0])+[name])]+np)
                    _get_schema_from_list(value, columns, prefix + [name], newpath, name_to_column)

Ejemplo n.º 6

0

Mostrar archivo

            def add_column(c, query_path):
                c.last_updated = Date.now()
                c.table = join_field([c.es_index] + split_field(query_path[0]))

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    for alias in meta.aliases:
                        c = copy(c)
                        c.table = join_field([alias] +
                                             split_field(query_path[0]))
                        self._upsert_column(c)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: list_usingPythonList.py Proyecto: davehunt/ActiveData

def _get_schema_from_list(frum, columns, prefix, nested_path, name_to_column):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            full_name = join_field(prefix)
            column = name_to_column.get(full_name)
            if not column:
                column = Column(name=full_name,
                                table=".",
                                es_column=full_name,
                                es_index=".",
                                type="undefined",
                                nested_path=nested_path)
                columns[full_name] = column
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix + [name])
                column = name_to_column.get(full_name)
                if not column:
                    column = Column(name=full_name,
                                    table=".",
                                    es_column=full_name,
                                    es_index=".",
                                    type="undefined",
                                    nested_path=nested_path)
                columns[full_name] = column
                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name],
                                          nested_path, name_to_column)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist(
                        [join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, columns, prefix + [name],
                                          newpath, name_to_column)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: expressions.py Proyecto: klahnakoski/MoDataSubmission

    def getFrameVariables(self, body):
        contextVariables = []
        columns = self.fromData.columns

        parentVarNames = set()    # ALL PARENTS OF VARIABLES WITH "." IN NAME
        body = body.replace(".?", ".")

        for i, c in enumerate(columns):
            j = body.find(c.name, 0)
            while j >= 0:
                s = j
                j = body.find(c.name, s + 1)

                test0 = body[s - 1: s + len(c.name) + 1:]
                test3 = body[s - 8: s + len(c.name):]

                if test0[:-1] == "\"" + c.name:
                    continue
                if test3 == "_source." + c.name:
                    continue

                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name + " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")

                body = body.replace(c.name, "-"*len(c.name))

                if self.isLean or c.useSource:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getSourceValue(\"" + c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = _source[\"" + c.name + "\"];\n")
                else:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
                break

        return "".join(contextVariables)

Ejemplo n.º 9

0

Mostrar archivo

Archivo: meta.py Proyecto: klahnakoski/TestFailures

    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS
        """
        try:
            # LAST TIME WE GOT INFO FOR THIS TABLE
            short_name = join_field(split_field(table_name)[0:1])
            table = self.get_table(short_name)[0]

            if not table:
                table = Table(
                    name=short_name,
                    url=None,
                    query_path=None,
                    timestamp=Date.now()
                )
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._get_columns(table=short_name)
            elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
                table.timestamp = Date.now()
                self._get_columns(table=short_name)

            with self.meta.columns.locker:
                columns = self.meta.columns.find(table_name, column_name)
            if columns:
                columns = jx.sort(columns, "name")
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    Log.note("waiting for columns to update {{columns|json}}", columns=[c.table+"."+c.es_column for c in columns if not c.last_updated])
                    Thread.sleep(seconds=1)
                return columns
        except Exception, e:
            Log.error("Not expected", cause=e)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: dimensions.py Proyecto: klahnakoski/MoDevETL

    def getDomain(self, **kwargs):
        # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
        kwargs = wrap(kwargs)
        kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if isinstance(self.fields, list) else None)

        if not self.partitions and self.edges:
            # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP
            partitions = [
                {
                    "name": v.name,
                    "value": v.name,
                    "where": v.where,
                    "style": v.style,
                    "weight": v.weight  # YO! WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.edges)
                if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT) and v.where
            ]
            self.isFacet = True
        elif kwargs.depth == None:  # ASSUME self.fields IS A dict
            partitions = DictList()
            for i, part in enumerate(self.partitions):
                if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                    break
                partitions.append({
                    "name":part.name,
                    "value":part.value,
                    "where":part.where,
                    "style":coalesce(part.style, part.parent.style),
                    "weight":part.weight   # YO!  WHAT DO WE *NOT* COPY?
                })
        elif kwargs.depth == 0:
            partitions = [
                {
                    "name":v.name,
                    "value":v.value,
                    "where":v.where,
                    "style":v.style,
                    "weight":v.weight   # YO!  WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.partitions)
                if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT)]
        elif kwargs.depth == 1:
            partitions = DictList()
            rownum = 0
            for i, part in enumerate(self.partitions):
                if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                    continue
                rownum += 1
                try:
                    for j, subpart in enumerate(part.partitions):
                        partitions.append({
                            "name":join_field(split_field(subpart.parent.name) + [subpart.name]),
                            "value":subpart.value,
                            "where":subpart.where,
                            "style":coalesce(subpart.style, subpart.parent.style),
                            "weight":subpart.weight   # YO!  WHAT DO WE *NOT* COPY?
                        })
                except Exception, e:
                    Log.error("", e)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: jx.py Proyecto: davehunt/ActiveData

    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname
        """
        col = split_field(fieldname)
        d = data
        for i, c in enumerate(col):
            try:
                d = d[c]
            except Exception, e:
                Log.error("{{name}} does not exist", name=fieldname)
            if isinstance(d, list) and len(col) > 1:
                if len(primary_column) <= depth + i:
                    primary_nested.append(True)
                    primary_column.append(c)
                    primary_branch.append(d)
                elif primary_nested[depth] and primary_column[depth + i] != c:
                    Log.error("only one branch of tree allowed")
                else:
                    primary_nested[depth + i] = True
                    primary_column[depth + i] = c
                    primary_branch[depth + i] = d

                return c, join_field(col[i + 1:])
            else:
                if len(primary_column) <= depth + i:
                    primary_nested.append(False)
                    primary_column.append(c)
                    primary_branch.append([d])

Ejemplo n.º 12

0

Mostrar archivo

Archivo: util.py Proyecto: klahnakoski/esReplicate

def es_query_template(path):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
    :param path:
    :return:
    """
    sub_path = split_field(path)[1:]

    if sub_path:
        f0 = {}
        f1 = {}
        output = wrap(
            {
                "filter": {
                    "and": [
                        f0,
                        {"nested": {"path": join_field(sub_path), "filter": f1, "inner_hits": {"size": 100000}}},
                    ]
                },
                "from": 0,
                "size": 0,
                "sort": [],
            }
        )
        return output, wrap([f0, f1])
    else:
        f0 = {}
        output = wrap({"query": {"filtered": {"filter": f0}}, "from": 0, "size": 0, "sort": []})
        return output, wrap([f0])

Ejemplo n.º 13

0

Mostrar archivo

Archivo: jx.py Proyecto: klahnakoski/MoDataSubmission

    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname
        """
        col = split_field(fieldname)
        d = data
        for i, c in enumerate(col):
            try:
                d = d[c]
            except Exception, e:
                Log.error("{{name}} does not exist", name=fieldname)
            if isinstance(d, list) and len(col) > 1:
                if len(primary_column) <= depth + i:
                    primary_nested.append(True)
                    primary_column.append(c)
                    primary_branch.append(d)
                elif primary_nested[depth] and primary_column[depth + i] != c:
                    Log.error("only one branch of tree allowed")
                else:
                    primary_nested[depth + i] = True
                    primary_column[depth + i] = c
                    primary_branch[depth + i] = d

                return c, join_field(col[i + 1 :])
            else:
                if len(primary_column) <= depth + i:
                    primary_nested.append(False)
                    primary_column.append(c)
                    primary_branch.append([d])

Ejemplo n.º 14

0

Mostrar archivo

def set(constants):
    """
    REACH INTO THE MODULES AND OBJECTS TO SET CONSTANTS.
    THINK OF THIS AS PRIMITIVE DEPENDENCY INJECTION FOR MODULES.
    USEFUL FOR SETTING DEBUG FLAGS.
    """
    if not constants:
        return
    constants = wrap(constants)

    for k, new_value in constants.leaves():
        errors = []
        try:
            old_value = dot.set_attr(sys.modules, k, new_value)
            continue
        except Exception, e:
            errors.append(e)

        # ONE MODULE IS MISSING, THE CALLING MODULE
        try:
            caller_globals = sys._getframe(1).f_globals
            caller_file = caller_globals["__file__"]
            if not caller_file.endswith(".py"):
                raise Exception("do not know how to handle non-python caller")
            caller_module = caller_file[:-3].replace("/", ".")

            path = split_field(k)
            for i, p in enumerate(path):
                if i == 0:
                    continue
                prefix = join_field(path[:1])
                name = join_field(path[i:])
                if caller_module.endswith(prefix):
                    old_value = dot.set_attr(caller_globals, name, new_value)
                    if DEBUG:
                        from pyLibrary.debugs.logs import Log

                        Log.note("Changed {{module}}[{{attribute}}] from {{old_value}} to {{new_value}}",
                            module= prefix,
                            attribute= name,
                            old_value= old_value,
                            new_value= new_value)
                    break
        except Exception, e:
            errors.append[e]

Ejemplo n.º 15

0

Mostrar archivo

Archivo: expressions.py Proyecto: klahnakoski/MoDataSubmission

                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name + " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")

Ejemplo n.º 16

0

Mostrar archivo

Archivo: query.py Proyecto: davehunt/ActiveData

def _test_mode_wait(query):
    """
    WAIT FOR METADATA TO ARRIVE ON INDEX
    :param query: dict() OF REQUEST BODY
    :return: nothing
    """
    try:
        m = meta.singlton
        now = Date.now()
        end_time = now + MINUTE

        # MARK COLUMNS DIRTY
        m.meta.columns.update({
            "clear": ["partitions", "count", "cardinality", "last_updated"],
            "where": {
                "eq": {
                    "table": join_field(split_field(query["from"])[0:1])
                }
            }
        })

        # BE SURE THEY ARE ON THE todo QUEUE FOR RE-EVALUATION
        cols = [
            c for c in m.get_columns(table_name=query["from"], force=True)
            if c.type not in STRUCT
        ]
        for c in cols:
            Log.note("Mark {{column}} dirty at {{time}}",
                     column=c.name,
                     time=now)
            c.last_updated = now - TOO_OLD
            m.todo.push(c)

        while end_time > now:
            # GET FRESH VERSIONS
            cols = [
                c for c in m.get_columns(table_name=query["from"])
                if c.type not in STRUCT
            ]
            for c in cols:
                if not c.last_updated or c.cardinality == None:
                    Log.note(
                        "wait for column (table={{col.table}}, name={{col.name}}) metadata to arrive",
                        col=c)
                    break
            else:
                break
            Thread.sleep(seconds=1)
        for c in cols:
            Log.note(
                "fresh column name={{column.name}} updated={{column.last_updated|date}} parts={{column.partitions}}",
                column=c)
    except Exception, e:
        Log.warning("could not pickup columns", cause=e)

Ejemplo n.º 17

0

Mostrar archivo

Archivo: stream.py Proyecto: klahnakoski/MoDevETL

    def _decode_object(index, parent_path, path, name2index, destination=None, expected_vars=NO_VARS):
        if destination is None:
            destination = {}

        nested_done = False
        while True:
            c, index = skip_whitespace(index)
            if c == b',':
                continue
            elif c == b'"':
                name, index = simple_token(index, c)

                c, index = skip_whitespace(index)
                if c != b':':
                    Log.error("Expecting colon")
                c, index = skip_whitespace(index)

                child_expected = needed(name, expected_vars)
                if child_expected and nested_done:
                    Log.error("Expected property found after nested json.  Iteration failed.")

                full_path = join_field(split_field(parent_path)+ [name])
                if path and (path[0] == full_path or path[0].startswith(full_path+".")):
                    # THE NESTED PROPERTY WE ARE LOOKING FOR
                    if path[0] == full_path:
                        new_path = path[1:]
                    else:
                        new_path = path

                    nested_done = True
                    for j, i in _decode(index - 1, full_path, new_path, name2index, expected_vars=child_expected):
                        index = i
                        j = {name: j}
                        for k, v in destination.items():
                            j.setdefault(k, v)
                        yield j, index
                    continue

                if child_expected:
                    # SOME OTHER PROPERTY
                    value, index = _decode_token(index, c, full_path, path, name2index, None, expected_vars=child_expected)
                    destination[name] = value
                else:
                    # WE DO NOT NEED THIS VALUE
                    index = jump_to_end(index, c)
                    continue


            elif c == "}":
                break

        if not nested_done:
            yield destination, index

Ejemplo n.º 18

0

Mostrar archivo

Archivo: expressions.py Proyecto: davehunt/ActiveData

                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name +
                                                " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")

Ejemplo n.º 19

0

Mostrar archivo

def wrap_from(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not _containers.config.default.settings:
            Log.error(
                "expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info"
            )

        type_ = None
        index = frum
        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)
        else:
            type_ = _containers.config.default.type
            index = join_field(split_field(frum)[:1:])

        settings = set_default({
            "index": index,
            "name": frum
        }, _containers.config.default.settings)
        settings.type = None
        return _containers.type2container[type_](settings)
    elif isinstance(
            frum,
            Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return _containers.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"]
                                        or isinstance(frum["from"],
                                                      (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum

Ejemplo n.º 20

0

Mostrar archivo

Archivo: __init__.py Proyecto: klahnakoski/TestFailures

def wrap_from(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not _containers.config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        type_ = None
        index = frum
        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)
        else:
            type_ = _containers.config.default.type
            index = join_field(split_field(frum)[:1:])

        settings = set_default(
            {
                "index": index,
                "name": frum
            },
            _containers.config.default.settings
        )
        settings.type = None
        return _containers.type2container[type_](settings)
    elif isinstance(frum, Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return _containers.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum

Ejemplo n.º 21

0

Mostrar archivo

Archivo: convert.py Proyecto: davehunt/ActiveData

    def _inner(schema, parent_name, indent):
        more_lines = []
        for k,v in schema.items():
            full_name = join_field(split_field(parent_name)+[k])
            details = indent+"* "+_md_code(full_name)
            if v.type:
                details += " - "+_md_italic(v.type)
            else:
                Log.error("{{full_name}} is missing type", full_name=full_name)
            if v.description:
                details += " " + v.description
            more_lines.append(details)

            if v.type in ["object", "array", "nested"]:
                more_lines.extend(_inner(v.properties, full_name, indent+"  "))
        return more_lines

Ejemplo n.º 22

0

Mostrar archivo

Archivo: Table_usingSQLite.py Proyecto: klahnakoski/MoDataSubmission

    def add_column(self, column):
        """
        ADD COLUMN, IF IT DOES NOT EXIST ALREADY
        """
        if column.name not in self.columns:
            self.columns[column.name] = {column}
        elif column.type not in [c.type for c in self.columns[column.name]]:
            self.columns[column.name].add(column)

        if column.type == "nested":
            nested_table_name = join_field(split_field(self.name) + split_field(column.name))
            # MAKE THE TABLE
            table = Table_usingSQLite(nested_table_name, self.db, self.uid + [UID_PREFIX+"id"+unicode(len(self.uid))], exists=False)
            self.nested_tables[nested_table_name] = table
        else:
            self.db.execute(
                "ALTER TABLE " + quote_table(self.name) + " ADD COLUMN " + _quote_column(column) + " " + column.type
            )

Ejemplo n.º 23

0

Mostrar archivo

def es_query_template(path):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
    :param path:
    :return:
    """
    sub_path = split_field(path)[1:]

    if sub_path:
        f0 = {}
        f1 = {}
        output = wrap({
            "filter": {
                "and": [
                    f0, {
                        "nested": {
                            "path": join_field(sub_path),
                            "filter": f1,
                            "inner_hits": {
                                "size": 100000
                            }
                        }
                    }
                ]
            },
            "from": 0,
            "size": 0,
            "sort": []
        })
        return output, wrap([f0, f1])
    else:
        f0 = {}
        output = wrap({
            "query": {
                "filtered": {
                    "filter": f0
                }
            },
            "from": 0,
            "size": 0,
            "sort": []
        })
        return output, wrap([f0])

Ejemplo n.º 24

0

Mostrar archivo

Archivo: lists.py Proyecto: klahnakoski/esReplicate

def _get_schema_from_list(frum, columns, prefix, nested_path):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    names = {}
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type!="object":
            agg_type = names.get(".", "undefined")
            names["."] = _merge_type[agg_type][row_type]
        else:
            for name, value in d.items():
                agg_type = names.get(name, "undefined")
                if isinstance(value, list):
                    if len(value)==0:
                        this_type = "undefined"
                    else:
                        this_type=_type_to_name[value[0].__class__]
                        if this_type=="object":
                            this_type="nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[agg_type][this_type]
                names[name] = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name], nested_path)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0])+[name])]+np)
                    _get_schema_from_list(value, columns, prefix + [name], newpath)

    for n, t in names.items():
        full_name = ".".join(prefix + [n])
        column = Column(
            name=full_name,
            table=".",
            es_column=full_name,
            es_index=".",
            type=t,
            nested_path=nested_path
        )
        columns.append(column)

Ejemplo n.º 25

0

Mostrar archivo

    def convert(self, expr):
        """
        ADD THE ".$value" SUFFIX TO ALL VARIABLES
        """
        if isinstance(expr, Expression):
            vars_ = expr.vars()
            rename = {
                v: join_field(split_field(v) + ["$value"])
                for v in vars_
            }
            return expr.map(rename)

        if expr is True or expr == None or expr is False:
            return expr
        elif Math.is_number(expr):
            return expr
        elif expr == ".":
            return "."
        elif is_keyword(expr):
            #TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX
            return expr + ".$value"
        elif isinstance(expr, basestring):
            Log.error("{{name|quote}} is not a valid variable name", name=expr)
        elif isinstance(expr, Date):
            return expr
        elif isinstance(expr, QueryOp):
            return self._convert_query(expr)
        elif isinstance(expr, Mapping):
            if expr["from"]:
                return self._convert_query(expr)
            elif len(expr) >= 2:
                #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
                return wrap({
                    name: self.convert(value)
                    for name, value in expr.items()
                })
            else:
                # ASSUME SINGLE-CLAUSE EXPRESSION
                k, v = expr.items()[0]
                return self.converter_map.get(k, self._convert_bop)(k, v)
        elif isinstance(expr, (list, set, tuple)):
            return wrap([self.convert(value) for value in expr])

Ejemplo n.º 26

0

Mostrar archivo

Archivo: __init__.py Proyecto: davehunt/ActiveData

    def new_instance(type, frum, schema=None):
        """
        Factory!
        """
        if not type2container:
            _delayed_imports()

        if isinstance(frum, Container):
            return frum
        elif isinstance(frum, _Cube):
            return frum
        elif isinstance(frum, _Query):
            return _run(frum)
        elif isinstance(frum, (list, set, GeneratorType)):
            return _ListContainer(frum)
        elif isinstance(frum, basestring):
            # USE DEFAULT STORAGE TO FIND Container
            if not config.default.settings:
                Log.error(
                    "expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info"
                )

            settings = set_default(
                {
                    "index": join_field(split_field(frum)[:1:]),
                    "name": frum,
                }, config.default.settings)
            settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
            return type2container["elasticsearch"](settings)
        elif isinstance(frum, Mapping):
            frum = wrap(frum)
            if frum.type and type2container[frum.type]:
                return type2container[frum.type](frum.settings)
            elif frum["from"]:
                frum = copy(frum)
                frum["from"] = Container(frum["from"])
                return _Query.wrap(frum)
            else:
                Log.error("Do not know how to handle {{frum|json}}", frum=frum)
        else:
            Log.error("Do not know how to handle {{type}}",
                      type=frum.__class__.__name__)

Ejemplo n.º 27

0

Mostrar archivo

Archivo: __init__.py Proyecto: klahnakoski/TestFailures

    def new_instance(type, frum, schema=None):
        """
        Factory!
        """
        if not type2container:
            _delayed_imports()

        if isinstance(frum, Container):
            return frum
        elif isinstance(frum, _Cube):
            return frum
        elif isinstance(frum, _Query):
            return _run(frum)
        elif isinstance(frum, (list, set, GeneratorType)):
            return _ListContainer(frum)
        elif isinstance(frum, basestring):
            # USE DEFAULT STORAGE TO FIND Container
            if not config.default.settings:
                Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

            settings = set_default(
                {
                    "index": join_field(split_field(frum)[:1:]),
                    "name": frum,
                },
                config.default.settings
            )
            settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
            return type2container["elasticsearch"](settings)
        elif isinstance(frum, Mapping):
            frum = wrap(frum)
            if frum.type and type2container[frum.type]:
                return type2container[frum.type](frum.settings)
            elif frum["from"]:
                frum = copy(frum)
                frum["from"] = Container(frum["from"])
                return _Query.wrap(frum)
            else:
                Log.error("Do not know how to handle {{frum|json}}", frum=frum)
        else:
            Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)

Ejemplo n.º 28

0

Mostrar archivo

Archivo: expressions.py Proyecto: klahnakoski/MoDataSubmission

    def compile_expression(self, expression, constants=None):
        # EXPAND EXPRESSION WITH ANY CONSTANTS
        expression = setValues(expression, constants)

        fromPath = self.fromData.name           # FIRST NAME IS THE INDEX
        indexName = join_field(split_field(fromPath)[:1:])

        context = self.getFrameVariables(expression)
        if context == "":
            return addFunctions(expression).head+expression

        func = UID()
        code = addFunctions(context+expression)
        output = code.head + \
            'var ' + func + ' = function(' + indexName + '){\n' + \
            context + \
            expression + ";\n" + \
            '};\n' + \
            func + '(_source)\n'

        return Compiled(output)

Ejemplo n.º 29

0

Mostrar archivo

Archivo: Table_usingSQLite.py Proyecto: klahnakoski/MoDataSubmission

    def insert(self, docs):
        doc_collection = {}
        for d in docs:
            # ASSIGN A NON-NULL PRIMARY KEY
            if any(v == None for v in self.uid_accessor(d)):
                for u in self.uid:
                    d[u] = coalesce(d[u], unique_name())

            uid = wrap({u: d[u] for u in self.uid})
            self.flatten(d, uid, doc_collection)

        for nested_path, insertion in doc_collection.items():
            active_columns = list(insertion.active_columns)
            vals = [[quote_value(get_document_value(d, c)) for c in active_columns] for d in insertion.rows]

            command = "INSERT INTO " + quote_table(join_field(split_field(self.name)+split_field(nested_path[0]))) + "(" + \
                      ",".join(_quote_column(c) for c in active_columns) + \
                      ")\n" + \
                      " UNION ALL\n".join("SELECT " + ",".join(vv) for vv in vals)

            self.db.execute(command)

Ejemplo n.º 30

0

Mostrar archivo

Archivo: expressions.py Proyecto: davehunt/ActiveData

    def compile_expression(self, expression, constants=None):
        # EXPAND EXPRESSION WITH ANY CONSTANTS
        expression = setValues(expression, constants)

        fromPath = self.fromData.name  # FIRST NAME IS THE INDEX
        indexName = join_field(split_field(fromPath)[:1:])

        context = self.getFrameVariables(expression)
        if context == "":
            return addFunctions(expression).head + expression

        func = UID()
        code = addFunctions(context + expression)
        output = code.head + \
            'var ' + func + ' = function(' + indexName + '){\n' + \
            context + \
            expression + ";\n" + \
            '};\n' + \
            func + '(_source)\n'

        return Compiled(output)

Ejemplo n.º 31

0

Mostrar archivo

    def insert(self, docs):
        doc_collection = {}
        for d in docs:
            # ASSIGN A NON-NULL PRIMARY KEY
            if any(v == None for v in self.uid_accessor(d)):
                for u in self.uid:
                    d[u] = coalesce(d[u], unique_name())

            uid = wrap({u: d[u] for u in self.uid})
            self.flatten(d, uid, doc_collection)

        for nested_path, insertion in doc_collection.items():
            active_columns = list(insertion.active_columns)
            vals = [[
                quote_value(get_document_value(d, c)) for c in active_columns
            ] for d in insertion.rows]

            command = "INSERT INTO " + quote_table(join_field(split_field(self.name)+split_field(nested_path[0]))) + "(" + \
                      ",".join(_quote_column(c) for c in active_columns) + \
                      ")\n" + \
                      " UNION ALL\n".join("SELECT " + ",".join(vv) for vv in vals)

            self.db.execute(command)

Ejemplo n.º 32

0

Mostrar archivo

    def add_column(self, column):
        """
        ADD COLUMN, IF IT DOES NOT EXIST ALREADY
        """
        if column.name not in self.columns:
            self.columns[column.name] = {column}
        elif column.type not in [c.type for c in self.columns[column.name]]:
            self.columns[column.name].add(column)

        if column.type == "nested":
            nested_table_name = join_field(
                split_field(self.name) + split_field(column.name))
            # MAKE THE TABLE
            table = Table_usingSQLite(
                nested_table_name,
                self.db,
                self.uid + [UID_PREFIX + "id" + unicode(len(self.uid))],
                exists=False)
            self.nested_tables[nested_table_name] = table
        else:
            self.db.execute("ALTER TABLE " + quote_table(self.name) +
                            " ADD COLUMN " + _quote_column(column) + " " +
                            column.type)

Ejemplo n.º 33

0

Mostrar archivo

    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS
        """
        try:
            # LAST TIME WE GOT INFO FOR THIS TABLE
            short_name = join_field(split_field(table_name)[0:1])
            table = self.get_table(short_name)[0]

            if not table:
                table = Table(name=short_name,
                              url=None,
                              query_path=None,
                              timestamp=Date.now())
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._get_columns(table=short_name)
            elif force or table.timestamp == None or table.timestamp < Date.now(
            ) - MAX_COLUMN_METADATA_AGE:
                table.timestamp = Date.now()
                self._get_columns(table=short_name)

            with self.meta.columns.locker:
                columns = self.meta.columns.find(table_name, column_name)
            if columns:
                columns = jx.sort(columns, "name")
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    Log.note("waiting for columns to update {{columns|json}}",
                             columns=[
                                 c.table + "." + c.es_column for c in columns
                                 if not c.last_updated
                             ])
                    Thread.sleep(seconds=1)
                return columns
        except Exception, e:
            Log.error("Not expected", cause=e)

Ejemplo n.º 34

0

Mostrar archivo

Archivo: typed.py Proyecto: klahnakoski/esReplicate

    def convert(self, expr):
        """
        ADD THE ".$value" SUFFIX TO ALL VARIABLES
        """
        if isinstance(expr, Expression):
            vars_ = expr.vars()
            rename = {v: join_field(split_field(v)+["$value"]) for v in vars_}
            return expr.map(rename)

        if expr is True or expr == None or expr is False:
            return expr
        elif Math.is_number(expr):
            return expr
        elif expr == ".":
            return "."
        elif is_keyword(expr):
            #TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX
            return expr + ".$value"
        elif isinstance(expr, basestring):
            Log.error("{{name|quote}} is not a valid variable name", name=expr)
        elif isinstance(expr, Date):
            return expr
        elif isinstance(expr, QueryOp):
            return self._convert_query(expr)
        elif isinstance(expr, Mapping):
            if expr["from"]:
                return self._convert_query(expr)
            elif len(expr) >= 2:
                #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
                return wrap({name: self.convert(value) for name, value in expr.items()})
            else:
                # ASSUME SINGLE-CLAUSE EXPRESSION
                k, v = expr.items()[0]
                return self.converter_map.get(k, self._convert_bop)(k, v)
        elif isinstance(expr, (list, set, tuple)):
            return wrap([self.convert(value) for value in expr])

Ejemplo n.º 35

0

Mostrar archivo

Archivo: Table_usingSQLite.py Proyecto: klahnakoski/MoDataSubmission

def typed_column(name, type):
    return join_field(split_field(name)+["$" + type])

Ejemplo n.º 36

0

Mostrar archivo

    def update(self, command):
        """
        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        """
        command = wrap(command)

        # REJECT DEEP UPDATES
        touched_columns = command.set.keys() | set(listwrap(command["clear"]))
        for c in self.get_leaves():
            if c.name in touched_columns and c.nested_path and len(
                    c.name) > len(c.nested_path[0]):
                Log.error("Deep update not supported")

        # ADD NEW COLUMNS
        where = jx_expression(command.where)
        _vars = where.vars()
        _map = {
            v: c.es_column
            for v in _vars for c in self.columns.get(v, Null)
            if c.type not in ["nested", "object"]
        }
        where_sql = where.map(_map).to_sql()
        new_columns = set(command.set.keys()) - set(self.columns.keys())
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_type(nested_value)
            column = Column(name=new_column_name,
                            type=ctype,
                            table=self.name,
                            es_index=self.name,
                            es_column=typed_column(new_column_name, ctype))
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_type(nested_value) == "nested":
                nested_table_name = join_field(
                    split_field(self.name) + split_field(nested_column_name))
                nested_table = self.nested_tables[nested_table_name]
                self_primary_key = ",".join(
                    quote_table(c.es_column) for u in self.uid
                    for c in self.columns[u])
                extra_key_name = UID_PREFIX + "id" + unicode(len(self.uid))
                extra_key = [e
                             for e in nested_table.columns[extra_key_name]][0]

                sql_command = "DELETE FROM " + quote_table(nested_table.name) + \
                              "\nWHERE EXISTS (" + \
                              "\nSELECT 1 " + \
                              "\nFROM " + quote_table(nested_table.name) + " n" + \
                              "\nJOIN (" + \
                              "\nSELECT " + self_primary_key + \
                              "\nFROM " + quote_table(self.name) + \
                              "\nWHERE " + where_sql + \
                              "\n) t ON " + \
                              " AND ".join(
                                  "t." + quote_table(c.es_column) + " = n." + quote_table(c.es_column)
                                  for u in self.uid
                                  for c in self.columns[u]
                              ) + \
                              ")"
                self.db.execute(sql_command)

                # INSERT NEW RECORDS
                if not nested_value:
                    continue

                doc_collection = {}
                for d in listwrap(nested_value):
                    nested_table.flatten(d,
                                         Dict(),
                                         doc_collection,
                                         path=nested_column_name)

                prefix = "INSERT INTO " + quote_table(nested_table.name) + \
                         "(" + \
                         self_primary_key + "," + \
                         _quote_column(extra_key) + "," + \
                         ",".join(
                             quote_table(c.es_column)
                             for c in doc_collection.get(".", Null).active_columns
                         ) + ")"

                # BUILD THE PARENT TABLES
                parent = "\nSELECT " + \
                         self_primary_key + \
                         "\nFROM " + quote_table(self.name) + \
                         "\nWHERE " + jx_expression(command.where).to_sql()

                # BUILD THE RECORDS
                children = " UNION ALL ".join(
                    "\nSELECT " + quote_value(i) + " " +
                    quote_table(extra_key.es_column) + "," + ",".join(
                        quote_value(row[c.name]) + " " +
                        quote_table(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns)
                    for i, row in enumerate(
                        doc_collection.get(".", Null).rows))

                sql_command = prefix + \
                              "\nSELECT " + \
                              ",".join(
                                  "p." + quote_table(c.es_column)
                                  for u in self.uid for c in self.columns[u]
                              ) + "," + \
                              "c." + _quote_column(extra_key) + "," + \
                              ",".join(
                                  "c." + quote_table(c.es_column)
                                  for c in doc_collection.get(".", Null).active_columns
                              ) + \
                              "\nFROM (" + parent + ") p " + \
                              "\nJOIN (" + children + \
                              "\n) c on 1=1"

                self.db.execute(sql_command)

                # THE CHILD COLUMNS COULD HAVE EXPANDED
                # ADD COLUMNS TO SELF
                for n, cs in nested_table.columns.items():
                    for c in cs:
                        column = Column(name=c.name,
                                        type=c.type,
                                        table=self.name,
                                        es_index=c.es_index,
                                        es_column=c.es_column,
                                        nested_path=[nested_column_name] +
                                        listwrap(c.nested_path))
                        if c.name not in self.columns:
                            self.columns[column.name] = {column}
                        elif c.type not in [
                                c.type for c in self.columns[c.name]
                        ]:
                            self.columns[column.name].add(column)

        command = "UPDATE " + quote_table(self.name) + " SET " + \
                  ",\n".join(
                      [
                          _quote_column(c) + "=" + quote_value(get_if_type(v, c.type))
                          for k, v in command.set.items()
                          if get_type(v) != "nested"
                          for c in self.columns[k]
                          if c.type != "nested" and not c.nested_path
                          ] +
                      [
                          _quote_column(c) + "=NULL"
                          for k in listwrap(command["clear"])
                          if k in self.columns
                          for c in self.columns[k]
                          if c.type != "nested" and not c.nested_path
                          ]
                  ) + \
                  " WHERE " + where_sql

        self.db.execute(command)

Ejemplo n.º 37

0

Mostrar archivo

        def _flatten(d, uid, path, nested_path):
            insertion = doc_collection[
                "." if not nested_path else nested_path[0]]
            row = uid.copy()
            insertion.rows.append(row)
            if isinstance(d, Mapping):
                for k, v in d.items():
                    cname = join_field(split_field(path) + [k])
                    ctype = get_type(v)
                    if ctype is None:
                        continue

                    c = unwraplist([
                        c for c in self.columns.get(cname, Null)
                        if c.type == ctype
                    ])
                    if not c:
                        c = Column(name=cname,
                                   table=self.name,
                                   type=ctype,
                                   es_column=typed_column(cname, ctype),
                                   es_index=self.name,
                                   nested_path=nested_path)
                        self.add_column(c)
                    insertion.active_columns.add(c)

                    if ctype == "nested":
                        row[cname] = "."
                        deeper = [cname] + listwrap(nested_path)
                        insertion = doc_collection.get(cname, None)
                        if not insertion:
                            doc_collection[cname] = Dict(active_columns=set(),
                                                         rows=[])
                        for i, r in enumerate(v):
                            child_uid = set_default(
                                {UID_PREFIX + "id" + unicode(len(uid)): i},
                                uid)
                            _flatten(r, child_uid, cname, deeper)
                    elif ctype == "object":
                        row[cname] = "."
                        _flatten(v, cname, nested_path)
                    elif c.type:
                        row[cname] = v
            else:
                k = "."
                v = d
                cname = join_field(split_field(path) + [k])
                ctype = get_type(v)
                if ctype is None:
                    return

                c = unwraplist([
                    c for c in self.columns.get(cname, Null) if c.type == ctype
                ])
                if not c:
                    c = Column(name=cname,
                               table=self.name,
                               type=ctype,
                               es_column=typed_column(cname, ctype),
                               es_index=self.name,
                               nested_path=nested_path)
                    self.add_column(c)
                insertion.active_columns.add(c)

                if ctype == "nested":
                    row[cname] = "."
                    deeper = [cname] + listwrap(nested_path)
                    insertion = doc_collection.get(cname, None)
                    if not insertion:
                        doc_collection[cname] = Dict(active_columns=set(),
                                                     rows=[])
                    for i, r in enumerate(v):
                        child_uid = set_default(
                            {UID_PREFIX + "id" + unicode(len(uid)): i}, uid)
                        _flatten(r, child_uid, cname, deeper)
                elif ctype == "object":
                    row[cname] = "."
                    _flatten(v, cname, nested_path)
                elif c.type:
                    row[cname] = v

Ejemplo n.º 38

0

Mostrar archivo

Archivo: setop.py Proyecto: klahnakoski/esReplicate

def extract_rows(es, es_query, query):
    is_list = isinstance(query.select, list)
    select = wrap([s.copy() for s in listwrap(query.select)])
    new_select = DictList()
    columns = query.frum.get_columns()
    leaf_columns = set(c.name for c in columns if c.type not in STRUCT and (not c.nested_path or c.es_column == c.nested_path))
    nested_columns = set(c.name for c in columns if c.nested_path)

    i = 0
    source = "fields"
    for s in select:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(s.value, LeavesOp):
            term = s.value.term
            if isinstance(term, Variable):

                if term.var == ".":
                    es_query.fields = None
                    source = "_source"

                    net_columns = leaf_columns - set(select.name)
                    for n in net_columns:
                        new_select.append({
                            "name": n,
                            "value": Variable(n),
                            "put": {"name": n, "index": i, "child": "."}
                        })
                        i += 1
                else:
                    parent = term.var + "."
                    prefix = len(parent)
                    for c in leaf_columns:
                        if c.startswith(parent):
                            if es_query.fields is not None:
                                es_query.fields.append(c)

                            new_select.append({
                                "name": s.name + "." + c[prefix:],
                                "value": Variable(c),
                                "put": {"name": s.name + "." + c[prefix:], "index": i, "child": "."}
                            })
                            i += 1

        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                es_query.fields = None
                source = "_source"

                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            elif s.value.var == "_id":
                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "pull": "_id",
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            elif s.value.var in nested_columns:
                es_query.fields = None
                source = "_source"

                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            else:
                parent = s.value.var + "."
                prefix = len(parent)
                net_columns = [c for c in leaf_columns if c.startswith(parent)]
                if not net_columns:
                    # LEAF
                    if es_query.fields is not None:
                        es_query.fields.append(s.value.var)
                    new_select.append({
                        "name": s.name,
                        "value": s.value,
                        "put": {"name": s.name, "index": i, "child": "."}
                    })
                else:
                    # LEAVES OF OBJECT
                    for n in net_columns:
                        if es_query.fields is not None:
                            es_query.fields.append(n)
                        new_select.append({
                            "name": s.name,
                            "value": Variable(n),
                            "put": {"name": s.name, "index": i, "child": n[prefix:]}
                        })
                i += 1
        else:
            es_query.script_fields[literal_field(s.name)] = {"script": s.value.to_ruby()}
            new_select.append({
                "name": s.name,
                "pull": "fields." + literal_field(s.name),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    for n in new_select:
        if n.pull:
            continue
        if source == "_source":
            n.pull = join_field(["_source"] + split_field(n.value.var))
        elif isinstance(n.value, Variable):
            n.pull = "fields." + literal_field(n.value.var)
        else:
            Log.error("Do not know what to do")

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        Log.error("problem formatting", e)

Ejemplo n.º 39

0

Mostrar archivo

Archivo: Table_usingSQLite.py Proyecto: klahnakoski/MoDataSubmission

    def update(self, command):
        """
        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        """
        command = wrap(command)

        # REJECT DEEP UPDATES
        touched_columns = command.set.keys() | set(listwrap(command["clear"]))
        for c in self.get_leaves():
            if c.name in touched_columns and c.nested_path and len(c.name) > len(c.nested_path[0]):
                Log.error("Deep update not supported")

        # ADD NEW COLUMNS
        where = jx_expression(command.where)
        _vars = where.vars()
        _map = {
            v: c.es_column
            for v in _vars
            for c in self.columns.get(v, Null)
            if c.type not in ["nested", "object"]
            }
        where_sql = where.map(_map).to_sql()
        new_columns = set(command.set.keys()) - set(self.columns.keys())
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_type(nested_value)
            column = Column(
                name=new_column_name,
                type=ctype,
                table=self.name,
                es_index=self.name,
                es_column=typed_column(new_column_name, ctype)
            )
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_type(nested_value) == "nested":
                nested_table_name = join_field(split_field(self.name)+split_field(nested_column_name))
                nested_table = self.nested_tables[nested_table_name]
                self_primary_key = ",".join(quote_table(c.es_column) for u in self.uid for c in self.columns[u])
                extra_key_name = UID_PREFIX+"id"+unicode(len(self.uid))
                extra_key = [e for e in nested_table.columns[extra_key_name]][0]

                sql_command = "DELETE FROM " + quote_table(nested_table.name) + \
                              "\nWHERE EXISTS (" + \
                              "\nSELECT 1 " + \
                              "\nFROM " + quote_table(nested_table.name) + " n" + \
                              "\nJOIN (" + \
                              "\nSELECT " + self_primary_key + \
                              "\nFROM " + quote_table(self.name) + \
                              "\nWHERE " + where_sql + \
                              "\n) t ON " + \
                              " AND ".join(
                                  "t." + quote_table(c.es_column) + " = n." + quote_table(c.es_column)
                                  for u in self.uid
                                  for c in self.columns[u]
                              ) + \
                              ")"
                self.db.execute(sql_command)

                # INSERT NEW RECORDS
                if not nested_value:
                    continue

                doc_collection = {}
                for d in listwrap(nested_value):
                    nested_table.flatten(d, Dict(), doc_collection, path=nested_column_name)

                prefix = "INSERT INTO " + quote_table(nested_table.name) + \
                         "(" + \
                         self_primary_key + "," + \
                         _quote_column(extra_key) + "," + \
                         ",".join(
                             quote_table(c.es_column)
                             for c in doc_collection.get(".", Null).active_columns
                         ) + ")"

                # BUILD THE PARENT TABLES
                parent = "\nSELECT " + \
                         self_primary_key + \
                         "\nFROM " + quote_table(self.name) + \
                         "\nWHERE " + jx_expression(command.where).to_sql()

                # BUILD THE RECORDS
                children = " UNION ALL ".join(
                    "\nSELECT " +
                    quote_value(i) + " " +quote_table(extra_key.es_column) + "," +
                    ",".join(
                        quote_value(row[c.name]) + " " + quote_table(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns
                    )
                    for i, row in enumerate(doc_collection.get(".", Null).rows)
                )

                sql_command = prefix + \
                              "\nSELECT " + \
                              ",".join(
                                  "p." + quote_table(c.es_column)
                                  for u in self.uid for c in self.columns[u]
                              ) + "," + \
                              "c." + _quote_column(extra_key) + "," + \
                              ",".join(
                                  "c." + quote_table(c.es_column)
                                  for c in doc_collection.get(".", Null).active_columns
                              ) + \
                              "\nFROM (" + parent + ") p " + \
                              "\nJOIN (" + children + \
                              "\n) c on 1=1"

                self.db.execute(sql_command)

                # THE CHILD COLUMNS COULD HAVE EXPANDED
                # ADD COLUMNS TO SELF
                for n, cs in nested_table.columns.items():
                    for c in cs:
                        column = Column(
                            name=c.name,
                            type=c.type,
                            table=self.name,
                            es_index=c.es_index,
                            es_column=c.es_column,
                            nested_path=[nested_column_name]+listwrap(c.nested_path)
                        )
                        if c.name not in self.columns:
                            self.columns[column.name] = {column}
                        elif c.type not in [c.type for c in self.columns[c.name]]:
                            self.columns[column.name].add(column)

        command = "UPDATE " + quote_table(self.name) + " SET " + \
                  ",\n".join(
                      [
                          _quote_column(c) + "=" + quote_value(get_if_type(v, c.type))
                          for k, v in command.set.items()
                          if get_type(v) != "nested"
                          for c in self.columns[k]
                          if c.type != "nested" and not c.nested_path
                          ] +
                      [
                          _quote_column(c) + "=NULL"
                          for k in listwrap(command["clear"])
                          if k in self.columns
                          for c in self.columns[k]
                          if c.type != "nested" and not c.nested_path
                          ]
                  ) + \
                  " WHERE " + where_sql

        self.db.execute(command)

Ejemplo n.º 40

0

Mostrar archivo

def parse_properties(parent_index_name, parent_query_path, esProperties):
    """
    RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT
    """
    from pyLibrary.queries.meta import Column

    columns = DictList()
    for name, property in esProperties.items():
        if parent_query_path:
            index_name, query_path = parent_index_name, join_field(split_field(parent_query_path) + [name])
        else:
            index_name, query_path = parent_index_name, name

        if property.type == "nested" and property.properties:
            # NESTED TYPE IS A NEW TYPE DEFINITION
            # MARKUP CHILD COLUMNS WITH THE EXTRA DEPTH
            self_columns = parse_properties(index_name, query_path, property.properties)
            for c in self_columns:
                c.nested_path = unwraplist([query_path] + listwrap(c.nested_path))
            columns.extend(self_columns)
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="nested",
                nested_path=query_path
            ))

            continue

        if property.properties:
            child_columns = parse_properties(index_name, query_path, property.properties)
            columns.extend(child_columns)
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="source" if property.enabled == False else "object"
            ))

        if property.dynamic:
            continue
        if not property.type:
            continue
        if property.type == "multi_field":
            property.type = property.fields[name].type  # PULL DEFAULT TYPE
            for i, (n, p) in enumerate(property.fields.items()):
                if n == name:
                    # DEFAULT
                    columns.append(Column(
                        table=index_name,
                        es_index=index_name,
                        name=query_path,
                        es_column=query_path,
                        type=p.type
                    ))
                else:
                    columns.append(Column(
                        table=index_name,
                        es_index=index_name,
                        name=query_path + "\\." + n,
                        es_column=query_path + "\\." + n,
                        type=p.type
                    ))
            continue

        if property.type in ["string", "boolean", "integer", "date", "long", "double"]:
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type=property.type
            ))
            if property.index_name and name != property.index_name:
                columns.append(Column(
                    table=index_name,
                    es_index=index_name,
                    es_column=query_path,
                    name=query_path,
                    type=property.type
                ))
        elif property.enabled == None or property.enabled == False:
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="source" if property.enabled==False else "object"
            ))
        else:
            Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=query_path)

    return columns

Ejemplo n.º 41

0

Mostrar archivo

Archivo: dimensions.py Proyecto: mozilla/ActiveData-ETL

    def getDomain(self, **kwargs):
        # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
        kwargs = wrap(kwargs)
        kwargs.depth = coalesce(
            kwargs.depth,
            len(self.fields) - 1 if isinstance(self.fields, list) else None)

        if not self.partitions and self.edges:
            # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP
            partitions = [
                {
                    "name": v.name,
                    "value": v.name,
                    "esfilter": v.esfilter,
                    "style": v.style,
                    "weight": v.weight  # YO! WHAT DO WE *NOT* COPY?
                } for i, v in enumerate(self.edges)
                if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT) and v.esfilter
            ]
            self.isFacet = True
        elif kwargs.depth == None:  # ASSUME self.fields IS A dict
            partitions = DictList()
            for i, part in enumerate(self.partitions):
                if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                    break
                partitions.append({
                    "name":
                    part.name,
                    "value":
                    part.value,
                    "esfilter":
                    part.esfilter,
                    "style":
                    coalesce(part.style, part.parent.style),
                    "weight":
                    part.weight  # YO!  WHAT DO WE *NOT* COPY?
                })
        elif kwargs.depth == 0:
            partitions = [
                {
                    "name": v.name,
                    "value": v.value,
                    "esfilter": v.esfilter,
                    "style": v.style,
                    "weight": v.weight  # YO!  WHAT DO WE *NOT* COPY?
                } for i, v in enumerate(self.partitions)
                if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT)
            ]
        elif kwargs.depth == 1:
            partitions = DictList()
            rownum = 0
            for i, part in enumerate(self.partitions):
                if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                    continue
                rownum += 1
                try:
                    for j, subpart in enumerate(part.partitions):
                        partitions.append({
                            "name":
                            join_field(
                                split_field(subpart.parent.name) +
                                [subpart.name]),
                            "value":
                            subpart.value,
                            "esfilter":
                            subpart.esfilter,
                            "style":
                            coalesce(subpart.style, subpart.parent.style),
                            "weight":
                            subpart.weight  # YO!  WHAT DO WE *NOT* COPY?
                        })
                except Exception, e:
                    Log.error("", e)

Ejemplo n.º 42

0

Mostrar archivo

def typed_column(name, type):
    return join_field(split_field(name) + ["$" + type])

Ejemplo n.º 43

0

Mostrar archivo

def untyped_column(column_name):
    if "$" in column_name:
        return join_field(split_field(column_name)[:-1])
    else:
        return column_name

Ejemplo n.º 44

0

Mostrar archivo

Archivo: setop.py Proyecto: klahnakoski/MoDevETL

def extract_rows(es, es_query, query):
    is_list = isinstance(query.select, list)
    select = wrap([s.copy() for s in listwrap(query.select)])
    new_select = DictList()
    column_names = set(c.name for c in query.frum.get_columns() if c.type not in ["object"] and (not c.nested_path or c.abs_name == c.nested_path or not c.nested_path))
    source = "fields"

    i = 0
    for s in select:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if s.value == "*":
            es_query.fields = None
            source = "_source"

            net_columns = column_names - set(select.name)
            for n in net_columns:
                new_select.append({
                    "name": n,
                    "value": n,
                    "put": {"name": n, "index": i, "child": "."}
                })
                i += 1
        elif s.value == ".":
            es_query.fields = None
            source = "_source"

            new_select.append({
                "name": s.name,
                "value": s.value,
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1
        elif s.value == "_id":
            new_select.append({
                "name": s.name,
                "value": s.value,
                "pull": "_id",
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1
        elif isinstance(s.value, basestring) and s.value.endswith(".*") and is_keyword(s.value[:-2]):
            parent = s.value[:-1]
            prefix = len(parent)
            for c in column_names:
                if c.startswith(parent):
                    if es_query.fields is not None:
                        es_query.fields.append(c)

                    new_select.append({
                        "name": s.name + "." + c[prefix:],
                        "value": c,
                        "put": {"name": s.name + "." + c[prefix:], "index": i, "child": "."}
                    })
                    i += 1
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            parent = s.value + "."
            prefix = len(parent)
            net_columns = [c for c in column_names if c.startswith(parent)]
            if not net_columns:
                if es_query.fields is not None:
                    es_query.fields.append(s.value)
                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
            else:
                for n in net_columns:
                    if es_query.fields is not None:
                        es_query.fields.append(n)
                    new_select.append({
                        "name": s.name,
                        "value": n,
                        "put": {"name": s.name, "index": i, "child": n[prefix:]}
                    })
            i += 1
        elif isinstance(s.value, list):
            Log.error("need an example")
            if es_query.fields is not None:
                es_query.fields.extend([v for v in s.value])
        else:
            es_query.script_fields[literal_field(s.name)] = {"script": qb_expression(s.value).to_ruby()}
            new_select.append({
                "name": s.name,
                "pull": "fields." + literal_field(s.name),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    for n in new_select:
        if n.pull:
            continue
        if source == "_source":
            n.pull = join_field(["_source"] + split_field(n.value))
        else:
            n.pull = "fields." + literal_field(n.value)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(T, new_select, query)
        output.meta.es_response_time = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        Log.error("problem formatting", e)

Ejemplo n.º 45

0

Mostrar archivo

Archivo: elasticsearch.py Proyecto: klahnakoski/MoDataSubmission

def parse_properties(parent_index_name, parent_query_path, esProperties):
    """
    RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT
    """
    from pyLibrary.queries.meta import Column

    columns = DictList()
    for name, property in esProperties.items():
        if parent_query_path:
            index_name, query_path = parent_index_name, join_field(split_field(parent_query_path) + [name])
        else:
            index_name, query_path = parent_index_name, name

        if property.type == "nested" and property.properties:
            # NESTED TYPE IS A NEW TYPE DEFINITION
            # MARKUP CHILD COLUMNS WITH THE EXTRA DEPTH
            self_columns = parse_properties(index_name, query_path, property.properties)
            for c in self_columns:
                c.nested_path = unwraplist([query_path] + listwrap(c.nested_path))
            columns.extend(self_columns)
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="nested",
                nested_path=query_path
            ))

            continue

        if property.properties:
            child_columns = parse_properties(index_name, query_path, property.properties)
            columns.extend(child_columns)
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="source" if property.enabled == False else "object"
            ))

        if property.dynamic:
            continue
        if not property.type:
            continue
        if property.type == "multi_field":
            property.type = property.fields[name].type  # PULL DEFAULT TYPE
            for i, (n, p) in enumerate(property.fields.items()):
                if n == name:
                    # DEFAULT
                    columns.append(Column(
                        table=index_name,
                        es_index=index_name,
                        name=query_path,
                        es_column=query_path,
                        type=p.type
                    ))
                else:
                    columns.append(Column(
                        table=index_name,
                        es_index=index_name,
                        name=query_path + "\\." + n,
                        es_column=query_path + "\\." + n,
                        type=p.type
                    ))
            continue

        if property.type in ["string", "boolean", "integer", "date", "long", "double"]:
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type=property.type
            ))
            if property.index_name and name != property.index_name:
                columns.append(Column(
                    table=index_name,
                    es_index=index_name,
                    es_column=query_path,
                    name=query_path,
                    type=property.type
                ))
        elif property.enabled == None or property.enabled == False:
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="source" if property.enabled==False else "object"
            ))
        else:
            Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=query_path)

    return columns

Ejemplo n.º 46

0

Mostrar archivo

Archivo: meta.py Proyecto: klahnakoski/MoTreeherder

class FromESMetadata(Schema):
    """
    QUERY THE METADATA
    """
    def __new__(cls, *args, **kwargs):
        global singlton
        if singlton:
            return singlton
        else:
            singlton = object.__new__(cls)
            return singlton

    @use_settings
    def __init__(self,
                 host,
                 index,
                 alias=None,
                 name=None,
                 port=9200,
                 settings=None):
        global _elasticsearch
        if hasattr(self, "settings"):
            return

        from pyLibrary.queries.containers.lists import ListContainer
        from pyLibrary.env import elasticsearch as _elasticsearch

        self.settings = settings
        self.default_name = coalesce(name, alias, index)
        self.default_es = _elasticsearch.Cluster(settings=settings)
        self.todo = Queue("refresh metadata", max=100000, unique=True)

        self.meta = Dict()
        table_columns = metadata_tables()
        column_columns = metadata_columns()
        self.meta.tables = ListContainer(
            "meta.tables", [], wrap({c.name: c
                                     for c in table_columns}))
        self.meta.columns = ListContainer(
            "meta.columns", [], wrap({c.name: c
                                      for c in column_columns}))
        self.meta.columns.insert(column_columns)
        self.meta.columns.insert(table_columns)
        # TODO: fix monitor so it does not bring down ES
        if ENABLE_META_SCAN:
            self.worker = Thread.run("refresh metadata", self.monitor)
        else:
            self.worker = Thread.run("refresh metadata", self.not_monitor)
        return

    @property
    def query_path(self):
        return None

    @property
    def url(self):
        return self.default_es.path + "/" + self.default_name.replace(".", "/")

    def get_table(self, table_name):
        with self.meta.tables.locker:
            return self.meta.tables.query(
                {"where": {
                    "eq": {
                        "name": table_name
                    }
                }})

    def _upsert_column(self, c):
        # ASSUMING THE  self.meta.columns.locker IS HAD
        existing_columns = [
            r for r in self.meta.columns.data
            if r.table == c.table and r.name == c.name
        ]
        if not existing_columns:
            self.meta.columns.add(c)
            Log.note("todo: {{table}}.{{column}}",
                     table=c.table,
                     column=c.es_column)
            self.todo.add(c)

            # MARK meta.columns AS DIRTY TOO
            cols = [
                r for r in self.meta.columns.data if r.table == "meta.columns"
            ]
            for cc in cols:
                cc.partitions = cc.cardinality = None
                cc.last_updated = Date.now()
            self.todo.extend(cols)
        else:
            canonical = existing_columns[0]
            if canonical.relative and not c.relative:
                return  # RELATIVE COLUMNS WILL SHADOW ABSOLUTE COLUMNS

            for key in Column.__slots__:
                canonical[key] = c[key]
            Log.note("todo: {{table}}.{{column}}",
                     table=canonical.table,
                     column=canonical.es_column)
            self.todo.add(canonical)

    def _get_columns(self, table=None, metadata=None):
        # TODO: HANDLE MORE THEN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
        if not metadata:
            metadata = self.default_es.get_metadata(force=True)

        def parse_all(please_stop):
            for abs_index, meta in jx.sort(metadata.indices.items(), {
                    "value": 0,
                    "sort": -1
            }):
                if meta.index != abs_index:
                    continue

                for _, properties in meta.mappings.items():
                    if please_stop:
                        return
                    self._parse_properties(abs_index, properties, meta)

        if table:
            for abs_index, meta in jx.sort(metadata.indices.items(), {
                    "value": 0,
                    "sort": -1
            }):
                if table == meta.index:
                    for _, properties in meta.mappings.items():
                        self._parse_properties(abs_index, properties, meta)
                    return
                if table == abs_index:
                    self._get_columns(table=meta.index, metadata=metadata)
                    return
        else:
            self.parser = Thread.run("parse properties", parse_all)

    def _parse_properties(self, abs_index, properties, meta):
        abs_columns = _elasticsearch.parse_properties(abs_index, None,
                                                      properties.properties)
        abs_columns = abs_columns.filter(  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
            lambda r: not r.es_column.startswith("other.") and not r.es_column.
            startswith("previous_values.cf_") and not r.es_index.startswith(
                "debug"))
        with Timer("upserting {{num}} columns", {"num": len(abs_columns)},
                   debug=DEBUG):

            def add_column(c, query_path):
                c.last_updated = Date.now()
                if query_path:
                    c.table = c.es_index + "." + query_path.last()
                else:
                    c.table = c.es_index

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    for alias in meta.aliases:
                        c = copy(c)
                        if query_path:
                            c.table = alias + "." + query_path.last()
                        else:
                            c.table = alias
                        self._upsert_column(c)

            # EACH query_path IS A LIST OF EVER-INCREASING PATHS THROUGH EACH NESTED LEVEL
            query_paths = wrap([[c.es_column] for c in abs_columns
                                if c.type == "nested"])
            for a, b in itertools.product(query_paths, query_paths):
                aa = a.last()
                bb = b.last()
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) < len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(0, aa)
                        break
            query_paths.append([])

            for c in abs_columns:
                # ADD RELATIVE COLUMNS
                full_path = listwrap(c.nested_path)
                abs_depth = len(full_path)
                abs_parent = coalesce(full_path.last(), "")
                for query_path in query_paths:
                    rel_depth = len(query_path)

                    # ABSOLUTE
                    add_column(copy(c), query_path)
                    cc = copy(c)
                    cc.relative = True

                    if not query_path:
                        add_column(cc, query_path)
                        continue

                    rel_parent = query_path.last()

                    if c.es_column.startswith(rel_parent + "."):
                        cc.name = c.es_column[len(rel_parent) + 1:]
                        add_column(cc, query_path)
                    elif c.es_column == rel_parent:
                        cc.name = "."
                        add_column(cc, query_path)
                    elif not abs_parent:
                        # THIS RELATIVE NAME (..o) ALSO NEEDS A RELATIVE NAME (o)
                        # AND THEN REMOVE THE SHADOWED
                        cc.name = "." + ("." *
                                         (rel_depth - abs_depth)) + c.es_column
                        add_column(cc, query_path)
                    elif rel_parent.startswith(abs_parent + "."):
                        cc.name = "." + ("." *
                                         (rel_depth - abs_depth)) + c.es_column
                        add_column(cc, query_path)
                    elif rel_parent != abs_parent:
                        # SIBLING NESTED PATHS ARE INVISIBLE
                        pass
                    else:
                        Log.error("logic error")

    def query(self, _query):
        return self.meta.columns.query(
            QueryOp(
                set_default(
                    {
                        "from": self.meta.columns,
                        "sort": ["table", "name"]
                    }, _query.as_dict())))

    def get_columns(self,
                    table_name,
                    column_name=None,
                    fail_when_not_found=False):
        """
        RETURN METADATA COLUMNS
        """
        try:
            with self.meta.columns.locker:
                columns = [
                    c for c in self.meta.columns.data
                    if c.table == table_name and (
                        column_name is None or c.name == column_name)
                ]
            if columns:
                columns = jx.sort(columns, "name")
                if fail_when_not_found:
                    # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                    while len(self.todo) and not all(
                            columns.get("last_updated")):
                        Log.note(
                            "waiting for columns to update {{columns|json}}",
                            columns=[
                                c.table + "." + c.es_column for c in columns
                                if not c.last_updated
                            ])
                        Thread.sleep(seconds=1)
                    return columns
                elif all(columns.get("last_updated")):
                    return columns
        except Exception, e:
            Log.error("Not expected", cause=e)

        if fail_when_not_found:
            if column_name:
                Log.error("no columns matching {{table}}.{{column}}",
                          table=table_name,
                          column=column_name)
            else:
                self._get_columns(table=table_name)
                Log.error("no columns for {{table}}", table=table_name)

        self._get_columns(table=join_field(split_field(table_name)[0:1]))
        return self.get_columns(table_name=table_name,
                                column_name=column_name,
                                fail_when_not_found=True)

Ejemplo n.º 47

0

Mostrar archivo

Archivo: Table_usingSQLite.py Proyecto: klahnakoski/MoDataSubmission

        def _flatten(d, uid, path, nested_path):
            insertion = doc_collection["." if not nested_path else nested_path[0]]
            row = uid.copy()
            insertion.rows.append(row)
            if isinstance(d, Mapping):
                for k, v in d.items():
                    cname = join_field(split_field(path)+[k])
                    ctype = get_type(v)
                    if ctype is None:
                        continue

                    c = unwraplist([c for c in self.columns.get(cname, Null) if c.type == ctype])
                    if not c:
                        c = Column(
                            name=cname,
                            table=self.name,
                            type=ctype,
                            es_column=typed_column(cname, ctype),
                            es_index=self.name,
                            nested_path=nested_path
                        )
                        self.add_column(c)
                    insertion.active_columns.add(c)

                    if ctype == "nested":
                        row[cname] = "."
                        deeper = [cname] + listwrap(nested_path)
                        insertion = doc_collection.get(cname, None)
                        if not insertion:
                            doc_collection[cname] = Dict(
                                active_columns=set(),
                                rows=[]
                            )
                        for i, r in enumerate(v):
                            child_uid = set_default({UID_PREFIX+"id"+unicode(len(uid)): i}, uid)
                            _flatten(r, child_uid, cname, deeper)
                    elif ctype == "object":
                        row[cname] = "."
                        _flatten(v, cname, nested_path)
                    elif c.type:
                        row[cname] = v
            else:
                k="."
                v=d
                cname = join_field(split_field(path)+[k])
                ctype = get_type(v)
                if ctype is None:
                    return

                c = unwraplist([c for c in self.columns.get(cname, Null) if c.type == ctype])
                if not c:
                    c = Column(
                        name=cname,
                        table=self.name,
                        type=ctype,
                        es_column=typed_column(cname, ctype),
                        es_index=self.name,
                        nested_path=nested_path
                    )
                    self.add_column(c)
                insertion.active_columns.add(c)

                if ctype == "nested":
                    row[cname] = "."
                    deeper = [cname] + listwrap(nested_path)
                    insertion = doc_collection.get(cname, None)
                    if not insertion:
                        doc_collection[cname] = Dict(
                            active_columns=set(),
                            rows=[]
                        )
                    for i, r in enumerate(v):
                        child_uid = set_default({UID_PREFIX+"id"+unicode(len(uid)): i}, uid)
                        _flatten(r, child_uid, cname, deeper)
                elif ctype == "object":
                    row[cname] = "."
                    _flatten(v, cname, nested_path)
                elif c.type:
                    row[cname] = v

Ejemplo n.º 48

0

Mostrar archivo

Archivo: dimensions.py Proyecto: klahnakoski/MoDevETL

    def __init__(self, dim, parent, qb):
        dim = wrap(dim)

        self.name = dim.name
        self.parent = coalesce(parent)
        self.full_name = join_field(split_field(self.parent.full_name)+[self.name])
        self.edges = None  # FOR NOW
        dot.set_default(self, dim)
        self.where = dim.where
        self.type = coalesce(dim.type, "set")
        self.limit = coalesce(dim.limit, DEFAULT_QUERY_LIMIT)
        self.index = coalesce(dim.index, coalesce(parent, Null).index, qb.settings.index)

        if not self.index:
            Log.error("Expecting an index name")

        # ALLOW ACCESS TO SUB-PART BY NAME (IF ONLY THERE IS NO NAME COLLISION)
        self.edges = Dict()
        for e in listwrap(dim.edges):
            new_e = Dimension(e, self, qb)
            self.edges[new_e.full_name] = new_e

        self.partitions = wrap(coalesce(dim.partitions, []))
        parse_partition(self)

        fields = coalesce(dim.field, dim.fields)
        if not fields:
            return  # NO FIELDS TO SEARCH
        elif isinstance(fields, Mapping):
            self.fields = wrap(fields)
            edges = wrap([{"name": k, "value": v, "allowNulls": False} for k, v in self.fields.items()])
        else:
            self.fields = listwrap(fields)
            edges = wrap([{"name": f, "value": f, "index": i, "allowNulls": False} for i, f in enumerate(self.fields)])

        if dim.partitions:
            return  # ALREADY HAVE PARTS
        if self.type not in KNOWN - ALGEBRAIC:
            return  # PARTS OR TOO FUZZY (OR TOO NUMEROUS) TO FETCH

        qb.get_columns()
        with Timer("Get parts of {{name}}", {"name": self.name}):
            parts = qb.query({
                "from": self.index,
                "select": {"name": "count", "aggregate": "count"},
                "edges": edges,
                "where": self.where,
                "limit": self.limit
            })
            Log.note("{{name}} has {{num}} parts",  name= self.name,  num= len(parts))

        d = parts.edges[0].domain

        if dim.path:
            if len(edges) > 1:
                Log.error("Not supported yet")
            # EACH TERM RETURNED IS A PATH INTO A PARTITION TREE
            temp = Dict(partitions=[])
            for i, count in enumerate(parts):
                a = dim.path(d.getEnd(d.partitions[i]))
                if not isinstance(a, list):
                    Log.error("The path function on " + dim.name + " must return an ARRAY of parts")
                addParts(
                    temp,
                    dim.path(d.getEnd(d.partitions[i])),
                    count,
                    0
                )
            self.value = coalesce(dim.value, "name")
            self.partitions = temp.partitions
        elif isinstance(fields, Mapping):
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            partitions = DictList()
            for g, p in parts.groupby(edges):
                if p:
                    partitions.append({
                        "value": g,
                        "where": {"and": [
                            {"term": {e.value: g[e.name]}}
                            for e in edges
                        ]},
                        "count": int(p)
                    })
            self.partitions = partitions
        elif len(edges) == 1:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            self.partitions = wrap([
                {
                    "name": str(d.partitions[i].name),  # CONVERT TO STRING
                    "value": d.getEnd(d.partitions[i]),
                    "where": {"term": {edges[0].value: d.partitions[i].value}},
                    "count": count
                }
                for i, count in enumerate(parts)
            ])
            self.order = {p.value: i for i, p in enumerate(self.partitions)}
        elif len(edges) == 2:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS
            d2 = parts.edges[1].domain

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            array = parts.data.values()[0].cube  # DIG DEEP INTO RESULT (ASSUME SINGLE VALUE CUBE, WITH NULL AT END)

            def edges2value(*values):
                if isinstance(fields, Mapping):
                    output = Dict()
                    for e, v in zip(edges, values):
                        output[e.name] = v
                    return output
                else:
                    return tuple(values)

            self.partitions = wrap([
                {
                    "name": str(d.partitions[i].name),  # CONVERT TO STRING
                    "value": d.getEnd(d.partitions[i]),
                    "where": {"term": {edges[0].value: d.partitions[i].value}},
                    "count": SUM(subcube),
                    "partitions": [
                        {
                            "name": str(d2.partitions[j].name),  # CONVERT TO STRING
                            "value": edges2value(d.getEnd(d.partitions[i]), d2.getEnd(d2.partitions[j])),
                            "where": {"and": [
                                {"term": {edges[0].value: d.partitions[i].value}},
                                {"term": {edges[1].value: d2.partitions[j].value}}
                            ]},
                            "count": count2
                        }
                        for j, count2 in enumerate(subcube)
                        if count2 > 0  # ONLY INCLUDE PROPERTIES THAT EXIST
                    ]
                }
                for i, subcube in enumerate(array)
            ])
        else:
            Log.error("Not supported")

        parse_partition(self)  # RELATE THE PARTS TO THE PARENTS

Ejemplo n.º 49

0

Mostrar archivo

Archivo: aggs.py Proyecto: klahnakoski/intermittents

def es_aggsop(es, frum, query):
    select = listwrap(query.select)

    es_query = Dict()
    new_select = Dict()
    formula = []
    for s in select:
        if s.aggregate == "count" and (s.value == None or s.value == "."):
            s.pull = "doc_count"
        elif is_keyword(s.value):
            new_select[literal_field(s.value)] += [s]
        else:
            formula.append(s)

    for litral_field, many in new_select.items():
        if len(many)>1:
            canonical_name=literal_field(many[0].name)
            es_query.aggs[canonical_name].stats.field = many[0].value
            for s in many:
                if s.aggregate == "count":
                    s.pull = canonical_name + ".count"
                else:
                    s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
        else:
            s = many[0]
            s.pull = literal_field(s.value) + ".value"
            es_query.aggs[literal_field(s.value)][aggregates1_4[s.aggregate]].field = s.value

    for i, s in enumerate(formula):
        new_select[unicode(i)] = s
        s.pull = literal_field(s.name) + ".value"
        es_query.aggs[literal_field(s.name)][aggregates1_4[s.aggregate]].script = qb_expression_to_ruby(s.value)

    decoders = [AggsDecoder(e, query) for e in coalesce(query.edges, query.groupby, [])]
    start = 0
    for d in decoders:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if query.where:
        filter = simplify_esfilter(query.where)
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter}, es_query)}
        )

    if len(split_field(frum.name)) > 1:
        es_query = wrap({
            "size": 0,
            "aggs": {"_nested": set_default({
                "nested": {
                    "path": join_field(split_field(frum.name)[1::])
                }
            }, es_query)}
        })

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
        if query.edges:
            output = formatter(decoders, result.aggregations, start, query, select)
        elif query.groupby:
            output = groupby_formatter(decoders, result.aggregations, start, query, select)
        else:
            output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.es_response_time = es_duration.seconds
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet",  format= query.format, cause=e)
        Log.error("Some problem", e)

Ejemplo n.º 50

0

Mostrar archivo

Archivo: qb_usingES.py Proyecto: klahnakoski/MoDevETL

 def query_path(self):
     return join_field(split_field(self.name)[1:])

Ejemplo n.º 51

0

Mostrar archivo

def es_aggsop(es, frum, query):
    select = listwrap(query.select)

    es_query = Dict()
    new_select = Dict()
    formula = []
    for s in select:
        if s.aggregate == "count" and (s.value == None or s.value == "."):
            s.pull = "doc_count"
        elif is_keyword(s.value):
            new_select[literal_field(s.value)] += [s]
        else:
            formula.append(s)

    for litral_field, many in new_select.items():
        if len(many) > 1:
            canonical_name = literal_field(many[0].name)
            es_query.aggs[canonical_name].stats.field = many[0].value
            for s in many:
                if s.aggregate == "count":
                    s.pull = canonical_name + ".count"
                else:
                    s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
        else:
            s = many[0]
            s.pull = literal_field(s.value) + ".value"
            es_query.aggs[literal_field(
                s.value)][aggregates1_4[s.aggregate]].field = s.value

    for i, s in enumerate(formula):
        new_select[unicode(i)] = s
        s.pull = literal_field(s.name) + ".value"
        es_query.aggs[literal_field(s.name)][aggregates1_4[
            s.aggregate]].script = qb_expression_to_ruby(s.value)

    decoders = [
        AggsDecoder(e, query) for e in coalesce(query.edges, query.groupby, [])
    ]
    start = 0
    for d in decoders:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if query.where:
        filter = simplify_esfilter(query.where)
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter}, es_query)})

    if len(split_field(frum.name)) > 1:
        es_query = wrap({
            "size": 0,
            "aggs": {
                "_nested":
                set_default(
                    {
                        "nested": {
                            "path": join_field(split_field(frum.name)[1::])
                        }
                    }, es_query)
            }
        })

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[
            query.format]
        if query.edges:
            output = formatter(decoders, result.aggregations, start, query,
                               select)
        elif query.groupby:
            output = groupby_formatter(decoders, result.aggregations, start,
                                       query, select)
        else:
            output = aggop_formatter(decoders, result.aggregations, start,
                                     query, select)

        output.meta.es_response_time = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet",
                      format=query.format,
                      cause=e)
        Log.error("Some problem", e)

Ejemplo n.º 52

0

Mostrar archivo

 def query_path(self):
     return join_field(split_field(self.name)[1:])

Ejemplo n.º 53

0

Mostrar archivo

Archivo: dimensions.py Proyecto: mozilla/ActiveData-ETL

    def __init__(self, dim, parent, qb):
        self.name = dim.name
        self.parent = parent
        self.full_name = join_field(
            split_field(self.parent.full_name) + [self.name])
        dot.set_default(self, dim)
        self.esfilter = dim.esfilter
        self.type = coalesce(dim.type, "set")
        self.limit = coalesce(dim.limit, DEFAULT_QUERY_LIMIT)
        self.index = coalesce(dim.index,
                              coalesce(parent, Null).index,
                              qb.es.settings.name)

        if not self.index:
            Log.error("Expecting an index name")

        # ALLOW ACCESS TO SUB-PART BY NAME (IF ONLY THERE IS NO NAME COLLISION)
        self.edges = Dict()
        for e in listwrap(dim.edges):
            new_e = Dimension(e, self, qb)
            self.edges[new_e.full_name] = new_e

        self.partitions = wrap(coalesce(dim.partitions, []))
        parse_partition(self)

        fields = coalesce(dim.field, dim.fields)
        if not fields:
            return  # NO FIELDS TO SEARCH
        elif isinstance(fields, Mapping):
            self.fields = wrap(fields)
            edges = wrap([{
                "name": k,
                "value": v,
                "allowNulls": False
            } for k, v in self.fields.items()])
        else:
            self.fields = listwrap(fields)
            edges = wrap([{
                "name": f,
                "value": f,
                "index": i,
                "allowNulls": False
            } for i, f in enumerate(self.fields)])

        if dim.partitions:
            return  # ALREADY HAVE PARTS
        if dim.type not in KNOWN - ALGEBRAIC:
            return  # PARTS OR TOO FUZZY (OR TOO NUMEROUS) TO FETCH

        with Timer("Get parts of {{name}}", {"name": self.name}):
            parts = qb.query({
                "from": self.index,
                "select": {
                    "name": "count",
                    "aggregate": "count"
                },
                "edges": edges,
                "esfilter": self.esfilter,
                "limit": self.limit
            })
            Log.note("{{name}} has {{num}} parts",
                     name=self.name,
                     num=len(parts))

        d = parts.edges[0].domain

        if dim.path:
            if len(edges) > 1:
                Log.error("Not supported yet")
            # EACH TERM RETURNED IS A PATH INTO A PARTITION TREE
            temp = Dict(partitions=[])
            for i, count in enumerate(parts):
                a = dim.path(d.getEnd(d.partitions[i]))
                if not isinstance(a, list):
                    Log.error("The path function on " + dim.name +
                              " must return an ARRAY of parts")
                addParts(temp, dim.path(d.getEnd(d.partitions[i])), count, 0)
            self.value = coalesce(dim.value, "name")
            self.partitions = temp.partitions
        elif isinstance(fields, Mapping):
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            partitions = DictList()
            for g, p in parts.groupby(edges):
                if p:
                    partitions.append({
                        "value": g,
                        "esfilter": {
                            "and": [{
                                "term": {
                                    e.value: g[e.name]
                                }
                            } for e in edges]
                        },
                        "count": int(p)
                    })
            self.partitions = partitions
        elif len(edges) == 1:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            self.partitions = wrap([
                {
                    "name": str(d.partitions[i].name),  # CONVERT TO STRING
                    "value": d.getEnd(d.partitions[i]),
                    "esfilter": {
                        "term": {
                            edges[0].value: d.partitions[i].value
                        }
                    },
                    "count": count
                } for i, count in enumerate(parts)
            ])
            self.order = {p.value: i for i, p in enumerate(self.partitions)}
        elif len(edges) == 2:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS
            d2 = parts.edges[1].domain

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            array = parts.data.values(
            )[0].cube  # DIG DEEP INTO RESULT (ASSUME SINGLE VALUE CUBE, WITH NULL AT END)

            def edges2value(*values):
                if isinstance(fields, Mapping):
                    output = Dict()
                    for e, v in zip(edges, values):
                        output[e.name] = v
                    return output
                else:
                    return tuple(values)

            self.partitions = wrap([
                {
                    "name":
                    str(d.partitions[i].name),  # CONVERT TO STRING
                    "value":
                    d.getEnd(d.partitions[i]),
                    "esfilter": {
                        "term": {
                            edges[0].value: d.partitions[i].value
                        }
                    },
                    "count":
                    SUM(subcube),
                    "partitions": [
                        {
                            "name":
                            str(d2.partitions[j].name),  # CONVERT TO STRING
                            "value":
                            edges2value(d.getEnd(d.partitions[i]),
                                        d2.getEnd(d2.partitions[j])),
                            "esfilter": {
                                "and": [{
                                    "term": {
                                        edges[0].value: d.partitions[i].value
                                    }
                                }, {
                                    "term": {
                                        edges[1].value: d2.partitions[j].value
                                    }
                                }]
                            },
                            "count":
                            count2
                        } for j, count2 in enumerate(subcube)
                        if count2 > 0  # ONLY INCLUDE PROPERTIES THAT EXIST
                    ]
                } for i, subcube in enumerate(array)
            ])
        else:
            Log.error("Not supported")

        parse_partition(self)  # RELATE THE PARTS TO THE PARENTS

Ejemplo n.º 54

0

Mostrar archivo

def extract_rows(es, es_query, query):
    is_list = isinstance(query.select, list)
    select = wrap([s.copy() for s in listwrap(query.select)])
    new_select = DictList()
    columns = query.frum.get_columns()
    leaf_columns = set(c.name for c in columns if c.type not in ["object", "nested"] and (not c.nested_path or c.es_column == c.nested_path))
    nested_columns = set(c.name for c in columns if c.nested_path)

    i = 0
    source = "fields"
    for s in select:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(s.value, LeavesOp):
            if isinstance(s.value.term, Variable):
                if s.value.term.var == ".":
                    es_query.fields = None
                    source = "_source"

                    net_columns = leaf_columns - set(select.name)
                    for n in net_columns:
                        new_select.append({
                            "name": n,
                            "value": n,
                            "put": {"name": n, "index": i, "child": "."}
                        })
                        i += 1
                else:
                    parent = s.value.var + "."
                    prefix = len(parent)
                    for c in leaf_columns:
                        if c.startswith(parent):
                            if es_query.fields is not None:
                                es_query.fields.append(c)

                            new_select.append({
                                "name": s.name + "." + c[prefix:],
                                "value": c,
                                "put": {"name": s.name + "." + c[prefix:], "index": i, "child": "."}
                            })
                            i += 1

        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                es_query.fields = None
                source = "_source"

                new_select.append({
                    "name": s.name,
                    "value": s.value.var,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            elif s.value.var == "_id":
                new_select.append({
                    "name": s.name,
                    "value": s.value.var,
                    "pull": "_id",
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            elif s.value.var in nested_columns:
                es_query.fields = None
                source = "_source"

                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            else:
                parent = s.value.var + "."
                prefix = len(parent)
                net_columns = [c for c in leaf_columns if c.startswith(parent)]
                if not net_columns:
                    # LEAF
                    if es_query.fields is not None:
                        es_query.fields.append(s.value.var)
                    new_select.append({
                        "name": s.name,
                        "value": s.value,
                        "put": {"name": s.name, "index": i, "child": "."}
                    })
                else:
                    # LEAVES OF OBJECT
                    for n in net_columns:
                        if es_query.fields is not None:
                            es_query.fields.append(n)
                        new_select.append({
                            "name": s.name,
                            "value": n,
                            "put": {"name": s.name, "index": i, "child": n[prefix:]}
                        })
                i += 1
        else:
            es_query.script_fields[literal_field(s.name)] = {"script": s.value.to_ruby()}
            new_select.append({
                "name": s.name,
                "pull": "fields." + literal_field(s.name),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    for n in new_select:
        if n.pull:
            continue
        if source == "_source":
            n.pull = join_field(["_source"] + split_field(n.value))
        else:
            n.pull = "fields." + literal_field(n.value)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        Log.error("problem formatting", e)

Ejemplo n.º 55

0

Mostrar archivo

def parse_columns(parent_path, esProperties):
    """
    RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT
    """
    columns = DictList()
    for name, property in esProperties.items():
        if parent_path:
            path = join_field(split_field(parent_path) + [name])
        else:
            path = name

        if property.type == "nested" and property.properties:
            # NESTED TYPE IS A NEW TYPE DEFINITION
            # MARKUP CHILD COLUMNS WITH THE EXTRA DEPTH
            child_columns = deepcopy(parse_columns(path, property.properties))
            self_columns = deepcopy(child_columns)
            for c in self_columns:
                c.depth += 1
            columns.extend(self_columns)
            columns.append({
                "name": join_field(split_field(path)[1::]),
                "type": "nested",
                "useSource": False
            })

            if path not in INDEX_CACHE:
                pp = split_field(parent_path)
                for i in qb.reverse(range(len(pp))):
                    c = INDEX_CACHE.get(join_field(pp[:i + 1]), None)
                    if c:
                        INDEX_CACHE[path] = c.copy()
                        break
                else:
                    Log.error("Can not find parent")

                INDEX_CACHE[path].name = path
            INDEX_CACHE[path].columns = child_columns
            continue

        if property.properties:
            child_columns = parse_columns(path, property.properties)
            columns.extend(child_columns)
            columns.append({
                "name": join_field(split_field(path)[1::]),
                "type": "object",
                "useSource": False
            })

        if property.dynamic:
            continue
        if not property.type:
            continue
        if property.type == "multi_field":
            property.type = property.fields[name].type  # PULL DEFAULT TYPE
            for i, n, p in enumerate(property.fields):
                if n == name:
                    # DEFAULT
                    columns.append({
                        "name": join_field(split_field(path)[1::]),
                        "type": p.type,
                        "useSource": p.index == "no"
                    })
                else:
                    columns.append({
                        "name":
                        join_field(split_field(path)[1::]) + "\\." + n,
                        "type":
                        p.type,
                        "useSource":
                        p.index == "no"
                    })
            continue

        if property.type in [
                "string", "boolean", "integer", "date", "long", "double"
        ]:
            columns.append({
                "name": join_field(split_field(path)[1::]),
                "type": property.type,
                "useSource": property.index == "no"
            })
            if property.index_name and name != property.index_name:
                columns.append({
                    "name": property.index_name,
                    "type": property.type,
                    "useSource": property.index == "no"
                })
        elif property.enabled == None or property.enabled == False:
            columns.append({
                "name": join_field(split_field(path)[1::]),
                "type": "object",
                "useSource": True
            })
        else:
            Log.warning("unknown type {{type}} for property {{path}}",
                        type=property.type,
                        path=path)

    return columns

Ejemplo n.º 56

0

Mostrar archivo

Archivo: Table_usingSQLite.py Proyecto: klahnakoski/MoDataSubmission

def untyped_column(column_name):
    if "$" in column_name:
        return join_field(split_field(column_name)[:-1])
    else:
        return column_name