Example #1
    def _get_columns(self, table=None):
        # TODO: HANDLE MORE THAN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
        alias_done = set()
        index = split_field(table)[0]
        query_path = split_field(table)[1:]
        metadata = self.default_es.get_metadata(index=index)
        for index, meta in qb.sort(metadata.indices.items(), {"value": 0, "sort": -1}):
            for _, properties in meta.mappings.items():
                columns = _elasticsearch.parse_properties(index, None, properties.properties)
                columns = columns.filter(lambda r: not r.abs_name.startswith("other.") and not r.abs_name.startswith("previous_values.cf_"))  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
                with Timer("upserting {{num}} columns", {"num": len(columns)}, debug=DEBUG):
                    with self.columns.locker:
                        for c in columns:
                            # ABSOLUTE
                            c.table = join_field([index]+query_path)
                            self.upsert_column(c)

                            for alias in meta.aliases:
                                # ONLY THE LATEST ALIAS IS CHOSEN TO GET COLUMNS
                                if alias in alias_done:
                                    continue
                                alias_done.add(alias)
                                c = copy(c)
                                c.table = join_field([alias]+query_path)
                                self.upsert_column(c)
Example #2
    def select(self, selectList, fromPath, varName, sourceVar):
        path = split_field(fromPath)
        is_deep = len(path) > 1
        heads = []
        list = []
        for s in selectList:
            if is_deep:
                if s.value and isKeyword(s.value):
                    shortForm = self._translate(s.value)
                    list.append("Value2Pipe(" + shortForm + ")\n")
                else:
                    Log.error("do not know how to handle yet")
            else:
                if s.value and isKeyword(s.value):
                    list.append("Value2Pipe(getDocValue(" + value2MVEL(s.value) + "))\n")
                elif s.value:
                    shortForm = self._translate(s.value)
                    list.append("Value2Pipe(" + shortForm + ")\n")
                else:
                    code, decode = self.Parts2Term(s.domain)
                    heads.append(code.head)
                    list.append("Value2Pipe(" + code.body + ")\n")


        if len(split_field(fromPath)) > 1:
            output = 'if (' + varName + ' != "") ' + varName + '+="|";\n' + varName + '+=' + '+"|"+'.join(["Value2Pipe("+v+")\n" for v in list]) + ';\n'
        else:
            output = varName + ' = ' + '+"|"+'.join(["Value2Pipe("+v+")\n" for v in list]) + ';\n'

        return Dict(
            head="".join(heads),
            body=output
        )
Example #3
    def select(self, selectList, fromPath, varName, sourceVar):
        path = split_field(fromPath)
        is_deep = len(path) > 1
        heads = []
        list = []
        for s in selectList:
            if is_deep:
                if s.value and isKeyword(s.value):
                    shortForm = self._translate(s.value)
                    list.append("Value2Pipe(" + shortForm + ")\n")
                else:
                    Log.error("do not know how to handle yet")
            else:
                if s.value and isKeyword(s.value):
                    list.append("Value2Pipe(getDocValue(" +
                                value2MVEL(s.value) + "))\n")
                elif s.value:
                    shortForm = self._translate(s.value)
                    list.append("Value2Pipe(" + shortForm + ")\n")
                else:
                    code, decode = self.Parts2Term(s.domain)
                    heads.append(code.head)
                    list.append("Value2Pipe(" + code.body + ")\n")

        if len(split_field(fromPath)) > 1:
            output = 'if (' + varName + ' != "") ' + varName + '+="|";\n' + varName + '+=' + '+"|"+'.join(
                ["Value2Pipe(" + v + ")\n" for v in list]) + ';\n'
        else:
            output = varName + ' = ' + '+"|"+'.join(
                ["Value2Pipe(" + v + ")\n" for v in list]) + ';\n'

        return Dict(head="".join(heads), body=output)
Example #4
def tuple(data, field_name):
    """
    RETURN LIST OF TUPLES
    """
    if isinstance(data, Cube):
        Log.error("not supported yet")

    if isinstance(data, FlatList):
        Log.error("not supported yet")

    if isinstance(field_name, Mapping) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if isinstance(field_name, basestring):
        if len(split_field(field_name)) == 1:
            return [(d[field_name],) for d in data]
        else:
            path = split_field(field_name)
            output = []
            flat_list._tuple1(data, path, 0, output)
            return output
    elif isinstance(field_name, list):
        paths = [_select_a_field(f) for f in field_name]
        output = DictList()
        _tuple((), unwrap(data), paths, 0, output)
        return output
    else:
        paths = [_select_a_field(field_name)]
        output = DictList()
        _tuple((), data, paths, 0, output)
        return output
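A hedged usage sketch for the single-field branch above, assuming the module's own imports (split_field, etc.) are in place; the sample rows and field name are invented, not taken from any example:

# INVENTED ROWS; EXERCISES ONLY THE SINGLE-SEGMENT basestring BRANCH ABOVE
rows = [{"name": "a", "count": 1}, {"name": "b", "count": 2}]
print(tuple(rows, "count"))    # -> [(1,), (2,)]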
Example #5
def tuple(data, field_name):
    """
    RETURN LIST OF TUPLES
    """
    if isinstance(data, Cube):
        Log.error("not supported yet")

    if isinstance(data, FlatList):
        Log.error("not supported yet")

    if isinstance(field_name, Mapping) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if isinstance(field_name, basestring):
        if len(split_field(field_name)) == 1:
            return [(d[field_name], ) for d in data]
        else:
            path = split_field(field_name)
            output = []
            flat_list._tuple1(data, path, 0, output)
            return output
    elif isinstance(field_name, list):
        paths = [_select_a_field(f) for f in field_name]
        output = DictList()
        _tuple((), unwrap(data), paths, 0, output)
        return output
    else:
        paths = [_select_a_field(field_name)]
        output = DictList()
        _tuple((), data, paths, 0, output)
        return output
Example #6
def _select_a_field(field):
    if isinstance(field, basestring):
        return wrap({"name": field, "value": split_field(field)})
    elif isinstance(wrap(field).value, basestring):
        field = wrap(field)
        return wrap({"name": field.name, "value": split_field(field.value)})
    else:
        return wrap({"name": field.name, "value": field.value})
Example #7
def _select_a_field(field):
    if isinstance(field, basestring):
        return wrap({"name": field, "value": split_field(field)})
    elif isinstance(wrap(field).value, basestring):
        field = wrap(field)
        return wrap({"name": field.name, "value": split_field(field.value)})
    else:
        return wrap({"name": field.name, "value": field.value})
Example #8
    def select(self, fields):
        if isinstance(fields, Mapping):
            fields=fields.value

        if isinstance(fields, basestring):
            # RETURN LIST OF VALUES
            if len(split_field(fields)) == 1:
                if self.path[0] == fields:
                    return [d[1] for d in self.data]
                else:
                    return [d[0][fields] for d in self.data]
            else:
                keys = split_field(fields)
                depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
                short_key = keys[depth:]

                output = DictList()
                _select1((wrap(d[depth]) for d in self.data), short_key, 0, output)
                return output

        if isinstance(fields, list):
            output = DictList()

            meta = []
            for f in fields:
                if hasattr(f.value, "__call__"):
                    meta.append((f.name, f.value))
                else:
                    meta.append((f.name, functools.partial(lambda v, d: d[v], f.value)))

            for row in self._values():
                agg = Dict()
                for name, f in meta:
                    agg[name] = f(row)

                output.append(agg)

            return output

            # meta = []
            # for f in fields:
            #     keys = split_field(f.value)
            #     depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
            #     short_key = join_field(keys[depth:])
            #
            #     meta.append((f.name, depth, short_key))
            #
            # for row in self._data:
            #     agg = Dict()
            #     for name, depth, short_key in meta:
            #         if short_key:
            #             agg[name] = row[depth][short_key]
            #         else:
            #             agg[name] = row[depth]
            #     output.append(agg)
            # return output

        Log.error("multiselect over FlatList not supported")
Example #9
    def getFrameVariables(self, body):
        contextVariables = []
        columns = self.fromData.columns

        parentVarNames = set()  # ALL PARENTS OF VARIABLES WITH "." IN NAME
        body = body.replace(".?", ".")

        for i, c in enumerate(columns):
            j = body.find(c.name, 0)
            while j >= 0:
                s = j
                j = body.find(c.name, s + 1)

                test0 = body[s - 1:s + len(c.name) + 1:]
                test3 = body[s - 8:s + len(c.name):]

                if test0[:-1] == "\"" + c.name:
                    continue
                if test3 == "_source." + c.name:
                    continue

                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name +
                                                " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")

                body = body.replace(c.name, "-" * len(c.name))

                if self.isLean or c.useSource:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name +
                                                " = getSourceValue(\"" +
                                                c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = _source[\"" +
                                                c.name + "\"];\n")
                else:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getDocValue(\"" +
                                                c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = getDocValue(\"" +
                                                c.name + "\");\n")
                break

        return "".join(contextVariables)
Example #10
            def add_column(c, query_path):
                c.last_updated = Date.now()
                c.table = join_field([c.es_index]+split_field(query_path[0]))

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    for alias in meta.aliases:
                        c = copy(c)
                        c.table = join_field([alias]+split_field(query_path[0]))
                        self._upsert_column(c)
Example #11
            def add_column(c, query_path):
                c.last_updated = Date.now()
                c.table = join_field([c.es_index] + split_field(query_path[0]))

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    for alias in meta.aliases:
                        c = copy(c)
                        c.table = join_field([alias] +
                                             split_field(query_path[0]))
                        self._upsert_column(c)
Example #12
                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name + " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")
Example #13
def _get_nested_path(field, schema):
    if not INDEX_CACHE:
        _late_import()

    if is_keyword(field):
        field = join_field([schema.es.alias] + split_field(field))
        for i, f in reverse(enumerate(split_field(field))):
            path = join_field(split_field(field)[0:i + 1:])
            if path in INDEX_CACHE:
                return join_field(split_field(path)[1::])
    return None
Example #14
                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name +
                                                " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")
Example #15
    def getFrameVariables(self, body):
        contextVariables = []
        columns = self.fromData.columns

        parentVarNames = set()    # ALL PARENTS OF VARIABLES WITH "." IN NAME
        body = body.replace(".?", ".")

        for i, c in enumerate(columns):
            j = body.find(c.name, 0)
            while j >= 0:
                s = j
                j = body.find(c.name, s + 1)

                test0 = body[s - 1: s + len(c.name) + 1:]
                test3 = body[s - 8: s + len(c.name):]

                if test0[:-1] == "\"" + c.name:
                    continue
                if test3 == "_source." + c.name:
                    continue

                def defParent(name):
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    if len(split_field(name)) == 1:
                        contextVariables.append("Map " + name + " = new HashMap();\n")
                    else:
                        defParent(join_field(split_field(name)[0:-1]))
                        contextVariables.append(name + " = new HashMap();\n")

                body = body.replace(c.name, "-"*len(c.name))

                if self.isLean or c.useSource:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getSourceValue(\"" + c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = _source[\"" + c.name + "\"];\n")
                else:
                    if len(split_field(c.name)) > 1:
                        defParent(join_field(split_field(c.name)[0:-1]))
                        contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
                    else:
                        contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n")
                break

        return "".join(contextVariables)
Example #16
    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname
        """
        col = split_field(fieldname)
        d = data
        for i, c in enumerate(col):
            try:
                d = d[c]
            except Exception, e:
                Log.error("{{name}} does not exist", name=fieldname)
            if isinstance(d, list) and len(col) > 1:
                if len(primary_column) <= depth + i:
                    primary_nested.append(True)
                    primary_column.append(c)
                    primary_branch.append(d)
                elif primary_nested[depth] and primary_column[depth + i] != c:
                    Log.error("only one branch of tree allowed")
                else:
                    primary_nested[depth + i] = True
                    primary_column[depth + i] = c
                    primary_branch[depth + i] = d

                return c, join_field(col[i + 1:])
            else:
                if len(primary_column) <= depth + i:
                    primary_nested.append(False)
                    primary_column.append(c)
                    primary_branch.append([d])
Example #17
def es_query_template(path):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
    :param path:
    :return:
    """
    sub_path = split_field(path)[1:]

    if sub_path:
        f0 = {}
        f1 = {}
        output = wrap(
            {
                "filter": {
                    "and": [
                        f0,
                        {"nested": {"path": join_field(sub_path), "filter": f1, "inner_hits": {"size": 100000}}},
                    ]
                },
                "from": 0,
                "size": 0,
                "sort": [],
            }
        )
        return output, wrap([f0, f1])
    else:
        f0 = {}
        output = wrap({"query": {"filtered": {"filter": f0}}, "from": 0, "size": 0, "sort": []})
        return output, wrap([f0])
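The two placeholder dicts returned next to the template are the same objects embedded inside it, so filling them in completes the query in place (wrap() keeps references to the underlying dicts). A minimal usage sketch; the path and term filters below are hypothetical, not from the original sources:

# HYPOTHETICAL USAGE OF es_query_template()
template, filters = es_query_template("unittest.run.timings")

filters[0]["term"] = {"build.platform": "linux64"}       # OUTER (PARENT DOCUMENT) FILTER
filters[1]["term"] = {"run.timings.step": "run_tests"}   # FILTER INSIDE THE nested CLAUSE

template.size = 10  # ASK FOR DOCUMENTS, NOT JUST COUNTS
# template NOW HOLDS A COMPLETE QUERY BODY READY TO SEND TO ELASTICSEARCH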
Example #18
    def getDomain(self, **kwargs):
        # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
        kwargs = wrap(kwargs)
        kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if isinstance(self.fields, list) else None)

        if not self.partitions and self.edges:
            # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP
            partitions = [
                {
                    "name": v.name,
                    "value": v.name,
                    "where": v.where,
                    "style": v.style,
                    "weight": v.weight  # YO! WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.edges)
                if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT) and v.where
            ]
            self.isFacet = True
        elif kwargs.depth == None:  # ASSUME self.fields IS A dict
            partitions = DictList()
            for i, part in enumerate(self.partitions):
                if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                    break
                partitions.append({
                    "name":part.name,
                    "value":part.value,
                    "where":part.where,
                    "style":coalesce(part.style, part.parent.style),
                    "weight":part.weight   # YO!  WHAT DO WE *NOT* COPY?
                })
        elif kwargs.depth == 0:
            partitions = [
                {
                    "name":v.name,
                    "value":v.value,
                    "where":v.where,
                    "style":v.style,
                    "weight":v.weight   # YO!  WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.partitions)
                if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT)]
        elif kwargs.depth == 1:
            partitions = DictList()
            rownum = 0
            for i, part in enumerate(self.partitions):
                if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                    continue
                rownum += 1
                try:
                    for j, subpart in enumerate(part.partitions):
                        partitions.append({
                            "name":join_field(split_field(subpart.parent.name) + [subpart.name]),
                            "value":subpart.value,
                            "where":subpart.where,
                            "style":coalesce(subpart.style, subpart.parent.style),
                            "weight":subpart.weight   # YO!  WHAT DO WE *NOT* COPY?
                        })
                except Exception, e:
                    Log.error("", e)
Example #19
    def __setitem__(self, key, value):
        if key == "":
            from pyLibrary.debugs.logs import Log

            Log.error("key is empty string.  Probably a bad idea")
        if key == ".":
            # SOMETHING TERRIBLE HAPPENS WHEN value IS NOT A Mapping;
            # HOPEFULLY THE ONLY OTHER METHOD RUN ON self IS unwrap()
            v = unwrap(value)
            _set(self, "_dict", v)
            return v
        if isinstance(key, str):
            key = key.decode("utf8")

        try:
            d = _get(self, "_dict")
            value = unwrap(value)
            if key.find(".") == -1:
                if value is None:
                    d.pop(key, None)
                else:
                    d[key] = value
                return self

            seq = split_field(key)
            for k in seq[:-1]:
                d = _getdefault(d, k)
            if value == None:
                d.pop(seq[-1], None)
            else:
                d[seq[-1]] = value
            return self
        except Exception, e:
            raise e
Example #20
    def __setitem__(self, key, value):
        if key == "":
            from pyLibrary.debugs.logs import Log

            Log.error("key is empty string.  Probably a bad idea")
        if isinstance(key, str):
            key = key.decode("utf8")
        d = self
        try:
            value = unwrap(value)
            if key.find(".") == -1:
                if value is None:
                    dict.pop(d, key, None)
                else:
                    dict.__setitem__(d, key, value)
                return self

            seq = split_field(key)
            for k in seq[:-1]:
                d = _getdefault(d, k)
            if value == None:
                dict.pop(d, seq[-1], None)
            else:
                dict.__setitem__(d, seq[-1], value)
            return self
        except Exception, e:
            raise e
Example #21
def is_setop(es, query):
    if not any(
            map(es.cluster.version.startswith,
                ["1.4.", "1.5.", "1.6.", "1.7."])):
        return False

    select = listwrap(query.select)

    if not query.edges:
        isDeep = len(split_field(
            query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        simpleAgg = AND([
            s.aggregate in ("count", "none") for s in select
        ])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

        # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
        if simpleAgg or isDeep:
            return True
    else:
        isSmooth = AND(
            (e.domain.type in ALGEBRAIC and e.domain.interval == "none")
            for e in query.edges)
        if isSmooth:
            return True

    return False
Example #22
    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS
        """
        try:
            # LAST TIME WE GOT INFO FOR THIS TABLE
            short_name = join_field(split_field(table_name)[0:1])
            table = self.get_table(short_name)[0]

            if not table:
                table = Table(
                    name=short_name,
                    url=None,
                    query_path=None,
                    timestamp=Date.now()
                )
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._get_columns(table=short_name)
            elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
                table.timestamp = Date.now()
                self._get_columns(table=short_name)

            with self.meta.columns.locker:
                columns = self.meta.columns.find(table_name, column_name)
            if columns:
                columns = jx.sort(columns, "name")
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    Log.note("waiting for columns to update {{columns|json}}", columns=[c.table+"."+c.es_column for c in columns if not c.last_updated])
                    Thread.sleep(seconds=1)
                return columns
        except Exception, e:
            Log.error("Not expected", cause=e)
Example #23
def wrap_from(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not type2container:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not config.default.settings:
            Log.error(
                "expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info"
            )

        settings = set_default({
            "index": split_field(frum)[0],
            "name": frum,
        }, config.default.settings)
        settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        return type2container["elasticsearch"](settings)
    elif isinstance(frum, Mapping) and frum.type and type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"]
                                        or isinstance(frum["from"],
                                                      (list, set))):
        from pyLibrary.queries.query import Query
        return Query(frum, schema=schema)
    else:
        return frum
Example #24
def is_fieldop(es, query):
    if not (es.cluster.version.startswith("1.4.")
            or es.cluster.version.startswith("1.5.")):
        return False

    # THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP)
    select = listwrap(query.select)
    if not query.edges:
        isDeep = len(split_field(
            query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        isSimple = AND(
            s.value != None and (s.value in ["*", "."] or is_keyword(s.value))
            for s in select)
        noAgg = AND(s.aggregate == "none" for s in select)

        if not isDeep and isSimple and noAgg:
            return True
    else:
        isSmooth = AND((
            e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none")
                       for e in query.edges)
        if isSmooth:
            return True

    return False
Example #25
    def __setitem__(self, key, value):
        if key == "":
            from pyLibrary.debugs.logs import Log

            Log.error("key is empty string.  Probably a bad idea")
        if isinstance(key, str):
            key = key.decode("utf8")

        try:
            d = _get(self, "_dict")
            value = unwrap(value)
            if key.find(".") == -1:
                if value is None:
                    d.pop(key, None)
                else:
                    d[key] = value
                return self

            seq = split_field(key)
            for k in seq[:-1]:
                d = _getdefault(d, k)
            if value == None:
                d.pop(seq[-1], None)
            else:
                d[seq[-1]] = value
            return self
        except Exception, e:
            raise e
Example #26
    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname
        """
        col = split_field(fieldname)
        d = data
        for i, c in enumerate(col):
            try:
                d = d[c]
            except Exception, e:
                Log.error("{{name}} does not exist", name=fieldname)
            if isinstance(d, list) and len(col) > 1:
                if len(primary_column) <= depth + i:
                    primary_nested.append(True)
                    primary_column.append(c)
                    primary_branch.append(d)
                elif primary_nested[depth] and primary_column[depth + i] != c:
                    Log.error("only one branch of tree allowed")
                else:
                    primary_nested[depth + i] = True
                    primary_column[depth + i] = c
                    primary_branch[depth + i] = d

                return c, join_field(col[i + 1 :])
            else:
                if len(primary_column) <= depth + i:
                    primary_nested.append(False)
                    primary_column.append(c)
                    primary_branch.append([d])
Example #27
    def __getitem__(self, key):
        if key == None:
            return Null
        if key == ".":
            output = _get(self, "_dict")
            if isinstance(output, Mapping):
                return self
            else:
                return output

        if isinstance(key, str):
            key = key.decode("utf8")
        elif not isinstance(key, unicode):
            from pyLibrary.debugs.logs import Log
            Log.error("only string keys are supported")

        d = _get(self, "_dict")

        if key.find(".") >= 0:
            seq = split_field(key)
            for n in seq:
                if isinstance(d, NullType):
                    d = NullType(d, n)  # OH DEAR, Null TREATS n AS PATH, NOT LITERAL
                else:
                    d = _getdefault(d, n)  # EVERYTHING ELSE TREATS n AS LITERAL


            return wrap(d)
        else:
            o = d.get(key)

        if o == None:
            return NullType(d, key)
        return wrap(o)
Example #28
def wrap_from(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not type2container:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        settings = set_default(
            {
                "index": split_field(frum)[0],
                "name": frum,
            },
            config.default.settings
        )
        settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        return type2container["elasticsearch"](settings)
    elif isinstance(frum, Mapping) and frum.type and type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import Query
        return Query(frum, schema=schema)
    else:
        return frum
Example #29
def setValues(expression, constants):
    if not constants:
        return expression

    constants = constants.copy()

    # EXPAND ALL CONSTANTS TO PRIMITIVE VALUES (MVEL CAN ONLY ACCEPT PRIMITIVE VALUES)
    for c in constants:
        value = c.value
        n = c.name
        if len(split_field(n)) >= 3:
            continue    # DO NOT GO TOO DEEP
        if isinstance(value, list):
            continue  # DO NOT MESS WITH ARRAYS

        if isinstance(value, Mapping):
            for k, v in value.items():
                constants.append({"name": n + "." + k, "value": v})

    for c in reverse(constants):  # REVERSE ORDER, SO LONGER NAMES ARE TESTED FIRST
        s = 0
        while True:
            s = expression.find(c.name, s)
            if s == -1:
                break
            if re.match(r"\w", expression[s - 1]):
                break
            if re.match(r"\w", expression[s + len(c.name)]):
                break

            v = value2MVEL(c.value)
            expression = expression[:s] + v + expression[s + len(c.name):]

    return expression
Example #30
    def __setitem__(self, key, value):
        assert not isinstance(key, str)

        d = _get(self, "__dict__")
        o = d["_obj"]
        k = d["__key__"]

        seq = [k] + split_field(key)
        _assign_to_null(o, seq, value)
Example #31
    def add_column(self, column):
        """
        ADD COLUMN, IF IT DOES NOT EXIST ALREADY
        """
        if column.name not in self.columns:
            self.columns[column.name] = {column}
        elif column.type not in [c.type for c in self.columns[column.name]]:
            self.columns[column.name].add(column)

        if column.type == "nested":
            nested_table_name = join_field(split_field(self.name) + split_field(column.name))
            # MAKE THE TABLE
            table = Table_usingSQLite(nested_table_name, self.db, self.uid + [UID_PREFIX+"id"+unicode(len(self.uid))], exists=False)
            self.nested_tables[nested_table_name] = table
        else:
            self.db.execute(
                "ALTER TABLE " + quote_table(self.name) + " ADD COLUMN " + _quote_column(column) + " " + column.type
            )
Example #32
def _get(v, k, d):
    for p in split_field(k):
        try:
            v = v.get(p)
            if v is None:
                return d
        except Exception:
            v = [vv.get(p) for vv in v]
    return v
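Read literally, the helper above walks a dotted key into nested mappings, returning the default d when a segment is missing, and falls back to mapping the remaining key over a list when an intermediate value is a list of dicts. A small illustrative call, assuming split_field splits on "." (the sample documents are invented):

# INVENTED SAMPLE DATA, ASSUMING split_field("a.b") == ["a", "b"]
doc = {"build": {"platform": "linux64"}}
print(_get(doc, "build.platform", None))   # -> "linux64"
print(_get(doc, "build.missing", None))    # -> None (THE DEFAULT d)

# AN INTERMEDIATE LIST OF DICTS TRIGGERS THE except BRANCH,
# WHICH MAPS THE REMAINING KEY OVER EACH ELEMENT
runs = {"run": [{"suite": "mochitest"}, {"suite": "xpcshell"}]}
print(_get(runs, "run.suite", None))       # -> ["mochitest", "xpcshell"]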
Example #33
def _get(v, k, d):
    for p in split_field(k):
        try:
            v = v.get(p)
            if v is None:
                return d
        except Exception:
            v = [vv.get(p) for vv in v]
    return v
Example #34
    def __setitem__(self, key, value):
        assert not isinstance(key, str)

        d = _get(self, "__dict__")
        o = d["_obj"]
        k = d["__key__"]

        seq = [k] + split_field(key)
        _assign_to_null(o, seq, value)
Example #35
class Json2Redshift(object):
    @use_settings
    def __init__(
            self,
            host,
            user,
            password,
            table,
            meta,  # REDSHIFT COPY COMMAND REQUIRES A BUCKET TO HOLD PARAMETERS
            database=None,
            port=5439,
            settings=None):
        self.settings = settings
        self.db = Redshift(settings)
        INDEX_CACHE[settings.table] = wrap(
            {"name": settings.table})  # HACK TO GET parse_columns TO WORK
        columns = parse_columns(settings.table,
                                settings.mapping.test_result.properties)
        nested = [c.name for c in columns if c.type == "nested"]
        self.columns = wrap([
            c for c in columns if c.type not in ["object"] and not any(
                c.name.startswith(n + ".") for n in nested)
        ])

        try:
            self.db.execute("""
                CREATE TABLE {{table_name}} (
                    "_id" character varying UNIQUE,
                    {{columns}}
                )""", {
                "table_name":
                self.db.quote_column(settings.table),
                "columns":
                SQL(",\n".join(
                    self.db.quote_column(c.name) + " " +
                    self.db.es_type2pg_type(c.type) for c in self.columns))
            },
                            retry=False)
        except Exception, e:
            if "already exists" in e:
                Log.alert("Table {{table}} exists in Redshift",
                          table=settings.table)
            else:
                Log.error("Could not make table", e)

        # MAKE jsonpaths FOR COPY COMMAND
        jsonpaths = {
            "jsonpaths": [
                "$" + "".join("[" + convert.string2quote(p) + "]"
                              for p in split_field(c.name))
                for c in self.columns
            ]
        }
        content = convert.value2json(jsonpaths)
        content = content.replace("\\\"", "'")
        # PUSH TO S3
        s3.Bucket(meta).write(meta.jsonspath, content)
Example #36
def get_document_value(document, column):
    """
    RETURN DOCUMENT VALUE IF MATCHES THE column (name, type)

    :param document: THE DOCUMENT
    :param column: A (name, type) PAIR
    :return: VALUE, IF IT IS THE SAME NAME AND TYPE
    """
    v = document.get(split_field(column.name)[0], None)
    return get_if_type(v, column.type)
Example #37
def get_document_value(document, column):
    """
    RETURN DOCUMENT VALUE IF MATCHES THE column (name, type)

    :param document: THE DOCUMENT
    :param column: A (name, type) PAIR
    :return: VALUE, IF IT IS THE SAME NAME AND TYPE
    """
    v = document.get(split_field(column.name)[0], None)
    return get_if_type(v, column.type)
Example #38
def _get_schema_from_list(frum, columns, prefix, nested_path, name_to_column):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            full_name = join_field(prefix)
            column = name_to_column.get(full_name)
            if not column:
                column = Column(
                    name=full_name,
                    table=".",
                    es_column=full_name,
                    es_index=".",
                    type="undefined",
                    nested_path=nested_path
                )
                columns[full_name] = column
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix + [name])
                column = name_to_column.get(full_name)
                if not column:
                    column = Column(
                        name=full_name,
                        table=".",
                        es_column=full_name,
                        es_index=".",
                        type="undefined",
                        nested_path=nested_path
                    )
                columns[full_name] = column
                if isinstance(value, list):
                    if len(value)==0:
                        this_type = "undefined"
                    elif len(value)==1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name], nested_path, name_to_column)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0])+[name])]+np)
                    _get_schema_from_list(value, columns, prefix + [name], newpath, name_to_column)
Example #39
    def __getitem__(self, key):
        if isinstance(key, str):
            key = key.decode("utf8")
        elif isinstance(key, int):
            return NullType(self, key)

        path = split_field(key)
        output = self
        for p in path:
            output = NullType(output, p)
        return output
Example #40
def _test_mode_wait(query):
    """
    WAIT FOR METADATA TO ARRIVE ON INDEX
    :param query: dict() OF REQUEST BODY
    :return: nothing
    """
    try:
        m = meta.singlton
        now = Date.now()
        end_time = now + MINUTE

        # MARK COLUMNS DIRTY
        m.meta.columns.update({
            "clear": ["partitions", "count", "cardinality", "last_updated"],
            "where": {
                "eq": {
                    "table": join_field(split_field(query["from"])[0:1])
                }
            }
        })

        # BE SURE THEY ARE ON THE todo QUEUE FOR RE-EVALUATION
        cols = [
            c for c in m.get_columns(table_name=query["from"], force=True)
            if c.type not in STRUCT
        ]
        for c in cols:
            Log.note("Mark {{column}} dirty at {{time}}",
                     column=c.name,
                     time=now)
            c.last_updated = now - TOO_OLD
            m.todo.push(c)

        while end_time > now:
            # GET FRESH VERSIONS
            cols = [
                c for c in m.get_columns(table_name=query["from"])
                if c.type not in STRUCT
            ]
            for c in cols:
                if not c.last_updated or c.cardinality == None:
                    Log.note(
                        "wait for column (table={{col.table}}, name={{col.name}}) metadata to arrive",
                        col=c)
                    break
            else:
                break
            Thread.sleep(seconds=1)
        for c in cols:
            Log.note(
                "fresh column name={{column.name}} updated={{column.last_updated|date}} parts={{column.partitions}}",
                column=c)
    except Exception, e:
        Log.warning("could not pickup columns", cause=e)
Example #41
    def __setitem__(self, key, value):
        try:
            d = _get(self, "__dict__")
            o = d["_obj"]
            path = d["__key__"]
            if path is None:
                return   # NO NEED TO DO ANYTHING

            seq = [path] + split_field(key)
            _assign(o, seq, value)
        except Exception, e:
            raise e
Example #42
def _get_schema_from_list(frum, columns, prefix, nested_path, name_to_column):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            full_name = join_field(prefix)
            column = name_to_column.get(full_name)
            if not column:
                column = Column(name=full_name,
                                table=".",
                                es_column=full_name,
                                es_index=".",
                                type="undefined",
                                nested_path=nested_path)
                columns[full_name] = column
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix + [name])
                column = name_to_column.get(full_name)
                if not column:
                    column = Column(name=full_name,
                                    table=".",
                                    es_column=full_name,
                                    es_index=".",
                                    type="undefined",
                                    nested_path=nested_path)
                columns[full_name] = column
                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name],
                                          nested_path, name_to_column)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist(
                        [join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, columns, prefix + [name],
                                          newpath, name_to_column)
Example #43
    def _decode_object(index, parent_path, path, name2index, destination=None, expected_vars=NO_VARS):
        if destination is None:
            destination = {}

        nested_done = False
        while True:
            c, index = skip_whitespace(index)
            if c == b',':
                continue
            elif c == b'"':
                name, index = simple_token(index, c)

                c, index = skip_whitespace(index)
                if c != b':':
                    Log.error("Expecting colon")
                c, index = skip_whitespace(index)

                child_expected = needed(name, expected_vars)
                if child_expected and nested_done:
                    Log.error("Expected property found after nested json.  Iteration failed.")

                full_path = join_field(split_field(parent_path) + [name])
                if path and (path[0] == full_path or path[0].startswith(full_path+".")):
                    # THE NESTED PROPERTY WE ARE LOOKING FOR
                    if path[0] == full_path:
                        new_path = path[1:]
                    else:
                        new_path = path

                    nested_done = True
                    for j, i in _decode(index - 1, full_path, new_path, name2index, expected_vars=child_expected):
                        index = i
                        j = {name: j}
                        for k, v in destination.items():
                            j.setdefault(k, v)
                        yield j, index
                    continue

                if child_expected:
                    # SOME OTHER PROPERTY
                    value, index = _decode_token(index, c, full_path, path, name2index, None, expected_vars=child_expected)
                    destination[name] = value
                else:
                    # WE DO NOT NEED THIS VALUE
                    index = jump_to_end(index, c)
                    continue


            elif c == "}":
                break

        if not nested_done:
            yield destination, index
Example #44
    def __setitem__(self, key, value):
        try:
            d = _get(self, "__dict__")
            o = d["_obj"]
            path = d["__key__"]
            if path is None:
                return   # NO NEED TO DO ANYTHING

            seq = [path] + split_field(key)
            _assign(o, seq, value)
        except Exception, e:
            raise e
Example #45
    def insert(self, docs):
        doc_collection = {}
        for d in docs:
            # ASSIGN A NON-NULL PRIMARY KEY
            if any(v == None for v in self.uid_accessor(d)):
                for u in self.uid:
                    d[u] = coalesce(d[u], unique_name())

            uid = wrap({u: d[u] for u in self.uid})
            self.flatten(d, uid, doc_collection)

        for nested_path, insertion in doc_collection.items():
            active_columns = list(insertion.active_columns)
            vals = [[quote_value(get_document_value(d, c)) for c in active_columns] for d in insertion.rows]

            command = "INSERT INTO " + quote_table(join_field(split_field(self.name)+split_field(nested_path[0]))) + "(" + \
                      ",".join(_quote_column(c) for c in active_columns) + \
                      ")\n" + \
                      " UNION ALL\n".join("SELECT " + ",".join(vv) for vv in vals)

            self.db.execute(command)
Example #46
def wrap_from(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not _containers.config.default.settings:
            Log.error(
                "expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info"
            )

        type_ = None
        index = frum
        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)
        else:
            type_ = _containers.config.default.type
            index = join_field(split_field(frum)[:1:])

        settings = set_default({
            "index": index,
            "name": frum
        }, _containers.config.default.settings)
        settings.type = None
        return _containers.type2container[type_](settings)
    elif isinstance(
            frum,
            Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return _containers.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"]
                                        or isinstance(frum["from"],
                                                      (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum
Example #47
    def __delitem__(self, key):
        if isinstance(key, str):
            key = key.decode("utf8")

        if key.find(".") == -1:
            dict.pop(self, key, None)
            return

        d = self
        seq = split_field(key)
        for k in seq[:-1]:
            d = d[k]
        d.pop(seq[-1], None)
Example #48
    def __getitem__(self, key):
        if isinstance(key, slice):
            return Null
        elif isinstance(key, str):
            key = key.decode("utf8")
        elif isinstance(key, int):
            return NullType(self, key)

        path = split_field(key)
        output = self
        for p in path:
            output = NullType(output, p)
        return output
Example #49
def is_deepop(es, query):
    if query.edges or query.groupby:
        return False
    if all(s.aggregate not in (None, "none") for s in listwrap(query.select)):
        return False

    vars = query_get_all_vars(query)
    columns = query.frum.get_columns()
    if len(split_field(query.frum.name)) > 1:
        return True
    if any(c for c in columns if c.nested_path and c.name in vars):
        return True
    return False
Example #50
    def __delitem__(self, key):
        if isinstance(key, str):
            key = key.decode("utf8")

        if key.find(".") == -1:
            dict.pop(self, key, None)
            return

        d = self
        seq = split_field(key)
        for k in seq[:-1]:
            d = d[k]
        d.pop(seq[-1], None)
Example #51
def wrap_from(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not type2container:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not containers.config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        type_ = None
        index = frum
        if frum.startswith("meta."):
            from pyLibrary.queries.meta import FromESMetadata

            if frum == "meta.columns":
                return meta.singlton.columns
            elif frum == "meta.table":
                return meta.singlton.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)
        else:
            type_ = containers.config.default.type
            index = split_field(frum)[0]

        settings = set_default(
            {
                "index": index,
                "name": frum
            },
            containers.config.default.settings
        )
        settings.type = None
        return type2container[type_](settings)
    elif isinstance(frum, Mapping) and frum.type and type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import Query
        return Query(frum, schema=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum
Example #52
    def add_column(self, column):
        """
        ADD COLUMN, IF IT DOES NOT EXIST ALREADY
        """
        if column.name not in self.columns:
            self.columns[column.name] = {column}
        elif column.type not in [c.type for c in self.columns[column.name]]:
            self.columns[column.name].add(column)

        if column.type == "nested":
            nested_table_name = join_field(
                split_field(self.name) + split_field(column.name))
            # MAKE THE TABLE
            table = Table_usingSQLite(
                nested_table_name,
                self.db,
                self.uid + [UID_PREFIX + "id" + unicode(len(self.uid))],
                exists=False)
            self.nested_tables[nested_table_name] = table
        else:
            self.db.execute("ALTER TABLE " + quote_table(self.name) +
                            " ADD COLUMN " + _quote_column(column) + " " +
                            column.type)
Example #53
def is_deepop(es, query):
    if query.edges or query.groupby:
        return False
    if all(s.aggregate not in (None, "none") for s in listwrap(query.select)):
        return False
    if len(split_field(query.frum.name)) > 1:
        return True

    # ASSUME IT IS NESTED IF WE ARE ASKING FOR NESTED COLUMNS
    # vars_ = query_get_all_vars(query)
    # columns = query.frum.get_columns()
    # if any(c for c in columns if c.nested_path and c.name in vars_):
    #    return True
    return False
Example #54
    def insert(self, docs):
        doc_collection = {}
        for d in docs:
            # ASSIGN A NON-NULL PRIMARY KEY
            if any(v == None for v in self.uid_accessor(d)):
                for u in self.uid:
                    d[u] = coalesce(d[u], unique_name())

            uid = wrap({u: d[u] for u in self.uid})
            self.flatten(d, uid, doc_collection)

        for nested_path, insertion in doc_collection.items():
            active_columns = list(insertion.active_columns)
            vals = [[
                quote_value(get_document_value(d, c)) for c in active_columns
            ] for d in insertion.rows]

            command = "INSERT INTO " + quote_table(join_field(split_field(self.name)+split_field(nested_path[0]))) + "(" + \
                      ",".join(_quote_column(c) for c in active_columns) + \
                      ")\n" + \
                      " UNION ALL\n".join("SELECT " + ",".join(vv) for vv in vals)

            self.db.execute(command)
Example #55
    def _inner(schema, parent_name, indent):
        more_lines = []
        for k,v in schema.items():
            full_name = join_field(split_field(parent_name)+[k])
            details = indent+"* "+_md_code(full_name)
            if v.type:
                details += " - "+_md_italic(v.type)
            else:
                Log.error("{{full_name}} is missing type", full_name=full_name)
            if v.description:
                details += " " + v.description
            more_lines.append(details)

            if v.type in ["object", "array", "nested"]:
                more_lines.extend(_inner(v.properties, full_name, indent+"  "))
        return more_lines
Example #56
def is_deep(query):
    select = listwrap(query.select)
    if len(select) > 1:
        return False

    if aggregates[select[0].aggregate] not in ("none", "count"):
        return False

    if len(query.edges) <= 1:
        return False

    isDeep = len(split_field(
        query["from"].name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    if not isDeep:
        return False  # BETTER TO USE TERM QUERY

    return True
Example #57
    def frum(self, fromPath, sourceVar, loopVariablePrefix):
        """
        indexName NAME USED TO REFER TO HIGH LEVEL DOCUMENT
        loopVariablePrefix PREFIX FOR LOOP VARIABLES
        """
        loopCode = "if (<PATH> != null){ for(<VAR> : <PATH>){\n<CODE>\n}}\n"
        self.prefixMap = []
        code = "<CODE>"
        path = split_field(fromPath)

        # ADD LOCAL VARIABLES
        from pyLibrary.queries.es09.util import INDEX_CACHE

        columns = INDEX_CACHE[path[0]].columns
        for i, c in enumerate(columns):
            if c.name == "attachments":
                Log.debug("")
            if c.name.find("\\.") >= 0:
                self.prefixMap.insert(
                    0, {
                        "path":
                        c.name,
                        "variable":
                        "get(" + sourceVar + ", \"" +
                        c.name.replace("\\.", ".") + "\")"
                    })
            else:
                self.prefixMap.insert(0, {
                    "path": c.name,
                    "variable": sourceVar + ".?" + c.name
                })

        # ADD LOOP VARIABLES
        currPath = []
        # self.prefixMap.insert(0, {"path": path[0], "variable": path[0]})
        for i, step in enumerate(path[1::]):
            loopVariable = loopVariablePrefix + str(i)
            currPath.append(step)
            pathi = ".".join(currPath)
            shortPath = self._translate(pathi)
            self.prefixMap.insert(0, {"path": pathi, "variable": loopVariable})

            loop = loopCode.replace("<VAR>",
                                    loopVariable).replace("<PATH>", shortPath)
            code = code.replace("<CODE>", loop)
        return code
Example #58
def set(constants):
    """
    REACH INTO THE MODULES AND OBJECTS TO SET CONSTANTS.
    THINK OF THIS AS PRIMITIVE DEPENDENCY INJECTION FOR MODULES.
    USEFUL FOR SETTING DEBUG FLAGS.
    """
    if not constants:
        return
    constants = wrap(constants)

    for k, new_value in constants.leaves():
        errors = []
        try:
            old_value = dot.set_attr(sys.modules, k, new_value)
            continue
        except Exception, e:
            errors.append(e)

        # ONE MODULE IS MISSING, THE CALLING MODULE
        try:
            caller_globals = sys._getframe(1).f_globals
            caller_file = caller_globals["__file__"]
            if not caller_file.endswith(".py"):
                raise Exception("do not know how to handle non-python caller")
            caller_module = caller_file[:-3].replace("/", ".")

            path = split_field(k)
            for i, p in enumerate(path):
                if i == 0:
                    continue
                prefix = join_field(path[:i])
                name = join_field(path[i:])
                if caller_module.endswith(prefix):
                    old_value = dot.set_attr(caller_globals, name, new_value)
                    if DEBUG:
                        from pyLibrary.debugs.logs import Log

                        Log.note("Changed {{module}}[{{attribute}}] from {{old_value}} to {{new_value}}",
                            module= prefix,
                            attribute= name,
                            old_value= old_value,
                            new_value= new_value)
                    break
        except Exception, e:
            errors.append(e)
Example #59
    def __getitem__(self, key):
        if key == None:
            return Null
        if isinstance(key, str):
            key = key.decode("utf8")

        d = self
        if key.find(".") >= 0:
            seq = split_field(key)
            for n in seq:
                d = _getdefault(d, n)
            return wrap(d)
        else:
            o = dict.get(d, key, None)

        if o == None:
            return NullType(d, key)
        return wrap(o)
Example #60
def es_query_template(path):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
    :param path:
    :return:
    """
    sub_path = split_field(path)[1:]

    if sub_path:
        f0 = {}
        f1 = {}
        output = wrap({
            "filter": {
                "and": [
                    f0, {
                        "nested": {
                            "path": join_field(sub_path),
                            "filter": f1,
                            "inner_hits": {
                                "size": 100000
                            }
                        }
                    }
                ]
            },
            "from": 0,
            "size": 0,
            "sort": []
        })
        return output, wrap([f0, f1])
    else:
        f0 = {}
        output = wrap({
            "query": {
                "filtered": {
                    "filter": f0
                }
            },
            "from": 0,
            "size": 0,
            "sort": []
        })
        return output, wrap([f0])
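Every example above leans on the same two path helpers. The library versions also handle the "." root path and escaped dots; a simplified sketch of the behavior the examples appear to rely on (not the library's actual implementation):

def split_field(field):
    # SIMPLIFIED SKETCH: DOTTED PATH -> LIST OF SEGMENTS
    if field in (None, "", "."):
        return []
    return field.split(".")


def join_field(path):
    # SIMPLIFIED SKETCH: LIST OF SEGMENTS -> DOTTED PATH
    return ".".join(path) if path else "."


assert split_field("build.platform") == ["build", "platform"]
assert join_field(["build", "platform"]) == "build.platform"
assert join_field(split_field("a.b.c")[:-1]) == "a.b"   # PARENT PATH, AS IN defParent()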