Esempio n. 1
0
 def __init__(self, **desc):
     """
     BUILD FROM A KEYWORD DESCRIPTION

     ALL SLOTS ARE FIRST NULLED, THEN THE DESCRIPTION IS APPLIED AS DEFAULTS.
     name FALLS BACK TO THE type, isFacet FALLS BACK TO False, AND dimension
     ALWAYS STARTS AS Null
     """
     spec = wrap(desc)
     self._set_slots_to_null(self.__class__)
     set_default(self, spec)
     self.isFacet = coalesce(spec.isFacet, False)
     self.name = coalesce(spec.name, spec.type)
     self.dimension = Null
Esempio n. 2
0
def _range_composer(edge, domain, es_query, to_float):
    """
    COMPOSE THE ES AGGREGATION FOR AN EDGE OVER A RANGE DOMAIN

    :param edge: THE EDGE; edge.value IS EITHER A Variable (SENT AS "field")
                 OR SOME OTHER EXPRESSION (SENT AS "script_field")
    :param domain: HAS partitions (EACH WITH min AND max), AND OPTIONALLY ITS OWN min AND max
    :param es_query: NESTED QUERY, MERGED (VIA set_default) INTO BOTH AGGREGATIONS
    :param to_float: CONVERTS BOUNDARY VALUES TO WHAT IS SENT TO ES
    :return: WRAPPED {"aggs": {"_match": ..., "_missing": ...}}
             _missing IS None UNLESS edge.allowNulls
    """
    # LOCAL IMPORT SO THIS BLOCK DOES NOT DEPEND ON THE MODULE HEADER
    from pyLibrary.collections import MIN

    # USE RANGES
    # THE LOWER BOUND OF THE DOMAIN IS THE *SMALLEST* PARTITION MINIMUM; USING THE
    # LARGEST WOULD COUNT EVERYTHING IN THE LOWER PARTITIONS AS "MISSING" TOO
    _min = coalesce(domain.min, MIN(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if isinstance(edge.value, Variable):
        calc = {"field": edge.value.var}
    else:
        calc = {"script_field": edge.value.to_ruby()}

    if edge.allowNulls:    # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        # "MISSING" MEANS OUTSIDE [_min, _max) OR HAVING NO VALUE AT ALL
        missing_filter = set_default(
            {"filter": {"or": [
                OrOp("or", [
                    InequalityOp("lt", [edge.value, Literal(None, to_float(_min))]),
                    InequalityOp("gte", [edge.value, Literal(None, to_float(_max))]),
                ]).to_esfilter(),
                edge.value.missing().to_esfilter()
            ]}},
            es_query
        )
    else:
        missing_filter = None

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            # ONE ES RANGE BUCKET PER PARTITION
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": missing_filter
    }})
Esempio n. 3
0
def Stats2ZeroMoment(stats):
    """
    CONVERT SUMMARY STATISTICS TO A ZeroMoment (RAW MOMENTS SCALED BY count)

    :param stats: HAS count, mean, variance, skew AND kurtosis; A MISSING
                  mean, variance, skew OR kurtosis IS TREATED AS ZERO
    :return: ZeroMoment(mz0, mz1, mz2, mz3, mz4)

    WHEN THE MODULE-LEVEL DEBUG FLAG IS SET, THE RESULT IS CONVERTED BACK WITH
    ZeroMoment2Stats() AND COMPARED TO THE INPUT; A MISMATCH IS REPORTED AS A
    "programmer error"
    """
    # MODIFIED FROM http://statsmodels.sourceforge.net/devel/_modules/statsmodels/stats/moment_helpers.html
    # ADDED count
    mc0 = stats.count
    mc1 = coalesce(stats.mean, 0)
    mc2 = coalesce(stats.variance, 0)
    skew = coalesce(stats.skew, 0)
    kurt = coalesce(stats.kurtosis, 0)

    mz0 = mc0
    mz1 = mc1 * mc0
    mz2 = (mc2 + mc1 * mc1) * mc0
    mc3 = skew * (mc2 ** 1.5)  # 3rd central moment
    mz3 = (mc3 + 3 * mc1 * mc2 + mc1 ** 3) * mc0  # 3rd non-central moment
    mc4 = (kurt + 3.0) * (mc2 ** 2.0)  # 4th central moment
    mz4 = (mc4 + 4 * mc1 * mc3 + 6 * mc1 * mc1 * mc2 + mc1 ** 4) * mc0

    m = ZeroMoment(mz0, mz1, mz2, mz3, mz4)
    if DEBUG:
        from pyLibrary.testing.fuzzytestcase import assertAlmostEqualValue

        # DEBUG IS SWITCHED OFF FOR THE DURATION OF THE ROUND-TRIP CHECK
        # (presumably TO STOP THE CHECK FROM RECURSING - TODO confirm)
        globals()["DEBUG"] = False
        try:
            v = ZeroMoment2Stats(m)
            assertAlmostEqualValue(v.count, stats.count, places=10)
            assertAlmostEqualValue(v.mean, stats.mean, places=10)
            assertAlmostEqualValue(v.variance, stats.variance, places=10)
            assertAlmostEqualValue(v.skew, stats.skew, places=10)
            assertAlmostEqualValue(v.kurtosis, stats.kurtosis, places=10)
        except Exception as e:
            Log.error("programmer error", cause=e)
        finally:
            # ALWAYS RESTORE THE FLAG, EVEN WHEN THE CHECK FAILS
            globals()["DEBUG"] = True

    return m
Esempio n. 4
0
 def single(col, r):
     """
     RETURN THE SQL CONDITION FOR ONE COLUMN, GIVEN ITS RANGE DESCRIPTION r
     (A MAPPING OF COMPARISON NAME TO VALUE)
     """
     lower = coalesce(r["gte"], r[">="])
     upper = coalesce(r["lte"], r["<="])
     if not (lower and upper):
         # GENERAL CASE: ONE COMPARISON PER ENTRY, ALL MUST HOLD
         return " AND ".join(
             db.quote_column(col) + name2sign[op] + db.quote_value(operand)
             for op, operand in r.items()
         )

     # SPECIAL CASE (BETWEEN)
     return db.quote_column(col) + SQL(" BETWEEN ") + db.quote_value(lower) + SQL(" AND ") + db.quote_value(upper)
Esempio n. 5
0
def percent(value, decimal=None, digits=None, places=None):
    """
    FORMAT value (A FRACTION, 1.0 == 100%) AS A PERCENTAGE STRING

    :param value: ANYTHING float() ACCEPTS; ZERO ALWAYS GIVES "0%"
    :param decimal: NUMBER OF DIGITS RIGHT OF THE DECIMAL POINT (DEFAULT 0)
    :param digits: NUMBER OF SIGNIFICANT DIGITS; WHEN GIVEN, IT OVERRIDES decimal
    :param places: USED WHEN digits IS NOT GIVEN
    :return: THE FORMATTED TEXT
    """
    value = float(value)
    if value == 0.0:
        return "0%"

    digits = coalesce(digits, places)
    # COMPARE WITH != (NOT "is not") SO NULL-LIKE OBJECTS ARE TREATED AS MISSING
    if digits != None:
        # THE +2 ACCOUNTS FOR THE TWO PLACES GAINED BY SCALING TO PERCENT
        magnitude = int(math.ceil(math.log10(abs(value))))
        decimal = digits - (magnitude + 2)

    decimal = coalesce(decimal, 0)
    template = "{:." + _unicode(max(decimal, 0)) + "%}"
    rounded = __builtin__.round(value, decimal + 2)
    return template.format(rounded)
Esempio n. 6
0
    def send_email(self,
            from_address=None,
            to_address=None,
            subject=None,
            text_data=None,
            html_data=None
    ):
        """Sends an email.

        from_address is an email address; to_address is one address, or a list
        of them.  Addresses can be plain (e.g. "user@example.com") or carry a
        real name (e.g. "John Smith <user@example.com>").  Anything not given
        is taken from self.settings.

        text_data and html_data are both strings.  You can specify one or both.
        If you specify both, the email is sent as a MIME multipart alternative:
        the recipient sees the HTML content if the viewer supports it, and the
        text content otherwise.

        Raises Exception when the sender or the recipients are missing, or when
        neither text_data nor html_data is given.
        """

        cfg = self.settings

        sender = coalesce(from_address, cfg["from"], cfg.from_address)
        recipients = listwrap(coalesce(to_address, cfg.to_address, cfg.to_addrs))

        if not sender or not recipients:
            raise Exception("Both from_addr and to_addrs must be specified")
        if not text_data and not html_data:
            raise Exception("Must specify either text_data or html_data")

        if text_data and html_data:
            msg = MIMEMultipart('alternative')
            msg.attach(MIMEText(text_data, 'plain'))
            msg.attach(MIMEText(html_data, 'html'))
        elif html_data:
            msg = MIMEText(html_data, 'html')
        else:
            msg = MIMEText(text_data)

        msg['Subject'] = coalesce(subject, cfg.subject)
        msg['From'] = sender
        msg['To'] = ', '.join(recipients)

        if self.server:
            # CALL AS PART OF A SMTP SESSION
            self.server.sendmail(sender, recipients, msg.as_string())
            return

        # CALL AS STAND-ALONE
        with self:
            self.server.sendmail(sender, recipients, msg.as_string())
Esempio n. 7
0
    def _convert_edge(self, edge):
        """
        NORMALIZE AN EDGE DESCRIPTION TO Data WITH name AND domain (AND, WHEN
        AVAILABLE, value, range AND allowNulls)

        edge MAY BE A PLAIN STRING (USED AS BOTH name AND value) OR A STRUCTURE.
        allowNulls IS True UNLESS IT WAS EXPLICITLY SET TO False
        """
        if isinstance(edge, basestring):
            return Data(name=edge, value=edge, domain=self._convert_domain())

        edge = wrap(edge)
        if not (edge.name or isinstance(edge.value, basestring)):
            Log.error("You must name compound edges: {{edge}}",  edge= edge)

        allow_nulls = edge.allowNulls is not False

        if isinstance(edge.value, (Mapping, list)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            shorthand = self._convert_domain()
            shorthand.dimension = Data(fields=edge.value)
            return Data(name=edge.name, allowNulls=allow_nulls, domain=shorthand)

        domain = self._convert_domain(edge.domain)
        return Data(
            name=coalesce(edge.name, edge.value),
            value=edge.value,
            range=edge.range,
            allowNulls=allow_nulls,
            domain=domain
        )
Esempio n. 8
0
 def convert(self, expr):
     """
     EXPAND INSTANCES OF name TO value

     LITERALS (True, False, NULL, NUMBERS, DATES) AND "." PASS THROUGH, KEYWORDS
     ARE LOOKED UP IN self.dimensions, QUERIES GO TO _convert_query(), AND
     STRUCTURES AND SEQUENCES ARE CONVERTED RECURSIVELY
     """
     if expr is True or expr == None or expr is False or Math.is_number(expr):
         return expr
     if expr == ".":
         return "."
     if is_keyword(expr):
         return coalesce(self.dimensions[expr], expr)

     if isinstance(expr, basestring):
         # ANY STRING REACHING HERE IS NOT A KEYWORD
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
     elif isinstance(expr, Date):
         return expr
     elif isinstance(expr, QueryOp):
         return self._convert_query(expr)
     elif isinstance(expr, Mapping):
         if expr["from"]:
             return self._convert_query(expr)
         if len(expr) >= 2:
             #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
             return wrap({name: self.convert(value) for name, value in expr.leaves()})

         # ASSUME SINGLE-CLAUSE EXPRESSION
         k, v = expr.items()[0]
         handler = converter_map.get(k, self._convert_bop)
         return handler(self, k, v)
     elif isinstance(expr, (list, set, tuple)):
         return wrap([self.convert(value) for value in expr])
     else:
         return expr
Esempio n. 9
0
def compileDuration2Term(edge):
    """
    COMPILE AN EDGE OVER A DURATION DOMAIN INTO A SCRIPT THAT MAPS A VALUE TO
    ITS PARTITION NUMBER, PLUS THE FUNCTION THAT MAPS THE NUMBER BACK

    :param edge: THE EDGE; edge.esscript IS NOT SUPPORTED
    :return: Data WITH
             toTerm   - {"head": "", "body": <script text>}
             fromTerm - FUNCTION FROM PARTITION NUMBER TO PARTITION
                        (edge.domain.NULL FOR THE NUMBER RESERVED FOR NULLS)
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if isKeyword(value):
        value = "doc[\"" + value + "\"].value"

    ref = coalesce(edge.domain.min, edge.domain.max, durations.ZERO)
    nullTest = compileNullTest(edge)

    ms = edge.domain.interval.milli
    if edge.domain.interval.month > 0:
        # MONTH-BASED INTERVALS USE ONE TWELFTH OF A YEAR PER MONTH
        ms = durations.YEAR.milli / 12 * edge.domain.interval.month

    # NUMBERS MUST BE TURNED INTO TEXT BEFORE THEY ARE SPLICED INTO THE SCRIPT;
    # ADDING A NUMBER TO A STRING RAISES TypeError
    partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + str(ms) + ")"
    # NULLS ARE GIVEN THE NUMBER ONE PAST THE LAST PARTITION
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
Esempio n. 10
0
 def __init__(self, edge, query, limit):
     """
     DECODER THAT STARTS WITH NO KNOWN PARTS

     THE DOMAIN LIMIT IS THE FIRST OF: THE DOMAIN'S OWN LIMIT, THE QUERY LIMIT,
     OR 10; IT IS THEN CAPPED WITH MAX_LIMIT
     """
     AggsDecoder.__init__(self, edge, query, limit)
     self.domain = edge.domain
     requested = coalesce(self.domain.limit, query.limit, 10)
     self.domain.limit = Math.min(requested, MAX_LIMIT)
     self.parts = []
     self.key2index = dict()
     self.computed_domain = False
Esempio n. 11
0
 def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
     """
     settings ARE FOR THE ELASTICSEARCH INDEX

     GETS (OR CREATES) THE INDEX, ADDS ITS ALIAS, SETS UP THE QUEUE OF PENDING
     LOG LINES (AT MOST max_size), FILLS IN RETRY DEFAULTS (3 TIMES, ONE MINUTE
     SLEEP) AND STARTS THE THREAD RUNNING self._insert_loop
     """
     cluster = Cluster(settings)
     schema = convert.json2value(convert.value2json(SCHEMA), leaves=True)
     self.es = cluster.get_or_create_index(
         schema=schema,
         limit_replicas=True,
         tjson=True,
         settings=settings
     )
     self.batch_size = batch_size

     alias = coalesce(settings.alias, settings.index)
     self.es.add_alias(alias)

     self.queue = Queue("debug logs to es", max=max_size, silent=True)

     retry_times = coalesce(self.es.settings.retry.times, 3)
     self.es.settings.retry.times = retry_times
     retry_sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
     self.es.settings.retry.sleep = retry_sleep

     Thread.run("add debug logs to es", self._insert_loop)
Esempio n. 12
0
    def query(self, sql, param=None):
        """
        RETURN LIST OF dicts

        :param sql: THE SQL; TREATED AS A TEMPLATE WHEN param IS GIVEN
        :param param: TEMPLATE PARAMETERS, QUOTED WITH self.quote_param() BEFORE EXPANSION
        :return: RESULT OF convert.table2list(columns, rows)

        WHEN THERE IS NO OPEN CURSOR (NOT IN A TRANSACTION) A TEMPORARY ONE IS
        USED FOR THE READ.  IT IS RELEASED AGAIN WHETHER OR NOT THE QUERY
        SUCCEEDS, SO A FAILED READ CAN NOT BE MISTAKEN FOR AN OPEN TRANSACTION
        BY LATER CALLS
        """
        self._execute_backlog()
        old_cursor = self.cursor
        try:
            if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            # description CAN BE MISSING, WHICH IS TREATED AS "NO COLUMNS"
            columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            fixed = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            result = convert.table2list(columns, fixed)

            if not old_cursor:   # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception as e:
            if not old_cursor and self.cursor:
                # DO NOT LEAVE THE TEMPORARY CURSOR BEHIND
                try:
                    self.cursor.close()
                except Exception:
                    pass  # BEST EFFORT; THE ORIGINAL PROBLEM IS THE ONE WORTH REPORTING
                self.cursor = None

            if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",  sql= sql, cause=e, stack_depth=1)
Esempio n. 13
0
    def __init__(self, host, index, alias=None, name=None, port=9200, settings=None):
        """
        SET UP THE METADATA STORE: DEFAULT CLUSTER, THE "todo" QUEUE, THE
        meta.tables AND meta.columns CONTAINERS, AND THE BACKGROUND WORKER

        DOES NOTHING WHEN self ALREADY HAS settings (ALREADY INITIALIZED)
        """
        global _elasticsearch
        if hasattr(self, "settings"):
            return

        from pyLibrary.queries.containers.list_usingPythonList import ListContainer
        from pyLibrary.env import elasticsearch as _elasticsearch

        self.settings = settings
        self.default_name = coalesce(name, alias, index)
        self.default_es = _elasticsearch.Cluster(settings=settings)
        self.todo = Queue("refresh metadata", max=100000, unique=True)

        self.es_metadata = Null
        # BACKDATED BY OLD_METADATA
        self.last_es_metadata = Date.now()-OLD_METADATA

        self.meta = Data()
        table_columns = metadata_tables()
        column_columns = metadata_columns()
        tables_schema = wrap({c.name: c for c in table_columns})
        self.meta.tables = ListContainer("meta.tables", [], tables_schema)
        self.meta.columns = ColumnList()
        for described in (column_columns, table_columns):
            self.meta.columns.insert(described)

        # TODO: fix monitor so it does not bring down ES
        worker = self.monitor if ENABLE_META_SCAN else self.not_monitor
        self.worker = Thread.run("refresh metadata", worker)
Esempio n. 14
0
def es_terms(es, mvel, query):
    """
    RETURN LIST OF ALL EDGE QUERIES

    EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES
    WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION

    :param es: PASSED THROUGH TO es09.util.post() (AND TO _es_terms2())
    :param mvel: PASSED TO compileEdges2Term() TO PACK ALL EDGES INTO ONE TERM
    :param query: THE QUERY; ITS edges, select, where AND limit ARE USED

    NOTE(review): NO return STATEMENT IS VISIBLE FOR THE GENERAL CASE; THE
    FUNCTION APPEARS TO CONTINUE PAST THIS EXCERPT - CONFIRM
    """
    # EXACTLY TWO EDGES ARE DELEGATED TO _es_terms2()
    if len(query.edges) == 2:
        return _es_terms2(es, mvel, query)

    select = listwrap(query.select)
    FromES = build_es_query(query)
    packed_term = compileEdges2Term(mvel, query.edges, wrap([]))
    # ONE TERMS FACET PER SELECT, ALL USING THE SAME PACKED TERM
    for s in select:
        FromES.facets[s.name] = {
            "terms": {
                "field": packed_term.field,
                "script_field": packed_term.expression,
                "size": coalesce(query.limit, 200000),
            },
            "facet_filter": simplify_esfilter(query.where),
        }

    term2Parts = packed_term.term2parts

    data = es09.util.post(es, FromES, query.limit)

    # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS
    # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN
    # (THE RESULT OF term2Parts() IS DISCARDED HERE; IT IS CALLED FOR ITS SIDE EFFECT)
    for k, f in data.facets.items():
        for t in f.terms:
            term2Parts(t.term)

    # NUMBER ALL EDGES FOR jx INDEXING
    for f, e in enumerate(query.edges):
        e.index = f
        if e.domain.type in ["uid", "default"]:
            # e.domain.partitions = jx.sort(e.domain.partitions, "value")
            for p, part in enumerate(e.domain.partitions):
                part.dataIndex = p
            # THE NULL PART IS INDEXED ONE PAST THE LAST PARTITION
            e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    output = {}
    # ONE EXTRA SLOT PER EDGE THAT ALLOWS NULLS
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception, e:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    # NOTE(review): THE VALUE IS SILENTLY DROPPED, AND e SHADOWS THE EDGE LOOP VARIABLE ABOVE
                    pass
Esempio n. 15
0
def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH

    :param query: THE QUERY; ITS edges (OR groupby) ARE DECODED, AND ITS frum
                  IS USED AS THE SCHEMA TO MAP VARIABLE NAMES TO ES COLUMNS
    :return: FlatList, WHERE ELEMENT i IS THE LIST OF AggsDecoder AT NESTED DEPTH i

    EDGES ARE COPIED BEFORE THEIR value OR range IS REWRITTEN TO ES COLUMN NAMES
    """
    schema = query.frum
    output = FlatList()
    for edge in wrap(coalesce(query.edges, query.groupby, [])):
        if edge.value != None and not isinstance(edge.value, NullOp):
            edge = edge.copy()
            vars_ = edge.value.vars()

            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)

            edge.value = edge.value.map({schema[v].name: schema[v].es_column for v in vars_})
        elif edge.range:
            edge = edge.copy()
            min_ = edge.range.min
            max_ = edge.range.max
            vars_ = min_.vars() | max_.vars()

            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)

            map_ = {schema[v].name: schema[v].es_column for v in vars_}
            edge.range = {
                "min": min_.map(map_),
                "max": max_.map(map_)
            }
        elif edge.domain.dimension:
            vars_ = edge.domain.dimension.fields
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(edge.domain.partitions.where):
            vars_ = set()
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()

        try:
            depths = set(len(schema[v].nested_path)-1 for v in vars_)
            if -1 in depths:
                Log.error(
                    "Do not know of column {{column}}",
                    column=unwraplist([v for v in vars_ if schema[v]==None])
                )
            if len(depths) > 1:
                Log.error("expression {{expr}} spans tables, can not handle", expr=edge.value)
            max_depth = Math.MAX(depths)
            while len(output) <= max_depth:
                output.append([])
        except Exception:
            # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
            # THE EXCEPTION IS DELIBERATELY NOT BOUND TO A NAME: BINDING IT TO THE
            # LOOP VARIABLE WOULD HAND THE EXCEPTION (NOT THE EDGE) TO THE DECODER
            max_depth = 0
            output.append([])

        limit = 0
        output[max_depth].append(AggsDecoder(edge, query, limit))

    return output
Esempio n. 16
0
def es_setop(es, query):
    """
    RUN A SET OPERATION: BUILD THE ES QUERY FROM THE TEMPLATE FOR query.frum,
    ADD THE where FILTER, LIMIT AND SORT, REQUEST NO FIELDS, AND HAND OVER TO
    extract_rows()
    """
    es_query, filters = es14.util.es_query_template(query.frum.name)

    where_filter = simplify_esfilter(query.where.to_esfilter())
    set_default(filters[0], where_filter)

    es_query.fields = FlatList()
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)

    return extract_rows(es, es_query, query)
Esempio n. 17
0
    def get_index(self, row):
        """
        RETURN THE dataIndex OF THE PARTITION THAT THIS ROW'S PART FALLS INTO

        :param row: SEQUENCE OF PARTS; THE ONE AT self.start IS USED
        :return: dataIndex OF THE FIRST PARTITION WITH min <= from < max, OR
                 len(partitions) (THE NULL SLOT) WHEN THE PART, OR ITS BOUNDS, ARE MISSING

        AN ERROR IS LOGGED WHEN NO PARTITION CONTAINS THE PART
        """
        domain = self.edge.domain
        part = row[self.start]
        if part == None:
            return len(domain.partitions)

        # A PART WITH ONLY A key USES IT FOR BOTH BOUNDS
        f = coalesce(part["from"], part["key"])
        t = coalesce(part["to"], part["key"])
        if f == None or t == None:
            return len(domain.partitions)

        for p in domain.partitions:
            if p.min <= f < p.max:
                return p.dataIndex

        # REPORT A COPY OF THE PART, WITHOUT ITS buckets
        # (copy MUST BE CALLED; THE BARE METHOD CAN NOT BE GIVEN ATTRIBUTES)
        sample = part.copy()
        sample.buckets = None
        Log.error("Expecting to find {{part}}",  part=sample)
Esempio n. 18
0
 def __init__(self, description, param=None, debug=True, silent=False):
     """
     :param description: KEPT AS self.template
     :param param: PARAMETERS (DEFAULT: EMPTY), KEPT WRAPPED AS self.param
     :param debug: KEPT AS self.debug
     :param silent: KEPT AS self.silent

     start AND end BEGIN AT ZERO, interval IS NOT YET KNOWN
     """
     self.template = description
     self.param = wrap(coalesce(param, {}))
     self.debug, self.silent = debug, silent
     self.start = self.end = 0
     self.interval = None
Esempio n. 19
0
    def start(cls, settings=None):
        """
        RUN ME FIRST TO SETUP THE THREADED LOGGING
        http://victorlin.me/2012/08/good-logging-practice-in-python/

        log       - LIST OF PARAMETERS FOR LOGGER(S)
        trace     - SHOW MORE DETAILS IN EVERY LOG LINE (default False)
        cprofile  - True==ENABLE THE C-PROFILER THAT COMES WITH PYTHON (default False)
                    USE THE LONG FORM TO SET THE FILENAME {"enabled": True, "filename": "cprofile.tab"}
        profile   - True==ENABLE pyLibrary SIMPLE PROFILING (default False) (eg with Profiler("some description"):)
                    USE THE LONG FORM TO SET FILENAME {"enabled": True, "filename": "profile.tab"}
        constants - UPDATE MODULE CONSTANTS AT STARTUP (PRIMARILY INTENDED TO CHANGE DEBUG STATE)

        DOES NOTHING WHEN settings IS EMPTY.  THE SHORT (BOOLEAN) FORMS OF
        cprofile AND profile ARE REPLACED, IN settings, WITH THE LONG FORM
        """
        global _Thread
        if not settings:
            return
        settings = wrap(settings)

        cls.settings = settings
        # TRACE STAYS ON ONCE IT HAS BEEN TURNED ON
        cls.trace = cls.trace | coalesce(settings.trace, False)
        if cls.trace:
            # IMPORTED ONLY WHEN TRACING
            from pyLibrary.thread.threads import Thread as _Thread

        # NORMALIZE cprofile TO THE LONG FORM, AND START THE PROFILER IF ENABLED
        if settings.cprofile is False:
            settings.cprofile = {"enabled": False}
        elif settings.cprofile is True or (isinstance(settings.cprofile, Mapping) and settings.cprofile.enabled):
            if isinstance(settings.cprofile, bool):
                settings.cprofile = {"enabled": True, "filename": "cprofile.tab"}

            import cProfile

            cls.cprofiler = cProfile.Profile()
            cls.cprofiler.enable()

        # NORMALIZE profile TO THE LONG FORM, AND SWITCH ON THE SIMPLE PROFILER IF ENABLED
        if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
            from pyLibrary.debugs import profiles

            if isinstance(settings.profile, bool):
                profiles.ON = True
                settings.profile = {"enabled": True, "filename": "profile.tab"}

            if settings.profile.enabled:
                profiles.ON = True

        if settings.constants:
            constants.set(settings.constants)

        if settings.log:
            # REPLACE THE MAIN LOG WITH A THREADED LOG THAT FEEDS ALL CONFIGURED LOGGERS
            cls.logging_multi = TextLog_usingMulti()
            if cls.main_log:
                cls.main_log.stop()
            cls.main_log = TextLog_usingThread(cls.logging_multi)

            for log in listwrap(settings.log):
                Log.add_log(Log.new_instance(log))

        # ANNOUNCED LAST, AFTER THE LOGGERS ABOVE HAVE BEEN SET UP
        if settings.cprofile.enabled==True:
            Log.alert("cprofiling is enabled, writing to {{filename}}", filename=os.path.abspath(settings.cprofile.filename))
Esempio n. 20
0
    def select(self, fields):
        """
        SELECT FROM THE ROWS

        :param fields: ONE OF
                       * A MAPPING - ITS value IS USED AS THE FIELD(S)
                       * A STRING  - RETURN THE LIST OF VALUES FOR THAT (POSSIBLY DOTTED) FIELD
                       * A LIST    - EACH ITEM HAS name AND value (A FIELD NAME, OR A
                                     FUNCTION OF THE ROW); RETURN ONE Data PER ROW
        :return: LIST (OR FlatList) OF SELECTED VALUES

        ANYTHING ELSE IS REPORTED AS AN ERROR
        """
        if isinstance(fields, Mapping):
            fields=fields.value

        if isinstance(fields, basestring):
            # RETURN LIST OF VALUES
            keys = split_field(fields)
            if len(keys) == 1:
                if self.path[0] == fields:
                    return [d[1] for d in self.data]
                return [d[0][fields] for d in self.data]

            mismatches = [i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]
            depth = coalesce(MIN(mismatches), len(self.path))  # LENGTH OF COMMON PREFIX

            output = FlatList()
            _select1((wrap(d[depth]) for d in self.data), keys[depth:], 0, output)
            return output

        if isinstance(fields, list):
            # PAIR EVERY OUTPUT NAME WITH THE FUNCTION THAT PULLS ITS VALUE FROM A ROW
            meta = []
            for f in fields:
                if hasattr(f.value, "__call__"):
                    getter = f.value
                else:
                    getter = functools.partial(lambda v, d: d[v], f.value)
                meta.append((f.name, getter))

            output = FlatList()
            for row in self._values():
                agg = Data()
                for name, getter in meta:
                    agg[name] = getter(row)
                output.append(agg)
            return output

        Log.error("multiselect over FlatList not supported")
Esempio n. 21
0
    def getDomain(self, **kwargs):
        """
        BUILD THE LIST OF PARTITIONS FOR THIS DIMENSION, AT MOST self.limit
        (DEFAULT DEFAULT_QUERY_LIMIT) OF THEM

        kwargs.depth SELECTS HOW DEEP TO GO; IT DEFAULTS TO len(self.fields)-1
        WHEN self.fields IS A list, AND IS LEFT EMPTY OTHERWISE

        NOTE(review): NO return STATEMENT IS VISIBLE; THE FUNCTION APPEARS TO
        CONTINUE PAST THIS EXCERPT - CONFIRM
        """
        # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
        kwargs = wrap(kwargs)
        kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if isinstance(self.fields, list) else None)

        if not self.partitions and self.edges:
            # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP
            # ONLY EDGES WITH A where ARE USED; THE EDGE name SERVES AS THE value
            partitions = [
                {
                    "name": v.name,
                    "value": v.name,
                    "where": v.where,
                    "style": v.style,
                    "weight": v.weight  # YO! WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.edges)
                if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT) and v.where
            ]
            self.isFacet = True
        elif kwargs.depth == None:  # ASSUME self.fields IS A dict
            partitions = FlatList()
            for i, part in enumerate(self.partitions):
                if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                    break
                partitions.append({
                    "name":part.name,
                    "value":part.value,
                    "where":part.where,
                    "style":coalesce(part.style, part.parent.style),  # FALL BACK TO THE PARENT'S STYLE
                    "weight":part.weight   # YO!  WHAT DO WE *NOT* COPY?
                })
        elif kwargs.depth == 0:
            # TOP-LEVEL PARTITIONS ONLY
            partitions = [
                {
                    "name":v.name,
                    "value":v.value,
                    "where":v.where,
                    "style":v.style,
                    "weight":v.weight   # YO!  WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.partitions)
                if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT)]
        elif kwargs.depth == 1:
            # ONE LEVEL DOWN: THE SUB-PARTITIONS OF EACH TOP-LEVEL PARTITION
            partitions = FlatList()
            # NOTE(review): rownum IS INCREMENTED BUT NOT READ IN THE VISIBLE CODE
            rownum = 0
            for i, part in enumerate(self.partitions):
                # NOTE(review): continue (NOT break) KEEPS ITERATING PAST THE LIMIT, UNLIKE THE BRANCH ABOVE
                if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                    continue
                rownum += 1
                try:
                    for j, subpart in enumerate(part.partitions):
                        partitions.append({
                            # NAME IS THE PARENT NAME EXTENDED WITH THE SUB-PARTITION NAME
                            "name":join_field(split_field(subpart.parent.name) + [subpart.name]),
                            "value":subpart.value,
                            "where":subpart.where,
                            "style":coalesce(subpart.style, subpart.parent.style),
                            "weight":subpart.weight   # YO!  WHAT DO WE *NOT* COPY?
                        })
                except Exception, e:
                    Log.error("", e)
Esempio n. 22
0
def normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    :param sort: ONE, OR A LIST OF
                 * A STRING OR INTEGER - SORTED ASCENDING (sort == 1)
                 * {name: direction}   - ONE ENTRY PER name
                 * {field|value, sort} - DIRECTION DEFAULTS TO 1
    :return: WRAPPED LIST OF {"value": ..., "sort": ...}; FlatList.EMPTY WHEN NOTHING IS GIVEN
    """
    if not sort:
        return FlatList.EMPTY

    normalized = FlatList()
    for s in listwrap(sort):
        if isinstance(s, basestring) or Math.is_integer(s):
            normalized.append({"value": s, "sort": 1})
            continue

        if not s.field and not s.value and s.sort==None:
            #ASSUME {name: sort} FORM
            for n, v in s.items():
                normalized.append({"value": n, "sort": sort_direction[v]})
            continue

        normalized.append({
            "value": coalesce(s.field, s.value),
            "sort": coalesce(sort_direction[s.sort], 1)
        })
    return wrap(normalized)
Esempio n. 23
0
 def _convert_window(self, window):
     """
     NORMALIZE A WINDOW DESCRIPTION: name DEFAULTS TO THE value, AND THE
     edges, sort, range AND where ARE EACH RUN THROUGH THEIR OWN CONVERTER
     """
     name = coalesce(window.name, window.value)
     edges = [self._convert_edge(e) for e in listwrap(window.edges)]
     sort = self._convert_sort(window.sort)
     range_ = self._convert_range(window.range)
     where = self._convert_where(window.where)

     return Data(
         name=name,
         value=window.value,
         edges=edges,
         sort=sort,
         aggregate=window.aggregate,
         range=range_,
         where=where
     )
Esempio n. 24
0
def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Snagged from unittest/case.py, then modified (Aug2014)

    Raise AssertionError unless test is (almost) equal to expected.

    :param test: the value under test
    :param expected: the reference; when it equals None nothing is checked
    :param digits: bound on log10 of the absolute difference
    :param places: number of significant places to agree on (default 15)
    :param msg: prefix for the failure message
    :param delta: largest absolute difference allowed
    :raises TypeError: when more than one of digits, places and delta is given
    :raises AssertionError: when the values differ by too much
    """
    if expected == None:  # None has no expectations
        return
    if test == expected:
        # shortcut
        return

    if not Math.is_number(expected):
        # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
        if isinstance(expected, list) and len(expected)==0 and test == None:
            return
        if isinstance(expected, Mapping) and not expected.keys() and test == None:
            return
        if test != expected:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))
        return

    num_param = len([p for p in (digits, places, delta) if p != None])
    if num_param>1:
        raise TypeError("specify only one of digits, places or delta")

    # the parameter names below must stay as they are: the templates read locals()
    if digits is not None:
        with suppress_exception:
            diff = Math.log10(abs(test-expected))
            if diff < digits:
                return

        detail = expand_template("{{test}} != {{expected}} within {{digits}} decimal places", locals())
    elif delta is not None:
        if abs(test - expected) <= delta:
            return

        detail = expand_template("{{test}} != {{expected}} within {{delta}} delta", locals())
    else:
        if places is None:
            places = 15

        with suppress_exception:
            diff = Math.log10(abs(test-expected))
            if diff < Math.ceiling(Math.log10(abs(test)))-places:
                return

        detail = expand_template("{{test|json}} != {{expected|json}} within {{places}} places", locals())

    raise AssertionError(coalesce(msg, "") + ": (" + detail + ")")
Esempio n. 25
0
    def add_alias(self, alias=None):
        """
        ADD alias (DEFAULT: THE ONE IN self.settings) TO THIS INDEX, THEN BLOCK
        UNTIL THE CLUSTER STATE SHOWS IT, POLLING ONCE A SECOND
        """
        alias = coalesce(alias, self.settings.alias)
        self.cluster_state = None

        action = {"add": {"index": self.settings.index, "alias": alias}}
        self.cluster.post(
            "/_aliases",
            data={"actions": [action]},
            timeout=coalesce(self.settings.timeout, 30)
        )

        # WAIT FOR ALIAS TO APPEAR
        while True:
            state = self.cluster.get("/_cluster/state", retry={"times": 5}, timeout=3)
            known_aliases = state.metadata.indices[self.settings.index].aliases
            if alias in known_aliases:
                return
            Log.note("Waiting for alias {{alias}} to appear", alias=alias)
            Till(seconds=1).wait()
Esempio n. 26
0
def fix(rownum, line, source, sample_only_filter, sample_size):
    """
    PREPARE ONE LINE OF JSON FOR INDEXING

    :param rownum: POSITION OF THE LINE; ROW ZERO IS ALWAYS DECODED
    :param line: THE JSON TEXT
    :param source: WHERE THE LINE CAME FROM (ITS name DECIDES IF THE suite IS FLATTENED)
    :param sample_only_filter: FILTER TESTED AGAINST ROW ZERO
    :param sample_size: SAMPLING RATE (DEFAULT 0.01)
    :return: (row, flag) - row IS {"id", "value"} WHEN THE LINE WAS DECODED, OR
             {"id", "json"} WHEN IT WAS NOT; flag IS True ONLY FOR A ROW ZERO
             THAT MATCHED THE SAMPLING TEST
    """
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    for text_id in ("bb", "tc"):
        line = line.replace('{"id": "' + text_id + '"}', '{"code": "' + text_id + '"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            suite = convert.json2value(suite_json)
            suite_name = convert.value2json(coalesce(suite.fullname, suite.name))
            line = line.replace(suite_json, suite_name)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
        return row, False

    too_long = len(line) > MAX_RECORD_LENGTH
    if too_long or line.find('"resource_usage":') != -1:
        # THESE LINES MUST BE DECODED; ONLY THE LONG ONES ARE SHORTENED
        value = convert.json2value(line)
        if too_long:
            _shorten(value, source)
        _id, value = _fix(value)
        return {"id": _id, "value": value}, False

    # FAST
    _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
    return {"id": _id, "json": line}, False
Esempio n. 27
0
 def _worker(self, please_stop):
     """
     PICK THE DATABASE CONNECTION: THE CANONICAL ONE WHEN IT EXISTS, OTHERWISE
     A NEW CONNECTION TO self.filename (IN-MEMORY WHEN THERE IS NO FILENAME)
     WITH THE EXTENSION FUNCTIONS LOADED, IF POSSIBLE
     """
     if not Sqlite.canonical:
         self.db = sqlite3.connect(coalesce(self.filename, ':memory:'))
         try:
             full_path = File("pyLibrary/vendor/sqlite/libsqlitefunctions.so").abspath
             # self.db.execute("SELECT sqlite3_enable_load_extension(1)")
             self.db.enable_load_extension(True)
             self.db.execute("SELECT load_extension('" + full_path + "')")
         except Exception as e:
             # THE EXTENSION IS OPTIONAL, CARRY ON WITHOUT IT
             Log.warning("loading sqlite extension functions failed, doing without. (no SQRT for you!)", cause=e)
     else:
         self.db = Sqlite.canonical
Esempio n. 28
0
    def _all_combos(self):
        """
        RETURN AN ITERATOR OF ALL COORDINATES

        COORDINATES ARE TUPLES, ONE ELEMENT PER DIMENSION IN self.dims, WITH
        THE LAST DIMENSION CHANGING FASTEST.  NOTHING IS YIELDED WHEN THE
        PRODUCT OF THE DIMENSIONS IS EMPTY OR ZERO
        """
        total = PRODUCT(self.dims)
        if total:
            # FOR EACH DIMENSION: (NUMBER OF COMBINATIONS OF ALL LATER DIMENSIONS, ITS OWN SIZE)
            strides = []
            for i, size in enumerate(self.dims):
                strides.append((coalesce(PRODUCT(self.dims[i+1:]), 1), size))

            for c in xrange(total):
                yield tuple(int(c / stride) % size for stride, size in strides)
Esempio n. 29
0
    def __init__(self, **desc):
        """
        BUILD A "set" DOMAIN FROM A KEYWORD DESCRIPTION

        HOW THE PARTITIONS ARE INDEXED (self.key, self.map, self.order) DEPENDS
        ON THE SHAPE OF desc.partitions, desc.key AND desc.dimension.fields;
        THE FIRST MATCHING CASE BELOW WINS.  label DEFAULTS TO "name"
        """
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.type = "set"
        self.order = {}
        self.NULL = Null
        self.partitions = FlatList()

        if isinstance(self.key, set):
            Log.error("problem")

        if isinstance(desc.partitions[0], (int, float, basestring)):
            # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            self.key = "value"
            # None IS ORDERED AFTER ALL THE PARTS
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                part = {"name": p, "value": p, "dataIndex": i}
                self.partitions.append(part)
                self.map[p] = part
                self.order[p] = i
        elif desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
            # MULTI-FIELD DIMENSION: INDEX ON THE FIELDS
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and isinstance(desc.key, (list, set)):
            # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
        elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping):
            # THE KEY VALUE OF THE FIRST PART IS ITSELF A STRUCTURE
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
            # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
            # self.map = UniqueIndex(keys=self.key)
        elif desc.key == None:
            Log.error("Domains must have keys")
        elif self.key:
            # SIMPLE KEY: PLAIN dict FROM KEY VALUE TO PART
            self.key = desc.key
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
        elif all(p.esfilter for p in self.partitions):
            # EVERY PART HAS AN esfilter DEFINED, SO USE THEM
            # NOTE(review): self.partitions WAS SET TO AN EMPTY FlatList ABOVE, SO THIS LOOP LOOKS LIKE IT HAS NOTHING TO DO - CONFIRM
            for i, p in enumerate(self.partitions):
                p.dataIndex = i

        else:
            Log.error("Can not hanldle")

        self.label = coalesce(self.label, "name")
Esempio n. 30
0
 def _open(self):
     """
     DO NOT USE THIS UNLESS YOU close() FIRST

     CONNECT USING self.settings; username/user, password/passwd AND schema/db
     ARE ACCEPTED AS ALTERNATIVE NAMES.  A FAILURE IS REPORTED WITH THE
     host AND port, UNLESS THE host LOOKS LIKE IT CARRIES A PROTOCOL PREFIX
     """
     settings = self.settings
     try:
         self.db = connect(
             host=settings.host,
             port=settings.port,
             user=coalesce(settings.username, settings.user),
             passwd=coalesce(settings.password, settings.passwd),
             db=coalesce(settings.schema, settings.db),
             charset=u"utf8",
             use_unicode=True,
             ssl=coalesce(settings.ssl, None)
         )
     except Exception as e:
         if settings.host.find("://") != -1:
             Log.error(u"Failure to connect.  PROTOCOL PREFIX IS PROBABLY BAD", e)
         else:
             Log.error(u"Failure to connect to {{host}}:{{port}}",
                 host= settings.host,
                 port= settings.port,
                 cause=e
             )