Example #1
    def _convert_edge(self, edge):
        if isinstance(edge, basestring):
            return Dict(
                name=edge,
                value=edge,
                domain=self._convert_domain()
            )
        else:
            edge = wrap(edge)
            if not edge.name and not isinstance(edge.value, basestring):
                Log.error("You must name compound edges: {{edge}}",  edge= edge)

            if isinstance(edge.value, (Mapping, list)) and not edge.domain:
                # COMPLEX EDGE IS SHORT HAND
                domain = self._convert_domain()
                domain.dimension = Dict(fields=edge.value)

                return Dict(
                    name=edge.name,
                    allowNulls=False if edge.allowNulls is False else True,
                    domain=domain
                )

            domain = self._convert_domain(edge.domain)
            return Dict(
                name=coalesce(edge.name, edge.value),
                value=edge.value,
                range=edge.range,
                allowNulls=False if edge.allowNulls is False else True,
                domain=domain
            )
Example #2
def safe_size(source):
    """
    READ THE source UP TO SOME LIMIT, THEN COPY TO A FILE IF TOO BIG
    RETURN A str() OR A FileString()
    """

    if source is None:
        return None

    total_bytes = 0
    bytes = []
    b = source.read(MIN_READ_SIZE)
    while b:
        total_bytes += len(b)
        bytes.append(b)
        if total_bytes > MAX_STRING_SIZE:
            try:
                data = FileString(TemporaryFile())
                for bb in bytes:
                    data.write(bb)
                del bytes
                del bb
                b = source.read(MIN_READ_SIZE)
                while b:
                    total_bytes += len(b)
                    data.write(b)
                    b = source.read(MIN_READ_SIZE)
                data.seek(0)
                Log.note("Using file of size {{length}} instead of str()",  length= total_bytes)

                return data
            except Exception, e:
                Log.error("Could not write file > {{num}} bytes",  num= total_bytes, cause=e)
        b = source.read(MIN_READ_SIZE)

    # SMALL ENOUGH TO KEEP IN MEMORY, AS PROMISED BY THE DOCSTRING
    return b"".join(bytes)
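A stdlib-only sketch of the same spill-to-disk pattern may make the control flow clearer. CHUNK and LIMIT stand in for pyLibrary's MIN_READ_SIZE and MAX_STRING_SIZE, and a plain temporary file replaces FileString; all of those substitutions are assumptions, not the library's API:

from tempfile import TemporaryFile

CHUNK = 8192           # STAND-IN FOR MIN_READ_SIZE
LIMIT = 10 * 1024 ** 2  # STAND-IN FOR MAX_STRING_SIZE

def safe_read(source):
    # READ source IN CHUNKS; SPILL TO A TEMP FILE IF IT EXCEEDS LIMIT
    chunks = []
    total = 0
    while True:
        b = source.read(CHUNK)
        if not b:
            return b"".join(chunks)
        chunks.append(b)
        total += len(b)
        if total > LIMIT:
            spill = TemporaryFile()
            for c in chunks:
                spill.write(c)
            # STREAM THE REMAINDER STRAIGHT TO DISK
            for b in iter(lambda: source.read(CHUNK), b""):
                spill.write(b)
            spill.seek(0)
            return spill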
Example #3
def groupby_size(data, size):
    if hasattr(data, "next"):
        iterator = data
    elif hasattr(data, "__iter__"):
        iterator = data.__iter__()
    else:
        Log.error("do not know how to handle this type")

    done = DictList()
    def more():
        output = DictList()
        for i in range(size):
            try:
                output.append(iterator.next())
            except StopIteration:
                done.append(True)
                break
        return output

    # THIS IS LAZY
    i = 0
    while True:
        output = more()
        yield (i, output)
        if len(done) > 0:
            break
        i += 1
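For contrast, a minimal stand-alone version of the same fixed-size grouping using itertools.islice; unlike the original, this sketch never yields a trailing empty group (a deliberate simplification, not pyLibrary's exact contract):

from itertools import islice

def chunked(iterable, size):
    # YIELD (group_number, items) IN GROUPS OF AT MOST size
    it = iter(iterable)
    i = 0
    while True:
        block = list(islice(it, size))
        if not block:
            return
        yield i, block
        i += 1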
Example #4
def groupby_Multiset(data, min_size, max_size):
    # GROUP multiset BASED ON POPULATION OF EACH KEY, TRYING TO STAY IN min/max LIMITS
    if min_size == None:
        min_size = 0

    total = 0
    i = 0
    g = list()
    for k, c in data.items():
        if total < min_size or total + c < max_size:
            total += c
            g.append(k)
        elif total < max_size:
            yield (i, g)
            i += 1
            total = c
            g = [k]

        if total >= max_size:
            Log.error("({{min}}, {{max}}) range is too strict given step of {{increment}}",
                min=min_size,
                max=max_size,
                increment=c
            )

    if g:
        yield (i, g)
Example #5
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
        return list of (keys, values) pairs where
            group by the set of keys
            values IS LIST OF ALL data that has those keys
        contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    """

    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Container):
        return data.groupby(keys)

    try:
        keys = listwrap(keys)
        get_key = jx_expression_to_function(keys)
        if not contiguous:
            data = sorted(data, key=get_key)

        def _output():
            for g, v in itertools.groupby(data, get_key):
                group = Dict()
                for k, gg in zip(keys, g):
                    group[k] = gg
                yield (group, wrap(v))

        return _output()

    except Exception, e:
        Log.error("Problem grouping", e)
Example #6
    def execute(self, requests):
        """
        RETURN A GENERATOR THAT HAS len(requests) RESULTS (ANY ORDER)
        EXPECTING requests TO BE A list OF dicts, EACH dict IS USED AS kwargs TO GIVEN functions
        """
        if not isinstance(requests, (list, tuple, GeneratorType, Iterable)):
            Log.error("Expecting requests to be a list or generator", stack_depth=1)
        else:
            requests = list(requests)

        # FILL QUEUE WITH WORK
        self.inbound.extend(requests)

        num = len(requests)

        def output():
            for i in xrange(num):
                result = self.outbound.pop()
                if "exception" in result:
                    raise result["exception"]
                else:
                    yield result["response"]

        if self.outbound is not None:
            return output()
        else:
            return
Example #7
    def write(self, data):
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "wb") as f:
            if isinstance(data, list) and self.key:
                from pyLibrary.debugs.logs import Log

                Log.error("list of data and keys are not supported, encrypt before sending to file")

            if isinstance(data, list):
                pass
            elif isinstance(data, basestring):
                data = [data]
            elif hasattr(data, "__iter__"):
                pass

            for d in data:
                if not isinstance(d, unicode):
                    from pyLibrary.debugs.logs import Log

                    Log.error("Expecting unicode data only")
                if self.key:
                    f.write(crypto.encrypt(d, self.key).encode("utf8"))
                else:
                    f.write(d.encode("utf8"))
Example #8
    def insert_list(self, table_name, records):
        if not records:
            return

        columns = set()
        for r in records:
            columns |= set(r.keys())
        columns = jx.sort(columns)

        try:
            self.execute(
                "DELETE FROM " + self.quote_column(table_name) + " WHERE _id IN {{ids}}",
                {"ids": self.quote_column([r["_id"] for r in records])}
            )

            command = \
                "INSERT INTO " + self.quote_column(table_name) + "(" + \
                ",".join([self.quote_column(k) for k in columns]) + \
                ") VALUES " + ",\n".join([
                    "(" + ",".join([self.quote_value(r.get(k, None)) for k in columns]) + ")"
                    for r in records
                ])
            self.execute(command)
        except Exception, e:
            Log.error("problem with insert", e)
Example #9
def _convert_in(op, term):
    if not term:
        Log.error("Expecting a term")
    if not isinstance(term, Mapping):
        Log.error("Expecting {{op}} to have dict value",  op= op)
    var, val = term.items()[0]

    if isinstance(val, list):
        v2 = [vv for vv in val if vv != None]

        if len(v2) == 0:
            if len(val) == 0:
                return False
            else:
                return {"missing": {"field": var}}

        if len(v2) == 1:
            output = {"term": {var: v2[0]}}
        else:
            output = {"terms": {var: v2}}

        if len(v2) != len(val):
            output = {"or": [
                {"missing": {"field": var}},
                output
            ]}
        return output
    else:
        return {"term": term}
Example #10
    def __init__(
        self,
        exchange,  # name of the Pulse exchange
        topic,  # message name pattern to subscribe to  ('#' is wildcard)
        target=None,  # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETED WITHOUT EXCEPTION
        target_queue=None,  # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS
        host='pulse.mozilla.org',  # url to connect,
        port=5671,  # tcp port
        user=None,
        password=None,
        vhost="/",
        start=0,  # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
        ssl=True,
        applabel=None,
        heartbeat=False,  # True to also get the Pulse heartbeat message
        durable=False,  # True to keep queue after shutdown
        serializer='json',
        broker_timezone='GMT',
        settings=None
    ):
        self.target_queue = target_queue
        self.pulse_target = target
        if (target_queue == None and target == None) or (target_queue != None and target != None):
            Log.error("Expecting a queue (for fast digesters) or a target (for slow digesters)")

        Thread.__init__(self, name="Pulse consumer for " + settings.exchange, target=self._worker)
        self.settings = settings
        settings.callback = self._got_result
        settings.user = coalesce(settings.user, settings.username)
        settings.applabel = coalesce(settings.applabel, settings.queue, settings.queue_name)
        settings.topic = topic

        self.pulse = ModifiedGenericConsumer(settings, connect=True, **settings)
        self.count = coalesce(start, 0)
        self.start()
Example #11
def process_test_result(source_key, source, destination, please_stop=None):
    path = key2path(source_key)
    destination.delete({"and": [
        {"term": {"etl.source.id": path[1]}},
        {"term": {"etl.source.source.id": path[0]}}
    ]})

    lines = source.read_lines()

    keys = []
    data = []
    for l in lines:
        record = convert.json2value(l)
        if record._id == None:
            continue
        record.result.crash_result = None  #TODO: Remove me after May 2015
        keys.append(record._id)
        data.append({
            "id": record._id,
            "value": record
        })
        record._id = None
    if data:
        try:
            destination.extend(data)
        except Exception, e:
            if "Can not decide on index by build.date" in e:
                if source.bucket.name == "ekyle-test-result":
                    # KNOWN CORRUPTION
                    # TODO: REMOVE LATER (today = Mar2015)
                    delete_list = source.bucket.keys(prefix=key_prefix(source_key))
                    for d in delete_list:
                        source.bucket.delete_key(d)
            Log.error("Can not add to sink", e)
Example #12
def datetime2string(value, format="%Y-%m-%d %H:%M:%S"):
    try:
        return value.strftime(format)
    except Exception, e:
        from pyLibrary.debugs.logs import Log

        Log.error("Can not format {{value}} with {{format}}", value=value, format=format, cause=e)
Example #13
    def forall(self, sql, param=None, _execute=None):
        assert _execute
        num = 0

        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor: # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}",  sql= indent(sql))
            self.cursor.execute(sql)

            columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
            for r in self.cursor:
                num += 1
                _execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))

            if not old_cursor:   # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

        except Exception, e:
            Log.error("Problem executing SQL:\n{{sql|indent}}",  sql= sql, cause=e, stack_depth=1)
Example #14
def get_file(ref, url):
    from pyLibrary.env.files import File

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path


    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            _Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception, e:
        content = None
        _Log.error("Could not read file {{filename}}", filename=path, cause=e)
Example #15
    def __init__(self, filename, buffering=2 ** 14, suffix=None):
        """
        YOU MAY SET filename TO {"path":p, "key":k} FOR CRYPTO FILES
        """
        if filename == None:
            from pyLibrary.debugs.logs import Log

            Log.error("File must be given a filename")
        elif isinstance(filename, basestring):
            self.key = None
            if filename.startswith("~"):
                home_path = os.path.expanduser("~")
                if os.sep == "\\":
                    home_path = home_path.replace(os.sep, "/")
                if home_path.endswith("/"):
                    home_path = home_path[:-1]
                filename = home_path + filename[1::]
            self._filename = filename.replace(os.sep, "/")  # USE UNIX STANDARD
        else:
            self.key = convert.base642bytearray(filename.key)
            self._filename = "/".join(filename.path.split(os.sep))  # USE UNIX STANDARD

        while self._filename.find(".../") >= 0:
            # LET ... REFER TO GRANDPARENT, .... REFER TO GREAT-GRAND-PARENT, etc...
            self._filename = self._filename.replace(".../", "../../")
        self.buffering = buffering


        if suffix:
            self._filename = File.add_suffix(self._filename, suffix)
Example #16
    def execute(
        self,
        command,
        param=None,
        retry=True     # IF command FAILS AND retry IS False, JUST THROW ERROR
    ):
        if param:
            command = expand_template(command, self.quote_param(param))

        output = None
        done = False
        while not done:
            try:
                with self.locker:
                    if not self.connection:
                        self._connect()

                with Closer(self.connection.cursor()) as curs:
                    curs.execute(command)
                    if curs.rowcount >= 0:
                        output = curs.fetchall()
                self.connection.commit()
                done = True
            except Exception, e:
                try:
                    self.connection.rollback()
                    # TODO: FIGURE OUT WHY rollback() DOES NOT HELP
                    self.connection.close()
                except Exception, f:
                    pass
                self.connection = None
                self._connect()
                if not retry:
                    Log.error("Problem with command:\n{{command|indent}}",  command= command, cause=e)
Example #17
 def simple_token(index, c):
     if c == b'"':
         json.mark(index - 1)
         while True:
             c = json[index]
             index += 1
             if c == b"\\":
                 index += 1
             elif c == b'"':
                 break
         return json_decoder(json.release(index).decode("utf8")), index
     elif c in b"{[":
         Log.error("Expecting a primitive value")
     elif c == b"t" and json.slice(index, index + 3) == "rue":
         return True, index + 3
     elif c == b"n" and json.slice(index, index + 3) == "ull":
         return None, index + 3
     elif c == b"f" and json.slice(index, index + 4) == "alse":
         return False, index + 4
     else:
         json.mark(index-1)
         while True:
             c = json[index]
             if c in b',]}':
                 break
             index += 1
         return float(json.release(index)), index
Example #18
    def create(self):
        try:
            os.makedirs(self._filename)
        except Exception, e:
            from pyLibrary.debugs.logs import Log

            Log.error("Could not make directory {{dir_name}}",  dir_name= self._filename, cause=e)
Example #19
    def _decode(index, parent_path, path, name2index, expected_vars=NO_VARS):
        c, index = skip_whitespace(index)

        if not path:
            if c != b"[":
                # TREAT VALUE AS SINGLE-VALUE ARRAY
                yield _decode_token(index, c, parent_path, path, name2index, None, expected_vars)
            else:
                c, index = skip_whitespace(index)
                if c == b']':
                    return  # EMPTY ARRAY

                while True:
                    value, index = _decode_token(index, c, parent_path, path, name2index, None, expected_vars)
                    c, index = skip_whitespace(index)
                    if c == b']':
                        yield value, index
                        return
                    elif c == b',':
                        c, index = skip_whitespace(index)
                        yield value, index

        else:
            if c != b'{':
                Log.error("Expecting all objects to at least have {{path}}", path=path[0])

            for j, i in _decode_object(index, parent_path, path, name2index, expected_vars=expected_vars):
                yield j, i
Example #20
    def _decode_token(index, c, full_path, path, name2index, destination, expected_vars):
        if c == b'{':
            if not expected_vars:
                index = jump_to_end(index, c)
                value = None
            elif expected_vars[0] == ".":
                json.mark(index-1)
                index = jump_to_end(index, c)
                value = json_decoder(json.release(index).decode("utf8"))
            else:
                count = 0
                for v, i in _decode_object(index, full_path, path, name2index, destination, expected_vars=expected_vars):
                    index = i
                    value = v
                    count += 1
                if count != 1:
                    Log.error("Expecting object, nothing nested")
        elif c == b'[':
            if not expected_vars:
                index = jump_to_end(index, c)
                value = None
            else:
                json.mark(index - 1)
                index = jump_to_end(index, c)
                value = json_decoder(json.release(index).decode("utf8"))
        else:
            if expected_vars and expected_vars[0] == ".":
                value, index = simple_token(index, c)
            else:
                index = jump_to_end(index, c)
                value = None

        return value, index
Example #21
 def get_index(self, row):
     if self.computed_domain:
         try:
             part = row[self.start]
             return self.domain.getIndexByKey(part["key"])
         except Exception, e:
             Log.error("problem", cause=e)
Example #22
    def send(self, topic, message):
        """Publishes a pulse message to the proper exchange."""

        if not message:
            Log.error("Expecting a message")

        message._prepare()

        if not self.connection:
            self.connect()

        producer = Producer(
            channel=self.connection,
            exchange=Exchange(self.settings.exchange, type='topic'),
            routing_key=topic
        )

        # The message is actually a simple envelope format with a payload and
        # some metadata.
        final_data = Dict(
            payload=message.data,
            _meta=set_default({
                'exchange': self.settings.exchange,
                'routing_key': message.routing_key,
                'serializer': self.settings.serializer,
                'sent': time_to_string(datetime.datetime.now(timezone(self.settings.broker_timezone))),
                'count': self.count
            }, message.metadata)
        )

        producer.publish(jsons.scrub(final_data), serializer=self.settings.serializer)
        self.count += 1
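The envelope that actually goes over the wire therefore has the following shape (field values illustrative, traced from the code above):

# {
#     "payload": { ...message.data... },
#     "_meta": {
#         "exchange": "<settings.exchange>",
#         "routing_key": "<message.routing_key>",
#         "serializer": "json",
#         "sent": "2015-03-01 12:00:00",  # BROKER-TIMEZONE TIMESTAMP
#         "count": 0                      # MONOTONIC, PER PUBLISHER
#     }
# }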
Example #23
def read_settings(filename=None, defs=None):
    # READ SETTINGS
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            Log.error("Can not file settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        settings = ref.get("file:///" + settings_file.abspath)
        if defs:
            settings.args = argparse(defs)
        return settings
    else:
        defs = listwrap(defs)
        defs.append({
            "name": ["--settings", "--settings-file", "--settings_file"],
            "help": "path to JSON file with settings",
            "type": str,
            "dest": "filename",
            "default": "./settings.json",
            "required": False
        })
        args = argparse(defs)
        settings = ref.get("file://" + args.filename.replace(os.sep, "/"))
        settings.args = args
        return settings
Example #24
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        self.type = "range"
        self.NULL = Null

        if self.partitions:
            # IGNORE THE min, max, interval
            if not self.key:
                Log.error("Must have a key value")

            parts = listwrap(self.partitions)
            for i, p in enumerate(parts):
                self.min = Math.min(self.min, p.min)
                self.max = Math.max(self.max, p.max)
                if p.dataIndex != None and p.dataIndex != i:
                    Log.error("Expecting `dataIndex` to agree with the order of the parts")
                if p[self.key] == None:
                    Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
                p.dataIndex = i

            # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
            for p, q in itertools.product(parts, parts):
                if p is not q and p.min <= q.min and q.min < p.max:
                    Log.error("partitions overlap!")

            self.partitions = parts
            return
        elif any([self.min == None, self.max == None, self.interval == None]):
            Log.error("Can not handle missing parameter")

        self.key = "min"
        self.partitions = wrap([{"min": v, "max": v + self.interval, "dataIndex": i} for i, v in enumerate(frange(self.min, self.max, self.interval))])
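Tracing the last line with min=0, max=10, interval=5 (and assuming frange behaves like a float-friendly range):

# frange(0, 10, 5) -> 0, 5
# self.partitions == [
#     {"min": 0, "max": 5,  "dataIndex": 0},
#     {"min": 5, "max": 10, "dataIndex": 1},
# ]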
Example #25
def json2value(json_string, params={}, flexible=False, leaves=False):
    """
    :param json_string: THE JSON
    :param params: STANDARD JSON PARAMS
    :param flexible: REMOVE COMMENTS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py# L58
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(l) for l in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        try:
            value = wrap(json_decoder(unicode(json_string)))
        except Exception, e:
            Log.error("can not decode\n{{content}}", content=json_string, cause=e)

        if leaves:
            value = wrap_leaves(value)

        return value
    except Exception, e:
        Log.error("Can not decode JSON:\n{{json}}", json=json_string, cause=e)
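The flexible branch is plain regex preprocessing, so its effect on trailing commas can be checked with the standard library alone (remove_line_comment and the triple-quote rule are pyLibrary-specific and skipped here):

import json
import re

s = '{"a": [1, 2,], }'
s = re.sub(r",\s*\}", r"}", s)  # DROP TRAILING COMMA IN OBJECTS
s = re.sub(r",\s*\]", r"]", s)  # DROP TRAILING COMMA IN LISTS
assert json.loads(s) == {"a": [1, 2]}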
Example #26
    def column_query(self, sql, param=None):
        """
        RETURN RESULTS IN [column][row_num] GRID
        """
        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor: # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            result = zip(*grid)

            if not old_cursor:   # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception, e:
            if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",  sql= sql, cause=e,stack_depth=1)
Example #27
def latin12unicode(value):
    if isinstance(value, unicode):
        Log.error("can not convert unicode from latin1")
    try:
        return unicode(value.decode('iso-8859-1'))
    except Exception, e:
        Log.error("Can not convert {{value|quote}} to unicode", value=value)
Example #28
 def quote_value(self, value):
     """
     convert values to mysql code for the same
     mostly delegate directly to the mysql lib, but some exceptions exist
     """
     try:
         if value == None:
             return "NULL"
         elif isinstance(value, SQL):
             if not value.param:
                 # value.template CAN BE MORE THAN A TEMPLATE STRING
                 return self.quote_sql(value.template)
             param = {k: self.quote_sql(v) for k, v in value.param.items()}
             return expand_template(value.template, param)
         elif isinstance(value, basestring):
             return self.db.literal(value)
         elif isinstance(value, datetime):
             return "str_to_date('" + value.strftime("%Y%m%d%H%M%S") + "', '%Y%m%d%H%i%s')"
         elif hasattr(value, '__iter__'):
             return self.db.literal(json_encode(value))
         elif isinstance(value, Mapping):
             return self.db.literal(json_encode(value))
         elif Math.is_number(value):
             return unicode(value)
         else:
             return self.db.literal(value)
     except Exception, e:
         Log.error("problem quoting SQL", e)
Example #29
def string2url(value):
    if isinstance(value, unicode):
        return "".join([_map2url[c] for c in unicode2latin1(value)])
    elif isinstance(value, str):
        return "".join([_map2url[c] for c in value])
    else:
        Log.error("Expecting a string")
Example #30
 def _convert_from(self, frum):
     if isinstance(frum, basestring):
         return Dict(name=frum)
     elif isinstance(frum, (Container, Query)):
         return frum
     else:
         Log.error("Expecting from clause to be a name, or a container")
Example #31
def _get_attr(obj, path):
    if not path:
        return obj

    attr_name = path[0]

    if isinstance(obj, ModuleType):
        if attr_name in obj.__dict__:
            return _get_attr(obj.__dict__[attr_name], path[1:])
        elif attr_name in dir(obj):
            return _get_attr(obj[attr_name], path[1:])

        # TRY FILESYSTEM
        from pyLibrary.env.files import File
        possible_error = None
        if File.new_instance(File(obj.__file__).parent,
                             attr_name).set_extension("py").exists:
            try:
                # THIS CASE IS WHEN THE __init__.py DOES NOT IMPORT THE SUBDIR FILE
                # WE CAN STILL PUT THE PATH TO THE FILE IN THE from CLAUSE
                if len(path) == 1:
                    # GET MODULE OBJECT
                    output = __import__(obj.__name__ + "." + attr_name,
                                        globals(), locals(), [path[0]], 0)
                    return output
                else:
                    # GET VARIABLE IN MODULE
                    output = __import__(obj.__name__ + "." + attr_name,
                                        globals(), locals(), [path[1]], 0)
                    return _get_attr(output, path[1:])
            except Exception, e:
                from pyLibrary.debugs.exceptions import Except
                possible_error = Except.wrap(e)

        # TRY A CASE-INSENSITIVE MATCH
        attr_name = lower_match(attr_name, dir(obj))
        if not attr_name:
            from pyLibrary.debugs.logs import Log
            Log.warning(PATH_NOT_FOUND + ". Returning None.",
                        cause=possible_error)
        elif len(attr_name) > 1:
            from pyLibrary.debugs.logs import Log
            Log.error(AMBIGUOUS_PATH_FOUND + " {{paths}}", paths=attr_name)
        else:
            return _get_attr(obj[attr_name[0]], path[1:])
Example #32
    def __init__(
        self,
        alias,  # NAME OF THE ALIAS
        type=None,  # SCHEMA NAME, WILL HUNT FOR ONE IF None
        explore_metadata=True,  # IF PROBING THE CLUSTER FOR METADATA IS ALLOWED
        debug=False,
        timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
        settings=None):
        self.debug = debug
        if self.debug:
            Log.alert("Elasticsearch debugging on {{index|quote}} is on",
                      index=settings.index)

        self.settings = settings
        self.cluster = Cluster(settings)

        if type == None:
            if not explore_metadata:
                Log.error(
                    "Alias() was given no `type` (aka schema) and not allowed to explore metadata.  Do not know what to do now."
                )

            indices = self.cluster.get_metadata().indices
            if not self.settings.alias or self.settings.alias == self.settings.index:
                candidates = [(name, i) for name, i in indices.items()
                              if self.settings.index in i.aliases]
                index = qb.sort(candidates, 0).last()[1]
            else:
                index = indices[self.settings.index]

            # FIND MAPPING WITH MOST PROPERTIES (AND ASSUME THAT IS THE CANONICAL TYPE)
            max_prop = -1
            for _type, mapping in index.mappings.items():
                num_prop = len(mapping.properties.keys())
                if max_prop < num_prop:
                    max_prop = num_prop
                    self.settings.type = _type
                    type = _type

            if type == None:
                Log.error("Can not find schema type for index {{index}}",
                          index=coalesce(self.settings.alias,
                                         self.settings.index))

        self.path = "/" + alias + "/" + type
Example #33
    def monitor(self, please_stop):
        please_stop.on_go(lambda: self.todo.add(Thread.STOP))
        while not please_stop:
            try:
                if not self.todo:
                    with self.meta.columns.locker:
                        old_columns = filter(
                            lambda c: (c.last_updated == None or c.last_updated
                                       < Date.now() - TOO_OLD) and c.type
                            not in ["object", "nested"], self.meta.columns)
                        if old_columns:
                            Log.note("Old columns wth dates {{dates|json}}",
                                     dates=wrap(old_columns).last_updated)
                            self.todo.extend(old_columns)
                            # TEST CONSISTENCY
                            for c, d in product(list(self.todo.queue),
                                                list(self.todo.queue)):
                                if c.es_column == d.es_column and c.table == d.table and c != d:
                                    Log.error("")
                        else:
                            Log.note("no more metatdata to update")

                column = self.todo.pop(timeout=10 * MINUTE)
                if column:
                    Log.note("update {{table}}.{{column}}",
                             table=column.table,
                             column=column.es_column)
                    if column.type in ["object", "nested"]:
                        with self.meta.columns.locker:
                            column.last_updated = Date.now()
                        continue
                    elif column.last_updated >= Date.now() - TOO_OLD:
                        continue
                    try:
                        self._update_cardinality(column)
                        if DEBUG and not column.table.startswith(
                                TEST_TABLE_PREFIX):
                            Log.note("updated {{column.name}}", column=column)
                    except Exception, e:
                        Log.warning(
                            "problem getting cardinality for {{column.name}}",
                            column=column,
                            cause=e)
            except Exception, e:
                Log.warning("problem in cardinality monitor", cause=e)
Example #34
def datetime2unix(value):
    try:
        if value == None:
            return None
        elif isinstance(value, datetime):
            epoch = datetime(1970, 1, 1)
            diff = value - epoch
            return diff.total_seconds()
        elif isinstance(value, date):
            epoch = date(1970, 1, 1)
            diff = value - epoch
            return diff.total_seconds()
        else:
            from pyLibrary.debugs.logs import Log
            Log.error("Can not convert {{value}} of type {{type}}", value=value, type=value.__class__)
    except Exception, e:
        from pyLibrary.debugs.logs import Log
        Log.error("Can not convert {{value}}", value=value, cause=e)
Example #35
def typed_encode(value):
    """
    pypy DOES NOT OPTIMIZE GENERATOR CODE WELL
    """
    try:
        _buffer = UnicodeBuilder(1024)
        _typed_encode(value, _buffer)
        output = _buffer.build()
        return output
    except Exception, e:
        # THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS
        from pyLibrary.debugs.logs import Log

        Log.warning("Serialization of JSON problems", e)
        try:
            return pretty_json(value)
        except Exception, f:
            Log.error("problem serializing object", f)
Example #36
def _tuple(template, data, fields, depth, output):
    deep_path = None
    deep_fields = DictList()
    for d in data:
        record = template
        for f in fields:
            index, children, record = _tuple_deep(d, f, depth, record)
            if index:
                path = f.value[0:index:]
                deep_fields.append(f)
                if deep_path and path != deep_path:
                    Log.error("Dangerous to select into more than one branch at time")
        if not children:
            output.append(record)
        else:
            _tuple(record, children, deep_fields, depth + 1, output)

    return output
Example #37
    def __getitem__(self, index):
        offset = index - self.start
        if offset < 0:
            Log.error("Can not go in reverse on stream index=={{index}}", index=index)

        if self._mark == -1:
            while self.buffer_length <= offset:
                self.start += len(self.buffer)
                offset = index - self.start
                self.buffer = self.get_more()
                self.buffer_length = len(self.buffer)
        else:
            while self.buffer_length <= offset:
                self.buffer += self.get_more()
                self.buffer_length = len(self.buffer)


        return self.buffer[offset]
Example #38
def process_unittest(source_key,
                     etl_header,
                     buildbot_summary,
                     unittest_log,
                     destination,
                     please_stop=None):

    timer = Timer("Process log {{file}} for {{key}}", {
        "file": etl_header.name,
        "key": source_key
    })
    try:
        with timer:
            summary = accumulate_logs(source_key, etl_header.name,
                                      unittest_log, please_stop)
    except Exception, e:
        Log.error("Problem processing {{key}}", key=source_key, cause=e)
        summary = None
Example #39
def parse_partition(part):
    for p in part.partitions:
        if part.index:
            p.index = part.index  # COPY INDEX DOWN
        parse_partition(p)
        p.value = coalesce(p.value, p.name)
        p.parent = part

    if not part.esfilter:
        if len(part.partitions) > 100:
            Log.error(
                "Must define an esfilter on {{name}} there are too many partitions ({{num_parts}})",
                name=part.name,
                num_parts=len(part.partitions))

        # DEFAULT esfilter IS THE UNION OF ALL CHILD FILTERS
        if part.partitions:
            part.esfilter = {"or": part.partitions.esfilter}
Example #40
 def assertRaises(self, problem, function, *args, **kwargs):
     try:
         function(*args, **kwargs)
     except Exception, e:
         e = Except.wrap(e)
         if isinstance(problem, basestring):
             if problem in e:
                 return
             Log.error(
                 "expecting an exception returning {{problem|quote}} got something else instead",
                 problem=problem,
                 cause=e)
         elif not isinstance(e, problem):
             Log.error(
                 "expecting an exception of type {{type}} to be raised",
                 type=problem)
         else:
             return
     Log.error("Expecting an exception to be raised")
Example #41
def almost_equal(first, second, digits=None, places=None, delta=None):
    try:
        if first == second:
            return True

        if delta is not None:
            if abs(first - second) <= delta:
                return True
        else:
            places = coalesce(places, digits, 18)
            diff = math.log10(abs(first - second))
            if diff < Math.ceiling(math.log10(first)) - places:
                return True

        return False
    except Exception, e:
        from pyLibrary.debugs.logs import Log
        Log.error("problem comparing", cause=e)
Example #42
    def floor(self, interval=None):
        if not isinstance(interval, Duration):
            from pyLibrary.debugs.logs import Log
            Log.error("Expecting an interval as a Duration object")

        output = Duration(0)
        if interval.month:
            if self.month:
                output.month = int(Math.floor(self.month / interval.month) * interval.month)
                output.milli = output.month * MILLI_VALUES.month
                return output

            # A MONTH OF DURATION IS BIGGER THAN A CANONICAL MONTH
            output.month = int(Math.floor(self.milli * 12 / MILLI_VALUES["year"] / interval.month) * interval.month)
            output.milli = output.month * MILLI_VALUES.month
        else:
            output.milli = Math.floor(self.milli / interval.milli) * interval.milli
        return output
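Tracing the non-month branch: flooring 90 minutes to 1-hour intervals keeps only the whole hour (milli values as in MILLI_VALUES):

# self.milli = 5400000 (90 min), interval.milli = 3600000 (1 hour)
# output.milli = floor(5400000 / 3600000) * 3600000 = 3600000  -> EXACTLY 1 HOUR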
Example #43
def value2json(obj, pretty=False, sort_keys=False):
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON",
                     type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " +
                      str(type(obj)))
        return json
    except Exception, e:
        e = Except.wrap(e)
        with suppress_exception:
            json = pypy_json_encode(obj)
            return json

        Log.error("Can not encode into JSON: {{value}}",
                  value=repr(obj),
                  cause=e)
Example #44
def pipe2value(value):
    type = value[0]
    if type == '0':
        return None
    if type == 'n':
        return value2number(value[1::])

    if type != 's' and type != 'a':
        Log.error("unknown pipe type ({{type}}) in {{value}}",
                  type=type,
                  value=value)

    # EXPECTING MOST STRINGS TO NOT HAVE ESCAPED CHARS
    output = _unPipe(value)
    if type == 's':
        return output

    return [pipe2value(v) for v in output.split("|")]
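Reading the tag characters off the code, the encoding round-trips as below (assuming _unPipe strips the leading tag and un-escapes; the samples are hand-traced, not library output):

# pipe2value("0")      -> None
# pipe2value("n42")    -> value2number("42") == 42
# pipe2value("shello") -> "hello"
# pipe2value("an1|n2") -> [pipe2value("n1"), pipe2value("n2")] == [1, 2]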
Example #45
    def getSelect(self, **kwargs):
        if self.fields:
            if len(self.fields) == 1:
                return Dict(name=self.full_name,
                            value=self.fields[0],
                            aggregate="none")
            else:
                return Dict(name=self.full_name,
                            value=self.fields,
                            aggregate="none")

        domain = self.getDomain(**kwargs)
        if not domain.getKey:
            Log.error("Should not happen")
        if not domain.NULL:
            Log.error("Should not happen")

        return Dict(name=self.full_name, domain=domain, aggregate="none")
Example #46
 def _iter():
     g = 0
     out = DictList()
     try:
         for i, d in enumerate(data):
             out.append(d)
             if (i + 1) % max_size == 0:
                 yield g, out
                 g += 1
                 out = DictList()
         if out:
             yield g, out
     except Exception, e:
         e = Except.wrap(e)
         if out:
             # AT LEAST TRY TO RETURN WHAT HAS BEEN PROCESSED SO FAR
             yield g, out
         Log.error("Problem inside jx.groupby", e)
Example #47
def _all_default(d, default, seen=None):
    """
    ANY VALUE NOT SET WILL BE SET BY THE default
    THIS IS RECURSIVE
    """
    if default is None:
        return
    if seen is None:
        seen = {}
    if isinstance(default, Dict):
        default = object.__getattribute__(default,
                                          "_dict")  # REACH IN AND GET THE dict
        # from pyLibrary.debugs.logs import Log
        # Log.error("strictly dict (or object) allowed: got {{type}}", type=default.__class__.__name__)

    for k, default_value in default.items():
        default_value = unwrap(
            default_value
        )  # TWO DIFFERENT Dicts CAN SHARE id() BECAUSE THEY ARE SHORT LIVED
        existing_value = _get_attr(d, [k])

        if existing_value == None:
            if default_value != None:
                if isinstance(default_value, Mapping):
                    df = seen.get(id(default_value))
                    if df is not None:
                        _set_attr(d, [k], df)
                    else:
                        copy_dict = {}
                        seen[id(default_value)] = copy_dict
                        _set_attr(d, [k], copy_dict)
                        _all_default(copy_dict, default_value, seen)
                else:
                    # ASSUME PRIMITIVE (OR LIST, WHICH WE DO NOT COPY)
                    try:
                        _set_attr(d, [k], default_value)
                    except Exception, e:
                        if PATH_NOT_FOUND not in e:
                            from pyLibrary.debugs.logs import Log
                            Log.error("Can not set attribute {{name}}",
                                      name=k,
                                      cause=e)
        elif isinstance(existing_value, list) or isinstance(
                default_value, list):
            _set_attr(d, [k],
                      listwrap(existing_value) + listwrap(default_value))
Example #48
    def __init__(
            self,
            exchange,  # name of the Pulse exchange
            topic,  # message name pattern to subscribe to  ('#' is wildcard)
            target=None,  # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETED WITHOUT EXCEPTION
            target_queue=None,  # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS
            host='pulse.mozilla.org',  # url to connect,
            port=5671,  # tcp port
            user=None,
            password=None,
            vhost="/",
            start=0,  # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
            ssl=True,
            applabel=None,
            heartbeat=False,  # True to also get the Pulse heartbeat message
            durable=False,  # True to keep queue after shutdown
            serializer='json',
            broker_timezone='GMT',
            settings=None):
        global count
        count = coalesce(start, 0)

        self.target_queue = target_queue
        self.pulse_target = target
        if (target_queue == None and target == None) or (target_queue != None
                                                         and target != None):
            Log.error(
                "Expecting a queue (for fast digesters) or a target (for slow digesters)"
            )

        Thread.__init__(self,
                        name="Pulse consumer for " + settings.exchange,
                        target=self._worker)
        self.settings = settings
        settings.callback = self._got_result
        settings.user = coalesce(settings.user, settings.username)
        settings.applabel = coalesce(settings.applabel, settings.queue,
                                     settings.queue_name)
        settings.topic = topic

        self.pulse = ModifiedGenericConsumer(settings,
                                             connect=True,
                                             **settings)
        self.start()
Example #49
    def __init__(
        self,
        index,  # NAME OF THE INDEX, EITHER ALIAS NAME OR FULL VERSION NAME
        type=None,  # SCHEMA NAME, (DEFAULT TO TYPE IN INDEX, IF ONLY ONE)
        alias=None,
        explore_metadata=True,  # PROBING THE CLUSTER FOR METADATA IS ALLOWED
        read_only=True,
        tjson=False,  # STORED AS TYPED JSON
        timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
        debug=False,  # DO NOT SHOW THE DEBUG STATEMENTS
        settings=None
    ):
        if index == None:
            Log.error("not allowed")
        if index == alias:
            Log.error("must have a unique index name")

        self.cluster_state = None
        self.debug = debug
        self.settings = settings
        self.cluster = Cluster(settings)

        try:
            full_index = self.get_index(index)
            if full_index and alias == None:
                settings.alias = settings.index
                settings.index = full_index
            if full_index == None:
                Log.error("not allowed")
            if type == None:
                # NO type PROVIDED, MAYBE THERE IS A SUITABLE DEFAULT?
                with self.cluster.metadata_locker:
                    index_ = self.cluster._metadata.indices[self.settings.index]
                if not index_:
                    indices = self.cluster.get_metadata().indices
                    index_ = indices[self.settings.index]

                candidate_types = list(index_.mappings.keys())
                if len(candidate_types) != 1:
                    Log.error("Expecting `type` parameter")
                self.settings.type = type = candidate_types[0]
        except Exception, e:
            # EXPLORING (get_metadata()) IS NOT ALLOWED ON THE PUBLIC CLUSTER
            Log.error("not expected", cause=e)
Example #50
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(
            convert.value2json(kwargs[b"json"]))
    elif b"data" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(
            convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = convert.json2value(convert.utf82unicode(c))
    except Exception, e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)
Example #51
def value2url(value):
    if value == None:
        Log.error("")

    if isinstance(value, Mapping):
        output = "&".join([
            value2url(k) + "=" + (value2url(v) if isinstance(v, basestring)
                                  else value2url(value2json(v)))
            for k, v in value.items()
        ])
    elif isinstance(value, unicode):
        output = "".join([_map2url[c] for c in unicode2latin1(value)])
    elif isinstance(value, str):
        output = "".join([_map2url[c] for c in value])
    elif hasattr(value, "__iter__"):
        output = ",".join(value2url(v) for v in value)
    else:
        output = unicode(value)
    return output
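Hand-tracing the Mapping branch (assuming _map2url percent-encodes the space; dict ordering not guaranteed):

# value2url({"q": "a b", "n": 3})
#   "a b" TAKES THE unicode/str BRANCH        -> "a%20b"
#   3 IS NOT A STRING, SO IT IS value2json'D  -> "3"
#   -> "q=a%20b&n=3"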
Example #52
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)

        # GET IDS OF DOCUMENTS
        results = self._es.search({
            "fields": [],
            "query": {
                "filtered": {
                    "query": {
                        "match_all": {}
                    },
                    "filter": _normalize_where(command.where, self)
                }
            },
            "size": 200000
        })

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = DictList()
        for k, v in command.set.items():
            if not is_keyword(k):
                Log.error("Only support simple paths for now")

            scripts.append("ctx._source." + k + " = " +
                           expressions.qb_expression_to_ruby(v) + ";\n")
        script = "".join(scripts)

        if results.hits.hits:
            command = []
            for id in results.hits.hits._id:
                command.append({"update": {"_id": id}})
                command.append({"script": script})
            content = ("\n".join(convert.value2json(c)
                                 for c in command) + "\n").encode('utf-8')
            self._es.cluster._post(
                self._es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"})
Example #53
        def assign(source, destination):
            for i, f in enumerate(prefix):
                source = source.get(f)
                if source is None:
                    return 0, None
                if isinstance(source, list):
                    return depth + i + 1, source

            f = field.value.last()
            try:
                if not f:  # NO NAME FIELD INDICATES SELECT VALUE
                    destination[name] = source
                else:
                    destination[name] = source.get(f)
            except Exception, e:
                Log.error("{{value}} does not have {{field}} property",
                          value=source,
                          field=f,
                          cause=e)
Example #54
def unique_index(data, keys=None, fail_on_dup=True):
    """
    RETURN dict THAT USES KEYS TO INDEX DATA
    ONLY ONE VALUE ALLOWED PER UNIQUE KEY
    """
    o = UniqueIndex(listwrap(keys), fail_on_dup=fail_on_dup)

    for d in data:
        try:
            o.add(d)
        except Exception, e:
            o.add(d)
            Log.error(
                "index {{index}} is not unique {{key}} maps to both {{value1}} and {{value2}}",
                index=keys,
                key=select([d], keys)[0],
                value1=o[d],
                value2=d,
                cause=e)

    return o
Example #55
    def query(self, q):
        frum = self
        if is_aggs(q):
            frum = list_aggs(frum.data, q)
        else:  # SETOP
            try:
                if q.filter != None or q.esfilter != None:
                    Log.error("use 'where' clause")
            except AttributeError, e:
                pass

            if q.where is not TRUE_FILTER and not isinstance(q.where, TrueOp):
                frum = frum.filter(q.where)

            if q.sort:
                frum = frum.sort(q.sort)

            if q.select:
                frum = frum.select(q.select)

        return frum
Example #56
    def delete(self, filter):
        self.cluster.get_metadata()

        if self.cluster.cluster_state.version.number.startswith("0.90"):
            query = {"filtered": {
                "query": {"match_all": {}},
                "filter": filter
            }}
        elif self.cluster.cluster_state.version.number.startswith("1."):
            query = {"query": {"filtered": {
                "query": {"match_all": {}},
                "filter": filter
            }}}
        else:
            raise NotImplementedError

        if self.debug:
            Log.note("Delete bugs:\n{{query}}",  query= query)

        keep_trying = True
        while keep_trying:
            result = self.cluster.delete(
                self.path + "/_query",
                data=convert.value2json(query),
                timeout=60
            )
            keep_trying = False
            for name, status in result._indices.items():
                if status._shards.failed > 0:
                    if status._shards.failures[0].reason.find("rejected execution (queue capacity ") >= 0:
                        keep_trying = True
                        Thread.sleep(seconds=5)
                        break

            if not keep_trying:
                for name, status in result._indices.items():
                    if status._shards.failed > 0:
                        Log.error(
                            "ES shard(s) report Failure to delete from {{index}}: {{message}}.  Query was {{query}}",
                            index=name,
                            query=query,
                            message=status._shards.failures[0].reason
                        )
Example #57
    def add(self, val):
        val = dictwrap(val)
        key = value2key(self._keys, val)
        if key == None:
            Log.error("Expecting key to be not None")

        d = self._data.get(key)
        if d is None:
            self._data[key] = unwrap(val)
            self.count += 1
        elif d is not val:
            if self.fail_on_dup:
                Log.error("key {{key|json}} already filled", key=key)
            else:
                Log.warning(
                    "key {{key|json}} already filled\nExisting\n{{existing|json|indent}}\nValue\n{{value|json|indent}}",
                    key=key,
                    existing=d,
                    value=val)
Example #58
    def _all_lines(self, encoding="utf8"):
        try:
            iterator = self.raw.stream(4096, decode_content=False)

            if self.headers.get('content-encoding') == 'gzip':
                return ibytes2ilines(icompressed2ibytes(iterator),
                                     encoding=encoding)
            elif self.headers.get('content-type') == 'application/zip':
                return ibytes2ilines(icompressed2ibytes(iterator),
                                     encoding=encoding)
            elif self.url.endswith(".gz"):
                return ibytes2ilines(icompressed2ibytes(iterator),
                                     encoding=encoding)
            else:
                return ibytes2ilines(iterator,
                                     encoding=encoding,
                                     closer=self.close)
        except Exception, e:
            Log.error("Can not read content", cause=e)
Example #59
def index(data, keys=None):
    # return dict that uses keys to index data
    o = Index(keys)

    if isinstance(data, Cube):
        if data.edges[0].name == keys[0]:
            #QUICK PATH
            names = list(data.data.keys())
            for d in (set_default(dot.zip(names, r), {keys[0]: p})
                      for r, p in zip(zip(*data.data.values()),
                                      data.edges[0].domain.partitions.value)):
                o.add(d)
            return o
        else:
            Log.error("Can not handle indexing cubes at this time")

    for d in data:
        o.add(d)
    return o
Example #60
def parse_time_expression(value):
    def simple_date(sign, dig, type, floor):
        if dig or sign:
            from pyLibrary.debugs.logs import Log
            Log.error("can not accept a multiplier on a datetime")

        if floor:
            return Date(type).floor(Duration(floor))
        else:
            return Date(type)

    terms = re.match(r'(\d*[|\w]+)\s*([+-]\s*\d*[|\w]+)*', value).groups()

    sign, dig, type = re.match(r'([+-]?)\s*(\d*)([|\w]+)', terms[0]).groups()
    if "|" in type:
        type, floor = type.split("|")
    else:
        floor = None

    if type in MILLI_VALUES.keys():
        value = Duration(dig+type)
    else:
        value = simple_date(sign, dig, type, floor)

    for term in terms[1:]:
        if not term:
            continue
        sign, dig, type = re.match(r'([+-])\s*(\d*)([|\w]+)', term).groups()
        if "|" in type:
            type, floor = type.split("|")
        else:
            floor = None

        op = {"+": "__add__", "-": "__sub__"}[sign]
        if type in MILLI_VALUES.keys():
            if floor:
                from pyLibrary.debugs.logs import Log
                Log.error("floor (|) of duration not accepted")
            value = value.__getattribute__(op)(Duration(dig+type))
        else:
            value = value.__getattribute__(op)(simple_date(sign, dig, type, floor))

    return value
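Reading the two regexes, the accepted grammar is <term> ([+-] <term>)*, where each term is an optional count, a unit or named date, and an optional |floor suffix. Hand-traced samples (assuming now/today are named Dates and day/week/hour are MILLI_VALUES keys):

# parse_time_expression("now-7day")   -> Date("now") - Duration("7day")
# parse_time_expression("today|week") -> Date("today").floor(Duration("week"))
# parse_time_expression("2day+3hour") -> Duration("2day") + Duration("3hour")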