def __init__(self, **desc):
    Domain.__init__(self, **desc)
    self.type = "duration"
    self.NULL = Null
    self.min = Duration(self.min)
    self.max = Duration(self.max)
    self.interval = Duration(self.interval)

    if self.partitions:
        # IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")
        Log.error("not implemented yet")
        # VERIFY PARTITIONS DO NOT OVERLAP
        return
    elif not all([self.min, self.max, self.interval]):
        Log.error("Can not handle missing parameter")

    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(Duration.range(self.min, self.max, self.interval))
    ])
def __init__(
    self,
    rollover_field,     # the FIELD with a timestamp to use for determining which index to push to
    rollover_interval,  # duration between roll-over to new index
    rollover_max,       # remove old indexes, do not add old records
    schema,             # es schema
    queue_size=10000,   # number of documents to queue in memory
    batch_size=5000,    # number of documents to push at once
    typed=None,         # indicate if we expect typed json
    kwargs=None         # plus additional ES settings
):
    if kwargs.tjson != None:
        Log.error("not expected")
    if typed == None:
        Log.error("not expected")

    schema.settings.index.max_result_window = 100000        # REQUIRED FOR ACTIVEDATA NESTED QUERIES
    schema.settings.index.max_inner_result_window = 100000  # REQUIRED FOR ACTIVEDATA NESTED QUERIES

    self.settings = kwargs
    self.locker = Lock("lock for rollover_index")
    self.rollover_field = jx.get(rollover_field)
    self.rollover_interval = self.settings.rollover_interval = Duration(rollover_interval)
    self.rollover_max = self.settings.rollover_max = Duration(rollover_max)
    self.known_queues = {}  # MAP DATE TO INDEX
    self.cluster = elasticsearch.Cluster(self.settings)
def __init__(self, rollover_field, rollover_interval, rollover_max, queue_size=10000, batch_size=5000, kwargs=None):
    """
    :param rollover_field: the FIELD with a timestamp to use for determining which index to push to
    :param rollover_interval: duration between roll-over to new index
    :param rollover_max: remove old indexes, do not add old records
    :param queue_size: number of documents to queue in memory
    :param batch_size: number of documents to push at once
    :param kwargs: plus additional ES settings
    :return:
    """
    self.settings = kwargs
    self.locker = Lock("lock for rollover_index")
    self.rollover_field = jx.get(rollover_field)
    self.rollover_interval = self.settings.rollover_interval = Duration(kwargs.rollover_interval)
    self.rollover_max = self.settings.rollover_max = Duration(kwargs.rollover_max)
    self.known_queues = {}  # MAP DATE TO INDEX
    self.cluster = elasticsearch.Cluster(self.settings)
def __sub__(self, other):
    if other == None:
        return None
    if isinstance(other, datetime):
        return Duration(self.unix - Date(other).unix)
    if isinstance(other, Date):
        return Duration(self.unix - other.unix)

    return self.add(-other)
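Subtracting one Date from another above returns a Duration rather than a Date; a minimal usage sketch, assuming the mo_times constructors used elsewhere in these snippets (the timestamps are arbitrary):

from mo_times import Date, Duration

elapsed = Date("2018-10-01 18:00:00") - Date("2018-10-01 12:00:00")
assert elapsed == Duration("6hour")  # Date - Date yields a Duration
assert (Date("2018-10-01 18:00:00") - None) is None  # None propagates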
def __init__(self, **desc):
    Domain.__init__(self, **desc)
    self.type = "time"
    self.NULL = Null
    self.min = Date(self.min)
    self.max = Date(self.max)
    self.interval = Duration(self.interval)
    self.sort = Null

    if self.partitions:
        # IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")
        Log.error("not implemented yet")
        # VERIFY PARTITIONS DO NOT OVERLAP
        return

    self.verify_attributes_not_null(["min", "max", "interval"])
    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(Date.range(self.min, self.max, self.interval))
    ])
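The comprehension at the end of this constructor builds half-open {min, max, dataIndex} partitions from Date.range; a small standalone sketch with arbitrary bounds, assuming Date.range excludes the max endpoint:

from mo_times import Date, Duration

interval = Duration("day")
partitions = [
    {"min": v, "max": v + interval, "dataIndex": i}
    for i, v in enumerate(Date.range(Date("2018-10-01"), Date("2018-10-04"), interval))
]
# three daily partitions starting 10-01, 10-02, 10-03; each partition's max is the next one's min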
def simple_date(sign, dig, type, floor):
    if dig or sign:
        from mo_logs import Log
        Log.error("can not accept a multiplier on a datetime")

    if floor:
        return Date(type).floor(Duration(floor))
    else:
        return Date(type)
def refresh(please_stop):
    try:
        es.set_refresh_interval(
            seconds=coalesce(Duration(self.settings.refresh_interval).seconds, 60 * 10),
            timeout=5
        )
    except Exception:
        Log.note("Could not set refresh interval for {{index}}", index=es.settings.index)
def parse_time_expression(value):
    def simple_date(sign, dig, type, floor):
        if dig or sign:
            from mo_logs import Log
            Log.error("can not accept a multiplier on a datetime")

        if floor:
            return Date(type).floor(Duration(floor))
        else:
            return Date(type)

    terms = re.match(r'(\d*[|\w]+)\s*([+-]\s*\d*[|\w]+)*', value).groups()

    sign, dig, type = re.match(r'([+-]?)\s*(\d*)([|\w]+)', terms[0]).groups()
    if "|" in type:
        type, floor = type.split("|")
    else:
        floor = None
    if type in MILLI_VALUES.keys():
        value = Duration(dig + type)
    else:
        value = simple_date(sign, dig, type, floor)

    for term in terms[1:]:
        if not term:
            continue
        sign, dig, type = re.match(r'([+-])\s*(\d*)([|\w]+)', term).groups()
        if "|" in type:
            type, floor = type.split("|")
        else:
            floor = None

        op = {"+": "__add__", "-": "__sub__"}[sign]
        if type in MILLI_VALUES.keys():
            if floor:
                from mo_logs import Log
                Log.error("floor (|) of duration not accepted")
            value = value.__getattribute__(op)(Duration(dig + type))
        else:
            value = value.__getattribute__(op)(simple_date(sign, dig, type, floor))

    return value
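A sketch of expressions this parser accepts, assuming MILLI_VALUES keys are the usual unit names (day, hour, minute, ...) and that Date() understands keywords such as "today":

two_days_ago = parse_time_expression("today-2day")  # Date("today") minus Duration("2day")
total = parse_time_expression("3hour+30minute")     # unit-only terms stay pure Durations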
def get_instance_metadata(timeout=None):
    if not isinstance(timeout, (int, float)):
        timeout = Duration(timeout).seconds

    output = wrap({
        k.replace("-", "_"): v
        for k, v in boto_utils.get_instance_metadata(timeout=coalesce(timeout, 5), num_retries=2).items()
    })
    return output
def parse_time_expression(value):
    def simple_date(sign, dig, type, floor):
        if dig or sign:
            from mo_logs import Log
            Log.error("can not accept a multiplier on a datetime")

        if floor:
            return Date(type).floor(Duration(floor))
        else:
            return Date(type)

    terms = re.match(r'(\d*[|\w]+)\s*([+-]\s*\d*[|\w]+)*', value).groups()

    sign, dig, type = re.match(r'([+-]?)\s*(\d*)([|\w]+)', terms[0]).groups()
    if "|" in type:
        type, floor = type.split("|")
    else:
        floor = None
    if type in MILLI_VALUES.keys():
        value = Duration(dig + type)
    else:
        value = simple_date(sign, dig, type, floor)

    for term in terms[1:]:
        if not term:
            continue
        sign, dig, type = re.match(r'([+-])\s*(\d*)([|\w]+)', term).groups()
        if "|" in type:
            type, floor = type.split("|")
        else:
            floor = None

        op = {"+": "__add__", "-": "__sub__"}[sign]
        if type in MILLI_VALUES.keys():
            if floor:
                from mo_logs import Log
                Log.error("floor (|) of duration not accepted")
            value = value.__getattribute__(op)(Duration(dig + type))
        else:
            value = value.__getattribute__(op)(simple_date(sign, dig, type, floor))

    return value
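The "|" suffix is routed through simple_date's floor branch, so a datetime term can be truncated to a unit boundary; a hedged example, assuming Date() accepts "now":

start_of_day = parse_time_expression("now|day")    # Date("now").floor(Duration("day"))
start_of_hour = parse_time_expression("now|hour")  # Date("now").floor(Duration("hour"))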
def __init__(
    self,
    hg=None,          # CONNECT TO hg
    repo=None,        # CONNECTION INFO FOR ES CACHE
    branches=None,    # CONNECTION INFO FOR ES CACHE
    use_cache=False,  # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES
    timeout=30 * SECOND,
    kwargs=None
):
    if not _hg_branches:
        _late_imports()

    self.es_locker = Lock()
    self.todo = mo_threads.Queue("todo for hg daemon", max=DAEMON_QUEUE_SIZE)
    self.settings = kwargs
    self.timeout = Duration(timeout)

    # VERIFY CONNECTIVITY
    with Explanation("Test connect with hg"):
        http.head(self.settings.hg.url)

    if branches == None:
        self.branches = _hg_branches.get_branches(kwargs=kwargs)
        self.es = None
        return

    self.last_cache_miss = Date.now()
    set_default(repo, {"schema": revision_schema})
    self.es = elasticsearch.Cluster(kwargs=repo).get_or_create_index(kwargs=repo)

    def setup_es(please_stop):
        with suppress_exception:
            self.es.add_alias()
        with suppress_exception:
            self.es.set_refresh_interval(seconds=1)

    Thread.run("setup_es", setup_es)
    self.branches = _hg_branches.get_branches(kwargs=kwargs)
    Thread.run("hg daemon", self._daemon)
def __init__(self, **desc):
    Domain.__init__(self, **desc)
    self.type = "duration"
    self.NULL = Null
    self.min = Duration(self.min)
    self.max = Duration(self.max)
    self.interval = Duration(self.interval)

    if self.partitions:
        # IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")
        Log.error("not implemented yet")
        # VERIFY PARTITIONS DO NOT OVERLAP
        return
    elif not all([self.min, self.max, self.interval]):
        Log.error("Can not handle missing parameter")

    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(Duration.range(self.min, self.max, self.interval))
    ])
def test_ceiling_hours(self):
    date = Date("2018-10-01 12:42:00").ceiling(Duration("6hour"))
    expected = Date("2018-10-01 18:00:00")
    self.assertEqual(date, expected)
def test_ceiling_hours_unchanged(self):
    date = Date('2018-10-01 18:00:00').ceiling(Duration("6hour"))
    expected = Date('2018-10-01 18:00:00')
    self.assertEqual(date, expected)
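These ceiling tests suggest a symmetric floor behavior; a hedged companion test (not from the source), assuming six-hour boundaries fall at 00:00, 06:00, 12:00, and 18:00:

def test_floor_hours(self):
    date = Date("2018-10-01 12:42:00").floor(Duration("6hour"))
    expected = Date("2018-10-01 12:00:00")
    self.assertEqual(date, expected)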
def __init__(
    self,
    hg=None,          # CONNECT TO hg
    repo=None,        # CONNECTION INFO FOR ES CACHE
    use_cache=False,  # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES
    timeout=30 * SECOND,
    kwargs=None,
):
    if not _hg_branches:
        _late_imports()
    if not is_text(repo.index):
        Log.error("Expecting 'index' parameter")

    self.repo_locker = Lock()
    self.moves_locker = Lock()
    self.todo = mo_threads.Queue("todo for hg daemon", max=DAEMON_QUEUE_SIZE)
    self.settings = kwargs
    self.timeout = Duration(timeout)
    self.last_cache_miss = Date.now()

    # VERIFY CONNECTIVITY
    with Explanation("Test connect with hg"):
        http.head(self.settings.hg.url)

    set_default(repo, {"type": "revision", "schema": revision_schema})
    kwargs.branches = set_default(
        {"index": repo.index + "-branches", "type": "branch"},
        repo,
    )
    moves = set_default(
        {"index": repo.index + "-moves"},
        repo,
    )

    self.branches = _hg_branches.get_branches(kwargs=kwargs)
    cluster = elasticsearch.Cluster(kwargs=repo)
    self.repo = cluster.get_or_create_index(kwargs=repo)
    self.moves = cluster.get_or_create_index(kwargs=moves)

    def setup_es(please_stop):
        with suppress_exception:
            self.repo.add_alias()
        with suppress_exception:
            self.moves.add_alias()
        with suppress_exception:
            self.repo.set_refresh_interval(seconds=1)
        with suppress_exception:
            self.moves.set_refresh_interval(seconds=1)

    Thread.run("setup_es", setup_es)
    Thread.run("hg daemon", self._daemon)
def total(self):
    if not self.end:
        Log.error("please ask for total time outside the context of measuring")

    return Duration(self.agg)
def duration(self):
    end = time()
    if not self.end:
        return Duration(end - self.start)

    return Duration(self.interval)
def test_duration_hashable(self):
    a = {Duration("hour"): "hour"}
    self.assertEqual(a[Duration("60minute")], "hour")
def duration(self):
    if not self.end:
        return Duration(time() - self.start)

    return Duration(self.interval)
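A sketch of how a duration property like this one is typically consumed, assuming these properties belong to a timer such as mo_times' Timer context manager (do_work is hypothetical):

from mo_times.timer import Timer

with Timer("load data") as timer:
    do_work()          # hypothetical workload; timer.duration here reads elapsed-so-far
print(timer.duration)  # after the block exits, returns the recorded interval as a Duration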