def _fetch_mtime(self, last_update, parse_timestamp):
    '''
    Resolve the "last update" time and render it as a query fragment.

    :param last_update: explicit last-update value; a string is run
        through ``dt_parse``, any other truthy value is used as given.
        When falsy, the value of ``self.get_last_field('_start')`` is
        used instead.
    :param parse_timestamp: when None, the 'parse_timestamp' property
        (default True) decides; truthy selects the ``parseTimestamp(...)``
        form, falsy selects a plain single-quoted value.
    :returns: the falsy result of ``ts2dt`` unchanged when there is no
        usable time, otherwise one of the two string forms above.
    '''
    if not last_update:
        stamp = self.get_last_field('_start')
    elif isinstance(last_update, basestring):
        stamp = dt_parse(last_update)
    else:
        stamp = last_update
    # whatever the origin, pass the value through ts2dt so that a raw
    # timestamp becomes a datetime object
    stamp = ts2dt(stamp)
    logger.info("Last update mtime: %s" % stamp)

    if not stamp:
        return stamp

    if parse_timestamp is None:
        parse_timestamp = self.get_property('parse_timestamp', default=True)
    if not parse_timestamp:
        return "'%s'" % stamp

    has_tz = hasattr(stamp, 'tzinfo') and stamp.tzinfo
    if not has_tz:
        # The timezone is required so the value can be readjusted
        # relative to the server's tz; fall back to UTC when none is set
        stamp = stamp.replace(tzinfo=pytz.utc)
    rendered = stamp.strftime('%Y-%m-%d %H:%M:%S %z')
    return "parseTimestamp('%s', '%s')" % (rendered, "yyyy-MM-dd HH:mm:ss z")
def test_ts2dt():
    '''
    Exercise ts2dt.

    args: ts, milli=False, tz_aware=True
    '''
    from metriqueu.utils import ts2dt

    # FIXME: millisecond precision, better?
    now_time = int(time())
    # Derive the millisecond value from the SAME clock reading. Calling
    # time() a second time could land in the next second and make the
    # milli=True assertion below fail intermittently.
    now_time_milli = now_time * 1000
    now_date = dt.utcfromtimestamp(now_time)
    now_date_iso = now_date.isoformat()

    # datetime already, return it back
    assert ts2dt(now_date) == now_date

    # tz_aware defaults to true
    try:
        # can't compare offset-naive and offset-aware datetimes
        assert ts2dt(now_time) != now_date
    except TypeError:
        pass

    assert ts2dt(now_date, tz_aware=False) == now_date
    assert ts2dt(now_time_milli, milli=True, tz_aware=False) == now_date

    try:
        # string variants not accepted: "invalid literal for float()"
        # NOTE(review): the comparison result is discarded, so this only
        # tolerates a ValueError; it does not require one to be raised.
        ts2dt(now_date_iso) == now_date
    except ValueError:
        pass
def _fetch_mtime(self, last_update):
    '''
    Resolve the "last update" time.

    :param last_update: explicit last-update value; a string is run
        through ``dt_parse``, any other truthy value is used as given.
        When falsy, ``self.get_last_field('_start')`` is used instead.
    :returns: the chosen value after being passed through ``ts2dt``.
    '''
    if not last_update:
        source = self.get_last_field('_start')
    elif isinstance(last_update, basestring):
        source = dt_parse(last_update)
    else:
        source = last_update
    # pass the value through ts2dt so a raw timestamp becomes a datetime
    mtime = ts2dt(source)
    self.logger.info("Last update mtime: %s" % mtime)
    return mtime
def _log_inconsistency(self, last_doc, last_val, field, removed, added,
                       when, log_type):
    '''
    Report one inconsistent activity through ``self.log_inconsistency``.

    The record carries the object id (``last_doc['_oid']``), the field
    name, the removed/added/last values together with their types, and
    the time of the activity (``when`` passed through ``ts2dt``).

    :param log_type: 'json' emits the record as a JSON document; any
        other value emits a one-line human-readable message.
    '''
    details = dict(
        oid=last_doc['_oid'],
        field=field,
        removed=removed,
        removed_type=str(type(removed)),
        added=added,
        added_type=str(type(added)),
        last_val=last_val,
        last_val_type=str(type(last_val)),
        when=str(ts2dt(when)),
    )
    if log_type == 'json':
        record = json.dumps(details, ensure_ascii=False)
    else:
        template = (u'{oid} {field}: {removed}-> {added} has {last_val}; '
                    u'({removed_type}-> {added_type} has {last_val_type})'
                    u' ... on {when}')
        record = template.format(**details)
    self.log_inconsistency(record)
def _activity_import_doc(self, time_doc, activities):
    '''
    Import activities for a single document into timeline.

    Walks the activities from most recent to oldest and, for each one,
    produces the version of the document that preceded it by undoing the
    change via ``self._activity_backwards``.

    :param time_doc: the document version to start from; its '_start'
        is rewritten while the activities are replayed.
    :param activities: sequence of (when, field, removed, added) items,
        where ``when`` is a datetime.
    :returns: list of document versions, or an empty list when a
        creation field is configured, its time is not earlier than the
        single remaining version's '_start', and nothing was changed.
    '''
    batch_updates = [time_doc]
    # compare tz aware/naive depending on whether the first activity's
    # 'when' carries tzinfo
    tz_aware = bool(activities and activities[0][0].tzinfo)
    # Only activities that happened before time_doc are of interest.
    # Capture the start here, up front: time_doc['_start'] changes below.
    # time_doc['_start'] is a timestamp, whereas act[0] is a datetime.
    td_start = ts2dt(time_doc['_start'], tz_aware=tz_aware)
    activities = [act for act in activities
                  if act[0] < td_start and act[1] in time_doc]
    incon_log_type = self.config.get('incon_log_type')
    creation_field = self.get_property('cfield')
    # process the most recent activity first (sorted by 'when', descending)
    activities.sort(key=lambda act: act[0], reverse=True)

    for when, field, removed, added in activities:
        when = dt2ts(when)
        later_doc = batch_updates.pop()
        earlier_doc = deepcopy(later_doc)
        if later_doc['_end'] == when:
            # same time as the previous activity: group them together by
            # continuing on the copy and stepping back one more version
            later_doc = batch_updates.pop()
        else:
            earlier_doc['_start'] = earlier_doc['_end'] = when
            later_doc['_start'] = when
        last_val = later_doc[field]
        # FIXME: pass in field and call _type() within _activity_backwards?
        # for added/removed?
        new_val, inconsistent = self._activity_backwards(
            earlier_doc[field], removed, added)
        earlier_doc[field] = new_val
        # Check if the object has the correct field value.
        if inconsistent:
            self._log_inconsistency(later_doc, last_val, field,
                                    removed, added, when, incon_log_type)
            # record the value that was added as the corrupted field
            # value and continue processing as if the issue didn't exist
            earlier_doc.setdefault('_corrupted', {})[field] = added
        # Add the objects to the batch
        batch_updates += [later_doc, earlier_doc]

    # try to set the _start of the first version to the creation time
    try:
        oldest = batch_updates[-1]
        if creation_field:
            # set start to creation time if available
            creation_ts = dt2ts(oldest[creation_field])
            if creation_ts < oldest['_start']:
                oldest['_start'] = creation_ts
            elif len(batch_updates) == 1:
                # we have only one version, that we did not change
                return []
    except Exception as e:
        logger.error('Error updating creation time; %s' % e)
    return batch_updates
def _activity_import_doc(self, time_doc, activities):
    '''
    Import activities for a single document into timeline.

    Walks the activities from most recent to oldest and, for each one,
    produces the version of the document that preceded it by undoing the
    change via ``_activity_backwards``.

    :param time_doc: the document version to start from; its '_start'
        is replaced by the result of ``ts2dt`` and then rewritten while
        the activities are replayed.
    :param activities: iterable of (when, field, removed, added) items.
    :returns: list of document versions, or an empty list when the
        creation time is not earlier than the single remaining version's
        '_start' and nothing was changed.
    '''
    batch_updates = [time_doc]
    # Only activities that happened before time_doc are of interest.
    # Capture the start here, up front: time_doc['_start'] changes below.
    # time_doc['_start'] is a timestamp, whereas act[0] is a datetime.
    td_start = ts2dt(time_doc['_start'])
    time_doc['_start'] = td_start
    # keep the relevant activities, ordered descending
    activities = sorted(
        (act for act in activities
         if act[0] < td_start and act[1] in time_doc),
        reverse=True)

    for when, field, removed, added in activities:
        if isinstance(removed, datetime):
            removed = dt2ts(removed)
        if isinstance(added, datetime):
            added = dt2ts(added)
        later_doc = batch_updates.pop()
        if later_doc['_end'] == when:
            # same time as the previous activity: group them together by
            # continuing on that version and stepping back one more
            earlier_doc = later_doc
            later_doc = batch_updates.pop()
        else:
            earlier_doc = deepcopy(later_doc)
            earlier_doc.pop('_id', None)
            earlier_doc['_start'] = when
            earlier_doc['_end'] = when
            later_doc['_start'] = when
        last_val = later_doc[field]
        new_val, inconsistent = _activity_backwards(
            earlier_doc[field], removed, added)
        earlier_doc[field] = new_val
        # Check if the object has the correct field value.
        if inconsistent:
            incon = dict(
                oid=later_doc['_oid'],
                field=field,
                removed=removed,
                removed_type=str(type(removed)),
                added=added,
                added_type=str(type(added)),
                last_val=last_val,
                last_val_type=str(type(last_val)),
            )
            self.logger.error(json.dumps(incon))
            earlier_doc.setdefault('_corrupted', {})[field] = added
        # Add the objects to the batch
        batch_updates += [later_doc, earlier_doc]

    # try to set the _start of the first version to the creation time
    try:
        # set start to creation time if available
        oldest = batch_updates[-1]
        creation_field = self.get_property('cfield')
        creation_ts = ts2dt(oldest[creation_field])
        if creation_ts < oldest['_start']:
            oldest['_start'] = creation_ts
        elif len(batch_updates) == 1:
            # we have only one version, that we did not change
            return []
    except Exception as e:
        self.logger.error('Error updating creation time; %s' % e)
    return batch_updates