Example #1
    def _fetch_mtime(self, last_update, parse_timestamp):
        mtime = None
        if last_update:
            if isinstance(last_update, basestring):
                mtime = dt_parse(last_update)
            else:
                mtime = last_update
        else:
            mtime = self.get_last_field('_start')
        # convert timestamp to datetime object
        mtime = ts2dt(mtime)
        logger.info("Last update mtime: %s" % mtime)

        if mtime:
            if parse_timestamp is None:
                parse_timestamp = self.get_property('parse_timestamp',
                                                    default=True)
            if parse_timestamp:
                if not (hasattr(mtime, 'tzinfo') and mtime.tzinfo):
                    # We need the timezone to readjust relative to the
                    # server's tz
                    mtime = mtime.replace(tzinfo=pytz.utc)
                mtime = mtime.strftime('%Y-%m-%d %H:%M:%S %z')
                dt_format = "yyyy-MM-dd HH:mm:ss z"
                mtime = "parseTimestamp('%s', '%s')" % (mtime, dt_format)
            else:
                mtime = "'%s'" % mtime

        return mtime
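For reference, here is the parse_timestamp branch above in isolation, as a minimal runnable sketch (assumes pytz is installed; the datetime value is made up, not taken from metrique):

import pytz
from datetime import datetime

mtime = datetime(2013, 7, 1, 12, 0, 0)           # naive datetime
mtime = mtime.replace(tzinfo=pytz.utc)           # assume UTC, as above
stamp = mtime.strftime('%Y-%m-%d %H:%M:%S %z')   # '2013-07-01 12:00:00 +0000'
clause = "parseTimestamp('%s', '%s')" % (stamp, 'yyyy-MM-dd HH:mm:ss z')
print(clause)  # parseTimestamp('2013-07-01 12:00:00 +0000', 'yyyy-MM-dd HH:mm:ss z')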
Example #2
def test_ts2dt():
    ''' args: ts, milli=False, tz_aware=True '''
    from time import time
    from datetime import datetime as dt
    from metriqueu.utils import ts2dt

    # FIXME: millisecond precision, better?
    now_time = int(time())
    now_time_milli = int(time()) * 1000
    now_date = dt.utcfromtimestamp(now_time)
    now_date_iso = now_date.isoformat()

    # datetime already, return it back
    assert ts2dt(now_date) == now_date

    # tz_aware defaults to True
    try:
        # can't compare offset-naive and offset-aware datetimes
        assert ts2dt(now_time) != now_date
    except TypeError:
        pass

    assert ts2dt(now_date, tz_aware=False) == now_date

    assert ts2dt(now_time_milli, milli=True, tz_aware=False) == now_date

    try:
        # string variants not accepted: "invalid literal for float()"
        ts2dt(now_date_iso)
        assert False, 'expected ValueError'
    except ValueError:
        pass
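The test pins down ts2dt's contract without showing the function itself. A minimal implementation consistent with the assertions above could look like this (a sketch only, not the actual metriqueu source):

from datetime import datetime
import pytz

def ts2dt(ts, milli=False, tz_aware=True):
    if isinstance(ts, datetime):
        return ts                    # datetime already: return it back
    ts = float(ts)                   # str input raises ValueError here
    if milli:
        ts = ts / 1000.
    if tz_aware:
        return datetime.fromtimestamp(ts, tz=pytz.utc)
    return datetime.utcfromtimestamp(ts)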
Example #3
    def _fetch_mtime(self, last_update):
        mtime = None
        if last_update:
            if isinstance(last_update, basestring):
                mtime = dt_parse(last_update)
            else:
                mtime = last_update
        else:
            mtime = self.get_last_field('_start')
        # convert timestamp to datetime object
        mtime = ts2dt(mtime)
        self.logger.info("Last update mtime: %s" % mtime)
        return mtime
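To see the three input paths in isolation, here is the same branch logic as a standalone sketch (dt_parse stands in for dateutil.parser.parse, get_last_field is stubbed out, and basestring is simplified to str):

from datetime import datetime
from dateutil.parser import parse as dt_parse

def resolve_mtime(last_update, get_last_field=lambda field: None):
    if last_update:
        if isinstance(last_update, str):
            return dt_parse(last_update)   # string -> parsed datetime
        return last_update                 # datetime passes through as-is
    return get_last_field('_start')        # fall back to newest '_start'

print(resolve_mtime('2013-07-01 12:00:00'))  # 2013-07-01 12:00:00
print(resolve_mtime(datetime(2013, 7, 1)))   # 2013-07-01 00:00:00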
Example #4
    def _log_inconsistency(self, last_doc, last_val, field, removed, added,
                           when, log_type):
        incon = {'oid': last_doc['_oid'],
                 'field': field,
                 'removed': removed,
                 'removed_type': str(type(removed)),
                 'added': added,
                 'added_type': str(type(added)),
                 'last_val': last_val,
                 'last_val_type': str(type(last_val)),
                 'when': str(ts2dt(when))}
        if log_type == 'json':
            self.log_inconsistency(json.dumps(incon, ensure_ascii=False))
        else:
            m = u'{oid} {field}: {removed}-> {added} has {last_val}; '
            m += u'({removed_type}-> {added_type} has {last_val_type})'
            m += u' ... on {when}'
            self.log_inconsistency(m.format(**incon))
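To get a feel for the two output formats, here is the message construction with hypothetical values (the oid, field, and values are made up):

import json

incon = {'oid': 42, 'field': 'status',
         'removed': 'open', 'removed_type': str(type('open')),
         'added': 'closed', 'added_type': str(type('closed')),
         'last_val': 'new', 'last_val_type': str(type('new')),
         'when': '2013-07-01 12:00:00'}
m = u'{oid} {field}: {removed}-> {added} has {last_val}; '
m += u'({removed_type}-> {added_type} has {last_val_type})'
m += u' ... on {when}'
print(m.format(**incon))                      # plain-text branch
print(json.dumps(incon, ensure_ascii=False))  # 'json' branch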
Example #5
    def _activity_import_doc(self, time_doc, activities):
        '''
        Import activities for a single document into timeline.
        '''
        batch_updates = [time_doc]
        # compare tz-aware/naive depending on whether acts' 'when' is tz-aware
        tz_aware = bool(activities and activities[0][0].tzinfo)
        # We want to consider only activities that happened before time_doc
        # do not move this, because time_doc._start changes
        # time_doc['_start'] is a timestamp, whereas act[0] is a datetime
        td_start = ts2dt(time_doc['_start'], tz_aware=tz_aware)
        activities = filter(lambda act: (act[0] < td_start and
                                         act[1] in time_doc), activities)
        incon_log_type = self.config.get('incon_log_type')
        creation_field = self.get_property('cfield')
        # make sure that activities are sorted by when descending
        activities.sort(reverse=True, key=lambda o: o[0])
        new_doc = {}
        for when, field, removed, added in activities:
            when = dt2ts(when)
            last_doc = batch_updates.pop()
            # check if this activity happened at the same time as the last one,
            # if it did then we need to group them together
            if last_doc['_end'] == when:
                new_doc = deepcopy(last_doc)
                last_doc = batch_updates.pop()
            else:
                new_doc = deepcopy(last_doc)
                new_doc['_start'] = when
                new_doc['_end'] = when
                last_doc['_start'] = when
            last_val = last_doc[field]

            # FIXME: pass in field and call _type() within _activity_backwards?
            # for added/removed?
            new_val, inconsistent = self._activity_backwards(new_doc[field],
                                                             removed, added)
            new_doc[field] = new_val

            # Check if the object has the correct field value.
            if inconsistent:
                self._log_inconsistency(last_doc, last_val, field,
                                        removed, added, when, incon_log_type)
                new_doc.setdefault('_corrupted', {})
                # set corrupted field value to the value that was added
                # and continue processing as if that issue didn't exist
                new_doc['_corrupted'][field] = added
            # Add the objects to the batch
            batch_updates.extend([last_doc, new_doc])
        # try to set the _start of the first version to the creation time
        try:
            # set start to creation time if available
            last_doc = batch_updates[-1]
            if creation_field:
                creation_ts = dt2ts(last_doc[creation_field])
                if creation_ts < last_doc['_start']:
                    last_doc['_start'] = creation_ts
                elif len(batch_updates) == 1:
                    # we have only one version, that we did not change
                    return []
        except Exception as e:
            logger.error('Error updating creation time; %s' % e)
        return batch_updates
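A toy walk-through of the version-splitting step: one existing version plus one earlier activity yields two versions split at the activity's timestamp (field names, values, and timestamps below are made up):

from copy import deepcopy

time_doc = {'_oid': 1, '_start': 50.0, '_end': None, 'status': 'closed'}
when = 30.0  # activity at t=30: 'status' went 'open' -> 'closed'

last_doc = time_doc
new_doc = deepcopy(last_doc)
new_doc['_start'] = when   # pushed further back by any older activity
new_doc['_end'] = when     # the pre-change version ends at the change
last_doc['_start'] = when  # the known version now begins at the change
new_doc['status'] = 'open' # what _activity_backwards reconstructs here

print(new_doc)   # version before the change: 'open', ends at t=30
print(last_doc)  # version after the change: 'closed', starts at t=30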
Example #6
def _activity_import_doc(self, time_doc, activities):
    '''
    Import activities for a single document into timeline.
    '''
    batch_updates = [time_doc]
    # We want to consider only activities that happened before time_doc
    # do not move this, because time_doc._start changes
    # time_doc['_start'] is a timestamp, whereas act[0] is a datetime
    td_start = time_doc['_start'] = ts2dt(time_doc['_start'])
    activities = filter(lambda act: (act[0] < td_start and
                                     act[1] in time_doc), activities)
    # make sure that activities are sorted by when descending
    activities = sorted(activities, reverse=True)
    for when, field, removed, added in activities:
        removed = dt2ts(removed) if isinstance(removed, datetime) else removed
        added = dt2ts(added) if isinstance(added, datetime) else added
        last_doc = batch_updates.pop()
        # check if this activity happened at the same time as the last one,
        # if it did then we need to group them together
        if last_doc['_end'] == when:
            new_doc = last_doc
            last_doc = batch_updates.pop()
        else:
            new_doc = deepcopy(last_doc)
            new_doc.pop('_id', None)  # drop the inherited '_id', if any
            new_doc['_start'] = when
            new_doc['_end'] = when
            last_doc['_start'] = when
        last_val = last_doc[field]
        new_val, inconsistent = _activity_backwards(new_doc[field],
                                                    removed, added)
        new_doc[field] = new_val
        # Check if the object has the correct field value.
        if inconsistent:
            incon = {'oid': last_doc['_oid'],
                     'field': field,
                     'removed': removed,
                     'removed_type': str(type(removed)),
                     'added': added,
                     'added_type': str(type(added)),
                     'last_val': last_val,
                     'last_val_type': str(type(last_val))}
            self.logger.error(json.dumps(incon))
            if '_corrupted' not in new_doc:
                new_doc['_corrupted'] = {}
            new_doc['_corrupted'][field] = added
        # Add the objects to the batch
        batch_updates.append(last_doc)
        batch_updates.append(new_doc)
    # try to set the _start of the first version to the creation time
    try:
        # set start to creation time if available
        last_doc = batch_updates[-1]
        creation_field = self.get_property('cfield')
        creation_ts = ts2dt(last_doc[creation_field])
        if creation_ts < last_doc['_start']:
            last_doc['_start'] = creation_ts
        elif len(batch_updates) == 1:
            # we have only one version, that we did not change
            return []
    except Exception as e:
        self.logger.error('Error updating creation time; %s' % e)
    return batch_updates
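Neither variant shows _activity_backwards. Judging from the call sites, it takes the value after a change plus the activity's removed/added and returns the prior value and an inconsistency flag; a plausible scalar-only sketch (the real metrique code also handles list-valued fields, omitted here):

def _activity_backwards(val, removed, added):
    # the stored value should equal what the activity says was added;
    # if not, the history is inconsistent and the caller logs it
    inconsistent = (val != added)
    # walking backwards in time, the pre-change value is what was removed
    return removed, inconsistent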