Example #1
import re

from metriqueu.utils import dt2ts  # dt2ts (datetime -> epoch seconds), as imported in Example #4


def date_pql_string(date):
    if date is None:
        return '_end == None'
    if date == '~':
        return ''

    before = lambda d: '_start <= %f' % dt2ts(d)
    after = lambda d: '(_end >= %f or _end == None)' % dt2ts(d)
    split = date.split('~')
    # replace all occurrences of 'T' with ' ';
    # this handles datetimes passed in as
    # YYYY-MM-DDTHH:MM:SS instead of the expected
    # YYYY-MM-DD HH:MM:SS
    # and drop any trailing timezone offset like '+00:00'
    split = [re.sub(r'\+\d\d:\d\d', '', d.replace('T', ' ')) for d in split]
    if len(split) == 1:
        # 'dt'
        return '%s and %s' % (before(split[0]), after(split[0]))
    elif split[0] == '':
        # '~dt'
        return before(split[1])
    elif split[1] == '':
        # 'dt~'
        return after(split[0])
    else:
        # 'dt~dt'
        return '%s and %s' % (before(split[1]), after(split[0]))
Example #2
def test_date_pql_string():
    from datetime import datetime
    import pytz

    from metriqued.utils import date_pql_string as _
    from metriqueu.utils import dt2ts

    assert _(None) == '_end == None'
    assert _('~') == ''

    d1 = datetime(2000, 1, 1, 0, 0, 0)
    d1_ts = dt2ts(d1)

    ba = '_start <= %f and (_end >= %f or _end == None)' % (d1_ts, d1_ts)
    d1_str = str(d1)  # test no T b/w date/time
    # test passing only a date (no ~ separator)
    assert _(d1_str) == ba

    d1_iso = d1.isoformat()  # test with T b/w date/time
    # test 'before()'
    assert _('~%s' % d1_iso) == '_start <= %f' % d1_ts

    # test 'after()'
    d1_tz = d1.replace(tzinfo=pytz.UTC).isoformat()  # test with timezone
    assert _('%s~' % d1_tz) == '(_end >= %f or _end == None)' % d1_ts

    d1_date = '2000-01-01'  # without time
    assert _('~%s' % d1_date) == '_start <= %f' % d1_ts

    # test 'date~date' date range, passing in raw datetime objects
    d1 = datetime(2000, 1, 1, 0, 0, 0)
    d1_ts = dt2ts(d1)
    d2 = datetime(2000, 1, 2, 0, 0, 0)
    d2_ts = dt2ts(d2)
    ba = '_start <= %f and (_end >= %f or _end == None)' % (d2_ts, d1_ts)
    assert _('%s~%s' % (d1, d2)) == ba
Example #3
def date_pql_string(date):
    '''
    Generate a new pql date query component that can be used to
    query for date (range) specific data in cubes.

    :param date: metrique date (range) to apply to pql query

    If date is None, the resulting query will match current
    values only (_end == None).

    The tilde '~' symbol is used as the date range separator.

    A tilde by itself means 'all date ranges possible' and will
    therefore match all objects regardless of their _end date
    timestamp.

    A date on the left with a tilde but no date on the right
    will generate a query where the date range starts
    at the date provided and ends 'today'.
    ie, from date -> now.

    A date on the right with a tilde but no date on the left
    will generate a query where the date range starts from
    the first date available in the past (oldest) and ends
    on the date provided.
    ie, from beginning of known time -> date.

    A date on both the left and right will be a simple date
    range query where the date range starts from the date
    on the left and ends on the date on the right.
    ie, from date to date.
    '''
    if date is None:
        return '_end == None'
    if date == '~':
        return ''

    before = lambda d: '_start <= %f' % dt2ts(d)
    after = lambda d: '(_end >= %f or _end == None)' % dt2ts(d)
    split = date.split('~')
    # replace all occurrences of 'T' with ' ';
    # this handles datetimes passed in as
    # YYYY-MM-DDTHH:MM:SS instead of the expected
    # YYYY-MM-DD HH:MM:SS
    # and drop any trailing timezone offset like '+00:00'
    split = [re.sub(r'\+\d\d:\d\d', '', d.replace('T', ' ')) for d in split]
    if len(split) == 1:
        # 'dt'
        return '%s and %s' % (before(split[0]), after(split[0]))
    elif split[0] == '':
        # '~dt'
        return before(split[1])
    elif split[1] == '':
        # 'dt~'
        return after(split[0])
    else:
        # 'dt~dt'
        return '%s and %s' % (before(split[1]), after(split[0]))
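
For reference, a quick usage sketch of the query strings these formats produce (the output shapes follow the assertions in Example #2; the actual %f timestamps depend on dt2ts):

from metriqued.utils import date_pql_string

date_pql_string(None)           # '_end == None'
date_pql_string('~')            # ''  (no date filter at all)
date_pql_string('2000-01-01')   # '_start <= <ts> and (_end >= <ts> or _end == None)'
date_pql_string('~2000-01-01')  # '_start <= <ts>'
date_pql_string('2000-01-01~')  # '(_end >= <ts> or _end == None)'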
Example #4
def test_dt2ts():
    '''Check that dt2ts handles epoch seconds, datetime objects and ISO strings.'''
    from datetime import datetime as dt
    from time import time

    from metriqueu.utils import dt2ts

    # FIXME: millisecond precision, better?
    now_time = int(time())
    now_date = dt.utcfromtimestamp(now_time)
    now_date_iso = now_date.isoformat()

    assert dt2ts(now_time) == now_time
    assert dt2ts(now_date) == now_time
    assert dt2ts(now_date_iso) == now_time
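
dt2ts itself is not shown in any of these examples; judging from this test it accepts epoch seconds, datetime objects and ISO strings. A minimal sketch of such a converter, assuming python-dateutil is available (an illustration only, not the metriqueu.utils implementation):

import calendar

from dateutil.parser import parse as dt_parse


def dt2ts_sketch(dt):
    # numeric values pass through untouched (dt2ts(now_time) == now_time above)
    if isinstance(dt, (int, long, float)):
        return dt
    # ISO strings are parsed first (dt2ts(now_date_iso) == now_time above)
    if isinstance(dt, basestring):
        dt = dt_parse(dt)
    # naive datetimes are treated as UTC and converted to epoch seconds
    return calendar.timegm(dt.utctimetuple())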
Example #5
def obj_hook(dct):
    _dct = {}
    for k, v in dct.items():
        _k = str(k).replace('.', '_')
        if k == 'timestamp':
            try:
                # convert milliseconds to seconds
                v = v / 1000. if v else v
            except TypeError:
                # in some cases timestamp is a datetime string, not a number
                v = dt2ts(v)
        if k == 'date':
            v = dt2ts(v)
        _dct[_k] = v
    return _dct
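
obj_hook is presumably wired into the standard json decoder via object_hook; a usage sketch (the wiring here is an assumption, not taken from the metrique source):

import json

# every decoded JSON object is passed through obj_hook, so 'timestamp'
# (epoch milliseconds) and 'date' (datetime string) values come back
# normalized to epoch seconds
doc = json.loads('{"timestamp": 946684800000, "date": "2000-01-01T00:00:00"}',
                 object_hook=obj_hook)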
Example #6
    def _linreg_future(self, series, since, days=20):
        '''
        Predicts future using linear regression.

        :param pandas.Series series:
            A series in which the values will be placed.
            The index will not be touched.
            Only the values on dates > `since` will be predicted.
        :param datetime since:
            The starting date from which the future will be predicted.
        :param integer days:
            Specifies how many past days should be used in the linear
            regression.
        '''
        last_days = pd.date_range(end=since, periods=days)
        hist = self.history(last_days)

        xi = np.array(map(dt2ts, hist.index))
        A = np.array([xi, np.ones(len(hist))])
        y = hist.values
        w = np.linalg.lstsq(A.T, y)[0]

        for d in series.index[series.index > since]:
            series[d] = w[0] * dt2ts(d) + w[1]
            series[d] = 0 if series[d] < 0 else series[d]

        return series
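
The least-squares step above fits y = w[0] * x + w[1] over the last `days` of history and extrapolates it forward. A standalone sketch of the same fit with plain numpy (hypothetical data, not the cube history used above):

import numpy as np

xi = np.array([1.0, 2.0, 3.0, 4.0])   # e.g. timestamps from dt2ts
y = np.array([2.0, 4.0, 6.0, 8.0])    # observed values
A = np.array([xi, np.ones(len(xi))])  # design matrix: [x, 1]
w = np.linalg.lstsq(A.T, y)[0]        # w[0] = slope, w[1] = intercept
future = max(0, w[0] * 5.0 + w[1])    # predict at x = 5, clamped at zero as above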
Example #7
def json_encode(obj):
    '''
    Convert datetime.datetime to timestamp
    '''
    if isinstance(obj, datetime):
        return dt2ts(obj)
    else:
        return json_encoder.default(obj)
Example #8
def json_encode(obj):
    """
    Convert datetime.datetime to timestamp

    :param obj: value to (possibly) convert
    """
    if isinstance(obj, datetime):
        return dt2ts(obj)
    else:
        return json_encoder.default(obj)
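
json_encode is the usual default= hook for json.dumps; a usage sketch, assuming json_encoder is a plain json.JSONEncoder instance (the fallback .default() call above suggests as much):

import json
from datetime import datetime

json_encoder = json.JSONEncoder()  # fallback encoder referenced by json_encode

# datetimes are serialized as epoch timestamps instead of raising TypeError
payload = json.dumps({'when': datetime(2000, 1, 1)}, default=json_encode)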
Example #9
def test_query_add_date():
    from datetime import datetime

    from metriqued.utils import query_add_date as _
    from metriqueu.utils import dt2ts
    d1 = datetime(2000, 1, 1, 0, 0, 0)
    d1_ts = dt2ts(d1)

    q = 'i_heart == "metrique"'
    _pql = '_start <= %f' % d1_ts
    assert _(q, '~') == q
    assert _(q, None) == '%s and _end == None' % q
    assert _(q, '~%s' % d1) == '%s and %s' % (q, _pql)
Example #10
def obj_hook(dct):
    '''
    JSON decoder.

    Converts the following:
        * 'timestamp' from epoch milliseconds to epoch seconds
        * 'date' strings to epoch seconds
    '''
    _dct = {}
    for k, v in dct.items():
        _k = str(k).replace('.', '_')
        if k == 'timestamp':
            try:
                # convert milliseconds to seconds
                v = v / 1000. if v else v
            except TypeError:
                # in some cases timestamp is a datetime string, not a number
                v = dt2ts(v)
        if k == 'date':
            v = dt2ts(v)
        _dct[_k] = v
    return _dct
Example #11
    def get_objects(self):
        '''
        Run `rpm -q` command on a {local, remote} system to get back
        details of installed RPMs.

        Default rpm details extracted are as follows:
            * name
            * version
            * release
            * arch
            * nvra
            * license
            * os
            * packager
            * platform
            * sourcepackage
            * sourcerpm
            * summary
        '''
        fmt = ':::'.join('%%{%s}' % f for f in self.fields)
        if self.ssh_host:
            output = self._ssh_cmd(fmt)
        else:
            output = self._local_cmd(fmt)
        if isinstance(output, basestring):
            output = output.strip().split('\n')
        lines = [l.strip().split(':::') for l in output]
        now = dt2ts(datetime.now())
        host = self.ssh_host or socket.gethostname()
        objects = []
        for line in lines:
            obj = {'host': host, '_start': now}
            for i, item in enumerate(line):
                if item == '(none)':
                    item = None
                obj[self.fields[i]] = item
            obj['_oid'] = '%s__%s' % (host, obj['nvra'])
            objects.append(obj)
        objects = self.normalize(objects)
        return objects
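
For reference, the fmt built above is an rpm query-format string; a small sketch of what it expands to (the exact command inside _local_cmd/_ssh_cmd is not shown, so the invocation below is an assumption):

fields = ['name', 'version', 'release', 'arch']  # subset of the fields listed above
fmt = ':::'.join('%%{%s}' % f for f in fields)
# fmt == '%{name}:::%{version}:::%{release}:::%{arch}'
# which would typically be passed to something like:
#   rpm -qa --queryformat '%{name}:::%{version}:::%{release}:::%{arch}\n'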
Example #12
    def _activity_import_doc(self, time_doc, activities):
        '''
        Import activities for a single document into timeline.
        '''
        batch_updates = [time_doc]
        # compare tz aware/naive depending if acts 'when' is tz_aware or not
        tz_aware = True if activities and activities[0][0].tzinfo else False
        # We want to consider only activities that happened before time_doc
        # do not move this, because time_doc._start changes
        # time_doc['_start'] is a timestamp, whereas act[0] is a datetime
        td_start = ts2dt(time_doc['_start'], tz_aware=tz_aware)
        activities = filter(lambda act: (act[0] < td_start and
                                         act[1] in time_doc), activities)
        incon_log_type = self.config.get('incon_log_type')
        creation_field = self.get_property('cfield')
        # make sure that activities are sorted by when descending
        activities.sort(reverse=True, key=lambda o: o[0])
        new_doc = {}
        for when, field, removed, added in activities:
            when = dt2ts(when)
            last_doc = batch_updates.pop()
            # check if this activity happened at the same time as the last one,
            # if it did then we need to group them together
            if last_doc['_end'] == when:
                new_doc = deepcopy(last_doc)
                last_doc = batch_updates.pop()
            else:
                new_doc = deepcopy(last_doc)
                new_doc['_start'] = when
                new_doc['_end'] = when
                last_doc['_start'] = when
            last_val = last_doc[field]

            # FIXME: pass in field and call _type() within _activity_backwards?
            # for added/removed?
            new_val, inconsistent = self._activity_backwards(new_doc[field],
                                                             removed, added)
            new_doc[field] = new_val

            # Check if the object has the correct field value.
            if inconsistent:
                self._log_inconsistency(last_doc, last_val, field,
                                        removed, added, when, incon_log_type)
                new_doc.setdefault('_corrupted', {})
                # set the corrupted field value to the value that was added
                # and continue processing as if that issue didn't exist
                new_doc['_corrupted'][field] = added
            # Add the objects to the batch
            batch_updates.extend([last_doc, new_doc])
        # try to set the _start of the first version to the creation time
        try:
            # set start to creation time if available
            last_doc = batch_updates[-1]
            if creation_field:
                creation_ts = dt2ts(last_doc[creation_field])
                if creation_ts < last_doc['_start']:
                    last_doc['_start'] = creation_ts
                elif len(batch_updates) == 1:
                    # we have only one version, that we did not change
                    return []
        except Exception as e:
            logger.error('Error updating creation time; %s' % e)
        return batch_updates
Example #13
def _activity_import_doc(self, time_doc, activities):
    '''
    Import activities for a single document into timeline.
    '''
    batch_updates = [time_doc]
    # We want to consider only activities that happened before time_doc
    # do not move this, because time_doc._start changes
    # time_doc['_start'] is a timestamp, whereas act[0] is a datetime
    td_start = time_doc['_start'] = ts2dt(time_doc['_start'])
    activities = filter(lambda act: (act[0] < td_start and
                                     act[1] in time_doc), activities)
    # make sure that activities are sorted by when descending
    activities = sorted(activities, reverse=True)
    for when, field, removed, added in activities:
        removed = dt2ts(removed) if isinstance(removed, datetime) else removed
        added = dt2ts(added) if isinstance(added, datetime) else added
        last_doc = batch_updates.pop()
        # check if this activity happened at the same time as the last one,
        # if it did then we need to group them together
        if last_doc['_end'] == when:
            new_doc = last_doc
            last_doc = batch_updates.pop()
        else:
            new_doc = deepcopy(last_doc)
        new_doc.pop('_id', None)
            new_doc['_start'] = when
            new_doc['_end'] = when
            last_doc['_start'] = when
        last_val = last_doc[field]
        new_val, inconsistent = _activity_backwards(new_doc[field],
                                                    removed, added)
        new_doc[field] = new_val
        # Check if the object has the correct field value.
        if inconsistent:
            incon = {'oid': last_doc['_oid'],
                     'field': field,
                     'removed': removed,
                     'removed_type': str(type(removed)),
                     'added': added,
                     'added_type': str(type(added)),
                     'last_val': last_val,
                     'last_val_type': str(type(last_val))}
            self.logger.error(json.dumps(incon))
            if '_corrupted' not in new_doc:
                new_doc['_corrupted'] = {}
            new_doc['_corrupted'][field] = added
        # Add the objects to the batch
        batch_updates.append(last_doc)
        batch_updates.append(new_doc)
    # try to set the _start of the first version to the creation time
    try:
        # set start to creation time if available
        last_doc = batch_updates[-1]
        creation_field = self.get_property('cfield')
        creation_ts = ts2dt(last_doc[creation_field])
        if creation_ts < last_doc['_start']:
            last_doc['_start'] = creation_ts
        elif len(batch_updates) == 1:
            # we have only one version, that we did not change
            return []
    except Exception as e:
        self.logger.error('Error updating creation time; %s' % e)
    return batch_updates
Example #14
    def save_objects(self, owner, cube, objects, mtime=None):
        '''
        :param str owner: target owner's cube
        :param str cube: target cube (collection) to save objects to
        :param list objects: list of dictionary-like objects to be stored
        :param datetime mtime: datetime to apply as mtime for objects
        :rtype: list - list of object ids saved

        Get a list of dictionary objects from client and insert
        or save them to the timeline.

        Apply the given mtime to all objects, or utcnow() if no mtime is
        given. _mtime is used to support time-based 'delta' updates.
        '''
        self.cube_exists(owner, cube)
        self.requires_owner_write(owner, cube)
        mtime = dt2ts(mtime) if mtime else utcnow()
        current_mtime = self.get_cube_last_start(owner, cube)
        if current_mtime and mtime and current_mtime > mtime:
            # don't fail, but make sure the issue is logged!
            # likely, an ntp time sync is required
            logger.warn(
                "object mtime is < server mtime; %s < %s; " % (mtime,
                                                               current_mtime))
        _cube = self.timeline(owner, cube, admin=True)

        olen_r = len(objects)
        logger.debug('[%s.%s] Received %s objects' % (owner, cube, olen_r))

        objects = self.prepare_objects(_cube, objects, mtime)

        logger.debug('[%s.%s] %s objects match their current version in db' % (
            owner, cube, olen_r - len(objects)))

        if not objects:
            logger.debug('[%s.%s] No NEW objects to save' % (owner, cube))
            return []
        else:
            logger.debug('[%s.%s] Saving %s objects' % (owner, cube,
                                                        len(objects)))
            # End the most recent versions in the db of those objects that
            # have newer versions (the newest version must have _end == None;
            # activity import saves objects for which this might not be true):
            to_snap = dict([(o['_oid'], o['_start']) for o in objects
                            if o['_end'] is None])
            if to_snap:
                db_versions = _cube.find({'_oid': {'$in': to_snap.keys()},
                                          '_end': None},
                                         fields={'_id': 1, '_oid': 1})
                snapped = 0
                for doc in db_versions:
                    _cube.update({'_id': doc['_id']},
                                 {'$set': {'_end': to_snap[doc['_oid']]}},
                                 multi=False)
                    snapped += 1
                logger.debug('[%s.%s] Updated %s OLD versions' %
                             (owner, cube, snapped))
            # Insert all new versions:
            insert_bulk(_cube, objects)
            logger.debug('[%s.%s] Saved %s NEW versions' % (owner, cube,
                                                            len(objects)))
            # return object ids saved
            return [o['_oid'] for o in objects]
Example #15
    def get_objects(self, repo_fullname=DEFAULT_REPO, since=None):
        '''
        Given valid github credentials and a repository name, generate
        a list of github issue objects for all existing issues in the
        repository.

        All issues are returned, including open and closed.

        :param repo_fullname: github repository name (ie, 'user/repo')
        :param since: date; only return issues updated since this date

        An example repo_fullname is 'kejbaly2/metrique'.

        Issue objects contain the following properties:
            * _oid (issue id)
            * assignee
            * body
            * closed_at
            * closed_by
            * created_at
            * labels
            * milestone
            * name
            * number
            * repo url
            * state
            * title
            * updated_at
            * full github url
            * user (reported by)

        '''
        repo = self.proxy.get_repo(repo_fullname)
        if not repo:
            raise ValueError("invalid repo: %s" % repo_fullname)

        if isinstance(since, basestring):
            since = dt_parse(since)

        if since:
            _open = repo.get_issues(since=since)
            _closed = repo.get_issues(state='closed', since=since)
        else:
            _open = repo.get_issues()
            _closed = repo.get_issues(state='closed')

        objects = []
        for i in chain(_open, _closed):
            obj = {
                '_oid': i.id,
                'assignee': getattr(i.assignee, 'login', None),
                'body': i.body,
                'closed_at': dt2ts(i.closed_at),
                'closed_by': getattr(i.closed_by, 'login', None),
                'created_at': dt2ts(i.created_at),
                'labels': [l.name for l in i.labels],
                'milestone': getattr(i.milestone, 'title', None),
                'name': repo_fullname,
                'number': i.number,
                'repo': i.repository.url,
                'state': i.state,
                'title': i.title,
                'updated_at': dt2ts(i.updated_at),
                'url': i.url,
                'user': i.user.name,
            }
            objects.append(obj)
        objects = self.normalize(objects)
        return objects