def __call__(self, user_record):
    """Sum the diff overlap of the user's in-window contributions.

    For every contribution whose namespace is in ``self.ns`` and whose
    timestamp lies within ``[start, end]``, the contribution's own diff
    (``contrib['diff']``) is compared, via ``self.get_overlaps``, against
    the diff between that revision's content and the content of the
    revision returned by ``self.get_last_revision`` for the same title and
    window.  The lengths (``end - start``) of the returned overlap ranges
    are added up across all such contributions.

    Args:
        user_record: dict-like record providing ``'_contribs'``,
            ``'_rev_cache'`` and ``'username'``; it is mutated in place.

    Returns:
        The same ``user_record`` with ``user_record[self.id]`` set to the
        accumulated total, or to ``None`` when either boundary date
        resolves to a falsy value.
    """
    start = get_date(self.start, user_record)
    end = get_date(self.end, user_record)
    if not start or not end:
        # No usable date range for this user.
        user_record[self.id] = None
        return user_record

    total = 0
    contribs = user_record['_contribs']
    for i, contrib in enumerate(contribs):
        if (contrib['ns'] not in self.ns
                or contrib['timestamp'] < start
                or contrib['timestamp'] > end):
            continue
        # Lazy %-args: the message is only built if DEBUG is enabled.
        logging.debug('processing user %s contrib %d/%d',
                      user_record['username'], i, len(contribs))

        curr_rev = user_record['_rev_cache'][contrib['revid']]
        end_rev = self.get_last_revision(user_record, contrib['title'],
                                         start, end)

        # A revision may be missing its '*' (content) field or not be a
        # mapping at all; fall back to empty content in either case.
        try:
            curr_content = curr_rev['*']
        except (KeyError, TypeError):
            logging.warning('curr rev does not contain the content field: %s', curr_rev)
            curr_content = ''
        try:
            end_content = end_rev['*']
        except (KeyError, TypeError):
            logging.warning('end rev does not contain the content field: %s', end_rev)
            end_content = ''

        diff_with_end = get_diff(curr_content, end_content)
        overlap_ranges = self.get_overlaps(contrib['diff'], diff_with_end)
        # Tuple-parameter lambdas are a SyntaxError on Python 3; summing a
        # generator works on both 2 and 3 and does not shadow start/end.
        total += sum(range_end - range_start
                     for range_start, range_end in overlap_ranges)

    user_record[self.id] = total
    return user_record
def __call__(self, user_record):
    """Count the user's contributions inside the window and namespaces.

    A contribution is counted when its timestamp is neither before the
    resolved start date nor after the resolved end date, and its ``'ns'``
    value is in ``self.ns``.

    Returns the same ``user_record`` with ``user_record[self.id]`` set to
    that count, or to ``'NA'`` when either boundary date is falsy.
    """
    window_start = get_date(self.start, user_record)
    window_end = get_date(self.end, user_record)

    if not window_start or not window_end:
        # Not a valid date range for this user.
        user_record[self.id] = 'NA'
        return user_record

    user_record[self.id] = sum(
        1
        for edit in user_record['_contribs']
        if not (edit['timestamp'] < window_start
                or edit['timestamp'] > window_end)
        and edit['ns'] in self.ns
    )
    return user_record
def __call__(self, user_record):
    """Count the distinct pages the user edited inside the window.

    Only contributions whose timestamp is neither before the resolved
    start date nor after the resolved end date, and whose ``'ns'`` value
    is in ``self.ns``, are considered; pages are told apart by their
    ``'pageid'``.

    Returns the same ``user_record`` with ``user_record[self.id]`` set to
    the number of distinct page ids, or to ``'NA'`` when either boundary
    date is falsy.
    """
    window_start = get_date(self.start, user_record)
    window_end = get_date(self.end, user_record)

    if not window_start or not window_end:
        # Not a valid date range for this user.
        user_record[self.id] = 'NA'
        return user_record

    distinct_pages = set()
    for edit in user_record['_contribs']:
        out_of_window = (edit['timestamp'] < window_start
                         or edit['timestamp'] > window_end)
        if out_of_window or edit['ns'] not in self.ns:
            continue
        distinct_pages.add(edit['pageid'])

    user_record[self.id] = len(distinct_pages)
    return user_record