Exemple #1
0
 def __call__(self, user_record):
     """Store the summed overlap size of the user's in-window contributions.

     The window bounds come from ``get_date(self.start, user_record)`` and
     ``get_date(self.end, user_record)``.  If either bound is falsy,
     ``user_record[self.id]`` is set to ``None`` and no contribution is
     examined.

     Otherwise, for every entry of ``user_record['_contribs']`` whose
     ``'ns'`` is in ``self.ns`` and whose ``'timestamp'`` lies within
     ``[start, end]``:

     * the contribution's revision is looked up in
       ``user_record['_rev_cache']`` by ``contrib['revid']``;
     * ``self.get_last_revision(user_record, title, start, end)`` supplies
       the revision to compare against (presumably the page's last
       revision inside the window -- confirm against that helper);
     * the ``'*'`` field of both revisions is diffed with ``get_diff``;
       a revision lacking that field is logged and treated as ``''``;
     * ``self.get_overlaps(contrib['diff'], diff_with_end)`` yields
       ``(start, end)`` pairs whose lengths are added to the total.

     Returns the same ``user_record`` with ``user_record[self.id]`` set.
     """
     start = get_date(self.start, user_record)
     end = get_date(self.end, user_record)
     if not start or not end:
         user_record[self.id] = None
         return user_record

     total = 0
     contribs = user_record['_contribs']
     for i, contrib in enumerate(contribs):
         if (contrib['ns'] not in self.ns or
                 contrib['timestamp'] < start or
                 contrib['timestamp'] > end):
             continue
         # Pass the values as logging arguments so the message is only
         # formatted when debug output is actually enabled.
         logging.debug('processing user %s contrib %d/%d',
                       user_record['username'], i, len(contribs))
         curr_rev = user_record['_rev_cache'][contrib['revid']]
         end_rev = self.get_last_revision(user_record, contrib['title'], start, end)

         # A revision may be None or lack the '*' field; fall back to
         # empty content instead of aborting the whole user.
         try:
             curr_content = curr_rev['*']
         except (KeyError, TypeError):
             logging.warning('curr rev does not contain the content field: %s', curr_rev)
             curr_content = ''
         try:
             end_content = end_rev['*']
         except (KeyError, TypeError):
             logging.warning('end rev does not contain the content field: %s', end_rev)
             end_content = ''

         diff_with_end = get_diff(curr_content, end_content)
         overlap_ranges = self.get_overlaps(contrib['diff'], diff_with_end)
         # sum() over unpacked pairs replaces reduce() with a tuple-parameter
         # lambda, which is a syntax error on Python 3 and shadowed the
         # window bounds `start` / `end`.
         total += sum(range_end - range_start
                      for range_start, range_end in overlap_ranges)
     user_record[self.id] = total
     return user_record
Exemple #2
0
 def __call__(self, user_record):
     """Count the user's contributions inside the date window.

     The window bounds are obtained from ``get_date`` for ``self.start``
     and ``self.end``.  When either bound is falsy the range is not valid
     for this user and ``'NA'`` is stored under ``user_record[self.id]``.
     Otherwise the stored value is the number of entries in
     ``user_record['_contribs']`` whose ``'timestamp'`` is neither before
     the start nor after the end and whose ``'ns'`` is in ``self.ns``.

     Returns the same ``user_record``.
     """
     window_start = get_date(self.start, user_record)
     window_end = get_date(self.end, user_record)
     if not window_start or not window_end:
         # no usable date range for this user
         user_record[self.id] = 'NA'
         return user_record

     # The three tests keep their original short-circuit order:
     # too early, too late, then namespace.
     user_record[self.id] = sum(
         1 for edit in user_record['_contribs']
         if not (edit['timestamp'] < window_start or
                 edit['timestamp'] > window_end or
                 edit['ns'] not in self.ns))
     return user_record
Exemple #3
0
 def __call__(self, user_record):
     """Count the distinct pages the user touched inside the date window.

     The window bounds are obtained from ``get_date`` for ``self.start``
     and ``self.end``.  When either bound is falsy the range is not valid
     for this user and ``'NA'`` is stored under ``user_record[self.id]``.
     Otherwise the stored value is the number of distinct ``'pageid'``
     values among the entries of ``user_record['_contribs']`` whose
     ``'timestamp'`` is neither before the start nor after the end and
     whose ``'ns'`` is in ``self.ns``.

     Returns the same ``user_record``.
     """
     window_start = get_date(self.start, user_record)
     window_end = get_date(self.end, user_record)
     if not window_start or not window_end:
         # no usable date range for this user
         user_record[self.id] = 'NA'
         return user_record

     distinct_pages = set()
     for edit in user_record['_contribs']:
         # Same short-circuit order as before: too early, too late,
         # then namespace.
         outside = (edit['timestamp'] < window_start or
                    edit['timestamp'] > window_end or
                    edit['ns'] not in self.ns)
         if not outside:
             distinct_pages.add(edit['pageid'])
     user_record[self.id] = len(distinct_pages)
     return user_record