def __init__(self, name, **kwargs):
    '''
    Store the name, start with an empty projects list, a datacontainer
    created with datatype='list' for time and verified set to True, then
    attach every extra keyword argument as an instance attribute.
    '''
    self.name = name
    self.projects = []
    self.time = data_converter.create_datacontainer(datatype='list')
    self.verified = True
    # Keyword arguments are applied last, so they overwrite any of the
    # attributes set above that share their name.
    for attribute, value in kwargs.items():
        setattr(self, attribute, value)
def determine_article_count(articles_edited, first_year, final_year):
    '''
    For every year / month / namespace found in articles_edited, store the
    number of articles listed under that combination. The result is passed
    through cleanup_datacontainer before it is returned.
    '''
    counts = data_converter.create_datacontainer(first_year, final_year)
    counts = data_converter.add_months_to_datacontainer(counts, 'dict')
    for year, months in articles_edited.items():
        for month, namespaces in months.items():
            for ns, articles in namespaces.items():
                counts[year][month][ns] = len(articles)
    return cleanup_datacontainer(counts, {})
def determine_last_edit_by_year(edits, first_year, final_year):
    '''
    Return a datacontainer keyed by year (as a string) that holds the most
    recent edit date seen in that year. Years without any edit keep the
    initial value 0.
    '''
    latest = data_converter.create_datacontainer(first_year, final_year, 0)
    for edit in edits:
        year = str(edit['date'].year)
        # The container is created with 0 as its fill value, so 0 means
        # "no edit recorded yet"; otherwise keep whichever date is later.
        if latest[year] == 0 or latest[year] < edit['date']:
            latest[year] = edit['date']
    return latest
def determine_number_edits(edits, first_year, final_year):
    '''
    Tally the edits per year, per month and per namespace. Year, month and
    namespace are all used as string keys. The result is passed through
    cleanup_datacontainer before it is returned.
    '''
    tally = data_converter.create_datacontainer(first_year, final_year)
    tally = data_converter.add_months_to_datacontainer(tally, 'dict')
    for edit in edits:
        ns = str(edit['ns'])
        when = edit['date']
        per_month = tally[str(when.year)][str(when.month)]
        per_month[ns] = per_month.get(ns, 0) + 1
    return cleanup_datacontainer(tally, {})
def determine_number_reverts(edits, first_year, final_year):
    '''
    Tally, per year / month / namespace, the edits whose 'revert' field is
    truthy. Edits with a falsy 'revert' field are not counted. The result
    is passed through cleanup_datacontainer before it is returned.
    '''
    reverts = data_converter.create_datacontainer(first_year, final_year)
    reverts = data_converter.add_months_to_datacontainer(reverts, 'dict')
    for edit in edits:
        when = edit['date']
        year = str(when.year)
        month = str(when.month)
        ns = str(edit['ns'])
        if not edit['revert']:
            continue
        per_month = reverts[year][month]
        per_month[ns] = per_month.get(ns, 0) + 1
    return cleanup_datacontainer(reverts, {})
def determine_namespaces_workedon(edits, first_year, final_year):
    '''
    Collect, for every year / month, the distinct namespaces ('ns' values)
    that appear in the edits. Namespaces are gathered in sets and turned
    into lists before the container is cleaned up and returned.
    '''
    seen = data_converter.create_datacontainer(first_year, final_year)
    seen = data_converter.add_months_to_datacontainer(seen, 'set')
    for edit in edits:
        when = edit['date']
        seen[str(when.year)][str(when.month)].add(edit['ns'])

    # Swap every per-month set for a list holding the same elements.
    for months in seen.values():
        for month in months:
            months[month] = list(months[month])
    return cleanup_datacontainer(seen, [])
def add(self, key, value):
    '''
    Either buffer one edit for the editor identified by key, or flush that
    editor.

    When value equals the string 'NEXT', self.n is incremented, the
    editor's buffered edits are passed through self.drop_years_no_obs and
    handed to self.insert together with the stored username, and the
    editor is removed from self.editors.

    Otherwise value is treated as a single edit: its 'username' entry is
    popped (so the caller's dict is modified), the edit is appended to the
    list for the year of value['date'], and the editor's 'obs' counter is
    incremented.
    '''
    if value == 'NEXT':
        self.n += 1
        edits = self.drop_years_no_obs(self.editors[key]['edits'])
        self.insert(key, edits, self.editors[key]['username'])
        del self.editors[key]
        return

    if key in self.editors:
        # The username is already stored for this editor; just strip it
        # from the edit.
        value.pop('username')
    else:
        # First edit seen for this editor: create its record. The edits
        # container always starts at the hard-coded year 2001.
        record = self.editors[key] = {}
        record['obs'] = 0
        record['edits'] = data_converter.create_datacontainer(
            2001, self.final_year, 'list')
        record['username'] = value.pop('username')

    record = self.editors[key]
    year = str(value['date'].year)
    record['edits'][year].append(value)
    record['obs'] += 1
def determine_edit_volume(edits, first_year, final_year):
    '''
    Sum the character deltas per year, per month and per namespace for a
    particular editor. Negative deltas are accumulated under 'removed'
    (which therefore holds a negative total) and positive deltas under
    'added'. The result is passed through cleanup_datacontainer before it
    is returned.
    '''
    volume = data_converter.create_datacontainer(first_year, final_year)
    volume = data_converter.add_months_to_datacontainer(volume, 'dict')
    for edit in edits:
        when = edit['date']
        year = str(when.year)
        month = str(when.month)
        ns = str(edit['ns'])
        # The namespace entry is created even when the delta turns out to
        # be 0, in which case it stays an empty dict.
        per_ns = volume[year][month].setdefault(ns, {})
        delta = edit['delta']
        if delta < 0:
            per_ns['removed'] = per_ns.get('removed', 0) + delta
        elif delta > 0:
            per_ns['added'] = per_ns.get('added', 0) + delta
    return cleanup_datacontainer(volume, {})
def determine_articles_workedon(edits, first_year, final_year):
    '''
    Collect, per year / month / namespace, the distinct article_ids that
    appear in the edits. The ids are gathered in sets and turned into
    lists before the container is cleaned up and returned.
    '''
    worked_on = data_converter.create_datacontainer(first_year, final_year)
    worked_on = data_converter.add_months_to_datacontainer(worked_on, 'dict')
    for edit in edits:
        when = edit['date']
        year = str(when.year)
        month = str(when.month)
        ns = str(edit['ns'])
        worked_on[year][month].setdefault(ns, set()).add(edit['article_id'])

    # mongo cannot store sets, so every set is replaced by a list.
    for months in worked_on.values():
        for namespaces in months.values():
            for ns in namespaces:
                namespaces[ns] = list(namespaces[ns])
    return cleanup_datacontainer(worked_on, {})
def __call__(self):
    '''
    Build the summary record for this editor from self.edits and insert it
    into self.db_dataset. Nothing is done when self.edits is an empty
    list.

    The record combines the per-year/month breakdowns computed by the
    determine_* helpers, the totals derived from them, the cumulative edit
    counts, the first and final edit dates, and 'new_wikipedian': the date
    of the edit at index self.cutoff in the sorted edits, or False when
    there are not more than self.cutoff edits.
    '''
    if self.edits == []:
        return

    first_year, final_year = determine_year_range(self.edits)

    last_edit_by_year = determine_last_edit_by_year(
        self.edits, first_year, final_year)
    articles_edited = determine_articles_workedon(
        self.edits, first_year, final_year)
    article_count = determine_article_count(
        articles_edited, first_year, final_year)
    namespaces_edited = determine_namespaces_workedon(
        self.edits, first_year, final_year)
    character_count = determine_edit_volume(
        self.edits, first_year, final_year)
    revert_count = determine_number_reverts(
        self.edits, first_year, final_year)
    edit_count = determine_number_edits(
        self.edits, first_year, final_year)

    # Fold the four breakdowns into the totals, one after another, all
    # sharing the same counts container.
    totals = {}
    counts = data_converter.create_datacontainer(first_year, final_year)
    breakdowns = (
        (character_count, 'character_count'),
        (revert_count, 'revert_count'),
        (article_count, 'article_count'),
        (edit_count, 'edit_count'),
    )
    for breakdown, label in breakdowns:
        totals = calculate_totals(totals, counts, breakdown, label)

    cum_edit_count_main_ns, cum_edit_count_other_ns = calculate_cum_edits(
        self.edits)

    edits = sort_edits(self.edits)
    new_wikipedian = (
        edits[self.cutoff]['date'] if len(edits) > self.cutoff else False)
    first_edit = edits[0]['date']
    final_edit = edits[-1]['date']

    data = {
        'user_id': self.editor_id,
        'username': self.username,
        'new_wikipedian': new_wikipedian,
        'cum_edit_count_main_ns': cum_edit_count_main_ns,
        'cum_edit_count_other_ns': cum_edit_count_other_ns,
        'final_edit': final_edit,
        'first_edit': first_edit,
        'last_edit_by_year': last_edit_by_year,
        'articles_edited': articles_edited,
        'edit_count': edit_count,
        'namespaces_edited': namespaces_edited,
        'article_count': article_count,
        'character_count': character_count,
        'revert_count': revert_count,
        'totals': totals,
    }
    self.db_dataset.insert(data)