def __init__(self, year, activation=5, overall=False):
    '''Set up a two-way cohort split: editors who started in `year` vs. others.

    arg year: the year the cohort started
    arg activation: edit threshold an editor has to exceed to be included.
        The built query compares it against `add_edits` of each row, or,
        when `overall` is set, against SUM(add_edits) per user_id.
    arg overall: bool, selects which of the two SQL queries is built
    '''
    self.year = year
    self.activation = activation
    self.overall = overall

    # Only take time_stamps starting with self.year (January of that year
    # up to the fixed end month 201012).
    stamps, stamp_index = utils.create_time_stamps_month(
        fromym='%s01' % self.year, toym='201012')
    self.time_stamps = stamps
    self.time_stamps_index = stamp_index

    # Cohort definition
    self.cohorts = [self.year, 'others']
    # Cohort labels
    self.cohort_labels = ['%s cohort' % name for name in self.cohorts]

    if not self.overall:
        # Row-wise filter on the edit count.
        query = 'SELECT * FROM fabian WHERE add_edits > %s AND first_edit_year=%s' % (self.activation, self.year)
    else:
        # Filter on the total edit count of each editor of that year.
        query = "SELECT * FROM fabian WHERE user_id IN (SELECT user_id FROM fabian WHERE first_edit_year=%s GROUP BY user_id HAVING SUM(add_edits)>%s);" % (self.year, self.activation)
    self.sqlQuery = query

    Cohort.__init__(self)
def __init__(self, period=3):
    '''Define edit-count histogram cohorts and reset the per-editor state.

    arg period: the number of month an editor is considered new
    '''
    # Editors with 0 edits are not taken into consideration: the data is
    # coded sparse, so that information is not available.
    # Note that len(self.cohorts) has to be equal the number of bins in the
    # numpy.array.
    # Alternative binning:
    # self.cohorts = [1,3,5,7,10,20,40,60,80,100,200,500,1000,5000,10000,'>10000 edits']

    # Cohort definition
    self.cohorts = [1, 5, 10, 50, 100, 500, 1000, '>1000 edits']

    # Cohort labels: '1 edit', then '<previous bound + 1>-<bound> edits' for
    # every numeric bound, and the open-ended last entry taken over as is.
    bounds = self.cohorts
    labels = ['%s edit' % bounds[0]]
    labels.extend('%s-%s edits' % (lower + 1, upper)
                  for lower, upper in zip(bounds, bounds[1:-1]))
    labels.append(bounds[-1])
    self.cohort_labels = labels
    # self.cohort_labels = ['<%s edits'%(e) for e in self.cohorts]

    self.period = period

    # old_user_id: the user_id of the previously encountered editor as we
    #     iterate through the table
    # lastym: the ym at the end of the `period` months after the first edit
    #     of an editor
    self.old_user_id = self.lastym = None
    self.firstedit = self.fe_index = None

    # Counters, all starting at zero.
    self.edits = self.added = self.removed = self.net = 0

    Cohort.__init__(self)
def __init__(self, minedits=1, maxedits=None):
    '''Define one cohort per entry of `settings.time_stamps` ("N month old").

    arg minedits: minimum number of edits by editor in a given month to be
        included
    arg maxedits: maximum number of edits by editor in a given month to be
        included
    '''
    # Cohort definition: 0 .. len(settings.time_stamps) - 1
    self.cohorts = list(range(len(settings.time_stamps)))
    # Cohort labels
    self.cohort_labels = ['%s month old' % age for age in self.cohorts]

    # The SQL query returns edit information for each editor for each ym she
    # has edited.
    self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH

    self.minedits = minedits
    self.maxedits = maxedits

    # Number of visible colors in the wikipride plots, e.g. one color for
    # every six month.
    # Floor division keeps the count an integer on Python 3 as well; with
    # integer operands it equals what `/` returned on Python 2.
    # NOTE(review): assumes utils.numberOfMonths returns an int - confirm.
    self.ncolors = utils.numberOfMonths(
        settings.time_stamps[0], settings.time_stamps[-1]) // 6

    Cohort.__init__(self)
def __init__(self):
    '''Log an error if the cohort definition is incomplete, then initialise.

    Reads `self.cohorts` and `self.cohort_labels`; if either is None an error
    is logged (no exception is raised) and initialisation continues.
    '''
    properly_defined = (self.cohorts is not None
                        and self.cohort_labels is not None)
    if not properly_defined:
        logger.error("self.cohorts or self.cohort_labels not properly defined")
        # raise Exception("self.cohorts or self.cohort_labels not properly defined")

    # NOTE(review): if this method belongs to `Cohort` itself, the call below
    # would recurse - confirm against the enclosing class.
    Cohort.__init__(self)
def __init__(self, reverttype, activation=1):
    '''Store the revert type and activation threshold, then initialise.

    arg reverttype: str, type of revert: 'reverting','reverted','revertedto'
    arg activation: minimum number of reverts by editor to be included
    '''
    self.reverttype, self.activation = reverttype, activation
    Cohort.__init__(self)
def __init__(self):
    '''Define edit-count histogram cohorts and their labels.'''
    # Editors with 0 edits are not taken into consideration: the data is
    # coded sparse, so that information is not available.
    # Note that len(self.cohorts) has to be equal the number of bins in the
    # numpy.array.
    # Alternative binning:
    # self.cohorts = [1,3,5,7,10,20,40,60,80,100,200,500,1000,5000,10000,'>10000 edits']

    # Cohort definition
    self.cohorts = [1, 5, 10, 50, 100, 500, 1000, '>1000 edits']

    # Cohort labels: '1 edit', then '<previous bound + 1>-<bound> edits' for
    # every numeric bound, and the open-ended last entry taken over as is.
    bounds = self.cohorts
    labels = ['%s edit' % bounds[0]]
    labels.extend('%s-%s edits' % (lower + 1, upper)
                  for lower, upper in zip(bounds, bounds[1:-1]))
    labels.append(bounds[-1])
    self.cohort_labels = labels
    # self.cohort_labels = ['<%s edits'%(e) for e in self.cohorts]

    Cohort.__init__(self)
def __init__(self, activation=1):
    '''Define one cohort per editor type and load the user-id sets from SQL.

    arg activation: stored as `self.activation` (the original accepted the
        argument but silently dropped it)

    The SQL part is best effort: any error is logged with its traceback and
    initialisation continues, in which case some or all of
    `administrators`, `bureaucrats`, `eliminators`, `hugglers`, `bots` and
    `sqlQuery` are left unset.
    '''
    # editor_types = ['Administrator','Bureaucrat','Eliminator','Huggler','Bot','Multiple','Other']
    editor_types = ['Administrator', 'Huggler', 'Bot', 'Admin & Huggle', 'Other']

    self.activation = activation

    # Dictionary mapping editor type to numpy.array matrix index
    self.editor_types_dict = {name: idx for idx, name in enumerate(editor_types)}
    # Cohort definition
    self.cohorts = list(range(len(editor_types)))
    # Cohort labels
    self.cohort_labels = editor_types

    # initialize helper structures
    try:
        from db import sql
        cur = sql.getSSCursor()

        def fetch_user_ids(query):
            # Run `query` and collect the first column of every row.
            cur.execute(query)
            return set(row[0] for row in cur)

        # administrators
        self.administrators = fetch_user_ids("select user_id from ptwiki_p.user u join ptwiki_p.user_groups ug on (u.user_id=ug.ug_user) where ug.ug_group='sysop';")
        # bureaucrat
        self.bureaucrats = fetch_user_ids("select user_id from ptwiki_p.user u join ptwiki_p.user_groups ug on (u.user_id=ug.ug_user) where ug.ug_group='bureaucrat';")
        # eliminators
        self.eliminators = fetch_user_ids("select user_id from ptwiki_p.user u join ptwiki_p.user_groups ug on (u.user_id=ug.ug_user) where ug.ug_group='eliminator';")
        # hugglers
        self.hugglers = fetch_user_ids("select user_id from u_declerambaul.ptwiki_huggle;")
        # bots
        self.bots = fetch_user_ids("select user_id from u_declerambaul.ptwiki_bots;")

        # NOTE(review): `reverttype` is neither a parameter nor a local of
        # this method. Unless it is a module-level name this raises a
        # NameError, which ends up in the handler below - confirm where the
        # value is supposed to come from.
        self.sqlQuery = 'SELECT * FROM u_declerambaul.ptwiki_%s_editor_year_month;' % reverttype
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # propagate; the traceback is logged because the failure is not
        # necessarily a connection problem.
        logging.exception("Could not establish SQL connection to initialize RevertsByEditorType.")

    Cohort.__init__(self)
def __init__(self, activitylevels=None):
    '''Define one cohort per activity level ("N+ edits").

    arg activitylevels: iterable of edit-count thresholds; defaults to
        [5, 10, 100]. The values are copied into a fresh list, so instances
        never share (or mutate) a default list or the caller's list.
    '''
    if activitylevels is None:
        activitylevels = [5, 10, 100]

    # Cohort definition
    self.cohorts = list(activitylevels)
    # Cohort labels
    self.cohort_labels = ['%s+ edits' % level for level in self.cohorts]

    # The SQL query returns edit information for each editor for each ym she
    # has edited.
    self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH_NS0_NOREDIRECT

    # Number of visible colors in the wikipride plots (currently disabled):
    # self.ncolors = utils.numberOfMonths(settings.time_stamps[0],settings.time_stamps[-1])/6

    Cohort.__init__(self)
def __init__(self):
    '''Define one cohort per namespace '0'..'5' plus a catch-all 'other'.'''
    namespaces = ('0', '1', '2', '3', '4', '5')

    # Cohort definition
    self.cohorts = namespaces + ('other',)
    # Cohort labels
    self.cohort_labels = ['%s namespace' % ns for ns in self.cohorts]

    # Position of every explicitly listed namespace; 'other' has no entry.
    self.cohort_index = dict((ns, pos) for pos, ns in enumerate(namespaces))

    # The SQL query returns edit information for each editor for each ym she
    # has edited.
    self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH_NAMESPACE

    Cohort.__init__(self)
def __init__(self, activitylevels=None):
    '''Define one cohort per activity level ("N+ edits").

    arg activitylevels: iterable of edit-count thresholds; defaults to
        [5, 10, 100]. The values are copied into a fresh list, so instances
        never share (or mutate) a default list or the caller's list.
    '''
    if activitylevels is None:
        activitylevels = [5, 10, 100]

    # Cohort definition
    self.cohorts = list(activitylevels)
    # Cohort labels
    self.cohort_labels = ['%s+ edits' % level for level in self.cohorts]

    # The SQL query returns edit information for each editor for each ym she
    # has edited.
    self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH_NS0_NOREDIRECT

    # Number of visible colors in the wikipride plots (currently disabled):
    # self.ncolors = utils.numberOfMonths(settings.time_stamps[0],settings.time_stamps[-1])/6

    Cohort.__init__(self)
def __init__(self):
    '''Define edit-count histogram cohorts and their labels.'''
    # Editors with 0 edits are not taken into consideration: the data is
    # coded sparse, so that information is not available.
    # Note that len(self.cohorts) has to be equal the number of bins in the
    # numpy.array.
    # Alternative binning:
    # self.cohorts = [1,3,5,7,10,20,40,60,80,100,200,500,1000,5000,10000,'>10000 edits']

    # Cohort definition
    self.cohorts = [1, 5, 10, 50, 100, 500, 1000, '>1000 edits']

    # Cohort labels: '1 edit', then '<previous bound + 1>-<bound> edits' for
    # every numeric bound, and the open-ended last entry taken over as is.
    bounds = self.cohorts
    labels = ['%s edit' % bounds[0]]
    labels.extend('%s-%s edits' % (lower + 1, upper)
                  for lower, upper in zip(bounds, bounds[1:-1]))
    labels.append(bounds[-1])
    self.cohort_labels = labels
    # self.cohort_labels = ['<%s edits'%(e) for e in self.cohorts]

    Cohort.__init__(self)
def __init__(self):
    '''Define edit-count histogram cohorts, their labels and the SQL query.'''
    # Editors with 0 edits are not taken into consideration: the data is
    # coded sparse, so that information is not available.

    # Cohort definition
    self.cohorts = [1, 5, 50, 100, 500, 1000, '>1000 edits']

    # Cohort labels: '1 edit', then '<previous bound + 1>-<bound> edits' for
    # every numeric bound, and the open-ended last entry taken over as is.
    bounds = self.cohorts
    labels = ['%s edit' % bounds[0]]
    labels.extend('%s-%s edits' % (lower + 1, upper)
                  for lower, upper in zip(bounds, bounds[1:-1]))
    labels.append(bounds[-1])
    self.cohort_labels = labels
    # self.cohort_labels = ['<%s edits'%(e) for e in self.cohorts]

    # The SQL query returns edit information for each editor for each ym she
    # has edited.
    self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH

    Cohort.__init__(self)
def __init__(self):
    '''Define edit-count histogram cohorts, their labels and the SQL query.'''
    # Editors with 0 edits are not taken into consideration: the data is
    # coded sparse, so that information is not available.

    # Cohort definition
    self.cohorts = [1, 5, 50, 100, 500, 1000, '>1000 edits']

    # Cohort labels: '1 edit', then '<previous bound + 1>-<bound> edits' for
    # every numeric bound, and the open-ended last entry taken over as is.
    bounds = self.cohorts
    labels = ['%s edit' % bounds[0]]
    labels.extend('%s-%s edits' % (lower + 1, upper)
                  for lower, upper in zip(bounds, bounds[1:-1]))
    labels.append(bounds[-1])
    self.cohort_labels = labels
    # self.cohort_labels = ['<%s edits'%(e) for e in self.cohorts]

    # The SQL query returns edit information for each editor for each ym she
    # has edited.
    self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH

    Cohort.__init__(self)
def __init__(self):
    '''Define the single 'New Editors' cohort and its recruit-count query.'''
    new_editors = ['New Editors']

    # Cohort definition and cohort labels are the very same list object.
    self.cohorts = new_editors
    self.cohort_labels = new_editors

    # The SQL query returns the new editor count for each ym.
    recruits_query = """SELECT first_edit_year, first_edit_month, count(*) AS recruits FROM %s GROUP BY first_edit_year, first_edit_month;"""
    self.sqlQuery = recruits_query % tables.USER_COHORT

    Cohort.__init__(self)
def __init__(self, minedits=1, maxedits=None):
    '''Define one cohort per entry of `settings.time_stamps` ("N month old").

    arg minedits: minimum number of edits by editor in a given month to be
        included
    arg maxedits: maximum number of edits by editor in a given month to be
        included
    '''
    # Cohort definition: 0 .. len(settings.time_stamps) - 1
    self.cohorts = list(range(len(settings.time_stamps)))
    # Cohort labels
    self.cohort_labels = ['%s month old' % age for age in self.cohorts]

    # The SQL query returns edit information for each editor for each ym she
    # has edited.
    self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH

    self.minedits = minedits
    self.maxedits = maxedits

    # Number of visible colors in the wikipride plots, e.g. one color for
    # every six month.
    # Floor division keeps the count an integer on Python 3 as well; with
    # integer operands it equals what `/` returned on Python 2.
    # NOTE(review): assumes utils.numberOfMonths returns an int - confirm.
    self.ncolors = utils.numberOfMonths(
        settings.time_stamps[0], settings.time_stamps[-1]) // 6

    Cohort.__init__(self)
def __init__(self, activation=5):
    '''Define yearly cohorts 2004..2010 for the namespaces '4' and '5'.

    arg activation: stored as `self.activation`; only referenced by the
        disabled query below
    '''
    self.activation = activation
    self.NS = ('4', '5')

    # Time stamps are limited to the fixed span 200401 .. 201012.
    stamps, stamp_index = utils.create_time_stamps_month(
        fromym='200401', toym='201012')
    self.time_stamps = stamps
    self.time_stamps_index = stamp_index

    # Cohort definition
    self.cohorts = range(2004, 2011)
    # Cohort labels
    self.cohort_labels = ['%s cohort' % cohort_year for cohort_year in self.cohorts]

    # Disabled query, kept for reference:
    # self.sqlQuery = 'SELECT * FROM fabian WHERE namespace IN (4,5) AND add_edits > %s AND first_edit_year in (%s)'%(self.activation,','.join([str(c) for c in self.cohorts]))

    Cohort.__init__(self)
def __init__(self, period=3):
    '''Define edit-count histogram cohorts and reset the per-editor state.

    arg period: the number of month an editor is considered new
    '''
    # Editors with 0 edits are not taken into consideration: the data is
    # coded sparse, so that information is not available.
    # Note that len(self.cohorts) has to be equal the number of bins in the
    # numpy.array.
    # Alternative binning:
    # self.cohorts = [1,3,5,7,10,20,40,60,80,100,200,500,1000,5000,10000,'>10000 edits']

    # Cohort definition
    self.cohorts = [1, 5, 10, 50, 100, 500, 1000, '>1000 edits']

    # Cohort labels: '1 edit', then '<previous bound + 1>-<bound> edits' for
    # every numeric bound, and the open-ended last entry taken over as is.
    bounds = self.cohorts
    labels = ['%s edit' % bounds[0]]
    labels.extend('%s-%s edits' % (lower + 1, upper)
                  for lower, upper in zip(bounds, bounds[1:-1]))
    labels.append(bounds[-1])
    self.cohort_labels = labels
    # self.cohort_labels = ['<%s edits'%(e) for e in self.cohorts]

    self.period = period

    # old_user_id: the user_id of the previously encountered editor as we
    #     iterate through the table
    # lastym: the ym at the end of the `period` months after the first edit
    #     of an editor
    self.old_user_id = self.lastym = None
    self.firstedit = self.fe_index = None

    # Counters, all starting at zero.
    self.edits = self.added = self.removed = self.net = 0

    Cohort.__init__(self)
def __init__(self):
    '''Log an error if the cohort definition is incomplete, then initialise.

    Reads `self.cohorts` and `self.cohort_labels`; if either is None an error
    is logged (no exception is raised) and initialisation continues.
    '''
    properly_defined = (self.cohorts is not None
                        and self.cohort_labels is not None)
    if not properly_defined:
        logger.error("self.cohorts or self.cohort_labels not properly defined")
        # raise Exception("self.cohorts or self.cohort_labels not properly defined")

    # NOTE(review): if this method belongs to `Cohort` itself, the call below
    # would recurse - confirm against the enclosing class.
    Cohort.__init__(self)