def __init__(self, activation=1):
    '''Define the editor-type cohorts and load the user-id sets used to classify editors.

    The cohort labels are fixed; the id sets (administrators, bureaucrats,
    eliminators, hugglers, bots) are read from the database. Loading is
    best-effort: if anything in the database step fails, the error is logged
    with its traceback and construction continues, so the id-set attributes
    and `sqlQuery` may be missing afterwards.

    :arg activation: not used by this initializer.
    '''
    # Superseded label set, kept for reference:
    # editor_types = ['Administrator','Bureaucrat','Eliminator','Huggler','Bot','Multiple','Other']
    editor_types = ['Administrator', 'Huggler', 'Bot', 'Admin & Huggle', 'Other']

    # Maps each editor type label to its position in `editor_types`.
    self.editor_types_dict = {label: idx for idx, label in enumerate(editor_types)}
    # Cohort definition: one integer id per editor type.
    self.cohorts = list(range(len(editor_types)))
    # Cohort labels, in the same order as `self.cohorts`.
    self.cohort_labels = editor_types

    # All three user-group lookups share this statement and differ only in the group name.
    group_query = ("select user_id from ptwiki_p.user u join ptwiki_p.user_groups ug "
                   "on (u.user_id=ug.ug_user) where ug.ug_group='%s';")

    # initialize helper structures
    try:
        from db import sql
        cur = sql.getSSCursor()
        try:
            def fetch_ids(query):
                # The first column of every result row is the user id.
                cur.execute(query)
                return set(row[0] for row in cur)

            self.administrators = fetch_ids(group_query % 'sysop')
            self.bureaucrats = fetch_ids(group_query % 'bureaucrat')
            self.eliminators = fetch_ids(group_query % 'eliminator')
            self.hugglers = fetch_ids("select user_id from u_declerambaul.ptwiki_huggle;")
            self.bots = fetch_ids("select user_id from u_declerambaul.ptwiki_bots;")

            # NOTE(review): `reverttype` is not a parameter or local of this
            # method. Unless it is defined at module level, this line raises
            # NameError and `sqlQuery` is never set - confirm where it should
            # come from.
            self.sqlQuery = 'SELECT * FROM u_declerambaul.ptwiki_%s_editor_year_month;' % reverttype
        finally:
            cur.close()
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit,
        # and the traceback shows the real cause (which need not be the connection).
        logging.exception("Could not establish SQL connection to initialize RevertsByEditorType.")

    Cohort.__init__(self)
def _autoconfirmed_record(u_id, u_text, reg_time, tenth_edit):
    '''Build the (user_id, user_name, auto_confirmation, confirmation_timestamp) row for one user.

    A user is auto-confirmed once they have made ten edits and four days have
    passed since registration, so the confirmation time is the later of the
    tenth edit and registration plus four days. Users without a registration
    time are confirmed at their tenth edit. Users without a tenth edit are
    not confirmed.

    :arg reg_time: registration timestamp as 'YYYYMMDDHHMMSS', or a false value.
    :arg tenth_edit: timestamp of the tenth edit as 'YYYYMMDDHHMMSS', or a false value.
    :raises ValueError: if a timestamp does not match the expected format.
    '''
    ts_format = '%Y%m%d%H%M%S'
    if not tenth_edit:
        # NOTE(review): this is the literal text NULL; mysqlimport reads \N as
        # SQL NULL, so the column presumably ends up holding the string
        # 'NULL' - confirm whether consumers rely on that before changing it.
        return (u_id, u_text, 0, 'NULL')

    confirmed = datetime.strptime(tenth_edit, ts_format)
    if reg_time:
        four_days_after_reg = datetime.strptime(reg_time, ts_format) + timedelta(days=4)
        confirmed = max(confirmed, four_days_after_reg)
    return (u_id, u_text, 1, confirmed.strftime(ts_format))


def createAutoConfirmedUserTable():
    '''Rebuild the `user_autoconfirmed` table in the user database.

    This is a function rather than a SQL query because a script is used to
    create the dataset which is then imported back into the MySQL database.

    Steps: dump one tab-separated row per user to a temp file, drop and
    recreate the table, load the file with `mysqlimport`, then index the
    table on `user_id`.

    NOTE(review): this function is defined twice in this file; the later
    definition is the one that takes effect.
    '''
    import subprocess

    from db import sql

    # mysqlimport derives the target table name from the file's base name,
    # so this file loads into `user_autoconfirmed`.
    tempfile = os.path.join(TEMPDIR, 'user_autoconfirmed.tsv')

    logger.info('Creating temp file to store autoconfirmation date of all users')
    curSS = sql.getSSCursor()
    try:
        # Truncate instead of append: the table is rebuilt from scratch, so
        # rows left over from an earlier run would be imported twice.
        with open(tempfile, 'w') as output:
            curSS.execute('''SELECT u.user_id, u.user_name, u.user_registration,
                    (SELECT rev_timestamp
                     FROM %s.revision
                     WHERE rev_user=u.user_id
                     ORDER BY rev_timestamp ASC
                     LIMIT 9, 1) AS tenthedit
                FROM %s.user u;''' % (settings.sqlwikidb, settings.sqlwikidb))

            for i, res in enumerate(curSS):
                ins = _autoconfirmed_record(res[0], res[1], res[2], res[3])
                line = '\t'.join(str(v) for v in ins)
                logger.info(line)
                output.write(line + '\n')

                # Progress indicator: one dot per 100 users, new line every 3000.
                if i % 100 == 0:
                    if i % 3000 == 0:
                        sys.stdout.write('\n.')
                    else:
                        sys.stdout.write('.')
                    sys.stdout.flush()
    finally:
        curSS.close()

    cur = sql.getCursor()
    try:
        # The table created here must be the one that is imported into and
        # indexed below. The original created `<wikidb>_user_autoconfirmed`
        # instead, leaving the import without a target table. IF EXISTS keeps
        # the first run from failing, and the statements are sent separately
        # so this does not depend on multi-statement support.
        cur.execute('DROP TABLE IF EXISTS %s.user_autoconfirmed;' % settings.sqluserdb)
        cur.execute('''CREATE TABLE %s.user_autoconfirmed (
                user_id int(5) unsigned,
                user_name varchar(255),
                auto_confirmation tinyint(1) unsigned,
                confirmation_timestamp char(14));''' % settings.sqluserdb)
    finally:
        cur.close()

    logger.info('Importing auto_confirmation data into MySQL')
    # Argument list, no shell: database names and paths are passed verbatim.
    status = subprocess.call(['mysqlimport', '--local', settings.sqluserdb, tempfile])
    if status != 0:
        logger.error('mysqlimport exited with status %s', status)

    logger.info('Creating index on user_autoconfirmed table')
    cur = sql.getCursor()
    try:
        cur.execute("CREATE INDEX user_id ON %s.user_autoconfirmed (user_id);" % settings.sqluserdb)
    finally:
        cur.close()
def _autoconfirmed_record(u_id, u_text, reg_time, tenth_edit):
    '''Build the (user_id, user_name, auto_confirmation, confirmation_timestamp) row for one user.

    A user is auto-confirmed once they have made ten edits and four days have
    passed since registration, so the confirmation time is the later of the
    tenth edit and registration plus four days. Users without a registration
    time are confirmed at their tenth edit. Users without a tenth edit are
    not confirmed.

    :arg reg_time: registration timestamp as 'YYYYMMDDHHMMSS', or a false value.
    :arg tenth_edit: timestamp of the tenth edit as 'YYYYMMDDHHMMSS', or a false value.
    :raises ValueError: if a timestamp does not match the expected format.
    '''
    ts_format = '%Y%m%d%H%M%S'
    if not tenth_edit:
        # NOTE(review): this is the literal text NULL; mysqlimport reads \N as
        # SQL NULL, so the column presumably ends up holding the string
        # 'NULL' - confirm whether consumers rely on that before changing it.
        return (u_id, u_text, 0, 'NULL')

    confirmed = datetime.strptime(tenth_edit, ts_format)
    if reg_time:
        four_days_after_reg = datetime.strptime(reg_time, ts_format) + timedelta(days=4)
        confirmed = max(confirmed, four_days_after_reg)
    return (u_id, u_text, 1, confirmed.strftime(ts_format))


def createAutoConfirmedUserTable():
    '''Rebuild the `user_autoconfirmed` table in the user database.

    This is a function rather than a SQL query because a script is used to
    create the dataset which is then imported back into the MySQL database.

    Steps: dump one tab-separated row per user to a temp file, drop and
    recreate the table, load the file with `mysqlimport`, then index the
    table on `user_id`.

    NOTE(review): this function is defined twice in this file; the later
    definition is the one that takes effect.
    '''
    import subprocess

    from db import sql

    # mysqlimport derives the target table name from the file's base name,
    # so this file loads into `user_autoconfirmed`.
    tempfile = os.path.join(TEMPDIR, 'user_autoconfirmed.tsv')

    logger.info('Creating temp file to store autoconfirmation date of all users')
    curSS = sql.getSSCursor()
    try:
        # Truncate instead of append: the table is rebuilt from scratch, so
        # rows left over from an earlier run would be imported twice.
        with open(tempfile, 'w') as output:
            curSS.execute('''SELECT u.user_id, u.user_name, u.user_registration,
                    (SELECT rev_timestamp
                     FROM %s.revision
                     WHERE rev_user=u.user_id
                     ORDER BY rev_timestamp ASC
                     LIMIT 9, 1) AS tenthedit
                FROM %s.user u;''' % (settings.sqlwikidb, settings.sqlwikidb))

            for i, res in enumerate(curSS):
                ins = _autoconfirmed_record(res[0], res[1], res[2], res[3])
                line = '\t'.join(str(v) for v in ins)
                logger.info(line)
                output.write(line + '\n')

                # Progress indicator: one dot per 100 users, new line every 3000.
                if i % 100 == 0:
                    if i % 3000 == 0:
                        sys.stdout.write('\n.')
                    else:
                        sys.stdout.write('.')
                    sys.stdout.flush()
    finally:
        curSS.close()

    cur = sql.getCursor()
    try:
        # The table created here must be the one that is imported into and
        # indexed below. The original created `<wikidb>_user_autoconfirmed`
        # instead, leaving the import without a target table. IF EXISTS keeps
        # the first run from failing, and the statements are sent separately
        # so this does not depend on multi-statement support.
        cur.execute('DROP TABLE IF EXISTS %s.user_autoconfirmed;' % settings.sqluserdb)
        cur.execute('''CREATE TABLE %s.user_autoconfirmed (
                user_id int(5) unsigned,
                user_name varchar(255),
                auto_confirmation tinyint(1) unsigned,
                confirmation_timestamp char(14));''' % settings.sqluserdb)
    finally:
        cur.close()

    logger.info('Importing auto_confirmation data into MySQL')
    # Argument list, no shell: database names and paths are passed verbatim.
    status = subprocess.call(['mysqlimport', '--local', settings.sqluserdb, tempfile])
    if status != 0:
        logger.error('mysqlimport exited with status %s', status)

    logger.info('Creating index on user_autoconfirmed table')
    cur = sql.getCursor()
    try:
        cur.execute("CREATE INDEX user_id ON %s.user_autoconfirmed (user_id);" % settings.sqluserdb)
    finally:
        cur.close()