Example #1
0
def contributions(idiomas):
    """
    Produce the Gini/Lorenz graphics of per-author contributions.

    For each language the tables stats_Contrib_NoAnnons_author_<lang> and
    stats_Contrib_author_<lang> are read in full, the last column returned by
    __tup_to_list is extracted from each, and it is passed to __lorenz_Curve
    after opening the matching PNG device through R.

    @type  idiomas: list of strings
    @param idiomas: list of strings indicating the language versions to process
    """
    for lang in idiomas:
        # Fetch both result sets, then release the connection before plotting
        conn = dbaccess.get_Connection("localhost", 3306, "root", "phoenix", lang+"_stub")
        cursor = conn[1]
        rows_noann = dbaccess.query_SQL(cursor, " * ", "stats_Contrib_NoAnnons_author_"+lang)
        rows_author = dbaccess.query_SQL(cursor, " * ", "stats_Contrib_author_"+lang)
        dbaccess.close_Connection(conn[0])

        # Last column holds the values to plot; the column before it is
        # popped as well (as the original did) but is not used here.
        columns = __tup_to_list(rows_noann)
        values_noann = columns.pop()
        columns.pop()

        columns = __tup_to_list(rows_author)
        values_author = columns.pop()
        columns.pop()

        graphics_dir = "graphics/"+lang
        r.png(graphics_dir+"/gini_TContrib_NoAnn_"+lang+".png")
        __lorenz_Curve(values_noann)
        r.png(graphics_dir+"/gini_TContrib_"+lang+".png")
        __lorenz_Curve(values_author)
 def UserNumContribsGini(self, cursor):
     """
     Plot a Gini graph of the contributions per author for self.language.

     All rows of stats_Contrib_NoAnnons_author_<language> are fetched through
     the given cursor and handed to giniGraph as a one-element tuple of series.
     """
     ##        Retrieve info from DB
     tableName = "stats_Contrib_NoAnnons_author_" + self.language
     rows = dbaccess.query_SQL(cursor, select=" * ", table=tableName)
     ##        Plot Gini graph
     graphTitle = "Gini graph for " + self.language
     series = (rows,)
     giniGraph.createGraphic(graphTitle, series, self.graphType)
Example #3
0
 def UserNumContribsCompGini(self, cursor, languages):
     """
     Plot a comparative Gini graph with one data series per language.

     @type  cursor: DB cursor accepted by dbaccess.query_SQL
     @param cursor: cursor used to run every query
     @type  languages: iterable of strings
     @param languages: language versions whose
         stats_Contrib_NoAnnons_author_<language> table is plotted
     """
     ##        Retrieve info from DB; the table name is built from the loop
     ##        variable (the previous code used self.language, so every series
     ##        was the same data set repeated once per requested language).
     ##        NOTE(review): assumes the cursor can reach the table of every
     ##        requested language -- confirm against the caller.
     dataSeries = [
         dbaccess.query_SQL(cursor, select=" * ",
                            table="stats_Contrib_NoAnnons_author_" + language)
         for language in languages
     ]
     ##        Plot Gini comparative graph
     giniGraph.createGraphic("Gini_Comparative", dataSeries, self.graphType)
 def UserNumContribsCompGini(self, cursor, languages):
     """
     Build a comparative Gini graph: one series for each entry of languages.

     @type  cursor: DB cursor accepted by dbaccess.query_SQL
     @param cursor: cursor used to run every query
     @type  languages: iterable of strings
     @param languages: language versions to compare; for each one the table
         stats_Contrib_NoAnnons_author_<language> is read in full
     """
     dataSeries = []
     for language in languages:
         ##        Use the language being iterated. Querying self.language here
         ##        (as before) yielded identical series for every language.
         ##        NOTE(review): assumes all these tables are reachable through
         ##        the same cursor -- confirm against the caller.
         tableName = "stats_Contrib_NoAnnons_author_" + language
         dataSeries.append(dbaccess.query_SQL(cursor, select=" * ", table=tableName))
     ##        Plot Gini comparative graph
     giniGraph.createGraphic("Gini_Comparative", dataSeries, self.graphType)
Example #5
0
 def UserNumContribsGini(self, cursor):
     """
     Draw the Gini graph of per-author contributions for self.language.

     The whole stats_Contrib_NoAnnons_author_<language> table is queried with
     the supplied cursor; the result is passed to giniGraph.createGraphic as
     the only series, together with self.graphType.
     """
     language = self.language
     ##        Retrieve info from DB
     tcnoann = dbaccess.query_SQL(
         cursor,
         select=" * ",
         table="stats_Contrib_NoAnnons_author_" + language,
     )
     ##        Plot Gini graph
     giniGraph.createGraphic("Gini graph for " + language, (tcnoann,), self.graphType)
Example #6
0
 def articleSizeHistogram(self, cursor):
     """
     Plot the histogram of article lengths (aggregate plus split subpopulations).

     Reads page_id and page_len from the "aux" table through the given cursor
     and passes the rows to splitHistGraph.createGraphic as a single series,
     requesting "eps" output.
     """
     ##        Retrieve dataset with length of pages from DB
     rows = dbaccess.query_SQL(cursor, select="page_id, page_len", table="aux")
     ##        Labels and title for the plot
     plotOptions = {
         "xlabst": "Page length (log)",
         "ylabst": "Probability densities",
         "mainTitle": "Histogram for length of articles",
     }
     ##        Plot aggregate histogram and split histograms for subpopulations
     splitHistGraph.createGraphic("Histogram", (rows,), "eps", **plotOptions)
 def articleSizeHistogram(self, cursor):
     """
     Create the article-size histogram, split in two subpopulations.

     The (page_id, page_len) rows of table "aux" are fetched with the given
     cursor and forwarded, as a one-element tuple, to
     splitHistGraph.createGraphic with "eps" as third argument.
     """
     ##        Retrieve dataset with length of pages from DB
     columns = "page_id, page_len"
     pageLen = dbaccess.query_SQL(cursor, select=columns, table="aux")
     series = (pageLen,)
     ##        Plot aggregate histogram and split histograms for subpopulations
     splitHistGraph.createGraphic("Histogram", series, "eps",
                                  xlabst="Page length (log)",
                                  ylabst="Probability densities",
                                  mainTitle="Histogram for length of articles")
Example #8
0
def comparative_contributions():
    """
    Draw the Gini curves of a fixed list of language versions on one PNG.

    A single R PNG device (graphics/AAA/gini_comparative_top10.png) is opened,
    the last column of stats_Contrib_NoAnnons_author_<lang> is passed to
    _lorenz_Comp_Curves for every language, and the device is closed at the end.
    The second argument of _lorenz_Comp_Curves is 0 for the first language and
    1 for all the following ones.
    """
    languages = ["dewiki", "jawiki", "frwiki", "plwiki", "nlwiki", "itwiki", "ptwiki", "eswiki", "svwiki"]

    r.png("graphics/AAA/gini_comparative_top10.png")
    already_started = 0
    for lang in languages:
        print("Starting comparative Gini analysis for language..."+lang+"\n")
        conn = dbaccess.get_Connection("localhost", 3306, "root", "phoenix", lang+"_stub")
        rows = dbaccess.query_SQL(conn[1], " * ", "stats_Contrib_NoAnnons_author_"+lang)
        dbaccess.close_Connection(conn[0])

        columns = __tup_to_list(rows)
        values = columns.pop()
        # The next column is popped too (kept from the original) but not used
        columns.pop()

        # 0 on the first pass, 1 afterwards
        _lorenz_Comp_Curves(values, already_started)
        already_started = 1
    r.dev_off()
    print("Comparative graphic for Gini curves finished!!")
Example #9
0
def histogram(idiomas):
    """
    Create histograms depicting article size distribution for a certain language version

    For every language this function:
      1. writes ./data/hist_files_names.data with the paths of the files the
         GNU R script histogram.R works with (one per line, fixed order);
      2. reads (page_id, page_len) from table "aux" of the <language>_stub DB;
      3. writes log10 of every non-zero page length (zeros are written as 0)
         to ./graphics/<language>/data/page_len_log.data;
      4. runs histogram.R through the shell, sending its output to debug_R.

    @type  idiomas: list of strings
    @param idiomas: list of strings indicating the language versions to process
    """
    filenames=["boxplot_log.png", "histogram_log.png", "histogram_log_low.png", "histogram_log_high.png", "ecdf_log_low.png", "ecdf_log_high.png", "data/page_len_log.data", "/data/histograms.info", "ecdf_total.png"]

    for idioma in idiomas:
        print("Creando histogramas para el idioma ... "+idioma)
        #Print to another file the names of graphics files, following the order in the GNU R script histogram.R
        f=open("./data/hist_files_names.data",'w')
        try:
            for line in filenames:
                f.write("./graphics/"+idioma+"/"+line+"\n")
        finally:
            # Close the handle even if a write fails
            f.close()

        #Considering only database pages corresponding to articles, with NAMESPACE=MAIN=0
        #(table "aux" is expected to exist already)
        acceso = dbaccess.get_Connection("localhost", 3306, "root", "phoenix", idioma+"_stub")
        try:
            result=dbaccess.query_SQL(acceso[1], "page_id, page_len", "aux")
        finally:
            # Do not leak the connection when the query raises
            dbaccess.close_Connection(acceso[0])

        data=__tup_to_list(result)
        page_len=data.pop()
        # Zero lengths are kept as they are: log10(0) is undefined
        for i, length in enumerate(page_len):
            if length!=0:
                page_len[i]=math.log10(length)

        #Print to another file a list with article sizes to plot histograms
        f=open("./graphics/"+idioma+"/data/page_len_log.data", 'w')
        try:
            for value in page_len:
                # write(), not writelines(): the argument is a single string
                f.write(str(value)+"\n")
        finally:
            f.close()

        #CALL THE GNU R SCRIPT Histogram.R
        succ=os.system("R --vanilla < ./histogram.R > debug_R")
        if succ==0:
            print("Funcion histogram ejecutada con exito para el lenguage... "+idioma)
Example #10
0
    def UserNumContribsGenerations(self):
        """
        Same 3D study as in LibreSoftware.

        Loads (period, author, contribs) from contribs_period_author_<language>
        ordered by "period, contribs DESC" into self.periodCommitsCommiter,
        stores the period of the last row in self.lastPeriod and then runs the
        analysis methods below.
        """
        cursor = self.acceso[1]
        viewName = "contribs_period_author_"+self.language
        self.periodCommitsCommiter = dbaccess.query_SQL(
            cursor,
            select="period, author, contribs",
            tables=viewName,
            order="period, contribs DESC")
        # First column of the final row
        lastRow = self.periodCommitsCommiter[-1]
        self.lastPeriod = int(lastRow[0])

        # Perform all the analysis
        print('Performing analysis with period = months\n')
        self.commitsPerPeriod()
        self.largestCommiters()
        self.topFractionCommits(0.1)
        ##        FIXME: repeat executions with different percentages
        ##        TODO: add periodified plotbars for topFractionCommiters
        self.topFractionCommiters(0.05)
    def UserNumContribsGenerations(self):
        """
        Same 3D study as in LibreSoftware.

        Queries the contribs_period_author_<language> view for
        (period, author, contribs) sorted by period and descending contribs,
        keeps the rows in self.periodCommitsCommiter, records the period of the
        final row as self.lastPeriod and launches the individual analyses.
        """
        queryArgs = {
            "select": "period, author, contribs",
            "tables": "contribs_period_author_" + self.language,
            "order": "period, contribs DESC",
        }
        self.periodCommitsCommiter = dbaccess.query_SQL(self.acceso[1], **queryArgs)
        # Period value stored in the first column of the last row
        self.lastPeriod = int(self.periodCommitsCommiter[-1][0])

        # Perform all the analysis
        print ("Performing analysis with period = months\n")
        self.commitsPerPeriod()
        self.largestCommiters()
        self.topFractionCommits(0.1)
        ##        FIXME: repeat executions with different percentages
        ##        TODO: add periodified plotbars for topFractionCommiters
        self.topFractionCommiters(0.05)
    def UserNumContribsGroup(self, cursor):
        """
        Plot comparative graphics with contributions from different groups
        of users (reproduction of the article "Power of the few...").

        Steps:
          1. create the view contribs_period_author_<language>, where period is
             the number of months elapsed since the first (year, month) found in
             stats_Contrib_NoAnnons_months_author_<language>; authors in the
             'bot' user group are left out;
          2. plot revisions per month made by 'sysop' users (FIG 2) and their
             percentage over all revisions (FIG 1) with self.simpleGraph;
          3. for five levels of total contributions per author (<=100,
             101-1000, 1001-5000, 5001-10000, >10000) plot, with
             self.multiGraph, FIG 4 to FIG 8.

        @param cursor: DB cursor handed to dbaccess.query_SQL and
            dbaccess.createView
        """

        def ratiosByPeriod(numerRows, denomRows, asPercent):
            """
            Return [(period, numer/denom)] for every period of numerRows that
            also appears in denomRows; values are multiplied by 100 when
            asPercent is true. Rows are (period, value) pairs.
            """
            # First occurrence of a period wins, as in a linear search + break
            denominators = {}
            for row in denomRows:
                denominators.setdefault(row[0], row[1])
            ratios = []
            for row in numerRows:
                period = row[0]
                if period in denominators:
                    ratio = float(row[1]) / float(denominators[period])
                    if asPercent:
                        ratio = ratio * 100
                    ratios.append((period, ratio))
            return ratios

        monthsTable = "stats_Contrib_NoAnnons_months_author_" + self.language
        periodView = "contribs_period_author_" + self.language

        ##CREATE VIEW FOR PERIODS FROM 0 IN MONTHS
        minYear = dbaccess.query_SQL(cursor, select="MIN(year)", tables=monthsTable)
        firstYear = str(int(minYear[0][0]))
        minMonth = dbaccess.query_SQL(
            cursor,
            select="MIN(month)",
            tables=monthsTable,
            where="year=" + firstYear,
        )
        firstMonth = str(int(minMonth[0][0]))

        ##Bots contribs are dropped from the source view
        dbaccess.createView(
            cursor,
            view=periodView,
            columns="period, author, contribs",
            query="SELECT ((year*12)+month-("
            + firstYear
            + "*12)-"
            + firstMonth
            + ") as period, author, theCount FROM "
            + monthsTable
            + " WHERE author NOT IN "
            + "(SELECT DISTINCT(ug_user) FROM user_groups WHERE ug_group='bot') ORDER BY period",
        )

        ##        Retrieve number of revisions made by admins per month
        revsAdminsPerMonth = dbaccess.query_SQL(
            cursor,
            select="period, SUM(contribs)",
            tables=periodView,
            where="author IN (SELECT ug_user FROM user_groups where ug_group='sysop')",
            group="period",
            order="period",
        )
        ##        Plot FIG 2
        self.simpleGraph.createGraphic(
            "revs_admins_per_month",
            (revsAdminsPerMonth,),
            xlabst="Months",
            ylabst="Revisions",
            mainTitle="Revisions per month for admins " + self.language,
            graphType=self.graphType,
            log=False,
        )

        ##        Total revisions per month (all non-bot authors in the view)
        contribsMonth = dbaccess.query_SQL(
            cursor,
            select="period, SUM(contribs)",
            tables=periodView,
            group="period",
            order="period",
        )

        ##        (period, adminsContribs/totContribs*100), matching rows by period
        percContribsAdmins = ratiosByPeriod(revsAdminsPerMonth, contribsMonth, True)
        ##        Plot FIG 1 % of total revs per month made by admins
        self.simpleGraph.createGraphic(
            "perc_revs_admins_per_month",
            (percContribsAdmins,),
            xlabst="Months",
            ylabst="% revisions",
            mainTitle="% of total revisions per month made by admins " + self.language,
            graphType=self.graphType,
            log=False,
        )

        ##    FIG 4 TOTAL EDITS MADE BY USERS WITH DIFFERENT EDIT LEVELS
        ##    CREATE WHERE CLAUSES FOR CLUSTER OF USERS IDENTIFIED BY CONTRIBUTIONS LEVEL
        ##    5 LEVELS: <100, 100-1K, 1K-5K, 5K-10K, >10K
        ##    (the generated SQL text is kept exactly as it was, including the
        ##    missing blank after "NOT IN" in the second level)
        levelPrefix = (
            "author IN (SELECT DISTINCT(author) FROM stats_Contrib_NoAnnons_author_"
            + self.language
        )
        notBots = "(SELECT DISTINCT(ug_user) FROM user_groups WHERE ug_group='bot'))"
        levels = (
            levelPrefix + " WHERE theCount<=100 AND author NOT IN " + notBots,
            levelPrefix + " WHERE theCount BETWEEN 101 AND 1000 AND author NOT IN" + notBots,
            levelPrefix + " WHERE theCount BETWEEN 1001 AND 5000 AND author NOT IN " + notBots,
            levelPrefix + " WHERE theCount BETWEEN 5001 AND 10000 AND author NOT IN " + notBots,
            levelPrefix + " WHERE theCount>10000 AND author NOT IN " + notBots,
        )

        ##        One entry per level in each of these lists
        listContribsLevel = []
        listAvgContribsLevel = []
        listPercContribsLevel = []
        listUsersLevel = []
        listPercUsersLevel = []

        ##        Retrieve tot num of users per month
        usersMonth = dbaccess.query_SQL(
            cursor,
            select="period, COUNT(DISTINCT(author))",
            tables=periodView,
            group="period",
            order="period",
        )

        for level in levels:
            ##            Retrieve contribs per month for this level
            contribsLevelMonth = dbaccess.query_SQL(
                cursor,
                select="period, SUM(contribs)",
                tables=periodView,
                where=level,
                group="period",
                order="period",
            )
            listContribsLevel.append(contribsLevelMonth)

            ##            (period, contribsLevel/totContribs*100)
            listPercContribsLevel.append(
                ratiosByPeriod(contribsLevelMonth, contribsMonth, True)
            )

            ##            Retrieve number of users per level per month
            usersLevelMonth = dbaccess.query_SQL(
                cursor,
                select="period, COUNT(DISTINCT(author))",
                tables=periodView,
                where=level,
                group="period",
                order="period",
            )
            listUsersLevel.append(usersLevelMonth)

            ##            Avg number of revs per user in this level, per month
            listAvgContribsLevel.append(
                ratiosByPeriod(contribsLevelMonth, usersLevelMonth, False)
            )

            ##            (period, usersLevel/totUsers*100). Rows are matched by
            ##            period only; the former zip() with usersMonth rebound
            ##            its own loop variable and could cut the level rows short.
            listPercUsersLevel.append(
                ratiosByPeriod(usersLevelMonth, usersMonth, True)
            )

        ##        (file name, data, y label, title, log scale) for each multi graph
        multiGraphs = (
            ##        2D graph for FIG 4
            ("perc_revs_per_userlevel_month", listPercContribsLevel,
             "% revisions", "% of total revs per user level per month", False),
            ##        Plot 2D multi graph (FIG 5)
            ("revs_per_userlevel_month", listContribsLevel,
             "revisions", "Total revisions per user level per month", True),
            ##    FIG 6 AVERAGE NUMBER OF EDITS PER USER PER MONTH FOR EACH LEVEL
            ("avg_revs_per_userlevel_month", listAvgContribsLevel,
             "avg. revisions", "Avg revisions per user level per month", True),
            ##        FIG 7 POPULATION GROWTH FOR EACH USER GROUP
            ("users_per_level_month", listUsersLevel,
             "log(num users)", "Growth of each user group per month", True),
            ##        FIG 8 % OF TOTAL POPULATION OF EACH USER GROUP
            ("perc_users_per_level_month", listPercUsersLevel,
             "% users", "% of users in each user group per month", False),
        )
        for graphName, series, ylabel, title, useLog in multiGraphs:
            self.multiGraph.createGraphic(
                graphName,
                series,
                xlabst="months",
                ylabst=ylabel,
                mainTitle=title,
                graphType=self.graphType,
                format=[],
                log=useLog,
            )

        """
Example #13
0
def community_contrib(idiomas):
    """
    Compute monthly contribution statistics for admins and for five levels of
    users, dump them to data files and run the GNU R script community_contrib.R.

    For every language:
      - admins are obtained from test_admins.process_admins and their monthly
        edits are compared with the total monthly edits (FIG 1 and FIG 2);
      - authors are clustered by total number of contributions in 5 levels
        (<=100, 100-1K, 1K-5K, 5K-10K, >10K); for each level the monthly
        contributions, their share of the total, the number of active users,
        the average contributions per user and the share of users are
        computed (FIG 4 to FIG 8);
      - every series is written under ./graphics/<language>/data/ and the
        lists of data/output file names are written under ./data/ for the
        R script.

    NOTE(review): monthly series are paired by position (zip), not by
    (year, month); this assumes every query returns the same months in the
    same order -- confirm.

    @type  idiomas: list of strings
    @param idiomas: list of strings indicating the language versions to process
    """

    def write_lines(path, lines):
        # Write one item per line, closing the file even if a write fails
        f = open(path, 'w')
        try:
            for item in lines:
                f.write(str(item)+"\n")
        finally:
            f.close()

    ##    5 LEVELS: <100, 100-1K, 1K-5K, 5K-10K, >10K
    level_conditions = [
        "theCount<=100",
        "theCount>100 AND theCount<=1000",
        "theCount>1000 AND theCount<=5000",
        "theCount>5000 AND theCount<=10000",
        "theCount>10000",
    ]
    level_numbers = range(1, len(level_conditions)+1)

    ##    Names of the data files, in the order expected by the R script
    filenames = ["dates_admin_contrib.data", "contribs_admins_months.data", "perc_contribs_months.data"]
    for num in level_numbers:
        filenames.extend(["dates_level%d_contrib.data" % num,
                          "contribs_level%d_months.data" % num,
                          "perc_contribs_level%d_months.data" % num])
    filenames.extend(["avg_contribs_user_%d_month.data" % num for num in level_numbers])
    filenames.extend(["users_%d_month.data" % num for num in level_numbers])
    filenames.extend(["perc_users_%d_months.data" % num for num in level_numbers])

    filenames_out = ["Figure1.png", "Figure_2.png", "Figure4.png", "Figure5.png", "Figure6.png", "Figure7.png", "Figure8.png"]

    for idioma in idiomas:
        list_admins = test_admins.process_admins(idioma)
        num_admins = list_admins.pop()
        where_clause1 = list_admins.pop()
        months_table = "stats_Contrib_months_author_"+idioma

        acceso = dbaccess.get_Connection("localhost", 3306, "root", "phoenix", idioma+"_stub")
        try:
            admins_ids = dbaccess.raw_query_SQL(acceso[1], "SELECT DISTINCT(author) FROM stats_"+idioma+" WHERE "+where_clause1+" LIMIT "+str(num_admins))
            ##        BUILD WHERE CLAUSE WITH ADMINS IDS
            ##        (iterate the query result; the old loop walked the empty
            ##        target list, so no admin id was ever collected)
            list_admins_ids = [int(item[0]) for item in admins_ids]
            where_clause2 = test_admins.process_users_ids(list_admins_ids, idioma)
            edits_admin_month = dbaccess.query_SQL(acceso[1], select="year, month, SUM(theCount)", tables="stats_Contrib_NoAnnons_months_author_"+idioma+" ", where=where_clause2, group="year, month ", order="year, month")
            ##        RETRIEVE TOTAL CONTRIBUTIONS PER MONTH
            total_edits_month = dbaccess.query_SQL(acceso[1], select="year, month, SUM(theCount)", tables=months_table, group="year, month ")

            ##    FIG 4 TOTAL EDITS MADE BY USERS WITH DIFFERENT EDIT LEVELS
            ##    CREATE CLUSTER OF USERS IDENTIFIED BY CONTRIBUTIONS LEVEL
            contribs_levels_month = []
            users_levels = []
            for condition in level_conditions:
                level_rows = dbaccess.query_SQL(acceso[1], select="DISTINCT(author)", tables="stats_Contrib_author_"+idioma, where=condition)
                level_ids = [int(userid[0]) for userid in level_rows]
                where_clause_level = test_admins.process_users_ids(level_ids, idioma)
                contribs_levels_month.append(dbaccess.query_SQL(acceso[1], select="year, month, SUM(theCount)", tables=months_table, where=where_clause_level, group="year, month"))
                ##        NUM USERS FOR EACH MONTH IN THIS LEVEL WHO HAVE MADE AT LEAST ONE CONTRIB
                num_users_month = dbaccess.query_SQL(acceso[1], select="COUNT(DISTINCT author)", tables=months_table, where=where_clause_level, group="year, month")
                users_levels.append([int(element[0]) for element in num_users_month])
        finally:
            # The connection was never released before
            dbaccess.close_Connection(acceso[0])

        ##        FIG 2: admin edits per month
        dates_admins = [list(element[0:2]) for element in edits_admin_month]
        admins_contribs = [int(element[2]) for element in edits_admin_month]
        total_contribs = [int(element[2]) for element in total_edits_month]
        ##        FIG 1: divide the first list by the second one
        perc_contribs_admins = [float(admin_contrib)/total_contrib
                                for admin_contrib, total_contrib in zip(admins_contribs, total_contribs)]

        ##        __process_contribs returns [dates, contribs, perc_contribs];
        ##        each level now pops from its own result (level 3 used to pop
        ##        its dates from the already exhausted level 1 list)
        dates_levels = []
        contribs_levels = []
        perc_contribs_levels = []
        for contribs_month in contribs_levels_month:
            processed = __process_contribs(contribs_month, total_contribs)
            perc_contribs_levels.append(processed.pop())
            contribs_levels.append(processed.pop())
            dates_levels.append(processed.pop())

        ##    FIG 5 PLOT 4b
        ##    FIG 6 AVERAGE NUMBER OF EDITS PER USER PER MONTH FOR EACH LEVEL
        ##        DIVIDE TOT NUM CONTRIBS PER LEVEL PER MONTH BY THE NUM USERS FOR EACH MONTH IN EACH LEVEL
        avg_contribs_levels = []
        for contribs_level, users_level in zip(contribs_levels, users_levels):
            avg_contribs_levels.append([float(contribmonth)/usermonth
                                        for contribmonth, usermonth in zip(contribs_level, users_level)])

        ##        FIG 7 POPULATION GROWTH FOR EACH USER GROUP
        ##        SIMPLY RETRIEVE users_levels
        ##        FIG 8 % OF TOTAL POPULATION OF EACH USER GROUP
        perc_users_levels = [[] for condition in level_conditions]
        for users_this_month in zip(*users_levels):
            total_users_month = sum(users_this_month)
            for perc_users, users in zip(perc_users_levels, users_this_month):
                perc_users.append(float(users)/total_users_month)

        ###############################
        ##    FINAL DUTIES, TRANSFER DATA AND EXECUTE R SCRIPT
        ##    dates_admin_contrib.data gets the dates of the admin series
        ##    (dates_admins was computed but the total-edits dates were written)
        dataList = [dates_admins, admins_contribs, perc_contribs_admins]
        for dates, contribs, perc in zip(dates_levels, contribs_levels, perc_contribs_levels):
            dataList.extend([dates, contribs, perc])
        dataList.extend(avg_contribs_levels)
        dataList.extend(users_levels)
        dataList.extend(perc_users_levels)

        for filename, data in zip(filenames, dataList):
            if 'date' in filename:
                write_lines("./graphics/"+idioma+"/data/"+filename, data)
            else:
                __makeDataFile(idioma, filename, data)

        #Pass data filenames to the GNU R script with a file
        write_lines("./data/community_contrib_files_names.data",
                    ["./graphics/"+idioma+"/data/"+line for line in filenames])

        #Idem with graphic output filenames
        write_lines("./data/community_contrib_files_out.data",
                    ["./graphics/"+idioma+"/"+line for line in filenames_out])

        #CALL GNU R SCRIPT community_contrib.R
        succ = os.system("R --vanilla < ./community_contrib.R > debug_R")
        if succ==0:
            print("Funcion community_contrib.R ejecutada con exito para el lenguage... "+idioma)
Example #14
0
def measuring(idiomas):
    """
    Create some graphs following the research presented by Jakob Voss in his paper
    Measuring Wikipedia (ISSI 2005)

    For each language the per-author and per-article counters are read from the
    "<idioma>_stub" database, sorted in decreasing order and written out with
    __makeDataFile. The GNU R script measuring_Wiki.R is then launched; it is
    told which files to use through ./data/measuring_files_names.data (inputs)
    and ./data/measuring_files_out.data (graphics to produce). Both manifest
    files are rewritten for every language.

    @type  idiomas: list of strings
    @param idiomas: list of strings indicating the language versions to process
    """
    # Data files handed to measuring_Wiki.R, in the same order as dataList below
    filenames = [
        "total_edits.data",
        "noannons_edits.data",
        "annon_edits.data",
        "authors_per_article_desc.data",
        "articles_per_logged_author_desc.data",
        "articles_per_anonymous_author_desc.data",
    ]

    # Graphics the R script is asked to produce
    filenames_out = [
        "total_edits_per_author.png",
        "total_edits_per_noannon_author.png",
        "total_edits_per_annon_author.png",
        "diff_authors_per_article_descending.png",
        "diff_articles_per_logged_author_descending.png",
        "diff_articles_per_anonymous_author_descending.png",
    ]

    for idioma in idiomas:
        # Graphics NOT produced here:
        #   - database size, total words, internal links, number of articles and
        #     (very) active wikipedians: already generated by Erik Zachte's
        #     official Perl scripts
        #   - namespace size: generated in summary_evol()
        #   - evolution in time of article size (histogram): idea, download the
        #     page.sql files of a language for each semester
        #   - ingoing/outgoing links per article and broken links: still to be
        #     developed, the links tables are empty in these dump versions

        # NOTE(review): host and credentials are hard-coded here.
        acceso = dbaccess.get_Connection("localhost", 3306, "root", "phoenix", idioma+"_stub")
        try:
            cursor = acceso[1]

            # Number of distinct authors per article
            diffAuthorperArticle = dbaccess.query_SQL(cursor, "page_id, theCount", "stats_Article_NoAnnons_page_id_"+idioma)

            # Number of distinct articles per author (logged and anonymous)
            diffArticlesNoann = dbaccess.query_SQL(cursor, "author, theCount", "stats_Article_NoAnnons_author_"+idioma)
            diffArticlesAnn = dbaccess.query_SQL(cursor, "author_text, theCount", "stats_Article_Annons_author_text_"+idioma)

            # Number of edits per author (the Gini graphics for this parameter
            # are created elsewhere)
            tcnoann = dbaccess.query_SQL(cursor, " * ", "stats_Contrib_NoAnnons_author_"+idioma)
            tcauthor = dbaccess.query_SQL(cursor, " * ", "stats_Contrib_author_"+idioma)
            tc_ann = dbaccess.query_SQL(cursor, " * ", "stats_Contrib_Annons_author_text_"+idioma)
        finally:
            # Release the connection even when one of the queries fails
            dbaccess.close_Connection(acceso[0])

        # Only the last list returned by __tup_to_list is kept for each result
        # (presumably the counter column -- confirm against __tup_to_list).
        # For anonymous users the IP addresses are retrieved too but not used.
        lisdiffauthorartic = __tup_to_list(diffAuthorperArticle).pop()
        lisdiffarticleaut = __tup_to_list(diffArticlesNoann).pop()
        lisdiffarticleannon = __tup_to_list(diffArticlesAnn, 2).pop()
        listcnoann = __tup_to_list(tcnoann).pop()
        listcauthors = __tup_to_list(tcauthor).pop()
        listcann = __tup_to_list(tc_ann, 2).pop()

        dataList = [listcauthors, listcnoann, listcann, lisdiffauthorartic, lisdiffarticleaut, lisdiffarticleannon]

        # Arrange every series in decreasing order so that it can be fitted
        # to a power law
        for series in dataList:
            series.sort(reverse=True)

        for filename, data in zip(filenames, dataList):
            if 'date' in filename:
                __makeDatesFile(idioma, filename, data)
            else:
                __makeDataFile(idioma, filename, data)

        # Pass the data file names, and then the graphic output file names,
        # to the GNU R script through two manifest files
        manifests = (
            ("./data/measuring_files_names.data", "./graphics/"+idioma+"/data/", filenames),
            ("./data/measuring_files_out.data", "./graphics/"+idioma+"/", filenames_out),
        )
        for manifest, prefix, names in manifests:
            with open(manifest, 'w') as f:
                f.writelines([prefix+name+"\n" for name in names])

        # Call the GNU R script measuring_Wiki.R; a non-zero exit status is
        # not reported
        succ = os.system("R --vanilla < ./measuring_Wiki.R > debug_R")
        if succ == 0:
            print("Funcion measuring_Wiki.R ejecutada con exito para el lenguage... "+idioma)
Example #15
0
def summary_evol(idiomas):
    """
    Create some graphs summarizing the evolution in time of critical quantitative
    parameters for each language version to explore

    For each language the monthly evolution tables, the namespace distribution
    and the author/article counters are read from the "<idioma>_stub" database
    and written out with __makeDatesFile / __makeDataFile. The GNU R script
    summary_evol.R is then launched; it is told which files to use through
    ./data/summary_files_names.data (inputs) and ./data/summary_files_out.data
    (graphics to produce). Both manifest files are rewritten for every language.

    @type  idiomas: list of strings
    @param idiomas: list of strings indicating the language versions to process
    """
    # WARNING: be careful when selecting values from the tables that store the
    # evolution in time of number of articles, size, etc. Always use a GROUP BY
    # clause, because periods of inactivity could generate duplicate entries
    # in the graphics.

    # Data files handed to summary_evol.R, in the same order as dataList below
    filenames = [
        "page_dates.data",
        "page_Count_evol.data",
        "page_Len_Sum_log.data",
        "contribs_evol.data",
        "nspaces.data",
        "nspace_distrib.data",
        "diffArticles.data",
        "authors.data",
        "diff_authors_x_article.data",
        "authors_authors_per_pagelen.data",
        "pagelen_authors_per_pagelen.data",
    ]

    # Graphics the R script is asked to produce
    filenames_out = [
        "Tot_num_articles_absx_absy.png",
        "Tot_num_articles_absx_logy.png",
        "Tot_num_articles_logx_logy.png",
        "Tot_pagelensum_absx_absy.png",
        "Tot_pagelensum_absx_logy.png",
        "Tot_pagelensum_logx_logy.png",
        "Tot_contribs_absx_absy.png",
        "Tot_contribs_absx_logy.png",
        "Tot_contribs_logx_logy.png",
        "Diffs_articles_per_author.png",
        "Diffs_authors_per_article.png",
        "Diff_authors_against_page_len.png",
    ]

    for idioma in idiomas:
        # NOTE(review): host and credentials are hard-coded here.
        acceso = dbaccess.get_Connection("localhost", 3306, "root", "phoenix", idioma+"_stub")
        try:
            cursor = acceso[1]
            evol_table = "stats_Evolution_Content_months_"+idioma

            result = dbaccess.query_SQL(cursor, "pageCount, limitDate", evol_table, group="(limitDate)")
            result2 = dbaccess.query_SQL(cursor, "pageLenSum, limitDate", evol_table, group="(limitDate)")
            result3 = dbaccess.query_SQL(cursor, "contribs, limitDate", evol_table, group="(limitDate)")

            resultnspace = dbaccess.query_SQL(cursor, "pages_nspace, namespace", "stats_nspace_"+idioma)

            diffArticlesNoann = dbaccess.query_SQL(cursor, "author, theCount", "stats_Article_NoAnnons_author_"+idioma)
            diffInitNoann = dbaccess.query_SQL(cursor, "author, theCount", "stats_Article_Init_NoAnnons_author_"+idioma)

            diffAuthorperArticle = dbaccess.query_SQL(cursor, "page_id, theCount", "stats_Article_NoAnnons_page_id_"+idioma)

            dautxplen = dbaccess.query_SQL(cursor, "page_len, authors", "stats_pagelen_difauthors_"+idioma)
        finally:
            # Release the connection even when one of the queries fails
            dbaccess.close_Connection(acceso[0])

        # __tup_to_list results are consumed from the end with pop(): the
        # first pop() yields the list of the last selected column.
        data = __tup_to_list(result, 1)
        data.pop()  # date list, the one written out is taken from result3 below
        page_Count = data.pop()

        data2 = __tup_to_list(result2, 2)
        data2.pop()  # date list, not used
        page_Len_Sum = data2.pop()
        # The first two samples of the page length series are discarded
        # (presumably a workaround first introduced for frwiki -- confirm)
        page_Len_Sum.pop(0)
        page_Len_Sum.pop(0)

        data3 = __tup_to_list(result3, 1)
        dates_x = data3.pop()
        contribs = data3.pop()

        datanspace = __tup_to_list(resultnspace)
        namespaces = datanspace.pop()
        pages_nspace = datanspace.pop()

        diffArticles = __tup_to_list(diffArticlesNoann).pop()

        # NOTE(review): the author list written to authors.data comes from the
        # "Init" table, while diffArticles.data comes from
        # stats_Article_NoAnnons_author_*. Kept as it was; confirm that
        # summary_evol.R really expects this pairing.
        dataDiffInitNoann = __tup_to_list(diffInitNoann)
        dataDiffInitNoann.pop()  # counter list, not used
        authors = dataDiffInitNoann.pop()

        diffAuthors = __tup_to_list(diffAuthorperArticle).pop()

        datadautxplen = __tup_to_list(dautxplen)
        autxplen = datadautxplen.pop()
        lenautxplen = datadautxplen.pop()

        # The page length sums are written in log10 scale; zeros are left
        # untouched because their logarithm is undefined
        page_Len_Sum = [math.log10(value) if value != 0 else value for value in page_Len_Sum]

        # Query results in the order of the file names passed to the GNU R
        # script summary_evol.R
        dataList = [dates_x, page_Count, page_Len_Sum, contribs, namespaces, pages_nspace, diffArticles, authors, diffAuthors, autxplen, lenautxplen]

        for filename, data in zip(filenames, dataList):
            if 'date' in filename:
                __makeDatesFile(idioma, filename, data)
            else:
                __makeDataFile(idioma, filename, data)

        # Pass the data file names, and then the graphic output file names,
        # to the GNU R script through two manifest files
        manifests = (
            ("./data/summary_files_names.data", "./graphics/"+idioma+"/data/", filenames),
            ("./data/summary_files_out.data", "./graphics/"+idioma+"/", filenames_out),
        )
        for manifest, prefix, names in manifests:
            with open(manifest, 'w') as f:
                f.writelines([prefix+name+"\n" for name in names])

        # Call the GNU R script summary_evol.R; a non-zero exit status is
        # not reported
        succ = os.system("R --vanilla < ./summary_evol.R > debug_R")
        if succ == 0:
            print("Funcion summary_evol ejecutada con exito para el lenguage... "+idioma)
Example #16
0
    def UserNumContribsGroup(self, cursor):
        """
        A class to plot comparative graphics with contributions from 
        different groups
        """
        ###Reproduction of the article Power of the few...
        ##        Admins and bots IDs can be retrieved from DB as subselects in the where clause
        ##########################
        ##Drop bots contribs from DB source view
        ##########################
        ##CREATE VIEW FOR PERIODS FROM 0 IN MONTHS
        minYear=dbaccess.query_SQL(cursor, select="MIN(year)",\
        tables="stats_Contrib_NoAnnons_months_author_"+self.language)

        minMonth=dbaccess.query_SQL(cursor, select="MIN(month)",\
        tables="stats_Contrib_NoAnnons_months_author_"+self.language,\
        where="year="+str(int(minYear[0][0])))

        dbaccess.createView(cursor, view="contribs_period_author_"+self.language,\
        columns="period, author, contribs",\
        query="SELECT ((year*12)+month-("+str(int(minYear[0][0]))+"*12)-"+str(int(minMonth[0][0]))\
        +") as period, author, theCount FROM "+\
        "stats_Contrib_NoAnnons_months_author_"+self.language+" WHERE author NOT IN "+\
        "(SELECT DISTINCT(ug_user) FROM user_groups WHERE ug_group='bot') ORDER BY period")

        ##        Retrieve number of revision made by admins per month
        revsAdminsPerMonth=dbaccess.query_SQL(cursor,select="period, SUM(contribs)",\
        tables="contribs_period_author_"+self.language,\
        where="author IN (SELECT ug_user FROM user_groups where ug_group='sysop')",\
        group="period",order="period")
        ##        Plot FIG 2
        self.simpleGraph.createGraphic("revs_admins_per_month", (revsAdminsPerMonth,),\
        xlabst="Months", ylabst="Revisions",mainTitle="Revisions per month for admins "+\
        self.language, graphType=self.graphType, log=False)

        contribsMonth=dbaccess.query_SQL(cursor,\
        select="period, SUM(contribs)",\
        tables="contribs_period_author_"+self.language,\
        group="period",order="period")

        ##        divide element by element
        ##        Supposedly, there is at least one rev per month made by an admin
        percContribsAdmins = []
        for totAdminContrib in revsAdminsPerMonth:
            for totContrib in contribsMonth:
                if totAdminContrib[0] == totContrib[0]:
                    ##            append (period, adminsContribs/totContribs)
                    perc = float(totAdminContrib[1]) / float(totContrib[1])
                    percContribsAdmins.append((totAdminContrib[0], perc * 100))
                    break
##        Plot FIG 1 % of total revs per month made by admins
        self.simpleGraph.createGraphic("perc_revs_admins_per_month", (percContribsAdmins,),\
        xlabst="Months", ylabst="% revisions",\
        mainTitle="% of total revisions per month made by admins "+self.language,\
        graphType=self.graphType, log=False)

        ##    FIG 4 TOTAL EDITS MADE BY USERS WITH DIFFERENT EDIT LEVELS
        ##    CREATE WHERE CLAUSES FOR CLUSTER OF USERS IDENTIFIED BY CONTRIBUTIONS LEVEL
        ##    5 LEVELS: <100, 100-1K, 1K-5K, 5K-10K, >10K
        usersLevel1="author IN (SELECT DISTINCT(author) FROM stats_Contrib_NoAnnons_author_"+\
        self.language+" WHERE theCount<=100 AND author NOT IN "+\
        "(SELECT DISTINCT(ug_user) FROM user_groups WHERE ug_group='bot'))"
        usersLevel2="author IN (SELECT DISTINCT(author) FROM stats_Contrib_NoAnnons_author_"+\
        self.language+" WHERE theCount BETWEEN 101 AND 1000 AND author NOT IN" +\
        "(SELECT DISTINCT(ug_user) FROM user_groups WHERE ug_group='bot'))"
        usersLevel3="author IN (SELECT DISTINCT(author) FROM stats_Contrib_NoAnnons_author_"+\
        self.language+" WHERE theCount BETWEEN 1001 AND 5000 AND author NOT IN "+\
        "(SELECT DISTINCT(ug_user) FROM user_groups WHERE ug_group='bot'))"
        usersLevel4="author IN (SELECT DISTINCT(author) FROM stats_Contrib_NoAnnons_author_"+\
        self.language+" WHERE theCount BETWEEN 5001 AND 10000 AND author NOT IN "+\
        "(SELECT DISTINCT(ug_user) FROM user_groups WHERE ug_group='bot'))"
        usersLevel5="author IN (SELECT DISTINCT(author) FROM stats_Contrib_NoAnnons_author_"+\
        self.language+" WHERE theCount>10000 AND author NOT IN "+\
        "(SELECT DISTINCT(ug_user) FROM user_groups WHERE ug_group='bot'))"

        ##        Some vars used in for iterations
        levels = (usersLevel1, usersLevel2, usersLevel3, usersLevel4,
                  usersLevel5)
        listContribsLevel = []
        listAvgContribsLevel = []
        listPercContribsLevel = []
        listUsersLevel = []
        listPercUsersLevel = []

        ##        Retrieve tot num of users per month
        usersMonth=dbaccess.query_SQL(cursor,\
        select="period, COUNT(DISTINCT(author))",\
        tables="contribs_period_author_"+self.language,\
        group="period",order="period")

        for level in levels:
            ##            Retrieve contribs per month for this level
            contribsLevelMonth=dbaccess.query_SQL(cursor,\
            select="period, SUM(contribs)",\
            tables="contribs_period_author_"+self.language,\
            where=level, group="period", order="period")
            listContribsLevel.append(contribsLevelMonth)

            percContribsLevel = []
            ##            Append (period, contribsLevel/totContribs) checking periods correspondence
            for totLevelContrib in contribsLevelMonth:
                for totContrib in contribsMonth:
                    if totLevelContrib[0] == totContrib[0]:
                        perc = float(totLevelContrib[1]) / float(totContrib[1])
                        percContribsLevel.append(
                            (totLevelContrib[0], perc * 100))
                        break
            listPercContribsLevel.append(percContribsLevel)

            ##            Retrieve number of users per level per month
            usersLevelMonth=dbaccess.query_SQL(cursor,\
            select="period, COUNT(DISTINCT(author))",\
            tables="contribs_period_author_"+self.language,\
            where=level, group="period",order="period")
            ##Append to the list of users per level per month
            listUsersLevel.append(usersLevelMonth)

            avgUsersLevel = []
            ##            Retrieve avg number of revs per user in each group, per month
            for contribs in contribsLevelMonth:
                for totUsers in usersLevelMonth:
                    if contribs[0] == totUsers[0]:
                        avg = float(contribs[1]) / float(totUsers[1])
                        avgUsersLevel.append((contribs[0], avg))
            listAvgContribsLevel.append(avgUsersLevel)

            percUsersLevel = []
            ##            Append (period, usersLevel/totUsers)
            for users, totUsers in zip(usersLevelMonth, usersMonth):
                for totUsers in usersMonth:
                    if users[0] == totUsers[0]:
                        perc = float(users[1]) / float(totUsers[1])
                        percUsersLevel.append((users[0], perc * 100))
                        break
            listPercUsersLevel.append(percUsersLevel)

##        2D graph for FIG 4
        self.multiGraph.createGraphic("perc_revs_per_userlevel_month",\
        listPercContribsLevel, xlabst="months", ylabst="% revisions",\
        mainTitle="% of total revs per user level per month", graphType=self.graphType,\
        format=[],log=False)

        ##        Plot 2D multi graph (FIG 5)
        self.multiGraph.createGraphic("revs_per_userlevel_month",\
        listContribsLevel, xlabst="months", ylabst="revisions",\
        mainTitle="Total revisions per user level per month", graphType=self.graphType,\
        format=[],log=True)

        ##    FIG 6 AVERAGE NUMBER OF EDITS PER USER PER MONTH FOR EACH LEVEL
        self.multiGraph.createGraphic("avg_revs_per_userlevel_month",\
        listAvgContribsLevel, xlabst="months", ylabst="avg. revisions",\
        mainTitle="Avg revisions per user level per month", graphType=self.graphType,\
        format=[],log=True)

        ##        FIG 7 POPULATION GROWTH FOR EACH USER GROUP
        self.multiGraph.createGraphic("users_per_level_month",\
        listUsersLevel, xlabst="months", ylabst="log(num users)",\
        mainTitle="Growth of each user group per month", graphType=self.graphType,\
        format=[],log=True)
        ##        FIG 8 % OF TOTAL POPULATION OF EACH USER GROUP
        self.multiGraph.createGraphic("perc_users_per_level_month",\
        listPercUsersLevel, xlabst="months", ylabst="% users",\
        mainTitle="% of users in each user group per month", graphType=self.graphType,\
        format=[],log=False)
        """