Exemple #1
0
def getRevertGraph(PageList):
    """
    Build a Revert Graph from the revision histories of a set of pages.

    Each page history is a chronologically ordered list of
    (md5_of_revision, author_of_revision) tuples.  When a revision has the
    same md5 as an earlier revision of the same page, every revision lying
    between the first occurrence of that md5 and the repeated one that has
    both a different md5 and a different author is counted as reverted:
    the weight of the edge (reverting author -> reverted author) grows by 1.

    If you want to know more about revert graphs go to
    http://www.trustlet.org/wiki/Understanding_Social_Dynamics_in_Wikipedia_with_Revert_Graph

    Parameters:
       PageList: iterable of page histories, each one a chronologically
                 ordered list of (md5_of_revision, author_of_revision), e.g.
                 [[('a1122dd', 'user1'), ('a1111dd', 'user2'), ('a1122dd', 'user1')],
                  [('ee1133', 'user3'), ('dafxed', 'user1'), ('ee1133', 'user4')]]

    return: a WeightedNetwork with the Revert Graph (weight on edge is an int)
    """
    G = WeightedNetwork()

    for rList in PageList:  # one revision history per page
        # (md5, position, author); position is the chronological index and
        # doubles as the time stamp of the revision.
        history = [(md5, pos, author)
                   for pos, (md5, author) in enumerate(rList)]

        # Sorting groups identical md5s together, ordered by position inside
        # each group, so the first entry of a group is the earliest revision
        # with that content.
        ordered = sorted(history)

        run_md5 = None
        run_start = 0
        for k, (md5, pos, author) in enumerate(ordered):
            # A new run of identical md5s starts here: remember where that
            # content first appeared instead of rescanning the run backwards
            # for every revision.
            if k == 0 or md5 != run_md5:
                run_md5 = md5
                run_start = pos

            # run_start <= pos always holds (the run is sorted by position),
            # so the window below is empty for the first occurrence.
            # NOTE(review): the window starts at the FIRST occurrence of the
            # md5, not at the nearest previous one, so a third identical
            # revision counts again the revisions already counted for the
            # second.  This matches the previous behaviour - confirm intended.
            for other_md5, _other_pos, other_author in history[run_start:pos]:
                # Same content is not a revert; an author cannot revert
                # himself.
                if md5 == other_md5 or author == other_author:
                    continue

                # If there isn't the edge, create it with weight 1,
                # else update the edge adding 1 to the current weight.
                # Assumes G.get_edge raises NetworkXError for a missing edge.
                try:
                    weight = G.get_edge(author, other_author)
                except NetworkXError:
                    G.add_edge(author, other_author, 1)
                else:
                    G.add_edge(author, other_author, weight + 1)

    return G
def getRevertGraph( PageList ):
    """
    Build a Revert Graph from the revision histories of a set of pages.

    Each page history is a chronologically ordered list of
    (md5_of_revision, author_of_revision) tuples.  When a revision has the
    same md5 as an earlier revision of the same page, every revision lying
    between the first occurrence of that md5 and the repeated one that has
    both a different md5 and a different author is counted as reverted:
    the weight of the edge (reverting author -> reverted author) grows by 1.

    If you want to know more about revert graphs go to
    http://www.trustlet.org/wiki/Understanding_Social_Dynamics_in_Wikipedia_with_Revert_Graph

    Parameters:
       PageList: iterable of page histories, each one a chronologically
                 ordered list of (md5_of_revision, author_of_revision), e.g.
                 [[('a1122dd', 'user1'), ('a1111dd', 'user2'), ('a1122dd', 'user1')],
                  [('ee1133', 'user3'), ('dafxed', 'user1'), ('ee1133', 'user4')]]

    return: a WeightedNetwork with the Revert Graph (weight on edge is an int)
    """
    G = WeightedNetwork()

    for rList in PageList:  # one revision history per page
        # (md5, position, author); position is the chronological index and
        # doubles as the time stamp of the revision.
        history = [(md5, pos, author)
                   for pos, (md5, author) in enumerate(rList)]

        # Sorting groups identical md5s together, ordered by position inside
        # each group, so the first entry of a group is the earliest revision
        # with that content.
        ordered = sorted(history)

        run_md5 = None
        run_start = 0
        for k, (md5, pos, author) in enumerate(ordered):
            # A new run of identical md5s starts here: remember where that
            # content first appeared instead of rescanning the run backwards
            # for every revision.
            if k == 0 or md5 != run_md5:
                run_md5 = md5
                run_start = pos

            # run_start <= pos always holds (the run is sorted by position),
            # so the window below is empty for the first occurrence.
            # NOTE(review): the window starts at the FIRST occurrence of the
            # md5, not at the nearest previous one, so a third identical
            # revision counts again the revisions already counted for the
            # second.  This matches the previous behaviour - confirm intended.
            for other_md5, _other_pos, other_author in history[run_start:pos]:
                # Same content is not a revert; an author cannot revert
                # himself.
                if md5 == other_md5 or author == other_author:
                    continue

                # If there isn't the edge, create it with weight 1,
                # else update the edge adding 1 to the current weight.
                # Assumes G.get_edge raises NetworkXError for a missing edge.
                try:
                    weight = G.get_edge(author, other_author)
                except NetworkXError:
                    G.add_edge(author, other_author, 1)
                else:
                    G.add_edge(author, other_author, weight + 1)

    return G