예제 #1
0
    def run_diagnose(self):

        standards = serial.objFromFile('prob_groups.txt')

        c = ClusterBridge()
        name_net = c.getNameNet()

        for thres in range(1, 10):
            # Experimental
            def lengthFilter(triplet):
                names = triplet[1]
                return len(names) == thres

            filtered_net = filter(lengthFilter, name_net)
            # Experimental-end

            observed = gstat.convertNameNet2Observed(filtered_net)
            prediction = matchstat.convertObserved2Prediction(standards, observed)

            # Format becomes:
            # [(true1, true2,...), (predict1, predict2, ...), (meta1, meta2, ...)]
            pairs = matchstat.unzipGroupPrediction(prediction)

            num_label = 4
            tester = tests.tester(num_label)
            assert(len(pairs) >= 2)     # sanity check
            tester.record(pairs[0], pairs[1])
            tester.recordMeta(pairs[2])

            #print thres
            print "Accuracy:", tester.accuracy()
            print "Conf matrix:"
            print tester.confusionMatrix(friends.groupToNumeric)
예제 #2
0
def pipelines(n_central, n_friends):
    ''' Includes all the pipelines and returns a pair of true label list and a
    predicted label list. '''
    # A list of standard distributions. Format is
    # [(label, prob), (label, prob), ...]
    #standards = serial.objFromFile('prob_groups.txt')
    standards = gstat.generateEmpiricalDistributionFromSample(n_central, n_friends)

    print standards

    # Age and a set of names. Format is
    # [(age, [names...]), (age, [names...])]
    name_net = friends.getNameNetworkSampled(n_central, n_friends)

    # Format becomes:
    # [(age, prob, meta), (age, prob, meta), ...]
    observed = gstat.convertNameNet2Observed(name_net)

    # Format becomes:
    # [(age, prediction, meta), ...]
    prediction = convertObserved2Prediction(standards, observed)

    # Format becomes:
    # [(true1, true2,...), (predict1, predict2, ...), (meta1, meta2, ...)]
    pairs = unzipGroupPrediction(prediction)

    return pairs
예제 #3
0
def simulateIdealFriendshipNetwork(n_friends):
    """Return the tester accuracy for a pseudo name network.

    Loads the reference distributions from ``prob_groups.txt``, builds a
    pseudo name network via ``friends.getPseudoNameNetSampled`` with
    ``n_sample=n_friends`` and ``n_num=50``, runs the observation/prediction
    pipeline on it and scores the result with ``tests.tester``.

    Args:
        n_friends: passed as ``n_sample`` to
            ``friends.getPseudoNameNetSampled``.

    Returns:
        The value of ``tester.accuracy()`` after recording the predictions.

    Raises:
        AssertionError: if the unzipped prediction holds fewer than the
            three sequences (true labels, predictions, metadata) that are
            consumed below.
    """
    standards = serial.objFromFile('prob_groups.txt')
    # Alternative source of standards:
    #standards = gstat.generateEmpiricalDistributionFromSample(500, 20)

    name_net = friends.getPseudoNameNetSampled(n_sample=n_friends, n_num=50)
    observed = gstat.convertNameNet2Observed(name_net)
    prediction = convertObserved2Prediction(standards, observed)
    pairs = unzipGroupPrediction(prediction)

    # Use tester class
    num_label = 4
    tester = tests.tester(num_label)
    # Sanity check: indices 0, 1 and 2 are all read below, so at least three
    # sequences are required (the old bound of 2 let a short result through
    # to an IndexError on pairs[2]).
    assert len(pairs) >= 3, \
        "expected (true, predicted, meta) sequences in pairs"
    tester.record(pairs[0], pairs[1])
    tester.recordMeta(pairs[2])

    return tester.accuracy()
예제 #4
0
    def run(self):
        standards = serial.objFromFile('prob_groups.txt')

        c = ClusterBridge()
        name_net = c.getNameNet()

        observed = gstat.convertNameNet2Observed(name_net)
        prediction = matchstat.convertObserved2Prediction(standards, observed)

        # Format becomes:
        # [(true1, true2,...), (predict1, predict2, ...), (meta1, meta2, ...)]
        pairs = matchstat.unzipGroupPrediction(prediction)

        num_label = 4
        tester = tests.tester(num_label)
        assert(len(pairs) >= 2)     # sanity check
        tester.record(pairs[0], pairs[1])
        tester.recordMeta(pairs[2])

        #print thres
        print "Accuracy:", tester.accuracy()
        print "Conf matrix:"
        print tester.confusionMatrix(friends.groupToNumeric)