コード例 #1
0
ファイル: cstat.py プロジェクト: pyongjoo/twitter-research
    def run_diagnose(self):
        """Print accuracy and confusion matrix per name-list length.

        For each threshold from 1 to 9, only the name-network entries
        whose name list (element 1 of the entry) contains exactly that
        many names are kept.  The kept entries are converted to
        observations, matched against the standards loaded from
        'prob_groups.txt', and scored with a fresh tester.
        """
        standards = serial.objFromFile('prob_groups.txt')

        c = ClusterBridge()
        name_net = c.getNameNet()

        # The number of labels does not depend on the threshold.
        num_label = 4

        for thres in range(1, 10):
            # Experimental: keep entries with exactly `thres` names.
            # Built eagerly so the result is a real list on any Python.
            filtered_net = [entry for entry in name_net
                            if len(entry[1]) == thres]
            # Experimental-end

            observed = gstat.convertNameNet2Observed(filtered_net)
            prediction = matchstat.convertObserved2Prediction(standards, observed)

            # Format becomes:
            # [(true1, true2,...), (predict1, predict2, ...), (meta1, meta2, ...)]
            pairs = matchstat.unzipGroupPrediction(prediction)

            # Sanity check: all three sequences (true, predicted, meta)
            # are read below, so at least three must be present.
            assert len(pairs) >= 3, "expected (true, predicted, meta) sequences"

            # A fresh tester per threshold keeps the results independent.
            tester = tests.tester(num_label)
            tester.record(pairs[0], pairs[1])
            tester.recordMeta(pairs[2])

            # Single-argument print calls produce the same output as the
            # print statement on Python 2 and also parse on Python 3.
            print("Accuracy: %s" % (tester.accuracy(),))
            print("Conf matrix:")
            print(tester.confusionMatrix(friends.groupToNumeric))
コード例 #2
0
def test1():
    """Match sampled observations against the stored standards and print them."""
    # Reference distributions, stored as
    # [(age, prob), (age, prob), ...]
    standards = serial.objFromFile('prob_groups.txt')

    # Sampled name network, i.e. ages paired with sets of names:
    # [(age, [names...]), (age, [names...])]
    sampled_net = friends.getNameNetworkSampled(10, 100)

    # Observations derived from the network:
    # [(age, prob, meta), (age, prob, meta), ...]
    observations = convertNameNet2Observed(sampled_net)

    def match(obs):
        # `obs` is a single observation, indexed as (age, prob, meta).
        age, probs, meta = obs[0], obs[1], obs[2]

        # Nothing to match when the probabilities sum to zero.
        if sum(probs) == 0:
            return (age, 0, meta)

        candidates = matchObserved(standards, probs)
        # Keep only the first element (the age) of every match.
        return (age, probs, [cand[0] for cand in candidates], meta)

    results = [match(obs) for obs in observations]
    print(serial.toString(results))
コード例 #3
0
def simulateIdealFriendshipNetwork(n_friends):
    """Score predictions on a sampled pseudo name network.

    Loads the standards from 'prob_groups.txt', samples a pseudo name
    network with ``n_sample = n_friends`` and ``n_num = 50``, converts it
    to observations, predicts groups from them and records the result in
    a tester.

    Returns whatever ``tester.accuracy()`` reports for that run.
    """
    standards = serial.objFromFile('prob_groups.txt')
    # Alternative source of standards that was tried previously:
    # gstat.generateEmpiricalDistributionFromSample(500, 20)

    name_net = friends.getPseudoNameNetSampled(n_sample = n_friends, n_num = 50)
    observed = gstat.convertNameNet2Observed(name_net)
    prediction = convertObserved2Prediction(standards, observed)
    pairs = unzipGroupPrediction(prediction)

    # Sanity check: three sequences (indices 0, 1 and 2) are read below,
    # so at least three must be present.
    assert len(pairs) >= 3, "expected (true, predicted, meta) sequences"

    # Use tester class
    num_label = 4
    tester = tests.tester(num_label)
    tester.record(pairs[0], pairs[1])
    tester.recordMeta(pairs[2])

    return tester.accuracy()
コード例 #4
0
ファイル: cstat.py プロジェクト: pyongjoo/twitter-research
    def run(self):
        """Print accuracy and confusion matrix for the whole name network.

        Loads the standards from 'prob_groups.txt', fetches the name
        network through a ClusterBridge, converts it to observations,
        predicts groups from them and scores the result with a tester.
        """
        standards = serial.objFromFile('prob_groups.txt')

        c = ClusterBridge()
        name_net = c.getNameNet()

        observed = gstat.convertNameNet2Observed(name_net)
        prediction = matchstat.convertObserved2Prediction(standards, observed)

        # Format becomes:
        # [(true1, true2,...), (predict1, predict2, ...), (meta1, meta2, ...)]
        pairs = matchstat.unzipGroupPrediction(prediction)

        # Sanity check: all three sequences (true, predicted, meta) are
        # read below, so at least three must be present.
        assert len(pairs) >= 3, "expected (true, predicted, meta) sequences"

        num_label = 4
        tester = tests.tester(num_label)
        tester.record(pairs[0], pairs[1])
        tester.recordMeta(pairs[2])

        # Single-argument print calls produce the same output as the
        # print statement on Python 2 and also parse on Python 3.
        print("Accuracy: %s" % (tester.accuracy(),))
        print("Conf matrix:")
        print(tester.confusionMatrix(friends.groupToNumeric))