def run_diagnose(self):
    """Score the prediction pipeline separately for each name-list length.

    Loads the standard distributions from 'prob_groups.txt' and the name
    net from a ClusterBridge.  For every ``thres`` in 1..9 it keeps only
    the triplets whose name list (element 1) has exactly ``thres`` entries,
    runs that subset through observe -> predict -> unzip, records the
    result in a tester and prints the accuracy and confusion matrix.

    Raises:
        AssertionError: if the unzipped prediction does not contain the
            three sequences (true labels, predictions, meta) used below.
    """
    standards = serial.objFromFile('prob_groups.txt')
    name_net = ClusterBridge().getNameNet()
    num_label = 4  # loop-invariant, so set once

    for thres in range(1, 10):
        # Experimental: restrict the net to entries with exactly `thres`
        # names.
        filtered_net = [triplet for triplet in name_net
                        if len(triplet[1]) == thres]

        observed = gstat.convertNameNet2Observed(filtered_net)
        prediction = matchstat.convertObserved2Prediction(standards, observed)
        # Format becomes:
        # [(true1, true2,...), (predict1, predict2, ...), (meta1, meta2, ...)]
        pairs = matchstat.unzipGroupPrediction(prediction)

        tester = tests.tester(num_label)
        # Sanity check: pairs[0], pairs[1] and pairs[2] are all read below,
        # so three sequences are required (the check used to accept two and
        # then fail on pairs[2] after the tester was already partly fed).
        assert len(pairs) >= 3, "expected (true, predicted, meta) sequences"
        tester.record(pairs[0], pairs[1])
        tester.recordMeta(pairs[2])

        # Parenthesised single-argument prints emit the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Accuracy: %s" % (tester.accuracy(),))
        print("Conf matrix:")
        print(tester.confusionMatrix(friends.groupToNumeric))
def test1():
    """Match sampled observations against the standards and print them.

    Loads the standard distributions, samples a name network, converts it
    to observations and prints the serialised per-observation results.
    """
    # Standard distributions, in the form
    # [(age, prob), (age, prob), ...]
    standards = serial.objFromFile('prob_groups.txt')

    # Sampled network: an age paired with a set of names, i.e.
    # [(age, [names...]), (age, [names...])]
    name_net = friends.getNameNetworkSampled(10, 100)

    # Observations, in the form
    # [(age, prob, meta), (age, prob, meta), ...]
    observed = convertNameNet2Observed(name_net)

    def match_one(obs):
        # One observation; positions 0, 1 and 2 are read as
        # age, prob and meta respectively.
        age, probs, meta = obs[0], obs[1], obs[2]
        if sum(probs) == 0:
            # Nothing to match against: report a 0 placeholder instead.
            return (age, 0, meta)
        candidates = matchObserved(standards, probs)
        # Keep only the first field of every match.
        candidate_ages = [candidate[0] for candidate in candidates]
        return (age, probs, candidate_ages, meta)

    results = [match_one(obs) for obs in observed]
    print(serial.toString(results))
def simulateIdealFriendshipNetwork(n_friends):
    """Return the prediction accuracy on a sampled pseudo name net.

    Args:
        n_friends: forwarded as ``n_sample`` to
            ``friends.getPseudoNameNetSampled`` (``n_num`` is fixed at 50).

    Returns:
        Whatever ``tester.accuracy()`` reports after the true labels,
        predictions and meta data have been recorded.

    Raises:
        AssertionError: if the unzipped prediction does not contain the
            three sequences (true labels, predictions, meta) used below.
    """
    standards = serial.objFromFile('prob_groups.txt')
    # Alternative source of standards, kept for reference:
    #   gstat.generateEmpiricalDistributionFromSample(500, 20)
    name_net = friends.getPseudoNameNetSampled(n_sample=n_friends, n_num=50)

    observed = gstat.convertNameNet2Observed(name_net)
    prediction = convertObserved2Prediction(standards, observed)
    pairs = unzipGroupPrediction(prediction)

    # Use tester class
    num_label = 4
    tester = tests.tester(num_label)
    # Sanity check: pairs[0], pairs[1] and pairs[2] are all read below, so
    # three sequences are required (the check used to accept two and then
    # fail on pairs[2]).
    assert len(pairs) >= 3, "expected (true, predicted, meta) sequences"
    tester.record(pairs[0], pairs[1])
    tester.recordMeta(pairs[2])

    return tester.accuracy()
def run(self):
    """Score the prediction pipeline on the full name net and print results.

    Loads the standard distributions from 'prob_groups.txt' and the name
    net from a ClusterBridge, runs observe -> predict -> unzip, records
    the result in a tester and prints the accuracy and confusion matrix.

    Raises:
        AssertionError: if the unzipped prediction does not contain the
            three sequences (true labels, predictions, meta) used below.
    """
    standards = serial.objFromFile('prob_groups.txt')
    name_net = ClusterBridge().getNameNet()

    observed = gstat.convertNameNet2Observed(name_net)
    prediction = matchstat.convertObserved2Prediction(standards, observed)
    # Format becomes:
    # [(true1, true2,...), (predict1, predict2, ...), (meta1, meta2, ...)]
    pairs = matchstat.unzipGroupPrediction(prediction)

    num_label = 4
    tester = tests.tester(num_label)
    # Sanity check: pairs[0], pairs[1] and pairs[2] are all read below, so
    # three sequences are required (the check used to accept two and then
    # fail on pairs[2] after the tester was already partly fed).
    assert len(pairs) >= 3, "expected (true, predicted, meta) sequences"
    tester.record(pairs[0], pairs[1])
    tester.recordMeta(pairs[2])

    # Parenthesised single-argument prints emit the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Accuracy: %s" % (tester.accuracy(),))
    print("Conf matrix:")
    print(tester.confusionMatrix(friends.groupToNumeric))