def run_diagnose(self):
    """Evaluate the prediction pipeline separately per name-collection size.

    Loads the standard distributions from ``prob_groups.txt`` and the name
    network from a ``ClusterBridge``.  For every ``thres`` from 1 to 9 it
    keeps only the entries whose element 1 (the names) has exactly ``thres``
    items, runs the observed -> prediction -> unzip pipeline on that subset,
    and prints the accuracy and the confusion matrix.

    Raises:
        AssertionError: if the unzipped prediction holds fewer than the
            three sequences (true labels, predictions, metadata) that are
            read below.
    """
    standards = serial.objFromFile('prob_groups.txt')
    name_net = ClusterBridge().getNameNet()
    num_label = 4  # label count handed to tests.tester; loop-invariant

    for thres in range(1, 10):
        # Experimental: restrict the network to entries with exactly
        # `thres` names.  A list comprehension always yields a real list,
        # whichever Python version runs this.
        filtered_net = [entry for entry in name_net
                        if len(entry[1]) == thres]
        # Experimental-end

        observed = gstat.convertNameNet2Observed(filtered_net)
        prediction = matchstat.convertObserved2Prediction(standards, observed)

        # Format becomes:
        # [(true1, true2,...), (predict1, predict2, ...), (meta1, meta2, ...)]
        pairs = matchstat.unzipGroupPrediction(prediction)

        # Sanity check: pairs[0], pairs[1] and pairs[2] are all used below,
        # so at least three sequences are required.
        assert len(pairs) >= 3, "expected (true, predicted, meta) sequences"

        tester = tests.tester(num_label)
        tester.record(pairs[0], pairs[1])
        tester.recordMeta(pairs[2])

        # Single-argument print calls emit the same text under Python 2
        # and Python 3.
        print("Accuracy: %s" % (tester.accuracy(),))
        print("Conf matrix:")
        print(tester.confusionMatrix(friends.groupToNumeric))
def pipelines(n_central, n_friends):
    """Run every pipeline stage on sampled data and return the unzipped result.

    Per the stage notes below, the returned value is laid out as
    [(true1, true2, ...), (predict1, predict2, ...), (meta1, meta2, ...)],
    i.e. the true labels and the predicted labels come first.
    """
    # Standard distributions, laid out as [(label, prob), (label, prob), ...].
    # (They could instead be loaded with serial.objFromFile('prob_groups.txt').)
    standards = gstat.generateEmpiricalDistributionFromSample(n_central,
                                                              n_friends)
    print(standards)

    # Ages paired with a set of names: [(age, [names...]), (age, [names...])]
    sampled_net = friends.getNameNetworkSampled(n_central, n_friends)

    # Observed form: [(age, prob, meta), (age, prob, meta), ...]
    observed = gstat.convertNameNet2Observed(sampled_net)

    # Prediction form is [(age, prediction, meta), ...]; unzipping it gives
    # the three parallel sequences described in the docstring.
    return unzipGroupPrediction(
        convertObserved2Prediction(standards, observed))
def simulateIdealFriendshipNetwork(n_friends):
    """Score the pipeline on a pseudo name network and return the accuracy.

    Loads the standard distributions from ``prob_groups.txt``, draws a
    pseudo name network via ``friends.getPseudoNameNetSampled`` (with
    ``n_sample=n_friends`` and ``n_num=50``), pushes it through the
    observed -> prediction -> unzip stages, and records the outcome in a
    ``tests.tester``.

    Args:
        n_friends: forwarded as ``n_sample`` to the pseudo-network sampler.

    Returns:
        Whatever ``tester.accuracy()`` reports for the recorded predictions.

    Raises:
        AssertionError: if the unzipped prediction holds fewer than the
            three sequences (true labels, predictions, metadata) read below.
    """
    standards = serial.objFromFile('prob_groups.txt')
    name_net = friends.getPseudoNameNetSampled(n_sample=n_friends, n_num=50)
    observed = gstat.convertNameNet2Observed(name_net)
    prediction = convertObserved2Prediction(standards, observed)
    pairs = unzipGroupPrediction(prediction)

    # Sanity check: pairs[0], pairs[1] and pairs[2] are all used below, so
    # at least three sequences are required.
    assert len(pairs) >= 3, "expected (true, predicted, meta) sequences"

    # Use tester class
    num_label = 4
    tester = tests.tester(num_label)
    tester.record(pairs[0], pairs[1])
    tester.recordMeta(pairs[2])
    return tester.accuracy()
def run(self):
    """Run the prediction pipeline on the full name network and report it.

    Loads the standard distributions from ``prob_groups.txt`` and the name
    network from a ``ClusterBridge``, converts it through the observed ->
    prediction -> unzip stages, and prints the accuracy and the confusion
    matrix.

    Raises:
        AssertionError: if the unzipped prediction holds fewer than the
            three sequences (true labels, predictions, metadata) read below.
    """
    standards = serial.objFromFile('prob_groups.txt')
    name_net = ClusterBridge().getNameNet()

    observed = gstat.convertNameNet2Observed(name_net)
    prediction = matchstat.convertObserved2Prediction(standards, observed)

    # Format becomes:
    # [(true1, true2,...), (predict1, predict2, ...), (meta1, meta2, ...)]
    pairs = matchstat.unzipGroupPrediction(prediction)

    # Sanity check: pairs[0], pairs[1] and pairs[2] are all used below, so
    # at least three sequences are required.
    assert len(pairs) >= 3, "expected (true, predicted, meta) sequences"

    num_label = 4
    tester = tests.tester(num_label)
    tester.record(pairs[0], pairs[1])
    tester.recordMeta(pairs[2])

    # Single-argument print calls emit the same text under Python 2 and
    # Python 3.
    print("Accuracy: %s" % (tester.accuracy(),))
    print("Conf matrix:")
    print(tester.confusionMatrix(friends.groupToNumeric))