def pipelines(n_central, n_friends):
    '''
    Run every stage of the pipeline in sequence: build the standard
    distributions, sample a name network, convert it into observations,
    predict from those observations and regroup the predictions.

    n_central and n_friends are passed unchanged to both
    gstat.generateEmpiricalDistributionFromSample and
    friends.getNameNetworkSampled.

    Returns whatever unzipGroupPrediction produces. The original notes
    describe it as
        [(true1, true2, ...), (predict1, predict2, ...), (meta1, meta2, ...)]
    (not verified here; unzipGroupPrediction is defined elsewhere).
    '''
    # Standard distributions, noted as [(label, prob), (label, prob), ...].
    # An earlier variant loaded them from 'prob_groups.txt' through
    # serial.objFromFile instead of sampling them.
    reference = gstat.generateEmpiricalDistributionFromSample(
        n_central, n_friends)
    print(reference)

    # Sampled network, noted as [(age, [names...]), ...], turned into
    # observations noted as [(age, prob, meta), ...].
    observations = gstat.convertNameNet2Observed(
        friends.getNameNetworkSampled(n_central, n_friends))

    # Predictions, noted as [(age, prediction, meta), ...], are regrouped
    # by unzipGroupPrediction before being handed back.
    return unzipGroupPrediction(
        convertObserved2Prediction(reference, observations))
def test1():
    '''
    Manual check: match each sampled observation against the standard
    distributions stored in 'prob_groups.txt' and print the serialized
    result.
    '''
    # Standard distributions, noted as [(age, prob), (age, prob), ...].
    reference = serial.objFromFile('prob_groups.txt')

    # Sampled network, noted as [(age, [names...]), ...].
    network = friends.getNameNetworkSampled(10, 100)

    # Observations, noted as [(age, prob, meta), ...].
    observations = convertNameNet2Observed(network)

    def describe(entry):
        # The first three fields of an observation are read as
        # age, prob and meta.
        age = entry[0]
        prob = entry[1]
        meta = entry[2]
        if sum(prob) == 0:
            # Nothing is matched when the probabilities sum to zero; a 0
            # placeholder takes the place of the probabilities.
            return (age, 0, meta)
        hits = matchObserved(reference, prob)
        # Keep only the first field of each match.
        return (age, prob, [hit[0] for hit in hits], meta)

    results = [describe(entry) for entry in observations]
    print(serial.toString(results))
def generateEmpiricalDistributionFromSample(n_central, n_friends):
    '''
    Sample a name network and aggregate it into an empirical distribution.

    n_central and n_friends are passed unchanged to
    friends.getNameNetworkSampled.

    The data goes through three shapes, as noted by the original author:
        [(age, [names...]), ...]   from friends.getNameNetworkSampled
        [(age, prob, meta), ...]   from convertNameNet2Observed
        [(label, prob), ...]       from aggregateObserved, unique labels

    Returns the output of aggregateObserved.
    '''
    return aggregateObserved(
        convertNameNet2Observed(
            friends.getNameNetworkSampled(n_central, n_friends)))