Ejemplo n.º 1
0
def pipelines(n_central, n_friends):
    """Run every pipeline stage and return the unzipped predictions.

    Args:
        n_central: First sampling argument, forwarded unchanged to
            ``gstat.generateEmpiricalDistributionFromSample`` and
            ``friends.getNameNetworkSampled``.
        n_friends: Second sampling argument, forwarded to the same calls.

    Returns:
        Whatever ``unzipGroupPrediction`` returns. According to the format
        notes kept below this is expected to be
        ``[(true1, true2, ...), (predict1, predict2, ...), (meta1, ...)]``
        -- TODO confirm against ``unzipGroupPrediction``.
    """
    # Standard distributions; expected format (per the original notes):
    # [(label, prob), (label, prob), ...]
    # NOTE: an earlier revision loaded these from 'prob_groups.txt' via
    # serial.objFromFile instead of generating them from a sample.
    standards = gstat.generateEmpiricalDistributionFromSample(
        n_central, n_friends)

    # Diagnostic dump of the standards. The call form with a single argument
    # prints the same text on Python 2 and is valid syntax on Python 3.
    print(standards)

    # Sampled separately from the call that produced `standards`.
    # Expected format: [(age, [names...]), (age, [names...])]
    name_net = friends.getNameNetworkSampled(n_central, n_friends)

    # Expected format: [(age, prob, meta), (age, prob, meta), ...]
    observed = gstat.convertNameNet2Observed(name_net)

    # Expected format: [(age, prediction, meta), ...]
    prediction = convertObserved2Prediction(standards, observed)

    return unzipGroupPrediction(prediction)
Ejemplo n.º 2
0
def test1():
    """Match sampled observations against stored standards and print them.

    Loads the standards from ``'prob_groups.txt'``, samples a name network
    with the fixed arguments ``(10, 100)``, converts it to observations,
    matches each observation and prints ``serial.toString`` of the results.
    Returns nothing.
    """
    # Standard distributions; expected format (per the original notes):
    # [(age, prob), (age, prob), ...]
    standards = serial.objFromFile('prob_groups.txt')

    # Expected format: [(age, [names...]), (age, [names...])]
    name_net = friends.getNameNetworkSampled(10, 100)

    # Expected format: [(age, prob, meta), (age, prob, meta), ...]
    observed = convertNameNet2Observed(name_net)

    def match(o):
        """Return the match record for one observation (age, prob, meta)."""
        age = o[0]
        p = o[1]
        meta = o[2]

        if sum(p) != 0:
            matched = matchObserved(standards, p)
            # Keep only the first field of each matched entry. A list
            # comprehension yields a real list on both Python 2 and 3,
            # whereas map() is lazy on Python 3.
            matched_ages = [entry[0] for entry in matched]
            return (age, p, matched_ages, meta)

        # Nothing to match when the probabilities sum to zero. Note this
        # record has three fields, not four; kept as-is so the printed
        # output is unchanged.
        return (age, 0, meta)

    matched_arr = [match(obs) for obs in observed]
    print(serial.toString(matched_arr))
Ejemplo n.º 3
0
def generateEmpiricalDistributionFromSample(n_central, n_friends):
    """Build an aggregated distribution from a freshly sampled name network.

    The sample is drawn with ``friends.getNameNetworkSampled(n_central,
    n_friends)``, converted with ``convertNameNet2Observed`` and then reduced
    with ``aggregateObserved``, whose result is returned unchanged.

    Data shapes as recorded in the original notes (not verifiable here):
      * sampled network: [(age, [names...]), (age, [names...])]
      * observations:    [(age, prob, meta), (age, prob, meta), ...]
      * return value:    [(label, prob), (label, prob), ...] with unique
        labels
    """
    sampled_net = friends.getNameNetworkSampled(n_central, n_friends)
    return aggregateObserved(convertNameNet2Observed(sampled_net))