Esempio n. 1
0
def getModel(dataset):
    """Build and train a ``cmaxent.MaxentModel`` from ``dataset``.

    ``dataset`` is iterated as ``(user_id, target)`` pairs.  For each
    pair the context is looked up with ``getTrTWContext(user_id)``;
    pairs whose context is ``None`` are skipped, all others are added
    to the model as an event with weight 1.

    After all events are added, the event set is closed with
    ``PRUNE_COUNT`` and the model is trained with the ``'lbfgs'``
    method using ``LBFGS_ITERATION``, ``PRIOR_WEIGHT`` and
    ``TOLERANCE``.

    Returns the trained model object.
    """
    model = cmaxent.MaxentModel()

    model.begin_add_event()
    for uid, label in dataset:
        features = getTrTWContext(uid)
        if features is not None:
            # Every event carries the same unit weight.
            model.add_event(features, label, 1)
    model.end_add_event(PRUNE_COUNT)

    model.train(LBFGS_ITERATION, 'lbfgs', PRIOR_WEIGHT, TOLERANCE)
    return model
def eval_instances():
    """Write the instances the model scores above 0.25 to a new file.

    Reads ``user_id<TAB>target`` lines from
    ``twitter/self_reveal/user_pool0.csv`` under ``DATA``, evaluates
    each ``(context, target)`` pair with the model returned by
    ``getModel``, and writes the lines whose score is strictly greater
    than 0.25 to ``twitter/self_reveal/user_pool2.csv`` under ``DATA``.
    Lines whose ``getTrTWContext(user_id)`` is ``None`` are skipped.

    Raises:
        ValueError: if an input line does not split into exactly two
            tab-separated fields.
    """
    instance_file = os.path.join(DATA, 'twitter/self_reveal/user_pool0.csv')
    filtered_file = os.path.join(DATA, 'twitter/self_reveal/user_pool2.csv')

    # NOTE(review): getModel, as defined in this file, takes a required
    # ``dataset`` argument, but none is passed here -- confirm which
    # dataset the model should be trained on.
    first_model = getModel()

    # Both handles are managed by ``with`` so they are closed even when a
    # malformed line or a model error aborts the loop part-way through.
    with open(instance_file) as fin, open(filtered_file, 'w') as fout:
        for line in fin:
            user_id, target = line.rstrip('\n').split('\t')
            context = getTrTWContext(user_id)

            if context is None:
                continue

            score = first_model.eval(context, target)

            if score > .25:
                fout.write(user_id + '\t' + target + '\n')