def getModel(dataset):
    """Train and return a maxent model built from ``dataset``.

    ``dataset`` is iterated as ``(user_id, target)`` pairs.  For each pair
    the context is looked up with ``getTrTWContext(user_id)``; pairs whose
    lookup returns ``None`` are skipped, every other pair is added to the
    model as one event with weight 1.

    After all events are added the event set is closed with ``PRUNE_COUNT``
    and the model is trained with the module-level ``LBFGS_ITERATION``,
    ``PRIOR_WEIGHT`` and ``TOLERANCE`` settings using the ``'lbfgs'`` method.

    Returns the trained ``cmaxent.MaxentModel``.
    """
    model = cmaxent.MaxentModel()

    # Feed the training events to the model one at a time.
    model.begin_add_event()
    for user_id, target in dataset:
        context = getTrTWContext(user_id)
        if context is not None:
            model.add_event(context, target, 1)
    model.end_add_event(PRUNE_COUNT)

    model.train(LBFGS_ITERATION, 'lbfgs', PRIOR_WEIGHT, TOLERANCE)
    return model
def eval_instances(dataset=None):
    """Filter the user pool, keeping instances the first model scores > 0.25.

    Reads tab-separated ``user_id<TAB>target`` lines from
    ``twitter/self_reveal/user_pool0.csv`` (under ``DATA``), evaluates each
    one with ``first_model.eval(context, target)`` and writes the lines whose
    score is greater than 0.25 to ``twitter/self_reveal/user_pool2.csv``.
    Users for which ``getTrTWContext`` returns ``None`` are skipped, as are
    blank input lines.

    Args:
        dataset: optional iterable of ``(user_id, target)`` pairs forwarded
            to ``getModel`` to train the scoring model.  When omitted,
            ``getModel()`` is called without arguments, exactly as before.

    Raises:
        ValueError: if a non-blank line does not contain exactly two
            tab-separated fields.
    """
    instance_file = os.path.join(DATA, 'twitter/self_reveal/user_pool0.csv')
    filtered_file = os.path.join(DATA, 'twitter/self_reveal/user_pool2.csv')

    # NOTE(review): the getModel(dataset) visible in this file requires a
    # training dataset, so the argument-less call below raises TypeError
    # unless another definition is in effect.  Pass ``dataset`` to supply
    # the training data; confirm which dataset was originally intended.
    if dataset is None:
        first_model = getModel()
    else:
        first_model = getModel(dataset)

    # Open the input first so a missing input file does not truncate the
    # output, and let ``with`` close both handles even if scoring fails.
    with open(instance_file) as fin, open(filtered_file, 'w') as fout:
        for line in fin:
            record = line.rstrip('\n')
            if not record:
                # A blank (e.g. trailing) line carries no instance.
                continue
            user_id, target = record.split('\t')
            context = getTrTWContext(user_id)
            if context is None:
                continue
            score = first_model.eval(context, target)
            if score > .25:
                fout.write(user_id + '\t' + target + '\n')