def main(args): my_turk = turkdata.turkdata(args[0]) read_it = turkdata.init_reader(my_turk) my_turk.read_all(read_it) yes_no_set = my_turk.training_set() if verbose: pprint.pprint(yes_no_set) ## do this if you've pickled the yes_no sets #fd = open("./mt_files/Batch_63422_result_training.pkl", 'rb') #ob = pickle.load(fd) #yes_no_set = [ob[0], ob[1]] ptree_classifier = ptree.mult_sample_learn_ccc(yes_no_set[0], yes_no_set[1], n_depth) all_ngrams = n_grams.n_grams_depth_first(ptree_classifier, n_depth) print len(all_ngrams), " ngrams created" print '\nLOG LIKELIHOOD: ', n_grams.n_grams_prior(all_ngrams) [n.print_ngram(ptree_classifier.words) for n in all_ngrams] # def store(self): cl_name = str(n_depth) + '_classifier_' + time.strftime("%M-%H-%j") + '.pkl' out_name = re.sub('.csv', cl_name, my_turk.name) try: outfd = open(out_name, 'wb') pickle.dump(ptree_classifier, outfd) # , pickle.HIGHEST_PROTOCOL) print >> sys.stderr, 'Wrote ', out_name outfd.close() except pickle.PicklingError: print >> sys.stderr, 'Failed to serialize ', out_name
def main(args):
    """Cross-validated variant of the training driver.

    args[0] is the path to the turk CSV file. Builds the (yes, no) training
    pair, partitions it into ``n_folds`` cross-validation folds, and learns a
    classifier per fold.

    NOTE(review): this redefines ``main`` — an earlier ``main(args)`` exists
    above in this file, so only this later definition takes effect.
    """
    # Create the turkdata training set.
    my_turk = turkdata.turkdata(args[0])
    read_it = turkdata.init_reader(my_turk)
    my_turk.read_all(read_it)
    yes_no_set = my_turk.training_set()
    if verbose:
        pprint.pprint(yes_no_set)
    ## Create the cross validation folds from the pos & neg examples
    cv = cv_constructor.CrossValidationDataConstructor(yes_no_set[0], yes_no_set[1], numPartitions=n_folds)
    cv_set = cv.getDataSets()
    # pprint.pprint(cv_set.next)
    ## Run the plug-in classifier on each fold, computing fp & fn
    for (training_set, test_set) in cv_set:
        # NOTE(review): trains on the FULL yes_no_set, not this fold's
        # training_set, which defeats the cross-validation split — looks like
        # a bug, but the loop body may continue beyond this chunk; confirm
        # against the rest of the file before changing.
        ptree_classifier = ptree.mult_sample_learn_ccc(yes_no_set[0], yes_no_set[1], n_depth)