def run_one_trial_weighted(feature_set, feature_set_weights, feature_weights_filename, atom_type, cluster_type, k, first_doc_num, n, min_len, cheating): ''' Runs <evaluate_n_documents> using the given raw feature weights or confidence weights, and saves trail to DB. ''' session = Session() start = time.time() if cluster_type == "combine_confidences": path, auc, _, _, _, _, _ = evaluate_n_documents(feature_set, cluster_type, k, atom_type, n, min_len=min_len, feature_confidence_weights=feature_set_weights, cheating=cheating) else: path, auc, _, _, _, _, _ = evaluate_n_documents(feature_set, cluster_type, k, atom_type, n, min_len=min_len, feature_weights=feature_set_weights, cheating=cheating) end = time.time() time_elapsed = end - start version_number = DASHBOARD_VERSION trial_results = { 'atom_type' : atom_type, 'cluster_type' : cluster_type, 'features' : feature_set, 'feature_weights' : feature_set_weights, 'feature_weights_file' : feature_weights_filename, 'first_doc_num' : first_doc_num, 'n' : n, 'min_len' : min_len, 'figure_path' : os.path.basename(path), 'version_number' : version_number, 'time_elapsed' : time_elapsed, 'auc' : auc, 'cheating' : cheating } trial = IntrinsicTrial(**trial_results) session.add(trial) print 'Made a weighted trial!' session.commit() session.close() return trial
def eval_func_raw_features(self, feature_weights): feature_weights = [max(0.00001, x) for x in feature_weights] roc_path, roc_auc, _, _, _, _, _ = evaluate_n_documents(self.features, self.cluster_type, 2, self.atom_type, self.num_documents, save_roc_figure=False, feature_weights=feature_weights, first_doc_num=self.first_doc_num) print 'evaluated:', roc_auc, [w for w in feature_weights] return roc_auc
def run_one_trial(feature_set, atom_type, cluster_type, k, first_doc_num, n, min_len, cheating, eval_method='roc'): ''' Runs <evaluate_n_documents> and saves trial to DB ''' session = Session() version_number = DASHBOARD_VERSION trial_results = { 'atom_type' : atom_type, 'cluster_type' : cluster_type, 'features' : feature_set, 'first_doc_num' : first_doc_num, 'n' : n, 'min_len' : min_len, 'version_number' : version_number } if eval_method == 'roc': start = time.time() path, auc = evaluate_n_documents(feature_set, cluster_type, k, atom_type, n, min_len=min_len, cheating=cheating, eval_method=eval_method) end = time.time() time_elapsed = end - start further_params = { 'time_elapsed' : time_elapsed, 'auc' : auc, 'figure_path' : os.path.basename(path), 'cheating' : cheating } trial_results.update(further_params) trial = IntrinsicTrial(**trial_results) session.add(trial) elif eval_method == 'prec_recall': start = time.time() thresh_prec_avgs, thresh_recall_avgs, thresh_fmeasure_avgs, thresh_granularity_avgs, thresh_overall_avgs = \ evaluate_n_documents(feature_set, cluster_type, k, atom_type, n, min_len=min_len, cheating=cheating, eval_method=eval_method) end = time.time() time_elapsed = end - start for thresh in thresh_prec_avgs.keys(): precision = thresh_prec_avgs[thresh] recall = thresh_recall_avgs[thresh] fmeasure = thresh_fmeasure_avgs[thresh] granularity = thresh_granularity_avgs[thresh] overall = thresh_overall_avgs[thresh] further_params = { 'threshold' : thresh, 'time_elapsed' : time_elapsed, 'precision' : precision, 'recall' : recall, 'fmeasure' : fmeasure, 'granularity' : granularity, 'overall' : overall } # Thanks to http://stackoverflow.com/questions/6005066/adding-dictionaries-together-python one_trial_params = dict(trial_results, **further_params) # print 'Would populate with:' # printer.pprint(one_trial_params) # print '-'*40 trial = IntrinsicTrial(**one_trial_params) session.add(trial) print 'Made a trial!' session.commit() session.close()