# NOTE: the methods below belong to the summarization job-runner class; the
# enclosing module is assumed to provide the imports they rely on, e.g.:
#
#   import os
#   import numpy as np
#
# plus the summarizer classes (APSummarizer, APSalienceSummarizer,
# HACSummarizer, and the *FilteredSummary variants) and the cluster_worker /
# filter_worker callables dispatched through self.do_work.

def make_fa_summaries(self, eval_data, prefix, feature_set,
                      apsal_sal=.4, apsal_sim=.7, **kwargs):
    apsal = APSalienceSummarizer()

    ### Run clustering ###
    jobs = []
    for event, corpus in eval_data:
        print event.fs_name()
        apsal_tsv_dir = apsal.get_tsv_dir(prefix, feature_set)
        if not os.path.exists(apsal_tsv_dir):
            os.makedirs(apsal_tsv_dir)
        jobs.append((event, corpus, prefix, feature_set, apsal))
    self.do_work(cluster_worker, jobs, **kwargs)

    ### Run filtering ###
    print "Generating AP+Salience Summary"
    print "\tSal Threshold: {}".format(apsal_sal)
    print "\tSim Threshold: {}".format(apsal_sim)
    jobs = []
    for event, corpus in eval_data:
        jobs.append(
            (APSalienceFilteredSummary(),
             (event, prefix, feature_set, apsal_sal, apsal_sim)))
    self.do_work(filter_worker, jobs, **kwargs)
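# A minimal usage sketch for the method above, assuming the enclosing class
# is instantiated as `runner` and `eval_events` is a list of (event, corpus)
# pairs (both names are hypothetical, not part of this module):
#
#   runner.make_fa_summaries(eval_events, "run1", my_feature_set,
#                            apsal_sal=.4, apsal_sim=.7)
#
# Extra keyword arguments are forwarded unchanged to self.do_work, which
# dispatches the queued jobs.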
def tune_fa(self, dev_data, prefix, feature_set,
            sal_min=-2.0, sal_max=2.0, sal_step=.1,
            sem_sim_min=.2, sem_sim_max=.7, sem_sim_step=.05,
            **kwargs):
    apsal = APSalienceSummarizer()
    sal_cutoffs = np.arange(sal_min, sal_max + sal_step, sal_step)
    sem_sim_cutoffs = np.arange(
        sem_sim_min, sem_sim_max + sem_sim_step, sem_sim_step)

    print "Tuning on dev data."

    ### Run clustering ###
    print "Generating AP+Salience Cluster\n\t(no params)"
    jobs = []
    for event, corpus in dev_data:
        print event.fs_name()
        apsal_tsv_dir = apsal.get_tsv_dir(prefix, feature_set)
        if not os.path.exists(apsal_tsv_dir):
            os.makedirs(apsal_tsv_dir)
        jobs.append((event, corpus, prefix, feature_set, apsal))
    self.do_work(cluster_worker, jobs, **kwargs)

    ### Run filtering ###
    print
    print "Generating AP+Salience Summary"
    print "\tSal Threshold ({}, {}), step={}".format(
        sal_min, sal_max, sal_step)
    print "\tSim Threshold ({}, {}), step={}".format(
        sem_sim_min, sem_sim_max, sem_sim_step)
    print "\t{} jobs/event".format(
        sal_cutoffs.shape[0] * sem_sim_cutoffs.shape[0])
    jobs = []
    for event, corpus in dev_data:
        for sem_sim_cutoff in sem_sim_cutoffs:
            for sal_cutoff in sal_cutoffs:
                jobs.append(
                    (APSalienceFilteredSummary(),
                     (event, prefix, feature_set,
                      sal_cutoff, sem_sim_cutoff)))
    self.do_work(filter_worker, jobs, **kwargs)
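# For the defaults above, the grid is roughly 41 salience cutoffs
# (-2.0 to 2.0, step .1) x 11 similarity cutoffs (.2 to .7, step .05),
# i.e. ~451 filter jobs per event; exact counts can vary by one step
# because np.arange's floating-point endpoint handling is inexact.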
def make_summaries(self, eval_data, prefix, feature_set,
                   hac_dist=1.35, hac_sim=.7, ap_sim=.7,
                   apsal_sal=.4, apsal_sim=.7,
                   apsal_tr_sal=.6, apsal_tr_sim=.6,
                   sal_rank_sal=1.8, sal_rank_sim=.4, **kwargs):
    ap = APSummarizer()
    apsal = APSalienceSummarizer()
    hac = HACSummarizer()

    print "Running with optimal params on eval data."

    ### Run clustering ###
    print "Generating AP Cluster\n\t(no params)"
    print "Generating AP+Salience Cluster\n\t(no params)"
    print "Generating HAC Cluster\n\tdist-thresh: {}".format(hac_dist)
    jobs = []
    for event, corpus in eval_data:
        print event.fs_name()
        apsal_tsv_dir = apsal.get_tsv_dir(prefix, feature_set)
        if not os.path.exists(apsal_tsv_dir):
            os.makedirs(apsal_tsv_dir)
        if not os.path.exists(ap.dir_):
            os.makedirs(ap.dir_)
        if not os.path.exists(hac.dir_):
            os.makedirs(hac.dir_)
        jobs.append((event, corpus, prefix, feature_set, hac, hac_dist))
        jobs.append((event, corpus, prefix, feature_set, ap))
        jobs.append((event, corpus, prefix, feature_set, apsal))
    self.do_work(cluster_worker, jobs, **kwargs)

    ### Run filtering ###
    print
    print "Generating AP Summary"
    print "\tSim Threshold: {}".format(ap_sim)
    print "Generating AP+Salience Summary"
    print "\tSal Threshold: {}".format(apsal_sal)
    print "\tSim Threshold: {}".format(apsal_sim)
    print "Generating HAC Summary"
    print "\tDist Threshold: {}".format(hac_dist)
    print "\tSim Threshold: {}".format(hac_sim)
    print "Generating AP+Salience Time Ranked"
    print "\tSal Threshold: {}".format(apsal_tr_sal)
    print "\tSim Threshold: {}".format(apsal_tr_sim)
    print "Generating Salience Ranked Summary"
    print "\tSal Threshold: {}".format(sal_rank_sal)
    print "\tSim Threshold: {}".format(sal_rank_sim)
    jobs = []
    for event, corpus in eval_data:
        jobs.append(
            (HACFilteredSummary(),
             (event, prefix, feature_set, hac_dist, hac_sim)))
        jobs.append((APFilteredSummary(), (event, ap_sim)))
        jobs.append(
            (APSalienceFilteredSummary(),
             (event, prefix, feature_set, apsal_sal, apsal_sim)))
        jobs.append(
            (APSalTRankSalThreshFilteredSummary(),
             (event, prefix, feature_set, apsal_tr_sal, apsal_tr_sim)))
        jobs.append(
            (RankedSalienceFilteredSummary(),
             (event, prefix, feature_set, sal_rank_sal, sal_rank_sim)))
    self.do_work(filter_worker, jobs, **kwargs)
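# A sketch of running the final configuration once tuning has picked the
# thresholds (hypothetical `runner` / `eval_events` as above):
#
#   runner.make_summaries(eval_events, "run1", my_feature_set,
#                         hac_dist=1.35, hac_sim=.7, ap_sim=.7)
#
# Per event, one cluster job is queued for each clusterer (HAC, AP,
# AP+Salience) and one filter job for each of the five summary variants.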
def tune(self, dev_data, prefix, feature_set,
         hac_dist_min=.9, hac_dist_max=5.05, hac_dist_step=.05,
         sal_min=-2.0, sal_max=2.0, sal_step=.1,
         sem_sim_min=.2, sem_sim_max=.7, sem_sim_step=.05,
         rank_sim_min=.2, rank_sim_max=.4, rank_sim_step=.05,
         **kwargs):
    ap = APSummarizer()
    apsal = APSalienceSummarizer()
    hac = HACSummarizer()
    hac_dist_cutoffs = np.arange(
        hac_dist_min, hac_dist_max + hac_dist_step, hac_dist_step)
    sal_cutoffs = np.arange(sal_min, sal_max + sal_step, sal_step)
    sem_sim_cutoffs = np.arange(
        sem_sim_min, sem_sim_max + sem_sim_step, sem_sim_step)
    rank_sim_cutoffs = np.arange(
        rank_sim_min, rank_sim_max + rank_sim_step, rank_sim_step)

    print "Tuning on dev data."

    ### Run clustering ###
    print "Generating AP Cluster\n\t(no params)"
    print "Generating AP+Salience Cluster\n\t(no params)"
    print "Generating HAC Cluster"
    print "\tDist Threshold ({}, {}), step={} {} jobs/event".format(
        hac_dist_min, hac_dist_max, hac_dist_step,
        hac_dist_cutoffs.shape[0])
    jobs = []
    for event, corpus in dev_data:
        print event.fs_name()
        apsal_tsv_dir = apsal.get_tsv_dir(prefix, feature_set)
        if not os.path.exists(apsal_tsv_dir):
            os.makedirs(apsal_tsv_dir)
        if not os.path.exists(ap.dir_):
            os.makedirs(ap.dir_)
        if not os.path.exists(hac.dir_):
            os.makedirs(hac.dir_)
        for cutoff in hac_dist_cutoffs:
            jobs.append((event, corpus, prefix, feature_set, hac, cutoff))
        jobs.append((event, corpus, prefix, feature_set, ap))
        jobs.append((event, corpus, prefix, feature_set, apsal))
    self.do_work(cluster_worker, jobs, **kwargs)

    ### Run filtering ###
    print
    print "Generating AP Summary"
    print "\tSim Threshold ({}, {}), step={}".format(
        sem_sim_min, sem_sim_max, sem_sim_step)
    print "\t{} jobs/event".format(sem_sim_cutoffs.shape[0])
    print "Generating AP+Salience Summary"
    print "\tSal Threshold ({}, {}), step={}".format(
        sal_min, sal_max, sal_step)
    print "\tSim Threshold ({}, {}), step={}".format(
        sem_sim_min, sem_sim_max, sem_sim_step)
    print "\t{} jobs/event".format(
        sal_cutoffs.shape[0] * sem_sim_cutoffs.shape[0])
    print "Generating HAC Summary"
    print "\tDist Threshold ({}, {}), step={}".format(
        hac_dist_min, hac_dist_max, hac_dist_step)
    print "\tSim Threshold ({}, {}), step={}".format(
        sem_sim_min, sem_sim_max, sem_sim_step)
    print "\t{} jobs/event".format(
        hac_dist_cutoffs.shape[0] * sem_sim_cutoffs.shape[0])
    rsfs = RankedSalienceFilteredSummary()
    if not os.path.exists(rsfs.dir_):
        os.makedirs(rsfs.dir_)
    jobs = []
    for event, corpus in dev_data:
        for sem_sim_cutoff in sem_sim_cutoffs:
            for dist_cutoff in hac_dist_cutoffs:
                jobs.append(
                    (HACFilteredSummary(),
                     (event, prefix, feature_set,
                      dist_cutoff, sem_sim_cutoff)))
            jobs.append((APFilteredSummary(), (event, sem_sim_cutoff)))
            for sal_cutoff in sal_cutoffs:
                jobs.append(
                    (APSalienceFilteredSummary(),
                     (event, prefix, feature_set,
                      sal_cutoff, sem_sim_cutoff)))
                jobs.append(
                    (APSalTRankSalThreshFilteredSummary(),
                     (event, prefix, feature_set,
                      sal_cutoff, sem_sim_cutoff)))
        for rank_sim_cutoff in rank_sim_cutoffs:
            for sal_cutoff in sal_cutoffs:
                jobs.append(
                    (RankedSalienceFilteredSummary(),
                     (event, prefix, feature_set,
                      sal_cutoff, rank_sim_cutoff)))
    self.do_work(filter_worker, jobs, **kwargs)
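# Note: every grid above uses np.arange(lo, hi + step, step) so that `hi`
# itself is included, but floating-point drift can occasionally add one
# value past `hi`. A drift-free alternative (a sketch, not what this module
# does) would size the grid explicitly:
#
#   n = int(round((hi - lo) / step)) + 1
#   cutoffs = np.linspace(lo, hi, n)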