### NOTE(review): this excerpt arrived with its line breaks and indentation
### stripped; the layout below is a reconstruction, nested only where the
### statements require it -- confirm against the real file.

### end of the training window
logger.info('computing gpsstop from current time')
gpsstop = t

### We do not require boundaries to be integer multiples of stride
gpsstart = opts.gpsstart
if not gpsstart:
    ### no usable start was supplied: fall back to one stride before gpsstop
    logger.info('computing gpsstart from gpsstop')
    gpsstart = gpsstop - stride

lookback = 0

### directory into which we write data
### NOTE(review): the directory name spans gpsstart .. gpsstart + stride while
### the idq.pat() call below uses gpsstop - gpsstart; the two only agree when
### gpsstop - gpsstart == stride -- confirm this is intended when opts.gpsstart
### is supplied.
output_dir = "%s/%d_%d/"%(traindir, gpsstart, gpsstart + stride)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

### presumably the name of the training-sample (pat) file -- confirm against idq.pat
pat = idq.pat(output_dir, ifo, usertag, gpsstart-lookback, lookback+gpsstop-gpsstart)

##### Get Science Segments
logger.info('test - Begin: querying science segments')

### The triple-quoted string below wraps the science-segment query so that it
### is not executed. It is left exactly as found because its closing quotes lie
### beyond this excerpt.
"""if not opts.ignore_science_segments: try: ### this returns a string seg_xml_file = idq.segment_query(config, gpsstart - lookback , gpsstop, url=segdb_url) ### load xml document xmldoc = ligolw_utils.load_fileobj(seg_xml_file, contenthandler=ligolw.LIGOLWContentHandler)[0] ### science segments xml filename seg_file = idq.segxml(output_dir, "_%s"%dq_name, gpsstart - lookback , lookback+stride)
### NOTE(review): this excerpt arrived with its line breaks and indentation
### stripped; the layout below is a reconstruction -- confirm against the
### real file.
### The first if/else is presumably the tail of an exception handler inside a
### loop (it uses `e` and `continue`, both bound before this excerpt). Both of
### its branches leave the current flow, so the statements after it must sit
### at a shallower nesting level in the real file than shown here.
if opts.force:
    ### --force: propagate the failure
    raise e
else:
    ### otherwise skip ahead by one stride and retry the enclosing loop
    gpsstart += stride
    continue

logger.info('Done.')

#===============================================================================================
# preparing auxmvc training samples
#===============================================================================================
if mla:
    logger.info('preparing training auxmvc samples')

    ### output file for training samples
    pat = idq.pat(output_dir, ifo, usertag, gpsstart-lookback, lookback+stride)

    if not build_auxmvc_vectors: ### we cat together pat files instead of building vectors from scratch
        ### run job that prepares training samples
        (ptas_exit_status, _) = idq.execute_prepare_training_auxmvc_samples(output_dir, realtimedir, config, gpsstart - lookback, gpsstart + stride, pat, dq_segments=seg_file, dq_segments_name=dq_name )
        os.chdir(cwd) ### go back to starting directory

    ### ptas_exit_status is only assigned in the branch above; the `or`
    ### short-circuits when build_auxmvc_vectors is true, so it is never read
    ### unassigned here
    if build_auxmvc_vectors or ptas_exit_status!=0: ### we need to build vectors
        if build_auxmvc_vectors: ### no realtime directory...
            logger.warning('WARNING: building auxmvc vectors, this should be necessary only at the very first training cycle')
        else:
            logger.warning('WARNING: patfile generation failed for some reason. Attempt to build auxmvc vectors from scratch')

        if ovl and (not opts.ignore_science_segments): ### need to reset sciseg pointer!
            ### write segments to ascii list
            sciseg_path = idq.segascii(output_dir, "_%s"%dq_name, gpsstart-lookback, lookback+stride)
            logger.info('writing science segments to file : '+sciseg_path)
### NOTE(review): this excerpt arrived with its line breaks and indentation
### stripped; the layout below is a reconstruction -- confirm against the
### real file.
### The first if/else is presumably the tail of an exception handler inside a
### loop (it uses `e` and `continue`, both bound before this excerpt). Both of
### its branches leave the current flow, so the statements after it must sit
### at a shallower nesting level in the real file than shown here.
if opts.force:
    ### --force: propagate the failure
    raise e
else:
    ### otherwise skip ahead by one stride and retry the enclosing loop
    gpsstart += stride
    continue

logger.info('Done.')

#===============================================================================================
# preparing auxmvc training samples
#===============================================================================================
if mla:
    logger.info('preparing training auxmvc samples')

    ### output file for training samples
    pat = idq.pat(output_dir, ifo, usertag, gpsstart - lookback, lookback + stride)

    if not build_auxmvc_vectors: ### we cat together pat files instead of building vectors from scratch
        ### run job that prepares training samples; only the first element of
        ### the returned pair (the exit status) is kept
        (ptas_exit_status, _) = idq.execute_prepare_training_auxmvc_samples( output_dir, realtimedir, config, gpsstart - lookback, gpsstart + stride, pat, dq_segments=seg_file, dq_segments_name=dq_name)
        os.chdir(cwd) ### go back to starting directory
### get rid of unwanted AUX triggers trgdict.include( [[min(mintime - padding, t), max(maxtime + padding, t + stride)]]) ### get rid of unwanted GW triggers from previous or next strides; these will be evaluated in their own strides trgdict.include([[t, t + stride]], channels=[gwchannel]) #==================== # generate patfiles for mla classifiers #==================== if mla: # only build patfiles if machine-learning algorithms are present print 'building auxmvc feature vectors ...' pat = idq.pat( opts.outdir, ifo, usertag, gps_start, twopadding ) #"%s/%s_%d-%d.pat"%(opts.outdir, ifo, gps_start, twopadding) # generating auxmvc vector samples. result is saved into pat file # FIXME: depending how padding is done we should adjust behavior of build_auxmvc_vectors # Currently it keeps gw trigger from [t, t + stride] and uses time_window to pad this segment for auxiliary triggers # we do not filter out unclean beacuse it is already done when clean_gps times are formed auxmvc_vectors = idq.build_auxmvc_vectors( trgdict, gwchannel, auxmvc_coinc_window, auxmc_gw_signif_thr, pat, gps_start_time=gps_start, gps_end_time=gps_padd, channels=auxmvc_selected_channels,
trgdict.resort() ### make sure the trgdict's trigger lists are stored in the correct order ### get rid of unwanted AUX triggers trgdict.include([[min(mintime - padding, t), max(maxtime + padding, t + stride)]]) ### get rid of unwanted GW triggers from previous or next strides; these will be evaluated in their own strides trgdict.include([[t, t + stride]], channels=[gwchannel]) #==================== # generate patfiles for mla classifiers #==================== if mla: # only build patfiles if machine-learning algorithms are present print 'building auxmvc feature vectors ...' pat = idq.pat(opts.outdir, ifo, usertag, gps_start, twopadding) #"%s/%s_%d-%d.pat"%(opts.outdir, ifo, gps_start, twopadding) # generating auxmvc vector samples. result is saved into pat file # FIXME: depending how padding is done we should adjust behavior of build_auxmvc_vectors # Currently it keeps gw trigger from [t, t + stride] and uses time_window to pad this segment for auxiliary triggers # we do not filter out unclean beacuse it is already done when clean_gps times are formed auxmvc_vectors = idq.build_auxmvc_vectors(trgdict, gwchannel, auxmvc_coinc_window, auxmc_gw_signif_thr, pat, gps_start_time=gps_start, gps_end_time=gps_padd, channels=auxmvc_selected_channels, unsafe_channels=auxmvc_unsafe_channels, clean_times=clean_gps, clean_window=clean_window, filter_out_unclean=False ) #============================================= # predictions #============================================= dats = {} for classifier in classifiers: flavor = classifiersD[classifier]['flavor']
print("Number of cleans: " + str(len(clean_gps))) ## keep only the most relevant cleans if len(clean_gps) > max_cln_samples: clean_gps = clean_gps[-max_cln_samples:] print("Number of cleans after max cap: " + str(len(clean_gps))) ### keep only times that are within science time if not opts.ignore_science_segments: logger.info(' filtering trigger_dict through scisegs') trigger_dict.include(scisegs) ### already loaded into memory above here print("Number of trigs in scisegs: " + str(len(trigger_dict[gwchannel]))) print("Number of triggers def into build_auxmvc_vectors(): " + str(len(trigger_dict[gwchannel]))) ### build vectors, also writes them into pat pat = idq.pat(output_dir, ifo, usertag, gpsstart, gpsstop-gpsstart) logger.info(' writing %s'%pat) idq.build_auxmvc_vectors(trigger_dict, gwchannel, auxmvc_coinc_window, auxmc_gw_signif_thr, pat, gps_start_time=gpsstart, gps_end_time=gpsstop, channels=auxmvc_selected_channels, unsafe_channels=auxmvc_unsafe_channels, clean_times=clean_gps, clean_window=clean_window, filter_out_unclean=False, locked_segments = "segment_times.txt", include_time_locked = opts.include_time_locked, include_time_until_end = opts.include_time_until_end) print("Done with pat file building") ptas_exit_status = 0 ### used to check for success if ptas_exit_status != 0: ### check that process executed correctly logger.warning('WARNING: Preparing training auxmvc samples failed') if opts.force: raise StandardError, "auxmvc samples required for successful training" else: logger.warning('WARNING: skipping re-training the MLA classifiers') else:
#evaluate_all_combinations(patfile, trainedforestbase, trainedforestend, config) #pR.plot_ROC_combinations("ER7_ROC_Curves-A.png", 1117329857, 1118336935) gpsstart = 1117329857 gpsstop = 1118336935 ### get start and end seg times formatted as list of lists tl.get_segments(gpsstart, gpsstop) scisegs = tl.parse_segments("segment_times.txt") newsegs = [] for seg in scisegs: if (seg[1]-seg[0] >= 3600): newsegs.append(seg) scisegs=newsegs for seg in scisegs: patf = idq.pat("/home/jessica.hyde/test/dev-branch/now/train/%s_%s/" % (gpsstart, gpsstop), "L1", "LowThresh", seg[0], seg[1]-seg[0]) patfile = patf trainedforestbase = "/home/jessica.hyde/test/dev-branch/now/train/%s_%s/mvsc/" % (gpsstart, gpsstop) trainedforestend = "L1_mla-%s-%s-A.spr" % (int(seg[0]), int(seg[1]-seg[0])) evaluate_all_combinations(patfile, trainedforestbase, trainedforestend, config) pR.plot_ROC_combinations("LowThreshROCSegment%s-%s-A.png" % (seg[0], seg[1]), gpsstart, gpsstop, seg[0], seg[1]) # trainedforestend = "L1_mla-%s-%s-B.spr" % (int(seg[0]), int(seg[1]-seg[0])) # evaluate_all_combinations(patfile, trainedforestbase, trainedforestend, config) # pR.plot_ROC_combinations("LowThreshROCSegment%s-%s-B.png" % (seg[0], seg[1]), gpsstart, gpsstop, seg[0], seg[1]) """for seg in scisegs: patf = idq.pat("/home/jessica.hyde/test/dev-branch/now/train/%s_%s/" % (gpsstart, gpsstop), "L1", "", seg[0], seg[1]-seg[0]) patfile = patf[:-4] + "-B.pat" trainedforestbase = "/home/jessica.hyde/test/dev-branch/now/train/%s_%s/mvsc/" % (gpsstart, gpsstop) trainedforestend = "L1_mla-%s-%s-A.spr" % (int(seg[0]), int(seg[1]-seg[0]))