def test_nds2_host_order(self):
    """Test :func:`gwpy.io.nds2.host_resolution_order`

    The ``NDSSERVER`` and ``TESTENV`` environment variables are modified
    while the test runs; whatever values they held on entry (including
    being unset) are restored on exit so that other tests are unaffected.
    """
    # snapshot the variables we are about to touch (None means "unset")
    saved = {key: os.environ.get(key) for key in ('NDSSERVER', 'TESTENV')}
    try:
        # check None returns CIT
        hro = io_nds2.host_resolution_order(None, env=None)
        assert hro == [('nds.ligo.caltech.edu', 31200)]

        # check L1 returns (LLO, CIT)
        hro = io_nds2.host_resolution_order('L1', env=None)
        assert hro == [('nds.ligo-la.caltech.edu', 31200),
                       ('nds.ligo.caltech.edu', 31200)]

        # check NDSSERVER works
        os.environ['NDSSERVER'] = 'test1.ligo.org:80,test2.ligo.org:43'
        hro = io_nds2.host_resolution_order(None)
        assert hro == [('test1.ligo.org', 80),
                       ('test2.ligo.org', 43),
                       ('nds.ligo.caltech.edu', 31200)]

        # check that NDSSERVER and an IFO spec works at the same time
        hro = io_nds2.host_resolution_order('L1')
        assert hro == [('test1.ligo.org', 80),
                       ('test2.ligo.org', 43),
                       ('nds.ligo-la.caltech.edu', 31200),
                       ('nds.ligo.caltech.edu', 31200)]

        # test named environment variable
        os.environ['TESTENV'] = 'test1.ligo.org:80,test2.ligo.org:43'
        hro = io_nds2.host_resolution_order(None, env='TESTENV')
        assert hro == [('test1.ligo.org', 80),
                       ('test2.ligo.org', 43),
                       ('nds.ligo.caltech.edu', 31200)]

        # test epoch='now' doesn't change anything
        os.environ.pop('NDSSERVER')
        hro = io_nds2.host_resolution_order('L1', epoch='now', env=None)
        assert hro == [('nds.ligo-la.caltech.edu', 31200),
                       ('nds.ligo.caltech.edu', 31200)]

        # test old epoch puts CIT ahead of LLO
        hro = io_nds2.host_resolution_order('L1', epoch='Jan 1 2015',
                                            env=None)
        assert hro == [('nds.ligo.caltech.edu', 31200),
                       ('nds.ligo-la.caltech.edu', 31200)]

        # test epoch doesn't operate with env
        hro = io_nds2.host_resolution_order('L1', epoch='now',
                                            env='TESTENV')
        assert hro == [('test1.ligo.org', 80),
                       ('test2.ligo.org', 43),
                       ('nds.ligo-la.caltech.edu', 31200),
                       ('nds.ligo.caltech.edu', 31200)]

        # test warnings for unknown IFO
        with pytest.warns(UserWarning):
            hro = io_nds2.host_resolution_order('X1')
        assert hro == [('nds.ligo.caltech.edu', 31200)]
    finally:
        # put the environment back exactly as we found it
        for key, value in saved.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value
def main(args=None):
    """Run the old lasso command-line interface

    Parameters
    ----------
    args : `list` of `str`, optional
        command-line arguments handed to the parser returned by
        ``create_parser()``; `None` is passed straight through to
        ``parse_args``

    Notes
    -----
    This function changes the working directory to ``args.output_dir``
    and writes ``results.txt``, ``index.html`` and the per-channel PNG
    plots there.
    """
    parser = create_parser()
    args = parser.parse_args(args=args)

    start = int(args.gpsstart)
    end = int(args.gpsend)
    pad = args.filter_padding

    try:
        flower, fupper = args.band_pass
    except TypeError:
        # no band-pass was given (nothing to unpack): fall back to the
        # broadband RMS branch below; note that unpacking `None` into two
        # names would itself raise TypeError, so assign each explicitly
        flower = fupper = None

    LOGGER.info('{} Slow Correlation {}-{}'.format(args.ifo, start, end))

    if args.primary_channel == '{ifo}:GDS-CALIB_STRAIN':
        args.primary_frametype = '%s_HOFT_C00' % args.ifo
    primary = args.primary_channel.format(ifo=args.ifo)
    rangechannel = args.range_channel.format(ifo=args.ifo)

    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)
    os.chdir(args.output_dir)
    nprocplot = args.nproc_plot or args.nproc

    # load data
    LOGGER.info("-- Loading range data")
    rangets = get_data(
        rangechannel, start, end, frametype=args.range_frametype,
        verbose=True, nproc=args.nproc)

    if args.trend_type == 'minute':
        dstart, dend = rangets.span
    else:
        dstart = start
        dend = end

    LOGGER.info("-- Loading h(t) data")
    darmts = get_data(primary, dstart-pad, dend+pad, verbose=True,
                      frametype=args.primary_frametype, nproc=args.nproc)

    # get darm BLRMS
    LOGGER.debug("-- Filtering h(t) data")
    if args.trend_type == 'minute':
        stride = 60
    else:
        stride = 1
    if flower:
        darmblrms = (
            darmts.highpass(flower/2., fstop=flower/4.,
                            filtfilt=False, ftype='butter')
            .notch(60, filtfilt=False)
            .bandpass(flower, fupper, fstop=[flower/2., fupper*1.5],
                      filtfilt=False, ftype='butter')
            .crop(dstart, dend).rms(stride))
        darmblrms.name = '%s %s-%s Hz BLRMS' % (primary, flower, fupper)
    else:
        darmblrms = darmts.notch(60).crop(dstart, dend).rms(stride)
        darmblrms.name = '%s RMS' % primary

    if args.remove_outliers:
        LOGGER.debug(
            "-- Removing outliers above %f sigma" % args.remove_outliers)
        gwlasso.remove_outliers(darmblrms, args.remove_outliers)
        gwlasso.remove_outliers(rangets, args.remove_outliers)

    if args.trend_type == 'minute':
        # calculate the r value between the DARM BLRMS and the Range
        # timeseries
        corr_p = numpy.corrcoef(rangets.value, darmblrms.value)[0, 1]
        # calculate the ρ value between the DARM BLRMS and the Range
        # timeseries
        corr_s = spearmanr(rangets.value, darmblrms.value)[0]
    else:
        # for second trends, set correlation to 0 since sample rates differ
        corr_p = 0
        corr_s = 0

    # create scaled versions of data to compare to each other
    LOGGER.debug("-- Creating scaled data")
    rangescaled = rangets.detrend()
    rangerms = numpy.sqrt(sum(rangescaled**2.0)/len(rangescaled))
    darmscaled = darmblrms.detrend()
    darmrms = numpy.sqrt(sum(darmscaled**2.0)/len(darmscaled))

    # create scaled darm using the rms(range) and the rms(darm)
    if args.trend_type == 'minute':
        darmscaled *= (-rangerms / darmrms)

    # get aux data
    LOGGER.info("-- Loading auxiliary channel data")
    host, port = io_nds2.host_resolution_order(args.ifo)[0]
    if args.channel_file is None:
        channels = ChannelList.query_nds2('*.mean', host=host, port=port,
                                          type='m-trend')
    else:
        with open(args.channel_file, 'r') as f:
            channels = f.read().rstrip('\n').split('\n')
    nchan = len(channels)
    LOGGER.debug("Identified %d channels" % nchan)

    if args.trend_type == 'minute':
        frametype = '%s_M' % args.ifo  # for minute trends
    else:
        frametype = '%s_T' % args.ifo  # for second trends

    auxdata = get_data(
        list(map(str, channels)), dstart, dend, verbose=True, pad=0,
        frametype=frametype, nproc=args.nproc)

    gpsstub = '%d-%d' % (start, end-start)
    re_delim = re.compile('[:_-]')

    LOGGER.info("-- Processing channels")
    counter = multiprocessing.Value('i', 0)

    # subplot margins passed to ``subplots_adjust``
    p1 = (.1, .1, .9, .95)
    p2 = (.1, .15, .9, .9)

    def _save_with_retry(figure, target):
        """Save ``figure`` to ``target``, retrying once on known failures

        `IOError` and `IndexError` trigger a second attempt, as does a
        `RuntimeError` whose message mentions latex; any other
        `RuntimeError` is re-raised.
        """
        try:
            figure.save(target)
        except (IOError, IndexError):
            figure.save(target)
        except RuntimeError as e:
            if 'latex' in str(e).lower():
                figure.save(target)
            else:
                raise

    def process_channel(input_,):
        """Correlate one auxiliary channel against the primary and range

        ``input_`` is a ``(channel, timeseries)`` pair.  Returns the tuple
        ``(chan, corr1, corr2, plot1, plot2, plot3, corr1s, corr2s)`` where
        the correlations are `None` for flat data and the plot names are
        `None` when nothing was plotted.
        """
        chan, ts = input_
        flat = ts.value.min() == ts.value.max()
        if flat:
            corr1 = None
            corr2 = None
            corr1s = None
            corr2s = None
            plot1 = None
            plot2 = None
            plot3 = None
        else:
            corr1 = numpy.corrcoef(ts.value, darmblrms.value)[0, 1]
            corr1s = spearmanr(ts.value, darmblrms.value)[0]
            if args.trend_type == 'minute':
                corr2 = numpy.corrcoef(ts.value, rangets.value)[0, 1]
                corr2s = spearmanr(ts.value, rangets.value)[0]
            else:
                corr2 = 0.0
                corr2s = 0.0
            # if all correlations are below threshold it does not plot
            # (and returns before the progress counter is incremented)
            if((abs(corr1) < args.threshold)
               and (abs(corr1s) < args.threshold)
               and (abs(corr2) < args.threshold)
               and (abs(corr2s) < args.threshold)):
                plot1 = None
                plot2 = None
                plot3 = None
                return (chan, corr1, corr2, plot1,
                        plot2, plot3, corr1s, corr2s)

            plot = Plot(darmblrms, ts, rangets,
                        xscale="auto-gps", separate=True,
                        figsize=(12, 12))
            plot.subplots_adjust(*p1)
            plot.axes[0].set_ylabel('$h(t)$ BLRMS [strain]')
            plot.axes[1].set_ylabel('Channel units')
            plot.axes[2].set_ylabel('Sensitive range [Mpc]')
            for ax in plot.axes:
                ax.legend(loc='best')
                ax.set_xlim(start, end)
                ax.set_epoch(start)
            channelstub = re_delim.sub('_', str(chan)).replace('_', '-', 1)
            plot1 = '%s_TRENDS-%s.png' % (channelstub, gpsstub)
            _save_with_retry(plot, plot1)
            plot.close()

            # plot auto-scaled versions
            tsscaled = ts.detrend()
            tsrms = numpy.sqrt(sum(tsscaled**2.0)/len(tsscaled))
            if args.trend_type == 'minute':
                tsscaled *= (rangerms / tsrms)
                if corr1 > 0:
                    tsscaled *= -1
            else:
                tsscaled *= (darmrms / tsrms)
                if corr1 < 0:
                    tsscaled *= -1
            plot = Plot(darmscaled, rangescaled, tsscaled,
                        xscale="auto-gps", figsize=[12, 6])
            plot.subplots_adjust(*p2)
            ax = plot.gca()
            ax.set_xlim(start, end)
            ax.set_epoch(start)
            ax.set_ylabel('Scaled amplitude [arbitrary units]')
            ax.legend(loc='best')
            plot2 = '%s_COMPARISON-%s.png' % (channelstub, gpsstub)
            _save_with_retry(plot, plot2)
            plot.close()

            # plot scatter plots
            rangeColor = 'red'
            darmblrmsColor = 'blue'

            tsCopy = ts.reshape(-1, 1)
            rangetsCopy = rangets.reshape(-1, 1)
            darmblrmsCopy = darmblrms.reshape(-1, 1)

            darmblrmsReg = linear_model.LinearRegression()
            darmblrmsReg.fit(tsCopy, darmblrmsCopy)
            darmblrmsFit = darmblrmsReg.predict(tsCopy)

            rangeReg = linear_model.LinearRegression()
            rangeReg.fit(tsCopy, rangetsCopy)
            rangeFit = rangeReg.predict(tsCopy)

            fig = Plot(figsize=(12, 6))
            fig.subplots_adjust(*p2)
            # left panel: channel vs. primary BLRMS (limits, scatter and
            # fit all come from darmblrms, so label it accordingly)
            ax = fig.add_subplot(121)
            ax.set_xlabel('Channel units')
            ax.set_ylabel('$h(t)$ BLRMS [strain]')
            yrange = abs(max(darmblrms.value) - min(darmblrms.value))
            upperLim = max(darmblrms.value) + .1 * yrange
            lowerLim = min(darmblrms.value) - .1 * yrange
            ax.set_ylim(lowerLim, upperLim)
            ax.text(.9, .1, 'r = ' + str('{0:.2}'.format(corr1)),
                    verticalalignment='bottom',
                    horizontalalignment='right',
                    transform=ax.transAxes, color='black', size=20,
                    bbox=dict(boxstyle='square', facecolor='white',
                              alpha=.75, edgecolor='black'))
            fig.add_scatter(ts, darmblrms, color=darmblrmsColor)
            fig.add_line(ts, darmblrmsFit, color='black')

            # right panel: channel vs. range
            ax = fig.add_subplot(122)
            ax.set_xlabel('Channel units')
            ax.set_ylabel('Sensitive range [Mpc]')
            ax.text(.9, .1, 'r = ' + str('{0:.2}'.format(corr2)),
                    verticalalignment='bottom',
                    horizontalalignment='right',
                    transform=ax.transAxes, color='black', size=20,
                    bbox=dict(boxstyle='square', facecolor='white',
                              alpha=.75, edgecolor='black'))
            fig.add_scatter(ts, rangets, color=rangeColor)
            fig.add_line(ts, rangeFit, color='black')

            plot3 = '%s_SCATTER-%s.png' % (channelstub, gpsstub)
            _save_with_retry(fig, plot3)
            plt.close(fig)

        # increment counter and print status
        with counter.get_lock():
            counter.value += 1
            pc = 100 * counter.value / nchan
            LOGGER.debug("Completed [%d/%d] %3d%% %-50s"
                         % (counter.value, nchan, pc, '(%s)' % str(chan)))
            sys.stdout.flush()
        return chan, corr1, corr2, plot1, plot2, plot3, corr1s, corr2s

    # NOTE(review): process_channel is a nested function handed to a
    # multiprocessing pool, and the pool is never closed — confirm this
    # works with the start method in use
    pool = multiprocessing.Pool(nprocplot)
    results = pool.map(process_channel, list(auxdata.items()))

    # strongest absolute correlation first; flat channels (None) rank as 0
    results.sort(
        key=lambda x: (
            max(abs(x[1]), abs(x[2]), abs(x[6]), abs(x[7]))
            if x[1] is not None else 0,
            x[0]),
        reverse=True)

    with open('results.txt', 'w') as f:
        for ch, corr1, corr2, _, _, _, corr1s, corr2s in results:
            print('%s %s %s %s %s' % (ch, corr1, corr2, corr1s, corr2s),
                  file=f)

    # -- write html
    trange = '%d-%d' % (start, end)
    title = '%s Slow Correlations: %s' % (args.ifo, trange)
    links = [trange] + [(s, '#%s' % s.lower()) for s in
                        ['Parameters', 'Results']]
    (brand, class_) = htmlio.get_brand(args.ifo, 'Correlations', start)
    navbar = htmlio.navbar(links, class_=class_, brand=brand)
    page = htmlio.new_bootstrap_page(title=title, navbar=navbar)

    # header
    if flower:
        pstr = ('<code>%s</code> (band-limited %s-%s Hz)'
                % (primary, flower, fupper))
    else:
        pstr = primary
    if args.trend_type == 'minute':
        pstr += ' and <code>%s</code>' % rangechannel
    page.div(class_='pb-2 mt-3 mb-2 border-bottom')
    page.h1(title)
    page.p("This analysis searched %d channels for linear correlations with %s"
           % (nchan, pstr))
    page.div.close()

    # run parameters
    contents = [
        ('Primary channel', '{} ({})'.format(
            primary, args.primary_frametype.format(ifo=args.ifo))),
        ('Range channel', '{} ({})'.format(rangechannel,
                                           args.range_frametype or '-')),
        ('Band-pass', '{}-{}'.format(flower, fupper))]
    page.add(htmlio.parameter_table(contents, start=start, end=end))

    # results
    page.h2('Results', class_='mt-4', id_='results')
    r_blrms = "<i>r<sub>blrms</sub> </i>"
    r_range = "<i>r<sub>range</sub> </i>"
    r = "<i>r</i>"
    rho_blrms = "<i>ρ<sub>blrms</sub> </i>"
    rho_range = "<i>ρ<sub>range</sub> </i>"
    rho = "<i>ρ</i>"
    Pearson_wikilink = htmlio.markup.oneliner.a(
        "Pearson's correlation coefficient",
        href="https://en.wikipedia.org/wiki/"
             "Pearson_product-moment_correlation_coefficient",
        rel="external")
    numpylink = htmlio.markup.oneliner.a(
        "<code>numpy.corrcoef</code>",
        href="http://docs.scipy.org/doc/numpy-1.10.1/reference/generated/"
             "numpy.corrcoef.html",
        rel="external")
    Spearman_wikilink = htmlio.markup.oneliner.a(
        "Spearman's correlation coefficient",
        href="https://en.wikipedia.org/wiki/"
             "Spearman%27s_rank_correlation_coefficient",
        rel="external")
    scipylink = htmlio.markup.oneliner.a(
        "<code>scipy.stats.spearmanr</code>",
        href="http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/"
             "scipy.stats.spearmanr.html",
        rel="external")
    page.p("In the results below, all %s values are calculated as"
           " the square of %s using %s and all %s values are calculated"
           " as the square of %s using %s."
           % (r, Pearson_wikilink, numpylink,
              rho, Spearman_wikilink, scipylink))
    if args.trend_type == 'minute':
        page.p("%s and %s are reported for <code>%s</code>."
               " %s and %s are reported for <code>%s</code>."
               " The %s between these two channels is %.2f."
               " The %s between these two channels is %.2f."
               % (r_blrms, rho_blrms, primary,
                  r_range, rho_range, rangechannel,
                  r, corr_p, rho, corr_s))

    page.div(id_='accordion')
    for i, (ch, corr1, corr2, plot1, plot2, plot3,
            corr1s, corr2s) in enumerate(results):
        # card heading text
        if corr1 is None:
            h = '%s [flat]' % ch
        elif plot1 is None:
            h = ('%s [%s = %.2f, %s = %.2f] [%s = %.2f, %s = %.2f]'
                 ' [below threshold]'
                 % (ch, r_blrms, corr1, r_range, corr2,
                    rho_blrms, corr1s, rho_range, corr2s))
        elif args.trend_type == 'minute':
            h = ('%s [%s = %.2f, %s = %.2f] [%s = %.2f, %s = %.2f]'
                 % (ch, r_blrms, corr1, r_range, corr2,
                    rho_blrms, corr1s, rho_range, corr2s))
        else:
            h = '%s [%s = %.2f]' % (ch, r_blrms, corr1)

        # card colour by strongest correlation
        if (corr1 is None) or (corr1 == 0) or (plot1 is None):
            context = 'bg-light'
        elif((numpy.absolute(corr1) >= .6) or (numpy.absolute(corr1s) >= .6)
             or (numpy.absolute(corr2) >= .6)
             or (numpy.absolute(corr2s) >= .6)):
            context = 'text-white bg-danger'
        elif((numpy.absolute(corr1) >= .4) or (numpy.absolute(corr1s) >= .4)
             or (numpy.absolute(corr2) >= .4)
             or (numpy.absolute(corr2s) >= .4)):
            context = 'text-white bg-warning'
        else:
            context = 'text-white bg-info'
        page.div(class_='card %s' % context)

        # heading
        page.div(class_='card-header')
        page.a(h, class_='collapsed card-link cis-link',
               href='#channel%d' % i, **{'data-toggle': 'collapse'})
        page.div.close()  # card-header

        # body
        page.div(id_='channel%d' % i, class_='collapse',
                 **{'data-parent': '#accordion'})
        page.div(class_='card-body')
        if corr1 is None:
            page.p("The amplitude data for this channel is flat"
                   " (does not change) for the chosen time period.")
        elif plot1 is None:
            page.p("Niether r nor rho are above the threshold of %.2f."
                   % (args.threshold))
        else:
            for p in (plot1, plot2, plot3):
                img = htmlio.FancyPlot(p)
                page.add(htmlio.fancybox_img(img))
        page.div.close()  # card-body
        page.div.close()  # collapse
        page.div.close()  # card
    page.div.close()  # accordion

    htmlio.close_page(page, 'index.html')  # save and close
    LOGGER.info("-- Process Completed")
def main(args=None):
    """Run the lasso command-line interface

    Parameters
    ----------
    args : `list` of `str`, optional
        command-line arguments handed to the parser returned by
        ``create_parser()``; `None` is passed straight through to
        ``parse_args``

    Raises
    ------
    KeyError
        if the primary frametype is not given and cannot be looked up in
        ``DEFAULT_FRAMETYPE``
    TypeError
        if a band-pass is given that cannot be unpacked into a lower and
        an upper frequency
    ValueError
        if the band-pass lies outside ``[0, sample_rate / 2)`` for the
        primary channel

    Notes
    -----
    This function changes the working directory to ``args.output_dir``
    and writes its CSV, PNG and ``index.html`` products there.  Many
    locals are declared ``global`` so that the module-level worker
    functions run through ``multiprocessing`` can see them.
    """
    # declare global variables
    # this is needed for multiprocessing utilities
    global auxdata, cluster_threshold, cmap, colors, counter, gpsstub
    global line_size_aux, line_size_primary, max_correlated_channels
    global nonzerocoef, nonzerodata, p1, primary, primary_mean, primary_std
    global primaryts, range_is_primary, re_delim, start, target, times
    global threshold, trend_type, xlim

    parser = create_parser()
    args = parser.parse_args(args=args)

    # get run params
    start = int(args.gpsstart)
    end = int(args.gpsend)
    pad = args.filter_padding

    # set pertinent global variables
    cluster_threshold = args.cluster_coefficient
    line_size_aux = args.line_size_aux
    line_size_primary = args.line_size_primary
    threshold = args.threshold
    trend_type = args.trend_type

    # let's go
    LOGGER.info('{} Lasso correlations {}-{}'.format(args.ifo, start, end))

    # get primary channel frametype
    primary = args.primary_channel.format(ifo=args.ifo)
    range_is_primary = 'EFFECTIVE_RANGE_MPC' in args.primary_channel
    if args.primary_cache is not None:
        LOGGER.info("Using custom primary cache file")
    elif args.primary_frametype is None:
        try:
            args.primary_frametype = DEFAULT_FRAMETYPE[
                args.primary_channel.split(':')[1]].format(ifo=args.ifo)
        except KeyError as exc:
            raise type(exc)("Could not determine primary channel's frametype, "
                            "please specify with --primary-frametype")

    # create output directory
    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)
    os.chdir(args.output_dir)

    # multiprocessing for plots
    nprocplot = (args.nproc_plot or args.nproc) if USETEX else 1

    # bandpass primary
    if args.band_pass:
        try:
            flower, fupper = args.band_pass
        except TypeError:
            # the frequencies are compared and used for filtering straight
            # away, so there is no sensible fallback: fail with a message
            # that names the real problem
            raise TypeError(
                "band-pass must be a (lower, upper) pair of frequencies, "
                "got {!r}".format(args.band_pass))

        LOGGER.info("-- Loading primary channel data")
        bandts = get_data(primary, start - pad, end + pad,
                          verbose='Reading primary:'.rjust(30),
                          frametype=args.primary_frametype,
                          source=args.primary_cache, nproc=args.nproc)
        if flower < 0 or fupper >= float((bandts.sample_rate / 2.).value):
            raise ValueError(
                "bandpass frequency is out of range for this "
                "channel, band (Hz): {0}, sample rate: {1}".format(
                    args.band_pass, bandts.sample_rate))

        # get darm BLRMS
        LOGGER.debug("-- Filtering data")
        if trend_type == 'minute':
            stride = 60
        else:
            stride = 1
        if flower:
            darmbl = (bandts.highpass(
                flower / 2., fstop=flower / 4., filtfilt=False,
                ftype='butter').notch(60, filtfilt=False).bandpass(
                    flower, fupper, fstop=[flower / 2., fupper * 1.5],
                    filtfilt=False, ftype='butter').crop(start, end))
            darmblrms = darmbl.rms(stride)
            darmblrms.name = '%s %s-%s Hz BLRMS' % (primary, flower, fupper)
        else:
            # lower edge of 0 Hz: only notch the 60 Hz line
            darmbl = bandts.notch(60).crop(start, end)
            darmblrms = darmbl.rms(stride)
            darmblrms.name = '%s RMS' % primary

        primaryts = darmblrms

        bandts_asd = bandts.asd(4, 2, method='median')
        darmbl_asd = darmbl.asd(4, 2, method='median')

        spectrum_plots = gwplot.make_spectrum_plots(start, end, flower,
                                                    fupper,
                                                    args.primary_channel,
                                                    bandts_asd, darmbl_asd)
        spectrum_plot_zoomed_out = spectrum_plots[0]
        spectrum_plot_zoomed_in = spectrum_plots[1]

    else:
        # load primary channel data
        LOGGER.info("-- Loading primary channel data")
        primaryts = get_data(primary, start, end,
                             frametype=args.primary_frametype,
                             source=args.primary_cache,
                             verbose='Reading:'.rjust(30),
                             nproc=args.nproc).crop(start, end)

    if args.remove_outliers:
        LOGGER.debug("-- Removing outliers above %f sigma"
                     % args.remove_outliers)
        gwlasso.remove_outliers(primaryts, args.remove_outliers)
    elif args.remove_outliers_pf:
        LOGGER.debug("-- Removing outliers in the bottom {} percent "
                     "of data".format(args.remove_outliers_pf))
        gwlasso.remove_outliers(primaryts, args.remove_outliers_pf,
                                method='pf')
        # re-read the analysis interval from the data; ``span`` is an
        # attribute (it is used that way for ``xlim`` below), not a method
        start = int(primaryts.span[0])
        end = int(primaryts.span[1])

    primary_mean = numpy.mean(primaryts.value)
    primary_std = numpy.std(primaryts.value)

    # get aux data
    LOGGER.info("-- Loading auxiliary channel data")
    if args.channel_file is None:
        host, port = io_nds2.host_resolution_order(args.ifo)[0]
        channels = ChannelList.query_nds2('*.mean', host=host, port=port,
                                          type='m-trend')
    else:
        with open(args.channel_file, 'r') as f:
            channels = [name.rstrip('\n') for name in f]
    nchan = len(channels)
    LOGGER.debug("Identified %d channels" % nchan)

    if trend_type == 'minute':
        frametype = '%s_M' % args.ifo  # for minute trends
    else:
        frametype = '%s_T' % args.ifo  # for second trends

    # read aux channels
    auxdata = get_data(channels, start, end, verbose='Reading:'.rjust(30),
                       frametype=frametype, nproc=args.nproc,
                       pad=0).crop(start, end)

    # -- remove flat data, to be re-introduced later
    LOGGER.info('-- Pre-processing auxiliary channel data')
    auxdata = gwlasso.remove_flat(auxdata)
    flatable = Table(data=(list(set(channels) - set(auxdata.keys())), ),
                     names=('Channels', ))
    LOGGER.debug('Removed {0} channels with flat data'.format(len(flatable)))
    LOGGER.debug('{0} channels remaining'.format(len(auxdata)))

    # -- remove bad data
    LOGGER.info("Removing any channels with bad data...")
    nbefore = len(auxdata)
    auxdata = gwlasso.remove_bad(auxdata)
    nafter = len(auxdata)
    LOGGER.debug('Removed {0} channels with bad data'.format(nbefore - nafter))
    LOGGER.debug('{0} channels remaining'.format(nafter))
    # one standardised column per remaining channel
    data = numpy.array([scale(ts.value) for ts in auxdata.values()]).T

    # -- perform lasso regression -------------------

    # create model
    LOGGER.info('-- Fitting data to target')
    target = scale(primaryts.value)
    model = gwlasso.fit(data, target, alpha=args.alpha)
    LOGGER.info('Alpha: {}'.format(model.alpha))

    # restructure results for convenience
    allresults = Table(data=(list(auxdata.keys()), model.coef_,
                             numpy.abs(model.coef_)),
                       names=('Channel', 'Lasso coefficient', 'rank'))
    allresults.sort('rank')
    allresults.reverse()
    useful = allresults['rank'] > 0
    allresults.remove_column('rank')
    results = allresults[useful]  # non-zero coefficient
    zeroed = allresults[numpy.invert(useful)]  # zero coefficient

    # extract data for useful channels
    nonzerodata = {name: auxdata[name] for name in results['Channel']}
    nonzerocoef = {name: coeff for name, coeff in results.as_array()}

    # print results
    LOGGER.info('Found {} channels with |Lasso coefficient| >= {}:\n\n'.format(
        len(results), threshold))
    print(results)
    print('\n\n')

    # convert to pandas
    # NOTE(review): newer pandas releases may expect ``None`` rather than
    # ``-1`` for an unlimited column width — confirm against the pinned
    # pandas version
    set_option('max_colwidth', -1)
    df = results.to_pandas()
    df.index += 1

    # write results to files
    gpsstub = '%d-%d' % (start, end - start)
    resultsfile = '%s-LASSO_RESULTS-%s.csv' % (args.ifo, gpsstub)
    results.write(resultsfile, format='csv', overwrite=True)
    zerofile = '%s-ZERO_COEFFICIENT_CHANNELS-%s.csv' % (args.ifo, gpsstub)
    zeroed.write(zerofile, format='csv', overwrite=True)
    flatfile = '%s-FLAT_CHANNELS-%s.csv' % (args.ifo, gpsstub)
    flatable.write(flatfile, format='csv', overwrite=True)

    # -- generate lasso plots

    modelFit = model.predict(data)

    re_delim = re.compile(r'[:_-]')
    p1 = (.1, .15, .9, .9)  # global plot defaults for plot1, lasso model

    times = primaryts.times.value
    xlim = primaryts.span
    cmap = get_cmap('tab20')
    colors = [cmap(i) for i in numpy.linspace(0, 1, len(nonzerodata) + 1)]

    plot = Plot(figsize=(12, 4))
    plot.subplots_adjust(*p1)
    ax = plot.gca(xscale='auto-gps', epoch=start, xlim=xlim)
    ax.plot(times, _descaler(target), label=texify(primary),
            color='black', linewidth=line_size_primary)
    ax.plot(times, _descaler(modelFit), label='Lasso model',
            linewidth=line_size_aux)
    if range_is_primary:
        ax.set_ylabel('Sensitive range [Mpc]')
        ax.set_title('Lasso Model of Range')
    else:
        ax.set_ylabel('Primary Channel Units')
        ax.set_title('Lasso Model of Primary Channel')
    ax.legend(loc='best')
    plot1 = gwplot.save_figure(plot,
                               '%s-LASSO_MODEL-%s.png' % (args.ifo, gpsstub),
                               bbox_inches='tight')

    # summed contributions
    plot = Plot(figsize=(12, 4))
    plot.subplots_adjust(*p1)
    ax = plot.gca(xscale='auto-gps', epoch=start, xlim=xlim)
    ax.plot(times, _descaler(target), label=texify(primary),
            color='black', linewidth=line_size_primary)
    summed = 0
    for i, name in enumerate(results['Channel']):
        summed += scale(nonzerodata[name].value) * nonzerocoef[name]
        if i:
            label = 'Channels 1-{0}'.format(i + 1)
        else:
            label = 'Channel 1'
        ax.plot(times, _descaler(summed), label=label, color=colors[i],
                linewidth=line_size_aux)
    if range_is_primary:
        ax.set_ylabel('Sensitive range [Mpc]')
    else:
        ax.set_ylabel('Primary Channel Units')
    ax.set_title('Summations of Channel Contributions to Model')
    ax.legend(loc='center left', bbox_to_anchor=(1.05, 0.5))
    plot2 = gwplot.save_figure(plot,
                               '%s-LASSO_CHANNEL_SUMMATION-%s.png'
                               % (args.ifo, gpsstub),
                               bbox_inches='tight')

    # individual contributions
    plot = Plot(figsize=(12, 4))
    plot.subplots_adjust(*p1)
    ax = plot.gca(xscale='auto-gps', epoch=start, xlim=xlim)
    ax.plot(times, _descaler(target), label=texify(primary),
            color='black', linewidth=line_size_primary)
    for i, name in enumerate(results['Channel']):
        this = _descaler(scale(nonzerodata[name].value) * nonzerocoef[name])
        # each trace is labelled with its own channel name
        ax.plot(times, this, label=texify(name), color=colors[i],
                linewidth=line_size_aux)
    if range_is_primary:
        ax.set_ylabel('Sensitive range [Mpc]')
    else:
        ax.set_ylabel('Primary Channel Units')
    ax.set_title('Individual Channel Contributions to Model')
    ax.legend(loc='center left', bbox_to_anchor=(1.05, 0.5))
    plot3 = gwplot.save_figure(plot,
                               '%s-LASSO_CHANNEL_CONTRIBUTIONS-%s.png'
                               % (args.ifo, gpsstub),
                               bbox_inches='tight')

    # -- process aux channels, making plots

    LOGGER.info("-- Processing channels")
    counter = multiprocessing.Value('i', 0)

    # process channels
    # (from here on ``results`` is a list of per-channel tuples, no longer
    # the table of coefficients)
    pool = multiprocessing.Pool(nprocplot)
    results = pool.map(_process_channel, enumerate(list(nonzerodata.items())))
    results = sorted(results, key=lambda x: abs(x[1]), reverse=True)

    # generate clustered time series plots
    counter = multiprocessing.Value('i', 0)
    max_correlated_channels = 20

    if args.no_cluster is False:
        LOGGER.info("-- Generating clusters")
        pool = multiprocessing.Pool(nprocplot)
        clusters = pool.map(_generate_cluster, enumerate(results))

    channelsfile = '%s-CHANNELS-%s.csv' % (args.ifo, gpsstub)
    numpy.savetxt(channelsfile, channels, delimiter=',', fmt='%s')

    # write html
    trange = '%d-%d' % (start, end)
    title = '%s Lasso Correlation: %s' % (args.ifo, trange)
    if args.band_pass:
        links = [trange] + [(s, '#%s' % s.lower())
                            for s in ['Parameters', 'Spectra', 'Model',
                                      'Results']]
    else:
        links = [trange] + [(s, '#%s' % s.lower())
                            for s in ['Parameters', 'Model', 'Results']]
    (brand, class_) = htmlio.get_brand(args.ifo, 'Lasso', start)
    navbar = htmlio.navbar(links, class_=class_, brand=brand)
    page = htmlio.new_bootstrap_page(title='%s Lasso | %s'
                                     % (args.ifo, trange),
                                     navbar=navbar)
    page.h1(title, class_='pb-2 mt-3 mb-2 border-bottom')

    # -- summary table
    content = [
        ('Primary channel', markup.oneliner.code(primary)),
        ('Primary frametype',
         markup.oneliner.code(args.primary_frametype) or '-'),
        ('Primary cache file',
         markup.oneliner.code(args.primary_cache) or '-'),
        ('Outlier threshold', '%s sigma' % args.remove_outliers),
        ('Lasso coefficient threshold', str(threshold)),
        ('Cluster coefficient threshold', str(args.cluster_coefficient)),
        ('Non-zero coefficients', str(numpy.count_nonzero(model.coef_))),
        ('α (model)', '%.4f' % model.alpha)
    ]
    if args.band_pass:
        content.insert(
            2, ('Primary bandpass', '{0}-{1} Hz'.format(flower, fupper)))
    page.h2('Parameters', class_='mt-4 mb-4', id_='parameters')
    page.div(class_='row')
    page.div(class_='col-md-9 col-sm-12')
    page.add(htmlio.parameter_table(content, start=start, end=end))
    page.div.close()  # col-md-9 col-sm-12

    # -- download button
    files = [('%s analyzed channels (CSV)' % nchan, channelsfile),
             ('%s flat channels (CSV)' % len(flatable), flatfile),
             ('%s zeroed channels (CSV)' % len(zeroed), zerofile)]
    page.div(class_='col-md-3 col-sm-12')
    page.add(
        htmlio.download_btn(files, label='Channel information',
                            btnclass='btn btn-%s dropdown-toggle'
                            % args.ifo.lower()))
    page.div.close()  # col-md-3 col-sm-12
    page.div.close()  # row

    # -- command-line
    page.h5('Command-line:')
    page.add(htmlio.get_command_line(about=False, prog=PROG))

    if args.band_pass:
        page.h2('Primary channel spectra', class_='mt-4', id_='spectra')
        page.div(class_='card border-light card-body shadow-sm')
        page.div(class_='row')
        page.div(class_='col-md-6')
        spectra_img1 = htmlio.FancyPlot(spectrum_plot_zoomed_out)
        page.add(htmlio.fancybox_img(spectra_img1))
        page.div.close()  # col-md-6
        page.div(class_='col-md-6')
        spectra_img2 = htmlio.FancyPlot(spectrum_plot_zoomed_in)
        page.add(htmlio.fancybox_img(spectra_img2))
        page.div.close()  # col-md-6
        page.div.close()  # row
        page.div.close()  # card border-light card-body shadow-sm

    # -- model information
    page.h2('Model information', class_='mt-4', id_='model')

    page.div(class_='card card-%s card-body shadow-sm' % args.ifo.lower())
    page.div(class_='row')
    page.div(class_='col-md-8 offset-md-2', id_='results-table')
    page.p('Below are the top {} mean minute-trend channels, ranked by '
           'Lasso correlation with the primary.'.format(df.shape[0]))
    page.add(
        df.to_html(classes=('table', 'table-sm', 'table-hover'),
                   formatters={
                       'Lasso coefficient': lambda x: "%.4f" % x,
                       'Channel':
                           lambda x: str(htmlio.cis_link(x.split('.')[0])),
                       '__index__': lambda x: str(x)
                   },
                   escape=False,
                   border=0).replace(' style="text-align: right;"', ''))
    page.div.close()  # col-md-8 offset-md-2
    page.div.close()  # row

    page.div(class_='row', id_='primary-lasso')
    page.div(class_='col-md-8 offset-md-2')
    img1 = htmlio.FancyPlot(plot1)
    page.add(htmlio.fancybox_img(img1))  # primary lasso plot
    page.div.close()  # col-md-8 offset-md-2
    page.div.close()  # primary-lasso

    page.div(class_='row', id_='channel-summation')
    img2 = htmlio.FancyPlot(plot2)
    page.div(class_='col-md-8 offset-md-2')
    page.add(htmlio.fancybox_img(img2))
    page.div.close()  # col-md-8 offset-md-2
    page.div.close()  # channel-summation

    page.div(class_='row', id_='channels-and-primary')
    img3 = htmlio.FancyPlot(plot3)
    page.div(class_='col-md-8 offset-md-2')
    page.add(htmlio.fancybox_img(img3))
    page.div.close()  # col-md-8 offset-md-2
    page.div.close()  # channels-and-primary

    page.div.close()  # card card-<ifo> card-body shadow-sm

    # -- results
    page.h2('Top channels', class_='mt-4', id_='results')
    page.div(id_='results')
    # for each aux channel, create information container and put plots in it
    for i, (ch, lassocoef, plot4, plot5, plot6, _) in enumerate(results):
        # set container color/context based on lasso coefficient
        if lassocoef == 0:
            # results are sorted by |coefficient|, nothing useful follows
            break
        elif abs(lassocoef) < threshold:
            h = '%s [lasso coefficient = %.4f] (Below threshold)' % (
                ch, lassocoef)
        else:
            h = '%s [lasso coefficient = %.4f]' % (ch, lassocoef)
        if ((lassocoef is None) or (lassocoef == 0)
                or (abs(lassocoef) < threshold)):
            card = 'card border-light mb-1 shadow-sm'
            card_header = 'card-header bg-light'
        elif abs(lassocoef) >= .5:
            card = 'card border-danger mb-1 shadow-sm'
            card_header = 'card-header text-white bg-danger'
        elif abs(lassocoef) >= .2:
            card = 'card border-warning mb-1 shadow-sm'
            card_header = 'card-header text-white bg-warning'
        else:
            card = 'card border-info mb-1 shadow-sm'
            card_header = 'card-header text-white bg-info'
        page.div(class_=card)

        # heading
        page.div(class_=card_header)
        page.a(h, class_='collapsed card-link cis-link',
               href='#channel%d' % i, **{'data-toggle': 'collapse'})
        page.div.close()  # card-header

        # body
        page.div(id_='channel%d' % i, class_='collapse',
                 **{'data-parent': '#results'})
        page.div(class_='card-body')
        if lassocoef is None:
            page.p('The amplitude data for this channel is flat (does not '
                   'change) within the chosen time period.')
        elif abs(lassocoef) < threshold:
            page.p('Lasso coefficient below the threshold of %g.'
                   % (threshold))
        else:
            for image in [plot4, plot5, plot6]:
                img = htmlio.FancyPlot(image)
                page.div(class_='row')
                page.div(class_='col-md-8 offset-md-2')
                page.add(htmlio.fancybox_img(img))
                page.div.close()  # col-md-8 offset-md-2
                page.div.close()  # row
                page.add('<hr class="row-divider">')
            if args.no_cluster is False:
                if clusters[i][0] is None:
                    page.p("<font size='3'><br />No channels were highly "
                           "correlated with this channel.</font>")
                else:
                    page.div(class_='row', id_='clusters')
                    page.div(class_='col-md-12')
                    cimg = htmlio.FancyPlot(clusters[i][0])
                    page.add(htmlio.fancybox_img(cimg))
                    page.div.close()  # col-md-12
                    page.div.close()  # clusters
                    if clusters[i][1] is not None:
                        corr_link = markup.oneliner.a(
                            'Export {} channels (CSV)'.format(
                                max_correlated_channels),
                            href=clusters[i][1],
                            download=clusters[i][1],
                        )
                        page.button(
                            corr_link,
                            class_='btn btn-%s' % args.ifo.lower(),
                        )
        page.div.close()  # card-body
        page.div.close()  # collapse
        page.div.close()  # card
    page.div.close()  # results
    htmlio.close_page(page, 'index.html')  # save and close
    LOGGER.info("-- Process Completed")