Example #1
0
def add_timeseries(timeseries, key=None, coalesce=True):
    """Store a series in the global in-memory data cache

    The series is appended to the list kept in ``globalv.DATA`` under
    ``key``; that list is created first if the key is not yet present.

    Parameters
    ----------
    timeseries : `TimeSeries` or `StateVector`
        the series to store

    key : `str`, optional
        the cache key to file the series under; when not given, the
        ``name`` of the series is used, falling back to the ``ndsname``
        of its channel if the name is empty

    coalesce : `bool`, optional
        if `True` (the default), call ``coalesce()`` on the cached list
        once the series has been appended
    """
    if timeseries.channel is not None:
        # hand the parameters carried by this series' channel over to
        # the matching globalv channel
        update_missing_channel_params(timeseries.channel)
    if key is None:
        key = timeseries.name or timeseries.channel.ndsname
    # state vectors are kept in their own list type
    if isinstance(timeseries, StateVector):
        empty = StateVectorList()
    else:
        empty = TimeSeriesList()
    cached = globalv.DATA.setdefault(key, empty)
    cached.append(timeseries)
    if coalesce:
        cached.coalesce()
Example #2
0
def get_range(channel, segments, config=None, cache=None,
              query=True, nds='guess', return_=True, multiprocess=True,
              datafind_error='raise', frametype=None,
              stride=None, fftlength=None, overlap=None,
              method=None, **rangekwargs):
    """Calculate the sensitive distance for a given strain channel

    Range data are only computed for the parts of ``segments`` that are
    not already held in ``globalv.DATA``; each newly computed series is
    added to that cache through `add_timeseries`.

    Parameters
    ----------
    channel
        the strain channel to process (anything accepted by
        `get_channel`)

    segments
        the segments for which range data are wanted

    config : `ConfigParser`, optional
        configuration passed on to `get_spectrogram`; a new, empty
        parser is created for the call when this is not given

    query : `bool`, optional
        if `False`, no new data are computed

    return_ : `bool`, optional
        if `True` (the default), return the cached range data for
        ``segments``

    cache, nds, multiprocess, datafind_error, frametype, stride, \
fftlength, overlap, method
        passed unchanged to `get_spectrogram`

    **rangekwargs
        keyword arguments for the range function; if ``'energy'`` is
        among them `astro.burst_range` is used, otherwise
        `astro.inspiral_range`. When none are given,
        ``mass1=1.4, mass2=1.4`` is used.

    Returns
    -------
    The result of ``get_timeseries(key, segments, query=False)`` when
    ``return_`` is true, otherwise `None`.
    """
    if config is None:
        # a default of `ConfigParser()` in the signature would be built
        # once, at definition time, and shared by every call
        config = ConfigParser()
    if not rangekwargs:
        rangekwargs = {'mass1': 1.4, 'mass2': 1.4}
    if 'energy' in rangekwargs:
        range_func = astro.burst_range
    else:
        range_func = astro.inspiral_range
    channel = get_channel(channel)
    key = get_range_channel(channel, **rangekwargs)
    # work out which segments are not yet in the cache
    havesegs = globalv.DATA.get(key, TimeSeriesList()).segments
    new = segments - havesegs
    query &= abs(new) != 0
    if query:
        # get spectrograms covering the missing segments
        spectrograms = get_spectrogram(channel, new, config=config,
                                       cache=cache, multiprocess=multiprocess,
                                       frametype=frametype, format='psd',
                                       datafind_error=datafind_error, nds=nds,
                                       stride=stride, fftlength=fftlength,
                                       overlap=overlap, method=method)
        # calculate range for each PSD in each spectrogram
        for sg in spectrograms:
            ts = TimeSeries(numpy.zeros(sg.shape[0],), unit='Mpc',
                            epoch=sg.epoch, dx=sg.dx, channel=key)
            for i in range(sg.shape[0]):
                psd = sg[i]
                psd = Spectrum(psd.value, f0=psd.x0, df=psd.dx)
                ts[i] = range_func(psd, **rangekwargs)
            add_timeseries(ts, key=key)

    if return_:
        return get_timeseries(key, segments, query=False)
Example #3
0
def generate_timeseries(data_list, setname="MagneticFields"):
    """
    Generate time series using list of HDF5 data file paths

    Parameters
    ----------
    data_list : dictionary
      Dictionary that stores the path to the HDF5 data file
      for each segment of data available.
    setname : str
      Passed to ``retrieve_data_timeseries`` together with each open
      file (presumably the name of the dataset to read -- confirm
      against that helper). Default is "MagneticFields".

    Returns
    -------
    full_data : TimeSeriesList
      The values returned by ``retrieve_data_timeseries``, one per
      file, in sorted order of the keys of ``data_list``.
    """
    full_data = TimeSeriesList()
    for seg in sorted(data_list):
        # the context manager closes the file even if reading it fails
        with h5py.File(data_list[seg], "r") as hfile:
            full_data.append(retrieve_data_timeseries(hfile, setname))
    return full_data
Example #4
0
def add_timeseries(timeseries, key=None, coalesce=True):
    """Append a series to its entry in the global memory cache

    Parameters
    ----------
    timeseries : `TimeSeries` or `StateVector`
        the series to store

    key : `str`, optional
        the key of the ``globalv.DATA`` entry to append to; when not
        given, the ``name`` of the series is used, falling back to the
        ``ndsname`` of its channel if the name is empty

    coalesce : `bool`, optional
        if `True` (the default), call ``coalesce()`` on the entry after
        appending
    """
    if key is None:
        key = timeseries.name or timeseries.channel.ndsname
    # a missing entry is created with the list type matching the series
    list_class = (StateVectorList if isinstance(timeseries, StateVector)
                  else TimeSeriesList)
    entry = globalv.DATA.setdefault(key, list_class())
    entry.append(timeseries)
    if not coalesce:
        return
    entry.coalesce()
Example #5
0
 def get_tmpltbank_data(self):
     """Read inspiral horizon data from the TmpltBank cache

     Two entries of ``globalv.DATA`` are reset and then filled, one
     `TimeSeries` per cache segment that yields data:

     - ``<ifo>:horizon_distance``: for each cache entry, the value of
       the lowest-sorted `SummValueTable` row scaled by ``1.4**(5/6)``
     - ``<ifo>:tmpltbank_size``: for each cache entry, the number of
       rows in its `SnglInspiralTable`

     where ``<ifo>`` is the part of ``self.channel`` before the first
     colon.
     """
     tmpltsegs = find_cache_segments(self.tmpltbankcache)
     ifo = self.channel.split(':')[0]
     rangechannel = '%s:horizon_distance' % ifo
     sizechannel = '%s:tmpltbank_size' % ifo
     # both channels start from an empty list, replacing any old entry
     for name in (rangechannel, sizechannel):
         globalv.DATA[name] = TimeSeriesList()
     for seg in tmpltsegs:
         segcache = self.tmpltbankcache.sieve(segment=seg)
         horizons, banksizes = [], []
         for entry in segcache:
             xmldoc = llwutils.load_filename(entry.path)
             summvalues = SummValueTable.get_table(xmldoc)
             # order rows by the number leading their comment string
             summvalues.sort(key=lambda row: float(row.comment.split('_')[0]))
             horizons.append(summvalues[0].value * (1.4)**(5 / 6.))
             banksizes.append(len(SnglInspiralTable.get_table(xmldoc)))
         if not horizons:
             continue
         # one sample per cache entry, spaced by the length of the first
         # entry's segment and starting at that segment's midpoint
         dt = float(abs(segcache[0].segment))
         epoch = segcache[0].segment[0] + dt / 2.
         for name, values in ((rangechannel, horizons),
                              (sizechannel, banksizes)):
             globalv.DATA[name].append(
                 TimeSeries(values,
                            sample_rate=1 / dt,
                            epoch=epoch,
                            name=name))
             # coalescing is best-effort: a ValueError is ignored
             try:
                 globalv.DATA[name].coalesce()
             except ValueError:
                 pass
Example #6
0
def get_range(channel, segments, config=None, cache=None,
              query=True, nds=None, return_=True, nproc=1,
              datafind_error='raise', frametype=None,
              stride=None, fftlength=None, overlap=None,
              method=None, **rangekwargs):
    """Calculate the sensitive distance for a given strain channel

    New range data are generated from spectrograms when
    `_segments_diff` reports that a query is needed, and each resulting
    series is stored with `add_timeseries`. All keyword arguments other
    than ``query``, ``return_`` and ``**rangekwargs`` are handed
    unchanged to `get_spectrogram`; ``**rangekwargs`` goes to
    `_metadata` and `astro.range_timeseries`.

    Returns the result of ``get_timeseries(key, segments, query=False)``
    when ``return_`` is true, and `None` otherwise.
    """
    channel, key = _metadata(channel, **rangekwargs)
    # compare the request against what is already cached for this key
    cached = globalv.DATA.get(key, TimeSeriesList())
    new, query = _segments_diff(segments, cached.segments, query)
    if query:
        spectrogram_options = {
            'config': config,
            'cache': cache,
            'nds': nds,
            'format': 'psd',
            'frametype': frametype,
            'nproc': nproc,
            'datafind_error': datafind_error,
            'stride': stride,
            'fftlength': fftlength,
            'overlap': overlap,
            'method': method,
        }
        spectrograms = get_spectrogram(channel, new, **spectrogram_options)
        # one range series per spectrogram, filed under the range key
        for specgram in spectrograms:
            rangets = astro.range_timeseries(specgram, **rangekwargs)
            rangets.channel = key
            add_timeseries(rangets, key=key)

    if not return_:
        return None
    return get_timeseries(key, segments, query=False)
Example #7
0
def getDataInRange(station,
                   startTime,
                   endTime,
                   sortTime=True,
                   convert=False,
                   path='./',
                   verbose=False):
    '''
    Get list of data in time range

    station: str
        Name of station.
    startTime: float (unix time), str
        Earliest time. String formatted as 'yyyy-mm-dd-HH-MM-SS'
        (omitted values defaulted as 0)
    endTime: float (unix time), str
        Last time. Format same as startTime
    sortTime: bool (default: True)
        Actively sort output by start time (using the epoch of the data
        read from each file). Files with equal epochs keep the order in
        which they were found.
    convert: boolean (default: False)
        Whether to use conversion function from file.
    path: str (default './')
        Location of files
    verbose: bool (default False)
        Verbose output

    returns (data, sanity, fileList). Data and sanity are TimeSeriesList
    objects; fileList holds the file names in the same order.

    Note: must evaluate values in 'sanity' (e.g., using 'value' attribute) to get boolean
    Note: use, e.g., dataTSL.join(pad=float('nan'),gap='pad') to combine
    TimeSeriesList into single Time series.
    '''
    # single-argument print(...) behaves the same on Python 2 and 3
    if verbose:
        print('getDataInRange() --- Finding files')
    fList = getFListInRange(station, startTime, endTime, path=path)

    # get data
    if verbose:
        print('getDataInRange() --- Reading files')
    dataList = []
    saneList = []
    for fName in fList:
        data, sane = getDataFromFile(fName, convert=convert)
        dataList.append(data)
        saneList.append(sane)
    numFiles = len(dataList)

    # sort if needed
    if sortTime:
        if verbose:
            print('getDataInRange() --- Sorting data')
        # sorted() is stable and fast on nearly-sorted input, so it gives
        # the ordering of an insertion sort without needing a mutable
        # index list (range() is not a list on Python 3)
        sortIndex = sorted(range(numFiles),
                           key=lambda i: dataList[i].epoch)
    else:
        sortIndex = range(numFiles)

    # put data in TimeSeriesList
    dataTSL = TimeSeriesList()
    saneTSL = TimeSeriesList()
    for i in sortIndex:
        dataTSL.append(dataList[i])
        saneTSL.append(saneList[i])
    return dataTSL, saneTSL, [fList[i] for i in sortIndex]
Example #8
0
def get_data(station,start_time,end_time,rep='/GNOMEDrive/gnome/serverdata/',
             resample=None,activity=False,unit='V',output='all',segtxt=False,
             channel='MagneticFields'):
    """
    Glob all files within user-defined period and extract data.

    Parameters
    ----------
    station : str
      Name of the station to be analysed
    start_time : str
      Start of the requested period, written as hyphen-separated
      fields in the order year-month-day-hour-minute-second-microsecond
      (trailing fields may be left out)
    end_time : str
      End of the requested period, same format as start_time
    rep : str
      Data repository. Default is the GNOME server repository.
      Files are searched under <rep><station>/YYYY/MM/DD/.
    resample : int
      New sampling rate. If None, the sampling rate read from the
      files is kept.
    activity : bool
      Currently unused: the activity dictionary is always built and
      returned, whatever value is given here.
    unit : str
      Output unit format (V for voltage, pT for magnetic field).
      For 'pT' the expression stored in each file's 'MagFieldEq'
      attribute is applied to the data.
    output : str
      Output data to be extracted. If output is equal to 'ts',
      only the time series will be given.
    segtxt : bool
      If true, write the start and end of every data segment to the
      file "segments.txt" in the current directory.
    channel : str
      Name of the dataset to read from each HDF5 file.

    Returns
    -------
    ts_data : pycbc.types.TimeSeries
      Time series data for selected time period.
    ts_list : TimeSeriesList
      List of time series, one per data file.
    activity : gwpy.segments.DataQualityDict
      List all the segment of data retrieved
    t0 : astropy.time.Time
      First timestamp
    t1 : astropy.time.Time
      Last timestamp

    Raises
    ------
    ValueError
      If unit is neither 'V' nor 'pT'.
    """
    # single-argument print(...) behaves the same on Python 2 and 3
    if start_time is None or end_time is None:
        print("ERROR: No start or end date given...")
        quit()
    # Define data attribute to be extracted from HDF5 files
    setname   = channel
    # Build a strptime format with as many fields as the input provides
    dstr      = ['%Y','%m','%d','%H','%M','%S','%f']
    dsplit    = '-'.join(dstr[:start_time.count('-')+1])
    start     = datetime.strptime(start_time,dsplit)
    dsplit    = '-'.join(dstr[:end_time.count('-')+1])
    end       = datetime.strptime(end_time,dsplit)
    # Look for files minute by minute over the requested period
    dataset   = []
    for date in numpy.arange(start,end,timedelta(minutes=1)):
        date = date.astype(datetime)
        path1 = rep+station+'/'+date.strftime("%Y/%m/%d/")
        path2 = station+'_'+date.strftime("%Y%m%d_%H%M*.hdf5")
        fullpath = os.path.join(path1,path2)
        dataset += glob.glob(fullpath)
    if not dataset:
        print("ERROR: No data files were found...")
        quit()
    file_order,data_order = {},{}
    # Every handle opened below is recorded here so that all of them are
    # closed again, even on error or when two files share one segment
    handles = []
    try:
        for fname in dataset:
            hfile = h5py.File(fname, "r")
            handles.append(hfile)
            # Extract all attributes from the data
            attrs = hfile[setname].attrs
            # Define each attribute
            date_str, t0, t1 = attrs["Date"], attrs["t0"], attrs["t1"]
            # Construct starting time from data
            start_utc = construct_utc_from_metadata(date_str, t0)
            # Construct ending time from data
            end_utc = construct_utc_from_metadata(date_str, t1)
            # Represent the range of times in the semi-open interval
            segfile = segment(start_utc,end_utc)
            file_order[segfile] = fname
            data_order[segfile] = hfile
        # Create list of time series from every segment, reusing the
        # handles that are already open instead of opening each file again
        ts_list = TimeSeriesList()
        for seg in sorted(file_order):
            dset = data_order[seg][setname]
            sample_rate = dset.attrs["SamplingRate(Hz)"]
            gps_epoch = construct_utc_from_metadata(dset.attrs["Date"], dset.attrs["t0"])
            data = dset[:]
            if unit=='pT':
                # NOTE(security): eval() runs an expression stored in the
                # file's attributes -- only use with trusted data files
                data = eval(dset.attrs['MagFieldEq'].replace('MagneticFields','data').replace('[pT]',''))
            ts_data = TimeSeries(data, sample_rate=sample_rate, epoch=gps_epoch)
            ts_list.append(ts_data)
        # Sorted list of the segments of time covered by the input data
        seglist = segmentlist(data_order.keys())
        seglist.sort()
        # Initialise dictionary for segment information
        activity = DataQualityDict()
        if segtxt:
            # Save time span for each segment in ASCII file
            with open("segments.txt", "w") as fout:
                for seg in seglist:
                    fout.write("%10.9f %10.9f\n" % seg)
        # FIXME: Active should be masked from the sanity channel
        activity[station] = DataQualityFlag(station,active=seglist.coalesce(),known=seglist.coalesce())
        # NOTE(review): the list is rebuilt here, presumably because
        # coalesce() above changes seglist in place so its entries may no
        # longer match the keys of data_order -- confirm before removing
        seglist = segmentlist(data_order.keys())
        seglist.sort()
        # Retrieve channel data for all the segments
        if unit=='V':
            full_data = numpy.hstack([data_order[seg][setname][:] for seg in seglist])
        elif unit=='pT':
            full_data = []
            for seg in seglist:
                dset = data_order[seg][setname]
                data = dset[:]
                # NOTE(security): see the eval() remark above
                data = eval(dset.attrs['MagFieldEq'].replace('MagneticFields','data').replace('[pT]',''))
                full_data = numpy.hstack((full_data,data))
        else:
            raise ValueError("unit must be 'V' or 'pT', got %r" % (unit,))
    finally:
        for handle in handles:
            handle.close()
    new_sample_rate = float(sample_rate) if resample is None else float(resample)
    new_data_length = len(full_data)*new_sample_rate/float(sample_rate)
    full_data = scipy.signal.resample(full_data,int(new_data_length))
    # Models a time series consisting of uniformly sampled scalar values
    ts_data = types.TimeSeries(full_data,delta_t=1./new_sample_rate,epoch=seglist[0][0])
    if output=='ts':
        return ts_data
    t0,t1 = time_convert(start_time,end_time)
    return ts_data,ts_list,activity,t0,t1
Example #9
0
; node states
600 = Low noise
"""

# -- test data

# channel-name endings for which test data are generated
SUFFICES = ["STATE_N", "REQUEST_N", "NOMINAL_N", "OK", "MODE", "OP"]

# one single-series list per "L1:GRD-ISC_LOCK_<suffix>" channel, each
# series holding 3600 * 16 samples of the value 600 at sample_rate=16
DATA = {
    name: TimeSeriesList(TimeSeries(
        [600] * (3600 * 16),
        sample_rate=16,
        name=name,
        channel=name,
    ))
    for name in ("L1:GRD-ISC_LOCK_{}".format(suffix) for suffix in SUFFICES)
}

# -- utils --------------------------------------------------------------------


def _get_inputs(workdir):
    """Prepare, and return paths to, input data products
    """
    # set global timeseries data
    globalv.DATA = DATA
    # get path to data files
    ini = os.path.join(workdir, "config.ini")
    archive = os.path.abspath(os.path.join(workdir, "archive.h5"))
    # write to data files