Example #1
def do_the_job(dfile, sensor, recenter=True, wtsel=None, clean=False, mbasal='meanfirst', alt_smooth=False, wavy=False):
    """
    Transforms the data reconstructing the peaks using some components of the PCA
    and uses the mean of the baseline points to move the peak

    :param pcap: Perform or not PCA
    :param dfile: datafile
    :param sensor: sensor
    :param components: Components selected from the PCA
    :param lbasal: Points to use to move the peak
    :param recenter: recenters the peak so it is in the center of the window
    :param basal: moving the peak so the begginning is closer to zero
                 'meanfirst', first n points
                 'meanmin', first min points of the first half of the peak
    :return:
    """
    print(datainfo.dpath + datainfo.name, sensor)

    f = datainfo.open_experiment_data(mode='r')
    data = datainfo.get_peaks_resample(f, dfile, sensor)
    datainfo.close_experiment_data(f)
    pcap = datainfo.get_peaks_smooth_parameters('pcasmooth')
    components = datainfo.get_peaks_smooth_parameters('components')
    baseline = datainfo.get_peaks_smooth_parameters('wbaseline')
    lbasal = range(baseline)
    if alt_smooth:
        parl = datainfo.get_peaks_alt_smooth_parameters('lambda')
        parp = datainfo.get_peaks_alt_smooth_parameters('p')

    if data is not None:
        # if there is a clean list of peaks then the PCA is computed only for the clean peaks
        if clean:
            # the clean list lives in the datafile, which is already closed, so reopen it
            f = datainfo.open_experiment_data(mode='r')
            lt = datainfo.get_clean_time(f, dfile, sensor)
            if lt is not None:
                ltime = list(lt[()])
                print(data.shape)
                data = data[ltime]
                print(data.shape)
            datainfo.close_experiment_data(f)

        if alt_smooth:
            trans = np.zeros((data.shape[0], data.shape[1]))
            for i in range(data.shape[0]):
                trans[i] = baseline_als(data[i,:], parl, parp)
        elif pcap:
            pca = PCA(n_components=data.shape[1])
            res = pca.fit_transform(data)

            sexp = 0.0
            ncomp = 0
            while sexp < 0.98:
                sexp += pca.explained_variance_ratio_[ncomp]
                ncomp += 1
            components = ncomp
            print('VEX=', np.sum(pca.explained_variance_ratio_[0:components]), components)
            res[:, components:] = 0
            trans = pca.inverse_transform(res)
        else:
            trans = data

        # If recenter, find the new center of the peak and crop the data to wtsel milliseconds
        if recenter:
            # Original window size in milliseconds
            wtsel_orig = datainfo.get_peaks_resample_parameters('wtsel')
            # keep the original window length if no new length is given
            if wtsel is None:
                wtsel = wtsel_orig
            # current window midpoint
            midpoint = int(trans.shape[1]/2.0)
            # New window size
            wtlen = int(trans.shape[1]*(wtsel/wtsel_orig))
            wtdisc = int((trans.shape[1] - wtlen)/2.0)
            # in case we have an odd number of points in the window
            if wtlen + (2*wtdisc) != trans.shape[1]:
                wtdisci = wtdisc + 1
            else:
                wtdisci = wtdisc

            new_trans = np.zeros((trans.shape[0], wtlen))
            for pk in range(trans.shape[0]):
                # maximum around the midpoint of the current window
                # (fixed to 10 points around the center, currently unused for the crop)
                center = np.argmax(trans[pk, midpoint-10:midpoint+10])
                new_trans[pk] = trans[pk, wtdisci:trans.shape[1]-wtdisc]

            trans = new_trans

        # Subtract the basal

        if mbasal == 'meanfirst':
            for row in range(trans.shape[0]):
                vals = trans[row, lbasal]
                basal = np.mean(vals)
                trans[row] -= basal
                #show_two_signals(trans[row]+basal, trans[row])
        elif mbasal == 'meanmin':
            for row in range(trans.shape[0]):
                vals = trans[row, 0:trans.shape[1]//2]
                vals = np.array(sorted(list(vals)))
                basal = np.mean(vals[lbasal])
                trans[row] -= basal
                #show_two_signals(trans[row]+basal, trans[row])
        elif mbasal == 'meanlast':
            for row in range(trans.shape[0]):
                vals = trans[row, (trans.shape[1]//3)*2:trans.shape[1]]
                basal = np.mean(vals)
                trans[row] -= basal
                #show_two_signals(trans[row]+basal, trans[row])
        elif mbasal == 'alternative':
            for row in range(trans.shape[0]):
                basal = find_baseline(trans[row, 0:trans.shape[1]//2], resolution=25)
                trans[row] -= basal

        if wavy:
            sel = outliers_wavy(trans)
            trans = trans[sel]

        return trans, dfile, sensor
    else:
        return None, dfile, sensor
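
The PCA step above can be reproduced in isolation: fit a full PCA on the matrix of peak windows, keep the leading components that explain 98% of the variance, zero the rest and invert the transform. This is a minimal sketch with scikit-learn on synthetic peak windows (the data and sizes are illustrative, not from the repository):

import numpy as np
from sklearn.decomposition import PCA

# Synthetic peak windows: 200 peaks of 120 samples (illustrative data only)
rng = np.random.default_rng(0)
peak = np.exp(-0.5 * ((np.arange(120) - 60) / 8.0) ** 2)
data = peak + 0.05 * rng.standard_normal((200, 120))

pca = PCA(n_components=data.shape[1])
res = pca.fit_transform(data)

# Accumulate components until 98% of the variance is explained
sexp, ncomp = 0.0, 0
while sexp < 0.98:
    sexp += pca.explained_variance_ratio_[ncomp]
    ncomp += 1

# Zero the trailing components and reconstruct the smoothed peaks
res[:, ncomp:] = 0
smoothed = pca.inverse_transform(res)
print(ncomp, smoothed.shape)
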
Example #2
    ldatap = []
    ldatappca = []
    ltimes = []
    for dfiles in [datainfo.datafiles[0]]:
        print(dfiles)
        d = f[dfiles + '/' + s + '/' + 'PeaksResample']
        dataf = d[()]
        ldatap.append(dataf)
        #d = f[dfiles + '/' + s + '/' + 'Time']
        #times = d[()]
        #ltimes.append(times)
        d = f[dfiles + '/' + s + '/' + 'PeaksResamplePCA']
        dataf = d[()]
        ldatappca.append(dataf)

    data = ldatap[0]  #np.concatenate(ldata)
    datapca = ldatappca[0]  #np.concatenate(ldata)
    #ptime = ltimes[0]

    #print(len(data))
    long = data.shape[1] // 3
    for i in range(5):  #range(data.shape[0]):
        # print dataraw[i]
        # print data[i]
        #print('T = %d'%ptime[i])
        base = baseline_als(data[i], 5, 0.9)
        show_signal(base, find_baseline(data[i, :long], resolution=50))
        show_signal(base, find_baseline(base[i:long], resolution=100))
    #show_two_signals(data[i],datapca[i])
    # show_signal(datapca[i])
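
baseline_als itself is not shown in these examples. Its (signal, lambda, p) call signature matches the classic asymmetric least squares smoother of Eilers & Boelens, so a minimal sketch along those lines (an assumption about what the repository actually implements) looks like this:

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve

def baseline_als(y, lam, p, niter=10):
    # Asymmetric least squares baseline (Eilers & Boelens style sketch):
    # lam controls the smoothness, p the asymmetry of the weights
    L = len(y)
    D = sparse.csc_matrix(np.diff(np.eye(L), 2))  # second-difference operator
    w = np.ones(L)
    for _ in range(niter):
        W = sparse.spdiags(w, 0, L, L)
        Z = W + lam * D.dot(D.transpose())
        z = spsolve(Z, w * y)
        # points above the current baseline get weight p, points below 1 - p
        w = p * (y > z) + (1 - p) * (y < z)
    return z
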
Example #3
def do_the_job(dfile,
               sensor,
               recenter=True,
               wtsel=None,
               clean=False,
               mbasal='meanfirst',
               alt_smooth=False,
               wavy=False,
               vpca=0.98):
    """
    Transforms the data reconstructing the peaks using some components of the PCA
    and uses the mean of the baseline points to move the peak

    :param pcap: Perform or not PCA
    :param dfile: datafile
    :param sensor: sensor
    :param components: Components selected from the PCA
    :param lbasal: Points to use to move the peak
    :param recenter: recenters the peak so it is in the center of the window
    :param basal: moving the peak so the begginning is closer to zero
                 'meanfirst', first n points
                 'meanmin', first min points of the first half of the peak
    :return:
    """
    print(datainfo.dpath + datainfo.name, sensor)

    f = datainfo.open_experiment_data(mode='r')
    data = datainfo.get_peaks_resample(f, dfile, sensor)
    datainfo.close_experiment_data(f)
    pcap = datainfo.get_peaks_smooth_parameters('pcasmooth')
    components = datainfo.get_peaks_smooth_parameters('components')
    baseline = datainfo.get_peaks_smooth_parameters('wbaseline')
    lbasal = range(baseline)
    if alt_smooth:
        parl = datainfo.get_peaks_alt_smooth_parameters('lambda')
        parp = datainfo.get_peaks_alt_smooth_parameters('p')

    if data is not None:
        # if there is a clean list of peaks then the PCA is computed only for the clean peaks
        if clean:
            # the clean list lives in the datafile, which is already closed, so reopen it
            f = datainfo.open_experiment_data(mode='r')
            lt = datainfo.get_clean_time(f, dfile, sensor)
            if lt is not None:
                ltime = list(lt[()])
                print(data.shape)
                data = data[ltime]
                print(data.shape)
            datainfo.close_experiment_data(f)

        if alt_smooth:
            trans = np.zeros((data.shape[0], data.shape[1]))
            for i in range(data.shape[0]):
                trans[i] = baseline_als(data[i, :], parl, parp)
        elif pcap:
            pca = PCA(n_components=data.shape[1])
            res = pca.fit_transform(data)

            sexp = 0.0
            ncomp = 0
            while sexp < vpca:
                sexp += pca.explained_variance_ratio_[ncomp]
                ncomp += 1
            components = ncomp
            print('VEX=', np.sum(pca.explained_variance_ratio_[0:components]),
                  components)
            res[:, components:] = 0
            trans = pca.inverse_transform(res)
        else:
            trans = data

        # If recenter, find the new center of the peak and crop the data to wtsel milliseconds
        if recenter:
            # Original window size in milliseconds
            wtsel_orig = datainfo.get_peaks_resample_parameters('wtsel')
            # keep the original window length if no new length is given
            if wtsel is None:
                wtsel = wtsel_orig
            # current window midpoint
            midpoint = int(trans.shape[1] / 2.0)
            # New window size
            wtlen = int(trans.shape[1] * (wtsel / wtsel_orig))
            wtdisc = int((trans.shape[1] - wtlen) / 2.0)
            # in case we have an odd number of points in the window
            if wtlen + (2 * wtdisc) != trans.shape[1]:
                wtdisci = wtdisc + 1
            else:
                wtdisci = wtdisc

            new_trans = np.zeros((trans.shape[0], wtlen))
            for pk in range(trans.shape[0]):
                # maximum around the midpoint of the current window
                # (fixed to 10 points around the center, currently unused for the crop)
                center = np.argmax(trans[pk, midpoint - 10:midpoint + 10])
                new_trans[pk] = trans[pk, wtdisci:trans.shape[1] - wtdisc]

            trans = new_trans

        # Subtract the basal

        if mbasal == 'meanfirst':
            for row in range(trans.shape[0]):
                vals = trans[row, lbasal]
                basal = np.mean(vals)
                trans[row] -= basal
                #show_two_signals(trans[row]+basal, trans[row])
        elif mbasal == 'meanmin':
            for row in range(trans.shape[0]):
                vals = trans[row, 0:trans.shape[1] // 2]
                vals = np.array(sorted(list(vals)))
                basal = np.mean(vals[lbasal])
                trans[row] -= basal
                #show_two_signals(trans[row]+basal, trans[row])
        elif mbasal == 'globalmeanfirst':
            globbasal = np.mean(trans[:, lbasal])
            trans -= globbasal
        elif mbasal == 'alternative':
            for row in range(trans.shape[0]):
                basal = find_baseline(trans[row, 0:trans.shape[1] // 2],
                                      resolution=25)
                trans[row] -= basal

        if wavy:
            sel = outliers_wavy(trans)
            trans = trans[sel]

        return trans, dfile, sensor
    else:
        return None, dfile, sensor
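
The recentering block in do_the_job finds the maximum around the window midpoint but then crops symmetrically, so the detected center is not used. A standalone helper that crops around the detected maximum could look like the sketch below (recenter_crop is a hypothetical name, not a function of the repository):

import numpy as np

def recenter_crop(peaks, wtsel, wtsel_orig):
    # Crop each peak window from wtsel_orig ms down to wtsel ms,
    # centering the new window on the maximum found near the old midpoint
    npts = peaks.shape[1]
    midpoint = npts // 2
    wtlen = int(npts * (wtsel / wtsel_orig))
    half = wtlen // 2
    out = np.zeros((peaks.shape[0], wtlen))
    for pk in range(peaks.shape[0]):
        # maximum within +/- 10 samples of the old midpoint
        center = midpoint - 10 + int(np.argmax(peaks[pk, midpoint - 10:midpoint + 10]))
        start = min(max(center - half, 0), npts - wtlen)
        out[pk] = peaks[pk, start:start + wtlen]
    return out
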
Example #4
    ldatap = []
    ldatappca = []
    for dfiles in [datainfo.datafiles[0]]:
        print(dfiles)
        d = f[dfiles + '/' + s + '/' + 'PeaksResample']
        dataf = d[()]
        ldatap.append(dataf)
        #d = f[dfiles + '/' + s + '/' + 'Time']
        #times = d[()]
        #ltimes.append(times)
        d = f[dfiles + '/' + s + '/' + 'PeaksResamplePCA']
        dataf = d[()]
        ldatappca.append(dataf)

    data = ldatap[0] #np.concatenate(ldata)
    datapca = ldatappca[0] #np.concatenate(ldata)
    #ptime = ltimes[0]

    #print(len(data))
    long = data.shape[1] // 3
    for i in range(5): #range(data.shape[0]):
        # print dataraw[i]
        # print data[i]
        #print('T = %d'%ptime[i])
        base = baseline_als(data[i], 5, 0.9)
        show_signal(base, find_baseline(data[i,:long], resolution=50))
        show_signal(base, find_baseline(base[i:long], resolution=100))
        #show_two_signals(data[i],datapca[i])
        # show_signal(datapca[i])
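
The f object indexed with '<datafile>/<sensor>/PeaksResample' behaves like an open HDF5 file, so the datasets used in these snippets can be read directly with h5py. The file name and group identifiers below are illustrative only:

import h5py

with h5py.File('experiment.hdf5', 'r') as f:
    dfile, sensor = 'datafile0', 'sensor0'   # hypothetical identifiers
    peaks = f[dfile + '/' + sensor + '/PeaksResample'][()]         # resampled peak windows
    peaks_pca = f[dfile + '/' + sensor + '/PeaksResamplePCA'][()]  # PCA-smoothed windows
    print(peaks.shape, peaks_pca.shape)
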