Example #1
def do_proc_F1(dinp, doutp, parameter):
    "scan all cols of dinp, apply proc() and store into doutp"
    size = doutp.axis1.size
    scan = min(dinp.size2,
               doutp.size2)  # min() because there is no need to do extra work
    F1widgets = [
        'Processing F1: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=F1widgets,
                          maxval=scan).start()  #, fd=sys.stdout)
    for i in range(scan):
        c = dinp.col(i)
        apod(c, size)
        c.rfft()
        # get statistics
        buff = c.get_buffer()
        b = buff.copy()
        for _ in range(10):  # iterative 3-sigma clipping ("_" avoids shadowing the column index i)
            b = b[b - b.mean() < 3 * b.std()]
        # compute the ridge and remove it
        if parameter.do_F1 and parameter.do_rem_ridge:
            c -= b.mean()
        # clean for compression
        if parameter.compress_outfile:
            threshold = parameter.compress_level * b.std()
            c.zeroing(threshold)
            c = hmclear(c)
        doutp.set_col(i, c)
        pbar.update(i + 1)
    pbar.finish()
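
The statistics loop above is an iterative sigma-clipping pass: points more than 3 sigma above the mean are repeatedly discarded so that strong peaks do not bias the ridge (baseline) estimate. A minimal standalone sketch of the same idea, on a hypothetical NumPy buffer:

import numpy as np

b = np.random.randn(1024)             # hypothetical spectrum buffer
b[100] = 50.0                         # a strong peak that would bias the mean
for _ in range(10):                   # a few clipping passes converge quickly
    b = b[b - b.mean() < 3 * b.std()] # keep points below mean + 3 sigma
ridge = b.mean()                      # peak-free estimate of the baseline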
Example #2
def do_proc_F2mp(dinp, doutp, parameter):
    "do the F2 processing in MP"
    size = doutp.axis2.size
    scan = min(dinp.size1,
               doutp.size1)  # min() because there is no need to do extra work
    F2widgets = [
        'Processing F2: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=F2widgets,
                          maxval=scan).start()  #, fd=sys.stdout)
    xarg = iterargF2(dinp, size, scan)  # construct iterator for main loop
    if parameter.mp:  # means multiprocessing //
        res = Pool.imap(_do_proc_F2, xarg)
        for i, r in enumerate(res):
            doutp.set_row(i, r)
            pbar.update(i + 1)
    elif mpiutil.MPI_size > 1:  # code for MPI processing //
        res = mpiutil.enum_imap(_do_proc_F2, xarg)  # apply it
        for i, r in res:  # and get results
            doutp.set_row(i, r)
            pbar.update(i + 1)
    else:  # plain non //
        res = imap(_do_proc_F2, xarg)
        for i, r in enumerate(res):
            doutp.set_row(i, r)
            pbar.update(i + 1)
    pbar.finish()
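
The three branches share one worker (_do_proc_F2) and one argument iterator; only the mapping engine changes (multiprocessing Pool, MPI helper, or plain serial map). A reduced sketch of this dispatch pattern, with placeholder names rather than SPIKE's API:

import multiprocessing as mp

def worker(arg):                      # hypothetical per-item task
    return arg * arg

def run(items, use_mp=False):
    "map worker over items, serially or on a multiprocessing Pool"
    if use_mp:
        with mp.Pool() as pool:
            # imap is lazy and preserves input order, like Pool.imap above
            for i, r in enumerate(pool.imap(worker, items)):
                yield i, r
    else:
        for i, r in enumerate(map(worker, items)):
            yield i, r

if __name__ == '__main__':
    for i, r in run(range(5), use_mp=True):
        print(i, r)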
Example #3
def do_proc_F1_modu(dinp, doutp, parameter):
    "as do_proc_F1, but applies hypercomplex modulus() at the end"
    size = 2 * doutp.axis1.size
    scan = min(dinp.size2, doutp.size2)
    F1widgets = [
        'Processing F1 modu: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=F1widgets,
                          maxval=scan).start()  #, fd=sys.stdout)
    d = FTICRData(buffer=np.zeros(
        (2 * doutp.size1, 2)))  # 2 columns - used for hypercomplex modulus
    for i in range(scan):
        d.chsize(2 * doutp.size1,
                 2)  # 2 columns - used for hypercomplex modulus
        for off in (0, 1):
            p = dinp.col(2 * i + off)
            apod(p, size)
            p.rfft()
            d.set_col(off, p)
        d.axis1.itype = 1
        d.axis2.itype = 1
        d.modulus()
        # recover buffer
        c = d.col(0)
        # get statistics
        buff = c.get_buffer()
        b = buff.copy()
        for _ in range(10):  # iterative 3-sigma clipping ("_" avoids shadowing the column index i)
            b = b[b - b.mean() < 3 * b.std()]
        # compute the ridge and remove it
        if parameter.do_F1 and parameter.do_rem_ridge:
            c -= b.mean()
        # clean for compression
        if parameter.compress_outfile:
            threshold = parameter.compress_level * b.std()
            c.zeroing(threshold)
            c = hmclear(c)
        doutp.set_col(i, c)
        pbar.update(i + 1)
    pbar.finish()
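
For each point, the hypercomplex modulus combines the four real components coming from the two phase-alternated columns; it amounts to sqrt(rr**2 + ri**2 + ir**2 + ii**2). A minimal NumPy sketch of that operation (the interleaved layout shown here is an assumption for illustration, not FTICRData's internal storage):

import numpy as np

col0 = np.random.randn(256)           # hypothetical: rr, ir, rr, ir, ... pairs
col1 = np.random.randn(256)           # hypothetical: ri, ii, ri, ii, ... pairs
rr, ir = col0[0::2], col0[1::2]
ri, ii = col1[0::2], col1[1::2]
modu = np.sqrt(rr**2 + ri**2 + ir**2 + ii**2)   # hypercomplex modulus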
Example #4
def do_proc_F2(dinp, doutp, parameter):
    "do the F2 processing - serial code"
    size = doutp.axis2.size
    scan = min(dinp.size1,
               doutp.size1)  # min() because there is no need to do extra work
    #scan = dinp.size1 # when was it done?
    F2widgets = [
        'Processing F2: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=F2widgets,
                          maxval=scan).start()  #, fd=sys.stdout)
    print("############  in do_proc_F2 #########")
    print("dinp.axis1.itype ", dinp.axis1.itype)
    print("dinp.axis2.itype ", dinp.axis2.itype)
    print("doutp.axis1.itype ", doutp.axis1.itype)
    print("doutp.axis2.itype ", doutp.axis2.itype)
    print("dinp.axis1.size ", dinp.axis1.size)
    print("dinp.axis2.size ", dinp.axis2.size)
    print("doutp.axis1.size ", doutp.axis1.size)
    print("doutp.axis2.size ", doutp.axis2.size)
    print("########################### doutp.report() ")
    print(doutp.report())
    #print dir(doutp)
    for i in range(scan):
        r = dinp.row(i)
        apod(r, size)
        r.rfft()
        if parameter.compress_outfile:
            r = hmclear(r)
        doutp.set_row(i, r)
        pbar.update(i + 1)
        if interfproc:
            output = open('InterfProc/progbar.pkl', 'wb')
            pb = ['F2', int((i + 1) / float(scan) * 100)]
            pickle.dump(pb, output)
            output.close()
    pbar.finish()
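
Each row receives the same treatment: apodization, then a real FFT. Stripped of the SPIKE machinery, one row boils down to something like the following sketch (a Hamming window stands in for apod(), which is an assumption for illustration):

import numpy as np

fid = np.random.randn(4096)              # hypothetical transient (one row)
windowed = fid * np.hamming(fid.size)    # stand-in for apod(r, size)
spectrum = np.fft.rfft(windowed)         # complex spectrum, fid.size//2 + 1 points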
Example #5
def do_palma(npkd,
             miniSNR=32,
             mppool=None,
             nbiter=1000,
             lamda=0.1,
             uncertainty=1.2,
             precision=1E-8):
    """
    perform the PALMA computation on each column of the 2D dataset
    the dataset should have been prepared with prepare_palma()

    the noise in the initial spectrum is analysed on the first DOSY increment
    then each column is processed with palma() if its intensity is sufficient

    miniSNR: minimum Signal-to-Noise Ratio required for a column to be processed
    mppool: if passed as a multiprocessing.Pool, it will be used for parallel processing

    the other parameters are transparently passed to palma()

    """
    import multiprocessing as mp
    import itertools
    from spike.util import progressbar as pg
    from spike.util import widgets

    # local functions
    def palmaiter(npkd):
        "iterator for // processing around palma() using mp.pool.imap()"
        #for c in npkd.xcol(): #
        for icol in np.random.permutation(
                npkd.size2):  # create a randomized range
            c = npkd.col(icol)
            yield (icol, c, N, valmini, nbiter, lamda, precision, uncertainty)

    # prepare
    if mppool is not None:
        if isinstance(mppool, mp.pool.Pool):
            paral = True
        else:
            raise Exception(
                "parameter mppool should be either None or of multiprocessing.Pool type"
            )
    else:
        paral = False
    npkd.check2D()
    K = npkd.axis1.K
    M, N = K.shape
    output = npkd.copy()
    output.chsize(sz1=N)
    chi2 = np.zeros(
        npkd.size2)  # this vector contains the final chi2 for each column
    noise = spike.util.signal_tools.findnoiselevel(npkd.row(0).get_buffer())
    valmini = noise * miniSNR
    # loop
    xarg = palmaiter(npkd)
    wdg = [
        'PALMA: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=wdg,
                          maxval=npkd.size2).start()  #, fd=sys.stdout)
    if paral:
        result = mppool.imap(process, xarg)
    else:
        result = map(process, xarg)  # lazy under Python 3 (itertools.imap in Python 2)
    # collect
    for ii, res in enumerate(result):
        # if icol%50 == 0 :
        #     print ("DOSY # %d / %d"%(icol,npkd.size2))
        pbar.update(ii + 1)
        sys.stdout.flush()
        icol, c, lchi2 = res
        chi2[icol] = lchi2
        output.set_col(icol, c)

    # for icol in range(npkd.size2):
    #     #if icol%10 ==0: print (icol, "iteration")
    #     c = npkd.col(icol)
    #     if (c[0] > miniSNR*noise):
    #         y = c.get_buffer()
    #         c = c.palma(N, nbiter=nbiter, lamda=lamda, precision=precision, uncertainty=uncertainty)
    #         chi2[icol] = np.linalg.norm(y-np.dot(K,c.get_buffer()))
    #     else:
    #         c = c.set_buffer(np.zeros(N))
    #     result.set_col(icol, c)
    pbar.finish()
    output.axis1.chi2 = chi2

    return output
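
A typical call sequence, sketched under the assumption that d is a 2D DOSY dataset on which prepare_palma() has already been run:

import multiprocessing as mp

pool = mp.Pool(4)                     # optional: processes columns in parallel
result = do_palma(d, miniSNR=32, mppool=pool, nbiter=1000, lamda=0.1)
pool.close()
pool.join()
print(result.axis1.chi2)              # final chi2 of each processed column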
Example #6
def Import_and_Process_LC(folder,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly
    It creates an HDF5 file containing the data-set and returns the associated FTICRData

    compression is active if compress=True
    comp_level is the ratio (in x sigma) under which values are set to 0.0
    downsampling is applied if downsample=True
    These two options are effective, but they take time.

    dparameters: if present, a dictionary copied into the final file as json
    """
    from spike.File import Solarix, Apex
    #    from spike.File.Solarix import locate_acquisition, read_param
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData
    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            importer = _importer
            break
        except:
            pass  # not this format, try the next importer
    else:  # the for loop fell through without break: nothing matched
        raise Exception("could not import data-set - unrecognized format")
    # get chromatogram
    minu, tic, maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")
    #size, specwidth,  offset, left_point, highmass, calibA, calibB, calibC, lowfreq, highfreq
    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    data.axis2.size = 1 * sizeF2  # The processing below might change the size, so we anticipate here !
    data.axis2.specwidth = float(params["SW_h"])
    found = False  # search for excitation bandwidth
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
        found = True
    except:
        pass
    if not found:
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version

    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3-parameter calibration, Warning: calibB is -ML2')
        data.axis2.calibB *= -1

    data.params = params  # add the parameters to the data-set
    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params,
                             h5name='params')  # store params in the file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except:
        print('ExciteSweep file not stored')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [
        0.0
    ]  # remembers max values in all datasets - main and downsampled
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    if sys.maxsize == 2**31 - 1:  # the flag used by array depends on architecture - here on 32bit
        flag = 'l'  # Apex files are in int32
    else:  # here in 64bit
        flag = 'i'  # strange, but works here.
    spectre = FTICRData(shape=(sizeF2, ))  # to handle FT
    projection = FTICRData(buffer=np.zeros(sizeF2))  # to accumulate projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    with open(fname, "rb") as f:
        ipacket = 0
        szpacket = 10
        packet = np.zeros(
            (szpacket,
             sizeF2))  # store by packet to increase compression speed
        for i1 in range(sizeF1):
            absmax = 0.0
            #print(i1, ipacket, end='  ')
            tbuf = f.read(4 * sizeF2)
            if len(tbuf) != 4 * sizeF2:
                break
            abuf = np.array(array.array(flag, tbuf), dtype=float)
            # processing
            spectre.set_buffer(abuf)
            spectre.adapt_size()
            spectre.hamming().zf(2).rfft().modulus()  # double the size
            mu, sigma = spectre.robust_stats(iterations=5)
            spectre.buffer -= mu
            if compress:
                spectre.zeroing(sigma * comp_level).eroding()
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0],
                                   abs(packet.max()))  # compute max
                data.buffer[i1 - (szpacket - 1):i1 +
                            1, :] = packet[:, :]  # and copy
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample
            for idt, datai in enumerate(datalist):
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    spectre.set_buffer(abuf)
                    spectre.adapt_size()
                    spectre.chsize(
                        datai.size2).hamming().zf(2).rfft().modulus()
                    mu, sigma = spectre.robust_stats(iterations=5)
                    spectre.buffer -= mu
                    if compress:
                        spectre.zeroing(sigma * comp_level).eroding()
                    maxvalues[idt + 1] = max(
                        maxvalues[idt + 1],
                        spectre.absmax)  # compute max (0 is full spectrum)
                    datai.buffer[ii1, :] = spectre.buffer[:]

            pbar.update(i1)
        # flush the remaining, partially filled packet
        if ipacket > 0:
            maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
            data.buffer[i1 - ipacket + 1:i1 + 1, :] = packet[:ipacket, :]
    # store maxvalues in the file
    HF.store_internal_object(maxvalues, h5name='maxvalues')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')

    # then write projection as 'projectionF2'
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]
    pbar.finish()
    HF.flush()
    return data
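
A minimal call, assuming a Bruker LC-MS acquisition folder (the path and the dparameters content are placeholders, and closing via the hdf5file link assumes the HDF5File wrapper exposes close()):

data = Import_and_Process_LC("my_LCMS_run.d",
                             outfile="LC-MS.msh5",
                             compress=True,
                             comp_level=3.0,
                             downsample=True,
                             dparameters={"operator": "me"})
print(data)                # the returned FTICRData keeps a link to the file
data.hdf5file.close()      # close the HDF5 file when done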
Example #7
def do_proc_F1_demodu_modu(dinp, doutp, parameter):
    "as do_proc_F1, but applies demodu and then complex modulus() at the end"
    size = 2 * doutp.axis1.size
    scan = min(dinp.size2, doutp.size2)
    F1widgets = [
        'Processing F1 demodu-modulus: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=F1widgets,
                          maxval=scan).start()  #, fd=sys.stdout)

    if parameter.freq_f1demodu == 0:  # means not given in .mscf file -> compute from highmass
        hshift = dinp.axis2.lowfreq  # frequency shift in points, computed from lowfreq of excitation pulse - assuming the pulse was from high to low!
    else:
        hshift = parameter.freq_f1demodu
    shift = doutp.axis1.htoi(hshift)
    rot = dinp.axis1.htoi(
        hshift)  # rot correction is applied in the starting space
    # sampling
    if parameter.samplingfile is not None:  #    NUS
        dinp.axis1.load_sampling(
            parameter.samplingfile
        )  # load sampling file, and compute rot in non-NUS space
        cdinp = dinp.col(0)
        cdinp.zf()
        rot = cdinp.axis1.htoi(hshift)
        #        print( "11111111", shift, rot)
        del cdinp
    if debug > 0: print("LEFT_POINT", shift)
    doutp.axis1.offsetfreq = hshift

    xarg = iterarg(dinp, rot, size,
                   parameter)  # construct iterator for main loop
    if parameter.mp:  # means multiprocessing //
        res = Pool.imap(_do_proc_F1_demodu_modu, xarg)
        for i, buf in enumerate(res):
            doutp.buffer[:, i] = buf
            #            doutp.set_col(i,p)
            pbar.update(i + 1)
    elif mpiutil.MPI_size > 1:  # code for MPI processing //
        res = mpiutil.enum_imap(_do_proc_F1_demodu_modu, xarg)  # apply it
        for i, buf in res:  # and get results
            doutp.buffer[:, i] = buf
            #            doutp.set_col(i, p)
            pbar.update(i + 1)
            if interfproc:
                output = open('InterfProc/progbar.pkl', 'wb')
                pb = ['F1', int((i + 1) / float(scan) * 100)]
                pickle.dump(pb, output)  # for Qt progressbar
                output.close()
        if interfproc:
            output = open('InterfProc/progbar.pkl', 'wb')
            pb = ['end']
            pickle.dump(pb, output)  # for Qt progressbar
            output.close()
    else:  # plain non //
        res = imap(_do_proc_F1_demodu_modu, xarg)
        for i, buf in enumerate(res):
            doutp.buffer[:, i] = buf
            #            doutp.set_col(i,p)
            pbar.update(i + 1)
    pbar.finish()
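
When interfproc is set, the loop periodically dumps a tiny pickle so an external GUI (a Qt progress bar) can follow the computation; the payload is ['F1' or 'F2', percent] while running, then ['end']. A sketch of a matching reader, with the polling loop as an assumption:

import pickle
import time

def poll_progress(path='InterfProc/progbar.pkl'):
    "poll the pickle written by the processing job until it signals the end"
    while True:
        try:
            with open(path, 'rb') as f:
                pb = pickle.load(f)
        except (IOError, EOFError):   # file absent or caught mid-write
            time.sleep(0.5)
            continue
        if pb[0] == 'end':
            break
        axis, percent = pb            # e.g. ['F1', 42]
        print("%s: %d%%" % (axis, percent))
        time.sleep(0.5)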
Example #8
def Import_and_Process_LC(folder,
                          nProc=1,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly
    It creates an HDF5 file containing the data-set and returns the associated FTICRData

    compression is active if compress=True
    comp_level is the ratio (in x sigma) under which values are set to 0.0
    downsampling is applied if downsample=True
    These two options are effective, but they take time.

    dparameters: if present, a dictionary copied into the final file as json
    """
    import multiprocessing as mp
    from spike.File import Solarix, Apex
    #    from spike.File.Solarix import locate_acquisition, read_param
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData

    if nProc > 1:
        print("** running on %d processors" % nProc)
        Pool = mp.Pool(nProc)

    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            importer = _importer
            break
        except:
            pass  # not this format, try the next importer
    else:  # the for loop fell through without break: nothing matched
        raise Exception("could not import data-set - unrecognized format")
    # get chromatogram
    minu, tic, maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")
    #size, specwidth,  offset, left_point, highmass, calibA, calibB, calibC, lowfreq, highfreq
    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    data.axis2.size = 1 * sizeF2  # The processing below might change the size, so we anticipate here !
    data.axis2.specwidth = float(params["SW_h"])
    found = False  # search for excitation bandwidth
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
        found = True
    except:
        pass
    if not found:
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version

    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3-parameter calibration, Warning: calibB is -ML2')
        data.axis2.calibB *= -1

    data.params = params  # add the parameters to the data-set
    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params,
                             h5name='params')  # store params in the file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except:
        print('ExciteSweep file not found')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [
        0.0
    ]  # remembers max values in all datasets - main and downsampled
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    projection = FTICRData(buffer=np.zeros(sizeF2))  # to accumulate projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    with open(fname, "rb") as f:
        ipacket = 0
        szpacket = 11
        packet = np.zeros(
            (szpacket,
             sizeF2))  # store by packet to increase compression speed
        absmax = 0.0

        xarg = iterargF2(f, sizeF1, sizeF2, compress, comp_level,
                         allsizes)  # construct iterator for main loop

        if nProc > 1:
            res = Pool.imap(processF2row,
                            xarg)  # multiproc processing using Pool
        else:
            res = map(processF2row, xarg)  # plain single proc processing
        for i1, spectres in enumerate(res):  # and get results
            spectre = spectres.pop(0)
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0],
                                   abs(packet.max()))  # compute max
                data.buffer[i1 - (szpacket - 1):i1 +
                            1, :] = packet[:, :]  # and copy
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample
            for idt, spectre in enumerate(spectres):
                datai = datalist[idt]
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    maxvalues[idt + 1] = max(
                        maxvalues[idt + 1],
                        spectre.absmax)  # compute max (0 is full spectrum)
                    datai.buffer[ii1, :] = spectre.buffer[:]

            pbar.update(i1 + 1)
            last = i1
        # flush the remaining, partially filled packet
        if ipacket > 0:
            maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
            data.buffer[last - ipacket + 1:last + 1, :] = packet[:ipacket, :]
    pbar.finish()

    # then write projection as 'projectionF2'
    print('writing projections')
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]

    # store maxvalues in the file
    print('writing max abs value')
    HF.store_internal_object(maxvalues, h5name='maxvalues')

    print('writing parameters')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')

    # and close
    HF.flush()
    if nProc > 1:
        Pool.close()  # finally closes multiprocessing slaves
    return data
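
Rows are not written to the HDF5-backed buffer one at a time: they accumulate in a packet of szpacket rows which is flushed in a single slice assignment, a cheap way to speed up compressed writes. The pattern in isolation (sizes here are arbitrary):

import numpy as np

szpacket, nrows, ncols = 10, 95, 64
data = np.zeros((nrows, ncols))       # stand-in for the HDF5-backed buffer
packet = np.zeros((szpacket, ncols))
ipacket = 0
for i1 in range(nrows):
    packet[ipacket, :] = np.random.randn(ncols)  # one freshly processed row
    if (ipacket + 1) % szpacket == 0:            # packet full: flush it
        data[i1 - (szpacket - 1):i1 + 1, :] = packet
        packet[:, :] = 0.0
        ipacket = 0
    else:
        ipacket += 1
if ipacket > 0:                       # flush the remaining partial packet
    data[nrows - ipacket:nrows, :] = packet[:ipacket, :]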
Example #9
def main():
    """does the whole job,
    if we are running in MPI, this is only called by job #0
    all other jobs are running mpi.slave()
    """
    argv = sys.argv
    if len(argv) != 2:
        print("""
syntax is :
(mpirun -np N) python  program   configfile.mscf
""")
        sys.exit(1)

    # get parameters
    configfile = argv[1]
    cp = NPKConfigParser()
    cp.readfp(open(configfile))
    infile = cp.getword("Cadzow", "namein")
    print("infile", infile)
    outfile = cp.getword("Cadzow", "nameout")
    print("outfile", outfile)

    algo = cp.getword("Cadzow", "algorithm")
    print("algorithm", algo)
    n_of_line = cp.getint("Cadzow", "n_of_lines", 70)
    print("n_of_line", n_of_line)
    n_of_iter = cp.getint("Cadzow", "n_of_iters", 1)
    print("n_of_iter", n_of_iter)
    orda = cp.getint("Cadzow", "order", 500)
    print("order", orda)
    n_of_column = cp.getint("Cadzow", "n_of_column", 100)
    print("n_of_column", n_of_column)
    progress = cp.getboolean("Cadzow", "progress", True)

    d0 = load_input(infile)
    d0.check2D()  # raise error if not a 2D
    Set_Table_Param()

    hfar = HDF5File(outfile, "w", debug=0)  # OUTFILE
    d1 = FTICRData(dim=2)  # create dummy 2D
    copyaxes(d0, d1)  # copy axes from d0 to d1
    group = 'resol1'
    hfar.create_from_template(d1, group)

    # prepare index and method
    if n_of_column == 0:
        indexes = range(d0.size2)  # process all
    else:
        indexes = selectcol(d0, n_of_column)  # selections

    if algo == "Cadzow":
        meth = cadz
    elif algo == "rQRd":  #
        meth = rqr
    else:
        raise Exception("unknown algorithm: %s" % algo)

    # then loop
    t0 = time.time()
    if progress:
        widgets = [
            'Processing %s: ' % (algo),
            pg.Percentage(), ' ',
            pg.Bar(marker='-', left='[', right=']'),
            pg.ETA()
        ]
        pbar = pg.ProgressBar(widgets=widgets,
                              maxval=len(indexes))  #, fd=sys.stdout)

    d1D = d0.col(0)  # template
    xarg = iterarg(indexes, d0, n_of_line, n_of_iter, orda)
    if mpiutil.MPI_size > 1:  # means we are running under MPI !
        mpiutil.mprint('MPI Master job  - starting slave jobs - ')
        res = mpiutil.enum_imap(meth, xarg)  # apply it
        for i, p in res:  # and get results
            d1D.buffer = p
            d1.set_col(indexes[i], d1D)
            if progress: pbar.update(i + 1)
    else:
        res = map(meth, xarg)  # apply it lazily (itertools.imap under Python 2)
        for i, p in enumerate(res):  # and get results
            d1D.buffer = p
            d1.set_col(indexes[i], d1D)
            if progress: pbar.update(i + 1)
    print("Processing time : ", time.time() - t0)