Example #1
def downsample2D(data, outp, n1, n2, compress=False, compress_level=3.0):
    """
    takes data (a 2D) and generate a smaller dataset downsampled by factor (n1,n2) on each axis
    then returned data-set is n1*n2 times smaller
    - do a filtered decimation along n2
    - simply takes the mean along n1
    - set to zero all entries below 3*sigma if compress is True
    ** Not fully tested on non powers of 2 **
    """
    if debug > 0: print("in downsample2D : %s x %s" % (n1, n2))
    for i in range(0, data.size1, n1):
        temp = np.zeros(data.size2 // n2)
        for j in range(n1):
            if n2 > 1:
                try:
                    yy = decimate(data.row(i + j).buffer,
                                  int(n2),
                                  ftype="fir",
                                  zero_phase=True)  # filter along F2
                except TypeError:  # The zero_phase keyword was added in scipy 0.18.0.
                    yy = decimate(data.row(i + j).buffer, int(n2),
                                  ftype="fir")  # filter along F2
            else:
                yy = data.row(i + j).buffer
            temp += yy
        temp *= (1.0 / n1)
        if compress:
            b = temp.copy()
            for j in range(3):
                b = b[b - b.mean() < 3 * b.std()]
            threshold = compress_level * b.std()  # compress_level * b.std() is 3*sigma by default
            temp[abs(temp) < threshold] = 0.0
        outp.buffer[i // n1, :] = temp
    copyaxes(data, outp)
    outp.adapt_size()
    return outp
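A minimal, self-contained sketch of the same idea on a plain numpy array, without the FTICRData wrapper; the function name and the block handling below are my own illustration, not part of the library:

import numpy as np
from scipy.signal import decimate

def downsample2d_array(arr, n1, n2):
    """sketch: FIR-decimate each row by n2, then average the rows in blocks of n1"""
    out = []
    for i in range(0, arr.shape[0] - n1 + 1, n1):
        block = arr[i:i + n1, :]
        if n2 > 1:
            rows = [decimate(r, int(n2), ftype="fir", zero_phase=True) for r in block]
        else:
            rows = list(block)
        out.append(np.mean(rows, axis=0))  # mean along the first axis
    return np.array(out)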
Example #2
def main():
    """does the whole job,
    if we are running in MPI, this is only called by job #0
    all other jobs are running mpi.slave()
    """
    argv = sys.argv
    if len(argv) != 2:
        print("""
syntax is :
(mpirun -np N) python  program   configfile.mscf
""")
        sys.exit(1)

    # get parameters
    configfile = argv[1]
    cp = NPKConfigParser()
    cp.readfp(open(configfile))
    infile = cp.getword("Cadzow", "namein")
    print("infile", infile)
    outfile = cp.getword("Cadzow", "nameout")
    print("outfile", outfile)

    algo = cp.getword("Cadzow", "algorithm")
    print("algorithm", algo)
    n_of_line = cp.getint("Cadzow", "n_of_lines", 70)
    print("n_of_line", n_of_line)
    n_of_iter = cp.getint("Cadzow", "n_of_iters", 1)
    print("n_of_iter", n_of_iter)
    orda = cp.getint("Cadzow", "order", 500)
    print("order", orda)
    n_of_column = cp.getint("Cadzow", "n_of_column", 100)
    print("n_of_column", n_of_column)
    progress = cp.getboolean("Cadzow", "progress", True)

    d0 = load_input(infile)
    d0.check2D()  # raise error if not a 2D
    Set_Table_Param()

    hfar = HDF5File(outfile, "w", debug=0)  # OUTFILE
    d1 = FTICRData(dim=2)  # create dummy 2D
    copyaxes(d0, d1)  # copy axes from d0 to d1
    group = 'resol1'
    hfar.create_from_template(d1, group)

    # prepare index and method
    if n_of_column == 0:
        indexes = range(d0.size2)  # process all
    else:
        indexes = selectcol(d0, n_of_column)  # selections

    if algo == "Cadzow":
        meth = cadz
    elif algo == "rQRd":  #
        meth = rqr
    else:
        raise ("wrong algo")

    # then loop
    t0 = time.time()
    if progress:
        widgets = [
            'Processing %s: ' % (algo),
            pg.Percentage(), ' ',
            pg.Bar(marker='-', left='[', right=']'),
            pg.ETA()
        ]
        pbar = pg.ProgressBar(widgets=widgets,
                              maxval=len(indexes))  #, fd=sys.stdout)

    d1D = d0.col(0)  # template
    xarg = iterarg(indexes, d0, n_of_line, n_of_iter, orda)
    if mpiutil.MPI_size > 1:  # means we are running under MPI !
        mpiutil.mprint('MPI Master job  - starting slave jobs - ')
        res = mpiutil.enum_imap(meth, xarg)  # apply it
        for i, p in res:  # and get results
            d1D.buffer = p
            d1.set_col(indexes[i], d1D)
            if progress: pbar.update(i + 1)
    else:
        res = map(meth, xarg)  # apply it (itertools.imap under Python 2)
        for i, p in enumerate(res):  # and get results
            d1D.buffer = p
            d1.set_col(indexes[i], d1D)
            if progress: pbar.update(i + 1)
    print("Processing time : ", time.time() - t0)
Example #3
def Import_and_Process_LC(folder,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly
    It creates and returns a HDF5 file containing the data-set
    
    compression is active if (compress=True).
    comp_level is the ratio (in x sigma) under which values are set to 0.0
    downsample is applied if (downsample=True).
    These two parameters are efficient but it takes time.

    dparameters if present, is a dictionnary copied into the final file as json 
    """
    from spike.File import Solarix, Apex
    #    from spike.File.Solarix import locate_acquisition, read_param
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData
    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            importer = _importer
            break
        except:
            print("***************************************")
            print(params)
    else:  # the else clause belongs to the for loop: no importer matched
        raise Exception("could not import data-set - unrecognized format")
    # get chromatogram
    minu, tic, maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")
    #size, specwidth,  offset, left_point, highmass, calibA, calibB, calibC, lowfreq, highfreq
    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    data.axis2.size = 1 * sizeF2  # The processing below might change the size, so we anticipate here !
    data.axis2.specwidth = float(params["SW_h"])
    found = False  # search for excitation bandwidth
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
        found = True
    except:
        pass
    if not found:
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version

    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3 parameters calibration,  Warning calibB is -ML2')
        data.axis2.calibB *= -1

    data.params = params  # add the parameters to the data-set
    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params,
                             h5name='params')  # store params in the file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except:
        print('ExciteSweep file not stored')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [0.0]  # remembers max values in all datasets - main and downsampled
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    if sys.maxsize == 2**31 - 1:  # the flag used by array depends on architecture - here on 32bit
        flag = 'l'  # Apex files are in int32
    else:  # here in 64bit
        flag = 'i'  # strange, but works here.
    spectre = FTICRData(shape=(sizeF2, ))  # to handle FT
    projection = FTICRData(buffer=np.zeros(sizeF2))  # to accumulate projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    with open(fname, "rb") as f:
        ipacket = 0
        szpacket = 10
        packet = np.zeros((szpacket, sizeF2))  # store by packet to increase compression speed
        for i1 in range(sizeF1):
            absmax = 0.0
            #print(i1, ipacket, end='  ')
            tbuf = f.read(4 * sizeF2)
            if len(tbuf) != 4 * sizeF2:
                break
            abuf = np.array(array.array(flag, tbuf), dtype=float)
            # processing
            spectre.set_buffer(abuf)
            spectre.adapt_size()
            spectre.hamming().zf(2).rfft().modulus()  # double the size
            mu, sigma = spectre.robust_stats(iterations=5)
            spectre.buffer -= mu
            if compress:
                spectre.zeroing(sigma * comp_level).eroding()
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0],
                                   abs(packet.max()))  # compute max
                data.buffer[i1 - (szpacket - 1):i1 + 1, :] = packet[:, :]  # and copy
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample
            for idt, datai in enumerate(datalist):
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    spectre.set_buffer(abuf)
                    spectre.adapt_size()
                    spectre.chsize(datai.size2).hamming().zf(2).rfft().modulus()
                    mu, sigma = spectre.robust_stats(iterations=5)
                    spectre.buffer -= mu
                    if compress:
                        spectre.zeroing(sigma * comp_level).eroding()
                    maxvalues[idt + 1] = max(
                        maxvalues[idt + 1],
                        spectre.absmax)  # compute max (0 is full spectrum)
                    datai.buffer[ii1, :] = spectre.buffer[:]

            pbar.update(i1)
        # flush the remaining packet
        maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
        data.buffer[i1 - ipacket:i1, :] = packet[:ipacket, :]
    # store maxvalues in the file
    HF.store_internal_object(maxvalues, h5name='maxvalues')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')

    # then write projection as 'projectionF2'
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]
    pbar.finish()
    HF.flush()
    return data
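The inner loop above buffers rows in a small packet array and writes them to the HDF5-backed buffer every szpacket rows, which keeps the number of compressed writes low. Here is a minimal sketch of that buffering pattern on plain numpy arrays; the function is my own illustration, not part of the library:

import numpy as np

def write_by_packets(rows, target, szpacket=10):
    """rows: iterable of 1D arrays; target: 2D array with one slot per row"""
    packet = np.zeros((szpacket, target.shape[1]))
    ipacket = 0
    last = -1
    for i, row in enumerate(rows):
        packet[ipacket, :] = row
        ipacket += 1
        last = i
        if ipacket == szpacket:  # dump every szpacket rows
            target[i - szpacket + 1:i + 1, :] = packet
            ipacket = 0
    if ipacket:  # flush the remaining partial packet
        target[last - ipacket + 1:last + 1, :] = packet[:ipacket, :]

# usage sketch
target = np.zeros((23, 5))
write_by_packets((np.full(5, k, dtype=float) for k in range(23)), target)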
Example #4
def main(argv=None):
    """
    Does the whole on-file processing, 
    syntax is
    processing.py [ configuration_file.mscf ]
    if no argument is given, the standard file : process.mscf is used.
    """
    import datetime as dt
    print('CONFIG:', os.path.realpath(os.curdir), os.path.exists(sys.argv[1]))
    stdate = dt.datetime.strftime(dt.datetime.now(), "%Y-%m-%d_%Hh%M")
    logflux = TeeLogger(erase=True, log_name="processing_%s.log" % stdate)
    print("Processing 2D FT-MS data -",
          dt.datetime.strftime(dt.datetime.now(), "%Y-%h-%d %Hh%M"))
    print("""
=============================
    reading configuration
=============================""")
    global Pool  # This global will hold the multiprocessing.Pool if needed
    Pool = None
    t0 = time.time()
    t00 = t0
    ######### read arguments
    if not argv:
        argv = sys.argv
    try:  # First try to read config file from arg list
        configfile = argv[1]
    except IndexError:  # then assume standard name
        configfile = "process.mscf"
    print("using %s as configuration file" % configfile)
    if interfproc:
        output = open('InterfProc/progbar.pkl', 'wb')
        pb = ['F2', 0]
        pickle.dump(pb, output)
        output.close()
    #### get parameters from configuration file - store them in a parameter object
    cp = NPKConfigParser()
    print('address configfile is ', configfile)
    try:
        cp.read_file(open(configfile, 'r'))
    except:
        cp.readfp(open(configfile, 'r'))
    print("reading config file")
    param = Proc_Parameters(cp)  # parameters from config file..
    # get optionnal parameters
    opt_param = {}
    for p in ("F1_specwidth", "F2_specwidth", "highmass", "ref_mass",
              "ref_freq"):
        v = cp.getfloat("import", p, 0.0)
        if v != 0.0:
            opt_param[p] = v
    if param.mp:
        Pool = mp.Pool(param.nproc)  # if multiprocessing, creates slaves early, while memory is empty !
    param.report()
    logflux.log.flush()  # flush logfile
    ######## determine files and load inputfile
    ### input file either raw to be imported or already imported
    imported = False
    print("""
=============================
    preparating files
=============================""")
    if not os.path.exists(param.infile):
        print("importing %s into %s" %
              (".", param.infile))  #To be corrected MAD
        d0 = Import_2D[param.format](param.apex, param.infile)
        imported = True
        if opt_param != {}:  # if some parameters were overloaded in config file
            # hum close, open, close, open ...
            d0.hdf5file.close()
            del (d0)
            hf = HDF5File(param.infile, "rw")
            for item in opt_param:
                if item.startswith('F1_'):
                    fileitem = item[3:]
                    hf.axes_update(axis=1, infos={fileitem: opt_param[item]})
                    print("Updating axis F1 %s to %f" %
                          (fileitem, opt_param[item]))
                elif item.startswith('F2_'):
                    fileitem = item[3:]
                    hf.axes_update(axis=2, infos={fileitem: opt_param[item]})
                    print("Updating axis F2 %s to %f" %
                          (fileitem, opt_param[item]))
                else:
                    hf.axes_update(axis=1, infos={item: opt_param[item]})
                    hf.axes_update(axis=2, infos={item: opt_param[item]})
                    print("Updating all axes %s to %f" %
                          (item, opt_param[item]))
            hf.close()
            d0 = load_input(param.infile)
    else:
        d0 = load_input(param.infile)
    d0.check2D()  # raise error if not a 2D
    try:
        d0.params
    except:
        d0.params = {}  # create empty dummy params block
    if imported:
        print_time(time.time() - t0, "Import")
    else:
        print_time(time.time() - t0, "Load")
    logflux.log.flush()  # flush logfile
    ###### Read processing arguments
    Set_Table_Param()
    if debug > 0:
        Report_Table_Param()
        print(d0.report())
    ### compute final sizes
    allsizes = comp_sizes(d0,
                          zflist=param.zflist,
                          szmlist=param.szmlist,
                          largest=param.largest)
    if debug > 0: print(allsizes)
    (sizeF1, sizeF2) = allsizes.pop(0)  # this is the largest, to be processed by FT
    ### prepare intermediate file
    if debug > 0: print("preparing intermediate file ")
    if param.interfile is None:  # We have to create one !
        interfile = os.path.join(
            param.tempdir,
            'tmpfile_for_{}'.format(os.path.basename(param.outfile)))
        print("creating TEMPFILE:", interfile)
    else:
        interfile = param.interfile
    ### in F2
    if param.do_F2:  # create
        temp = HDF5File(interfile, "w")
        datatemp = FTICRData(dim=2)
        copyaxes(d0, datatemp)
        datatemp.params = d0.params
        if param.do_modulus:
            datatemp.axis1.size = min(d0.size1, sizeF1)
            datatemp.axis2.size = 2 * sizeF2
        else:
            datatemp.axis1.size = min(d0.size1, sizeF1)
            datatemp.axis2.size = sizeF2
        temp.create_from_template(datatemp)
    else:  # already existing
        datatemp = load_input(param.interfile)
    datatemp.params = d0.params
    logflux.log.flush()  # flush logfile
    ### prepare output file
    if debug > 0: print("preparing output file ")
    if param.do_F1:
        hfar = HDF5File(param.outfile,
                        "w")  #, debug=debug)  # OUTFILE for all resolutions
        d1 = FTICRData(dim=2)  # create dummy 2D
        copyaxes(d0, d1)  # copy axes from d0 to d1
        d1.axis2.size = sizeF2
        d1.axis1.size = sizeF1
        group = 'resol1'
        if param.compress_outfile:  # file is compressed
            hfar.set_compression(True)
        hfar.create_from_template(d1, group)
        d1.params = d0.params
        if debug > 0:
            print("######################### d1.report() ################")
            print(d1.report())
            print("######################### Checked ################")
    else:
        d1 = None
        hfar = None
    logflux.log.flush()  # flush logfile
    ###### Do processing
    print("""
=============================
    FT processing
=============================""")
    t0 = time.time()
    do_process2D(d0, datatemp, d1, param)  # d0 original, d1 processed
    # close temp file
    # try:
    #     d0.hdf5file.close()
    # except AttributeError:      # depends on how d0 was loaded
    #     pass
    datatemp.hdf5file.close()
    ### update files
    if param.do_F1:
        hfar.axes_update(group=group,
                         axis=1,
                         infos={'offsetfreq': d1.axis1.offsetfreq})
    if param.interfile is None:
        temp.close()
        os.unlink(interfile)
    print("==  FT Processing finished  ==")
    print_time(time.time() - t0, "FT processing time")
    logflux.log.flush()  # flush logfile
    ### downsample result
    if param.do_F1:
        print("""
=============================
    downsampling
=============================""")
        downprevious = d1  # used to downsample by step   downprevious -downto-> down
        t0 = time.time()
        for (i, (sizeF1, sizeF2)) in enumerate(allsizes):
            if (downprevious.size1 % sizeF1) != 0 or (downprevious.size2 % sizeF2) != 0:
                print(
                    "downsampling not available for level %d : %d x %d -> %d x %d"
                    % ((i + 1), downprevious.size1, downprevious.size2, sizeF1,
                       sizeF2))
                continue
            zflevel = "level %d" % (i + 1)
            group = 'resol%d' % (i + 2)  # +2 because we popped the first value
            print("downsampling %s - %s  (%d x %d)" %
                  (zflevel, group, sizeF1, sizeF2))
            down = FTICRData(dim=2)  # create dummy 2D
            copyaxes(d1, down)  # copy axes from d1 to down
            down.axis1.size = sizeF1
            down.axis2.size = sizeF2
            #create_branch(hfar, group, d1)
            hfar.create_from_template(down, group)
            if debug > 0: print(down)
            downsample2D(downprevious,
                         down,
                         downprevious.size1 // sizeF1,
                         downprevious.size2 // sizeF2,
                         compress=param.compress_outfile)
            downprevious = down
        print_time(time.time() - t0, "Downsampling time")
    print("== Processing finished  ==")
    print_time(time.time() - t00, "Total processing time")
    logflux.log.flush()  # flush logfile
    ### clean and close output files
    # copy attached to outputfile
    print("""
=============================
    cleaning and closing
=============================""")
    # copy files and parameters
    if hfar is not None:
        hfar.store_internal_file(filename=configfile,
                                 h5name="config.mscf",
                                 where='/attached')  # first mscf
        try:
            hfar.store_internal_object(
                h5name='params',
                obj=d0.hdf5file.retrieve_object(h5name='params'))
        except:
            print("No params copied to Output file")
        else:
            print("parameters and configuration file copied")

        for h5name in ["apexAcquisition.method",
                       "ExciteSweep"]:  # then parameter files
            try:
                Finh5 = d0.hdf5file.open_internal_file(h5name)
            except:
                print("no %s internal file to copy" % h5name)
            else:  # performed only if no error
                Fouth5 = hfar.open_internal_file(h5name, access='w')
                Fouth5.write(Finh5.read())
                Finh5.close()
                Fouth5.close()
                print("%s internal file copied" % h5name)
        # then logfile
        logflux.log.flush()  # flush logfile
        hfar.store_internal_file(filename=logflux.log_name,
                                 h5name="processing.log",
                                 where='/attached')
        print("log file copied")
        # and close
        d0.hdf5file.close()
        hfar.close()
    else:
        d0.hdf5file.close()

    if param.mp:
        Pool.close()  # finally closes multiprocessing slaves
    logflux.log.flush()  # flush logfile
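The optional axis overrides read from the [import] section above (F1_specwidth, F2_specwidth, highmass, ref_mass, ref_freq) suggest a config block along these lines in process.mscf; the values below are purely illustrative assumptions:

[import]
F1_specwidth = 500000.0
F2_specwidth = 1000000.0
highmass = 2500.0
ref_mass = 500.0
ref_freq = 300000.0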
Example #5
def Import_and_Process_LC(folder,
                          nProc=1,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly
    It creates and returns a HDF5 file containing the data-set
    
    compression is active if (compress=True).
    comp_level is the ratio (in x sigma) under which values are set to 0.0
    downsample is applied if (downsample=True).
    These two parameters are efficient but it takes time.

    dparameters if present, is a dictionnary copied into the final file as json 
    """
    import multiprocessing as mp
    from spike.File import Solarix, Apex
    #    from spike.File.Solarix import locate_acquisition, read_param
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData

    if nProc > 1:
        print("** running on %d processors" % nProc)
        Pool = mp.Pool(nProc)

    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            importer = _importer
            break
        except:
            #print("***************************************")
            #print(params)
            pass
    else:  # the else clause belongs to the for loop: no importer matched
        raise Exception("could not import data-set - unrecognized format")
    # get chromatogram
    minu, tic, maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")
    #size, specwidth,  offset, left_point, highmass, calibA, calibB, calibC, lowfreq, highfreq
    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    data.axis2.size = 1 * sizeF2  # The processing below might change the size, so we anticipate here !
    data.axis2.specwidth = float(params["SW_h"])
    found = False  # search for excitation bandwidth
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
        found = True
    except:
        pass
    if not found:
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version

    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3 parameters calibration,  Warning calibB is -ML2')
        data.axis2.calibB *= -1

    data.params = params  # add the parameters to the data-set
    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params,
                             h5name='params')  # store params in the file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except:
        print('ExciteSweep file not found')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [0.0]  # remembers max values in all datasets - main and downsampled
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    projection = FTICRData(buffer=np.zeros(sizeF2))  # to accumulate projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    with open(fname, "rb") as f:
        ipacket = 0
        szpacket = 11
        packet = np.zeros((szpacket, sizeF2))  # store by packet to increase compression speed
        absmax = 0.0

        xarg = iterargF2(f, sizeF1, sizeF2, compress, comp_level,
                         allsizes)  # construct iterator for main loop

        if nProc > 1:
            res = Pool.imap(processF2row,
                            xarg)  # multiproc processing using Pool
        else:
            res = map(processF2row, xarg)  # plain single proc processing
        for i1, spectres in enumerate(res):  # and get results
            spectre = spectres.pop(0)
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0],
                                   abs(packet.max()))  # compute max
                data.buffer[i1 - (szpacket - 1):i1 + 1, :] = packet[:, :]  # and copy
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample
            for idt, spectre in enumerate(spectres):
                datai = datalist[idt]
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    maxvalues[idt + 1] = max(
                        maxvalues[idt + 1],
                        spectre.absmax)  # compute max (0 is full spectrum)
                    datai.buffer[ii1, :] = spectre.buffer[:]

            pbar.update(i1 + 1)
            last = i1
        # flush the remaining packet
        maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
        data.buffer[last - ipacket:last, :] = packet[:ipacket, :]
    pbar.finish()

    # then write projection as 'projectionF2'
    print('writing projections')
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]

    # store maxvalues in the file
    print('writing max abs value')
    HF.store_internal_object(maxvalues, h5name='maxvalues')

    print('writing parameters')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')

    # and close
    HF.flush()
    if nProc > 1:
        Pool.close()  # finally closes multiprocessing slaves
    return data
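processF2row and iterargF2 are not shown in this example; below is a plausible sketch of such a worker, reusing the per-row processing chain from Example #3. The argument layout and names are assumptions, not the library's actual API:

from spike.FTICR import FTICRData

def processF2row(arg):
    """hypothetical worker: one raw F2 row in, a list of processed spectra out
    (full resolution first, then one per downsampled F2 size)"""
    abuf, sizes2, compress, comp_level = arg
    out = []
    for sz2 in [None] + list(sizes2):
        spectre = FTICRData(buffer=abuf.copy())
        if sz2 is not None:
            spectre.chsize(sz2)  # truncate for the downsampled copies
        spectre.hamming().zf(2).rfft().modulus()
        mu, sigma = spectre.robust_stats(iterations=5)
        spectre.buffer -= mu
        if compress:
            spectre.zeroing(sigma * comp_level).eroding()
        out.append(spectre)
    return out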
Example #6
def main(argv=None):
    """
    Does the whole on-file processing, 
    
    syntax is
    processing.py [ configuration_file.mscf ]
    if no argument is given, the standard file : process.mscf is used.
    
    """
    t0 = time.time()
    t00 = t0
    ######### read arguments
    if not argv:
        argv = sys.argv
    try:                        # First try to read config file from arg list
        configfile = argv[1]
    except IndexError:          # then assume standard name
        configfile = "process.mscf"
    print("using %s as configuration file"%configfile)
    
    #### get parameters from configuration file - store them in a parameter object
    cp = NPKConfigParser()
    cp.readfp(open(configfile))
    print("reading config file")
    param = Proc_Parameters(cp)
    param.report()
    # get optionnal parameters
    opt_param = {}
    for p in ("F1_specwidth", "F2_specwidth", "highmass"):
        v = cp.getfloat( "import", p, 0.0)
        if v != 0.0:
            opt_param[p] = v
            
    ######## determine files and load inputfile
    ### input file either raw to be imported or already imported
    imported = False
    if not os.path.exists(param.infile):
        print("importing %s into %s"%(dir,param.infile))
        d0 = Import_2D(param.apex, param.infile)
        imported = True
        if opt_param != {}: # if some parameters were overloaded in config file
            # hum close, open, close, open ...
            d0.hdf5file.close()
            del(d0)
            hf = HDF5File(param.infile,"rw")
            for item in opt_param:
                if item.startswith('F1_'):
                    fileitem = item[3:]
                    hf.axes_update(axis = 1, infos = {fileitem:opt_param[item]})
                    print("Updating axis F1 %s to %f"%(fileitem, opt_param[item]))
                elif item.startswith('F2_'):
                    fileitem = item[3:]
                    hf.axes_update(axis = 2, infos = {fileitem:opt_param[item]})
                    print("Updating axis F2 %s to %f"%(fileitem, opt_param[item]))
                else:
                    hf.axes_update(axis = 1, infos = {item:opt_param[item]})
                    hf.axes_update(axis = 2, infos = {item:opt_param[item]})
                    print("Updating all axes %s to %f"%(item, opt_param[item]))
            hf.close()
            d0 = load_input(param.infile)
    else:
        d0 = load_input(param.infile)
    d0.check2D()    # raise error if not a 2D
    if imported:
        print_time( time.time()-t0, "Import")
    else:
        print_time( time.time()-t0, "Load")
    
    Set_Table_Param()
    if debug>0:
        Report_Table_Param()
        print(d0.report())
    
    ### compute final sizes
    allsizes = comp_sizes(d0, param.zflist, largest=param.largest)
    print("allsizes ",allsizes)
    if debug>0: print(allsizes)
    
    (sizeF1, sizeF2) = allsizes.pop(0)   # this is the largest, to be processed by FT
    
    ### prepare intermediate file
    if param.interfile is None:     # We have to create one !
        interfile = os.path.join(param.tempdir,'tmpfile.msh5')
        print("creating TEMPFILE:",interfile)
    else:
        interfile = param.interfile
    
    if param.do_F2:     # create
        temp =  HDF5File(interfile, "w")
        datatemp = FTICRData(dim=2)
        copyaxes(d0, datatemp)
        if param.do_modulus:
            datatemp.axis1.size = min(d0.size1, sizeF1)
            datatemp.axis2.size = 2*sizeF2
        else:
            datatemp.axis1.size = min(d0.size1, sizeF1)
            datatemp.axis2.size = sizeF2
        temp.create_from_template(datatemp)
    else:                # already existing
        datatemp = load_input(param.interfile)
    
    ### prepare output file
    if param.do_F1:
        hfar =  HDF5File(param.outfile, "w", debug=1)  # OUTFILE for all resolutions
        d1 = FTICRData( dim=2 )   # create dummy 2D
        copyaxes(d0, d1)        # copy axes from d0 to d1
        d1.axis2.size = sizeF2
        d1.axis1.size = sizeF1
        group = 'resol1'
        hfar.create_from_template(d1, group)
    else:
        d1 = None
    print("""
=============================
processing FT
=============================""")
    t0 = time.time()
    if param.do_F1:
        hfar.axes_update(group = group,axis = 1, infos = {'specwidth':d1.axis1.specwidth, 'left_point':int(d1.axis1.left_point)})
    
    if param.interfile is None:
        temp.close()
        os.unlink(interfile)
    
    print("==  FT Processing finished  ==")
    print_time(time.time()-t0, "FT processing time")
    if param.do_F1:
        down = None
        t0 = time.time()
        for (i, (sizeF1, sizeF2)) in enumerate(allsizes):
            print("d1.size1,sizeF1 ", d1.size1,sizeF1)
            print("d1.size2,sizeF2 ", d1.size2,sizeF2)
            if (d1.size1%sizeF1) != 0 or  (d1.size2%sizeF2) != 0:
                print("downsampling not available for level %d : %d x %d -> %d x %d"%(param.zflist[i+1], d1.size1, d1.size2, sizeF1, sizeF2))
                break
            try:
                zflevel = "level %d"%param.zflist[i+1]
            except IndexError:
                zflevel = "vignette"
            print("""
================
downsampling %s
================""" % zflevel)
            group = 'resol%d'%(i+2)     # +2 because we popped the first value
            if debug>1: print("downsampling", group, (sizeF1, sizeF2))
            down = FTICRData( dim=2 )   # create dummy 2D
            copyaxes(d1, down)        # copy axes from d1 to down
            down.axis1.size = sizeF1
            down.axis2.size = sizeF2
            #create_branch(hfar, group, d1)
            hfar.create_from_template(down, group)
            if debug>0: print(down)
            downsample2D(d1, down, d1.size1//sizeF1, d1.size2//sizeF2)
            hfar.axes_update(group = group,axis = 1, infos = {'left_point':down.axis1.left_point})
        print_time(time.time()-t0, "Downsampling time")
    print("== Processing finished  ==")
    print_time(time.time()-t00, "Total processing time")
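print_time is used throughout these examples but never shown; here is a hypothetical minimal version that prints an elapsed time given in seconds under a label. The name matches the calls above, the exact formatting is my own assumption:

def print_time(elapsed, text="Processing time"):
    """print an elapsed time given in seconds as h/min/s, prefixed by text"""
    m, s = divmod(int(round(elapsed)), 60)
    h, m = divmod(m, 60)
    print("%s : %d h %02d min %02d s" % (text, h, m, s))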