def downsample2D(data, outp, n1, n2, compress=False, compress_level=3.0):
    """
    Downsample the 2D dataset `data` into `outp` by the factors (n1, n2).

    The resulting dataset is n1*n2 times smaller than the input:
    - a filtered (FIR) decimation by n2 is applied along F2 (each row)
    - n1 consecutive rows are simply averaged along F1
    - if compress is True, all entries whose magnitude is below
      compress_level * sigma are set to zero (3*sigma by default)

    data    input 2D dataset; read through data.size1, data.size2 and
            data.row(i).buffer
    outp    output dataset; its buffer receives one row per group of n1
            input rows
    n1, n2  downsampling factors; converted to int, so integral floats
            (e.g. the result of a true division) are accepted
    returns outp, after copyaxes(data, outp) and outp.adapt_size()

    ** Not fully tested on non powers of 2 **
    """
    if debug > 0:
        print("in downsample2D : %s x %s" % (n1, n2))
    # The factors are used as range step, array size and buffer index below,
    # all of which require true integers.
    n1 = int(n1)
    n2 = int(n2)
    for i in range(0, data.size1, n1):
        temp = np.zeros(data.size2 // n2)
        for j in range(n1):
            if n2 > 1:
                try:
                    # filter along F2
                    yy = decimate(data.row(i + j).buffer, n2, ftype="fir", zero_phase=True)
                except TypeError:
                    # The zero_phase keyword was added in scipy 0.18.0.
                    yy = decimate(data.row(i + j).buffer, n2, ftype="fir")
            else:
                yy = data.row(i + j).buffer
            temp += yy
        temp *= (1.0 / n1)  # mean over the n1 accumulated rows
        if compress:
            # estimate the noise level: three passes discarding every point
            # lying more than 3 std above the mean
            b = temp.copy()
            for _ in range(3):
                b = b[b - b.mean() < 3 * b.std()]
            # compress_level * b.std() is 3*sigma by default
            threshold = compress_level * b.std()
            temp[abs(temp) < threshold] = 0.0
        outp.buffer[i // n1, :] = temp
    copyaxes(data, outp)
    outp.adapt_size()
    return outp
def main():
    """
    Does the whole job.

    If we are running in MPI, this is only called by job #0;
    all other jobs are running mpi.slave()

    Reads the configuration file given as the single command-line argument
    (section "Cadzow"), loads the 2D input, creates the output HDF5 file,
    then applies the selected algorithm ("Cadzow" or "rQRd") to the selected
    columns and stores each processed column into the output dataset.

    Exits with status 1 if the argument count is wrong;
    raises ValueError if the configured algorithm is unknown.
    """
    argv = sys.argv
    if len(argv) != 2:
        print("""
syntax is :
(mpirun -np N) python program configfile.mscf
""")
        sys.exit(1)

    # get parameters
    configfile = argv[1]
    cp = NPKConfigParser()
    with open(configfile) as conf:
        # read_file() replaces readfp(), which was removed in Python 3.12
        read_config = getattr(cp, "read_file", None) or cp.readfp
        read_config(conf)
    infile = cp.getword("Cadzow", "namein")
    print("infile", infile)
    outfile = cp.getword("Cadzow", "nameout")
    print("outfile", outfile)
    algo = cp.getword("Cadzow", "algorithm")
    print("algorithm", algo)
    n_of_line = cp.getint("Cadzow", "n_of_lines", 70)
    print("n_of_line", n_of_line)
    n_of_iter = cp.getint("Cadzow", "n_of_iters", 1)
    print("n_of_iter", n_of_iter)
    orda = cp.getint("Cadzow", "order", 500)
    print("order", orda)
    n_of_column = cp.getint("Cadzow", "n_of_column", 100)
    print("n_of_column", n_of_column)
    progress = cp.getboolean("Cadzow", "progress", True)

    d0 = load_input(infile)
    d0.check2D()  # raise error if not a 2D
    Set_Table_Param()

    hfar = HDF5File(outfile, "w", debug=0)  # OUTFILE
    d1 = FTICRData(dim=2)  # create dummy 2D
    copyaxes(d0, d1)  # copy axes from d0 to d1
    group = 'resol1'
    hfar.create_from_template(d1, group)

    # prepare index and method
    if n_of_column == 0:
        indexes = range(d0.size2)  # process all
    else:
        indexes = selectcol(d0, n_of_column)  # selections
    if algo == "Cadzow":
        meth = cadz
    elif algo == "rQRd":
        meth = rqr
    else:
        # a bare string cannot be raised; use a real exception
        raise ValueError("wrong algo")

    # then loop
    t0 = time.time()
    if progress:
        widgets = [
            'Processing %s: ' % (algo),
            pg.Percentage(), ' ',
            pg.Bar(marker='-', left='[', right=']'),
            pg.ETA()
        ]
        pbar = pg.ProgressBar(widgets=widgets, maxval=len(indexes))  # , fd=sys.stdout)

    d1D = d0.col(0)  # template
    xarg = iterarg(indexes, d0, n_of_line, n_of_iter, orda)
    if mpiutil.MPI_size > 1:  # means we are running under MPI !
        mpiutil.mprint('MPI Master job - starting slave jobs - ')
        res = mpiutil.enum_imap(meth, xarg)  # apply it, yields (index, result)
    else:
        import itertools
        # itertools.imap only exists on Python 2; the builtin map is lazy on Python 3
        imap = getattr(itertools, "imap", map)
        res = enumerate(imap(meth, xarg))  # apply it
    for i, p in res:  # and get results
        d1D.buffer = p
        d1.set_col(indexes[i], d1D)
        if progress:
            pbar.update(i + 1)
    print("Processing time : ", time.time() - t0)
def Import_and_Process_LC(folder,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly
    It creates and returns a HDF5 file containing the data-set

    compression is active if (compress=True).
    comp_level is the ratio (in x sigma) under which values are set to 0.0
    downsample is applied if (downsample=True).
    These two parameters are efficient but it takes time.

    dparameters if present, is a dictionary copied into the final file as json

    Raises Exception if neither the Solarix nor the Apex importer can read
    the acquisition parameters, or if `folder` contains no "ser" file.
    """
    from spike.File import Solarix, Apex
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData

    # try each known format in turn; a failure simply means "not this format"
    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            break
        except Exception:
            # nothing is printed here: `params` may not exist yet, and
            # touching it would prevent the next importer from being tried
            continue
    else:
        raise Exception("could not import data-set - unrecognized format")

    # get chromatogram
    minu, tic, maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")

    # size, specwidth, offset, left_point, highmass, calibA, calibB, calibC, lowfreq, highfreq
    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    # The processing below might change the size, so we anticipate here !
    data.axis2.size = 1 * sizeF2
    data.axis2.specwidth = float(params["SW_h"])

    # search for excitation bandwidth
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
    except Exception:
        # no usable ExciteSweep file: fall back on acquisition parameters
        # NOTE(review): calibA is read here before it is assigned from
        # params["ML1"] below - confirm the intended order
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except Exception:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except Exception:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version

    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3 parameters calibration,  Warning calibB is -ML2')
        data.axis2.calibB *= -1

    data.params = params  # add the parameters to the data-set
    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params, h5name='params')  # store params in the file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except Exception:
        print('ExciteSweep file not stored')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [0.0]  # remembers max values in all datasets - main and downsampled
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    # the flag used by array depends on architecture
    if sys.maxsize == 2**31 - 1:  # here on 32bit
        flag = 'l'  # Apex files are in int32
    else:  # here in 64bit
        flag = 'i'  # strange, but works here.
    spectre = FTICRData(shape=(sizeF2, ))  # to handle FT
    projection = FTICRData(buffer=np.zeros(sizeF2))  # to accumulate projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    with open(fname, "rb") as f:
        ipacket = 0  # number of rows waiting in packet
        szpacket = 10
        # store by packet to increase compression speed
        packet = np.zeros((szpacket, sizeF2))
        nstored = 0  # number of rows read and processed so far
        for i1 in range(sizeF1):
            tbuf = f.read(4 * sizeF2)
            if len(tbuf) != 4 * sizeF2:  # truncated file
                break
            abuf = np.array(array.array(flag, tbuf), dtype=float)
            # processing
            spectre.set_buffer(abuf)
            spectre.adapt_size()
            spectre.hamming().zf(2).rfft().modulus()  # double the size
            mu, sigma = spectre.robust_stats(iterations=5)
            spectre.buffer -= mu
            if compress:
                spectre.zeroing(sigma * comp_level).eroding()
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            nstored = i1 + 1
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0], abs(packet.max()))  # compute max
                data.buffer[i1 - (szpacket - 1):i1 + 1, :] = packet[:, :]  # and copy
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample
            for idt, datai in enumerate(datalist):
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    spectre.set_buffer(abuf)
                    spectre.adapt_size()
                    spectre.chsize(
                        datai.size2).hamming().zf(2).rfft().modulus()
                    mu, sigma = spectre.robust_stats(iterations=5)
                    spectre.buffer -= mu
                    if compress:
                        spectre.zeroing(sigma * comp_level).eroding()
                    # compute max (0 is full spectrum)
                    maxvalues[idt + 1] = max(maxvalues[idt + 1], spectre.absmax)
                    datai.buffer[ii1, :] = spectre.buffer[:]
            pbar.update(i1)
        # flush the remaining packet: the pending rows are the last `ipacket`
        # rows processed; skipped when empty, since max() of an empty array raises
        if ipacket > 0:
            maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
            data.buffer[nstored - ipacket:nstored, :] = packet[:ipacket, :]

    # store maxvalues in the file
    HF.store_internal_object(maxvalues, h5name='maxvalues')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')

    # then write projection as 'projectionF2'
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]
    pbar.finish()
    HF.flush()
    return data
def main(argv=None):
    """
    Does the whole on-file processing,
    syntax is
    processing.py [ configuration_file.mscf ]
    if no argument is given, the standard file : process.mscf is used.

    argv  optional argument list laid out like sys.argv (argv[1] is the
          configuration file); sys.argv is used when it is empty or None.

    Steps, in order: read the configuration, import or load the input file,
    prepare the intermediate and output HDF5 files, run do_process2D(),
    downsample the result into the 'resolN' groups, then copy configuration,
    parameters and log into the output file and close everything.
    """
    import datetime as dt
    global Pool  # This global will hold the multiprocessing.Pool if needed

    stdate = dt.datetime.strftime(dt.datetime.now(), "%Y-%m-%d_%Hh%M")
    logflux = TeeLogger(erase=True, log_name="processing_%s.log" % stdate)
    print("Processing 2D FT-MS data -",
          dt.datetime.strftime(dt.datetime.now(), "%Y-%h-%d %Hh%M"))
    print("""
=============================
reading configuration
=============================""")
    Pool = None
    t0 = time.time()
    t00 = t0
    ######### read arguments
    if not argv:
        argv = sys.argv
    try:  # First try to read config file from arg list
        configfile = argv[1]
    except IndexError:  # then assume standard name
        configfile = "process.mscf"
    # reported only once the name is known, so that running without any
    # argument falls back on the default file instead of failing
    print('CONFIG:', os.path.realpath(os.curdir), os.path.exists(configfile))
    print("using %s as configuration file" % configfile)
    if interfproc:
        with open('InterfProc/progbar.pkl', 'wb') as output:
            pb = ['F2', 0]
            pickle.dump(pb, output)
    #### get parameters from configuration file - store them in a parameter object
    cp = NPKConfigParser()
    print('address configfile is ', configfile)
    with open(configfile, 'r') as conf:
        # read_file() is the current API; readfp() is kept for older Pythons
        read_config = getattr(cp, "read_file", None) or cp.readfp
        read_config(conf)
    print("reading config file")
    param = Proc_Parameters(cp)  # parameters from config file..
    # get optional parameters
    opt_param = {}
    for p in ("F1_specwidth", "F2_specwidth", "highmass", "ref_mass", "ref_freq"):
        v = cp.getfloat("import", p, 0.0)
        if v != 0.0:
            opt_param[p] = v
    if param.mp:
        # if multiprocessing, creates slaves early, while memory is empty !
        Pool = mp.Pool(param.nproc)
    param.report()
    logflux.log.flush()  # flush logfile

    ######## determine files and load inputfile
    ### input file either raw to be imported or already imported
    imported = False
    print("""
=============================
preparating files
=============================""")
    if not os.path.exists(param.infile):
        print("importing %s into %s" % (".", param.infile))  # To be corrected MAD
        d0 = Import_2D[param.format](param.apex, param.infile)
        imported = True
        if opt_param != {}:  # if some parameters were overloaded in config file
            # hum close, open, close, open ...
            d0.hdf5file.close()
            del (d0)
            hf = HDF5File(param.infile, "rw")
            for item in opt_param:
                if item.startswith('F1_'):
                    fileitem = item[3:]
                    hf.axes_update(axis=1, infos={fileitem: opt_param[item]})
                    print("Updating axis F1 %s to %f" % (fileitem, opt_param[item]))
                elif item.startswith('F2_'):
                    fileitem = item[3:]
                    hf.axes_update(axis=2, infos={fileitem: opt_param[item]})
                    print("Updating axis F2 %s to %f" % (fileitem, opt_param[item]))
                else:
                    hf.axes_update(axis=1, infos={item: opt_param[item]})
                    hf.axes_update(axis=2, infos={item: opt_param[item]})
                    print("Updating all axes %s to %f" % (item, opt_param[item]))
            hf.close()
            d0 = load_input(param.infile)
    else:
        d0 = load_input(param.infile)
    d0.check2D()  # raise error if not a 2D
    try:
        d0.params
    except Exception:
        d0.params = {}  # create empty dummy params block
    if imported:
        print_time(time.time() - t0, "Import")
    else:
        print_time(time.time() - t0, "Load")
    logflux.log.flush()  # flush logfile

    ###### Read processing arguments
    Set_Table_Param()
    if debug > 0:
        Report_Table_Param()
        print(d0.report())
    ### compute final sizes
    allsizes = comp_sizes(d0,
                          zflist=param.zflist,
                          szmlist=param.szmlist,
                          largest=param.largest)
    if debug > 0:
        print(allsizes)
    (sizeF1, sizeF2) = allsizes.pop(0)  # this is the largest, to be processed by FT

    ### prepare intermediate file
    if debug > 0:
        print("preparing intermediate file ")
    if param.interfile is None:  # We have to create one !
        interfile = os.path.join(
            param.tempdir,
            'tmpfile_for_{}'.format(os.path.basename(param.outfile)))
        print("creating TEMPFILE:", interfile)
    else:
        interfile = param.interfile
    ### in F2
    if param.do_F2:  # create
        temp = HDF5File(interfile, "w")
        datatemp = FTICRData(dim=2)
        copyaxes(d0, datatemp)
        datatemp.params = d0.params
        datatemp.axis1.size = min(d0.size1, sizeF1)
        if param.do_modulus:
            datatemp.axis2.size = 2 * sizeF2
        else:
            datatemp.axis2.size = sizeF2
        temp.create_from_template(datatemp)
    else:  # already existing
        datatemp = load_input(param.interfile)
        datatemp.params = d0.params
    logflux.log.flush()  # flush logfile

    ### prepare output file
    if debug > 0:
        print("preparing output file ")
    if param.do_F1:
        hfar = HDF5File(param.outfile, "w")  # , debug=debug)  # OUTFILE for all resolutions
        d1 = FTICRData(dim=2)  # create dummy 2D
        copyaxes(d0, d1)  # copy axes from d0 to d1
        d1.axis2.size = sizeF2
        d1.axis1.size = sizeF1
        group = 'resol1'
        if param.compress_outfile:  # file is compressed
            hfar.set_compression(True)
        hfar.create_from_template(d1, group)
        d1.params = d0.params
        if debug > 0:
            print("######################### d1.report() ################")
            print(d1.report())
            print("######################### Checked ################")
    else:
        d1 = None
        hfar = None
    logflux.log.flush()  # flush logfile

    ###### Do processing
    print("""
=============================
FT processing
=============================""")
    t0 = time.time()
    do_process2D(d0, datatemp, d1, param)  # d0 original, d1 processed
    # close temp file
    datatemp.hdf5file.close()
    ### update files
    if param.do_F1:
        hfar.axes_update(group=group,
                         axis=1,
                         infos={'offsetfreq': d1.axis1.offsetfreq})
    if param.interfile is None:
        temp.close()
        os.unlink(interfile)
    print("== FT Processing finished  ==")
    print_time(time.time() - t0, "FT processing time")
    logflux.log.flush()  # flush logfile

    ### downsample result
    if param.do_F1:
        print("""
=============================
downsampling
=============================""")
        downprevious = d1  # used to downsample by step   downprevious -downto-> down
        t0 = time.time()
        for (i, (sizeF1, sizeF2)) in enumerate(allsizes):
            if (downprevious.size1 % sizeF1) != 0 or (downprevious.size2 % sizeF2) != 0:
                print(
                    "downsampling not available for level %d : %d x %d -> %d x %d"
                    % ((i + 1), downprevious.size1, downprevious.size2, sizeF1, sizeF2))
                continue
            zflevel = "level %d" % (i + 1)
            group = 'resol%d' % (i + 2)  # +2 because we popped the first value
            print("downsampling %s - %s  (%d x %d)" % (zflevel, group, sizeF1, sizeF2))
            down = FTICRData(dim=2)  # create dummy 2D
            copyaxes(d1, down)  # copy axes from d1 to down
            down.axis1.size = sizeF1
            down.axis2.size = sizeF2
            hfar.create_from_template(down, group)
            if debug > 0:
                print(down)
            downsample2D(downprevious,
                         down,
                         downprevious.size1 // sizeF1,
                         downprevious.size2 // sizeF2,
                         compress=param.compress_outfile)
            downprevious = down
        print_time(time.time() - t0, "Downsampling time")
    print("== Processing finished  ==")
    print_time(time.time() - t00, "Total processing time")
    logflux.log.flush()  # flush logfile

    ### clean and close output files
    # copy attached to outputfile
    print("""
=============================
cleaning and closing
=============================""")
    # copy files and parameters
    if hfar is not None:
        hfar.store_internal_file(filename=configfile,
                                 h5name="config.mscf",
                                 where='/attached')  # first mscf
        try:
            hfar.store_internal_object(
                h5name='params',
                obj=d0.hdf5file.retrieve_object(h5name='params'))
        except Exception:
            print("No params copied to Output file")
        else:
            print("parameters and configuration file copied")

        for h5name in ["apexAcquisition.method", "ExciteSweep"]:  # then parameter files
            try:
                Finh5 = d0.hdf5file.open_internal_file(h5name)
            except Exception:
                print("no %s internal file to copy" % h5name)
            else:  # performed only if no error
                Fouth5 = hfar.open_internal_file(h5name, access='w')
                Fouth5.write(Finh5.read())
                Finh5.close()
                Fouth5.close()
                print("%s internal file copied" % h5name)
        # then logfile
        logflux.log.flush()  # flush logfile
        hfar.store_internal_file(filename=logflux.log_name,
                                 h5name="processing.log",
                                 where='/attached')
        print("log file copied")
        # and close
        d0.hdf5file.close()
        hfar.close()
    else:
        d0.hdf5file.close()
    if param.mp:
        Pool.close()  # finally closes multiprocessing slaves
    logflux.log.flush()  # flush logfile
def Import_and_Process_LC(folder,
                          nProc=1,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly
    It creates and returns a HDF5 file containing the data-set

    nProc is the number of processes; when larger than 1, rows are processed
    through a multiprocessing.Pool, otherwise with a plain map().
    compression is active if (compress=True).
    comp_level is the ratio (in x sigma) under which values are set to 0.0
    downsample is applied if (downsample=True).
    These two parameters are efficient but it takes time.

    dparameters if present, is a dictionary copied into the final file as json

    Raises Exception if neither the Solarix nor the Apex importer can read
    the acquisition parameters, or if `folder` contains no "ser" file.
    """
    import multiprocessing as mp
    from spike.File import Solarix, Apex
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData

    if nProc > 1:
        print("** running on %d processors" % nProc)
        Pool = mp.Pool(nProc)

    # try each known format in turn; a failure simply means "not this format"
    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            break
        except Exception:
            continue
    else:
        raise Exception("could not import data-set - unrecognized format")

    # get chromatogram
    minu, tic, maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")

    # size, specwidth, offset, left_point, highmass, calibA, calibB, calibC, lowfreq, highfreq
    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    # The processing below might change the size, so we anticipate here !
    data.axis2.size = 1 * sizeF2
    data.axis2.specwidth = float(params["SW_h"])

    # search for excitation bandwidth
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
    except Exception:
        # no usable ExciteSweep file: fall back on acquisition parameters
        # NOTE(review): calibA is read here before it is assigned from
        # params["ML1"] below - confirm the intended order
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except Exception:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except Exception:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version

    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3 parameters calibration,  Warning calibB is -ML2')
        data.axis2.calibB *= -1

    data.params = params  # add the parameters to the data-set
    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params, h5name='params')  # store params in the file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except Exception:
        print('ExciteSweep file not found')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [0.0]  # remembers max values in all datasets - main and downsampled
    # stays empty when downsample is False, so that the row iterator below
    # always receives a defined list of sizes
    allsizes = []
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    projection = FTICRData(buffer=np.zeros(sizeF2))  # to accumulate projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    with open(fname, "rb") as f:
        ipacket = 0  # number of rows waiting in packet
        szpacket = 11
        # store by packet to increase compression speed
        packet = np.zeros((szpacket, sizeF2))
        nstored = 0  # number of rows received so far
        # construct iterator for main loop
        xarg = iterargF2(f, sizeF1, sizeF2, compress, comp_level, allsizes)
        if nProc > 1:
            res = Pool.imap(processF2row, xarg)  # multiproc processing using Pool
        else:
            res = map(processF2row, xarg)  # plain single proc processing
        for i1, spectres in enumerate(res):  # and get results
            # first item is the full-size spectrum, the rest are downsampled ones
            spectre = spectres.pop(0)
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            nstored = i1 + 1
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0], abs(packet.max()))  # compute max
                data.buffer[i1 - (szpacket - 1):i1 + 1, :] = packet[:, :]  # and copy
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample
            for idt, spectre in enumerate(spectres):
                datai = datalist[idt]
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    # compute max (0 is full spectrum)
                    maxvalues[idt + 1] = max(maxvalues[idt + 1], spectre.absmax)
                    datai.buffer[ii1, :] = spectre.buffer[:]
            pbar.update(i1 + 1)
        # flush the remaining packet: the pending rows are the last `ipacket`
        # rows received; skipped when empty, since max() of an empty array raises
        if ipacket > 0:
            maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
            data.buffer[nstored - ipacket:nstored, :] = packet[:ipacket, :]
    pbar.finish()

    # then write projection as 'projectionF2'
    print('writing projections')
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]
    # store maxvalues in the file
    print('writing max abs value')
    HF.store_internal_object(maxvalues, h5name='maxvalues')
    print('writing parameters')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')
    # and close
    HF.flush()
    if nProc > 1:
        Pool.close()  # finally closes multiprocessing slaves
    return data
def main(argv=None):
    """
    Does the whole on-file processing,
    syntax is
    processing.py [ configuration_file.mscf ]
    if no argument is given, the standard file : process.mscf is used.

    argv  optional argument list laid out like sys.argv (argv[1] is the
          configuration file); sys.argv is used when it is empty or None.
    """
    t0 = time.time()
    t00 = t0
    ######### read arguments
    if not argv:
        argv = sys.argv
    try:  # First try to read config file from arg list
        configfile = argv[1]
    except IndexError:  # then assume standard name
        configfile = "process.mscf"
    print("using %s as configuration file" % configfile)
    #### get parameters from configuration file - store them in a parameter object
    cp = NPKConfigParser()
    with open(configfile) as conf:
        # read_file() replaces readfp(), which was removed in Python 3.12
        read_config = getattr(cp, "read_file", None) or cp.readfp
        read_config(conf)
    print("reading config file")
    param = Proc_Parameters(cp)
    param.report()
    # get optional parameters
    opt_param = {}
    for p in ("F1_specwidth", "F2_specwidth", "highmass"):
        v = cp.getfloat("import", p, 0.0)
        if v != 0.0:
            opt_param[p] = v

    ######## determine files and load inputfile
    ### input file either raw to be imported or already imported
    imported = False
    if not os.path.exists(param.infile):
        # param.apex is the source handed to Import_2D just below
        print("importing %s into %s" % (param.apex, param.infile))
        d0 = Import_2D(param.apex, param.infile)
        imported = True
        if opt_param != {}:  # if some parameters were overloaded in config file
            # hum close, open, close, open ...
            d0.hdf5file.close()
            del(d0)
            hf = HDF5File(param.infile, "rw")
            for item in opt_param:
                if item.startswith('F1_'):
                    fileitem = item[3:]
                    hf.axes_update(axis=1, infos={fileitem: opt_param[item]})
                    print("Updating axis F1 %s to %f" % (fileitem, opt_param[item]))
                elif item.startswith('F2_'):
                    fileitem = item[3:]
                    hf.axes_update(axis=2, infos={fileitem: opt_param[item]})
                    print("Updating axis F2 %s to %f" % (fileitem, opt_param[item]))
                else:
                    hf.axes_update(axis=1, infos={item: opt_param[item]})
                    hf.axes_update(axis=2, infos={item: opt_param[item]})
                    print("Updating all axes %s to %f" % (item, opt_param[item]))
            hf.close()
            d0 = load_input(param.infile)
    else:
        d0 = load_input(param.infile)
    d0.check2D()  # raise error if not a 2D
    if imported:
        print_time(time.time() - t0, "Import")
    else:
        print_time(time.time() - t0, "Load")
    Set_Table_Param()
    if debug > 0:
        Report_Table_Param()
        print(d0.report())

    ### compute final sizes
    allsizes = comp_sizes(d0, param.zflist, largest=param.largest)
    print("allsizes ", allsizes)
    if debug > 0:
        print(allsizes)
    (sizeF1, sizeF2) = allsizes.pop(0)  # this is the largest, to be processed by FT

    ### prepare intermediate file
    if param.interfile is None:  # We have to create one !
        interfile = os.path.join(param.tempdir, 'tmpfile.msh5')
        print("creating TEMPFILE:", interfile)
    else:
        interfile = param.interfile
    if param.do_F2:  # create
        temp = HDF5File(interfile, "w")
        datatemp = FTICRData(dim=2)
        copyaxes(d0, datatemp)
        datatemp.axis1.size = min(d0.size1, sizeF1)
        if param.do_modulus:
            datatemp.axis2.size = 2*sizeF2
        else:
            datatemp.axis2.size = sizeF2
        temp.create_from_template(datatemp)
    else:  # already existing
        datatemp = load_input(param.interfile)

    ### prepare output file
    if param.do_F1:
        hfar = HDF5File(param.outfile, "w", debug=1)  # OUTFILE for all resolutions
        d1 = FTICRData(dim=2)  # create dummy 2D
        copyaxes(d0, d1)  # copy axes from d0 to d1
        d1.axis2.size = sizeF2
        d1.axis1.size = sizeF1
        group = 'resol1'
        hfar.create_from_template(d1, group)
    else:
        d1 = None

    print("""
=============================
processing FT
=============================""")
    t0 = time.time()
    # NOTE(review): no FT processing call is made between this banner and the
    # "FT Processing finished" message below - confirm whether one is missing.
    if param.do_F1:
        hfar.axes_update(group=group, axis=1,
                         infos={'specwidth': d1.axis1.specwidth,
                                'left_point': int(d1.axis1.left_point)})
    if param.interfile is None:
        temp.close()
        os.unlink(interfile)
    print("== FT Processing finished  ==")
    print_time(time.time() - t0, "FT processing time")

    if param.do_F1:
        down = None
        t0 = time.time()
        for (i, (sizeF1, sizeF2)) in enumerate(allsizes):
            print("d1.size1,sizeF1 ", d1.size1, sizeF1)
            print("d1.size2,sizeF2 ", d1.size2, sizeF2)
            # computed first so that the message below cannot fail on a
            # size that has no matching entry in param.zflist
            try:
                zflevel = "level %d" % param.zflist[i+1]
            except IndexError:
                zflevel = "vignette"
            if (d1.size1 % sizeF1) != 0 or (d1.size2 % sizeF2) != 0:
                print("downsampling not available for %s : %d x %d -> %d x %d"
                      % (zflevel, d1.size1, d1.size2, sizeF1, sizeF2))
                break
            print("""
================
downsampling %s
================""" % zflevel)
            group = 'resol%d' % (i+2)  # +2 because we popped the first value
            if debug > 1:
                print("downsampling", group, (sizeF1, sizeF2))
            down = FTICRData(dim=2)  # create dummy 2D
            copyaxes(d1, down)  # copy axes from d1 to down
            down.axis1.size = sizeF1
            down.axis2.size = sizeF2
            hfar.create_from_template(down, group)
            if debug > 0:
                print(down)
            # integer division: the factors are used as range steps and indices
            downsample2D(d1, down, d1.size1 // sizeF1, d1.size2 // sizeF2)
            hfar.axes_update(group=group, axis=1,
                             infos={'left_point': down.axis1.left_point})
        print_time(time.time() - t0, "Downsampling time")
    print("== Processing finished  ==")
    print_time(time.time() - t00, "Total processing time")