def loadspike(self):
    """Load the currently selected file as FTICR data and display it.

    The path is read from ``self.selected``.  On a loading failure an error
    message is printed in ``self.waitarea``, the traceback is printed in
    ``self.outinfo`` and the method returns without touching ``self.datap``.
    On success a fresh ``Dataproc`` is stored in ``self.datap`` (with the
    loaded spectrum in its ``DATA`` attribute), the spectrum is drawn in
    ``self.out1D`` and the tab of index 1 is selected.
    """
    fullpath = self.selected
    try:
        DATA = FTICRData(name=fullpath)
    except Exception:
        # Report the problem in the GUI rather than propagating it;
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        self.waitarea.clear_output(wait=True)
        with self.waitarea:
            print('Error while loading', self.selected)
        self.waitarea.clear_output(wait=True)
        with self.outinfo:
            traceback.print_exc()
        return
    DATA.filename = self.selected  # filename and fullpath are equivalent !
    DATA.fullpath = fullpath
    # called for its effect only, the returned value was never used
    U.auditinitial(title="Load file", append=False)
    DATA.set_unit('m/z')
    # the processing object starts without raw data, only the spectrum
    self.datap = Dataproc(None)
    self.datap.data = None
    self.datap.DATA = DATA
    self.showinfo()
    self.out1D.clear_output()
    with self.out1D:
        DATA.display(title=self.title, new_fig={'figsize': (10, 5)})
    self.tabs.selected_index = 1
def display(self):
    """Draw the spectrum selected in this widget set on ``self.fig``.

    Nothing is drawn when no file is selected (``self.name.value`` is the
    string ``'None'``), when ``self.direct.value`` is ``'off'``, or when it
    is neither ``'up'`` nor ``'down'``.  With ``'down'`` the spectrum is
    multiplied by -1 before display, so it is drawn upside down.
    """
    # The test is on the *value* of the name widget: the widget object
    # itself never equals the string 'None', so comparing it directly
    # let the "no file selected" case fall through to the loader.
    if self.name.value == 'None' or self.direct.value == 'off':
        return
    mult = {'up': 1, 'down': -1}.get(self.direct.value)
    if mult is None:
        return
    scale = 1
    spectrum = FTICRData(name=self.name.value).set_unit('m/z').mult(mult)
    # the label is the name of the directory that contains the file
    spectrum.display(
        new_fig=self.fig,
        scale=scale,
        color=self.color.value,
        label=op.basename(op.dirname(self.name.value)))
def comp_sizes():
    """Compute the size of the output file for given size multipliers.

    GET parameters:
        sizeF1, sizeF2: integer sizes of the experiment along F1 and F2.
        m1, m2: float size multipliers along F1 and F2.

    Returns a JSON response:
        - 400 with ``status: fail`` when a parameter is missing, is not a
          number, or is zero;
        - 400 (plain text) when the request method is POST;
        - 201 with the size of the main spectrum (``spec_size``) and the
          ``uncompressed_size`` summed over all the sizes returned by
          ``proc_spike.comp_sizes``.
    """
    if request.method == 'POST':
        return make_response('method must be GET', 400)

    # Missing or malformed parameters make int()/float() raise; map them
    # to zero so they are answered by the same 400 as empty values.
    try:
        sizeF1 = int(request.args.get("sizeF1"))
        sizeF2 = int(request.args.get("sizeF2"))
        m1 = float(request.args.get("m1"))
        m2 = float(request.args.get("m2"))
    except (TypeError, ValueError):
        sizeF1 = sizeF2 = 0
        m1 = m2 = 0.0

    if not sizeF1 or not sizeF2 or not m1 or not m2:
        return make_response(
            jsonify({
                "msg": "Make sure you filled up sizemultipliers field",
                "status": "fail"
            }), 400)

    # dummy 2D data-set, only used to carry the axis sizes
    dd = FTICRData(dim=2)
    dd.axis1.size = sizeF1
    dd.axis2.size = sizeF2
    allsizes = proc_spike.comp_sizes(d0=dd, szmlist=[m1, m2])
    sizes = allsizes[0]
    somme = sum(a * b for a, b in allsizes)
    # NOTE(review): the division is floored before the multiplication by 8,
    # so the reported value is always a multiple of 8 - kept as is.
    return make_response(
        jsonify({
            "msg": "Success",
            "status": "success",
            "spec_size": {
                "sizeF1": sizes[0],
                "sizeF2": sizes[1]
            },
            "uncompressed_size": str(somme // 1024 // 1024 * 8)
        }), 201)
def main():
    """Run the whole denoising job described by a configuration file.

    The single command line argument is the configuration file; its
    "Cadzow" section gives the input and output files, the algorithm
    ("Cadzow" or "rQRd") and its parameters.  The selected columns of the
    input 2D data-set are processed and stored in the 'resol1' group of
    the output file.

    When running under MPI this is only called by job #0, all the other
    jobs are running mpi.slave().

    Raises:
        SystemExit: (code 1) when the command line is not valid.
        ValueError: when the configured algorithm is not known.
    """
    import itertools

    argv = sys.argv
    if len(argv) != 2:
        print("""
syntax is :
    (mpirun -np N) python program configfile.mscf
""")
        sys.exit(1)

    # get parameters
    configfile = argv[1]
    cp = NPKConfigParser()
    with open(configfile) as cfg:  # do not leak the file handle
        cp.readfp(cfg)
    infile = cp.getword("Cadzow", "namein")
    print("infile", infile)
    outfile = cp.getword("Cadzow", "nameout")
    print("outfile", outfile)
    algo = cp.getword("Cadzow", "algorithm")
    print("algorithm", algo)
    n_of_line = cp.getint("Cadzow", "n_of_lines", 70)
    print("n_of_line", n_of_line)
    n_of_iter = cp.getint("Cadzow", "n_of_iters", 1)
    print("n_of_iter", n_of_iter)
    orda = cp.getint("Cadzow", "order", 500)
    print("order", orda)
    n_of_column = cp.getint("Cadzow", "n_of_column", 100)
    print("n_of_column", n_of_column)
    progress = cp.getboolean("Cadzow", "progress", True)

    d0 = load_input(infile)
    d0.check2D()  # raise error if not a 2D
    Set_Table_Param()

    # NOTE(review): the output file is never explicitly closed here.
    hfar = HDF5File(outfile, "w", debug=0)  # OUTFILE
    d1 = FTICRData(dim=2)  # create dummy 2D
    copyaxes(d0, d1)  # copy axes from d0 to d1
    group = 'resol1'
    hfar.create_from_template(d1, group)

    # prepare index and method
    if n_of_column == 0:
        indexes = range(d0.size2)  # process all
    else:
        indexes = selectcol(d0, n_of_column)  # selections
    if algo == "Cadzow":
        meth = cadz
    elif algo == "rQRd":
        meth = rqr
    else:
        # a bare string cannot be raised, use a real exception
        raise ValueError("wrong algo")

    # then loop
    t0 = time.time()
    if progress:
        widgets = [
            'Processing %s: ' % (algo),
            pg.Percentage(), ' ',
            pg.Bar(marker='-', left='[', right=']'),
            pg.ETA()
        ]
        pbar = pg.ProgressBar(widgets=widgets, maxval=len(indexes))

    d1D = d0.col(0)  # template
    xarg = iterarg(indexes, d0, n_of_line, n_of_iter, orda)
    if mpiutil.MPI_size > 1:  # means we are running under MPI !
        mpiutil.mprint('MPI Master job - starting slave jobs - ')
        res = mpiutil.enum_imap(meth, xarg)  # apply it
    else:
        # itertools.imap only exists on Python 2; the builtin map is the
        # lazy equivalent on Python 3
        lazy_map = getattr(itertools, 'imap', map)
        res = enumerate(lazy_map(meth, xarg))  # apply it
    for i, p in res:  # and get results
        d1D.buffer = p
        d1.set_col(indexes[i], d1D)
        if progress:
            pbar.update(i + 1)
    print("Processing time : ", time.time() - t0)
def mscf_header_info(repo_id, file_full_path, parent_dir):
    """Collect the information written in the header of an mscf file.

    The acquisition files of the project (the .method file, scan.xml and
    ExciteSweep) are obtained with ``load_corresponse_files``.

    Args:
        repo_id: repository identifier, passed to ``load_corresponse_files``.
        file_full_path: not used by this function.
        parent_dir: project directory; its last component is the project name.

    Returns:
        project_dict, a dictionary describing the chosen project: name, Bo,
        sizeF1, sizeF2, data_size, freqh, freql, mzh, mzl, f2_specwidth,
        lowmass and F1_specwidth.
    """
    # load method file
    local_corresponse_files = load_corresponse_files(repo_id, parent_dir)
    local_method_file = local_corresponse_files['apexAcquisition.method']
    params_method_file = Solarix.read_param(local_method_file)

    # Check if it is Apex or Solarix: on an Apex file, the Solarix reader
    # returns a (nearly) empty dictionary.
    project_format = 'Solarix'
    if len(params_method_file) < 2:
        project_format = 'Apex'
        params_method_file = Apex.read_param(local_method_file)

    project_dict = {}
    project_dict['name'] = parent_dir.strip('/').split('/')[-1]

    # dummy data-set, used for its calibration and unit conversions
    FTICR_Data = FTICRData(dim=2)

    # find Bo
    FTICR_Data.axis1.calibA = float(params_method_file["ML1"])
    FTICR_Data.axis2.calibA = float(params_method_file["ML1"])
    project_dict["Bo"] = round(FTICR_Data.Bo, 2)

    # Import parameters : size in F1 and F2
    try:
        local_scan_file = local_corresponse_files['scan.xml']
        sizeF1 = Solarix.read_scan(local_scan_file)
    except Exception:
        # best effort: an absent or unreadable scan file gives size 0
        sizeF1 = 0
    sizeF2 = int(params_method_file["TD"])
    project_dict["sizeF1"] = sizeF1 // 1024
    project_dict["sizeF2"] = sizeF2 // 1024
    project_dict["data_size"] = 4 * sizeF1 * sizeF2 // (1024 * 1024)

    # determine excitation window
    try:
        # for compatibility with Apex format as there is no ExciteSweep file
        local_excitesweep_file = local_corresponse_files['ExciteSweep']
        fl, fh = Solarix.read_ExciteSweep(local_excitesweep_file)
        freql, freqh = fl[0], fh[0]
    except Exception:
        freqh = float(params_method_file["EXC_hi"])
        freql = float(params_method_file["EXC_low"])
    mzl = round(FTICR_Data.axis2.htomz(freql), 2)
    mzh = round(FTICR_Data.axis2.htomz(freqh), 2)
    if project_format == 'Apex':
        # for Apex, frequencies and m/z are stored the other way round
        project_dict["freqh"] = mzh
        project_dict["freql"] = mzl
        project_dict["mzh"] = freqh
        project_dict["mzl"] = freql
    else:
        project_dict["freqh"] = freqh
        project_dict["freql"] = freql
        project_dict["mzh"] = mzh
        project_dict["mzl"] = mzl

    # show f2_specwidth
    f2_specwidth = float(params_method_file["SW_h"])
    lowmass = FTICR_Data.axis2.htomz(f2_specwidth)
    project_dict["f2_specwidth"] = f2_specwidth
    project_dict["lowmass"] = round(lowmass, 2)

    # determine f1_specwidth, with a default value when IN_26 is not usable
    # IN_26 is used in 2D sequence as incremental time
    f1 = float(params_method_file["IN_26"])
    if 0.0 < f1 < 1E-3:  # seems legit
        f1_specwidth = round(1.0 / (2 * f1), 2)
    else:
        f1_specwidth = 50000.0000
    project_dict["F1_specwidth"] = f1_specwidth
    return project_dict
def config():
    """Edit an existing mscf configuration file, or create a new one.

    author: DMD - casc4de

    Request arguments:
        project_spath: short path of the project, relative to the root
            folder returned by ``user_SeaDrive_path()``.
        config_filename: name of the configuration file in that project.

    The view gathers information on the project (acquisition date, Bo,
    sizes, excitation window, spectral widths) from its acquisition files,
    loads the processing parameters from the existing configuration file
    or from the default one, and fills the form with them.  When the form
    is submitted and valid, a new configuration file is written in the
    project folder and sent back as an attachment; otherwise the form page
    is rendered.
    """
    # get variable project short path: project_spath
    project_spath = request.args.get('project_spath')
    # get variable config file name
    config_filename = request.args.get('config_filename')
    if not project_spath or not config_filename:
        return render_template(
            "errors/404.html",
            message="Missing project path or config file name."), 400
    # create experiment config form
    form = ConfigForm()
    # define the root path of all .d projects
    projects_root_folder_path = user_SeaDrive_path()
    # define config file path
    config_file_path = os.path.join(projects_root_folder_path, project_spath,
                                    config_filename)
    # define the chosen project path
    project_full_path = os.path.join(projects_root_folder_path, project_spath)

    # Both values come from the request: refuse anything which resolves
    # outside of the projects root folder ("..", absolute paths).
    root_abs = os.path.abspath(projects_root_folder_path)

    def _inside_root(path):
        try:
            return os.path.commonpath([root_abs,
                                       os.path.abspath(path)]) == root_abs
        except ValueError:  # e.g. paths on different drives
            return False

    if not (_inside_root(project_full_path)
            and _inside_root(config_file_path)):
        return render_template("errors/404.html",
                               message="Invalid project path."), 400

    ##### Information about the chosen project ######
    project_dict = {}
    _, project_name = os.path.split(project_spath)
    project_dict['name'] = project_name
    # dummy data-set, used for its calibration and unit conversions
    FTICR_Data = FTICRData(dim=2)
    ser_file_path = os.path.join(project_full_path, "ser")
    ser_file_date_aquisition = os.path.getmtime(ser_file_path)
    project_dict["ser_date_aquisition"] = datetime.fromtimestamp(
        ser_file_date_aquisition)
    # find method file
    param_filename = Solarix.locate_acquisition(project_full_path)
    params_method_file = Solarix.read_param(param_filename)
    # find Bo
    FTICR_Data.axis1.calibA = float(params_method_file["ML1"])
    FTICR_Data.axis2.calibA = float(params_method_file["ML1"])
    project_dict["Bo"] = round(FTICR_Data.Bo, 2)
    # Import parameters : size in F1 and F2
    sizeF1 = Solarix.read_scan(os.path.join(project_full_path, "scan.xml"))
    sizeF2 = int(params_method_file["TD"])
    project_dict["sizeF1"] = sizeF1 // 1024
    project_dict["sizeF2"] = sizeF2 // 1024
    project_dict["data_size"] = 4 * sizeF1 * sizeF2 // (1024 * 1024)
    # determine excitation window
    try:
        # for compatibility with Apex format as there is no ExciteSweep file
        fl, fh = Solarix.read_ExciteSweep(
            Solarix.locate_ExciteSweep(project_full_path))
        freql, freqh = fl[0], fh[0]
    except Exception:
        freqh = float(params_method_file["EXC_hi"])
        freql = float(params_method_file["EXC_low"])
    mzl = round(FTICR_Data.axis2.htomz(freql), 2)
    mzh = round(FTICR_Data.axis2.htomz(freqh), 2)
    project_dict["freqh"] = freqh
    project_dict["freql"] = freql
    project_dict["mzh"] = mzh
    project_dict["mzl"] = mzl
    # show f2_specwidth
    f2_specwidth = float(params_method_file["SW_h"])
    lowmass = FTICR_Data.axis2.htomz(f2_specwidth)
    project_dict["f2_specwidth"] = f2_specwidth
    project_dict["lowmass"] = round(lowmass, 2)
    ##### END Information about the chosen project ######

    # default config file
    default_conf_file = os.path.join(metadata.root_path, "static", "files",
                                     "process2D.default.mscf")
    default_config = NPKConfigParser()
    with open(default_conf_file, 'r') as default_file:
        default_config.readfp(default_file)
    # ['import', 'processing', 'peak_picking']
    default_sections = default_config.sections()

    # estimate f1_specwidth from the project data
    # IN_26 is used in 2D sequence as incremental time
    f1 = float(params_method_file["IN_26"])
    if 0.0 < f1 < 1E-3:  # seems legit
        f1_specwidth = round(1.0 / (2 * f1), 2)
    else:
        f1_specwidth = None
    project_dict["f1_specwidth"] = f1_specwidth

    # processing params object, based on Proc_Parameters() in spike lib
    proc_params = proc_spike.Proc_Parameters()
    # use the mscf config file if it exists, the default values otherwise
    if os.path.isfile(config_file_path):
        config = NPKConfigParser()
        try:
            with open(config_file_path, 'r') as config_file:
                config.readfp(config_file)
        except Exception:
            return render_template(
                "errors/404.html",
                message=
                "There are some attributes which are duplicated. Check again.")
        # load config data into proc_params object
        proc_params.load(config)
        # config_dict is the attribute dictionary of proc_params itself
        config_dict = proc_params.__dict__
        # highmass, F1_specwidth and sizemultipliers are taken as written
        # in the existing config file
        config_dict['highmass'] = config['import']['highmass']
        config_dict['F1_specwidth'] = config['import']['F1_specwidth']
        config_dict['sizemultipliers'] = config['processing'][
            'sizemultipliers']
    else:
        proc_params.load(default_config)
        config_dict = proc_params.__dict__
        # F1_specwidth comes from the estimate made on the project data
        config_dict['F1_specwidth'] = f1_specwidth
        config_dict['sizemultipliers'] = default_config['processing'][
            'sizemultipliers']

    if request.method == "GET":
        # Set value for select forms
        form.compress_outfile.data = str(config_dict["compress_outfile"])
        form.do_sane.data = str(config_dict.get("do_sane", "False"))
        form.format.data = str(config_dict.get("format", "solarix"))
        form.samplingfile.data = str(config_dict.get("samplingfile"))
        # by default, N.U.S field is False
        form.nus.data = str(False)
        form.save_file.data = str(config_filename.split(".")[0])

    if form.validate_on_submit():
        # get form data
        data = request.form.to_dict()
        # fill up config_dict with data from form
        for key, val in data.items():
            config_dict[key] = val
        config_dict["format"] = data["format"].capitalize()
        # define output file; basename() keeps it inside the project folder
        save_file_name = os.path.basename(
            data['save_file'].split('.')[0] + ".mscf")

        ### SET DEFAULT VALUES FOR OUTPUT CONFIG FILE ###
        config_dict['do_F2'] = True
        config_dict['do_F1'] = True
        config_dict['do_f1demodu'] = True
        config_dict['do_modulus'] = True
        config_dict['do_rem_ridge'] = True
        config_dict['urqrd_rank'] = 30
        config_dict['urqrd_iterations'] = 1
        config_dict['tempdir'] = "/tmp/processing/"
        config_dict['infile'] = "ser.msh5"
        config_dict[
            'outfile'] = "{project_name}/{config_filename}_mr.msh5".format(
                project_name=project_name,
                config_filename=save_file_name.split(".")[0])
        # NUS - Non Uniform Sampled
        # Form values are strings: comparing to the boolean False never
        # matched, so do_pgsane was always True.  Anything which is not
        # an explicit "false" value still means True.
        nus_value = str(data["nus"]).strip().lower()
        config_dict["do_pgsane"] = nus_value not in ("false", "0", "no",
                                                     "off", "")

        # create a new config file
        save_file_path = os.path.join(project_full_path, save_file_name)
        with open(save_file_path, "w") as save:
            # write header of config file
            save.write(
                "#Project folder: {} \n".format(project_dict['name']) +
                "#Date of acquisition: {} \n".format(
                    project_dict['ser_date_aquisition']) +
                "#Estimate Bo from internal calibration: {}T \n".format(
                    project_dict['Bo']) +
                "#Experiment size (F1 x F2): {}k x {}k \n".format(
                    project_dict['sizeF1'], project_dict['sizeF2']) +
                "#Data size: {}MB \n".format(project_dict['data_size']) +
                "#Excitation pulses from {}Hz (m/z={}) to {}Hz (m/z={}) \n".
                format(project_dict['freqh'], project_dict['mzh'],
                       project_dict['freql'], project_dict['mzl']) +
                "#Acquisition spectral width: {}Hz (low mass: {}) \n".format(
                    project_dict['f2_specwidth'], project_dict['lowmass']))
            for section in default_sections:
                # config_key and its value come from the submitted form
                for config_key, val in config_dict.items():
                    try:
                        # only the keys present (and not empty) in this
                        # section of the default file are updated
                        if default_config.get(section, config_key):
                            default_config.set(section, config_key, val)
                    except Exception:
                        # deliberate best effort: keys unknown to the
                        # section, or values refused by the parser, keep
                        # the default value.
                        # NOTE(review): non-string values (True, 30) may be
                        # refused by the parser - confirm they are written.
                        pass
            # save the new config file
            default_config.write(save)
        # allow user to download it
        return send_from_directory(directory=project_full_path,
                                   filename=save_file_name,
                                   as_attachment=True)

    return render_template("metadata/config_2.html",
                           config_dict=config_dict,
                           form=form,
                           errors=form.errors,
                           project_spath=project_spath,
                           config_filename=config_filename,
                           project_dict=project_dict)
def Import_and_Process_LC(folder,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly

    It creates a HDF5 file containing the data-set, and returns the
    FTICRData object attached to it (the file is in its hdf5file attribute).

    folder      the acquisition folder, should contain the parameter file,
                scan.xml and the ser file
    outfile     name of the HDF5 file which is created
    compress    compression is active if (compress=True).
    comp_level  is the ratio (in x sigma) under which values are set to 0.0
    downsample  is applied if (downsample=True).
                These two parameters are efficient but it takes time.
    dparameters if present, is a dictionary copied into the final file

    raises an Exception if the format is not recognized or if there is no
    ser file in folder.
    """
    from spike.File import Solarix, Apex
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData

    # try each known format in turn
    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            break
        except Exception:
            # params may not exist at this point, so it cannot be printed;
            # simply go on with the next format
            continue
    else:
        raise Exception("could not import data-set - unrecognized format")

    # get chromatogram
    minu, _tic, _maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")

    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    # The processing below might change the size, so we anticipate here !
    data.axis2.size = 1 * sizeF2
    data.axis2.specwidth = float(params["SW_h"])
    # calibration is set first, as the Apex fallback for the excitation
    # bandwidth below is computed from calibA
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3 parameters calibration,  Warning calibB is -ML2')
        data.axis2.calibB *= -1
    # search for excitation bandwidth
    found = False
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
        found = True
    except Exception:
        pass  # no ExciteSweep file, use the parameters below
    if not found:
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except Exception:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except Exception:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version
    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.params = params  # add the parameters to the data-set

    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params, h5name='params')  # store params in file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except Exception:
        print('ExciteSweep file not stored')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [0.0]  # max values in all datasets - main and downsampled
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    # the flag used by array depends on architecture
    if sys.maxsize == 2**31 - 1:  # here on 32bit
        flag = 'l'  # Apex files are in int32
    else:  # here in 64bit
        flag = 'i'  # strange, but works here.
    spectre = FTICRData(shape=(sizeF2, ))  # to handle FT
    projection = FTICRData(buffer=np.zeros(sizeF2))  # accumulates projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    nrows = 0  # number of rows read and processed so far
    ipacket = 0  # number of rows waiting in packet
    szpacket = 10
    # store by packet to increase compression speed
    packet = np.zeros((szpacket, sizeF2))
    with open(fname, "rb") as f:
        for i1 in range(sizeF1):
            tbuf = f.read(4 * sizeF2)
            if len(tbuf) != 4 * sizeF2:
                break  # truncated file
            abuf = np.array(array.array(flag, tbuf), dtype=float)
            # processing
            spectre.set_buffer(abuf)
            spectre.adapt_size()
            spectre.hamming().zf(2).rfft().modulus()  # double the size
            mu, sigma = spectre.robust_stats(iterations=5)
            spectre.buffer -= mu
            if compress:
                spectre.zeroing(sigma * comp_level).eroding()
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0], abs(packet.max()))
                data.buffer[i1 - (szpacket - 1):i1 + 1, :] = packet[:, :]
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample
            for idt, datai in enumerate(datalist):
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    spectre.set_buffer(abuf)
                    spectre.adapt_size()
                    spectre.chsize(
                        datai.size2).hamming().zf(2).rfft().modulus()
                    mu, sigma = spectre.robust_stats(iterations=5)
                    spectre.buffer -= mu
                    if compress:
                        spectre.zeroing(sigma * comp_level).eroding()
                    # compute max (0 is full spectrum)
                    maxvalues[idt + 1] = max(maxvalues[idt + 1],
                                             spectre.absmax)
                    datai.buffer[ii1, :] = spectre.buffer[:]
            nrows = i1 + 1
            pbar.update(i1)

    # flush the remaining packet: its ipacket rows are the last processed
    # ones, i.e. rows nrows-ipacket to nrows-1.  Nothing to do (and
    # nothing to take the max of) if the last packet was complete.
    if ipacket > 0:
        maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
        data.buffer[nrows - ipacket:nrows, :] = packet[:ipacket, :]

    # store maxvalues in the file
    HF.store_internal_object(maxvalues, h5name='maxvalues')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')

    # then write projection as 'projectionF2'
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]
    pbar.finish()
    HF.flush()
    return data
def Import_and_Process_LC(folder,
                          nProc=1,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly

    It creates a HDF5 file containing the data-set, and returns the
    FTICRData object attached to it (the file is in its hdf5file attribute).

    folder      the acquisition folder, should contain the parameter file,
                scan.xml and the ser file
    nProc       number of processes; a multiprocessing pool is used when
                nProc > 1
    outfile     name of the HDF5 file which is created
    compress    compression is active if (compress=True).
    comp_level  is the ratio (in x sigma) under which values are set to 0.0
    downsample  is applied if (downsample=True).
                These two parameters are efficient but it takes time.
    dparameters if present, is a dictionary copied into the final file

    raises an Exception if the format is not recognized or if there is no
    ser file in folder.
    """
    import multiprocessing as mp

    pool = None
    if nProc > 1:
        print("** running on %d processors" % nProc)
        # created first, before any file is opened, as in the original code
        pool = mp.Pool(nProc)
    try:
        return _import_and_process_lc(folder, pool, outfile, compress,
                                      comp_level, downsample, dparameters)
    finally:
        if pool is not None:
            pool.close()  # closes multiprocessing slaves, even on error


def _import_and_process_lc(folder, pool, outfile, compress, comp_level,
                           downsample, dparameters):
    """Do the work of Import_and_Process_LC, with pool (or None) for the FT."""
    from spike.File import Solarix, Apex
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData

    # try each known format in turn
    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            break
        except Exception:
            continue  # go on with the next format
    else:
        raise Exception("could not import data-set - unrecognized format")

    # get chromatogram
    minu, _tic, _maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")

    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    # The processing below might change the size, so we anticipate here !
    data.axis2.size = 1 * sizeF2
    data.axis2.specwidth = float(params["SW_h"])
    # calibration is set first, as the Apex fallback for the excitation
    # bandwidth below is computed from calibA
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3 parameters calibration,  Warning calibB is -ML2')
        data.axis2.calibB *= -1
    # search for excitation bandwidth
    found = False
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
        found = True
    except Exception:
        pass  # no ExciteSweep file, use the parameters below
    if not found:
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except Exception:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except Exception:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version
    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.params = params  # add the parameters to the data-set

    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params, h5name='params')  # store params in file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except Exception:
        print('ExciteSweep file not found')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [0.0]  # max values in all datasets - main and downsampled
    allsizes = []  # stays empty when there is no downsampling
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    projection = FTICRData(buffer=np.zeros(sizeF2))  # accumulates projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    nrows = 0  # number of rows received so far
    ipacket = 0  # number of rows waiting in packet
    szpacket = 11
    # store by packet to increase compression speed
    packet = np.zeros((szpacket, sizeF2))
    with open(fname, "rb") as f:
        # construct iterator for main loop
        xarg = iterargF2(f, sizeF1, sizeF2, compress, comp_level, allsizes)
        if pool is not None:
            res = pool.imap(processF2row, xarg)  # multiproc processing
        else:
            res = map(processF2row, xarg)  # plain single proc processing
        for i1, spectres in enumerate(res):  # and get results
            # first one is the full resolution spectrum
            spectre = spectres.pop(0)
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0], abs(packet.max()))
                data.buffer[i1 - (szpacket - 1):i1 + 1, :] = packet[:, :]
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample - the remaining spectra are the downsampled ones
            for idt, spectre in enumerate(spectres):
                datai = datalist[idt]
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    # compute max (0 is full spectrum)
                    maxvalues[idt + 1] = max(maxvalues[idt + 1],
                                             spectre.absmax)
                    datai.buffer[ii1, :] = spectre.buffer[:]
            nrows = i1 + 1
            pbar.update(i1 + 1)

    # flush the remaining packet: its ipacket rows are the last received
    # ones, i.e. rows nrows-ipacket to nrows-1.  Nothing to do (and
    # nothing to take the max of) if the last packet was complete.
    if ipacket > 0:
        maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
        data.buffer[nrows - ipacket:nrows, :] = packet[:ipacket, :]
    pbar.finish()

    # then write projection as 'projectionF2'
    print('writing projections')
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]
    # store maxvalues in the file
    print('writing max abs value')
    HF.store_internal_object(maxvalues, h5name='maxvalues')
    print('writing parameters')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')
    # and close
    HF.flush()
    return data
def processF2row(data):
    """Process one raw row, given as the tuple built for the main loop.

    data is (tbuf, compress, comp_level, allsizes, i1, sizeF1) where tbuf
    is the raw binary row and i1 its index among the sizeF1 rows.

    Returns a list of spectra: the full size one first, followed by one
    downsampled spectrum for each (size1, size2) in allsizes for which i1
    falls on the size ratio sizeF1 // size1.
    """
    from spike.FTICR import FTICRData

    tbuf, compress, comp_level, allsizes, i1, sizeF1 = data
    # the typecode used by array depends on the architecture:
    # 'l' on 32bit (Apex files are in int32), 'i' on 64bit
    typecode = 'l' if sys.maxsize == 2**31 - 1 else 'i'
    abuf = np.array(array.array(typecode, tbuf), dtype=float)

    def transform(newsize=None):
        "apodise, zerofill, FT and baseline-correct the row, resized if asked"
        spec = FTICRData(buffer=abuf)  # to handle FT
        spec.adapt_size()
        resized = spec if newsize is None else spec.chsize(newsize)
        resized.hamming().zf(2).rfft().modulus()  # double the size
        mean, noise = spec.robust_stats(iterations=5)
        spec.buffer -= mean
        if compress:
            spec.zeroing(noise * comp_level).eroding()
        return spec

    results = [transform()]
    # now downsampling, only on the rows which fall on the size ratio
    results.extend(
        transform(size2) for (size1, size2) in allsizes
        if i1 % (sizeF1 // size1) == 0)
    return results