Code example #1
 def loadspike(self):
     fullpath = self.selected
     try:
         DATA = FTICRData(name=fullpath)
     except Exception:
         self.waitarea.clear_output(wait=True)
         with self.waitarea:
             print('Error while loading', self.selected)
             self.waitarea.clear_output(wait=True)
         with self.outinfo:
             traceback.print_exc()
         return
     data = None
     DATA.filename = self.selected  # filename and fullpath are equivalent !
     DATA.fullpath = fullpath
     audit = U.auditinitial(title="Load file", append=False)
     DATA.set_unit('m/z')
     self.datap = Dataproc(data)
     self.datap.data = None
     self.datap.DATA = DATA
     self.showinfo()
     self.out1D.clear_output()
     with self.out1D:
         DATA.display(title=self.title, new_fig={'figsize': (10, 5)})
     self.tabs.selected_index = 1
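The load-and-display pattern this callback wraps reduces to a few lines. A minimal sketch, using only the FTICRData constructor, set_unit() and display() calls seen above; the file name is hypothetical:

from spike.FTICR import FTICRData

DATA = FTICRData(name='myfile.msh5')  # hypothetical file name
DATA.set_unit('m/z')                  # switch the current unit to m/z
DATA.display(title='my spectrum', new_fig={'figsize': (10, 5)})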
Code example #2
 def display(self):
     if self.name.value != 'None' and self.direct.value != 'off':  # self.name is a widget, compare its value
         scale = 1
         if self.direct.value == 'up':
             mult = 1
         elif self.direct.value == 'down':
             mult = -1
         else:
             return
         FTICRData(name=self.name.value).set_unit('m/z').mult(mult).display(
             new_fig=self.fig,
             scale=scale,
             color=self.color.value,
             label=op.basename(op.dirname(self.name.value)))
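The up/down logic implements a mirror plot: the second spectrum is multiplied by -1 so it hangs below the axis of the first. A minimal sketch of the idea, assuming display(new_fig=False) reuses the current figure (suggested by the new_fig parameter above); the file names are hypothetical:

from spike.FTICR import FTICRData

up = FTICRData(name='ref.msh5').set_unit('m/z')
down = FTICRData(name='cmp.msh5').set_unit('m/z').mult(-1)  # flip downwards
up.display(label='reference')
down.display(new_fig=False, label='comparison')  # new_fig=False assumed to overlay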
Code example #3
File: views.py Project: nguacon01/form_2D
def comp_sizes():
    """
    calculate size of output file when change sizemultipliers
    """
    if request.method == 'POST':
        return make_response('method must be GET', 400)

    # read query parameters as strings first, so that missing values are caught
    # before the int()/float() conversions can fail
    sizeF1 = request.args.get("sizeF1")
    sizeF2 = request.args.get("sizeF2")
    m1 = request.args.get("m1")
    m2 = request.args.get("m2")
    if not sizeF1 or not sizeF2 or not m1 or not m2:
        return make_response(
            jsonify({
                "msg": "Make sure you filled up the sizemultipliers field",
                "status": "fail"
            }), 400)
    sizeF1 = int(sizeF1)
    sizeF2 = int(sizeF2)
    m1 = float(m1)
    m2 = float(m2)

    dd = FTICRData(dim=2)
    dd.axis1.size = sizeF1
    dd.axis2.size = sizeF2
    szmul = [m1, m2]

    allsizes = proc_spike.comp_sizes(d0=dd, szmlist=szmul)
    sizes = allsizes[0]
    somme = 0
    for a, b in allsizes:
        somme += a * b
    return make_response(
        jsonify({
            "msg": "Success",
            "status": "success",
            "spec_size": {
                "sizeF1": sizes[0],
                "sizeF2": sizes[1]
            },
            "uncompressed_size": str(somme // 1024 // 1024 * 8)
        }), 201)
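A hypothetical client call for this endpoint; the route path /comp_sizes and the host are assumptions, but the query parameters and response fields match the view code above:

import requests

r = requests.get('http://localhost:5000/comp_sizes',  # hypothetical URL and route
                 params={'sizeF1': 2048, 'sizeF2': 65536, 'm1': 1.0, 'm2': 0.5})
resp = r.json()
print(resp['spec_size'], resp['uncompressed_size'])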
Code example #4
def main():
    """does the whole job,
    if we are running in MPI, this is only called by job #0
    all other jobs are running mpi.slave()
    """
    argv = sys.argv
    if len(argv) != 2:
        print("""
syntax is :
(mpirun -np N) python  program   configfile.mscf
""")
        sys.exit(1)

    # get parameters
    configfile = argv[1]
    cp = NPKConfigParser()
    cp.readfp(open(configfile))
    infile = cp.getword("Cadzow", "namein")
    print("infile", infile)
    outfile = cp.getword("Cadzow", "nameout")
    print("outfile", outfile)

    algo = cp.getword("Cadzow", "algorithm")
    print("algorithm", algo)
    n_of_line = cp.getint("Cadzow", "n_of_lines", 70)
    print("n_of_line", n_of_line)
    n_of_iter = cp.getint("Cadzow", "n_of_iters", 1)
    print("n_of_iter", n_of_iter)
    orda = cp.getint("Cadzow", "order", 500)
    print("order", orda)
    n_of_column = cp.getint("Cadzow", "n_of_column", 100)
    print("n_of_column", n_of_column)
    progress = cp.getboolean("Cadzow", "progress", True)

    d0 = load_input(infile)
    d0.check2D()  # raise error if not a 2D
    Set_Table_Param()

    hfar = HDF5File(outfile, "w", debug=0)  # OUTFILE
    d1 = FTICRData(dim=2)  # create dummy 2D
    copyaxes(d0, d1)  # copy axes from d0 to d1
    group = 'resol1'
    hfar.create_from_template(d1, group)

    # prepare index and method
    if n_of_column == 0:
        indexes = range(d0.size2)  # process all
    else:
        indexes = selectcol(d0, n_of_column)  # selections

    if algo == "Cadzow":
        meth = cadz
    elif algo == "rQRd":  #
        meth = rqr
    else:
        raise ("wrong algo")

    # then loop
    t0 = time.time()
    if progress:
        widgets = [
            'Processing %s: ' % (algo),
            pg.Percentage(), ' ',
            pg.Bar(marker='-', left='[', right=']'),
            pg.ETA()
        ]
        pbar = pg.ProgressBar(widgets=widgets,
                              maxval=len(indexes)).start()  #, fd=sys.stdout)

    d1D = d0.col(0)  # template
    xarg = iterarg(indexes, d0, n_of_line, n_of_iter, orda)
    if mpiutil.MPI_size > 1:  # means we are running under MPI !
        mpiutil.mprint('MPI Master job  - starting slave jobs - ')
        res = mpiutil.enum_imap(meth, xarg)  # apply it
        for i, p in res:  # and get results
            d1D.buffer = p
            d1.set_col(indexes[i], d1D)
            if progress: pbar.update(i + 1)
    else:
        res = map(meth, xarg)  # apply it - in Python 3, map is lazy like itertools.imap was in Python 2
        for i, p in enumerate(res):  # and get results
            d1D.buffer = p
            d1.set_col(indexes[i], d1D)
            if progress: pbar.update(i + 1)
    print("Processing time : ", time.time() - t0)
Code example #5
File: views.py Project: nguacon01/form_2D
def mscf_header_info(repo_id, file_full_path, parent_dir):
    """
        download .method, ExciteSweep and scan.xml file
        return project_dict, a dictionary in which contain infomation about chosen mscf file.
        project_dict will be writen in header of output mscf file
    """

    # load method file
    local_corresponse_files = load_corresponse_files(repo_id, parent_dir)
    local_method_file = local_corresponse_files['apexAcquisition.method']
    params_method_file = Solarix.read_param(local_method_file)
    # Check if it is Apex or Solarix:
    # if it is Apex, Solarix.read_param(local_method_file) returns an (almost) empty dictionary
    project_format = 'Solarix'
    if len(params_method_file) < 2:
        project_format = 'Apex'
        params_method_file = Apex.read_param(local_method_file)

    project_dict = {}

    project_name = parent_dir.strip('/').split('/')[-1]

    project_dict['name'] = project_name
    # create object
    FTICR_Data = FTICRData(dim=2)
    # ser_file_path = os.path.join(project_full_path,"ser")
    # ser_file_date_aquisition = os.path.getmtime(ser_file_path)
    # project_dict["ser_date_aquisition"] = datetime.fromtimestamp(ser_file_date_aquisition)

    # find Bo
    FTICR_Data.axis1.calibA = float(params_method_file["ML1"])
    FTICR_Data.axis2.calibA = float(params_method_file["ML1"])
    project_dict["Bo"] = round(FTICR_Data.Bo, 2)

    # Import parameters : size in F1 and F2
    try:
        local_scan_file = local_corresponse_files['scan.xml']
        sizeF1 = Solarix.read_scan(local_scan_file)
    except Exception:
        sizeF1 = 0
    sizeF2 = int(params_method_file["TD"])
    project_dict["sizeF1"] = sizeF1 // 1024
    project_dict["sizeF2"] = sizeF2 // 1024
    project_dict["data_size"] = 4 * sizeF1 * sizeF2 // (1024 * 1024)

    # determine excitation window
    try:  #CR for compatibility with Apex format as there is no EXciteSweep file
        local_excitesweep_file = local_corresponse_files['ExciteSweep']
        fl, fh = Solarix.read_ExciteSweep(local_excitesweep_file)
        freql, freqh = fl[0], fh[0]
    except Exception:
        freqh = float(params_method_file["EXC_hi"])
        freql = float(params_method_file["EXC_low"])
    mzl = round(FTICR_Data.axis2.htomz(freql), 2)
    mzh = round(FTICR_Data.axis2.htomz(freqh), 2)

    if (project_format == 'Apex'):
        project_dict["freqh"] = mzh
        project_dict["freql"] = mzl
        project_dict["mzh"] = freqh
        project_dict["mzl"] = freql
    else:
        project_dict["freqh"] = freqh
        project_dict["freql"] = freql
        project_dict["mzh"] = mzh
        project_dict["mzl"] = mzl

    # show f2_specwidth
    f2_specwidth = float(params_method_file["SW_h"])
    lowmass = FTICR_Data.axis2.htomz(f2_specwidth)
    project_dict["f2_specwidth"] = f2_specwidth
    project_dict["lowmass"] = round(lowmass, 2)

    # set f1_specwidth default value
    # determine f1_specwidth
    f1 = float(params_method_file["IN_26"])  # IN_26 is used in the 2D sequence as the incremental time
    if 0.0 < f1 < 1E-3:  # seems legit
        f1_specwidth = round(1.0 / (2 * f1), 2)
    else:
        f1_specwidth = 50000.0
    project_dict["F1_specwidth"] = f1_specwidth
    return project_dict
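The Bo and m/z computations above rest on the axis calibration: ML1 is copied into calibA on both axes, Bo is then derived by the library, and htomz() converts a frequency into m/z. A minimal sketch of that pattern; the ML1 and frequency values are purely illustrative:

from spike.FTICR import FTICRData

d = FTICRData(dim=2)
d.axis1.calibA = 1.47754e8  # illustrative ML1 value
d.axis2.calibA = 1.47754e8
print(round(d.Bo, 2))                     # field estimated from the calibration
print(round(d.axis2.htomz(500000.0), 2))  # m/z of an illustrative 500 kHz signal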
Code example #6
def config():
    """
    author: DMD - casc4de
    Modify an existing mscf config file, or create a new one.
    """
    # get variable project short path: project_spath
    project_spath = request.args.get('project_spath')
    # get variable config file name
    config_filename = request.args.get('config_filename')

    # create experiment config form
    form = ConfigForm()

    # file_name = project_name + '.mscf'
    # define the root path of all .d projects
    projects_root_folder_path = user_SeaDrive_path()

    # define config file path
    config_file_path = os.path.join(projects_root_folder_path, project_spath,
                                    config_filename)

    # define the chosen project path
    project_full_path = os.path.join(projects_root_folder_path, project_spath)

    #####Information about the chosen project######
    project_dict = {}

    _, project_name = os.path.split(project_spath)

    project_dict['name'] = project_name
    # create object
    FTICR_Data = FTICRData(dim=2)
    ser_file_path = os.path.join(project_full_path, "ser")
    ser_file_date_aquisition = os.path.getmtime(ser_file_path)
    project_dict["ser_date_aquisition"] = datetime.fromtimestamp(
        ser_file_date_aquisition)

    # find method file
    param_filename = Solarix.locate_acquisition(project_full_path)
    params_method_file = Solarix.read_param(param_filename)

    # find Bo
    FTICR_Data.axis1.calibA = float(params_method_file["ML1"])
    FTICR_Data.axis2.calibA = float(params_method_file["ML1"])
    project_dict["Bo"] = round(FTICR_Data.Bo, 2)

    # Import parameters : size in F1 and F2
    sizeF1 = Solarix.read_scan(os.path.join(project_full_path, "scan.xml"))
    sizeF2 = int(params_method_file["TD"])
    project_dict["sizeF1"] = sizeF1 // 1024
    project_dict["sizeF2"] = sizeF2 // 1024
    project_dict["data_size"] = 4 * sizeF1 * sizeF2 // (1024 * 1024)

    # determine excitation window
    try:  #CR for compatibility with Apex format as there is no EXciteSweep file
        fl, fh = Solarix.read_ExciteSweep(
            Solarix.locate_ExciteSweep(project_full_path))
        freql, freqh = fl[0], fh[0]
    except Exception:
        freqh = float(params_method_file["EXC_hi"])
        freql = float(params_method_file["EXC_low"])
    mzl = round(FTICR_Data.axis2.htomz(freql), 2)
    mzh = round(FTICR_Data.axis2.htomz(freqh), 2)

    project_dict["freqh"] = freqh
    project_dict["freql"] = freql
    project_dict["mzh"] = mzh
    project_dict["mzl"] = mzl

    # show f2_specwidth
    f2_specwidth = float(params_method_file["SW_h"])
    lowmass = FTICR_Data.axis2.htomz(f2_specwidth)
    project_dict["f2_specwidth"] = f2_specwidth
    project_dict["lowmass"] = round(lowmass, 2)
    #####END Information about the chosen project######

    # default config file
    default_conf_file = os.path.join(metadata.root_path, "static", "files",
                                     "process2D.default.mscf")

    default_config = NPKConfigParser()
    default_config.readfp(open(default_conf_file, 'r'))

    # ['import', 'processing', 'peak_picking']
    default_sections = default_config.sections()

    # set f1_specwidth default value
    # determine f1_specwidth
    f1 = float(params_method_file["IN_26"])  # IN_26 is used in the 2D sequence as the incremental time
    if 0.0 < f1 < 1E-3:  # seems legit
        f1_specwidth = round(1.0 / (2 * f1), 2)
    else:
        f1_specwidth = None
    project_dict["f1_specwidth"] = f1_specwidth

    # create processing params object base on Proc_Parameters() object in spike lib
    proc_params = proc_spike.Proc_Parameters()

    # check whether the mscf config file exists; if not, create a new one with default values
    if os.path.isfile(config_file_path):
        config = NPKConfigParser()
        try:
            config.readfp(open(config_file_path, 'r'))
        except Exception:
            return render_template(
                "errors/404.html",
                message=
                "There are some attributes which are duplicated. Check again.")
        # load config data into proc_params object
        proc_params.load(config)
        # convert proc_params to dictionary
        config_dict = proc_params.__dict__
        # highmass and F1_specwidth are not in the Proc_Parameters object, so add them to config_dict manually
        config_dict['highmass'] = config['import']['highmass']
        # take F1_specwidth from the existing config file
        config_dict['F1_specwidth'] = config['import']['F1_specwidth']
        config_dict['sizemultipliers'] = config['processing'][
            'sizemultipliers']
    else:
        proc_params.load(default_config)
        # convert proc_params to dictionary
        config_dict = proc_params.__dict__
        # set config_dict['F1_specwidth'] = F1_specwidth from the estimate of project data
        config_dict['F1_specwidth'] = f1_specwidth
        config_dict['sizemultipliers'] = default_config['processing'][
            'sizemultipliers']


    if request.method == "GET":

        # Set value for select forms
        form.compress_outfile.data = str(config_dict["compress_outfile"])
        form.do_sane.data = str(config_dict.get("do_sane", "False"))
        form.format.data = str(config_dict.get("format", "solarix"))
        form.samplingfile.data = str(config_dict.get("samplingfile"))
        # by default, N.U.S field is False
        form.nus.data = str(False)
        form.save_file.data = str(config_filename.split(".")[0])

    if form.validate_on_submit():

        # get form data
        data = request.form.to_dict()
        # fill up config_dict with data from form

        for key, val in data.items():
            config_dict[key] = val

        config_dict["format"] = data["format"].capitalize()

        # define the output file name
        save_file_name = data['save_file'].split('.')[0] + ".mscf"

        ### SET DEFAULT VALUES FOR OUTPUT CONFIG FILE ###
        config_dict['do_F2'] = True
        config_dict['do_F1'] = True
        config_dict['do_f1demodu'] = True
        config_dict['do_modulus'] = True
        config_dict['do_rem_ridge'] = True
        config_dict['urqrd_rank'] = 30
        config_dict['urqrd_iterations'] = 1

        config_dict['tempdir'] = "/tmp/processing/"
        config_dict['infile'] = "ser.msh5"
        config_dict[
            'outfile'] = "{project_name}/{config_filename}_mr.msh5".format(
                project_name=project_name,
                config_filename=save_file_name.split(".")[0])

        # NUS - Non Uniform Sampling; form values arrive as strings, not booleans
        if data.get("nus", "False") == "False":
            config_dict["do_pgsane"] = False
        else:
            config_dict["do_pgsane"] = True

        # create a new config file
        save_file_path = os.path.join(project_full_path, save_file_name)
        with open(save_file_path, "w") as save:
            # write header of config file
            save.write(
                "#Project folder: {} \n".format(project_dict['name']) +
                "#Date of acquisition: {} \n".format(
                    project_dict['ser_date_aquisition']) +
                "#Estimate Bo from internal calibration: {}T \n".format(
                    project_dict['Bo']) +
                "#Experiment size (F1 x F2): {}k x {}k \n".format(
                    project_dict['sizeF1'], project_dict['sizeF2']) +
                "#Data size: {}MB \n".format(project_dict['data_size']) +
                "#Excitation pulses from {}Hz (m/z={}) to {}Hz (m/z={}) \n".
                format(project_dict['freqh'], project_dict['mzh'],
                       project_dict['freql'], project_dict['mzl']) +
                "#Acquisition spectral width: {}Hz (low mass: {}) \n".format(
                    project_dict['f2_specwidth'], project_dict['lowmass']))
            for section in default_sections:
                # config_key and its value come from the submitted form
                for config_key, val in config_dict.items():
                    try:
                        # if the key exists in this section of the default config, overwrite its value
                        if default_config.get(section, config_key):
                            default_config.set(section, config_key, val)
                    except Exception:
                        pass

            # save the new config file
            default_config.write(save)
        # allow user to download it
        return send_from_directory(directory=project_full_path,
                                   filename=save_file_name,
                                   as_attachment=True)

    return render_template("metadata/config_2.html",
                           config_dict=config_dict,
                           form=form,
                           errors=form.errors,
                           project_spath=project_spath,
                           config_filename=config_filename,
                           project_dict=project_dict)
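The config round-trip at the heart of this view is short on its own. A minimal sketch, assuming NPKConfigParser and the proc_spike alias are imported as in the view; the file name is the default template referenced above:

config = NPKConfigParser()
config.readfp(open('process2D.default.mscf', 'r'))
proc_params = proc_spike.Proc_Parameters()  # processing-parameters object from the spike lib
proc_params.load(config)            # populate the object from the parsed file
config_dict = proc_params.__dict__  # expose it as a plain dict, as done above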
Code example #7
File: ImportLC.py Project: CASC4DE/EUFT_Spike
def Import_and_Process_LC(folder,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly
    It creates and returns a HDF5 file containing the data-set
    
    compression is active if (compress=True).
    comp_level is the ratio (in x sigma) under which values are set to 0.0
    downsample is applied if (downsample=True).
    These two parameters are efficient but it takes time.

    dparameters if present, is a dictionnary copied into the final file as json 
    """
    from spike.File import Solarix, Apex
    #    from spike.File.Solarix import locate_acquisition, read_param
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData
    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            importer = _importer
            break
        except Exception:
            pass  # wrong format, try the next importer
    else:  # the loop ended without break: no importer matched
        raise Exception("could not import data-set - unrecognized format")
    # get chromatogram
    minu, tic, maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")
    #size, specwidth,  offset, left_point, highmass, calibA, calibB, calibC, lowfreq, highfreq
    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    data.axis2.size = 1 * sizeF2  # The processing below might change the size, so we anticipate here !
    data.axis2.specwidth = float(params["SW_h"])
    found = False  # search for excitation bandwidth
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
        found = True
    except Exception:
        pass
    if not found:
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except Exception:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except Exception:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version

    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3-parameter calibration; warning: calibB is -ML2')
        data.axis2.calibB *= -1

    data.params = params  # add the parameters to the data-set
    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params,
                             h5name='params')  # store params in the file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except Exception:
        print('ExciteSweep file not stored')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [0.0]  # remembers max values in all datasets - main and downsampled
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    # pick the array typecode for a 4-byte integer - Apex files are stored in int32,
    # and the matching typecode depends on the architecture
    if sys.maxsize == 2**31 - 1:  # 32-bit
        flag = 'l'
    else:  # 64-bit
        flag = 'i'
    spectre = FTICRData(shape=(sizeF2, ))  # to handle FT
    projection = FTICRData(buffer=np.zeros(sizeF2))  # to accumulate projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    with open(fname, "rb") as f:
        ipacket = 0
        szpacket = 10
        packet = np.zeros(
            (szpacket,
             sizeF2))  # store by packet to increase compression speed
        for i1 in range(sizeF1):
            absmax = 0.0
            #print(i1, ipacket, end='  ')
            tbuf = f.read(4 * sizeF2)
            if len(tbuf) != 4 * sizeF2:
                break
            abuf = np.array(array.array(flag, tbuf), dtype=float)
            # processing
            spectre.set_buffer(abuf)
            spectre.adapt_size()
            spectre.hamming().zf(2).rfft().modulus()  # double the size
            mu, sigma = spectre.robust_stats(iterations=5)
            spectre.buffer -= mu
            if compress:
                spectre.zeroing(sigma * comp_level).eroding()
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0],
                                   abs(packet.max()))  # compute max
                data.buffer[i1 - (szpacket - 1):i1 +
                            1, :] = packet[:, :]  # and copy
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample
            for idt, datai in enumerate(datalist):
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    spectre.set_buffer(abuf)
                    spectre.adapt_size()
                    spectre.chsize(
                        datai.size2).hamming().zf(2).rfft().modulus()
                    mu, sigma = spectre.robust_stats(iterations=5)
                    spectre.buffer -= mu
                    if compress:
                        spectre.zeroing(sigma * comp_level).eroding()
                    maxvalues[idt + 1] = max(
                        maxvalues[idt + 1],
                        spectre.absmax)  # compute max (0 is full spectrum)
                    datai.buffer[ii1, :] = spectre.buffer[:]

            pbar.update(i1)
        # flush the remaining packet
        maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
        data.buffer[i1 - ipacket:i1, :] = packet[:ipacket, :]
    # store maxvalues in the file
    HF.store_internal_object(maxvalues, h5name='maxvalues')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')

    # then write projection as 'projectionF2'
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]
    pbar.finish()
    HF.flush()
    return data
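A hypothetical invocation of the importer; 'myrun.d' is an illustrative Bruker folder name and the keyword values simply echo the signature defaults:

data = Import_and_Process_LC('myrun.d',  # hypothetical acquisition folder
                             outfile='LC-MS.msh5',
                             compress=True,
                             comp_level=3.0,
                             downsample=True)
# data.hdf5file keeps a link back to the HDF5 file so it can be closed later,
# as noted in the function body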
Code example #8
def Import_and_Process_LC(folder,
                          nProc=1,
                          outfile="LC-MS.msh5",
                          compress=False,
                          comp_level=3.0,
                          downsample=True,
                          dparameters=None):
    """
    Entry point to import sets of LC-MS spectra
    processing is done on the fly
    It creates and returns a HDF5 file containing the data-set
    
    compression is active if (compress=True).
    comp_level is the ratio (in x sigma) under which values are set to 0.0
    downsample is applied if (downsample=True).
    These two parameters are efficient but it takes time.

    dparameters if present, is a dictionnary copied into the final file as json 
    """
    import multiprocessing as mp
    from spike.File import Solarix, Apex
    #    from spike.File.Solarix import locate_acquisition, read_param
    from spike.NPKData import TimeAxis, copyaxes
    from spike.File import HDF5File as hf
    from spike.util import progressbar as pg
    from spike.util import widgets
    from spike.FTICR import FTICRData

    if nProc > 1:
        print("** running on %d processors" % nProc)
        Pool = mp.Pool(nProc)

    for _importer in (Solarix, Apex):
        try:
            parfilename = _importer.locate_acquisition(folder)
            params = _importer.read_param(parfilename)
            sizeF2 = int(params["TD"])
            importer = _importer
            break
        except Exception:
            pass  # wrong format, try the next importer
    else:  # the loop ended without break: no importer matched
        raise Exception("could not import data-set - unrecognized format")
    # get chromatogram
    minu, tic, maxpk = import_scan(os.path.join(folder, "scan.xml"))
    # Import parameters : size in F1 and F2
    sizeF1 = len(minu)
    sizeF2 = int(params["TD"])
    if os.path.isfile(os.path.join(folder, "ser")):
        fname = os.path.join(folder, "ser")
    else:
        raise Exception(
            "You are dealing with 1D data, you should use Import_1D")
    #size, specwidth,  offset, left_point, highmass, calibA, calibB, calibC, lowfreq, highfreq
    data = FTICRData(dim=2)  # create dummy LCMS
    data.axis1 = TimeAxis(size=sizeF1,
                          tabval=np.array(minu),
                          importunit="min",
                          currentunit='min')
    data.axis2.size = 1 * sizeF2  # The processing below might change the size, so we anticipate here !
    data.axis2.specwidth = float(params["SW_h"])
    found = False  # search for excitation bandwidth
    try:
        data.axis2.lowfreq, data.axis2.highfreq = read_ExciteSweep(
            locate_ExciteSweep(folder))
        found = True
    except Exception:
        pass
    if not found:
        try:
            data.axis2.highfreq = float(params["EXC_Freq_High"])
        except Exception:
            data.axis2.highfreq = data.axis2.calibA / float(
                params["EXC_low"])  # on Apex version
        try:
            data.axis2.lowfreq = float(params["EXC_Freq_Low"])
        except Exception:
            data.axis2.lowfreq = data.axis2.calibA / float(
                params["EXC_hi"])  # on Apex version

    data.axis2.highmass = float(params["MW_high"])
    data.axis2.left_point = 0
    data.axis2.offset = 0.0
    data.axis2.calibA = float(params["ML1"])
    data.axis2.calibB = float(params["ML2"])
    data.axis2.calibC = float(params["ML3"])
    if not math.isclose(data.axis2.calibC, 0.0):
        print('Using 3-parameter calibration; warning: calibB is -ML2')
        data.axis2.calibB *= -1

    data.params = params  # add the parameters to the data-set
    HF = hf.HDF5File(outfile, "w")
    if compress:
        HF.set_compression(True)
    HF.create_from_template(data, group='resol1')
    HF.store_internal_object(params,
                             h5name='params')  # store params in the file
    # then store files xx.methods and scan.xml
    HF.store_internal_file(parfilename)
    HF.store_internal_file(os.path.join(folder, "scan.xml"))
    try:
        HF.store_internal_file(locate_ExciteSweep(folder))
    except Exception:
        print('ExciteSweep file not found')
    data.hdf5file = HF  # I need a link back to the file in order to close it

    # Start processing - first computes sizes and sub-datasets
    print(data)
    datalist = []  # remembers all downsampled dataset
    maxvalues = [0.0]  # remembers max values in all datasets - main and downsampled
    allsizes = []  # default, so that iterargF2 below is well-defined when downsample is False
    if downsample:
        allsizes = comp_sizes(data.size1, data.size2)
        for i, (si1, si2) in enumerate(allsizes):
            datai = FTICRData(dim=2)
            copyaxes(data, datai)
            datai.axis1.size = si1
            datai.axis2.size = si2
            HF.create_from_template(datai, group='resol%d' % (i + 2))
            datalist.append(datai)
            maxvalues.append(0.0)

    # Then go through input file
    projection = FTICRData(buffer=np.zeros(sizeF2))  # to accumulate projection
    projection.axis1 = data.axis2.copy()
    Impwidgets = [
        'Importing: ',
        widgets.Percentage(), ' ',
        widgets.Bar(marker='-', left='[', right=']'),
        widgets.ETA()
    ]
    pbar = pg.ProgressBar(widgets=Impwidgets, maxval=sizeF1,
                          fd=sys.stdout).start()

    with open(fname, "rb") as f:
        ipacket = 0
        szpacket = 11
        packet = np.zeros(
            (szpacket,
             sizeF2))  # store by packet to increase compression speed
        absmax = 0.0

        xarg = iterargF2(f, sizeF1, sizeF2, compress, comp_level,
                         allsizes)  # construct iterator for main loop

        if nProc > 1:
            res = Pool.imap(processF2row,
                            xarg)  # multiproc processing using Pool
        else:
            res = map(processF2row, xarg)  # plain single proc processing
        for i1, spectres in enumerate(res):  # and get results
            spectre = spectres.pop(0)
            packet[ipacket, :] = spectre.buffer[:]  # store into packet
            np.maximum(projection.buffer,
                       spectre.buffer,
                       out=projection.buffer)  # projection
            if (ipacket + 1) % szpacket == 0:  # and dump every szpacket
                maxvalues[0] = max(maxvalues[0],
                                   abs(packet.max()))  # compute max
                data.buffer[i1 - (szpacket - 1):i1 +
                            1, :] = packet[:, :]  # and copy
                packet[:, :] = 0.0
                ipacket = 0
            else:
                ipacket += 1
            # now downsample
            for idt, spectre in enumerate(spectres):
                datai = datalist[idt]
                if i1 % (sizeF1 // datai.size1) == 0:  # modulo the size ratio
                    ii1 = (i1 * datai.size1) // sizeF1
                    maxvalues[idt + 1] = max(
                        maxvalues[idt + 1],
                        spectre.absmax)  # compute max (0 is full spectrum)
                    datai.buffer[ii1, :] = spectre.buffer[:]

            pbar.update(i1 + 1)
            last = i1
        # flush the remaining packet
        maxvalues[0] = max(maxvalues[0], abs(packet[:ipacket, :].max()))
        data.buffer[last - ipacket:last, :] = packet[:ipacket, :]
    pbar.finish()

    # then write projection as 'projectionF2'
    print('writing projections')
    proj = FTICRData(dim=1)
    proj.axis1 = data.axis2.copy()
    HF.create_from_template(proj, group='projectionF2')
    proj.buffer[:] = projection.buffer[:]

    # store maxvalues in the file
    print('writing max abs value')
    HF.store_internal_object(maxvalues, h5name='maxvalues')

    print('writing parameters')
    if dparameters is not None:
        HF.store_internal_object(dparameters, h5name='import_parameters')

    # and close
    HF.flush()
    if nProc > 1:
        Pool.close()  # finally closes multiprocessing slaves
    return data
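The multiprocessing variant is called the same way, with nProc selecting the number of worker processes; again a hypothetical invocation with an illustrative folder name:

data = Import_and_Process_LC('myrun.d', nProc=4,  # hypothetical folder, 4 workers
                             outfile='LC-MS.msh5',
                             compress=True)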
Code example #9
def processF2row(data):
    # numpy (np), sys and array are assumed to be imported at module level
    from spike.FTICR import FTICRData
    tbuf, compress, comp_level, allsizes, i1, sizeF1 = data
    # pick the array typecode for a 4-byte integer - Apex files are stored in int32,
    # and the matching typecode depends on the architecture
    if sys.maxsize == 2**31 - 1:  # 32-bit
        flag = 'l'
    else:  # 64-bit
        flag = 'i'
    abuf = np.array(array.array(flag, tbuf), dtype=float)

    # processing
    spectre = FTICRData(buffer=abuf)  # to handle FT
    spectre.adapt_size()
    spectre.hamming().zf(2).rfft().modulus()  # double the size
    mu, sigma = spectre.robust_stats(iterations=5)
    spectre.buffer -= mu
    if compress:
        spectre.zeroing(sigma * comp_level).eroding()

    spectres = []
    spectres.append(spectre)
    # now downsampling
    for idt, (size1, size2) in enumerate(allsizes):
        if i1 % (sizeF1 // size1) == 0:  # modulo the size ratio
            spectre = FTICRData(buffer=abuf)
            spectre.adapt_size()
            spectre.chsize(size2).hamming().zf(2).rfft().modulus()
            mu, sigma = spectre.robust_stats(iterations=5)
            spectre.buffer -= mu
            if compress:
                spectre.zeroing(sigma * comp_level).eroding()
            spectres.append(spectre)

    return spectres
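Example #8 feeds this worker through an iterargF2() generator that is not shown here. A sketch of what it might look like, reconstructed from the read loop of example #7 and the tuple unpacked at the top of processF2row(); this is an assumption, not the actual implementation:

def iterargF2(f, sizeF1, sizeF2, compress, comp_level, allsizes):
    # yields one argument tuple per F2 row, in the order processF2row() unpacks them
    for i1 in range(sizeF1):
        tbuf = f.read(4 * sizeF2)    # one row of int32 values from the ser file
        if len(tbuf) != 4 * sizeF2:  # stop on a short read, as in example #7
            return
        yield (tbuf, compress, comp_level, allsizes, i1, sizeF1)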