def resolution_estimate(raw_data, n_spectra=25):
    """Estimate mass resolution as a function of m/z.

    For each of ``n_spectra`` randomly sampled spectra, detects peaks,
    keeps the brighter ones, measures resolution at each peak and fits
    log(resolution) ~ log(mz) robustly with RANSAC.  The median slope and
    intercept across spectra define the returned model.

    :param raw_data: dataset handle accepted by ``read_random_spectrum``
    :param n_spectra: number of random spectra to sample
    :return: callable mapping an m/z value to the estimated resolution
    """
    slopes = []
    intercepts = []
    # BUGFIX: the inner peak loop used to re-bind `i`, shadowing this loop
    # variable; use a throwaway name here and a comprehension below.
    for _ in range(n_spectra):
        mzs, intensities = read_random_spectrum(raw_data)
        peak_positions = np.array(gradient(mzs, intensities)[-1])
        intensities_at_peaks = intensities[peak_positions]
        # keep only peaks above the 40th intensity percentile
        high_intensity_threshold = np.percentile(intensities_at_peaks, 40)
        peak_positions = peak_positions[intensities[peak_positions] > high_intensity_threshold]
        resolutions = np.array([resolution_at_peak(peak_pos, mzs, intensities)
                                for peak_pos in peak_positions])
        # drop peaks where no resolution could be measured
        mzs = mzs[peak_positions]
        mzs = mzs[resolutions > 0]
        resolutions = resolutions[resolutions > 0]
        ransac = RANSACRegressor()
        ransac.fit(np.log(mzs).reshape((-1, 1)), np.log(resolutions).reshape((-1, 1)))
        slopes.append(ransac.estimator_.coef_[0][0])
        intercepts.append(ransac.estimator_.intercept_[0])
    # median across spectra keeps a few bad spectra from skewing the model
    slope = np.median(slopes)
    intercept = np.median(intercepts)
    return lambda mz: np.exp(intercept + slope * np.log(mz))
Ejemplo n.º 2
0
def imzml(input_filename,
          output_filename,
          smoothMethod="nosmooth",
          centroid=False):
    """Convert a Bruker-style .h5 file to imzML.

    :param input_filename: path to the input .h5 file
    :param output_filename: path of the imzML file to create
    :param smoothMethod: smoothing method name passed to ``smooth_spectrum``;
        "nosmooth" skips smoothing
    :param centroid: if True, peak-pick each spectrum before writing
    """
    import h5py
    import numpy as np
    ### Open files
    h5 = h5py.File(input_filename, 'r')  # Readonly, file must exist
    ### get root groups from input data
    root_group_names = h5.keys()
    spots = h5['Spots']
    spectraGroup = 'InitialMeasurement'
    mzs = np.asarray(
        h5['/SamplePositions/GlobalMassAxis/']['SamplePositions']
    )  # we don't write this but will use it for peak detection
    file_version = h5['Version'][
        0]  # some hard-coding to deal with different file versions
    if file_version > 5:
        coords = h5['Registrations']['0']['Coordinates']
    else:
        coords = h5['Coordinates']

    # normalise coordinates to integer pixel indices
    coords = np.asarray(coords).T.round(5)
    coords -= np.amin(coords, axis=0)
    step = np.array(
        [np.mean(np.diff(np.unique(coords[:, i]))) for i in range(3)])
    step[np.isnan(step)] = 1  # axes with a single position get unit step
    coords /= np.reshape(step, (3, ))
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    g = h5['Spots/0/' + spectraGroup + '/']
    mz_dtype = g['SamplePositions/SamplePositions'][:].dtype
    int_dtype = g['Intensities'][:].dtype
    print('dim: {} x {}'.format(nrow, ncol))
    n_total = len(spots.keys())
    done = 0
    keys = map(str, sorted(map(int, h5['Spots'].keys())))
    ### write spectra
    with ImzMLWriter(output_filename,
                     mz_dtype=mz_dtype,
                     intensity_dtype=int_dtype) as imzml:
        for key, pos in zip(keys, coords):
            spot = spots[key]
            ## make new spectrum
            # BUGFIX: work on a per-spectrum copy of the shared mass axis;
            # the original re-bound `mzs` during centroiding, so every
            # spectrum after the first was centroided against an already
            # centroided axis.
            spectrum_mzs = mzs
            intensities = np.asarray(spot[spectraGroup]['Intensities'])
            # BUGFIX: the original tested `smoothMethod != []` (string vs
            # list, always True); only smooth when a method was requested
            if smoothMethod != "nosmooth":
                intensities = smooth_spectrum(spectrum_mzs, intensities, smoothMethod)
            if centroid:
                from pyMS import centroid_detection
                spectrum_mzs, intensities, _ = centroid_detection.gradient(
                    spectrum_mzs, intensities, max_output=-1, weighted_bins=3)
            # write to file (flip the row so the image is not mirrored)
            pos = (nrow - 1 - pos[1], pos[0], pos[2])
            imzml.addSpectrum(spectrum_mzs, intensities, pos)
            done += 1
            if done % 1000 == 0:
                print("[%s] progress: %.1f%%" % (input_filename,
                                                 float(done) * 100.0 / n_total))
    print("finished!")
Ejemplo n.º 3
0
def get_lists_of_mzs(sf):
    """Compute theoretical and centroided isotope patterns for sum formula ``sf``.

    Returns a dict with the raw isotope envelope ("isodist_*"), the detected
    centroids ("grad_mzs"/"grad_int") and the centroid indices relative to
    the envelope window ("grad_ind").  On any calculation failure a dict of
    the same shape with empty lists is returned so callers can proceed.
    """
    try:
        isotope_ms = pyisocalc.isodist(sf, plot=False, sigma=0.01, charges=-2,
                                       resolution=100000.0, do_centroid=False)
        mzlist = list(isotope_ms.get_mzs())
        intenslist = list(isotope_ms.get_intensities())
        mzs_list, intensities_list, indices_list = gradient(
            isotope_ms.get_mzs(), isotope_ms.get_intensities(),
            max_output=-1, weighted_bins=0)
        # snap each centroid to the higher of the two neighbouring samples
        indices_list = [i if intenslist[i] > intenslist[i + 1] else i + 1
                        for i in indices_list]
        mzs_list = [mzlist[i] for i in indices_list]
        intensities_list = [intenslist[i] for i in indices_list]
        # window of the envelope with non-negligible (>0.01) intensity
        min_i = np.min([i for i in range(len(intenslist)) if intenslist[i] > 0.01])
        max_i = np.max([i for i in range(len(intenslist)) if intenslist[i] > 0.01])
        return {
            "isodist_mzs": mzlist[min_i:max_i],
            "isodist_int": intenslist[min_i:max_i],
            "grad_mzs": list(mzs_list),
            # BUGFIX: `list(indices_list - min_i)` raised TypeError (list
            # minus int) which the bare except silently swallowed, so this
            # function always returned the empty-list fallback
            "grad_int": list(intensities_list),
            "grad_ind": [i - min_i for i in indices_list]}
    except Exception:
        # best-effort: fall back to an empty result of the same shape
        return {
            "isodist_mzs": [],
            "isodist_int": [],
            "grad_mzs": [],
            "grad_int": [],
            "grad_ind": []
        }
def exact_mass(JSON_config_file):
    """Run an exact-mass search over the dataset's mean spectrum and save results."""
    from pyMS.centroid_detection import gradient
    import numpy as np

    config = get_variables(JSON_config_file)
    sum_formulae, adducts, mz_list = generate_isotope_patterns(config)
    IMS_dataset = load_data(config)
    half_ppm = config['image_generation']['ppm'] / 2
    spec_axis, mean_spec = IMS_dataset.generate_summary_spectrum(
        summary_type='mean', ppm=half_ppm)
    # centroid the mean spectrum; detected peak indices are not needed
    detected_mzs, detected_counts, _ = gradient(
        np.asarray(spec_axis), np.asarray(mean_spec), weighted_bins=2)
    ppm_value_score = run_exact_mass_search(
        config, detected_mzs, detected_counts, sum_formulae, adducts, mz_list)
    output_results_exactMass(config, ppm_value_score, sum_formulae, adducts,
                             mz_list, fname='exactMass_all_adducts')
Ejemplo n.º 5
0
def mzImages(filename_in_list, save_dir):
    """Export PNG ion images for peaks common to exactly two datasets.

    :param filename_in_list: exactly two hdf5 dataset paths; peaks found in
        the first whose m/z matches a peak of the second (within ``ppm``)
        are exported for both datasets
    :param save_dir: directory into which the PNG images are written
    """
    import sys
    sys.path.append('C:\\Users\\Luca Rappez\\Desktop\\python_codebase\\')
    from pyMS.centroid_detection import gradient
    from pyIMS.hdf5.inMemoryIMS_hdf5 import inMemoryIMS_hdf5
    from pyIMS.image_measures import level_sets_measure
    import matplotlib.pyplot as plt
    import numpy as np
    #%matplotlib inline
    import bokeh as bk
    from bokeh.plotting import output_notebook
    output_notebook()
    ppm = 0.75
    # BUGFIX: the original tested len(list) -- the *builtin* list type --
    # which raised TypeError; the code below indexes mz_list_all[1], so
    # exactly two inputs are required
    if len(filename_in_list) != 2:
        raise ValueError('list should only have two entries')
    c_thresh = 0.05
    mz_list_all = []
    for filename_in in filename_in_list:
     # Parse data
        IMS_dataset = inMemoryIMS_hdf5(filename_in)
        hist_axis, freq_spec = IMS_dataset.generate_summary_spectrum(summary_type='freq', ppm=ppm/2)
        # Centroid detection of frequency spectrum
        mz_list, count_list, idx_list = gradient(np.asarray(hist_axis), np.asarray(freq_spec), weighted_bins=2)
        mz_list = [m for m, c in zip(mz_list, count_list) if c > c_thresh]
        mz_list_all.append(mz_list)

    # NOTE(review): signed ppm difference -- presumably abs() was intended
    # so matches on both sides of the peak count; confirm before changing
    im_list = [m for m in mz_list_all[0] if any([1e6*(m-mm)/m < ppm for mm in mz_list_all[1]])]

    """# Calcualte MoC for images of all peaks
    nlevels=30
    im_list={}
    for ii, c in enumerate(count_list):
        ion_image = IMS_dataset.get_ion_image(np.asarray([mz_list[ii],]),ppm)
        im = ion_image.xic_to_image(0)
        m,im_moc,levels,nobjs = level_sets_measure.measure_of_chaos(im,nlevels,interp='median') #just output measure value
        m=1-m
        im_list[mz_list[ii]]={'image':im,'moc':m,'freq':c}
"""
    from pySpatialMetabolomics.tools import colourmaps
    c_map = colourmaps.get_colormap('grey')  # if black images: open->save->rerun
    c_pal = [[int(255*cc) for cc in c_map(c)] for c in range(0, 254)]

    # Export all images
    import png as pypng
    for filename_in in filename_in_list:
     # Parse data
        IMS_dataset = inMemoryIMS_hdf5(filename_in)
        for mz in im_list:
            with open('{}/{}_{}.png'.format(save_dir, mz, filename_in), 'wb') as f_out:
                im_out = IMS_dataset.get_ion_image([mz, ], [ppm, ]).xic_to_image(0)
                im_out = 254*im_out/np.max(im_out)  # scale into palette range
                w, h = np.shape(im_out)
                # separate name for the writer: the original reused `w`
                png_writer = pypng.Writer(h, w, palette=c_pal, bitdepth=8)
                png_writer.write(f_out, im_out)
Ejemplo n.º 6
0
def prepare(mzs, ints, centroids=True):
    """Return (mzs, intensities) as float64/float32 arrays, centroiding if needed.

    :param mzs: m/z values (profile axis or centroid positions)
    :param ints: matching intensities
    :param centroids: if True the data is already centroided and passed
        through unchanged; otherwise it is Savitzky-Golay smoothed and
        peak-picked first
    :return: (float64 m/z array, float32 intensity array with negative
        values clipped to zero)
    """
    if centroids:
        mzs_list, intensity_list = mzs, ints
    else:
        # light smoothing before peak detection
        ints = signal.savgol_filter(ints, 5, 2)
        mzs_list, intensity_list, indices_list = \
            centroid_detection.gradient(np.asarray(mzs), np.asarray(ints), max_output=-1, weighted_bins=3)
    mzs_list = np.asarray(mzs_list).astype(np.float64)
    intensity_list = np.asarray(intensity_list).astype(np.float32)
    # smoothing can produce small negative intensities; clip them
    intensity_list[intensity_list < 0] = 0
    return mzs_list, intensity_list
Ejemplo n.º 7
0
def centroid_IMS(input_filename, output_filename,instrumentInfo={},sharedDataInfo={}):
    """Centroid every spectrum of an IMS dataset and write an IMS_centroid.hdf5 file.

    :param input_filename: path accepted by slFile (raw spectral data)
    :param output_filename: hdf5 file to create (truncated if it exists)
    :param instrumentInfo: optional dict copied into the instrument
        parameter group's attributes
    :param sharedDataInfo: optional dict copied into the sample group's
        attributes
    """
    # NOTE(review): {} defaults are mutable default arguments; safe here
    # because they are only read, never mutated.
    from pyMS.centroid_detection import gradient
    # write out a IMS_centroid.hdf5 file
    sl = slFile(input_filename)
    n_total = np.shape(sl.spectra)[0]
    with h5py.File(output_filename,'w') as f_out:
        ### make root groups for output data
        spectral_data = f_out.create_group('spectral_data')
        spatial_data = f_out.create_group('spatial_data')
        shared_data = f_out.create_group('shared_data')

        ### populate common variables - can hardcode as I know what these are for h5 data
        # parameters
        instrument_parameters_1 = shared_data.create_group('instrument_parameters/001')
        if instrumentInfo != {}:
            for tag in instrumentInfo:
                instrument_parameters_1.attrs[tag] = instrumentInfo[tag]
        # ROIs
            #todo - determine and propagate all ROIs
        roi_1 = shared_data.create_group('regions_of_interest/001')
        roi_1.attrs['name'] = 'root region'
        roi_1.attrs['parent'] = ''
        # Sample
        sample_1 = shared_data.create_group('samples/001')
        if sharedDataInfo != {}:
            for tag in sharedDataInfo:
                sample_1.attrs[tag] = sharedDataInfo[tag]

        done = 0
        # one group per spectrum, keyed by its index as a string
        for key in range(0,n_total):
            mzs,intensities = sl.get_spectrum(key)
            # peak-pick the profile spectrum
            mzs_c, intensities_c, _ = gradient(mzs, intensities)
            this_spectrum = spectral_data.create_group(str(key))
            _ = this_spectrum.create_dataset('centroid_mzs',data=np.float32(mzs_c),compression="gzip",compression_opts=9)
            # intensities
            _ = this_spectrum.create_dataset('centroid_intensities',data=np.float32(intensities_c),compression="gzip",compression_opts=9)
            # coordinates
            _ = this_spectrum.create_dataset('coordinates',data=(sl.coords[0, key],sl.coords[1, key],sl.coords[2, key]))
            ## link to shared parameters
            # ROI
            this_spectrum['ROIs/001'] = h5py.SoftLink('/shared_data/regions_of_interest/001')
            # Sample
            this_spectrum['samples/001'] = h5py.SoftLink('/shared_data/samples/001')
            # Instrument config
            this_spectrum['instrument_parameters'] = h5py.SoftLink('/shared_data/instrument_parameters/001')
            done += 1
            if done % 1000 == 0:
                print "[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total)
        print "finished!"
Ejemplo n.º 8
0
def imzml(input_filename, output_filename, smoothMethod="nosmooth", centroid=False):
    """Convert a Bruker-style .h5 file to imzML.

    :param input_filename: path to the input .h5 file
    :param output_filename: path of the imzML file to create
    :param smoothMethod: smoothing method name for ``smooth_spectrum``;
        "nosmooth" skips smoothing
    :param centroid: if True, peak-pick each spectrum before writing
    """
    import h5py
    import numpy as np
    ### Open files
    h5 = h5py.File(input_filename, 'r')  # Readonly, file must exist
    ### get root groups from input data
    root_group_names = h5.keys()
    spots = h5['Spots']
    spectraGroup = 'InitialMeasurement'
    mzs = np.asarray(h5['/SamplePositions/GlobalMassAxis/']['SamplePositions'])  # we don't write this but will use it for peak detection
    file_version = h5['Version'][0]    # some hard-coding to deal with different file versions
    if file_version > 5:
        coords = h5['Registrations']['0']['Coordinates']
    else:
        coords = h5['Coordinates']

    # normalise coordinates to integer pixel indices
    coords = np.asarray(coords).T.round(5)
    coords -= np.amin(coords, axis=0)
    step = np.array([np.mean(np.diff(np.unique(coords[:, i]))) for i in range(3)])
    step[np.isnan(step)] = 1  # axes with a single position get unit step
    coords /= np.reshape(step, (3,))
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    g = h5['Spots/0/'+spectraGroup+'/']
    mz_dtype = g['SamplePositions/SamplePositions'][:].dtype
    int_dtype = g['Intensities'][:].dtype
    print('dim: {} x {}'.format(nrow, ncol))
    n_total = len(spots.keys())
    done = 0
    keys = map(str, sorted(map(int, h5['Spots'].keys())))
    ### write spectra
    with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml:
        for key, pos in zip(keys, coords):
            spot = spots[key]
            ## make new spectrum
            # BUGFIX: use a per-spectrum copy of the shared mass axis; the
            # original re-bound `mzs` when centroiding, corrupting the axis
            # for all following spectra
            spectrum_mzs = mzs
            intensities = np.asarray(spot[spectraGroup]['Intensities'])
            # BUGFIX: the original tested `smoothMethod != []` (always True)
            if smoothMethod != "nosmooth":
                intensities = smooth_spectrum(spectrum_mzs, intensities, smoothMethod)
            if centroid:
                from pyMS import centroid_detection
                spectrum_mzs, intensities, _ = centroid_detection.gradient(
                    spectrum_mzs, intensities, max_output=-1, weighted_bins=3)
            # write to file (flip the row so the image is not mirrored)
            pos = (nrow - 1 - pos[1], pos[0], pos[2])
            imzml.addSpectrum(spectrum_mzs, intensities, pos)
            done += 1
            if done % 1000 == 0:
                print("[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total))
    print("finished!")
Ejemplo n.º 9
0
def exact_mass(JSON_config_file):
    """Search the dataset's mean spectrum for exact masses and write results."""
    import numpy as np
    from pyMS.centroid_detection import gradient

    config = get_variables(JSON_config_file)
    sum_formulae, adducts, mz_list = generate_isotope_patterns(config)
    IMS_dataset = load_data(config)
    spec_axis, mean_spec = IMS_dataset.generate_summary_spectrum(
        summary_type='mean', ppm=config['image_generation']['ppm'] / 2)
    # centroid the mean spectrum; the index list is unused
    mzs, counts, _unused_idx = gradient(np.asarray(spec_axis),
                                        np.asarray(mean_spec),
                                        weighted_bins=2)
    score = run_exact_mass_search(config, mzs, counts, sum_formulae,
                                  adducts, mz_list)
    output_results_exactMass(config, score, sum_formulae, adducts, mz_list,
                             fname='exactMass_all_adducts')
Ejemplo n.º 10
0
def get_lists_of_mzs(sf):
    """Compute theoretical and centroided isotope patterns for sum formula ``sf``.

    Returns a dict with the raw isotope envelope ("isodist_*"), the detected
    centroids ("grad_mzs"/"grad_int") and the centroid indices relative to
    the envelope window ("grad_ind"); on failure, the same keys with empty
    lists.
    """
    try:
        isotope_ms = pyisocalc.isodist(sf,
                                       plot=False,
                                       sigma=0.01,
                                       charges=-2,
                                       resolution=100000.0,
                                       do_centroid=False)
        mzlist = list(isotope_ms.get_mzs())
        intenslist = list(isotope_ms.get_intensities())
        mzs_list, intensities_list, indices_list = gradient(
            isotope_ms.get_mzs(),
            isotope_ms.get_intensities(),
            max_output=-1,
            weighted_bins=0)
        # snap each centroid to the higher of the two neighbouring samples
        indices_list = [
            i if intenslist[i] > intenslist[i + 1] else i + 1
            for i in indices_list
        ]
        mzs_list = [mzlist[i] for i in indices_list]
        intensities_list = [intenslist[i] for i in indices_list]
        # window of the envelope with non-negligible (>0.01) intensity
        min_i = np.min(
            [i for i in range(len(intenslist)) if intenslist[i] > 0.01])
        max_i = np.max(
            [i for i in range(len(intenslist)) if intenslist[i] > 0.01])
        return {
            "isodist_mzs": mzlist[min_i:max_i],
            "isodist_int": intenslist[min_i:max_i],
            "grad_mzs": list(mzs_list),
            "grad_int": list(intensities_list),
            # BUGFIX: `list(indices_list - min_i)` was list-minus-int, a
            # TypeError that the bare except silently swallowed, so the
            # function always returned the empty fallback
            "grad_ind": [i - min_i for i in indices_list]
        }
    except Exception:
        # best-effort: empty result of the same shape
        return {
            "isodist_mzs": [],
            "isodist_int": [],
            "grad_mzs": [],
            "grad_int": [],
            "grad_ind": []
        }
Ejemplo n.º 11
0
def centroid_imzml(input_filename, output_filename, step=None, apodization=False, w_size=10, min_intensity=1e-5):
    # write a file to imzml format (centroided)
    """Centroid every spectrum of a dataset and write it in imzML format.

    :param input_filename: path accepted by slFile (raw spectral data)
    :param output_filename: imzML file to create
    :param step: pixel pitch per axis (3 values); None (or an explicit [])
        means "estimate from the coordinates"
    :param apodization: if True, smooth each spectrum with a Hann window
    :param w_size: Hann window length used for apodization
    :type min_intensity: float
    :param min_intensity: minimum intensity forwarded to peak detection
    """
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMS.centroid_detection import gradient
    sl = slFile(input_filename)
    mz_dtype = sl.Mzs.dtype
    int_dtype = sl.get_spectrum(0)[1].dtype
    # Convert coords to index -> kinda hacky
    coords = np.asarray(sl.coords).T.round(5)
    coords -= np.amin(coords, axis=0)
    # BUGFIX: step=[] was a mutable default argument; None is the safe
    # sentinel (passing [] explicitly still triggers the guess, keeping
    # backward compatibility)
    if step is None or step == []:  # have a guess
        step = np.array([np.mean(np.diff(np.unique(coords[:, i]))) for i in range(3)])
        step[np.isnan(step)] = 1  # axes with a single position get unit step
    coords /= np.reshape(step, (3,))
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print('dim: {} x {}'.format(nrow, ncol))
    n_total = np.shape(sl.spectra)[0]
    with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml:
        done = 0
        for key in range(0, n_total):
            mzs, intensities = sl.get_spectrum(key)
            if apodization:
                import scipy.signal as signal
                #todo - add to processing list in imzml
                win = signal.hann(w_size)
                intensities = signal.fftconvolve(intensities, win, mode='same') / sum(win)
            mzs_c, intensities_c, _ = gradient(mzs, intensities, min_intensity=min_intensity)
            pos = coords[key]
            # flip the row so the image is not mirrored
            pos = (nrow - 1 - pos[1], pos[0], pos[2])
            imzml.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                print("[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total))
        print("finished!")
Ejemplo n.º 12
0
def isodist(molecules, charges=0, output='', plot=False, sigma=0.35, resolution=250, cutoff=0.0001, do_centroid=True,
            verbose=False):
    """Calculate the isotope distribution of one or more molecular formulae.

    :param molecules: comma-separated formula string
    :param charges: charge to use; 0 means "derive from the formula"
    :param output: output specification checked by ``checkoutput``
    :param plot: plot the profile + centroids (requires do_centroid)
    :param sigma: gaussian peak width used to render the profile
    :param resolution: instrument resolution used to choose the sampling
    :param cutoff: minimum relative abundance kept
    :param do_centroid: if True, render a gaussian profile and peak-pick it
    :param verbose: print per-formula mass/charge information
    :return: a ``mass_spectrum`` with centroids (and profile if rendered)
    """
    should_exit = checkhelpcall(molecules)  # renamed: `exit` shadowed the builtin
    save = checkoutput(output)
    if should_exit:
        sys.exit(0)

    molecules = molecules.split(',')
    for element in molecules:
        element = formulaExpander(element)
        if verbose:
            print(
                'The mass of %(substance)s is %(Mass)f and the calculated charge is %(Charge)d with m/z of %(Mz)f.' % {
                    'substance': element, 'Mass': molmass(element), 'Charge': molcharge(element),
                    'Mz': mz(molmass(element), molcharge(element), charges)})

    # NOTE(review): only the *last* formula's expansion survives the loop
    # above; everything below operates on that single `element` (this
    # matches the original behaviour -- confirm it is intended).
    if charges == 0:
        charges = molcharge(element)
        if charges == 0:
            charges = 1
    else:
        if verbose:
            print("Using user-supplied charge of %d for mass spectrum" % charges)

    isomasses = isotopemasses(element)
    isoratios = isotoperatios(element)

    if len(isomasses) != 1:
        ratios, masses = isotopes(isoratios, isomasses, cutoff)  # slow
        final = genDict(masses, ratios, charges, cutoff)  # very slow
    else:
        final = genDict(isomasses[0], isoratios[0], charges, cutoff)

    ms_output = mass_spectrum()
    if do_centroid:
        from pyMS.centroid_detection import gradient
        pts = resolution2pts(min(final.keys()), max(final.keys()), resolution)
        xvector, yvector = genGaussian(final, sigma, pts)  # slow
        ms_output.add_spectrum(xvector, yvector)
        mz_list, intensity_list, centroid_list = gradient(ms_output.get_spectrum()[0], ms_output.get_spectrum()[1],
                                                          max_output=-1, weighted_bins=5)
        ms_output.add_centroids(mz_list, intensity_list)
    else:
        mz_idx = sorted(final.keys())
        ms_output.add_centroids(mz_idx, [final[f] for f in mz_idx])
    # plotting/saving need the profile vectors, which only exist when
    # do_centroid is True (the original crashed with NameError otherwise)
    if plot and do_centroid:
        import matplotlib.pyplot as plt  # for plotting
        plt.plot(xvector, yvector)
        plt.plot(mz_list, intensity_list, 'rx')
        plt.show()

    if save and do_centroid:
        # NOTE(review): `savefile` is not defined in this function --
        # presumably a module-level name set elsewhere; confirm.
        # BUGFIX: `g.close` was missing the call parentheses, so the file
        # was never closed; a with-block closes it reliably.
        with open(savefile, 'w') as g:
            for x, y in zip(xvector.tolist(), yvector.tolist()):
                g.write(str(x) + "\t" + str(y) + "\n")
    return ms_output
Ejemplo n.º 13
0
def hdf5(filename_in, filename_out, info, smoothMethod="nosmooth"):
    """Convert a Bruker .h5 file into the centroided IMS hdf5 format.

    :param filename_in: path to the input .h5 file
    :param filename_out: hdf5 file to create (truncated if it exists)
    :param info: dict with sample metadata (sample_name, sample_source,
        sample_preparation, maldi_matrix, matrix_application)
    :param smoothMethod: one of 'nosmooth', 'sgsmooth', 'apodization'
    """
    import h5py
    import numpy as np
    import datetime
    import scipy.signal as signal
    from pyMS import centroid_detection
    import sys

    ### Open files
    f_in = h5py.File(filename_in, 'r')  # Readonly, file must exist
    f_out = h5py.File(filename_out, 'w')  # create file, truncate if exists
    print(filename_in)
    print(filename_out)
    ### get root groups from input data
    root_group_names = f_in.keys()
    spots = f_in['Spots']
    file_version = f_in['Version'][0]
    # some hard-coding to deal with different file versions
    if file_version > 5:
        coords = f_in['Registrations']['0']['Coordinates']
    else:
        coords = f_in['Coordinates']
    spectraGroup = 'InitialMeasurement'
    Mzs = np.asarray(
        f_in['/SamplePositions/GlobalMassAxis/']['SamplePositions']
    )  # we don't write this but will use it for peak detection

    ### make root groups for output data
    spectral_data = f_out.create_group('spectral_data')
    spatial_data = f_out.create_group('spatial_data')
    shared_data = f_out.create_group('shared_data')

    ### populate common variables - can hardcode as I know what these are for h5 data
    # parameters
    instrument_parameters_1 = shared_data.create_group(
        'instrument_parameters/001')
    instrument_parameters_1.attrs['instrument name'] = 'Bruker Solarix 7T'
    instrument_parameters_1.attrs['mass range'] = [Mzs[0], Mzs[-1]]
    instrument_parameters_1.attrs['analyser type'] = 'FTICR'
    instrument_parameters_1.attrs['smothing during convertion'] = smoothMethod
    instrument_parameters_1.attrs['data conversion'] = 'h5->hdf5:' + str(
        datetime.datetime.now())
    # ROIs
    #todo - determine and propagate all ROIs
    sample_1 = shared_data.create_group('samples/001')
    sample_1.attrs['name'] = info["sample_name"]
    sample_1.attrs['source'] = info["sample_source"]
    sample_1.attrs['preparation'] = info["sample_preparation"]
    sample_1.attrs['MALDI matrix'] = info["maldi_matrix"]
    sample_1.attrs['MALDI matrix application'] = info["matrix_application"]
    ### write spectra
    n = 0
    n_spots = len(spots.keys())
    for key in spots.keys():
        spot = spots[key]
        ## make new spectrum
        raw_intensities = np.asarray(spot[spectraGroup]['Intensities'])
        if smoothMethod == 'nosmooth':
            mzs, intensities = nosmooth(Mzs, raw_intensities)
        # BUGFIX: this branch duplicated the 'nosmooth' test, making
        # sg_smooth unreachable; 'sgsmooth' is presumably the intended
        # method name -- confirm against callers
        elif smoothMethod == 'sgsmooth':
            mzs, intensities = sg_smooth(Mzs, raw_intensities)
        elif smoothMethod == 'apodization':
            mzs, intensities = apodization(Mzs, raw_intensities)
        else:
            raise ValueError(
                'smooth method not one of: [nosmooth,sgsmooth,apodization]')
        mzs_list, intensity_list, indices_list = centroid_detection.gradient(
            mzs, intensities, max_output=-1, weighted_bins=3)

        # add intensities
        this_spectrum = spectral_data.create_group(key)
        this_intensities = this_spectrum.create_dataset(
            'centroid_intensities',
            data=np.float32(intensity_list),
            compression="gzip",
            compression_opts=9)
        # add coordinates
        key_dbl = float(key)
        this_coordiantes = this_spectrum.create_dataset(
            'coordinates',
            data=(coords[0, key_dbl], coords[1, key_dbl], coords[2, key_dbl]))
        ## link to shared parameters
        # mzs
        this_mzs = this_spectrum.create_dataset('centroid_mzs',
                                                data=np.float32(mzs_list),
                                                compression="gzip",
                                                compression_opts=9)
        # ROI
        this_spectrum['ROIs/001'] = h5py.SoftLink(
            '/shared_data/regions_of_interest/001')
        # Sample
        this_spectrum['samples/001'] = h5py.SoftLink(
            '/shared_data/samples/001')
        # Instrument config
        this_spectrum['instrument_parameters'] = h5py.SoftLink(
            '/shared_data/instrument_parameters/001')
        n += 1
        if np.mod(n, 10) == 0:
            # BUGFIX: the original passed end="\r" to str.format (where it
            # is silently ignored) instead of print; write the progress
            # line explicitly so it works identically on py2 and py3
            sys.stdout.write('{:3.2f}% complete\r'.format(100. * n / n_spots))
            sys.stdout.flush()

    f_in.close()
    f_out.close()
    print('fin')
Ejemplo n.º 14
0
def mzImages(filename_in, save_dir):
    """Export PNG ion images for high-frequency, spatially-structured peaks.

    Peaks are picked from the dataset's frequency spectrum, filtered by
    frequency and by measure-of-chaos, and written as greyscale PNGs into
    ``save_dir``.
    """
    import sys
    sys.path.append('C:\\Users\\Luca Rappez\\Desktop\\python_codebase\\')
    from pyMS.centroid_detection import gradient
    from pyIMS.hdf5.inMemoryIMS_hdf5 import inMemoryIMS_hdf5
    from pyIMS.image_measures import level_sets_measure
    import matplotlib.pyplot as plt
    import numpy as np
    #%matplotlib inline
    import bokeh as bk
    from bokeh.plotting import output_notebook
    output_notebook()

    print('step1')
    # Parse data
    IMS_dataset = inMemoryIMS_hdf5(filename_in)
    print('In memory')
    ppm = 1.5

    # Generate frequency summary spectrum
    hist_axis, freq_spec = IMS_dataset.generate_summary_spectrum(summary_type='freq', ppm=ppm/2)
    print(len(hist_axis))

    # Centroid detection of frequency spectrum
    mz_list, count_list, idx_list = gradient(np.asarray(hist_axis), np.asarray(freq_spec), weighted_bins=2)

    c_thresh = 0.05
    moc_thresh = 0.99
    print(np.sum(count_list > c_thresh))

    # Calcualte MoC for images of all peaks
    nlevels = 30
    im_list = {}
    for ii, c in enumerate(count_list):
        if c > c_thresh:
            ion_image = IMS_dataset.get_ion_image(np.asarray([mz_list[ii], ]), ppm)
            im = ion_image.xic_to_image(0)
            m, im_moc, levels, nobjs = level_sets_measure.measure_of_chaos(im, nlevels, interp='median')  # just output measure value
            m = 1-m
            im_list[mz_list[ii]] = {'image': im, 'moc': m, 'freq': c}

    from pySpatialMetabolomics.tools import colourmaps
    c_map = colourmaps.get_colormap('grey')  # if black images: open->save->rerun
    c_pal = [[int(255*cc) for cc in c_map(c)] for c in range(0, 254)]

    # Export all images above the measure-of-chaos threshold
    import png as pypng
    for mz in im_list:
        if im_list[mz]['moc'] > moc_thresh:
             with open('{}/{}_{}.png'.format(save_dir, mz, im_list[mz]['moc']), 'wb') as f_out:
                im_out = im_list[mz]['image']
                im_out = 254*im_out/np.max(im_out)
                w, h = np.shape(im_out)
                png_writer = pypng.Writer(h, w, palette=c_pal, bitdepth=8)
                png_writer.write(f_out, im_out)

    # spot-check a single m/z
    mz = 333.334188269
    ion_image = IMS_dataset.get_ion_image(np.asarray([mz, ]), ppm)
    im_out = ion_image.xic_to_image(0)
    # BUGFIX: the original measured `im` -- a stale image left over from the
    # last loop iteration -- instead of the image just extracted
    m, im_moc, levels, nobjs = level_sets_measure.measure_of_chaos(im_out, nlevels, interp='')  # just output measure value
    print(1 - m)
    im_out = 254.*im_out/np.max(im_out)
    print(mz)
Ejemplo n.º 15
0
# NOTE(review): this is a fragment of a larger top-level conversion script.
# Names such as sample_1, f_in, centroids, signal, centroid_detection and
# spectral_data are defined above the visible region, and the final
# create_dataset call is truncated mid-statement in this dump.
sample_1.attrs["MALDI matrix application"] = matrix_application

### write spectra
n = 0
# one output group per spectrum, keyed by the spectrum index as a string
for i, coords in enumerate(f_in.coordinates):
    ## rename as I'm using old code :S
    spot = i
    key = str(i)
    ## make new spectrum
    mzs, ints = f_in.getspectrum(i)
    if centroids == True:
        # data is already centroided -- pass through unchanged
        mzs_list, intensity_list = mzs, ints
    else:
        # smooth, then peak-pick the profile spectrum
        ints = signal.savgol_filter(ints, 5, 2)
        mzs_list, intensity_list, indices_list = centroid_detection.gradient(
            np.asarray(mzs), np.asarray(ints), max_output=-1, weighted_bins=3
        )
    # sanity check: centroid intensities are expected to be strictly positive
    if not all([m > 0 for m in intensity_list]):
        raise ValueError("whoa, wtf?")
    # add intensities
    this_spectrum = spectral_data.create_group(key)
    this_intensities = this_spectrum.create_dataset(
        "centroid_intensities", data=np.float32(intensity_list), compression="gzip", compression_opts=9
    )
    # add coordinates
    # pad 2-D coordinates with a zero z component
    if len(coords) == 2:
        coords = (coords[0], coords[1], 0)
    this_coordiantes = this_spectrum.create_dataset("coordinates", data=(coords[0], coords[1], coords[2]))
    ## link to shared parameters
    # mzs
    this_mzs = this_spectrum.create_dataset(
Ejemplo n.º 16
0
def hdf5(filename_in, filename_out, info, smoothMethod="nosmooth"):
    """Convert a Bruker .h5 file into the centroided IMS hdf5 format.

    :param filename_in: path to the input .h5 file
    :param filename_out: hdf5 file to create (truncated if it exists)
    :param info: dict with sample metadata (sample_name, sample_source,
        sample_preparation, maldi_matrix, matrix_application)
    :param smoothMethod: one of 'nosmooth', 'sgsmooth', 'apodization'
    """
    import h5py
    import numpy as np
    import datetime
    import scipy.signal as signal
    from pyMS import centroid_detection
    import sys

    ### Open files
    f_in = h5py.File(filename_in, 'r')  # Readonly, file must exist
    f_out = h5py.File(filename_out, 'w')  # create file, truncate if exists
    print(filename_in)
    print(filename_out)
    ### get root groups from input data
    root_group_names = f_in.keys()
    spots = f_in['Spots']
    file_version = f_in['Version'][0]
    # some hard-coding to deal with different file versions
    if file_version > 5:
        coords = f_in['Registrations']['0']['Coordinates']
    else:
        coords = f_in['Coordinates']
    spectraGroup = 'InitialMeasurement'
    Mzs = np.asarray(f_in['/SamplePositions/GlobalMassAxis/']['SamplePositions']) # we don't write this but will use it for peak detection

    ### make root groups for output data
    spectral_data = f_out.create_group('spectral_data')
    spatial_data = f_out.create_group('spatial_data')
    shared_data = f_out.create_group('shared_data')

    ### populate common variables - can hardcode as I know what these are for h5 data
    # parameters
    instrument_parameters_1 = shared_data.create_group('instrument_parameters/001')
    instrument_parameters_1.attrs['instrument name'] = 'Bruker Solarix 7T'
    instrument_parameters_1.attrs['mass range'] = [Mzs[0], Mzs[-1]]
    instrument_parameters_1.attrs['analyser type'] = 'FTICR'
    instrument_parameters_1.attrs['smothing during convertion'] = smoothMethod
    instrument_parameters_1.attrs['data conversion'] = 'h5->hdf5:'+str(datetime.datetime.now())
    # ROIs
        #todo - determine and propagate all ROIs
    sample_1 = shared_data.create_group('samples/001')
    sample_1.attrs['name'] = info["sample_name"]
    sample_1.attrs['source'] = info["sample_source"]
    sample_1.attrs['preparation'] = info["sample_preparation"]
    sample_1.attrs['MALDI matrix'] = info["maldi_matrix"]
    sample_1.attrs['MALDI matrix application'] = info["matrix_application"]
    ### write spectra
    n = 0
    n_spots = len(spots.keys())
    for key in spots.keys():
        spot = spots[key]
        ## make new spectrum
        raw_intensities = np.asarray(spot[spectraGroup]['Intensities'])
        if smoothMethod == 'nosmooth':
            mzs, intensities = nosmooth(Mzs, raw_intensities)
        # BUGFIX: this branch duplicated the 'nosmooth' test, making
        # sg_smooth unreachable; 'sgsmooth' is presumably the intended
        # method name -- confirm against callers
        elif smoothMethod == 'sgsmooth':
            mzs, intensities = sg_smooth(Mzs, raw_intensities)
        elif smoothMethod == 'apodization':
            mzs, intensities = apodization(Mzs, raw_intensities)
        else:
            raise ValueError('smooth method not one of: [nosmooth,sgsmooth,apodization]')
        mzs_list, intensity_list, indices_list = centroid_detection.gradient(mzs, intensities, max_output=-1, weighted_bins=3)

        # add intensities
        this_spectrum = spectral_data.create_group(key)
        this_intensities = this_spectrum.create_dataset('centroid_intensities', data=np.float32(intensity_list),
                                                    compression="gzip", compression_opts=9)
        # add coordinates
        key_dbl = float(key)
        this_coordiantes = this_spectrum.create_dataset('coordinates',
                                                    data=(coords[0, key_dbl], coords[1, key_dbl], coords[2, key_dbl]))
        ## link to shared parameters
        # mzs
        this_mzs = this_spectrum.create_dataset('centroid_mzs', data=np.float32(mzs_list), compression="gzip",
                                            compression_opts=9)
        # ROI
        this_spectrum['ROIs/001'] = h5py.SoftLink('/shared_data/regions_of_interest/001')
        # Sample
        this_spectrum['samples/001'] = h5py.SoftLink('/shared_data/samples/001')
        # Instrument config
        this_spectrum['instrument_parameters'] = h5py.SoftLink('/shared_data/instrument_parameters/001')
        n += 1
        if np.mod(n, 10) == 0:
            # BUGFIX: the original passed end="\r" to str.format (where it
            # is silently ignored) instead of print; write the progress
            # line explicitly so it works identically on py2 and py3
            sys.stdout.write('{:3.2f}% complete\r'.format(100.*n/n_spots))
            sys.stdout.flush()

    f_in.close()
    f_out.close()
    print('fin')
def preprocess_spectrum(mzs, ints):
    """Smooth, centroid and m/z-sort a spectrum; returns (mzs, intensities)."""
    smoothed = signal.savgol_filter(ints, 5, 2)
    peak_mzs, peak_ints, _ = gradient(np.asarray(mzs), np.asarray(smoothed),
                                      max_output=-1, weighted_bins=3)
    sort_idx = np.argsort(peak_mzs)
    return peak_mzs[sort_idx], peak_ints[sort_idx]