Code Example #1
    def __init__(self, filereadinfo, dbname='', dbpath='', time_mult=1.0):
        """
        Class parameter initialization
        """

        self.dbname = ''
        self.dbpath = ''
        self.time_mult = time_mult

        if dbname != '':
            self.dbpath = os.path.dirname(dbname)
            self.dbname = os.path.basename(dbname)

        if self.dbpath == '':
            if os.path.isdir(dbpath):
                self.dbpath = dbpath
            else:
                self.dbpath = os.getcwd()

        try:
            if not os.path.isdir(self.dbpath):
                os.makedirs(self.dbpath)
        except OSError:
            self.dbpath = os.getcwd()

        if self.dbname == '':
            printlog(
                'Output database file name not provided. Setting output file name to:'
            )
            self.dbname = "msdata__" + time.strftime("%H%M_%d_%m_%Y") + ".h5"
            printlog(self.dbname)
        else:
            self.dbname = os.path.splitext(self.dbname)[0] + '.h5'
Code Example #2
File: manageh5db.py Project: zzsnow/GNPS_Workflows
def recursive_copy_group_contents(source, target, overwrite=True):
    """
    Recursively copy group contents considering overwrite setting
    
    """
    for attribute in source.attrs.keys():
        if not (attribute in target.attrs) or overwrite:
            target.attrs[attribute] = source.attrs[attribute]

    for key in source.keys():
        item = source[key]
        if isinstance(item, h5py.Group):
            if key in target:
                oitem = target[key]
                if not isinstance(oitem, h5py.Group):
                    raise H5FileError(
                        'Error! Cannot copy group %s from %s into existing dataset %s in %s!'
                        % (key, source.name, key, target.name))
            else:
                oitem = target.create_group(key)
            recursive_copy_group_contents(item, oitem, overwrite)
        elif isinstance(item, h5py.Dataset):
            if not (key in target) or overwrite:
                if key in target:
                    del target[key]
                source.copy(item, target, name=key)
        else:
            printlog('Unsupported item %s of type %s ignored!' %
                     (key, type(item)))
Code Example #3
File: timing.py Project: zzsnow/GNPS_Workflows
def toc(id_tic = 'default', printing = True):
    """
    Returns and prints (optionally) the number of seconds that passed since 
    corresponding tic(id) was called.
    
    Args:
        id_tic:   string or number to be used to identify corresponding starting
                  point. Default: 'default'.
        printing: boolean, determines if the number of elapsed seconds is 
                  printed. Default: True
                 
    Returns:
        number of seconds that passed since corresponding tic(id) was called as
        float.
    """
    global __dttime;

    try:
        t = time.time() - __dttime[id_tic];
    except:
        printlog('tic "%s" not found! Did you forget to call tic(%s)?'%(id_tic, id_tic));
        return 0.0;
    
    if printing:
        printlog('%s seconds'%t)
    return t;
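
A minimal usage sketch for the helper above. It assumes the matching tic() from the same timing.py stores time.time() under the given id in the module-level __dttime dictionary; the import path is hypothetical.

from timing import tic, toc  # hypothetical import of the helpers shown above

tic('load')            # start a named timer
# ... do some work ...
elapsed = toc('load')  # prints "<seconds> seconds" and returns the elapsed time as a float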
Code Example #4
File: manageh5db.py Project: zzsnow/GNPS_Workflows
def load_dataset(dbfilepath, pathinh5):

    pathinh5 = re.sub('//', '/', pathinh5)

    dataset = []
    if is_string(dbfilepath) and (os.path.exists(dbfilepath)):
        h5file_group = h5py.File(dbfilepath, 'a')
        isdbfile = 1
    elif (isinstance(dbfilepath, h5py.File)) or (isinstance(
            dbfilepath, h5py.Group)):
        h5file_group = dbfilepath
        isdbfile = 0
    else:
        return dataset

    try:
        isdata = pathinh5 in h5file_group
    except Exception as inst:
        printlog(inst)
        traceback.print_exc()
        return dataset

    if isdata:
        dataset = h5file_group[pathinh5][()]

    if isdbfile == 1:
        h5file_group.close()

    return dataset
Code Example #5
    def cp(self, source, target):
        """
        Copy source to target path
        """

        target = abs_hdf5_path(target, self.h5path).rstrip('/')

        if not target in self.h5file:
            target_group = self.h5file.create_group(target)
        else:
            target_group = self.h5file[target]

        if not isinstance(target_group, h5py.Group):
            printlog(
                'Error! Destination %s exists and it is not a group! Cannot copy %s!'
                % (target, source))
            return

        source_path = abs_hdf5_path(source, self.h5path)
        selected_items = get_items_list_from_hdf5(self.h5file,
                                                  source_path,
                                                  recursive=True)
        source_path = source_path.split('/')[:-1]
        source_path_len = len(source_path)
        for item in selected_items:
            printlog('Copying %s to %s...' % (item, target))
            rel_item = '/'.join(item.split('/')[source_path_len:])
            copy_hdf5_item(self.h5file, item, target + '/' + rel_item)
Code Example #6
File: manageh5db.py Project: zzsnow/GNPS_Workflows
def abs_hdf5_path(h5path, reference_path='/'):
    """
    Make HDF5 relative path absolute
    """
    if not reference_path.startswith('/'):
        raise OperationError('Reference path should be absolute path!')

    if not h5path.startswith('/'):
        h5path = reference_path.rstrip('/') + '/' + h5path

    end_slash = h5path.endswith('/')

    h5path = h5path.split('/')

    newpath = []
    for i in range(len(h5path)):
        if h5path[i] == '..':
            if len(newpath) > 0:
                del newpath[-1]
            else:
                printlog(
                    'Error! Cannot go above root with ".." in path! Staying at root!'
                )
        elif h5path[i] != '.' and h5path[i] != '':
            newpath.append(h5path[i])

    result = '/%s' % ('/'.join(newpath))

    if end_slash:
        if not result.endswith('/'):
            result += '/'

    return result
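
A short usage sketch for abs_hdf5_path; the import path is an assumption, and the expected results follow from the normalization logic above.

from manageh5db import abs_hdf5_path  # hypothetical import path

abs_hdf5_path('sp', '/proc/sample1')            # -> '/proc/sample1/sp'
abs_hdf5_path('../other/sp', '/proc/sample1/')  # -> '/proc/other/sp'
abs_hdf5_path('/spproc2D/', '/proc')            # -> '/spproc2D/' (already absolute, trailing slash kept)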
Code Example #7
def generate_sample_plot(h5file, dataset_path, dataset_index, dataset_name,
                         output_file, crt, cmz, plot_width, top_plot_height,
                         bottom_plot_height, use_global_max, global_max):
    sp_set = dataset_path + '/sp'
    if not (sp_set in h5file):
        printlog('Error! sp not found in %s ! Skipping...' % (dataset_path))
        return

    sp = np.array(h5file[sp_set])

    #sp_int = np.sum(sp, axis = 1);

    crt = np.array(crt)
    crt = crt / 60.0

    top_plot = _make_top_rt_plot(
        crt, sp, '%s. RT Integral profile, linear scale' % dataset_name,
        plot_width, top_plot_height, use_global_max, global_max)

    bottom_plot = _make_bottom_rt_plot(
        crt, cmz, sp, '%s. Full profile, log scale' % dataset_name, plot_width,
        bottom_plot_height, True, use_global_max, global_max)
    #bottom_plot2 = _make_bottom_rt_plot(crt, cmz, sp, '%s. Full profile, linear scale'%dataset_name, plot_width, bottom_plot_height, False, use_global_max, global_max);

    bottom_plot.x_range = top_plot.x_range
    #bottom_plot2.x_range = top_plot.x_range;
    #bottom_plot2.y_range = bottom_plot.y_range;

    script, div = components(
        gridplot([
            [top_plot],
            [bottom_plot],
            #[bottom_plot2]
        ]))

    with open(output_file, 'w') as fspec:

        fspec.write('\n'.join([
            '<!DOCTYPE html>',
            '<html lang="en">',
            '    <head>',
            '        <meta charset="utf-8">',
            '        <title>RT Spectrum for all peaks</title>',
            CDN.render(),
            '        <link href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">',
            '    </head>',
            '    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>',
            '    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>',
            '    <body>',
            div,
            '    </body>',
            script,
            '<style>',
            '</style>',
            '    </html>',
        ]))
Code Example #8
def view_tics(dbfilepath, method='', params=''):
    
    printlog('\nExtracting TICs.....')

    h5readpath = params['h5readpath']
    data = get_data(dbfilepath, h5readpath)
    figtitle = 'Total Ion Chromatograms'
    dirname = os.path.dirname(dbfilepath)


    if params['outputfile'] == '':
        filename = 'bp' + time.strftime("%H%M_%d_%m_%Y") + '.html'
    else:
        filename = params['outputfile']
        
    output_file(os.path.join(dirname, filename))

    source = ColumnDataSource(data)

    p = figure(width=params['plot_width'], height=params['plot_height'],
                   title=figtitle, x_axis_label='Time(mins)', y_axis_label='Intensity')

        
    p.multi_line(xs = 'x', ys = 'y',
                 line_width = 1, line_color = 'color', line_alpha = 0.6,
                 hover_line_color = 'color', 
                 hover_line_alpha = 1.0,
                 source = source)
                 
    print(data.keys())             
                     
    if 'histc' in data:
        p.line(x = 'hx', y = 'histc', line_width=1, line_color = 'firebrick', source = source);
    
    if 'histc_threshold' in data:
        p.line(x = 'hx', y = 'histc_threshold', line_width = 2, line_color = 'navy', source = source);
            
    if 'ref2D' in data:
        p.line(x = 'refx', y = 'ref2D', line_width = 1, line_color = 'red', source = source)
            
    if 'picked_peaks' in data:
        p.circle(x = 'peak_x', y = 'picked_peaks', color = 'peak_color', size = 3, source = source)
            

    p.add_tools(HoverTool(show_arrow=False, line_policy='next', tooltips=[
            ('id', '@id')]))
        
    try:    
        p.toolbar.active_inspect = [None]
    except:
        pass
        
    if params['display'] == 'yes':
        show(p)
    else:
        save(p)
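
The dictionary built by get_data (see Code Example #23) is what ColumnDataSource and multi_line consume here. Below is a minimal, self-contained sketch of that pattern; the data, colors, and output file name are made up.

import numpy as np
from bokeh.plotting import figure, save, output_file, ColumnDataSource
from bokeh.models import HoverTool

t = np.linspace(0, 10, 200)
data = {
    'x': [t, t],                              # one retention-time vector per sample
    'y': [np.sin(t) + 1.0, np.cos(t) + 1.0],  # one TIC trace per sample
    'id': ['sample_01', 'sample_02'],
    'color': ['#1f77b4', '#ff7f0e'],
}

output_file('tics_sketch.html')
p = figure(width=900, height=400, title='Total Ion Chromatograms',
           x_axis_label='Time(mins)', y_axis_label='Intensity')
p.multi_line(xs='x', ys='y', line_width=1, line_color='color', line_alpha=0.6,
             source=ColumnDataSource(data))
p.add_tools(HoverTool(show_arrow=False, line_policy='next', tooltips=[('id', '@id')]))
save(p)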
Code Example #9
 def mkdir(self, target):
     """
     Make group defined by target
     """
     target = abs_hdf5_path(target, self.h5path).rstrip('/')
     printlog('Creating group %s' % (target))
     if target in self.h5file:
         printlog('Error! %s already exists!' % target)
     else:
         self.h5file.create_group(target)
Code Example #10
File: manageh5db.py Project: zzsnow/GNPS_Workflows
def save_dataset(dbfilepath,
                 pathinh5,
                 data,
                 chunksize='',
                 compression_opts=''):

    pathinh5 = re.sub('//', '/', pathinh5)

    if is_string(dbfilepath) and (os.path.exists(dbfilepath)):
        h5file_group = h5py.File(dbfilepath, 'a')
        isdbfile = 1
    elif (isinstance(dbfilepath, h5py.File)) or (isinstance(
            dbfilepath, h5py.Group)):
        h5file_group = dbfilepath
        isdbfile = 0
    else:
        return

    try:
        isdata = pathinh5 in h5file_group
    except Exception as inst:
        printlog(inst)
        traceback.print_exc()
        return

    if isdata:
        fdata = h5file_group[pathinh5]

        if (fdata.shape == data.shape) and (fdata.dtype == data.dtype):
            fdata[...] = data
            return
        else:
            printlog('Deleting original')
            del h5file_group[pathinh5]

    if (not chunksize) and (not compression_opts):
        h5file_group.create_dataset(pathinh5, data=data)
    elif (chunksize) and (compression_opts):
        h5file_group.create_dataset(pathinh5,
                                    data=data,
                                    chunks=chunksize,
                                    compression="gzip",
                                    compression_opts=compression_opts)
    elif (chunksize):
        h5file_group.create_dataset(pathinh5, data=data, chunks=chunksize)
    elif (compression_opts):
        h5file_group.create_dataset(pathinh5,
                                    data=data,
                                    chunks=True,
                                    compression="gzip",
                                    compression_opts=compression_opts)

    if isdbfile == 1:
        h5file_group.close()
    return
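
A minimal, self-contained round trip showing the h5py calls that save_dataset and load_dataset rely on; the file name and HDF5 path are made up.

import numpy as np
import h5py

data = np.random.rand(100, 50)

with h5py.File('example.h5', 'a') as h5:
    # chunked, gzip-compressed dataset, as in the chunksize + compression_opts branch above
    if 'proc/sample1/sp' in h5:
        del h5['proc/sample1/sp']
    h5.create_dataset('proc/sample1/sp', data=data,
                      chunks=True, compression='gzip', compression_opts=5)

with h5py.File('example.h5', 'r') as h5:
    loaded = h5['proc/sample1/sp'][()]  # read the whole dataset back as a NumPy array

assert np.allclose(data, loaded)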
Code Example #11
File: manageh5db.py Project: zzsnow/GNPS_Workflows
def load_preproc_obj(dbfilepath, procid, pathinh5=''):
    """
    
    **Loads the pre-processing parameters of a module from the hdf5 database.**
    
    Args: 
        
        dbfilepath: the name and path to the hdf5-database file
        
        procid: the module identifier
        
        pathinh5: the path in the hdf5 file for object storage
        
    """

    h5objpath = pathinh5 + procid
    h5objpath = re.sub('//', '/', h5objpath)

    ProcObj = {}
    if is_string(dbfilepath) and (os.path.exists(dbfilepath)):
        h5file_group = h5py.File(dbfilepath, 'a')
        isdbfile = 1
    elif (isinstance(dbfilepath, h5py.File)) or (isinstance(
            dbfilepath, h5py.Group)):
        h5file_group = dbfilepath
        isdbfile = 0
    else:
        return ProcObj

    try:
        isobj = h5objpath in h5file_group
    except Exception as inst:
        printlog(inst)
        traceback.print_exc()
        return ProcObj

    if not isobj:
        return ProcObj
    # check whether this object is part of the preprocessing workflow
    h5obj = h5file_group[h5objpath]
    for i_name in h5obj.keys():
        if isinstance(h5obj[i_name], h5py.Group):
            h5subobj = h5obj[i_name]
            subProcObj = {}
            for j_name in h5subobj.keys():
                subProcObj[j_name] = load_dataset(h5subobj, j_name)
            ProcObj[i_name] = subProcObj
        else:
            ProcObj[i_name] = load_dataset(h5obj, i_name)

    if isdbfile == 1:
        h5file_group.close()

    return ProcObj
Code Example #12
 def rm(self, target):
     """
     Remove items defined by target
     """
     printlog('Removing %s...' % (target))
     targets = reversed(
         get_items_list_from_hdf5(self.h5file,
                                  abs_hdf5_path(target, self.h5path),
                                  recursive=True))
     for i in targets:
         del self.h5file[i]
Code Example #13
File: anova.py Project: zzsnow/GNPS_Workflows
 def __init__(self, stat_model, methodparams):
     """
     ANOVA method initialization.
     Args: stat_model - instance of the StatisticalModel class;
           methodparams - dictionary of supplied method parameters;
           
     
     """
     self.stat_model = stat_model;
     self.methodparams = methodparams;
     
     
     printlog('\nInitialized ANOVA analyser...');
     self.p_value_threshold = self.methodparams['p_value'];
     printlog('\np-value threshold: %.5f'%self.p_value_threshold);
     
     #if self.methodparams['interactions_expected'] == 'no':
     #    self.anova_type = 2;
     #    printlog('Interactions not expected. Using ANOVA type 2...')
     #else:
     #    self.anova_type = 3;
     #    printlog('Interactions expected. Using ANOVA type 3...')
         
     if self.methodparams['robust'] == 'None':
         self.robust = None;
         printlog('Robust covariance not used...')
     else:
         self.robust = self.methodparams['robust'];
         printlog('Using covariance %s...'%self.robust);
Code Example #14
File: interpalign.py Project: zzsnow/GNPS_Workflows
 def get_refsp_h5(self,dbfilepath,datasets,h5readpath):
     
     with h5py.File(dbfilepath, 'r') as h5file:    
         i = 0
         printlog("\nPreparing reference profile for inter-sample retention time drift alignment across %s datasets from \n%s...\n" % (len(datasets),dbfilepath))
         dataindex = 0 
         for datasetid in datasets:
             dataindex = dataindex + 1        
             try:        
                 sp2D = mh5.load_dataset(h5file,h5readpath[:-1] + datasetid+ '/sp')
                 if i==0:
                     i = i + 1
                     ref2D = sp2D
                     continue
                 if self.reference=='mean':
                     ref2D = (sp2D + ref2D)                  
                 i = i + 1
                 printlog('%s. %s: Successfully updated from -> %s%s' %(dataindex, datasetid, os.path.basename(dbfilepath),h5readpath))
             except Exception as inst:
                 printlog('%s. %s: Failed' %(dataindex, datasetid))  
                 printlog(inst)
                 traceback.print_exc()
         
         if self.reference=='mean':
             self.ref2D = ref2D/i
Code Example #15
File: vistic.py Project: zzsnow/GNPS_Workflows
def get_average_profile(dbfilepath, dataid=''):
    if os.path.isfile(dbfilepath):
        datasets = mh5.get_dataset_names(dbfilepath, dataset_names=[])
        if not datasets:
            printlog(dbfilepath +
                     " database file doesn't contain any MSI datasets")
            return
    else:
        printlog(str(dbfilepath) + ' database file is not found')
        return

    sizesp = mh5.load_dataset(dbfilepath, 'sizesp')
    sp_mean = np.zeros((sizesp[0], sizesp[1]))
    crt = mh5.load_dataset(dbfilepath, 'crt')

    #crt = crt / 60;

    j = -1

    dataidx = np.array([])
    for datasetid in datasets:
        j = j + 1
        try:
            sp = mh5.load_dataset(dbfilepath, datasetid + dataid)
            sp_mean = sp_mean + sp
            dataidx = np.append(dataidx, j)
        except Exception as inst:
            printlog(os.path.basename(datasetid) + ": Failed to read in")
            printlog(inst)

    dataidx = dataidx.astype(int)
    datasets = list(map(datasets.__getitem__, (dataidx)))
    sp_mean = sp_mean / len(dataidx)

    return sp_mean, crt, datasets
Code Example #16
def load_mgf_fragments(fragfilename):
    rt_peaks = []
    ms_fragments = []
    reading_peaks = False
    in_record = False
    current_ms_fragments = []
    current_rt = 0.0
    with open(fragfilename, 'r') as finp:
        for s in finp:
            s = s.rstrip('\n')
            if in_record:
                if reading_peaks:
                    if s.startswith('END'):
                        reading_peaks = False
                        in_record = False
                        ms_fragments.append(current_ms_fragments)
                        rt_peaks.append(current_rt)
                        current_ms_fragments = []
                    else:
                        s = s.split(' ')
                        current_ms_fragments[0].append(float(s[0]))
                        current_ms_fragments[1].append(float(s[1]))

                elif s.startswith('RTINSECONDS='):
                    current_rt = float(s.split('=')[1]) / 60.0

                elif '=' in s:
                    pass

                elif ' ' in s:
                    s = s.split(' ')
                    try:
                        mz = float(s[0])
                        intens = float(s[1])
                        reading_peaks = True
                        current_ms_fragments.append([mz])
                        current_ms_fragments.append([intens])
                    except:
                        printlog(
                            '%s does not contain proper float pair! Ignoring...'
                            % s)

            elif s.startswith('BEGIN IONS'):
                in_record = True

    for i in range(len(ms_fragments)):
        ms_fragments[i] = np.array(ms_fragments[i], dtype=np.float64)

    return np.array(rt_peaks, dtype=np.float64), ms_fragments
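
A usage sketch with made-up file content, illustrating the MGF record layout the parser above expects (BEGIN IONS, optional KEY=VALUE lines, m/z-intensity pairs, END IONS); it assumes load_mgf_fragments is available in the current module.

mgf_text = (
    'BEGIN IONS\n'
    'TITLE=example_spectrum\n'
    'RTINSECONDS=75.3\n'
    '100.0500 1200.0\n'
    '150.1000 800.0\n'
    'END IONS\n'
)

with open('example.mgf', 'w') as f:
    f.write(mgf_text)

rt_peaks, ms_fragments = load_mgf_fragments('example.mgf')
# rt_peaks        -> array([1.255])  (RTINSECONDS / 60)
# ms_fragments[0] -> array([[100.05, 150.1], [1200., 800.]])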
Code Example #17
def write_meta_values(h5,
                      h5writepath,
                      sampleid,
                      metids,
                      metvals,
                      overwrite=True):
    """
    Write meta values to the open h5 file according to the sample id.
    
    Args:
        h5: HDF5 File instance (opened and writeable).
        h5writepath: string containing path to the group of samples to add 
                     metadata to.
        sampleid: name of the sample, same as the name of the group containing 
                  sample data. Matching is case sensitive.
        metids:  list of metadata variable names
        metvals: list of metadata variable values
        overwrite: boolean, defines if already existing metadata values will be 
                   overwritten.
        
    Returns:
        None
    
    """
    group_name = h5writepath + sampleid
    if group_name in h5:
        printlog('Adding metadata to %s...' % group_name)
        group = h5[group_name]

        if 'MetaData' in group:
            metagroup = group['MetaData']
            if not isinstance(metagroup, h5py.Group):
                raise HDF5FormatError(
                    'Error! A dataset "MetaData" exists in %s of %s!' %
                    (group_name, h5.filename))
        else:
            metagroup = group.create_group('MetaData')

        group.attrs['has_metadata'] = True
        for i in range(len(metids)):
            metid = str(metids[i])
            if metid != '':
                if not (metid in metagroup.attrs) or overwrite:
                    metagroup.attrs[metid] = trydecode(metvals[i])
    else:
        printlog('%s not found in %s! Skipping...' % (group_name, h5.filename))
Code Example #18
File: manageh5db.py Project: zzsnow/GNPS_Workflows
def get_traindata_names(dbfilepath, dbroot='', dataset_names=[], istrain=1):
    """
    
    Recursively extracts dataset names from the HDF5 database
    
    """
    if is_string(dbfilepath) and (os.path.exists(dbfilepath)):
        h5file = h5py.File(dbfilepath, 'r')
        item = h5file
        isdbfile = 1
    elif (isinstance(dbfilepath, h5py.File)) or (isinstance(
            dbfilepath, h5py.Group)):
        item = dbfilepath
        isdbfile = 0
    else:
        return dataset_names

    for key, val in iteritem(dict(item)):
        try:
            subitem = dict(val)
            if ('istrain' in subitem) and ('Sp' in subitem):
                if load_dataset(item, val.name + '/istrain') == istrain:
                    success = 1
                else:
                    success = 0
            else:
                success = 0
        except Exception as inst:
            printlog(inst)
            traceback.print_exc()
            success = 0
        if success == 1:
            dataset_names.append(val.name)
        elif isinstance(val, h5py.Group):
            dbroot = dbroot + val.name
            dataset_names = get_traindata_names(val, dbroot, dataset_names,
                                                istrain)
    if isdbfile == 1:
        h5file.close()

    return dataset_names
Code Example #19
    def aling_h5(self,dbfilepath,datasets,h5readpath,h5writepath):
        
        #if not self.ref2D:
        
            
        printlog("\nPerforming internal sample retention time profile alignment across %s datasets from \n%s...\n" % (len(datasets),dbfilepath))
        dataindex = 0 
        with h5py.File(dbfilepath, 'a') as h5file:   
        
            #mh5.save_dataset(h5file, h5writepath + '/ref2D', data = self.ref2D, compression_opts = 5)
            
            for datasetid in datasets:
                dataindex = dataindex + 1                

                try:
                    sp2D = mh5.load_dataset(h5file, h5readpath[:-1] + datasetid + '/sp')
                    
                    nrt, nmz = sp2D.shape
                    
                    ref2D = np.mean(sp2D, axis = 1);
                    
                    mh5.save_dataset(h5file, h5writepath + datasetid.lstrip('/') + '/ref2D', data = ref2D, compression_opts = 5)
                    
                    for i in range(nmz):
                        alprof = self.align(sp2D[:, i], ref2D[:])
                        sp2D[:, i] = alprof
                        
                    mh5.save_dataset(h5file, h5writepath[:-1] + datasetid+ '/sp', data = sp2D, compression_opts = 5)
                    
                    printlog('%s. %s: Successfully aligned and deposited -> %s%s' %(dataindex, datasetid, os.path.basename(dbfilepath), h5writepath))
                    
                    target_gname = h5writepath[:-1] + datasetid;
                    source_gname = h5readpath[:-1] + datasetid;
        
                    wgroup = h5file[target_gname];
                    sgroup = h5file[source_gname];
                        
                    wgroup.attrs['is_raw'] = False;
                    wgroup.attrs['is_OK'] = True;
                    wgroup.attrs['is_processed'] = True;
                    wgroup.attrs['is_continuous'] = True;
                    wgroup.attrs['is_sample_dataset'] = True;
                    wgroup.attrs['parent'] = np.string_(source_gname)
                    mh5.copy_meta_over(sgroup, wgroup);
                    
                    
                except Exception as inst:
                    printlog('%s. %s: Failed to be deposited' %(dataindex, datasetid))  
                    printlog(inst)
                    traceback.print_exc()
Code Example #20
    def ls(self,
           recursive=False,
           selection='*',
           show_attributes=False,
           show_groups=True,
           show_datasets=True):

        source_path = abs_hdf5_path(selection, self.h5path)
        selected_items = get_items_list_from_hdf5(self.h5file, source_path,
                                                  recursive)

        source_path = source_path.split('/')[:-1]
        source_path_len = len(source_path)

        for item_name in selected_items:
            item = self.h5file[item_name]
            item_relpath = '/'.join(item_name.split('/')[source_path_len:])
            if isinstance(item, h5py.Group) and show_groups:
                printlog('[%s]' % item_relpath)
                if show_attributes:
                    print_attributes(item)
            elif isinstance(item, h5py.Dataset) and show_datasets:
                printlog(' %s ' % item_relpath)
                if show_attributes:
                    print_attributes(item)
            else:
                printlog('?%s ' % item_relpath)
Code Example #21
    def __init__(self, filereadinfo, dbname='', dbpath='', time_mult=1.0):
        """
        Class parameter initialization
        """

        self.dbname = ''
        self.dbpath = ''
        self.time_mult = time_mult

        # the keys used to retrieve certain data from the NetCDF file
        self.__mass_string = filereadinfo['massid']
        self.__intensity_string = filereadinfo['specid']
        self.__scan_string = filereadinfo['scanid']
        self.__time_string = filereadinfo['timeid']

        if dbname != '':
            self.dbpath = os.path.dirname(dbname)
            self.dbname = os.path.basename(dbname)

        if self.dbpath == '':
            if os.path.isdir(dbpath):
                self.dbpath = dbpath
            else:
                self.dbpath = os.getcwd()

        try:
            if not os.path.isdir(self.dbpath):
                os.makedirs(self.dbpath)
        except OSError:
            self.dbpath = os.getcwd()

        if self.dbname == '':
            printlog(
                'Output database file name not provided. Setting output file name to:'
            )
            self.dbname = "msdata__" + time.strftime("%H%M_%d_%m_%Y") + ".h5"
            printlog(self.dbname)
        else:
            self.dbname = os.path.splitext(self.dbname)[0] + '.h5'
Code Example #22
File: manageh5db.py Project: zzsnow/GNPS_Workflows
def print_structure_h5db(dbfilepath, dbroot='', offset='    '):
    """Prints the HDF5 database structure"""
    if is_string(dbfilepath) and (os.path.exists(dbfilepath)):
        h5file = h5py.File(dbfilepath, 'r')
        item = h5file
        isdbfile = 1
    elif (isinstance(dbfilepath, h5py.File)) or (isinstance(
            dbfilepath, h5py.Group)):
        item = dbfilepath
        isdbfile = 0
    else:
        return

    if isinstance(item, h5py.File):
        printlog(item.file, '(File)', item.name)

    elif isinstance(item, h5py.Dataset):
        printlog('(Dataset)', item.name, '    len =', item.shape)  #, g.dtype

    elif isinstance(item, h5py.Group):
        printlog('(Group)', item.name)

    else:
        printlog('Warning: The item type is unknown', item.name)
        sys.exit("execution is terminated")

    if isinstance(item, h5py.File) or isinstance(item, h5py.Group):
        for key, val in dict(item).items():
            subitem = val
            printlog(offset,
                     key)  #,"   ", subg.name #, val, subg.len(), type(subg),
            dbroot = dbroot + 'i'
            print_structure_h5db(subitem, dbroot=dbroot, offset='    ')

    if isdbfile == 1:
        h5file.close()
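
Not the author's helper: a minimal, self-contained alternative that walks an HDF5 file with h5py's visititems; the file name is made up.

import h5py

def describe(name, obj):
    kind = 'Group' if isinstance(obj, h5py.Group) else 'Dataset'
    shape = getattr(obj, 'shape', '')
    print('(%s) /%s %s' % (kind, name, shape))

with h5py.File('example.h5', 'r') as h5:
    h5.visititems(describe)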
Code Example #23
File: vistic.py Project: zzsnow/GNPS_Workflows
def get_data(dbfilepath, h5readpath):
    """ Extract data from the h5file and output as a dictionary of 'x', 'y', 'ids', and 'colors' for each sample """
    if h5readpath[0] != '/':
        h5readpath = '/'.join(['', h5readpath])

    if os.path.isfile(dbfilepath):
        datasets = mh5.get_dataset_names(dbfilepath,
                                         dataset_names=[],
                                         pathinh5=h5readpath)
        if not datasets:
            printlog(dbfilepath +
                     " database file doesn't contain any MSI datasets")
            return
    else:
        printlog(str(dbfilepath) + ' database file is not found')
        return

    sizesp = mh5.load_dataset(dbfilepath, '/'.join([h5readpath, 'sizesp']))
    tics = np.zeros((sizesp[0], sizesp[2]))
    crt = mh5.load_dataset(dbfilepath, '/'.join([h5readpath, 'crt']))
    j = -1
    dataidx = np.array([])
    for datasetid in datasets:
        j = j + 1
        try:
            sp = mh5.load_dataset(dbfilepath,
                                  ''.join([h5readpath, datasetid, '/sp']))
            tics[:, j] = np.sum(sp, axis=1)
            dataidx = np.append(dataidx, j)
        except Exception as inst:
            printlog(os.path.basename(datasetid) + ": Failed to read in")
            printlog(inst)
            traceback.print_exc()

    dataidx = dataidx.astype(int)
    datasets = list(map(datasets.__getitem__, (dataidx)))
    tics = tics[:, dataidx]
    nrows, ncols = tics.shape
    sp = {'x': [], 'y': [], 'id': [], 'color': []}
    for i in range(ncols):
        sp['x'].append(crt / 60)
        sp['y'].append(tics[:, i])
        sp['id'].append(datasets[i])
    sp['color'] = colorgenerator(ncols)
    return sp
Code Example #24
 def cd(self, h5path):
     h5path = abs_hdf5_path(h5path, self.h5path)
     if h5path in self.h5file:
         item = self.h5file[h5path]
         if isinstance(item, h5py.Group):
             self.current_group = item
             self.h5path = h5path
             printlog('Current path set to %s' % h5path)
         else:
             printlog('Error! Path %s does not point to a group!' % h5path)
     else:
         printlog('Error! Path %s does not exist!' % h5path)
Code Example #25
File: export.py Project: zzsnow/GNPS_Workflows
def export_metadata_table_to_file(dbfilepath, h5readpath, output_prefix, dataset_names, samples):

    printlog('Exporting associated metadata...');
    metacolumns = set();
    
    if not ('*' in samples):
        samples = set(samples);
        dn = [];
        #print(samples)
        #print(dataset_names)
        for dataset_name in dataset_names:
            if dataset_name.lstrip('/') in samples:
                dn.append(dataset_name);
        if not dn:
            printlog('No samples found matching the provided sample list!');
            return
            
        dataset_names = dn;    

    with h5py.File(dbfilepath, 'r') as h5file:

        for dataset_name in dataset_names:
            
            group_name = h5readpath[:-1] + dataset_name;
            group = h5file[group_name];
            if 'MetaData' in group:
                metagroup = group['MetaData'];
                for attribute in metagroup.attrs.keys():
                    metacolumns.add(attribute);
        metacolumns = sorted(list(metacolumns));
        
        if metacolumns:
            with open('%s_metadata.csv'%output_prefix, 'w') as fout:
                fout.write('Sample');
                for s in metacolumns:
                    fout.write(',"%s"'%s);
                fout.write('\n');
                for dataset_name in dataset_names:
                    group_name = h5readpath[:-1] + dataset_name;
                    group = h5file[group_name];
                    fout.write('"%s"'%dataset_name.lstrip('/'));
                    if 'MetaData' in group:
                        metagroup = group['MetaData'];
                        for attribute in metacolumns:
                            if attribute in metagroup.attrs:
                                value = metagroup.attrs[attribute];
                            else:
                                value = '';
                            fout.write(',"%s"'%value);
                        fout.write('\n');
    printlog('Done')
Code Example #26
                        metids = row[1:]
                    else:
                        sampleid = row[0]
                        metvals = row[1:]
                        minlen = min(len(metids), len(metvals))
                        write_meta_values(h5, h5writepath, str(sampleid),
                                          metids[0:minlen], metvals[0:minlen],
                                          parameters['overwrite'] == 'yes')


if __name__ == "__main__":
    tic()
    settings = OptionsHolder(__doc__, ImportMetaSet_options)
    settings.description = 'Metadata import'
    settings.do = 'yes'
    printlog(settings.program_description)
    #Parse command line parameters
    try:
        settings.parse_command_line_args()
    except Exception as inst:
        printlog('!!! Error in command line parameters: !!!')
        printlog(inst)
        printlog('\nRun python ' + sys.argv[0] +
                 ' --help for command line options information!')
        sys.exit(-1)

    parameters = settings.parameters
    if parameters['logfile'] != '':
        start_log(
            parameters['logfile'],
            overwrite_existing=(parameters['overwrite_logfile'] == 'yes'),
Code Example #27
def do_profile_alignment(dbfilepath, method='rspa', params = {'recursion':1, 
                                                              'minsegwidth':100, 
                                                              'maxpeakshift':10,
                                                              'reference':'mean',
                                                              'h5readpath': '/proc',
                                                              'h5writepath': '/proc'},
                                                                istrain=1):
    """
    Performs advanced adjustment for chromatographic peak position variations at full profile resolution
    using recursive segment-wise peak alignment strategy
    
    Args:
    
        dbfilepath: The database file path
                    
        method: The choice of peak alignment method. Default value: 'rspa', i.e. Recursive segment-wise peak alignment.  
        
        params: The dictionary of peak alignment parameters
    """     
    
    params['h5writepath'] = h5Base.correct_h5path(params['h5writepath'])
    params['h5readpath']  = h5Base.correct_h5path(params['h5readpath'])
    dataset_names = mh5.get_dataset_names(dbfilepath,dataset_names=[],pathinh5 = params['h5readpath'][:-1])
    if not dataset_names:
        return

    peak_width = mh5.load_dataset(dbfilepath, params['h5readpath'] + 'peak_width');
    printlog('Loaded min estimated peak width: %s seconds'%peak_width);
    mh5.save_dataset(dbfilepath, params['h5writepath'] + 'peak_width',\
                                 data=peak_width)

    if str(params['minsegwidth']).lower() == 'auto':
        params['minsegwidth'] = peak_width * 10.0;
        printlog('Parameter "minsegwidth" is set to %s'%params['minsegwidth']);
    else:
        try:
            params['minsegwidth'] = float(params['minsegwidth'])
        except:
            raise LoggingValueError('Error! %s value for parameter "minsegwidth" cannot be converted to float!'%params['minsegwidth'])
            

    
    if str(params['maxpeakshift']).lower() == 'auto':
        params['maxpeakshift'] = peak_width * 5;
        printlog('Parameter "maxpeakshift" is set to %s'%params['maxpeakshift']);
    else:
        try:
            params['maxpeakshift'] = float(params['maxpeakshift'])
        except:
            raise LoggingValueError('Error! %s value for parameter "maxpeakshift" cannot be converted to float!'%params['maxpeakshift'])
        

    if istrain==1:
        rtrange = mh5.load_dataset(dbfilepath, params['h5readpath'] + 'rtrange') 
        if method=='rspa':
            rtAlObj = RSPA(method,params,rtrange)
            
    elif istrain==0:
            rtAlObj = RSPA()
            rtAlObj.load_procobj(dbfilepath,params['h5readpath'])
        
    rtAlObj.aling_h5(dbfilepath, dataset_names, params['h5readpath'] , params['h5writepath'])
    
    if istrain==1:
         #save into hdf5 database file
        rtAlObj.export()
        rtAlObj.save_procobj(dbfilepath,params['h5writepath'])    
        rtAlObj.save_proc_meta(dbfilepath,params['h5writepath'],params['h5readpath'])
                    
    return
Code Example #28
File: vistic.py Project: zzsnow/GNPS_Workflows
def view_tics(dbfilepath, method='', params=''):
    printlog('\nExtracting TICs.....')

    h5readpath = params['h5readpath']
    sp = get_data(dbfilepath, h5readpath)
    figtitle = 'Total Ion Chromatograms'
    dirname = os.path.dirname(dbfilepath)

    #screenwidth = 800
    #screenheight = 600

    # if method == 'pyplot':
    #
    #     mydpi = 96
    #     plt.figure(figsize=(screenwidth / mydpi, screenheight / mydpi), dpi=mydpi)
    #     plt.plot(crt, tics)
    #     plt.xlabel('Time(s)')
    #     plt.ylabel('Intensity')
    #     plt.title(figtitle)
    #     plt.show()

    if method == 'bokeh':

        from bokeh.plotting import figure, show, save, output_notebook, output_file, ColumnDataSource
        from bokeh.io import push_notebook
        from bokeh.models import HoverTool

        #if params['inline'] == 'yes':
        #    params['inline'] = 'yes'
        # output_notebook()

        #else:
        if params['outputfile'] == '':
            filename = 'bp' + time.strftime("%H%M_%d_%m_%Y") + '.html'
        else:
            filename = params['outputfile']

        output_file(os.path.join(dirname, filename))

        source = ColumnDataSource(sp)

        p = figure(width=params['plot_width'],
                   height=params['plot_height'],
                   title=figtitle,
                   x_axis_label='Time(mins)',
                   y_axis_label='Intensity')

        p.multi_line(xs='x',
                     ys='y',
                     line_width=1,
                     line_color='color',
                     line_alpha=0.6,
                     hover_line_color='color',
                     hover_line_alpha=1.0,
                     source=source)

        p.add_tools(
            HoverTool(show_arrow=False,
                      line_policy='next',
                      tooltips=[('id', '@id')]))

        try:
            p.toolbar.active_inspect = [None]
        except:
            pass
        #if params['inline'] == 'yes':
        #    show(p)
        # push_notebook(handle=hnotebook)
        # push_notebook(handle=handle)
        #else:
        if params['display'] == 'yes':
            show(p)
        else:
            save(p)
Code Example #29
File: anova.py Project: zzsnow/GNPS_Workflows
 def analyse(self):
     """
     Performs analysis using ANOVA on the StatisticalModel instance supplied at the initialization step
     
     """
     printlog('\nAnalysing...')
     
     #Initialize output for stat_model. Parameters and additional_groups need to be passed as arguments to prepare
     #stat_model instance to receive the results of analysis.
     self.stat_model.initialize_output(
     parameters =  self.parameters,
     additional_groups = self.additional_groups
     );
     
     #Iterate over the available data in stat_model. Each iteration deals with one of the selected rt peaks.
     #Each iteration returns data as a dictionary containing experimental values of 
     #quantity integrals for the corresponding peak and the metadata values for selected metadata categories.
     #E.g. data = {'quantity_integrals':[1.1,2.2,1.3,4.8], 'dose':[1,2,1,2], 'time':[8,12,24,48]} for 
     #model 'C(dose)*C(time)'
     #During iteration self.stat_model.current_index contains the index of the item of self.stat_model.rt_indeces array being processed.
     #self.stat_model.current_rt contains current rt value in seconds
     #self.stat_model.model_clean contains a clean string representation of your model, e.g.
     #for input model 'C(dose)*C(time)' you will have 'C(dose)+C(time)+C(dose):C(time)'
     
     for data in self.stat_model.data():
         printlog('\n%s of %s: %.3f min.'%(self.stat_model.current_index + 1, len(self.stat_model.rt_indeces), self.stat_model.current_rt / 60.0));
         
         #for ANOVA from statsmodels add quantity_integrals to the string model representation            
         model_description = 'quantity_integrals ~ ' + '+'.join(self.stat_model.model_clean);
         #prepare dataframe 
         dataframe = DataFrame(data = data);
         
         #fit linear model
         lm = ols(model_description, data = dataframe).fit();
         printlog(lm.summary());
         printlog('\n');
         
         try: #Sometimes ANOVA can fail, thus try:except 
                 
             #Do ANOVA            
             anova = sm.stats.anova_lm(lm, typ = 2, robust = self.robust);
             
             #results are converted from transposed dataframe to dictionary                
             #This way your results will contain a dictionary of dictionaries
             #with groups and group combinations from the model as the key
             #for the first level dictionary and calculated parameters 
             #as keys for the inner dictionaries, e.g.
             # results = {
             #  'C(Virus):C(Time)':  {'PR(>F)': 0.66408762493566431, 'sum_sq': 56504705293.388962, 'F': 0.60495953227434596}, 
             #  'C(Time)'         :  {'PR(>F)': 0.20201607440256636, 'sum_sq': 81742622757.011337, 'F': 1.7503313599529786}, 
             #  'C(Virus)'        :  {'PR(>F)': 0.80706629815712594, 'sum_sq': 10130541575.940208, 'F': 0.21692238413226958}, 
             #  'Residual'        :  {'PR(>F)': nan,                 'sum_sq': 420311046036.94336, 'F': nan}
             #  }
             results = anova.transpose().to_dict();
             
             
             #For results calculate background color to be used in HTML report based on the values obtained
             results_color = {};
             for key in results: #for each returned group
                 result_set = results[key]; #get subgroup
                 
                 p_value = result_set['PR(>F)']; #get p-value
                 if isnull(p_value):  #if not calculable - set color to grey
                     color = 0x505050;
                 elif p_value <= self.p_value_threshold: #if less than p_value_threshold - the result color is green
                     color = 0x90FF90;
                 else:
                     color = 0xFF9090;#if p_value_threshold is not passed - the result color is red
                 
                 #apply coloring to all results 
                 result_set_color = {}    #initialize dictionary for coloring of the result_set
                 results_color[key] = result_set_color; #add it to overall results_color
                 for subkey in result_set:
                     if isnull(result_set[subkey]): #if the result value for this group/subgroup is not calculable - the color is grey
                         result_set_color[subkey] = 0x505050;
                     else:
                         result_set_color[subkey] = color; #otherwise the color is based on the p_value_threshold being satisfied.
                 if not (isnull(p_value)) and p_value <= self.p_value_threshold: #Also add p_value_pass as one of the returned result values
                     result_set['p_value_pass']  = 1.0; #set it to 1 if the p_value_threshold is satisfied
                     result_set_color['p_value_pass'] = 0x90FF90;# color is green
                 else:
                     result_set['p_value_pass']  = 0.0; #0 otherwise
                     result_set_color['p_value_pass'] = 0xFF9090;# color is red
                     
                         
             #send results and their colors to stat_model for storage and reporting.                
             #stat_model instance will parse and display them according to the parameters and additional groups provided in 
             #self.stat_model.initialize_output(parameters, additional_groups)            
             self.stat_model.store_results(results, results_color);  
         except Exception as inst:
             
             printlog('ANOVA failed!');
             printlog(inst);
             traceback.print_exc();
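
A minimal, self-contained sketch of the statsmodels calls used in analyse above, with made-up data standing in for the quantity integrals and metadata of a 'C(dose)*C(time)'-style model.

import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from pandas import isnull

rng = np.random.default_rng(0)
df = pd.DataFrame({
    'quantity_integrals': rng.normal(1e6, 1e5, size=16),
    'dose': np.tile([1, 1, 2, 2], 4),
    'time': np.repeat([8, 12, 24, 48], 4),
})

lm = ols('quantity_integrals ~ C(dose) + C(time) + C(dose):C(time)', data=df).fit()
anova = sm.stats.anova_lm(lm, typ=2, robust=None)
results = anova.transpose().to_dict()

for key, result_set in results.items():
    p_value = result_set['PR(>F)']
    passed = (not isnull(p_value)) and p_value <= 0.05
    print(key, 'p=%s' % p_value, 'pass' if passed else 'fail')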
Code Example #30
def do_noisefilter(dbfilepath,
                   smoothmethod='sqfilter',
                   smoothparams={
                       'window': 5,
                       'degree': 3
                   },
                   baselinemethod='tophat',
                   baselineparams={'frame': 90},
                   params={
                       'h5readpath': '/sp2D',
                       'h5writepath': '/spproc2D'
                   },
                   istrain=1):
    """
    Performs adjustment for high frequency noise and lower frequency baseline distortions 
    due to a variety of instrumental and experimental reasons
    
    Args:
    
        dbfilepath: a user-specified path to the h5 database file
                    
        smoothmethod: The type of noise filtering method. Default value: 'sqfilter', i.e. the Savitzky-Golay filter.  
        
        smoothparams: The dictionary of parameter arguments for noise filtering method 
        
        baselinemethod: The type of a baseline correction method. Default value: 'tophat', i.e. the top-hat morphological filter.
        
        baselineparams: The dictionary of parameter arguments for baseline correction method          
    """

    params['h5writepath'] = h5Base.correct_h5path(params['h5writepath'])
    params['h5readpath'] = h5Base.correct_h5path(params['h5readpath'])
    dataset_names = mh5.get_dataset_names(dbfilepath,
                                          dataset_names=[],
                                          pathinh5=params['h5readpath'][:-1])
    if not dataset_names:
        return

    peak_width = mh5.load_dataset(dbfilepath,
                                  params['h5readpath'] + 'peak_width')
    printlog('Loaded min estimated peak width: %s seconds' % peak_width)
    mh5.save_dataset(dbfilepath, params['h5writepath'] + 'peak_width',\
                                 data=peak_width)

    if str(smoothparams['window']).lower() == 'auto':
        smoothparams['window'] = peak_width * 0.5
        printlog('Parameter "window" is set to %s' % smoothparams['window'])
    else:
        try:
            smoothparams['window'] = float(smoothparams['window'])
        except:
            raise LoggingValueError(
                'Error! %s value for parameter "window" cannot be converted to float!'
                % smoothparams['window'])

    if str(baselineparams['frame']).lower() == 'auto':
        baselineparams['frame'] = peak_width * 15
        printlog('Parameter "frame" is set to %s' % baselineparams['frame'])
    else:
        try:
            baselineparams['frame'] = float(baselineparams['frame'])
        except:
            raise LoggingValueError(
                'Error! %s value for parameter "frame" cannot be converted to float!'
                % baselineparams['frame'])

    if istrain == 1:
        rtrange = mh5.load_dataset(dbfilepath,
                                   params['h5readpath'] + 'rtrange')
        if smoothmethod != 'none':
            SmoothObject = SmoothFilter(smoothmethod, smoothparams, rtrange)
        else:
            SmoothObject = []

        if baselinemethod != 'none':
            BaselineObject = BaselineFilter(baselinemethod, baselineparams,
                                            rtrange)
        else:
            BaselineObject = []

    elif istrain == 0:
        SmoothObject = SmoothFilter()
        SmoothObject.load_procobj(dbfilepath, params['h5readpath'])
        if SmoothObject.method == '':
            SmoothObject = []
        BaselineObject = BaselineFilter()
        BaselineObject.load_procobj(dbfilepath, params['h5readpath'])
        if BaselineObject.method == '':
            BaselineObject = []

    filternoise_h5(dbfilepath, dataset_names, SmoothObject, BaselineObject,
                   params)

    if istrain == 1:
        #save into hdf5 database file
        if (SmoothObject):
            SmoothObject.export(rtrange)
            SmoothObject.save_procobj(dbfilepath, params['h5writepath'])
        if (BaselineObject):
            BaselineObject.export(rtrange)
            BaselineObject.save_procobj(dbfilepath, params['h5writepath'])

        SmoothObject.save_proc_meta(dbfilepath, params['h5writepath'],
                                    params['h5readpath'])

    return
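
The SmoothFilter and BaselineFilter classes are not shown here; the following is a minimal, self-contained sketch of the two underlying filters named above ('sqfilter' Savitzky-Golay smoothing and 'tophat' morphological baseline removal) using SciPy, with made-up signal and parameters.

import numpy as np
from scipy.signal import savgol_filter
from scipy.ndimage import white_tophat

rng = np.random.default_rng(0)
rt = np.linspace(0, 600, 3000)                    # retention time axis, seconds
peak = 1e5 * np.exp(-((rt - 300.0) ** 2) / 50.0)  # a single chromatographic peak
baseline = 200.0 * np.sqrt(rt + 1.0)              # slow baseline drift
noisy = peak + baseline + rng.normal(0, 500, rt.size)

smoothed = savgol_filter(noisy, window_length=11, polyorder=3)  # 'sqfilter' step
corrected = white_tophat(smoothed, size=451)                    # 'tophat' step, frame given in points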