Esempio n. 1
0
    def generate_peakml_files(self, xseto, polarity_dir, peakml_params):
        """Write one PeakML file into ``polarity_dir`` per name in ``xseto``.

        Output file names come from the R call ``names(xseto)``: every
        case-insensitive occurrence of ``mzxml`` is replaced by ``peakml``
        and only the base name of the result is kept. ``xseto[i]`` is then
        passed, together with the writer settings read from
        ``peakml_params``, to the R function
        ``PeakML.xcms.write.SingleInstance``.

        Nothing is returned.
        """
        # Settings forwarded unchanged to the R writer as keyword arguments.
        writer_options = {
            'ionisation': self.get_value(peakml_params, 'ionisation'),
            'addscans': self.get_value(peakml_params, 'addscans'),
            'writeRejected': self.get_value(peakml_params, 'writeRejected'),
            'ApodisationFilter': self.get_value(peakml_params,
                                                'ApodisationFilter'),
        }

        # NOTE(review): 'ppm' doesn't seem to be set inside peakml_params;
        # a value comparing equal to 0 is replaced by 5.
        ppm = self.get_value(peakml_params, 'ppm')
        writer_options['ppm'] = 5 if ppm == 0 else ppm

        mzxml_pattern = re.compile(re.escape('mzxml'), re.IGNORECASE)
        peakml_files = [
            os.path.join(polarity_dir,
                         os.path.basename(mzxml_pattern.sub('peakml', name)))
            for name in run_r('names', xseto)
        ]

        # All target paths are computed before the first file is written.
        for i, outfile in enumerate(peakml_files):
            logger.debug('Now creating %s', outfile)
            run_r('PeakML.xcms.write.SingleInstance',
                  xseto[i],
                  outputfile=outfile,
                  **writer_options)
Esempio n. 2
0
    def combine_peaksets(self, in_files, out_file, label, ppm, rtwindow,
                         combine_type):
        """Invoke the R function ``Pimp.combineSingle`` on ``in_files``.

        The entries of ``in_files`` are joined with commas into a single
        string, which is wrapped in a one-element R character vector and
        passed as the first argument. The remaining parameters are forwarded
        positionally in the order they are declared. Nothing is returned.
        """
        # One comma-separated string, not one vector element per file.
        r_inputs = robjects.StrVector([','.join(in_files)])
        run_r('Pimp.combineSingle', r_inputs, out_file, label, ppm, rtwindow,
              combine_type)
Esempio n. 3
0
    def rsd_filter(self, in_peaksets, rsd):
        """Run the R function ``Pimp.rsdSingle`` on every input peak set.

        For an input ``<stem><ext>`` the R call receives two further paths:
        ``<stem>_rsd<ext>`` and ``<stem>_rsdrej<ext>``, followed by ``rsd``.

        Returns:
            A list with the absolute path of each ``_rsd`` file, in input
            order.
        """
        filtered_paths = []
        for peakset in in_peaksets:
            stem, ext = os.path.splitext(peakset)
            kept_path = ''.join((stem, '_rsd', ext))
            rejected_path = ''.join((stem, '_rsdrej', ext))
            run_r('Pimp.rsdSingle', peakset, kept_path, rejected_path, rsd)
            filtered_paths.append(os.path.abspath(kept_path))
        return filtered_paths
Esempio n. 4
0
    def gap_filling(self, in_file, peakml_params, mzmatch_outputs):
        """Invoke the R function ``Pimp.gapFilling`` on ``in_file``.

        The output location is the first element of the
        ``final.combined.gapfilled.file`` entry of ``mzmatch_outputs``, made
        absolute. The first elements of the ``ionisation``, ``ppm`` and
        ``rtwin`` entries of ``peakml_params`` are forwarded to R in that
        order.

        Returns:
            The absolute output path handed to R.
        """
        gapfilled_path = os.path.abspath(
            self.get_value(mzmatch_outputs,
                           'final.combined.gapfilled.file')[0])

        # Each parameter is looked up as a sequence; only element 0 is used.
        r_args = [
            self.get_value(peakml_params, key)[0]
            for key in ('ionisation', 'ppm', 'rtwin')
        ]
        run_r('Pimp.gapFilling', in_file, gapfilled_path, *r_args)

        return gapfilled_path
Esempio n. 5
0
    def connect_to_rpy2(self):
        """Prepare the embedded R session used by the pipeline.

        Steps, in order: log a banner, pass the path returned by
        ``get_env_packrat_lib_path()`` to R's ``.libPaths``, set the R option
        ``java.parameters`` to ``-Xmx8192m``, import the ``PiMP`` R package,
        and activate the pandas <-> R conversion.
        """
        banner = '******************************************'
        for message in (banner, 'Setup rpy2 connection', banner):
            logger.info(message)

        run_r('.libPaths', self.get_env_packrat_lib_path())

        # 'java.parameters' is not a valid Python identifier, hence the
        # dict expansion instead of a plain keyword argument.
        java_heap = '-Xmx%dm' % (1024 * 8)
        r_base = importr('base')
        r_base.options(**{'java.parameters': java_heap})
        importr('PiMP')

        # activate the conversion from pandas dataframe to r
        pandas2ri.activate()
Esempio n. 6
0
    def rt_correct(self, xset, method):
        """Optionally run R's ``retcor`` on ``xset`` and split the result.

        ``method`` selects the ``retcor`` call:

        * ``'obiwarp'`` -> ``retcor(xset, method='obiwarp', profStep=0.01)``
        * ``'loess'``   -> ``retcor(xset, method='loess', family='symmetric')``
        * anything else -> ``xset`` is used unchanged.

        Returns:
            The result of the R call ``split(x, filepaths(x))`` where ``x``
            is the (possibly corrected) set.
        """
        aligned = xset  # default when no known method is requested
        if method == 'obiwarp':
            aligned = run_r('retcor', xset, method='obiwarp', profStep=0.01)
        elif method == 'loess':
            aligned = run_r('retcor',
                            xset,
                            method='loess',
                            family="symmetric")

        return run_r('split', aligned, run_r('filepaths', aligned))
Esempio n. 7
0
    def identify(self, polarity, in_file, databases, non_empty, mzmatch_params,
                 mzmatch_outputs):
        """Invoke the R function ``Pimp.identify.metabolites``.

        ``non_empty`` is iterated as 5-tuples; the first field is used as the
        group label and the fourth as that group's list of files. The result
        is handed to R as a named list of character vectors.

        Side effect: when the path stored under ``stds.xml.db`` in
        ``mzmatch_outputs`` is not an existing file, that entry is replaced
        by R ``NULL`` via ``self.set_value`` before the R call.

        Returns:
            Whatever ``Pimp.identify.metabolites`` returns.
        """
        groups = robjects.ListVector({
            label: robjects.StrVector(files)
            for label, _index, _description, files, _abspath in non_empty
        })

        # turns out that 'stds.xml.db' always has a value, e.g.
        # /home/pimp/media/projects/10/analysis_38/stds_db.xml regardless of
        # whether the file exists or not. This differs from the old pipeline(?),
        # so it is set to R NULL here if the file doesn't exist.
        stds_xml_file = os.path.abspath(
            self.get_value(mzmatch_outputs, 'stds.xml.db')[0])
        stds_present = os.path.isfile(stds_xml_file)
        if not stds_present:
            logger.info('%s is not found, setting stds.xml.db to NULL',
                        stds_xml_file)
            self.set_value(mzmatch_outputs, 'stds.xml.db', robjects.r("NULL"))

        # Dotted R argument names cannot be written as Python keywords.
        r_kwargs = {
            'in_file': in_file,
            'databases': databases,
            'groups': groups,
            'mzmatch.outputs': mzmatch_outputs,
            'mzmatch.params': mzmatch_params,
            'polarity': polarity
        }
        return run_r('Pimp.identify.metabolites', **r_kwargs)
Esempio n. 8
0
    def related_peaks(self, in_file, mzmatch_params, mzmatch_outputs):
        """Invoke the R function ``Pimp.relatedPeaks`` on ``in_file``.

        Two paths are read from ``mzmatch_outputs`` (first element of
        ``final.combined.related.file`` and of
        ``final.combined.basepeaks.file``) and made absolute. The first
        elements of ``ppm`` and ``rtwindow`` from ``mzmatch_params`` are
        passed along.

        Returns:
            A ``(related_path, basepeaks_path)`` tuple of the two absolute
            paths handed to R.
        """
        related_path = os.path.abspath(
            self.get_value(mzmatch_outputs, 'final.combined.related.file')[0])
        basepeaks_path = os.path.abspath(
            self.get_value(mzmatch_outputs,
                           'final.combined.basepeaks.file')[0])

        tolerance_ppm = self.get_value(mzmatch_params, 'ppm')[0]
        rt_window = self.get_value(mzmatch_params, 'rtwindow')[0]

        run_r('Pimp.relatedPeaks', in_file, related_path, basepeaks_path,
              tolerance_ppm, rt_window)
        return related_path, basepeaks_path
Esempio n. 9
0
    def process_raw_data(self, polarity, xcms_params, mzmatch_params,
                         peakml_params, mzmatch_outputs, mzmatch_filters,
                         n_slaves):
        """Run the per-polarity processing chain and return the R result.

        Stages, in call order: format the mzmatch outputs in R, create the
        input directories, ``create_peakml``, ``generate_combinations``,
        ``generate_peaksets``, ``filter_peaksets``, ``combine_final``,
        ``filter_final``, ``gap_filling``, ``related_peaks`` and finally
        ``identify`` using ``self.r_dbs``.

        Returns:
            The value returned by ``self.identify``.
        """
        formatted_outputs = run_r('Pimp.getFormattedMzmatchOutputs',
                                  self.analysis.id, polarity, mzmatch_outputs)
        polarity_dir, combined_dir = self.create_input_directories(
            polarity, formatted_outputs)

        rule = '------------------------------------------'
        logger.info(rule)
        logger.info('%s %s %s', polarity, polarity_dir, combined_dir)
        logger.info(rule)

        # peak detection and rt correction
        # (this step receives the unformatted mzmatch_outputs)
        logger.debug('Input for peak detection and RT correction %s',
                     self.metadata.files[polarity])
        self.create_peakml(polarity, polarity_dir, xcms_params, mzmatch_params,
                           peakml_params, mzmatch_outputs, n_slaves)

        # separate samples into peaksets for grouping
        non_empty = self.generate_combinations(polarity, combined_dir)

        # perform the grouping of peaks across samples into peaksets
        grouped_files = self.generate_peaksets(polarity_dir, combined_dir,
                                               non_empty, mzmatch_params)
        logger.debug('combined groups = %s', grouped_files)

        # filter each peakset
        filtered_files = self.filter_peaksets(grouped_files, mzmatch_params)
        logger.debug('filtered groups = %s', filtered_files)

        # combine all the peaksets into a single peakml file and filter it
        combined_file = self.combine_final(filtered_files, mzmatch_params,
                                           formatted_outputs)
        logger.debug('combined final = %s', combined_file)

        final_filtered = self.filter_final(combined_file, mzmatch_filters,
                                           mzmatch_params, formatted_outputs)
        logger.debug('filter final = %s', final_filtered)

        # do gap filling
        gapfilled_file = self.gap_filling(final_filtered, peakml_params,
                                          formatted_outputs)
        logger.debug('gap-filled = %s', gapfilled_file)

        # do related peaks
        related_file, basepeak_file = self.related_peaks(
            gapfilled_file, mzmatch_params, formatted_outputs)
        logger.debug('related peaks = %s %s', related_file, basepeak_file)

        # do identification
        databases = self.r_dbs
        logger.debug('identification databases %s', databases)
        logger.debug('identification groups %s', non_empty)
        return self.identify(polarity, related_file, databases, non_empty,
                             mzmatch_params, formatted_outputs)
Esempio n. 10
0
    def run_stats(self, raw_data_dict, mzmatch_outputs, mzmatch_params):
        """Invoke the R function ``Pimp.runStats.save``.

        The positional R arguments are, in order: the ``'positive'`` and
        ``'negative'`` entries of ``raw_data_dict``, the analysis id, the
        group labels as an R character vector, the second value returned by
        ``convert_to_dataframe(groups)``, the converted
        ``self.metadata.contrasts``, ``self.r_databases``, ``self.r_dbs``,
        ``mzmatch_outputs``, ``mzmatch_params``, the literal ``True`` and
        ``self.working_dir``. Nothing is returned.
        """
        analysis_id = self.analysis.id

        groups, _ = self.metadata.get_groups()
        # We need to make sure we close the database connection when we are
        # idle for long periods
        connection.close()
        # Only the second element of the conversion result is used below.
        _unused_frame, r_metadata = convert_to_dataframe(groups)

        factor_labels = robjects.StrVector([group.label for group in groups])
        contrasts = pandas2ri.py2ri(self.metadata.contrasts)

        r_databases = self.r_databases
        r_dbs = self.r_dbs
        working_dir = self.working_dir

        save_fixtures = True
        run_r('Pimp.runStats.save', raw_data_dict['positive'],
              raw_data_dict['negative'], analysis_id, factor_labels,
              r_metadata, contrasts, r_databases, r_dbs, mzmatch_outputs,
              mzmatch_params, save_fixtures, working_dir)
Esempio n. 11
0
    def filter_final(self, in_file, mzmatch_filters, mzmatch_params,
                     mzmatch_outputs):
        """Apply the optional noise filter, then the simple filter, in R.

        When the first element of the ``noise`` entry of ``mzmatch_filters``
        is truthy, ``Pimp.noiseFilter`` is run on ``in_file`` and its output
        path becomes the input of the next step; otherwise ``in_file`` is
        used directly. ``Pimp.simpleFilter`` is always run afterwards with
        the ``ppm``, ``minintensity`` and ``mindetections`` values from
        ``mzmatch_params``.

        Returns:
            The absolute path passed to ``Pimp.simpleFilter`` as its output.
        """
        simple_input = in_file
        if self.get_value(mzmatch_filters, 'noise')[0]:
            noise_level = self.get_value(mzmatch_params, 'noise')[0]
            noise_filtered = os.path.abspath(
                self.get_value(mzmatch_outputs,
                               'final.combined.noise.filtered.file')[0])
            run_r('Pimp.noiseFilter', in_file, noise_filtered, noise_level)
            simple_input = noise_filtered

        ppm = self.get_value(mzmatch_params, 'ppm')[0]
        min_intensity = self.get_value(mzmatch_params, 'minintensity')[0]
        min_detections = self.get_value(mzmatch_params, 'mindetections')[0]
        simple_filtered = os.path.abspath(
            self.get_value(mzmatch_outputs,
                           'final.combined.simple.filtered.file')[0])
        run_r('Pimp.simpleFilter', simple_input, simple_filtered, ppm,
              min_intensity, min_detections)

        return simple_filtered
Esempio n. 12
0
    def setup(self):
        """Mark the analysis as processing and prepare its R-side inputs.

        Sets and saves ``analysis.status = 'Processing'``, resolves
        ``self.working_dir``, validates the input, loads the analysis
        parameters and creates the analysis directory (storing the result in
        ``self.pimp_params``). Finally calls the R function
        ``Pimp.generateStdXml`` and stores its ``DBS`` and ``databases``
        elements in ``self.r_dbs`` and ``self.r_databases``.
        """
        self.analysis.status = 'Processing'
        self.analysis.save(update_fields=['status'])

        self.working_dir = get_pimp_wd(self.project.id)
        self.validate_input()
        analysis_params = self.get_analysis_params(self.analysis.id)
        self.pimp_params = self.create_analysis_dir(self.analysis.id,
                                                    analysis_params)

        # generate std xml
        r_stds = robjects.StrVector(self.metadata.stds)
        r_database_names = robjects.StrVector(self.metadata.databases)
        std_xml = run_r('Pimp.generateStdXml', r_stds, r_database_names,
                        self.pimp_params, self.working_dir)

        # The R result is a named list; pick elements by their R names.
        dbs_position = std_xml.names.index('DBS')
        self.r_dbs = std_xml[dbs_position]
        databases_position = std_xml.names.index('databases')
        self.r_databases = std_xml[databases_position]
Esempio n. 13
0
    def peak_detection(self, files, xcms_params, n_slaves):
        """Invoke the R function ``xcmsSet`` and return its result.

        The call receives ``files``, the values of a fixed list of entries
        looked up in ``xcms_params`` (passed under the same names), and
        ``n_slaves`` as ``nSlaves``.
        """
        # Entries copied verbatim from xcms_params, in this order.
        forwarded_keys = (
            'method',
            'ppm',
            'peakwidth',
            'snthresh',
            'prefilter',
            'integrate',
            'mzdiff',
            'verbose.columns',
            'fitgauss',
        )

        r_kwargs = {'files': files}
        for key in forwarded_keys:
            r_kwargs[key] = self.get_value(xcms_params, key)
        r_kwargs['nSlaves'] = n_slaves

        return run_r('xcmsSet', **r_kwargs)
Esempio n. 14
0
 def create_analysis_dir(self, analysis_id, pimp_params):
     """Return the result of the R function ``Pimp.createAnalysisDir``.

     The call receives ``analysis_id``, ``pimp_params`` and
     ``self.working_dir``.
     """
     # pimp_params$mzmatch.outputs will be updated inside to point to the
     # right analysis folder
     return run_r('Pimp.createAnalysisDir', analysis_id, pimp_params,
                  self.working_dir)
Esempio n. 15
0
 def get_analysis_params(self, analysis_id):
     """Return the result of the R function ``Pimp.getAnalysisParams``."""
     return run_r('Pimp.getAnalysisParams', analysis_id)