Ejemplo n.º 1
0
def algorithm(chromatograms, targeted):
    """Fit a linear RT transformation from features picked in chromatograms.

    Features are picked from ``chromatograms`` for ``targeted`` with the RT
    score switched off. ``simple_find_best_feature`` fills the list of pairs
    from the picked features, the pairs are passed through
    ``MRMRTNormalizer.rm_outliers`` and a linear model is fitted to what
    remains.

    Returns the fitted ``pyopenms.TransformationDescription``.
    """
    # Configure the feature finder: defaults, but without the RT score.
    finder = pyopenms.MRMFeatureFinderScoring()
    finder_params = pyopenms.MRMFeatureFinderScoring().getDefaults()
    finder_params.setValue("Scores:use_rt_score", 'false', '')
    finder.setParameters(finder_params)

    # An empty SWATH map and an empty transformation are handed in as
    # inputs; the picked features are written into ``features``.
    no_swath = pyopenms.MSExperiment()
    no_trafo = pyopenms.TransformationDescription()
    features = pyopenms.FeatureMap()
    finder.pickExperiment(chromatograms, features, targeted, no_trafo,
                          no_swath)

    # Collect the pairs and filter them (thresholds 0.95 and 0.6).
    rt_pairs = []
    simple_find_best_feature(features, rt_pairs, targeted)
    kept_pairs = pyopenms.MRMRTNormalizer().rm_outliers(rt_pairs, 0.95, 0.6)
    data_points = [list(pair) for pair in kept_pairs]

    # Store the data points and fit a (non-symmetric) linear model.
    linear_params = pyopenms.Param()
    linear_params.setValue("symmetric_regression", 'false', '')
    fitted = pyopenms.TransformationDescription()
    fitted.setDataPoints(data_points)
    fitted.fitModel("linear", linear_params)
    return fitted
Ejemplo n.º 2
0
    def main(self):
        """Align every source feature map to the reference map and store it.

        For each file returned by ``get_list_full_names(self.src)`` the
        alignment entity is re-initialised via ``self.init_entity``, the
        reference map and the map to align are loaded from featureXML, the
        aligner is run, and the aligned map is written to the destination
        path built from ``convert_src_to_dst_file_name``.

        Side effects: sets ``self.reference_map``, ``self.toAlign_map`` and
        ``self.dst_full_file_name`` (values of the last processed file remain
        after the loop) and prints progress to stdout.
        """
        for f in get_list_full_names(self.src):
            print("Map Alignment implementation")
            print("Source file:", f)
            # Prepare (init) a fresh alignment entity for this file.
            self.init_entity(**self.kw)

            self.reference_map = oms.FeatureMap()
            self.toAlign_map = oms.FeatureMap()

            oms.FeatureXMLFile().load(self.reference_file, self.reference_map)
            oms.FeatureXMLFile().load(f, self.toAlign_map)

            # The aligner needs the reference before align() is called.
            self.ma.entity.setReference(self.reference_map)

            # Receives the transformation computed by the aligner.
            transformation = oms.TransformationDescription()
            self.ma.entity.align(self.toAlign_map, transformation)
            # NOTE(review): if the configured aligner only fills
            # `transformation` without modifying the map, the transformation
            # still has to be applied to `self.toAlign_map` before storing -
            # confirm against the entity created by init_entity().

            self.dst_full_file_name = os.path.join(
                self.dst,
                convert_src_to_dst_file_name(f,
                                             self.dst,
                                             self.suffix_dst_files,
                                             self.ext_dst_files))

            # Only the aligned map is written: storing the reference map to
            # the same path afterwards would overwrite the aligned result.
            oms.FeatureXMLFile().store(self.dst_full_file_name,
                                       self.toAlign_map)

            print("Aligned data stored into:", self.dst_full_file_name)
Ejemplo n.º 3
0
    def test_run_mrmfeaturefinder(self):
        """Pick MRM features with default parameters and check the first one."""
        # Inputs: the chromatograms and the TraML file.
        chrom_exp = pyopenms.MSExperiment()
        pyopenms.FileHandler().loadExperiment(self.chromatograms, chrom_exp)

        assays = pyopenms.TargetedExperiment()
        pyopenms.TraMLFile().load(self.tramlfile, assays)

        # An empty SWATH map and an empty transformation are handed in as
        # inputs; the picked features end up in ``features``.
        no_swath = pyopenms.MSExperiment()
        no_trafo = pyopenms.TransformationDescription()
        features = pyopenms.FeatureMap()

        finder = pyopenms.MRMFeatureFinderScoring()
        finder.pickExperiment(chrom_exp, features, assays, no_trafo,
                              no_swath)

        self.assertAlmostEqual(features.size(), 3)

        # Expected values for the first picked feature.
        first = features[0]
        self.assertAlmostEqual(first.getRT(), 3119.092041015, eps)
        self.assertAlmostEqual(first.getIntensity(), 3614.99755859375, eps)
        self.assertAlmostEqual(
            first.getMetaValue(b"var_xcorr_shape_weighted"),
            0.997577965259552, eps)
        self.assertAlmostEqual(first.getMetaValue(b"sn_ratio"),
                               86.00413513183594, eps)
Ejemplo n.º 4
0
    def test_extractor(self):
        """Extract chromatograms for all transitions; check they are non-empty.

        Only verifies that data is relayed to Python correctly, not the
        extraction functionality itself.
        """
        assays = pyopenms.TargetedExperiment()
        pyopenms.TraMLFile().load(self.filename, assays)

        spectra = pyopenms.MSExperiment()
        pyopenms.MzMLFile().load(self.filename_mzml, spectra)

        no_trafo = pyopenms.TransformationDescription()
        extracted = pyopenms.MSExperiment()
        pyopenms.ChromatogramExtractor().extractChromatograms(
            spectra, extracted, assays, 10, False, no_trafo, -1, "tophat")

        # One chromatogram per transition, and at least one of them.
        n_chroms = len(extracted.getChromatograms())
        self.assertEqual(n_chroms, len(assays.getTransitions()))
        self.assertNotEqual(n_chroms, 0)

        # The first chromatogram has one point per input spectrum.
        first_chrom = extracted.getChromatograms()[0]
        self.assertEqual(first_chrom.size(), spectra.size())
        self.assertNotEqual(first_chrom.size(), 0)

        first_point = first_chrom[0]
        self.assertNotEqual(first_point.getRT(), 0)
        self.assertNotEqual(first_point.getIntensity(), 0)
    def test_readfile_content(self):
        """Run the algorithm-level extractor on three targets.

        Only verifies that data is relayed to Python correctly, not the
        extraction functionality itself.
        """
        spectra = pyopenms.MSExperiment()
        pyopenms.MzMLFile().load(self.filename, spectra)
        n_spectra = spectra.size()
        accessor = pyopenms.SpectrumAccessOpenMS(spectra)

        # Each target is (mz around which to extract, identifier); all
        # targets share the same RT extraction range (start/end in seconds).
        target_specs = (
            (618.31, b"tr3"),
            (628.45, b"tr1"),
            (654.38, b"tr2"),
        )
        targeted = []
        for target_mz, target_id in target_specs:
            coord = pyopenms.ExtractionCoordinates()
            coord.mz = target_mz
            coord.rt_start = 4000
            coord.rt_end = 5000
            coord.id = target_id
            targeted.append(coord)

        trafo = pyopenms.TransformationDescription()

        # One output chromatogram per target, each starting out empty.
        chroms = [pyopenms.OSChromatogram() for _ in targeted]
        self.assertEqual(len(chroms[0].getIntensityArray()), 0)

        window = 10.0
        use_ppm = False
        algo = pyopenms.ChromatogramExtractorAlgorithm()
        algo.extractChromatograms(accessor, chroms, targeted, window,
                                  use_ppm, -1.0, b"tophat")

        self.assertEqual(len(chroms), len(targeted))
        self.assertNotEqual(len(chroms), 0)

        # After extraction the first chromatogram has one point per spectrum.
        first = chroms[0]
        self.assertEqual(len(first.getIntensityArray()), n_spectra)
        self.assertNotEqual(len(first.getIntensityArray()), 0)
        self.assertNotEqual(len(first.getTimeArray()), 0)
        self.assertNotEqual(first.getIntensityArray()[0], 0)
        self.assertNotEqual(first.getTimeArray()[0], 0)
Ejemplo n.º 6
0
def main(options):
    """Pick and score features from chromatograms and store them.

    Reads ``outfile``, ``infile``, ``traml_in``, ``trafo_in``,
    ``remove_overlapping_peaks`` and ``method`` from ``options``, configures
    a transition group picker and a feature scorer, loads the inputs, and
    writes the result of ``algorithm(...)`` to ``outfile`` as featureXML.
    """
    out_path = options.outfile
    chrom_path = options.infile
    traml_path = options.traml_in
    trafo_path = options.trafo_in

    picker = pyopenms.MRMTransitionGroupPicker()

    metabolomics = False
    # Weight for RT deviation: the larger the value, the less importance is
    # given to exact RT matches. For proteomics data it tends to be a good
    # idea to use the length of the RT space (e.g. 100 for a 100 second RT
    # space).
    rt_normalization_factor = 100.0

    # --- peak picker configuration ---
    picker_params = picker.getDefaults()
    picker_params.setValue("PeakPickerMRM:remove_overlapping_peaks",
                           options.remove_overlapping_peaks, '')
    picker_params.setValue("PeakPickerMRM:method", options.method, '')
    if metabolomics:
        # These need to change for metabolomics and very short peaks.
        picker_params.setValue("PeakPickerMRM:signal_to_noise", 0.01, '')
        picker_params.setValue("PeakPickerMRM:peak_width", 0.1, '')
        picker_params.setValue("PeakPickerMRM:gauss_width", 0.1, '')
        picker_params.setValue("resample_boundary", 0.05, '')
        picker_params.setValue("compute_peak_quality", "true", '')
    picker.setParameters(picker_params)

    # --- scorer configuration: report only the top 5 features ---
    scorer = pyopenms.MRMFeatureFinderScoring()
    scorer_params = scorer.getDefaults()
    scorer_params.setValue("stop_report_after_feature", 5, '')
    scorer_params.setValue("rt_normalization_factor",
                           rt_normalization_factor, '')
    scorer.setParameters(scorer_params)

    # --- inputs ---
    chromatograms = pyopenms.MSExperiment()
    pyopenms.FileHandler().loadExperiment(chrom_path, chromatograms)
    targeted = pyopenms.TargetedExperiment()
    pyopenms.TraMLFile().load(traml_path, targeted)

    # Optional input transformation, fitted with a linear model when given.
    trafo_file = pyopenms.TransformationXMLFile()
    trafo = pyopenms.TransformationDescription()
    if trafo_path is not None:
        linear_params = pyopenms.Param()
        linear_params.setValue("symmetric_regression", "false", "", [])
        trafo_file.load(trafo_path, trafo, True)
        trafo.fitModel("linear", linear_params)

    # The algorithm works on the light-weight targeted experiment.
    light_targeted = pyopenms.LightTargetedExperiment()
    pyopenms.OpenSwathDataAccessHelper().convertTargetedExp(targeted,
                                                            light_targeted)
    features = algorithm(chromatograms, light_targeted, picker, scorer, trafo)

    pyopenms.FeatureXMLFile().store(out_path, features)
Ejemplo n.º 7
0
def testTransformationDescription():
    """
    @tests:
     TransformationDescription.__init__
     TransformationDescription.apply
     TransformationDescription.getDataPoints
    """
    # A freshly constructed description has no data points ...
    description = pyopenms.TransformationDescription()
    points = description.getDataPoints()
    assert points == []
    # ... and applying it to a float yields a float.
    transformed = description.apply(0.0)
    assert isinstance(transformed, float)
Ejemplo n.º 8
0
def testTransformationXMLFile():
    """
    @tests:
     TransformationXMLFile.__init__
     TransformationXMLFile.load
     TransformationXMLFile.store
    """
    # Round trip: store an empty description, load it back, and check that
    # it still has no data points.
    xml_file = pyopenms.TransformationXMLFile()
    description = pyopenms.TransformationDescription()
    xml_file.store("test.transformationXML", description)
    xml_file.load("test.transformationXML", description)
    points = description.getDataPoints()
    assert points == []
Ejemplo n.º 9
0
def main(options):
    """Extract chromatograms for the TraML transitions from all input files.

    Reads ``traml_in``, ``infiles``, ``is_swath``, ``min_upper_edge_dist``,
    ``extraction_window``, ``ppm``, ``rt_extraction_window``,
    ``extraction_function`` and ``outfile`` from ``options``. The
    chromatograms of all inputs are collected in one experiment, tagged with
    a SMOOTHING data-processing entry and stored as mzML.
    """
    # load TraML file
    targeted = pyopenms.TargetedExperiment()
    pyopenms.TraMLFile().load(options.traml_in, targeted)

    # Empty transformation as extractor input; ``output`` collects results.
    trafo = pyopenms.TransformationDescription()
    output = pyopenms.MSExperiment()

    for infile in options.infiles:
        exp = pyopenms.MSExperiment()
        pyopenms.FileHandler().loadExperiment(infile, exp)

        if options.is_swath:
            # Let the helper select the transitions for this SWATH map; its
            # return value decides whether the map is processed at all.
            transition_exp_used = pyopenms.TargetedExperiment()
            do_continue = pyopenms.OpenSwathHelper(
            ).checkSwathMapAndSelectTransitions(exp, targeted,
                                                transition_exp_used,
                                                options.min_upper_edge_dist)
        else:
            transition_exp_used = targeted
            do_continue = True

        if not do_continue:
            continue

        # Extract with the transitions selected for THIS map. Passing the
        # full ``targeted`` set here would ignore the SWATH selection above.
        tmp_out = pyopenms.MSExperiment()
        extractor = pyopenms.ChromatogramExtractor()
        extractor.extractChromatograms(exp, tmp_out, transition_exp_used,
                                       options.extraction_window,
                                       options.ppm, trafo,
                                       options.rt_extraction_window,
                                       options.extraction_function)
        # add all chromatograms to the output
        for chrom in tmp_out.getChromatograms():
            output.addChromatogram(chrom)

    # Append one data-processing entry to every chromatogram.
    dp = pyopenms.DataProcessing()
    pa = pyopenms.ProcessingAction().SMOOTHING
    dp.setProcessingActions(set([pa]))

    # getChromatograms() hands back objects that are modified here and then
    # written back explicitly with setChromatograms().
    chromatograms = output.getChromatograms()
    for chrom in chromatograms:
        this_dp = chrom.getDataProcessing()
        this_dp.append(dp)
        chrom.setDataProcessing(this_dp)

    output.setChromatograms(chromatograms)

    pyopenms.MzMLFile().store(options.outfile, output)
Ejemplo n.º 10
0
    def test_run_mrmrtnormalizer(self):
        """Pick features without RT score and check the outlier-filtered pairs."""
        # Inputs: the chromatograms and the TraML file.
        chrom_exp = pyopenms.MSExperiment()
        pyopenms.FileHandler().loadExperiment(self.chromatograms, chrom_exp)

        assays = pyopenms.TargetedExperiment()
        pyopenms.TraMLFile().load(self.tramlfile, assays)

        # An empty SWATH map and an empty transformation are handed in as
        # inputs; the picked features end up in ``features``.
        no_swath = pyopenms.MSExperiment()
        no_trafo = pyopenms.TransformationDescription()
        features = pyopenms.FeatureMap()

        # Feature finder with defaults, but with the RT score switched off.
        finder = pyopenms.MRMFeatureFinderScoring()
        finder_params = pyopenms.MRMFeatureFinderScoring().getDefaults()
        finder_params.setValue("Scores:use_rt_score".encode(),
                               'false'.encode(), ''.encode())
        finder.setParameters(finder_params)
        finder.pickExperiment(chrom_exp, features, assays, no_trafo,
                              no_swath)

        # Collect the pairs and run the iterative outlier removal.
        rt_pairs = []
        simple_find_best_feature(features, rt_pairs, assays)
        kept = pyopenms.MRMRTNormalizer().removeOutliersIterative(
            rt_pairs, 0.95, 0.6, True, "iter_jackknife")
        observed_pairs = [list(pair) for pair in kept]

        expected_pairs = [
            (1497.56884765625, 1881.0),
            (2045.9776611328125, 2409.0),
            (2151.4814453125, 2509.0),
            (1924.0750732421875, 2291.0),
            (612.9832153320312, 990.0),
            (1086.2474365234375, 1470.0),
            (1133.89404296875, 1519.0),
            (799.5291137695312, 1188.0),
            (1397.1541748046875, 1765.0),
        ]

        # Compare pairwise, component by component.
        for wanted, got in zip(expected_pairs, observed_pairs):
            self.assertAlmostEqual(wanted[0], got[0], eps)
            self.assertAlmostEqual(wanted[1], got[1], eps)
Ejemplo n.º 11
0
def _computeTransformation(algo, refMap, fm, numBreakpoints):
    """Compute the transformation that aligns ``fm`` to ``refMap``.

    ``algo`` gets ``refMap`` as its reference. If ``fm`` compares equal to
    ``refMap`` an identity model is fitted; otherwise ``algo.align`` fills
    the transformation and a b-spline model is fitted to it. The b-spline
    parameter names depend on whether the pyopenms version starts with
    "2.0.".

    Returns the resulting transformation object.
    """
    import pyopenms
    uses_v2_params = pyopenms.__version__.startswith("2.0.")

    algo.setReference(refMap)
    trafo = pyopenms.TransformationDescription()

    # Aligning the reference to itself needs no computation.
    if refMap == fm:
        trafo.fitModel("identity")
        return trafo

    algo.align(fm, trafo)

    spline_params = pyopenms.Param()
    if uses_v2_params:
        spline_params.setValue("num_nodes", numBreakpoints, "", [])
        spline_params.setValue("wavelength", 0.0, "", [])
        spline_params.setValue("boundary_condition", 2, "", [])
        spline_params.setValue("extrapolate", "bspline", "", [])
    else:
        pyopenms.TransformationModelBSpline.getDefaultParameters(
            spline_params)
        spline_params.setValue("num_breakpoints", numBreakpoints, "", [])
    trafo.fitModel("b_spline", spline_params)

    # Callers only rely on trafo.getDataPoints and trafo.apply from here on.
    # The lowess alternative below is switched off.
    use_lowess = False
    if use_lowess:
        points = trafo.getDataPoints()
        x, y = zip(*points)
        smoother = None  # smoother_lowess(y, x, frat, iterations)
        trafo = LowessTrafoHolder(smoother, points)
    return trafo
Ejemplo n.º 12
0
def _computeTransformation(algo, refMap, fm, numBreakpoints):
    """Compute the transformation that aligns ``fm`` to ``refMap``.

    ``algo`` gets ``refMap`` as its reference. If ``fm`` compares equal to
    ``refMap`` an identity model is fitted; otherwise ``algo.align`` fills
    the transformation and a b-spline model with ``numBreakpoints``
    breakpoints is fitted to it.

    Returns the resulting ``pyopenms.TransformationDescription``.
    """
    import pyopenms

    algo.setReference(refMap)
    trafo = pyopenms.TransformationDescription()

    # Aligning the reference to itself needs no computation.
    if refMap == fm:
        trafo.fitModel("identity")
        return trafo

    algo.align(fm, trafo)

    # Start from the b-spline defaults and override the breakpoint count.
    spline_params = pyopenms.Param()
    pyopenms.TransformationModelBSpline.getDefaultParameters(spline_params)
    spline_params.setValue("num_breakpoints", numBreakpoints, "", [])
    trafo.fitModel("b_spline", spline_params)
    trafo.getModelParameters(spline_params)

    return trafo
Ejemplo n.º 13
0
def processPools(fileList, args):
    """Load each sample pool, split runs by polarity and align the positive runs.

    ``groupSamplePools(fileList)`` yields the pools; every file of a pool is
    loaded as mzData and split with ``splitPolarity``. Only the positive
    runs are passed to the aligner; ``args`` is not used.
    """
    pools = groupSamplePools(fileList)

    for pool_name, pool_files in pools.items():
        sys.stdout.write('Working on {}\n'.format(pool_name))

        # Load every file of the pool and split it into polarities.
        positive_runs = []
        negative_runs = []
        experiment = pyopenms.MSExperiment()
        for path in pool_files:
            pyopenms.MzDataFile().load(path, experiment)
            positive, negative = splitPolarity(experiment)
            positive_runs.append(positive)
            negative_runs.append(negative)

        # Run the alignment on the positive runs, starting from a single
        # empty transformation.
        aligner = pyopenms.MapAlignmentAlgorithmIdentification()
        positive_trafos = [pyopenms.TransformationDescription()]

        aligner.align(positive_runs, positive_trafos)
Ejemplo n.º 14
0
def align(in_files, out_files, out_trafos, reference_index, reference_file,
          params):
    """Align peak maps or feature maps to a reference with pose clustering.

    All inputs must be peak files (mzML, mzXML, mzData) or all must be
    featureXML files. The reference is ``reference_file`` if given,
    otherwise the 1-based ``reference_index`` into ``in_files`` if it is
    positive, otherwise the input with the largest size.

    :param in_files: paths of the maps to align
    :param out_files: output paths parallel to ``in_files``; if empty, no
        transformed maps are written
    :param out_trafos: transformation output paths parallel to ``in_files``;
        if empty, no transformations are written
    :param reference_index: 1-based index of the reference in ``in_files``;
        values <= 0 mean "not set"
    :param reference_file: path of a reference map; takes precedence over
        ``reference_index``
    :param params: parameters; the ``algorithm:`` subsection configures the
        aligner, and the whole object is handed to ``addDataProcessing``
    :raises Exception: if the inputs mix peak files and feature files
    """
    in_types = set(pms.FileHandler.getType(in_) for in_ in in_files)

    if in_types <= set((pms.Type.MZML, pms.Type.MZXML, pms.Type.MZDATA)):
        align_features = False
    elif in_types == set((pms.Type.FEATUREXML, )):
        align_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm = pms.MapAlignmentAlgorithmPoseClustering()
    alignment_params = params.copy("algorithm:", True)
    algorithm.setParameters(alignment_params)
    algorithm.setLogType(pms.LogType.CMD)

    plog = pms.ProgressLogger()
    plog.setLogType(pms.LogType.CMD)

    # --- determine the reference file ---
    if reference_file:
        file_ = reference_file
    elif reference_index > 0:
        file_ = in_files[reference_index - 1]
    else:
        # No reference requested: use the largest input map.
        sizes = []
        plog.startProgress(0, len(in_files), "Determine Reference map")
        if align_features:
            fh = pms.FeatureXMLFile()
            for i, in_f in enumerate(in_files):
                sizes.append((fh.loadSize(in_f), in_f))
                plog.setProgress(i)
        else:
            for i, in_f in enumerate(in_files):
                # FileHandler picks the loader matching the file type, so
                # the mzXML / mzData inputs accepted above can be read too.
                # A fresh experiment per file avoids relying on the loader
                # to reset it.
                mse = pms.MSExperiment()
                pms.FileHandler().loadExperiment(in_f, mse)
                mse.updateRanges()
                sizes.append((mse.getSize(), in_f))
                plog.setProgress(i)
        plog.endProgress()
        __, file_ = max(sizes)

    # When no transformed maps are written, convex hulls and subordinates
    # are not needed, so loading them is skipped.
    f_fmxl = pms.FeatureXMLFile()
    if not out_files:
        options = f_fmxl.getOptions()
        options.setLoadConvexHull(False)
        options.setLoadSubordinates(False)
        f_fmxl.setOptions(options)

    # --- load the reference map (always without hulls / subordinates) ---
    if align_features:
        map_ref = pms.FeatureMap()
        f_fxml_tmp = pms.FeatureXMLFile()
        options = f_fmxl.getOptions()
        options.setLoadConvexHull(False)
        options.setLoadSubordinates(False)
        f_fxml_tmp.setOptions(options)
        f_fxml_tmp.load(file_, map_ref)
        algorithm.setReference(map_ref)
    else:
        map_ref = pms.MSExperiment()
        pms.FileHandler().loadExperiment(file_, map_ref)
        algorithm.setReference(map_ref)

    # --- align every input map ---
    plog.startProgress(0, len(in_files), "Align input maps")
    for i, in_file in enumerate(in_files):
        trafo = pms.TransformationDescription()
        if align_features:
            map_ = pms.FeatureMap()
            f_fxml_tmp = pms.FeatureXMLFile()
            f_fxml_tmp.setOptions(f_fmxl.getOptions())
            f_fxml_tmp.load(in_file, map_)
            # The reference itself only gets an identity transformation.
            if in_file == file_:
                trafo.fitModel("identity")
            else:
                algorithm.align(map_, trafo)
            if out_files:
                pms.MapAlignmentTransformer.transformSingleFeatureMap(
                    map_, trafo)
                addDataProcessing(map_, params, pms.ProcessingAction.ALIGNMENT)
                f_fxml_tmp.store(out_files[i], map_)
        else:
            map_ = pms.MSExperiment()
            pms.FileHandler().loadExperiment(in_file, map_)
            # The reference itself only gets an identity transformation.
            if in_file == file_:
                trafo.fitModel("identity")
            else:
                algorithm.align(map_, trafo)
            if out_files:
                pms.MapAlignmentTransformer.transformSinglePeakMap(map_, trafo)
                addDataProcessing(map_, params, pms.ProcessingAction.ALIGNMENT)
                # Transformed peak maps are always written as mzML.
                pms.MzMLFile().store(out_files[i], map_)
        if out_trafos:
            pms.TransformationXMLFile().store(out_trafos[i], trafo)

        plog.setProgress(i + 1)

    plog.endProgress()
Ejemplo n.º 15
0
def align_feature_xmls(feature_xml_lis,
                       consensus_map_out_path="",
                       class_label_dict=None):
    """
    Align feature maps to a reference and group them into a consensus map.

    First pose clustering is applied pairwise against the feature map with
    the most features, next the aligned maps are linked/grouped across all
    features and the resulting consensus map is median-normalized.

    Each MS1 spectrum from a raw file creates a feature file - they are
    loaded and aligned here to get unique and representative features.

    :param feature_xml_lis: paths of the featureXML files; converted to
        ``str`` and sorted before use
    :param consensus_map_out_path: if non-empty, the consensus map is stored
        at this path as consensusXML
    :param class_label_dict: optional mapping; when non-empty its keys (in
        iteration order) are used as column-header filenames instead of the
        aligned file names
    :return: consensus_map, measurement_names
    :raises ValueError: if ``feature_xml_lis`` is empty
    :raises RuntimeError: if an alignment fails for a reason other than the
        ``max_num_peaks_considered`` limit, or fails again after the retry
    """
    if class_label_dict is None:
        class_label_dict = {}

    # openms won't deal with posix paths - it wants strings instead.
    # Sorting makes sure the feature matrix is sorted as well.
    feature_xml_lis = sorted([str(fx) for fx in feature_xml_lis])
    if not feature_xml_lis:
        raise ValueError(
            "feature_xml_lis is empty - need at least one featureXML file")

    num_features_list = []
    for current_feature_xml_path in feature_xml_lis:
        # load the map only to count its features
        cm = oms.FeatureMap()  # current_map
        oms.FeatureXMLFile().load(current_feature_xml_path, cm)
        num_features_list.append(cm.size())
        del cm

    # choose the feature file / experiment with most features as reference
    max_index = np.argmax(num_features_list)
    reference_map_path = feature_xml_lis[max_index]

    # These start values are raised by the retry below and then stay raised
    # for all following maps.
    default_max_num_peaks_considered = 1000
    default_max_scaling_value = 10.0
    aligned_paths = []
    for i, current_feature_xml_path in enumerate(feature_xml_lis):
        # pairwise alignment - so the reference map is needed every time
        reference_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(reference_map_path, reference_map)

        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_xml_path, current_map)

        # receives the transformation computed by the aligner
        transformation_description = oms.TransformationDescription()

        aligner = oms.MapAlignmentAlgorithmPoseClustering()
        aligner_params = aligner.getParameters()

        # max scaling needs adjusting, otherwise algae samples lead to an
        # error
        max_scaling_key = b'superimposer:max_scaling'
        aligner_params.setValue(max_scaling_key, default_max_scaling_value)

        # same for the number of peaks considered (start value 1000)
        max_num_peaks_key = b'max_num_peaks_considered'
        aligner_params.setValue(max_num_peaks_key,
                                default_max_num_peaks_considered)

        # decrease runtime by removing weak signals:
        # half the default parameter, speeds up alignment
        num_used_points_key = b'superimposer:num_used_points'
        aligner_params.setValue(num_used_points_key, 1000)

        aligner.setParameters(aligner_params)
        aligner.setReference(reference_map)

        try:
            # run alignment
            aligner.align(current_map, transformation_description)
        except RuntimeError as err:
            if 'max_num_peaks_considered' not in str(err):
                # Unrelated failure: do not silently continue with a map
                # that was never aligned.
                raise

            # retry with higher thresholds - required for algae dataset;
            # 15 fold makes it a lot slower but less error prone
            default_max_num_peaks_considered = 15000
            aligner_params.setValue(max_num_peaks_key,
                                    default_max_num_peaks_considered)
            default_max_scaling_value = 20.0  # need to increase to 20
            aligner_params.setValue(max_scaling_key,
                                    default_max_scaling_value)

            # max shift could also be off - issue for ckd dataset;
            # need to increase from 1000 to 2000
            default_max_shift_value = 2000.0
            max_shift_key = b'superimposer:max_shift'
            aligner_params.setValue(max_shift_key, default_max_shift_value)

            print(
                f"Encountered GC/MS Clustering issue - setting 'max_num_peaks_considered' to {default_max_num_peaks_considered}, 'superimposer:max_scaling' to {default_max_scaling_value} and 'superimposer:max_shift' to {default_max_shift_value}"
            )
            aligner.setParameters(aligner_params)
            aligner.setReference(reference_map)
            aligner.align(current_map, transformation_description)

        # align() only computes the transformation; apply it so that the
        # stored map actually carries the aligned retention times (the
        # original RT is kept as a meta value).
        oms.MapAlignmentTransformer().transformRetentionTimes(
            current_map, transformation_description, True)

        current_map.updateRanges()
        reference_map.updateRanges()

        # update feature XML files - both reference and current
        updated_current_map_path = default_store_aligned_feature_xml(
            current_map, current_feature_xml_path)
        updated_reference_path = default_store_aligned_feature_xml(
            reference_map, reference_map_path)
        reference_map_path = updated_reference_path

        aligned_paths.append(updated_current_map_path)
        print(f"Finished alignment of {i}/{len(feature_xml_lis)-1}")

    # the reference entry is replaced by the path it was last stored under
    aligned_paths[max_index] = reference_map_path

    # link/group the maps across features to create the consensus map;
    # grouper parameters are left at their defaults
    grouper = oms.FeatureGroupingAlgorithmUnlabeled()

    # according to openms documentation:
    #   Call "setReference", "addToGroup" (n times), "getResultMap" in that
    #   order.
    for i, current_feature_map_path in enumerate(aligned_paths):
        print(f"Grouping features {i}/{len(aligned_paths)-1}")
        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_map_path, current_map)

        if not i:
            # first iteration - use as reference
            grouper.setReference(i, current_map)
        else:
            grouper.addToGroup(i, current_map)

    # get consensus map
    consensus_map = grouper.getResultMap()

    # consensus map requires some mapping between ids and filenames -
    # otherwise it will complain
    print("Mapping aligned results back to class labels")
    class_label_fns = list(class_label_dict.keys())
    fds = {i: oms.ColumnHeader() for i, _ in enumerate(aligned_paths)}
    measurement_names = []
    for i, aligned_path in enumerate(aligned_paths):
        # file name without directories (stem + suffix)
        current_fn = Path(aligned_path).name

        if class_label_dict:
            # Replace the featureXML file names with the ones from the
            # class labels. Both the featureXML input and class_label_dict
            # are expected to be ordered consistently.
            fds[i].filename = class_label_fns[i]
        else:
            # needs bytestring representation
            fds[i].filename = current_fn.encode("UTF8")

        measurement_names.append(current_fn)

    consensus_map.setColumnHeaders(fds)

    # cleanup aligned feature xmls - can be >30mb per file - so better
    # remove them
    for ap in aligned_paths:
        os.remove(ap)

    # Consensus map normalization using median normalization (also
    # available are Quantile and "robust regression").
    #
    # normalizer.normalizeMaps(map, method, acc_filter, desc_filter)
    #   map          ConsensusMap
    #   method       whether to use scaling or shifting to same median;
    #                passing the name as a string gives a TypeError
    #                requiring int, so the enumerator value is used:
    #                  0: NM_SCALE  scale to same median using
    #                               division/multiplication
    #                  1: NM_SHIFT  shift using subtraction/addition
    #   acc_filter   string describing the regular expression for filtering
    #                accessions
    #   desc_filter  string describing the regular expression for filtering
    #                descriptions
    normalizer = oms.ConsensusMapNormalizerAlgorithmMedian()
    normalizer.normalizeMaps(consensus_map, 0, "", "")

    # don't export if not required - requires more file management
    if consensus_map_out_path:
        print("Storing consensus xml")
        oms.ConsensusXMLFile().store(str(consensus_map_out_path),
                                     consensus_map)

    return consensus_map, measurement_names
Ejemplo n.º 16
0
    def run_ma(self, **kwargs):
        """Align ``self.toAlign`` with ``self.ma_algorithm``.

        A fresh transformation object is stored in ``self.transformation``
        and handed to the aligner. ``kwargs`` is accepted but not used.
        """
        # Object that receives the computed transformation.
        self.transformation = oms.TransformationDescription()
        aligner = self.ma_algorithm
        aligner.align(self.toAlign, self.transformation)