def main(options):
    import time

    # Read the files
    start = time.time()
    reader = SWATHScoringReader.newReader(options.infiles, options.file_format,
                                          options.readmethod)
    runs = reader.parse_files(True)
    # Create experiment
    this_exp = MRExperiment()
    this_exp.set_runs(runs)
    print("Reading the input files took %ss" % (time.time() - start))

    # Fix input filenames
    fix_input_fnames(options, runs)

    # Map the precursors across multiple runs, determine the number of
    # precursors in all runs without alignment.
    start = time.time()
    multipeptides = this_exp.get_all_multipeptides(1.0, verbose=True)
    print("Mapping the precursors took %ss" % (time.time() - start))

    for m in multipeptides:

        # Error handling if somehow more than one peakgroup was selected ...
        for p in m.getAllPeptides():
            p._fixSelectedPGError(fixMethod="BestScore")

        if len(m.get_selected_peakgroups()) > 0:
            continue

        for p in m.get_peptides():
            if len(list(p.get_all_peakgroups())) != 1:
                print(p)
                print(dir(p))
                print(p.get_run_id())
                for pg in p.get_all_peakgroups():
                    print(pg.print_out())
                print(len(list(p.get_all_peakgroups())))

            assert len(list(p.get_all_peakgroups())) == 1
            for pg in p.get_all_peakgroups():
                pg.select_this_peakgroup()

    start = time.time()
    if len(options.matrix_outfile) > 0:
        write_out_matrix_file(
            options.matrix_outfile,
            this_exp.runs,
            multipeptides,
            options.min_frac_selected,
            style=options.output_method,
            write_requant=not options.remove_requant_values,
            aligner_mscore_treshold=options.aligner_mscore_threshold)
    print("Writing output took %ss" % (time.time() - start))
Example #3
    def write_to_file(self, multipeptides, options, writeTrafoFiles=True):

        infiles = options.infiles
        outfile = options.outfile
        matrix_outfile = options.matrix_outfile
        yaml_outfile = options.yaml_outfile
        ids_outfile = options.ids_outfile
        fraction_needed_selected = options.min_frac_selected
        file_format = options.file_format

        # 1. Collect ids of selected features
        selected_pgs = []
        for m in multipeptides:

            selected_peakgroups = m.get_selected_peakgroups()
            if (len(selected_peakgroups) * 1.0 /
                    len(self.runs)) < fraction_needed_selected:
                continue

            for p in m.getAllPeptides():
                selected_pg = p.get_selected_peakgroup()
                clustered_pg = p.getClusteredPeakgroups()
                for pg in clustered_pg:
                    selected_pgs.append(pg)

        selected_ids_dict = dict([(pg.get_feature_id(), pg)
                                  for pg in selected_pgs])

        # 2. Write out the (selected) ids
        if len(ids_outfile) > 0:
            fh = open(ids_outfile, "w")
            id_writer = csv.writer(fh, delimiter="\t")
            for pg in selected_pgs:
                id_writer.writerow([pg.get_feature_id()])
            fh.close()
            del id_writer

        # 3. Write out the matrix outfile
        if len(matrix_outfile) > 0:
            write_out_matrix_file(matrix_outfile,
                                  self.runs,
                                  multipeptides,
                                  fraction_needed_selected,
                                  style=options.matrix_output_method,
                                  aligner_mscore_treshold=options.fdr_cutoff)

        # 4. Write out the full outfile
        if len(outfile) > 0 and options.readmethod == "full":
            # write out the complete original files
            writer = csv.writer(open(outfile, "w"), delimiter="\t")
            header_first = self.runs[0].header
            for run in self.runs:
                assert header_first == run.header
            header_first += ["align_runid", "align_origfilename"]
            writer.writerow(header_first)

            for m in multipeptides:

                selected_peakgroups = m.get_selected_peakgroups()
                if (len(selected_peakgroups) * 1.0 /
                        len(self.runs)) < fraction_needed_selected:
                    continue

                for p in m.get_peptides():
                    selected_pg = p.get_selected_peakgroup()
                    if selected_pg is None:
                        continue

                    row_to_write = selected_pg.row
                    row_to_write += [
                        selected_pg.run.get_id(), selected_pg.run.orig_filename
                    ]
                    # Replace run_id with the aligned id (align_runid) ->
                    # otherwise the run_id is not guaranteed to be unique
                    row_to_write[header_dict["run_id"]] = selected_pg.run.get_id()
                    writer.writerow(row_to_write)

        elif len(outfile) > 0 and file_format in [
                "openswath", "peakview_preprocess"
        ]:

            name_of_id_col_map = {
                "openswath": "id",
                "peakview_preprocess": "preprocess_id"
            }
            name_of_trgr_col_map = {
                "openswath": "transition_group_id",
                "peakview_preprocess": "Pep Index"
            }
            name_of_id_col = name_of_id_col_map[file_format]
            name_of_trgr_col = name_of_trgr_col_map[file_format]

            # Only in openswath we have the ID and can go back to the original file.
            # We can write out the complete original files.

            writer = csv.writer(open(outfile, "w"), delimiter="\t")
            header_first = self.runs[0].header
            for run in self.runs:
                assert header_first == run.header
            header_first += [
                "align_runid", "align_origfilename", "align_clusterid"
            ]
            writer.writerow(header_first)

            for file_nr, f in enumerate(infiles):
                header_dict = {}
                if f.endswith('.gz'):
                    import gzip
                    # open in text mode so csv.reader sees str rows
                    filehandler = gzip.open(f, 'rt')
                else:
                    filehandler = open(f)

                reader = csv.reader(filehandler, delimiter="\t")
                header = next(reader)
                for i, n in enumerate(header):
                    header_dict[n] = i

                for row in reader:
                    f_id = row[header_dict[name_of_id_col]]
                    if f_id in selected_ids_dict:
                        # Check the "id" and "transition_group_id" field.
                        # Unfortunately the id can be non-unique, therefore we check both.
                        trgroup_id = selected_ids_dict[f_id].peptide.get_id()
                        unique_peptide_id = row[header_dict[name_of_trgr_col]]
                        if unique_peptide_id == trgroup_id:
                            row_to_write = row
                            row_to_write += [
                                selected_ids_dict[f_id].peptide.run.get_id(),
                                f, selected_ids_dict[f_id].get_cluster_id()
                            ]
                            # Replace run_id with the aligned id (align_runid) ->
                            # otherwise the run_id is not guaranteed to be unique
                            if file_format == "openswath":
                                row_to_write[
                                    header_dict["run_id"]] = selected_ids_dict[
                                        f_id].peptide.run.get_id()
                            writer.writerow(row_to_write)

        # 5. Write out the .tr transformation files
        if writeTrafoFiles:
            self._write_trafo_files()

        # 6. Write out the YAML file
        if len(yaml_outfile) > 0:
            import yaml
            myYaml = {
                "Commandline": sys.argv,
                "RawData": [],
                "PeakGroupData": [outfile],
                "ReferenceRun":
                self.transformation_collection.getReferenceRunID(),
                "FeatureAlignment": {
                    "RawInputParameters": options.__dict__,
                    "Parameters": {}
                },
                "Parameters": {}
            }
            myYaml["Parameters"]["m_score_cutoff"] = float(
                options.fdr_cutoff)  # deprecated
            myYaml["FeatureAlignment"]["Parameters"]["m_score_cutoff"] = float(
                options.fdr_cutoff)
            myYaml["FeatureAlignment"]["Parameters"]["fdr_cutoff"] = float(
                options.fdr_cutoff)
            myYaml["FeatureAlignment"]["Parameters"][
                "aligned_fdr_cutoff"] = float(options.aligned_fdr_cutoff)
            for current_run in self.runs:
                current_id = current_run.get_id()
                ref_id = self.transformation_collection.getReferenceRunID()
                filename = self._getTrafoFilename(current_run, ref_id)
                dirpath = os.path.dirname(current_run.orig_filename)
                ### Use real path (not very useful when moving data from one computer to another)
                ### filename = os.path.realpath(filename)
                ### dirpath = os.path.realpath(dirpath)
                this = {
                    "id": current_id,
                    "directory": dirpath,
                    "trafo_file": filename
                }
                myYaml["RawData"].append(this)
            with open(yaml_outfile, 'w') as fh:
                fh.write(yaml.dump({"AlignedSwathRuns": myYaml}))
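A sketch of consuming the YAML summary written in step 6, assuming a yaml_outfile was given; the key layout follows the myYaml dict built above, while the file name is hypothetical.

import yaml

with open("alignment_summary.yaml") as fh:  # hypothetical yaml_outfile
    doc = yaml.safe_load(fh)["AlignedSwathRuns"]

print(doc["ReferenceRun"])              # id of the reference run
for raw in doc["RawData"]:              # one entry per aligned run
    print(raw["id"], raw["directory"], raw["trafo_file"])
print(doc["FeatureAlignment"]["Parameters"]["fdr_cutoff"])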
    def write_to_file(self, multipeptides, options, alignment, tree=None, writeTrafoFiles=True):

        infiles = options.infiles
        outfile = options.outfile
        matrix_outfile = options.matrix_outfile
        yaml_outfile = options.yaml_outfile
        ids_outfile = options.ids_outfile
        fraction_needed_selected = options.min_frac_selected
        file_format = options.file_format

        # 1. Collect ids of selected features
        selected_pgs = []
        for m in multipeptides:

            selected_peakgroups = m.get_selected_peakgroups()
            if (len(selected_peakgroups) * 1.0 / len(self.runs)) < fraction_needed_selected:
                continue

            for p in m.getAllPeptides():
                selected_pg = p.get_selected_peakgroup()
                clustered_pg = p.getClusteredPeakgroups()
                for pg in clustered_pg:
                    selected_pgs.append(pg)

        selected_ids_dict = dict([(pg.get_feature_id(), pg) for pg in selected_pgs])

        # 2. Write out the (selected) ids
        if len(ids_outfile) > 0:
            fh = open(ids_outfile, "w")
            id_writer = csv.writer(fh, delimiter="\t")
            for pg in sorted(selected_pgs):
                id_writer.writerow([pg.get_feature_id()])
            fh.close()
            del id_writer

        # 3. Write out the matrix outfile
        if len(matrix_outfile) > 0:
            write_out_matrix_file(matrix_outfile, self.runs, multipeptides,
                                  fraction_needed_selected,
                                  style=options.matrix_output_method,
                                  aligner_mscore_treshold=options.fdr_cutoff)

        # 4. Write out the full outfile
        if len(outfile) > 0 and options.readmethod == "full":
            # write out the complete original files 
            writer = csv.writer(open(outfile, "w"), delimiter="\t")
            header_first = self.runs[0].header
            for run in self.runs:
                assert header_first == run.header
            header_first += ["align_runid", "align_origfilename"]
            writer.writerow(header_first)

            for m in multipeptides:

                selected_peakgroups = m.get_selected_peakgroups()
                if (len(selected_peakgroups) * 1.0 / len(self.runs)) < fraction_needed_selected:
                    continue

                for p in m.get_peptides():
                    selected_pg = p.get_selected_peakgroup()
                    if selected_pg is None: 
                        continue

                    row_to_write = selected_pg.row
                    row_to_write += [selected_pg.run.get_id(), selected_pg.run.orig_filename]
                    # Replace run_id with the aligned id (align_runid) ->
                    # otherwise the run_id is not guaranteed to be unique
                    row_to_write[header_dict["run_id"]] = selected_pg.run.get_id()
                    writer.writerow(row_to_write)

        elif len(outfile) > 0 and file_format in ["openswath", "peakview_preprocess"]:

            name_of_id_col_map = {"openswath": "id", "peakview_preprocess": "preprocess_id"}
            name_of_trgr_col_map = {"openswath": "transition_group_id", "peakview_preprocess": "Pep Index"}
            name_of_id_col = name_of_id_col_map[file_format]
            name_of_trgr_col = name_of_trgr_col_map[file_format]

            # Only in openswath we have the ID and can go back to the original file.
            # We can write out the complete original files.

            writer = csv.writer(open(outfile, "w"), delimiter="\t")
            header_first = self.runs[0].header
            for run in self.runs:
                assert header_first == run.header
            header_first += ["align_runid", "align_origfilename", "align_clusterid"]
            writer.writerow(header_first)

            for file_nr, f in enumerate(infiles):
                header_dict = {}
                if f.endswith('.gz'):
                    import gzip
                    # open in text mode so csv.reader sees str rows
                    filehandler = gzip.open(f, 'rt')
                else:
                    filehandler = open(f)

                reader = csv.reader(filehandler, delimiter="\t")
                header = next(reader)
                for i, n in enumerate(header):
                    header_dict[n] = i

                for row in reader:
                    f_id = row[header_dict[name_of_id_col]]
                    if f_id in selected_ids_dict:
                        # Check the "id" and "transition_group_id" field.
                        # Unfortunately the id can be non-unique, therefore we check both.
                        trgroup_id = selected_ids_dict[f_id].peptide.get_id()
                        unique_peptide_id = row[header_dict[name_of_trgr_col]]
                        if unique_peptide_id == trgroup_id:
                            row_to_write = row
                            row_to_write += [selected_ids_dict[f_id].peptide.run.get_id(), f, selected_ids_dict[f_id].get_cluster_id()]
                            # Replace run_id with the aligned id (align_runid) ->
                            # otherwise the run_id is not guaranteed to be unique
                            if file_format == "openswath":
                                row_to_write[header_dict["run_id"]] = selected_ids_dict[f_id].peptide.run.get_id()
                            writer.writerow(row_to_write)

        # 5. Write out the .tr transformation files
        if writeTrafoFiles:
            self._write_trafo_files()

        # 6. Write out the YAML file
        if len(yaml_outfile) > 0:
            import yaml
            myYaml = {"Commandline" : sys.argv, 
                      "RawData" : [], "PeakGroupData" : [ outfile ],
                      "ReferenceRun" : self.transformation_collection.getReferenceRunID(), 
                      "FeatureAlignment" : 
                      {
                        "RawInputParameters" : options.__dict__,
                        "Parameters" : {}
                      },
                      "Parameters" : {}
                     }

            myYaml["Output"] = {}
            myYaml["Output"]["Tree"] = {}
            if tree is not None:
                myYaml["Output"]["Tree"]["Raw"] = [list(t) for t in tree]
                tree_mapped = [[self.runs[a].get_id(), self.runs[b].get_id()] for a, b in tree]
                myYaml["Output"]["Tree"]["Mapped"] = tree_mapped
                tree_mapped = [[self.runs[a].get_openswath_filename(), self.runs[b].get_openswath_filename()] for a, b in tree]
                myYaml["Output"]["Tree"]["MappedFile"] = tree_mapped
                tree_mapped = [[self.runs[a].get_original_filename(), self.runs[b].get_original_filename()] for a, b in tree]
                myYaml["Output"]["Tree"]["MappedFileInput"] = tree_mapped

            myYaml["Output"]["Quantification"] = alignment.to_yaml()
            myYaml["Parameters"]["m_score_cutoff"] = float(options.fdr_cutoff) # deprecated
            myYaml["FeatureAlignment"]["Parameters"]["m_score_cutoff"] = float(options.fdr_cutoff)
            myYaml["FeatureAlignment"]["Parameters"]["fdr_cutoff"] = float(options.fdr_cutoff)
            myYaml["FeatureAlignment"]["Parameters"]["aligned_fdr_cutoff"] = float(options.aligned_fdr_cutoff)
            for current_run in self.runs:
                current_id = current_run.get_id()
                ref_id = self.transformation_collection.getReferenceRunID()
                filename = self._getTrafoFilename(current_run, ref_id)
                dirpath = os.path.dirname(current_run.orig_filename)
                ### Use real path (not very useful when moving data from one computer to another)
                ### filename = os.path.realpath(filename)
                ### dirpath = os.path.realpath(dirpath)
                this = {"id" : current_id, "directory" : dirpath, "trafo_file" : filename}
                myYaml["RawData"].append(this)
            open(yaml_outfile, 'w').write(yaml.dump({"AlignedSwathRuns" : myYaml}))
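The extra alignment and tree parameters in this variant only feed the "Output" section of the YAML file: tree is consumed as pairs of indices into self.runs, and alignment only needs a to_yaml() method. A minimal sketch of compatible values; the names and payloads below are illustrative, not part of the library.

tree = [(0, 1), (1, 2)]  # hypothetical guide-tree edges over three runs, as indices into self.runs

class DummyAlignment:
    """Stand-in for the alignment argument; only to_yaml() is called above."""
    def to_yaml(self):
        return {"AlignedPeakgroups": 0}  # illustrative payload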
Example #6
def main(options):
    infiles = options.feature_files
    chromatograms = options.chromatogram_files

    readfilter = ReadFilter()
    file_format = 'openswath'
    readmethod = "minimal"

    reader = SWATHScoringReader.newReader(infiles,
                                          file_format,
                                          readmethod,
                                          readfilter,
                                          enable_isotopic_grouping=False,
                                          read_cluster_id=False)
    reader.map_infiles_chromfiles(chromatograms)
    runs = reader.parse_files()
    MStoFeature = MSfileRunMapping(chromatograms, runs)
    precursor_to_transitionID, precursor_sequence = getPrecursorTransitionMapping(
        infiles[0])
    MZs = mzml_accessors(runs, MStoFeature)
    MZs.set_precursor_to_chromID(precursor_to_transitionID)

    this_exp = Experiment()
    this_exp.set_runs(runs)
    start = time.time()
    fdr_cutoff = options.aligned_fdr_cutoff
    multipeptides = this_exp.get_all_multipeptides(fdr_cutoff,
                                                   verbose=False,
                                                   verbosity=10)
    print("Mapping the precursors took %0.2fs" % (time.time() - start))

    # Reference based alignment
    # best_run = this_exp.determine_best_run(alignment_fdr_threshold = 0.05)
    reference_run = referenceForPrecursor(
        refType="precursor_specific",
        alignment_fdr_threshold=options.fdr_cutoff
    ).get_reference_for_precursors(multipeptides)
    # Pairwise global alignment
    spl_aligner = SplineAligner(alignment_fdr_threshold=fdr_cutoff,
                                smoother="lowess",
                                experiment=this_exp)
    tr_data = initialize_transformation()
    # Initialize XIC smoothing function
    chrom_smoother = chromSmoother(smoother="sgolay", kernelLen=11, polyOrd=4)

    # Calculate the aligned retention time for each precursor across all runs
    prec_ids = list(precursor_to_transitionID.keys())
    for prec_id in prec_ids:
        refrun = reference_run.get(prec_id)
        if not refrun:
            print("The precursor {} doesn't have any associated reference run. Skipping!".format(prec_id))
            continue
        eXps = list(set(runs) - set([refrun]))
        # Extract XICs from the reference run and smooth them.
        XICs_ref = MZs.extractXIC_group(refrun, prec_id)
        if not XICs_ref:
            continue
        XICs_ref_sm = chrom_smoother.smoothXICs(XICs_ref)
        # For each precursor, we need peptide_group_label and trgr_id
        peptide_group_label = precursor_sequence[prec_id][0]
        # Iterate through all other runs and align them to the reference run
        for eXprun in eXps:
            # Extract XICs from the experiment run and smooth them.
            XICs_eXp = MZs.extractXIC_group(eXprun, prec_id)
            if not XICs_eXp:
                continue
            XICs_eXp_sm = chrom_smoother.smoothXICs(XICs_eXp)
            t_ref_aligned, t_eXp_aligned = RTofAlignedXICs(
                XICs_ref_sm,
                XICs_eXp_sm,
                tr_data,
                spl_aligner,
                eXprun,
                refrun,
                multipeptides,
                RSEdistFactor=4,
                alignType=b"hybrid",
                normalization=b"mean",
                simType=b"dotProductMasked",
                goFactor=0.125,
                geFactor=40,
                cosAngleThresh=0.3,
                OverlapAlignment=True,
                dotProdThresh=0.96,
                gapQuantile=0.5,
                hardConstrain=False,
                samples4gradient=100)
            # Update retention time of all peak-groups to reference peak-group
            updateRetentionTime(eXprun, peptide_group_label, prec_id,
                                t_ref_aligned, t_eXp_aligned)

    AlignmentAlgorithm().align_features(
        multipeptides,
        rt_diff_cutoff=40,
        fdr_cutoff=0.01,
        aligned_fdr_cutoff=options.aligned_fdr_cutoff,
        method=options.method)
    al = this_exp.print_stats(multipeptides, 0.05, 0.1, 1)
    write_out_matrix_file(options.matrix_outfile, runs, multipeptides,
                          options.min_frac_selected,
                          options.matrix_output_method, True, 0.05,
                          precursor_sequence)
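As above, a hedged sketch of the options object this chromatogram-level main() expects. The attribute names are taken from the function body; all values are illustrative.

from types import SimpleNamespace

options = SimpleNamespace(
    feature_files=["run1.tsv", "run2.tsv"],               # hypothetical scored feature files
    chromatogram_files=["run1.chrom.mzML", "run2.chrom.mzML"],
    fdr_cutoff=0.01,
    aligned_fdr_cutoff=0.05,
    method="best_overall",                                # illustrative method name
    matrix_outfile="aligned_matrix.tsv",
    matrix_output_method="none",
    min_frac_selected=0.0,
)
main(options)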
    def write_to_file(self, multipeptides, options):

        infiles = options.infiles
        outfile = options.outfile
        matrix_outfile = options.matrix_outfile
        matrix_excelfile = options.matix_excel
        yaml_outfile = options.yaml_outfile
        ids_outfile = options.ids_outfile
        fraction_needed_selected = options.min_frac_selected
        file_format = options.file_format

        selected_pgs = []
        for m in multipeptides:
            selected_peakgroups = m.get_selected_peakgroups()
            if (len(selected_peakgroups) * 1.0 / len(self.runs) <
                    fraction_needed_selected):
                continue
            for p in m.get_peptides():
                selected_pg = p.get_selected_peakgroup()
                if selected_pg is None:
                    continue
                selected_pgs.append(selected_pg)
        selected_ids_dict = dict([(pg.get_feature_id(), pg)
                                  for pg in selected_pgs])

        if len(ids_outfile) > 0:
            fh = open(ids_outfile, "w")
            id_writer = csv.writer(fh, delimiter="\t")
            for pg in selected_pgs:
                id_writer.writerow([pg.get_feature_id()])
            fh.close()
            del id_writer

        if len(matrix_outfile) > 0:
            write_out_matrix_file(matrix_outfile, self.runs, multipeptides,
                                  fraction_needed_selected)

        if len(outfile) > 0 and options.readmethod == "full":
            # write out the complete original files
            writer = csv.writer(open(outfile, "w"), delimiter="\t")
            header_first = self.runs[0].header
            for run in self.runs:
                assert header_first == run.header
            header_first += ["align_runid", "align_origfilename"]
            writer.writerow(header_first)

            for m in multipeptides:
                selected_peakgroups = m.get_selected_peakgroups()
                if (len(selected_peakgroups) * 1.0 / len(self.runs) <
                        fraction_needed_selected):
                    continue
                for p in m.get_peptides():
                    selected_pg = p.get_selected_peakgroup()
                    if selected_pg is None:
                        continue
                    row_to_write = selected_pg.row
                    row_to_write += [
                        selected_pg.run.get_id(), selected_pg.run.orig_filename
                    ]
                    writer.writerow(row_to_write)
        elif len(outfile) > 0 and file_format == "openswath":
            # only in openswath we have the ID and can go back to the original file ...
            # write out the complete original files
            writer = csv.writer(open(outfile, "w"), delimiter="\t")
            header_first = self.runs[0].header
            for run in self.runs:
                assert header_first == run.header
            header_first += ["align_runid", "align_origfilename"]
            writer.writerow(header_first)

            for file_nr, f in enumerate(infiles):
                header_dict = {}
                reader = csv.reader(open(f), delimiter="\t")
                header = next(reader)
                for i, n in enumerate(header):
                    header_dict[n] = i
                for row in reader:
                    f_id = row[header_dict["id"]]
                    if f_id in selected_ids_dict:
                        # Check the "id" and "transition_group_id" field.
                        # Unfortunately the id can be non-unique, therefore we check both.
                        trgroup_id = selected_ids_dict[f_id].peptide.get_id()
                        unique_peptide_id = row[
                            header_dict["transition_group_id"]]
                        if unique_peptide_id == trgroup_id:
                            row_to_write = row
                            row_to_write += [
                                selected_ids_dict[f_id].peptide.run.get_id(), f
                            ]
                            writer.writerow(row_to_write)

        # Print out trafo data
        trafo_fnames = []
        for current_run in self.runs:
            current_id = current_run.get_id()
            ref_id = self.transformation_collection.getReferenceRunID()
            filename = os.path.join(
                os.path.dirname(current_run.orig_filename),
                "transformation-%s-%s.tr" % (current_id, ref_id))
            trafo_fnames.append(filename)
            self.transformation_collection.writeTransformationData(
                filename, current_id, ref_id)
            self.transformation_collection.readTransformationData(filename)

        if len(yaml_outfile) > 0:
            import yaml
            myYaml = {
                "RawData": [],
                "PeakGroupData": [outfile],
                "ReferenceRun":
                self.transformation_collection.getReferenceRunID()
            }
            for current_run in self.runs:
                current_id = current_run.get_id()
                ref_id = self.transformation_collection.getReferenceRunID()
                filename = os.path.join(
                    os.path.dirname(current_run.orig_filename),
                    "transformation-%s-%s.tr" % (current_id, ref_id))
                dirpath = os.path.realpath(
                    os.path.dirname(current_run.orig_filename))
                this = {
                    "id": current_id,
                    "directory": dirpath,
                    "trafo_file": os.path.realpath(filename)
                }
                myYaml["RawData"].append(this)
            with open(yaml_outfile, 'w') as fh:
                fh.write(yaml.dump({"AlignedSwathRuns": myYaml}))

        return trafo_fnames
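Unlike the later variants, this version returns the list of .tr transformation files it wrote. A short sketch of picking up that result; aligner, multipeptides and options are assumed to exist already.

trafo_fnames = aligner.write_to_file(multipeptides, options)  # aligner: hypothetical instance of this class
for fname in trafo_fnames:
    # files follow the "transformation-<run_id>-<ref_id>.tr" pattern built above
    print("transformation file written:", fname)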