def main(options):
    import time

    # Read the files
    start = time.time()
    reader = SWATHScoringReader.newReader(options.infiles, options.file_format, options.readmethod)
    runs = reader.parse_files(True)

    # Create experiment
    this_exp = MRExperiment()
    this_exp.set_runs(runs)
    print("Reading the input files took %ss" % (time.time() - start))

    # Fix input filenames
    fix_input_fnames(options, runs)

    # Map the precursors across multiple runs, determine the number of
    # precursors in all runs without alignment.
    start = time.time()
    multipeptides = this_exp.get_all_multipeptides(1.0, verbose=True)
    print("Mapping the precursors took %ss" % (time.time() - start))

    for m in multipeptides:
        # Error handling if somehow more than one peakgroup was selected ...
        for p in m.getAllPeptides():
            p._fixSelectedPGError(fixMethod="BestScore")

        if len(m.get_selected_peakgroups()) > 0:
            continue

        for p in m.get_peptides():
            if len(list(p.get_all_peakgroups())) != 1:
                print(p)
                print(dir(p))
                print(p.get_run_id())
                for pg in p.get_all_peakgroups():
                    print(pg.print_out())
                print(len(list(p.get_all_peakgroups())))
            assert len(list(p.get_all_peakgroups())) == 1
            for pg in p.get_all_peakgroups():
                pg.select_this_peakgroup()

    start = time.time()
    if len(options.matrix_outfile) > 0:
        write_out_matrix_file(options.matrix_outfile, this_exp.runs, multipeptides,
                              options.min_frac_selected, style=options.output_method,
                              write_requant=not options.remove_requant_values,
                              aligner_mscore_treshold=options.aligner_mscore_threshold)
    print("Writing output took %ss" % (time.time() - start))
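
# A minimal sketch of how main() above might be driven from the command line.
# The option names mirror the attributes main() reads from `options`; the exact
# flag spellings and defaults are assumptions, not the tool's real interface.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Align SWATH runs (illustrative driver)")
    parser.add_argument("--in", dest="infiles", nargs="+", required=True)
    parser.add_argument("--file_format", default="openswath")
    parser.add_argument("--readmethod", default="minimal")
    parser.add_argument("--matrix_outfile", default="")
    parser.add_argument("--min_frac_selected", type=float, default=0.0)
    parser.add_argument("--output_method", default="none")
    parser.add_argument("--remove_requant_values", action="store_true")
    parser.add_argument("--aligner_mscore_threshold", type=float, default=0.01)
    main(parser.parse_args())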
def write_to_file(self, multipeptides, options, writeTrafoFiles=True):

    infiles = options.infiles
    outfile = options.outfile
    matrix_outfile = options.matrix_outfile
    yaml_outfile = options.yaml_outfile
    ids_outfile = options.ids_outfile
    fraction_needed_selected = options.min_frac_selected
    file_format = options.file_format

    # 1. Collect ids of selected features
    selected_pgs = []
    for m in multipeptides:
        selected_peakgroups = m.get_selected_peakgroups()
        if (len(selected_peakgroups) * 1.0 / len(self.runs)) < fraction_needed_selected:
            continue
        for p in m.getAllPeptides():
            selected_pg = p.get_selected_peakgroup()
            clustered_pg = p.getClusteredPeakgroups()
            for pg in clustered_pg:
                selected_pgs.append(pg)
    selected_ids_dict = dict([(pg.get_feature_id(), pg) for pg in selected_pgs])

    # 2. Write out the (selected) ids
    if len(ids_outfile) > 0:
        fh = open(ids_outfile, "w")
        id_writer = csv.writer(fh, delimiter="\t")
        for pg in selected_pgs:
            id_writer.writerow([pg.get_feature_id()])
        fh.close()
        del id_writer

    # 3. Write out the matrix outfile
    if len(matrix_outfile) > 0:
        write_out_matrix_file(matrix_outfile, self.runs, multipeptides,
                              fraction_needed_selected,
                              style=options.matrix_output_method,
                              aligner_mscore_treshold=options.fdr_cutoff)

    # 4. Write out the full outfile
    if len(outfile) > 0 and options.readmethod == "full":
        # write out the complete original files
        writer = csv.writer(open(outfile, "w"), delimiter="\t")
        header_first = self.runs[0].header
        for run in self.runs:
            assert header_first == run.header
        # Build the column lookup before appending the alignment columns
        header_dict = dict((n, i) for i, n in enumerate(header_first))
        header_first += ["align_runid", "align_origfilename"]
        writer.writerow(header_first)

        for m in multipeptides:
            selected_peakgroups = m.get_selected_peakgroups()
            if (len(selected_peakgroups) * 1.0 / len(self.runs)) < fraction_needed_selected:
                continue
            for p in m.get_peptides():
                selected_pg = p.get_selected_peakgroup()
                if selected_pg is None:
                    continue
                row_to_write = selected_pg.row
                row_to_write += [selected_pg.run.get_id(), selected_pg.run.orig_filename]
                # Replace run_id with the aligned id (align_runid) ->
                # otherwise the run_id is not guaranteed to be unique
                row_to_write[header_dict["run_id"]] = selected_pg.run.get_id()
                writer.writerow(row_to_write)

    elif len(outfile) > 0 and file_format in ["openswath", "peakview_preprocess"]:
        name_of_id_col_map = {"openswath": "id", "peakview_preprocess": "preprocess_id"}
        name_of_trgr_col_map = {"openswath": "transition_group_id", "peakview_preprocess": "Pep Index"}
        name_of_id_col = name_of_id_col_map[file_format]
        name_of_trgr_col = name_of_trgr_col_map[file_format]

        # Only in openswath we have the ID and can go back to the original file.
        # We can write out the complete original files.
        writer = csv.writer(open(outfile, "w"), delimiter="\t")
        header_first = self.runs[0].header
        for run in self.runs:
            assert header_first == run.header
        header_first += ["align_runid", "align_origfilename", "align_clusterid"]
        writer.writerow(header_first)

        for file_nr, f in enumerate(infiles):
            header_dict = {}
            if f.endswith('.gz'):
                import gzip
                filehandler = gzip.open(f, 'rt')  # text mode, so csv.reader gets strings
            else:
                filehandler = open(f)
            reader = csv.reader(filehandler, delimiter="\t")
            header = next(reader)
            for i, n in enumerate(header):
                header_dict[n] = i
            for row in reader:
                f_id = row[header_dict[name_of_id_col]]
                if f_id in selected_ids_dict:
                    # Check the "id" and "transition_group_id" field.
                    # Unfortunately the id can be non-unique, therefore we check both.
                    trgroup_id = selected_ids_dict[f_id].peptide.get_id()
                    unique_peptide_id = row[header_dict[name_of_trgr_col]]
                    if unique_peptide_id == trgroup_id:
                        row_to_write = row
                        row_to_write += [selected_ids_dict[f_id].peptide.run.get_id(), f,
                                         selected_ids_dict[f_id].get_cluster_id()]
                        # Replace run_id with the aligned id (align_runid) ->
                        # otherwise the run_id is not guaranteed to be unique
                        if file_format == "openswath":
                            row_to_write[header_dict["run_id"]] = selected_ids_dict[f_id].peptide.run.get_id()
                        writer.writerow(row_to_write)

    # 5. Write out the .tr transformation files
    if writeTrafoFiles:
        self._write_trafo_files()

    # 6. Write out the YAML file
    if len(yaml_outfile) > 0:
        import yaml
        myYaml = {
            "Commandline": sys.argv,
            "RawData": [],
            "PeakGroupData": [outfile],
            "ReferenceRun": self.transformation_collection.getReferenceRunID(),
            "FeatureAlignment": {"RawInputParameters": options.__dict__, "Parameters": {}},
            "Parameters": {},
        }
        myYaml["Parameters"]["m_score_cutoff"] = float(options.fdr_cutoff)  # deprecated
        myYaml["FeatureAlignment"]["Parameters"]["m_score_cutoff"] = float(options.fdr_cutoff)
        myYaml["FeatureAlignment"]["Parameters"]["fdr_cutoff"] = float(options.fdr_cutoff)
        myYaml["FeatureAlignment"]["Parameters"]["aligned_fdr_cutoff"] = float(options.aligned_fdr_cutoff)
        for current_run in self.runs:
            current_id = current_run.get_id()
            ref_id = self.transformation_collection.getReferenceRunID()
            filename = self._getTrafoFilename(current_run, ref_id)
            dirpath = os.path.dirname(current_run.orig_filename)
            ### Use real path (not very useful when moving data from one computer to another)
            ### filename = os.path.realpath(filename)
            ### dirpath = os.path.realpath(dirpath)
            this = {"id": current_id, "directory": dirpath, "trafo_file": filename}
            myYaml["RawData"].append(this)
        open(yaml_outfile, 'w').write(yaml.dump({"AlignedSwathRuns": myYaml}))
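
# The YAML file written in step 6 can be consumed like this; a minimal sketch
# assuming only the key layout produced above (AlignedSwathRuns -> ReferenceRun
# / RawData). read_alignment_yaml is a hypothetical helper, not part of the tool.
import yaml

def read_alignment_yaml(yaml_file):
    """Return (reference run id, per-run .tr files) from an alignment YAML."""
    with open(yaml_file) as fh:
        doc = yaml.safe_load(fh)["AlignedSwathRuns"]
    ref_id = doc["ReferenceRun"]
    trafo_files = [entry["trafo_file"] for entry in doc["RawData"]]
    return ref_id, trafo_files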
def write_to_file(self, multipeptides, options, alignment, tree=None, writeTrafoFiles=True):

    infiles = options.infiles
    outfile = options.outfile
    matrix_outfile = options.matrix_outfile
    yaml_outfile = options.yaml_outfile
    ids_outfile = options.ids_outfile
    fraction_needed_selected = options.min_frac_selected
    file_format = options.file_format

    # 1. Collect ids of selected features
    selected_pgs = []
    for m in multipeptides:
        selected_peakgroups = m.get_selected_peakgroups()
        if (len(selected_peakgroups) * 1.0 / len(self.runs)) < fraction_needed_selected:
            continue
        for p in m.getAllPeptides():
            selected_pg = p.get_selected_peakgroup()
            clustered_pg = p.getClusteredPeakgroups()
            for pg in clustered_pg:
                selected_pgs.append(pg)
    selected_ids_dict = dict([(pg.get_feature_id(), pg) for pg in selected_pgs])

    # 2. Write out the (selected) ids
    if len(ids_outfile) > 0:
        fh = open(ids_outfile, "w")
        id_writer = csv.writer(fh, delimiter="\t")
        # Sort by feature id so the output is deterministic
        for pg in sorted(selected_pgs, key=lambda pg: pg.get_feature_id()):
            id_writer.writerow([pg.get_feature_id()])
        fh.close()
        del id_writer

    # 3. Write out the matrix outfile
    if len(matrix_outfile) > 0:
        write_out_matrix_file(matrix_outfile, self.runs, multipeptides,
                              fraction_needed_selected,
                              style=options.matrix_output_method,
                              aligner_mscore_treshold=options.fdr_cutoff)

    # 4. Write out the full outfile
    if len(outfile) > 0 and options.readmethod == "full":
        # write out the complete original files
        writer = csv.writer(open(outfile, "w"), delimiter="\t")
        header_first = self.runs[0].header
        for run in self.runs:
            assert header_first == run.header
        # Build the column lookup before appending the alignment columns
        header_dict = dict((n, i) for i, n in enumerate(header_first))
        header_first += ["align_runid", "align_origfilename"]
        writer.writerow(header_first)

        for m in multipeptides:
            selected_peakgroups = m.get_selected_peakgroups()
            if (len(selected_peakgroups) * 1.0 / len(self.runs)) < fraction_needed_selected:
                continue
            for p in m.get_peptides():
                selected_pg = p.get_selected_peakgroup()
                if selected_pg is None:
                    continue
                row_to_write = selected_pg.row
                row_to_write += [selected_pg.run.get_id(), selected_pg.run.orig_filename]
                # Replace run_id with the aligned id (align_runid) ->
                # otherwise the run_id is not guaranteed to be unique
                row_to_write[header_dict["run_id"]] = selected_pg.run.get_id()
                writer.writerow(row_to_write)

    elif len(outfile) > 0 and file_format in ["openswath", "peakview_preprocess"]:
        name_of_id_col_map = {"openswath": "id", "peakview_preprocess": "preprocess_id"}
        name_of_trgr_col_map = {"openswath": "transition_group_id", "peakview_preprocess": "Pep Index"}
        name_of_id_col = name_of_id_col_map[file_format]
        name_of_trgr_col = name_of_trgr_col_map[file_format]

        # Only in openswath we have the ID and can go back to the original file.
        # We can write out the complete original files.
        writer = csv.writer(open(outfile, "w"), delimiter="\t")
        header_first = self.runs[0].header
        for run in self.runs:
            assert header_first == run.header
        header_first += ["align_runid", "align_origfilename", "align_clusterid"]
        writer.writerow(header_first)

        for file_nr, f in enumerate(infiles):
            header_dict = {}
            if f.endswith('.gz'):
                import gzip
                filehandler = gzip.open(f, 'rt')  # text mode, so csv.reader gets strings
            else:
                filehandler = open(f)
            reader = csv.reader(filehandler, delimiter="\t")
            header = next(reader)
            for i, n in enumerate(header):
                header_dict[n] = i
            for row in reader:
                f_id = row[header_dict[name_of_id_col]]
                if f_id in selected_ids_dict:
                    # Check the "id" and "transition_group_id" field.
                    # Unfortunately the id can be non-unique, therefore we check both.
                    trgroup_id = selected_ids_dict[f_id].peptide.get_id()
                    unique_peptide_id = row[header_dict[name_of_trgr_col]]
                    if unique_peptide_id == trgroup_id:
                        row_to_write = row
                        row_to_write += [selected_ids_dict[f_id].peptide.run.get_id(), f,
                                         selected_ids_dict[f_id].get_cluster_id()]
                        # Replace run_id with the aligned id (align_runid) ->
                        # otherwise the run_id is not guaranteed to be unique
                        if file_format == "openswath":
                            row_to_write[header_dict["run_id"]] = selected_ids_dict[f_id].peptide.run.get_id()
                        writer.writerow(row_to_write)

    # 5. Write out the .tr transformation files
    if writeTrafoFiles:
        self._write_trafo_files()

    # 6. Write out the YAML file
    if len(yaml_outfile) > 0:
        import yaml
        myYaml = {
            "Commandline": sys.argv,
            "RawData": [],
            "PeakGroupData": [outfile],
            "ReferenceRun": self.transformation_collection.getReferenceRunID(),
            "FeatureAlignment": {"RawInputParameters": options.__dict__, "Parameters": {}},
            "Parameters": {},
        }
        myYaml["Output"] = {}
        myYaml["Output"]["Tree"] = {}
        if tree is not None:
            myYaml["Output"]["Tree"]["Raw"] = [list(t) for t in tree]
            tree_mapped = [[self.runs[a].get_id(), self.runs[b].get_id()] for a, b in tree]
            myYaml["Output"]["Tree"]["Mapped"] = tree_mapped
            tree_mapped = [[self.runs[a].get_openswath_filename(), self.runs[b].get_openswath_filename()] for a, b in tree]
            myYaml["Output"]["Tree"]["MappedFile"] = tree_mapped
            tree_mapped = [[self.runs[a].get_original_filename(), self.runs[b].get_original_filename()] for a, b in tree]
            myYaml["Output"]["Tree"]["MappedFileInput"] = tree_mapped
        myYaml["Output"]["Quantification"] = alignment.to_yaml()
        myYaml["Parameters"]["m_score_cutoff"] = float(options.fdr_cutoff)  # deprecated
        myYaml["FeatureAlignment"]["Parameters"]["m_score_cutoff"] = float(options.fdr_cutoff)
        myYaml["FeatureAlignment"]["Parameters"]["fdr_cutoff"] = float(options.fdr_cutoff)
        myYaml["FeatureAlignment"]["Parameters"]["aligned_fdr_cutoff"] = float(options.aligned_fdr_cutoff)
        for current_run in self.runs:
            current_id = current_run.get_id()
            ref_id = self.transformation_collection.getReferenceRunID()
            filename = self._getTrafoFilename(current_run, ref_id)
            dirpath = os.path.dirname(current_run.orig_filename)
            ### Use real path (not very useful when moving data from one computer to another)
            ### filename = os.path.realpath(filename)
            ### dirpath = os.path.realpath(dirpath)
            this = {"id": current_id, "directory": dirpath, "trafo_file": filename}
            myYaml["RawData"].append(this)
        open(yaml_outfile, 'w').write(yaml.dump({"AlignedSwathRuns": myYaml}))
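
# Toy illustration (not part of the tool) of the index -> run-id mapping used
# for myYaml["Output"]["Tree"]["Mapped"] above. The guide tree is assumed to be
# a list of (index_a, index_b) pairs into self.runs; stand-in run objects are
# used here, the real ones come from the reader.
class _ToyRun:
    def __init__(self, run_id):
        self._id = run_id

    def get_id(self):
        return self._id

toy_runs = [_ToyRun("0_1"), _ToyRun("0_2"), _ToyRun("0_3")]
toy_tree = [(0, 1), (1, 2)]  # two edges connecting three runs
print([[toy_runs[a].get_id(), toy_runs[b].get_id()] for a, b in toy_tree])
# -> [['0_1', '0_2'], ['0_2', '0_3']]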
def write_to_file(self, multipeptides, options):

    infiles = options.infiles
    outfile = options.outfile
    matrix_outfile = options.matrix_outfile
    matrix_excelfile = options.matix_excel
    yaml_outfile = options.yaml_outfile
    ids_outfile = options.ids_outfile
    fraction_needed_selected = options.min_frac_selected
    file_format = options.file_format

    selected_pgs = []
    for m in multipeptides:
        selected_peakgroups = m.get_selected_peakgroups()
        if (len(selected_peakgroups) * 1.0 / len(self.runs)) < fraction_needed_selected:
            continue
        for p in m.get_peptides():
            selected_pg = p.get_selected_peakgroup()
            if selected_pg is None:
                continue
            selected_pgs.append(selected_pg)
    selected_ids_dict = dict([(pg.get_feature_id(), pg) for pg in selected_pgs])

    if len(ids_outfile) > 0:
        fh = open(ids_outfile, "w")
        id_writer = csv.writer(fh, delimiter="\t")
        for pg in selected_pgs:
            id_writer.writerow([pg.get_feature_id()])
        fh.close()
        del id_writer

    if len(matrix_outfile) > 0:
        write_out_matrix_file(matrix_outfile, self.runs, multipeptides, fraction_needed_selected)

    if len(outfile) > 0 and options.readmethod == "full":
        # write out the complete original files
        writer = csv.writer(open(outfile, "w"), delimiter="\t")
        header_first = self.runs[0].header
        for run in self.runs:
            assert header_first == run.header
        header_first += ["align_runid", "align_origfilename"]
        writer.writerow(header_first)

        for m in multipeptides:
            selected_peakgroups = m.get_selected_peakgroups()
            if (len(selected_peakgroups) * 1.0 / len(self.runs)) < fraction_needed_selected:
                continue
            for p in m.get_peptides():
                selected_pg = p.get_selected_peakgroup()
                if selected_pg is None:
                    continue
                row_to_write = selected_pg.row
                row_to_write += [selected_pg.run.get_id(), selected_pg.run.orig_filename]
                writer.writerow(row_to_write)

    elif len(outfile) > 0 and file_format == "openswath":
        # only in openswath we have the ID and can go back to the original file ...
        # write out the complete original files
        writer = csv.writer(open(outfile, "w"), delimiter="\t")
        header_first = self.runs[0].header
        for run in self.runs:
            assert header_first == run.header
        header_first += ["align_runid", "align_origfilename"]
        writer.writerow(header_first)

        for file_nr, f in enumerate(infiles):
            header_dict = {}
            reader = csv.reader(open(f), delimiter="\t")
            header = next(reader)
            for i, n in enumerate(header):
                header_dict[n] = i
            for row in reader:
                f_id = row[header_dict["id"]]
                if f_id in selected_ids_dict:
                    # Check the "id" and "transition_group_id" field.
                    # Unfortunately the id can be non-unique, therefore we check both.
                    trgroup_id = selected_ids_dict[f_id].peptide.get_id()
                    unique_peptide_id = row[header_dict["transition_group_id"]]
                    if unique_peptide_id == trgroup_id:
                        row_to_write = row
                        row_to_write += [selected_ids_dict[f_id].peptide.run.get_id(), f]
                        writer.writerow(row_to_write)

    # Print out trafo data
    trafo_fnames = []
    for current_run in self.runs:
        current_id = current_run.get_id()
        ref_id = self.transformation_collection.getReferenceRunID()
        filename = os.path.join(os.path.dirname(current_run.orig_filename),
                                "transformation-%s-%s.tr" % (current_id, ref_id))
        trafo_fnames.append(filename)
        self.transformation_collection.writeTransformationData(filename, current_id, ref_id)
        self.transformation_collection.readTransformationData(filename)

    if len(yaml_outfile) > 0:
        import yaml
        myYaml = {
            "RawData": [],
            "PeakGroupData": [outfile],
            "ReferenceRun": self.transformation_collection.getReferenceRunID(),
        }
        for current_run in self.runs:
            current_id = current_run.get_id()
            ref_id = self.transformation_collection.getReferenceRunID()
            filename = os.path.join(os.path.dirname(current_run.orig_filename),
                                    "transformation-%s-%s.tr" % (current_id, ref_id))
            dirpath = os.path.realpath(os.path.dirname(current_run.orig_filename))
            this = {"id": current_id, "directory": dirpath, "trafo_file": os.path.realpath(filename)}
            myYaml["RawData"].append(this)
        open(yaml_outfile, 'w').write(yaml.dump({"AlignedSwathRuns": myYaml}))

    return trafo_fnames
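
# The .tr transformation files above follow a fixed naming scheme next to each
# input file. A small hypothetical helper that mirrors the path construction in
# write_to_file(), shown for clarity only:
import os

def trafo_filename(orig_filename, current_id, ref_id):
    """Path of the transformation file aligning run current_id to run ref_id."""
    return os.path.join(os.path.dirname(orig_filename),
                        "transformation-%s-%s.tr" % (current_id, ref_id))

# e.g. trafo_filename("/data/run1.csv", "0_1", "0_0")
# -> "/data/transformation-0_1-0_0.tr"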
def main(options):
    infiles = options.feature_files
    chromatograms = options.chromatogram_files
    readfilter = ReadFilter()
    file_format = 'openswath'
    readmethod = "minimal"

    reader = SWATHScoringReader.newReader(infiles, file_format, readmethod, readfilter,
                                          enable_isotopic_grouping=False, read_cluster_id=False)
    reader.map_infiles_chromfiles(chromatograms)
    runs = reader.parse_files()
    MStoFeature = MSfileRunMapping(chromatograms, runs)
    precursor_to_transitionID, precursor_sequence = getPrecursorTransitionMapping(infiles[0])
    MZs = mzml_accessors(runs, MStoFeature)
    MZs.set_precursor_to_chromID(precursor_to_transitionID)

    this_exp = Experiment()
    this_exp.set_runs(runs)
    start = time.time()
    fdr_cutoff = options.aligned_fdr_cutoff
    multipeptides = this_exp.get_all_multipeptides(fdr_cutoff, verbose=False, verbosity=10)
    print("Mapping the precursors took %0.2fs" % (time.time() - start))

    # Reference based alignment
    # best_run = this_exp.determine_best_run(alignment_fdr_threshold = 0.05)
    reference_run = referenceForPrecursor(
        refType="precursor_specific",
        alignment_fdr_threshold=options.fdr_cutoff).get_reference_for_precursors(multipeptides)

    # Pairwise global alignment
    spl_aligner = SplineAligner(alignment_fdr_threshold=fdr_cutoff, smoother="lowess", experiment=this_exp)
    tr_data = initialize_transformation()

    # Initialize XIC smoothing function
    chrom_smoother = chromSmoother(smoother="sgolay", kernelLen=11, polyOrd=4)

    # Calculate the aligned retention time for each precursor across all runs
    prec_ids = list(precursor_to_transitionID.keys())
    for i in range(len(prec_ids)):
        prec_id = prec_ids[i]  # 9719 9720
        refrun = reference_run.get(prec_id)
        if not refrun:
            print("The precursor {} doesn't have any associated reference run. Skipping!".format(prec_id))
            continue
        eXps = list(set(runs) - set([refrun]))

        # Extract XICs from the reference run and smooth them.
        XICs_ref = MZs.extractXIC_group(refrun, prec_id)
        if not XICs_ref:
            continue
        XICs_ref_sm = chrom_smoother.smoothXICs(XICs_ref)

        # For each precursor, we need peptide_group_label and trgr_id
        peptide_group_label = precursor_sequence[prec_id][0]

        # Iterate through all other runs and align them to the reference run
        for eXprun in eXps:
            # Extract XICs from the experiment run and smooth them.
            XICs_eXp = MZs.extractXIC_group(eXprun, prec_id)
            if not XICs_eXp:
                continue
            XICs_eXp_sm = chrom_smoother.smoothXICs(XICs_eXp)
            t_ref_aligned, t_eXp_aligned = RTofAlignedXICs(XICs_ref_sm, XICs_eXp_sm,
                                                           tr_data, spl_aligner, eXprun, refrun,
                                                           multipeptides,
                                                           RSEdistFactor=4,
                                                           alignType=b"hybrid",
                                                           normalization=b"mean",
                                                           simType=b"dotProductMasked",
                                                           goFactor=0.125, geFactor=40,
                                                           cosAngleThresh=0.3,
                                                           OverlapAlignment=True,
                                                           dotProdThresh=0.96,
                                                           gapQuantile=0.5,
                                                           hardConstrain=False,
                                                           samples4gradient=100)
            # Update retention time of all peak-groups to reference peak-group
            updateRetentionTime(eXprun, peptide_group_label, prec_id, t_ref_aligned, t_eXp_aligned)

    AlignmentAlgorithm().align_features(multipeptides, rt_diff_cutoff=40, fdr_cutoff=0.01,
                                        aligned_fdr_cutoff=options.aligned_fdr_cutoff,
                                        method=options.method)
    al = this_exp.print_stats(multipeptides, 0.05, 0.1, 1)
    write_out_matrix_file(options.matrix_outfile, runs, multipeptides,
                          options.min_frac_selected, options.matrix_output_method,
                          True, 0.05, precursor_sequence)
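
# chromSmoother above is configured with a Savitzky-Golay kernel (kernelLen=11,
# polyOrd=4). A minimal, self-contained sketch of that smoothing step using
# scipy (an assumption; the actual chromSmoother implementation may differ):
import numpy as np
from scipy.signal import savgol_filter

rt = np.linspace(0, 100, 200)                   # retention time grid
intensity = np.exp(-((rt - 50.0) ** 2) / 20.0)  # idealized chromatographic peak
noisy = intensity + np.random.normal(0, 0.05, rt.size)
smoothed = savgol_filter(noisy, window_length=11, polyorder=4)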