def generated_spectrum_vector(self, peptide=None, attenuation_ratio=0.0, tolerance=0.5, bin_size=1):
    """Return a binned intensity vector for this spectrum.

    If ``peptide`` is given, peaks are first split into annotated /
    unannotated sets against the theoretical ion series of that peptide;
    unannotated peaks are dropped, or kept scaled by ``attenuation_ratio``
    when it is > 0.

    Args:
        peptide: peptide string to annotate against (None = vectorize raw peaks).
        attenuation_ratio: multiplier applied to unannotated peak intensities
            (0 means unannotated peaks are discarded entirely).
        tolerance: m/z matching tolerance in Da.
        bin_size: width of each vector bin in Da.

    Returns:
        The peak vector produced by ming_numerical_utilities.vectorize_peaks.
    """
    peaks_to_vectorize = self.peaks
    # NOTE(review): hard-coded vector span; peaks above 1500 Da fall outside.
    max_mass = 1500
    if peptide is not None:
        charge_set = range(1, self.charge + 1)
        # BUG FIX: previously annotated against self.peptide even though the
        # guard tests the `peptide` parameter — use the parameter.
        theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
            peptide,
            ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"],
            charge_set=charge_set)
        annotated_peaks, unannotated_peaks = ming_psm_library.extract_annotated_peaks(
            theoretical_peaks, self.peaks, tolerance)
        new_peaks = annotated_peaks
        if attenuation_ratio > 0:
            for unannotated_peak in unannotated_peaks:
                # In-place scaling: peaks are assumed to be mutable [mz, intensity] pairs.
                unannotated_peak[1] *= attenuation_ratio
                new_peaks.append(unannotated_peak)
        peaks_to_vectorize = sorted(new_peaks, key=lambda peak: peak[0])
    # BUG FIX: previously vectorized self.peaks, silently discarding the
    # annotation/attenuation work above.
    peak_vector = ming_numerical_utilities.vectorize_peaks(
        peaks_to_vectorize, max_mass, bin_size)
    return peak_vector
def map_ions_to_peak(peaks, max_charge, tolerance, peptide, ions_to_consider=None):
    """Map each theoretical ion of ``peptide`` to its most intense matching peak.

    Args:
        peaks: iterable of [mz, intensity] pairs.
        max_charge: highest charge state to consider (inclusive).
        tolerance: m/z matching tolerance in Da.
        peptide: peptide string used to generate theoretical ions.
        ions_to_consider: ion series labels to generate (defaults to
            b/y plus neutral losses and a-ions).

    Returns:
        dict mapping ion label -> the single highest-intensity peak that
        matched it.
    """
    # Avoid a shared mutable default argument.
    if ions_to_consider is None:
        ions_to_consider = ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
    charge_set = range(1, max_charge + 1)
    theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
        peptide, ions_to_consider, charge_set=charge_set)
    ions_to_peaks = defaultdict(list)
    for peak in peaks:
        mass = peak[0]
        for ion_peak in theoretical_peaks:
            if abs(mass - theoretical_peaks[ion_peak]) < tolerance:
                ions_to_peaks[ion_peak].append(peak)
                # Each observed peak is assigned to at most one ion.
                break
    # Keep only the most intense matching peak per ion.
    return {ion: max(matched, key=lambda p: p[1])
            for ion, matched in ions_to_peaks.items()}
def attentuate_unannotated_peaks(self, attenuation_ratio=0.0, tolerance=0.5):
    """Replace self.peaks with its annotated peaks, keeping unannotated
    peaks only when attenuation_ratio > 0 (scaled by that ratio).

    Annotation is done against the theoretical b/y (plus neutral-loss and
    a-ion) series of self.peptide across charges 1..self.charge.
    """
    ion_types = ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
    theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
        self.peptide, ion_types, charge_set=range(1, self.charge + 1))
    annotated, unannotated = ming_psm_library.extract_annotated_peaks(
        theoretical_peaks, self.peaks, tolerance)
    kept_peaks = annotated
    if attenuation_ratio > 0:
        for peak in unannotated:
            peak[1] *= attenuation_ratio
            kept_peaks.append(peak)
    # Store back sorted by m/z.
    self.peaks = sorted(kept_peaks, key=lambda p: p[0])
def attenuate_unannotated_peaks(peaks, max_charge, tolerance, peptide, attenuation_ratio=0):
    """Return ``peaks`` with unannotated peaks removed (or attenuated).

    Peaks matching the theoretical ion series of ``peptide`` are kept as-is;
    non-matching peaks are dropped, or kept with intensity scaled by
    ``attenuation_ratio`` when it is > 0.

    Args:
        peaks: iterable of mutable [mz, intensity] pairs.
        max_charge: highest charge state to consider (inclusive).
        tolerance: m/z matching tolerance in Da.
        peptide: peptide string used to generate theoretical ions.
        attenuation_ratio: multiplier for unannotated peak intensities.

    Returns:
        New list of peaks sorted by m/z.
    """
    # BUG FIX: range(1, max_charge) excluded max_charge itself, unlike the
    # sibling functions (map_ions_to_peak, calculate_unique_ions_annotated)
    # which all use max_charge + 1.
    charge_set = range(1, max_charge + 1)
    theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
        peptide,
        ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"],
        charge_set=charge_set)
    annotated_peaks, unannotated_peaks = ming_psm_library.extract_annotated_peaks(
        theoretical_peaks, peaks, tolerance)
    new_peaks = annotated_peaks
    if attenuation_ratio > 0:
        for unannotated_peak in unannotated_peaks:
            # In-place scaling of the shared peak object.
            unannotated_peak[1] *= attenuation_ratio
            new_peaks.append(unannotated_peak)
    new_peaks = sorted(new_peaks, key=lambda peak: peak[0])
    return new_peaks
def attentuate_unannotated_peaks(self, attenuation_ratio=0.0, tolerance=0.5):
    """Prune self.peaks down to annotated peaks; optionally retain
    unannotated ones scaled by ``attenuation_ratio`` (when > 0).

    Matching is against the b/y/neutral-loss/a theoretical series of
    self.peptide over charges 1..self.charge within ``tolerance`` Da.
    """
    series = ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
    theory = ming_psm_library.create_theoretical_peak_map(
        self.peptide, series, charge_set=range(1, self.charge + 1))
    matched, unmatched = ming_psm_library.extract_annotated_peaks(
        theory, self.peaks, tolerance)
    result = matched
    if attenuation_ratio > 0:
        for extra in unmatched:
            extra[1] *= attenuation_ratio
            result.append(extra)
    result.sort(key=lambda entry: entry[0])
    self.peaks = result
def attenuate_unannotated_peaks(peaks, max_charge, tolerance, peptide, attenuation_ratio=0):
    """Return ``peaks`` filtered to annotated peaks, with unannotated
    peaks either dropped or intensity-scaled by ``attenuation_ratio``.

    Args:
        peaks: iterable of mutable [mz, intensity] pairs.
        max_charge: highest charge state to consider (inclusive).
        tolerance: m/z matching tolerance in Da.
        peptide: peptide string used to generate theoretical ions.
        attenuation_ratio: multiplier for unannotated peak intensities
            (0 discards them).

    Returns:
        New list of peaks sorted by m/z.
    """
    # BUG FIX: range(1, max_charge) skipped the max_charge state itself;
    # every sibling helper in this module uses max_charge + 1.
    charge_set = range(1, max_charge + 1)
    theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
        peptide,
        ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"],
        charge_set=charge_set)
    annotated_peaks, unannotated_peaks = ming_psm_library.extract_annotated_peaks(
        theoretical_peaks, peaks, tolerance)
    new_peaks = annotated_peaks
    if attenuation_ratio > 0:
        for unannotated_peak in unannotated_peaks:
            # Mutates the shared peak object in place before keeping it.
            unannotated_peak[1] *= attenuation_ratio
            new_peaks.append(unannotated_peak)
    new_peaks = sorted(new_peaks, key=lambda peak: peak[0])
    return new_peaks
def calculate_unique_ions_annotated(peaks, max_charge, peptide, tolerance):
    """Return the distinct theoretical ions matched by at least one peak.

    An ion counts as annotated when some observed peak lies within
    ``tolerance`` Da of its theoretical m/z.
    """
    ion_series = ["b", "b-iso", "y", "y-iso",
                  "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
    theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
        peptide, ion_series, charge_set=range(1, max_charge + 1))
    # Collect every ion label whose theoretical mass matches some peak.
    annotated_ions = {
        ion_label
        for peak in peaks
        for ion_label, ion_mass in theoretical_peaks.items()
        if abs(peak[0] - ion_mass) < tolerance
    }
    return list(annotated_ions)
def calculate_unique_ions_annotated(peaks, max_charge, peptide, tolerance):
    """List the unique theoretical ions of ``peptide`` that are annotated
    by the observed ``peaks`` (within ``tolerance`` Da)."""
    considered = ["b", "b-iso", "y", "y-iso",
                  "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
    theory = ming_psm_library.create_theoretical_peak_map(
        peptide, considered, charge_set=range(1, max_charge + 1))
    seen = set()
    for observed in peaks:
        observed_mass = observed[0]
        for label in theory:
            # A single peak may annotate several ions; record each hit.
            if abs(observed_mass - theory[label]) < tolerance:
                seen.add(label)
    return list(seen)
def generated_spectrum_vector(self, peptide=None, attenuation_ratio=0.0, tolerance=0.5, bin_size=1):
    """Return a binned intensity vector for this spectrum.

    When ``peptide`` is supplied, the spectrum is first annotated against
    that peptide's theoretical ion series; unannotated peaks are dropped,
    or kept scaled by ``attenuation_ratio`` when it is > 0.

    Args:
        peptide: peptide string to annotate against (None = raw peaks).
        attenuation_ratio: multiplier applied to unannotated peak
            intensities (0 discards them).
        tolerance: m/z matching tolerance in Da.
        bin_size: width of each vector bin in Da.

    Returns:
        The vector from ming_numerical_utilities.vectorize_peaks.
    """
    peaks_to_vectorize = self.peaks
    # NOTE(review): fixed 1500 Da span; higher-mass peaks fall outside.
    max_mass = 1500
    if peptide is not None:
        charge_set = range(1, self.charge + 1)
        # BUG FIX: the guard tests the `peptide` parameter but the original
        # annotated against self.peptide — use the parameter.
        theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
            peptide,
            ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"],
            charge_set=charge_set)
        annotated_peaks, unannotated_peaks = ming_psm_library.extract_annotated_peaks(
            theoretical_peaks, self.peaks, tolerance)
        new_peaks = annotated_peaks
        if attenuation_ratio > 0:
            for unannotated_peak in unannotated_peaks:
                unannotated_peak[1] *= attenuation_ratio
                new_peaks.append(unannotated_peak)
        peaks_to_vectorize = sorted(new_peaks, key=lambda peak: peak[0])
    # BUG FIX: previously vectorized self.peaks, which discarded the
    # annotated/attenuated peak list computed above.
    peak_vector = ming_numerical_utilities.vectorize_peaks(
        peaks_to_vectorize, max_mass, bin_size)
    return peak_vector
def map_ions_to_peak(peaks, max_charge, tolerance, peptide, ions_to_consider=None):
    """Map each theoretical ion of ``peptide`` to its most intense match.

    Args:
        peaks: iterable of [mz, intensity] pairs.
        max_charge: highest charge state to consider (inclusive).
        tolerance: m/z matching tolerance in Da.
        peptide: peptide string used to generate theoretical ions.
        ions_to_consider: ion series labels (defaults to b/y plus
            neutral losses and a-ions).

    Returns:
        dict mapping ion label -> highest-intensity matching peak.
    """
    # Avoid the shared-mutable-default-argument pitfall.
    if ions_to_consider is None:
        ions_to_consider = ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
    charge_set = range(1, max_charge + 1)
    theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
        peptide, ions_to_consider, charge_set=charge_set)
    ions_to_peaks = defaultdict(list)
    for peak in peaks:
        mass = peak[0]
        for ion_peak in theoretical_peaks:
            if abs(mass - theoretical_peaks[ion_peak]) < tolerance:
                ions_to_peaks[ion_peak].append(peak)
                # Each observed peak annotates at most one ion.
                break
    #Now lets choose the peak with biggest intensity
    return {ion: max(candidates, key=lambda p: p[1])
            for ion, candidates in ions_to_peaks.items()}
def calculated_ambiguity(parameter_map, peak_tolerance):
    """Score how ambiguous each scan's candidate annotations are.

    For scans with exactly two candidate peptides, aligns the b/y-annotated
    peaks of both candidates against each other and summarizes how much of
    each spectrum is unique to one candidate.

    Args:
        parameter_map: dict with keys "filename" (spectrum file path) and
            "scan_mapping" (scan -> list of candidate annotations).
            TODO confirm exact scan_mapping value type against caller.
        peak_tolerance: m/z matching tolerance in Da.

    Returns:
        defaultdict mapping scan -> score_summary dict with keys
        ambiguity_total_score, first/second_unique_count,
        first/second_unique_intensity, first_second_unique_ratio.
        Sentinels: all -1 when the annotations collapse to a single peptide
        (not ambiguous); mostly 10 when more than two candidates exist.
    """
    filename = parameter_map["filename"]
    scan_mapping = parameter_map["scan_mapping"]
    spectrum_collection = ming_spectrum_library.SpectrumCollection(filename)
    spectrum_collection.load_from_file()
    return_ambiguity_mapping = defaultdict(lambda: {})
    for scan in scan_mapping:
        spectrum_obj = spectrum_collection.scandict[int(scan)]
        #Lets determine if the strings are actually ambiguous
        ambiguous_list = ming_ambiguity_library.collapse_ambiguous_from_annotations_list(scan_mapping[scan])
        #print(ambiguous_list)
        if len(ambiguous_list) == 1:
            # Only one real candidate: not ambiguous, emit -1 sentinels.
            score_summary = {}
            score_summary["ambiguity_total_score"] = -1
            score_summary["first_unique_count"] = -1
            score_summary["second_unique_count"] = -1
            score_summary["first_unique_intensity"] = -1
            score_summary["second_unique_intensity"] = -1
            score_summary["first_second_unique_ratio"] = -1
            return_ambiguity_mapping[scan] = score_summary
            continue
        if len(ambiguous_list) > 2:
            # More than two candidates: pairwise scoring below doesn't apply,
            # emit the 10 sentinel instead.
            score_summary = {}
            score_summary["ambiguity_total_score"] = 10
            score_summary["first_unique_count"] = 10
            score_summary["second_unique_count"] = 10
            score_summary["first_unique_intensity"] = 10
            score_summary["second_unique_intensity"] = 10
            score_summary["first_second_unique_ratio"] = -1
            return_ambiguity_mapping[scan] = score_summary
            continue
        # Exactly two candidates: extract each candidate's b/y-annotated peaks.
        peptide_to_extracted_peaks_mapping = {}
        for peptide in ambiguous_list:
            theoreteical_peaks = ming_psm_library.create_theoretical_peak_map(peptide, ["b", "y"])
            original_peaks = spectrum_obj.peaks
            extracted_peaks = extract_annotated_peaks(theoreteical_peaks, original_peaks, peak_tolerance)
            peptide_to_extracted_peaks_mapping[peptide] = extracted_peaks
            #print("Original:\t%d\tExtracted:\t%d" % (len(original_peaks), len(extracted_peaks)))
            #print(original_peaks)
            #print(extracted_peaks)
            #print(theoreteical_peaks)
        #Checkout overlap of stuff
        # NOTE(review): relies on dict insertion order matching ambiguous_list order.
        first_peaks = peptide_to_extracted_peaks_mapping[list(peptide_to_extracted_peaks_mapping.keys())[0]]
        second_peaks = peptide_to_extracted_peaks_mapping[list(peptide_to_extracted_peaks_mapping.keys())[1]]
        # Align the two annotated spectra against each other (same precursor mz).
        total_score, reported_alignments = spectrum_alignment.score_alignment(first_peaks, second_peaks, spectrum_obj.mz, spectrum_obj.mz, peak_tolerance)
        first_total = len(first_peaks)
        second_total = len(second_peaks)
        intersection_total = len(reported_alignments)
        # Peaks not shared between the two candidate annotations.
        first_unique_count = first_total - intersection_total
        second_unique_count = second_total - intersection_total
        #Calculating the explained intensity in each of these
        peaks_1_normed = spectrum_alignment.sqrt_normalize_spectrum(spectrum_alignment.convert_to_peaks(first_peaks))
        peaks_2_normed = spectrum_alignment.sqrt_normalize_spectrum(spectrum_alignment.convert_to_peaks(second_peaks))
        first_aligned_index = []
        second_aligned_index = []
        for alignment in reported_alignments:
            first_aligned_index.append(alignment.peak1)
            second_aligned_index.append(alignment.peak2)
        #intensity values
        # Gather normalized intensities of peaks unique to each candidate.
        first_unique = []
        second_unique = []
        for i in range(len(peaks_1_normed)):
            if not i in first_aligned_index:
                first_unique.append(peaks_1_normed[i][1])
        for i in range(len(peaks_2_normed)):
            if not i in second_aligned_index:
                second_unique.append(peaks_2_normed[i][1])
        # Sum of squared normalized intensities = unique explained intensity.
        first_unique_intensity = sum(i[0] * i[1] for i in zip(first_unique, first_unique))
        second_unique_intensity = sum(i[0] * i[1] for i in zip(second_unique, second_unique))
        first_second_unique_ratio = 0
        try:
            first_second_unique_ratio = min(first_unique_intensity, second_unique_intensity) / max(first_unique_intensity, second_unique_intensity)
        except KeyboardInterrupt:
            raise
        except:
            # Division by zero (no unique intensity on one side) -> sentinel 10.
            first_second_unique_ratio = 10
        if first_second_unique_ratio > 10:
            first_second_unique_ratio = 10
        #print(reported_alignments)
        #print(peaks_1_normed)
        #print("FirstCount\t%d\tSecondCount\t%d\tFirstInt\t%f\tSecondInt\t%f" % (first_unique_count, second_unique_count, first_unique_intensity, second_unique_intensity))
        score_summary = {}
        score_summary["ambiguity_total_score"] = total_score
        score_summary["first_unique_count"] = first_unique_count
        score_summary["second_unique_count"] = second_unique_count
        score_summary["first_unique_intensity"] = first_unique_intensity
        score_summary["second_unique_intensity"] = second_unique_intensity
        score_summary["first_second_unique_ratio"] = first_second_unique_ratio
        return_ambiguity_mapping[scan] = score_summary
    return return_ambiguity_mapping