def generated_spectrum_vector(self,
                                  peptide=None,
                                  attenuation_ratio=0.0,
                                  tolerance=0.5,
                                  bin_size=1):
        """Return the binned intensity vector for this spectrum's peaks.

        Without ``peptide`` the stored peaks are vectorized unchanged.  With
        ``peptide`` the peaks are first split into annotated and unannotated
        peaks against the theoretical b/y/a (and neutral-loss) ions for
        charges ``1..self.charge``; annotated peaks are kept, unannotated
        peaks are scaled by ``attenuation_ratio`` when it is positive and
        dropped otherwise.

        NOTE(review): ``peptide`` only switches the filtering on; the
        theoretical ions are generated from ``self.peptide`` -- confirm that
        this is intended.

        Args:
            peptide: Any value other than ``None`` enables peak filtering.
            attenuation_ratio: Factor applied to the intensity of unannotated
                peaks; values ``<= 0`` remove those peaks.
            tolerance: Matching tolerance forwarded to
                ``ming_psm_library.extract_annotated_peaks``.
            bin_size: Bin width forwarded to
                ``ming_numerical_utilities.vectorize_peaks``.

        Returns:
            Whatever ``ming_numerical_utilities.vectorize_peaks`` returns for
            the selected peaks.
        """
        peaks_to_vectorize = self.peaks
        max_mass = 1500  # presumably the upper mass bound of the vector

        if peptide is not None:
            charge_set = range(1, self.charge + 1)
            theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
                self.peptide,
                ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"],
                charge_set=charge_set)
            annotated_peaks, unannotated_peaks = ming_psm_library.extract_annotated_peaks(
                theoretical_peaks, self.peaks, tolerance)
            new_peaks = list(annotated_peaks)
            if attenuation_ratio > 0:
                for unannotated_peak in unannotated_peaks:
                    # Scale a copy: if the helper hands back the spectrum's
                    # own peak objects, scaling in place would attenuate the
                    # stored peaks again on every call.
                    attenuated_peak = list(unannotated_peak)
                    attenuated_peak[1] *= attenuation_ratio
                    new_peaks.append(attenuated_peak)
            peaks_to_vectorize = sorted(new_peaks, key=lambda peak: peak[0])

        # Vectorize the (possibly filtered) peaks rather than the raw ones.
        peak_vector = ming_numerical_utilities.vectorize_peaks(
            peaks_to_vectorize, max_mass, bin_size)

        return peak_vector
def map_ions_to_peak(
        peaks,
        max_charge,
        tolerance,
        peptide,
        ions_to_consider=["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]):
    """Map each matched theoretical ion to its most intense observed peak.

    Theoretical ions are generated for ``peptide`` with charges
    ``1..max_charge``.  Every observed peak is assigned to the first
    theoretical ion (in the map's iteration order) whose value lies strictly
    within ``tolerance`` of the peak's first element; peaks without such an
    ion are ignored.

    NOTE: the default ``ions_to_consider`` list is a single object shared by
    all calls; this function only passes it on and never modifies it.

    Returns:
        dict of ion key -> the assigned peak with the largest second element
        (the earliest such peak wins ties).
    """
    theoretical = ming_psm_library.create_theoretical_peak_map(
        peptide, ions_to_consider, charge_set=range(1, max_charge + 1))

    no_match = object()
    candidates = defaultdict(list)
    for observed in peaks:
        mass = observed[0]
        matched_ion = next(
            (ion for ion in theoretical
             if abs(mass - theoretical[ion]) < tolerance),
            no_match)
        if matched_ion is not no_match:
            candidates[matched_ion].append(observed)

    # Keep only the most intense candidate per ion.
    return {
        ion: max(matches, key=lambda candidate: candidate[1])
        for ion, matches in candidates.items()
    }
 def attentuate_unannotated_peaks(self, attenuation_ratio=0.0, tolerance=0.5):
     """Replace ``self.peaks`` with annotated peaks plus attenuated leftovers.

     Peaks are split against the theoretical b/y/a (and neutral-loss) ions of
     ``self.peptide`` for charges ``1..self.charge``.  Annotated peaks are
     kept; unannotated peaks have their second element multiplied by
     ``attenuation_ratio`` in place and are kept only when the ratio is
     positive.  The result is stored sorted by each peak's first element.
     """
     charges = range(1, self.charge + 1)
     ion_types = ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
     theoretical = ming_psm_library.create_theoretical_peak_map(self.peptide, ion_types, charge_set=charges)
     kept, unmatched = ming_psm_library.extract_annotated_peaks(theoretical, self.peaks, tolerance)

     # A non-positive ratio means the unmatched peaks are discarded entirely.
     to_scale = unmatched if attenuation_ratio > 0 else ()
     for leftover in to_scale:
         leftover[1] *= attenuation_ratio
         kept.append(leftover)

     self.peaks = sorted(kept, key=lambda entry: entry[0])
def attenuate_unannotated_peaks(peaks, max_charge, tolerance, peptide, attenuation_ratio=0):
    """Return the annotated peaks plus attenuated unannotated peaks, sorted.

    Peaks are split against the theoretical b/y/a (and neutral-loss) ions of
    ``peptide`` for charges ``1..max_charge`` inclusive.  Annotated peaks are
    kept as they are; unannotated peaks are scaled by ``attenuation_ratio``
    and kept only when the ratio is positive.

    Args:
        peaks: Observed peaks; element 0 is used as the sort key and element
            1 is the value that gets scaled.
        max_charge: Highest fragment charge to generate theoretical ions for.
        tolerance: Matching tolerance forwarded to
            ``ming_psm_library.extract_annotated_peaks``.
        peptide: Peptide forwarded to
            ``ming_psm_library.create_theoretical_peak_map``.
        attenuation_ratio: Factor applied to unannotated peaks; values
            ``<= 0`` remove those peaks.

    Returns:
        A new list of peaks sorted by their first element.

    NOTE(review): unannotated peaks are scaled in place, so if the helper
    returns the caller's own peak objects the input is modified as well.
    """
    # Inclusive upper bound, matching the other helpers in this file; the
    # previous range(1, max_charge) skipped the highest charge state (and
    # produced no charges at all for max_charge == 1).
    charge_set = range(1, max_charge + 1)
    theoretical_peaks = ming_psm_library.create_theoretical_peak_map(peptide, ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"], charge_set=charge_set)
    annotated_peaks, unannotated_peaks = ming_psm_library.extract_annotated_peaks(theoretical_peaks, peaks, tolerance)
    new_peaks = annotated_peaks
    if attenuation_ratio > 0:
        for unannotated_peak in unannotated_peaks:
            unannotated_peak[1] *= attenuation_ratio
            new_peaks.append(unannotated_peak)
    return sorted(new_peaks, key=lambda peak: peak[0])
 def attentuate_unannotated_peaks(self, attenuation_ratio=0.0, tolerance=0.5):
     """Replace ``self.peaks`` with annotated peaks plus attenuated leftovers.

     Peaks are split against the theoretical b/y/a (and neutral-loss) ions of
     ``self.peptide`` for charges ``1..self.charge``.  Annotated peaks are
     kept; unannotated peaks have their second element multiplied by
     ``attenuation_ratio`` in place and are kept only when the ratio is
     positive.  The result is stored sorted by each peak's first element.
     """
     charges = range(1, self.charge + 1)
     ion_types = ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
     theoretical = ming_psm_library.create_theoretical_peak_map(self.peptide, ion_types, charge_set=charges)
     kept, unmatched = ming_psm_library.extract_annotated_peaks(theoretical, self.peaks, tolerance)

     # A non-positive ratio means the unmatched peaks are discarded entirely.
     to_scale = unmatched if attenuation_ratio > 0 else ()
     for leftover in to_scale:
         leftover[1] *= attenuation_ratio
         kept.append(leftover)

     self.peaks = sorted(kept, key=lambda entry: entry[0])
def attenuate_unannotated_peaks(peaks, max_charge, tolerance, peptide, attenuation_ratio=0):
    """Return the annotated peaks plus attenuated unannotated peaks, sorted.

    Peaks are split against the theoretical b/y/a (and neutral-loss) ions of
    ``peptide`` for charges ``1..max_charge`` inclusive.  Annotated peaks are
    kept as they are; unannotated peaks are scaled by ``attenuation_ratio``
    and kept only when the ratio is positive.

    Args:
        peaks: Observed peaks; element 0 is used as the sort key and element
            1 is the value that gets scaled.
        max_charge: Highest fragment charge to generate theoretical ions for.
        tolerance: Matching tolerance forwarded to
            ``ming_psm_library.extract_annotated_peaks``.
        peptide: Peptide forwarded to
            ``ming_psm_library.create_theoretical_peak_map``.
        attenuation_ratio: Factor applied to unannotated peaks; values
            ``<= 0`` remove those peaks.

    Returns:
        A new list of peaks sorted by their first element.

    NOTE(review): unannotated peaks are scaled in place, so if the helper
    returns the caller's own peak objects the input is modified as well.
    """
    # Inclusive upper bound, matching the other helpers in this file; the
    # previous range(1, max_charge) skipped the highest charge state (and
    # produced no charges at all for max_charge == 1).
    charge_set = range(1, max_charge + 1)
    theoretical_peaks = ming_psm_library.create_theoretical_peak_map(peptide, ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"], charge_set=charge_set)
    annotated_peaks, unannotated_peaks = ming_psm_library.extract_annotated_peaks(theoretical_peaks, peaks, tolerance)
    new_peaks = annotated_peaks
    if attenuation_ratio > 0:
        for unannotated_peak in unannotated_peaks:
            unannotated_peak[1] *= attenuation_ratio
            new_peaks.append(unannotated_peak)
    return sorted(new_peaks, key=lambda peak: peak[0])
def calculate_unique_ions_annotated(peaks, max_charge, peptide, tolerance):
    """List the distinct theoretical ions matched by at least one peak.

    Theoretical ions (b, y, their isotope and neutral-loss variants, and a)
    are generated for ``peptide`` with charges ``1..max_charge``.  An ion
    counts as annotated when some peak's first element lies strictly within
    ``tolerance`` of the ion's theoretical value; one peak may annotate
    several ions.

    Returns:
        list of the matched ion keys, without duplicates.
    """
    charges = range(1, max_charge + 1)
    ion_types = ["b",  "b-iso", "y", "y-iso", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
    theoretical = ming_psm_library.create_theoretical_peak_map(peptide, ion_types, charge_set=charges)

    # Collect every ion hit by any peak; the set removes repeats.
    matched_ions = {
        ion
        for mass in (observed[0] for observed in peaks)
        for ion in theoretical
        if abs(mass - theoretical[ion]) < tolerance
    }

    return list(matched_ions)
def calculate_unique_ions_annotated(peaks, max_charge, peptide, tolerance):
    """List the distinct theoretical ions matched by at least one peak.

    Theoretical ions (b, y, their isotope and neutral-loss variants, and a)
    are generated for ``peptide`` with charges ``1..max_charge``.  An ion
    counts as annotated when some peak's first element lies strictly within
    ``tolerance`` of the ion's theoretical value; one peak may annotate
    several ions.

    Returns:
        list of the matched ion keys, without duplicates.
    """
    charges = range(1, max_charge + 1)
    ion_types = ["b",  "b-iso", "y", "y-iso", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]
    theoretical = ming_psm_library.create_theoretical_peak_map(peptide, ion_types, charge_set=charges)

    # Collect every ion hit by any peak; the set removes repeats.
    matched_ions = {
        ion
        for mass in (observed[0] for observed in peaks)
        for ion in theoretical
        if abs(mass - theoretical[ion]) < tolerance
    }

    return list(matched_ions)
    def generated_spectrum_vector(self, peptide=None, attenuation_ratio=0.0, tolerance=0.5, bin_size=1):
        """Return the binned intensity vector for this spectrum's peaks.

        Without ``peptide`` the stored peaks are vectorized unchanged.  With
        ``peptide`` the peaks are first split into annotated and unannotated
        peaks against the theoretical b/y/a (and neutral-loss) ions for
        charges ``1..self.charge``; annotated peaks are kept, unannotated
        peaks are scaled by ``attenuation_ratio`` when it is positive and
        dropped otherwise.

        NOTE(review): ``peptide`` only switches the filtering on; the
        theoretical ions are generated from ``self.peptide`` -- confirm that
        this is intended.

        Args:
            peptide: Any value other than ``None`` enables peak filtering.
            attenuation_ratio: Factor applied to the intensity of unannotated
                peaks; values ``<= 0`` remove those peaks.
            tolerance: Matching tolerance forwarded to
                ``ming_psm_library.extract_annotated_peaks``.
            bin_size: Bin width forwarded to
                ``ming_numerical_utilities.vectorize_peaks``.

        Returns:
            Whatever ``ming_numerical_utilities.vectorize_peaks`` returns for
            the selected peaks.
        """
        peaks_to_vectorize = self.peaks
        max_mass = 1500  # presumably the upper mass bound of the vector

        if peptide is not None:
            charge_set = range(1, self.charge + 1)
            theoretical_peaks = ming_psm_library.create_theoretical_peak_map(self.peptide, ["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"], charge_set=charge_set)
            annotated_peaks, unannotated_peaks = ming_psm_library.extract_annotated_peaks(theoretical_peaks, self.peaks, tolerance)
            new_peaks = list(annotated_peaks)
            if attenuation_ratio > 0:
                for unannotated_peak in unannotated_peaks:
                    # Scale a copy: if the helper hands back the spectrum's
                    # own peak objects, scaling in place would attenuate the
                    # stored peaks again on every call.
                    attenuated_peak = list(unannotated_peak)
                    attenuated_peak[1] *= attenuation_ratio
                    new_peaks.append(attenuated_peak)
            peaks_to_vectorize = sorted(new_peaks, key=lambda peak: peak[0])

        # Vectorize the (possibly filtered) peaks rather than the raw ones.
        peak_vector = ming_numerical_utilities.vectorize_peaks(peaks_to_vectorize, max_mass, bin_size)

        return peak_vector
def map_ions_to_peak(peaks, max_charge, tolerance, peptide, ions_to_consider=["b", "y", "b-H2O", "b-NH3", "y-H2O", "y-NH3", "a"]):
    """Map each matched theoretical ion to its most intense observed peak.

    Theoretical ions are generated for ``peptide`` with charges
    ``1..max_charge``.  Every observed peak is assigned to the first
    theoretical ion (in the map's iteration order) whose value lies strictly
    within ``tolerance`` of the peak's first element; peaks without such an
    ion are ignored.

    NOTE: the default ``ions_to_consider`` list is a single object shared by
    all calls; this function only passes it on and never modifies it.

    Returns:
        dict of ion key -> the assigned peak with the largest second element
        (the earliest such peak wins ties).
    """
    theoretical = ming_psm_library.create_theoretical_peak_map(peptide, ions_to_consider, charge_set=range(1, max_charge + 1))

    no_match = object()
    candidates = defaultdict(list)
    for observed in peaks:
        mass = observed[0]
        matched_ion = next(
            (ion for ion in theoretical
             if abs(mass - theoretical[ion]) < tolerance),
            no_match)
        if matched_ion is not no_match:
            candidates[matched_ion].append(observed)

    # Keep only the most intense candidate per ion.
    return {
        ion: max(matches, key=lambda candidate: candidate[1])
        for ion, matches in candidates.items()
    }
def calculated_ambiguity(parameter_map, peak_tolerance):
    """Score how distinguishable the ambiguous annotations of each scan are.

    The spectrum file named by ``parameter_map["filename"]`` is loaded, and
    for every scan in ``parameter_map["scan_mapping"]`` the annotations are
    collapsed with
    ``ming_ambiguity_library.collapse_ambiguous_from_annotations_list``:

    * exactly one remaining annotation: every summary field is ``-1``;
    * more than two: every field is ``10`` except
      ``first_second_unique_ratio``, which is ``-1``;
    * otherwise the b/y peaks extracted for the two annotations are aligned
      against each other and the summary holds the alignment score, the
      number of unaligned peaks on each side, the summed squared normalized
      intensity of those unaligned peaks, and the smaller-to-larger ratio of
      the two intensity sums (``10`` when the ratio cannot be computed).

    Args:
        parameter_map: Mapping with the keys ``"filename"`` and
            ``"scan_mapping"`` (scan -> annotations for that scan).
        peak_tolerance: Tolerance used for both peak extraction and
            alignment.

    Returns:
        defaultdict mapping each scan key to its summary dict with the keys
        ``ambiguity_total_score``, ``first_unique_count``,
        ``second_unique_count``, ``first_unique_intensity``,
        ``second_unique_intensity`` and ``first_second_unique_ratio``.

    NOTE(review): ``extract_annotated_peaks`` is resolved from module scope
    here, not through ``ming_psm_library`` as elsewhere -- confirm that this
    is the intended helper.
    """
    filename = parameter_map["filename"]
    scan_mapping = parameter_map["scan_mapping"]

    spectrum_collection = ming_spectrum_library.SpectrumCollection(filename)
    spectrum_collection.load_from_file()

    return_ambiguity_mapping = defaultdict(dict)

    for scan in scan_mapping:
        spectrum_obj = spectrum_collection.scandict[int(scan)]
        # Determine whether the annotations are actually ambiguous.
        ambiguous_list = ming_ambiguity_library.collapse_ambiguous_from_annotations_list(
            scan_mapping[scan])

        if len(ambiguous_list) == 1:
            # Nothing to compare against: report the -1 sentinel everywhere.
            return_ambiguity_mapping[scan] = {
                "ambiguity_total_score": -1,
                "first_unique_count": -1,
                "second_unique_count": -1,
                "first_unique_intensity": -1,
                "second_unique_intensity": -1,
                "first_second_unique_ratio": -1,
            }
            continue

        if len(ambiguous_list) > 2:
            # Only pairs are compared; larger groups get fixed values.
            return_ambiguity_mapping[scan] = {
                "ambiguity_total_score": 10,
                "first_unique_count": 10,
                "second_unique_count": 10,
                "first_unique_intensity": 10,
                "second_unique_intensity": 10,
                "first_second_unique_ratio": -1,
            }
            continue

        peptide_to_extracted_peaks_mapping = {}
        for peptide in ambiguous_list:
            theoretical_peaks = ming_psm_library.create_theoretical_peak_map(
                peptide, ["b", "y"])
            peptide_to_extracted_peaks_mapping[peptide] = extract_annotated_peaks(
                theoretical_peaks, spectrum_obj.peaks, peak_tolerance)

        # Compare the peak sets explained by the two candidate peptides.
        compared_peptides = list(peptide_to_extracted_peaks_mapping)
        first_peaks = peptide_to_extracted_peaks_mapping[compared_peptides[0]]
        second_peaks = peptide_to_extracted_peaks_mapping[compared_peptides[1]]
        total_score, reported_alignments = spectrum_alignment.score_alignment(
            first_peaks, second_peaks, spectrum_obj.mz, spectrum_obj.mz,
            peak_tolerance)

        intersection_total = len(reported_alignments)
        first_unique_count = len(first_peaks) - intersection_total
        second_unique_count = len(second_peaks) - intersection_total

        # Intensity explained only by one of the two peptides.
        peaks_1_normed = spectrum_alignment.sqrt_normalize_spectrum(
            spectrum_alignment.convert_to_peaks(first_peaks))
        peaks_2_normed = spectrum_alignment.sqrt_normalize_spectrum(
            spectrum_alignment.convert_to_peaks(second_peaks))

        # Sets give constant-time membership tests in the loops below.
        first_aligned_index = {
            alignment.peak1 for alignment in reported_alignments}
        second_aligned_index = {
            alignment.peak2 for alignment in reported_alignments}

        first_unique = [
            peak[1] for index, peak in enumerate(peaks_1_normed)
            if index not in first_aligned_index]
        second_unique = [
            peak[1] for index, peak in enumerate(peaks_2_normed)
            if index not in second_aligned_index]

        first_unique_intensity = sum(
            intensity * intensity for intensity in first_unique)
        second_unique_intensity = sum(
            intensity * intensity for intensity in second_unique)

        try:
            first_second_unique_ratio = min(
                first_unique_intensity, second_unique_intensity) / max(
                    first_unique_intensity, second_unique_intensity)
        except ArithmeticError:
            # Division by zero: neither side has unique intensity.
            first_second_unique_ratio = 10

        if first_second_unique_ratio > 10:
            first_second_unique_ratio = 10

        return_ambiguity_mapping[scan] = {
            "ambiguity_total_score": total_score,
            "first_unique_count": first_unique_count,
            "second_unique_count": second_unique_count,
            "first_unique_intensity": first_unique_intensity,
            "second_unique_intensity": second_unique_intensity,
            "first_second_unique_ratio": first_second_unique_ratio,
        }

    return return_ambiguity_mapping