def apply_msgfdb(in_file, msms_run_summary, modifications, num_mods):
    """Read an MS-GFDB output file and add its hits to ``msms_run_summary``.

    Every row yielded by ``read_msgfdb(in_file)`` becomes a ``search_hit``
    element inside the ``search_result`` of its ``spectrum_query``.  One
    ``spectrum_query`` is created per distinct spectrum string (file name,
    scan number and charge) the first time that spectrum is seen.  After all
    rows are processed, the ``sample_enzyme`` child of ``msms_run_summary``
    is annotated from the enzyme lists returned by ``what_enzyme``.

    Args:
        in_file: Passed unchanged to ``read_msgfdb``.
        msms_run_summary: Element that receives the ``spectrum_query``
            children.  It must already contain a ``sample_enzyme`` child,
            because that child is looked up with ``find`` and then modified.
        modifications: Modification descriptions; each one is passed to
            ``find_modifications`` together with the peptide.  The collection
            is iterated once per row, so it must be re-iterable.
        num_mods: Passed unchanged to ``modified_peptide_mass``.

    Returns:
        None.  ``msms_run_summary`` is modified in place.
    """
    spectrum2element = {}
    # Initial enzyme candidates; what_enzyme() hands back updated versions of
    # both lists after looking at each peptide.
    enzyme_list = [
        ArgC, LysC, Trypsin, LysCP, Chymotrypsin, TrypChymo, TrypsinP,
        PepsinA, CNBr, V8E, AspN, Formic_acid, AspNambic, V8DE,
    ]
    semi_list = []
    sample_enzyme = msms_run_summary.find('sample_enzyme')

    for f in read_msgfdb(in_file):
        scan = f['Scan#']
        charge = f['Charge']
        peptide = f['Peptide']
        spectrum = '%(name)s.%(scan)05i.%(scan)05i.%(charge)i' % {
            'name': remove_file_extention(f['#SpecFile']),
            'scan': scan,
            'charge': charge,
        }
        enzyme_list, semi_list = what_enzyme(enzyme_list, semi_list, peptide)

        # The peptide string carries one flanking residue plus a separator
        # character on each side of the sequence; '_' flanks are written
        # as '-'.
        peptide_prev_aa = '-' if peptide[0] == '_' else peptide[0]
        peptide_next_aa = '-' if peptide[-1] == '_' else peptide[-1]
        peptide_middle = peptide[2:-2]

        # Everything after the first space (if any) is the description.
        protein_name, _, protein_descr = f['Protein'].partition(' ')

        precursor_neutral_mass = f['Precursor'] * charge - charge * H_plus

        # Compare against None explicitly: an element's truth value must not
        # be used to decide whether the spectrum was already seen.
        spectrum_query = spectrum2element.get(spectrum)
        if spectrum_query is None:
            spectrum_query = SubElement(msms_run_summary, 'spectrum_query')
            spectrum2element[spectrum] = spectrum_query
            SubElement(spectrum_query, 'search_result')
            spectrum_query.set('spectrum', spectrum)
            spectrum_query.set('start_scan', str(scan))
            spectrum_query.set('end_scan', str(scan))
            spectrum_query.set('assumed_charge', str(charge))
            spectrum_query.set('precursor_neutral_mass',
                               str(precursor_neutral_mass))

        search_result = spectrum_query.find('search_result')
        search_hit = SubElement(search_result, 'search_hit')
        # Only alphabetic characters are kept for the 'peptide' attribute;
        # the unfiltered string is still what the modification helpers get.
        search_hit.set('peptide',
                       "".join(aa for aa in peptide_middle if aa.isalpha()))
        search_hit.set('peptide_prev_aa', peptide_prev_aa)
        search_hit.set('peptide_next_aa', peptide_next_aa)
        search_hit.set('protein', protein_name)
        search_hit.set('protein_descr', protein_descr)

        # Flatten the per-modification results into a single list.
        modification_instances = [
            instance
            for mod in modifications
            for instance in find_modifications(mod, peptide_middle)
        ]
        calc_neutral_pep_mass = modified_peptide_mass(
            modification_instances, peptide_middle, num_mods)
        if modification_instances:
            modification_info = SubElement(search_hit, 'modification_info')
            for mass, _mass_diff, aa_number, _is_opt in modification_instances:
                maam = SubElement(modification_info, 'mod_aminoacid_mass')
                maam.set('position', str(aa_number))
                maam.set('mass', str(mass))

        search_hit.set('calc_neutral_pep_mass', str(calc_neutral_pep_mass))
        search_hit.set('massdiff',
                       str(precursor_neutral_mass - calc_neutral_pep_mass))

        for field in score_fields:
            if field in f:
                SubElement(search_hit, 'search_score',
                           name=field, value=f[field])

    # Describe the enzyme once, after every peptide has been examined.
    if enzyme_list:
        fidelity, candidates = 'specific', enzyme_list
    elif semi_list:
        fidelity, candidates = 'semispecific', semi_list
    else:
        sample_enzyme.set('name', 'NoEnzyme')
        sample_enzyme.set('fidelity', 'nonspecific')
        return

    sample_enzyme.set('fidelity', fidelity)
    # The first candidate's first item holds the enzyme key in angle
    # brackets; enzyme2name maps that key to a '|'-separated record whose
    # fields are used below as name, cut, no_cut and sense.
    enzyme_key = re.search(r'<(\w+)>', candidates[0][0]).group(1)
    enzyme = enzyme2name[enzyme_key].split('|')

    sample_enzyme.set('name', enzyme[0])
    specificity = SubElement(sample_enzyme, 'specificity')
    specificity.set('cut', enzyme[1])
    if enzyme[2]:
        specificity.set('no_cut', enzyme[2])
    specificity.set('sense', enzyme[3])
def apply_msgfdb(in_file, msms_run_summary, modifications, num_mods):
    """Read an MS-GFDB output file and add its hits to ``msms_run_summary``.

    Every row yielded by ``read_msgfdb(in_file)`` becomes a ``search_hit``
    element inside the ``search_result`` of its ``spectrum_query``.  One
    ``spectrum_query`` is created per distinct spectrum string (file name,
    scan number and charge) the first time that spectrum is seen.  After all
    rows are processed, the ``sample_enzyme`` child of ``msms_run_summary``
    is annotated from the enzyme lists returned by ``what_enzyme``.

    Args:
        in_file: Passed unchanged to ``read_msgfdb``.
        msms_run_summary: Element that receives the ``spectrum_query``
            children.  It must already contain a ``sample_enzyme`` child,
            because that child is looked up with ``find`` and then modified.
        modifications: Modification descriptions; each one is passed to
            ``find_modifications`` together with the peptide.  The collection
            is iterated once per row, so it must be re-iterable.
        num_mods: Passed unchanged to ``modified_peptide_mass``.

    Returns:
        None.  ``msms_run_summary`` is modified in place.
    """
    spectrum2element = {}
    # Initial enzyme candidates; what_enzyme() hands back updated versions of
    # both lists after looking at each peptide.
    enzyme_list = [
        ArgC, LysC, Trypsin, LysCP, Chymotrypsin, TrypChymo, TrypsinP,
        PepsinA, CNBr, V8E, AspN, Formic_acid, AspNambic, V8DE,
    ]
    semi_list = []
    sample_enzyme = msms_run_summary.find('sample_enzyme')

    for f in read_msgfdb(in_file):
        scan = f['Scan#']
        charge = f['Charge']
        peptide = f['Peptide']
        spectrum = '%(name)s.%(scan)05i.%(scan)05i.%(charge)i' % {
            'name': remove_file_extention(f['#SpecFile']),
            'scan': scan,
            'charge': charge,
        }
        enzyme_list, semi_list = what_enzyme(enzyme_list, semi_list, peptide)

        # The peptide string carries one flanking residue plus a separator
        # character on each side of the sequence; '_' flanks are written
        # as '-'.
        peptide_prev_aa = '-' if peptide[0] == '_' else peptide[0]
        peptide_next_aa = '-' if peptide[-1] == '_' else peptide[-1]
        peptide_middle = peptide[2:-2]

        # Everything after the first space (if any) is the description.
        protein_name, _, protein_descr = f['Protein'].partition(' ')

        precursor_neutral_mass = f['Precursor'] * charge - charge * H_plus

        # Compare against None explicitly: an element's truth value must not
        # be used to decide whether the spectrum was already seen.
        spectrum_query = spectrum2element.get(spectrum)
        if spectrum_query is None:
            spectrum_query = SubElement(msms_run_summary, 'spectrum_query')
            spectrum2element[spectrum] = spectrum_query
            SubElement(spectrum_query, 'search_result')
            spectrum_query.set('spectrum', spectrum)
            spectrum_query.set('start_scan', str(scan))
            spectrum_query.set('end_scan', str(scan))
            spectrum_query.set('assumed_charge', str(charge))
            spectrum_query.set('precursor_neutral_mass',
                               str(precursor_neutral_mass))

        search_result = spectrum_query.find('search_result')
        search_hit = SubElement(search_result, 'search_hit')
        # Only alphabetic characters are kept for the 'peptide' attribute;
        # the unfiltered string is still what the modification helpers get.
        search_hit.set('peptide',
                       "".join(aa for aa in peptide_middle if aa.isalpha()))
        search_hit.set('peptide_prev_aa', peptide_prev_aa)
        search_hit.set('peptide_next_aa', peptide_next_aa)
        search_hit.set('protein', protein_name)
        search_hit.set('protein_descr', protein_descr)

        # Flatten the per-modification results into a single list.
        modification_instances = [
            instance
            for mod in modifications
            for instance in find_modifications(mod, peptide_middle)
        ]
        calc_neutral_pep_mass = modified_peptide_mass(
            modification_instances, peptide_middle, num_mods)
        if modification_instances:
            modification_info = SubElement(search_hit, 'modification_info')
            for mass, _mass_diff, aa_number, _is_opt in modification_instances:
                maam = SubElement(modification_info, 'mod_aminoacid_mass')
                maam.set('position', str(aa_number))
                maam.set('mass', str(mass))

        search_hit.set('calc_neutral_pep_mass', str(calc_neutral_pep_mass))
        search_hit.set('massdiff',
                       str(precursor_neutral_mass - calc_neutral_pep_mass))

        for field in score_fields:
            if field in f:
                SubElement(search_hit, 'search_score',
                           name=field, value=f[field])

    # Describe the enzyme once, after every peptide has been examined.
    if enzyme_list:
        fidelity, candidates = 'specific', enzyme_list
    elif semi_list:
        fidelity, candidates = 'semispecific', semi_list
    else:
        sample_enzyme.set('name', 'NoEnzyme')
        sample_enzyme.set('fidelity', 'nonspecific')
        return

    sample_enzyme.set('fidelity', fidelity)
    # The first candidate's first item holds the enzyme key in angle
    # brackets; enzyme2name maps that key to a '|'-separated record whose
    # fields are used below as name, cut, no_cut and sense.
    enzyme_key = re.search(r'<(\w+)>', candidates[0][0]).group(1)
    enzyme = enzyme2name[enzyme_key].split('|')

    sample_enzyme.set('name', enzyme[0])
    specificity = SubElement(sample_enzyme, 'specificity')
    specificity.set('cut', enzyme[1])
    if enzyme[2]:
        specificity.set('no_cut', enzyme[2])
    specificity.set('sense', enzyme[3])