def testrun(scorers_list=None, scorers_settings=None, batchindex=-1, header='',
            use_metalikeness=False, use_formula=False, use_elements=False):
    """Benchmark candidate retrieval on the loaded test spectra and log the stats.

    Every peak with a supported ``ion_type`` is annotated with its known adduct
    (isotope 0), ``peak_annotator.annotate_peaks`` is run on the collected
    peaks, and the rank of the correct short InChI among the returned
    candidates is tallied per polarity.  The tallies are logged and the
    per-rank percentages are handed to ``store_stats``.

    The function reads module-level names that are not defined in this block:
    ``specmanager``, ``peak_annotator``, ``ppm``, ``max_results_per_query``,
    ``test_chemical_databases``, ``total_multiplicative_score``, ``logit``,
    ``logtime`` and ``store_stats``.

    NOTE(review): in the collapsed source this definition is immediately
    followed by a bare triple-single-quote delimiter, which suggests it sat
    inside a string-quoted (disabled) region opened outside this view.  That
    delimiter is not reproduced here; confirm against the full file.

    Args:
        scorers_list: Forwarded to ``annotate_peaks``; ``None`` means ``[]``.
        scorers_settings: Forwarded to ``annotate_peaks``; ``None`` means ``[]``.
        batchindex: ``-1`` processes every spectrum, any other value only the
            spectra whose ``crossvalidation_batch_index`` parameter equals it.
        header: Forwarded as first argument to ``store_stats``.
        use_metalikeness: Add ``LikenessFilter(likeness=['MetaLike'])``.
        use_formula: Add a ``FormulasFilter`` built from the spectrum formula.
        use_elements: Add an ``ElementCompositionFilter`` built from the
            spectrum formula (used as both of its arguments).
    """
    # ``None`` replaces the former ``[]`` defaults, which were created once and
    # shared by every call.
    scorers_list = [] if scorers_list is None else scorers_list
    scorers_settings = [] if scorers_settings is None else scorers_settings

    def percent(count, total):
        # Same ``count * 100 / total`` expression as before, but 0 instead of
        # a ZeroDivisionError when no peak of that polarity was counted.
        return count * 100 / total if total else 0

    def log_table(best_row, worst_row, returned_row, total):
        # One log line per rank cut-off 1..max_results_per_query.
        for i in range(max_results_per_query):
            logit('Correct within first\t%s:\tBest:\t%s%%\tWorst:\t%s%%\tCandidateCount:\t%s'
                  % (i + 1, percent(best_row[i], total),
                     percent(worst_row[i], total), returned_row[i]))

    logit('Assessing Fragprint based search. Batch %s...' % batchindex)
    # Per-polarity tallies, keyed by spectrum mode (1 positive, -1 negative).
    mode_count = {1: 0, -1: 0}
    best = {mode: [0] * max_results_per_query for mode in (1, -1)}
    worst = {mode: [0] * max_results_per_query for mode in (1, -1)}
    returned = {mode: [0] * max_results_per_query for mode in (1, -1)}
    logtime()
    logit('Assuming correct adduct info and 0 isotope.')
    logit('No formula assumption. Assuming Mass precision: %s ppm' % ppm)
    logit('Only considering [M+H]+ and [M-H]- adducts for now')

    peaks = []
    spectra = specmanager.ms_spectra
    for spectrum_index, spectrum in enumerate(spectra):
        print('%s of %s spectra preprocessed' % (spectrum_index, len(spectra)))
        if batchindex != -1 and int(
                spectrum.parameters['crossvalidation_batch_index']) != batchindex:
            continue
        for peak in spectrum.peaks:
            if 'ion_type' not in peak.parameters:
                continue
            ion_type = peak.parameters['ion_type']
            if ion_type not in FragPrintScorer.supported_adducts:
                continue
            peak.ppm = ppm
            filters = []
            if use_metalikeness:
                filters.append(LikenessFilter(likeness=['MetaLike']))
            if use_formula:
                filters.append(
                    FormulasFilter(peak.parent_spectrum.parameters['formula']))
            if use_elements:
                elements = peak.parent_spectrum.parameters['formula']
                filters.append(ElementCompositionFilter(elements, elements))
            # Assuming 0 isotope states only.
            annotation = MSPeakAnnotation(
                peak,
                adduct=get_adduct_by_name(ion_type, spectrum.parameters['mode']),
                isotope=0, formula_scorer=None, filters=filters,
                scores={'AdductIsotopeScore': 1.0})
            if annotation.adduct is not None:
                peak.annotations = [annotation]
                peaks.append(peak)
            else:
                # NOTE(review): the collapsed source does not show which ``if``
                # this ``else`` belonged to; it is attached to the nearest one.
                logit('Unsupported Adduct: %s' % ion_type)
    logit('Peaks to annotate: %s' % len(peaks))

    peak_annotator.annotate_peaks(
        peaks, test_chemical_databases,
        scorers_list=scorers_list, scorers_settings=scorers_settings,
        total_score=total_multiplicative_score,
        required_fields={'ShortInChI'},
        results_limit=max_results_per_query, save_memory=False, ppm=ppm,
        overwrite=True)
    # Peaks for which the annotator produced no candidate container are
    # excluded from the statistics.
    peaks[:] = [peak for peak in peaks
                if peak.annotations[0].mol_candidates is not None]
    logit('Finished annotating... Annotated peaks :%s' % len(peaks))
    logtime()

    total_peaks = len(peaks)
    total_candidates = 0
    for peak in peaks:
        mode = peak.parent_spectrum.parameters['mode']
        peakmode = 1 if mode == 1 else (-1 if mode == -1 else 0)
        if peakmode:
            mode_count[peakmode] += 1
        shortinchi = get_short_inchi_from_full_inchi(
            peak.parent_spectrum.parameters['inchi'])
        for annotation in peak.annotations:
            candidates = annotation.mol_candidates.mol_list
            total_candidates += annotation.mol_candidates.total_candidate_count
            # Annotation score summary; both values stay 0.0 without candidates.
            scores = [mol['TotalScore'] for mol in candidates]
            annotation.max_score = max([0.0] + scores)
            annotation.mean_score = sum(scores, 0.0) / len(scores) if scores else 0.0
            # Histogram of returned list lengths.  An empty list is skipped:
            # it used to index [-1] and inflate the last bucket.
            if peakmode and 0 < len(candidates) <= max_results_per_query:
                returned[peakmode][len(candidates) - 1] += 1
            # For the purpose of statistics, condense sequential identical
            # short InChIs (in place, back to front so indices stay valid).
            for index in reversed(range(1, len(candidates))):
                if candidates[index - 1]['ShortInChI'] == candidates[index]['ShortInChI']:
                    del candidates[index]
            hits = [rank for rank, mol in enumerate(candidates)
                    if mol['ShortInChI'] == shortinchi]
            annotation.min_correct = hits[0] if hits else -1
            annotation.max_correct = hits[-1] if hits else -1
            if hits and peakmode:
                for i in range(max_results_per_query):
                    if annotation.min_correct <= i:
                        best[peakmode][i] += 1
                    if annotation.max_correct <= i:
                        worst[peakmode][i] += 1

    pos_mode_count = mode_count[1]
    neg_mode_count = mode_count[-1]
    logit('Finished Calculating Retrieval Stats.')
    logit('Positive Mode Count: %s' % pos_mode_count)
    logit('Negative Mode Count: %s' % neg_mode_count)
    logit('Total Peaks: %s' % total_peaks)
    logit('Total Candidate Count: %s' % total_candidates)
    logtime()
    logit('Positive Mode (Total: %s ):' % pos_mode_count)
    if pos_mode_count > 0:
        log_table(best[1], worst[1], returned[1], pos_mode_count)
    logit('Negative Mode (Total: %s ):' % neg_mode_count)
    if neg_mode_count > 0:
        log_table(best[-1], worst[-1], returned[-1], neg_mode_count)
    logit('Both Modes (Total: %s ):' % (pos_mode_count + neg_mode_count))
    if neg_mode_count > 0 or pos_mode_count > 0:
        log_table([n + p for n, p in zip(best[-1], best[1])],
                  [n + p for n, p in zip(worst[-1], worst[1])],
                  [p + n for p, n in zip(returned[1], returned[-1])],
                  neg_mode_count + pos_mode_count)

    # Convert the tallies to percentages for storage; a polarity without any
    # peak yields zeros instead of dividing by zero.
    best_results_neg = [percent(count, neg_mode_count) for count in best[-1]]
    worst_results_neg = [percent(count, neg_mode_count) for count in worst[-1]]
    best_results_pos = [percent(count, pos_mode_count) for count in best[1]]
    worst_results_pos = [percent(count, pos_mode_count) for count in worst[1]]
    store_stats(header, best_results_neg, worst_results_neg,
                best_results_pos, worst_results_pos)
    logtime()
def run_annotation(spectral_input_path, db_file, chemical_databases,
                   output_folder, SVMs_path, ncpu, ppm, max_results_per_query,
                   test_mode=False, ignore_peaks_without_spectra=True):
    """Annotate all spectra in a folder and export the results.

    Spectra are imported with ``SpectralManager``, candidate annotations are
    prepared for every eligible peak, ``PeakAnnotator.annotate_peaks`` is run,
    and the annotated spectra are exported as JSON, in the internal text
    format, and as an HTML report.  Progress is written through ``logit`` to
    ``log.txt`` inside ``output_folder``.

    Sets the module globals ``starttime`` and ``logfile``.

    Args:
        spectral_input_path: Folder passed to
            ``import_textfile_spectra_from_folder``.
        db_file: Path of the chemical database list file; must exist.
        chemical_databases: Forwarded to ``annotate_peaks``; a falsy value is
            logged as "All".
        output_folder: Destination folder; created when missing.
        SVMs_path: Forwarded to ``PeakAnnotator``.
        ncpu: Forwarded to ``PeakAnnotator``.
        ppm: Mass tolerance assigned to every peak and passed to the annotator.
        max_results_per_query: Passed as ``results_limit``; also the number of
            rank cut-offs in the test-mode statistics.
        test_mode: When true, each peak gets a single annotation built from its
            own ``ion_type`` and retrieval statistics against the spectrum's
            ``inchi`` parameter are logged.
        ignore_peaks_without_spectra: When true, peaks without a non-empty
            ``ms_spectra`` attribute are skipped.

    Raises:
        SystemExit: With status 2 when ``db_file`` is missing or
            ``output_folder`` cannot be created.
    """
    global starttime
    global logfile

    def eligible(peak):
        # A peak is used unless fragment spectra are required and it has none.
        return (not ignore_peaks_without_spectra) or bool(
            getattr(peak, 'ms_spectra', None))

    if not os.path.isfile(db_file):
        print('No database list file found!')
        # ``quit()`` is a ``site`` convenience that is missing in frozen
        # builds; exit the same way as the other error path below.
        sys.exit(2)
    db_files = [db_file]
    if not os.path.exists(output_folder):
        try:
            os.makedirs(output_folder)
        except OSError:
            print('Cannot create output path %s !' % output_folder)
            print('Run: python annotate.py -h for help')
            sys.exit(2)

    logfile = open(os.path.join(output_folder, 'log.txt'), 'w')
    try:
        starttime = time.time()
        logit('Starting. %s' % time.strftime('%d/%m/%y %H:%M:%S'))
        logit('Input Spectral path: %s' % spectral_input_path)
        logit('Output path: %s' % output_folder)
        if chemical_databases:
            logit('Chemical Databases Included: %s' % chemical_databases)
        else:
            logit('Chemical Databases Included: All')
        logit('from chemical database paths file %s' % db_file)
        logit('SVMs from %s' % SVMs_path)
        logit('nCPUs to use: %s' % ncpu)
        logit('Default ppm tolerance for MS1: %s' % ppm)

        specmanager = SpectralManager()
        logit('Reading test spectra...')
        specmanager.import_textfile_spectra_from_folder(spectral_input_path)
        logit('Finshed reading spectra. Total number: %s' % len(specmanager.ms_spectra))
        logtime()
        multiprocessing.freeze_support()

        scorers_list = ['FingerPrintScorer', 'FragPrintScorer']
        scorers_settings = [[], []]
        total_score = total_multiplicative_score

        with PeakAnnotator(db_files, SVMs_path, ncpu) as peak_annotator:
            logit('Beginning annotation...')
            logtime()
            # Per-polarity tallies, keyed by spectrum mode (1 / -1).
            mode_count = {1: 0, -1: 0}
            best = {mode: [0] * max_results_per_query for mode in (1, -1)}
            worst = {mode: [0] * max_results_per_query for mode in (1, -1)}
            returned = {mode: [0] * max_results_per_query for mode in (1, -1)}
            peaks = []

            # Leave only adducts supported by all selected scorers.
            supported_adducts = set(global_supported_adducts)
            if 'FingerPrintScorer' in scorers_list:
                supported_adducts = supported_adducts & FingerPrintScorer.supported_adducts
            if 'FragPrintScorer' in scorers_list:
                supported_adducts = supported_adducts & FragPrintScorer.supported_adducts

            logit('Preparing annotations')
            if test_mode:
                logit('Running in test mode.....')
                logit('Assuming correct adduct info and 0 isotope.')
                logit('No formula assumption. Mass precision: %s ppm' % ppm)
                logit('Only considering [M+H]+ and [M-H]- adducts for now')
                print('\n')
                for spectrum in specmanager.ms_spectra:
                    for peak in spectrum.peaks:
                        if not eligible(peak):
                            continue
                        if not (hasattr(peak, 'parameters')
                                and 'ion_type' in peak.parameters):
                            continue
                        ion_type = peak.parameters['ion_type']
                        if ion_type not in FragPrintScorer.supported_adducts:
                            continue
                        peak.ppm = ppm
                        annotation = MSPeakAnnotation(
                            peak,
                            adduct=get_adduct_by_name(ion_type, spectrum.parameters['mode']),
                            isotope=0, formula_scorer=None, element_scorer=None,
                            filters=[], scores={})
                        if annotation.adduct is not None:
                            peak.annotations = [annotation]
                            peaks.append(peak)
                        else:
                            # NOTE(review): the collapsed source does not show
                            # which ``if`` owned this ``else``; it is attached
                            # to the nearest one.
                            logit('Unsupported Adduct: %s' % ion_type)
            else:
                logit('Running in normal mode.....')
                logit('Mass precision: %s ppm' % ppm)
                print('\n')
                for spectrum in specmanager.ms_spectra:
                    for peak in spectrum.peaks:
                        if not eligible(peak):
                            continue
                        peak.ppm = ppm
                        if getattr(peak, 'annotations', None):
                            # Annotations already present on the peak are kept.
                            peaks.append(peak)
                            continue
                        # Assuming isotope 0 by default; one annotation per
                        # supported adduct of the spectrum polarity.
                        if spectrum.parameters['mode'] == 1:
                            selected_adducts = get_positive_mode_adducts(supported_adducts)
                        else:
                            selected_adducts = get_negative_mode_adducts(supported_adducts)
                        peak.annotations = []
                        for adduct in selected_adducts:
                            peak.annotations.append(MSPeakAnnotation(
                                peak, adduct=adduct, isotope=0,
                                formula_scorer=None, element_scorer=None,
                                filters=[], scores={}))
                        peaks.append(peak)
            logit('Peaks to annotate: %s' % len(peaks))

            if test_mode:
                required_fields = {'ShortInChI', 'InChI', 'SMILES', 'Formula', 'IDs'}
            else:
                required_fields = {'InChI', 'SMILES', 'Formula', 'IDs'}
            peak_annotator.annotate_peaks(
                peaks, chemical_databases,
                scorers_list=scorers_list, scorers_settings=scorers_settings,
                total_score=total_score, required_fields=required_fields,
                results_limit=max_results_per_query, save_memory=False,
                ppm=ppm, overwrite=True)

            annotation_count = 0
            total_candidates = 0
            total_peaks = len(peaks)
            for peak in peaks:
                if not peak.annotations:
                    continue
                for annotation in peak.annotations:
                    if annotation.mol_candidates is not None:
                        annotation_count += 1
                        total_candidates += annotation.mol_candidates.total_candidate_count
            logit('Finished annotating... ')
            logit('Annotated peaks :%s' % total_peaks)
            if total_peaks > 0:
                logit('Averaged annotations per peak:%s'
                      % (float(annotation_count) / total_peaks))
                logit('Averaged candidate molecules per peak:%s'
                      % (float(total_candidates) / total_peaks))

            # Retrieval statistics against the known structures (test data).
            if test_mode:
                print('Calculating retrieval stats...\n')
                for peak in peaks:
                    mode = peak.parent_spectrum.parameters['mode']
                    peakmode = 1 if mode == 1 else (-1 if mode == -1 else 0)
                    if peakmode:
                        mode_count[peakmode] += 1
                    shortinchi = get_short_inchi_from_full_inchi(
                        peak.parent_spectrum.parameters['inchi'])
                    for annotation in peak.annotations:
                        annotation.min_correct = -1
                        annotation.max_correct = -1
                        annotation.mean_score = 0.0
                        annotation.max_score = 0.0
                        if annotation.mol_candidates is None:
                            # Unannotated peaks are not removed beforehand, so
                            # an annotation may carry no candidate container.
                            continue
                        candidates = annotation.mol_candidates.mol_list
                        scores = [mol['TotalScore'] for mol in candidates]
                        annotation.max_score = max([0.0] + scores)
                        if scores:
                            annotation.mean_score = sum(scores, 0.0) / len(scores)
                        # An empty list is skipped: it used to index [-1] and
                        # inflate the last bucket.
                        if peakmode and 0 < len(candidates) <= max_results_per_query:
                            returned[peakmode][len(candidates) - 1] += 1
                        # For the purpose of statistics, condense sequential
                        # identical short InChIs (in place, back to front).
                        for index in reversed(range(1, len(candidates))):
                            if candidates[index - 1]['ShortInChI'] == candidates[index]['ShortInChI']:
                                del candidates[index]
                        for rank, mol in enumerate(candidates):
                            if mol['ShortInChI'] == shortinchi:
                                mol['Annotation'] = 'Correct'
                                if annotation.min_correct == -1:
                                    annotation.min_correct = rank
                                annotation.max_correct = rank
                            else:
                                mol['Annotation'] = 'Wrong'
                        if annotation.min_correct > -1 and peakmode:
                            for i in range(max_results_per_query):
                                if annotation.min_correct <= i:
                                    best[peakmode][i] += 1
                                if annotation.max_correct <= i:
                                    worst[peakmode][i] += 1

                pos_mode_count = mode_count[1]
                neg_mode_count = mode_count[-1]
                row_format = 'Correct within first\t%s:\tBest:\t%.2f%%\tWorst:\t%.2f%%\tCCount:\t%s'
                logit('Finished Calculating Retrieval Stats.')
                logit('Positive Mode Count: %s' % pos_mode_count)
                logit('Negative Mode Count: %s' % neg_mode_count)
                logit('Total Peaks: %s' % total_peaks)
                logit('Positive Mode (Total: %s ):' % pos_mode_count)
                if pos_mode_count > 0:
                    for i in range(max_results_per_query):
                        # float() on both columns; only "Best" had it before.
                        logit(row_format % (
                            i + 1,
                            float(best[1][i]) * 100 / pos_mode_count,
                            float(worst[1][i]) * 100 / pos_mode_count,
                            returned[1][i]))
                logit('Negative Mode (Total: %s ):' % neg_mode_count)
                if neg_mode_count > 0:
                    for i in range(max_results_per_query):
                        logit(row_format % (
                            i + 1,
                            float(best[-1][i]) * 100 / neg_mode_count,
                            float(worst[-1][i]) * 100 / neg_mode_count,
                            returned[-1][i]))
                both_count = pos_mode_count + neg_mode_count
                logit('Both Modes (Total: %s ):' % both_count)
                if neg_mode_count > 0 or pos_mode_count > 0:
                    for i in range(max_results_per_query):
                        logit(row_format % (
                            i + 1,
                            float(best[-1][i] + best[1][i]) * 100 / both_count,
                            float(worst[-1][i] + worst[1][i]) * 100 / both_count,
                            returned[1][i] + returned[-1][i]))

            # NOTE(review): the collapsed source does not show whether merging
            # was nested under ``if test_mode``; it is run for both modes here
            # because normal mode creates several annotations per peak.
            for peak in peaks:
                merge_annotations(peak, remove_old_annotations=False)

            logtime()
            logit('Finished Annotation. Exporting results...')
            logtime()
            logit('Exporting to JSON...')
            spectra_to_json(os.path.join(output_folder, 'annotated_spectra.json'),
                            specmanager.ms_spectra)
            logtime()
            logit('Exporting to internal text format...')
            specmanager.export_textfile_spectra_to_folder(
                os.path.join(output_folder, 'annotated_spectra'))
            logit('Preparing HTML report...')
            generate_HTML_report(os.path.join(output_folder, 'Report'), specmanager)
            specmanager.close()
            logtime()
            logit('Finished')
    finally:
        # Close the log even when annotation or export fails part-way.
        logfile.close()
def _pipe_from_textfile(self, finp):
    """Fill this spectrum from its text representation read from ``finp``.

    Lines are consumed with ``finp.readline()`` until a top-level ``end`` line
    or end of file.  Everything after ``##`` on a line is ignored.

    * ``key=value`` lines are stored in ``self.parameters`` under the
      lower-cased key.  Keys starting with ``mode`` or ``level`` are converted
      with ``int``, keys starting with ``collision_energy`` with ``float``;
      all other values stay strings.
    * A ``peaks`` line opens a peak section that runs until its own ``end``.
      Inside it, ``number,mz,intensity[,ppm]`` rows create peaks appended to
      ``self.peaks``; ``key=value`` lines, ``spectrum``, ``annotation`` and
      ``merged_annotation`` sub-records are attached to the most recent peak.
      Sub-records read their own content from the same stream.
    * Blank and comment-only lines inside a peak section are skipped; they
      used to be parsed as a peak row and failed on ``int('')``.

    Args:
        finp: Text stream positioned just after the record's opening line.
    """

    def read_content():
        # Next line without comment and leading whitespace; None at EOF.
        raw = finp.readline()
        if raw == '':
            return None
        text = raw.rstrip('\n').lstrip()
        if '##' in text:
            text = text[:text.index('##')]
        return text

    def as_full_peak(peak, number, stored=True):
        # Replace an MSBasicPeak by an MSPeak built from it so that extra
        # attributes can be attached; ``stored`` says whether the peak already
        # sits at the end of ``self.peaks`` and must be swapped there as well.
        if isinstance(peak, MSBasicPeak):
            peak = MSPeak(peak)
            peak.number = number
            peak.parent_spectrum = self
            if stored:
                self.peaks[-1] = peak
        return peak

    def attach(peak, number, attribute, item):
        # Append ``item`` to the list ``peak.<attribute>``, creating the list
        # (and upgrading the peak) on first use.  Returns the peak to use
        # from now on.
        if hasattr(peak, attribute):
            getattr(peak, attribute).append(item)
        else:
            peak = as_full_peak(peak, number)
            setattr(peak, attribute, [item])
        return peak

    current_peak = None
    current_peak_number = 0
    while True:
        line = read_content()
        if line is None:
            return
        lowered = line.lower()
        if '=' in line:
            key, value = line.split('=', 1)
            key = key.lower()
            if key.startswith('mode'):
                self.parameters['mode'] = int(value)
            elif key.startswith('collision_energy'):
                self.parameters['collision_energy'] = float(value)
            elif key.startswith('level'):
                self.parameters['level'] = int(value)
            else:
                self.parameters[key] = value
        elif lowered.startswith('peaks'):
            while True:
                line = read_content()
                if line is None:
                    return
                if not line:
                    # Blank or comment-only line: nothing to parse.
                    continue
                lowered = line.lower()
                if '=' in line:
                    key, value = line.split('=', 1)
                    if not hasattr(current_peak, 'parameters'):
                        current_peak = as_full_peak(current_peak, current_peak_number)
                        current_peak.parameters = {}
                    current_peak.parameters[key.lower()] = value
                elif lowered.startswith('spectrum'):
                    sub_spectrum = MSSpectrum()
                    sub_spectrum._pipe_from_textfile(finp)
                    current_peak = attach(current_peak, current_peak_number,
                                          'ms_spectra', sub_spectrum)
                    sub_spectrum.parent_peak = current_peak
                elif lowered.startswith('annotation'):
                    annotation = MSPeakAnnotation(current_peak)
                    annotation._pipe_from_textfile(finp)
                    current_peak = attach(current_peak, current_peak_number,
                                          'annotations', annotation)
                elif lowered.startswith('merged_annotation'):
                    merged_annotation = MolecularRecord()
                    merged_annotation._pipe_from_textfile(finp)
                    current_peak = attach(current_peak, current_peak_number,
                                          'merged_annotations', merged_annotation)
                elif lowered.startswith('end'):
                    break
                else:
                    # Peak row: number, m/z, intensity and optionally ppm.
                    fields = line.split(',')
                    if self.parameters['level'] > 1:
                        current_peak = MSBasicPeak()
                    else:
                        current_peak = MSPeak()
                    current_peak_number = int(fields[0])
                    current_peak.number = current_peak_number
                    current_peak.parent_spectrum = self
                    current_peak.mz = float(fields[1])
                    current_peak.intensity = float(fields[2])
                    if len(fields) > 3:
                        # Not yet in self.peaks, so only the local object is
                        # upgraded before the ppm value is attached.
                        current_peak = as_full_peak(current_peak, current_peak_number,
                                                    stored=False)
                        current_peak.ppm = float(fields[3])
                    self.peaks.append(current_peak)
        elif lowered.startswith('end'):
            return
def testrun(scorers_list=None, scorers_settings=None, batchindex=-1):
    """Benchmark candidate retrieval on the loaded test spectra and log the stats.

    Every peak with a supported ``ion_type`` is annotated with its known adduct
    (isotope 0), ``peak_annotator.annotate_peaks`` is run on the collected
    peaks, and the rank of the spectrum's ``shortinchi`` among the returned
    candidates (compared on their ``ShortInChi`` field) is tallied per
    polarity and logged.

    The function reads module-level names that are not defined in this block:
    ``specmanager``, ``peak_annotator``, ``ppm``, ``max_results_per_query``,
    ``test_chemical_databases``, ``total_multiplicative_score``, ``logit`` and
    ``logtime``.

    NOTE(review): in the collapsed source this definition is immediately
    followed by a bare triple-single-quote delimiter, which suggests it sat
    inside a string-quoted (disabled) region.  That delimiter is not
    reproduced here; confirm against the full file.

    Args:
        scorers_list: Forwarded to ``annotate_peaks``; ``None`` means ``[]``.
        scorers_settings: Forwarded to ``annotate_peaks``; ``None`` means ``[]``.
        batchindex: ``-1`` processes every spectrum, any other value only the
            spectra whose ``crossvalidation_batch_index`` parameter equals it.
            Also forwarded to ``annotate_peaks`` as ``batch_index``.
    """
    # ``None`` replaces the former ``[]`` defaults, which were created once and
    # shared by every call.
    scorers_list = [] if scorers_list is None else scorers_list
    scorers_settings = [] if scorers_settings is None else scorers_settings

    def log_table(best_row, worst_row, returned_row, total):
        # One log line per rank cut-off; callers guarantee ``total`` > 0.
        for i in range(max_results_per_query):
            logit('Correct within first\t%s:\tBest:\t%s%%\tWorst:\t%s%%\tCandidateCount:\t%s'
                  % (i + 1, best_row[i] * 100 / total,
                     worst_row[i] * 100 / total, returned_row[i]))

    logit('Assessing Fragprint based search. Batch %s...' % batchindex)
    # Per-polarity tallies, keyed by spectrum mode (1 positive, -1 negative).
    mode_count = {1: 0, -1: 0}
    best = {mode: [0] * max_results_per_query for mode in (1, -1)}
    worst = {mode: [0] * max_results_per_query for mode in (1, -1)}
    returned = {mode: [0] * max_results_per_query for mode in (1, -1)}
    logtime()
    logit('Assuming correct adduct info and 0 isotope.')
    logit('No formula assumption. Assuming Mass precision: %s ppm' % ppm)
    logit('Only considering [M+H]+ and [M-H]- adducts for now')

    peaks = []
    spectra = specmanager.ms_spectra
    for spectrum_index, spectrum in enumerate(spectra):
        print('%s of %s spectra preprocessed' % (spectrum_index, len(spectra)))
        if batchindex != -1 and int(
                spectrum.parameters['crossvalidation_batch_index']) != batchindex:
            continue
        for peak in spectrum.peaks:
            if 'ion_type' not in peak.parameters:
                continue
            ion_type = peak.parameters['ion_type']
            if ion_type not in FragPrintScorer.supported_adducts:
                continue
            peak.ppm = ppm
            # Assuming 0 isotope states only.
            annotation = MSPeakAnnotation(
                adduct=get_adduct_by_name(ion_type, spectrum.parameters['mode']),
                isotope=0, formula_scorer=None, filters=[],
                scores={'AdductIsotopeScore': 1.0})
            if annotation.adduct is not None:
                peak.annotations = [annotation]
                annotation.parent_peak = peak
                peaks.append(peak)
            else:
                # NOTE(review): the collapsed source does not show which ``if``
                # this ``else`` belonged to; it is attached to the nearest one.
                logit('Unsupported Adduct: %s' % ion_type)
    logit('Peaks to annotate: %s' % len(peaks))

    peak_annotator.annotate_peaks(
        peaks, test_chemical_databases,
        scorers_list=scorers_list, scorers_settings=scorers_settings,
        total_score=total_multiplicative_score,
        results_limit=max_results_per_query, save_memory=False,
        batch_index=batchindex, ppm=ppm, overwrite=True)
    # Peaks whose first annotation has a falsy ``mol_candidates`` are excluded
    # from the statistics.
    peaks[:] = [peak for peak in peaks if peak.annotations[0].mol_candidates]
    logit('Finished annotating... Annotated peaks :%s' % len(peaks))
    logtime()

    total_peaks = len(peaks)
    for peakindex, peak in enumerate(peaks):
        mode = peak.parent_spectrum.parameters['mode']
        peakmode = 1 if mode == 1 else (-1 if mode == -1 else 0)
        if peakmode:
            mode_count[peakmode] += 1
        print('%s of %s ' % (peakindex, total_peaks))
        shortinchi = peak.parent_spectrum.parameters['shortinchi']
        for annotation in peak.annotations:
            candidates = annotation.mol_candidates.mol_list
            # Annotation score summary; both values stay 0.0 without candidates.
            scores = [mol['TotalScore'] for mol in candidates]
            annotation.max_score = max([0.0] + scores)
            annotation.mean_score = sum(scores, 0.0) / len(scores) if scores else 0.0
            # Histogram of returned list lengths.  An empty list is skipped:
            # it used to index [-1] and inflate the last bucket.
            if peakmode and 0 < len(candidates) <= max_results_per_query:
                returned[peakmode][len(candidates) - 1] += 1
            # For the purpose of statistics, condense sequential identical
            # short InChIs (in place, back to front so indices stay valid).
            for index in reversed(range(1, len(candidates))):
                if candidates[index - 1]['ShortInChi'] == candidates[index]['ShortInChi']:
                    del candidates[index]
            hits = [rank for rank, mol in enumerate(candidates)
                    if mol['ShortInChi'] == shortinchi]
            annotation.min_correct = hits[0] if hits else -1
            annotation.max_correct = hits[-1] if hits else -1
            if hits and peakmode:
                for i in range(max_results_per_query):
                    if annotation.min_correct <= i:
                        best[peakmode][i] += 1
                    if annotation.max_correct <= i:
                        worst[peakmode][i] += 1

    pos_mode_count = mode_count[1]
    neg_mode_count = mode_count[-1]
    logit('Finished Calculating Retrieval Stats.')
    logit('Positive Mode Count: %s' % pos_mode_count)
    logit('Negative Mode Count: %s' % neg_mode_count)
    logit('Total Peaks: %s' % total_peaks)
    logtime()
    logit('Positive Mode (Total: %s ):' % pos_mode_count)
    if pos_mode_count > 0:
        log_table(best[1], worst[1], returned[1], pos_mode_count)
    logit('Negative Mode (Total: %s ):' % neg_mode_count)
    if neg_mode_count > 0:
        log_table(best[-1], worst[-1], returned[-1], neg_mode_count)
    logit('Both Modes (Total: %s ):' % (pos_mode_count + neg_mode_count))
    if neg_mode_count > 0 or pos_mode_count > 0:
        log_table([n + p for n, p in zip(best[-1], best[1])],
                  [n + p for n, p in zip(worst[-1], worst[1])],
                  [p + n for p, n in zip(returned[1], returned[-1])],
                  neg_mode_count + pos_mode_count)
    logtime()