def main():
    """Run the whole annotation workflow on the files under ``../tests/test_data/``.

    Groups the features, annotates adducts, isotopes, oligomers, multiple
    charged ions and compounds in positive ion mode, then writes a
    tab-separated summary and a PDF report into the working directory.
    """
    ion_mode = "pos"
    ppm = 5.0
    db_out = "results_{}.sqlite".format(ion_mode)

    # Load the peaklist together with its intensity matrix.
    test_data_dir = "../tests/test_data/"
    df = in_out.combine_peaklist_matrix(
        os.path.join(test_data_dir, "variableMetadata.txt"),
        os.path.join(test_data_dir, "dataMatrix.txt"),
    )

    # Group the features and keep a copy of the resulting graph on disk
    # (it can be reloaded with nx.read_gml("graphs.gml")).
    graphs = group_features(
        df,
        db_out,
        max_rt_diff=5.0,
        coeff_thres=0.7,
        pvalue_thres=0.01,
        method="pearson",
    )
    nx.write_gml(graphs, "graphs.gml")

    # Reference libraries for the selected ion mode.
    library_dir = "../beamspy/data"
    lib_isotopes = in_out.read_isotopes(
        os.path.join(library_dir, "isotopes.txt"), ion_mode)
    lib_adducts = in_out.read_adducts(
        os.path.join(library_dir, "adducts.txt"), ion_mode)
    lib_multiple_charged_ions = in_out.read_multiple_charged_ions(
        os.path.join(library_dir, "multiple_charged_ions.txt"), ion_mode)
    # Read but not used by any of the steps below.
    lib_mass_differences = in_out.read_mass_differences(
        os.path.join(library_dir, "multiple_charged_differences.txt"), ion_mode)

    for library in (lib_isotopes, lib_adducts):
        print(library)

    # Peak-pattern annotation on the correlation graph.
    annotate_adducts(graphs, db_out, ppm, lib_adducts)
    annotate_isotopes(graphs, db_out, ppm, lib_isotopes)
    annotate_oligomers(graphs, db_out, ppm, lib_adducts)
    annotate_multiple_charged_ions(graphs, db_out, ppm,
                                   lib_multiple_charged_ions)

    # Compound annotation (molecular formula annotation is not run here).
    annotate_compounds(df, lib_adducts, ppm, db_out,
                       "lipidmaps_full_20181217_v1")

    # Tab-separated summary table.
    summary_table = summary(df, db_out)
    summary_table.to_csv("summary_{}.txt".format(ion_mode),
                         sep="\t", index=False, encoding="utf-8")

    # PDF report.
    plots.report(
        db=db_out,
        pdf_out="report_{}.pdf".format(ion_mode),
        column_corr="r_value",
        column_pvalue="p_value",
        column_ppm_error="ppm_error",
        column_adducts="adduct",
    )
def _packaged_data_file(relative_path):
    """Return the absolute path of a file located relative to this module."""
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), relative_path)


def _read_library_with_fallback(reader, filename, ion_mode):
    """Read a library with ``reader``; fall back to ``in_out.read_mass_differences``.

    ``Exception`` is caught instead of using a bare ``except`` so that
    KeyboardInterrupt and SystemExit are no longer swallowed.
    """
    try:
        return reader(filename, ion_mode)
    except Exception:
        return in_out.read_mass_differences(filename, ion_mode)


def _read_feature_table(args):
    """Load the peaklist, combined with the intensity matrix when one is given."""
    if args.intensity_matrix:
        return in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix)
    return in_out.read_peaklist(args.peaklist)


def _build_parser():
    """Create the command line parser with one sub-parser per workflow step."""
    parser = argparse.ArgumentParser(
        description='Annotation package of LC-MS and DIMS data',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    subparsers = parser.add_subparsers(dest='step')

    parser_gf = subparsers.add_parser('group-features', help='Group features.')
    parser_app = subparsers.add_parser(
        'annotate-peak-patterns',
        help='Annotate peak patterns, molecular formulae and metabolites.')
    parser_amf = subparsers.add_parser('annotate-mf', help='Annotate molecular formulae.')
    parser_am = subparsers.add_parser('annotate-compounds', help='Annotate metabolites.')
    parser_sr = subparsers.add_parser('summary-results', help='Summarise results.')
    subparsers.add_parser('start-gui', help='Start GUI.')

    # NOTE(review): several options below combine ``default=...`` with
    # ``required=True``; argparse never applies such a default. The flags are
    # left untouched so the command line contract does not change.

    #################################
    # GROUP FEATURES
    #################################

    parser_gf.add_argument('-l', '--peaklist', type=str, required=True,
                           help="Tab-delimited peaklist.")
    parser_gf.add_argument('-i', '--intensity-matrix', type=str, required=True,
                           help="Tab-delimited intensity matrix.")
    parser_gf.add_argument('-d', '--db', type=str, required=True,
                           help="Sqlite database to write results.")
    parser_gf.add_argument('-r', '--max-rt-diff', default=5.0, type=float, required=True,
                           help="Maximum difference in retention time between two peaks.")
    parser_gf.add_argument('-m', '--method', default="pearson",
                           choices=["pearson", "spearman"], required=True,
                           help="Method to apply for grouping features.")
    parser_gf.add_argument('-c', '--coeff-threshold', default=0.7, type=float, required=True,
                           help="Threshold for correlation coefficient.")
    parser_gf.add_argument('-p', '--pvalue-threshold', default=0.01, type=float, required=True,
                           help="Threshold for p-value.")
    # NOTE(review): the file is written with nx.write_gml, i.e. GML rather
    # than GraphML; the help text is kept as it was.
    parser_gf.add_argument('-g', '--gml-file', type=str, required=True,
                           help="Write graph to GraphML format.")
    parser_gf.add_argument('-n', '--ncpus', type=int, required=False,
                           help="Number of central processing units (CPUs).")

    #################################
    # ANNOTATE PEAK PATTERNS
    #################################

    parser_app.add_argument('-l', '--peaklist', type=str, required=True,
                            help="Tab-delimited peaklist.")
    parser_app.add_argument('-i', '--intensity-matrix', type=str, required=False,
                            help="Tab-delimited intensity matrix.")
    parser_app.add_argument('-g', '--gml-file', type=str, required=False,
                            help="Correlation graph in GraphML format.")
    parser_app.add_argument('-d', '--db', type=str, required=True,
                            help="Sqlite database to write results.")
    parser_app.add_argument('-a', '--adducts', action='store_true', required=False,
                            help="Annotate adducts.")
    parser_app.add_argument('-b', '--adducts-library', action='append', required=False,
                            default=[], help="List of adducts.")
    parser_app.add_argument('-e', '--isotopes', action='store_true', required=False,
                            help="Annotate isotopes.")
    parser_app.add_argument('-f', '--isotopes-library', required=False,
                            help="List of isotopes.")
    parser_app.add_argument('-r', '--multiple-charged-ions', action='store_true',
                            required=False, help="Annotate multiple-charged ions.")
    # Collected into a list (like --adducts-library) because the code below
    # iterates over the libraries; a plain string was iterated per character
    # and a missing option made len(None) raise TypeError.
    parser_app.add_argument('-s', '--multiple-charged-ions-library', action='append',
                            required=False, default=[],
                            help="List of multiple charged ions.")
    parser_app.add_argument('-o', '--oligomers', action='store_true', required=False,
                            help="Annotate oligomers.")
    parser_app.add_argument('-m', '--ion-mode', choices=["pos", "neg"], required=True,
                            help="Ion mode of the libraries.")
    parser_app.add_argument('-p', '--ppm', default=3.0, type=float, required=True,
                            help="Mass tolerance in parts per million.")
    parser_app.add_argument('-u', '--max-monomer-units', default=2, type=int, required=False,
                            help="Maximum number of monomer units.")

    #################################
    # ANNOTATE MOLECULAR FORMULAE
    #################################

    parser_amf.add_argument('-l', '--peaklist', type=str, required=True,
                            help="Tab-delimited peaklist.")
    parser_amf.add_argument('-i', '--intensity-matrix', type=str, required=False,
                            help="Tab-delimited intensity matrix.")
    parser_amf.add_argument('-d', '--db', type=str, required=True,
                            help="Sqlite database to write results.")
    parser_amf.add_argument('-c', '--db-mf', type=str, default="http://mfdb.bham.ac.uk",
                            help="Molecular formulae database (reference).")
    parser_amf.add_argument('-a', '--adducts-library', type=str, default=None, required=False,
                            help="List of adducts to search for.")
    parser_amf.add_argument('-m', '--ion-mode', choices=["pos", "neg"], required=True,
                            help="Ion mode of the libraries.")
    parser_amf.add_argument('-p', '--ppm', default=3.0, type=float, required=True,
                            help="Mass tolerance in parts per million.")
    parser_amf.add_argument('-z', '--max-mz', type=float, required=False, default=500.0,
                            help="Maximum m/z value to assign molecular formula(e).")

    #################################
    # ANNOTATE METABOLITES
    #################################

    parser_am.add_argument('-l', '--peaklist', type=str, required=True,
                           help="Tab-delimited peaklist.")
    parser_am.add_argument('-i', '--intensity-matrix', type=str, required=False,
                           help="Tab-delimited intensity matrix.")
    parser_am.add_argument('-d', '--db', type=str, required=True,
                           help="Sqlite database to write results.")
    parser_am.add_argument('-c', '--db-compounds', type=str, required=False,
                           help="Metabolite database (reference).")
    parser_am.add_argument('-n', '--db-name', type=str, default="", required=True,
                           help="Name compound / metabolite database (within --db-compounds).")
    parser_am.add_argument('-a', '--adducts-library', type=str, default=None, required=False,
                           help="List of adducts to search for.")
    parser_am.add_argument('-m', '--ion-mode', choices=["pos", "neg"], required=True,
                           help="Ion mode of the libraries.")
    parser_am.add_argument('-p', '--ppm', default=3.0, type=float, required=True,
                           help="Mass tolerance in parts per million.")

    #################################
    # SUMMARY RESULTS
    #################################

    parser_sr.add_argument('-l', '--peaklist', type=str, required=True,
                           help="Tab-delimited peaklist")
    parser_sr.add_argument('-i', '--intensity-matrix', type=str, required=False,
                           help="Tab-delimited intensity matrix.")
    parser_sr.add_argument('-o', '--output', type=str, required=True,
                           help="Output file for the summary")
    parser_sr.add_argument('-p', '--pdf', type=str, required=False,
                           help="Output pdf file for the summary plots")
    parser_sr.add_argument(
        '-d', '--db', type=str, required=True,
        help="Sqlite database that contains the results from the previous steps.")
    parser_sr.add_argument(
        '-s', '--sep', default="tab", choices=["tab", "comma"], required=True,
        help="Values on each line of the output are separated by this character.")
    parser_sr.add_argument(
        '-r', '--single-row', action="store_true",
        help="Concatenate the annotations for each spectral feature and represent in a single row.")
    parser_sr.add_argument(
        '-c', '--single-column', action="store_true",
        help="Concatenate the annotations for each spectral feature and keep seperate columns for molecular formula, adduct, name, etc.")
    parser_sr.add_argument('-n', '--ndigits-mz', default=None, type=int, required=False,
                           help="Digits after the decimal point for m/z values.")
    parser_sr.add_argument(
        '-t', '--convert-rt', default=None, choices=["sec", "min", None], required=False,
        help="Covert the retention time to seconds or minutes. An additional column will be added.")

    return parser


def main():
    """Command line entry point: parse the arguments and run the selected step.

    The available steps are ``group-features``, ``annotate-peak-patterns``,
    ``annotate-mf``, ``annotate-compounds``, ``summary-results`` and
    ``start-gui``. Nothing is executed when no step is given.
    """
    print("Executing BEAMSpy version {}.".format(__version__))

    parser = _build_parser()
    args = parser.parse_args()

    print(args)

    separators = {"tab": "\t", "comma": ","}

    if args.step == "group-features":
        df = in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix)
        graph = grouping.group_features(df,
                                        db_out=args.db,
                                        max_rt_diff=args.max_rt_diff,
                                        coeff_thres=args.coeff_threshold,
                                        pvalue_thres=args.pvalue_threshold,
                                        method=args.method,
                                        ncpus=args.ncpus)
        nx.write_gml(graph, str(args.gml_file))

    elif args.step == "annotate-peak-patterns":
        # Prefer the correlation graph, then peaklist + matrix, then the
        # peaklist on its own.
        if args.gml_file:
            inp = nx.read_gml(args.gml_file)
        elif args.intensity_matrix:
            inp = in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix)
        else:
            inp = in_out.read_peaklist(args.peaklist)

        if args.adducts:
            if args.adducts_library:
                for i, filename in enumerate(args.adducts_library):
                    lib = _read_library_with_fallback(in_out.read_adducts, filename,
                                                      args.ion_mode)
                    # Only the first library is passed with add=False.
                    annotation.annotate_adducts(inp, db_out=args.db, ppm=args.ppm,
                                                lib=lib, add=i > 0)
            else:
                lib = in_out.read_adducts(_packaged_data_file('data/adducts.txt'),
                                          args.ion_mode)
                annotation.annotate_adducts(inp, db_out=args.db, ppm=args.ppm,
                                            lib=lib, add=False)

        if args.isotopes:
            if args.isotopes_library is not None:
                isotopes_file = args.isotopes_library
            else:
                isotopes_file = _packaged_data_file('data/isotopes.txt')
            lib = in_out.read_isotopes(isotopes_file, args.ion_mode)
            annotation.annotate_isotopes(inp, db_out=args.db, ppm=args.ppm, lib=lib)

        if args.multiple_charged_ions:
            if args.multiple_charged_ions_library:
                for i, filename in enumerate(args.multiple_charged_ions_library):
                    lib = _read_library_with_fallback(in_out.read_multiple_charged_ions,
                                                      filename, args.ion_mode)
                    annotation.annotate_multiple_charged_ions(inp, db_out=args.db,
                                                              ppm=args.ppm, lib=lib,
                                                              add=i > 0)
            else:
                lib = in_out.read_multiple_charged_ions(
                    _packaged_data_file('data/multiple_charged_ions.txt'), args.ion_mode)
                # The default library used to be read without ever being applied.
                annotation.annotate_multiple_charged_ions(inp, db_out=args.db,
                                                          ppm=args.ppm, lib=lib,
                                                          add=False)

        if args.oligomers:
            # Oligomers are annotated with an adducts library. Previously the
            # step reused whatever ``lib`` the last executed step left behind,
            # which raised NameError when no other step ran and could hand
            # over an isotope or multiple-charged-ion library instead.
            if args.adducts_library:
                lib = _read_library_with_fallback(in_out.read_adducts,
                                                  args.adducts_library[-1],
                                                  args.ion_mode)
            else:
                lib = in_out.read_adducts(_packaged_data_file('data/adducts.txt'),
                                          args.ion_mode)
            # --max-monomer-units was parsed but never forwarded.
            annotation.annotate_oligomers(inp, db_out=args.db, ppm=args.ppm, lib=lib,
                                          maximum=args.max_monomer_units)

    elif args.step == "annotate-mf":
        df = _read_feature_table(args)

        if args.adducts_library:
            lib = in_out.read_adducts(args.adducts_library, args.ion_mode)
        else:
            lib = in_out.read_adducts(_packaged_data_file('data/adducts.txt'),
                                      args.ion_mode)
        annotation.annotate_molecular_formulae(df, ppm=args.ppm, lib_adducts=lib,
                                               db_out=args.db, db_in=args.db_mf,
                                               max_mz=args.max_mz)

    elif args.step == "annotate-compounds":
        df = _read_feature_table(args)

        if args.adducts_library:
            lib = in_out.read_adducts(args.adducts_library, args.ion_mode)
        else:
            lib = in_out.read_adducts(_packaged_data_file('data/adducts.txt'),
                                      args.ion_mode)
        # --db-compounds used to be ignored (db_in was hard-coded to "");
        # the empty string is still passed when the option is not given.
        db_in = args.db_compounds if args.db_compounds else ""
        annotation.annotate_compounds(df, lib_adducts=lib, ppm=args.ppm,
                                      db_out=args.db, db_name=args.db_name,
                                      db_in=db_in)

    elif args.step == "summary-results":
        df = _read_feature_table(args)

        df_out = annotation.summary(df,
                                    db=args.db,
                                    single_row=args.single_row,
                                    single_column=args.single_column,
                                    convert_rt=args.convert_rt,
                                    ndigits_mz=args.ndigits_mz)
        df_out.to_csv(args.output, sep=separators[args.sep], index=False,
                      encoding="utf-8")
        if args.pdf:
            plots.report(db=args.db,
                         pdf_out=args.pdf,
                         column_corr="r_value",
                         column_pvalue="p_value",
                         column_ppm_error="ppm_error",
                         column_adducts="adduct")

    elif args.step == "start-gui":
        # Imported here so the GUI toolkit is only needed for this step.
        from PySide2 import QtWidgets
        from beamspy.gui import BeamsApp
        app = QtWidgets.QApplication(sys.argv)
        app.setStyle("Fusion")
        form = BeamsApp()
        form.show()
        sys.exit(app.exec_())
def run(self):
    """Validate the form, run every ticked workflow step in order, then close.

    A critical message box is shown and the method returns early when the
    peaklist / intensity matrix files, the SQLite output file, the compound
    database selection or the summary filename is missing. Otherwise the
    window is hidden and the ticked steps run in this order: group features,
    annotate peak patterns, annotate molecular formulae, annotate compounds,
    create summary.

    Raises:
        IOError: a library field holds neither "Use default" nor the path of
            an existing file.
    """
    fn_peaklist = self.lineEdit_peaklist.text()
    fn_matrix = self.lineEdit_intensity_matrix.text()
    db_out = self.lineEdit_sql_database.text()

    # ------------------------------------------------------------------
    # Input validation
    # ------------------------------------------------------------------
    if not os.path.isfile(fn_peaklist) or not os.path.isfile(fn_matrix):
        QtWidgets.QMessageBox.critical(
            None, "Select file",
            "Select file(s) for Peaklist and/or Intensity Matrix",
            QtWidgets.QMessageBox.Ok)
        return
    elif db_out == "":
        QtWidgets.QMessageBox.critical(
            None, "Select File",
            "Select file for SQLite database to save output",
            QtWidgets.QMessageBox.Ok)
        return

    if self.checkBox_annotate_compounds.isChecked():
        if len(self.listWidget_databases.selectedItems()) == 0 \
                and not self.checkBox_filename_reference.isChecked():
            QtWidgets.QMessageBox.critical(
                None, "Select File",
                "Select database or file for 'Annotate Compounds / Metabolites'",
                QtWidgets.QMessageBox.Ok)
            return

    if self.checkBox_create_summary.isChecked() \
            and self.lineEdit_summary_filename.text() == "":
        QtWidgets.QMessageBox.critical(
            None, "Save File As", "Select file to save summary",
            QtWidgets.QMessageBox.Ok)
        return

    self.hide()

    lib_ion_mode = {"Positive": "pos", "Negative": "neg"}

    def ion_mode():
        # Looked up lazily, exactly where a library is actually read.
        return lib_ion_mode[self.comboBox_ion_mode.currentText()]

    def load_library(selection, default_path, reader, error_message, fallback=None):
        """Read the packaged default or a user supplied library file.

        ``fallback`` is an alternative reader tried when ``reader`` fails on
        a user supplied file.
        """
        if selection == "Use default":
            p = os.path.join(os.path.dirname(os.path.abspath(__file__)), default_path)
            return reader(p, ion_mode())
        if os.path.isfile(selection):
            if fallback is None:
                return reader(selection, ion_mode())
            try:
                return reader(selection, ion_mode())
            except Exception:
                # Was a bare ``except``; KeyboardInterrupt / SystemExit are
                # no longer swallowed.
                return fallback(selection, ion_mode())
        raise IOError(error_message)

    def load_default_adducts():
        # Library shared by the oligomer, molecular formula and compound steps.
        return load_library(self.lineEdit_default_adduct_library.text(),
                            'data/adducts.txt',
                            in_out.read_adducts,
                            "Provide a valid filename for adducts",
                            fallback=in_out.read_mass_differences)

    # ------------------------------------------------------------------
    # Group features
    # ------------------------------------------------------------------
    if self.checkBox_group_features.isChecked():
        print("Grouping features....")
        if self.comboBox_grouping_method.currentText() == "Pearson correlation":
            method = "pearson"
        else:
            method = "spearman"

        df = in_out.combine_peaklist_matrix(fn_peaklist, fn_matrix)
        graph = grouping.group_features(
            df,
            db_out=db_out,
            max_rt_diff=self.doubleSpinBox_max_rt.value(),
            coeff_thres=self.doubleSpinBox_coefficent.value(),
            pvalue_thres=self.doubleSpinBox_p_value.value(),
            method=method,
            block=int(self.doubleSpinBox_block.value()),
            ncpus=int(self.doubleSpinBox_ncpus.value()))
        nx.write_gml(graph, str(self.lineEdit_graph.text()))
        print("Done")
        print("")

    # ------------------------------------------------------------------
    # Annotate peak patterns
    # ------------------------------------------------------------------
    if self.checkBox_annotate_peak_patterns.isChecked():
        print("Annotating peak patterns....")
        graph_file = str(self.lineEdit_graph.text())
        if graph_file != "":
            inp = nx.read_gml(graph_file)
        else:
            inp = in_out.combine_peaklist_matrix(fn_peaklist, fn_matrix)

        if self.checkBox_adduct_library.isChecked():
            print("Adducts....")
            lib = load_library(
                self.lineEdit_adduct_library.text(),
                'data/adducts.txt',
                in_out.read_adducts,
                "Provide a valid filename for adducts or 'Use default'",
                fallback=in_out.read_mass_differences)
            print("")
            print(lib)
            annotation.annotate_adducts(
                inp,
                db_out=db_out,
                ppm=self.doubleSpinBox_pp_ppm_error.value(),
                lib=lib)
            print("Done")

        if self.checkBox_isotopes.isChecked():
            print("Isotopes....")
            lib = load_library(
                self.lineEdit_isotopes.text(),
                'data/isotopes.txt',
                in_out.read_isotopes,
                "Provide a valid filename for isotopes or 'Use default'")
            print("")
            print(lib)
            annotation.annotate_isotopes(
                inp,
                db_out=db_out,
                ppm=self.doubleSpinBox_pp_ppm_error.value(),
                lib=lib)
            print("Done")

        if self.checkBox_multiple_charged.isChecked():
            print("Multiple charged ions....")
            lib = load_library(
                self.lineEdit_multiple_charged.text(),
                'data/multiple_charged_ions.txt',
                in_out.read_multiple_charged_ions,
                "Provide a valid filename for multiple charged ions or 'Use default'")
            annotation.annotate_multiple_charged_ions(
                inp,
                db_out=db_out,
                ppm=self.doubleSpinBox_pp_ppm_error.value(),
                lib=lib)
            print("Done")

        if self.checkBox_oligomers.isChecked():
            print("Oligomers....")
            lib = load_default_adducts()
            # NOTE(review): the oligomer step always works on the peaklist /
            # matrix table, even when a graph was loaded above - confirm
            # that this is intended.
            inp = in_out.combine_peaklist_matrix(fn_peaklist, fn_matrix)
            annotation.annotate_oligomers(
                inp,
                db_out=db_out,
                ppm=self.doubleSpinBox_pp_ppm_error.value(),
                lib=lib,
                maximum=self.spinBox_max_monomer_units.value())
            print("Done")
        # Was a bare ``print``, which is a no-op expression in Python 3.
        print("")

    # ------------------------------------------------------------------
    # Annotate molecular formulae
    # ------------------------------------------------------------------
    if self.checkBox_annotate_molecular_formulae.isChecked():
        print("Annotating molecular formulae....")
        df = in_out.combine_peaklist_matrix(fn_peaklist, fn_matrix)
        lib = load_default_adducts()

        if self.comboBox_source_mf.currentText() == "Tab-delimited text file":
            db_in = self.lineEdit_filename_mf.text()
            rules = None
            max_mz = None
        else:
            db_in = "http://mfdb.bham.ac.uk"
            rules = self.checkBox_heuristic_rules.isChecked()
            max_mz = self.spinBox_max_mz.value()

        print("")
        print(lib)
        annotation.annotate_molecular_formulae(
            df,
            lib_adducts=lib,
            ppm=self.doubleSpinBox_mf_ppm_error.value(),
            db_out=db_out,
            db_in=db_in,
            rules=rules,
            max_mz=max_mz)
        print("Done")
        print("")

    # ------------------------------------------------------------------
    # Annotate compounds
    # ------------------------------------------------------------------
    if self.checkBox_annotate_compounds.isChecked():
        print("Annotating compounds....")
        df = in_out.combine_peaklist_matrix(fn_peaklist, fn_matrix)
        lib = load_default_adducts()

        if self.checkBox_filename_reference.isChecked():
            print("")
            print(lib)
            annotation.annotate_compounds(
                df,
                lib_adducts=lib,
                ppm=self.doubleSpinBox_cpds_ppm_error.value(),
                db_out=db_out,
                db_name=None,
                db_in=self.lineEdit_filename_reference.text())
        else:
            for db_name in self.listWidget_databases.selectedItems():
                annotation.annotate_compounds(
                    df,
                    lib_adducts=lib,
                    ppm=self.doubleSpinBox_cpds_ppm_error.value(),
                    db_out=db_out,
                    db_name=self.db_names[db_name.text()])
        print("Done")
        # Was a bare ``print``, which is a no-op expression in Python 3.
        print("")

    # ------------------------------------------------------------------
    # Create summary
    # ------------------------------------------------------------------
    if self.checkBox_create_summary.isChecked():
        print("Creating summary....")
        if self.checkBox_convert_rt.isChecked():
            rt_units = {"Seconds": "sec", "Minutes": "min"}
            convert_rt = rt_units[self.comboBox_convert_rt.currentText()]
        else:
            convert_rt = None

        if self.checkBox_mz_digits.isChecked():
            ndigits_mz = self.spinBox_mz_digits.value()
        else:
            ndigits_mz = None

        df = in_out.combine_peaklist_matrix(fn_peaklist, fn_matrix)

        annotations_format = self.comboBox_annotations_format.currentText()
        if annotations_format == "Single row for each feature and separate columns":
            single_row = True
            single_column = False
        elif annotations_format == "Single row for each feature and merged columns":
            single_row = True
            single_column = True
        else:
            single_row = False
            single_column = False

        df_out = annotation.summary(df,
                                    db=db_out,
                                    single_row=single_row,
                                    single_column=single_column,
                                    convert_rt=convert_rt,
                                    ndigits_mz=ndigits_mz)

        separators = {"tab": "\t", "comma": ","}
        summary_filename = self.lineEdit_summary_filename.text()
        df_out.to_csv(summary_filename,
                      sep=separators[self.comboBox_separator.currentText()],
                      index=False,
                      encoding="utf-8")

        # Swap only the trailing extension for ".pdf". The previous
        # str.replace(ext, ".pdf") rewrote every occurrence of the extension
        # in the path, e.g. "/a.txt_dir/out.txt" -> "/a.pdf_dir/out.pdf".
        pdf_out = os.path.splitext(str(summary_filename))[0] + ".pdf"

        plots.report(db=str(self.lineEdit_sql_database.text()),
                     pdf_out=pdf_out,
                     column_corr="r_value",
                     column_pvalue="p_value",
                     column_ppm_error="ppm_error",
                     column_adducts="adduct")
        print("Done")
        print("")

    self.close()