from glycresoft_ms2_classification.structure import sequence
from glycresoft_ms2_classification.structure import glycans
from glycresoft_ms2_classification.structure import modification
from glycresoft_ms2_classification.prediction_tools.false_discovery_rate import random_glycopeptide
from glycresoft_ms2_classification.utils import config_loader

config_file = "test.config"
config = ConfigParser()
config.read(config_file)
#multiprocessing_util.log_to_stderr()

config_loader.load("base.config")


def try_type(obj):
    """Coerce *obj* to the most specific simple type that accepts it.

    Conversion is attempted in the order ``int``, ``float``, ``str``; the
    result of the first conversion that succeeds is returned.

    Only conversion failures (``TypeError``, ``ValueError``,
    ``OverflowError``) trigger the fallback, so ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed the way a bare ``except:`` would.
    """
    conversion_errors = (TypeError, ValueError, OverflowError)
    try:
        return int(obj)
    except conversion_errors:
        pass
    try:
        return float(obj)
    except conversion_errors:
        return str(obj)


# class DebugPipeline(unittest.TestCase):
#     db_file_name = "test_data/Phil-82-Chemotrypsin/ResultOf20150428_04_isos.db"
#     ms1_matching_output_file = "test_data/Phil-82-Chemotrypsin/ResultOf20150428_04_isos.csv"
#     ms2_decon_file = "test_data/Phil-82-Chemotrypsin/20150428_04_isos_individual_scans_processed.yaml"
#     glycosylation_sites_file = "test_data/Phil-82-Chemotrypsin/Phil82-glycosites.txt"
def main():
    """Command-line entry point: parse arguments and run the chosen sub-command.

    Builds an ``argparse`` parser with the sub-commands ``build-model``,
    ``classify-with-model``, ``reclassify-with-model``, ``model-diagnostics``
    and ``calculate-fdr``.  Each sub-parser registers its handler through
    ``set_defaults(func=...)``.

    After parsing, the namespace is turned into a plain dict and normalised
    before being passed to the handler as keyword arguments:

    * ``func``, ``debug``, ``config``, ``protein_prospector_xml`` and
      ``parameter_file`` are removed (they are consumed here);
    * an explicit ``--config`` file is loaded through ``config_loader``;
    * modification lists, when present, are passed through
      ``uri_decode_list``;
    * a Protein Prospector XML file, when given, overrides the enzyme and
      both modification lists;
    * values from a JSON parameter file, when given, are merged over the
      command-line values;
    * an ``out`` path that does not start with ``os.sep`` is prefixed with
      ``"." + os.sep``.

    A ``GlycReSoftInterprocessCommunicationException`` raised anywhere in
    that sequence is logged at debug level and the process exits with the
    exception's ``errcode``.
    """
    import argparse
    import sys

    app = argparse.ArgumentParser()
    subparsers = app.add_subparsers()

    # Options shared by every sub-command.
    app.add_argument(
        "-c", "--config", type=str, default=None, required=False,
        help="Path to configuration file")
    app.add_argument(
        "-d", "--debug", action='store_true', default=False, required=False)
    app.add_argument(
        "-n", "--n-processes", type=int, action="store", default=4,
        help="Number of procresses to use")

    # NOTE: several help strings below were originally written with a
    # backslash-newline inside the literal.  They are spelled here with
    # implicit concatenation; argparse collapses whitespace runs when it
    # renders help, so the displayed text is the same.

    # BUILD MODEL
    build_model_app = subparsers.add_parser(
        "build-model", help="Build a model and prepare it to be labeled")
    build_model_app.add_argument(
        "--parameter-file", action="store", default=None)
    build_model_app.add_argument(
        "--ms1-results-file", action="store", required=True)
    build_model_app.add_argument(
        "--glycosylation-sites-file", action="store", required=True)
    build_model_app.add_argument(
        "-e", "--enzyme", action="store", help="Name of the enzyme used")
    build_model_app.add_argument(
        "-p", "--protein-prospector-xml", action="store",
        help="path to msdgist XML file. "
             "Instead of --enzyme,--constant_modifications and --variable_modifications")
    build_model_app.add_argument(
        "--deconvoluted-spectra-file", action="store", required=True)
    build_model_app.add_argument(
        "--method", action="store", default="full_random_forest",
        choices=set(PrepareModelTask.method_table),
        help="Select the model method to use for classification")
    build_model_app.add_argument(
        "--ms1-match-tolerance", type=float, action="store",
        default=match_ions2.ms1_tolerance_default,
        help="Mass Error Tolerance for matching MS1 masses in PPM")
    build_model_app.add_argument(
        "--ms2-match-tolerance", type=float, action="store",
        default=match_ions2.ms2_tolerance_default,
        help="Mass Error Tolerance for matching MS2 masses in PPM")
    build_model_app.add_argument(
        "--constant-modification-list", type=str, action="append", default=None,
        help="Pass the list of constant modifications to include in the sequence search space")
    build_model_app.add_argument(
        "--variable-modification-list", type=str, action="append", default=None,
        help="Pass the list of variable modifications to include in the sequence search space")
    build_model_app.add_argument("--out", action="store", default=None)
    build_model_app.set_defaults(func=build_model_app_function)

    # CLASSIFY WITH MODEL
    classify_with_model_app = subparsers.add_parser(
        "classify-with-model", help="Classify a data set using a labeled model")
    classify_with_model_app.add_argument(
        "--parameter-file", action="store", default=None)
    classify_with_model_app.add_argument(
        "--ms1-results-file", action="store", required=True)
    classify_with_model_app.add_argument(
        "--glycosylation-sites-file", action="store", required=True)
    classify_with_model_app.add_argument(
        "--deconvoluted-spectra-file", action="store", required=True)
    classify_with_model_app.add_argument(
        "--method", action="store", default="full_random_forest",
        choices=set(ClassifyTargetWithModelTask.method_table),
        help="Select the model method to use for classification")
    classify_with_model_app.add_argument(
        "--model-file", action="store", default="naive", required=True)
    classify_with_model_app.add_argument(
        "--ms1-match-tolerance", type=float, action="store",
        default=match_ions2.ms1_tolerance_default,
        help="Mass Error Tolerance for matching MS1 masses in PPM")
    classify_with_model_app.add_argument(
        "--ms2-match-tolerance", type=float, action="store",
        default=match_ions2.ms2_tolerance_default,
        help="Mass Error Tolerance for matching MS2 masses in PPM")
    classify_with_model_app.add_argument(
        "--constant-modification-list", type=str, action="append", default=None,
        help="Pass the list of constant modifications to include in the sequence search space")
    classify_with_model_app.add_argument(
        "--variable-modification-list", type=str, action="append", default=None,
        help="Pass the list of variable modifications to include in the sequence search space")
    classify_with_model_app.add_argument(
        "-e", "--enzyme", action="store", help="Name of the enzyme used")
    classify_with_model_app.add_argument(
        "-p", "--protein-prospector-xml", action="store",
        help="path to msdgist "
             "XML file. Instead of --enzyme,--constant_modifications and --variable_modifications")
    classify_with_model_app.add_argument("--out", action="store", default=None)
    classify_with_model_app.set_defaults(func=classify_with_model_app_function)
    classify_with_model_app.add_argument(
        "--decoy-to-real-ratio", action="store", default=1, type=int,
        help="Number of "
             "decoys per prediction sequence")
    classify_with_model_app.add_argument(
        "--random-only", action="store_true", default=False,
        help="Don't "
             "generate shuffled decoys, only randomized sequences")
    classify_with_model_app.add_argument(
        "--prefix-length", default=0, required=False, type=int,
        help="Length of peptide prefix to preserve when generating "
             "random glycopeptides by shuffling.")
    classify_with_model_app.add_argument(
        "--suffix-length", default=1, required=False, type=int,
        help="Length of peptide suffix to preserve when generating "
             "random glycopeptides by shuffling.")

    # RECLASSIFY WITH MODEL
    reclassify_with_model_app = subparsers.add_parser(
        "reclassify-with-model",
        help="Rerun classification of an matched ion data file")
    reclassify_with_model_app.add_argument(
        "--target-file", action="store", default=None, required=True,
        help="Matched ion data file to re-classify")
    reclassify_with_model_app.add_argument(
        "--method", action="store", default="full_random_forest",
        choices=set(ModelDiagnosticsTask.method_table),
        help="Select the model method to use for classification")
    reclassify_with_model_app.add_argument(
        "--model-file", action="store", default="naive", required=True)
    reclassify_with_model_app.set_defaults(
        func=reclassify_with_model_app_function)

    # Simple Diagnostic plots for testing a model on itself
    model_diagnostics_app = subparsers.add_parser(
        "model-diagnostics",
        help="Given a labeled model, calculate model diagnostics")
    model_diagnostics_app.add_argument(
        "--method", action="store", default="full_random_forest",
        choices=set(ModelDiagnosticsTask.method_table),
        help="Select the model method to use for classification")
    model_diagnostics_app.add_argument(
        "--model-file", action="store", default="naive", required=True)
    model_diagnostics_app.set_defaults(func=model_diagnostics_app_function)

    # Stand alone False Discovery Rate Calculations. Either reuse decoys or
    # create them anew. May also include re-scoring for the predictions and
    # decoys
    calculate_fdr_app = subparsers.add_parser(
        "calculate-fdr",
        help="Given a set of predictions from a collection "
             "of data, estimate the false discovery rate")
    calculate_fdr_app.add_argument(
        "--predictions-file", required=True,
        help="Path to predictions file generated by "
             "classify-with-model, build-model, or reclassify-with-model")
    calculate_fdr_app.add_argument(
        "--deconvoluted-spectra-file", action="store", default=None)
    calculate_fdr_app.add_argument(
        "--method", action="store", default="full_random_forest",
        choices=set(ModelDiagnosticsTask.method_table),
        help="Select the model method to use for classification")
    calculate_fdr_app.add_argument(
        "--decoys-file", default=None,
        help="A file containing precomputed decoy sequence matches")
    calculate_fdr_app.add_argument(
        "--decoy-to-real-ratio", action="store", default=1, type=int,
        help="Number of "
             "decoys per prediction sequence")
    calculate_fdr_app.add_argument(
        "--random-only", action="store_true", default=False,
        help="Don't "
             "generate shuffled decoys, only randomized sequences")
    calculate_fdr_app.add_argument(
        "--prefix-length", default=0, required=False, type=int,
        help="Length of peptide prefix to preserve when generating "
             "random glycopeptides by shuffling.")
    calculate_fdr_app.add_argument(
        "--suffix-length", default=1, required=False, type=int,
        help="Length of peptide suffix to preserve when generating "
             "random glycopeptides by shuffling.")
    calculate_fdr_app.add_argument(
        "--model-file", action="store", default="naive", required=False)
    calculate_fdr_app.add_argument("--out", action="store", default=None)
    calculate_fdr_app.set_defaults(func=calculate_fdr_app_function)

    try:
        # Work on the namespace's dict so consumed options can be popped
        # before the remainder is forwarded as keyword arguments.
        args = app.parse_args()
        args = args.__dict__
        func = args.pop("func")
        # NOTE(review): "-d/--debug" has default=False, so the key is always
        # present and the GLYCRESOFT_DEBUG fallback never applies -- confirm
        # whether the environment variable was meant to be honoured.
        debug = args.pop("debug", os.environ.get("GLYCRESOFT_DEBUG", False))

        config_path = args.pop("config")
        if config_path is not None:
            config_loader.load(config_path)
        logger.debug("Config: %r", json.dumps(config_loader.gather(), indent=4))

        # Not every sub-command defines the modification options.
        if 'constant_modification_list' in args:
            args['constant_modification_list'] = uri_decode_list(
                args['constant_modification_list'])
        if 'variable_modification_list' in args:
            args['variable_modification_list'] = uri_decode_list(
                args['variable_modification_list'])

        # A Protein Prospector file replaces enzyme and modification lists.
        if 'protein_prospector_xml' in args and args["protein_prospector_xml"] is not None:
            ms_digest = MSDigestParameters.parse(args["protein_prospector_xml"])
            args["constant_modification_list"] = ms_digest.constant_modifications
            args["variable_modification_list"] = ms_digest.variable_modifications
            args["enzyme"] = ms_digest.enzyme
        args.pop("protein_prospector_xml", None)

        param_file = args.pop("parameter_file", None)
        if param_file is not None:
            params = load_parameters_from_json(param_file)
            args.update(params)

        # "reclassify-with-model" and "model-diagnostics" have no --out
        # option, so look the key up defensively instead of indexing; an
        # empty value is left untouched rather than indexed.
        out_path = args.get('out')
        if out_path and not out_path.startswith(os.sep):
            args['out'] = ".{0}{1}".format(os.sep, out_path)

        logger.info(json.dumps(args, indent=4))
        if not debug:
            # Intermediary-file clean-up is currently disabled:
            # atexit.register(lambda: clean_up_files(*intermediary_files))
            pass
        logger.debug("Entering main program")
        func(**args)
    except GlycReSoftInterprocessCommunicationException as e:
        logger.debug("An error occurred", exc_info=e)
        # sys.exit is used instead of the site-provided exit() helper, which
        # is not guaranteed to exist (e.g. "python -S" or frozen builds).
        sys.exit(e.errcode)