def rhapsody(input_obj, classifier, aux_classifier=None, input_type='SAVs',
             custom_PDB=None, force_env=None, log=True):
    """Run the prediction pipeline on a set of SAVs and return the results.

    The function imports the classifier, obtains (or imports) PolyPhen-2
    results, computes features and predictions, prints the predictions to
    :file:`rhapsody-predictions.txt` and calls ``savePickle()`` on the
    :class:`Rhapsody` object before returning it.

    :arg input_obj: the query, whose meaning depends on *input_type*

      - if *input_type* = ``'SAVs'`` (default), a filename, a list/tuple of
        strings or a single string, containing SAV coordinates with the
        format ``'P17516 135 G E'``
      - if *input_type* = ``'PP2'``, the filename of the output from
        PolyPhen-2, usually named :file:`pph2-full.txt`
      - if *input_type* = ``'scanning'``, a string of Uniprot coordinates
        with unspecified variant, for performing a simulated mutagenesis
        experiment. Possible formats are ``'P17516 135'`` for a single
        site scanning, and ``'P17516'`` for a complete sequence scanning
    :arg classifier: classifier (and feature set) to import from pickle;
        passed to ``Rhapsody.importClassifier``
    :arg aux_classifier: if not **None**, additional predictions are
        computed with ``Rhapsody.calcAuxPredictions`` and a report is
        printed to :file:`rhapsody-predictions-full.txt`. A failure in this
        step is logged as a warning and does not abort the run
    :arg input_type: ``'SAVs'``, ``'scanning'`` or ``'PP2'``
    :type input_type: str
    :arg custom_PDB: a PDBID, a filename or an Atomic instance
    :arg force_env: forwarded unchanged to ``importClassifier`` and
        ``calcAuxPredictions``
    :arg log: if **True**, log messages are saved in
        :file:`rhapsody-log.txt`
    :type log: bool
    :returns: the :class:`Rhapsody` object holding results and predictions
    :raises AssertionError: if *input_type* is not one of the accepted values
    """
    # NOTE(review): another function named `rhapsody` is defined further down
    # in this file; at import time the later definition shadows this one.

    # Raised explicitly rather than through `assert` so that the check is not
    # stripped by `python -O`; the exception type is kept for compatibility.
    valid_types = ('SAVs', 'scanning', 'PP2')
    if input_type not in valid_types:
        raise AssertionError(
            f"'input_type' must be one of {valid_types}, got {input_type!r}")

    log_file = 'rhapsody-log.txt'
    if log:
        LOGGER.start(log_file)

    try:
        # initialize object that will contain all results and predictions
        r = Rhapsody()

        # import classifier and feature set from pickle
        r.importClassifier(classifier, force_env=force_env)

        # import custom PDB structure
        if custom_PDB is not None:
            r.setCustomPDB(custom_PDB)

        # obtain or import PolyPhen-2 results
        if input_type == 'SAVs':
            # 'input_obj' is a filename, list, tuple or string
            # containing SAV coordinates
            r.queryPolyPhen2(input_obj)
        elif input_type == 'scanning':
            # 'input_obj' is a Uniprot accession number identifying a
            # sequence, with or without a specified position
            r.queryPolyPhen2(input_obj, scanning=True)
        elif input_type == 'PP2':
            # 'input_obj' is a filename containing PolyPhen-2's output
            r.importPolyPhen2output(input_obj)

        # compute needed features
        r.calcFeatures()

        # compute predictions
        r.calcPredictions()

        if aux_classifier is not None:
            # Auxiliary predictions are best-effort: any failure is reported
            # in the log and the main predictions are still written below.
            try:
                r.calcAuxPredictions(aux_classifier, force_env=force_env)
                r.printPredictions(
                    format="both",
                    filename='rhapsody-predictions-full.txt')
            except Exception as e:
                LOGGER.warn(f'Unable to compute auxiliary predictions: {e}')

        # print final predictions
        r.printPredictions(filename='rhapsody-predictions.txt')

        # save pickle
        r.savePickle()
    finally:
        # Close the log file even when a pipeline step raises, so the log
        # handle opened above is never left dangling.
        if log:
            LOGGER.close(log_file)

    return r
def rhapsody(query, query_type='SAVs', main_classifier=None,
             aux_classifier=None, custom_PDB=None, force_env=None,
             refresh=False, log=True, **kwargs):
    """Obtain Rhapsody pathogenicity predictions on a list of human missense
    variants ([ref]_)

    :arg query: Single Amino Acid Variants (SAVs) in Uniprot coordinates

      - if *query_type* = ``'SAVs'`` (default), it should be a filename, a
        string or a list/tuple of strings, containing Uniprot SAV
        coordinates, with the format ``'P17516 135 G E'``. The string could
        also be just a single Uniprot sequence identifier (e.g.
        ``'P17516'``), or the coordinate of a specific site in a sequence
        (e.g. ``'P17516 135'``), in which case all possible 19 amino acid
        substitutions at the specified positions will be analyzed.
      - if *query_type* = ``'PolyPhen2'``, it should be a filename
        containing the output from PolyPhen-2, usually named
        :file:`pph2-full.txt`
    :type query: str, list

    :arg query_type: ``'SAVs'`` or ``'PolyPhen2'``
    :type query_type: str

    :arg main_classifier: main classifier's filename. If **None**, the
        default *full* Rhapsody classifier will be used
    :type main_classifier: str

    :arg aux_classifier: auxiliary classifier's filename. If both
        *main_classifier* and *aux_classifier* are **None**, the default
        *reduced* Rhapsody classifier will be used
    :type aux_classifier: str

    :arg custom_PDB: a PDBID, a filename or an :class:`Atomic` to be used
        for computing structural and dynamical features, instead of the PDB
        structure automatically selected by the program
    :type custom_PDB: str, :class:`AtomGroup`

    :arg force_env: force a specific environment model for GNM/ANM
        calculations, among ``'chain'``, ``'reduced'`` and ``'sliced'``.
        If **None** (default), the model of individual dynamical features
        will match that found in the classifier's feature set
    :type force_env: str

    :arg refresh: if **True**, precomputed features and PDB mappings found
        in the working directory will be ignored and computed again
    :type refresh: bool

    :arg log: if **True**, log messages will be saved in
        :file:`rhapsody-log.txt`
    :type log: bool

    :arg kwargs: additional keyword arguments, forwarded unchanged to the
        :class:`Rhapsody` constructor

    :returns: the :class:`Rhapsody` object holding results and predictions
    :raises AssertionError: if *query_type* is not ``'SAVs'`` or
        ``'PolyPhen2'``

    .. [ref] Ponzoni L, Bahar I. Structural dynamics is a determinant of
       the functional significance of missense variants. *PNAS* **2018**
       115 (16) 4164-4169.
    """
    # Raised explicitly rather than through `assert` so that the check is not
    # stripped by `python -O`; type and message are kept for compatibility.
    if query_type not in ('SAVs', 'PolyPhen2'):
        raise AssertionError('Invalid query type.')

    log_file = 'rhapsody-log.txt'
    if log:
        LOGGER.start(log_file)

    try:
        # select classifiers: the reduced default is only filled in when the
        # main classifier is defaulted too (see *aux_classifier* above)
        if main_classifier is None:
            defaults = getDefaultClassifiers()
            main_classifier = defaults['full']
            if aux_classifier is None:
                aux_classifier = defaults['reduced']

        # initialize object that will contain all results and predictions
        r = Rhapsody(**kwargs)

        # import classifiers and feature set from pickle
        r.importClassifiers(main_classifier, aux_classifier,
                            force_env=force_env)

        # import custom PDB structure
        if custom_PDB is not None:
            r.setCustomPDB(custom_PDB)

        # obtain or import PolyPhen-2 results
        if query_type == 'SAVs':
            r.queryPolyPhen2(query)
        elif query_type == 'PolyPhen2':
            r.importPolyPhen2output(query)

        # compute predictions
        r.getPredictions(refresh=refresh)

        # print predictions to file
        r.printPredictions()
        if aux_classifier is not None:
            # print both 'full' and 'reduced' predictions in a more
            # detailed format
            r.printPredictions(
                classifier="both", PolyPhen2=False, EVmutation=False,
                filename='rhapsody-predictions-full_vs_reduced.txt')

        # save pickle
        r.savePickle()
    finally:
        # Close the log file even when a pipeline step raises, so the log
        # handle opened above is never left dangling.
        if log:
            LOGGER.close(log_file)

    return r