def fetch_pdb(pdbid):
    """Get the newest entry from the RCSB server for the given PDB ID.

    The ID is lower-cased before it is used. The process exits with status 1
    if the server answers with an HTTP error or with its "We're sorry, but ..."
    page instead of a structure file.

    :param pdbid: PDB ID of the entry to download.
    :return: two-element list ``[pdbfile, pdbid]`` with the decoded file
        content and the lower-cased PDB ID.
    """
    pdbid = pdbid.lower()
    logger.info(f'checking status of PDB-ID {pdbid}')
    # @todo re-implement state check with new RCSB API, see
    # https://www.rcsb.org/news?year=2020&article=5eb18ccfd62245129947212a&feature=true
    # The disabled check used check_pdb_status(pdbid) to get (state, current_entry):
    #   'OBSOLETE' -> log and use current_entry instead
    #   'CURRENT'  -> entry is up-to-date
    #   'UNKNOWN'  -> invalid PDB-ID, log an error and exit with status 1
    logger.info('downloading file from PDB')
    # get URL for current entry
    # @todo needs update to react properly on response codes of RCSB servers
    pdburl = f'http://www.rcsb.org/pdb/files/{pdbid}.pdb'
    try:
        # The context manager closes the HTTP response even if reading or
        # decoding fails, so the connection is not leaked.
        with urlopen(pdburl) as response:
            pdbfile = response.read().decode()
    except HTTPError:
        logger.error('no file in PDB format available from wwPDB for the given PDB ID')
        sys.exit(1)
    # If no PDB file is available, a text is now shown with "We're sorry, but ..."
    # Could previously be distinguished by an HTTP error
    if 'sorry' in pdbfile:
        logger.error('no file in PDB format available from wwPDB for the given PDB ID.')
        sys.exit(1)
    return [pdbfile, pdbid]
def run_analysis(inputstructs, inputpdbids):
    """Main function. Calls functions for processing, report generation and visualization.

    :param inputstructs: list of structure file paths, where the entry ``'-'``
        means "read the structure from stdin"; ``None`` selects the PDB ID mode.
    :param inputpdbids: list of PDB IDs to download and process; only used when
        ``inputstructs`` is ``None``.

    When more than one input was given, ``config.OUTPATH`` is redirected to a
    per-structure sub-folder of ``config.BASEPATH`` and the report prefix is
    set to ``'report'``. The process exits with status 1 on an empty input file.
    """
    pdbid = None
    # @todo For multiprocessing, implement better stacktracing for errors
    # Print title and version
    logger.info(f'Protein-Ligand Interaction Profiler (PLIP) {__version__}')
    logger.info(f'brought to you by: {config.__maintainer__}')
    logger.info('please cite: https://www.doi.org/10.1093/nar/gkv315')
    output_prefix = config.OUTPUTFILENAME

    if inputstructs is not None:  # Process PDB file(s)
        # Counted before de-duplication, exactly as the number of inputs given
        num_structures = len(inputstructs)
        inputstructs = remove_duplicates(inputstructs)
        for inputstruct in inputstructs:
            # Decide per entry: a '-' earlier in the list must not cause later
            # file paths to be handed over as raw structure strings.
            read_from_stdin = inputstruct == '-'
            if read_from_stdin:
                inputstruct = sys.stdin.read()
                if config.RAWSTRING:
                    # Interpret escape sequences (e.g. a literal "\n") in the input
                    inputstruct = bytes(inputstruct, 'utf8').decode('unicode_escape')
            elif os.path.getsize(inputstruct) == 0:
                logger.error('empty PDB file')
                sys.exit(1)
            if num_structures > 1:
                # Sub-folder name: the part before the last dot, without any
                # directories. Names without a dot are used as they are
                # instead of failing with an IndexError.
                name_parts = inputstruct.split('.')
                stem = name_parts[-2] if len(name_parts) > 1 else name_parts[0]
                basename = stem.split('/')[-1]
                config.OUTPATH = '/'.join([config.BASEPATH, basename])
                output_prefix = 'report'
            process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin,
                        outputprefix=output_prefix)
    else:  # Try to fetch the current PDB structure(s) directly from the RCSB server
        num_pdbids = len(inputpdbids)
        inputpdbids = remove_duplicates(inputpdbids)
        for inputpdbid in inputpdbids:
            pdbpath, pdbid = download_structure(inputpdbid)
            if num_pdbids > 1:
                # Two-level layout: <BASEPATH>/<characters 2-3 of the ID>/<ID>
                config.OUTPATH = '/'.join([config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()])
                output_prefix = 'report'
            process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix)

    if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None:
        if config.BASEPATH in ['.', './']:
            logger.info('finished analysis, find the result files in the working directory')
        else:
            logger.info(f'finished analysis, find the result files in {config.BASEPATH}')
def download_structure(inputpdbid):
    """Download the structure for a PDB ID and store it below ``config.BASEPATH``.

    The ID must be four characters long and must not be reported as
    'UnknownProtein' by ``extract_pdbid``; otherwise an error is logged and
    the process exits with status 1. The same happens when a ``ValueError``
    is raised while fetching or writing the file.

    :param inputpdbid: PDB ID as given by the user.
    :return: tuple ``(pdbpath, pdbid)`` with the path of the written file and
        the PDB ID returned by ``fetch_pdb``.
    """
    try:
        wrong_length = len(inputpdbid) != 4
        if wrong_length or extract_pdbid(inputpdbid.lower()) == 'UnknownProtein':
            logger.error(f'invalid PDB-ID (wrong format): {inputpdbid}')
            sys.exit(1)

        structure_text, pdbid = fetch_pdb(inputpdbid.lower())

        # Target file is <BASEPATH>/<pdbid>.pdb, without a doubled slash
        base_dir = config.BASEPATH.rstrip('/')
        pdbpath = tilde_expansion(f'{base_dir}/{pdbid}.pdb')
        create_folder_if_not_exists(config.BASEPATH)

        with open(pdbpath, 'w') as handle:
            handle.write(structure_text)
        logger.info(f'file downloaded as {pdbpath}')
        return pdbpath, pdbid
    except ValueError:
        # Invalid PDB ID, cannot fetch from RCSB server
        logger.error(f'PDB-ID does not exist: {inputpdbid}')
        sys.exit(1)
def readmol(path, as_string=False):
    """Read a molecule with Pybel and return it together with its file format.

    :param path: file name of the structure, or the structure content itself
        when ``as_string`` is true.
    :param as_string: treat ``path`` as file content instead of a file name.
    :return: tuple ``(molecule, format)``; the only format tried is 'pdb'.

    Exits with status 1 if the string cannot be read (IOError) or if the file
    yields no molecule.
    """
    # Windows-generated input: strip carriage returns from string content
    if "\r" in path and as_string:
        path = path.replace('\r', '')

    for fmt in ['pdb']:
        converter = pybel.ob.OBConversion()
        converter.SetInFormat(fmt)
        logger.debug(f'detected {fmt} as format, trying to read file with OpenBabel')

        # Molecules are read with single bond information only
        if not as_string:
            molecules = pybel.readfile(format=fmt, filename=path, opt={"s": None})
            try:
                molecule = next(molecules)
            except StopIteration:
                logger.error('file contains no valid molecules')
                sys.exit(1)
        else:
            try:
                molecule = pybel.readstring(fmt, path)
            except IOError:
                logger.error('no valid file format provided')
                sys.exit(1)

        logger.debug('molecule successfully read')

        # Multiple bonds are assigned in a second step
        molecule.OBMol.PerceiveBondOrders()
        return molecule, fmt

    # Only reached if the list of formats above is empty
    logger.error('no valid file format provided')
    sys.exit(1)
def _build_argument_parser():
    """Create the PLIP command line parser and the list of tunable thresholds.

    :return: tuple ``(parser, thresholds)``; ``thresholds`` is a list of
        ``(name, type)`` named tuples, one per hidden ``--<name>`` option.
    """
    parser = ArgumentParser(prog="PLIP", description=description)

    # Needs either PDB ID or file
    pdbstructure = parser.add_mutually_exclusive_group(required=True)
    # '-' as file name reads from stdin
    pdbstructure.add_argument("-f", "--file", dest="input", nargs="+",
                              help="Set input file, '-' reads from stdin")
    pdbstructure.add_argument("-i", "--input", dest="pdbid", nargs="+")

    # Needs either outpath or stdout
    outputgroup = parser.add_mutually_exclusive_group(required=False)
    outputgroup.add_argument("-o", "--out", dest="outpath", default="./")
    outputgroup.add_argument("-O", "--stdout", dest="stdout", action="store_true", default=False,
                             help="Write to stdout instead of file")

    parser.add_argument("--rawstring", dest="use_raw_string", default=False, action="store_true",
                        help="Use Python raw strings for stdin")
    parser.add_argument("-v", "--verbose", dest="verbose", default=False,
                        help="Turn on verbose mode", action="store_true")
    parser.add_argument("-q", "--quiet", dest="quiet", default=False,
                        help="Turn on quiet mode", action="store_true")
    parser.add_argument("-s", "--silent", dest="silent", default=False,
                        help="Turn on silent mode", action="store_true")
    parser.add_argument("-p", "--pics", dest="pics", default=False,
                        help="Additional pictures", action="store_true")
    parser.add_argument("-x", "--xml", dest="xml", default=False,
                        help="Generate report file in XML format", action="store_true")
    parser.add_argument("-t", "--txt", dest="txt", default=False,
                        help="Generate report file in TXT (RST) format", action="store_true")
    parser.add_argument("-y", "--pymol", dest="pymol", default=False,
                        help="Additional PyMOL session files", action="store_true")
    parser.add_argument(
        "--maxthreads", dest="maxthreads", default=multiprocessing.cpu_count(),
        help="Set maximum number of main threads (number of binding sites processed simultaneously). "
             "If not set, PLIP uses all available CPUs if possible.",
        type=int)
    parser.add_argument(
        "--breakcomposite", dest="breakcomposite", default=False,
        help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.",
        action="store_true")
    parser.add_argument(
        "--altlocation", dest="altlocation", default=False,
        help="Also consider alternate locations for atoms (e.g. alternate conformations).",
        action="store_true")
    parser.add_argument("--nofix", dest="nofix", default=False,
                        help="Turns off fixing of PDB files.", action="store_true")
    parser.add_argument("--nofixfile", dest="nofixfile", default=False,
                        help="Turns off writing files for fixed PDB files.", action="store_true")
    parser.add_argument(
        "--nopdbcanmap", dest="nopdbcanmap", default=False,
        help="Turns off calculation of mapping between canonical and PDB atom order for ligands.",
        action="store_true")
    parser.add_argument(
        "--dnareceptor", dest="dnareceptor", default=False,
        help="Treat nucleic acids as part of the receptor structure (together with any present protein) instead of as a ligand.",
        action="store_true")
    parser.add_argument(
        "--name", dest="outputfilename", default="report",
        help="Set a filename for the report TXT and XML files. Will only work when processing single structures.")

    # Either peptide/inter or intra mode
    ligandtype = parser.add_mutually_exclusive_group()
    ligandtype.add_argument(
        "--peptides", "--inter", dest="peptides", default=[],
        help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts",
        nargs="+")
    ligandtype.add_argument(
        "--intra", dest="intra",
        help="Allows to define one chain to analyze intra-chain contacts.")

    parser.add_argument("--keepmod", dest="keepmod", default=False,
                        help="Keep modified residues as ligands", action="store_true")
    parser.add_argument(
        "--nohydro", dest="nohydro", default=False,
        help="Do not add polar hydrogens in case your structure already contains hydrogens.",
        action="store_true")
    parser.add_argument(
        "--model", dest="model", default=1, type=int,
        help="Model number to be used for multi-model structures.")

    # Optional threshold arguments, not shown in help
    thr = namedtuple('threshold', 'name type')
    thresholds = [
        thr(name='aromatic_planarity', type='angle'),
        thr(name='hydroph_dist_max', type='distance'),
        thr(name='hbond_dist_max', type='distance'),
        thr(name='hbond_don_angle_min', type='angle'),
        thr(name='pistack_dist_max', type='distance'),
        thr(name='pistack_ang_dev', type='other'),
        thr(name='pistack_offset_max', type='distance'),
        thr(name='pication_dist_max', type='distance'),
        thr(name='saltbridge_dist_max', type='distance'),
        thr(name='halogen_dist_max', type='distance'),
        thr(name='halogen_acc_angle', type='angle'),
        thr(name='halogen_don_angle', type='angle'),
        thr(name='halogen_angle_dev', type='other'),
        thr(name='water_bridge_mindist', type='distance'),
        thr(name='water_bridge_maxdist', type='distance'),
        thr(name='water_bridge_omega_min', type='angle'),
        thr(name='water_bridge_omega_max', type='angle'),
        thr(name='water_bridge_theta_min', type='angle'),
    ]
    for t in thresholds:
        parser.add_argument('--%s' % t.name, dest=t.name,
                            type=lambda val: threshold_limiter(parser, val),
                            help=argparse.SUPPRESS)
    return parser, thresholds


def _apply_arguments_to_config(arguments):
    """Set the log level and copy the parsed options into the ``config`` module."""
    # configure log levels; verbose wins over quiet, quiet wins over silent
    config.VERBOSE = bool(arguments.verbose)
    config.QUIET = bool(arguments.quiet)
    config.SILENT = bool(arguments.silent)
    if config.VERBOSE:
        logger.setLevel(logging.DEBUG)
    elif config.QUIET:
        logger.setLevel(logging.WARN)
    elif config.SILENT:
        logger.setLevel(logging.CRITICAL)
    else:
        logger.setLevel(config.DEFAULT_LOG_LEVEL)

    config.MAXTHREADS = arguments.maxthreads
    config.XML = arguments.xml
    config.TXT = arguments.txt
    config.PICS = arguments.pics
    config.PYMOL = arguments.pymol
    config.STDOUT = arguments.stdout
    config.RAWSTRING = arguments.use_raw_string

    # The output path always ends with a slash before tilde expansion
    outpath = arguments.outpath
    if not outpath.endswith('/'):
        outpath = "".join([outpath, '/'])
    config.OUTPATH = tilde_expansion(outpath)
    config.BASEPATH = config.OUTPATH  # Used for batch processing

    config.BREAKCOMPOSITE = arguments.breakcomposite
    config.ALTLOC = arguments.altlocation
    config.PEPTIDES = arguments.peptides
    config.INTRA = arguments.intra
    config.NOFIX = arguments.nofix
    config.NOFIXFILE = arguments.nofixfile
    config.NOPDBCANMAP = arguments.nopdbcanmap
    config.KEEPMOD = arguments.keepmod
    config.DNARECEPTOR = arguments.dnareceptor
    config.OUTPUTFILENAME = arguments.outputfilename
    config.NOHYDRO = arguments.nohydro
    config.MODEL = arguments.model


def _apply_thresholds(parser, arguments, thresholds):
    """Validate user-supplied thresholds and store them as upper-case ``config`` attributes.

    Invalid values are reported through ``parser.error``.
    """
    # Assign values to global thresholds
    for t in thresholds:
        tvalue = getattr(arguments, t.name)
        if tvalue is None:
            continue  # not given on the command line, keep the configured default
        if t.type == 'angle' and not 0 < tvalue < 180:  # Check value for angle thresholds
            parser.error("Threshold for angles need to have values within 0 and 180.")
        if t.type == 'distance':
            if tvalue > 10:  # Check value for distance thresholds
                parser.error("Threshold for distances must not be larger than 10 Angstrom.")
            elif tvalue > config.BS_DIST + 1:
                # Dynamically adapt the search space for binding site residues
                config.BS_DIST = tvalue + 1
        setattr(config, t.name.upper(), tvalue)

    # Check additional conditions for interdependent thresholds
    if not config.HALOGEN_ACC_ANGLE > config.HALOGEN_ANGLE_DEV:
        parser.error("The halogen acceptor angle has to be larger than the halogen angle deviation.")
    if not config.HALOGEN_DON_ANGLE > config.HALOGEN_ANGLE_DEV:
        parser.error("The halogen donor angle has to be larger than the halogen angle deviation.")
    if not config.WATER_BRIDGE_MINDIST < config.WATER_BRIDGE_MAXDIST:
        parser.error("The water bridge minimum distance has to be smaller than the water bridge maximum distance.")
    if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX:
        parser.error("The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle")


def main():
    """Parse command line arguments and start main script for analysis.

    Steps: build the parser, copy the parsed options into ``config``, make
    sure PyMOL can be imported when ``--pics`` or ``--pymol`` was requested
    (exit status 1 otherwise), validate the thresholds and finally call
    ``run_analysis`` with the input files or PDB IDs.
    """
    parser, thresholds = _build_argument_parser()
    arguments = parser.parse_args()
    _apply_arguments_to_config(arguments)

    # Make sure we have pymol with --pics and --pymol
    if config.PICS or config.PYMOL:
        try:
            import pymol  # imported only to check availability
        except ImportError:
            logger.error('PyMOL is required for the --pics and --pymol option')
            sys.exit(1)

    _apply_thresholds(parser, arguments, thresholds)

    expanded_path = tilde_expansion(arguments.input) if arguments.input is not None else None
    run_analysis(expanded_path, arguments.pdbid)  # Start main script