Ejemplo n.º 1
0
def get_molecule(path):
    """
    Reads a molecular structure with Open Babel.

    The file format passed to pybel is the extension of ``path`` without the
    leading dot, and the first molecule yielded by ``pybel.readfile`` is
    returned.

    If ``path`` does not exist, or the reader yields no molecule at all, an
    error is logged and the program terminates with exit status 1.
    """
    if not os.path.exists(path):
        logger.error('cannot load molecule: the file {} does not exist.'.format(path))
        sys.exit(1)

    # DETERMINE FILE FORMAT FROM THE EXTENSION (WITHOUT THE LEADING DOT)
    filetype = os.path.splitext(path)[1][1:]

    try:
        # THE BUILTIN next() WORKS ON PYTHON 2.6+ AND 3, WHEREAS THE .next()
        # METHOD OF ITERATORS ONLY EXISTS ON PYTHON 2
        molecule = next(pybel.readfile(filetype, path))
    except StopIteration:
        # THE FILE COULD BE OPENED BUT DID NOT CONTAIN A SINGLE MOLECULE
        logger.error("cannot read molecule {}: StopIteration.".format(path))
        sys.exit(1)

    return molecule
Ejemplo n.º 2
0
def get_molecule(path):
    """
    Reads a molecular structure with Open Babel.

    The file format passed to pybel is the extension of ``path`` without the
    leading dot, and the first molecule yielded by ``pybel.readfile`` is
    returned.

    If ``path`` does not exist, or the reader yields no molecule at all, an
    error is logged and the program terminates with exit status 1.
    """
    if not os.path.exists(path):
        logger.error(
            'cannot load molecule: the file {} does not exist.'.format(
                path))
        sys.exit(1)

    # DETERMINE FILE FORMAT FROM THE EXTENSION (WITHOUT THE LEADING DOT)
    filetype = os.path.splitext(path)[1][1:]

    try:
        # THE BUILTIN next() WORKS ON PYTHON 2.6+ AND 3, WHEREAS THE .next()
        # METHOD OF ITERATORS ONLY EXISTS ON PYTHON 2
        molecule = next(pybel.readfile(filetype, path))
    except StopIteration:
        # THE FILE COULD BE OPENED BUT DID NOT CONTAIN A SINGLE MOLECULE
        logger.error(
            "cannot read molecule {}: StopIteration.".format(path))
        sys.exit(1)

    return molecule
Ejemplo n.º 3
0
def main():
    '''
    Calculate an interaction descriptor for every complex of a PDBbind index
    and write the results as CSV.

    For every entry of the parsed index, the protein and ligand structure
    files are looked up inside the PDBbind directory, the descriptor selected
    with --descriptor is calculated and one row is written that holds the
    value returned by get_pkd (two decimal places), the descriptor values and
    the PDB code. The column header is written once, before the first row.
    Entries whose directory or structure files are missing are logged and
    skipped.

    Output goes to the file given with --output, or to STDOUT when no file was
    given. Only the 'csv' format is implemented; any other format is reported
    and the program exits with status 1 before any output file is touched.
    '''
    options = parse_options()

    # THIS OPTION WILL PRODUCE MORE VERBOSE OUTPUT
    if options.debug:
        logger.setLevel(logging.DEBUG)

    # FAIL EARLY: WITHOUT A WRITER NOTHING COULD BE WRITTEN LATER ON, AND THE
    # OUTPUT FILE MUST NOT BE TRUNCATED FOR A RUN THAT CANNOT SUCCEED
    if options.format != 'csv':
        logger.error("Unsupported output format: {0}.".format(options.format))
        sys.exit(1)

    pdbbindconf = config['standard']

    data = parse_index(options.pdbbind, options.index)

    if options.output:
        # THE CSV MODULE WANTS A BINARY FILE ON PYTHON 2 AND A TEXT FILE
        # OPENED WITH newline='' ON PYTHON 3
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        writer = csv.writer(fh,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)

        header_pending = True

        # ITERATE THROUGH ALL PROTEIN-LIGAND COMPLEXES
        for pdb in data:

            # CONVERT THE INDEXED ACTIVITY VALUE AND ITS UNIT WITH get_pkd
            pkd = get_pkd(float(data[pdb]['value']), data[pdb]['unit'])

            # THE PDBBIND DIRECTORY CONTAINING ALL THE STRUCTURES FOR THIS PDB ENTRY
            entry_dir = os.path.join(options.pdbbind, pdb)

            # CHECK IF THE DIRECTORY ACTUALLY EXISTS
            if not os.path.exists(entry_dir):
                logger.error(
                    "The PDBbind directory for PDB entry {0} does not exist."
                    .format(pdb))
                continue

            # CREATE THE PATHS TO THE PROTEIN AND LIGAND USING THE SPECIFIC _<POCKET,PROTEIN,LIGAND,ZINC> LABEL
            prot_path = os.path.join(
                entry_dir, '{0}_{1}.pdb'.format(pdb, pdbbindconf['protein']))
            lig_path = os.path.join(
                entry_dir, '{0}_{1}.mol2'.format(pdb, pdbbindconf['ligand']))

            if not os.path.exists(prot_path):
                logger.error(
                    "The protein pocket structure for PDB entry {0} cannot be found."
                    .format(pdb))
                continue

            if not os.path.exists(lig_path):
                logger.error(
                    "The ligand structure for PDB entry {0} cannot be found."
                    .format(pdb))
                continue

            protein = get_molecule(prot_path)
            ligand = get_molecule(lig_path)

            # CALCULATE DESCRIPTOR USING STRUCTURAL INTERACTION FINGERPRINTS
            if options.descriptor == 'credo':
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)

            # CALCULATE DESCRIPTOR BASED ON THE SUM OF INTERACTING ELEMENT PAIRS
            elif options.descriptor == 'elements':
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)

            # CALCULATE DESCRIPTOR BASED ON SYBYL ATOM TYPES
            elif options.descriptor == 'sybyl':
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)

            if header_pending:

                # BUILD THE COLUMN LABELS AS A NEW LIST SO THAT THE LIST
                # RETURNED BY THE DESCRIPTOR FUNCTION IS NOT MODIFIED
                writer.writerow(['pKd/pKi'] + list(labels) + ['pdb'])

                header_pending = False

            # FORMAT THE AFFINITY WITH TWO DECIMAL PLACES
            pkdstring = "{0:.2f}".format(pkd)

            # AFFINITY FIRST, THEN THE DESCRIPTOR, THEN THE PDB CODE
            writer.writerow([pkdstring] + descriptor.tolist() + [pdb])

    finally:
        # CLOSE (AND THEREBY FLUSH) THE OUTPUT FILE EVEN IF AN ERROR OCCURRED,
        # BUT NEVER CLOSE STDOUT
        if fh is not sys.stdout:
            fh.close()
Ejemplo n.º 4
0
def parse_options():
    '''
    Parse the command line and validate the resulting options.

    Returns the options object produced by optparse. The PDBbind directory
    and the index file are mandatory and must exist, and the descriptor must
    be one of 'elements', 'credo' or 'sybyl'; otherwise the problem is
    logged and the program exits with status 1.
    '''
    parser = OptionParser(usage="%prog [options]")

    # (FLAGS, SETTINGS) OF EVERY OPTION, IN THE ORDER THEY ARE REGISTERED
    option_table = [
        (("--debug",),
         dict(action="store_true",
              dest="debug",
              default=False,
              help='Set logging level to debug and print more verbose output.')),
        (("-B", "--binsize"),
         dict(dest="binsize",
              type=float,
              default=0.0,
              help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")),
        (("-F", "--format"),
         dict(dest="format",
              default='csv',
              help="Format to use for writing the SIFt of the protein-ligand complex.")),
        (("-O", "--output"),
         dict(dest="output",
              default=None,
              help="File to which the data will be written (default=STDOUT).")),
        (("-P", "--pdbbind-dir"),
         dict(dest="pdbbind",
              default=None,
              help="PDBbind directory.")),
        (("-I", "--index"),
         dict(dest="index",
              default=None,
              help="PDBbind data index file for a specific data set (core,refined,general).")),
        (("-D", "--descriptor"),
         dict(dest="descriptor",
              default='credo',
              help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")),
    ]

    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    # POSITIONAL ARGUMENTS ARE NOT USED
    options, _positional = parser.parse_args()

    # THE PDBBIND DIRECTORY IS MANDATORY AND HAS TO EXIST
    if not options.pdbbind:
        logger.error("The PDBbind directory must be provided.")
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(options.pdbbind):
        logger.fatal("The specified PDBbind directory does not exist.")
        sys.exit(1)

    # THE SAME HOLDS FOR THE DATA INDEX FILE
    if not options.index:
        logger.error("A path to a PDBbind data index file must be provided.")
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(options.index):
        logger.fatal("The specified PDBbind data index file does not exist.")
        sys.exit(1)

    # ONLY THESE DESCRIPTORS ARE ACCEPTED
    known_descriptors = ('elements', 'credo', 'sybyl')

    if options.descriptor not in known_descriptors:
        logger.fatal("Invalid descriptor: {0}.".format(options.descriptor))
        parser.print_help()
        sys.exit(1)

    return options
Ejemplo n.º 5
0
def parse_options():
    '''
    Parse the command line and validate the resulting options.

    Returns the options object produced by optparse. The PDBbind directory
    and the index file must be set and must exist, and the descriptor must be
    one of 'elements', 'credo' or 'sybyl'; otherwise the problem is logged
    and the program exits with status 1.

    NOTE(review): the defaults of --output, --pdbbind-dir and --index are
    absolute paths of one particular machine (they replaced an earlier default
    of None). They are kept so that existing invocations behave the same, but
    they should probably be reset to None before this is shared.
    '''
    # PARSE COMMAND LINE
    parser = OptionParser(usage="%prog [options]")

    parser.add_option("--debug",
                      action="store_true",
                      dest="debug",
                      default=False,
                      help='Set logging level to debug and print more verbose output.')

    parser.add_option("-B", "--binsize",
                      dest="binsize",
                      type=float,
                      default=0.0,
                      help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")

    parser.add_option("-F", "--format",
                      dest="format",
                      default='csv',
                      help="Format to use for writing the SIFt of the protein-ligand complex.")

    # THE HELP TEXT USED TO CLAIM THAT THE DEFAULT IS STDOUT, WHICH IS NOT TRUE
    # FOR THIS DEFAULT; %default LETS OPTPARSE PRINT THE ACTUAL VALUE
    parser.add_option("-O", "--output",
                      dest="output",
                      default="/home/dat/WORK/DB/DESCRIPTORS/CASF2014-refined_SIFt_RMSD.csv",
                      help="File to which the data will be written (default=%default).")

    parser.add_option("-P", "--pdbbind-dir",
                      dest="pdbbind",
                      default="/home/dat/WORK/DB/PDBbind/v2014-refined/",
                      help="PDBbind directory.")

    parser.add_option("-I", "--index",
                      dest="index",
                      default="/home/dat/WORK/DB/PDBbind/v2014-refined/INDEX_refined_data.2014",
                      help="PDBbind data index file for a specific data set (core,refined,general).")

    parser.add_option("-D", "--descriptor",
                      dest="descriptor",
                      default='credo',
                      help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")

    # GET COMMAND LINE OPTIONS (POSITIONAL ARGUMENTS ARE NOT USED)
    options, _positional = parser.parse_args()

    # WITH THE DEFAULTS ABOVE THIS ONLY TRIGGERS FOR AN EXPLICITLY EMPTY VALUE
    if not options.pdbbind:
        logger.error("The PDBbind directory must be provided.")
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(options.pdbbind):
        logger.fatal("The specified PDBbind directory does not exist.")
        sys.exit(1)

    if not options.index:
        logger.error("A path to a PDBbind data index file must be provided.")
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(options.index):
        logger.fatal("The specified PDBbind data index file does not exist.")
        sys.exit(1)

    if options.descriptor not in ('elements', 'credo', 'sybyl'):
        logger.fatal("Invalid descriptor: {0}.".format(options.descriptor))
        parser.print_help()
        sys.exit(1)

    return options
Ejemplo n.º 6
0
def main():
    '''
    Calculate an interaction descriptor for every docking pose of every
    complex of a PDBbind index and write the results as CSV.

    For every entry of the parsed index and every docking method in
    dockingMethods, the RMSDs of the poses are obtained from calcRMSDPoses
    using the PDBbind ligand file as reference. For each pose file the
    descriptor selected with --descriptor is then calculated against the
    protein structure and one row is written that holds the pose RMSD, the
    descriptor values and a pose identifier built from the pose file name and
    the docking method. The column header is written once, before the first
    row. Entries with a missing directory or protein file, and poses with a
    missing file, are logged and skipped.

    Output goes to the file given with --output, or to STDOUT when that
    option is empty. Only the 'csv' format is implemented; any other format
    is reported and the program exits with status 1 before any output file is
    touched.
    '''
    options = parse_options()

    # THIS OPTION WILL PRODUCE MORE VERBOSE OUTPUT
    if options.debug:
        logger.setLevel(logging.DEBUG)

    # FAIL EARLY: WITHOUT A WRITER NOTHING COULD BE WRITTEN LATER ON, AND THE
    # OUTPUT FILE MUST NOT BE TRUNCATED FOR A RUN THAT CANNOT SUCCEED
    if options.format != 'csv':
        logger.error("Unsupported output format: {0}.".format(options.format))
        sys.exit(1)

    pdbbindconf = config['pdbbind']

    data = parse_index(options.pdbbind, options.index)

    # \TODO: add pattern for each docking method, right now only works with gold
    lig_pattern = "gold_soln"

    if options.output:
        # THE CSV MODULE WANTS A BINARY FILE ON PYTHON 2 AND A TEXT FILE
        # OPENED WITH newline='' ON PYTHON 3
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        writer = csv.writer(fh,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)

        header_pending = True

        # ITERATE THROUGH ALL PROTEIN-LIGAND COMPLEXES
        for pdb in data:

            # THE PDBBIND DIRECTORY CONTAINING ALL THE STRUCTURES FOR THIS PDB ENTRY
            entry_dir = os.path.join(options.pdbbind, pdb)

            # CHECK IF THE DIRECTORY ACTUALLY EXISTS
            if not os.path.exists(entry_dir):
                logger.error("The PDBbind directory for PDB entry {0} does not exist.".format(pdb))
                continue

            # CREATE THE PATHS TO THE PROTEIN AND LIGAND USING THE SPECIFIC _<POCKET,PROTEIN,LIGAND,ZINC> LABEL
            prot_path = os.path.join(entry_dir, '{0}_{1}.pdb'.format(pdb, pdbbindconf['protein']))

            # THE PDBBIND LIGAND IS THE REFERENCE FOR THE RMSD OF EVERY POSE
            ref_lig_path = os.path.join(entry_dir, '{0}_{1}.mol2'.format(pdb, pdbbindconf['ligand']))

            if not os.path.exists(prot_path):
                logger.error("The protein pocket structure for PDB entry {0} cannot be found.".format(pdb))
                continue

            # FOR EACH PROTEIN, THE LIGAND HAS DOCKING POSES FROM EVERY DOCKING METHOD
            for score in dockingMethods:
                pose_path = os.path.join(posesDir, score, pdb)

                # REPORT PROGRESS THROUGH THE LOGGER INSTEAD OF print(): THE
                # CSV ITSELF MAY BE WRITTEN TO STDOUT AND MUST NOT BE MIXED
                # WITH PROGRESS MESSAGES
                logger.info("Calculating RMSDs for ligand " + pdb + ", docking method " + score)

                # RMSD DICT FOR ALL POSES, KEYED BY POSE FILE NAME
                RMSDs = calcRMSDPoses(ref_lig_path, pose_path, lig_pattern)

                for pose in listFiles(pose_path, lig_pattern):
                    lig_path = os.path.join(pose_path, pose)
                    poseRMSD = RMSDs[pose]

                    # FILE NAME UP TO THE FIRST DOT, TAGGED WITH THE DOCKING METHOD
                    poseID = pose.split('.')[0] + '_' + score

                    if not os.path.exists(lig_path):
                        logger.error("The ligand structure for PDB entry {0} cannot be found.".format(pdb))
                        continue

                    protein = get_molecule(prot_path)
                    ligand = get_molecule(lig_path)

                    # CALCULATE DESCRIPTOR USING STRUCTURAL INTERACTION FINGERPRINTS
                    if options.descriptor == 'credo':
                        descriptor, labels = contacts.sift_descriptor(protein, ligand, binsize=options.binsize)

                    # CALCULATE DESCRIPTOR BASED ON THE SUM OF INTERACTING ELEMENT PAIRS
                    elif options.descriptor == 'elements':
                        descriptor, labels = contacts.element_descriptor(protein, ligand, binsize=options.binsize)

                    # CALCULATE DESCRIPTOR BASED ON SYBYL ATOM TYPES
                    elif options.descriptor == 'sybyl':
                        descriptor, labels = contacts.sybyl_atom_type_descriptor(protein, ligand, binsize=options.binsize)

                    if header_pending:

                        # BUILD THE COLUMN LABELS AS A NEW LIST SO THAT THE LIST
                        # RETURNED BY THE DESCRIPTOR FUNCTION IS NOT MODIFIED
                        writer.writerow(['RMSD'] + list(labels) + ['ligandID'])

                        header_pending = False

                    # RMSD FIRST, THEN THE DESCRIPTOR, THEN THE POSE IDENTIFIER
                    writer.writerow([poseRMSD] + descriptor.tolist() + [poseID])

    finally:
        # CLOSE (AND THEREBY FLUSH) THE OUTPUT FILE EVEN IF AN ERROR OCCURRED,
        # BUT NEVER CLOSE STDOUT
        if fh is not sys.stdout:
            fh.close()