Esempio n. 1
0
def main():
    '''
    Calculate a contact descriptor for every ligand in a directory.

    Each ``*.mol2`` file found in ``options.inputligdir`` is combined with the
    protein structure ``options.inputpdb``. The descriptor chosen through
    ``options.descriptor`` ('elements', 'sybyl' or 'credo') is calculated for
    the pair and, when ``options.format`` is 'csv', written as one row that
    ends with the ligand name (the file's base name up to the first dot).
    The first row written is a header made of the descriptor labels plus
    'ligand'.

    Rows go to ``options.output`` if it is set, otherwise to standard output.
    The process exits with status 1 when the protein file does not exist or
    the descriptor type is not one of the three handled here.
    '''
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    if options.output:
        # the csv module needs a text stream opened with newline='' on
        # Python 3 but a binary stream on Python 2
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        # choose how the output data will be written; writer stays None for
        # any format this script cannot emit so the loop below never touches
        # an unbound name
        writer = None
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        else:
            logger.warning("Unsupported output format {0!r}; no rows will be written."
                           .format(options.format))

        protein_path = options.inputpdb
        header_pending = True

        # iterate through all mol2 files in the ligand directory
        for lig_path in glob.glob(os.path.join(options.inputligdir, '*.mol2')):

            if not os.path.isfile(protein_path):
                logger.fatal("The protein file {0} does not exist.".format(protein_path))
                sys.exit(1)

            protein = get_molecule(protein_path)
            ligand = get_molecule(lig_path)

            # descriptor based on the sum of interacting element pairs
            if options.descriptor == 'elements':
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor based on the sum of interacting SYBYL atom type pairs
            elif options.descriptor == 'sybyl':
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor using structural interaction fingerprints
            elif options.descriptor == 'credo':
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)

            else:
                logger.fatal("Unknown descriptor type {0!r}.".format(options.descriptor))
                sys.exit(1)

            if writer is None:
                continue

            if header_pending:
                # build a new list so the labels returned by contacts are
                # not modified in place
                writer.writerow(list(labels) + ['ligand'])
                header_pending = False

            ligandname = os.path.basename(lig_path).split('.')[0]

            # descriptor values first, ligand name as the last column
            writer.writerow(descriptor.tolist() + [ligandname])

    finally:
        # never close the interpreter's own stdout
        if fh is not sys.stdout:
            fh.close()
Esempio n. 2
0
def main():
    '''
    Write binding affinities and contact descriptors for PDBbind complexes.

    The index returned by ``parse_index(options.pdbbind, options.index)`` is
    iterated by PDB code; every entry must provide a 'value' and a 'unit',
    which ``get_pkd`` turns into the affinity reported in the 'pKd/pKi'
    column. The protein (``<pdb>_<label>.pdb``) and ligand
    (``<pdb>_<label>.mol2``) files are looked up inside
    ``options.pdbbind/<pdb>``, with the labels taken from
    ``config['standard']``. Entries whose directory, protein file or ligand
    file is missing are logged and skipped.

    When ``options.format`` is 'csv', a header row is written first, followed
    by one row per complex: affinity (two decimal places), the descriptor
    values and the PDB code. Rows go to ``options.output`` if it is set,
    otherwise to standard output. The process exits with status 1 when
    ``options.descriptor`` is not 'credo', 'elements' or 'sybyl'.
    '''
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    pdbbindconf = config['standard']

    data = parse_index(options.pdbbind, options.index)

    if options.output:
        # the csv module needs a text stream opened with newline='' on
        # Python 3 but a binary stream on Python 2
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        # choose how the output data will be written; writer stays None for
        # any format this script cannot emit so the loop below never touches
        # an unbound name
        writer = None
        if options.format == 'csv':
            writer = csv.writer(fh,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        else:
            logger.warning("Unsupported output format {0!r}; no rows will be written."
                           .format(options.format))

        header_pending = True

        # iterate through all protein-ligand complexes
        for pdb in data:

            # convert the measured value and its unit into the affinity that
            # is reported in the 'pKd/pKi' column
            pkd = get_pkd(float(data[pdb]['value']), data[pdb]['unit'])

            # the PDBbind directory containing all the structures for this entry
            entry_dir = os.path.join(options.pdbbind, pdb)

            # check if the directory actually exists
            if not os.path.exists(entry_dir):
                logger.error(
                    "The PDBbind directory for PDB entry {0} does not exist.".
                    format(pdb))
                continue

            # create the paths to the protein and ligand using the labels
            # configured for this data set
            prot_path = os.path.join(
                entry_dir, '{0}_{1}.pdb'.format(pdb, pdbbindconf['protein']))
            lig_path = os.path.join(
                entry_dir, '{0}_{1}.mol2'.format(pdb, pdbbindconf['ligand']))

            if not os.path.exists(prot_path):
                logger.error(
                    "The protein pocket structure for PDB entry {0} cannot be found."
                    .format(pdb))
                continue

            if not os.path.exists(lig_path):
                logger.error(
                    "The ligand structure for PDB entry {0} cannot be found.".
                    format(pdb))
                continue

            protein = get_molecule(prot_path)
            ligand = get_molecule(lig_path)

            # descriptor using structural interaction fingerprints
            if options.descriptor == 'credo':
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor based on the sum of interacting element pairs
            elif options.descriptor == 'elements':
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor based on the sum of interacting SYBYL atom type pairs
            elif options.descriptor == 'sybyl':
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)

            else:
                logger.fatal("Unknown descriptor type {0!r}.".format(options.descriptor))
                sys.exit(1)

            if writer is None:
                continue

            if header_pending:
                # build a new list so the labels returned by contacts are
                # not modified in place
                writer.writerow(['pKd/pKi'] + list(labels) + ['pdb'])
                header_pending = False

            # report the affinity with two decimal places
            pkdstring = "{0:.2f}".format(pkd)

            # affinity first, descriptor values next, PDB code last
            writer.writerow([pkdstring] + descriptor.tolist() + [pdb])

    finally:
        # never close the interpreter's own stdout
        if fh is not sys.stdout:
            fh.close()
Esempio n. 3
0
def main():
    """
    Write binding affinities and contact descriptors for the CSAR data set.

    Every entry listed in ``config['csar']['directory']`` is expected to hold
    a ``kd.dat`` file with three comma-separated fields (entry, PDB code and
    affinity) and a ``*_complex.mol2`` file. The complex is loaded with
    ``get_molecule`` and the ligand is taken from it with ``extract_ligand``.
    The descriptor chosen through ``options.descriptor`` ('elements', 'sybyl'
    or 'credo') is then calculated for the pair.

    When ``options.format`` is 'csv', a header row is written first, followed
    by one row per entry: the affinity exactly as read from ``kd.dat``, the
    descriptor values and the PDB code. Rows go to ``options.output`` if it
    is set, otherwise to standard output.

    The process exits with status 1 when ``kd.dat`` or the complex file is
    missing, or when the descriptor type is not recognised.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    csarconf = config['csar']

    if options.output:
        # the csv module needs a text stream opened with newline='' on
        # Python 3 but a binary stream on Python 2
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        # choose how the output data will be written; writer stays None for
        # any format this script cannot emit so the loop below never touches
        # an unbound name
        writer = None
        if options.format == 'csv':
            writer = csv.writer(fh,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        else:
            logger.warning("Unsupported output format {0!r}; no rows will be written."
                           .format(options.format))

        header_pending = True

        # iterate through all numbered directories
        for directory in os.listdir(csarconf['directory']):
            entrydir = os.path.join(csarconf['directory'], directory)

            # kd.dat holds the affinity for this entry
            kddat_path = os.path.join(entrydir, 'kd.dat')

            # exit if kd.dat is missing
            if not os.path.isfile(kddat_path):
                logger.fatal(
                    "CSAR directory {} does not contain kd.dat file.".format(
                        directory))
                sys.exit(1)

            # read through a context manager so the handle is released
            # right away instead of whenever it is garbage collected
            with open(kddat_path) as kddat:
                content = kddat.read()

            # the first field (entry) is not needed for the output
            _entry, pdb, pkd = content.strip().replace(' ', '').split(',')

            complex_paths = glob.glob(os.path.join(entrydir, '*_complex.mol2'))

            # exit with a clear message instead of an IndexError from pop()
            if not complex_paths:
                logger.fatal(
                    "CSAR directory {} does not contain a *_complex.mol2 file."
                    .format(directory))
                sys.exit(1)

            protein_path = complex_paths.pop()

            protein = get_molecule(str(protein_path))
            ligand = extract_ligand(protein.OBMol)

            # descriptor based on the sum of interacting element pairs
            if options.descriptor == 'elements':
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor based on the sum of interacting SYBYL atom type pairs
            elif options.descriptor == 'sybyl':
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor using structural interaction fingerprints
            elif options.descriptor == 'credo':
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)

            else:
                logger.fatal("Unknown descriptor type {0!r}.".format(
                    options.descriptor))
                sys.exit(1)

            if writer is None:
                continue

            if header_pending:
                # build a new list so the labels returned by contacts are
                # not modified in place
                writer.writerow(['pKd/pKi'] + list(labels) + ['pdb'])
                header_pending = False

            # affinity first, descriptor values next, PDB code last
            writer.writerow([pkd] + descriptor.tolist() + [pdb])

    finally:
        # never close the interpreter's own stdout
        if fh is not sys.stdout:
            fh.close()
Esempio n. 4
0
def main():
    """
    Write binding affinities and contact descriptors for the CSAR data set.

    Every entry listed in ``config['csar']['directory']`` is expected to hold
    a ``kd.dat`` file with three comma-separated fields (entry, PDB code and
    affinity) and a ``*_complex.mol2`` file. The complex is loaded with
    ``get_molecule`` and the ligand is taken from it with ``extract_ligand``.
    The descriptor chosen through ``options.descriptor`` ('elements', 'sybyl'
    or 'credo') is then calculated for the pair.

    When ``options.format`` is 'csv', a header row is written first, followed
    by one row per entry: the affinity exactly as read from ``kd.dat``, the
    descriptor values and the PDB code. Rows go to ``options.output`` if it
    is set, otherwise to standard output.

    The process exits with status 1 when ``kd.dat`` or the complex file is
    missing, or when the descriptor type is not recognised.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    csarconf = config['csar']

    if options.output:
        # the csv module needs a text stream opened with newline='' on
        # Python 3 but a binary stream on Python 2
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        # choose how the output data will be written; writer stays None for
        # any format this script cannot emit so the loop below never touches
        # an unbound name
        writer = None
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        else:
            logger.warning("Unsupported output format {0!r}; no rows will be written."
                           .format(options.format))

        header_pending = True

        # iterate through all numbered directories
        for directory in os.listdir(csarconf['directory']):
            entrydir = os.path.join(csarconf['directory'], directory)

            # kd.dat holds the affinity for this entry
            kddat_path = os.path.join(entrydir, 'kd.dat')

            # exit if kd.dat is missing
            if not os.path.isfile(kddat_path):
                logger.fatal("CSAR directory {} does not contain kd.dat file."
                             .format(directory))
                sys.exit(1)

            # read through a context manager so the handle is released
            # right away instead of whenever it is garbage collected
            with open(kddat_path) as kddat:
                content = kddat.read()

            # the first field (entry) is not needed for the output
            _entry, pdb, pkd = content.strip().replace(' ', '').split(',')

            complex_paths = glob.glob(os.path.join(entrydir, '*_complex.mol2'))

            # exit with a clear message instead of an IndexError from pop()
            if not complex_paths:
                logger.fatal("CSAR directory {} does not contain a *_complex.mol2 file."
                             .format(directory))
                sys.exit(1)

            protein_path = complex_paths.pop()

            protein = get_molecule(str(protein_path))
            ligand = extract_ligand(protein.OBMol)

            # descriptor based on the sum of interacting element pairs
            if options.descriptor == 'elements':
                descriptor, labels = contacts.element_descriptor(protein, ligand,
                                                                 binsize=options.binsize)

            # descriptor based on the sum of interacting SYBYL atom type pairs
            elif options.descriptor == 'sybyl':
                descriptor, labels = contacts.sybyl_atom_type_descriptor(protein, ligand,
                                                                         binsize=options.binsize)

            # descriptor using structural interaction fingerprints
            elif options.descriptor == 'credo':
                descriptor, labels = contacts.sift_descriptor(protein, ligand,
                                                              binsize=options.binsize)

            else:
                logger.fatal("Unknown descriptor type {0!r}.".format(options.descriptor))
                sys.exit(1)

            if writer is None:
                continue

            if header_pending:
                # build a new list so the labels returned by contacts are
                # not modified in place
                writer.writerow(['pKd/pKi'] + list(labels) + ['pdb'])
                header_pending = False

            # affinity first, descriptor values next, PDB code last
            writer.writerow([pkd] + descriptor.tolist() + [pdb])

    finally:
        # never close the interpreter's own stdout
        if fh is not sys.stdout:
            fh.close()
Esempio n. 5
0
def main():
    '''
    Write RMSDs and contact descriptors for docking poses of PDBbind ligands.

    The index returned by ``parse_index(options.pdbbind, options.index)`` is
    iterated by PDB code. For each entry the protein file
    (``<pdb>_<label>.pdb``) and the reference ligand (``<pdb>_<label>.mol2``)
    are located inside ``options.pdbbind/<pdb>``, with the labels taken from
    ``config['pdbbind']``. Entries without a directory or protein file are
    logged and skipped.

    For every docking method in ``dockingMethods`` the poses are read from
    ``posesDir/<method>/<pdb>``. ``calcRMSDPoses`` supplies the RMSD of each
    pose against the reference ligand, and the descriptor chosen through
    ``options.descriptor`` ('credo', 'elements' or 'sybyl') is calculated for
    the protein and each pose.

    When ``options.format`` is 'csv', a header row is written first, followed
    by one row per pose: RMSD, descriptor values and a pose ID made of the
    pose file name (up to the first dot) and the docking method. Rows go to
    ``options.output`` if it is set, otherwise to standard output. The
    process exits with status 1 when the descriptor type is not recognised.
    '''
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    pdbbindconf = config['pdbbind']

    data = parse_index(options.pdbbind, options.index)

    if options.output:
        # the csv module needs a text stream opened with newline='' on
        # Python 3 but a binary stream on Python 2
        if sys.version_info[0] >= 3:
            fh = open(options.output, 'w', newline='')
        else:
            fh = open(options.output, 'wb')
    else:
        fh = sys.stdout

    try:
        # choose how the output data will be written; writer stays None for
        # any format this script cannot emit so the loops below never touch
        # an unbound name
        writer = None
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        else:
            logger.warning("Unsupported output format {0!r}; no rows will be written."
                           .format(options.format))

        header_pending = True

        # iterate through all protein-ligand complexes
        for pdb in data:

            # the PDBbind directory containing all the structures for this entry
            entry_dir = os.path.join(options.pdbbind, pdb)

            # check if the directory actually exists
            if not os.path.exists(entry_dir):
                logger.error("The PDBbind directory for PDB entry {0} does not exist.".format(pdb))
                continue

            # create the paths to the protein and reference ligand using the
            # labels configured for this data set
            prot_path = os.path.join(entry_dir, '{0}_{1}.pdb'.format(pdb, pdbbindconf['protein']))
            ref_lig_path = os.path.join(entry_dir, '{0}_{1}.mol2'.format(pdb, pdbbindconf['ligand']))

            if not os.path.exists(prot_path):
                logger.error("The protein pocket structure for PDB entry {0} cannot be found.".format(pdb))
                continue

            # for each protein, the ligand has docking poses generated by
            # several docking methods
            for score in dockingMethods:
                pose_path = os.path.join(posesDir, score, pdb)

                # TODO: add a pattern for each docking method, right now this
                # only works with gold
                lig_pattern = "gold_soln"

                # progress goes through the logger: printing to stdout would
                # corrupt the CSV when no output file is given
                logger.info("Calculating RMSDs for ligand %s, docking method %s", pdb, score)

                # RMSD lookup for all poses, indexed by pose file name
                RMSDs = calcRMSDPoses(ref_lig_path, pose_path, lig_pattern)

                for pose in listFiles(pose_path, lig_pattern):
                    lig_path = os.path.join(pose_path, pose)
                    poseRMSD = RMSDs[pose]
                    poseID = pose.split('.')[0] + '_' + score

                    if not os.path.exists(lig_path):
                        logger.error("The ligand structure for PDB entry {0} cannot be found.".format(pdb))
                        continue

                    protein = get_molecule(prot_path)
                    ligand = get_molecule(lig_path)

                    # descriptor using structural interaction fingerprints
                    if options.descriptor == 'credo':
                        descriptor, labels = contacts.sift_descriptor(
                            protein, ligand, binsize=options.binsize)

                    # descriptor based on the sum of interacting element pairs
                    elif options.descriptor == 'elements':
                        descriptor, labels = contacts.element_descriptor(
                            protein, ligand, binsize=options.binsize)

                    # descriptor based on the sum of interacting SYBYL atom type pairs
                    elif options.descriptor == 'sybyl':
                        descriptor, labels = contacts.sybyl_atom_type_descriptor(
                            protein, ligand, binsize=options.binsize)

                    else:
                        logger.fatal("Unknown descriptor type {0!r}.".format(options.descriptor))
                        sys.exit(1)

                    if writer is None:
                        continue

                    if header_pending:
                        # build a new list so the labels returned by contacts
                        # are not modified in place
                        writer.writerow(['RMSD'] + list(labels) + ['ligandID'])
                        header_pending = False

                    # RMSD first, descriptor values next, pose ID last
                    writer.writerow([poseRMSD] + descriptor.tolist() + [poseID])

    finally:
        # never close the interpreter's own stdout
        if fh is not sys.stdout:
            fh.close()