Ejemplo n.º 1
0
def parse_options():
    """
    Parse the command line and return the populated options object.

    Recognised options are --debug, -B/--binsize, -F/--format, -O/--output
    and -D/--descriptor. Positional arguments are parsed but discarded.

    If the descriptor is not one of 'elements', 'credo' or 'sybyl', a fatal
    message is logged, the help text is printed and the process exits with
    status 1.
    """
    cli = OptionParser(usage="%prog [options]")

    # (flags, keyword arguments), registered in the order shown by --help
    option_specs = (
        (("--debug",),
         dict(action="store_true",
              dest="debug",
              default=False,
              help='Set logging level to debug and print more verbose output.')),
        (("-B", "--binsize"),
         dict(dest="binsize",
              type=float,
              default=0.0,
              help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")),
        (("-F", "--format"),
         dict(dest="format",
              default='csv',
              help="Format to use for writing the SIFt of the protein-ligand complex.")),
        (("-O", "--output"),
         dict(dest="output",
              default=None,
              help="File to which the data will be written (default=STDOUT).")),
        (("-D", "--descriptor"),
         dict(dest="descriptor",
              default='elements',
              help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")),
    )

    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)

    # positional arguments are not used by this script
    parsed, _positional = cli.parse_args()

    known_descriptors = ('elements', 'credo', 'sybyl')

    if parsed.descriptor not in known_descriptors:
        logger.fatal("Invalid descriptor: {0}.".format(parsed.descriptor))
        cli.print_help()
        sys.exit(1)

    return parsed
Ejemplo n.º 2
0
def parse_options():
    """
    Build the option parser, parse the command line and return the options.

    Options: --debug, -B/--binsize (float), -F/--format, -O/--output and
    -D/--descriptor. Positional arguments are ignored.

    Exits with status 1 (after logging a fatal message and printing the help)
    when the descriptor is not 'elements', 'credo' or 'sybyl'.
    """
    opt_parser = OptionParser(usage="%prog [options]")
    add = opt_parser.add_option

    # verbosity
    add("--debug", action="store_true", dest="debug", default=False,
        help='Set logging level to debug and print more verbose output.')

    # descriptor parameters
    add("-B", "--binsize", dest="binsize", type=float, default=0.0,
        help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")

    # output control
    add("-F", "--format", dest="format", default='csv',
        help="Format to use for writing the SIFt of the protein-ligand complex.")
    add("-O", "--output", dest="output", default=None,
        help="File to which the data will be written (default=STDOUT).")

    # descriptor selection
    add("-D", "--descriptor", dest="descriptor", default='elements',
        help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")

    # only the option values are needed; leftover arguments are dropped
    opts = opt_parser.parse_args()[0]

    chosen = opts.descriptor
    if chosen not in ('elements', 'credo', 'sybyl'):
        logger.fatal("Invalid descriptor: {0}.".format(chosen))
        opt_parser.print_help()
        sys.exit(1)

    return opts
Ejemplo n.º 3
0
def main():
    """
    Compute a contact descriptor for every ``*.mol2`` ligand found in
    ``options.inputligdir`` against the protein ``options.inputpdb`` and
    write the results as CSV.

    The first row written is a header: the descriptor labels followed by
    ``'ligand'``. When ``options.format`` is ``'csv'``, each ligand then
    yields one row holding the descriptor values followed by the ligand file's
    base name (the text before the first dot).

    Output goes to ``options.output`` when it is set, otherwise to standard
    output. An output file opened here is always closed before returning or
    exiting.

    Exits with status 1 if the protein file does not exist.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    if not options.output:
        fh = sys.stdout
    elif sys.version_info[0] >= 3:
        # on Python 3 the csv module writes text and handles newlines itself
        fh = open(options.output, 'w', newline='')
    else:
        # on Python 2 the csv module expects a binary-mode file
        fh = open(options.output, 'wb')

    try:
        # choose how the output data will be written
        # NOTE(review): no writer is created for any format other than 'csv',
        # so the header write below raises NameError for such formats.
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

        header_pending = True
        protein_path = options.inputpdb

        # iterate through all mol2 files in the ligand directory
        for lig_path in glob.glob(os.path.join(options.inputligdir, '*.mol2')):

            if not os.path.isfile(protein_path):
                logger.fatal("The protein file {0} does not exist."
                             .format(protein_path))
                sys.exit(1)

            # the protein is loaded afresh for every ligand
            protein = get_molecule(protein_path)
            ligand = get_molecule(lig_path)

            # descriptor based on the sum of interacting element pairs
            if options.descriptor == 'elements':
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor based on SYBYL atom types
            elif options.descriptor == 'sybyl':
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor using structural interaction fingerprints
            elif options.descriptor == 'credo':
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)

            if header_pending:
                # the ligand name is the last column of every row
                labels.append('ligand')
                writer.writerow(labels)
                header_pending = False

            if options.format == 'csv':
                ligandname = os.path.basename(lig_path).split('.')[0]
                writer.writerow(descriptor.tolist() + [ligandname])
    finally:
        # never close the interpreter's standard output
        if fh is not sys.stdout:
            fh.close()
Ejemplo n.º 4
0
def main():
    """
    Compute a contact descriptor for every entry directory listed under
    ``config['csar']['directory']`` and write the results as CSV.

    Each entry directory must contain a ``kd.dat`` file holding three
    comma-separated fields (read here as entry, pdb, pkd) and a
    ``*_complex.mol2`` file from which the protein and its ligand are taken.

    The first row written is a header: ``'pKd/pKi'``, the descriptor labels,
    then ``'pdb'``. When ``options.format`` is ``'csv'``, each entry then
    yields one row ``[pkd] + descriptor values + [pdb]``.

    Output goes to ``options.output`` when it is set, otherwise to standard
    output. An output file opened here is always closed before returning or
    exiting.

    Exits with status 1 if an entry lacks ``kd.dat`` or a ``*_complex.mol2``
    file.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    csarconf = config['csar']

    if not options.output:
        fh = sys.stdout
    elif sys.version_info[0] >= 3:
        # on Python 3 the csv module writes text and handles newlines itself
        fh = open(options.output, 'w', newline='')
    else:
        # on Python 2 the csv module expects a binary-mode file
        fh = open(options.output, 'wb')

    try:
        # choose how the output data will be written
        # NOTE(review): no writer is created for any format other than 'csv',
        # so the header write below raises NameError for such formats.
        if options.format == 'csv':
            writer = csv.writer(fh, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

        header_pending = True

        # iterate through all numbered directories
        for directory in os.listdir(csarconf['directory']):
            entrydir = os.path.join(csarconf['directory'], directory)

            # kd.dat carries the affinity value for this entry
            kddat_path = os.path.join(entrydir, 'kd.dat')

            # exit if kd.dat is missing
            if not os.path.isfile(kddat_path):
                logger.fatal("CSAR directory {} does not contain kd.dat file."
                             .format(directory))
                sys.exit(1)

            with open(kddat_path) as kddat:
                fields = kddat.read().strip().replace(' ', '').split(',')

            # the entry field is not needed for the output
            _entry, pdb, pkd = fields

            complexes = glob.glob(os.path.join(entrydir, '*_complex.mol2'))

            # exit with a clear message instead of an IndexError
            if not complexes:
                logger.fatal("CSAR directory {} does not contain a "
                             "*_complex.mol2 file.".format(directory))
                sys.exit(1)

            # as before, the last match is used when there are several
            protein = get_molecule(str(complexes[-1]))
            ligand = extract_ligand(protein.OBMol)

            # descriptor based on the sum of interacting element pairs
            if options.descriptor == 'elements':
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor based on SYBYL atom types
            elif options.descriptor == 'sybyl':
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor using structural interaction fingerprints
            elif options.descriptor == 'credo':
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)

            if header_pending:
                # affinity goes first, the PDB code last
                labels.insert(0, 'pKd/pKi')
                labels.append('pdb')
                writer.writerow(labels)
                header_pending = False

            if options.format == 'csv':
                writer.writerow([pkd] + descriptor.tolist() + [pdb])
    finally:
        # never close the interpreter's standard output
        if fh is not sys.stdout:
            fh.close()
Ejemplo n.º 5
0
def parse_options():
    """
    Parse the command line, validate the PDBbind inputs and return the options.

    Options: --debug, -B/--binsize (float), -F/--format, -O/--output,
    -P/--pdbbind-dir, -I/--index and -D/--descriptor. Positional arguments are
    ignored.

    The process exits with status 1 when:

    * the PDBbind directory is not given (help is printed) or does not exist;
    * the index file is not given (help is printed) or does not exist;
    * the descriptor is not 'elements', 'credo' or 'sybyl' (help is printed).
    """
    cli = OptionParser(usage="%prog [options]")

    # (flags, keyword arguments), registered in the order shown by --help
    option_specs = [
        (("--debug",),
         dict(action="store_true", dest="debug", default=False,
              help='Set logging level to debug and print more verbose output.')),
        (("-B", "--binsize"),
         dict(dest="binsize", type=float, default=0.0,
              help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")),
        (("-F", "--format"),
         dict(dest="format", default='csv',
              help="Format to use for writing the SIFt of the protein-ligand complex.")),
        (("-O", "--output"),
         dict(dest="output", default=None,
              help="File to which the data will be written (default=STDOUT).")),
        (("-P", "--pdbbind-dir"),
         dict(dest="pdbbind", default=None,
              help="PDBbind directory.")),
        (("-I", "--index"),
         dict(dest="index", default=None,
              help="PDBbind data index file for a specific data set (core,refined,general).")),
        (("-D", "--descriptor"),
         dict(dest="descriptor", default='credo',
              help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")),
    ]

    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)

    # positional arguments are not used by this script
    parsed, _positional = cli.parse_args()

    # the PDBbind directory is mandatory and must exist
    if parsed.pdbbind:
        if not os.path.exists(parsed.pdbbind):
            logger.fatal("The specified PDBbind directory does not exist.")
            sys.exit(1)
    else:
        logger.error("The PDBbind directory must be provided.")
        cli.print_help()
        sys.exit(1)

    # the index file is mandatory and must exist
    if parsed.index:
        if not os.path.exists(parsed.index):
            logger.fatal("The specified PDBbind data index file does not exist.")
            sys.exit(1)
    else:
        logger.error("A path to a PDBbind data index file must be provided.")
        cli.print_help()
        sys.exit(1)

    known_descriptors = ('elements', 'credo', 'sybyl')

    if parsed.descriptor not in known_descriptors:
        logger.fatal("Invalid descriptor: {0}.".format(parsed.descriptor))
        cli.print_help()
        sys.exit(1)

    return parsed
Ejemplo n.º 6
0
def main():
    """
    Walk the entry directories under ``config['csar']['directory']``, compute
    the selected contact descriptor for each one and write the results as CSV.

    For every entry directory:

    * ``kd.dat`` is read and split on commas (spaces removed) into three
      fields: entry, pdb and pkd;
    * the ``*_complex.mol2`` file is loaded and its ligand extracted;
    * the descriptor chosen by ``options.descriptor`` is computed with
      ``options.binsize``.

    The first row written is a header: ``'pKd/pKi'``, the descriptor labels,
    then ``'pdb'``. When ``options.format`` is ``'csv'``, each entry then
    yields one row ``[pkd] + descriptor values + [pdb]``.

    Output goes to ``options.output`` when it is set, otherwise to standard
    output. An output file opened here is always closed before returning or
    exiting.

    Exits with status 1 if an entry lacks ``kd.dat`` or a ``*_complex.mol2``
    file.
    """
    options = parse_options()

    # this option will produce more verbose output
    if options.debug:
        logger.setLevel(logging.DEBUG)

    csarconf = config['csar']

    if not options.output:
        fh = sys.stdout
    elif sys.version_info[0] >= 3:
        # on Python 3 the csv module writes text and handles newlines itself
        fh = open(options.output, 'w', newline='')
    else:
        # on Python 2 the csv module expects a binary-mode file
        fh = open(options.output, 'wb')

    try:
        # choose how the output data will be written
        # NOTE(review): no writer is created for any format other than 'csv',
        # so the header write below raises NameError for such formats.
        if options.format == 'csv':
            writer = csv.writer(fh,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

        write_header = True

        # iterate through all numbered directories
        for directory in os.listdir(csarconf['directory']):
            entrydir = os.path.join(csarconf['directory'], directory)

            # kd.dat carries the affinity value for this entry
            kddat_path = os.path.join(entrydir, 'kd.dat')

            # exit if kd.dat is missing
            if not os.path.isfile(kddat_path):
                logger.fatal(
                    "CSAR directory {} does not contain kd.dat file.".format(
                        directory))
                sys.exit(1)

            # read through a context manager so the handle is released
            with open(kddat_path) as kddat:
                content = kddat.read()

            # the entry field is not needed for the output
            _entry, pdb, pkd = content.strip().replace(' ', '').split(',')

            matches = glob.glob(os.path.join(entrydir, '*_complex.mol2'))

            # exit with a clear message instead of an IndexError
            if not matches:
                logger.fatal(
                    "CSAR directory {} does not contain a *_complex.mol2 file."
                    .format(directory))
                sys.exit(1)

            # as before, the last match is used when there are several
            protein = get_molecule(str(matches[-1]))
            ligand = extract_ligand(protein.OBMol)

            # descriptor based on the sum of interacting element pairs
            if options.descriptor == 'elements':
                descriptor, labels = contacts.element_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor based on SYBYL atom types
            elif options.descriptor == 'sybyl':
                descriptor, labels = contacts.sybyl_atom_type_descriptor(
                    protein, ligand, binsize=options.binsize)

            # descriptor using structural interaction fingerprints
            elif options.descriptor == 'credo':
                descriptor, labels = contacts.sift_descriptor(
                    protein, ligand, binsize=options.binsize)

            if write_header:
                # affinity goes first, the PDB code last
                labels.insert(0, 'pKd/pKi')
                labels.append('pdb')
                writer.writerow(labels)
                write_header = False

            if options.format == 'csv':
                row = [pkd] + descriptor.tolist() + [pdb]
                writer.writerow(row)
    finally:
        # never close the interpreter's standard output
        if fh is not sys.stdout:
            fh.close()
Ejemplo n.º 7
0
def parse_options():
    """
    Parse the command line, validate the PDBbind inputs and return the options.

    Options: --debug, -B/--binsize (float), -F/--format, -O/--output,
    -P/--pdbbind-dir, -I/--index and -D/--descriptor. Positional arguments are
    ignored.

    The process exits with status 1 when:

    * the PDBbind directory is empty (help is printed) or does not exist;
    * the index file path is empty (help is printed) or does not exist;
    * the descriptor is not 'elements', 'credo' or 'sybyl' (help is printed).
    """
    # PARSE COMMAND LINE
    usage = "%prog [options]"
    parser = OptionParser(usage=usage)

    parser.add_option("--debug",
                      action="store_true",
                      dest="debug",
                      default=False,
                      help='Set logging level to debug and print more verbose output.')

    parser.add_option("-B", "--binsize",
                      dest="binsize",
                      type=float,
                      default=0.0,
                      help="Bin size (in Angstrom) to use for binning contacts based on inter-atomic distance.")

    parser.add_option("-F", "--format",
                      dest="format",
                      default='csv',
                      help="Format to use for writing the SIFt of the protein-ligand complex.")

    # NOTE(review): the three defaults below are absolute paths under
    # /home/dat; the trailing "#None" comments in the original suggest they
    # replaced a default of None. Confirm whether they should stay.

    # The help text used to claim "default=STDOUT", which is not what the
    # default below is; "%default" makes optparse print the actual value.
    parser.add_option("-O", "--output",
                      dest="output",
                      default="/home/dat/WORK/DB/DESCRIPTORS/CASF2014-refined_SIFt_RMSD.csv",
                      help="File to which the data will be written (default: %default).")

    parser.add_option("-P", "--pdbbind-dir",
                      dest="pdbbind",
                      default="/home/dat/WORK/DB/PDBbind/v2014-refined/",
                      help="PDBbind directory.")

    parser.add_option("-I", "--index",
                      dest="index",
                      default="/home/dat/WORK/DB/PDBbind/v2014-refined/INDEX_refined_data.2014",
                      help="PDBbind data index file for a specific data set (core,refined,general).")

    parser.add_option("-D", "--descriptor",
                      dest="descriptor",
                      default='credo',
                      help="Descriptor to use. Valid descriptors are 'credo', 'elements' and 'sybyl'.")

    # GET COMMAND LINE OPTIONS (positional arguments are not used)
    (options, _args) = parser.parse_args()

    if not options.pdbbind:
        logger.error("The PDBbind directory must be provided.")
        parser.print_help()
        sys.exit(1)

    elif not os.path.exists(options.pdbbind):
        logger.fatal("The specified PDBbind directory does not exist.")
        sys.exit(1)

    if not options.index:
        logger.error("A path to a PDBbind data index file must be provided.")
        parser.print_help()
        sys.exit(1)

    elif not os.path.exists(options.index):
        logger.fatal("The specified PDBbind data index file does not exist.")
        sys.exit(1)

    if options.descriptor not in ('elements', 'credo', 'sybyl'):
        logger.fatal("Invalid descriptor: {0}.".format(options.descriptor))
        parser.print_help()
        sys.exit(1)

    return options