Exemple #1
0
def main():
    """Convert a SMILES or SD file into fragment-network files in a directory.

    Command-line options:
      --input         path handed to ``parse_mols`` (required)
      --input_format  format handed to ``parse_mols`` (default ``"smi"``)
      --base_dir      directory the network is written to; created if missing
                      (required)
      --iso_flag      boolean passed to ``NodeHolder`` (default ``True``);
                      accepts true/false, yes/no, 1/0 in any letter case
    """

    def parse_bool(value):
        """Turn an option string into a real bool.

        argparse delivers option values as strings and every non-empty
        string - including "False" - is truthy, so the text has to be
        interpreted explicitly.
        """
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0"):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got %r" % (value,))

    parser = argparse.ArgumentParser(
        description=
        "Convert a SMILES or SDFile to input for Astex Fragment network.")
    # Both paths are mandatory: without them the code below would only fail
    # later with an unhelpful error on a None value.
    parser.add_argument("--input", required=True)
    parser.add_argument("--input_format", default="smi")
    parser.add_argument("--base_dir", required=True)
    parser.add_argument("--iso_flag", type=parse_bool, default=True)

    args = parser.parse_args()

    attrs = []
    for mol in tqdm(parse_mols(args.input, args.input_format)):
        # Entries that parse_mols yields as None are skipped.
        if mol is None:
            continue
        smiles = Chem.CanonSmiles(Chem.MolToSmiles(mol, isomericSmiles=True))
        attrs.append(Attr(smiles, ["EM", mol.GetProp("_Name")]))

    # makedirs (rather than mkdir) so a nested output path also works.
    if not os.path.isdir(args.base_dir):
        os.makedirs(args.base_dir)

    # Build the network
    node_holder = NodeHolder(iso_flag=args.iso_flag)
    node_holder = build_network(attrs, node_holder)
    # Write the data out
    write_data(args.base_dir, node_holder, attrs)
def process_smiles(smiles, id='1', recurse=True, no_output=False, verbosity=0):
    """Build the fragment network for one SMILES and print a summary.

    :param smiles: SMILES string wrapped in an ``Attr`` and also passed to
        ``build_network`` as its fourth positional argument
    :param id: identifier stored next to the "EM" tag in the ``Attr``
    :param recurse: forwarded to ``build_network``
    :param no_output: when truthy, the per-node / per-edge / per-attribute
        lines are suppressed; the final count line is printed regardless
    :param verbosity: forwarded to ``build_network``
    :return: None
    """
    attrs = [Attr(smiles, ["EM", id])]

    # Build the network (max_frags is passed as 0).
    holder = NodeHolder(iso_flag=False)
    holder = build_network(attrs, holder, 0, smiles, verbosity, recurse=recurse)

    def dump(items):
        # One printed line per item, using its str() form.
        for item in items:
            print(str(item))

    if no_output:
        pass
    else:
        dump(holder.get_nodes())
        dump(holder.get_edges())
        dump(attrs)

    node_count = len(holder.get_nodes())
    edge_count = len(holder.get_edges())
    print("Number of nodes: " + str(node_count) + " edges: " + str(edge_count))
Exemple #3
0
def main():
    """Convert a SMILES or SD file into fragment-network files in a directory.

    Command-line options:
      --input                    input file; must exist
      --input_format             format handed to ``parse_mols`` (default "smi")
      --base_dir                 output directory; must already exist
      --isomeric/--non_isomeric  set ``iso_flag`` for ``NodeHolder``
                                 (default True)
      -v / -vv                   verbosity 1 or 2 (default 0); any non-zero
                                 verbosity disables the tqdm progress bar

    Prints an error and exits with status 1 when the input or base directory
    is missing or does not exist.
    """
    parser = argparse.ArgumentParser(
        description=
        "Convert a SMILES or SDFile to input for Astex Fragment network.")
    parser.add_argument("--input")
    parser.add_argument("--input_format", default="smi")
    parser.add_argument("--base_dir")
    parser.add_argument("--isomeric", dest="iso_flag", action="store_true")
    parser.add_argument("--non_isomeric",
                        dest="iso_flag",
                        action="store_false")

    group = parser.add_mutually_exclusive_group()
    group.add_argument("-v", dest="verbosity", action="store_const", const=1)
    group.add_argument("-vv", dest="verbosity", action="store_const", const=2)

    parser.set_defaults(verbosity=0)
    parser.set_defaults(iso_flag=True)

    args = parser.parse_args()

    # Do we have an input and base directory?
    if not args.input:
        print('ERROR: Must specify an input')
        sys.exit(1)
    if not os.path.isfile(args.input):
        print('ERROR: input (%s) does not exist' % args.input)
        sys.exit(1)
    if not args.base_dir:
        print('ERROR: Must specify a base directory')
        sys.exit(1)
    if not os.path.isdir(args.base_dir):
        print('ERROR:input base directory (%s) does not exist' % args.base_dir)
        sys.exit(1)

    tqdm_disable = bool(args.verbosity)
    attrs = []
    mols = parse_mols(args.input, args.input_format)
    for x in tqdm(mols, disable=tqdm_disable):
        # The None check must come before any use of the molecule:
        # calling MolToSmiles on a None entry would raise instead of skipping.
        if x is None:
            continue
        smiles = Chem.MolToSmiles(x, isomericSmiles=True)
        print("Processing " + smiles)
        attrs.append(Attr(Chem.CanonSmiles(smiles), ["EM", x.GetProp("_Name")]))

    # base_dir was verified to exist above, so nothing needs creating here.
    # Build the network
    node_holder = NodeHolder(iso_flag=args.iso_flag)
    max_frags = 0
    node_holder = build_network(attrs, node_holder, max_frags, args.base_dir,
                                args.verbosity)
    # Write the data out
    write_data(args.base_dir, node_holder, attrs)
Exemple #4
0
def main():
    """Fragment a single SMILES and print its nodes, edges and attributes.

    Command-line options:
      --smiles                   SMILES string to process (required)
      --id                       identifier stored with the molecule
                                 (default "smiles1")
      --standardize              run ``standardize`` on the parsed molecule
      --isomeric/--non_isomeric  set ``iso_flag`` for ``NodeHolder``
                                 (default True)
      -v / -vv                   verbosity 1 or 2 (default 0)

    Prints an error and exits with status 1 when no SMILES is given or the
    SMILES cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description="Convert a SMILES to nodes, edges and attributes"
    )
    parser.add_argument("--smiles")
    parser.add_argument("--id")
    parser.add_argument("--standardize", action="store_true")
    parser.add_argument("--isomeric", dest="iso_flag", action="store_true")
    parser.add_argument("--non_isomeric", dest="iso_flag", action="store_false")

    group = parser.add_mutually_exclusive_group()
    group.add_argument("-v", dest="verbosity", action="store_const", const=1)
    group.add_argument("-vv", dest="verbosity", action="store_const", const=2)

    parser.set_defaults(verbosity=0)
    parser.set_defaults(iso_flag=True)

    args = parser.parse_args()

    # Do we have a SMILES to work on?
    if not args.smiles:
        print('ERROR: Must specify a SMILES')
        sys.exit(1)

    print("Original SMILES: " + args.smiles)
    mol = Chem.MolFromSmiles(args.smiles)
    # MolFromSmiles signals a parse failure by returning None; stop here with
    # a clear message instead of failing inside MolToSmiles below.
    if mol is None:
        print('ERROR: Could not parse SMILES (%s)' % args.smiles)
        sys.exit(1)
    if args.standardize:
        mol = standardize(mol)
        print("Standardized SMILES: " + Chem.MolToSmiles(mol))
    smiles = Chem.CanonSmiles(Chem.MolToSmiles(mol, isomericSmiles=True))
    print("Canonical SMILES: " + smiles)

    mol_id = args.id if args.id is not None else "smiles1"
    attrs = [Attr(smiles, ["EM", mol_id])]

    # Build the network
    node_holder = NodeHolder(iso_flag=args.iso_flag)
    max_frags = 0
    node_holder = build_network(attrs, node_holder,
                                max_frags, smiles, args.verbosity)
    # Write the data out
    for node in node_holder.node_list:
        print(str(node))

    for edge in node_holder.get_edges():
        print(str(edge))

    for attr in attrs:
        print(str(attr))

    print("Number of nodes: " + str(len(node_holder.node_list)))
    print("Number of edges: " + str(len(node_holder.get_edges())))
Exemple #5
0
 def test_generate_nodes(self):
     """
     Test we can generate nodes for the basic data.

     The fixture files are looked up under ``frag/tests/data`` first and,
     if that raises IOError, under ``data``.
     :return:
     """

     def read_lines(path):
         # Context manager so the file handle is closed as soon as it is read.
         with open(path) as handle:
             return handle.readlines()

     def load_fixture(data_dir):
         nodes = read_lines(data_dir + "/nodes.txt")
         edges = [x.split() for x in read_lines(data_dir + "/edges.txt")]
         attrs = [
             Attr(input_str=x)
             for x in read_lines(data_dir + "/attributes.txt")
         ]
         return nodes, edges, attrs

     try:
         nodes, edges, attrs = load_fixture("frag/tests/data")
     except IOError:
         nodes, edges, attrs = load_fixture("data")
     node_holder = NodeHolder(iso_flag=True)
     node_holder = build_network(attrs, node_holder)
     # Create the nodes and test with output
     self.assertEqual(len(node_holder.node_list), len(nodes))
     # The edge fixture is loaded but not compared: per the original note the
     # network yields 3687 edges where 3691 were expected, and the output was
     # judged to look right, so the observed count is pinned instead.
     self.assertEqual(len(node_holder.get_edges()), 3687)
Exemple #6
0
def fragment_mol(smiles, base_dir, verbosity):
    """Run ``build_network`` for one SMILES with recursion switched off.

    :param smiles: SMILES string wrapped in an ``Attr`` tagged "EM"
    :param base_dir: forwarded to ``build_network`` as its fourth positional
        argument
    :param verbosity: forwarded to ``build_network``
    :return: the value ``build_network`` returns for a fresh, non-isomeric
        ``NodeHolder``
    """
    molecule_attrs = [Attr(smiles, ["EM"])]

    # Fresh holder per call; max_frags is passed as 0.
    holder = NodeHolder(iso_flag=False)
    return build_network(molecule_attrs,
                         holder,
                         0,
                         base_dir,
                         verbosity=verbosity,
                         recurse=False)
Exemple #7
0
    def fragment_mol(self, smiles, verbosity=0) -> object:
        """Fragment a single SMILES with recursion switched off.

        Only one SMILES is handled per call: an earlier note on this method
        reports odd edge results when a combined node holder was used (still
        to be investigated).

        Args:
           smiles: SMILES string wrapped in an Attr tagged "EM".
           verbosity: forwarded to build_network.

        Returns:
           The value returned by build_network for a fresh, non-isomeric
           NodeHolder.

        """
        single_attr = [Attr(smiles, ["EM"])]
        holder = NodeHolder(iso_flag=False)
        return build_network(single_attr, holder, base_dir=None, verbosity=verbosity, recurse=False)
    def fragment_mol(self, smiles, verbosity=0) -> object:
        """Run the fragmentation step for one SMILES.

        Args:
           smiles: SMILES string wrapped in an Attr tagged "EM".
           verbosity: forwarded to build_network.

        Returns:
           The value build_network returns for a new NodeHolder created with
           iso_flag=False.

        """
        attr_list = [Attr(smiles, ["EM"])]

        # Build the network on a fresh holder, without recursion.
        fresh_holder = NodeHolder(iso_flag=False)
        network = build_network(
            attr_list,
            fresh_holder,
            base_dir=None,
            verbosity=verbosity,
            recurse=False,
        )
        return network
def fragment_mols(input_smiles, verbosity=0, recurse=False):
    """Fragment several SMILES in one network and group the result.

    :param input_smiles: iterable of SMILES strings; each one is wrapped in an
        ``Attr`` tagged "EM", and the iterable itself is also handed to
        ``group_data``
    :param verbosity: forwarded to ``build_network``
    :param recurse: forwarded to ``build_network``
    :return: the value returned by ``group_data``
    """
    attrs = [Attr(smiles, ["EM"]) for smiles in input_smiles]

    # One shared, non-isomeric holder for every input molecule.
    shared_holder = NodeHolder(iso_flag=False)
    shared_holder = build_network(
        attrs,
        shared_holder,
        base_dir=None,
        verbosity=verbosity,
        recurse=recurse,
    )

    return group_data(shared_holder, input_smiles)
def fragment_mol(smiles, verbosity=0):
    """Build the network for one SMILES with recursion switched off.

    :param smiles: SMILES string wrapped in an ``Attr`` tagged "EM"
    :param verbosity: forwarded to ``build_network``
    :return: the value ``build_network`` returns for a fresh, non-isomeric
        ``NodeHolder``
    """
    attrs = [Attr(smiles, ["EM"])]

    # Build the network
    holder = NodeHolder(iso_flag=False)
    result = build_network(attrs, holder, base_dir=None,
                           verbosity=verbosity, recurse=False)
    return result
Exemple #11
0
from tqdm import tqdm
from frag.network.models import Attr

if __name__ == "__main__":
    # Script entry point: read a comma-delimited SMILES file (name in column
    # 0, SMILES in column 1) and write one Attr line per molecule plus one per
    # decorated variant of the molecule and of its Murcko scaffold.

    parser = argparse.ArgumentParser(
        description=
        'Decorate a library of molecules for insertion to the database.')
    parser.add_argument('--input_smi')
    parser.add_argument('--output_attr')
    args = parser.parse_args()
    # The context manager guarantees the output is flushed and closed, even
    # if processing a molecule raises part-way through.
    with open(args.output_attr, "w") as out_smi:
        for mol in tqdm(
                Chem.SmilesMolSupplier(args.input_smi,
                                       delimiter=',',
                                       smilesColumn=1,
                                       nameColumn=0)):
            # The supplier yields None for rows it cannot parse; skip them
            # rather than crashing in MolToSmiles.
            if mol is None:
                continue
            this_smi = Chem.MolToSmiles(mol, isomericSmiles=True)
            # Do this on original and on Murcko Scaffold
            new_smis = decorate_smi(this_smi)
            new_murck = decorate_smi(
                MurckoScaffold.MurckoScaffoldSmiles(this_smi))
            new_smis.extend(new_murck)
            # De-duplicate; sorting makes the "_<i>" suffixes reproducible
            # between runs (plain set iteration order is not).
            new_smis = sorted(set(new_smis))
            name = mol.GetProp("_Name")
            new_attr = Attr(this_smi, ["EM", name])
            out_smi.write(str(new_attr) + "\n")
            for i, smi in enumerate(new_smis):
                new_attr = Attr(smi, ["EM", name + "_" + str(i)])
                out_smi.write(str(new_attr) + "\n")
Exemple #12
0
    def test_compare_iso_non_iso(self):
        """
        Check the nodes and edges produced with iso_flag off and then on.

        The same input is fed through build_network twice, once per flag
        value, and the resulting node SMILES and edge strings are compared
        (order-insensitively) with the expected lists below.
        :return:
        """
        input_smis = ["C#CC(C)(C)NC[C@]1(O)CCCN2CCCC[C@@H]21"]

        expected_nodes_iso = [
            "C#CC(C)(C)NC",
            "OC1CCCN2CCCCC12",
            "O",
            "C#CC(C)(C)NCC1CCCN2CCCCC12",
            "C#CC(C)(C)NC[C@]1(O)CCCN2CCCC[C@@H]21",
            "C1CCN2CCCCC2C1",
            "C#CC(C)(C)NC.O",
        ]
        expected_nodes_non_iso = [
            "C#CC(C)(C)NC",
            "OC1CCCN2CCCCC12",
            "O",
            "C#CC(C)(C)NCC1CCCN2CCCCC12",
            "C#CC(C)(C)NCC1(O)CCCN2CCCCC21",
            "C1CCN2CCCCC2C1",
            "C#CC(C)(C)NC.O",
        ]
        expected_edges_iso = [
            "EDGE C#CC(C)(C)NC[C@]1(O)CCCN2CCCC[C@@H]21 OC1CCCN2CCCCC12 FG|C#CC(C)(C)NC[Xe]|CCC(C)(C)NC[100Xe]|RING|OC1([Xe])CCCN2CCCCC21|O[C@@]1([100Xe])CCCC2CCCC[C@@H]21",
            "EDGE OC1CCCN2CCCCC12 O RING|[Xe]C1CCCN2CCCCC12|[100Xe]C1CCCC2CCCCC12|FG|O[Xe]|O[100Xe]",
            "EDGE C#CC(C)(C)NC[C@]1(O)CCCN2CCCC[C@@H]21 C#CC(C)(C)NC.O RING|[Xe]C1([Xe])CCCN2CCCCC21|[100Xe][C@]1([101Xe])CCCC2CCCC[C@@H]21|FG|C#CC(C)(C)NC[Xe].O[Xe]|CCC(C)(C)NC[100Xe].O[101Xe]",
            "EDGE OC1CCCN2CCCCC12 C1CCN2CCCCC2C1 FG|O[Xe]|O[100Xe]|RING|[Xe]C1CCCN2CCCCC12|[100Xe]C1CCCC2CCCCC12",
            "EDGE C#CC(C)(C)NC.O C#CC(C)(C)NC FG|O|O|FG|C#CC(C)(C)NC|CCC(C)(C)NC",
            "EDGE C#CC(C)(C)NC[C@]1(O)CCCN2CCCC[C@@H]21 C#CC(C)(C)NCC1CCCN2CCCCC12 FG|O[Xe]|O[101Xe]|RING|C#CC(C)(C)NCC1([Xe])CCCN2CCCCC21|CCC(C)(C)NC[C@@]1([101Xe])CCCC2CCCC[C@@H]21",
            "EDGE C#CC(C)(C)NCC1CCCN2CCCCC12 C1CCN2CCCCC2C1 FG|C#CC(C)(C)NC[Xe]|CCC(C)(C)NC[100Xe]|RING|[Xe]C1CCCN2CCCCC12|[100Xe]C1CCCC2CCCCC12",
            "EDGE C#CC(C)(C)NC.O O FG|C#CC(C)(C)NC|CCC(C)(C)NC|FG|O|O",
            "EDGE C#CC(C)(C)NCC1CCCN2CCCCC12 C#CC(C)(C)NC RING|[Xe]C1CCCN2CCCCC12|[100Xe]C1CCCC2CCCCC12|FG|C#CC(C)(C)NC[Xe]|CCC(C)(C)NC[100Xe]",
        ]
        expected_edges_non_iso = [
            "EDGE C#CC(C)(C)NCC1(O)CCCN2CCCCC21 C#CC(C)(C)NCC1CCCN2CCCCC12 FG|O[Xe]|O[101Xe]|RING|C#CC(C)(C)NCC1([Xe])CCCN2CCCCC21|CCC(C)(C)NCC1([101Xe])CCCC2CCCCC21",
            "EDGE OC1CCCN2CCCCC12 C1CCN2CCCCC2C1 FG|O[Xe]|O[100Xe]|RING|[Xe]C1CCCN2CCCCC12|[100Xe]C1CCCC2CCCCC12",
            "EDGE C#CC(C)(C)NCC1(O)CCCN2CCCCC21 C#CC(C)(C)NC.O RING|[Xe]C1([Xe])CCCN2CCCCC21|[100Xe]C1([101Xe])CCCC2CCCCC21|FG|C#CC(C)(C)NC[Xe].O[Xe]|CCC(C)(C)NC[100Xe].O[101Xe]",
            "EDGE OC1CCCN2CCCCC12 O RING|[Xe]C1CCCN2CCCCC12|[100Xe]C1CCCC2CCCCC12|FG|O[Xe]|O[100Xe]",
            "EDGE C#CC(C)(C)NCC1CCCN2CCCCC12 C1CCN2CCCCC2C1 FG|C#CC(C)(C)NC[Xe]|CCC(C)(C)NC[100Xe]|RING|[Xe]C1CCCN2CCCCC12|[100Xe]C1CCCC2CCCCC12",
            "EDGE C#CC(C)(C)NC.O O FG|C#CC(C)(C)NC|CCC(C)(C)NC|FG|O|O",
            "EDGE C#CC(C)(C)NCC1(O)CCCN2CCCCC21 OC1CCCN2CCCCC12 FG|C#CC(C)(C)NC[Xe]|CCC(C)(C)NC[100Xe]|RING|OC1([Xe])CCCN2CCCCC21|OC1([100Xe])CCCC2CCCCC21",
            "EDGE C#CC(C)(C)NC.O C#CC(C)(C)NC FG|O|O|FG|C#CC(C)(C)NC|CCC(C)(C)NC",
            "EDGE C#CC(C)(C)NCC1CCCN2CCCCC12 C#CC(C)(C)NC RING|[Xe]C1CCCN2CCCCC12|[100Xe]C1CCCC2CCCCC12|FG|C#CC(C)(C)NC[Xe]|CCC(C)(C)NC[100Xe]",
        ]

        attrs = [Attr(input_smi) for input_smi in input_smis]

        # Non-isomeric run first, then isomeric, each on a fresh NodeHolder
        # but sharing the same Attr objects.
        scenarios = (
            (False, expected_nodes_non_iso, expected_edges_non_iso),
            (True, expected_nodes_iso, expected_edges_iso),
        )
        for iso_flag, expected_nodes, expected_edges in scenarios:
            holder = build_network(attrs, NodeHolder(iso_flag=iso_flag))
            actual_nodes = [node.SMILES for node in holder.node_list]
            actual_edges = [str(edge) for edge in holder.edge_list]
            self.assertListEqual(sorted(actual_nodes), sorted(expected_nodes))
            self.assertListEqual(sorted(actual_edges), sorted(expected_edges))