Exemple #1
0
def load_in_reagents(library_title, path_to_file, reaction):
    """Function to load up a library of reagents - e.g. acyl chlorides

    Each readable molecule in the SD file is registered as a compound,
    wrapped in a Reactant that is marked available and linked to the
    reaction, and added to the ReactantLib named library_title (which is
    created if no library with that name exists yet).

    Takes a library name, a path to an SD file and a reaction object
    Returns the ReactantLib the reactants were added to"""
    # Get the library this corresponds to - query once and reuse the result
    existing_libs = ReactantLib.objects.filter(library_name=library_title)
    if existing_libs:
        out_lib = existing_libs[0]
    else:
        out_lib = ReactantLib()
        out_lib.library_name = library_title
        out_lib.save()
    # Go through the compounds
    for rdmol in Chem.SDMolSupplier(path_to_file):
        # The supplier yields None for records RDKit cannot parse
        if rdmol is None:
            continue
        # Register the compound in the database
        dj_comp = add_new_comp(rdmol)
        # Nothing to link if the compound could not be registered
        if dj_comp is None:
            continue
        # Create the reactant, or fall back to the one already stored
        # for this compound when the uniqueness check fails
        my_r = Reactant()
        my_r.cmpd_id = dj_comp
        try:
            my_r.validate_unique()
            my_r.save()
        except ValidationError:
            my_r = Reactant.objects.get(cmpd_id=dj_comp)
        # Now update this
        my_r.is_available = True
        my_r.react_id.add(reaction)
        my_r.save()
        out_lib.reactant_id.add(my_r)
        out_lib.save()
    return out_lib
Exemple #2
0
def load_in_follow_ups(library_title, path_to_file, reaction, mol_id):
    """Function to load in follow ups - ready made

    A new Process (not made by LLOOMMPPAA) is recorded for mol_id and the
    reaction. Each readable molecule in the SD file is then registered as a
    compound, wrapped in a Product linked to that process, and added to the
    ProductLib named library_title (created if it does not exist yet).

    Takes a library name, a path to an SD file, a reaction and a molecule
    Returns the ProductLib the products were added to"""
    # Get the library this corresponds to - query once and reuse the result
    existing_libs = ProductLib.objects.filter(lib_name=library_title)
    if existing_libs:
        out_lib = existing_libs[0]
    else:
        out_lib = ProductLib()
        out_lib.lib_name = library_title
        out_lib.save()
    # Record the process that these products belong to
    my_process = Process()
    my_process.mol_id = mol_id
    my_process.is_made_lloommppaa = False
    my_process.reaction_id = reaction
    my_process.save()
    # Go through the compounds
    for rdmol in Chem.SDMolSupplier(path_to_file):
        # The supplier yields None for records RDKit cannot parse
        if rdmol is None:
            continue
        # Register the compound in the database
        dj_comp = add_new_comp(rdmol)
        # Nothing to link if the compound could not be registered
        if dj_comp is None:
            continue
        # Create the product, or fall back to the one already stored
        # for this compound when the uniqueness check fails
        my_p = Product()
        my_p.cmpd_id = dj_comp
        try:
            my_p.validate_unique()
            my_p.save()
        except ValidationError:
            my_p = Product.objects.get(cmpd_id=dj_comp)
        # Now update this
        my_p.process_id.add(my_process)
        my_p.save()
        out_lib.product_id.add(my_p)
        out_lib.save()
    return out_lib
Exemple #3
0
def load_in_follow_ups(library_title, path_to_file, reaction, mol_id):
    """Function to load in follow ups - ready made

    Records a new Process (flagged as not made by LLOOMMPPAA) for mol_id
    and the reaction, then registers every readable molecule of the SD file
    as a compound, attaches it as a Product to that process and adds it to
    the ProductLib called library_title. The library is created when no
    library with that name exists.

    Takes a library name, a path to an SD file, a reaction and a molecule
    Returns the ProductLib the products were added to"""
    # Look the library up once; create it only when the lookup is empty
    matching_libs = ProductLib.objects.filter(lib_name=library_title)
    if matching_libs:
        out_lib = matching_libs[0]
    else:
        out_lib = ProductLib()
        out_lib.lib_name = library_title
        out_lib.save()
    # Get the process
    my_process = Process()
    my_process.mol_id = mol_id
    my_process.is_made_lloommppaa = False
    my_process.reaction_id = reaction
    my_process.save()
    # Go through the compounds
    my_mols = Chem.SDMolSupplier(path_to_file)
    for rdmol in my_mols:
        if rdmol is None:
            # Unparseable SD record - the supplier hands back None
            continue
        dj_comp = add_new_comp(rdmol)
        if dj_comp is None:
            # The compound could not be registered, so there is nothing
            # to attach a product to
            continue
        # Reuse the stored product for this compound if one already exists
        my_p = Product()
        my_p.cmpd_id = dj_comp
        try:
            my_p.validate_unique()
            my_p.save()
        except ValidationError:
            my_p = Product.objects.get(cmpd_id=dj_comp)
        # Link it to the process and the library
        my_p.process_id.add(my_process)
        my_p.save()
        out_lib.product_id.add(my_p)
        out_lib.save()
    return out_lib
Exemple #4
0
def load_in_reagents(library_title, path_to_file, reaction):
    """Function to load up a library of reagents - e.g. acyl chlorides

    Registers every readable molecule of the SD file as a compound, attaches
    it as an available Reactant of the reaction and adds it to the
    ReactantLib called library_title. The library is created when no library
    with that name exists.

    Takes a library name, a path to an SD file and a reaction object
    Returns the ReactantLib the reactants were added to"""
    # Look the library up once; create it only when the lookup is empty
    matching_libs = ReactantLib.objects.filter(library_name=library_title)
    if matching_libs:
        out_lib = matching_libs[0]
    else:
        out_lib = ReactantLib()
        out_lib.library_name = library_title
        out_lib.save()
    # Go through the compounds
    my_mols = Chem.SDMolSupplier(path_to_file)
    for rdmol in my_mols:
        if rdmol is None:
            # Unparseable SD record - the supplier hands back None
            continue
        dj_comp = add_new_comp(rdmol)
        if dj_comp is None:
            # The compound could not be registered, so there is nothing
            # to attach a reactant to
            continue
        # Reuse the stored reactant for this compound if one already exists
        my_r = Reactant()
        my_r.cmpd_id = dj_comp
        try:
            my_r.validate_unique()
            my_r.save()
        except ValidationError:
            my_r = Reactant.objects.get(cmpd_id=dj_comp)
        # Flag it as available and link it to the reaction and the library
        my_r.is_available = True
        my_r.react_id.add(reaction)
        my_r.save()
        out_lib.reactant_id.add(my_r)
        out_lib.save()
    return out_lib
Exemple #5
0
def load_activity_data(target, file_path):
    """Function to load in a CSV file of activity data

    The CSV must contain "smiles" and "Activity" columns; if either is
    absent the missing names are printed and sys.exit() is called. "ID" and
    "operator" are optional and only produce a warning when absent. For
    each row, values that are None for "units", "ID", "Source" and
    "operator" fall back to "pnM", "NONE", "IC50" and "NA" respectively.
    Rows whose SMILES cannot be parsed, or whose compound cannot be
    registered, are skipped.

    Takes a Target object and a file path
    Returns None"""
    # Fields looking for
    all_fields = ["smiles", "Activity", "ID", "operator"]
    mandatory_fields = ["smiles", "Activity"]
    # Per-row fallbacks used when a value is None
    row_defaults = (("units", "pnM"), ("ID", "NONE"),
                    ("Source", "IC50"), ("operator", "NA"))
    # The reader is consumed lazily, so the file stays open for the loop
    with open(file_path) as in_file:
        in_d = read_CSV(in_file)
        # fieldnames may be None when the file has no header line
        fieldnames = in_d.fieldnames or []
        # Check to see if fields are missing
        missing_fields = [x for x in mandatory_fields if x not in fieldnames]
        if missing_fields:
            print(" ".join([" ".join(missing_fields), " fields required"]))
            sys.exit()
        absent_fields = [x for x in all_fields if x not in fieldnames]
        if absent_fields:
            print(" ".join([" ".join(absent_fields), " fields missing"]))
        # Number of data lines (header excluded) for the percent clock
        with open(file_path) as count_file:
            tot = sum(1 for _ in count_file) - 1
        if tot <= 0:
            print("No activity data")
            return
        old = -1
        print("Loading activity data")
        for i, l in enumerate(in_d):
            # Do the percent clock
            pct = i * 100 // tot
            if pct != old:
                old = pct
                sys.stdout.write("\r%d%% complete..." % old)
                sys.stdout.flush()
            smiles = str(l["smiles"])
            m = Chem.MolFromSmiles(smiles)
            if m is None:
                # Retry with backslash escapes resolved. The escape has to
                # be applied to the SMILES string itself, not to the parse
                # result. 'string-escape' is a Python 2 str codec.
                try:
                    m = Chem.MolFromSmiles(smiles.decode('string-escape'))
                except ValueError:
                    # Malformed escape sequence - treat as unparseable
                    m = None
            if m is None:
                print(" ".join(["Error None molecule", smiles]))
                continue
            comp_ref = add_new_comp(m)
            if comp_ref is None:
                continue
            # Now add the required information if no column is entered
            filled = {}
            for key, default in row_defaults:
                value = l.get(key)
                filled[key] = default if value is None else value
            add_new_act(comp_ref, target, l["Activity"], filled["units"],
                        filled["ID"], filled["Source"], filled["operator"])
    sys.stdout.write("\r%d%%" % 100)
    sys.stdout.flush()
    print("\nAdding activity data complete")
    return None
Exemple #6
0
def add_new_mol(rdmol, target):
    """Function to add a new bound Molecule object

    The molecule's "_Name" property is split on "_" into
    [pdb id, ligand id, chain id, occupancy]. If the pdb id is shorter than
    four characters, already used as a Protein code, or contains the target
    title, a fresh id of the form "<uuid hex>_<pdb id>" is generated -
    unless an identical mol block is already stored for this target and
    compound, in which case nothing is added. Note that the "_Name"
    property of rdmol is overwritten on that path.

    Takes an RDKit molecule and a Target
    Returns None"""
    import re
    rdProps = rdmol.GetProp("_Name").split("_")
    comp_ref = add_new_comp(rdmol)
    if comp_ref is None:
        return None
    # To get rid of the .pdb suffix
    pdb_id = rdProps[0].split(".")[0]
    # If it's an SGC model entry -> rename it appropriately
    if rdmol.HasProp("name"):
        if re.match(r"^m\d\d\d$", rdmol.GetProp("name")):
            pdb_id = rdmol.GetProp("name") + "_" + str(target.pk)
    # Check that the name is unique and more than 3 characters long
    # and doesn't contain the target.title
    needs_new_id = (len(pdb_id) < 4
                    or Protein.objects.filter(code=pdb_id).exists()
                    or target.title in pdb_id)
    if needs_new_id:
        # First up check that this molecule has not been added before
        stored = [Chem.MolFromMolBlock(str(x.sdf_info))
                  for x in Molecule.objects.filter(prot_id__target_id=target,
                                                   cmpd_id=comp_ref)]
        # Blank out the names so only the structure is compared
        for stored_mol in stored:
            stored_mol.SetProp("_Name", "N")
        rdmol.SetProp("_Name", "N")
        # Round-trip through a mol block so both sides are written the same
        sd_block = Chem.MolToMolBlock(
            Chem.MolFromMolBlock(Chem.MolToMolBlock(rdmol)))
        for stored_mol in stored:
            if sd_block == Chem.MolToMolBlock(stored_mol):
                # This EXACT mol is already in the database - just return
                return
        # We have not put this EXACT mol into the database
        # Now lets make an ID
        molid = uuid.uuid4().hex + "_" + pdb_id
        rdmol.SetProp("_Name", molid)
    else:
        molid = pdb_id
    new_mol = Molecule()
    # Make a protein object by which it is related in the DB
    new_mol.prot_id = Protein.objects.get_or_create(code=molid,
                                                    target_id=target)[0]
    new_mol.sdf_info = Chem.MolToMolBlock(rdmol)
    new_mol.smiles = Chem.MolToSmiles(rdmol, isomericSmiles=True)
    try:
        new_mol.lig_id = rdProps[1]
        new_mol.chain_id = rdProps[2]
        new_mol.occupancy = float(rdProps[3])
    except (IndexError, ValueError):
        # Name does not carry ligand/chain/occupancy (too few parts, or a
        # non-numeric occupancy) - fall back to placeholder values
        new_mol.lig_id = "UNL"
        new_mol.chain_id = "Z"
        new_mol.occupancy = 0.0
    # Now link that compound back
    new_mol.cmpd_id = comp_ref
    try:
        new_mol.validate_unique()
        new_mol.save()
    except ValidationError:
        # Deliberate best effort: a molecule that violates a uniqueness
        # constraint is silently not stored
        pass
Exemple #7
0
def load_activity_data(target, file_path):
    """Function to load in a CSV file of activity data

    "smiles" and "Activity" columns are required: when one is absent the
    missing names are printed and sys.exit() is called. Absent "ID" or
    "operator" columns only produce a warning. Within a row, None values
    for "units", "ID", "Source" and "operator" are replaced by "pnM",
    "NONE", "IC50" and "NA". Rows with an unparseable SMILES, or whose
    compound cannot be registered, are skipped.

    Takes a Target object and a file path
    Returns None"""
    # Fields looking for
    all_fields = ["smiles", "Activity", "ID", "operator"]
    mandatory_fields = ["smiles", "Activity"]
    # Keep the file open while the (lazy) reader is being iterated
    with open(file_path) as csv_file:
        in_d = read_CSV(csv_file)
        # A file without a header line gives fieldnames of None
        header = in_d.fieldnames or []
        # Check to see if fields are missing
        missing_fields = [x for x in mandatory_fields if x not in header]
        if missing_fields:
            print(" ".join([" ".join(missing_fields), " fields required"]))
            sys.exit()
        not_present = [x for x in all_fields if x not in header]
        if not_present:
            print(" ".join([" ".join(not_present), " fields missing"]))
        # Count the data lines (everything but the header) for the clock
        with open(file_path) as line_counter:
            tot = sum(1 for _ in line_counter) - 1
        if tot <= 0:
            print("No activity data")
            return
        old = -1
        print("Loading activity data")
        for i, l in enumerate(in_d):
            # Do the percent clock
            done = i * 100 // tot
            if done != old:
                old = done
                sys.stdout.write("\r%d%% complete..." % old)
                sys.stdout.flush()
            raw_smiles = str(l["smiles"])
            m = Chem.MolFromSmiles(raw_smiles)
            if m is None:
                # Try again in case the SMILES needs un-escaping. The
                # decode must be applied to the string before parsing;
                # 'string-escape' is a Python 2 str codec.
                try:
                    unescaped = raw_smiles.decode('string-escape')
                except ValueError:
                    # Malformed escape sequence - nothing more to try
                    unescaped = None
                if unescaped is not None:
                    m = Chem.MolFromSmiles(unescaped)
            if m is None:
                print(" ".join(["Error None molecule", raw_smiles]))
                continue
            comp_ref = add_new_comp(m)
            if comp_ref is None:
                continue
            # Now add the required information if no column is entered
            units = l.get("units")
            if units is None:
                units = "pnM"
            chid = l.get("ID")
            if chid is None:
                chid = "NONE"
            source = l.get("Source")
            if source is None:
                source = "IC50"
            operator = l.get("operator")
            if operator is None:
                operator = "NA"
            add_new_act(comp_ref, target, l["Activity"], units, chid, source,
                        operator)
    sys.stdout.write("\r%d%%" % 100)
    sys.stdout.flush()
    print("\nAdding activity data complete")
    return None
Exemple #8
0
def load_compounds(file_path):
    """Function to load compounds and make the MMPs

    Reads an SD file, registers each readable molecule as a compound and
    builds matched molecular pairs for those with an exact molecular weight
    of at most 560. A running count of readable molecules is printed.

    Takes a file path
    Returns None"""
    n_read = 0
    for sd_mol in Chem.SDMolSupplier(file_path):
        # Records that could not be parsed come back as None
        if sd_mol is None:
            print("NONE MOL")
            continue
        n_read = n_read + 1
        print(n_read)
        # Register the compound; skip it if registration gave nothing back
        compound = add_new_comp(sd_mol)
        if compound is None:
            continue
        # Rebuild the molecule from the stored SMILES
        stored_mol = Chem.MolFromSmiles(str(compound.smiles))
        # Filter too big molecules
        exact_weight = Descriptors.ExactMolWt(stored_mol)
        if exact_weight > 560:
            continue
        mmp_id = "cmp" + str(compound.pk)
        make_mol_mmp(stored_mol, id=mmp_id, target_id=None)
Exemple #9
0
def load_compounds(file_path):
    """Function to load compounds and make the MMPs

    Every readable molecule in the SD file is counted (the count is
    printed), registered as a compound and - unless its exact molecular
    weight is above 560 - passed on to the MMP generation under the id
    "cmp<primary key>".

    Takes a file path
    Returns None"""
    supplier = Chem.SDMolSupplier(file_path)
    seen = 0
    for entry in supplier:
        if entry is None:
            # Unparseable record
            print("NONE MOL")
            continue
        seen += 1
        print(seen)
        # add the new compound to the database
        registered = add_new_comp(entry)
        if registered is None:
            continue
        from_smiles = Chem.MolFromSmiles(str(registered.smiles))
        # Filter too big molecules
        too_big = Descriptors.ExactMolWt(from_smiles) > 560
        if too_big:
            continue
        make_mol_mmp(from_smiles, id="cmp" + str(registered.pk),
                     target_id=None)
Exemple #10
0
def add_new_mol(rdmol, target):
    """Function to add a new bound Molecule object

    The "_Name" property of rdmol is split on "_" and read as
    [pdb id, ligand id, chain id, occupancy]. When the pdb id is shorter
    than four characters, is already a Protein code, or contains the target
    title, the molecule gets a generated id "<uuid hex>_<pdb id>" instead -
    provided an identical mol block is not already stored for this target
    and compound (in that case the function returns without adding). On
    that path the "_Name" property of rdmol is overwritten.

    Takes an RDKit molecule and a Target
    Returns None"""
    import re
    rdProps = rdmol.GetProp("_Name").split("_")
    comp_ref = add_new_comp(rdmol)
    if comp_ref is None:
        return None
    # To get rid of the .pdb suffix
    pdb_id = rdProps[0].split(".")[0]
    # If it's an SGC model entry -> rename it appropriately
    if rdmol.HasProp("name") and re.match(r"^m\d\d\d$",
                                          rdmol.GetProp("name")):
        pdb_id = rdmol.GetProp("name") + "_" + str(target.pk)
    # Check that the name is unique and more than 3 characters long
    # and doesn't contain the target.title
    if (len(pdb_id) < 4 or Protein.objects.filter(code=pdb_id).exists()
            or target.title in pdb_id):
        # Make a new uniqid
        # First up check that this molecule has not been added before
        earlier_mols = []
        for x in Molecule.objects.filter(prot_id__target_id=target,
                                         cmpd_id=comp_ref):
            earlier = Chem.MolFromMolBlock(str(x.sdf_info))
            # Neutralise the name so that only the structure is compared
            earlier.SetProp("_Name", "N")
            earlier_mols.append(earlier)
        rdmol.SetProp("_Name", "N")
        # Write, re-read and write again so both blocks are produced the
        # same way before comparing them
        sd_block = Chem.MolToMolBlock(
            Chem.MolFromMolBlock(Chem.MolToMolBlock(rdmol)))
        if any(sd_block == Chem.MolToMolBlock(e) for e in earlier_mols):
            # If this is actually a duplicate then just return
            return
        # We have not put this EXACT mol into the database
        # Now lets make an ID
        molid = uuid.uuid4().hex + "_" + pdb_id
        rdmol.SetProp("_Name", molid)
    else:
        molid = pdb_id
    new_mol = Molecule()
    # Make a protein object by which it is related in the DB
    new_mol.prot_id = Protein.objects.get_or_create(code=molid,
                                                    target_id=target)[0]
    new_mol.sdf_info = Chem.MolToMolBlock(rdmol)
    new_mol.smiles = Chem.MolToSmiles(rdmol, isomericSmiles=True)
    try:
        new_mol.lig_id = rdProps[1]
        new_mol.chain_id = rdProps[2]
        new_mol.occupancy = float(rdProps[3])
    except (IndexError, ValueError):
        # The name has too few parts or a non-numeric occupancy - use
        # placeholder ligand details instead
        new_mol.lig_id = "UNL"
        new_mol.chain_id = "Z"
        new_mol.occupancy = 0.0
    # Now link that compound back
    new_mol.cmpd_id = comp_ref
    try:
        new_mol.validate_unique()
        new_mol.save()
    except ValidationError:
        # Deliberate best effort: molecules failing the uniqueness check
        # are silently not stored
        pass
Exemple #11
0
def create_lib(rxn, react_proc, lib_name):
    """Function to create a product library from a reaction

    Reacts the fragment of react_proc with every compound in its reactant
    queue (trying both reactant orders), registers each single, PAINS-free
    product, and moves the compound from the reactant queue to the product
    queue. Progress is written to stdout and to
    react_proc.stage_completion.

    NOTE: lib_name is accepted but not used - the new ProductLib is named
    with a freshly generated UUID.

    Returns react_proc"""
    from LLOOMMPPAA.pains_filter import pains_test
    # Make the molecule fit for reaction
    fragment = Chem.MolFromSmiles(str(react_proc.react_frag))
    # Library that will hold the products
    product_lib = ProductLib()
    product_lib.lib_name = str(uuid.uuid4())
    product_lib.save()
    # Process record shared by every product made here
    process = Process()
    process.mol_id = react_proc.mol_id
    process.is_made_lloommppaa = False
    process.reaction_id = react_proc.react_id
    process.save()
    # Loop through the queued reactants
    queued = react_proc.reactant_queue.all()
    n_queued = len(queued)
    last_pct = -1
    for idx, reactant in enumerate(queued):
        sys.stdout.write("\rCarried out reaction %d of %d..."
                         % (idx, n_queued))
        sys.stdout.flush()
        reactant_mol = Chem.MolFromSmiles(str(reactant.smiles))
        outcomes = rxn.RunReactants((reactant_mol, fragment))
        if len(outcomes) == 0:
            # Nothing that way round - swap the reactant order
            outcomes = rxn.RunReactants((fragment, reactant_mol))
        if len(outcomes) == 0:
            print("NO PRODUCTS")
            print(Chem.MolToSmiles(fragment, isomericSmiles=True))
            print(Chem.MolToSmiles(reactant_mol, isomericSmiles=True))
            continue
        # Only the first outcome is considered
        first_outcome = outcomes[0]
        if len(first_outcome) > 1:
            print("MULTIPLE PRODUCTS")
            print(first_outcome)
            print(Chem.MolToSmiles(fragment))
            print(Chem.MolToSmiles(reactant_mol))
            continue
        product_mol = first_outcome[0]
        if pains_test(product_mol):
            print("PAINS FILTER SKIPPING!!!")
            continue
        # Register the compound
        compound = add_new_comp(product_mol)
        # Find the product for this compound, or make one
        known_products = Product.objects.filter(cmpd_id=compound)
        if known_products:
            product = known_products[0]
        else:
            product = Product()
            product.cmpd_id = compound
            product.save()
        product.process_id.add(process)
        # Add the product to the product library
        product_lib.product_id.add(product)
        product_lib.save()
        # Move the compound from the reactant queue to the product queue
        react_proc.product_queue.add(compound)
        react_proc.reactant_queue.remove(reactant)
        # Only write the completion stage when the percentage changes
        pct = int((float(idx) / float(n_queued)) * 100)
        if pct != last_pct:
            react_proc.stage_completion = pct
            last_pct = pct
            react_proc.save()
    react_proc.products_id.add(product_lib)
    react_proc.save()
    # Now return this
    return react_proc
Exemple #12
0
def create_lib(rxn, react_proc, lib_name):
    """Function to create a product library from a reaction

    Each compound in the reactant queue of react_proc is reacted with the
    fragment of react_proc, in either reactant order. A compound giving
    exactly one product that passes the PAINS filter has that product
    registered and added to a new ProductLib, and is moved from the reactant
    queue to the product queue. Progress goes to stdout and to
    react_proc.stage_completion.

    NOTE: lib_name is not used by this function; the ProductLib gets a
    generated UUID as its name.

    Returns react_proc"""
    from LLOOMMPPAA.pains_filter import pains_test
    # Make the molecule fit for reaction
    frag_mol = Chem.MolFromSmiles(str(react_proc.react_frag))
    new_lib = ProductLib()
    new_lib.lib_name = str(uuid.uuid4())
    new_lib.save()
    # Get the process
    this_process = Process()
    this_process.mol_id = react_proc.mol_id
    this_process.is_made_lloommppaa = False
    this_process.reaction_id = react_proc.react_id
    this_process.save()
    # Get the reactants that are waiting
    waiting = react_proc.reactant_queue.all()
    total = len(waiting)
    reported = -1
    for position, waiting_cmpd in enumerate(waiting):
        progress_msg = "\rCarried out reaction %d of %d..." % (position,
                                                               total)
        sys.stdout.write(progress_msg)
        sys.stdout.flush()
        waiting_mol = Chem.MolFromSmiles(str(waiting_cmpd.smiles))
        results = rxn.RunReactants((waiting_mol, frag_mol))
        if len(results) == 0:
            # Try the reactants the other way round
            results = rxn.RunReactants((frag_mol, waiting_mol))
            if len(results) == 0:
                print("NO PRODUCTS")
                print(Chem.MolToSmiles(frag_mol, isomericSmiles=True))
                print(Chem.MolToSmiles(waiting_mol, isomericSmiles=True))
                continue
        made = results[0]
        if len(made) > 1:
            print("MULTIPLE PRODUCTS")
            print(made)
            print(Chem.MolToSmiles(frag_mol))
            print(Chem.MolToSmiles(waiting_mol))
            continue
        if pains_test(made[0]):
            print("PAINS FILTER SKIPPING!!!")
            continue
        # Register the compound
        made_comp = add_new_comp(made[0])
        # Add it to the list of products, reusing an existing entry
        existing = Product.objects.filter(cmpd_id=made_comp)
        if not existing:
            the_product = Product()
            the_product.cmpd_id = made_comp
            the_product.save()
        else:
            the_product = existing[0]
        the_product.process_id.add(this_process)
        # Add the product to the product library
        new_lib.product_id.add(the_product)
        new_lib.save()
        # Add it to the product queue and take it off the reactant queue
        react_proc.product_queue.add(made_comp)
        react_proc.reactant_queue.remove(waiting_cmpd)
        percent = int((float(position) / float(total)) * 100)
        if percent != reported:
            # Store the completion stage only when it has moved on
            react_proc.stage_completion = percent
            reported = percent
            react_proc.save()
    react_proc.products_id.add(new_lib)
    react_proc.save()
    # Now return this
    return react_proc
Exemple #13
0
def register_targ_data(tot_d, target, save_map=None, overwrite=None):
    """Function to register a targets data

    tot_d maps a chain key to a dict with the entries "smiles", "model_id",
    "chain", "cmpd_id", "path_to_pdb" (optionally gzipped, ending in ".gz")
    and "path_to_map". For every chain a Protein is fetched or created; if
    it is new, or overwrite is set, its molecules are rebuilt from the
    ligands in the PDB file, its PDB text (and map text when save_map is set
    and a map path is given) is stored, and its waters are registered.

    Takes the data dict, a Target, and the save_map / overwrite flags
    Returns the list of Protein objects that were created or overwritten"""
    import gzip
    old = -1
    tot = len(tot_d)
    prots_made = []
    for tot_c, chain in enumerate(tot_d):
        # Do the percent clock
        pct = tot_c * 100 // tot
        if pct != old:
            old = pct
            sys.stdout.write("\rRegistering proteins %d%% complete..." % old)
            sys.stdout.flush()
        entry = tot_d[chain]
        smiles = entry["smiles"]
        model = entry["model_id"]
        # Get the PDB file, closing the handle once it has been read
        pdb_path = entry["path_to_pdb"]
        if pdb_path[-3:] == ".gz":
            pdb_file = gzip.open(pdb_path)
        else:
            pdb_file = open(pdb_path)
        try:
            file_lines = pdb_file.readlines()
        finally:
            pdb_file.close()
        # Get the map file - only if we're saving maps and the map exists.
        # Reset per chain so a map from a previous chain is never reused.
        map_lines = None
        if save_map and entry.get("path_to_map"):
            with open(entry["path_to_map"]) as map_file:
                map_lines = map_file.readlines()
        # Get the mols from this, templating each ligand block only once
        mols = []
        for block in get_ligs(file_lines):
            templated = assign_temp(block, smiles, model)
            if templated is not None:
                mols.append(templated)
        # Check that everything's ok with the protein
        apo_block = remove_hetatm(file_lines)
        apo_prot = Chem.MolFromPDBBlock(apo_block, sanitize=False)
        if not apo_prot:
            print(" ".join(["NONE PROTEIN: ",
                            model + "_" + entry["chain"] + "_"
                            + str(target.title)]))
            continue
        # Check the protein exists
        prot_code = model + "_" + entry["chain"] + "_" + str(target.pk)
        my_prot, prot_created = Protein.objects.get_or_create(
            target_id=target, code=prot_code)
        # Only proceed if this has been created OR overwrite is set
        if not (prot_created or overwrite):
            continue
        prots_made.append(my_prot)
        # Delete the molecules for this protein
        Molecule.objects.filter(prot_id=my_prot).delete()
        for i, mol in enumerate(mols):
            # Find the reference compound
            comp_ref = add_new_comp(mol)
            # Find the molecule
            my_mol, mol_created = Molecule.objects.get_or_create(
                smiles=smiles,
                sdf_info=Chem.MolToMolBlock(mol, includeStereo=True),
                lig_id=str(i), chain_id="A", cmpd_id=comp_ref,
                occupancy=0.0, prot_id=my_prot)
            # Now add this to the user data
            if mol_created:
                for user in UserData.objects.all():
                    user.new_mols.add(my_mol)
                    user.save()
            # RMSD is measured against the first ligand, which is 0.0 from
            # itself. Save in both cases so the first ligand's value is
            # persisted as well.
            if i > 0:
                my_mol.rmsd = AllChem.GetBestRMS(mols[0], mol)
            else:
                my_mol.rmsd = 0.0
            my_mol.save()
            # Give the internal ID
            iidl = InternalIDLink()
            iidl.mol_id = my_mol
            iidl.internal_id = entry["cmpd_id"]
            iidl.save()
        # Store the apo protein
        my_prot.pdb_info = apo_block
        # If we have a map - add it
        if save_map and map_lines is not None:
            my_prot.cif_info = "".join(map_lines)
        my_prot.save()
        # Waters
        waters = Chem.MolFromPDBBlock(get_waters(file_lines))
        if waters is None:
            continue
        # Leave existing waters alone unless overwrite is set
        if Water.objects.filter(prot_id=my_prot).exists() and not overwrite:
            continue
        conf = waters.GetConformer()
        # Delete them for this protein
        for w in Water.objects.filter(prot_id=my_prot):
            w.delete()
        for atom_idx in range(waters.GetNumAtoms()):
            # Only keep the oxygen atoms
            if waters.GetAtomWithIdx(atom_idx).GetSmarts() != "O":
                continue
            cp = conf.GetAtomPosition(atom_idx)
            Water.objects.get_or_create(water_num=atom_idx + 1,
                                        prot_id=my_prot, target_id=target,
                                        x_com=cp.x, y_com=cp.y, z_com=cp.z)
    print("\nRegistered proteins")
    return prots_made