Ejemplo n.º 1
0
def filter_pubchem(ms):
    """
    Return the molecular formulas of the molecules in ``ms`` that pass all
    filters.

    A molecule is kept only when every condition below holds:
      * its exact molecular weight is neither below 100 nor above 1500,
      * its formal charge is zero,
      * both 'C' and 'H' occur among its atom symbols,
      * it has at most five atoms each of S, Cl, Br and B,
      * every atom symbol is in the allowed element set.

    The result is a list of ``CalcMolFormula(m)`` values in input order.
    """
    allowed_elements = {'C', 'H', 'O', 'N', 'S', 'P', 'Cl', 'B', 'Br', 'Se'}
    capped_elements = ('S', 'Cl', 'Br', 'B')
    max_per_capped_element = 5

    formulas = []
    for mol in ms:
        weight = CalcExactMolWt(mol)
        if weight < 100 or weight > 1500:
            continue

        if GetFormalCharge(mol) != 0:
            continue

        symbol_counts = Counter(atom.GetSymbol() for atom in mol.GetAtoms())

        # NOTE(review): 'H' is only counted when hydrogens are present as
        # atoms returned by GetAtoms() - confirm callers supply such mols.
        if 'C' not in symbol_counts or 'H' not in symbol_counts:
            continue

        if any(symbol_counts.get(symbol, 0) > max_per_capped_element
               for symbol in capped_elements):
            continue

        # Iterating the counter yields its keys, i.e. the distinct symbols.
        if allowed_elements.issuperset(symbol_counts):
            formulas.append(CalcMolFormula(mol))
    return formulas
def extract_smiles(xyz_file, charge, allow_charge=True, check_ac=False):
    """
    uses xyz2mol to extract smiles with as much 3d structural information as
    possible

    The molecule is first built with ``use_huckel=True``; if that attempt
    raises, it is rebuilt with ``use_huckel=False``. The atoms are then
    reordered with ``reorder_atoms_to_map``, a resonance structure is chosen,
    chiral tags are applied and stereochemistry is assigned before the SMILES
    string is generated.

    Parameters:
        xyz_file: path handed to ``xyz2mol_local.read_xyz_file``.
        charge: total charge forwarded to ``xyz2mol``.
        allow_charge: forwarded as ``allow_charged_fragments``.
        check_ac: when true, compare the adjacency matrix of the extracted
            molecule with ``AC`` and set the global ``AC_SAME`` to False
            (and print a message) if they differ.

    Returns:
        Tuple ``(structure_smiles, formal_charge, res_status)`` where
        ``formal_charge`` is ``GetFormalCharge`` of the final molecule and
        ``res_status`` is the second value returned by
        ``choose_resonance_structure``.
    """
    global AC_SAME

    atoms, _, xyz_coordinates = xyz2mol_local.read_xyz_file(xyz_file)

    # Arguments shared by the Huckel attempt and the fallback.
    shared_kwargs = dict(charge=charge,
                         use_graph=True,
                         allow_charged_fragments=allow_charge,
                         use_atom_maps=True,
                         embed_chiral=True)
    try:
        input_mol = xyz2mol_local.xyz2mol(atoms, xyz_coordinates,
                                          use_huckel=True, **shared_kwargs)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        input_mol = xyz2mol_local.xyz2mol(atoms, xyz_coordinates,
                                          use_huckel=False, **shared_kwargs)

    input_mol = reorder_atoms_to_map(input_mol)
    structure_mol, res_status = choose_resonance_structure(input_mol)
    structure_mol = chiral_tags(structure_mol)
    rdmolops.AssignStereochemistry(structure_mol)
    structure_smiles = Chem.MolToSmiles(structure_mol)

    if check_ac:
        ac = Chem.GetAdjacencyMatrix(input_mol)
        # AC is not defined in this function; it is resolved from an outer
        # (presumably module-level) scope - verify it is set before use.
        if not np.all(AC == ac):
            AC_SAME = False
            print("change in AC: stopping")

    return structure_smiles, GetFormalCharge(structure_mol), res_status
def calculate_md_relaxed_structure(smiles, scale_factor, ridx):
    """
    This function submits an md with a box with size scaled by scale_factor and
    extracts last structure of the trajectory file

    All work happens inside a freshly created ``md`` sub-directory of the
    current working directory. The working directory is restored when the
    function exits, including when one of the steps raises.

    Parameters:
        smiles: SMILES string parsed with ``sanitize=False``.
        scale_factor: passed to ``write_md_input`` and used as the prefix of
            the output file name.
        ridx: identifier used to name the xyz input file (``<ridx>.xyz``).

    Returns:
        Tuple ``(out_file, charge, n_atoms)``: the name of the extracted
        structure file (also copied to the parent directory), the formal
        charge of the molecule and its atom count.

    Raises:
        FileExistsError: from ``os.mkdir`` if ``md`` already exists.
    """
    start_dir = os.getcwd()
    os.mkdir('md')
    os.chdir('md')
    try:
        mol = Chem.MolFromSmiles(smiles, sanitize=False)
        mol = reorder_atoms_to_map(mol)
        n_atoms = mol.GetNumAtoms()
        charge = GetFormalCharge(mol)

        xyz_name = str(ridx)+'.xyz'
        write_xyz_file(mol, xyz_name)
        write_md_input(scale_factor)
        output = run_cmd("/groups/kemi/koerstz/opt/xtb/6.1/bin/xtb {0} --omd --input md.inp --gfn2 --chrg {1}".format(xyz_name, charge))

        with open('md_out.log', 'w') as _file:
            _file.write(output)

        out_file = str(scale_factor)+'_md.xyz'
        extract_last_structure('xtb.trj', out_file)

        check_md_reaction(out_file, charge, smiles, xyz_name)
        shutil.copy(out_file, '../')
    finally:
        # Always leave the md directory so a failure in any step above does
        # not strand the process (and later relative paths) inside it.
        os.chdir(start_dir)

    return out_file, charge, n_atoms
Ejemplo n.º 4
0
def extract_smiles(xyz_file, charge, allow_charge=True):
    """
    uses xyz2mol to extract smiles with as much 3d structural information as
    possible

    The molecule is first built with ``use_huckel=True``; if that attempt
    raises, it is rebuilt with ``use_huckel=False``. The atoms are then
    reordered with ``reorder_atoms_to_map``, a resonance structure is chosen,
    chiral tags are applied and stereochemistry is assigned before the SMILES
    string is generated.

    Parameters:
        xyz_file: path handed to ``xyz2mol_local.read_xyz_file``.
        charge: total charge forwarded to ``xyz2mol``.
        allow_charge: forwarded as ``allow_charged_fragments``.

    Returns:
        Tuple ``(structure_smiles, formal_charge, res_status)`` where
        ``formal_charge`` is ``GetFormalCharge`` of the final molecule and
        ``res_status`` is the second value returned by
        ``choose_resonance_structure``.
    """
    atoms, _, xyz_coordinates = xyz2mol_local.read_xyz_file(xyz_file)

    # Arguments shared by the Huckel attempt and the fallback.
    shared_kwargs = dict(charge=charge,
                         use_graph=True,
                         allow_charged_fragments=allow_charge,
                         use_atom_maps=True,
                         embed_chiral=True)
    try:
        input_mol = xyz2mol_local.xyz2mol(atoms,
                                          xyz_coordinates,
                                          use_huckel=True,
                                          **shared_kwargs)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        input_mol = xyz2mol_local.xyz2mol(atoms,
                                          xyz_coordinates,
                                          use_huckel=False,
                                          **shared_kwargs)

    input_mol = reorder_atoms_to_map(input_mol)
    structure_mol, res_status = choose_resonance_structure(input_mol)
    structure_mol = chiral_tags(structure_mol)
    rdmolops.AssignStereochemistry(structure_mol)
    structure_smiles = Chem.MolToSmiles(structure_mol)

    return structure_smiles, GetFormalCharge(structure_mol), res_status
Ejemplo n.º 5
0
def syncProperties(smiles):
    """
    Derive the molecular formula and formal charge from a SMILES string.

    The formula is reported without its charge annotation. Previously the
    charge was removed with ``formula.replace(str(charge), '')``, which
    deleted every occurrence of those characters: a neutral molecule lost
    all '0' digits (C10H22 became C1H22) and a +1 species lost all '1'
    digits. Only a trailing charge suffix is stripped now.

    Parameters:
        smiles: SMILES string handed to ``Chem.MolFromSmiles``.

    Returns:
        ``(formula, charge, 'calculated properties from structure')`` on
        success, or ``(False, False, 'property calculation error')`` if any
        step raises.
    """
    import re

    try:
        mol = Chem.MolFromSmiles(smiles)
        formula = CalcMolFormula(mol)
        charge = GetFormalCharge(mol)
        # Assumes the charge is written as a trailing '+' / '-' optionally
        # followed by its magnitude (e.g. 'C2H8N+', 'O4S-2') - confirm
        # against the RDKit CalcMolFormula documentation.
        formula = re.sub(r'[+-]\d*$', '', formula)
        return formula, charge, 'calculated properties from structure'
    except Exception:
        # Best-effort contract kept: callers receive an error tuple rather
        # than an exception. KeyboardInterrupt/SystemExit now propagate.
        return False, False, 'property calculation error'
Ejemplo n.º 6
0
    def preprocess(dataset, dir_input):
        """
        Convert a dataset into model inputs and write them under dir_input.

        Reads the 'SMILES', 'Adducts' and 'CCS' columns of ``dataset``.
        Adducts are encoded with ``AdductToOneHotEncoder``. For every SMILES
        that does not contain '.', the molecule is canonicalised, hydrogens
        are added, and fingerprints, an adjacency structure and ten
        descriptors are computed. CCS values are standardised with their
        overall mean and standard deviation.

        Files written (names are appended directly to ``dir_input``, so it
        is expected to end with a path separator): Smiles.txt, molecules,
        adducts, adjacencies, properties, descriptors, mean, std and
        fingerprint_dict.pickle.

        ``radius`` and ``fingerprint_dict`` are not defined here; they are
        taken from an enclosing scope.
        """
        smiles_column = list(dataset['SMILES'])
        adduct_column = dataset['Adducts']
        ccs_column = list(dataset['CCS'])

        # NOTE(review): adducts are encoded for every row, whereas rows whose
        # SMILES contains '.' are skipped below - verify that downstream code
        # does not rely on row alignment between these arrays.
        encoder = AdductToOneHotEncoder()
        encoder.fit(adduct_column)
        adducts = encoder.transform(adduct_column)

        smiles_lines = []
        molecules, adjacencies, properties, descriptors = [], [], [], []
        for row, raw_smiles in enumerate(smiles_column):
            if '.' in raw_smiles:
                continue

            # Round-trip through RDKit before re-parsing and adding hydrogens.
            canonical = Chem.MolToSmiles(Chem.MolFromSmiles(raw_smiles))
            mol = Chem.AddHs(Chem.MolFromSmiles(canonical))

            atoms = create_atoms(mol)
            bond_dict = create_ijbonddict(mol)
            fingerprints = extract_fingerprints(atoms, bond_dict, radius)
            adjacency = create_adjacency(mol)

            smiles_lines.append(canonical + '\n')
            molecules.append(fingerprints)
            adjacencies.append(adjacency)
            properties.append([[ccs_column[row]]])

            mol_descriptors = [
                ExactMolWt(mol),
                MolLogP(mol),
                GetFormalCharge(mol),
                CalcNumRings(mol),
                CalcNumRotatableBonds(mol),
                CalcLogS(mol),
                AcidCount(mol),
                BaseCount(mol),
                APolar(mol),
                BPolar(mol),
            ]
            descriptors.append(mol_descriptors)

        Smiles = ''.join(smiles_lines)

        # Standardise the target values over all retained molecules.
        properties = np.array(properties)
        mean = np.mean(properties)
        std = np.std(properties)
        properties = np.array((properties - mean) / std)

        os.makedirs(dir_input, exist_ok=True)

        with open(dir_input + 'Smiles.txt', 'w') as f:
            f.write(Smiles)

        outputs = (
            ('molecules', molecules),
            ('adducts', adducts),
            ('adjacencies', adjacencies),
            ('properties', properties),
            ('descriptors', descriptors),
            ('mean', mean),
            ('std', std),
        )
        for file_stem, payload in outputs:
            np.save(dir_input + file_stem, payload)

        dump_dictionary(fingerprint_dict,
                        dir_input + 'fingerprint_dict.pickle')
Ejemplo n.º 7
0
def is_ts_correct(rsmi, psmi, irc_start_xyz, irc_end_xyz):
    """
    Check whether the IRC endpoints connect the given reactant and product.

    The SMILES extracted from the two IRC endpoint geometries are compared
    with ``rsmi`` and ``psmi`` in both directions; a match in either
    direction counts as a found transition state. Adjacency matrices of the
    same four structures are also compared, but those comparisons are only
    printed and do not affect the result.

    Returns True when the SMILES comparison matched, otherwise False.
    """
    print(rsmi, psmi)
    reactant_mol = smiles_to_mol(rsmi)
    product_mol = smiles_to_mol(psmi)

    # The reactant's formal charge is used for both endpoint extractions.
    charge = GetFormalCharge(reactant_mol)

    # --- SMILES comparison ---
    irc_start_smi, _start_charge, _start_status = get_smiles(irc_start_xyz,
                                                             charge)
    print("reverse SMILES: ", irc_start_smi)
    irc_end_smi, _end_charge, _end_status = get_smiles(irc_end_xyz, charge)
    print("forward smiles: ", irc_end_smi)

    ts_found = False
    smiles_cases = (
        (rsmi, psmi, "SMILES MATCH: TS FOUND: reactant = reverse"),
        (psmi, rsmi, "SMILES MATCH: TS FOUND: reactant = forward"),
    )
    for expected_start, expected_end, message in smiles_cases:
        if irc_start_smi == expected_start and irc_end_smi == expected_end:
            ts_found = True
            print(message)

    # --- adjacency matrix comparison (informational only) ---
    r_ac = rdmolops.GetAdjacencyMatrix(reactant_mol)
    p_ac = rdmolops.GetAdjacencyMatrix(product_mol)

    irc_start_mol = smiles_to_mol(irc_start_smi)
    irc_end_mol = smiles_to_mol(irc_end_smi)

    irc_start_ac = rdmolops.GetAdjacencyMatrix(irc_start_mol)
    irc_end_ac = rdmolops.GetAdjacencyMatrix(irc_end_mol)

    endpoints_share_ac = np.all(irc_start_ac == irc_end_ac)
    print("found TS for conformational change" if endpoints_share_ac
          else "found non-coonformational change")

    ac_cases = (
        (r_ac, p_ac, "AC MATCH: reactant = reverse"),
        (p_ac, r_ac, "AC MATCH: reactant = forward"),
    )
    for expected_start, expected_end, message in ac_cases:
        if (np.all(expected_start == irc_start_ac)
                and np.all(expected_end == irc_end_ac)):
            print(message)

    return ts_found
Ejemplo n.º 8
0
def get_charge(smiles):
    """
    Return the formal charge of the molecule built from ``smiles``.

    The molecule is obtained with ``get_mol`` and its charge with
    ``GetFormalCharge``.
    """
    return GetFormalCharge(get_mol(smiles))