def find(smile, smart_torsion="[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]",
         filter_smart_torsion=None, positions=None):
    """Find the positions of the torsions in the molecule.

    Args:
        smile (str): SMILES string of the molecule.
        smart_torsion (str): SMARTS pattern defining a torsion; the default
            pattern is "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]".
        filter_smart_torsion (str): optional SMARTS pattern. A torsion is
            dropped when its central bond (match elements 1 and 2) equals,
            in either direction, the central bond of a match of this
            pattern.
        positions: if not None, it is returned unchanged and nothing else
            is evaluated.

    Returns:
        ``positions`` when supplied; otherwise the remaining torsion
        matches after being passed through ``cleaner``.

    Raises:
        ValueError: if the SMILES string or one of the SMARTS patterns
            cannot be parsed.
    """
    if positions is not None:
        return positions

    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        raise ValueError("The smile is invalid")

    pattern_tor = Chem.MolFromSmarts(smart_torsion)
    if pattern_tor is None:
        # Fail here instead of with an opaque error inside GetSubstructMatches.
        raise ValueError("The torsion SMARTS pattern is invalid")
    torsion = list(mol.GetSubstructMatches(pattern_tor))

    if filter_smart_torsion:
        pattern_custom = Chem.MolFromSmarts(filter_smart_torsion)
        if pattern_custom is None:
            raise ValueError("The filter SMARTS pattern is invalid")
        custom = mol.GetSubstructMatches(pattern_custom)
        # Only inspect match elements when both sides have matches, so that
        # short matches are never indexed unnecessarily.
        if torsion and custom:
            second, third = ig(1), ig(2)
            # Store every filtered central bond in both directions so a
            # single set lookup replaces the pairwise comparison.
            excluded = set()
            for match in custom:
                a, b = second(match), third(match)
                excluded.update(((a, b), (b, a)))
            torsion = [
                match for match in torsion
                if (second(match), third(match)) not in excluded
            ]

    return cleaner(torsion)
def find(smiles, smarts_cistrans=None, positions=None):
    """Find the positions of the cistrans bonds in the molecule.

    Args:
        smiles (str): SMILES string of the molecule.
        smarts_cistrans (str): SMARTS pattern defining a cistrans bond;
            required whenever ``positions`` is not given.
        positions: if not None, it is returned unchanged and nothing else
            is evaluated.

    Returns:
        ``positions`` when supplied; otherwise the substructure matches of
        ``smarts_cistrans`` after being passed through ``cleaner``.

    Raises:
        ValueError: if ``smarts_cistrans`` is missing, or if the SMILES
            string or the SMARTS pattern cannot be parsed.
    """
    if positions is not None:
        return positions

    if smarts_cistrans is None:
        # The default would otherwise be handed to the SMARTS parser and
        # fail with an unhelpful argument-type error.
        raise ValueError(
            "A cistrans SMARTS pattern is required when positions is None"
        )

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError("The smiles is invalid")

    pattern_cistrans = Chem.MolFromSmarts(smarts_cistrans)
    if pattern_cistrans is None:
        raise ValueError("The cistrans SMARTS pattern is invalid")

    return cleaner(list(mol.GetSubstructMatches(pattern_cistrans)))
def find(smiles, smarts_torsion="[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]",
         filter_smarts_torsion=None, positions=None):
    """Find the positions of rotatable bonds in the molecule.

    Args(required):
        smiles (str)
    Args(optional):
        smarts_torsion (str): pattern definition for the torsions; if not
            defined, the default pattern
            "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]" will be used
        filter_smarts_torsion (str): pattern definition for the torsions
            to be ignored. A torsion is dropped when its central bond
            (match elements 1 and 2) equals, in either direction, the
            central bond of a match of this pattern
        positions (list of tuples): if the positions (in terms of atom
            indices) of the torsions are known, they can be passed
            directly and are returned unchanged
    Returns:
        ``positions`` when supplied; otherwise the remaining torsion
        matches after being passed through ``cleaner``.
    Raises:
        ValueError: if the SMILES string or one of the SMARTS patterns
            cannot be parsed.
    """
    if positions is not None:
        return positions

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError("The smiles is invalid")

    pattern_tor = Chem.MolFromSmarts(smarts_torsion)
    if pattern_tor is None:
        # Fail here instead of with an opaque error inside GetSubstructMatches.
        raise ValueError("The torsion SMARTS pattern is invalid")
    torsion = list(mol.GetSubstructMatches(pattern_tor))

    if filter_smarts_torsion:
        pattern_custom = Chem.MolFromSmarts(filter_smarts_torsion)
        if pattern_custom is None:
            raise ValueError("The filter SMARTS pattern is invalid")
        custom = mol.GetSubstructMatches(pattern_custom)
        # Only inspect match elements when both sides have matches, so that
        # short matches are never indexed unnecessarily.
        if torsion and custom:
            second, third = ig(1), ig(2)
            # Store every filtered central bond in both directions so a
            # single set lookup replaces the pairwise comparison.
            excluded = set()
            for match in custom:
                a, b = second(match), third(match)
                excluded.update(((a, b), (b, a)))
            torsion = [
                match for match in torsion
                if (second(match), third(match)) not in excluded
            ]

    return cleaner(torsion)
def find(smiles, smarts_torsion="[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]",
         filter_smarts_torsion=None, positions=None):
    """Find the positions of rotatable bonds in the molecule.

    Args(required):
        smiles (str)
    Args(optional):
        smarts_torsion (str): pattern definition for the torsions; if not
            defined, the default pattern
            "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]" will be used
        filter_smarts_torsion (str): pattern definition for the torsions
            to be ignored. A torsion is dropped when its central bond
            (match elements 1 and 2) equals, in either direction, the
            central bond of a match of this pattern
        positions (list of tuples): if the positions (in terms of atom
            indices) of the torsions are known, they can be passed
            directly and are returned unchanged
    Returns:
        ``positions`` when supplied; otherwise the remaining torsion
        matches after being passed through ``cleaner``.
    Raises:
        ValueError: if the SMILES string or one of the SMARTS patterns
            cannot be parsed.
    """
    if positions is not None:
        return positions

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError("The smiles is invalid")

    pattern_tor = Chem.MolFromSmarts(smarts_torsion)
    if pattern_tor is None:
        # Fail here instead of with an opaque error inside GetSubstructMatches.
        raise ValueError("The torsion SMARTS pattern is invalid")
    torsion = list(mol.GetSubstructMatches(pattern_tor))

    if filter_smarts_torsion:
        pattern_custom = Chem.MolFromSmarts(filter_smarts_torsion)
        if pattern_custom is None:
            raise ValueError("The filter SMARTS pattern is invalid")
        custom = mol.GetSubstructMatches(pattern_custom)
        # Only inspect match elements when both sides have matches, so that
        # short matches are never indexed unnecessarily.
        if torsion and custom:
            second, third = ig(1), ig(2)
            # Store every filtered central bond in both directions so a
            # single set lookup replaces the pairwise comparison.
            excluded = set()
            for match in custom:
                a, b = second(match), third(match)
                excluded.update(((a, b), (b, a)))
            torsion = [
                match for match in torsion
                if (second(match), third(match)) not in excluded
            ]

    return cleaner(torsion)
from utilities import cleaner

# ---------------------------------------------------------------------------
# Train Modification
# Reads the raw training file, splits each line into a user (first
# whitespace-separated token) and a tweet (remaining tokens re-joined with
# single spaces), cleans every tweet and writes the result as CSV.
# ---------------------------------------------------------------------------
k = 1
with open("./data/given/train_tweets.txt", "r") as rf:
    ct = rf.readlines()

# Tokenise each line once; both the user and the tweet columns derive from it.
_tokens = [line.split() for line in ct]
ct_user = [tokens[0] for tokens in _tokens]
ct_tweet = [" ".join(tokens[1:]) for tokens in _tokens]
df = pd.DataFrame({"User": ct_user, "Tweets": ct_tweet})

# df["Tweets"] holds exactly the entries of ct_tweet in the same order, so
# the cleaned column is built directly from the list.
clean_tweets = [cleaner(tweet) for tweet in ct_tweet]

pd.DataFrame({
    "User": ct_user,
    "Tweets": ct_tweet,
    "Clean Tweets": clean_tweets,
}).to_csv("./data/train_tweets_mod2.csv", index=False)

# ---------------------------------------------------------------------------
# Test Modification
# The unlabeled test file has no user column: every raw line is one tweet.
# ---------------------------------------------------------------------------
k = 1
with open("./data/given/test_tweets_unlabeled.txt", "r") as rf:
    ct = rf.readlines()
test_df = pd.DataFrame({"Tweets": ct})