def write_read_cycle(self):
    """Round-trip ``self.mol`` through SMILES and compare with a reference.

    ``self.mol`` is written with ``write_smiles`` and read back twice, once
    with implicit and once with explicit hydrogens. Each result is compared
    against a copy of ``self.mol`` normalised to the same hydrogen style.
    The intermediate graphs and the SMILES string are passed to ``note``.
    """
    smiles = write_smiles(self.mol)
    note(self.mol.nodes(data=True))
    note(self.mol.edges(data=True))
    note(smiles)

    # Attributes the reference needs on every node before it is compared
    # with what the reader produces.
    node_defaults = {'charge': 0, 'hcount': 0}

    # self.mol may mix implicit and explicit hydrogens, but a molecule cannot
    # be read back in mixed mode. The reference is therefore normalised to
    # one style at a time, and both styles are checked.
    for expl_H in (False, True):
        ref_mol = self.mol.copy()
        for node in ref_mol:
            attributes = ref_mol.nodes[node]
            for key, val in node_defaults.items():
                attributes.setdefault(key, val)

        normalise_hydrogens = (add_explicit_hydrogens if expl_H
                               else remove_explicit_hydrogens)
        normalise_hydrogens(ref_mol)

        found = read_smiles(
            smiles,
            explicit_hydrogen=expl_H,
            reinterpret_aromatic=False,
        )
        note(found.nodes(data=True))
        note(found.edges(data=True))
        assertEqualGraphs(ref_mol, found)
def test_write_smiles(node_data, edge_data, expl_h):
    """Check that a molecule survives a write_smiles / read_smiles round trip.

    The molecule is built with ``make_mol`` from ``node_data`` and
    ``edge_data``, written to SMILES, read back with ``explicit_hydrogen``
    set to ``expl_h`` (aromaticity is not reinterpreted), and compared with
    the original graph.
    """
    expected = make_mol(node_data, edge_data)
    round_tripped = read_smiles(
        write_smiles(expected),
        explicit_hydrogen=expl_h,
        reinterpret_aromatic=False,
    )
    assertEqualGraphs(expected, round_tripped)
Beispiel #3
0
def LAZY_maker(infile,outfile,DOI,SOFTWARE,METHOD,NAME,EMAIL):
    """Parse a simulation output file and write its record as ``<outfile>.json``.

    The lines of ``infile`` are handed to ``OutputParser``; the resulting
    dictionary is extended with metadata, derived quantities and a SMILES
    string, converted with ``dict_to_pif`` and written to disk.

    Args:
        infile: Path of the output file to parse. Its parent directory name
            (minus the last five characters) is stored as ``'PHASE'``, so the
            path must use ``/`` separators and contain at least one directory.
        outfile: Output path without extension; ``.json`` is appended.
        DOI: Stored under the ``'DOI'`` key.
        SOFTWARE: Accepted but not used by this function.
            NOTE(review): confirm whether it should be stored like DOI/METHOD.
        METHOD: Stored under the ``'METHOD'`` key.
        NAME: Stored under the ``'NAME'`` key.
        EMAIL: Stored under the ``'EMAIL'`` key.
    """
    # Context managers guarantee the handles are closed even if reading,
    # parsing or writing raises.
    with open(infile,'r') as op:
        text = op.read().splitlines()

    DataDictionary = OutputParser(text)
    DataDictionary['DOI'] = DOI
    DataDictionary['METHOD'] = METHOD
    DataDictionary['NAME'] = NAME
    DataDictionary['EMAIL'] = EMAIL

    # Normalise field 3 of the first ARCH entry by its sum and keep the
    # first component.
    first_arch = DataDictionary['ARCH'][0][3]
    DataDictionary['fAfC'] = (first_arch/np.sum(first_arch))[0]

    # With a single ARCH entry NSC is 0; otherwise it is taken from the
    # second entry.
    if len(DataDictionary['ARCH'])==1:
        DataDictionary['NSC'] = 0
    else:
        DataDictionary['NSC'] = DataDictionary['ARCH'][1][3][0]

    # Parent directory name of infile with its last five characters dropped.
    DataDictionary['PHASE'] =infile.split('/')[-2][:-5]

    chainlist = list_convert_object(DataDictionary['ARCH'])
    G = molecule(chainlist).create_network()
    DataDictionary['SMILES'] = write_smiles(G)
    DataDictionary['CLASSIFICATION'] = ['Polymers','Bottlebrushes','Computational','SCFT']

    # dict_to_pif's result is written with file.write, so it is presumably
    # an already-serialised string -- TODO confirm.
    pif_dict = dict_to_pif(DataDictionary)
    print(f'Writing...{outfile}')
    with open(f'{outfile}.json','w+') as op:
        op.write(pif_dict)
def graph_to_canonical_smiles(G, allHsExplicit=True):
    """Return a canonical SMILES string for the molecule graph ``G``.

    ``G`` is a graph representing a molecule, with node attribute
    ``'element'``. It is written to SMILES with ``write_smiles``, loaded
    into Indigo, aromatized, and Indigo's canonical SMILES is returned.

    ``allHsExplicit`` is accepted but not used by this function.
    """
    graph_smiles = write_smiles(G)
    indigo = Indigo()

    indigo_mol = indigo.loadMolecule(graph_smiles)
    indigo_mol.aromatize()
    # Per the original author's note, the returned string does not include H.
    return indigo_mol.canonicalSmiles()
def find_elements():
    """List every token that appears in the rewritten SMILES strings.

    For each entry of the module-level ``paths`` mapping, the second column
    of ``names_smiles.txt`` is read as SMILES. Each molecule is parsed, its
    ``'stereo'`` node attributes are dropped, and it is written back with
    ``write_smiles``. The resulting string is split into tokens: an uppercase
    letter followed by a lowercase letter outside ``b, c, o, p, s`` forms one
    two-character token; every other character is a token of its own.

    The distinct tokens (in first-seen order) are printed and written to
    ``element_list.txt`` separated by spaces, followed by the largest number
    of tokens found in a single SMILES string.
    """
    # Lowercase letters that stay tokens of their own instead of being
    # treated as the second character of an element symbol.
    standalone_lowercase = ['b', 'c', 'o', 'p', 's']
    seen_tokens = []
    max_token_count = 0

    for path_key in paths:
        directory = paths[path_key]
        table = pd.read_csv(os.path.join(directory, 'names_smiles.txt'))
        all_smiles = np.array(table.iloc[:, 1])

        for raw_smiles in all_smiles:
            mol = read_smiles(raw_smiles)
            for node in mol.nodes:
                # discard stereo information by hand
                mol.nodes[node].pop('stereo', None)

            rewritten = write_smiles(mol)
            last_index = len(rewritten) - 1
            token_count = 0
            for pos, char in enumerate(rewritten):
                # Second character of a two-letter element: already consumed
                # together with the preceding uppercase letter.
                if (pos > 0 and char.islower()
                        and char not in standalone_lowercase
                        and rewritten[pos - 1].isupper()):
                    continue

                token = char
                if char.isupper() and pos < last_index:
                    following = rewritten[pos + 1]
                    if following.islower() and following not in standalone_lowercase:
                        # an element written with two characters
                        token = char + following

                token_count += 1
                if token not in seen_tokens:
                    seen_tokens.append(token)

            max_token_count = max(max_token_count, token_count)

    print(seen_tokens)
    with open('element_list.txt', 'w') as f:
        f.write("".join("%s " % item for item in seen_tokens))
        f.write(f'{max_token_count}')
Beispiel #6
0








#for idx, ele in enumerate(string):
#    mol.nodes[idx]['element'] = ele
#    mol.add_edges_from([(idx,idx+1)])
#    count+=1
    
    
    
# Print the SMILES string that write_smiles produces for the graph G.
# NOTE(review): G is not defined in the visible part of this script; it is
# presumably built earlier -- confirm.
print(write_smiles(G))
# The SMILES lines below are presumably outputs recorded by the author,
# before and after the (commented-out) fill_valence call -- confirm.
# [O-]C(=O)C([C])([C])[C]
# fill_valence(mol, respect_hcount=True)
#print(write_smiles(mol))
# [O-]C(=O)C(C)(C)C



#plt.subplot(121)

#nx.draw(G, with_labels=True, font_weight='bold')
#plt.subplot(122)

# nx.draw(G)
#plt.show()
def augment_smiles(rdata, data_choice, make_1d_pading=True):
    """Build, or load from cache, the augmented SMILES data for ``data_choice``.

    If ``data/{data_choice}/data.json`` exists it is loaded and returned
    unchanged. Otherwise each record of ``rdata`` is parsed with
    ``read_smiles``, its ``'stereo'`` node attributes are dropped, and one
    SMILES string is written per start node: the degree-1 ("leaf") nodes, or
    node 0 when the molecule has none. The strings are encoded with
    ``generate_onehots``, packed with ``write_dict`` and the full list is
    saved to ``data/{data_choice}/data.json`` (the directory is created if
    it is missing).

    Args:
        rdata: Iterable of records with ``'name'`` and ``'SMILES'`` keys, plus
            ``'label'`` and ``'weight'`` unless ``data_choice`` is ``'test'``.
        data_choice: Dataset name; selects the cache directory. ``'test'``
            disables labels, weights and the subsampling of start nodes.
        make_1d_pading: Accepted but not used by this function.

    Returns:
        A ``(new_data, longest_len)`` tuple; ``longest_len`` is always 320.
    """
    # Fixed length handed back to callers. The length recorded at the end of
    # element_list.txt is not used.
    longest_len = 320
    # Upper bound on the SMILES variants kept per molecule outside 'test'.
    max_variants = 5

    cache_dir = f'data/{data_choice}'
    cache_path = f'{cache_dir}/data.json'
    if os.path.exists(cache_path):
        with open(cache_path, 'r') as f:
            return json.load(f), longest_len

    if not os.path.exists('./element_list.txt'):
        print('no element list found!\n')
        find_elements()  # build the element list if it has not been obtained yet

    with open('element_list.txt') as f:
        lines = f.readlines()
    # Space-separated tokens; the last field is the recorded length, dropped.
    element_list = lines[0].split(sep=' ')[:-1]

    is_test = data_choice == 'test'
    new_data = []
    for item in rdata:
        name = item['name']
        smiles = item['SMILES']
        if not is_test:
            label = item['label']
            weight = item['weight']

        mol = read_smiles(smiles)
        for node in mol.nodes:
            # discard stereo information by hand
            mol.nodes[node].pop('stereo', None)

        node_ids = list(mol.nodes)
        degrees = np.array([mol.degree(idx) for idx in node_ids])
        # Leaf nodes serve as alternative start points, giving several
        # different SMILES strings for the same molecule.
        leaf_nodes = np.array(node_ids, dtype=int)[degrees == 1]
        if leaf_nodes.shape[0] == 0:
            leaf_nodes = [0]

        if len(leaf_nodes) > max_variants and not is_test:
            # Evenly spaced subsample. The stride alone can still yield more
            # than max_variants entries (e.g. 6, 7 or 11 leaves), so the
            # result is explicitly capped.
            stride = len(leaf_nodes) // (max_variants - 1)
            leaf_nodes = leaf_nodes[::stride][:max_variants]

        try:
            new_smiles_list = [write_smiles(mol, start=node_ids[leaf_node])
                               for leaf_node in leaf_nodes]
            onehots_list = generate_onehots(new_smiles_list, element_list)
            if is_test:
                tmp = [write_dict(name, onehots, variant)
                       for onehots, variant in zip(onehots_list, new_smiles_list)]
            else:
                tmp = [write_dict(name, onehots, variant, label, weight, len(leaf_nodes))
                       for onehots, variant in zip(onehots_list, new_smiles_list)]
        except Exception as inst:
            # Best effort: report the problem and skip this molecule.
            print(inst)
            continue

        new_data.extend(tmp)

    # Convert the one-hot arrays to lists so they can be serialised as JSON.
    for item in new_data:
        item['onehots'] = item['onehots'].tolist()

    # Create the cache directory on demand so the computed data is not lost
    # to a FileNotFoundError at the very end.
    os.makedirs(cache_dir, exist_ok=True)
    with open(cache_path, 'w') as f:
        json.dump(new_data, f)

    return new_data, longest_len