Python read_smilesの例、pysmiles.read_smiles Pythonの例

コード例 #1

0

ファイルを表示

ファイル: datap.py プロジェクト: xiaoas/PRML2020PJ

def snomaData(fname, popposi = False):
    """Load a CSV of SMILES strings and labels as torch-geometric graphs.

    The first CSV row is treated as a header and skipped. For every other
    row, column 0 is parsed with pysmiles and column 1 is read as an
    integer label.

    Each node gets a 40-wide feature row: a 32-way one-hot selected by
    ``elemap[element]`` followed by an 8-way one-hot selected by the
    node's ``hcount``. Every bond is stored twice, once per direction.

    Args:
        fname: path of the CSV file to read.
        popposi: when true, a sample whose label equals 1 is added 24 times
            (the same object repeated) instead of once.

    Returns:
        list of ``torch_geometric.data.Data`` objects.
    """
    element_rows = torch.eye(32)
    hcount_rows = torch.eye(8)
    samples = []
    with open(fname) as handle:
        rows = csv.reader(handle)
        next(rows, None)  # header line
        for record in rows:
            mol = pysmiles.read_smiles(record[0])
            n_atoms = mol.number_of_nodes()
            n_bonds = mol.number_of_edges()

            x = torch.empty((n_atoms, 40))
            edge_index = torch.empty((2, 2 * n_bonds))
            edge_attr = torch.empty((2 * n_bonds, 1), dtype=torch.long)

            elements = mol.nodes(data='element')
            hcounts = mol.nodes(data='hcount')
            for atom in range(n_atoms):
                x[atom] = torch.cat((element_rows[elemap[elements[atom]]],
                                     hcount_rows[hcounts[atom]]))

            for bond, (src, dst, order) in enumerate(mol.edges(data='order')):
                forward = 2 * bond
                backward = forward + 1
                edge_index[:, forward] = torch.tensor((src, dst))
                edge_index[:, backward] = torch.tensor((dst, src))

                # Bond order minus one, bumped by 0.5 at each threshold it
                # exceeds; the value is then written into an integer tensor.
                bond_code = order - 1
                if bond_code > 0:
                    bond_code += 0.5
                if bond_code > 0.5:
                    bond_code += 0.5
                edge_attr[forward, 0] = bond_code
                edge_attr[backward, 0] = bond_code

            label = torch.tensor([[int(record[1])]], dtype=torch.float)
            sample = torch_geometric.data.Data(
                x=x,
                edge_index=edge_index.to(torch.long),
                edge_attr=edge_attr,
                y=label)
            if sample.y == 1 and popposi:
                samples.extend([sample] * 24)
            else:
                samples.append(sample)
    return samples

コード例 #2

0

ファイルを表示

ファイル: test_write_smiles.py プロジェクト: sailfish009/pysmiles

def test_write_smiles(node_data, edge_data, expl_h):
    """Write a molecule to SMILES, read it back, and compare both graphs."""
    expected = make_mol(node_data, edge_data)
    round_tripped = read_smiles(
        write_smiles(expected),
        explicit_hydrogen=expl_h,
        reinterpret_aromatic=False,
    )
    assertEqualGraphs(expected, round_tripped)

コード例 #3

0

ファイルを表示

    def __init__(self,
                 smiles_string: str,
                 y_list: list,
                 atom_info_path='../raw_data/atom_info.txt'):
        """
        Build the graph data object for one molecule.

        Args:
            smiles_string (string): SMILES for the molecule.
            y_list (list or int): list of multilabels or single label.
            atom_info_path (string): path kept on the instance while the
                features are extracted; removed again before returning.
        """
        import contextlib

        # create graph from smiles
        # stdout is silenced to hide a pysmiles warning about stereochemistry.
        # The context managers close the devnull handle and put the previous
        # stream back even when parsing raises.
        with open(os.devnull, 'w') as devnull, \
                contextlib.redirect_stdout(devnull):
            self.graph = read_smiles(smiles_string)

        self.atom_info_path = atom_info_path

        if isinstance(y_list, list):
            y = torch.tensor(y_list, dtype=torch.float32)
        else:
            # a single label becomes a 1 x 1 tensor
            y = torch.tensor(y_list, dtype=torch.float32).view(1, -1)

        # inherit superclass from torch-geometric
        super().__init__(x=torch.tensor(self.extract_features(),
                                        dtype=torch.float),
                         edge_index=torch.tensor(self.graph_to_edge_index(),
                                                 dtype=torch.long),
                         y=y)

        # remove graph attribute, necessary to inherit from superclass
        del self.graph
        del self.atom_info_path

コード例 #4

0

ファイルを表示

def get_reward_fitness(state, X, ts, char_idx, chars, net):
    """Score generated SMILES strings with a classifier network.

    Args:
        state: parent strings compared element-wise against ``X``; the
            comparison result must expose ``.values`` (presumably a pandas
            Series - confirm against the caller).
        X: generated strings, either an object with ``.values`` or an array
            that supports ``reshape``.
        ts, char_idx, chars: forwarded unchanged to ``dimY`` to encode ``X``.
        net: callable model; column 0 of its output is treated as the
            "fake" class and column 1 as the other class.

    Returns:
        Array with one row per string: ``[smiles, reward, fitness]``.
        Reward is 1 or -10; it is forced to -10 when the string equals its
        parent or cannot be parsed by ``read_smiles``.
    """
    X_seed = dimY(X, ts, char_idx, chars)
    out = net(T.from_numpy(X_seed).float(), None).detach().numpy()

    out_cat = np.argmax(out, axis=1)
    # Penalizing fitness for fake data
    fitness = np.where(out_cat == 0, -1 * out[:, 0], out[:, 1])
    # Reward +1 for correct and -10 for wrong classification
    reward = np.where(out_cat == 0, -10, 1)

    arr = np.hstack((reward.reshape(len(reward), 1),
                     fitness.reshape(len(fitness), 1)))
    try:
        X = X.values.reshape(len(X.values), 1)
    except AttributeError:
        # X has no ``.values``: treat it as an array already
        X = X.reshape(len(X), 1)
    # [smiles,reward,fitness]
    arr = np.hstack((X, arr))

    # Penalizing reward for generating child gene same as parent
    same_genes = (state == X.reshape(len(X))).values
    for i, is_same in enumerate(same_genes):
        if is_same:
            arr[i][1] = -10

    # Penalizing for wrong smiles
    for i, row in enumerate(arr):
        try:
            read_smiles(row[0])
        except Exception:
            # any parse failure marks the string as invalid; interpreter
            # exits (KeyboardInterrupt, SystemExit) are no longer swallowed
            arr[i][1] = -10

    return arr

コード例 #5

0

ファイルを表示

ファイル: test_hypothesis.py プロジェクト: sailfish009/pysmiles

    def write_read_cycle(self):
        """Write self.mol as SMILES and check it reads back unchanged.

        The comparison is made twice, once with implicit and once with
        explicit hydrogens, because self.mol may mix both styles while
        read_smiles can only produce one of them.
        """
        smiles = write_smiles(self.mol)
        for detail in (self.mol.nodes(data=True),
                       self.mol.edges(data=True),
                       smiles):
            note(detail)

        for expl_H in (False, True):
            ref_mol = self.mol.copy()
            # Fill in the attributes the reader always sets.
            for node in ref_mol:
                attrs = ref_mol.nodes[node]
                attrs.setdefault('charge', 0)
                attrs.setdefault('hcount', 0)

            normalise = (add_explicit_hydrogens if expl_H
                         else remove_explicit_hydrogens)
            normalise(ref_mol)

            found = read_smiles(smiles,
                                explicit_hydrogen=expl_H,
                                reinterpret_aromatic=False)
            note(found.nodes(data=True))
            note(found.edges(data=True))
            assertEqualGraphs(ref_mol, found)

コード例 #6

0

ファイルを表示

def sm2graph(smiles,size, weight = None):
    """Build the random-walk normalised adjacency matrix of a molecule.

    The SMILES string is parsed with RDKit (via ``mol_to_nx``); if that
    fails for any reason pysmiles is used instead.

    Args:
        smiles: SMILES string of the molecule.
        size: side length of the zero-padded square output; must be at
            least the number of atoms.
        weight: edge attribute used as adjacency weight, or None for 1.

    Returns:
        Tuple ``(rwL, mole, ar)``: the ``size x size`` matrix
        ``D^-1 (A + I)`` where ``D`` holds ``degree + 1`` per atom, the
        node view of elements, and the node view of aromatic flags.
    """
    try:
        mol = mol_to_nx(Chem.MolFromSmiles(smiles))
    except Exception:
        # RDKit path failed (e.g. MolFromSmiles returned None): fall back
        mol = read_smiles(smiles)

    # adjacency matrix with self loops, rows in graph node order
    # (to_numpy_matrix was removed in NetworkX 3; the array form is equal)
    adj = nx.to_numpy_array(mol, weight=weight)
    adj = adj + np.eye(adj.shape[0])
    adj = np.pad(adj, (0, size - adj.shape[0]))

    # Inverse degree matrix. It is indexed by the node's position in the
    # graph, which is the row order of ``adj``. Indexing by ``label - 1``
    # sent node 0 to the last row whenever labels start at zero.
    inv_degree = np.zeros((size, size))
    for row, (_, degree) in enumerate(mol.degree):
        inv_degree[row, row] = 1 / (degree + 1)

    # random walk normalized matrix
    rwL = inv_degree @ adj

    # feature
    mole = mol.nodes(data='element')
    # is aromatic
    ar = mol.nodes(data='aromatic')
    return rwL, mole, ar

コード例 #7

0

ファイルを表示

ファイル: molecule_widget.py プロジェクト: marrink-lab/pycgbuilder

    def get_value(self):
        """Return the molecule described by the PDB and/or SMILES widgets.

        Returns:
            The resulting molecule graph (the SMILES molecule when both
            inputs are given, enriched with the PDB attributes), or False
            when an input could not be read, the two inputs are not
            isomorphic, or no usable molecule was produced.
        """
        filename = self._pth_widget.text()
        if filename:
            try:
                pdb_mol = read_pdb(filename)
            except Exception as err:
                # Clear the bad path and show the error to the user.
                self._pth_widget.setText('')
                dialog = QErrorMessage()
                dialog.showMessage(str(err))
                dialog.exec_()
                return False
            # Only the first entry returned by read_pdb is used.
            pdb_mol = pdb_mol[0]
            pdb_mol.graph['name'] = Path(filename).stem
            if not pdb_mol.edges:
                # No bonds were read: let MakeBonds run on a temporary
                # system containing this molecule.
                system = System()
                system.add_molecule(pdb_mol)
                MakeBonds(allow_name=False).run_system(system)
            if not self.hydrogen_checkbox.checkState():
                remove_explicit_hydrogens(pdb_mol)
        else:
            pdb_mol = None
        smiles = self._smiles_widget.text()
        if smiles:
            try:
                smiles_mol = read_smiles(smiles)
            except Exception as err:
                dialog = QErrorMessage()
                dialog.showMessage(str(err))
                dialog.exec_()
                self._smiles_widget.setText('')
                return False

            smiles_mol.graph['smiles'] = smiles
            smiles_mol.graph['name'] = smiles
            if self.hydrogen_checkbox.checkState():
                add_explicit_hydrogens(smiles_mol)

        else:
            smiles_mol = None

        # NOTE(review): these are truthiness tests on the graph objects, so
        # a graph object that evaluates as false is treated like a missing
        # input - confirm this is intended.
        if pdb_mol and smiles_mol:
            # Match the two graphs on the 'element' node attribute.
            gm = nx.isomorphism.GraphMatcher(
                pdb_mol, smiles_mol,
                nx.isomorphism.categorical_node_match('element', None))
            # First isomorphism found, or an empty dict when there is none.
            match = next(gm.isomorphisms_iter(), {})
            if not match:
                dialog = QErrorMessage()
                dialog.showMessage(
                    'Smiles and PDB molecule are not isomorphic!')
                dialog.exec_()
                return False
            # Copy the PDB node and graph attributes onto the SMILES graph.
            for pdb_idx, smi_idx in match.items():
                smiles_mol.nodes[smi_idx].update(pdb_mol.nodes[pdb_idx])
            smiles_mol.graph.update(pdb_mol.graph)

        # The SMILES molecule takes precedence over the PDB one.
        molecule = smiles_mol or pdb_mol
        if not molecule:
            return False
        return molecule

コード例 #8

0

ファイルを表示

ファイル: data_handler.py プロジェクト: bismuth1102/Siamese_GCN

 def res(self, smile, adj_list, feature_list):
     """Append the adjacency and one-hot matrices of one molecule.

     Args:
         smile: SMILES string, parsed with explicit hydrogens.
         adj_list: list that receives the adjacency matrix as an array.
         feature_list: list that receives the ``oneHot`` feature array.

     A string that pysmiles cannot parse is printed and skipped, so both
     lists stay the same length. Previously execution continued after the
     print and failed on the unbound ``mol_with_H``.
     """
     try:
         mol_with_H = read_smiles(smile, explicit_hydrogen=True)
     except Exception:
         print(smile)
         return
     # to_numpy_matrix was removed in NetworkX 3; np.array() of either
     # result is the same 2-D array
     A = nx.to_numpy_array(mol_with_H)
     X = oneHot(smile)
     adj_list.append(np.array(A))
     feature_list.append(np.array(X))

コード例 #9

0

ファイルを表示

ファイル: service.py プロジェクト: ersilia-os/eos0abc

 def predict(self, input: JsonSerializable):
     """
     This is a dummy test model.
     It counts atoms in a SMILES string and returns the per-element
     counts as a JSON object string.
     """
     mol = read_smiles(input, explicit_hydrogen=True)
     elements = (element for _, element in mol.nodes(data="element"))
     return json.dumps(collections.Counter(elements))

コード例 #10

0

ファイルを表示

def evaluate_smiles(smiles_string):
    """Run the module-level model on one SMILES string and print its class.

    The molecule is parsed with explicit hydrogens, its elements are
    one-hot encoded with ``element_to_onehot`` and its edge list is passed
    as ``edge_index``. The printed label is one of 'insoluble',
    'slightly soluble' or 'soluble'.
    """
    labels = ['insoluble', 'slightly soluble', 'soluble']

    # decode the SMILES string
    graph = read_smiles(smiles_string, explicit_hydrogen=True)

    # element column of the (node, element) pairs -> one-hot features
    elements = np.asarray(graph.nodes(data='element'))[:, 1]
    onehot = element_to_onehot(elements)

    # edge array reshaped to the 2 x E layout torch geometric expects
    bonds = np.asarray(graph.edges)
    sources = bonds[:, 0]
    targets = bonds[:, 1]
    edge_index = np.asarray([sources, targets])

    sample = Data(x=torch.tensor(onehot, dtype=torch.float),
                  edge_index=torch.tensor(edge_index, dtype=torch.long))
    sample = sample.to(device)  # send data to device memory

    model.eval()  # evaluation mode
    scores = torch.softmax(model(sample), dim=0)
    print(labels[torch.argmax(scores).item()])

コード例 #11

0

ファイルを表示

ファイル: one_hot.py プロジェクト: bismuth1102/Siamese_GCN

def oneHot(smiles):
    """Return a 4-column one-hot row per atom of a SMILES string.

    Columns 0-2 mark "C", "N" and "O"; any other atom of the
    implicit-hydrogen graph gets an all-zero row. One ``[0, 0, 0, 1]`` row
    is appended for every extra node the explicit-hydrogen graph has.
    """
    column_of = {"C": 0, "N": 1, "O": 2}
    heavy = read_smiles(smiles)
    with_h = read_smiles(smiles, explicit_hydrogen=True)

    rows = []
    for _, element in heavy.nodes(data='element'):
        row = [0, 0, 0, 0]
        column = column_of.get(element)
        if column is not None:
            row[column] = 1
        rows.append(row)

    # nodes that only exist in the explicit-hydrogen graph
    extra_nodes = range(len(heavy), len(with_h))
    rows.extend([0, 0, 0, 1] for _ in extra_nodes)
    return rows

コード例 #12

0

ファイルを表示

def build_graph(smiles):
    """
    Constructs a DGL graph out of a SMILES representation of a molecule from the train/test data.

    The string is first parsed by pysmiles into a NetworkX graph whose nodes
    carry 'element', 'hcount' and 'aromatic' and whose edges carry 'order';
    those attributes are turned into tensors and attached to the DGL graph.

    :param smiles: a string object of SMILES format
    :return: the graph returned by ``dgl.from_networkx`` with
        ``ndata['node_feats']`` (per-atom features) and
        ``edata['edge_feats']`` (bond orders as float32) set.
    """
    '''
    can access node data and edge data when the graph is in networkx format
    dgl.from_networkx(g) converts networkx to dgl graph but the node data and edge data doesnt seem to be transferred
    Goal: save the node feats and edge feats of networkx as tensor and set them to dgl graph ndata and edata
    Question: Do we save ndata as ('C', 'C', 'C', 'O', 'C') or do we create one hot vectors like in the hw
    '''
    # read the smile graphs in using pysmiles & build network
    g = pysmiles.read_smiles(smiles)

    # get the features from the graph and convert to tensor
    elems = g.nodes(data='element')
    h_count = g.nodes(data='hcount')
    aros = g.nodes(data='aromatic')
    raw_node_feats = []
    # Each view yields (node, value) pairs; build one row per node as
    # [node, element, hcount, aromatic-as-int].
    for elem, data, aro in zip(elems, h_count, aros):
        node = list(elem)
        node.append(data[1])
        node.append(aro[1] * 1)
        raw_node_feats.append(node)
    na = np.array(list(raw_node_feats))
    # column 1 is the element symbol
    byte_node_feats = tf.convert_to_tensor(na[:, 1])

    # turn the byte string node feats into one_hot node feats
    # (pt_lookup is defined elsewhere; presumably an element lookup table
    # whose output leaves the last two columns free - TODO confirm)
    node_feats = pt_lookup(byte_node_feats).numpy()
    # last two columns are overwritten with hcount and the aromatic flag
    node_feats[:, -2] = na[:, 2]
    node_feats[:, -1] = na[:, 3]
    node_feats = tf.convert_to_tensor(node_feats)

    # get edge data and extract bonds, double them, then convert to tensor
    edata = g.edges(data='order')
    bonds = list(edata)
    na = np.array(bonds)
    # pair every order with itself and flatten: [o1, o1, o2, o2, ...]
    tup = zip(na[:, 2], na[:, 2])
    bond_data = tf.convert_to_tensor(list(itertools.chain(*tup)))
    bond_data = tf.cast(bond_data, tf.float32)
    # build dgl graph
    dgl_graph = dgl.from_networkx(g)

    # NOTE(review): this assumes the DGL edge order matches the duplicated
    # bond-order layout built above - confirm against dgl.from_networkx.
    dgl_graph.ndata['node_feats'] = node_feats
    dgl_graph.edata['edge_feats'] = bond_data

    return dgl_graph

コード例 #13

0

ファイルを表示

ファイル: read_pysmiles.py プロジェクト: Riki-Du/PRML-Project

def read_from_pysmiles(num=10):
    """Print one SMILES string from the training CSV and its elements.

    The SMILES column is index 1 when ``num`` is 10 and index 0 otherwise.
    The entry at list position 1 is used (position 0 is presumably the
    header row).
    """
    train_path, test_path, dev_path = train_test_path(num)
    rows = OpenCSV(train_path)
    # id,smiles,activity
    smiles_column = 1 if num == 10 else 0
    column_values = [row[smiles_column] for row in rows]

    chosen = column_values[1]
    print(chosen)
    graph = pysmiles.read_smiles(chosen)
    print(graph.nodes(data='element'))

コード例 #14

0

ファイルを表示

    def __init__(self, file_name):
        """Load a CSV with 'smiles' and 'activity' columns and parse it.

        For every SMILES string this stores the pysmiles graph, its
        adjacency matrix weighted by bond 'order', a graph rebuilt from
        that matrix alone, and the node view of elements.
        """
        frame = pd.read_csv(file_name)
        self.data = frame

        self.smiles = frame['smiles']
        self.labels = frame['activity']
        self.mols = list(map(read_smiles, self.smiles))

        self.periodic_table = Chem.GetPeriodicTable()

        adjacency = []
        for mol in self.mols:
            adjacency.append(nx.to_numpy_matrix(mol, weight='order'))
        self.ams = adjacency

        rebuilt = []
        for matrix in self.ams:
            rebuilt.append(nx.from_numpy_matrix(matrix))
        self.graphs = rebuilt

        element_views = []
        for mol in self.mols:
            element_views.append(mol.nodes(data='element'))
        self.element_lists = element_views

コード例 #15

0

ファイルを表示

def build_graph(smiles):
    """
    Parse a SMILES string from the train/test data into a NetworkX graph.

    :param smiles: a string object of SMILES format
    :return: nx.Graph
        The graph produced by pysmiles. Nodes have an 'element',
        'aromatic' and a 'charge', and if `explicit_hydrogen` is False a
        'hcount'. Depending on the input, they will also have 'isotope'
        and 'class' information. Edges have an 'order'.
    """
    # TODO: Initialize a DGL Graph
    return pysmiles.read_smiles(smiles)

コード例 #16

0

ファイルを表示

 def predict(self, input: List[JsonSerializable]):
     """
     This is a dummy test model.
     It counts atoms in each SMILES string of the first batch entry and
     returns one {"atoms": counts} mapping per string, wrapped in a list.
     """
     batch = input[0]
     results = []
     for item in batch:
         mol = read_smiles(item["input"], explicit_hydrogen=True)
         tally = collections.defaultdict(int)
         for _, element in mol.nodes(data="element"):
             tally[element] = tally[element] + 1
         results.append({"atoms": tally})
     return [results]

コード例 #17

0

ファイルを表示

ファイル: drug.py プロジェクト: LichenYang-Jeffrey/GAT-for-COVID-19

 def process(self):
     """Convert every CSV row into a graph sample and save the list.

     The first row of ``self.csv_file`` is skipped as a header. Column 0
     is the SMILES string and column 1 the integer label. The resulting
     list is written to ``self.processed_file`` with ``torch.save`` and
     also returned.
     """
     samples = []
     with open(self.csv_file, 'r') as handle:
         rows = csv.reader(handle, delimiter=',')
         next(rows, None)  # Ignore header
         for row in tqdm(rows):
             smiles_text = row[0]
             graph = pysmiles.read_smiles(smiles_text,
                                          explicit_hydrogen=False)
             sample = convert_networkx(graph, self.ATOM_TYPES)
             sample['simles'] = smiles_text
             sample['label'] = torch.LongTensor([int(row[1])])
             samples.append(sample)
     torch.save(samples, self.processed_file)
     return samples

コード例 #18

0

ファイルを表示

ファイル: dataloader.py プロジェクト: FFTYYY/AICures

def load_data_file(path, mode, pos_lim=-1, neg_lim=-1):
    """Read a CSV of SMILES strings into ``[graph, label, smiles]`` records.

    The first line of the file is skipped as a header. While the file is
    read, stdout is redirected to the null device to hide parser output;
    the previous stream is always restored, also when parsing raises.

    Args:
        path: CSV file to read.
        mode: column layout. ``"test"`` uses the first column as SMILES and
            label -1; ``"train"`` expects ``<ignored>,smiles,label``;
            ``"tdt"`` expects ``smiles,label``.
        pos_lim: maximum number of non-zero-label rows to keep; a value
            <= 0 disables the limit.
        neg_lim: maximum number of zero-label rows to keep; a value <= 0
            disables the limit.

    Returns:
        List of ``[networkx graph, int label, smiles string]``.

    Raises:
        ValueError: if ``mode`` is not one of the three known layouts.
    """
    import contextlib

    if mode not in ("test", "train", "tdt"):
        raise ValueError("unknown mode: %r" % (mode,))

    num_pos = 0
    num_neg = 0

    data = []

    with open(os.devnull, "w") as devnull, \
            contextlib.redirect_stdout(devnull):
        # Only visible if the redirection above did not take effect.
        print("stdout not shut down correctly")

        with open(path, "r") as fil:
            fil.readline()
            for i, line in enumerate(fil):
                fields = line.strip().split(",")
                if mode == "test":
                    mol, label = fields[0], -1
                elif mode == "train":
                    _, mol, label = fields
                else:  # "tdt"
                    mol, label = fields

                label = int(label)

                if label >= 0:
                    if label == 0:
                        num_neg += 1
                        if neg_lim > 0 and num_neg > neg_lim:
                            continue
                    else:
                        num_pos += 1
                        if pos_lim > 0 and num_pos > pos_lim:
                            continue
                # stop early once both limits have been exceeded
                if (pos_lim > 0 and num_pos > pos_lim) and (
                        neg_lim > 0 and num_neg > neg_lim):
                    break

                mol_g = read_smiles(mol)  # SMILES string -> networkx graph

                data.append([mol_g, int(label), mol])

                if i % 1000 == 0:
                    # progress goes to stderr because stdout is silenced
                    sys.stderr.write("%d\n" % i)

    print("stdout recoverd.")

    return data

コード例 #19

0

ファイルを表示

def readf(fname):
    """Register every element found in a CSV of SMILES strings.

    The first row is skipped as a header; column 0 of each other row is
    parsed with pysmiles. Each element not yet present in the module-level
    ``cnt`` mapping is added with the next free index (the current size of
    ``cnt``).
    """
    with open(fname) as handle:
        rows = csv.reader(handle)
        next(rows, None)  # header
        for record in rows:
            mol = pysmiles.read_smiles(record[0])
            for _, element in mol.nodes(data='element'):
                cnt.setdefault(element, len(cnt))

コード例 #20

0

ファイルを表示

def smiles_to_formula(smiles_string):
    """Return the chemical formula of a SMILES string.

    Atoms are counted with explicit hydrogens and written in the iteration
    order of ``ATOM_MASSES``; a count of one is written without a number.
    Returns None as soon as an element not listed in ``ATOM_MASSES`` is
    found.
    """
    mol = pysmiles.read_smiles(smiles_string, explicit_hydrogen=True)
    tally = dict.fromkeys(ATOM_MASSES, 0)
    for _, element in mol.nodes(data="element"):
        if element not in tally:
            return None
        tally[element] += 1

    pieces = []
    for symbol, count in tally.items():
        if count == 1:
            pieces.append(symbol)
        elif count != 0:
            pieces.append("{}{}".format(symbol, count))
    return "".join(pieces)

コード例 #21

0

ファイルを表示

ファイル: utils.py プロジェクト: YuanC233/COVID-19GCN

def read_raw(filename, dataset, device, no_h):
    """Read a raw dataset file into graphs, targets and atom features.

    Args:
        filename: path of the text file to read.
        dataset: ``'covid-19'`` selects a comma-separated two-column file
            whose first line is skipped; any other value selects a
            tab-separated 13-column file read from the first line.
        device: torch device on which the target tensors are created.
        no_h: when false, hydrogens are added with RDKit before the
            SMILES string is regenerated.

    Returns:
        Tuple ``(mols, targets, features)``: pysmiles graphs, one target
        tensor per row (the integer in column 1 for 'covid-19', otherwise
        the floats from column 2 onward), and the result of
        ``extract_atom_feature`` on the regenerated SMILES strings.

    Raises:
        AssertionError: if a non-blank row has the wrong number of columns.
    """
    if dataset == 'covid-19':
        assertion_len = 2
        smile_idx = 0
        separator = ','
    else:
        assertion_len = 13
        smile_idx = 0
        separator = '\t'

    all_smiles = []
    mols = []
    targets = []
    with open(filename) as f:
        if assertion_len == 2:
            f.readline()
        progress = tqdm(f)
        progress.set_description('Reading raw data ... ')
        for line in progress:
            # Lines keep their newline, so comparing against '' never
            # matched; blank lines then failed the column-count assert.
            if not line.strip():
                continue
            l = line.strip().split(separator)

            assert len(l) == assertion_len

            m = Chem.MolFromSmiles(l[smile_idx])
            if not no_h:
                m = Chem.AddHs(m)
            smiles = Chem.MolToSmiles(m)
            all_smiles.append(smiles)

            if assertion_len == 2:
                target = torch.tensor(int(l[1]), device=device)
            else:
                target = torch.tensor([float(i) for i in l[2:]],
                                      device=device)
            targets.append(target)

            # NOTE(review): '[H]' is rewritten to '[G]' before parsing,
            # presumably so pysmiles keeps those atoms as ordinary nodes -
            # confirm.
            mol = read_smiles(smiles.replace('[H]', '[G]'),
                              explicit_hydrogen=False,
                              reinterpret_aromatic=True)
            mols.append(mol)

    features = extract_atom_feature(all_smiles, device, no_h)

    return mols, targets, features

コード例 #22

0

ファイルを表示

ファイル: smiles_utils.py プロジェクト: biansy000/CS410-Drug-Molecular-Toxicity-Prediction

def find_elements():
    """Collect every token that occurs in the datasets' SMILES strings.

    For each directory in the module-level ``paths`` mapping the file
    'names_smiles.txt' is read, each SMILES string (second column) is
    parsed, stripped of stereo information and written back to SMILES,
    and the result is scanned character by character. The distinct tokens
    and the largest token count of any string are printed and written to
    'element_list.txt'.
    """
    # list all the elements appeared
    # lowercase letters that stand on their own (presumably aromatic atoms)
    # and therefore are never the second letter of a two-letter element
    special_case = ['b', 'c', 'o', 'p', 's']
    element_list = []
    longest_len =0
    for path_name in paths:
        path = paths[path_name]
        df_smiles = pd.read_csv(os.path.join(path, 'names_smiles.txt'))
        smiles_list = np.array(df_smiles.iloc[:, 1])

        for smiles in smiles_list:
            mol = read_smiles(smiles)
            for node in mol.nodes:
                if 'stereo' in mol.nodes[node]:
                    mol.nodes[node].pop('stereo') # discard stereo infomation by hand

            new_smiles = write_smiles(mol)
            # number of tokens in this string
            length = 0
            for i, ele in enumerate(new_smiles):
                ele = str(ele)
                #assert ele != 'n', 'SIMPLIFICATION FAILS'
                    
                # A lowercase letter right after an uppercase one was already
                # merged into the previous token below: skip it.
                if ele.islower() and (not ele in special_case) and i > 0 and\
                        str(new_smiles[i-1]).isupper(): # is the suffix of an element
                    continue
                # Uppercase followed by a non-special lowercase letter:
                # treat both characters as one token.
                if ele.isupper() and i < len(new_smiles) - 1 and str(new_smiles[i+1]).islower() \
                        and (not str(new_smiles[i+1]) in special_case): # an element with 2 chars
                    ele = ele + str(new_smiles[i+1])

                # Every remaining character counts as a token, so digits and
                # punctuation end up in element_list as well.
                length += 1
                if not ele in element_list:
                    element_list.append(ele)
                
                if length > longest_len:
                    longest_len = length

    print(element_list)
    # tokens separated by spaces, followed by the longest token count
    with open('element_list.txt', 'w') as f:
        for item in element_list:
            f.write("%s " % item)
        f.write(f'{longest_len}')

コード例 #23

0

ファイルを表示

def main():
    """Draw the molecule selected on the command line.

    The SMILES string is taken from ``args.smiles_string`` when given and
    looked up in ``dict_aa`` by ``args.mol`` otherwise.
    """
    # Parse command line arguments to get a smiles string
    args = parse_arguments()
    smiles_string = args.smiles_string or dict_aa[args.mol]

    print("Drawing molecule:\n{}".format(smiles_string))

    # Parse the string into a graph object
    g = read_smiles(smiles_string)
    # Mark the cyclic edges as rings
    try:
        g = structure.mark_rings(g)
    except Exception:
        # Ring marking is best effort: draw the unmarked graph instead.
        # A bare ``except`` would also have swallowed KeyboardInterrupt
        # and SystemExit.
        pass

    # init turtle window and start drawing, wait for window events
    draw.init()
    draw.molecule(g)
    draw.done()

コード例 #24

0

ファイルを表示

ファイル: utils.py プロジェクト: ZhenyueQin/Implementation-MolGAN-PyTorch

def save_mol_img(mols, f_name='tmp.png', is_test=False):
    """Draw the first usable molecule of ``mols`` to an image file.

    Molecules are tried in order. One is skipped when RDKit returns no
    SMILES string for it or when any step (drawing, re-parsing the SMILES
    with pysmiles) raises. The function stops after the first success.

    Args:
        mols: iterable of RDKit molecules.
        f_name: output file name.
        is_test: when true, a random string is inserted between the file
            stem and its extension.
    """
    import os

    orig_f_name = f_name
    for a_mol in mols:
        try:
            a_smi = Chem.MolToSmiles(a_mol)
            if a_smi is None:
                continue
            print('Generating molecule')

            if is_test:
                # splitext keeps directories and inner dots intact; joining
                # the dot-split pieces with '' turned './out/tmp.png' into
                # '/out/tmp<random>.png'.
                stem, ext = os.path.splitext(orig_f_name)
                f_name = stem + random_string() + ext

            rdkit.Chem.Draw.MolToFile(a_mol, f_name)
            # Parsed only as a check: a failure moves on to the next molecule.
            read_smiles(a_smi)

            break
        except Exception:
            continue

コード例 #25

0

ファイルを表示

ファイル: graph_parse.py プロジェクト: FFTYYY/AICures

def augment_dataset(C , dataset , k = 5):
	"""Add perturbed copies of the positive samples and shuffle.

	For ``C.pos_aug`` rounds, every ``[graph, label, smiles]`` record whose
	label is non-zero gets a new record: random positions are tried (at
	most ``len(smiles) - 1`` times) and a 'C' is inserted at the first one
	found between two uppercase letters; the resulting string is re-parsed
	with pysmiles. New records from one round are part of the input of the
	next. ``k`` is not used.

	Returns the shuffled dataset list.
	"""
	for _round in range(C.pos_aug):
		additions = []

		for _graph , label , smiles in dataset:
			if int(label) == 0:
				continue

			for _attempt in range(len(smiles) - 1):
				pos = random.randint(1 , len(smiles) - 1)
				left = smiles[pos - 1]
				right = smiles[pos]
				if left.isalpha() and left.isupper() \
						and right.isalpha() and right.isupper():
					smiles = smiles[:pos] + 'C' + smiles[pos:]
					break

			additions.append( [pysmiles.read_smiles(smiles) , label , smiles] )

		# build a new list so the caller's list is not extended in place
		dataset = dataset + additions

	random.shuffle(dataset)

	return dataset

コード例 #26

0

ファイルを表示

    def __init__(self, smiles):
        """
        Initialize Molecule Class.

        Builds a 3D structure from the SMILES string with pybel, reads the
        bond graph with pysmiles, and derives the atom, bond, angle and
        torsion lists.

        :param smiles: smiles string
        """

        # Cheminformatics section
        self.smiles = smiles

        self.pybel_mol = readstring("smi", smiles)
        self.pybel_mol.make3D()

        self.mol_formula = self.pybel_mol.formula

        # self.rd_mol = Chem.MolFromSmiles(smiles)
        # self.rd_mol = Chem.AddHs(self.rd_mol)
        # AllChem.EmbedMolecule(self.rd_mol)
        # AllChem.MMFFOptimizeMolecule(self.rd_mol)

        # Fall back to a PySmilesCopy stand-in when pysmiles rejects the
        # string (the meaning of its [0, 1, 1] argument is defined elsewhere).
        try:
            self.pysmiles_mol = read_smiles(smiles, explicit_hydrogen=True)
        except ValueError:
            self.pysmiles_mol = PySmilesCopy([0, 1, 1])

        # compositional section
        self.natoms = len(self.pybel_mol.atoms)
        self.position = np.array([0.0, 0.0, 0.0], dtype=float)
        self.atoms = []

        # One Atom per pybel atom, built from atomic number and coordinates.
        for i in range(self.natoms):
            atom = self.pybel_mol.atoms[i]
            self.atoms.append(Atom(atom.atomicnum, atom.coords))

        # geometrical section
        self.bonds = []
        self.bond_orders = []
        # Each edge is (atom, atom, order): keep the pair and the order
        # in two parallel lists.
        for bond in self.pysmiles_mol.edges(data='order'):
            self.bonds.append(bond[:-1])
            self.bond_orders.append(bond[2])

        self.angles = []
        # For every atom, combine each pair of consecutive entries returned
        # by get_bonds into one angle.
        for i in range(self.natoms):
            bonds = self.get_bonds(i)

            for j in range(len(bonds) - 1):
                self.angles.append(sort_bend_angle(bonds[j] + bonds[j + 1]))

        self.torsions = []
        for index1 in range(self.natoms):
            bonds = self.get_bonds(index1)
            if len(bonds) >= 2:
                for i, middle_bond in enumerate(bonds):
                    # atom at the other end of the middle bond
                    index2 = [atom for atom in middle_bond
                              if atom != index1][0]
                    bonds2 = self.get_bonds(index2)
                    if len(bonds2) >= 2:

                        # first bond: the next bond of index1, wrapping
                        # around to the first one
                        if i == len(bonds) - 1:
                            bond1 = bonds[0]
                        else:
                            bond1 = bonds[i + 1]

                        # NOTE(review): only the last bond of index2 that
                        # differs from the middle bond is kept; bond2 keeps
                        # its previous value if none differs - confirm.
                        for bond in bonds2:
                            if sorted(bond) != sorted(middle_bond):
                                bond2 = bond

                        self.torsions.append(
                            sort_torsion_bonds(bond1, middle_bond, bond2))
            else:
                continue

コード例 #27

0

ファイルを表示

from property_prediction.data_utils import TaskDataLoader
import networkx as nx
from pysmiles import read_smiles

task = 'FreeSolv'
path = '../datasets/{}.csv'.format(task)

data_loader = TaskDataLoader(task, path)
smiles_list, y = data_loader.load_property_data()

# Report the node count of every molecule and collect the indices of the
# molecules whose graph has exactly one node.
indices = []

for i in range(len(smiles_list)):
    graph = read_smiles(smiles_list[i])
    number_of_nodes = graph.number_of_nodes()
    print('number of nodes for index ', i, ' is: ', number_of_nodes)
    if number_of_nodes != 1:
        continue
    indices.append(i)
    print(smiles_list[i])

print(indices)

コード例 #28

0

ファイルを表示

ファイル: real.py プロジェクト: minkky/Graph-Embedding

			wf.write(data)

# Convert every molecule listed in REAL.csv into a labelled adjacency file.
# The input is opened in a ``with`` block so it is closed when the loop ends,
# and the per-molecule output handle no longer reuses the reader's name.
with open('REAL.csv', 'r', encoding='utf-8') as file:
	f = csv.reader(file)

	for idx, line in enumerate(f):
		# skip the header row and empty rows
		if idx == 0 or len(line) == 0:
			continue
		name, smiles, _, group = line[:4]

		filename = str(group) + 'REAL' + name.split('-')[1]

		print(smiles, filename, group)
		mol = read_smiles(str(smiles))

		# element labels with a leading '0' placeholder
		labels = mol.nodes(data='element')
		node_labels = ['0']
		for label in labels:
			node_labels += [label[1]]
		# bond-order adjacency matrix, framed by an extra zero row and column
		# (to_numpy_matrix was removed in NetworkX 3; tolist() is unchanged)
		matrix = nx.to_numpy_array(mol, weight='order').tolist()
		content = [[0 for i in range(len(matrix) + 1)]]
		for ma in matrix:
			content.append([0] + ma)
		print()

		with open('group/smiles'+ group + '/' + filename +'.txt', 'w') as smiles_file:
			smiles_file.write(smiles)
		writeFile('group/' + group + '/' + filename + '.txt', content, node_labels)

コード例 #29

0

ファイルを表示

import pysmiles
import dgl
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pdb

# Parse a sample molecule and dump its bonds and atoms for inspection.
g = pysmiles.read_smiles(
    "CN1CC[C@@]23C=C[C@@H](C[C@@H]2OC4=C(C=CCC(=C34)C1)OC)O.Br")
#g = pysmiles.read_smiles("O=[N+]([O-])C(Br)(CO)CO")

# Print the attributes of every bond between the first 30 node ids; each
# bond shows up once per direction.
for i in range(30):
    for j in range(30):
        if not g.has_edge(i, j):
            continue
        e = g[i][j]
        print("%d - %d" % (i, j), e)

# Print the attributes of at most the first 30 atoms.
for i in range(min(30, g.number_of_nodes())):
    print(g.nodes[i])

pdb.set_trace()
#g = dgl.DGLGraph(g)


def draw(g):
    #g = g.to_networkx().to_undirected()

    def make_color(x):
        if x == 'C':

コード例 #30

0

ファイルを表示

ファイル: get_chemicals.py プロジェクト: zouharvi/deep-molecule-qspr

    return edges_occ
           
chemicals = pd.read_pickle("smiles.pickle")
chemicals_data = []
print(chemicals)
    
headers = []
targets = []
i = 0
for index, row in chemicals.iterrows(): 
    if not is_tree(G):
        continue
    i+=1
    if i%1 == 0:
        print(i)
    G = read_smiles(row["smiles"], explicit_hydrogen=True)
    try:
        mol_weight = MW(CAS_from_any(row["chemicals"]))
        boiling_point =  Tb(CAS_from_any(row['chemicals']))
    except(ValueError):
        continue
    if boiling_point == None or mol_weight == None:
        continue
    occ = count_atom_occurencies(G)
    occ.update({'boiling_point': boiling_point})
    """
    try:
        with timeout(2, exception=RuntimeError):
            occ.update({'GP_index': calculate_indices.calcuate_pisanski(G)})
    except RuntimeError:
        continue