Exemple #1
0
    def clustering(self, smiles: list, verbose=0):
        """Group SMILES strings into clusters by Tanimoto similarity.

        Every input string is wrapped in a ``Smiles`` object; entries whose
        ``rdkit_mol`` attribute is ``None`` are skipped. For each remaining
        molecule ``self._find_similarity`` is asked for the key of a matching
        cluster: ``None`` opens a new cluster, any other value is forwarded to
        ``self._update_cluster`` together with the molecule.

        Args:
            smiles (list): list of SMILES strings.
            verbose: any truthy value wraps the iteration in a ``tqdm``
                progress bar.

        Returns:
            list: the clusters, in the order of ``self.clusters.values()``.
                The same list is also stored on ``self.clusters``.
        """
        # While the loop runs, clusters live in a dict keyed by an increasing
        # integer; the helper methods are called while it is in that form.
        self.clusters = {}
        next_key = 0
        candidates = smiles
        if verbose:
            candidates = tqdm(smiles)
        for raw in candidates:
            molecule = Smiles(raw)
            if molecule.rdkit_mol is None:
                continue
            match = self._find_similarity(molecule)
            if match is not None:
                self._update_cluster(match, molecule)
                continue
            # No similar cluster found: start a new one with this molecule.
            self.clusters[next_key] = [molecule]
            next_key += 1
        # Callers receive a plain list; the integer keys are dropped.
        self.clusters = list(self.clusters.values())
        return self.clusters
Exemple #2
0
 def create_graph(self, smi, idx, q):
     """Build the graph and fingerprint label for one SMILES and enqueue them.

     Args:
         smi: SMILES string handed to ``Smiles`` and to
             ``get_filtered_fingerprint``.
         idx: passed through unchanged as the third element of the queued
             tuple.
         q: object exposing ``put``; receives ``(graph, label, idx)``.

     Returns:
         None. Nothing is queued when building the graph raises
         ``AttributeError``.
     """
     try:
         mol_graph = Smiles(smi).to_graph(sparse=True)
     except AttributeError:
         # Graph construction failed for this entry: skip it silently.
         return None
     bits = list(get_filtered_fingerprint(smi))
     target = torch.tensor(bits, dtype=torch.long)
     # Indexing with None prepends a dimension to the label tensor.
     q.put((mol_graph, target[None, :], idx))
Exemple #3
0
 def create_graph(self, data, idx, q):
     """Build the graph for one record and enqueue it with its label.

     Args:
         data: indexable record; ``data[0]`` is handed to ``Smiles`` and
             ``data[1]`` is converted to the label tensor.
         idx: passed through unchanged as the third element of the queued
             tuple.
         q: object exposing ``put``; receives ``(graph, label, idx)``.

     Returns:
         None. Nothing is queued when building the graph raises
         ``AttributeError``.
     """
     try:
         mol_graph = Smiles(data[0]).to_graph(sparse=True)
     except AttributeError:
         # Graph construction failed for this record: skip it silently.
         return None
     fingerprint = data[1]
     target = torch.tensor(fingerprint, dtype=torch.long)
     # Indexing with None prepends a dimension to the label tensor.
     q.put((mol_graph, target[None, :], idx))
Exemple #4
0
def smiles2graph(smiles):
    """Convert a SMILES string into a ``Data`` graph object.

    Args:
        smiles: SMILES string handed to ``Smiles``.

    Returns:
        Data: built with ``x`` (float tensor made from the graph's
        ``"atom_features"`` entry) and ``edge_index`` (long tensor made from
        the row and column indices of the COO form of the ``"adjacency"``
        entry).

    Raises:
        AttributeError: propagated unchanged when ``Smiles(...).to_graph``
            raises it.
    """
    # Any AttributeError raised here simply propagates to the caller.
    mol_graph = Smiles(smiles).to_graph(sparse=True)
    node_features = torch.tensor(mol_graph["atom_features"], dtype=torch.float)
    coo = mol_graph["adjacency"].tocoo()
    edges = torch.tensor([coo.row, coo.col], dtype=torch.long)
    return Data(x=node_features, edge_index=edges)
Exemple #5
0
    def process(self, smiles_col, ecfp_col, label_col=None):
        """Turn the raw CSV into ``Data`` graphs and save the collated result.

        Reads ``self.raw_paths[0]`` with pandas, builds one ``Data`` object
        per row, applies ``self.pre_filter`` and ``self.pre_transform`` when
        they are set, and saves the output of ``self.collate`` to
        ``self.processed_paths[0]``.

        Args:
            smiles_col: name of the column whose values are handed to
                ``Smiles``.
            ecfp_col: name of the column holding the fingerprint as a string;
                after stripping, each character is converted with ``int``.
            label_col: optional name of a column whose value is attached to
                each ``Data`` object as ``label``.

        Rows for which ``to_graph`` raises ``AttributeError`` are skipped.
        """
        frame = pd.read_csv(self.raw_paths[0])
        with_label = label_col is not None

        columns = [frame[smiles_col], frame[ecfp_col]]
        if with_label:
            columns.append(frame[label_col])

        samples = []
        for row in zip(*columns):
            molecule = Smiles(row[0])
            try:
                mol_graph = molecule.to_graph(sparse=True)
            except AttributeError:
                # Graph construction failed for this row: drop it.
                continue

            node_features = torch.tensor(mol_graph["atom_features"],
                                         dtype=torch.float)
            coo = mol_graph["adjacency"].tocoo()
            edges = torch.tensor([coo.row, coo.col], dtype=torch.long)

            # One integer per character of the stripped fingerprint string,
            # with a leading dimension prepended.
            bits = [int(char) for char in row[1].strip()]
            target = torch.tensor(bits, dtype=torch.long)[None, :]

            # The label keyword is only passed when a label column was given.
            extra = {"label": row[2]} if with_label else {}
            samples.append(
                Data(x=node_features, edge_index=edges, y=target, **extra))

        if self.pre_filter is not None:
            samples = [sample for sample in samples if self.pre_filter(sample)]
        if self.pre_transform is not None:
            samples = [self.pre_transform(sample) for sample in samples]

        collated = self.collate(samples)
        data, slices = collated
        torch.save((data, slices), self.processed_paths[0])
Exemple #6
0
def write_graphs(inpath, outpath, prefix=None):
    """Convert JAK dataset to graphs.

    Each non-blank line of ``inpath`` must contain exactly three
    comma-separated fields. The first is ignored, the second is used as the
    SMILES string and the third as the graph label. Lines whose SMILES makes
    ``MolFromSmiles`` return ``None`` are dropped. The remaining molecules
    are written through ``GraphWriter``.

    Args:
        inpath: path of the text file to read.
        outpath: forwarded to ``GraphWriter.write``.
        prefix: forwarded to ``GraphWriter.write`` as ``prefix``.

    Raises:
        ValueError: if a non-blank line does not split into exactly three
            fields.
    """
    smiles = []
    labels = []
    with open(inpath, "r") as inf:
        # Iterating the file object yields the same lines as the manual
        # readline() loop, without repeating the read call on every branch.
        for line in inf:
            record = line.strip()
            if not record:
                # Blank or whitespace-only lines (e.g. stray empty lines at
                # the end of the file) carry no record; skipping them avoids
                # a ValueError from the three-way unpacking below.
                continue
            _, sm, lb = record.split(",")
            if MolFromSmiles(sm) is None:
                continue
            smiles.append(Smiles(sm))
            labels.append(lb)
    writer = GraphWriter(smiles)
    writer.write(outpath, prefix=prefix, graph_labels=labels)
Exemple #7
0
def write_graphs(inpath, outpath, prefix=None):
    """Convert JAK dataset to graphs.

    Each non-blank line of ``inpath`` is treated as one SMILES string. Lines
    for which ``MolFromSmiles`` returns ``None`` are dropped. For every kept
    molecule the result of ``get_filtered_fingerprint`` is joined into a
    comma-separated string and used as its graph label. The molecules are
    written through ``GraphWriter``.

    Args:
        inpath: path of the text file to read, one SMILES per line.
        outpath: forwarded to ``GraphWriter.write``.
        prefix: forwarded to ``GraphWriter.write`` as ``prefix``.
    """
    smiles = []
    fps = []
    # Using the progress bar as a context manager guarantees it is closed
    # once reading finishes or fails; the original never closed it.
    with open(inpath, "r") as inf, tqdm() as pb:
        for line in inf:
            sm = line.strip()
            if not sm:
                # Blank lines hold no SMILES record.
                continue
            if MolFromSmiles(sm) is None:
                continue
            smiles.append(Smiles(sm))
            fps.append(",".join(map(str, get_filtered_fingerprint(sm))))
            # Only molecules that were kept advance the bar.
            pb.update(1)
    writer = GraphWriter(smiles)
    writer.write(outpath, prefix=prefix, graph_labels=fps)
Exemple #8
0
 def _graph_helper(self, smi):
     """Return node-feature and edge-index tensors for one SMILES string.

     Args:
         smi: SMILES string handed to ``Smiles``.

     Returns:
         tuple: ``(x, edge_idx)`` where ``x`` is a float tensor made from the
         graph's ``"atom_features"`` entry and ``edge_idx`` is a long tensor
         made from the row and column indices of the COO form of the
         ``"adjacency"`` entry.
     """
     mol_graph = Smiles(smi).to_graph(sparse=True)
     node_features = torch.tensor(mol_graph["atom_features"],
                                  dtype=torch.float)
     coo = mol_graph["adjacency"].tocoo()
     # First row: source indices, second row: target indices.
     endpoints = [coo.row, coo.col]
     edges = torch.tensor(endpoints, dtype=torch.long)
     return node_features, edges