Beispiel #1
0
def write_sdf_file(scaffold_graph, output_file):
    """Write the scaffolds of a scaffold graph to an SDF file.

    Scaffolds are sorted in ascending order of their ``'hierarchy'`` node
    attribute and each is assigned an integer ID equal to its position in
    that ordering. Every record carries the properties:

        _Name (title): scaffold ID
        HIERARCHY: hierarchy level of the scaffold
        SMILES: the scaffold node key (a SMILES string)
        SUBSCAFFOLDS: comma-separated IDs of the scaffold's predecessors

    Scaffolds whose SMILES cannot be parsed are skipped silently; their IDs
    stay reserved, so the IDs in the output may contain gaps.

    Parameters
    ----------
    scaffold_graph : sg.ScaffoldGraph
        Graph to be converted.
    output_file : str
        Path to the output file.
    """
    sorted_scaffolds = sorted(scaffold_graph.get_scaffold_nodes(data=True),
                              key=lambda x: x[1]['hierarchy'])
    # Scaffold SMILES -> integer ID (position in the hierarchy ordering).
    mapping = {smiles: index
               for index, (smiles, _) in enumerate(sorted_scaffolds)}
    writer = SDWriter(output_file)
    try:
        for scaffold, data in sorted_scaffolds:
            molecule = MolFromSmiles(scaffold)
            if molecule is None:
                # Unparseable SMILES: nothing sensible to write.
                continue
            subscaffolds = scaffold_graph.predecessors(scaffold)
            # SetProp only accepts string values, so the ID is converted.
            molecule.SetProp('_Name', str(mapping[scaffold]))
            # Same lowercase key that the sort above relies on.
            molecule.SetIntProp('HIERARCHY', data['hierarchy'])
            molecule.SetProp('SMILES', scaffold)
            molecule.SetProp(
                'SUBSCAFFOLDS',
                ', '.join(str(mapping[s]) for s in subscaffolds))
            writer.write(molecule)
    finally:
        # Always flush and release the file, even if a write fails.
        writer.close()
Beispiel #2
0
 def write_scaffold(self, scaffold):
     """Write a single scaffold record to ``self.output``.

     When ``self.args.sdf`` is falsy, a tab-separated line holding the
     scaffold's id, hierarchy, SMILES and sub-scaffold ids is written.
     Otherwise the SMILES is parsed into a molecule, annotated with the
     same four values as properties and passed to ``self.output.write``;
     a warning is logged instead if the SMILES cannot be parsed.

     Parameters
     ----------
     scaffold : object
         Must expose ``id``, ``hierarchy``, ``smiles`` and an iterable
         ``subscaffolds`` whose items each expose ``id``.
     """
     child_ids = ', '.join(map(str, (sub.id for sub in scaffold.subscaffolds)))

     # Plain-text output: one tab-delimited row per scaffold.
     if not self.args.sdf:
         row = '{0}\t{1}\t{2}\t{3}\n'.format(
             scaffold.id, scaffold.hierarchy, scaffold.smiles, child_ids)
         self.output.write(row)
         return

     mol = MolFromSmiles(scaffold.smiles)
     if mol is None:
         logger.warning(f'Failed to parse scaffold: {scaffold.smiles}')
         return

     mol.SetProp('_Name', str(scaffold.id))
     mol.SetIntProp('HIERARCHY', scaffold.hierarchy)
     mol.SetProp('SMILES', scaffold.smiles)
     mol.SetProp('SUBSCAFFOLDS', child_ids)
     self.output.write(mol)
Beispiel #3
0
def write_sdf_file(scaffold_graph, output_file):
    """Write an SDF file from a ScaffoldGraph.

    All scaffolds in the scaffoldgraph are written to the
    SDF, while molecules are ignored. Scaffolds are sorted
    in ascending order according to their hierarchy level,
    and each scaffold's ID is its position in that ordering.

    The output follows the standard SDF specification with
    the added property fields:

        TITLE field: scaffold ID
        SUBSCAFFOLDS field: list of sub-scaffold IDs
        HIERARCHY field: hierarchy level of scaffold
        SMILES field: scaffold canonical SMILES

    Scaffolds whose SMILES cannot be parsed are skipped
    silently; their IDs stay reserved, so the IDs in the
    output may contain gaps.

    Parameters
    ----------
    scaffold_graph : scaffoldgraph.core.ScaffoldGraph
        ScaffoldGraph to be written to an SDF.
    output_file : str
        Filepath to an output file.

    """
    sorted_scaffolds = sorted(scaffold_graph.get_scaffold_nodes(data=True),
                              key=lambda x: x[1]['hierarchy'])
    # Scaffold SMILES -> integer ID (position in the hierarchy ordering).
    mapping = {smiles: index
               for index, (smiles, _) in enumerate(sorted_scaffolds)}
    writer = SDWriter(output_file)
    try:
        for scaffold, data in sorted_scaffolds:
            molecule = MolFromSmiles(scaffold)
            if molecule is None:
                # Unparseable SMILES: nothing sensible to write.
                continue
            subscaffolds = scaffold_graph.predecessors(scaffold)
            # SetProp only accepts string values, so the ID is converted.
            molecule.SetProp('_Name', str(mapping[scaffold]))
            # Same lowercase key that the sort above relies on.
            molecule.SetIntProp('HIERARCHY', data['hierarchy'])
            molecule.SetProp('SMILES', scaffold)
            molecule.SetProp(
                'SUBSCAFFOLDS',
                ', '.join(str(mapping[s]) for s in subscaffolds))
            writer.write(molecule)
    finally:
        # Always flush and release the file, even if a write fails.
        writer.close()