Example #1
    def transform(self, data):
        """Take a string list in the extended gSpan format and yields NetworkX graphs.

        Parameters
        ----------
        data : string or list
            data source, can be a list of strings, a file name or a url

        Returns
        -------
        iterator over networkx graphs

        Raises
        ------
        Exception: if a graph is empty
        """
        try:
            header = ''
            string_list = []
            for line in util.read(data):
                if line.strip():
                    if line[0] in ['g', 't']:
                        if string_list:
                            yield self._gspan_to_networkx(header, string_list)
                        string_list = []
                        header = line
                    else:
                        string_list += [line]
            if string_list:
                yield self._gspan_to_networkx(header, string_list)
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s' % e)
            logger.debug('Exception', exc_info=True)
Example #2
def _load_abalone():
    print('abalone')
    uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'
    n_max = 700

    M = []
    labels = []
    counter = 0
    for line in read(uri):
        counter += 1
        if counter > n_max:
            break
        line = line.strip()
        if line:
            items = line.split(',')
            label = int(items[-1]) // 7
            labels.append(label)
            data = [float(x) for x in items[1:-1]]
            M.append(data)

    X = np.array(M)
    targets = LabelEncoder().fit_transform(labels)
    y = np.array(targets)

    y_sel = _select_targets(y, min_threshold=5)
    X, y = _filter_dataset(X, y, y_sel)

    return X, y
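
The UCI loaders in these examples all follow the same pattern: stream lines from a file or URL with read, split on a delimiter, collect numeric features and a raw label, then encode the labels. A minimal standalone sketch of that pattern (it leaves out eden's _select_targets/_filter_dataset helpers, which are not shown on this page, and uses a hypothetical csv_lines_to_xy name):

import numpy as np
from sklearn.preprocessing import LabelEncoder


def csv_lines_to_xy(lines, label_column=-1):
    """Parse comma-separated records into a feature matrix X and an encoded label vector y."""
    features, labels = [], []
    for line in lines:
        line = line.strip()
        if not line:
            continue  # skip blank lines, as the loaders above do
        items = line.split(',')
        labels.append(items[label_column])
        # every remaining column is treated as a numeric feature
        features.append([float(x) for x in items[:label_column]])
    X = np.array(features)
    y = LabelEncoder().fit_transform(labels)
    return X, y


# toy records in the same layout as the UCI files: feature columns, then a class label
X, y = csv_lines_to_xy(['5.1,3.5,1.4,setosa', '6.2,2.9,4.3,versicolor'])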
Example #3
def _fasta_to_fasta(input):
    header = ""
    seq = ""
    const = ""
    for line in util.read(input):
        line = str(line).strip()
        if line == "":
            # assume an empty line indicates that the following lines describe the constraints
            if seq:
                yield seq
            seq = None
        elif line[0] == '>':
            if const:
                yield const
                header = ""
                seq = ""
                const = ""
            header = line
            yield header
        else:
            # remove trailing chars, split and take only first part, removing comments
            line_str = line.split()[0]
            if line_str:
                if seq is None:
                    const += line_str
                else:
                    seq += line_str
    if const:
        yield const
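
This generator yields, for each record, the header line, then the concatenated sequence, then the concatenated constraint string; the blank line marks where the sequence ends and the constraint starts. A hedged usage sketch, assuming _fasta_to_fasta above is in scope and that util.read passes a list of strings through line by line (as the other docstrings on this page state):

# hypothetical input: one FASTA record followed by a blank line and a constraint string
lines = ['>seq_0',
         'GGCUAACGUUA',
         '',
         '(((.....)))']

for item in _fasta_to_fasta(lines):
    print(item)
# expected output, in order: '>seq_0', 'GGCUAACGUUA', '(((.....)))'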
Example #4
 def _sdf_to_eden(self, iterable):
     for mol_sdf in read(iterable):
         mol = pybel.readstring("sdf", mol_sdf.strip())
         # remove hydrogens
         mol.removeh()
         graph = self._obabel_to_networkx(mol)
         if len(graph):
             yield graph
Example #5
def _sdf_to_eden(iterable):
    for mol_sdf in read(iterable):
        mol = pybel.readstring("sdf", mol_sdf.strip())
        # remove hydrogens
        mol.removeh()
        graph = _obabel_to_networkx(mol)
        if len(graph):
            yield graph
Example #6
    def _smi_to_eden(self, iterable, cache={}):
        if self.split_components:  # yield every graph separately
            for mol_smi in read(iterable):
                # First check if the molecule has appeared before and
                # thus is already converted
                if mol_smi not in cache:
                    # convert from SMILES to SDF and store in cache
                    command_string = 'obabel -:"' + mol_smi + \
                        '" -osdf --gen3d'
                    args = shlex.split(command_string)
                    sdf = subprocess.check_output(args).decode()  # decode bytes for Python 3
                    # Assume the incoming string contains only one molecule
                    # Remove warning messages generated by openbabel
                    sdf = '\n'.join(
                        [x for x in sdf.split('\n') if 'WARNING' not in x])
                    cache[mol_smi] = sdf

                mols = self._generate_conformers(cache[mol_smi], self.n_conf)
                for molecule in mols:
                    graph = self._obabel_to_networkx3d(molecule)
                    if len(graph):
                        yield graph

        else:  # construct global graph and accumulate everything there
            global_graph = nx.Graph()
            for mol_smi in read(iterable):
                # First check if the molecule has appeared before and
                # thus is already converted
                if mol_smi not in cache:
                    # convert from SMILES to SDF and store in cache
                    command_string = 'obabel -:"' + mol_smi + \
                        '" -osdf --gen3d'
                    args = shlex.split(command_string)
                    sdf = subprocess.check_output(args).decode()  # decode bytes for Python 3
                    sdf = '\n'.join(
                        [x for x in sdf.split('\n') if 'WARNING' not in x])
                    cache[mol_smi] = sdf

                mols = self._generate_conformers(cache[mol_smi], self.n_conf)
                for molecule in mols:
                    g = self._obabel_to_networkx3d(molecule)
                    if len(g):
                        global_graph = nx.disjoint_union(global_graph, g)
            yield global_graph
Example #7
 def transform(self, data):
     """Transform."""
     try:
         for serial_data in util.read(data):
             py_obj = json.loads(serial_data)
             graph = json_graph.node_link_graph(py_obj)
             yield graph
     except Exception as e:
         logger.debug('Failed iteration. Reason: %s' % e)
         logger.debug('Exception', exc_info=True)
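
This transform simply reverses networkx's node-link JSON serialisation, one graph per input line. A self-contained round-trip sketch using only the standard library and networkx:

import json
import networkx as nx
from networkx.readwrite import json_graph

# serialise a small graph into the node-link JSON format this transform expects
g = nx.Graph()
g.add_node(0, label='A')
g.add_node(1, label='B')
g.add_edge(0, 1, label='bond')
serial_data = json.dumps(json_graph.node_link_data(g))

# ...and rebuild it, exactly as the loop above does for each line of input
g2 = json_graph.node_link_graph(json.loads(serial_data))
assert g2.number_of_nodes() == 2 and g2.number_of_edges() == 1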
Example #8
 def _smi_to_eden(self, iterable):
     for mol_smi in read(iterable):
         if self.smi_has_error(mol_smi) is False:
             mol = pybel.readstring("smi", mol_smi.strip())
             # remove hydrogens
             mol.removeh()
             graph = self._obabel_to_networkx(mol)
             if len(graph):
                 graph.graph['id'] = mol_smi.strip()
                 yield graph
Example #9
def _smi_to_eden(iterable):
    for mol_smi in read(iterable):
        if _smi_has_error(mol_smi) is False:
            mol = pybel.readstring("smi", mol_smi.strip())
            # remove hydrogens
            mol.removeh()
            graph = _obabel_to_networkx(mol)
            if len(graph):
                graph.graph['id'] = mol_smi.strip()
                yield graph
Example #10
def load_target(name):
    """Return a numpy array of integers to be used as target vector.

    Parameters
    ----------
    name : string
        A pointer to the data source.

    """
    target = [y.strip() for y in read(name) if y]
    return np.array(target).astype(int)
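
Because read also accepts a list of strings, the target loader can be exercised without touching a file. A short usage sketch, assuming load_target above is importable:

y = load_target(['1', '0', '1', '0'])
print(y)        # -> [1 0 1 0]
print(y.dtype)  # integer dtype, ready to use as a classification target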
Example #11
 def _sdf_to_eden(self, iterable):
     if self.split_components:  # yield every graph separately
         for mol_sdf in read(iterable):
             mol = pybel.readstring("sdf", mol_sdf)
             mols = self._generate_conformers(mol.write("sdf"), self.n_conf)
             for molecule in mols:
                 molecule.removeh()
                 graph = self._obabel_to_networkx3d(molecule)
                 if len(graph):
                     yield graph
     else:  # construct a global graph and accumulate everything there
         global_graph = nx.Graph()
         for mol_sdf in read(iterable):
             mol = pybel.readstring("sdf", mol_sdf)
             mols = self._generate_conformers(mol.write("sdf"), self.n_conf)
             for molecule in mols:
                 molecule.removeh()
                 g = self._obabel_to_networkx3d(molecule)
                 if len(g):
                     global_graph = nx.disjoint_union(global_graph, g)
         yield global_graph
Example #12
def node_link_data_to_eden(input=None, options=dict()):
    """
    Takes a string list in the serialised node_link_data JSON format and yields networkx graphs.

    Parameters
    ----------
    input : string
        A pointer to the data source.

    """

    return _node_link_data_to_eden(util.read(input))
Example #13
def word_sequence_to_eden(input=None, options=dict()):
    """
    Takes a list of strings, splits each string into words and yields networkx graphs.

    Parameters
    ----------
    input : string
        A pointer to the data source.

    """

    for word_sequence in util.read(input):
        yield word_sequence_to_networkx(word_sequence)
Example #14
def sequence_to_eden(input=None, options=dict()):
    """
    Takes a list of strings and yields networkx graphs.

    Parameters
    ----------
    input : string
        A pointer to the data source.

    """

    for sequence in read(input):
        yield sequence_to_networkx(sequence)
Example #15
def obabel_to_eden(iterable, file_format='sdf', **options):
    """
    Takes a string list in SDF or SMILES format and yields networkx graphs.

    Parameters
    ----------
    iterable : strings containing molecular structures (SDF records or SMILES strings).

    """
    def smi_has_error(smi):
        smi = smi.strip()
        n_open_parenthesis = sum(1 for c in smi if c == '(')
        n_close_parenthesis = sum(1 for c in smi if c == ')')
        n_open_parenthesis_square = sum(1 for c in smi if c == '[')
        n_close_parenthesis_square = sum(1 for c in smi if c == ']')
        return (n_open_parenthesis != n_close_parenthesis) or \
            (n_open_parenthesis_square != n_close_parenthesis_square)

    if file_format == 'sdf':
        for mol_sdf in read(iterable):
            mol = pybel.readstring("sdf", mol_sdf.strip())
            # remove hydrogens
            mol.removeh()
            graph = obabel_to_networkx(mol)
            if len(graph):
                yield graph
    elif file_format == 'smi':
        for mol_smi in read(iterable):
            if smi_has_error(mol_smi) is False:
                mol = pybel.readstring("smi", mol_smi.strip())
                # remove hydrogens
                mol.removeh()
                graph = obabel_to_networkx(mol)
                if len(graph):
                    graph.graph['info'] = mol_smi.strip()
                    yield graph
    else:
        raise Exception('ERROR: unrecognized file format: %s' % file_format)
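
The inner smi_has_error check rejects SMILES strings with unbalanced round or square brackets before handing them to Open Babel. An equivalent standalone sketch using str.count instead of the counting generators above:

def smi_has_error(smi):
    """Reject SMILES strings whose round or square brackets are unbalanced."""
    smi = smi.strip()
    return (smi.count('(') != smi.count(')')) or (smi.count('[') != smi.count(']'))


assert smi_has_error('CC(=O)Oc1ccccc1C(=O)O') is False  # aspirin, balanced
assert smi_has_error('CC(=O)Oc1ccccc1C(=O O') is True   # truncated, unbalanced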
Example #16
def _fasta_to_fasta(input):
    seq = ""
    for line in util.read(input):
        if line:
            if line[0] == '>':
                if seq:
                    yield seq
                    seq = ""
                line_str = str(line)
                yield line_str.strip()
            else:
                line_str = line.split()
                if line_str:
                    seq += str(line_str[0]).strip()
    if seq:
        yield seq
Example #17
 def _fasta_to_fasta(self, data):
     seq = ""
     for line in util.read(data):
         if line:
             if line[0] == '>':
                 line = line[1:]
                 if seq:
                     yield seq
                     seq = ""
                 line_str = str(line)
                 yield line_str.strip()
             else:
                 line_str = line.split()
                 if line_str:
                     seq += str(line_str[0]).strip()
     if seq:
         yield seq
Example #18
 def _load_data(uri):
     M = []
     labels = []
     counter = 0
     for line in read(uri):
         counter += 1
         if counter > n_max:
             break
         line = line.strip()
         if line:
             items = line.split(' ')
             label = hash(items[-1]) & 13
             labels.append(label)
             data = [float(x) for x in items[:-1]]
             M.append(data)
     X = np.array(M)
     y = np.array(labels)
     return X, y
Example #19
def _load_wine():
    print('wine')
    uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
    M = []
    labels = []
    for line in read(uri):
        line = line.strip()
        if line:
            items = line.split(',')
            label = int(items[0])
            labels.append(label)
            data = [float(x) for x in items[1:]]
            M.append(data)

    X = scale(np.array(M))
    targets = LabelEncoder().fit_transform(labels)
    y = np.array(targets)
    return X, y
Example #20
def _load_wdbc():
    print('breast-cancer-wisconsin')
    uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'
    import numpy as np
    from sklearn.preprocessing import LabelEncoder, scale
    from eden.util import read

    M = []
    labels = []
    for line in read(uri):
        line = line.strip()
        if line:
            items = line.split(',')
            label = str(items[1])
            labels.append(label)
            data = [float(x) for x in items[2:]]
            M.append(data)

    X = scale(np.array(M))
    targets = LabelEncoder().fit_transform(labels)
    y = np.array(targets)
    return X, y
Example #21
def gspan_to_eden(input=None, options=dict()):
    """
    Takes a string list in the extended gSpan format and yields networkx graphs.

    Parameters
    ----------
    input : string
        A pointer to the data source.

    """
    header = ''
    string_list = []
    for line in util.read(input):
        if line.strip():
            if line[0] in ['g', 't']:
                if string_list:
                    yield gspan_to_networkx(header, string_list)
                string_list = []
                header = line
            string_list += [line]

    if string_list:
        yield gspan_to_networkx(header, string_list)
Example #22
def gspan_to_eden(input, options=dict()):
    """Take a string list in the extended gSpan format and yields NetworkX graphs.

    Args:
        input: data source, can be a list of strings, a file name or a url
    Returns:
        NetworkX graph generator
    Raises:
        Exception: if a graph is empty
    """
    header = ''
    string_list = []
    for line in util.read(input):
        if line.strip():
            if line[0] in ['g', 't']:
                if string_list:
                    yield gspan_to_networkx(header, string_list)
                string_list = []
                header = line
            else:
                string_list += [line]
    if string_list:
        yield gspan_to_networkx(header, string_list)
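
gspan_to_networkx itself is not shown on this page. In the plain gSpan format a block starts with a 't # <id>' header, followed by 'v <id> <label>' vertex lines and 'e <src> <dst> <label>' edge lines; the extended format used here may attach further attributes. A minimal, illustrative sketch of turning one such block into a NetworkX graph (the function name is hypothetical):

import networkx as nx


def gspan_block_to_networkx(header, lines):
    """Minimal parser for one gSpan block made of 'v id label' and 'e src dst label' lines."""
    graph = nx.Graph(id=header.strip())
    for line in lines:
        parts = line.strip().split()
        if not parts:
            continue
        if parts[0] == 'v':    # vertex: v <id> <label>
            graph.add_node(int(parts[1]), label=parts[2])
        elif parts[0] == 'e':  # edge: e <src> <dst> <label>
            graph.add_edge(int(parts[1]), int(parts[2]), label=parts[3])
    return graph


g = gspan_block_to_networkx('t # 0', ['v 0 C', 'v 1 O', 'e 0 1 1'])
assert g.number_of_nodes() == 2 and g.number_of_edges() == 1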
Example #23
def _load_ionosphere():
    print('ionosphere')
    uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data'
    n_max = 700

    M = []
    labels = []
    counter = 0
    for line in read(uri):
        counter += 1
        if counter > n_max:
            break
        line = line.strip()
        if line:
            items = line.split(',')
            label = hash(items[-1])
            labels.append(label)
            data = [float(x) for x in items[:-1]]
            M.append(data)

    X = np.array(M)
    targets = LabelEncoder().fit_transform(labels)
    y = np.array(targets)
    return X, y
Example #24
def obabel_to_eden3d(iterable, file_format='sdf',
                     cache={}, split_components=True, **kwargs):
    """Take an iterable file and yields the corresponding networkx graphs.

    **kwargs: arguments to be passed to other methods.
    """
    n_conf = kwargs.get('n_conf', 0)

    if file_format == 'sdf':
        if split_components:  # yield every graph separately
            for mol_sdf in read(iterable):
                mol = pybel.readstring("sdf", mol_sdf)
                mols = generate_conformers(mol.write("sdf"), n_conf)
                for molecule in mols:
                    molecule.removeh()
                    graph = obabel_to_networkx3d(molecule, **kwargs)
                    if len(graph):
                        yield graph
        else:  # construct a global graph and accumulate everything there
            global_graph = nx.Graph()
            for mol_sdf in read(iterable):
                mol = pybel.readstring("sdf", mol_sdf)
                mols = generate_conformers(mol.write("sdf"), n_conf)
                for molecule in mols:
                    molecule.removeh()
                    g = obabel_to_networkx3d(molecule, **kwargs)
                    if len(g):
                        global_graph = nx.disjoint_union(global_graph, g)
            yield global_graph

    elif file_format == 'smi':
        if split_components:  # yield every graph separately
            for mol_smi in read(iterable):
                # First check if the molecule has appeared before and thus is
                # already converted
                if mol_smi not in cache:
                    # convert from SMILES to SDF and store in cache
                    command_string = 'obabel -:"' + mol_smi + '" -osdf --gen3d'
                    args = shlex.split(command_string)
                    sdf = subprocess.check_output(args).decode()  # decode bytes for Python 3
                    # Assume the incoming string contains only one molecule
                    # Remove warning messages generated by openbabel
                    sdf = '\n'.join(
                        [x for x in sdf.split('\n') if 'WARNING' not in x])
                    cache[mol_smi] = sdf

                mols = generate_conformers(cache[mol_smi], n_conf)
                for molecule in mols:
                    graph = obabel_to_networkx3d(molecule, **kwargs)
                    if len(graph):
                        yield graph

        else:  # construct global graph and accumulate everything there
            global_graph = nx.Graph()
            for mol_smi in read(iterable):
                # First check if the molecule has appeared before and thus is
                # already converted
                if mol_smi not in cache:
                    # convert from SMILES to SDF and store in cache
                    command_string = 'obabel -:"' + mol_smi + '" -osdf --gen3d'
                    args = shlex.split(command_string)
                    sdf = subprocess.check_output(args).decode()  # decode bytes for Python 3
                    sdf = '\n'.join(
                        [x for x in sdf.split('\n') if 'WARNING' not in x])
                    cache[mol_smi] = sdf

                mols = generate_conformers(cache[mol_smi], n_conf)
                for molecule in mols:
                    g = obabel_to_networkx3d(molecule, **kwargs)
                    if len(g):
                        global_graph = nx.disjoint_union(global_graph, g)
            yield global_graph

    else:
        raise Exception('ERROR: unrecognized file format: %s' % file_format)
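
The SMILES branch shells out to the obabel command-line tool exactly as shown above (obabel -:"<smiles>" -osdf --gen3d). A standalone sketch of that conversion step, decoding the subprocess output explicitly so it also runs on Python 3 and stripping Open Babel's WARNING lines; it assumes obabel is installed and on the PATH:

import shlex
import subprocess


def smiles_to_sdf(mol_smi):
    """Convert one SMILES string to an SDF block via the obabel CLI."""
    command_string = 'obabel -:"' + mol_smi + '" -osdf --gen3d'
    sdf = subprocess.check_output(shlex.split(command_string)).decode()
    # drop warning lines emitted by Open Babel, as the examples above do
    return '\n'.join(x for x in sdf.split('\n') if 'WARNING' not in x)


# sdf_block = smiles_to_sdf('CCO')  # ethanol; uncomment if obabel is available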