def get_smiles(self, mol):
    """Return the SMILES string describing the connected molecule *mol*.

    The per-run bookkeeping stored on ``self`` (ring joins, processed
    atoms, branches and the stereochemistry tables) is reset, the text is
    assembled from the pieces yielded by ``self._get_smiles(mol)`` and
    finally every ``{{stereoN}}`` placeholder (N being the index of a
    tetrahedral center in ``mol.vertices``) is replaced by ``@`` or ``@@``.

    Side effects on *mol*: both vertices of every aromatic edge get
    ``properties_['aromatic'] = 1``, temporarily disconnected edges are
    reconnected and ``disconnected`` is reset to ``False`` on every edge.

    Raises:
        oasa_exceptions.oasa_not_implemented_error: if *mol* is not
            connected (salts etc.).
    """
    if not mol.is_connected():
        raise oasa_exceptions.oasa_not_implemented_error(
            "SMILES",
            "Cannot encode disconnected compounds, such as salts etc. HINT - use molecule.get_disconnected_subgraphs() to divide the molecule to individual parts.")

    self.molecule = mol
    self.ring_joins = []
    self._processed_atoms = []
    self.branches = {}
    self._stereo_bonds_to_code = {}    # bond -> character it uses
    self._stereo_bonds_to_others = {}  # bond -> [(other bond, stereochemistry), ...]
    self._stereo_centers = {}          # center atom -> tetrahedral stereochemistry

    # Mark all atoms that take part in an aromatic bond first; it is much
    # simpler to do it now, while all the edges are still present.
    for e in mol.edges:
        if e.aromatic:
            for v in e.vertices:
                v.properties_['aromatic'] = 1

    # Stereochemistry information preparation.
    for st in mol.stereochemistry:
        if isinstance(st, stereochemistry.cis_trans_stereochemistry):
            end1, inside1, inside2, end2 = st.references
            e1 = end1.get_edge_leading_to(inside1)
            e2 = end2.get_edge_leading_to(inside2)
            # Each of the two outer bonds remembers its counterpart.
            self._stereo_bonds_to_others.setdefault(e1, []).append((e2, st))
            self._stereo_bonds_to_others.setdefault(e2, []).append((e1, st))
        elif isinstance(st, stereochemistry.tetrahedral_stereochemistry):
            self._stereo_centers[st.center] = st
        # Any other kind of stereochemistry cannot be handled and is ignored.

    ret = ''.join(self._get_smiles(mol))
    mol.reconnect_temporarily_disconnected_edges()
    # This is needed because the way temporarily disconnected edges are
    # handled is not compatible with the way SMILES generation works - it
    # splits the molecule while reusing the same atoms and bonds and thus
    # the accounting of disconnected bonds fails.
    for e in mol.edges:
        e.disconnected = False

    # Here the tetrahedral stereochemistry is added.
    # .items() instead of the Python-2-only .iteritems() keeps this loop
    # working on both Python 2 and Python 3.
    for v, st in self._stereo_centers.items():
        # Neighbors of the center in the order in which they were written
        # out; an explicit hydrogen takes the position of the center itself.
        processed_neighbors = []
        for n in self._processed_atoms:
            if n in v.neighbors:
                processed_neighbors.append(n)
            elif v.explicit_hydrogens and n is v:
                processed_neighbors.append(stereochemistry.explicit_hydrogen())
        count = match_atom_lists(st.references, processed_neighbors)
        clockwise = st.value == st.CLOCKWISE
        # An odd count inverts the sense of rotation
        # (presumably count is a number of swaps - verify in match_atom_lists).
        if count % 2 == 1:
            clockwise = not clockwise
        ch_symbol = "@@" if clockwise else "@"
        ret = ret.replace("{{stereo%d}}" % mol.vertices.index(v), ch_symbol)
    return ret
def read_sum_layer(self):
    """Populate ``self.structure`` with vertices from the sum-formula layer.

    ``self.layers[1]`` is turned into a formula dictionary and one vertex
    is created for every counted atom, elements being visited in the order
    given by ``sorted_keys()``.  Hydrogens are created only until their
    number reaches the formula count minus ``self.hs_in_hydrogen_layer``;
    the remaining ones are skipped here.  Each created vertex gets a
    running, 1-based ``properties_['inchi_number']``.

    Raises:
        oasa_not_implemented_error: if the layer contains ".", i.e. it
            describes a multiple compound system.
    """
    sum_layer = self.layers[1]
    if "." in sum_layer:
        raise oasa_not_implemented_error(
            "INChI",
            "multiple compound systems are not supported by the library")

    formula = pt.formula_dict(sum_layer)
    # for diborane and similar compounds we must process some Hs here
    hydrogens_done = 0
    inchi_number = 0
    for element in formula.sorted_keys():
        for _ in range(formula[element]):
            if element == 'H':
                # we want to process only the Hs that are not in the h-layer
                hydrogens_wanted = formula[element] - self.hs_in_hydrogen_layer
                if hydrogens_done >= hydrogens_wanted:
                    continue
                hydrogens_done += 1
            inchi_number += 1
            atom = self.structure.create_vertex()
            atom.symbol = element
            self.structure.add_vertex(atom)
            atom.properties_['inchi_number'] = inchi_number
def get_number_of_hydrogens_in_hydrogen_layer(self):
    """Return the number of hydrogens described by the "h" layer.

    The result is the sum of
      * ``self._get_hs_in_moving_hydrogen(...)`` for the content of every
        parenthesised group made of ``H``, digits, commas and dashes, and
      * ``len(vs) * num`` for every ``(vs, num)`` pair produced by
        ``self._parse_h_layer`` from the layer with those groups removed.

    Returns 0 when there is no "h" layer (or it is empty).

    Raises:
        oasa_not_implemented_error: if the layer contains "*" or ";",
            i.e. it describes a multiple compound system.
    """
    layer = self.get_layer("h")
    # no (or empty) h-layer means there is nothing to count
    if not layer:
        return 0

    # check if we can handle it
    if "*" in layer or ";" in layer:
        raise oasa_not_implemented_error(
            "INChI",
            "multiple compound systems are not supported by the library")

    # Raw string: the pattern contains backslash escapes (\(, \d, \-, \))
    # that are invalid escape sequences in an ordinary string literal.
    re_for_brackets = r"\([H\d,\-]+?\)"

    ret = 0
    for bracket in re.findall(re_for_brackets, layer):
        # strip the enclosing parentheses before handing the group over
        ret += self._get_hs_in_moving_hydrogen(bracket[1:-1])

    # clean the brackets out before parsing the rest of the layer
    layer = re.sub(re_for_brackets, "", layer)
    for vs, num in self._parse_h_layer(layer):
        ret += len(vs) * num
    return ret
def read_sum_layer(self):
    """Create the vertices of ``self.structure`` from the sum-formula layer.

    The formula dictionary built from ``self.layers[1]`` is walked element
    by element (in ``sorted_keys()`` order) and a vertex is created for
    each counted atom.  For hydrogen, vertices are created only while the
    number already created is below the formula count minus
    ``self.hs_in_hydrogen_layer``.  Every created vertex is numbered
    consecutively from 1 in ``properties_['inchi_number']``.

    Raises:
        oasa_not_implemented_error: if the layer contains ".", i.e. it
            describes a multiple compound system.
    """
    sum_layer = self.layers[1]
    if "." in sum_layer:
        raise oasa_not_implemented_error(
            "INChI",
            "multiple compound systems are not supported by the library")

    formula = pt.formula_dict(sum_layer)
    # for diborane and similar compounds we must process some Hs here
    hydrogens_done = 0
    inchi_number = 0
    for element in formula.sorted_keys():
        for _ in range(formula[element]):
            if element == 'H':
                # we want to process only the Hs that are not in the h-layer
                hydrogens_wanted = formula[element] - self.hs_in_hydrogen_layer
                if hydrogens_done >= hydrogens_wanted:
                    continue
                hydrogens_done += 1
            inchi_number += 1
            atom = self.structure.create_vertex()
            atom.symbol = element
            self.structure.add_vertex(atom)
            atom.properties_['inchi_number'] = inchi_number
def get_number_of_hydrogens_in_hydrogen_layer(self):
    """Return the number of hydrogens described by the "h" layer.

    The result is the sum of
      * ``self._get_hs_in_moving_hydrogen(...)`` for the content of every
        parenthesised group made of ``H``, digits, commas and dashes, and
      * ``len(vs) * num`` for every ``(vs, num)`` pair produced by
        ``self._parse_h_layer`` from the layer with those groups removed.

    Returns 0 when there is no "h" layer (or it is empty).

    Raises:
        oasa_not_implemented_error: if the layer contains "*" or ";",
            i.e. it describes a multiple compound system.
    """
    layer = self.get_layer("h")
    # no (or empty) h-layer means there is nothing to count
    if not layer:
        return 0

    # check if we can handle it
    if "*" in layer or ";" in layer:
        raise oasa_not_implemented_error(
            "INChI",
            "multiple compound systems are not supported by the library")

    # Raw string: the pattern contains backslash escapes (\(, \d, \-, \))
    # that are invalid escape sequences in an ordinary string literal.
    re_for_brackets = r"\([H\d,\-]+?\)"

    ret = 0
    for bracket in re.findall(re_for_brackets, layer):
        # strip the enclosing parentheses before handing the group over
        ret += self._get_hs_in_moving_hydrogen(bracket[1:-1])

    # clean the brackets out before parsing the rest of the layer
    layer = re.sub(re_for_brackets, "", layer)
    for vs, num in self._parse_h_layer(layer):
        ret += len(vs) * num
    return ret