Example #1
0
 def get_smiles( self, mol):
   """Return the SMILES string generated for the molecule `mol`.

   The per-run state of the converter (molecule, ring_joins,
   _processed_atoms, branches and the stereochemistry bookkeeping
   dictionaries) is reset at the start of every call.

   Side effects on `mol`:
     - both vertices of every aromatic edge get properties_['aromatic'] = 1
     - mol.reconnect_temporarily_disconnected_edges() is called
     - the `disconnected` attribute of every edge is set to False

   Raises oasa_exceptions.oasa_not_implemented_error when `mol` is not
   connected.
   """
   if not mol.is_connected():
     raise oasa_exceptions.oasa_not_implemented_error( "SMILES", "Cannot encode disconnected compounds, such as salts etc. HINT - use molecule.get_disconnected_subgraphs() to divide the molecule to individual parts.")
   self.molecule = mol
   self.ring_joins = []
   self._processed_atoms = []
   self.branches = {}
   self._stereo_bonds_to_code = {} # for bond it will contain character it uses
   self._stereo_bonds_to_others = {} # for bond it will contain the other bonds
   self._stereo_centers = {}
   # at first we mark all the atoms with aromatic bonds
   # it is much simpler to do it now when all the edges are present
   # we can make use of the properties attribute of the vertex
   for e in mol.edges:
     if e.aromatic:
       for v in e.vertices:
         v.properties_[ 'aromatic'] = 1
   # stereochemistry information preparation
   for st in mol.stereochemistry:
     if isinstance( st, stereochemistry.cis_trans_stereochemistry):
       end1, inside1, inside2, end2 = st.references
       e1 = end1.get_edge_leading_to( inside1)
       e2 = end2.get_edge_leading_to( inside2)
       # each of the two bonds remembers its partner together with the stereo record
       self._stereo_bonds_to_others.setdefault( e1, []).append( (e2, st))
       self._stereo_bonds_to_others.setdefault( e2, []).append( (e1, st))
     elif isinstance( st, stereochemistry.tetrahedral_stereochemistry):
       self._stereo_centers[st.center] = st
     # any other kind of stereochemistry cannot be handled and is ignored

   ret = ''.join( self._get_smiles( mol))
   mol.reconnect_temporarily_disconnected_edges()
   # this is needed because the way temporarily_disconnected edges are handled is not compatible with the way smiles
   # generation works - it splits the molecule while reusing the same atoms and bonds and thus disconnected bonds accounting fails
   for e in mol.edges:
     e.disconnected = False
   # here tetrahedral stereochemistry is added
   # items() is used instead of the Python-2-only iteritems() so that the method runs on Python 3 as well
   for v, st in self._stereo_centers.items():
     # neighbors of the center in the order in which the atoms were processed
     processed_neighbors = []
     for n in self._processed_atoms:
       if n in v.neighbors:
         processed_neighbors.append( n)
       elif v.explicit_hydrogens and n is v:
         processed_neighbors.append( stereochemistry.explicit_hydrogen())
     count = match_atom_lists( st.references, processed_neighbors)
     clockwise = st.value == st.CLOCKWISE
     # an odd result of match_atom_lists flips the sense of rotation
     if count % 2 == 1:
       clockwise = not clockwise
     ch_symbol = "@@" if clockwise else "@"
     # the center is found in the text by the "{{stereo<vertex index>}}" placeholder
     ret = ret.replace( "{{stereo%d}}" % mol.vertices.index(v), ch_symbol)
   return ret
Example #2
0
  def read_sum_layer( self):
    """Create the vertices of self.structure from the sum formula in self.layers[1].

    One vertex is created for each atom of the formula, elements being taken
    in the order given by sorted_keys() of the formula dictionary. The
    vertices are numbered from 1 in the order of creation and the number is
    stored in properties_['inchi_number'].

    For 'H' only (count in formula - self.hs_in_hydrogen_layer) vertices are
    created, the remaining hydrogens are skipped here.

    Raises oasa_not_implemented_error when the formula contains ".".
    """
    formula_text = self.layers[1]
    if "." in formula_text:
      raise oasa_not_implemented_error( "INChI", "multiple compound systems are not supported by the library")

    composition = pt.formula_dict( formula_text)
    # hydrogens created so far - for diborane and similar compounds some Hs must be processed here
    created_hs = 0
    number = 0
    for symbol in composition.sorted_keys():
      for _ in range( composition[symbol]):
        if symbol == 'H':
          # only the Hs that are not in the h-layer are processed here
          if created_hs >= composition[symbol] - self.hs_in_hydrogen_layer:
            continue
          created_hs += 1
        number += 1
        atom = self.structure.create_vertex()
        atom.symbol = symbol
        self.structure.add_vertex( atom)
        atom.properties_['inchi_number'] = number
Example #3
0
  def get_number_of_hydrogens_in_hydrogen_layer( self):
    """Return the number of hydrogens described by the "h" layer.

    Returns 0 when self.get_layer( "h") gives nothing. Otherwise the result
    is the sum of
      - self._get_hs_in_moving_hydrogen() for the content of every bracketed
        group "(...)" found in the layer
      - len( vs) * num for every (vs, num) pair that self._parse_h_layer()
        yields for the layer with the bracketed groups removed

    Raises oasa_not_implemented_error when the layer contains "*" or ";".
    """
    layer = self.get_layer( "h")
    if not layer:
      return 0

    # check if we can handle it
    if "*" in layer or ";" in layer:
      raise oasa_not_implemented_error( "INChI", "multiple compound systems are not supported by the library")

    ret = 0

    # raw string - "\(" , "\d" and "\-" are invalid escape sequences in an ordinary literal
    re_for_brackets = r"\([H\d,\-]+?\)"
    for bracket in re.findall( re_for_brackets, layer):
      # the enclosing parentheses are stripped before the group is evaluated
      ret += self._get_hs_in_moving_hydrogen( bracket[1:-1])
    layer = re.sub( re_for_brackets, "", layer)  # clean the brackets out

    for vs, num in self._parse_h_layer( layer):
      ret += len( vs) * num

    return ret
Example #4
0
    def read_sum_layer(self):
        """Build the atoms of self.structure from the sum-formula layer.

        self.layers[1] is parsed with pt.formula_dict and one vertex is
        created per atom, walking the elements in the order returned by
        sorted_keys(). Every created vertex gets a sequential number
        (starting at 1) in properties_['inchi_number'].

        Hydrogens are special: only as many 'H' vertices are created as
        the formula count exceeds self.hs_in_hydrogen_layer.

        Raises oasa_not_implemented_error if the layer contains ".".
        """
        sum_formula = self.layers[1]
        if "." in sum_formula:
            raise oasa_not_implemented_error(
                "INChI",
                "multiple compound systems are not supported by the library")

        counts = pt.formula_dict(sum_formula)
        # for diborane and similar compounds some Hs must be processed here
        hs_done = 0
        serial = 0
        for element in counts.sorted_keys():
            for _ in range(counts[element]):
                if element == 'H':
                    # hydrogens that belong to the h-layer are not created here
                    if hs_done < counts[element] - self.hs_in_hydrogen_layer:
                        hs_done += 1
                    else:
                        continue
                serial += 1
                vertex = self.structure.create_vertex()
                vertex.symbol = element
                self.structure.add_vertex(vertex)
                vertex.properties_['inchi_number'] = serial
Example #5
0
    def get_number_of_hydrogens_in_hydrogen_layer(self):
        """Return the number of hydrogens described by the "h" layer.

        Returns 0 when self.get_layer("h") gives nothing. Otherwise the
        result is the sum of

        - self._get_hs_in_moving_hydrogen() for the content of every
          bracketed group "(...)" found in the layer, and
        - len(vs) * num for every (vs, num) pair that
          self._parse_h_layer() yields for the layer with the bracketed
          groups removed.

        Raises oasa_not_implemented_error when the layer contains "*"
        or ";".
        """
        layer = self.get_layer("h")
        if not layer:
            return 0

        # check if we can handle it
        if "*" in layer or ";" in layer:
            raise oasa_not_implemented_error(
                "INChI",
                "multiple compound systems are not supported by the library")

        ret = 0

        # Raw string: "\(", "\d" and "\-" are invalid escape sequences in
        # an ordinary string literal.
        re_for_brackets = r"\([H\d,\-]+?\)"
        for bracket in re.findall(re_for_brackets, layer):
            # strip the enclosing parentheses before evaluating the group
            ret += self._get_hs_in_moving_hydrogen(bracket[1:-1])
        layer = re.sub(re_for_brackets, "", layer)  # clean the brackets out

        for vs, num in self._parse_h_layer(layer):
            ret += len(vs) * num

        return ret