Example #1
0
def data_to_molecule(data: data_lammps):
    """
    Build a molecule out of a data_lammps object.

    The data must describe a single species: every atom has to carry the
    same ``iSpc`` as the first atom, and ``data.NBonds`` has to be greater
    than 1.  When either condition is violated the interpreter is
    terminated through ``sys.exit`` with an explanatory message.

    The atoms are handed to the molecule as they are, whereas the bond
    and angle containers are deep-copied.
    """
    if data.NBonds <= 1:
        sys.exit(
            'data_to_molecule : ERROR - This function only works for data with bonds'
        )

    # the species of the first atom is the reference for all the others
    species = data.At[0].iSpc
    members = []
    for index in range(data.NAtoms):
        candidate = data.At[index]
        if candidate.iSpc != species:
            sys.exit(
                'data_to_molecule : ERROR - This function only works for data with atoms belonging to a single molecule'
            )
        members.append(candidate)

    bonds = deepcopy(data.Bnd)
    angles = deepcopy(data.Ang)
    return molecule(species, members, bonds, angles)
Example #2
0
def read_cdml(text):
    """Parse CDML `text` and yield the molecules found in it.

    The molecule elements are processed in the order the search returns
    them.  A molecule that is not connected is yielded as its separate
    disconnected subgraphs.  As soon as an atom without a 'name' attribute
    is met the generator finishes - neither that molecule nor any later one
    is yielded.
    """
    doc = dom.parseString(text)
    for mol_el in dom_ext.simpleXPathSearch(doc, "//molecule"):
        vertex_by_id = {}
        mol = molecule()
        for atom_el in dom_ext.simpleXPathSearch(mol_el, "atom"):
            name = atom_el.getAttribute('name')
            if not name:
                # this molecule has an invalid symbol - stop reading altogether
                return
            point = dom_ext.simpleXPathSearch(atom_el, 'point')[0]
            x = cm_to_float_coord(point.getAttribute('x'))
            y = cm_to_float_coord(point.getAttribute('y'))
            z = cm_to_float_coord(point.getAttribute('z'))
            if name in PT:
                # a genuine atom; a missing/empty charge counts as 0
                raw_charge = atom_el.getAttribute('charge')
                if raw_charge:
                    charge = int(raw_charge)
                else:
                    charge = 0
                vertex = atom(symbol=name, charge=charge, coords=(x, y, z))
                mol.add_vertex(v=vertex)
            elif name in cdml_to_smiles:
                # a known group - expand it and anchor its first vertex
                group = smiles.text_to_mol(cdml_to_smiles[name], calc_coords=0)
                vertex = group.vertices[0]
                vertex.x = x
                vertex.y = y
                vertex.z = z
                mol.insert_a_graph(group)
            # NOTE(review): a name that is in neither table leaves `vertex`
            # unset (or pointing at the previously read atom) - confirm that
            # such input cannot occur
            vertex_by_id[atom_el.getAttribute('id')] = vertex

        for bond_el in dom_ext.simpleXPathSearch(mol_el, "bond"):
            bond_type = bond_el.getAttribute('type')
            order_char = bond_type[1]
            if order_char == u'0':
                # bonds with order 0 are ignored
                continue
            start_vertex = vertex_by_id[bond_el.getAttribute('start')]
            end_vertex = vertex_by_id[bond_el.getAttribute('end')]
            edge = bond(order=int(order_char), type=bond_type[0])
            mol.add_edge(start_vertex, end_vertex, e=edge)

        if not mol.is_connected():
            # handles diborane and similar weird things
            for part in mol.get_disconnected_subgraphs():
                yield part
        else:
            yield mol
Example #3
0
 def get_transformed_template(self, n, coords, type="empty", paper=None):
     """Instantiate template number `n` and transform it to fit `coords`.

     `type` is the type of connection:
       'empty' - the template is just drawn, no connection; `coords` is (x, y)
       'atom1' - for a single atom; `coords` is (x1, y1, x2, y2)
       'atom2' - for an atom with more than 1 bond; `coords` is (x1, y1, x2, y2)
       'bond'  - the template is attached to a bond; `coords` is (x1, y1, x2, y2)

     `paper` defaults to Store.app.paper.

     Returns the transformed molecule, or None when `type` is 'bond' and the
     template has no t_bond_first/t_bond_second. As a side effect
     self._scale_ratio is reset to 1 and, for 'bond', set to the scaling used.
     """
     pap = paper or Store.app.paper
     pap.onread_id_sandbox_activate()  # must be here to mangle the ids
     current = molecule(pap, package=self.templates[n])
     pap.onread_id_sandbox_finish(apply_to=[current])  # id mangling
     current.name = ""
     self._scale_ratio = 1
     trans = transform()
     # type empty - just draws the template - no connection
     if type == "empty":
         xt1, yt1 = current.t_atom.get_xy()
         xt2, yt2 = current.next_to_t_atom.get_xy()
         x1, y1 = coords
         # NOTE(review): the bond length is read from Store.app.paper even when
         # a different `paper` was passed in - confirm this is intended
         bond_length = Screen.any_to_px(Store.app.paper.standard.bond_length)
         current.delete_items([current.t_atom], redraw=0, delete_single_atom=0)
         # move next_to_t_atom to the origin, scale the t_atom--next_to_t_atom
         # distance to the standard bond length, then move to the target point
         trans.set_move(-xt2, -yt2)
         trans.set_scaling(bond_length / math.sqrt((xt1 - xt2) ** 2 + (yt1 - yt2) ** 2))
         trans.set_move(x1, y1)
     # type atom
     elif type == "atom1" or type == "atom2":
         xt1, yt1 = current.t_atom.get_xy()
         xt2, yt2 = current.next_to_t_atom.get_xy()
         x1, y1, x2, y2 = coords
         trans.set_move(-xt2, -yt2)
         # scale by the ratio of the target segment length to the template's
         # t_atom--next_to_t_atom distance
         trans.set_scaling(
             math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) / math.sqrt((xt1 - xt2) ** 2 + (yt1 - yt2) ** 2)
         )
         # rotate by the difference between the template and target directions
         trans.set_rotation(math.atan2(xt1 - xt2, yt1 - yt2) - math.atan2(x1 - x2, y1 - y2))
         trans.set_move(x2, y2)
     # type bond
     elif type == "bond":
         if not (current.t_bond_first and current.t_bond_second):
             warn("this template is not capable to be added to bond - sorry.")
             return None
         current.delete_items([current.t_atom], redraw=0, delete_single_atom=0)
         xt1, yt1 = current.t_bond_first.get_xy()
         xt2, yt2 = current.t_bond_second.get_xy()
         x1, y1, x2, y2 = coords
         self._scale_ratio = math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) / math.sqrt(
             (xt1 - xt2) ** 2 + (yt1 - yt2) ** 2
         )  # further needed for bond.bond_width transformation
         # here the rotation is applied before the scaling, unlike the atom case
         trans.set_move(-xt1, -yt1)
         trans.set_rotation(math.atan2(xt1 - xt2, yt1 - yt2) - math.atan2(x1 - x2, y1 - y2))
         trans.set_scaling(self._scale_ratio)
         trans.set_move(x1, y1)
     # for any other `type` value the transform is applied as constructed
     self.transform_template(current, trans)
     # remove obsolete info from template
     if type == "atom1":
         current.delete_items([current.t_atom], redraw=0, delete_single_atom=0)
     elif type == "atom2":
         # pin the template atom exactly onto the first target point
         current.t_atom.x = x1
         current.t_atom.y = y1
     current.t_atom = None
     current.t_bond_first = None
     current.t_bond_second = None
     # return ready template
     return current
Example #4
0
def read_cdml( text):
  """Parse CDML `text` and yield the molecules found in it.

  A molecule that is not connected is yielded as its separate disconnected
  subgraphs.  The generator finishes at the first atom that lacks a 'name'
  attribute; neither that molecule nor any later one is yielded.
  """
  doc = dom.parseString( text)
  for mol_el in dom_ext.simpleXPathSearch( doc, "//molecule"):
    id_to_vertex = {}
    mol = molecule()
    for atom_el in dom_ext.simpleXPathSearch( mol_el, "atom"):
      name = atom_el.getAttribute( 'name')
      if not name:
        # this molecule has an invalid symbol - stop reading altogether
        return
      point = dom_ext.simpleXPathSearch( atom_el, 'point')[0]
      x = cm_to_float_coord( point.getAttribute( 'x'))
      y = cm_to_float_coord( point.getAttribute( 'y'))
      z = cm_to_float_coord( point.getAttribute( 'z'))
      if name in PT:
        # a genuine atom; a missing/empty charge counts as 0
        charge_text = atom_el.getAttribute( 'charge')
        charge = int( charge_text) if charge_text else 0
        node = atom( symbol=name, charge=charge, coords=( x, y, z))
        mol.add_vertex( v=node)
      elif name in cdml_to_smiles:
        # a known group - expand it and anchor its first vertex
        group = smiles.text_to_mol( cdml_to_smiles[ name], calc_coords=0)
        node = group.vertices[0]
        node.x = x
        node.y = y
        node.z = z
        mol.insert_a_graph( group)
      # NOTE(review): a name that is in neither table leaves `node` unset
      # (or pointing at the previously read atom) - confirm that such
      # input cannot occur
      id_to_vertex[ atom_el.getAttribute( 'id')] = node

    for bond_el in dom_ext.simpleXPathSearch( mol_el, "bond"):
      bond_type = bond_el.getAttribute( 'type')
      if bond_type[1] == u'0':
        # bonds with order 0 are ignored
        continue
      v1 = id_to_vertex[ bond_el.getAttribute( 'start')]
      v2 = id_to_vertex[ bond_el.getAttribute( 'end')]
      edge = bond( order=int( bond_type[1]), type=bond_type[0])
      mol.add_edge( v1, v2, e=edge)

    if not mol.is_connected():
      # handles diborane and similar weird things
      for part in mol.get_disconnected_subgraphs():
        yield part
    else:
      yield mol
Example #5
0
def generate_random_universe():
    """Return a new universe holding 100 molecules, each built from one
    random_atom() result, and an empty list of attractors."""
    u = universe()
    u.molecules = [molecule(random_atom()) for _ in range(0, 100)]
    u.attractors = []
    return u
Example #6
0
def oasa_mol_to_bkchem_mol(mol, paper):
    """Convert the OASA molecule `mol` into a BKChem molecule on `paper`.

    Every vertex and edge of `mol` is converted and inserted into a new
    molecule.  When all atoms of `mol` have both x and y set, the result is
    rescaled so that its average bond length equals the paper's standard
    bond length and the centre of its bounding box lands at (320, 240).
    A molecule without bonds (or with zero average bond length) is only
    moved, not scaled.

    Returns the new molecule.
    """
    m = molecule.molecule(paper)
    # rescaling is possible only when no coordinate is missing
    calc_position = not any(a.x is None or a.y is None for a in mol.atoms)

    minx = None
    maxx = None
    miny = None
    maxy = None
    # atoms
    for a in mol.vertices:
        a2 = oasa_atom_to_bkchem_atom(a, paper, m)
        m.insert_atom(a2)
        if calc_position:
            # data for rescaling; compare against None explicitly because a
            # coordinate of 0 is a legitimate extreme
            if maxx is None or a2.x > maxx:
                maxx = a2.x
            if minx is None or a2.x < minx:
                minx = a2.x
            if miny is None or a2.y < miny:
                miny = a2.y
            if maxy is None or a2.y > maxy:
                maxy = a2.y
    # bonds
    bond_lengths = []
    for b in mol.edges:
        b2 = oasa_bond_to_bkchem_bond(b, paper)
        aa1, aa2 = b.vertices
        # the new atoms were inserted in the order of mol.vertices
        atom1 = m.atoms[mol.vertices.index(aa1)]
        atom2 = m.atoms[mol.vertices.index(aa2)]
        m.add_edge(atom1, atom2, b2)
        b2.molecule = m
        if calc_position:
            bond_lengths.append(
                math.sqrt((b2.atom1.x - b2.atom2.x)**2 +
                          (b2.atom1.y - b2.atom2.y)**2))
    # rescale (maxx stays None when there was no atom at all)
    if calc_position and maxx is not None:
        if bond_lengths:
            bl = sum(bond_lengths) / len(bond_lengths)
        else:
            bl = 0
        if bl:
            scale = Screen.any_to_px(paper.standard.bond_length) / bl
        else:
            # nothing to derive a scale from - keep the original size
            scale = 1
        movex = (maxx + minx) / 2
        movey = (maxy + miny) / 2
        trans = transform3d.transform3d()
        trans.set_move(-movex, -movey, 0)
        trans.set_scaling(scale)
        trans.set_move(320, 240, 0)
        for a in m.atoms:
            a.x, a.y, a.z = trans.transform_xyz(a.x, a.y, a.z)
    return m
Example #7
0
    def _read_molecule(self, el):
        """Build a molecule on self.paper from the element `el`.

        The "vertex" children are read as atoms first, then the "edge"
        children as bonds.  The molecule is registered in self._mol_ids
        under the 'id' attribute of `el` and returned.
        """
        mol = molecule(self.paper)
        # atoms
        for vertex_el in xpath.Evaluate("vertex", el):
            new_atom = self._read_atom(vertex_el, mol)
            mol.insert_atom(new_atom)
        # bonds
        for edge_el in xpath.Evaluate("edge", el):
            new_bond = self._read_bond(edge_el)
            new_bond.molecule = mol
            mol.add_edge(new_bond.atom1, new_bond.atom2, new_bond)

        self._mol_ids[el.getAttribute('id')] = mol
        return mol
Example #8
0
    def _read_molecule(self, el):
        """Create a molecule on self.paper from the element `el`.

        Atoms come from the "vertex" children, bonds from the "edge"
        children (read after all the atoms).  The finished molecule is
        stored in self._mol_ids keyed by the element's "id" attribute and
        returned.
        """
        result = molecule(self.paper)
        # atoms
        for v_el in xpath.Evaluate("vertex", el):
            result.insert_atom(self._read_atom(v_el, result))
        # bonds
        for e_el in xpath.Evaluate("edge", el):
            edge = self._read_bond(e_el)
            edge.molecule = result
            result.add_edge(edge.atom1, edge.atom2, edge)

        self._mol_ids[el.getAttribute("id")] = result
        return result
Example #9
0
def oasa_mol_to_bkchem_mol( mol, paper):
  """Convert the OASA molecule `mol` into a BKChem molecule on `paper`.

  Every vertex and edge of `mol` is converted and inserted into a new
  molecule.  When all atoms of `mol` have both x and y set, the result is
  rescaled so that its average bond length equals the paper's standard
  bond length and the centre of its bounding box lands at (320, 240).
  A molecule without bonds (or with zero average bond length) is only
  moved, not scaled.

  Returns the new molecule.
  """
  m = molecule.molecule( paper)
  # rescaling is possible only when no coordinate is missing
  calc_position = not any( a.x is None or a.y is None for a in mol.atoms)

  minx = None
  maxx = None
  miny = None
  maxy = None
  # atoms
  for a in mol.vertices:
    a2 = oasa_atom_to_bkchem_atom( a, paper, m)
    m.insert_atom( a2)
    if calc_position:
      # data for rescaling; compare against None explicitly because a
      # coordinate of 0 is a legitimate extreme
      if maxx is None or a2.x > maxx:
        maxx = a2.x
      if minx is None or a2.x < minx:
        minx = a2.x
      if miny is None or a2.y < miny:
        miny = a2.y
      if maxy is None or a2.y > maxy:
        maxy = a2.y
  # bonds
  bond_lengths = []
  for b in mol.edges:
    b2 = oasa_bond_to_bkchem_bond( b, paper)
    aa1, aa2 = b.vertices
    # the new atoms were inserted in the order of mol.vertices
    atom1 = m.atoms[ mol.vertices.index( aa1)]
    atom2 = m.atoms[ mol.vertices.index( aa2)]
    m.add_edge( atom1, atom2, b2)
    b2.molecule = m
    if calc_position:
      bond_lengths.append( math.sqrt( (b2.atom1.x-b2.atom2.x)**2 + (b2.atom1.y-b2.atom2.y)**2))
  # rescale (maxx stays None when there was no atom at all)
  if calc_position and maxx is not None:
    if bond_lengths:
      bl = sum( bond_lengths) / len( bond_lengths)
    else:
      bl = 0
    if bl:
      scale = Screen.any_to_px( paper.standard.bond_length) / bl
    else:
      # nothing to derive a scale from - keep the original size
      scale = 1
    movex = (maxx+minx)/2
    movey = (maxy+miny)/2
    trans = transform3d.transform3d()
    trans.set_move( -movex, -movey, 0)
    trans.set_scaling( scale)
    trans.set_move( 320, 240, 0)
    for a in m.atoms:
      a.x, a.y, a.z = trans.transform_xyz( a.x, a.y, a.z)
  return m
 def smiles_files_to_molecules(self, file_name, smile_pos, class_pos=None):
     """Read compounds from a file with one SMILES record per line.

     Each line is split by rf.read_line; column `smile_pos` holds the SMILES
     string and, when `class_pos` is given, column `class_pos` holds the
     class.  Reading stops at the first line that rf.read_line turns into
     an empty list.  Records whose SMILES yields no nodes or no edges are
     skipped.

     Returns (and stores in self.my_set) a list of [molecule, label] pairs.
     The label is the integer class with 0 mapped to -1, or '?' when the
     class column is empty or `class_pos` is None.
     """
     compounds = []
     # the context manager closes the file even if a record fails to parse
     with open(file_name, "r") as f:
         while True:
             txt = rf.read_line(f.readline())
             if txt == []:
                 break
             mol_loc = self.smile_to_nodes_and_edges(txt[smile_pos])
             if mol_loc["nodes"] == [] or mol_loc["edges"] == []:
                 continue
             # without a class column every compound is unlabelled
             clas = txt[class_pos] if class_pos is not None else None
             if clas:
                 label = int(clas)
                 if label == 0:
                     label = -1
             else:
                 label = '?'
             compounds.append([
                 molecule.molecule(mol_loc["nodes"], mol_loc["edges"],
                                   mol_loc["covalences"], self.p_q),
                 label
             ])
     self.my_set = compounds
     return compounds
Example #11
0
 def pybel_to_oasa_molecule_with_atom_map(self, pmol):
     """Convert the pybel molecule `pmol` into an OASA molecule.

     Returns a tuple (oasa_molecule, atom_map) where atom_map maps the idx
     of every pybel atom to the OASA atom created for it.
     """
     result = molecule()
     atom_map = {}
     # atoms
     for patom in pmol.atoms:
         oatom = self.pybel_to_oasa_atom(patom)
         result.add_vertex(oatom)
         atom_map[patom.idx] = oatom
     # bonds - their ends are found through the atom map
     for pbond in openbabel.OBMolBondIter(pmol.OBMol):
         obond = self.pybel_to_oasa_bond(pbond)
         begin_idx, end_idx = pbond.GetBeginAtomIdx(), pbond.GetEndAtomIdx()
         begin_atom = atom_map[begin_idx]
         end_atom = atom_map[end_idx]
         result.add_edge(begin_atom, end_atom, obond)
     return result, atom_map
Example #12
0
 def pybel_to_oasa_molecule_with_atom_map( self, pmol):
   """Convert the pybel molecule `pmol` into an OASA molecule.

   Returns a tuple (oasa_molecule, atom_map) where atom_map maps the idx
   of every pybel atom to the OASA atom created for it.
   """
   converted = molecule()
   idx_to_oatom = {}
   # atoms
   for source_atom in pmol.atoms:
     target_atom = self.pybel_to_oasa_atom( source_atom)
     converted.add_vertex( target_atom)
     idx_to_oatom[ source_atom.idx] = target_atom
   # bonds - their ends are found through the atom map
   for source_bond in openbabel.OBMolBondIter( pmol.OBMol):
     target_bond = self.pybel_to_oasa_bond( source_bond)
     first_idx = source_bond.GetBeginAtomIdx()
     second_idx = source_bond.GetEndAtomIdx()
     first = idx_to_oatom[ first_idx]
     second = idx_to_oatom[ second_idx]
     converted.add_edge( first, second, target_bond)
   return converted, idx_to_oatom
Example #13
0
 def _read_body( self, file):
   """Read the atom and bond counts, the structure and the "M  " property
   lines from `file`; the result is stored in self.structure.

   Reading of properties stops at the "M  END" line.
   """
   atom_count = read_molfile_value( file, 3, conversion=int)
   bond_count = read_molfile_value( file, 3, conversion=int)
   # skip the rest of the current line - nothing more interesting there
   file.readline()
   # the structure itself
   self.structure = molecule()
   for _ in range( atom_count):
     vertex = self._read_atom( file)
     self.structure.add_vertex( v=vertex)
   for _ in range( bond_count):
     edge, end1, end2 = self._read_bond( file)
     self.structure.add_edge( end1, end2, e=edge)
   # trailing property block
   for raw_line in file:
     stripped = raw_line.strip()
     if stripped == "M  END":
       break
     if stripped.startswith( "M  "):
       self._read_property( stripped)
Example #14
0
 def _read_body(self, file):
     """Fill self.structure from `file`.

     Two counts (atoms, bonds) are read first and the rest of that line is
     discarded; then that many atoms and bonds are read, followed by the
     "M  " property lines up to "M  END".
     """
     n_atoms = read_molfile_value(file, 3, conversion=int)
     n_bonds = read_molfile_value(file, 3, conversion=int)
     # nothing more interesting on this line
     file.readline()
     # read the structure
     self.structure = molecule()
     for _atom_no in range(n_atoms):
         new_vertex = self._read_atom(file)
         self.structure.add_vertex(v=new_vertex)
     for _bond_no in range(n_bonds):
         new_edge, first, second = self._read_bond(file)
         self.structure.add_edge(first, second, e=new_edge)
     # properties
     for line in file:
         content = line.strip()
         if content == "M  END":
             break
         if content.startswith("M  "):
             self._read_property(content)
 def all_file_to_molecules(self,
                           file_name,
                           class_label="",
                           pos_class="active",
                           neg_class="inactive"):
     """Read all compounds from `file_name`.

     Records are fetched one at a time with rf.creat_local_txt until it
     returns -1.  Each record is converted by self.mol_to_nodes_and_edges
     (given `class_label` and `pos_class`); records without nodes or
     without edges are skipped.  `neg_class` is accepted but not used here.

     Returns a list of [molecule, class] pairs.
     """
     compounds = []
     # the context manager closes the file on every exit path
     with open(file_name, "r") as f:
         while True:
             txt = rf.creat_local_txt(f)
             if txt == -1:
                 return compounds
             mol_loc = self.mol_to_nodes_and_edges(txt, class_label,
                                                   pos_class)
             if mol_loc["nodes"] != [] and mol_loc["edges"] != []:
                 compounds.append([
                     molecule.molecule(mol_loc["nodes"], mol_loc["edges"],
                                       mol_loc["covalences"], self.p_q),
                     mol_loc["class"]
                 ])
Example #16
0
 def add_template_from_CDML( self, file):
   """Load all molecule templates from the CDML file `file`.

   When `file` is not an existing file it is looked up through
   os_support.get_path( file, "template").  A file that cannot be found or
   that raises xml.sax.SAXException while parsing is skipped with a warning.
   Every 'molecule' element is appended to self.templates and a molecule
   prepared from it to self._prepared_templates.
   """
   if not os.path.isfile( file):
     located = os_support.get_path( file, "template")
     if not located:
       # report the name that was asked for, not the failed lookup result
       warn( "template file %s does not exist - ignoring" % file)
       return
     file = located
   try:
     doc = dom.parse( file).getElementsByTagName( 'cdml')[0]
   except xml.sax.SAXException:
     warn( "template file %s cannot be parsed - ignoring" % file)
     return
   # when loading old versions of CDML try to convert them, but do nothing when they cannot be converted
   import CDML_versions
   CDML_versions.transform_dom_to_version( doc, config.current_CDML_version)
   Store.app.paper.onread_id_sandbox_activate()
   for tmp in doc.getElementsByTagName('molecule'):
     self.templates.append( tmp)
     m = molecule( Store.app.paper, package=tmp)
     self._prepared_templates.append( m)
   Store.app.paper.onread_id_sandbox_finish( apply_to=[]) # just switch the id_managers, no id mangling
Example #17
0
def show_dump():
    """Load the molecule stored in "aaa9.txt", drop duplicate edges and show
    every part that has more than one vertex.

    Two edges count as duplicates when they join the same set of vertices;
    only one edge of each such group is kept.  Coordinates of each shown
    part are recalculated with coords_generator (force=1).
    """
    from molecule import molecule
    # unused here, but the import is kept in case the module is needed for
    # its import-time effects
    from graph.graph import graph
    imp = molecule()
    # open() instead of the Python-2-only file() builtin; the handle is
    # closed as soon as the molecule has been read
    with open("aaa9.txt", "r") as dump:
        imp.read_simple_text_file(dump)
    # remove one duplicate per pass and rescan, because the edge
    # collection changes with every removal
    removed = True
    while removed:
        removed = False
        edges = list(imp.edges)
        for e in edges:
            for e2 in edges:
                if (e is not e2) and set(e.vertices) == set(e2.vertices):
                    imp.disconnect_edge(e2)
                    removed = True
                    break
            if removed:
                break
    import coords_generator
    for part in imp.get_disconnected_subgraphs():
        if len(part.vertices) > 1:
            coords_generator.calculate_coords(part, force=1)
            coords_generator.show_mol(part)
Example #18
0
def show_dump():
    """Load the molecule stored in "aaa9.txt", drop duplicate edges and show
    every part that has more than one vertex.

    Two edges count as duplicates when they join the same set of vertices;
    only one edge of each such group is kept.  Coordinates of each shown
    part are recalculated with coords_generator (force=1).
    """
    from molecule import molecule
    # unused here, but the import is kept in case the module is needed for
    # its import-time effects
    from graph.graph import graph
    imp = molecule()
    # open() instead of the Python-2-only file() builtin; the handle is
    # closed as soon as the molecule has been read
    with open("aaa9.txt","r") as dump:
        imp.read_simple_text_file( dump)
    # remove one duplicate per pass and rescan, because the edge
    # collection changes with every removal
    removed = True
    while removed:
        removed = False
        snapshot = list(imp.edges)
        for e in snapshot:
            for e2 in snapshot:
                if (e is not e2) and set(e.vertices) == set(e2.vertices):
                    imp.disconnect_edge( e2)
                    removed = True
                    break
            if removed:
                break
    import coords_generator
    for part in imp.get_disconnected_subgraphs():
        if len( part.vertices) > 1:
            coords_generator.calculate_coords( part, force=1)
            coords_generator.show_mol( part)
Example #19
0
 def add_template_from_CDML(self, file):
     """Load all molecule templates from the CDML file `file`.

     When `file` is not an existing file it is looked up through
     os_support.get_path(file, "template").  A file that cannot be found or
     that raises xml.sax.SAXException while parsing is skipped with a
     warning.  Every 'molecule' element is appended to self.templates and a
     molecule prepared from it to self._prepared_templates.
     """
     if not os.path.isfile(file):
         located = os_support.get_path(file, "template")
         if not located:
             # report the name that was asked for, not the failed lookup result
             warn("template file %s does not exist - ignoring" % file)
             return
         file = located
     try:
         doc = dom.parse(file).getElementsByTagName('cdml')[0]
     except xml.sax.SAXException:
         warn("template file %s cannot be parsed - ignoring" % file)
         return
     # when loading old versions of CDML try to convert them, but do nothing when they cannot be converted
     import CDML_versions
     CDML_versions.transform_dom_to_version(doc,
                                            config.current_CDML_version)
     Store.app.paper.onread_id_sandbox_activate()
     for tmp in doc.getElementsByTagName('molecule'):
         self.templates.append(tmp)
         m = molecule(Store.app.paper, package=tmp)
         self._prepared_templates.append(m)
     Store.app.paper.onread_id_sandbox_finish(
         apply_to=[])  # just switch the id_managers, no id mangling
def _residue_dihedrals(xvg_lines, selection):
    """Map every residue of `selection` to the [phi, psi] pairs of the xvg
    lines that end with '-<residue>' (one pair per matching line)."""
    points = {}
    for r in selection:
        pattern = re.compile(r'\-%s$' % r)
        points[r] = []
        for line in xvg_lines:
            if pattern.search(line):
                fields = line.split()
                points[r].append([float(fields[0]), float(fields[1])])
    return points


def write_restraints(start, end, start_xvg, end_xvg, top, includes, n, ndx, Nchains):
    """Write topologies and dihedral restraint files for the intermediate
    points of a path between two conformations.

    start      -- passed with `ndx` to res_selection.res_select to obtain
                  the residues to restrain
    end        -- not used by this function
    start_xvg  -- path of the xvg file with the starting phi/psi values
    end_xvg    -- path of the xvg file with the final phi/psi values
    top        -- path of the topology file
    includes   -- sequence of include-file paths, indexed by chain; may be
                  empty
    n          -- number of points (converted with int()); files are written
                  for k in range(1, n-1)
    ndx        -- see `start`
    Nchains    -- number of chains

    For every k a 'topol_<k>.top' and, for every chain, a
    'dihre_<k>_chain_<chain>.itp' file is written to the current directory.
    The restrained phi/psi values are linearly interpolated between the
    start and end values.
    """
    with open(start_xvg, 'r') as xvg_file:
        start_lines = xvg_file.readlines()
    with open(end_xvg, 'r') as xvg_file:
        end_lines = xvg_file.readlines()
    selection = res_selection.res_select(start, ndx)
    n = int(n)

    # create the path
    startpts = _residue_dihedrals(start_lines, selection)
    endpts = _residue_dihedrals(end_lines, selection)

    sys.stderr.write('%s' % includes)
    # topologies: point the include lines at the per-point restraint files
    for k in range(1, n-1):
        with open(top) as top_file:
            in_top = top_file.read()
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                # literal replacement - a file name is not a regular expression
                in_top = in_top.replace(includename, 'dihre_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    for k in range(1, n-1):
        for mol in range(Nchains):
            with open('dihre_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as include_file:
                        restraint_itp.write(include_file.read())
                # write the initial part of the topology file
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  phi  dphi  kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # no include files: rewrite the topology itself so that it
                    # includes the restraint file
                    protein = molecule(top)
                    with open(top, 'r') as top_file:
                        in_itp = top_file.read().split('; Include Position restraint file')
                    with open('topol_%d.top' % k, 'w') as out_top:
                        out_top.write(in_itp[0])
                        out_top.write('#include "dihre_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                for r in selection:
                    resnr = int(r)
                    # assumes iterating `protein` yields the atoms in
                    # topology order, so that the four atoms come out in
                    # dihedral order - TODO confirm
                    phi = [a for a in protein
                           if (a.resnr == resnr and a.atomname in ('CA', 'N', 'C'))
                           or (a.resnr == resnr - 1 and a.atomname == 'C')]

                    psi = [a for a in protein
                           if (a.resnr == resnr and a.atomname in ('N', 'CA', 'C'))
                           or (a.resnr == resnr + 1 and a.atomname == 'N')]

                    # linear interpolation between the start and end angles
                    phi_val = startpts[r][mol][0] + (endpts[r][mol][0] - startpts[r][mol][0]) / n * k
                    psi_val = startpts[r][mol][1] + (endpts[r][mol][1] - startpts[r][mol][1]) / n * k

                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr,
                                           phi[3].atomnr, phi_val, 0, 1))
                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr,
                                           psi[3].atomnr, psi_val, 0, 1))
Example #21
0
  def get_molecules( self, file_name):
    """Parse the XML file `file_name` and return the objects found in it.

    The returned list contains, in this order: a molecule for every
    "fragment" element placed directly in a "page", a text object for every
    "t" element placed directly in a "page", a plus object for every
    "graphic" element with GraphicType "Symbol" and SymbolType "Plus", and
    an arrow object for every "arrow" element.  All objects are created on
    self.paper.  The input is presumably a ChemDraw XML export - the bond
    mapping below refers to ChemDraw.
    """
    doc = dom.parse( file_name)
    molecules = []
    # read colors
    colors=[]
    for elem7 in doc.getElementsByTagName("color"):
      # the r/g/b attributes are scaled by 255 before hex formatting
      red=(float(elem7.getAttribute("r"))*255)
      green=(float(elem7.getAttribute("g"))*255)
      blue=(float(elem7.getAttribute("b"))*255)
      # NOTE(review): %x is given float values here - Python 3 rejects that
      colors.append("#%02x%02x%02x" % (red,green,blue))

    # read fonts
    fonts={}
    for elem8 in doc.getElementsByTagName("font"):
      family=str(elem8.getAttribute("name"))
      fonts[int(elem8.getAttribute("id"))]=family

    # read molecules
    for elem1 in doc.getElementsByTagName("fragment"):
      if elem1.parentNode.nodeName=="page":
        mol = molecule( paper=self.paper)
        atom_id_to_atom = {}
        atom_id_to_text = {}
        for elem2 in elem1.childNodes:

          # atom
          if elem2.nodeName=="n":
            # defaults used when the node has no "t" child
            font = ""
            Size = 12
            text = "C"
            color1="#000000"
            for elem3 in elem2.childNodes:
              if elem3.nodeName=="t":
                # "color" attributes index the colors list with an offset of 2
                if elem3.hasAttribute("color"):
                  color1=colors[int(elem3.getAttribute("color"))-2]
                text = ""
                for elem4 in elem3.childNodes:
                  if elem4.nodeName=="s":
                    if elem3.hasAttribute("color"):
                      color1=colors[int(elem3.getAttribute("color"))-2]
                    for Id, Font in fonts.items():
                      if Id==int(elem4.getAttribute("font")):
                        font=Font
                    Size= int(elem4.getAttribute("size"))
                    # the atom label is the concatenation of all "s" texts
                    text += dom_ext.getAllTextFromElement( elem4).strip()

            position = elem2.getAttribute("p").split()
            assert len( position) == 2


            # we must postpone symbol assignment until we know the valency of the atoms
            atom_id_to_text[ elem2.getAttribute('id')] = text
            atom = mol.create_vertex()
            atom.line_color = color1
            atom.font_family = font
            atom.font_size = Size
            atom.x = float( position[0])
            atom.y = float( position[1])
            mol.add_vertex( atom)
            atom_id_to_atom[ elem2.getAttribute('id')] = atom

          # bond
          # maps the ChemDraw value of the Display attribute of the b element to the BKChem bond.type
          bondType2={"WedgeBegin":"w",
          "WedgedHashBegin":"h",
          "Wavy":"a",
          "Bold":"b",
          "Dash":"d"
          }

          if elem2.nodeName=="b":
            if elem2.hasAttribute("color"):
              color2 = colors[(int(elem2.getAttribute("color"))-2)]
            else:
              color2="#000000"
            order = 1
            if elem2.hasAttribute("Order"):
              order = int( elem2.getAttribute("Order"))
            bond = mol.create_edge()
            if elem2.hasAttribute("Display"):
              display = elem2.getAttribute("Display").strip()
              # a Display value missing from bondType2 leaves bond.type untouched
              for bondC, bondB in bondType2.items():
                if bondC ==display:
                  bond.type = bondB
            bond.line_color = color2
            bond.order = order
            # "B" and "E" hold the ids of the two atoms joined by the bond;
            # both atoms must have been read before the bond
            atom1 = atom_id_to_atom[ elem2.getAttribute("B")]
            atom2 = atom_id_to_atom[ elem2.getAttribute("E")]
            mol.add_edge( atom1, atom2, bond)

        # here we reassign the symbols
        for id, atom in atom_id_to_atom.items():
          text = atom_id_to_text[ id]
          # the placeholder vertex is replaced by one created from its text
          v = mol.create_vertex_according_to_text( atom, text)
          atom.copy_settings( v)
          mol.replace_vertices( atom, v)
          atom.delete()
        # finally we add the molecule to the list of molecules for output
        molecules.append( mol)

    # read texts
    # face bit value -> name of the markup tag wrapped around the text
    textik={2:"i",
            1:"b",
            32:"sub",
            64:"sup"}

    for elem5 in doc.getElementsByTagName("t"):
      if elem5.parentNode.nodeName=="page":
        # NOTE(review): len() of a map() result only works in Python 2
        position = map( float, elem5.getAttribute("p").split())
        assert len( position) == 2
        celyText=""  # the whole text collected from all "s" children
        for elem51 in elem5.childNodes:
          if elem51.nodeName=="s":
            for elem52 in elem51.childNodes:
              if isinstance( elem52, dom.Text):
                rodice=[]  # tags to wrap around this piece of text
                text100=elem52.data
                if elem51.hasAttribute("face"):
                  Face01=int(elem51.getAttribute("face"))
                  # test each of the 9 low bits of the face value; a set bit
                  # whose value is a key of textik contributes its tag
                  for face, parent in textik.items():
                    for i in range(9):
                      if not Face01&2**i==0:
                        if face==Face01&2**i:
                          rodice.append(parent)
                for rodic in rodice:
                  text100 = "<%s>%s</%s>" % (rodic,text100,rodic)
            # NOTE(review): text100 is unset (or stale) when the "s" element
            # has no text child - confirm that cannot happen
            celyText += text100

            if elem5.hasAttribute("color"):
              color3=colors[(int(elem5.getAttribute("color"))-2)]
            else:
              color3="#000000"

            font_id = elem51.getAttribute("font")
            if font_id != "":
              font=fonts[int(font_id)]
            #text = dom_ext.getAllTextFromElement(elem51)
        #print celyText
        text = celyText
        t = text_class( self.paper, position, text=text)
        # NOTE(review): color3 and font are only assigned inside the "s"
        # branch above (font possibly also by the atom reading); elem51 is
        # the last child visited by the loop
        t.line_color = color3
        #print elem51
        if elem51.hasAttribute("size"):
          t.font_size = int( elem51.getAttribute("size"))
        if font:
          t.font_family = font
        molecules.append(t)

    # read graphics - plus
    for elem6 in doc.getElementsByTagName("graphic"):
      if elem6.getAttribute("GraphicType")=="Symbol" and elem6.getAttribute("SymbolType")=="Plus":
        position = map( float, elem6.getAttribute("BoundingBox").split())
        # only the first two numbers of the bounding box are used
        position2=[position[0],position[1]]
        assert len(position2) == 2
        if elem6.hasAttribute("color"):
          color4=colors[(int(elem6.getAttribute("color"))-2)]
        else:
          color4="#000000"
        pl = plus(self.paper, position2)
        pl.line_color = color4
        molecules.append(pl)

    sipka=[]  # scratch list for arrow data
    #for elem71 in doc.getElementsByTagName("graphic"):
      #if elem71.getAttribute("GraphicType")=="Line":

    for elem7 in doc.getElementsByTagName("arrow"):
      # after the inserts below the list starts [color, Head3D, Tail3D]
      # when the arrow has a "color" attribute
      sipka.insert(0,elem7.getAttribute('Head3D') )
      sipka.insert(1,elem7.getAttribute('Tail3D') )
      if elem7.hasAttribute("color"):
        sipka.insert(0,colors[(int(elem7.getAttribute("color"))-2)])
      # NOTE(review): without a "color" attribute indexes 0..2 do not hold
      # color/head/tail (IndexError for the first arrow, leftovers of earlier
      # arrows afterwards) - confirm arrows always carry a color
      point1 = map( float, sipka[1].split())
      point2 = map( float, sipka[2].split())
      arr = arrow( self.paper, points=[point2[0:2],point1[0:2]], fill=sipka[0])
      arr.line_color=sipka[0]
      molecules.append( arr)

    sipka=[]
    return molecules
Example #22
0
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write one topology and the per-chain dihedral restraint files for each string point.

    For every string point ``k`` in ``range(n)`` this writes ``topol_<k>.top``
    and, for every chain ``mol`` in ``range(Nchains)``,
    ``res_<k>_chain_<mol>.itp`` holding a ``[ dihedral_restraints ]`` section
    with one phi and one psi row per selected residue. The force constant is
    written as the literal placeholder ``KFAC`` so it can be substituted later.

    Args:
        inp: Object whose ``getOutputDir()`` gives the directory the g_rama
            ``.xvg`` files are read back from.
        initial_confs: Structure files, one per string point. When ``None`` or
            empty, the dihedrals are linearly interpolated between
            ``start_xvg`` and ``end_xvg`` instead.
        start: Passed to ``res_selection.res_select`` together with the index
            atoms to obtain the residue selection.
        end: Not used by this function.
        start_xvg: .xvg file with the starting dihedrals (interpolation only).
        end_xvg: .xvg file with the ending dihedrals (interpolation only).
        tpr: Run-input file handed to ``g_rama -s``.
        top: Topology file used as the template for ``topol_<k>.top``.
        includes: Per-chain .itp paths; may be empty.
        n: Number of points in the string, including start and end point.
        ndx_file: Index file read with ``res_selection.read_ndx``.
        Nchains: Number of chains.
    """
    # Get the atoms involved with the residues to use for dihedrals (might be more than one atom in the index
    # per residue, since it's probably generated by make_ndx)
    ndx_atoms = res_selection.read_ndx(ndx_file)
    # Map them to each affected residue so we just get the residue numbers back
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)  # number of points in the string, including start and end point

    use_interpolation = initial_confs is None or len(initial_confs) == 0

    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Have to generate the dihedrals ourselves from the given initial structures
        # Note: when we get an initial_confs[] array, we use it for all points and
        # the start/end input parameters are completely ignored
        # TODO: assert that len(initial_confs) == n otherwise?

        ramaprocs = {}
        stringpts = {}  # Will have 4 levels: stringpoint, residue, chain, phi/psi value

        # The null-device handle stays open until every g_rama process has
        # finished, and is closed even if spawning or parsing fails.
        with open(os.devnull, 'w') as FNULL:  # dont generate spam from g_rama
            # Run g_rama (in parallel) on each structure and output to a temporary .xvg
            # NOTE(review): '0%3d' pads with spaces, not zeros; left unchanged
            # because the same pattern is used for writing and reading.
            for i in range(n):
                # TODO: check for and use g_rama_mpi.. like everywhere else
                ramaprocs[i] = Popen(['g_rama', '-f', initial_confs[i], '-s', tpr, '-o', '0%3d.xvg' % i],
                                     stdout=FNULL, stderr=FNULL)

            # Go through the output from the rama sub-processes and read the xvg outputs
            for i in range(n):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding g_rama task has ended
                ramaprocs[i].communicate()
                # Read back and parse like for the start/end_xvg above
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    # Rewrite the topology to include the res itp files instead of the original per-chain itps (if any)
    # There will be one topol_x.top per string point

    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name must not be interpreted
                # as a regular expression ('.' would match any character).
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start the restraint file with a copy of the chain's own itp
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # write the initial part of the topology file
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  type phi  dphi  kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain itps: splice an #include of the restraint
                    # file into the topology at the position-restraint marker.
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split('; Include Position restraint file')
                            out_top.write(in_itp[0])
                            out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                            out_top.write(in_itp[1])

                # Create a lookup-table for the protein topology that maps residue to dihedrally relevant
                # backbone atom indices for N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                # Use the lookup-table built above and get the dihedral specification atoms needed for each
                # residue in the selection.
                for r in selection:
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C']]

                    # psi is N, CA and C on this residue and N on the next
                    psi = [dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C'], dih_atoms[r + 1]['N']]

                    # Note: in the Gromacs 4.6+ format, the k-factor is here. Before, it was in the .mdp as
                    # dihre_fc.
                    # Also see reparametrize.py

                    if use_interpolation:
                        # k is from 0 to n-1, so map it so we get a factor from 0 to 1
                        # (n - 1 is zero when n == 1; a string needs at least two points)
                        phi_val = startpts[r][mol][0] + k * (endpts[r][mol][0] - startpts[r][mol][0]) / (n - 1)
                        psi_val = startpts[r][mol][1] + k * (endpts[r][mol][1] - startpts[r][mol][1]) / (n - 1)
                    else:
                        # Use the values extracted from the initial_confs[] structures above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Since we need different force constants in different stages, we need to put
                    # a searchable placeholder in the file here and replace it later. KFAC is normally
                    # a %8.4f number.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
Example #23
0
  def _read_inchi( self, text):
    """Build self.structure from the InChI string `text`.

    The string is split into layers, the version is checked (only 1.0 is
    accepted) and the sum and connectivity layers are read once. The
    remaining layers are then applied repeatedly, resetting bond orders and
    charges each round, until no vertex has a free valency and every edge
    has a non-zero order, or until no further improvement is possible.

    Raises oasa_inchi_error when `text` is empty or when localization of
    bonds, charges or movable hydrogens fails, and
    oasa_unsupported_inchi_version_error for an unsupported version.
    """
    if not text:
      raise oasa_inchi_error( "No inchi was given")
    self.structure = molecule()
    self.layers = self.split_layers( text)
    # version support (very crude)
    self.version = self._check_version( self.layers[0])
    if not self.version:
      raise oasa_unsupported_inchi_version_error( self.layers[0])
    elif str( self.version[0]) != '1' or str( self.version[1]) != '0':
      raise oasa_unsupported_inchi_version_error( self.layers[0])

    self.hs_in_hydrogen_layer = self.get_number_of_hydrogens_in_hydrogen_layer()
    self.read_sum_layer()
    self.read_connectivity_layer()
    self._charge_mover = self._move_charge_somewhere_else()
    repeat = True
    run = 0
    # we have to repeat this step in order to find the right positioning of movable hydrogens
    while repeat and not self._no_possibility_to_improve:
      # cleanup - undo whatever the previous round added or changed
      for h in self._added_hs:
        self.structure.remove_vertex( h)
      for b in self.structure.edges:
        b.order = 1
      for v in self.structure.vertices:
        v.symbol = v.symbol # this resets the valency
        v.charge = 0
      self.cleanup()

      # the code itself
      run += 1
      assert run < 50
      self._deal_with_notorious_groups()
      self.process_forced_charges()
      self.read_hydrogen_layer( run=run)
      self.read_charge_layer()
      self.read_p_layer()
      self.deal_with_da_bonds()
      self.compensate_for_forced_charges()
      self.structure.add_missing_bond_orders()

      # here we check out if the molecule seems ok
      fvs = [v for v in self.structure.vertices if v.free_valency]
      # any() instead of a truth test on filter(): filter() returns an
      # always-true iterator on Python 3, which made this check never pass
      if not fvs and not any( not e.order for e in self.structure.edges):
        repeat = False
      else:
        if len( fvs) == 1:
          a = fvs[0]
          a.symbol = a.symbol # this resets the valency
          a.raise_valency_to_senseful_value()
          if not a.free_valency:
            repeat = False

      if repeat and self._no_possibility_to_improve and self.charge:
        # try another placement of the charge and start over
        try:
          next( self._charge_mover) # builtin next() works on Python 2.6+ and 3
        except StopIteration:
          pass
        else:
          self._no_possibility_to_improve = False
          run = 0

    if repeat and self._no_possibility_to_improve:
      raise oasa_inchi_error( "Localization of bonds, charges or movable hydrogens failed")
Example #24
0
# Valve defaults ------------------------------------------------------------------------------------------------------
ValveDefaultActualFlow = 800 / 3600.0 # From average HX flow in unit CW circuit.
ValveDefaultDP = 1.2 * Ps
ValveDefaultOpening = 0.5 # Default valve opening.

ValveEqualPercR = 40.0 # Dimensionless constant for valve equalpercentage.
# Cv is back-calculated from the default flow, opening and pressure drop above.
ValveDefaultCv = ValveDefaultActualFlow /   \
    (math.pow(ValveEqualPercR, ValveDefaultOpening - 1) * math.sqrt(ValveDefaultDP)) # m^3/s/(Pa^0.5)
ValveHydraulicTau = 10.0  # seconds.  Time constant of valve hydraulics.

ValveLength = 0.5 # m
ValveWidth = 0.4 # m


#fluidpackage constants -------------------------------------------------------------------------------------------------
fluidpackage = list() #list of component object
# The pressure ratio must divide by the whole critical pressure 1.2411 * 10**7
# (the value passed as the preceding argument); without the parentheses the
# expression evaluated (110000 / 1.2411) * 10**7.
# NOTE(review): this argument is presumably the acentric factor,
# -1 - log10(Psat / Pc) - confirm against molecule.molecule's signature.
fluidpackage.append(component.component(molecule.molecule("Naphtha", "GTL Naphtha", 0.157, 0.00164, 661.4959, 273 + 495, 1.2411 * 10**7, \
                -1 - np.log10(110000 / (1.2411 * 10**7)) , 150.5, 0.6, 0, 0), 0))
# The last four arguments for Air are 0.8/0.2 weighted blends of two sets of coefficients.
fluidpackage.append(component.component(molecule.molecule("Air", "Air", 0.02897, 1.983 * math.pow(10, -5), 1.225, 132.41, \
                3.72 * math.pow(10, 6), \
                0.0335,
                0.8*31.15  + 0.2*28.11, 0.8*(-0.01357)  + 0.2*(-3.7) * math.pow(10, -6), 0.8*2.68*math.pow(10,-5)  + 0.2*1.746 * math.pow(10, -5),
            0.8 * (-1.168) * math.pow(10, -8) + 0.2 * (-1.065) * math.pow(10, -8)), 0))
# NOTE(review): the third argument looks like a molar mass in kg/mol (Air 0.02897,
# H2 0.0020158); 0.018 for CO2 matches water rather than CO2 - confirm. Left unchanged.
fluidpackage.append( component.component( molecule.molecule("CO2", "Carbon Dioxide", 0.018, 0.07 * 0.001, 1.977, 304.25, 7.39 * math.pow(10, 6), 0.228,
                19.8, 0.07344, -5.602E-05, 1.715E-08), 0))
                # Density: 1.977 kg/m3 (gas at 1 atm and 0 °C)
fluidpackage.append( component.component( molecule.molecule("CO", "Carbon Monoxide", 0.02801, 0.0001662 * 0.001, 1.145), 0))
            # Density: 1.145 kg/m3 at 25 °C, 1 atm
fluidpackage.append( component.component( molecule.molecule("H2", "Hydrogen", 0.0020158, 8.76 * math.pow(10, -6), 0.08988), 0))
            # Density: 0.08988 g/L = 0.08988 kg/m3 (0 °C, 101.325 kPa)
# NOTE(review): 5.1953E6 repeats the digits of the preceding 5.1953 - possibly a
# copy/paste slip; confirm. Left unchanged.
fluidpackage.append( component.component( molecule.molecule("He", "Helium", HeMolarMass, 0, 0.1786, 5.1953, 5.1953E6, -0.390,
                20.8, 0, 0, 0), 0))
Example #25
0
 def get_transformed_template(self, n, coords, type='empty', paper=None):
     """Instantiate template number `n` and transform it to fit `coords`.

     `type` is the kind of connection: 'bond', 'atom1' (for a single atom),
     'atom2' (for an atom with more than one bond) or 'empty'. `coords` is
     (x, y) for 'empty' and (x1, y1, x2, y2) for the other types. `paper`
     defaults to Store.app.paper.

     Returns the transformed template molecule, or None when the template
     cannot be attached to a bond.
     """
     target_paper = paper or Store.app.paper
     target_paper.onread_id_sandbox_activate()  # must be here to mangle the ids
     template = molecule(target_paper, package=self.templates[n])
     target_paper.onread_id_sandbox_finish(apply_to=[template])  # id mangling
     template.name = ''
     self._scale_ratio = 1
     tr = transform()

     if type == 'empty':
         # just draws the template - no connection
         tx1, ty1 = template.t_atom.get_xy()
         tx2, ty2 = template.next_to_t_atom.get_xy()
         x1, y1 = coords
         standard_length = Screen.any_to_px(
             Store.app.paper.standard.bond_length)
         template.delete_items([template.t_atom],
                               redraw=0,
                               delete_single_atom=0)
         tr.set_move(-tx2, -ty2)
         template_length = math.sqrt((tx1 - tx2)**2 + (ty1 - ty2)**2)
         tr.set_scaling(standard_length / template_length)
         tr.set_move(x1, y1)
     elif type in ('atom1', 'atom2'):
         # attach to an atom: scale and rotate onto the (x1, y1)-(x2, y2) segment
         tx1, ty1 = template.t_atom.get_xy()
         tx2, ty2 = template.next_to_t_atom.get_xy()
         x1, y1, x2, y2 = coords
         tr.set_move(-tx2, -ty2)
         target_length = math.sqrt((x1 - x2)**2 + (y1 - y2)**2)
         template_length = math.sqrt((tx1 - tx2)**2 + (ty1 - ty2)**2)
         tr.set_scaling(target_length / template_length)
         template_angle = math.atan2(tx1 - tx2, ty1 - ty2)
         target_angle = math.atan2(x1 - x2, y1 - y2)
         tr.set_rotation(template_angle - target_angle)
         tr.set_move(x2, y2)
     elif type == 'bond':
         if not template.t_bond_first or not template.t_bond_second:
             warn(
                 "this template is not capable to be added to bond - sorry."
             )
             return None
         template.delete_items([template.t_atom],
                               redraw=0,
                               delete_single_atom=0)
         tx1, ty1 = template.t_bond_first.get_xy()
         tx2, ty2 = template.t_bond_second.get_xy()
         x1, y1, x2, y2 = coords
         target_length = math.sqrt((x1 - x2)**2 + (y1 - y2)**2)
         template_length = math.sqrt((tx1 - tx2)**2 + (ty1 - ty2)**2)
         # further needed for bond.bond_width transformation
         self._scale_ratio = target_length / template_length
         tr.set_move(-tx1, -ty1)
         template_angle = math.atan2(tx1 - tx2, ty1 - ty2)
         target_angle = math.atan2(x1 - x2, y1 - y2)
         tr.set_rotation(template_angle - target_angle)
         tr.set_scaling(self._scale_ratio)
         tr.set_move(x1, y1)

     self.transform_template(template, tr)

     # remove obsolete info from template
     if type == 'atom1':
         template.delete_items([template.t_atom],
                               redraw=0,
                               delete_single_atom=0)
     elif type == 'atom2':
         template.t_atom.x = x1
         template.t_atom.y = y1
     template.t_atom = None
     template.t_bond_first = None
     template.t_bond_second = None

     # return ready template
     return template
Example #26
0
    def __init__(self, source, outfile, molfile, goalsnr, nphot, kappa=None,
                 tnorm=2.735, velocity_function=None, seed=1971, minpop=1e-4,
                 fixset=1.e-6, blending=False, nchan=50, rt_lines=[0,1,2], velres=0.1):
        """
        Initialize a simulation.

        Reads the source model and the molecular data, adds thermal
        broadening to the line widths, calculates the collisional rates and
        fills the per-line, per-cell dust arrays.

        Args:
            source (str): Model file.
            outfile (str): File to write population levels to.
            molfile (str): Molecular data file in the LAMDA format.
            goalsnr (float): Goal signal-to-noise ratio of the run.
            nphot (float): Number of photons to use in the radiative transfer.
                Stored as an array with one entry per cell.
            kappa (optional[str]): A string describing the dust parameters. For
                the use of Ossenkopf & Henning (1994) opacities it must take
                the form:

                    kappa_params = 'jena, TYPE, COAG'

                where TYPE must be 'bare', 'thin' or 'thick' and COAG must be
                'no', 'e5', 'e6', 'e7' or 'e8'. Alternatively, a simple power
                law can be used where the parameters are given by:

                    kappa_params = 'powerlaw, freq0, kappa0, beta'

                where freq0 is in [Hz], kappa0 in [cm^2/g], and beta is the
                frequency index. If nothing is given, we assume no opacity.
            tnorm (optional[float]): Background temperature in [K]. Default is
                the CMB at 2.735K.
            velocity_function (optional): When given, it is passed to
                ``simulation.import_velocity`` and the result replaces the
                model's velocity field.
            seed (optional[int]): Seed for the random number generators.
            minpop (optional[float]): Minimum population for each energy level.
            fixset (optional [float]): The smallest number to be counted.
            blending (optional [bool]): Whether to include line blending or not.
            nchan (optional [int]): Number of channels per trans for raytracing.
            rt_lines (optional [int list]): List of transitions to raytrace.
                The default list is shared between calls and stored by
                reference, so it must not be modified in place.
            velres (optional [float]): Channel res for raytracing (km/s).

        Raises:
            Exception: If the molecular data file cannot be parsed.
            ValueError: If the collisional rates are missing after
                ``set_rates``.
        """

        self.source = source
        self.outfile = outfile
        self.molfile = molfile
        self.goalsnr = goalsnr
        # not setting nphot yet, setting later as array w/ size ncell
        self.kappa_params = kappa
        self.tnorm = tnorm
        self.seed = seed
        self.minpop = minpop
        self.fixset = fixset
        self.blending = blending
        self.nchan = nchan
        self.rt_lines = rt_lines
        self.velres = velres*1000. # convert to m/s

        t0 = time()
        # Read in the source model (default is RATRAN).
        self.model = model(self.source, 'ratran')
        self.ncell = self.model.ncell

        # Have user input velocity function.
        if velocity_function is not None:
            self.model.velo = simulation.import_velocity(velocity_function)

        # Read in the molfile. Only ordinary errors are translated; a bare
        # except would also swallow KeyboardInterrupt and SystemExit.
        try:
            self.mol = molecule(self, self.molfile)
        except Exception:
            raise Exception("Couldn't parse molecular data.")

        self.nlev = self.mol.nlev
        self.nline = self.mol.nline
        self.ntrans = self.mol.ntrans
        self.ntrans2 = self.mol.ntrans2
        self.ntemp = self.mol.ntemp
        self.ntemp2 = self.mol.ntemp2

        # Include thermal broadening in the line widths.
        v_turb2 = self.model.doppb**2.0
        v_therm2 = 2 * sc.k * self.model.tkin / sc.m_p / self.mol.molweight
        self.model.doppb = np.sqrt(v_turb2 + v_therm2)

        # Calculate the collisional rates.
        self.mol.set_rates(self.model.tkin)
        if self.mol.up is None or self.mol.down is None:
            raise ValueError("Need to calculate rates.")

        # Initialize dust emissivity, converting from [m^2/kg] to [m^-1/n(H2)]
        # such that tau_dust = knu.
        self.norm = simulation.planck(self.mol.freq[0], self.tnorm)
        self.norm = self.norm * np.ones(self.nline)
        self.cmb = simulation.planck(self.mol.freq, self.model.tcmb)
        self.cmb /= self.norm

        # Parse kappa parameters and generate the kappa function
        self.kappa = simulation.generate_kappa(self.kappa_params)

        # Do not normalize dust; will be done in photon.
        # Funky looping as functions fail to broadcast.
        self.knu = np.zeros((self.nline, self.ncell))
        self.dust = np.zeros((self.nline, self.ncell))
        for l in range(self.nline):
            for i in range(self.ncell):
                self.knu[l, i] = self.kappa(i, self.mol.freq[l]) * 2.4 * sc.m_p
                self.knu[l, i] *= self.model.nh2[i] / self.model.g2d
                self.dust[l, i] = simulation.planck(self.mol.freq[l],
                                                    self.model.tdust[i])

        # Set up the Monte Carlo simulation
        self.nphot = np.full(self.ncell, nphot)  # Set nphot to initial number.
        self.niter = self.ncell  # Estimated crossing time.
        self.fixseed = self.seed

        t1 = time()
        print("Set up took %.1f ms." % ((t1 - t0) * 1e3))
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write one topology and the per-chain dihedral restraint files for each string point.

    For every string point ``k`` in ``range(n)`` this writes ``topol_<k>.top``
    and, for every chain ``mol`` in ``range(Nchains)``,
    ``res_<k>_chain_<mol>.itp`` holding a ``[ dihedral_restraints ]`` section
    with one phi and one psi row per selected residue. The force constant is
    written as the literal placeholder ``KFAC`` so it can be substituted later.

    Args:
        inp: Object whose ``getOutputDir()`` gives the directory the rama
            ``.xvg`` files are read back from.
        initial_confs: Structure files, one per string point. When ``None`` or
            empty, the dihedrals are linearly interpolated between
            ``start_xvg`` and ``end_xvg`` instead.
        start: Passed to ``res_selection.res_select`` together with the index
            atoms to obtain the residue selection.
        end: Not used by this function.
        start_xvg: .xvg file with the starting dihedrals (interpolation only).
        end_xvg: .xvg file with the ending dihedrals (interpolation only).
        tpr: Run-input file handed to the rama command via ``-s``.
        top: Topology file used as the template for ``topol_<k>.top``.
        includes: Per-chain .itp paths; may be empty.
        n: Number of points in the string, including start and end point.
        ndx_file: Index file read with ``res_selection.read_ndx``.
        Nchains: Number of chains.
    """
    cmdnames = cmds.GromacsCommands()
    # Get the atoms involved with the residues to use for dihedrals (might be more than one atom in the index
    # per residue, since it's probably generated by make_ndx)
    ndx_atoms = res_selection.read_ndx(ndx_file)
    # Map them to each affected residue so we just get the residue numbers back
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)  # number of points in the string, including start and end point

    use_interpolation = initial_confs is None or len(initial_confs) == 0

    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Have to generate the dihedrals ourselves from the given initial structures
        # Note: when we get an initial_confs[] array, we use it for all points and
        # the start/end input parameters are completely ignored
        # TODO: assert that len(initial_confs) == n otherwise?

        ramaprocs = {}
        stringpts = {}  # Will have 4 levels: stringpoint, residue, chain, phi/psi value

        # The null-device handle stays open until every rama process has
        # finished, and is closed even if spawning or parsing fails.
        with open(os.devnull, 'w') as FNULL:  # dont generate spam from g_rama
            # Run g_rama (in parallel) on each structure and output to a temporary .xvg
            # NOTE(review): '0%3d' pads with spaces, not zeros; left unchanged
            # because the same pattern is used for writing and reading.
            for i in range(n):
                # TODO: check for and use g_rama_mpi.. like everywhere else
                cmd = cmdnames.rama.split() + ['-f', initial_confs[i], '-s', tpr,
                                               '-o', '0%3d.xvg' % i]
                ramaprocs[i] = Popen(cmd, stdout=FNULL, stderr=FNULL)

            # Go through the output from the rama sub-processes and read the xvg outputs
            for i in range(n):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding g_rama task has ended
                ramaprocs[i].communicate()
                # Read back and parse like for the start/end_xvg above
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    # Rewrite the topology to include the res itp files instead of the original per-chain itps (if any)
    # There will be one topol_x.top per string point

    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name must not be interpreted
                # as a regular expression ('.' would match any character).
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start the restraint file with a copy of the chain's own itp
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # write the initial part of the topology file
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  type phi  dphi  kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain itps: splice an #include of the restraint
                    # file into the topology at the position-restraint marker.
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split('; Include Position restraint file')
                            out_top.write(in_itp[0])
                            out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                            out_top.write(in_itp[1])

                # Create a lookup-table for the protein topology that maps residue to dihedrally relevant
                # backbone atom indices for N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                # Use the lookup-table built above and get the dihedral specification atoms needed for each
                # residue in the selection.
                for r in selection:
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C']]

                    # psi is N, CA and C on this residue and N on the next
                    psi = [dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C'], dih_atoms[r + 1]['N']]

                    # Note: in the Gromacs 4.6+ format, the k-factor is here. Before, it was in the .mdp as
                    # dihre_fc.
                    # Also see reparametrize.py

                    if use_interpolation:
                        # k is from 0 to n-1, so map it so we get a factor from 0 to 1
                        # (n - 1 is zero when n == 1; a string needs at least two points)
                        phi_val = startpts[r][mol][0] + k * (endpts[r][mol][0] - startpts[r][mol][0]) / (n - 1)
                        psi_val = startpts[r][mol][1] + k * (endpts[r][mol][1] - startpts[r][mol][1]) / (n - 1)
                    else:
                        # Use the values extracted from the initial_confs[] structures above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Since we need different force constants in different stages, we need to put
                    # a searchable placeholder in the file here and replace it later. KFAC is normally
                    # a %8.4f number.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
Example #28
0
    def get_molecules(self, file_name):
        """Parse the XML file `file_name` and return the objects found in it.

        The returned list holds, in this order: one molecule per <fragment>
        directly under a <page>, one text object per <t> directly under a
        <page>, one plus object per "Plus" symbol <graphic>, and one arrow per
        <arrow> element.

        Colour attributes are looked up in the document's <color> table with
        an offset of 2 (``colors[index - 2]``); elements without a colour
        attribute are drawn in "#000000".
        """
        doc = dom.parse(file_name)
        molecules = []

        # read colors
        colors = []
        for color_el in doc.getElementsByTagName("color"):
            # '%x' needs integers, so truncate the scaled components
            rgb = tuple(
                int(float(color_el.getAttribute(channel)) * 255)
                for channel in ("r", "g", "b"))
            colors.append("#%02x%02x%02x" % rgb)

        # read fonts
        fonts = {}
        for font_el in doc.getElementsByTagName("font"):
            family = str(font_el.getAttribute("name"))
            fonts[int(font_el.getAttribute("id"))] = family

        # maps the "Display" attribute of a <b> element to the bond type code
        bond_types = {
            "WedgeBegin": "w",
            "WedgedHashBegin": "h",
            "Wavy": "a",
            "Bold": "b",
            "Dash": "d"
        }

        # read molecules
        for elem1 in doc.getElementsByTagName("fragment"):
            if elem1.parentNode.nodeName != "page":
                continue
            mol = molecule(paper=self.paper)
            atom_id_to_atom = {}
            atom_id_to_text = {}
            for elem2 in elem1.childNodes:

                # atom
                if elem2.nodeName == "n":
                    font = ""
                    size = 12
                    text = "C"
                    color1 = "#000000"
                    for elem3 in elem2.childNodes:
                        if elem3.nodeName != "t":
                            continue
                        if elem3.hasAttribute("color"):
                            color1 = colors[int(elem3.getAttribute("color")) - 2]
                        text = ""
                        for elem4 in elem3.childNodes:
                            if elem4.nodeName != "s":
                                continue
                            # a missing or unknown font id keeps the current font
                            font_attr = elem4.getAttribute("font")
                            if font_attr:
                                font = fonts.get(int(font_attr), font)
                            if elem4.hasAttribute("size"):
                                size = int(elem4.getAttribute("size"))
                            text += dom_ext.getAllTextFromElement(elem4).strip()

                    position = elem2.getAttribute("p").split()
                    assert len(position) == 2

                    # we must postpone symbol assignment until we know the valency of the atoms
                    atom_id_to_text[elem2.getAttribute('id')] = text
                    vertex = mol.create_vertex()
                    vertex.line_color = color1
                    vertex.font_family = font
                    vertex.font_size = size
                    vertex.x = float(position[0])
                    vertex.y = float(position[1])
                    mol.add_vertex(vertex)
                    atom_id_to_atom[elem2.getAttribute('id')] = vertex

                # bond
                elif elem2.nodeName == "b":
                    if elem2.hasAttribute("color"):
                        color2 = colors[int(elem2.getAttribute("color")) - 2]
                    else:
                        color2 = "#000000"
                    order = 1
                    if elem2.hasAttribute("Order"):
                        order = int(elem2.getAttribute("Order"))
                    bond = mol.create_edge()
                    if elem2.hasAttribute("Display"):
                        display = elem2.getAttribute("Display").strip()
                        if display in bond_types:
                            bond.type = bond_types[display]
                    bond.line_color = color2
                    bond.order = order
                    atom1 = atom_id_to_atom[elem2.getAttribute("B")]
                    atom2 = atom_id_to_atom[elem2.getAttribute("E")]
                    mol.add_edge(atom1, atom2, bond)

            # here we reassign the symbols
            for atom_id, vertex in atom_id_to_atom.items():
                v = mol.create_vertex_according_to_text(
                    vertex, atom_id_to_text[atom_id])
                vertex.copy_settings(v)
                mol.replace_vertices(vertex, v)
                vertex.delete()
            # finally we add the molecule to the list of molecules for output
            molecules.append(mol)

        # read texts
        # bits of the "face" attribute and the markup tag each one stands for
        face_tags = {2: "i", 1: "b", 32: "sub", 64: "sup"}

        for elem5 in doc.getElementsByTagName("t"):
            if elem5.parentNode.nodeName != "page":
                continue
            # a real list, so len() and indexing also work on Python 3
            position = [float(v) for v in elem5.getAttribute("p").split()]
            assert len(position) == 2

            if elem5.hasAttribute("color"):
                color3 = colors[int(elem5.getAttribute("color")) - 2]
            else:
                color3 = "#000000"

            # per-text state; previously these leaked in from earlier
            # elements or were undefined when the <t> had no <s> child
            whole_text = ""
            font = ""
            font_size = None
            for elem51 in elem5.childNodes:
                if elem51.nodeName != "s":
                    continue
                chunk = ""
                for elem52 in elem51.childNodes:
                    if not isinstance(elem52, dom.Text):
                        continue
                    chunk = elem52.data
                    if elem51.hasAttribute("face"):
                        face_bits = int(elem51.getAttribute("face"))
                        # wrap the text in one tag per face bit that is set
                        for face, tag in face_tags.items():
                            if face_bits & face:
                                chunk = "<%s>%s</%s>" % (tag, chunk, tag)
                whole_text += chunk

                font_id = elem51.getAttribute("font")
                if font_id != "":
                    font = fonts.get(int(font_id), font)
                if elem51.hasAttribute("size"):
                    font_size = int(elem51.getAttribute("size"))

            t = text_class(self.paper, position, text=whole_text)
            t.line_color = color3
            if font_size is not None:
                t.font_size = font_size
            if font:
                t.font_family = font
            molecules.append(t)

        # read graphics - plus
        for elem6 in doc.getElementsByTagName("graphic"):
            if (elem6.getAttribute("GraphicType") == "Symbol"
                    and elem6.getAttribute("SymbolType") == "Plus"):
                box = [float(v)
                       for v in elem6.getAttribute("BoundingBox").split()]
                position2 = [box[0], box[1]]
                if elem6.hasAttribute("color"):
                    color4 = colors[int(elem6.getAttribute("color")) - 2]
                else:
                    color4 = "#000000"
                pl = plus(self.paper, position2)
                pl.line_color = color4
                molecules.append(pl)

        # read arrows
        for elem7 in doc.getElementsByTagName("arrow"):
            # each arrow is handled on its own; the old shared list broke on
            # arrows without a colour and mixed data between arrows
            if elem7.hasAttribute("color"):
                arrow_color = colors[int(elem7.getAttribute("color")) - 2]
            else:
                arrow_color = "#000000"
            head = [float(v) for v in elem7.getAttribute('Head3D').split()]
            tail = [float(v) for v in elem7.getAttribute('Tail3D').split()]
            # only x and y are used; the point list goes from tail to head
            arr = arrow(self.paper,
                        points=[tail[0:2], head[0:2]],
                        fill=arrow_color)
            arr.line_color = arrow_color
            molecules.append(arr)

        return molecules
Example #29
0
###GENERATE 3D MOL FILE WITH OBGEN###
os.chdir('../public/uploads/structures')
# One null-device handle is shared by all the calls below and every output
# file is closed as soon as the tool that writes it has finished.
with open(os.devnull, 'w') as _devnull:
    #3D conformer search with obgen
    with open('{}-3dt.mol'.format(molid), 'w') as _out:
        subprocess.call([os.path.join(config.babeldir, 'obgen'), '{}.mol'.format(molid)], stdout=_out, stderr=_devnull)
    #Remove warning flags
    with open('{}-3d.mol'.format(molid), 'w') as _out:
        subprocess.call(['/bin/grep', '-v', 'WARNING', '{}-3dt.mol'.format(molid)], stdout=_out, stderr=_devnull)
    os.remove('{}-3dt.mol'.format(molid))
    #Convert to PDB without hydrogens
    subprocess.call([os.path.join(config.babeldir, 'babel'), '-imol', '{}-3d.mol'.format(molid), '-d', '-opdb', '{}-3d.pdb'.format(molid)], stdout=_devnull, stderr=_devnull)
    #Neutralize atoms
    subprocess.call(['/bin/sed', '-i', r"s/1[\+-]$//g", '{}-3d.pdb'.format(molid)], stdout=_devnull, stderr=_devnull)
    #Finally convert to 3D SDF with hydrogens in neutral state
    # os.path.join like the other calls, so babeldir works with or without a trailing slash
    subprocess.call([os.path.join(config.babeldir, 'babel'), '-ipdb', '{}-3d.pdb'.format(molid), '-h', '--title', molname, '-osdf', '{}-3d.mol'.format(molid)], stdout=_devnull, stderr=_devnull)
os.remove('{}-3d.pdb'.format(molid))

molobj = molecule('{}-3d.mol'.format(molid))
os.chdir(cgidir)

####UPDATE MOLECULE DATA IN DATABASE############
dbconn = psycopg2.connect(config.dsn)
try:
    q = dbconn.cursor()
    try:
        # parameterized query: the driver does the quoting
        query = 'UPDATE molecules SET molweight=%s,molformula=%s WHERE molid=%s'
        options = [str(molobj.molweight),molobj.formula(),str(molid)]
        q.execute(query,options)
        dbconn.commit()
    finally:
        q.close()
finally:
    # release the connection even when the update fails
    dbconn.close()

######EXTENSIONS###############
os.chdir('../extensions')
##### RUN QIKPROP##############
def write_restraints(inp, initial_confs, start, end, tpr, top, includes, n,
                     ndxfn, Nchains):
    """Write per-stringpoint topologies and position-restraint .itp files.

    For every string point ``k`` in ``range(n)`` a ``topol_<k>.top`` is
    written, and for every point/chain pair a ``res_<k>_chain_<mol>.itp``
    ending in a ``[ position_restraints ]`` section whose force constants are
    the literal placeholder ``KFAC``.

    Args:
        inp, initial_confs, start, end, tpr: accepted for interface
            compatibility; not used by this function.
        top: path of the topology file that is read and rewritten.
        includes: per-chain include (.itp) paths, indexed by chain; may be
            empty, in which case ``top`` itself is patched with an #include.
        n: number of points in the string, including start and end point
            (converted with ``int``).
        ndxfn: index file handed to ``res_selection.read_ndx``.
        Nchains: number of chains.

    Raises:
        IndexError: if ``includes`` is empty and ``top`` does not contain the
            marker ``; Include Position restraint file``.
    """
    n = int(n)  # number of points in the string, including start and end point

    ndx_atoms = res_selection.read_ndx(ndxfn)

    # TODO: when initial_confs is None or empty, the starting and ending atom
    # configurations should be read for later interpolation (not implemented).

    # The restraint rows are the same for every point and chain, so build
    # them once.
    # GLIC HACK: only write one chain, and do it relative atom 1 since the
    # .itp maps to the topology molecule.
    restraint_rows = [
        "%6d     1  KFAC  KFAC  KFAC\n" % int(a) for a in ndx_atoms
        if a < 5566
    ]

    # Rewrite the topology to include the res itp files instead of the
    # original per-chain itps (if any). There will be one topol_x.top per
    # string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        if len(includes) > 0:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the include name is a file name, so
                # characters such as '.' or '+' must not act as regex
                # metacharacters.
                in_top = in_top.replace(includename,
                                        'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the restraint atom and force spec for each
    # point. The restraint positions are not in these files, so the contents
    # are the same for all points and chains. TODO
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())

                if len(includes) > 0:
                    # The result is not used here; the call is kept in case
                    # constructing the molecule validates the file
                    # (NOTE(review): confirm, then drop if it has no effect).
                    molecule(includes[mol])
                else:
                    with open('topol_%d.top' % k, 'w') as out_top:
                        molecule(top)  # see note above
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split(
                                '; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' %
                                      (k, mol))
                        out_top.write(in_itp[1])

                # One row per selected atom with the KFAC force constant
                # placeholder.
                restraint_itp.write("\n[ position_restraints ]\n")
                restraint_itp.write("; atom  type      fx      fy      fz\n")
                restraint_itp.writelines(restraint_rows)
Example #31
0
    def _read_inchi(self, text):
        """Parse the InChI string *text* into ``self.structure``.

        The sum and connectivity layers are read once. The hydrogen, charge
        and p layers are then applied in a loop: each pass first resets the
        structure (removes added hydrogens, sets bond orders to 1 and charges
        to 0) and then re-applies the layers, until no vertex has a free
        valency and every edge has an order, or until
        ``self._no_possibility_to_improve`` is set and no further charge
        placement is available from ``self._charge_mover``.

        Raises:
            oasa_inchi_error: if *text* is empty, or if bonds, charges or
                movable hydrogens could not be localized.
            oasa_unsupported_inchi_version_error: if the version layer is not
                recognised or is not version 1.0.
            AssertionError: if 50 passes are reached without the run counter
                being reset.
        """
        if not text:
            raise oasa_inchi_error("No inchi was given")
        self.structure = molecule()
        self.layers = self.split_layers(text)
        # version support (very crude)
        self.version = self._check_version(self.layers[0])
        if not self.version:
            raise oasa_unsupported_inchi_version_error(self.layers[0])
        elif str(self.version[0]) != '1' or str(self.version[1]) != '0':
            raise oasa_unsupported_inchi_version_error(self.layers[0])

        self.hs_in_hydrogen_layer = (
            self.get_number_of_hydrogens_in_hydrogen_layer())
        self.read_sum_layer()
        self.read_connectivity_layer()
        self._charge_mover = self._move_charge_somewhere_else()
        repeat = True
        run = 0
        # we have to repeat this step in order to find the right positioning
        # of movable hydrogens
        while repeat and not self._no_possibility_to_improve:
            # cleanup
            for h in self._added_hs:
                self.structure.remove_vertex(h)
            for b in self.structure.edges:
                b.order = 1
            for v in self.structure.vertices:
                # Deliberate self-assignment; the same idiom further down is
                # documented as resetting the valency.
                v.symbol = v.symbol
                v.charge = 0
            self.cleanup()

            # the code itself
            run += 1
            # Safety net against endless looping (inactive under ``python -O``).
            assert run < 50
            self._deal_with_notorious_groups()
            self.process_forced_charges()
            self.read_hydrogen_layer(run=run)
            self.read_charge_layer()
            self.read_p_layer()
            self.deal_with_da_bonds()
            #self._deal_with_valencies()
            self.compensate_for_forced_charges()
            self.structure.add_missing_bond_orders()
            #self.read_double_bond_stereo_layer()

            # here we check out if the molecule seems ok
            fvs = [v for v in self.structure.vertices if v.free_valency]
            # any() instead of ``not filter(...)``: on Python 3 filter()
            # returns an iterator that is always truthy, so the old test could
            # never succeed there.
            if not fvs and not any(not e.order
                                   for e in self.structure.edges):
                repeat = False
            elif len(fvs) == 1:
                a = fvs[0]
                a.symbol = a.symbol  # this resets the valency
                a.raise_valency_to_senseful_value()
                if not a.free_valency:
                    repeat = False

            if repeat and self._no_possibility_to_improve and self.charge:
                try:
                    # Builtin next() works on both Python 2.6+ and Python 3.
                    next(self._charge_mover)
                except StopIteration:
                    pass
                else:
                    # A new charge placement is available: start over.
                    self._no_possibility_to_improve = False
                    run = 0

        if repeat and self._no_possibility_to_improve:
            raise oasa_inchi_error(
                "Localization of bonds, charges or movable hydrogens failed")
def write_restraints(start, end, start_xvg, end_xvg, top, includes, n, ndx,
                     Nchains):
    """Write topologies and dihedral-restraint .itp files for a linear path.

    For every intermediate point ``k`` in ``range(1, n - 1)`` a
    ``topol_<k>.top`` is written, and for every point/chain pair a
    ``dihre_<k>_chain_<mol>.itp`` with a ``[ dihedral_restraints ]`` section.
    The phi/psi targets are ``start + (end - start) / n * k`` using the
    values read from the two xvg files.

    Args:
        start: passed to ``res_selection.res_select`` together with ``ndx``.
        end: not used by this function.
        start_xvg, end_xvg: paths of the xvg files with the start/end angles;
            a line belongs to residue ``r`` when it ends in ``-<r>``.
        top: path of the topology file that is read and rewritten.
        includes: per-chain include (.itp) paths, indexed by chain; may be
            empty, in which case ``top`` itself is patched with an #include.
        n: number of points (converted with ``int``).
        ndx: index selection passed to ``res_selection.res_select``.
        Nchains: number of chains.

    Raises:
        IndexError: if a selected residue lacks one of the four atoms of a
            dihedral, if the xvg files hold fewer matching lines than chains,
            or if ``includes`` is empty and ``top`` lacks the marker
            ``; Include Position restraint file``.
    """
    with open(start_xvg, 'r') as xvg_f:
        start_xvg = xvg_f.readlines()
    with open(end_xvg, 'r') as xvg_f:
        end_xvg = xvg_f.readlines()
    selection = res_selection.res_select(start, ndx)
    n = int(n)

    def residue_points(lines):
        # Map each selected residue to its [first, second] column values, one
        # pair per matching line, in file order.
        pts = {}
        for r in selection:
            matcher = re.compile(r'\-%s$' % r)
            pts[r] = []
            for line in lines:
                if matcher.search(line):
                    fields = line.split()
                    pts[r].append([float(fields[0]), float(fields[1])])
        return pts

    # create the path
    startpts = residue_points(start_xvg)
    endpts = residue_points(end_xvg)

    sys.stderr.write('%s' % includes)
    for k in range(1, n - 1):
        with open(top) as in_topf:
            in_top = in_topf.read()
        if len(includes) > 0:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the include name is a file name, so
                # characters such as '.' or '+' must not act as regex
                # metacharacters.
                in_top = in_top.replace(includename,
                                        'dihre_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    backbone = ('N', 'CA', 'C')
    for k in range(1, n - 1):
        for mol in range(Nchains):
            with open('dihre_%d_chain_%d.itp' % (k, mol),
                      'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # write the initial part of the topology file
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  phi  dphi  kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split(
                                '; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "dihre_%d_chain_%d.itp"\n' %
                                      (k, mol))
                        out_top.write(in_itp[1])

                for r in selection:
                    resnr = int(r)
                    # phi: C of the previous residue plus N, CA, C of this one
                    phi = [
                        a for a in protein
                        if (a.resnr == resnr and a.atomname in backbone) or
                        (a.resnr == resnr - 1 and a.atomname == 'C')
                    ]
                    # psi: N, CA, C of this residue plus N of the next one
                    psi = [
                        a for a in protein
                        if (a.resnr == resnr and a.atomname in backbone) or
                        (a.resnr == resnr + 1 and a.atomname == 'N')
                    ]

                    # interpolate between the start and end angles
                    phi_start = startpts[r][mol][0]
                    psi_start = startpts[r][mol][1]
                    phi_val = phi_start + (endpts[r][mol][0] -
                                           phi_start) / n * k
                    psi_val = psi_start + (endpts[r][mol][1] -
                                           psi_start) / n * k

                    restraint_itp.write(
                        "%5d%5d%5d%5d %8.4f%5d%5d\n" %
                        (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr,
                         phi[3].atomnr, phi_val, 0, 1))
                    restraint_itp.write(
                        "%5d%5d%5d%5d %8.4f%5d%5d\n" %
                        (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr,
                         psi[3].atomnr, psi_val, 0, 1))
def write_restraints(inp, initial_confs, start, end, tpr, top, includes, n, ndxfn, Nchains):
    """Write per-stringpoint topologies and position-restraint .itp files.

    For every string point ``k`` in ``range(n)`` a ``topol_<k>.top`` is
    written, and for every point/chain pair a ``res_<k>_chain_<mol>.itp``
    ending in a ``[ position_restraints ]`` section whose force constants are
    the literal placeholder ``KFAC``.

    Args:
        inp, initial_confs, start, end, tpr: accepted for interface
            compatibility; not used by this function.
        top: path of the topology file that is read and rewritten.
        includes: per-chain include (.itp) paths, indexed by chain; may be
            empty, in which case ``top`` itself is patched with an #include.
        n: number of points in the string, including start and end point
            (converted with ``int``).
        ndxfn: index file handed to ``res_selection.read_ndx``.
        Nchains: number of chains.

    Raises:
        IndexError: if ``includes`` is empty and ``top`` does not contain the
            marker ``; Include Position restraint file``.
    """
    n = int(n)  # number of points in the string, including start and end point

    ndx_atoms = res_selection.read_ndx(ndxfn)

    # TODO: when initial_confs is None or empty, the starting and ending atom
    # configurations should be read for later interpolation (not implemented).

    # Identical for every point and chain, so format the rows a single time.
    # GLIC HACK: only write one chain, and do it relative atom 1 since the
    # .itp maps to the topology molecule.
    position_rows = []
    for a in ndx_atoms:
        if a < 5566:
            position_rows.append("%6d     1  KFAC  KFAC  KFAC\n" % int(a))

    # Rewrite the topology to include the res itp files instead of the
    # original per-chain itps (if any). There will be one topol_x.top per
    # string point.
    sys.stderr.write('%s' % includes)
    have_includes = len(includes) > 0
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        if have_includes:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Plain string replacement rather than re.sub: the include
                # name is a file name and must be matched literally.
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the restraint atom and force spec for each
    # point. The restraint positions are not in these files, so the contents
    # are the same for all points and chains. TODO
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())

                if have_includes:
                    # Result unused; the call is kept in case constructing the
                    # molecule validates the file (NOTE(review): confirm).
                    molecule(includes[mol])
                else:
                    with open('topol_%d.top' % k, 'w') as out_top:
                        molecule(top)  # see note above
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split('; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                # One row per selected atom with the KFAC force constant
                # placeholder.
                restraint_itp.write("\n[ position_restraints ]\n")
                restraint_itp.write("; atom  type      fx      fy      fz\n")
                restraint_itp.writelines(position_rows)
Example #34
0
def reparametrize(use_posres, fix_endpoints, cvs, ndx_file, Nchains,
                  start_conf, start_xvg, end_conf, end_xvg, last_resconfs, top,
                  includes):
    """Average the swarm results per string point, reparametrize the string
    and write the restraint input for the next iteration.

    Steps, in order:
      1. For every path point, read the CV vector of each swarm member and
         average them (sum via ``reduce(mapadd, ...)``, scaled by
         ``1 / Nswarms``).
      2. If ``fix_endpoints == 1``, read the start/end CV vectors and add
         them as the first and last point.
      3. Append a zero padding point and call ``ext_rep_pts`` repeatedly
         (at most 150 further iterations, stopping once the reported maximum
         spread is no longer above 0.012).
      4. Write one output per string point: ``rep_resconf_<k>.gro`` when
         ``use_posres == 1``, otherwise ``res_<k>_chain_<chain>.itp`` for
         every chain.

    Args:
        use_posres: 1 selects position restraints (.gro based), 0 selects
            dihedral restraints (.xvg based).
        fix_endpoints: 1 to keep the start/end points fixed; their output
            files are then not written.
        cvs: nested sequence indexed as ``cvs[pathpt][swarm]``; each entry is
            passed to ``rwgro.readgro_flat`` or ``readxvg.readxvg_flat``.
        ndx_file: index file handed to ``res_selection.read_ndx``.
        Nchains: number of chains.
        start_conf, end_conf: start/end configurations (read with
            ``rwgro.readgro_flat`` in the posres case; ``start_conf`` is also
            used for the residue selection in the dihedral case).
        start_xvg, end_xvg: start/end xvg files (dihedral case only).
        last_resconfs: sequence of .gro paths indexed by string point (posres
            case only).
        top: topology path, used in the dihedral case when ``Nchains == 1``.
        includes: nested sequence indexed as ``includes[k][chain]`` of .itp
            paths (dihedral case only).

    Returns:
        None; all results are written to files in the current directory.
    """

    Nswarms = len(cvs[0])

    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals, we map the atoms to residues for a single chain, and the readxvg etc. will read the entire file and
    # select the same residues in each chain. But for the position restraints which use the atom indices directly, we have
    # to first expand the index so it covers all chains.

    # TODO: have to figure out or input atoms per chain in the .gro's so we can repeat the atom-selection Nchains times
    # for the posres case. The ndx file is for atoms inside the chain, but the .gro will contain global numbering.
    # We can detect the chain-repeat in rwgro, by looking for repeating first residue name.
    # Hardcode a repeat for testing for now.

    # NOTE(review): rsel is only bound when use_posres == 0, but every
    # "else" branch below (i.e. any value other than 1) reads it.
    if use_posres == 0:
        # Map atoms to residues for the dihedral selection
        rsel = res_selection.res_select('%s' % start_conf, ndx_atoms)
        #sys.stderr.write('Residue selection: %s' %rsel)

#    else:
#            selected_atoms = []
#            for ch in range(5):
#                    for i in range(len(ndx_atoms)):
#                            selected_atoms += [ ndx_atoms[i] + ch * 5566 ]

# Calculate the average drift in CV space

# newpts is a per-swarm-point list of CV points (each a list of the CV dimension length)
    newpts = []

    # Note: the cvs[][] array is indexed after the number of stringpoints that actually were swarm-processed,
    # so depending on the fix_endpoints option it may or may not exactly match the path[] which always include
    # all points. If we only read in N-2 points here, the start/end will be added to newpts in the code further below.
    for pathpt in range(len(cvs)):
        swarmpts = []
        for i in range(len(cvs[pathpt])):
            if use_posres == 1:
                zpt = rwgro.readgro_flat(cvs[pathpt][i], ndx_atoms)
                #sys.stderr.write('Read pathpt %d swarm %d (%s), got %d CVs\n' % (pathpt, i, cvs[pathpt][i], len(zpt)))
            else:
                zpt = readxvg.readxvg_flat(cvs[pathpt][i], rsel)
            swarmpts.append(zpt)
        # Sum the swarm vectors, then divide by the swarm count taken from
        # the first path point.
        zptsum = reduce(mapadd, swarmpts)
        avgdrift = scale((1 / float(Nswarms)), zptsum)
        newpts.append(avgdrift)

    # Read in the fixed start and end CV values, for the fix_endpoints case (otherwise the start/end will
    # be allowed to drift just like the other points, and they will already then be a part of the newpts array)
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: the start/end_conf are full Systems so the atom numbering aliases for the ndx_atoms array :/
            # Currently fixed in readgro_flat temporary, hardcoded for the GLIC Protein number.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)

        sys.stderr.write('Length of initpt %d, targetpt %d\n' %
                         (len(initpt), len(targetpt)))

        # Insert the start/end in the beginning and last of newpts
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # something with 1 indexing makes this padding necessary. TODO: check if this is needed anymore
    paddingpt = [0] * len(newpts[0])
    newpts.append(paddingpt)

    # Do the actual reparameterization
    # newpts is a 2D list, first level is one per stringpoint, second is the linear list of CVs

    # rep_pts returns the maximum spread of the CV distances between points in [0] and the adjusted
    # points in [1]

    # Initial iteration
    rep_it1 = ext_rep_pts(newpts)
    adjusted = rep_it1[1]  # get the points only, ignore the spread result

    # Keep iterating, feeding the result of the previous result into rep_pts again
    # Note that with long CV vectors (> 4000 dimensions) iterations takes a long time
    # (at least 45 min for 25 iterations on a single-core 3.5 GHz) when using the python rep_pts.
    # We can abort early when the maximum spread between points in the updated string goes
    # below a threshold
    iters = [adjusted]
    i = 0
    maxspread = 100.0
    # Do max 150 iterations even if we don't reach our goal
    while i < 150 and maxspread > 0.012:
        sys.stderr.write('Rep iter %d: \n' % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(iters[i])
        maxspread = rep_it[0]
        sys.stderr.write('  maxspread was %f\n' % maxspread)
        # Remember the adjusted points
        iters.append(rep_it[1])
        i = i + 1

    sys.stderr.write('Final maximum spread %f after %d iterations.\n' %
                     (maxspread, i))

    # Get the final iteration's result
    adjusted = iters[-1]

    # delete the padding point
    adjusted = adjusted[:-1]
    newpts = newpts[:-1]

    #sys.stderr.write('Pts before repa:\n %s\n' % newpts)
    #sys.stderr.write('The adjusted pts:\n %s\n' % adjusted)

    # Possibility to test skipping reparametrize by uncommenting the next row.
    # The stringpoints will drift along the string and probably end up in the
    # endpoints or a minima along the string.
    #adjusted = newpts

    # calculate reparam distance

    sys.stderr.write('Length of the adjusted vector: %d\n' % len(adjusted))
    # TODO Nchains should depend on the specific residue (?)
    # Given as function argument now.
    #Nchains = len(initpt) / (2 * len(rsel))

    # write the CV control data for the next iteration

    # The output file expected for the posres case is rep_resconf_%d.gro for each stringpoint.
    # For dihedrals its res_%d_chain_%d.itp for each stringpoint and chain.
    #
    for k in range(len(adjusted)):
        # Not necessary to do this output for the start/end-points in the fix_endpoints case, the data is
        # just bypassed in the caller script
        if fix_endpoints == 1 and (k == 0 or k == (len(adjusted) - 1)):
            continue

        if use_posres == 1:
            # Open the output resconf which will go into the next iteration as minimization target
            with open('rep_resconf_%d.gro' % k, 'w') as rep_resconf:
                # Open and read the previous (input) resconf, which has basically tagged along since the last
                # reparametrization step (or was set initially at swarm-start)
                with open(last_resconfs[k], 'r') as in_resconf_f:
                    in_resconf = in_resconf_f.readlines()
                # TODO: maybe this chunk of code could be done by the rwgro module for us.
                # Copy the first 2 rows (title and number of atoms) straight over
                rep_resconf.write(in_resconf[0])
                rep_resconf.write(in_resconf[1])
                # Go through the atoms row-by-row and update the xyz coordinates for the atoms the reparametrize
                # step moved
                # Note: we are only copying over positions here. The velocities are not needed as the use for these files
                # will only be as a base for the next iterations position restraint coordinates.
                pathpoint = adjusted[
                    k]  # the 1-D list of CVs (positions): x,y,z * nbr atoms in index
                # Only a warning is written on a length mismatch; processing
                # continues regardless.
                if len(pathpoint) != (1555 *
                                      3):  # assert on GLIC length (TODO)
                    sys.stderr.write('adjusted[] entry of wrong length %d\n' %
                                     len(pathpoint))
                cvpos = 0
                # Atom rows are everything between the two header rows and
                # the final (cell dimension) row; fields are cut at fixed
                # columns.
                for line in in_resconf[2:][:-1]:
                    resname = line[
                        0:
                        8]  # python-ranges are inclusive the first index and exclusive the second...
                    atname = line[8:15]
                    atomnr = int(line[15:20])
                    x = float(line[20:28])
                    y = float(line[28:36])
                    z = float(line[36:44])
                    if atomnr in ndx_atoms:
                        # Update to new coords
                        x = pathpoint[cvpos]
                        y = pathpoint[cvpos + 1]
                        z = pathpoint[cvpos + 2]
                        cvpos += 3
                    # Write out the row, updated or not
                    rep_resconf.write('%s%s%5d%8.3f%8.3f%8.3f\n' %
                                      (resname, atname, atomnr, x, y, z))
                # Copy the last row which was the cell dimensions
                rep_resconf.write(in_resconf[len(in_resconf) - 1])
        else:
            for chain in range(Nchains):
                with open('res_%d_chain_%d.itp' % (k, chain),
                          'w') as restraint_itp:
                    # Copy everything that precedes the old
                    # [ dihedral_restraints ] section of the input .itp.
                    with open(includes[k][chain], 'r') as in_itpf:
                        in_itp = in_itpf.read()
                        moltop = in_itp.split('[ dihedral_restraints ]')[0]
                        restraint_itp.write('%s' % moltop)

                    sys.stderr.write(
                        "Writing restraints for stringpoint %d chain %d\n" %
                        (k, chain))
                    # Note: this format is for Gromacs 4.6+
                    restraint_itp.write("[ dihedral_restraints ]\n")
                    restraint_itp.write(
                        "; ai   aj   ak   al  type     phi    dphi    kfac   phiB    dphiB    kfacB\n"
                    )
                    pathpoint = adjusted[k]  # just a list of phi/psi angles

                    if Nchains == 1:
                        protein = molecule(top)
                    else:
                        protein = molecule('%s' % includes[k][chain])

                    # Create a lookup-table for the protein topology that maps residue to dihedrally relevant
                    # backbone atom indices for N, CA and C.

                    dih_atoms = {}

                    for a in protein:
                        if (a.atomname == 'CA' or a.atomname == 'N'
                                or a.atomname == 'C'):
                            try:
                                dih_atoms[a.resnr][a.atomname] = a.atomnr
                            except KeyError:
                                dih_atoms[a.resnr] = {a.atomname: a.atomnr}

                    # Use the lookup-table built above and get the dihedral specification atoms needed for each
                    # residue in the selection. This is O(n) in residues, thanks to the dih_atoms table.

                    pos = 0

                    for r in rsel:
                        # Get the atom numbers to use for the phi and psi dihedrals (4 atoms each)

                        # phi is C on the previous residue, and N, CA, C on this
                        phi = [
                            dih_atoms[r - 1]['C'], dih_atoms[r]['N'],
                            dih_atoms[r]['CA'], dih_atoms[r]['C']
                        ]

                        # psi is N, CA and C on this residue and N on the next
                        psi = [
                            dih_atoms[r]['N'], dih_atoms[r]['CA'],
                            dih_atoms[r]['C'], dih_atoms[r + 1]['N']
                        ]

                        # get phi and psi values from the reparametrization vector
                        # NOTE(review): pos advances by 2 * Nchains per
                        # residue, but the per-chain offset is "chain", not
                        # "2 * chain", so for Nchains > 1 the psi index of
                        # one chain equals the phi index of the next.
                        # Confirm against the layout readxvg_flat produces.
                        phi_val = pathpoint[pos + chain]
                        psi_val = pathpoint[pos + chain + 1]

                        # Go to the next residue (phi,psi vals * number of chains apart)
                        pos += 2 * Nchains

                        # write phi, psi angles and k-factor
                        # Note: in the Gromacs 4.6+ format, the k-factor is here. Before, it was in the .mdp as
                        # dihre_fc.

                        # Since we need different force constants in different stages, we need to put
                        # a searchable placeholder in the file here and replace it later
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" %
                            (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" %
                            (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
Example #35
0
                pass
    if(cstart>0):
        for i in range(cstart,len(l)):
            if l[i].find('$$$$')>=0:
                break
            try:
                cc50s.append(float(l[i]))
            except Exception:
                pass
except Exception:
    print 'Problem reading experimental data.'
    sys.exit()    

# Load the molecule and derive the values stored in the database.
# The parenthesised single-argument print behaves identically under Python 2
# and Python 3; the bare print statement is a syntax error on Python 3.
try:
    molobj = molecule(molfilename)
    molweight = molobj.molweight
    molformula = molobj.formula()
except Exception:
    print('Problem loading mol file.')
    # NOTE(review): sys.exit() without an argument exits with status 0 even
    # though this is a failure path; left unchanged in case callers rely on it.
    sys.exit()

# Insert the molecule and fetch the id the database generated for it.
# The molecule name is the file name without its last four characters
# (presumably a ".mol"-style extension; confirm for longer extensions).
try:
    query = 'INSERT INTO molecules (molname, authorid, dateadded, molweight, molformula) VALUES (%s,%s,localtimestamp,%s,%s) RETURNING molid'
    options = [molfilename[:-4], 1, molweight, molformula]
    q.execute(query, options)
    molid = q.fetchone()[0]
except Exception:
    print('Problem inserting molecule into database.')
    sys.exit()
Example #36
0
# Expected length of one position-restraint CV vector: x, y, z for each of the 1555 atoms of the
# GLIC selection. Only used to emit a warning on stderr.
# TODO: derive this from the atom selection instead of hardcoding the GLIC size.
_GLIC_POSRES_CV_LENGTH = 1555 * 3


def _backbone_dihedral_atoms(protein):
    """Return ``{resnr: {atomname: atomnr}}`` for the N, CA and C atoms found when iterating *protein*."""
    dih_atoms = {}
    for a in protein:
        if a.atomname in ("CA", "N", "C"):
            dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr
    return dih_atoms


def _write_posres_conf(k, in_name, pathpoint, selected_atoms):
    """Write ``rep_resconf_<k>.gro``: a copy of *in_name* with the selected atoms moved to *pathpoint*.

    *pathpoint* is the flat CV list (x, y, z per selected atom, consumed in file order) and
    *selected_atoms* is a container of atom numbers supporting fast ``in`` tests. Only positions are
    written; velocity columns of the input are dropped because the output is only used as the base
    for the next iteration's position restraint coordinates.
    """
    # Read the complete template before the output is opened for writing, so that a failed read
    # (or an input path that aliases the output) cannot leave a truncated output file behind.
    with open(in_name, "r") as in_resconf_f:
        in_resconf = in_resconf_f.readlines()

    with open("rep_resconf_%d.gro" % k, "w") as rep_resconf:
        # Copy the first 2 rows (title and number of atoms) straight over
        rep_resconf.write(in_resconf[0])
        rep_resconf.write(in_resconf[1])

        if len(pathpoint) != _GLIC_POSRES_CV_LENGTH:
            sys.stderr.write("adjusted[] entry of wrong length %d\n" % len(pathpoint))

        # Go through the atoms row-by-row (everything between the 2 header rows and the final
        # cell-dimension row) and update the xyz coordinates of the atoms the reparametrize
        # step moved.
        cvpos = 0
        for line in in_resconf[2:-1]:
            resname = line[0:8]
            atname = line[8:15]
            atomnr = int(line[15:20])
            x = float(line[20:28])
            y = float(line[28:36])
            z = float(line[36:44])
            if atomnr in selected_atoms:
                # Update to new coords
                x = pathpoint[cvpos]
                y = pathpoint[cvpos + 1]
                z = pathpoint[cvpos + 2]
                cvpos += 3
            # Write out the row, updated or not
            rep_resconf.write("%s%s%5d%8.3f%8.3f%8.3f\n" % (resname, atname, atomnr, x, y, z))

        # Copy the last row which was the cell dimensions
        rep_resconf.write(in_resconf[-1])


def _write_dihedral_restraints(k, chain, Nchains, include_name, top, pathpoint, rsel):
    """Write ``res_<k>_chain_<chain>.itp`` with phi/psi restraints taken from *pathpoint*.

    Everything in *include_name* before its ``[ dihedral_restraints ]`` section is copied over, then
    a fresh restraint section is written with one phi and one psi row per residue in *rsel*. The
    force constant column holds the literal placeholder ``KFAC``, because different stages need
    different force constants and substitute it later.
    """
    # Read the template before the output is opened for writing, so that a failed read (or an
    # input path that aliases the output) cannot leave a truncated output file behind.
    with open(include_name, "r") as in_itpf:
        moltop = in_itpf.read().split("[ dihedral_restraints ]")[0]

    with open("res_%d_chain_%d.itp" % (k, chain), "w") as restraint_itp:
        restraint_itp.write("%s" % moltop)

        sys.stderr.write("Writing restraints for stringpoint %d chain %d\n" % (k, chain))
        # Note: this format is for Gromacs 4.6+, where the k-factor lives in the restraint row.
        # Before, it was in the .mdp as dihre_fc.
        restraint_itp.write("[ dihedral_restraints ]\n")
        restraint_itp.write("; ai   aj   ak   al  type     phi    dphi    kfac   phiB    dphiB    kfacB\n")

        if Nchains == 1:
            protein = molecule(top)
        else:
            protein = molecule("%s" % include_name)

        # Lookup-table mapping residue to the dihedrally relevant backbone atom indices, so the
        # loop below is O(n) in residues.
        dih_atoms = _backbone_dihedral_atoms(protein)

        pos = 0
        for r in rsel:
            # phi is C on the previous residue, and N, CA, C on this
            phi = [dih_atoms[r - 1]["C"], dih_atoms[r]["N"], dih_atoms[r]["CA"], dih_atoms[r]["C"]]
            # psi is N, CA and C on this residue and N on the next
            psi = [dih_atoms[r]["N"], dih_atoms[r]["CA"], dih_atoms[r]["C"], dih_atoms[r + 1]["N"]]

            # get phi and psi values from the reparametrization vector
            # NOTE(review): with Nchains > 1 the offset `pos + chain` advances one value per chain
            # while `pos` advances two values per chain - confirm against the layout produced by
            # readxvg.readxvg_flat whether this should be `pos + 2 * chain`.
            phi_val = pathpoint[pos + chain]
            psi_val = pathpoint[pos + chain + 1]

            # Go to the next residue (phi,psi vals * number of chains apart)
            pos += 2 * Nchains

            for atoms, angle in ((phi, phi_val), (psi, psi_val)):
                restraint_itp.write(
                    "%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" % (atoms[0], atoms[1], atoms[2], atoms[3], 1, angle, 0)
                )


def reparametrize(
    use_posres,
    fix_endpoints,
    cvs,
    ndx_file,
    Nchains,
    start_conf,
    start_xvg,
    end_conf,
    end_xvg,
    last_resconfs,
    top,
    includes,
    max_iterations=150,
    spread_tolerance=0.012,
):
    """Reparametrize the string from swarm results and write the restraint input for the next iteration.

    For every string point the CV vectors of its swarm runs are averaged. The averaged points
    (plus the fixed endpoints, if requested) are then passed repeatedly through ``ext_rep_pts``
    until the maximum spread it reports is no longer above *spread_tolerance* or *max_iterations*
    passes have been made. Finally one file per string point is written to the current working
    directory: ``rep_resconf_<k>.gro`` for position restraints, or ``res_<k>_chain_<c>.itp`` per
    chain for dihedral restraints. Progress is reported on stderr.

    Args:
        use_posres: 1 to use atom positions as CVs; any other value uses phi/psi dihedrals.
        fix_endpoints: 1 if the first and last string point are fixed. They are then read from the
            start/end files, added around the swarm results, and no output is written for them.
        cvs: per swarm-processed string point, a sequence of CV file names, one per swarm run.
            The length of ``cvs[0]`` is used as the number of swarms when averaging.
        ndx_file: index file read with ``res_selection.read_ndx``.
        Nchains: number of chains; the dihedral output is written once per chain.
        start_conf: start configuration; used for the residue selection (dihedrals) and as the
            fixed start point (position restraints).
        start_xvg: dihedral file of the fixed start point (dihedrals with fixed endpoints only).
        end_conf: configuration of the fixed end point (position restraints with fixed endpoints only).
        end_xvg: dihedral file of the fixed end point (dihedrals with fixed endpoints only).
        last_resconfs: per string point, the previous restraint .gro used as the output template
            (position restraints only).
        top: topology handed to ``molecule`` when ``Nchains == 1`` (dihedrals only).
        includes: ``includes[k][chain]`` is the .itp used as the output template, and handed to
            ``molecule`` when ``Nchains != 1`` (dihedrals only).
        max_iterations: upper bound on the reparametrization passes after the initial one.
        spread_tolerance: iteration stops early once the maximum spread is not above this value.

    Returns:
        None. The results are the files written.
    """
    Nswarms = len(cvs[0])

    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals, we map the atoms to residues for a single chain, and the readxvg etc. will read the entire file and
    # select the same residues in each chain. But for the position restraints which use the atom indices directly, we have
    # to first expand the index so it covers all chains.

    # TODO: have to figure out or input atoms per chain in the .gro's so we can repeat the atom-selection Nchains times
    # for the posres case. The ndx file is for atoms inside the chain, but the .gro will contain global numbering.
    # We can detect the chain-repeat in rwgro, by looking for repeating first residue name.

    # The residue selection is needed by every non-posres code path below, so bind it under exactly
    # the same condition those paths use (anything other than use_posres == 1).
    rsel = None
    if use_posres != 1:
        # Map atoms to residues for the dihedral selection
        rsel = res_selection.res_select("%s" % start_conf, ndx_atoms)

    # Calculate the average drift in CV space.
    # newpts is a per-string-point list of CV points (each a list of the CV dimension length).
    # Note: cvs is indexed after the number of stringpoints that actually were swarm-processed,
    # so depending on the fix_endpoints option it may or may not include the start/end points.
    newpts = []
    for swarm_files in cvs:
        swarmpts = []
        for cv_file in swarm_files:
            if use_posres == 1:
                zpt = rwgro.readgro_flat(cv_file, ndx_atoms)
            else:
                zpt = readxvg.readxvg_flat(cv_file, rsel)
            swarmpts.append(zpt)
        zptsum = reduce(mapadd, swarmpts)
        newpts.append(scale((1 / float(Nswarms)), zptsum))

    # Read in the fixed start and end CV values, for the fix_endpoints case (otherwise the start/end will
    # be allowed to drift just like the other points, and they are then already a part of newpts)
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: the start/end_conf are full Systems so the atom numbering aliases for the ndx_atoms array :/
            # Currently fixed in readgro_flat temporary, hardcoded for the GLIC Protein number.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)

        sys.stderr.write("Length of initpt %d, targetpt %d\n" % (len(initpt), len(targetpt)))

        # Insert the start/end in the beginning and last of newpts
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # something with 1 indexing makes this padding necessary. TODO: check if this is needed anymore
    newpts.append([0] * len(newpts[0]))

    # Do the actual reparameterization.
    # ext_rep_pts returns the maximum spread of the CV distances between points in [0] and the
    # adjusted points in [1]. The first call only provides the starting points; its spread is ignored.
    adjusted = ext_rep_pts(newpts)[1]

    # Keep iterating, feeding the previous result into ext_rep_pts again. Only the latest result is
    # kept: with long CV vectors (> 4000 dimensions) the iterations are slow and the intermediate
    # results are never used, so we abort early once the maximum spread drops to the tolerance.
    maxspread = 100.0
    iteration = 0
    while iteration < max_iterations and maxspread > spread_tolerance:
        sys.stderr.write("Rep iter %d: \n" % iteration)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write("  maxspread was %f\n" % maxspread)
        adjusted = rep_it[1]
        iteration += 1

    sys.stderr.write("Final maximum spread %f after %d iterations.\n" % (maxspread, iteration))

    # delete the padding point
    adjusted = adjusted[:-1]

    sys.stderr.write("Length of the adjusted vector: %d\n" % len(adjusted))

    # Write the CV control data for the next iteration.
    # The output file expected for the posres case is rep_resconf_%d.gro for each stringpoint.
    # For dihedrals its res_%d_chain_%d.itp for each stringpoint and chain.
    if use_posres == 1:
        # Build the lookup once: it is tested for every atom row of every string point.
        selected_atoms = set(ndx_atoms)

    last_index = len(adjusted) - 1
    for k, pathpoint in enumerate(adjusted):
        # Not necessary to do this output for the start/end-points in the fix_endpoints case, the data is
        # just bypassed in the caller script
        if fix_endpoints == 1 and k in (0, last_index):
            continue

        if use_posres == 1:
            _write_posres_conf(k, last_resconfs[k], pathpoint, selected_atoms)
        else:
            for chain in range(Nchains):
                _write_dihedral_restraints(k, chain, Nchains, includes[k][chain], top, pathpoint, rsel)