def data_to_molecule(data: data_lammps):
    """Build a ``molecule`` from a ``data_lammps`` object holding one species.

    The species index of the first atom is taken as the reference; the first
    ``data.NAtoms`` atoms must all carry it.  Bonds and angles are deep-copied
    into the returned molecule.

    The process is terminated through ``sys.exit`` when ``data.NBonds`` is not
    greater than 1, or when an atom with a different species index is found.
    """
    # NOTE(review): data with exactly one bond is rejected as well, although
    # the message only talks about data without bonds - confirm the threshold.
    if not (data.NBonds > 1):
        sys.exit(
            'data_to_molecule : ERROR - This function only works for data with bonds'
        )

    members = []
    species = data.At[0].iSpc
    for index in range(data.NAtoms):
        candidate = data.At[index]
        if candidate.iSpc != species:
            sys.exit(
                'data_to_molecule : ERROR - This function only works for data with atoms belonging to a single molecule'
            )
        members.append(candidate)

    # the copies keep the new molecule independent of the source data object
    return molecule(species, members, deepcopy(data.Bnd), deepcopy(data.Ang))
def read_cdml(text):
    """Yield the molecules described by a CDML document.

    This is a generator: every ``molecule`` element found anywhere in the
    document is converted.  A connected molecule is yielded as is; a
    disconnected one is yielded as its separate connected components.

    ``text`` is the CDML source as a string.  Reading stops completely (no
    further molecules are yielded) as soon as an atom without a ``name``
    attribute is met.
    """
    doc = dom.parseString(text)
    # "//molecule" matches both plain CDML and CDML embedded in SVG
    for mol_el in dom_ext.simpleXPathSearch(doc, "//molecule"):
        atom_id_remap = {}
        mol = molecule()
        for atom_el in dom_ext.simpleXPathSearch(mol_el, "atom"):
            name = atom_el.getAttribute('name')
            if not name:
                # an atom with an invalid symbol aborts the whole reading
                return
            pos = dom_ext.simpleXPathSearch(atom_el, 'point')[0]
            x = cm_to_float_coord(pos.getAttribute('x'))
            y = cm_to_float_coord(pos.getAttribute('y'))
            z = cm_to_float_coord(pos.getAttribute('z'))
            if name in PT:
                # it is really an atom
                charge_text = atom_el.getAttribute('charge')
                a = atom(symbol=name,
                         charge=int(charge_text) if charge_text else 0,
                         coords=(x, y, z))
                mol.add_vertex(v=a)
            elif name in cdml_to_smiles:
                # it is a known group; its first vertex takes the position
                group = smiles.text_to_mol(cdml_to_smiles[name], calc_coords=0)
                a = group.vertices[0]
                a.x = x
                a.y = y
                a.z = z
                mol.insert_a_graph(group)
            # NOTE(review): for a name that is neither an element nor a known
            # group, `a` is still the previous atom (or undefined) here.
            atom_id_remap[atom_el.getAttribute('id')] = a

        for bond_el in dom_ext.simpleXPathSearch(mol_el, "bond"):
            # the type attribute is read as <type letter><order digit>
            bond_type = bond_el.getAttribute('type')
            if bond_type[1] == u'0':
                # we ignore bonds with order 0
                continue
            v1 = atom_id_remap[bond_el.getAttribute('start')]
            v2 = atom_id_remap[bond_el.getAttribute('end')]
            e = bond(order=int(bond_type[1]), type=bond_type[0])
            mol.add_edge(v1, v2, e=e)

        if mol.is_connected():
            yield mol
        else:
            # this is here to handle diborane and similar weird things
            for comp in mol.get_disconnected_subgraphs():
                yield comp
def get_transformed_template(self, n, coords, type="empty", paper=None):
    """Return template number `n` moved, scaled and rotated to fit `coords`.

    `type` is the kind of connection: 'bond', 'atom1' (for a single atom),
    'atom2' (for an atom with more than 1 bond) or 'empty'.  For 'empty'
    `coords` is (x, y); for the other kinds it is (x1, y1, x2, y2).
    `paper` defaults to Store.app.paper.

    Returns the prepared molecule, or None when a 'bond' connection is
    requested for a template that has no template bond.
    """
    target_paper = paper or Store.app.paper
    target_paper.onread_id_sandbox_activate()  # must be here to mangle the ids
    current = molecule(target_paper, package=self.templates[n])
    target_paper.onread_id_sandbox_finish(apply_to=[current])  # id mangling
    current.name = ""
    self._scale_ratio = 1
    trans = transform()

    if type == "empty":
        # just draws the template - no connection
        xt1, yt1 = current.t_atom.get_xy()
        xt2, yt2 = current.next_to_t_atom.get_xy()
        x1, y1 = coords
        bond_length = Screen.any_to_px(Store.app.paper.standard.bond_length)
        current.delete_items([current.t_atom], redraw=0, delete_single_atom=0)
        trans.set_move(-xt2, -yt2)
        template_length = math.sqrt((xt1 - xt2) ** 2 + (yt1 - yt2) ** 2)
        trans.set_scaling(bond_length / template_length)
        trans.set_move(x1, y1)
    elif type in ("atom1", "atom2"):
        # attaching to an atom
        xt1, yt1 = current.t_atom.get_xy()
        xt2, yt2 = current.next_to_t_atom.get_xy()
        x1, y1, x2, y2 = coords
        trans.set_move(-xt2, -yt2)
        target_length = math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
        template_length = math.sqrt((xt1 - xt2) ** 2 + (yt1 - yt2) ** 2)
        trans.set_scaling(target_length / template_length)
        template_angle = math.atan2(xt1 - xt2, yt1 - yt2)
        target_angle = math.atan2(x1 - x2, y1 - y2)
        trans.set_rotation(template_angle - target_angle)
        trans.set_move(x2, y2)
    elif type == "bond":
        # attaching to a bond
        if not (current.t_bond_first and current.t_bond_second):
            warn("this template is not capable to be added to bond - sorry.")
            return None
        current.delete_items([current.t_atom], redraw=0, delete_single_atom=0)
        xt1, yt1 = current.t_bond_first.get_xy()
        xt2, yt2 = current.t_bond_second.get_xy()
        x1, y1, x2, y2 = coords
        target_length = math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
        template_length = math.sqrt((xt1 - xt2) ** 2 + (yt1 - yt2) ** 2)
        # further needed for bond.bond_width transformation
        self._scale_ratio = target_length / template_length
        trans.set_move(-xt1, -yt1)
        template_angle = math.atan2(xt1 - xt2, yt1 - yt2)
        target_angle = math.atan2(x1 - x2, y1 - y2)
        trans.set_rotation(template_angle - target_angle)
        trans.set_scaling(self._scale_ratio)
        trans.set_move(x1, y1)

    self.transform_template(current, trans)

    # remove obsolete info from template
    if type == "atom1":
        current.delete_items([current.t_atom], redraw=0, delete_single_atom=0)
    elif type == "atom2":
        current.t_atom.x = x1
        current.t_atom.y = y1
    current.t_atom = None
    current.t_bond_first = None
    current.t_bond_second = None
    return current
def read_cdml( text):
    """Generate molecules from the CDML source given in `text`.

    All ``molecule`` elements of the document are processed in turn.  Each
    yields either the molecule itself (when it is connected) or every one of
    its disconnected subgraphs.  When an atom element has no ``name``, the
    generator finishes without yielding that or any later molecule.
    """
    doc = dom.parseString( text)
    path = "//molecule"  # molecules at any depth (CDML alone or inside SVG)
    invalid_symbol_found = False
    for mol_el in dom_ext.simpleXPathSearch( doc, path):
        atom_id_remap = {}
        mol = molecule()

        # atoms
        for atom_el in dom_ext.simpleXPathSearch( mol_el, "atom"):
            name = atom_el.getAttribute( 'name')
            if not name:
                # this molecule has an invalid symbol
                invalid_symbol_found = True
                break
            pos = dom_ext.simpleXPathSearch( atom_el, 'point')[0]
            x, y, z = [cm_to_float_coord( pos.getAttribute( axis))
                       for axis in ('x', 'y', 'z')]
            if name in PT:
                # its really an atom
                raw_charge = atom_el.getAttribute( 'charge')
                charge = int( raw_charge) if raw_charge else 0
                a = atom( symbol=name, charge=charge, coords=( x, y, z))
                mol.add_vertex( v=a)
            elif name in cdml_to_smiles:
                # its a known group
                group = smiles.text_to_mol( cdml_to_smiles[ name], calc_coords=0)
                a = group.vertices[0]
                a.x = x
                a.y = y
                a.z = z
                mol.insert_a_graph( group)
            # NOTE(review): an unknown name leaves `a` stale or undefined here
            atom_id_remap[ atom_el.getAttribute( 'id')] = a
        if invalid_symbol_found:
            # stops the whole reading, later molecules are not processed
            break

        # bonds
        for bond_el in dom_ext.simpleXPathSearch( mol_el, "bond"):
            type_code = bond_el.getAttribute( 'type')
            kind, order = type_code[0], type_code[1]
            if order == u'0':
                # we ignore bonds with order 0
                continue
            v1 = atom_id_remap[ bond_el.getAttribute( 'start')]
            v2 = atom_id_remap[ bond_el.getAttribute( 'end')]
            mol.add_edge( v1, v2, e=bond( order=int( order), type=kind))

        if mol.is_connected():
            yield mol
        else:
            # this is here to handle diborane and similar weird things
            for comp in mol.get_disconnected_subgraphs():
                yield comp
def generate_random_universe():
    """Return a new universe with 100 molecules and an empty attractor list.

    Each molecule is built from the result of one `random_atom()` call.
    """
    u = universe()
    u.molecules = [molecule(random_atom()) for _ in range(100)]
    u.attractors = []
    return u
def oasa_mol_to_bkchem_mol(mol, paper):
    """Convert an OASA molecule into a BKChem molecule placed on `paper`.

    Atoms and bonds are converted one by one.  When every atom of `mol` has
    both x and y set, the result is rescaled so that the average bond length
    equals the paper's standard bond length, and it is moved so that the
    centre of its bounding box lies at (320, 240).

    Returns the new BKChem molecule.
    """
    m = molecule.molecule(paper)
    # positions are usable only when no atom has a missing coordinate
    calc_position = not any(a.x is None or a.y is None for a in mol.atoms)

    minx = maxx = miny = maxy = None
    # atoms
    for a in mol.vertices:
        a2 = oasa_atom_to_bkchem_atom(a, paper, m)
        m.insert_atom(a2)
        if calc_position:
            # data for rescaling; compare against None explicitly because a
            # legitimate bound of 0 is falsy and would be overwritten
            if maxx is None or a2.x > maxx:
                maxx = a2.x
            if minx is None or a2.x < minx:
                minx = a2.x
            if miny is None or a2.y < miny:
                miny = a2.y
            if maxy is None or a2.y > maxy:
                maxy = a2.y

    # bonds
    bond_lengths = []
    for b in mol.edges:
        b2 = oasa_bond_to_bkchem_bond(b, paper)
        aa1, aa2 = b.vertices
        # atoms were inserted in the order of mol.vertices
        atom1 = m.atoms[mol.vertices.index(aa1)]
        atom2 = m.atoms[mol.vertices.index(aa2)]
        m.add_edge(atom1, atom2, b2)
        b2.molecule = m
        if calc_position:
            bond_lengths.append(
                math.sqrt((b2.atom1.x - b2.atom2.x)**2 +
                          (b2.atom1.y - b2.atom2.y)**2))

    # rescale (nothing to do for a molecule without atoms)
    if calc_position and minx is not None:
        scale = 1
        if bond_lengths:
            bl = sum(bond_lengths) / len(bond_lengths)
            if bl:
                scale = Screen.any_to_px(paper.standard.bond_length) / bl
        # with no bonds, or only zero-length ones, the size is kept and the
        # molecule is only moved
        movex = (maxx + minx) / 2
        movey = (maxy + miny) / 2
        trans = transform3d.transform3d()
        trans.set_move(-movex, -movey, 0)
        trans.set_scaling(scale)
        trans.set_move(320, 240, 0)
        for a in m.atoms:
            a.x, a.y, a.z = trans.transform_xyz(a.x, a.y, a.z)
    return m
def _read_molecule(self, el):
    """Build a molecule from element `el` and register it under its id.

    Each "vertex" child is read with `_read_atom` and inserted as an atom;
    each "edge" child is read with `_read_bond` and added between the bond's
    own `atom1` and `atom2`.  The molecule is stored in `self._mol_ids` under
    the element's ``id`` attribute and returned.
    """
    m = molecule(self.paper)
    # atoms
    for v in xpath.Evaluate("vertex", el):
        a2 = self._read_atom(v, m)
        m.insert_atom(a2)
    # bonds
    for b in xpath.Evaluate("edge", el):
        b2 = self._read_bond(b)
        b2.molecule = m
        m.add_edge(b2.atom1, b2.atom2, b2)
    self._mol_ids[el.getAttribute('id')] = m
    return m
def _read_molecule(self, el):
    """Read the molecule stored in `el` and return it.

    "vertex" children become atoms (via `_read_atom`), "edge" children become
    bonds (via `_read_bond`) connecting the atoms referenced by each bond.
    The finished molecule is also remembered in `self._mol_ids`, keyed by the
    ``id`` attribute of `el`.
    """
    mol = molecule(self.paper)

    for vertex_el in xpath.Evaluate("vertex", el):
        mol.insert_atom(self._read_atom(vertex_el, mol))

    for edge_el in xpath.Evaluate("edge", el):
        new_bond = self._read_bond(edge_el)
        new_bond.molecule = mol
        mol.add_edge(new_bond.atom1, new_bond.atom2, new_bond)

    self._mol_ids[el.getAttribute("id")] = mol
    return mol
def oasa_mol_to_bkchem_mol( mol, paper):
    """Create the BKChem counterpart of the OASA molecule `mol` on `paper`.

    All vertices and edges are converted.  If no atom of `mol` lacks an x or
    y coordinate, the new molecule is scaled to the paper's standard bond
    length (using the mean bond length) and its bounding-box centre is moved
    to (320, 240).  Returns the BKChem molecule.
    """
    m = molecule.molecule( paper)
    # rescaling makes sense only if all atoms carry coordinates
    has_coords = all( a.x is not None and a.y is not None for a in mol.atoms)

    # atoms
    xs = []
    ys = []
    for a in mol.vertices:
        a2 = oasa_atom_to_bkchem_atom( a, paper, m)
        m.insert_atom( a2)
        if has_coords:
            # data for rescaling; collected as lists so that the bounds are
            # taken with min/max and a coordinate of 0 is handled correctly
            xs.append( a2.x)
            ys.append( a2.y)

    # bonds
    bond_lengths = []
    for b in mol.edges:
        b2 = oasa_bond_to_bkchem_bond( b, paper)
        aa1, aa2 = b.vertices
        atom1 = m.atoms[ mol.vertices.index( aa1)]
        atom2 = m.atoms[ mol.vertices.index( aa2)]
        m.add_edge( atom1, atom2, b2)
        b2.molecule = m
        if has_coords:
            bond_lengths.append( math.sqrt( (b2.atom1.x-b2.atom2.x)**2 + (b2.atom1.y-b2.atom2.y)**2))

    # rescale
    if has_coords and xs:
        # a molecule without bonds (or with zero-length bonds only) cannot
        # provide a scale, so it is just moved
        mean_length = sum( bond_lengths) / len( bond_lengths) if bond_lengths else 0
        if mean_length:
            scale = Screen.any_to_px( paper.standard.bond_length) / mean_length
        else:
            scale = 1
        movex = (max( xs) + min( xs))/2
        movey = (max( ys) + min( ys))/2
        trans = transform3d.transform3d()
        trans.set_move( -movex, -movey, 0)
        trans.set_scaling( scale)
        trans.set_move( 320, 240, 0)
        for a in m.atoms:
            a.x, a.y, a.z = trans.transform_xyz( a.x, a.y, a.z)
    return m
def smiles_files_to_molecules(self, file_name, smile_pos, class_pos=None):
    """Read a file of SMILES records and turn them into labelled molecules.

    Every line is parsed with `rf.read_line`; reading stops at the first line
    that parses to an empty list.  `smile_pos` is the index of the SMILES
    field and `class_pos` the index of the class field.  Records whose SMILES
    gives no nodes or no edges are skipped.

    The label is the class field converted to int, with 0 mapped to -1; when
    the class field is empty, or `class_pos` is None, the label is '?'.

    The resulting list of ``[molecule, label]`` pairs is stored in
    `self.my_set` and returned.
    """
    compounds = []
    # the context manager closes the file even when parsing fails
    with open(file_name, "r") as f:
        while 1:
            txt = rf.read_line(f.readline())
            if txt == []:
                break
            mol_loc = self.smile_to_nodes_and_edges(txt[smile_pos])
            if mol_loc["nodes"] != [] and mol_loc["edges"] != []:
                # without a class column every compound is unlabelled
                clas = txt[class_pos] if class_pos is not None else None
                if clas:
                    label = int(clas)
                    if label == 0:
                        label = -1
                else:
                    label = '?'
                compounds.append([
                    molecule.molecule(mol_loc["nodes"], mol_loc["edges"],
                                      mol_loc["covalences"], self.p_q),
                    label
                ])
    self.my_set = compounds
    return compounds
def pybel_to_oasa_molecule_with_atom_map(self, pmol):
    """Convert the pybel molecule `pmol` to an OASA molecule.

    Returns a tuple ``(omol, patom_idx2oatom)`` where the second item maps
    the ``idx`` of every pybel atom to the OASA atom created for it.
    """
    omol = molecule()
    idx_to_atom = {}
    for pybel_atom in pmol.atoms:
        oasa_atom = self.pybel_to_oasa_atom(pybel_atom)
        omol.add_vertex(oasa_atom)
        idx_to_atom[pybel_atom.idx] = oasa_atom
    for ob_bond in openbabel.OBMolBondIter(pmol.OBMol):
        oasa_bond = self.pybel_to_oasa_bond(ob_bond)
        begin_idx = ob_bond.GetBeginAtomIdx()
        end_idx = ob_bond.GetEndAtomIdx()
        omol.add_edge(idx_to_atom[begin_idx], idx_to_atom[end_idx], oasa_bond)
    return omol, idx_to_atom
def pybel_to_oasa_molecule_with_atom_map( self, pmol):
    """Build an OASA molecule from `pmol` together with an atom lookup.

    The lookup (second returned item) is keyed by pybel atom ``idx`` and
    gives the corresponding OASA atom; it is also what the bond conversion
    uses to find the end points of each bond.
    """
    omol = molecule()
    # atoms first, remembering which OASA atom belongs to which pybel idx
    converted = [(pa.idx, self.pybel_to_oasa_atom( pa)) for pa in pmol.atoms]
    patom_idx2oatom = {}
    for idx, oa in converted:
        omol.add_vertex( oa)
        patom_idx2oatom[ idx] = oa
    # bonds
    for pb in openbabel.OBMolBondIter( pmol.OBMol):
        ob = self.pybel_to_oasa_bond( pb)
        ends = (pb.GetBeginAtomIdx(), pb.GetEndAtomIdx())
        oa1, oa2 = [patom_idx2oatom[ i] for i in ends]
        omol.add_edge( oa1, oa2, ob)
    return omol, patom_idx2oatom
def _read_body( self, file):
    """Read the structure part of a molfile from the open `file`.

    Two 3-character integer values give the number of atoms and bonds; the
    rest of that line is skipped.  The atoms and bonds are then read into a
    new molecule stored in `self.structure`.  The remaining lines are scanned
    for properties until the end marker is found.
    """
    atom_count = read_molfile_value( file, 3, conversion=int)
    bond_count = read_molfile_value( file, 3, conversion=int)
    # nothing more interesting
    file.readline()
    # read the structure
    structure = molecule()
    self.structure = structure
    for _ in range( atom_count):
        structure.add_vertex( v=self._read_atom( file))
    for _ in range( bond_count):
        b, i, j = self._read_bond( file)
        structure.add_edge( i, j, e=b)
    # NOTE(review): the markers below are reproduced exactly as found - confirm
    # their spacing against the molfile format.
    for line in file:
        stripped = line.strip()
        if stripped == "M END":
            break
        if stripped.startswith( "M "):
            self._read_property( stripped)
def _read_body(self, file):
    """Parse atoms, bonds and properties of a molfile body.

    `file` is an open file positioned at the counts line.  The result is
    stored in `self.structure`; property lines met before the end marker are
    passed (stripped) to `_read_property`.
    """
    atoms, bonds = [read_molfile_value(file, 3, conversion=int)
                    for _ in range(2)]
    # nothing more interesting
    file.readline()

    # read the structure
    self.structure = molecule()
    for _ in range(atoms):
        a = self._read_atom(file)
        self.structure.add_vertex(v=a)
    for _ in range(bonds):
        edge, start, end = self._read_bond(file)
        self.structure.add_edge(start, end, e=edge)

    # trailing property block
    for line in file:
        content = line.strip()
        if content == "M END":
            break
        if not content.startswith("M "):
            continue
        self._read_property(content)
def all_file_to_molecules(self, file_name, class_label="", pos_class="active", neg_class="inactive"):
    """Read all molecule records of `file_name` into labelled molecules.

    Records are taken one after another with `rf.creat_local_txt` until it
    returns -1.  Each record is converted by `mol_to_nodes_and_edges`, which
    receives `class_label` and `pos_class`; records without nodes or without
    edges are skipped.  `neg_class` is not used by this method.

    Returns a list of ``[molecule, class]`` pairs.
    """
    compounds = []
    # the context manager makes sure the file is closed on return and on error
    with open(file_name, "r") as f:
        while 1:
            txt = rf.creat_local_txt(f)
            if txt == -1:
                return compounds
            mol_loc = self.mol_to_nodes_and_edges(txt, class_label, pos_class)
            if mol_loc["nodes"] != [] and mol_loc["edges"] != []:
                compounds.append([
                    molecule.molecule(mol_loc["nodes"], mol_loc["edges"],
                                      mol_loc["covalences"], self.p_q),
                    mol_loc["class"]
                ])
    return compounds
def add_template_from_CDML( self, file):
    """Load all molecules of a CDML file as templates.

    `file` is either a path to an existing file or a name that is looked up
    with `os_support.get_path( file, "template")`.  A missing or unparsable
    file only produces a warning.  Old CDML versions are converted to the
    current one before reading.

    Each ``molecule`` element is appended to `self.templates` and the
    molecule built from it to `self._prepared_templates`.
    """
    if not os.path.isfile( file):
        located = os_support.get_path( file, "template")
        if not located:
            # report the name that was asked for, not the failed lookup result
            warn( "template file %s does not exist - ignoring" % file)
            return
        file = located
    try:
        doc = dom.parse( file).getElementsByTagName( 'cdml')[0]
    except xml.sax.SAXException:
        warn( "template file %s cannot be parsed - ignoring" % file)
        return
    # when loading old versions of CDML try to convert them, but do nothing when they cannot be converted
    import CDML_versions
    CDML_versions.transform_dom_to_version( doc, config.current_CDML_version)
    Store.app.paper.onread_id_sandbox_activate()
    try:
        for tmp in doc.getElementsByTagName('molecule'):
            self.templates.append( tmp)
            m = molecule( Store.app.paper, package=tmp)
            self._prepared_templates.append( m)
    finally:
        # just switch the id_managers, no id mangling; done in `finally` so
        # that a failing template does not leave the sandbox active
        Store.app.paper.onread_id_sandbox_finish( apply_to=[])
def show_dump():
    """Load the graph dumped in "aaa9.txt" and show each of its parts.

    Parallel edges (edges joining the same pair of vertices) are removed
    first; then coordinates are calculated for, and a picture shown of, every
    disconnected part that has more than one vertex.
    """
    from molecule import molecule
    from graph.graph import graph
    imp = molecule()
    # open() instead of the Python 2 only file(); closed after reading
    with open("aaa9.txt", "r") as dump_file:
        imp.read_simple_text_file(dump_file)

    # remove one duplicate edge per pass and rescan, because the edge list
    # changes with every removal
    removed = True
    while removed:
        removed = False
        for e in list(imp.edges):
            for e2 in list(imp.edges):
                if (e is not e2) and set(e.vertices) == set(e2.vertices):
                    imp.disconnect_edge(e2)
                    removed = True
                    break
            if removed:
                break

    import coords_generator
    for part in imp.get_disconnected_subgraphs():
        if len(part.vertices) > 1:
            coords_generator.calculate_coords(part, force=1)
            coords_generator.show_mol(part)
def _find_parallel_edge( mol):
    """Return an edge of `mol` that duplicates another edge, or None."""
    edges = list( mol.edges)
    for e in edges:
        for e2 in edges:
            if (e is not e2) and set( e.vertices) == set( e2.vertices):
                return e2
    return None


def show_dump():
    """Read the molecule dump from "aaa9.txt" and display its fragments.

    All but one of the edges connecting the same two vertices are
    disconnected, after which every disconnected subgraph with more than one
    vertex gets its coordinates calculated (forced) and is shown.
    """
    from molecule import molecule
    from graph.graph import graph
    imp = molecule()
    # the `with` block closes the file; open() also works where the old
    # file() builtin does not exist
    with open( "aaa9.txt", "r") as source:
        imp.read_simple_text_file( source)

    # the search restarts after every removal as the edge set has changed
    duplicate = _find_parallel_edge( imp)
    while duplicate is not None:
        imp.disconnect_edge( duplicate)
        duplicate = _find_parallel_edge( imp)

    import coords_generator
    for part in imp.get_disconnected_subgraphs():
        if len( part.vertices) > 1:
            coords_generator.calculate_coords( part, force=1)
            coords_generator.show_mol( part)
def add_template_from_CDML(self, file):
    """Register the molecules of the CDML file `file` as templates.

    When `file` is not an existing path it is resolved through
    `os_support.get_path(file, "template")`.  If the file cannot be found or
    parsed, a warning is issued and nothing is added.  The document is
    converted to `config.current_CDML_version` before its ``molecule``
    elements are stored in `self.templates` and the molecules created from
    them in `self._prepared_templates`.
    """
    requested = file
    if not os.path.isfile(file):
        file = os_support.get_path(file, "template")
        if not file:
            # `file` is empty at this point, so the original name is reported
            warn("template file %s does not exist - ignoring" % requested)
            return
    try:
        doc = dom.parse(file).getElementsByTagName('cdml')[0]
    except xml.sax.SAXException:
        warn("template file %s cannot be parsed - ignoring" % file)
        return

    # when loading old versions of CDML try to convert them, but do nothing when they cannot be converted
    import CDML_versions
    CDML_versions.transform_dom_to_version(doc, config.current_CDML_version)

    paper = Store.app.paper
    paper.onread_id_sandbox_activate()
    try:
        for tmp in doc.getElementsByTagName('molecule'):
            self.templates.append(tmp)
            self._prepared_templates.append(molecule(Store.app.paper, package=tmp))
    finally:
        # always leave the sandbox again, even if a template failed to load;
        # just switch the id_managers, no id mangling
        paper.onread_id_sandbox_finish(apply_to=[])
def write_restraints(start, end, start_xvg, end_xvg, top, includes, n, ndx, Nchains):
    """Write dihedral restraint files for the interior points of a path.

    The phi/psi values of the selected residues are read from `start_xvg`
    and `end_xvg` (lines ending in ``-<residue>``), one entry per chain.  For
    every k in 1 .. n-2 the values are interpolated linearly and written to
    ``dihre_<k>_chain_<mol>.itp``; a matching ``topol_<k>.top`` is written
    that includes these files.

    start     -- passed with `ndx` to `res_selection.res_select`
    end       -- not used by this function
    top       -- path of the topology file
    includes  -- per-chain itp paths (may be empty)
    n         -- number of points (converted with int())
    Nchains   -- number of chains
    """
    with open(start_xvg, 'r') as xvg_file:
        start_xvg = xvg_file.readlines()
    with open(end_xvg, 'r') as xvg_file:
        end_xvg = xvg_file.readlines()
    selection = res_selection.res_select(start, ndx)
    n = int(n)

    # create the path
    startpts = {}
    endpts = {}
    for r in selection:
        startpts[r] = []
        for line in start_xvg:
            if re.search(r'\-%s$' % r, line):
                startpts[r].append([float(line.split()[0]), float(line.split()[1])])
    for r in selection:
        endpts[r] = []
        for line in end_xvg:
            if re.search(r'\-%s$' % r, line):
                endpts[r].append([float(line.split()[0]), float(line.split()[1])])

    sys.stderr.write('%s' % includes)
    # one topology per point, with the per-chain includes renamed
    for k in range(1, n-1):
        with open(top) as top_file:
            in_top = top_file.read()
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                in_top = re.sub(includename, 'dihre_%d_chain_%d.itp' % (k, mol), in_top)
        # closed (and flushed) here because the same file may be rewritten below
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    for k in range(1, n-1):
        for mol in range(Nchains):
            with open('dihre_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as moltop_file:
                        moltop = moltop_file.read()
                    restraint_itp.write(moltop)
                # write the initial part of the topology file
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al phi dphi kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # no per-chain itp: insert the include into the topology
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        with open(top, 'r') as top_file:
                            in_itp = top_file.read().split('; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "dihre_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])
                for r in selection:
                    # atoms are taken in the order in which `protein` yields them
                    phi = [a for a in protein
                           if (a.resnr == int(r) and
                               (a.atomname == 'CA' or a.atomname == 'N' or a.atomname == 'C'))
                           or (a.resnr == int(r)-1 and a.atomname == 'C')]
                    psi = [a for a in protein
                           if (a.resnr == int(r) and
                               (a.atomname == 'N' or a.atomname == 'CA' or a.atomname == 'C'))
                           or (a.resnr == int(r)+1 and a.atomname == 'N')]
                    # write phi, psi angles
                    phi_val = startpts[r][mol][0]+(endpts[r][mol][0]-startpts[r][mol][0])/n*k
                    psi_val = startpts[r][mol][1]+(endpts[r][mol][1]-startpts[r][mol][1])/n*k
                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr,
                                           phi[3].atomnr, phi_val, 0, 1))
                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr,
                                           psi[3].atomnr, psi_val, 0, 1))
def get_molecules( self, file_name):
    """Read the XML document `file_name` and return the objects found in it.

    The returned list contains, in this order, the molecules built from
    "fragment" elements placed directly on a page, the text objects built
    from "t" elements placed directly on a page, the plus symbols and the
    arrows.

    NOTE(review): the nesting below was reconstructed from a source with
    collapsed whitespace - confirm it against the real file.
    """
    doc = dom.parse( file_name)
    molecules = []

    # read colors; r, g, b are scaled by 255 and stored as "#rrggbb"
    colors=[]
    for elem7 in doc.getElementsByTagName("color"):
        red=(float(elem7.getAttribute("r"))*255)
        green=(float(elem7.getAttribute("g"))*255)
        blue=(float(elem7.getAttribute("b"))*255)
        colors.append("#%02x%02x%02x" % (red,green,blue))

    # read fonts; maps font id to the font name
    fonts={}
    for elem8 in doc.getElementsByTagName("font"):
        family=str(elem8.getAttribute("name"))
        fonts[int(elem8.getAttribute("id"))]=family

    # read molecules
    for elem1 in doc.getElementsByTagName("fragment"):
        if elem1.parentNode.nodeName=="page":
            mol = molecule( paper=self.paper)
            atom_id_to_atom = {}
            atom_id_to_text = {}
            for elem2 in elem1.childNodes:

                # atom
                if elem2.nodeName=="n":
                    # defaults used when the node carries no text
                    font = ""
                    Size = 12
                    text = "C"
                    color1="#000000"
                    for elem3 in elem2.childNodes:
                        if elem3.nodeName=="t":
                            # color attributes are used as index + 2 into `colors`
                            if elem3.hasAttribute("color"):
                                color1=colors[int(elem3.getAttribute("color"))-2]
                            text = ""
                            for elem4 in elem3.childNodes:
                                if elem4.nodeName=="s":
                                    if elem3.hasAttribute("color"):
                                        color1=colors[int(elem3.getAttribute("color"))-2]
                                    for Id, Font in fonts.items():
                                        if Id==int(elem4.getAttribute("font")):
                                            font=Font
                                    Size= int(elem4.getAttribute("size"))
                                    text += dom_ext.getAllTextFromElement( elem4).strip()

                    position = elem2.getAttribute("p").split()
                    assert len( position) == 2

                    # we must postpone symbol assignment until we know the valency of the atoms
                    atom_id_to_text[ elem2.getAttribute('id')] = text
                    atom = mol.create_vertex()
                    atom.line_color = color1
                    atom.font_family = font
                    atom.font_size = Size
                    atom.x = float( position[0])
                    atom.y = float( position[1])
                    mol.add_vertex( atom)
                    atom_id_to_atom[ elem2.getAttribute('id')] = atom

                # bond
                # maps the value of the Display attribute of element "b" to the value stored in bond.type
                bondType2={"WedgeBegin":"w",
                           "WedgedHashBegin":"h",
                           "Wavy":"a",
                           "Bold":"b",
                           "Dash":"d"
                           }

                if elem2.nodeName=="b":
                    if elem2.hasAttribute("color"):
                        color2 = colors[(int(elem2.getAttribute("color"))-2)]
                    else:
                        color2="#000000"
                    order = 1
                    if elem2.hasAttribute("Order"):
                        order = int( elem2.getAttribute("Order"))
                    bond = mol.create_edge()
                    if elem2.hasAttribute("Display"):
                        display = elem2.getAttribute("Display").strip()
                        for bondC, bondB in bondType2.items():
                            if bondC ==display:
                                bond.type = bondB
                    bond.line_color = color2
                    bond.order = order
                    # "B" and "E" hold the ids of the two atoms of the bond
                    atom1 = atom_id_to_atom[ elem2.getAttribute("B")]
                    atom2 = atom_id_to_atom[ elem2.getAttribute("E")]
                    mol.add_edge( atom1, atom2, bond)

            # here we reassign the symbols
            for id, atom in atom_id_to_atom.items():
                text = atom_id_to_text[ id]
                v = mol.create_vertex_according_to_text( atom, text)
                atom.copy_settings( v)
                mol.replace_vertices( atom, v)
                atom.delete()
            # finally we add the molecule to the list of molecules for output
            molecules.append( mol)

    # read texts
    # maps a bit of the "face" attribute to the markup tag wrapped around the text
    textik={2:"i",
            1:"b",
            32:"sub",
            64:"sup"}

    for elem5 in doc.getElementsByTagName("t"):
        if elem5.parentNode.nodeName=="page":
            # NOTE(review): relies on map() returning a list (len, indexing)
            position = map( float, elem5.getAttribute("p").split())
            assert len( position) == 2
            celyText=""
            for elem51 in elem5.childNodes:
                if elem51.nodeName=="s":
                    for elem52 in elem51.childNodes:
                        if isinstance( elem52, dom.Text):
                            rodice=[]
                            text100=elem52.data
                            if elem51.hasAttribute("face"):
                                Face01=int(elem51.getAttribute("face"))
                                # collect the tag of every bit set in the face value
                                for face, parent in textik.items():
                                    for i in range(9):
                                        if not Face01&2**i==0:
                                            if face==Face01&2**i:
                                                rodice.append(parent)
                            for rodic in rodice:
                                text100 = "<%s>%s</%s>" % (rodic,text100,rodic)
                            celyText += text100

                    if elem5.hasAttribute("color"):
                        color3=colors[(int(elem5.getAttribute("color"))-2)]
                    else:
                        color3="#000000"

                    font_id = elem51.getAttribute("font")
                    if font_id != "":
                        font=fonts[int(font_id)]
                    #text = dom_ext.getAllTextFromElement(elem51)
            #print celyText
            text = celyText
            t = text_class( self.paper, position, text=text)
            # NOTE(review): color3, font and elem51 are only bound inside the
            # loops above (font possibly by the molecule section) - confirm
            # they are always defined here.
            t.line_color = color3
            #print elem51
            if elem51.hasAttribute("size"):
                t.font_size = int( elem51.getAttribute("size"))
            if font:
                t.font_family = font
            molecules.append(t)

    # read graphics - plus
    for elem6 in doc.getElementsByTagName("graphic"):
        if elem6.getAttribute("GraphicType")=="Symbol" and elem6.getAttribute("SymbolType")=="Plus":
            position = map( float, elem6.getAttribute("BoundingBox").split())
            # only the first two numbers of the bounding box are used
            position2=[position[0],position[1]]
            assert len(position2) == 2
            if elem6.hasAttribute("color"):
                color4=colors[(int(elem6.getAttribute("color"))-2)]
            else:
                color4="#000000"
            pl = plus(self.paper, position2)
            pl.line_color = color4
            molecules.append(pl)

    # read arrows; sipka ends up as [color, Head3D, Tail3D]
    sipka=[]
    #for elem71 in doc.getElementsByTagName("graphic"):
        #if elem71.getAttribute("GraphicType")=="Line":
    for elem7 in doc.getElementsByTagName("arrow"):
        sipka.insert(0,elem7.getAttribute('Head3D') )
        sipka.insert(1,elem7.getAttribute('Tail3D') )
        # NOTE(review): without a "color" attribute nothing is inserted at
        # index 0, so sipka[2] below is out of range - confirm.
        if elem7.hasAttribute("color"):
            sipka.insert(0,colors[(int(elem7.getAttribute("color"))-2)])
        point1 = map( float, sipka[1].split())
        point2 = map( float, sipka[2].split())
        arr = arrow( self.paper, points=[point2[0:2],point1[0:2]], fill=sipka[0])
        arr.line_color=sipka[0]
        molecules.append( arr)
        sipka=[]
    return molecules
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n,
                     ndx_file, Nchains):
    """Write per-point topologies and dihedral restraint files for a string.

    For each of the `n` string points k and each chain, a file
    ``res_<k>_chain_<mol>.itp`` with phi/psi restraints of the selected
    residues is written, together with a ``topol_<k>.top`` that includes it.
    The force constant is written as the placeholder ``KFAC``.

    The dihedral values are interpolated linearly between `start_xvg` and
    `end_xvg` when `initial_confs` is None or empty; otherwise g_rama is run
    on every structure in `initial_confs` and its output is used (the
    start/end xvg parameters are then ignored).

    inp       -- object providing getOutputDir(), where the g_rama output is read
    start     -- structure used to map the index atoms to residues
    end       -- not used by this function
    tpr       -- passed to g_rama as ``-s``
    top       -- path of the topology file
    includes  -- per-chain itp paths (may be empty)
    ndx_file  -- index file with the atoms of the residues to restrain
    """
    # Get the atoms involved with the residues to use for dihedrals (might be more than one atom
    # in the index per residue, since it's probably generated by make_ndx)
    ndx_atoms = res_selection.read_ndx(ndx_file)
    # Map them to each affected residue so we just get the residue numbers back
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)  # number of points in the string, including start and end point

    use_interpolation = False

    if initial_confs is None or len(initial_confs) == 0:
        use_interpolation = True
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Have to generate the dihedrals ourselves from the given initial structures
        # Note: when we get an initial_confs[] array, we use it for all points and
        # the start/end input parameters are completely ignored
        # TODO: assert that len(initial_confs) == n otherwise?
        ramaprocs = {}
        stringpts = {}  # Will have 4 levels: stringpoint, residue, chain, phi/psi value
        # dont generate spam from g_rama; the null device stays open until all
        # sub-processes have finished and is then closed
        with open(os.devnull, 'w') as FNULL:
            # Run g_rama (in parallel) on each structure and output to a temporary .xvg
            for i in range(n):
                # TODO: check for and use g_rama_mpi.. like everywhere else
                ramaprocs[i] = Popen(['g_rama', '-f', initial_confs[i], '-s', tpr,
                                      '-o', '0%3d.xvg' % i],
                                     stdout=FNULL, stderr=FNULL)
            # Go through the output from the rama sub-processes and read the xvg outputs
            for i in range(n):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding g_rama task has ended
                ramaprocs[i].communicate()
                # Read back and parse like for the start/end_xvg above
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    # Rewrite the topology to include the res itp files instead of the original per-chain itps
    # (if any). There will be one topol_x.top per string point
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                in_top = re.sub(includename, 'res_%d_chain_%d.itp' % (k, mol), in_top)
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as moltop_f:
                        moltop = moltop_f.read()
                    restraint_itp.write(moltop)
                # write the initial part of the topology file
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al type phi dphi kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # no per-chain itp: insert the include into the topology
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split('; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                # Create a lookup-table for the protein topology that maps residue to dihedrally
                # relevant backbone atom indices for N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                # Use the lookup-table built above and get the dihedral specification atoms needed
                # for each residue in the selection.
                for r in selection:
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], dih_atoms[r]['N'],
                           dih_atoms[r]['CA'], dih_atoms[r]['C']]
                    # psi is N, CA and C on this residue and N on the next
                    psi = [dih_atoms[r]['N'], dih_atoms[r]['CA'],
                           dih_atoms[r]['C'], dih_atoms[r + 1]['N']]

                    # Note: in the Gromacs 4.6+ format, the k-factor is here. Before, it was in
                    # the .mdp as dihre_fc.
                    # Also see reparametrize.py
                    if use_interpolation:
                        if n > 1:
                            # k is from 0 to n-1, so map it so we get a factor from 0 to 1
                            phi_val = startpts[r][mol][0] + \
                                k * (endpts[r][mol][0] - startpts[r][mol][0]) / (n - 1)
                            psi_val = startpts[r][mol][1] + \
                                k * (endpts[r][mol][1] - startpts[r][mol][1]) / (n - 1)
                        else:
                            # a single point cannot be interpolated: use the start values
                            phi_val = startpts[r][mol][0]
                            psi_val = startpts[r][mol][1]
                    else:
                        # Use the values extracted from the initial_confs[] structures above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Since we need different force constants in different stages, we need to put
                    # a searchable placeholder in the file here and replace it later. KFAC is
                    # normally a %8.4f number.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
def _read_inchi( self, text):
    """Build `self.structure` from the InChI string `text`.

    The string is split into layers, the version is checked (only version
    1.0 is accepted) and the sum and connectivity layers are read.  The
    remaining layers are then processed repeatedly, each time starting from a
    cleaned-up structure, until the structure has no free valencies and no
    bonds without order, or until no further improvement is possible.

    Raises oasa_inchi_error when `text` is empty or when the localization of
    bonds, charges or movable hydrogens fails, and
    oasa_unsupported_inchi_version_error for an unknown or unsupported
    version.
    """
    if not text:
        raise oasa_inchi_error( "No inchi was given")
    self.structure = molecule()
    self.layers = self.split_layers( text)
    # version support (very crude)
    self.version = self._check_version( self.layers[0])
    if not self.version:
        raise oasa_unsupported_inchi_version_error( self.layers[0])
    elif str( self.version[0]) != '1' or str( self.version[1]) != '0':
        raise oasa_unsupported_inchi_version_error( self.layers[0])

    self.hs_in_hydrogen_layer = self.get_number_of_hydrogens_in_hydrogen_layer()
    self.read_sum_layer()
    self.read_connectivity_layer()
    # stepped with .next() below whenever a run did not give a valid structure
    self._charge_mover = self._move_charge_somewhere_else()
    repeat = True
    run = 0
    # we have to repeat this step in order to find the right positioning of movable hydrogens
    while repeat and not self._no_possibility_to_improve:
        # cleanup - undo what the previous run added or changed
        for h in self._added_hs:
            self.structure.remove_vertex( h)
        for b in self.structure.edges:
            b.order = 1
        for v in self.structure.vertices:
            # NOTE(review): presumably a reset of the valency, as for `a` below
            v.symbol = v.symbol
            v.charge = 0
        self.cleanup()

        # the code itself
        run += 1
        # safety limit on the number of runs (the counter restarts after a
        # successful charge move)
        assert run < 50
        self._deal_with_notorious_groups()
        self.process_forced_charges()
        self.read_hydrogen_layer( run=run)
        self.read_charge_layer()
        self.read_p_layer()
        self.deal_with_da_bonds()
        #self._deal_with_valencies()
        self.compensate_for_forced_charges()
        self.structure.add_missing_bond_orders()
        #self.read_double_bond_stereo_layer()

        # here we check out if the molecule seems ok
        fvs = [v for v in self.structure.vertices if v.free_valency]
        # NOTE(review): relies on filter() returning a list (empty == falsy)
        if not fvs and not filter( None, [not v.order for v in self.structure.edges]):
            repeat = False
        else:
            if len( fvs) == 1:
                # a single atom with free valency may be fixed by a higher valency
                a = fvs[0]
                a.symbol = a.symbol # this resets the valency
                a.raise_valency_to_senseful_value()
                if not a.free_valency:
                    repeat = False

        if repeat and self._no_possibility_to_improve and self.charge:
            # try the next charge placement; if there is one, start over
            try:
                self._charge_mover.next()
            except StopIteration:
                pass
            else:
                self._no_possibility_to_improve = False
                run = 0

    if repeat and self._no_possibility_to_improve:
        ## if len( filter( None, [v.free_valency for v in self.structure.vertices])) == 1:
        ##   print
        ##   print [(v.symbol, v.valency, v.free_valency) for v in self.structure.vertices if v.free_valency], filter( None, [not v.order for v in self.structure.edges]), text
        ## if sum( [v.charge for v in self.structure.vertices]) != self.charge:
        ##   print "Charge problem", sum( [v.charge for v in self.structure.vertices]), self.charge
        #pass
        raise oasa_inchi_error( "Localization of bonds, charges or movable hydrogens failed")
# valve constants ---------------------------------------------------------------------------------------------------------
ValveDefaultActualFlow = 800 / 3600.0  # From average HX flow in unit CW circuit.
ValveDefaultDP = 1.2 * Ps
ValveDefaultOpening = 0.5  # Default valve opening.
ValveEqualPercR = 40.0  # Dimensionless constant for valve equalpercentage.
# Cv is chosen so that the default opening gives the default flow at the default pressure drop
ValveDefaultCv = ValveDefaultActualFlow / \
    (math.pow(ValveEqualPercR, ValveDefaultOpening - 1) * math.sqrt(ValveDefaultDP))  # m^3/s/(Pa^0.5)
ValveHydraulicTau = 10.0  # seconds. Time constant of valve hydraulics.
ValveLength = 0.5  # m
ValveWidth = 0.4  # m

#fluidpackage constants -------------------------------------------------------------------------------------------------
fluidpackage = list()  # list of component object
# The eighth argument is -1 - log10(110000 / (1.2411 * 10**7)); the divisor is
# the value passed as the seventh argument.  The divisor has to be
# parenthesized, otherwise the quotient is multiplied by 10**7 instead.
fluidpackage.append(component.component(molecule.molecule(
    "Naphtha", "GTL Naphtha", 0.157, 0.00164, 661.4959, 273 + 495, 1.2411 * 10**7,
    -1 - np.log10(110000 / (1.2411 * (10**7))), 150.5, 0.6, 0, 0), 0))
fluidpackage.append(component.component(molecule.molecule(
    "Air", "Air", 0.02897, 1.983 * math.pow(10, -5), 1.225, 132.41,
    3.72 * math.pow(10, 6),
    0.0335,
    0.8*31.15 + 0.2*28.11,
    0.8*(-0.01357) + 0.2*(-3.7) * math.pow(10, -6),
    0.8*2.68*math.pow(10, -5) + 0.2*1.746 * math.pow(10, -5),
    0.8 * (-1.168) * math.pow(10, -8) + 0.2 * (-1.065) * math.pow(10, -8)), 0))
# Density: 1.977 kg/m3 (gas at 1 atm and 0 °C)
# NOTE(review): the third argument looks like a molar mass in kg/mol for the
# other components; 0.018 is the value of water, not CO2 - confirm.
fluidpackage.append(component.component(molecule.molecule(
    "CO2", "Carbon Dioxide", 0.018, 0.07 * 0.001, 1.977, 304.25,
    7.39 * math.pow(10, 6), 0.228, 19.8, 0.07344, -5.602E-05, 1.715E-08), 0))
# Density: 1.145 kg/m3 at 25 °C, 1 atm
fluidpackage.append(component.component(molecule.molecule(
    "CO", "Carbon Monoxide", 0.02801, 0.0001662 * 0.001, 1.145), 0))
# Density: 0.08988 g/L = 0.08988 kg/m3 (0 °C, 101.325 kPa)
fluidpackage.append(component.component(molecule.molecule(
    "H2", "Hydrogen", 0.0020158, 8.76 * math.pow(10, -6), 0.08988), 0))
# NOTE(review): 5.1953E6 repeats the digits of the preceding argument
# (5.1953) - looks like a copy-paste slip, confirm the value.
fluidpackage.append(component.component(molecule.molecule(
    "He", "Helium", HeMolarMass, 0, 0.1786, 5.1953, 5.1953E6,
    -0.390, 20.8, 0, 0, 0), 0))
def get_transformed_template(self, n, coords, type='empty', paper=None):
    """Instantiate template number `n` and transform it so that it fits `coords`.

    `type` is the type of connection:
      'empty' - the template is only drawn, no connection; `coords` is (x, y)
      'atom1' - attached to a single atom; `coords` is (x1, y1, x2, y2)
      'atom2' - attached to an atom with more than 1 bond; `coords` is (x1, y1, x2, y2)
      'bond'  - attached to a bond; `coords` is (x1, y1, x2, y2)

    `paper` defaults to Store.app.paper.  Returns the transformed template
    molecule, or None (after a warning) when a 'bond' connection is requested
    for a template that lacks t_bond_first / t_bond_second.
    As a side effect self._scale_ratio is reset to 1 and, for 'bond', set to
    the applied scaling factor.
    """

    def distance(ax, ay, bx, by):
        return math.sqrt((ax - bx)**2 + (ay - by)**2)

    def angle(ax, ay, bx, by):
        return math.atan2(ax - bx, ay - by)

    target_paper = paper or Store.app.paper
    # the sandbox must be active while the template is read so that ids get mangled
    target_paper.onread_id_sandbox_activate()
    current = molecule(target_paper, package=self.templates[n])
    target_paper.onread_id_sandbox_finish(apply_to=[current])
    current.name = ''
    self._scale_ratio = 1
    trans = transform()

    if type == 'bond':
        if not (current.t_bond_first and current.t_bond_second):
            warn("this template is not capable to be added to bond - sorry.")
            return None
        current.delete_items([current.t_atom], redraw=0, delete_single_atom=0)
        xt1, yt1 = current.t_bond_first.get_xy()
        xt2, yt2 = current.t_bond_second.get_xy()
        x1, y1, x2, y2 = coords
        # the ratio is kept on self - further needed for bond.bond_width transformation
        self._scale_ratio = distance(x1, y1, x2, y2) / distance(xt1, yt1, xt2, yt2)
        trans.set_move(-xt1, -yt1)
        trans.set_rotation(angle(xt1, yt1, xt2, yt2) - angle(x1, y1, x2, y2))
        trans.set_scaling(self._scale_ratio)
        trans.set_move(x1, y1)
    elif type in ('atom1', 'atom2'):
        xt1, yt1 = current.t_atom.get_xy()
        xt2, yt2 = current.next_to_t_atom.get_xy()
        x1, y1, x2, y2 = coords
        trans.set_move(-xt2, -yt2)
        trans.set_scaling(distance(x1, y1, x2, y2) / distance(xt1, yt1, xt2, yt2))
        trans.set_rotation(angle(xt1, yt1, xt2, yt2) - angle(x1, y1, x2, y2))
        trans.set_move(x2, y2)
    elif type == 'empty':
        xt1, yt1 = current.t_atom.get_xy()
        xt2, yt2 = current.next_to_t_atom.get_xy()
        x1, y1 = coords
        bond_length = Screen.any_to_px(Store.app.paper.standard.bond_length)
        current.delete_items([current.t_atom], redraw=0, delete_single_atom=0)
        trans.set_move(-xt2, -yt2)
        trans.set_scaling(bond_length / distance(xt1, yt1, xt2, yt2))
        trans.set_move(x1, y1)

    self.transform_template(current, trans)

    # remove obsolete info from template
    if type == 'atom2':
        current.t_atom.x = x1
        current.t_atom.y = y1
    elif type == 'atom1':
        current.delete_items([current.t_atom], redraw=0, delete_single_atom=0)
    current.t_atom = None
    current.t_bond_first = None
    current.t_bond_second = None

    # return ready template
    return current
def __init__(self, source, outfile, molfile, goalsnr, nphot, kappa=None,
             tnorm=2.735, velocity_function=None, seed=1971, minpop=1e-4,
             fixset=1.e-6, blending=False, nchan=50, rt_lines=[0, 1, 2],
             velres=0.1):
    """
    Initialize a simulation.

    Args:
        source (str): Model file.
        outfile (str): File to write population levels to.
        molfile (str): Molecular data file in the LAMDA format.
        goalsnr (float): Goal signal-to-noise ratio of the run.
        nphot (float): Number of photons to use in the radiative transfer.
            Stored as an array of length ncell filled with this value.
        kappa (optional[str]): A string describing the dust parameters.
            For the use of Ossenkopf & Henning (1994) opacities it must
            take the form:
                kappa_params = 'jena, TYPE, COAG'
            where TYPE must be 'bare', 'thin' or 'thick' and COAG must be
            'no', 'e5', 'e6', 'e7' or 'e8'.
            Alternatively, a simple power law can be used where the
            parameters are given by:
                kappa_params = 'powerlaw, freq0, kappa0, beta'
            where freq0 is in [Hz], kappa0 in [cm^2/g], and beta is the
            frequency index. If nothing is given, we assume no opacity.
        tnorm (optional[float]): Background temperature in [K]. Default is
            the CMB at 2.735K.
        velocity_function (optional): If given, it is passed to
            simulation.import_velocity and the result replaces the model's
            velocity structure (self.model.velo).
        seed (optional[int]): Seed for the random number generators.
        minpop (optional[float]): Minimum population for each energy level.
        fixset (optional[float]): The smallest number to be counted.
        blending (optional[bool]): Whether to include line blending or not.
        nchan (optional[int]): Number of channels per trans for raytracing.
        rt_lines (optional[int list]): List of transitions to raytrace.
        velres (optional[float]): Channel res for raytracing (km/s).

    Raises:
        Exception: If the molecular data file cannot be parsed.
        ValueError: If the collisional rates are missing after set_rates.
    """
    self.source = source
    self.outfile = outfile
    self.molfile = molfile
    self.goalsnr = goalsnr
    # nphot is not stored yet; it is set below as an array of size ncell.
    self.kappa_params = kappa
    self.tnorm = tnorm
    self.seed = seed
    self.minpop = minpop
    self.fixset = fixset
    self.blending = blending
    self.nchan = nchan
    # Copy the list: the default argument is a single shared list object, so
    # storing it directly would let one instance's edits leak into the others.
    self.rt_lines = list(rt_lines)
    self.velres = velres * 1000.  # convert to m/s

    t0 = time()

    # Read in the source model (default is RATRAN).
    self.model = model(self.source, 'ratran')
    self.ncell = self.model.ncell

    # Have user input velocity function.
    if velocity_function is not None:
        self.model.velo = simulation.import_velocity(velocity_function)

    # Read in the molfile.  Only ordinary errors are translated; a bare
    # `except:` would also swallow KeyboardInterrupt and SystemExit.
    try:
        self.mol = molecule(self, self.molfile)
    except Exception:
        raise Exception("Couldn't parse molecular data.")
    self.nlev = self.mol.nlev
    self.nline = self.mol.nline
    self.ntrans = self.mol.ntrans
    self.ntrans2 = self.mol.ntrans2
    self.ntemp = self.mol.ntemp
    self.ntemp2 = self.mol.ntemp2

    # Include thermal broadening in the line widths.
    v_turb2 = self.model.doppb**2.0
    v_therm2 = 2 * sc.k * self.model.tkin / sc.m_p / self.mol.molweight
    self.model.doppb = np.sqrt(v_turb2 + v_therm2)

    # Calculate the collisional rates.
    self.mol.set_rates(self.model.tkin)
    if self.mol.up is None or self.mol.down is None:
        raise ValueError("Need to calculate rates.")

    # Initialize dust emissivity, converting from [m^2/kg] to [m^-1/n(H2)]
    # such that tau_dust = knu.
    self.norm = simulation.planck(self.mol.freq[0], self.tnorm)
    self.norm = self.norm * np.ones(self.nline)
    self.cmb = simulation.planck(self.mol.freq, self.model.tcmb)
    self.cmb /= self.norm

    # Parse kappa parameters and generate the kappa function.
    self.kappa = simulation.generate_kappa(self.kappa_params)

    # Do not normalize dust; will be done in photon.
    # Funky looping as functions fail to broadcast.
    self.knu = np.zeros((self.nline, self.ncell))
    self.dust = np.zeros((self.nline, self.ncell))
    for line in range(self.nline):
        for cell in range(self.ncell):
            self.knu[line, cell] = self.kappa(cell, self.mol.freq[line]) * 2.4 * sc.m_p
            self.knu[line, cell] *= self.model.nh2[cell] / self.model.g2d
            self.dust[line, cell] = simulation.planck(self.mol.freq[line],
                                                      self.model.tdust[cell])

    # Set up the Monte Carlo simulation.
    self.nphot = np.full(self.ncell, nphot)  # Set nphot to initial number.
    self.niter = self.ncell  # Estimated crossing time.
    self.fixseed = self.seed

    t1 = time()
    print("Set up took %.1f ms." % ((t1 - t0) * 1e3))
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top,
                     includes, n, ndx_file, Nchains):
    """Write one topology and one dihedral-restraint .itp per string point and chain.

    For every string point k in range(n) this writes `topol_<k>.top` and, for
    every chain, `res_<k>_chain_<chain>.itp` containing a
    `[ dihedral_restraints ]` section with a phi and a psi row per selected
    residue.  The force constant is written as the literal placeholder `KFAC`
    so that it can be substituted later.

    Parameters:
      inp           - object providing getOutputDir(); used to locate the rama output
      initial_confs - list of structures, one per string point; when None or empty
                      the angles are linearly interpolated between start_xvg and end_xvg
      start         - passed to res_selection.res_select to map the index to residues
      end           - unused
      start_xvg, end_xvg - dihedral .xvg files for the two end points (interpolation only)
      tpr           - run input file passed to the rama command (`-s`)
      top           - topology file to rewrite
      includes      - per-chain .itp paths (may be empty)
      n             - number of points in the string, including start and end point
      ndx_file      - index file with the atoms of the residues to restrain
      Nchains       - number of chains
    """
    cmdnames = cmds.GromacsCommands()

    # Get the atoms involved with the residues to use for dihedrals (might be more than one atom
    # in the index per residue, since it's probably generated by make_ndx)
    ndx_atoms = res_selection.read_ndx(ndx_file)
    # Map them to each affected residue so we just get the residue numbers back
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)

    use_interpolation = initial_confs is None or len(initial_confs) == 0
    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Have to generate the dihedrals ourselves from the given initial structures.
        # Note: when we get an initial_confs[] array, we use it for all points and
        # the start/end input parameters are completely ignored.
        # TODO: assert that len(initial_confs) == n otherwise?
        ramaprocs = {}
        stringpts = {}  # Will have 4 levels: stringpoint, residue, chain, phi/psi value
        # The null device is opened in a `with` so the handle is closed again
        # (it used to be leaked); it silences the rama sub-processes.
        with open(os.devnull, 'w') as fnull:
            # Run g_rama (in parallel) on each structure and output to a temporary .xvg
            # NOTE(review): '0%3d' pads with spaces ('0  7.xvg'); '%03d' may have been
            # intended.  Left unchanged because the writer and reader use the same name.
            for i in range(n):
                # TODO: check for and use g_rama_mpi.. like everywhere else
                cmd = cmdnames.rama.split() + ['-f', initial_confs[i], '-s', tpr,
                                               '-o', '0%3d.xvg' % i]
                ramaprocs[i] = Popen(cmd, stdout=fnull, stderr=fnull)
            # Go through the output from the rama sub-processes and read the xvg outputs
            for i in range(n):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding g_rama task has ended
                ramaprocs[i].communicate()
                # Read back and parse like for the start/end_xvg above
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    # Rewrite the topology to include the res itp files instead of the original per-chain itps
    # (if any).  There will be one topol_x.top per string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name used to be passed to re.sub as a
                # regular expression, so '.' and other metacharacters were misread.
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # write the initial part of the topology file
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al type phi dphi kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain itp: splice an include of the restraint file into
                    # the topology at the position-restraint marker instead.
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split('; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                # Create a lookup-table for the protein topology that maps residue to
                # dihedrally relevant backbone atom indices for N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                # Use the lookup-table built above and get the dihedral specification
                # atoms needed for each residue in the selection.
                for r in selection:
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], dih_atoms[r]['N'],
                           dih_atoms[r]['CA'], dih_atoms[r]['C']]
                    # psi is N, CA and C on this residue and N on the next
                    psi = [dih_atoms[r]['N'], dih_atoms[r]['CA'],
                           dih_atoms[r]['C'], dih_atoms[r + 1]['N']]

                    # Note: in the Gromacs 4.6+ format, the k-factor is here. Before, it
                    # was in the .mdp as dihre_fc.  Also see reparametrize.py
                    if use_interpolation:
                        # k is from 0 to n-1, so map it so we get a factor from 0 to 1
                        phi_val = startpts[r][mol][0] + k * (endpts[r][mol][0] - startpts[r][mol][0]) / (n - 1)
                        psi_val = startpts[r][mol][1] + k * (endpts[r][mol][1] - startpts[r][mol][1]) / (n - 1)
                    else:
                        # Use the values extracted from the initial_confs[] structures above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Since we need different force constants in different stages, we put
                    # a searchable placeholder in the file here and replace it later.
                    # KFAC is normally a %8.4f number.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
def get_molecules(self, file_name):
    """Parse the CDXML file `file_name` and return the objects found in it.

    The returned list contains, in this order of creation: one molecule per
    `fragment` element that is a direct child of a `page`, one text object per
    page-level `t` element, one plus object per "Symbol"/"Plus" `graphic`
    element and one arrow object per `arrow` element.
    """
    doc = dom.parse(file_name)
    molecules = []
    black = "#000000"

    # read colors
    colors = []
    for color_el in doc.getElementsByTagName("color"):
        # '%x' needs integers on Python 3, so the scaled channels are truncated explicitly
        rgb = tuple(int(float(color_el.getAttribute(channel)) * 255)
                    for channel in ("r", "g", "b"))
        colors.append("#%02x%02x%02x" % rgb)

    def color_of(element, default=black):
        """Return the color referenced by the element's `color` attribute, or `default`."""
        if element.hasAttribute("color"):
            return colors[int(element.getAttribute("color")) - 2]
        return default

    # read fonts
    fonts = {}
    for font_el in doc.getElementsByTagName("font"):
        fonts[int(font_el.getAttribute("id"))] = str(font_el.getAttribute("name"))

    # maps the value of the `Display` attribute of a `b` element to the bond type
    bond_display_to_type = {
        "WedgeBegin": "w",
        "WedgedHashBegin": "h",
        "Wavy": "a",
        "Bold": "b",
        "Dash": "d",
    }

    # read molecules
    for fragment in doc.getElementsByTagName("fragment"):
        if fragment.parentNode.nodeName != "page":
            continue
        mol = molecule(paper=self.paper)
        atom_id_to_atom = {}
        atom_id_to_text = {}
        for child in fragment.childNodes:
            if child.nodeName == "n":
                # atom
                font = ""
                size = 12
                text = "C"
                color1 = black
                for label in child.childNodes:
                    if label.nodeName != "t":
                        continue
                    color1 = color_of(label, color1)
                    text = ""
                    for run in label.childNodes:
                        if run.nodeName != "s":
                            continue
                        font_id = run.getAttribute("font")
                        if font_id != "":
                            font = fonts.get(int(font_id), font)
                        size_attr = run.getAttribute("size")
                        if size_attr != "":
                            size = int(size_attr)
                        text += dom_ext.getAllTextFromElement(run).strip()
                position = child.getAttribute("p").split()
                assert len(position) == 2
                # we must postpone symbol assignment until we know the valency of the atoms
                atom_id_to_text[child.getAttribute('id')] = text
                vertex = mol.create_vertex()
                vertex.line_color = color1
                vertex.font_family = font
                vertex.font_size = size
                vertex.x = float(position[0])
                vertex.y = float(position[1])
                mol.add_vertex(vertex)
                atom_id_to_atom[child.getAttribute('id')] = vertex
            elif child.nodeName == "b":
                # bond
                color2 = color_of(child)
                order = 1
                if child.hasAttribute("Order"):
                    order = int(child.getAttribute("Order"))
                edge = mol.create_edge()
                if child.hasAttribute("Display"):
                    display = child.getAttribute("Display").strip()
                    if display in bond_display_to_type:
                        edge.type = bond_display_to_type[display]
                edge.line_color = color2
                edge.order = order
                atom1 = atom_id_to_atom[child.getAttribute("B")]
                atom2 = atom_id_to_atom[child.getAttribute("E")]
                mol.add_edge(atom1, atom2, edge)

        # here we reassign the symbols
        for atom_id, old_vertex in atom_id_to_atom.items():
            new_vertex = mol.create_vertex_according_to_text(old_vertex,
                                                             atom_id_to_text[atom_id])
            old_vertex.copy_settings(new_vertex)
            mol.replace_vertices(old_vertex, new_vertex)
            old_vertex.delete()
        # finally we add the molecule to the list of molecules for output
        molecules.append(mol)

    # read texts
    # each key is a single bit of the `face` attribute, the value is the markup tag it maps to
    face_to_tag = {2: "i", 1: "b", 32: "sub", 64: "sup"}
    for text_el in doc.getElementsByTagName("t"):
        if text_el.parentNode.nodeName != "page":
            continue
        # a list (not a lazy map object) is needed for len() and for the text object
        position = list(map(float, text_el.getAttribute("p").split()))
        assert len(position) == 2
        whole_text = ""
        color3 = color_of(text_el)
        # font and the last style run are tracked per text element, so a text without
        # `s` children no longer hits an unbound or stale variable
        font = ""
        last_run = None
        for run in text_el.childNodes:
            if run.nodeName != "s":
                continue
            last_run = run
            for node in run.childNodes:
                if not isinstance(node, dom.Text):
                    continue
                piece = node.data
                if run.hasAttribute("face"):
                    face_bits = int(run.getAttribute("face"))
                    for face, tag in face_to_tag.items():
                        if face_bits & face:
                            piece = "<%s>%s</%s>" % (tag, piece, tag)
                whole_text += piece
            font_id = run.getAttribute("font")
            if font_id != "":
                font = fonts[int(font_id)]
        t = text_class(self.paper, position, text=whole_text)
        t.line_color = color3
        if last_run is not None and last_run.hasAttribute("size"):
            t.font_size = int(last_run.getAttribute("size"))
        if font:
            t.font_family = font
        molecules.append(t)

    # read graphics - plus
    for graphic in doc.getElementsByTagName("graphic"):
        if (graphic.getAttribute("GraphicType") == "Symbol"
                and graphic.getAttribute("SymbolType") == "Plus"):
            bounding_box = list(map(float, graphic.getAttribute("BoundingBox").split()))
            position2 = [bounding_box[0], bounding_box[1]]
            color4 = color_of(graphic)
            pl = plus(self.paper, position2)
            pl.line_color = color4
            molecules.append(pl)

    # read arrows; the points are given tail first, head second
    for arrow_el in doc.getElementsByTagName("arrow"):
        head = list(map(float, arrow_el.getAttribute('Head3D').split()))
        tail = list(map(float, arrow_el.getAttribute('Tail3D').split()))
        # arrows without a `color` attribute used to raise IndexError; default to black
        color5 = color_of(arrow_el)
        arr = arrow(self.paper, points=[tail[0:2], head[0:2]], fill=color5)
        arr.line_color = color5
        molecules.append(arr)

    return molecules
### GENERATE 3D MOL FILE WITH OBGEN ###
# Every output file is opened in a `with` block so that the handle handed to the
# child process is closed again in this process (they used to be leaked).
os.chdir('../public/uploads/structures')
with open(os.devnull, 'w') as devnull_fh:
    # 3D conformer search with obgen
    with open('{}-3dt.mol'.format(molid), 'w') as obgen_out:
        subprocess.call([os.path.join(config.babeldir, 'obgen'), '{}.mol'.format(molid)],
                        stdout=obgen_out, stderr=devnull_fh)
    # Remove warning flags
    with open('{}-3d.mol'.format(molid), 'w') as grep_out:
        subprocess.call(['/bin/grep', '-v', 'WARNING', '{}-3dt.mol'.format(molid)],
                        stdout=grep_out, stderr=devnull_fh)
    os.remove('{}-3dt.mol'.format(molid))
    # Convert to PDB without hydrogens
    subprocess.call([os.path.join(config.babeldir, 'babel'), '-imol', '{}-3d.mol'.format(molid),
                     '-d', '-opdb', '{}-3d.pdb'.format(molid)],
                    stdout=devnull_fh, stderr=devnull_fh)
    # Neutralize atoms
    subprocess.call(['/bin/sed', '-i', r"s/1[\+-]$//g", '{}-3d.pdb'.format(molid)],
                    stdout=devnull_fh, stderr=devnull_fh)
    # Finally convert to 3D SDF with hydrogens in neutral state.
    # os.path.join is used like in the calls above; plain string concatenation
    # produced a wrong path when config.babeldir has no trailing separator.
    subprocess.call([os.path.join(config.babeldir, 'babel'), '-ipdb', '{}-3d.pdb'.format(molid),
                     '-h', '--title', molname, '-osdf', '{}-3d.mol'.format(molid)],
                    stdout=devnull_fh, stderr=devnull_fh)
os.remove('{}-3d.pdb'.format(molid))
molobj = molecule('{}-3d.mol'.format(molid))
os.chdir(cgidir)

#### UPDATE MOLECULE DATA IN DATABASE ####
dbconn = psycopg2.connect(config.dsn)
try:
    q = dbconn.cursor()
    try:
        query = 'UPDATE molecules SET molweight=%s,molformula=%s WHERE molid=%s'
        options = [str(molobj.molweight), molobj.formula(), str(molid)]
        q.execute(query, options)
        dbconn.commit()
    finally:
        # release the cursor and the connection even when the update fails
        q.close()
finally:
    dbconn.close()

###### EXTENSIONS ######
os.chdir('../extensions')
##### RUN QIKPROP #####
def write_restraints(inp, initial_confs, start, end, tpr, top, includes, n, ndxfn, Nchains,
                     atom_limit=5566):
    """Write one topology and one position-restraint .itp per string point and chain.

    For every string point k in range(n) this writes `topol_<k>.top` and, for
    every chain, `res_<k>_chain_<chain>.itp` ending in a
    `[ position_restraints ]` section with one row per atom of the index file.
    The force constants are written as the literal placeholder `KFAC`.

    Parameters:
      inp, initial_confs, start, end, tpr - unused here
      top        - topology file to rewrite
      includes   - per-chain .itp paths (may be empty)
      n          - number of points in the string, including start and end point
      ndxfn      - index file with the atoms to restrain
      Nchains    - number of chains
      atom_limit - only atoms with an index below this value are written; the
                   default 5566 is the value that used to be hard-coded (GLIC hack)
    """
    n = int(n)
    ndx_atoms = res_selection.read_ndx(ndxfn)

    # TODO: read the starting and ending atom configurations for interpolation when
    # no initial_confs are given.

    # Rewrite the topology to include the res itp files instead of the original per-chain itps
    # (if any).  There will be one topol_x.top per intermediate string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name used to be passed to re.sub as a
                # regular expression, so '.' and other metacharacters were misread.
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the restraint atom and force spec for each intermediate point.
    # This is really unnecessary here since the restraint positions are not in these files so
    # they are the same for all points and chains. TODO
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # The molecule objects are not used in this function; the calls are
                # retained in case constructing them has side effects.
                if len(includes) > 0:
                    molecule(includes[mol])
                else:
                    # No per-chain itp: splice an include of the restraint file into
                    # the topology at the position-restraint marker instead.
                    with open('topol_%d.top' % k, 'w') as out_top:
                        molecule(top)
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split('; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                # Go through the atoms in the selection index and write one row for each one
                # with the KFAC force constant placeholder
                restraint_itp.write("\n[ position_restraints ]\n")
                restraint_itp.write("; atom type fx fy fz\n")
                for a in ndx_atoms:
                    # GLIC HACK: only write one chain, and do it relative atom 1 since the
                    # .itp maps to the topology molecule.
                    if a < atom_limit:
                        restraint_itp.write("%6d 1 KFAC KFAC KFAC\n" % int(a))
def _read_inchi(self, text):
    """Build self.structure from the InChI string `text`.

    The layers are split and the sum and connectivity layers are read once.
    Hydrogens, charges and bond orders are then localized in a loop that is
    repeated until the structure has no atom with free valency and no bond
    without order, or until no further improvement is possible.

    Raises oasa_inchi_error when `text` is empty or when the localization
    fails, and oasa_unsupported_inchi_version_error for any version other
    than 1.0.
    """
    if not text:
        raise oasa_inchi_error("No inchi was given")
    self.structure = molecule()
    self.layers = self.split_layers(text)
    # version support (very crude)
    self.version = self._check_version(self.layers[0])
    if not self.version:
        raise oasa_unsupported_inchi_version_error(self.layers[0])
    elif str(self.version[0]) != '1' or str(self.version[1]) != '0':
        raise oasa_unsupported_inchi_version_error(self.layers[0])

    self.hs_in_hydrogen_layer = self.get_number_of_hydrogens_in_hydrogen_layer()
    self.read_sum_layer()
    self.read_connectivity_layer()
    self._charge_mover = self._move_charge_somewhere_else()
    repeat = True
    run = 0
    # we have to repeat this step in order to find the right positioning of movable hydrogens
    while repeat and not self._no_possibility_to_improve:
        # cleanup
        for h in self._added_hs:
            self.structure.remove_vertex(h)
        for b in self.structure.edges:
            b.order = 1
        for v in self.structure.vertices:
            v.symbol = v.symbol  # deliberate self-assignment, see the valency reset below
            v.charge = 0
        self.cleanup()

        # the code itself
        run += 1
        assert run < 50
        self._deal_with_notorious_groups()
        self.process_forced_charges()
        self.read_hydrogen_layer(run=run)
        self.read_charge_layer()
        self.read_p_layer()
        self.deal_with_da_bonds()
        self.compensate_for_forced_charges()
        self.structure.add_missing_bond_orders()

        # here we check out if the molecule seems ok
        fvs = [v for v in self.structure.vertices if v.free_valency]
        # all() replaces `not filter(None, [...])`: on Python 3 filter() returns an
        # iterator that is always true, so that test could never succeed
        if not fvs and all(e.order for e in self.structure.edges):
            repeat = False
        elif len(fvs) == 1:
            a = fvs[0]
            a.symbol = a.symbol  # this resets the valency
            a.raise_valency_to_senseful_value()
            if not a.free_valency:
                repeat = False

        if repeat and self._no_possibility_to_improve and self.charge:
            # try another placement of the charge and start over
            try:
                # the next() builtin works on Python 2.6+ and 3; generators have no
                # .next() method on Python 3
                next(self._charge_mover)
            except StopIteration:
                pass
            else:
                self._no_possibility_to_improve = False
                run = 0

    if repeat and self._no_possibility_to_improve:
        raise oasa_inchi_error(
            "Localization of bonds, charges or movable hydrogens failed")
def write_restraints(start, end, start_xvg, end_xvg, top, includes, n, ndx, Nchains):
    """Write topologies and dihedral-restraint .itp files for the interior string points.

    For every k in range(1, n - 1) this writes `topol_<k>.top` and, for every
    chain, `dihre_<k>_chain_<chain>.itp` with a `[ dihedral_restraints ]`
    section holding a phi and a psi row per selected residue.  The angles are
    interpolated linearly between the values read from start_xvg and end_xvg.

    Parameters:
      start              - passed to res_selection.res_select together with ndx
      end                - unused
      start_xvg, end_xvg - dihedral .xvg files of the two end points
      top                - topology file to rewrite
      includes           - per-chain .itp paths (may be empty)
      n                  - number of string points (converted with int())
      ndx                - index passed to res_selection.res_select
      Nchains            - number of chains
    """
    with open(start_xvg, 'r') as xvg_f:
        start_lines = xvg_f.readlines()
    with open(end_xvg, 'r') as xvg_f:
        end_lines = xvg_f.readlines()
    selection = res_selection.res_select(start, ndx)
    n = int(n)

    def read_points(lines):
        """Map each selected residue to its [phi, psi] pairs, one pair per matching line."""
        points = {}
        for r in selection:
            # a line belongs to residue r when it ends in '-<r>'
            pattern = re.compile(r'\-%s$' % r)
            points[r] = [[float(line.split()[0]), float(line.split()[1])]
                         for line in lines if pattern.search(line)]
        return points

    # create the path
    startpts = read_points(start_lines)
    endpts = read_points(end_lines)

    sys.stderr.write('%s' % includes)
    for k in range(1, n - 1):
        with open(top) as in_topf:
            in_top = in_topf.read()
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name used to be passed to re.sub as a
                # regular expression, so '.' and other metacharacters were misread.
                in_top = in_top.replace(includename, 'dihre_%d_chain_%d.itp' % (k, mol))
        # the file used to be left open, so its content was not guaranteed to be flushed
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    for k in range(1, n - 1):
        for mol in range(Nchains):
            with open('dihre_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # write the initial part of the topology file
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al phi dphi kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain itp: splice an include of the restraint file into
                    # the topology at the position-restraint marker instead.
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split('; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "dihre_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                for r in selection:
                    # the atoms are taken in the order in which `protein` yields them:
                    # phi - C of the previous residue plus N, CA, C of this one
                    phi = [
                        a for a in protein
                        if (a.resnr == int(r) and a.atomname in ('CA', 'N', 'C'))
                        or (a.resnr == int(r) - 1 and a.atomname == 'C')
                    ]
                    # psi - N, CA, C of this residue plus N of the next one
                    psi = [
                        a for a in protein
                        if (a.resnr == int(r) and a.atomname in ('N', 'CA', 'C'))
                        or (a.resnr == int(r) + 1 and a.atomname == 'N')
                    ]

                    # write phi, psi angles
                    phi_val = startpts[r][mol][0] + (endpts[r][mol][0] - startpts[r][mol][0]) / n * k
                    psi_val = startpts[r][mol][1] + (endpts[r][mol][1] - startpts[r][mol][1]) / n * k

                    restraint_itp.write(
                        "%5d%5d%5d%5d %8.4f%5d%5d\n"
                        % (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr, phi[3].atomnr,
                           phi_val, 0, 1))
                    restraint_itp.write(
                        "%5d%5d%5d%5d %8.4f%5d%5d\n"
                        % (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr, psi[3].atomnr,
                           psi_val, 0, 1))
def write_restraints(inp, initial_confs, start, end, tpr, top, includes, n, ndxfn, Nchains,
                     atom_limit=5566):
    """Write one topology and one position-restraint .itp per string point and chain.

    For every string point k in range(n) this writes `topol_<k>.top` and, for
    every chain, `res_<k>_chain_<chain>.itp` ending in a
    `[ position_restraints ]` section with one row per atom of the index file.
    The force constants are written as the literal placeholder `KFAC`.

    Parameters:
      inp, initial_confs, start, end, tpr - unused here
      top        - topology file to rewrite
      includes   - per-chain .itp paths (may be empty)
      n          - number of points in the string, including start and end point
      ndxfn      - index file with the atoms to restrain
      Nchains    - number of chains
      atom_limit - only atoms with an index below this value are written; the
                   default 5566 is the value that used to be hard-coded (GLIC hack)
    """
    n = int(n)
    ndx_atoms = res_selection.read_ndx(ndxfn)

    # TODO: read the starting and ending atom configurations for interpolation when
    # no initial_confs are given.

    # Rewrite the topology to include the res itp files instead of the original per-chain itps
    # (if any).  There will be one topol_x.top per intermediate string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name used to be passed to re.sub as a
                # regular expression, so '.' and other metacharacters were misread.
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the restraint atom and force spec for each intermediate point.
    # This is really unnecessary here since the restraint positions are not in these files so
    # they are the same for all points and chains. TODO
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # The molecule objects are not used in this function; the calls are
                # retained in case constructing them has side effects.
                if len(includes) > 0:
                    molecule(includes[mol])
                else:
                    # No per-chain itp: splice an include of the restraint file into
                    # the topology at the position-restraint marker instead.
                    with open('topol_%d.top' % k, 'w') as out_top:
                        molecule(top)
                        with open(top, 'r') as in_itp_f:
                            in_itp = in_itp_f.read().split('; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                # Go through the atoms in the selection index and write one row for each one
                # with the KFAC force constant placeholder
                restraint_itp.write("\n[ position_restraints ]\n")
                restraint_itp.write("; atom type fx fy fz\n")
                for a in ndx_atoms:
                    # GLIC HACK: only write one chain, and do it relative atom 1 since the
                    # .itp maps to the topology molecule.
                    if a < atom_limit:
                        restraint_itp.write("%6d 1 KFAC KFAC KFAC\n" % int(a))
def reparametrize(use_posres, fix_endpoints, cvs, ndx_file, Nchains, start_conf, start_xvg, end_conf, end_xvg, last_resconfs, top, includes): Nswarms = len(cvs[0]) ndx_atoms = res_selection.read_ndx(ndx_file) # For dihedrals, we map the atoms to residues for a single chain, and the readxvg etc. will read the entire file and # select the same residues in each chain. But for the position restraints which use the atom indices directly, we have # to first expand the index so it covers all chains. # TODO: have to figure out or input atoms per chain in the .gro's so we can repeat the atom-selection Nchains times # for the posres case. The ndx file is for atoms inside the chain, but the .gro will contain global numbering. # We can detect the chain-repeat in rwgro, by looking for repeating first residue name. # Hardcode a repeat for testing for now. if use_posres == 0: # Map atoms to residues for the dihedral selection rsel = res_selection.res_select('%s' % start_conf, ndx_atoms) #sys.stderr.write('Residue selection: %s' %rsel) # else: # selected_atoms = [] # for ch in range(5): # for i in range(len(ndx_atoms)): # selected_atoms += [ ndx_atoms[i] + ch * 5566 ] # Calculate the average drift in CV space # newpts is a per-swarm-point list of CV points (each a list of the CV dimension length) newpts = [] # Note: the cvs[][] array is indexed after the number of stringpoints that actually were swarm-processed, # so depending on the fix_endpoints option it may or may not exactly match the path[] which always include # all points. If we only read in N-2 points here, the start/end will be added to newpts in the code further below. 
for pathpt in range(len(cvs)): swarmpts = [] for i in range(len(cvs[pathpt])): if use_posres == 1: zpt = rwgro.readgro_flat(cvs[pathpt][i], ndx_atoms) #sys.stderr.write('Read pathpt %d swarm %d (%s), got %d CVs\n' % (pathpt, i, cvs[pathpt][i], len(zpt))) else: zpt = readxvg.readxvg_flat(cvs[pathpt][i], rsel) swarmpts.append(zpt) zptsum = reduce(mapadd, swarmpts) avgdrift = scale((1 / float(Nswarms)), zptsum) newpts.append(avgdrift) # Read in the fixed start and end CV values, for the fix_endpoints case (otherwise the start/end will # be allowed to drift just like the other points, and they will already then be a part of the newpts array) if fix_endpoints == 1: if use_posres == 1: # TODO: the start/end_conf are full Systems so the atom numbering aliases for the ndx_atoms array :/ # Currently fixed in readgro_flat temporary, hardcoded for the GLIC Protein number. initpt = rwgro.readgro_flat(start_conf, ndx_atoms) targetpt = rwgro.readgro_flat(end_conf, ndx_atoms) else: initpt = readxvg.readxvg_flat(start_xvg, rsel) targetpt = readxvg.readxvg_flat(end_xvg, rsel) sys.stderr.write('Length of initpt %d, targetpt %d\n' % (len(initpt), len(targetpt))) # Insert the start/end in the beginning and last of newpts newpts.insert(0, initpt) newpts.append(targetpt) # something with 1 indexing makes this padding necessary. 
TODO: check if this is needed anymore paddingpt = [0] * len(newpts[0]) newpts.append(paddingpt) # Do the actual reparameterization # newpts is a 2D list, first level is one per stringpoint, second is the linear list of CVs # rep_pts returns the maximum spread of the CV distances between points in [0] and the adjusted # points in [1] # Initial iteration rep_it1 = ext_rep_pts(newpts) adjusted = rep_it1[1] # get the points only, ignore the spread result # Keep iterating, feeding the result of the previous result into rep_pts again # Note that with long CV vectors (> 4000 dimensions) iterations takes a long time # (at least 45 min for 25 iterations on a single-core 3.5 GHz) when using the python rep_pts. # We can abort early when the maximum spread between points in the updated string goes # below a threshold iters = [adjusted] i = 0 maxspread = 100.0 # Do max 150 iterations even if we don't reach our goal while i < 150 and maxspread > 0.012: sys.stderr.write('Rep iter %d: \n' % i) sys.stderr.flush() rep_it = ext_rep_pts(iters[i]) maxspread = rep_it[0] sys.stderr.write(' maxspread was %f\n' % maxspread) # Remember the adjusted points iters.append(rep_it[1]) i = i + 1 sys.stderr.write('Final maximum spread %f after %d iterations.\n' % (maxspread, i)) # Get the final iteration's result adjusted = iters[-1] # delete the padding point adjusted = adjusted[:-1] newpts = newpts[:-1] #sys.stderr.write('Pts before repa:\n %s\n' % newpts) #sys.stderr.write('The adjusted pts:\n %s\n' % adjusted) # Possibility to test skipping reparametrize by uncommenting the next row. # The stringpoints will drift along the string and probably end up in the # endpoints or a minima along the string. #adjusted = newpts # calculate reparam distance sys.stderr.write('Length of the adjusted vector: %d\n' % len(adjusted)) # TODO Nchains should depend on the specific residue (?) # Given as function argument now. 
#Nchains = len(initpt) / (2 * len(rsel)) # write the CV control data for the next iteration # The output file expected for the posres case is rep_resconf_%d.gro for each stringpoint. # For dihedrals its res_%d_chain_%d.itp for each stringpoint and chain. # for k in range(len(adjusted)): # Not necessary to do this output for the start/end-points in the fix_endpoints case, the data is # just bypassed in the caller script if fix_endpoints == 1 and (k == 0 or k == (len(adjusted) - 1)): continue if use_posres == 1: # Open the output resconf which will go into the next iteration as minimization target with open('rep_resconf_%d.gro' % k, 'w') as rep_resconf: # Open and read the previous (input) resconf, which has basically tagged along since the last # reparametrization step (or was set initially at swarm-start) with open(last_resconfs[k], 'r') as in_resconf_f: in_resconf = in_resconf_f.readlines() # TODO: maybe this chunk of code could be done by the rwgro module for us. # Copy the first 2 rows (title and number of atoms) straight over rep_resconf.write(in_resconf[0]) rep_resconf.write(in_resconf[1]) # Go through the atoms row-by-row and update the xyz coordinates for the atoms the reparametrize # step moved # Note: we are only copying over positions here. The velocities are not needed as the use for these files # will only be as a base for the next iterations position restraint coordinates. pathpoint = adjusted[ k] # the 1-D list of CVs (positions): x,y,z * nbr atoms in index if len(pathpoint) != (1555 * 3): # assert on GLIC length (TODO) sys.stderr.write('adjusted[] entry of wrong length %d\n' % len(pathpoint)) cvpos = 0 for line in in_resconf[2:][:-1]: resname = line[ 0: 8] # python-ranges are inclusive the first index and exclusive the second... 
atname = line[8:15] atomnr = int(line[15:20]) x = float(line[20:28]) y = float(line[28:36]) z = float(line[36:44]) if atomnr in ndx_atoms: # Update to new coords x = pathpoint[cvpos] y = pathpoint[cvpos + 1] z = pathpoint[cvpos + 2] cvpos += 3 # Write out the row, updated or not rep_resconf.write('%s%s%5d%8.3f%8.3f%8.3f\n' % (resname, atname, atomnr, x, y, z)) # Copy the last row which was the cell dimensions rep_resconf.write(in_resconf[len(in_resconf) - 1]) else: for chain in range(Nchains): with open('res_%d_chain_%d.itp' % (k, chain), 'w') as restraint_itp: with open(includes[k][chain], 'r') as in_itpf: in_itp = in_itpf.read() moltop = in_itp.split('[ dihedral_restraints ]')[0] restraint_itp.write('%s' % moltop) sys.stderr.write( "Writing restraints for stringpoint %d chain %d\n" % (k, chain)) # Note: this format is for Gromacs 4.6+ restraint_itp.write("[ dihedral_restraints ]\n") restraint_itp.write( "; ai aj ak al type phi dphi kfac phiB dphiB kfacB\n" ) pathpoint = adjusted[k] # just a list of phi/psi angles if Nchains == 1: protein = molecule(top) else: protein = molecule('%s' % includes[k][chain]) # Create a lookup-table for the protein topology that maps residue to dihedrally relevant # backbone atom indices for N, CA and C. dih_atoms = {} for a in protein: if (a.atomname == 'CA' or a.atomname == 'N' or a.atomname == 'C'): try: dih_atoms[a.resnr][a.atomname] = a.atomnr except KeyError: dih_atoms[a.resnr] = {a.atomname: a.atomnr} # Use the lookup-table built above and get the dihedral specification atoms needed for each # residue in the selection. This is O(n) in residues, thanks to the dih_atoms table. 
pos = 0 for r in rsel: # Get the atom numbers to use for the phi and psi dihedrals (4 atoms each) # phi is C on the previous residue, and N, CA, C on this phi = [ dih_atoms[r - 1]['C'], dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C'] ] # psi is N, CA and C on this residue and N on the next psi = [ dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C'], dih_atoms[r + 1]['N'] ] # get phi and psi values from the reparametrization vector phi_val = pathpoint[pos + chain] psi_val = pathpoint[pos + chain + 1] # Go to the next residue (phi,phi vals * number of chains apart) pos += 2 * Nchains # write phi, psi angles and k-factor # Note: in the Gromacs 4.6+ format, the k-factor is here. Before, it was in the .mdp as # dihre_fc. # Since we need different force constants in different stages, we need to put # a searchable placeholder in the file here and replace it later restraint_itp.write( "%5d%5d%5d%5d%5d %8.4f%5d KFAC\n" % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0)) restraint_itp.write( "%5d%5d%5d%5d%5d %8.4f%5d KFAC\n" % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
pass if(cstart>0): for i in range(cstart,len(l)): if l[i].find('$$$$')>=0: break try: cc50s.append(float(l[i])) except Exception: pass except Exception: print 'Problem reading experimental data.' sys.exit() try: #Load molecule molobj=molecule(molfilename) molweight=molobj.molweight molformula=molobj.formula() except Exception: print 'Problem loading mol file.' sys.exit() try: query = 'INSERT INTO molecules (molname, authorid, dateadded, molweight, molformula) VALUES (%s,%s,localtimestamp,%s,%s) RETURNING molid' options = [molfilename[:-4],1,molweight,molformula] q.execute(query,options) molid=q.fetchone()[0] except Exception: print 'Problem inserting molecule into database.' sys.exit()
def reparametrize(
    use_posres,
    fix_endpoints,
    cvs,
    ndx_file,
    Nchains,
    start_conf,
    start_xvg,
    end_conf,
    end_xvg,
    last_resconfs,
    top,
    includes,
):
    """Average the swarm drift per string point, reparametrize, and write restraint files.

    Steps performed:
      1. For every path point, read the CV vector of each swarm trajectory and average them.
      2. If fix_endpoints == 1, read the fixed start/end CV vectors and put them first/last.
      3. Feed the points to ext_rep_pts repeatedly (at most 150 extra iterations) until the
         maximum spread it reports is <= 0.012.
      4. Write the restraint input for the next iteration into the current directory:
         rep_resconf_<k>.gro per string point (posres), or res_<k>_chain_<c>.itp per
         string point and chain (dihedrals).

    Parameters:
      use_posres    -- 1 selects position restraints (CVs read with rwgro.readgro_flat);
                       0 selects dihedral restraints (CVs read with readxvg.readxvg_flat).
                       Other values are not supported: the residue selection is only built
                       for 0 while the dihedral readers run for anything that is not 1.
      fix_endpoints -- 1 to read the end points from start_*/end_* instead of from cvs; no
                       output is written for the first and last string point in that case.
      cvs           -- cvs[pathpt][i] is the CV file of swarm i at swarm-processed point pathpt.
      ndx_file      -- index file handed to res_selection.read_ndx.
      Nchains       -- number of chains (used for the dihedral output only).
      start_conf, end_conf -- start/end configurations (posres end points; start_conf is also
                       used to map atoms to residues in the dihedral case).
      start_xvg, end_xvg   -- start/end dihedral files (dihedral end points).
      last_resconfs -- last_resconfs[k] is the previous restraint .gro of string point k.
      top           -- topology handed to molecule() when Nchains == 1.
      includes      -- includes[k][chain] is the .itp of string point k and the given chain.

    Returns None. Progress and diagnostics are written to sys.stderr.
    """
    # `reduce` is a builtin only on Python 2; functools.reduce exists on 2.6+ and on 3.x.
    from functools import reduce

    # NOTE(review): the average below divides by the swarm count of the FIRST path point;
    # this presumably assumes every path point has the same number of swarms - confirm.
    Nswarms = len(cvs[0])

    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals, we map the atoms to residues for a single chain, and the readxvg etc. will
    # read the entire file and select the same residues in each chain. But for the position
    # restraints which use the atom indices directly, we have to first expand the index so it
    # covers all chains.
    # TODO: have to figure out or input atoms per chain in the .gro's so we can repeat the
    # atom-selection Nchains times for the posres case. The ndx file is for atoms inside the
    # chain, but the .gro will contain global numbering.
    if use_posres == 0:
        # Map atoms to residues for the dihedral selection
        rsel = res_selection.res_select("%s" % start_conf, ndx_atoms)

    # Calculate the average drift in CV space.
    # newpts holds one CV vector (a flat list) per string point.
    # Note: cvs[][] is indexed after the number of string points that actually were
    # swarm-processed, so with fix_endpoints the start/end are added further below.
    newpts = []
    for swarm_files in cvs:
        swarmpts = []
        for cv_file in swarm_files:
            if use_posres == 1:
                zpt = rwgro.readgro_flat(cv_file, ndx_atoms)
            else:
                zpt = readxvg.readxvg_flat(cv_file, rsel)
            swarmpts.append(zpt)
        zptsum = reduce(mapadd, swarmpts)
        newpts.append(scale((1 / float(Nswarms)), zptsum))

    # Read in the fixed start and end CV values for the fix_endpoints case (otherwise the
    # start/end drift like the other points and are already part of newpts).
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: the start/end_conf are full systems so the atom numbering aliases for the
            # ndx_atoms array. Currently worked around in readgro_flat, hardcoded for GLIC.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)

        sys.stderr.write("Length of initpt %d, targetpt %d\n" % (len(initpt), len(targetpt)))
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # Something with 1-indexing makes this padding point necessary.
    # TODO: check if this is needed anymore
    paddingpt = [0] * len(newpts[0])
    newpts.append(paddingpt)

    # Do the actual reparameterization.
    # ext_rep_pts returns the maximum spread of the CV distances between points in [0] and the
    # adjusted points in [1]. The first call only provides the points; its spread is ignored.
    adjusted = ext_rep_pts(newpts)[1]

    # Keep iterating, feeding the previous result back in. With long CV vectors (> 4000
    # dimensions) iterations are slow, so stop early once the spread is below the threshold.
    # Only the most recent string is kept: earlier iterations are never read again, and
    # holding all of them would cost up to 151 copies of the full string.
    i = 0
    maxspread = 100.0
    # Do max 150 iterations even if we don't reach our goal
    while i < 150 and maxspread > 0.012:
        sys.stderr.write("Rep iter %d: \n" % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write(" maxspread was %f\n" % maxspread)
        adjusted = rep_it[1]
        i = i + 1

    sys.stderr.write("Final maximum spread %f after %d iterations.\n" % (maxspread, i))

    # Delete the padding point again
    adjusted = adjusted[:-1]
    newpts = newpts[:-1]

    # Possibility to test skipping reparametrize by uncommenting the next row.
    # The stringpoints will drift along the string and probably end up in the
    # endpoints or a minima along the string.
    # adjusted = newpts

    sys.stderr.write("Length of the adjusted vector: %d\n" % len(adjusted))

    # TODO Nchains should depend on the specific residue (?)
    # Given as function argument now.

    if use_posres == 1:
        # Built once so the per-atom-row membership test is O(1) instead of a list scan.
        # Assumes the index entries are hashable atom numbers, as the `in` test implies.
        selected_atoms = frozenset(ndx_atoms)

    # Write the CV control data for the next iteration.
    last_k = len(adjusted) - 1
    for k in range(len(adjusted)):
        # No output is needed for the start/end points in the fix_endpoints case; that data
        # is just bypassed in the caller script.
        if fix_endpoints == 1 and (k == 0 or k == last_k):
            continue

        if use_posres == 1:
            _write_posres_resconf(k, adjusted[k], last_resconfs[k], selected_atoms)
        else:
            for chain in range(Nchains):
                if Nchains == 1:
                    topology = top
                else:
                    topology = "%s" % includes[k][chain]
                _write_dihedral_restraints(
                    k, chain, adjusted[k], includes[k][chain], topology, rsel, Nchains
                )


def _write_posres_resconf(k, pathpoint, in_resconf_path, selected_atoms):
    """Write rep_resconf_<k>.gro: the previous restraint .gro with the selected atoms moved.

    pathpoint is the flat list of positions (x, y, z per selected atom, in file order),
    in_resconf_path the .gro to copy from, and selected_atoms a set of atom numbers whose
    coordinates are replaced. Only positions are written; velocities are dropped because the
    file is only used as the base for the next iteration's position restraints.
    """
    # Open the output resconf which will go into the next iteration as minimization target
    with open("rep_resconf_%d.gro" % k, "w") as rep_resconf:
        # The previous (input) resconf has tagged along since the last reparametrization
        # step (or was set initially at swarm-start).
        with open(in_resconf_path, "r") as in_resconf_f:
            in_resconf = in_resconf_f.readlines()

        # TODO: maybe this chunk of code could be done by the rwgro module for us.
        # Copy the first 2 rows (title and number of atoms) straight over
        rep_resconf.write(in_resconf[0])
        rep_resconf.write(in_resconf[1])

        if len(pathpoint) != (1555 * 3):  # assert on GLIC length (TODO)
            sys.stderr.write("adjusted[] entry of wrong length %d\n" % len(pathpoint))

        cvpos = 0
        # Atom rows are everything between the two header rows and the final box row
        for line in in_resconf[2:-1]:
            resname = line[0:8]
            atname = line[8:15]
            atomnr = int(line[15:20])
            if atomnr in selected_atoms:
                # Update to new coords; the old ones are still parsed so a malformed row
                # fails the same way whether or not the atom is selected.
                float(line[20:28])
                float(line[28:36])
                float(line[36:44])
                x, y, z = pathpoint[cvpos], pathpoint[cvpos + 1], pathpoint[cvpos + 2]
                cvpos += 3
            else:
                x = float(line[20:28])
                y = float(line[28:36])
                z = float(line[36:44])
            # Write out the row, updated or not
            rep_resconf.write("%s%s%5d%8.3f%8.3f%8.3f\n" % (resname, atname, atomnr, x, y, z))

        # Copy the last row which was the cell dimensions
        rep_resconf.write(in_resconf[-1])


def _write_dihedral_restraints(k, chain, pathpoint, in_itp_path, topology, rsel, Nchains):
    """Write res_<k>_chain_<chain>.itp with phi/psi restraints for every residue in rsel.

    Everything in in_itp_path before its '[ dihedral_restraints ]' section is copied over,
    then a new section is written from pathpoint (the flat list of phi/psi angles).
    topology is what gets handed to molecule() to look up the backbone atom numbers.
    """
    with open("res_%d_chain_%d.itp" % (k, chain), "w") as restraint_itp:
        with open(in_itp_path, "r") as in_itpf:
            in_itp = in_itpf.read()
        moltop = in_itp.split("[ dihedral_restraints ]")[0]
        restraint_itp.write("%s" % moltop)

        sys.stderr.write("Writing restraints for stringpoint %d chain %d\n" % (k, chain))

        # Note: this format is for Gromacs 4.6+
        restraint_itp.write("[ dihedral_restraints ]\n")
        restraint_itp.write("; ai aj ak al type phi dphi kfac phiB dphiB kfacB\n")

        protein = molecule(topology)

        # Lookup table mapping residue number -> {atom name: atom number} for the backbone
        # atoms N, CA and C, so each residue in the selection is resolved without a rescan.
        dih_atoms = {}
        for a in protein:
            if a.atomname in ("CA", "N", "C"):
                dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

        pos = 0
        for r in rsel:
            here = dih_atoms[r]
            # phi is C on the previous residue, and N, CA, C on this
            phi = [dih_atoms[r - 1]["C"], here["N"], here["CA"], here["C"]]
            # psi is N, CA and C on this residue and N on the next
            psi = [here["N"], here["CA"], here["C"], dih_atoms[r + 1]["N"]]

            # NOTE(review): for Nchains > 1 the psi index of chain c equals the phi index of
            # chain c + 1 (pos + chain + 1), while pos advances by 2 * Nchains per residue.
            # Left unchanged; confirm against the layout produced by readxvg_flat.
            phi_val = pathpoint[pos + chain]
            psi_val = pathpoint[pos + chain + 1]

            # Go to the next residue (phi,psi vals * number of chains apart)
            pos += 2 * Nchains

            # In the Gromacs 4.6+ format the k-factor lives here (before, it was dihre_fc in
            # the .mdp). Different stages need different force constants, so a searchable
            # KFAC placeholder is written and replaced later.
            restraint_itp.write(
                "%5d%5d%5d%5d%5d %8.4f%5d KFAC\n" % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0)
            )
            restraint_itp.write(
                "%5d%5d%5d%5d%5d %8.4f%5d KFAC\n" % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0)
            )