Python IO.Chain Exemples

Langage de programmation: Python

Class/Type: IO

Méthode/Fonction: Chain

Exemples sur hotexamples.com : 1

Python IO.Chain - 1 exemple trouvé. Voici les exemples réels les mieux notés de IO.Chain, issus du pack pymoo et extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

load_pickled_generation_dataframe(28)

todisc_list(8)

writebin(7)

read_balls(6)

AA(5)

autosave(4)

load_tradeprice_data(4)

write2data(3)

readoned(3)

readmap(3)

readWords(3)

pickle(3)

autosave_current_map(3)

writemap(3)

load_settings(2)

save_game(2)

json_write(2)

load_chi_file(2)

load_gr_file(2)

load_grid_of_particles(2)

load_high_scores(2)

load_manual(2)

replaceSpecialChars(2)

get_logfolders(2)

removeFile(2)

loadimage(2)

FileManager(2)

readbin(2)

read_sticks(2)

readPickledWords(2)

CmdService(2)

Clear(2)

getFileName(2)

encodingToutf8(2)

GetLine(2)

writePickledWords(2)

COSHBIO(2)

checkFile(2)

ensure_dir(2)

unpickle(2)

writeWords(1)

writedata(1)

read_geo(1)

todisc_txt(1)

Binary2netcdf(1)

Chain(1)

writeXYZ(1)

CSVFileWriter(1)

writeSeed(1)

writeString(1)

Méthodes fréquemment utilisées

load_pickled_generation_dataframe (28)

todisc_list (8)

writebin (7)

read_balls (6)

AA (5)

autosave (4)

load_tradeprice_data (4)

write2data (3)

readoned (3)

readmap (3)

Méthodes fréquemment utilisées

readWords (3)

pickle (3)

autosave_current_map (3)

writemap (3)

load_settings (2)

save_game (2)

json_write (2)

load_chi_file (2)

load_gr_file (2)

load_grid_of_particles (2)

load_high_scores (2)

load_manual (2)

replaceSpecialChars (2)

get_logfolders (2)

removeFile (2)

loadimage (2)

FileManager (2)

readbin (2)

read_sticks (2)

readPickledWords (2)

Méthodes fréquemment utilisées

load_high_scores (2)

load_manual (2)

replaceSpecialChars (2)

get_logfolders (2)

removeFile (2)

loadimage (2)

FileManager (2)

readbin (2)

read_sticks (2)

readPickledWords (2)

CmdService (2)

Clear (2)

getFileName (2)

encodingToutf8 (2)

GetLine (2)

writePickledWords (2)

COSHBIO (2)

checkFile (2)

ensure_dir (2)

unpickle (2)

writeWords (1)

writedata (1)

read_geo (1)

todisc_txt (1)

Binary2netcdf (1)

Chain (1)

writeXYZ (1)

CSVFileWriter (1)

writeSeed (1)

writeString (1)

Méthodes fréquemment utilisées

CmdService (2)

Clear (2)

getFileName (2)

encodingToutf8 (2)

GetLine (2)

writePickledWords (2)

COSHBIO (2)

checkFile (2)

ensure_dir (2)

unpickle (2)

writeWords (1)

writedata (1)

read_geo (1)

todisc_txt (1)

Binary2netcdf (1)

Chain (1)

writeXYZ (1)

CSVFileWriter (1)

writeSeed (1)

writeString (1)

save_settings (1)

safe_dump (1)

save_chi (1)

trimOutput (1)

save_grid_of_particles (1)

CSVFileReader (1)

writeNumpyArray (1)

writeMatching (1)

save_tradeprice_data (1)

File (1)

readSystemFromFileXYZ (1)

getfilenames_of_dir (1)

Filepaths (1)

GetSizeString (1)

HDF5DatasetGenerator (1)

EdittimeIO (1)

Directory (1)

calibrate (1)

check_simulation_complete (1)

countLines (1)

Exemple #1

0

Afficher le fichier

Fichier : MAIN.py Projet : xiki-tempula/martinize.py

def main(options):
    """Run the coarse-graining pipeline for one structure file.

    The function reads frames from the file named by ``options["-f"]`` (or
    from stdin), builds ``IO.Chain`` objects per frame, assigns secondary
    structure, and then writes whichever outputs were requested:

    * ``options["-x"]``    -- coarse grained structure (one MODEL per frame)
    * ``options["-n"]``    -- index file with AA / VZ / CG groups
    * ``options["-nmap"]`` -- index file mapping atoms to beads
    * ``options["-o"]``    -- one ``.itp`` per molecule type plus a master
      topology file

    ``options`` is indexed like a mapping; entries such as ``"-f"``,
    ``"-x"`` and ``"-ss"`` are objects exposing a ``.value`` attribute,
    other entries (``'multi'``, ``'linkListCG'``, ...) are used directly.
    ``options['linkListCG']`` is appended to when cystine bridges are
    detected.

    Returns nothing; all results are written to files, the log and stdout.
    """
    # Check whether to read from a gro/pdb file or from stdin
    # We use an iterator to wrap around the stream to allow
    # inferring the file type, without consuming lines already
    inStream = IO.streamTag(options["-f"] and options["-f"].value or sys.stdin)

    # The streamTag iterator first yields the file type, which
    # is used to specify the function for reading frames
    fileType = next(inStream)
    if fileType == "GRO":
        frameIterator = IO.groFrameIterator
    else:
        frameIterator = IO.pdbFrameIterator

    # ITERATE OVER FRAMES IN STRUCTURE FILE #

    # Now iterate over the frames in the stream
    # This should become a StructureFile class with a nice .next method
    model = 1
    cgOutPDB = None
    ssTotal = []
    cysteines = []
    for title, atoms, box in frameIterator(inStream):
        if fileType == "PDB":
            # The PDB file can have chains, in which case we list and process them specifically
            # TER statements are also interpreted as chain separators
            # A chain may have breaks in which case the breaking residues are flagged
            chains = [
                IO.Chain(options, [i for i in IO.residues(chain)])
                for chain in IO.pdbChains(atoms)
            ]
        else:
            # The GRO file does not define chains. Here breaks in the backbone are
            # interpreted as chain separators.
            residuelist = [residue for residue in IO.residues(atoms)]
            # The breaks are indices to residues
            broken = IO.breaks(residuelist)
            # Reorder, such that each chain is specified with (i,j,k)
            # where i and j are the start and end of the chain, and
            # k is a chain identifier
            chains = zip([0] + broken, broken + [len(residuelist)],
                         range(len(broken) + 1))
            chains = [
                IO.Chain(options, residuelist[i:j], name=chr(65 + k))
                for i, j, k in chains
            ]

        for chain in chains:
            chain.multiscale = "all" in options[
                'multi'] or chain.id in options['multi']

        # Check the chain identifiers
        if model == 1 and len(chains) != len(set([i.id for i in chains])):
            # Ending down here means that non-consecutive blocks of atoms in the
            # PDB file have the same chain ID. The warning pertains to PDB files only,
            # since chains from GRO files get a unique chain identifier assigned.
            logging.warning(
                "Several chains have identical chain identifiers in the PDB file."
            )

        # Check if chains are of mixed type. If so, split them.
        # Note that in some cases HETATM residues are part of a
        # chain. This will get problematic. But we cannot cover
        # all, probably.
        if not options['MixedChains']:
            demixedChains = []
            for chain in chains:
                demixedChains.extend(chain.split())
            chains = demixedChains

        n = 1
        logging.info("Found %d chains:" % len(chains))
        for chain in chains:
            logging.info(" %2d: %s (%s), %d atoms in %d residues." %
                         (n, chain.id, chain._type, chain.natoms, len(chain)))
            n += 1

        # Check all chains
        keep = []
        for chain in chains:
            if chain.type() == "Water":
                logging.info("Removing %d water molecules (chain %s)." %
                             (len(chain), chain.id))
            elif chain.type() in ("Protein", "Nucleic"):
                keep.append(chain)
            # This is currently not active:
            elif options['RetainHETATM']:
                keep.append(chain)
            else:
                logging.info(
                    "Removing HETATM chain %s consisting of %d residues." %
                    (chain.id, len(chain)))
        chains = keep

        # Here we interactively check the charge state of residues
        # Can be easily expanded to residues other than HIS
        for chain in chains:
            for i, resname in enumerate(chain.sequence):
                if resname == 'HIS' and options['chHIS']:
                    choices = {0: 'HIH', 1: 'HIS'}
                    choice = IO.getChargeType(resname, i, choices)
                    chain.sequence[i] = choice

        # Check which chains need merging
        # (only decided on the first frame; later frames reuse order/merge)
        if model == 1:
            order, merge = IO.check_merge(
                chains, options['mergeList'], options['linkList'],
                options['CystineCheckBonds'] and options['CystineMaxDist2'])

        # Get the total length of the sequence
        seqlength = sum([len(chain) for chain in chains])
        logging.info('Total size of the system: %s residues.' % seqlength)

        # SECONDARY STRUCTURE
        ss = ''
        if options['Collagen']:
            for chain in chains:
                chain.set_ss("F")
                ss += chain.ss
        elif options["-ss"]:
            # XXX We need error-catching here,
            # in case the file doesn't exist, or the string contains bogus.
            # If the string given for the sequence consists strictly of upper case letters
            # and does not appear to be a file, assume it is the secondary structure
            ss = options["-ss"].value.replace('~', 'L').replace(' ', 'L')
            if ss.isalnum() and ss.isupper() and not os.path.exists(
                    options["-ss"].value):
                ss = options["-ss"].value
                logging.info('Secondary structure read from command-line:\n' +
                             ss)
            else:
                # There ought to be a file with the name specified.
                # Read it once and close it; the raw (unstripped) lines are
                # kept because the DSSP parser below indexes by column.
                with open(options["-ss"].value) as ssHandle:
                    ssRaw = ssHandle.readlines()
                ssfile = [i.strip() for i in ssRaw]

                # Try to read the file as a Gromacs Secondary Structure Dump
                # Those have an integer as first line
                if ssfile[0].isdigit():
                    logging.info(
                        'Will read secondary structure from file (assuming Gromacs ssdump).'
                    )
                    ss = "".join([i for i in ssfile[1:]])
                else:
                    # Get the secondary structure type from DSSP output
                    logging.info(
                        'Will read secondary structure from file (assuming DSSP output).'
                    )
                    pss = re.compile(r"^([ 0-9]{4}[0-9]){2}")
                    ss = "".join([i[16] for i in ssRaw if re.match(pss, i)])

            # Now set the secondary structure for each of the chains
            sstmp = ss
            for chain in chains:
                ln = min(len(sstmp), len(chain))
                chain.set_ss(sstmp[:ln])
                # Drop the part just handed out so the next chain continues
                # where this one stopped (previously this re-sliced the head
                # of the full string, giving later chains the wrong segment).
                sstmp = sstmp[ln:]
        else:
            if options["-dssp"]:
                method, executable = "dssp", options["-dssp"].value
            # elif options["-pymol"]:
            #     method, executable = "pymol", options["-pymol"].value
            else:
                logging.warning(
                    "No secondary structure or determination method speficied. Protein chains will be set to 'COIL'."
                )
                method, executable = None, None

            for chain in chains:
                ss += chain.dss(method, executable)

            # Used to be: if method in ("dssp","pymol"): but pymol is not supported
            if method in ["dssp"]:
                logging.debug('%s determined secondary structure:\n' %
                              method.upper() + ss)

        # Collect the secondary structure classifications for different frames
        ssTotal.append(ss)

        # Write the coarse grained structure if requested
        if options["-x"].value:
            logging.info("Writing coarse grained structure.")
            if cgOutPDB is None:
                cgOutPDB = open(options["-x"].value, "w")
            cgOutPDB.write("MODEL %8d\n" % model)
            cgOutPDB.write(title)
            cgOutPDB.write(IO.pdbBoxString(box))
            atid = 1
            for i in order:
                ci = chains[i]
                if ci.multiscale:
                    for r in ci.residues:
                        for name, resn, resi, chain, x, y, z in r:
                            cgOutPDB.write(
                                IO.pdbOut(
                                    (name, resn[:3], resi, chain, x, y, z),
                                    i=atid))
                            atid += 1
                coarseGrained = ci.cg(com=True)
                if coarseGrained:
                    for name, resn, resi, chain, x, y, z, ssid in coarseGrained:
                        if ci.multiscale:
                            name = "v" + name
                        cgOutPDB.write(
                            IO.pdbOut((name, resn[:3], resi, chain, x, y, z),
                                      i=atid,
                                      ssid=ssid))
                        atid += 1
                    cgOutPDB.write("TER\n")
                else:
                    logging.warning(
                        "No mapping for coarse graining chain %s (%s); chain is skipped."
                        % (ci.id, ci.type()))
            cgOutPDB.write("ENDMDL\n")

        # Gather cysteine sulphur coordinates
        cyslist = [cys["SG"] for chain in chains for cys in chain["CYS"]]
        cysteines.append([cys for cys in cyslist if cys])

        model += 1

    # All frames are written; release the structure output file.
    if cgOutPDB is not None:
        cgOutPDB.close()

    # Write the index file if requested.
    # Mainly of interest for multiscaling.
    # Could be improved by adding separate groups for BB, SC, etc.
    if options["-n"].value:
        logging.info("Writing index file.")
        # Lists for All-atom, Virtual sites and Coarse Grain.
        NAA, NVZ, NCG = [], [], []
        atid = 1
        for i in order:
            ci = chains[i]
            coarseGrained = ci.cg(force=True)
            if ci.multiscale:
                NAA.extend([" %5d" % (a + atid) for a in range(ci.natoms)])
                atid += ci.natoms
            if coarseGrained:
                if ci.multiscale:
                    NVZ.extend([
                        " %5d" % (a + atid)
                        for a in range(len(coarseGrained))
                    ])
                else:
                    NCG.extend([
                        " %5d" % (a + atid)
                        for a in range(len(coarseGrained))
                    ])
                atid += len(coarseGrained)
        with open(options["-n"].value, "w") as outNDX:
            outNDX.write("\n[ AA ]\n" + "\n".join(
                [" ".join(NAA[i:i + 15]) for i in range(0, len(NAA), 15)]))
            outNDX.write("\n[ VZ ]\n" + "\n".join(
                [" ".join(NVZ[i:i + 15]) for i in range(0, len(NVZ), 15)]))
            outNDX.write("\n[ CG ]\n" + "\n".join(
                [" ".join(NCG[i:i + 15]) for i in range(0, len(NCG), 15)]))

    # Write the index file for mapping AA trajectory if requested
    if options["-nmap"].value:
        logging.info("Writing trajectory index file.")
        atid = 1
        with open(options["-nmap"].value, "w") as outNDX:
            # Get all AA atoms as lists of atoms in residues
            # First we skip hetatoms and unknowns then iterate over beads
            # In DNA the O3' atom is mapped together with atoms from the next residue
            # This stores it until we get to the next residue
            o3_shift = ''
            for i_count, i in enumerate(IO.residues(atoms)):
                if i[0][1] in ("SOL", "HOH", "TIP"):
                    continue
                if not i[0][1] in MAP.CoarseGrained.mapping.keys():
                    continue
                nra = 0
                names = [j[0] for j in i]
                # This gives out a list of atoms in residue, each tuple has other
                # stuff in it that's needed elsewhere so we just take the last
                # element which is the atom index (in that residue)
                for j_count, j in enumerate(MAP.mapIndex(i)):
                    outNDX.write('[ Bead %i of residue %i ]\n' %
                                 (j_count + 1, i_count + 1))
                    line = ''
                    for k in j:
                        if names[k[2]] == "O3'":
                            line += '%s ' % (str(o3_shift))
                            o3_shift = k[2] + atid
                        else:
                            line += '%i ' % (k[2] + atid)
                    line += '\n'
                    nra += len(j)
                    outNDX.write(line)
                atid += nra

    # Everything below here we only need, if we need to write a Topology
    if options['-o']:
        # Collect the secondary structure stuff and decide what to do with it
        # First rearrange by the residue
        ssTotal = zip(*ssTotal)
        ssAver = []
        for i in ssTotal:
            si = list(set(i))
            if len(si) == 1:
                # Only one type -- consensus
                ssAver.append(si[0])
            else:
                # Transitions between secondary structure types
                i = list(i)
                si = [(1.0 * i.count(j) / len(i), j) for j in si]
                si.sort()
                if si[-1][0] > options["-ssc"].value:
                    ssAver.append(si[-1][1])
                else:
                    ssAver.append(" ")

        ssAver = "".join(ssAver)
        logging.info(
            '(Average) Secondary structure has been determined (see head of .itp-file).'
        )

        # Divide the secondary structure according to the division in chains
        # This will set the secondary structure types to be used for the
        # topology.
        for chain in chains:
            chain.set_ss(ssAver[:len(chain)])
            ssAver = ssAver[len(chain):]

        # Now the chains are complete, each consisting of a residuelist,
        # and a secondary structure designation if the chain is of type 'Protein'.
        # There may be mixed chains, there may be HETATM things.
        # Water has been discarded. Maybe this has to be changed at some point.
        # The order in the coarse grained files matches the order in the set of chains.
        #
        # If there are no merges to be done, i.e. no global Elnedyn network, no
        # disulphide bridges, no links, no distance restraints and no explicit merges,
        # then we can write out the topology, which will match the coarse grained file.
        #
        # If there are merges to be done, the order of things may be changed, in which
        # case the coarse grained structure will not match with the topology...

        # CYSTINE BRIDGES #

        # Extract the cysteine coordinates (for all frames) and the cysteine identifiers
        if options['CystineCheckBonds']:
            logging.info(
                "Checking for cystine bridges, based on sulphur (SG) atoms lying closer than %.4f nm"
                % math.sqrt(options['CystineMaxDist2'] / 100))

            # Materialised as a list because it is indexed below
            # (zip() is a one-shot iterator on Python 3).
            cyscoord = list(zip(*[[j[4:7] for j in i] for i in cysteines]))
            cysteines = [i[:4] for i in cysteines[0]]

            bl, kb = options['ForceField'].special[(("SC1", "CYS"),
                                                    ("SC1", "CYS"))]

            # Check the distances and add the cysteines to the link list if the
            # SG atoms have a distance smaller than the cutoff.
            rlc = range(len(cysteines))
            for i in rlc[:-1]:
                for j in rlc[i + 1:]:
                    # Checking the minimum distance over all frames
                    # But we could also take the maximum, or the mean
                    d2 = min([
                        FUNC.distance2(a, b)
                        for a, b in zip(cyscoord[i], cyscoord[j])
                    ])
                    if d2 <= options['CystineMaxDist2']:
                        a, b = cysteines[i], cysteines[j]
                        options['linkListCG'].append(
                            (("SC1", "CYS", a[2], a[3]),
                             ("SC1", "CYS", b[2], b[3]), bl, kb))
                        # NOTE(review): the (32 << 20) offset is only removed
                        # for the log message; presumably it undoes an offset
                        # applied to residue numbers elsewhere -- confirm.
                        a, b = (a[0], a[1], a[2] - (32 << 20),
                                a[3]), (b[0], b[1], b[2] - (32 << 20), b[3])
                        logging.info(
                            "Detected SS bridge between %s and %s (%f nm)" %
                            (a, b, math.sqrt(d2) / 10))

        # REAL ITP STUFF #

        # Check whether we have identical chains, in which case we
        # only write the ITP for one...
        # This means making a distinction between chains and
        # moleculetypes.
        molecules = [tuple([chains[i] for i in j]) for j in merge]

        # At this point we should have a list or dictionary of chains
        # Each chain should be given a unique name, based on the value
        # of options["-o"] combined with the chain identifier and possibly
        # a number if there are chains with identical identifiers.
        # For each chain we then write an ITP file using the name for
        # moleculetype and name + ".itp" for the topology include file.
        # In addition we write a master topology file, using the value of
        # options["-o"], with an added extension ".top" if not given.

        # XXX *NOTE*: This should probably be gathered in a 'Universe' class
        itp = 0
        moleculeTypes = {}
        for mi in range(len(molecules)):
            mol = molecules[mi]
            # Check if the moleculetype is already listed
            # If not, generate the topology from the chain definition
            if mol not in moleculeTypes or options['SeparateTop']:
                # Name of the moleculetype
                # XXX: The naming should be changed; now it becomes Protein_X+Protein_Y+...
                name = "+".join(
                    [chain.getname(options['-name'].value) for chain in mol])
                moleculeTypes[mol] = name

                # Write the molecule type topology
                top = TOP.Topology(mol[0], options=options, name=name)
                for m in mol[1:]:
                    top += TOP.Topology(m, options=options)

                # Have to add the connections, like the connecting network
                # Gather coordinates
                mcg, coords = zip(*[(j[:4], j[4:7]) for m in mol
                                    for j in m.cg(force=True)])
                mcg = list(mcg)

                # Run through the link list and add connections (links = cys bridges or hand specified links)
                for atomA, atomB, bondlength, forceconst in options[
                        'linkListCG']:
                    if bondlength == -1 and forceconst == -1:
                        bondlength, forceconst = options['ForceField'].special[
                            (atomA[:2], atomB[:2])]
                    # Check whether this link applies to this group
                    # (each becomes a 1-based index into mcg, or False)
                    atomA = atomA in mcg and mcg.index(atomA) + 1
                    atomB = atomB in mcg and mcg.index(atomB) + 1
                    if atomA and atomB:
                        cat = (forceconst is None) and "Constraint" or "Link"
                        top.bonds.append(
                            TOP.Bond((atomA, atomB),
                                     options=options,
                                     type=1,
                                     parameters=(bondlength, forceconst),
                                     category=cat,
                                     comments="Cys-bonds/special link"))

                # Elastic Network
                # The elastic network is added after the topology is constructed, since that
                # is where the correct atom list with numbering and the full set of
                # coordinates for the merged chains are available.
                if options['ElasticNetwork']:
                    rubberType = options['ForceField'].EBondType
                    rubberList = ELN.rubberBands(
                        [(i[0], j) for i, j in zip(top.atoms, coords)
                         if i[4] in options['ElasticBeads']],
                        options['ElasticLowerBound'],
                        options['ElasticUpperBound'],
                        options['ElasticDecayFactor'],
                        options['ElasticDecayPower'],
                        options['ElasticMaximumForce'],
                        options['ElasticMinimumForce'])
                    top.bonds.extend([
                        TOP.Bond(i,
                                 options=options,
                                 type=rubberType,
                                 category="Rubber band") for i in rubberList
                    ])

                # Write out the MoleculeType topology
                destination = options["-o"] and open(
                    moleculeTypes[mol] + ".itp", 'w') or sys.stdout
                try:
                    destination.write(str(top))
                finally:
                    # Never close stdout; only files opened here.
                    if destination is not sys.stdout:
                        destination.close()

                itp += 1

            # Check whether other chains are equal to this one
            # Skip this step if we are to write all chains to separate moleculetypes
            if not options['SeparateTop']:
                for j in range(mi + 1, len(molecules)):
                    if not molecules[j] in moleculeTypes and mol == molecules[
                            j]:
                        # Molecule j is equal to a molecule mi
                        # Set the name of the moleculetype to the one of that molecule
                        moleculeTypes[molecules[j]] = moleculeTypes[mol]

        logging.info('Written %d ITP file%s' % (itp, itp > 1 and "s" or ""))

        # WRITING THE MASTER TOPOLOGY
        # Output stream
        topOut = options["-o"] and open(options['-o'].value,
                                        'w') or sys.stdout

        # ITP file listing
        itps = '\n'.join([
            '#include "%s.itp"' % molecule
            for molecule in set(moleculeTypes.values())
        ])

        # Molecule listing
        logging.info("Output contains %d molecules:" % len(molecules))
        n = 1
        for molecule in molecules:
            chainInfo = (n, moleculeTypes[molecule],
                         len(molecule) > 1 and "s" or " ",
                         " ".join([i.id for i in molecule]))
            logging.info(" %2d-> %s (chain%s %s)" % chainInfo)
            n += 1
        molecules = '\n'.join(
            ['%s \t 1' % moleculeTypes[molecule] for molecule in molecules])

        # Set a define if we are to use rubber bands
        useRubber = options['ElasticNetwork'] and "#define RUBBER_BANDS" or ""

        # XXX Specify a better, version specific base-itp name.
        # Do not set a define for position restrains here, as people are more used to do it in mdp file?
        # NOTE(review): the line breaks of this template were reconstructed;
        # on a single line the ';' comments would swallow the directives that
        # follow them. Confirm exact spacing against the upstream file.
        try:
            topOut.write('''#include "martini.itp"

%s

%s

[ system ]
; name
Martini system from %s

[ molecules ]
; name number
%s''' % (useRubber, itps, options["-f"] and options["-f"].value
         or "stdin", molecules))
        finally:
            if topOut is not sys.stdout:
                topOut.close()

        logging.info('Written topology files')

    # Maybe there are forcefield specific log messages?
    options['ForceField'].messages()

    # The following lines are always printed (if no errors occur).
    # Each print takes one pre-built string so the statement parses, and
    # emits the same text, whether print is a statement or a function.
    print("\n\tThere you are. One MARTINI. Shaken, not stirred.\n")
    Q = DOC.martiniq.pop(random.randint(0, len(DOC.martiniq) - 1))
    print("\n%s \n%80s \n" % (Q[1], "--" + Q[0]))