def enter():
    """Create the objects used by this game state and store them in module globals.

    Construction order is kept fixed: ZORO, MAP, Hurdle1, Jelly, Score, font.
    """
    # `running` is declared global here but is not assigned by this function.
    global zoro, running, map, hurdle, jelly, score, font

    zoro = ZORO()
    map = MAP()

    # For hurdles and jellies the result of create() is kept, not the
    # instance that produced it.
    hurdle_source = Hurdle1()
    hurdle = hurdle_source.create()
    jelly_source = Jelly()
    jelly = jelly_source.create()

    score = Score()
    font = load_font('ENCR10B.TTF')
def enter():
    """Reset shared module state and build every object this game state uses.

    The position/state attributes on MONSTER, TRAP and ITEM are zeroed before
    any object is constructed, so that entering the state again starts from a
    clean slate. All created objects are published as module globals.
    """
    global girl, stage1_monster, stage2_monster, stage3_monster, stage4_monster, stage2_trap, stage1_map, stage2_map, stage3_map
    global hp_gauge, stage2_monster_attack, stage3_monster_attack, skill, current_time, Level, protected, stage4_map, score

    # Reset object positions so the state can be restarted.
    MONSTER.monster_positionX = 0
    MONSTER.monster_positionY = 0
    TRAP.trap_positionX = 0
    TRAP.trap_positionY = 0
    ITEM.protected_positionX = 0
    ITEM.protected_positionY = 0
    ITEM.protect_State = 0

    Level = 0
    score = 0

    girl = Character.Character()

    stage1_monster = [MONSTER.Stage1_Monster() for _ in range(20)]
    stage2_monster = [MONSTER.Stage2_Monster() for _ in range(20)]
    stage3_monster = [MONSTER.Stage3_Monster() for _ in range(20)]
    stage4_monster = [MONSTER.Stage4_Monster() for _ in range(40)]

    stage2_monster_attack = [Stage2_Attack() for _ in range(10)]
    stage3_monster_attack = [Stage3_Attack() for _ in range(15)]

    stage2_trap = [TRAP.Trap() for _ in range(13)]

    # Raw strings: the paths contain backslashes followed by letters ("\M",
    # "\s"), which are invalid escape sequences in ordinary literals. The
    # resulting path values are unchanged.
    stage4_map = MAP.Map(r'image\MAP\MAP(STAGE4)_450x1200.png', r'music\stage4BGM.mp3', 225, 25425)
    stage3_map = MAP.Map(r'image\MAP\MAP(STAGE3)_450x750.png', r'music\stage3BGM.mp3', 225, 18000)
    stage2_map = MAP.Map(r'image\MAP\MAP(STAGE2)_450x750.png', r'music\stage2BGM.mp3', 225, 10800)
    stage1_map = MAP.Map(r'image\MAP\MAP(STAGE1)_450x750.png', r'music\stage1BGM.mp3', 225, 3600)

    hp_gauge = GAUGE.Gauge()
    skill = Skill()
    protected = [ITEM.Protected() for _ in range(5)]

    current_time = get_time()
def enter():
    """Create the objects used by this game state and store them in module globals.

    Construction order is kept fixed. Jelly, Alch and Crr are each
    instantiated twice: once to obtain the result of create(), and once more
    for the *_sound globals, which hold the plain instances.
    """
    # `running` is declared global here but is not assigned by this function.
    global alch, zoro, running, map, hurdle, jelly, score, font, ruppy, boom, crr, jelly_sound, alch_sound, crr_sound

    zoro = ZORO()
    map = MAP()

    # Collections produced by each type's create() method.
    alch_source = Alch()
    alch = alch_source.create()
    crr_source = Crr()
    crr = crr_source.create()
    boom_source = Boom()
    boom = boom_source.create()
    ruppy_source = Ruppy()
    ruppy = ruppy_source.create()
    hurdle_source = Hurdle1()
    hurdle = hurdle_source.create()
    jelly_source = Jelly()
    jelly = jelly_source.create()

    score = Score()
    font = load_font('ENCR10B.TTF')

    # Separate, freshly constructed instances kept under the *_sound names.
    jelly_sound = Jelly()
    alch_sound = Alch()
    crr_sound = Crr()
def lr_tune(train_data, validation_data, val_true_list, regParam, netParam):
    """Grid-search a LinearRegression over regParam x elasticNetParam.

    Every combination is fitted on ``train_data`` and scored on
    ``validation_data`` twice: by RMSE on the (non-negative clipped)
    predictions, and by ``MAP.getMAP`` on the top-ranked predictions per user.

    Args:
        train_data: data passed to ``LinearRegression.fit``.
        validation_data: data passed to ``model.transform``.
        val_true_list: ground truth forwarded unchanged to ``MAP.getMAP``.
        regParam: iterable of regularisation values to try.
        netParam: iterable of elastic-net mixing values to try; it is
            iterated once per value of ``regParam``.

    Returns:
        Tuple ``(best_model_rmse, best_model_map)``. Both are ``None`` when
        the grid is empty.
    """
    # Best-by-RMSE bookkeeping.
    min_error = float('inf')
    best_reg1 = None
    best_net1 = None
    best_model_rmse = None

    # Best-by-MAP bookkeeping.
    max_map = 0.0
    best_reg2 = None
    best_net2 = None
    best_model_map = None

    for reg in regParam:
        for net in netParam:
            lr = LinearRegression(featuresCol='idf_features', labelCol='rating',
                                  regParam=reg, elasticNetParam=net, maxIter=200)
            model = lr.fit(train_data)

            predictions = model.transform(validation_data)
            # Negative predictions are replaced by 0 before scoring.
            predictions = predictions.withColumn(
                'prediction',
                when(predictions['prediction'] < 0, 0).otherwise(predictions['prediction']))

            # RMSE
            evaluator = RegressionEvaluator(metricName='rmse', labelCol='rating',
                                            predictionCol='prediction')
            rmse = evaluator.evaluate(predictions)
            if rmse < min_error:
                min_error = rmse
                best_reg1 = reg
                best_net1 = net
                best_model_rmse = model

            # MAP over rows whose per-user rank (by descending prediction) is <= 25.
            window = Window.partitionBy(predictions['user_id']).orderBy(
                predictions['prediction'].desc())
            top_predictions = predictions.select(
                '*', rank().over(window).alias('row_num')).filter(col('row_num') <= 25)
            current_map = MAP.getMAP(top_predictions, val_true_list)

            # Accept a strict improvement, or the first candidate that at
            # least ties the initial score. Without the second clause a grid
            # in which every model scores MAP == 0 would return None as the
            # MAP-best model.
            improved = current_map > max_map
            first_candidate = best_model_map is None and current_map >= max_map
            if improved or first_candidate:
                max_map = current_map
                best_reg2 = reg
                best_net2 = net
                best_model_map = model

            print('regParam = {} with elasticNetParam = {}: validation RMSE is {} validation MAP is {}'.format(reg, net, rmse, current_map))

    print('The best model select by RMSE has regParam = {} with elasticNetParam = {}: RMSE = {}'.format(best_reg1, best_net1, min_error))
    print('The best model select by MAP has regParam = {} with elasticNetParam = {}: MAP = {}'.format(best_reg2, best_net2, max_map))
    return best_model_rmse, best_model_map
# Generate levels, make the generated list the active level list, then let
# the loader work out the canvas size.
mapGenerator.generate_levels(1, 2)
SETTINGS.levels_list = SETTINGS.glevels_list
gameLoad.get_canvas_size()

# Setup and classes
text = TEXT.Text(0, 0, "YOU WON", SETTINGS.WHITE, "DUGAFONT.ttf", 48)
beta = TEXT.Text(5, 5, "META ALFA BUILD V.1.6", SETTINGS.WHITE, "DUGAFONT.ttf", 20)

# Position `text` so that its layout is centred: horizontally on the actual
# canvas width, vertically on the target canvas height.
text.update_pos(
    SETTINGS.canvas_actual_width / 2 - text.layout.get_width() / 2,
    SETTINGS.canvas_target_height / 2 - text.layout.get_height() / 2,
)

# Classes for later use
gameMap = MAP.Map(SETTINGS.levels_list[SETTINGS.current_level].array)
gameCanvas = Canvas(SETTINGS.canvas_map_width, SETTINGS.canvas_map_height)
gamePlayer = PLAYER.Player(SETTINGS.player_pos)
gameRaycast = RAYCAST.Raycast(gameCanvas.canvas, gameCanvas.window)
gameInv = INVENTORY.inventory({
    'bullet': 150,
    'shell': 25,
    'ferromag': 50,
})
gameHUD = HUD.hud()

# More loading - level specific
gameLoad.load_new_level()

# Controller classes
menuController = MENU.Controller(gameCanvas.window)
musicController = MUSIC.Music()
tutorialController = TUTORIAL.Controller()

# Run at last
def enter():
    """Create the ZORO and MAP objects for this state and store them in module globals."""
    # `running` is declared global here but is not assigned by this function.
    global zoro, running, map

    zoro = ZORO()
    map = MAP()
def cg(self, force=False, com=False):
    """Return the coarse grained structure, generating it when necessary.

    The result is cached in ``self._cg``; a non-empty cache is returned as-is
    unless ``force`` is true.

    Args:
        force: regenerate even when a cached structure exists.
        com: shift each O3' atom to the following residue before mapping
            (needed for DNA when calculating centres of mass). Note that this
            edits the residue lists held in ``self.residues`` in place.

    Returns:
        List of tuples ``(bead name, residue name[:3], residue number,
        chain id, x, y, z, secondary structure number)``.

    Side effects:
        Appends one list of atom indices per bead to ``self.mapping``.
        Calls ``sys.exit(1)`` after all residues were processed if any
        residue could not be mapped because of missing atoms.
    """
    # If the coarse grained structure is set already, just return,
    # unless regeneration is forced.
    if self._cg and not force:
        return self._cg

    # NOTE(review): self.mapping is appended to below but is not reset here;
    # confirm that a forced regeneration is not expected to start it afresh.
    self._cg = []
    atid = 1
    fail = False
    previous = ''

    for residue, rss, resname in zip(self.residues, self.sstypes, self.sequence):
        # For DNA we need to get the O3' to the following residue when calculating COM.
        # The force and com options ensure that this part does not affect
        # itp generation or anything else.
        if com:
            # Index of the O3' of the first residue that has one; 0 means "none recorded".
            store = 0
            for ind, atom in enumerate(residue):
                if atom[0] == "O3'":
                    if previous != '':
                        # Put the O3' of the preceding residue in this slot.
                        residue[ind] = previous
                    else:
                        store = ind
                    previous = atom
            # We couldn't remove the O3' from the 5' end residue during the
            # loop, so we do it now.
            if store > 0:
                del residue[store]

        # The residue name may have changed, for example because the user
        # has set residues interactively; take it from the sequence.
        residue = [(atom[0], resname) + atom[2:] for atom in residue]
        resn = residue[0][1]

        if resn in ("SOL", "HOH", "TIP"):
            continue
        if not resn in MAP.CoarseGrained.mapping.keys():
            logging.warning("Skipped unknown residue %s\n" % resn)
            continue

        # MAP.map returns bead coordinates and mapped atoms.
        # This will fail if there are (too many) atoms missing, which is
        # only problematic if a mapped structure is written; the topology
        # is inferred from the sequence. So this is the best place to raise
        # an error.
        try:
            beads, ids = MAP.map(residue, ca2bb=self.options['ForceField'].ca2bb)
            # Materialise the zip so the beads can be handed to add_dummy
            # and iterated afterwards regardless of the zip implementation.
            beads = list(zip(MAP.CoarseGrained.names[resn], beads, ids))
            if resn in self.options['ForceField'].polar:
                beads = add_dummy(beads, dist=0.14, n=2)
            elif resn in self.options['ForceField'].charged:
                beads = add_dummy(beads, dist=0.11, n=1)
        except ValueError:
            logging.error("Too many atoms missing from residue %s %d(ch:%s):",
                          resn, residue[0][2]-(32 << 20), residue[0][3])
            logging.error(repr([i[0] for i in residue]))
            fail = True
            # Nothing was mapped for this residue. Skip it so that neither an
            # unbound nor a stale `beads` from the previous residue is used;
            # the remaining residues are still checked so every failure is
            # reported before exiting.
            continue

        for name, (x, y, z), ids in beads:
            # Add the bead with coordinates and secondary structure id to the list
            self._cg.append((name, resn[:3], residue[0][2], residue[0][3], x, y, z, SS.ss2num[rss]))
            # Add the ids to the list, after converting them to indices to the list of atoms
            self.mapping.append([atid+i for i in ids])

        # Increment the atom id; this pertains to the atoms that are included in the output.
        atid += len(residue)

    if fail:
        logging.error("Unable to generate coarse grained structure due to missing atoms.")
        sys.exit(1)

    return self._cg
class BRAIN(MP): def init(self): self.map = MAP(self.md) def explore(self): # find most unexplored area, and drive there pass def run_impl(self): # first, check bumper if "Bumper" in self.md: print "===========================================================" print "============ BUMPER ============================" print "===========================================================" print "===========================================================" # hardcoded evade behavior self.md["Move"] = [0.2, "backward"] time.sleep(2) self.md["Move"] = [0.2, "left"] time.sleep(1) del self.md["Bumper"] #clean up from this round del self.md["lidar_points"] return # next, if we have odometry, save and react to laser input if "MCS" in self.md: self.md["WCS"] = self.md["MCS"] if "lidar_points" in self.md: # build map using laser points self.map.integrate(self.md["WCS"], self.md["lidar_points"]) #self.map.visualize(((getMs()-self.md["starttime"]) / 1000.),save=True) # save a snapshot of relevant data to files ''' tsnow = getMs() np.save("/tmp/%d_LIDAR"%tsnow, self.md["lidar_points"]) coord = np.array([self.md["WCS"].x, self.md["WCS"].y, self.md["WCS"].a]) np.save("/tmp/%d_WCS"%tsnow,coord) np.save("/tmp/%d_MAPPOINTS"%tsnow, self.map.mappoints) np.save("/tmp/%d_TILES"%tsnow, self.map.tiles) ''' free = True for i in range(10, -10, -1): m = self.md["lidar"][i] if np.isnan(m): print ".", elif m > 100 and m < 500: print "X", free = False else: print "O", print " ", print if not free: self.md["Move"] = [0.1, "left"] # was 0-100, now 0-255 else: self.md["Move"] = [0.1, "forward"] del self.md["lidar_points"] ''' # try to drive to the least known map cell target_coords = np.unravel_index(np.argmin(self.map.tiles), self.map.tiles.shape) print "Minimal knowledge at tile: " tmp = self.map.tile2coordm(target_coords[1], target_coords[0]) self.md["target"] = Coordinate(tmp[0], tmp[1], 0) self.map.wcs2rcs(self.md["target"]) # first, rotate until we are facing it # transfer map tile (WCS) to RCS to 
calculate relative angle ''' #time.sleep(0.1) '''
def init(self):
    """Build the MAP instance for this object from ``self.md`` and keep it as ``self.map``."""
    shared = self.md
    self.map = MAP(shared)
def main(options): # Check whether to read from a gro/pdb file or from stdin # We use an iterator to wrap around the stream to allow # inferring the file type, without consuming lines already inStream = IO.streamTag(options["-f"] and options["-f"].value or sys.stdin) # The streamTag iterator first yields the file type, which # is used to specify the function for reading frames fileType = inStream.next() if fileType == "GRO": frameIterator = IO.groFrameIterator else: frameIterator = IO.pdbFrameIterator # ITERATE OVER FRAMES IN STRUCTURE FILE # # Now iterate over the frames in the stream # This should become a StructureFile class with a nice .next method model = 1 cgOutPDB = None ssTotal = [] cysteines = [] for title, atoms, box in frameIterator(inStream): if fileType == "PDB": # The PDB file can have chains, in which case we list and process them specifically # TER statements are also interpreted as chain separators # A chain may have breaks in which case the breaking residues are flagged chains = [ IO.Chain(options, [i for i in IO.residues(chain)]) for chain in IO.pdbChains(atoms) ] else: # The GRO file does not define chains. Here breaks in the backbone are # interpreted as chain separators. residuelist = [residue for residue in IO.residues(atoms)] # The breaks are indices to residues broken = IO.breaks(residuelist) # Reorder, such that each chain is specified with (i,j,k) # where i and j are the start and end of the chain, and # k is a chain identifier chains = zip([0] + broken, broken + [len(residuelist)], range(len(broken) + 1)) chains = [ IO.Chain(options, residuelist[i:j], name=chr(65 + k)) for i, j, k in chains ] for chain in chains: chain.multiscale = "all" in options[ 'multi'] or chain.id in options['multi'] # Check the chain identifiers if model == 1 and len(chains) != len(set([i.id for i in chains])): # Ending down here means that non-consecutive blocks of atoms in the # PDB file have the same chain ID. 
The warning pertains to PDB files only, # since chains from GRO files get a unique chain identifier assigned. logging.warning( "Several chains have identical chain identifiers in the PDB file." ) # Check if chains are of mixed type. If so, split them. # Note that in some cases HETATM residues are part of a # chain. This will get problematic. But we cannot cover # all, probably. if not options['MixedChains']: demixedChains = [] for chain in chains: demixedChains.extend(chain.split()) chains = demixedChains n = 1 logging.info("Found %d chains:" % len(chains)) for chain in chains: logging.info(" %2d: %s (%s), %d atoms in %d residues." % (n, chain.id, chain._type, chain.natoms, len(chain))) n += 1 # Check all chains keep = [] for chain in chains: if chain.type() == "Water": logging.info("Removing %d water molecules (chain %s)." % (len(chain), chain.id)) elif chain.type() in ("Protein", "Nucleic"): keep.append(chain) # This is currently not active: elif options['RetainHETATM']: keep.append(chain) else: logging.info( "Removing HETATM chain %s consisting of %d residues." % (chain.id, len(chain))) chains = keep # Here we interactively check the charge state of resides # Can be easily expanded to residues other than HIS for chain in chains: for i, resname in enumerate(chain.sequence): if resname == 'HIS' and options['chHIS']: choices = {0: 'HIH', 1: 'HIS'} choice = IO.getChargeType(resname, i, choices) chain.sequence[i] = choice # Check which chains need merging if model == 1: order, merge = IO.check_merge( chains, options['mergeList'], options['linkList'], options['CystineCheckBonds'] and options['CystineMaxDist2']) # Get the total length of the sequence seqlength = sum([len(chain) for chain in chains]) logging.info('Total size of the system: %s residues.' 
% seqlength) ## SECONDARY STRUCTURE ss = '' if options['Collagen']: for chain in chains: chain.set_ss("F") ss += chain.ss elif options["-ss"]: # XXX We need error-catching here, # in case the file doesn't excist, or the string contains bogus. # If the string given for the sequence consists strictly of upper case letters # and does not appear to be a file, assume it is the secondary structure ss = options["-ss"].value.replace('~', 'L').replace(' ', 'L') if ss.isalnum() and ss.isupper() and not os.path.exists( options["-ss"].value): ss = options["-ss"].value logging.info('Secondary structure read from command-line:\n' + ss) else: # There ought to be a file with the name specified ssfile = [i.strip() for i in open(options["-ss"].value)] # Try to read the file as a Gromacs Secondary Structure Dump # Those have an integer as first line if ssfile[0].isdigit(): logging.info( 'Will read secondary structure from file (assuming Gromacs ssdump).' ) ss = "".join([i for i in ssfile[1:]]) else: # Get the secondary structure type from DSSP output logging.info( 'Will read secondary structure from file (assuming DSSP output).' ) pss = re.compile(r"^([ 0-9]{4}[0-9]){2}") ss = "".join([ i[16] for i in open(options["-ss"].value) if re.match(pss, i) ]) # Now set the secondary structure for each of the chains sstmp = ss for chain in chains: ln = min(len(sstmp), len(chain)) chain.set_ss(sstmp[:ln]) sstmp = ss[:ln] else: if options["-dssp"]: method, executable = "dssp", options["-dssp"].value #elif options["-pymol"]: # method, executable = "pymol", options["-pymol"].value else: logging.warning( "No secondary structure or determination method speficied. Protein chains will be set to 'COIL'." 
) method, executable = None, None for chain in chains: ss += chain.dss(method, executable) # Used to be: if method in ("dssp","pymol"): but pymol is not supported if method in ["dssp"]: logging.debug('%s determined secondary structure:\n' % method.upper() + ss) # Collect the secondary structure classifications for different frames ssTotal.append(ss) # Write the coarse grained structure if requested if options["-x"].value: logging.info("Writing coarse grained structure.") if cgOutPDB is None: cgOutPDB = open(options["-x"].value, "w") cgOutPDB.write("MODEL %8d\n" % model) cgOutPDB.write(title) cgOutPDB.write(IO.pdbBoxString(box)) atid = 1 for i in order: ci = chains[i] if ci.multiscale: for r in ci.residues: for name, resn, resi, chain, x, y, z in r: cgOutPDB.write( IO.pdbOut( (name, resn[:3], resi, chain, x, y, z), i=atid)) atid += 1 coarseGrained = ci.cg(com=True) if coarseGrained: for name, resn, resi, chain, x, y, z, ssid in coarseGrained: if ci.multiscale: name = "v" + name cgOutPDB.write( IO.pdbOut((name, resn[:3], resi, chain, x, y, z), i=atid, ssid=ssid)) atid += 1 cgOutPDB.write("TER\n") else: logging.warning( "No mapping for coarse graining chain %s (%s); chain is skipped." % (ci.id, ci.type())) cgOutPDB.write("ENDMDL\n") # Gather cysteine sulphur coordinates cyslist = [cys["SG"] for chain in chains for cys in chain["CYS"]] cysteines.append([cys for cys in cyslist if cys]) model += 1 # Write the index file if requested. # Mainly of interest for multiscaling. # Could be improved by adding separte groups for BB, SC, etc. if options["-n"].value: logging.info("Writing index file.") # Lists for All-atom, Virtual sites and Coarse Grain. 
NAA, NVZ, NCG = [], [], [] atid = 1 for i in order: ci = chains[i] coarseGrained = ci.cg(force=True) if ci.multiscale: NAA.extend([" %5d" % (a + atid) for a in range(ci.natoms)]) atid += ci.natoms if coarseGrained: if ci.multiscale: NVZ.extend([ " %5d" % (a + atid) for a in range(len(coarseGrained)) ]) else: NCG.extend([ " %5d" % (a + atid) for a in range(len(coarseGrained)) ]) atid += len(coarseGrained) outNDX = open(options["-n"].value, "w") outNDX.write("\n[ AA ]\n" + "\n".join( [" ".join(NAA[i:i + 15]) for i in range(0, len(NAA), 15)])) outNDX.write("\n[ VZ ]\n" + "\n".join( [" ".join(NVZ[i:i + 15]) for i in range(0, len(NVZ), 15)])) outNDX.write("\n[ CG ]\n" + "\n".join( [" ".join(NCG[i:i + 15]) for i in range(0, len(NCG), 15)])) outNDX.close() # Write the index file for mapping AA trajectory if requested if options["-nmap"].value: logging.info("Writing trajectory index file.") atid = 1 outNDX = open(options["-nmap"].value, "w") # Get all AA atoms as lists of atoms in residues # First we skip hetatoms and unknowns then iterate over beads # In DNA the O3' atom is mapped together with atoms from the next residue # This stores it until we get to the next residue o3_shift = '' for i_count, i in enumerate(IO.residues(atoms)): if i[0][1] in ("SOL", "HOH", "TIP"): continue if not i[0][1] in MAP.CoarseGrained.mapping.keys(): continue nra = 0 names = [j[0] for j in i] # This gives out a list of atoms in residue, each tuple has other # stuff in it that's needed elsewhere so we just take the last # element which is the atom index (in that residue) for j_count, j in enumerate(MAP.mapIndex(i)): outNDX.write('[ Bead %i of residue %i ]\n' % (j_count + 1, i_count + 1)) line = '' for k in j: if names[k[2]] == "O3'": line += '%s ' % (str(o3_shift)) o3_shift = k[2] + atid else: line += '%i ' % (k[2] + atid) line += '\n' nra += len(j) outNDX.write(line) atid += nra # Evertything below here we only need, if we need to write a Topology if options['-o']: # Collect the secondary 
structure stuff and decide what to do with it # First rearrange by the residue ssTotal = zip(*ssTotal) ssAver = [] for i in ssTotal: si = list(set(i)) if len(si) == 1: # Only one type -- consensus ssAver.append(si[0]) else: # Transitions between secondary structure types i = list(i) si = [(1.0 * i.count(j) / len(i), j) for j in si] si.sort() if si[-1][0] > options["-ssc"].value: ssAver.append(si[-1][1]) else: ssAver.append(" ") ssAver = "".join(ssAver) logging.info( '(Average) Secondary structure has been determined (see head of .itp-file).' ) # Divide the secondary structure according to the division in chains # This will set the secondary structure types to be used for the # topology. for chain in chains: chain.set_ss(ssAver[:len(chain)]) ssAver = ssAver[len(chain):] # Now the chains are complete, each consisting of a residuelist, # and a secondary structure designation if the chain is of type 'Protein'. # There may be mixed chains, there may be HETATM things. # Water has been discarded. Maybe this has to be changed at some point. # The order in the coarse grained files matches the order in the set of chains. # # If there are no merges to be done, i.e. no global Elnedyn network, no # disulphide bridges, no links, no distance restraints and no explicit merges, # then we can write out the topology, which will match the coarse grained file. # # If there are merges to be done, the order of things may be changed, in which # case the coarse grained structure will not match with the topology... 
# CYSTINE BRIDGES # # Extract the cysteine coordinates (for all frames) and the cysteine identifiers if options['CystineCheckBonds']: logging.info( "Checking for cystine bridges, based on sulphur (SG) atoms lying closer than %.4f nm" % math.sqrt(options['CystineMaxDist2'] / 100)) cyscoord = zip(*[[j[4:7] for j in i] for i in cysteines]) cysteines = [i[:4] for i in cysteines[0]] bl, kb = options['ForceField'].special[(("SC1", "CYS"), ("SC1", "CYS"))] # Check the distances and add the cysteines to the link list if the # SG atoms have a distance smaller than the cutoff. rlc = range(len(cysteines)) for i in rlc[:-1]: for j in rlc[i + 1:]: # Checking the minimum distance over all frames # But we could also take the maximum, or the mean d2 = min([ FUNC.distance2(a, b) for a, b in zip(cyscoord[i], cyscoord[j]) ]) if d2 <= options['CystineMaxDist2']: a, b = cysteines[i], cysteines[j] options['linkListCG'].append( (("SC1", "CYS", a[2], a[3]), ("SC1", "CYS", b[2], b[3]), bl, kb)) a, b = (a[0], a[1], a[2] - (32 << 20), a[3]), (b[0], b[1], b[2] - (32 << 20), b[3]) logging.info( "Detected SS bridge between %s and %s (%f nm)" % (a, b, math.sqrt(d2) / 10)) # REAL ITP STUFF # # Check whether we have identical chains, in which case we # only write the ITP for one... # This means making a distinction between chains and # moleculetypes. molecules = [tuple([chains[i] for i in j]) for j in merge] # At this point we should have a list or dictionary of chains # Each chain should be given a unique name, based on the value # of options["-o"] combined with the chain identifier and possibly # a number if there are chains with identical identifiers. # For each chain we then write an ITP file using the name for # moleculetype and name + ".itp" for the topology include file. # In addition we write a master topology file, using the value of # options["-o"], with an added extension ".top" if not given. 
# XXX *NOTE*: This should probably be gathered in a 'Universe' class itp = 0 moleculeTypes = {} for mi in range(len(molecules)): mol = molecules[mi] # Check if the moleculetype is already listed # If not, generate the topology from the chain definition if mol not in moleculeTypes or options['SeparateTop']: # Name of the moleculetype # XXX: The naming should be changed; now it becomes Protein_X+Protein_Y+... name = "+".join( [chain.getname(options['-name'].value) for chain in mol]) moleculeTypes[mol] = name # Write the molecule type topology top = TOP.Topology(mol[0], options=options, name=name) for m in mol[1:]: top += TOP.Topology(m, options=options) # Have to add the connections, like the connecting network # Gather coordinates mcg, coords = zip(*[(j[:4], j[4:7]) for m in mol for j in m.cg(force=True)]) mcg = list(mcg) # Run through the link list and add connections (links = cys bridges or hand specified links) for atomA, atomB, bondlength, forceconst in options[ 'linkListCG']: if bondlength == -1 and forceconst == -1: bondlength, forceconst = options['ForceField'].special[ (atomA[:2], atomB[:2])] # Check whether this link applies to this group atomA = atomA in mcg and mcg.index(atomA) + 1 atomB = atomB in mcg and mcg.index(atomB) + 1 if atomA and atomB: cat = (forceconst is None) and "Constraint" or "Link" top.bonds.append( TOP.Bond((atomA, atomB), options=options, type=1, parameters=(bondlength, forceconst), category=cat, comments="Cys-bonds/special link")) # Elastic Network # The elastic network is added after the topology is constructed, since that # is where the correct atom list with numbering and the full set of # coordinates for the merged chains are available. 
if options['ElasticNetwork']: rubberType = options['ForceField'].EBondType rubberList = ELN.rubberBands( [(i[0], j) for i, j in zip(top.atoms, coords) if i[4] in options['ElasticBeads']], options['ElasticLowerBound'], options['ElasticUpperBound'], options['ElasticDecayFactor'], options['ElasticDecayPower'], options['ElasticMaximumForce'], options['ElasticMinimumForce']) top.bonds.extend([ TOP.Bond(i, options=options, type=rubberType, category="Rubber band") for i in rubberList ]) # Write out the MoleculeType topology destination = options["-o"] and open( moleculeTypes[mol] + ".itp", 'w') or sys.stdout destination.write(str(top)) itp += 1 # Check whether other chains are equal to this one # Skip this step if we are to write all chains to separate moleculetypes if not options['SeparateTop']: for j in range(mi + 1, len(molecules)): if not molecules[j] in moleculeTypes and mol == molecules[ j]: # Molecule j is equal to a molecule mi # Set the name of the moleculetype to the one of that molecule moleculeTypes[molecules[j]] = moleculeTypes[mol] logging.info('Written %d ITP file%s' % (itp, itp > 1 and "s" or "")) # WRITING THE MASTER TOPOLOGY # Output stream top = options["-o"] and open(options['-o'].value, 'w') or sys.stdout # ITP file listing itps = '\n'.join([ '#include "%s.itp"' % molecule for molecule in set(moleculeTypes.values()) ]) # Molecule listing logging.info("Output contains %d molecules:" % len(molecules)) n = 1 for molecule in molecules: chainInfo = (n, moleculeTypes[molecule], len(molecule) > 1 and "s" or " ", " ".join([i.id for i in molecule])) logging.info(" %2d-> %s (chain%s %s)" % chainInfo) n += 1 molecules = '\n'.join( ['%s \t 1' % moleculeTypes[molecule] for molecule in molecules]) # Set a define if we are to use rubber bands useRubber = options['ElasticNetwork'] and "#define RUBBER_BANDS" or "" # XXX Specify a better, version specific base-itp name. # Do not set a define for position restrains here, as people are more used to do it in mdp file? 
top.write('''#include "martini.itp" %s %s [ system ] ; name Martini system from %s [ molecules ] ; name number %s''' % (useRubber, itps, options["-f"] and options["-f"].value or "stdin", molecules)) logging.info('Written topology files') # Maybe there are forcefield specific log messages? options['ForceField'].messages() # The following lines are always printed (if no errors occur). print "\n\tThere you are. One MARTINI. Shaken, not stirred.\n" Q = DOC.martiniq.pop(random.randint(0, len(DOC.martiniq) - 1)) print "\n", Q[1], "\n%80s" % ("--" + Q[0]), "\n"
def main(options): # Check whether to read from a gro/pdb file or from stdin # We use an iterator to wrap around the stream to allow # inferring the file type, without consuming lines already inStream = IO.streamTag(options["-f"] and options["-f"].value or sys.stdin) # The streamTag iterator first yields the file type, which # is used to specify the function for reading frames fileType = inStream.next() if fileType == "GRO": frameIterator = IO.groFrameIterator else: frameIterator = IO.pdbFrameIterator # ITERATE OVER FRAMES IN STRUCTURE FILE # # Now iterate over the frames in the stream # This should become a StructureFile class with a nice .next method model = 1 cgOutPDB = None ssTotal = [] cysteines = [] for title, atoms, box in frameIterator(inStream): if fileType == "PDB": # The PDB file can have chains, in which case we list and process them specifically # TER statements are also interpreted as chain separators # A chain may have breaks in which case the breaking residues are flagged chains = [IO.Chain(options, [i for i in IO.residues(chain)]) for chain in IO.pdbChains(atoms)] else: # The GRO file does not define chains. Here breaks in the backbone are # interpreted as chain separators. residuelist = [residue for residue in IO.residues(atoms)] # The breaks are indices to residues broken = IO.breaks(residuelist) # Reorder, such that each chain is specified with (i,j,k) # where i and j are the start and end of the chain, and # k is a chain identifier chains = zip([0]+broken, broken+[len(residuelist)], range(len(broken)+1)) chains = [IO.Chain(options, residuelist[i:j], name=chr(65+k)) for i, j, k in chains] for chain in chains: chain.multiscale = "all" in options['multi'] or chain.id in options['multi'] # Check the chain identifiers if model == 1 and len(chains) != len(set([i.id for i in chains])): # Ending down here means that non-consecutive blocks of atoms in the # PDB file have the same chain ID. 
The warning pertains to PDB files only, # since chains from GRO files get a unique chain identifier assigned. logging.warning("Several chains have identical chain identifiers in the PDB file.") # Check if chains are of mixed type. If so, split them. # Note that in some cases HETATM residues are part of a # chain. This will get problematic. But we cannot cover # all, probably. if not options['MixedChains']: demixedChains = [] for chain in chains: demixedChains.extend(chain.split()) chains = demixedChains n = 1 logging.info("Found %d chains:" % len(chains)) for chain in chains: logging.info(" %2d: %s (%s), %d atoms in %d residues." % (n, chain.id, chain._type, chain.natoms, len(chain))) n += 1 # Check all chains keep = [] for chain in chains: if chain.type() == "Water": logging.info("Removing %d water molecules (chain %s)." % (len(chain), chain.id)) elif chain.type() in ("Protein", "Nucleic"): keep.append(chain) # This is currently not active: elif options['RetainHETATM']: keep.append(chain) else: logging.info("Removing HETATM chain %s consisting of %d residues." % (chain.id, len(chain))) chains = keep # Here we interactively check the charge state of resides # Can be easily expanded to residues other than HIS for chain in chains: for i, resname in enumerate(chain.sequence): if resname == 'HIS' and options['chHIS']: choices = {0: 'HIH', 1: 'HIS'} choice = IO.getChargeType(resname, i, choices) chain.sequence[i] = choice # Check which chains need merging if model == 1: order, merge = IO.check_merge(chains, options['mergeList'], options['linkList'], options['CystineCheckBonds'] and options['CystineMaxDist2']) # Get the total length of the sequence seqlength = sum([len(chain) for chain in chains]) logging.info('Total size of the system: %s residues.' 
% seqlength) ## SECONDARY STRUCTURE ss = '' if options['Collagen']: for chain in chains: chain.set_ss("F") ss += chain.ss elif options["-ss"]: # XXX We need error-catching here, # in case the file doesn't excist, or the string contains bogus. # If the string given for the sequence consists strictly of upper case letters # and does not appear to be a file, assume it is the secondary structure ss = options["-ss"].value.replace('~', 'L').replace(' ', 'L') if ss.isalnum() and ss.isupper() and not os.path.exists(options["-ss"].value): ss = options["-ss"].value logging.info('Secondary structure read from command-line:\n'+ss) else: # There ought to be a file with the name specified ssfile = [i.strip() for i in open(options["-ss"].value)] # Try to read the file as a Gromacs Secondary Structure Dump # Those have an integer as first line if ssfile[0].isdigit(): logging.info('Will read secondary structure from file (assuming Gromacs ssdump).') ss = "".join([i for i in ssfile[1:]]) else: # Get the secondary structure type from DSSP output logging.info('Will read secondary structure from file (assuming DSSP output).') pss = re.compile(r"^([ 0-9]{4}[0-9]){2}") ss = "".join([i[16] for i in open(options["-ss"].value) if re.match(pss, i)]) # Now set the secondary structure for each of the chains sstmp = ss for chain in chains: ln = min(len(sstmp), len(chain)) chain.set_ss(sstmp[:ln]) sstmp = ss[:ln] else: if options["-dssp"]: method, executable = "dssp", options["-dssp"].value #elif options["-pymol"]: # method, executable = "pymol", options["-pymol"].value else: logging.warning("No secondary structure or determination method speficied. 
Protein chains will be set to 'COIL'.") method, executable = None, None for chain in chains: ss += chain.dss(method, executable) # Used to be: if method in ("dssp","pymol"): but pymol is not supported if method in ["dssp"]: logging.debug('%s determined secondary structure:\n' % method.upper()+ss) # Collect the secondary structure classifications for different frames ssTotal.append(ss) # Write the coarse grained structure if requested if options["-x"].value: logging.info("Writing coarse grained structure.") if cgOutPDB is None: cgOutPDB = open(options["-x"].value, "w") cgOutPDB.write("MODEL %8d\n" % model) cgOutPDB.write(title) cgOutPDB.write(IO.pdbBoxString(box)) atid = 1 for i in order: ci = chains[i] if ci.multiscale: for r in ci.residues: for name, resn, resi, chain, x, y, z in r: cgOutPDB.write(IO.pdbOut((name, resn[:3], resi, chain, x, y, z),i=atid)) atid += 1 coarseGrained = ci.cg(com=True) if coarseGrained: for name, resn, resi, chain, x, y, z, ssid in coarseGrained: if ci.multiscale: name = "v"+name cgOutPDB.write(IO.pdbOut((name, resn[:3], resi, chain, x, y, z),i=atid,ssid=ssid)) atid += 1 cgOutPDB.write("TER\n") else: logging.warning("No mapping for coarse graining chain %s (%s); chain is skipped." % (ci.id, ci.type())) cgOutPDB.write("ENDMDL\n") # Gather cysteine sulphur coordinates cyslist = [cys["SG"] for chain in chains for cys in chain["CYS"]] cysteines.append([cys for cys in cyslist if cys]) model += 1 # Write the index file if requested. # Mainly of interest for multiscaling. # Could be improved by adding separte groups for BB, SC, etc. if options["-n"].value: logging.info("Writing index file.") # Lists for All-atom, Virtual sites and Coarse Grain. 
NAA, NVZ, NCG = [], [], [] atid = 1 for i in order: ci = chains[i] coarseGrained = ci.cg(force=True) if ci.multiscale: NAA.extend([" %5d" % (a+atid) for a in range(ci.natoms)]) atid += ci.natoms if coarseGrained: if ci.multiscale: NVZ.extend([" %5d" % (a+atid) for a in range(len(coarseGrained))]) else: NCG.extend([" %5d" % (a+atid) for a in range(len(coarseGrained))]) atid += len(coarseGrained) outNDX = open(options["-n"].value, "w") outNDX.write("\n[ AA ]\n"+"\n".join([" ".join(NAA[i:i+15]) for i in range(0, len(NAA), 15)])) outNDX.write("\n[ VZ ]\n"+"\n".join([" ".join(NVZ[i:i+15]) for i in range(0, len(NVZ), 15)])) outNDX.write("\n[ CG ]\n"+"\n".join([" ".join(NCG[i:i+15]) for i in range(0, len(NCG), 15)])) outNDX.close() # Write the index file for mapping AA trajectory if requested if options["-nmap"].value: logging.info("Writing trajectory index file.") atid = 1 outNDX = open(options["-nmap"].value, "w") # Get all AA atoms as lists of atoms in residues # First we skip hetatoms and unknowns then iterate over beads # In DNA the O3' atom is mapped together with atoms from the next residue # This stores it until we get to the next residue o3_shift = '' for i_count, i in enumerate(IO.residues(atoms)): if i[0][1] in ("SOL", "HOH", "TIP"): continue if not i[0][1] in MAP.CoarseGrained.mapping.keys(): continue nra = 0 names = [j[0] for j in i] # This gives out a list of atoms in residue, each tuple has other # stuff in it that's needed elsewhere so we just take the last # element which is the atom index (in that residue) for j_count, j in enumerate(MAP.mapIndex(i)): outNDX.write('[ Bead %i of residue %i ]\n' % (j_count+1, i_count+1)) line = '' for k in j: if names[k[2]] == "O3'": line += '%s ' % (str(o3_shift)) o3_shift = k[2]+atid else: line += '%i ' % (k[2]+atid) line += '\n' nra += len(j) outNDX.write(line) atid += nra # Evertything below here we only need, if we need to write a Topology if options['-o']: # Collect the secondary structure stuff and decide what to do 
with it # First rearrange by the residue ssTotal = zip(*ssTotal) ssAver = [] for i in ssTotal: si = list(set(i)) if len(si) == 1: # Only one type -- consensus ssAver.append(si[0]) else: # Transitions between secondary structure types i = list(i) si = [(1.0*i.count(j)/len(i), j) for j in si] si.sort() if si[-1][0] > options["-ssc"].value: ssAver.append(si[-1][1]) else: ssAver.append(" ") ssAver = "".join(ssAver) logging.info('(Average) Secondary structure has been determined (see head of .itp-file).') # Divide the secondary structure according to the division in chains # This will set the secondary structure types to be used for the # topology. for chain in chains: chain.set_ss(ssAver[:len(chain)]) ssAver = ssAver[len(chain):] # Now the chains are complete, each consisting of a residuelist, # and a secondary structure designation if the chain is of type 'Protein'. # There may be mixed chains, there may be HETATM things. # Water has been discarded. Maybe this has to be changed at some point. # The order in the coarse grained files matches the order in the set of chains. # # If there are no merges to be done, i.e. no global Elnedyn network, no # disulphide bridges, no links, no distance restraints and no explicit merges, # then we can write out the topology, which will match the coarse grained file. # # If there are merges to be done, the order of things may be changed, in which # case the coarse grained structure will not match with the topology... 
# CYSTINE BRIDGES # # Extract the cysteine coordinates (for all frames) and the cysteine identifiers if options['CystineCheckBonds']: logging.info("Checking for cystine bridges, based on sulphur (SG) atoms lying closer than %.4f nm" % math.sqrt(options['CystineMaxDist2']/100)) cyscoord = zip(*[[j[4:7] for j in i] for i in cysteines]) cysteines = [i[:4] for i in cysteines[0]] bl, kb = options['ForceField'].special[(("SC1", "CYS"), ("SC1", "CYS"))] # Check the distances and add the cysteines to the link list if the # SG atoms have a distance smaller than the cutoff. rlc = range(len(cysteines)) for i in rlc[:-1]: for j in rlc[i+1:]: # Checking the minimum distance over all frames # But we could also take the maximum, or the mean d2 = min([FUNC.distance2(a, b) for a, b in zip(cyscoord[i], cyscoord[j])]) if d2 <= options['CystineMaxDist2']: a, b = cysteines[i], cysteines[j] options['linkListCG'].append((("SC1", "CYS", a[2], a[3]), ("SC1", "CYS", b[2], b[3]), bl, kb)) a, b = (a[0], a[1], a[2]-(32 << 20), a[3]), (b[0], b[1], b[2]-(32 << 20), b[3]) logging.info("Detected SS bridge between %s and %s (%f nm)" % (a, b, math.sqrt(d2)/10)) # REAL ITP STUFF # # Check whether we have identical chains, in which case we # only write the ITP for one... # This means making a distinction between chains and # moleculetypes. molecules = [tuple([chains[i] for i in j]) for j in merge] # At this point we should have a list or dictionary of chains # Each chain should be given a unique name, based on the value # of options["-o"] combined with the chain identifier and possibly # a number if there are chains with identical identifiers. # For each chain we then write an ITP file using the name for # moleculetype and name + ".itp" for the topology include file. # In addition we write a master topology file, using the value of # options["-o"], with an added extension ".top" if not given. 
# XXX *NOTE*: This should probably be gathered in a 'Universe' class itp = 0 moleculeTypes = {} for mi in range(len(molecules)): mol = molecules[mi] # Check if the moleculetype is already listed # If not, generate the topology from the chain definition if mol not in moleculeTypes or options['SeparateTop']: # Name of the moleculetype # XXX: The naming should be changed; now it becomes Protein_X+Protein_Y+... name = "+".join([chain.getname(options['-name'].value) for chain in mol]) moleculeTypes[mol] = name # Write the molecule type topology top = TOP.Topology(mol[0], options=options, name=name) for m in mol[1:]: top += TOP.Topology(m, options=options) # Have to add the connections, like the connecting network # Gather coordinates mcg, coords = zip(*[(j[:4], j[4:7]) for m in mol for j in m.cg(force=True)]) mcg = list(mcg) # Run through the link list and add connections (links = cys bridges or hand specified links) for atomA, atomB, bondlength, forceconst in options['linkListCG']: if bondlength == -1 and forceconst == -1: bondlength, forceconst = options['ForceField'].special[(atomA[:2], atomB[:2])] # Check whether this link applies to this group atomA = atomA in mcg and mcg.index(atomA)+1 atomB = atomB in mcg and mcg.index(atomB)+1 if atomA and atomB: cat = (forceconst is None) and "Constraint" or "Link" top.bonds.append(TOP.Bond( (atomA, atomB), options = options, type = 1, parameters = (bondlength, forceconst), category = cat, comments = "Cys-bonds/special link")) # Elastic Network # The elastic network is added after the topology is constructed, since that # is where the correct atom list with numbering and the full set of # coordinates for the merged chains are available. 
if options['ElasticNetwork']: rubberType = options['ForceField'].EBondType rubberList = ELN.rubberBands( [(i[0], j) for i, j in zip(top.atoms, coords) if i[4] in options['ElasticBeads']], options['ElasticLowerBound'], options['ElasticUpperBound'], options['ElasticDecayFactor'], options['ElasticDecayPower'], options['ElasticMaximumForce'], options['ElasticMinimumForce']) top.bonds.extend([TOP.Bond(i, options=options, type=rubberType, category="Rubber band") for i in rubberList]) # Write out the MoleculeType topology destination = options["-o"] and open(moleculeTypes[mol]+".itp", 'w') or sys.stdout destination.write(str(top)) itp += 1 # Check whether other chains are equal to this one # Skip this step if we are to write all chains to separate moleculetypes if not options['SeparateTop']: for j in range(mi+1, len(molecules)): if not molecules[j] in moleculeTypes and mol == molecules[j]: # Molecule j is equal to a molecule mi # Set the name of the moleculetype to the one of that molecule moleculeTypes[molecules[j]] = moleculeTypes[mol] logging.info('Written %d ITP file%s' % (itp, itp > 1 and "s" or "")) # WRITING THE MASTER TOPOLOGY # Output stream top = options["-o"] and open(options['-o'].value, 'w') or sys.stdout # ITP file listing itps = '\n'.join(['#include "%s.itp"' % molecule for molecule in set(moleculeTypes.values())]) # Molecule listing logging.info("Output contains %d molecules:" % len(molecules)) n = 1 for molecule in molecules: chainInfo = (n, moleculeTypes[molecule], len(molecule) > 1 and "s" or " ", " ".join([i.id for i in molecule])) logging.info(" %2d-> %s (chain%s %s)" % chainInfo) n += 1 molecules = '\n'.join(['%s \t 1' % moleculeTypes[molecule] for molecule in molecules]) # Set a define if we are to use rubber bands useRubber = options['ElasticNetwork'] and "#define RUBBER_BANDS" or "" # XXX Specify a better, version specific base-itp name. # Do not set a define for position restrains here, as people are more used to do it in mdp file? 
top.write( '''#include "martini.itp" %s %s [ system ] ; name Martini system from %s [ molecules ] ; name number %s''' % (useRubber, itps, options["-f"] and options["-f"].value or "stdin", molecules)) logging.info('Written topology files') # Maybe there are forcefield specific log messages? options['ForceField'].messages() # The following lines are always printed (if no errors occur). print "\n\tThere you are. One MARTINI. Shaken, not stirred.\n" Q = DOC.martiniq.pop(random.randint(0, len(DOC.martiniq)-1)) print "\n", Q[1], "\n%80s" % ("--"+Q[0]), "\n"
# Hyper-parameter grid handed to the ALS + review-model search.
reg_params = [0.01, 0.05, 0.1, 0.2, 0.5]
ranks = [10, 20]
best_model_rmse, best_model_map = tuning.tune_ALS_NLP(
    spark, train, val, val_true_list, num_iters,
    reg_params, ranks, review_val_predictions)

# test performance

# Review-based predictions: negative values are clamped to 0, then the
# column is renamed so it can live next to the ALS prediction after the join.
test_predictions = best_model_rmse_lr.transform(test_review_feature)
review_test_predictions = test_predictions.withColumn(
    'prediction',
    when(test_predictions['prediction'] < 0, 0)
    .otherwise(test_predictions['prediction'])
).withColumnRenamed('prediction', 'review_prediction')

# ALS predictions from the model selected by validation RMSE.
test_predictions = best_model_rmse.transform(test)
als_test_predictions = test_predictions.withColumnRenamed(
    'prediction', 'als_prediction')

# Outer join keeps rows predicted by only one of the two models; the review
# prediction is used when present, otherwise the ALS prediction.
total_predictions = als_test_predictions.join(
    review_test_predictions, ['user_id', 'book_id', 'rating'], 'outer')
total_predictions = total_predictions.withColumn(
    'total_prediction',
    when(total_predictions['review_prediction'].isNotNull(),
         total_predictions['review_prediction'])
    .otherwise(total_predictions['als_prediction']))

# Keep, per user, the rows whose rank by combined prediction is at most 500.
window = Window.partitionBy(total_predictions['user_id']).orderBy(
    total_predictions['total_prediction'].desc())
top_predictions = total_predictions.select(
    '*', rank().over(window).alias('row_num')
).filter(col('row_num') <= 500)

# RMSE of the combined prediction against the true rating.
evaluator = RegressionEvaluator(
    metricName='rmse', labelCol='rating', predictionCol='total_prediction')
rmse_test = evaluator.evaluate(top_predictions)

# Rank each user's test items by true rating and score the ranking.
window = Window.partitionBy(test['user_id']).orderBy(test['rating'].desc())
test_true_list = test.select('*', rank().over(window).alias('true_row'))
map_score = MAP.getMAP(top_predictions, test_true_list)

print('Test set RMSE = {}, Test set MAP = {}'.format(rmse_test, map_score))
def tune_ALS_NLP(spark, train_data, validation_data, val_true_list, maxIter,
                 regParams, ranks, review_val_predictions):
    """Grid-search ALS settings, scoring ALS blended with review predictions.

    For every combination of ``maxIter`` x ``ranks`` x ``regParams`` an ALS
    model is fitted on ``train_data`` and applied to ``validation_data``.
    Its predictions are outer-joined with ``review_val_predictions`` on
    ``user_id``/``book_id``/``rating``; the review prediction is used where
    it is present and the ALS prediction otherwise.  Per user, the rows
    whose rank by combined prediction is at most 500 are evaluated with RMSE
    and with ``MAP.getMAP`` against ``val_true_list``.

    Parameters:
        spark: not used by this function; kept for interface compatibility.
        train_data: DataFrame the ALS models are fitted on.
        validation_data: DataFrame the fitted models are applied to.
        val_true_list: ground truth handed to ``MAP.getMAP``.
        maxIter: iterable of ALS ``maxIter`` values to try.
        regParams: iterable of ALS ``regParam`` values to try.
        ranks: iterable of ALS ``rank`` values to try.
        review_val_predictions: DataFrame with a ``prediction`` column
            holding the review-based predictions for the validation set.

    Returns:
        ``(best_model_rmse, best_model_map)``: the ALS model with the lowest
        RMSE and the one with the highest MAP.  Both are None only when the
        grid is empty.

    Side effects: prints progress for every combination and appends one row
    per combination to ``train05_review_eval.csv``.
    """
    # initial
    min_error = float('inf')
    best_iter1 = -1
    best_rank1 = -1
    best_regularization1 = 0
    best_model_rmse = None

    # Start below any attainable score so that a model is selected even when
    # every MAP is 0, mirroring how min_error starts at +inf.
    max_map = float('-inf')
    best_iter2 = -1
    best_rank2 = -1
    best_regularization2 = 0
    best_model_map = None

    # The renamed review predictions do not depend on the grid point.
    review_predictions = review_val_predictions.withColumnRenamed(
        'prediction', 'review_prediction')
    evaluator = RegressionEvaluator(
        metricName='rmse', labelCol='rating', predictionCol='total_prediction')

    for iteration in maxIter:
        for current_rank in ranks:
            for reg in regParams:
                als = ALS(maxIter=iteration, regParam=reg, rank=current_rank,
                          userCol='user_id', itemCol='book_id', ratingCol='rating',
                          coldStartStrategy="drop", nonnegative=True)
                als_model = als.fit(train_data)
                predictions = als_model.transform(validation_data)

                als_predictions = predictions.withColumnRenamed(
                    'prediction', 'als_prediction')
                total_predictions = als_predictions.join(
                    review_predictions, ['user_id', 'book_id', 'rating'], 'outer')
                # Prefer the review prediction; fall back to ALS.
                total_predictions = total_predictions.withColumn(
                    'total_prediction',
                    when(total_predictions['review_prediction'].isNotNull(),
                         total_predictions['review_prediction'])
                    .otherwise(total_predictions['als_prediction']))

                window = Window.partitionBy(total_predictions['user_id']).orderBy(
                    total_predictions['total_prediction'].desc())
                top_predictions = total_predictions.select(
                    '*', rank().over(window).alias('row_num')
                ).filter(col('row_num') <= 500)

                # rmse
                rmse = evaluator.evaluate(top_predictions)
                if rmse < min_error:
                    min_error = rmse
                    best_rank1 = current_rank
                    best_regularization1 = reg
                    best_iter1 = iteration
                    best_model_rmse = als_model

                # MAP
                current_map = MAP.getMAP(top_predictions, val_true_list)
                if current_map > max_map:
                    max_map = current_map
                    best_rank2 = current_rank
                    best_regularization2 = reg
                    best_iter2 = iteration
                    best_model_map = als_model

                print('{} latent factors and regularization = {} with maxIter {}: '
                      'validation RMSE is {}, '
                      'validation MAP is {}'
                      .format(current_rank, reg, iteration, rmse, current_map))

                # Append this grid point to the running evaluation log.
                with open('train05_review_eval.csv', 'ab') as f:
                    np.savetxt(
                        f,
                        [np.array([iteration, current_rank, reg, rmse, current_map])],
                        delimiter=",")

    print('\nThe best model select by RMSE has {} latent factors and '
          'regularization = {} with maxIter = {}: RMSE = {}'
          .format(best_rank1, best_regularization1, best_iter1, min_error))
    print('\nThe best model select by MAP has {} latent factors and '
          'regularization = {} with maxIter = {}: MAP = {}'
          .format(best_rank2, best_regularization2, best_iter2, max_map))

    return best_model_rmse, best_model_map