def make(self, name, neurons, dimensions, array_count=1, intercept=(-1, 1),
         seed=None, type='lif', encoders=None):
    """Create an ensemble, register it, and prepare a process to run it.

    The new ensemble is stored in ``self.nodes[name]``.  A ``Process`` whose
    target is the ensemble's ``run`` method is created (it is not started
    here) and stored, together with this side's end of a ``Pipe``, in
    ``self.processes[name]``.

    :param name: key under which the ensemble and its process are stored
    :param neurons: forwarded to ``ensemble.Ensemble``
    :param dimensions: forwarded to ``ensemble.Ensemble``
    :param array_count: forwarded to ``ensemble.Ensemble`` as ``count``
    :param intercept: forwarded to ``ensemble.Ensemble``
    :param seed: seed for the ensemble; when ``None`` and the network has a
        seed of its own, one is drawn from ``self.random``
    :param type: forwarded to ``ensemble.Ensemble``
    :param encoders: forwarded to ``ensemble.Ensemble``
    """
    # We need to run the setup again if ensembles are added.
    self.setup = False

    if seed is None and self.seed is not None:
        seed = self.random.randrange(0x7fffffff)

    ensemble_options = dict(
        count=array_count,
        intercept=intercept,
        dt=self.dt,
        seed=seed,
        type=type,
        encoders=encoders,
        name=name,
    )
    new_ensemble = ensemble.Ensemble(neurons, dimensions, **ensemble_options)
    self.nodes[name] = new_ensemble

    # One end of the pipe is handed to the worker, the other is kept here.
    timer_conn, node_conn = Pipe()
    worker = Process(target=new_ensemble.run, args=(node_conn, ), name=name)
    self.processes[name] = (worker, timer_conn)
def make(self, name, neurons, dimensions, array_count=1, intercept=(-1, 1),
         seed=None, type='lif', encoders=None):
    """Create an ensemble and store it in ``self.node`` under ``name``.

    :param name: key under which the ensemble is stored
    :param neurons: forwarded to ``ensemble.Ensemble``
    :param dimensions: forwarded to ``ensemble.Ensemble``
    :param array_count: forwarded to ``ensemble.Ensemble`` as ``count``
    :param intercept: forwarded to ``ensemble.Ensemble``
    :param seed: seed for the ensemble; when ``None`` and the network has a
        seed of its own, one is drawn from ``self.random``
    :param type: forwarded to ``ensemble.Ensemble``
    :param encoders: forwarded to ``ensemble.Ensemble``
    """
    if seed is None and self.seed is not None:
        seed = self.random.randrange(0x7fffffff)

    # Just in case the model has been run previously: adding a new node
    # means the theano function has to be rebuilt.
    self.theano_tick = None

    ensemble_options = dict(
        count=array_count,
        intercept=intercept,
        dt=self.dt,
        seed=seed,
        type=type,
        encoders=encoders,
    )
    self.node[name] = ensemble.Ensemble(neurons, dimensions,
                                        **ensemble_options)
def runVIC(dbname, options):
    """Driver function for performing a VIC forecast simulation.

    Reads the forecast period, ensemble size and forcing method from
    ``options['forecast']``, builds the model ensemble, initializes it
    (assimilating observations when ``options['vic']`` has an
    ``'observations'`` entry), runs it, saves the requested variables, and
    finally computes per-variable ensemble standard deviation and mean rasters
    before removing every member's model directory.
    """
    startyear, startmonth, startday = map(
        int, options['forecast']['startdate'].split('-'))
    endyear, endmonth, endday = map(
        int, options['forecast']['enddate'].split('-'))
    # NOTE: a guard that aborted forecasts with a lead time longer than
    # 3 months used to sit here; it is currently disabled.
    res = config.getResolution(options['forecast'])
    vicexe = "{0}/vicNl".format(rpath.bins)
    basin = config.getBasinFile(options['forecast'])
    saveto, savevars = config.getVICvariables(options)
    name = options['forecast']['name'].lower()
    nens = int(options['forecast']['ensemble size'])
    method = options['forecast']['method']
    models = ensemble.Ensemble(nens, dbname, res, startyear, startmonth,
                               startday, endyear, endmonth, endday, name)
    if 'initialize' in options['vic'] and options['vic']['initialize'] in [
            'perturb', 'random']:
        init_method = options['vic']['initialize']
    else:
        # default option to initialize the ensemble from the same state
        init_method = "determ"
    # override initialization method if assimilation was requested
    if 'observations' in options['vic']:
        init_method = "random"
        models.initialize(options, basin, init_method, vicexe, saveindb=True,
                          saveto=saveto, saveargs=savevars, skipsave=-1)
        data, alat, alon, agid = assimilate(
            options,
            date(models.startyear, models.startmonth, models.startday),
            models)
        models.updateStateFiles(data, alat, alon, agid)
    else:
        models.initialize(options, basin, init_method, vicexe, saveindb=True,
                          saveto=saveto, saveargs=savevars)
    models.writeParamFiles()
    models.writeForcings(method, options)
    models.run(vicexe)
    models.setDates(startyear, startmonth, startday, endyear, endmonth, endday)
    models.save(saveto, savevars)
    for varname in savevars:
        raster.stddev(models.dbname, "{0}.{1}".format(models.name, varname))
        raster.mean(models.dbname, "{0}.{1}".format(models.name, varname))
    # Clean up the working directory of every ensemble member.
    for e in range(nens):
        shutil.rmtree(models[e].model_path)
def make(self, name, neurons, dimensions, array_count=1, intercept=(-1, 1),
         seed=None, type='lif', encoders=None, num_subs=1):
    """Create an ensemble, optionally split into subensembles, one process each.

    With ``num_subs == 1`` a single ensemble is built and stored in
    ``self.nodes[name]`` wrapped in a ``Node`` together with its (unstarted)
    process and this side's pipe connection.  With ``num_subs > 1`` the parts
    yielded by the parent's ``get_subensemble_parts(num_subs)`` are each turned
    into their own ensemble and process, stored under ``name + str(index)``;
    the parent itself gets no entry of its own in ``self.nodes``.

    :param name: name of the ensemble (prefix of the subensemble names)
    :param neurons: forwarded to ``ensemble.Ensemble``; each subensemble
        receives ``neurons // num_subs``
    :param dimensions: forwarded to ``ensemble.Ensemble``
    :param array_count: forwarded to ``ensemble.Ensemble`` as ``count``
    :param intercept: forwarded to ``ensemble.Ensemble``
    :param seed: seed for the ensemble(s); when ``None`` and the network has a
        seed of its own, one is drawn from ``self.random``
    :param type: forwarded to ``ensemble.Ensemble``
    :param encoders: forwarded to the parent ``ensemble.Ensemble``
    :param num_subs: number of subensembles; must be at least 1
    :raises SystemExit: with status 1 when ``num_subs`` is smaller than 1
    """
    if num_subs < 1:
        sys.stderr.write("num_subs for ensembles must be greater than 0\n")
        # sys.exit rather than the interactive ``exit`` helper, which is only
        # injected by the ``site`` module and may be absent.
        sys.exit(1)

    # TODO: is this necessary?
    # we need to run the setup again if ensembles are added
    self.setup = False

    if seed is None and self.seed is not None:
        seed = self.random.randrange(0x7fffffff)

    e = ensemble.Ensemble(neurons, dimensions, count=array_count,
                          intercept=intercept, dt=self.dt, seed=seed,
                          type=type, encoders=encoders, name=name)

    # if no subensembles, create just the main ensemble process and exit
    if num_subs == 1:
        timer_conn, node_conn = Pipe()
        p = Process(target=e.run, args=(node_conn, ), name=name)
        self.nodes[name] = Node(e, p, timer_conn)
        return

    # create the specified number of subensembles
    parts = e.get_subensemble_parts(num_subs)
    for e_num, (encoder, decoder, bias) in enumerate(parts):
        subname = e.name + str(e_num)
        # Floor division keeps the neuron count an integer even where ``/``
        # performs true division.
        e_sub = ensemble.Ensemble(neurons // num_subs, dimensions,
                                  count=array_count, intercept=intercept,
                                  dt=self.dt, seed=seed, type=type,
                                  encoders=encoder, is_subensemble=True,
                                  name=subname, decoders=decoder, bias=bias)

        # creating a process for each subensemble
        timer_conn, node_conn = Pipe()
        p = Process(target=e_sub.run, args=(node_conn, ), name=subname)
        self.nodes[subname] = Node(e_sub, p, timer_conn, e, e_num)
def make(self, name, *args, **kwargs):
    """Create and return an ensemble of neurons.

    Note that all ensembles are actually arrays of length 1.

    All positional and keyword arguments other than ``name`` are forwarded
    unchanged to ``ensemble.Ensemble``.

    :param string name: name of the ensemble; it is the key in ``self.nodes``,
        so an existing node with the same name is replaced
    :param int seed: optional keyword argument with the random number seed to
        use.  When the ``seed`` keyword is not supplied at all, a seed is
        drawn from the network's random number generator (``self.random``).
    :returns: the newly created ensemble
    """
    if 'seed' not in kwargs:
        # if no seed provided, get one randomly from the rng
        kwargs['seed'] = self.random.randrange(0x7fffffff)

    # just in case the model has been run previously, as adding a new node
    # means we have to rebuild the theano function
    self.theano_tick = None

    e = ensemble.Ensemble(*args, **kwargs)
    self.nodes[name] = e  # store created ensemble in node dictionary
    # The docstring has always promised the ensemble as the return value.
    return e
def make(self, name, num_subs=1, **kwargs):
    """Create an ensemble, optionally split into ``num_subs`` subensembles.

    With ``num_subs == 1`` the call is delegated to ``self._make_ensemble``.
    Otherwise a parent ``ensemble.Ensemble`` is built from ``kwargs``, recorded
    in ``self.split_ensembles[name]``, and one child ensemble is created
    through ``self._make_ensemble`` for every part yielded by the parent's
    ``get_subensemble_parts(num_subs)``.  Children are named
    ``name + "-SUB-" + str(index)``.

    :param name: name of the ensemble (prefix of the child names)
    :param num_subs: number of subensembles; must be at least 1
    :param kwargs: ensemble options.  ``seed`` defaults to ``self.fixed_seed``
        when that is set, otherwise to a value drawn from ``self.random``;
        ``dt`` is always overwritten with ``self.dt``.
    :raises Exception: when ``num_subs`` is smaller than 1, when subensembles
        are requested in ``'direct'`` mode, or when the parent's neuron count
        is not divisible by ``num_subs``
    """
    if num_subs < 1:
        raise Exception("ERROR", "num_subs must be greater than 0")

    if 'seed' not in kwargs:
        if self.fixed_seed is not None:
            kwargs['seed'] = self.fixed_seed
        else:
            kwargs['seed'] = self.random.randrange(0x7fffffff)

    kwargs['dt'] = self.dt

    if num_subs == 1:
        self._make_ensemble(name, **kwargs)
        return

    if kwargs.get('mode') == 'direct':
        raise Exception("ERROR", "do not support direct subensembles")

    orig_ensemble = ensemble.Ensemble(**kwargs)

    # Validate once, before anything is registered, instead of on every loop
    # iteration after the parent has already been recorded.
    if orig_ensemble.neurons_num % num_subs != 0:
        raise Exception(
            'ERROR: The number of neurons is not divisible by num_subs')

    self.split_ensembles[name] = {'parent': orig_ensemble, 'children': []}

    # These two values are identical for every child.
    kwargs["dimensions"] = orig_ensemble.dimensions
    # Divisibility was checked above, so floor division is exact and keeps the
    # count an integer even where ``/`` performs true division.
    kwargs["neurons"] = orig_ensemble.neurons_num // num_subs

    parts = orig_ensemble.get_subensemble_parts(num_subs)
    for e_num, (encoder, decoder, bias, alpha) in enumerate(parts):
        sub_name = name + "-SUB-" + str(e_num)
        kwargs["encoders"] = encoder
        kwargs["decoders"] = decoder
        kwargs["bias"] = bias
        kwargs["alpha"] = alpha
        self._make_ensemble(sub_name, is_subensemble=True, **kwargs)
        self.split_ensembles[name]['children'].append(sub_name)
import tree reload(tree) import svm reload(svm) import linear reload(linear) import ensemble reload(ensemble) os.chdir('My/Path/Here') train_X, train_y, dev_X, dev_y, test_X, test_y = load_kc_data.load_kc_housing() # Train models tree_model = tree.DecisionTree(train_X, train_y, dev_X, dev_y, test_X, test_y) linear_model = linear.LinearRegression(train_X, train_y, dev_X, dev_y, test_X, test_y) # Results def evaluate_model(clf,X,y): y_pred = clf.predict(X) rms = sklearn.metrics.mean_squared_error(y,y_pred) print "The model's RMS is " + str(rms) + ", which is " + str(100*rms/np.var(y)) + "% of data variance." print "Decision Tree:" evaluate_model(tree_model, test_X, test_y) print "\nLinear Regression" evaluate_model(linear_model, test_X, test_y) # Averaging the results of the two models ensemble_rms = ensemble.Ensemble(test_X, test_y, [tree_model,linear_model]) print "\nThe ensemble model's RMS is " + str(ensemble_rms) + ", which is " + str(100*ensemble_rms/np.var(test_y)) + "% of data variance."
def runEnsembleVIC(dbname, options):
    """Driver function for performing a VIC nowcast simulation.

    Builds a model ensemble for the nowcast period.  When
    ``options['vic']`` has an ``'observations'`` entry the period is stepped
    through date by date (the observation dates plus the end date), running
    the ensemble up to each date and assimilating observations there;
    otherwise the ensemble is run once up to the end date.  Finally the
    per-variable ensemble standard deviation rasters are computed and every
    member's model directory is removed.
    """
    res = config.getResolution(options['nowcast'])
    name = options['nowcast']['name'].lower()
    vicexe = "{0}/vicNl".format(rpath.bins)
    basin = config.getBasinFile(options['nowcast'])
    saveto, savevars = config.getVICvariables(options)
    startyear, startmonth, startday = map(
        int, options['nowcast']['startdate'].split('-'))
    endyear, endmonth, endday = map(
        int, options['nowcast']['enddate'].split('-'))
    precipdatasets = options['vic']['precip'].split(",")
    # The returned values are not used below; the call itself is kept as is.
    savestate, _ = _saveState(options['vic'])
    # Ensemble size: explicit setting, else 20 when assimilating, else one
    # member per precipitation dataset.
    if 'ensemble size' in options['vic']:
        nens = int(options['vic']['ensemble size'])
    elif 'observations' in options['vic']:
        nens = 20
    else:
        nens = len(precipdatasets)
    models = ensemble.Ensemble(nens, dbname, res, startyear, startmonth,
                               startday, endyear, endmonth, endday, name)
    if 'initialize' in options['vic'] and options['vic']['initialize']:
        init_method = options['vic']['initialize']
        # A bare boolean "true" selects the deterministic initialization.
        if isinstance(init_method, bool):
            init_method = "determ"
        models.initialize(options, basin, init_method, vicexe)
    else:
        models.writeSoilFiles(basin)
    startdate = date(startyear, startmonth, startday)
    if 'observations' in options['vic']:
        method = "random"
        obsnames = options['vic']['observations'].split(",")
        if 'update' in options['vic']:
            update = options['vic']['update']
        else:
            update = None
        updateDates = observationDates(
            obsnames, dbname, startyear, startmonth, startday,
            endyear, endmonth, endday, update)
        t0 = startdate
        updateDates += [date(endyear, endmonth, endday)]
        for t in updateDates:
            # Only the first segment (the one starting at the nowcast start
            # date) is run with overwrite=True.
            overwrite = (t0 == startdate)
            ndays = (date(t.year, t.month, t.day) - t0).days
            t1 = t + timedelta(1)
            models.setDates(t.year, t.month, t.day, t1.year, t1.month, t1.day)
            models.initialize(options, basin, method, vicexe, saveindb=True,
                              saveto=saveto, saveargs=savevars,
                              initdays=ndays, overwrite=overwrite)
            data, alat, alon, agid = assimilate(
                options,
                date(models.startyear, models.startmonth, models.startday),
                models)
            db = dbio.connect(models.dbname)
            try:
                cur = db.cursor()
                try:
                    sql = "select tablename from pg_tables where schemaname='{0}'".format(
                        models.name)
                    cur.execute(sql)
                    tables = [tbl[0] for tbl in cur.fetchall()
                              if tbl[0] != "dssat"]
                    for tbl in tables:
                        # NOTE(review): this DELETE statement is built but
                        # never executed or committed, so no rows are removed.
                        # Confirm whether the deletion is still wanted before
                        # enabling it.
                        sql = "delete from {0}.{1} where fdate=date '{2}-{3}-{4}'".format(
                            models.name, tbl, t.year, t.month, t.day)
                finally:
                    # Release the cursor even when the catalogue query fails.
                    cur.close()
            finally:
                db.close()
            if bool(data):
                models.updateStateFiles(data, alat, alon, agid)
            t0 = date(t.year, t.month, t.day)
    else:
        method = "random"
        t = date(endyear, endmonth, endday)
        t1 = t + timedelta(1)
        models.setDates(t.year, t.month, t.day, t1.year, t1.month, t1.day)
        ndays = (t - startdate).days
        models.initialize(options, basin, method, vicexe, saveindb=True,
                          saveto=saveto, saveargs=savevars, initdays=ndays)
    for varname in savevars:
        raster.stddev(models.dbname, "{0}.{1}".format(models.name, varname))
    # Clean up the working directory of every ensemble member.
    for model in models:
        shutil.rmtree(model.model_path)
# Two multi-layer perceptrons with logistic activation and the same random
# state; the second one additionally sets alpha=1.
MLP = MLPClassifier(activation="logistic", random_state=2)
MLP1 = MLPClassifier(alpha=1, activation="logistic", random_state=2)
# Other candidate classifiers, currently disabled:
# AdaBoost=AdaBoostClassifier()
# Gaussian=GaussianNB()
# QuadraticDiscriminant=QuadraticDiscriminantAnalysis()

# (label, estimator) pairs handed to the ensemble.  Entries that are commented
# out are estimators that are not part of the current ensemble.
fun_list = [
    # ("svc",svc),
    # ("rbf_svc",rbf_svc),
    # ("poly_svc",poly_svc),
    # ("lin_svc",lin_svc),
    # ("knn",knn),
    # ("lr",lr),
    # ("GaussianProcess",GaussianProcess),
    # ("DecisionTree",DecisionTree),
    # ("RandomForest",RandomForest),
    ("MLP", MLP),
    ("MLP1", MLP1),
    # ("AdaBoost",AdaBoost),
    # ("Naive Bayes",Gaussian),
    # ("QDA",QuadraticDiscriminant)
]
# The bare string below is a no-op statement acting as a section heading.
"""ensemle methods """
# Build the ensemble from the active estimators, fit it on the training split
# and run prediction on the test split (the test labels are passed along).
m = ensemble.Ensemble(fun_list)
m.fit(X_train, y_train)
# m.predict_prob(X_test,y=y_test)
m.predict(X_test, y=y_test)
# m.vote(y_test)
def search(self): print "\n\n\nNew run:\n" #load the label and superpose onto selected position cmd.load("%s/labels/%s" % (self.path, self.currentLabel.pdbFile), "currentLabel") print "Attempting superposition..." if not self.superpose(): print "Superposition does not work." print "Possible reasons:" print "1) Glycine? Mutate to Ala first." print "2) Trying to attach DNA label to Protein or vice versa?" if len(self.currentLabel.errorMessage) > 0: print "3) %s" % self.currentLabel.errorMessage self.cleanupAfterRun(my_view) return else: print "Superposition worked!" #if self.currentLabel.rotate == False: # return #prepare movingAtoms array of label, put into correct order... stored.movingAtoms = [] for i in range(0, len(self.currentLabel.atomNames)): xyz = cmd.get_model( "%s & name %s" % ("currentLabel", self.currentLabel.atomNames[i]), 1).get_coord_list() stored.movingAtoms.extend(xyz) self.currentLabel.movingAtoms = numpy.array(stored.movingAtoms) #create object with only the atoms around the label to speed everything up protein ="%s &! 
%s within %f of %s" %(self.pickedObject1, \ self.residue1Name, \ self.currentLabel.radius, \ "currentLabel") cmd.create("labelEnvironment", "byres %s" % protein) stored.environmentAtomCoordinates = [] stored.environmentAtomNames = [] stored.environmentAtomResidueNames = [] cmd.iterate_state(1, protein, 'stored.environmentAtomCoordinates.append((x,y,z))') cmd.iterate(protein, 'stored.environmentAtomNames.append(name)') cmd.iterate(protein, 'stored.environmentAtomResidueNames.append(resn)') environmentAtomCoordinates = numpy.array( stored.environmentAtomCoordinates) environmentAtomNames = numpy.array(stored.environmentAtomNames) environmentAtomResidueNames = numpy.array( stored.environmentAtomResidueNames) environmentAtomInfo = [ environmentAtomCoordinates, environmentAtomNames, environmentAtomResidueNames ] numberOfCPUs = multiprocessing.cpu_count() numberOfTries = self.currentLabel.numberOfTries[self.thoroughness] numberOfRotamers = self.currentLabel.numberToFind[self.thoroughness] processes = [] chunkSize = int(math.ceil(numberOfRotamers / float(numberOfCPUs))) chunks = [] sum = 0 i = 0 while sum + chunkSize < numberOfRotamers: chunks.append(chunkSize) sum += chunkSize i += 1 chunks.append(numberOfRotamers - i * chunkSize) numberOfProcesses = len(chunks) queue = multiprocessing.Queue() newEnsemble = ensemble.Ensemble() newEnsemble.name = "mW" self.currentLabel.ensembles[newEnsemble.name] = newEnsemble #only use multiprocessing on mac or linux if os.name != "nt": #print chunks print "Trying to find %s rotamers. Using %i cores." % ( numberOfRotamers, numberOfProcesses), for i in range(numberOfProcesses): p = multiprocessing.Process(target = self.currentLabel.generateEnsembleMulti, args = (self.currentLabel.movingAtoms,\ environmentAtomInfo, chunks[i], numberOfTries,\ newEnsemble.name, \ False, self.cutoff, self.clashes,\ queue)) p.start() processes.append(p) else: print "Trying to find %s rotamers. Using 1 core." 
% ( numberOfRotamers), numberOfProcesses = 1 self.currentLabel.generateEnsembleMulti(self.currentLabel.movingAtoms,\ environmentAtomInfo, numberOfRotamers, numberOfTries,\ newEnsemble.name, \ False, self.cutoff, self.clashes,\ queue) resultsDictionary = {} for i in range(numberOfProcesses): resultsDictionary.update(queue.get()) for p in processes: p.join() print "Done." print "Collecting results...", newRotamers = [] for resultList in resultsDictionary.values(): for result in resultList: newRotamers.append(result) print "Done! Found %i rotamers" % len(newRotamers) #reassign ids. They are not unique with multiprocessing for idx, rotamer in enumerate(newRotamers): rotamer.id = idx newEnsemble.rotamers = newRotamers
def run(self): my_view = cmd.get_view() #mark Search if self.mode == 'Search': #show message cmd.wizard("message", "Searching conformers...") cmd.refresh() if self.currentLabel.uid != "Rx": self.search() print "Creating Rotamers in PyMOL...", for aRotamer in self.currentLabel.ensembles["mW"].rotamers: self.createRotamerInPymol(aRotamer, "mW") # if self.thoroughness == "painstaking" and self.currentLabel.uid == "R1": # scoringWeights = {"totalContactWeight": 0.0, "typeOfContactWeight": 1.0} # for aRotamer in self.currentLabel.ensembles["mW"].rotamers: # aRotamer.score(scoringWeights) # self.currentLabel.ensembles["mW"].sortRotamers("chi2") # numberOfRotamers = len(self.currentLabel.ensembles["mW"].rotamers) # newEnsemble = ensemble.Ensemble() # newEnsemble.name = "contactFit" # newEnsemble.rotamers = self.currentLabel.ensembles["mW"].rotamers[0:20] # self.currentLabel.ensembles["contactFit"] = newEnsemble # for aRotamer in self.currentLabel.ensembles["contactFit"].rotamers: # self.createRotamerInPymol(aRotamer, "contactFit") print "done!" 
elif self.currentLabel.uid == "Rx": ca1 = numpy.array( cmd.get_model(self.residue1Name + " & name CA", 1).get_coord_list()[0]) ca2 = numpy.array( cmd.get_model(self.residue2Name + " & name CA", 1).get_coord_list()[0]) try: cb1 = numpy.array( cmd.get_model(self.residue1Name + " & name CB", 1).get_coord_list()[0]) except: cb1 = ca1 try: cb2 = numpy.array( cmd.get_model(self.residue2Name + " & name CB", 1).get_coord_list()[0]) except: cb2 = ca2 environmentatoms = numpy.array(cmd.get_model("(%s within 10 of %s or %s) and not (%s or %s)" \ %(self.pickedObject1, \ self.residue1Name, \ self.residue2Name, \ self.residue1Name, \ self.residue2Name), 1).get_coord_list()) anchor1rotamers = self.currentLabel.calculateCone( ca1, cb1, environmentatoms, numberOfAtoms=8000) anchor2rotamers = self.currentLabel.calculateCone( ca2, cb2, environmentatoms, numberOfAtoms=8000) solutions1 = [] solutions2 = [] for anchor1rotamer in anchor1rotamers: anAtom = anchor1rotamer.atoms["N1"] solutions1.append(anAtom.coordinate) for anchor2rotamer in anchor2rotamers: anAtom = anchor2rotamer.atoms["N1"] solutions2.append(anAtom.coordinate) #determine common accessible volume distances1 = self.currentLabel.quick_map(solutions1, cb2) distances2 = self.currentLabel.quick_map(solutions2, cb1) indices1 = numpy.where(numpy.any(distances1 > 6, axis=1)) indices2 = numpy.where(numpy.any(distances2 > 6, axis=1)) solutions1 = numpy.delete(solutions1, indices1, 0) solutions2 = numpy.delete(solutions2, indices2, 0) solutions = numpy.concatenate((solutions1, solutions2)) #create resulting ensemble newEnsemble = ensemble.Ensemble() newEnsemble.name = "mW" id = 0 newRotamers = [] if len(solutions) > 0: for solution in solutions: newRotamer = rotamer.Rotamer() thisAtom = atom.Atom() thisAtom.coordinate = solution thisAtom.name = "N1" thisAtom.element = "N" newRotamer.id = id newRotamer.atoms["N1"] = thisAtom id += 1 newRotamers.append(newRotamer) else: print "Did not find any possible N1 locations. 
Are the two anchorpoints too far apart?" newEnsemble.rotamers = newRotamers self.currentLabel.ensembles[newEnsemble.name] = newEnsemble cmd.load( "%s/labels/%s" % (self.path, self.currentLabel.pdbFile), "currentLabel") for aRotamer in self.currentLabel.ensembles["mW"].rotamers: self.createRotamerInPymol(aRotamer, "mW") self.numberOfLabel += 1 self.finalCosmetics() #dismiss message cmd.wizard() #mark Measure elif self.mode == "Measure": cmd.wizard("message", "Calculating distances...") cmd.refresh() print "\n\n\nDistance calculation:\n" print "The dashed lines are the c-beta distance (green),\nand the distance between the geometric averages\nof the two ensembles (yellow).\n" print "The following statistics refer to the distribution\nof the individual distances between all conformers (may take a while):\n" #find out what the selections are stored.label1 = [] stored.label2 = [] stored.label1Coordinates = [] stored.label2Coordinates = [] stored.atomNames1 = [] stored.atomNames2 = [] #extract label info cmd.iterate(self.residue1Name, 'stored.label1.append(segi)') cmd.iterate(self.residue2Name, 'stored.label2.append(segi)') cmd.iterate(self.residue1Name, 'stored.atomNames1.append(name)') cmd.iterate(self.residue2Name, 'stored.atomNames2.append(name)') try: label1 = label.Label.fromfile("labels/%s.txt" % stored.label1[0]) cmd.iterate_state( 0, "%s & name %s" % (self.residue1Name, label1.spinLocation), 'stored.label1Coordinates.append((x,y,z))') except: cmd.iterate_state(0, "%s" % (self.residue1Name), 'stored.label1Coordinates.append((x,y,z))') try: label2 = label.Label.fromfile("labels/%s.txt" % stored.label2[0]) cmd.iterate_state( 0, "%s & name %s" % (self.residue2Name, label2.spinLocation), 'stored.label2Coordinates.append((x,y,z))') except: cmd.iterate_state(0, "%s" % (self.residue2Name), 'stored.label2Coordinates.append((x,y,z))') #calculate distances distances = distanceDistribution.DistanceDistribution() dist = distances.calculateDistanceDistribution( 
stored.label1Coordinates, stored.label2Coordinates) #create pseudoatom at average coordinate of each ensemble and display the distance between them atoms1 = numpy.array(stored.label1Coordinates) atoms2 = numpy.array(stored.label2Coordinates) avgAtoms1 = numpy.average(atoms1, axis=0) avgAtoms2 = numpy.average(atoms2, axis=0) self.createPseudoatom(avgAtoms1, "tmp_average1", 1) self.createPseudoatom(avgAtoms2, "tmp_average2", 1) cmd.distance(self.object_prefix + "avg", "tmp_average1 & name PS1", "tmp_average2 & name PS1") cmd.delete("tmp_average1") cmd.delete("tmp_average2") #cbeta distance if cbeta is present in both selections #cBetaDistance = 0.0 if any("CB" in atom for atom in stored.atomNames1) and any( "CB" in atom for atom in stored.atomNames2): cmd.distance(self.object_prefix + "cBeta", self.residue1Name + " & name CB", self.residue2Name + " & name CB") #for some reason, cmd.distance does not return the correct distance. Although it is shown in the viewer... #get_distance gives the correct distance, but does not create the object in the viewer. 
cBetaDistance = cmd.get_distance( self.residue1Name + " & name CB", self.residue2Name + " & name CB") cmd.set("dash_color", "green", self.object_prefix + "cBeta") histogram = numpy.histogram(dist, numpy.arange(100)) envelopePlot = numpy.zeros((100, 2)) envelopePlot[0:99] = numpy.column_stack( (histogram[1][0:len(histogram[1]) - 1], histogram[0])) #put point in mid of bin envelopePlot[:, 0] += 0.5 normEnvelopePlot = numpy.copy(envelopePlot) normEnvelopePlot[:, 1] = normEnvelopePlot[:, 1] / numpy.amax( histogram[0]) #combine dist and histogram to single array before output output = numpy.column_stack((envelopePlot, normEnvelopePlot[:, 1])) averageDistance = numpy.average(dist) #make graph dictionary for mtsslPlotter graphtitle = "%s-%s" % (self.residue1Name, self.residue2Name) xlim = [0, 100] ylim = [0, 1] plotDictionary = self.makeGraphDataDictionary( graphtitle, "DistanceDistribution", "Distance (Angstrom)", "Relative Probability", output[:, 0], output[:, 2], 0, xlim, ylim) stored.plots.append(plotDictionary) print "Distribution plot added to memory. Inspect it with mtsslPlotter." #Copy to clipboard header = "Dist. Count Norm.Count\n" outputStr = header + numpy.array_str(output) outputStr = outputStr.replace("[", "") outputStr = outputStr.replace("]", "") self.copyStringToClipboard(outputStr) #Write to file if self.writeToFile: try: filename = "%s-%s" % (self.residue1Name, self.residue2Name) numpy.savetxt(filename + ".txt", output, delimiter='\t') print "Written to file:" print "%s/%s" % (os.getcwd(), filename) except: print "Writing to file failed!" print self.calculateStatistics2(dist) try: if cBetaDistance > 0.0: print "Cbeta distance: %3.1f" % cBetaDistance except: print "No Cbeta distance." cmd.wizard() #mark Distance Map elif self.mode == "Distance Map": #show message cmd.wizard("message", "Calculating distance maps. 
Please be patient...") cmd.refresh() dm = distanceMap.DistanceMap(self.writeToFile, self.currentLabel, self.homoOligomerMode) if self.pickedObject1 == self.pickedObject2: dm.intraDistanceMap(self.pickedObject1) else: dm.interDistanceMap(self.pickedObject1, self.pickedObject2) print "Done!" cmd.wizard() self.cleanupAfterRun() cmd.set_view(my_view)
def run_e2p2_pipeline(time_stamp, input_fasta_path, blastp_cmd, blast_weight,
                      rpsd_db_name_path, blast_output_path, num_threads,
                      java_cmd, priam_search_jar, blast_evalue, priam_weight,
                      blast_bin_path, priam_profiles_path, priam_output_path,
                      priam_resume, threshold, e2p2_short_output,
                      e2p2_long_output, e2p2_pf_output, e2p2_orxn_pf_output,
                      e2p2_final_pf_output, logging_level,
                      protein_gene_path=None):
    """Run the E2P2 pipeline: classifiers, ensemble voting, result files.

    Args:
        time_stamp: Time stamp of running the E2P2 pipeline
        input_fasta_path: Path to fasta input
        blastp_cmd: Path to blastp command
        blast_weight: Path to BLAST classifier weight mapping file
        rpsd_db_name_path: Path to BLAST classifier RPSD database "name"
        blast_output_path: Path to 'blastp' output
        num_threads: Number of threads to run 'blastp'
        java_cmd: Path to java command
        priam_search_jar: Path to 'PRIAM_search.jar'
        blast_evalue: Passed to the BLAST prediction generation together
            with the weight file (presumably an e-value cutoff -- confirm)
        priam_weight: Path to PRIAM classifier weight mapping file
        blast_bin_path: Path to BLAST 'bin' folder
        priam_profiles_path: Path to PRIAM classifier PRIAM profiles folder
        priam_output_path: Path to 'PRIAM_search.jar' output
        priam_resume: Flag for whether to resume an existing PRIAM job
        threshold: Threshold for E2P2 ensemble
        e2p2_short_output: Path to E2P2 short output
        e2p2_long_output: Path to E2P2 long output
        e2p2_pf_output: Path to E2P2 pf output
        e2p2_orxn_pf_output: Path to E2P2 orxn pf output
        e2p2_final_pf_output: Path to E2P2 final pf output
        logging_level: The logging level set for run e2p2 pipeline
        protein_gene_path: Path to protein ID to gene ID mapping file; the
            final pf output is only written when this is given

    Raises:
        SystemExit: with status 1 when a KeyError occurs while compiling
            predictions or writing results; the KeyError itself is logged
            and not propagated.

    Returns:
        None
    """
    logger_name = definitions.DEFAULT_LOGGER_NAME
    logger = logging.getLogger(logger_name)

    # Register both level-0 classifiers, then run them together.
    rc = ensemble.RunClassifiers(time_stamp)
    rc.blast_classifer(blastp_cmd, rpsd_db_name_path, input_fasta_path,
                       blast_output_path, num_threads, logging_level,
                       logger_name)
    rc.priam_classifer(java_cmd, priam_search_jar, blast_bin_path,
                       priam_profiles_path, input_fasta_path,
                       priam_output_path, logging_level, logger_name,
                       priam_resume)
    logger.log(logging.INFO, "Running level-0 classification processes.")
    rc.run_all_classifiers()

    try:
        logger.log(logging.INFO, "Compiling predictions.")
        # Read in the prediction results of each classifier.
        bc = ensemble.Predictions("Blast")
        bc.generate_blast_predictions(blast_weight, rc.output_dict["blast"],
                                      blast_evalue, logging_level,
                                      logger_name)
        pc = ensemble.Predictions("Priam")
        pc.generate_priam_predictions(priam_weight, rc.output_dict["priam"],
                                      logging_level, logger_name)

        # Combine the classifier results in one ensemble object.
        en = ensemble.Ensemble()
        for classifier_predictions in (bc, pc):
            en.add_classifier(classifier_predictions)

        logger.log(logging.INFO, "Computing ensemble predictions.")
        # Perform max weight voting on all classifier results, then apply the
        # absolute threshold to the voting results.
        en.max_weight_voting(logging_level, logger_name)
        en.absolute_threshold(threshold, logging_level, logger_name)

        # Set up the object that writes the E2P2 output files.
        e2p2 = file.E2P2files(en.final_predictions)
        logger.log(logging.INFO, "Preparing results files.")
        # Add the individual classifier predictions for the detailed results.
        for classifier_predictions in (bc, pc):
            e2p2.add_predictions_of_classifer(classifier_predictions)
        # NOTE(review): ef_map_path is neither a parameter nor assigned in
        # this function; presumably a module-level name -- confirm.
        e2p2.read_efmap(ef_map_path, logging_level, logger_name)

        # Write outputs.
        method_label = (definitions.DEFAULT_ENSEMBLE_METHOD + " (" +
                        str(threshold) + ")")
        e2p2.write_short_results(method_label, e2p2_short_output,
                                 logging_level, logger_name)
        e2p2.write_long_results(method_label, e2p2_long_output,
                                logging_level, logger_name)
        e2p2.write_pf_results(e2p2_pf_output, logging_level, logger_name)
        e2p2.write_orxn_pf_results(
            e2p2_orxn_pf_output,
            definitions.EC_SUPERSEDED_MAP,
            definitions.METACYC_RXN_MAP,
            definitions.OFFICIAL_EC_METACYC_RXN_MAP,
            definitions.TO_REMOVE_NON_SMALL_MOLECULE_METABOLISM,
            logging_level, logger_name)
        has_gene_map = protein_gene_path is not None
        if has_gene_map:
            e2p2.write_final_pf_results(
                e2p2_final_pf_output,
                definitions.EC_SUPERSEDED_MAP,
                definitions.METACYC_RXN_MAP,
                definitions.OFFICIAL_EC_METACYC_RXN_MAP,
                definitions.TO_REMOVE_NON_SMALL_MOLECULE_METABOLISM,
                protein_gene_path, logging_level, logger_name)

        logger.log(logging.INFO, "Operation complete.")
        logger.log(logging.INFO,
                   "Main results are in the file: %s" % e2p2_short_output)
        logger.log(logging.INFO,
                   "Detailed results are in the file: %s" % e2p2_long_output)
        # The final pf file only exists when a gene mapping was supplied.
        pgdb_pf_output = (e2p2_final_pf_output if has_gene_map
                          else e2p2_orxn_pf_output)
        logger.log(logging.INFO,
                   "To build PGDB, use .pf file: %s" % pgdb_pf_output)
    except KeyError as classifer_missing:
        logger.log(
            logging.ERROR,
            "Missing classifer result file(s): " + str(classifer_missing))
        sys.exit(1)
# Three backbones, each configured for 3 input channels / 4 output classes.
modelEb4 = EfficientNet.from_pretrained('efficientnet-b4', in_channels=3, num_classes=4)
modelEb3 = EfficientNet.from_pretrained('efficientnet-b3', in_channels=3, num_classes=4)
modelRes18 = models.resnet18(pretrained=True)
# Replace the ResNet's final fully connected layer with a 4-output layer.
num_ftrs = modelRes18.fc.in_features
modelRes18.fc = nn.Linear(num_ftrs, 4)  # the last fc layer
modelEb4.to(device)
modelEb3.to(device)
modelRes18.to(device)
# Wrap the three models in a single ensemble module and register it through
# bind_model.
model = ensemble.Ensemble(modelEb4, modelEb3, modelRes18).to(device)
bind_model(model, device)
criterion = focal_loss.FocalLoss(device).to(device)
# Each member model gets its own Adam optimizer (same learning rate) ...
optimizerEb4 = torch.optim.Adam(modelEb4.parameters(), lr=learning_rate)
optimizerEb3 = torch.optim.Adam(modelEb3.parameters(), lr=learning_rate)
optimizerRes18 = torch.optim.Adam(modelRes18.parameters(), lr=learning_rate)
# ... and its own cosine-annealing learning-rate schedule over cosine_epo.
scheduler_cosineEb4 = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizerEb4, cosine_epo)
scheduler_cosineEb3 = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizerEb3, cosine_epo)
scheduler_cosineRes18 = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizerRes18, cosine_epo)