Example #1
    def make(self,
             name,
             neurons,
             dimensions,
             array_count=1,
             intercept=(-1, 1),
             seed=None,
             type='lif',
             encoders=None):
        # we need to run the setup again if ensembles are added
        self.setup = False

        if seed is None and self.seed is not None:
            seed = self.random.randrange(0x7fffffff)

        e = ensemble.Ensemble(neurons,
                              dimensions,
                              count=array_count,
                              intercept=intercept,
                              dt=self.dt,
                              seed=seed,
                              type=type,
                              encoders=encoders,
                              name=name)
        self.nodes[name] = e

        timer_conn, node_conn = Pipe()
        p = Process(target=e.run, args=(node_conn, ), name=name)
        self.processes[name] = (p, timer_conn)
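For context, a hypothetical call site for the make() method above (the surrounding Network class, its constructor signature, and the process start-up are assumptions for illustration, not part of the excerpt):

    # Hypothetical usage sketch; Network is assumed to wrap the make()
    # method and the processes dict shown above.
    net = Network(seed=42, dt=0.001)
    net.make('A', neurons=100, dimensions=1)                 # default LIF ensemble
    net.make('B', neurons=200, dimensions=2, array_count=4)  # array of 4 ensembles
    for name, (proc, timer_conn) in net.processes.items():
        proc.start()  # each ensemble runs in its own process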
Example #2
    def make(self, name, neurons, dimensions, array_count=1,
             intercept=(-1, 1), seed=None, type='lif', encoders=None):
        if seed is None and self.seed is not None:
            seed = self.random.randrange(0x7fffffff)

        # just in case the model has been run previously: adding a new
        # node means we have to rebuild the theano function
        self.theano_tick = None
        e = ensemble.Ensemble(neurons, dimensions, count=array_count,
                              intercept=intercept, dt=self.dt, seed=seed,
                              type=type, encoders=encoders)
        self.nodes[name] = e  # store created ensemble in node dictionary
Example #3
def runVIC(dbname, options):
    """Driver function for performing a VIC forecast simulation"""
    startyear, startmonth, startday = map(
        int, options['forecast']['startdate'].split('-'))
    endyear, endmonth, endday = map(int,
                                    options['forecast']['enddate'].split('-'))
    # if date(endyear, endmonth, endday) > (date(startyear, startmonth, startday) + relativedelta(months=3)):
    #     print("WARNING! Forecast with lead time longer than 3 months requested. Exiting...")
    #     sys.exit()
    res = config.getResolution(options['forecast'])
    vicexe = "{0}/vicNl".format(rpath.bins)
    basin = config.getBasinFile(options['forecast'])
    saveto, savevars = config.getVICvariables(options)
    name = options['forecast']['name'].lower()
    nens = int(options['forecast']['ensemble size'])
    method = options['forecast']['method']
    models = ensemble.Ensemble(nens, dbname, res, startyear, startmonth,
                               startday, endyear, endmonth, endday, name)
    if options['vic'].get('initialize') in ('perturb', 'random'):
        init_method = options['vic']['initialize']
    else:
        init_method = "determ"  # default option to initialize the ensemble from the same state
    # override initialization method if assimilation was requested
    if 'observations' in options['vic']:
        init_method = "random"
        models.initialize(options,
                          basin,
                          init_method,
                          vicexe,
                          saveindb=True,
                          saveto=saveto,
                          saveargs=savevars,
                          skipsave=-1)
        data, alat, alon, agid = assimilate(
            options, date(models.startyear, models.startmonth,
                          models.startday), models)
        models.updateStateFiles(data, alat, alon, agid)
    else:
        models.initialize(options,
                          basin,
                          init_method,
                          vicexe,
                          saveindb=True,
                          saveto=saveto,
                          saveargs=savevars)
    models.writeParamFiles()
    models.writeForcings(method, options)
    models.run(vicexe)
    models.setDates(startyear, startmonth, startday, endyear, endmonth, endday)
    models.save(saveto, savevars)
    for varname in savevars:
        raster.stddev(models.dbname, "{0}.{1}".format(models.name, varname))
        raster.mean(models.dbname, "{0}.{1}".format(models.name, varname))
    for e in range(nens):
        shutil.rmtree(models[e].model_path)
Example #4
    def make(self, name, neurons, dimensions, array_count=1, intercept=(-1, 1),
            seed=None, type='lif', encoders=None, num_subs=1):

        if num_subs < 1:
            print >> sys.stderr, "num_subs for ensembles must be greater than 0"
            exit(1)

        # TODO: is this necessary?
        # we need to run the setup again if ensembles are added
        self.setup = False

        if seed is None and self.seed is not None:
            seed = self.random.randrange(0x7fffffff)

        e = ensemble.Ensemble(neurons, dimensions, count=array_count,
            intercept=intercept, dt=self.dt, seed=seed, type=type,
            encoders=encoders, name=name)

        # if no subensembles, create just the main ensemble process and exit
        if num_subs == 1:
            timer_conn, node_conn = Pipe()
            p = Process(target=e.run, args=(node_conn, ), name=name)
            self.nodes[name] = Node(e, p, timer_conn)
            return

        e_num = 0
        # create the specified number of subensembles
        for encoder, decoder, bias in e.get_subensemble_parts(num_subs):
            subname = e.name + str(e_num)

            e_sub = ensemble.Ensemble(neurons / num_subs, dimensions,
                count=array_count, intercept=intercept, dt=self.dt,
                seed=seed, type=type,
                encoders=encoder,
                is_subensemble=True,
                name=subname,
                decoders=decoder,
                bias=bias)

            # creating a process for each subensemble
            timer_conn, node_conn = Pipe()
            p = Process(target=e_sub.run, args=(node_conn, ), name=subname)
            self.nodes[subname] = Node(e_sub, p, timer_conn, e, e_num)

            e_num += 1
Example #5
    def make(self, name, *args, **kwargs):
        """Create and return an ensemble of neurons.

        Note that all ensembles are actually arrays of length 1.

        :param string name: name of the ensemble (must be unique)
        :param int seed: random number seed to use. Will be passed to both
                         random.seed() and ca.nengo.math.PDFTools.setSeed().
                         If this is None and the Network was constructed with
                         a seed parameter, a seed will be randomly generated.
        :returns: the newly created ensemble
        """
        if 'seed' not in kwargs.keys(): # if no seed provided, get one randomly from the rng
            kwargs['seed'] = self.random.randrange(0x7fffffff)
    
        self.theano_tick=None  # just in case the model has been run previously, as adding a new node means we have to rebuild the theano function
        e = ensemble.Ensemble(*args, **kwargs) 

        self.nodes[name] = e # store created ensemble in node dictionary
Example #6
    def make(self, name, num_subs=1, **kwargs):
        if num_subs < 1:
            raise Exception("ERROR", "num_subs must be greater than 0")

        if 'seed' not in kwargs.keys():
            if self.fixed_seed is not None:
                kwargs['seed'] = self.fixed_seed
            else:
                kwargs['seed'] = self.random.randrange(0x7fffffff)
        kwargs['dt'] = self.dt

        if num_subs == 1:
            self._make_ensemble(name, **kwargs)
        else:
            if 'mode' in kwargs and kwargs['mode'] == 'direct':
                raise Exception("ERROR", "do not support direct subensembles")

            orig_ensemble = ensemble.Ensemble(**kwargs)
            self.split_ensembles[name] = {
                'parent': orig_ensemble,
                'children': []
            }
            e_num = 0
            for encoder, decoder, bias, alpha in \
                orig_ensemble.get_subensemble_parts(num_subs):

                sub_name = name + "-SUB-" + str(e_num)
                e_num += 1

                kwargs["dimensions"] = orig_ensemble.dimensions
                if orig_ensemble.neurons_num % num_subs != 0:
                    raise Exception(
                        'ERROR: The number of neurons is not divisible by num_subs'
                    )
                kwargs["neurons"] = orig_ensemble.neurons_num / num_subs
                kwargs["encoders"] = encoder
                kwargs["decoders"] = decoder
                kwargs["bias"] = bias
                kwargs["alpha"] = alpha
                self._make_ensemble(sub_name, is_subensemble=True, **kwargs)
                self.split_ensembles[name]['children'].append(sub_name)
Example #7
import os

import numpy as np
import sklearn.metrics

import load_kc_data
import tree
reload(tree)
import svm
reload(svm)
import linear
reload(linear)
import ensemble
reload(ensemble)

os.chdir('My/Path/Here')

train_X, train_y, dev_X, dev_y, test_X, test_y = load_kc_data.load_kc_housing()

# Train models
tree_model = tree.DecisionTree(train_X, train_y, dev_X, dev_y, test_X, test_y)
linear_model = linear.LinearRegression(train_X, train_y, dev_X, dev_y, test_X, test_y)

# Results
def evaluate_model(clf,X,y):
    y_pred = clf.predict(X)
    rms = sklearn.metrics.mean_squared_error(y,y_pred)
    print "The model's RMS is " + str(rms) + ", which is " + str(100*rms/np.var(y)) + "% of data variance."
    
print "Decision Tree:"
evaluate_model(tree_model, test_X, test_y)
print "\nLinear Regression"
evaluate_model(linear_model, test_X, test_y)

# Averaging the results of the two models
ensemble_rms = ensemble.Ensemble(test_X, test_y, [tree_model,linear_model])
print "\nThe ensemble model's RMS is " + str(ensemble_rms) + ", which is " + str(100*ensemble_rms/np.var(test_y)) + "% of data variance."
Example #8
def runEnsembleVIC(dbname, options):
    """Driver function for performing a VIC nowcast simulation."""
    res = config.getResolution(options['nowcast'])
    name = options['nowcast']['name'].lower()
    vicexe = "{0}/vicNl".format(rpath.bins)
    basin = config.getBasinFile(options['nowcast'])
    saveto, savevars = config.getVICvariables(options)
    startyear, startmonth, startday = map(
        int, options['nowcast']['startdate'].split('-'))
    endyear, endmonth, endday = map(
        int, options['nowcast']['enddate'].split('-'))
    precipdatasets = options['vic']['precip'].split(",")
    savestate, _ = _saveState(options['vic'])
    if 'ensemble size' in options['vic']:
        nens = int(options['vic']['ensemble size'])
    elif 'observations' in options['vic']:
        nens = 20
    else:
        nens = len(precipdatasets)
    models = ensemble.Ensemble(nens, dbname, res, startyear,
                               startmonth, startday, endyear, endmonth, endday, name)
    if 'initialize' in options['vic'] and options['vic']['initialize']:
        init_method = options['vic']['initialize']
        if isinstance(init_method, bool):
            init_method = "determ"
        models.initialize(options, basin, init_method, vicexe)
    else:
        models.writeSoilFiles(basin)
    if 'observations' in options['vic']:
        method = "random"
        obsnames = options['vic']['observations'].split(",")
        update = options['vic'].get('update')
        updateDates = observationDates(
            obsnames, dbname, startyear, startmonth, startday, endyear, endmonth, endday, update)
        t0 = date(startyear, startmonth, startday)
        updateDates += [date(endyear, endmonth, endday)]
        for t in updateDates:
            overwrite = t0 == date(startyear, startmonth, startday)
            ndays = (date(t.year, t.month, t.day) - t0).days
            t1 = t + timedelta(1)
            models.setDates(t.year, t.month, t.day, t1.year, t1.month, t1.day)
            models.initialize(options, basin, method, vicexe, saveindb=True,
                              saveto=saveto, saveargs=savevars, initdays=ndays, overwrite=overwrite)
            data, alat, alon, agid = assimilate(options, date(
                models.startyear, models.startmonth, models.startday), models)
            db = dbio.connect(models.dbname)
            cur = db.cursor()
            sql = "select tablename from pg_tables where schemaname='{0}'".format(
                models.name)
            cur.execute(sql)
            tables = [tbl[0] for tbl in cur.fetchall() if tbl[0] != "dssat"]
            for tbl in tables:
                sql = "delete from {0}.{1} where fdate=date '{2}-{3}-{4}'".format(
                    models.name, tbl, t.year, t.month, t.day)
                cur.execute(sql)
            db.commit()
            cur.close()
            db.close()
            if bool(data):
                models.updateStateFiles(data, alat, alon, agid)
            t0 = date(t.year, t.month, t.day)
    else:
        method = "random"
        t = date(endyear, endmonth, endday)
        t1 = t + timedelta(1)
        models.setDates(t.year, t.month, t.day, t1.year, t1.month, t1.day)
        ndays = (t - date(startyear, startmonth, startday)).days
        models.initialize(options, basin, method, vicexe, saveindb=True,
                          saveto=saveto, saveargs=savevars, initdays=ndays)
    for varname in savevars:
        raster.stddev(models.dbname, "{0}.{1}".format(
            models.name, varname))
    for model in models:
        shutil.rmtree(model.model_path)
Example #9
from sklearn.neural_network import MLPClassifier

MLP = MLPClassifier(activation="logistic", random_state=2)
MLP1 = MLPClassifier(alpha=1, activation="logistic", random_state=2)
#
# AdaBoost=AdaBoostClassifier()
# Gaussian=GaussianNB()
# QuadraticDiscriminant=QuadraticDiscriminantAnalysis()
fun_list = [
    # ("svc",svc),
    # ("rbf_svc",rbf_svc),
    # ("poly_svc",poly_svc),
    # ("lin_svc",lin_svc),
    # ("knn",knn),
    # ("lr",lr),
    # ("GaussianProcess",GaussianProcess),
    # ("DecisionTree",DecisionTree),
    # ("RandomForest",RandomForest),
    ("MLP", MLP),
    ("MLP1", MLP1),

    # ("AdaBoost",AdaBoost),
    # ("Naive Bayes",Gaussian),
    # ("QDA",QuadraticDiscriminant)
]
"""ensemle methods
"""
m = ensemble.Ensemble(fun_list)
m.fit(X_train, y_train)
# m.predict_prob(X_test,y=y_test)
m.predict(X_test, y=y_test)

# m.vote(y_test)
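The fit/predict interface used above suggests a majority-vote wrapper around the named classifiers; a minimal sketch of such a class (an assumption, not necessarily the ensemble module used here, and it assumes non-negative integer class labels):

    import numpy as np

    class VotingEnsemble:
        """Fit each named classifier and predict by plurality vote."""

        def __init__(self, fun_list):
            self.fun_list = fun_list  # list of (name, classifier) pairs

        def fit(self, X, y):
            for _, clf in self.fun_list:
                clf.fit(X, y)
            return self

        def predict(self, X, y=None):
            # Stack per-classifier predictions and take the most common
            # label in each column; labels must be non-negative integers.
            votes = np.asarray([clf.predict(X) for _, clf in self.fun_list])
            majority = np.apply_along_axis(
                lambda col: np.bincount(col.astype(int)).argmax(), 0, votes)
            if y is not None:
                print("accuracy:", float(np.mean(majority == np.asarray(y))))
            return majority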
Example #10
    def search(self):
        print "\n\n\nNew run:\n"
        #load the label and superpose onto selected position
        cmd.load("%s/labels/%s" % (self.path, self.currentLabel.pdbFile),
                 "currentLabel")
        print "Attempting superposition..."
        if not self.superpose():
            print "Superposition does not work."
            print "Possible reasons:"
            print "1) Glycine? Mutate to Ala first."
            print "2) Trying to attach DNA label to Protein or vice versa?"
            if len(self.currentLabel.errorMessage) > 0:
                print "3) %s" % self.currentLabel.errorMessage
            self.cleanupAfterRun()
            return
        else:
            print "Superposition worked!"
        #if self.currentLabel.rotate == False:
        #	return
        #prepare movingAtoms array of label, put into correct order...
        stored.movingAtoms = []
        for i in range(0, len(self.currentLabel.atomNames)):
            xyz = cmd.get_model(
                "%s & name %s" %
                ("currentLabel", self.currentLabel.atomNames[i]),
                1).get_coord_list()
            stored.movingAtoms.extend(xyz)
        self.currentLabel.movingAtoms = numpy.array(stored.movingAtoms)

        #create object with only the atoms around the label to speed everything up
        protein ="%s &! %s within %f of %s" %(self.pickedObject1, \
                   self.residue1Name, \
                   self.currentLabel.radius, \
                   "currentLabel")
        cmd.create("labelEnvironment", "byres %s" % protein)
        stored.environmentAtomCoordinates = []
        stored.environmentAtomNames = []
        stored.environmentAtomResidueNames = []
        cmd.iterate_state(1, protein,
                          'stored.environmentAtomCoordinates.append((x,y,z))')
        cmd.iterate(protein, 'stored.environmentAtomNames.append(name)')
        cmd.iterate(protein, 'stored.environmentAtomResidueNames.append(resn)')
        environmentAtomCoordinates = numpy.array(
            stored.environmentAtomCoordinates)
        environmentAtomNames = numpy.array(stored.environmentAtomNames)
        environmentAtomResidueNames = numpy.array(
            stored.environmentAtomResidueNames)
        environmentAtomInfo = [
            environmentAtomCoordinates, environmentAtomNames,
            environmentAtomResidueNames
        ]
        numberOfCPUs = multiprocessing.cpu_count()
        numberOfTries = self.currentLabel.numberOfTries[self.thoroughness]
        numberOfRotamers = self.currentLabel.numberToFind[self.thoroughness]
        processes = []
        chunkSize = int(math.ceil(numberOfRotamers / float(numberOfCPUs)))
        chunks = []
        total = 0
        i = 0
        while total + chunkSize < numberOfRotamers:
            chunks.append(chunkSize)
            total += chunkSize
            i += 1
        chunks.append(numberOfRotamers - i * chunkSize)

        numberOfProcesses = len(chunks)

        queue = multiprocessing.Queue()
        newEnsemble = ensemble.Ensemble()
        newEnsemble.name = "mW"
        self.currentLabel.ensembles[newEnsemble.name] = newEnsemble
        #only use multiprocessing on mac or linux
        if os.name != "nt":
            #print chunks
            print "Trying to find %s rotamers. Using %i cores." % (
                numberOfRotamers, numberOfProcesses),
            for i in range(numberOfProcesses):
                p = multiprocessing.Process(target = self.currentLabel.generateEnsembleMulti,
                       args = (self.currentLabel.movingAtoms,\
                       environmentAtomInfo, chunks[i], numberOfTries,\
                       newEnsemble.name, \
                       False, self.cutoff, self.clashes,\
                       queue))
                p.start()
                processes.append(p)
        else:
            print "Trying to find %s rotamers. Using 1 core." % (
                numberOfRotamers),
            numberOfProcesses = 1
            self.currentLabel.generateEnsembleMulti(self.currentLabel.movingAtoms,\
                    environmentAtomInfo, numberOfRotamers, numberOfTries,\
                    newEnsemble.name, \
                    False, self.cutoff, self.clashes,\
                    queue)
        resultsDictionary = {}
        for i in range(numberOfProcesses):
            resultsDictionary.update(queue.get())
        for p in processes:
            p.join()
        print "Done."
        print "Collecting results...",
        newRotamers = []
        for resultList in resultsDictionary.values():
            for result in resultList:
                newRotamers.append(result)
        print "Done! Found %i rotamers" % len(newRotamers)
        #reassign ids. They are not unique with multiprocessing
        for idx, rotamer in enumerate(newRotamers):
            rotamer.id = idx
        newEnsemble.rotamers = newRotamers
Example #11
    def run(self):
        my_view = cmd.get_view()
        #mark Search
        if self.mode == 'Search':
            #show message
            cmd.wizard("message", "Searching conformers...")
            cmd.refresh()
            if self.currentLabel.uid != "Rx":
                self.search()
                print "Creating Rotamers in PyMOL...",
                for aRotamer in self.currentLabel.ensembles["mW"].rotamers:
                    self.createRotamerInPymol(aRotamer, "mW")
# 				if self.thoroughness == "painstaking" and self.currentLabel.uid == "R1":
# 					scoringWeights = {"totalContactWeight": 0.0, "typeOfContactWeight": 1.0}
# 					for aRotamer in self.currentLabel.ensembles["mW"].rotamers:
# 						aRotamer.score(scoringWeights)
# 					self.currentLabel.ensembles["mW"].sortRotamers("chi2")
# 					numberOfRotamers = len(self.currentLabel.ensembles["mW"].rotamers)
# 					newEnsemble = ensemble.Ensemble()
# 					newEnsemble.name = "contactFit"
# 					newEnsemble.rotamers = self.currentLabel.ensembles["mW"].rotamers[0:20]
# 					self.currentLabel.ensembles["contactFit"] = newEnsemble
# 					for aRotamer in self.currentLabel.ensembles["contactFit"].rotamers:
# 						self.createRotamerInPymol(aRotamer, "contactFit")
                print "done!"

            elif self.currentLabel.uid == "Rx":
                ca1 = numpy.array(
                    cmd.get_model(self.residue1Name + " & name CA",
                                  1).get_coord_list()[0])
                ca2 = numpy.array(
                    cmd.get_model(self.residue2Name + " & name CA",
                                  1).get_coord_list()[0])
                try:
                    cb1 = numpy.array(
                        cmd.get_model(self.residue1Name + " & name CB",
                                      1).get_coord_list()[0])
                except:
                    cb1 = ca1
                try:
                    cb2 = numpy.array(
                        cmd.get_model(self.residue2Name + " & name CB",
                                      1).get_coord_list()[0])
                except:
                    cb2 = ca2

                environmentatoms = numpy.array(cmd.get_model("(%s within 10 of %s or %s) and not (%s or %s)" \
                     %(self.pickedObject1, \
                      self.residue1Name, \
                      self.residue2Name, \
                      self.residue1Name, \
                      self.residue2Name), 1).get_coord_list())
                anchor1rotamers = self.currentLabel.calculateCone(
                    ca1, cb1, environmentatoms, numberOfAtoms=8000)
                anchor2rotamers = self.currentLabel.calculateCone(
                    ca2, cb2, environmentatoms, numberOfAtoms=8000)
                solutions1 = []
                solutions2 = []
                for anchor1rotamer in anchor1rotamers:
                    anAtom = anchor1rotamer.atoms["N1"]
                    solutions1.append(anAtom.coordinate)
                for anchor2rotamer in anchor2rotamers:
                    anAtom = anchor2rotamer.atoms["N1"]
                    solutions2.append(anAtom.coordinate)

                #determine common accessible volume
                distances1 = self.currentLabel.quick_map(solutions1, cb2)
                distances2 = self.currentLabel.quick_map(solutions2, cb1)
                indices1 = numpy.where(numpy.any(distances1 > 6, axis=1))
                indices2 = numpy.where(numpy.any(distances2 > 6, axis=1))
                solutions1 = numpy.delete(solutions1, indices1, 0)
                solutions2 = numpy.delete(solutions2, indices2, 0)
                solutions = numpy.concatenate((solutions1, solutions2))

                #create resulting ensemble
                newEnsemble = ensemble.Ensemble()
                newEnsemble.name = "mW"
                rotamerId = 0
                newRotamers = []
                if len(solutions) > 0:
                    for solution in solutions:
                        newRotamer = rotamer.Rotamer()
                        thisAtom = atom.Atom()
                        thisAtom.coordinate = solution
                        thisAtom.name = "N1"
                        thisAtom.element = "N"
                        newRotamer.id = rotamerId
                        newRotamer.atoms["N1"] = thisAtom
                        rotamerId += 1
                        newRotamers.append(newRotamer)
                else:
                    print "Did not find any possible N1 locations. Are the two anchorpoints too far apart?"
                newEnsemble.rotamers = newRotamers
                self.currentLabel.ensembles[newEnsemble.name] = newEnsemble
                cmd.load(
                    "%s/labels/%s" % (self.path, self.currentLabel.pdbFile),
                    "currentLabel")
                for aRotamer in self.currentLabel.ensembles["mW"].rotamers:
                    self.createRotamerInPymol(aRotamer, "mW")
            self.numberOfLabel += 1
            self.finalCosmetics()
            #dismiss message
            cmd.wizard()

        #mark Measure
        elif self.mode == "Measure":
            cmd.wizard("message", "Calculating distances...")
            cmd.refresh()
            print "\n\n\nDistance calculation:\n"
            print "The dashed lines are the c-beta distance (green),\nand the distance between the geometric averages\nof the two ensembles (yellow).\n"
            print "The following statistics refer to the distribution\nof the individual distances between all conformers (may take a while):\n"

            #find out what the selections are
            stored.label1 = []
            stored.label2 = []
            stored.label1Coordinates = []
            stored.label2Coordinates = []
            stored.atomNames1 = []
            stored.atomNames2 = []

            #extract label info
            cmd.iterate(self.residue1Name, 'stored.label1.append(segi)')
            cmd.iterate(self.residue2Name, 'stored.label2.append(segi)')
            cmd.iterate(self.residue1Name, 'stored.atomNames1.append(name)')
            cmd.iterate(self.residue2Name, 'stored.atomNames2.append(name)')
            try:
                label1 = label.Label.fromfile("labels/%s.txt" %
                                              stored.label1[0])
                cmd.iterate_state(
                    0,
                    "%s & name %s" % (self.residue1Name, label1.spinLocation),
                    'stored.label1Coordinates.append((x,y,z))')
            except:
                cmd.iterate_state(0, "%s" % (self.residue1Name),
                                  'stored.label1Coordinates.append((x,y,z))')
            try:
                label2 = label.Label.fromfile("labels/%s.txt" %
                                              stored.label2[0])
                cmd.iterate_state(
                    0,
                    "%s & name %s" % (self.residue2Name, label2.spinLocation),
                    'stored.label2Coordinates.append((x,y,z))')
            except:
                cmd.iterate_state(0, "%s" % (self.residue2Name),
                                  'stored.label2Coordinates.append((x,y,z))')
            #calculate distances
            distances = distanceDistribution.DistanceDistribution()
            dist = distances.calculateDistanceDistribution(
                stored.label1Coordinates, stored.label2Coordinates)

            #create pseudoatom at average coordinate of each ensemble and display the distance between them
            atoms1 = numpy.array(stored.label1Coordinates)
            atoms2 = numpy.array(stored.label2Coordinates)
            avgAtoms1 = numpy.average(atoms1, axis=0)
            avgAtoms2 = numpy.average(atoms2, axis=0)
            self.createPseudoatom(avgAtoms1, "tmp_average1", 1)
            self.createPseudoatom(avgAtoms2, "tmp_average2", 1)
            cmd.distance(self.object_prefix + "avg", "tmp_average1 & name PS1",
                         "tmp_average2 & name PS1")
            cmd.delete("tmp_average1")
            cmd.delete("tmp_average2")

            #cbeta distance if cbeta is present in both selections
            #cBetaDistance = 0.0
            if any("CB" in atom for atom in stored.atomNames1) and any(
                    "CB" in atom for atom in stored.atomNames2):
                cmd.distance(self.object_prefix + "cBeta",
                             self.residue1Name + " & name CB",
                             self.residue2Name + " & name CB")
                #for some reason, cmd.distance does not return the correct distance. Although it is shown in the viewer...
                #get_distance gives the correct distance, but does not create the object in the viewer.
                cBetaDistance = cmd.get_distance(
                    self.residue1Name + " & name CB",
                    self.residue2Name + " & name CB")
                cmd.set("dash_color", "green", self.object_prefix + "cBeta")

            histogram = numpy.histogram(dist, numpy.arange(100))
            envelopePlot = numpy.zeros((100, 2))
            envelopePlot[0:99] = numpy.column_stack(
                (histogram[1][0:len(histogram[1]) - 1], histogram[0]))

            #put point in mid of bin
            envelopePlot[:, 0] += 0.5
            normEnvelopePlot = numpy.copy(envelopePlot)
            normEnvelopePlot[:, 1] = normEnvelopePlot[:, 1] / numpy.amax(
                histogram[0])

            #combine dist and histogram to single array before output
            output = numpy.column_stack((envelopePlot, normEnvelopePlot[:, 1]))
            averageDistance = numpy.average(dist)

            #make graph dictionary for mtsslPlotter
            graphtitle = "%s-%s" % (self.residue1Name, self.residue2Name)
            xlim = [0, 100]
            ylim = [0, 1]
            plotDictionary = self.makeGraphDataDictionary(
                graphtitle, "DistanceDistribution", "Distance (Angstrom)",
                "Relative Probability", output[:, 0], output[:, 2], 0,
                xlim, ylim)
            stored.plots.append(plotDictionary)
            print "Distribution plot added to memory. Inspect it with mtsslPlotter."

            #Copy to clipboard
            header = "Dist.   Count   Norm.Count\n"
            outputStr = header + numpy.array_str(output)
            outputStr = outputStr.replace("[", "")
            outputStr = outputStr.replace("]", "")
            self.copyStringToClipboard(outputStr)

            #Write to file
            if self.writeToFile:
                try:
                    filename = "%s-%s" % (self.residue1Name, self.residue2Name)
                    numpy.savetxt(filename + ".txt", output, delimiter='\t')
                    print "Written to file:"
                    print "%s/%s" % (os.getcwd(), filename)
                except:
                    print "Writing to file failed!"
            print self.calculateStatistics2(dist)
            try:
                if cBetaDistance > 0.0:
                    print "Cbeta distance: %3.1f" % cBetaDistance
            except:
                print "No Cbeta distance."
            cmd.wizard()

        #mark Distance Map
        elif self.mode == "Distance Map":
            #show message
            cmd.wizard("message",
                       "Calculating distance maps. Please be patient...")
            cmd.refresh()
            dm = distanceMap.DistanceMap(self.writeToFile, self.currentLabel,
                                         self.homoOligomerMode)
            if self.pickedObject1 == self.pickedObject2:
                dm.intraDistanceMap(self.pickedObject1)
            else:
                dm.interDistanceMap(self.pickedObject1, self.pickedObject2)
            print "Done!"
            cmd.wizard()

        self.cleanupAfterRun()
        cmd.set_view(my_view)
Example #12
def run_e2p2_pipeline(time_stamp,
                      input_fasta_path,
                      blastp_cmd,
                      blast_weight,
                      rpsd_db_name_path,
                      blast_output_path,
                      num_threads,
                      java_cmd,
                      priam_search_jar,
                      blast_evalue,
                      priam_weight,
                      blast_bin_path,
                      priam_profiles_path,
                      priam_output_path,
                      priam_resume,
                      threshold,
                      e2p2_short_output,
                      e2p2_long_output,
                      e2p2_pf_output,
                      e2p2_orxn_pf_output,
                      e2p2_final_pf_output,
                      logging_level,
                      protein_gene_path=None):
    """Function of the running E2P2 pipeline
	Args:
		time_stamp: Time stamp of running the E2P2 pipeline
		input_fasta_path: Path to fasta input
		blastp_cmd: Path to blastp command
		blast_weight: Path to BLAST classifier weight mapping file
		rpsd_db_name_path: Path to BLAST classifier RPSD database "name"
		blast_output_path: Path to 'blastp' output
		num_threads: Number of threads to run 'blastp'
		java_cmd: Path to java command
		priam_search_jar: Path to 'PRIAM_search.jar'
		blast_evalue: E-value cutoff for the BLAST classifier
		priam_weight: Path to PRIAM classifier weight mapping file
		blast_bin_path: Path to BLAST 'bin' folder
		priam_profiles_path: Path to PRIAM classifier PRIAM profiles folder
		priam_output_path: Path to 'PRIAM_search.jar' output
		priam_resume: Flag for whether to resume an existing PRIAM job
		threshold: Threshold for E2P2 ensemble
		e2p2_short_output: Path to E2P2 short output
		e2p2_long_output: Path to E2P2 long output
		e2p2_pf_output: Path to E2P2 pf output
		e2p2_orxn_pf_output: Path to E2P2 orxn pf output
		e2p2_final_pf_output: Path to E2P2 final pf output
		logging_level: The logging level set for run e2p2 pipeline
		protein_gene_path: Path to protein ID to gene ID mapping file
	Raises: KeyError
	Returns:
	"""
    logger = logging.getLogger(definitions.DEFAULT_LOGGER_NAME)
    # Set up object for running classifiers
    rc = ensemble.RunClassifiers(time_stamp)
    # Run blastp classifier
    rc.blast_classifer(blastp_cmd, rpsd_db_name_path, input_fasta_path,
                       blast_output_path, num_threads, logging_level,
                       definitions.DEFAULT_LOGGER_NAME)
    # Run priam_search classifier
    rc.priam_classifer(java_cmd, priam_search_jar, blast_bin_path,
                       priam_profiles_path, input_fasta_path,
                       priam_output_path, logging_level,
                       definitions.DEFAULT_LOGGER_NAME, priam_resume)
    logger.log(logging.INFO, "Running level-0 classification processes.")
    rc.run_all_classifiers()
    try:
        logger.log(logging.INFO, "Compiling predictions.")
        # Read in prediction results
        bc = ensemble.Predictions("Blast")
        bc.generate_blast_predictions(blast_weight, rc.output_dict["blast"],
                                      blast_evalue, logging_level,
                                      definitions.DEFAULT_LOGGER_NAME)
        pc = ensemble.Predictions("Priam")
        pc.generate_priam_predictions(priam_weight, rc.output_dict["priam"],
                                      logging_level,
                                      definitions.DEFAULT_LOGGER_NAME)
        # Set up object for ensemble
        en = ensemble.Ensemble()
        # Add classifier results to ensemble object
        en.add_classifier(bc)
        en.add_classifier(pc)
        logger.log(logging.INFO, "Computing ensemble predictions.")
        # Perform max-weight voting on all classifier results
        en.max_weight_voting(logging_level, definitions.DEFAULT_LOGGER_NAME)
        # Perform absolute thresholding on the classifiers' voting results
        en.absolute_threshold(threshold, logging_level,
                              definitions.DEFAULT_LOGGER_NAME)
        # Set up object for writing E2P2 output
        e2p2 = file.E2P2files(en.final_predictions)
        logger.log(logging.INFO, "Preparing results files.")
        # Add classifier predictions to the E2P2 output for detailed results
        e2p2.add_predictions_of_classifer(bc)
        e2p2.add_predictions_of_classifer(pc)
        e2p2.read_efmap(ef_map_path, logging_level,
                        definitions.DEFAULT_LOGGER_NAME)
        # Write Outputs
        e2p2.write_short_results(
            definitions.DEFAULT_ENSEMBLE_METHOD + " (" + str(threshold) + ")",
            e2p2_short_output, logging_level, definitions.DEFAULT_LOGGER_NAME)
        e2p2.write_long_results(
            definitions.DEFAULT_ENSEMBLE_METHOD + " (" + str(threshold) + ")",
            e2p2_long_output, logging_level, definitions.DEFAULT_LOGGER_NAME)
        e2p2.write_pf_results(e2p2_pf_output, logging_level,
                              definitions.DEFAULT_LOGGER_NAME)
        e2p2.write_orxn_pf_results(
            e2p2_orxn_pf_output, definitions.EC_SUPERSEDED_MAP,
            definitions.METACYC_RXN_MAP,
            definitions.OFFICIAL_EC_METACYC_RXN_MAP,
            definitions.TO_REMOVE_NON_SMALL_MOLECULE_METABOLISM, logging_level,
            definitions.DEFAULT_LOGGER_NAME)
        if protein_gene_path is not None:
            e2p2.write_final_pf_results(
                e2p2_final_pf_output, definitions.EC_SUPERSEDED_MAP,
                definitions.METACYC_RXN_MAP,
                definitions.OFFICIAL_EC_METACYC_RXN_MAP,
                definitions.TO_REMOVE_NON_SMALL_MOLECULE_METABOLISM,
                protein_gene_path, logging_level,
                definitions.DEFAULT_LOGGER_NAME)
        logger.log(logging.INFO, "Operation complete.")
        logger.log(logging.INFO,
                   "Main results are in the file: %s" % e2p2_short_output)
        logger.log(logging.INFO,
                   "Detailed results are in the file: %s" % e2p2_long_output)
        if protein_gene_path is not None:
            logger.log(
                logging.INFO,
                "To build PGDB, use .pf file: %s" % e2p2_final_pf_output)
        else:
            logger.log(logging.INFO,
                       "To build PGDB, use .pf file: %s" % e2p2_orxn_pf_output)
    except KeyError as classifier_missing:
        logger.log(
            logging.ERROR,
            "Missing classifier result file(s): " + str(classifier_missing))
        sys.exit(1)
Example #13
    modelEb4 = EfficientNet.from_pretrained('efficientnet-b4',
                                            in_channels=3,
                                            num_classes=4)
    modelEb3 = EfficientNet.from_pretrained('efficientnet-b3',
                                            in_channels=3,
                                            num_classes=4)
    modelRes18 = models.resnet18(pretrained=True)
    num_ftrs = modelRes18.fc.in_features
    modelRes18.fc = nn.Linear(num_ftrs, 4)  # the last fc layer

    modelEb4.to(device)
    modelEb3.to(device)
    modelRes18.to(device)

    model = ensemble.Ensemble(modelEb4, modelEb3, modelRes18).to(device)
    bind_model(model, device)

    criterion = focal_loss.FocalLoss(device).to(device)

    optimizerEb4 = torch.optim.Adam(modelEb4.parameters(), lr=learning_rate)
    optimizerEb3 = torch.optim.Adam(modelEb3.parameters(), lr=learning_rate)
    optimizerRes18 = torch.optim.Adam(modelRes18.parameters(),
                                      lr=learning_rate)
    scheduler_cosineEb4 = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizerEb4, cosine_epo)
    scheduler_cosineEb3 = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizerEb3, cosine_epo)
    scheduler_cosineRes18 = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizerRes18, cosine_epo)
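Given how the three networks are combined above, ensemble.Ensemble is presumably an nn.Module that averages the members' output logits; a minimal sketch under that assumption:

    import torch
    import torch.nn as nn

    class Ensemble(nn.Module):
        """Average the output logits of several member networks."""

        def __init__(self, *models):
            super().__init__()
            # ModuleList registers the members so .to(device) moves them all
            self.models = nn.ModuleList(models)

        def forward(self, x):
            outputs = [m(x) for m in self.models]
            return torch.mean(torch.stack(outputs), dim=0)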