preds = self.clf[it].predict(itinsts[columns].values)
            itinsts["ptag"] = preds
            cprobs = self.clf[it].predict_proba(itinsts[columns].values)
            tpcol = np.where(self.clf[it].classes_ == "TP")[0][0]
            itinsts["qual"] = cprobs[:, tpcol]
            insts.append(itinsts)

        return pandas.concat(insts)

    def plots(self, prefix, featurenames):
        """ Print a feature importance ranking for every indel type.

        For each entry of ``self.itypes`` the classifier stored in
        ``self.clf[it]`` is asked for its feature importances, and the spread
        of the importances over its individual estimators is reported as a
        standard deviation. Despite the method name nothing is plotted here;
        the ranking is written to standard output.

        :param prefix: not used by this method
        :param featurenames: sequence of feature names, indexed by feature
                             column number
        """

        for it in self.itypes:
            forest = self.clf[it]
            importances = forest.feature_importances_
            # per-feature standard deviation of the importances over all trees
            std = np.std(
                [tree.feature_importances_ for tree in forest.estimators_],
                axis=0)
            # feature indices, most important first
            indices = np.argsort(importances)[::-1]

            # Print the feature ranking. The single-argument print(...) form
            # and enumerate() give the same output on Python 2 and Python 3.
            print("Feature ranking for INDELTYPE == %s:" % str(it))

            for rank, idx in enumerate(indices, 1):
                print("%d. feature %d:%s (%f +- %f)" % (
                    rank, idx, featurenames[idx], importances[idx], std[idx]))


# Pass the StrelkaRFIndel class to EVSModel.register under the key
# "strelka.rf.indel" (presumably so the model can be selected by that name;
# register() is defined outside this file section).
EVSModel.register("strelka.rf.indel", StrelkaRFIndel)
Exemple #2
0
            axis=0)
        indices = np.argsort(importances)[::-1]

        # Print the feature ranking
        print "Feature ranking:"

        for f in xrange(0, len(indices)):
            print "%d. feature %d:%s (%f +- %f)" % (
                f + 1, indices[f], featurenames[indices[f]],
                importances[indices[f]], std[indices[f]])

    def draw_trees(self, featurenames=None):
        """ Draw trees in png files

        Every estimator in ``self.clf.estimators_`` is exported to
        ``trees/tree_<i>.dot`` and then rendered with the Graphviz ``dot``
        program to ``trees/tree_<i>.dot.png``. The paths are relative to the
        current working directory; the ``trees`` directory is not created
        here.

        :param featurenames: optional feature names, passed through to
                             export_graphviz as ``feature_names``. Previously
                             this name was referenced without being a
                             parameter or local of the method.
        :return: list of the png file names written, in estimator order
        :raises subprocess.CalledProcessError: if ``dot`` exits non-zero
        :raises OSError: if the ``dot`` executable cannot be started
        """

        from sklearn.tree import export_graphviz
        import subprocess

        png_files = []
        for i, tree in enumerate(self.clf.estimators_):
            # creates trees/tree_<i>.dot
            dot_name = 'trees/tree_' + str(i) + '.dot'
            png_name = dot_name + ".png"
            with open(dot_name, 'w') as dotfile:
                export_graphviz(tree, dotfile, feature_names=featurenames)
            # run dot; an argument list with -o replaces the shell string with
            # output redirection, so no shell is involved
            subprocess.check_call(["dot", "-Tpng", dot_name, "-o", png_name])
            png_files.append(png_name)

        return png_files


# Pass the GermlineRF class to EVSModel.register under the key "germline.rf"
# (presumably so the model can be selected by that name; register() is
# defined outside this file section).
EVSModel.register("germline.rf", GermlineRF)
Exemple #3
0
        all_trees = io.classifier_to_dict(self.clf)
        full_model = meta
        full_model["Model"] = all_trees
        modelFile = {
            "CalibrationModels": {
                "Somatic": {
                    varianttype: full_model
                }
            }
        }
        json.dump(modelFile, open(filename, "wb"))

    def plots(self, prefix, featurenames):
        """ Print a feature importance ranking for the classifier.

        Reads the feature importances of ``self.clf`` and reports, for each
        feature, the importance together with its standard deviation over the
        individual estimators. Despite the method name nothing is plotted
        here; the ranking is written to standard output.

        :param prefix: not used by this method
        :param featurenames: sequence of feature names, indexed by feature
                             column number
        """
        forest = self.clf
        importances = forest.feature_importances_
        # per-feature standard deviation of the importances over all trees
        std = np.std(
            [tree.feature_importances_ for tree in forest.estimators_],
            axis=0)
        # feature indices, most important first
        indices = np.argsort(importances)[::-1]

        # Print the feature ranking. The single-argument print(...) form and
        # enumerate() give the same output on Python 2 and Python 3.
        print("Feature ranking:")

        for rank, idx in enumerate(indices, 1):
            print("%d. feature %d:%s (%f +- %f)" % (
                rank, idx, featurenames[idx], importances[idx], std[idx]))


# Pass the SomaticRF class to EVSModel.register under the key "somatic.rf"
# (presumably so the model can be selected by that name; register() is
# defined outside this file section).
EVSModel.register("somatic.rf", SomaticRF)
Exemple #4
0
        importances = self.clf.feature_importances_
        std = np.std(
            [tree.feature_importances_ for tree in self.clf.estimators_],
            axis=0)
        indices = np.argsort(importances)[::-1]

        # Print the feature ranking
        print "Feature ranking:"

        for f in xrange(0, len(indices)):
            print "%d. feature %d:%s (%f +- %f)" % (
                f + 1, indices[f], featurenames[indices[f]],
                importances[indices[f]], std[indices[f]])


#        images = []
#        for i, tree in enumerate(self.clf.estimators_):
# creates indel_tree_...dot files
#            fname = 'trees112/tree_' + str(i) + '.dot'
#            with open(fname, 'w') as dotfile:
#                export_graphviz(
#                    tree,
#                    dotfile,
#                    feature_names=featurenames)
# run dot
#            subprocess.check_call(["dot -Tpng " + fname + " > " + fname + ".png"], shell=True)
#            images.append(Image(fname + ".png"))

# Pass the StrelkaRF class to EVSModel.register under the key "strelka.rf"
# (presumably so the model can be selected by that name; register() is
# defined outside this file section).
EVSModel.register("strelka.rf", StrelkaRF)