preds = self.clf[it].predict(itinsts[columns].values) itinsts["ptag"] = preds cprobs = self.clf[it].predict_proba(itinsts[columns].values) tpcol = np.where(self.clf[it].classes_ == "TP")[0][0] itinsts["qual"] = cprobs[:, tpcol] insts.append(itinsts) return pandas.concat(insts) def plots(self, prefix, featurenames): """ Make diagnostic plots """ for it in self.itypes: importances = self.clf[it].feature_importances_ std = np.std([ tree.feature_importances_ for tree in self.clf[it].estimators_ ], axis=0) indices = np.argsort(importances)[::-1] # Print the feature ranking print "Feature ranking for INDELTYPE == %s:" % str(it) for f in xrange(0, len(indices)): print "%d. feature %d:%s (%f +- %f)" % ( f + 1, indices[f], featurenames[indices[f]], importances[indices[f]], std[indices[f]]) EVSModel.register("strelka.rf.indel", StrelkaRFIndel)
axis=0) indices = np.argsort(importances)[::-1] # Print the feature ranking print "Feature ranking:" for f in xrange(0, len(indices)): print "%d. feature %d:%s (%f +- %f)" % ( f + 1, indices[f], featurenames[indices[f]], importances[indices[f]], std[indices[f]]) def draw_trees(self): """ Draw trees in png files """ from sklearn.tree import export_graphviz import subprocess images = [] for i, tree in enumerate(self.clf.estimators_): # creates indel_tree_...dot files fname = 'trees/tree_' + str(i) + '.dot' with open(fname, 'w') as dotfile: export_graphviz(tree, dotfile, feature_names=featurenames) # run dot subprocess.check_call( ["dot -Tpng " + fname + " > " + fname + ".png"], shell=True) images.append(Image(fname + ".png")) EVSModel.register("germline.rf", GermlineRF)
all_trees = io.classifier_to_dict(self.clf) full_model = meta full_model["Model"] = all_trees modelFile = { "CalibrationModels": { "Somatic": { varianttype: full_model } } } json.dump(modelFile, open(filename, "wb")) def plots(self, prefix, featurenames): """ Make diagnostic plots """ importances = self.clf.feature_importances_ std = np.std( [tree.feature_importances_ for tree in self.clf.estimators_], axis=0) indices = np.argsort(importances)[::-1] # Print the feature ranking print "Feature ranking:" for f in xrange(0, len(indices)): print "%d. feature %d:%s (%f +- %f)" % ( f + 1, indices[f], featurenames[indices[f]], importances[indices[f]], std[indices[f]]) EVSModel.register("somatic.rf", SomaticRF)
importances = self.clf.feature_importances_ std = np.std( [tree.feature_importances_ for tree in self.clf.estimators_], axis=0) indices = np.argsort(importances)[::-1] # Print the feature ranking print "Feature ranking:" for f in xrange(0, len(indices)): print "%d. feature %d:%s (%f +- %f)" % ( f + 1, indices[f], featurenames[indices[f]], importances[indices[f]], std[indices[f]]) # images = [] # for i, tree in enumerate(self.clf.estimators_): # creates indel_tree_...dot files # fname = 'trees112/tree_' + str(i) + '.dot' # with open(fname, 'w') as dotfile: # export_graphviz( # tree, # dotfile, # feature_names=featurenames) # run dot # subprocess.check_call(["dot -Tpng " + fname + " > " + fname + ".png"], shell=True) # images.append(Image(fname + ".png")) EVSModel.register("strelka.rf", StrelkaRF)