def plot_roc(predictions, fig_title, outdir):
    """Draw the ROC curves in ``predictions`` and save the figure as a PDF.

    The curves are labelled "1", "2", ... in order, except the last one,
    which is labelled "mean".  The figure is written to the first path of
    the form ``outdir + "roc<i>.pdf"`` (i = 1 .. N_repeats) that does not
    exist yet; if every such path already exists nothing is saved.

    predictions : sequence handed to ``pyroc.plot_multiple_roc``
                  (presumably ROCData objects -- confirm against caller).
                  Must not be empty, the last label is assigned by index.
    fig_title   : title of the figure.
    outdir      : prefix concatenated directly with the file name, so it
                  needs its own trailing path separator.
    """
    # Start from an empty figure so earlier plots do not leak into this one.
    plt.clf()

    curve_count = len(predictions)
    roc_labels = [str(number) for number in range(1, curve_count + 1)]
    roc_labels[-1] = 'mean'

    pyroc.plot_multiple_roc(predictions, title=fig_title,
                            labels=roc_labels, include_baseline=True)

    # Claim the first free numbered slot instead of overwriting older output.
    for slot in range(1, N_repeats + 1):
        target = outdir + "roc%d.pdf" % slot
        if os.path.exists(target):
            continue
        plt.savefig(target)
        break
def ROCPlot(title, labels=None, *args):
    '''
    If the PyROC (https://github.com/marcelcaraciolo/PyROC) module is
    installed, display the ROC curve for SVM/Logistic Regression classifiers.
    If PyROC cannot be imported a notice is printed and nothing is plotted.

    Inputs:
    =======
    title : Title handed to plot_multiple_roc.

    labels : Labels for the legend, one per (actual, predicted) pair.  When
             given, ', AUC: <value> ' is appended to each label on a private
             copy, the caller's list is never modified.  Replaced by
             'Algorithm-1' / 'Algorithm-2' when no args are supplied.

    args: Variable length arguments of the form :
          actual_1[], predicted_1[], actual_2[], predicted_2[], ....
          They must come in pairs.  With no args at all, two random mixture
          models are plotted as a demo.
    '''
    try:
        from pyroc import random_mixture_model, ROCData, plot_multiple_roc
    except ImportError:
        print('PyROC does not exist, skipping ROC demo. Install PyROC from : https://github.com/marcelcaraciolo/PyROC ')
        return

    # pylab is only needed to close the figure afterwards; tolerate its
    # absence instead of failing with a NameError on the final close().
    try:
        import pylab
    except ImportError:
        pylab = None

    if not args:
        lista = [ROCData(random_mixture_model()),
                 ROCData(random_mixture_model())]
        labels = ['Algorithm-1', 'Algorithm-2']
    else:
        if labels:
            # Work on a copy so repeated calls with the same list do not
            # keep appending AUC suffixes to the caller's labels.
            labels = list(labels)
        lista = []
        for i in range(0, len(args), 2):
            actual = args[i]
            predicted = args[i + 1]
            pairs = ((actual[k], predicted[k]) for k in range(len(actual)))
            curve = ROCData(pairs)
            auc = '%.2f' % curve.auc()
            if labels:
                # Floor division keeps the index an int under true division.
                labels[i // 2] = labels[i // 2] + ', AUC: {0} '.format(auc)
            lista.append(curve)

    plot_multiple_roc(lista, title, include_baseline=True, labels=labels)
    if pylab is not None:
        pylab.close()
def ROCPlot(title, labels=None, *args):
    '''
    If the PyROC (https://github.com/marcelcaraciolo/PyROC) module is
    installed, display the ROC curve for SVM/Logistic Regression classifiers.
    If PyROC cannot be imported a notice is printed and nothing is plotted.

    Inputs:
    =======
    title : Title handed to plot_multiple_roc.

    labels : Labels for the legend, one per (actual, predicted) pair.  When
             given, ', AUC: <value> ' is appended to each label on a private
             copy, the caller's list is never modified.  Replaced by
             'Algorithm-1' / 'Algorithm-2' when no args are supplied.

    args: Variable length arguments of the form :
          actual_1[], predicted_1[], actual_2[], predicted_2[], ....
          They must come in pairs.  With no args at all, two random mixture
          models are plotted as a demo.
    '''
    try:
        from pyroc import random_mixture_model, ROCData, plot_multiple_roc
    except ImportError:
        print('PyROC does not exist, skipping ROC demo. Install PyROC from : https://github.com/marcelcaraciolo/PyROC ')
        return

    # pylab is only needed to close the figure afterwards; tolerate its
    # absence instead of failing with a NameError on the final close().
    try:
        import pylab
    except ImportError:
        pylab = None

    if not args:
        lista = [ROCData(random_mixture_model()),
                 ROCData(random_mixture_model())]
        labels = ['Algorithm-1', 'Algorithm-2']
    else:
        if labels:
            # Work on a copy so repeated calls with the same list do not
            # keep appending AUC suffixes to the caller's labels.
            labels = list(labels)
        lista = []
        for i in range(0, len(args), 2):
            actual = args[i]
            predicted = args[i + 1]
            pairs = ((actual[k], predicted[k]) for k in range(len(actual)))
            curve = ROCData(pairs)
            auc = '%.2f' % curve.auc()
            if labels:
                # Floor division keeps the index an int under true division.
                labels[i // 2] = labels[i // 2] + ', AUC: {0} '.format(auc)
            lista.append(curve)

    plot_multiple_roc(lista, title, include_baseline=True, labels=labels)
    if pylab is not None:
        pylab.close()
def main(argv):
    """Parse the command line and run the sentiment-analysis evaluation.

    The positive and negative input files are required.  Depending on the
    flags, either bigram features or the best single-word features are
    evaluated through ``evaluate_features``.  With ``--limit_features`` a
    single evaluation is run with that limit; otherwise one evaluation is
    run for every value in ``NUM_FEATURES_TO_TEST``.  With ``--graph`` the
    collected ROC data is plotted at the end.

    Args:
        argv: Currently unused; the arguments are read from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description="Run sentiment analysis using "
                    "a positive and a negative input file")
    parser.add_argument(
        "-p", "--positive", required=True,
        help="input relative path of a positive data file")
    parser.add_argument(
        "-n", "--negative", required=True,
        help="input relative path of a negative data file")
    parser.add_argument(
        "-d", "--divisions", type=int, default=4,
        help="select the number of divisions created in input data: "
             "1 out of d will be used for testing.")
    parser.add_argument(
        "-l", "--limit_features", type=int, default=0,
        help="number of best features to use")
    parser.add_argument(
        "-b", "--bigram", action="store_true",
        help="classify using bigram features.")
    parser.add_argument(
        "-s", "--stopwords", action="store_true",
        help="filter out stop words before training.")
    # Disabled option, kept for reference:
    # parser.add_argument("-t", "--tag_negated_words", help="tag negated words with
    #                     word_not to capture more meaning.", action="store_true")
    parser.add_argument(
        "-r", "--randomize", action="store_true",
        help="randomize training data to reduce clumping while training.")
    parser.add_argument(
        "-a", "--average", action="store_true",
        help="train and test over each possible set of divisions and "
             "average the results for more smoothing.")
    parser.add_argument(
        "-g", "--graph", action="store_true",
        help="graphs the resulting ROC curves against eachother")

    # NOTE(review): ``argv`` is accepted but never consulted -- parse_args()
    # without arguments reads sys.argv[1:].  Left unchanged because the call
    # site (and whether it strips the program name) is not visible here.
    args = parser.parse_args()

    # Set up ROC graphing data and import pyroc only when it is needed.
    # ROC_data[0] is later plotted as the curves and ROC_data[1] as their
    # labels; presumably evaluate_features fills both lists -- confirm there.
    ROC_data = 0
    if args.graph:
        from pyroc import plot_multiple_roc
        ROC_data = [[], []]

    # Set up stopword set
    stopset = []
    if args.stopwords:
        from nltk.corpus import stopwords
        print("Stop words are being filtered out.")
        stopset = set(stopwords.words('english'))

    # Word scores are only computed when not using bigrams
    word_scores = []
    if not args.bigram:
        word_scores = create_word_scores(args.positive, args.negative)

    # Pick the feature selector and its banner once, not per branch.
    if args.bigram:
        select_features = bigram_word_features
        banner = '\nEvaluating the best %d bigram word features\n'
    else:
        select_features = best_word_features
        banner = '\nEvaluating the best %d word features\n'

    # A non-zero --limit_features means one run with that limit; otherwise
    # iterate through the default array of feature numbers.
    if args.limit_features:
        limits = [args.limit_features]
    else:
        limits = NUM_FEATURES_TO_TEST

    for limit in limits:
        print(banner % limit)
        evaluate_features(select_features, args.positive, args.negative,
                          args.divisions, args.average, limit, args.randomize,
                          args.stopwords, stopset, word_scores, ROC_data)

    if args.graph:
        plot_multiple_roc(ROC_data[0], 'ROC Curves', labels=ROC_data[1])
# Compare prediction files against a truth table and plot one ROC curve per
# file.  Every command-line argument is the path of a prediction file.

# Truth table: tab-separated despite the .csv name, with the true class in
# the first column and the item identifier in the second.
true_vals = {}
with open("Paper/pred_results/truth.csv") as truth:
    for line in truth:
        if line.strip() == '':
            continue
        vals = line.strip().split("\t")
        true_vals[vals[1]] = int(vals[0])

roclist = []
labels = []
for fname in sys.argv[1:]:
    # Prediction file: comma-separated "identifier,score" rows.  Rows whose
    # identifier is missing from the truth table are skipped.
    data = []
    with open(fname) as infile:
        for line in infile:
            if line.strip() == '':
                continue
            vals = line.strip().split(",")
            if vals[0] not in true_vals:
                continue
            data.append((true_vals[vals[0]], float(vals[1])))

    roc = pyroc.ROCData(data)
    print(fname + ": " + str(roc.auc()))
    roclist.append(roc)

    # Legend label: the file's base name split on "_", dropping the last
    # piece and any empty pieces, joined with ";".
    name_parts = fname.split("/")[-1].split("_")[0:-1]
    labels.append(";".join([x for x in name_parts if x != ""]))

pyroc.plot_multiple_roc(roclist, labels=labels)
from gusPyCode.defs.HMMsplice_utils import *
import pyroc as roc


def toROCdata(scoredDict):
    """Turn a scored dictionary into a list of tuples for ``roc.ROCData``.

    For every key ``k`` the resulting list holds the tuple
    ``(scoredDict[k][1], scoredDict[k][0], k)``, i.e. the first two items
    of each value swapped, followed by the key.
    """
    return [(scoredDict[key][1], scoredDict[key][0], key)
            for key in scoredDict]


# Input files: transcript annotation plus the two junction sets to compare.
anno = '/Users/biggus/Documents/James/Data/genomes/AaegL1/aaegypti.Tx-Ensembl.bed'
mult = '/Users/biggus/Documents/James/Data/Solexa/aedes/hmmSplicer/finalResults/Lx_unfiltered/LX.gtag.collapsed.multi.bed'
sngl = '/Users/biggus/Documents/James/Data/Solexa/aedes/hmmSplicer/finalResults/Lx_unfiltered/LX.gtag.collapsed.sngl.bed'

# Only element [1] of what generateROCcurve returns is used; the path
# variables are rebound to the converted ROC data.
mult = toROCdata(
    generateROCcurve(mult, anno, stepSize=10, wiggle=3)[1])
sngl = toROCdata(
    generateROCcurve(sngl, anno, stepSize=10, wiggle=3)[1])

mROC = roc.ROCData(mult, linestyle='r-')
sROC = roc.ROCData(sngl, linestyle='b-')

roc.plot_multiple_roc(
    [mROC, sROC],
    title='Multiples vs Singles',
    labels=['Multiples', 'Singles'],
    include_baseline=1,
    equal_aspect=True)

# The AUC values are computed but not stored or printed here
# (presumably meant for an interactive session -- confirm).
mROC.auc()
sROC.auc()
def PlotMultipleROC(rocs, title='', labels=None, include_baseline=True):
    """Plot several ROC curves by delegating to ``pyroc.plot_multiple_roc``.

    ``pyroc`` is imported on each call, so the module is only required
    when plotting is actually requested.  The four arguments are forwarded
    positionally and unchanged; nothing is returned.
    """
    import pyroc

    draw = pyroc.plot_multiple_roc
    draw(rocs, title, labels, include_baseline)