Exemple #1
0
def plot_roc(predictions, fig_title, outdir):
    """Draw the ROC curves for ``predictions`` and save them to a fresh PDF.

    Legend labels are "1", "2", ... (one per entry of ``predictions``), except
    that the last entry is labelled "mean".

    The figure is saved to the first ``outdir + "roc<i>.pdf"`` (``i`` running
    from 1 to the module-level ``N_repeats``) that does not exist yet. If all
    of those names are taken, nothing is saved.

    ``outdir`` is concatenated as-is, so it has to end with a path separator
    to denote a directory.
    """
    # NOTE(review): ``plt`` is presumably matplotlib.pyplot -- confirm at the
    # file's import block, which is not visible here.
    plt.clf()

    curve_count = len(predictions)
    roc_labels = [str(position + 1) for position in range(curve_count)]
    roc_labels[-1] = 'mean'

    pyroc.plot_multiple_roc(
        predictions,
        title=fig_title,
        labels=roc_labels,
        include_baseline=True,
    )

    # Take the first unused slot so earlier repeats are never overwritten.
    for slot in range(1, N_repeats + 1):
        target = outdir + "roc%d.pdf" % slot
        if os.path.exists(target):
            continue
        plt.savefig(target)
        break
Exemple #2
0
def ROCPlot(title, labels=None, *args):
    '''
       If the PyROC (https://github.com/marcelcaraciolo/PyROC)
       module is installed, display the ROC curve for SVM/Logistic Regression classifiers.
       If PyROC cannot be imported, a notice is printed and None is returned.

       Inputs:
       =======
       title  : Title passed on to the plot
       labels : Labels for the legend, one per (actual, predicted) pair. When given,
                each label is shown with ', AUC: <value> ' appended. The caller's
                list is NOT modified; a copy is annotated instead.
       args: Variable length arguments of the form : actual_1[], predicted_1[], actual_2[], predicted_2[], ....
             With no args, two curves built from random_mixture_model() are plotted
             under the labels 'Algorithm-1' and 'Algorithm-2'.
    '''
    try:
        from pyroc import random_mixture_model, ROCData, plot_multiple_roc
    except ImportError:
        print('PyROC does not exist, skipping ROC demo. Install PyROC from : https://github.com/marcelcaraciolo/PyROC ')
        return

    # pylab is only used to close the figure afterwards, so its absence must
    # not end in a NameError once the curves have been plotted.
    try:
        import pylab
    except ImportError:
        pylab = None

    if len(args) == 0:
        lista = [ROCData(random_mixture_model()), ROCData(random_mixture_model())]
        labels = ['Algorithm-1', 'Algorithm-2']
    else:
        if labels:
            # Annotate a private copy so repeated calls with the same list do
            # not keep stacking AUC suffixes onto the caller's labels.
            labels = list(labels)
        lista = []
        for i in range(0, len(args), 2):
            actual = args[i]
            predicted = args[i + 1]
            pairs = [(actual[k], predicted[k]) for k in range(len(actual))]
            roc_data = ROCData(pairs)
            auc = '%.2f' % roc_data.auc()
            if labels:
                # Floor division keeps the index an int under true division too.
                labels[i // 2] = labels[i // 2] + ', AUC: {0} '.format(auc)
            lista.append(roc_data)

    plot_multiple_roc(lista, title, include_baseline=True, labels=labels)
    if pylab is not None:
        pylab.close()
Exemple #3
0
def ROCPlot(title, labels=None, *args):
    '''
       If the PyROC (https://github.com/marcelcaraciolo/PyROC)
       module is installed, display the ROC curve for SVM/Logistic Regression classifiers.
       If PyROC cannot be imported, a notice is printed and None is returned.

       Inputs:
       =======
       title  : Title passed on to the plot
       labels : Labels for the legend, one per (actual, predicted) pair. When given,
                each label is shown with ', AUC: <value> ' appended. The caller's
                list is NOT modified; a copy is annotated instead.
       args: Variable length arguments of the form : actual_1[], predicted_1[], actual_2[], predicted_2[], ....
             With no args, two curves built from random_mixture_model() are plotted
             under the labels 'Algorithm-1' and 'Algorithm-2'.
    '''
    try:
        from pyroc import random_mixture_model, ROCData, plot_multiple_roc
    except ImportError:
        print('PyROC does not exist, skipping ROC demo. Install PyROC from : https://github.com/marcelcaraciolo/PyROC ')
        return

    # pylab is only used to close the figure afterwards, so its absence must
    # not end in a NameError once the curves have been plotted.
    try:
        import pylab
    except ImportError:
        pylab = None

    if len(args) == 0:
        lista = [ROCData(random_mixture_model()), ROCData(random_mixture_model())]
        labels = ['Algorithm-1', 'Algorithm-2']
    else:
        if labels:
            # Annotate a private copy so repeated calls with the same list do
            # not keep stacking AUC suffixes onto the caller's labels.
            labels = list(labels)
        lista = []
        for i in range(0, len(args), 2):
            actual = args[i]
            predicted = args[i + 1]
            pairs = [(actual[k], predicted[k]) for k in range(len(actual))]
            roc_data = ROCData(pairs)
            auc = '%.2f' % roc_data.auc()
            if labels:
                # Floor division keeps the index an int under true division too.
                labels[i // 2] = labels[i // 2] + ', AUC: {0} '.format(auc)
            lista.append(roc_data)

    plot_multiple_roc(lista, title, include_baseline=True, labels=labels)
    if pylab is not None:
        pylab.close()
def main(argv):
  """Parse the command line and evaluate sentiment-classification features.

  ``argv`` is accepted but not used: ``parser.parse_args()`` is called without
  arguments, so options are read from ``sys.argv``.

  With ``--limit_features`` set to a non-zero value, a single evaluation runs
  with that many features; otherwise one evaluation runs per entry of the
  module-level ``NUM_FEATURES_TO_TEST``. With ``--graph``, pyroc is imported
  and ``plot_multiple_roc`` is called once all evaluations have finished.
  """
  parser = argparse.ArgumentParser(description="Run sentiment analysis using\
                                      a positive and a negative input file")

  parser.add_argument("-p", "--positive", required=True, help="input relative path of a \
                      positive data file")

  parser.add_argument("-n", "--negative", required=True, help="input relative path of a \
                      negative data file")

  parser.add_argument("-d", "--divisions", type=int, default=4, help="select the number \
                      of divisions created in input data: 1 out of d will \
                      be used for testing.")

  parser.add_argument("-l", "--limit_features", type=int, default="0", help="number of best \
                      features to use")

  parser.add_argument("-b", "--bigram", action="store_true",
                      help="classify using bigram features.")

  parser.add_argument("-s", "--stopwords", action="store_true", help="filter out stop words before \
                      training.")

  # parser.add_argument("-t", "--tag_negated_words", help="tag negated words with \
  #                     word_not to capture more meaning.", action="store_true")

  parser.add_argument("-r", "--randomize", action="store_true", help="randomize training data to \
                      reduce clumping while training.")

  parser.add_argument("-a", "--average", action="store_true", help="train and test over each \
                      possible set of divisions and average the results for \
                      more smoothing.")

  parser.add_argument("-g", "--graph", action="store_true", help="graphs the resulting ROC curves \
                      against eachother")

  args = parser.parse_args()

  # ROC accumulator handed to evaluate_features: two lists when graphing
  # (later plotted as curves and their labels), the placeholder 0 otherwise.
  # pyroc is only imported when a graph was requested.
  if args.graph:
    from pyroc import plot_multiple_roc
    ROC_data = [[], []]
  else:
    ROC_data = 0

  # English stop words from NLTK, loaded only on request.
  if args.stopwords:
    from nltk.corpus import stopwords
    print("Stop words are being filtered out.")
    stopset = set(stopwords.words('english'))
  else:
    stopset = []

  # Word scores are computed only when bigram features are not in use.
  if args.bigram:
    word_scores = []
  else:
    word_scores = create_word_scores(args.positive, args.negative)

  # Feature extractor and progress banner depend only on --bigram.
  if args.bigram:
    feature_fn = bigram_word_features
    banner = '\nEvaluating the best %d bigram word features\n'
  else:
    feature_fn = best_word_features
    banner = '\nEvaluating the best %d word features\n'

  # A non-zero --limit_features means one run; otherwise sweep the defaults.
  if args.limit_features:
    limits = [args.limit_features]
  else:
    limits = NUM_FEATURES_TO_TEST

  for limit in limits:
    print(banner % limit)
    evaluate_features(feature_fn, args.positive, args.negative,
                      args.divisions, args.average, limit, args.randomize,
                      args.stopwords, stopset, word_scores, ROC_data)

  if args.graph:
    plot_multiple_roc(ROC_data[0], 'ROC Curves', labels=ROC_data[1])
Exemple #5
0
# Compare prediction files against a ground-truth table and plot one ROC curve
# per prediction file (file names are taken from the command line).

# Ground truth: tab-separated lines whose first field is the integer label and
# whose second field is the key the prediction files refer to.
true_vals = {}
with open("Paper/pred_results/truth.csv") as truth:
    for line in truth:
        line = line.strip()
        if line == '':
            continue
        vals = line.split("\t")
        true_vals[vals[1]] = int(vals[0])

roclist = []
labels = []
for fname in sys.argv[1:]:
    # Prediction file: comma-separated "<key>,<score>" lines. Keys that are
    # missing from the ground truth are skipped.
    data = []
    with open(fname) as infile:
        for line in infile:
            line = line.strip()
            if line == '':
                continue
            vals = line.split(",")
            if vals[0] not in true_vals:
                continue
            data.append((true_vals[vals[0]], float(vals[1])))

    roc = pyroc.ROCData(data)
    print(fname + ": " + str(roc.auc()))
    roclist.append(roc)
    # Legend label: the file's base name without its last "_"-separated piece,
    # remaining non-empty pieces joined by ";".
    labels.append(";".join(
        [x for x in fname.split("/")[-1].split("_")[0:-1] if x != ""]))

pyroc.plot_multiple_roc(roclist, labels=labels)
from gusPyCode.defs.HMMsplice_utils import *
import pyroc as roc

def toROCdata(scoredDict):
    """Turn a ``{key: (a, b, ...)}`` mapping into ``(b, a, key)`` tuples.

    The first two elements of every value are swapped and the key is appended.
    The result follows the iteration order of ``scoredDict``.
    """
    return [
        (scoredDict[key][1], scoredDict[key][0], key)
        for key in scoredDict
    ]

# Hard-coded inputs: the annotation BED plus the "multi" and "sngl" junction
# BED files that are compared against it.
anno = '/Users/biggus/Documents/James/Data/genomes/AaegL1/aaegypti.Tx-Ensembl.bed'
mult = '/Users/biggus/Documents/James/Data/Solexa/aedes/hmmSplicer/finalResults/Lx_unfiltered/LX.gtag.collapsed.multi.bed'
sngl = '/Users/biggus/Documents/James/Data/Solexa/aedes/hmmSplicer/finalResults/Lx_unfiltered/LX.gtag.collapsed.sngl.bed'

# Each path variable is rebound to ROC-ready tuples: element [1] of the
# generateROCcurve() result is reshaped by toROCdata().
# NOTE(review): generateROCcurve presumably comes from the star import of
# HMMsplice_utils -- confirm.
mult = generateROCcurve(mult, anno, stepSize=10, wiggle=3)[1]
mult = toROCdata(mult)
sngl = generateROCcurve(sngl, anno, stepSize=10, wiggle=3)[1]
sngl = toROCdata(sngl)

mROC = roc.ROCData(mult, linestyle='r-')
sROC = roc.ROCData(sngl, linestyle='b-')
roc.plot_multiple_roc(
    [mROC, sROC],
    title='Multiples vs Singles',
    labels=['Multiples', 'Singles'],
    include_baseline=1,
    equal_aspect=True,
)

# The AUC values are computed but neither stored nor printed here.
mROC.auc()
sROC.auc()
def PlotMultipleROC(rocs, title='', labels=None, include_baseline=True):
    """Plot several ROC curves by delegating to ``pyroc.plot_multiple_roc``.

    The four arguments are forwarded positionally, in the order given. pyroc
    is imported inside the function, so it is only needed when this is called.
    Nothing is returned.
    """
    from pyroc import plot_multiple_roc
    plot_multiple_roc(rocs, title, labels, include_baseline)