def plot_box(data, labels, fname):
    """Draw a box plot of ``data`` on a new figure and save it to ``fname``.

    ``labels`` are applied as the x tick labels of the axes belonging to the
    new figure. The current figure is closed once it has been written.
    """
    figure = plt.figure()
    plt.boxplot(data)
    # get_axes() returns a list; setp applies the property to each entry.
    plt.setp(figure.get_axes(), xticklabels=labels)
    plt.savefig(fname)
    plt.close()
Ejemplo n.º 2
0
    def statistics_charts(self):
        """Yield ``(name, html)`` pairs, one per entry of ``self.stats_charts``.

        Each chart is a mapping with the keys ``"type"``, ``"name"`` and
        ``"data"``. Supported types are ``"plot"``, ``"timeline"``, ``"bars"``
        and ``"boxplot"``. Nothing is yielded when ``plt`` is ``None``.

        Raises:
            Exception: if a chart has an unsupported ``"type"``.
        """
        if plt is None:
            return

        for chart in self.stats_charts:
            kind = chart["type"]
            if kind == "plot":
                fig = plt.figure(figsize=(8, 2))
                for xdata, ydata, label in chart["data"]:
                    plt.plot(xdata, ydata, "-", label=label)
                plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
            elif kind == "timeline":
                fig = plt.figure(figsize=(16, 2))
                # One horizontal row per series, indexed by its position.
                for i, (starts, stops, label) in enumerate(chart["data"]):
                    plt.hlines([i] * len(starts), starts, stops, label=label)
                plt.ylim(-1, len(chart["data"]))
            elif kind == "bars":
                fig = plt.figure(figsize=(16, 4))
                plt.bar(range(len(chart["data"])), chart["data"])
            elif kind == "boxplot":
                fig = plt.figure(figsize=(16, 4))
                plt.boxplot(chart["data"])
            else:
                raise Exception("Unknown chart")
            try:
                png = serialize_fig(fig)
            finally:
                # pyplot keeps every figure alive until it is closed; release
                # it here so a long chart list does not accumulate figures.
                plt.close(fig)
            yield chart["name"], html_embed_img(png)
Ejemplo n.º 3
0
 def regional_boxplot(self, folder) :
     """Create box plots of the income per person per region and save them.

     The plot is written to ``<folder>/income_boxplot_<year>.pdf`` with
     dashed lines for ``self.global_mean`` and ``self.global_median``.

     Raises:
         ValueError: if ``folder`` is not a string.
     """
     import matplotlib.pyplot as plt

     if not isinstance(folder, str):
         raise ValueError("expected string for foldername")
     self.regional_income()
     label = list(self.region_list)
     incomes = [self.region_income[region] for region in label]
     # Start from a clean state in case an earlier figure is still current.
     plt.close()
     plt.figure(figsize=(14, 7))
     plt.boxplot(incomes, labels = label)
     # Two extra points so the reference lines extend past the outer boxes.
     span = len(self.region_list) + 2
     plt.plot([self.global_mean] * span, "r--", label="Global Mean")
     plt.plot([self.global_median] * span, "g--", label="Global Median")
     plt.xlabel("Region")
     plt.ylabel("Income per person")
     plt.title("Boxplots of the Income per person for each region for the Year " + str(self.year))
     plt.legend()
     plt.savefig(folder + "/income_boxplot_" + str(self.year) +".pdf")
     plt.close()
Ejemplo n.º 4
0
def plot_difficulties(difficulties, bins=10):
    """Show a box plot and a log-scale histogram of the four difficulty groups.

    ``difficulties`` is indexed with ``2 * y + c`` for ``y, c`` in ``{0, 1}``.
    The mean and standard deviation of every group are printed, then two
    figures are shown one after the other.

    Args:
        difficulties: indexable collection holding the four groups.
        bins: forwarded to ``plt.hist``.
    """
    # Data
    plot_data = []
    names = []
    for y_true, c_val in [(0, 0), (0, 1), (1, 0), (1, 1)]:
        diff_yc = difficulties[2 * y_true + c_val]
        plot_data.append(diff_yc)
        names.append('y=%d, c=%d' % (y_true, c_val))
        print("y=%d, c=%d, mean=%.5f, std=%.5f" % (y_true, c_val, np.mean(diff_yc), np.std(diff_yc)))

    # Boxplots
    _, axes = plt.subplots()
    plt.boxplot(plot_data)
    plt.setp(axes, xticklabels=names)
    axes.set_ylim([-.01, 1.01])
    axes.set_ylabel('Difficulty')
    plt.show()

    # Histogram
    _, axes = plt.subplots()
    try:
        # Current matplotlib spells the keyword "nonpositive".
        plt.yscale('log', nonpositive='clip')
    except (TypeError, ValueError):
        # Older matplotlib releases only know the axis-specific "nonposy".
        plt.yscale('log', nonposy='clip')
    plt.hist(plot_data, label=names, bins=bins)
    plt.legend()
    axes.set_xlabel('Difficulty')
    axes.set_ylabel('Count (log-scale)')
    plt.show()
Ejemplo n.º 5
0
def plot(lookup):
    """Show one box per key of ``lookup``, ordered by sorted key.

    Outlier markers are suppressed (``sym=''``) and the sorted keys are used
    as x tick labels.
    """
    diameters = sorted(lookup.keys())
    samples = [lookup[diameter] for diameter in diameters]
    plt.boxplot(samples, sym='')
    plt.setp(plt.gca(), 'xticklabels', diameters)
    plt.show()
Ejemplo n.º 6
0
def bivariate_analysis_cont_catg(cont_catg_list,df,target_name,sub_len,COUNTER,PLOT_ROW_SIZE,PLOT_COLUMNS_SIZE):
    """Draw one box-plot subplot per column of ``cont_catg_list``.

    For every column kept by ``clean_str_list`` the target values are grouped
    by the distinct values of that column (rows containing NaN are dropped
    first). The subplot title shows the column's mean and std, and the x
    label shows the values returned by ``evaluate_anova``.

    Args:
        cont_catg_list: column names to analyse.
        df: source DataFrame.
        target_name: name of the target column.
        sub_len: unused here; kept for interface compatibility.
        COUNTER: index of the first subplot to draw into.
        PLOT_ROW_SIZE, PLOT_COLUMNS_SIZE: subplot grid dimensions.

    Returns:
        ``(plt, COUNTER)`` where ``COUNTER`` is advanced by the number of
        subplots drawn.

    Raises:
        ValueError: if ``clean_str_list`` rejects the target column while
            there are columns to plot.
    """
    clean_cont_catg_list = clean_str_list(df,cont_catg_list)

    if len(clean_str_list(df,[target_name])) == 0 and len(cont_catg_list)>0:
        raise ValueError("You seem to have a target variable with string values.")
    clean_df = df.dropna()

    for col in clean_cont_catg_list:

        col_classes = clean_df[col].unique()

        # describe() is label-indexed; use .iloc for positional access so the
        # lookup does not rely on the deprecated integer-key fallback.
        summary = clean_df[col].describe()
        mean = summary.iloc[1]
        std = summary.iloc[2]

        plt.subplot(PLOT_ROW_SIZE,PLOT_COLUMNS_SIZE,COUNTER)
        plt.title("mean "+str(np.float32(mean))+" std "+str(np.float32(std)),fontsize=10)

        # One array of target values per distinct value of the column.
        x = [np.array(clean_df[clean_df[col]==i][target_name]) for i in col_classes]
        y = np.float32(clean_df[target_name])

        f_value,p_val = evaluate_anova(np.array(clean_df[col]).reshape(-1,1),y)

        plt.xlabel(col+"\n f_value: "+str(np.float32(f_value[0]))+" / p_val: "+str(p_val[0]), fontsize=10)
        plt.ylabel(target_name, fontsize=10)
        plt.boxplot(x)

        print (col+" vs "+target_name+" plotted....")

        COUNTER +=1

    return plt,COUNTER
Ejemplo n.º 7
0
def bivariate_analysis_catg_cont(catg_cont_list,df,target_name,sub_len,COUNTER,PLOT_ROW_SIZE,PLOT_COLUMNS_SIZE):
    """Draw one box-plot subplot per column of ``catg_cont_list``.

    For every column the column values are grouped by the distinct values of
    the target (rows containing NaN are dropped first). The subplot title
    shows the column's mean and std, and the x label shows the values
    returned by ``evaluate_anova``.

    Args:
        catg_cont_list: column names to analyse (used as given).
        df: source DataFrame.
        target_name: name of the target column.
        sub_len: unused here; kept for interface compatibility.
        COUNTER: index of the first subplot to draw into.
        PLOT_ROW_SIZE, PLOT_COLUMNS_SIZE: subplot grid dimensions.

    Returns:
        ``(plt, COUNTER)`` where ``COUNTER`` is advanced by the number of
        subplots drawn.
    """
    # No need to remove string variables as they are handled by the chi2
    # function of sklearn.
    clean_catg_cont_list = catg_cont_list
    clean_df = df.dropna()

    # Take the classes from the cleaned frame: everything plotted below comes
    # from clean_df, so classes that only occur in dropped rows (or NaN
    # itself) would otherwise produce empty boxes.
    col_classes = clean_df[target_name].unique()

    for col in clean_catg_cont_list:

        # describe() is label-indexed; use .iloc for positional access so the
        # lookup does not rely on the deprecated integer-key fallback.
        summary = clean_df[col].describe()
        mean = summary.iloc[1]
        std = summary.iloc[2]

        plt.subplot(PLOT_ROW_SIZE,PLOT_COLUMNS_SIZE,COUNTER)
        plt.title("mean "+str(np.float32(mean))+" std "+str(np.float32(std)),fontsize=10)

        # One array of column values per distinct target value.
        x = [np.array(clean_df[clean_df[target_name]==i][col]) for i in col_classes]
        y = clean_df[target_name]

        f_value,p_val = evaluate_anova(np.array(clean_df[col]).reshape(-1,1),y)

        plt.xlabel(target_name+"\n f_value: "+str(np.float32(f_value[0]))+" / p_val: "+str(p_val[0]), fontsize=10)
        plt.ylabel(col, fontsize=10)
        plt.boxplot(x)

        print (col+" vs "+target_name+" plotted....")

        COUNTER +=1

    return plt,COUNTER
Ejemplo n.º 8
0
def descriptive_stats(array, verbose=True, label='', mean=False, plot=False):
    """Return min, quartiles, median and max of ``array``.

    Args:
        array: values to summarise.
        verbose: print the summary, prefixed with ``label``.
        label: prefix of the printed line; it is part of the format string.
        mean: also compute the mean and place it before the median.
        plot: additionally draw a horizontal box plot of ``array``.

    Returns:
        ``(min, q1, median, q3, max)``, or
        ``(min, q1, mean, median, q3, max)`` when ``mean`` is true.
    """
    if mean:
        average = np.mean(array)
    middle = np.median(array)
    lowest = np.min(array)
    highest = np.max(array)
    lower_quartile = np.percentile(array, 25)
    upper_quartile = np.percentile(array, 75)

    if mean:
        summary = (lowest, lower_quartile, average, middle,
                   upper_quartile, highest)
        template = ('min={:.1f} / 1st QU={:.1f} / ave={:.1f} / med={:.1f} / '
                    '3rd QU={:.1f} / max={:.1f}')
    else:
        summary = (lowest, lower_quartile, middle, upper_quartile, highest)
        template = ('min={:.1f} / 1st QU={:.1f} / med={:.1f} / 3rd QU={:.1f} '
                    '/ max={:.1f}')

    if verbose:
        print((label + template).format(*summary))

    if plot:
        # NOTE(review): matplotlib documents meanline as taking effect only
        # together with showmeans -- confirm whether the mean should be drawn.
        boxplot(array, vert=False, meanline=mean, showfliers=True, sym='.')

    return summary
Ejemplo n.º 9
0
    def visualize_performance(self):
        """Plot the distributions of intra and inter comparison scores.

        Delegates to ``self._common_visualize_performance`` with labels
        ``1`` (intra) and ``-1`` (inter), saves a box plot to
        ``comparison_score_distribution.pdf`` and then draws normalised
        50-bin histograms of both score sets on a second figure.
        """
        intra = self._intra
        inter = self._inter

        labels = [1]*len(intra) + [-1]*len(inter)
        scores = intra+inter

        self._common_visualize_performance( labels, scores)

        plt.figure()
        plt.boxplot([intra, inter])
        plt.xticks([1, 2], ['intra', 'inter'])
        plt.title('Distribution of scores')
        plt.savefig('comparison_score_distribution.pdf')

        plt.figure()
        # np.min/np.max take the axis as second positional argument, so the
        # two per-set extrema have to be combined with the builtins.
        start = min(np.min(intra), np.min(inter))
        end = max(np.max(intra), np.max(inter))
        # Share the range so both histograms use the same bin edges.
        intra_hist, intra_bin = np.histogram(intra,50, (start, end))
        inter_hist, inter_bin = np.histogram(inter,50, (start, end))

        plt.plot(intra_bin[:-1], intra_hist/float(intra_hist.sum()), label='intra', color='blue')
        plt.plot(inter_bin[:-1], inter_hist/float(inter_hist.sum()), label='inter', color='red')
        plt.legend()
        plt.xlabel('Comparison scores')
        plt.ylabel('Probability')
        plt.title('Score distribution')
Ejemplo n.º 10
0
def seperate(R, P):
    """Box-plot the ``R`` values of motifs split by their p-value in ``P``.

    Keys of ``P`` are reduced to the part before the first ``"_"``; entries
    whose reduced name is missing from ``R`` are ignored. Each value of ``P``
    must unpack into seven items, the last being the p-value. Names with a
    p-value above 0.9999 go to the ON group (keeping the largest ``R`` value
    seen), all others to the OFF group (keeping the smallest).
    """
    ON = {}
    OFF = {}
    for motif in P:
        M = motif.split("_")[0]
        if M not in R:
            continue
        # Only the p-value is used; the unpacking also checks the record
        # has exactly seven fields.
        _k, _mu, _std, _n, _m, _B, pv = P[motif]
        cov = R[M]
        if pv > 0.9999:
            ON[M] = cov if M not in ON else max(ON[M], cov)
        else:
            OFF[M] = cov if M not in OFF else min(cov, OFF[M])

    # dict views are not sequences on Python 3; matplotlib needs real lists.
    plt.boxplot((list(OFF.values()), list(ON.values())))
    plt.show()
Ejemplo n.º 11
0
    def handle(self, *args, **options):
        """Print grouped complexity values and box-plot those of django-1.0.1.

        The plot is written to ``z.png`` and then shown.
        """
        versions = models.SourceLine.objects.filter(
            project__startswith='django-').order_by(
            'project').values_list(
            'project', 'progradon__complexity')
        # NOTE(review): rows are ordered by project but grouped on item 1
        # (the complexity), and the loop variable is called "vers" --
        # confirm whether itemgetter(0) was intended.
        for vers, complexity_iter in itertools.groupby(
            versions, key=operator.itemgetter(1)):
            print("%s :" % (vers,))
            print("- " + ', '.join(str(x) for x in complexity_iter))
        data = models.SourceLine.objects.filter(
            project='django-1.0.1').values_list(
            'progradon__complexity', flat=True)
        plt.boxplot(data) # , labels=labels)

        # Save before show(): once the window is closed the current figure
        # is gone and savefig would write an empty image.
        plt.savefig('z.png')
        plt.show()
Ejemplo n.º 12
0
def createBoxPlot(table,title = None, xlab = None, yLab= None, dest = "show"):
    """Draw one box per distinct x value found in ``table``.

    Args:
        table: mapping whose values are iterables of ``(x, y)`` pairs.
        title, xlab, yLab: optional plot title and axis labels.
        dest: ``"none"`` to do nothing, ``"show"`` to display the figure,
            anything else is used as the file name to save to.

    Raises:
        ValueError: if ``table`` contains no data points.
    """
    if dest == "none":
        return

    # Group the y values by x in a single pass, preserving encounter order.
    grouped = {}
    for key in table:
        for x, y in table[key]:
            grouped.setdefault(x, []).append(y)
    if not grouped:
        raise ValueError("createBoxPlot: table contains no data points")

    listX = sorted(grouped)
    plotData = [grouped[x] for x in listX]

    plt.figure("box")
    plt.boxplot(plotData)
    # Boxes are drawn at positions 1..n, so the tick labels must go there.
    plt.xticks(range(1, len(listX) + 1), listX)
    if title:
        plt.title(title)
    if xlab:
        plt.xlabel(xlab)
    if yLab:
        plt.ylabel(yLab)

    if dest == "show":
        # show() takes no figure argument; it displays all open figures.
        plt.show()
    else:
        plt.savefig(dest, bbox_inches='tight')
    plt.clf()
    plt.close("box")
Ejemplo n.º 13
0
def create_boxplot(data, save_dir, correct_entropy=1):
    """Box-plot ``data`` with a reference line and save it under a random name.

    Args:
        data: non-empty collection of entropy values for the lines added by
            the mutant files.
        save_dir: directory to save the plot in, not including the name of
            the plot itself.
        correct_entropy: the entropy of the lines added by the repair
            program; drawn as a green horizontal line.

    Returns:
        The randomly generated base name (without ``.png``) of the plot.
    """
    print("CREATE BOXPLOT")
    print(data)
    assert len(data) > 0
    # plot mutant entropy
    plt.boxplot(data)
    # plot correct entropy; plot() returns a list, the legend needs the
    # line artist itself.
    reference_line, = plt.plot([0, 2], [correct_entropy, correct_entropy],
                               color="g")
    # label the repaired program
    plt.legend([reference_line], ["repaired program"])

    # annotate the plot
    plt.ylabel("Entropy (bits)")
    plt.title("Entropy of lines added in mutant programs")

    # generate a random number as the name of the plot; sys.maxint only
    # exists on Python 2, sys.maxsize is its equivalent elsewhere.
    upper = getattr(sys, "maxint", sys.maxsize)
    name = str(random.randint(0, upper))
    target = os.path.join(save_dir, name + ".png")
    plt.savefig(target, bbox_inches=0)
    print(target)
    return name
def boxplot(datadict, name):
    """Summarise the arrays in ``datadict`` as JSON and as a box-plot image.

    All values of ``datadict`` are concatenated along the first axis. The
    per-column mean and standard deviation are written to
    ``RESULTS_FOLDER/result_<name>.json`` and plotted, together with a box
    plot of the data, to ``RESULTS_FOLDER/boxplot_<name>.png``.
    """
    # np.concatenate needs a real sequence; dict views fail on Python 3.
    data = np.concatenate(list(datadict.values()))

    xdata = np.arange(data.shape[1]) + 1
    ydata = np.average(data, axis=0)

    std = np.std(data, axis=0)

    minerr = ydata - std
    maxerr = ydata + std

    with open(RESULTS_FOLDER + "/result_%s.json" % name, "w") as f:
        j = {
            "relative_cost": list(ydata),
            "std": list(std)
        }
        json.dump(j, f, indent=1)

    # Flat at the value of column index 9 for the first nine columns, then
    # following mean + std.
    strategy = np.concatenate((np.repeat(np.array(maxerr[9]), 9), maxerr[9:]))

    plt.figure()
    plt.plot(xdata, ydata, label="mean")
    plt.plot(xdata, minerr, label="mean - std")
    plt.plot(xdata, maxerr, label="mean + std")
    plt.plot(xdata, strategy, lw=3, ls="--", c="black", label="strategy")

    plt.boxplot(data)

    plt.legend()
    plt.axis([0, 30, 0, maxerr[0]])
    plt.savefig('%s/boxplot_%s.png' % (RESULTS_FOLDER, name))
    # Release the figure; pyplot keeps it alive until explicitly closed.
    plt.close()
Ejemplo n.º 15
0
def stats_fn(data_frame):
    """Write pairwise Wilcoxon tests to a text file and save a box plot.

    Every unordered pair of keys of ``data_frame`` is tested once with
    ``stats.wilcoxon``; the result and both medians are written to
    ``Stat_tests_<scene without its last 4 chars>.txt``. A box plot of all
    entries is then saved as ``<scene>_.png`` in the boxplots directory.

    Args:
        data_frame: mapping from algorithm name to its samples.
    """
    global scene
    seen_pairs = set()
    # The context manager closes the report even if a test raises.
    with open("Stat_tests_" + scene[:-4] + ".txt", "w") as stat_file:
        for algorithm in data_frame:
            for algorithm2 in data_frame:
                if algorithm == algorithm2 or (algorithm, algorithm2) in seen_pairs:
                    continue
                seen_pairs.add((algorithm, algorithm2))
                seen_pairs.add((algorithm2, algorithm))
                statistical_significance = stats.wilcoxon(data_frame[algorithm], data_frame[algorithm2])
                stat_file.write("%s  VS  %s  --> %s\n" % (algorithm, algorithm2, statistical_significance))
                stat_file.write("%s  median =  %s\n" % (algorithm, np.median(data_frame[algorithm])))
                stat_file.write("%s  median =  %s\n" % (algorithm2, np.median(data_frame[algorithm2])))
                stat_file.write("----------------------------------------------------------\n")

    # This part is for drawing the different boxplots
    figure_name = scene + "_.png"
    # Save through a full path instead of chdir-ing, so the process working
    # directory is never left changed when saving fails.
    output_dir = "/home/omohamme/INRIA/experiments/moop_sim_comparison/boxplots/" + scene[:-4] + "/"
    names = list(data_frame.keys())
    plt.figure(figsize=(15.0, 11.0))
    plt.boxplot(list(data_frame.values()))
    plt.xticks(range(1, len(names) + 1), names)
    plt.title(figure_name)
    plt.savefig(os.path.join(output_dir, figure_name))
    plt.close()
Ejemplo n.º 16
0
def plot_importances(forest, cov_dir, basename, X_names):
    """Dump, print and box-plot the per-tree feature importances.

    The estimator is taken from ``forest.steps[0][1]``. The importances of
    every tree are written to ``feature-importance-<basename>.dat`` in
    ``cov_dir``, the ranking is printed, and a horizontal box plot is saved
    as ``feature_importance-<basename>.pdf``.

    NOTE(review): the PDF is written to ``plot_dir``, which is not a
    parameter of this function -- presumably a module-level name; verify.
    """
    estimator = forest.steps[0][1]
    importances = estimator.feature_importances_
    ranking = np.argsort(importances)[::-1]
    per_tree = np.array([tree.feature_importances_
                         for tree in estimator.estimators_])

    dat_path = os.path.join(cov_dir, 'feature-importance-' + basename + '.dat')
    np.savetxt(dat_path, per_tree, header=" ".join(X_names), comments='')

    # Columns ordered from least to most important for plotting.
    ascending = ranking[::-1]
    per_tree = per_tree[:, ascending]

    print("Feature ranking:")
    for rank in range(1, len(X_names) + 1):
        feature = ranking[rank - 1]
        print("%d. %s (%.3g)" % (rank, X_names[feature], importances[feature]))
    print()

    mpl.rc('text', usetex='false')
    plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.boxplot(per_tree, vert=0)
    plt.yticks(range(1, 1 + len(X_names)), np.array(X_names)[ascending])
    plt.xlabel("Feature importance")
    plt.tight_layout()
    pdf_path = os.path.join(plot_dir, 'feature_importance-' + basename + '.pdf')
    plt.savefig(pdf_path)
    plt.close()
def __create_num_threads_vs_jct_graph(num_threads_to_jcts, output_dir, phase):
  """
  Create a graph of num threads per disk vs. JCT for the specified phase, which must be either
  "write" or "read". num_threads_to_jcts should be a dictionary of the form:
    { num threads : ( list of write JCTs, list of read JCTs ) }

  The graph is saved as "<phase>_phase_num_threads_vs_jct.pdf" in output_dir.
  """
  assert phase in ["write", "read"]

  # Index of the JCT list to use inside each ( write JCTs, read JCTs ) pair.
  phase_index = 0 if phase == "write" else 1

  # One empty tick on each side of the boxes.
  num_ticks = len(num_threads_to_jcts) + 2
  xmax = num_ticks - 1
  max_jct = max(jct
    for jcts in num_threads_to_jcts.values()
    for jct in jcts[phase_index])
  ymax = max_jct * 1.1
  pyplot.title("Num threads per disk vs. JCT ({} phase)".format(phase))
  pyplot.xlabel("Num threads per disk")
  pyplot.ylabel("JCT (s)")
  # Passed positionally: the keyword for this flag differs between matplotlib versions.
  pyplot.grid(True)
  pyplot.xlim(0, xmax)
  pyplot.ylim(0, ymax)

  # Build a list of lists of JCTs, sorted by num threads per disk.
  sorted_num_threads = sorted(num_threads_to_jcts.keys())
  all_jcts = [num_threads_to_jcts[num_threads][phase_index]
    for num_threads in sorted_num_threads]
  # Whiskers at the 0th and 100th percentiles, i.e. the full data range.
  pyplot.boxplot(all_jcts, whis=[0, 100])

  # Replace the visually-correct x-axis values with the numerically correct values.
  pyplot.xticks(range(num_ticks), [""] + sorted_num_threads + [""])

  # Save the graph as a PDF.
  output_filepath = path.join(output_dir, "{}_phase_num_threads_vs_jct.pdf".format(phase))
  with backend_pdf.PdfPages(output_filepath) as pdf:
    pdf.savefig()

  pyplot.close()
def main():
    """Parse ``statistics.txt`` and save three summary plots.

    Every line containing ``"["`` is read as one list of numbers; five
    consecutive such lines form one record. From the records the function
    saves ``time_speed.png`` (mean and std of list 4 per record) and the
    box plots ``kill_bot1_speed.png`` (list 1) and ``kill_bot2_speed.png``
    (list 2).
    """
    with open("statistics.txt", "r") as statistics_file:
        content = statistics_file.readlines()

    lists_per_record = 5
    records = []
    data = [[] for _ in range(lists_per_record)]
    row = 0

    for line in content:
        if "[" not in line:
            continue
        for token in line.split(" "):
            for junk in "[],\n":
                token = token.replace(junk, "")
            if not token:
                # Consecutive spaces yield empty tokens; nothing to parse.
                continue
            try:
                value = int(token)
            except ValueError:
                value = float(token)
            data[row].append(value)
        row += 1
        if row == lists_per_record:
            records.append(data)
            data = [[] for _ in range(lists_per_record)]
            row = 0

    mean_time = [np.mean(record[4]) for record in records]
    std_time = [np.std(record[4]) for record in records]
    positions = [i + 1 for i in range(len(mean_time))]

    plt.figure(figsize=(16, 9))
    plt.ylabel("Time")
    plt.xlabel("Game Speed")
    plt.xlim(0, len(mean_time) + 1)
    # NOTE(review): these labels start at "0x" while the box plots below
    # start at "1x" -- confirm which numbering is intended.
    labels = [str(i) + "x" for i in range(len(mean_time))]
    plt.errorbar(positions, mean_time, yerr = std_time)
    plt.xticks(positions, labels)
    plt.savefig("time_speed.png", bbox_inches='tight', dpi = 200)
    plt.close()

    speed_labels = [str(i + 1) + "x" for i in range(len(mean_time))]

    plt.figure(figsize=(16, 9))
    plt.ylim(0, 40)
    plt.boxplot([record[1] for record in records], labels=speed_labels)
    plt.savefig("kill_bot1_speed.png", bbox_inches='tight', dpi = 200)
    plt.close()

    plt.figure(figsize=(16, 9))
    plt.ylim(0, 40)
    plt.boxplot([record[2] for record in records], labels=speed_labels)
    plt.savefig("kill_bot2_speed.png", bbox_inches='tight', dpi = 200)
    plt.close()
Ejemplo n.º 19
0
def plot_htseqcount_dist(htseqfile, plot):
    '''Summarise the counts of an htseq file, optionally plotting them.

    Run from the htseq_out folder. When ``plot`` is ``'y'`` the files
    ``loghist.png``, ``hist.png`` and ``boxplot.png`` are written to the
    current directory.

    Returns a dict with the median, max and min of the counts (keys
    ``med``, ``max``, ``min``) and of their ``log2(c + 1)`` transform
    (keys ``logmed``, ``logmax``, ``logmin``).
    '''
    counts = list_htseq_counts(htseqfile)
    logcounts = [np.log2(c+1) for c in counts]
    if plot == 'y':
        plt.hist(logcounts, bins=1000, color='b')
        plt.ylim(0, 500)
        plt.savefig('loghist.png')
        plt.close()
        plt.hist(counts, bins=10000, color='b')
        plt.xlim(0, 100000)
        plt.ylim(0, 4000)
        plt.savefig('hist.png')
        plt.close()
        plt.boxplot(counts)
        plt.ylim(0, 10000)
        plt.savefig('boxplot.png')
        # Close this figure too, otherwise the next pyplot call (including
        # the next invocation of this function) draws on top of the box plot.
        plt.close()
    return {
        'med': np.median(counts),
        'logmed': np.median(logcounts),
        'max': np.max(counts),
        'logmax': np.max(logcounts),
        'min': np.min(counts),
        'logmin': np.min(logcounts),
    }
Ejemplo n.º 20
0
def plot(y_label,key):
    """Box-plot the eight score arrays selected by ``key``.

    ``key`` is ``"base"`` for the plain module-level score arrays or
    ``"random"`` for their ``*_random`` counterparts; any other value
    selects nothing. Each array is flattened and drawn as one box, labelled
    ARI, AMI, H, C, V, P, R and F1.
    """
    x_ticks = ["ARI","AMI","H","C","V","P","R","F1"]

    # The score arrays are only looked up inside the branch that uses them.
    if key=="base":
        sources = [adjusted_rand_scores,
                   adjusted_mutual_info_scores,
                   homogeneity_scores,
                   completeness_scores,
                   v_measures_scores,
                   pairwise_precision,
                   pairwise_recall,
                   pairwise_f1]
    elif key=="random":
        sources = [adjusted_rand_scores_random,
                   adjusted_mutual_info_scores_random,
                   homogeneity_scores_random,
                   completeness_scores_random,
                   v_measures_scores_random,
                   pairwise_precision_random,
                   pairwise_recall_random,
                   pairwise_f1_random]
    else:
        sources = []

    y = np.array([scores.flatten() for scores in sources])
    print(y.shape)
    # Transposed so that every score array becomes one column, i.e. one box.
    plt.boxplot(y.T)
    plt.xticks(np.arange(1,len(x_ticks)+1,1),x_ticks)
    plt.xlabel("measures")
    plt.ylabel(y_label)
def dictPlot( d, barchart=True, label='run', **plotargs ):
    """Plot an n to many mapping.

    Args:
        d: mapping from x value to a sequence of y values.
        barchart: when false draw a box plot per key; otherwise draw bars
            (plus the individual points when every key has several values).
        label: legend label of the individual points.
        **plotargs: forwarded to ``plt.plot`` for the points.

    An empty mapping draws nothing.
    """
    xvals = sorted( d.keys() )
    if not xvals:
        return
    yvals = [ d[ x ] for x in xvals ]
    labels = [ str( x ) for x in xvals ]
    # Use box plot unless bar chart was specified
    if not barchart:
        plt.boxplot( yvals )
        # Boxes sit at positions 1..n, so label those positions.
        plt.xticks( range( 1, len( yvals ) + 1 ), labels )
        return
    ind = np.arange( len( yvals ) )
    width = .35
    indcenter = ind + .5 * width
    plt.xticks( indcenter, labels )
    # If we only have one run, just plot bars
    if not all( len( y ) > 1 for y in yvals ):
        plt.bar( ind, [ y[ 0 ] for y in yvals ], width )
        return
    # Otherwise, scatter plot points
    for x, y in zip( indcenter, yvals ):
        plt.plot( [ x ] * len( y ), y, 'o', **plotargs )
    # hack - re-plot one point with a label so the legend gets one entry
    plt.plot( indcenter[ 0 ], yvals[ 0 ][ 0 ], 'o',
             label=label, **plotargs )
    # And plot a bar chart of the means
    means = [ sum( y ) / len( y ) for y in yvals ]
    plt.bar( ind, means, width, label='mean' )
def make_error_boxplot(expected_files, observed_files, names):
    """Build a two-row figure with box plots of errors and relative errors.

    Each expected/observed file pair is passed to ``get_file_error``; the
    third and fourth items of its result are collected as the error and
    relative-error samples of that pair.

    Args:
        expected_files, observed_files: parallel sequences of files.
        names: one name per pair; ``extract_number`` of each name is used as
            tick label of the relative-error plot.

    Returns:
        The figure, or ``None`` if processing a pair raises ``TypeError``.
    """
    #http://matplotlib.org/examples/pylab_examples/boxplot_demo2.html
    errors, relative_errors = [], []
    for expected_file, observed_file in zip(expected_files, observed_files):
        try:
            _, _, error, relative_error = \
                    get_file_error(expected_file, observed_file)
            errors.append(error)
            relative_errors.append(relative_error)
        except TypeError:
            return None

    fig = plt.figure(figsize=(6,4))
    ax = plt.subplot(2, 1, 1)
    plt.boxplot(errors)
    plt.xticks([])
    ax.set_title("Errors")
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
              alpha=0.5)

    ax = plt.subplot(2, 1, 2)
    plt.boxplot(relative_errors)
    ticks = [x + 1 for x in range(len(names))]
    names = [extract_number(name) for name in names]
    # The ticks and labels were computed but never applied; put them on the
    # lower plot, which is the one that keeps its x axis.
    plt.xticks(ticks, names)
    ax.set_title("Relative errors")
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
              alpha=0.5)

    return fig
Ejemplo n.º 23
0
def plot(revisions, benchmarks, subdir='.', baseurl='https://github.com/idaholab/moose/commit/'):
    """Save an SVG box plot of benchmark run times, one box per revision.

    Horizontal reference lines mark the first revision's middle sample and
    +/-1% and +/-5% around it. Every tick label (the first seven characters
    of the revision) gets a URL built from ``baseurl``. The file is written
    to ``<subdir>/<benchmarks[0].name>.svg`` and the figure is then cleared.
    """
    samples = [(bench.realruns, rev[:7])
               for rev, bench in zip(revisions, benchmarks)]
    data = [runs for runs, _ in samples]
    labels = [short_rev for _, short_rev in samples]

    first_runs = sorted(data[0])
    median = first_runs[int(len(data[0])/2)]

    reference_lines = [
        (median*1.05, dict(linestyle='--', linewidth=2, color='red', alpha=.5, label='+5%')),
        (median*1.01, dict(linestyle=':', linewidth=2, color='red', label='+1%')),
        (median, dict(dashes=[48, 4, 12, 4], color='black', alpha=.5)),
        (median*.99, dict(linestyle=':', linewidth=2, color='green', label='-1%')),
        (median*.95, dict(linestyle='--', linewidth=2, color='green', alpha=.5, label='-5%')),
    ]
    for level, line_style in reference_lines:
        plt.axhline(y=level, **line_style)

    plt.boxplot(data, labels=labels, whis=1.5)
    plt.xticks(rotation=90)
    plt.ylabel('Time (seconds)')

    fig = plt.gcf()
    ax = fig.axes[0]

    # Attach the commit URL to each tick label.
    for tick_label in ax.get_xticklabels():
        tick_label.set_url(urlparse.urljoin(baseurl, tick_label.get_text()))

    ax.legend(loc='upper right')

    fig.subplots_adjust(bottom=.15)
    fig.savefig(os.path.join(subdir, benchmarks[0].name + '.svg'))
    plt.clf()
Ejemplo n.º 24
0
    def sale_price_per_sq_foot_boxplot(self, groupby, title):
        """Box plot of sale price per square foot, one box per ``groupby`` level.

        ``title`` is the plot title. Returns whatever ``fig_to_svg`` produces
        for the figure.
        """
        fig = init_fig()

        # This figure needs to be extra wide
        fig.set_size_inches(10, 4)

        # Drop rows with missing values, keeping only the two columns used.
        frame = self.data[[groupby, "sale_price_per_sqft"]].dropna()

        # Levels of the groupby variable, in value_counts() order.
        levels = frame[groupby].value_counts().index

        # boxplot takes a list of Series: one per level.
        per_level = [
            frame.loc[frame[groupby] == level, "sale_price_per_sqft"]
            for level in levels
        ]

        # The empty string is the outlier symbol: outliers are not drawn,
        # since they would mess up the axis scale.
        plt.boxplot(per_level, 0, "")

        plt.ylabel("Sale Price per Sq. Ft.")
        plt.title(title)
        plt.xticks(np.arange(len(levels)) + 1, levels)

        return fig_to_svg(fig)
Ejemplo n.º 25
0
def distance_distribution_plot(learner,box_kwargs=None,**kwargs):
    """
    Box-plot the event distance distributions of a learner's training and
    validation scores on a log-scaled y axis.

    Args:
        learner: the learner object to use
        box_kwargs: keyword arguments for both plt.boxplot calls; defaults
            to whiskers at the 5th and 95th percentiles
        kwargs: passed to event_distance_distribution (ie: to_true=T/F)
    """
    train_scores = learner._scores_by_params(train=True)
    valid_scores = learner._scores_by_params(train=False)
    if box_kwargs is None:
        box_kwargs = dict(whis=[5,95])
    name = learner.description.lower()
    # The result is not used below; the call is kept as in the original.
    learner.param_values()
    train_dist = Learning.event_distance_distribution(train_scores,**kwargs)
    valid_dist = Learning.event_distance_distribution(valid_scores,**kwargs)
    # Materialise the distributions into plain lists before plotting.
    train_plot = list(train_dist)
    valid_plot = list(valid_dist)
    plt.boxplot(x=train_plot,**box_kwargs)
    plt.boxplot(x=valid_plot,**box_kwargs)
    plt.gca().set_yscale('log')
    PlotUtilities.lazyLabel("Tuning parameter","Distance Distribution (idx)",
                            "Event distributions for {:s}".format(name),
                            frameon=False)
Ejemplo n.º 26
0
def make_plot_lfw_reorder_other(save=False):
    """Box-plot ``1 - loss`` per architecture order for one experiment.

    Jobs of the experiment key below are grouped by ``spec.order`` in the
    ``hyperopt.jobs`` collection, reordered to follow
    ``params.order_choices``, and drawn as one box per order with the group
    means overlaid. ``save`` is not used by this function.
    """
    jobs = pm.Connection()['hyperopt']['jobs']

    exp_key = 'thor_model_exploration.model_exploration_bandits.LFWBanditModelExplorationOther/hyperopt.Random'

    query = {'exp_key': exp_key, 'state':2,
             'spec.preproc.size.0':250
            }
    groups = jobs.group(['spec.order'],
                        query,
                        {'losses': []},
                        'function(d, o){o.losses.push(d.result.loss);}')

    order_choices = params.order_choices
    ords = pluck(groups, 'spec.order')
    # Reorder the groups so they follow order_choices.
    groups = [groups[ords.index(choice)] for choice in order_choices]

    od = {'lpool': 'p', 'activ': 'a', 'lnorm': 'n'}
    order_labels = []
    for before, after in order_choices:
        left = ','.join([od[b] for b in before])
        right = ','.join([od[b] for b in after])
        order_labels.append(left + '|' + right)

    import matplotlib.pyplot as plt
    plt.figure(figsize=(18,8))
    plt.boxplot([1-np.array(group['losses']) for group in groups])
    means = [1-np.array(group['losses']).mean() for group in groups]
    positions = range(1,len(groups)+1)
    plt.plot(positions, means, color='green')
    plt.scatter(positions, means)

    plt.xticks(range(1,len(ords)+1),  order_labels, rotation=60)

    plt.ylabel('Absolute performance')
    plt.xlabel('Architecture tag')
Ejemplo n.º 27
0
def boxplot_by_pft(var, timestep, cmtnum, stages, ref_veg_map, ref_run_status):
  '''
  Work in progress...

  Stitches the stages of `var` together, masks the data by community type
  (`cmtnum`) and by failed run status, averages over axes 2 and 3 and shows
  a box plot with one box per PFT (labelled PFT 0 .. PFT 9).
  '''

  data, units = stitch_stages(var, timestep, stages)
  print("data size: %s" % (data.size,))
  print(data.shape)

  d2 = data

  d3 = mask_by_cmt(d2, cmtnum, ref_veg_map)
  print("data size after masking cmt: %s" % (d3.count(),))

  d3 = mask_by_failed_run_status(d3, ref_run_status)
  print("data count after masking run status: %s" % (d3.count(),))

  pft0avg = np.ma.average(d3, axis=(2,3))
  plt.boxplot(
      pft0avg,
      labels = ["PFT {}".format(i) for i in range(0, 10)],
      # Whiskers at the 0th/100th percentiles span the full data range; the
      # string spelling 'range' is no longer accepted by matplotlib.
      whis=(0, 100),
      showfliers=False,
      patch_artist=True,
      boxprops=dict(color='blue', alpha=0.25),
      whiskerprops=dict(color='red'),
      capprops=dict(color='blue'),
  )
  plt.ylabel(units)
  plt.show(block=True)
def _format_hours(hours):
    """Render a duration given in hours as ``"<h>h <m>min"``, keeping its sign."""
    # Work on the magnitude: floor and modulo on a negative value would turn
    # e.g. -1.5 hours into "-2h 30min".
    sign = "-" if hours < 0 else ""
    magnitude = abs(hours)
    return sign + str(math.floor(magnitude)) + "h " + str(int((magnitude % 1.0) * 60)) + "min"


def plot(work_time_deltas_hours, daily_target_hours=8.75):
    """Show a box plot of daily working hours with a statistics text box.

    Args:
        work_time_deltas_hours: one duration in hours per tracked day.
        daily_target_hours: expected hours per day used to compute overtime.
            The default of 8.75 assumes a 45 minutes break.
    """
    work_overtime = sum([w - daily_target_hours for w in work_time_deltas_hours])

    plt.boxplot(work_time_deltas_hours)
    plt.ylabel("Working Hours")

    # Tick marks without labels on the x axis.
    plt.xticks([0, 1, 2], ())

    shortest = numpy.min(work_time_deltas_hours)
    longest = numpy.max(work_time_deltas_hours)
    average = numpy.mean(work_time_deltas_hours)
    median = numpy.median(work_time_deltas_hours)
    days = len(work_time_deltas_hours)

    # Quarter-hour steps between the rounded extremes.
    yvalues = numpy.arange(numpy.floor(shortest), numpy.ceil(longest), 0.25)
    plt.yticks(yvalues, [_format_hours(x) for x in yvalues], rotation=0)

    # Debug
    print("Mean: "+str(average))
    print("Min: "+str(shortest))
    print("Max: "+str(longest))
    print("Median: "+str(median))
    print("Work overtime: "+ str(work_overtime))
    print("Days tracked: "+str(days))

    summary = "\n".join([
        "Mean: " + _format_hours(average),
        "Max: " + _format_hours(longest),
        "Min: " + _format_hours(shortest),
        "Median: " + _format_hours(median),
        "Overtime: " + _format_hours(work_overtime),
        "Days: " + str(days),
    ])
    plt.text(1.35, 10, summary,
             bbox=dict(boxstyle='round', facecolor='white', alpha=0.5))

    plt.title("Working Hours Boxplot")
    plt.show()
Ejemplo n.º 29
0
def main():
    # Loads temp_actual readings of sensor1 for every day of every month and
    # shows a box plot of the values stored at data[7-1][11].

    data = []

    # Connect to the database
    con = mdb.connect(host='192.168.1.143', db='monitor', user='******')

    #Format of data structure
    #[mm][dd][data]
    #mm:    This is the month of the dataset.  Keep in mind that it is indexed from zero.  So August (8) is actually 7.
    #dd:    This is the day within the month, also indexed from zero.
    #data:  This is an array of the the data from the day.  Each datapoint is a tuple of (datetime, value).

    with con:
        cur = con.cursor()
        # range() excludes its upper bound: 13 and 32 are needed to cover
        # December and the 31st. Days a month does not have simply yield an
        # empty result.
        for m in range(1, 13):
            data_month = []
            for d in range(1, 32):
                cur.execute("SELECT datetime,temp_actual FROM sensor1 WHERE DAY(datetime) = %i AND MONTH(datetime) = %i" %(d,m))
                data_month.append(np.array(cur.fetchall()))
            data.append(data_month)
    con.close()

    plt.boxplot(data[7-1][11][:,1])
    plt.show()
    
    '''
Ejemplo n.º 30
0
def nrgserrs_0n(save=False):
    """Box-plot the free energies loaded from the ``errfe0_*1.txt`` files.

    Each matching file (after reordering by ``move_ten``) is loaded with
    ``numpy.loadtxt`` and unpacked into a free-energy series and an error
    series.  One box is drawn per file, labelled 0%..10%.

    With ``save=True`` the figure and a per-file summary CSV (index, mean
    free energy, 1 - mean error) are written to disk; otherwise the layout
    is tightened and the summary table is printed.
    """
    fnames_0 = move_ten(glob('errfe0_*1.txt'))

    control_FEs = []
    out_file = []
    for pc, control_set in enumerate(fnames_0):
        FE, err = numpy.loadtxt(control_set)
        control_FEs.append(FE)
        out_file.append([pc, numpy.mean(FE), 1 - numpy.mean(err)])

    fig = plt.figure(figsize=(8, 4))
    plt.boxplot(control_FEs)
    plt.title('F by % of corruption')
    plt.ylabel('free energy')
    labels = [str(percent) + '%' for percent in range(0, 11)]
    plt.xticks(range(1, 12), labels)

    if not save:
        plt.tight_layout()
        print(numpy.array(out_file))
        return

    fig.savefig('FE_boxplots0n1c', bbox_inches='tight')
    numpy.savetxt('errfe_stats_0n.csv', out_file, delimiter=',',
                  fmt='%1.0f,%1.2f,%1.2f',
                  header='pc,Fmean,er', comments='')
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=5,
           min_weight_fraction_leaf=0.0, n_estimators=800, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

models.append(('LassoReg', Lasso(alpha=0.1)))
models.append(('SVM', svReg))
models.append(('LinearReg', LinearRegression()))
models.append(('randForest', randForReg))

mas = make_scorer(mean_absolute_error, greater_is_better=False)
# Evaluate each model in turn with 10-fold cross-validation.
# NOTE(review): neither `mas` nor `scoring` is passed to cross_val_score
# below, so each estimator's default scorer is used - confirm that is intended.
results = []
names = []
scoring = 'accuracy'
for name, model in models:
    # shuffle=True is required for random_state to have any effect; recent
    # scikit-learn releases raise ValueError when random_state is set while
    # shuffle is left at its default of False.
    kfold = KFold(n_splits=10, shuffle=True, random_state=7)
    cv_results = cross_val_score(model, x_train, y_train, cv=kfold, n_jobs=4)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
# Box plot comparing the per-fold scores of every model.
fig = pyplot.figure()
fig.suptitle('Classification Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.grid()
pyplot.show()
Ejemplo n.º 32
0
# Score every candidate model with 10-fold cross-validation.
for name, model in models:
    # shuffle=True is required for random_state to have any effect; recent
    # scikit-learn releases raise ValueError when random_state is set while
    # shuffle is left at its default of False.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
print('\n'.join(map(str, results)))

# Select Best Model

# Compare Algorithms: one box per model, built from its per-fold scores
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()

'''6. Make Predictions'''
# Make predictions on validation dataset
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
predictions = knn.predict(X_validation)
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))

svc = SVC(gamma='auto')
svc.fit(X_train, Y_train)
predictions = svc.predict(X_validation)
# Summarise the four distance arrays as a box plot.
# NOTE(review): despite the "hist" names used below, a box plot is drawn.
# Rescale every distance array by my_dpmm (presumably dots per millimetre,
# given the "(mm)" in the plot title - confirm).
centers_distances = centers_distances / my_dpmm
centers_distances_min = centers_distances_min / my_dpmm
edges_distances = edges_distances / my_dpmm
edges_distances_min = edges_distances_min / my_dpmm

# Means of the full distance arrays ignore entries that are exactly zero;
# the *_min arrays are averaged as they are.
centers_distances_avg = np.mean(
    centers_distances[np.nonzero(centers_distances)])
centers_distances_min_avg = centers_distances_min.mean()
edges_distances_avg = np.mean(edges_distances[np.nonzero(edges_distances)])
edges_distances_min_avg = edges_distances_min.mean()

# Four boxes, in order: non-zero centre distances, minimum centre distances,
# non-zero edge distances, minimum edge distances.
hist_dist_fig, hist_dist_ax = plt.subplots()
bp = plt.boxplot(
    (centers_distances[np.nonzero(centers_distances)], centers_distances_min,
     edges_distances[np.nonzero(edges_distances)], edges_distances_min),
    notch=0)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')

# Light horizontal grid lines on the major y ticks
hist_dist_ax.yaxis.grid(True,
                        linestyle='-',
                        which='major',
                        color='lightgrey',
                        alpha=0.5)

# Draw the grid behind the plot objects
hist_dist_ax.set_axisbelow(True)
hist_dist_ax.set_title('Recap of distances between objects (mm)')
hist_dist_ax.set_xlabel('Type')
Ejemplo n.º 34
0
def case5_single_boxplot():
    """Display a box plot of a fixed 13-value sample.

    The sample has one low value (1), one high value (9) and eleven values
    between 5 and 6.1.
    """
    low_value = [1]
    central_values = [5, 5.1, 5.1, 5.5, 5.4, 5.5, 5.4, 5.6, 5.7, 6., 6.1]
    high_value = [9]
    plt.boxplot(low_value + central_values + high_value)
    plt.show()
# Finish the "steps" chart: axis label, title, one tick label per task, legend.
plt.ylabel('Steps')
plt.title('Steps by Tasks')
plt.xticks(index + bar_width, ('open map', 'mcs', 'climate', 'bluetooth audio', 'park'))
plt.legend()

# plt.show()



# plot HR
# One subplot per task in a 2x5 grid of the 'HR Value' figure.  The
# positional boxplot arguments are notch=0 and sym='gD' (flier marker style).
data = np.array( Variables.end2end_map_hr )
plt.figure('HR Value')
plt.subplot(2,5,1)
plt.ylabel('Heart Rate')
plt.xlabel('Open Map Task End2End')
plt.boxplot(data, 0, 'gD')

data = np.array( Variables.end2end_mcs_hr )
plt.subplot(2,5,2)
plt.ylabel('Heart Rate')
plt.xlabel('MCS Task End2End')
plt.boxplot(data, 0, 'gD')

# NOTE(review): this subplot is labelled 'Heart Value' while the two before
# it use 'Heart Rate' - confirm whether the difference is intentional.
data = np.array( Variables.end2end_climate_hr )
plt.subplot(2,5,3)
plt.ylabel('Heart Value')
plt.xlabel('Climate Task End2End')
plt.boxplot(data, 0, 'gD')

data = np.array( Variables.end2end_bt_hr )
plt.subplot(2,5,4)
Ejemplo n.º 36
0
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(gamma='auto')))
# Evaluate each model in turn with stratified 10-fold cross-validation.
results = []
names = []
for name, model in models:
    # shuffle=True is required for random_state to have any effect; recent
    # scikit-learn releases raise ValueError when random_state is set while
    # shuffle is left at its default of False.
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    cv_results = cross_val_score(model,
                                 X_train,
                                 Y_train,
                                 cv=kfold,
                                 scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))
# Compare Algorithms: one box per model, built from its per-fold accuracies
pyplot.boxplot(results, labels=names)
pyplot.title('Algorithm Comparison')
pyplot.show()

# Make predictions on validation dataset
model = SVC(gamma='auto')
model.fit(X_train, Y_train)
predictions = model.predict(X_validation)
# Evaluate predictions
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))
Ejemplo n.º 37
0
# Resample each distribution with the shared bootstrap index array.
expoBoot = expo[bootstrapIndices]
gumbBoot = gumb[bootstrapIndices]
lognBoot = logn[bootstrapIndices]
triaBoot = tria[bootstrapIndices]

# Each original sample is immediately followed by its bootstrap resample.
data = [
    norm, normBoot, logn, lognBoot, expo, expoBoot, gumb, gumbBoot, tria,
    triaBoot
]

fig = plt.figure(figsize=(10, 6))
# The window title is set through the figure manager: the canvas-level
# set_window_title() was deprecated and later removed from Matplotlib.
fig.canvas.manager.set_window_title('A Boxplot Example')
ax1 = fig.add_subplot(111)
plt.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)

bp = plt.boxplot(data, notch=0, sym='+', vert=1, whis=1.5)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')

# Add a horizontal grid to the plot, but make it very light in color
# so we can use it for reading data values but not be distracting
ax1.yaxis.grid(True,
               linestyle='-',
               which='major',
               color='lightgrey',
               alpha=0.5)

# Draw the grid behind the plot objects
ax1.set_axisbelow(True)
ax1.set_title(
Ejemplo n.º 38
0
import numpy as np
import matplotlib.pyplot as plt

# 29 integer measurements to summarise in a single box plot.
data_to_plot = np.array(
    [157, 159, 161, 164, 165, 166, 167, 167, 167, 168,
     169, 170, 170, 170, 171, 171, 172, 172, 172, 172,
     173, 173, 175, 175, 177, 178, 178, 179, 185]
)

# Figure number 1, 5x6 inches, with a single subplot.
plt.figure(num=1, figsize=(5, 6))
plt.subplot(1, 1, 1)
# Axis limits given as (xmin, xmax, ymin, ymax).
plt.axis((0, 1, 155, 190))
plt.boxplot(data_to_plot, showfliers=True)
plt.show()
Ejemplo n.º 39
0
def plotCF(x, y, labels, numberOfRuns):
    """Box-plot the percentage of correctly received frames and save the figure.

    ``y`` is walked as a five-level nested mapping
    (message length -> offset -> delay -> frame length -> interval) whose
    leaves are indexable.  For every leaf, element 2 divided by element 1,
    times 100, becomes one single-value box; a leaf whose element 1 is zero
    contributes 0 instead.  The figure is written to the module-level
    ``cfResultsFile``.

    ``x``, ``labels`` and ``numberOfRuns`` are kept for interface
    compatibility; this function does not read them.
    """
    print("plotBF()")
    fig = plt.figure(1, frameon=True)
    fig.subplots_adjust(bottom=0.2)
    ax = plt.subplot(111)
    ax.yaxis.grid()

    data = []
    for messageLength in y:
        for offset in y[messageLength]:
            for delay in y[messageLength][offset]:
                for frameLength in y[messageLength][offset][delay]:
                    intervals = y[messageLength][offset][delay][frameLength]
                    for interval in intervals:
                        counts = intervals[interval]
                        if counts[1] == 0:
                            # Avoid dividing by zero when the denominator is 0.
                            data.append([0])
                        else:
                            data.append([
                                (float(counts[2]) / float(counts[1])) * 100
                            ])

    medianpointprops = dict(marker='', linestyle='-', color='red')
    bp = plt.boxplot(data,
                     sym='+',
                     vert=1,
                     whis=1.5,
                     patch_artist=True,
                     medianprops=medianpointprops)
    # colors = ['#3D9970', '#FF9136', '#FFC51B']
    colors = ['white', 'white', 'white']
    # Boxes 0-7 use the first colour, 8-15 the second, the rest the third.
    for k, patch in enumerate(bp['boxes']):
        patch.set_facecolor(colors[min(k // 8, 2)])
    # Whisker and flier styling applies to all boxes at once, so it is set
    # a single time rather than once per box.
    plt.setp(bp['whiskers'], color='black')
    plt.setp(bp['fliers'], color='blue')

    plt.ylabel('Correct frames received (%)')
    ax.text(3, 110.0, u'FL=7')
    ax.text(11, 110.0, u'FL=14')
    ax.text(19, 110.0, u'FL=28')

    # Vertical separator lines every 8 boxes, starting after the 8th box.
    separator = 8.5
    for _ in range(1, 25):
        plt.plot([separator, separator], [-1, 100], color='#000000')
        separator += 8

    # 24 tick positions need exactly 24 labels: the eight interval values
    # repeated once per frame-length group.  (Extending a list with an alias
    # of itself twice produced 32 labels, and range objects cannot be
    # extended at all on Python 3.)
    tickMarks = range(1, 25)
    intervalLabels = list(range(30, 110, 10)) * 3
    plt.xticks(tickMarks, tuple(intervalLabels))
    plt.tick_params(axis='both', which='major', labelsize=5)
    # ax.set_ylim([0, 26])
    plt.xlabel("Time Interval [ms]")

    # Shift the axes left and shrink them vertically within the figure.
    box = ax.get_position()
    ax.set_position([
        box.x0 * 0.9, box.y0 + box.height * 0.20, box.width * 1.0,
        box.height * 0.80
    ])
    ax.yaxis.grid(True, linestyle='-', which='major', color='grey')
    ax.set_axisbelow(True)
    plt.savefig(cfResultsFile)
    print("boxplot data: ")
Ejemplo n.º 40
0
fig = plt.hist(df['residual sugar'], bins=bin_edges)

# add plot labels: a histogram has the measured value on the x axis and the
# number of observations on the y axis
plt.xlabel('residual sugar')
plt.ylabel('count')
plt.show()

# create scatterplot
fig = plt.scatter(df['pH'], df['residual sugar'])

# add plot labels
plt.xlabel('pH')
plt.ylabel('residual sugar')
plt.show()

# box plot of the alcohol column
plt.boxplot(df['alcohol'])

plt.ylim([8, 16])
plt.ylabel('alcohol')

# hide the x-axis ticks: there is only one box, so they carry no information
fig = plt.gca()
fig.axes.get_xaxis().set_ticks([])
plt.show()

#gen random num
print("random num", np.random.uniform(0, 10))

#create an array of 100 random numbers drawn uniformly from [0, 10)
observations = np.random.uniform(0, 10, 100)
print(observations)
# NOTE(review): this reuses the bin_edges that were applied to the
# residual-sugar histogram - confirm they suit values in [0, 10).
fig = plt.hist(observations, bins=bin_edges)
Ejemplo n.º 41
0
def drawBox(heights):
    """Show ``heights`` as a single box plot labelled 'Heights'."""
    series = [heights]
    tick_names = ['Heights']
    pyplot.boxplot(series, labels=tick_names)
    pyplot.title('Heights of Students')
    pyplot.show()
Ejemplo n.º 42
0
    # create the modeling pipeline: impute with strategy `s`, then fit a
    # random forest (presumably `s` iterates over `strategies` - the loop
    # header is outside this excerpt)
    pipeline = Pipeline(steps=[('i', SimpleImputer(
        strategy=s)), ('m', RandomForestClassifier())])
    # evaluate the model with 10-fold stratified CV repeated 3 times
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    scores = cross_val_score(pipeline,
                             X,
                             y,
                             scoring='accuracy',
                             cv=cv,
                             n_jobs=-1)
    # store results
    results.append(scores)
    print('>%s %.3f (%.3f)' % (s, mean(scores), std(scores)))
# plot model performance for comparison (one box per strategy, means shown)
pyplot.boxplot(results, labels=strategies, showmeans=True)
pyplot.show()
# most frequent is the best candidate, showing the least variance

# split train, test sets (25% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# define pipeline: most-frequent imputation -> standard scaling -> random forest
pipe = Pipeline(steps=[('i', SimpleImputer(
    strategy='most_frequent')), (
        'scaler', StandardScaler()), ('rf', RandomForestClassifier())])

# set parameters for grid search
params = {
    'rf__n_estimators': [10, 25, 50, 100, 250, 500],
    'rf__max_depth': [10, 25, 50],
Ejemplo n.º 43
0
        # print "	"+clf_name+":"
        # prediction stage
        start_time = time.time()
        # Each pass refits clf on `temp`, scores the same rows, and stores
        # the newest scores as the first column of `temp` for the next pass.
        for ind in range(num_layers):
            clf.fit(temp)
            # print "LSH Forest:"
            # clf.display()  # CONSOLE OUTPUT
            # print "-------------"
            # evaluation stage
            y_pred = clf.decision_function(temp).ravel()
            # After the first pass, drop the score column that the previous
            # pass prepended, so only one score column is ever present.
            if (ind != 0):
                for row in temp:
                    row.pop(0)
            # The final pass keeps y_pred as an array so it can be negated
            # for the AUC computation below.
            if (ind < num_layers - 1):
                y_pred = y_pred.tolist()
            temp = np.c_[y_pred, temp].tolist()
        train_time = time.time() - start_time
        # NOTE(review): test_time is taken immediately after train_time with
        # no work in between, so it is always close to zero.
        test_time = time.time() - start_time - train_time
        # Scores are negated before ranking; the AUC is stored as a percentage.
        auc = roc_auc_score(ground_truth, -1.0 * y_pred)
        results.append(auc * 100)
        # print "AUC:	", auc
        # print "Training time:	", train_time
        # print "Testing time:	", test_time
# Write all collected AUC percentages as a single CSV row.  The with-block
# closes the file on exit, so no explicit close() is needed afterwards.
with open("deep2.csv", "w") as filerw:
    resultWriter = csv.writer(filerw)
    resultWriter.writerow(results)

# Show the distribution of the AUC percentages, then echo the raw values.
mpl.boxplot(results)
mpl.show()
print(results)
Ejemplo n.º 44
0
def _plot_eom_panel(ax, y, numberOfRuns, textPositions, xlabel):
    """Draw one end-of-message-error box plot on ``ax``, the current axes.

    ``y`` is walked as a five-level nested mapping
    (message length -> offset -> delay -> frame length -> interval); element
    0 of every leaf, divided by ``len(numberOfRuns)`` and multiplied by 100,
    becomes one single-value box.  ``textPositions`` holds the x coordinates
    of the three 'FL=' captions and ``xlabel`` is the x-axis label.
    """
    ax.yaxis.grid()
    data = []
    for messageLength in y:
        for offset in y[messageLength]:
            for delay in y[messageLength][offset]:
                for frameLength in y[messageLength][offset][delay]:
                    intervals = y[messageLength][offset][delay][frameLength]
                    for interval in intervals:
                        data.append([
                            float(intervals[interval][0]) /
                            (len(numberOfRuns)) * 100
                        ])
    # medianpointprops = dict(marker='', linestyle='-', color='red')
    bp = plt.boxplot(data, patch_artist=True)
    # colors = ['#3D9970', '#FF9136', '#FFC51B']
    colors = ['white', 'white', 'white']
    # Boxes 0-7 use the first colour, 8-15 the second, the rest the third.
    for k, patch in enumerate(bp['boxes']):
        patch.set_facecolor(colors[min(k // 8, 2)])
    plt.ylabel('End of message errors (%)')
    ax.text(textPositions[0], 110.0, u'FL=7')
    ax.text(textPositions[1], 110.0, u'FL=14')
    ax.text(textPositions[2], 110.0, u'FL=28')

    # Vertical separator lines every 8 boxes, starting after the 8th box.
    separator = 8.5
    for _ in range(1, 25):
        plt.plot([separator, separator], [-1, 100], color='#000000')
        separator += 8

    # 24 tick positions need exactly 24 labels: the eight interval values
    # repeated once per frame-length group.  (Extending a list with an alias
    # of itself twice produced 32 labels, and range objects cannot be
    # extended at all on Python 3.)
    tickMarks = range(1, 25)
    intervalLabels = list(range(30, 110, 10)) * 3
    plt.xticks(tickMarks, tuple(intervalLabels))
    plt.xlabel(xlabel)


def plotEOM(noLoadData, withLoadData, numberOfRuns):
    """Plot end-of-message error rates without and with load, side by side.

    The left panel is drawn from ``noLoadData`` and the figure is saved to
    the module-level ``eomResultsFile``.  Unless ``delayToPlot[0]`` equals
    ``u'3.0'``, the right panel is then drawn from ``withLoadData`` and the
    figure is saved again to the same file.  ``numberOfRuns`` must support
    ``len()``; its length is the divisor for the error percentages.
    """
    print("plotEOM()")
    plt.figure(1, figsize=(width, height), frameon=False)
    fig = plt.figure(1, frameon=True)
    fig.subplots_adjust(bottom=0.2)

    # WITHOUT LOAD
    ax = plt.subplot(1, 2, 1)
    _plot_eom_panel(
        ax, noLoadData, numberOfRuns, (3, 11, 19),
        "Time Interval (ms)\n(a) End of message errors without load")
    box = ax.get_position()
    ax.set_position([box.x0 * .5, box.y0, box.width * 1.22, box.height * 0.95])
    ax.set_axisbelow(True)
    plt.savefig(eomResultsFile)

    # WITH LOAD
    if delayToPlot[0] == u'3.0':
        return
    ax = plt.subplot(1, 2, 2)
    _plot_eom_panel(
        ax, withLoadData, numberOfRuns, (3.1, 11.1, 19.1),
        "Time Interval (ms)\n(b) With Load")
    box = ax.get_position()
    ax.set_position(
        [box.x0 + 0.01, box.y0, box.width * 1.22, box.height * 0.95])
    ax.set_axisbelow(True)
    # plt.tight_layout()
    plt.savefig(eomResultsFile)
Ejemplo n.º 45
0
def main():
    """Compute per-grader box-plot statistics and hypergeometric CDFs, then export them.

    Grades above ``config['useAbove']`` are collected per grader (and under
    the combined key ``"ALL"``), summarised through a Matplotlib box plot,
    and compared against the combined population with ``hypergeom.cdf`` at
    the Q1, median and Q3 cut values.  The resulting table is written as
    XLSX or CSV depending on ``config['output']['format']``; the box plot is
    saved to ``output_box_plot.png`` when ``config['output']['pyplot']`` is
    truthy.

    Exits the process via ``sys.exit`` if Matplotlib rejects the collected
    values with a ``ValueError``.
    """
    config, groups_map, data = load_files()

    # Convert the 'data' df into per-grader lists of grades, plus one list
    # of every accepted grade under the combined key.
    grades = {}
    combined = "ALL"
    data_cols = config['dataHeaders']['data']
    group_col = config['dataHeaders']['group']
    group_col_list = config['groupHeaders']['group']
    use_threshold = config['useAbove']
    exclude = config['excludeGraders'] if 'excludeGraders' in config else []
    for _, row in data.iterrows():
        group_num = str(int(row.at[group_col]))
        # Rows whose group is not configured contribute nothing.
        if group_num not in group_col_list:
            continue
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for index2, item in row.filter(items=data_cols).items():
            grader = groups_map.at[index2, group_num]
            if item > use_threshold and grader not in exclude:
                grades.setdefault(combined, []).append(item)
                grades.setdefault(grader, []).append(item)

    # Create box plot
    grades_values = list(grades.values())
    grades_keys = list(grades.keys())
    try:
        box_values = plt.boxplot(grades_values, labels=grades_keys)
    except ValueError as e:
        print('Keys: %a' % grades_keys, file=sys.stderr)
        print('Values: %a' % grades_values, file=sys.stderr)
        sys.exit('EXCEPTION: ValueError! ' + str(e))
    # Retrieve the y-coordinates of every artist drawn by the box plot
    res = {key: [v.get_ydata() for v in value] for key, value in box_values.items()}

    # EXTRACT TABLE DATA
    # Whiskers alternate lower/upper per box: even entries are the lower
    # whisker (whisker end up to Q1), odd entries the upper (Q3 up to end).
    whiskers_min = [min(item) for item in res['whiskers'][::2]]  # Lower Whisker
    whiskers_Q1 = [max(item) for item in res['whiskers'][::2]]  # Q1
    whiskers_Q3 = [min(item) for item in res['whiskers'][1::2]]  # Q3
    whiskers_max = [max(item) for item in res['whiskers'][1::2]]  # Upper Whisker
    medians = [item[0] for item in res['medians']]  # Q2

    # ALTERNATIVE METHODS, EQUIVALENT DATA
    # caps_lower = [item[0] for item in res['caps'][::2]] # Lower Whisker
    # caps_upper = [item[0] for item in res['caps'][1::2]] # Upper Whisker
    # boxes_lower = [min(item) for item in res['boxes']] # Q1
    # boxes_upper = [max(item) for item in res['boxes']] # Q3

    # print(res['fliers']) # outliers
    # print(res['means']) # EMPTY ARRAY!

    # Format and display box plot
    if config['output']['pyplot']:
        # TODO CONFIG FLAGS
        plt.grid(axis='y')
        # Resize the figure that already holds the box plot.  Calling
        # plt.figure() here would open a new, empty figure and the saved
        # image would not contain the boxes.
        plt.gcf().set_size_inches(12, 4)
        plt.yticks(np.arange(use_threshold, 1.1, step=0.05))
        try:
            plt.savefig('output_box_plot.png', dpi=200)
        except IOError as e:
            print(e, file=sys.stderr)
            print("ERROR: FAILED TO SAVE PYPLOT FIGURE! "
                  "Please make sure the file is not already open", file=sys.stderr)
        # plt.show()

    # Calculate hypergeometric CDFs
    h_cuts = [whiskers_Q1, medians, whiskers_Q3]
    M = len(grades[combined])  # total overall
    N = [len(graded) for graded in grades_values]  # number the grader completed

    # CALCULATE n,x using >= cut value to find chance of getting as extreme or more extreme

    # number of total satisfying: one row per cut, one column per grader
    n = [[sum([1 for item in grades[combined] if item >= val]) for val in cut]
         for cut in h_cuts]
    # number of selected satisfying: one row per cut, one column per grader
    x = [[sum([1 for item in gr if item >= val])
          for val, gr in zip(cut, grades_values)]
         for cut in h_cuts]
    # Prob to get sample LESS 'extreme' (with more selected<cut):
    # one row per cut, one column per grader
    h_cuts_probs = [[hypergeom.cdf(c_x, M, c_n, c_N)
                     for c_x, c_n, c_N in zip(cut_x, cut_n, N)]
                    for cut_n, cut_x in zip(n, x)]

    data_out = [
        ["Labels"] + grades_keys,
        ["whisker_min"] + whiskers_min,
        ["Q1"] + whiskers_Q1,
        ["Q2"] + medians,
        ["Q3"] + whiskers_Q3,
        ["whisker_max"] + whiskers_max,
        ["M"] + [M for _ in range(len(N))],
        ["N"] + N,
        ["n_Q1"] + n[0],
        ["n_Q2"] + n[1],
        ["n_Q3"] + n[2],
        ["x_Q1"] + x[0],
        ["x_Q2"] + x[1],
        ["x_Q3"] + x[2],
        ["Fx(x)_Q1"] + h_cuts_probs[0],
        ["Fx(x)_Q2"] + h_cuts_probs[1],
        ["Fx(x)_Q3"] + h_cuts_probs[2]
    ]

    if config['output']['format'] == 'xlsx':
        create_xlsx(config, data_out,
                    grades_keys, grades_values, [whiskers_Q1, medians, whiskers_Q3])
    else:
        with open(config['output']['filename'] + '.csv', 'w+') as csv_outfile:
            csv_w = csv.writer(csv_outfile, delimiter=',', lineterminator='\n')
            csv_w.writerows(data_out)
Ejemplo n.º 46
0
def makesingleboxplot(thisdirname, subdirname, thisfilename):
    """Split a results file into per-node files, then tabulate and box-plot them.

    Every line of ``thisdirname/thisfilename`` is split on single spaces into
    six leading fields plus a remainder; the sixth field is the node name and
    the first token of the remainder is the measured value.  Each value is
    appended to a file named after its node inside ``thisdirname``.  The
    per-node files are then loaded, summarised into
    ``thisdirname/FinalDataTable.txt`` and drawn as one box per node, ordered
    by mean, in ``thisdirname/<subdirname>Boxplot.png``.

    Returns the number of nodes plotted, or 0 when no per-node file was found.

    NOTE(review): this is Python 2 code (``xrange``, ``print >>``, sorting the
    result of ``zip``).
    """

    maxnodeint = 1
    firsttime = 0
    firstdigit = 1
    segname = []
    ylabelunits = []
    idecpoint = -1
    ivallen = 0
    iseform = -1
    decdigits = 2

    # Row format for the summary table; the four value columns are appended
    # once the first data line has been inspected.
    tablefmtstring = "%d %d "

    thisdatafile = thisdirname + "/" + thisfilename
    # NOTE(review): datafile is never closed in this function.
    datafile = open(thisdatafile)
    for line in datafile:
        thisnode = []
        del thisnode[:]
        tud, jud, tid, mname, pname2, nodename, trest = line.split(' ', 6)

        nfile = thisdirname + "/" + ''.join(nodename)
        # The first line fixes the node-name pattern and the number format
        # used for every later line.
        if firsttime < 1:

            # Position of the first digit in the node name: everything before
            # it is the textual prefix.
            firstdigit = re.search(r"\d", ''.join(nodename))
            firstdigit = firstdigit.start()
            firsttime = 1
            # Build a format such as "<prefix>%0<width>d" in segname.
            for i in xrange(0, firstdigit):
                segname.append(nodename[i])
            segname.append('%')

            segname.append('0')
            nlen = len(nodename) - firstdigit
            segname.append("%d" % nlen)
            segname.append('d')
            trestlen = len(trest)
            spdigit = -1
            spdigit = trest.find(' ')

            valstring = []
            valstring = trest.split()[0]

            # A second token after the value, when present, is used as the
            # unit shown on the y-axis label.
            if len(trest.split()) > 1:
                ylabelunits = trest.split()[1]
            ivallen = len(valstring)
            idecpoint = -1
            # Locate the decimal point of the first value, if it has one.
            for j in xrange(0, ivallen):
                if valstring[j] == '.':
                    idecpoint = j
                    break

            # An 'e' in the value selects exponent formatting for the table.
            iseform = str(valstring).find('e')
            decdigits = 2
            if idecpoint > -1:
                decdigits = ivallen - idecpoint + 1

            # NOTE(review): only the fixed-point variant ends in "\n"; with
            # the exponent variant table rows are written without a line
            # break - confirm whether that is intended.
            if iseform > -1:
                vfmtstring = "%%.%de %%.%de %%.%de %%.%de " % \
                             (decdigits, decdigits,
                              decdigits - 2, decdigits - 2)
            else:
                vfmtstring = "%%.%df %%.%df %%.%df %%.%df\n" % \
                             (decdigits, decdigits,
                              decdigits - 2, decdigits - 2)
            tablefmtstring = tablefmtstring + vfmtstring
        # Extract the integer node number that follows the prefix.
        sepdigit = re.search(r"\D", ''.join(nodename[firstdigit:]))
        if sepdigit:
            sepdigit = sepdigit.start()
            # NOTE(review): the "- 1" drops the last digit before the first
            # non-digit character - possible off-by-one, confirm.
            tnode = int(nodename[firstdigit:firstdigit + sepdigit - 1])
        else:
            tnode = int(nodename[firstdigit:])

        # Track the largest node number seen; it bounds the file scan below.
        if tnode > maxnodeint:
            maxnodeint = tnode

        t2restlen = len(trest)
        vspdigit = -1
        vspdigit = trest.find(' ')

        tvalstring = []
        if vspdigit > -1:
            for j in xrange(0, vspdigit):
                tvalstring.append(trest[j])
        else:
            tvalstring = trest
        # Append this line's value to the per-node file.
        nodefile = open(nfile, "a")

        print >> nodefile, trest.split()[0]

        nodefile.close()

    # Begin processing a list of files in the current directory, complicated list of lists
    # NOTE(review): os.system runs the command in a subshell, so this "cd"
    # does not change the working directory of this process; the paths used
    # below are all prefixed with thisdirname anyway.
    chtodir = "cd " + thisdirname
    os.system(chtodir)

    fstring = ''.join(segname)  # "wf%03d"
    maxnodenum = maxnodeint + 1  # 620
    if len(ylabelunits) < 1:
        ytitle = "Performance (MB/sec)"
    else:
        ytitle = "Performance " + "(" + str(ylabelunits) + ")"

    ctitle = subdirname + " Performance Boxplot"

    fname = {}
    dlist = []
    hpldata = []
    tmarks = []
    tname = []
    nnum = []
    nticks = 0

    # Load the per-node file for every node number that has one.
    for x in range(1, maxnodenum):
        fname[x] = fstring % (x)

        if os.path.isfile(thisdirname + "/" + fname[x]):
            nticks += 1
            dlist.append(loadtxt(thisdirname + "/" + fname[x]))
            nnum.append(x)
            if (x % 2) == 0:
                tmarks.append("%d" % (x))
            else:
                tmarks.append('')

    # Per-node summary statistics, in the same order as dlist / nnum.
    dmeans = []
    for j in dlist:
        dmeans.append(j.mean())

    dmins = []
    for j in dlist:
        dmins.append(j.min())

    dmaxs = []
    for j in dlist:
        dmaxs.append(j.max())

    dsamps = []
    for j in dlist:
        dsamps.append(j.size)

    dstds = []
    for j in dlist:
        dstds.append(j.std())

    # One table row per node: number, sample count, mean, std, min, max.
    nodetable = []
    nodetable = zip(nnum, dsamps, dmeans, dstds, dmins, dmaxs)

    tablename = thisdirname + "/FinalDataTable.txt"
    tablefile = open(tablename, "w")
    for i in nodetable:
        tablefile.write(tablefmtstring % (i[0], i[1], i[2], i[3], i[4], i[5]))
    tablefile.close()

    nodemeans = []
    nodemeans = zip(nnum, dmeans)

    # Order the tick labels by ascending mean; dlist is sorted the same way
    # below so labels and boxes line up.
    nodemeans.sort(key=lambda x: x[1])
    xtickmarks = []

    # try to add a null tick mark at beginning
    # (tick positions start at 0 while the boxes start at position 1)
    xtickmarks.append('')
    # end try

    for i, v in nodemeans:
        xname = fstring % i
        xtickmarks.append(xname)

    dlist.sort(key=lambda a: a.mean())
    # NOTE(review): the figure is created before the emptiness check, so the
    # early return leaves an empty figure open.
    plt.figure()
    if len(dlist) < 1:
        return 0

    plt.boxplot(dlist)
    plt.xlabel('Node')
    plt.ylabel(ytitle)
    plt.title(ctitle)

    nnodes = len(dlist)

    # Scale the figure width with the node count, with a 10-inch minimum.
    adjustedwidth = nnodes / 100.0 * 10.0
    if adjustedwidth < 10.0:
        adjustedwidth = 10.0

    ticktable = []
    for k in xtickmarks:
        ticktable.append(k)

    plt.xticks(range(len(xtickmarks)), rotation=90, fontsize=6)
    plt.xticks(range(len(ticktable)), xtickmarks, rotation=90, fontsize=6)

    fig = matplotlib.pyplot.gcf()
    fig.set_size_inches(adjustedwidth, 7.0)

    plotname = thisdirname + "/" + subdirname + "Boxplot.png"
    plt.savefig(plotname, dpi=150)

    return nnodes
Ejemplo n.º 47
0
def main():
    """Box-plot every feature column together with the target as a last column.

    The arrays come from ``get_data()``; the target is reshaped into a
    single column and appended to the right of the feature matrix.
    """
    features, target = get_data()
    plt.figure()
    target_column = target.reshape((len(target), 1))
    combined = np.concatenate((features, target_column), axis=1)
    plt.boxplot(combined)
    plt.show()
Ejemplo n.º 48
0
# ===== Import the library used for reading, manipulating and viewing data =====
import pandas as pd

# ===== Load the dataset =====
base = pd.read_csv('credit-data.csv')

# ===== Drop rows with missing (NaN) values =====
base = base.dropna()

# age outliers
# ===== Import the plotting library =====
import matplotlib.pyplot as plt
# Box plot of the third column (presumably age - confirm against the CSV header)
plt.boxplot(base.iloc[:, 2], showfliers=True)
# ===== Select the outlier rows =====
outliers_age = base[(base.age < -20)]

# loan outliers (loan holds the debt amount)
plt.boxplot(base.iloc[:, 3])
outliers_loan = base[(base.loan > 13400)]
Ejemplo n.º 49
0
# The table above shows the number of earthquakes that occurred at each magnitude.

# In[100]:

# Histogram of the magnitudes using 10 equal-width bins over the range 0-10;
# n holds the per-bin counts.
(n, bins, patches) = plt.hist(e_data["Magnitude"], range=(0, 10), bins=10)
plt.xlabel("Earthquake Magnitudes")
plt.ylabel("Number of Occurences")
plt.title("Overview of earthquake magnitudes")

# Print the counts as a small text table, starting at the 5-6 bin.
print("Magnitude" + "   " + "Number of Occurence")
for i in range(5, len(n)):
    print(str(i) + "-" + str(i + 1) + "         " + str(n[i]))

# In[101]:

# Box plot of the same magnitude column.
plt.boxplot(e_data["Magnitude"])
plt.show()

# In[102]:

# Keep only the rows with magnitude 8 or above.
highly_affected = e_data[e_data["Magnitude"] >= 8]

# In[103]:

print(highly_affected.shape)

# In[106]:

# Derive the month from the 'Date' column.
# NOTE(review): the .dt accessor requires 'Date' to already be a datetime
# column -- confirm it is parsed earlier in the notebook.
e_data["Month"] = e_data['Date'].dt.month

# In[107]:
Ejemplo n.º 50
0
    def plot(self, filename, output_text):
        """Write the EKF diagnostic report as a multi-page PDF.

        One page is saved per figure, in this order: position, orientation,
        feature counts, Mahalanobis distance box plot, velocity, acceleration,
        angle / angular velocity, bias terms, covariance, Mahalanobis distance
        histogram, visual landmark density, optical flow feature age, optical
        flow density, and a final text page.

        The series are read from ``self.ekf``, ``self.gt``, ``self.vl``,
        ``self.of``, ``self.mahal``, ``self.vl_heatmap`` and
        ``self.of_heatmap``.

        Args:
            filename: Path of the PDF file to create.
            output_text: Text drawn on the last page of the report.
        """
        colors = ['r', 'b', 'g']
        ekf = self.ekf
        gt = self.gt
        vl = self.vl
        of = self.of
        position_keys = ['x', 'y', 'z']
        angle_keys = ['angle1', 'angle2', 'angle3']

        def draw_lines(target, data, keys, labels=(), transform=None, **style):
            """Plot data[key] against data['t'] for each key, coloured r, b, g.

            ``target`` is either ``plt`` or an Axes. ``labels`` supplies legend
            labels for the leading keys only; the remaining lines stay
            unlabelled. ``transform`` is applied to each series first.
            """
            for idx, (key, color) in enumerate(zip(keys, colors)):
                values = data[key]
                if transform is not None:
                    values = transform(values)
                line_style = dict(style, color=color)
                if idx < len(labels):
                    line_style['label'] = labels[idx]
                target.plot(data['t'], values, **line_style)

        def draw_bands(target, data, keys, cov_keys, transform=None):
            """Shade series -/+ data[cov_key] around each series in its colour."""
            for key, cov_key, color in zip(keys, cov_keys, colors):
                center = data[key]
                if transform is not None:
                    center = transform(center)
                # assumes data[cov_key] is a numpy array so the arithmetic
                # broadcasts even when center is a plain list -- TODO confirm
                target.fill_between(data['t'],
                                    center - data[cov_key],
                                    center + data[cov_key],
                                    facecolor=color,
                                    alpha=0.5)

        with PdfPages(filename) as pdf:
            # positions
            plt.figure()
            max_y = 10

            def clip_list(array):
                # Clamp every sample to [-max_y, max_y] so outliers stay
                # inside the fixed y-range set below.
                return [max(-max_y, min(max_y, a)) for a in array]

            draw_lines(plt,
                       ekf,
                       position_keys,
                       labels=['EKF Pos. (X)', 'EKF Pos. (Y)', 'EKF Pos. (Z)'],
                       transform=clip_list,
                       linewidth=0.5)
            draw_bands(plt,
                       ekf,
                       position_keys, ['cov_13', 'cov_14', 'cov_15'],
                       transform=clip_list)
            # Freeze the axis limits on the EKF data before overlaying the
            # ground truth and the observations.
            plt.autoscale(False)
            draw_lines(plt,
                       gt,
                       position_keys,
                       labels=['Ground Truth (X)'],
                       transform=clip_list,
                       linewidth=0.5,
                       dashes=(1, 1))
            draw_lines(plt,
                       vl,
                       position_keys,
                       labels=['Observation (X)'],
                       transform=clip_list,
                       linestyle='None',
                       marker='o',
                       markersize=2)
            plt.xlabel('Time (s)')
            plt.ylabel('Position (m)')
            plt.title('Position')
            plt.legend(prop={'size': 6})
            plt.ylim(-max_y + 0.2, max_y + 0.2)
            pdf.savefig()
            # Release the figure like every other page does; it was
            # previously left open.
            plt.close()

            # angles
            plt.figure()
            draw_lines(plt, ekf, angle_keys, labels=['EKF'], linewidth=0.5)
            draw_bands(plt, ekf, angle_keys, ['cov_1', 'cov_2', 'cov_3'])
            plt.autoscale(False)
            draw_lines(plt,
                       gt,
                       angle_keys,
                       labels=['Ground Truth'],
                       linewidth=0.25,
                       dashes=(1, 1))
            draw_lines(plt,
                       vl,
                       angle_keys,
                       labels=['Observation'],
                       linestyle='None',
                       marker='o',
                       markersize=2)
            plt.xlabel('Time (s)')
            plt.ylabel(r'Angle ($^\circ$)')
            plt.title('Orientation')
            plt.legend(prop={'size': 6})
            pdf.savefig()
            plt.close()

            # feature counts
            plt.figure()
            plt.plot(ekf['t'],
                     ekf['ml_count'],
                     linestyle='None',
                     marker='x',
                     markersize=6,
                     color='#FF0000',
                     label='Integrated VL Features')
            plt.plot(ekf['t'],
                     ekf['of_count'],
                     marker='|',
                     markersize=2,
                     color='#0000FF',
                     label='Integrated OF Features')
            plt.plot(vl['t'],
                     vl['count'],
                     linestyle='None',
                     marker='.',
                     markersize=2,
                     color='#B300FF',
                     label='Observed VL Features (at Reg. Time)')
            plt.plot(of['t'],
                     of['count'],
                     linestyle='None',
                     marker=',',
                     markersize=2,
                     color='#00FFb3',
                     label='Observed OF Features (at Reg. Time)')
            plt.xlabel('Time (s)')
            plt.ylabel('Number of Features')
            plt.title('EKF Features')
            plt.legend(prop={'size': 6})
            pdf.savefig()
            plt.close()

            # Mahalanobis distance
            plt.figure()
            if len(self.mahal['boxes']) > 0:
                # NOTE(review): newer matplotlib releases renamed
                # `manage_xticks` to `manage_ticks`; kept as-is because the
                # targeted matplotlib version is not visible here.
                boxes = plt.boxplot(self.mahal['boxes'],
                                    positions=self.mahal['times'],
                                    widths=0.2,
                                    manage_xticks=False,
                                    patch_artist=True)
                plt.setp(boxes['whiskers'], color='Black', linewidth=0.25)
                plt.setp(boxes['caps'], color='Black', linewidth=0.25)
                plt.setp(boxes['medians'], color='Black', linewidth=0.25)
                plt.setp(boxes['fliers'], color='r', marker='x', markersize=1)
                plt.setp(boxes['boxes'],
                         color='Black',
                         facecolor='SkyBlue',
                         linewidth=0.25)
            plt.title('VL Features Mahalnobis Distances')
            plt.xlabel('Time (s)')
            plt.ylabel('Mahalnobis Distance')
            pdf.savefig()
            plt.close()

            # linear velocity and acceleration
            plt.figure()
            draw_lines(plt,
                       ekf, ['vx', 'vy', 'vz'],
                       labels=['Velocity'],
                       linewidth=0.5)
            draw_bands(plt, ekf, ['vx', 'vy', 'vz'],
                       ['cov_7', 'cov_8', 'cov_9'])
            plt.title('Velocity')
            plt.xlabel('Time (s)')
            plt.ylabel('Velocity (m/s)')
            plt.ylim(-0.5, 0.5)
            plt.legend(prop={'size': 6})
            pdf.savefig()
            plt.close()

            plt.figure()
            draw_lines(plt,
                       ekf, ['ax', 'ay', 'az'],
                       labels=['Acceleration'],
                       dashes=(1, 1),
                       linewidth=0.5)
            plt.title('Acceleration')
            plt.xlabel('Time (s)')
            plt.ylabel('Acceleration (m/s$^2$)')
            plt.legend(prop={'size': 6})
            pdf.savefig()
            plt.close()

            # angle and angular velocity
            plt.figure()
            ax = plt.gca()
            draw_lines(ax, ekf, angle_keys, labels=['Angle'], linewidth=0.5)
            # Second y-axis sharing the same time axis.
            ax2 = ax.twinx()
            draw_lines(ax2,
                       ekf, ['ox', 'oy', 'oz'],
                       labels=['Angular Velocity'],
                       linewidth=0.5,
                       dashes=(1, 1))
            ax.set_title('Angular Velocity')
            ax.set_xlabel('Time (s)')
            ax.set_ylabel(r'Angle ($^\circ$)')
            ax2.set_ylabel(r'Angular Velocity ($^\circ$/s)')
            # Merge the handles of both axes into a single legend.
            lines, labels = ax.get_legend_handles_labels()
            lines2, labels2 = ax2.get_legend_handles_labels()
            ax.legend(lines + lines2, labels + labels2, prop={'size': 6})
            pdf.savefig()
            plt.close()

            # bias
            plt.figure()
            ax = plt.gca()
            draw_lines(ax,
                       ekf, ['abx', 'aby', 'abz'],
                       labels=['Accelerometer Bias'],
                       linewidth=0.5)
            draw_bands(ax, ekf, ['abx', 'aby', 'abz'],
                       ['cov_10', 'cov_11', 'cov_12'])
            ax2 = ax.twinx()
            draw_lines(ax2,
                       ekf, ['gbx', 'gby', 'gbz'],
                       labels=['Gyrometer Bias'],
                       linewidth=0.5,
                       dashes=(1, 1))
            draw_bands(ax2, ekf, ['gbx', 'gby', 'gbz'],
                       ['cov_4', 'cov_5', 'cov_6'])
            ax.set_title('Bias Terms')
            ax.set_xlabel('Time (s)')
            ax.set_ylabel('Accelerometer Bias (m/s$^2$)')
            ax2.set_ylabel(r'Gyrometer Bias ($^\circ$/s)')
            lines, labels = ax.get_legend_handles_labels()
            lines2, labels2 = ax2.get_legend_handles_labels()
            ax.legend(lines + lines2, labels + labels2, prop={'size': 6})
            pdf.savefig()
            plt.close()

            # covariance
            plt.figure()
            plt.plot(ekf['t'],
                     covariance_map(self.ekf, 'cov_13', 'cov_14', 'cov_15'),
                     colors[0],
                     linewidth=0.5,
                     label='Position Covariance')
            plt.plot(ekf['t'],
                     covariance_map(self.ekf, 'cov_7', 'cov_8', 'cov_9'),
                     colors[1],
                     linewidth=0.5,
                     label='Velocity Covariance')
            plt.plot(ekf['t'],
                     covariance_map(self.ekf, 'cov_1', 'cov_2', 'cov_3'),
                     colors[2],
                     linewidth=0.5,
                     label='Orientation Covariance')
            plt.title('Std. Deviation')
            plt.xlabel('Time (s)')
            plt.ylabel('Covariance')
            plt.legend(prop={'size': 6})
            pdf.savefig()
            plt.close()

            # Mahalanobis distance histogram
            plt.figure()
            # NOTE(review): newer matplotlib releases replaced `normed` with
            # `density`; kept as-is because the targeted matplotlib version
            # is not visible here.
            plt.hist(
                [item for sublist in self.mahal['boxes'] for item in sublist],
                bins=200,
                range=(0, 50),
                normed=True)
            plt.xlabel('Mahalnobis Distance')
            plt.ylabel('pdf')
            plt.title('Mahalnobis Distance Histogram')
            pdf.savefig()
            plt.close()

            plt.figure()
            plt.imshow(np.transpose(self.vl_heatmap),
                       cmap='hot',
                       interpolation='nearest',
                       vmin=0,
                       vmax=np.amax(self.vl_heatmap))
            plt.title('Visual Landmarks Density')
            pdf.savefig()
            plt.close()

            plt.figure()
            plt.plot(of['t'],
                     of['oldest'],
                     color=colors[0],
                     linewidth=0.5,
                     label='Oldest')
            plt.plot(of['t'],
                     of['median'],
                     color=colors[1],
                     linewidth=0.5,
                     label='Median')
            plt.plot(of['t'],
                     of['youngest'],
                     color=colors[2],
                     linewidth=0.5,
                     label='Youngest')
            plt.xlabel('Time (s)')
            plt.ylabel('Optical Flow Feature Age (s)')
            plt.title('Optical Flow Feature Age')
            plt.legend(prop={'size': 6})
            pdf.savefig()
            plt.close()

            plt.figure()
            # NOTE(review): the colour scale is capped with the maximum of
            # vl_heatmap, not of_heatmap. This may be deliberate (shared
            # scale with the landmark page) or a copy-paste slip -- confirm.
            plt.imshow(np.transpose(self.of_heatmap),
                       cmap='hot',
                       interpolation='nearest',
                       vmin=0,
                       vmax=np.amax(self.vl_heatmap))
            plt.title('Optical Flow Density')
            pdf.savefig()
            plt.close()

            # Final page: free-form text only, with the axes hidden.
            plt.figure()
            plt.axis('off')
            plt.text(0.0, 0.5, output_text)
            pdf.savefig()
            plt.close()
import matplotlib.pyplot as plt
# Columns 1 and 2 of every row of train_X, unfiltered.
# NOTE(review): figure1/figure2 are not read again in the visible part of
# this script.
figure1 = [k[1] for k in train_X]
figure2 = [k[2] for k in train_X]
# The same two columns, keeping only rows where neither value exceeds 40.
figu1=[]
figu2=[]
for k in train_X:
    if k[1]>40 or k[2]>40:
        continue 
    else:
        figu1.append(k[1])
        figu2.append(k[2])
#labels = ["Intercolumnar distance", "Upper margin", "Lower margin", "Exploitation", 
#          "Row number", "Modular ratio", "Interlinear spacing", 
#          "Weight", "Peak number", "mr/is"]
labels=["Upper margin", "Lower margin"]
# One box per filtered column; whiskers at 1.5 * IQR, outliers drawn as "o".
plt.boxplot([figu1, figu2], labels=labels, sym ="o", whis = 1.5)
plt.show()


# Columns 7 and 8, keeping only rows where neither value exceeds 10.
figure7 = []
figure8 = []
for k in train_X:
    if k[7]>10 or k[8]>10:
        continue 
    else:
        figure7.append(k[7])
        figure8.append(k[8])
  
yy=[]
# Column 0 of every row of train_X.
figure=[k[0] for k in train_X]
for i in range(len(data)-1):
def reportPlotAllProjectBreaksDistribution(o_names, p_names, path):
    """Summarise and plot the breaks-per-year distribution of every project.

    For each index ``i`` the file
    ``<path>/<o_names[i]>/<p_names[i]>/inactivity_interval_list.csv`` is read.
    Every row whose length minus three is positive contributes one
    breaks-per-year value, computed from that count and the number of days
    stored in the row's second-to-last cell.

    Per-project mean, standard deviation, variance, median and the
    correlation between breaks-per-year and lifetime are handed to the
    file-level ``add`` helper together with the statistics table
    (presumably appending a row -- confirm against ``add``). The table is
    written to ``<path>/breaks_stats_all.csv`` and the distributions are
    drawn as one box per project, in reverse project order, and saved to
    ``<path>/BreaksDistribution``.

    Args:
        o_names: Sequence of first-level directory names, one per project.
        p_names: Sequence of second-level directory names, indexed in
            parallel with ``o_names``; also used for the tick labels.
        path: Base directory containing the inputs and receiving the outputs.
    """
    import matplotlib.pyplot as plt
    import numpy
    import csv
    import pandas

    stats_table = pandas.DataFrame(columns=[
        'project', 'mean', 'st_dev', 'var', 'median', 'breaks_devlife_corr'
    ])
    rates_by_project = []
    for idx, project_name in enumerate(o_names):
        main_project = p_names[idx]
        lifetime_table = pandas.DataFrame(columns=['BpY', 'life'])

        # Load the breaks table of this project.
        csv_path = '/'.join(
            [path, project_name, main_project, 'inactivity_interval_list.csv'])
        with open(csv_path, 'r') as handle:
            rows = [[str(cell) for cell in record]
                    for record in csv.reader(handle, delimiter=',')]

        rates = []
        for row in rows:
            break_count = len(row) - 3
            if break_count <= 0:
                continue
            day_count = int(row[-2])
            year_span = day_count / 365
            rate = break_count / year_span
            rates.append(rate)
            add(lifetime_table, [rate, day_count])
        rates_by_project.append(rates)

        correlation = numpy.corrcoef(lifetime_table['BpY'],
                                     lifetime_table['life'])[1][0]
        add(stats_table, [
            project_name,
            numpy.mean(rates),
            numpy.std(rates),
            numpy.var(rates),
            numpy.median(rates),
            correlation
        ])

    stats_table.to_csv(path + '/breaks_stats_all.csv',
                       sep=';',
                       na_rep='NA',
                       header=True,
                       index=False,
                       mode='w',
                       encoding='utf-8',
                       quoting=None,
                       quotechar='"',
                       line_terminator='\n',
                       decimal='.')

    # The 'framework' project is displayed under the name 'laravel'.
    tick_labels = ['laravel' if name == 'framework' else name
                   for name in p_names]

    plt.clf()
    # Boxes and tick labels are both drawn in reverse project order.
    plt.boxplot(rates_by_project[::-1])
    plt.xticks(numpy.arange(1, len(p_names) + 1),
               tick_labels[::-1],
               rotation=20)
    # Pad margins so that markers don't get clipped by the axes: plt.margins(0.2)
    # Tweak spacing to prevent clipping of tick-label: plt.subplots_adjust(bottom=0.15)
    plt.grid(False)
    plt.ylabel("Pauses per Year")
    plt.savefig(path + "/BreaksDistribution", dpi=600)
    plt.clf()
Ejemplo n.º 53
0
# Print the summary statistics computed earlier in the script.
print("Median API: ", median1)
print("Mode API: ", mode1)
print("Range: ", range1)
print("Standard Deviation API: ", standard_deviation)
# sep="\n" puts the label and the value on separate lines.
print("Variance API: ", variance, sep = "\n")
print("Percentile API: ", percentile)

"""# Data Visualization <br>
Histogram plotting
"""

# One histogram per column of the dataset.
dataset.hist(xlabelsize= 10, ylabelsize= 10, figsize = (10,10))

"""# Box Plot visualization <br>
1. Visual representation of numerical data through their quartiles. <br>
2. Used to detect outliers in dataset<br>
3. Summarizes data using 25th, 50th, and 75th percentile
"""

# Separate box plot for each of the columns at positions 1, 2 and 3,
# converted to float first.
data = np.array(dataset)
for i in range(1, 4):
    plt.boxplot(np.array(data[:, i], dtype='float'))
    plt.show()

# NOTE(review): DataFrame.ix was removed in pandas 1.0; on newer pandas
# this line needs .iloc -- confirm the pandas version in use.
sns.boxplot(data=dataset.ix[:, 1:5])

# Sepal length grouped by species.
sns.boxplot(x=dataset['species'], y=dataset['sepal_length'])



# Replace missing LoanAmount values with the column median.
credit_risk_data['LoanAmount'].fillna(credit_risk_data['LoanAmount'].median(),
                                      inplace=True)

# In[484]:

# Loan_Amount_Term: replace missing values with the column median.
credit_risk_data['Loan_Amount_Term'].fillna(
    credit_risk_data['Loan_Amount_Term'].median(), inplace=True)

# In[485]:

# Handling outliers

# In[486]:

plt.boxplot(credit_risk_data['ApplicantIncome'])

# In[487]:

# Numeric columns checked for outliers.
columns = [
    'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term'
]
upper = []
lower = []
values = []

for col in columns:
    # Fences at 1.5 * IQR below the first and above the third quartile.
    q1, q3 = np.percentile(credit_risk_data[col], [25, 75])
    IQR = q3 - q1
    lower_bound = q1 - (1.5 * IQR)
    upper_bound = q3 + (1.5 * IQR)
Ejemplo n.º 55
0
def plotAttribute(cur, planners, attribute, typename):
    """Create a plot for a particular attribute. It will include data for
    all planners that have data for this attribute.

    The chart type depends on ``typename``: ``'ENUM'`` gives stacked
    percentage bars (one segment per enum value), ``'BOOLEAN'`` gives one
    percentage bar per planner, anything else gives a box plot. When at
    least one planner has NULL entries for the attribute, the NULL count of
    each planner is written near the top of the plot.

    Args:
        cur: Database cursor used to query the ``enums`` and ``runs`` tables.
        planners: Sequence of rows; element 0 is used as the planner id in
            the queries and element 1 as the planner's axis label.
        attribute: Name of the ``runs`` column to plot.
        typename: Type tag of the attribute (``'ENUM'``, ``'BOOLEAN'`` or
            any other value for numeric data).

    Returns:
        None. If no planner has any non-NULL value, a message is printed and
        nothing is plotted.
    """
    # NOTE(review): the SQL text below is assembled with string
    # interpolation. `attribute` is a column name and cannot be passed as a
    # bound parameter, so callers must only hand in trusted identifiers.
    labels = []
    measurements = []
    nanCounts = []
    if typename == 'ENUM':
        cur.execute('SELECT description FROM enums where name IS "%s"' %
                    attribute)
        descriptions = [t[0] for t in cur.fetchall()]
        numValues = len(descriptions)
    for planner in planners:
        cur.execute(
            'SELECT %s FROM runs WHERE plannerid = %s AND %s IS NOT NULL' %
            (attribute, planner[0], attribute))
        measurement = [t[0] for t in cur.fetchall() if t[0] is not None]
        if measurement:
            cur.execute(
                'SELECT count(*) FROM runs WHERE plannerid = %s AND %s IS NULL'
                % (planner[0], attribute))
            nanCounts.append(cur.fetchone()[0])
            labels.append(planner[1])
            if typename == 'ENUM':
                # Convert the raw enum values into a percentage per value.
                scale = 100. / len(measurement)
                measurements.append(
                    [measurement.count(i) * scale for i in range(numValues)])
            else:
                measurements.append(measurement)

    if not measurements:
        print('Skipping "%s": no available measurements' % attribute)
        return

    plt.clf()
    ax = plt.gca()
    if typename == 'ENUM':
        width = .5
        # After the transpose, each row holds one enum value and each
        # column one planner.
        measurements = np.transpose(np.vstack(measurements))
        colsum = np.sum(measurements, axis=1)
        # Only enum values that occur for at least one planner are drawn.
        rows = np.where(colsum != 0)[0]
        heights = np.zeros((1, measurements.shape[1]))
        ind = range(measurements.shape[1])
        for i in rows:
            plt.bar(ind,
                    measurements[i],
                    width,
                    bottom=heights[0],
                    color=matplotlib.cm.hot(int(floor(i * 256 / numValues))),
                    label=descriptions[i])
            # Stack the next segment on top of what has been drawn so far.
            heights = heights + measurements[i]
        plt.xticks([x + width / 2. for x in ind], labels, rotation=30)
        ax.set_ylabel(attribute.replace('_', ' ') + ' (%)')
        # Shrink the axes horizontally to make room for the legend on the
        # right-hand side.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        props = matplotlib.font_manager.FontProperties()
        props.set_size('small')
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), prop=props)
    elif typename == 'BOOLEAN':
        width = .5
        measurementsPercentage = [sum(m) * 100. / len(m) for m in measurements]
        ind = range(len(measurements))
        plt.bar(ind, measurementsPercentage, width)
        plt.xticks([x + width / 2. for x in ind], labels, rotation=30)
        ax.set_ylabel(attribute.replace('_', ' ') + ' (%)')
    else:
        # The bootstrap argument is only passed for matplotlib >= 1.
        if int(matplotlibversion.split('.')[0]) < 1:
            plt.boxplot(measurements, notch=0, sym='k+', vert=1, whis=1.5)
        else:
            plt.boxplot(measurements,
                        notch=0,
                        sym='k+',
                        vert=1,
                        whis=1.5,
                        bootstrap=1000)
        ax.set_ylabel(attribute.replace('_', ' '))
        xtickNames = plt.setp(ax, xticklabels=labels)
        plt.setp(xtickNames, rotation=25)
    ax.set_xlabel('Motion planning algorithm')
    ax.yaxis.grid(True,
                  linestyle='-',
                  which='major',
                  color='lightgrey',
                  alpha=0.5)
    if max(nanCounts) > 0:
        # Annotate each planner with its number of NULL entries.
        maxy = max([max(y) for y in measurements])
        for i in range(len(labels)):
            # `width` only exists in the bar branches; the conditional
            # expression reads it for BOOLEAN only.
            x = i + width / 2 if typename == 'BOOLEAN' else i + 1
            ax.text(x,
                    .95 * maxy,
                    str(nanCounts[i]),
                    horizontalalignment='center',
                    size='small')
    plt.show()
Ejemplo n.º 56
0
import matplotlib.pyplot as plt
import random

# Sample values to plot.
vetor = []

# Fill the list with 10 random integers between 0 and 10000 (inclusive).
for i in range(10):
    vetor.append(random.randint(0, 10000))

# Draw a single box summarising the sample.
plt.boxplot(vetor)
plt.show()
    precision.append(cv_prec.mean())
    recall.append(cv_recall.mean())
    f1.append(cv_f1.mean())

    print('----------------------------------------')
    print(msg)
    Y_pred = cross_val_predict(model, X_input, Y_output, cv=N)
    conf_mat = confusion_matrix(Y_output, Y_pred)
    print(conf_mat)
    print('----------------------------------------')

# Box plot for accuracy comparison: one box per entry of accuracyresults,
# with the entries of names as tick labels.
graph = plt.figure()
graph.suptitle('Accuracy Comparison')
ax = graph.add_subplot(111)
plt.boxplot(accuracyresults)
ax.set_xticklabels(names)

# Bar positions 0..len(accuracy)-1.
y_pos = np.arange(len(accuracy))

# Bar chart for accuracy comparison
graph2 = plt.figure()
graph2.suptitle('Accuracy Comparison')
ax2 = graph2.add_subplot(111)
plt.bar(y_pos, accuracy, align='center', alpha=0.5)
plt.xticks(y_pos, names)
plt.show()

# Remove unwanted characters: turn names into its string form and strip the
# brackets and single quotes from it.
names = str(names)
names = names.replace('[', '').replace(']', '').replace("'", "")
Ejemplo n.º 58
0
# Read the raw file contents
with open(filename, 'r') as f:
    _ = f.read()
# Parse the JSON text into a DataFrame
data = pd.read_json(_)

# Show the number of missing values for each axis (column)
print(data.isnull().sum())

# Warn if any value is missing anywhere in the data
if data.isnull().sum().sum() != 0:
    print("存在空值,需要进行处理")

# Draw a box plot for each of the first three columns (axes) to look for
# unusual values
plt.subplot(131)
plt.boxplot(data[data.columns[0]])
plt.subplot(132)
plt.boxplot(data[data.columns[1]])
plt.subplot(133)
plt.boxplot(data[data.columns[2]])
plt.show()

# Estimate how long this questionnaire took to answer, as a first check of
# whether the data is valid
# NOTE(review): row count / 5 / 60 presumably assumes 5 samples per second
# and yields minutes -- confirm the sampling rate.
answer_time = data.shape[0] / 5 / 60
if answer_time > 90 or answer_time < 10:
    print("答题时长异常(少于10min或大于90min),建议删除该数据")

# Compute the variance of each axis; a very small variance may mean the
# phone was lying on a table rather than being held in the hand
if data[data.columns[0]].var() < 0.001 or data[
        data.columns[1]].var() < 0.001 or data[data.columns[2]].var() < 0.001:
    print("可能手机放在平面,未拿在手上,建议删除")
Ejemplo n.º 59
0
def _pairwise_ttest_pvalues(groups, ndigits):
    """Return rounded t-test p-values for every ordered pair of groups.

    The result is row-major: the comparisons of the first group against every
    group come first, so the last ``len(groups)`` entries compare the final
    group against each group in turn. A pair for which ``ttest_ind`` raises
    is reported as p = 1.0.
    """
    pvalues = []
    for first in groups:
        for second in groups:
            try:
                _, p = ttest_ind(first, second)
            except Exception:
                # Best effort: treat a failed test as "no difference".
                p = 1.00
            pvalues.append(round(p, ndigits))
    return pvalues


def _styled_boxplot(groups):
    """Draw a black/light-gray box plot of groups on the current axes."""
    plot = plt.boxplot(groups, whis=1, patch_artist=True)
    # Keyword order matters for the boxes: `color` sets edge and face, then
    # `facecolor` overrides the fill.
    plt.setp(plot['boxes'], color='Black', linewidth=2, facecolor='lightgray')
    plt.setp(plot['whiskers'], color='black', linewidth=1)
    plt.setp(plot['caps'], color='black', linewidth=1)
    plt.setp(plot['fliers'], color='white', marker='o', markersize=4)
    plt.setp(plot['medians'], color='Black', linewidth=2)
    return plot


def etandeamain():
    """Plot ET and EA score comparisons as box plots for each cancer type.

    Data is loaded through readinEA(), obtainhighcov(), obtainETscores(),
    acquiremutations() and getlistofsigpos(). For every cancer type two
    figures are saved under ``../Paper/PotentialFigures/EAET/`` (``ET_<type>``
    and ``EA_<type>``), and the EA groups are dumped tab-separated, one group
    per line, to ``../ImagesOlfactory/<type>/EAforboxplot_<type>.txt``.
    Each figure title carries the p-values of the last group against every
    group.
    """
    EAdict = readinEA()
    # NOTE(review): highcovlist is not used by anything below; the call is
    # kept in case obtainhighcov() has side effects.
    highcovlist = obtainhighcov()
    etdict = obtainETscores()

    with open('../DATA/Mutations/filelist.txt') as cancerfile:
        cancerlist = [
            cancer for cancer in (line.strip('\n') for line in cancerfile)
            if cancer != ''
        ]
    print(cancerlist)
    # The list read from disk is only reported; the analysis itself runs on
    # this fixed set of cancer types.
    cancerlist = ['COAD', 'HNSC', 'LUAD', 'LUSC', 'SKCM', 'STAD', 'UCEC']

    # Every ET score, used as the ET control group.
    etallarray = [float(etdict[i]) for i in etdict]

    # EA control group: every EA value that is not the '-' placeholder. It
    # does not depend on the cancer type, so it is built once up front.
    earandarray = []
    for pos in EAdict:
        position = int(pos[1:])
        # NOTE(review): the looked-up value is unused. The lookup is retained
        # because it raises KeyError when an EA position has no ET score,
        # as the original code did - confirm whether that check is wanted.
        etvalue = etdict[position]
        for j in EAdict[pos]:
            if j != '-':
                earandarray.append(float(j))

    for cancertype in cancerlist:
        print('cancertype', cancertype)
        eamutationdict, eamutlist = acquiremutations(cancertype)
        listofsigpos, lowsigpos = getlistofsigpos(cancertype)
        print('listofsigpos', listofsigpos)
        print('lowsigpos', lowsigpos)

        # EA values of the mutations at the frequently (hyper) and
        # infrequently (hypo) mutated positions.
        eahyperarray = []
        eahypoarray = []
        for pos in eamutationdict:
            if pos in listofsigpos:
                eahyperarray.extend(float(ea) for ea in eamutationdict[pos])
            if pos in lowsigpos:
                # The original tested eahyperarray here, which reset the
                # hypo list to a single value whenever no hyper values had
                # been collected.
                eahypoarray.extend(float(ea) for ea in eamutationdict[pos])

        # ET scores of the same two position sets (positions without a
        # score are skipped).
        ethyperarray = [
            float(etdict[pos]) for pos in listofsigpos if pos in etdict
        ]
        ethypoarray = [
            float(etdict[pos]) for pos in lowsigpos if pos in etdict
        ]

        # --- ET figure ---
        # mannwhitneyu / ks_2samp had been tried as alternatives to the
        # t-test used by the helper.
        ettuple = [ethyperarray, ethypoarray, etallarray]
        etplist = _pairwise_ttest_pvalues(ettuple, 3)

        _styled_boxplot(ettuple)
        plt.title('ET comparison of %s \n%s' % (cancertype, etplist[-3:]))
        plt.xticks(
            [1, 2, 3],
            ['Frequent Positions', 'Infrequent Positions', 'Random Control'])
        plt.savefig('../Paper/PotentialFigures/EAET/ET_%s.png' % (cancertype))
        plt.close()

        # --- EA dump and figure ---
        eatuple = [eahyperarray, eahypoarray, eamutlist, earandarray]
        with open(
                '../ImagesOlfactory/%s/EAforboxplot_%s.txt' %
                (cancertype, cancertype), 'w') as eaoutfile:
            for group in eatuple:
                eaoutfile.write(''.join(str(value) + '\t' for value in group))
                eaoutfile.write('\n')

        eaplist = _pairwise_ttest_pvalues(eatuple, 5)

        _styled_boxplot(eatuple)
        plt.title('EA comparison of %s\n%s' % (cancertype, eaplist[-4:]))
        plt.xticks([1, 2, 3, 4], [
            'Frequent Positions', 'Infrequent Positions', 'All Mutations',
            'Random Control'
        ])

        plt.ylim(0, 100)
        plt.savefig('../Paper/PotentialFigures/EAET/EA_%s.png' % (cancertype))
        plt.close()
Ejemplo n.º 60
0
# Exercise 4.2.3

from matplotlib.pyplot import boxplot, xticks, ylabel, title, show

# requires data from exercise 4.2.1
from ex4_2_1 import *

# Box plot of X (loaded by exercise 4.2.1); the four tick positions are
# labelled with the attribute names.
boxplot(X)
xticks([1, 2, 3, 4], attributeNames)
ylabel("cm")
title("Fisher's Iris data set - boxplot")
show()

print("Ran Exercise 4.2.3")