def plot_perf(df, engines, title, filename=None):
    """Plot one timing curve per engine against the number of rows.

    Parameters
    ----------
    df
        Table indexed by column name; ``df[engine]`` supplies the y values
        for each entry of ``engines``.  The x values are taken from the
        ``'size'`` column when ``df`` has one.
    engines
        Iterable of column names to plot; each becomes a legend entry.
    title
        Title of the axes.
    filename
        When not ``None``, the figure is saved to this path.
    """
    from matplotlib.pyplot import figure, rc

    try:
        from mpltools import style
    except ImportError:
        # mpltools is optional: keep matplotlib's default look without it.
        pass
    else:
        style.use('ggplot')

    rc('text', usetex=True)

    # On a pandas DataFrame ``df.size`` is the element-count property (a
    # single int), which shadows attribute access to a column named "size".
    # Use explicit column lookup when such a column exists so that x has one
    # value per row; otherwise keep the previous attribute access.
    # NOTE(review): assumes the row counts are stored under 'size' -- confirm
    # against the caller that builds ``df``.
    if 'size' in getattr(df, 'columns', ()):
        n_rows = df['size']
    else:
        n_rows = df.size

    fig = figure(figsize=(4, 3), dpi=100)
    ax = fig.add_subplot(111)

    for engine in engines:
        ax.plot(n_rows, df[engine], label=engine, lw=2)

    ax.set_xlabel('Number of Rows')
    ax.set_ylabel('Time (s)')
    ax.set_title(title)
    ax.legend(loc='best')
    ax.tick_params(top=False, right=False)

    fig.tight_layout()

    if filename is not None:
        fig.savefig(filename)
def plot(self):
    """Plot the pickled mean values for this object against ASIC allocation.

    Reads four consecutive pickles from ``<DATA_DIR>/<self.id>.pypkl``; only
    the first one (the mean lists) is passed to ``plot_data``.  The output
    file name is built from ``self.id``, the last character of every entry
    of ``self.kids`` and ``self.fmt``.
    """
    pickle_path = joinpath(DATA_DIR, '%s.pypkl' % self.id)
    # NOTE(review): pickle.load runs arbitrary code from the file; only use
    # this on data files the project itself produced.
    with open(pickle_path, 'rb') as stream:
        mean_lists = pickle.load(stream)
        # The remaining three objects are read in order but not used below.
        std_lists = pickle.load(stream)
        gmean_lists = pickle.load(stream)
        hmean_lists = pickle.load(stream)

    if use_mpl_style:
        style.use('ggplot')

    x_lists = numpy.array(self.asic_area_list) * 0.01

    # One legend entry per allocation configuration, e.g. "1-2-3-4".
    legend_labels = []
    for alloc_config in self.alloc_configs:
        legend_labels.append('-'.join('%d' % amount for amount in alloc_config))

    def decorate(axes, fig):
        # Shrink tick and legend fonts, then attach the legend to the lines.
        for group, options in (('xtick', {'labelsize': 8}),
                               ('ytick', {'labelsize': 8}),
                               ('legend', {'fontsize': 8})):
            matplotlib.rc(group, **options)
        axes.legend(axes.lines, legend_labels, loc='upper right',
                    title='Acc3, 4, 5, 6 alloc',
                    bbox_to_anchor=(0.85, 0.55, 0.2, 0.45))

    kid_suffix = '-'.join([kid[-1:] for kid in self.kids])
    out_name = '%s-%s.%s' % (self.id, kid_suffix, self.fmt)

    plot_data(x_lists, mean_lists,
              xlabel='Total ASIC allocation',
              ylabel='Speedup (mean)',
              xlim=(0, 0.5),
              figsize=(4, 3),
              ms_list=(8,),
              cb_func=decorate,
              figdir=FIG_DIR,
              ofn=out_name)
def figures():
    """Write the partition-size and epsilon-sweep figures as PDFs.

    One ``figures/<dataset>.part_sizes.pdf`` is produced per entry of
    ``DATASETS``; the two sweep figures use the first dataset only.
    """
    from pylab import figure
    from matplotlib import rc

    # Font and backend settings, applied in this order.
    rc_settings = (
        ('ps', {'useafm': True}),
        ('pdf', {'use14corefonts': True}),
        ('text', {'usetex': True}),
        ('font', {'family': 'sans-serif'}),
        ('font', {'sans-serif': ['Computer Modern']}),
    )
    for group, options in rc_settings:
        rc(group, **options)

    from mpltools import style
    style.use('ggplot')
    rc('axes', grid=False)

    ensure_dir('figures')

    for dataset, _ in DATASETS:
        part_fig = figure()
        plot_part_sizes(part_fig, dataset)
        part_fig.savefig('figures/%s.part_sizes.pdf' % dataset,
                         bbox_inches='tight')

    for eps in ('eps1', 'eps2'):
        dataset, _ = DATASETS[0]
        sweep_fig = figure()
        plot_eps_sweep(sweep_fig, dataset, eps)
        sweep_fig.savefig('figures/%s.sweep_%s.pdf' % (dataset, eps),
                          bbox_inches='tight')
def plot_length_dist(series, fmt='eps', axis=None, count_series=None,
                     xlims=(0, 20), title='', label='', color='k'):
    """Plot the summed counts of the sequences in ``series`` per length.

    Parameters
    ----------
    series
        pandas Series of sequences; ``len`` is applied to every element.
    fmt
        Unused by this function; kept for interface compatibility.
    axis
        Axes to draw on, passed to ``DataFrame.plot`` as ``ax``.
    count_series
        Optional per-sequence counts aligned with ``series``.  When
        ``None`` every sequence counts once.
    xlims
        ``(low, high)`` pair used both for the x limits and, through
        ``range(*xlims)``, for the tick positions.
    title
        Plot title; skipped when empty.
    label
        Name given to the plotted column (shown in the legend).
    color
        Line colour passed to ``DataFrame.plot``.

    Returns
    -------
    The current matplotlib axes after plotting.
    """
    style.use('ggplot')
    lengths = series.apply(len)
    if count_series is None:
        count_series = lengths.apply(lambda x: 1)
    length_df = pd.concat([lengths, count_series], axis=1)
    length_df.columns = ['Length', 'Count']
    final_series = length_df.groupby('Length').sum()
    # Make sure length 0 is present so the curve starts at the origin.
    if 0 not in final_series.index:
        # ``.loc`` replaces the removed ``.ix`` indexer (label-based here).
        final_series.loc[0] = 0
    # ``DataFrame.sort()`` without arguments sorted by index; ``sort_index``
    # is the equivalent that still exists in current pandas.
    final_series = final_series.sort_index()
    final_series.columns = [label]
    ax = final_series.plot(xticks=range(*xlims), color=color, ax=axis)
    plt.xlim(*xlims)
    if len(title) > 0:
        plt.title(title)
    plt.xlabel('Cellular Fragment Length', fontsize=16)
    ax = plt.gca()
    ax.get_yaxis().get_major_formatter().set_scientific(True)
    # NOTE(review): the exponent in the label is hard-coded to 10^7 rather
    # than read from the axis offset text -- confirm it matches the data.
    plt.ylabel(r'Counts ( x $10^7$)', fontsize=16)
    return ax
def do_plot(numpy_stats, mpl_stats, mahotas_stats, skimage_stats, sklearn_stats):
    """Draw five stacked line-count panels and save them as ``nr_lines.png``.

    Every ``*_stats`` argument must expose ``datetimes`` and ``lines``; the
    line counts are plotted in thousands.  All panels start at the first
    date of ``numpy_stats`` with a count of zero.
    """
    import datetime

    from matplotlib import pyplot as plt
    from mpltools import style

    style.use('ggplot')

    label_x = datetime.datetime(2002, 2, 1)
    first_date = numpy_stats.datetimes[0]

    def draw_panel(stats, name):
        import numpy as np
        # Prepend the common start date with zero lines to anchor the curve.
        dates = np.concatenate(([first_date], stats.datetimes))
        kilo_lines = np.concatenate(([0], stats.lines / 1000.))
        plt.fill_between(dates, kilo_lines)
        plt.plot(dates, kilo_lines, color="k", lw=2)
        plt.text(label_x, stats.lines.max() / 1000. * .7, name)

    panels = (
        (numpy_stats, 'numpy'),
        (mpl_stats, 'matplotlib'),
        (mahotas_stats, 'mahotas'),
        (skimage_stats, 'skimage'),
        (sklearn_stats, 'sklearn'),
    )
    for row, (stats, name) in enumerate(panels, 1):
        plt.subplot(5, 1, row)
        draw_panel(stats, name)

    plt.tight_layout()
    plt.savefig('nr_lines.png')
def show_gauge(gauge):
    """Draw ``gauge`` through ``GaugePlotting`` in ggplot style and show it."""
    import matplotlib.pyplot as pyplot
    from mpltools import style

    style.use('ggplot')
    # GaugePlotting receives the pyplot module itself as its drawing target.
    GaugePlotting(gauge).plot(pyplot)
    pyplot.show()
def plot_logs(*logs_with_labels):
    """Plot convergence curves for several logs and save two PDFs.

    Each positional argument is a ``"filename:label"`` string.  The logs are
    read with ``read_log`` and drawn into ``convergence.pdf`` and
    ``convergence_envelope.pdf``.
    """
    from pylab import figure
    from matplotlib import rc

    for group, options in (('ps', {'useafm': True}),
                           ('pdf', {'use14corefonts': True}),
                           ('text', {'usetex': True}),
                           ('font', {'family': 'sans-serif'}),
                           ('font', {'sans-serif': ['Computer Modern']})):
        rc(group, **options)

    try:
        from mpltools import style
        style.use('ggplot')
        rc('axes', grid=False)
    except ImportError:
        sys.stderr.write(
            'mpltools not installed, using standard (boring) style\n')

    # Parse every "filename:label" spec and load its log right away.
    curves = []
    for spec in logs_with_labels:
        filename, label = spec.strip().split(':')
        curves.append((read_log(filename), label))

    outputs = (
        (plot_convergence, 'convergence.pdf'),
        (plot_convergence_envelope, 'convergence_envelope.pdf'),
    )
    for draw, pdf_name in outputs:
        fig = figure()
        draw(fig.gca(), curves)
        fig.set_size_inches(6, 4)
        fig.savefig(pdf_name, bbox_inches='tight')
def do_plot(numpy_stats, mpl_stats, mahotas_stats, skimage_stats, sklearn_stats):
    """Plot the line-count history of five projects into ``nr_lines.png``.

    The five statistics objects are drawn top to bottom in a 5x1 grid.  Each
    one is read through its ``datetimes`` and ``lines`` attributes, and line
    counts are shown in thousands.
    """
    from mpltools import style
    from matplotlib import pyplot as plt
    import datetime

    style.use('ggplot')

    text_x = datetime.datetime(2002, 2, 1)
    # Every panel is anchored at the first numpy date with zero lines.
    origin_date = numpy_stats.datetimes[0]

    def render(series, title):
        import numpy as np
        xs = np.concatenate(([origin_date], series.datetimes))
        ys = np.concatenate(([0], series.lines / 1000.))
        plt.fill_between(xs, ys)
        plt.plot(xs, ys, color="k", lw=2)
        plt.text(text_x, series.lines.max() / 1000. * .7, title)

    all_stats = [numpy_stats, mpl_stats, mahotas_stats,
                 skimage_stats, sklearn_stats]
    titles = ['numpy', 'matplotlib', 'mahotas', 'skimage', 'sklearn']
    position = 1
    for series, title in zip(all_stats, titles):
        plt.subplot(5, 1, position)
        render(series, title)
        position += 1

    plt.tight_layout()
    plt.savefig('nr_lines.png')
def plot_res(res, png_path, covs, covariate, cluster_df, weights_df=None):
    """Plot one result region and either save or show the figure.

    Parameters
    ----------
    res
        Mapping with at least ``chrom``, ``start``, ``end``, ``p`` and
        ``coef`` entries.
    png_path
        Output selector.  A value ending in ``show`` or an empty value
        displays the plot; a value ending in ``.png``/``.pdf`` has the
        region inserted before the extension; any other value gets
        ``.<region>.png`` appended.  If it contains ``spaghetti`` and the
        cluster has more than one row, ``plot_dmr`` is used instead of
        ``plot_hbar``.
    covs
        Object holding the covariates; ``getattr(covs, covariate)`` selects
        the one of interest.
    covariate
        Name of the covariate to plot.
    cluster_df
        Cluster data handed to the plotting helpers.
    weights_df
        Optional weights, forwarded to ``plot_dmr`` only.
    """
    from matplotlib import pyplot as plt
    from mpltools import style
    style.use('ggplot')

    region = "{chrom}_{start}_{end}".format(**res)
    # Default to "no file": previously an empty png_path matched none of the
    # branches below and left ``png`` unbound, raising UnboundLocalError.
    png = None
    if png_path.endswith('show'):
        png = None
    elif png_path.endswith(('.png', '.pdf')):
        png = "%s.%s%s" % (png_path[:-4], region, png_path[-4:])
    elif png_path:
        png = "%s.%s.png" % (png_path.rstrip("."), region)

    if is_numeric(getattr(covs, covariate)):
        f = plot_continuous(covs, cluster_df, covariate, res['chrom'], res, png)
    else:
        f = plt.figure(figsize=(11, 4))
        # The helpers below draw on the current axes created here.
        f.add_subplot(1, 1, 1)
        if 'spaghetti' in png_path and cluster_df.shape[0] > 1:
            plot_dmr(covs, cluster_df, covariate, res['chrom'], res, png,
                     weights_df)
        else:
            plot_hbar(covs, cluster_df, covariate, res['chrom'], res, png)
        plt.title('p-value: %.3g %s: %.3f' % (res['p'], covariate, res['coef']))
    f.set_tight_layout(True)
    if png:
        plt.savefig(png)
    else:
        plt.show()
    plt.close()
def main(argv):
    """Plot and summarise the nucleotide pairs around the cleavage site.

    Reads a tab-separated table from ``--infile`` (stdin by default) and
    keeps the rows whose ``OFFSET_FROM_START`` is 0, optionally filtered on
    ``3PTRIMMED``.  Each row gets a key made of the last base of
    ``ORIGINAL_SEQUENCE`` (position 0) followed by the first
    ``--three-prime-nucs`` bases of ``THREEPRIME_OF_CLEAVAGE`` (position 1
    onwards) and a weight of ``COUNT / NUM_TIMES_MAP``.  The summed weights
    per key are drawn as a bar chart written to ``--outfile`` in EPS format.
    When ``--stats-file`` is given, selected fractions of the total weight
    are written to it.

    Parameters
    ----------
    argv
        Argument list handed to ``argparse`` (without the program name).
    """
    style.use('ggplot')
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-i', '--infile', type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o', '--outfile', type=str, required=True)
    parser.add_argument('--stats-file', type=argparse.FileType('w'))
    parser.add_argument('--threeprime-trimmed', type=str)
    parser.add_argument('--three-prime-nucs', type=int, default=1)
    parser.add_argument('--convert-single-g', action='store_true')
    args = parser.parse_args(argv)

    df = pd.read_table(args.infile, index_col=None)
    if args.convert_single_g:
        df = df.apply(convert_single_g, axis=1)
    df = df[df['OFFSET_FROM_START'] == 0]
    if args.threeprime_trimmed is not None:
        # An empty string selects the rows that were not trimmed at all.
        if args.threeprime_trimmed == '':
            df = df[df['3PTRIMMED'].isnull()]
        else:
            df = df[df['3PTRIMMED'] == args.threeprime_trimmed]
    df = calculate_num_times_map(df)

    # Column 0: nucleotide key, column 1: weight of the row.
    df = df.apply(lambda x: pd.Series(
        [x['ORIGINAL_SEQUENCE'][-1].upper() +
         x['THREEPRIME_OF_CLEAVAGE'][0:args.three_prime_nucs].upper(),
         float(x['COUNT']) / float(x['NUM_TIMES_MAP'])]), axis=1)

    # ``DataFrame.sort(columns=...)`` no longer exists in pandas;
    # ``sort_values(by=...)`` is its replacement.
    s = df.groupby(0).sum().sort_values(by=1, ascending=False)
    s.plot(kind='bar')
    ax = plt.gca()
    ax.legend().set_visible(False)
    plt.savefig(args.outfile, format='eps')

    s = s.reset_index()
    total = s[1].sum()

    def weight_where(predicate):
        # Summed weight of all keys for which ``predicate(key)`` holds.
        return s[s[0].map(predicate)][1].sum()

    gs_pos_one = weight_where(lambda x: x[1] == 'G')
    as_pos_zero = weight_where(lambda x: x[0] == 'A')
    gs_exclusive = weight_where(lambda x: x[1] == 'G' and x[0] != 'A')
    # "A at position 0 with no G": this used to test x[0] == 'G', which
    # counted G keys instead and also skewed "Fraction A over total" below.
    as_exclusive = weight_where(lambda x: x[0] == 'A' and x[1] != 'G')
    gs_or_as_pos_zero = weight_where(lambda x: x[0] == 'A' or x[0] == 'G')
    gs_both_exclusive = weight_where(lambda x: 'G' in x and x[0] != 'A')
    gs_pos_zero = weight_where(lambda x: x[0] == 'G')

    if args.stats_file:
        for frac, string in [
                (gs_pos_one / total, 'Fraction Gs at position 1: %f\n'),
                (as_pos_zero / total, 'Fraction As at position 0: %f\n'),
                (gs_exclusive / total,
                 'Fraction Gs at position 1 with no As: %f\n'),
                (as_exclusive / total,
                 'Fraction As at position 0 with no Gs: %f\n'),
                (gs_or_as_pos_zero / total,
                 'Fraction As or Gs at pos 0: %f\n'),
                (gs_pos_zero / total, 'Fraction Gs at position 0: %f\n'),
                ((as_exclusive / (gs_both_exclusive + as_exclusive)),
                 'Fraction A over total: %f\n'),
                (total, 'Total: %f\n')
        ]:
            args.stats_file.write(string % frac)
def precision_recall():
    """Print reports and plot precision-recall curves for four attributes.

    For each of ``bmw``, ``ford``, ``sedan`` and ``SUV`` the stored attribute
    classifier is loaded and evaluated through ``BayesNet``.  A
    classification report (scores thresholded at 0.65) and the area under
    the precision-recall curve are printed, and the curve is drawn in its
    own cell of a 2x2 grid.
    """
    from sklearn.metrics import precision_recall_curve
    from sklearn.metrics import auc
    from sklearn.metrics import classification_report
    from mpltools import style
    style.use('ggplot')

    makes = ['bmw', 'ford']
    types = ['sedan', 'SUV']
    args = makes + types
    config = get_config(args)
    (dataset, config) = fgu.get_all_metadata(config)

    for cell, attrib_name in enumerate(args, 1):
        clf_path = '../../../attribute_classifiers/{}.dat'.format(attrib_name)
        attrib_clf = AttributeClassifier.load(clf_path)
        bnet = BayesNet(config, dataset['train_annos'],
                        dataset['class_meta'], [attrib_clf], desc=str(args))
        res = bnet.create_attrib_res_on_images()

        # Images whose class carries the attribute are the positives.
        attrib_selector = AttributeSelector(config, dataset['class_meta'])
        pos_classes = attrib_selector.class_ids_for_attribute(attrib_name)
        true_labels = np.array(res.class_index.isin(pos_classes))

        # Result columns are keyed by the lower-cased attribute name.
        score_column = res[attrib_name.lower()]
        scores = np.array(score_column)

        print("--------------{}-------------".format(attrib_name))
        print(score_column.describe())
        print(classification_report(
            true_labels, scores > 0.65,
            target_names=['not-{}'.format(attrib_name), attrib_name]))

        precision, recall, thresholds = precision_recall_curve(true_labels,
                                                               scores)
        score = auc(recall, precision)
        print("Area Under Curve: %0.2f" % score)

        plt.subplot(2, 2, cell)
        plt.plot(recall, precision, label='Precision-Recall curve')
        plt.title('Precision-Recall: {}'.format(attrib_name))
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.legend(['area = {}'.format(score)])

    plt.draw()
    plt.show()
def plot(src_filename):
    """Render one fitness graph per data row of a CSV results file.

    All ``*.png`` files next to ``src_filename`` are removed first.  The
    first CSV row holds the headers; every following row is converted with
    ``row_to_data`` and saved as ``fig-<row index>.png`` in the same
    directory.  The process exits if the file has no rows.

    Parameters
    ----------
    src_filename
        Path of the CSV file to read.
    """
    # Keep the "<dir>/" prefix form, but leave it empty for a bare file name.
    # The previous unconditional ``dirname + '/'`` turned an empty dirname
    # into '/', so PNGs in the filesystem root were deleted and written.
    dirname = os.path.dirname(src_filename)
    src_dir = dirname + '/' if dirname else ''

    # remove any existing png files in the directory
    for png_filename in glob.glob('%s*.png' % (src_dir)):
        os.remove(png_filename)

    # ``with`` closes the file even if parsing raises.
    with open(src_filename, 'rb') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=','))

    if len(rows) == 0:
        print('No data in csv file.')
        exit()

    headers = rows[0]
    style.use('ggplot')
    print('\nGenerating graphs for %s:' % (src_filename))

    for i in range(1, len(rows)):
        print('Graph %d of %d' % (i, len(rows) - 1))
        params, samples = row_to_data(rows[i], headers)

        max_iters = params['max_iters']
        # for ga
        if 'max_ga_init_iters' in params:
            max_iters += params['max_ga_init_iters']

        # Ceiling division; ``//`` keeps the result an int on Python 3 too.
        sample_interval = (max_iters // len(samples)
                           + (1 if max_iters % len(samples) else 0))
        x_ticks = list(range(0, max_iters, sample_interval))

        # append final fitness value
        x_ticks.append(max_iters)
        samples.append(params['Avg Fitness'])

        fig = plt.figure(i)
        # clear figure in case this function has already been called for it
        fig.clear()
        fig.suptitle('F%d' % (params['bench_fcn']))
        ax = fig.add_subplot(111)
        ax.set_xlabel('Iteration Index')
        ax.set_ylabel('Best Fitness')
        # The y range is three times the middle sample.
        plt.axis([0, max_iters, 0, samples[len(samples) // 2] * 3])
        plt.plot(x_ticks, samples, antialiased=True)
        plt.savefig('%sfig-%d.png' % (src_dir, i), dpi=300)
def main(argv):
    """Plot and summarise the nucleotide pairs around the cleavage site.

    The table read from ``--infile`` (stdin by default) is restricted to
    rows with ``OFFSET_FROM_START == 0`` and optionally filtered on
    ``3PTRIMMED``.  Every remaining row is keyed by the last base of
    ``ORIGINAL_SEQUENCE`` (position 0) plus the first ``--three-prime-nucs``
    bases of ``THREEPRIME_OF_CLEAVAGE`` (position 1 onwards) and weighted by
    ``COUNT / NUM_TIMES_MAP``.  The per-key weight totals are saved as an
    EPS bar chart to ``--outfile``; with ``--stats-file`` a set of fractions
    of the total weight is written as text.

    Parameters
    ----------
    argv
        Argument list handed to ``argparse`` (without the program name).
    """
    style.use('ggplot')
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-i', '--infile', type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o', '--outfile', type=str, required=True)
    parser.add_argument('--stats-file', type=argparse.FileType('w'))
    parser.add_argument('--threeprime-trimmed', type=str)
    parser.add_argument('--three-prime-nucs', type=int, default=1)
    parser.add_argument('--convert-single-g', action='store_true')
    args = parser.parse_args(argv)

    df = pd.read_table(args.infile, index_col=None)
    if args.convert_single_g:
        df = df.apply(convert_single_g, axis=1)
    df = df[df['OFFSET_FROM_START'] == 0]
    if args.threeprime_trimmed is not None:
        # An empty string selects the rows that were not trimmed at all.
        if args.threeprime_trimmed == '':
            df = df[df['3PTRIMMED'].isnull()]
        else:
            df = df[df['3PTRIMMED'] == args.threeprime_trimmed]
    df = calculate_num_times_map(df)

    # Column 0: nucleotide key, column 1: weight of the row.
    df = df.apply(lambda x: pd.Series(
        [x['ORIGINAL_SEQUENCE'][-1].upper() +
         x['THREEPRIME_OF_CLEAVAGE'][0:args.three_prime_nucs].upper(),
         float(x['COUNT']) / float(x['NUM_TIMES_MAP'])]), axis=1)

    # ``sort_values`` replaces ``DataFrame.sort(columns=...)``, which has
    # been removed from pandas.
    s = df.groupby(0).sum().sort_values(by=1, ascending=False)
    s.plot(kind='bar')
    ax = plt.gca()
    ax.legend().set_visible(False)
    plt.savefig(args.outfile, format='eps')

    s = s.reset_index()
    total = s[1].sum()

    def summed(predicate):
        # Total weight of the keys accepted by ``predicate``.
        return s[s[0].map(predicate)][1].sum()

    gs_pos_one = summed(lambda x: x[1] == 'G')
    as_pos_zero = summed(lambda x: x[0] == 'A')
    gs_exclusive = summed(lambda x: x[1] == 'G' and x[0] != 'A')
    # Fixed: the reported quantity is "A at position 0 with no G", but the
    # test used to be x[0] == 'G', which selected G keys instead.
    as_exclusive = summed(lambda x: x[0] == 'A' and x[1] != 'G')
    gs_or_as_pos_zero = summed(lambda x: x[0] == 'A' or x[0] == 'G')
    gs_both_exclusive = summed(lambda x: 'G' in x and x[0] != 'A')
    gs_pos_zero = summed(lambda x: x[0] == 'G')

    if args.stats_file:
        report = [
            (gs_pos_one / total, 'Fraction Gs at position 1: %f\n'),
            (as_pos_zero / total, 'Fraction As at position 0: %f\n'),
            (gs_exclusive / total,
             'Fraction Gs at position 1 with no As: %f\n'),
            (as_exclusive / total,
             'Fraction As at position 0 with no Gs: %f\n'),
            (gs_or_as_pos_zero / total, 'Fraction As or Gs at pos 0: %f\n'),
            (gs_pos_zero / total, 'Fraction Gs at position 0: %f\n'),
            ((as_exclusive / (gs_both_exclusive + as_exclusive)),
             'Fraction A over total: %f\n'),
            (total, 'Total: %f\n'),
        ]
        for frac, string in report:
            args.stats_file.write(string % frac)
def plot_graph(G, titl='', nodesize=300, widthsize=1.5, plotfname='/tmp/tmp.png'):
    """Draw graph ``G`` with labels read from ``obs.txt`` and save it.

    Parameters
    ----------
    G
        networkx graph whose nodes carry an ``'xy'`` attribute used as the
        drawing position.
    titl
        Plot title.
    nodesize
        Node size passed to ``draw_networkx_nodes``.
    widthsize
        Edge width passed to ``draw_networkx_edges``.
    plotfname
        Path of the image to write (300 dpi, tight bounding box).

    Returns
    -------
    int
        Always 0.
    """
    import matplotlib as mpl
    # Select the non-interactive backend before pyplot is imported.
    mpl.use('Agg', warn=False)
    import matplotlib.pyplot as plt
    from mpltools import style
    style.use('ggplot')

    cm = plt.get_cmap('cool')
    cm.set_under('w')
    f = plt.figure(1)
    ax = f.add_subplot(1, 1, 1)

    # Read the labels inside ``with`` so the file handle is closed promptly
    # (it used to be left for the garbage collector).
    with open('obs.txt', 'r') as obs_file:
        obs_lines = obs_file.read().split('\n')
    # Skip empty lines; every remaining line loses its last character.
    # NOTE(review): ``str.decode`` exists on Python 2 only.
    obs = [o.decode('utf-8')[:-1] for o in obs_lines if len(o) > 0]

    # The text box is currently empty; the call is kept so the figure
    # contents do not change.
    txtstr = ''
    f.text(0.02, 0.98, txtstr, transform=ax.transAxes, verticalalignment='top')
    plt.title(titl)

    pos = nx.get_node_attributes(G, 'xy')
    nx.draw_networkx_nodes(G, pos, cmap=cm, node_color='c', labels=None,
                           with_labels=False, ax=ax, node_size=nodesize)
    nx.draw_networkx_edges(G, pos, width=widthsize, ax=ax)

    # Node k is labelled with the k-th observation wrapped in braces.
    pos_short = {}
    for k, v in enumerate(obs):
        pos_short[k] = '{%s}' % v
    nx.draw_networkx_labels(G, pos, labels=pos_short, font_size=8)

    plt.axis('off')
    f.set_facecolor('w')
    plt.savefig(plotfname, dpi=300, bbox_inches='tight')
    plt.clf()
    return 0
def basic_sin():
    """Plot one sine and one cosine curve and return a caption.

    Both curves are sampled on ``[0, 2)`` in steps of 0.1 and drawn on the
    current ``pp`` figure with axis labels, a title and a grid.

    Returns
    -------
    str
        The caption ``'Simple $\\sin$ plot with some labels'``.
    """
    from mpltools import style
    style.use('ggplot')

    t = np.arange(0.0, 2.0, 0.1)
    s = np.sin(2*np.pi*t)
    s2 = np.cos(2*np.pi*t)
    pp.plot(t, s, 'o-', lw=4.1)
    pp.plot(t, s2, 'o-', lw=4.1)
    pp.xlabel('time(s)')
    pp.ylabel('Voltage (mV)')
    pp.title('Easier than easy $\\frac{1}{2}$')
    pp.grid(True)
    # Raw string: ``\s`` is not a valid escape sequence in a normal literal
    # (newer Python versions warn about it); the resulting text is unchanged.
    return r'Simple $\sin$ plot with some labels'
def basic_sin():
    """Draw a sine and a cosine curve with labels and return a caption.

    The curves use 20 samples of ``t`` from 0.0 up to (not including) 2.0
    and are plotted on the current ``pp`` figure.

    Returns
    -------
    str
        The caption ``'Simple $\\sin$ plot with some labels'``.
    """
    from mpltools import style
    style.use('ggplot')

    t = np.arange(0.0, 2.0, 0.1)
    s = np.sin(2 * np.pi * t)
    s2 = np.cos(2 * np.pi * t)
    pp.plot(t, s, 'o-', lw=4.1)
    pp.plot(t, s2, 'o-', lw=4.1)
    pp.xlabel('time(s)')
    pp.ylabel('Voltage (mV)')
    pp.title('Easier than easy $\\frac{1}{2}$')
    pp.grid(True)
    # The backslash is escaped explicitly; a bare ``\s`` is an invalid
    # escape sequence that newer Python versions warn about.  Same text.
    return 'Simple $\\sin$ plot with some labels'
def config_plots():
    """Apply the ggplot style and the default figure and font sizes.

    Warnings raised from the ``matplotlib`` and ``mpltools`` modules are
    silenced before the style is loaded.
    """
    import warnings
    # Both libraries emit warnings that are deliberately ignored here.
    for noisy_module in ('matplotlib', 'mpltools'):
        warnings.filterwarnings('ignore', module=noisy_module)

    from mpltools import style
    style.use('ggplot')

    import matplotlib as mpl
    mpl.rcParams.update({
        'figure.figsize': (10.0, 6.0),
        'figure.dpi': 300.0,
        'xtick.labelsize': 12.0,
        'ytick.labelsize': 12.0,
        'axes.labelsize': 16.0,
        'axes.titlesize': 18.0,
        'legend.fontsize': 16.0,
    })
def plot_graph(G, titl='', nodesize=300, widthsize=1.5, plotfname='/tmp/tmp.png'):
    """Render graph ``G`` to an image, labelling nodes from ``obs.txt``.

    Parameters
    ----------
    G
        networkx graph; the ``'xy'`` node attribute gives each position.
    titl
        Title shown above the plot.
    nodesize
        Size of the drawn nodes.
    widthsize
        Line width of the drawn edges.
    plotfname
        Output image path; saved at 300 dpi with a tight bounding box.

    Returns
    -------
    int
        Always 0.
    """
    import matplotlib as mpl
    # The backend has to be chosen before pyplot is imported.
    mpl.use('Agg', warn=False)
    import matplotlib.pyplot as plt
    from mpltools import style
    style.use('ggplot')

    cm = plt.get_cmap('cool')
    cm.set_under('w')
    f = plt.figure(1)
    ax = f.add_subplot(1, 1, 1)

    # ``with`` guarantees obs.txt is closed; the handle used to be leaked.
    with open('obs.txt', 'r') as handle:
        contents = handle.read()
    # Non-empty lines only, each with its final character dropped.
    # NOTE(review): ``str.decode`` is Python 2 only.
    obs = [o.decode('utf-8')[:-1] for o in contents.split('\n') if len(o) > 0]

    # An empty text box is still added, exactly as before.
    txtstr = ''
    f.text(0.02, 0.98, txtstr, transform=ax.transAxes, verticalalignment='top')
    plt.title(titl)

    pos = nx.get_node_attributes(G, 'xy')
    nx.draw_networkx_nodes(G, pos, cmap=cm, node_color='c', labels=None,
                           with_labels=False, ax=ax, node_size=nodesize)
    nx.draw_networkx_edges(G, pos, width=widthsize, ax=ax)

    # Label node k with the k-th observation in braces.
    pos_short = dict((k, '{%s}' % v) for k, v in enumerate(obs))
    nx.draw_networkx_labels(G, pos, labels=pos_short, font_size=8)

    plt.axis('off')
    f.set_facecolor('w')
    plt.savefig(plotfname, dpi=300, bbox_inches='tight')
    plt.clf()
    return 0
def main(args):
    """Compare two expression files, print statistics and plot them.

    ``args`` is a mapping of command-line options.  The keys read here are
    ``EXP1``, ``EXP2``, ``--strip_accession``, ``--name``, ``--norm``,
    ``--m1``, ``--m2``, ``--noplot``, ``--noylabel``, ``--scatter`` and
    ``--out``.

    Expression values below the ``--m1`` / ``--m2`` cutoffs are set to zero,
    both sets are normalised and paired, and RMSE, median percentage error
    and the Pearson/Spearman correlations are printed.  Unless ``--noplot``
    is set, a scatter or hexbin plot is saved to ``--out`` or shown.
    """
    stripAccession = args['--strip_accession']
    ##
    # Parameters for reading in the expression files
    ##
    exp1 = ExpressionTools.parseExpressionFile(args['EXP1'], stripAccession=stripAccession)
    name1 = args['--name'][0]
    norm1 = args['--norm'][0]
    exp2 = ExpressionTools.parseExpressionFile(args['EXP2'], stripAccession=stripAccession)
    name2 = args['--name'][1]
    norm2 = args['--norm'][1]
    ExpressionTools.normalizeSets(exp1, exp2)
    ##
    # Keep track of the number of points whose values we truncate
    # (the *NZ counters only count points that were nonzero before)
    ##
    trunc1, trunc2 = 0, 0
    trunc1NZ, trunc2NZ = 0, 0
    e1 = float(args['--m1'])
    e2 = float(args['--m2'])
    newExp = []
    # Read in the values from the first dataset, truncating the expression
    # level if necessary
    for e in exp1.exps_:
        ev = e.expression
        if e.expression < e1:
            ev = 0.0
            trunc1 += 1
            trunc1NZ += 1 if e.expression > 0.0 else 0
        newExp.append(ExpressionTools.ExpressionDatum(e.name, e.length, ev))
    exp1.exps_ = newExp
    # Read in the values from the second dataset, truncating the expression
    # level if necessary
    newExp = []
    for e in exp2.exps_:
        ev = e.expression
        if e.expression < e2:
            ev = 0.0
            trunc2 += 1
            trunc2NZ += 1 if e.expression > 0.0 else 0
        newExp.append(ExpressionTools.ExpressionDatum(e.name, e.length, ev))
    exp2.exps_ = newExp
    ##
    ## normalization and pairing
    ##
    exp1.normalize(norm1)
    exp2.normalize(norm2)
    # zip the results back up into matches
    matches = exp1.zipWithMatching(exp2)
    # get just the expression values as x and y
    x, y = zip(*[(e[0].expression, e[1].expression) for e in matches])
    sx = sorted(exp1.exps_, key = lambda x: x.expression)
    sy = sorted(exp2.exps_, key = lambda x: x.expression)
    ## Some summary statistics about the data
    print("{}: min = {}({}), max = {}({})".format(name1, sx[0].expression, sx[0].name, sx[-1].expression, sx[-1].name))
    print("{}: min = {}({}), max = {}({})".format(name2, sy[0].expression, sy[0].name, sy[-1].expression, sy[-1].name))
    ##
    # If we need to 'align' the datasets
    ##
    #yscale = sum([e.expression for e in exp1.exps_]) / sum([e.expression for e in exp2.exps_])
    #ys = [yscale * e[1].expression for e in matches]
    #ys = [e[1].expression for e in matches]
    #meanX = sum(x) / float(len(x))
    #meanY = sum(ys) / float(len(ys))
    #yshift = meanX - meanY
    ##
    # Don't do any 'alignment' for now
    ##
    yscale = 1.0
    yshift = 0.0
    print("yscale = {}".format(yscale))
    print("yshift = {}".format(yshift))
    # Rebuild the pairs with the (currently identity) scale and shift applied
    # to the second dataset.
    matches = [(ExpressionTools.ExpressionDatum(e[0].name, e[0].length, e[0].expression),\
                ExpressionTools.ExpressionDatum(e[1].name, e[1].length, yscale * e[1].expression + yshift ) )\
               for e in matches]
    x, y = zip(*[(e[0].expression, e[1].expression) for e in matches])
    ##
    # Compute the RMSE and median percentage error
    ##
    rmse = ExpressionTools.RMSE(matches)
    medPE = ExpressionTools.medPE(matches)
    print("RMSE {} vs {} is {:0.2f}".format(name1, name2, rmse))
    #print("Trimmed RMSE {} vs {} is {}".format(name1, name2, ExpressionTools.TrimmedRMSE(matches, 0.005)))
    print("MedPE {} vs {} is {:0.2f}".format(name1, name2, medPE))
    percentError, _ = ExpressionTools.PE(matches)
    print("Percentage error computed on {} points".format(len(percentError)))
    ##
    # Plot the histogram of percentage errors
    ##
    #plt.hist(percentError, bins=100, range=(0,100))
    #plt.ylim(0, 3100)
    #plt.show()
    ##
    # We don't use these numbers right now
    ##
    #relativeErrors = ExpressionTools.relativeErrors(matches)
    #print("EF15 = {}".format(ExpressionTools.errorFraction(relativeErrors, 0.15)))
    #print("IsoEM MedPE {}".format(ExpressionTools.medPEIsoEM(relativeErrors)))
    print("Cutoff removed {} points from dataset 1 ({} were nonzero)".format(trunc1, trunc1NZ))
    print("Cutoff removed {} points from dataset 2 ({} were nonzero)".format(trunc2, trunc2NZ))
    ##
    # Compute and print the correlation statistics
    ##
    pr = sp.stats.pearsonr(x, y)[0]
    sr = sp.stats.spearmanr(x, y)[0]
    print("Pearson r = {0}".format(pr))
    print("Spearman r = {0}".format(sr))
    # Everything below only concerns the plot.
    if args['--noplot']:
        return
    font = {'family': 'normal', 'weight': 'normal', 'size': 20}
    ## R style!
    style.use('ggplot')
    plt.rc('font', **font)
    # Axis-label suffix for every supported normalisation name.
    nstr = {"id": "", "log": "($\\log_2$)", "rpkm": "(RPKM)", "lrpkm": "($\\log_2$ RPKM)", "frac": "(tfrac)", "fracLengthNorm": "(tfrac)", "rpkm2tpm": "(TPM)", "tpm" : "(TPM)", "tpmlog" : "($\\log_2$ TPM)"}
    xlabel = "{0}{1}".format(name1, nstr[norm1])
    # NOTE(review): the y label always starts with "RPKM" and ignores name2;
    # the name-based variant is the commented-out line below.
    ylabel = "RPKM{0}".format(nstr[norm2])
    #ylabel = "{0}{1}".format(name2, nstr[norm2]) if name2 != "" else ""
    # with open("datapoints.txt", "wb") as ofile:
    #     ofile.write('"{}","{}"\n'.format(xlabel, ylabel))
    #     for xi, yi in itertools.izip(x, y):
    #         ofile.write('{},{}\n'.format(xi, yi))
    #minVal = 0
    #maxVal = 20.0
    minVal = min(min(x), min(y))
    maxVal = max(max(x), max(y))
    #plt.axis([minVal, maxVal, minVal, maxVal])
    plt.axis([min(x), max(x), min(y), max(y)])
    # Don't draw the top and right axes
    for loc, spine in plt.gca().spines.items():
        if loc in ['left','bottom']:
            spine.set_position(('outward',10)) # outward by 10 points
            spine.set_color('black')
        elif loc in ['right','top']:
            spine.set_color('none') # don't draw spine
        else:
            raise ValueError('unknown spine location: %s'%loc)
    # ticks point outward
    plt.gca().tick_params(axis='both', direction='out')
    # remove unneeded ticks
    plt.gca().get_xaxis().tick_bottom()
    plt.gca().get_yaxis().tick_left()
    # plt.text(0.30, 0.90, r"$\sigma = {0:.3},\, \rho = {1:.3}$".format(pr, sr),
    #          fontsize=24,
    #          horizontalalignment='center',
    #          verticalalignment='center',
    #          transform = plt.axes().transAxes)
    plt.xlabel(xlabel)
    if not args['--noylabel']:
        plt.ylabel(ylabel)
    ## Draw the correlation values inside the plot
    #plt.gca().text(min(x), max(y)-2, r"$\sigma$={0:0.2f}, $\rho$={1:0.2f}".format(pr, sr))
    #plt.gca().text(-4, 20.5, r"$\sigma$={0:0.2f}, $\rho$={1:0.2f}, RMSE={2:0.2f}, mPE={3:0.2f}".format(pr, sr, rmse, medPE))
    #plt.gca().text(-2, 19, r"$\sigma$={0:0.2f}, $\rho$={1:0.2f}".format(pr, sr, rmse, medPE))
    #plt.ylabel("{0}{1}".format(name2, nstr[norm2]))
    plt.gca().set_aspect('equal')
    #cb = plt.colorbar()
    if args['--scatter']:
        # Why hexbin here you say? Well, it's because I don't know how else
        # to get dotted lines (like those of the grid) around the plot. Hexbin
        # seems to do this automatically. So for the sake of consistency with the
        # hexbin plots below, I want these plots to have the dotted boundary grid
        # lines as well.
        plt.hexbin([0], [0], extent=(minVal, maxVal, minVal, maxVal), mincnt=1.0, alpha=0.0)
        plt.scatter(x, y, alpha=0.7)
    else:
        plt.hexbin(x, y, extent=(minVal, maxVal, minVal, maxVal), mincnt=1.0, gridsize=100, bins='log', cmap=plt.cm.YlOrRd, alpha=0.8)
    plt.gca().xaxis.grid(color='gray', linestyle='dashed')
    plt.gca().yaxis.grid(color='gray', linestyle='dashed')
    # import random
    # for d in matches:
    #     if d[0].expression > 8.0 and d[0].expression < 8.5 and d[1].expression > 2.5 and d[1].expression < 3.0:
    #         plt.annotate(
    #             d[0].name,
    #             xy = (d[0].expression + random.random(), d[1].expression + random.random()), xytext = (-20, 20),
    #             textcoords = 'offset points', ha = 'right', va = 'bottom',
    #             bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
    #             arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))
    # Dump the pairs inside a fixed window (x in [4, 9], y in [11, 15]) to
    # 'strange_blob.txt' in the working directory.  This runs on every call
    # that reaches the plotting stage.
    # NOTE(review): str is written to a file opened in 'wb' mode, which only
    # works on Python 2 -- confirm the target interpreter.
    of = open('strange_blob.txt', 'wb')
    for xi, yi in matches:
        if xi.expression >= 4 and xi.expression <= 9:
            if yi.expression >= 11 and yi.expression <= 15:
                of.write("{}\t{}\t{}\n".format(xi.name, xi.expression, yi.expression))
    of.close()
    ## uncomment this to print the title
    #plt.title(args['--name'][1])
    plt.tight_layout()
    if args['--out']:
        print("saving to {}".format(args['--out']))
        plt.savefig(args['--out'])
    else:
        plt.show()
def plot(files, pstyle = 'ggplot', output=None, seq=None, xkcd=False):
    """Plot performance-counter traces from several per-process CSV files.

    Parameters:
        files:  paths of the CSV files, one per process; at least two are
                required.  The last path component is the process name.
        pstyle: mpltools style name; a false value skips styling.
        output: if set, the final figure is saved to this path.
        seq:    if set, a directory that is (re)created and receives one
                image per plotted quantity.
        xkcd:   if true, try to enable matplotlib's xkcd mode.

    The function fills the module-level ``csv_file_handle`` and ``value``
    mappings and reads ``config.burst``, ``config.normalize`` and
    ``config.execution_time_dir``.

    NOTE(review): this block was re-indented from a whitespace-collapsed
    source; the nesting of several statements is a best guess and should be
    confirmed against the original file.
    """
    global csv_file_handle
    global value
    csv_file_handle ={}
    # op_sum = {'1':['L1-dcache-loads','L1-dcache-stores','L1-dcache-prefetches','L1-icache-loads'],
    #           '2':['L1-dcache-load-misses','L1-dcache-store-misses','L1-dcache-prefetch-misses','L1-icache-load-misses'],
    #           '3':[ 'LLC-loads','LLC-stores','LLC-prefetches'],
    #           '4':['LLC-load-misses','LLC-store-misses','LLC-prefetch-misses'],
    #           '5':['dTLB-loads','dTLB-stores','iTLB-loads'],
    #           '6':['dTLB-load-misses','dTLB-store-misses','iTLB-load-misses'],
    #           'Bandwidth':['offcore_response_corewb_local_dram_0','offcore_response_prefetch_any_llc_miss_0','LLC-prefetches','cache-misses']}
    #
    # op_div = [['cache-references','uops_retired_any'],['cache-misses','uops_retired_any'], ['instructions','cycles'],
    #           ['cache-misses','cache-references']]
    #enable for i7
    # Derived quantities: per-key sums, differences (none at present) and
    # pairwise ratios of the recorded events.
    op_sum = {
        'contention': ['cache-misses'],
        'band': ['cache-references', 'cache-misses'],
        'total_bandwidth': ['cache-references'],
    }
    op_diff ={}
    op_div= [['cache-references','uops_retired_any'],['cache-misses','uops_retired_any'], ['instructions','cycles'],['cache-misses','cache-references'],['cache-references','cycles'], ['cache-misses','cycles']]
    print pstyle
    if pstyle:
        try:
            from mpltools import style
            style.use( pstyle)
        except ImportError:
            print "Need mpltools for setting styles (pip install mpltools)"
    import gen_level
    try:
        import brewer2mpl
        all_colors = brewer2mpl.get_map('Paired', 'Qualitative', 12).hex_colors
    except ImportError:
        print "Install brewer2mpl for better colors (pip install brewer2mpl)"
        all_colors = ('green','orange','red','blue', 'black','olive','purple','#6960EC', '#F0FFFF', '#728C00', '#827B60', '#F87217', '#E55451', # 16
                      '#F88017', '#C11B17', '#17BFC2', '#C48793') # 20
    # Every level starts with the full palette and consumes it front to back.
    cur_colors = collections.defaultdict(lambda: all_colors)
    assigned = dict()
    # assigned= {'mbw-cache-references': [0,2345,..], 'soplex-cache-references': [32,532,12,..], ..} Events and values for all processes
    if len(files) < 2 :
        print "More than one file needed. Exiting!"
        sys.exit(0)
    for file in files:
        processname = file.split("/")[-1]
        if file:
            try:
                inf = open( file, "r")
            except:
                # NOTE(review): a bare except that silently returns hides
                # every error while opening the file, not only a missing one.
                return
        else:
            inf = sys.stdin
        # The opened files are never closed explicitly in this function.
        csv_file_handle[processname] = csv.reader(inf)
    timestamps = dict()
    val = ""
    first_time = True
    event_list = []
    # event_list= [cache-references, instructions,..]
    for processname,rc in csv_file_handle.items():
        for r in rc:
            if config.burst:
                # Burst rows are "value,event" pairs; anything else is skipped.
                if len(r) == 2:
                    ts=0
                    val, event = r
                    # Bare event names are collected while first_time is set.
                    if first_time and event not in event_list:
                        event_list.append(event)
                    event = str(processname)+"-"+event
                else:
                    continue
            # NOTE(review): ``event`` and ``ts`` are only assigned in the
            # burst branch above; with config.burst false they look
            # undefined here -- confirm.
            if event not in assigned:
                level = gen_level.get_level(event)
                assigned[event] = cur_colors[level][0]
                cur_colors[level] = cur_colors[level][1:]
                # Start over with the full palette once it is used up.
                if len(cur_colors[level]) == 0:
                    cur_colors[level] = all_colors
                value[event] = []
                timestamps[event] = []
            timestamps[event].append(float(ts))
            try:
                value[event].append(float(val.replace("%","")))
            except ValueError:
                # Unparseable values are recorded as 0.0.
                value[event].append(0.0)
        first_time = False
    levels = dict()
    for j in assigned.keys():
        levels[gen_level.get_level(j)] = True
    if xkcd:
        try:
            plt.xkcd()
        except NameError:
            print "Please update matplotlib. Cannot enable xkcd mode."
    #print value
    if config.normalize:
        # Scale every series by its own maximum.
        for key in value:
            entries= value[key]
            normalized_values = [numpy.float64(entry)/max(entries) for entry in entries]
            value[key] = normalized_values
    if seq:
        # Recreate the output directory from scratch.
        os.umask(0000)
        if os.path.exists(seq):
            shutil.rmtree(seq)
            os.makedirs(seq)
        else:
            os.makedirs(seq)
    n = 1
    print "Assigned Keys: ", assigned.keys()
    #print "event list: ", event_list
    # One subplot per level with the raw series of every process.
    for l in levels.keys():
        ax = plt.subplot(len(levels), 1, n)
        if val.find('%') >= 0:
            ax.set_ylim(0, 100)
        t = []
        for j in event_list:
            print j, gen_level.get_level(j), l
            for processname in csv_file_handle:
                if gen_level.get_level(j) == l:
                    t.append(j)
                    ax.plot(value[str(processname)+"-"+j], label = str(processname)+"-"+j )
            if seq:
                leg = ax.legend( loc='upper left')
                leg.get_frame().set_alpha(0.5)
                plt.savefig(seq+"/"+j)
                plt.cla()
        leg = ax.legend(t, loc='upper left')
        leg.get_frame().set_alpha(0.5)
        n += 1
    # Element-wise differences; op_diff is empty above, so this section does
    # not run at present.
    if len(op_diff) > 0:
        for key, components in op_diff.items():
            print components
            #print [(value[component]) for component in components]
            #print [len(value[component]) for component in components]
            diff_value={}
            if key =='contention' :
                print "KEY: ", key
                ax1 = plt.subplot(2,1,1)
                ax2 = plt.subplot(2,1,2)
            else:
                ax = plt.subplot(1, 1, 1)
            for processname in csv_file_handle:
                diff_value[processname]=[x-y for x,y in zip(value[str(processname)+"-"+components[0]],value[str(processname)+"-"+components[1]])]
                #print sum_value
                #print "DONE!!"
                # print len(sum_value)
                # print len(timestamps[components[0]])
                # NOTE(review): ``is not`` compares identity, not equality;
                # comparing against a string literal this way depends on
                # interning -- ``!=`` looks like the intent, confirm.
                if key is not 'contention':
                    ax.plot(diff_value[processname], label = str(processname)+"-"+'-'.join(components))
                else:
                    ax1.plot(diff_value[processname], label = str(processname)+"-"+'-'.join(components))
            if seq:
                if key is not 'contention':
                    leg = ax.legend(loc='upper left')
                    leg.get_frame().set_alpha(0.5)
                else:
                    leg = ax1.legend(loc='upper left')
                    leg.get_frame().set_alpha(0.5)
                if key =='contention':
                    #plot the drop in performance of each process:
                    perf_drop = compute_contention(diff_value)
                    for process, drop in perf_drop.items():
                        ax2.plot(drop, label="Drop in perf of "+str(process))
                        #change to a function later
                        avg_perf_drop = sum(drop)/len(drop)
                        f_handle= open(config.execution_time_dir+'/estimateddrop-'+process+'-'+ ''.join([p if p is not process else '' for p,d in perf_drop.items()])+'.log','w+')
                        f_handle.write(str(avg_perf_drop))
                        f_handle.close()
                    leg=ax2.legend(loc= 'upper left')
                    leg.get_frame().set_alpha(0.5)
                plt.savefig(seq+"/"+'+'.join(components))
                plt.cla()
    # Element-wise sums of the components listed in op_sum.
    if len(op_sum) > 0:
        for key, components in op_sum.items():
            print components
            #print [(value[component]) for component in components]
            #print [len(value[component]) for component in components]
            sum_value={}
            if key =='contention' :
                print "KEY: ", key
                ax1 = plt.subplot(2,1,1)
                ax2 = plt.subplot(2,1,2)
            else:
                ax = plt.subplot(1, 1, 1)
            for processname in csv_file_handle:
                sum_value[processname]=sum(map(numpy.array, [value[str(processname)+"-"+component] for component in components]))
                #print sum_value
                #print "DONE!!"
                # print len(sum_value)
                # print len(timestamps[components[0]])
                # NOTE(review): identity comparison with a string literal,
                # as in the op_diff section -- confirm ``!=`` is intended.
                if key is not 'contention':
                    ax.plot(sum_value[processname], label = str(processname)+"-"+'+'.join(components))
                else:
                    ax1.plot(sum_value[processname], label = str(processname)+"-"+'+'.join(components))
            if seq:
                if key is not 'contention':
                    leg = ax.legend(loc='upper left')
                    leg.get_frame().set_alpha(0.5)
                else:
                    leg = ax1.legend(loc='upper left')
                    leg.get_frame().set_alpha(0.5)
                if key =='contention':
                    #plot the drop in performance of each process:
                    perf_drop = compute_contention(sum_value)
                    #print perf_drop
                    for process, drop in perf_drop.items():
                        ax2.plot(drop, label="Drop in perf of "+str(process))
                        #change to a function later
                        if len(drop)>0:
                            # The average drop is written to a log file whose
                            # name lists the other processes.
                            avg_perf_drop = sum(drop)/len(drop)
                            f_handle= open(config.execution_time_dir+'/estimateddrop-'+process+'-'+ ''.join([p if p is not process else '' for p,d in perf_drop.items()])+'.log','w+')
                            f_handle.write(str(avg_perf_drop))
                            f_handle.close()
                    leg=ax2.legend(loc= 'upper left')
                    leg.get_frame().set_alpha(0.5)
                elif key =='total_bandwidth':
                    # Replace the per-process curves with the total.
                    plt.cla()
                    ax = plt.subplot(1, 1, 1)
                    total_bw = total_bandwidth(sum_value)
                    ax.plot(total_bw['total'], label = 'Total Bandwidth')
                    leg = ax.legend(loc='upper left')
                    leg.get_frame().set_alpha(0.5)
                plt.savefig(seq+"/"+key+": "+'+'.join(components))
                plt.cla()
    # Element-wise ratios for every pair listed in op_div.
    if len(op_div) > 0:
        ax = plt.subplot(1, 1, 1)
        for components in op_div:
            print components
            for processname in csv_file_handle:
                ax.plot([numpy.float64(x)/y for x,y in zip(value[str(processname)+"-"+components[0]],value[str(processname)+"-"+components[1]])], label= str(processname)+"-"+'/'.join(components))
            if seq:
                leg = ax.legend( loc='upper left')
                leg.get_frame().set_alpha(0.5)
                plt.savefig(seq+"/"+'_'.join(components))
                plt.cla()
    plt.xlabel('Time')
    # ``val`` still holds the last raw value that was read.
    if val.find('%') >= 0:
        plt.ylabel('Bottleneck %')
    else:
        plt.ylabel("Counter value")
    if output:
        plt.savefig(output)
    else:
        if not seq:
            plt.show()
def processPlot(self):
    """
    Render every plot in ``self.plots`` once per data set in ``self.plot_set``.

    If ``<self.home>/data_sets.csv`` exists, its rows (after the header row)
    are loaded into ``self.plot_set`` as ``name -> [col1, col2, col3]``.
    Columns 1 and 2 are used as the x-axis limits (parsed with
    ``self.date_axis``); column 3 is read into ``plot_text``.

    Each figure is either handed to ``self.savePdf()`` (when
    ``self.graph_type`` is "pdf", case-insensitive) or shown interactively,
    and is closed afterwards.
    """
    self.preparePlot()

    data_sets_path = os.path.join(self.home, 'data_sets.csv')
    if os.path.isfile(data_sets_path):
        with open(data_sets_path) as file_handle:
            rows = csv.reader(file_handle)
            next(rows, None)  # first row is the header
            for row_string in rows:
                if len(row_string) < 4:
                    # blank or truncated line: nothing usable to register
                    continue
                self.plot_set[row_string[0]] = [row_string[1],
                                                row_string[2],
                                                row_string[3]]

    # NOTE(review): the original indentation was lost; this loop is assumed to
    # run whether or not data_sets.csv exists (it is a no-op for an empty
    # plot_set) - confirm against the upstream file.
    # sorted() works on both Python 2 lists and Python 3 dict views.
    sorted_set = sorted(self.plot_set)
    for seq_cnt, seq in enumerate(sorted_set, 1):
        print('Processing - %s of %s' % (seq_cnt, len(sorted_set)))
        plot_text = self.plot_set[seq][2]  # only used by the disabled fig.text call below
        for p in range(len(self.plots)):
            print('Processing set - %s of %s' % (p + 1, len(self.plots)))
            style.use('mpl_dark_harlan')
            fig = plt.figure(p + 1)
            ax = fig.add_subplot(111)
            ax2 = ax.twinx()
            ax_array = self.array_builder()
            self.prepare_plot(self.plots[p])
            self.grapher(ax_array, ax, ax2)

            # The converter is built here, after prepare_plot(), exactly as in
            # the original statement order.
            to_num = mdates.strpdate2num(self.date_axis)
            ax.set_xlim(left=to_num(self.plot_set[seq][0]),
                        right=to_num(self.plot_set[seq][1]))

            plot_title = self.plots[p].split('_')[1]
            plot_title = plot_title.replace('-', ' ').replace('.csv', '')
            plt.title('%s - %s' % (seq, plot_title))

            ax.set_ylabel(self.y_label)
            if len(self.sec_array_keys) == 1:
                ax2.set_ylabel(self.sec_array_keys[0])
            else:
                ax2.set_ylabel(self.y2_label)
            ax.set_xlabel(self.x_label)
            # list(...) keeps the Python 2 "first key" lookup working on Python 3.
            ax.set_xticklabels(list(ax_array.keys())[0], rotation=45, fontsize=8)
            ax.xaxis.set_major_formatter(mdates.DateFormatter(self.date_legend))
            fig.autofmt_xdate()
            plt.grid(True)
            ax.legend(loc=2, borderaxespad=1., fontsize=10)
            ax2.legend(loc=1, borderaxespad=1., fontsize=10)
            # fig.text(.1, .1, plot_text, horizontalalignment='center')

            # Reset the per-plot key lists before the next plot is built.
            self.prim_array_keys = []
            self.sec_array_keys = []

            fig.subplots_adjust(left=0.06, bottom=0.12, right=0.94, top=0.94,
                                wspace=None, hspace=None)
            fig.set_size_inches(22, 16, forward=True)
            if self.graph_type.lower() == "pdf":
                self.savePdf()
            else:
                plt.show()
            plt.close()
    print('Processing complete')
""" =============== Multiple styles =============== You can specify multiple plot styles by passing a list of style names to `style.use`. The styles are evaluated from the first to last element of the list, so if there are settings that are defined in multiple styles, the settings in the later style files will override those in the earlier files. In this example, the 'ggplot' style alters the colors of elements to make the plot pretty, and the 'pof' style (Physics of Fluids journal) alters the figure size so that it fits in a column, alters line and text sizes, etc. """ import numpy as np import matplotlib.pyplot as plt from mpltools import style style.use(['ggplot', 'pof']) x = np.linspace(0, 2 * np.pi) plt.plot(x, np.cos(x)) plt.xlabel('x label') plt.ylabel('y label') plt.title('title') plt.show()
from mpltools import style
style.use('jfm')

import matplotlib as mpl

# Local overrides, set after the 'jfm' style has been applied.
mpl.rcParams.update({
    'figure.figsize': [5.0, 3.3],
    'figure.dpi': 300,
    'savefig.dpi': 300,
    # 'savefig.bbox': 'tight',
    # 'savefig.pad_inches': 0.05,
    'font.family': 'serif',
    'font.serif': ['Computer Modern Roman'],
    'text.usetex': True,
})
import numpy as np
import matplotlib.pyplot as plt

from mpltools import style
from mpltools import layout


def color_cycle_example(ax):
    """Plot one phase-shifted sine curve per entry of the active colour cycle.

    The number of curves equals the length of the colour cycle configured in
    ``plt.rcParams``; the phase shifts are spread evenly over ``[0, L)``.
    """
    L = 6
    x = np.linspace(0, L)
    # 'axes.color_cycle' does not exist in newer matplotlib releases, which
    # expose the cycle as 'axes.prop_cycle'; fall back to the old key so the
    # example also runs on the releases it was written for.
    try:
        ncolors = len(plt.rcParams['axes.prop_cycle'])
    except KeyError:
        ncolors = len(plt.rcParams['axes.color_cycle'])
    shift = np.linspace(0, L, ncolors, endpoint=False)
    for s in shift:
        ax.plot(x, np.sin(x + s), 'o-')


def image_and_patch_example(ax):
    """Show a random 20x20 image with a circle patch drawn on top of it."""
    ax.imshow(np.random.random(size=(20, 20)), interpolation='none')
    c = plt.Circle((5, 5), radius=5, label='patch')
    ax.add_patch(c)


style.use('grayscale')

figsize = layout.figaspect(0.5)
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=figsize)

color_cycle_example(ax1)
image_and_patch_example(ax2)

plt.show()
style=[] n = False clip = True smooth = [False]*len(args) xl, yl = [], [] lw = [1]*len(args) xshift = [0]*len(args) yshift = [0]*len(args) scall = False ba = False # specify mpltools style files - has to be done in advance for o, a in opts: if '--mplstyle' in o: from mpltools import style as mplstyle mplstyle.use(a) elif '--scaleall' in o: scall = True print 'Scale All active!' # create plot figure with default options #fig = plt.figure(1,(11.5,8.3)) fig = plt.figure(1) #ax1 = plt.axes([0.10,0.10,0.8,0.8]) ax1 = fig.add_subplot(111) plt.xlabel("Displacement") plt.ylabel("Force") #plt.grid(True) # use arguments for o, a in opts:
return fig def test_simple_plot(): fig, ax = plt.subplots() ax.plot([0, 1]) ax.set_xlabel('x-label') ax.set_ylabel('y-label') ax.set_title('title') return fig # Only show styles defined by package, not by user. base_styles = list(style.baselib.keys()) for sty in base_styles: # reset matplotlib defaults before applying new style plt.rcdefaults() style.use(sty, use_baselib=True) print("Plotting tests for '%s' style" % sty) fig = test_artists_plot() fig.savefig(pth.join(PATH, 'test_artists_png', sty + '.png')) fig.savefig(pth.join(PATH, 'test_artists_pdf', sty + '.pdf')) fig = test_simple_plot() fig.savefig(pth.join(PATH, 'test_simple_png', sty + '.png')) fig.savefig(pth.join(PATH, 'test_simple_pdf', sty + '.pdf'))
color3 = '#9acd32'
color2 = '#ff0000'
lw1 = 4
aph = .7


def leg_width(lg, fs):
    """Set the line width of every handle of legend *lg* to *fs*."""
    for handle in lg.legendHandles:
        handle.set_linewidth(fs)


#
# plotting
#
style.use('dark_background')

# isotherms tilted
fig = plt.figure(figsize=(17, 9.))
ax = fig.add_axes((0.1, 0.2, 0.8, 0.7))

# Hide the right and top frame lines.
for _side in ('right', 'top'):
    ax.spines[_side].set_color('none')

# Three curves over the same abscissa, drawn in the original order.
for _ydata, _colour in ((mu2, color2), (cmu2, color1), (tmu2, color3)):
    ax.plot(mu2, _ydata, linewidth=lw1, alpha=aph, color=_colour)
def _generate_plots(self, report_name):
    """
    Write the comparison plots for the two result sets of a report.

    Reads the ``Latency`` and ``label`` columns of ``self.df1`` and
    ``self.df2`` and saves PNG files below ``results/<report_name>/plots/``:

    * ``hist_prob_all.png`` and ``hist_prob_90line.png``: step histograms of
      all latencies, and of the latencies below each set's 90th percentile;
    * for every label (file prefix from ``self._normalize_test_name``):
      ``<prefix>_hist_prob_all.png``, ``<prefix>_hist_prob_90line.png`` and
      ``<prefix>_requests.png``.

    Data set 1 is drawn in green and data set 2 in blue throughout.

    :param report_name: name of the report directory below ``results/``
    """
    font = {'size': '8'}
    rc('font', **font)
    style.use('ggplot')

    plot_dir = 'results/' + report_name + '/plots/'

    # NOTE(review): `normed=` is kept as in the original; newer matplotlib
    # releases spell it `density=` - confirm the targeted version before
    # changing it.

    def bin_count(series):
        # Cube-root rule on the series' own length. The histogram needs an
        # integer bin count, and at least one bin.
        return max(1, int(math.pow(len(series), float(1) / 3)))

    def below_p90(series):
        # Keep only the values strictly below the series' 90th percentile.
        return series[series < np.percentile(series, 90)]

    def step_hist(series, colour, tag):
        plt.hist(series, bins=bin_count(series), normed=True, color=[colour],
                 fill=True, alpha=0.40, histtype='step', label=tag)

    def density_plot(series, tag):
        series.hist(normed=True, alpha=0.2, label=tag)
        try:
            series.plot(kind='kde', label=tag)
        except np.linalg.LinAlgError:
            pass  # if singular matrix - no plot

    def finish(xlabel, ylabel, title, file_name):
        # Shared decoration, then save and release the current figure.
        plt.legend()
        plt.xlabel(xlabel, fontsize=9)
        plt.ylabel(ylabel, fontsize=9)
        plt.title(title, fontsize=10)
        plt.tick_params(axis='both', which='major', labelsize=8)
        plt.tick_params(axis='both', which='minor', labelsize=6)
        plt.tight_layout()
        plt.savefig(plot_dir + file_name)
        plt.close()

    l1 = self.df1['Latency']
    l2 = self.df2['Latency']

    plt.figure(figsize=(8, 5), dpi=150)
    step_hist(l1, 'g', '1')
    step_hist(l2, 'b', '2')
    finish('Response time', 'Probability',
           'Histogram of all response time', 'hist_prob_all.png')

    plt.figure(figsize=(8, 5), dpi=150)
    step_hist(below_p90(l1).reset_index(drop=True), 'g', '1')
    step_hist(below_p90(l2).reset_index(drop=True), 'b', '2')
    finish('Response time', 'Probability',
           'Histogram of 90% line response time', 'hist_prob_90line.png')

    # generate compare plots for tests
    x1 = self.df1.loc[:, ['label', 'Latency']].groupby('label')
    x1m = x1['Latency'].agg([np.mean, np.std])
    x2 = self.df2.loc[:, ['label', 'Latency']].groupby('label')
    x2m = x2['Latency'].agg([np.mean, np.std])

    # Outer join: a label present in only one data set has NaN statistics for
    # the other one, which is how the missing side is detected below.
    df = x1m.join(x2m, how='outer', lsuffix='1', rsuffix='2')

    for label, data in df.iterrows():
        file_name = self._normalize_test_name(label)

        series = []  # (latencies, legend tag, colour) per available data set
        if not pd.isnull(data['mean1']):
            series.append((x1.get_group(label)['Latency'], '1', 'g'))
        if not pd.isnull(data['mean2']):
            series.append((x2.get_group(label)['Latency'], '2', 'b'))
        if not series:
            # NOTE(review): neither data set has latencies for this label, so
            # no files are written for it - confirm this matches the intent of
            # the original "mean1 or mean2" guard.
            continue

        plt.figure(figsize=(6, 4))
        for values, tag, _ in series:
            density_plot(values, tag)
        finish('Response time', 'Probability',
               'Histogram of all response time',
               file_name + '_hist_prob_all.png')

        plt.figure(figsize=(6, 4))
        for values, tag, _ in series:
            density_plot(below_p90(values), tag)
        finish('Response time', 'Probability',
               'Histogram of 90% line response time',
               file_name + '_hist_prob_90line.png')

        plt.figure(figsize=(6, 4), dpi=150)
        for values, tag, colour in series:
            plt.plot(range(1, len(values) + 1), values, 'o',
                     color=colour, alpha=0.50, label=tag)
        finish('Request', 'Time', 'Requests time',
               file_name + '_requests.png')
# It is made available under the MIT License from __future__ import print_function try: from gensim import corpora, models, similarities except: print("import gensim failed.") print() print("Please install it") raise try: from mpltools import style style.use("ggplot") except: print("Could not import mpltools: plots will not be styled correctly") import matplotlib.pyplot as plt import numpy as np from os import path if not path.exists("./data/ap/ap.dat"): print("Error: Expected data to be present at data/ap/") print("Please cd into ./data & run ./download_ap.sh") corpus = corpora.BleiCorpus("./data/ap/ap.dat", "./data/ap/vocab.txt") model = models.ldamodel.LdaModel(corpus, num_topics=100, id2word=corpus.id2word, alpha=None) for ti in xrange(84):
import sys import os import cPickle as pickle from pandas import DataFrame, Series import pandas as pd import matplotlib.pyplot as plt from mpltools import style from mpltools import layout style.use(['ggplot', 'pof']) almost_black = '#262626' from database import Database cwd = os.path.dirname(os.path.abspath(__file__)) datadir = os.path.join(os.path.split(cwd)[0], 'data') resultsdir = os.path.join(os.path.split(cwd)[0], 'results') marketing = {'passive_marketing': ['environmental_impact', 'investment', 'govt_incentives', 'pv_education', 'rate_changes', 'industry_growth'], 'active_marketing': ['online_review', 'past_work', 'event_marketing', 'channel_partnering', 'webtools', 'promotions', 'contact', 'bragging']} pie_colors = { 'environmental_impact': '#7C8FB0', 'investment': '#EEAD51', 'govt_incentives': '#8CC43D', 'pv_education': '#2B292A', 'rate_changes': '#FED700', 'industry_growth': '#426986', 'online_review': '#8B8878', 'past_work': '#426986', 'event_marketing': '#87CEFA', 'channel_partnering': '#EEAD51',
from __future__ import division, print_function import numpy as np import matplotlib.pyplot as plt import pyfits from matplotlib import colors from mpltools import style style.use('dark_background') datafile = 'gti_photon_events.fits' outfile = '' data = pyfits.open(datafile)[1].data print(len(data)) pos = [] for event in data: pos.append([event[3] - 180, event[4]]) print("Done loading") print(len(pos)) def bonne_project(pos, parallel=90, meridian=0): pos = np.array(pos) p1 = np.deg2rad(parallel) mer = np.deg2rad(meridian) rho = 1.0 / np.tan(p1) + p1 - np.deg2rad(pos[1]) E = (np.deg2rad(pos[0]) - mer) * np.cos(np.deg2rad(pos[1])) / rho x = rho * np.sin(E) y = 1.0 / np.tan(p1) - rho * np.cos(E) return np.array([x, y])
interval-plot.py file (or stdin) delimeter must be , this is for data that is not normalized.''') p.add_argument('--xkcd', action='store_true', help='enable xkcd mode') p.add_argument('--style', help='set mpltools style (e.g. ggplot)') p.add_argument('file', help='CSV file to plot (or stdin)', nargs='?') p.add_argument('--output', '-o', help='Output to file. Otherwise show.', nargs='?') args = p.parse_args() if args.style: try: from mpltools import style style.use(args.style) except ImportError: print "Need mpltools for setting styles (pip install mpltools)" import gen_level try: import brewer2mpl all_colors = brewer2mpl.get_map('Paired', 'Qualitative', 12).hex_colors except ImportError: print "Install brewer2mpl for better colors (pip install brewer2mpl)" all_colors = ( 'green', 'orange', 'red', 'blue',
def plot(file, pstyle = 'ggplot', output=None, seq=None, xkcd=False):
    """
    Plot the counter values found in a CSV stream.

    Reads the module-level flags ``burst`` and ``normalize``:

    * ``burst`` true: rows are ``value,event``; values are plotted against
      their index.
    * ``burst`` false: rows are ``timestamp,value,event``,
      ``timestamp,value,unit,event`` or ``timestamp,event,value,...``
      (5 or more columns); values are plotted against the timestamp.
    * ``normalize`` true: every series is divided by its own maximum.

    One subplot is drawn per level reported by ``gen_level.get_level``.
    After that the sums listed in ``op_sum`` and the ratios listed in
    ``op_div`` are drawn on the last subplot; a derived series is skipped
    when one of its counters is absent from the input.

    :param file: path of the CSV file; a false value reads ``sys.stdin``.
                 If the file cannot be opened the function returns silently.
    :param pstyle: mpltools style name passed to ``style.use`` (skipped if
                   false or if mpltools is not installed)
    :param output: if set, the final figure is saved to this path
    :param seq: if set, this directory is deleted and re-created, and every
                series is saved into it as its own image
    :param xkcd: if true, ``plt.xkcd()`` is enabled when available
    """
    op_sum = {
        'contention': ['cache-misses'],
        'band': ['cache-references', 'cache-misses'],
        'total_bandwidth': ['cache-references']
    }
    op_div = [['instructions', 'cycles'],
              ['cache-misses', 'cache-references'],
              ['cache-references', 'cycles'],
              ['cache-misses', 'cycles']]

    print(pstyle)
    # The mpltools import below is local to this function, so a globals()
    # lookup alone can never see it; remember explicitly whether a style is
    # active so that its colours are not overridden further down.
    style_applied = 'style' in globals()
    if pstyle:
        try:
            from mpltools import style
            style.use(pstyle)
            style_applied = True
        except ImportError:
            print("Need mpltools for setting styles (pip install mpltools)")

    import gen_level

    try:
        import brewer2mpl
        all_colors = brewer2mpl.get_map('Paired', 'Qualitative', 12).hex_colors
    except ImportError:
        print("Install brewer2mpl for better colors (pip install brewer2mpl)")
        all_colors = ('green', 'orange', 'red', 'blue',
                      'black', 'olive', 'purple', '#6960EC', '#F0FFFF',
                      '#728C00', '#827B60', '#F87217', '#E55451',
                      '#F88017', '#C11B17', '#17BFC2', '#C48793')

    cur_colors = collections.defaultdict(lambda: all_colors)
    assigned = dict()

    if file:
        try:
            inf = open(file, "r")
        except IOError:
            # Best effort, as before: an unreadable file means nothing to plot.
            return
    else:
        inf = sys.stdin

    timestamps = dict()
    value = dict()
    val = ""
    try:
        for r in csv.reader(inf):
            if burst:
                if len(r) != 2:
                    continue
                ts = 0
                val, event = r
            else:
                # timestamp,event,value
                if len(r) < 3:
                    continue
                print(r)
                if len(r) >= 5:
                    # Only the first three columns are used; slicing also
                    # accepts rows that carry more than five columns.
                    ts, event, val = r[:3]
                elif len(r) == 4:
                    ts, val, event = r[0], r[1], r[3]
                else:
                    ts, val, event = r

            if event not in assigned:
                # Hand out the next free colour of this event's level and
                # start over once the palette is exhausted.
                level = gen_level.get_level(event)
                assigned[event] = cur_colors[level][0]
                cur_colors[level] = cur_colors[level][1:]
                if len(cur_colors[level]) == 0:
                    cur_colors[level] = all_colors
                value[event] = []
                timestamps[event] = []

            timestamps[event].append(float(ts))
            try:
                value[event].append(float(val.replace("%", "")))
            except ValueError:
                value[event].append(0.0)
    finally:
        # Close only what was opened here; stdin belongs to the caller.
        if inf is not sys.stdin:
            inf.close()

    if not assigned:
        print("No events found in input; nothing to plot")
        return

    levels = dict()
    for j in assigned.keys():
        levels[gen_level.get_level(j)] = True

    if xkcd:
        try:
            plt.xkcd()
        except (NameError, AttributeError):
            # A matplotlib without xkcd() raises AttributeError on lookup.
            print("Please update matplotlib. Cannot enable xkcd mode.")

    if normalize:
        for key in value:
            peak = max(value[key])  # computed once per series
            value[key] = [numpy.float64(entry) / peak for entry in value[key]]

    if seq:
        os.umask(0)
        if os.path.exists(seq):
            shutil.rmtree(seq)
        os.makedirs(seq)

    def save_single(legend_label, file_stem):
        # In sequence mode each series becomes its own image and the axes are
        # cleared for the next one.
        if seq:
            leg = ax.legend([legend_label], loc='upper left')
            leg.get_frame().set_alpha(0.5)
            plt.savefig(seq + "/" + file_stem)
            plt.cla()

    n = 1
    print(list(assigned.keys()))
    for l in levels.keys():
        ax = plt.subplot(len(levels), 1, n)
        if val.find('%') >= 0:
            ax.set_ylim(0, 100)
        t = []
        for j in assigned.keys():
            print("%s %s %s" % (j, gen_level.get_level(j), l))
            if gen_level.get_level(j) == l:
                t.append(j)
                if burst:
                    ax.plot(value[j])
                elif style_applied:
                    ax.plot(timestamps[j], value[j])
                else:
                    ax.plot(timestamps[j], value[j], color=assigned[j])
                save_single(j, j)
        leg = ax.legend(t, loc='upper left')
        leg.get_frame().set_alpha(0.5)
        n += 1

    for components in op_sum.values():
        print(components)
        if not all(c in value for c in components):
            print("Skipping %s: counter missing from input" % '+'.join(components))
            continue
        sum_value = sum(numpy.array(value[c]) for c in components)
        if not burst:
            ax.plot(timestamps[components[0]], sum_value)
        else:
            ax.plot(sum_value)
        save_single('+'.join(components), '+'.join(components))

    for components in op_div:
        print(components)
        if not all(c in value for c in components):
            print("Skipping %s: counter missing from input" % '/'.join(components))
            continue
        ratio = [numpy.float64(x) / y
                 for x, y in zip(value[components[0]], value[components[1]])]
        if not burst:
            ax.plot(timestamps[components[0]], ratio)
        else:
            ax.plot(ratio)
        save_single('/'.join(components), '_'.join(components))

    plt.xlabel('Time')
    if val.find('%') >= 0:
        plt.ylabel('Bottleneck %')
    else:
        plt.ylabel("Counter value")

    if output:
        plt.savefig(output)
    elif not seq:
        plt.show()
#!/usr/bin/env python
# a bar plot with errorbars
# %matplotlib inline
import h5py
import sys
import os
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpltools import style

style.use('mystyle')

# path = 'C:\\Users\\KOM\\Desktop\\EMG_Praktikumsgruppe\\messungen\\'
path = './'

# Recordings that used to be hard-coded here:
#filename = "florian_12kg_2014.12.15.hdf5"
#filename = "record_florian_7500g_max.hdf5"
#filename = "florian_10kg_stehen_2014.12.19.hdf5"
#filename = "tobias_5kg.hdf5"
# The recording is now taken from the first command-line argument.
filename = sys.argv[1]
print("Processing " + filename)

# Output prefix: the file's base name without ".hdf5", under plots/.
plot_basename = "plots/%s" % os.path.basename(filename).replace(".hdf5", "")

# Open HDF5 File
f = h5py.File(path + filename)
# Copy actual samples into a simple array
from poisson_disc import PoissonDiskSampler
import matplotlib.pyplot as plt
from mpltools import style
style.use(['ggplot'])
from datetime import datetime


def main():
    """Draw one Poisson-disc sample set, report its size and timing, and plot it."""
    width, height = 480, 320
    radius = 5

    started = datetime.now()
    sampler = PoissonDiskSampler(width, height, radius)
    samples = sampler.get_sample()
    print('samples size:  %d' % len(samples))
    print('generating samples took  %s' % (datetime.now() - started,))

    # Integer x / y coordinates of every sample point.
    xs = [int(point[0]) for point in samples]
    ys = [int(point[1]) for point in samples]

    plt.plot(xs, ys, 'o')
    plt.xlim([0, width])
    plt.ylim([0, height])
    plt.show()


if __name__ == '__main__':
    main()
Creation date : 2014.02.13 Last Modified : 2015.01.30 Modified By : Jin Kim jjinking(at)gmail(dot)com ''' import csv import cPickle import matplotlib.pyplot as plt import numpy as np import pandas as pd import Queue import sys from collections import defaultdict from contextlib import closing from datetime import datetime from mpltools import style; style.use('ggplot') from sklearn import cross_validation from sklearn.ensemble import RandomForestClassifier import dataio def df_equal(df1, df2, decimals=None): ''' Compare the values of two pandas DataFrame objects element by element, and if every single element is equal, return True Parameter decimals determines the number of decimal places to round decimal values before comparing ''' # First, compare the sizes if df1.shape != df2.shape:
perf stat -I1000 -x, -o file ... toplev -I1000 -x, -o file ... interval-plot.py file (or stdin) delimeter must be , this is for data that is not normalized.''') p.add_argument('--xkcd', action='store_true', help='enable xkcd mode') p.add_argument('--style', help='set mpltools style (e.g. ggplot)') p.add_argument('file', help='CSV file to plot (or stdin)', nargs='?') p.add_argument('--output', '-o', help='Output to file. Otherwise show.', nargs='?') args = p.parse_args() if args.style: try: from mpltools import style style.use(args.style) except ImportError: print "Need mpltools for setting styles (pip install mpltools)" import gen_level try: import brewer2mpl all_colors = brewer2mpl.get_map('Paired', 'Qualitative', 12).hex_colors except ImportError: print "Install brewer2mpl for better colors (pip install brewer2mpl)" all_colors = ('green','orange','red','blue', 'black','olive','purple','#6960EC', '#F0FFFF', '#728C00', '#827B60', '#F87217', '#E55451', # 16 '#F88017', '#C11B17', '#17BFC2', '#C48793') # 20
return fig def test_simple_plot(): fig, ax = plt.subplots() ax.plot([0, 1]) ax.set_xlabel('x-label') ax.set_ylabel('y-label') ax.set_title('title') return fig # Only show styles defined by package, not by user. base_styles = style.baselib.keys() for sty in base_styles: # reset matplotlib defaults before applying new style plt.rcdefaults() style.use(sty, use_baselib=True) print "Plotting tests for '%s' style" % sty fig = test_artists_plot() fig.savefig(pth.join(PATH, 'test_artists_png', sty + '.png')) fig.savefig(pth.join(PATH, 'test_artists_pdf', sty + '.pdf')) fig = test_simple_plot() fig.savefig(pth.join(PATH, 'test_simple_png', sty + '.png')) fig.savefig(pth.join(PATH, 'test_simple_pdf', sty + '.pdf'))
from mpl_toolkits.mplot3d import Axes3D from mpltools import style from scipy.io import loadmat import mympltools as util import numpy as np import matplotlib.pyplot as plt style.use('dippa3D') d=loadmat('harmonic_3D.mat') keys = ('lh_em_n','est_em_n','lh_bfgs_n','est_bfgs_n') colors = ('#348ABD','#E24A33') labels = ('$\sigma_\omega$','$\sigma_x$','$\ell$') w = 426.79134/72.27 #h = w #fig = plt.figure(figsize=(w,2*w),facecolor='w') zllim = 1.5e4 for j in range(2): fig,ax = util.getpadfigure(figw=w/1.2,figh=w/1.2,is3D=True) #fig = plt.figure(figsize=(w,w),facecolor='w') #ax = fig.add_subplot(111,projection='3d') lh = d[keys[j*2]] est = d[keys[j*2+1]] for k in range(100): zi = lh[:,k]>zllim ax.plot(est[0,zi,k],est[1,zi,k],lh[zi,k], alpha=0.5, color=colors[j], lw=0.9, marker=None, markersize=3)
""" =============== Multiple styles =============== You can specify multiple plot styles by passing a list of style names to `style.use`. The styles are evaluated from the first to last element of the list, so if there are settings that are defined in multiple styles, the settings in the later style files will override those in the earlier files. In this example, the 'ggplot' style alters the colors of elements to make the plot pretty, and the 'pof' style (Physics of Fluids journal) alters the figure size so that it fits in a column, alters line and text sizes, etc. """ import numpy as np import matplotlib.pyplot as plt from mpltools import style style.use(["ggplot", "pof"]) x = np.linspace(0, 2 * np.pi) plt.plot(x, np.cos(x)) plt.xlabel("x label") plt.ylabel("y label") plt.title("title") plt.show()
import matplotlib.pyplot as plt

from mpltools import style
from mpltools import layout


def color_cycle_example(ax):
    """Plot one phase-shifted sine curve per entry of the active colour cycle.

    The number of curves equals the length of the colour cycle configured in
    ``plt.rcParams``; the phase shifts are spread evenly over ``[0, L)``.
    """
    L = 6
    x = np.linspace(0, L)
    # 'axes.color_cycle' does not exist in newer matplotlib releases, which
    # expose the cycle as 'axes.prop_cycle'; fall back to the old key so the
    # example also runs on the releases it was written for.
    try:
        ncolors = len(plt.rcParams['axes.prop_cycle'])
    except KeyError:
        ncolors = len(plt.rcParams['axes.color_cycle'])
    shift = np.linspace(0, L, ncolors, endpoint=False)
    for s in shift:
        ax.plot(x, np.sin(x + s), 'o-')


def image_and_patch_example(ax):
    """Show a random 20x20 image with a circle patch drawn on top of it."""
    ax.imshow(np.random.random(size=(20, 20)), interpolation='none')
    c = plt.Circle((5, 5), radius=5, label='patch')
    ax.add_patch(c)


style.use('grayscale')

figsize = layout.figaspect(0.5)
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=figsize)

color_cycle_example(ax1)
image_and_patch_example(ax2)

plt.show()
#load configurations
mainconf = minidom.parse('../main_config.xml')
mainconfigurations = mainconf.getElementsByTagName('config')


def _config_text(tag):
    """Return the nodeValue of the first child of the first <tag> element
    found below the first <config> element."""
    return mainconfigurations[0].getElementsByTagName(tag)[0].childNodes[0].nodeValue


# Both directories are stored relative to the parent directory.
log_dir = "../" + _config_text('log_dir')
plot_dir = "../" + _config_text('plot_dir')

chunk_size = None
file_size_distribution = None
queue_limit = None

#plot settings
legend_pos = (0.5, -0.2)
style.use('ieee.transaction')

# Earlier experiments, kept disabled:
# pl.rcParams['lines.linewidth'] = 2
# pl.rcParams['font.weight']="large"
# pl.rcParams['legend.loc'] = 'best'
# pl.rcParams['legend.fancybox']=True#, shadow=True
# pl.rcParams['legend.bbox_to_anchor']=(1, 0.5)
pl.rc('legend', loc='upper center')  #, bbox_to_anchor=(1, 0.5))#, color='r')

markerslist = ["o", "v", "^", "s", "*", "D", "p", "<", ">", "H",
               "1", "2", "3", "4"]
# markerslist=["o","o","v","v","^","^","s","s","*","*","D","D","p","p","<","<"]
# markerslist=["x","x","x","x","x"]

pl.rcParams['savefig.dpi'] = 300
""" Reference for matplotlib artists This example displays several of matplotlib's graphics primitives (artists) drawn using matplotlib API. A full list of artists and the documentation is available at http://matplotlib.org/api/artist_api.html. Copyright (c) 2010, Bartosz Telenczuk BSD License """ import matplotlib.pyplot as plt plt.rcdefaults() from mpltools import style style.use('gallery') import numpy as np import matplotlib.pyplot as plt import matplotlib.path as mpath import matplotlib.lines as mlines import matplotlib.patches as mpatches from matplotlib.collections import PatchCollection def label(xy, text): y = xy[1] - 0.15 # shift y-value for label so that it's below the artist plt.text(xy[0], y, text, ha="center", family='sans-serif', size=14) fig, ax = plt.subplots() # create 3x3 grid to plot the artists grid = np.mgrid[0.2:0.8:3j, 0.2:0.8:3j].reshape(2, -1).T
""" Simple demo of a horizontal bar chart. """ import matplotlib.pyplot as plt; plt.rcdefaults() from mpltools import style; style.use('gallery') import numpy as np import matplotlib.pyplot as plt # Example data people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim') y_pos = np.arange(len(people)) performance = 3 + 10 * np.random.rand(len(people)) error = np.random.rand(len(people)) plt.barh(y_pos, performance, xerr=error, align='center', alpha=0.4) plt.yticks(y_pos, people) plt.xlabel('Performance') plt.title('How fast do you want to go today?') plt.show()
# import waldo.images.worm_finder as wf import waldo.metrics.report_card as report_card # import waldo.metrics.step_simulation as ssim # import waldo.viz.eye_plots as ep # from waldo.gui import pathcustomize STYLE = 'ggplot' try: # MPL 1.4+ plt.style.use(STYLE) except AttributeError: # fallback to mpltools from mpltools import style style.use(STYLE) class CalibrationBar(QtGui.QWidget): calibration_changed = QtCore.pyqtSignal([]) def __init__(self, enclosureSizeValue, figure, ax, color=(0.5, 0.8, 0.4, 1), parent=None): super(CalibrationBar, self).__init__() self.enclosureSizeValue = enclosureSizeValue self.figure = figure self.ax = ax self.parent = parent self.plate_distance = None self.plate_distance_artists = [] self.color = color
import numpy as np import matplotlib.pyplot as plt from mpltools import style from scipy.stats import multivariate_normal style.use('ggplot') def gaussian(x, mu, sigma): """ This is the pdf for the Gaussian defined by mu and sigma for a certain sample x. """ normalizer = 1. / (2 * np.pi)**(x.shape[0] / 2) * 1. / np.sqrt( np.linalg.det(sigma)) return normalizer * np.exp( -1. / 2 * np.dot(x - mu, np.dot(np.linalg.inv(sigma), x - mu))) #return multivariate_normal.pdf(x, mu, sigma) # Scipy method: significantly slower than implementation above, results are equal def loglikelihood(pi, mu, sigma, data): """ Computes the log likelihood as a function of the mixing coefficients, mu, sigma and the data. """ return np.sum([ np.log( np.sum([pi[k] * gaussian(x, mu[k], sigma[k]) for k in range(0, K)])) for x in data ]) def EM(data, K, iterations=100, min_delta=1.0e-4):
def _generate_plots(self, report_name):
    """
    Render the latency charts of a report as PNG files.

    Every file is written below ``results/<report_name>/plots/``; that
    directory is not created here.

    For the ``Latency`` column of ``self.df`` as a whole:

    * ``hist_prob_all.png``    -- normalised histogram plus KDE
    * ``hist_prob_line90.png`` -- same, restricted to values below the
      90th percentile
    * ``percentiles.png``      -- sorted latencies with the deciles marked

    For every ``(label, data)`` pair yielded by ``self._group_by_operation``
    (``<name>`` being ``self._normalize_test_name(label)``):

    * ``<name>_hist_prob_all.png``
    * ``<name>_hist_prob_90line.png``
    * ``<name>_requests.png``  -- latency of each request, in order
    * ``<name>_percentiles.png``

    Data from ``self.perfmon`` is not plotted: the previous draft of that
    code existed only as commented-out lines and has been removed.

    :param report_name: name of the report; selects the sub-directory of
        ``results/`` that receives the plots
    """
    # Global appearance for every figure drawn below.
    rc('font', **{'size': '8'})
    style.use('ggplot')

    plots_dir = 'results/' + report_name + '/plots/'
    deciles = np.array(
        [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0])

    def _finish_figure(xlabel, ylabel, title, file_name):
        # Label the current figure, save it under plots_dir and close it.
        plt.xlabel(xlabel, fontsize=9)
        plt.ylabel(ylabel, fontsize=9)
        plt.title(title, fontsize=10)
        plt.tick_params(axis='both', which='major', labelsize=8)
        plt.tick_params(axis='both', which='minor', labelsize=6)
        plt.tight_layout()
        plt.savefig(plots_dir + file_name)
        plt.close()

    def _plot_histogram(series, title, file_name, figsize):
        # Normalised histogram of ``series`` with a KDE curve on top.
        plt.figure(figsize=figsize, dpi=150)
        # NOTE(review): ``normed`` was replaced by ``density`` in newer
        # matplotlib releases; kept because this module targets old
        # matplotlib/mpltools -- confirm the supported versions.
        series.hist(normed=True, alpha=0.2)
        try:
            series.plot(kind='kde')
        except np.linalg.LinAlgError:
            pass  # if singular matrix - no KDE curve, histogram only
        _finish_figure('Response time', 'Probability', title, file_name)

    def _below_90th(series):
        # Values of ``series`` strictly below its 90th percentile.
        return series[series < np.percentile(series, 90)]

    def _plot_percentiles(series, file_name, figsize):
        # Sorted latencies with a red dot at every decile.
        # The curve is the sorted data itself; it used to be its cumulative
        # sum, which does not match the 'Response time' axis label.
        ordered = np.sort(np.asarray(series))
        marks = np.percentile(ordered, deciles)
        positions = (len(ordered) - 1) * deciles / 100.
        plt.figure(figsize=figsize, dpi=150)
        plt.plot(ordered)
        plt.plot(positions, marks, 'r.')
        # A real list (not ``map``) so the labels also work on Python 3.
        plt.xticks(positions, [str(value) for value in deciles])
        _finish_figure('Percentile', 'Response time', 'Percentiles',
                       file_name)

    # ---- whole data set ----------------------------------------------------
    latency = self.df['Latency']
    overall_size = (8, 5)
    _plot_histogram(latency, 'Histogram of all response time',
                    'hist_prob_all.png', overall_size)
    _plot_histogram(_below_90th(latency),
                    'Histogram of 90% line response time',
                    'hist_prob_line90.png', overall_size)
    _plot_percentiles(latency, 'percentiles.png', overall_size)

    # ---- one set of charts per operation ------------------------------------
    operation_size = (6, 4)
    for label, data in self._group_by_operation:
        file_name = self._normalize_test_name(label)
        op_latency = data['Latency']

        # histogram of all response time
        _plot_histogram(op_latency, 'Histogram of all response time',
                        file_name + '_hist_prob_all.png', operation_size)

        # histogram of 90% line response time
        _plot_histogram(_below_90th(op_latency),
                        'Histogram of 90% line response time',
                        file_name + '_hist_prob_90line.png', operation_size)

        # scatterplot: green circles, one per request
        plt.figure(figsize=operation_size, dpi=150)
        plt.plot(range(1, len(op_latency) + 1), op_latency, 'o',
                 color='g', alpha=0.50)
        _finish_figure('Request', 'Response time', 'Requests times',
                       file_name + '_requests.png')

        # percentile plot
        _plot_percentiles(op_latency, file_name + '_percentiles.png',
                          operation_size)
from collections import defaultdict
import string
import json
from datetime import datetime
from matplotlib import pyplot as plt

# Styling is purely cosmetic, so any failure to load or apply it is ignored.
# ``except Exception`` (instead of a bare ``except``) lets KeyboardInterrupt
# and SystemExit propagate.
try:
    from mpltools import style
    style.use('ggplot')
except Exception:
    pass

with open('words.json', 'rb') as fp:
    d = json.load(fp)

gangsters = ['attackers','gunman','gunmen','thugs','armed','robbers']
terrorists = ['terrorist','terrorists','terrorism','terror','alshabab','shabab','shabaab','alshabaab','al-shabab','islamist','islam']

# Hashed copies for the membership tests below; the lists above are kept
# unchanged for any other code that uses them.
_gangster_terms = frozenset(gangsters)
_terrorist_terms = frozenset(terrorists)

# c maps "<y with '-' replaced by spaces> <h>" to a counter holding the summed
# values of the terms of each category ('Terrorists' / 'Gangsters').
# NOTE(review): y and h look like a date and an hour -- confirm against the
# producer of words.json.
c = dict()
for y in d:
    day = str(y).replace('-', ' ')  # same as ' '.join(str(y).split('-'))
    buckets = d[y]
    for h in buckets:
        date = day + " " + str(h)
        if date not in c:
            c[date] = defaultdict(int)
        counts = c[date]
        for t, v in buckets[h]:
            # A term in both lists counts as 'Terrorists' only.
            if t in _terrorist_terms:
                counts['Terrorists'] += v
                continue
            if t in _gangster_terms:
                counts['Gangsters'] += v
                continue
def draw(dataFileName):
    """
    Plot the line data stored in a MATLAB ``.mat`` file and return the axes.

    The file is read with ``loadmat``. Its ``data`` entry is iterated as a
    sequence of plot-argument groups (x, y and an optional format string);
    groups with fewer than two items are skipped. Optional entries and the
    value used when they are absent:

    * ``xlabel``, ``ylabel``, ``title``, ``legend`` -- ``None`` (not drawn)
    * ``legendkw`` -- ``{}``; converted with ``_todict`` and passed to
      ``ax.legend``
    * ``w`` -- ``5``; first argument of ``util.getpadfigure``
    * ``margins``, ``h`` -- omitted; otherwise passed to
      ``util.getpadfigure`` as ``margins=`` and ``figh=``
    * ``alpha`` -- ``0.05``; second argument of ``util.padaxis``
    * ``axis`` -- ``None``; passed to ``util.padaxis`` as ``l=``
    * ``ticklabels`` -- ``None``; otherwise a pair of flags, a false flag
      removes the x (first) or y (second) tick labels

    :param dataFileName: path of the ``.mat`` file
    :return: the axes the data was drawn on
    """
    A = np.array  # shortcut
    style.use('dippa')
    cycle = plt.rcParams["axes.color_cycle"]
    defc = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    inp = loadmat(dataFileName, squeeze_me=True, struct_as_record=False)

    def _entry(key, fallback=None):
        # Value stored under ``key`` in the file, or ``fallback`` if absent.
        try:
            return inp[key]
        except KeyError:
            return fallback

    xlabel = _entry("xlabel")
    ylabel = _entry("ylabel")
    title = _entry("title")
    legend = _entry("legend")
    w = _entry("w", 5)
    alpha = _entry("alpha", 0.05)
    axlim = _entry("axis")
    ticklabels = _entry("ticklabels")

    # Kept as an explicit try block so that a KeyError raised inside
    # _todict still falls back to an empty dict.
    try:
        legendkw = _todict(inp["legendkw"])
    except KeyError:
        legendkw = {}

    # Only the size options present in the file reach getpadfigure.
    margins = {}
    if "margins" in inp:
        margins['margins'] = inp["margins"]
    if "h" in inp:
        margins['figh'] = inp["h"]

    fig, ax = util.getpadfigure(w, **margins)

    # plot
    lines = []
    for index, group in enumerate(inp["data"]):
        if len(group) < 2:
            continue
        # A third item that is not exactly a ``unicode`` string is replaced
        # by an empty format string (the group is modified in place).
        if len(group) > 2 and type(group[2]) is not unicode:
            group[2] = ""
        plot_args = group[0:3]
        plot_kw = _setcolor(group, cycle, defc, index)
        lines.append(ax.plot(*plot_args, **plot_kw)[0])
        ax.hold(True)

    util.padaxis(ax, alpha, l=axlim)

    if title is not None:
        ax.set_title(title, family='serif')
    if xlabel is not None:
        ax.set_xlabel(xlabel, family='serif')
    if ylabel is not None:
        ax.set_ylabel(ylabel, family='serif')

    if legend is not None:
        legend_box = ax.legend(lines, legend.flat, **legendkw)
        for entry in legend_box.get_texts():
            entry.set_family('serif')

    if ticklabels is not None:
        show_x, show_y = ticklabels[0], ticklabels[1]
        if not show_x:
            ax.set_xticklabels([])
        if not show_y:
            ax.set_yticklabels([])

    return ax