def show_two_barplots(df, road_name, save=False, btype='less'):
    """Draw two stacked horizontal bar charts comparing ``train`` and ``depart`` per link.

    The upper chart shows links where ``train <= VOL_PERCENT * depart`` and the
    lower chart shows links where ``train * VOL_PERCENT > depart``. In each
    chart the larger of the two values is drawn first (pastel) and the smaller
    one is drawn on top of it (muted).

    Side effects: ``df`` is modified in place. Missing ``depart`` values are
    replaced with 0 and the helper columns ``st_from_short``, ``st_to_short``,
    ``link``, ``bottom`` and ``top`` are added.

    :param df: frame with columns ``st_from_show``, ``st_to_show``, ``train``
        and ``depart``
    :param road_name: base name of the PNG written when ``save`` is true
    :param save: when true, write ``REPORT_FOLDER + road_name + '.png'`` and
        register it through ``add_image``
    :param btype: accepted for interface compatibility; not used by this function
    """
    # Truncate station names to 25 characters to build the y-axis labels.
    df['st_from_short'] = df.st_from_show.apply(lambda x: str(x)[:25])
    df['st_to_short'] = df.st_to_show.apply(lambda x: str(x)[:25])
    df['link'] = df.st_from_short + ' - ' + df.st_to_short

    sns.set_style('whitegrid')
    sns.set_context('poster', font_scale=0.7,
                    rc={'axes.titlesize': 18, 'axes.labelsize': 14})
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(14, 20))

    # Assign the filled column back: an in-place fill on the attribute access
    # does not reliably reach the parent frame (and never does with
    # copy-on-write enabled).
    df['depart'] = df['depart'].fillna(0)

    # bottom = larger value (drawn first), top = smaller value (drawn over it).
    train_not_above = df['train'] <= df['depart']
    df['bottom'] = df['depart'].where(train_not_above, df['train'])
    df['top'] = df['train'].where(train_not_above, df['depart'])

    shortage = df[df.train <= VOL_PERCENT * df.depart].sort_values('bottom')
    excess = df[df.train * VOL_PERCENT > df.depart].sort_values('bottom')

    # The colour codes "b"/"r" resolve against the palette active at call
    # time, so the pastel bars must be drawn before switching to muted.
    sns.set_color_codes('pastel')
    sns.barplot(x='bottom', y='link', data=shortage, label='Поезда из АС ССП',
                color="b", orient='h', ax=ax[0])
    sns.barplot(x='bottom', y='link', data=excess, label='Поезда в результатах',
                color="r", orient='h', ax=ax[1])
    sns.set_color_codes('muted')
    sns.barplot(x='top', y='link', data=shortage, label='Поезда в результатах',
                color="b", orient='h', ax=ax[0])
    sns.barplot(x='top', y='link', data=excess, label='Поезда из АС ССП',
                color="r", orient='h', ax=ax[1])

    ax[0].legend(ncol=1, loc="upper right", frameon=True)
    ax[1].legend(ncol=1, loc="upper right", frameon=True)
    ax[0].set(xlabel='', title='Нехватка запланированных поездов')
    ax[1].set(xlabel='', title='Избыток запланированных поездов')
    sns.despine()

    if save:
        filename = road_name + '.png'
        fig.savefig(REPORT_FOLDER + filename, bbox_inches='tight')
        add_image(filename, scale=1.0)
def shot_distribution(self):
    """Scatter-plot the shot locations, show the figure and save it as a PNG.

    Reads ``self.shot_df`` (columns ``LOC_X`` and ``LOC_Y``) and writes
    ``shot_distribution.png`` to the current working directory.
    """
    sns.set_style('white')
    sns.set_color_codes()

    figure = plt.figure(figsize=(12, 11))
    shots = self.shot_df
    plt.scatter(shots.LOC_X, shots.LOC_Y)
    plt.show()

    # Save at the resolution the figure was created with.
    figure.savefig('shot_distribution.png', dpi=figure.dpi)
def generate_clusters(words, vectors_in_2D, print_status=True):
    """Cluster 2-D word vectors with HDBSCAN and save an annotated scatter plot.

    HDBSCAN is hierarchical density-based spatial clustering of applications
    with noise (https://github.com/lmcinnes/hdbscan).

    :param words: labels to draw next to the points, aligned with the rows of
        ``vectors_in_2D``; ``None`` entries are not annotated
    :param vectors_in_2D: array whose rows are ``(x, y)`` points (it must
        support ``.T``)
    :param print_status: forwarded to ``get_visualization_file_path``
    :return: the cluster label assigned to each point by ``fit_predict``
    """
    vectors = vectors_in_2D
    sns.set_context('poster')
    sns.set_color_codes()
    plot_kwds = {'alpha': 0.5, 's': 500, 'linewidths': 0}

    clusters = HDBSCAN(min_cluster_size=2).fit_predict(vectors)
    palette = sns.color_palette("husl", np.unique(clusters).max() + 1)
    # Points with a negative label get no palette colour and are drawn black.
    colors = [palette[cluster_index] if cluster_index >= 0 else (0.0, 0.0, 0.0)
              for cluster_index in clusters]

    plt.figure(figsize=(30, 30))
    x_vals = vectors.T[0]
    y_vals = vectors.T[1]
    plt.scatter(x_vals, y_vals, c=colors, **plot_kwds)
    plt.axis('off')
    plt.ylim(min(y_vals) - 0.3, max(y_vals) + 0.3)
    plt.xlim(min(x_vals) - 0.3, max(x_vals) + 0.3)

    # The font file is resolved relative to the current working directory.
    font_path = getcwd() + '/fonts/Comfortaa-Regular.ttf'
    font_property = matplotlib.font_manager.FontProperties(fname=font_path, size=24)

    for i, word in enumerate(words):
        if word is None:
            continue
        if not isinstance(word, str):
            # Non-str labels are transliterated before being drawn.
            word = unidecode(word)
        word = word.replace("_", " ")
        plt.annotate(word, xy=(x_vals[i], y_vals[i] + 0.05),
                     font_properties=font_property, color=colors[i], ha="center")

    # Float literals keep these fractions correct even where "/" between
    # integers truncates (Python 2).
    plt.subplots_adjust(left=500.0 / 3000, right=2900.0 / 3000,
                        top=1.0, bottom=300.0 / 2700)
    plt.savefig(get_visualization_file_path(print_status), bbox_inches="tight")
    return clusters
def createSubOverviewPage(self):
    """Build the liability overview page and return its grid layout.

    Queries the ``LIABILITY`` table for every maturity date on or after
    ``self.sysdate`` and draws, per date, a horizontal bar for the summed
    amount including return with the summed amount drawn over it.

    :return: a ``QtGui.QGridLayout`` holding the widget that parents the
        figure canvas
    """
    layout = QtGui.QGridLayout()
    w = QtGui.QWidget()

    sns.set(style="whitegrid")
    f, ax = plt.subplots(figsize=(20, 12))
    canvas = figureCanvas(f)
    canvas.setParent(w)

    # The date is interpolated from self.sysdate; the query text itself is fixed.
    sql = ("SELECT EXP_DATE, SUM(AMOUNT), "
           "SUM(AMOUNT*(1+EXP_RETURN*(datediff(EXP_DATE, SETTLE_DATE)+1)/36500.0)) "
           "FROM LIABILITY WHERE EXP_DATE>='%s' "
           "GROUP BY EXP_DATE ORDER BY EXP_DATE") % self.sysdate.date().toPyDate()
    q = QtSql.QSqlQuery(sql)

    dates, vals = [], []
    while q.next():
        dates.append(q.value(0).toDate().toPyDate().isoformat())
        # toDouble() returns a pair; element 0 is the converted value.
        vals.append((q.value(1).toDouble()[0], q.value(2).toDouble()[0]))
    data = pd.DataFrame(vals, index=dates, columns=['Amount', 'Total Return'])

    # Amount including return, drawn first so the part left visible behind
    # the principal bar is what the legend calls "Interest".
    sns.set_color_codes("pastel")
    sns.barplot(x='Total Return', y=dates, data=data, label='Interest',
                color="b", ax=ax)

    # Principal drawn on top in the darker shade of the same colour.
    sns.set_color_codes("muted")
    sns.barplot(x='Amount', y=dates, data=data, label="Principal",
                color="b", ax=ax)

    ax.legend(ncol=2, loc="upper right", frameon=True)
    ax.set(ylabel="Maturity Date", title='Liability Overview')
    sns.despine(left=True, bottom=True)

    layout.addWidget(w, 0, 0, 1, 1)
    return layout
def show_comparison(predicted, actual):
    """Show predicted and actual shot charts side by side.

    Made shots (``fgm`` true) are drawn green and missed shots red, each on a
    court drawn by ``draw_court``.

    :param predicted: frame with columns ``fgm``, ``x`` and ``y`` holding the
        classifier output
    :param actual: frame with the same columns holding the observed outcomes
    """
    # Split both frames into makes and misses (element-wise comparison).
    predicted_makes = predicted[predicted['fgm'] == True]    # noqa: E712
    predicted_misses = predicted[predicted['fgm'] == False]  # noqa: E712
    actual_makes = actual[actual['fgm'] == True]              # noqa: E712
    actual_misses = actual[actual['fgm'] == False]            # noqa: E712

    sns.set_style("white")
    sns.set_color_codes()
    # Separate names for the axes so the input frames are not shadowed.
    f, (predicted_ax, actual_ax) = plt.subplots(ncols=2, sharey=True)
    f.text(0.5,0.975,'Naive Bayes Make/Miss Shot Classification \n(showing results of classifying ~25% of Stephen Curry\'s shots from the 2014-2015 season)',horizontalalignment='center', verticalalignment='top')
    draw_court(ax=predicted_ax, outer_lines=True)
    draw_court(ax=actual_ax, outer_lines=True)

    predicted_ax.scatter(predicted_makes.x, predicted_makes.y, color='g')
    predicted_ax.scatter(predicted_misses.x, predicted_misses.y, color='r')
    predicted_ax.set_title('Predicted Shot Chart')

    actual_ax.scatter(actual_makes.x, actual_makes.y, color='g')
    actual_ax.scatter(actual_misses.x, actual_misses.y, color='r')
    actual_ax.set_title('Actual Shot Chart')

    # Set the limits on both axes explicitly: pyplot-level xlim/ylim only
    # reach the current (last created) axes, and only y is shared.
    for court_ax in (predicted_ax, actual_ax):
        court_ax.set_xlim(-300, 300)
        court_ax.set_ylim(-100, 500)
    plt.show()
def draw_static(columns_name):
    """Plot, per feature value of one column, its total and positive-label counts.

    Relies on module-level names defined elsewhere in this file: ``df`` (data
    frame), ``hashtable_path`` (shelve file), ``n`` (number of rows to scan),
    ``label_ary`` (per-row labels) and ``y_maxnum`` (maximum bars to show).
    Each cell of the column is parsed as ``:``-separated integers; 0 is
    skipped. Counts are scaled by the largest count and the chart is written
    to ``<columns_name>.png``.

    :param columns_name: name of the column in ``df`` and key in the shelve file
    """
    columns_value = df.loc[:, columns_name].values

    # Close the shelve file even if the lookup fails.
    fread = shelve.open(hashtable_path)
    try:
        dic = fread[columns_name][0]
    finally:
        fread.close()

    # One row per known value; columns are (idx - unused, positive, all).
    result = np.zeros((len(dic), 3))
    for i in trange(n, desc=columns_name):
        values = [int(x) for x in str(columns_value[i]).split(':')]
        label = label_ary[i]
        for val in values:
            if val == 0:
                continue
            row = dic[val] - 1  # indices stored in dic are 1-based
            result[row][2] += 1
            if label == 1:
                result[row][1] += 1

    max_x = np.max(result)
    if max_x > 0:
        # Guard against 0/0 when nothing was counted.
        result = result / max_x

    output = [['{}_'.format(key), result[dic[key] - 1][1], result[dic[key] - 1][2]]
              for key in dic.keys()]
    output.sort(key=lambda x: x[1], reverse=True)
    result = pd.DataFrame(output, columns=['idx', 'posi', 'all'])

    # Only show the first y_show values.
    y_total = result.shape[0]
    y_show = min(y_maxnum, y_total)
    result = result.head(y_show)
    fig_len = max(10, int(1.0 * result.shape[0] / 50 * 15))

    sns.set(style="whitegrid")
    f, ax = plt.subplots(figsize=(6, fig_len))
    sns.set_color_codes("pastel")
    sns.barplot(x="all", y="idx", data=result, label="Total", color="b", ax=ax)
    sns.set_color_codes("muted")
    sns.barplot(x="posi", y="idx", data=result, label="positive", color="b", ax=ax)
    ax.legend(ncol=2, loc="lower right", frameon=True)
    ax.set(xlim=(0, 1), ylabel="{}/{}".format(y_show, y_total),
           xlabel='{}({})'.format(columns_name, int(max_x)))
    sns.despine(left=True, bottom=True)

    f.savefig('{}.png'.format(columns_name))
    plt.close(f)
def scatterShots(dataframe):
    """Scatter the shot locations of ``dataframe`` over a half court and show it.

    :param dataframe: frame with ``LOC_X`` and ``LOC_Y`` columns
    """
    sns.set_style("white")
    sns.set_color_codes()

    plt.figure(figsize=(12, 11))
    drawCourt(outer_lines=True)

    shot_x = dataframe.LOC_X
    shot_y = dataframe.LOC_Y
    plt.scatter(shot_x, shot_y)

    # Limits chosen to just fit the half court.
    half_court_x = (-250, 250)
    # The y limits are given high-to-low so the axis is inverted and the hoop
    # ends up at the top of the plot.
    half_court_y = (422.5, -47.5)
    plt.xlim(*half_court_x)
    plt.ylim(*half_court_y)
    plt.show()
def drawShotChart(shot_df):
    """Display the head of ``shot_df`` and plot its shot locations on a court.

    :param shot_df: frame with ``LOC_X`` and ``LOC_Y`` columns
    """
    # Show every column of the preview instead of the truncated default.
    with pd.option_context('display.max_columns', None):
        preview = shot_df.head()
        display(preview)

    sns.set_style("white")
    sns.set_color_codes()

    plt.figure(figsize=(8, 16))
    plt.scatter(shot_df.LOC_X, shot_df.LOC_Y)
    draw_court(outer_lines=True)

    # The x limits are given high-to-low, so values descend from left to right.
    x_limits = (300, -300)
    y_limits = (-100, 950)
    plt.xlim(*x_limits)
    plt.ylim(*y_limits)
    plt.show()
def create_hist_plot(hist_dict, header, out_dir, data_file):
    """
    Save a horizontal bar chart of the counts and return them as table rows.

    See https://stanford.edu/~mwaskom/software/seaborn/examples/horizontal_barplot.html
    @param hist_dict: dict of label, count
    @param header: name of dictionary; whitespace is removed before it is used
        as the plot title, the file-name suffix and the column-name prefix
    @param out_dir: str, name of directory where files are to be saved
    @param data_file: name of data file, used to derive the output file name
    @return: a list of lists: a header row [<header>_key, <header>_count]
        followed by one [label, count] row per entry, sorted by descending count
    """
    # remove spaces in name
    header = "".join(header.split())

    # convert dict to a list sorted by descending count for the bar chart
    bar_data = sorted(([key, val] for key, val in hist_dict.items()),
                      key=itemgetter(1), reverse=True)

    # bar chart background style
    sns.set(style="whitegrid", font="Arial")
    # color options include pastel
    sns.set_color_codes("deep")

    f, ax = plt.subplots(figsize=(6, 6))
    new_df = pd.DataFrame(bar_data, columns=["key", "count"])
    sns.barplot(x="count", y="key", data=new_df, label="Total", color="b", ax=ax)
    # other options: xlim=(0, 24)
    ax.set(xlabel="Count", ylabel="")
    ax.set_title(header)

    # tight_layout may warn when labels do not fit; that is not actionable here
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        f.tight_layout()

    f_name = create_out_fname(data_file, suffix=header, base_dir=out_dir, ext=".png")
    f.savefig(f_name, dpi=300)
    # The figure is not returned, so release it once it has been written.
    plt.close(f)
    print("Wrote file: {}".format(f_name))

    # header row first, then the data rows
    count_to_print = [[header + "_key", header + "_count"]]
    count_to_print.extend([label, count] for label, count in bar_data)
    return count_to_print
def class_perc_plot(app_train, feature, label_rotation=True, horizontal_layout=True):
    """Plot category counts and the mean of ``TARGET`` per category for one feature.

    With fewer than 11 categories the bars are vertical and the two panels are
    placed side by side or stacked depending on ``horizontal_layout``;
    otherwise the bars are horizontal and the figure height grows with the
    number of categories. The figure is saved to ``pic/<feature>_perc.png``
    and then shown.

    :param app_train: frame containing ``feature`` and ``TARGET`` columns
    :param feature: name of the categorical column to summarise
    :param label_rotation: rotate the x tick labels by 40 degrees (vertical
        bars only)
    :param horizontal_layout: side-by-side panels instead of stacked ones
        (vertical bars only)
    """
    counts = app_train[feature].value_counts()
    df1 = pd.DataFrame({feature: counts.index, 'Number of contracts': counts.values})

    # Mean of TARGET per category value, highest first.
    cat_perc = app_train[[feature, 'TARGET']].groupby([feature], as_index=False).mean()
    cat_perc.sort_values(by='TARGET', ascending=False, inplace=True)

    print('\n{}'.format(feature))
    print(cat_perc)
    print(len(df1[feature].tolist()))

    n_categories = counts.shape[0]
    if n_categories >= 11:
        # Many categories: horizontal bars, height scaled with category count.
        fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 0.5 * n_categories))
        sns.set_color_codes("pastel")
        sns.barplot(ax=ax1, y=feature, x="Number of contracts", data=df1)
        sns.barplot(ax=ax2, y=feature, x='TARGET', order=cat_perc[feature], data=cat_perc)
        plt.xlabel('Percent of target with value 1 [%]', fontsize=10)
    else:
        if horizontal_layout:
            grid = dict(ncols=2, figsize=(12, 10))
        else:
            grid = dict(nrows=2, figsize=(12, 14))
        fig, (ax1, ax2) = plt.subplots(**grid)
        sns.set_color_codes("pastel")

        count_axes = sns.barplot(ax=ax1, x=feature, y="Number of contracts", data=df1)
        if label_rotation:
            count_axes.set_xticklabels(count_axes.get_xticklabels(), rotation=40)

        target_axes = sns.barplot(ax=ax2, x=feature, y='TARGET',
                                  order=cat_perc[feature], data=cat_perc)
        if label_rotation:
            target_axes.set_xticklabels(target_axes.get_xticklabels(), rotation=40)
        plt.ylabel('Percent of target with value 1 [%]', fontsize=10)

    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.tight_layout()
    plt.savefig('pic/' + feature + '_perc.png')
    plt.show()
def drawCommenter(self):
    """Plot comment, push and hate counts of the 100 most active commenters.

    Reads the mappings ``self.comments``, ``self.pushes`` and ``self.hates``
    (keyed by commenter), ranks commenters by descending comment count and
    draws the three series against rank.
    """
    ranked = sorted(self.comments.items(), key=lambda x: -x[1])[:100]
    sorted_cnts = [commenter for commenter, _ in ranked]
    print(sorted_cnts)

    y = [self.comments[u] for u in sorted_cnts]
    y_pushes = [self.pushes[u] for u in sorted_cnts]
    y_hates = [self.hates[u] for u in sorted_cnts]
    x = range(len(y))

    # Style must be configured before the axes are created to apply to them.
    sns.set(style='whitegrid')
    sns.set_color_codes('pastel')
    f, ax = plt.subplots(figsize=(10, 6))

    # Plot on the axes directly; seaborn no longer exposes pyplot as sns.plt.
    ax.plot(x, y, label='Total comments', color='blue')
    ax.plot(x, y_pushes, label='Total pushes', color='green')
    ax.plot(x, y_hates, label='Total hates', color='red')

    ax.legend(ncol=2, loc='upper right', frameon=True)
    ax.set(ylabel='counts', xlabel='Rank', title='Total comments')
    sns.despine(left=True, bottom=True)
    # show() takes no figure argument; a positional value is read as `block`.
    plt.show()
def Generate_scores(img):
    """Classify an image with VGG16 and plot the ten highest-scoring classes.

    Intended to run inside IPython/Jupyter: it calls ``display`` and enables
    the inline matplotlib backend through ``get_ipython``.

    :param img: path of the image file to classify
    """
    print("[INFO] loading and preprocessing image...")
    image = image_utils.load_img(img, target_size=(224, 224))
    image = image_utils.img_to_array(image)
    # The network is fed a batch, so add a leading axis.
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)

    print("[INFO] loading network...")
    model = VGG16(weights="imagenet")

    print("[INFO] classifying image...")
    preds = model.predict(image)
    result = decode_predictions(preds, top=10)[0]
    display(Image(img))

    # Drop the first column of each decoded tuple and keep the remaining two.
    # ``.iloc`` replaces the removed ``.ix`` indexer.
    result_frame = pd.DataFrame(result).iloc[:, 1:]
    result_frame.columns = ["item", "probability"]
    result_frame.index = result_frame.index + 1  # rank starting at 1

    import seaborn as sns
    import matplotlib.pyplot as plt
    get_ipython().run_line_magic('matplotlib', 'inline')

    plt.figure(figsize=(10, 7))
    sns.set_style('white')
    sns.set_context('talk', font_scale=1.8)
    sns.set_color_codes("pastel")
    ax = sns.barplot(x='probability', y='item', data=result_frame,
                     color="b", palette="Blues_r")
    # seaborn no longer exposes pyplot as ``sns.plt``; use pyplot directly.
    plt.title('What is it?')
    ax.set(xlim=(0, 1))
    ax.set(xlabel='Probability', ylabel='Object')
import APPIREDII_Blood_Params_Parser as dp import math """ Code to model the innate immune response coupled with the Alkaline Phosphatase simulator. Created on Mar 2016. Runtime is in minutes. Wound section added on June 2019 by Mark de Boer and Ben Dickens """ __author__ = "Presbitero" sns.set_palette("deep") sns.set_color_codes("deep") def vectorfield(w, t, p, params ): """ Define differential equations for the innate immune system. Arguments: w : vector of the state variables w=[N_R, AP_Eblood, AP_Etissue, AP_Eliver, AP_Sblood, AP_Stissue, ITMblood, ITMtissue, M_R, M_A, CH, N_A, ND_A, ACH, ND_N] t : time p : vector of the parameters """ N_R, AP_Eblood, AP_Etissue, AP_Eliver, AP_Sblood, AP_Stissue, ITMblood, ITMtissue, M_R, M_A, CH, N_A, ND_A, \
plt.legend(fontsize=16) plt.xlabel(r'$\theta$', fontsize=14) plt.gca().axes.get_yaxis().set_ticks([]) ''' iris = sns.load_dataset("iris") df = iris.query("species == ('setosa', 'versicolor')") y_0 = pd.Categorical(df['species']).codes x_n = ['sepal_length', 'sepal_width'] x_0 = df[x_n].values ''' palette = 'muted' sns.set_palette(palette) sns.set_color_codes(palette) np.set_printoptions(precision=2) pd.set_option('display.precision', 2) iris = sns.load_dataset("iris") df = iris.query("species == ('setosa', 'versicolor')") y_0 = pd.Categorical(df['species']).codes x_n = ['sepal_length', 'sepal_width'] x_0 = df[x_n].values x_0 = (x_0 - x_0.mean(axis=0)) / x_0.std(axis=0) iris_data = { 'n': x_0.shape[0], 'species': y_0, 'sepal_length': x_0[:, 0], 'sepal_width': x_0[:, 1]
def plotFeatureLOQ(tData, splitByBatch=True, plotBatchLOQ=False, zoomLOQ=False, logY=False, tightYLim=True, nbPlotPerRow=3, savePath=None, figureFormat='png', dpi=72, figureSize=(11, 7)):
    """
    Violin plot for each feature with line at LOQ concentrations. Option to split by batch, add each batch LOQs, split by SampleType.

    One panel is drawn per feature plus one extra panel that holds the legend. ``figureSize`` gives the size of one row of panels; the figure height is multiplied by the number of rows.

    :param TargetedDataset tData: :py:class:`TargetedDataset`
    :param bool splitByBatch: If ``True`` separate each violin plot by batch
    :param bool plotBatchLOQ: If ``True`` add lines at LOQs (LLOQ/ULOQ) for each batch, and points for samples that will be out of LOQ
    :param bool zoomLOQ: If ``True`` plots a zoomed ULOQ plot on top, all data in the centre and a zoomed LLOQ plot at the bottom
    :param bool logY: If ``True`` log-scale the y-axis
    :param bool tightYLim: if ``True`` ylim are close to the points but can let LOQ lines outside, if ``False`` LOQ lines will be part of the plot.
    :param int nbPlotPerRow: Number of plots to place on each row
    :param savePath: If ``None`` plot interactively, otherwise save the figure to the path specified
    :type savePath: None or str
    :param str figureFormat: If saving the plot, use this format
    :param int dpi: Plot resolution
    :param figureSize: Dimensions of the figure
    :type figureSize: tuple(float, float)
    :raises ValueError: if tData does not satisfy to the TargetedDataset definition for QC
    """
    # Check dataset is fit for plotting; validate a copy so that the
    # validation is not logged on the caller's object.
    tmpTData = copy.deepcopy(tData)
    validDataset = tmpTData.validateObject(verbose=False, raiseError=False, raiseWarning=False)
    if not validDataset['QC']:
        raise ValueError('Import Error: tData does not satisfy to the TargetedDataset definition for QC')

    # Plot setup
    nbFeat = tData.noFeatures + 1  # allow a spot for the legend box
    nbRow = int(numpy.ceil(nbFeat / nbPlotPerRow))
    newHeight = figureSize[1] * nbRow  # extend the plot height to allow for the multiple rows

    # With zoomed subplots each plot row takes 7 grid rows:
    # 6 for plot (1 ULOQ, 4 all, 1 LLOQ) + 1 x-axis label and title
    rowSpan = 7 if zoomLOQ else 1
    realNbRow = rowSpan * nbRow

    sns.set_style("ticks", {'axes.linewidth': 0.75})
    sns.set_color_codes(palette='deep')
    # Use the requested resolution for the figure as well as for the saved file.
    fig = plt.figure(figsize=(figureSize[0], newHeight), dpi=dpi)
    gs = gridspec.GridSpec(realNbRow, nbPlotPerRow)

    # x-axis label change depending on splitByBatch
    xLab = 'Batch' if splitByBatch else 'Sample Type'

    # Arguments shared by every helper call
    common = dict(tData=tData, splitByBatch=splitByBatch, plotBatchLOQ=plotBatchLOQ,
                  logY=logY, tightYLim=tightYLim, showLegend=False)
    # Arguments of the legend-only panel
    legendArgs = dict(featID=None, title=None, xLabel=None, xTick=False, yLabel=None,
                      yTick=False, subplot=None, flipYLabel=False, onlyLegend=True)

    # Loop over features (and a space to fit a legend)
    for featID in range(0, nbFeat):
        # Keep track of plot position
        gridRow, plot_horz_pos = divmod(featID, nbPlotPerRow)
        plot_vert_pos = gridRow * rowSpan

        # The last position is the legend
        if featID == (nbFeat - 1):
            if zoomLOQ:
                # legend use all the height available
                ax_leg = plt.subplot(gs[plot_vert_pos:(plot_vert_pos + 7), plot_horz_pos])
            else:
                ax_leg = plt.subplot(gs[plot_vert_pos, plot_horz_pos])
            _featureLOQViolinPlotHelper(ax=ax_leg, **legendArgs, **common)
            continue

        # Feature name and unit
        featName = tData.featureMetadata.loc[featID, 'Feature Name'] + " - (" + tData.featureMetadata.loc[featID, 'Unit'] + ")"
        # First plot in the row carries the y-label
        isFirstInRow = plot_horz_pos == 0

        if not zoomLOQ:
            ax_single = plt.subplot(gs[plot_vert_pos, plot_horz_pos])
            _featureLOQViolinPlotHelper(ax=ax_single, featID=featID, title=featName,
                                        xLabel=xLab, xTick=True,
                                        yLabel='Concentration' if isFirstInRow else None,
                                        yTick=True, subplot=None, flipYLabel=False,
                                        onlyLegend=False, **common)
            continue

        # Zoomed layout: ULOQ subplot = top 1/6th height, main plot 4/6th, LLOQ bottom 1/6th
        ax_ULOQ = plt.subplot(gs[plot_vert_pos, plot_horz_pos])
        ax_all = plt.subplot(gs[(plot_vert_pos + 1):(plot_vert_pos + 5), plot_horz_pos])
        ax_LLOQ = plt.subplot(gs[(plot_vert_pos + 5), plot_horz_pos])

        # Last plot of a row names its zoomed subplots on the right hand side;
        # a plot that is also first in its row is treated as first only.
        isLastInRow = plot_horz_pos == (nbPlotPerRow - 1)
        labelOnRight = isLastInRow and not isFirstInRow

        _featureLOQViolinPlotHelper(ax=ax_all, featID=featID, title=None,
                                    xLabel=None, xTick=False,
                                    yLabel='Concentration' if isFirstInRow else None,
                                    yTick=True, subplot=None, flipYLabel=False,
                                    onlyLegend=False, **common)
        _featureLOQViolinPlotHelper(ax=ax_ULOQ, featID=featID, title=featName,
                                    xLabel=None, xTick=False,
                                    yLabel='ULOQ' if labelOnRight else None,
                                    yTick=True, subplot='ULOQ', flipYLabel=labelOnRight,
                                    onlyLegend=False, **common)
        _featureLOQViolinPlotHelper(ax=ax_LLOQ, featID=featID, title=None,
                                    xLabel=xLab, xTick=True,
                                    yLabel='LLOQ' if labelOnRight else None,
                                    yTick=True, subplot='LLOQ', flipYLabel=labelOnRight,
                                    onlyLegend=False, **common)

    # Tight layout only without LOQ subplots
    if not zoomLOQ:
        fig.tight_layout()

    # Save or output
    if savePath:
        plt.savefig(savePath, bbox_inches='tight', format=figureFormat, dpi=dpi)
        plt.close()
    else:
        plt.show()
import sys
# Extend the import path with two hard-coded, machine-specific
# site-packages directories (a conda environment and a scikit-learn package).
sys.path.append( "/home/mohanty/.conda/envs/mohanty/lib/python2.7/site-packages/")
sys.path.append( "/home/mohanty/.conda/envs/.pkgs/scikit-learn-0.17.1-np110py27_0/lib/python2.7/site-packages" )
import matplotlib
# Select the Agg backend before seaborn / pyplot are imported below.
matplotlib.use('Agg')
import seaborn as sns
# Global plot style: dark grid, fonts scaled to 0.6, "dark" colour codes.
sns.set(style="darkgrid", font_scale=0.6)
sns.set_color_codes("dark")
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Shared small-size font properties object.
fontP = FontProperties()
fontP.set_size('small')
from matplotlib import rcParams
# Turn on matplotlib's automatic figure layout for every figure.
rcParams.update({'figure.autolayout': True})
import numpy as np
import pandas as pd
import os
import re
def plotStatusChart():
    """Render a stacked bar chart of open tasks per status and priority.

    Tasks come from ``getTasksDataFrame``; completed tasks are dropped and the
    ``UNKNOWN``, ``COMPLETED`` and ``ARCHIVED`` statuses are not charted.
    Statuses without any task are left out. The stack is produced by drawing
    cumulative totals from the tallest (up to Urgent) to the shortest (Low)
    so each bar covers the lower part of the previous one.

    :return: the chart as base64-encoded JPEG bytes
    """
    df = getTasksDataFrame(attributes=['DATE_TASK_CREATED'])
    df = df[df['STATUS'] != TaskStatus.COMPLETED]

    priorities = ('Low', 'Normal', 'High', 'Urgent')
    skipped = (TaskStatus.UNKNOWN, TaskStatus.COMPLETED, TaskStatus.ARCHIVED)

    x_statuses = []
    counts = {priority: [] for priority in priorities}
    for status in TaskStatus:
        if status in skipped:
            continue
        x_statuses.append(status.value)
        in_status = df['STATUS'] == status
        for priority in priorities:
            # Combine both conditions in a single mask instead of chaining
            # two boolean selections on differently-indexed frames.
            matches = in_status & (df['TASK_PRIORITY_NAME'] == priority)
            counts[priority].append(int(matches.sum()))

    # Cumulative heights: each level includes every lower priority.
    y_taskcounts_low = np.array(counts['Low'])
    y_taskcounts_normal = np.array(counts['Normal']) + y_taskcounts_low
    y_taskcounts_high = np.array(counts['High']) + y_taskcounts_normal
    y_taskcounts_urgent = np.array(counts['Urgent']) + y_taskcounts_high

    # Drop statuses whose total (the top of the stack) is zero.
    keep = y_taskcounts_urgent != 0
    x_statuses = [name for name, kept in zip(x_statuses, keep) if kept]
    y_taskcounts_low = y_taskcounts_low[keep]
    y_taskcounts_normal = y_taskcounts_normal[keep]
    y_taskcounts_high = y_taskcounts_high[keep]
    y_taskcounts_urgent = y_taskcounts_urgent[keep]

    f, ax = plt.subplots(figsize=(8, 8))
    sns.set_color_codes("bright")
    sns.barplot(x=x_statuses, y=y_taskcounts_urgent, label="Urgent", color="#962B3D", ax=ax)
    sns.barplot(x=x_statuses, y=y_taskcounts_high, label="High", color="#E38B83", ax=ax)
    sns.barplot(x=x_statuses, y=y_taskcounts_normal, label="Normal", color="#9DBFD2", ax=ax)
    sns.barplot(x=x_statuses, y=y_taskcounts_low, label="Low", color="#C0BCD5", ax=ax)

    ax.legend(ncol=2, loc="upper left", frameon=True)
    ax.set(xlabel='Task Status', ylabel='No. of Tasks')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=20, ha='right')
    # Task counts are whole numbers, so only show integer ticks.
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    figBytes = BytesIO()
    f.savefig(figBytes, format='jpg')
    # Only the encoded bytes are returned, so release the figure.
    plt.close(f)
    figBytes.seek(0)
    figData = base64.b64encode(figBytes.getvalue())
    return figData
def parameter_plotting(dataset, data_dir, plot_dir, pairwise=False):
    """Write the fANOVA marginal table for a dataset and plot every listed marginal.

    The fANOVA model is built from
    ``<data_dir>/<dataset>/NoPreprocessing/merged_runs`` with improvement
    measured over the 0.25 quantile. The table of the 7 largest marginals is
    written as LaTeX to ``<plot_dir>/tables`` and one PDF per marginal is
    written to ``plot_dir``. A pair of two categorical parameters is skipped.

    Side effects: matplotlib rcParams are reset and then restyled globally.

    :param dataset: dataset name (sub-directory of ``data_dir``)
    :param data_dir: root directory of the run data
    :param plot_dir: output directory; must contain a ``tables`` sub-directory
    :param pairwise: also request pairwise marginals and split their
        ``'a x b'`` names into the two parameter names
    """
    plt.rcdefaults()

    def clear_name(name):
        # Classifier parameters are named by their last component, every
        # other parameter by its first one.
        parts = name.split(':')
        return parts[-1] if parts[0] == 'classifier' else parts[0]

    # Styles
    sns.set_style('whitegrid', {'axes.linewidth': 1.25, 'axes.edgecolor': '0.15',
                                'grid.linewidth': 1.5, 'grid.color': 'gray'})
    sns.set_color_codes()
    plt.rcParams['figure.figsize'] = (12.0, 9.0)
    plt.rc('text', usetex=False)
    plt.rc('font', size=13.0, family='serif')

    preprocessor = 'NoPreprocessing'

    # Parameter importance table
    state_run_dir = os.path.join(data_dir, dataset, preprocessor, 'merged_runs')
    fanova_set = pyfanova.fanova.Fanova(state_run_dir, improvement_over='QUANTILE',
                                        quantile_to_compare=0.25)
    max_marginals = 7
    cols_imp_df = ['marginal', 'parameter']
    temp_df = pd.DataFrame(fanova_set.print_all_marginals(max_num=max_marginals,
                                                          pairwise=pairwise),
                           columns=cols_imp_df)
    flatex = '%d_marginal_table_for_%s_over_q1_noprepro.tex' % (max_marginals, dataset)

    # Lift the column-width limit while writing so long names are not cut
    # with dots, then restore whatever value was active before.
    previous_width = pd.get_option('display.max_colwidth')
    try:
        try:
            pd.set_option('display.max_colwidth', None)
        except ValueError:
            # Older pandas only accepts an integer here; -1 means unlimited.
            pd.set_option('display.max_colwidth', -1)
        temp_df.to_latex(os.path.join(plot_dir, 'tables', flatex))
    finally:
        pd.set_option('display.max_colwidth', previous_width)
    print("Done printing latex")

    if pairwise:
        temp_df.loc[:, 'parameter'] = temp_df.parameter.str.split(' x ')

    # Plot now the marginals
    viz_set = pyfanova.visualizer.Visualizer(fanova_set)
    categorical_params = fanova_set.get_config_space().get_categorical_parameters()

    for p in temp_df.parameter:
        # Without the pairwise split each entry is a plain name; treat it as
        # a one-element list so it is not indexed character by character.
        names = list(p) if isinstance(p, (list, tuple)) else [p]

        if len(names) == 1:
            single = names[0]
            label = clear_name(single)
            fig_hyper, ax_hyper = plt.subplots(1, 1)
            if single not in categorical_params:
                viz_set.plot_marginal(single, ax=ax_hyper)
            else:
                viz_set.plot_categorical_marginal(single, ax=ax_hyper)
            ax_hyper.set_xlabel(label)
        else:
            first, second = names[0], names[1]
            first_is_cat = first in categorical_params
            second_is_cat = second in categorical_params
            if first_is_cat and second_is_cat:
                # Two categorical parameters are not plotted; decide before
                # creating a figure so none is left open.
                continue

            label = clear_name(first) + '_X_' + clear_name(second)
            fig_hyper, ax_hyper = plt.subplots(1, 1)
            if first_is_cat:
                viz_set.plot_categorical_pairwise(first, second, ax=ax_hyper)
                ax_hyper.set_xlabel(clear_name(second))
                ax_hyper.legend(loc='best', title=clear_name(first))
            elif second_is_cat:
                viz_set.plot_categorical_pairwise(second, first, ax=ax_hyper)
                ax_hyper.set_xlabel(clear_name(first))
                ax_hyper.legend(loc='best', title=clear_name(second))
            else:
                viz_set.plot_contour_pairwise(first, second, ax=ax_hyper)
                ax_hyper.set_xlabel(clear_name(first))
                ax_hyper.set_ylabel(clear_name(second))

        fig_hyper.tight_layout()
        fig_hyper.savefig(os.path.join(plot_dir, '%s_for_%s_over_q1_noprepro.pdf' % (label, dataset)))
        # Release the figure once it is on disk; one is created per marginal.
        plt.close(fig_hyper)
parser.add_argument('--save_dir', type=str, default='../../communications/thesis/figures/lista', help='save file for the figure') parser.add_argument('-x', type=int, default=600, help='iteration maximal on the figure') parser.add_argument('-y', type=int, default=50, help='scaling factor for y') parser.add_argument('--eps', type=float, default=1e-6, help='scaling factor for y') parser.add_argument('--rm', nargs='+', type=str, default=[], help='remove some curves from the plot') parser.add_argument('--seaborn', action="store_true", help="use seaborn color in the plots") parser.add_argument('--noshow', action="store_true", help="use seaborn color in the plots") args = parser.parse_args() if args.seaborn: import seaborn seaborn.set_color_codes(palette='deep') seaborn.set_style("darkgrid", { "axes.facecolor": ".9", "figures.facecolor": (1, 1, 0, 0.5)}) seaborn.despine(left=True, bottom=True) mpl.rcParams['figure.figsize'] = [12, 6] mk_curve(args.exp, eps=args.eps, max_iter=args.x, sym=args.y, save=args.save, save_dir=args.save_dir, rm=args.rm) if not args.noshow: plt.show()
def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='', histBins=100, color=None, logy=False, logx=False, xlim=None, savePath=None, figureFormat='png', dpi=72, figureSize=(11, 7)):
    """
    histogram(values, inclusionVector=None, quantiles=None, histBins=100, color=None, logy=False, logx=False, **kwargs)

    Plot a histogram of values, optionally segmented according to observed quantiles.

    Quantiles can be calculated on a second *inclusionVector* when specified.
    When *values* is a dict, one semi-transparent histogram is drawn per key on
    the same axis. The caller's arrays are never modified.

    :param values: Values to plot
    :type values: numpy.array or dict
    :param inclusionVector: Optional second vector with same size as values, used to select quantiles for plotting.
    :type inclusionVector: None or numpy.array
    :param quantiles: List of quantile bounds to segment the histogram by
    :type quantiles: None or List
    :param str title: Title for the plot
    :param str xlabel: Label for the X-axis
    :param int histBins: Number of bins to break the histogram into
    :param color: List of specific colours to use for plotting
    :type color: None or List
    :param bool logy: If ``True`` plot y on a log scale
    :param bool logx: If ``True`` plot x on a log scale; ignored when the smallest value is negative
    :param xlim: Specify upper and lower bounds of the X axis
    :type xlim: tuple of int
    :param savePath: If set, save the figure to this path and close it; otherwise show it
    :type savePath: None or str
    :param str figureFormat: File format passed to ``savefig``
    :param int dpi: Resolution used for the figure and the saved file
    :param figureSize: Figure size passed to ``plt.figure``
    :type figureSize: tuple
    :raises ValueError: if *inclusionVector* is given together with a dict of values
    """
    fig = plt.figure(figsize=figureSize, dpi=dpi)
    ax = plt.subplot(1, 1, 1)

    grouped = isinstance(values, dict)
    # A legend is drawn for grouped and quantile-segmented plots only
    # (tracked explicitly instead of probing locals()).
    showLegend = grouped

    # Set the colour cycle
    if color is not None:
        sns.set_color_codes(palette='deep')
        ax.set_prop_cycle(cycler('color', color))
    elif quantiles is not None:
        flatui = ["#16a085", "#3498db", "#707b7c"]
        ax.set_prop_cycle(cycler('color', flatui))

    # Set masks etc if required (not currently possible when values is a dictionary)
    if not grouped:
        # If we don't have a matching pair of vectors use values for both.
        # None is tested explicitly: numpy.size(None) is 1, so a single-element
        # `values` used to keep inclusionVector=None and crash below.
        if inclusionVector is None or numpy.size(inclusionVector) != numpy.size(values):
            inclusionVector = values

        # If we are limiting axes, delete elements out of bounds
        if xlim is not None:
            mask = numpy.logical_and(values >= xlim[0], values <= xlim[1])
            inclusionVector = inclusionVector[mask]
            values = values[mask]

        # Remove non-finite elements
        maskFinite = numpy.logical_and(numpy.isfinite(inclusionVector),
                                       numpy.isfinite(values))
        inclusionVector = inclusionVector[maskFinite]
        values = values[maskFinite]

        minVal = numpy.nanmin(values)
        maxVal = numpy.nanmax(values)

    # Calculate ranges for dict entries early.
    else:
        if inclusionVector is not None:
            raise ValueError(
                "Cannot provide an inclusion vector when plotting groups.")

        # Set min and max values
        if xlim is not None:
            minVal = xlim[0]
            maxVal = xlim[1]
        else:
            minVal = numpy.nan
            maxVal = numpy.nan
            for key in values:
                groupMin = numpy.nanmin(values[key])
                groupMax = numpy.nanmax(values[key])
                if numpy.isnan(minVal) or minVal > groupMin:
                    minVal = groupMin
                if numpy.isnan(maxVal) or maxVal < groupMax:
                    maxVal = groupMax

    # If log scale for x
    nbins = histBins
    xscale = 'linear'
    if logx:
        if minVal == 0:
            minVal = numpy.finfo(numpy.float64).epsneg
        if minVal < 0:
            # Negative values cannot go on a log axis: fall back to linear.
            logx = False
        else:
            nbins = 10**numpy.linspace(numpy.log10(minVal),
                                       numpy.log10(maxVal), histBins)
            xscale = 'log'

    # If we are plotting multiple histograms on the same axis
    if grouped:
        for key in values:
            localValues = values[key]

            # If we are limiting axes, delete elements out of bounds
            if xlim is not None:
                mask = numpy.logical_and(localValues >= xlim[0],
                                         localValues <= xlim[1])
                localValues = localValues[mask]

            # On a log scale, zeros are replaced by the smallest positive
            # step.  numpy.where builds a new array, so the caller's data is
            # left untouched (the old in-place assignment modified it).
            if logx:
                localValues = numpy.where(localValues == 0,
                                          numpy.finfo(numpy.float64).epsneg,
                                          localValues)

            ax.hist(localValues, alpha=.4, range=(minVal, maxVal),
                    label=key, bins=nbins)

    # If we are segmenting by quantiles
    elif quantiles:
        showLegend = True

        # Find bounds in inclusion vector
        bounds = numpy.percentile(inclusionVector, quantiles)

        # (label, mask) pairs in drawing order: below, between..., above.
        segments = [("Below {0:,.2f}".format(bounds[0]),
                     inclusionVector <= bounds[0])]
        for lower, upper in zip(bounds[:-1], bounds[1:]):
            segments.append(
                ("Between {0:,.2f} and {1:,.2f}".format(lower, upper),
                 (inclusionVector > lower) & (inclusionVector <= upper)))
        segments.append(("Above {0:,.2f}".format(bounds[-1]),
                         inclusionVector > bounds[-1]))

        for segmentLabel, mask in segments:
            if numpy.sum(mask) <= 1:
                # Too few points for a histogram: add an empty line so the
                # segment still gets a legend entry and its colour.
                ax.plot([], label=segmentLabel)
            else:
                ax.hist(values[mask], alpha=.4, label=segmentLabel, bins=nbins)

    else:
        if len(values) <= 1:
            ax.plot([])
        else:
            ax.hist(values, label='', bins=nbins)

    ax.set_ylabel('Count')
    if logy:
        ax.set_yscale('log', nonpositive='clip')
    if xlim is not None:
        ax.set_xlim(xlim)

    ax.set_xlabel(xlabel)
    if showLegend:
        ax.legend(loc='upper left', bbox_to_anchor=(1, 1))

    ax.set_xscale(xscale)
    fig.suptitle(title)

    if savePath:
        fig.savefig(savePath, bbox_inches='tight', format=figureFormat, dpi=dpi)
        plt.close(fig)
    else:
        plt.show()
ax4.set(xlabel="e")
ax4.set_title('e and y')

ax5.scatter(data_train['f'], data_train['y'])
ax5.set(ylabel="y")
ax5.set(xlabel="f")
ax5.set_title('f and y')

ax6.scatter(data_train['g'], data_train['y'])
# These labels belong to the 'g' panel; they were previously set on ax1,
# which relabelled the first panel and left this one unlabelled.
ax6.set(ylabel="y")
ax6.set(xlabel="g")
ax6.set_title('g and y')


# In[94]:


# Checking the distribution of the dependent variable
sns.set_style("white")
sns.set_color_codes(palette='deep')
f, ax = plt.subplots(figsize=(8, 10))
ax.xaxis.grid(False)
ax.set(ylabel="Frequency")
ax.set(xlabel="y")
ax.set(title="y distribution")
normalized_y = data_train['y']
# helpful_normalized.describe()
sns.distplot(normalized_y, color='b')


# In[12]:


# Skewness and kurtosis of the dependent variable
print("Skewness: " + str(data_train['y'].skew()))
print("Kurtosis: " + str(data_train['y'].kurt()))
import sys
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from vec_root import chandrupatla

# Output name: the script's own path with '.py' replaced by '.pdf'.
figname = sys.argv[0].replace('.py', '.pdf')

sns.set_style('ticks')
sns.set_color_codes('bright')
# Three panels side by side sharing both axes.
fig, axes = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(8, 4))
# One row per panel; the last entry is the axis the row is drawn on.
# NOTE(review): the meaning of the four numeric columns is not visible in
# this part of the file - confirm before relying on them.
stardata = [
    [10.0, 0.63, 0.0066, 1e-4, axes[0]],
    [20.0, 5.453, 0.1199, 0.1, axes[1]],
    [40.0, 22.19, 0.4468, 1.0, axes[2]],
]

MICRON = 1e-4  # cm

# Velocity grid in km/s.  The linear 10 -> 100 grid is immediately replaced
# by the log-spaced one below, which spans 10**1.1 -> 10**3.1
# (about 12.6 -> 1259).
vgrid = np.linspace(10.0, 100.0, 800)
vgrid = np.logspace(1.1, 3.1, 800)
# log10 of density in units of 1 pcc: -3.3 -> 6.3, i.e. densities of about
# 5e-4 -> 2e6.
logngrid = np.linspace(-3.3, 6.3, 800)
# 2d versions of velocity and density grids
vv, nn = np.meshgrid(vgrid, 10**logngrid)


def rstar(v10, n, L4):
    """Characteristic radius in pc.

    Evaluates ``2.21*sqrt(L4/n)/v10``.
    NOTE(review): judging by the names, v10 is presumably a velocity in units
    of 10 km/s and L4 a luminosity in units of 1e4 - confirm with the callers.
    """
    return 2.21*np.sqrt(L4/n)/v10


def taustar(v10, n, L4, kappa600=1.0):
def period_prad_slices(mode='tall'):
    """Plot planet size against orbital period in four metallicity slices.

    Each of the 2x2 log-log panels shows the whole planet sample in light
    gray and the planets whose ``cks_smet`` falls in the panel's bin in blue,
    with the bin label and the fraction of planets in it. An example error
    bar is drawn in the first panel.

    :param mode: ``'four-equal-smet'`` uses four fixed bins between -0.75 and
        0.45; ``'four-equal-stars'`` places the bin edges at the 25/50/75 %
        quantiles of ``lamo_smet`` from the ``'lamost-cal-cuts'`` table
    :raises ValueError: for any other mode (including the default
        ``'tall'``, which has no layout defined here)
    """
    sns.set_style('ticks')  # was `sns_set_style`, an undefined name
    sns.set_color_codes()

    yt = [0.5, 1, 2, 4, 8, 16, 32]
    xt = [0.3, 1, 3, 10, 30, 100, 300]

    # Provision figure
    height = 3.375
    width = 3.6
    nrows = 2
    ncols = 2

    if mode == 'four-equal-smet':
        smet_bins = [-0.75, -0.45, -0.15, 0.15, 0.45]
        # One label per bin.  The old `labels += <str>` extended the list one
        # character at a time, so labels[i] was a single character.
        labels = [
            '[Fe/H] = ({:+.2f},{:+.2f})'.format(lower, upper)
            for lower, upper in zip(smet_bins[:-1], smet_bins[1:])
        ]
    elif mode == 'four-equal-stars':
        lamo = cksmet.io.load_table('lamost-cal-cuts', cache=1)
        lamo = lamo[~lamo.isany]
        quantiles = [0.25, 0.5, 0.75]
        lamoq = lamo.lamo_smet.quantile(quantiles)
        # The outer edges are placed far outside the data so the first and
        # last bins are open-ended.
        smet_bins = [-10] + list(lamoq) + [10]
        labels = [
            '[Fe/H] < ${:.3f}$'.format(smet_bins[1]),
            '[Fe/H] = (${:+.3f}$,${:+.3f}$)'.format(*smet_bins[1:3]),
            '[Fe/H] = (${:+.3f}$,${:+.3f}$)'.format(*smet_bins[2:4]),
            '[Fe/H] > ${:+.3f}$'.format(smet_bins[3]),
        ]
    else:
        raise ValueError(
            "unsupported mode {!r}; expected 'four-equal-smet' or "
            "'four-equal-stars'".format(mode))

    fig, axL = subplots(nrows=nrows, ncols=ncols,
                        figsize=(ncols * width, nrows * height),
                        sharex=True, sharey=True)

    setp(axL[1, :], xlabel='Orbital Period (days)')
    setp(axL[:, 0], ylabel='Planet Size (Earth-radii)')

    plnt = cksmet.io.load_table('cks-cuts')
    plnt = plnt[~plnt.isany]

    axL = axL.flatten()
    kwpts = dict(marker='o', ms=4, lw=0)
    bbox_props = dict(boxstyle="round,pad=0.,rounding_size=0.2", fc='w',
                      alpha=0.7, ec='none')
    nbins = len(smet_bins) - 1
    for i in range(nbins):
        ax = axL[i]
        sca(ax)
        loglog()

        plnt_cut = plnt[plnt.cks_smet.between(smet_bins[i], smet_bins[i + 1])]
        f_planet = 1.0 * len(plnt_cut) / len(plnt)
        # The backslash is doubled so the text still contains a literal '\%'
        # without relying on an invalid escape sequence.
        label = labels[i] + "\n$f_p = {:.0f}\\%$".format(100 * f_planet)

        # Whole sample for comparison
        plot(plnt.koi_period, plnt.iso_prad,
             label=label, color='LightGray', **kwpts)

        # Planets inside this metallicity bin
        plot(plnt_cut.koi_period, plnt_cut.iso_prad,
             label='', color='b', **kwpts)

        at = AnchoredText(label, loc=1, frameon=True, prop=dict(size='small'))
        ax.add_artist(at)
        setp(at.patch, **bbox_props)
        yticks(yt, yt)
        xticks(xt, xt)

    # Panel letters (a, b, c, ...) in the upper-left corner
    for ax, letter in zip(axL.flatten(), string.ascii_lowercase):
        sca(ax)
        at = AnchoredText(letter, loc=2, frameon=True, prop=letter_text_props)
        ax.add_artist(at)
        setp(at.patch, **letter_bbox_props)
        grid()

    # Example error bar in the first panel: asymmetric in y because the
    # fractional error yferr is applied multiplicatively.
    sca(axL[0])
    ebarx = sqrt(3) * 100.
    ebary = 0.7
    yferr = 0.1
    yerr = [[ebary - ebary / (1 + yferr)], [ebary * (1 + yferr) - ebary]]
    errorbar([ebarx], [ebary], yerr=yerr, zorder=10, fmt='o', c='g', ms=4)
    xlim(0.3, 1000)
    ylim(0.5, 32)
    fig.set_tight_layout(True)
    text(ebarx, ebary, '  Median\n  Uncert.', size='small', va='center')
    minorticks_off()
# Plot the distribution of `bval` with a fitted normal curve.  The
# "here...N" prints are progress markers written between the setup steps.
print("here...1")
fig, ax = plt.subplots()
print("here...2")
# dist.plot.kde(ax=ax, legend=False, title='Histogram: A vs. B')
print("here...3")
# dist.plot.hist(density=True, ax=ax, bins = 20)
print("here...4")
ax.set_ylabel('Probability')
ax.set_xlabel('B-factor values')
print("here...5")
# Horizontal grid lines only.
ax.grid(axis='y')
print("here...6")
# ax.set_facecolor('#d8dcd6')
# plt.show()
# print("done")
sns.set_color_codes("bright")
# Histogram without a KDE, overlaid with a fitted scipy.stats.norm.
# No `ax` is passed, so seaborn draws on the current axes.
sns.distplot(bval, fit=scipy.stats.norm, kde=False, color='#1F77B4')
# NOTE(review): f, mean and stdev are defined outside this fragment.
print(f, mean, stdev)
plt.show()
# Wait up to 0.1 s for input; waitforbuttonpress returns True for a key
# press, and in that case block (timeout -1) until the next key or click.
btnpress = plt.waitforbuttonpress(0.1)
if btnpress:
    plt.waitforbuttonpress(-1)
def main(): # prosites = [[key,prosite_id] for key,prosite_id in wd40_prosites.iteritems() ] # from multiprocessing import Pool # p = Pool(3) # results = p.map(query,prosites) # p.close() # keys = [k for k,v in results] # values = [v for k,v in results] # write_lis_lis(values,'pfam_propellers',cols=keys) # pickle.dump(results,open('wd4o_prosites.pickle','w')) results = pickle.load(open('wd4o_prosites.pickle')) # pfams = pickle.load(open('pfam_propellers.pickle','r')) results = OrderedDict([[k,v] for k,v in results if len(v) > 0]) # filter empty entries print 'types of propellers: ',len(results) #plot all propellers f,ax = plt.subplots(figsize=(6,6)) sns.set_context(rc={'patch.linewidth':0.0}) sns.set_color_codes('pastel') wd = pd.DataFrame({'Prosite Entries':results.keys(),'UniProt Sequence Num':map(len,results.values())}) wd = wd.sort_values('UniProt Sequence Num',ascending=False) h = sns.barplot(y='UniProt Sequence Num',x='Prosite Entries',data=wd,color='b') h.figure.subplots_adjust(top=0.88,bottom=0.10,left=0.15,right=0.95) ax.set(ylabel='UniProt Sequence Num',xlabel='Prosite Entries',title='Num of WD40s in Uniprot according to Different Prosite Annotation') # plt.xticks(rotation='vertical') plt.savefig('num_of_different_wd40s_by_prosites',dpi=300) plt.close('all') #plot wd40s f,ax = plt.subplots(figsize=(7,6)) sns.set_context(rc={'patch.linewidth':0.0}) wd40_names = ['PS50082','PS50294','PS00678'] wd40_names = [k for k in wd40_names if k in results.keys()] wd40s_large = [results[k] for k in wd40_names] wd40s_total = set.union(*map(set,wd40s_large)) wd = pd.DataFrame({'Prosite entries':wd40_names,'UniProt Sequence Num':map(len,[wd40s_total for i in range(len(wd40_names))])}) sns.barplot(x='Prosite entries',y='UniProt Sequence Num',data=wd,color='lightblue') sns.set_color_codes('muted') wd = pd.DataFrame({'Prosite entries':wd40_names,'UniProt Sequence Num':map(len,wd40s_large)}) wd = wd.sort_values('UniProt Sequence Num',ascending=False) sns.barplot(x='Prosite 
entries',y='UniProt Sequence Num',data=wd,color='b') ax.set(xlabel='Prosite entries',ylabel='UniProt Sequence Num',title='Num of WD40s in Uniprot according to Different Prosite Annotation') plt.savefig('wd40_in_uniprot_by_prosite',dpi=300) plt.close('all') with open('wd40s_table.txt','w') as w_f: for name,num in zip(wd40_names,map(len,wd40s_large)): print >> w_f, '{0:<30}{1:<}'.format(name,num) # plot wd40s venns sns.set_color_codes('bright') set1 = set(wd40s_large[0]) set2 = set(wd40s_large[1]) set10 = len(set1.difference(set2)) set12 = len(set1.intersection(set2)) set02 = len(set2.difference(set1)) v = venn2(subsets={'10':4,'11':1,'01':4},set_labels=(wd40_names[0],wd40_names[1])) v.get_label_by_id('10').set_text(str(set10)) v.get_label_by_id('11').set_text(str(set12)) v.get_label_by_id('01').set_text(str(set02)) plt.savefig(wd40_names[0]+'_'+wd40_names[1]+'.png',dpi=300) plt.close('all') set1 = set(wd40s_large[0]) set2 = set(wd40s_large[2]) set10 = len(set1.difference(set2)) set12 = len(set1.intersection(set2)) set02 = len(set2.difference(set1)) v = venn2(subsets={'10':1,'11':4,'01':1},set_labels=(wd40_names[0],wd40_names[2])) v.get_label_by_id('10').set_text(str(set10)) v.get_label_by_id('11').set_text(str(set12)) v.get_label_by_id('01').set_text(str(set02)) plt.savefig(wd40_names[0]+'_'+wd40_names[2]+'.png',dpi=300) plt.close('all') set1 = set(wd40s_large[1]) set2 = set(wd40s_large[2]) set10 = len(set1.difference(set2)) set12 = len(set1.intersection(set2)) set02 = len(set2.difference(set1)) v = venn2(subsets={'10':4,'11':1,'01':4},set_labels=(wd40_names[1],wd40_names[2])) v.get_label_by_id('10').set_text(str(set10)) v.get_label_by_id('11').set_text(str(set12)) v.get_label_by_id('01').set_text(str(set02)) plt.savefig(wd40_names[1]+'_'+wd40_names[2]+'.png',dpi=300) plt.close('all') set1 = set(wd40s_large[0]) set2 = set(wd40s_large[1]) set3 = set(wd40s_large[2]) set100 = len(set1.difference(set2.union(set3))) set110 = len(set1.intersection(set2).difference(set3)) 
set010 = len(set2.difference(set1.union(set3))) set101 = len(set1.intersection(set3).difference(set2)) set111 = len(set1.intersection(set2).intersection(set3)) set011 = len(set2.intersection(set3).difference(set1)) set001 = len(set3.difference(set1.union(set2))) v = venn3(subsets={'100':1, '110':1, '010':1, '101':1, '111':1, '011':1, '001':1}, set_labels = (wd40_names[0], wd40_names[1], wd40_names[2] )) v.get_label_by_id('100').set_text(str(set100)) v.get_label_by_id('110').set_text(str(set110)) v.get_label_by_id('010').set_text(str(set010)) v.get_label_by_id('101').set_text(str(set101)) v.get_label_by_id('111').set_text(str(set111)) v.get_label_by_id('011').set_text(str(set011)) v.get_label_by_id('001').set_text(str(set001)) plt.savefig(wd40_names[0]+'_'+wd40_names[1]+'_'+wd40_names[2]+'.png',dpi=300) # plot heatmap of wd40s shared by different annotation sns.set_color_codes('pastel') table = [] keys = wd40_names for w in wd40s_large: row = [len(set(w).intersection(set(wr)))*1.0/len(w) for wr in wd40s_large] table.append(row) data = pd.DataFrame(table,columns=keys,index=keys) fig = plt.figure(figsize=(7,8)) ax = fig.add_subplot(111) h = sns.heatmap(data,annot=True,fmt='.2f',cmap='Blues') # h.figure.tight_layout() h.figure.subplots_adjust(top=0.9,bottom=0.05,left=0.18,right=0.98) ax.set_xticklabels(keys,rotation=0) ax.set_yticklabels(keys[::-1],rotation=0) ax.set_title('Comaration of Different Annotation Methods') plt.savefig('Comaration_of Different_Annotation_Methods.png',dpi=300) plt.close('all')
def plotBurndownChart(burndown_df):
    """Render the burndown chart and return it as a base64-encoded PNG.

    Reads the columns ``id``, ``daily_efforts``, ``daily_ideals``,
    ``daily_created``, ``daily_completed`` and ``daily_tasks`` of
    *burndown_df* and the module globals ``x_dates`` (its length gives the
    number of days) and ``x_labels`` (x tick labels). The fitted slopes of
    the remaining-effort and ideal-effort lines are stored in the globals
    ``burndown_remaining_gradient`` and ``burndown_ideals_gradient``.

    :param burndown_df: data frame with one row per day
    :return: base64-encoded bytes of the PNG image
    """
    global burndown_remaining_gradient
    global burndown_ideals_gradient

    N = len(x_dates)
    ind = np.arange(N)  # the evenly spaced plot indices

    # Remaining effort: ordinary least squares with an intercept
    # (params[0] is the intercept, params[1] the slope).
    ind_xval = sm.add_constant(ind)
    daily_effort_model = sm.OLS(burndown_df['daily_efforts'].tolist(), ind_xval)
    effort_intercept, effort_slope = daily_effort_model.fit().params[:2]
    # Trend values are clamped at zero: effort cannot go negative.
    daily_effort_regress_pts = [
        max(0, effort_slope * i + effort_intercept) for i in ind
    ]
    print('Daily Efforts Gradient:', effort_slope)
    burndown_remaining_gradient = effort_slope

    # Ideal effort: linear regression, only slope and intercept are used.
    daily_ideal_slope, daily_ideal_intercept = stats.linregress(
        ind, burndown_df['daily_ideals'])[:2]
    daily_ideal_regress_pts = [
        max(0, daily_ideal_slope * i + daily_ideal_intercept) for i in ind
    ]
    print('Daily Ideals Gradient:', daily_ideal_slope)
    burndown_ideals_gradient = daily_ideal_slope

    sns.set_style('darkgrid')
    sns.set_color_codes("bright")

    # Left axis: effort lines and their trends
    fig, ax = plt.subplots()
    ax.set_position([0, 0, 1, 1])
    ax.plot('id', 'daily_efforts', data=burndown_df, marker='', color='m',
            linewidth=2, label="Remaining Efforts")
    ax.plot(ind, daily_effort_regress_pts, marker='', color='m',
            linestyle=':', linewidth=1, label="Remaining Efforts Trend")
    ax.plot('id', 'daily_ideals', data=burndown_df, marker='', color='pink',
            linewidth=2, label="Ideal Remaining Effort")
    ax.plot(ind, daily_ideal_regress_pts, marker='', color='pink',
            linestyle=':', linewidth=2, label="Ideal Remaining Effort Trend")

    ax.set_xticks(ind)
    # `int` replaces the `np.int` alias, which no longer exists in NumPy.
    lower_bound, upper_bound = ax.get_ybound()
    ax.set_yticks(
        np.linspace(max(0, lower_bound - 5), upper_bound + 5, 15, dtype=int))
    ax.set_xticklabels(x_labels)
    ax.set_xlabel('Days')
    ax.set_ylabel('Remaining Efforts')

    # Right axis: created/completed bars and the remaining-task line
    ax2 = ax.twinx()
    ax2.bar(ind - 0.15 / 2, burndown_df['daily_created'], width=0.15,
            color='b', label="Tasks Created")
    ax2.bar(ind + 0.15 / 2, burndown_df['daily_completed'], width=0.15,
            color='y', label="Tasks Completed")
    ax2.plot('id', 'daily_tasks', data=burndown_df, marker='o',
             markerfacecolor='blue', markersize=4, color='skyblue',
             linestyle='dashed', linewidth=2, label='Remaining Tasks')
    # The number of ticks must be an integer; get_ybound() returns floats.
    task_upper = ax2.get_ybound()[1]
    ax2.set_yticks(
        np.linspace(0, task_upper + 1, int(task_upper + 2), dtype=int))
    # Only the left axis keeps its grid so the two grids do not overlap.
    ax2.grid(False)
    ax2.set_ylabel('Remaining and Created/Completed Tasks')

    fig.autofmt_xdate()

    # One combined legend for both axes
    lines, labels = ax.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc=0)

    fig.set_size_inches(16.5, 10.5)

    figBytes = BytesIO()
    # NOTE(review): bbox_inches=0 is kept as written; matplotlib documents a
    # string or Bbox here - confirm whether 'tight' was intended.
    fig.savefig(figBytes, format='png', bbox_inches=0)
    # Only the encoded image is returned, so release the figure.
    plt.close(fig)
    figBytes.seek(0)
    figData = base64.b64encode(figBytes.getvalue())
    return figData
def set(self, font_scale=2):
    """Apply the plotting theme: white grid, "Droid Sans" font, dark color
    codes and an 8-color "Paired" palette.

    :param font_scale: scaling factor for the font elements, forwarded to
        ``sns.set``
    """
    # The style is passed to sns.set() together with the font.  A separate
    # sns.set_style("whitegrid") call after sns.set(font=...) re-applies
    # seaborn's default style parameters, which include the font family, so
    # the font chosen here was discarded.
    sns.set(style="whitegrid", font="Droid Sans", font_scale=font_scale)
    sns.set_color_codes("dark")
    sns.set_palette("Paired", 8)
cnt_byCS.reset_index(inplace=True)
# Ok this does it, but I am not sure it's the best way to do it
# Didn't find anything better
cnt_byCountry['state']
cnt_byCS['Total'] = cnt_byCountry['state']  # will it match indices?
# Yeah it did but need to do earlier
# Columns are selected with a list; tuple selection on a groupby
# (grp_CS['state', 'type']) is not supported by current pandas.
cnt_byCS = grp_CS[['state', 'type']].count()
# Both frames have a 'state' column: the left one becomes 'statecountry',
# so 'state' below is the per-country count from cnt_byCountry.
cnt_byCS = cnt_byCS.join(cnt_byCountry['state'], lsuffix='country')  # will it match indices?
# Awesome!
df_stats = cnt_byCS['type'] / cnt_byCS['state']
df_stats = df_stats.unstack().T
df_stats = df_stats.rename(columns=dics['country']['x_Rdic'])

# Plot the crashes where alcohol was involved
sb.set_color_codes('muted')
fig = plt.figure(figsize=(10, 6))
for i, c in enumerate(df_stats.columns):
    fig.add_subplot(2, 2, i + 1)
    # .loc replaces the removed .ix indexer (label-based access).
    sb.barplot(x=df_stats.index, y=df_stats.loc[:, c])
    plt.vlines(df_country.loc[c, 'avg'], 0, 0.35)
    plt.vlines(df_country.loc[c, 'q01'], 0, 0.35, linestyle='--')
    # plt.vlines(df_country.loc[c, 'q90'], 0, 0.35, linestyle='--')
# this gets completely screwed up, not sure why, despite the numerical values being correct
# NOTE(review): the vertical lines are placed at x = avg / q01 while the x
# axis of a bar plot is categorical - possibly horizontal lines were meant;
# confirm.
# Seems fair to say that it is cultural trait


# Looking for most important features
def report_states(references, tracks, distance, filename):
    """Save one 4x2 PGF figure per reference comparing the tracked states.

    For every reference, each track is associated with it through
    ``associate_segments_common_frame`` and the resulting segments are drawn
    next to the reference trajectory. Row 0 receives ``plot.traj_xy``, row 1
    the velocities, row 2 yaw and angular velocity, row 3 the dimensions.

    :param references: iterable of ``(name, trajectory)``; ``name`` is
        appended to the output file name and ``trajectory.timestamps[0]`` is
        passed to every plot helper
    :param tracks: iterable of ``(filter_name, trajectory)``; ``'KF'`` is
        drawn in blue and ``'UKF'`` in green
    :param distance: forwarded to ``associate_segments_common_frame``
    :param filename: output name below ``/home/kostas/report/figures/``; a
        leading ``simulation/`` component selects the reference dimensions
    """
    mpl.use('pgf')
    mpl.rcParams.update({
        "text.usetex": True,
        "pgf.texsystem": "pdflatex",
    })
    sns.set_color_codes()

    # Unknown filter names fall back to black.  Previously `color` was left
    # unbound (or kept the previous track's color) in that case.
    track_colors = {'KF': 'b', 'UKF': 'g'}
    shape_color = 'indianred'
    ref_color = 'gray'

    for ref in references:
        fig_rep, axarr = plt.subplots(4, 2, figsize=(6.125, 8.6))
        start_time = ref[1].timestamps[0]

        for track in tracks:
            color = track_colors.get(track[0], 'k')
            segments, traj_ref = \
                associate_segments_common_frame(ref[1], track[1], distance)

            for i, segment in enumerate(segments):
                # Only the first segment carries a label for these two plots.
                is_first = i == 0
                plot.traj_xy(axarr[0, 0:2], segment, '-', color,
                             track[0] if is_first else None, 1, start_time)
                # Called through `plot` like the reference curve below; the
                # bare name `angular_vel` is not defined in this function.
                plot.angular_vel(axarr[2, 1], segment, '-', shape_color,
                                 'Shape' if is_first else None, 1, start_time)
                plot.linear_vel(axarr[1, 0:2], segment, '-', color,
                                track[0], 1, start_time)
                plot.traj_yaw(axarr[2, 0], segment, '.', shape_color,
                              None, 1, start_time)
                plot.dimensions(axarr[3, 0:2], segment, '-', shape_color,
                                track[0], 1, start_time)

            plot.traj_xy(axarr[0, 0:2], traj_ref, '-', ref_color,
                         'Reference', 1, start_time)
            plot.vx_vy(axarr[1, 0:2], traj_ref, '-', ref_color,
                       'Reference', 1, start_time)
            plot.traj_yaw(axarr[2, 0], traj_ref, '.', ref_color,
                          None, 1, start_time)
            plot.angular_vel(axarr[2, 1], traj_ref, '-', ref_color,
                             None, 1, start_time)

        # Horizontal reference lines in the two dimension panels.
        if filename.split('/')[0] == 'simulation':
            axarr[3, 0].axhline(y=3.9, color='gray')
            axarr[3, 1].axhline(y=1.78, color='gray')
        else:
            axarr[3, 0].axhline(y=0.385, color='gray')
            axarr[3, 1].axhline(y=0.2, color='gray')

        for row in range(0, 4):
            for col in range(0, 2):
                axarr[row, col].set_xlim(left=0)

        # Legend handles, named after the series they stand for.
        kf_patch = mpatches.Patch(color='b', label='KF')
        ukf_patch = mpatches.Patch(color='g', label='UKF')
        shape_patch = mpatches.Patch(color='indianred', label='Shape KF')
        ref_patch = mpatches.Patch(color='gray', label='Reference')
        fig_rep.legend(handles=[kf_patch, ukf_patch, shape_patch, ref_patch],
                       loc='lower center', ncol=4, borderpad=0.7,
                       bbox_to_anchor=(0.54, 0), columnspacing=0.8)

        # Lay out first, then reserve room at the bottom for the legend.
        fig_rep.tight_layout()
        fig_rep.subplots_adjust(bottom=0.11)
        fig_rep.savefig("/home/kostas/report/figures/" + filename + ref[0] +
                        ".pgf", bbox_inches='tight')
matplotlib.rc('font', size=posterfont, family='serif')
matplotlib.rc('axes', labelsize=posterfont)
matplotlib.rc('legend', fontsize=posterfont)
matplotlib.rc('xtick', labelsize=posterfont)
matplotlib.rc('ytick', labelsize=posterfont)
matplotlib.rc('text.latex', preamble=r'\usepackage[T1]{fontenc}\usepackage{lmodern}')
"""
# NOTE(review): the matplotlib.rc(...) lines above appear to sit inside a
# string literal opened earlier in the file (disabled poster settings) -
# confirm against the full file.
import matplotlib.pyplot as plt
import seaborn as sns

# Activate Seaborn color aliases
sns.set_palette('colorblind')
sns.set_color_codes(palette='colorblind')
sns.set_context('paper', font_scale=1.7)
sns.set_style("ticks")


def fix_margins():
    """Apply the shared margin settings by calling ``plots.plot_margins()``."""
    plots.plot_margins()


import kicdata as kic
import ts_powerspectrum as pspec


# Make cryptic abbreviations: short tags built from the run parameters
# (minfreq, maxfreq, quarter, sigma, kernelsize and ID are defined elsewhere).
minmax = 'min%s_max%s' % (minfreq, maxfreq)
para = 'q%s_s%s_k%s' % (quarter, sigma, kernelsize)
# Data directory for this target and frequency range.
direc = './data/%s/%s' % (ID, minmax)
def presentation_states(references, tracks, distance, filename):
    """Save a 2x4 overview figure of the tracked states for a presentation.

    Same content as the report figure, arranged column-wise: column 0
    receives ``plot.traj_xy``, column 1 the velocities, column 2 yaw and
    angular velocity, column 3 the dimensions.

    Every reference is written to the same file,
    ``/home/kostas/Dropbox/presentation_final/figures/eight_plots.png``, so
    with several references only the last one remains on disk.

    :param references: iterable of ``(name, trajectory)``;
        ``trajectory.timestamps[0]`` is passed to every plot helper
    :param tracks: iterable of ``(filter_name, trajectory)``; ``'KF'`` is
        drawn in blue and ``'UKF'`` in green
    :param distance: forwarded to ``associate_segments_common_frame``
    :param filename: only its first ``/``-separated component is used, to
        select the reference dimensions (``simulation`` or not)
    """
    sns.set_color_codes()

    # Unknown filter names fall back to black.  Previously `color` was left
    # unbound (or kept the previous track's color) in that case.
    track_colors = {'KF': 'b', 'UKF': 'g'}
    shape_color = 'indianred'
    ref_color = 'gray'

    for ref in references:
        fig_rep, axarr = plt.subplots(2, 4, figsize=(19.2, 10.8))
        start_time = ref[1].timestamps[0]

        for track in tracks:
            color = track_colors.get(track[0], 'k')
            segments, traj_ref = \
                associate_segments_common_frame(ref[1], track[1], distance)

            for i, segment in enumerate(segments):
                # Only the first segment carries a label for these two plots.
                is_first = i == 0
                plot.traj_xy(axarr[0:2, 0], segment, '-', color,
                             track[0] if is_first else None, 1, start_time)
                # Called through `plot` like the reference curve below; the
                # bare name `angular_vel` is not defined in this function.
                plot.angular_vel(axarr[1, 2], segment, '-', shape_color,
                                 'Shape' if is_first else None, 1, start_time)
                plot.linear_vel(axarr[0:2, 1], segment, '-', color,
                                track[0], 1, start_time)
                plot.traj_yaw(axarr[0, 2], segment, '.', shape_color,
                              None, 1, start_time)
                plot.dimensions(axarr[0:2, 3], segment, '-', shape_color,
                                track[0], 1, start_time)

            plot.traj_xy(axarr[0:2, 0], traj_ref, '-', ref_color,
                         'Reference', 1, start_time)
            plot.vx_vy(axarr[0:2, 1], traj_ref, '-', ref_color,
                       'reference', 1, start_time)
            plot.traj_yaw(axarr[0, 2], traj_ref, '.', ref_color,
                          None, 1, start_time)
            plot.angular_vel(axarr[1, 2], traj_ref, '-', ref_color,
                             None, 1, start_time)

        # Horizontal reference lines in the two dimension panels.
        if filename.split('/')[0] == 'simulation':
            axarr[0, 3].axhline(y=3.9, color='gray')
            axarr[1, 3].axhline(y=1.78, color='gray')
        else:
            axarr[0, 3].axhline(y=0.4, color='gray')
            axarr[1, 3].axhline(y=0.2, color='gray')

        for col in range(0, 4):
            for row in range(0, 2):
                axarr[row, col].set_xlim(left=0)

        # Legend handles, named after the series they stand for.
        kf_patch = mpatches.Patch(color='b', label='KF')
        ukf_patch = mpatches.Patch(color='g', label='UKF')
        shape_patch = mpatches.Patch(color='indianred', label='Shape KF')
        ref_patch = mpatches.Patch(color='gray', label='Reference')
        fig_rep.legend(handles=[kf_patch, ukf_patch, shape_patch, ref_patch],
                       loc='lower center', ncol=4, borderpad=0.7,
                       bbox_to_anchor=(0.54, 0), columnspacing=0.8)

        # Lay out first, then reserve room for the legend (same order as
        # report_states).  tight_layout() recomputes the subplot margins, so
        # calling it last dropped the bottom margin set for the legend.
        fig_rep.tight_layout()
        fig_rep.subplots_adjust(bottom=0.11)
        fig_rep.savefig(
            "/home/kostas/Dropbox/presentation_final/figures/eight_plots.png",
            bbox_inches='tight',
            transparent=False)
# # get shot chart data # shots = response.json()['resultSets'][0]['rowSet'] headers = harden_json.get('resultSets')[0].get('headers') shots = harden_json.get('resultSets')[0].get('rowSet') return pandas.DataFrame(shots, columns=headers) if __name__ == '__main__': shot_df = get_shot_dataframe() with pandas.option_context('display.max_columns', None): display(shot_df.head()) seaborn.set_style('white') seaborn.set_color_codes() # # Basic Graph plt.figure(figsize=(12, 11)) plt.scatter(shot_df.LOC_X, shot_df.LOC_Y) ## Plot Right side # right = shot_df[shot_df.SHOT_ZONE_AREA == 'Right Side(R)'] # plt.figure(figsize=(12,11)) # plt.scatter(right.LOC_X, right.LOC_Y) plt.xlim(300, -300) plt.ylim(-100, 500) draw_court() plt.show() # joint_shot_chart = seaborn.jointplot(shot_df.LOC_X, shot_df.LOC_Y, stat_func=None, kind='scatter', space=0, alpha=0.5) # joint_shot_chart.fig.set_size_inches(12,11)
def presentation_four_states(references, tracks, distance, filename):
    """Save two 2x2 presentation figures: dynamic states and shape states.

    The "dynamic" figure receives ``plot.traj_xy`` (top row) and the
    velocities (bottom row). The "shape" figure receives yaw and angular
    velocity (left column) and the dimensions (right column). All
    references and tracks are drawn into the same two figures, which are
    written to ``shape_plots.png`` and ``dynamic_plots.png`` below
    ``/home/kostas/Dropbox/final_presentation/figures/``.

    :param references: iterable of ``(name, trajectory)``;
        ``trajectory.timestamps[0]`` is passed to every plot helper
    :param tracks: iterable of ``(filter_name, trajectory)``; ``'KF'`` is
        drawn in blue and ``'UKF'`` in green
    :param distance: forwarded to ``associate_segments_common_frame``
    :param filename: not used by this function
    """
    sns.set_color_codes()

    fig_dynamic, ax_dyn = plt.subplots(2, 2, figsize=(7.7, 4), dpi=300,
                                       sharex=True, constrained_layout=True)
    fig_shape, ax_shape = plt.subplots(2, 2, figsize=(7.7, 4), dpi=300,
                                       sharex=True, constrained_layout=True)

    # Unknown filter names fall back to black.  Previously `color` was left
    # unbound (or kept the previous track's color) in that case.
    track_colors = {'KF': 'b', 'UKF': 'g'}
    shape_color = 'indianred'
    ref_color = 'gray'

    for ref in references:
        start_time = ref[1].timestamps[0]

        for track in tracks:
            color = track_colors.get(track[0], 'k')
            segments, traj_ref = \
                associate_segments_common_frame(ref[1], track[1], distance)

            for i, segment in enumerate(segments):
                # Only the first segment carries a label for these two plots.
                is_first = i == 0
                plot.traj_xy(ax_dyn[0, 0:2], segment, '-', color,
                             track[0] if is_first else None, 1, start_time)
                # Called through `plot` like the reference curve below; the
                # bare name `angular_vel` is not defined in this function.
                plot.angular_vel(ax_shape[1, 0], segment, '-', shape_color,
                                 'Shape' if is_first else None, 1, start_time)
                plot.linear_vel(ax_dyn[1, 0:2], segment, '-', color,
                                track[0], 1, start_time)
                plot.traj_yaw(ax_shape[0, 0], segment, '-', shape_color,
                              None, 1, start_time)
                plot.dimensions(ax_shape[0:2, 1], segment, '-', shape_color,
                                track[0], 1, start_time)

            plot.traj_xy(ax_dyn[0, 0:2], traj_ref, '-', ref_color,
                         'Reference', 1, start_time)
            plot.vx_vy(ax_dyn[1, 0:2], traj_ref, '-', ref_color,
                       'reference', 1, start_time)
            plot.traj_yaw(ax_shape[0, 0], traj_ref, '.', ref_color,
                          None, 1, start_time)
            plot.angular_vel(ax_shape[1, 0], traj_ref, '-', ref_color,
                             None, 1, start_time)

    # The x axis is shared, so the top-row labels are cleared.
    ax_shape[0, 0].set_xlabel('')
    ax_shape[0, 1].set_xlabel('')
    ax_dyn[0, 0].set_xlabel('')
    ax_dyn[0, 1].set_xlabel('')

    # Horizontal reference lines in the two dimension panels.
    ax_shape[0, 1].axhline(y=0.385, color='gray')
    ax_shape[1, 1].axhline(y=0.2, color='gray')

    for col in range(0, 2):
        for row in range(0, 2):
            ax_dyn[row, col].set_xlim(left=0)
            ax_shape[row, col].set_xlim(left=0)

    # Legend handles, named after the series they stand for.
    kf_patch = mpatches.Patch(color='b', label='Kalman Filter')
    ukf_patch = mpatches.Patch(color='g', label='Unscented Kalman Filter')
    shape_patch = mpatches.Patch(color='indianred', label='Shape Kalman Filter')
    ref_patch = mpatches.Patch(color='gray', label='Reference')

    lgd_dynamic = fig_dynamic.legend(
        handles=[kf_patch, ukf_patch, ref_patch],
        loc='lower center', ncol=3, borderpad=0.3,
        columnspacing=0.8, borderaxespad=-3)
    lgd_shape = fig_shape.legend(
        handles=[shape_patch, ref_patch],
        loc='lower center', ncol=2, borderpad=0.3,
        borderaxespad=-3, columnspacing=0.8, frameon=True)

    fig_shape.savefig(
        "/home/kostas/Dropbox/final_presentation/figures/shape_plots.png",
        bbox_extra_artists=[lgd_shape],
        transparent=True)
    fig_dynamic.savefig(
        "/home/kostas/Dropbox/final_presentation/figures/dynamic_plots.png",
        bbox_extra_artists=[lgd_dynamic],
        transparent=True)
print(scores.corr(method='spearman')[['prevalence', 'n']]) # ------------------------------------------------ # Plot # ------------------------------------------------ # Libraries import seaborn as sns import matplotlib.pyplot as plt # Seaborn sns.set_theme(style="whitegrid") # -------------- # Plot FacetGrid # -------------- """ sns.set_color_codes("muted") sns.despine(left=True, bottom=True) # Create facet grid g = sns.FacetGrid(scores_stacked, col="metric", col_wrap=3, sharey=False, aspect=1.5) # Plot sns plots g.map_dataframe(sns.barplot, x="month", y="result", linewidth=0.76) """ # --------------- # Plot main
# coding: utf-8

# In[4]:


get_ipython().run_line_magic('matplotlib', 'inline')
import pymc3 as pm
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
palette = 'muted'
sns.set_palette("summer")
sns.set_color_codes(palette)


def posterior(grid_points=100, heads=5, tosses=20):
    """Grid approximation of the posterior for the coin-flip problem.

    The bias of the coin is evaluated on ``grid_points`` evenly spaced values
    between 0 and 1. A constant prior is combined with the binomial
    likelihood of seeing ``heads`` heads in ``tosses`` tosses (by default 5
    heads out of 20 tosses), and the result is normalised to sum to one.

    Returns the grid and the normalised posterior as a pair of arrays.
    """
    theta = np.linspace(0, 1, grid_points)
    flat_prior = np.repeat(5, grid_points)
    weights = stats.binom.pmf(heads, tosses, theta) * flat_prior
    return theta, weights / weights.sum()
def load_data(self,
              loader,
              custom_preprocessing: data.Pipeline = DEFAULT_DATA_PIPELINE,
              verbose=True):
    """Load word vectors and the dataset, then cache the dataset's parts on ``self``.

    Args:
        loader: Callable invoked with the experiment name, the word vectors,
            the configuration, batch size, data path, the three split files,
            the CUDA flag and the verbosity flag. It must return a mapping
            with the keys read below ('vocabs', 'task', 'stats', ...).
        custom_preprocessing: Accepted but not used in this method.
        verbose: When true, a 'stats' folder is created under ``data_path``
            and ``show_stats()`` is called after loading; otherwise only
            ``_calculate_dataset_stats()`` runs.
    """
    self.verbose = verbose

    if self.verbose:
        # Folder that will hold the statistics images.
        self.img_stats_folder = os.path.join(self.data_path, 'stats')
        create_dir_if_necessary(self.img_stats_folder)

    self.logger.info(
        f'Getting {self.pretrained_word_embeddings} with dimension {self.pretrained_word_embeddings_dim}'
    )

    # Pick the pretrained embedding family; any other value means no vectors.
    embedding_kind = self.pretrained_word_embeddings
    if embedding_kind == 'glove':
        word_vectors = vocab.GloVe(
            name=self.pretrained_word_embeddings_name,
            dim=self.pretrained_word_embeddings_dim)
    elif embedding_kind == 'fasttext':
        word_vectors = vocab.FastText(language=self.language)
    else:
        word_vectors = None
    self.logger.info('Word vectors successfully loaded.')

    self.logger.debug('Start loading dataset')
    self.dataset = loader(
        self.name, word_vectors, self.configuration, self.batch_size,
        self.data_path, self.train_file, self.valid_file, self.test_file,
        self.use_cuda, self.verbose)
    loaded = self.dataset

    # Copy the loader's results onto the instance, in the original order.
    for attribute, key in (
            ('vocabs', 'vocabs'),
            ('task', 'task'),
            ('ds_stats', 'stats'),
            ('split_length', 'split_length'),
    ):
        setattr(self, attribute, loaded[key])
    self.train_iter, self.valid_iter, self.test_iter = loaded['iters']
    self.fields = loaded['fields']
    self.target = loaded['target']
    self.target_names = [name for name, _ in self.target]
    for attribute, key in (
            ('examples', 'examples'),
            ('embedding', 'embeddings'),
            ('dummy_input', 'dummy_input'),
            ('source_field_name', 'source_field_name'),
            ('target_field_name', 'target_field_name'),
            ('padding_field_name', 'padding_field_name'),
            ('baselines', 'baselines'),
    ):
        setattr(self, attribute, loaded[key])

    # Values derived from the target vocabulary and the source embedding.
    target_vocab = self.vocabs[self.target_vocab_index]
    self.target_size = len(target_vocab)
    self.source_embedding = self.embedding[self.source_index]
    self.class_labels = list(target_vocab.itos)
    self.source_reverser = loaded['source_field']
    self.target_reverser = self.target[0]

    self.log_parameters()

    if not verbose:
        self._calculate_dataset_stats()
    else:
        # Plot styling for the statistics figures. The call order is kept
        # because each sns.set(...) resets the settings made before it.
        sns.set(style="whitegrid")
        sns.set_style("white")
        sns.despine()
        sns.set_color_codes()
        sns.set(rc={"font.size": 18, "axes.labelsize": 22})
        self.show_stats()

    self.logger.info('Dataset loaded. Ready for training')
def main():
    """Plot per-concept decoding performance and per-concept brain maps.

    Two figures are produced:

    1. A performance figure, saved to ``--results_file``. For every concept
       in the per-label CSV it shows the ratio in the TRAIN dataset and the
       test AUC.
    2. A grid of boxes, one per concept, saved to ``--maps_file``. Each box
       holds the encoding map and the decoder's mean input gradient.

    Fixes compared with the previous version:

    * ``-r/--results_file`` was registered twice, which makes argparse
      raise a conflicting-option error before anything runs. The maps
      output now has its own ``-o/--maps_file`` option, so the two figures
      no longer share (and overwrite) one path.
    * ``args.result_file`` and ``args.maskl`` were typos that raised
      ``AttributeError``.
    """
    # --------------
    # --- CONFIG ---
    # --------------
    parser = argparse.ArgumentParser(
        description="A script to plot results from a decoding model",
        epilog='''Examples: - python perf_plot.py -p per_label_results.csv -r perf.PNG'''
    )

    # TODO: combine and replace by just the experiment config
    parser.add_argument("-p", "--perf_file",
                        default="../Data/results/per_label_results.csv",
                        help="path to the CSV file with the per-label "
                             "performance")
    parser.add_argument("-r", "--results_file",
                        default="../Data/results/perf.PNG",
                        help="Path to the file where results are saved")
    parser.add_argument("-f", "--features",
                        default="../Data/X.p",
                        help="Path of the pickle file "
                             "containing the matrix of fMRI stat maps")
    parser.add_argument("-m", "--mask",
                        default="../Data/masks/mask.nii.gz",
                        help="Path of the Nifti file containing the mask used "
                             "for full voxel maps")
    parser.add_argument("-a", "--atlas",
                        default="../Data/models/components_1024_task.nii.gz",
                        help="Path of the Nifti file containing the brain atlas"
                             " (dictionary) used to embed the features used "
                             "for both encoding and decoding")
    parser.add_argument("-l", "--labels",
                        default="../Data/Y.p",
                        help="Path of the pickle file "
                             "containing the matrix of map labels")
    parser.add_argument("-d", "--decoding_model",
                        default="../Data/models/clf.pt",
                        help="Path of the pytorch dump of a decoding model "
                             "trained provided on the maps and labels")
    parser.add_argument("-e", "--encodings",
                        default="../Data/models/encodings.p",
                        help="Path of the pickle dump of the dictionary "
                             "of encoding maps")
    parser.add_argument("-c", "--concepts",
                        default="../Data/concepts.csv",
                        help="Path of the CSV file of comma separated concepts"
                             ", ordered as the columns of the labels file")
    # Separate option for the maps figure. It used to be a second
    # "-r/--results_file", which argparse rejects.
    parser.add_argument("-o", "--maps_file",
                        default="../Data/results/maps.PNG",
                        help="Path to the file where the maps figure is saved")

    args = parser.parse_args()

    # --------------------
    # --- DATA LOADING ---
    # --------------------
    results_model = pd.read_csv(args.perf_file, index_col=0)
    results_model.sort_values("AUC TEST", ascending=False, inplace=True)
    results_model.index.name = "Concept"

    # ------------------------
    # --- PLOT PERFORMANCE ---
    # ------------------------
    sns.set(style="whitegrid")
    sns.set_context("notebook", font_scale=1.1)
    font = {'fontname': 'DejaVu Sans Mono'}

    n_concepts = len(results_model)
    ylabels = list(results_model.index)
    # NOTE(review): `wrap` must return a str for this concatenation to work
    # (textwrap.wrap returns a list) - confirm which helper is imported.
    ylabels_padded = [" " * 39 + wrap(lab, 37) for lab in ylabels]

    fig, axes = plt.subplots(nrows=1, ncols=3,
                             gridspec_kw={'width_ratios': [10, 18, 40]})
    fig.set_figheight(16)
    fig.set_figwidth(15)

    sns.set_style("whitegrid")
    # Reversed so that the best concept ends up at the top of the figure.
    ind = np.arange(n_concepts)[::-1]
    height_nnod = 0.4
    height_other = 0.15

    # Plot ratio in TRAIN
    i = 0
    sns.set_color_codes("muted")
    axes[i].set(xlim=(0, 0.75), ylim=(-1, n_concepts),
                ylabel="", xlabel="ratio in TRAIN dataset")
    axes[i].xaxis.set_major_locator(ticker.MultipleLocator(0.25))
    axes[i].invert_xaxis()
    axes[i].set_yticklabels(np.arange(n_concepts))
    axes[i].set_yticks([])  # Hide the left y-axis ticks
    axes[i].grid(False)
    # Twin axis sharing x; it carries the concept labels on the right side.
    ax_twin = axes[i].twinx()
    ax_twin.axvline(0.25, 0, 1, color='grey', linestyle='--', linewidth=0.5)
    ax_twin.axvline(0.5, 0, 1, color='grey', linestyle='--', linewidth=0.5)
    ax_twin.barh(ind, results_model["ratio TRAIN"].values,
                 height_nnod + 2 * height_other,
                 linewidth=0, color='darkorange')
    ax_twin.set_yticks(ind)
    ax_twin.set_yticklabels(ylabels_padded,
                            horizontalalignment='center', **font)
    ax_twin.set(ylabel="", ylim=(-1, n_concepts))
    ax_twin.tick_params(axis=u'both', which=u'both', length=0)
    ax_twin.grid(False)

    # Plot empty (spacer column underneath the centred labels)
    i = 1
    axes[i].axis("off")
    axes[i].set(ylim=(-1, n_concepts))

    # Plot AUC
    i = 2
    axes[i].axvline(0.25, 0, 1, color='grey', linestyle='--', linewidth=0.5)
    axes[i].axvline(0.5, 0, 1, color='grey', linestyle='--')
    axes[i].axvline(0.75, 0, 1, color='grey', linestyle='--', linewidth=0.5)
    axes[i].barh(ind, results_model["AUC TEST"].values, height_nnod,
                 linewidth=0, color='darkorange')
    axes[i].set(xlim=(0, 1), ylim=(-1, n_concepts),
                ylabel="", xlabel="AUC on IBC dataset")
    axes[i].set_yticks(ind)
    axes[i].set_yticklabels([" "] * n_concepts)
    axes[i].xaxis.set_major_locator(ticker.MultipleLocator(0.25))
    axes[i].grid(False)

    # Was `args.result_file`, which does not exist on the namespace.
    plt.savefig(args.results_file, bbox_inches='tight', pad_inches=0.1)

    # --------------------
    # --- DATA LOADING ---
    # --------------------
    vocab = list(pd.read_csv(args.concepts, index_col=0).values.flat)

    mask = nib.load(args.mask)  # was `args.maskl`
    masker = NiftiMasker(mask_img=mask).fit()

    atlas = nib.load(args.atlas)
    atlas_masked = masker.transform(atlas)

    # SECURITY: pickle.load executes arbitrary code from the file; only
    # point these options at trusted, locally produced dumps.
    with open(args.encodings, 'rb') as f:
        encodings_dict = pickle.load(f)

    with open(args.features, 'rb') as f:
        X = pickle.load(f)

    decoder = PytorchEstimator.from_file(args.decoding_model)

    # Gradients with respect to the inputs are needed for the decoding maps.
    X_t = torch.tensor(X).float()
    X_t.requires_grad = True
    decoder.model.eval()

    # -------------------------
    # --- PLOT CONSTRUCTION ---
    # -------------------------
    n = len(vocab)
    n_col = 6
    scale_box = 0.98

    box_axes = [None] * n
    title_axes = [None] * n
    enc_axes = [None] * n
    dec_axes = [None] * n

    n_row = (n - 1) // n_col + 1
    width = 1 / n_col
    height = 1 / n_row

    fig = plt.figure(figsize=(4 * n_col, 4 * n_row))

    for i in range(n):
        concept = vocab[i]

        # Position of the box in figure coordinates, filled row by row
        # starting from the top.
        row = i // n_col
        col = i % n_col
        min_x = col * width
        min_y = (n_row - row - scale_box) * height

        box_axes[i] = fig.add_axes([
            min_x,
            min_y,
            width * scale_box,
            height * scale_box
        ])
        box_axes[i].set_xticks([])
        box_axes[i].set_yticks([])
        # Horizontal rule between the encoding and decoding maps.
        sep = mlines.Line2D(
            [0.1, 0.9],
            [0.43, 0.43],
            color='grey',
            linewidth=1.0
        )
        box_axes[i].add_line(sep)

        title_axes[i] = fig.add_axes([
            min_x,
            min_y + 0.87 * height * scale_box,
            width * scale_box,
            0.13 * height * scale_box
        ])
        title_axes[i].set_xticks([])
        title_axes[i].set_yticks([])
        # Truncate long concept names so the title fits in the box.
        if len(concept) <= 27:
            title = concept
        else:
            title = concept[:24] + "..."
        title_axes[i].text(
            0.5, 0.5, title,
            fontsize=18, weight='bold',
            va='center', ha='center'
        )

        enc_axes[i] = fig.add_axes([
            min_x + 0.03 * width * scale_box,
            min_y + 0.45 * height * scale_box,
            0.93 * width * scale_box,
            0.40 * height * scale_box
        ])
        enc_axes[i].set_xticks([])
        enc_axes[i].set_yticks([])
        pl_enc = plot_embedded(
            encodings_dict[concept].flatten(),
            atlas_masked,
            masker,
            plot_type="glass_brain",
            axes=enc_axes[i]
        )
        pl_enc.title("Enc.", color='k', bgcolor='w', alpha=0,
                     size=16, weight='bold')

        dec_axes[i] = fig.add_axes([
            min_x + 0.03 * width * scale_box,
            min_y,
            0.93 * width * scale_box,
            0.40 * height * scale_box
        ])
        dec_axes[i].set_xticks([])
        dec_axes[i].set_yticks([])
        # Decoding map: gradient of the mean output for concept i with
        # respect to the inputs, averaged over the samples (axis 0).
        der = torch.mean(
            torch.autograd.grad(torch.mean(decoder.model(X_t)[:, i]),
                                X_t)[0],
            0).detach().numpy()
        pl_dec = plot_embedded(
            der,
            atlas_masked,
            masker,
            plot_type="glass_brain",
            axes=dec_axes[i]
        )
        pl_dec.title("Dec.", color='k', bgcolor='w',
                     size=16, weight='bold')

    plt.savefig(args.maps_file, dpi=75,
                bbox_inches='tight', pad_inches=0.1)

    print(">>> Finished generating plots - file saved:", args.maps_file)
# .. note:: Could this be done nicely without a loop by using # some of the seaborn functionality such as the # FaceGrid? Note we can also incorporate the hue. # Loop for i, c in enumerate(FEATURES): # Draw print(" Drawing... %s." % c) # Create figure fig, ax = \ plt.subplots(1, 1, figsize=(8, 5)) # Configuration and plot sns.set_color_codes("muted") sns.boxplot(x=data.day, y=data[c], whis=1.5, hue='dengue_interpretation', fliersize=0, showfliers=False, linewidth=0.75, saturation=0.75, palette='Set3', data=data, ax=ax) # Draw normal reference range if c in NRR: ax.fill_between(x=sorted(data.day.values),
# Font sizes for tick labels and the legend.
rcParams.update({
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    'legend.fontsize': 14,
})

# One slot per row of `ds` plus one extra entry appended below.
N = len(ds) + 1
ind = np.arange(N)
graph_width = 0.35

# The "no calving" series receive one extra value at the end; the
# "with calving" series are taken from `ds` unchanged.
labels = np.append(ds['Experiment No'].values, 14)
bars1 = np.append(ds['Volume no calving bsl km3'].values, vol_bsl_fari)
bars2 = np.append(ds['Volume no calving in km3'].values, vol_fari)
bars3 = ds['Volume with calving bsl km3'].values
bars4 = ds['Volume with calving in km3'].values

sns.set_color_codes()
sns.set_color_codes("colorblind")

# bars1 is drawn with its sign flipped; entries 8..12 are drawn with
# alpha=0.5, i.e. semi-transparent.
p1 = ax1.barh(
    ind[0:8],
    bars1[0:8] * -1,
    color="indianred",
    edgecolor="white",
    height=graph_width,
)
p1_extra = ax1.barh(
    ind[8:13],
    bars1[8:13] * -1,
    color="indianred",
    edgecolor="white",
    height=graph_width,
    alpha=0.5,
)
sa_results_df['index_order'] = ['first_order', 'total_order', 'first_order', 'total_order', 'first_order', 'total_order', 'first_order', 'total_order'] ''' sa_results = pd.read_csv('../sim_results/sa_results.csv') sa_results['pred_index'] = sa_results['predictor'].astype( str) + '_' + sa_results['index_order'].astype(str) sa_results['sens'] = sa_results['pred_index'].map( sa_results.groupby(['pred_index'])['sensitivity'].mean()) sa_results['predictor_phase'] = sa_results['predictor'].astype( str) + '__PhaseGroup' + sa_results['phase_group'].astype(str) print(sa_results.head()) f = plt.figure(figsize=(100, 100)) sns.set_color_codes("pastel") g = sns.catplot(y="predictor_phase", x="sensitivity", hue="index_order", data=sa_results, height=6, kind="bar", palette="muted", legend=False, orient='h') g.despine(left=True) g.set_ylabels("Sensitivity") plt.legend(loc='upper right') plt.title('Sensitivity Analysis') plt.show() g.savefig("../sa_results.pdf", bbox_inches='tight')
#Let us do the plots here
from matplotlib import gridspec
from matplotlib.colors import LogNorm

if is_seaborn_plot:
    import seaborn as sns
    sns.set(style='ticks')
    sns.set_color_codes(seaborn_palette)

# Short aliases for the figure settings.
fsx = figure_size_x
fsy = figure_size_y
fos = font_size_label

# Unpack the sampler output: the first entry is used as the x-axis of the
# chain plot, and the third and fourth entries are summed into chi2.
vari = params[0]
posterior = params[1]
chi2 = params[2] + params[3]


#===========================================================
#              plot chains
#===========================================================
def plot_chains():
    """Save a 2-D histogram of reduced chi-squared against ``vari``.

    chi2 is divided by ``ndata - npars``. The figure is written to
    ``<outdir>/<star>_chains.pdf`` and then closed.
    """
    plt.xlabel('iteration')
    # Raw string: '\c' is an invalid escape sequence in a normal literal.
    # The resulting label text is unchanged.
    plt.ylabel(r'Reduced $\chi^2$')
    plt.hist2d(vari, chi2 / (ndata - npars), bins=50)
    fname = outdir + '/' + star + '_chains.pdf'
    print('Creating ', fname)
    plt.savefig(fname, bbox_inches='tight')
    plt.close()
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms
import sklearn as sk

import matplotlib as mpl
import matplotlib.pylab as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from sklearn.datasets import make_regression

sns.set()
sns.set_style("whitegrid")
sns.set_color_codes()

print('import configuration completed !')


def load_weather():
    """Read the weather CSV and add 'year', 'month' and 'day' columns.

    The 'date' column is parsed as datetimes, and the three new columns
    hold its calendar components.

    Returns:
        The loaded DataFrame including the three added columns.
    """
    weather = pd.read_csv('../../../data/basic/weather.csv',
                          parse_dates=['date'])
    date_parts = weather['date'].dt
    for part in ('year', 'month', 'day'):
        weather[part] = getattr(date_parts, part)
    return weather
import conic_parameters
from equation6 import Shell


# Vectorized version of this function since the one in
# conic_parameters is scalar-only
def theta_tail(beta, xi, f=conic_parameters.finf, th_init=np.radians(91.0)):
    """Solve ``f(theta, b, xi) = 0`` for each ``b`` in ``beta``.

    Args:
        beta: 1-D sequence of beta values; one root is found per element.
        xi: Extra parameter passed unchanged to ``f``.
        f: Function whose root is sought, called as ``f(theta, b, xi)``.
        th_init: Starting guess for the root finder, in radians.

    Returns:
        Float array with the same shape as ``beta`` holding ``pi - root``
        for every element.
    """
    # Force a float result so integer-valued input cannot truncate roots.
    thinf = np.empty_like(beta, dtype=float)
    for i, b in enumerate(beta):
        # fsolve returns a 1-element array; take the scalar explicitly
        # rather than relying on deprecated array-to-scalar conversion.
        thinf[i] = fsolve(f, th_init, args=(b, xi))[0]
    return np.pi - thinf


plotfile = sys.argv[0].replace('.py', '.pdf')

sns.set_style('ticks')
sns.set_color_codes(palette='deep')
fig, ax = plt.subplots(figsize=(4, 5))

ks = [0.0, 0.5, 3.0, 8.0]
colors = 'krmb'
beta = np.logspace(-4.0,-0.01,1000)
xmin, xmax = 0.0001, 1.0
ymin, ymax = -90.0, 90.0
labelsize = "small"
legend_size = "medium"

# Shade the 0-45 and 45-ymax bands across the full x range.
#ax.fill_between([xmin, xmax], [ymin, ymin], [0, 0], color='y', alpha=0.05)
ax.fill_between([xmin, xmax], [0, 0], [45, 45], color='k', alpha=0.2)
ax.fill_between([xmin, xmax], [45, 45], [ymax, ymax], color='k', alpha=0.1)

wbox=dict(facecolor='white', alpha=0.9, ec='none', pad=1.0)
def _summed_gaussian_kernels(samples, support):
    """Return the sum of one Gaussian kernel per sample, evaluated on *support*."""
    bandwidth = .8 * samples.std() * samples.size ** (-1 / 5.)
    kernels = [norm(x_i, bandwidth).pdf(support) for x_i in samples]
    return np.sum(kernels, axis=0)


def _index_after_first(mask):
    """Return the index just past the first True in *mask*, or ``np.inf`` if none."""
    hits = np.where(mask)[0]
    return hits[0] + 1 if hits.size else np.inf


def make_fig(params):
    """Make a DDM illustrative figure.

    Three stacked panels are drawn and saved to ``fig1c.pdf``:

    * top: kernel density of the response times with ``response == 1``,
    * middle: five simulated traces between the boundaries ``0`` and ``a``,
      starting at ``a * z`` after ``t``,
    * bottom: kernel density of the ``response == 0`` response times,
      drawn upside down.

    Args:
        params: Mapping with keys 'v', 'a', 'z' and 't'; it is also passed
            unchanged to ``hddm.generate.gen_rand_data``.

    Changes compared with the previous version: ``xrange`` became ``range``
    (a NameError on Python 3), the bare ``except:`` clauses became an
    explicit emptiness check, and the ``\\cdot`` tick label is a raw string.
    """
    # set up figure
    v = params['v']
    a = params['a']
    z = params['z']
    t = params['t']
    np.random.seed(0)  # fixed seed so the figure is reproducible
    sns.set(style="white", context='paper')
    sns.set_color_codes()
    golden = (1 + 5 ** 0.5) / 2
    single_column = (3.346, 2.301)
    plt.figure(figsize=single_column)
    gs = gridspec.GridSpec(3, 1, height_ratios=[1, golden, 1], hspace=0)
    mx = 5  # right edge of the time axis

    # first rt kde
    df, _ = hddm.generate.gen_rand_data(params, subjs=1, size=10000)
    x = df[df.response == 1].rt.values
    ax = plt.subplot(gs[0])
    support = np.linspace(0, mx, 1000)
    density = _summed_gaussian_kernels(x, support)
    # Shared y-limit so both density panels use the same scale.
    my = np.max(density) * 1.05
    ax.plot(support, density)
    ax.fill_between(support, 0, density, alpha=.5)
    ax.set_ylim(0, my)
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

    # traces
    ax = plt.subplot(gs[1])
    x = np.linspace(0, mx, 101)
    delta = x[1]
    # Samples before the drift starts (NaN, so nothing is drawn there) ...
    nd_samples = np.round(t / delta).astype(int)
    # ... and samples of the diffusion itself.
    d_samples = len(x) - nd_samples
    y0 = np.zeros(nd_samples) * np.nan
    y0[-1] = 0
    for _ in range(5):
        y1 = np.cumsum(norm.rvs(v * delta, np.sqrt(delta), size=d_samples))
        y = a * z + np.concatenate([y0, y1])
        idx1 = _index_after_first(y > a)  # first crossing of upper bound
        idx2 = _index_after_first(y < 0)  # first crossing of lower bound
        # Cut the trace at whichever boundary is hit first. A trace that
        # hits neither boundary is not drawn.
        if idx1 < idx2:
            y[idx1:] = np.nan
            ax.plot(x, y, 'b', zorder=-12, alpha=.5)
        elif idx2 < idx1:
            y[idx2:] = np.nan
            ax.plot(x, y, 'r', zorder=-11, alpha=.5)
    ax.set_ylim(0, a)
    ax.set_xlim(0, mx)
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels(['$0$', r'$a\cdot{}z$', '$a$'])
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([0, a * z, a])

    # boundaries and nd time
    ax.plot([0, mx], [0, 0], 'k')
    ax.plot([0, mx], [a, a], 'k')
    ax.plot([0, 0], [0, a], 'k')
    ax.plot([0, t - delta], [a * z, a * z], 'k')
    ax.plot(np.array([0, 1]) + t - delta, np.array([0, v]) + a * z, 'k')
    ax.text(t / 2., a * z * 0.7, '$t$', fontsize=8)
    ax.text(0.5 + t, (a * z + a) / 2., '$v$', fontsize=8)

    # second rt kde
    x = df[df.response == 0].rt.values
    ax = plt.subplot(gs[2])
    support = np.linspace(0, mx, 100)
    density = _summed_gaussian_kernels(x, support)
    ax.plot(support, density, 'r')
    ax.fill_between(support, 0, density, color='r', alpha=.5)
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])
    ax.set_ylim(0, my)
    ax.invert_yaxis()
    sns.despine(bottom=True, left=True)
    plt.tight_layout()
    plt.savefig('fig1c.pdf')
# Histogram distance between the first image and every later image.
distance = []
if len(filenames) > 0:
    # The reference is always the first file, so load and histogram it once
    # rather than on every iteration.
    # NOTE(review): if consecutive frames were meant to be compared, the
    # reference should advance with the loop - confirm the intent.
    img1 = img.getImg(filenames[0])
    hist_img = img.hist(img1)
    # Iterate the remaining files directly; the old loop ran off the end of
    # the list and used IndexError as its stop signal.
    for url in filenames[1:]:
        img2 = img.getImg(url)
        hist_img2 = img.hist(img2)
        # L1 distance between the two histograms.
        distance.append(np.sum(np.abs(hist_img - hist_img2)))
    print('end for loop')

np_distance = np.array(distance, dtype=np.double)
# np_distance = preprocessing.scale(np_distance)
print(np_distance)

length = np_distance.shape[0]
x_label = np.arange(0, length)

sns.set(style="whitegrid")
dic_data = {}
dic_data['index'] = x_label
dic_data['samilary'] = np_distance
sns.set_color_codes("pastel")
sns.barplot(x="index", y="samilary", data=dic_data, label="Total", color="b")

# `sns.plt` was removed from seaborn; use matplotlib's pyplot directly.
import matplotlib.pyplot as plt
plt.show()
'sMinus=N&Position=&Rank=N&RookieYear=&Season=2014-15&Seas'\ 'onSegment=&SeasonType=Regular+Season&TeamID=0&VsConferenc'\ 'e=&VsDivision=&mode=Advanced&showDetails=0&showShots=1&sh'\ 'owZones=0' response = requests.get(shot_chart_url) headers = response.json()['resultSets'][0]['headers'] shots = response.json()['resultSets'][0]['rowSet'] shot_df = pd.DataFrame(shots, columns=headers) # View the head of the DataFrame and all its columns from IPython.display import display with pd.option_context('display.max_columns', None): display(shot_df.head()) sns.set_style("white") sns.set_color_codes("dark") plt.figure(figsize=(12,11)) plt.scatter(shot_df.LOC_X, shot_df.LOC_Y) img = urllib.urlretrieve("http://stats.nba.com/media/players/230x185/201939.png", "201935.png") chef_pic = plt.imread(img[0]) plt.imshow(chef_pic,zorder=0, alpha=0.9) plt.show()
# In[2]: # comment this out if running file as a script get_ipython().magic('matplotlib inline') # In[3]: # Plot style customization sns.set_style("white") sns.set_context("talk") # use "colorblind" if you don't want reds and greens together sns.set_color_codes("muted") colors = ['b','g','r'] # # Economic growth # # How can we account for the fact that national economies systematically continue to grow? The macroeconomics that describe boom-recession cycles don't show that the overall trend is upward. In fact, when one thinks about the macroeconomy in the short run it is not obvious at all that the economy should rise above some static equilibrium level. # # Growth theory tries to fill in the gaps. It considers the aggregate economy in the long run, where it is reasonable to apply so-called "Kaldor facts" which summarize the empirical evidence: # # * Output per worker and capital per worker tends to grow over time # * They also grow at similar rates # * The return to capital (the interest rate) over time is fixed # * The return to labor (the wage) seems to grow over time # * Labor's share of output is stable #
%matplotlib inline import matplotlib.pyplot as plt import numpy as np from scipy.stats import multivariate_normal import random as rn import eif as iso import seaborn as sb sb.set_style(style="whitegrid") sb.set_color_codes() import scipy.ndimage from scipy.interpolate import griddata import numpy.ma as ma from numpy.random import uniform, seed
""" A = np.append(trans_prob.transpose() - np.identity(40), [[1] * 40], axis=0) b = np.array([0] * 40 + [1]).transpose() prob_dist_monopoly = np.linalg.solve(A.transpose().dot(A), A.transpose().dot(b)) prob_dict_markov = {} for i, j in zip(list_of_block_monopoly, prob_dist_monopoly): print(i, end=': ') print(j) prob_dict_markov[i] = j colors = [ 'lightblack', 'brown', 'grey', 'brown', 'grey', 'grey', 'cyan', 'grey', 'cyan', 'cyan', 'grey', 'purple', 'grey', 'purple', 'purple', 'grey', 'orange', 'grey', 'orange', 'orange', 'grey', 'red', 'grey', 'red', 'red', 'grey', 'yellow', 'yellow', 'grey', 'yellow', 'grey', 'green', 'green', 'grey', 'green', 'grey', 'grey', 'blue', 'grey', 'blue' ] import seaborn as sns import matplotlib.pyplot as plt # Plot THE DISTRIBUTION plt.style.use('fivethirtyeight') plt.figure(figsize=(12, 5)) sns.set_color_codes('pastel') sns.barplot(x=list_of_block_monopoly, y=prob_dist_monopoly, palette=colors) plt.xticks(rotation=90) #plt.savefig('gabar', bbox_inches = 'tight') plt.show()
path_dict_out[title] = ([],-1) return path_dict_out, dist target = 'Steve Martin' path_dict_out, max_dist = find_all_nodes(graph,start=target) #identify nodes that the target failed to connect with by labelling these nodes with an empty list and a degree of separation of -1 ###calculate path length stats: dist_dict = {} for k,v in path_dict_out.iteritems(): dist_dict[v[1]] = dist_dict.get(v[1],0) + 1 x = [] y = [] for k,v in dist_dict.iteritems(): x.append(k) y.append(v) #current_palette = sns.color_palette(sns.diverging_palette(145, 280, s=85, l=25, n=max_dist)) sns.set_color_codes('deep') sns.barplot(x,y,color='b') plt.title('Path Distances from the "{}" Article'.format(target),fontsize=22) plt.xlabel('distance',fontsize=18) plt.ylabel('article count',fontsize=18) plt.ylim(0,20000) plt.tick_params(labelsize=15) plt.show()
assert (len(feature_list) == len(vif)) vif_df = (pd.DataFrame({ 'Features': feature_list, 'VIF': vif }).sort_values('VIF', ascending=False).reset_index()) # saving variable importance dataframe to make plots in csv vif_df.to_csv(results_folder + 'rf_cd_vif.csv') """ VIF plot """ plt.subplots(figsize=(15, 15)) sns.set_color_codes('muted') sns.barplot(x='VIF', y='Features', data=vif_df.iloc[:10, :], color='b') plt.title('Random Forest Variable Importance to Predict Flare') plt.xlabel('Variable Importance') plt.ylabel('Top 10 Features') plt.tight_layout(pad=2, w_pad=2, h_pad=2) # save odds ratiosplot; pass white facecolor to save plt.savefig(results_folder + "rf_cd_vif_plot.pdf", facecolor='white') """ 1. RF Classification report and summary statistics """ # creating classification report class_report = classification_report(y_test, y_pred_class) print(class_report) # write classification report print(class_report,
def plot_log_likelihood_test(df_log_px):
    """Plot the distribution of ``df_log_px`` and save it as a PNG.

    A 40-bin histogram with a KDE curve and a rug is drawn on a new
    14x6 inch figure titled "Log likelihood". The image is written to
    ``image_dir`` under the name ``log_likelihood_test.png``.
    """
    output_path = image_dir + 'log_likelihood_test' + '.png'
    histogram_options = dict(bins=40, kde=True, rug=True, color='blue')

    plt.figure(figsize=(14, 6), dpi=80)
    plt.title("Log likelihood")
    sns.set_color_codes()
    sns.distplot(df_log_px, **histogram_options)
    plt.savefig(output_path)
def procesado_no_supervisado(list_url): dfs = [] for i in list_url: #ABRO FICHERO file = pd.read_csv(i, encoding='latin1', sep=';', error_bad_lines=False) if 'Time' not in file.columns: file = pd.read_csv(i, encoding='latin1', sep=',', error_bad_lines=False) tsec = [] t_uni = [] con = '' #ESTADISTICAS CON LA FUNCION LENGTH len_mean = [] len_std = [] len_min = [] len_max = [] estadistica = file.Length.describe() for i in range(file.shape[0]): len_mean.append(estadistica[1]) len_std.append(estadistica[2]) len_min.append(estadistica[3]) len_max.append(estadistica[7]) file['len_mean'] = len_mean file['len_std'] = len_std file['len_min'] = len_min file['len_max'] = len_max #PONGO CORRECTAMENTE LA COLUMNA TIEMPO if type(file['Time'][0]) != str: file['Time'] = file['Time'].astype(str) for i in file['Time']: spliteo = i.split('.') con = spliteo[0] + '.' + spliteo[1] tsec.append(con) t_uni.append(spliteo[0]) file['tiempo_dec(s)'] = tsec file['tiempo_dec(s)'] = file['tiempo_dec(s)'].astype(float) file['t_uni'] = t_uni file['t_uni'] = file['t_uni'].astype(float) #CALCULO N PAQUETES EN EL ULTIMO SEGUNDO q = deque() max_dif = 1 #promedio del tiempo entre paquetes en las ventanas promedio_t_ultimo_s = [] promedio = 0 n_paquetes = [] ocurrencias = 0 max_len_q = 0 for i in file['tiempo_dec(s)']: q.append(i) elimina_valores(q, dif_max=max_dif) max_len_q = max(max_len_q, len(q)) promedio = max_dif / len(q) n_paquetes.append(len(q)) promedio_t_ultimo_s.append(promedio) file['n_paquetes'] = n_paquetes file['n_paquetes'].astype(float) # print(file['n_paquetes']) dfs.append(file) #CREO COLUMNA PARA INDICAR SI N PAQUETES ES > 300 PAQUETES/s df_con_indica_n_paquetes = [] for df in dfs: alto_n_paquetes = [] for i in df.n_paquetes: if i < 300: alto_n_paquetes.append(0) else: alto_n_paquetes.append(1) df['alto_n_paquetes'] = alto_n_paquetes df_con_indica_n_paquetes.append(df) dfs_ataque = [] #CREO COLUMNA PARA INDICAR SI LOS DATOS VAN A UN PUERTO CALIENTE 21 O 22 dfs_pcaliente = [] for df in 
df_con_indica_n_paquetes: puerto_caliente = [] for fila in range(df.shape[0]): if type(df['Info'][fila]) != str: df['Info'] = df['Info'].astype(str) if (' 21 ' in df.Info[fila]) or (' 22 ' in df.Info[fila]): # print(file.Info[fila],'--tenemos 21 o 22') puerto_caliente.append(1) else: puerto_caliente.append(0) df['puerto_caliente'] = puerto_caliente dfs_pcaliente.append(df) #CREO COLUMNA INDICANDO PUERTO DESTINO df_total1 = columna_puerto_destino(dfs_pcaliente) df_total = escaner_puertos(df_total1) cont = 0 for df in df_total: xx = df.puerto_destino.value_counts( ascending=False)[df.puerto_destino.value_counts( ascending=False).index != 0].index[1:4] yy = df.puerto_destino.value_counts( ascending=False)[df.puerto_destino.value_counts( ascending=False).index != 0][1:4] fig, axes = plt.subplots(1, 1, sharey=True, figsize=(6, 4)) sns.set_color_codes("pastel") sns.barplot(x=xx, y=yy, data=df, label="numero de peticiones", color="b") sns.despine(left=True, bottom=True) plt.legend(ncol=2, loc='best', frameon=True) plt.xlabel('puerto destino') plt.ylabel('nº de peticiones') plt.savefig(list_url[cont] + '_puertos_mas_frecuentes.png') plt.show() plt.close() cont += 1 #CREO COLUMNA PARA CONTAR POR CADA IP SI HAY IPS DESTINO QUE df_total_concatenados = pd.concat(df_total)
matplotlib.rc('figure', figsize=fig_size) matplotlib.rc('font', size=8, family='serif') matplotlib.rc('axes', labelsize=8) matplotlib.rc('legend', fontsize=8) matplotlib.rc('xtick', labelsize=8) matplotlib.rc('ytick', labelsize=8) matplotlib.rc('text.latex', preamble= r'\usepackage[T1]{fontenc}\usepackage{lmodern}') #matplotlib_setup() import matplotlib.pyplot as plt import seaborn as sns # Activate Seaborn color aliases sns.set_palette('colorblind') sns.set_color_codes(palette='colorblind') plt.style.use('ggplot') sns.set_context('poster') sns.set_style("ticks") def power_spectrum(time, amplitude, weight=None, minfreq=None, maxfreq=None, oversample=None, memory_use=None, freq=None): """ This function returns the power spectrum of the desired star. Arguments: - 'time': Time in megaseconds from the timeserie analysis. - 'amplitude': Photometry data from the timeserie analysis. - 'weight': Weights for each point in the time series. - 'minfreq': The lower bound for the frequency interval
def evaluation(dataset, data_dir, plot_dir):
    """Create the evaluation plots for one dataset and save them as PDFs.

    The validation trajectories are loaded through ``cr.ConfigReader`` and
    the following figures are written into ``plot_dir``:

    * the single best architecture (bar per layer plus a parameter table),
    * the empirical CDF of train and test error,
    * box/count plots per solver, lr_policy and num_layers for the best
      10% of configurations,
    * learning-rate histograms per solver and per preprocessor, again for
      the best 10% of configurations.

    Args:
        dataset: Dataset name; used for loading and in titles/file names.
        data_dir: Directory handed to ``cr.ConfigReader``.
        plot_dir: Path prefix the output file names are appended to.

    Fixes compared with the previous version:

    * ``np.int`` (removed in NumPy 1.24) is replaced by the builtin ``int``.
    * ``DataFrame.append`` (removed in pandas 2.0) is replaced by
      ``pd.concat``.
    * The per-solver figure hid its last subplot unconditionally, which
      removed a real histogram whenever the number of solvers was even;
      only unused subplots are hidden now.
    * The per-solver title said "50% best configurations" although the
      data is filtered on the 10% quantile.
    * Locals that were computed but never used are dropped.
    """
    plt.rcdefaults()
    # Styles
    sns.set_style('whitegrid', {'axes.linewidth': 1.25, 'axes.edgecolor': '0.15',
                                'grid.linewidth': 1.5, 'grid.color': 'gray'})
    sns.set_color_codes()
    plt.rcParams['figure.figsize'] = (12.0, 9.0)
    plt.rc('text', usetex=False)
    plt.rc('font', size=14.0, family='sans-serif')

    # Data location and scenario
    preprocessor = 'all'

    # Load configurations
    reader = cr.ConfigReader(data_dir=data_dir, dataset=dataset)
    tdf = reader.load_validation_trajectories(preprocessor=preprocessor,
                                              load_config=True)

    # Decode number of layers: stored as a letter, 'a' maps to 0.
    tdf.loc[:, ('classifier', 'num_layers')] = \
        tdf['classifier']['num_layers'].apply(lambda X: ord(X) - ord('a'))

    ## Plot average best architectures
    # Only the single best configuration is kept (head(1)).
    top5 = tdf.sort_values([('smac', 'test_performance')]).head(1)
    lays = int(np.ceil(np.array(top5['classifier']['num_layers']).mean()))
    labels_list = ['Layer_' + str(i) for i in range(1, 7)]

    activations = []
    n_layers = []
    for i in np.arange(1, lays):
        activations.append(
            top5['classifier']['activation_layer_' + str(i)].describe().top)
        n_layers.append(top5['classifier']['num_units_layer_' + str(i)].mean())

    tab = top5.classifier.T.dropna()
    table_list = ['batch_size', 'dropout_output', 'learning_rate',
                  'lambda2', 'number_epochs', 'solver']
    t = tab.loc[table_list]
    # Add the preprocessor choice as an extra row of the parameter table.
    t = pd.concat([t, top5['preprocessor']['choice'].to_frame().T])

    a = pd.Series(np.array(n_layers))
    # Offset each bar so all bars are vertically centred on the tallest one.
    botoms = np.fabs(a.sub(a.max())) / 2

    activ_list = ['relu', 'elu', 'leaky', 'sigmoid', 'tanh', 'scaledTanh',
                  'linear']
    colr_list = sns.xkcd_palette(["windows blue", "pastel blue", "grey blue",
                                  "red orange", "emerald", "pine green",
                                  "amber"])
    activation_color_codes = dict(zip(activ_list, colr_list))
    bar_width = 0.1
    colors_bars = [activation_color_codes.get(i) for i in activations]

    with sns.axes_style('ticks'):
        fig_arch = plt.figure(1, figsize=(15., 9.))
        ax_arch = plt.subplot(111)
        bars = ax_arch.bar(np.arange(lays - 1) - (bar_width / 2), a,
                           bottom=botoms, width=bar_width, color=colors_bars)
        sns.despine(left=True)
        ax_arch.set_ylabel('Number of units in Layer')
        ax_arch.set_yticklabels([])
        ax_arch.set_yticks([])
        ax_arch.set_xticks(np.arange(lays - 1))
        ax_arch.set_xticklabels(labels_list[:lays - 1])
        ax_arch = autolabel(bars, ax_arch)
        table_ax(ax_arch, t)
        ax_arch.legend([b for b in bars], activations, loc='best')
        ax_arch.set_title('Single best architecture found for dataset %s'
                          % dataset)
        ax_arch.set_xlim(-0.5, lays - 1)
        fig_arch.savefig(plot_dir + "Best_architecture_on_%s.pdf" % dataset)

    # Start filtering the error
    temp_df = tdf.copy()
    temp_df.columns = tdf.columns.droplevel(0)
    qtil_10 = temp_df['test_performance'].quantile(0.1)
    del temp_df
    # Mask selecting the best 10% of configurations by test error.
    m = tdf[('smac', 'test_performance')] <= qtil_10

    # Setting values to log scale and categorical values
    log_columns = ['beta1', 'beta2', 'gamma', 'lambda2', 'learning_rate',
                   'momentum', 'num_units_layer_1', 'num_units_layer_2',
                   'num_units_layer_3', 'num_units_layer_4',
                   'num_units_layer_5', 'num_units_layer_6', 'power',
                   'std_layer_1', 'std_layer_2', 'std_layer_3',
                   'std_layer_4', 'std_layer_5', 'std_layer_6']
    for lc in log_columns:
        try:
            tdf.loc[:, ('classifier', lc)] = \
                np.log10(tdf.loc[:, ('classifier', lc)])
        except KeyError:
            # Column not present for this dataset; nothing to transform.
            continue

    ## After Setting the frames. Start with the plotting
    plt.clf()

    # Plot the empirical CDF
    sorted_train = (tdf['smac']['train_performance']
                    .sort_values(ascending=True).values)
    sorted_test = (tdf['smac']['test_performance']
                   .sort_values(ascending=True).values)
    ytrain = np.arange(len(sorted_train)) / float(len(sorted_train))
    ytest = np.arange(len(sorted_test)) / float(len(sorted_test))
    plt.step(sorted_train, ytrain, label="Train Performance", lw=2.5)
    plt.step(sorted_test, ytest, label="Test Performance", lw=2.5)
    plt.xlabel("Cross-validation error $y(x)$")
    plt.ylabel(r"Number of Configs (%)")
    plt.xlim(0.0, min(1.0, sorted_test.max() + 0.01))
    plt.title("Empirical CDF of configurations based on error")
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(plot_dir + 'CDF_Error_%s.pdf' % dataset)

    categories = ['solver', 'lr_policy', 'num_layers']
    mask_filter = tdf[('smac', 'test_performance')] <= qtil_10
    filtered = tdf[mask_filter]
    for category in categories:
        fig_f, axs = plt.subplots(ncols=2, nrows=1, figsize=(15.0, 10.5))
        ax0, ax1 = axs.flat
        sns.boxplot(x=('classifier', category),
                    y=('smac', 'test_performance'),
                    data=filtered.sort_values(by=[('classifier', category)]),
                    ax=ax0)
        ax0.set_xlabel(category)
        ax0.set_ylabel('Test error performance')
        ax0.set_title('Error distribution based on %s' % category)
        sns.countplot(x=('classifier', category),
                      data=filtered.sort_values(by=[('classifier', category)]),
                      ax=ax1)
        ax1.set_xlabel(category)
        ax1.set_ylabel('Times used')
        ax1.set_title('Bar plot of frequency of %s' % category)
        fig_f.suptitle("Descriptive stats of %s on dataset %s using 10%% of configurations"
                       % (category, dataset), y=0.98)
        # fig_f.tight_layout()
        fig_f.savefig(plot_dir + 'Descriptive_plots_over_%s_on_%s.pdf'
                      % (category, dataset))
        fig_f.show()

    ## Plot distro over learning rates
    # Create the grouping of the filtered DF
    classifier_df = tdf[m]['classifier']
    solver_filt = classifier_df.groupby('solver')

    col_hist = sns.color_palette('Paired', 8, desat=0.8)
    n_solvers = len(solver_filt)
    rows_to_plot = int(np.ceil(n_solvers / 2.))
    fig2, axs = plt.subplots(nrows=rows_to_plot, ncols=2, figsize=(12., 17.))
    fig2.suptitle('Distribution of learning rate values for each '
                  'solver on dataset %s \n (based on 10%% best configurations)'
                  % dataset, y=1.02)
    for ax, (name, groups) in zip(axs.flat, solver_filt):
        ax.hist(groups.learning_rate.values, bins=5, histtype='bar',
                fill=True, label=name, alpha=0.9, color=col_hist.pop())
        ax.set_xlabel('learning rate values (log scale)')
        ax.set_ylabel('# of Configs')
        ax.legend(loc='best')
    # plt.tight_layout()
    # Hide only the subplots that received no histogram.
    for unused_ax in axs.ravel()[n_solvers:]:
        unused_ax.set_visible(False)
    fig2.savefig(plot_dir
                 + 'Histogram_of_learning_rate_solver_on_dataset_%s.pdf'
                 % dataset)

    ## Plot over different preprocessing methods
    # Create the grouping of the filtered DF
    prepro_filt = tdf[m].groupby([('preprocessor', 'choice')])
    prepro_color = sns.color_palette('Paired', 14, desat=0.8)
    fig4, axs = plt.subplots(nrows=3, ncols=5, sharex='col',
                             figsize=(22., 12.))
    fig4.suptitle('Distribution of learning rate for each preprocessor on dataset %s'
                  % dataset, y=1.02)
    for ax, (name, grops) in zip(axs.flat, prepro_filt):
        groups = grops['classifier']
        ax.hist(groups.learning_rate.values, bins=5, histtype='bar',
                fill=True, label=name, color=prepro_color.pop())
        ax.set_xlabel('learning rate values (log scale)')
        ax.set_ylabel('# of Configs')
        ax.legend(loc='best')
    # plt.tight_layout()
    fig4.savefig(plot_dir
                 + 'Histogram_of_learning_rate_prepro_on_dataset_%s.pdf'
                 % dataset)
import conic_parameters
import theta_ratio_fit
# The path is extended before importing conproj_utils -- presumably that
# module lives in ../conic-projection (TODO confirm).
sys.path.append('../conic-projection')
from conproj_utils import Conic

# Parameter grid: the figure gets one subplot row per entry of XI_LIST and
# one column per entry of BETA_LIST (see plt.subplots below).
XI_LIST = [None, 1.0, 0.8, 0.4]
BETA_LIST = [0.1, 0.01, 0.0001]
nxi, nbeta = len(XI_LIST), len(BETA_LIST)

# Angular grid covering [0, pi].
ntheta = 100
theta = np.linspace(0.0, np.pi, ntheta)

# Name the output figure after this script.  Only a trailing '.py' is swapped
# for '.pdf'; a blanket str.replace would also rewrite '.py' occurring in a
# directory name or in the middle of the file name.
_script_path = sys.argv[0]
if _script_path.endswith('.py'):
    figfilename = _script_path[:-len('.py')] + '.pdf'
else:
    figfilename = _script_path

sns.set_style('whitegrid')
sns.set_color_codes('dark')

NROWS = 2
fig, axes = plt.subplots(nxi, nbeta, sharex=True, sharey=True)

# Shared axis limits for all panels.
xmin, xmax = -5.0, 2.1
ymin, ymax = -0.1, 7.0
# Alternative, wider limits:
# xmin, xmax = -7.0, 4.1
# ymin, ymax = -0.1, 11.0

# Anchor points 98% of the way across each axis range.
ytop = ymin + 0.98*(ymax - ymin)
xright = xmin + 0.98*(xmax - xmin)

# Semi-transparent white rounded box -- presumably used as the bbox of text
# labels drawn later (TODO confirm against the rest of the script).
whitebox = {'edgecolor': 'none', 'facecolor': 'white',
            'alpha': 0.7, 'boxstyle': 'round,pad=0.1'}
def main_5():
    """Plot the BLE / Wi-Fi share of the ``R`` counters for each delay.

    For every entry of the module-level ``delay`` list, the JSON result files
    of the first relay setting (``relay[0]``) are located through
    ``my.get_grouped_files`` and read with ``my.open_file_and_return_data``.
    From each file the ``summary.mex_.{ble,wifi,total}.R`` values are turned
    into percentages of the total.

    Two figures are shown:

    1. Overlaid bars per delay: a constant 100 % "total" bar with the BLE
       percentage drawn on top of it.
    2. A stacked BLE / Wi-Fi bar chart for the delays 50 ... 1000, with the
       x axis labelled by frequency in Hz (``1000 / delay``, i.e. the delay
       is interpreted as milliseconds).

    Uses the module-level names ``directory``, ``delay``, ``relay`` and
    ``my``.  The global ``delay`` list is only read, never modified.

    Raises:
        Exception: if ``directory`` is ``'ble_output'``.
        KeyError: if no file was found for one of the plotted delays.
    """
    if directory == 'ble_output':
        raise Exception('wrong - directory- ')

    shares_by_delay = {}   # delay -> [ble %, wifi %]; the last file read wins
    ble_shares = []        # ble % of every file, in reading order
    delays_read = []       # delay of every file, in reading order
    relay_index = 0        # only the first relay setting is evaluated

    for d, delay_value in enumerate(delay):
        path = (my.path_media + "json_file/" + directory + "/relay_"
                + str(relay[relay_index]) + "/x/delay_XXX_"
                + str(delay_value) + ".json")
        files = my.get_grouped_files(source_path=path, delay=delay,
                                     index_delay=d)
        for item in files:
            data = my.open_file_and_return_data(path=item)
            time_ = data['_command']['delay']
            counters = data['summary']['mex_']
            total = counters['total']['R']
            p_ble = counters['ble']['R'] * 100 / total
            p_wifi = counters['wifi']['R'] * 100 / total

            shares_by_delay[time_] = [p_ble, p_wifi]
            delays_read.append(time_)
            ble_shares.append(p_ble)

    # ---- Figure 1: BLE share drawn over a full-height "total" bar --------
    df = pd.DataFrame({
        'total': [100] * len(ble_shares),
        'ble': ble_shares,
        'delay': delays_read,
    })
    print(df)

    fig, ax = plt.subplots(figsize=(10, 4))
    sns.set_context('notebook')
    sns.set_color_codes("muted")
    sns.barplot(x='delay', y='total', data=df, label="total", color='b', ax=ax)
    sns.barplot(x='delay', y='ble', data=df, label="ble", color='y', ax=ax)
    sns.despine()
    plt.show()
    print("--")

    # ---- Figure 2: stacked BLE / Wi-Fi percentages ----------------------
    plotted_delays = (50, 100, 150, 200, 250, 500, 1000)
    columns = {'type': ['ble', 'wifi']}
    for delay_ms in plotted_delays:
        columns[str(delay_ms)] = shares_by_delay[delay_ms]
    df = pd.DataFrame(columns)
    print(df.head())

    sns.set_context('notebook')
    df.set_index('type').T.plot(kind='bar', stacked=True)
    plt.legend(loc='best')

    # pandas draws the bars at x = 0 .. n-1, in column order.  The tick
    # positions and frequency labels are therefore derived from the plotted
    # delays themselves, so every label sits under its own bar whatever the
    # order of the global ``delay`` list.
    tick_positions = list(range(len(plotted_delays)))
    tick_labels = [round(1000 / delay_ms, 2) for delay_ms in plotted_delays]
    plt.xticks(tick_positions, tick_labels, rotation=30)
    plt.xlabel("Frequency (Hz)", fontsize=16)
    plt.ylabel("Percentage", fontsize=16)
    # plt.title(title)
    sns.despine()  # drop the top and right spines
    plt.show()
shot_chart_url = cp3_url() pd.set_option('display.width', 200) response = requests.get(shot_chart_url) headers = response.json()['resultSets'][0]['headers'] shots = response.json()['resultSets'][0]['rowSet'] shot_df = pd.DataFrame(shots, columns=headers) sns.set_style("white") sns.set_color_codes() # sns.despine(left=True) pic = urllib.urlretrieve("http://stats.nba.com/media/players/230x185/101108.png", "101108.png") paul_pic = plt.imread(pic[0]) img = OffsetImage(paul_pic, zoom=0.6) img.set_offset((400,350)) cmap=plt.cm.gist_heat_r # plt.axis('off') # plt.figure(figsize=(12,11)) # plt.figure(figsize=(6,5.5)) # cp3 = plt.hexbin(shot_df.LOC_X, shot_df.LOC_Y, gridsize=[20,7]) joint_shot_chart = sns.jointplot(shot_df.LOC_X, shot_df.LOC_Y, size=7.5, stat_func=None,
# Attach each team's win count to its total salary record.
total_sal_wins = pd.merge(total_sal, teams[['teamID', 'yearID', 'W']])
total_sal_wins.head()


# #### Problem 1(d)
#
# How would you graphically display the relationship between total wins and
# total salaries for a given year? What kind of plot would be best? Choose a
# plot to show this relationship and specifically annotate the Oakland
# baseball team on the plot. Show this plot across multiple years. In which
# years can you detect a competitive advantage from the Oakland baseball team
# of using data science? When did this end?
#
# **Hints**: Use a `for` loop to consider multiple years. Use the `teamID`
# (three letter representation of the team name) to save space on the plot.

# In[178]:

# your code here
import seaborn as sns

sns.set(style='ticks', context='notebook')
sns.set_color_codes('dark')

# Split the table into Oakland and everyone else with one shared mask.
is_oak = total_sal_wins.teamID == 'OAK'
OAK = total_sal_wins[is_oak]
not_OAK = total_sal_wins[~is_oak]

# Most recent season in the data; Oakland's point for it is highlighted.
last_year = total_sal_wins.yearID.max()
oak_last_year = OAK[OAK.yearID == last_year]

plt.scatter(not_OAK.salary, not_OAK.W, label='not OAK', alpha=0.7)
plt.scatter(OAK.salary, OAK.W, c='r', s=100, label='OAK')
plt.scatter(oak_last_year.salary, oak_last_year.W,
            c='g', s=200, label='OAK %d' % last_year)

plt.legend(loc='best')
plt.xlabel('Salary, 100 mln')
plt.ylabel('Wins')
sns.despine()