Esempi in Python per pointplot, esempi in Python per seaborn.pointplot

Esempio n. 1

0

Mostra file

File: rbugs_statistics.py Progetto: eleweek/dataisbeautiful

def do_plot(flair_stats, filename):
    """Plot monthly bug-report totals against reports still flaired "new".

    Args:
        flair_stats: Mapping of month -> mapping of flair name -> count.
            Months are plotted in sorted order.
        filename: Path the resulting figure is written to.
    """
    months = []
    new_flaired = []
    total = []

    for month, month_stats in sorted(flair_stats.items()):
        months.append(month)
        total.append(sum(month_stats.values()))
        # A month may have no submission left with the "new" flair at all.
        new_flaired.append(month_stats.get('new', 0))

    sns.set_style('whitegrid')

    total_plot_color = sns.xkcd_rgb["denim blue"]
    ignored_plot_color = sns.xkcd_rgb["orange red"]

    # Both series share one Axes; the second call is pinned to it explicitly.
    total_plot = sns.pointplot(x=months, y=total, color=total_plot_color)
    sns.pointplot(x=months, y=new_flaired, color=ignored_plot_color,
                  ax=total_plot)

    # pointplot adds no legend entries here, so proxy patches stand in.
    total_patch = mpatches.Patch(color=total_plot_color)
    ignored_patch = mpatches.Patch(color=ignored_plot_color)

    total_plot.set(ylabel="Number of bugreports", xlabel="Month")
    total_plot.set_title('/r/bugs statistics by month:\nReddit admins consistently ignore half of bugreports', y=1.02)

    # ``sns.plt`` is gone from current seaborn releases; go through the
    # Axes/Figure objects instead of the pyplot re-export.
    total_plot.legend([total_patch, ignored_patch],
                      ['Total number of bugreports',
                       'Number of ignored bugreports (submissions with "new" flair)'],
                      loc="lower left")

    total_plot.figure.savefig(filename)

Esempio n. 2

0

Mostra file

File: analysis_helpers.py Progetto: judithfan/graphcomm

def plot_topk_all_models():
    '''
    Draw a line plot showing, for each value of k, the proportion of trials
    in which the model ranked the correct sketch category at k or better.
    One line is drawn per value of the 'adaptor' column.
    '''
    topk_predictions = load_all_topk_predictions()

    adaptor_palette = [
        (0.2, 0.2, 0.2),
        (0.8, 0.3, 0.3),
        (0.3, 0.3, 0.8),
        (0.5, 0.5, 0.5),
        (0.6, 0.2, 0.6),
    ]

    sns.set_context('poster')
    plt.figure(figsize=(8, 8))

    pointplot_options = dict(
        x='k',
        y='prop',
        hue='adaptor',
        data=topk_predictions,
        palette=adaptor_palette,
        markers='.',
        ci='sd',
        join=True,
    )
    sns.pointplot(**pointplot_options)

    # Axis cosmetics: only the first 18 categorical positions are shown.
    plt.xlabel('k', fontsize=24)
    plt.ylabel('proportion', fontsize=24)
    plt.title('% correct within top k')
    plt.xlim([0, 18])
    plt.ylim([0, 1.1])

    plt.tight_layout()
    plt.legend(bbox_to_anchor=(1.0, 0.9))

Esempio n. 3

0

Mostra file

File: __init__.py Progetto: cjw-charleswu/eaglesense

def classifier_selection_time(data, filename):
    """Plot training and testing time per classifier and save the figure.

    Args:
        data: DataFrame whose columns are selected with the regexes
            "classifier|training time" and "classifier|testing time".
            Each selection must yield exactly two columns; the first is
            treated as the classifier name (assumes the classifier column
            precedes the timing column -- TODO confirm against callers).
        filename: Path the figure is written to.
    """
    # .copy() keeps the renames and the new column off ``data`` itself.
    training_time = data.filter(regex="classifier|training time", axis=1).copy()
    training_time.columns = ["classifier", "time"]
    # A scalar is broadcast to every row.  Assigning a fresh pd.Series here
    # would be aligned on a 0..n-1 index and yield NaN for any other index.
    training_time["Dataset"] = "Training"

    testing_time = data.filter(regex="classifier|testing time", axis=1).copy()
    testing_time.columns = ["classifier", "time"]
    testing_time["Dataset"] = "Testing"

    time_data = pd.concat(
        (testing_time, training_time), ignore_index=True, axis=0)

    with sns.axes_style("ticks"):
        fig, ax = plt.subplots()

        sns.pointplot(x="classifier", y="time", hue="Dataset", data=time_data,
                      palette="Set1", dodge=True, ax=ax)

        ax.set_xticklabels(ax.xaxis.get_ticklabels(),
                           rotation=params.X_TICK_ROTATION)
        ax.set_xlabel("Classifier")
        ax.set_ylabel("Average Time (msec / example)")

        sns.despine()
        fig.set_size_inches(params.FIGSIZE)
        plt.tight_layout()
        plt.savefig(filename)

Esempio n. 4

0

Mostra file

File: __init__.py Progetto: cjw-charleswu/eaglesense

def classifier_selection_error(data, filename):
    """Plot training and testing error per classifier and save the figure.

    Errors are rounded to three decimals for plotting.  The rounding is done
    on private copies, so ``data`` is left unmodified.

    Args:
        data: DataFrame whose columns are selected with the regexes
            "classifier|training error" and "classifier|testing error".
            Each selection must yield exactly two columns; the first is
            treated as the classifier name (assumes the classifier column
            precedes the error column -- TODO confirm against callers).
        filename: Path the figure is written to.
    """
    # .copy() keeps the renames, rounding and new column off ``data``.
    training_error = data.filter(regex="classifier|training error", axis=1).copy()
    training_error.columns = ["classifier", "error"]
    training_error["error"] = training_error["error"].round(decimals=3)
    # A scalar is broadcast to every row.  Assigning a fresh pd.Series here
    # would be aligned on a 0..n-1 index and yield NaN for any other index.
    training_error["Dataset"] = "Training"

    testing_error = data.filter(regex="classifier|testing error", axis=1).copy()
    testing_error.columns = ["classifier", "error"]
    testing_error["error"] = testing_error["error"].round(decimals=3)
    testing_error["Dataset"] = "Testing"

    error_data = pd.concat(
        (testing_error, training_error), ignore_index=True, axis=0)

    with sns.axes_style("ticks"):
        fig, ax = plt.subplots()

        sns.pointplot(x="classifier", y="error", hue="Dataset", data=error_data,
                      palette="Set1", dodge=True, ax=ax)

        ax.set_xticklabels(ax.xaxis.get_ticklabels(),
                           rotation=params.X_TICK_ROTATION)
        ax.set_xlabel("Classifier")
        ax.set_ylabel("Mean Error")

        sns.despine()
        fig.set_size_inches(params.FIGSIZE)
        plt.tight_layout()
        plt.savefig(filename)

Esempio n. 5

0

Mostra file

File: simulated_experiments.py Progetto: thran/experiments2.0

def skill_correlations(runs=50, n_clusters=5):
    """Plot how many students each clustering needs to reach rand index 0.9.

    For every run, skill correlation and clustering function, student counts
    are tried in increasing order; the first count whose clustering reaches a
    rand index of at least 0.9 is recorded and larger counts are skipped.
    Combinations that never reach 0.9 contribute no row.

    Relies on module-level names: ``clusterings``, ``data``, ``similarity``,
    ``euclid`` and ``rand_index``.

    Args:
        runs: Number of repetitions of the whole sweep.
        n_clusters: Number of concepts generated and clusters requested.
    """
    student_counts = [10, 20, 30, 50, 100, 200, 300, 500, 1000, 2000, 3000,
                      5000, 10000, 20000, 50000, 100000, 200000, 500000,
                      1000000]
    correlations = list(np.arange(0, 0.9, 0.1)) + [0.85]

    results = []
    for run in range(runs):
        for skill_correlation in correlations:
            for clustering in clusterings:
                for students in student_counts:
                    answers, items = data(n_students=students, n_items=20, n_concepts=n_clusters, skill_correlation=skill_correlation)
                    # Concept name -> integer label, in order of first appearance.
                    concept_index = {name: i for i, name in enumerate(items['concept'].unique())}
                    X = similarity(answers)
                    # DataFrame.get_value() no longer exists in pandas; .at is
                    # the scalar accessor that replaces it.
                    ground_truth = np.array([concept_index[items.at[item, 'concept']] for item in X.index])

                    labels = clustering(X, n_clusters, euclid=euclid)
                    rand = rand_index(ground_truth, labels)

                    print(run, skill_correlation, clustering.__name__, students, '===', rand)
                    if rand >= 0.9:
                        results.append([students, clustering.__name__, rand, skill_correlation])
                        # Smallest sufficient student count found; stop the sweep.
                        break

    results = pd.DataFrame(results, columns=['students', 'clustering', 'rand_index', 'skill_correlation'])

    print(results)
    f, ax = plt.subplots(figsize=(7, 7))
    ax.set(yscale="log")
    sns.pointplot(data=results, x='skill_correlation', y='students', hue='clustering', ax=ax)

Esempio n. 6

0

Mostra file

File: analyse_single_point_stats.py Progetto: ARCHER-CSE/parallel-io

def main(argv):
    """Plot write bandwidth against writer count per scheme, then exit.

    Reads the CSV file named by ``sys.argv[1]``, prints it, draws one line per
    'Scheme' and saves the figure as 'max_bandwidth_stats.png'.  Terminates
    the process with exit status 0.

    Args:
        argv: Not used by the body; the input file is taken from
            ``sys.argv[1]``.  NOTE(review): kept that way because the
            convention callers use for ``argv`` is not visible here.
    """
    # Lists of marker styles and line styles
    markers = 10 * ['o', '^', 'x']
    lines = 10 * ['-', '--', '-.']

    infile = sys.argv[1]

    resframe = pd.read_csv(infile)

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("Summary of all results found:")
    print(resframe)

    fig, ax = plt.subplots()

    sns.pointplot(x='Writers', y='Write Bandwidth (MiB/s)',
                  data=resframe, hue='Scheme', scale=0.75, markers=markers,
                  linestyles=lines, ax=ax)
    ax.set_ylim(ymin=0)

    plt.ylabel('Max. Write Bandwidth / MiB/s')
    plt.xlabel('Writers')
    plt.legend()
    plt.savefig('max_bandwidth_stats.png')
    plt.clf()

    sys.exit(0)

Esempio n. 7

0

Mostra file

File: DistorbutionOfValues.py Progetto: TDaltonC/ValueMRI_Analysis

def cumm_plot(data, **kwargs):
    """Plot the within-group rank of 'Opt1Value' for each 'new_type'.

    Side effects on ``data``: an 'order' column is added, holding for each row
    its position (evenly spaced in [0, 1]) among the rows of the same
    'new_type' when sorted by 'Opt1Value'; the frame is then sorted in place
    by 'new_type'.

    Args:
        data: DataFrame with 'Opt1Value' and 'new_type' columns.
        **kwargs: Forwarded unchanged to the point plot call.
    """
    # DataFrame.sort() no longer exists in pandas; sort_values() replaces it.
    data['order'] = (
        data.sort_values('Opt1Value')
        .groupby(['new_type'])['Opt1Value']
        .transform(lambda score: np.linspace(0, 1, len(score)))
    )
    data.sort_values('new_type', inplace=True)

    # x / y are passed by keyword: newer seaborn rejects them positionally.
    sb.pointplot(x='Opt1Value', y='order', data=data, hue='new_type', **kwargs)

Esempio n. 8

0

Mostra file

File: leaving.py Progetto: thran/experiments2.0

def rolling_success_diff(answers, last_count=4, filters=None, only_last=True):
    """Compare each user's rolling success rate with their overall success rate.

    For every user with at least ``last_count`` answers, the rolling mean of
    'correct' over ``last_count`` answers is compared with the user's overall
    mean of 'correct'.

    * ``only_last=True``: one row per user, holding the difference for the
      final window; the differences are drawn as one distribution per filter.
    * ``only_last=False``: one row per complete window (difference rounded to
      one decimal); the user's final window is flagged ``leave == 1`` and all
      others ``leave == 0``; the mean of ``leave`` is drawn as a point plot.

    Args:
        answers: Frame passed to ``filter_users``; the result must have
            'user' and 'correct' columns.
        last_count: Window length of the rolling mean.
        filters: Values passed as ``min_answer_count`` to ``filter_users``;
            ``None`` means a single pass with ``min_answer_count=None``.
        only_last: Selects the mode described above.

    Returns:
        DataFrame with columns 'rolling_success_diff', 'min_answers', 'leave'.
    """
    if filters is None:
        filters = [None]

    rows = []
    for min_answers in filters:
        filtered = filter_users(answers, min_answer_count=min_answers)
        for _, user_answers in filtered.groupby('user'):
            if len(user_answers) < last_count:
                continue
            correct = user_answers['correct']
            mean = correct.mean()
            rolling = correct.rolling(last_count, min_periods=last_count).mean()

            if only_last:
                rows.append([rolling.iloc[-1] - mean, min_answers, 1])
                continue

            user_rows = [[np.round(x - mean, 1), min_answers, 0]
                         for x in rolling.dropna()]
            # Flag this user's own last window.  Guarded so that a user who
            # produced no complete window cannot flag another user's row.
            if user_rows:
                user_rows[-1][-1] = 1
            rows.extend(user_rows)

    df = pd.DataFrame(rows, columns=['rolling_success_diff', 'min_answers', 'leave'])
    if not only_last:
        sns.pointplot(data=df, x='rolling_success_diff', y='leave', hue='min_answers').set(ylim=(0, 0.2))
    else:
        for min_answers in filters:
            sns.distplot(df.loc[df['min_answers'] == min_answers, 'rolling_success_diff'], label=str(min_answers))
        plt.legend(loc=1)
    return df

Esempio n. 9

0

Mostra file

File: plot_metrics.py Progetto: wallarelvo/nyc-taxi-analysis

def _make_all_pred_plots(big_d, time_d, fig_name):
    """Draw a 2x3 grid of point plots, one per field, with a shared legend.

    The first five panels plot ``big_d`` against "n_vehicles"; any later
    panel plots ``time_d`` against "vehicles".  The figure is saved to
    ``fig_dir + fig_name`` and the current figure is closed.
    """
    fig, axarr = plt.subplots(2, 3, figsize=(11, 5))
    panel_letters = "abcdef"

    for idx, (field, panel) in enumerate(zip(fields, np.ravel(axarr))):
        title = "({}) {}".format(panel_letters[idx], titles[idx])

        # Panels 0-4 and the remaining panel use different frames / x columns.
        x_column, frame = ("n_vehicles", big_d) if idx < 5 else ("vehicles", time_d)
        panel = sns.pointplot(x=x_column, y=field, hue="predictions",
                              data=frame, palette=clrs, ax=panel)

        panel.set_xticklabels(vehicles)
        panel.set_title(title, fontsize=13, y=-0.45)
        panel.set_ylabel(prettify(field))
        panel.set_xlabel("Number of Vehicles")

        if "%" in prettify(field):
            # Fractions are shown as percentages on a fixed 0-1 range.
            panel.set_ylim([0, 1])
            ticks = panel.get_yticks()
            panel.set_yticklabels(['{:3.0f}%'.format(tick * 100) for tick in ticks])

        # Keep the handles for the figure-level legend, drop the per-panel one.
        handles, _ = panel.get_legend_handles_labels()
        panel.legend().remove()

    fig.subplots_adjust(wspace=0.4, hspace=0.58)
    lgd = fig.legend(
        handles,
        fancy_preds,
        loc="lower center", fancybox=True,
        bbox_to_anchor=(0.46, 0.96),
        title="Number of Samples", markerscale=2.5, ncol=4)
    fig.savefig(
        fig_dir + fig_name,
        bbox_inches='tight', bbox_extra_artists=(lgd,))
    plt.close()

Esempio n. 10

0

Mostra file

File: ngram.py Progetto: ewulczyn/wiki-detox

def calibration_curve_plotter(y_test, prob_pos, n_bins=10):
    """Draw a calibration curve above a histogram of predicted probabilities.

    Predictions are grouped into ``n_bins`` equal-width bins on [0, 1]; the
    upper panel plots the mean of ``y_test`` per bin next to the
    perfect-calibration line, the lower panel the histogram of ``prob_pos``.
    The Brier score is shown in the legend label of the model curve.

    Args:
        y_test: True binary labels.
        prob_pos: Predicted probabilities of the positive class.
        n_bins: Number of probability bins, used for both panels.
    """
    brier = brier_score_loss(y_test, prob_pos, pos_label=1)

    plt.figure(0, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    bins = np.linspace(0.0, 1.0, n_bins + 1)
    half_width = 0.5 / n_bins
    binids = np.digitize(prob_pos, bins) - 1
    # Left bin edge + half a bin width; serves as the categorical x position.
    o = bins + half_width

    df = pd.DataFrame({"true": y_test})
    df["Bin center"] = bins[binids] + half_width
    # The empty-named column is the hue; its values become the legend labels.
    df[""] = "Model calibration: (%1.5f)" % brier

    df2 = pd.DataFrame({"true": o, "Bin center": o})
    df2[""] = "Perfect calibration"

    df = pd.concat([df, df2])

    sns.pointplot(x="Bin center", y="true", data=df, order=o, hue="", ax=ax1)

    # Follow ``n_bins`` here too, so both panels use the same binning
    # (the histogram was previously fixed at 10 bins).
    ax2.hist(prob_pos, range=(0, 1), bins=n_bins, label="Model", histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.set_title("Calibration plots")

    ax2.set_xlabel("Predicted Probability")
    ax2.set_ylabel("Count")

    plt.tight_layout()

Esempio n. 11

0

Mostra file

File: plot_metrics.py Progetto: wallarelvo/mod

def make_interval_comp_plots(df):
    """Plot "comp_time" against "interval" and save it under figs/."""
    _, axis = plt.subplots(1, 1, figsize=aux_fig_size)
    line_color = sns.xkcd_rgb["bright red"]

    sns.pointplot(data=df, x="interval", y="comp_time", color=line_color,
                  ax=axis)

    axis.set_xlabel("Step Size [s]")
    axis.set_ylabel(prettify("comp_time"))
    # Tick labels come from the module-level ``intervals`` sequence.
    axis.set_xticklabels(intervals)

    plt.savefig("figs/interval-comp_time.png", bbox_inches="tight")
    plt.close()

Esempio n. 12

0

Mostra file

File: test_compound.py Progetto: TheChymera/chyMRI

def test_activity_timecourse_with_inlay():
	"""Draw a per-session t-statistic timecourse with an atlas inlay.

	Reads 'drs_activity.csv' from the test data directory, plots the mean
	t-statistic per session and treatment, overlays an atlas label panel in
	the upper left and saves '_activity_timecourse_with_inlay.png'.
	"""
	import pandas as pd
	import matplotlib.pyplot as plt
	import samri.plotting.maps as maps
	import seaborn as sns
	from os import path

	# Style elements
	treatment_colors = ["#56B4E9", "#E69F00"]

	# Raw session codes -> display names, and the order they are plotted in.
	session_names = {
		'ofM':'naïve',
		'ofMaF':'acute',
		'ofMcF1':'chronic/2w',
		'ofMcF2':'chronic/4w',
		'ofMpF':'post',
		}
	session_order = ['naïve','acute','chronic/2w','chronic/4w','post']

	here = path.dirname(path.realpath(__file__))
	csv_path = path.join(here, "../../tests/data", 'drs_activity.csv')

	df = pd.read_csv(csv_path).rename(columns={'t':'Mean t-Statistic'})
	df['Session'] = df['Session'].map(session_names)

	# Axes rectangles as [left, bottom, width, height] in figure fractions.
	margin, extent = 0.06, 0.9
	session_coordinates = [margin, margin, extent, extent]
	roi_coordinates = [margin+0.02, margin+0.7, 0.3, 0.2]

	plt.figure(1)

	timecourse_ax = plt.axes(session_coordinates)
	pointplot_options = dict(
		x='Session',
		y='Mean t-Statistic',
		units='subject',
		data=df,
		hue='treatment',
		dodge=True,
		palette=treatment_colors,
		order=session_order,
		ax=timecourse_ax,
		ci=95,
		)
	sns.pointplot(**pointplot_options)

	inlay_ax = plt.axes(roi_coordinates)
	maps.atlas_label(
		"/usr/share/mouse-brain-atlases/dsurqec_200micron_roi-dr.nii",
		scale=0.3,
		color="#E69F00",
		ax=inlay_ax,
		annotate=False,
		alpha=0.8,
		)

	plt.savefig('_activity_timecourse_with_inlay.png')

Esempio n. 13

0

Mostra file

File: psisloo.py Progetto: hammerlab/stanity

    def plot(self):
        """ Draw the pointwise pareto-k importance-sampling indices

        Each observation unit (x axis, taken from the index of
        ``self.pointwise``) gets one unconnected point at its pareto-k tail
        index (y axis).

        """
        pointwise = self.pointwise
        seaborn.pointplot(y=pointwise.pareto_k, x=pointwise.index, join=False)

Esempio n. 14

0

Mostra file

File: plot_metrics.py Progetto: wallarelvo/mod

def make_demand_comp_plots(df):
    """Plot "comp_time" against "demand" per capacity and save it under figs/."""
    _, axis = plt.subplots(1, 1, figsize=aux_fig_size)

    sns.pointplot(data=df, x="demand", y="comp_time", hue="capacity",
                  palette=dem_clrs, ax=axis)

    axis.set_ylabel(prettify("comp_time"))
    axis.set_xlabel("Nominal Number of Requests")
    axis.set_xticklabels(["x0.5", "x1", "x2"])

    # Re-create the legend with fixed labels instead of the raw hue values.
    legend_handles = axis.get_legend_handles_labels()[0]
    axis.legend(legend_handles, [1, 4], title="Capacity")

    plt.savefig("figs/demand-comp_time.png", bbox_inches="tight")
    plt.close()

Esempio n. 15

0

Mostra file

File: subject_analysis.py Progetto: brahmcapoor/naming-changes-complexity

def individual_graph(transparencies_1, transparencies_2, condition,
                     subject_number, display_graph=True):
    """Plot two per-trial contrast series for one subject and save the figure.

    The first series is drawn in red, the second in the default colour, both
    on the same axes.  The figure is written to
    "Subject <subject_number>/<condition>.png".

    Args:
        transparencies_1: Sequence of per-trial values for the first series.
        transparencies_2: Sequence of per-trial values for the second series.
        condition: Condition name, used in the title and the file name.
        subject_number: Subject identifier, used in the output directory name.
        display_graph: When true, the figure is also shown interactively.
    """
    # Trial numbers start at 1 and follow the data length instead of assuming
    # exactly 80 trials.
    trials_1 = list(range(1, len(transparencies_1) + 1))
    trials_2 = list(range(1, len(transparencies_2) + 1))

    # x / y are passed by keyword: newer seaborn rejects them positionally.
    sns.pointplot(x=trials_1, y=transparencies_1, color='red')
    axes = sns.pointplot(x=trials_2, y=transparencies_2)
    axes.set(xlabel="Trial", ylabel="Contrast",
             title="{} Condition".format(condition))

    # Save before showing: the file is then written regardless of what the
    # GUI backend does to the figure once its window is closed.
    figure = axes.get_figure()
    figure.savefig("Subject {}/{}.png".format(subject_number, condition))
    if display_graph:
        plt.show()
    plt.cla()

Esempio n. 16

0

Mostra file

File: exp_model_selection.py Progetto: BaxterEaves/ijcai-iml-2016

def plotexp(res):
    """Save two point plots over 'Number of topics': log evidence and runtime.

    File names are built from the base name of ``res['args']['filename']``
    with every '.' removed, and are written into ``DATA_DIR``.
    """
    data = res['res']['data']
    basename = os.path.basename(res['args']['filename']).replace('.', '')

    # (column plotted on y, suffix of the output file)
    panels = (
        ('Log evidence', '_nt_evidence.png'),
        ('Runtime', '_nt_runtime.png'),
    )
    for column, suffix in panels:
        plt.figure(facecolor='white', tight_layout=True, figsize=(4.5, 3.5),
                   dpi=300)
        sns.pointplot(x='Number of topics', y=column, data=data)
        plt.savefig(os.path.join(DATA_DIR, basename + suffix), dpi=300)

Esempio n. 17

0

Mostra file

File: plot_metrics.py Progetto: wallarelvo/mod

def make_avg_plots_with_preds(big_d):
    """Save one point plot per field for capacity 4 and waiting time 300.

    Each field in ``fields`` plus "n_shared_per_passenger" is plotted against
    "vehicles" with one line per "predictions" value, and written to
    "figs/avg-with-preds-<field>.png".

    Args:
        big_d: DataFrame with "capacity", "waiting_time", "vehicles",
            "predictions" and the plotted field columns.
    """
    cap, wt = 4, 300
    d = big_d.query(
        "capacity == {} and waiting_time == {}".format(cap, wt))

    for field in fields + ["n_shared_per_passenger"]:
        # One figure per field.  A single figure created before the loop is
        # closed after the first save, leaving later plots at default size.
        fig, ax = plt.subplots(figsize=(13, 10))
        sns.pointplot(x="vehicles", y=field, hue="predictions", data=d, ax=ax)
        ax.set_xticklabels(vehicles)

        pretty_field = prettify(field)
        ax.set_ylabel(pretty_field)
        if "%" in pretty_field:
            # Fractions are shown as percentages on a fixed 0-1 range.
            ax.set_ylim([0, 1])
            vals = ax.get_yticks()
            ax.set_yticklabels(['{:3.0f}%'.format(x * 100) for x in vals])
        ax.set_xlabel("Num Vehicles")

        handles, _ = ax.get_legend_handles_labels()
        ax.legend(
            handles,
            ["No R.B.", 0, 100, 200, 300, 400],
            loc="center left", fancybox=True,
            shadow=True, bbox_to_anchor=(1, 0.5),
            title="Predictions", markerscale=3)
        ax.set_title(make_pred_title(wt, cap))
        fig.savefig(
            "figs/avg-with-preds-{}.png".format(field),
            bbox_inches='tight')
        plt.close(fig)

Esempio n. 18

0

Mostra file

File: plotting.py Progetto: 5agado/fitbit-analyzer

def _plotWeekdayByMonthStats(stats, stat_name):
    """Plot ``stat_name`` per weekday with one line per month.

    Returns the Axes the plot was drawn on.
    """
    weekday_stats = _prepareWeekdayByMonthStats(stats)

    axes = sns.pointplot(x="day", y=stat_name, hue="month",
                         order=dayOfWeekOrder, data=weekday_stats)
    # Blank x label; the y label is the display name of the statistic.
    axes.set(xlabel='', ylabel=NAMES[stat_name])
    return axes

Esempio n. 19

0

Mostra file

File: views.py Progetto: manozbiswas/Djangoapp

def draw_graph(plot_data, rankingSystem, numberOfUv, hue):
    """Plot world rank per year, grouped by ``hue``, and save it as a PNG.

    The 'world_rank' column of ``plot_data`` is converted to int in place.
    The figure is written to 'resources/images/topuv.png' and then cleared
    and closed.
    """
    plot_data['world_rank'] = plot_data['world_rank'].astype(int)
    sns.pointplot(data=plot_data, x='year', y='world_rank', hue=hue)

    heading = "Top {} university by ".format(numberOfUv) + rankingSystem
    pylab.title(heading, fontsize=26)

    for style_ticks in (pylab.xticks, pylab.yticks):
        style_ticks(fontsize=20)
    for set_label, text in ((pylab.ylabel, "World Rank"), (pylab.xlabel, "Year")):
        set_label(text, fontsize=26)

    pylab.savefig('resources/images/topuv.png')

    # Tear down pyplot state so the next call starts from a fresh figure.
    for teardown in (pylab.cla, pylab.clf, pylab.close):
        teardown()

Esempio n. 20

0

Mostra file

File: analysis_utils.py Progetto: ewulczyn/wiki-detox

def compare_groups(df, x, mpu = False, order = None, hue = None, plot = True, table = True):
    """Compare mean predicted aggression / recipient scores across groups.

    Args:
        df: DataFrame with 'pred_aggression_score' and 'pred_recipient_score'
            columns, the grouping column ``x`` and, if given, ``hue``.
        x: Column to group by (x axis of the plots).
        mpu: When true, each score is first passed through the project helper
            ``mpg(df, score, ['user_text', x])`` (presumably a per-user
            aggregation -- verify against its definition).
        order: Category order passed to the point plots.
        hue: Optional second grouping column.
        plot: When true, draw the two scores side by side.
        table: When true, print the table of group means.
    """
    agg = 'pred_aggression_score'
    rec = 'pred_recipient_score'

    if table:
        group_keys = [x, hue] if hue else [x]
        # Column selection needs a list; the tuple form ``[agg, rec]`` is no
        # longer accepted by pandas.
        print(df.groupby(group_keys)[[agg, rec]].mean())

    if plot:
        _, (ax1, ax2) = plt.subplots(ncols=2, sharey=False, figsize = (12,6))

        # Both plots are drawn on the axes created above, so no additional
        # (empty) figures are opened.
        for score, ax in ((agg, ax1), (rec, ax2)):
            plot_data = mpg(df, score, ['user_text', x]) if mpu else df
            sns.pointplot(x=x, y=score, data=plot_data, order=order, hue=hue, ax=ax)

Esempio n. 21

0

Mostra file

File: plotting.py Progetto: TheChymera/behaviopy

def qualitative_times(df,
	ax=None,
	x="relative_date",
	y="weight",
	unit="Animal_id",
	condition="treatment",
	err_style="unit_traces",
	order=None,
	bp_style=True,
	save_as='',
	legend_title=False,
	palette=QUALITATIVE_COLORSET,
	renames=None,
	model='',
	print_model=False,
	print_anova=False,
	anova_type=3,
	groups=None,
	ci=95,
	):
	"""Plot a timecourse based on qualitative times (e.g. sessions).

	Args:
		df: Data to plot; not modified (renames are applied to a copy).
		ax: Axes to draw on, forwarded to the point plot.
		x: Column with the qualitative time points.
		y: Column with the plotted value; also used as the y label.
		unit: Column identifying the sampling unit.
		condition: Column used as hue.
		order: Order of the ``x`` categories.
		bp_style: When true, apply the 'seaborn-darkgrid' and then the
			'ggplot' matplotlib styles.
		save_as: If non-empty, path the figure is saved to ('~' is expanded).
		legend_title: Title of the legend; a falsy value gives an empty title.
		palette: Colours passed through ``sns.color_palette``.
		renames: Optional ``{column: {old_value: new_value}}`` replacements.
		ci: Confidence interval size forwarded to the point plot.
		err_style, model, print_model, print_anova, anova_type, groups:
			Accepted but not used by this function body.
	"""

	if bp_style:
		plt.style.use(u'seaborn-darkgrid')
		plt.style.use('ggplot')

	if renames:
		# Work on a copy so the caller's frame is not rewritten.
		df = df.copy()
		for key, mapping in renames.items():
			for old_value, new_value in mapping.items():
				df.loc[df[key] == old_value, key] = new_value

	ax = sns.pointplot(
		x=x,
		y=y,
		units=unit,
		data=df,
		hue=condition,
		dodge=True,
		palette=sns.color_palette(palette),
		order=order,
		ax=ax,
		ci=ci,
		)

	ax.set_ylabel(y)

	# Apply the requested title; previously a supplied title was ignored and
	# only the "no title" case had any effect.
	ax.legend().set_title(legend_title or '')

	if save_as:
		plt.savefig(path.abspath(path.expanduser(save_as)), bbox_inches='tight')

Esempio n. 22

0

Mostra file

File: cyber_attack_classification.py Progetto: nhanloukiala/AppsOfDataAnalysis

def line_plot(data, title="", x_title="", y_title="", legend_label="", group_labels=None):
    """Show a point plot built from a 2-D array.

    Column 0 of ``data`` is plotted on y (as float), column 1 on x (as int)
    and column 2 selects the line; ``legend_label`` names that hue column.
    ``group_labels`` is accepted but not used.
    """
    frame = DataFrame()
    frame['x'] = data[:, 1].astype(int)
    frame['y'] = data[:, 0].astype(float)
    frame[legend_label] = data[:, 2]

    sns.set(style="whitegrid")
    # Sorted unique hue values fix the order of the lines and legend entries.
    hue_levels = np.unique(frame[legend_label])
    sns.pointplot(data=frame, x="x", y="y", hue=legend_label,
                  hue_order=hue_levels)

    plt.title(title, fontsize=25)
    for set_label, text in ((plt.ylabel, y_title), (plt.xlabel, x_title)):
        set_label(text, fontsize=12)
    plt.show()

Esempio n. 23

0

Mostra file

File: simulated_experiments.py Progetto: thran/experiments2.0

def students(runs=15):
    """Plot the rand index of each similarity measure against difficulty shift.

    For every run and difficulty shift a data set is generated; each entry of
    the module-level ``similarities`` is then used to cluster the items and
    the rand index against the true concepts is recorded.

    Relies on module-level names: ``n_students``, ``n_items``, ``n_clusters``,
    ``skill_correlation``, ``missing``, ``similarities``, ``clustering``,
    ``data`` and ``rand_index``.

    Args:
        runs: Number of repetitions of the whole sweep.
    """
    results = []
    for run in range(runs):
        for difficulty_shift in np.arange(-1, 1.1, 0.2):
            answers, items = data(n_students=n_students, n_items=n_items, n_concepts=n_clusters, skill_correlation=skill_correlation, difficulty_shift=difficulty_shift, missing=missing)
            # Concept name -> integer label, in order of first appearance.
            concept_index = {name: i for i, name in enumerate(items['concept'].unique())}
            for similarity, euclid, similarity_name in similarities:
                X = similarity(answers)
                # DataFrame.get_value() no longer exists in pandas; .at is
                # the scalar accessor that replaces it.
                ground_truth = np.array([concept_index[items.at[item, 'concept']] for item in X.index])

                labels = clustering(X, n_clusters, euclid=euclid)
                rand = rand_index(ground_truth, labels)
                results.append([n_students, clustering.__name__, rand, skill_correlation, difficulty_shift, similarity_name])
                print(run, n_students, similarity_name, rand)

    results = pd.DataFrame(results, columns=['students', 'clustering', 'rand_index', 'skill_correlation', 'difficulty_shift', 'similarity'])
    print(results)

    plt.figure(figsize=(16, 24))
    sns.pointplot(data=results, x='difficulty_shift', y='rand_index', hue='similarity')

Esempio n. 24

0

Mostra file

File: plot_metrics.py Progetto: wallarelvo/mod

def make_interval_plots(df):
    """Save one "interval" point plot per field under figs/.

    Covers every entry of ``fields`` plus "n_shared_per_passenger".
    """
    for field in tqdm(fields + ["n_shared_per_passenger"]):
        _, axis = plt.subplots(1, 1, figsize=aux_fig_size)
        axis = sns.pointplot(data=df, x="interval", y=field, ax=axis,
                             color=sns.xkcd_rgb["bright red"])

        axis.set_xlabel("Step Size [s]")
        axis.set_ylabel(prettify(field))

        if "%" in prettify(field):
            # Fractions are shown as percentages on a fixed 0-1 range.
            axis.set_ylim(0, 1)
            axis.set_yticklabels(
                ['{:3.0f}%'.format(tick * 100) for tick in axis.get_yticks()])

        plt.savefig("figs/interval-{}.png".format(field), bbox_inches="tight")
        plt.close()

Esempio n. 25

0

Mostra file

File: views.py Progetto: manozbiswas/Djangoapp

def process_graph(university):
    """Plot the yearly world rank of the given universities per ranking source.

    Rows for the requested universities are taken from the Times, Shanghai
    and CWUR data, tagged with their source, combined and drawn as one line
    per source.  The figure is written to 'resources/images/university.png'.

    Args:
        university: Sequence of university names; the first entry is used in
            the plot title.
    """
    # Local import: this block needs ``pd.concat`` and the file's own import
    # list is not visible from here.
    import pandas as pd

    rank_columns = ['world_rank', 'year']

    timesData = Data.get_time_data()
    shanghaiData = Data.get_shanghai_data()
    cwurData = Data.get_cwur_data()

    # .copy() so the column assignments below act on independent frames
    # rather than on slices of the source data.
    times_plot_data = timesData.loc[
        timesData.university_name.isin(university), rank_columns].copy()
    shanghai_plot_data = shanghaiData.loc[
        shanghaiData.university_name.isin(university), rank_columns].copy()
    cwur_plot_data = cwurData.loc[
        cwurData.institution.isin(university), rank_columns].copy()

    times_plot_data['source'] = 'Times'
    shanghai_plot_data['source'] = 'Shanghai'
    cwur_plot_data['source'] = 'CWUR'

    ## parse the first number in rank for data ranges

    times_plot_data['world_rank'] = times_plot_data['world_rank'].str.split('-').str[0]
    shanghai_plot_data['world_rank'] = shanghai_plot_data['world_rank'].str.split('-').str[0]

    # DataFrame.append() no longer exists in pandas 2.x; concat is equivalent.
    plot_data = pd.concat(
        [times_plot_data, shanghai_plot_data, cwur_plot_data])
    plot_data['world_rank'] = plot_data['world_rank'].astype(int)

    sns.set(style="ticks", color_codes=True)
    plt.rcParams['figure.figsize'] = 16, 12
    ax = sns.pointplot(x='year', y='world_rank', hue='source', data=plot_data)

    # Styling

    plt.title(university[0] + " Ranking", fontsize=26)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.ylabel("World Rank", fontsize=26)
    plt.xlabel("Year", fontsize=26)
    plt.tight_layout()
    plt.legend(loc='upper left', fontsize=20)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Save File
    plt.savefig('resources/images/university.png')
    plt.cla()
    plt.clf()
    plt.close()

Esempio n. 26

0

Mostra file

File: blend.py Progetto: ISU-DMC/dmc2016

def compare2(y, y_hat1, y_hat2):
    """Show the MAE of a two-model blend as the blend weight varies.

    The weight ``theta`` of the first model runs over 101 evenly spaced
    values in [0, 1], the second model getting ``1 - theta``.  The best
    weight is marked with a vertical line and reported in the x label.

    Args:
        y: True values.
        y_hat1: Predictions of the first model.
        y_hat2: Predictions of the second model.
    """
    # ``sns.plt`` is gone from current seaborn releases; import pyplot here.
    import matplotlib.pyplot as plt

    thetas = np.linspace(0, 1, num=101)
    maes = np.array([
        mae(y, blend2([y_hat1, y_hat2], [theta, 1 - theta]))
        for theta in thetas
    ])
    min_i = np.argmin(maes)

    df = pd.DataFrame({'mae': maes, 'theta': thetas})
    sns.set_style('darkgrid')
    ax = sns.pointplot(x='theta', y='mae', data=df)
    ax.set_xticks([])
    # The x axis is categorical, so the position of the best theta is its index.
    ax.axvline(x=min_i)
    lab = 'BEST (theta: ' + str(thetas[min_i]) + ', MAE: ' + str(maes[min_i]) + ')'
    ax.set(xlabel=lab, ylabel='MAE')
    plt.show()

Esempio n. 27

0

Mostra file

File: plot_metrics.py Progetto: wallarelvo/mod

def make_demand_plots(df):
    """Save one "demand" point plot per field, split by capacity, under figs/.

    Covers every entry of ``fields`` plus "n_shared_per_passenger".
    """
    for field in tqdm(fields + ["n_shared_per_passenger"]):
        _, axis = plt.subplots(1, 1, figsize=aux_fig_size)
        axis = sns.pointplot(data=df, x="demand", y=field, hue="capacity",
                             palette=dem_clrs, ax=axis)

        axis.set_xlabel("Nominal Number of Requests")
        axis.set_ylabel(prettify(field))
        axis.set_xticklabels(["x0.5", "x1", "x2"])

        if "%" in prettify(field):
            # Fractions are shown as percentages on a fixed 0-1 range.
            axis.set_ylim(0, 1)
            axis.set_yticklabels(
                ['{:3.0f}%'.format(tick * 100) for tick in axis.get_yticks()])

        # Re-create the legend with fixed labels instead of the raw hue values.
        legend_handles = axis.get_legend_handles_labels()[0]
        axis.legend(legend_handles, [1, 4], title="Capacity")

        plt.savefig("figs/demand-{}.png".format(field), bbox_inches="tight")
        plt.close()

Esempio n. 28

0

Mostra file

File: blend.py Progetto: ISU-DMC/dmc2016

def compare_multiway(y, y_hat):
    """Show the MAE of a multi-model blend as the first model's weight varies.

    The first model gets weight ``theta`` (101 evenly spaced values in
    [0, 1]); the remaining weight ``1 - theta`` is split equally among the
    other models.  The best weight is marked with a vertical line and
    reported in the x label.

    Args:
        y: True values.
        y_hat: Sequence of per-model predictions; needs at least two entries
            (a single entry makes the equal split divide by zero).
    """
    # ``sns.plt`` is gone from current seaborn releases; import pyplot here.
    import matplotlib.pyplot as plt

    n_others = len(y_hat) - 1
    thetas = np.linspace(0, 1, num=101)
    maes = []
    for theta in thetas:
        theta2 = (1 - theta) / n_others
        blended = blend(y_hat, [theta] + list(np.repeat(theta2, n_others)))
        maes.append(mae(y, blended))
    maes = np.array(maes)
    min_i = np.argmin(maes)

    df = pd.DataFrame({'mae': maes, 'theta': thetas})
    sns.set_style('darkgrid')
    ax = sns.pointplot(x='theta', y='mae', data=df)
    ax.set_xticks([])
    # The x axis is categorical, so the position of the best theta is its index.
    ax.axvline(x=min_i)
    lab = 'BEST (theta: ' + str(thetas[min_i]) + ', MAE: ' + str(maes[min_i]) + ')'
    ax.set(xlabel=lab, ylabel='MAE')
    plt.show()

Esempio n. 29

0

Mostra file

File: visualize.py Progetto: namakemono/cifar10-tensorflow

def save_solvers_cmp(is_power_point = False):
    """Plot the test error per epoch for each solver and save the figure.

    Reads every "../output/cifar10classifier_resnet32_<solver>.csv" whose
    solver suffix is one of the known solvers, derives error columns from the
    accuracy columns and writes "../figures/resnet.solvers.png".

    Args:
        is_power_point: When true, use a larger legend suited to slides.
    """
    # ``sns.plt`` is gone from current seaborn releases; import pyplot here.
    import matplotlib.pyplot as plt

    solvers = ("adadelta", "adagrad", "adam", "momentum", "rmsprop")

    dfs = []
    for filename in glob.glob("../output/cifar10classifier_resnet32_*.csv"):
        # The solver name is the last "_"-separated part, minus the extension.
        target = filename.split("_")[-1].split(".csv")[0]
        if target not in solvers:
            continue
        df = pd.read_csv(filename)
        df["train_error"] = 1 - df["train_accuracy"]
        df["test_error"] = 1 - df["test_accuracy"]
        dfs.append(df)

    total_df = pd.concat(dfs)
    total_df["name"] = total_df["name"].str.split("_").str.get(-1).str.replace("Momentum", "Nesterov(Original Paper)")

    ax = sns.pointplot(x="epoch", y="test_error", hue="name", data=total_df, scale=0.2)
    if is_power_point:
        ax.legend(loc="lower left", markerscale=9.0, fontsize=20)
    else:
        ax.legend(loc="lower left", markerscale=3.0)
    ax.set(ylim=(0, 0.2))
    # Label every tenth epoch only, to keep the axis readable.
    ax.set_xticklabels([i if i % 10 == 0 else "" for i in range(200)])
    ax.set(xlabel='epoch', ylabel='error(%)')

    figure = ax.get_figure()
    figure.savefig("../figures/resnet.solvers.png")
    plt.close(figure)

Esempio n. 30

0

Mostra file

File: nationals.py Progetto: yknot/ultimateNationalsTweets

def _count_by_twenty_minutes(tweets, first_day=16, n_slots=288):
    """Count the rows of ``tweets`` per 20-minute slot from ``first_day`` 00:00."""
    counts = np.zeros(n_slots)
    if len(tweets) == 0:
        return counts
    # 72 slots per day, 3 per hour; ``minute // 20`` picks the slot within
    # the hour (0, 1 or 2).
    slots = ((tweets['day'].astype(int) - first_day) * 72
             + tweets['hour'].astype(int) * 3
             + tweets['minute'].astype(int) // 20)
    # Unbuffered add, so several tweets in the same slot are all counted.
    np.add.at(counts, slots.values, 1)
    return counts


def twentyMins(ultilive, nattys):
    """Plot tweets per twenty minutes for both sources and save the figure.

    Covers 10-16 00:00 to 10-20 00:00 (288 slots of 20 minutes) and writes
    "ByTwenty.png".

    Args:
        ultilive: DataFrame of "Ultiworld Live" tweets with 'day', 'hour' and
            'minute' columns.
        nattys: DataFrame of "#NationalsTX" tweets with the same columns.
    """
    n_slots = 288

    # create array for every 20 minutes from 10-16 00:00 to 10-20 00:00
    ulti_twentyMins = _count_by_twenty_minutes(ultilive, n_slots=n_slots)
    natty_twentyMins = _count_by_twenty_minutes(nattys, n_slots=n_slots)

    sns.set(style='darkgrid', context='poster')
    plt.figure(figsize=(20,15))

    dfTime = pd.DataFrame(
        {
            'Time': np.tile(np.arange(n_slots), 2),
            'Tweets': np.concatenate([ulti_twentyMins, natty_twentyMins]),
            'Source': ["Ultiworld Live"] * n_slots + ["#NationalsTX"] * n_slots,
        },
        columns=['Time', 'Tweets', 'Source'])

    # x / y / hue are passed by keyword: newer seaborn rejects them
    # positionally.
    s = sns.pointplot(x='Time', y='Tweets', hue='Source', data=dfTime,
                      palette="Paired")
    s.set_title("Tweets per Twenty Minutes")
    # One tick per day boundary (72 slots per day).
    s.set_xticks([72, 144, 216, 287])
    s.set_xticklabels(['17th', '18th', '19th', '20th'])
    s.set_xlabel("Day")

    s.axis([0,288,0,50])

    s.figure.savefig("ByTwenty.png")

Esempio n. 31

0

Mostra file

# Plot the results. The first panel combines a strip plot of the individual
# scores with a point plot of the average score of each pipeline, both drawn
# on the same axes.

fig, axes = plt.subplots(1, 2, figsize=[8, 4], sharey=True)

sns.stripplot(
    data=results,
    y="score",
    x="pipeline",
    ax=axes[0],
    jitter=True,
    alpha=0.5,
    zorder=1,
    palette="Set1",
)
sns.pointplot(data=results, y="score", x="pipeline", ax=axes[0], zorder=1, palette="Set1")

axes[0].set_ylabel("ROC AUC")
axes[0].set_ylim(0.5, 1)

##############################################################################
# The second plot is a paired scatter plot. Each point represents the score
# of a single session. An algorithm outperforms another if most of the
# points are in its quadrant.

# One row per (subject, session), one column per pipeline.
paired = results.pivot_table(
    values="score", columns="pipeline", index=["subject", "session"]
)
paired = paired.reset_index()

# fit_reg=False: scatter only, no regression line.
sns.regplot(data=paired, y="RG+LR", x="CSP+LDA", ax=axes[1], fit_reg=False)

Esempio n. 32

0

Mostra file

File: label_sampling_disc_figure_generator.py Progetto: hubayirp/snorkeling

def plot_performance_graph(metric='AUROC',
                           evaluation_set='dev',
                           title="",
                           file_name="",
                           data=None,
                           color_map=None):
    """
    Plot the graphs onto a multi-subplot grid using seaborn and save the
    figure as a PNG.

    The grid shape is taken from the module-level ``file_tree``; the cells
    are filled by iterating ``data`` (outer keys) and ``data[outer]``
    (inner keys) in order.

    Args:
        metric - the metric to plot for the y axis. "AUROC" plots the
            "AUROC" column with y limits [0.5, 1]; any other value plots
            the "AUPRC" column, and "AUPR" additionally fixes the y limits
            to [0, 0.7]
        evaluation_set - whether to plot the dev set or test set
        title - the main title of the large graph
        file_name - the name of the file to save the graph
        data - the dataframe tree to plot the large graph, indexed as
            data[outer][inner][evaluation_set]; an empty data[outer][inner]
            is rendered as a "Coming Soon!!" placeholder
        color_map - the color coded to plot each point on, keyed by the
            same outer/inner keys as ``data``
    """
    fig, axes = plt.subplots(len(file_tree),
                             len(file_tree["DaG"]),
                             figsize=(25, 15),
                             sharey='row')

    for row_ind, col in enumerate(data):
        for col_ind, row in enumerate(data[col]):
            ax = axes[row_ind][col_ind]

            if metric == "AUROC":
                ax.set_ylim([0.5, 1])

            if metric == "AUPR":
                ax.set_ylim([0, 0.7])

            # Data Not Available Yet
            if len(data[col][row]) == 0:
                lower, upper = ax.get_ylim()
                ax.annotate("Coming Soon!!",
                            (0.2, (lower + upper) / 2),
                            color="red",
                            fontsize=20)

            else:
                subset = data[col][row][evaluation_set]
                sns.pointplot(x="num_lfs",
                              y=metric if metric == "AUROC" else "AUPRC",
                              data=subset,
                              ax=ax,
                              hue="label",
                              ci="sd",
                              scale=1.2,
                              markers=["^", "o"])

                # remove x axis labels
                ax.set_xlabel('')
                ax.get_legend().remove()

                # One colour per x position: the first point takes the
                # outer key's colour, every later point the inner key's.
                point_colors = [
                    color_map[col] if index == 0 else color_map[row]
                    for index in range(len(subset.num_lfs.unique()))
                ]

                # unstable code
                # if order of error bars
                # change then this code will not work
                for idx, item in enumerate(ax.get_children()):
                    # if the points in graph
                    # change color map accordingly
                    if idx == 0 or idx == 1:
                        item.set_edgecolor(point_colors)
                        item.set_facecolor(point_colors)

                    # if error bars change accordingly
                    elif isinstance(item, plt.Line2D):
                        # children 2 and 9 are drawn as faint dashed
                        # black lines; every other line takes the inner
                        # key's colour
                        if idx in (2, 9):
                            item.set_linestyle('dashed')
                            item.set_color("black")
                            item.set_alpha(0.25)
                        else:
                            item.set_color(color_map[row])

            # only set first column and first row titles
            if row_ind == 0:
                ax.set_title(row, color=color_map[row])

            if col_ind == 0:
                ax.set_ylabel(col, color=color_map[col])
            else:
                ax.set_ylabel('')

    for item in axes.flat:
        item.title.set_fontsize(30)
        item.yaxis.label.set_fontsize(24)
        item.xaxis.label.set_fontsize(24)
        for tick in item.get_yticklabels() + item.get_xticklabels():
            tick.set_fontsize(23)

    if "label" in data["DaG"]["DaG"]["dev"].columns:
        legend_ax = axes.flatten()[3]
        legend_ax.legend(loc='upper center',
                         bbox_to_anchor=(2.54, 0.8),
                         fontsize=20)
        leg = legend_ax.get_legend()
        # Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and
        # later removed the old name; support both spellings.
        handles = getattr(leg, "legend_handles", None)
        if handles is None:
            handles = leg.legendHandles
        for handle in (handles[0], handles[1]):
            handle.set_edgecolor('black')
            handle.set_facecolor('white')

    fig.text(0.5, 0.89, 'Label Sources', ha='center', fontsize=30)
    fig.text(0.5,
             0.04,
             'Number of Additional Label Functions',
             ha='center',
             fontsize=30)
    fig.text(0.04,
             0.5,
             f'Predicted Relations ({metric})',
             va='center',
             rotation='vertical',
             fontsize=25)
    fig.suptitle(title, fontsize=30)
    fig.text(0.69,
             0.02,
             '0-Only Uses Relation Specific Databases.',
             fontsize=27)
    plt.subplots_adjust(top=0.85)
    plt.savefig(file_name, format='png')

Esempio n. 33

0

Mostra file

def plot_roi_per_session(
    df,
    x='Session',
    y='Mean t-Statistic',
    condition='treatment',
    unit='subject',
    ci=90,
    palette=["#56B4E9", "#E69F00"],
    dodge=True,
    order=[],
    feature_map=True,
    roi_left=0.02,
    roi_bottom=0.74,
    roi_width=0.3,
    roi_height=0.2,
    roi_anat='/usr/share/mouse-brain-atlases/dsurqec_40micron_masked.nii',
    roi_threshold=None,
    cut_coords=None,
    samri_style=True,
    renames=[],
    save_as='',
    ax=None,
    fig=None,
):
    """Plot ROI t-values over the session timecourse.

    Draws a seaborn point plot of `y` against `x` and, optionally, an inset
    axes showing the region of interest.

    Parameters
    ----------
    df
        Data handed to `seaborn.pointplot`. A string is expanded to an
        absolute path first; any other object is used as-is.
        NOTE(review): a path is not loaded into a DataFrame inside this
        function -- confirm what callers pass.
    x, y
        Column names for the point plot; `y` is also used as the y-axis label.
    condition
        Column passed as `hue`.
    unit
        Column passed as `units`.
    ci, dodge, order
        Forwarded unchanged to `seaborn.pointplot`.
    palette
        Passed through `seaborn.color_palette`.
    feature_map
        If a string, it is drawn in the inset directly. If otherwise truthy,
        the first unique value of `df['feature']` is drawn (no inset if that
        column is missing). If falsy, no inset is drawn.
    roi_left, roi_bottom, roi_width, roi_height
        Inset position; the left/bottom offsets are added to the
        `figure.subplot.left` / `figure.subplot.bottom` rcParams.
    roi_anat
        Template passed to `maps.stat`.
    roi_threshold, cut_coords
        When both are truthy, the inset is drawn with `maps.stat`; otherwise
        with `maps.atlas_label`.
    samri_style
        Apply the 'seaborn-darkgrid' and then the 'ggplot' matplotlib styles.
    renames
        Mapping of column name to a {old value: new value} mapping; applied
        in place to `df`.
    save_as
        If non-empty, the current figure is saved to this (expanded) path.
    ax
        Axes to draw the point plot on; new axes are created if not given.
    fig
        Figure to return; `plt.figure(1)` is used if not given.

    Returns
    -------
    tuple
        `(fig, ax)` where `ax` is the axes returned by `seaborn.pointplot`.
    """

    if samri_style:
        plt.style.use(u'seaborn-darkgrid')
        plt.style.use('ggplot')

    # Only path-like input can be expanded. On Python 3 a non-path object
    # (e.g. a DataFrame) makes `expanduser` raise TypeError rather than
    # AttributeError, so both are treated as "not a path".
    try:
        df = path.abspath(path.expanduser(df))
    except (AttributeError, TypeError):
        pass

    # definitions for the axes
    height = rcParams['figure.subplot.top']
    bottom = rcParams['figure.subplot.bottom']
    left = rcParams['figure.subplot.left']
    width = rcParams['figure.subplot.right']

    session_coordinates = [left, bottom, width, height]

    roi_coordinates = [
        left + roi_left, bottom + roi_bottom, roi_width, roi_height
    ]

    if not fig:
        fig = plt.figure(1)

    if renames:
        for key in renames:
            for subkey in renames[key]:
                df.loc[df[key] == subkey, key] = renames[key][subkey]

    if not ax:
        ax1 = plt.axes(session_coordinates)
    else:
        ax1 = ax
    ax = sns.pointplot(
        x=x,
        y=y,
        units=unit,
        data=df,
        hue=condition,
        dodge=dodge,
        palette=sns.color_palette(palette),
        order=order,
        ax=ax1,
        ci=ci,
    )
    ax.set_ylabel(y)

    if isinstance(feature_map, str):
        ax2 = plt.axes(roi_coordinates)
        if roi_threshold and cut_coords:
            # The feature to draw is `feature_map` itself in this branch;
            # the previous code referenced the undefined name `feature`.
            maps.stat(
                feature_map,
                cut_coords=cut_coords,
                template=roi_anat,
                annotate=False,
                scale=0.3,
                show_plot=False,
                interpolation=None,
                threshold=roi_threshold,
                draw_colorbar=False,
                ax=ax2,
            )
        else:
            maps.atlas_label(
                feature_map,
                scale=0.3,
                color="#E69F00",
                ax=ax2,
                annotate=False,
                alpha=0.8,
            )
    elif feature_map:
        try:
            features = df['feature'].unique()
        except KeyError:
            # No 'feature' column: silently skip the inset.
            pass
        else:
            if len(features) > 1:
                print(
                    'WARNING: The features list contains more than one feature. We will highlight the first one in the list. This may be incorrect.'
                )
            feature = features[0]
            ax2 = plt.axes(roi_coordinates)
            if path.isfile(feature):
                if roi_threshold and cut_coords:
                    maps.stat(
                        stat_maps=feature,
                        cut_coords=cut_coords,
                        template=roi_anat,
                        annotate=False,
                        scale=0.3,
                        show_plot=False,
                        interpolation=None,
                        threshold=roi_threshold,
                        draw_colorbar=False,
                        ax=ax2,
                    )
                else:
                    maps.atlas_label(
                        feature,
                        scale=0.3,
                        color="#E69F00",
                        ax=ax2,
                        annotate=False,
                        alpha=0.8,
                    )
            else:
                # The feature is not a file on disk: treat it as a label
                # name inside the atlas referenced by the data.
                atlas = df['atlas'].unique()[0]
                mapping = df['mapping'].unique()[0]
                if isinstance(feature, str):
                    feature = [feature]
                maps.atlas_label(
                    atlas,
                    scale=0.3,
                    color="#E69F00",
                    ax=ax2,
                    mapping=mapping,
                    label_names=feature,
                    alpha=0.8,
                    annotate=False,
                )

    if save_as:
        plt.savefig(path.abspath(path.expanduser(save_as)),
                    bbox_inches='tight')

    return fig, ax

Esempio n. 34

0

Mostra file

# Show each observation with a scatterplot
sns.stripplot(x="measurement",
              y="value",
              hue="species",
              data=iris,
              dodge=True,
              jitter=True,
              alpha=.25,
              zorder=1)

# Show the conditional means
sns.pointplot(x="measurement",
              y="value",
              hue="species",
              data=iris,
              dodge=.532,
              join=False,
              palette="dark",
              markers="d",
              scale=.75,
              ci=None)

# Improve the legend
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[3:],
          labels[3:],
          title="species",
          handletextpad=0,
          columnspacing=1,
          loc="lower right",
          ncol=3,
          frameon=True)

Esempio n. 35

0

Mostra file

File: PointPlotOfSex.py Progetto: Wolvarun9295/PythonLibraries

import seaborn as sb
from matplotlib import pyplot as plt

df = sb.load_dataset('titanic')
sb.pointplot(x="sex", y="survived", hue="class", data=df)
plt.show()

Esempio n. 36

0

Mostra file

##############################################################################
# Plot Results
# ------------
#
# Here we plot the results.

fig, ax = plt.subplots(facecolor="white", figsize=[8, 4])

n_subs = len(dataset.subject_list)

if n_subs > 1:
    r = results.groupby(["pipeline", "subject",
                         "data_size"]).mean().reset_index()
else:
    r = results

sns.pointplot(data=r,
              x="data_size",
              y="score",
              hue="pipeline",
              ax=ax,
              palette="Set1")

errbar_meaning = "subjects" if n_subs > 1 else "permutations"
title_str = f"Errorbar shows Mean-CI across {errbar_meaning}"
ax.set_xlabel("Amount of training samples")
ax.set_ylabel("ROC AUC")
ax.set_title(title_str)
fig.tight_layout()
plt.show()

Esempio n. 37

0

Mostra file

File: make_time_plot.py Progetto: daviddliu/thesis

    for time in times:
        data_dicts.append({
            "Number of samples": num_samples,
            "Convergence time (s)": time,
            "Inference method": "DP/VI"
        })
# ``items()`` works on both Python 2 and Python 3 mappings, whereas
# ``iteritems()`` only exists on Python 2.
for num_samples, times in SciClone_times.items():
    for time in times:
        data_dicts.append({
            "Number of samples": num_samples,
            "Convergence time (s)": time,
            "Inference method": "SciClone (VI)"
        })
for num_samples, times in PyClone_times.items():
    for time in times:
        data_dicts.append({
            "Number of samples": num_samples,
            "Convergence time (s)": time,
            "Inference method": "PyClone (DP/MCMC)"
        })

# One row per (sample count, timing, method); seaborn aggregates the repeated
# timings for each sample count into a point with an error bar.
data = pd.DataFrame(data_dicts)
ax = sns.pointplot(x="Number of samples",
                   y="Convergence time (s)",
                   hue="Inference method",
                   data=data,
                   capsize=0.1,
                   markers=['x', 'o', '^'],
                   linestyles=['--', '--', '--'])
# ``sns.plt`` is not available in current seaborn releases; save through the
# figure that owns the returned Axes instead.
ax.figure.savefig('time_comparisons.png')

Esempio n. 38

0

Mostra file

df_M.head(2)

# <a id='eda'></a>
# ## Exploratory Data Analysis
#
# > After trimming and cleaning the data, we now move on to exploration: compute statistics and create visualizations to find patterns in the data and answer the research questions.
# ### Research Question 1 (Top 20 movies based on its Profit)

# In[51]:

info = pd.DataFrame(df_M['revenue'].sort_values(ascending=False))
info['original_title'] = df_M['original_title']
data = list(map(str, (info['original_title'])))
x = list(data[:20])
y = list(info['revenue'][:20])
ax = sns.pointplot(x=y, y=x)
sns.set(rc={'figure.figsize': (10, 10)})
ax.set_title("Top 20 Movies has high Profit", fontsize=15)
ax.set_xlabel("revenue", fontsize=15)
sns.set_style("darkgrid")

# The movie Avatar has the highest profit (measured here by revenue) in the dataset.

# ### Research Question 2 (Which movie Has Highest /  Lowest Profit and budget)

# In[52]:

#calculate Profit for each of the movie
#add a new column Profit for each of the movie
df_M['Profit'] = df_M['revenue'] - df_M['budget']

Esempio n. 39

0

Mostra file

File: 2-PlantGrowth.py Progetto: Amal-Almutairy/DS-EDA-Python

# Produces Pandas Series
plant_growth.groupby('group')['weight'].mean()

#%%
# Produces Pandas DataFrame
plant_growth.groupby('group')[['weight']].mean()

#%%
# Easy and flexible
plant_growth.groupby(['group']).agg({'weight': ['mean', 'std']})

#%%
# plot the data:
sns.boxplot(x="group", y="weight", data=plant_growth)
sns.catplot(x="group", y="weight", data=plant_growth)
sns.pointplot(x="group", y="weight", data=plant_growth, join=False)
sns.catplot(x="group", y="weight", data=plant_growth, kind="point")

#%%
# base R plotting functions:
""" boxplot()
hist()
plot()
plot(density()) """

#%%
# specify the model
""" import statsmodels.api as sm
from statsmodels.formula.api import ols """
model = ols("weight ~ group", data=plant_growth)
results = model.fit()

Esempio n. 40

0

Mostra file

# In[6]:

train, test = pd.read_csv("../input/train.csv"), pd.read_csv(
    "../input/test.csv")
test_ids = test[["PassengerId"]]

# In[7]:

train.head()

# We will plot various features with their relation to survival rate to have an idea of correlations

# In[8]:

fig, axs = plt.subplots(ncols=3, figsize=(16, 5))
sns.pointplot(x="Embarked", y="Survived", hue="Sex", data=train, ax=axs[0])
sns.pointplot(x="Pclass", y="Survived", hue="Sex", data=train, ax=axs[1])
sns.violinplot(x="Survived", y="Age", hue="Sex", data=train, ax=axs[2])

# We can already see some (strong) correlation between sex, age, Pclass, embarked and survival rate

# In[9]:

data_age = [
    train[train.Survived == 1].Age.dropna(),
    train[train.Survived == 0].Age.dropna()
]
labels = ["Survived", "Not survived"]
fig = ff.create_distplot(data_age, labels, bin_size=2, show_rug=False)
pyo.iplot(fig)

Esempio n. 41

0

Mostra file

                'Number of articles':
                articles_count,
                'Recall std error':
                Micro_Recall_Values_std_error
            })

        # stop after certain number of articles
        if (articles_count > max_count): break

# visualize Recall values
Articles_Recall_Values.boxplot()

# visualize standard error values: stack the macro and micro recall errors
# into one long-format frame, tagged by recall type
Recall_std_errors = pd.DataFrame(Macro_Recall_st_errors)
Recall_std_errors['Recall Type'] = 'Macro Recall'

temp_df = pd.DataFrame(Micro_Recall_st_errors)
temp_df['Recall Type'] = 'Micro Recall'

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
Recall_std_errors = pd.concat([Recall_std_errors, temp_df])

plt.figure(figsize=(8, 6))
# `col` is a facet-grid (catplot) argument, not a pointplot one, so it is
# not passed here; `hue` already separates the two recall types.
fig = sns.pointplot(x='Number of articles',
                    y='Recall std error',
                    hue='Recall Type',
                    data=Recall_std_errors)

fig.set(xlabel="Number of Articles")
fig.set(ylabel="Recall Standard Error")

Esempio n. 42

0

Mostra file

# just by eyeballing, normal distrib does not capture tails
# nbinom seems like best description, but params are not very intuitive...
import scipy.stats as stats
fig, axes = plt.subplots(len(strains2plot))
fit = []
for ax, (strain, group) in zip(axes.ravel(), parts[(parts.mass_norm>=2)&(parts.corrwideal>=0.5)&(parts.strain.isin(strains2plot))].groupby('strain')):
    values = group.mass_norm.values
    fit.append((strain, stats.probplot(values, dist="geom", sparams=(0.3), plot=ax)[1]))

# Summary plot
df2plot =  parts[(parts.mass_norm>=8)&(parts.corrwideal>=0.5)&(parts.strain.isin(strains2plot))]
order = sorted(df2plot.CTDr.unique())
with sns.axes_style(*style):
    fig, ax = plt.subplots(figsize=(9, 6))
# pooled
sns.pointplot(x='strain', y='mass_norm', order=strains2plot,
        data=df2plot, join=False,
        estimator=np.median, ci=99, ax=ax)
# each image
sns.stripplot(x='strain', y='mass_norm', order=strains2plot,
        data=parts[(parts.mass_norm>=7)&(parts.corrwideal>=0.5)&(parts.strain.isin(strains2plot))].groupby(['mov_name', 'strain']).median().reset_index(),
        ax=ax, size=12, alpha=0.25)
ax.set(ylabel='Median TS intensity (a.u.)', xlabel='CTD repeats', ylim=(ax.get_yticks()[0]-0.2, ax.get_yticks()[-1]))
sns.despine(left=False, bottom=False)
plt.tight_layout()

# check peaks found
from skimage import io
import trackpy as tp
imname = '03052019_yQC21_255u100%int480_150msExp_30-45minPosGal_6_w2GFPlow'
imname = '03272019_TL47pQC99_255u100%int480_150msExp_30-50minPosGal_13_w2GFPlow'

Esempio n. 43

0

Mostra file

File: Wtest.py Progetto: wuhengliangliang/data_analysis

# Set the tick spacing of the x axis.
# ax is the current Axes instance (holds both coordinate axes).
ax = plt.gca()
# Locator that places a major tick at every multiple of 1.
x_major_locator = MultipleLocator(1)
# Apply the locator to the x axis.
ax.xaxis.set_major_locator(x_major_locator)

sns.set_context(context="poster", font_scale=0.1)
# plt.bar(np.arange(len(df["high"].value_counts()))+0.5,df["high"].value_counts(),width=0.8)
plt.plot(df["date"], df["high"], c='green')
# NOTE(review): the annotations use value_counts() of "high" (frequencies),
# not the temperatures themselves -- confirm this is intended.
for x, y in zip(np.arange(len(df["date"].value_counts())),
                df["high"].value_counts()):
    plt.text(x, y, y, ha="center", va="bottom")
plt.savefig("weather.jpg")
plt.show()

# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
sns.pointplot(x=df["date"], y=df["high"])
sns.set_context(context="poster", font_scale=0.1)
plt.savefig("2.jpg")
plt.show()

# `symbol_size` was misspelled as `ymbol_size`, so the marker size was never
# applied.
# NOTE(review): `is_label_show` receives a Series rather than a bool --
# confirm against the charting library's API.
line.add("气温",
         df["date"],
         df["high"],
         symbol_size=2,
         is_step=False,
         is_label_show=df['date'])
line.render("zhexian.html")  # generate the corresponding HTML file
# print(df_new.head(10))

Esempio n. 44

0

Mostra file

File: sample146.py Progetto: tetherless-world/CodeGraph

#     <li>linestyles : string or list of strings, optional</li>
#     <li>color : matplotlib color, optional</li>
#     <li>palette : palette name, list, or dict, optional</li>
#     <li>ax : matplotlib Axes, optional</li>
# </ul>
#
#

# In[ ]:

#Gender show point plot
data['Race/Ethnicity'].unique()
len(data[(data['Race/Ethnicity'] == 'group B')].Math_Score)
f, ax1 = plt.subplots(figsize=(25, 10))
sns.pointplot(x=np.arange(1, 191),
              y=data[(data['Race/Ethnicity'] == 'group B')].Math_Score,
              color='lime',
              alpha=0.8)
sns.pointplot(x=np.arange(1, 191),
              y=data[(data['Race/Ethnicity'] == 'group B')].Reading_Score,
              color='red',
              alpha=0.5)
#sns.pointplot(x=np.arange(1,191),y=data[(data['Race/Ethnicity']=='group B')].Math_Score,color='lime',alpha=0.8)
plt.xlabel('Group B index State')
plt.ylabel('Frequency')
plt.title('Group B Math Score & Reading_Score')
plt.xticks(rotation=90)
plt.grid()
plt.show()

# In[ ]:

Esempio n. 45

0

Mostra file

File: titanic-version3.py Progetto: nischalshrestha/automatic_wat_discovery

#graph individual features by survival
fig, saxis = plt.subplots(2, 3, figsize=(16, 12))

sns.barplot(x='Embarked', y='Survived', data=data1, ax=saxis[0, 0])
sns.barplot(x='Pclass',
            y='Survived',
            order=[1, 2, 3],
            data=data1,
            ax=saxis[0, 1])
sns.barplot(x='Isalone',
            y='Survived',
            order=[1, 0],
            data=data1,
            ax=saxis[0, 2])

sns.pointplot(x='FareBin', y='Survived', data=data1, ax=saxis[1, 0])
sns.pointplot(x='AgeBin', y='Survived', data=data1, ax=saxis[1, 1])
sns.pointplot(x='FamilyMembers', y='Survived', data=data1, ax=saxis[1, 2])

# In[ ]:

#graph distribution of qualitative data: Pclass
#we know class mattered in survival, now let's compare class and a 2nd feature
fig, (axis1, axis2, axis3) = plt.subplots(1, 3, figsize=(14, 12))

sns.boxplot(x='Pclass', y='Fare', hue='Survived', data=data1, ax=axis1)
axis1.set_title('Pclass vs Fare Survival Comparison')

sns.violinplot(x='Pclass',
               y='Age',
               hue='Survived',

Esempio n. 46

0

Mostra file

File: let-s-discover-more-about-the-olympic-games.py Progetto: ajmal017/data-journey

# Participation counts per year, one figure per sex.
part.loc[:,'M'].plot()
plt.title('Variation of Male Athletes over time')
part = WomenOverTime.groupby('Year')['Sex'].value_counts()
plt.figure(figsize=(20, 10))
part.loc[:,'F'].plot()
plt.title('Variation of Female Athletes over time')

# Age distribution per year. x/y are passed by keyword throughout because
# seaborn >= 0.12 no longer accepts them positionally.
plt.figure(figsize=(20, 10))
sns.boxplot(x='Year', y='Age', data=MenOverTime)
plt.title('Variation of Age for Male Athletes over time')
MenOverTime.loc[MenOverTime['Age'] > 80].head(10)
plt.figure(figsize=(20, 10))
sns.boxplot(x='Year', y='Age', data=WomenOverTime)
plt.title('Variation of Age for Female Athletes over time')
WomenOverTime.loc[WomenOverTime['Year'] == 1904]

# Weight per year.
plt.figure(figsize=(20, 10))
sns.pointplot(x='Year', y='Weight', data=MenOverTime)
plt.title('Variation of Weight for Male Athletes over time')
plt.figure(figsize=(20, 10))
sns.pointplot(x='Year', y='Weight', data=WomenOverTime)
plt.title('Variation of Weight for Female Athletes over time')
womenInOlympics.loc[womenInOlympics['Year'] < 1924].head(20)

# Height per year.
plt.figure(figsize=(20, 10))
sns.pointplot(x='Year', y='Height', data=MenOverTime, palette='Set2')
plt.title('Variation of Height for Male Athletes over time')
plt.figure(figsize=(20, 10))
sns.pointplot(x='Year', y='Height', data=WomenOverTime, palette='Set2')
plt.title('Variation of Height for Female Athletes over time')
WomenOverTime.loc[(WomenOverTime['Year'] > 1924) & (WomenOverTime['Year'] < 1952)].head(10)
MenOverTime.head(5)
itMenOverTime = MenOverTime.loc[MenOverTime['region'] == 'Italy']
itMenOverTime.head(5)

Esempio n. 47

0

Mostra file

File: plotsExp2.py Progetto: MichlF/plottingResults

means = dataTarLoc.groupby(['cond_tarLocation'])['responseTime'].mean().values
dataTarLocER.accuracy = (1-dataTarLocER.accuracy)*100  # make accuracy error rate
#mobs = dataTarLoc['cond_tarLocation'].value_counts().values
#pos = range(len(mobs))

# Plotting
fig1 = plt.figure(figsize=(3.25, 6), dpi=100)
ax1 = plt.subplot2grid((6, 1), (0, 0), rowspan=5, colspan=1)
sns.violinplot(x='cond_tarLocation', y='responseTime', data=dataTarLoc, cut=vioCut, saturation=vioSat, linewidth=vioLw, palette=pTarLoc)
sns.swarmplot(x="cond_tarLocation", y="responseTime", data=dataTarLoc, color=swaCol, alpha=swaAlp, linewidth=swaLwE, edgecolor=swaColE)
ax1.plot(range(len(means)), [means[0], means[1], means[2]], color=lpColor, marker=lpMarker, markersize=lpMarkerS,
        markeredgecolor=lpMarkerEC, markeredgewidth=lpMarkerEW, lw=lpLw, ls=lpLs, zorder=3)#, dashes=(0.75, 0.75))
ax1.set_xlabel('')
ax1.set_ylabel("Response Time [in ms]")
ax2 = plt.subplot2grid((6, 1), (5, 0), rowspan=1, colspan=1, sharex=ax1)
sns.pointplot(x='cond_tarLocation', y='accuracy', data=dataTarLocER, color=lpColor, markers=lpMarker, ci=ci)
ax2.axes.get_xaxis().set_visible(False)
ax2.axes.get_xaxis().set_ticks([])
ax2.yaxis.set_ticks(np.arange(3, 11, 2))
plt.ylabel('Error Rate\n [in %]')
sns.despine(offset=10, trim=True)
plt.show()

###  Figure 2 ###

# Data selection: RT
dataTarLocGrad = pd.pivot_table(data[(data.cond_disPresent == 'absent') & (data.RTquicker200 == 0)],
                                values='responseTime', index='subject_nr', columns='TarDistanceFromColor')
dataTarLocGrad = pd.melt(
    dataTarLocGrad.reset_index(),
    id_vars='subject_nr',

Esempio n. 48

0

Mostra file

def model_(x_train, y_train, x_test, y_test, boost_type='lgb'):
    """Train a gradient-boosting classifier and tune its decision threshold.

    The model is fitted on the training data, positive-class probabilities
    are predicted for both sets, and every threshold 0.00, 0.01, ..., 0.99 is
    scored by F1 and accuracy. The threshold with the highest validation F1
    is reported, plotted against, and returned.

    Args:
        x_train: training features.
        y_train: training labels.
        x_test: validation features.
        y_test: validation labels.
        boost_type: 'lgb' for ``lgb.LGBMClassifier`` or 'xgb' for
            ``XGBClassifier``.

    Returns:
        Tuple ``(model, best_prob)``: the fitted classifier and the threshold
        that maximises the validation F1 score.

    Raises:
        ValueError: if ``boost_type`` is neither 'lgb' nor 'xgb'.

    Side effects: prints progress, shows two matplotlib figures, and displays
    confusion matrices and the top-10 feature importances. The feature names
    come from the module-level ``feature`` and output uses the notebook's
    ``display``.
    """
    # Fail fast: previously an unknown type left `model` unbound and only
    # surfaced later as an UnboundLocalError at `model.fit`.
    if boost_type not in ('lgb', 'xgb'):
        raise ValueError(
            f"Unsupported boost_type {boost_type!r}; expected 'lgb' or 'xgb'")

    tStart = time.time()
    if boost_type == 'lgb':
        model = lgb.LGBMClassifier(
            boosting_type='gbdt',
            objective='binary',
            learning_rate=0.01,
            n_estimators=9000,
            max_depth=8,
            min_child_weight=5,
            scale_pos_weight=9,  # refer: 70
            subsample=0.7,
            colsample_bytree=0.7,
            subsample_freq=1,
            n_jobs=-1)

    elif boost_type == 'xgb':
        model = XGBClassifier(learning_rate=0.025,
                              tree_method='gpu_hist',
                              n_estimators=6000,
                              max_depth=9,
                              min_child_weight=1,
                              gamma=0,
                              subsample=0.8,
                              colsample_bytree=0.8,
                              objective='binary:logistic',
                              nthread=-1,
                              scale_pos_weight=11,
                              seed=27)
    print('--' * 25)
    print('Start training ...')
    model.fit(x_train, y_train)
    # Probability of the positive class for each sample.
    yp_train = model.predict_proba(x_train)[:, 1]
    yp_valid = model.predict_proba(x_test)[:, 1]
    print(
        f'Use time: { np.int_((time.time()-tStart)/60)  } mins\nCaluate prob ...'
    )

    ## probability tune
    # Rows: 0 = threshold, 1 = train F1, 2 = valid F1,
    #       3 = train accuracy, 4 = valid accuracy.
    mat = np.zeros([5, 100])
    for threshold in range(100):
        y_pred_train = np.int_(yp_train > threshold * 0.01)
        y_pred_valid = np.int_(yp_valid > threshold * 0.01)
        mat[0, threshold] = round(threshold * 0.01, 2)
        mat[1, threshold] = f1_score(y_train, y_pred_train)
        mat[2, threshold] = f1_score(y_test, y_pred_valid)
        mat[3, threshold] = (y_train == y_pred_train).mean()
        mat[4, threshold] = (y_test == y_pred_valid).mean()

    # Fig1 for F1 (red = train, blue = validation)
    sns.pointplot(x=mat[0, :], y=mat[1, :], color='r')
    sns.pointplot(x=mat[0, :], y=mat[2, :], color='b')
    plt.title(f'{boost_type} F1 performance', color='r')
    plt.show()

    # Fig2 for accuracy; the first 10 thresholds are left out of this plot
    sns.pointplot(x=mat[0, 10:], y=mat[3, 10:], color='r')
    sns.pointplot(x=mat[0, 10:], y=mat[4, 10:], color='b')
    plt.title(f'{boost_type} Acc performance', color='r')
    plt.show()
    print('--' * 20)

    # result for the best probability: threshold with the highest valid F1
    best_prob = round(np.argmax(mat[2, :]) * 0.01, 2)
    print('Valid Result:\nprob: {}, F1 : {}, acc : {}'.\
          format(best_prob,max(mat[2,:]).round(3), mat[4,:][np.argmax(mat[2,:])].round(3)))
    print('--' * 20)

    # confusion matrix at the selected threshold
    y_pred_train = np.int_(yp_train > best_prob)
    y_pred_valid = np.int_(yp_valid > best_prob)
    print('Train confusion matrix')
    display(
        pd.crosstab(y_train, y_pred_train, margins=True, margins_name="Total"))
    print('--' * 20)
    print('Valid confusion matrix')
    display(
        pd.crosstab(y_test, y_pred_valid, margins=True, margins_name="Total"))
    print('--' * 20)

    print('Feature Importance (Top 10)')
    display(pd.DataFrame({'feature':feature,'gain':model.feature_importances_}).\
        sort_values(by='gain',ascending=False).iloc[0:10,:])
    print('--' * 25)
    return model, best_prob

Esempio n. 49

0

Mostra file

File: first-go-at-titanic-dataset.py Progetto: nischalshrestha/automatic_wat_discovery

pd.cut(data_train['Age'], bins=5)
#[(0.34, 16.336] < (16.336, 32.252] < (32.252, 48.168] < (48.168, 64.084] < (64.084, 80.0]]
pd.cut(data_test['Age'], bins=5)
#[(0.0942, 15.336] < (15.336, 30.502] < (30.502, 45.668] < (45.668, 60.834] < (60.834, 76.0]]

# Now we will create age bins for the full dataset.
# Two sentinel ages (0 and 80) are added so the 8 equal-width bins span a
# fixed 0-80 range; Series.append was removed in pandas 2.0, so pd.concat is
# used to add them.
ages = data_full['Age']
ages = pd.concat([ages, pd.Series([0, 80])])
bins = pd.cut(ages, bins=8, labels=[1, 2, 3, 4, 5, 6, 7, 8])

# Drop the two sentinel rows again before storing the bin labels.
data_full['AgeBin'] = bins[:-2].astype(float)

fig = plt.figure(figsize=(15, 8))
sns.pointplot(x='AgeBin',
              y='Survived',
              ci=95.0,
              hue='Sex',
              data=data_full,
              dodge=True)

# From all this it looks like it would be good to create a dummy for kids - since agebin does not really provide a whole lot of differentiation except in the agebin=1 category

# In[ ]:

data_full['IsKid'] = 0
data_full.loc[data_full['AgeBin'] == 1, 'IsKid'] = 1

# ** Dealing with missing values in the age feature **
#
# Age is the feature that has the most missing values after the cabin feature. In this case I want to fill the agebin with the mode of the agebin based on the title of the person. This especially seems to make sense for Master/Mr and Mrs/Miss where the title allows us to extract information about age based instead of just picking at random.

# In[ ]:

Esempio n. 50

0

Mostra file

File: decode_value_bysubj.py Progetto: davidbestue/encoding

    t_p1 = (start_hrf + t_p) * x_bins / max_val_x
    r_t1 = (start_hrf + r_t) * x_bins / max_val_x
    #
    d_p2 = d_p1 + sec_hdrf * x_bins / max_val_x
    t_p2 = t_p1 + sec_hdrf * x_bins / max_val_x
    r_t2 = r_t1 + sec_hdrf * x_bins / max_val_x

    y_vl_min = df_all_by_subj.Decoding_error.min()
    y_vl_max = df_all_by_subj.Decoding_error.max()

    range_hrf = [float(5) / x_bins, float(6) / x_bins]  #
    paper_rc = {'lines.linewidth': 2, 'lines.markersize': 2}
    sns.set_context("paper", rc=paper_rc)
    sns.pointplot(x='timepoint',
                  y='Decoding_error',
                  hue='ROI',
                  data=df_all_by_subj,
                  size=5,
                  aspect=1.5)
    ##all subj visual
    paper_rc = {'lines.linewidth': 0.25, 'lines.markersize': 0.5}
    sns.set_context("paper", rc=paper_rc)
    for a in ['visual', 'ips']:
        if a == 'visual':
            c = 'b'
        elif a == 'ips':
            c = 'darkorange'
        for s in df_all_by_subj.subj.unique():
            sns.pointplot(
                x='timepoint',
                y='Decoding_error',
                data=df_all_by_subj.loc[(df_all_by_subj['ROI'] == a)

Esempio n. 51

0

Mostra file

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

if __name__ == '__main__':
    # Common prefix of every CSV written by this experiment run.
    exp_name = 'output/exp_p3_w4_s4_deadline'

    general_file = f'{exp_name}-general_data.csv'
    print(f'Loading data file: {general_file}')
    df = pd.read_csv(general_file)
    print(df)

    # One figure per metric, each plotted against the epoch number.
    # NOTE: the first epoch can be left out of the duration plot by
    # plotting df[df['epoch'] > 1] instead of df.
    for metric, title in (('accuracy', 'Accuracy per epoch'),
                          ('duration', 'Train time per epoch')):
        plt.figure()
        sns.pointplot(data=df, x='epoch', y=metric)
        plt.title(title)
        plt.show()

    # Stack the per-client epoch logs (clients 1 to 4) into one frame.
    client_files = [
        f'{exp_name}_client{client_id}_epochs.csv'
        for client_id in range(1, 5)
    ]
    dfs = [pd.read_csv(path) for path in client_files]
    client_df = pd.concat(dfs, ignore_index=True)

Esempio n. 52

0

Mostra file

sns.countplot(x='Gamma', hue='Segmentation', data=s2_tads_all)
plt.xticks(rotation=90)
plt.show()


# Let's compare the lengths of TADs depending on gamma with these two segmentation methods.

# In[16]:

# TAD length = End - Start, stored as an integer column.
s2_tads_all['Length'] = (s2_tads_all['End'] - s2_tads_all['Start']).astype(int)


# In[17]:

# Length per Gamma, split by Segmentation: the point plot (zorder=15) is drawn
# on top of the semi-transparent jittered strip plot (zorder=1) on the same axes.
sns.pointplot(x='Gamma', y='Length', hue='Segmentation', data=s2_tads_all, zorder=15)
sns.stripplot(x='Gamma', y='Length', hue='Segmentation', data=s2_tads_all, jitter=True, zorder=1, alpha=0.5)
# Both calls use hue, so only the first two legend handles/labels are kept;
# presumably the legend would otherwise repeat the Segmentation entries.
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[:2], labels[:2], loc='upper right')
plt.xticks(rotation=90)
# Logarithmic y axis for the Length values.
plt.yscale('log')
plt.show()


# And again, let's look at the Score in the same way.

# In[18]:

sns.pointplot(x='Gamma', y='Score', hue='Segmentation', data=s2_tads_all, zorder=15)
sns.stripplot(x='Gamma', y='Score', hue='Segmentation', data=s2_tads_all, jitter=True, zorder=1, alpha=0.5)
handles, labels = plt.gca().get_legend_handles_labels()

Esempio n. 53

0

Mostra file

File: kernel_188.py Progetto: josepablocam/wranglesearch

    'world_rank', 'year'
]]

import pandas as pd  # local import: harmless if pandas is already imported at the top of the file

# Tag every row with the ranking it comes from so the sources can be told
# apart (and used as hue) once the three frames are stacked.
times_plot_data['source'] = 'Times'
shanghai_plot_data['source'] = 'Shanghai'
cwur_plot_data['source'] = 'CWUR'

# Ranks may be given as ranges ("201-300"): keep only the first number.
times_plot_data['world_rank'] = times_plot_data['world_rank'].str.split(
    '-').str[0]
shanghai_plot_data['world_rank'] = shanghai_plot_data['world_rank'].str.split(
    '-').str[0]

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames the
# same way (original indexes kept, no sorting of columns).
plot_data = pd.concat(
    [times_plot_data, shanghai_plot_data, cwur_plot_data])
plot_data['world_rank'] = plot_data['world_rank'].astype(int)
ax = sns.pointplot(x='year', y='world_rank', hue='source', data=plot_data)

# Styling: large fonts for the title and axis labels, slightly smaller ones
# for ticks and legend, and no top/right frame lines.
label_size = 26
tick_size = 20

plt.title(my_university_name[0] + " Ranking", fontsize=label_size)
plt.xticks(fontsize=tick_size)
plt.yticks(fontsize=tick_size)
plt.ylabel("World Rank", fontsize=label_size)
plt.xlabel("Year", fontsize=label_size)
plt.tight_layout()
plt.legend(loc='upper left', fontsize=tick_size)
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.xaxis.tick_bottom()
ax.yaxis.tick_left()

Esempio n. 54

0

Mostra file

File: script722.py Progetto: darkblue-b/kaggleScape

# Bar chart of the number of rows in train_df for each pickup_hour value.
plt.figure(figsize=(12, 8))
sns.countplot(x="pickup_hour", data=train_df)
plt.ylabel('Count', fontsize=12)
plt.xlabel('pick up hour', fontsize=12)
# Vertical tick labels so the hour values do not overlap.
plt.xticks(rotation='vertical')
plt.show()

# The distribution shows the demand for cars by pickup hour. After midnight fewer trips are taken. Now let us see how the trip duration changes with the time of the trip.

# In[ ]:

# Median trip duration for each pickup hour.
grouped_df = train_df.groupby(
    'pickup_hour')['trip_duration'].median().reset_index()
plt.figure(figsize=(12, 8))
# x and y are passed by keyword: current seaborn releases no longer accept
# them positionally, while older releases accept both forms.
sns.pointplot(x=grouped_df.pickup_hour.values,
              y=grouped_df.trip_duration.values,
              alpha=0.8,
              color=color[3])
plt.ylabel('median trip duration', fontsize=12)
plt.xlabel('pick up hour', fontsize=12)
plt.xticks(rotation='vertical')
plt.show()

# In[ ]:

# Group by day
grouped_df = train_df.groupby('day_week')['trip_duration'].aggregate(
    np.median).reset_index()
plt.figure(figsize=(12, 8))
sns.pointplot(grouped_df.day_week.values,
              grouped_df.trip_duration.values,
              alpha=0.8,

Esempio n. 55

0

Mostra file

File: dataPreview.py Progetto: kehw/instacart

# Pie chart of how often each department occurs in the prior orders,
# expressed as a percentage of all rows.
temp_series = order_products_prior_df['department'].value_counts()
labels = np.array(temp_series.index)
sizes = np.array(temp_series / temp_series.sum() * 100)
plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=200)
plt.title("Departments distribution", fontsize=15)
plt.show()

print 'press any key to continue'
raw_input('')

# Mean of the "reordered" flag per department, i.e. the reorder ratio.
grouped_df = order_products_prior_df.groupby(
    ["department"])["reordered"].aggregate("mean").reset_index()

plt.figure(figsize=(12, 8))
# x and y are passed by keyword: current seaborn releases no longer accept
# them positionally, while older releases accept both forms.
sns.pointplot(x=grouped_df['department'].values,
              y=grouped_df['reordered'].values,
              alpha=0.8,
              color=color[2])
plt.ylabel('Reorder ratio', fontsize=12)
plt.xlabel('Department', fontsize=12)
plt.title("Department wise reorder ratio", fontsize=15)
plt.xticks(rotation='vertical')
plt.show()

print 'press any key to continue'
raw_input('')

grouped_df = order_products_prior_df.groupby(
    ["department_id", "aisle"])["reordered"].aggregate("mean").reset_index()
fig, ax = plt.subplots(figsize=(12, 20))
ax.scatter(grouped_df.reordered.values, grouped_df.department_id.values)
for i, txt in enumerate(grouped_df.aisle.values):

Esempio n. 56

0

Mostra file

File: spp_03_analyse_SLCV_results.py Progetto: jirispilka/kaggle-seizure-prediction-2016

# sys.exit(0)

# s = '1_fs__k' if '1_fs__k' in parameters else 'jmi__k_feat'
# s = 'jmi__k_feat'
s = 'clf__n_neighbors'
# s = 'nb__alpha'
# s = 'clf3__gr__group'
# s = 'clf1__fs__k'
# s = 'fs__k'
# s = 'clf3__fs__k'
# s = 'clf3__xgb__n_estimators'

# One figure per tuned parameter: median 'result' for each value of the
# parameter, with one line per value of the hue parameter `s` chosen above.
for p in parameters.keys():
    plt.figure()
    # sns.pointplot(x=p, hue='max_depth', y='result', data=df_results[df_results['folds'] == 2], estimator=np.median)
    sns.pointplot(x=p, hue=s, y='result', data=df_results, estimator=np.median)
    # Fixed y range so the figures can be compared with each other.
    plt.ylim(0.6, 0.8)

if '1_clf__C' in parameters:
    plt.figure()
    plt.subplot(311)
    sns.pointplot(x='1_clf__C',
                  y='result_1',
                  data=df_results,
                  estimator=np.median,
                  color='r')
    plt.legend('patient 1')
    plt.grid()
    plt.subplot(312)
    sns.pointplot(x='2_clf__C',
                  y='result_2',

Esempio n. 57

0

Mostra file

File: seabornlearn.py Progetto: fangchi/python_project

# d = rs.normal(size=1000)
# f, axes = plt.subplots(2, 2, figsize=(7, 10), sharex=False)
# sns.distplot(d, kde=False, color="b", ax=axes[0, 0])
# sns.distplot(d, hist=False, rug=True, color="r", ax=axes[0, 1])
# sns.distplot(d, hist=False, color="g", kde_kws={"shade": True}, ax=axes[1, 0])
# sns.distplot(d, color="m", ax=axes[1, 1])
# plt.show()

#eg4 box plot
# iris = sns.load_dataset("data",data_home="/Users/fangchi/PycharmProjects/python_project/marchineLearning/ch3/3.4")
# sns.boxplot(x = iris['萼片_长度'],y = iris['品种'])
# plt.show()

#eg5
# iris = sns.load_dataset("data",data_home="/Users/fangchi/PycharmProjects/python_project/marchineLearning/ch3/3.4")
# sns.jointplot("萼片_长度", "花瓣_长度", iris)
# plt.show()

#eg6
# Load the dataset named "data", using the given directory as seaborn's
# data_home.
iris = sns.load_dataset(
    "data",
    data_home=
    "/Users/fangchi/PycharmProjects/python_project/marchineLearning/ch3/3.4")

plt.figure(figsize=(12, 8))

# x and y are passed by keyword: current seaborn releases no longer accept
# them positionally, while older releases accept both forms.
sns.pointplot(x=iris.萼片_长度.values,
              y=iris.品种.values,
              alpha=0.8,
              color='blue')
plt.ylabel('品种', fontsize=12)
plt.xlabel('萼片_长度', fontsize=12)
plt.xticks(rotation='vertical')
plt.show()

Esempio n. 58

0

Mostra file

        
       
    fs = []
    for k in range(1):
        for i in ['fc','fs_5dis','fs_6dis']:
            fs+=[i for j in range(fs_ref.shape[1])]
        
    ref = []
    for k in ['Rep_50']:
        ref += [k for i in range(3*fs_ref.shape[1])]
    
    data = pd.DataFrame({'icc':icc, 'icc_msr':icc_msr, 'icc_mse':icc_mse, 'fs':fs, 'ref': ref})
        
    plt.figure(figsize=(20, 10))
    sns.pointplot(x="ref", y="icc_msr", data=data, hue= 'fs', dodge=0.53, join=False, palette="dark",markers="d", scale=.75, ci='sd',capsize = 0.07)
    sns.stripplot(x="ref", y="icc_msr", data=data, hue= 'fs', size = 3, dodge=0.45, alpha = 0.05).set_title('Edge-wise ICC MSr')
    pt.half_violinplot(x="ref", y="icc_msr", data=data, hue= 'fs',scale = "area",inner = None, offset = 0.03, saturation=0.5)
    plt.legend(ncol=2)       
    plt.savefig(plotd+'icc_msr.png')######
    plt.close()

    plt.figure(figsize=(20, 10))
    sns.pointplot(x="ref", y="icc_mse", data=data, hue= 'fs', dodge=0.53, join=False, palette="dark",markers="d", scale=.75, ci='sd',capsize = 0.07)
    sns.stripplot(x="ref", y="icc_mse", data=data, hue= 'fs', size = 3, dodge=0.45, alpha = 0.05).set_title('Edge-wise ICC MSe')
    pt.half_violinplot(x="ref", y="icc_mse", data=data, hue= 'fs',scale = "area",inner = None, offset = 0.03, saturation=0.5)

    t1,p1 = stats.ttest_ind(icc_mse[0:int(len(icc_mse)/3)],icc_mse[int(len(icc_mse)/3):int(len(icc_mse)/3*2)], nan_policy ='omit', equal_var=False)
    t2,p2 = stats.ttest_ind(icc_mse[0:int(len(icc_mse)/3)],icc_mse[int(len(icc_mse)/3*2):], nan_policy ='omit', equal_var=False)

    plt.text(-0.15,0,'T: '+str(round(t1,5))+'\n'+'P: '+str(round(p1,5)),fontsize=18)

Esempio n. 59

0

Mostra file

File: helpers.py Progetto: DannyGsGit/IntroToMLLab

    def numerical_pca_egv(df, conf_dict, col1, col2, col3):
        """Run a PCA on the numerical columns and show three diagnostic plots.

        The columns listed in ``conf_dict['NumericalColumns']`` are
        standardised and projected onto as many principal components as
        there are numerical columns. Three figures are then shown:

        1. a scatter plot of the observations on components ``col2`` and
           ``col3``, coloured by ``col1``;
        2. a variable factor map (one arrow per numerical column) for the
           same two components, with a unit circle for scale;
        3. a scree plot of the explained variance ratio of every component.

        Args:
            df: DataFrame containing the numerical columns and ``col1``.
                It is only read, never modified.
            conf_dict: Mapping whose ``'NumericalColumns'`` entry lists the
                column names to feed into the PCA.
            col1: Name of the column used to colour the scatter plot.
            col2: 1-based number of the component shown on the x axis
                (an int or anything ``int()`` / ``str()`` can handle).
            col3: 1-based number of the component shown on the y axis.

        Returns:
            None. The figures are displayed with ``plt.show()``.
        """
        numerical_columns = list(conf_dict['NumericalColumns'])
        n_components = len(numerical_columns)

        # Standardise the numerical columns; the result gets a fresh
        # 0..n-1 index.
        scaled = StandardScaler().fit_transform(df[numerical_columns])
        df2 = pd.DataFrame(scaled, columns=numerical_columns)

        pca = PCA(n_components=n_components)
        reduced = pca.fit_transform(df2)

        # Append the principal components of each entry as PC1, PC2, ...
        for i in range(n_components):
            df2['PC' + str(i + 1)] = reduced[:, i]

        if col1 not in numerical_columns:
            # Copy the hue column by position. Going through .values avoids
            # index alignment, so the caller's df no longer needs to be
            # re-indexed in place (which mutated the argument).
            df2[col1] = df[col1].values

        # Show the points in terms of the two selected components.
        # x/y are passed by keyword and the figure size via ``height``
        # (the former ``size`` keyword of lmplot).
        sns.lmplot(x='PC' + str(col2),
                   y='PC' + str(col3),
                   hue=col1,
                   data=df2,
                   fit_reg=False,
                   scatter=True,
                   height=7)
        plt.show()

        # Plot a variable factor map for the two selected components.
        x_component = int(col2) - 1  # row of pca.components_ for the x axis
        y_component = int(col3) - 1  # row of pca.components_ for the y axis
        (fig, ax) = plt.subplots(figsize=(8, 8))
        for i, column_name in enumerate(numerical_columns):
            x_loading = pca.components_[x_component, i]
            y_loading = pca.components_[y_component, i]
            ax.arrow(0,
                     0,  # Start the arrow at the origin
                     x_loading,
                     y_loading,
                     head_width=0.05,
                     head_length=0.08)
            # Label the arrow just past its tip.
            ax.text(x_loading + 0.05, y_loading + 0.05, column_name)

        an = np.linspace(0, 2 * np.pi, 100)
        ax.plot(np.cos(an), np.sin(an))  # Add a unit circle for scale
        ax.axis('equal')
        ax.set_title('Variable factor compass')
        plt.show()

        # Do a scree plot. Components are numbered from 1 so the axis
        # matches the PC1, PC2, ... naming used above.
        tick_positions = np.arange(n_components)
        component_numbers = tick_positions + 1
        (fig, ax) = plt.subplots(figsize=(8, 6))
        sns.pointplot(x=component_numbers,
                      y=pca.explained_variance_ratio_,
                      ax=ax)
        ax.set_title('Scree plot')
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(component_numbers)
        ax.set_xlabel('Component Number')
        ax.set_ylabel('Explained Variance')
        plt.show()

Esempio n. 60

0

Mostra file

File: scikit-learn-ml-from-start-to-finish.py Progetto: ajmal017/data-journey

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
### matplotlib inline

# Load the train and test CSV files from the ../input directory.
data_train = pd.read_csv('../input/train.csv')
data_test = pd.read_csv('../input/test.csv')

# Draws 3 random rows; the result is not stored or printed here
# (presumably displayed by the notebook this script was exported from).
data_train.sample(3)
# Survived per Embarked value, split by Sex.
sns.barplot(x="Embarked", y="Survived", hue="Sex", data=data_train)
# Survived per Pclass, split by Sex, with a distinct colour, marker and line
# style for each sex.
# NOTE(review): no new figure is created between the two calls, so when run
# as a plain script both plots are drawn on the same axes - confirm intent.
sns.pointplot(x="Pclass",
              y="Survived",
              hue="Sex",
              data=data_train,
              palette={
                  "male": "blue",
                  "female": "pink"
              },
              markers=["*", "o"],
              linestyles=["-", "--"])


def simplify_ages(df):
    df.Age = df.Age.fillna(-0.5)
    bins = (-1, 0, 5, 12, 18, 25, 35, 60, 120)
    group_names = [
        'Unknown', 'Baby', 'Child', 'Teenager', 'Student', 'Young Adult',
        'Adult', 'Senior'
    ]
    categories = pd.cut(df.Age, bins, labels=group_names)
    df.Age = categories