Ejemplo n.º 1
def clean_outliers(points):
    """Drop outlying points with two rounds of fit-then-filter.

    A least-squares line is fitted to the points, ``remove_outliers`` is
    called with that line, and the same step is repeated once on whatever it
    returned.

    Args:
        points: Array-like of paired values, either N rows of ``(x, y)`` or
            two rows ``(xs, ys)``.

    Returns:
        The value returned by ``remove_outliers`` on the second pass.
    """
    def _fit_line(pairs):
        # x and y are passed separately because the single-argument form of
        # linregress is deprecated in recent SciPy releases.  The orientation
        # test mirrors the one linregress used to apply to a lone 2-D input.
        pairs = np.asarray(pairs)
        xs, ys = pairs if pairs.shape[0] == 2 else pairs.T
        fit = stats.linregress(xs, ys)
        return fit[0], fit[1]

    boundaries_array = np.array(points)
    for _ in range(2):
        slope, intercept = _fit_line(boundaries_array)
        boundaries_array = remove_outliers(boundaries_array, slope, intercept)
    return boundaries_array
Ejemplo n.º 2
def slop(bin,binwidth):
    """Build a tab-separated table of sliding-window regression results.

    NOTE(review): several statements appear to be missing from this listing
    (loop bodies that should append to ``slope1`` / ``slope2`` / ``apex`` /
    ``peak`` / ``interceptList`` and the ``else`` branches of two
    conditionals), so the block does not parse as shown.  The comments below
    describe only what is visible.

    Args:
        bin: mapping iterated through ``.items()``; presumably bin position ->
            frequency -- confirm against the caller.  The name shadows the
            builtin ``bin``.
        binwidth: converted with ``float()`` in the intercept computation.

    Returns:
        ``outputlist``: a header row followed by one list of string cells per
        data row, with "\t" separators and a trailing "\n" cell.
    """
    outputlist = [["Bin", "\t", "Frequency", "\t", "Slope1", "\t", "Slope2", "\t", "peak-Width", "\t", "peak-Apex", "\t","intercept_mass", "\n"]]
    slope1 = [0]
    # First pass: regress values on keys over each window of 7 consecutive items.
    # NOTE(review): the fitted slope ``s`` is never stored in the visible code.
    for index in range(0, len(bin) - 6):
        tempD=dict(itertools.islice(bin.items(), index,index + 7))
        s, intercept, r, p, std_error = linregress(list(tempD.keys()), list(tempD.values()))
    slope2 = []
    # Second pass: regress a 7-element slice of slope1 on the window values.
    # NOTE(review): ``s1`` is never stored in the visible code either.
    for index1 in range(0, len(bin) - 13):
        tempD=dict(itertools.islice(bin.items(), index1 + 3,index1 + 10))
        #print(index1,len(tempD),len(slope1[index1 + 1:index1 + 8]))
        s1, intercept1, r1, p1, std_error1 = linregress(list(tempD.values()), slope1[index1 + 1:index1 + 8])
    apex = []
    peak = []
    interceptList = [0]
    # NOTE(review): an ``else:`` seems to be missing between the two pairs of
    # assignments; as shown, the second pair always overwrites the first.
    if len(bin) % 2 == 0:
        minus1 = 6
        minus2 = 3
        minus1 = 7
        minus2 = 3
    # Sign change of slope1 from positive to negative (body missing in this listing).
    for index3 in range(len(bin) - minus1):
        if slope1[index3] > 0.0 and slope1[index3 + 1] < 0.0:
    # Negative slope2 (body missing in this listing).
    for index4 in range(len(bin) - 13):
        if slope2[index4] < 0:
    # Zero-pad the per-window series at both ends.
    slope1 = [0] * 2 + slope1 + [0] * 3
    slope2 = [0] * 6 + slope2 + [0] * 6
    apex = [0] * 3 + apex + [0] * (len(bin) - (len(apex) + 3))
    peak = [0] * 6 + peak + [0] * 6
    for index6 in range(len(bin) - 6):
        # NOTE(review): an ``else:`` is probably missing after the ``inf``
        # assignment.  islice(..., index6, index6) has start == stop, so tempD
        # is always empty, and the expression below adds a list to a float.
        if (abs(slope1[index6 + 1]) + abs(slope1[index6 + 2])) == 0.0:
            intercept_mass = float("inf")
            tempD=dict(itertools.islice(bin.items(), index6,index6))
            intercept_mass = list(tempD.values()) + (float(binwidth) * abs(slope1[index6 + 1])) / (
                abs(slope1[index6 + 1]) + abs(slope1[index6 + 2]))
    # Pad interceptList with zeros up to len(bin).
    interceptList = interceptList + [0] * (len(bin) - len(interceptList))
    plot_x = []
    plot_y = []
    for index5 in range(len(bin)-13):
        # start == stop again, so tempD is empty and the first cell is "[]".
        tempD=dict(itertools.islice(bin.items(), index5,index5))
        # NOTE(review): slope1[index5] is written twice, so it also lands in
        # the "Frequency" column of the header row.
        outputlist.append([str(list(tempD.values())), "\t", str(slope1[index5]), "\t", str(slope1[index5]), "\t", str(slope2[index5]),"\t", str(peak[index5]), "\t", str(apex[index5]), "\t", str(interceptList[index5]), "\n"])
    return outputlist
Ejemplo n.º 3
    def test_multinomial_elementwise_distribution(self):
        '''Verify that the created variables approach a multinomial distribution for large numbers
        of samples.

        For sample sizes r = 2**4 .. 2**16 the scaled Frobenius norm of (observed frequencies - p)
        is recorded, and log(error) is regressed on log(r) to check the power law C * r**alpha.
        '''
        (m, n, k) = (6, 5, 1)
        r = 2 ** np.arange(4, 17)
        p = statutil.random_row_stochastic((m, n))
        error = np.zeros((len(r),))
        for (i, r_val) in enumerate(r):
            # range() rather than the Python-2-only xrange(); k is tiny so nothing else changes.
            for _ in range(k):
                x = statutil.multinomial_elementwise(p, r_val)
                # Root-mean-square-error of observed frequencies w.r.t. desired frequencies
                error[i] += statutil.norm_frobenius_scaled(statutil.hist(x, n) / (1.0 * r_val) - p)
            error[i] /= (1.0 * k)
        # Validate the error model suggested by the Central Limit Theorem: error ~ C * r^(-0.5).
        # The error is averaged over k experiments for each sample size r. The fitted coefficient C
        # is compared against 1.96 (the two-sided 95% normal quantile) times the 2-norm of the
        # per-row variances computed below, divided by sqrt(m).
        # See http://en.wikipedia.org/wiki/Central_limit_theorem
        alpha, c, r_value, _, _ = linregress(np.log(r), np.log(error))
        c = np.exp(c)
        # Variance of the category index j under each row distribution p[i, :].
        categories = np.arange(p.shape[1])
        row_variance = np.sum(p * categories ** 2, axis=1) - np.sum(p * categories, axis=1) ** 2
        # np.linalg.norm is the public entry point; np.linalg.linalg is a private module path.
        c_bound = 1.96 * np.linalg.norm(row_variance, 2) / np.sqrt(p.shape[0])
        assert_almost_equal(alpha, -0.5, decimal=1, err_msg='Unexpected error term growth power')
        self.assertTrue(c <= c_bound, 'Error term coefficient outside 95% confidence interval')
        self.assertTrue(abs(r_value) > 0.99, 'Error does not fit a power law in sample size')
Ejemplo n.º 4
def calculate_monthly_lapse_rates(csv, station_meta):
    """Print monthly tmin/tmax lapse rates from station temperatures and elevations.

    NOTE(review): this listing is incomplete -- the ``read_csv(`` call is
    never closed, the ``for s in stations.keys()`` loop is out of order, and
    the ``else:`` of the tmin/tmax branch is missing.  The comments describe
    only what is visible.

    Args:
        csv: path handed to ``read_csv`` (space-separated).
        station_meta: path to a JSON file; each value is read for its
            ``'elev'`` and ``'zone'`` entries.
    """
    mdf = read_csv(csv,
                   sep=' ',
    # Average all rows that share a calendar month of the index.
    mdf = mdf.groupby(mdf.index.month).mean()
    with open(station_meta, 'r') as js:
        stations = json.load(js)

    tmin_lapse, tmax_lapse = [], []
    for temp in ['tmin', 'tmax']:
        for month in range(1, 13):
            temps, elevations = [], []
            # Columns whose name contains the current variable name.
            cols = [c for c in mdf.columns if temp in c]
            d = mdf[cols]
                temps.append(d['{}_{}'.format(s, temp)].loc[month])
                for s in stations.keys()
            # List comprehension used only for its append side effect.
            [elevations.append(v['elev']) for k, v in stations.items()]
            # Temperature regressed on elevation; the slope is scaled by 1000 below.
            regression = linregress(elevations, temps)
            # NOTE(review): as shown, both lists are appended to when temp == 'tmin'.
            if temp == 'tmin':
                tmin_lapse.append('{:.3f}'.format(regression.slope * 1000.))
                tmax_lapse.append('{:.3f}'.format(regression.slope * 1000.))

    print('tmax_lapse = {}'.format(', '.join(tmax_lapse)))
    print('tmin_lapse = {}'.format(', '.join(tmin_lapse)))

    print('station elevations')
    # Elevations ordered by each station's 'zone' value.
    elevs = sorted([(v['zone'], v['elev']) for k, v in stations.items()],
                   key=lambda x: x[0])
    print(', '.join([str(x[1]) for x in elevs]))
Ejemplo n.º 5
def reuse_model_reg(X_test, y_test, wildcard_name, ws=os.getcwd(), save=True):
    """Load tuned models from disk, predict on ``X_test`` and print regression metrics.

    NOTE(review): this listing looks incomplete -- the body of the
    ``os.path.exists`` check and the tail of the ``glob.glob(`` call are
    missing, and nothing visible appends to ``prediction_list`` or
    ``feature_list``.

    Args:
        X_test: features handed to each model's ``predict``.
        y_test: reference values the predictions are scored against.
        wildcard_name: not referenced in the visible code.
        ws: root directory searched for ``output_rs_learn/tuned_models``; the
            default is evaluated once, when the function is defined.
        save: not referenced in the visible code.

    Returns:
        DataFrame built from ``prediction_list`` (transposed) with
        ``feature_list`` as its column labels.
    """

    # Built from the current working directory, not from ``ws``.
    misc_output_path = os.path.join(os.getcwd(), 'output_rs_learn', 'misc')

    if not os.path.exists(misc_output_path):

    prediction_list = []
    feature_list = []
    for tuned_model in glob.glob(
            os.path.join(ws, 'output_rs_learn', 'tuned_models',

        model_trained = joblib.load(tuned_model)
        # File name without its last four characters.
        model_name = os.path.basename(tuned_model)[:-4]
        prediction = model_trained.predict(X_test)


        slope, intercept, r_value, p_value, std_err = stats.linregress(
            y_test, prediction)
        r2 = r2_score(y_test, prediction)
        rmse = sqrt(mean_squared_error(prediction, y_test))
        # Signed relative error in percent; its mean is printed as "mnb".
        percent_err = ((prediction - y_test) / y_test) * 100
        mnb = np.mean(percent_err)

        print(f'{model_name} r: %.2f, r2: %.2f, rmse: %.2f, mnb: %.2f' %
              (r_value, r2, rmse, mnb))

    df_prediction = pd.DataFrame(prediction_list).T
    df_prediction.columns = feature_list

    return df_prediction
Ejemplo n.º 6
def rest_task_regression():
    """Plot task-versus-rest regressions of the regional metric, one figure per template.

    For each template in ``(tpt_cole, tpt_sh)`` a 2x3 grid of axes is created;
    the row index comes from ``lib_details`` and the column index from
    ``task_order(False)``.  Each panel scatters the rest metric (x) against the
    task metric (y) coloured by network, overlays the least-squares line and
    annotates r squared.  The figure is written through ``savefig`` and the
    value it returns is printed.
    """
    for tpt in [tpt_cole, tpt_sh]:
        fig, axs = plt.subplots(2, 3, figsize=(16, 10), sharex="row", sharey="row")
        # Collected across every row so that all rotated labels reach savefig;
        # starting from a list also keeps ``txt + [lgn]`` valid when no panel is drawn.
        txt = []
        for li, (lib, _name, lbl) in enumerate(lib_details):
            df = lib.gen_long_data(tpt) \
                .groupby(["task", "region", "network"]).mean().reset_index() \
                .convert_column(metric=lambda x: x * 1000)
            df_rest = df.and_filter(task="Rest")
            for ti, task in enumerate(task_order(False)):
                # The merge suffixes give metric_x (rest) and metric_y (task).
                dft = pd.merge(df_rest, df.and_filter(task=task), on=["region", "network"])
                ax = axs[li, ti]
                sns.scatterplot(data=dft, x="metric_x", y="metric_y", hue="network",
                                hue_order=tpt.net_order, ax=ax, palette=tpt.net_colors)
                slope, intercept, r_value, _, _ = stats.linregress(dft.metric_x, dft.metric_y)
                # x and y are passed by keyword: current seaborn rejects them positionally.
                sns.lineplot(x=dft.metric_x, y=slope * dft.metric_x + intercept, ax=ax, color='black')
                # NOTE(review): the "***" marker is hard-coded, not derived from the p-value.
                ax.text(0.3, 0.8, f"$r^2$={r_value ** 2:.2f}***", ha='center', va='center',
                        transform=ax.transAxes)
                ax.set(xlabel=f"Rest {lbl}", ylabel="")
                txt.append(ax.text(-0.15 if ti == 0 else -0.05, 0.5, f"{task} {lbl}",
                                   transform=ax.transAxes, rotation=90, va='center', ha='center'))
        # One marker-only legend entry per network.
        legend_handles = [
            Line2D([], [], color=color, marker='o', linestyle='None', markersize=5, label=label)
            for _net, color, label in zip(tpt.net_order, tpt.net_colors,
                                          tpt.net_labels(break_space=False))
        ]
        n_col = 6 if len(tpt.net_order) == 12 else 7
        lgn = fig.legend(handles=legend_handles, loc=2, ncol=n_col, handletextpad=0.1, mode="expand",
                         bbox_to_anchor=(0.12, -0.04, 0.785, 1))
        print(savefig(fig, f"regression.{tpt}", extra_artists=txt + [lgn, ], low=False))
Ejemplo n.º 7
def calculate_histogram_sizes(tracks_queue, config, out_queue):
    """Accumulate tracks from a queue and fit a log-log line to each column of ``tp.imsd``.

    NOTE(review): the outer loop has no visible exit, and ``values``,
    ``params`` and ``out_queue`` are never used afterwards; the listing may be
    truncated.

    Args:
        tracks_queue: queue polled with ``empty()`` / ``qsize()`` / ``get()``.
        config: nested mapping; ``['tracking']['process']['um_pixel']`` and
            ``['camera']['fps']`` are read.
        out_queue: not referenced in the visible code.
    """
    params = config['tracking']['process']
    df = DataFrame()
    while True:
        # Drain everything currently waiting in the queue into df.
        while not tracks_queue.empty() or tracks_queue.qsize() > 0:
            data = tracks_queue.get()
            df = df.append(data)

        # Also true for an empty frame, since 0 % 100 == 0.
        if len(df) % 100 == 0:
            # t1 = tp.filter_stubs(df, params['min_traj_length'])
            # print(t1.head())
            # t2 = t1[((t1['mass'] > params['min_mass']) & (t1['size'] < params['max_size']) &
            #          (t1['ecc'] < params['max_ecc']))]
            # print(t2.head())
            # t2 = t1
            # d = tp.compute_drift(t1)
            # tm = tp.subtract_drift(t2.copy(), d)
            im = tp.imsd(df, config['tracking']['process']['um_pixel'], config['camera']['fps'])
            values = []
            for pcle in im:
                data = im[pcle]
                # Straight-line fit of log(values) against log(index).
                slope, intercept, r, p, stderr = stats.linregress(np.log(data.index), np.log(data.values))
                values.append([slope, intercept])

def fit_exponential_func(y, x):
    """Fit ``y = A * exp(B * x)`` by regressing ``log(y)`` on ``x``.

    Taking logs turns the model into ``log(y) = log(A) + B * x``, so the slope
    of the linear fit is ``B`` and its intercept is ``log(A)``.  ``x`` must
    expose ``.values``.

    Returns:
        The tuple ``(A, B)``.
    """
    abscissa = np.transpose(x.values)
    log_fit = linregress(abscissa, np.log(y))
    growth_rate, log_amplitude = log_fit[0], log_fit[1]
    return np.exp(log_amplitude), growth_rate
Ejemplo n.º 9
def find_consensus(unassigned, sample, is_vertical):
    """Attempt to find a set of measurements that forms a consensus, in the list of measurements.

    A regression line is fitted to ``sample`` and every unassigned measurement
    whose ``util.point_line_dist`` to that line is below ``RANSAC_TOLERANCE``
    is collected.

    Args:
        unassigned (list): List of unassigned measurements.
        sample (list): Measurements that fit the extracted line.
        is_vertical (bool): Whether the landmark is close to being vertical.

    Returns:
        list: List of measurements that fit the line.
    """
    cartesian_sample = numpy.array([point.location for point in sample])
    cartesian_unassigned = numpy.array(
        [point.location for point in unassigned])

    # If almost vertical, calculate line in terms of y.
    if is_vertical:
        cartesian_sample = numpy.fliplr(cartesian_sample)
        cartesian_unassigned = numpy.fliplr(cartesian_unassigned)

    # Calculate regression line.
    fit = stats.linregress(cartesian_sample[:, 0], cartesian_sample[:, 1])
    slope, intercept = fit[0], fit[1]

    # Keep the unassigned measurements that lie close enough to the line.
    return [
        measurement
        for measurement, location in zip(unassigned, cartesian_unassigned)
        if util.point_line_dist(location, slope, intercept) < RANSAC_TOLERANCE
    ]
Ejemplo n.º 10
def plot_mean_boxplot_with_pearson(dataset_id):
    """Compute per-revision stability metrics and a clipped Pearson r for each technique.

    NOTE(review): the listing is incomplete -- ``mean`` is never stored,
    ``data`` / ``pearson`` / ``technique_data`` are never filled, and the
    final plotting call is reduced to its last argument line.

    Args:
        dataset_id: dataset identifier passed to ``Parser.parse_rectangles``
            and appended to the plot title.
    """
    data = []
    pearson = []
    for i, technique_id in enumerate(technique_list):
        # Progress output: technique acronym, no newline.
        print(Globals.acronyms[technique_id], end=' ', flush=True)
        technique_pearson = []
        technique_data = []
        history = Parser.parse_rectangles(technique_id, dataset_id)
        # Compare each revision with the one that follows it.
        for revision in range(len(history) - 1):
            delta_vis = DeltaMetrics.compute_delta_vis(history[revision],
                                                       history[revision + 1])
            delta_data = DeltaMetrics.compute_delta_data(
                history[revision], history[revision + 1])
            un_mov = UnavoidableMovement.compute_unavoidable_movement(
                history[revision], history[revision + 1])

            ratios = (1 - delta_vis) / (1 - delta_data)
            diffs = 1 - abs(delta_vis - delta_data)
            unavoidable = 1 - (delta_vis - un_mov)
            # Unweighted average of the three scores above.
            mean = (ratios + diffs + unavoidable) / 3

            # Compute linear regression statistics
            _, _, r_value, _, _ = stats.linregress(delta_data, delta_vis)
            # Non-positive correlations are recorded as 0.
            technique_pearson.append(r_value if r_value > 0 else 0)


                                  title='Mean with Pearson - ' + dataset_id)
Ejemplo n.º 11
def approximate_random_effects(data, labels, group):
    """Average per-group regression slopes and test them against zero.

    For every distinct value of ``data[group]`` the rows of that group are
    selected and ``data[labels[1]]`` is regressed on ``data[labels[0]]``.  The
    resulting slopes are averaged and passed to a one-sample t-test against 0.
    A summary line is printed.

    Args:
        data: table indexable by column name and by boolean mask.
        labels: two column names, ``(x_column, y_column)``.
        group: name of the grouping column.

    Returns:
        tuple: ``(average_slope, t, p_val)``.
    """
    slopes = []
    for donor_id in set(data[group]):
        in_donor = data[group] == donor_id
        fit = linregress(list(data[labels[0]][in_donor]),
                         list(data[labels[1]][in_donor]))
        slopes.append(fit[0])
    # A real list is used: on Python 3, dict.values() is a view that numpy
    # would wrap as a single object instead of an array of numbers.
    average_slope = np.array(slopes).mean()
    t, p_val = ttest_1samp(slopes, 0)
    # One pre-formatted argument prints identically on Python 2 and 3.
    print("Averaged slope across donors = %g (t=%g, p=%g)" % (average_slope, t, p_val))
    return average_slope, t, p_val
Ejemplo n.º 12
def draw_fit(x, y):
    """Fit a line to the leading ``arg.regression`` samples and draw it.

    The fitted line is plotted dashed in black and annotated with its
    equation and R^2, anchored at the last x and second-to-last y of the
    truncated series.
    """
    limit = arg.regression
    xs, ys = x[:limit], y[:limit]
    fit = linregress(xs, ys)
    slope, intercept, r_value = fit[0], fit[1], fit[2]
    label = r'$f(x)={0:.3f}x+{1:.3f}, R^2={2:.3f}$'.format(
        slope, intercept, r_value**2)
    fitted = [slope * value + intercept for value in xs]
    plt.plot(xs[:arg.regression], fitted, 'k--')
    plt.annotate(label, xy=(xs[-1], ys[-2]))
def regress(my_dict):
    """Run a linear regression and print its intercept, slope and r squared.

    NOTE(review): ``x`` and ``y`` are never filled in the visible code -- the
    loop over ``my_dict`` only increments ``count`` -- so ``linregress`` is
    called on two empty lists.  The statements that derive x/y from the
    dictionary items appear to be missing; confirm against the original source.
    """
    count = 0
    x = []
    y = []
    for k, v, in my_dict.items():
        count += 1
    m, b, r, p, std_err = linregress(x, y)
    print("b = " + str(b) + ", m = " + str(m) + ", r^2 = " + str(r * r))
Ejemplo n.º 14
def scatter_plot(ssu_df, fg_df):
    """Draw two side-by-side regression panels of 16S identity against functional-gene scores.

    The left panel regresses ``fg_df['identity(%)']`` and the right panel
    ``fg_df['similarity(%)']`` on ``ssu_df['identity(%)']``; each is annotated
    with the fitted equation, R^2, p-value and standard error.

    NOTE(review): no scatter call is visible, and the second panel computes a
    fit line (``np.polyfit``) but never plots it; statements may be missing
    from this listing.  ``intercept(...)`` is a helper defined elsewhere.

    Returns:
        tuple: the two ``linregress`` results ``(iden_func, siml_func)``.
    """
    ssu_iden, fg_iden, fg_siml = ssu_df['identity(%)'], fg_df['identity(%)'], fg_df['similarity(%)']
    fig = plt.figure(figsize=(15,7),dpi=300)
    gs = gridspec.GridSpec(1,2,wspace=0.2,left=0.05, right=0.95)
#   correlation plot of 16S rRNA identity versus functional gene identity
    ax0 = plt.subplot(gs[0])
    iden_func = stats.linregress(ssu_iden,fg_iden)
    # Integer x grid spanning the observed identity range.
    x_rg = range(int(min(ssu_iden)),int(max(ssu_iden))+1)
    y_rg = np.polyval([iden_func[0],iden_func[1]],x_rg)
    plt.text(5,95, r'$y =  %.2f x  %s $' % (iden_func[0],intercept(iden_func[1])), fontsize=15)
    plt.text(5,90, r'$R^2=%.4f$' % (iden_func[2]**2))
    plt.text(5,85, r'$P-value=%.2e$' % (iden_func[3]))
    plt.text(5,80, r'$StdErr=%.4f$' % (iden_func[4]))
    plt.title('16S rRNA identity vs. Funtional gene identity')
    plt.plot(x_rg,y_rg,'r--',label='line 1')   
    plt.xlabel('16S rRNA gene identity (%)')
    plt.ylabel('Funtional gene identity (%)')
#   correlation plot of 16S rRNA identity versus functional gene similarity
    ax1 = plt.subplot(gs[1])
    siml_func = stats.linregress(ssu_iden,fg_siml)
    x_rg = range(int(min(ssu_iden)),int(max(ssu_iden))+1)
    y_rg = np.polyval([siml_func[0],siml_func[1]],x_rg)
    plt.text(5,95, r'$y =  %.2f x  %s $' % (siml_func[0],intercept(siml_func[1])), fontsize=15)
    plt.text(5,90, r'$R^2=%.4f$' % (siml_func[2]**2))
    plt.text(5,85, r'$P-value=%.2e$' % (siml_func[3]))
    plt.text(5,80, r'$StdErr=%.4f$' % (siml_func[4]))
    plt.title('16S rRNA identity vs. Funtional gene similarity')
    # Same fit recomputed with polyfit; its y_rg is not used in the visible code.
    (m,b) = np.polyfit(ssu_iden,fg_siml, 1)
    x_rg = range(int(min(ssu_iden)),int(max(ssu_iden))+1)
    y_rg = np.polyval([m,b],x_rg)
    plt.xlabel('16S rRNA gene identity (%)')
    plt.ylabel('Funtional gene similarity (%)')
    return iden_func, siml_func
Ejemplo n.º 15
 def _linear_regression(self):
     """Fit a straight line to the non-NaN samples of ``self.d``.

     Final trend is expressed as a linear interpolation of the trend-signal
     obtained after the deseasonal processor of the input signal.  Sample
     positions ``0 .. len - 1`` are used as the x axis.

     Returns:
         tuple: ``(slope, intercept, p_value, r_squared, std_err)``.
     """
     line = np.asarray(self.d)
     xx = np.arange(0, len(line), 1)
     # Build the validity mask once and apply it to both axes.
     valid = ~np.isnan(line)
     slope, intercept, r_value, p_value, std_err = stats.linregress(
         xx[valid], line[valid])
     return slope, intercept, p_value, np.square(r_value), std_err
Ejemplo n.º 16
def linregress(x_vals, y_vals):
    """Plot the least-squares regression line of scipy for the given points.

    The line is fitted with ``stats.linregress``, the p-value and standard
    error are printed, and the fitted values are plotted with a legend label
    showing ``k = 1 / slope`` (rounded) and R squared.

    Args:
        x_vals: x coordinates.
        y_vals: y coordinates.
    """
    a_value, b_value, r_value, p_value, std_err = stats.linregress(x_vals, y_vals)
    est_yvals = a_value * pylab.array(x_vals) + b_value
    k = 1 / a_value
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print("%s %s" % (p_value, std_err))
    # plot regression line
    pylab.plot(x_vals, est_yvals, label='Least-squares fit, k = ' + str(round(k)) +
               ", RSquare = " + str(r_value**2))
Ejemplo n.º 17
def plot_regression(df, x, y, extra_names={}):
    '''Plot a regression with annotated statistics.

    NOTE(review): this listing is badly truncated -- the ``it.chain(...)``
    arguments, the ``AnchoredText(...)`` call and the save-to-file code are
    only partly present, and ``synchronous`` is not defined in the visible
    code.  ``extra_names`` is a mutable default and is not referenced in what
    remains.
    '''
    # ugly hack to include origin in plot bounds
    ax = _do_plot(df, x, y)
    xlim, ylim = ax.get_xlim(), ax.get_ylim()
    _do_plot(df, x, y, ax=ax)

    # calculate some regression statistics...
    # Integers are formatted as-is, everything else with two decimals.
    info = [
        ("{} = " + ("{}" if isinstance(v, int) else "{:.2f}")).format(k, v)
        for k, v in it.chain(
                    'Standard Error',
                stats.linregress(df[x], df[y]),
            ), [('$n$', len(df))])

    # ... and annotate regression statistics onto upper left
    at = AnchoredText(
        loc='upper left',

    # save to file
    # and assert df['Load'] is homogeneous
                'x': slugify(x),
                'y': slugify(y),
                'synchronous': str(synchronous),
                'ext': '.png',
Ejemplo n.º 18
def approximate_random_effects(data, labels, group):
    """Per-donor regression with averaged slope and r, written to CSV files and plotted.

    NOTE(review): this listing is incomplete -- ``ttest_1samp`` is shown with
    only its second argument, the ``w.writerow([`` line is missing, and the
    ``sns.lmplot`` call has lost arguments.  ``output_file``,
    ``output_file_GSEA``, ``gene``, ``cog_function`` and ``plot_pdf`` are not
    defined in the visible code.

    Args:
        data: table indexable by column name and by boolean mask.
        labels: two column names, ``(x_column, y_column)``.
        group: name of the donor column.
    """
    slope_per_donor = np.array([])
    rval_per_donor = np.array([])
    #print "Performing approximate random effect analysis..."
    for donor_id in set(
            data[group]):  # one linear regression per distinct donor id
        #print "Total usable datapoints of donor %s: %d" % (donor_id, len(list(data[labels[0]][data[group] == donor_id]))) #shows usable datapoints per donor
        slope, _, rval, p_val, stderr = linregress(
            list(data[labels[0]][data[group] == donor_id]),
            list(data[labels[1]][data[group] == donor_id]))
        slope_per_donor = np.append(slope_per_donor, slope)
        rval_per_donor = np.append(rval_per_donor, rval)

    #average_slope = round(slope_per_donor.mean(),6) #get mean slope across donors
    #average_rval = round(rval_per_donor.mean(),6) #get mean r-value across donors
    average_slope = round(np.nanmean(slope_per_donor),
                          6)  # mean slope across donors, NaNs ignored
    average_rval = round(np.nanmean(rval_per_donor),
                         6)  # mean r-value across donors, NaNs ignored
    t_value, p_value = ttest_1samp(
        0)  #t-test (redundant information for downstream analyses)
    with open(output_file, 'a') as f:  #saving full data to .csv
        w = csv.writer(f)
        #print "Saving the analysis results..."
        # The row indexes rval_per_donor[0..5], i.e. it expects at least six donors.
            gene, average_rval, average_slope, rval_per_donor[0],
            rval_per_donor[1], rval_per_donor[2], rval_per_donor[3],
            rval_per_donor[4], rval_per_donor[5], t_value, p_value

    with open(output_file_GSEA, 'a') as f:  #saving GSEA input data to .csv
        w = csv.writer(f, delimiter='\t')
        #print "Saving to GSEA input file..."
        w.writerow([gene, average_rval])

    #Scatterplot of gene expression against reverse inference fMRI map z-score
    print "Plotting the correlation graph..."
    ax = sns.lmplot(labels[0],
                    fit_reg=True)  #comment-out for no plotting
    ax.set(xlabel="%s map z-score value" % (cog_function.capitalize()))
    ax = plot.title(gene)
    print "Saving the correlation graph..."
    plot.savefig(plot_pdf, format='pdf')
Ejemplo n.º 19
def get_taa_group_features(t, seeg, coords, tfr, tto):
    """Compute summary features (duration, slope, r squared, PCA variance) for one group.

    NOTE(review): the return statement is cut off after ``sum(`` in this
    listing, and ``mask`` / ``comps`` are not used in the visible code.

    Args:
        t: time axis compared against ``tfr`` / ``tto``.
        seeg: 2-D signal array; its axis-1 mean is subtracted with ``-=``,
            which modifies a NumPy array argument in place.
        coords: positions; distances are measured from ``coords[0]``.
        tfr, tto: arrays whose difference ``tto - tfr`` is averaged into
            ``duration`` -- presumably onset and offset times; confirm.
    """
    mask = (t > np.min(tfr)) * (t < max(np.max(tto), np.min(tfr) + 5.0))
    seeg -= np.mean(seeg, axis=1)[:, None]

    pca = PCA(n_components=PCA_NCOMP)
    comps = pca.fit_transform(seeg.T)
    var_explained = pca.explained_variance_ratio_
    duration = np.mean(tto - tfr)

    # Distance of every coordinate from the first one, regressed against tfr.
    line_coords = np.linalg.norm(coords - coords[0], axis=1)
    slope, _, rval, _, _ = stats.linregress(line_coords, tfr)

    return duration, abs(slope), rval**2, var_explained[0], sum(
Ejemplo n.º 20
def calibrate_data(in_data):
    """Derive linear calibration parameters relating ccd to rain-gauge data.

    Takes an input time series containing ccd (independent variable, i.e.
    predictor) and gauge measurement (dependent variable, i.e. predictand),
    and fits the linear model ``gauge = a0 + a1 * ccd``.

    :param in_data: input array with four columns of data: year, month, ccd, rain gauge measurement
    :return: calibration parameters ``(a0, a1)``, i.e. intercept and slope
    """
    # slice the array to select the gauge and ccd data
    gauge = in_data[:, 3]
    ccd = in_data[:, 2]
    # linregress(x, y) fits y on x, so ccd is the predictor here
    linear_model = linregress(ccd, gauge)
    a1 = linear_model[0]
    a0 = linear_model[1]
    # return a tuple containing the calibration parameters
    return a0, a1
Ejemplo n.º 21
def approximate_random_effects(data, labels, group):
    """Average per-group regression slopes, test them against zero and plot them.

    For every distinct value of ``data[group]`` the rows of that group are
    selected and ``data[labels[1]]`` is regressed on ``data[labels[0]]``.  The
    slopes are averaged, passed to a one-sample t-test against 0, shown as a
    violin plot, and the per-group regressions are drawn with ``sns.lmplot``.

    Args:
        data: table indexable by column name and by boolean mask.
        labels: two column names, ``(x_column, y_column)``.
        group: name of the grouping column.

    Returns:
        tuple: ``(average_slope, t, p_val)``.
    """
    slopes = []
    for donor_id in set(data[group]):
        in_donor = data[group] == donor_id
        fit = linregress(list(data[labels[0]][in_donor]),
                         list(data[labels[1]][in_donor]))
        slopes.append(fit[0])
    # A real list is used: on Python 3, dict.values() is a view that numpy
    # would wrap as a single object instead of an array of numbers.
    average_slope = np.array(slopes).mean()
    t, p_val = ttest_1samp(slopes, 0)
    # One pre-formatted argument prints identically on Python 2 and 3.
    print("Averaged slope across donors = %g (t=%g, p=%g)" % (average_slope, t, p_val))
    # NOTE(review): ``names=`` comes from an old seaborn API -- confirm it is
    # accepted by the seaborn version in use.
    sns.violinplot([slopes], inner="points", names=["donors"])
    plt.ylabel("Linear regression slopes between %s and %s" % (labels[0], labels[1]))
    plt.axhline(0, color="red")

    # x, y and data are passed by keyword: current seaborn rejects them positionally.
    sns.lmplot(x=labels[0], y=labels[1], data=data, hue=group, col=group, col_wrap=3)

    return average_slope, t, p_val
Ejemplo n.º 22
def capm(investment, market, risk_free_return=0):
    """Estimate historical CAPM parameters of an investment relative to the market.

    Consecutive prices are turned into log returns, ``log(1 + risk_free_return)``
    is subtracted from each of them, and the investment series is regressed
    on the market series.

  investment -- The daily prices of the investment under analysis.
  market -- The daily prices of the market investment.
  risk_free_return -- The risk-free return over the period of consideration, given as a fraction.
  Returns (alpha, beta, r), where alpha is the intercept, beta the slope and r the r-value."""
    risk_free_log = log(1.0 + risk_free_return)

    def excess_log_returns(prices):
        # Pair each price with its successor.
        return [log(1.0 * later / earlier) - risk_free_log
                for (earlier, later) in zip(prices[0:-1], prices[1:])]

    investment_excess = excess_log_returns(investment)
    market_excess = excess_log_returns(market)
    fit = linregress(market_excess, investment_excess)
    return (fit[1], fit[0], fit[2])
Ejemplo n.º 23
def comp_Z(se_data):
    """For each distinct value in column 1, extrapolate a quantity from the last rows sorted by column 0.

    NOTE(review): ``Z`` is never appended to in the visible code, so an empty
    array is returned; a ``Z.append(...)`` line appears to be missing.  The
    body mixes tab and space indentation, which Python 3 rejects.
    """
    ulist = np.unique(se_data[:,1])
    max_points = 3
    Z = []
    for u in ulist:
        ui =  np.where(se_data[:,1] == u)
        # sort by column 0 and keep the last max_points rows
        # presumably column 0 is inverse temperature, so these are the lowest temperatures -- TODO confirm
	ii = np.argsort(se_data[ui][:,0])
        d = se_data[ui][ii][-max_points:]	# rows with the largest column-0 values for the given U
	w0l = np.pi/d[:,0]			# zero Matsubara Frequency
	dRSigma = d[:,2]/w0l			# approximation for the derivative of SE at w=0
	# linear fit against 1/column0; the intercept is the value extrapolated to 1/column0 -> 0
	res = stats.linregress(1./np.array(d[:,0]), dRSigma)
	# intercept paired with the fit's stderr attribute as its uncertainty
	rr = unc.ufloat(res.intercept,res.stderr)
	rr = (1./(1.-rr))
    return np.array(Z)
Ejemplo n.º 24
def _test1():
    """Exercise the incremental-statistics helpers and the segmented regression.

    A four-piece linear signal is built on 41 points of [0, 10]; the pieces
    join at (2, -1), (3, 1) and (5, -1).  The incremental variance and slope
    updates are compared with direct computations, then ``seg_lin_reg`` must
    recover the four (intercept, slope) pairs.  Finally a few spikes are
    injected and the segments are plotted again.
    """
    x = np.linspace(0., 10., 41)
    # (selection mask, line evaluated on the whole grid) for every piece.
    pieces = (
        (x < 2, 2. - 1.5 * x),
        ((x >= 2) & (x < 3), 2. * x - 5.),
        ((x >= 3) & (x < 5), -x + 4.),
        (5 <= x, 2. * x - 11.),
    )
    y = np.array(x)
    for selector, line in pieces:
        y[selector] = line[selector]

    # Statistics over all but the last sample, then updated with the last one.
    n = len(x)
    head_x, head_y = x[:-1], y[:-1]
    var_x0 = np.var(head_x) * (n - 1.)
    var_y0 = np.var(head_y) * (n - 1.)
    mean_x = np.mean(head_x) + (x[-1] - np.mean(head_x)) / n
    mean_y = np.mean(head_y) + (y[-1] - np.mean(head_y)) / n
    dx = x[-1] - mean_x
    dy = y[-1] - mean_y
    _assert_eq(np.var(x) * n, _update_var(var_x0, n, dx))
    _assert_eq(np.var(y) * n, _update_var(var_y0, n, dy))
    beta0 = np.cov(head_x, head_y, bias=True)[0][1] / np.var(head_x)
    fit = stats.linregress(x, y)
    slope, intercept = fit[0], fit[1]
    updated_slope = _update_beta(beta0, n, dx, dy, var_x0, np.var(x) * n)
    print('slope exact = {}, computed = {}'.format(slope, updated_slope))
    print('intercept exact = {}, computed = {}'.format(
        intercept, mean_y - slope * mean_x))

    # Each recovered segment must carry the generating intercept and slope.
    segs = seg_lin_reg(x, y, 0.0001)
    assert len(segs) == 4
    expected = ((2., -1.5), (-5., 2.), (4., -1.), (-11., 2.))
    for seg, (want_intercept, want_slope) in zip(segs, expected):
        _assert_eq(seg[1], want_intercept)
        _assert_eq(seg[2], want_slope)

    plot_segments(x, y, 0.0001)

    # test spikes
    for position, value in ((17, 2), (7, 0), (-1, 7), (-2, 6)):
        y[position] = value
    plot_segments(x, y, 0.0001)
Ejemplo n.º 25
    def analyze(self, gaps: Sequence, mlc: MLC, y_field_size: float = 100, profile_width=10):
        """Analyze an EPID image with varying MLC overlaps to determine the DLG.

        Parameters
        ----------
        gaps
            The gaps (i.e. overlap) of the leaves in mm.
            These should typically be in descending order and also be negative. E.g. (-1, ..., -2.2).
        mlc
            The MLC type/arrangement. This lets us know where the leaf centers are to take a profile along.
        y_field_size
            The field size along the y-dimension (perpendicular to the leaf travel). This will determine which
            leaves are associated with which gap.
        profile_width
            The width of the profile to take along the axes parallel to leaf motion. This should be a good bit wider
            than the gap values. The default is reasonable and it is unlikely it needs tweaking.

        Results are stored on the instance as ``measured_dlg``, ``planned_dlg_per_leaf``,
        ``measured_dlg_per_leaf`` and ``_lin_fit``.
        """
        measured_dlg_per_leaf = []
        planned_dlg_per_leaf = []
        mlc = mlc.value['arrangement']
        g = list(gaps)
        profile_width_px = round(self.image.dpmm * profile_width)
        mid_width = self.image.shape[1] / 2
        mid_height = self.image.shape[0] / 2
        for idx, center in enumerate(mlc.centers):
            if -y_field_size / 2 < center < y_field_size / 2:
                # get the pixel window area
                center_px = center * self.image.dpmm
                width_px = mlc.widths[idx] / 4 * self.image.dpmm
                top = ceil(mid_height + center_px + width_px)
                bottom = floor(mid_height + center_px - width_px)
                # sample the window and take the average perpendicular to MLC motion
                window = self.image[bottom:top, int(mid_width - profile_width_px):int(mid_width + profile_width_px)]
                width = self._determine_measured_gap(window.mean(axis=0))
                planned_dlg_per_leaf.append(self._get_dlg_offset(y_field_size, center, g))
                # Record the measured gap too, so both series stay paired for the fit below.
                measured_dlg_per_leaf.append(width)
        # fit the data to a line and determine the DLG from the 0 intercept
        lin_fit = stats.linregress(planned_dlg_per_leaf, measured_dlg_per_leaf)
        dlg = lin_fit.intercept / lin_fit.slope
        self._lin_fit = lin_fit
        self.measured_dlg = dlg
        self.planned_dlg_per_leaf = planned_dlg_per_leaf
        self.measured_dlg_per_leaf = measured_dlg_per_leaf
Ejemplo n.º 26
def approximate_random_effects(data, labels, group):
    """Average per-group regression slopes, test them against zero and plot them.

    For every distinct value of ``data[group]`` the rows of that group are
    selected and ``data[labels[1]]`` is regressed on ``data[labels[0]]``.  The
    slopes are averaged, passed to a one-sample t-test against 0, shown as a
    violin plot, and the per-group regressions are drawn with ``sns.lmplot``.

    Args:
        data: table indexable by column name and by boolean mask.
        labels: two column names, ``(x_column, y_column)``.
        group: name of the grouping column.

    Returns:
        tuple: ``(average_slope, t, p_val)``.
    """
    slopes = []
    for donor_id in set(data[group]):
        in_donor = data[group] == donor_id
        fit = linregress(list(data[labels[0]][in_donor]),
                         list(data[labels[1]][in_donor]))
        slopes.append(fit[0])
    # A real list is used: on Python 3, dict.values() is a view that numpy
    # would wrap as a single object instead of an array of numbers.
    average_slope = np.array(slopes).mean()
    t, p_val = ttest_1samp(slopes, 0)
    # One pre-formatted argument prints identically on Python 2 and 3.
    print("Averaged slope across donors = %g (t=%g, p=%g)" % (average_slope, t, p_val))
    # NOTE(review): ``names=`` comes from an old seaborn API -- confirm it is
    # accepted by the seaborn version in use.
    sns.violinplot([slopes], inner="points", names=["donors"])
    plt.ylabel("Linear regression slopes between %s and %s" % (labels[0], labels[1]))
    plt.axhline(0, color="red")
    # x, y and data are passed by keyword: current seaborn rejects them positionally.
    sns.lmplot(x=labels[0], y=labels[1], data=data, hue=group, col=group, col_wrap=3)
    return average_slope, t, p_val
Ejemplo n.º 27
def fit_exp_f(y, x):
    """
    Returns parameters A and B that would fit an exponential
    function of y = A*e^(Bx)

    Parameters
    ----------
        y: pd.Series
            Variable y in the formula
        x: pd.Series
            Variable x in the formula

    Returns
    -------
        Parameters A and B
    """
    # Fit with y = A*e^(Bx)  ->  log(y) = log(A) + B*x, so the slope of the
    # linear fit is B and its intercept is log(A).
    fit = linregress(transpose(x.values), log(y))
    B, logA = fit[0], fit[1]
    return exp(logA), B
Ejemplo n.º 28
 def test_multinomial_elementwise_distribution(self):
     '''Verify that the created variables approach a multinomial distribution for large numbers
     of samples.

     NOTE(review): in this listing the three closing assertions have lost their opening lines
     (the call names and first arguments), so the block does not parse as shown.'''
     (m, n, k) = (6, 5, 1)
     r = 2**np.arange(4, 17)
     p = statutil.random_row_stochastic((m, n))
     #p = statutil.scale_row_sums(np.ones((m, n)))
     error = np.zeros((len(r), ))
     for (i, r_val) in enumerate(r):
         # xrange exists only on Python 2.
         for _ in xrange(k):
             x = statutil.multinomial_elementwise(p, r_val)
             # Root-mean-square-error of observed frequencies w.r.t. desired frequencies
             error[i] += statutil.norm_frobenius_scaled(
                 statutil.hist(x, n) / (1.0 * r_val) - p)
         error[i] /= (1.0 * k)
     # Validate the model error of the central limit theorem: C*r^(-0.5).
     # This is a consequence of the Central Limit Theorem. We are making k experiments for
     # each sample size r. The visible bound on C below uses the factor 1.96 (two-sided 95%
     # normal quantile).
     # NOTE(review): the original derivation quoted 1.6, 1.5 and 1.96 in different places;
     # only 1.96 appears in the code.
     # See http://en.wikipedia.org/wiki/Central_limit_theorem
     # Log-log fit: alpha is the exponent and exp(c) the coefficient of the power law.
     alpha, c, r_value, _, _ = linregress(np.log(r), np.log(error))
     c = np.exp(c)
     #        print c , 1.96 * np.linalg.linalg.norm(np.sum(p * np.arange(p.shape[1]) ** 2, axis=1) -
     #                                                          np.sum(p * np.arange(p.shape[1]), axis=1) ** 2,
     #                                                          2) / np.sqrt(p.shape[0]),
                         err_msg='Unexpected error term growth power')
         c <= 1.96 * np.linalg.linalg.norm(
             np.sum(p * np.arange(p.shape[1])**2, axis=1) -
             np.sum(p * np.arange(p.shape[1]), axis=1)**2, 2) /
         'Error term coefficient outside 95% confidence interval')
         abs(r_value) > 0.99,
         'Error does not fit a power law in sample size')
Ejemplo n.º 29
    def calculate_histogram(self):
        """Fit a log-log line to each column of ``tp.imsd`` and publish the (slope, intercept) pairs.

        NOTE(review): the listing is incomplete -- the body of the
        ``general_stop_event`` check is missing and the regression lines are
        indented as if an enclosing statement (``try:`` or ``if``) was dropped.
        """
        self.calculating_histograms = True
        # Work on a copy of the stored locations.
        locations = self.locations.copy()
        t1 = tp.filter_stubs(locations, self.config['process']['min_traj_length'])
        # t2 = t1[((t1['mass'] > self.config['process']['min_mass']) & (t1['size'] < self.config['process']['max_size']) &
        #          (t1['ecc'] < self.config['process']['max_ecc']))]
        im = tp.imsd(t1, self.config['process']['um_pixel'], self.config['process']['fps'])
        self.histogram_values = []
        for pcle in im:
            if general_stop_event.is_set():

            data = im[pcle]
            # Drop NaN entries from both the index and the values before taking logs.
            t = data.index[~np.isnan(data.values)]
            val = data.values[~np.isnan(data.values)]
                slope, intercept, r, p, stderr = stats.linregress(np.log(t), np.log(val))
                self.histogram_values.append([slope, intercept])
        self.calculating_histograms = False
        self.publisher.publish('histogram', self.histogram_values)
Ejemplo n.º 30
def recalculate_line(consensus, is_vertical):
    """Given a discovered consensus, recalculate the line through its points.

    A least-squares line is fitted to the consensus locations, every location
    is mapped onto that line with ``util.nearest``, and the two mapped points
    that are farthest apart (by ``util.dist``) are returned as the segment.

    Args:
        consensus (list): List of consensus measurements; each item must
            expose a ``location`` attribute.
        is_vertical (bool): Whether the line is almost vertical. If so the
            regression is done with the coordinate columns swapped and the
            result is flipped back before returning.

    Returns:
        tuple: Start and end points of line segment.
    """
    cartesian_consensus = numpy.array([point.location for point in consensus])

    # If almost vertical, calculate line in terms of y.
    if is_vertical:
        cartesian_consensus = numpy.fliplr(cartesian_consensus)

    # Calculate regression line.
    slope, intercept, _r, _p, _e = stats.linregress(cartesian_consensus[:, 0],
                                                    cartesian_consensus[:, 1])

    # Map every point onto the line once instead of once per pair.
    # NOTE(review): assumes util.nearest has no side effects — confirm.
    projections = [util.nearest(point, slope, intercept)
                   for point in cartesian_consensus]

    start = end = projections[0]
    distance = 0

    # Keep the pair of mapped points with the largest separation.
    for i, point_a in enumerate(projections):
        for point_b in projections[i + 1:]:
            new_dist = util.dist(point_a, point_b)
            if new_dist > distance:
                distance = new_dist
                start = point_a
                end = point_b

    # If line is vertical, flip coordinates back.
    if is_vertical:
        start = numpy.flipud(start)
        end = numpy.flipud(end)

    return start, end
Ejemplo n.º 31
def get_slopes(symbol_list, fund_type):
    """Return the positive regression slopes for the given fund symbols.

    For each symbol, ``parse_csv(symbol, fund_type)`` is passed to
    ``stats.linregress`` and the slope (first element of the result) is taken.

    Args:
        symbol_list: Iterable of fund symbols.
        fund_type: Forwarded unchanged to ``parse_csv``.

    Returns:
        list: Slopes strictly greater than zero, in the order of
        ``symbol_list``.
    """
    # For each fund, I perform a simple least-squares linear regression
    # to get the value as a function of time.

    # Here, I also restrict the analysis to only those funds which have
    # gained value over the past five years (i.e. have a positive slope).
    # The logic behind this is that, if we're only adding one fund to the
    # portfolio, we can limit ourselves to choosing one that has
    # historically done well. The question is then whether the US bonds
    # that have done well have done better than the emerging market funds
    # that have done well.

    slopes = []
    for symbol in symbol_list:
        slope = stats.linregress(parse_csv(symbol, fund_type))[0]
        if slope > 0.0:
            # NOTE(review): this statement was missing in the source; it is
            # reconstructed from the comment above and the returned list.
            slopes.append(slope)

    return slopes
def decompose(_data, _plots=False):
    """Decompose a signal into its linear trend and the flattened remainder.

    The values are regressed against their position ``0 .. len(_data) - 1``.

    Args:
        _data: Signal to decompose. Must expose ``.values``, ``.index`` and a
            string ``.name`` (presumably a named pandas Series — confirm
            against callers).
        _plots: When truthy, plot the actual signal, the flattened signal and
            the trend line (default False).

    Returns:
        tuple: ``(dataDecomp, slope, intercept)`` where ``dataDecomp`` is a
        DataFrame indexed like ``_data`` with one column ``<name>__flat``
        holding ``_data - slope * position``, and ``slope`` / ``intercept``
        are the linear regression coefficients.
    """
    indexDecomp = np.arange(len(_data))

    slope, intercept, _r_value, _p_value, _std_err = linregress(
        indexDecomp, np.transpose(_data.values))

    name = _data.name
    flat_column = name + '_' + '_flat'
    dataDecomp = pd.DataFrame(index=_data.index)
    # NOTE(review): the per-sample loop body was missing in the source. The
    # flattened signal is rebuilt from the documented formula
    # (DataDecomp = _data - slope * index); confirm against upstream.
    dataDecomp[flat_column] = _data.values - slope * indexDecomp

    trend = slope * indexDecomp + intercept
    if _plots:
        with plt.style.context('seaborn-white'):
            fig, ax = plt.subplots(figsize=(20, 10))
            ax.plot(_data.index, _data.values, label="Actual", marker=None)
            ax.plot(_data.index, dataDecomp[flat_column], marker=None, label='Flattened')
            ax.plot(_data.index, trend, label='Trend')
            ax.set_title("Signal Decomposition - " + name)

    return dataDecomp, slope, intercept
Ejemplo n.º 33
    def calculate_histogram(self):
        """Calculate the histogram of fit-parameters based on the mean-squared
        displacement of individual particles. It publishes the data on topic
        `histogram`.

        Locations are filtered with ``tp.filter_stubs`` and by the configured
        ``min_mass``, ``max_size`` and ``max_ecc`` limits before ``tp.imsd`` is
        evaluated. For every column of the result, NaN entries are dropped and
        ``log(value)`` is regressed against ``log(index)``; each
        ``[slope, intercept]`` pair is appended to ``self.histogram_values``.

        .. warning:: This method is incredibly expensive. If it is run on a
            thread it can block other pieces of code, especially the GUI,
            which runs on the same process.

        .. TODO:: The histogram loops over all the particles. It would be
            better to skip particles for which there is no new data.

        .. TODO:: Make this method able to run on a separate process. So far
            it is not possible because it relies on data stored on the class
            itself (`self.locations`).
        """
        self.calculating_histograms = True
        process_cfg = self.config['process']
        locations = self.locations.copy()
        # NOTE(review): the trailing arguments of filter_stubs/imsd were cut
        # off in the source; they are restored from the sibling version of
        # this method in the same file.
        t1 = tp.filter_stubs(locations, process_cfg['min_traj_length'])
        t2 = t1[((t1['mass'] > process_cfg['min_mass']) &
                 (t1['size'] < process_cfg['max_size']) &
                 (t1['ecc'] < process_cfg['max_ecc']))]
        im = tp.imsd(t2, process_cfg['um_pixel'], process_cfg['fps'])
        self.histogram_values = []
        for pcle in im:
            # NOTE(review): the body of this check was missing in the source.
            # `break` is used so the flag reset and publish below still run;
            # confirm against the upstream implementation.
            if general_stop_event.is_set():
                break

            data = im[pcle]
            valid = ~np.isnan(data.values)  # computed once, reused for both arrays
            t = data.index[valid]
            val = data.values[valid]
            try:
                slope, intercept, _r, _p, _stderr = stats.linregress(
                    np.log(t), np.log(val))
            except ValueError:
                # NOTE(review): the original guard around the fit was lost; a
                # column that cannot be fitted is skipped rather than aborting
                # the whole histogram.
                continue
            self.histogram_values.append([slope, intercept])
        self.calculating_histograms = False
        self.publisher.publish('histogram', self.histogram_values)
Ejemplo n.º 34
    def linregress_hb_drop_with_time_to_previous_rbc(self,
        Compute a linear regression of each RBCs hemoglobin saturation drop with the time difference
        to the previous RBC.

            si (int): segment index
            threshold (float): maximum value of time difference used for the linear regression

            float tuple, return value of scipy.stats.linregress

        time_difference = self.rbcDataPostProcessor.timeToPreviousRBC(
            si, self.n_rbc_average(si))
        hb_drop = self.hb_difference(si)[1:]
        filtered_times = time_difference[time_difference < threshold]
        filtered_drops = hb_drop[time_difference < threshold]
        if filtered_times.size:
            return linregress(filtered_times, filtered_drops)
            return np.nan, np.nan, np.nan, np.nan
Ejemplo n.º 35
    def getLinearRegressionData(self, log):
        """Return ``[slope, intercept, r_squared]`` for the regression of ``log``.

        The x and y samples are items 0 and 1 of whatever
        ``self.createValuesForRegression(log)`` returns.
        """
        regression_input = self.createValuesForRegression(log)
        x_samples = regression_input[0]
        y_samples = regression_input[1]

        # Only the first three of the five regression outputs are reported.
        gradient, offset, correlation, _p_value, _std_err = linregress(x_samples, y_samples)
        r_squared = correlation * correlation
        return [gradient, offset, r_squared]
Ejemplo n.º 36
def fit(x, y, funchandle='gauss1', estimates=None):
    """Fit a named model, or a user-supplied callable, to ``(x, y)``.

    Args:
        x: Independent-variable samples. The built-in models and default
            estimates use NumPy arithmetic and integer indexing on ``x``, so a
            1-D NumPy array is expected.
        y: Dependent-variable samples, same length as ``x``.
        funchandle: Either the name of a built-in model (``'gauss1'``,
            ``'poly1'``, ``'poly2'``, ``'poly3'``, ``'poly5'``, ``'abs1'``,
            ``'exp'``, ``'expc'``, ``'power1'``, ``'power2'``,
            ``'powerpoly1'``) or a callable ``f(x, *params)`` that is handed
            to ``curve_fit`` unchanged.
        estimates: Initial parameter guess passed to ``curve_fit`` as ``p0``.
            When ``None`` and a built-in model is selected, a model-specific
            default is used.

    Returns:
        tuple: ``(fitstruct, fitY, Rbest)`` — the fitted parameters, the model
        evaluated at ``x`` with those parameters, and the r-value of
        ``linregress(x, fitY)``. All three are ``None`` when ``curve_fit``
        raises ``RuntimeError``.
    """
    from scipy.optimize import curve_fit
    # Public import path; `scipy.stats.stats` is a private module.
    from scipy.stats import linregress

    def linear_guess():
        # Crude slope/intercept estimate shared by the polynomial models.
        slope = (np.max(y) - np.min(y)) / (np.max(x) - np.min(x))
        intercept = np.min(y) - slope * x[np.argmin(y)]
        return [slope, intercept]

    def gauss1(x, a1, b1, c1):
        return a1 * np.exp(-((x - b1) / c1) ** 2)

    def poly1(x, a1, b1):
        return a1 * x + b1

    def poly2(x, a1, b1, c1):
        return a1 * x ** 2.0 + b1 * x + c1

    def poly3(x, a1, b1, c1, d1):
        return a1 * x ** 3.0 + b1 * x ** 2.0 + c1 * x + d1

    def poly5(x, a1, b1, c1, d1, e1, f1):
        return a1 * x ** 5.0 + b1 * x ** 4.0 + c1 * x ** 3.0 + d1 * x ** 2.0 + e1 * x + f1

    def abs1(x, a1):
        return a1 * np.abs(x)

    def exp(x, a1, c1):
        return a1 * np.exp(c1 * x)

    def expc(x, a1, c1, d1):
        return a1 * np.exp(c1 * x) + d1

    def power1(x, a1, b1):
        return a1 * (x ** b1)

    def power2(x, a1, b1, c1):
        return a1 * (x ** b1) + c1

    def powerpoly1(x, a1, b1, a2, c1):
        return a1 * (x ** b1) + a2 * x + c1

    # name -> (model, lazy default estimate). The defaults are callables so
    # that they are only evaluated when the caller supplied no estimate.
    models = {
        # Really arbitrary c1 estimate at basically 25 pixels.
        'gauss1': (gauss1, lambda: np.array(
            [np.max(y), x[np.argmax(y)], 25.0 * (x[1] - x[0])])),
        'poly1': (poly1, linear_guess),
        'poly2': (poly2, lambda: [0.0] + linear_guess()),
        'poly3': (poly3, lambda: [0.0, 0.0] + linear_guess()),
        'poly5': (poly5, lambda: [0.0, 0.0, 0.0, 0.0] + linear_guess()),
        'abs1': (abs1, lambda: np.array(
            [(np.max(y) - np.min(y)) / (np.max(x) - np.min(x))])),
        'exp': (exp, lambda: np.array([1.0, -1.0])),
        'expc': (expc, lambda: np.array([1.0, -1.0, 1.0])),
        'power1': (power1, lambda: np.array([1.0, -2.0])),
        'power2': (power2, lambda: np.array([1.0, -2.0, 1.0])),
        'powerpoly1': (powerpoly1, lambda: np.array([1.0, -2.0, 0.0, 1.0])),
    }

    if isinstance(funchandle, str) and funchandle in models:
        fitfunc, default_estimates = models[funchandle]
        # `is None` rather than `== None`: comparing a NumPy array with `==`
        # yields an array whose truth value is ambiguous.
        if estimates is None:
            estimates = default_estimates()
    else:
        # Anything else is handed to curve_fit as the model itself.
        fitfunc = funchandle

    # Defined up front so the return below is valid when the fit fails.
    fitstruct = fitY = Rbest = None
    try:
        fitstruct, pcov = curve_fit(fitfunc, x, y, p0=estimates)
        perr = np.sqrt(np.diag(pcov))
        print("Fitting completed with perr = " + str(perr))
        fitY = fitfunc(x, *fitstruct)
        Rbest = linregress(x, fitY)[2]
    except RuntimeError:
        print("RAM: Curve fitting failed")
    return fitstruct, fitY, Rbest
Ejemplo n.º 37
# Remove the visual-count-0 class (both alternatives are currently disabled).
# NOTE(review): dfCS2 and dfCS3 are used below but are not assigned in the
# visible part of this script — confirm where they are defined.
#dfCS2 = dfCS[dfCS['JNM']!=0]
#dfCS2 = dfCS1

# -----------------------------------------------
#### Rows of dfCS whose 'JNM' column is 0 (original note: "see zero counts
#### Camille with large count software").
dfCS0 = dfCS[dfCS['JNM']==0]
#### Rows of dfCS2 whose 'SoftC' column is 0 (original note: "0 count for me
#### and count for C").
dfCSM0 = dfCS2[dfCS2['SoftC']==0]
# --------------------------------------------------

# Create lists for correlation: column 'J' is regressed against 'SoftC'.
camC1 = list(dfCS3['J'])
objc = list(dfCS3['SoftC'])
slopeO, interceptO, r_valueO, p_valueO, std_errO = linregress(camC1,objc)
# NOTE: Python 2 print statements.
print "r squared count = ",r_valueO**2
# From here on r_valueO holds r squared, not r.
r_valueO = r_valueO**2
print "slope",slopeO
print "p-value",p_valueO

# Plot raw correlation (only the axis labels are set in this section).
#plt.title('Obj count: erode = %s, dilate = %s, thres = %s'%(er,dil,thres))
plt.xlabel('Visual count')
plt.ylabel('Software count')
#pylab.savefig(resultsdir + 'ObjCount-' + str(count) + '.pdf',bbox_inches='tight')

# plot mean with sd
Ejemplo n.º 38
    def test_regression_of_returns_factor(self, returns_length, regression_length):
        Tests for the built-in factor `RollingLinearRegressionOfReturns`.
        my_asset_column = 0
        start_date_index = 6
        end_date_index = 10

        assets = self.asset_finder.retrieve_all(self.sids)
        my_asset = assets[my_asset_column]
        my_asset_filter = AssetID() != (my_asset_column + 1)
        num_days = end_date_index - start_date_index + 1

        # The order of these is meant to align with the output of `linregress`.
        outputs = ["beta", "alpha", "r_value", "p_value", "stderr"]

        # Our regression factor requires that its target asset is not filtered
        # out, so make sure that masking out our target asset does not take
        # effect. That is, a filter which filters out only our target asset
        # should produce the same result as if no mask was passed at all.
        for mask in (NotSpecified, my_asset_filter):
            regression_factor = RollingLinearRegressionOfReturns(
                target=my_asset, returns_length=returns_length, regression_length=regression_length, mask=mask
            results = self.engine.run_pipeline(
                Pipeline(columns={output: getattr(regression_factor, output) for output in outputs}),
            output_results = {}
            expected_output_results = {}
            for output in outputs:
                output_results[output] = results[output].unstack()
                expected_output_results[output] = full_like(output_results[output], nan)

            # Run a separate pipeline that calculates returns starting 2 days
            # prior to our start date. This is because we need
            # (regression_length - 1) extra days of returns to compute our
            # expected regressions.
            returns = Returns(window_length=returns_length)
            results = self.engine.run_pipeline(
                Pipeline(columns={"returns": returns}),
                self.dates[start_date_index - (regression_length - 1)],
            returns_results = results["returns"].unstack()

            # On each day, calculate the expected regression results for Y ~ X
            # where Y is the asset we are interested in and X is each other
            # asset. Each regression is calculated over `regression_length`
            # days of data.
            for day in range(num_days):
                todays_returns = returns_results.iloc[day : day + regression_length]
                my_asset_returns = todays_returns.iloc[:, my_asset_column]
                for asset, other_asset_returns in todays_returns.iteritems():
                    asset_column = int(asset) - 1
                    expected_regression_results = linregress(y=other_asset_returns, x=my_asset_returns)
                    for i, output in enumerate(outputs):
                        expected_output_results[output][day, asset_column] = expected_regression_results[i]

            for output in outputs:
                        index=self.dates[start_date_index : end_date_index + 1],
Ejemplo n.º 39
# Relate the values collected in meanGroundedTime to dataset[REWARD_SIGNALS]
# and plot them together with a fitted line.
# NOTE(review): in the visible code nothing is ever appended to
# meanGroundedTime or robotGroundedTimesteps, and rowGrounded is computed but
# not used — the loop body looks incomplete; confirm against the original.
meanGroundedTime = []
for robot in dataset[0]:
	robotGroundedTimesteps = []
	for row in robot:
		# 1 when the row contains a 1, otherwise 0.
		rowGrounded = 1 if 1 in row else 0;

# Summary statistics: mean, standard deviation, minimum and maximum.
print(np.mean(meanGroundedTime), np.std(meanGroundedTime), min(meanGroundedTime), max(meanGroundedTime));

# Pearson correlation, Spearman correlation and linear regression of the
# same two sequences.
rp = pearsonr(meanGroundedTime, dataset[REWARD_SIGNALS]);
rs = spearmanr(meanGroundedTime, dataset[REWARD_SIGNALS]);
lr = linregress(meanGroundedTime, dataset[REWARD_SIGNALS]);

# Degree-1 polynomial fit; `fit` is not used again in the visible code.
fit = np.polyfit(meanGroundedTime, dataset[REWARD_SIGNALS], 1);

# The plotted line is built from the first two linregress outputs
# (slope, intercept), not from `fit`.
fitfn = np.poly1d(lr[0:2]);

# Data as green circles, fitted line dashed black over x in [0.4, 1.1).
plt.plot(meanGroundedTime, dataset[REWARD_SIGNALS], 'go', np.arange(.4, 1.1, .01), fitfn(np.arange(.4, 1.1, .01)), '--k');

# The robot type is shown with its first letter capitalised.
plt.title("Proportion Time Grounded and Normalized Reinforcement Signal" 
		+ "\n For " + robotType[0].upper() + robotType[1:] + " Robot with \"jump\" Command");
plt.ylabel("Normalized Reinforcement Signal");
plt.xlabel("Proportion of Time Grounded");
plt.axis([.45, 1.05, -1.1, 1.1]);
Ejemplo n.º 40


err = np.abs(np.array([validation_dump]) - np.array([model_dump]))
err_av = np.mean(err)
sum1 = np.sum(model_dump)
print('Difference between valid and model =', err)
print('Average daily error =', err_av)
print('Sum of model LWD =', sum1)
print('Sum of valid LWD =', np.sum(validation_dump))

slope, intercept, r_value, p_value, std_err = \
print('slope =', slope)
print('intercept =', intercept)

counter1 = np.zeros_like(model_dump)
counter2 = np.zeros_like(model_dump)
counter5 = np.zeros_like(model_dump)

# Counters for number of model LWD that are between 1, 2 and 5 hours of
# validation LWD
for i in range(len(model_dump)):
    if (model_dump[i]<=validation_dump[i]+1 and \
        counter1[i] = 1

    if (model_dump[i]<=validation_dump[i]+2 and \
Ejemplo n.º 41

histDic2=[np.count_nonzero((binwidth-tol < testBin) & (testBin < binwidth+tol)) for binwidth in range(100)]

plt.hist(histDicT.values(),bins=sample)#np.arange(min(dataD), max(dataD) + binwidth, binwidth))

from scipy.stats.stats import linregress
for value in binSs[:6]:
for index in range(0, len(binSs[:sample]) - 6):
    s, intercept, r, p, std_error = linregress(binSs[index:index + 7], binSs[index:index + 7])
    print(s, intercept, r, p, std_error,"\n")
for value in histDicT:
    temp = histDicT[value]
import itertools
for index in range(0, len(histDicT) - 6):
        tempD=dict(itertools.islice(histDicT.items(), index,index + 7))
        s, intercept, r, p, std_error = linregress(list(tempD.keys()), list(tempD.values()))
        #print(index,tempD,s, intercept, r, p, std_error)
        roi_data_mean = np.ones(len(names))*-99
        roi_data_std = np.ones(len(names))*-99
        roi_data_r = np.ones(len(names))*-99
        roi_data_p = np.ones(len(names))*-99
        roi_data_m = np.ones(len(names))*-99
        for i, name in enumerate(names):
            #wm_name = 'wm-' + hemi + '-' + name
            wm_name = '{}_{}'.format(hemi, name)
            if wm_name in df1.columns:
                df_merge = df1.merge(df2, on='nspn_id')
                roi_data_mean[i] = df1[wm_name].mean()
                roi_data_std[i] = df1[wm_name].std()
                m, c, r, p, sterr = linregress(df_merge[wm_name + '_x'], df_merge[wm_name + '_y'])
                roi_data_m[i] = m
                roi_data_r[i] = r
                roi_data_p[i] = 1 - p

        Make a vector containing the data point at each vertex.
        vtx_data_mean = roi_data_mean[labels]
        vtx_data_std = roi_data_std[labels]
        vtx_data_r = roi_data_r[labels]
        vtx_data_p = roi_data_p[labels]
        vtx_data_m = roi_data_m[labels]

Ejemplo n.º 43
  d2 = hpcs.double[t2,t1]

  naive   = s1['instructions'] + s2['instructions']
  actual  = d1['instructions'] + d2['instructions']
  degr = actual / naive
  degradations += [degr]

  for k, v1 in s1.items():
    v2 = s2[k]
    total = v1 + v2
    counters[k] += [total]

  # total = gettotal(shpc1, shpc2, ['LLC-stores', 'LLC-loads'])
  total = gettotal(s1, s2, ['instructions'])
  plotdata[total] = degr

# Report every counter whose Pearson correlation with the degradations has a
# p-value below 0.1.
for counter, v in counters.items():
  cor, pv = pearsonr(v, degradations)
  if pv < 0.1:
    # The '%' presentation type already multiplies by 100, so the raw
    # p-value is passed; pre-multiplying printed a value 100x too large.
    print ("{:25} {: .3f} {:2.1%}".format(counter, cor, pv))

# Plot degradation against the plotdata keys, in ascending key order.
if plotdata:
  X = sorted(plotdata)
  Y = [plotdata[x] for x in X]
  p.plot(X, Y, '-o')
def main():
    usage = 'usage: %prog [opt] lfq_filename gene_exprs_filename output_filename'\
        '\nThree arguments must be specified in command line:\n'\
        '1) LFQ filename, containing LFQ intensities and two replicates.\n'\
        '2) Gene exprs filename, read count data.\n'\
        '3) AS status of genes (miso output)\n'
    parser = OptionParser(usage=usage)
    # colnames for lfq data
    parser.add_option('--lfq_gene_colname', dest='lfq_gene_colname',
                      default='Gene names',
                      help='Column name of gene name')
    parser.add_option('--samp1_lfq_colname1', dest='samp1_lfq_colname1',
                      default='LFQ intensity T331_1',
                      help='Column name of LFQ intensity, sample 1 replicate 1.')
    parser.add_option('--samp1_lfq_colname2', dest='sampl1_lfq_colname2',
                      default='LFQ intensity T331_2',
                      help='Column name of LFQ intensity, sample 1 replicate 2.')
    parser.add_option('--samp2_lfq_colname1', dest='sampl2_lfq_colname1',
                      default='LFQ intensity R_1',
                      help='Column name of LFQ intensity, sample 2 replicate 1.')
    parser.add_option('--samp2_lfq_colname2', dest='sampl2_lfq_colname2',
                      default='LFQ intensity R_2',
                      help='Column name of LFQ intensity, sample 2 replicate 2.')  
    # colnames for gene exprs data
    parser.add_option('--mrna_gene_colname', dest='mrna_gene_colname',
                      help='Column name for mRNA exprs data.')
    parser.add_option('--samp1_exprs_colname', dest='samp1_exprs_colname',
                      help='Column name of gene exprs for sample 1')  
    parser.add_option('--samp2_exprs_colname', dest='samp2_exprs_colname',
                      help='Column name of gene exprs for sample 2')
    parser.add_option('--spliced_only', dest='spliced_only',
                      help='True or False. True shows only spliced genes. '\
                        'False shows all. Default is False.')
    parser.add_option('--convert_to_log2', dest='convert_to_log2',
                      help='True or False, converts mRNA exprs data to to log2'\
                        ' scale. Default True.')
    parser.add_option('--title', dest='title',
                      default='Plot title',
                      help='Title of plot.')
    parser.add_option('--xlabel', dest='xlabel',
                      default='x axis',
                      help='X axis label of plot')
    parser.add_option('--ylabel', dest='ylabel',
                      default='y axis',
                      help='Y axis label of plot')
    parser.add_option('--annotate_genes', dest='annotate_genes',
                      help='CSV list of genes to be annotated.\n'\
                        'Default is None, allowing mouse click annotation.')
    (options, args) = parser.parse_args()
    if len(args) < 3:
        print 'Not enough args specified.\n%s' %usage
    lfq_filename = args[0]
    gene_exprs_filename = args[1]
    miso_filename = args[2]
    # parse options
    # splicing only option
    spliced_only = options.spliced_only
    if spliced_only in ['True', 'true', 'T', 'TRUE']:
        spliced_only = True
    elif spliced_only in ['False', 'false', 'F', 'FALSE']:
        spliced_only = False
        print 'Spliced only option must be True or False. %s found.' \
    print 'splicing_only: %s' %spliced_only
    # log2 conversion option
    convert_to_log2 = options.convert_to_log2
    if convert_to_log2 in ['True', 'T']:
        convert_to_log2 = True
    elif convert_to_log2 in ['False', 'F']:
        convert_to_log2 = False
        print '--convert_to_log2 must be True or False. %s found.'\
    print 'Convert to log2: %s' %convert_to_log2
    # xlabel, ylabel, title options
    xlabel = options.xlabel
    ylabel = options.ylabel
    title = options.title
    # annotate genes options
    if options.annotate_genes is not None:
        annotated_gene_list = options.annotate_genes.split(',')
        annotated_gene_list = options.annotate_genes
    lfq_mrna_dic = {}
    # Add LFQ information to dic
    lfq_mrna_dic = index_lfq_data(lfq_filename, lfq_mrna_dic, options, 
    print 'lfq data indexed from file: %s' %lfq_filename
    # Add gene exprs to dic
    lfq_mrna_dic = index_mrna_data(gene_exprs_filename, lfq_mrna_dic, options,
    print 'mrna data indexed from file: %s' %gene_exprs_filename
    # Write dic to file
    # write_lfq_mrna_data_to_file(lfq_mrna_dic, out_filename, options)
    # Get differentially spliced genes (non-redundant only)
    spliced_genes = list(set(get_spliced_genes(miso_filename)))
    print '%s spliced genes extracted from %s' %(len(spliced_genes), 
    # Calculate Pearson and Spearman correlation for non-AS genes and AS genes
    # Create x and y vectors for spliced, nonspliced and both
    spliced_mrna_log2_fc, spliced_lfq_diff = \
        split_by_splice_status(lfq_mrna_dic, spliced_genes, spliced=True)
    non_spliced_mrna_log2_fc, non_spliced_lfq_diff = \
        split_by_splice_status(lfq_mrna_dic, spliced_genes, spliced=False)
    mrna_log2_fc, lfq_diff = \
        split_by_splice_status(lfq_mrna_dic, spliced_genes, spliced=None)
    # Calculate r and pvals for Pearson

    for mrna_diff_vector, \
        lfq_diff_vector, \
        splice_status in \
            zip([spliced_mrna_log2_fc, non_spliced_mrna_log2_fc, mrna_log2_fc], 
                [spliced_lfq_diff, non_spliced_lfq_diff, lfq_diff], 
                ['DS Genes', 'Non-DS Genes', 'All Genes']):
        pearsonr, pearsonpval = \
            stats.pearsonr(mrna_diff_vector, lfq_diff_vector)
        print 'Gene set:%s\nPearson coefficient: %s\nPval:%s' \
            %(splice_status, pearsonr, pearsonpval)
        spearmanr, spearmanpval = \
            stats.spearmanr(mrna_diff_vector, lfq_diff_vector)
        print 'Gene set:%s\nSpearman coefficient: %s\nPval:%s' \
            %(splice_status, spearmanr, spearmanpval)
        slope, intercept, r_value, p_value, std_err = stats.linregress(mrna_diff_vector,lfq_diff_vector)
        print 'slope: %s\nintercept: %s\nr_value: %s\nstd_error: %s' %(slope, intercept, r_value, std_err)
        # calculate concordants
        concord_count = 0
        all_count = 0
        for mrna, lfq in zip(mrna_diff_vector, lfq_diff_vector):
            if mrna * lfq >= 0:    # means concordant
                concord_count += 1
            all_count += 1
        frac_concord = float(concord_count) / all_count
        print 'Gene set:%s\nConcordance:%s/%s, %s' %(splice_status, concord_count, all_count, frac_concord)
    # Scatterplot data
    scatter_plot_lfq_mrna(lfq_mrna_dic, spliced_genes, spliced_only=spliced_only,
                          title=title, xlabel=xlabel, ylabel=ylabel,