コード例 #1
0
def group_by_columns(data, group_by_columns, column_to_agg):
    """Group rows by the given columns with missing aggregate values set to 0.

    Only the grouping columns and ``column_to_agg`` are kept. Missing values
    in ``column_to_agg`` are replaced with 0 before grouping; missing values
    in the grouping columns are left as they are.

    The caller's ``data`` is not modified: the fill is applied to a selected
    copy rather than written back into the input frame.

    :param data: pandas DataFrame containing all referenced columns
    :param group_by_columns: list of column names to group on
    :param column_to_agg: name of the column to carry into the groups
    :return: the (un-aggregated) pandas GroupBy object; call ``.count()``,
        ``.sum()`` etc. on it to obtain a result
    """
    all_columns = group_by_columns + [column_to_agg]
    # Selecting with a list of labels yields a new frame, and a dict-valued
    # fillna only touches the named column, so the input stays untouched.
    subset = data.loc[:, all_columns].fillna(value={column_to_agg: 0})
    return subset.groupby(group_by_columns)
コード例 #2
0
def fit_polynomial(data, title, file, alpha=0):
    """
    Fit Ridge polynomial models of degree 1-5 to the data and plot them.

    The data is split into train and test sets, one pipeline of
    ``PolynomialFeatures(degree)`` + ``Ridge(alpha)`` is fitted per degree,
    and each fitted curve is drawn over 100 evenly spaced points in [0, 30]
    together with the per-trial mean of the target. The figure is saved under
    ``Results/Regression/Learning/`` and then shown.

    :param data: DataFrame with 'Trial' and 'Water Maze CIPL' columns
    :param title: title of plot (string)
    :param file: filename appended to 'Results/Regression/Learning/'
    :param alpha: alpha value passed to Ridge
    :return: tuple of (degree-1 predictions on the plotting grid,
        degree-1 intercept, degree-1 coefficients,
        list of test mean squared errors for degrees 1-5)
    """
    X_train, X_test, y_train, y_test = train_test_split(
        data['Trial'], data['Water Maze CIPL'])
    # The estimators need 2-D inputs; build the column vectors once instead
    # of reshaping on every loop iteration.
    X_train_col = X_train.values.reshape(-1, 1)
    X_test_col = X_test.values.reshape(-1, 1)
    # Single plotting grid shared by every fitted curve.
    x_grid = np.linspace(0, 30, 100)
    x_grid_col = x_grid.reshape(-1, 1)
    colors = ['teal', 'yellowgreen', 'gold', 'purple', 'pink', 'brown']
    lw = 2
    mses = []
    for degree in [1, 2, 3, 4, 5]:
        model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=alpha))
        model.fit(X_train_col, y_train)
        y_plot = model.predict(x_grid_col)
        if degree == 1:
            # The linear fit is what gets returned to the caller.
            line = y_plot
            # make_pipeline names each step after its lowercased class name;
            # use the public accessor rather than the private attribute.
            ridge = model.named_steps['ridge']
            intercept = ridge.intercept_
            coef = ridge.coef_
        mse = mean_squared_error(y_test, model.predict(X_test_col))
        mses.append(mse)
        plt.plot(x_grid,
                 y_plot,
                 color=colors[degree - 1],
                 linewidth=lw,
                 # Built-in round() works for NumPy scalars and plain floats.
                 label="degree {0}   test error:{1}".format(
                     degree, round(mse, 2)))
    # Average only the target column so unrelated (possibly non-numeric)
    # columns in data cannot break the aggregation.
    ys = data.groupby('Trial')['Water Maze CIPL'].mean()
    # Plot each mean at its own trial value instead of assuming 24 trials.
    plt.scatter(ys.index,
                ys,
                color='black',
                edgecolors='black',
                s=30,
                marker='o',
                label="Average trial performance")
    plt.legend(loc=2, prop={'size': 8})
    plt.title(title)
    plt.xlim(0, 30)
    plt.ylim(0, 60)
    plt.xlabel('Trial')
    plt.ylabel('CIPL')
    plt.savefig('Results/Regression/Learning/' + file)
    plt.show()
    return line, intercept, coef, mses
コード例 #3
0
def create_counts(data, group_by_columns, column_to_count):
    """Count rows of ``column_to_count`` per group, with nulls included.

    The frame is narrowed to the grouping columns plus ``column_to_count``,
    and every missing value in that selection (grouping columns included) is
    replaced with 1 so that it is counted instead of being skipped.

    :param data: pandas DataFrame containing all referenced columns
    :param group_by_columns: list of column names to group on
    :param column_to_count: name of the column whose rows are counted
    :return: DataFrame indexed by the grouping columns holding the counts
    """
    wanted = group_by_columns + [column_to_count]
    selection = data.loc[:, wanted]
    # fillna returns a new frame, so the caller's data is left as it was.
    filled = selection.fillna(value=1)
    return filled.groupby(group_by_columns).count()