Python process_data 예제들, helpers.process_data Python 예제들

예제 #1

0

파일 보기

def scraper():
    if request.method == 'POST':
        site = request.form['myUrl']
        session = HTMLSession()
        abs_urls = session.get(site).html.absolute_links
        helpers.process_data(abs_urls)
        print(abs_urls)
        return render_template('index.html', links=abs_urls)
    else:
        return render_template('index.html')

예제 #2

0

파일 보기

def cross_validation(y, x, k_indices, k, regression_method, **args):
    """
    Completes k-fold cross-validation using the regression method
    passed as argument.
    """
    # get k'th subgroup in test, others in train
    msk_test = k_indices[k]
    msk_train = np.delete(k_indices, (k), axis=0).ravel()

    x_train = x[msk_train, :]
    x_test = x[msk_test, :]
    y_train = y[msk_train]
    y_test = y[msk_test]

    # data pre-processing
    x_train, x_test = process_data(x_train, x_test, True)

    # compute weights using given method
    weights, loss = regression_method(y=y_train, tx=x_train, **args)

    # predict output for train and test data
    y_train_pred = predict_labels(weights, x_train)
    y_test_pred = predict_labels(weights, x_test)

    # compute accuracy for train and test data
    acc_train = compute_accuracy(y_train_pred, y_train)
    acc_test = compute_accuracy(y_test_pred, y_test)

    return acc_train, acc_test

예제 #3

0

파일 보기

def cross_validation_ridge_regression(y, x, k_indices, k, lambdas, degrees):
    """
    Completes k-fold cross-validation using the ridge regression method.
    Here, we build polynomial features and create four subsets using
    the jet feature.
    """
    # get k'th subgroup in test, others in train
    msk_test = k_indices[k]
    msk_train = np.delete(k_indices, (k), axis=0).ravel()

    x_train_all_jets = x[msk_train, :]
    x_test_all_jets = x[msk_test, :]
    y_train_all_jets = y[msk_train]
    y_test_all_jets = y[msk_test]

    # split in 4 subsets the training set
    msk_jets_train = get_jet_masks(x_train_all_jets)
    msk_jets_test = get_jet_masks(x_test_all_jets)

    # initialize output vectors
    y_train_pred = np.zeros(len(y_train_all_jets))
    y_test_pred = np.zeros(len(y_test_all_jets))

    for idx in range(len(msk_jets_train)):
        x_train = x_train_all_jets[msk_jets_train[idx]]
        x_test = x_test_all_jets[msk_jets_test[idx]]
        y_train = y_train_all_jets[msk_jets_train[idx]]

        # data pre-processing
        x_train, x_test = process_data(x_train, x_test, False)

        phi_train = build_poly(x_train, degrees[idx])
        phi_test = build_poly(x_test, degrees[idx])

        phi_train = add_constant_column(phi_train)
        phi_test = add_constant_column(phi_test)

        # compute weights using given method
        weights, loss = ridge_regression(y=y_train, tx=phi_train, lambda_=lambdas[idx])

        y_train_pred[msk_jets_train[idx]] = predict_labels(weights, phi_train)
        y_test_pred[msk_jets_test[idx]] = predict_labels(weights, phi_test)

    # compute accuracy for train and test data
    acc_train = compute_accuracy(y_train_pred, y_train_all_jets)
    acc_test = compute_accuracy(y_test_pred, y_test_all_jets)

    return acc_train, acc_test

예제 #4

0

파일 보기

# | 2.25 | 31.85 | -2.82901631903 | 0.0 |
# | 2.5 | 36.75 | -2.82901631903 | 0.0 |
# | 2.75 | 41.65 | -2.82901631903 | 0.0 |
# | 3.0 | 46.55 | -2.82901631903 | 0.0 |
# | 3.25 | 51.45 | -2.82901631903 | 0.0 |
# | 3.5 | 56.35 | -2.82901631903 | 0.0 |
#
# This data is currently saved in a file called `trajectory_example.pickle`. It can be loaded using a helper function we've provided (demonstrated below):

# In[1]:

from helpers import process_data
from math import cos, sin, pi
from matplotlib import pyplot as plt

data_list = process_data("trajectory_example.pickle")

for entry in data_list:
    print(entry)

# as you can see, each entry in `data_list` contains four fields. Those fields correspond to `timestamp` (seconds), `displacement` (meters), `yaw_rate` (rads / sec), and `acceleration` (m/s/s).
#
# ### The Point of this Project!
# **Data tells a story but you have to know how to find it!**
#
# Contained in the data above is all the information you need to reconstruct a fairly complex vehicle trajectory. After processing **this** exact data, it's possible to generate this plot of the vehicle's X and Y position:
#
# ![](https://d17h27t6h515a5.cloudfront.net/topher/2017/December/5a3044ac_example-trajectory/example-trajectory.png)
#
# as you can see, this vehicle first accelerates forwards and then turns right until it almost completes a full circle turn.
#

예제 #5

0

파일 보기

#!/usr/bin/env python
# coding: utf-8

# # Integrating Rate Gyro Data
# The **yaw rate** of a vehicle can be measured by a **rate gyro**.
#
# The yaw rate gives the rate of change of the vehicle's heading in radians per second and since a vehicle's heading is usually given by the greek letter $\theta$ (theta), yaw **rate** is given by $\dot{\theta}$ (theta dot).
#
# Integrating the yaw rate gives total change in heading.

# In[1]:

from helpers import process_data, get_derivative_from_data
from matplotlib import pyplot as plt

PARALLEL_PARK_DATA = process_data("parallel_park.pickle")

TIMESTAMPS = [row[0] for row in PARALLEL_PARK_DATA]
DISPLACEMENTS = [row[1] for row in PARALLEL_PARK_DATA]
YAW_RATES = [row[2] for row in PARALLEL_PARK_DATA]
ACCELERATIONS = [row[3] for row in PARALLEL_PARK_DATA]

# In[2]:

plt.title("Yaw Rate vs Time")
plt.xlabel("Time (seconds)")
plt.ylabel("Yaw Rate (radians / second)")
plt.plot(TIMESTAMPS, YAW_RATES)
plt.show()

# Here's what I make of this data

예제 #6

0

파일 보기

def api():
    data = request.args.get('data', "")
    process_data(data)
    return jsonify({'success': True})

예제 #7

0

파일 보기

from helpers import process_data
import numpy as np
from math import cos, sin
from matplotlib import pyplot as plt

# %matplotlib inline

data_list = process_data("trajectory_example.pickle")

for entry in data_list:
    print(entry)


def get_speeds(data_list):
    displacement_previous = 0.0
    time_previous = 0.0

    speeds = [0.0]
    for i in range(1, len(data_list)):
        displacement = data_list[i][1]
        time = data_list[i][0]
        delta_displacement = displacement - displacement_previous
        delta_time = time - time_previous
        speed = delta_displacement / delta_time
        speeds.append(speed)

        displacement_previous = displacement
        time_previous = time
    return speeds

예제 #8

0

파일 보기

from scipy import stats as stat
import pickle as pk
import os
import sys
sys.path.insert(1, os.path.join(sys.path[0], '../util'))
import helpers
import mcmcse
sys.path.insert(1, os.path.join(sys.path[0], '../mcmc_sampler'))
from gibbs import gibbs
from metropolis_within_gibbs import metropolis_within_gibbs
sys.path.insert(1, os.path.join(sys.path[0], '../data_generation'))
from gaussian_mean_shift import generate_gaussian_mean_shift
import matplotlib.pyplot as plt

# params that won't change throughout
sd = 1
alpha = 1
beta = 1
rep = 1
i = np.array([0, 1, 2, 3, 4])

M = np.array([2, 2, 4, 4, 3])
n = np.array([50, 20000, 60, 100, 100])
n_MCMC = np.array([1, 3, 3, 3, 3])

for m in np.arange(M.shape[0]):
    print(m)
    for d in i:
        print(d)
        helpers.process_data(M[m], n[m], n_MCMC[m], 2, d)

예제 #9

0

파일 보기

from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import auc, roc_curve, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier as XGBClassifier
import statsmodels.api as statmod
from sklearn.pipeline import Pipeline
import warnings

warnings.filterwarnings("ignore")

if __name__ == "__main__":

    # Import and Process data
    # ==================================================
    df_2019 = helpers.process_data(2019)
    df_2018 = helpers.process_data(2018)
    df_2017 = helpers.process_data(2017)
    df_2017['gender_Prefer not to say'] = 0
    df_2017 = df_2017.sort_index(axis=1)  # sort column by column names

    # Aggregating the repeated cross-sectional data
    data = pd.concat([df_2019, df_2018, df_2017], axis=0)

    # check for missing values
    print(data.isnull().sum())

    # For this notebook, I will only work with a much smaller subset of the dataset.
    # I will randomly sample 20 k respondents and perform analysis and modelling.
    data = data.sample(n=20000, random_state=10)

예제 #10

0

파일 보기

파일: run.py 프로젝트: wuyusheng1023/nox-auto-sampler

    while True:

        try:

            set_mfc(r)

            if r.get('mock') == b'on' and r.llen('mock_data') > 0:
                while r.llen('mock_data') > 0:
                    data = r.rpop('mock_data')
                data = json.loads(data)
                data = process_mock_data(r, data)
                print('Worker produce mock data', data)

            if r.get('status') == b'run':
                data = process_data(r, s)
                print('System is running...', data)
                if r.get('analyzing') == b'true':
                    if r.get('purging') == b'true':
                        set_valve(0)
                    else:
                        position = get_valve(r)
                        set_valve(position)
                elif r.get('analyzing') == b'manual_valve':
                    position = get_valve(r)
                    set_valve(position)
                else:
                    set_valve(-1)
                    r.set('valve', -1)
            else:
                turn_off_valves_mfc()

예제 #11

0

파일 보기

파일: testing.py 프로젝트: wolfgang-stefani/udacity-reconstructing-trajectories-from-sensor-data

import solution
from helpers import process_data
from math import pi

T1 = process_data("trajectory_1.pickle")
T2 = process_data("trajectory_2.pickle")
T3 = process_data("trajectory_3.pickle")


def test_get_speeds(get_speeds_function):
    #print(T1) # SW 20200407 DEBUG
    student_speeds = get_speeds_function(T1)
    correct_speeds = solution.get_speeds(T1)
    if len(student_speeds) != len(correct_speeds):
        print("Your get_speeds function returned a list of length")
        print(len(student_speeds), "was expecting length", len(correct_speeds))
        return

    speed_diff_at_end = correct_speeds[-1] - student_speeds[-1]
    pct_diff = abs(speed_diff_at_end / correct_speeds[-1]) * 100

    if pct_diff > 5:
        print("Your final speed for trajectory_1 was too far off. You said:")
        print(student_speeds[-1], "but this was expected: ",
              correct_speeds[-1])
        return
    print("PASSED test of get_speeds function!")
    return


def test_get_x_y(get_x_y_function):