Example #1
def extract_norm_x_y_t(filename):
    # Assumes module-level imports: json, numpy as np, and the project
    # modules prep_data and dist_analysis.
    try:
        with open(filename, "r") as file:
            data = json.loads(file.read())['all']
    except (IOError, ValueError, KeyError):
        # Missing or malformed file: clean it up, then retry.
        prep_data.clean_data(filename)
        return extract_norm_x_y_t(filename)

    # Keep only tracker entries whose frame is in a valid state (7).
    tracking_data = [
        l for l in data
        if l['category'] == "tracker" and l['values']['frame']['state'] == 7
    ]

    # Time stamps, counted relative to the first sample.
    time_stamps = np.array(
        [l['values']['frame']['time'] for l in tracking_data])
    time_stamps = time_stamps - time_stamps[0]

    # Average (x, y) coordinates per frame.
    x_y_data = np.array([(l['values']['frame']['avg']['x'],
                          l['values']['frame']['avg']['y'])
                         for l in tracking_data])

    # Stack into an (n, 3) array of (x, y, t) rows and whiten it.
    x_y_t = np.concatenate(
        (x_y_data, time_stamps.reshape((len(x_y_data), 1))), axis=1)

    return dist_analysis.whiten(x_y_t)
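For context, a minimal usage sketch; the file name is hypothetical, and dist_analysis.whiten is assumed to return an array with the same shape as its input:

# Hypothetical usage: 'session.json' stands in for a real tracker log.
x_y_t = extract_norm_x_y_t("session.json")
print(x_y_t.shape)  # expected (n_frames, 3): whitened x, y, relative time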
Example #2
def extract_features(filename, coord_type):
    # Assumes module-level imports: json, numpy as np, and the project
    # module prep_data.
    try:
        with open(filename, "r") as file:
            data = json.loads(file.read())['all']
    except (IOError, ValueError, KeyError):
        # Missing or malformed file: clean it up, then retry.
        prep_data.clean_data(filename)
        return extract_features(filename, coord_type)

    tracking_data = [
        l for l in data
        if l['category'] == "tracker" and l['values']['frame']['state'] == 7
    ]

    # time stamps
    time_stamps = np.array(
        [l['values']['frame']['time'] for l in tracking_data])
    # start counting time relative to the first point
    time_stamps = time_stamps - time_stamps[0]

    # (x, y) for the requested coordinate type
    x_y_data = np.array([(l['values']['frame'][coord_type]['x'],
                          l['values']['frame'][coord_type]['y'])
                         for l in tracking_data])

    # left-eye (x, y)
    l_x_y_data = np.array([(l['values']['frame']['lefteye'][coord_type]['x'],
                            l['values']['frame']['lefteye'][coord_type]['y'])
                           for l in tracking_data])
    # right-eye (x, y)
    r_x_y_data = np.array([(l['values']['frame']['righteye'][coord_type]['x'],
                            l['values']['frame']['righteye'][coord_type]['y'])
                           for l in tracking_data])
    # per-frame 'fix' values
    f_data = np.array([l['values']['frame']['fix'] for l in tracking_data])
    return x_y_data, time_stamps, l_x_y_data, r_x_y_data, f_data
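As above, a minimal usage sketch; the file name is hypothetical, and 'avg' is borrowed from Example #1 as a plausible coord_type value:

# Hypothetical usage: unpack the five parallel arrays returned above.
xy, t, left_xy, right_xy, fix = extract_features("session.json", "avg")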
Example #3
def test_pin_route(file='../k1.txt'):
    # Assumes numpy as np and the project module prep are imported.
    data = prep.clean_data(file)
    np_data = np.array(data)
    np_time = np.array(data['time'])  # extracted but not used below

    pin = simulate_real_data(data, np_data)
    return pin
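A trivial usage sketch, relying on the default path '../k1.txt' from the example itself:

# Runs the helper end to end with its default input file.
pin = test_pin_route()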
Example #4
def create_small_dataset(dataset, size=300):
    new_dataset = f'{dataset}_small'

    # Load files
    sentences = file.get_sentences(dataset)
    labels = file.get_labels(dataset)

    # Take the first `size` sentences and, from each corresponding label
    # line, the document label (third tab-separated field).
    sentences_normal = sentences[0:size]
    doc_labels = [label.split(sep="\t")[2] for label in labels[0:size]]

    clean_data(new_dataset)

    file.save_sentences(sentences_normal, new_dataset)
    file.save_labels(doc_labels, new_dataset)

    print(f"Small dataset created with {size} documents (based on: {dataset})")
Example #5
def pipeline(csv_name=csv_file):
    '''
    Runs the machine learning pipeline from start to finish.

    Inputs:
        csv_name: the path to a CSV file containing the data we want
            (defaults to the CSV file we were given for this assignment)

    Outputs:
        models_eval: a pandas dataframe listing the models we tested,
            the parameter settings we tried for each, and the
            evaluation metrics we used
    '''

    print('Importing')
    df_all_data = prep_data.import_data(csv_name)
    if df_all_data is None:
        return None
    all_cols = df_all_data.columns

    print('Exploring')
    descriptions = prep_data.explore_data(df_all_data, all_cols)
    print('Cleaning')
    df_all_data = prep_data.clean_data(df_all_data, all_cols)

    print('Generating Var and Feat')
    df_all_data, variable, features, split = prep_data.generate_var_feat(
        df_all_data, all_cols)
    df_all_data.to_csv("Data_For_Eval.csv")

    print('Modeling')
    models_dict = modeling.split_by_date(df_all_data, split, variable, features)
    
    print('Creating final table')
    return table_models_eval(models_dict)
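A minimal usage sketch; the CSV name here is hypothetical, and csv_file must already be defined at module level for the default to resolve:

# Hypothetical usage: run the full pipeline on one CSV and inspect results.
models_eval = pipeline('projects.csv')
if models_eval is not None:
    print(models_eval.head())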