import json

import numpy as np

# Project-local modules (assumed importable within this project).
import dist_analysis
import prep_data


def extract_norm_x_y_t(filename):
    try:
        with open(filename, "r") as f:
            data = json.loads(f.read())['all']
    except (OSError, ValueError, KeyError):
        # The log could not be read or parsed; clean it and retry.
        prep_data.clean_data(filename)
        return extract_norm_x_y_t(filename)
    # Keep only tracker entries whose frame state equals 7 (treated as valid samples).
    tracking_data = [
        l for l in data
        if l['category'] == "tracker" and l['values']['frame']['state'] == 7
    ]
    # Time stamps, counted relative to the first sample.
    time_stamps = np.array(
        [l['values']['frame']['time'] for l in tracking_data])
    time_stamps = time_stamps - time_stamps[0]
    # Averaged (x, y) gaze coordinates.
    x_y_data = np.array([(l['values']['frame']['avg']['x'],
                          l['values']['frame']['avg']['y'])
                         for l in tracking_data])
    # Stack into (x, y, t) rows and whiten.
    x_y_t = np.concatenate(
        (x_y_data, time_stamps.reshape((len(x_y_data), 1))), axis=1)
    return dist_analysis.whiten(x_y_t)
def extract_features(filename, coord_type):
    try:
        with open(filename, "r") as f:
            data = json.loads(f.read())['all']
    except (OSError, ValueError, KeyError):
        # The log could not be read or parsed; clean it and retry.
        prep_data.clean_data(filename)
        return extract_features(filename, coord_type)
    # Keep only tracker entries whose frame state equals 7 (treated as valid samples).
    tracking_data = [
        l for l in data
        if l['category'] == "tracker" and l['values']['frame']['state'] == 7
    ]
    # Time stamps, counted relative to the first sample.
    time_stamps = np.array(
        [l['values']['frame']['time'] for l in tracking_data])
    time_stamps = time_stamps - time_stamps[0]
    # Combined (x, y) coordinates for the requested coordinate type.
    x_y_data = np.array([(l['values']['frame'][coord_type]['x'],
                          l['values']['frame'][coord_type]['y'])
                         for l in tracking_data])
    # Left-eye (x, y).
    l_x_y_data = np.array([(l['values']['frame']['lefteye'][coord_type]['x'],
                            l['values']['frame']['lefteye'][coord_type]['y'])
                           for l in tracking_data])
    # Right-eye (x, y).
    r_x_y_data = np.array([(l['values']['frame']['righteye'][coord_type]['x'],
                            l['values']['frame']['righteye'][coord_type]['y'])
                           for l in tracking_data])
    # Fixation flags.
    f_data = np.array([l['values']['frame']['fix'] for l in tracking_data])
    return x_y_data, time_stamps, l_x_y_data, r_x_y_data, f_data
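# Minimal usage sketch (assumptions: "recording.json" is a hypothetical
# tracker log with the JSON structure the two extractors above expect, and
# "avg" is the averaged-coordinate key already used by extract_norm_x_y_t).
def _demo_extract(path="recording.json"):
    x_y, t, left_xy, right_xy, fix = extract_features(path, "avg")
    print(f"{len(t)} valid frames, last relative time stamp: {t[-1]}")
    whitened = extract_norm_x_y_t(path)  # whitened (x, y, t) rows
    print(whitened.shape)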
def test_pin_route(file='../k1.txt'):
    data = prep.clean_data(file)
    np_data = np.array(data)
    np_time = np.array(data['time'])
    pin = simulate_real_data(data, np_data)
    return pin
def create_small_dataset(dataset, size=300):
    new_dataset = f'{dataset}_small'

    # Load files
    sentences = file.get_sentences(dataset)
    labels = file.get_labels(dataset)

    # Sentences & labels: keep the first `size` documents and the third
    # tab-separated field of each label line.
    sentences_normal = sentences[0:size]
    doc_labels = list(map(lambda label: label.split(sep="\t")[2],
                          labels[0:size]))

    clean_data(new_dataset)
    file.save_sentences(sentences_normal, new_dataset)
    file.save_labels(doc_labels, new_dataset)
    print(f"Small dataset created with {size} documents (based on: {dataset})")
def pipeline(csv_name=csv_file):
    '''
    Goes from the beginning to the end of the machine learning pipeline.

    Inputs:
        csv_name: the path to a CSV file that has the data we want
            (this is initialized to the CSV file we were given for this
            assignment)

    Outputs:
        models_eval: a pandas dataframe of the different models we have
            tested, the different parameters we have tried on them, and the
            evaluation metrics we have used
    '''
    print('Importing')
    df_all_data = prep_data.import_data(csv_name)
    if df_all_data is None:
        return None
    all_cols = df_all_data.columns

    print('Exploring')
    descriptions = prep_data.explore_data(df_all_data, all_cols)

    print('Cleaning')
    df_all_data = prep_data.clean_data(df_all_data, all_cols)

    print('Generating Var and Feat')
    df_all_data, variable, features, split = prep_data.generate_var_feat(
        df_all_data, all_cols)
    df_all_data.to_csv("Data_For_Eval.csv")

    print('Modeling')
    models_dict = modeling.split_by_date(df_all_data, split, variable,
                                         features)

    print('Creating final table')
    return table_models_eval(models_dict)
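# Hypothetical entry point (assumptions: csv_file is defined at module level,
# as the default argument above implies, and the output filename here is
# illustrative). The docstring above describes the result as a pandas
# dataframe, so to_csv applies.
if __name__ == "__main__":
    models_eval = pipeline()
    if models_eval is not None:
        models_eval.to_csv("models_eval.csv", index=False)
        print(models_eval.head())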