def __init__(self):
    """Set up the pipeline: build the DataHandler used for all dataset
    import/manipulation (preprocessing) and clear the cached dataframe."""
    print("HELLO FROM PIPELINE")
    ## PREPROCESSING
    # Single handler instance reused for every preprocessing step.
    print('CREATING datahandler')
    self.dh = DataHandler()
    print('CREATED datahandler')
    # Nothing loaded yet; populated later by the pipeline.
    self.dataframe = None
def get_features_and_labels(self, df, dh=None,
                            columns_back=(0, 1, 2, 6),
                            columns_thigh=(3, 4, 5, 7),
                            column_label=(8,)):
    """Split *df* into back-sensor features, thigh-sensor features and labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame holding the sensor columns and the label column.
    dh : DataHandler, optional
        Handler used for column extraction; a fresh one is created when
        omitted.
    columns_back, columns_thigh, column_label : sequence of int
        Column indices for each group.  Pass a falsy value (``None`` or an
        empty sequence) to skip extracting that group.

    Returns
    -------
    tuple
        ``(back_features, thigh_features, labels)`` as ``.values`` arrays,
        with ``None`` for any group whose column sequence was falsy.
    """
    if dh is None:
        dh = DataHandler()
    back_feat, thigh_feat, labels = None, None, None
    print(columns_back, columns_thigh, column_label)
    # BUGFIX: defaults were mutable lists — a classic shared-mutable-default
    # pitfall.  Tuples are used instead; they are converted to lists at the
    # call sites below so DataHandler still receives the same list type.
    if columns_back:
        back_feat = dh.get_rows_and_columns(dataframe=df, columns=list(columns_back)).values
    if columns_thigh:
        thigh_feat = dh.get_rows_and_columns(dataframe=df, columns=list(columns_thigh)).values
    if column_label:
        labels = dh.get_rows_and_columns(dataframe=df, columns=list(column_label)).values
    return back_feat, thigh_feat, labels
def __init__(self):
    """Construct the object with a DataHandler for importing and
    manipulating the dataset (preprocessing)."""
    ## PREPROCESSING
    print('CREATING datahandler')
    # One handler instance serves all subsequent preprocessing calls.
    self.dh = DataHandler()
    print('CREATED datahandler')
# NOTE(review): the statements before the __main__ guard use a `dh`
# (DataHandler) and `pl` (plotting helper) that are not defined in this
# chunk — presumably this was the body of a helper such as the
# commented-out make_dataframe(dh) below; confirm against the full file.

# Path to one subject's synchronised temperature CSV.
df = '../data/temp/4000181.7z/4000181/4000181-34566_2017-09-19_B_TEMP_SYNCHED_BT.csv'
print('SET INDEX TO TIMESTAMP')
# test that this works with a dataframe and not only path to csv
# thus pre-loaded and makes it run a little faster
dh.convert_column_from_str_to_datetime(dataframe=df, )
dh.set_column_as_index("time")
print('DONE')
# Re-fetch the frame after the index change so `df` reflects it.
df = dh.get_dataframe_iterator()

##########################
print('MAKE NUMERIC')
# Temperature columns arrive as strings; coerce them to numeric.
dh.convert_column_from_str_to_numeric(column_name="btemp")
dh.convert_column_from_str_to_numeric(column_name="ttemp")
print('DONE')

# Columns 6 and 7 — presumably btemp/ttemp, TODO confirm — are extracted
# and plotted.
temp = dh.get_rows_and_columns(dataframe=df, columns=[6, 7])
pl.plot_temperature(temp)


if __name__ == '__main__':
    print('CREATING datahandler')
    dh = DataHandler()
    print('CREATED datahandler')
    # sync_csv_shit(dh)
    # make_dataframe(dh)
def create_large_dafatframe_from_multiple_input_directories(self,
                                                            list_with_subjects,
                                                            back_keywords=None,
                                                            thigh_keywords=None,
                                                            label_keywords=None,
                                                            out_path=None,
                                                            merge_column=None,
                                                            master_columns=None,
                                                            slave_columns=None,
                                                            rearrange_columns_to=None,
                                                            save=False,
                                                            added_columns_name=None):
    """Merge back/thigh/label CSVs per subject directory and vertically
    stack all subjects into one dataframe.

    (Name typo "dafatframe" kept for backward compatibility with callers.)

    Parameters
    ----------
    list_with_subjects : iterable of str
        Paths to subject directories; non-existent ones are skipped with a
        warning.
    back_keywords, thigh_keywords, label_keywords : list of str, optional
        Keywords matched (case-insensitively) against "_ ."-split filename
        tokens to classify each file; default to ['Back'], ['Thigh'] and
        ['GoPro', 'Labels'] respectively.
    out_path, merge_column, master_columns, slave_columns,
    rearrange_columns_to, save :
        Forwarded to ``DataHandler.merge_csvs_on_first_time_overlap``;
        master/slave columns default to ['bx','by','bz'] / ['tx','ty','tz'].
    added_columns_name : list of str, optional
        Column names for the label CSV join; defaults to ['new_col'].

    Returns
    -------
    pandas.DataFrame or None
        The stacked frame, or ``None`` when no subject directory exists.
    """
    # BUGFIX: defaults were mutable lists (shared across calls); use None
    # sentinels and fill in fresh lists here.
    back_keywords = ['Back'] if back_keywords is None else back_keywords
    thigh_keywords = ['Thigh'] if thigh_keywords is None else thigh_keywords
    label_keywords = ['GoPro', "Labels"] if label_keywords is None else label_keywords
    master_columns = ['bx', 'by', 'bz'] if master_columns is None else master_columns
    slave_columns = ['tx', 'ty', 'tz'] if slave_columns is None else slave_columns
    added_columns_name = ["new_col"] if added_columns_name is None else added_columns_name

    def _matches(words, keywords):
        # True when any filename token equals any keyword, case-insensitively
        # (words are already lower-cased by the caller).
        return any(word == keyword.lower() for word in words for keyword in keywords)

    subjects = {}
    for subject in list_with_subjects:
        if not os.path.exists(subject):
            print("Could not find Subject at path: ", subject)
            # BUGFIX: previously fell through and crashed in os.listdir;
            # skip missing subjects as the warning implies.
            continue
        files = {}
        for sub_files_and_dirs in os.listdir(subject):
            words = [w.lower() for w in re.split("[_ .]", sub_files_and_dirs)]
            if _matches(words, back_keywords):
                files["backCSV"] = sub_files_and_dirs
            elif _matches(words, thigh_keywords):
                files["thighCSV"] = sub_files_and_dirs
            elif _matches(words, label_keywords):
                files["labelCSV"] = sub_files_and_dirs
        subjects[subject] = files

    if not subjects:
        # Nothing to merge; matches the original result for empty input.
        return None

    merged_df = None
    dh = DataHandler()
    dh_stacker = DataHandler()
    for idx, root_dir in enumerate(subjects):
        subject = subjects[root_dir]
        # NOTE(review): raises KeyError if a subject dir lacks one of the
        # three expected CSVs — confirm whether that should be tolerated.
        master = os.path.join(root_dir, subject['backCSV'])
        slave = os.path.join(root_dir, subject['thighCSV'])
        label = os.path.join(root_dir, subject['labelCSV'])
        dh.merge_csvs_on_first_time_overlap(
            master, slave,
            out_path=out_path,
            merge_column=merge_column,
            master_columns=master_columns,
            slave_columns=slave_columns,
            rearrange_columns_to=rearrange_columns_to,
            save=save,
            left_index=True,
            right_index=True
        )
        dh.add_columns_based_on_csv(label, columns_name=added_columns_name, join_type="inner")
        if idx == 0:
            # First subject seeds the accumulator.
            merged_df = dh.get_dataframe_iterator()
            continue
        # Vertically stack: append this subject's rows to the accumulated
        # dataframe.
        merged_df = dh_stacker.vertical_stack_dataframes(merged_df, dh.get_dataframe_iterator(), set_as_current_df=False)
    return merged_df