# dh = DataHandler()
    # NOTE(review): this chunk is the interior of a loop — the enclosing `for`
    # header (binding `idx`, `master`, `slave`, `label`, `dh`, `dh_stacker`)
    # is outside the visible file; confirm against the full source.
    # Merge the master and slave sensor CSVs at their first overlapping
    # timestamp: back-sensor columns bx/by/bz from master, thigh columns
    # tx/ty/tz from slave, joined on the index (left_index/right_index).
    # Kept in memory only (save=False, out_path=None).
    dh.merge_csvs_on_first_time_overlap(master,
                                        slave,
                                        out_path=None,
                                        merge_column=None,
                                        master_columns=['bx', 'by', 'bz'],
                                        slave_columns=['tx', 'ty', 'tz'],
                                        rearrange_columns_to=None,
                                        save=False,
                                        left_index=True,
                                        right_index=True)

    # Attach a "label" column from the label CSV; join_type="inner" —
    # presumably an index-based inner join, verify in DataHandler.
    dh.add_columns_based_on_csv(label,
                                columns_name=["label"],
                                join_type="inner")

    # First iteration: nothing to stack against yet — keep this frame and
    # move on to the next subject.
    if idx == 0:
        merged_df = dh.get_dataframe_iterator()
        continue

    merged_old_shape = merged_df.shape
    # vertically stack the dataframes aka add the rows from dataframe2 as rows to the dataframe1
    merged_df = dh_stacker.vertical_stack_dataframes(
        merged_df, dh.get_dataframe_iterator(), set_as_current_df=False)

    # Sanity print: the stacked frame should have grown past its old shape.
    print("shape merged df: ", merged_df.shape, "should be ",
          dh.get_dataframe_iterator().shape, "  more than old  ",
          merged_old_shape)

print("Final merge form: ", merged_df.shape)
# Ejemplo n.º 2  (scraped-example separator — commented out so the file parses)
# 0
#################
# CLASSIFY W/ MODEL
#################
# Load one subject's synchronized back+thigh temperature CSV and run the
# trained model over the whole recording to get activity predictions.
# NOTE(review): `DataHandler`, `model` and `config` are defined elsewhere in
# this file/project — confirm they are in scope at this point.

datahandler = DataHandler()

# csv has column names as first row
datahandler.load_dataframe_from_csv(
    '../data/temp/4000181.7z/4000181/',
    '4000181-34566_2017-09-19_B_TEMP_SYNCHED_BT.csv',
    whole_days=True,
    chunk_size=20000,
    max_days=6)

# cols = time,bx,by,bz,tx,ty,tz,btemp,ttemp

# Run inference over the chunked dataframe iterator; bx/by/bz are the back
# sensor channels, tx/ty/tz the thigh sensor channels.
predictions = model.inference(
    dataframe_iterator=datahandler.get_dataframe_iterator(),
    batch_size=512,
    sequence_length=250,
    weights_path=config.WEIGHTS_PATH,
    timestamp_col="time",
    back_cols=['bx', 'by', 'bz'],
    thigh_cols=['tx', 'ty', 'tz'])

# Ask the operator whether to continue; anything other than exactly "y" aborts.
# (Fixed prompt typo: "quite" -> "quit".)
answr = input(
    "\nENTIRE DATASET CLASSIFICATION DONE\n Continue or quit [y | n]")
if answr != "y":
    # NOTE(review): os._exit(1) bypasses atexit handlers and stream flushing;
    # sys.exit(1) would be the conventional choice — confirm the hard exit
    # is intentional before changing it.
    os._exit(1)
# Manually label the recording by mapping known wall-clock intervals to class
# ids. Each interval is [date, start_time, end_time]; the meaning of ids
# '1' and '3' comes from the project's label scheme — TODO confirm.
# NOTE(review): `dh3` is created elsewhere in the file.
dh3.add_new_column()
dh3.add_labels_file_based_on_intervals(
    intervals={
        '1': [['2017-09-19', '18:31:09', '23:59:59'],
              ['2017-09-20', '00:00:00', '08:23:08'],
              ['2017-09-20', '08:35:13', '16:03:58'],
              ['2017-09-20', '16:20:21', '23:59:59'],
              ['2017-09-21', '00:00:00', '09:23:07'],
              ['2017-09-21', '09:35:40', '23:59:59'],
              ['2017-09-22', '00:00:00', '09:54:29']],
        '3': [['2017-09-20', '08:23:09', '08:35:12'],
              ['2017-09-20', '16:03:59', '16:20:20'],
              ['2017-09-21', '09:23:08', '09:35:39']]
    })

# Drop every row that never fell inside a labelled interval
# (rows outside all intervals have NaN in 'label').
dataframe_test = dh3.get_dataframe_iterator()
dataframe_test.dropna(subset=['label'], inplace=True)

###############
# RUN PIPELINE PARALLELL CODE building queues for model classification and activity classification
###############

# Do some magic numbering
# Presumably: one temperature reading arrives every `tempearture_reading_rate`
# accelerometer samples at `sampling_frequency` Hz, so samples_pr_second is
# the temperature-sample rate in Hz and samples_pr_window how many fit in one
# `window_length`-second window — TODO confirm against the sensor spec.
# NOTE(review): "tempearture" misspells "temperature"; name kept as-is since
# later (unseen) code may reference it.
sampling_frequency = 50
window_length = 120
tempearture_reading_rate = 120
samples_pr_second = 1 / (tempearture_reading_rate / sampling_frequency)
samples_pr_window = int(window_length * samples_pr_second)

# Fetch the "RFC" entry (empty dict fallback) from the project's model registry.
RFC = models.get("RFC", {})
# Ejemplo n.º 4  (scraped-example separator — commented out so the file parses)
# 0
              # NOTE(review): chunk starts mid-statement — the opening of this
              # intervals dict (and the first entries of key '1') is outside
              # the visible file. Entries are [date, start_time, end_time].
              ['2018-04-27', '10:03:39', '11:09:00']],
        '2': [['2018-04-27', '11:09:01', '12:19:00']],
        '3': [['2018-04-27', '12:19:01', '14:28:00']]
    })

# Second subject: add the label column and label it by wall-clock intervals
# ([date, start_time, end_time] per class id).
dh2.add_new_column()
dh2.add_labels_file_based_on_intervals(
    intervals={
        "1": [['2018-04-24', '12:09:00', '13:08:00']],
        '2': [['2018-04-24', '13:08:01', '14:08:00']],
        '3': [['2018-04-24', '14:08:01', '15:08:00']]
    })

###################################### remove rows that do not have a label ###########################

df1 = dh1.get_dataframe_iterator()
df2 = dh2.get_dataframe_iterator()

# Print shapes before/after dropping unlabelled rows as a quick sanity check.
print(df1.shape, df2.shape)
df1.dropna(subset=['label'], inplace=True)
df2.dropna(subset=['label'], inplace=True)
print(df1.shape, df2.shape)

############################## THEN COMBINE INTO ONE BIG TRAINING SET  AKA VERTICAL STACKING #############

# Row-wise concatenation of both subjects into a single training frame;
# set_as_current_df=False presumably leaves dh1's internal frame untouched —
# verify in DataHandler.
dataframe = dh1.vertical_stack_dataframes(df1, df2, set_as_current_df=False)
# dataframe = dh1.vertical_stack_dataframes(dataframe, df3, set_as_current_df=False)
print("DATAFRAME\n", dataframe.head(5), dataframe.shape)

############################## THEN WE MUST EXTRACT FEATURES N LABELS ######################################
# Ejemplo n.º 5  (scraped-example separator — commented out so the file parses)
# 0
# Load one subject's synched CSV with explicit column names (header row 0),
# convert the timestamp column, index by it, then attach labels read from the
# subject's interval JSON. NOTE(review): `dh`, `input_dir` and `filename` are
# bound elsewhere in the file.
dh.load_dataframe_from_csv(
    input_directory_path=input_dir,
    filename=filename,
    header=0,
    columns=['time', 'bx', 'by', 'bz', 'tx', 'ty', 'tz', 'btemp', 'ttemp'])

dh.convert_column_from_str_to_datetime(column_name='time')
dh.set_column_as_index("time")

# # add labels
dh.add_new_column("label")
intervals = dh.read_labels_from_json(
    filepath="../data/temp/xxx_x.7z/xxx_x/xxx_x intervals.json")
dh.add_labels_file_based_on_intervals(intervals=intervals)
# Inspect the labelled frame: first rows and column dtypes.
df = dh.get_dataframe_iterator()
print(df.head(10))
print()
print(df.dtypes)
#
# ########
# #
# # DATA INPUT FORMAT SPECIFIC DONE
# #
# #######
#
# # Do some magic numbering
# Presumably one temperature reading per `tempearture_reading_rate`
# accelerometer samples at `sampling_frequency` Hz — TODO confirm.
# NOTE(review): "tempearture" misspells "temperature"; kept as-is because
# later code may reference the name.
sampling_frequency = 50
window_length = 120
tempearture_reading_rate = 120
samples_pr_second = 1 / (tempearture_reading_rate / sampling_frequency)