def create_local_test_dataset(dataset_num, split_point=0.7):
    """Return (data, labels) for the held-out tail (the last ``1 - split_point``
    fraction of rows) of a single local dataset."""
    dataset = get_dataset_from_file(dataset_num)
    encoded = convert_to_one_hot(dataset.get_not_repeated_activity_data())
    # Everything from the split boundary onward is the test portion.
    boundary = int(split_point * len(encoded))
    return build_dataset(encoded[boundary:, :])
def fl_test_dataset_for_client(client, split_point=0.7):
    """Return the test split — the tail ``1 - split_point`` fraction of rows —
    for one federated-learning client."""
    client_dataset = get_dataset_from_file(client)
    one_hot = convert_to_one_hot(client_dataset.get_not_repeated_activity_data())
    # Test rows start where the training fraction ends.
    start = int(split_point * len(one_hot))
    tail = one_hot[start:, :]
    return build_dataset(tail)
def fl_train_dataset_for_client(client, num_days):
    """Return (data, labels) covering up to ``num_days`` of a client's data,
    capped at the first 70% of rows so training never overlaps the test tail."""
    client_dataset = get_dataset_from_file(client)
    not_repeated = client_dataset.get_not_repeated_activity_data()
    one_hot = convert_to_one_hot(not_repeated)
    day_cutoff = get_data_of_days(not_repeated, num_days)
    cap = int(0.7 * len(one_hot))
    # Equivalent to the original if/else: take whichever boundary is smaller.
    end = min(day_cutoff, cap)
    return build_dataset(one_hot[:end, :])
def load_pie_data(home_id, num_days):
    """Return per-activity occurrence counts and matching activity labels
    for the first ``num_days`` of one home's data (for a pie chart)."""
    home_dataset = get_dataset_from_file(home_id)
    not_repeated = home_dataset.get_not_repeated_activity_data()
    one_hot = convert_to_one_hot(not_repeated)
    day_cutoff = get_data_of_days(not_repeated, num_days)
    # The activity one-hot columns are the last NUMBER_OF_ACTIVITIES features.
    activity_columns = one_hot[:day_cutoff, -NUMBER_OF_ACTIVITIES:]
    # Column i counts occurrences of the i-th mapped activity.
    stats = [
        int(sum(activity_columns[:, col]))
        for col in range(len(LIST_OF_MAPPED_2_ACTIVITIES))
    ]
    labels = list(LIST_OF_MAPPED_2_ACTIVITIES)
    return stats, labels
def create_local_train_test_dataset(dataset_num, num_days):
    """Split one home's data into a shuffled training set (at most ``num_days``
    of rows, capped at the first 70%) and a test set (the final 30%)."""
    dataset = get_dataset_from_file(dataset_num)
    not_repeated = dataset.get_not_repeated_activity_data()
    one_hot = convert_to_one_hot(not_repeated)
    day_cutoff = get_data_of_days(not_repeated, num_days)
    test_boundary = int(0.7 * len(one_hot))
    # Training never crosses into the final 30% reserved for testing.
    train_rows = one_hot[:min(day_cutoff, test_boundary), :]
    test_rows = one_hot[test_boundary:, :]
    train_data, train_labels = build_dataset(train_rows)
    test_data, test_labels = build_dataset(test_rows)
    # Fixed random_state keeps the shuffle reproducible across runs.
    train_data, train_labels = sklearn.utils.shuffle(
        train_data, train_labels, random_state=0)
    return train_data, train_labels, test_data, test_labels
def create_centralized_train_dataset(dataset_nums, num_days):
    """Concatenate the capped training windows of several homes into one
    shuffled centralized dataset.

    Parameters:
        dataset_nums: iterable of home/dataset ids accepted by
            ``get_dataset_from_file``.
        num_days: per-id mapping (indexed as ``num_days[i]``) giving the number
            of training days to take from each home.

    Returns:
        ``(all_data, all_labels)`` shuffled together with a fixed seed
        (``random_state=0``) for reproducibility.
    """
    # Collect per-home chunks and concatenate ONCE at the end. The original
    # called np.concatenate inside the loop, which copies every accumulated
    # row on each iteration (quadratic in total rows); one final concatenate
    # is linear. The leading empty arrays preserve the output shape/dtype
    # when no home contributes any samples.
    data_chunks = [np.empty((0, HISTORY_SIZE, TOTAL_NUM_OF_FEATURES))]
    label_chunks = [np.empty((0, NUMBER_OF_ACTIVITIES))]
    print('Loading dataset')
    for i in tqdm(dataset_nums):
        dataset_d = get_dataset_from_file(i)
        not_repeated = dataset_d.get_not_repeated_activity_data()
        d = convert_to_one_hot(not_repeated)
        train_day_index = get_data_of_days(not_repeated, num_days[i])
        # Cap training at the 70% boundary so it never overlaps the test tail.
        train_d = d[:min(train_day_index, int(0.7 * len(d))), :]
        data_d, labels_d = build_dataset(train_d)
        if len(data_d) == 0:
            continue  # home has too little data for even one training sample
        data_chunks.append(data_d)
        label_chunks.append(labels_d)
    all_data = np.concatenate(data_chunks, axis=0)
    all_labels = np.concatenate(label_chunks, axis=0)
    all_data_shuffled, all_labels_shuffled = sklearn.utils.shuffle(
        all_data, all_labels, random_state=0)
    return all_data_shuffled, all_labels_shuffled
from settings import LIST_OF_MAPPED_2_ACTIVITIES, NUMBER_OF_ACTIVITIES
from datasets import get_dataset_from_file, convert_to_one_hot, get_data_of_days


def client_activities_stats(client_activities):
    """Print one ``activity,count`` line per mapped activity column."""
    for col, act in enumerate(LIST_OF_MAPPED_2_ACTIVITIES):
        count = int(sum(client_activities[:, col]))
        print(act + "," + str(count))


if __name__ == "__main__":
    for i in range(121, 122):
        dataset_d = get_dataset_from_file(i)
        not_repeated = dataset_d.get_not_repeated_activity_data()
        one_hot = convert_to_one_hot(not_repeated)
        train_day_index = get_data_of_days(not_repeated, 2)
        # Activity one-hot columns occupy the last NUMBER_OF_ACTIVITIES features.
        activities_d = one_hot[:train_day_index, -NUMBER_OF_ACTIVITIES:]
        print("\n\nClient" + str(i) + "\n")
        client_activities_stats(activities_d)