import os


def main():
    # The data files always live under this script's parent directory
    dataset_prefix = os.path.dirname(os.path.realpath(__file__))
    # Parse the command-line arguments
    options, model_obj, trainer_class, dataset_name, sub_data = read_options()
    # LEAF benchmarks ship their data as JSON; other datasets use pickle
    is_leaf = options['data_format'] == 'json'
    if is_leaf:
        train_path = os.path.join(dataset_prefix, 'leaf', 'data', dataset_name, 'data', 'train')
        test_path = os.path.join(dataset_prefix, 'leaf', 'data', dataset_name, 'data', 'test')
    else:
        train_path = os.path.join(dataset_prefix, 'dataset', dataset_name, 'data', 'train')
        test_path = os.path.join(dataset_prefix, 'dataset', dataset_name, 'data', 'test')
    all_data_info = read_data(train_path, test_path, sub_data=sub_data,
                              data_format=options['data_format'])
    # Hand everything to the solver
    trainer = trainer_class(options, model_obj, all_data_info)
    trainer.train()
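# NOTE: a minimal sketch (not the repo's actual helper) of the read_data
# contract used above, assuming the LEAF-style JSON layout: each split
# directory holds .json shards with 'users', optional 'hierarchies', and
# 'user_data' keys. The real helper also handles the pickle format chosen
# via data_format; that branch is omitted, and the sub_data filtering
# shown here is a guess.
import json
import os


def _read_json_dir(data_dir, sub_data=None):
    """Load every JSON shard in data_dir into (clients, groups, data)."""
    clients, groups, data = [], [], {}
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.json'):
            continue
        if sub_data is not None and sub_data not in file_name:
            continue  # keep only the requested sub-dataset (assumed behavior)
        with open(os.path.join(data_dir, file_name)) as f:
            shard = json.load(f)
        clients.extend(shard['users'])
        groups.extend(shard.get('hierarchies', []))
        data.update(shard['user_data'])
    return clients, groups, data


def read_data(train_dir, test_dir, sub_data=None, data_format='json'):
    """Return (clients, groups, train_data, test_data) for all clients."""
    assert data_format == 'json', 'only the JSON branch is sketched here'
    clients, groups, train_data = _read_json_dir(train_dir, sub_data)
    _, _, test_data = _read_json_dir(test_dir, sub_data)
    return clients, groups, train_data, test_data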
import os

import tensorflow as tf


def main():
    # Silence TensorFlow logging below the warning level
    tf.logging.set_verbosity(tf.logging.WARN)
    # The data files always live under the parent directory
    dataset_prefix = os.path.dirname(
        os.path.dirname(os.path.realpath(__file__)))
    # Parse the command-line arguments
    options, learner, trainer_class, dataset_name, sub_data = read_options()
    train_path = os.path.join(dataset_prefix, 'dataset', dataset_name, 'data', 'train')
    test_path = os.path.join(dataset_prefix, 'dataset', dataset_name, 'data', 'test')
    all_data_info = read_data(train_path, test_path, sub_data=sub_data,
                              data_format=options['data_format'])
    # Hand everything to the solver
    trainer = trainer_class(params=options, learner=learner, dataset=all_data_info)
    trainer.train()
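# NOTE: both main() variants above unpack five values from read_options().
# Below is a hypothetical sketch of that contract; every flag name and
# module path is illustrative, not the repo's actual CLI.
import argparse
import importlib


def read_options():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, required=True)   # e.g. 'femnist'
    parser.add_argument('--sub_data', type=str, default=None)   # optional shard name
    parser.add_argument('--model', type=str, required=True)     # e.g. 'cnn'
    parser.add_argument('--trainer', type=str, required=True)   # e.g. 'fedavg'
    parser.add_argument('--data_format', type=str, default='pkl',
                        choices=['json', 'pkl'])
    options = vars(parser.parse_args())

    # Resolve the model object and trainer class by name (hypothetical layout)
    learner = importlib.import_module('models.%s' % options['model']).Model
    trainer_class = getattr(
        importlib.import_module('trainers.%s' % options['trainer']), 'Trainer')
    return options, learner, trainer_class, options['dataset'], options['sub_data']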
import os
import pickle

import numpy as np


def split_to_support_query(train_dir, test_dir, p):
    """Re-split each client's data into support and query sets.

    The original format is {<user_name>: {'x': [], 'y': []}} and the
    converted structure is the same, except that the saved 'train' part
    now holds the support set and 'test' holds the query set.

    :param train_dir: directory with the original train split
    :param test_dir: directory with the original test split
    :param p: fraction of each client's samples used as the support set
    :return:
    """
    # Create the output data structures
    f_train_data = {'users': [], 'user_data': {}, 'num_samples': []}
    f_test_data = {'users': [], 'user_data': {}, 'num_samples': []}
    before_merged = {'users': [], 'user_data': {}, 'num_samples': []}
    train_clients, train_groups, train_data, test_data = read_data(
        train_dir, test_dir, data_format='json')
    # Merge each client's train and test data, then re-split
    for user_name in train_clients:
        print('User', user_name, end=' ')
        train_x = train_data[user_name]['x']
        train_y = train_data[user_name]['y']
        test_x = test_data[user_name]['x']
        test_y = test_data[user_name]['y']
        print('Found, train size:', len(train_y),
              ', test size:', len(test_y), end=' ')
        all_x = np.asarray(train_x + test_x)
        all_y = np.asarray(train_y + test_y)
        # Split the merged samples into support and query sets
        spt_x, qry_x, spt_y, qry_y = support_query_split(all_x, all_y, p=p)
        f_train_data['users'].append(user_name)
        f_train_data['user_data'][user_name] = {'x': spt_x, 'y': spt_y}
        f_train_data['num_samples'].append(len(spt_y))
        f_test_data['users'].append(user_name)
        f_test_data['user_data'][user_name] = {'x': qry_x, 'y': qry_y}
        f_test_data['num_samples'].append(len(qry_y))
        print('Query size:', len(qry_y), 'Support size:', len(spt_y))
        # Keep the merged data for the statistics below
        before_merged['users'].append(user_name)
        before_merged['user_data'][user_name] = {'x': all_x, 'y': all_y}
        before_merged['num_samples'].append(len(all_y))
    # Save the new splits next to their source directories
    with open(os.path.join(train_dir, f'p_{p}.pkl'), 'wb') as outfile:
        pickle.dump(f_train_data, outfile)
    with open(os.path.join(test_dir, f'p_{p}.pkl'), 'wb') as outfile:
        pickle.dump(f_test_data, outfile)
    # Print the statistics
    print('Before the split:')
    print_stats(data=before_merged)
    print('Train (support):')
    print_stats(data=f_train_data)
    print('Test (query):')
    print_stats(data=f_test_data)
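# NOTE: split_to_support_query() relies on support_query_split() to cut one
# client's merged samples into support/query parts. A minimal sketch is
# below, assuming p is the support fraction and the split is stratified by
# label; the repo's real implementation may differ (e.g. stratification
# fails if some class has fewer than two samples for a client).
from sklearn.model_selection import train_test_split


def support_query_split(x, y, p, seed=42):
    """Return (spt_x, qry_x, spt_y, qry_y) with a fraction p as support."""
    # train_test_split returns (x_train, x_test, y_train, y_test), which
    # maps directly onto (support_x, query_x, support_y, query_y).
    return train_test_split(x, y, train_size=p, random_state=seed, stratify=y)


# Example usage with an illustrative dataset layout (paths are assumptions):
# split_to_support_query('dataset/femnist/data/train',
#                        'dataset/femnist/data/test', p=0.2)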