예제 #1
0
def main():
    """Entry point: resolve dataset paths, load the data, and run training."""
    # Dataset files always live next to this script.
    base_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse command-line arguments.
    options, model_obj, trainer_class, dataset_name, sub_data = read_options()

    # LEAF-format (json) datasets use a different directory layout.
    if options['data_format'] == 'json':
        data_root = os.path.join(base_dir, 'leaf', 'data', dataset_name,
                                 'data')
    else:
        data_root = os.path.join(base_dir, 'dataset', dataset_name, 'data')
    train_path = os.path.join(data_root, 'train')
    test_path = os.path.join(data_root, 'test')

    all_data_info = read_data(train_path,
                              test_path,
                              sub_data=sub_data,
                              data_format=options['data_format'])
    # Hand off to the solver.
    trainer = trainer_class(options, model_obj, all_data_info)
    trainer.train()
예제 #2
0
def main():
    """Entry point: configure TF logging, load the dataset, and train."""
    tf.logging.set_verbosity(tf.logging.WARN)
    # Dataset files always live in the grandparent directory of this script.
    base_dir = os.path.dirname(
        os.path.dirname(os.path.realpath(__file__)))

    # Parse command-line arguments.
    options, learner, trainer_class, dataset_name, sub_data = read_options()

    data_root = os.path.join(base_dir, 'dataset', dataset_name, 'data')
    train_path = os.path.join(data_root, 'train')
    test_path = os.path.join(data_root, 'test')

    all_data_info = read_data(train_path,
                              test_path,
                              sub_data=sub_data,
                              data_format=options['data_format'])
    # Hand off to the solver.
    trainer = trainer_class(params=options,
                            learner=learner,
                            dataset=all_data_info)
    trainer.train()
예제 #3
0
def split_to_support_query(train_dir, test_dir, p):
    """Merge each user's train/test samples and re-split into support/query.

    The original data format is:
    {
        <user_name>: {
            x: [],
            y: []
        }
    }
    The output keeps the same structure, but the pickled 'train' side holds
    the support set while the 'test' side holds the query set.

    :param train_dir: directory holding the original per-user training data
    :param test_dir: directory holding the original per-user test data
    :param p: split ratio forwarded to support_query_split
    :return: None (results are pickled to disk)
    """
    # Output containers in the LEAF-style layout.
    support_out = {'users': [], 'user_data': {}, 'num_samples': []}
    query_out = {'users': [], 'user_data': {}, 'num_samples': []}
    merged_stats = {'users': [], 'user_data': {}, 'num_samples': []}

    clients, groups, train_data, test_data = read_data(
        train_dir, test_dir, data_format='json')

    # Merge each user's train/test samples, then split support vs. query.
    for user_name in clients:
        print('User', user_name, end=' ')
        user_train = train_data[user_name]
        user_test = test_data[user_name]
        print('Found, train size:',
              len(user_train['y']),
              ', test size:',
              len(user_test['y']),
              end=' ')
        all_x = np.asarray(user_train['x'] + user_test['x'])
        all_y = np.asarray(user_train['y'] + user_test['y'])

        spt_x, qry_x, spt_y, qry_y = support_query_split(all_x, all_y, p=p)

        support_out['users'].append(user_name)
        support_out['user_data'][user_name] = {'x': spt_x, 'y': spt_y}
        support_out['num_samples'].append(len(spt_y))

        query_out['users'].append(user_name)
        query_out['user_data'][user_name] = {'x': qry_x, 'y': qry_y}
        query_out['num_samples'].append(len(qry_y))
        print('Query size:', len(qry_y), 'Support size:', len(spt_y))

        # Keep the merged (pre-split) data around for statistics.
        merged_stats['users'].append(user_name)
        merged_stats['user_data'][user_name] = {'x': all_x, 'y': all_y}
        merged_stats['num_samples'].append(len(all_y))

    # NOTE(review): train_prefix / test_prefix are not parameters of this
    # function — presumably module-level globals; confirm against the rest
    # of the file (they are distinct from train_dir / test_dir).
    with open(os.path.join(train_prefix, f'p_{p}.pkl'), 'wb') as outfile:
        pickle.dump(support_out, outfile)
    with open(os.path.join(test_prefix, f'p_{p}.pkl'), 'wb') as outfile:
        pickle.dump(query_out, outfile)

    # Report statistics before and after the split.
    print('拆分之前:')
    print_stats(data=merged_stats)
    print('训练:')
    print_stats(data=support_out)
    print('测试:')
    print_stats(data=query_out)