コード例 #1
0
ファイル: predict.py プロジェクト: xhjcxxl/ccf2020-
            for key, value in self.idx2label.items()
        }


if __name__ == '__main__':
    # Prediction entry point: seed the RNGs, wrap the unlabeled CSV in a
    # DataLoader, hand it to test(), then load the files used for the output.
    dataset = '../dataset'  # dataset root directory
    config = Config(dataset)
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True  # keep results identical across runs

    config.test_path = dataset + '/unlabeled_data.csv'
    start_time = time.time()
    data_df = load_data(config.test_path, config, with_label=False)

    print('Reading testing data...')
    test_data = Mydataset(config=config, data=data_df, with_labels=False)
    # shuffle=False keeps the batches in the same order as the input rows.
    test_iter = DataLoader(dataset=test_data,
                           batch_size=config.batch_size,
                           shuffle=False)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    model = bert_RNN(config).to(config.device)
    predict_all = test(config, model, test_iter)

    # --------------------- generate output file --------------------------
    df_test = pd.read_csv(config.submit_example_path, encoding='utf-8')
    # Open the mapping file in a context manager so the handle is closed as
    # soon as it has been parsed (the bare open() call leaked it). The JSON
    # document must unpack into exactly two values.
    with open(config.id2label_path) as id2label_file:
        id2label, label2id = json.load(id2label_file)
コード例 #2
0
        self.dropout = 0.1
        self.num_layers = 2
        self.label2idx = {key: int(value) for key, value in self.label2idx.items()}
        self.idx2label = {int(key): value for key, value in self.idx2label.items()}


if __name__ == '__main__':
    # Training entry point: seed the RNGs, split the labeled CSV into
    # train/validation parts, wrap both in DataLoaders and call train().
    dataset = '../dataset'  # dataset root directory
    config = Config(dataset)

    # Seed NumPy, torch (CPU) and every CUDA device with the configured seed,
    # and force deterministic cuDNN kernels so results match across runs.
    seed = config.seed
    for seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True

    config.train_path = dataset + '/all_label_data_3class.csv'
    data_df = load_data(config.train_path, config, with_label=True)
    # Hold out 20% of the rows for validation.
    # NOTE(review): if train_test_split is scikit-learn's, its keyword is
    # `shuffle`, not `shuffle_flag` -- confirm which implementation is imported.
    train_part, valid_part = train_test_split(
        data_df,
        test_size=0.2,
        shuffle_flag=True,
        random_state=seed,
    )

    print('Reading training data...')
    train_set = Mydataset(config=config, data=train_part, with_labels=True)
    train_iter = DataLoader(
        dataset=train_set,
        batch_size=config.batch_size,
        shuffle=True,
    )

    print('Reading validation data...')
    valid_set = Mydataset(config=config, data=valid_part, with_labels=True)
    dev_iter = DataLoader(
        dataset=valid_set,
        batch_size=config.batch_size,
        shuffle=True,
    )

    model = bert_RNN(config).to(config.device)
    train(config, model, train_iter, dev_iter)