Code Example #1
File: run_2.py  Project: RuiCaiNLP/SRL_UPB
    # set random seed
    seed_everything(args.seed, USE_CUDA)

    use_bert = args.use_bert

    # do preprocessing

    print('\t start loading data...')
    start_t = time.time()

    train_input_file = os.path.join(os.path.dirname(__file__),
                                    'temp/train.pickle.input')
    dev_input_file = os.path.join(os.path.dirname(__file__),
                                  'temp/dev.pickle.input')
    train_data = data_utils.load_dump_data(train_input_file)
    dev_data = data_utils.load_dump_data(dev_input_file)
    train_dataset = train_data['input_data']
    dev_dataset = dev_data['input_data'][:9000]

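    # load the French train/dev splits and merge them into a single labeled French set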
    train_input_file_fr = os.path.join(os.path.dirname(__file__),
                                       'temp/train_fr.pickle.input')
    dev_input_file_fr = os.path.join(os.path.dirname(__file__),
                                     'temp/dev_fr.pickle.input')
    train_data_fr = data_utils.load_dump_data(train_input_file_fr)
    dev_data_fr = data_utils.load_dump_data(dev_input_file_fr)
    train_dataset_fr = train_data_fr['input_data']
    dev_dataset_fr = dev_data_fr['input_data']
    #log(len(train_dataset_fr))
    #log(len(dev_dataset_fr))
    labeled_dataset_fr = train_dataset_fr + dev_dataset_fr
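
The snippet calls seed_everything(args.seed, USE_CUDA) before touching any data. The project's own helper is not shown on this page; the following is only a minimal sketch of what such a helper usually does, assuming the repository relies on NumPy and PyTorch (an assumption, not something visible in the excerpt):

import random

import numpy as np
import torch


def seed_everything(seed, use_cuda):
    # Sketch only: seed every common RNG source so runs are repeatable.
    random.seed(seed)                     # Python's built-in RNG
    np.random.seed(seed)                  # NumPy RNG
    torch.manual_seed(seed)               # PyTorch CPU RNG
    if use_cuda:
        torch.cuda.manual_seed_all(seed)  # all visible CUDA devices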
Code Example #2
                               tmp_path, 'dev.pickle.input'))

        log('\t data preprocessing finished! consuming {} s'.format(
            int(time.time() - start_t)))

    log('\t start loading data...')
    start_t = time.time()

    train_input_file = os.path.join(os.path.dirname(__file__),
                                    'temp/train.pickle.input')
    dev_input_file = os.path.join(os.path.dirname(__file__),
                                  'temp/dev.pickle.input')
    unlabeled_input_file = os.path.join(os.path.dirname(__file__),
                                        'temp/unlabeled.pickle.input')

    train_data = data_utils.load_dump_data(train_input_file)
    dev_data = data_utils.load_dump_data(dev_input_file)
    unlabeled_data = data_utils.load_dump_data(unlabeled_input_file)

    train_dataset = train_data['input_data']
    dev_dataset = dev_data['input_data']
    unlabeled_dataset = unlabeled_data['input_data']

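    # vocabulary mappings: English word2idx/idx2word and French fr_word2idx/fr_idx2word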
    word2idx = data_utils.load_dump_data(
        os.path.join(os.path.dirname(__file__), 'temp/word2idx.bin'))
    idx2word = data_utils.load_dump_data(
        os.path.join(os.path.dirname(__file__), 'temp/idx2word.bin'))

    fr_word2idx = data_utils.load_dump_data(
        os.path.join(os.path.dirname(__file__), 'temp/fr_word2idx.bin'))
    fr_idx2word = data_utils.load_dump_data(
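
Every temp/*.pickle.input and temp/*.bin file above is read through data_utils.load_dump_data. That helper lives in the project and is not shown here; assuming the files are plain pickle dumps (a guess based on the .pickle.input naming), a minimal stand-in would be:

import pickle


def load_dump_data(dump_path):
    # Sketch only: read back one pickled object from disk.
    with open(dump_path, 'rb') as f:
        return pickle.load(f)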
Code Example #3
    # set random seed
    seed_everything(args.seed, USE_CUDA)

    use_bert = args.use_bert

    # do preprocessing

    print('\t start loading data...')
    start_t = time.time()

    En_train_file_CoNLL = os.path.join(os.path.dirname(__file__),
                                       'temp/En_train_conll.pickle.input')
    Fr_dev_file = os.path.join(os.path.dirname(__file__),
                               'temp/Fr_dev.pickle.input')
    En_train_data_CoNLL = data_utils.load_dump_data(En_train_file_CoNLL)
    En_train_file_UPB = os.path.join(os.path.dirname(__file__),
                                     'temp/En_train_UPB.pickle.input')
    En_train_data_UPB = data_utils.load_dump_data(En_train_file_UPB)
    Fr_dev_data = data_utils.load_dump_data(Fr_dev_file)
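    # note: both train_dataset and dev_dataset below are taken from the English CoNLL training split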
    train_dataset = En_train_data_CoNLL['input_data']
    dev_dataset = En_train_data_CoNLL['input_data']

    #train_input_file_fr = os.path.join(os.path.dirname(__file__), 'temp/train_fr.pickle.input')
    dev_input_file_fr = os.path.join(os.path.dirname(__file__),
                                     'temp/Fr_dev.pickle.input')
    dev_data_fr = data_utils.load_dump_data(dev_input_file_fr)
    dev_dataset_fr = dev_data_fr['input_data']
    labeled_dataset_fr = dev_dataset_fr
    print(len(labeled_dataset_fr))
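
All input paths in these examples follow the same pattern: os.path.join(os.path.dirname(__file__), 'temp/<name>'), which resolves the temp/ directory relative to the script's own location rather than the current working directory. A small self-contained illustration of that pattern (temp_path and the example file name are hypothetical, introduced only for this sketch):

import os


def temp_path(name):
    # Resolve a file inside the temp/ directory that sits next to this script.
    return os.path.join(os.path.dirname(__file__), 'temp', name)


# usage: temp_path('Fr_dev.pickle.input') -> <script_dir>/temp/Fr_dev.pickle.input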