コード例 #1
0
ファイル: Vocab.py プロジェクト: LindgeW/AutoEncoder
def create_vocab(path):
    """Build a word vocabulary from the dataset stored at ``path``.

    Every instance returned by ``load_dataset(path)`` contributes its
    ``words`` to a single frequency counter, which is then wrapped in a
    ``WordVocab``.

    Args:
        path: Location of the dataset, passed straight to ``load_dataset``.

    Returns:
        A ``WordVocab`` constructed from the accumulated word counts.
    """
    word_freq = Counter()
    for instance in load_dataset(path):
        # Counter.update adds one count per element of ``instance.words``.
        word_freq.update(instance.words)
    return WordVocab(word_freq)
コード例 #2
0
    # Seed the PyTorch CPU generator and the CUDA generators (current device
    # and all devices).
    # NOTE(review): the CPU seed is 1234 while both CUDA seeds are 1344 --
    # confirm whether the difference is intentional or a typo.
    torch.manual_seed(1234)
    torch.cuda.manual_seed(1344)
    torch.cuda.manual_seed_all(1344)

    # Report the CUDA / cuDNN status and the number of visible GPUs.
    print('cuda available:', torch.cuda.is_available())
    print('cuDnn available:', torch.backends.cudnn.enabled)
    print('GPU numbers:', torch.cuda.device_count())

    # Read the path configuration, build the char and bichar vocabularies from
    # the training data, and obtain the embedding weights for each vocabulary.
    # create_vocab is expected to return a (char_vocab, bichar_vocab) pair here.
    # presumably get_embedding_weights loads pretrained vectors from the given
    # file -- verify against the vocabulary class.
    data_path = get_data_path("./conf/datapath.json")
    char_vocab, bichar_vocab = create_vocab(data_path['data']['train_data'])
    char_embed_weights = char_vocab.get_embedding_weights(
        data_path['pretrained']['char_embedding'])
    bichar_embed_weights = bichar_vocab.get_embedding_weights(
        data_path['pretrained']['bichar_embedding'])

    # Load the train / dev / test splits; each call receives char_vocab.
    train_data = load_dataset(data_path['data']['train_data'], char_vocab)
    print('train data size:', len(train_data))
    dev_data = load_dataset(data_path['data']['dev_data'], char_vocab)
    print('dev data size:', len(dev_data))
    test_data = load_dataset(data_path['data']['test_data'], char_vocab)
    print('test data size:', len(test_data))

    # Copy the vocabulary-derived sizes onto args before the model is built.
    # Note that tag_size and rel_size are both read from char_vocab.
    args = args_config()
    args.char_vocab_size = char_vocab.vocab_size
    args.bichar_vocab_size = bichar_vocab.vocab_size
    args.tag_size = char_vocab.tag_size
    args.rel_size = char_vocab.rel_size

    # Build the parser model with both embedding weight sets.
    parser_model = ParserModel(args, char_embed_weights, bichar_embed_weights)
    # Use the CUDA device with index args.cuda only when CUDA is available and
    # the index is non-negative.
    # NOTE(review): no CPU fallback for args.device is visible in this excerpt.
    if torch.cuda.is_available() and args.cuda >= 0:
        args.device = torch.device('cuda', args.cuda)
コード例 #3
0
ファイル: train.py プロジェクト: LindgeW/AutoEncoder
from classifier import Classifier

if __name__ == '__main__':
    # Set the random seeds (fix the random values) for NumPy and PyTorch.
    np.random.seed(666)
    torch.manual_seed(6666)
    torch.cuda.manual_seed(1234)  # seed the current GPU
    # torch.cuda.manual_seed_all(4321)  # seed all GPUs (if there are several)

    # Report the CUDA / cuDNN status and the number of visible GPUs.
    print('GPU available: ', torch.cuda.is_available())
    print('CuDNN available: ', torch.backends.cudnn.enabled)
    print('GPU number: ', torch.cuda.device_count())

    # Load the data (training set for learning, development set for tuning,
    # test set for evaluation); the paths come from the JSON path config.
    data_opts = config.data_path_parse('./config/data_path.json')
    train_data = load_dataset(data_opts['data']['train_data'])
    dev_data = load_dataset(data_opts['data']['dev_data'])
    test_data = load_dataset(data_opts['data']['test_data'])
    print('train_size=%d  dev_size=%d  test_size=%d' %
          (len(train_data), len(dev_data), len(test_data)))

    # Set up the arguments (data parameters + model parameters).
    # The CUDA device with index args.cuda is used only when it is both
    # requested (args.enable_cuda) and available; otherwise fall back to CPU.
    args = config.arg_parse()
    if args.enable_cuda and torch.cuda.is_available():
        args.device = torch.device('cuda', args.cuda)
    else:
        args.device = torch.device('cpu')
    print(args.device)

    # Create the vocabulary from the training data.
    vocab = create_vocab(data_opts['data']['train_data'])