# Example #1
    ### hyperparameters and train/test file lists
    hidden_dim = 50
    sentence_len = 32
    train_file = os.path.join(DATA_DIR, TRAIN_FILE)
    test_file = os.path.join(DATA_DIR, TEST_FILE)
    # Read the per-split filename lists. Context managers guarantee the
    # handles are closed even if an exception is raised mid-read (the
    # original explicit open/close pairs leaked on error).
    with open(train_file, 'r') as fp_train:
        train_filenames = [
            os.path.join(TRAIN_DIR, line.strip()) for line in fp_train
        ]
    with open(test_file, 'r') as fp_test:
        test_filenames = [os.path.join(TEST_DIR, line.strip()) for line in fp_test]
    # A shallow copy suffices: the list holds immutable strings, so the
    # original copy.deepcopy was unnecessary overhead.
    filenames = list(train_filenames)
    filenames.extend(test_filenames)

    # Build the vocabulary over the combined train+test file list.
    corpus = DP.Corpus(DATA_DIR, filenames)
    nlabel = 8  # number of target classes

    ### create model
    model_kwargs = dict(
        embedding_dim=embedding_dim,
        hidden_dim=hidden_dim,
        vocab_size=len(corpus.dictionary),
        label_size=nlabel,
        batch_size=batch_size,
        use_gpu=use_gpu,
    )
    model = LSTMC.LSTMClassifier(**model_kwargs)
    # Move the model to GPU memory when one is requested.
    model = model.cuda() if use_gpu else model
    ### data processing
    dtrain_set = DP.TxtDatasetProcessing(DATA_DIR, TRAIN_DIR, TRAIN_FILE,
                                         TRAIN_LABEL, sentence_len, corpus)
    print('[!] available gpus:', torch.cuda.device_count())

    # DONOTCHANGE: Reserved for nsml use
    # Registers save/load/infer hooks for `model` so the nsml platform can
    # checkpoint and serve it.
    bind_model(model, config)

    # DONOTCHANGE: They are reserved for nsml
    # When nsml launches the session paused, hand control (with the current
    # local scope) back to the platform until it resumes this process.
    if config.pause:
        nsml.paused(scope=locals())

    ### Training mode
    # Used when running in training mode (the default).
    if config.mode == 'train':
        # Load the data.
        if not HAS_DATASET and not IS_ON_NSML:  # It is not running on nsml
            DATASET_PATH = '../sample_data/movie_review/'
        # NOTE(review): `total_train` is read here but is only assigned
        # below (after the train loader is built). Either this is a
        # use-before-assignment bug, or `total_train` is defined earlier in
        # the file outside this view — confirm what file list DP.Corpus
        # actually expects here.
        corpus = DP.Corpus(DATASET_PATH, total_train)
        print('[*]', 'Load corpus')

        # Load training data
        train_dataset = MovieReviewDataset(DATASET_PATH, config.strmaxlen,
                                           True, corpus)
        print('[*]', 'Load train dataset')
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=config.batch,
                                  shuffle=True,
                                  collate_fn=collate_fn,
                                  num_workers=1)
        # Number of batches per epoch in the training set.
        total_train = len(train_loader)

        # Load validation data
        test_dataset = MovieReviewDataset(DATASET_PATH,