Example #1
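The scraper drops import lines. For the test snippets (#1-#7) they amount to roughly the following, assuming the fastai 0.7-era stack; Example #8 additionally needs pandas, the fastai text, dataloader and metrics modules, and the script's own helpers (preprocess_csv, get_all, merge_dict, load_hyperparams) plus its path and constant globals:

import numpy as np
import pytest
import torch
from unittest import mock

from fastai import core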
# note: the scraper stripped this test's mock.patch decorator (or fixture)
# that injects HalfTensorMock; Example #2 shows the same test with an inline patch
def test_T(HalfTensorMock):
    tensor = torch.ones([1, 2])
    assert core.T(tensor) is tensor

    array = np.arange(0, 5)
    assert core.T(array.astype(np.int64)).type() == "torch.LongTensor"
    assert core.T(array.astype(np.float64)).type() == "torch.FloatTensor"

    core.T(array.astype(np.float64), half=True)
    HalfTensorMock.assert_called_once()

    with pytest.raises(NotImplementedError):
        core.T(array.astype(object))
Example #2
def test_T():
    tensor = torch.ones([1, 2])
    np.testing.assert_equal(core.to_np(core.T(tensor)), [[1, 1]])

    array = np.arange(0, 5)
    assert core.T(array.astype(np.int64)).type() == "torch.LongTensor"
    assert core.T(array.astype(np.float64)).type() == "torch.FloatTensor"

    with mock.patch("fastai.core.to_half") as to_half_mock:
        core.T(array.astype(np.float64), half=True)
        to_half_mock.assert_called_once()

    with pytest.raises(NotImplementedError):
        core.T(array.astype(object))
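Both tests pin down the same contract for core.T: an existing tensor passes straight through, integer arrays become torch.LongTensor, float arrays become torch.FloatTensor (or half precision when half=True, routed through fastai.core.to_half as Example #2's mock shows), and any other dtype raises NotImplementedError. A minimal usage sketch, assuming the fastai 0.7 API and, for the half path, a CUDA device:

import numpy as np
from fastai import core

t = core.T(np.arange(5))                    # int dtype   -> torch.LongTensor
f = core.T(np.arange(5, dtype=np.float64))  # float dtype -> torch.FloatTensor
h = core.T(np.arange(5, dtype=np.float64), half=True)  # converted via core.to_half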
Example #3
def test_create_variable_passing_Variable_object():
    v = torch.autograd.Variable(core.T(np.arange(0, 3)))
    cv = core.create_variable(v, volatile=True)
    if core.IS_TORCH_04:
        assert (cv == v).all()
    else:
        assert cv is v
Example #4
def test_to_np():
    array = np.arange(0, 3).astype(np.float64)
    assert core.to_np(array) is array

    tensor = core.T(array)
    result = core.to_np([tensor, tensor])
    np.testing.assert_equal(result[0], array)
    np.testing.assert_equal(result[1], array)

    variable = core.V(array)
    np.testing.assert_equal(core.to_np(variable), array)

    with mock.patch("torch.cuda") as cuda_mock:
        tensor_long = core.T(array.astype(np.int))
        cuda_mock.is_available(return_value=True)
        cuda_mock.HalfTensor = torch.LongTensor

        array = core.to_np(tensor_long)
        np.testing.assert_equal(array, [0., 1., 2.])
        assert array.dtype in (np.float32, np.float64)
Example #5
def test_to_np():
    array = np.arange(0, 3).astype(np.float64)
    assert core.to_np(array) is array

    tensor = core.T(array)
    result = core.to_np([tensor, tensor])
    np.testing.assert_equal(result[0], array)
    np.testing.assert_equal(result[1], array)

    variable = core.V(array)
    np.testing.assert_equal(core.to_np(variable), array)
Example #6
def test_to_np():
    array = np.arange(0, 3).astype(np.float64)
    assert core.to_np(array) is array

    tensor = core.T(array)
    result = core.to_np([tensor, tensor])
    np.testing.assert_equal(result[0], array)
    np.testing.assert_equal(result[1], array)

    variable = core.V(array)
    np.testing.assert_equal(core.to_np(variable), array)

    with mock.patch("torch.cuda.is_available") as is_available_mock:
        with mock.patch("fastai.core.is_half_tensor") as is_half_tensor_mock:
            is_available_mock.return_value = True
            is_half_tensor_mock.return_value = True

            tensor = core.T(array.astype(np.int64))

            array = core.to_np(tensor)
            np.testing.assert_equal(array, [0., 1., 2.])
            assert array.dtype in (np.float32, np.float64)
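Examples #4-#6 jointly pin down core.to_np's contract: numpy arrays come back untouched (the very same object), tensors and autograd Variables are converted to arrays, lists are converted element-wise, and on CUDA a half tensor is upcast to float first. A short self-check along those lines, assuming fastai 0.7:

import numpy as np
from fastai import core

a = np.arange(3, dtype=np.float64)
assert core.to_np(a) is a                              # arrays pass through
np.testing.assert_equal(core.to_np(core.T(a)), a)      # tensors -> arrays
np.testing.assert_equal(core.to_np([core.T(a)] * 2), [a, a])  # lists map element-wise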
Example #7
def test_create_variable_passing_Variable_object():
    v = torch.autograd.Variable(core.T(np.arange(0, 3)))
    assert core.create_variable(v, volatile=True) is v
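Examples #3 and #7 are the same test written for either side of the torch 0.4 release, which merged Variable into Tensor: before 0.4, create_variable returns an existing Variable unchanged (hence the identity assert in #7), while from 0.4 on wrapping yields a new object, so #3 falls back to an element-wise equality check behind core.IS_TORCH_04. A sketch of such a version guard (a hypothetical reconstruction, not fastai's exact code):

import torch

# True on torch 0.4 and later, where Variable and Tensor are one type
IS_TORCH_04 = tuple(int(p) for p in torch.__version__.split('.')[:2]) >= (0, 4)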
Example #8
def run(constant_overwrites):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    config_path = os.path.join(dir_path, 'hyperparams.yml')
    constants = merge_dict(load_hyperparams(config_path), constant_overwrites)

    # data subdir expected to exist
    LM_PATH.mkdir(exist_ok=True)
    (LM_PATH / 'tmp').mkdir(exist_ok=True)
    CLAS_PATH.mkdir(exist_ok=True)
    (CLAS_PATH / 'tmp').mkdir(exist_ok=True)

    data_path = os.path.join(dir_path, 'train.csv')
    if not os.path.exists(data_path):
        train_df, val_df, test_df, x_train, y_train, x_val, y_val, x_test, y_test, classes = preprocess_csv()
    else:
        train_df = pd.read_csv(data_path, header=None, chunksize=CHUNKSIZE)
        # x_train, y_train = train_df[0].values, train_df[1].values
        val_df = pd.read_csv(os.path.join(dir_path, 'val.csv'),
                             header=None,
                             chunksize=CHUNKSIZE)
        # x_val, y_val = val_df[0].values, val_df[1].values
        # test_df = pd.read_csv(dir_path + '/test.csv', header=None, chunksize=CHUNKSIZE)
        # x_test, y_test = test_df[0].values, test_df[1].values
        # classes = np.genfromtxt(dir_path + '/classes.txt', dtype=str)

    # print('Counts x_train: {}, y_train: {}, x_val: {}, y_val: {}, x_test: {}, y_test: {}, classes: {}'
    #       .format(len(x_train), len(y_train), len(x_val), len(y_val), len(x_test), len(y_test), len(classes)))

    if constants['train_lm']:
        logging.info('Training LM...')
        if (LM_PATH / 'tmp' / 'tok_train.npy').exists():
            logging.info('Loading tokens...')
            tok_train = np.load(LM_PATH / 'tmp' / 'tok_train.npy')
            tok_val = np.load(LM_PATH / 'tmp' / 'tok_val.npy')
        else:
            logging.info('Get tokens...')
            tok_train, labels_train = get_all(train_df, 1)
            tok_val, labels_val = get_all(val_df, 1)
            np.save(LM_PATH / 'tmp' / 'tok_train.npy', tok_train)
            np.save(LM_PATH / 'tmp' / 'tok_val.npy', tok_val)

        if (LM_PATH / 'tmp' / 'itos.pkl').exists():
            train_ids = np.load(LM_PATH / 'tmp' / 'train_ids.npy')
            val_ids = np.load(LM_PATH / 'tmp' / 'val_ids.npy')
            itos = pickle.load(open(LM_PATH / 'tmp' / 'itos.pkl', 'rb'))
        else:
            freq = collections.Counter(t for ts in tok_train for t in ts)
            itos = [t for t, k in freq.most_common(MAX_VOCAB)
                    if k > MIN_FREQ]  # int idx to str token
            itos.insert(0, '_unk_')  # index 0: OOV fallback used by stoi below
            itos.insert(1, '_pad_')  # index 1: matches pad_idx/pad_token used later
            stoi = collections.defaultdict(
                lambda: 0,
                {t: i for i, t in enumerate(itos)})  # str token to int idx
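            # e.g. with itos = ['_unk_', '_pad_', 'the', ...]: stoi['the'] == 2,
            # while any unseen token falls back to index 0 ('_unk_')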
            train_ids = np.array([[stoi[t] for t in ts] for ts in tok_train])
            val_ids = np.array([[stoi[t] for t in ts] for ts in tok_val])
            np.save(LM_PATH / 'tmp' / 'train_ids.npy', train_ids)
            np.save(LM_PATH / 'tmp' / 'val_ids.npy', val_ids)
            pickle.dump(itos, open(LM_PATH / 'tmp' / 'itos.pkl', 'wb'))

        vocab_size = len(itos)
        emb_dim, n_hidden, n_layers = 400, 1150, 3
        pre_path = PATH / 'models' / 'wt103'
        pre_lm_path = pre_path / 'fwd_wt103.h5'
        w = torch.load(pre_lm_path, map_location=lambda storage, loc: storage)
        enc_w = fastai.to_np(w['0.encoder.weight'])  # pretrained wt103 embedding matrix
        row_mean = enc_w.mean(0)  # fallback row for tokens wt103 never saw
        itos_model = pickle.load((pre_path / 'itos_wt103.pkl').open('rb'))
        stoi_model = collections.defaultdict(
            lambda: -1,
            {t: i for i, t in enumerate(itos_model)})
        new_w = np.zeros((vocab_size, emb_dim), dtype=np.float32)
        for i, t in enumerate(itos):
            j = stoi_model[t]
            new_w[i] = enc_w[j] if j >= 0 else row_mean  # transplant row, or mean row for OOV

        # point the encoder, its dropout twin, and the tied decoder at the new vocab
        w['0.encoder.weight'] = fastai.T(new_w)
        w['0.encoder_with_dropout.embed.weight'] = fastai.T(np.copy(new_w))
        w['1.decoder.weight'] = fastai.T(np.copy(new_w))

        wd = 1e-7  # weight decay
        bptt = 70  # backprop-through-time window: tokens per training chunk
        batch_size = 52
        optimizer_fn = functools.partial(torch.optim.Adam, betas=(0.8, 0.99))

        dl_train = ftext.LanguageModelLoader(np.concatenate(train_ids),
                                             batch_size, bptt)  # data loader
        dl_val = ftext.LanguageModelLoader(np.concatenate(val_ids), batch_size,
                                           bptt)
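        # LanguageModelLoader (sketch): splits the concatenated ids into
        # batch_size parallel streams and yields ~bptt-token slices of each,
        # with targets shifted one token ahead for next-word prediction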
        md = ftext.LanguageModelData(PATH,
                                     1,  # pad_idx
                                     vocab_size,
                                     dl_train,
                                     dl_val,
                                     batch_size=batch_size,
                                     bptt=bptt)
        drops = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * 0.7
        learner = md.get_model(optimizer_fn,
                               emb_dim,
                               n_hidden,
                               n_layers,
                               dropouti=drops[0],
                               dropout=drops[1],
                               wdrop=drops[2],
                               dropoute=drops[3],
                               dropouth=drops[4])
        learner.metrics = [fmetrics.accuracy]
        learner.freeze_to(-1)
        learner.model.load_state_dict(w)

        lr = 1e-3
        lrs = lr
        learner.fit(lrs / 2, 1, wds=wd, use_clr=(32, 2), cycle_len=1)
        learner.save('lm_last_ft')
        learner.unfreeze()  # train all layers for the long run (the upstream ULMFiT script unfreezes here; this port apparently dropped it)
        learner.lr_find(start_lr=lrs / 10, end_lr=lrs * 10, linear=True)
        learner.sched.plot()
        learner.fit(lrs, 1, wds=wd, use_clr=(20, 10), cycle_len=15)
        learner.save('lm1')
        learner.save_encoder('lm1_enc')
        learner.sched.plot_loss()
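        # net effect: one epoch tuning only the last layer group (the new
        # embeddings), then 15 unfrozen cycles over the whole LM; 'lm1_enc'
        # holds the encoder weights the classifier below reloads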

    if (CLAS_PATH / 'tmp' / 'tok_train.npy').exists():
        tok_train = np.load(CLAS_PATH / 'tmp' / 'tok_train.npy')
        tok_val = np.load(CLAS_PATH / 'tmp' / 'tok_val.npy')
        labels_train = np.load(CLAS_PATH / 'tmp' / 'labels_train.npy')
        labels_val = np.load(CLAS_PATH / 'tmp' / 'labels_val.npy')
    else:
        tok_train, labels_train = get_all(train_df, 1)
        tok_val, labels_val = get_all(val_df, 1)
        np.save(CLAS_PATH / 'tmp' / 'tok_train.npy', tok_train)
        np.save(CLAS_PATH / 'tmp' / 'tok_val.npy', tok_val)
        np.save(CLAS_PATH / 'tmp' / 'labels_train.npy', labels_train)
        np.save(CLAS_PATH / 'tmp' / 'labels_val.npy', labels_val)

    if (CLAS_PATH / 'tmp' / 'itos.pkl').exists():
        train_ids = np.load(CLAS_PATH / 'tmp' / 'train_ids.npy')
        val_ids = np.load(CLAS_PATH / 'tmp' / 'val_ids.npy')
        itos = pickle.load(open(CLAS_PATH / 'tmp' / 'itos.pkl', 'rb'))
    else:
        freq = collections.Counter(t for ts in tok_train for t in ts)
        itos = [t for t, k in freq.most_common(MAX_VOCAB)
                if k > MIN_FREQ]  # int idx to str token
        itos.insert(0, '_unk_')  # index 0: OOV fallback used by stoi below
        itos.insert(1, '_pad_')  # index 1: matches pad_idx/pad_token below
        stoi = collections.defaultdict(
            lambda: 0,
            {t: i for i, t in enumerate(itos)})  # str token to int idx
        train_ids = np.array([[stoi[t] for t in ts] for ts in tok_train])
        val_ids = np.array([[stoi[t] for t in ts] for ts in tok_val])
        np.save(CLAS_PATH / 'tmp' / 'train_ids.npy', train_ids)
        np.save(CLAS_PATH / 'tmp' / 'val_ids.npy', val_ids)
        pickle.dump(itos, open(CLAS_PATH / 'tmp' / 'itos.pkl', 'wb'))

    vocab_size = len(itos)
    bptt = 70  # backprop-through-time window: tokens per training chunk
    emb_dim, n_hidden, n_layers = 400, 1150, 3
    # optimizer_fn = functools.partial(optim.Adam, betas=(0.8, 0.99))
    batch_size = 48

    min_label = min(labels_train)
    labels_train -= min_label  # shift labels so they start at 0
    labels_val -= min_label
    k = int(max(labels_train)) + 1  # number of classes

    ds_train = ftext.TextDataset(train_ids, labels_train)
    ds_val = ftext.TextDataset(val_ids, labels_val)
    sampler_train = ftext.SortishSampler(train_ids,
                                         key=lambda x: len(train_ids[x]),
                                         bs=batch_size // 2)
    sampler_val = ftext.SortSampler(val_ids, key=lambda x: len(val_ids[x]))
    dl_train = dataloader.DataLoader(ds_train,
                                     batch_size // 2,
                                     transpose=True,
                                     num_workers=1,
                                     pad_idx=1,
                                     sampler=sampler_train)
    dl_val = dataloader.DataLoader(ds_val,
                                   batch_size // 2,
                                   transpose=True,
                                   num_workers=1,
                                   pad_idx=1,
                                   sampler=sampler_val)
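    # SortishSampler (sketch): shuffle, then sort within large chunks by text
    # length, so each batch holds similar-length sequences (less padding) while
    # batch order stays roughly random; SortSampler fully sorts the val set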
    md = fdata.ModelData(PATH, dl_train, dl_val)

    # drops = np.array([0.4, 0.5, 0.05, 0.3, 0.1])
    drops = np.array([0.4, 0.5, 0.05, 0.3, 0.4]) * 0.5
    model = lm_rnn.get_rnn_classifier(bptt,
                                      20 * 70,  # max_seq: cap inputs at 20 * bptt tokens
                                      k,
                                      vocab_size,
                                      emb_sz=emb_dim,
                                      n_hid=n_hidden,
                                      n_layers=n_layers,
                                      pad_token=1,
                                      layers=[emb_dim * 3, 50, k],
                                      drops=[drops[4], 0.1],
                                      dropouti=drops[0],
                                      wdrop=drops[1],
                                      dropoute=drops[2],
                                      dropouth=drops[3])
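    # pooling head on top of the LM encoder: layers=[emb_dim * 3, 50, k]; the
    # * 3 comes from concat pooling (final hidden state + max pool + mean pool)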
    optimizer_fn = functools.partial(torch.optim.Adam, betas=(0.7, 0.99))
    # learner = RNN_Learner(md, TextModel(to_gpu(model)), opt_fn=optimizer_fn)
    learner = ftext.RNN_Learner(md,
                                ftext.TextModel(model),
                                opt_fn=optimizer_fn)
    learner.reg_fn = functools.partial(lm_rnn.seq2seq_reg, alpha=2, beta=1)
    learner.clip = 25.0
    learner.metrics = [fmetrics.accuracy]

    # lr = 3e-3
    # lrm = 2.6
    # lrs = np.array([lr / lrm**4, lr / lrm**3, lr / lrm**2, lr / lrm, lr])
    lrs = np.array([1e-4, 1e-4, 1e-4, 1e-3, 1e-2])

    # wd = 1e-7  # weight decay
    wd = 0
    learner.load_encoder('lm1_enc')
    learner.freeze_to(-1)
    learner.lr_find(lrs / 1000)
    learner.sched.plot()
    learner.fit(lrs, 1, wds=wd, cycle_len=1, use_clr=(8, 3))
    learner.save('clas_0')
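The commented-out lr/lrm block above is ULMFiT's discriminative fine-tuning schedule: each deeper layer group trains at the next rate down a geometric ladder, and the active lrs array plays the same role with hand-picked values. Reconstructed as a standalone snippet from the values in those comments:

import numpy as np

lr, lrm = 3e-3, 2.6  # base rate and per-group divisor, from the commented block
lrs = np.array([lr / lrm**4, lr / lrm**3, lr / lrm**2, lr / lrm, lr])  # one rate per layer group, smallest for the earliest layers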