def test_predict_functions_honor_device(random_matrix, func):
    # An unknown device string should raise rather than silently fall back.
    X = random_matrix
    mod = TorchAutoencoder(hidden_dim=5, max_iter=2)
    mod.fit(X)
    prediction_func = getattr(mod, func)
    with pytest.raises(RuntimeError):
        prediction_func(X, device="FAKE_DEVICE")
def test_simple_example_params(random_low_rank_matrix, param, expected):
    X = random_low_rank_matrix
    # Set the parametrized option and check that the fit is still good:
    ae = TorchAutoencoder(**{param: expected})
    ae.fit(X)
    ae.predict(X)
    r2 = ae.score(X)
    assert r2 > 0.92
def test_hidden_activation_in_graph(random_matrix):
    X = random_matrix
    mod = TorchAutoencoder(max_iter=1, hidden_activation=nn.ReLU())
    mod.fit(X)
    mod_hidden_activation = mod.hidden_activation.__class__
    graph_activation_class = mod.model[1].__class__
    assert mod_hidden_activation == graph_activation_class
def test_model_graph_dimensions(random_matrix, attr, layer_index, weight_dim):
    X = random_matrix
    mod = TorchAutoencoder(max_iter=1)
    mod.fit(X)
    mod_attr_val = getattr(mod, attr)
    graph_dim = mod.model[layer_index].weight.shape[weight_dim]
    assert mod_attr_val == graph_dim
def test_predict_functions_restore_device(random_matrix, func):
    # Predicting on a different device should leave the model's own
    # device attribute unchanged afterwards.
    X = random_matrix
    mod = TorchAutoencoder(hidden_dim=5, max_iter=2)
    mod.fit(X)
    current_device = mod.device
    assert current_device != torch.device("cpu:0")
    prediction_func = getattr(mod, func)
    prediction_func(X, device="cpu:0")
    assert mod.device == current_device
def test_build_dataset(random_matrix, with_y, expected):
    X = random_matrix
    mod = TorchAutoencoder()
    if with_y:
        dataset = mod.build_dataset(X, X)
    else:
        dataset = mod.build_dataset(X)
    result = next(iter(dataset))
    assert len(result) == expected
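
# The tests above rely on pytest fixtures and parametrization that are not
# shown in this excerpt. A minimal sketch of what they might look like; the
# fixture shapes and the parameter values are assumptions, not the original
# definitions:

import numpy as np
import pytest

@pytest.fixture
def random_matrix():
    # Hypothetical fixture: a small dense matrix for smoke tests.
    return np.random.uniform(size=(20, 50))

@pytest.fixture
def random_low_rank_matrix():
    # Hypothetical fixture: a rank-3 matrix the autoencoder should be able
    # to reconstruct almost perfectly.
    return np.random.uniform(size=(20, 3)).dot(np.random.uniform(size=(3, 50)))

# e.g. @pytest.mark.parametrize("func", ["predict"]) for the device tests.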
Code example #7
def autoencoder_evaluation(nrow=1000, ncol=100, rank=20, max_iter=20000):
    """This an evaluation in which `TfAutoencoder` should be able
    to perfectly reconstruct the input data, because the
    hidden representations have the same dimensionality as
    the rank of the input matrix.
    """
    X = randmatrix(nrow, rank).dot(randmatrix(rank, ncol))
    ae = TorchAutoencoder(hidden_dim=rank, max_iter=max_iter)
    ae.fit(X)
    X_pred = ae.predict(X)
    mse = (0.5 * (X_pred - X)**2).mean()
    return (X, X_pred, mse)
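
# A usage sketch for `autoencoder_evaluation`. `randmatrix` is not defined in
# this excerpt; here it is assumed to be a thin wrapper around np.random.rand:

import numpy as np

def randmatrix(m, n):
    # Hypothetical stand-in for the helper used above.
    return np.random.rand(m, n)

X, X_pred, mse = autoencoder_evaluation(nrow=200, ncol=50, rank=10, max_iter=1000)
print("Reconstruction MSE: {:0.4f}".format(mse))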
def test_model(random_matrix, model_class, pandas):
    """Just makes sure that this code will run; it doesn't check that
    it is creating good models.
    """
    X = random_matrix
    if pandas:
        X = pd.DataFrame(X)
    ae = TorchAutoencoder(hidden_dim=5, max_iter=100)
    H = ae.fit(X)
    ae.predict(X)
    H_is_pandas = isinstance(H, pd.DataFrame)
    assert H_is_pandas == pandas
def test_save_load(random_matrix):
    X = random_matrix
    mod = TorchAutoencoder(hidden_dim=5, max_iter=2)
    mod.fit(X)
    mod.predict(X)
    with tempfile.NamedTemporaryFile(mode='wb') as f:
        name = f.name
        mod.to_pickle(name)
        mod2 = TorchAutoencoder.from_pickle(name)
        mod2.predict(X)
        mod2.fit(X)
Code example #10
def model():
    # Load the scaled IMDB window-5 count matrix:
    df = pd.read_csv(os.path.join(VSM_HOME, 'imdb_window5-scaled.csv.gz'),
                     index_col=0)
    # Reweight the counts with PMI, then with the t-test statistic:
    df = vsm.pmi(df)
    df = ttest(df)
    # df = subword_enrichment(df)
    # L2-normalize each row before training the autoencoder:
    df = df.apply(vsm.length_norm, axis=1)
    df = TorchAutoencoder(hidden_dim=500, max_iter=200, eta=1e-3).fit(df)
    return full_word_similarity_evaluation(df)
Code example #11
if 'IS_GRADESCOPE_ENV' not in os.environ:
    import torch
    from torch_autoencoder import TorchAutoencoder

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    print("Using device: {}".format(device))
    for hidden_dim in (50, 150, 300, 500):
        for l2_strength in (0.0, 0.5, 1.0):
            print("Dim: {}, l2: {}".format(hidden_dim, l2_strength))
            ae = TorchAutoencoder(max_iter=100,
                                  hidden_dim=hidden_dim,
                                  eta=0.1,
                                  l2_strength=l2_strength,
                                  device=device)
            giga20_ppmi_ae = ae.fit(giga20_ppmi)
            display(full_word_similarity_evaluation(giga20_ppmi_ae))

Code example #12
print("Autoencoder evaluation MSE after {0} evaluations: {1:0.04f}".format(
    ae_max_iter, ae))

# ### Applying autoencoders to real VSMs
#
# You can apply the autoencoder directly to the count matrix, but this could interact very badly with the internal activation function: if the counts are all very high or very low, then everything might get pushed irrevocably towards the extreme values of the activation.
#
# Thus, it's a good idea to first normalize the values somehow. Here, I use `vsm.length_norm`:
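#
# For reference, a minimal sketch of the operation `vsm.length_norm` is assumed
# to perform: rescaling each row to unit Euclidean length (the real
# implementation may differ, e.g. in how it handles zero vectors):

import numpy as np

def length_norm_sketch(row):
    # Divide a row by its L2 norm so every row lies on the unit sphere.
    return row / np.sqrt(np.sum(row ** 2))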

# In[30]:

imdb5_l2 = imdb5.apply(vsm.length_norm, axis=1)

# In[31]:

imdb5_l2_ae = TorchAutoencoder(max_iter=100, hidden_dim=50,
                               eta=0.001).fit(imdb5_l2)

# In[32]:

vsm.neighbors('superb', imdb5_l2_ae).head()

# This is very slow and seems not to work all that well. To speed things up, one can first apply LSA or similar:

# In[33]:

imdb5_l2_svd100 = vsm.lsa(imdb5_l2, k=100)

# In[34]:

imdb_l2_svd100_ae = TorchAutoencoder(max_iter=1000, hidden_dim=50,
                                     eta=0.01).fit(imdb5_l2_svd100)
def test_parameter_setting(param, expected):
    mod = TorchAutoencoder()
    mod.set_params(**{param: expected})
    result = getattr(mod, param)
    assert result == expected
def test_params(param, expected):
    mod = TorchAutoencoder(**{param: expected})
    result = getattr(mod, param)
    assert result == expected
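
# The two tests above are presumably driven by a shared parametrization, e.g.
# @pytest.mark.parametrize("param, expected", PARAM_CASES). The cases below
# are hypothetical; the parameter names come from constructor calls elsewhere
# in this file:

PARAM_CASES = [
    ("hidden_dim", 10),
    ("max_iter", 5),
    ("eta", 0.05),
    ("l2_strength", 0.01),
]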
def test_build_dataset_input_dim(random_matrix, early_stopping):
    X = random_matrix
    mod = TorchAutoencoder(early_stopping=early_stopping)
    # Building the dataset should infer `input_dim` from the data:
    mod.build_dataset(X)
    assert mod.input_dim == X.shape[1]