Example #1
# imports assumed from the decompose package layout (paths may differ)
import numpy as np
from decompose.sklearn import DECOMPOSE
from decompose.distributions.cenNormal import CenNormal
from decompose.data.lowRank import LowRank


def test_sklearn_tensor(tmpdir):
    """Tests the sk-learn interface of the tensor factorisation estimator.

    The test creates a `DECOMPOSE` object and applies its `fit_transform`
    method to some low rank training data. The learned filter banks have
    to reconstruct the data very well. Then unseen test data is transformed
    into the learned basis. The test data has to be recovered from the
    transformed representation.
    """
    # create temporary directory where the model and its checkpoints are stored
    modelDirectory = str(tmpdir.mkdir("model"))

    # create a synthetic low rank dataset
    K, M_train, M_test = 3, [500, 100, 50], [500, 100, 50]
    lrData = LowRank(rank=K, M_train=M_train, M_test=M_test)

    # instantiate a model
    priors, dtype = [CenNormal(), CenNormal(), CenNormal()], np.float32
    model = DECOMPOSE(modelDirectory,
                      priors=priors,
                      n_components=K,
                      dtype=dtype)

    # train the model
    U0 = model.fit_transform(lrData.training)

    # check whether variance explained is between 0.95 and 1.
    U1, U2 = model.components_
    assert (0.95 <= lrData.var_expl_training((U0, U1, U2)) <= 1.)

    # transform test data
    transformModelDirectory = str(tmpdir.mkdir("transformModel"))
    U0test = model.transform(transformModelDirectory=transformModelDirectory,
                             X=lrData.test)
    assert (0.95 <= lrData.var_expl_test((U0test, U1, U2)) <= 1.)
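For reference, the variance-explained criterion used above can be written out by hand: reconstruct the tensor from the factor matrices and compare the residual variance to the data variance. Below is a minimal NumPy sketch; the `(K, M_i)` factor shapes and the `einsum` contraction are taken from the CV test further down, and the helper name `var_explained` is hypothetical, not part of the `LowRank` API.

import numpy as np

def var_explained(factors, data):
    # rank-K reconstruction from factor matrices of shape (K, M_i)
    U0, U1, U2 = factors
    recons = np.einsum("ka,kb,kc->abc", U0, U1, U2)
    residuals = data - recons
    # hypothetical equivalent of lrData.var_expl_training
    return 1. - np.var(residuals) / np.var(data)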
Example #2
def init(self, data: Tensor) -> None:
    # set up a centred normal noise model with a single scalar precision tau
    tau = self.__tauInit
    dtype = self.__dtype
    properties = self.__properties
    noiseDistribution = CenNormal(tau=tf.constant([tau], dtype=dtype),
                                  properties=properties)
    self.__noiseDistribution = noiseDistribution
Example #3
def test_sklearn_cv(tmpdir):
    """Tests the sk-learn interface of the tensor factorisation estimator.

    The test creates a `DECOMPOSE` object and applies its `fit_transform`
    method to some low rank training data. The learned filter banks have
    to reconstruct the data very well. Then unseen test data is transformed
    into the learned basis. The test data has to be recoverd from the
    transformed representation.
    """
    # create temporary directory where the model and its checkpoints are stored
    modelDirectory = str(tmpdir.mkdir("model"))

    # create a synthetic low rank dataset
    K, M_train, M_test = 3, [30, 100, 150], [200, 100, 150]
    lrData = LowRank(rank=K, M_train=M_train, M_test=M_test)

    # instantiate a model
    priors, dtype = [CenNormal(), CenNormal(), CenNormal()], np.float32
    model = DECOMPOSE(modelDirectory, priors=priors, n_components=K,
                      isFullyObserved=False,
                      cv=Block(nFolds=(2, 3, 3), foldNumber=3), dtype=dtype)

    # mark 20% of the elements as unobserved
    data = lrData.training.copy()
    r = np.random.random(data.shape) > 0.8
    data[r] = np.nan

    # train the model
    U0 = model.fit_transform(data)

    # get mask marking the test set
    testMask = model.testMask

    # check whether variance explained is between 0.95 and 1.
    U1, U2 = model.components_
    testIndexes = testMask.flatten()
    recons = np.einsum("ka,kb,kc->abc", U0, U1, U2)
    testResiduals = (recons - lrData.training).flatten()[testIndexes]
    testData = lrData.training.flatten()[testIndexes]
    testVarExpl = 1. - np.var(testResiduals)/np.var(testData)
    assert (0.95 <= testVarExpl <= 1.)
    assert (0.95 <= lrData.var_expl_training((U0, U1, U2)) <= 1.)
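The `Block(nFolds=(2, 3, 3), foldNumber=3)` argument cuts each tensor axis into the given number of contiguous blocks and holds out one combination of blocks as the test fold. The sketch below illustrates that idea in plain NumPy; how `foldNumber` indexes the grid of block combinations is an assumption made for illustration, not necessarily the library's convention.

import numpy as np

def block_mask(shape, nFolds, foldNumber):
    # True on the entries of one block-shaped test fold (assumed numbering)
    blockIdx = np.unravel_index(foldNumber, nFolds)
    mask = np.ones(shape, dtype=bool)
    for axis, (m, n, b) in enumerate(zip(shape, nFolds, blockIdx)):
        edges = np.linspace(0, m, n + 1).astype(int)   # block boundaries
        axisMask = np.zeros(m, dtype=bool)
        axisMask[edges[b]:edges[b + 1]] = True
        shp = [1] * len(shape)
        shp[axis] = m
        mask &= axisMask.reshape(shp)                  # intersect along this axis
    return mask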
Example #4
def __init__(self,
             modelDirectory: str,
             priors: Tuple[Distribution, ...] = (CenNormal(), CenNormal()),
             n_components: int = 3,
             isFullyObserved: bool = True,
             dtype: type = np.float32,
             maxIterations: int = 100000,
             cv: Optional[CV] = None,
             noiseUniformity: NoiseUniformity = HOMOGENEOUS,
             stopCriterionInit: StopCriterion = LlhStall(100),
             stopCriterionEM: StopCriterion = LlhStall(100),
             stopCriterionBCD: StopCriterion = LlhImprovementThreshold(.1),
             device: str = "/cpu:0") -> None:
    # store the configuration on the instance
    self.__isFullyObserved = isFullyObserved
    self.__maxIterations = maxIterations
    self.__n_components = n_components
    self.__priors = priors
    self.__dtype = dtype
    self.__cv = cv
    self.__modelDirectory = modelDirectory
    self.__device = device
    self.__noiseUniformity = noiseUniformity
    self.__stopCriterionInit = stopCriterionInit
    self.__stopCriterionEM = stopCriterionEM
    self.__stopCriterionBCD = stopCriterionBCD

    # build the underlying TensorFlow estimator
    tefa = TensorFactorisation.getEstimator(
        priors=priors,
        K=self.n_components,
        isFullyObserved=isFullyObserved,
        dtype=tf.as_dtype(dtype),
        path=modelDirectory,
        noiseUniformity=noiseUniformity,
        cv=cv,
        stopCriterionInit=stopCriterionInit,
        stopCriterionEM=stopCriterionEM,
        stopCriterionBCD=stopCriterionBCD,
        device=self.__device)
    self.__tefa = tefa
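Put together, the defaults give a two-factor model with centred normal priors, homogeneous noise, and likelihood-stall stopping. A hypothetical instantiation overriding some of them (the directory path is a placeholder; `Block` and `LlhStall` are the same classes used elsewhere on this page):

model = DECOMPOSE("/tmp/decompose-model",  # placeholder path
                  priors=(CenNormal(), CenNormal(), CenNormal()),
                  n_components=3,
                  isFullyObserved=False,
                  cv=Block(nFolds=(2, 2, 2), foldNumber=0),
                  stopCriterionEM=LlhStall(50),
                  dtype=np.float32)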
Example #5
def test_sklearn_cv(tmpdir):
    """Tests the sk-learn interface of the tensor factorisation estimator.

    The test creates a `DECOMPOSE` object and applies its `fit_transform`
    method to some low rank training data. The learned filter banks have
    to reconstruct the data very well. Then unseen test data is transformed
    into the learned basis. The test data has to be recoverd from the
    transformed representation.
    """
    # create temporary directory where the model and its checkpoints are stored
    modelDirectory = str(tmpdir.mkdir("model"))

    # create a synthetic low rank dataset
    K, M_train, M_test = 3, [500, 100], [200, 100]
    lrData = LowRank(rank=K, M_train=M_train, M_test=M_test)

    # instantiate a model
    priors, dtype = [CenNormal(), CenNormal()], np.float32
    model = DECOMPOSE(modelDirectory,
                      priors=priors,
                      n_components=K,
                      cv=Block(nFolds=(3, 3), foldNumber=3),
                      dtype=dtype)

    # train the model
    U0 = model.fit_transform(lrData.training)

    # get mask marking the test set
    testMask = model.testMask

    # check whether variance explained is between 0.95 and 1.
    U1 = model.components_
    testIndexes = testMask.flatten()
    testResiduals = (np.dot(U0.T, U1) - lrData.training).flatten()[testIndexes]
    testData = lrData.training.flatten()[testIndexes]
    testVarExpl = 1. - np.var(testResiduals) / np.var(testData)
    print("testVarExpl", testVarExpl)
    assert (0.95 <= lrData.var_expl_training((U0, U1)) <= 1.)
Example #6
def init(self, data: Tensor) -> None:
    tau = self.__tauInit
    dtype = self.__dtype
    properties = self.__properties
    noiseDistribution = CenNormal(tau=tf.constant([tau], dtype=dtype),
                                  properties=properties)
    self.__noiseDistribution = noiseDistribution
    observedMask = tf.logical_not(tf.is_nan(data))
    trainMask = tf.logical_not(self.cv.mask(X=data))
    trainMask = tf.get_variable("trainMask",
                                dtype=trainMask.dtype,
                                initializer=trainMask)
    trainMask = tf.logical_and(trainMask, observedMask)
    testMask = tf.logical_and(observedMask, tf.logical_not(trainMask))
    self.__observedMask = observedMask
    self.__trainMask = trainMask
    self.__testMask = testMask
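The three masks obey a simple relationship: observed entries are the non-NaN ones, the CV mask carves the test fold out of them, and the training set is the observed remainder. Spelled out on a toy NumPy array (an illustration of the logic above, not library code):

import numpy as np

data = np.array([[1., np.nan], [3., 4.]])          # toy data, one missing entry
cvMask = np.array([[True, False], [False, True]])  # True = held out by CV
observedMask = ~np.isnan(data)      # entries that are present at all
trainMask = observedMask & ~cvMask  # observed and not held out
testMask = observedMask & cvMask    # observed and held out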
Example #7
def init(self, data: Tensor) -> None:
    tau = self.__tauInit
    properties = self.__properties
    # per-feature precision, initialised to a constant tau
    tau = tf.ones_like(data[0]) * tau  # TODO is using ones really useful
    noiseDistribution = CenNormal(tau=tau, properties=properties)
    self.__noiseDistribution = noiseDistribution