Example #1
0
    def uploadable_object(self, obj):
        """Decide whether ``obj`` should be uploaded to the object store.

        An object is uploadable when its basename is not a generated
        ``.pitem`` file, matches none of the configured ``ignore_patterns``,
        and either matches one of the configured ``binary_overrides`` or is
        reported as binary by ``utils.is_binary``.

        Args:
            obj: Path to the candidate file. Only its basename is matched
                against the patterns; the absolute path is used for the
                binary check.

        Returns:
            True if the object should be uploaded, False otherwise.
        """
        filename = os.path.basename(obj)

        # Exclude generated files. A literal suffix test is used on purpose:
        # the regex '^.*.pitem$' leaves the dot unescaped, so it would also
        # reject names such as "foo_pitem" that are not ".pitem" files.
        if filename.endswith('.pitem'):
            return False

        # Exclude files that match a pattern defined in config, e.g. "*.pyc".
        if any(fnmatch.fnmatch(filename, pattern)
               for pattern in self.options['ignore_patterns']):
            return False

        # Binary overrides: names matching these config patterns are uploaded
        # without inspecting the file contents.
        if any(fnmatch.fnmatch(filename, pattern)
               for pattern in self.options['binary_overrides']):
            return True

        # Fall back to the binary check on the absolute path.
        return bool(utils.is_binary(os.path.abspath(obj)))
Example #2
0
    def transform(self, modes, data, mask=None, version='geometric'):
        """Transform new data given a pre-trained model.

        Components for the mode indices listed in ``modes`` are
        re-initialised with ``_init_component`` and then updated via
        ``_update``. Every other mode keeps its existing component, whose
        first dimension must equal the corresponding dimension of ``data``.

        Args:
            modes: Mode indices to transform; each must lie in
                ``range(self.n_modes)``.
            data: New data with ``ndim == self.n_modes``.
            mask: Optional mask with the same shape as the preprocessed
                data; it must pass ``is_binary`` and have an integer dtype.
            version: ``'geometric'`` or ``'arithmetic'``; selects whether
                entries of ``theta_G_DK_M`` or ``theta_E_DK_M`` are returned.

        Returns:
            A list with one component per entry of ``modes``.

        Raises:
            ValueError: If a mode outside ``modes`` has a pre-trained
                component whose first dimension differs from the data.
        """
        valid_modes = range(self.n_modes)
        assert all(m in valid_modes for m in modes)
        assert version in ('geometric', 'arithmetic')

        assert data.ndim == self.n_modes
        data = preprocess(data)
        if mask is not None:
            mask = preprocess(mask)
            assert data.shape == mask.shape
            assert is_binary(mask)
            assert np.issubdtype(mask.dtype, int)

        self.mode_dims = data.shape
        for m, D in enumerate(self.mode_dims):
            if m in modes:
                # Requested mode: start from a fresh component of size D.
                self._init_component(m, D)
            elif self.theta_E_DK_M[m].shape[0] != D:
                # Fixed mode: the existing component must fit the new data.
                raise ValueError(
                    'Pre-trained components dont match new data.')
        self._update(data, mask=mask, modes=modes)

        if version == 'geometric':
            components = self.theta_G_DK_M
        elif version == 'arithmetic':
            components = self.theta_E_DK_M
        else:
            # Only reachable when the assertions above are disabled.
            return None
        return [components[m] for m in modes]
Example #3
0
  def uploadable_object(self, obj):
    """Decide whether ``obj`` should be uploaded to the object store.

    An object is uploadable when its basename is not a generated ``.pitem``
    file, matches none of the configured ``ignore_patterns``, and either
    matches one of the configured ``binary_overrides`` or is reported as
    binary by ``utils.is_binary``.

    Args:
      obj: Path to the candidate file. Only its basename is matched against
        the patterns; the absolute path is used for the binary check.

    Returns:
      True if the object should be uploaded, False otherwise.
    """
    filename = os.path.basename(obj)

    # Exclude generated files. A literal suffix test is used on purpose: the
    # regex '^.*.pitem$' leaves the dot unescaped, so it would also reject
    # names such as "foo_pitem" that are not ".pitem" files.
    if filename.endswith('.pitem'):
      return False

    # Exclude files that match a pattern defined in config, e.g. "*.pyc".
    if any(fnmatch.fnmatch(filename, pattern)
           for pattern in self.options['ignore_patterns']):
      return False

    # Binary overrides: names matching these config patterns are uploaded
    # without inspecting the file contents.
    if any(fnmatch.fnmatch(filename, pattern)
           for pattern in self.options['binary_overrides']):
      return True

    # Fall back to the binary check on the absolute path.
    return bool(utils.is_binary(os.path.abspath(obj)))
    def check_is_binary(self, line, dir):
        """Verify ``utils.is_binary`` against one "filename,answer" record.

        ``line`` must contain exactly one comma, otherwise the unpacking
        below raises ``ValueError``. The file is looked up relative to
        ``dir`` and the detection result is compared with
        ``bool(int(answer))``.
        """
        # Naive "CSV" parsing: a plain split on the comma.
        name, expected_flag = line.split(',')

        # Run the detector on the file inside ``dir``.
        path = os.path.join(dir, name)
        detected = utils.is_binary(path)

        expected = bool(int(expected_flag))
        failure_msg = "'%s' is wrongly classified" % name
        assert_equal(detected, expected, msg=failure_msg)
Example #5
0
    def check_is_binary(self, line, dir):
        """Check a single "filename,answer" line with ``utils.is_binary``.

        The line is split on ',' into exactly two fields (a different
        field count raises ``ValueError`` during unpacking). The first
        field is joined onto ``dir`` and classified; the second is parsed
        as an integer and converted to the expected boolean.
        """
        # Split the "CSV" line into its two fields using naive parsing.
        fields = line.split(',')
        target_name, raw_answer = fields

        # Classify the file, then compare with the recorded answer.
        classified = utils.is_binary(os.path.join(dir, target_name))
        wanted = bool(int(raw_answer))

        assert_equal(
            classified,
            wanted,
            msg="'%s' is wrongly classified" % target_name,
        )
Example #6
0
    def fit(self, data, priv, mask=None):
        """Fit the model to ``data`` and return ``self``.

        The preprocessed data is stored on ``self.data_DIMS``. All
        components are initialised from its shape, privacy variables are
        initialised only when ``priv > 0``, and ``_update`` is then run.

        Args:
            data: Input with ``ndim == self.n_modes``.
            priv: Privacy setting, forwarded to ``_update`` and, when
                positive, to ``_init_privacy_variables``.
            mask: Optional mask with the same shape as the preprocessed
                data; it must pass ``is_binary`` and have an integer dtype.

        Returns:
            This instance.
        """
        assert data.ndim == self.n_modes

        # NOTE: an earlier variant copied the input explicitly (with a
        # dedicated branch for skt.sptensor); that path is disabled and the
        # result of preprocess() is stored instead.
        prepared = preprocess(data)
        self.data_DIMS = prepared
        dims = prepared.shape

        if mask is not None:
            mask = preprocess(mask)
            assert dims == mask.shape
            assert is_binary(mask)
            assert np.issubdtype(mask.dtype, int)

        self._init_all_components(dims)
        if priv > 0:
            self._init_privacy_variables(dims, priv)

        self._update(self.data_DIMS, priv=priv, mask=mask)
        return self
Example #7
0
a, y = data['A'], data['Y']
# The number of samples is recorded in two config sections.
config['data']['samples'] = config['max_cfu']['n_original'] = len(y)

if debug > 0:
    logger.info("Create and save scatter plot of features...")
    plotters.plot_scatter_matrix(data, g, fig_dir, save=True)
    logger.info("Create conditional histograms...")
    for target in g.vertices():
        # 'A' itself is skipped; it is passed as the third argument for
        # every other vertex.
        if target != 'A':
            plotters.plot_conditional_histograms(data, target, 'A', fig_dir)

# -------------------------------------------------------------------------
# Classification or regression problem?
# -------------------------------------------------------------------------
# A target that utils.is_binary accepts selects classification.
config['cf_fair']['type'] = (
    'classification' if utils.is_binary(y) else 'regression'
)

# -------------------------------------------------------------------------
# Fit assumed model A via cross validation
# -------------------------------------------------------------------------
logger.info("Fit model A via CV, compute phi and residuals...")
model_a = models.ModelA(g_noy)
# The first element of the fit result is not used here.
_, phi, vareps = model_a.fit(data, config['cf_fair'])
logger.info(f"Best parameters: {model_a.best_parameters}")

# Refit as torch with weighted ridge
logger.info("Refit model analytically...")
target_names = list(model_a.targets.keys())
targets = utils.data_to_tensor(data, target_names, numpy=True)