def uploadable_object(self, obj):
    """Return True if *obj* should be uploaded to the object store.

    An uploadable object is a binary file that does not match any of
    the "ignore_patterns" listed in config.  A file matching a
    "binary_overrides" pattern is always considered uploadable.
    """
    filename = os.path.basename(obj)
    # Exclude generated files.  NOTE: the previous regex '^.*.pitem$'
    # left the dot unescaped, so any name merely *ending* in "pitem"
    # (e.g. "apitem") was wrongly excluded; endswith pins the ".pitem"
    # extension exactly.
    if filename.endswith('.pitem'):
        return False
    # Exclude files that match a pattern defined in config, e.g. "*.pyc".
    for pattern in self.options['ignore_patterns']:
        if fnmatch.fnmatch(filename, pattern):
            return False
    # Binary overrides force upload for matching patterns, e.g. "*.so".
    for pattern in self.options['binary_overrides']:
        if fnmatch.fnmatch(filename, pattern):
            return True
    # Fall back to a content-based binary check.
    return utils.is_binary(os.path.abspath(obj))
def transform(self, modes, data, mask=None, version='geometric'):
    """Transform new data given a pre-trained model.

    Components for the modes listed in *modes* are (re)initialized and
    inferred from *data*; every other mode must already match the
    pre-trained component shapes.  Returns the geometric or arithmetic
    expectations of the requested modes, depending on *version*.
    """
    assert all(mode in range(self.n_modes) for mode in modes)
    assert (version == 'geometric') or (version == 'arithmetic')
    assert data.ndim == self.n_modes

    data = preprocess(data)
    if mask is not None:
        mask = preprocess(mask)
        assert mask.shape == data.shape
        assert is_binary(mask)
        assert np.issubdtype(mask.dtype, int)

    self.mode_dims = data.shape
    for mode, dim in enumerate(self.mode_dims):
        if mode in modes:
            # Fresh component for a mode we are asked to infer.
            self._init_component(mode, dim)
        elif self.theta_E_DK_M[mode].shape[0] != dim:
            raise ValueError(
                'Pre-trained components dont match new data.')

    self._update(data, mask=mask, modes=modes)

    # Select which expectation table to report from.
    theta = self.theta_G_DK_M if version == 'geometric' else self.theta_E_DK_M
    return [theta[mode] for mode in modes]
def check_is_binary(self, line, dir):
    """Check one labeled line: '<filename>,<0-or-1>' against the classifier."""
    # Naive "CSV" parsing — each line carries a filename and a 0/1 answer.
    filename, answer = line.split(',')
    expected = bool(int(answer))
    # Run the identification and compare with the recorded answer.
    got = utils.is_binary(os.path.join(dir, filename))
    assert_equal(got, expected,
                 msg="'%s' is wrongly classified" % filename)
def fit(self, data, priv, mask=None):
    """Fit the model to *data* and return ``self`` (fluent style).

    Parameters
    ----------
    data : tensor-like with exactly ``self.n_modes`` dimensions.
    priv : privacy level; privacy variables are only initialized
        when ``priv > 0``.
    mask : optional binary int tensor with the same shape as *data*
        (after preprocessing).  NOTE(review): presumably marks the
        observed entries — confirm with ``_update``.
    """
    assert data.ndim == self.n_modes
    # Preprocessed copy is retained on the instance for later use.
    self.data_DIMS = preprocess(data)
    data_shape = self.data_DIMS.shape
    # (Removed a block of commented-out sptensor copying code that
    # preprocess() now supersedes.)
    if mask is not None:
        mask = preprocess(mask)
        assert data_shape == mask.shape
        assert is_binary(mask)
        assert np.issubdtype(mask.dtype, int)
    self._init_all_components(data_shape)
    if priv > 0:
        self._init_privacy_variables(data_shape, priv)
    self._update(self.data_DIMS, priv=priv, mask=mask)
    return self
# NOTE(review): this fragment uses names bound earlier in the enclosing
# scope (data, config, debug, logger, g, g_noy, fig_dir) — it is the
# interior of a larger function/script.
a, y = data['A'], data['Y']
# Record sample counts in the run configuration.
config['data']['samples'] = len(y)
config['max_cfu']['n_original'] = len(y)
if debug > 0:
    # Diagnostic plots are only produced in debug mode.
    logger.info("Create and save scatter plot of features...")
    plotters.plot_scatter_matrix(data, g, fig_dir, save=True)
    logger.info("Create conditional histograms...")
    for target in g.vertices():
        # 'A' is the conditioning variable, so skip it as a target.
        if target != 'A':
            plotters.plot_conditional_histograms(data, target, 'A', fig_dir)
# -------------------------------------------------------------------------
# Classification or regression problem?
# -------------------------------------------------------------------------
# A binary outcome Y selects classification; anything else, regression.
if utils.is_binary(y):
    config['cf_fair']['type'] = 'classification'
else:
    config['cf_fair']['type'] = 'regression'
# -------------------------------------------------------------------------
# Fit assumed model A via cross validation
# -------------------------------------------------------------------------
logger.info("Fit model A via CV, compute phi and residuals...")
model_a = models.ModelA(g_noy)
# fit() returns a triple; only phi and the residuals (vareps) are kept.
_, phi, vareps = model_a.fit(data, config['cf_fair'])
logger.info(f"Best parameters: {model_a.best_parameters}")
# Refit as torch with weighted ridge
logger.info("Refit model analytically...")
# Convert the fitted targets' columns to a (numpy) tensor for the refit.
targets = utils.data_to_tensor(data, list(model_a.targets.keys()), numpy=True)