def timeit(func, *args, **kwargs):
    """Compute the mean execution time of func based on 7 measures."""
    times = []
    tries = kwargs['tries']
    kwargs.pop('tries')
    if tries > 1:
        tries += 2
    for _ in range(tries):
        kill_disk_cache()
        t0 = time.time()
        out = func(*args, **kwargs)
        if 1:
            # Just time the function
            t1 = time.time()
            times.append(t1 - t0)
        else:
            # Compute a hash of the output, to estimate the time
            # necessary to access the elements: this is a better
            # estimate of the time to load with memmapping.
            joblib.hash(out)
            t1 = time.time()
            joblib.hash(out)
            t2 = time.time()
            times.append(t2 - t0 - 2 * (t2 - t1))
    times.sort()
    return np.mean(times[1:-1]) if tries > 1 else t1 - t0, out
def inner(*args, **kargs):
    a = d.get(hash((args, kargs)))
    if a is None:
        a = d[hash((args, kargs))] = []
    yield from a
    for x in drop(len(a), f(*args, **kargs)):
        a.append(x)
        yield x
def is_damaged(self):
    mem = self.stored()
    if mem and 'obj' in mem:
        if self._obj is None:
            self._memory['obj'] = mem['obj']
            self._obj = dill.loads(base64.b64decode(mem['obj']))
            return self._obj is None
        else:
            return joblib.hash(self._obj) != \
                joblib.hash(dill.loads(base64.b64decode(mem['obj'])))
    else:
        return self._obj is None
def test_joblib_cache():
    from joblib import hash
    # Dummy mask
    data = np.zeros((40, 40, 40, 2))
    data[20, 20, 20] = 1
    data_img = Nifti1Image(data, np.eye(4))
    with testing.write_tmp_imgs(data_img, create_files=True) \
            as filename:
        masker = NiftiMasker(mask=filename)
        masker.fit()
        mask_hash = hash(masker.mask_img_)
        masker.mask_img_.get_data()
        assert_true(mask_hash == hash(masker.mask_img_))
def hash_codeobj(code):
    """Return hashed version of a code object"""
    bytecode = code.co_code
    consts = code.co_consts
    consts = [hash_codeobj(c) if isinstance(c, types.CodeType) else c
              for c in consts]
    return joblib.hash((bytecode, consts))
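# A minimal usage sketch (not from the original source): hash_codeobj should be
# stable for the same function body and change when the bytecode or constants
# change. It assumes the `types` and `joblib` modules used above are importable.
import types
import joblib


def _one():
    return 1


def _two():
    return 2


assert hash_codeobj(_one.__code__) == hash_codeobj(_one.__code__)  # stable for identical code
assert hash_codeobj(_one.__code__) != hash_codeobj(_two.__code__)  # differs when constants differ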
def memcached(*args, **kwargs):
    """Cache the function in memory."""
    h = hash((args, kwargs))
    if h in cache:
        # logger.debug("Get %s(%s) from memcache.", name, str(args))
        return cache[h]
    else:
        # logger.debug("Compute %s(%s).", name, str(args))
        out = f(*args, **kwargs)
        cache[h] = out
        return out
def checksum(self):
    if not self._checksum:
        m = hashlib.sha1()
        for ia in self.input_args:
            if isinstance(ia, target.Target):
                m.update(ia.checksum())
            else:
                m.update(joblib.hash(ia).encode())
        m.update(self.get_code(self.user_outputs))
        m.update(self.get_code(self.user_run))
        self._checksum = m.digest()
    return self._checksum
def checksum(self):
    if not self._checksum:
        m = hashlib.sha1()
        for ia in full_traverse(self.input_args):
            if isinstance(ia, target.Target):
                m.update(ia.checksum())
            else:
                m.update(joblib.hash(ia).encode())
        m.update('\n'.join(inspect.getsourcelines(self.user_outputs)[0]).encode('utf-8'))
        m.update('\n'.join(inspect.getsourcelines(self.user_run)[0]).encode('utf-8'))
        self._checksum = m.hexdigest()
    return self._checksum
def make_key(args, kwds, typed, tuple=tuple, sorted=sorted, type=type):
    # helper function to build a cache key from positional and keyword args
    key = args
    if kwds:
        sorted_items = tuple(sorted(kwds.items()))
        key += kwd_mark + sorted_items
    if typed:
        key += tuple(type(v) for v in args)
        if kwds:
            key += tuple(type(v) for k, v in sorted_items)
    if use_joblib_hash:
        key = joblib.hash(key)
    return key
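# Hypothetical demo of make_key (the names `kwd_mark` and `use_joblib_hash` are
# module-level globals assumed by the function above, so stand-ins are defined here).
import joblib

kwd_mark = ('__kwargs__',)   # sentinel separating positional args from keyword items
use_joblib_hash = True       # collapse the key tuple into a joblib.hash digest

k1 = make_key((1, 2), {'a': 1, 'b': 2}, typed=False)
k2 = make_key((1, 2), {'b': 2, 'a': 1}, typed=False)
assert k1 == k2  # keyword order is normalized by sorting before hashing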
def hash(self):
    if isinstance(self.target, types.BuiltinFunctionType):
        function_hash = None
    else:
        function_hash = hash_codeobj(self.target.__code__)
    uniquity = (self.trail, self.args, self.kwargs, function_hash)
    if self.has_deps():
        previous_hash = "".join(p.hash() for p in self.previous())
    else:
        previous_hash = ""
    return previous_hash + joblib.hash(uniquity)
def setup_cache(self, cache_path, **init_kargs):
    if self.rawmode in ('one-file', 'multi-file'):
        ressource_name = self.filename
    elif self.rawmode == 'one-dir':
        ressource_name = self.dirname
    else:
        raise NotImplementedError

    if cache_path == 'home':
        if sys.platform.startswith('win'):
            dirname = os.path.join(os.environ['APPDATA'], 'neo_rawio_cache')
        elif sys.platform.startswith('darwin'):
            dirname = '~/Library/Application Support/neo_rawio_cache'
        else:
            dirname = os.path.expanduser('~/.config/neo_rawio_cache')
        dirname = os.path.join(dirname, self.__class__.__name__)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
    elif cache_path == 'same_as_resource':
        dirname = os.path.dirname(ressource_name)
    else:
        assert os.path.exists(cache_path), \
            'cache_path does not exist; use "home" or "same_as_resource" to set it automatically'
        dirname = cache_path

    # The hash of the resource (dir or file) is based on its filename + modification time.
    # TODO: make something more sophisticated when rawmode='one-dir' that uses all filenames and datetimes.
    d = dict(ressource_name=ressource_name, mtime=os.path.getmtime(ressource_name))
    hash = joblib.hash(d, hash_name='md5')

    # The cache name is composed of the real name and the hash.
    name = '{}_{}'.format(os.path.basename(ressource_name), hash)
    self.cache_filename = os.path.join(dirname, name)

    if os.path.exists(self.cache_filename):
        self.logger.warning('Use existing cache file {}'.format(self.cache_filename))
        self._cache = joblib.load(self.cache_filename)
    else:
        self.logger.warning('Create cache file {}'.format(self.cache_filename))
        self._cache = {}
        self.dump_cache()
def hash_dataframe(df):
    return joblib.hash(df)
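# Illustrative check (assumes pandas is available): joblib.hash fingerprints the
# DataFrame contents, so equal frames built separately should share a digest.
import pandas as pd

df1 = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
df2 = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
assert hash_dataframe(df1) == hash_dataframe(df2)           # same content, same digest
assert hash_dataframe(df1) != hash_dataframe(df1.head(2))   # different content, different digest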
def embed_data(self, X, y, index, inverse, **kwargs): # get data from graph graph, epochs_per_sample, head, tail, weight, n_vertices = get_graph_elements( self.graph_, self.n_epochs ) # number of elements per batch for tensorflow embedding if self.batch_size is None: # batch size can be larger if its just over embeddings if self.direct_embedding & (self.decoding_method is None): self.batch_size = np.min([n_vertices, 60000]) else: self.batch_size = np.min([n_vertices, 1000]) # get embedding initialization if embedding directly if self.direct_embedding: embedding = self.init_embedding_from_graph(graph, **kwargs) embedding = embedding[index] self.embedding = tf.Variable(embedding.astype(np.float32, order="C")) # alpha is a hack for circumventing tensorflow's bug with sparse vectors # this is only needed for the adadelta on direct embeddings self.alpha = tf.Variable(1.0) # get dimensions of data if self.dims is None: self.dims = [np.shape(X)[-1]] # reshape data for network if self.dims is not None: if len(self.dims) > 1: X = np.reshape(X, [len(X)] + list(self.dims)) if self.valid_X is not None: self.valid_X = np.reshape( self.valid_X, [len(self.valid_X)] + list(self.dims) ) # if network is jointly training a classifier, get labeled data if (y is not None) & self.train_classifier: # get the number of training classes label_mask = y != -1 # subset labeled X and Y X_labeled = X[label_mask] y_labeled = y[label_mask] self.n_classes = len(np.unique(y_labeled)) # create networks, if one does not exist self.prepare_networks() # set a batch size, if one does not exist if self.batch_size is None: self.batch_size = 100 # create iterator for data/edges edge_iter, n_edges_per_epoch = self.create_edge_iterator( head, tail, epochs_per_sample ) # if network is jointly training a classifier, prepare data iterator if (y is not None) & self.train_classifier: # generate tensorflow iterator for classifier labels labeled_iter = self.create_classification_iterator( self, X_labeled, y_labeled ) # get batches per epoch n_batches_per_epoch = int(np.ceil(n_edges_per_epoch / self.batch_size)) # create an iterator for validation data if ( self.decoding_method in ["autoencoder", "network"] or (self.train_classifier) ) and self.valid_X is not None: data_valid, n_valid_samp = self.create_validation_iterator() # number of batches corresponding to one epoch n_valid_batches_per_epoch = int(n_valid_samp / self.batch_size) if self.verbose: print(ts(), "Embedding with TensorFlow") # create keras summary objects for loss self.create_summary_metrics() # create a tqdm iterator to show epoch progress if self.verbose: epoch_iter = tqdm(desc="epoch", total=self.training_epochs) batch = 0 X_lab, y_lab = None, None # default classifier values for edge_epoch, epoch in zip(edge_iter, np.arange(self.training_epochs)): if self.verbose & (n_batches_per_epoch > 200): edge_tqdm = tqdm(desc="batch", total=n_batches_per_epoch, leave=False) # loop through batches for batch_to, batch_from in edge_epoch: batch += 1 # if training a classifier, get X and y data if self.train_classifier: X_lab, y_lab = labeled_iter.next() # if this is a direct encoding, the embeddings should be used directly if self.direct_embedding: ( ce_loss, reconstruction_loss, classifier_loss, classifier_acc, ) = self.train_batch(batch_to, batch_from, X_lab, y_lab) else: ( ce_loss, reconstruction_loss, classifier_loss, classifier_acc, ) = self.train_batch(X[batch_to], X[batch_from], X_lab, y_lab) # save losses to tensorflow summary self.summary_metrics["train_loss_umap"](ce_loss) 
if self.decoding_method in ["autoencoder", "network"]: self.summary_metrics["train_loss_recon"](reconstruction_loss) if self.train_classifier: self.summary_metrics["train_loss_classif"](classifier_loss) self.summary_metrics["train_acc_classif"](classifier_acc) if self.verbose & (n_batches_per_epoch > 200): edge_tqdm.update(1) # save summary information with self.summary_writer_train.as_default(): tf.summary.scalar( "umap_loss", self.summary_metrics["train_loss_umap"].result(), step=batch, ) if self.decoding_method in ["autoencoder", "network"]: tf.summary.scalar( "recon_loss", self.summary_metrics["train_loss_recon"].result(), step=batch, ) if self.train_classifier: tf.summary.scalar( "classif_loss", self.summary_metrics["train_loss_classif"].result(), step=batch, ) tf.summary.scalar( "classif_acc", self.summary_metrics["train_acc_classif"].result(), step=batch, ) self.summary_writer_train.flush() # update tqdm iterators if self.verbose: if n_batches_per_epoch > 200: # close tqdm iterator edge_tqdm.update(edge_tqdm.total - edge_tqdm.n) edge_tqdm.close() epoch_iter.update(1) # compute test loss for reconstruction and classification if self.valid_X is not None and self.direct_embedding is False: for valid_batch_X, valid_batch_Y in iter(data_valid): # get loss for reconstruction if self.decoding_method in ["autoencoder", "network"]: valid_recon_loss = tf.reduce_mean( self.compute_reconstruction_loss(valid_batch_X) ) self.summary_metrics["valid_loss_recon"](valid_recon_loss) # get loss for accuracy if self.train_classifier: classifier_loss, classifier_acc = self.compute_classifier_loss( valid_batch_X, valid_batch_Y ) self.summary_metrics["valid_loss_classif"](classifier_loss) self.summary_metrics["valid_acc_classif"](classifier_acc) # save summary information with self.summary_writer_valid.as_default(): if self.decoding_method in ["autoencoder", "network"]: tf.summary.scalar( "recon_loss", self.summary_metrics["valid_loss_recon"].result(), step=batch, ) if self.train_classifier: tf.summary.scalar( "classif_loss", self.summary_metrics["valid_loss_classif"].result(), step=batch, ) tf.summary.scalar( "classif_acc", self.summary_metrics["valid_acc_classif"].result(), step=batch, ) self.summary_writer_valid.flush() # self.summary_writer.close() if self.verbose: print(ts() + " Finished embedding") # make embedding as projected batch if self.direct_embedding: self.embedding_ = self.embedding.numpy()[inverse] else: self.embedding = self.transform(X[index]) self.embedding_ = self.embedding[inverse] self._input_hash = joblib.hash(self._raw_data)
def plot_tfr(tfr, time_cutoff, vmin, vmax, tl, cluster_correct=False, threshold=0.05, plot_colorbar=False, ax=None, cmap=None, stat_cutoff=None, aspect=None, cluster=None, contrast_name=None, time_lock=None): from pymeg.contrast_tfr import get_tfr_stats # colorbar: from matplotlib.colors import LinearSegmentedColormap if cmap is None: cmap = LinearSegmentedColormap.from_list( "custom", ["blue", "lightblue", "lightgrey", "yellow", "red"], N=100) if stat_cutoff is None: stat_cutoff = time_cutoff # data: times, freqs, X = contrast_tfr.get_tfr(tfr, stat_cutoff) #import ipdb; ipdb.set_trace() ### Save data to data source file from conf_analysis.meg.figures import array_to_data_source_file panel = 'A' if 'all' in contrast_name else 'B' if not 'choice' in contrast_name: fnr = 2 else: fnr = 'S6' panel = 'A' array_to_data_source_file( fnr, panel, cluster + str(time_lock), X, { 'dim_0_subjects': np.arange(1, 16), 'dim_1_frequencies': freqs, 'dim_2_time': times }) mask = None if cluster_correct: hash = joblib.hash([times, freqs, X, threshold]) try: _, _, cluster_p_values, _ = cluster_correct[hash] sig = cluster_p_values.reshape((X.shape[1], X.shape[2])) mask = sig < threshold except KeyError: s = get_tfr_stats(times, freqs, X, threshold) _, _, cluster_p_values, _ = s[hash] sig = cluster_p_values.reshape((X.shape[1], X.shape[2])) mask = sig < threshold earliest_sig = None if mask is not None: idt = np.where(np.any(mask, 0).ravel())[0] idt = [ t for t in idt if (time_cutoff[0] <= times[t]) and (times[t] <= time_cutoff[1]) ] if len(idt) > 0: earliest_sig = times[idt[0]] freqs_idx = freqs >= 4 Xb = np.nanmean(X, 0)[freqs_idx, :] freqsb = freqs[freqs_idx] cax = pmi( plt.gca(), Xb, times, yvals=freqsb, yscale="linear", vmin=vmin, vmax=vmax, mask=mask[freqs_idx, :], mask_alpha=1, mask_cmap=cmap, cmap=cmap, ) plt.gca().set_aspect(aspect) plt.xlim(time_cutoff) plt.ylim([freqs.min() - 0.5, freqs.max() + 0.5]) ax.axvline(0, ls="--", lw=0.75, color="black") ax.axvline(1, ls="--", lw=0.75, color="black") if plot_colorbar: plt.colorbar(cax, ticks=[vmin, 0, vmax]) return ax, earliest_sig
def plot_epoch_pair( tfr_data, vmin=-25, vmax=25, cmap="RdBu_r", gs=None, stats=False, threshold=0.05, ylabel=None, ): from matplotlib import gridspec import pylab as plt import joblib if gs is None: g = gridspec.GridSpec(1, 2) else: g = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs, wspace=0.01, width_ratios=[1, 0.4]) times, freq, tfr = None, None, None for epoch in ["stimulus", "response"]: row = 0 if epoch == "stimulus": col = 0 time_cutoff = (-0.35, 1.1) xticks = [0, 0.25, 0.5, 0.75, 1] yticks = [25, 50, 75, 100, 125] xmarker = [0, 1] else: col = 1 time_cutoff = (-0.35, 0.1) xticks = [0] yticks = [1, 25, 50, 75, 100, 125] xmarker = [0, 1] plt.subplot(g[row, col]) tdata = tfr_data.query('epoch=="%s"' % (epoch)) if len(tdata) == 0: plt.yticks([], [""]) plt.xticks([], [""]) continue times, freqs, tfr = get_tfr(tdata, time_cutoff) mask = None if stats: hash = joblib.hash([times, freqs, tfr, threshold]) try: _, _, cluster_p_values, _ = stats[hash] except KeyError: s = get_tfr_stats(times, freqs, tfr, threshold) _, _, cluster_p_values, _ = s[hash] sig = cluster_p_values.reshape((tfr.shape[1], tfr.shape[2])) mask = sig < threshold _ = pmi( plt.gca(), np.nanmean(tfr, 0), times, yvals=freqs, yscale="linear", vmin=vmin, vmax=vmax, mask=mask, mask_alpha=1, mask_cmap=cmap, cmap=cmap, ) if (ylabel is not None) and (epoch == "stimulus"): plt.ylabel(ylabel, labelpad=-2, fontdict={"fontsize": 4}) # for xmark in xmarker: # plt.axvline(xmark, color='k', lw=1, zorder=-1, alpha=0.5) plt.yticks(yticks, [""] * len(yticks)) plt.xticks(xticks, [""] * len(xticks)) plt.tick_params(direction="inout", length=2, zorder=100) plt.xlim(time_cutoff) plt.ylim([1, 147.5]) # plt.axhline(10, color='k', lw=1, alpha=0.5, linestyle='--') # plt.axhline(25, color='k', lw=1, alpha=0.5, linestyle=':') # plt.axhline(50, color='k', lw=1, alpha=0.5, linestyle=':') plt.axvline(0, color="k", lw=1, zorder=5, alpha=0.5) if epoch == "stimulus": plt.axvline(1, color="k", lw=1, zorder=5, alpha=0.5) return times, freqs, tfr
def test_copy_img_side_effect():
    img1 = Nifti1Image(np.ones((2, 2, 2, 2)), affine=np.eye(4))
    hash1 = joblib.hash(img1)
    niimg.copy_img(img1)
    hash2 = joblib.hash(img1)
    assert hash1 == hash2
def __hash__(self):
    return int(joblib.hash(self.atom), base=16)
def fit(self, X, y):
    self.training_size_ = X.shape[0]
    self.training_hash_ = joblib.hash(X)
def __hash__(self):
    return hash(
        joblib.hash((self._final_estimator.coef_,
                     self._final_estimator.intercept_)))
def run_model(experiment_info=None, output_dataset=None, force=False, hash_type='sha1', output_path=None, run_number=1, *, dataset_name, is_supervised, model_name): '''Run a model on a dataset (predict/transform) Runs an algorithm_object on the dataset and returns a new dataset object, tagged with experiment metadata, and saves it to disk under `data_path / output_dataset`. Parameters ---------- dataset_name: str, valid dataset name Name of a dataset object that will be run through the model model_name: str, valid model name name of the model that will transform the data experiment_info: (str) any other information to note about the experiment This is used as the output dataset's DESCR text output_path: path directory to store output files output_dataset: (str, optional) filename base for the output dataset. Will also be used as the output `dataset.name`. run_number: (int) attempt number via the same parameters force: (boolean) force re-running the algorithm and overwriting any existing data. Returns ------- Dataset object emerging from the model, with experiment dictionary embedded in metadata ''' if output_path is None: output_path = paths['model_output_path'] else: output_path = pathlib.Path(output_path) if output_dataset is None: output_dataset = f'{model_name}_exp_{dataset_name}_{run_number}' os.makedirs(output_path, exist_ok=True) dataset = Dataset.load(dataset_name) model, model_meta = load_model(model_name) # add experiment metadata experiment = { 'model_name': model_name, 'dataset_name': dataset_name, 'run_number': run_number, 'hash_type': hash_type, 'input_data_hash': joblib.hash(dataset.data, hash_name=hash_type), 'input_target_hash': joblib.hash(dataset.target, hash_name=hash_type), 'model_hash': joblib.hash(model, hash_name=hash_type), } logger.debug(f"Predict: Applying {model_name} to {dataset_name}") metadata_fq = output_path / f'{output_dataset}.metadata' if metadata_fq.exists() and force is False: cached_metadata = Dataset.load(output_dataset, data_path=output_path, metadata_only=True) if experiment.items() <= cached_metadata['experiment'].items(): logger.info( "Experiment has already been run. Returning Cached Result") return Dataset.load(output_dataset, data_path=output_path) else: raise Exception( f'An Experiment with name {output_dataset} exists already, ' 'but metadata has changed. ' 'Use `force=True` to overwrite, or change one of ' '`run_number` or `output_dataset`') # Either force is True, or we need to rerun the algorithm. start_time = time.time() if is_supervised: exp_data = model.predict(dataset.data) else: if hasattr(model, 'transform'): logger.debug('Transform found. Skipping fit') exp_data = model.transform(dataset.data) else: logger.debug('No Transform found. Running fit_transform') exp_data = model.fit_transform(dataset.data) end_time = record_time_interval(output_dataset, start_time) experiment['start_time'] = start_time experiment['duration'] = end_time - start_time new_metadata = dataset.metadata.copy() new_metadata['experiment'] = experiment if experiment_info: new_metadata['descr'] = experiment_info new_dataset = Dataset(dataset_name=output_dataset, data=exp_data, target=dataset.target.copy(), metadata=new_metadata) new_dataset.dump(file_base=output_dataset, dump_path=output_path, force=True) return new_dataset
def inner(*args, **kargs):
    a = d.get(hash((args, kargs)))
    if a is None:
        a = d[hash((args, kargs))] = f(*args, **kargs)
    return a
def fit_embed_data(self, X, y, index, inverse): """ Performs an embedding on data after a UMAP graph has been constructed. Parameters ---------- X : array, shape (n_samples, n_features) or (n_samples, n_samples) If the metric is 'precomputed' X must be a square distance matrix. Otherwise it contains a sample per row. If the method is 'exact', X may be a sparse matrix of type 'csr', 'csc' or 'coo'. y : array, shape (n_samples) A target array for supervised dimension reduction. How this is handled is determined by parameters UMAP was instantiated with. The relevant attributes are ``target_metric`` and ``target_metric_kwds``. index : array, shape (n_samples) [description] inverse : array, shape (n_samples) [description] """ if self.n_epochs is None: n_epochs = 0 else: n_epochs = self.n_epochs if self.densmap or self.output_dens: self._densmap_kwds["graph_dists"] = self.graph_dists_ if self.verbose: print(ts(), "Construct embedding") self.embedding_, aux_data = simplicial_set_embedding( self._raw_data[self.index__], # JH why raw data? self.graph_, self.n_components, self._initial_alpha, self._a, self._b, self.repulsion_strength, self.negative_sample_rate, n_epochs, init, random_state, self._input_distance_func, self._metric_kwds, self.densmap, self._densmap_kwds, self.output_dens, self._output_distance_func, self._output_metric_kwds, self.output_metric in ("euclidean", "l2"), self.random_state is None, self.verbose, ) self.embedding_ = self.embedding_[self.inverse__] if self.output_dens: self.rad_orig_ = aux_data["rad_orig"][self.inverse__] self.rad_emb_ = aux_data["rad_emb"][self.inverse__] if self.verbose: print(ts() + " Finished embedding") numba.set_num_threads(self._original_n_threads) self._input_hash = joblib.hash(self._raw_data)
def compute_hash(*args, **kwargs):
    """Compute a hash of anything joblib can handle."""
    to_hash = {"args": args, "kwargs": kwargs}
    return joblib.hash(to_hash)
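# Example call (illustrative only): joblib.hash handles nested containers and
# NumPy arrays by content, so repeated calls with equal inputs agree.
import numpy as np

assert compute_hash(1, "two", arr=np.arange(3)) == compute_hash(1, "two", arr=np.arange(3))
assert compute_hash(1, "two", arr=np.arange(3)) != compute_hash(1, "two", arr=np.arange(4))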
def test_random_state_second_output_reproducibility(regtest):
    random = np.random.RandomState(0)
    n_samples = 500
    _ = random.uniform(size=(n_samples, 5))
    X = random.uniform(size=(n_samples, 5))
    print(joblib.hash(X), file=regtest)
import datetime
import logging
import socket
import pytz
import uuid
import traceback

import joblib
from flask import Flask

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def now_localtz():
    return datetime.datetime.now(pytz.timezone('Europe/Lisbon'))


VERSION = "20181005-8"
DATE_STARTED = now_localtz()
HOSTNAME = joblib.hash("salted2662" + socket.gethostname())
WORKER_ID = str(uuid.uuid4())

USE_CACHE = False
REDIS_HOST = "XXXXXXXXX.redis.cache.windows.net"
REDIS_KEY = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
CACHE_VERSION = "v3"

app = Flask(__name__)

# LogicApps is configured to send the full Msft Forms body, with question ids and form answers.
# We map here each question id to sklearn algorithm and pipeline parameters.
FORM_IDS = {
    "rf71efaaee75f4869b3a24de441b09919": "algorithm",
    "r52e336e1f3564f47b9359debc320a7ce": "nickname",
def fit(self, X): """Fit X into an embedded space. Optionally use y for supervised dimension reduction. Parameters ---------- X : array, shape (n_samples, n_features) or (n_samples, n_samples) If the metric is 'precomputed' X must be a square distance matrix. Otherwise it contains a sample per row. If the method is 'exact', X may be a sparse matrix of type 'csr', 'csc' or 'coo'. """ X = check_array(X, dtype=np.float32, accept_sparse="csr", order="C") self._raw_data = X # Handle all the optional arguments, setting default if self.a is None or self.b is None: self.a, self.b = find_ab_params(self.spread, self.min_dist) self._validate_parameters() if self.verbose: print(str(self)) index = list(range(X.shape[0])) # Error check n_neighbors based on data size if X[index].shape[0] <= self.n_neighbors: if X[index].shape[0] == 1: self.embedding_ = np.zeros( (1, self.n_components)) # needed to sklearn comparability return self warn("n_neighbors is larger than the dataset size; truncating to " "X.shape[0] - 1") self._n_neighbors = X[index].shape[0] - 1 else: self._n_neighbors = self.n_neighbors # Note: unless it causes issues for setting 'index', could move this to # initial sparsity check above if self._sparse_data and not X.has_sorted_indices: X.sort_indices() random_state = check_random_state(self.random_state) if self.verbose: print(ts(), "Construct fuzzy simplicial set") # pass string identifier if pynndescent also defines distance metric if _HAVE_PYNNDESCENT: if self._sparse_data and self.metric in pynn_sparse_named_distances: nn_metric = self.metric elif not self._sparse_data and self.metric in pynn_named_distances: nn_metric = self.metric else: nn_metric = self._input_distance_func else: nn_metric = self._input_distance_func (self._knn_indices, self._knn_dists, _) = nearest_neighbors( X[index], self._n_neighbors, # int(self._n_neighbors * 1.2), # we can use more neighbors nn_metric, self.angular_rp_forest, random_state, self.low_memory, use_pynndescent=True, verbose=self.verbose, ) if self.local_n_epochs is None: self.local_n_epochs = 50 if self.global_n_epochs is None: self.global_n_epochs = 100 if self.verbose: print(ts(), "Build K-nearest neighbor graph structure") flat_indices = self._knn_indices.flatten( ) # flattening all knn indices index, freq = np.unique(flat_indices, return_counts=True) # sorted_index = index[freq.argsort(kind="stable")] # sorted index in increasing order sorted_index = index[freq.argsort( kind="stable")[::-1]] # sorted index in decreasing order # get disjoint NN matrix disjoints = build_knn_graph( data=X, sorted_index=sorted_index, hub_num=self.hub_num, ) # get hub indices from disjoint set hubs = pick_hubs( disjoints=disjoints, random_state=random_state, popular=True, ) if self.verbose: print(ts(), "Run global optimization") init_global = build_global_structure( data=X, hubs=hubs, n_components=self.n_components, a=self.a, b=self.b, random_state=random_state, alpha=self.global_learning_rate, n_epochs=self.global_n_epochs, verbose=self.verbose, label=self.ll, init_global=self.init, ) if self.verbose: print( ts(), "Get NN indices & Initialize them using original hub information" ) init, hub_info, hubs = embed_others_nn( data=X, init_global=init_global, hubs=hubs, knn_indices=self._knn_indices, nn_consider=self._n_neighbors, random_state=random_state, label=self.ll, verbose=self.verbose, ) self._knn_indices, self._knn_dists, counts = select_from_knn( knn_indices=self._knn_indices, knn_dists=self._knn_dists, hub_info=hub_info, n_neighbors=self.n_neighbors, n=X.shape[0], 
) counts_hub = counts[hubs] counts_sum = len(counts_hub[counts_hub < self.n_neighbors]) if counts_sum > 0: if self.verbose: print(ts(), "Adding more KNNs to build the graph") self._knn_indices, self._knn_dists, counts_sum = apppend_knn( data=X, knn_indices=self._knn_indices, knn_dists=self._knn_dists, hub_info=hub_info, n_neighbors=self.n_neighbors, counts=counts, counts_sum=counts_sum, ) if counts_sum != 0: raise ValueError( f"KNN indices not fully determined! counts_sum: {counts_sum} != 0" ) self.graph_, _, _ = fuzzy_simplicial_set( X[hubs], self.n_neighbors, random_state, nn_metric, hubs, self._knn_indices[hubs], self._knn_dists[hubs], self.angular_rp_forest, self.set_op_mix_ratio, self.local_connectivity, True, True, ) if self.verbose: print(ts(), "Run local optimization") init = local_optimize_nn( data=X, graph=self.graph_, hub_info=hub_info, n_components=self.n_components, learning_rate=self.local_learning_rate, a=self.a, b=self.b, gamma=self.gamma, negative_sample_rate=self.negative_sample_rate, n_epochs=self.local_n_epochs, init=init, random_state=random_state, parallel=False, verbose=self.verbose, label=self.ll, ) if self.verbose: print(ts(), "Embedding outliers") self.embedding_ = embed_outliers( data=X, init=init, hubs=hubs, disjoints=disjoints, random_state=random_state, label=self.ll, verbose=self.verbose, ) if self.verbose: print(ts(), "Finished embedding") self._input_hash = joblib.hash(self._raw_data) return self
def train(**kargs): random_state = 43 rskf = RepeatedStratifiedKFold(n_splits=5, n_repeats=1, random_state=random_state) if kargs["algorithm"] == "Logistic Regression": clf = LogisticRegression(random_state=random_state) clf_name = "logreg" if kargs["algorithm"] == "Random Forest": clf = RandomForestClassifier(random_state=random_state) clf_name = "rf" if kargs["algorithm"] == "Decision Tree": clf = DecisionTreeClassifier(random_state=random_state) clf_name = "dt" if kargs["algorithm"] == "SVM": clf = SVC(random_state=random_state) clf_name = "svm" if kargs["algorithm"] == "Extra Trees": clf = ExtraTreesClassifier(random_state=random_state) clf_name = "xt" print("train params", kargs) pipeline = [] # Basic post prep pipeline (onehot/remove any remaining NA), make the dataset scikit compliant nums = [([c], pp.Imputer()) for c in X.select_dtypes(np.number)] cats = [([c], [DataFrameImputer(default_value=""), pp.LabelBinarizer()]) for c in X.select_dtypes("object")] texts = [] text_preproc = kargs.get("text_preproc") if text_preproc and text_preproc != "None": if text_preproc == "Tfidf": texts = [("Name", TfidfVectorizer())] elif text_preproc == "Count": texts = [("Name", CountVectorizer())] else: raise (Exception(f"not valid:{text_preproc}")) print(texts) mapper = DataFrameMapper(nums + cats + texts, df_out=True) pipeline.append(('featurize', mapper)) pca = kargs.get("pca") if pca and pca != "Disabled": print("add pca") pipeline.append(('pca', PCA(n_components=guess_type(kargs["pca"])))) pipeline.append((clf_name, clf)) # Our full pipeline train_pipeline = Pipeline(pipeline) # Set classifier parameters for k in kargs.keys(): if (clf_name + "__") in k: train_pipeline.set_params(**{k: guess_type(kargs[k])}) # Dump for step in train_pipeline.steps: pprint.pprint(step) # Check cache if USE_CACHE: cache_key = CACHE_VERSION + "__" + str(joblib.hash(train_pipeline)) print("Cache key:", cache_key) scores = cache.get(cache_key) print("From Cache") scores = pickle.loads(scores) return scores + np.random.normal(0, .0005, len(scores)) * 100 print("Not in cache, training...") # Train/Cross eval scores = cross_val_score(X=X, y=y, cv=rskf, estimator=train_pipeline, verbose=5, n_jobs=1, scoring="accuracy") scores = (scores * 100).round(3) if USE_CACHE: print("Saving in cache...") cache.set(cache_key, pickle.dumps(scores)) return scores + np.random.normal(0, .0005, len(scores)) * 100
def _evaluate_one(**kwargs):
    params = DEFAULT_PARAMS.copy()
    params.update(kwargs)
    params_digest = joblib.hash(params)
    results = params.copy()
    results['digest'] = params_digest

    results_folder = Path('results')
    results_folder.mkdir(exist_ok=True)
    folder = results_folder.joinpath(params_digest)
    folder.mkdir(exist_ok=True)
    if len(list(folder.glob("*/results.json"))) == 4:
        print('Skipping')

    split_idx = params.get('split_idx', 0)
    print("Evaluating model on split #%d:" % split_idx)
    pprint(params)

    ratings_train, ratings_test = train_test_split(
        all_ratings, test_size=0.2, random_state=split_idx)
    max_user_id = all_ratings['user_id'].max()
    max_item_id = all_ratings['item_id'].max()

    user_id_train = ratings_train['user_id']
    item_id_train = ratings_train['item_id']
    rating_train = ratings_train['rating']

    user_id_test = ratings_test['user_id']
    item_id_test = ratings_test['item_id']
    rating_test = ratings_test['rating']

    loss = params.get('loss', DEFAULT_LOSS)
    if loss == 'cross_entropy':
        target_train = rating_train - 1
    else:
        target_train = rating_train

    model = make_model(max_user_id + 1, max_item_id + 1, **params)
    results['model_size'] = sum(w.size for w in model.get_weights())

    nb_epoch = 5
    epochs = 0
    for i in range(4):
        epochs += nb_epoch
        t0 = time()
        model.fit([user_id_train, item_id_train], target_train,
                  batch_size=params['batch_size'], nb_epoch=nb_epoch,
                  shuffle=True, verbose=False)
        epoch_duration = (time() - t0) / nb_epoch

        train_scores, train_preds = _compute_scores(
            model, 'train', user_id_train, item_id_train, rating_train, loss)
        results.update(train_scores)
        test_scores, test_preds = _compute_scores(
            model, 'test', user_id_test, item_id_test, rating_test, loss)
        results.update(test_scores)
        results['epoch_duration'] = epoch_duration
        results['epochs'] = epochs

        subfolder = folder.joinpath("%03d" % epochs)
        subfolder.mkdir(exist_ok=True)

        # Transactional results saving to avoid file corruption on ctrl-c
        results_filepath = subfolder.joinpath(RESULTS_FILENAME)
        with transactional_open(results_filepath, mode='w') as f:
            json.dump(results, f)
        model_filepath = subfolder.joinpath(MODEL_FILENAME)
        with transactional_fname(model_filepath) as fname:
            model.save(fname)

        # Save predictions and true labels to be able to recompute new scores
        # later
        with transactional_open(subfolder / 'test_preds.npy', mode='wb') as f:
            np.save(f, test_preds)
        with transactional_open(subfolder / 'train_preds.npy', mode='wb') as f:
            np.save(f, train_preds)
        with transactional_open(subfolder / 'ratings.npy', mode='wb') as f:
            np.save(f, rating_test)
    return params_digest
def compute_degeneracy(tRNAs, aaRSs, mask, cache): """ This function computes all possible site-block-match-matrices and their encodable genetic degeneracies """ uni_t = set(range(tRNAs)) uni_a = set(range(aaRSs)) ## if mask: ## zeros = 2**(2*(tRNAs+aaRSs)) ## else: ## zeros = 2**(tRNAs+aaRSs) if mask: genotypes = masked_genotypes_gen(tRNAs, aaRSs) for setm, setn, sett, seta in genotypes: offm = uni_t - setm offn = uni_a - setn eoff = len(offm) + len( offn ) # eoff is ultimately the expected fraction of sites masked per genotype eips = ( 2 * len(setm) * len(setn) ) # eips is expected number of unmasked interactions per site 0 <= eips <= P or N+M/2 if (eips > 0): eips /= (len(setm) + len(setn)) settc = uni_t - sett sett &= setm settc &= setm setac = uni_a - seta seta &= setn setac &= setn m = np.zeros((tRNAs, aaRSs), dtype=np.int16) for match in chain(product(sett, seta), product(settc, setac)): m[match] += 1 # if (m==0).all(): ##print ('# huh! in compute_degeneracy') # why do we get here? # continue key = joblib.hash(m) #if key == '3d364cbacfad5c8c2be9dc4314aec17c': # pdb.set_trace() if key in degeneracy: degeneracy[key] += 1 off[key] += eoff / (2 * pairs * width) ips[key] += eips / width else: degeneracy[key] = 1 off[key] = eoff / (2 * pairs * width) ips[key] = eips / width if cache: sbm_matrix(key, m) # THIS IS NOT TESTED else: sbmmd[key] = m #zeros -= 1 for key in off: off[key] /= degeneracy[key] ips[key] /= degeneracy[key] #pdb.set_trace() else: genotypes = genotypes_gen(tRNAs, aaRSs) for sett, seta in genotypes: settc = uni_t - sett setac = uni_a - seta m = np.zeros((tRNAs, aaRSs), dtype=np.int16) for match in chain(product(sett, seta), product(settc, setac)): m[match] += 1 key = joblib.hash(m) if key in degeneracy: degeneracy[key] += 1 #zeros -= 1 else: degeneracy[key] = 1 if cache: sbm_matrix(m) # THIS IS NOT TESTED else: sbmmd[key] = m
def test_proxy():
    inst = LazyProxy(nocall)
    pickle.Pickler(io.BytesIO(), pickle.HIGHEST_PROTOCOL).dump(inst)
    pickle.Pickler(io.BytesIO()).dump(inst)
    jb.hash(inst)
def regression_state(state, regtest):
    for v in sorted(state):
        print(v, joblib.hash(state[v].values), file=regtest)
        'learning_rate': ['constant', 'adaptive'],
        'max_iter': [5000],
    },
    {
        'solver': ['lbfgs'],
        'hidden_layer_sizes': hidden_layer_sizes_range,
        'activation': ['relu'],
        'random_state': [0],
    },
]


if __name__ == '__main__':
    model_params = list(ParameterGrid(param_grid))

    with open(model_filename, 'w') as f:
        for params in model_params:
            model_id = joblib.hash(params)
            model_record = params.copy()
            model_record['model_id'] = model_id
            model_record['depth'] = len(params['hidden_layer_sizes'])
            model_record['width'] = max(params['hidden_layer_sizes'])
            f.write(json.dumps(model_record) + '\n')
            f.flush()

    model_params = shuffle(model_params, random_state=0)

    with open(evaluations_filename, 'w') as f:
        for n_samples_train in [30]:
            for label_noise_rate in np.linspace(0, 1, 11):
                print(f'\nn_samples: {n_samples_train}, label noise: {label_noise_rate:0.1f}')
                for data_seed in [0, 1]:
                    (X_train, y_train), (X_test, y_test) = make_noisy_problem(
                        n_samples_train, label_noise_rate, seed=data_seed)
sfab = 0
sfab2 = 0
sfbf = 0
sfbf2 = 0
sfabfa = 0
sfabfa2 = 0
# for arg in args:
#     m, d, o, f, f2 = compute_fitness(arg)
for m, d, o, ei, f, f2, fa, fa2 in pool.imap(compute_fitness, args, chunksize=chunksize):
    key = joblib.hash(m)
    if key in dd:
        dd[key] += d
        oo[key] += (d * o)
        fb = fitb[key]
        fb2 = fitb2[key]
        fab = fitab[key]
        fab2 = fitab2[key]
    else:
        dd[key] = d
        oo[key] = (d * o)
        eeii[key] = ei
        fit[key] = f
        fit2[key] = f2
        fita[key] = fa
# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(
    env,
    parameters.rms_decay,
    parameters.rms_epsilon,
    parameters.momentum,
    parameters.clip_delta,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.network_type,
    parameters.update_rule,
    parameters.batch_accumulator,
    rng)

# --- Instantiate agent ---
agent = ALEAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "ALE_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon,
# discount and learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController(evaluateOn='epoch', periodicity=1))

# During training epochs, we want to train the agent after every [parameters.update_frequency]
# actions it takes. Plus, we also want to display after each training episode (not after every
# training step) the average Bellman residual and the average of the V values obtained during the
# last episode, hence the two last arguments.
agent.attach(
    bc.TrainerController(evaluateOn='action', periodicity=parameters.update_frequency,
def plot_mosaic( tfr_data, vmin=-25, vmax=25, cmap="RdBu_r", ncols=4, epoch="stimulus", stats=False, threshold=0.05, ): if epoch == "stimulus": time_cutoff = (-0.5, 1.35) xticks = [0, 0.25, 0.5, 0.75, 1] xticklabels = ["0\nStim on", "", ".5", "", "1\nStim off"] yticks = [25, 50, 75, 100, 125] yticklabels = ["25", "", "75", "", "125"] xmarker = [0, 1] baseline = (-0.25, 0) else: time_cutoff = (-1, 0.5) xticks = [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5] xticklabels = ["-1", "", "-0.5", "", "0\nResponse", "", "0.5"] yticks = [1, 25, 50, 75, 100, 125] yticklabels = ["1", "25", "", "75", "", "125"] xmarker = [0, 1] baseline = None from matplotlib import gridspec import pylab as plt import seaborn as sns contrast_tfr.set_jw_style() sns.set_style("ticks") nrows = (len(atlas_glasser.areas) // ncols) + 1 gs = gridspec.GridSpec(nrows, ncols) gs.update(wspace=0.01, hspace=0.01) for i, (name, area) in enumerate(atlas_glasser.areas.items()): try: column = np.mod(i, ncols) row = i // ncols plt.subplot(gs[row, column]) times, freqs, tfr = get_tfr(tfr_data.query('cluster=="%s"' % area), time_cutoff) # cax = plt.gca().pcolormesh(times, freqs, np.nanmean( # tfr, 0), vmin=vmin, vmax=vmax, cmap=cmap, zorder=-2) mask = None if stats: import joblib hash = joblib.hash([times, freqs, tfr, threshold]) try: _, _, cluster_p_values, _ = stats[hash] except KeyError: s = get_tfr_stats(times, freqs, tfr, threshold) _, _, cluster_p_values, _ = s[hash] sig = cluster_p_values.reshape((tfr.shape[1], tfr.shape[2])) mask = sig < threshold cax = pmi( plt.gca(), np.nanmean(tfr, 0), times, yvals=freqs, yscale="linear", vmin=vmin, vmax=vmax, mask=mask, mask_alpha=1, mask_cmap=cmap, cmap=cmap, ) # plt.grid(True, alpha=0.5) for xmark in xmarker: plt.axvline(xmark, color="k", lw=1, zorder=-1, alpha=0.5) plt.yticks(yticks, [""] * len(yticks)) plt.xticks(xticks, [""] * len(xticks)) set_title(name, times, freqs, plt.gca()) plt.tick_params(direction="inout", length=2, zorder=100) plt.xlim(time_cutoff) plt.ylim([1, 147.5]) plt.axhline(10, color="k", lw=1, alpha=0.5, linestyle="--") except ValueError as e: print(name, area, e) plt.subplot(gs[nrows - 2, 0]) sns.despine(left=True, bottom=True) plt.subplot(gs[nrows - 1, 0]) pmi( plt.gca(), np.nanmean(tfr, 0) * 0, times, yvals=freqs, yscale="linear", vmin=vmin, vmax=vmax, mask=None, mask_alpha=1, mask_cmap=cmap, cmap=cmap, ) plt.xticks(xticks, xticklabels) plt.yticks(yticks, yticklabels) for xmark in xmarker: plt.axvline(xmark, color="k", lw=1, zorder=-1, alpha=0.5) if baseline is not None: plt.fill_between(baseline, y1=[1, 1], y2=[150, 150], color="k", alpha=0.5) plt.tick_params(direction="in", length=3) plt.xlim(time_cutoff) plt.ylim([1, 147.5]) plt.xlabel("time [s]") plt.ylabel("Freq [Hz]") sns.despine(ax=plt.gca())
def fit_mice_hash(X: DataFrame, iterations: int = 1):
    return joblib.hash(X)
def make_path(name_tuple):
    return joblib.hash(name_tuple)
    parameters.network_type,
    parameters.update_rule,
    parameters.batch_accumulator,
    rng)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    qnetwork,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "MG2S_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon,
# discount and learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController(
    evaluateOn='epoch', periodicity=1))

# During training epochs, we want to train the agent after every [parameters.update_frequency]
# actions it takes. Plus, we also want to display after each training episode (not after every
# training step) the average Bellman residual and the average of the V values obtained during the
# last episode, hence the two last arguments.
agent.attach(bc.TrainerController(
def run(self): self._validate_params() self._set_lib() estimator_class = self._load_estimator_class() metrics_functions = self._load_metrics_functions() parameters_grid = self._init_parameters_grid() self.results_ = [] for dataset in self.datasets: n_features = dataset["n_features"] n_samples_train = dataset["n_samples_train"] n_samples_test = list(reversed(sorted(dataset["n_samples_test"]))) for ns_train in n_samples_train: X, y = gen_data( dataset["sample_generator"], n_samples=ns_train + max(n_samples_test), n_features=n_features, **dataset["params"], ) X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=ns_train) for params in parameters_grid: estimator = estimator_class(**params) set_random_state(estimator, random_state=42) hyperparams_digest = joblib.hash(params) dims_digest = joblib.hash([ns_train, n_features]) profiling_results_path = str(RESULTS_PATH / "profiling") profiling_path = f"{profiling_results_path}/{self.lib_}_fit_{hyperparams_digest}_{dims_digest}.html" _, mean, stdev = FuncExecutor.run(estimator.fit, profiling_path, X_train, y_train) row = dict( estimator=self.name, lib=self.lib_, function="fit", mean=mean, stdev=stdev, n_samples=ns_train, n_features=n_features, hyperparams_digest=hyperparams_digest, dims_digest=dims_digest, **params, ) if hasattr(estimator, "n_iter_"): row["n_iter"] = estimator.n_iter_ self.results_.append(row) print("%s - %s - %s - mean: %6.3f - stdev: %6.3f" % (self.lib_, self.name, "fit", mean, stdev)) for i in range(len(n_samples_test)): ns_test = n_samples_test[i] X_test_, y_test_ = X_test[:ns_test], y_test[:ns_test] bench_func = predict_or_transform(estimator) dims_digest = joblib.hash([ns_test, n_features]) profiling_path = f"{profiling_results_path}/{self.lib_}_{bench_func.__name__}_{hyperparams_digest}_{dims_digest}.html" ( y_pred, mean, stdev, ) = FuncExecutor.run(bench_func, profiling_path, X_test_) if i == 0: scores = { func.__name__: func(y_test_, y_pred) for func in metrics_functions } row = dict( estimator=self.name, lib=self.lib_, function="predict", mean=mean, stdev=stdev, n_samples=ns_test, n_features=n_features, hyperparams_digest=hyperparams_digest, dims_digest=dims_digest, **scores, **params, ) print("%s - %s - %s - mean: %6.3f - stdev: %6.3f" % (self.lib_, self.name, bench_func.__name__, mean, stdev)) self.results_.append(row) return self
def calc_hash(self):
    self.hash = joblib.hash(self.filename)
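# Note that calc_hash above fingerprints only the file *name*; a hypothetical
# variant that fingerprints the file *contents* (so the hash changes when the
# file is rewritten) could look like this sketch, assuming `self.filename`
# points at an existing file.
import hashlib


def calc_content_hash(self):
    h = hashlib.sha1()
    with open(self.filename, 'rb') as f:
        # read in chunks so large files do not need to fit in memory
        for chunk in iter(lambda: f.read(1 << 20), b''):
            h.update(chunk)
    self.hash = h.hexdigest()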