def _handler(key, func, *args, **kwargs):
    """Return the cached value for ``key``, computing and storing it on a miss.

    Args:
        key: Identifier passed to ``cache.get`` / ``cache.set``.
        func: Callable invoked as ``func(*args, **kwargs)`` when the cache
            holds no value (i.e. ``cache.get`` returns ``None``) for ``key``.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The cached value if present, otherwise the freshly computed result.
    """
    cached = cache.get(key)
    if cached is not None:
        return cached

    # Cache miss: recompute, store the result, and hand it back.
    logger.warning('update cache %s' % key)
    fresh = func(*args, **kwargs)
    cache.set(key, fresh)
    return fresh
def calc_hyper_param_importance(df, hyper_param, target):
    """Score a hyper parameter by its absolute correlation with a target.

    Rows where either column is missing are dropped before the correlation
    between the ``target`` column and the ``hyper_param`` column is computed.

    Args:
        df: Data frame containing both columns.
        hyper_param: Column name of the hyper parameter.
        target: Column name of the target metric.

    Returns:
        The absolute value of the correlation, or ``0`` when at most two
        complete rows remain or when the correlation is NaN.
    """
    complete_rows = df[[hyper_param, target]].dropna()
    sample_count = len(complete_rows)

    # The Pearson correlation coefficient cannot be calculated when the
    # number of samples is less than or equal to 2.
    if not sample_count > 2:
        logger.error("Number of samples is less or equal than 2.")
        return 0

    target_values = complete_rows[target]
    param_values = complete_rows[hyper_param]
    coefficient = target_values.corr(param_values)

    if np.isnan(coefficient):
        logger.warning("Correlation is nan!")
        return 0

    return abs(coefficient)
def add_embeddings(self,
                   tag,
                   mat=None,
                   metadata=None,
                   metadata_header=None,
                   walltime=None,
                   labels=None,
                   hot_vectors=None,
                   labels_meta=None):
    """Add embeddings to vdl record file.

    Args:
        tag (string): Data identifier
        mat (numpy.array or list): A matrix which each row is feature of
            labels.
        metadata (numpy.array or list): A 1D or 2D matrix of labels
        metadata_header (numpy.array or list): Meta data of labels.
        walltime (int): Wall time of embeddings. Defaults to the current
            time in milliseconds when omitted.
        labels (numpy.array or list): Obsolete parameter, use `metadata` to
            replace it.
        hot_vectors (numpy.array or list): Obsolete parameter, use `mat` to
            replace it.
        labels_meta (numpy.array or list): Obsolete parameter, use
            `metadata_header` to replace it.

    Raises:
        RuntimeError: If `tag` contains a '%' character.

    Example 1:
        mat = [
        [1.3561076367500755, 1.3116267195134017, 1.6785401875616097],
        [1.1039614644440658, 1.8891609992484688, 1.32030488587171],
        [1.9924524852447711, 1.9358920727142739, 1.2124401279391606],
        [1.4129542689796446, 1.7372166387197474, 1.7317806077076527],
        [1.3913371800587777, 1.4684674577930312, 1.5214136352476377]]
        metadata = ["label_1", "label_2", "label_3", "label_4", "label_5"]
        # or like this
        # metadata = [["label_1", "label_2", "label_3", "label_4", "label_5"]]

        writer.add_embeddings(tag='default',
                              metadata=metadata,
                              mat=mat,
                              walltime=round(time.time() * 1000))

    Example 2:
        mat = [
        [1.3561076367500755, 1.3116267195134017, 1.6785401875616097],
        [1.1039614644440658, 1.8891609992484688, 1.32030488587171],
        [1.9924524852447711, 1.9358920727142739, 1.2124401279391606],
        [1.4129542689796446, 1.7372166387197474, 1.7317806077076527],
        [1.3913371800587777, 1.4684674577930312, 1.5214136352476377]]
        metadata = [["label_a_1", "label_a_2", "label_a_3", "label_a_4", "label_a_5"],
                    ["label_b_1", "label_b_2", "label_b_3", "label_b_4", "label_b_5"]]
        metadata_header = ["label_a", "label_b"]

        writer.add_embeddings(tag='default',
                              metadata=metadata,
                              metadata_header=metadata_header,
                              mat=mat,
                              walltime=round(time.time() * 1000))
    """
    if '%' in tag:
        raise RuntimeError("% can't appear in tag!")

    def _as_list(value):
        # numpy arrays raise ValueError when used in a boolean context, so
        # normalize them to plain lists before any truthiness check below.
        return value.tolist() if isinstance(value, np.ndarray) else value

    mat = _as_list(mat)
    metadata = _as_list(metadata)
    metadata_header = _as_list(metadata_header)
    labels = _as_list(labels)
    hot_vectors = _as_list(hot_vectors)
    labels_meta = _as_list(labels_meta)

    # Fall back to the obsolete parameters only when the new ones are absent.
    if mat is None and hot_vectors:
        mat = hot_vectors
        logger.warning('Parameter `hot_vectors` in function '
                       '`add_embeddings` will be deprecated in '
                       'future, use `mat` instead.')
    if metadata is None and labels:
        metadata = labels
        logger.warning(
            'Parameter `labels` in function `add_embeddings` will be '
            'deprecated in future, use `metadata` instead.')
    if metadata_header is None and labels_meta:
        metadata_header = labels_meta
        logger.warning(
            'Parameter `labels_meta` in function `add_embeddings` will be'
            ' deprecated in future, use `metadata_header` instead.')

    # 2D metadata without a header gets one generated name per metadata row.
    if isinstance(metadata[0], list) and not metadata_header:
        metadata_header = ["label_%d" % i for i in range(len(metadata))]

    step = 0
    walltime = round(time.time() * 1000) if walltime is None else walltime
    self._get_file_writer().add_record(
        embedding(tag=tag,
                  labels=metadata,
                  labels_meta=metadata_header,
                  hot_vectors=mat,
                  step=step,
                  walltime=walltime))