Beispiel #1
0
 def _handler(key, func, *args, **kwargs):
     """Return the cached value for ``key``, computing and storing it on a miss.

     Args:
         key: Cache key passed to ``cache.get`` and ``cache.set``.
         func: Callable invoked as ``func(*args, **kwargs)`` when the cache
             yields ``None`` for ``key``.
         *args: Positional arguments forwarded to ``func``.
         **kwargs: Keyword arguments forwarded to ``func``.

     Returns:
         The cached value, or the freshly computed result of ``func``.
     """
     data = cache.get(key)
     # A ``None`` result cannot be told apart from a miss, so a ``func`` that
     # returns ``None`` is re-evaluated on every call.
     if data is None:
         # Hand the key to the logger as an argument instead of formatting
         # with `%` up front: a tuple key would otherwise be unpacked as the
         # format arguments and raise TypeError.
         logger.warning('update cache %s', key)
         data = func(*args, **kwargs)
         cache.set(key, data)
     return data
Beispiel #2
0
def calc_hyper_param_importance(df, hyper_param, target):
    """Score a hyper-parameter by its absolute correlation with the target.

    Rows where either column is missing are dropped before the correlation
    between the two columns is computed.

    Args:
        df: Table containing both the `hyper_param` and `target` columns.
        hyper_param: Name of the hyper-parameter column.
        target: Name of the target column.

    Returns:
        The absolute value of the correlation, or 0 when fewer than three
        complete rows remain or the correlation is NaN.
    """
    complete_rows = df[[hyper_param, target]].dropna()

    # The Pearson correlation coefficient cannot be computed from two or
    # fewer samples.
    if len(complete_rows) < 3:
        logger.error("Number of samples is less or equal than 2.")
        return 0

    target_values = complete_rows[target]
    param_values = complete_rows[hyper_param]
    coefficient = target_values.corr(param_values)

    if not np.isnan(coefficient):
        return abs(coefficient)

    logger.warning("Correlation is nan!")
    return 0
Beispiel #3
0
    def add_embeddings(self,
                       tag,
                       mat=None,
                       metadata=None,
                       metadata_header=None,
                       walltime=None,
                       labels=None,
                       hot_vectors=None,
                       labels_meta=None):
        """Add embeddings to vdl record file.

        Args:
            tag (string): Data identifier. Must not contain `%`.
            mat (numpy.array or list): A matrix in which each row is the
                feature vector of one label.
            metadata (numpy.array or list): A 1D or 2D matrix of labels.
            metadata_header (numpy.array or list): Meta data of labels. When
                `metadata` is 2D and no header is given, the headers
                `label_0`, `label_1`, ... are generated, one per row of
                `metadata`.
            walltime (int): Wall time of embeddings. Defaults to the current
                time in milliseconds.
            labels (numpy.array or list): Obsolete parameter, use `metadata` to
                replace it.
            hot_vectors (numpy.array or list): Obsolete parameter, use `mat` to
                replace it.
            labels_meta (numpy.array or list): Obsolete parameter, use
                `metadata_header` to replace it.

        Raises:
            RuntimeError: If `tag` contains `%`.

        Example 1:
            mat = [
            [1.3561076367500755, 1.3116267195134017, 1.6785401875616097],
            [1.1039614644440658, 1.8891609992484688, 1.32030488587171],
            [1.9924524852447711, 1.9358920727142739, 1.2124401279391606],
            [1.4129542689796446, 1.7372166387197474, 1.7317806077076527],
            [1.3913371800587777, 1.4684674577930312, 1.5214136352476377]]

            metadata = ["label_1", "label_2", "label_3", "label_4", "label_5"]
            # or like this
            # metadata = [["label_1", "label_2", "label_3", "label_4", "label_5"]]

            writer.add_embeddings(tag='default',
                                  metadata=metadata,
                                  mat=mat,
                                  walltime=round(time.time() * 1000))

        Example 2:
            mat = [
            [1.3561076367500755, 1.3116267195134017, 1.6785401875616097],
            [1.1039614644440658, 1.8891609992484688, 1.32030488587171],
            [1.9924524852447711, 1.9358920727142739, 1.2124401279391606],
            [1.4129542689796446, 1.7372166387197474, 1.7317806077076527],
            [1.3913371800587777, 1.4684674577930312, 1.5214136352476377]]

            metadata = [["label_a_1", "label_a_2", "label_a_3", "label_a_4", "label_a_5"],
                      ["label_b_1", "label_b_2", "label_b_3", "label_b_4", "label_b_5"]]

            metadata_header = ["label_a", "label_b"]

            writer.add_embeddings(tag='default',
                                  metadata=metadata,
                                  metadata_header=metadata_header,
                                  mat=mat,
                                  walltime=round(time.time() * 1000))
        """
        if '%' in tag:
            raise RuntimeError("% can't appear in tag!")

        def _has_values(value):
            # Explicit None/length test instead of truthiness: `bool()` of a
            # numpy array with more than one element raises ValueError.
            return value is not None and len(value) > 0

        # Fall back to the obsolete aliases only when the current parameter
        # was not supplied.
        if (mat is None) and _has_values(hot_vectors):
            mat = hot_vectors
            logger.warning('Parameter `hot_vectors` in function '
                           '`add_embeddings` will be deprecated in '
                           'future, use `mat` instead.')
        if (metadata is None) and _has_values(labels):
            metadata = labels
            logger.warning(
                'Parameter `labels` in function `add_embeddings` will be '
                'deprecated in future, use `metadata` instead.')
        if (metadata_header is None) and _has_values(labels_meta):
            metadata_header = labels_meta
            logger.warning(
                'Parameter `labels_meta` in function `add_embeddings` will be'
                ' deprecated in future, use `metadata_header` instead.')

        # Normalise numpy inputs to plain lists before building the record.
        if isinstance(mat, np.ndarray):
            mat = mat.tolist()
        if isinstance(metadata, np.ndarray):
            metadata = metadata.tolist()

        # 2D metadata without a header: generate one header per row.
        if isinstance(metadata[0], list) and not _has_values(metadata_header):
            metadata_header = ["label_%d" % i for i in range(len(metadata))]

        walltime = round(time.time() * 1000) if walltime is None else walltime
        # The record is always written with step 0.
        self._get_file_writer().add_record(
            embedding(tag=tag,
                      labels=metadata,
                      labels_meta=metadata_header,
                      hot_vectors=mat,
                      step=0,
                      walltime=walltime))