Exemple #1
0
    def __init__(self):
        """Initialise the indexer and warn about the cost of a full index.

        Delegates to the parent initialiser and then emits a performance
        warning through the root ``logging`` module, because a full index
        can produce a large number of record pairs.
        """
        super(Full, self).__init__()

        # ``logging.warn`` is a deprecated alias that was removed in
        # Python 3.13; ``logging.warning`` is available on every version.
        logging.warning(
            "indexing - performance warning "
            "- A full index can result in large number of record pairs."
        )
Exemple #2
0
    def _link_index(self, df_a, df_b):
        """Return every pairing of a record in ``df_a`` with one in ``df_b``.

        Parameters
        ----------
        df_a, df_b
            Objects exposing an ``index`` with ``values`` and ``name``
            attributes (presumably pandas DataFrames -- confirm with caller).

        Returns
        -------
        pandas.MultiIndex
            The Cartesian product of the two indexes; the level names are
            taken from ``df_a.index.name`` and ``df_b.index.name``.
        """
        # full_index_size is defined elsewhere; its result is treated here
        # as the number of candidate pairs that will be generated.
        n_max = full_index_size((df_a, df_b))

        if n_max > 1e7:
            # ``logging.warn`` is a deprecated alias that was removed in
            # Python 3.13; ``logging.warning`` works on every version.
            logging.warning(
                "The number of record pairs is large. Consider a different "
                "indexation algorithm for better performance. ")

        return pandas.MultiIndex.from_product(
            [df_a.index.values, df_b.index.values],
            names=[df_a.index.name, df_b.index.name])
Exemple #3
0
    def _dedup_index(self, df_a):
        """Return every unordered pair of distinct records within ``df_a``.

        Parameters
        ----------
        df_a
            Object exposing an ``index`` with a ``values`` attribute
            (presumably a pandas DataFrame -- confirm with caller).

        Returns
        -------
        pandas.MultiIndex
            Two unnamed levels, both holding ``df_a.index.values``. The
            codes are the strictly-upper-triangular positions ``(i, j)``
            with ``i < j``, so each pair occurs once and no record is
            paired with itself.
        """
        # ``(df_a)`` in the original call was just ``df_a`` (parentheses
        # without a comma do not build a tuple), so the argument is unchanged.
        n_max = full_index_size(df_a)

        if n_max > 1e7:
            # ``logging.warn`` is a deprecated alias that was removed in
            # Python 3.13; ``logging.warning`` works on every version.
            logging.warning(
                "The number of record pairs is large. Consider a different "
                "indexation algorithm for better performance. ")

        levels = [df_a.index.values, df_a.index.values]
        # k=1 skips the main diagonal, i.e. excludes self-pairs.
        codes = numpy.triu_indices(len(df_a.index), k=1)

        # pandas 0.24 renamed the ``labels`` keyword to ``codes`` and
        # pandas 1.0 removed ``labels``. Prefer the current keyword and fall
        # back only for old releases that reject ``codes``.
        try:
            return pandas.MultiIndex(levels=levels,
                                     codes=codes,
                                     verify_integrity=False)
        except TypeError:
            return pandas.MultiIndex(levels=levels,
                                     labels=codes,
                                     verify_integrity=False)