Beispiel #1
0
            pairs = self._link_index(*x)
            names = self._make_index_names(x[0].index.name, x[1].index.name)

        # deduplication
        else:

            pairs = self._dedup_index(*x)
            names = self._make_index_names(x[0].index.name, x[0].index.name)

        pairs.rename(names, inplace=True)

        return pairs


# Legacy alias: the old name ``BaseIndexator`` is bound to the result of
# calling DeprecationHelper on BaseIndexAlgorithm (no custom message is
# passed here).
# NOTE(review): presumably the helper warns when the old name is used --
# confirm against DeprecationHelper's definition.
BaseIndexator = DeprecationHelper(BaseIndexAlgorithm)


class BaseCompareFeature(object):
    """Base abstract class for compare feature engineering.

    Parameters
    ----------
    labels_left : list, str, int
        The labels to use for comparing record pairs in the left
        dataframe.
    labels_right : list, str, int
        The labels to use for comparing record pairs in the right
        dataframe (linking) or left dataframe (deduplication).
    args : tuple
        Additional arguments to pass to the `_compare_vectorized`
Beispiel #2
0
            # small dataframes (n_max below 1e6)
            if n_max < 1e6:
                pairs = random_pairs_without_replacement_small_frames(
                    self.n, shape, self.random_state)
            # large dataframes (n_max of 1e6 or more)
            else:
                pairs = random_pairs_without_replacement_large_frames(
                    self.n, shape, self.random_state)

        levels = [df_a.index.values, df_a.index.values]
        labels = pairs

        return pandas.MultiIndex(levels=levels,
                                 labels=labels,
                                 verify_integrity=False)


# Deprecated aliases for the index classes. Each old ``*Index`` name is bound
# to the result of DeprecationHelper, called with the renamed class and a
# message naming its new location under ``recordlinkage.index``.
# NOTE(review): presumably the helper surfaces the message when the old name
# is used -- confirm against DeprecationHelper's definition.
FullIndex = DeprecationHelper(
    Full,
    "class recordlinkage.FullIndex is renamed and moved, "
    "use recordlinkage.index.Full",
)
BlockIndex = DeprecationHelper(
    Block,
    "class recordlinkage.BlockIndex is renamed and moved, "
    "use recordlinkage.index.Block",
)
SortedNeighbourhoodIndex = DeprecationHelper(
    SortedNeighbourhood,
    "class recordlinkage.SortedNeighbourhoodIndex "
    "is renamed and moved, use recordlinkage.index.SortedNeighbourhood",
)
RandomIndex = DeprecationHelper(
    Random,
    "class recordlinkage.RandomIndex is renamed and moved, "
    "use recordlinkage.index.Random",
)
Beispiel #3
0
                    "n must be a integer satisfying 0<n<=%s" % n_max)

            # small dataframes (n_max below 1e6)
            if n_max < 1e6:
                pairs = random_pairs_without_replacement_small_frames(
                    self.n, shape, self.random_state)
            # large dataframes (n_max of 1e6 or more)
            else:
                pairs = random_pairs_without_replacement_large_frames(
                    self.n, shape, self.random_state)

        levels = [df_a.index.values, df_a.index.values]
        labels = pairs

        return pandas.MultiIndex(
            levels=levels,
            labels=labels,
            verify_integrity=False
        )


# Deprecated aliases for the index classes. Each old ``*Index`` name is bound
# to the result of DeprecationHelper, called with the relocated class and a
# message stating where the class now lives.
# NOTE(review): presumably the helper surfaces the message when the old name
# is used -- confirm against DeprecationHelper's definition.
FullIndex = DeprecationHelper(
    Full,
    "This class is moved to recordlinkage.index.Full.",
)
BlockIndex = DeprecationHelper(
    Block,
    "This class is moved to recordlinkage.index.Block.",
)
SortedNeighbourhoodIndex = DeprecationHelper(
    SortedNeighbourhood,
    "This class is moved to recordlinkage.index.SortedNeighbourhood.",
)
RandomIndex = DeprecationHelper(
    Random,
    "This class is moved to recordlinkage.index.Random.",
)
Beispiel #4
0
            if self.swap_months == 'default':
                self.swap_months = [(6, 7, 0.5), (7, 6, 0.5), (9, 10, 0.5),
                                    (10, 9, 0.5)]
            else:
                try:
                    if not all([len(x) == 3 for x in self.swap_months]):
                        raise Exception
                except Exception:
                    raise ValueError(
                        'swap_months must be a list of (first month, \
                        second month, value) tuples or lists. ')

            for month1, month2, value in self.swap_months:

                c[(s1.dt.year == s2.dt.year) & (s1.dt.month == month1) &
                  (s2.dt.month == month2) & (s1.dt.day == s2.dt.day) &
                  (c != 1)] = value

        c = pandas.Series(c)
        c[s1.isnull() | s2.isnull()] = self.missing_value
        return c


# Deprecated aliases for the comparison classes. Each old ``Compare*`` name is
# bound to the result of DeprecationHelper, called with the renamed class and
# a message stating the new name.
# NOTE(review): presumably the helper surfaces the message when the old name
# is used -- confirm against DeprecationHelper's definition.
CompareExact = DeprecationHelper(
    Exact,
    "This class is renamed into Exact.",
)
CompareString = DeprecationHelper(
    String,
    "This class is renamed into String.",
)
CompareNumeric = DeprecationHelper(
    Numeric,
    "This class is renamed into Numeric.",
)
CompareGeographic = DeprecationHelper(
    Geographic,
    "This class is renamed into Geographic.",
)
CompareDate = DeprecationHelper(
    Date,
    "This class is renamed into Date.",
)