Beispiel #1
0
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """
    Run the join selected by ``how`` on two parallel sequences of join keys.

    Each ``(left, right)`` key pair is factorized with ``_factorize_keys``.
    The per-key labels are then collapsed into a single group key per side,
    and the routine stored under ``how`` in ``_join_functions`` is called
    on those group keys.

    Parameters
    ----------
    left_keys : sequence
        Join keys of the left side, one entry per join key.
    right_keys : sequence
        Join keys of the right side; must have as many entries as
        ``left_keys``.
    sort : bool, default False
        Forwarded to ``_factorize_keys``. When ``how == 'left'`` it is also
        forwarded to the join function as the ``sort`` keyword.
    how : str, default 'inner'
        Key used to look up the join routine in ``_join_functions``.

    Returns
    -------
    result : object
        Whatever the selected join function returns, unchanged
        (presumably the left/right indexers -- confirm against the
        entries of ``_join_functions``).

    Raises
    ------
    AssertionError
        If ``left_keys`` and ``right_keys`` differ in length.
    """
    if len(left_keys) != len(right_keys):
        raise AssertionError(
            'left_keys and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    # Factorize every left/right key pair together; the returned count is
    # recorded as that key's group size.
    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    # Product of all group sizes, accumulated as ``long`` so the product
    # itself is not limited to a fixed-width integer.
    max_groups = long(1)
    for size in group_sizes:
        max_groups *= long(size)

    if max_groups > 2**63:  # pragma: no cover
        # The product exceeds 2**63, so the labels are combined with
        # ``lib.fast_zip`` and factorized directly instead of going
        # through ``get_group_index``.
        # NOTE(review): ``sort`` is not forwarded on this path, so
        # ``_factorize_keys`` falls back to its own default -- confirm
        # that this is intended.
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        # Re-factorize the combined keys; ``max_groups`` is replaced by the
        # count reported for the combined keys.
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    # preserve left frame order if how == 'left' and sort == False
    kwargs = {'sort': sort} if how == 'left' else {}
    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups, **kwargs)
Beispiel #2
0
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """
    Factorize paired join keys and dispatch to the join routine for ``how``.

    Parameters
    ----------
    left_keys : sequence
        Left-side join keys, one entry per join key.
    right_keys : sequence
        Right-side join keys; its length must equal ``len(left_keys)``.
    sort : bool, default False
        Passed to ``_factorize_keys``, and additionally passed to the join
        function as ``sort=`` when ``how == 'left'``.
    how : str, default 'inner'
        Name of the join; used as the lookup key into ``_join_functions``.

    Returns
    -------
    result : object
        The return value of ``_join_functions[how]`` called with the left
        group key, the right group key and the group count (presumably a
        pair of indexers -- confirm against ``_join_functions``).

    Raises
    ------
    AssertionError
        If the two key sequences have different lengths.
    """
    if len(left_keys) != len(right_keys):
        raise AssertionError(
            'left_keys and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    # One joint factorization per key pair; labels and counts are collected
    # in the same order as the keys.
    for left_key, right_key in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(left_key, right_key, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    # Multiply the group sizes together using ``long`` arithmetic.
    max_groups = long(1)
    for count in group_sizes:
        max_groups *= long(count)

    if max_groups > 2 ** 63:  # pragma: no cover
        # Product above 2**63: skip ``get_group_index`` and factorize the
        # ``lib.fast_zip``-combined labels instead.
        # NOTE(review): this call does not pass ``sort``, unlike the other
        # ``_factorize_keys`` calls in this function -- confirm intended.
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        # Factorize the combined keys once more; the returned count
        # overwrites ``max_groups``.
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    # preserve left frame order if how == 'left' and sort == False
    if how == 'left':
        kwargs = {'sort': sort}
    else:
        kwargs = {}

    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups, **kwargs)
Beispiel #3
0
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """
    Run the join selected by ``how`` on two parallel sequences of join keys.

    Each ``(left, right)`` key pair is factorized with ``_factorize_keys``,
    the per-key labels are collapsed into one group key per side, and the
    routine stored under ``how`` in ``_join_functions`` is called on them.

    Parameters
    ----------
    left_keys : sequence
        Join keys of the left side, one entry per join key.
    right_keys : sequence
        Join keys of the right side; must have as many entries as
        ``left_keys``.
    sort : bool, default False
        Forwarded to ``_factorize_keys``. It is not passed to the join
        function itself.
    how : str, default 'inner'
        Key used to look up the join routine in ``_join_functions``.

    Returns
    -------
    result : object
        Whatever the selected join function returns, unchanged
        (presumably the left/right indexers -- confirm against the
        entries of ``_join_functions``).

    Raises
    ------
    AssertionError
        If ``left_keys`` and ``right_keys`` differ in length.
    """
    if len(left_keys) != len(right_keys):
        raise AssertionError(
            'left_keys and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    # Factorize every left/right key pair together; the returned count is
    # recorded as that key's group size.
    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    # Product of all group sizes, accumulated with ``long`` arithmetic.
    max_groups = long(1)
    for size in group_sizes:
        max_groups *= long(size)

    if max_groups > 2**63:  # pragma: no cover
        # Product above 2**63: factorize the ``lib.fast_zip``-combined
        # labels directly instead of going through ``get_group_index``.
        # NOTE(review): ``sort`` is not forwarded on this path -- confirm
        # that relying on the ``_factorize_keys`` default is intended.
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        # Re-factorize the combined keys; ``max_groups`` is replaced by the
        # count reported for the combined keys.
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups)
Beispiel #4
0
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """
    Factorize paired join keys and dispatch to the join routine for ``how``.

    Parameters
    ----------
    left_keys : sequence
        Left-side join keys, one entry per join key.
    right_keys : sequence
        Right-side join keys; its length must equal ``len(left_keys)``.
    sort : bool, default False
        Passed to ``_factorize_keys`` only; the join function is called
        without it.
    how : str, default 'inner'
        Name of the join; used as the lookup key into ``_join_functions``.

    Returns
    -------
    result : object
        The return value of ``_join_functions[how]`` called with the left
        group key, the right group key and the group count (presumably a
        pair of indexers -- confirm against ``_join_functions``).

    Raises
    ------
    AssertionError
        If the two key sequences have different lengths.
    """
    if len(left_keys) != len(right_keys):
        raise AssertionError(
            'left_keys and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    # One joint factorization per key pair; labels and counts are collected
    # in the same order as the keys.
    for left_key, right_key in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(left_key, right_key, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    # ``long(1)`` rather than the ``1L`` literal: same value, but the
    # literal form is a syntax error on Python 3.
    max_groups = long(1)
    for count in group_sizes:
        max_groups *= long(count)

    if max_groups > 2 ** 63:  # pragma: no cover
        # Product above 2**63: skip ``get_group_index`` and factorize the
        # ``lib.fast_zip``-combined labels instead.
        # NOTE(review): this call does not pass ``sort``, unlike the other
        # ``_factorize_keys`` calls in this function -- confirm intended.
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        # Factorize the combined keys once more; the returned count
        # overwrites ``max_groups``.
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups)