Beispiel #1
0
def _factorize_keys(lk, rk, sort=True):
    """
    Factorize the left and right keys with one shared factorizer.

    Parameters
    ----------
    lk, rk : key values for the left and right side
        Both are coerced with ``com._ensure_int64`` when
        ``com.is_int_or_datetime_dtype`` is true for both of them, and with
        ``com._ensure_object`` otherwise.
    sort : bool, default True
        When true, both label arrays are passed through ``_sort_labels``
        together with the factorizer's uniques.

    Returns
    -------
    (llab, rlab, count)
        The left labels, the right labels and the number of groups.  Labels
        equal to -1 are moved into one extra trailing "NA" group, in which
        case ``count`` is one larger than the factorizer's own count.
    """
    both_int_like = (com.is_int_or_datetime_dtype(lk)
                     and com.is_int_or_datetime_dtype(rk))
    if both_int_like:
        factorizer_cls = _hash.Int64Factorizer
        coerce = com._ensure_int64
    else:
        factorizer_cls = _hash.Factorizer
        coerce = com._ensure_object
    lk = coerce(lk)
    rk = coerce(rk)

    # One factorizer instance handles both sides, left side first.
    factorizer = factorizer_cls(max(len(lk), len(rk)))
    left_labels = factorizer.factorize(lk)
    right_labels = factorizer.factorize(rk)
    n_groups = factorizer.get_count()

    if sort:
        left_labels, right_labels = _sort_labels(
            factorizer.uniques.to_array(), left_labels, right_labels)

    # NA group: -1 labels on either side go into one additional group.
    left_na = left_labels == -1
    left_has_na = left_na.any()
    right_na = right_labels == -1
    right_has_na = right_na.any()

    if not (left_has_na or right_has_na):
        return left_labels, right_labels, n_groups

    if left_has_na:
        np.putmask(left_labels, left_na, n_groups)
    if right_has_na:
        np.putmask(right_labels, right_na, n_groups)
    return left_labels, right_labels, n_groups + 1
Beispiel #2
0
def _factorize_keys(lk, rk, sort=True):
    """
    Produce integer labels for two key arrays using a single factorizer.

    The keys are converted with ``com._ensure_int64`` and factorized with
    ``_hash.Int64Factorizer`` if ``com.is_int_or_datetime_dtype`` holds for
    both; otherwise they are converted with ``com._ensure_object`` and
    factorized with ``_hash.Factorizer``.  If ``sort`` is true the labels are
    additionally run through ``_sort_labels``.

    Returns a ``(left labels, right labels, group count)`` tuple.  Any label
    equal to -1 is replaced by the factorizer's count (the "NA" group) and
    the returned group count is then incremented by one.
    """
    use_int64 = (com.is_int_or_datetime_dtype(lk) and
                 com.is_int_or_datetime_dtype(rk))
    if not use_int64:
        table_type = _hash.Factorizer
        lk, rk = com._ensure_object(lk), com._ensure_object(rk)
    else:
        table_type = _hash.Int64Factorizer
        lk, rk = com._ensure_int64(lk), com._ensure_int64(rk)

    size_hint = max(len(lk), len(rk))
    table = table_type(size_hint)

    # Left keys are factorized before right keys on the same table.
    codes_l = table.factorize(lk)
    codes_r = table.factorize(rk)
    total = table.get_count()

    if sort:
        distinct = table.uniques.to_array()
        codes_l, codes_r = _sort_labels(distinct, codes_l, codes_r)

    # NA group
    missing_l = codes_l == -1
    any_l = missing_l.any()
    missing_r = codes_r == -1
    any_r = missing_r.any()

    # Both sides receive the same NA label (the pre-increment total).
    if any_l:
        np.putmask(codes_l, missing_l, total)
    if any_r:
        np.putmask(codes_r, missing_r, total)
    if any_l or any_r:
        total += 1

    return codes_l, codes_r, total
Beispiel #3
0
def _na_ok_dtype(dtype):
    """
    Return False when ``is_int_or_datetime_dtype(dtype)`` is truthy, and
    True otherwise.
    """
    if is_int_or_datetime_dtype(dtype):
        return False
    return True
Beispiel #4
0
def _na_ok_dtype(dtype):
    """Return the boolean negation of ``is_int_or_datetime_dtype(dtype)``."""
    int_or_datetime = is_int_or_datetime_dtype(dtype)
    return not int_or_datetime