Beispiel #1
0
def _filter_samples_by_index(variations,
                             sample_cols,
                             filtered_vars=None,
                             reverse=False):
    """Copy ``variations`` keeping only the selected sample columns.

    Args:
        variations: Source container. It must expose ``samples``,
            ``metadata``, ``keys()`` and item access by path.
        sample_cols: Either a boolean mask with one entry per sample or a
            collection of integer sample positions. Positions that do not
            match any sample (including negative ones) are ignored. An
            empty collection selects nothing.
        filtered_vars: Container that receives the result. A new
            ``VariationsArrays`` is created when omitted.
        reverse: When true the selection is inverted, i.e. the selected
            samples are dropped instead of kept.

    Returns:
        ``filtered_vars``, holding every path of ``variations`` (paths
        containing ``'calls'`` are sliced along their second axis), the
        source metadata and the kept samples.
    """
    if filtered_vars is None:
        filtered_vars = VariationsArrays()

    samples = variations.samples

    dtype = getattr(sample_cols, 'dtype', None)
    if dtype is not None:
        is_bool = numpy.issubdtype(dtype, numpy.bool_)
    else:
        # Materialise once so sets and one-shot iterables can be inspected
        # without losing elements.
        sample_cols = list(sample_cols)
        is_bool = bool(sample_cols) and isinstance(sample_cols[0],
                                                   (bool, numpy.bool_))

    if is_bool:
        # The builtin ``bool`` is used as dtype because the ``numpy.bool``
        # alias does not exist in NumPy 1.24-1.26.
        mask = numpy.asarray(sample_cols, dtype=bool)
    else:
        # Turn the positions into a mask; positions outside
        # 0..len(samples)-1 simply never match.
        mask = numpy.isin(numpy.arange(len(samples)), sample_cols)

    if reverse:
        mask = numpy.logical_not(mask)

    # ``keys()`` is called explicitly: the container is only known to offer
    # that method, not necessarily plain iteration.
    for path in variations.keys():
        matrix = variations[path]
        if is_dataset(matrix):
            # Load the dataset contents before indexing/assigning them.
            matrix = matrix[:]
        if 'calls' in path:
            # Per-sample matrices keep only the selected columns.
            filtered_vars[path] = matrix[:, mask]
        else:
            filtered_vars[path] = matrix

    filtered_vars.metadata = variations.metadata
    filtered_vars.samples = [
        sample for sample, keep in zip(samples, mask) if keep
    ]
    return filtered_vars
Beispiel #2
0
def _filter_samples_by_index(variations, sample_cols, filtered_vars=None,
                             reverse=False):
    """Copy ``variations`` keeping only the selected sample columns.

    Args:
        variations: Source container. It must expose ``samples``,
            ``metadata``, ``keys()`` and item access by path.
        sample_cols: Either a boolean mask with one entry per sample or a
            collection of integer sample positions. Positions that do not
            match any sample (including negative ones) are ignored. An
            empty collection selects nothing.
        filtered_vars: Container that receives the result. A new
            ``VariationsArrays`` is created when omitted.
        reverse: When true the selection is inverted, i.e. the selected
            samples are dropped instead of kept.

    Returns:
        ``filtered_vars``, holding every path of ``variations`` (paths
        containing ``'calls'`` are sliced along their second axis), the
        source metadata and the kept samples.
    """
    if filtered_vars is None:
        filtered_vars = VariationsArrays()

    samples = variations.samples

    # The dtype is looked up with getattr instead of try/except so that an
    # unrelated AttributeError can never be mistaken for "not an array".
    dtype = getattr(sample_cols, 'dtype', None)
    if dtype is None:
        # Materialise once so sets and one-shot iterables can be inspected
        # without losing elements.
        sample_cols = list(sample_cols)
        is_bool = bool(sample_cols) and isinstance(sample_cols[0],
                                                   (bool, numpy.bool_))
    else:
        is_bool = numpy.issubdtype(dtype, numpy.bool_)

    if is_bool:
        # The builtin ``bool`` is used as dtype because the ``numpy.bool``
        # alias does not exist in NumPy 1.24-1.26.
        mask = numpy.asarray(sample_cols, dtype=bool)
    else:
        # Turn the positions into a mask; positions outside
        # 0..len(samples)-1 simply never match.
        mask = numpy.isin(numpy.arange(len(samples)), sample_cols)

    if reverse:
        mask = numpy.logical_not(mask)

    # ``keys()`` is called explicitly: the container is only known to offer
    # that method, not necessarily plain iteration.
    for path in variations.keys():
        matrix = variations[path]
        if is_dataset(matrix):
            # Load the dataset contents before indexing/assigning them.
            matrix = matrix[:]
        if 'calls' in path:
            # Per-sample matrices keep only the selected columns.
            filtered_vars[path] = matrix[:, mask]
        else:
            filtered_vars[path] = matrix

    filtered_vars.metadata = variations.metadata
    filtered_vars.samples = [sample for sample, keep in zip(samples, mask)
                             if keep]
    return filtered_vars