Ejemplo n.º 1
0
              'temporary pruning of identical rows and columns')
    unique_col, representing = ht.prune_identical_alleles(binary,
                                                          report_groups=True)
    representing_df = pd.DataFrame([[a1, a2]
                                    for a1, a_l in representing.items()
                                    for a2 in a_l],
                                   columns=['representative', 'represented'])

    temp_pruned = ht.prune_identical_reads(unique_col)

    if VERBOSE:
        print("\n", ht.now(), 'Size of mtx with unique rows and columns:',
              temp_pruned.shape)
        print(ht.now(), 'determining minimal set of non-overshadowed alleles')

    minimal_alleles = ht.prune_overshadowed_alleles(temp_pruned)

    if VERBOSE:
        print("\n", ht.now(),
              'Keeping only the minimal number of required alleles',
              minimal_alleles.shape)

    binary = binary[minimal_alleles]

    if VERBOSE:
        print("\n", ht.now(), 'Creating compact model...')

    if is_paired and unpaired_weight > 0:
        if use_discordant:
            compact_mtx, compact_occ = ht.get_compact_model(
                binary_p[minimal_alleles],
Ejemplo n.º 2
0
    alleles_to_keep = filter(is_frequent, binary.columns)
    binary = binary[alleles_to_keep]

    if args.verbose:
        print "\n", ht.now(), 'temporary pruning of identical rows and columns'
    unique_col, representing = ht.prune_identical_alleles(binary, report_groups=True)
    representing_df = pd.DataFrame([[a1, a2] for a1, a_l in representing.iteritems() for a2 in a_l],
                                   columns=['representative', 'represented'])

    temp_pruned = ht.prune_identical_reads(unique_col)

    if args.verbose:
        print "\n", ht.now(), 'Size of mtx with unique rows and columns:', temp_pruned.shape
        print ht.now(), 'determining minimal set of non-overshadowed alleles'

    minimal_alleles = ht.prune_overshadowed_alleles(temp_pruned)

    if args.verbose:
        print "\n", ht.now(), 'Keeping only the minimal number of required alleles', minimal_alleles.shape

    binary = binary[minimal_alleles]

    if args.verbose:
        print "\n", ht.now(), 'Creating compact model...'
    compact_mtx, compact_occ = ht.get_compact_model(binary)

    allele_ids = binary.columns

    groups_4digit = defaultdict(list)
    for allele in allele_ids:
        type_4digit = get_4digit(allele)