'temporary pruning of identical rows and columns') unique_col, representing = ht.prune_identical_alleles(binary, report_groups=True) representing_df = pd.DataFrame([[a1, a2] for a1, a_l in representing.items() for a2 in a_l], columns=['representative', 'represented']) temp_pruned = ht.prune_identical_reads(unique_col) if VERBOSE: print("\n", ht.now(), 'Size of mtx with unique rows and columns:', temp_pruned.shape) print(ht.now(), 'determining minimal set of non-overshadowed alleles') minimal_alleles = ht.prune_overshadowed_alleles(temp_pruned) if VERBOSE: print("\n", ht.now(), 'Keeping only the minimal number of required alleles', minimal_alleles.shape) binary = binary[minimal_alleles] if VERBOSE: print("\n", ht.now(), 'Creating compact model...') if is_paired and unpaired_weight > 0: if use_discordant: compact_mtx, compact_occ = ht.get_compact_model( binary_p[minimal_alleles],
# Keep only the allele columns accepted by is_frequent. A list is built
# explicitly because filter() is lazy on Python 3 and a filter object is
# not usable as a DataFrame column selector.
alleles_to_keep = [allele for allele in binary.columns if is_frequent(allele)]
binary = binary[alleles_to_keep]

# NOTE(review): the original layout was collapsed onto one line; it is
# assumed that every progress message below is gated by args.verbose and
# that the computations themselves always run -- confirm against history.
if args.verbose:
    print("\n", ht.now(), 'temporary pruning of identical rows and columns')

# With report_groups=True the helper returns the column-pruned matrix
# together with a mapping: representative allele -> alleles it stands for.
unique_col, representing = ht.prune_identical_alleles(binary, report_groups=True)

# Flatten that mapping into one (representative, represented) row per pair.
representing_df = pd.DataFrame(
    [[a1, a2] for a1, a_l in representing.items() for a2 in a_l],
    columns=['representative', 'represented'],
)

temp_pruned = ht.prune_identical_reads(unique_col)

if args.verbose:
    print("\n", ht.now(), 'Size of mtx with unique rows and columns:', temp_pruned.shape)
    print(ht.now(), 'determining minimal set of non-overshadowed alleles')

minimal_alleles = ht.prune_overshadowed_alleles(temp_pruned)

if args.verbose:
    print("\n", ht.now(), 'Keeping only the minimal number of required alleles', minimal_alleles.shape)

# Restrict the full (not the temporarily pruned) matrix to the minimal
# allele set before building the model.
binary = binary[minimal_alleles]

if args.verbose:
    print("\n", ht.now(), 'Creating compact model...')

compact_mtx, compact_occ = ht.get_compact_model(binary)

allele_ids = binary.columns

# Grouping of alleles by the value get_4digit returns for them.
groups_4digit = defaultdict(list)
for allele in allele_ids:
    type_4digit = get_4digit(allele)