Exemple #1
0
        pos, read_details = ht.pysam_to_hdf(bam_paths[0])

        if not bam_input and config.getboolean('behavior', 'deletebam'):
            os.remove(bam_paths[0])

        binary = np.sign(pos)  # dtype=np.uint16

    # dimensionality reduction and typing

    # Keep only the columns of `binary` that is_frequent() accepts.
    alleles_to_keep = [allele for allele in binary.columns
                       if is_frequent(allele)]
    binary = binary[alleles_to_keep]

    if VERBOSE:
        print("\n", ht.now(),
              'temporary pruning of identical rows and columns')

    # `representing` is iterated below as representative -> iterable of the
    # alleles it stands in for (presumably groups of identical columns;
    # confirm against ht.prune_identical_alleles).
    unique_col, representing = ht.prune_identical_alleles(
        binary, report_groups=True)

    # Flatten the mapping into one [representative, represented] row per pair.
    representation_rows = [
        [representative, represented]
        for representative, group in representing.items()
        for represented in group
    ]
    representing_df = pd.DataFrame(
        representation_rows, columns=['representative', 'represented'])

    temp_pruned = ht.prune_identical_reads(unique_col)

    if VERBOSE:
        print("\n", ht.now(), 'Size of mtx with unique rows and columns:',
              temp_pruned.shape)
        print(ht.now(), 'determining minimal set of non-overshadowed alleles')

    minimal_alleles = ht.prune_overshadowed_alleles(temp_pruned)

    if VERBOSE:
            else:
                print "\nCould not match paired-end pairs. Switching to single-end pipeline."
                binary = binary1
                is_paired = False
    else:
        pos, etc, desc = ht.sam_to_hdf(out_dir+"/"+date+"_0.sam", verbosity=args.verbose)
        binary = pos.applymap(bool).applymap(int)

    # dimensionality reduction and typing
    #
    # NOTE: this section uses Python 3 syntax. print() is called as a
    # function; on Python 2 it would need
    # `from __future__ import print_function` to produce the same output.

    # filter() returns a lazy iterator on Python 3, so materialise it into a
    # list before using it as a column indexer.
    alleles_to_keep = list(filter(is_frequent, binary.columns))
    binary = binary[alleles_to_keep]

    if args.verbose:
        print("\n", ht.now(),
              'temporary pruning of identical rows and columns')
    unique_col, representing = ht.prune_identical_alleles(binary,
                                                          report_groups=True)
    # .iteritems() does not exist on Python 3 dicts; .items() yields the same
    # (representative, group) pairs. One output row is built per pair of
    # representative and represented allele.
    representing_df = pd.DataFrame([[a1, a2]
                                    for a1, a_l in representing.items()
                                    for a2 in a_l],
                                   columns=['representative', 'represented'])

    temp_pruned = ht.prune_identical_reads(unique_col)

    if args.verbose:
        print("\n", ht.now(), 'Size of mtx with unique rows and columns:',
              temp_pruned.shape)
        print(ht.now(), 'determining minimal set of non-overshadowed alleles')

    minimal_alleles = ht.prune_overshadowed_alleles(temp_pruned)

    if args.verbose:
        print("\n", ht.now(),
              'Keeping only the minimal number of required alleles',
              minimal_alleles.shape)

    # Restrict the matrix to the columns selected by
    # ht.prune_overshadowed_alleles.
    binary = binary[minimal_alleles]