Example #1
0
def main():
    """Tally allele-specific and total read counts from BAM files.

    Parses the command line with parse_args(), creates one count array per
    chromosome in each of the four output HDF5 files (ref, alt, other and
    total read counts), and then, for every chromosome that is present in
    the SNP table, passes each read of each input BAM file to
    add_read_count(). The accumulated counts are stored in the output
    files once per chromosome and, when args.txt_counts is given, are also
    handed to write_txt_file().

    Raises:
        NotImplementedError: if args.data_type is neither "uint8" nor
            "uint16".
        ValueError: if a haplotype file is given and the lookup of
            args.individual does not return exactly one sample index.
    """
    args = parse_args()

    sys.stderr.write("command line: %s\n" % " ".join(sys.argv))
    sys.stderr.write("python version: %s\n" % sys.version)
    sys.stderr.write("pysam version: %s\n" % pysam.__version__)
    sys.stderr.write("pytables version: %s\n" % tables.__version__)

    util.check_pysam_version()
    util.check_pytables_version()

    # disable warnings that come from pytables when chromosome
    # names are like 1, 2, 3 (instead of chr1, chr2, chr3)
    warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)

    snp_tab_h5 = tables.open_file(args.snp_tab, "r")
    snp_index_h5 = tables.open_file(args.snp_index, "r")

    if args.haplotype:
        hap_h5 = tables.open_file(args.haplotype, "r")
    else:
        hap_h5 = None

    ref_count_h5 = tables.open_file(args.ref_as_counts, "w")
    alt_count_h5 = tables.open_file(args.alt_as_counts, "w")
    other_count_h5 = tables.open_file(args.other_as_counts, "w")
    read_count_h5 = tables.open_file(args.read_counts, "w")

    output_h5 = [ref_count_h5, alt_count_h5, other_count_h5, read_count_h5]

    # initialize every chromosome in output files
    chrom_list = chromosome.get_all_chromosomes(args.chrom)

    for chrom in chrom_list:
        for out_file in output_h5:
            create_carray(out_file, chrom, args.data_type)

    # number of reads seen since the last progress dot was printed
    count = 0
    if args.data_type == "uint8":
        max_count = MAX_UINT8_COUNT
        dtype = np.uint8
    elif args.data_type == "uint16":
        max_count = MAX_UINT16_COUNT
        dtype = np.uint16
    else:
        raise NotImplementedError("unsupported datatype %s" % args.data_type)

    # create a txt file that also holds the counts; txt_counts stays None
    # when no text output was requested so that the later checks all test
    # the same thing
    txt_counts = None
    if args.txt_counts is not None:
        if os.path.splitext(args.txt_counts)[1] == ".gz":
            txt_counts = gzip.open(args.txt_counts, 'wt+')
        else:
            txt_counts = open(args.txt_counts, 'w+')

    for chrom in chrom_list:
        sys.stderr.write("%s\n" % chrom.name)

        if args.test_chrom and chrom.name != args.test_chrom:
            sys.stderr.write("skipping because not test chrom\n")
            continue

        warned_pos = {}

        # fetch SNP info for this chromosome
        if chrom.name not in snp_tab_h5.root:
            # no SNPs for this chromosome
            sys.stderr.write("skipping %s because chromosome with this name "
                             "not found in SNP table\n" % chrom.name)
            continue

        sys.stderr.write("fetching SNPs\n")

        snp_tab = snp_tab_h5.get_node("/%s" % chrom.name)
        snp_index_array = snp_index_h5.get_node("/%s" % chrom.name)[:]

        hap_tab = None
        ind_idx = None
        if hap_h5:
            hap_tab = hap_h5.get_node("/%s" % chrom.name)
            _, ind_idx = snptable.SNPTable().get_h5_sample_indices(
                hap_h5, chrom, [args.individual])

            if len(ind_idx) != 1:
                raise ValueError("got sample indices for %d individuals, "
                                 "but expected to get index for one "
                                 "individual (%s)" %
                                 (len(ind_idx), args.individual))
            ind_idx = ind_idx[0]
            sys.stderr.write("index for individual %s is %d\n" %
                             (args.individual, ind_idx))

        # initialize count arrays for this chromosome to 0
        ref_carray = get_carray(ref_count_h5, chrom)
        alt_carray = get_carray(alt_count_h5, chrom)
        other_carray = get_carray(other_count_h5, chrom)
        read_count_carray = get_carray(read_count_h5, chrom)

        ref_array = np.zeros(chrom.length, dtype)
        alt_array = np.zeros(chrom.length, dtype)
        other_array = np.zeros(chrom.length, dtype)
        read_count_array = np.zeros(chrom.length, dtype)

        # loop over all BAM files, pulling out reads
        # for this chromosome
        for bam_filename in args.bam_filenames:
            sys.stderr.write("reading from file %s\n" % bam_filename)

            samfile = pysam.Samfile(bam_filename, "rb")
            try:
                for read in get_sam_iter(samfile, chrom):
                    count += 1
                    if count == 10000:
                        sys.stderr.write(".")
                        count = 0

                    add_read_count(read, chrom, ref_array, alt_array,
                                   other_array, read_count_array,
                                   snp_index_array, snp_tab, hap_tab,
                                   warned_pos, max_count, ind_idx)
            finally:
                # release the BAM handle even if a read could not be
                # processed
                samfile.close()

            # terminate the line of progress dots for this BAM file
            sys.stderr.write("\n")

        # store results for this chromosome only after every BAM file has
        # been read: the arrays accumulate across files, so writing inside
        # the BAM loop would emit one set of partial text rows per file
        ref_carray[:] = ref_array
        alt_carray[:] = alt_array
        other_carray[:] = other_array
        read_count_carray[:] = read_count_array

        # columns are:
        # chrom, pos, ref, alt, genotype, ref_count, alt_count, other_count
        if txt_counts is not None:
            write_txt_file(txt_counts, chrom, snp_tab, hap_tab, ind_idx,
                           ref_array, alt_array, other_array)

    if txt_counts is not None:
        # close the open txt file handler
        txt_counts.close()

    # check if any of the reads contained an unimplemented CIGAR
    if unimplemented_CIGAR[0] > 0:
        sys.stderr.write("WARNING: Encountered " +
                         str(unimplemented_CIGAR[0]) +
                         " instances of CIGAR codes: " +
                         str(unimplemented_CIGAR[1]) + ". Reads with these "
                         "CIGAR codes were skipped because they "
                         "are currently unimplemented.\n")

    # set track statistics and close HDF5 files

    sys.stderr.write("setting statistics for each chromosome\n")
    for h5f in output_h5:
        chromstat.set_stats(h5f, chrom_list)
        h5f.close()

    snp_tab_h5.close()
    snp_index_h5.close()
    if hap_h5:
        hap_h5.close()

    sys.stderr.write("done\n")
Example #2
0
    if (len(keep_cache) + len(discard_cache)) != 0:
        sys.stderr.write("WARNING: failed to find pairs for %d "
                         "keep reads and %d discard reads on this "
                         "chromosome\n" %
                         (len(keep_cache), len(discard_cache)))

        read_stats.discard_missing_pair += len(keep_cache) + len(discard_cache)

    read_stats.write(sys.stderr)


if __name__ == "__main__":
    # Report how the script was invoked and which library versions are in
    # use before any real work starts.
    sys.stderr.writelines((
        "command line: %s\n" % " ".join(sys.argv),
        "python version: %s\n" % sys.version,
        "pysam version: %s\n" % pysam.__version__,
    ))

    util.check_pysam_version()

    # Two positional arguments: the input file and the output file.
    parser = argparse.ArgumentParser()
    for arg_name, arg_help in (
            ("input_bam", "input BAM or SAM file (must be sorted!)"),
            ("output_bam", "output BAM or SAM file (not sorted!)")):
        parser.add_argument(arg_name, help=arg_help)

    options = parser.parse_args()

    main(options.input_bam, options.output_bam)
Example #3
0
def main():
    """Count reads at SNPs in BAM files and write the counts to HDF5.

    Uses parse_args() for the command line. One count array per chromosome
    is created in each of the four output HDF5 files (reference allele,
    alternate allele, other allele and total read counts). Every
    chromosome found in the SNP table is then processed by feeding all
    reads of all input BAM files to add_read_count(); the accumulated
    arrays are stored once per chromosome and, if args.txt_counts is set,
    passed to write_txt_file() as well.

    Raises:
        NotImplementedError: if args.data_type is neither "uint8" nor
            "uint16".
        ValueError: if a haplotype file is given and the lookup of
            args.individual does not return exactly one sample index.
    """
    args = parse_args()

    sys.stderr.write("command line: %s\n" % " ".join(sys.argv))
    sys.stderr.write("python version: %s\n" % sys.version)
    sys.stderr.write("pysam version: %s\n" % pysam.__version__)
    sys.stderr.write("pytables version: %s\n" % tables.__version__)

    util.check_pysam_version()
    util.check_pytables_version()

    # disable warnings that come from pytables when chromosome
    # names are like 1, 2, 3 (instead of chr1, chr2, chr3)
    warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)

    snp_tab_h5 = tables.open_file(args.snp_tab, "r")
    snp_index_h5 = tables.open_file(args.snp_index, "r")

    if args.haplotype:
        hap_h5 = tables.open_file(args.haplotype, "r")
    else:
        hap_h5 = None

    ref_count_h5 = tables.open_file(args.ref_as_counts, "w")
    alt_count_h5 = tables.open_file(args.alt_as_counts, "w")
    other_count_h5 = tables.open_file(args.other_as_counts, "w")
    read_count_h5 = tables.open_file(args.read_counts, "w")

    output_h5 = [ref_count_h5, alt_count_h5, other_count_h5, read_count_h5]

    # initialize every chromosome in output files
    chrom_list = chromosome.get_all_chromosomes(args.chrom)

    for chrom in chrom_list:
        for out_file in output_h5:
            create_carray(out_file, chrom, args.data_type)

    # reads seen since the last progress dot was written
    count = 0
    if args.data_type == "uint8":
        max_count = MAX_UINT8_COUNT
        dtype = np.uint8
    elif args.data_type == "uint16":
        max_count = MAX_UINT16_COUNT
        dtype = np.uint16
    else:
        raise NotImplementedError("unsupported datatype %s" % args.data_type)

    # create a txt file that also holds the counts; the handle is None
    # when text output was not requested, and that is what the later
    # write/close checks test
    txt_counts = None
    if args.txt_counts is not None:
        if os.path.splitext(args.txt_counts)[1] == ".gz":
            txt_counts = gzip.open(args.txt_counts, 'wt+')
        else:
            txt_counts = open(args.txt_counts, 'w+')

    for chrom in chrom_list:
        sys.stderr.write("%s\n" % chrom.name)

        if args.test_chrom and chrom.name != args.test_chrom:
            sys.stderr.write("skipping because not test chrom\n")
            continue

        warned_pos = {}

        # fetch SNP info for this chromosome
        if chrom.name not in snp_tab_h5.root:
            # no SNPs for this chromosome
            sys.stderr.write("skipping %s because chromosome with this name "
                             "not found in SNP table\n" % chrom.name)
            continue

        sys.stderr.write("fetching SNPs\n")

        snp_tab = snp_tab_h5.get_node("/%s" % chrom.name)
        snp_index_array = snp_index_h5.get_node("/%s" % chrom.name)[:]

        hap_tab = None
        ind_idx = None
        if hap_h5:
            hap_tab = hap_h5.get_node("/%s" % chrom.name)
            _, ind_idx = snptable.SNPTable().get_h5_sample_indices(
                hap_h5, chrom, [args.individual])

            if len(ind_idx) != 1:
                raise ValueError("got sample indices for %d individuals, "
                                 "but expected to get index for one "
                                 "individual (%s)" % (len(ind_idx),
                                                      args.individual))
            ind_idx = ind_idx[0]
            sys.stderr.write("index for individual %s is %d\n" %
                             (args.individual, ind_idx))

        # initialize count arrays for this chromosome to 0
        ref_carray = get_carray(ref_count_h5, chrom)
        alt_carray = get_carray(alt_count_h5, chrom)
        other_carray = get_carray(other_count_h5, chrom)
        read_count_carray = get_carray(read_count_h5, chrom)

        ref_array = np.zeros(chrom.length, dtype)
        alt_array = np.zeros(chrom.length, dtype)
        other_array = np.zeros(chrom.length, dtype)
        read_count_array = np.zeros(chrom.length, dtype)

        # loop over all BAM files, pulling out reads
        # for this chromosome
        for bam_filename in args.bam_filenames:
            sys.stderr.write("reading from file %s\n" % bam_filename)

            samfile = pysam.Samfile(bam_filename, "rb")
            try:
                for read in get_sam_iter(samfile, chrom):
                    count += 1
                    if count == 10000:
                        sys.stderr.write(".")
                        count = 0

                    add_read_count(read, chrom, ref_array, alt_array,
                                   other_array, read_count_array,
                                   snp_index_array, snp_tab, hap_tab,
                                   warned_pos, max_count, ind_idx)
            finally:
                # always release the BAM handle, also when a read fails
                samfile.close()

            # end the line of progress dots for this BAM file
            sys.stderr.write("\n")

        # store results for this chromosome after the BAM loop: counts
        # accumulate over all files, so doing this per BAM file would
        # write duplicate text rows holding partial counts
        ref_carray[:] = ref_array
        alt_carray[:] = alt_array
        other_carray[:] = other_array
        read_count_carray[:] = read_count_array

        # columns are:
        # chrom, pos, ref, alt, genotype, ref_count, alt_count, other_count
        if txt_counts is not None:
            write_txt_file(txt_counts, chrom, snp_tab, hap_tab, ind_idx,
                           ref_array, alt_array, other_array)

    if txt_counts is not None:
        # close the open txt file handler
        txt_counts.close()

    # check if any of the reads contained an unimplemented CIGAR
    if unimplemented_CIGAR[0] > 0:
        sys.stderr.write("WARNING: Encountered " + str(unimplemented_CIGAR[0])
                         + " instances of CIGAR codes: "
                         + str(unimplemented_CIGAR[1]) + ". Reads with these "
                         "CIGAR codes were skipped because they "
                         "are currently unimplemented.\n")

    # set track statistics and close HDF5 files

    sys.stderr.write("setting statistics for each chromosome\n")
    for h5f in output_h5:
        chromstat.set_stats(h5f, chrom_list)
        h5f.close()

    snp_tab_h5.close()
    snp_index_h5.close()
    if hap_h5:
        hap_h5.close()

    sys.stderr.write("done\n")
Example #4
0
def main():
    """Count reads at SNPs in BAM files; write HDF5 and optional text output.

    Uses parse_args() for the command line. One count array per chromosome
    is created in each of the four output HDF5 files (ref, alt, other and
    total read counts). For every chromosome present in the SNP table, all
    reads of all input BAM files are passed to add_read_count() and the
    accumulated arrays are stored in the output files. When
    args.text_counts is given, one block of rows per processed chromosome
    is collected and written with np.savetxt() at the end.

    Raises:
        NotImplementedError: if args.data_type is neither "uint8" nor
            "uint16".
    """
    args = parse_args()

    sys.stderr.write("command line: %s\n" % " ".join(sys.argv))
    sys.stderr.write("python version: %s\n" % sys.version)
    sys.stderr.write("pysam version: %s\n" % pysam.__version__)
    sys.stderr.write("pytables version: %s\n" % tables.__version__)

    util.check_pysam_version()
    util.check_pytables_version()

    snp_tab_h5 = tables.open_file(args.snp_tab, "r")
    snp_index_h5 = tables.open_file(args.snp_index, "r")

    if args.haplotype:
        hap_h5 = tables.open_file(args.haplotype, "r")
        ind_idx = lookup_individual_index(args.samples, args.individual)
    else:
        hap_h5 = None
        ind_idx = None

    ref_count_h5 = tables.open_file(args.ref_as_counts, "w")
    alt_count_h5 = tables.open_file(args.alt_as_counts, "w")
    other_count_h5 = tables.open_file(args.other_as_counts, "w")
    read_count_h5 = tables.open_file(args.read_counts, "w")

    output_h5 = [ref_count_h5, alt_count_h5, other_count_h5, read_count_h5]

    # initialize every chromosome in output files
    chrom_list = chromosome.get_all_chromosomes(args.chrom)

    for chrom in chrom_list:
        for out_file in output_h5:
            create_carray(out_file, chrom, args.data_type)

    # reads seen since the last progress dot was written
    count = 0
    if args.data_type == "uint8":
        max_count = MAX_UINT8_COUNT
        dtype = np.uint8
    elif args.data_type == "uint16":
        max_count = MAX_UINT16_COUNT
        dtype = np.uint16
    else:
        raise NotImplementedError("unsupported datatype %s" % args.data_type)

    # create a list to hold the counts that will be later written
    # to a txt file; None means that no text output was requested
    txt_counts = [] if args.text_counts is not None else None

    for chrom in chrom_list:
        sys.stderr.write("%s\n" % chrom.name)

        warned_pos = {}

        # fetch SNP info for this chromosome
        if chrom.name not in snp_tab_h5.root:
            # no SNPs for this chromosome
            continue

        sys.stderr.write("fetching SNPs\n")

        snp_tab = snp_tab_h5.get_node("/%s" % chrom.name)
        snp_index_array = snp_index_h5.get_node("/%s" % chrom.name)[:]
        if hap_h5:
            hap_tab = hap_h5.get_node("/%s" % chrom.name)
        else:
            hap_tab = None

        # initialize count arrays for this chromosome to 0
        ref_carray = get_carray(ref_count_h5, chrom)
        alt_carray = get_carray(alt_count_h5, chrom)
        other_carray = get_carray(other_count_h5, chrom)
        read_count_carray = get_carray(read_count_h5, chrom)

        ref_array = np.zeros(chrom.length, dtype)
        alt_array = np.zeros(chrom.length, dtype)
        other_array = np.zeros(chrom.length, dtype)
        read_count_array = np.zeros(chrom.length, dtype)

        # loop over all BAM files, pulling out reads
        # for this chromosome
        for bam_filename in args.bam_filenames:
            sys.stderr.write("reading from file %s\n" % bam_filename)

            samfile = pysam.Samfile(bam_filename, "rb")
            try:
                for read in get_sam_iter(samfile, chrom):
                    count += 1
                    if count == 10000:
                        sys.stderr.write(".")
                        count = 0

                    add_read_count(read, chrom, ref_array, alt_array,
                                   other_array, read_count_array,
                                   snp_index_array, snp_tab, hap_tab,
                                   warned_pos, max_count, ind_idx)
            finally:
                # always release the BAM handle, also when a read fails
                samfile.close()

            # end the line of progress dots for this BAM file
            sys.stderr.write("\n")

        # store results for this chromosome after the BAM loop: counts
        # accumulate over all files, so collecting text rows per BAM file
        # would duplicate every SNP with partial counts
        ref_carray[:] = ref_array
        alt_carray[:] = alt_array
        other_carray[:] = other_array
        read_count_carray[:] = read_count_array

        # write data to numpy arrays, so that they can be written to a txt
        # file later
        # columns are:
        # chrom, pos, ref, alt, genotype, ref_count, alt_count, other_count
        if txt_counts is not None:
            # use a separate name for the column: rebinding `chrom` to an
            # array would break every later use of the chromosome object
            chrom_col = np.tile(chrom.name, len(snp_tab))
            pos = np.array([snp['pos'] for snp in snp_tab])
            ref = np.array([snp['allele1'] for snp in snp_tab])
            alt = np.array([snp['allele2'] for snp in snp_tab])
            if hap_tab is not None:
                # NOTE(review): the genotype is built from hap[0] and
                # hap[1] regardless of ind_idx -- confirm that these are
                # the haplotypes of the requested individual
                genotype = np.array([str(hap[0]) + "|" + str(hap[1])
                                     for hap in hap_tab])
            else:
                # zero-width placeholder: adds no column to the stack
                genotype = np.empty((len(snp_tab), 0))
            # pos - 1 is used to index the count arrays
            txt_counts.append(
                np.column_stack((chrom_col, pos, ref, alt, genotype,
                                 ref_array[pos - 1],
                                 alt_array[pos - 1],
                                 other_array[pos - 1]))
            )

    # write the txt_counts np arrays to a txt file
    if txt_counts is not None:
        if txt_counts:
            # we use vstack to combine np arrays row-wise into a
            # multi-dimensional array
            all_rows = np.vstack(txt_counts)
        else:
            # no chromosome was processed; vstack raises on an empty
            # sequence, so write a file without rows instead
            all_rows = np.empty((0, 1))
        np.savetxt(args.text_counts, all_rows, fmt="%1s", delimiter=" ")

    # set track statistics and close HDF5 files

    sys.stderr.write("setting statistics for each chromosome\n")
    for h5f in output_h5:
        chromstat.set_stats(h5f, chrom_list)
        h5f.close()

    snp_tab_h5.close()
    snp_index_h5.close()
    if hap_h5:
        hap_h5.close()

    sys.stderr.write("done\n")
Example #5
0
def main():
    """Count reads at SNPs in BAM files; write HDF5 and optional text output.

    Uses parse_args() for the command line. One count array per chromosome
    is created in each of the four output HDF5 files (ref, alt, other and
    total read counts). For every chromosome present in the SNP table, all
    reads of all input BAM files are passed to add_read_count() and the
    accumulated arrays are stored in the output files. When
    args.txt_counts is given, the rows for each processed chromosome are
    appended to that file with np.savetxt().

    Raises:
        NotImplementedError: if args.data_type is neither "uint8" nor
            "uint16".
    """
    args = parse_args()

    sys.stderr.write("command line: %s\n" % " ".join(sys.argv))
    sys.stderr.write("python version: %s\n" % sys.version)
    sys.stderr.write("pysam version: %s\n" % pysam.__version__)
    sys.stderr.write("pytables version: %s\n" % tables.__version__)

    util.check_pysam_version()
    util.check_pytables_version()

    snp_tab_h5 = tables.open_file(args.snp_tab, "r")
    snp_index_h5 = tables.open_file(args.snp_index, "r")

    if args.haplotype:
        hap_h5 = tables.open_file(args.haplotype, "r")
    else:
        hap_h5 = None

    ref_count_h5 = tables.open_file(args.ref_as_counts, "w")
    alt_count_h5 = tables.open_file(args.alt_as_counts, "w")
    other_count_h5 = tables.open_file(args.other_as_counts, "w")
    read_count_h5 = tables.open_file(args.read_counts, "w")

    output_h5 = [ref_count_h5, alt_count_h5, other_count_h5, read_count_h5]

    # initialize every chromosome in output files
    chrom_list = chromosome.get_all_chromosomes(args.chrom)

    for chrom in chrom_list:
        for out_file in output_h5:
            create_carray(out_file, chrom, args.data_type)

    # reads seen since the last progress dot was written
    count = 0
    if args.data_type == "uint8":
        max_count = MAX_UINT8_COUNT
        dtype = np.uint8
    elif args.data_type == "uint16":
        max_count = MAX_UINT16_COUNT
        dtype = np.uint16
    else:
        raise NotImplementedError("unsupported datatype %s" % args.data_type)

    # create a txt file that also holds the counts; the handle is None
    # when text output was not requested
    # NOTE(review): the file is opened in append mode, so rows from an
    # earlier run are kept -- confirm that this is intended
    txt_counts = None
    if args.txt_counts is not None:
        if os.path.splitext(args.txt_counts)[1] == ".gz":
            txt_counts = gzip.open(args.txt_counts, 'a+')
        else:
            txt_counts = open(args.txt_counts, 'a+')

    for chrom in chrom_list:
        sys.stderr.write("%s\n" % chrom.name)

        warned_pos = {}

        # fetch SNP info for this chromosome
        if chrom.name not in snp_tab_h5.root:
            # no SNPs for this chromosome
            continue

        sys.stderr.write("fetching SNPs\n")

        snp_tab = snp_tab_h5.get_node("/%s" % chrom.name)
        snp_index_array = snp_index_h5.get_node("/%s" % chrom.name)[:]
        if hap_h5:
            hap_tab = hap_h5.get_node("/%s" % chrom.name)
            ind_idx = snptable.SNPTable().get_h5_sample_indices(
                hap_h5, chrom, [args.individual])[1]
            if len(ind_idx) != 0:
                ind_idx = ind_idx[0]
            else:
                # no index found for the individual: continue without
                # haplotype information for this chromosome
                hap_tab = None
                ind_idx = None
        else:
            hap_tab = None
            ind_idx = None

        # initialize count arrays for this chromosome to 0
        ref_carray = get_carray(ref_count_h5, chrom)
        alt_carray = get_carray(alt_count_h5, chrom)
        other_carray = get_carray(other_count_h5, chrom)
        read_count_carray = get_carray(read_count_h5, chrom)

        ref_array = np.zeros(chrom.length, dtype)
        alt_array = np.zeros(chrom.length, dtype)
        other_array = np.zeros(chrom.length, dtype)
        read_count_array = np.zeros(chrom.length, dtype)

        # loop over all BAM files, pulling out reads
        # for this chromosome
        for bam_filename in args.bam_filenames:
            sys.stderr.write("reading from file %s\n" % bam_filename)

            samfile = pysam.Samfile(bam_filename, "rb")
            try:
                for read in get_sam_iter(samfile, chrom):
                    count += 1
                    if count == 10000:
                        sys.stderr.write(".")
                        count = 0

                    add_read_count(read, chrom, ref_array, alt_array,
                                   other_array, read_count_array,
                                   snp_index_array, snp_tab, hap_tab,
                                   warned_pos, max_count, ind_idx)
            finally:
                # always release the BAM handle, also when a read fails
                samfile.close()

            # end the line of progress dots for this BAM file
            sys.stderr.write("\n")

        # store results for this chromosome after the BAM loop: counts
        # accumulate over all files, so writing text rows per BAM file
        # would duplicate every SNP with partial counts
        ref_carray[:] = ref_array
        alt_carray[:] = alt_array
        other_carray[:] = other_array
        read_count_carray[:] = read_count_array

        # columns are:
        # chrom, pos, ref, alt, genotype, ref_count, alt_count, other_count
        if txt_counts is not None:
            # use a separate name for the column: rebinding `chrom` to an
            # array would break every later use of the chromosome object
            chrom_col = np.tile(chrom.name, len(snp_tab))
            pos = np.array([snp['pos'] for snp in snp_tab])
            ref = np.array([snp['allele1'] for snp in snp_tab])
            alt = np.array([snp['allele2'] for snp in snp_tab])
            if hap_tab is not None:
                # NOTE(review): the genotype is built from hap[0] and
                # hap[1] regardless of ind_idx -- confirm that these are
                # the haplotypes of the requested individual
                genotype = np.array(
                    [str(hap[0]) + "|" + str(hap[1]) for hap in hap_tab])
            else:
                # zero-width placeholder: adds no column to the stack
                genotype = np.empty((len(snp_tab), 0))
            # write an np array to a txt file
            np.savetxt(txt_counts,
                       np.column_stack(
                           (chrom_col, pos, ref, alt, genotype,
                            ref_array[pos - 1], alt_array[pos - 1],
                            other_array[pos - 1])),
                       fmt="%1s",
                       delimiter=" ")

    if txt_counts is not None:
        # close the open txt file handler
        txt_counts.close()

    # check if any of the reads contained an unimplemented CIGAR; only
    # warn when at least one was actually encountered
    if unimplemented_CIGAR[0] > 0:
        sys.stderr.write(
            "WARNING: Encountered " + str(unimplemented_CIGAR[0]) +
            " instances of any of the following CIGAR codes: " +
            str(unimplemented_CIGAR[1]) +
            ". The regions of reads with these CIGAR codes were "
            "skipped because these CIGAR codes are currently "
            "unimplemented.\n"
        )

    # set track statistics and close HDF5 files

    sys.stderr.write("setting statistics for each chromosome\n")
    for h5f in output_h5:
        chromstat.set_stats(h5f, chrom_list)
        h5f.close()

    snp_tab_h5.close()
    snp_index_h5.close()
    if hap_h5:
        hap_h5.close()

    sys.stderr.write("done\n")
Example #6
0
                      haplotype_filename=haplotype_filename)
    
    filter_reads(files, max_seqs=max_seqs, max_snps=max_snps,
                 samples=samples)

    files.close()
    
    

if __name__ == '__main__':
    # Report how the script was invoked and which library versions are in
    # use before any real work starts.
    sys.stderr.writelines((
        "command line: %s\n" % " ".join(sys.argv),
        "python version: %s\n" % sys.version,
        "pysam version: %s\n" % pysam.__version__,
        "pytables version: %s\n" % tables.__version__,
    ))

    # Run the three version checks in their original order.
    for version_check in (util.check_pysam_version,
                          util.check_pytables_version,
                          util.check_python_version):
        version_check()

    options = parse_options()
    samples = parse_samples(options.samples)

    # Collect the keyword arguments first so the call itself stays short.
    main_kwargs = {
        "is_paired_end": options.is_paired_end,
        "is_sorted": options.is_sorted,
        "max_seqs": options.max_seqs,
        "max_snps": options.max_snps,
        "output_dir": options.output_dir,
        "snp_dir": options.snp_dir,
        "snp_tab_filename": options.snp_tab,
        "snp_index_filename": options.snp_index,
        "haplotype_filename": options.haplotype,
        "samples": samples,
    }
    main(options.bam_filename, **main_kwargs)