def test_store():
    'The store gives back the items it was filled with'
    expected = [1, 3, 4]
    cache = CachedArray(typecode='I')
    cache.extend(expected)
    # The same comparison is made twice in a row, so the content is
    # read back from the store two times.
    for _ in range(2):
        assert list(cache) == expected
Beispiel #2
0
def create_maf_distribution(
    seqs, distrib_fhand=None, plot_fhand=None, summary_fhand=None, groups=None, group_kind=None
):
    """Create the distribution of the maf of the snv features found in seqs.

    According to the original note, the reference allele is not taken into
    account.

    seqs -- iterable of objects offering get_features("snv").
    distrib_fhand, plot_fhand, summary_fhand -- handed over untouched to
        create_distribution.
    groups, group_kind -- forwarded to calculate_maf_frequency; when both
        are set they are also shown in the title (groups is joined with
        ",", so it has to be an iterable of strings).

    Nothing is returned. create_distribution is only called when at least
    one maf value was collected.
    """
    title = "maf"
    if groups and group_kind:
        title = "maf (%s: %s)" % (group_kind, ",".join(groups))

    mafs = CachedArray("f")
    for seq in seqs:
        for snv in seq.get_features("snv"):
            maf = calculate_maf_frequency(snv, groups=groups, group_kind=group_kind)
            # Any falsy result is left out of the distribution.
            if maf:
                mafs.append(maf)
    # Ask for the length instead of copying the whole array into a list
    # just to find out whether something was stored.
    if len(mafs):
        create_distribution(
            mafs,
            labels={"title": title},
            distrib_fhand=distrib_fhand,
            bins=None,
            plot_fhand=plot_fhand,
            range_=None,
            summary_fhand=summary_fhand,
            calculate_freqs=False,
            remove_outliers=False,
        )
Beispiel #3
0
def create_het_distribution(
    seqs, distrib_fhand=None, plot_fhand=None, summary_fhand=None, group_kind=None, groups=None, ploidy=2
):
    """Create the distribution of the heterozygosity of the snvs in seqs.

    According to the original note, the reference allele is not taken into
    account.

    seqs -- iterable of objects offering get_features("snv"); every snv
        must have a qualifiers mapping.
    distrib_fhand, plot_fhand, summary_fhand -- handed over untouched to
        create_distribution.
    group_kind, groups -- forwarded to calculate_heterozygosity; when both
        are set they are also shown in the title (groups is joined with
        ",", so it has to be an iterable of strings).
    ploidy -- forwarded to calculate_heterozygosity.

    Nothing is returned. create_distribution is only called when at least
    one heterozygosity value was collected.
    """
    title = "heterozygosity"
    if groups and group_kind:
        title = "heterozygosity (%s: %s)" % (group_kind, ",".join(groups))

    hets = CachedArray("f")
    for seq in seqs:
        for snv in seq.get_features("snv"):
            # The value stored in the qualifiers is only reused when no
            # group_kind was requested; otherwise it is calculated.
            if not group_kind and "heterozygosity" in snv.qualifiers:
                het = snv.qualifiers["heterozygosity"]
            else:
                het = calculate_heterozygosity(snv, ploidy, group_kind=group_kind, groups=groups)
            if het is not None:
                hets.append(het)
    # Ask for the length instead of copying the whole array into a list
    # just to find out whether something was stored.
    if len(hets):
        create_distribution(
            hets,
            labels={"title": title},
            distrib_fhand=distrib_fhand,
            bins=None,
            plot_fhand=plot_fhand,
            range_=None,
            summary_fhand=summary_fhand,
            calculate_freqs=False,
            remove_outliers=False,
        )
Beispiel #4
0
def create_pic_distribution(
    seqs, distrib_fhand=None, plot_fhand=None, summary_fhand=None, read_groups=None, group_kind=None, groups=None
):
    """Create the distribution of the pic of the snv features found in seqs.

    According to the original note, the reference allele is not taken into
    account.

    seqs -- iterable of objects offering get_features("snv"); every snv
        must have a qualifiers mapping.
    distrib_fhand, plot_fhand, summary_fhand -- handed over untouched to
        create_distribution.
    read_groups -- not used by this function; kept so existing callers
        keep working.
    group_kind, groups -- forwarded to calculate_pic; when both are set
        they are also shown in the title (groups is joined with ",", so
        it has to be an iterable of strings).

    Nothing is returned. create_distribution is only called when at least
    one pic value was collected.
    """
    title = "pic"
    if groups and group_kind:
        title = "pic (%s: %s)" % (group_kind, ",".join(groups))

    pics = CachedArray("f")
    for seq in seqs:
        for snv in seq.get_features("snv"):
            # The value stored in the qualifiers is only reused when no
            # group_kind was requested; otherwise it is calculated.
            if not group_kind and "pic" in snv.qualifiers:
                pic = snv.qualifiers["pic"]
            else:
                pic = calculate_pic(snv, group_kind=group_kind, groups=groups)
            if pic is not None:
                pics.append(pic)
    # Ask for the length instead of copying the whole array into a list
    # just to find out whether something was stored.
    if len(pics):
        create_distribution(
            pics,
            labels={"title": title},
            distrib_fhand=distrib_fhand,
            bins=None,
            plot_fhand=plot_fhand,
            range_=None,
            summary_fhand=summary_fhand,
            calculate_freqs=False,
            remove_outliers=False,
        )
 def test_basic_statistics():
     'The store reports max, min, average and length of its items'
     values = [1, 2, 3]
     cache = CachedArray(typecode='I')
     cache.extend(values)
     highest = max(values)
     lowest = min(values)
     assert cache.max == highest
     assert cache.min == lowest
     assert cache.average == 2
     assert len(cache) == len(values)
    def test_basic_distribution(self):
        'It tests the distribution'
        summary_fhand = StringIO()
        distrib_fhand = StringIO()

        numbers = CachedArray(typecode='I')
        numbers.extend([1, 2, 3, 4, 5, 6, 7, 8, 9, 101, 2, 3, 4, 5, 6, 7, 8, 9])

        create_distribution(numbers, distrib_fhand=distrib_fhand,
                            summary_fhand=summary_fhand)
        result = '''Statistics for histogram
-------------------------
minimum: 1
maximum: 101
average: 10.5556
variance: 486.9136
sum: 190
items: 18'''
        assert result in summary_fhand.getvalue()
 def test_store_to_disk():
     'Items added before and after to_disk() are read back in order'
     first_batch = [1, 2, 3]
     second_batch = [4, 5, 6]
     cache = CachedArray(typecode='I')
     cache.extend(first_batch)
     # Keep extending after to_disk() has been called.
     cache.to_disk()
     cache.extend(second_batch)
     assert list(cache) == first_batch + second_batch
    def test_sample():
        'It tests the random sample'
        storage = CachedArray(typecode='I')
        # Three values, 10000 items each, so each one makes up a third
        # of the stored items.
        storage.extend([0] * 10000)
        storage.extend([1] * 10000)
        storage.extend([2] * 10000)

        # Count how often each value shows up in the sample.
        count = {}
        for item in storage.sample:
            count[item] = count.get(item, 0) + 1
        len_sample = len(storage.sample)
        assert len_sample == storage.sample_length

        # The share of 1s must be close to a third in both directions;
        # without abs() a sample with too few 1s would pass unnoticed.
        # The "* 1.0" forces a float division.
        assert abs(count[1] / (len_sample * 1.0) - 1/3.0) < 0.05

        assert storage.get_sample_percentiles([5, 95]) == [0, 2]