def test_store():
    """Check that a CachedArray returns the items it was extended with."""
    expected = [1, 3, 4]
    cached = CachedArray(typecode='I')
    cached.extend(expected)
    # Two passes on purpose: the second iteration must yield the same
    # items as the first one.
    for _ in range(2):
        assert list(cached) == expected
def create_maf_distribution(
    seqs, distrib_fhand=None, plot_fhand=None, summary_fhand=None, groups=None, group_kind=None
):
    """Create the distribution of the maf of the snvs found in seqs.

    One maf is computed for every "snv" feature of every sequence with
    calculate_maf_frequency; falsy results are left out. If at least one
    value was collected, create_distribution is called with the values,
    otherwise nothing is done. According to the original author's note the
    reference allele is not taken into account.

    seqs -- iterable of objects providing get_features("snv")
    distrib_fhand, plot_fhand, summary_fhand -- handed over unchanged to
        create_distribution
    groups, group_kind -- handed over to calculate_maf_frequency; when both
        are truthy they are also shown in the title (groups is joined with
        ",", so it has to be an iterable of strings)

    Returns None.
    """
    title = "maf"
    if groups and group_kind:
        title = "maf (%s: %s)" % (group_kind, ",".join(groups))

    mafs = CachedArray("f")
    for seq in seqs:
        for snv in seq.get_features("snv"):
            maf = calculate_maf_frequency(snv, groups=groups, group_kind=group_kind)
            # NOTE(review): this truthiness test also drops a maf equal to 0,
            # not only a missing one -- confirm that this is intended.
            if maf:
                mafs.append(maf)

    # Ask for the length instead of building list(mafs): the emptiness check
    # should not copy every stored value into a throwaway list.
    if len(mafs) == 0:
        return

    create_distribution(
        mafs,
        labels={"title": title},
        distrib_fhand=distrib_fhand,
        bins=None,
        plot_fhand=plot_fhand,
        range_=None,
        summary_fhand=summary_fhand,
        calculate_freqs=False,
        remove_outliers=False,
    )
def create_het_distribution(
    seqs, distrib_fhand=None, plot_fhand=None, summary_fhand=None, group_kind=None, groups=None, ploidy=2
):
    """Create the distribution of the heterozygosity of the snvs found in seqs.

    For every "snv" feature of every sequence the value stored under the
    "heterozygosity" qualifier is used when no group_kind is given and the
    qualifier exists; in any other case it is computed with
    calculate_heterozygosity. None values are left out. If at least one
    value was collected, create_distribution is called with the values,
    otherwise nothing is done. According to the original author's note the
    reference allele is not taken into account.

    seqs -- iterable of objects providing get_features("snv")
    distrib_fhand, plot_fhand, summary_fhand -- handed over unchanged to
        create_distribution
    group_kind, groups -- handed over to calculate_heterozygosity; when both
        are truthy they are also shown in the title (groups is joined with
        ",", so it has to be an iterable of strings)
    ploidy -- handed over to calculate_heterozygosity

    Returns None.
    """
    title = "heterozygosity"
    if groups and group_kind:
        title = "heterozygosity (%s: %s)" % (group_kind, ",".join(groups))

    hets = CachedArray("f")
    for seq in seqs:
        for snv in seq.get_features("snv"):
            # The stored qualifier is only trusted when no grouping is asked for.
            if not group_kind and "heterozygosity" in snv.qualifiers:
                het = snv.qualifiers["heterozygosity"]
            else:
                het = calculate_heterozygosity(snv, ploidy, group_kind=group_kind, groups=groups)
            if het is not None:
                hets.append(het)

    # Ask for the length instead of building list(hets): the emptiness check
    # should not copy every stored value into a throwaway list.
    if len(hets) == 0:
        return

    create_distribution(
        hets,
        labels={"title": title},
        distrib_fhand=distrib_fhand,
        bins=None,
        plot_fhand=plot_fhand,
        range_=None,
        summary_fhand=summary_fhand,
        calculate_freqs=False,
        remove_outliers=False,
    )
def create_pic_distribution(
    seqs, distrib_fhand=None, plot_fhand=None, summary_fhand=None, read_groups=None, group_kind=None, groups=None
):
    """Create the distribution of the pic of the snvs found in seqs.

    For every "snv" feature of every sequence the value stored under the
    "pic" qualifier is used when no group_kind is given and the qualifier
    exists; in any other case it is computed with calculate_pic. None values
    are left out. If at least one value was collected, create_distribution
    is called with the values, otherwise nothing is done. According to the
    original author's note the reference allele is not taken into account.

    seqs -- iterable of objects providing get_features("snv")
    distrib_fhand, plot_fhand, summary_fhand -- handed over unchanged to
        create_distribution
    read_groups -- accepted but not used by this function; kept so that
        existing callers keep working
    group_kind, groups -- handed over to calculate_pic; when both are truthy
        they are also shown in the title (groups is joined with ",", so it
        has to be an iterable of strings)

    Returns None.
    """
    title = "pic"
    if groups and group_kind:
        title = "pic (%s: %s)" % (group_kind, ",".join(groups))

    pics = CachedArray("f")
    for seq in seqs:
        for snv in seq.get_features("snv"):
            # The stored qualifier is only trusted when no grouping is asked for.
            if not group_kind and "pic" in snv.qualifiers:
                pic = snv.qualifiers["pic"]
            else:
                pic = calculate_pic(snv, group_kind=group_kind, groups=groups)
            if pic is not None:
                pics.append(pic)

    # Ask for the length instead of building list(pics): the emptiness check
    # should not copy every stored value into a throwaway list.
    if len(pics) == 0:
        return

    create_distribution(
        pics,
        labels={"title": title},
        distrib_fhand=distrib_fhand,
        bins=None,
        plot_fhand=plot_fhand,
        range_=None,
        summary_fhand=summary_fhand,
        calculate_freqs=False,
        remove_outliers=False,
    )
def test_basic_statistics():
    """Check the max, min, average and length reported by a CachedArray."""
    values = [1, 2, 3]

    cached = CachedArray(typecode='I')
    cached.extend(values)

    highest = cached.max
    assert highest == max(values)
    lowest = cached.min
    assert lowest == min(values)
    mean = cached.average
    assert mean == 2
    n_items = len(cached)
    assert n_items == len(values)
def test_basic_distribution(self):
    """Check the summary that create_distribution writes for some integers."""
    values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 101, 2, 3, 4, 5, 6, 7, 8, 9]
    cached = CachedArray(typecode='I')
    cached.extend(values)

    # In-memory handles that receive the distribution and its summary.
    distrib_out = StringIO()
    summary_out = StringIO()
    create_distribution(cached, distrib_fhand=distrib_out, summary_fhand=summary_out)

    # The expected text is matched with `in`, so every character of the
    # literal, whitespace included, takes part in the comparison.
    expected = '''Statistics for histogram ------------------------- minimum: 1 maximum: 101 average: 10.5556 variance: 486.9136 sum: 190 items: 18'''
    written = summary_out.getvalue()
    assert expected in written
def test_store_to_disk():
    """Check that items added before and after to_disk() are all returned."""
    before_dump = [1, 2, 3]
    after_dump = [4, 5, 6]

    cached = CachedArray(typecode='I')
    cached.extend(before_dump)
    # More items are added once the array has been sent to disk.
    cached.to_disk()
    cached.extend(after_dump)

    stored = list(cached)
    assert stored == [1, 2, 3, 4, 5, 6]
def test_sample():
    """Check the random sample kept by a CachedArray.

    The array is filled with 10000 items of each of the values 0, 1 and 2.
    The test checks that the sample has sample_length items, that the share
    of 1s in the sample is within 0.05 of one third, and that the 5 and 95
    percentiles of the sample are 0 and 2.
    """
    storage = CachedArray(typecode='I')
    storage.extend([0] * 10000)
    storage.extend([1] * 10000)
    storage.extend([2] * 10000)

    # Number of times that every value shows up in the sample.
    count = {}
    for item in storage.sample:
        count[item] = count.get(item, 0) + 1

    len_sample = len(storage.sample)
    assert len_sample == storage.sample_length

    # abs() is required: without it a share of 1s far below one third gives
    # a negative difference, which is always < 0.05, and the check could
    # never fail on that side.
    share_of_ones = count[1] / (len_sample * 1.0)
    assert abs(share_of_ones - 1 / 3.0) < 0.05

    assert storage.get_sample_percentiles([5, 95]) == [0, 2]