def test_tolist():
    """tolist() expands a SparseArray into a dense Python list."""
    # With nothing set, every position reads back as the reference value
    untouched = SparseArray(5, 0)
    assert untouched.tolist() == [0] * 5

    # Explicitly set positions override the reference value (1 here)
    arr = SparseArray(8, 1)
    for position, value in ((1, 2), (5, 3)):
        arr[position] = value
    assert arr.tolist() == [1, 2, 1, 1, 1, 3, 1, 1]
def test_staticbuilders():
    """Check the alternate constructors ``from_dense`` and ``from_items``."""
    # from_dense with a plain Python list
    a = SparseArray.from_dense([0, 1, 1, 0, 0, 1], 0)
    assert type(a) is SparseArray
    assert a.tolist() == [0, 1, 1, 0, 0, 1]
    assert a.size == 6

    # from_dense with a numpy array. The array has to be handed to the
    # builder; otherwise the assertions below only re-check the
    # list-built array from the section above.
    npa = np.array([0, 1, 1, 0, 0, 1], dtype=np.int8)
    a = SparseArray.from_dense(npa, 0)
    assert type(a) is SparseArray
    assert a.tolist() == [0, 1, 1, 0, 0, 1]
    assert a.size == 6

    # from_items with (index, value) pairs, a total size and a reference value
    a = SparseArray.from_items([(0, 1), (2, 1), (4, 1)], 5, 0)
    assert type(a) is SparseArray
    assert a.tolist() == [1, 0, 1, 0, 1]
    assert a.size == 5
def test_setitem():
    """Single-element assignment keeps keys ordered and values aligned."""

    def check(arr, expected_keys, expected_values):
        # Keys first, then values, mirroring the order of the stored entries
        assert arr.keys() == expected_keys
        assert arr.values() == expected_values

    arr = SparseArray(100, 0)

    arr[10] = 1  # first stored element
    arr[5] = 1   # lands before the first stored element
    arr[20] = 1  # lands after the last stored element
    arr[15] = 2  # lands between two stored elements
    check(arr, [5, 10, 15, 20], [1, 1, 2, 1])

    # Writing the reference value back removes the stored entry
    arr[15] = 0
    check(arr, [5, 10, 20], [1, 1, 1])

    # Overwriting an existing entry changes its value only
    arr[20] = 2
    check(arr, [5, 10, 20], [1, 1, 2])
def test_logic():
    """Exercise all(), any() and logical_not() on SparseArray."""
    arr = SparseArray(5, 0)

    # Nothing set yet: neither reduction holds
    assert not arr.all()
    assert not arr.any()

    # A single non-zero element satisfies any() but not all()
    arr[0] = 1
    assert not arr.all()
    assert arr.any()

    # Every element non-zero satisfies both
    arr[0:5] = 1
    assert arr.all()
    assert arr.any()

    # logical_not() flips each element, reference positions included
    flags = SparseArray(5, False)
    flags[(1, 3)] = True
    negated = flags.logical_not()
    assert negated.tolist() == [True, False, True, False, True]
def test_fancy_index_get():
    """Read elements through boolean masks and tuples of integer indices."""
    # Bool masks
    s = SparseArray(5, 0)
    s[3] = 1
    # The mask selects positions 3 and 4: the stored 1 followed by a
    # reference 0. The comparison must be asserted, otherwise its result
    # is silently thrown away and the lookup is never actually checked.
    assert s[(False, False, False, True, True)].tolist() == [1, 0]

    # A boolean mask whose length differs from the array's is rejected
    try:
        s[(False, True)]
    except IndexError:
        # We wanted this error
        pass
    else:
        raise AssertionError('short boolean mask did not raise IndexError')

    # Number masks
    s = SparseArray(10, 0)
    s[3] = 2
    s[5] = 3
    assert s.tolist() == [0, 0, 0, 2, 0, 3, 0, 0, 0, 0]
    # Results follow the order of the requested indices
    assert s[(3, 5)].tolist() == [2, 3]
    assert s[(5, 3)].tolist() == [3, 2]
def __init__(self, data=None, refcode=0, missingcode=-1, size=None,
             template=None):
    '''
    Build the sparse container, either empty or from dense data.

    :param data: dense allele values (any sequence or numpy array), or
        None to create a container holding only reference values
    :param refcode: value treated as the reference allele
    :param missingcode: accepted for interface compatibility; not used
        by this method
    :param size: number of markers, used only when `data` is None
    :param template: object whose nmark() supplies the size when `data`
        is None and `size` is not given

    :raises ValueError: if `data`, `template` and `size` are all None
    :raises NotImplementedError: if `data` is already a SparseArray
    '''
    self.template = template
    self.refcode = refcode

    if data is None:
        if template is None and size is None:
            raise ValueError('No template or size')
        elif template is not None and size is None:
            size = self.template.nmark()
        self.container = SparseArray(size, refcode)
    elif type(data) is SparseArray:
        raise NotImplementedError
    else:
        if not isinstance(data, np.ndarray):
            data = np.array(data)
        self.container = SparseArray.from_dense(data, refcode)

    # Record the size on every construction path. Previously the
    # empty-container path returned early and left the instance without
    # a `size` attribute.
    self.size = self.container.size
def test_misc():
    """Cover sparsity() and the items() iterator."""
    # Sparsity function: drops by a tenth for each element set in a
    # ten-element array
    arr = SparseArray(10, 0)
    assert arr.sparsity() == 1.0
    for index, expected in ((0, 0.9), (1, 0.8)):
        arr[index] = 1
        assert arr.sparsity() == expected

    # items() generator: pairs come back ordered by index, not by
    # insertion order
    arr = SparseArray(10, 0)
    arr[5] = 1
    arr[2] = 4
    pairs = list(arr.items())
    assert pairs == [(2, 4), (5, 1)]
def test_fancy_index_set():
    """Assign through boolean masks and tuples of integer indices."""

    def mask_3_and_5():
        # Fresh ten-element boolean mask that is True at positions 3 and 5
        return [position in (3, 5) for position in range(10)]

    # Bool mask with one value per selected position
    arr = SparseArray(10, 0)
    arr[mask_3_and_5()] = [2, 3]
    assert arr.tolist() == [0, 0, 0, 2, 0, 3, 0, 0, 0, 0]

    # Integer indices, in ascending and then descending order
    arr[(3, 5)] = [10, 10]
    assert arr.tolist() == [0, 0, 0, 10, 0, 10, 0, 0, 0, 0]
    arr[(5, 3)] = [20, 20]
    assert arr.tolist() == [0, 0, 0, 20, 0, 20, 0, 0, 0, 0]

    # Bool mask with a single scalar applied to every selected position
    arr = SparseArray(10, 0)
    arr[mask_3_and_5()] = 2
    assert arr.tolist() == [0, 0, 0, 2, 0, 2, 0, 0, 0, 0]
def test_copy():
    """copy() returns an array that does not track later changes."""
    original = SparseArray.from_items([(0, 1), (2, 1), (4, 1)], 5, 0)
    duplicate = original.copy()

    # Immediately after copying the two hold the same values
    assert original.tolist() == duplicate.tolist()

    # Changing the original must leave the copy untouched
    original[1] = 1
    assert original.tolist() != duplicate.tolist()
def test_setslice():
    """Slice assignment from scalars, lists and other SparseArrays."""
    # Scalar assigned across a slice
    s = SparseArray(100, 0)
    s[1] = 2
    s[99] = 2
    s[5:8] = 3
    assert len(s.container) == 5
    assert s.keys() == [1, 5, 6, 7, 99]
    assert s.values() == [2, 3, 3, 3, 2]

    # List assigned across a slice
    s = SparseArray(100, 0)
    s[1] = 2
    s[99] = 2
    s[5:8] = [3, 3, 3]
    assert len(s.container) == 5
    assert s.keys() == [1, 5, 6, 7, 99]
    assert s.values() == [2, 3, 3, 3, 2]

    # Slice of another SparseArray assigned across a slice
    s = SparseArray(100, 0)
    t = SparseArray(100, 0)
    t[5:10] = 1
    s[5:10] = t[5:10]
    # The source must be left intact by the copy ...
    assert len(t.container) == 5
    assert t.values() == [1] * 5
    assert t.keys() == [5, 6, 7, 8, 9]
    # ... and the destination must actually receive the values. Without
    # these checks the assignment into `s` is never verified.
    assert len(s.container) == 5
    assert s.values() == [1] * 5
    assert s.keys() == [5, 6, 7, 8, 9]

    # Slice taken from a fully non-reference array
    s = SparseArray(10, 0)
    s[2:5] = SparseArray.from_dense([1] * 10, 0)[2:5]
    assert s.tolist() == [0, 0, 1, 1, 1, 0, 0, 0, 0, 0]

    # Longer span copied between two arrays
    s = SparseArray(100, 0)
    t = SparseArray(100, 0)
    t[20:50] = 1
    s[20:50] = t[20:50]
    assert all(sv == 1 for sv in s[20:50].tolist())
class SparseAlleles(AlleleContainer):
    '''
    An object representing a set of haploid genotypes efficiently by
    storing allele differences from a reference. Useful for manipulating
    genotypes from sequence data (e.g. VCF files)

    In the interest of conserving memory for sequencing data, all alleles
    must be represented by a signed 8-bit integer (i.e. between -128 and
    127). Negative values are interpreted as missing.
    '''

    def __init__(self, data=None, refcode=0, missingcode=-1, size=None,
                 template=None):
        '''
        Build the sparse container, either empty or from dense data.

        :param data: dense allele values (any sequence or numpy array),
            or None to create a container holding only reference values
        :param refcode: value treated as the reference allele
        :param missingcode: accepted for interface compatibility; the
            `missingcode` property always reports -1
        :param size: number of markers, used only when `data` is None
        :param template: object whose nmark() supplies the size when
            `data` is None and `size` is not given

        :raises ValueError: if `data`, `template` and `size` are all None
        :raises NotImplementedError: if `data` is already a SparseArray
        '''
        self.template = template
        self.refcode = refcode

        if data is None:
            if template is None and size is None:
                raise ValueError('No template or size')
            elif template is not None and size is None:
                size = self.template.nmark()
            self.container = SparseArray(size, refcode)
        elif type(data) is SparseArray:
            raise NotImplementedError
        else:
            if not isinstance(data, np.ndarray):
                data = np.array(data)
            self.container = SparseArray.from_dense(data, refcode)

        # Record the size on every construction path. The `missing`
        # property reads it, and the empty-container path previously
        # returned before it was set.
        self.size = self.container.size

    def __getitem__(self, key):
        return self.container[key]

    def __setitem__(self, key, value):
        self.container[key] = value

    def keys(self):
        ''' Return the keys of the underlying sparse container '''
        return self.container.keys()

    def values(self):
        ''' Return the values of the underlying sparse container '''
        return self.container.values()

    @property
    def missingcode(self):
        ''' The value that marks a missing allele (always -1) '''
        return -1

    @property
    def missing(self):
        " Returns a numpy array indicating which markers have missing data "
        missingindices = [i for i, v in self.container.items()
                          if v == self.missingcode]
        base = np.zeros(self.size, dtype=np.bool_)
        base[missingindices] = 1
        return base

    def __eq__(self, other):
        # Compare container to container when both sides are
        # SparseAlleles; otherwise let the container compare itself
        # against whatever was given.
        if type(other) is SparseAlleles:
            return self.container == other.container
        else:
            return self.container == other

    def __ne__(self, other):
        if type(other) is SparseAlleles:
            return self.container != other.container
        else:
            return self.container != other

    def nmark(self):
        '''
        Return the number of markers (both reference and non-reference)
        represented by the SparseAlleles object
        '''
        return self.container.size

    def todense(self):
        ''' Return an Alleles object built from the expanded values '''
        dense = Alleles(self.container.tolist(), template=self.template)
        return dense

    def empty_like(self):
        '''
        Return a new SparseAlleles with the same template, reference
        code and number of markers, holding only reference values
        '''
        output = SparseAlleles(template=self.template,
                               missingcode=self.missingcode,
                               refcode=self.refcode,
                               size=self.nmark())
        return output

    def copy_span(self, template, copy_start, copy_stop):
        '''
        Copy the half-open span [copy_start, copy_stop) from `template`
        into the same span of this object.

        :param template: a SparseAlleles, or any object whose slices can
            be assigned into the sparse container
        :param copy_start: first index copied
        :param copy_stop: index one past the last one copied
        '''
        if isinstance(template, SparseAlleles):
            source = template.container[copy_start:copy_stop]
        else:
            source = template[copy_start:copy_stop]
        # Assign into the span in both cases. Rebinding self.container
        # to the slice would discard every marker outside the span and
        # leave the container as something other than a SparseArray.
        self.container[copy_start:copy_stop] = source

    def copy(self):
        ''' Return a new SparseAlleles holding a copy of the container '''
        outp = self.empty_like()
        outp.container = self.container.copy()
        return outp

    @staticmethod
    def empty(reference=None, template=None, missingcode='', size=None):
        '''
        Create a SparseAlleles holding only reference values.

        :param reference: currently unused
        :param template: object whose nmark() supplies the size when
            `size` is not given
        :param missingcode: forwarded to the constructor
        :param size: number of markers; optional when `template` is given

        :raises ValueError: if both `template` and `size` are None
        '''
        # `size` has to be passed by keyword: the constructor's first
        # positional parameter is `data`, not the size.
        out = SparseAlleles(size=size, template=template,
                            missingcode=missingcode)
        return out