Beispiel #1
0
    def __init__(self, data=None, refcode=0, size=None, template=None):
        """
        Build the container from dense data, a SparseArray, or just a size.

        :param data: allele values; a SparseArray is copied, anything else
            is converted to a numpy array and sparsified. When None an
            all-reference container is created.
        :param refcode: the reference (sparse) value; None is treated as 0
        :param size: number of markers, used only when data is None
        :param template: object providing nmark(), used to determine the
            size when neither data nor size is given

        :raises ValueError: if data, size and template are all None
        """
        self.template = template

        # An explicit None falls back to the default reference code
        if refcode is None:
            refcode = 0

        if data is None:
            if size is None:
                if template is None:
                    raise ValueError('No template or size')
                size = template.nmark()
            self.container = SparseArray(size, refcode)
        elif type(data) is SparseArray:
            self.container = data.copy()
        else:
            if not isinstance(data, np.ndarray):
                data = np.array(data)
            # refcode can no longer be None here, so it is always the
            # reference value handed to the sparse builder
            self.container = SparseArray.from_dense(data, refcode)

        # Set on every path; previously the data-less branch returned early
        # and left the instance without a size attribute
        self.size = len(self.container)
Beispiel #2
0
def test_getitem():
    """Single-index reads give the constructor's fill value until set."""
    zero_filled = SparseArray(100, 0)
    assert zero_filled[50] == 0

    one_filled = SparseArray(100, 1)
    assert one_filled[50] == 1

    # After a write, the same index reads back the stored value
    zero_filled[5] = 20
    assert zero_filled[5] == 20
Beispiel #3
0
def test_cmp():
    """Comparisons against scalars, lists and SparseArrays are elementwise."""
    s = SparseArray(5, 0)
    s[(1, 3)] = [1, 1]

    # Scalar comparisons
    assert (s == 1).tolist() == [False, True, False, True, False]
    assert (s != 1).tolist() == [True, False, True, False, True]
    assert (s > 0).tolist() == [False, True, False, True, False]
    assert (s < 1).tolist() == [True, False, True, False, True]

    # Comparison with another SparseArray: require every element to match.
    # Asserting the comparison result directly only tests its truthiness,
    # which does not establish elementwise equality.
    s2 = SparseArray.from_dense([0, 1, 0, 1, 0], 0)
    assert (s == s2).all()

    # Comparison with a plain sequence
    assert (s == [0, 1, 0, 1, 0]).tolist() == [True, True, True, True, True]
Beispiel #4
0
def test_setitem():
    """Single-index writes keep keys()/values() ordered and in sync."""
    arr = SparseArray(100, 0)

    # First stored element
    arr[10] = 1

    # Insert ahead of the first stored element
    arr[5] = 1

    # Insert past the last stored element
    arr[20] = 1

    # Insert something in between existing elements
    arr[15] = 2
    assert arr.keys() == [5, 10, 15, 20]
    assert arr.values() == [1, 1, 2, 1]

    # Writing the fill value back drops the entry
    arr[15] = 0

    assert arr.keys() == [5, 10, 20]
    assert arr.values() == [1, 1, 1]

    # Overwriting an existing entry replaces its value in place
    arr[20] = 2

    assert arr.keys() == [5, 10, 20]
    assert arr.values() == [1, 1, 2]
Beispiel #5
0
def test_getslice():
    """Slices keep only the stored entries inside the span, re-indexed."""
    first = SparseArray(100, 0)
    for position, value in ((5, 20), (10, 40)):
        first[position] = value

    assert first[4:20].ndense() == 2
    # Keys of the slice are relative to the slice start (5-4, 10-4)
    assert list(first[4:20].keys()) == [1, 6]

    second = SparseArray(100, 0)
    # Entries at 1 and 99 fall outside the slice below
    for position, value in ((1, 2), (99, 2), (5, 20), (10, 40)):
        second[position] = value

    assert second[4:20].ndense() == 2
Beispiel #6
0
def test_staticbuilders():
    """from_dense and from_items build SparseArrays with the right content."""
    # Dense builder from a plain list
    a = SparseArray.from_dense([0, 1, 1, 0, 0, 1], 0)
    assert type(a) is SparseArray
    assert a.tolist() == [0, 1, 1, 0, 0, 1]
    assert a.size == 6

    # Dense builder from a numpy array. The array has to be passed to the
    # builder; otherwise the assertions below just re-check the list case.
    npa = np.array([0, 1, 1, 0, 0, 1], dtype=np.int8)
    a = SparseArray.from_dense(npa, 0)
    assert type(a) is SparseArray
    assert a.tolist() == [0, 1, 1, 0, 0, 1]
    assert a.size == 6

    # Builder from (index, value) pairs with an explicit size and fill value
    a = SparseArray.from_items([(0, 1), (2, 1), (4, 1)], 5, 0)
    assert type(a) is SparseArray
    assert a.tolist() == [1, 0, 1, 0, 1]
    assert a.size == 5
Beispiel #7
0
def test_fancy_index_get():
    """Reading with boolean masks and with sequences of integer indices."""
    # Bool masks
    s = SparseArray(5, 0)
    s[3] = 1
    # The mask selects indices 2 and 4, both still at the fill value.
    # This comparison was previously a bare expression, so it was never
    # checked, and its expected value did not match the mask.
    assert s[(False, False, True, False, True)].tolist() == [0, 0]

    # A mask whose length differs from the array must be rejected
    try:
        s[(False, True)]
    except ValueError:
        # We wanted this error
        pass
    else:
        assert False

    # Number masks
    s = SparseArray(10, 0)
    s[3] = 2
    s[5] = 3
    assert s.tolist() == [0, 0, 0, 2, 0, 3, 0, 0, 0, 0]
    # Results follow the order of the requested indices
    assert s[(3, 5)].tolist() == [2, 3]
    assert s[(5, 3)].tolist() == [3, 2]
Beispiel #8
0
def test_misc():
    """Checks sparsity() and the ordering of items()."""
    # sparsity() drops by 0.1 for each stored element in a 10-slot array
    arr = SparseArray(10, 0)
    assert_almost_equal(arr.sparsity(), 1.0)
    for position, expected in ((0, 0.9), (1, 0.8)):
        arr[position] = 1
        assert_almost_equal(arr.sparsity(), expected)

    # items() yields (index, value) pairs sorted by index, not insert order
    arr = SparseArray(10, 0)
    arr[5] = 1
    arr[2] = 4
    pairs = list(arr.items())
    assert pairs == [(2, 4), (5, 1)]
Beispiel #9
0
def test_tolist():
    """tolist() expands to a dense list, filling gaps with the fill value."""
    untouched = SparseArray(5, 0)
    assert untouched.tolist() == [0, 0, 0, 0, 0]

    # Non-zero fill value with two stored entries
    ones = SparseArray(8, 1)
    for position, value in ((1, 2), (5, 3)):
        ones[position] = value
    assert ones.tolist() == [1, 2, 1, 1, 1, 3, 1, 1]
Beispiel #10
0
    def __init__(self, data=None, refcode=0, size=None, template=None):
        """
        Build the container from dense data, a SparseArray, or just a size.

        :param data: allele values; a SparseArray is copied, anything else
            is converted to a numpy array and sparsified. When None an
            all-reference container is created.
        :param refcode: the reference (sparse) value; None is treated as 0
        :param size: number of markers, used only when data is None
        :param template: object providing nmark(), used to determine the
            size when neither data nor size is given

        :raises ValueError: if data, size and template are all None
        """
        self.template = template

        # An explicit None falls back to the default reference code
        if refcode is None:
            refcode = 0

        if data is None:
            if size is None:
                if template is None:
                    raise ValueError('No template or size')
                size = template.nmark()
            self.container = SparseArray(size, refcode)
        elif type(data) is SparseArray:
            self.container = data.copy()
        else:
            if not isinstance(data, np.ndarray):
                data = np.array(data)
            # refcode can no longer be None here, so it is always the
            # reference value handed to the sparse builder
            self.container = SparseArray.from_dense(data, refcode)

        # Set on every path; previously the data-less branch returned early
        # and left the instance without a size attribute
        self.size = len(self.container)
Beispiel #11
0
def test_fancy_index_set():
    """Writing through boolean masks and sequences of integer indices."""
    bool_mask = [False, False, False, True, False, True, False, False, False, False]

    # Bool mask with one value per selected position
    arr = SparseArray(10, 0)
    arr[bool_mask] = [2, 3]
    assert arr.tolist() == [0, 0, 0, 2, 0, 3, 0, 0, 0, 0]

    # Integer indices, in ascending and then descending order
    arr[(3, 5)] = [10, 10]
    assert arr.tolist() == [0, 0, 0, 10, 0, 10, 0, 0, 0, 0]

    arr[(5, 3)] = [20, 20]
    assert arr.tolist() == [0, 0, 0, 20, 0, 20, 0, 0, 0, 0]

    # A scalar is written to every integer index
    arr = SparseArray(10, 0)
    arr[(2, 3)] = 1
    assert arr.tolist() == [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]

    # A scalar is written to every position selected by a bool mask
    arr = SparseArray(10, 0)
    bool_mask = [False, False, False, True, False, True, False, False, False, False]
    arr[bool_mask] = 2
    assert arr.tolist() == [0, 0, 0, 2, 0, 2, 0, 0, 0, 0]
Beispiel #12
0
def test_copy():
    """copy() gives an equal array that does not follow later writes."""
    original = SparseArray.from_items([(0, 1), (2, 1), (4, 1)], 5, 0)
    duplicate = original.copy()
    assert original.tolist() == duplicate.tolist()

    # Changing the original must leave the copy as it was
    original[1] = 1
    assert original.tolist() != duplicate.tolist()
Beispiel #13
0
def test_logic():
    """Checks all(), any() and logical_not()."""
    arr = SparseArray(5, 0)

    # Nothing set: neither all() nor any() holds
    assert not arr.all()
    assert not arr.any()

    # One element set: any() holds, all() does not
    arr[0] = 1
    assert not arr.all()
    assert arr.any()

    # Every element set: both hold
    arr[0:5] = 1
    assert arr.all()
    assert arr.any()

    # logical_not() flips each element of a boolean array
    flags = SparseArray(5, False)
    flags[(1, 3)] = True
    inverted = flags.logical_not().tolist()
    assert inverted == [True, False, True, False, True]
Beispiel #14
0
def test_setslice():
    """Slice assignment from scalars, lists and SparseArray slices."""
    # Scalar written across a slice, with entries on either side of it
    s = SparseArray(100, 0)
    s[1] = 2
    s[99] = 2

    s[5:8] = 3
    assert s.ndense() == 5
    assert s.keys() == [1, 5, 6, 7, 99]
    assert s.values() == [2, 3, 3, 3, 2]

    # The same layout written from a list
    s = SparseArray(100, 0)
    s[1] = 2
    s[99] = 2
    s[5:8] = [3, 3, 3]
    assert s.ndense() == 5
    assert s.keys() == [1, 5, 6, 7, 99]
    assert s.values() == [2, 3, 3, 3, 2]

    # A slice of one SparseArray written into another
    s = SparseArray(100, 0)
    t = SparseArray(100, 0)
    t[5:10] = 1
    s[5:10] = t[5:10]
    # The source is populated and unchanged by the copy
    assert t.ndense() == 5
    assert t.values() == [1]*5
    assert t.keys() == [5, 6, 7, 8, 9]
    # The destination was previously never checked; it must now hold the
    # same entries as the source
    assert s.ndense() == 5
    assert s.values() == [1]*5
    assert s.keys() == [5, 6, 7, 8, 9]

    # A slice of a fully dense source
    s = SparseArray(10, 0)
    s[2:5] = SparseArray.from_dense([1]*10, 0)[2:5]
    assert s.tolist() == [0, 0, 1, 1, 1, 0, 0, 0, 0, 0]

    # A longer span
    s = SparseArray(100, 0)
    t = SparseArray(100, 0)
    t[20:50] = 1
    s[20:50] = t[20:50]
    assert all(sv == 1 for sv in s[20:50].tolist())
Beispiel #15
0
class SparseAlleles(AlleleContainer):
    '''
    An object representing a set of haploid genotypes efficiently by
    storing allele differences from a reference. Useful for manipulating
    genotypes from sequence data (e.g. VCF files)

    In the interest of conserving memory for sequencing data, all alleles must
    be represented by a signed 8-bit integer (i.e. between -128 and 127).
    Negative values are interpreted as missing.
    '''

    def __init__(self, data=None, refcode=0, size=None, template=None):
        """
        Build the container from dense data, a SparseArray, or just a size.

        :param data: allele values; a SparseArray is copied, anything else
            is converted to a numpy array and sparsified. When None an
            all-reference container is created.
        :param refcode: the reference (sparse) value; None is treated as 0
        :param size: number of markers, used only when data is None
        :param template: object providing nmark(), used to determine the
            size when neither data nor size is given

        :raises ValueError: if data, size and template are all None
        """
        self.template = template

        # An explicit None falls back to the default reference code
        if refcode is None:
            refcode = 0

        if data is None:
            if size is None:
                if template is None:
                    raise ValueError('No template or size')
                size = template.nmark()
            self.container = SparseArray(size, refcode)
        elif type(data) is SparseArray:
            self.container = data.copy()
        else:
            if not isinstance(data, np.ndarray):
                data = np.array(data)
            # refcode can no longer be None here, so it is always the
            # reference value handed to the sparse builder
            self.container = SparseArray.from_dense(data, refcode)

        # Set on every path; previously the data-less branch returned early
        # and left the instance without a size attribute, which broke the
        # `missing` property
        self.size = len(self.container)

    def __getitem__(self, key):
        """ Delegates indexing to the underlying container """
        return self.container[key]

    def __setitem__(self, key, value):
        """ Delegates item assignment to the underlying container """
        self.container[key] = value

    def keys(self):
        """ Returns the keys() of the underlying container """
        return self.container.keys()

    def values(self):
        """ Returns the values() of the underlying container """
        return self.container.values()

    @property
    def refcode(self):
        """
        Returns the sparse value in the container

        :rtype: int8_t
        """
        return self.container.ref

    @property
    def missingcode(self):
        "Returns the code used for missing values"
        return -1

    @property
    def dtype(self):
        "Returns the type reported for alleles (always int)"
        return int

    @property
    def missing(self):
        " Returns a numpy array indicating which markers have missing data "
        # NOTE(review): the class docstring says any negative value is
        # missing, but only entries equal to missingcode are flagged here
        missingindices = [i for i, v in self.container.items()
                          if v == self.missingcode]
        base = np.zeros(self.size, dtype=np.bool_)
        base[missingindices] = 1
        return base

    def __eq__(self, other):
        """ Compares containers; another SparseAlleles is unwrapped first """
        if isinstance(other, SparseAlleles):
            return self.container == other.container
        return self.container == other

    def __ne__(self, other):
        """ Inverse comparison; another SparseAlleles is unwrapped first """
        if isinstance(other, SparseAlleles):
            return self.container != other.container
        return self.container != other

    def nmark(self):
        '''
        Return the number of markers (both reference and non-reference)
        represented by the SparseAlleles object

        :returns: markercount
        :rtype: int
        '''
        return self.container.size

    def todense(self):
        """
        Converts to a dense representation of the same genotypes (Alleles).

        :returns: dense version
        :rtype: Alleles
        """
        return Alleles(self.container.tolist(), template=self.template)

    def empty_like(self):
        """
        Creates a blank SparseAlleles with same parameters

        :returns: empty SparseAlleles
        """
        return SparseAlleles(template=self.template,
                             refcode=self.refcode, size=self.nmark())

    def copy_span(self, template, copy_start, copy_stop):
        """
        Copies one segment of a chromosome over to the other

        :param template: the data to be copied from
        :param copy_start: where to start copying (inclusive)
        :param copy_stop: where to stop copying (exclusive)
        :type template: AlleleContainer
        :type copy_start: int
        :type copy_stop: int
        :rtype: None
        """
        # Slice the raw container when the source is also sparse
        if isinstance(template, SparseAlleles):
            source = template.container
        else:
            source = template
        self.container[copy_start:copy_stop] = source[copy_start:copy_stop]

    def copy(self):
        """
        Creates a copy of the current data

        :returns: cloned allele set
        :rtype: SparseAlleles
        """
        outp = self.empty_like()
        outp.container = self.container.copy()
        return outp

    @staticmethod
    def empty(template):
        """
        Creates an empty SparseAlleles (everybody is wild-type)

        :param template: The chromosome info associated with this set of alleles
        :type template: ChromosomeTemplate

        :returns: Empty container
        :rtype: SparseAlleles
        """
        # The marker count must be passed as `size`; passed positionally
        # it would be taken as the allele data
        return SparseAlleles(size=template.nmark(), template=template)
Beispiel #16
0
class SparseAlleles(AlleleContainer):
    '''
    An object representing a set of haploid genotypes efficiently by
    storing allele differences from a reference. Useful for manipulating
    genotypes from sequence data (e.g. VCF files)

    In the interest of conserving memory for sequencing data, all alleles must
    be represented by a signed 8-bit integer (i.e. between -128 and 127).
    Negative values are interpreted as missing.
    '''
    def __init__(self, data=None, refcode=0, size=None, template=None):
        """
        Build the container from dense data, a SparseArray, or just a size.

        :param data: allele values; a SparseArray is copied, anything else
            is converted to a numpy array and sparsified. When None an
            all-reference container is created.
        :param refcode: the reference (sparse) value; None is treated as 0
        :param size: number of markers, used only when data is None
        :param template: object providing nmark(), used to determine the
            size when neither data nor size is given

        :raises ValueError: if data, size and template are all None
        """
        self.template = template

        # An explicit None falls back to the default reference code
        if refcode is None:
            refcode = 0

        if data is None:
            if size is None:
                if template is None:
                    raise ValueError('No template or size')
                size = template.nmark()
            self.container = SparseArray(size, refcode)
        elif type(data) is SparseArray:
            self.container = data.copy()
        else:
            if not isinstance(data, np.ndarray):
                data = np.array(data)
            # refcode can no longer be None here, so it is always the
            # reference value handed to the sparse builder
            self.container = SparseArray.from_dense(data, refcode)

        # Set on every path; previously the data-less branch returned early
        # and left the instance without a size attribute, which broke the
        # `missing` property
        self.size = len(self.container)

    def __getitem__(self, key):
        """ Delegates indexing to the underlying container """
        return self.container[key]

    def __setitem__(self, key, value):
        """ Delegates item assignment to the underlying container """
        self.container[key] = value

    def keys(self):
        """ Returns the keys() of the underlying container """
        return self.container.keys()

    def values(self):
        """ Returns the values() of the underlying container """
        return self.container.values()

    @property
    def refcode(self):
        """
        Returns the sparse value in the container

        :rtype: int8_t
        """
        return self.container.ref

    @property
    def missingcode(self):
        "Returns the code used for missing values"
        return -1

    @property
    def dtype(self):
        "Returns the type reported for alleles (always int)"
        return int

    @property
    def missing(self):
        " Returns a numpy array indicating which markers have missing data "
        # NOTE(review): the class docstring says any negative value is
        # missing, but only entries equal to missingcode are flagged here
        missingindices = [
            i for i, v in self.container.items() if v == self.missingcode
        ]
        base = np.zeros(self.size, dtype=np.bool_)
        base[missingindices] = 1
        return base

    def __eq__(self, other):
        """ Compares containers; another SparseAlleles is unwrapped first """
        if isinstance(other, SparseAlleles):
            return self.container == other.container
        return self.container == other

    def __ne__(self, other):
        """ Inverse comparison; another SparseAlleles is unwrapped first """
        if isinstance(other, SparseAlleles):
            return self.container != other.container
        return self.container != other

    def nmark(self):
        '''
        Return the number of markers (both reference and non-reference)
        represented by the SparseAlleles object

        :returns: markercount
        :rtype: int
        '''
        return self.container.size

    def todense(self):
        """
        Converts to a dense representation of the same genotypes (Alleles).

        :returns: dense version
        :rtype: Alleles
        """
        return Alleles(self.container.tolist(), template=self.template)

    def empty_like(self):
        """
        Creates a blank SparseAlleles with same parameters

        :returns: empty SparseAlleles
        """
        return SparseAlleles(template=self.template,
                             refcode=self.refcode,
                             size=self.nmark())

    def copy_span(self, template, copy_start, copy_stop):
        """
        Copies one segment of a chromosome over to the other

        :param template: the data to be copied from
        :param copy_start: where to start copying (inclusive)
        :param copy_stop: where to stop copying (exclusive)
        :type template: AlleleContainer
        :type copy_start: int
        :type copy_stop: int
        :rtype: None
        """
        # Slice the raw container when the source is also sparse
        if isinstance(template, SparseAlleles):
            source = template.container
        else:
            source = template
        self.container[copy_start:copy_stop] = source[copy_start:copy_stop]

    def copy(self):
        """
        Creates a copy of the current data

        :returns: cloned allele set
        :rtype: SparseAlleles
        """
        outp = self.empty_like()
        outp.container = self.container.copy()
        return outp

    @staticmethod
    def empty(template):
        """
        Creates an empty SparseAlleles (everybody is wild-type)

        :param template: The chromosome info associated with this set of alleles
        :type template: ChromosomeTemplate

        :returns: Empty container
        :rtype: SparseAlleles
        """
        # The marker count must be passed as `size`; passed positionally
        # it would be taken as the allele data
        return SparseAlleles(size=template.nmark(), template=template)