Esempio n. 1
0
    def getGenotypes(self, sample_idx=None, idx_start=None, idx_end=None,
                     chrom=None, pos_start=None, pos_end=None, center=True,
                     unit=True, impute_missing=False, snp_idx=None,
                     windowsize=0):
        """Return a (sub)matrix of ``self.geno_matrix``.

        Columns are selected by the first rule that applies:

        1. ``idx_start`` and ``idx_end`` both given: the column slice
           ``idx_start:idx_end``.
        2. Neither index given but ``pos_start`` and ``pos_end`` both given:
           the index pair is looked up through ``self.getGenoIndex`` and then
           used as in rule 1.
        3. ``snp_idx`` given: columns indexed by ``snp_idx``.
        4. Otherwise: all columns.

        Args:
            sample_idx:     optional row selector applied after the column
                            selection (presumably rows are samples - confirm)
            idx_start:      genotype index based selection (start index)
            idx_end:        genotype index based selection (end index)
            chrom:          chromosome, forwarded to ``getGenoIndex``
            pos_start:      position based selection (start position)
            pos_end:        position based selection (end position)
            center:         forwarded to ``du.imputeMissing``
            unit:           forwarded to ``du.imputeMissing``
            impute_missing: if True, the result is passed through
                            ``du.imputeMissing``
            snp_idx:        explicit column selector, used only when no
                            index range is available
            windowsize:     forwarded to ``getGenoIndex``

        Returns:
            X:              the selected genotype values
        """
        # Positions are only resolved when the caller gave no explicit index.
        wants_position_lookup = (
            idx_start is None and idx_end is None
            and pos_start is not None and pos_end is not None
        )
        if wants_position_lookup:
            idx_start, idx_end = self.getGenoIndex(
                chrom=chrom,
                pos_start=pos_start,
                pos_end=pos_end,
                windowsize=windowsize,
            )

        # Pick the column selector: index range beats snp_idx beats "all".
        if idx_start is not None and idx_end is not None:
            columns = slice(idx_start, idx_end)
        elif snp_idx is not None:
            columns = snp_idx
        else:
            columns = slice(None)
        genotypes = self.geno_matrix[:, columns]

        if sample_idx is not None:
            genotypes = genotypes[sample_idx]

        if not impute_missing:
            return genotypes
        return du.imputeMissing(genotypes, center=center, unit=unit)
Esempio n. 2
0
File: data.py Progetto: PMBio/limix
    def getGenotypes(self,idx_start=None,idx_end=None,pos_start=None,pos_end=None,windowsize=0,chrom=None,center=True,unit=True,impute_missing=False,cast_float=True):
        """return genotypes.
        Optionally the SNPs to load can be restricted in one of two ways:
        - 0-based indexing (idx_start-idx_end)
        - position (pos_start-pos_end on chrom)
        All selection arguments are forwarded to ``self.range_query_geno``,
        which produces the SNP index handed to the genotype reader. Rows are
        always restricted to ``self.sample_idx["geno"]``.

        Args:
            idx_start:      genotype index based selection (start index)
            idx_end:        genotype index based selection (end index)
            pos_start:      position based selection (start position)
            pos_end:        position based selection (end position)
            windowsize:     forwarded to ``range_query_geno``
            chrom:          position based selection (chromosome)
            center:         forwarded to ``du.imputeMissing``
            unit:           forwarded to ``du.imputeMissing``
            impute_missing: Boolean indicator variable if missing values should be imputed
            cast_float:     Boolean indicator variable if output genotypes should be cast to float64
        Returns:
            X:          array of genotype values
        """
        # pos_end must be forwarded as well; without it the end of a
        # position range was silently dropped from the query.
        query_idx = self.range_query_geno(idx_start=idx_start, idx_end=idx_end, chrom=chrom, pos_start=pos_start, pos_end=pos_end, windowsize=windowsize)
        X = self.geno_reader.getGenotypes(sample_idx=sp.array(self.sample_idx["geno"]), snp_idx=query_idx)
        if impute_missing:
            X = du.imputeMissing(X, center=center, unit=unit)
        # Only copy when the dtype actually differs.
        if cast_float and X.dtype != 'float64':
            X = sp.array(X, dtype='float64')
        return X
Esempio n. 3
0
    def getGenotypes(self,
                     sample_idx=None,
                     idx_start=None,
                     idx_end=None,
                     pos_start=None,
                     pos_end=None,
                     chrom=None,
                     center=True,
                     unit=True,
                     pos_cum_start=None,
                     pos_cum_end=None,
                     impute_missing=False,
                     snp_idx=None):
        """load genotypes.
        Optionally the columns of ``self.geno_matrix`` to load can be given
        in one out of three ways:
        - 0-based indexing (idx_start-idx_end)
        - position (pos_start-pos_end on chrom)
        - cumulative position (pos_cum_start-pos_cum_end)
        Explicit indices take precedence: positions are only resolved (via
        ``self.getGenoIndex``) when neither idx_start nor idx_end is given.
        If no index range results, ``snp_idx`` is used when given; otherwise
        all genotypes are returned.

        Args:
            sample_idx:      optional row selector applied after the column selection
            idx_start:       genotype index based selection (start index)
            idx_end:         genotype index based selection (end index)
            pos_start:       position based selection (start position)
            pos_end:         position based selection (end position)
            chrom:           position based selection (chromosome)
            center:          forwarded to ``du.imputeMissing``
            unit:            forwarded to ``du.imputeMissing``
            pos_cum_start:   cumulative position based selection (start position)
            pos_cum_end:     cumulative position based selection (end position)
            impute_missing:  Boolean indicator variable if missing values should be imputed
            snp_idx:         explicit column selector, used when no index range is available

        Returns:
            X:          array of genotype values
        """
        #position based matching?
        # The index check has to guard BOTH position modes; the previous
        # unparenthesised "a and b and c or d" let cumulative positions
        # override explicitly given indices.
        no_index_given = (idx_start is None) and (idx_end is None)
        by_position = ((pos_start is not None) and (pos_end is not None)
                       and (chrom is not None))
        by_cum_position = ((pos_cum_start is not None)
                           and (pos_cum_end is not None))
        if no_index_given and (by_position or by_cum_position):
            # pos_cum_end was previously passed as the undefined name
            # `pose_cum1`, raising NameError on every position-based call.
            idx_start, idx_end = self.getGenoIndex(pos_start=pos_start,
                                                   pos_end=pos_end,
                                                   chrom=chrom,
                                                   pos_cum_start=pos_cum_start,
                                                   pos_cum_end=pos_cum_end)
        #index based matching?
        if (idx_start is not None) and (idx_end is not None):
            X = self.geno_matrix[:, idx_start:idx_end]
        elif snp_idx is not None:
            X = self.geno_matrix[:, snp_idx]
        else:
            X = self.geno_matrix[:, :]
        if sample_idx is not None:
            X = X[sample_idx]
        if impute_missing:
            X = du.imputeMissing(X, center=center, unit=unit)
        return X
Esempio n. 4
0
    def getGenotypes(self,
                     idx_start=None,
                     idx_end=None,
                     pos_start=None,
                     pos_end=None,
                     windowsize=0,
                     chrom=None,
                     center=True,
                     unit=True,
                     impute_missing=False,
                     cast_float=True):
        """return genotypes.
        Optionally the SNPs to load can be restricted in one of two ways:
        - 0-based indexing (idx_start-idx_end)
        - position (pos_start-pos_end on chrom)
        All selection arguments are forwarded to ``self.range_query_geno``,
        which produces the SNP index handed to the genotype reader. Rows are
        always restricted to ``self.sample_idx["geno"]``.

        Args:
            idx_start:      genotype index based selection (start index)
            idx_end:        genotype index based selection (end index)
            pos_start:      position based selection (start position)
            pos_end:        position based selection (end position)
            windowsize:     forwarded to ``range_query_geno``
            chrom:          position based selection (chromosome)
            center:         forwarded to ``du.imputeMissing``
            unit:           forwarded to ``du.imputeMissing``
            impute_missing: Boolean indicator variable if missing values should be imputed
            cast_float:     Boolean indicator variable if output genotypes should be cast to float64
        Returns:
            X:          array of genotype values
        """
        # pos_end is part of the position query; it was previously accepted
        # by this method but never passed on, so the range end was ignored.
        query_idx = self.range_query_geno(idx_start=idx_start,
                                          idx_end=idx_end,
                                          chrom=chrom,
                                          pos_start=pos_start,
                                          pos_end=pos_end,
                                          windowsize=windowsize)
        geno_samples = sp.array(self.sample_idx["geno"])
        X = self.geno_reader.getGenotypes(sample_idx=geno_samples,
                                          snp_idx=query_idx)
        if impute_missing:
            X = du.imputeMissing(X, center=center, unit=unit)
        # Only copy when the dtype actually differs.
        if cast_float and X.dtype != 'float64':
            X = sp.array(X, dtype='float64')
        return X
Esempio n. 5
0
    def getGenotypes(self,sample_idx=None,idx_start=None,idx_end=None,pos_start=None,pos_end=None,chrom=None,center=True,unit=True,pos_cum_start=None,pos_cum_end=None,impute_missing=False,snp_idx=None):
        """load genotypes.
        Optionally the columns of ``self.geno_matrix`` to load can be given
        in one out of three ways:
        - 0-based indexing (idx_start-idx_end)
        - position (pos_start-pos_end on chrom)
        - cumulative position (pos_cum_start-pos_cum_end)
        Explicit indices take precedence: positions are only resolved (via
        ``self.getGenoIndex``) when neither idx_start nor idx_end is given.
        If no index range results, ``snp_idx`` is used when given; otherwise
        all genotypes are returned.

        Args:
            sample_idx:      optional row selector applied after the column selection
            idx_start:       genotype index based selection (start index)
            idx_end:         genotype index based selection (end index)
            pos_start:       position based selection (start position)
            pos_end:         position based selection (end position)
            chrom:           position based selection (chromosome)
            center:          forwarded to ``du.imputeMissing``
            unit:            forwarded to ``du.imputeMissing``
            pos_cum_start:   cumulative position based selection (start position)
            pos_cum_end:     cumulative position based selection (end position)
            impute_missing:  Boolean indicator variable if missing values should be imputed
            snp_idx:         explicit column selector, used when no index range is available

        Returns:
            X:          array of genotype values
        """
        #position based matching?
        # Both position modes must sit behind the "no explicit index" guard;
        # without the grouping, "and" bound tighter than "or" and cumulative
        # positions bypassed the guard.
        has_pos = (pos_start is not None) and (pos_end is not None) and (chrom is not None)
        has_pos_cum = (pos_cum_start is not None) and (pos_cum_end is not None)
        if (idx_start is None) and (idx_end is None) and (has_pos or has_pos_cum):
            # Forward pos_cum_end itself (was the undefined name `pose_cum1`,
            # which raised NameError whenever this branch ran).
            idx_start,idx_end=self.getGenoIndex(pos_start=pos_start,pos_end=pos_end,chrom=chrom,pos_cum_start=pos_cum_start,pos_cum_end=pos_cum_end)
        #index based matching?
        if (idx_start is not None) and (idx_end is not None):
            X = self.geno_matrix[:,idx_start:idx_end]
        elif snp_idx is not None:
            X = self.geno_matrix[:,snp_idx]
        else:
            X = self.geno_matrix[:,:]
        if sample_idx is not None:
            X=X[sample_idx]
        if impute_missing:
            X = du.imputeMissing(X,center=center,unit=unit)
        return X