def col_property(self):
    """Return the per-column property values (*same as* :attr:`pos`).

    The values are loaded lazily: the first access reads the ``.bim`` file
    that sits next to ``self.filename`` (the ``bed`` suffix is swapped for
    ``bim``) and caches both the column ids and the column properties on the
    instance. Later accesses return the cached value without touching disk.
    """
    if hasattr(self, "_col_property"):
        return self._col_property

    # One read yields both pieces of column metadata, so cache them together.
    ids, properties = SnpReader._read_map_or_bim(
        self.filename, remove_suffix="bed", add_suffix="bim")
    self._col = ids
    self._col_property = properties
    return self._col_property
def col(self):
    """Return the column ids (*same as* :attr:`sid`).

    The ids are loaded lazily: the first access reads the ``.bim`` file that
    sits next to ``self.filename`` (the ``pgen`` suffix is swapped for
    ``bim``), passing ``max_filesize=1E9`` through to the reader, and caches
    both the column ids and the column properties on the instance. Later
    accesses return the cached ids.
    """
    if hasattr(self, "_col"):
        return self._col

    # One read yields both pieces of column metadata, so cache them together.
    ids, properties = SnpReader._read_map_or_bim(
        self.filename,
        remove_suffix="pgen",
        add_suffix="bim",
        max_filesize=1E9)
    self._col = ids
    self._col_property = properties
    return self._col
def run_once(self):
    """Load the iid/sid/pos metadata for the ``.dat`` file set exactly once.

    On the first call this reads the ``.fam`` and ``.map`` files that
    accompany ``self.dat_filename``, stores the results in ``self._iid``,
    ``self._sid`` and ``self._pos``, and validates them with
    ``self._assert_iid_sid_pos()``. Subsequent calls do nothing.

    :returns: ``self`` on every call, so the result can always be chained.
        (Previously only the first call returned ``self``; repeat calls
        returned ``None``.)
    """
    if self._ran_once:
        return self
    # Flag is set before reading, exactly as before, so the loaders below
    # are never re-entered through this method.
    self._ran_once = True

    self._iid = SnpReader._read_fam(self.dat_filename, remove_suffix="dat")
    self._sid, self._pos = SnpReader._read_map_or_bim(
        self.dat_filename, remove_suffix="dat", add_suffix="map")
    self._assert_iid_sid_pos()
    return self
def _read_pstdata(self):
    """Read the whole ``.ped``/``.map`` file pair into an in-memory SnpData.

    The ``.map`` file supplies the SNP ids and positions. In the ``.ped``
    file the first two columns are used as the iid, and the columns from
    index 6 onward are read as consecutive allele pairs, one pair per SNP.

    Each genotype is encoded as the number of alleles in the pair that equal
    the first allele of the first non-missing individual for that SNP. An
    individual whose first allele equals ``self.missing`` gets ``NaN``.

    :returns: a ``SnpData`` holding iid, sid, pos and the value matrix
        (individuals x SNPs).
    """
    col, col_property = SnpReader._read_map_or_bim(
        self.filename, remove_suffix="ped", add_suffix="map")

    # ndmin=2 keeps a one-individual file two-dimensional so the column
    # slicing below still works.
    ped = np.loadtxt(self.filename, dtype='str', comments=None, ndmin=2)
    row = ped[:, 0:2]
    snpsstr = ped[:, 6:]
    inan = snpsstr == self.missing

    # Integer division: an array dimension must be an int on Python 3 too.
    snp_count = snpsstr.shape[1] // 2
    snps = np.zeros((snpsstr.shape[0], snp_count))
    for i in range(snp_count):
        is_missing = inan[:, 2 * i]
        snps[is_missing, i] = np.nan
        vals = snpsstr[~is_missing, 2 * i:2 * (i + 1)]
        if vals.shape[0] == 0:
            # Every individual is missing this SNP: the column is already
            # all NaN and there is no reference allele to count against.
            continue
        snps[~is_missing, i] += (vals == vals[0, 0]).sum(1)

    snpdata = SnpData(iid=row, sid=col, pos=col_property, val=snps)
    return snpdata
def _read_pstdata(self):
    """Read the whole ``.dat``/``.fam``/``.map`` file set into a SnpData.

    The ``.fam`` file supplies the iids and the ``.map`` file supplies the
    SNP ids and positions. The tab-delimited ``.dat`` file has one row per
    SNP: column 0 is the SNP id, columns 1 and 2 are not used by this
    reader, and the remaining columns hold one value per individual.

    :returns: a ``SnpData`` whose value matrix is individuals x SNPs. If
        either the iid list or the SNP list is empty, an (uninitialised)
        empty-shaped matrix is returned without reading the ``.dat`` file.
    :raises Exception: if the SNP ids in the ``.dat`` file do not exactly
        match those in the ``.map`` file.
    :raises AssertionError: if the number of value columns differs from the
        number of iids.
    """
    row = SnpReader._read_fam(self.filename, remove_suffix="dat")
    col, col_property = SnpReader._read_map_or_bim(
        self.filename, remove_suffix="dat", add_suffix="map")
    if len(row) == 0 or len(col) == 0:
        return SnpData(iid=row, sid=col, pos=col_property,
                       val=np.empty([len(row), len(col)]))

    datfields = pd.read_csv(self.filename, delimiter='\t', header=None,
                            index_col=False)

    # dtype=str is the native string dtype on both Python 2 and 3; the old
    # "string" alias is rejected by NumPy on Python 3.
    # NOTE(review): assumes `col` is a native-str array - confirm against
    # SnpReader._read_map_or_bim.
    if not np.array_equal(np.array(datfields[0], dtype=str), col):
        raise Exception("Expect snp list in map file to exactly match snp list in dat file")

    # Drop the three leading non-value columns (positions 0, 1 and 2).
    value_fields = datfields.iloc[:, 3:]
    assert len(row) == value_fields.shape[1], "Expect # iids in fam file to match dat file"

    # .values replaces DataFrame.as_matrix(), which newer pandas removed.
    # Transpose from SNP-major rows to individuals x SNPs.
    val = value_fields.values.T
    snpdata = SnpData(iid=row, sid=col, pos=col_property, val=val)
    return snpdata
def _read_pstdata(self):
    """Load a ``.ped`` file and its ``.map`` companion into a SnpData.

    SNP ids and positions come from the ``.map`` file. From the ``.ped``
    file, columns 0-1 become the iid and columns 6 onward are treated as
    allele pairs (two text columns per SNP).

    For each SNP the stored value is how many of an individual's two alleles
    match the first allele of the first non-missing individual. Where an
    individual's first allele equals ``self.missing`` the value is ``NaN``.

    :returns: a ``SnpData`` built from iid, sid, pos and the
        individuals x SNPs value matrix.
    """
    col, col_property = SnpReader._read_map_or_bim(
        self.filename, remove_suffix="ped", add_suffix="map")

    # Force a 2-D result so a file with a single individual can be sliced
    # by column like any other.
    ped = np.loadtxt(self.filename, dtype='str', comments=None, ndmin=2)
    row = ped[:, 0:2]
    snpsstr = ped[:, 6:]
    inan = snpsstr == self.missing

    # `//` (not `/`) so the array dimension stays an int on Python 3.
    n_snps = snpsstr.shape[1] // 2
    snps = np.zeros((snpsstr.shape[0], n_snps))
    for i in range(n_snps):
        first, second = 2 * i, 2 * (i + 1)
        missing_rows = inan[:, first]
        present_rows = ~missing_rows
        snps[missing_rows, i] = np.nan
        vals = snpsstr[present_rows, first:second]
        if vals.shape[0] == 0:
            # No individual has this SNP, so there is no reference allele;
            # the column stays all NaN.
            continue
        snps[present_rows, i] += (vals == vals[0, 0]).sum(1)

    snpdata = SnpData(iid=row, sid=col, pos=col_property, val=snps)
    return snpdata
def _run_once(self):
    """Lazily load row/column metadata for the bed file set, at most once.

    Any of ``_row``, ``_col`` and ``_col_property`` that is already present
    on the instance is kept; only the missing pieces are read from the
    ``.fam`` / ``.bim`` files next to ``self.filename``. The metadata is then
    validated with ``self._assert_iid_sid_pos()``. Unless
    ``self.skip_format_check`` is set, the bed file is opened and closed
    again (presumably so format problems surface early - see ``_open_bed``).
    """
    if self._ran_once:
        return
    self._ran_once = True

    row_loaded = hasattr(self, "_row")
    if not row_loaded:
        self._row = SnpReader._read_fam(self.filename, remove_suffix="bed")

    # Column ids and properties come from the same file, so if either one is
    # absent both are (re)read together.
    cols_loaded = hasattr(self, "_col") and hasattr(self, "_col_property")
    if not cols_loaded:
        ids, properties = SnpReader._read_map_or_bim(
            self.filename, remove_suffix="bed", add_suffix="bim")
        self._col = ids
        self._col_property = properties

    self._assert_iid_sid_pos()

    if self.skip_format_check:
        return
    self._open_bed()
    self._close_bed()
def _run_once(self):
    """Load metadata and open the ``.bed`` file, validating its header, once.

    On the first call this reads the ``.fam`` and ``.bim`` files next to
    ``self.filename`` into ``_row``, ``_col`` and ``_col_property``,
    validates them with ``self._assert_iid_sid_pos()``, then opens the
    ``.bed`` file in binary mode and checks its three header bytes. On
    success the open handle is left in ``self._filepointer``, positioned just
    after the header. Subsequent calls do nothing.

    :raises Exception: if the first two bytes are not the bed magic number,
        or if the third byte does not indicate SNP-major layout. The file is
        closed before the exception propagates.
    """
    if self._ran_once:
        return
    self._ran_once = True

    self._row = SnpReader._read_fam(self.filename, remove_suffix="bed")
    self._col, self._col_property = SnpReader._read_map_or_bim(
        self.filename, remove_suffix="bed", add_suffix="bim")
    self._assert_iid_sid_pos()

    bedfile = SnpReader._name_of_other_file(self.filename, "bed", "bed")
    self._filepointer = open(bedfile, "rb")
    try:
        # The file is opened in binary mode, so read() yields bytes on
        # Python 3; comparing against str literals would never match there.
        mode = self._filepointer.read(2)
        if mode != b'l\x1b':
            raise Exception('No valid binary BED file')
        mode = self._filepointer.read(1)  # \x01 = SNP major \x00 = individual major
        if mode != b'\x01':
            raise Exception('only SNP-major is implemented')
    except Exception:
        # Do not leak the OS file handle when the header is rejected.
        self._filepointer.close()
        raise
    logging.info("bed file is open {0}".format(bedfile))
def run_once(self):
    """Load the ``.map`` and ``.ped`` files into memory, at most once.

    On the first call this reads SNP ids and positions from the ``.map``
    file into ``_sid`` / ``_pos``, loads the ``.ped`` file, takes its first
    two columns as ``_iid``, validates the metadata with
    ``self._assert_iid_sid_pos()``, and builds ``self._snps``
    (individuals x SNPs). Subsequent calls do nothing.

    Columns 6 onward of the ``.ped`` file are read as allele pairs. Each
    value is the number of alleles in the pair equal to the first allele of
    the first non-missing individual for that SNP; an individual whose first
    allele equals ``self.missing`` gets ``NaN``.
    """
    if self._ran_once:
        return
    self._ran_once = True

    # NOTE(review): remove_suffix="bed" here differs from the "ped" suffix
    # used for the ped file on the next line - confirm this is intended.
    self._sid, self._pos = SnpReader._read_map_or_bim(
        self.basefilename, remove_suffix="bed", add_suffix="map")
    pedfile = SnpReader._name_of_other_file(
        self.basefilename, remove_suffix="ped", add_suffix="ped")

    # ndmin=2 keeps a one-individual file two-dimensional so the column
    # slicing below still works.
    ped = np.loadtxt(pedfile, dtype='str', comments=None, ndmin=2)
    self._iid = ped[:, 0:2]
    self._assert_iid_sid_pos()

    snpsstr = ped[:, 6:]
    inan = snpsstr == self.missing

    # Integer division: both the array dimension and the loop bound must be
    # ints, which `/` does not give on Python 3.
    snp_count = snpsstr.shape[1] // 2
    self._snps = np.zeros((snpsstr.shape[0], snp_count))
    for i in range(snp_count):
        is_missing = inan[:, 2 * i]
        self._snps[is_missing, i] = np.nan
        vals = snpsstr[~is_missing, 2 * i:2 * (i + 1)]
        if vals.shape[0] == 0:
            # Every individual is missing this SNP: the column is already
            # all NaN and there is no reference allele to count against.
            continue
        self._snps[~is_missing, i] += (vals == vals[0, 0]).sum(1)