def _parse_samples(self, samples, samp_fmt, site):
    '''Parse a sample entry according to the format specified in the FORMAT
    column.

    NOTE: this method has a cython equivalent and care must be taken
    to keep the two methods equivalent

    ``samp_fmt`` is the raw FORMAT string; its parsed form is looked up in
    (and, on first use, stored into) ``self._format_cache``.  ``samples`` is
    iterated in lockstep with ``self.samples`` and each entry is split on
    ``':'`` into sub-fields.

    Returns whatever ``cparse.parse_samples`` returns when ``cparse`` is
    truthy; otherwise a list with one ``_Call(site, name, data)`` per
    sample.

    Raises ``ValueError`` when an Integer/Float/Numeric sub-field cannot be
    converted to a number.
    '''
    # check whether we already know how to parse this format
    if samp_fmt not in self._format_cache:
        self._format_cache[samp_fmt] = self._parse_sample_format(samp_fmt)
    samp_fmt = self._format_cache[samp_fmt]

    if cparse:
        return cparse.parse_samples(
            self.samples, samples, samp_fmt, samp_fmt._types,
            samp_fmt._nums, site)

    samp_data = []
    _map = self._map

    nfields = len(samp_fmt._fields)

    for name, sample in itertools.izip(self.samples, samples):

        # parse the data for this sample
        sampdat = [None] * nfields

        for i, vals in enumerate(sample.split(':')):

            # short circuit the most common
            if samp_fmt._fields[i] == 'GT':
                sampdat[i] = vals
                continue
            elif vals == ".":
                sampdat[i] = None
                continue

            entry_num = samp_fmt._nums[i]
            entry_type = samp_fmt._types[i]

            # we don't need to split single entries
            if entry_num == 1 or ',' not in vals:

                if entry_type == 'Integer':
                    try:
                        sampdat[i] = int(vals)
                    except ValueError:
                        # integer-typed field holding a non-integer number
                        sampdat[i] = float(vals)
                elif entry_type == 'Float' or entry_type == 'Numeric':
                    # 'Numeric' is converted here exactly as in the
                    # multi-value branch below, so a lone value no longer
                    # stays a string.
                    # NOTE(review): confirm the cython counterpart agrees.
                    sampdat[i] = float(vals)
                else:
                    sampdat[i] = vals

                # A lone value is stored unwrapped even when the declared
                # count is not 1 (the former ``sampdat[i] = (sampdat[i])``
                # was a self-assignment with no effect and was dropped).
                continue

            vals = vals.split(',')

            if entry_type == 'Integer':
                try:
                    sampdat[i] = _map(int, vals)
                except ValueError:
                    sampdat[i] = _map(float, vals)
            elif entry_type == 'Float' or entry_type == 'Numeric':
                sampdat[i] = _map(float, vals)
            else:
                sampdat[i] = vals

        # create a call object
        call = _Call(site, name, samp_fmt(*sampdat))
        samp_data.append(call)

    return samp_data
def _parse_samples(self, samples, samp_fmt, site):
    '''Build one call object per sample from the record's sample columns.

    NOTE: this method has a cython equivalent and care must be taken
    to keep the two methods equivalent

    The FORMAT string ``samp_fmt`` is resolved through
    ``self._format_cache``; a format seen for the first time is parsed with
    ``self._parse_sample_format`` and stored there.  When ``cparse`` is
    truthy the work is handed to ``cparse.parse_samples`` and its result is
    returned unchanged.  Otherwise each entry of ``samples`` is paired with
    the matching name in ``self.samples``, split on ``':'`` and converted
    field by field, and a list of ``_Call(site, name, data)`` objects is
    returned.
    '''
    # resolve the FORMAT string, parsing it only the first time it is seen
    cache = self._format_cache
    if samp_fmt not in cache:
        cache[samp_fmt] = self._parse_sample_format(samp_fmt)
    fmt = cache[samp_fmt]

    if cparse:
        return cparse.parse_samples(self.samples, samples, fmt,
                                    fmt._types, fmt._nums, site)

    convert_all = self._map
    field_names = fmt._fields
    width = len(field_names)
    calls = []

    for sample_name, column in zip(self.samples, samples):
        parsed = [None] * width

        for idx, raw in enumerate(column.split(':')):
            field = field_names[idx]

            # the genotype string is kept verbatim
            if field == 'GT':
                parsed[idx] = raw
                continue

            # genotype filters go through the dedicated filter parser
            if field == 'FT':
                parsed[idx] = self._parse_filter(raw)
                continue

            # empty or '.' means missing; the slot already holds None
            if raw == "." or not raw:
                continue

            count = fmt._nums[idx]
            kind = fmt._types[idx]

            if count == 1:
                # exactly one value declared: no splitting needed
                if kind == 'Integer':
                    try:
                        value = int(raw)
                    except ValueError:
                        value = float(raw)
                elif kind in ('Float', 'Numeric'):
                    value = float(raw)
                else:
                    value = raw
            else:
                pieces = raw.split(',')
                if kind == 'Integer':
                    try:
                        value = convert_all(int, pieces)
                    except ValueError:
                        value = convert_all(float, pieces)
                elif kind in ('Float', 'Numeric'):
                    value = convert_all(float, pieces)
                else:
                    value = pieces

            parsed[idx] = value

        # wrap the converted fields in a call object
        calls.append(_Call(site, sample_name, fmt(*parsed)))

    return calls
def _parse_samples(self, samples, samp_fmt, site, EntryDbID):
    '''Parse the sample columns of a record and push every sub-field to
    ``self.db``.

    NOTE: this method has a cython equivalent and care must be taken
    to keep the two methods equivalent

    For each key of the FORMAT string ``samp_fmt`` a writer is chosen:
    ``self.db.create<KEY>`` for the keys listed below, and
    ``self.db.createIndividualDefault`` for any other key.  For every sample
    an entry is created with ``self.db.createIndividualEntry(EntryDbID,
    index)`` and each raw sub-field is passed to its writer before it is
    converted.

    Returns the result of ``cparse.parse_samples`` when ``cparse`` is
    truthy, otherwise a list of ``_Call(site, name, data)`` objects.
    '''
    # TODO at some point add DB
    # TODO 1 remove print when ready
    # (single-argument parenthesised form: prints the same text under the
    # Python 2 print statement)
    print(samp_fmt)

    # Supported
    # TODO individual
    # JULIA: AD DP, GLE, GL, EC GP, GT, FT, PL, GQ, HQ, PS, PQ
    dedicated_keys = ("AD", "DP", "EC", "FT", "GL", "GLE", "GP",
                      "GQ", "GT", "HQ", "PL", "PQ", "PS")
    writers = []
    custom_keys = []
    for key in samp_fmt.split(":"):
        if key in dedicated_keys:
            writers.append(getattr(self.db, "create" + key))
        else:
            custom_keys.append(key)
            writers.append(self.db.createIndividualDefault)

    # check whether we already know how to parse this format
    cache = self._format_cache
    if samp_fmt not in cache:
        cache[samp_fmt] = self._parse_sample_format(samp_fmt)
    fmt = cache[samp_fmt]

    # NOTE(review): this early return happens before any self.db call, so
    # nothing is written to the DB when cparse is available -- confirm that
    # is intended.
    if cparse:
        return cparse.parse_samples(
            self.samples, samples, fmt, fmt._types, fmt._nums, site)

    convert_all = self._map
    width = len(fmt._fields)
    calls = []

    pairs = itertools.izip(self.samples, samples)
    for ind_number, (name, sample) in enumerate(pairs):
        ind_id = self.db.createIndividualEntry(EntryDbID, ind_number)
        if ind_id == -1:
            # NOTE(review): processing continues with ind_id == -1
            print("Failed to create individual entry")

        custom_pos = 0

        # parse the data for this sample
        parsed = [None] * width

        for idx, raw in enumerate(sample.split(':')):
            # TODO individ here
            writer = writers[idx]
            if writer == self.db.createIndividualDefault:
                # the default writer also receives the FORMAT key itself
                writer(custom_keys[custom_pos], ind_id, raw)
                custom_pos += 1
            else:
                writer(ind_id, raw)

            # missing values are stored as None
            if raw in ('.', './.'):
                parsed[idx] = None
                continue

            count = fmt._nums[idx]
            kind = fmt._types[idx]

            if count == 1 or ',' not in raw:
                # single entries are not split
                # TODO: add DB upload and subroutines
                if kind == 'Integer':
                    value = int(raw)
                elif kind == 'Float':
                    value = float(raw)
                else:
                    value = raw
            else:
                pieces = raw.split(',')
                if kind == 'Integer':
                    value = convert_all(int, pieces)
                elif kind in ('Float', 'Numeric'):
                    value = convert_all(float, pieces)
                else:
                    value = pieces

            parsed[idx] = value

        # create a call object
        calls.append(_Call(site, name, fmt(*parsed)))

    return calls
def _parse_samples(self, samples, samp_fmt, site):
    '''Parse a sample entry according to the format specified in the FORMAT
    column.

    NOTE: this method has a cython equivalent and care must be taken
    to keep the two methods equivalent

    ``self._format_cache`` maps a raw FORMAT string to the triple
    ``(fields, types, nums)`` produced by ``self._parse_sample_format``.
    Each entry of ``samples`` is paired with the matching name in
    ``self.samples`` and split on ``':'``.

    Returns whatever ``cparse.parse_samples`` returns when ``cparse`` is
    truthy; otherwise a list with one ``_Call(site, name, sampdict)`` per
    sample, where ``sampdict`` maps every FORMAT field to its converted
    value (``None`` when missing or absent).

    Raises ``ValueError`` when an Integer/Float/Numeric sub-field cannot be
    converted to a number.
    '''
    # check whether we already know how to parse this format
    if samp_fmt in self._format_cache:
        samp_fmt, samp_fmt_types, samp_fmt_nums = \
            self._format_cache[samp_fmt]
    else:
        sf, samp_fmt_types, samp_fmt_nums = \
            self._parse_sample_format(samp_fmt)
        self._format_cache[samp_fmt] = (sf, samp_fmt_types, samp_fmt_nums)
        samp_fmt = sf

    if cparse:
        return cparse.parse_samples(
            self.samples, samples, samp_fmt, samp_fmt_types,
            samp_fmt_nums, site)

    samp_data = []
    _map = self._map

    for name, sample in itertools.izip(self.samples, samples):

        # parse the data for this sample; every field starts out as None
        sampdict = dict.fromkeys(samp_fmt)

        for fmt, entry_type, entry_num, vals in itertools.izip(
                samp_fmt, samp_fmt_types, samp_fmt_nums,
                sample.split(':')):

            # short circuit the most common
            if vals == '.' or vals == './.':
                sampdict[fmt] = None
                continue

            # we don't need to split single entries
            if entry_num == 1 or ',' not in vals:

                if entry_type == 'Integer':
                    sampdict[fmt] = int(vals)
                elif entry_type == 'Float' or entry_type == 'Numeric':
                    # 'Numeric' is converted here exactly as in the
                    # multi-value branch below, so a lone value no longer
                    # stays a string.
                    # NOTE(review): confirm the cython counterpart agrees.
                    sampdict[fmt] = float(vals)
                else:
                    sampdict[fmt] = vals

                # A lone value is stored unwrapped even when the declared
                # count is not 1 (the former ``sampdict[fmt] =
                # (sampdict[fmt])`` was a self-assignment with no effect).
                continue

            vals = vals.split(',')

            if entry_type == 'Integer':
                sampdict[fmt] = _map(int, vals)
            elif entry_type == 'Float' or entry_type == 'Numeric':
                sampdict[fmt] = _map(float, vals)
            else:
                sampdict[fmt] = vals

        # create a call object
        call = _Call(site, name, sampdict)
        samp_data.append(call)

    return samp_data
def _parse_samples(self, samples, samp_fmt, EntryDbID):
    '''Push every sub-field of every sample column to ``self.db``.

    NOTE: this method has a cython equivalent and care must be taken
    to keep the two methods equivalent

    For each key of the FORMAT string ``samp_fmt`` a writer is chosen:
    ``self.db.create<KEY>`` for the keys listed below, and
    ``self.db.createIndividualDefault`` for any other key.  For every sample
    an entry is created with ``self.db.createIndividualEntry(EntryDbID,
    index)`` and each raw sub-field is passed to its writer.

    The sub-fields are also converted to numbers/lists, but on the
    pure-Python path the converted values are not stored anywhere and the
    method returns ``None``; the conversions can still raise ``ValueError``.
    '''
    dedicated_keys = ("AD", "DP", "EC", "FT", "GL", "GLE", "GP",
                      "GQ", "GT", "HQ", "PL", "PQ", "PS")
    writers = []
    custom_keys = []
    for key in samp_fmt.split(":"):
        if key in dedicated_keys:
            writers.append(getattr(self.db, "create" + key))
        else:
            custom_keys.append(key)
            writers.append(self.db.createIndividualDefault)

    # resolve the FORMAT string, parsing it only the first time it is seen
    cache = self._format_cache
    if samp_fmt not in cache:
        cache[samp_fmt] = self._parse_sample_format(samp_fmt)
    fmt = cache[samp_fmt]

    # NOTE(review): ``site`` is not a parameter of this method, so this
    # branch depends on a name defined outside it (NameError otherwise).
    # It also returns before any self.db call is made.
    if cparse:
        return cparse.parse_samples(
            self.samples, samples, fmt, fmt._types, fmt._nums, site)

    convert_all = self._map
    width = len(fmt._fields)

    pairs = itertools.izip(self.samples, samples)
    for ind_number, (name, sample) in enumerate(pairs):
        ind_id = self.db.createIndividualEntry(EntryDbID, ind_number)
        if ind_id == -1:
            # NOTE(review): processing continues with ind_id == -1
            # (single-argument parenthesised form: prints the same text
            # under the Python 2 print statement)
            print("Failed to create individual entry")

        custom_pos = 0

        # parse the data for this sample
        parsed = [None] * width

        for idx, raw in enumerate(sample.split(':')):
            writer = writers[idx]
            if writer == self.db.createIndividualDefault:
                # the default writer also receives the FORMAT key itself
                writer(custom_keys[custom_pos], ind_id, raw)
                custom_pos += 1
            else:
                writer(ind_id, raw)

            # missing values are stored as None
            if raw in ('.', './.'):
                parsed[idx] = None
                continue

            count = fmt._nums[idx]
            kind = fmt._types[idx]

            if count == 1 or ',' not in raw:
                # single entries are not split
                if kind == 'Integer':
                    value = int(raw)
                elif kind == 'Float':
                    value = float(raw)
                else:
                    value = raw
            else:
                pieces = raw.split(',')
                if kind == 'Integer':
                    value = convert_all(int, pieces)
                elif kind in ('Float', 'Numeric'):
                    value = convert_all(float, pieces)
                else:
                    value = pieces

            parsed[idx] = value
def _parse_samples(self, samples, samp_fmt, site):
    '''Parse a sample entry according to the format specified in the FORMAT
    column.

    NOTE: this method has a cython equivalent and care must be taken
    to keep the two methods equivalent

    ``samp_fmt`` is the raw FORMAT string; its parsed form is looked up in
    (and, on first use, stored into) ``self._format_cache``.  Each entry of
    ``samples`` is paired with the matching name in ``self.samples`` and
    split on ``':'``.

    This variant is tolerant of bad data: a numeric sub-field that cannot
    be converted becomes ``None`` (or a list of ``None`` for multi-value
    fields), and sub-fields beyond the declared FORMAT are reported on
    stderr and ignored.

    Returns whatever ``cparse.parse_samples`` returns when ``cparse`` is
    truthy; otherwise a list with one ``_Call(site, name, data)`` per
    sample.
    '''
    # check whether we already know how to parse this format
    if samp_fmt not in self._format_cache:
        self._format_cache[samp_fmt] = self._parse_sample_format(samp_fmt)
    samp_fmt = self._format_cache[samp_fmt]

    if cparse:
        return cparse.parse_samples(self.samples, samples, samp_fmt,
                                    samp_fmt._types, samp_fmt._nums, site)

    samp_data = []
    _map = self._map

    nfields = len(samp_fmt._fields)

    for name, sample in itertools.izip(self.samples, samples):

        # parse the data for this sample
        sampdat = [None] * nfields

        for i, vals in enumerate(sample.split(':')):

            # More sub-fields than the FORMAT declares: report and stop.
            # This check must run before anything writes sampdat[i];
            # previously an extra '.' sub-field hit the missing-value
            # branch first and raised IndexError.
            # (assumes _nums has one entry per FORMAT field -- TODO confirm)
            if i >= len(samp_fmt._nums):
                sys.stderr.write(
                    'i >= len(samp_fmt._nums)' + ' - i=' + str(i) +
                    ', len(samp_fmt._nums)=' + str(len(samp_fmt._nums)) +
                    ', name=' + name + ', vals=' + str(vals) + '\n')
                break

            # short circuit the most common
            if vals == '.' or vals == './.':
                sampdat[i] = None
                continue

            entry_num = samp_fmt._nums[i]
            entry_type = samp_fmt._types[i]

            # we don't need to split single entries
            if entry_num == 1 or ',' not in vals:

                if entry_type == 'Integer':
                    try:
                        sampdat[i] = int(vals)
                    except ValueError:
                        try:
                            sampdat[i] = float(vals)
                        except ValueError:
                            sampdat[i] = None
                elif entry_type == 'Float' or entry_type == 'Numeric':
                    # 'Numeric' is converted here exactly as in the
                    # multi-value branch below, so a lone value no longer
                    # stays a string.
                    try:
                        sampdat[i] = float(vals)
                    except ValueError:
                        sampdat[i] = None
                else:
                    sampdat[i] = vals

                # A lone value is stored unwrapped even when the declared
                # count is not 1 (the former ``sampdat[i] = (sampdat[i])``
                # was a self-assignment with no effect and was dropped).
                continue

            vals = vals.split(',')

            if entry_type == 'Integer':
                try:
                    sampdat[i] = _map(int, vals)
                except ValueError:
                    try:
                        sampdat[i] = _map(float, vals)
                    except ValueError:
                        sampdat[i] = [None] * len(vals)
            elif entry_type == 'Float' or entry_type == 'Numeric':
                try:
                    sampdat[i] = _map(float, vals)
                except ValueError:
                    sampdat[i] = [None] * len(vals)
            else:
                sampdat[i] = vals

        # create a call object
        call = _Call(site, name, samp_fmt(*sampdat))
        samp_data.append(call)

    return samp_data