def read_tabix(file_name, genotyped_id_file=os.environ['OBER_DATA'] + '/hutt/hutt.3chipoverlap.clean.fam'):
    '''Read a Haplotype object from an ITABIX CGI-imputed file.

    Line format (tab-delimited):

        7849538 chr11 1909005 1909006 snp T C dbsnp.107:rs3817198 <genotypes>

    Columns 0-7 are SNP metadata; every column from index 8 onward is one
    genotype entry. Of each genotype entry, character 0 is parsed as an
    integer haplotype type and characters 1 and 2 are allele letters that are
    translated through CGI_LETTER_TO_ALLELE.

    Parameters:
        file_name: file to load; passed straight to np.loadtxt.
        genotyped_id_file: file handed to read_sample_id() to obtain the
            sample IDs. NOTE: the default is computed when this function is
            defined, so the OBER_DATA environment variable must be set at
            import time or a KeyError is raised.

    Returns:
        The object built by
        GenotypeFactory.new_instance('haplotype', data, snp, sample_id, hap_type=hap_type).
    '''
    # Load entire file into memory. It must fit, if we are to load it into a Genotype object.
    # ndmin=2 keeps a one-line file as a single row; without it np.loadtxt returns a 1-D
    # array and the per-line loops below would iterate over fields instead of lines.
    d = np.loadtxt(file_name, str, ndmin=2)

    # Read SNP metadata into a record array
    snp_dtype = [
        ('chrom', np.uint8),       # Chromosome # containing the SNP
        # NOTE(review): np.chararray is used as a field type here; presumably NumPy treats it
        # as a generic object field - confirm before swapping it for another type.
        ('name', np.chararray),    # SNP name (e.g., 'rs...')
        # Builtin float replaces the np.float alias, which newer NumPy releases no longer provide.
        ('dist_cm', float),        # Genetic position [CENTI-Morgans!!]
        ('base_pair', np.uint)     # Base pair position on chromosome
    ]
    # line[1][3:] drops the 3-character prefix ('chr' in the sample line) before the integer
    # conversion; the genetic distance is not present in the file and is filled with 0.
    snp = np.array([(int(line[1][3:]), line[7], 0, int(line[3])) for line in d], dtype=snp_dtype)

    # One (allele, allele) pair per genotype entry, per line
    data = np.array([[(CGI_LETTER_TO_ALLELE[x[1]], CGI_LETTER_TO_ALLELE[x[2]]) for x in line[8:]]
                     for line in d])
    # Leading character of every genotype entry, as an integer
    hap_type = np.array([[int(x[0]) for x in line[8:]] for line in d])
    sample_id = read_sample_id(genotyped_id_file)

    # Construct object
    return GenotypeFactory.new_instance('haplotype', data, snp, sample_id, hap_type=hap_type)
def __init__(self, file_name, affy_bim, genotyped_id_file, debug=False):
    """Initialize the object: load pedigree indices, Affy SNP names, sample IDs and data.

    Parameters:
        file_name: passed to self.__stats_struct(), whose result is stored in self.data.
        affy_bim: file whose second column (index 1) is read as strings and stored
            as the set self.affy.
        genotyped_id_file: file handed to read_sample_id() to obtain self.sample_id.
        debug: flag stored as self.debug.
    """
    p = im.hutt_pedigree()
    self.qf = p.quasi_founders
    # Indices in [0, p.num_genotyped) that are not quasi-founders. np.arange is used
    # instead of xrange so the same line runs on both Python 2 and Python 3.
    self.non_qf = np.setdiff1d(np.arange(p.num_genotyped), p.quasi_founders)
    self.debug = debug

    # Load affy SNP names
    self.affy = set(np.loadtxt(affy_bim, usecols=[1], dtype=str))
    self.sample_id = read_sample_id(genotyped_id_file)

    # Load data, cache statistics in the data field
    self.data = self.__stats_struct(file_name)
def read_tabix(file_name, genotyped_id_file=os.environ["OBER_DATA"] + "/hutt/hutt.3chipoverlap.clean.fam"):
    """Read a Haplotype object from an ITABIX CGI-imputed file.

    Line format (tab-delimited):

        7849538 chr11 1909005 1909006 snp T C dbsnp.107:rs3817198 <genotypes>

    Columns 0-7 are SNP metadata; every column from index 8 onward is one
    genotype entry. Of each genotype entry, character 0 is parsed as an
    integer haplotype type and characters 1 and 2 are allele letters that are
    translated through CGI_LETTER_TO_ALLELE.

    Parameters:
        file_name: file to load; passed straight to np.loadtxt.
        genotyped_id_file: file handed to read_sample_id() to obtain the
            sample IDs. NOTE: the default is computed when this function is
            defined, so the OBER_DATA environment variable must be set at
            import time or a KeyError is raised.

    Returns:
        The object built by
        GenotypeFactory.new_instance("haplotype", data, snp, sample_id, hap_type=hap_type).
    """
    # Load entire file into memory. It must fit, if we are to load it into a Genotype object.
    # ndmin=2 keeps a one-line file as a single row; without it np.loadtxt returns a 1-D
    # array and the per-line loops below would iterate over fields instead of lines.
    d = np.loadtxt(file_name, str, ndmin=2)

    # Read SNP metadata into a record array
    snp_dtype = [
        ("chrom", np.uint8),       # Chromosome # containing the SNP
        # NOTE(review): np.chararray is used as a field type here; presumably NumPy treats it
        # as a generic object field - confirm before swapping it for another type.
        ("name", np.chararray),    # SNP name (e.g., 'rs...')
        # Builtin float replaces the np.float alias, which newer NumPy releases no longer provide.
        ("dist_cm", float),        # Genetic position [CENTI-Morgans!!]
        ("base_pair", np.uint),    # Base pair position on chromosome
    ]
    # line[1][3:] drops the 3-character prefix ("chr" in the sample line) before the integer
    # conversion; the genetic distance is not present in the file and is filled with 0.
    snp = np.array([(int(line[1][3:]), line[7], 0, int(line[3])) for line in d], dtype=snp_dtype)

    # One (allele, allele) pair per genotype entry, per line
    data = np.array([[(CGI_LETTER_TO_ALLELE[x[1]], CGI_LETTER_TO_ALLELE[x[2]]) for x in line[8:]]
                     for line in d])
    # Leading character of every genotype entry, as an integer
    hap_type = np.array([[int(x[0]) for x in line[8:]] for line in d])
    sample_id = read_sample_id(genotyped_id_file)

    # Construct object
    return GenotypeFactory.new_instance("haplotype", data, snp, sample_id, hap_type=hap_type)