Beispiel #1
0
def read_tabix(file_name, genotyped_id_file=None):
    '''Read a Haplotype object from an ITABIX CGI-imputed file.

    Line format: tab-delimited
    7849538    chr11    1909005    1909006    snp    T    C    dbsnp.107:rs3817198    <genotypes>

    Column 1 must have the form 'chr<N>' with an integer N. Column 3 is stored
    as the SNP base-pair position and column 7 as the SNP name. Every genotype
    column (index 8 onward) is read as a string whose first character is the
    integer haplotype type and whose next two characters are allele letters,
    translated through CGI_LETTER_TO_ALLELE.

    Parameters:
        file_name: file to load; it is read in full by np.loadtxt.
        genotyped_id_file: file passed to read_sample_id(). If None, defaults
            to $OBER_DATA/hutt/hutt.3chipoverlap.clean.fam, resolved when the
            function is called.

    Returns:
        The object built by GenotypeFactory.new_instance('haplotype', ...).

    Raises:
        KeyError: if genotyped_id_file is None and OBER_DATA is not set.
    '''
    if genotyped_id_file is None:
        # Resolved lazily so that merely importing this module does not require
        # the OBER_DATA environment variable to be set.
        genotyped_id_file = (os.environ['OBER_DATA'] +
                             '/hutt/hutt.3chipoverlap.clean.fam')

    # Load entire file into memory. It must fit, if we are to load it into a Genotype object.
    # ndmin=2 keeps a single-line file as one row; without it loadtxt returns a
    # 1-D array and the loops below would iterate over fields instead of lines.
    d = np.loadtxt(file_name, str, ndmin=2)

    # Read SNP metadata into a record array
    snp_dtype = [
        ('chrom', np.uint8),  # Chromosome # containing the SNP
        ('name', np.chararray),  # SNP name (e.g., 'rs...')
        # Builtin float: the np.float alias was removed in NumPy 1.24
        ('dist_cm', float),  # Genetic position [CENTI-Morgans!!]
        ('base_pair', np.uint)  # Base pair position on chromosome
    ]
    # line[1][3:] strips the 'chr' prefix; the genetic distance is not in the
    # file, so it is filled with 0.
    snp = np.array([(int(line[1][3:]), line[7], 0, int(line[3]))
                    for line in d],
                   dtype=snp_dtype)
    # Characters 1 and 2 of each genotype string are the two allele letters
    data = np.array([[(CGI_LETTER_TO_ALLELE[x[1]], CGI_LETTER_TO_ALLELE[x[2]])
                      for x in line[8:]] for line in d])
    # Character 0 of each genotype string is the integer haplotype type
    hap_type = np.array([[int(x[0]) for x in line[8:]] for line in d])
    sample_id = read_sample_id(genotyped_id_file)
    # Construct object
    return GenotypeFactory.new_instance('haplotype',
                                        data,
                                        snp,
                                        sample_id,
                                        hap_type=hap_type)
Beispiel #2
0
 def __init__(self, file_name, affy_bim, genotyped_id_file, debug=False):
     '''Load the pedigree founders, affy SNP names, sample IDs and data statistics.

     Parameters:
         file_name: passed to self.__stats_struct(); the result is stored in
             self.data.
         affy_bim: file read with np.loadtxt; its second column (index 1) is
             stored as the set self.affy.
         genotyped_id_file: passed to read_sample_id(); the result is stored
             in self.sample_id.
         debug: stored as self.debug.
     '''
     p = im.hutt_pedigree()
     self.qf = p.quasi_founders
     # Indices in [0, num_genotyped) that are not quasi-founders.
     # range() instead of xrange() so this also runs on Python 3.
     self.non_qf = np.setdiff1d(range(p.num_genotyped), p.quasi_founders)
     self.debug = debug
     # Load affy SNP names
     self.affy = set(np.loadtxt(affy_bim, usecols=[1], dtype=str))
     self.sample_id = read_sample_id(genotyped_id_file)
     # Load data, cache statistics in the data field
     self.data = self.__stats_struct(file_name)
Beispiel #3
0
 def __init__(self, file_name, affy_bim, genotyped_id_file, debug=False):
     '''Load the pedigree founders, affy SNP names, sample IDs and data statistics.

     Parameters:
         file_name: passed to self.__stats_struct(); the result is stored in
             self.data.
         affy_bim: file read with np.loadtxt; its second column (index 1) is
             stored as the set self.affy.
         genotyped_id_file: passed to read_sample_id(); the result is stored
             in self.sample_id.
         debug: stored as self.debug.
     '''
     p = im.hutt_pedigree()
     self.qf = p.quasi_founders
     # Indices in [0, num_genotyped) that are not quasi-founders.
     # range() instead of xrange() so this also runs on Python 3.
     self.non_qf = np.setdiff1d(range(p.num_genotyped), p.quasi_founders)
     self.debug = debug
     # Load affy SNP names
     self.affy = set(np.loadtxt(affy_bim, usecols=[1], dtype=str))
     self.sample_id = read_sample_id(genotyped_id_file)
     # Load data, cache statistics in the data field
     self.data = self.__stats_struct(file_name)
Beispiel #4
0
def read_tabix(file_name, genotyped_id_file=None):
    """Read a Haplotype object from an ITABIX CGI-imputed file.

    Line format: tab-delimited
    7849538    chr11    1909005    1909006    snp    T    C    dbsnp.107:rs3817198    <genotypes>

    Column 1 must have the form 'chr<N>' with an integer N. Column 3 is stored
    as the SNP base-pair position and column 7 as the SNP name. Every genotype
    column (index 8 onward) is read as a string whose first character is the
    integer haplotype type and whose next two characters are allele letters,
    translated through CGI_LETTER_TO_ALLELE.

    Parameters:
        file_name: file to load; it is read in full by np.loadtxt.
        genotyped_id_file: file passed to read_sample_id(). If None, defaults
            to $OBER_DATA/hutt/hutt.3chipoverlap.clean.fam, resolved when the
            function is called.

    Returns:
        The object built by GenotypeFactory.new_instance("haplotype", ...).

    Raises:
        KeyError: if genotyped_id_file is None and OBER_DATA is not set.
    """
    if genotyped_id_file is None:
        # Resolved lazily so that merely importing this module does not require
        # the OBER_DATA environment variable to be set.
        genotyped_id_file = os.environ["OBER_DATA"] + "/hutt/hutt.3chipoverlap.clean.fam"

    # Load entire file into memory. It must fit, if we are to load it into a Genotype object.
    # ndmin=2 keeps a single-line file as one row; without it loadtxt returns a
    # 1-D array and the loops below would iterate over fields instead of lines.
    d = np.loadtxt(file_name, str, ndmin=2)

    # Read SNP metadata into a record array
    snp_dtype = [
        ("chrom", np.uint8),  # Chromosome # containing the SNP
        ("name", np.chararray),  # SNP name (e.g., 'rs...')
        # Builtin float: the np.float alias was removed in NumPy 1.24
        ("dist_cm", float),  # Genetic position [CENTI-Morgans!!]
        ("base_pair", np.uint),  # Base pair position on chromosome
    ]
    # line[1][3:] strips the "chr" prefix; the genetic distance is not in the
    # file, so it is filled with 0.
    snp = np.array([(int(line[1][3:]), line[7], 0, int(line[3])) for line in d], dtype=snp_dtype)
    # Characters 1 and 2 of each genotype string are the two allele letters
    data = np.array([[(CGI_LETTER_TO_ALLELE[x[1]], CGI_LETTER_TO_ALLELE[x[2]]) for x in line[8:]] for line in d])
    # Character 0 of each genotype string is the integer haplotype type
    hap_type = np.array([[int(x[0]) for x in line[8:]] for line in d])
    sample_id = read_sample_id(genotyped_id_file)
    # Construct object
    return GenotypeFactory.new_instance("haplotype", data, snp, sample_id, hap_type=hap_type)