예제 #1
0
 def too_slow_test_layout_hut(self):
     '''Test generating layout coordinates for the Hutterites pedigree. This test is more about
     speed, to see that we do it in reasonable time.'''
     p = io_pedigree.read(itu.HUTT_PED)
     positions = pedigree_plot_laplacian._layout_positions(p.graph) #@UnusedVariable
     # Save to file - slow
     '''
예제 #2
0
    def test_marriage_graph_layout_positions(self):
        '''Check graph sizes for the small pedigree, then run the layout computation.

        The layout result itself is not asserted. The dictionary literal at the end is
        evaluated and discarded; it appears to record the positions that were once expected.
        '''
        pedigree = io_pedigree.read(itu.SMALL_FILE)
        graph = pedigree.graph
        extended = pedigree_plot_laplacian._marriage_graph(graph)

        # (graph under test, expected node count, expected edge count): the pedigree graph
        # first, then the extended graph built from it.
        size_checks = ((graph, 8, 10), (extended, 12, 13))
        for checked, num_nodes, num_edges in size_checks:
            assert_equal(checked.number_of_nodes(), num_nodes, 'Wrong number of nodes')
            assert_equal(checked.number_of_edges(), num_edges, 'Wrong number of edges')

        # Only verifies that the layout runs; the returned positions are dropped.
        pedigree_plot_laplacian._layout_positions(graph, extended)
        # Expected positions (not compared with anything)
        {
            1: (-0.23562670914672229, 2),
            2: (-0.063382627268225591, 3),
            3: (-0.23562670914672237, 3),
            4: (0.1502736499569044, 1),
            5: (0.15027364995690434, 2),
            6: (0.43352532974526942, 0),
            7: (0.43352532974526858, 0),
            8: (-0.48586913569278134, 2),
            -1: (-0.054615070741569148, 2.5),
            -4: (0.31358843226722749, 0.5),
            -3: (-0.054615070741569086, 2.5),
            -2: (-0.35145106893398231, 1.5)
        }
예제 #3
0
def read_pedigree_from_test_file(file_name, genotyped_id_file=None):
    '''Load a pedigree from a PLINK TFAM file and sanity-check the result.

    file_name         - TFAM file to read.
    genotyped_id_file - passed through unchanged to io_pedigree.read().

    Returns the loaded pedigree. Fails an assertion if the number of graph nodes differs
    from the number of rows in the file, or if the pedigree graph is not a directed acyclic
    graph.
    '''
    # The file is parsed a second time only to count its rows. genfromtxt returns a 1-D
    # array for a single-row file, so force 2-D to keep shape[0] equal to the row count.
    data = np.atleast_2d(np.genfromtxt(file_name, np.dtype(int)))
    p = io_pedigree.read(file_name, genotyped_id_file=genotyped_id_file)
    assert_equal(p._graph.number_of_nodes(), data.shape[0], 'Incorrect number of nodes')
    assert nx.is_directed_acyclic_graph(p._graph), 'Pedigree is not a DAG'
    return p
예제 #4
0
 def too_slow_test_layout_hut(self):
     '''Test generating layout coordinates for the Hutterites pedigree. This test is more about
     speed, to see that we do it in reasonable time.'''
     p = io_pedigree.read(itu.HUTT_PED)
     positions = pedigree_plot_laplacian._layout_positions(
         p.graph)  #@UnusedVariable
     # Save to file - slow
     '''
예제 #5
0
def read_pedigree_from_test_file(file_name, genotyped_id_file=None):
    '''Load a pedigree from a PLINK TFAM file and sanity-check the result.

    file_name         - TFAM file to read.
    genotyped_id_file - passed through unchanged to io_pedigree.read().

    Returns the loaded pedigree. Fails an assertion if the number of graph nodes differs
    from the number of rows in the file, or if the pedigree graph is not a directed acyclic
    graph.
    '''
    # The file is parsed a second time only to count its rows. genfromtxt returns a 1-D
    # array for a single-row file, so force 2-D to keep shape[0] equal to the row count.
    data = np.atleast_2d(np.genfromtxt(file_name, np.dtype(int)))
    p = io_pedigree.read(file_name, genotyped_id_file=genotyped_id_file)
    assert_equal(p._graph.number_of_nodes(), data.shape[0],
                 'Incorrect number of nodes')
    assert nx.is_directed_acyclic_graph(p._graph), 'Pedigree is not a DAG'
    return p
예제 #6
0
 def test_save_load_pedigree_plink(self):
     '''Check that saving and loading a pedigree object from file preserves the original object.'''
     p = itu.Templates.pedigree_hut()
     # The context manager closes the temporary file even if writing or reading raises,
     # so a failing round trip does not leak the file handle.
     with tempfile.TemporaryFile() as out_file:
         io_pedigree.write(p, out_file)
         # Rewind so that reading starts at the beginning of what was just written
         out_file.seek(0)
         p2 = io_pedigree.read(out_file, genotyped_id_file=itu.GENOTYPE_SAMPLE + '.tfam')
     assert_equal(p, p2, 'Saving and loading did not restore the original pedigree')
예제 #7
0
 def test_marriage_graph(self):
     '''Check node and edge counts of the Hutterites pedigree graph and its extended graph.'''
     pedigree = io_pedigree.read(itu.HUTT_PED)
     graph = pedigree.graph
     extended = pedigree_plot_laplacian._marriage_graph(graph)

     # (graph under test, expected node count, expected edge count): the pedigree graph
     # first, then the extended graph built from it.
     size_checks = ((graph, 3671, 7200), (extended, 4661, 5580))
     for checked, num_nodes, num_edges in size_checks:
         assert_equal(checked.number_of_nodes(), num_nodes, 'Wrong number of nodes')
         assert_equal(checked.number_of_edges(), num_edges, 'Wrong number of edges')
예제 #8
0
    def read(self, file_name, genotyped_id_file=None):
        '''Load pedigree from file in old format.

        file_name         - pedigree file to read.
        genotyped_id_file - passed through unchanged to io_pedigree.read().

        Returns a PedigreeOldStudy wrapping the pedigree loaded by io_pedigree.read() together
        with a dictionary that maps the values of file column 1 to those of file column 6
        (0-based) -- presumably sample ID to old-study generation; verify against the file
        format.
        '''

        p = io_pedigree.read(file_name, genotyped_id_file)
        # Load data from text file a second time to read the old-study-specific column.
        # Not efficient. genfromtxt returns a 1-D array for a single-row file, so force 2-D
        # to keep the column selection below valid in that case as well.
        data = np.atleast_2d(np.genfromtxt(file_name, np.dtype(int)))
        old_generation = dict(data[:,(1,6)])

        # Wrap by old pedigree object
        return PedigreeOldStudy(p, old_generation)
        
예제 #9
0
    def test_marriage_graph(self):
        '''Check node and edge counts of the Hutterites pedigree graph and its extended graph.'''
        pedigree = io_pedigree.read(itu.HUTT_PED)
        graph = pedigree.graph
        extended = pedigree_plot_laplacian._marriage_graph(graph)

        # (graph under test, expected node count, expected edge count): the pedigree graph
        # first, then the extended graph built from it.
        size_checks = ((graph, 3671, 7200), (extended, 4661, 5580))
        for checked, num_nodes, num_edges in size_checks:
            assert_equal(checked.number_of_nodes(), num_nodes, 'Wrong number of nodes')
            assert_equal(checked.number_of_edges(), num_edges, 'Wrong number of edges')
예제 #10
0
    def test_lca_small(self):
        '''Check lowest-common-ancestor results for node pairs of the small pedigree.'''
        pedigree = io_pedigree.read(itu.SMALL_FILE)

        # Direct siblings
        self.__compute_and_check_lca(pedigree, 6, 7, [4, 5], 2)

        # Far siblings: additionally check the path from the returned ancestor to each node
        node_u, node_v = 6, 8
        ancestor = self.__compute_and_check_lca(pedigree, node_u, node_v, [3], 3)
        path_to_u = shortest_path(pedigree.graph, ancestor, node_u)
        assert_equal(path_to_u, [3, 5, 6], 'Wrong shorted path from ancestor to node u')
        path_to_v = shortest_path(pedigree.graph, ancestor, node_v)
        assert_equal(path_to_v, [3, 8], 'Wrong shorted path from ancestor to node v')

        # No common ancestor exists
        self.__compute_and_check_lca(pedigree, 1, 2, [None], Infinity)
예제 #11
0
    def test_marriage_graph_layout_positions(self):
        '''Check graph sizes for the small pedigree, then run the layout computation.

        The layout result itself is not asserted. The dictionary literal at the end is
        evaluated and discarded; it appears to record the positions that were once expected.
        '''
        pedigree = io_pedigree.read(itu.SMALL_FILE)
        graph = pedigree.graph
        extended = pedigree_plot_laplacian._marriage_graph(graph)

        # (graph under test, expected node count, expected edge count): the pedigree graph
        # first, then the extended graph built from it.
        size_checks = ((graph, 8, 10), (extended, 12, 13))
        for checked, num_nodes, num_edges in size_checks:
            assert_equal(checked.number_of_nodes(), num_nodes, 'Wrong number of nodes')
            assert_equal(checked.number_of_edges(), num_edges, 'Wrong number of edges')

        # Only verifies that the layout runs; the returned positions are dropped.
        pedigree_plot_laplacian._layout_positions(graph, extended)
        # Expected positions (not compared with anything)
        {
            1: (-0.23562670914672229, 2),
            2: (-0.063382627268225591, 3),
            3: (-0.23562670914672237, 3),
            4: (0.1502736499569044, 1),
            5: (0.15027364995690434, 2),
            6: (0.43352532974526942, 0),
            7: (0.43352532974526858, 0),
            8: (-0.48586913569278134, 2),
            -1: (-0.054615070741569148, 2.5),
            -4: (0.31358843226722749, 0.5),
            -3: (-0.054615070741569086, 2.5),
            -2: (-0.35145106893398231, 1.5)
        }
예제 #12
0
    def test_lca_small(self):
        '''Check lowest-common-ancestor results for node pairs of the small pedigree.'''
        pedigree = io_pedigree.read(itu.SMALL_FILE)

        # Direct siblings
        self.__compute_and_check_lca(pedigree, 6, 7, [4, 5], 2)

        # Far siblings: additionally check the path from the returned ancestor to each node
        node_u, node_v = 6, 8
        ancestor = self.__compute_and_check_lca(pedigree, node_u, node_v, [3], 3)
        path_to_u = shortest_path(pedigree.graph, ancestor, node_u)
        assert_equal(path_to_u, [3, 5, 6],
                     'Wrong shorted path from ancestor to node u')
        path_to_v = shortest_path(pedigree.graph, ancestor, node_v)
        assert_equal(path_to_v, [3, 8],
                     'Wrong shorted path from ancestor to node v')

        # No common ancestor exists
        self.__compute_and_check_lca(pedigree, 1, 2, [None], Infinity)
예제 #13
0
 def test_families(self):
     '''Check the number of families found in the Hutterites pedigree.'''
     pedigree = io_pedigree.read(itu.HUTT_PED)
     # Materialize the result of pt.families() so that its items can be counted
     families = list(pt.families(pedigree.graph))
     assert_equal(len(families), 990, 'Wrong number of pedigree families')
예제 #14
0
    --------------------------------------------------
    Main program
    --------------------------------------------------
    '''
    options, args = __parse_command_line_args(sys.argv)
    var_file_prefix, input_file, output_file = args
    genotype_filter = __GENOTYPE_FILTER[options.genotype_filter]
    genotype_cleaner = __GENOTYPE_CLEANER[options.genotype_filter]
    allele_start_index = 1 if options.type == 'imputed' else 0
    try:
        # Read location list
        snp_dao = SnpDao(options.db_url)
        input_file = sys.stdin if args[1] == '-' else open(input_file, 'rb')

        # Sample IDs are read from the pedigree, and must match the genotype files' ordering
        pedigree = io_pedigree.read(options.pedigree_file,
                                    options.genotype_id_file)
        node_of = pedigree.node_of
        sample_id = np.loadtxt(
            options.id_file, dtype=np.int,
            ndmin=1) if options.id_file else pedigree._sample_id
        N = pedigree.num_genotyped
        sample_index = np.array(
            filter(lambda x: x is not None and x < N,
                   map(node_of.get, sample_id))
        )  # Filter FINDIVs that are not in the imputed FINDIV set

        out = open(output_file, 'wb')
        # Print header line
        if options.output_format == 'matrix':
            out.write('\t'.join([
                'variant', 'chromosome', 'bp_start', 'bp_stop', 'variant_type',
예제 #15
0
 def test_families(self):
     '''Check the number of families found in the Hutterites pedigree.'''
     pedigree = io_pedigree.read(itu.HUTT_PED)
     # Materialize the result of pt.families() so that its items can be counted
     families = list(pt.families(pedigree.graph))
     assert_equal(len(families), 990, 'Wrong number of pedigree families')
예제 #16
0
 def test_lca_hut(self):
     '''Check a lowest-common-ancestor computation on the large Hutterites pedigree.'''
     pedigree = io_pedigree.read(itu.HUTT_PED)
     node_u, node_v = 169512, 170362
     self.__compute_and_check_lca(pedigree, node_u, node_v, [8551], 9)
예제 #17
0
    --------------------------------------------------
    Main program
    --------------------------------------------------
    '''
    options, args = __parse_command_line_args(sys.argv)
    var_file_prefix, input_file, output_file = args
    genotype_filter = __GENOTYPE_FILTER[options.genotype_filter]
    genotype_cleaner = __GENOTYPE_CLEANER[options.genotype_filter]
    allele_start_index = 1 if options.type == 'imputed' else 0
    try:
        # Read location list
        snp_dao = SnpDao(options.db_url)
        input_file = sys.stdin if args[1] == '-' else open(input_file, 'rb')
        
        # Sample IDs are read from the pedigree, and must match the genotype files' ordering
        pedigree = io_pedigree.read(options.pedigree_file, options.genotype_id_file)
        node_of = pedigree.node_of
        sample_id = np.loadtxt(options.id_file, dtype=np.int, ndmin=1) if options.id_file else pedigree._sample_id
        N = pedigree.num_genotyped
        sample_index = np.array(filter(lambda x: x is not None and x < N, map(node_of.get, sample_id)))  # Filter FINDIVs that are not in the imputed FINDIV set
                          
        out = open(output_file, 'wb')
        # Print header line
        if options.output_format == 'matrix':
            out.write('\t'.join(['variant', 'chromosome', 'bp_start', 'bp_stop', 'variant_type', 'ref_allele', 'minor_allele'] + map(str, sample_id)) + '\n')
                
        # Extract data using tabix for each location. A location may be a range of bps and may
        # correspond to multiple output lines.
        snp_count = [0, 0, 0, 0, 0]  # #found variants; #not-found variants; #multiply-matching variants; #nameless variants
#        print list(enumerate(it.chain.from_iterable(__parse_line(line, snp_dao, debug=options.debug) 
#                                                                                                for line in (line.rstrip('\n').rstrip('\r') 
예제 #18
0
 def test_lca_hut(self):
     '''Check a lowest-common-ancestor computation on the large Hutterites pedigree.'''
     pedigree = io_pedigree.read(itu.HUTT_PED)
     node_u, node_v = 169512, 170362
     self.__compute_and_check_lca(pedigree, node_u, node_v, [8551], 9)
예제 #19
0
파일: io.py 프로젝트: orenlivne/ober
def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:

        Default          Override Option    Data                                Format
        ======================================================================================
        prefix.pdg.tfam  pedigree           Pedigree adjacency                  PLINK TFAM
                                            (genotyped+nongenotyped samples)
        prefix.tfam      pedigree_genotyped Genotyped sample pedigree
                                            (sub-graph of the pedigree)         PLINK TFAM
                                            corresponding to prefix.tped
        prefix.tped      genotype           Genotype data                       PLINK TPED
        prefix.hap.tped  haplotype*         Haplotype data                      PLINK TPED
        prefix.err       error**            Genotype errors flagged             Integer array (snps x samples)
        prefix.info      info               Problem info                        pickle (binary)
        prefix.frm       frames             LD-independent SNP frames           text file
        prefix.lam       lam***             Haplotype est. recombination rate   text file

        * - hap data not loaded if this option is None.
        ** - no error data is loaded (None is passed to Problem) if this file is not found
             or this option is set to None.
        *** - data not loaded if this file is not found.

    Keyword arguments:
        prefix  - common prefix of the default file names above. If it is empty or omitted,
                  every file option that is not passed explicitly is None.
        verbose - if true, print a progress message before each file is read.
        pedigree, pedigree_genotyped, genotype, haplotype, error, info, frames, lam -
                  override the corresponding default file location.

    Returns the Problem built from the loaded data. Its info attribute is replaced by the
    unpickled content of the info file when both a haplotype and an info location are set.

    Raises ValueError if the pedigree, pedigree_genotyped or genotype location is missing.
    '''

    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def overrideable_option(name, default):
        # An explicit keyword wins; the prefix-based default applies only if a prefix is set
        return kwargs.get(name, default if prefix else None)

    pedigree = overrideable_option('pedigree', prefix + '.pdg.tfam')
    pedigree_genotyped = overrideable_option('pedigree_genotyped', prefix + '.tfam')
    genotype = overrideable_option('genotype', prefix + '.tped')
    haplotype = overrideable_option('haplotype', prefix + '.hap.tped')
    error_file = overrideable_option('error', prefix + '.err')
    info = overrideable_option('info', prefix + '.info')
    # Each required location is tested on its own: comparing the whole list with None is
    # always true and would let missing files through. The error file is not required,
    # because a missing error location is handled when it is loaded below.
    if any(location is None for location in (pedigree, pedigree_genotyped, genotype)):
        raise ValueError('Must specify a prefix or pedigree, pedigree_genotyped, genotype files')
    frames_file = overrideable_option('frames', prefix + '.frm')
    lam_file = overrideable_option('lam', prefix + '.lam')

    # Load data
    def print_location(location):
        # Unset locations are shown as '-' in progress messages
        return location if location else '-'

    # print is called with a single parenthesized argument so that the statements below
    # produce the same output whether print is a statement or a function.
    if verbose:
        print('Reading pedigree from %s, %s ...' % (print_location(pedigree), print_location(pedigree_genotyped),))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)

    if verbose:
        print('Reading genotype data from %s ...' % (print_location(genotype),))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)

    if verbose:
        print('Reading haplotype data from %s ...' % (print_location(haplotype),))
    h = io_genotype.read('plink', 'haplotype', tped=haplotype, load_ids=False) if haplotype else None

    if verbose:
        print('Reading error data from %s ...' % (print_location(error_file),))
    error = np.loadtxt(error_file) if error_file and os.path.isfile(error_file) else None

    if verbose:
        print('Reading frame data from %s ...' % (print_location(frames_file),))
    frames = db_gene.snp.ld_graph.read_frames(frames_file) if frames_file else None

    lam = np.loadtxt(lam_file) if lam_file and os.path.isfile(lam_file) else None

    # info = ProblemInfo(p, g) if info is None else info
    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)
    # The stored problem info is read only when haplotype data was requested as well
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info,))
        # NOTE(review): unpickling can execute arbitrary code; only load trusted info files.
        with open(info, 'rb') as info_in:
            problem.info = pickle.load(info_in)
    return problem
예제 #20
0
파일: io.py 프로젝트: orenlivne/ober
def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:

        Default          Override Option    Data                                Format
        ======================================================================================
        prefix.pdg.tfam  pedigree           Pedigree adjacency                  PLINK TFAM
                                            (genotyped+nongenotyped samples)
        prefix.tfam      pedigree_genotyped Genotyped sample pedigree
                                            (sub-graph of the pedigree)         PLINK TFAM
                                            corresponding to prefix.tped
        prefix.tped      genotype           Genotype data                       PLINK TPED
        prefix.hap.tped  haplotype*         Haplotype data                      PLINK TPED
        prefix.err       error**            Genotype errors flagged             Integer array (snps x samples)
        prefix.info      info               Problem info                        pickle (binary)
        prefix.frm       frames             LD-independent SNP frames           text file
        prefix.lam       lam***             Haplotype est. recombination rate   text file

        * - hap data not loaded if this option is None.
        ** - no error data is loaded (None is passed to Problem) if this file is not found
             or this option is set to None.
        *** - data not loaded if this file is not found.

    Keyword arguments:
        prefix  - common prefix of the default file names above. If it is empty or omitted,
                  every file option that is not passed explicitly is None.
        verbose - if true, print a progress message before each file is read.
        pedigree, pedigree_genotyped, genotype, haplotype, error, info, frames, lam -
                  override the corresponding default file location.

    Returns the Problem built from the loaded data. Its info attribute is replaced by the
    unpickled content of the info file when both a haplotype and an info location are set.

    Raises ValueError if the pedigree, pedigree_genotyped or genotype location is missing.
    '''

    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def overrideable_option(name, default):
        # An explicit keyword wins; the prefix-based default applies only if a prefix is set
        return kwargs.get(name, default if prefix else None)

    pedigree = overrideable_option('pedigree', prefix + '.pdg.tfam')
    pedigree_genotyped = overrideable_option('pedigree_genotyped',
                                             prefix + '.tfam')
    genotype = overrideable_option('genotype', prefix + '.tped')
    haplotype = overrideable_option('haplotype', prefix + '.hap.tped')
    error_file = overrideable_option('error', prefix + '.err')
    info = overrideable_option('info', prefix + '.info')
    # Each required location is tested on its own: comparing the whole list with None is
    # always true and would let missing files through. The error file is not required,
    # because a missing error location is handled when it is loaded below.
    required_locations = (pedigree, pedigree_genotyped, genotype)
    if any(location is None for location in required_locations):
        raise ValueError(
            'Must specify a prefix or pedigree, pedigree_genotyped, genotype files'
        )
    frames_file = overrideable_option('frames', prefix + '.frm')
    lam_file = overrideable_option('lam', prefix + '.lam')

    # Load data
    def print_location(location):
        # Unset locations are shown as '-' in progress messages
        return location if location else '-'

    # print is called with a single parenthesized argument so that the statements below
    # produce the same output whether print is a statement or a function.
    if verbose:
        print('Reading pedigree from %s, %s ...' % (
            print_location(pedigree),
            print_location(pedigree_genotyped),
        ))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)

    if verbose:
        print('Reading genotype data from %s ...' % (
            print_location(genotype), ))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)

    if verbose:
        print('Reading haplotype data from %s ...' % (
            print_location(haplotype), ))
    h = io_genotype.read('plink', 'haplotype', tped=haplotype,
                         load_ids=False) if haplotype else None

    if verbose:
        print('Reading error data from %s ...' % (
            print_location(error_file), ))
    error = np.loadtxt(
        error_file) if error_file and os.path.isfile(error_file) else None

    if verbose:
        print('Reading frame data from %s ...' % (
            print_location(frames_file), ))
    frames = db_gene.snp.ld_graph.read_frames(
        frames_file) if frames_file else None

    lam = np.loadtxt(
        lam_file) if lam_file and os.path.isfile(lam_file) else None

    # info = ProblemInfo(p, g) if info is None else info
    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)
    # The stored problem info is read only when haplotype data was requested as well
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info, ))
        # NOTE(review): unpickling can execute arbitrary code; only load trusted info files.
        with open(info, 'rb') as info_in:
            problem.info = pickle.load(info_in)
    return problem