예제 #1
0
    def addFamilies(self, eventsfile, discard=()):
        """Insert one row into the Families table per family in *eventsfile*.

        The Families table is created first (via ``makeFamiliesTable``) when
        ``tableExists`` reports it missing.

        Args:
            eventsfile: passed to ``tablelib.read_table``; each row must
                provide the keys "partid", "famrate", "dup", "loss" and
                "genes".
            discard: iterable of family ids ("partid" values) to skip.
                Skipped families are reported through ``util.logger``.

        For every remaining family the tree file and FASTA file named by
        ``getTreeFile`` / ``getFastaFile`` are read to derive:
          * the tree length: sum of ``dist`` over the items yielded by
            iterating the tree;
          * the median sequence length, stored multiplied by 3
            (presumably converting peptide to nucleotide length -- TODO
            confirm).
        """
        if not tableExists(self.cur, "Families"):
            self.makeFamiliesTable()

        util.tic("add families")
        events_tab = tablelib.read_table(eventsfile)
        # NOTE(review): the lookup result is not used in this method; the
        # call is kept in case building the index validates the "partid"
        # column -- confirm before removing.
        events_tab.lookup("partid")
        family_gene_names = self.makeFamilyGeneNames()
        discard = set(discard)

        for row in events_tab:
            famid = row["partid"]
            if famid in discard:
                util.logger("discarding '%s'" % famid)
                continue

            tree = treelib.read_tree(self.getTreeFile(famid))
            treelen = sum(x.dist for x in tree)
            seqs = fasta.read_fasta(self.getFastaFile(famid))
            # a real list (not a one-shot iterator) so median can sort/index
            seqlen = stats.median([len(seq) for seq in seqs.values()])

            # families without a gene-name entry get two empty strings
            names = family_gene_names.get(famid, ("", ""))

            # NOTE(review): values are interpolated straight into the SQL
            # text, so a double quote inside an id or name breaks the
            # statement. Moving to driver placeholders needs the cursor's
            # paramstyle, which is not visible from this method.
            self.cur.execute(
                """INSERT INTO Families VALUES 
                                ("%s", "%s", %f, %f, %f, %d, %d, %d,
                                 "%s");""" %
                (famid, names[0], row["famrate"], treelen, seqlen * 3,
                 row["dup"], row["loss"], row["genes"], names[1]))
        util.toc()
    def addFamilies(self, eventsfile, discard=()):
        """Insert one row into the Families table per family in *eventsfile*.

        The Families table is created first (via ``makeFamiliesTable``) when
        ``tableExists`` reports it missing.

        Args:
            eventsfile: passed to ``tablelib.read_table``; each row must
                provide the keys "partid", "famrate", "dup", "loss" and
                "genes".
            discard: iterable of family ids ("partid" values) to skip.
                Skipped families are reported through ``util.logger``.

        For every remaining family the tree file and FASTA file named by
        ``getTreeFile`` / ``getFastaFile`` are read to derive:
          * the tree length: sum of ``dist`` over the items yielded by
            iterating the tree;
          * the median sequence length, stored multiplied by 3
            (presumably converting peptide to nucleotide length -- TODO
            confirm).
        """
        if not tableExists(self.cur, "Families"):
            self.makeFamiliesTable()

        util.tic("add families")
        events_tab = tablelib.read_table(eventsfile)
        # NOTE(review): the lookup result is not used in this method; the
        # call is kept in case building the index validates the "partid"
        # column -- confirm before removing.
        events_tab.lookup("partid")
        family_gene_names = self.makeFamilyGeneNames()
        discard = set(discard)

        for row in events_tab:
            famid = row["partid"]
            if famid in discard:
                util.logger("discarding '%s'" % famid)
                continue

            tree = treelib.read_tree(self.getTreeFile(famid))
            treelen = sum(x.dist for x in tree)
            seqs = fasta.read_fasta(self.getFastaFile(famid))
            # a real list (not a one-shot iterator) so median can sort/index
            seqlen = stats.median([len(seq) for seq in seqs.values()])

            # families without a gene-name entry get two empty strings
            names = family_gene_names.get(famid, ("", ""))

            # NOTE(review): values are interpolated straight into the SQL
            # text, so a double quote inside an id or name breaks the
            # statement. Moving to driver placeholders needs the cursor's
            # paramstyle, which is not visible from this method.
            self.cur.execute(
                """INSERT INTO Families VALUES
                ("%s", "%s", %f, %f, %f, %d, %d, %d,
                "%s");""" %
                (famid, names[0], row["famrate"], treelen, seqlen * 3,
                 row["dup"], row["loss"], row["genes"], names[1]))
        util.toc()
    def layout_frags(self, genome_name, chrom_name, start, end, direction=1):
        """Lay out the reference fragment and its syntenic fragments.

        Args:
            genome_name, chrom_name: identify the reference chromosome; used
                as the key into ``self.chroms_lookup`` and passed to
                ``self.db.get_regions``.
            start, end: bounds of the reference region. The reference
                fragment is clamped to ``max(start, 0)`` and
                ``min(end, ref_chrom.end)``.
            direction: strand given to the reference fragment. Other
                fragments receive ``direction`` or ``-direction`` (the code
                compares against 1, so presumably the values are 1 / -1).

        Side effects: adds the reference fragment and every placeable
        fragment to ``self.frags``, calls ``self.layout_frag_contents`` on
        each of them, and stores ``self.block_lookup`` (gene -> block) and
        ``self.refLookup`` (gene -> orthologous reference gene).
        """
        ref_chrom = self.chroms_lookup[(genome_name, chrom_name)]

        # setup genome display order
        order = dict((genome, i) for i, genome in enumerate(self.genomes))

        # swap the genome with order 0 and the reference genome
        j = order[self.ref_genome]
        order[self.genomes[0]] = j
        order[self.ref_genome] = 0

        # init reference fragment
        ref_frag = Frag(genome=genome_name,
                        chrom=chrom_name,
                        start=max(start, 0),
                        end=min(end, ref_chrom.end),
                        strand=direction,
                        x=max(start, 0),
                        y=0)
        self.frags.add(ref_frag)
        self.layout_frag_contents(ref_frag)

        # find all synteny blocks in this region
        # sort blocks by appearance in ref_chrom
        blocks = list(self.filter_blocks(self.blocks, ref_chrom, start, end))

        def blocksort(a):
            # flip == 0 means region1 is the side lying on ref_chrom.
            # The key has to be returned explicitly: without a return every
            # key is None and the sort does nothing.
            block, flip = a
            if flip == 0:
                return block.region1.start
            return block.region2.start
        blocks.sort(key=blocksort)

        # make lookup for genes to block and block to fragment
        block_lookup = {}
        frag_lookup = {}
        for block, flip in blocks:
            # the non-reference side of the block
            if flip == 0:
                other = block.region2
            else:
                other = block.region1

            frag = Frag()
            frag.genome = other.species
            frag.chrom = other.seqname
            frag_lookup[block] = frag

            for gene2 in iter_chrom(self.db.get_regions(frag.genome,
                                                        frag.chrom),
                                    other.start, other.end):
                block_lookup[gene2] = block

        self.block_lookup = block_lookup

        # find all genes that will be drawn
        # walk along ref_chrom and store drawn genes into fragments
        refLookup = {}
        for gene in iter_chrom(self.db.get_regions(genome_name, chrom_name),
                               start, end):
            for name2 in self.orth_lookup.get(gene.data["ID"], []):
                gene2 = self.db.get_region(name2)
                if gene2 in block_lookup:
                    frag_lookup[block_lookup[gene2]].genes.append(gene2)
                    refLookup[gene2] = gene
        self.refLookup = refLookup

        # determine fragment dimensions
        for frag in frag_lookup.values():
            if len(frag.genes) == 0:
                # nothing to draw; x = None marks the fragment as unplaceable
                frag.x = None
                continue
            frag.genes.sort(key=lambda a: a.start)

            # set fragment start and end
            frag.start = frag.genes[0].start
            frag.end = frag.genes[-1].end

            # find fragment direction: walk the fragment's genes in order
            # and vote on whether their reference partners move forward
            # (+1) or backward (-1); equal positions do not vote
            vote = 0
            last = None

            for gene2 in frag.genes:
                pos = refLookup[gene2].start

                if last is not None and pos != last:
                    if last < pos:
                        vote += 1
                    else:
                        vote -= 1
                last = pos

            # a tie (vote == 0) counts as the opposite direction
            if vote > 0:
                frag.direction = direction
            else:
                frag.direction = -direction

            # find fragment x-coordinate: shift from the reference x by the
            # median difference between each gene's offset inside this
            # fragment and its partner's offset inside the reference fragment
            diffs = []
            for gene2 in frag.genes:
                if direction == 1:
                    offset1 = refLookup[gene2].start - ref_frag.start
                else:
                    offset1 = ref_frag.end - refLookup[gene2].end

                if frag.direction == 1:
                    offset2 = gene2.start - frag.start
                else:
                    offset2 = frag.end - gene2.end
                diffs.append(offset2 - offset1)
            frag.x = ref_frag.x - stats.median(diffs)

        # place blocks
        # per-genome y cursor; presumably util.Dict hands back the default
        # for genomes not seen yet -- TODO confirm
        fragY = util.Dict(default=-self.genome_sep)
        for block, flip in blocks:
            frag = frag_lookup[block]
            otherGenome = frag.genome

            if frag.x is None:
                # fragment could not be placed
                continue

            frag.y = fragY[otherGenome] - \
                     ((order[otherGenome] - 1) *
                       self.max_genome_sep)

            # re-get all genes between those coordinates
            #frag.genes = list(iter_chrom(self.db.get_regions(frag.genome,
            #                                                 frag.chrom),
            #                             frag.start, frag.end))

            # store and layout frag
            self.frags.add(frag)
            self.layout_frag_contents(frag)

            # stagger fragments: step the cursor down by frag_sep and wrap
            # back to the top once it passes -max_genome_sep
            fragY[otherGenome] -= self.frag_sep
            if fragY[otherGenome] < -self.max_genome_sep:
                fragY[otherGenome] = -self.genome_sep
예제 #4
0
    def layout_frags(self, genome_name, chrom_name, start, end, direction=1):
        """Lay out the reference fragment and its syntenic fragments.

        Args:
            genome_name, chrom_name: identify the reference chromosome; used
                as the key into ``self.chroms_lookup`` and passed to
                ``self.db.get_regions``.
            start, end: bounds of the reference region. The reference
                fragment is clamped to ``max(start, 0)`` and
                ``min(end, ref_chrom.end)``.
            direction: strand given to the reference fragment. Other
                fragments receive ``direction`` or ``-direction`` (the code
                compares against 1, so presumably the values are 1 / -1).

        Side effects: adds the reference fragment and every placeable
        fragment to ``self.frags``, calls ``self.layout_frag_contents`` on
        each of them, and stores ``self.block_lookup`` (gene -> block) and
        ``self.refLookup`` (gene -> orthologous reference gene).
        """
        ref_chrom = self.chroms_lookup[(genome_name, chrom_name)]

        # setup genome display order
        order = dict((genome, i) for i, genome in enumerate(self.genomes))

        # swap the genome with order 0 and the reference genome
        j = order[self.ref_genome]
        order[self.genomes[0]] = j
        order[self.ref_genome] = 0

        # init reference fragment
        ref_frag = Frag(genome=genome_name,
                        chrom=chrom_name,
                        start=max(start, 0),
                        end=min(end, ref_chrom.end),
                        strand=direction,
                        x=max(start, 0),
                        y=0)
        self.frags.add(ref_frag)
        self.layout_frag_contents(ref_frag)

        # find all synteny blocks in this region
        # sort blocks by appearance in ref_chrom
        blocks = list(self.filter_blocks(self.blocks, ref_chrom, start, end))

        def blocksort(a):
            # flip == 0 means region1 is the side lying on ref_chrom.
            # The key has to be returned explicitly: without a return every
            # key is None and the sort does nothing.
            block, flip = a
            if flip == 0:
                return block.region1.start
            return block.region2.start

        blocks.sort(key=blocksort)

        # make lookup for genes to block and block to fragment
        block_lookup = {}
        frag_lookup = {}
        for block, flip in blocks:
            # the non-reference side of the block
            if flip == 0:
                other = block.region2
            else:
                other = block.region1

            frag = Frag()
            frag.genome = other.species
            frag.chrom = other.seqname
            frag_lookup[block] = frag

            for gene2 in iter_chrom(
                    self.db.get_regions(frag.genome, frag.chrom), other.start,
                    other.end):
                block_lookup[gene2] = block

        self.block_lookup = block_lookup

        # find all genes that will be drawn
        # walk along ref_chrom and store drawn genes into fragments
        refLookup = {}
        for gene in iter_chrom(self.db.get_regions(genome_name, chrom_name),
                               start, end):
            for name2 in self.orth_lookup.get(gene.data["ID"], []):
                gene2 = self.db.get_region(name2)
                if gene2 in block_lookup:
                    frag_lookup[block_lookup[gene2]].genes.append(gene2)
                    refLookup[gene2] = gene
        self.refLookup = refLookup

        # determine fragment dimensions
        for frag in frag_lookup.values():
            if len(frag.genes) == 0:
                # nothing to draw; x = None marks the fragment as unplaceable
                frag.x = None
                continue
            frag.genes.sort(key=lambda a: a.start)

            # set fragment start and end
            frag.start = frag.genes[0].start
            frag.end = frag.genes[-1].end

            # find fragment direction: walk the fragment's genes in order
            # and vote on whether their reference partners move forward
            # (+1) or backward (-1); equal positions do not vote
            vote = 0
            last = None

            for gene2 in frag.genes:
                pos = refLookup[gene2].start

                if last is not None and pos != last:
                    if last < pos:
                        vote += 1
                    else:
                        vote -= 1
                last = pos

            # a tie (vote == 0) counts as the opposite direction
            if vote > 0:
                frag.direction = direction
            else:
                frag.direction = -direction

            # find fragment x-coordinate: shift from the reference x by the
            # median difference between each gene's offset inside this
            # fragment and its partner's offset inside the reference fragment
            diffs = []
            for gene2 in frag.genes:
                if direction == 1:
                    offset1 = refLookup[gene2].start - ref_frag.start
                else:
                    offset1 = ref_frag.end - refLookup[gene2].end

                if frag.direction == 1:
                    offset2 = gene2.start - frag.start
                else:
                    offset2 = frag.end - gene2.end
                diffs.append(offset2 - offset1)
            frag.x = ref_frag.x - stats.median(diffs)

        # place blocks
        # per-genome y cursor; presumably util.Dict hands back the default
        # for genomes not seen yet -- TODO confirm
        fragY = util.Dict(default=-self.genome_sep)
        for block, flip in blocks:
            frag = frag_lookup[block]
            otherGenome = frag.genome

            if frag.x is None:
                # fragment could not be placed
                continue

            frag.y = fragY[otherGenome] - \
                     ((order[otherGenome] - 1) *
                       self.max_genome_sep)

            # re-get all genes between those coordinates
            #frag.genes = list(iter_chrom(self.db.get_regions(frag.genome,
            #                                                 frag.chrom),
            #                             frag.start, frag.end))

            # store and layout frag
            self.frags.add(frag)
            self.layout_frag_contents(frag)

            # stagger fragments: step the cursor down by frag_sep and wrap
            # back to the top once it passes -max_genome_sep
            fragY[otherGenome] -= self.frag_sep
            if fragY[otherGenome] < -self.max_genome_sep:
                fragY[otherGenome] = -self.genome_sep