예제 #1
0
    def saveByChromosomeHeatmap(self,
                                filename,
                                resolution,
                                gInfo,
                                includeTrans=False):
        """
        Save binned contact maps to an h5dict, one matrix per chromosome pair.

        The genome is first switched to ``resolution``. Then, for every
        chromosome index ``chrom`` in ``range(self.genome.chrmCount)``, the
        reads whose first side lies on ``chrom`` are loaded, their ``mids1``
        / ``mids2`` values are floor-divided by ``resolution`` to obtain bin
        indices, and the (bin1, bin2) pairs are counted into a 2D array of
        shape ``(chrmLensBin[chrom], chrmLensBin[chrom2])``.

        Parameters
        ----------
        filename
            Passed unchanged to ``h5dict`` to open the output storage
            (presumably a file path -- confirm against ``h5dict``).
        resolution
            Bin size used both for ``self.genome.setResolution`` and for
            converting read midpoints into bin indices.
        gInfo
            Stored as-is in the output under the key 'genomeInformation'.
        includeTrans
            If falsy (default), only same-chromosome maps are written.
            Otherwise a map is also written for every pair with
            ``chrom2 > chrom``.

        Notes
        -----
        Output keys:

        * ``"<chrom> <chrom2>"`` (two integers separated by one space) for
          each map;
        * ``'resolution'`` and ``'genomeInformation'``.

        Same-chromosome maps are symmetrised (``map + map.T``) and their
        diagonal is then reset to the un-doubled counts.

        The method returns ``None``.
        """
        self.genome.setResolution(resolution)
        chrmCount = self.genome.chrmCount
        lensBin = self.genome.chrmLensBin

        mydict = h5dict(filename)

        # The dataset handles do not depend on the chromosome, so fetch them
        # once instead of on every iteration.
        c1 = self.h5dict.get_dataset("chrms1")
        p1 = self.h5dict.get_dataset("cuts1")

        for chrom in range(chrmCount):
            # Locate the [low, high) range of reads whose first side is on
            # `chrom`; -1 and 999999999 act as below-any / above-any
            # positions for the (chromosome, position) search key.
            # NOTE(review): assumes the data are ordered by (chrms1, cuts1)
            # -- confirm against h5dictBinarySearch.
            low = h5dictBinarySearch(c1, p1, (chrom, -1), "left")
            high = h5dictBinarySearch(c1, p1, (chrom, 999999999), "right")

            chr1 = self._getVector("chrms1", low, high)
            chr2 = self._getVector("chrms2", low, high)
            pos1 = np.array(self._getVector("mids1", low, high) // resolution,
                            dtype=np.int32)
            pos2 = np.array(self._getVector("mids2", low, high) // resolution,
                            dtype=np.int32)

            # Sanity check: the binary search must have selected only reads
            # whose first side is on `chrom`.
            assert (chr1 == chrom).all()

            # Order the reads by the chromosome of their second side so that
            # each partner chromosome occupies one contiguous slice.
            order = np.argsort(chr2)
            chr2 = chr2[order]
            pos1 = pos1[order]
            pos2 = pos2[order]

            if includeTrans:
                partners = range(chrom, chrmCount)
            else:
                partners = [chrom]

            for chrom2 in partners:
                start = np.searchsorted(chr2, chrom2, "left")
                end = np.searchsorted(chr2, chrom2, "right")
                cur1 = pos1[start:end]
                cur2 = pos2[start:end]

                # Flatten (bin1, bin2) into a single row-major index; int64
                # because the product of two bin counts can exceed int32.
                label = np.array(cur1, "int64")
                label *= lensBin[chrom2]
                label += cur2

                maxLabel = lensBin[chrom] * lensBin[chrom2]
                counts = np.bincount(label, minlength=maxLabel)
                mymap = counts.reshape((lensBin[chrom], -1))

                if chrom == chrom2:
                    # Symmetrising doubles the diagonal; remember the
                    # original diagonal and put it back afterwards. This
                    # avoids a division, so the matrix stays integer-valued
                    # regardless of the interpreter's `/` semantics.
                    diagonal = np.diag(mymap).copy()
                    mymap = mymap + mymap.T
                    fillDiagonal(mymap, diagonal)

                mydict["%d %d" % (chrom, chrom2)] = mymap

        mydict['resolution'] = resolution
        mydict['genomeInformation'] = gInfo

        # NOTE(review): statements following this return at the same
        # indentation are unreachable -- confirm whether they belong here.
        return
        desierd_fragids = []

        c1_h5 = fragments.h5dict.get_dataset("chrms1")
        p1_h5 = fragments.h5dict.get_dataset("cuts1")
        c2_h5 = fragments.h5dict.get_dataset("chrms2")
        p2_h5 = fragments.h5dict.get_dataset("cuts2")

        for ind, region in enumerate(regions):
            chrm, st, end = region
            chrm = chrm[
                1:]  #Stupid names: in genome_db they are T_nnnn due to chrmFileTemplate="N%s.fa", but normally they are NT_nnnn.
            if not chrm in genome_db.label2idx.keys():
                continue
            else:
                chrm = genome_db.label2idx[chrm]
                low1 = h5dictBinarySearch(c1_h5, p1_h5, (chrm, st), "left")
                high1 = h5dictBinarySearch(c1_h5, p1_h5, (chrm, end), "right")
                if low1 != high1:
                    desierd_fragids += list(
                        np.unique(fragments._getVector("fragids1", low1,
                                                       high1)))

                low2 = h5dictBinarySearch(c2_h5, p2_h5, (chrm, st), "left")
                high2 = h5dictBinarySearch(c2_h5, p2_h5, (chrm, end), "right")
                if low2 != high2:
                    desierd_fragids += list(
                        np.unique(fragments._getVector("fragids1", low2,
                                                       high2)))

        desierd_fragids = np.unique(desierd_fragids)
        print len(desierd_fragids)  #DEBUG