Beispiel #1
0
    def test_19_matrix_manip(self):
        if ONLY and not "19" in ONLY:
            return
        if CHKTIME:
            t0 = time()
        hic_data1 = load_hic_data_from_reads("lala-map~", resolution=10000)
        hic_map(hic_data1, savedata="lala-map.tsv~", savefig="lala.pdf")
        hic_map(hic_data1,
                by_chrom="intra",
                savedata="lala-maps~",
                savefig="lalalo~")
        hic_map(hic_data1,
                by_chrom="inter",
                savedata="lala-maps~",
                savefig="lalala~")
        # slowest part of the all test:
        hic_data2 = read_matrix("lala-map.tsv~", resolution=10000)
        self.assertEqual(hic_data1, hic_data2)
        # vals = plot_distance_vs_interactions(hic_data1)

        # self.assertEqual([round(i, 2) if str(i)!="nan" else 0.0 for i in
        #                   reduce(lambda x, y: x + y, vals)],
        #                  [-1.68, -2.08, 0.02, 2.76, -8.99, 0.0, 0.82, -6.8, 0.0])

        a, b = insert_sizes("lala-map~")
        self.assertEqual([int(a), int(b)], [43, 1033])

        hic_data1 = read_matrix(PATH + "/20Kb/chrT/chrT_A.tsv",
                                resolution=20000)
        hic_data2 = read_matrix(PATH + "/20Kb/chrT/chrT_B.tsv",
                                resolution=20000)

        corr = correlate_matrices(hic_data1, hic_data2)
        corr = [round(i, 3) for i in corr[0]]
        self.assertEqual(corr, [
            0.755, 0.729, 0.804, 0.761, 0.789, 0.776, 0.828, 0.757, 0.797,
            0.832
        ])

        ecorr = eig_correlate_matrices(hic_data1,
                                       hic_data2,
                                       savefig='lala3.pdf')

        ecorr = [round(i, 3) for i in reduce(lambda x, y: x + y, ecorr)]
        self.assertEqual(ecorr, [
            0.997, 0.322, 0.442, 0.017, 0.243, 0.014, 0.321, 0.999, 0.01,
            0.006, 0.0, 0.007, 0.451, 0.012, 0.996, 0.031, 0.013, 0.004, 0.002,
            0.006, 0.029, 0.974, 0.076, 0.03, 0.219, 0.013, 0.031, 0.08, 0.974,
            0.018, 0.028, 0.004, 0.0, 0.028, 0.034, 0.89
        ])
        system("rm -rf lala*")
        if CHKTIME:
            self.assertEqual(True, True)
            print "19", time() - t0
Beispiel #2
0
    def test_19_matrix_manip(self):
        if ONLY and ONLY != '19':
            return
        if CHKTIME:
            t0 = time()
        hic_data1 = load_hic_data_from_reads('lala-map~', resolution=10000)
        hic_map(hic_data1, savedata='lala-map.tsv~', savefig='lala.pdf~')
        hic_map(hic_data1,
                by_chrom='intra',
                savedata='lala-maps~',
                savefig='lalalo~')
        hic_map(hic_data1,
                by_chrom='inter',
                savedata='lala-maps~',
                savefig='lalala~')
        # slowest part of the all test:
        hic_data2 = read_matrix('lala-map.tsv~', resolution=10000)
        self.assertEqual(hic_data1, hic_data2)
        vals = plot_distance_vs_interactions(hic_data1)

        self.assertEqual([
            round(i, 2) if str(i) != 'nan' else 0.0
            for i in reduce(lambda x, y: x + y, vals)
        ], [-1.68, -2.08, 0.02, 2.76, -8.99, 0.0, 0.82, -6.8, 0.0])

        a, b = insert_sizes('lala-map~')
        self.assertEqual([int(a), int(b)], [43, 1033])

        hic_data1 = read_matrix('20Kb/chrT/chrT_A.tsv', resolution=20000)
        hic_data2 = read_matrix('20Kb/chrT/chrT_B.tsv', resolution=20000)

        corr = correlate_matrices(hic_data1, hic_data2)
        corr = [round(i, 3) for i in corr[0]]
        self.assertEqual(corr, [
            0.755, 0.729, 0.804, 0.761, 0.789, 0.776, 0.828, 0.757, 0.797,
            0.832
        ])

        ecorr = eig_correlate_matrices(hic_data1, hic_data2)
        ecorr = [round(i, 3) for i in reduce(lambda x, y: x + y, ecorr)]
        self.assertEqual(ecorr, [
            0.997, 0.322, 0.442, 0.017, 0.243, 0.014, 0.321, 0.999, 0.01,
            0.006, 0.0, 0.007, 0.451, 0.012, 0.996, 0.031, 0.013, 0.004, 0.002,
            0.006, 0.029, 0.974, 0.076, 0.03, 0.219, 0.013, 0.031, 0.08, 0.974,
            0.018, 0.028, 0.004, 0.0, 0.028, 0.034, 0.89
        ])
        system('rm -rf lala*')
        if CHKTIME:
            self.assertEqual(True, True)
            print '19', time() - t0
    def load_hic_read_data(self):
        """
        Load the filtered interactions into the HiC-Data data type.

        The reads are taken from 'filtered_map.tsv' inside
        ``self.parsed_reads_dir`` and binned at ``self.resolution`` (cast to
        int). The path is printed, and the loaded object is stored in
        ``self.hic_data``; nothing is returned.

        This should be used as the primary way of loading the HiC-data as
        the data is loaded in the right form for later functions. Options
        like the TAD calling also require non-normalised data.
        """
        filter_reads = self.parsed_reads_dir + '/filtered_map.tsv'

        # single-argument form: prints the same text under Python 2 and 3
        print("\nfilter_reads: " + filter_reads)

        bin_size = int(self.resolution)
        self.hic_data = load_hic_data_from_reads(filter_reads,
                                                 resolution=bin_size)
Beispiel #4
0
    def test_19_matrix_manip(self):
        if ONLY and ONLY != '19':
            return
        if CHKTIME:
            t0 = time()
        hic_data1 = load_hic_data_from_reads('lala-map~', resolution=10000)
        hic_map(hic_data1, savedata='lala-map.tsv~', savefig='lala.pdf~')
        hic_map(hic_data1, by_chrom='intra', savedata='lala-maps~', savefig='lalalo~')
        hic_map(hic_data1, by_chrom='inter', savedata='lala-maps~', savefig='lalala~')
        # slowest part of the all test:
        hic_data2 = read_matrix('lala-map.tsv~', resolution=10000)
        self.assertEqual(hic_data1, hic_data2)
        vals = plot_distance_vs_interactions(hic_data1)
        
        self.assertEqual([round(i, 2) if str(i)!='nan' else 0.0 for i in
                          reduce(lambda x, y: x + y, vals)],
                         [-1.74, 4.2, 0.52, 1.82, -0.44, 0.0, -0.5, 2.95, 0.0])
        
        a, b = insert_sizes('lala-map~')
        self.assertEqual([int(a),int(b)], [43, 1033])

        hic_data1 = read_matrix('20Kb/chrT/chrT_A.tsv', resolution=20000)
        hic_data2 = read_matrix('20Kb/chrT/chrT_B.tsv', resolution=20000)
        
        corr = correlate_matrices(hic_data1, hic_data2)
        corr =  [round(i,3) for i in corr[0]]
        self.assertEqual(corr, [0.755, 0.729, 0.804, 0.761, 0.789, 0.776, 0.828,
                                0.757, 0.797, 0.832])
        
        ecorr = eig_correlate_matrices(hic_data1, hic_data2)
        ecorr = [round(i,3) for i in reduce(lambda x, y:x+y, ecorr)]
        self.assertEqual(ecorr, [0.997, 0.322, 0.442, 0.017, 0.243, 0.014,
                                 0.321, 0.999, 0.01, 0.006, 0.0, 0.007, 0.451,
                                 0.012, 0.996, 0.031, 0.013, 0.004, 0.002,
                                 0.006, 0.029, 0.974, 0.076, 0.03, 0.219, 0.013,
                                 0.031, 0.08, 0.974, 0.018, 0.028, 0.004, 0.0,
                                 0.028, 0.034, 0.89])
        system('rm -rf lala*')
        if CHKTIME:
            self.assertEqual(True, True)
            print '19', time() - t0
Beispiel #5
0
def hic_map(data, resolution=None, normalized=False, masked=None,
            by_chrom=False, savefig=None, show=False, savedata=None,
            focus=None, clim=None, cmap='jet', pdf=False, decay=True,
            perc=10, name=None, decay_resolution=None, **kwargs):
    """
    function to retrieve data from HiC-data object. Data can be stored as
    a square matrix, or drawn using matplotlib

    Only the partitioned (by_chrom) mode produces output in this version of
    the function: when by_chrom is false nothing is written or drawn.

    :param data: can be either a path to a file with pre-processed reads
       (filtered or not), or a Hi-C-data object
    :param None resolution: at which to bin the data (try having a dense matrix
       with < 10% of cells with zero interaction counts). Note: not necessary
       if a hic_data object is passed as 'data'; only used to load 'data' when
       it is a path.
    :param False normalized: used normalized data, based on precalculated biases
    :param masked: a list of columns to be removed. Usually because too few
       interactions. Defaults to the 'bads' of the Hi-C-data object.
    :param False by_chrom: data can be stored in a partitioned way. This
       parameter can take the values of:
        * 'intra': one output per each chromosome will be created
        * 'inter': one output per each possible pair of chromosome will be
           created
        * 'all'  : both of the above outputs
    :param None savefig: path where to store the output images. Note that, if
       the by_chrom option is used, then savefig will be the name of the
       directory containing the output files.
    :param None savedata: path where to store the output matrices. Note that, if
       the by_chrom option is used, then savedata will be the name of the
       directory containing the output files.
    :param None focus: can be either two number (i.e.: (1, 100)) specifying the
       start and end position of the sub-matrix to display (start and end, along
       the diagonal of the original matrix); or directly a chromosome name; or
       two chromosome names (i.e.: focus=('chr2, chrX')), in order to store the
       data corresponding to inter chromosomal interactions between these two
       chromosomes. Incompatible with by_chrom.
    :param True decay: plot the correlation between genomic distance and
       interactions (usually a decay).
    :param False force_image: (keyword argument) force to generate an image
       even if the matrix has more than 10000 rows
    :param None clim: cutoff for the upper and lower bound in the coloring scale
       of the heatmap
    :param False pdf: when using the by_chrom option, to specify the format of
       the stored images ('pdf' instead of 'png')
    :param jet cmap: color map to be used for the heatmap
    :param None decay_resolution: chromatin fragment size to consider when
       calculating decay of the number of interactions with genomic distance.
       Default is equal to resolution of the matrix.
    :param kwargs: forwarded to load_hic_data_from_reads when 'data' is a
       path; 'get_sections' and 'force_image' are also read here.

    :raises Exception: if both focus and by_chrom are given
    """
    if isinstance(data, str):
        data = load_hic_data_from_reads(data, resolution=resolution, **kwargs)
        if not kwargs.get('get_sections', True) and decay:
            warn('WARNING: not decay not available when get_sections is off.')
            decay = False
    hic_data = data
    # the resolution of the loaded object always wins over the argument
    resolution = data.resolution
    if not decay_resolution:
        decay_resolution = resolution
    if hic_data.bads and not masked:
        masked = hic_data.bads
    # save and draw the data
    if by_chrom:
        if focus:
            raise Exception('Incompatible options focus and by_chrom\n')
        if savedata:
            mkdir(savedata)
        if savefig:
            mkdir(savefig)
        # materialised once so it can be sliced (dict views cannot be)
        chrom_names = list(hic_data.chromosomes)
        extension = 'pdf' if pdf else 'png'
        nan = float('nan')
        for pos, crm1 in enumerate(chrom_names):
            # start at 'pos' so each unordered pair is visited only once
            for crm2 in chrom_names[pos:]:
                if by_chrom == 'intra' and crm1 != crm2:
                    continue
                if by_chrom == 'inter' and crm1 == crm2:
                    continue
                try:
                    subdata = hic_data.get_matrix(focus=(crm1, crm2),
                                                  normalized=normalized)
                    start1, _ = hic_data.section_pos[crm1]
                    start2, _ = hic_data.section_pos[crm2]
                    masked1 = {}
                    masked2 = {}
                    # NOTE(review): 'focus' is always falsy here (it is
                    # rejected above when by_chrom is set), so this masking
                    # never runs. Left as is pending confirmation of the
                    # intended behaviour.
                    if focus and hic_data.bads:
                        # rescale masked
                        masked1 = dict((m - start1, hic_data.bads[m])
                                       for m in hic_data.bads)
                        masked2 = dict((m - start2, hic_data.bads[m])
                                       for m in hic_data.bads)
                    if masked1 or masked2:
                        size = len(subdata)
                        bad_cols = [col for col in masked2 if 0 <= col < size]
                        for row_idx, row in enumerate(subdata):
                            if row_idx in masked1:
                                row = subdata[row_idx] = [nan] * size
                            for col in bad_cols:
                                row[col] = nan
                    # NOTE(review): the set collapses crm1 == crm2 into one
                    # name, but for two different chromosomes the order of
                    # the names in the file name follows set iteration order.
                    if savedata:
                        hic_data.write_matrix('%s/%s.mat' % (
                            savedata, '_'.join(set((crm1, crm2)))),
                                              focus=(crm1, crm2),
                                              normalized=normalized)
                    if show or savefig:
                        if (len(subdata) > 10000
                            and not kwargs.get('force_image', False)):
                            warn('WARNING: Matrix image not created, more than '
                                 '10000 rows, use a lower resolution to create images')
                            continue
                        # without savefig (show only) there is no directory to
                        # build a file name from: pass None, not 'None/...'
                        if savefig:
                            figure_path = '%s/%s.%s' % (
                                savefig, '_'.join(set((crm1, crm2))),
                                extension)
                        else:
                            figure_path = None
                        draw_map(subdata,
                                 OrderedDict([(k, hic_data.chromosomes[k])
                                              for k in chrom_names
                                              if k in (crm1, crm2)]),
                                 hic_data.section_pos,
                                 figure_path,
                                 show, one=True, clim=clim, cmap=cmap,
                                 decay_resolution=decay_resolution, perc=perc,
                                 name=name, cistrans=float('NaN'))
                # a failing pair is reported and skipped, not fatal
                except ValueError as err:
                    print('Value ERROR: problem with chromosome %s' % crm1)
                    print(str(err))
                except IndexError as err:
                    print('Index ERROR: problem with chromosome %s' % crm1)
                    print(str(err))
Beispiel #6
0
def hic_map(data, resolution=None, normalized=False, masked=None,
            by_chrom=False, savefig=None, show=False, savedata=None,
            focus=None, clim=None, cmap='jet', pdf=False, decay=True,
            perc=10, name=None, decay_resolution=None, **kwargs):
    """
    function to retrieve data from HiC-data object. Data can be stored as
    a square matrix, or drawn using matplotlib

    :param data: can be either a path to a file with pre-processed reads
       (filtered or not), or a Hi-C-data object
    :param None resolution: at which to bin the data (try having a dense matrix
       with < 10% of cells with zero interaction counts). Note: not necessary
       if a hic_data object is passed as 'data'; only used to load 'data' when
       it is a path.
    :param False normalized: used normalized data, based on precalculated biases
    :param masked: a list of columns to be removed. Usually because too few
       interactions. Defaults to the 'bads' of the Hi-C-data object.
    :param False by_chrom: data can be stored in a partitioned way. This
       parameter can take the values of:
        * 'intra': one output per each chromosome will be created
        * 'inter': one output per each possible pair of chromosome will be
           created
        * 'all'  : both of the above outputs
    :param None savefig: path where to store the output images. Note that, if
       the by_chrom option is used, then savefig will be the name of the
       directory containing the output files.
    :param None savedata: path where to store the output matrices. Note that, if
       the by_chrom option is used, then savedata will be the name of the
       directory containing the output files.
    :param None focus: can be either two number (i.e.: (1, 100)) specifying the
       start and end position of the sub-matrix to display (start and end, along
       the diagonal of the original matrix); or directly a chromosome name; or
       two chromosome names (i.e.: focus=('chr2, chrX')), in order to store the
       data corresponding to inter chromosomal interactions between these two
       chromosomes. Incompatible with by_chrom.
    :param True decay: plot the correlation between genomic distance and
       interactions (usually a decay).
    :param None clim: cutoff for the upper and lower bound in the coloring scale
       of the heatmap
    :param False pdf: when using the by_chrom option, to specify the format of
       the stored images ('pdf' instead of 'png')
    :param jet cmap: color map to be used for the heatmap
    :param None decay_resolution: chromatin fragment size to consider when
       calculating decay of the number of interactions with genomic distance.
       Default is equal to resolution of the matrix.
    :param kwargs: forwarded to load_hic_data_from_reads when 'data' is a
       path; 'get_sections', 'max_diff', 'normalized', 'exclude', 'diagonal',
       'equals' and 'verbose' are also read here.

    :raises Exception: if both focus and by_chrom are given
    """
    if isinstance(data, str):
        data = load_hic_data_from_reads(data, resolution=resolution, **kwargs)
        if not kwargs.get('get_sections', True) and decay:
            warn('WARNING: not decay not available when get_sections is off.')
            decay = False
    hic_data = data
    # the resolution of the loaded object always wins over the argument
    resolution = data.resolution
    if not decay_resolution:
        decay_resolution = resolution
    if hic_data.bads and not masked:
        masked = hic_data.bads
    # save and draw the data
    if by_chrom:
        if focus:
            raise Exception('Incompatible options focus and by_chrom\n')
        # Create every requested output directory. Building a single
        # 'mkdir -p' shell command created only one of the two, failed when
        # neither was given and broke on paths containing spaces.
        for out_dir in (savedata, savefig):
            if out_dir and not os.path.isdir(out_dir):
                os.makedirs(out_dir)
        # materialised once so it can be sliced (dict views cannot be)
        chrom_names = list(hic_data.chromosomes)
        extension = 'pdf' if pdf else 'png'
        for pos, crm1 in enumerate(chrom_names):
            # start at 'pos' so each unordered pair is visited only once
            for crm2 in chrom_names[pos:]:
                if by_chrom == 'intra' and crm1 != crm2:
                    continue
                if by_chrom == 'inter' and crm1 == crm2:
                    continue
                subdata = hic_data.get_matrix(focus=(crm1, crm2),
                                              normalized=normalized)
                # NOTE(review): the set collapses crm1 == crm2 into one name,
                # but for two different chromosomes the order of the names in
                # the file name follows set iteration order.
                if savedata:
                    hic_data.write_matrix('%s/%s.mat' % (
                        savedata, '_'.join(set((crm1, crm2)))),
                                          focus=(crm1, crm2),
                                          normalized=normalized)
                if show or savefig:
                    # without savefig (show only) there is no directory to
                    # build a file name from: pass None, not 'None/...'
                    if savefig:
                        figure_path = '%s/%s.%s' % (
                            savefig, '_'.join(set((crm1, crm2))), extension)
                    else:
                        figure_path = None
                    draw_map(subdata,
                             OrderedDict([(k, hic_data.chromosomes[k])
                                          for k in chrom_names
                                          if k in (crm1, crm2)]),
                             hic_data.section_pos,
                             figure_path,
                             show, one=True, clim=clim, cmap=cmap,
                             decay_resolution=decay_resolution, perc=perc,
                             name=name, cistrans=float('NaN'))
    else:
        if savedata:
            hic_data.write_matrix(savedata, focus=focus,
                                  normalized=normalized)
        if show or savefig:
            subdata = hic_data.get_matrix(focus=focus, normalized=normalized)
            if focus and masked:
                # rescale masked
                # NOTE(review): this arithmetic only works for a numeric
                # focus; a chromosome name would fail here -- confirm.
                masked = dict((m - focus[0], masked[m]) for m in masked)
            if masked:
                # blank out masked rows entirely and masked columns in the
                # remaining rows; only columns inside the matrix are visited
                nan = float('nan')
                size = len(subdata)
                bad_cols = [col for col in masked if 0 <= col < size]
                for row_idx, row in enumerate(subdata):
                    if row_idx in masked:
                        subdata[row_idx] = [nan] * size
                        continue
                    for col in bad_cols:
                        row[col] = nan
            if focus:
                cistrans = float('NaN')
            else:
                # NOTE(review): 'normalized' is a named parameter, so it can
                # never be found in kwargs and False is always used here.
                cistrans = hic_data.cis_trans_ratio(
                    kwargs.get('normalized', False),
                    kwargs.get('exclude', None),
                    kwargs.get('diagonal', True),
                    kwargs.get('equals', None),
                    kwargs.get('verbose', False))
            draw_map(subdata,
                     {} if focus else hic_data.chromosomes,
                     hic_data.section_pos, savefig, show,
                     one=True if focus else False, decay=decay,
                     clim=clim, cmap=cmap, decay_resolution=decay_resolution,
                     perc=perc, normalized=normalized,
                     max_diff=kwargs.get('max_diff', None),
                     name=name, cistrans=cistrans)
Beispiel #7
0
def hic_map(
    data,
    resolution=None,
    normalized=False,
    masked=None,
    by_chrom=False,
    savefig=None,
    show=False,
    savedata=None,
    focus=None,
    clim=None,
    cmap="jet",
    pdf=False,
    decay=True,
    perc=10,
    name=None,
    decay_resolution=None,
    **kwargs
):
    """
    function to retrieve data from HiC-data object. Data can be stored as
    a square matrix, or drawn using matplotlib

    :param data: can be either a path to a file with pre-processed reads
       (filtered or not), or a Hi-C-data object
    :param None resolution: at which to bin the data (try having a dense matrix
       with < 10% of cells with zero interaction counts). Note: not necessary
       if a hic_data object is passed as 'data'; only used to load 'data' when
       it is a path.
    :param False normalized: used normalized data, based on precalculated biases
    :param masked: a list of columns to be removed. Usually because too few
       interactions. Defaults to the 'bads' of the Hi-C-data object.
    :param False by_chrom: data can be stored in a partitioned way. This
       parameter can take the values of:
        * 'intra': one output per each chromosome will be created
        * 'inter': one output per each possible pair of chromosome will be
           created
        * 'all'  : both of the above outputs
    :param None savefig: path where to store the output images. Note that, if
       the by_chrom option is used, then savefig will be the name of the
       directory containing the output files.
    :param None savedata: path where to store the output matrices. Note that, if
       the by_chrom option is used, then savedata will be the name of the
       directory containing the output files.
    :param None focus: can be either two number (i.e.: (1, 100)) specifying the
       start and end position of the sub-matrix to display (start and end, along
       the diagonal of the original matrix); or directly a chromosome name; or
       two chromosome names (i.e.: focus=('chr2, chrX')), in order to store the
       data corresponding to inter chromosomal interactions between these two
       chromosomes. Incompatible with by_chrom.
    :param True decay: plot the correlation between genomic distance and
       interactions (usually a decay).
    :param None clim: cutoff for the upper and lower bound in the coloring scale
       of the heatmap
    :param False pdf: when using the by_chrom option, to specify the format of
       the stored images ('pdf' instead of 'png')
    :param jet cmap: color map to be used for the heatmap
    :param None decay_resolution: chromatin fragment size to consider when
       calculating decay of the number of interactions with genomic distance.
       Default is equal to resolution of the matrix.
    :param kwargs: forwarded to load_hic_data_from_reads when 'data' is a
       path; 'get_sections', 'max_diff', 'normalized', 'exclude', 'diagonal',
       'equals' and 'verbose' are also read here.

    :raises Exception: if both focus and by_chrom are given
    """
    if isinstance(data, str):
        data = load_hic_data_from_reads(data, resolution=resolution, **kwargs)
        if not kwargs.get("get_sections", True) and decay:
            warn("WARNING: not decay not available when get_sections is off.")
            decay = False
    hic_data = data
    # the resolution of the loaded object always wins over the argument
    resolution = data.resolution
    if not decay_resolution:
        decay_resolution = resolution
    if hic_data.bads and not masked:
        masked = hic_data.bads
    # save and draw the data
    if by_chrom:
        if focus:
            raise Exception("Incompatible options focus and by_chrom\n")
        # Create every requested output directory. Building a single
        # 'mkdir -p' shell command created only one of the two, failed when
        # neither was given and broke on paths containing spaces.
        for out_dir in (savedata, savefig):
            if out_dir and not os.path.isdir(out_dir):
                os.makedirs(out_dir)
        # materialised once so it can be sliced (dict views cannot be)
        chrom_names = list(hic_data.chromosomes)
        extension = "pdf" if pdf else "png"
        for pos, crm1 in enumerate(chrom_names):
            # start at 'pos' so each unordered pair is visited only once
            for crm2 in chrom_names[pos:]:
                if by_chrom == "intra" and crm1 != crm2:
                    continue
                if by_chrom == "inter" and crm1 == crm2:
                    continue
                subdata = hic_data.get_matrix(focus=(crm1, crm2), normalized=normalized)
                # NOTE(review): the set collapses crm1 == crm2 into one name,
                # but for two different chromosomes the order of the names in
                # the file name follows set iteration order.
                if savedata:
                    hic_data.write_matrix(
                        "%s/%s.mat" % (savedata, "_".join(set((crm1, crm2)))),
                        focus=(crm1, crm2),
                        normalized=normalized,
                    )
                if show or savefig:
                    # without savefig (show only) there is no directory to
                    # build a file name from: pass None, not 'None/...'
                    if savefig:
                        figure_path = "%s/%s.%s" % (
                            savefig,
                            "_".join(set((crm1, crm2))),
                            extension,
                        )
                    else:
                        figure_path = None
                    draw_map(
                        subdata,
                        OrderedDict(
                            [(k, hic_data.chromosomes[k]) for k in chrom_names if k in (crm1, crm2)]
                        ),
                        hic_data.section_pos,
                        figure_path,
                        show,
                        one=True,
                        clim=clim,
                        cmap=cmap,
                        decay_resolution=decay_resolution,
                        perc=perc,
                        name=name,
                        cistrans=float("NaN"),
                    )
    else:
        if savedata:
            hic_data.write_matrix(savedata, focus=focus, normalized=normalized)
        if show or savefig:
            subdata = hic_data.get_matrix(focus=focus, normalized=normalized)
            if focus and masked:
                # rescale masked
                # NOTE(review): this arithmetic only works for a numeric
                # focus; a chromosome name would fail here -- confirm.
                masked = dict((m - focus[0], masked[m]) for m in masked)
            if masked:
                # blank out masked rows entirely and masked columns in the
                # remaining rows; only columns inside the matrix are visited
                nan = float("nan")
                size = len(subdata)
                bad_cols = [col for col in masked if 0 <= col < size]
                for row_idx, row in enumerate(subdata):
                    if row_idx in masked:
                        subdata[row_idx] = [nan] * size
                        continue
                    for col in bad_cols:
                        row[col] = nan
            if focus:
                cistrans = float("NaN")
            else:
                # NOTE(review): 'normalized' is a named parameter, so it can
                # never be found in kwargs and False is always used here.
                cistrans = hic_data.cis_trans_ratio(
                    kwargs.get("normalized", False),
                    kwargs.get("exclude", None),
                    kwargs.get("diagonal", True),
                    kwargs.get("equals", None),
                    kwargs.get("verbose", False),
                )
            draw_map(
                subdata,
                {} if focus else hic_data.chromosomes,
                hic_data.section_pos,
                savefig,
                show,
                one=True if focus else False,
                decay=decay,
                clim=clim,
                cmap=cmap,
                decay_resolution=decay_resolution,
                perc=perc,
                normalized=normalized,
                max_diff=kwargs.get("max_diff", None),
                name=name,
                cistrans=cistrans,
            )
Beispiel #8
0
def hic_map(data, resolution=None, normalized=False, masked=None,
            by_chrom=False, savefig=None, show=False, savedata=None,
            focus=None, clim=None, cmap='tadbit', pdf=False, decay=True,
            perc=10, name=None, **kwargs):
    """
    function to retrieve data from HiC-data object. Data can be stored as
    a square matrix, or drawn using matplotlib

    Matrices are written as tab-separated text, one row per line.

    :param data: can be either a path to a file with pre-processed reads
       (filtered or not), or a Hi-C-data object
    :param None resolution: at which to bin the data (try having a dense matrix
       with < 10% of cells with zero interaction counts). Note: not necessary
       if a hic_data object is passed as 'data'.
    :param False normalized: used normalized data, based on precalculated biases
    :param masked: a list of columns to be removed. Usually because too few
       interactions. Defaults to the 'bads' of the Hi-C-data object.
    :param False by_chrom: data can be stored in a partitioned way. This
       parameter can take the values of:
        * 'intra': one output per each chromosome will be created
        * 'inter': one output per each possible pair of chromosome will be
           created
        * 'all'  : both of the above outputs
    :param None savefig: path where to store the output images. Note that, if
       the by_chrom option is used, then savefig will be the name of the
       directory containing the output files.
    :param None savedata: path where to store the output matrices. Note that, if
       the by_chrom option is used, then savedata will be the name of the
       directory containing the output files.
    :param None focus: can be either two number (i.e.: (1, 100)) specifying the
       start and end position of the sub-matrix to display (start and end, along
       the diagonal of the original matrix); or directly a chromosome name; or
       two chromosome names (i.e.: focus=('chr2, chrX')), in order to store the
       data corresponding to inter chromosomal interactions between these two
       chromosomes. Incompatible with by_chrom.
    :param True decay: plot the correlation between genomic distance and
       interactions (usually a decay).
    :param None clim: cutoff for the upper and lower bound in the coloring scale
       of the heatmap
    :param False pdf: when using the by_chrom option, to specify the format of
       the stored images ('pdf' instead of 'png')
    :param tadbit cmap: color map to be used for the heatmap
    :param False get_sections: (keyword argument) for very very high
       resolution, when the column index does not fit in memory
    :param kwargs: forwarded to load_hic_data_from_reads when 'data' is a
       path; 'get_sections', 'normalized', 'exclude', 'diagonal', 'equals'
       and 'verbose' are also read here.

    :raises Exception: if both focus and by_chrom are given
    """
    if isinstance(data, str):
        data = load_hic_data_from_reads(data, resolution=resolution, **kwargs)
        if not kwargs.get('get_sections', True) and decay:
            warn('WARNING: not decay not available when get_sections is off.')
            decay = False
    hic_data = data
    # NOTE(review): 'resolution' is handed to draw_map exactly as received,
    # so it is None when a Hi-C-data object is passed without it -- confirm
    # draw_map copes with that.
    if hic_data.bads and not masked:
        masked = hic_data.bads
    # save and draw the data
    if by_chrom:
        if focus:
            raise Exception('Incompatible options focus and by_chrom\n')
        # Create every requested output directory. Building a single
        # 'mkdir -p' shell command created only one of the two, failed when
        # neither was given and broke on paths containing spaces.
        for out_dir in (savedata, savefig):
            if out_dir and not os.path.isdir(out_dir):
                os.makedirs(out_dir)
        # materialised once so it can be sliced (dict views cannot be)
        chrom_names = list(hic_data.chromosomes)
        extension = 'pdf' if pdf else 'png'
        for pos, crm1 in enumerate(chrom_names):
            # start at 'pos' so each unordered pair is visited only once
            for crm2 in chrom_names[pos:]:
                if by_chrom == 'intra' and crm1 != crm2:
                    continue
                if by_chrom == 'inter' and crm1 == crm2:
                    continue
                subdata = hic_data.get_matrix(focus=(crm1, crm2),
                                              normalized=normalized)
                # NOTE(review): the set collapses crm1 == crm2 into one name,
                # but for two different chromosomes the order of the names in
                # the file name follows set iteration order.
                if savedata:
                    matrix_path = '%s/%s.mat' % (
                        savedata, '_'.join(set((crm1, crm2))))
                    # 'with' closes the file even if writing fails
                    with open(matrix_path, 'w') as out:
                        out.write('\n'.join('\t'.join(str(cell)
                                                      for cell in row)
                                            for row in subdata) + '\n')
                if show or savefig:
                    # without savefig (show only) there is no directory to
                    # build a file name from: pass None, not 'None/...'
                    if savefig:
                        figure_path = '%s/%s.%s' % (
                            savefig, '_'.join(set((crm1, crm2))), extension)
                    else:
                        figure_path = None
                    draw_map(subdata,
                             OrderedDict([(k, hic_data.chromosomes[k])
                                          for k in chrom_names
                                          if k in (crm1, crm2)]),
                             hic_data.section_pos,
                             figure_path,
                             show, one=True, clim=clim, cmap=cmap,
                             resolution=resolution, perc=perc,
                             name=name, cistrans=float('NaN'))
    else:
        # fetched at most once and shared by the dump and the drawing; the
        # dump is written before any masking is applied
        subdata = None
        if savedata:
            subdata = hic_data.get_matrix(focus=focus, normalized=normalized)
            with open(savedata, 'w') as out:
                out.write('\n'.join('\t'.join(str(cell) for cell in row)
                                    for row in subdata) + '\n')
        if show or savefig:
            if subdata is None:
                subdata = hic_data.get_matrix(focus=focus,
                                              normalized=normalized)
            if focus and masked:
                # rescale masked
                # NOTE(review): this arithmetic only works for a numeric
                # focus; a chromosome name would fail here -- confirm.
                masked = dict((m - focus[0], masked[m]) for m in masked)
            if masked:
                # blank out masked rows entirely and masked columns in the
                # remaining rows; only columns inside the matrix are visited
                nan = float('nan')
                size = len(subdata)
                bad_cols = [col for col in masked if 0 <= col < size]
                for row_idx, row in enumerate(subdata):
                    if row_idx in masked:
                        subdata[row_idx] = [nan] * size
                        continue
                    for col in bad_cols:
                        row[col] = nan
            if focus:
                cistrans = float('NaN')
            else:
                # NOTE(review): 'normalized' is a named parameter, so it can
                # never be found in kwargs and False is always used here.
                cistrans = hic_data.cis_trans_ratio(
                    kwargs.get('normalized', False),
                    kwargs.get('exclude', None),
                    kwargs.get('diagonal', False),
                    kwargs.get('equals', None),
                    kwargs.get('verbose', False))
            draw_map(subdata,
                     {} if focus else hic_data.chromosomes,
                     hic_data.section_pos, savefig, show,
                     one=True if focus else False, decay=decay,
                     clim=clim, cmap=cmap, resolution=resolution, perc=perc,
                     name=name, cistrans=cistrans)