def gather_high_low_cool(
        cooler_file='Rao2014-GM12878-DpnII-allreps-filtered.10kb.cool',
        path='./data/raw/',
        chromosome='22',
        scale=4,
        output_path='./experiment/evaluation/'):
    """Write high- and low-resolution ``.cool`` files for one chromosome.

    The balanced contact matrix of ``chr<chromosome>`` is fetched from
    ``cooler_file``, passed through ``remove_zeros`` and then down-sampled
    with ``sampling_hic`` (ratio ``scale**2``, fixed seed).  Both matrices go
    through ``ICE_normalization``; their upper triangles are stored as
    ``high_chr<chromosome>.cool`` and ``low_chr<chromosome>.cool`` inside
    ``<output_path>/chr<chromosome>/``.

    Args:
        cooler_file: Name of the input cooler file inside ``path``.
        path: Directory that contains ``cooler_file``.
        chromosome: Chromosome label without the ``chr`` prefix (str or int).
        scale: Down-sampling scale; ``scale**2`` is handed to
            ``sampling_hic``.
        output_path: Root directory for the generated files; the
            per-chromosome sub-directory is created when missing.

    Returns:
        None. The results are written to disk.
    """
    chrom_name = 'chr{}'.format(chromosome)
    cool_hic = cooler.Cooler(os.path.join(path, cooler_file))
    resolution = cool_hic.binsize
    mat = cool_hic.matrix(balance=True).fetch(chrom_name)

    # idx: boolean mask over the original bins; its length is not shrunk.
    high_hic, idx = remove_zeros(mat)
    bool_idx = np.array(idx).flatten()
    # Original bin positions where the mask is set; used below to translate
    # row/column indices of the processed matrices back to bin ids.
    num_idx = np.array(np.where(idx)).flatten()
    low_hic = sampling_hic(high_hic, scale**2, fix_seed=True)
    print('high hic shape: {}.'.format(high_hic.shape), end=' ')
    print('low hic shape: {}.'.format(low_hic.shape))

    # One bin per entry of the mask; the mask itself is stored as the
    # per-bin weight (1.0 where set, 0.0 elsewhere).
    n_bins = len(bool_idx)
    starts = resolution * np.arange(n_bins)
    bins = pd.DataFrame(data={
        'chrom': [chrom_name] * n_bins,
        'start': starts,
        'end': starts + resolution,
        'weight': 1.0 * bool_idx,
    })

    high_hic = ICE_normalization(high_hic)
    low_hic = ICE_normalization(low_hic)

    # Only the upper triangle is written out as pixels.
    high_hic = triu(high_hic, format='coo')
    low_hic = triu(low_hic, format='coo')

    output_path = os.path.join(output_path, chrom_name)
    os.makedirs(output_path, exist_ok=True)

    for prefix, hic in (('high', high_hic), ('low', low_hic)):
        uri = os.path.join(output_path,
                           '{}_{}.cool'.format(prefix, chrom_name))
        print('saving file {}'.format(uri))
        pixels = pd.DataFrame(data={
            'bin1_id': num_idx[hic.row],
            'bin2_id': num_idx[hic.col],
            'count': hic.data,
        })
        # cooler stores `count` as int32 unless told otherwise; pass the
        # actual dtype so non-integer values are not truncated on write.
        cooler.create_cooler(cool_uri=uri, bins=bins, pixels=pixels,
                             dtypes={'count': pixels['count'].dtype})
def gather_high_low_mat(
        cooler_file='Rao2014-GM12878-DpnII-allreps-filtered.10kb.cool',
        path='./data/raw/',
        chromosome='22',
        scale=4,
        output_path='./experiment/evaluation/'):
    """Save high- and low-resolution matrices of one chromosome as ``.npz``.

    The balanced contact matrix of ``chr<chromosome>`` is fetched from
    ``cooler_file``, passed through ``remove_zeros`` and down-sampled with
    ``sampling_hic`` (ratio ``scale**2``, fixed seed).  Each matrix is
    written with ``np.savez_compressed`` under the key ``hic``; the index
    returned by ``remove_zeros`` is stored alongside it as ``compact``.

    Files are placed in ``<output_path>/chr<chromosome>/`` and named
    ``high_chr<chromosome>_<binsize>.npz`` and
    ``low_chr<chromosome>_<binsize * scale>.npz``.  For a 10 kb input these
    are the same names as the previously hard-coded ``_10000`` /
    ``_<scale>0000`` suffixes.

    Args:
        cooler_file: Name of the input cooler file inside ``path``.
        path: Directory that contains ``cooler_file``.
        chromosome: Chromosome label without the ``chr`` prefix (str or int).
        scale: Down-sampling scale; ``scale**2`` is handed to
            ``sampling_hic``.
        output_path: Root directory for the generated files; the
            per-chromosome sub-directory is created when missing.

    Returns:
        None. The results are written to disk.
    """
    chrom_name = 'chr{}'.format(chromosome)
    cool_hic = cooler.Cooler(os.path.join(path, cooler_file))
    resolution = cool_hic.binsize
    if resolution is None:
        # No uniform bin size is reported by the cooler; keep the legacy
        # 10 kb label so the file names stay what they used to be.
        resolution = 10000
    mat = cool_hic.matrix(balance=True).fetch(chrom_name)
    high_hic, idx = remove_zeros(mat)
    low_hic = sampling_hic(high_hic, scale**2, fix_seed=True)

    output_path = os.path.join(output_path, chrom_name)
    os.makedirs(output_path, exist_ok=True)

    high_file = os.path.join(
        output_path, 'high_{}_{}.npz'.format(chrom_name, resolution))
    print('saving file {}'.format(high_file))
    np.savez_compressed(high_file, hic=high_hic, compact=idx)

    low_file = os.path.join(
        output_path, 'low_{}_{}.npz'.format(chrom_name, resolution * scale))
    print('saving file {}'.format(low_file))
    np.savez_compressed(low_file, hic=low_hic, compact=idx)