Esempio n. 1
0
def configure(len_size=None,
              genomic_distance=None,
              methods_name='ours',
              dataset_path=None,
              raw_path='raw',
              raw_hic='Rao2014-GM12878-DpnII-allreps-filtered.10kb.cool',
              input_path='input',
              output_path='output'):
    """Open the raw Hi-C cooler file and assemble the run configuration.

    Args:
        len_size: Sample size used in the directory names and returned to
            the caller; defaults to 40 when ``None``.
        genomic_distance: Genomic distance used in the directory names and
            returned to the caller; defaults to 200000 when ``None``
            (presumably base pairs -- TODO confirm).
        methods_name: Tag inserted into the input/output directory names.
        dataset_path: Root data directory. When ``None`` it is
            ``<root>/data``, where ``<root>`` is whatever
            ``operations.redircwd_back_projroot(
            project_name='refine_resolution')`` returns.
        raw_path: Sub-directory of ``dataset_path`` holding the raw file.
        raw_hic: File name of the cooler file. It must contain at least
            three ``-``-separated fields and at least one ``.``; these are
            used to derive the sample name.
        input_path: Prefix of the input directory name.
        output_path: Prefix of the output directory name.

    Returns:
        A 10-tuple ``(cool_hic, resolution, scale, len_size,
        genomic_distance, block_size, dataset_path, [raw_path, raw_hic],
        [input_path, input_file], [output_path, output_file])`` where
        ``resolution`` is ``cool_hic.binsize`` and ``input_path`` /
        ``output_path`` are the prefixes extended with
        ``_<methods_name>_<genomic_distance>_<len_size>``.

    Raises:
        IndexError: If ``raw_hic`` has fewer than three ``-``-separated
            fields or contains no ``.``.
    """
    scale = 4
    block_size = 2048  # number of entries in one file
    if len_size is None:
        len_size = 40
    if genomic_distance is None:
        genomic_distance = 200000
    if dataset_path is None:
        # assume current directory is the root of project
        # pathto/proj/data
        # pathto/proj/our_method
        dataset_path = os.path.join(
            operations.redircwd_back_projroot(
                project_name='refine_resolution'), 'data')

    print('data path: ', dataset_path)

    # Sample name: first three dash-separated fields plus the token between
    # the first and second dot, e.g.
    # 'Rao2014-GM12878-DpnII-allreps-filtered.10kb.cool'
    #     -> 'Rao2014_GM12878_DpnII_10kb'
    dash_fields = raw_hic.split('-')
    input_file = '_'.join([
        dash_fields[0],
        dash_fields[1],
        dash_fields[2],
        raw_hic.split('.')[1],
    ])
    output_file = input_file

    # Input and output directories share the same parameter suffix.
    dir_suffix = [methods_name, str(genomic_distance), str(len_size)]
    input_path = '_'.join([input_path] + dir_suffix)
    output_path = '_'.join([output_path] + dir_suffix)

    # load raw hic matrix
    cool_file = os.path.join(dataset_path, raw_path, raw_hic)
    print('raw hic data: ', cool_file)
    # NOTE: an automatic download of a missing file from
    # ftp://cooler.csail.mit.edu/coolers/hg19/ (via wget) was sketched here
    # but is disabled, so the file has to exist already. If it is ever
    # reinstated, test `not os.path.exists(...)`: `~` on a bool is bitwise
    # negation and is always truthy.
    cool_hic = cooler.Cooler(cool_file)
    # The resolution is taken from the cooler file's bin size.
    resolution = cool_hic.binsize
    return (cool_hic, resolution, scale, len_size, genomic_distance,
            block_size, dataset_path,
            [raw_path, raw_hic],
            [input_path, input_file],
            [output_path, output_file])
Esempio n. 2
0

"""
configure data:
dataset_path-raw
            -input
            -output
"""

if __name__ == '__main__':
    # Usage: <script> CHROMOSOME LEN_SIZE GENOMIC_DISTANCE
    #   sys.argv[1] -> chromosome name passed to save_samples
    #   sys.argv[2] -> len_size (parsed as int)
    #   sys.argv[3] -> genomic_distance (parsed as int)

    # Catalogue of Rao2014 10kb cooler file names. Not read below; only
    # `raw_hic` is handed to configure().
    raw_list = [
        'Rao2014-CH12LX-MboI-allreps-filtered.10kb.cool',
        'Rao2014-GM12878-DpnII-allreps-filtered.10kb.cool',
        'Rao2014-GM12878-MboI-allreps-filtered.10kb.cool',
        'Rao2014-HMEC-MboI-allreps-filtered.10kb.cool',
        'Rao2014-HUVEC-MboI-allreps-filtered.10kb.cool',
        'Rao2014-IMR90-MboI-allreps-filtered.10kb.cool',
        'Rao2014-K562-MboI-allreps-filtered.10kb.cool',
        'Rao2014-KBM7-MboI-allreps-filtered.10kb.cool',
        'Rao2014-NHEK-MboI-allreps-filtered.10kb.cool',
    ]

    # The returned root is not used below; the call is kept because the
    # helper may have side effects (NOTE(review): confirm).
    root = operations.redircwd_back_projroot(project_name='refine_resolution')

    raw_hic = 'Rao2014-GM12878-MboI-allreps-filtered.10kb.cool'
    # raw_hic='Rao2014-GM12878-DpnII-allreps-filtered.10kb.cool'

    # Parse the numeric arguments first so a bad value fails before any
    # data is opened.
    len_size_arg = int(sys.argv[2])
    genomic_distance_arg = int(sys.argv[3])
    config = configure(
        raw_hic=raw_hic,
        len_size=len_size_arg,
        genomic_distance=genomic_distance_arg,
    )

    # A single chromosome per invocation, kept as a list so the loop below
    # works unchanged for several.
    chromosome_list = [str(sys.argv[1])]
    for chri in chromosome_list:
        save_samples(config, chromosome=chri)
Esempio n. 3
0
            if hic_lr is None:
                hic_lr = data['hic']
            else:
                hic_lr = np.concatenate((hic_lr, data['hic']), axis=0)
    return hic_hr, hic_lr


if __name__ == '__main__':
    # the size of input
    len_size = int(sys.argv[1])  # 40, 128, 200
    scale = 4
    # genomic_distance is only used to build the input path; it has no effect on the model
    genomic_distance = int(sys.argv[2])  # 2000000, 2560000
    EPOCHS = 300
    BATCH_SIZE = 9
    root_path = redircwd_back_projroot(project_name='refine_resolution')
    data_path = os.path.join(root_path, 'data')
    # raw_hic = 'Rao2014-GM12878-DpnII-allreps-filtered.10kb.cool'
    raw_hic = 'Rao2014-GM12878-MboI-allreps-filtered.10kb.cool'
    input_path = '_'.join(['input', 'ours', str(genomic_distance), str(len_size)])
    input_file = raw_hic.split('-')[0] + '_' + raw_hic.split('-')[1] + '_' + raw_hic.split('-')[2] + '_' + raw_hic.split('.')[1]

    log_dir = os.path.join(root_path, 'our_model', 'logs', 'model')
    logging.basicConfig(filename=os.path.join(log_dir, 'training.log'), level=logging.INFO)

    # ['1', '2', '3', '4', '5','6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16']
    # ['17', '18']
    # ['19', '20', '21', '22', 'X']
    train_chr_list = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16']
    valid_chr_list = ['17', '18']