def create_rev_com_genome(source_genome_path, write_dir):
    """Write a copy of a genome directory with every chromosome array flipped.

    Each chromosome returned by ``load_directory`` is transposed, reversed
    along both axes, cast to float32 and stored with the bcolz array writer
    under ``<write_dir>/<genome name>/<chromosome>``. A ``metadata.json``
    file recording the written shapes, the array type and the source path is
    saved in the same output directory.

    Args:
        source_genome_path: Path of the source genome directory. Trailing
            slashes are stripped; the last path component is used as the
            genome name.
        write_dir: Directory in which the ``<genome name>`` sub-directory is
            created. It is created with ``os.mkdir``, so ``write_dir``
            itself must already exist.
    """
    backend = 'bcolz'
    source_genome_path = source_genome_path.rstrip('/')
    target_dir = os.path.join(write_dir, source_genome_path.rpartition('/')[2])
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    chromosomes = load_directory(source_genome_path, in_memory=True)
    shapes_by_chrom = {}
    for chrom_name, chrom_obj in chromosomes.items():
        # Read the backing array straight from the instance dictionary and
        # transpose it before flipping.
        transposed = chrom_obj.__dict__['_arr'][:].transpose()
        # Reversing both axes gives the reverse complement of the whole
        # chromosome: one axis reversal swaps the bases, the other reverses
        # the sequence.
        # NOTE(review): assumes complementary bases sit at mirrored channel
        # indices in the stored encoding -- confirm.
        flipped = transposed[::-1, ::-1]
        _array_writer[backend](flipped.astype(np.float32),
                               os.path.join(target_dir, chrom_name))
        shapes_by_chrom[chrom_name] = flipped.shape
        print("Created chromosome %s \n" % (chrom_name))

    # Describe what was written so the directory can be loaded again later.
    print("Writing metadata file \n")
    metadata = {
        'file_shapes': shapes_by_chrom,
        'type': 'array_{}'.format(backend),
        'source': source_genome_path
    }
    with open(os.path.join(target_dir, 'metadata.json'), 'w') as fp:
        json.dump(metadata, fp)
def create_complemented_genome(source_genome_path, write_dir):
    """Write a complemented (not reversed) copy of a genome directory.

    Each chromosome returned by ``load_directory`` is transposed, reversed
    along its first axis only, cast to float32 and stored with the bcolz
    array writer under ``<write_dir>/<genome name>/<chromosome>``. A
    ``metadata.json`` file recording the written shapes, the array type and
    the source path is saved in the same output directory.

    Args:
        source_genome_path: Path of the source genome directory. Trailing
            slashes are stripped; the last path component is used as the
            genome name.
        write_dir: Directory in which the ``<genome name>`` sub-directory is
            created. It is created with ``os.mkdir``, so ``write_dir``
            itself must already exist.
    """
    backend = 'bcolz'
    source_genome_path = source_genome_path.rstrip('/')
    chromosomes = load_directory(source_genome_path, in_memory=True)
    target_dir = os.path.join(write_dir, source_genome_path.rpartition('/')[2])
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    shapes_by_chrom = {}
    for chrom_name, chrom_obj in chromosomes.items():
        # After the transpose the array is laid out as (4, N), per the
        # original author's note.
        transposed = chrom_obj.__dict__['_arr'][:].transpose()
        # Flip only the first axis: the bases are swapped for their
        # complements while the sequence order is left untouched.
        complemented = transposed[::-1]
        _array_writer[backend](complemented.astype(np.float32),
                               os.path.join(target_dir, chrom_name))
        shapes_by_chrom[chrom_name] = complemented.shape
        print("Created chromosome %s \n" % (chrom_name))

    print("Writing metadata file \n")
    # Describe what was written so the directory can be loaded again later.
    metadata = {
        'file_shapes': shapes_by_chrom,
        'type': 'array_{}'.format(backend),
        'source': source_genome_path
    }
    with open(os.path.join(target_dir, 'metadata.json'), 'w') as fp:
        json.dump(metadata, fp)
Esempio n. 3
0
# Pull the run configuration off the parsed command-line arguments.
frag = args.frag
histone = args.histone
model_path = args.model_path
save_dir = args.save_dir
cuda = args.cuda

# Select which GPU device(s) CUDA-based libraries may see in this process.
os.environ["CUDA_VISIBLE_DEVICES"] = cuda

# Logging directories
srv_dir = os.path.join("/srv", "www", "kundaje", "jesikmin",
                       "test_experiments", save_dir)
if not os.path.exists(srv_dir):
    os.makedirs(srv_dir)

# Load the chr22 input signal, replace NaN/inf values with finite numbers and
# add a leading axis of size 1.
# NOTE(review): `day` is not defined in the visible part of this script;
# presumably it is set earlier (e.g. from `args`) -- confirm.
data = Data_Directories()
X_test = load_directory(data.input_atac[day][frag],
                        in_memory=True)['chr22']._arr
X_test = np.expand_dims(np.nan_to_num(X_test), axis=0)
# Single-argument print(...) calls behave identically on Python 2 and 3.
print("Finished fetching X_test")
print(X_test.shape)
print("Dimension of ATAC-seq signal (input): {}".format(X_test[0].shape))

# Load the chr22 target signal and add new axes at positions 0 and 2.
y_test = load_directory('/srv/scratch/jesikmin/output/bcolz/',
                        in_memory=True)['chr22']
y_test = np.expand_dims(y_test, axis=0)
y_test = np.expand_dims(y_test, axis=2)
print("Finished fetching Y_test")
print(y_test.shape)
print("Dimension of ChIP-seq signal (output): {}".format(y_test[0].shape))

# Generator only
Esempio n. 4
0
        help=
        "bigwig prefix. Example: a `outfile` prefix results in a `outfile.bw` bigwig file"
    )

    args = parser.parse_args()
    return args


# Parse the command-line arguments and derive the bigwig output filename
# from the requested prefix.
args = parse_args()
print(args)
bigwig = '{}.bw'.format(args.output_prefix)

# load data directory
logger.info("Loading genomelake data..")
data = load_directory(args.data_dir, in_memory=True)

# Split every chromosome array into its first five columns (channels) and
# write each one as a separate float32 bcolz array under
# /srv/scratch/jesikmin/temp/<channel index>/<chrom>.
file_shapes = {}
for chrom, chrom_data in data.items():
    logger.info("Chrom " + str(chrom) + "...")
    for _channel_idx in range(5):
        channel = np.copy(chrom_data._arr[:, _channel_idx])
        output_path = os.path.join(
            "/srv/scratch/jesikmin/temp/" + str(_channel_idx), chrom)
        # os.makedirs raises if output_path already exists, so a previous
        # run's output must be cleared before running this again.
        os.makedirs(output_path)
        _array_writer['bcolz'](channel.astype(np.float32), output_path)
    # The per-channel shape depends only on the chromosome, so record it once
    # per chromosome instead of once per channel.
    file_shapes[chrom] = (chrom_data._arr.shape[0], )
for idx in range(5):
    with open(
            os.path.join("/srv/scratch/jesikmin/temp/" + str(idx),
                         'metadata.json'), 'w') as fpp: