Exemple #1
0
# Pull the remaining hyperparameters off the parsed command-line arguments.
epsilon = args.e
num_steps = args.steps

# Append a "_<tag>=<value>" suffix to the run name for every hyperparameter
# that differs from the reference value hard-coded here (32, 0.05, 20 --
# presumably the argparse defaults; confirm against the parser definition).
# Suffix order is fixed: batch size, epsilon, number of steps.
if batch_size != 32:
    name = '{}_bs={}'.format(name, batch_size)
if epsilon != 0.05:
    name = '{}_eps={}'.format(name, epsilon)
if num_steps != 20:
    name = '{}_steps={}'.format(name, num_steps)

# Optional free-form suffix; skipped when args.name is empty or None.
if args.name:
    name = '_'.join((name, args.name))
print(name)

# Output location for this run, built from '../results' and args.o.
# NOTE(review): helper.make_directory presumably creates the directory and
# returns its path -- confirm in helper.
results_path = helper.make_directory('../results', args.o)

#-----------------------------------------------------------------
# load data
data_path = '../data'
filepath = os.path.join(data_path, 'synthetic_code_dataset.h5')

# helper.load_data yields seven objects: inputs/targets for the train,
# validation and test splits, followed by `model_test`.
(x_train, y_train,
 x_valid, y_valid,
 x_test, y_test,
 model_test) = helper.load_data(filepath)

# x_train must be 3-D; the second axis of y_train gives the label count.
# NOTE(review): N, L, A look like (examples, sequence length, alphabet
# size) -- confirm against the dataset.
N, L, A = x_train.shape
num_labels = y_train.shape[1]

#-----------------------------------------------------------------
# create model
model = genome_model.model(input_shape=(L, A),
                           num_labels=1,
                           activation=activation,
Exemple #2
0
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import argparse
import helper, tfomics
from model_zoo import cnn_p as genome_model

#-----------------------------------------------------------------

# Fixed experiment settings for this script.
base_name = 'cnn_local'
pool_size = 25
num_trials = 5
batch_size = 32

# Output location built from '../results_test' and 'synthetic'.
# NOTE(review): helper.make_directory presumably creates the directory and
# returns its path -- confirm in helper.
results_path = helper.make_directory('../results_test', 'synthetic')

# load data
data_path = '../data'
filepath = os.path.join(data_path, 'synthetic_code_dataset.h5')

# helper.load_data yields seven objects: inputs/targets for the train,
# validation and test splits, followed by `model_test`.
(x_train, y_train,
 x_valid, y_valid,
 x_test, y_test,
 model_test) = helper.load_data(filepath)

# x_train must be 3-D; the second axis of y_train gives the label count.
N, L, A = x_train.shape
num_labels = y_train.shape[1]

# Two passes over the settings: first with reg=True, then with reg=False.
for reg in [True, False]:
    if reg:
        # Three dropout values and three boolean flags in `bn`.
        # NOTE(review): presumably per-layer dropout rates and batch-norm
        # switches -- confirm against the model constructor.
        dropout = [0.2, 0.2, 0.5]
        bn = [True, True, True]
    else:
        # reg=False: all three dropout values are zero.
        dropout = [0, 0, 0]
import os
import numpy as np
from tensorflow.keras import backend as K
from residualbind import ResidualBind
import helper

#---------------------------------------------------------------------------------------

# Dataset options; the trailing comments list the accepted values.
ss_type = 'seq'  # 'seq', 'pu', or 'struct'
normalization = 'log_norm'  # 'log_norm' or 'clip_norm'

# HDF5 file holding the RNAcompete 2013 data.
data_path = '../data/RNAcompete_2013/rnacompete2013.h5'

# Results are organised as <results_path>/<normalization>_<ss_type>.
# NOTE(review): helper.make_directory presumably creates the directory and
# returns the joined path -- confirm in helper.
results_path = helper.make_directory('../results', 'rnacompete_2013')
save_path = helper.make_directory(results_path,
                                  '_'.join((normalization, ss_type)))

#---------------------------------------------------------------------------------------

# loop over different RNA binding proteins
# `pearsonr_scores` starts empty here; nothing in the visible part of the
# loop appends to it.
pearsonr_scores = []
# Experiment names are read from the dataset file itself.
experiments = helper.get_experiment_names(data_path)
# rbp_index is the position of the experiment in `experiments`; it is what
# selects the dataset to load below.
for rbp_index, experiment in enumerate(experiments):
    print('Analyzing: ' + experiment)

    # load rbp dataset
    # Returns the train/valid/test splits for this experiment, prepared
    # with the module-level ss_type and normalization settings.
    train, valid, test = helper.load_rnacompete_data(
        data_path,
        ss_type=ss_type,
        normalization=normalization,
        rbp_index=rbp_index)

    # load residualbind model
    # Shape of train['inputs'] with its first axis dropped
    # (presumably the sample axis -- confirm).
    input_shape = list(train['inputs'].shape)[1:]
Exemple #4
0
#---------------------------------------------------------------------------------------------------

# Model names and secondary-structure types iterated over by this script.
ss_types = ['seq', 'pu']
models = ['clip_conv_net', 'clip_residualbind']
window = 200

# training parameters
num_epochs = 200
batch_size = 100

# dataset path
# NOTE(review): machine-specific absolute path; adjust for other setups.
dataset_path = '/media/peter/storage/encode_eclip/eclip_datasets'

# set results path
results_path = helper.make_directory('../../results', 'encode_eclip')

# get list of .h5 files in dataset path
file_names = helper.get_file_names(dataset_path)

# loop through models
# Each entry of `models` is a model name string; it is used here only as a
# directory name.
for model in models:

    # model results path
    # One sub-directory per model under the top-level results path.
    model_path = helper.make_directory(results_path, model)

    # loop through secondary structure types
    for ss_type in ss_types:

        # results path for this secondary-structure type, nested under the
        # model's results path
        sstype_path = helper.make_directory(model_path, ss_type)
import numpy as np
import logomaker
from six.moves import cPickle
import matplotlib.pyplot as plt
from scipy import stats
from residualbind import ResidualBind, GlobalImportance
import helper, explain

#---------------------------------------------------------------------

# Dataset options; the trailing comments list the accepted values.
alphabet = 'ACGU'
ss_type = 'seq'  # 'seq', 'pu', or 'struct'
normalization = 'log_norm'  # 'log_norm' or 'clip_norm'
data_path = '../data/RNAcompete_2013/rnacompete2013.h5'

# Existing results location: ../results/rnacompete_2013/<normalization>_<ss_type>.
# These two paths are only joined here, not created.
results_path = os.path.join('../results', 'rnacompete_2013')
save_path = os.path.join(results_path, '_'.join((normalization, ss_type)))

# Output sub-directories under save_path, requested in this order:
# plots, motifs, kmer_motifs.
plot_path, motif_path, kmer_path = (
    helper.make_directory(save_path, subdir)
    for subdir in ('plots', 'motifs', 'kmer_motifs'))

#---------------------------------------------------------------------------------------

# get experiment names
# Names are read from the dataset file at data_path.
experiments = helper.get_experiment_names(data_path)

# loop over different RNA binding proteins
# Three accumulators, all empty at this point; nothing in the visible part
# of the loop fills them.
multiple_sites_all = []
gcbias_all = []
hairpin_all = []
for rbp_index, experiment in enumerate(experiments):
    # Progress line: position in `experiments` and the experiment name.
    print(rbp_index, experiment)
Exemple #6
0
from six.moves import cPickle
import matplotlib.pyplot as plt
from scipy import stats
from residualbind import ResidualBind, GlobalImportance
import helper, explain

#---------------------------------------------------------------------

# Null model setting; its value is also used as a suffix on the output
# directory names below.
# Options: 'profile', 'random', 'dinuc', 'quartile1', 'quartile2',
# 'quartile3', 'quartile4'
null_model = 'profile'

# Dataset options; the trailing comments list the accepted values.
alphabet = 'ACGU'
ss_type = 'seq'  # 'seq', 'pu', or 'struct'
normalization = 'log_norm'  # 'log_norm' or 'clip_norm'
data_path = '../data/RNAcompete_2013/rnacompete2013.h5'

# Existing results location: ../results/rnacompete_2013/<normalization>_<ss_type>.
# These two paths are only joined here, not created.
results_path = os.path.join('../results', 'rnacompete_2013')
save_path = os.path.join(results_path, '_'.join((normalization, ss_type)))

# Output sub-directories under save_path, each suffixed with the null-model
# name, requested in this order: plots_, motifs_, kmer_motifs_.
plot_path, motif_path, kmer_path = (
    helper.make_directory(save_path, prefix + null_model)
    for prefix in ('plots_', 'motifs_', 'kmer_motifs_'))

#---------------------------------------------------------------------------------------

# get experiment names
# Names are read from the dataset file at data_path.
experiments = helper.get_experiment_names(data_path)

# loop over different RNA binding proteins
# Three accumulators, all empty at this point; nothing in the visible part
# of the loop fills them.
multiple_sites_all = []
gcbias_all = []
hairpin_all = []
for rbp_index, experiment in enumerate(experiments):
    # Progress line: position in `experiments` and the experiment name.
    print(rbp_index, experiment)