Example #1
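# Concatenation model for regression, written against the Keras 1.x API (Merge layer,
# Model(input=..., output=...)). The regulator branch (reg_input/reg_output) and the
# sequence branch (seq_input/seq_output) are assumed to have been built earlier, outside
# this snippet, and input_data.read_data_sets appears to be the project's own data loader.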
print('INFO - %s' % ('building concatenate model.'))
units = 512
x = Merge(mode='concat', concat_axis=1)([reg_output, seq_output])
x = Dense(units, activation='relu')(x)
x = Dropout(0.5, seed=42)(x)
x = Dense(units, activation='relu')(x)
x = Dropout(0.5, seed=42)(x)
rgs_output = Dense(1, activation='linear', name='rgs_output')(x)

model = Model(input=[reg_input, seq_input], output=[rgs_output])
model.compile(loss={'rgs_output': 'mean_squared_error'}, optimizer='sgd')
plot(model, show_shapes=True, to_file='%s/model.eps' % (fig_dir))

print('INFO - %s' % ('loading data.'))
train, val, test = input_data.read_data_sets(train_pct=80,
                                             val_pct=10,
                                             test_pct=10,
                                             mode='whole_experiment')

print('INFO - %s' % ('training model.'))
reduce_lr = ReduceLROnPlateau(verbose=1, factor=0.5, patience=5)
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
checkpoint = ModelCheckpoint(
    filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5" % (log_dir),
    monitor='val_loss')
batchhistory = BatchHistory(val_data=val,
                            loss_function='mse',
                            every_n_batch=1000)
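# BatchHistory is a project-specific callback rather than a built-in Keras one; as
# configured here it tracks validation loss (MSE) every 1000 batches.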
history = model.fit(
    {
        'seq_input': train['seq'],
        'reg_input': train['reg']
Example #2
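# Two-headed variant: a scaled regression output plus a binary label output.
# scaled_rgs_output is produced from rgs_output and label_output by a layer defined
# above, outside this snippet.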
                          name='scaled_rgs_output')([rgs_output, label_output])

model = Model(input=[reg_input, seq_input],
              output=[scaled_rgs_output, label_output])
model.compile(loss={
    'scaled_rgs_output': 'mean_squared_error',
    'label_output': 'binary_crossentropy'
},
              optimizer=SGD(lr=0.01, momentum=0.5),
              loss_weights=[1., 0.])
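# With loss_weights=[1., 0.], only the scaled regression loss drives the weight updates;
# the label head's cross-entropy is still computed and reported, but does not contribute
# to training.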
plot(model, show_shapes=True, to_file='%s/model.eps' % (fig_dir))

print('INFO - %s' % ('loading data.'))
train, val, test = input_data.read_data_sets(train_pct=80,
                                             val_pct=10,
                                             test_pct=10,
                                             conv1d=True,
                                             add_shuffle=True)

print('INFO - %s' % ('training model.'))
reduce_lr = ReduceLROnPlateau(verbose=1, factor=0.5, patience=5)
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
checkpoint = ModelCheckpoint(
    filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5" % (log_dir),
    monitor='val_loss',
    save_best_only=True)
history = model.fit({
    'seq_input': train['seq'],
    'reg_input': train['reg']
}, {
    'scaled_rgs_output': train['expr'],
Example #3
x = Merge(mode='concat', concat_axis=1)([reg_output, seq_output])
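# Classification variant: the merged trunk feeds two Dense/Dropout blocks and a
# three-way class output trained with categorical cross-entropy.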
x = Dense(units, activation='relu')(x)
x = Dropout(0.5, seed=42)(x)
x = Dense(units, activation='relu')(x)
x = Dropout(0.5, seed=42)(x)
cls_output = Dense(3, activation='sigmoid', name='cls_output')(x)

model = Model(input=[reg_input, seq_input], output=[cls_output])
model.compile(loss={'cls_output': 'categorical_crossentropy'},
              optimizer='sgd',
              metrics=['accuracy'])
plot(model, show_shapes=True, to_file='%s/model.eps' % (fig_dir))

print('INFO - %s' % ('loading data.'))
train, val, test = input_data.read_data_sets(train_pct=80,
                                             val_pct=10,
                                             test_pct=10)

print('INFO - %s' % ('training model.'))
reduce_lr = ReduceLROnPlateau(verbose=1, factor=0.5, patience=5)
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
checkpoint = ModelCheckpoint(
    filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5" % (log_dir),
    monitor='val_loss')
history = model.fit({
    'seq_input': train['seq'],
    'reg_input': train['reg']
}, {'cls_output': to_categorical(train['class'])},
                    validation_data=({
                        'seq_input': val['seq'],
                        'reg_input': val['reg']
Example #4
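# Perturbation analysis with a trained model: load a regression checkpoint, predict on the
# full dataset with the original promoters and with MSN2/4 motif-knockout promoters, then
# compare the two prediction sets with a scatter plot and hierarchical clustering via the
# hclust helper defined above.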
    plt.ylabel('distance')
    dendrogram(z, leaf_rotation=90, labels=diff_t.index)
    plt.tight_layout()


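# Scatter plot comparing predictions on the original inputs (x-axis) against predictions
# on a perturbed version of the inputs (y-axis).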
def plot_orig_vs_perturbed(x, y, ylab):
    plt.figure()
    plt.scatter(x, y)
    plt.xlabel('Original')
    plt.ylabel(ylab)


# wildtype model:
model_fn = '../logs/concatenation/regression/model.41-0.2300.hdf5'
model = load_model(model_fn)
full_data, _, _ = input_data.read_data_sets(train_pct=100, val_pct=0, test_pct=0)
pred = model.predict({'seq_input': full_data['seq'], 'reg_input': full_data['reg']},
                     batch_size=100,
                     verbose=1)



# MSN2/4 motif KO:
full_data, _, _ = input_data.read_data_sets(train_pct=100,
                                            val_pct=0,
                                            test_pct=0,
                                            seq_file='%s/yeast_promoters.msn24_ko.fa' % (seq_dir))
pred_2 = model.predict({'seq_input': full_data['seq'], 'reg_input': full_data['reg']},
                       batch_size=100,
                       verbose=1)

plot_orig_vs_perturbed(pred, pred_2, 'MSN2/4 motif KO')
plt.savefig('%s/orig_vs_msn24_motif_ko.png' % fig_dir)

hclust(pred, pred_2, full_data)
plt.savefig('%s/hclust_msn24_motif_ko.pdf' % fig_dir)

Example #5
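# DeepLIFT importance scoring: load a trained classification checkpoint and convert it to
# a DeepLIFT model so that contribution scores can be computed for the inputs. kc is
# presumably deeplift's Keras conversion module, imported outside this snippet.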
from deeplift.blobs import NonlinearMxtsMode
from deeplift.visualization import viz_sequence
import os

from sklearn.cluster.bicluster import SpectralBiclustering
from sklearn.metrics import consensus_score
from scipy.stats import ranksums
from scipy.stats import ttest_ind

from collections import OrderedDict

# Load model:
model_fn = '../logs/concatenation/classification/model.32-0.5284.hdf5'
model = load_model(model_fn)
full_data, _, _ = input_data.read_data_sets(train_pct=100,
                                            val_pct=0,
                                            test_pct=0)

#NonlinearMxtsMode defines the method for computing importance scores.
#NonlinearMxtsMode.DeepLIFT_GenomicsDefault uses the RevealCancel rule on Dense layers
#and the Rescale rule on conv layers (see paper for rationale)
#Other supported values are:
#NonlinearMxtsMode.RevealCancel - DeepLIFT-RevealCancel at all layers (used for the MNIST example)
#NonlinearMxtsMode.Rescale - DeepLIFT-rescale at all layers
#NonlinearMxtsMode.Gradient - the 'multipliers' will be the same as the gradients
#NonlinearMxtsMode.GuidedBackprop - the 'multipliers' will be what you get from guided backprop
#Use deeplift.util.get_integrated_gradients_function to compute integrated gradients
#Feel free to email avanti [dot] [email protected] if anything is unclear
deeplift_model = kc.convert_functional_model(
    model,
    nonlinear_mxts_mode=deeplift.blobs.NonlinearMxtsMode.
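For orientation, a minimal sketch of how contribution scores are typically obtained once the conversion above completes. It follows the 0.5.x-era DeepLIFT README; the layer indices and the input ordering are illustrative assumptions rather than values from this project, and the exact keyword arguments may differ in other DeepLIFT releases.

import numpy as np

# Hypothetical usage sketch (assumes deeplift_model was produced by the conversion above).
# find_scores_layer_idx=0 asks for contributions with respect to the first input layer;
# target_layer_idx=-2 targets the pre-activation of the output layer, as DeepLIFT
# recommends for classifiers.
contribs_func = deeplift_model.get_target_contribs_func(find_scores_layer_idx=0,
                                                        target_layer_idx=-2)

# task_idx selects which output neuron (class) to explain; input_data_list carries one
# array per model input, in the model's input order (assumed here to be reg then seq).
scores = np.array(contribs_func(task_idx=0,
                                input_data_list=[full_data['reg'], full_data['seq']],
                                batch_size=100,
                                progress_update=1000))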
Example #6
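# Sequence-only multi-task regression: one Dense(1) head per treatment, all trained
# jointly with mean squared error on the corresponding column of the expression table.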
x = Dropout(0.5, seed=42)(x)
x = Dense(units)(x)
x = Activation('relu')(x)
x = Dropout(0.5, seed=42)(x)

rgs_output = []
for i in xrange(num_treatment):
    rgs_output.append(Dense(1, name='rgs_output_%s' % i)(x))


model = Model(input=[seq_input], output=rgs_output)
model.compile(loss='mean_squared_error', optimizer=SGD(lr=0.001))
plot(model, show_shapes=True, to_file='%s/model.eps' % (fig_dir))

print('INFO - %s' % ('loading data.'))
train, val, test = input_data.read_data_sets(train_pct=90,
                                             val_pct=5,
                                             test_pct=5,
                                             conv1d=True,
                                             seq_only=True)


print('INFO - %s' % ('training model.'))
reduce_lr = ReduceLROnPlateau(verbose=1, factor=0.5, patience=5)
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
checkpoint = ModelCheckpoint(
    filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5" % (log_dir),
    monitor='val_loss')
train_output_dict = dict()
val_output_dict = dict()
for i in xrange(num_treatment):
    train_output_dict['rgs_output_%s' % i] = train['expr'].iloc[:, i]
    val_output_dict['rgs_output_%s' % i] = val['expr'].iloc[:, i]

history = model.fit({'seq_input': train['seq']}, train_output_dict,
                    validation_data=({'seq_input': val['seq']}, val_output_dict),
                    nb_epoch=200,
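# Concatenation regression model again, this time with mode='whole_gene' data loading;
# the epoch-level Keras history and BatchHistory's per-batch validation losses are
# pickled at the end.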
print('INFO - %s' % ('building concatenate model.'))
units = 512
x = Merge(mode='concat', concat_axis=1)([reg_output, seq_output])
x = Dense(units, activation='relu')(x)
x = Dropout(0.5, seed=42)(x)
x = Dense(units, activation='relu')(x)
x = Dropout(0.5, seed=42)(x)
rgs_output = Dense(1, activation='linear', name='rgs_output')(x)


model = Model(input=[reg_input, seq_input], output=[rgs_output])
model.compile(loss={'rgs_output': 'mean_squared_error'}, optimizer='sgd')
plot(model, show_shapes=True, to_file='%s/model.eps' % (fig_dir))

print('INFO - %s' % ('loading data.'))
train, val, test = input_data.read_data_sets(train_pct=80,
                                             val_pct=10,
                                             test_pct=10,
                                             mode='whole_gene')


print('INFO - %s' % ('training model.'))
reduce_lr = ReduceLROnPlateau(verbose=1, factor=0.5, patience=5)
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
checkpoint = ModelCheckpoint(
    filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5" % (log_dir),
    monitor='val_loss')
batchhistory = BatchHistory(val_data=val, loss_function='mse', every_n_batch=1000)
history = model.fit({'seq_input': train['seq'], 'reg_input': train['reg']},
                    {'rgs_output': train['expr']},
                    validation_data=({'seq_input': val['seq'], 'reg_input': val['reg']},
                                     {'rgs_output': val['expr']}),
                    nb_epoch=100,
                    batch_size=100,
                    callbacks=[early_stopping, checkpoint, reduce_lr, batchhistory],
                    verbose=1)
with open('%s/history.pkl' % (log_dir), 'wb') as f:
    pickle.dump([history.history, batchhistory.val_loss], f)