Example #1
0
# set y label
# TMB-style regression target: log of non-synonymous variant count per
# megabase of the Agilent panel's coding region, +1 so the log is finite
# at zero counts. Trailing np.newaxis makes it a column vector (n, 1).
# NOTE(review): assumes panels['cds'] is in bases (hence / 1e6) — confirm.
y_label = np.log(
    sample_df['non_syn_counts'].values /
    (panels.loc[panels['Panel'] == 'Agilent_kit']['cds'].values[0] / 1e6) +
    1)[:, np.newaxis]
# Stratification label per sample: argmax over what is presumably a
# one-hot histology encoding — verify against how samples is built.
y_strat = np.argmax(samples['histology'], axis=-1)

# Quantile regression objective with a matching reporting metric.
losses = [Losses.QuantileLoss()]
metrics = [Metrics.QuantileLoss()]

# Candidate instance encoders (project-defined models): raw pass-through,
# genomic-position binning, and a variant-sequence encoder.
# NOTE(review): positional-argument meanings come from InstanceModels,
# which is not visible here — confirm against its definitions.
encoders = [
    InstanceModels.PassThrough(shape=(1, )),
    InstanceModels.VariantPositionBin(24, 100),
    InstanceModels.VariantSequence(6,
                                   4,
                                   2, [16, 16, 8, 8],
                                   fusion_dimension=32)
]

all_weights = [
    pickle.load(
        open(
            cwd / 'figures' / 'tmb' / 'tcga' / 'VICC_01_R2' / 'results' /
            'run_naive.pkl', 'rb')),
    pickle.load(
        open(
            cwd / 'figures' / 'tmb' / 'tcga' / 'VICC_01_R2' / 'results' /
            'run_position.pkl', 'rb')),
    pickle.load(
        open(
            cwd / 'figures' / 'tmb' / 'tcga' / 'VICC_01_R2' / 'results' /
Example #2
0
# Held-out test set: pair each test index with its label, then take the
# whole split as one batch (drop_remainder=False keeps every sample).
ds_test = tf.data.Dataset.from_tensor_slices((idx_test, y_label[idx_test]))
ds_test = ds_test.batch(len(idx_test), drop_remainder=False)
# Resolve indices to model inputs at map time: gather the matching rows
# of each precomputed feature array in D — int32 for the four sequence
# arrays, float32 for the strand/CDS embeddings. Output is ((features), y).
ds_test = ds_test.map(lambda x, y: ((tf.gather(tf.constant(D['seq_5p'], dtype=tf.int32), x),
                                      tf.gather(tf.constant(D['seq_3p'], dtype=tf.int32), x),
                                      tf.gather(tf.constant(D['seq_ref'], dtype=tf.int32), x),
                                      tf.gather(tf.constant(D['seq_alt'], dtype=tf.int32), x),
                                      tf.gather(tf.constant(D['strand_emb'], dtype=tf.float32), x),
                                      tf.gather(tf.constant(D['cds_emb'], dtype=tf.float32), x)
                                       ),
                                       y,
                                      ))



# Variant-sequence encoder used at the sample level; use_frame=True adds a
# reading-frame input. Positional args are project-defined — confirm
# their meaning against InstanceModels.VariantSequence.
sequence_encoder = InstanceModels.VariantSequence(6, 4, 2, [64, 64, 64, 64], fusion_dimension=128, use_frame=True)
# MIL head over the encoder output; output width equals y_label's last
# dimension. Semantics of mode='none' / output_type='other' live in
# RaggedModels — verify there before relying on them.
mil = RaggedModels.MIL(instance_encoders=[], sample_encoders=[sequence_encoder.model], output_dim=y_label.shape[-1], output_type='other', mil_hidden=[128, 128, 64, 32], mode='none')
losses = [Losses.CrossEntropy()]
# Track accuracy and cross-entropy both unweighted and sample-weighted.
mil.model.compile(loss=losses,
                  metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
                  weighted_metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.001,
                                                     ))

# Early stopping on the weighted validation cross-entropy ('val_weighted_CE'
# presumably comes from the weighted CrossEntropy metric's name — confirm),
# restoring the best checkpoint when training halts.
callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_weighted_CE', min_delta=0.001, patience=10, mode='min', restore_best_weights=True)]


mil.model.fit(ds_train, steps_per_epoch=50,
              validation_data=ds_valid,
              epochs=10000,
              callbacks=callbacks,
Example #3
0
     alt_loader(x, ragged_output=True), strand_loader(x, ragged_output=True),
     tf.gather(tf.constant(types), x)), y))

# Held-out test set: all test indices with labels, served as one batch.
ds_test = tf.data.Dataset.from_tensor_slices((idx_test, y_label[idx_test]))
ds_test = ds_test.batch(len(idx_test), drop_remainder=False)
# Map indices to model inputs via the project loader closures (ragged
# per-variant sequences) plus a dense per-sample tensor gathered from
# `types` — presumably tumor-type codes; verify where types is built.
ds_test = ds_test.map(lambda x, y: (
    (five_p_loader(x, ragged_output=True), three_p_loader(
        x, ragged_output=True), ref_loader(x, ragged_output=True),
     alt_loader(x, ragged_output=True), strand_loader(x, ragged_output=True),
     tf.gather(tf.constant(types), x)), y))

# Per-fold accumulators: training history, test evaluation, model weights.
histories, evaluations, weights = [], [], []
for i in range(3):
    sequence_encoder = InstanceModels.VariantSequence(6, 4, 2, [16, 16, 8, 8])
    sample_encoder = SampleModels.Type(shape=(), dim=len(np.unique(types)))
    # mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model], sample_encoders=[sample_encoder.model], sample_layers=[64, ], output_dim=1, pooling='both', output_type='other', pooled_layers=[32, ])
    mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model],
                           sample_encoders=[sample_encoder.model],
                           fusion='before',
                           output_dim=1,
                           pooling='both',
                           output_type='other',
                           pooled_layers=[
                               32,
                           ])
    losses = ['mse']
    mil.model.compile(loss=losses,
                      metrics=['mse'],
                      optimizer=tf.keras.optimizers.Adam(
Example #4
0
# Held-out test set: every test index paired with its label, one batch.
ds_test = tf.data.Dataset.from_tensor_slices((idx_test, y_label[idx_test]))
ds_test = ds_test.batch(len(idx_test), drop_remainder=False)
# Resolve indices to the model's input tuple by gathering the matching
# rows of each feature array in D: int32 sequences, float32 embedding.
ds_test = ds_test.map(lambda x, y: (
    tuple(tf.gather(tf.constant(D[key], dtype=dtype), x)
          for key, dtype in (('seq_5p', tf.int32),
                             ('seq_3p', tf.int32),
                             ('seq_ref', tf.int32),
                             ('seq_alt', tf.int32),
                             ('strand_emb', tf.float32))),
    y,
))

# Variant-sequence encoder (project model); positional-arg meanings are
# defined in InstanceModels — confirm there.
sequence_encoder = InstanceModels.VariantSequence(6,
                                                  4,
                                                  2, [16, 16, 8, 8],
                                                  fusion_dimension=128)
# MIL head on top of the encoder; output width equals y_label's last
# dimension. mode='none' / output_type='other' semantics live in
# RaggedModels — verify before relying on them.
mil = RaggedModels.MIL(instance_encoders=[],
                       sample_encoders=[sequence_encoder.model],
                       output_dim=y_label.shape[-1],
                       output_type='other',
                       mil_hidden=[128, 128],
                       mode='none')
losses = [Losses.CrossEntropy()]
# Report accuracy and cross-entropy both unweighted and sample-weighted.
mil.model.compile(
    loss=losses,
    metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
    weighted_metrics=[Metrics.Accuracy(),
                      Metrics.CrossEntropy()],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, ))
Example #5
0
    for group, event in zip(cancer_labels, y_label[:, 1])
])
# Inverse-class-frequency sample weights over the stratification labels,
# rescaled so the weights sum to one.
class_counts = {cls: n for cls, n in zip(*np.unique(y_strat, return_counts=True))}
y_weights = np.array([1 / class_counts[label] for label in y_strat])
y_weights = y_weights / y_weights.sum()

# Collected model weights, one entry per fold/run.
weights = []
# Stop when validation cross-entropy plateaus ('val_CE' presumably comes
# from the CrossEntropy metric's name — confirm), keep best checkpoint.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_CE',
                                     min_delta=0.0001,
                                     patience=50,
                                     mode='min',
                                     restore_best_weights=True)
]
# Model emits probabilities (see output_type below), so the loss and
# metric both take from_logits=False.
losses = [Losses.CrossEntropy(from_logits=False)]
sequence_encoder = InstanceModels.VariantSequence(20, 4, 2, [8, 8, 8, 8])
# Sum-pooled MIL over per-variant encodings with a two-class
# probability output; layer sizes taper 64->16.
mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model],
                       output_dim=2,
                       pooling='sum',
                       mil_hidden=(64, 64, 32, 16),
                       output_type='classification_probability')
# clipvalue bounds each gradient element; 10000 acts as a safety net only.
mil.model.compile(
    loss=losses,
    metrics=[Metrics.CrossEntropy(from_logits=False),
             Metrics.Accuracy()],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=10000))
# Snapshot of the freshly initialized weights — presumably restored before
# each fold so all folds start identically; confirm in the loop below.
initial_weights = mil.model.get_weights()

# Stratified K-fold split to hold out a test set per fold
for idx_train, idx_test in StratifiedKFold(n_splits=9,
                                           random_state=0,