Esempio n. 1
0
    tf.float32)

# Input loaders grouped per model branch — appears to parallel the three
# `encoders` defined below (passthrough, positional, variant-sequence);
# confirm ordering against the dataset-construction code.
loaders = [
    [ones_loader],
    [pos_loader, bin_loader, chr_loader],
    [five_p_loader, three_p_loader, ref_loader, alt_loader, strand_loader],
]

# Regression target: log(1 + TMB), where TMB is the non-synonymous variant
# count normalized by the Agilent panel's 'cds' value scaled by 1e6
# (presumably CDS size in megabases — verify against panel metadata).
# np.log1p(x) == np.log(x + 1) but is numerically accurate for small x.
y_label = np.log1p(
    sample_df['non_syn_counts'].values /
    (panels.loc[panels['Panel'] == 'Agilent_kit']['cds'].values[0] / 1e6)
)[:, np.newaxis]
# Stratification labels: integer class index per sample taken as the argmax
# of the one-hot-style 'histology' array.
y_strat = np.argmax(samples['histology'], axis=-1)

# Quantile-regression objective; the same quantile loss is also tracked as a
# metric during training.
losses = [Losses.QuantileLoss()]
metrics = [Metrics.QuantileLoss()]

# One instance encoder per loader group above: a shape-(1,) passthrough,
# a position/bin encoder (24, 100), and a sequence encoder fused to 32 dims.
# NOTE(review): the positional-argument meanings of VariantSequence(6, 4, 2,
# [16, 16, 8, 8]) are not visible here — confirm against InstanceModels.
encoders = [
    InstanceModels.PassThrough(shape=(1, )),
    InstanceModels.VariantPositionBin(24, 100),
    InstanceModels.VariantSequence(6,
                                   4,
                                   2, [16, 16, 8, 8],
                                   fusion_dimension=32)
]

all_weights = [
    pickle.load(
        open(
            cwd / 'figures' / 'tmb' / 'tcga' / 'VICC_01_R2' / 'results' /
Esempio n. 2
0
 # Map each element to a tuple of ragged per-variant features (presumably
 # 5'/3' flank, ref/alt sequence, and strand — confirm against the loader
 # definitions); labels y pass through unchanged.
 ds_test = ds_test.map(lambda x, y: (
     (five_p_loader(x, ragged_output=True),
      three_p_loader(x, ragged_output=True),
      ref_loader(x, ragged_output=True), alt_loader(x, ragged_output=True),
      strand_loader(x, ragged_output=True)), y))
 X = False
 while X == False:
     try:
         tile_encoder = InstanceModels.VariantSequence(
             6, 4, 2, [16, 16, 8, 8])
         mil = RaggedModels.MIL(instance_encoders=[tile_encoder.model],
                                output_dim=1,
                                pooling='sum',
                                output_type='other',
                                instance_layers=[128, 64])
         losses = [Losses.CoxPH()]
         mil.model.compile(loss=losses,
                           metrics=[Losses.CoxPH()],
                           optimizer=tf.keras.optimizers.Adam(
                               learning_rate=0.001, ))
         callbacks = [
             tf.keras.callbacks.EarlyStopping(monitor='val_coxph',
                                              min_delta=0.0001,
                                              patience=20,
                                              mode='min',
                                              restore_best_weights=True)
         ]
         history = mil.model.fit(ds_train,
                                 steps_per_epoch=4,
                                 validation_data=ds_valid,
                                 epochs=10000,
Esempio n. 3
0
# Evaluate the whole test split as a single batch.
ds_test = ds_test.batch(len(idx_test), drop_remainder=False)
# Gather per-index features from D: integer sequence arrays plus float
# strand/cds embeddings; y passes through unchanged. NOTE(review): the
# tf.constant(...) calls embed each full array as a constant in the traced
# map graph — consider hoisting them out of the lambda if graph size or
# memory becomes a problem.
ds_test = ds_test.map(lambda x, y: ((tf.gather(tf.constant(D['seq_5p'], dtype=tf.int32), x),
                                      tf.gather(tf.constant(D['seq_3p'], dtype=tf.int32), x),
                                      tf.gather(tf.constant(D['seq_ref'], dtype=tf.int32), x),
                                      tf.gather(tf.constant(D['seq_alt'], dtype=tf.int32), x),
                                      tf.gather(tf.constant(D['strand_emb'], dtype=tf.float32), x),
                                      tf.gather(tf.constant(D['cds_emb'], dtype=tf.float32), x)
                                       ),
                                       y,
                                      ))



# Sequence encoder used at the sample level (passed as a sample encoder, not
# an instance encoder; mode='none' disables MIL pooling here).
# NOTE(review): use_frame=True presumably adds a reading-frame input — confirm.
sequence_encoder = InstanceModels.VariantSequence(6, 4, 2, [64, 64, 64, 64], fusion_dimension=128, use_frame=True)
mil = RaggedModels.MIL(instance_encoders=[], sample_encoders=[sequence_encoder.model], output_dim=y_label.shape[-1], output_type='other', mil_hidden=[128, 128, 64, 32], mode='none')
# Cross-entropy objective; accuracy and CE tracked both unweighted and
# sample-weighted.
losses = [Losses.CrossEntropy()]
mil.model.compile(loss=losses,
                  metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
                  weighted_metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.001,
                                                     ))

# Stop when the weighted validation cross-entropy ('val_weighted_CE') stops
# improving by >= 0.001 for 10 epochs, restoring the best weights.
callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_weighted_CE', min_delta=0.001, patience=10, mode='min', restore_best_weights=True)]


# epochs is effectively unbounded (10000); early stopping decides when to end.
mil.model.fit(ds_train, steps_per_epoch=50,
              validation_data=ds_valid,
              epochs=10000,
              callbacks=callbacks,
              )
Esempio n. 4
0
    strat_dict[(group, event)]
    for group, event in zip(cancer_labels, y_label[:, 1])
])
# Inverse-frequency sample weights over the stratification labels,
# normalized so the weights sum to one.
class_counts = {cls: n for cls, n in zip(*np.unique(y_strat, return_counts=True))}
y_weights = np.array([1 / class_counts[label] for label in y_strat])
y_weights = y_weights / y_weights.sum()

# Accumulator for per-fold model weights — presumably filled inside the
# StratifiedKFold loop below (truncated in this view).
weights = []
# Early stopping on validation cross-entropy with a long patience (50 epochs),
# restoring the best weights when training halts.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_CE',
                                     min_delta=0.0001,
                                     patience=50,
                                     mode='min',
                                     restore_best_weights=True)
]
# Model outputs probabilities ('classification_probability'), so both the loss
# and the CE metric are configured with from_logits=False.
losses = [Losses.CrossEntropy(from_logits=False)]
sequence_encoder = InstanceModels.VariantSequence(20, 4, 2, [8, 8, 8, 8])
# Multiple-instance model: per-variant embeddings are sum-pooled, then a small
# MLP head (64-64-32-16) produces the 2-class probabilities.
mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model],
                       output_dim=2,
                       pooling='sum',
                       mil_hidden=(64, 64, 32, 16),
                       output_type='classification_probability')
# NOTE(review): clipvalue=10000 is effectively no clipping for typical
# gradient magnitudes — confirm this is intentional.
mil.model.compile(
    loss=losses,
    metrics=[Metrics.CrossEntropy(from_logits=False),
             Metrics.Accuracy()],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=10000))
# Snapshot of the freshly-initialized weights — presumably used to reset the
# model at the start of each CV fold (loop below is truncated here).
initial_weights = mil.model.get_weights()

##stratified K fold for test
for idx_train, idx_test in StratifiedKFold(n_splits=9,