# NOTE(review): this chunk begins mid-statement — "tf.float32)" closes a call
# that starts before the visible region.
tf.float32)
# Loader groups, one list per encoder in `encoders` below (order must match):
# a pass-through "ones" loader, position/bin/chromosome loaders, and the five
# variant-sequence loaders.
loaders = [
    [ones_loader],
    [pos_loader, bin_loader, chr_loader],
    [five_p_loader, three_p_loader, ref_loader, alt_loader, strand_loader],
]
# set y label
# Label: log(counts / size + 1) — non-synonymous variant counts normalized by
# the Agilent panel's 'cds' value / 1e6 (presumably the coding footprint in
# megabases — confirm against the panels table). The trailing np.newaxis makes
# the label column-shaped, i.e. (n_samples, 1).
y_label = np.log(
    sample_df['non_syn_counts'].values
    / (panels.loc[panels['Panel'] == 'Agilent_kit']['cds'].values[0] / 1e6)
    + 1)[:, np.newaxis]
# Stratification key: index of each sample's one-hot histology label.
y_strat = np.argmax(samples['histology'], axis=-1)
# Quantile objective; the same quantile measure is also tracked as a metric.
losses = [Losses.QuantileLoss()]
metrics = [Metrics.QuantileLoss()]
# One encoder per loader group above.
encoders = [
    InstanceModels.PassThrough(shape=(1, )),
    InstanceModels.VariantPositionBin(24, 100),
    InstanceModels.VariantSequence(6, 4, 2, [16, 16, 8, 8],
                                   fusion_dimension=32)
]
# NOTE(review): statement truncated — the pickle.load(open(...)) call and the
# remainder of the `all_weights` list continue past the visible region.
all_weights = [
    pickle.load(
        open(
            cwd / 'figures' / 'tmb' / 'tcga' / 'VICC_01_R2' / 'results' /
# Map the test split through the ragged sequence loaders so each element
# becomes ((5', 3', ref, alt, strand), y).
ds_test = ds_test.map(lambda x, y: (
    (five_p_loader(x, ragged_output=True),
     three_p_loader(x, ragged_output=True),
     ref_loader(x, ragged_output=True),
     alt_loader(x, ragged_output=True),
     strand_loader(x, ragged_output=True)),
    y))
# Retry loop: rebuild and refit the model until the attempt in the try suite
# succeeds. NOTE(review): `while X == False` would be clearer as `while not X`;
# the code that sets X (and the matching except clause) lies past the visible
# region — confirm the loop can actually terminate.
X = False
while X == False:
    try:
        tile_encoder = InstanceModels.VariantSequence(
            6, 4, 2, [16, 16, 8, 8])
        # Sum-pooled multiple-instance model with a single output head, fed to
        # the Cox proportional-hazards objective below.
        mil = RaggedModels.MIL(instance_encoders=[tile_encoder.model],
                               output_dim=1,
                               pooling='sum',
                               output_type='other',
                               instance_layers=[128, 64])
        losses = [Losses.CoxPH()]
        # NOTE(review): the metric is Losses.CoxPH(), not a Metrics class —
        # presumably intentional (the loss doubles as the monitored quantity
        # behind 'val_coxph' below) — confirm.
        mil.model.compile(loss=losses,
                          metrics=[Losses.CoxPH()],
                          optimizer=tf.keras.optimizers.Adam(
                              learning_rate=0.001, ))
        callbacks = [
            tf.keras.callbacks.EarlyStopping(monitor='val_coxph',
                                             min_delta=0.0001,
                                             patience=20,
                                             mode='min',
                                             restore_best_weights=True)
        ]
        # NOTE(review): call truncated — the remaining fit() arguments (and
        # presumably the callbacks wiring) continue past the visible region.
        history = mil.model.fit(ds_train,
                                steps_per_epoch=4,
                                validation_data=ds_valid,
                                epochs=10000,
# Per-variant feature arrays to gather from D, in the order the sequence
# encoder expects: 5'/3' context, ref, alt (int32) plus the strand and CDS
# embeddings (float32).
_TEST_FEATURES = (
    ('seq_5p', tf.int32),
    ('seq_3p', tf.int32),
    ('seq_ref', tf.int32),
    ('seq_alt', tf.int32),
    ('strand_emb', tf.float32),
    ('cds_emb', tf.float32),
)

def _gather_test_features(idx):
    """Gather every feature array in D at the given sample indices."""
    return tuple(tf.gather(tf.constant(D[key], dtype=dtype), idx)
                 for key, dtype in _TEST_FEATURES)

# The whole test split becomes one batch; each index tensor is then replaced
# by the gathered feature tuple.
ds_test = ds_test.batch(len(idx_test), drop_remainder=False)
ds_test = ds_test.map(lambda idx, label: (_gather_test_features(idx), label))

# Sample-level sequence encoder (frame information enabled) feeding a MIL
# model with no instance encoders and no pooling mode.
sequence_encoder = InstanceModels.VariantSequence(6, 4, 2, [64, 64, 64, 64],
                                                  fusion_dimension=128,
                                                  use_frame=True)
mil = RaggedModels.MIL(instance_encoders=[],
                       sample_encoders=[sequence_encoder.model],
                       output_dim=y_label.shape[-1],
                       output_type='other',
                       mil_hidden=[128, 128, 64, 32],
                       mode='none')

# Cross-entropy objective; plain and sample-weighted accuracy/CE are both
# tracked, and early stopping watches the weighted validation CE.
losses = [Losses.CrossEntropy()]
callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_weighted_CE',
                                              min_delta=0.001,
                                              patience=10,
                                              mode='min',
                                              restore_best_weights=True)]
mil.model.compile(loss=losses,
                  metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
                  weighted_metrics=[Metrics.Accuracy(),
                                    Metrics.CrossEntropy()],
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
mil.model.fit(ds_train,
              steps_per_epoch=50,
              validation_data=ds_valid,
              epochs=10000,
              callbacks=callbacks)
# NOTE(review): this chunk begins mid-expression — the comprehension below
# closes a container construction that starts before the visible region
# (mapping (cancer group, event) pairs through strat_dict, presumably to build
# a stratification vector — confirm against the preceding lines).
        strat_dict[(group, event)]
        for group, event in zip(cancer_labels, y_label[:, 1])
    ])
# Inverse-frequency sample weights over the stratification classes,
# normalized to sum to 1.
class_counts = dict(zip(*np.unique(y_strat, return_counts=True)))
y_weights = np.array([1 / class_counts[_] for _ in y_strat])
y_weights /= np.sum(y_weights)
# Accumulator — presumably collects per-fold fitted model weights below;
# confirm against the loop body past the visible region.
weights = []
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_CE',
                                     min_delta=0.0001,
                                     patience=50,
                                     mode='min',
                                     restore_best_weights=True)
]
# Probability-space cross entropy (model emits probabilities, not logits).
losses = [Losses.CrossEntropy(from_logits=False)]
sequence_encoder = InstanceModels.VariantSequence(20, 4, 2, [8, 8, 8, 8])
# Sum-pooled MIL classifier with a two-class probability output.
mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model],
                       output_dim=2,
                       pooling='sum',
                       mil_hidden=(64, 64, 32, 16),
                       output_type='classification_probability')
mil.model.compile(
    loss=losses,
    metrics=[Metrics.CrossEntropy(from_logits=False), Metrics.Accuracy()],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=10000))
# Snapshot of the freshly initialized weights, taken before the CV loop —
# presumably so every fold restarts from identical initialization.
initial_weights = mil.model.get_weights()
##stratified K fold for test
# NOTE(review): statement truncated — the remaining StratifiedKFold arguments
# and the loop body continue past the visible region.
for idx_train, idx_test in StratifiedKFold(n_splits=9,