tf.gather(tf.constant(D['seq_3p'], dtype=tf.int32), x), tf.gather(tf.constant(D['seq_ref'], dtype=tf.int32), x), tf.gather(tf.constant(D['seq_alt'], dtype=tf.int32), x), tf.gather(tf.constant(D['strand_emb'], dtype=tf.float32), x), tf.gather(tf.constant(D['cds_emb'], dtype=tf.float32), x) ), y, )) sequence_encoder = InstanceModels.VariantSequence(6, 4, 2, [64, 64, 64, 64], fusion_dimension=128, use_frame=True) mil = RaggedModels.MIL(instance_encoders=[], sample_encoders=[sequence_encoder.model], output_dim=y_label.shape[-1], output_type='other', mil_hidden=[128, 128, 64, 32], mode='none') losses = [Losses.CrossEntropy()] mil.model.compile(loss=losses, metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()], weighted_metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()], optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, )) callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_weighted_CE', min_delta=0.001, patience=10, mode='min', restore_best_weights=True)] mil.model.fit(ds_train, steps_per_epoch=50, validation_data=ds_valid, epochs=10000, callbacks=callbacks, ) with open(cwd / 'figures' / 'controls' / 'instances' / 'sequence' / 'codons' / 'results' / 'weights_with_frame.pkl', 'wb') as f:
min_delta=0.0001, patience=50, mode='min', restore_best_weights=True) ] losses = [Losses.CrossEntropy(from_logits=False)] sequence_encoder = InstanceModels.VariantSequence(20, 4, 2, [8, 8, 8, 8]) mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model], output_dim=2, pooling='sum', mil_hidden=(64, 64, 32, 16), output_type='classification_probability') mil.model.compile( loss=losses, metrics=[Metrics.CrossEntropy(from_logits=False), Metrics.Accuracy()], optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=10000)) initial_weights = mil.model.get_weights() ##stratified K fold for test for idx_train, idx_test in StratifiedKFold(n_splits=9, random_state=0, shuffle=True).split( y_strat, y_strat): ##due to the y_strat levels not being constant this idx_train/idx_valid split is not deterministic idx_train, idx_valid = [ idx_train[idx] for idx in list( StratifiedShuffleSplit(n_splits=1, test_size=300, random_state=0). split(np.zeros_like(y_strat)[idx_train], y_strat[idx_train]))[0] ]