])
def test_constant_models_categorical(data_setting, accept_range):
    """
    Test that the fixed categorical experiment can easily fit the constant testing setting.

    The metric is sometimes as high as 1.00, but it can also fail because of the clipping.
    Using a bigger number of pages per epoch would probably remove the randomness, but for
    the purpose of this test the lower bound is set to 30% so that it does not fail randomly.
    """
    val_metric = run_keras_fixed_experiment_categorical(data_setting, df_proc_num=1)
    assert accept_range[0] <= val_metric <= accept_range[1]


@pytest.mark.parametrize('data_setting, zero_class, accept_range', [
    (constant_testing_setting_multiclass(), [1, 0], [0.9, 1.0]),
])
def test_rendered_models(data_setting, zero_class, accept_range):
    """
    Test that the rendered categorical model is able to figure out the multiclass setting.
    """
    val_metric = run_keras_rendered_experiment_categorical(
        data_setting, zero_class=zero_class)
    assert accept_range[0] <= val_metric <= accept_range[1]


def test_rendered_concepts_tf_gen():
    """
    Test data generators for the rendered experiment setting.
    """
    test_df_data = ({
def run_keras_articlemodel(
        const_data_def=constant_testing_setting_multiclass(),
        validation_pages=2,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        # key_metric='val_categorical_accuracy',
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=15,
        key_metric_mode='min',
        pages_per_epoch=10,
        batch_size=2,
        zero_class=(1, 0),
        df_proc_num=2,
        neighbours=3,
        count_classes=True,
        bin_class_weights=1.0,  # from the 100/8000 positive class count
        n_siz=1):
    """
    Runs the article model against a dataset and returns the best epoch value of the
    key metric (the minimum, since the default key metric is the validation loss).
    """
    gen = RenderedConceptsPacker(const_data_def,
                                 df_proc_num=df_proc_num,
                                 batch_size=batch_size,
                                 df_batches_to_prefetch=1,
                                 zero_class=zero_class,
                                 use_neighbours=neighbours,
                                 bin_class_weights=bin_class_weights)

    if count_classes:
        n_out_classes = gen.get_batchpadded_shapes()[1][-1]
        classcounts = np.zeros(n_out_classes)
        total_counts = 0
        count_classes_gen = tf_dataset_as_iterator(
            gen.get_final_tf_data_dataset(pages_per_epoch, phase='train'))
        for _ in range(int(pages_per_epoch / batch_size)):
            batch_data = next(count_classes_gen)
            classcounts += np.sum(batch_data[1], axis=(0, 1))
            total_counts += batch_data[1].shape[0] * batch_data[1].shape[1]
        print("class counts done {} / {}".format(classcounts, total_counts))  # cca 100/8000

    neighbours = gen.use_neighbours

    depth_variation = 0
    use_attention = True
    use_more_dense = True

    fields_input = Input(shape=gen.get_batchpadded_shapes()[0]['features'],
                         name='features')  # per page, features
    neighbours_ids_input = Input(
        shape=gen.get_batchpadded_shapes()[0]['neighbours'],
        name='neighbours',
        dtype=np.int32)  # per page, features

    # Merge in positional embeddings - from integers to sines/cosines.
    # In the generator they are at the last 4 positions...
    positions_reading_order_embedded = SinCosPositionalEmbedding(
        4 * n_siz,
        from_inputs_features=[-1, -2, -3, -4],  # embed all 4 integers
        pos_divisor=10000,
        keep_ndim=True)(fields_input)

    # embed the left-right-top-bottom positions
    positions_embedded = SinCosPositionalEmbedding(
        4 * n_siz,
        from_inputs_features=[0, 1, 2, 3],  # embed all 4 integers
        pos_divisor=10000,
        keep_ndim=True)(fields_input)

    merged_result = Concatenate()(
        [fields_input, positions_reading_order_embedded, positions_embedded])

    # gather neighbours so that we will see them and can also operate on them:
    fields_input_with_neighbours = GatherFromIndices(mask_value=0,
                                                     include_self=True,
                                                     flatten_indices_features=True) \
        ([merged_result, neighbours_ids_input]) if neighbours > 0 else merged_result

    fields = TimeDistributed(Dense(
        256 * n_siz, activation='relu'))(fields_input_with_neighbours)

    if depth_variation > 1:
        fields_input_with_neighbours = GatherFromIndices(mask_value=0,
                                                         include_self=True,
                                                         flatten_indices_features=True) \
            ([fields, neighbours_ids_input]) if neighbours > 0 else fields
        fields = TimeDistributed(Dense(
            256 * n_siz, activation='relu'))(fields_input_with_neighbours)

    fields = Dropout(0.15)(fields)

    use_seq_convolution = True
    if use_seq_convolution:
        fields = Conv1D(128 * n_siz, kernel_size=5, padding='same',
                        activation='relu')(fields)  # data_format="channels_last"
    else:
        fields = fields  # Dense(128 * n_siz, activation='relu')(fields)

    # try lstms?
    # lstm1 = Bidirectional(LSTM(64, return_sequences=True, activation='tanh'))(merged_result)
    # lstm2 = Bidirectional(LSTM(64, return_sequences=True, activation='tanh'))(lstm1)
    # flatten_for_a = TimeDistributed(Flatten())(lstm1)
    flatten_for_a = Dense(64 * n_siz, activation='relu')(fields)

    if use_attention:
        tmp_a = AttentionTransformer(
            usesoftmax=True,
            usequerymasks=False,
            num_heads=8 * n_siz,
            num_units=64 * n_siz,
            causality=False)([flatten_for_a, flatten_for_a, flatten_for_a])
        if depth_variation > 1:
            tmp_a = AttentionTransformer(
                usesoftmax=True,
                usequerymasks=False,
                num_heads=8,
                num_units=64 * n_siz,
                causality=False)([tmp_a, tmp_a, tmp_a])
        # we do not want softmax when doing binary classification
        # tmp_a = AttentionTransformer(usesoftmax=True, usequerymasks=False, num_heads=8, num_units=64,
        #                              causality=False)([tmp_a, tmp_a, tmp_a])
    else:
        tmp_a = flatten_for_a

    # tmp_f = TimeDistributed(Flatten())(tmp_a)
    # highway = Concatenate()([flatten_for_a, tmp_f])

    if use_more_dense:
        bef_fork = TimeDistributed(Dense(64 * n_siz, activation='relu'))(tmp_a)
        bef_fork = Dropout(0.15)(bef_fork)
    else:
        bef_fork = tmp_a

    fork_node = TimeDistributed(Dense(64 * n_siz, activation='relu'))(
        bef_fork)  # from this point on, we will fork to different outputs!

    outclassesperbox = Dense(gen.get_batchpadded_shapes()[1][-1],
                             activation='sigmoid')(fork_node)
    # outclassesperbox = Softmax()(outclassesperbox)

    def mean_pred(y_true, y_pred):
        return K.mean(y_pred)

    const_model = Model(inputs=[fields_input, neighbours_ids_input],
                        outputs=outclassesperbox)
    const_model.compile(
        optimizer='adam',
        # loss='categorical_crossentropy', metrics=[metrics.categorical_accuracy],
        loss='binary_crossentropy',
        metrics=[metrics.binary_accuracy],
        sample_weight_mode='temporal')
    const_model.summary()

    # validation:
    eval_gen = tf_dataset_as_iterator(
        gen.get_final_tf_data_dataset(validation_pages, phase='val'))

    def report_f():
        result = evaluate(eval_gen, int(validation_pages / batch_size),
                          const_model, None)
        return result

    callbacks = {
        'checkpointer':
            ModelCheckpoint(weights_best_fname,
                            monitor=key_metric,
                            save_best_only=True,
                            mode=key_metric_mode,
                            verbose=verbose),
        'val_reporter':
            EvaluateFCallback(report_f, monitor=key_metric, mode=key_metric_mode)
    }
    print("Printing metrics on the random model to see where the first epoch starts:")
    callbacks['val_reporter'].on_epoch_end(0)

    if stop_early:
        callbacks['early_stopping'] = EarlyStopping(monitor=key_metric,
                                                    patience=patience,
                                                    mode=key_metric_mode,
                                                    verbose=verbose)

    hist = const_model.fit_generator(
        tf_dataset_as_iterator(
            gen.get_final_tf_data_dataset(pages_per_epoch, phase='train')),
        pages_per_epoch / batch_size,
        n_epochs,
        verbose=verbose,
        # class_weight=class_weight,  # keras cannot use class weights and sample weights at the same time
        validation_data=tf_dataset_as_iterator(
            gen.get_final_tf_data_dataset(validation_pages, phase='val')),
        # we validate on the same set because it is a random generator, so the data are never the same
        validation_steps=validation_pages / batch_size,
        callbacks=[callbacks[key] for key in callbacks],
        workers=0,  # because we use dataflow
        use_multiprocessing=False)
    return min(hist.history[key_metric])
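

# The sketch below is a plain-numpy illustration of the neighbour-gathering idea used
# above (my own sketch under assumed semantics, not the GatherFromIndices layer itself):
# for every field on a page, look up the feature vectors of its neighbour indices and
# flatten them into the feature axis, so a Dense/Conv1D layer sees each field together
# with its neighbours.
def _gather_neighbours_np_sketch(features, neighbour_ids):
    """
    features: [fields, feat] array, neighbour_ids: [fields, n_neigh] integer indices.
    Returns [fields, n_neigh * feat]; e.g. 4 fields x 3 features with 2 neighbours -> (4, 6).
    """
    gathered = np.asarray(features)[np.asarray(neighbour_ids)]  # [fields, n_neigh, feat]
    return gathered.reshape(gathered.shape[0], -1)  # flatten neighbour features per field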
def run_keras_all2all_model(
        const_data_def=constant_testing_setting_multiclass(),
        validation_pages=2,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        # key_metric='val_categorical_accuracy',
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=15,
        key_metric_mode='min',
        pages_per_epoch=10,
        batch_size=2,
        zero_class=(1, 0),
        df_proc_num=2,
        neighbours=3,
        count_classes=True,
        bin_class_weights=1.0,  # from the 100/8000 positive class count
        n_siz=1):
    """
    All-to-all model: allows all fields to see all other fields.
    """
    gen = RenderedConceptsPacker(const_data_def,
                                 df_proc_num=df_proc_num,
                                 batch_size=batch_size,
                                 df_batches_to_prefetch=1,
                                 zero_class=zero_class,
                                 use_neighbours=neighbours,
                                 bin_class_weights=bin_class_weights)

    # class counting is disabled here:
    """
    if count_classes:
        n_out_classes = gen.get_batchpadded_shapes()[1][-1]
        classcounts = np.zeros(n_out_classes)
        total_counts = 0
        count_classes_gen = tf_dataset_as_iterator(
            gen.get_final_tf_data_dataset(pages_per_epoch, phase='train'))
        for _ in range(int(pages_per_epoch / batch_size)):
            batch_data = next(count_classes_gen)
            classcounts += np.sum(batch_data[1], axis=(0, 1))
            total_counts += batch_data[1].shape[0] * batch_data[1].shape[1]
        print("class counts done")
        # cca 100/8000
    """

    neighbours = gen.use_neighbours

    depth_variation = 0
    use_attention = True
    use_more_dense = True

    fields_input = Input(shape=gen.get_batchpadded_shapes()[0]['features'],
                         name='features')  # per page, features
    # neighbours_ids_input = Input(shape=gen.get_batchpadded_shapes()[0]['neighbours'], name='neighbours',
    #                              dtype=np.int32)  # per page, features

    # Merge in positional embeddings - from integers to sines/cosines.
    # In the generator they are at the last 4 positions...
    positions_reading_order_embedded = SinCosPositionalEmbedding(
        4 * n_siz,
        from_inputs_features=[-1, -2, -3, -4],  # embed all 4 integers
        pos_divisor=10000,
        keep_ndim=True)(fields_input)

    # embed the left-right-top-bottom positions
    positions_embedded = SinCosPositionalEmbedding(
        4 * n_siz,
        from_inputs_features=[0, 1, 2, 3],  # embed all 4 integers
        pos_divisor=10000,
        keep_ndim=True)(fields_input)

    merged_result = Concatenate()(
        [fields_input, positions_reading_order_embedded, positions_embedded])

    fields_r = Conv1D(128 * n_siz, kernel_size=5, padding='same',
                      activation='relu')(merged_result)

    # pairwise matrix of all fields against all fields:
    matrix_all = Lambda(make_product_matrix)(fields_r)
    a = Conv2D(128 * n_siz, kernel_size=(5, 5), padding='same',
               activation='relu')(matrix_all)
    a = Conv2D(128 * n_siz, kernel_size=(5, 5), padding='same',
               activation='relu', dilation_rate=5)(a)
    a = Conv2D(128 * n_siz, kernel_size=(5, 5), padding='same',
               activation='relu')(a)

    def diag2d(arr):
        # input:  [..., batches, N, N, features]
        # output: [..., batches, N, features]
        arrshape = tf.shape(arr)
        assert_op = tf.Assert(tf.equal(arrshape[-2], arrshape[-3]), [arr])
        with tf.control_dependencies([assert_op]):
            # asserts arrshape[-3] == arrshape[-2], i.e. that it is square before the features dimension
            newshape = tf.concat([arrshape[0:-3], [-1], arrshape[-1:]], axis=-1)
            arr = tf.reshape(arr, newshape)
            diagind = tf.range(arrshape[-3]) * (arrshape[-2] + 1)
            return tf.gather(arr, diagind, axis=-2)

    def global_reduce(arr):
        return tf.concat([
            diag2d(arr),
            tf.reduce_max(arr, axis=-2),
            tf.reduce_mean(arr, axis=-2),
            tf.reduce_max(arr, axis=-3),
            tf.reduce_mean(arr, axis=-3)
        ], axis=-1)

    sequence = Lambda(global_reduce)(a)
    sequence = Concatenate(axis=-1)([sequence, fields_r])

    if use_more_dense:
        bef_fork = TimeDistributed(Dense(64 * n_siz, activation='relu'))(sequence)
        bef_fork = Dropout(0.15)(bef_fork)
    else:
        bef_fork = sequence

    fork_node = TimeDistributed(Dense(64 * n_siz, activation='relu'))(
        bef_fork)  # from this point on, we will fork to different outputs!

    outclassesperbox = Dense(gen.get_batchpadded_shapes()[1][-1],
                             activation='sigmoid')(fork_node)
    # outclassesperbox = Softmax()(outclassesperbox)

    def mean_pred(y_true, y_pred):
        return K.mean(y_pred)

    const_model = Model(inputs=[fields_input], outputs=outclassesperbox)
    const_model.compile(
        optimizer='adam',
        # loss='categorical_crossentropy', metrics=[metrics.categorical_accuracy],
        loss='binary_crossentropy',
        metrics=[metrics.binary_accuracy],
        sample_weight_mode='temporal')
    const_model.summary()

    # validation:
    eval_gen = tf_dataset_as_iterator(
        gen.get_final_tf_data_dataset(validation_pages, phase='val'))

    def report_f():
        result = evaluate(eval_gen, int(validation_pages / batch_size),
                          const_model, None)
        return result

    callbacks = {
        'checkpointer':
            ModelCheckpoint(weights_best_fname,
                            monitor=key_metric,
                            save_best_only=True,
                            mode=key_metric_mode,
                            verbose=verbose),
        'val_reporter':
            EvaluateFCallback(report_f, monitor=key_metric, mode=key_metric_mode)
    }
    print("Printing metrics on the random model to see where the first epoch starts:")
    callbacks['val_reporter'].on_epoch_end(0)

    if stop_early:
        callbacks['early_stopping'] = EarlyStopping(monitor=key_metric,
                                                    patience=patience,
                                                    mode=key_metric_mode,
                                                    verbose=verbose)

    hist = const_model.fit_generator(
        tf_dataset_as_iterator(
            gen.get_final_tf_data_dataset(pages_per_epoch, phase='train')),
        # .make_one_shot_iterator().get_next(),
        pages_per_epoch / batch_size,
        n_epochs,
        verbose=verbose,
        # class_weight=class_weight,  # keras cannot use class weights and sample weights at the same time
        validation_data=tf_dataset_as_iterator(
            gen.get_final_tf_data_dataset(validation_pages, phase='val')),
        # we validate on the same set because it is a random generator, so the data are never the same
        validation_steps=validation_pages / batch_size,
        callbacks=[callbacks[key] for key in callbacks],
        workers=0,  # because we use dataflow
        use_multiprocessing=False)
    return min(hist.history[key_metric])
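

# The sketch below is a plain-numpy equivalent (my own illustration, not the graph ops
# above) of what global_reduce computes on the pairwise field matrix: the diagonal
# (each field paired with itself) plus per-row and per-column max/mean summaries,
# concatenated on the feature axis, so a [batches, N, N, F] matrix collapses back to a
# [batches, N, 5 * F] per-field sequence.
def _global_reduce_np_sketch(arr):
    """arr: [batches, N, N, features] array; returns [batches, N, 5 * features]."""
    diag = np.moveaxis(np.diagonal(arr, axis1=-3, axis2=-2), -1, -2)  # [batches, N, features]
    return np.concatenate([
        diag,
        arr.max(axis=-2), arr.mean(axis=-2),  # summaries over the second field axis
        arr.max(axis=-3), arr.mean(axis=-3),  # summaries over the first field axis
    ], axis=-1)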
def run_keras_rendered_experiment_categorical(
        const_data_def=constant_testing_setting_multiclass(),
        validation_pages=2,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_categorical_accuracy',
        weights_best_fname='weightstmp.h5',
        patience=15,
        key_metric_mode='max',
        pages_per_epoch=10,
        batch_size=2,
        zero_class=(1, 0),
        df_proc_num=2):
    """
    Runs the simplest rendered model against a dataset and returns the maximum epoch accuracy.
    """
    gen = RenderedConceptsPacker(const_data_def,
                                 df_proc_num=df_proc_num,
                                 batch_size=batch_size,
                                 df_batches_to_prefetch=1,
                                 zero_class=zero_class,
                                 use_neighbours=3)
    neighbours = gen.use_neighbours

    features = Input(shape=gen.get_batchpadded_shapes()[0]['features'],
                     name='features')  # per page, features
    neighbours_ids_input = Input(
        shape=gen.get_batchpadded_shapes()[0]['neighbours'],
        name='neighbours',
        dtype=np.int32)  # per page, features

    inputs_merged = GatherFromIndices(mask_value=0,
                                      include_self=True,
                                      flatten_indices_features=True) \
        ([features, neighbours_ids_input]) if neighbours > 0 else features

    c1 = Conv1D(filters=8, kernel_size=5, padding='same',
                activation='relu')(inputs_merged)
    c2 = Conv1D(filters=8, kernel_size=5, padding='same', activation='relu')(c1)
    fpb = Dense(8, activation='relu')(c2)

    outclassesperbox = Dense(gen.get_batchpadded_shapes()[1][-1],
                             activation='sigmoid')(fpb)
    outclassesperbox = Softmax()(outclassesperbox)

    const_model = Model(inputs=[features, neighbours_ids_input],
                        outputs=outclassesperbox)
    const_model.compile(optimizer='adam',
                        loss='categorical_crossentropy',
                        metrics=[metrics.categorical_accuracy],
                        sample_weight_mode='temporal')
    const_model.summary()

    callbacks = {
        'checkpointer':
            ModelCheckpoint(weights_best_fname,
                            monitor=key_metric,
                            save_best_only=True,
                            mode=key_metric_mode,
                            verbose=verbose),
        # 'datetime': DatetimePrinter(),
        # 'procnum': ProcNumPrinter(),
        # 'memory': MemoryPrinter(),
        # 'weights_printer': PrintWeightsStats(check_on_batch=debug),
        # 'nanterminator': TerminateOnNaNRemember(),
        # 'sacred': SacredCallback(self.run, 'val_loss'),
    }
    if stop_early:
        callbacks['early_stopping'] = EarlyStopping(monitor=key_metric,
                                                    patience=patience,
                                                    mode=key_metric_mode,
                                                    verbose=verbose)

    hist = const_model.fit_generator(
        tf_dataset_as_iterator(
            gen.get_final_tf_data_dataset(pages_per_epoch, phase='train')),
        # .make_one_shot_iterator().get_next(),
        pages_per_epoch / batch_size,
        n_epochs,
        verbose=verbose,
        # class_weight=class_weight,  # keras cannot use class weights and sample weights at the same time
        validation_data=tf_dataset_as_iterator(
            gen.get_final_tf_data_dataset(validation_pages, phase='val')),
        # we validate on the same set because it is a random generator, so the data are never the same
        validation_steps=validation_pages / batch_size,
        callbacks=[callbacks[key] for key in callbacks],
        workers=0,  # because we use dataflow
        use_multiprocessing=False)
    return max(hist.history['val_categorical_accuracy'])
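

# A minimal smoke-run helper (my own sketch, not part of the test suite): the epoch
# count, verbosity and disabled early stopping are assumed quick-check values for
# running the rendered experiment by hand.
def _rendered_experiment_smoke_run():
    acc = run_keras_rendered_experiment_categorical(
        constant_testing_setting_multiclass(),
        n_epochs=5,
        stop_early=False,
        verbose=1)
    print("best val_categorical_accuracy over 5 epochs: {:.3f}".format(acc))
    return acc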