def realistic_experiment_articlemodel_local():
    """Run the article model on a small local problem.

    Gets to 0.81 f1, which does not beat the baseline.
    """
    # Small problem: 4 classes, one positive page-part high/low, nearby boxes kept.
    problem = realistic_setting(tot_classes=4, num_pp_high=1, num_pp_low=1, keep_near=True)
    best_score = run_keras_articlemodel(
        problem,
        zero_class=[0] * 4,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=8,
        df_proc_num=1,
        neighbours=3,
        # bin_class_weights=(80.0, 1.0),  # from 100/8000 positive class count for
        #                                 # tot_classes=18, num_pp_high=13, num_pp_low=7
        bin_class_weights=(4.0, 1.0),  # from 80/700 positive class count for
                                       # tot_classes=4, num_pp_high=1, num_pp_low=1
        n_siz=2,
    )
    print(best_score)
def sample_concepts_example():
    """Visually inspect concepts randomly generated from the default setting.

    Draws 10 sampled pages; each is saved as ``plot{i}.png`` with outer
    concept bboxes labelled in blue, inner concepts in red, and green
    dashed segments connecting the bbox centers.
    """
    page_c = realistic_setting()
    for i in range(10):
        drawn_page = page_c.draw_objects(1)[0]
        lims = (-1.0, 1.0)
        fig1 = plt.figure(figsize=(10, 10))
        ax1 = fig1.add_subplot(111, aspect='equal')
        for concept in drawn_page:
            c_ce = lrtb_center(concept.bbox)
            txt = concept.params["name"] if "name" in concept.params else type(concept).__name__
            draw_texted_bbox(ax1, concept.bbox, txt, color='blue')
            for in_concept in concept.in_concepts:
                # Bug fix: fall back to the *inner* concept's class name
                # (original used type(concept).__name__ here by copy-paste).
                txt = in_concept.params["name"] if "name" in in_concept.params \
                    else type(in_concept).__name__
                draw_texted_bbox(ax1, in_concept.bbox, txt, color='red')
                i_ce = lrtb_center(in_concept.bbox)
                ax1.plot([c_ce[0], i_ce[0]], [c_ce[1], i_ce[1]],
                         color='green', marker='o', linestyle='dashed',
                         linewidth=2, markersize=12)
        plt.ylim(lims)
        plt.xlim(lims)
        fig1.savefig("plot{}.format".replace("format", "png").format(i)) if False else \
            fig1.savefig("plot{}.png".format(i))
        # Close the figure so 10 figures do not stay open simultaneously
        # (matplotlib warns about and leaks unclosed figures).
        plt.close(fig1)
        print("saved figure")
def fixed_known_borders_bigger_all_boxes_noshuffle():
    """Known concept borders, predicting *all* boxes, without shuffling.

    Predicts every box (not only concept interiors), bboxes not shuffled.
    realistic_setting(tot_classes=4, num_pp_high=2, num_pp_low=1), predict all,
    shuffle false goes to nonbg micro f1 0.87 (all micro f1 0.98).
    realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7): nonbg f1 0.91.
    """
    setting = realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7)
    best_acc = run_keras_fixed_experiment_binary_bigger(
        setting,
        zero_class=[0] * 18,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=8,
        df_proc_num=2,
        predict_all_boxes=True,
        shuffle_bboxes=False,
    )
    print(best_acc)
def model_sees_all_to_all():
    """Try a model that sees everything without attention, on the harder task.

    Observed scores:
      0.79 nonbg f1 -- tot_classes=4 (1,1), bin_class_weights (1,1), n_siz=2
      0.65 nonbg f1 -- tot_classes=8 (1,1), bin_class_weights (1,1), n_siz=1
    """
    setting = realistic_setting(tot_classes=4, num_pp_high=1, num_pp_low=1)
    score = run_keras_all2all_model(
        setting,
        zero_class=[0] * 4,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=4,
        df_proc_num=2,
        neighbours=3,
        bin_class_weights=(1.0, 1.0),
        # bin_class_weights=(80.0, 1.0),  # from 100/8000 positive class count
        n_siz=1,
    )
    print(score)
def baseline_rendered():
    """Rendered baseline experiment.

    realistic_setting(tot_classes=4, num_pp_high=2, num_pp_low=1) goes to
    0.59 nonbg micro; with bin_class_weights=(4.0, 1.0) it goes to
    0.80-0.86 nonbg micro f1.
    """
    setting = realistic_setting(tot_classes=4, num_pp_high=2, num_pp_low=1)
    run_keras_rendered_experiment_binary(
        setting,
        zero_class=[0] * 4,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=8,
        df_proc_num=1,
        neighbours=3,
        bin_class_weights=(80.0, 1.0),
    )
def fixed_known_borders_bigger():
    """Known concept borders ('apriori info') with a bigger network and setting.

    Gets to 0.98 bin acc, ourf1nonbg 0.97, all f1micro 0.99.
    """
    setting = realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7)
    best_acc = run_keras_fixed_experiment_binary_bigger(
        setting,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=8,
        df_proc_num=1,
    )
    print(best_acc)
def fixed_known_borders():
    """Known concept borders ('apriori info').

    Gets to 0.97 bin acc, ourf1nonbg 0.83, all f1micro 0.96.
    """
    setting = realistic_setting(tot_classes=4, num_pp_high=2, num_pp_low=1)
    best_acc = fixed_experiment_binary(
        setting,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=8,
        df_proc_num=1,
    )
    print(best_acc)
def articlemodel():
    """The model from our original article.

    Observed results:
      tot_classes=18, num_pp_high=13, num_pp_low=7:
        nonbg micro f1 reaches >> 0.35 << in 90 epochs with
        bin_class_weights=(800.0, 1.0); same for (80.0, 1.0) --
        so it needed to see roughly 90*200 pages.
      tot_classes=10, num_pp_high=7, num_pp_low=4: 0.36 with (80.0, 1.0).
      tot_classes=8, num_pp_high=5, num_pp_low=2: 0.40
        (neighbours=3; 5 does not help, 7 does not help).
      tot_classes=8, num_pp_high=5, num_pp_low=2 and neighbours=1: helps -- 0.42.
      tot_classes=4, num_pp_high=2, num_pp_low=1: 0.674.
    """
    setting = realistic_setting(tot_classes=4, num_pp_high=1, num_pp_low=1)
    best_score = run_keras_articlemodel(
        setting,
        zero_class=[0] * 4,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=8,
        df_proc_num=2,
        neighbours=3,
        # bin_class_weights=(80.0, 1.0),  # from 100/8000 positive class count for
        #                                 # tot_classes=18, num_pp_high=13, num_pp_low=7
        bin_class_weights=(4.0, 1.0),  # from 80/700 positive class count for
                                       # tot_classes=4, num_pp_high=1, num_pp_low=1
        n_siz=2,
    )
    print(best_score)
def fixed_known_borders_all_boxes_shuffle():
    """Known concept borders, all boxes, bigger network, WITH shuffling.

    realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7)
    goes to nonbg micro f1 0.88 (all micro f1 0.99).
    """
    setting = realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7)
    best_acc = run_keras_fixed_experiment_binary_bigger(
        setting,
        zero_class=[0] * 18,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=8,
        df_proc_num=1,
        predict_all_boxes=True,
        shuffle_bboxes=True,
    )
    print(best_acc)