Example #1
def realistic_experiment_articlemodel_local():
    """
    Runs the article model on a small local problem.
    Gets to 0.81 f1, which does not beat the baseline.
    """
    maxscore = run_keras_articlemodel(realistic_setting(tot_classes=4, num_pp_high=1, num_pp_low=1, keep_near=True),
                                      zero_class=[0] * 4,
                                      validation_pages=100,
                                      n_epochs=100,
                                      verbose=2,
                                      stop_early=True,
                                      key_metric='val_loss',
                                      weights_best_fname='weightstmp.h5',
                                      patience=20,
                                      key_metric_mode='min',
                                      pages_per_epoch=200,
                                      batch_size=8,
                                      df_proc_num=1,
                                      neighbours=3,
                                      # bin_class_weights=(80.0, 1.0),  # from 100/8000 positive class count for
                                      # tot_classes=18, num_pp_high=13, num_pp_low=7
                                      bin_class_weights=(4.0, 1.0),
                                      # from 80/700 positive class count for tot_classes=4, num_pp_high=1, num_pp_low=1
                                      n_siz=2
                                      )
    print(maxscore)
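
The bin_class_weights comments above set the positive-class weight roughly from the positive/total box counts (about 100 positives in 8000 boxes gives the 80:1 weighting; the (4.0, 1.0) value for the small setting was apparently tuned below the raw ratio). A minimal sketch of that heuristic, where approx_bin_class_weights is a hypothetical helper and not part of the project API:

def approx_bin_class_weights(n_positive, n_boxes):
    # Hypothetical helper: weight the positive class roughly by the
    # negative/positive ratio so that the rare positive boxes still
    # contribute noticeably to the loss.
    return (float(n_boxes - n_positive) / n_positive, 1.0)

print(approx_bin_class_weights(100, 8000))  # -> (79.0, 1.0), i.e. roughly (80.0, 1.0)
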
Example #2
def sample_concepts_example():
    """
    Visual inspection of concepts randomly generated from the setting.
    """
    page_c = realistic_setting()
    
    for i in range(10):
        drawn_page = page_c.draw_objects(1)[0]
        lims = (-1.0, 1.0)
        fig1 = plt.figure(figsize=(10, 10))
        ax1 = fig1.add_subplot(111, aspect='equal')
        for concept in drawn_page:
            c_ce = lrtb_center(concept.bbox)
            txt = concept.params["name"] if "name" in concept.params else type(concept).__name__
            draw_texted_bbox(ax1, concept.bbox, txt, color='blue')
            for in_concept in concept.in_concepts:
                txt = in_concept.params["name"] if "name" in in_concept.params else type(in_concept).__name__
                draw_texted_bbox(ax1, in_concept.bbox, txt, color='red')
                i_ce = lrtb_center(in_concept.bbox)
                
                ax1.plot([c_ce[0], i_ce[0]], [c_ce[1], i_ce[1]], color='green', marker='o', linestyle='dashed',
                         linewidth=2, markersize=12)
        plt.ylim(lims)
        plt.xlim(lims)
        fig1.savefig("plot{}.png".format(i))
        print("saved figure")
Example #3
def fixed_known_borders_bigger_all_boxes_noshuffle():
    """
    Experiment with known borders of concepts, but trying to predict all boxes (not only the concepts' interiors)
    and not shuffled.
    
    realistic_setting(tot_classes=4, num_pp_high=2, num_pp_low=1), predict all, shuffle False:
    goes to nonbg micro f1 0.87 (all micro f1 0.98).
    realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7):
    nonbg f1 0.91.
    """
    max_acc = run_keras_fixed_experiment_binary_bigger(realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7),
                                                       zero_class=[0] * 18,
                                                       validation_pages=100,
                                                       n_epochs=100,
                                                       verbose=2,
                                                       stop_early=True,
                                                       key_metric='val_loss',
                                                       weights_best_fname='weightstmp.h5',
                                                       patience=20,
                                                       key_metric_mode='min',
                                                       pages_per_epoch=200,
                                                       batch_size=8,
                                                       df_proc_num=2,
                                                       predict_all_boxes=True,
                                                       shuffle_bboxes=False
                                                       )
    print(max_acc)
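
All of these runners take the same early-stopping arguments (stop_early, key_metric, key_metric_mode, patience, weights_best_fname). Their implementation is not shown here, but given the run_keras_* naming they most likely map onto the standard Keras callbacks; a minimal sketch under that assumption (stopping_callbacks is a hypothetical name):

from keras.callbacks import EarlyStopping, ModelCheckpoint

def stopping_callbacks(key_metric='val_loss', key_metric_mode='min',
                       patience=20, weights_best_fname='weightstmp.h5'):
    # Hypothetical mapping of the shared arguments onto Keras callbacks:
    # stop when the monitored metric stops improving, and keep the weights
    # of the best epoch on disk.
    return [
        EarlyStopping(monitor=key_metric, mode=key_metric_mode, patience=patience),
        ModelCheckpoint(filepath=weights_best_fname, monitor=key_metric,
                        mode=key_metric_mode, save_best_only=True),
    ]

Such a list would then be passed as callbacks=... to model.fit inside the runner.
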
Example #4
def model_sees_all_to_all():
    """
    Since we believe this is now a harder task, let's try a model that sees everything without attention:
    
    # 0.79 nonbg f1, tot_classes=4 (1, 1), bin_class_weights=(1, 1), n_siz=2
    # 0.65 nonbg f1, tot_classes=8 (1, 1), bin_class_weights=(1, 1), n_siz=1
    """
    maxscore = run_keras_all2all_model(realistic_setting(tot_classes=4, num_pp_high=1, num_pp_low=1),
                                       zero_class=[0] * 4,
                                       validation_pages=100,
                                       n_epochs=100,
                                       verbose=2,
                                       stop_early=True,
                                       key_metric='val_loss',
                                       weights_best_fname='weightstmp.h5',
                                       patience=20,
                                       key_metric_mode='min',
                                       pages_per_epoch=200,
                                       batch_size=4,
                                       df_proc_num=2,
                                       neighbours=3,
                                       bin_class_weights=(1.0, 1.0),
                                       # bin_class_weights=(80.0, 1.0),  # from 100/8000 positive class count
                                       n_siz=1
                                       )
    print(maxscore)
Example #5
def baseline_rendered():
    """
    Baseline (rendered).
    realistic_setting(tot_classes=4, num_pp_high=2, num_pp_low=1)
    goes to 0.59 nonbg micro f1;
    with bin_class_weights=(4.0, 1.0) it goes to 0.80-0.86 nonbg micro f1.
    """
    run_keras_rendered_experiment_binary(
        realistic_setting(tot_classes=4, num_pp_high=2, num_pp_low=1),
        zero_class=[0] * 4,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=8,
        df_proc_num=1,
        neighbours=3,
        bin_class_weights=(80.0, 1.0),
    )
Example #6
def fixed_known_borders_bigger():
    """
    Experiment with known borders of concepts ('a priori info'), but with a bigger network and a bigger setting.
    Gets to 0.98 bin acc, ourf1nonbg 0.97, all f1micro 0.99.
    """
    max_acc = run_keras_fixed_experiment_binary_bigger(realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7),
                                                       validation_pages=100,
                                                       n_epochs=100,
                                                       verbose=2,
                                                       stop_early=True,
                                                       key_metric='val_loss',
                                                       weights_best_fname='weightstmp.h5',
                                                       patience=20,
                                                       key_metric_mode='min',
                                                       pages_per_epoch=200,
                                                       batch_size=8,
                                                       df_proc_num=1,
                                                       )
    print(max_acc)
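
The docstrings report an all-classes micro f1 next to a 'nonbg' micro f1. The project metric (ourf1nonbg) is not shown here; the sketch below assumes it is a micro average restricted to the non-background labels, written with scikit-learn:

from sklearn.metrics import f1_score

def nonbg_micro_f1(y_true, y_pred, bg_label=0):
    # Micro-averaged F1 over non-background labels only, so the dominant
    # background class cannot inflate the score.
    labels = sorted(set(y_true) | set(y_pred))
    nonbg = [label for label in labels if label != bg_label]
    return f1_score(y_true, y_pred, labels=nonbg, average='micro')

print(nonbg_micro_f1([0, 1, 2, 0], [0, 1, 0, 0]))  # -> 0.666..., while the all-classes micro f1 would be 0.75
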
Example #7
def fixed_known_borders():
    """
    Experiment with known borders of concepts ('a priori info').
    Gets to 0.97 bin acc, ourf1nonbg 0.83, all f1micro 0.96.
    """
    max_acc = fixed_experiment_binary(realistic_setting(tot_classes=4, num_pp_high=2, num_pp_low=1),
                                      validation_pages=100,
                                      n_epochs=100,
                                      verbose=2,
                                      stop_early=True,
                                      key_metric='val_loss',
                                      weights_best_fname='weightstmp.h5',
                                      patience=20,
                                      key_metric_mode='min',
                                      pages_per_epoch=200,
                                      batch_size=8,
                                      df_proc_num=1,
                                      )
    print(max_acc)
Example #8
def articlemodel():
    """
    Now the model from our original article:
    
    # tot_classes=18, num_pp_high=13, num_pp_low=7:
    # gets nonbg micro f1 to >> 0.35 << in 90 epochs with bin_class_weights=(800.0, 1.0)
    # same for bin_class_weights=(80.0, 1.0)
    # so to reach these results it needed to see roughly 90*200 pages
    
    # tot_classes=10, num_pp_high=7, num_pp_low=4:
    # 0.36, bin_class_weights=(80.0, 1.0)
    
    # tot_classes=8, num_pp_high=5, num_pp_low=2
    # 0.40  # was on nu, neighbours=3; 5 does not help, 7 does not help
    # tot_classes=8, num_pp_high=5, num_pp_low=2 & neighbours = 1 helps
    # - 0.42
    
    # tot_classes = 4, num_pp_high = 2, num_pp_low = 1
    # 0.674
    """
    maxscore = run_keras_articlemodel(
        realistic_setting(tot_classes=4, num_pp_high=1, num_pp_low=1),
        zero_class=[0] * 4,
        validation_pages=100,
        n_epochs=100,
        verbose=2,
        stop_early=True,
        key_metric='val_loss',
        weights_best_fname='weightstmp.h5',
        patience=20,
        key_metric_mode='min',
        pages_per_epoch=200,
        batch_size=8,
        df_proc_num=2,
        neighbours=3,
        # bin_class_weights=(80.0, 1.0),  # from 100/8000 positive class count for tot_classes=18, num_pp_high=13, num_pp_low=7
        bin_class_weights=(4.0, 1.0),
        # from 80/700 positive class count for tot_classes=4, num_pp_high=1, num_pp_low=1
        n_siz=2)
    print(maxscore)
Example #9
def fixed_known_borders_all_boxes_shuffle():
    """
    # With known borders of concepts, ALL BOXES, a bigger network, AND SHUFFLING.
    # realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7)
    # goes to nonbg micro f1 0.88 (all micro f1 0.99)
    """
    max_acc = run_keras_fixed_experiment_binary_bigger(realistic_setting(tot_classes=18, num_pp_high=13, num_pp_low=7),
                                                       zero_class=[0] * 18,
                                                       validation_pages=100,
                                                       n_epochs=100,
                                                       verbose=2,
                                                       stop_early=True,
                                                       key_metric='val_loss',
                                                       weights_best_fname='weightstmp.h5',
                                                       patience=20,
                                                       key_metric_mode='min',
                                                       pages_per_epoch=200,
                                                       batch_size=8,
                                                       df_proc_num=1,
                                                       predict_all_boxes=True,
                                                       shuffle_bboxes=True
                                                       )
    print(max_acc)
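
shuffle_bboxes=True presumably randomizes the order in which each page's boxes are presented, so the model cannot exploit reading order. A minimal sketch of such a per-page shuffle (the function and array names are hypothetical), applying one shared permutation so every box keeps its own target:

import numpy as np

def shuffle_page_bboxes(box_features, box_targets, rng=np.random):
    # One shared permutation for features and targets: only the ordering
    # information is destroyed, the box-to-label assignment is preserved.
    perm = rng.permutation(len(box_features))
    return box_features[perm], box_targets[perm]
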