Example 1
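# Assumed imports for this excerpt (not shown in the original snippet); `pc`
# (project constants), `pu` (processing utilities) and `ddl` (data loading)
# are project-specific modules, and create_siamese_network,
# train_network_light, contrastive_loss and absolute_distance_difference
# come from the surrounding code base.
import os
import random
import time

import numpy as np
import keras
from keras import optimizers
from keras.models import load_model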
def main(adjustable, h5_data_list, all_ranking, merged_training_pos,
         merged_training_neg):
    """Runs a the whole training and testing phase
    :return:    array of dataset names, array containing the confusion matrix for each dataset, array containing the
                ranking for each dataset
    """
    if adjustable.load_model_name is not None:
        model = load_model(
            os.path.join(pc.SAVE_LOCATION_MODEL_WEIGHTS,
                         adjustable.load_model_name))
    else:
        model = create_siamese_network(adjustable)

        if adjustable.cost_module_type in ('neural_network', 'euclidean_fc'):
            nadam = optimizers.Nadam(lr=adjustable.learning_rate,
                                     schedule_decay=pc.DECAY_RATE)
            model.compile(loss=adjustable.loss_function,
                          optimizer=nadam,
                          metrics=['accuracy'])
        elif adjustable.cost_module_type in ('euclidean', 'cosine'):
            rms = keras.optimizers.RMSprop()
            model.compile(loss=contrastive_loss,
                          optimizer=rms,
                          metrics=[absolute_distance_difference])

    for epoch in range(adjustable.epochs):
        print('epoch %d/%d' % (epoch + 1, adjustable.epochs))
        # sample from the big set of negative training instances so that
        # the number of negatives matches the number of positives
        random.shuffle(merged_training_neg)
        training_neg_sample = merged_training_neg[0:len(merged_training_pos)]

        # the final list of keys to the instances we use for training
        final_training_data = merged_training_pos + training_neg_sample

        random.shuffle(final_training_data)

        final_training_data = pu.sideways_shuffle(final_training_data)

        final_training_labels = [
            int(line.strip().split(',')[-1]) for line in final_training_data
        ]
        # classification-style cost modules expect one-hot labels
        if adjustable.cost_module_type in ('neural_network', 'euclidean_fc'):
            final_training_labels = keras.utils.to_categorical(
                final_training_labels, pc.NUM_CLASSES)

        train_network_light(adjustable, model, final_training_data,
                            final_training_labels, h5_data_list)

        time_stamp = time.strftime('scnn_%d%m%Y_%H%M')

        if adjustable.save_inbetween and adjustable.iterations == 1:
            if epoch + 1 in adjustable.save_points:
                if adjustable.name_indication == 'epoch':
                    model_name = time_stamp + '_epoch_%d_model.h5' % (epoch + 1)
                    weights_name = time_stamp + '_epoch_%d_weights.h5' % (epoch + 1)
                elif adjustable.name_indication == 'dataset_name' and len(
                        adjustable.datasets) == 1:
                    model_name = time_stamp + '_%s_model.h5' % adjustable.datasets[0]
                    weights_name = time_stamp + '_%s_weights.h5' % adjustable.datasets[0]
                else:
                    model_name = None
                    weights_name = None

                # only save when a supported naming scheme produced file names
                if model_name is not None:
                    model.save(
                        os.path.join(pc.SAVE_LOCATION_MODEL_WEIGHTS, model_name))
                    model.save_weights(
                        os.path.join(pc.SAVE_LOCATION_MODEL_WEIGHTS, weights_name))

    confusion_matrices = []
    ranking_matrices = []
    names = []

    for dataset in range(len(adjustable.datasets)):
        name = adjustable.datasets[dataset]
        names.append(name)
        this_ranking = all_ranking[dataset]
        test_data = ddl.grab_em_by_the_keys(this_ranking, h5_data_list)
        test_data = np.asarray(test_data)

        # make a record of the ranking selection for each dataset
        # for priming
        if adjustable.save_inbetween and adjustable.iterations == 1:
            file_name = '%s_ranking_%s.txt' % (name,
                                               adjustable.ranking_time_name)
            file_name = os.path.join(pc.SAVE_LOCATION_RANKING_FILES, file_name)
            with open(file_name, 'w') as my_file:
                for item in this_ranking:
                    my_file.write(item)

        final_testing_labels = [
            int(line.strip().split(',')[-1]) for line in this_ranking
        ]

        if adjustable.cost_module_type in ('neural_network', 'euclidean_fc'):
            final_testing_labels = keras.utils.to_categorical(
                final_testing_labels, pc.NUM_CLASSES)

        predictions = model.predict([test_data[0, :], test_data[1, :]])
        if adjustable.cost_module_type in ('euclidean', 'cosine'):
            # peek at the first 50 (prediction, label) pairs
            prediction_preview = list(zip(predictions, final_testing_labels))
            print(prediction_preview[0:50])

        # matrix holds counts in the order [TP, FP, TN, FN]
        matrix = pu.make_confusion_matrix(adjustable, predictions,
                                          final_testing_labels)
        accuracy = (matrix[0] + matrix[2]) * 1.0 / (sum(matrix) * 1.0)
        if matrix[0] != 0:
            precision = matrix[0] * 1.0 / (matrix[0] + matrix[1])
        else:
            precision = 0
        confusion_matrices.append(matrix)

        ranking = pu.calculate_CMC(adjustable, predictions)
        ranking_matrices.append(ranking)

        print(
            '%s accuracy: %0.2f   precision: %0.2f   confusion matrix: %s \n CMC: \n %s'
            % (name, accuracy, precision, str(matrix), str(ranking)))

    del model
    return names, confusion_matrices, ranking_matrices
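Example 1 compiles the 'euclidean' and 'cosine' variants with a contrastive_loss that is defined elsewhere in the project. For reference, here is a minimal sketch of the standard contrastive loss in Keras, assuming distances as predictions, label 1 for positive (same-identity) pairs, and a hypothetical margin of 1; the project's actual definition may differ:

from keras import backend as K

def contrastive_loss(y_true, y_pred, margin=1.0):
    # y_pred is the predicted distance between the two embeddings:
    # positive pairs (y_true == 1) are pulled together, while negative
    # pairs are pushed apart until they are at least `margin` away
    positive_term = y_true * K.square(y_pred)
    negative_term = (1.0 - y_true) * K.square(K.maximum(margin - y_pred, 0.0))
    return K.mean(positive_term + negative_term)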
Example 2
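# Assumed import for this excerpt (not shown in the original snippet); `pu`
# is the project's processing-utilities module that provides sideways_shuffle.
import random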
def get_final_training_data(adjustable, train_pos, train_neg):
    """
    Merges the positive and negative training data together accordingly
    :param adjustable:      object of class ProjectVariable
    :param train_pos:       list of string pairs containing keys and labels of the positive training data
    :param train_neg:       list of string pairs containing keys and labels of the negative training data
    :return:                returns a single list of string pairs containing keys and labels of the training data
    """
    if isinstance(train_pos, list):
        if not train_pos:
            number_of_datasets = 0
        elif isinstance(train_pos[0], list):
            number_of_datasets = len(train_pos)
        elif isinstance(train_pos[0], str):
            number_of_datasets = 1
        else:
            print('Warning: unexpected element type in train_pos')
            return
    else:
        print('Error: train_pos must be a list')
        return

    final_training_data = []

    if adjustable.only_test:
        # only test, nothing to do
        print('Only testing, nothing to train here.')
        final_training_data = None
    else:
        # train
        if number_of_datasets == 0:
            print('Error: no training datasets have been specified')
            return
        elif number_of_datasets == 1:
            # single dataset: pool positives and negatives, then shuffle
            final_training_data = train_pos + train_neg
            random.shuffle(final_training_data)

            if adjustable.sideways_shuffle:
                final_training_data = pu.sideways_shuffle(final_training_data)
            random.shuffle(final_training_data)
        elif adjustable.mix:
            # multiple datasets shuffled with each other; pool everything
            # unless a test set exists (at the end) that must stay unmixed
            if adjustable.dataset_test is None or adjustable.mix_with_test:
                # train_pos and train_neg hold one list per dataset here,
                # so flatten them before pooling
                final_training_data = sum(train_pos, []) + sum(train_neg, [])
                random.shuffle(final_training_data)

                if adjustable.sideways_shuffle:
                    final_training_data = pu.sideways_shuffle(final_training_data)
                random.shuffle(final_training_data)
            else:
                # don't mix with the test set: shuffle each dataset partition
                # on its own and concatenate the partitions in order
                for index in range(len(train_neg)):
                    partition = train_pos[index] + train_neg[index]
                    random.shuffle(partition)

                    if adjustable.sideways_shuffle:
                        partition = pu.sideways_shuffle(partition)
                    random.shuffle(partition)

                    final_training_data += partition
        else:
            # multiple datasets trained in order: shuffle each dataset
            # partition on its own and concatenate the partitions in order
            for index in range(len(train_neg)):
                partition = train_pos[index] + train_neg[index]
                random.shuffle(partition)

                if adjustable.sideways_shuffle:
                    partition = pu.sideways_shuffle(partition)
                random.shuffle(partition)

                final_training_data += partition

    return final_training_data
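To exercise the single-dataset path of get_final_training_data, a hypothetical call could look as follows; the 'key1,key2,label' string format is taken from the code above, while the keys themselves are made up and the no-argument construction of ProjectVariable is an assumption:

adjustable = ProjectVariable()      # assumed no-argument construction
adjustable.only_test = False
adjustable.sideways_shuffle = True

train_pos = ['cam_a_001,cam_b_001,1', 'cam_a_002,cam_b_002,1']  # made-up keys
train_neg = ['cam_a_001,cam_b_002,0', 'cam_a_002,cam_b_001,0']

final_data = get_final_training_data(adjustable, train_pos, train_neg)
# final_data is a shuffled (and possibly pair-swapped) mix of the four pairs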