from flask import request, render_template

def ga():
    """
    Render the genetic-algorithm page and, on POST, attempt to crack the
    submitted ciphertext (a Caesar or Vigenere cipher in this app) with
    the genetic algorithm.
    """
    if request.method == 'POST':
        cipher_text = request.form['cipher_text']
        key_length = int(request.form['key_length'])
        num_of_generations = int(request.form['generations'])
        data = run_genetic_algorithm(key_length=key_length, cipher_text=cipher_text, number_of_generations=num_of_generations)
        data.append(cipher_text)
        return render_template('genetic_algo.html', data=data)
    return render_template('genetic_algo.html')    
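This view assumes a Flask application and a module providing `run_genetic_algorithm`. A minimal wiring sketch follows; the app object, route path, and module name are assumptions, not part of the original snippet:

from flask import Flask

# Hypothetical module path; adjust to wherever the GA cracker lives.
from crypto_ga import run_genetic_algorithm

app = Flask(__name__)

# Register the view for GET (render the form) and POST (run the GA).
app.add_url_rule('/genetic-algo', view_func=ga, methods=['GET', 'POST'])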
Example #2
import numpy as np
import matplotlib.pyplot as plt

# `all_kwargs`, the GA settings dict holding 'pop_size', 'gen_info',
# 'init_status', 'constraint_penalty', 'demand' and 'reserve_margin',
# is assumed to be built above; its literal is truncated in this snippet.
# `run_genetic_algorithm` and `calculate_constraint_costs` come from the
# surrounding project.

# Set the number of generations
number_of_generations = 500

# Unpack the problem data from the keyword arguments
gen_info = all_kwargs.get('gen_info')
init_status = all_kwargs.get('init_status')
penalty = all_kwargs.get('constraint_penalty')
demand = all_kwargs.get('demand')
reserve_margin = all_kwargs.get('reserve_margin')

# Number of generators and periods
num_gen = gen_info.shape[0]
T = demand.size

# Get a random population of 0/1 schedules to begin with
seed_schedules = np.random.choice(2,
                                  size=(all_kwargs.get('pop_size'), T,
                                        num_gen))

# Run GA
best_genotype, results, population = run_genetic_algorithm(
    number_of_generations, seed_schedules, **all_kwargs)

# Are constraints violated?
constraint_costs = calculate_constraint_costs(best_genotype.schedule, gen_info,
                                              init_status, penalty, demand,
                                              reserve_margin)
print("Constraint costs: {}".format(np.sum(constraint_costs)))

# plot results
fig1, ax1 = plt.subplots()
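The snippet ends as the figure is created. A minimal continuation sketch, assuming `results` exposes a per-generation best-cost series (a hypothetical structure, not confirmed by the original):

# Hypothetical: `results['best_costs']` stands in for whatever
# per-generation record run_genetic_algorithm actually returns.
ax1.plot(results['best_costs'])
ax1.set_xlabel('Generation')
ax1.set_ylabel('Best cost')
fig1.tight_layout()
plt.show()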
Example #3
def process_and_run(args):
    """
    Processes the data and parameters and runs the genetic algorithm.

    Parameters
    ----------
    args : list
        list of command-line arguments, unpacked into 20 named
        parameters at the top of the function body

    Returns
    -------
    train_bacc : float
        training balanced accuracy
    test_bacc : float
        test balanced accuracy
    updates : int
        number of best score updates
    training_time : float
        training time
    first_global : float
        first global best score
    first_avg_pop : float
        first population average score

    """

    train_datafile, test_datafile, rule_list, filter_data, discretize_data, m_bin, a_bin, l_bin, classifier_size, \
        evaluation_threshold, bacc_weight, uniqueness, iterations, fixed_iterations, population_size, elitism, \
        popt_fraction, crossover_probability, mutation_probability, tournament_size = args

    print("##PARAMETERS##")
    if filter_data == 't':
        print("FILTERING: ", "on")
        filter_data = True
    else:
        print("FILTERING: ", "off")
        filter_data = False
    if discretize_data == 't':
        print("DISCRETIZE: ", "on")
        print("DISCRETIZATION M: ", m_bin)
        print("DISCRETIZATION ALPHA: ", a_bin)
        print("DISCRETIZATION LAMBDA: ", l_bin)
    else:
        print("DISCRETIZE: ", "off")
    print("EVALUATION THRESHOLD: ", evaluation_threshold)
    print("MAX SIZE: ", classifier_size)
    print("WEIGHT: ", bacc_weight)
    if uniqueness == 't':
        print("UNIQUENESS: ", "on")
        uniqueness = True
    else:
        print("UNIQUENESS: ", "off")
        uniqueness = False

    if rule_list is not None:
        print("POPULATION PRE-OPTIMIZATION: ", "on")
        print("POPULATION PRE-OPTIMIZED FRACTION: ", popt_fraction)
    print("GA PARAMETERS: ", "TC: ", iterations, ", PS: ", population_size, ", CP: ", crossover_probability, ", MP: ",
          mutation_probability, ", TS: ", tournament_size)

    print("\n##TRAIN DATA##")
    # read the data
    train_dataset, annotation, negatives, positives, features = preproc.read_data(train_datafile)
    annotation = train_dataset["Annots"]

    # discretize data
    if discretize_data == 't':
        print("\n##DISCRETIZATION##")
        data_discretized, features, thresholds, feature_cdds = \
            preproc.discretize_train_data(train_dataset, m_bin, a_bin, l_bin, True)
    else:
        data_discretized = train_dataset
        feature_cdds = {}
        bacc_weight = 1.0

    print("\nTRAINING...")
    start_train = time.time()
    classifier, best_classifiers, updates, first_best_score, first_avg_pop = \
        genetic_algorithm.run_genetic_algorithm(data_discretized, filter_data, iterations, fixed_iterations,
                                                population_size, elitism, rule_list, popt_fraction, classifier_size,
                                                evaluation_threshold, feature_cdds, crossover_probability,
                                                mutation_probability, tournament_size, bacc_weight, uniqueness, True)

    end_train = time.time()
    training_time = end_train - start_train
    print("TRAINING TIME: ", end_train - start_train)

    # evaluate best classifier
    classifier_score, train_bacc, errors, train_error_rates, train_additional_scores, cdd_score = \
        eval.evaluate_classifier(classifier, data_discretized, annotation, positives, negatives, feature_cdds,
                                 uniqueness, bacc_weight)

    print("\n##TRAIN DATA SCORES##")
    print("BACC: ", train_bacc)
    print("CDD SCORE: ", cdd_score)
    print("TPR: ", train_error_rates["tpr"])
    print("TNR: ", train_error_rates["tnr"])
    print("FNR: ", train_error_rates["fpr"])
    print("FPR: ", train_error_rates["fnr"])

    if test_datafile is not None:

        print("\n##TEST DATA##")
        # read test data
        test_dataset, annotation, negatives, positives, features = preproc.read_data(test_datafile)
        annotation = test_dataset["Annots"]

        # discretize data
        if discretize_data == 't':
            print("\n##DISCRETIZATION##")
            data_discretized = preproc.discretize_test_data(test_dataset, thresholds)
        else:
            data_discretized = test_dataset
            feature_cdds = {}
            bacc_weight = 1.0

        # evaluate classifier
        classifier_score, test_bacc, errors, test_error_rates, test_additional_scores, cdd_score = \
            eval.evaluate_classifier(classifier, data_discretized, annotation, positives, negatives, feature_cdds,
                                     uniqueness, bacc_weight)

        print("\n##TEST DATA SCORES##")
        print("BACC: ", test_bacc)
        print("CDD SCORE: ", cdd_score)
        print("TPR: ", test_error_rates["tpr"])
        print("TNR: ", test_error_rates["tnr"])
        print("FNR: ", test_error_rates["fpr"])
        print("FPR: ", test_error_rates["fnr"])

    else:
        test_bacc = None

    return train_bacc, test_bacc, updates, training_time, first_best_score, first_avg_pop
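A sketch of a direct call; every value below is illustrative, and the order mirrors the 20-way unpacking at the top of the function:

# Hypothetical argument values in the order expected by process_and_run.
args = ["train.csv",   # train_datafile
        "test.csv",    # test_datafile
        None,          # rule_list (no pre-optimized rules)
        'f',           # filter_data ('t'/'f')
        't',           # discretize_data ('t'/'f')
        50,            # m_bin
        0.5,           # a_bin
        0.1,           # l_bin
        5,             # classifier_size
        0.75,          # evaluation_threshold
        0.5,           # bacc_weight
        't',           # uniqueness ('t'/'f')
        100,           # iterations
        0,             # fixed_iterations
        50,            # population_size
        True,          # elitism
        0.0,           # popt_fraction
        0.8,           # crossover_probability
        0.1,           # mutation_probability
        3]             # tournament_size
train_bacc, test_bacc, updates, training_time, first_global, first_avg_pop = \
    process_and_run(args)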
Example #4
def train_and_test(data, path, file_name, parameter_set, classifier_size,
                   evaluation_threshold, elitism, rules, uniqueness, repeats,
                   print_results):
    """

    Trains classifier on training data and tests on testing data.

    Parameters
    ----------
    data : list
        list including train data set, test data set and feature cdds list
    path : str
        path to output files
    file_name : str
        name of a file (based on which the further file names are created)
    parameter_set : list
        list of genetic algorithm parameters and objective function weight ([weight, iterations, population size,
        crossover probability, mutation probability, tournament size])
    classifier_size : int
        maximal classifier size
    evaluation_threshold : float
        classifier evaluation threshold
    elitism : bool
        if True the best found solutions are added to the population in each selection operation
    rules : list
        list of pre-optimized rules
    uniqueness : bool
         if True only unique inputs in a classifier are counted, otherwise the input cdd score is multiplied by
         the number of input occurrences
    repeats : int
        number of single test repeats
    print_results : bool
        if True more information is shown

    Returns
    -------
    test_bacc_avg : float
        average test balanced accuracy

    """

    # parameter set
    weight, tc, pop, cp, mp, ts = parameter_set

    # unpack training data, testing data and feature cdds
    training_fold, testing_fold, feature_cdd_fold = data

    # lists of train scores
    train_score_avg = []
    train_bacc_avg = []
    train_tpr_avg = []
    train_tnr_avg = []
    train_fpr_avg = []
    train_fnr_avg = []
    train_f1_avg = []
    train_mcc_avg = []
    train_ppv_avg = []
    train_fdr_avg = []
    train_cdd_avg = []

    # lists of test scores
    test_bacc_avg = []
    test_tpr_avg = []
    test_tnr_avg = []
    test_fpr_avg = []
    test_fnr_avg = []
    test_f1_avg = []
    test_mcc_avg = []
    test_ppv_avg = []
    test_fdr_avg = []

    # lists of numbers of inputs and rules
    inputs_avg = []
    rules_avg = []

    print("\nTRAINING ON DATA FOLD...")

    train_runtimes = []  # training run-times
    update_number = []  # number of score updates

    classifier_list = []

    for i in range(0, repeats):  # repeat tests

        print("\nREPEAT: ", i + 1)

        # measure time
        start_test = time.time()

        # run the algorithm
        classifier, best_classifiers, updates, first_global_best_score, first_avg_population_score \
            = genetic_algorithm.run_genetic_algorithm(train_data=training_fold,
                                                      filter_data=False,
                                                      iterations=tc,
                                                      fixed_iterations=0,
                                                      population_size=pop,
                                                      elitism=elitism,
                                                      rules=rules,
                                                      popt_fraction=0,
                                                      classifier_size=classifier_size,
                                                      evaluation_threshold=evaluation_threshold,
                                                      feature_cdds=feature_cdd_fold,
                                                      crossover_probability=cp,
                                                      mutation_probability=mp,
                                                      tournament_size=ts,
                                                      bacc_weight=weight,
                                                      uniqueness=uniqueness,
                                                      print_results=print_results)

        # measure time
        end_test = time.time()

        classifier_list.append(classifier)

        train_runtimes.append(end_test - start_test)
        update_number.append(updates)

        # get annotation
        header = training_fold.columns.values.tolist()
        samples, annotation, negatives, positives = preproc.get_data_info(
            dataset=training_fold, header=header)

        # calculate best train BACC
        train_score, train_bacc, train_errors, train_error_rates, train_additional_scores, train_cdd = \
            eval.evaluate_classifier(classifier=classifier,
                                     dataset=training_fold,
                                     annotation=annotation,
                                     negatives=negatives,
                                     positives=positives,
                                     feature_cdds=feature_cdd_fold,
                                     uniqueness=uniqueness,
                                     bacc_weight=weight)

        print("TRAIN BACC: ", train_bacc)

        train_score_avg.append(train_score)
        train_bacc_avg.append(train_bacc)

        train_tpr_avg.append(train_error_rates["tpr"])
        train_tnr_avg.append(train_error_rates["tnr"])
        train_fpr_avg.append(train_error_rates["fpr"])
        train_fnr_avg.append(train_error_rates["fnr"])

        train_f1_avg.append(train_additional_scores["f1"])
        train_mcc_avg.append(train_additional_scores["mcc"])
        train_ppv_avg.append(train_additional_scores["ppv"])
        train_fdr_avg.append(train_additional_scores["fdr"])

        train_cdd_avg.append(train_cdd)

        # get annotation
        header = testing_fold.columns.values.tolist()
        samples, annotation, negatives, positives = preproc.get_data_info(
            testing_fold, header)

        # calculate best test BACC
        # the last value is the test CDD score (not used below)
        test_score, test_bacc, test_errors, test_error_rates, test_additional_scores, test_cdd = \
            eval.evaluate_classifier(classifier=classifier,
                                     dataset=testing_fold,
                                     annotation=annotation,
                                     negatives=negatives,
                                     positives=positives,
                                     feature_cdds=feature_cdd_fold,
                                     uniqueness=uniqueness,
                                     bacc_weight=weight)

        test_bacc_avg.append(test_bacc)

        test_tpr_avg.append(test_error_rates["tpr"])
        test_tnr_avg.append(test_error_rates["tnr"])
        test_fpr_avg.append(test_error_rates["fpr"])
        test_fnr_avg.append(test_error_rates["fnr"])

        test_f1_avg.append(test_additional_scores["f1"])
        test_mcc_avg.append(test_additional_scores["mcc"])
        test_ppv_avg.append(test_additional_scores["ppv"])
        test_fdr_avg.append(test_additional_scores["fdr"])

        print("TEST BACC: ", test_bacc)

        # show all found solutions
        if print_results is True:
            print("\n##ALL FOUND CLASSIFIERS##")
            for classifier_str in best_classifiers.solutions_str:
                print(classifier_str)

        # calculate classifier size
        number_of_inputs = len(classifier.get_input_list())
        number_of_rules = len(classifier.rule_set)

        inputs_avg.append(number_of_inputs)
        rules_avg.append(number_of_rules)

    if print_results:
        # rank features by frequency
        print("\n###FEATURE FREQUENCY ANALYSIS###")
        toolbox.rank_features_by_frequency(classifier_list, path, file_name)

        # average scores
        print("\n###AVERAGE SCORES###")

        # calculate train average scores
        print("\nTRAIN AVERAGE RESULTS")
        print("TRAIN AVG BACC: ", numpy.average(train_bacc_avg))
        print("TRAIN AVG STDEV: ", numpy.std(train_bacc_avg, ddof=1))
        print("TRAIN AVG CDD: ", numpy.average(train_cdd_avg))
        print("TRAIN AVG TPR: ", numpy.average(train_tpr_avg))
        print("TRAIN AVG TNR: ", numpy.average(train_tnr_avg))
        print("TRAIN AVG FPR: ", numpy.average(train_fpr_avg))
        print("TRAIN AVG FNR: ", numpy.average(train_fnr_avg))
        print("TRAIN AVG F1: ", numpy.average(train_f1_avg))
        print("TRAIN AVG MCC: ", numpy.average(train_mcc_avg))
        print("TRAIN AVG PPV: ", numpy.average(train_ppv_avg))
        print("TRAIN AVG FDR: ", numpy.average(train_fdr_avg))

        # calculate test average scores
        print("\nTEST AVERAGE RESULTS")
        print("TEST AVG BACC: ", numpy.average(test_bacc_avg))
        print("TEST AVG STDEV: ", numpy.std(test_bacc_avg, ddof=1))
        print("TEST AVG TPR: ", numpy.average(test_tpr_avg))
        print("TEST AVG TNR: ", numpy.average(test_tnr_avg))
        print("TEST AVG FPR: ", numpy.average(test_fpr_avg))
        print("TEST AVG FNR: ", numpy.average(test_fnr_avg))
        print("TEST AVG F1: ", numpy.average(test_f1_avg))
        print("TEST AVG MCC: ", numpy.average(test_mcc_avg))
        print("TEST AVG PV: ", numpy.average(test_ppv_avg))
        print("TEST AVG FDR: ", numpy.average(test_fdr_avg))

        # calculate size averages
        print("\nAVERAGE SIZE")
        print("AVERAGE NUMBER OF INPUTS: ", numpy.average(inputs_avg))
        print("AVERAGE NUMBER OF RULES: ", numpy.average(rules_avg))
        print("MEDIAN OF INPUTS: ", numpy.median(inputs_avg))
        print("MEDIAN OF RULES: ", numpy.median(rules_avg))

        print("\nRUNTIME")
        print("RUN-TIME PER TRAINING: ", numpy.average(train_runtimes))
        print("UPDATES PER TRAINING:", numpy.average(update_number))

        print("CSV;", numpy.average(train_bacc_avg), ";",
              numpy.std(train_bacc_avg, ddof=1), ";",
              numpy.average(test_bacc_avg), ";",
              numpy.std(test_bacc_avg, ddof=1), ";", numpy.average(rules_avg),
              ";", numpy.average(inputs_avg))

    test_bacc_avg = numpy.average(test_bacc_avg)

    return test_bacc_avg
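A sketch of a single call; all concrete values are illustrative, and `train_df`, `test_df` and `cdds` stand for one cross-validation fold:

# Hypothetical fold data and GA parameters.
parameter_set = [0.5,   # weight
                 100,   # tc: iterations
                 50,    # pop: population size
                 0.8,   # cp: crossover probability
                 0.1,   # mp: mutation probability
                 3]     # ts: tournament size

avg_test_bacc = train_and_test(data=[train_df, test_df, cdds],
                               path="results/",
                               file_name="fold_1",
                               parameter_set=parameter_set,
                               classifier_size=5,
                               evaluation_threshold=0.75,
                               elitism=True,
                               rules=None,
                               uniqueness=True,
                               repeats=10,
                               print_results=False)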