Example #1
def get_number_of_individual_by_sample(
        sample_name,
        remainder_division_number_individuals_all_samples_except_testing_and_number_of_samples,
        indexes_individuals_sample_training, indexes_individuals_sample_test,
        number_of_samples_except_testing, number_of_individuals):
    if remainder_division_number_individuals_all_samples_except_testing_and_number_of_samples == 0:
        if sample_name == 'training_1' or sample_name == 'training_2' or sample_name == 'validation':
            number_of_individuals[sample_name] = int(
                len(indexes_individuals_sample_training) /
                number_of_samples_except_testing)
        elif sample_name == 'test':
            number_of_individuals[sample_name] = len(
                indexes_individuals_sample_test)
        else:
            raise error.my_custom_error(
                "The name of the sample introduced does not correspond to the given configuration. Please, have a look at the sample_name variable"
            )
    elif remainder_division_number_individuals_all_samples_except_testing_and_number_of_samples == 1:
        if sample_name == 'training_1':
            number_of_individuals[sample_name] = int(
                len(indexes_individuals_sample_training) /
                number_of_samples_except_testing) + 1
        elif sample_name == 'training_2' or sample_name == 'validation':
            number_of_individuals[sample_name] = int(
                len(indexes_individuals_sample_training) /
                number_of_samples_except_testing)
        elif sample_name == 'test':
            number_of_individuals[sample_name] = len(
                indexes_individuals_sample_test)
        else:
            raise error.my_custom_error(
                "The name of the sample introduced does not correspond to the given configuration. Please, have a look at the sample_name variable"
            )

    elif remainder_division_number_individuals_all_samples_except_testing_and_number_of_samples == 2:
        if sample_name == 'training_1' or sample_name == 'training_2':
            number_of_individuals[sample_name] = int(
                len(indexes_individuals_sample_training) /
                number_of_samples_except_testing) + 1
        elif sample_name == 'validation':
            number_of_individuals[sample_name] = int(
                len(indexes_individuals_sample_training) /
                number_of_samples_except_testing)
        elif sample_name == 'test':
            number_of_individuals[sample_name] = len(
                indexes_individuals_sample_test)
        else:
            raise error.my_custom_error(
                "The name of the sample introduced does not correspond to the given configuration. Please, have a look at the sample_name variable"
            )
    return number_of_individuals
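A quick way to see what the remainder logic does: with 14 training individuals split over the 3 non-test samples, the remainder 14 % 3 == 2 gives the extra individual to 'training_1' and 'training_2'. The following sketch only assumes the function above is in scope; all input values are made up for illustration.

indexes_training = list(range(1, 15))   # 14 hypothetical training indexes
indexes_test = list(range(15, 20))      # 5 hypothetical test indexes
number_of_samples_except_testing = 3    # training_1, training_2, validation
remainder = len(indexes_training) % number_of_samples_except_testing  # 2

number_of_individuals = {}
for name in ['training_1', 'training_2', 'validation', 'test']:
    number_of_individuals = get_number_of_individual_by_sample(
        sample_name=name,
        remainder_division_number_individuals_all_samples_except_testing_and_number_of_samples=remainder,
        indexes_individuals_sample_training=indexes_training,
        indexes_individuals_sample_test=indexes_test,
        number_of_samples_except_testing=number_of_samples_except_testing,
        number_of_individuals=number_of_individuals)

print(number_of_individuals)
# {'training_1': 5, 'training_2': 5, 'validation': 4, 'test': 5}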
Example #2
def kernel_function_second_step(model, time_period_1, time_period_2):
    individual_1 = model.data_to_train.iloc[time_period_1 - 1, :]
    individual_2 = model.data_alpha_variables.iloc[time_period_2 - 1, :]
    correspondence_time_period_line_all_samples = model.correspondence_time_period_line_all_samples
    original_index_time_period_1 = model.sample_by_line[model.sample_to_train][
        time_period_1 - 1]
    original_index_time_period_2 = model.sample_by_line[
        model.sample_alpha_variables][time_period_2 - 1]
    correspondence_time_period_line_sample_to_train = correspondence_time_period_line_all_samples[
        model.sample_to_train]
    correspondence_time_period_line_sample_alpha_variables = correspondence_time_period_line_all_samples[
        model.sample_alpha_variables]

    squared_difference_between_individuals = pd.DataFrame(
        data=(individual_1 - individual_2)**2)

    line_associated_to_time_period_1 = correspondence_time_period_line_sample_to_train.loc[
        correspondence_time_period_line_sample_to_train['time_period'] ==
        original_index_time_period_1, 'line'].item()
    line_associated_to_time_period_2 = correspondence_time_period_line_sample_alpha_variables.loc[
        correspondence_time_period_line_sample_alpha_variables['time_period']
        == original_index_time_period_2, 'line'].item()

    if (model.number_of_renewable_energy > 0):
        raise error.my_custom_error(
            "The optimization of the second step is not designed for a number of renewable energy greater than 0. Please, check the value variable in the kernel function."
        )

    value = (
        1 + msvm.dirac_delta(value_1=line_associated_to_time_period_1,
                             value_2=line_associated_to_time_period_2)
    ) * pe.exp(-sum(squared_difference_between_individuals.iloc[node - 1, 0] *
                    model.weights_variables[node]
                    for node in model.indexes_nodes))
    return value
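The expression built above is a line-aware weighted Gaussian kernel: k(x_1, x_2) = (1 + delta) * exp(-sum_n w_n * (x_1n - x_2n)^2), where dirac_delta presumably returns 1 when the two time periods fall on the same line and 0 otherwise. Below is a minimal numpy sketch of the same formula with made-up data and plain floats instead of Pyomo variables; weighted_gaussian_kernel and its arguments are illustrative names, not part of the original code.

import numpy as np

def weighted_gaussian_kernel(x_1, x_2, weights, same_line):
    # Illustrative stand-in for the Pyomo expression built in the kernel above.
    delta = 1.0 if same_line else 0.0
    return (1.0 + delta) * np.exp(-np.sum(weights * (x_1 - x_2) ** 2))

x_1 = np.array([0.2, 0.5, 0.1])       # made-up individuals
x_2 = np.array([0.3, 0.4, 0.1])
weights = np.array([1.0, 2.0, 0.5])   # made-up node weights
print(weighted_gaussian_kernel(x_1, x_2, weights, same_line=True))  # ~1.9409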
Example #3
def get_indexes_individuals_by_sample(sample_name, seed_sampling,
                                      indexes_individuals_sample_training,
                                      indexes_individuals_sample_test,
                                      number_of_individuals,
                                      indexes_individuals):
    np.random.seed(seed=seed_sampling)
    if sample_name == 'training_1':
        indexes_individuals[sample_name] = pd.DataFrame(
            data=sorted(
                np.random.choice(
                    a=indexes_individuals_sample_training['time_period'],
                    size=number_of_individuals[sample_name],
                    replace=False)),
            columns=['time_period'],
            index=np.array(range(1, number_of_individuals[sample_name] + 1)))
    elif sample_name == 'training_2':
        difference_all_individuals_and_individuals_training_1 = np.setdiff1d(
            ar1=np.array(indexes_individuals_sample_training),
            ar2=indexes_individuals['training_1'])
        indexes_individuals[sample_name] = pd.DataFrame(
            data=sorted(
                np.random.choice(a=np.array(
                    difference_all_individuals_and_individuals_training_1),
                                 size=number_of_individuals[sample_name],
                                 replace=False)),
            columns=['time_period'],
            index=np.array(range(1, number_of_individuals[sample_name] + 1)))
    elif sample_name == 'validation':
        difference_all_individuals_and_individuals_training_1 = np.setdiff1d(
            ar1=np.array(indexes_individuals_sample_training),
            ar2=indexes_individuals['training_1'])
        indexes_individuals[sample_name] = pd.DataFrame(
            data=np.setdiff1d(
                ar1=difference_all_individuals_and_individuals_training_1,
                ar2=indexes_individuals['training_2']),
            columns=['time_period'],
            index=np.array(range(1, number_of_individuals[sample_name] + 1)))
    elif sample_name == 'test':
        indexes_individuals[sample_name] = indexes_individuals_sample_test
        indexes_individuals[sample_name].index = range(
            1, number_of_individuals[sample_name] + 1)
    else:
        raise error.my_custom_error(
            "The name of the sample introduced does not correspond to the given configuration. Please, have a look at the sample_name variable"
        )

    return indexes_individuals
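The partition built here works as follows: 'training_1' is drawn without replacement, 'training_2' is drawn from the leftover indexes, and 'validation' keeps whatever remains, so the three subsets are pairwise disjoint. A self-contained sketch of that idea with made-up indexes (it uses numpy's newer default_rng interface instead of the module-level np.random.seed used above):

import numpy as np

rng = np.random.default_rng(1234)
all_training = np.arange(1, 15)                     # 14 made-up indexes

training_1 = np.sort(rng.choice(all_training, size=5, replace=False))
remaining = np.setdiff1d(all_training, training_1)  # indexes not in training_1
training_2 = np.sort(rng.choice(remaining, size=5, replace=False))
validation = np.setdiff1d(remaining, training_2)    # whatever is left over

# The three subsets are pairwise disjoint and together cover all 14 indexes.
assert not set(training_1) & set(training_2)
assert not set(training_1) & set(validation)
print(training_1, training_2, validation)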
Example #4
def get_indexes_individuals_all_lines(sample_name,
                                      indexes_individuals_all_lines,
                                      indexes_individuals, number_of_lines,
                                      seed_shuffle):
    if sample_name == 'training_1' or sample_name == 'training_2':
        indexes_individuals_all_lines[sample_name] = list_split(
            list_to_split=indexes_individuals[sample_name]
            ['time_period'].values.tolist(),
            number_of_parts=number_of_lines,
            seed_shuffle=seed_shuffle)
    elif sample_name == 'validation' or sample_name == 'test':
        indexes_individuals_all_lines[sample_name] = [
            indexes_individuals[sample_name]['time_period'].values.tolist()
        ] * number_of_lines
    else:
        raise error.my_custom_error(
            "The name of the sample introduced does not correspond to the given configuration. Please, have a look at the sample_name variable"
        )

    return indexes_individuals_all_lines
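list_split itself is not shown in this listing. A hypothetical stand-in, assuming it shuffles the indexes reproducibly and splits them into roughly equal parts (a sketch, not the original implementation):

import random

def list_split(list_to_split, number_of_parts, seed_shuffle):
    # Hypothetical helper: shuffle reproducibly, then cut into
    # `number_of_parts` chunks whose sizes differ by at most one.
    shuffled = list(list_to_split)
    random.Random(seed_shuffle).shuffle(shuffled)
    quotient, remainder = divmod(len(shuffled), number_of_parts)
    parts, start = [], 0
    for part in range(number_of_parts):
        end = start + quotient + (1 if part < remainder else 0)
        parts.append(shuffled[start:end])
        start = end
    return parts

print(list_split(list(range(1, 11)), number_of_parts=3, seed_shuffle=0))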
Example #5
def run_optimization_problem_given_solver(
        solver, problem, neos_flag, number_of_variables, number_of_constraints,
        sense_opt_problem, maximum_number_iterations_multistart,
        folder_results, csv_file_name_multistart, ampl_flag):

    if problem == "m2svm_optimal_weights":
        run_m2svm_optimal_weights(
            solver=solver,
            problem=problem,
            neos_flag=neos_flag,
            number_of_variables=number_of_variables,
            number_of_constraints=number_of_constraints,
            sense_opt_problem=sense_opt_problem,
            maximum_number_iterations_multistart=
            maximum_number_iterations_multistart,
            folder_results=folder_results,
            csv_file_name_multistart=csv_file_name_multistart,
            ampl_flag=ampl_flag)
    elif problem == "MINLP_trigonometric_functions":
        run_minlp_trigonometric_functions(
            solver=solver,
            problem=problem,
            neos_flag=neos_flag,
            number_of_variables=number_of_variables,
            number_of_constraints=number_of_constraints,
            sense_opt_problem=sense_opt_problem,
            maximum_number_iterations_multistart=
            maximum_number_iterations_multistart,
            folder_results=folder_results,
            csv_file_name_multistart=csv_file_name_multistart,
            ampl_flag=ampl_flag)
    else:
        raise error.my_custom_error(
            "The given optimization problem does not exist❌🚫 Please, check 👁‍🗨 that the name is well-written ✍"
        )

    return "The optimization problem has been solved"
Example #6
def alternating_approach(
        maximum_number_iterations_alternating_approach,
        threshold_difference_objective_values_second_step,
        default_difference_objective_values_second_step,
        seed_random_prediction_values, index_SVM_regularization_parameter,
        sample_names, lowest_label_value, highest_label_value,
        transformed_label_all_samples, SVM_regularization_parameter,
        seed_initialize_parameters, number_of_nodes, bounds_weights,
        new_label_values, data_all_samples, number_of_renewable_energy,
        correspondence_time_period_line_all_samples, sample_by_line,
        maximum_number_iterations_multistart,
        perturbation_multistart_variables, seed_multistart,
        default_new_objective_value_second_step, line, initial_weights,
        label_values, solver, neos_flag):

    iteration_alternating_approach = 0
    difference_objective_values_second_step = default_difference_objective_values_second_step
    variables_previous_iteration = np.array([math.nan] * number_of_nodes)
    new_objective_value_second_step = default_new_objective_value_second_step
    output_alternating_approach_by_iteration = {}
    while iteration_alternating_approach <= maximum_number_iterations_alternating_approach and difference_objective_values_second_step >= threshold_difference_objective_values_second_step:

        initial_variables = get_initial_variables(
            iteration_alternating_approach=iteration_alternating_approach,
            seed_initialize_parameters=seed_initialize_parameters,
            number_of_nodes=number_of_nodes,
            index_SVM_regularization_parameter=
            index_SVM_regularization_parameter,
            bounds_weights=bounds_weights,
            variables_previous_iteration=variables_previous_iteration,
            initial_weights=initial_weights,
            line=line)
        ###################################################################################################################################################################################################
        # ASUN: The alpha variables are the optimal solution of the Multiclass Support Vector Machine optimization problem. Such a problem is a convex quadratic problem which is solved using Cplex.
        # Since the objective of this repo is to compare the performance of the different non-linear solvers, it is desirable to avoid computing any extra optimization problem. Hence, I copy
        # and hard-code the results of the alpha variables obtained for a toy example with 12 individuals and SVM_regularization_parameter = 1e-3, i.e., C = 1/(1e-3) = 1e3. However, the code that
        # computes the optimal variables in the general case is just commented out, not deleted.
        # If further information about this point is necessary, please do not hesitate to contact Asun =)

        #        output_first_step = fsap.run_first_step_alternating_approach(SVM_regularization_parameter = SVM_regularization_parameter,
        #                                                                     number_of_nodes = number_of_nodes,
        #                                                                     index_SVM_regularization_parameter = index_SVM_regularization_parameter,
        #                                                                     sample_to_train = sample_names[0],
        #                                                                     transformed_label_all_samples = transformed_label_all_samples,
        #                                                                     new_label_values = new_label_values,
        #                                                                     data_all_samples = data_all_samples,
        #                                                                     number_of_renewable_energy = number_of_renewable_energy,
        #                                                                     correspondence_time_period_line_all_samples = correspondence_time_period_line_all_samples,
        #                                                                     sample_by_line = sample_by_line,
        #                                                                     initial_variables = initial_variables,
        #                                                                     sample_names = sample_names,
        #                                                                     line = line,
        #                                                                     label_values = label_values)
        #        alpha_variables = output_first_step['alpha_variables']
        ###################################################################################################################################################################################################
        output_first_step = {}
        output_first_step['accuracy'] = {}
        output_first_step['accuracy'][sample_names[1]] = 1e2
        alpha_variables = pd.DataFrame(data={
            1: [1e-6, 1e-7, 0.999],
            2: [1e-6, 1e-7, 0.999],
            3: [1e-5, 1e-7, 0.999],
            4: [0.000112, 0.203, 0.796],
            5: [1e-5, 0.999, 1e-7],
            6: [1e-5, 1e-6, 0.999],
            7: [0.00108, 0.796, 0.203],
            8: [1e-5, 0.999, 1e-7],
            9: [1e-5, 0.999, 1e-7],
            10: [1e-6, 0.999, 1e-7],
            11: [1e-6, 0.999, 1e-7],
            12: [1e-6, 0.999, 1e-7]
        },
                                       index=new_label_values)
        if (len(data_all_samples['training_1']) != alpha_variables.shape[1]
                or SVM_regularization_parameter != 1e-3):
            raise error.my_custom_error(
                "The data set or the regularization parameter has changed. Please, ask Asun to check the status of the alpha variables"
            )

        output_second_step = ssap.run_second_step_alternating_approach(
            alpha_variables=alpha_variables,
            initial_variables=initial_variables,
            maximum_number_iterations_multistart=
            maximum_number_iterations_multistart,
            new_label_values=new_label_values,
            transformed_label_all_samples=transformed_label_all_samples,
            sample_alpha_variables=sample_names[0],
            sample_to_train=sample_names[1],
            number_of_nodes=number_of_nodes,
            bounds_weights=bounds_weights,
            perturbation_multistart_variables=perturbation_multistart_variables,
            seed_multistart=seed_multistart,
            index_SVM_regularization_parameter=
            index_SVM_regularization_parameter,
            iteration_alternating_approach=iteration_alternating_approach,
            sample_names=sample_names,
            SVM_regularization_parameter=SVM_regularization_parameter,
            data_all_samples=data_all_samples,
            correspondence_time_period_line_all_samples=
            correspondence_time_period_line_all_samples,
            sample_by_line=sample_by_line,
            number_of_renewable_energy=number_of_renewable_energy,
            line=line,
            solver=solver,
            neos_flag=neos_flag)

        if (output_first_step['accuracy'][sample_names[1]] >
                output_second_step['best_accuracy'][sample_names[1]]):
            variables_previous_iteration = initial_variables
        else:
            variables_previous_iteration = output_second_step[
                'optimal_weights']

        old_objective_value_second_step = new_objective_value_second_step
        new_objective_value_second_step = output_second_step[
            'optimal_objective_value']
        difference_objective_values_second_step = abs(
            new_objective_value_second_step - old_objective_value_second_step)
        last_iteration_alternating_approach = iteration_alternating_approach

        output_alternating_approach_by_iteration[
            iteration_alternating_approach] = {
                'first_step': output_first_step,
                'second_step': output_second_step
            }
        iteration_alternating_approach = iteration_alternating_approach + 1

    output_alternating_approach = output_alternating_approach_by_iteration[
        last_iteration_alternating_approach]
    output_alternating_approach[
        'last_iteration_alternating_approach'] = last_iteration_alternating_approach

    return output_alternating_approach
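The loop above is the standard alternating-optimization pattern: repeat the (first step, second step) pair until the change in the second-step objective drops below the threshold or the iteration budget is spent. A stripped-down, self-contained sketch of just that stopping rule, with a made-up geometrically shrinking objective standing in for the real second step:

import math

def alternate_until_converged(solve_step, max_iterations, threshold):
    # Stop once the objective change falls below the threshold or the
    # iteration budget is exhausted.
    previous_objective = math.inf
    for iteration in range(max_iterations + 1):
        objective = solve_step(iteration)
        if abs(objective - previous_objective) < threshold:
            break
        previous_objective = objective
    return iteration, objective

print(alternate_until_converged(lambda k: 2.0 ** -k, max_iterations=50,
                                threshold=1e-6))  # stops at iteration 20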
Example #7
def get_correspondence_time_period_line_all_samples(
        correspondence_time_period_line_all_samples, number_of_lines,
        indexes_individuals_all_lines,
        indexes_individuals_all_samples_and_lines, sample_names):
    for sample_name in sample_names:
        correspondence_time_period_line_all_samples[
            sample_name] = pd.DataFrame(index=[],
                                        columns=['time_period', 'line'])

        if sample_name in ['training_1', 'training_2']:
            for line in range(0, number_of_lines):
                correspondence_time_period_line_by_sample = pd.DataFrame(
                    index=range(
                        1,
                        len(indexes_individuals_all_lines[sample_name][line]) +
                        1),
                    columns=['time_period', 'line'])
                for time_period in range(
                        1,
                        len(indexes_individuals_all_samples_and_lines[line]
                            [sample_name]) + 1):
                    correspondence_time_period_line_by_sample.at[
                        time_period,
                        'time_period'] = indexes_individuals_all_samples_and_lines[
                            line][sample_name][time_period - 1]
                    correspondence_time_period_line_by_sample.at[
                        time_period, 'line'] = line + 1
                correspondence_time_period_line_all_samples[
                    sample_name] = pd.concat([
                        correspondence_time_period_line_all_samples[
                            sample_name],
                        correspondence_time_period_line_by_sample
                    ]).sort_values(by='time_period')
                correspondence_time_period_line_all_samples[
                    sample_name].index = range(
                        1,
                        len(correspondence_time_period_line_all_samples[
                            sample_name]) + 1)

        elif sample_name in ['validation', 'test']:
            default_line = 0  # In the validation and test sets, every individual belongs to all the lines
            default_value_for_line = -1  # Marks that all the individuals share the same lines
            correspondence_time_period_line_by_sample = pd.DataFrame(
                index=range(
                    1,
                    len(indexes_individuals_all_lines[sample_name]
                        [default_line]) + 1),
                columns=['time_period', 'line'])
            for time_period in range(
                    1,
                    len(indexes_individuals_all_samples_and_lines[default_line]
                        [sample_name]) + 1):
                correspondence_time_period_line_by_sample.at[
                    time_period,
                    'time_period'] = indexes_individuals_all_samples_and_lines[
                        default_line][sample_name][time_period - 1]
                correspondence_time_period_line_by_sample.at[
                    time_period, 'line'] = default_value_for_line

            correspondence_time_period_line_all_samples[
                sample_name] = pd.concat([
                    correspondence_time_period_line_all_samples[sample_name],
                    correspondence_time_period_line_by_sample
                ]).sort_values(by='time_period')
            correspondence_time_period_line_all_samples[
                sample_name].index = range(
                    1,
                    len(correspondence_time_period_line_all_samples[
                        sample_name]) + 1)

        else:
            raise error.my_custom_error(
                "The name of the sample introduced does not correspond to the given configuration. Please, have a look at the sample_name variable"
            )

    return correspondence_time_period_line_all_samples
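For reference, a tiny self-contained illustration (made-up time periods) of the table this function builds for a training sample split over two lines; for validation and test samples the 'line' column is filled with -1 instead, meaning every individual is shared by all lines.

import pandas as pd

correspondence = pd.concat([
    pd.DataFrame({'time_period': [2, 5, 9], 'line': 1}),  # individuals assigned to line 1
    pd.DataFrame({'time_period': [1, 4, 7], 'line': 2}),  # individuals assigned to line 2
]).sort_values(by='time_period')
correspondence.index = range(1, len(correspondence) + 1)
print(correspondence)
#    time_period  line
# 1            1     2
# 2            2     1
# ...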