예제 #1
0
class DataSetAnalyzer:
    """Statistics, plots and RBFN weight export for a CSV data set.

    The data is read once, at construction time, through ``CSVUtils``.
    Throughout the class every column of ``self.data`` is treated as one
    target and every row as one feature (see ``get_targets_number`` and
    ``get_features_number``).
    """

    def __init__(self, file_path, file_name):
        """Validate ``file_path`` and load ``file_name`` from it.

        Args:
            file_path: Directory of the data file; checked with
                ``Validator.is_folder``.
            file_name: Name of the data file handed to ``CSVUtils``.
        """
        self.validator = Validator()
        self.validator.is_folder(file_path)
        self.normalized = False
        self.norm = None
        self.file_name = file_name
        self.file_path = file_path
        self.data, self.header = self.set_data()
        self.rbfn = RBFN()

    def set_data(self):
        """Read the file via ``CSVUtils`` and return what ``get_data`` yields.

        ``__init__`` unpacks the result into ``(data, header)``.
        """
        csv_utils = CSVUtils(dir_path=self.file_path, file_name=self.file_name)
        return csv_utils.get_data(has_header=None,
                                  header_line=None,
                                  transpose=False)

    def set_normalized(self, normalized):
        """Choose whether ``get_targets`` returns normalized data."""
        self.validator.is_bool(normalized)
        self.normalized = normalized

    def set_norm(self, norm):
        """Store the norm used by ``get_normalized_target``.

        Args:
            norm: ``None`` (falls back to the L2 abbreviation), the integers
                ``1`` / ``2`` (mapped to the L1 / L2 abbreviations) or any
                other value accepted by ``Constants().get_norms_list()``.
        """
        constants = Constants()
        # Resolve the default before validating so that ``None`` really
        # selects L2 instead of depending on how the validator treats None.
        if norm is None:
            norm = constants.get_l2_norm_abbreviation()
        self.validator.in_list(norm, constants.get_norms_list())
        # Numeric aliases are stored in their abbreviation form; values that
        # already are abbreviations are kept unchanged.
        if norm == 1:
            norm = constants.get_l1_norm_abbreviation()
        elif norm == 2:
            norm = constants.get_l2_norm_abbreviation()
        self.norm = norm

    def set_rbfn(self, rbfn):
        """Replace the RBFN instance used for training and plotting."""
        self.validator.is_object_type(rbfn, RBFN())
        self.rbfn = rbfn

    def get_data(self, transpose=False):
        """Return the raw data as a numpy array, optionally transposed."""
        data = np.array(self.data)
        return data.T if transpose else data

    def get_header(self):
        """Return the header obtained when the file was read."""
        return self.header

    def get_file_path(self):
        """Return the directory the data file was read from."""
        return self.file_path

    def get_file_name(self):
        """Return the name of the data file."""
        return self.file_name

    def get_targets_number(self):
        """Return the number of targets (length of the first data row)."""
        return len(self.data[0])

    def get_features_number(self):
        """Return the number of features (number of data rows)."""
        return len(self.data)

    def get_feature_axis(self):
        """Return the 1-based feature indices used as x axis in plots."""
        return np.arange(1, len(self.data) + 1, 1)

    def get_targets(self, transpose=True):
        """Return the targets, normalized when ``set_normalized(True)`` was used.

        Args:
            transpose: When true (the default) the data is transposed so
                that iterating the result yields one target at a time.
        """
        if self.normalized:
            return self.get_normalized_target(transpose=transpose)
        if transpose:
            return np.transpose(self.data)
        return self.data

    def get_average_target(self):
        """Return the mean over all targets (``axis=0`` of ``get_targets()``)."""
        return np.mean(self.get_targets(), axis=0)

    def get_std_target(self):
        """Return the standard deviation over all targets."""
        return np.std(self.get_targets(), axis=0)

    def get_variance_target(self):
        """Return the variance over all targets."""
        return np.var(self.get_targets(), axis=0)

    def get_mode_target(self):
        """Return the mode over all targets as a flat array.

        The reshape gives a one-dimensional result of length
        ``get_features_number()`` whatever shape ``stats.mode`` reports.
        """
        mode = stats.mode(self.get_targets()).mode
        return np.reshape(mode, self.get_features_number())

    def get_median_target(self):
        """Return the median over all targets."""
        return np.median(self.get_targets(), axis=0)

    def get_normalized_target(self, transpose=True):
        """Return the data normalized along ``axis=1`` with the stored norm.

        Args:
            transpose: When true the data is transposed before normalizing.
        """
        data = np.transpose(self.data) if transpose else self.data
        return normalize(data,
                         norm=Constants().get_norm_abbreviation(self.norm),
                         axis=1)

    def base_plot(self, y_axis, type, style='ro'):
        """Plot ``y_axis`` against the feature axis; base of all stat plots.

        Args:
            y_axis: Values to draw, one per feature.
            type: Name of the statistic, appended to the figure title.
            style: Matplotlib format string.
        """
        # NOTE: ``type`` shadows the builtin; the name is kept because it is
        # part of the method's public signature.
        plt.plot(self.get_feature_axis(), y_axis, style)
        title = 'Theoretical target '
        if self.normalized:
            norm_name = Constants().get_norm_abbreviation(self.norm)
            title += '(norm ' + norm_name + ') '
        title += self.file_name + ' ' + type
        plt.suptitle(title, fontsize=15)
        plt.show()

    def plot_average(self):
        """Plot the mean target."""
        self.base_plot(self.get_average_target(), 'mean', 'ro')

    def plot_std(self):
        """Plot the standard deviation of the targets."""
        self.base_plot(self.get_std_target(), 'standard deviation', 'b^')

    def plot_mode(self):
        """Plot the mode of the targets."""
        self.base_plot(self.get_mode_target(), 'mode', 'y-')

    def plot_median(self):
        """Plot the median of the targets."""
        self.base_plot(self.get_median_target(), 'median', 'k^')

    def plot_variance(self):
        """Plot the variance of the targets."""
        self.base_plot(self.get_variance_target(), 'variance', 'go')

    def plot_mean_and_std(self):
        """Plot mean and standard deviation of the targets in one figure."""
        average_target = self.get_average_target()
        std_target = self.get_std_target()
        feature_axis = self.get_feature_axis()

        # Both series get the same explicit x vector; without it matplotlib
        # would draw the second one against 0..n-1 and shift it by one.
        plt.plot(feature_axis, std_target, 'b^',
                 feature_axis, average_target, 'ro')

        red_patch = mpatches.Patch(color='red', label='average')
        blue_patch = mpatches.Patch(color='blue', label='standard deviation')
        plt.legend(handles=[red_patch, blue_patch])

        plt.suptitle('Theoretical target ' + self.file_name +
                     ' standard deviation and mean',
                     fontsize=15)
        plt.show()

    def save_weigths_in_csv(self,
                            centers,
                            file_path,
                            file_name=None,
                            save_plot=False,
                            class_id=None,
                            float_format='%0.15f',
                            remove_file=False):
        """Train the RBFN on every target and save each weight vector.

        Args:
            centers: Centers handed to ``RBFN.set_centers``.
            file_path: Output directory for the CSV (and the plots).
            file_name: Output name; defaults to the data file name.
            save_plot: When true, a plot of every fit is saved as well.
            class_id: Optional class label forwarded to ``RBFN.save_weigths``.
            float_format: Float format forwarded to ``RBFN.save_weigths``.
            remove_file: When true, an already existing output file is
                deleted before the first weights are written.
        """
        targets = self.get_targets()
        if file_name is None:
            file_name = self.file_name

        rbfn = self.rbfn
        rbfn.set_centers(centers)

        csv_utils = CSVUtils(dir_path=file_path, file_name=file_name)
        file = csv_utils.create_name()
        if self.validator.check_is_file(file) and remove_file:
            os.remove(file)

        for index, target in enumerate(targets, start=1):
            rbfn.set_target(target)
            rbfn.train()
            if save_plot:
                rbfn.plot(name=file_name,
                          save=True,
                          path=file_path,
                          index=index)
            rbfn.save_weigths(file_path,
                              file_name,
                              class_id,
                              float_format=float_format)
        # ``is not None`` so that a class id of 0 counts as a real label.
        # NOTE(review): close_csv() is opaque from here - confirm it should
        # run for every labelled export.
        if class_id is not None:
            csv_utils.close_csv()

    def plot(self, centers, index, save=False):
        """Train the RBFN on the target at ``index`` and plot the fit.

        Args:
            centers: Centers handed to ``RBFN.set_centers``.
            index: Position of the target inside ``get_targets()``.
            save: When true the figure is saved instead of shown; the file
                name is built by ``RBFN.plot`` from ``self.file_path`` and
                ``self.file_name`` (presumably ``file_path`` must end with a
                path separator - verify against ``RBFN.plot``).
        """
        targets = self.get_targets()
        self.validator.index_exists(targets, index)
        rbfn = self.rbfn
        rbfn.set_target(targets[index])
        rbfn.set_centers(centers)
        rbfn.train()
        # Keyword arguments: the first positional parameter of RBFN.plot is
        # ``name``, not ``save``.
        rbfn.plot(name=self.file_name,
                  save=save,
                  path=self.file_path,
                  index=index)

    def plot_all(self, centers, name, save=False):
        """Train the RBFN on every target in turn and plot each fit.

        Args:
            centers: Centers handed to ``RBFN.set_centers``.
            name: Name forwarded to ``RBFN.plot``.
            save: When true each figure is saved (under ``self.file_path``)
                instead of shown.
        """
        rbfn = self.rbfn
        for index, target in enumerate(self.get_targets(), start=1):
            rbfn.set_target(target)
            rbfn.set_centers(centers)
            rbfn.train()
            # A path is required by RBFN.plot as soon as ``save`` is true.
            rbfn.plot(name, save=save, path=self.file_path, index=index)
예제 #2
0
파일: ClassRBFN.py 프로젝트: tety94/rbfn
class RBFN:
    """Radial basis function network with selectable basis function and solver.

    Typical use, as visible in this class: set training data, target and
    centers, call ``train()`` to compute the weights, then ``test()`` or
    ``plot()`` to evaluate the fit.
    """

    def __init__(self, training_data=None, target=None):
        """Initialise the network with the defaults provided by ``Constants``.

        Args:
            training_data: Sample points; ``activation_function`` reads
                ``training_data.shape[0]`` and iterates over it.
            target: Values to approximate at the training points.
        """
        self.epsilon = Constants().get_rbfn_deafult_epsilon()
        self.k = Constants().get_rbfn_default_k()
        self.base_function_type = Constants().get_rbfn_gaussian_abbreviation()
        self.training_data = training_data
        self.target = target
        self.solver = Constants().get_solver_ls_abbreviation()
        self.validator = Validator()

    def calculate_base_function(self, center, x):
        """Evaluate the configured radial basis function for one pair.

        Args:
            center: Center of the basis function.
            x: Point at which the function is evaluated.

        Returns:
            The basis function value for the radius ``norm(center - x)``.
            The Gaussian is used both when selected explicitly and as the
            fallback for any other accepted type.
        """
        constants = Constants()
        radius = norm(center - x)
        function = self.base_function_type
        self.validator.in_list(function,
                               constants.get_rbfn_base_functions_list())

        if function == constants.get_rbfn_polyharmonic_abbreviation():
            k = self.k
            # Polyharmonic spline: r**k for odd k, r**k * ln(r) for even k.
            if k % 2 == 1:
                return radius ** k
            if radius == 0 and k > 0:
                # r**k * ln(r) tends to 0 for r -> 0; evaluating it directly
                # would give 0 * -inf = nan.
                return 0.0
            return radius ** k * np.log(radius)

        scaled_sq = (self.epsilon * radius) ** 2
        if function == constants.get_rbfn_inv_multq_abbreviation():
            return np.sqrt(1 / (1 + scaled_sq))
        if function == constants.get_rbfn_inv_q_abbreviation():
            return 1 / (1 + scaled_sq)
        # Gaussian, also the default basis function.
        return np.exp(-scaled_sq)

    def set_centers(self, centers):
        """Store the centers and cache how many there are."""
        self.centers = centers
        self.num_centers = len(centers)

    def set_epsilon(self, epsilon):
        """Set the shape parameter used by the non-polyharmonic functions."""
        self.epsilon = epsilon

    def set_base_function_type(self, base_function_type):
        """Select the basis function (validated when it is evaluated)."""
        self.base_function_type = base_function_type

    def set_k(self, k):
        """Set the polyharmonic exponent; ``k`` is converted with ``int``."""
        self.k = int(k)

    def set_training_data(self, training_data):
        """Set the sample points used to build the interpolation matrix."""
        self.training_data = training_data

    def set_target(self, target):
        """Set the values the network is trained to reproduce."""
        self.target = target

    def set_solvers(self, solver):
        """Select the solver; must be one of ``Constants().get_valid_solvers()``."""
        self.validator.in_list(solver, Constants().get_valid_solvers())
        self.solver = solver

    def get_weights(self):
        """Return the weights computed by the last ``train()`` call."""
        return self.weights

    def activation_function(self):
        """Build the interpolation matrix of training points against centers.

        Returns:
            Array of shape ``(training_data.shape[0], num_centers)`` whose
            entry ``[i, j]`` is the basis function of center ``j`` evaluated
            at training point ``i``.
        """
        training_data = self.training_data
        interpolation_matrix = np.zeros(
            (training_data.shape[0], self.num_centers), float)
        for center_index, center in enumerate(self.centers):
            for point_index, point in enumerate(training_data):
                interpolation_matrix[point_index, center_index] = (
                    self.calculate_base_function(center, point))
        return interpolation_matrix

    def train(self):
        """Compute the output weights with the selected solver.

        The pseudo-inverse solver multiplies ``pinv`` of the interpolation
        matrix with the target; the least-squares solver uses
        ``np.linalg.lstsq``. Any other solver value leaves the weights
        untouched.
        """
        norm_matrix = self.activation_function()
        constants = Constants()
        if self.solver == constants.get_solver_pinv_abbreviation():
            self.weights = dot(pinv(norm_matrix), self.target)
        elif self.solver == constants.get_solver_ls_abbreviation():
            self.weights = np.linalg.lstsq(norm_matrix, self.target,
                                           rcond=None)[0]

    def test(self):
        """Return the network output at the training points."""
        interpolation_matrix = self.activation_function()
        return np.dot(interpolation_matrix, self.weights)

    def init_plot_center(self):
        """Draw the centers as green squares on the zero line."""
        plt.plot(self.centers, np.zeros(self.num_centers), 'gs')

    def init_plot_test(self):
        """Draw the network output over the training points in red."""
        plt.plot(self.training_data, self.test(), 'r-')

    def plot(self, name=None, save=False, path=None, index=None):
        """Plot the target, the network output and the centers.

        Args:
            name: Base name of the saved image; ``'rbfn'`` when omitted.
            save: When true the figure is written to disk and closed,
                otherwise it is shown.
            path: Prefix put directly in front of ``name`` (no separator is
                inserted); when omitted the name is used without a prefix.
            index: Number embedded in the saved file name.
        """
        plt.figure(figsize=(10, 6))

        plt.plot(self.training_data, self.target, 'k-')
        self.init_plot_test()
        self.init_plot_center()

        red_patch = mpatches.Patch(color='red', label='Tested data')
        black_patch = mpatches.Patch(color='black', label='Training data')
        green_patch = mpatches.Patch(color='green', label='Centers')
        plt.legend(handles=[red_patch, black_patch, green_patch])

        if not save:
            plt.show()
            return

        # Fall back to neutral values so that saving without an explicit
        # path or name does not fail on ``None + str``.
        prefix = '' if path is None else path
        base_name = 'rbfn' if name is None else name
        # The random suffix keeps repeated saves from overwriting each other.
        suffix = str(np.random.randint(0, 1000000))
        plt.savefig(prefix + base_name + '-' + str(index) + '-' + suffix + '.png',
                    bbox_inches='tight')
        plt.close()

    def save_weigths(self, file_path, file_name, class_id=None, float_format=Constants().get_default_float_format(),
                     remove_file=False):
        """Save the trained weights through ``CSVUtils``.

        Args:
            file_path: Output directory; checked with ``Validator.is_folder``.
            file_name: Output file name.
            class_id: Optional class label inserted in front of the weights.
            float_format: Float format forwarded to ``CSVUtils.save``.
            remove_file: When true, an already existing output file is
                deleted before saving.
        """
        self.validator.is_folder(file_path)
        csv_utils = CSVUtils(file_name=file_name, dir_path=file_path)
        file = csv_utils.create_name()

        if self.validator.check_is_file(file) and remove_file:
            os.remove(file)

        weights = self.get_weights()
        # ``is not None`` so that a class id of 0 is still written.
        if class_id is not None:
            weights = insert(weights, 0, class_id)

        csv_utils.save(weights, header_read=None, header_insert=False,
                       float_format=float_format, index=False)