Ejemplo n.º 1
0
    def create_model(self, num_iter, alpha):
        """
        Build the model named by ``self.model_label`` and store it in ``self.model``.
        :param num_iter: number of iterations (passed to NeuralNetwork and SVM)
        :param alpha: learning rate (passed to SVM only)
        :return: None; when the label matches no known model name,
                 ``self.model`` is left untouched
        """
        chosen = self.model_label

        if chosen == 'NaiveBayes':
            # Naive Bayes takes neither hyper-parameter
            self.model = NaiveBayes(self.data, self.label)
            return

        if chosen == "NeuralNetwork":
            # The ANN only receives the iteration count
            self.model = NeuralNetwork(self.data, self.label, the_num_iter=num_iter)
            return

        if chosen == "SVM":
            # The SVM receives both the iteration count and the learning rate
            self.model = SVM(self.data, self.label, the_num_iter=num_iter, the_alpha=alpha)
Ejemplo n.º 2
0
class Problem:
    """ A problem is a definition of a machine learning problem.

    It bundles the problem type, the training data, the name of the label
    column, the chosen model and (optionally) a testing data set.
    """

    # Accepted values for ``problem_type`` and ``model_label``.
    _PROBLEM_TYPES = ('BinaryClassification', 'MultiClassification')
    _MODEL_LABELS = ('SVM', 'NaiveBayes', 'NeuralNetwork')

    # Hyper-parameter defaults shared by set_model() and create_model().
    _DEFAULT_NUM_ITER = 1000
    _DEFAULT_ALPHA = 0.003

    def __init__(self, problem_type=None, file_path=None, model_label=None):
        """ Init function for Problem
        :param problem_type: it should be one of the defined types
        :param file_path: a CSV file path to read data from
        :param model_label: name of the chosen model
        :return:
        :raises ValueError: if problem_type or model_label is not a known value
        """

        # define problem type
        self.problem_type = problem_type
        if problem_type is not None:
            self.check_type()

        # define data set
        self.file_path = file_path
        self.data = None
        if file_path is not None:
            self.read_data()

        # The label is not a constructor argument; it is chosen later
        # through set_label() (or restored by load()).
        self.label = None

        # define model
        self.model_label = model_label
        self.model = None
        if model_label is not None:
            self.check_model()
            # No hyper-parameters are available here, so create_model()
            # falls back to its defaults. Note the label is still None.
            self.create_model()

        # placeholder for testing data
        self.testing_data = None

    def check_type(self):
        """ Check if problem type is defined
        :param self:
        :return:
        :raises ValueError: if the problem type is not one of the known types
        """
        if self.problem_type not in self._PROBLEM_TYPES:
            raise ValueError('Problem Type Not Included.')

    def check_label(self):
        """ Check that the label column is consistent with the problem type
        :param self:
        :return:
        :raises ValueError: if a binary problem does not have exactly two
                            distinct label values, or a multi-class problem
                            has two or fewer
        """
        # nunique() works for any column dtype, whereas describe() only
        # reports a 'unique' entry for object-typed columns.
        unique_count = self.data[self.label].nunique()
        if (self.problem_type == 'BinaryClassification' and unique_count != 2) \
                or (self.problem_type == 'MultiClassification' and unique_count <= 2):
            raise ValueError("Number of unique items is not consistent with problem type")

    def check_model(self):
        """ Check if model is defined
        :param self:
        :return:
        :raises ValueError: if the model label is not one of the known models
        """
        if self.model_label not in self._MODEL_LABELS:
            raise ValueError('Model is not defined')

    def set_model(self, model, num_iter=_DEFAULT_NUM_ITER, alpha=_DEFAULT_ALPHA):
        """ Set the problem's model
        :param self:
        :param model: name of the model to be set
        :param num_iter: number of iterations
        :param alpha: learning rate
        :return:
        :raises ValueError: if the model name is not one of the known models
        """
        self.model_label = model
        self.check_model()
        self.create_model(num_iter, alpha)

    def read_data(self):
        """ Read data from the file_path
        :param self:
        :return:
        """
        # Errors from the read propagate to the caller unchanged.
        self.data = pd.read_csv(self.file_path)

    def set_label(self, label):
        """ Set the problem's label
        :param self:
        :param label: label to be set; must be a column of the data
        :return:
        :raises ValueError: if the data has no column with that name
        """
        if label not in self.data.columns:
            raise ValueError("Label column does not exist.")

        self.label = label
        # Label values are stored as strings.
        self.data[label] = self.data[label].astype(str)

    def save(self, save_path, data_path):
        """ Save the problem's elements and its data
        :param self:
        :param save_path: file path to save info
        :param data_path: file path to save data
        :return:
        """
        problem = {
            "problem_type": self.problem_type,
            "label": self.label,
            "model_label": self.model_label,
        }

        self.data.to_json(data_path)
        print("Saved the data to ", data_path)

        with open(save_path, 'w') as fp:
            json.dump(problem, fp, indent=1)
        print("Saved the problem to ", save_path)

    def load(self, load_path, data_path):
        """ Load problem's elements from a file & load problem's data from a file
        :param self:
        :param load_path: file path to load info
        :param data_path: file path to load data
        :return:
        """
        # load path into dict; the context manager closes the file handle
        with open(load_path) as fp:
            problem = json.load(fp)

        # read problem description
        self.label = problem['label']
        self.problem_type = problem['problem_type']
        self.model_label = problem['model_label']

        # load data (the model itself is not rebuilt here)
        self.data = pd.read_json(data_path)

    def create_model(self, num_iter=_DEFAULT_NUM_ITER, alpha=_DEFAULT_ALPHA):
        """
        Create models based on label value
        :param num_iter: number of iterations (used by NeuralNetwork and SVM)
        :param alpha: learning rate (used by SVM only)
        :return: None; the model is stored in ``self.model``. If the model
                 label matches no known model, ``self.model`` is unchanged.
        """
        if self.model_label == 'NaiveBayes':
            # Create Naive Bayes
            self.model = NaiveBayes(self.data, self.label)

        elif self.model_label == "NeuralNetwork":
            # Create ANN
            self.model = NeuralNetwork(self.data, self.label, the_num_iter=num_iter)

        elif self.model_label == "SVM":
            # Create SVM
            self.model = SVM(self.data, self.label, the_num_iter=num_iter, the_alpha=alpha)

    def set_testing(self, file_path):
        """
        Set testing data
        :param file_path: path of a CSV file holding the testing data
        :return:
        """
        self.testing_data = pd.read_csv(file_path)

    def predict(self):
        """
        Predict on testing data
        :return: whatever the model's ``predict`` returns for the testing data
        """
        return self.model.predict(self.testing_data)