Ejemplo n.º 1
0
    def fit(self, x, y, feature_sampling=None):
        """
        Build the tree from training data and store it in ``self.root``.

        :param x:                sample matrix; rows are samples, columns are features
        :param y:                targets, one entry per row of ``x``
        :param feature_sampling: forwarded to ``build_CART_tree``; not used for ID3 / C4.5
        :raises Error:           if ``x`` and ``y`` disagree in length, ``y`` is empty,
                                 or ``self.type`` is not one of CART / ID3 / C4.5
        """
        # Valid input means: same number of rows AND at least one sample.
        if not (x.shape[0] == y.shape[0] and len(y) != 0):
            raise Error("Invalid x, y!")

        tree_type = self.type
        if tree_type == "CART":
            root = self.build_CART_tree(x, y, feature_sampling)
        elif tree_type in ("ID3", "C4.5"):
            # ID3 / C4.5 start with every column index as a candidate feature.
            root = self.build_tree(x, y, np.arange(x.shape[1]))
        else:
            raise Error("Invalid type!")
        self.root = root
Ejemplo n.º 2
0
    def fit(self,
            x,
            y,
            use_normal_equation=True,
            regularization=None,
            lam=0.1,
            detailed=False):
        """
        Fit the linear model, storing the weights in ``self.w`` and the bias in ``self.b``.

        :param x:                   sample matrix; rows are samples, columns are features
        :param y:                   targets, one per row of ``x``
        :param use_normal_equation: True: solve the normal equation in closed form;
                                    False: mini-batch gradient descent driven by
                                    ``self.batch_size``, ``self.epoch`` and ``self.eta``
        :param regularization:      None, 1 (L1, gradient descent only) or 2 (L2)
        :param lam:                 regularization strength
        :param detailed:            print ``w`` and ``b`` after every epoch (gradient descent only)
        :raises Error:              on an unsupported ``regularization`` value, or when the
                                    normal-equation matrix is (numerically) singular
        """
        if use_normal_equation:
            # Prepend a column of ones so the bias is solved for as coefficient 0.
            X = np.hstack((np.ones([x.shape[0], 1]), x))
            gram = np.dot(X.T, X)
            if regularization is None:
                pass
            elif regularization == 2:
                gram = gram + lam * np.eye(X.shape[1])
            else:
                raise Error("invalid regularization!")

            # A rank test is used instead of ``det == 0``: the determinant
            # underflows to exactly 0.0 for perfectly invertible matrices with
            # many small eigenvalues, and is merely tiny (not 0) for matrices
            # that are rank deficient up to rounding.
            try:
                singular = np.linalg.matrix_rank(gram) < gram.shape[0]
                if not singular:
                    solution = np.linalg.solve(
                        gram, np.dot(X.T, np.reshape(y, [-1, 1])))
            except np.linalg.LinAlgError:
                singular = True
            if singular:
                raise Error("The matrix is singular, can't do inverse!")

            solution = solution.flatten()
            self.b = solution[0]
            self.w = solution[1:]
        else:
            if self.batch_size is None:
                self.batch_size = x.shape[0]

            self.w = np.ones(x.shape[1])
            self.b = np.ones(1)
            # Ceiling division: a trailing partial batch still counts as a batch.
            num_of_batch = x.shape[0] // self.batch_size
            if x.shape[0] % self.batch_size != 0:
                num_of_batch += 1

            for i in range(self.epoch):
                for j in range(num_of_batch):
                    start = j * self.batch_size
                    end = min((j + 1) * self.batch_size, x.shape[0])
                    batch_x = x[start:end, :]
                    error = self.predict(batch_x) - y[start:end]
                    # Column vector so it broadcasts across the feature columns.
                    error = error.reshape([-1, 1])
                    if regularization is None:
                        delta_w = np.sum(error * batch_x, axis=0)
                        delta_b = np.sum(error)
                    elif regularization == 1:
                        delta_w = np.sum(error * batch_x,
                                         axis=0) + lam * np.sign(self.w)
                        delta_b = np.sum(error) + lam * np.sign(self.b)
                    elif regularization == 2:
                        delta_w = np.sum(error * batch_x,
                                         axis=0) + 2 * lam * self.w
                        delta_b = np.sum(error) + 2 * lam * self.b
                    else:
                        raise Error("invalid regularization!")
                    self.w -= self.eta * delta_w
                    self.b -= self.eta * delta_b
                if detailed:
                    print("epoch:", i, " w:", self.w, " b:", self.b)
Ejemplo n.º 3
0
    def build_tree(self, x, y, features):
        """
        Recursively build an ID3 / C4.5 subtree for the samples ``x``, ``y``.

        :param x:        sample matrix; columns are indexed by the entries of ``features``
        :param y:        targets, one per row of ``x``
        :param features: column indices of ``x`` still available for splitting
        :return:         a ``Node`` (leaf or internal), or None when fewer than
                         ``self.pre_pruning[1]`` samples reach this node
        :raises Error:   if ``self.type`` is neither "ID3" nor "C4.5"
        """
        if len(y) < self.pre_pruning[1]:
            return None

        is_same_kind, label = self.is_same_kind(y)
        if is_same_kind or len(features) == 0:
            return Node(label, is_leaf=True)

        # ``features`` is non-empty here, so resolving the criterion once up
        # front raises in exactly the cases the per-feature check would.
        if self.type == "ID3":
            criterion = calculate_info_gain
        elif self.type == "C4.5":
            criterion = calculate_info_gain_ratio
        else:
            raise Error("Invalid type!")

        split_feature, split_values, max_info = None, None, 0
        for feature in features:
            info, values = criterion(x[:, feature], y)
            if max_info < info:
                split_feature, split_values, max_info = feature, values, info

        # No feature achieved a positive score (split_feature is still None),
        # or the best score is below the pre-pruning threshold: stop here.
        if split_feature is None or max_info < self.pre_pruning[0]:
            return Node(label, is_leaf=True)

        # Remove the chosen feature BY VALUE. ``np.delete`` removes by
        # position, which only coincides with the value at the root where
        # ``features`` is ``arange(n)``.
        remaining = np.asarray(features)
        remaining = remaining[remaining != split_feature]
        column = x[:, split_feature]

        node = Node(label, is_leaf=False)
        for value in split_values:
            rows = column == value
            child = self.build_tree(x[rows], y[rows], remaining)
            if child is None:
                # A branch with too few samples collapses this node to a leaf.
                return Node(label, is_leaf=True)
            node.add_child(value, child)

        return node
Ejemplo n.º 4
0
 async def reload_translation(self, word):
     """
     Queue a ``WordKafkaMessage`` for ``word`` if the word is already stored.

     Returns a confirmation string on success, or an ``Error`` when the word
     is unknown or anything unexpected is raised along the way.
     """
     try:
         known = await self.db_layer.word.record_is_exists(word)
         if known:
             await self.producer.send_message(WordKafkaMessage(word))
             return "The audio recording will be updated!!!"
         return Error("Word not found!!!")
     except Exception as err:
         return self.__unexpected_exception__(err)
Ejemplo n.º 5
0
    def __init__(self,
                 input_attr=0,
                 output_attr=0,
                 num=20,
                 feature_sampling=np.log2):
        """
        Create an ensemble of ``num`` untrained CART decision trees.

        :param input_attr:       must be 0 or 1; passed on to every ``DecisionTree``
        :param output_attr:      must be 0 or 1; passed on to every ``DecisionTree``
        :param num:              number of trees to create
        :param feature_sampling: stored on the instance as ``self.feature_sampling``
        :raises Error:           if ``input_attr`` or ``output_attr`` is not 0 or 1
        """
        if input_attr not in (0, 1):
            raise Error("Invalid input_attr!")
        if output_attr not in (0, 1):
            raise Error("Invalid output_attr!")

        self.num, self.feature_sampling = num, feature_sampling
        self.input_attr, self.output_attr = input_attr, output_attr
        self.trees = [
            DecisionTree(input_attr, output_attr, type="CART")
            for _ in range(num)
        ]
Ejemplo n.º 6
0
 async def save_word(self, arg_json):
     """
     Validate ``arg_json``, store it as a new word and queue a Kafka message.

     Returns a confirmation string on success. Every failure is reported as
     an ``Error`` value rather than raised: duplicate word, validation /
     type problems, undecodable JSON, or any other exception.
     """
     try:
         payload = validate_json(Word.get_json_schema(), arg_json)
         word_inst = Word.init_form_json(payload)
         already_stored = await self.db_layer.word.record_is_exists(
             word_inst.word)
         if already_stored:
             template = "The dictionary already contains this word. Word - {}"
             return Error(template.format(word_inst.word))
         await self.db_layer.word.save(vars(word_inst))
         await self.producer.send_message(WordKafkaMessage(word_inst.word))
         return "Word was added!!!"
     except (TypeError, ValidationError) as err:
         return Error(err)
     except JSONDecodeError:
         return Error("Not valid JSON was passed.")
     except Exception as err:
         return self.__unexpected_exception__(err)
Ejemplo n.º 7
0
 async def update_word(self, arg_json):
     """
     Validate ``arg_json`` and apply it as an update to the stored word.

     The update is only sent to the database when ``exclude_json_fields``
     returns something truthy. Returns a confirmation string on success and
     an ``Error`` value when the word is missing, the input is invalid, or
     anything unexpected is raised.
     """
     try:
         parsed = validate_json(Word.get_json_schema(), arg_json)
         target = parsed['word']
         changes = exclude_json_fields(
             parsed, fields=['translation', 'phrase', 'synonyms'])
         if not changes:
             # Nothing to write; reported as a successful update.
             return "Word was updated!!!"
         updated = await self.db_layer.word.find_one_and_update(
             target, changes)
         if updated is None:
             return Error("Word not found!!!")
         return "Word was updated!!!"
     except (TypeError, ValidationError) as err:
         return Error(err)
     except JSONDecodeError:
         return Error("Not valid JSON was passed.")
     except Exception as err:
         return self.__unexpected_exception__(err)
Ejemplo n.º 8
0
 async def get_word(self, word):
     """
     Look ``word`` up in the database and return its pretty-printed view.

     Returns an ``Error`` when the lookup raises or finds nothing. Only the
     database call is guarded; exceptions from building the view propagate.
     """
     try:
         record = await self.db_layer.word.find_one_by_word(word)
     except Exception as err:
         return self.__unexpected_exception__(err)

     if record is None:
         return Error("Word not found!!!")
     return Word.init_form_json(record).get_pretty_view()
Ejemplo n.º 9
0
 async def delete_word(self, word):
     """
     Delete ``word`` from the database and remove its sound file, if any.

     Returns a confirmation string on success, or an ``Error`` when the word
     does not exist or anything unexpected is raised.
     """
     try:
         removed = await self.db_layer.word.find_one_and_delete(word)
         if removed is None:
             return Error("Word not found!!!")
         audio_path = removed['sound_record_path']
         if audio_path is not None:
             remove_file(audio_path)
         return "Word was deleted!!!"
     except Exception as err:
         return self.__unexpected_exception__(err)
Ejemplo n.º 10
0
    def __init__(self, input_attr=0, output_attr=0, type="CART", pre_pruning=(0, 0)):
        """
        :param input_attr:      input attribute kind, 1: continuous, 0: discrete   (for CART)
        :param output_attr:     output attribute kind, 1: continuous, 0: discrete  (for CART)
        :param type:            ID3, C4.5, CART
        :param pre_pruning:     2-tuple; the first element is the minimum change in loss,
                                the second is the minimum number of samples at a node

        ## Continuous output means a regression problem; loss is measured by mean squared error
        ## Discrete output means a classification problem; loss is measured by the Gini index

        ## Continuous input: sort the inputs, take the midpoint of every two adjacent points as a
        ## candidate split point, pick the best one, and divide the data into two parts
        ## Discrete input: iterate over every possible value, split on whether the input equals
        ## that value (two parts), and pick the best feature value
        """
        self.root = None
        self.type, self.pre_pruning = type, pre_pruning
        self.input_attr, self.output_attr = input_attr, output_attr
        # Both attribute kinds are binary flags; anything else is rejected.
        if input_attr not in (0, 1):
            raise Error("Invalid input_attr!")
        if output_attr not in (0, 1):
            raise Error("Invalid output_attr!")
Ejemplo n.º 11
0
    def search(self, q):
        """
        Prepare the request elements, execute the request and record the outcome.

        On failure ``self.error`` is set to a new ``Error`` describing the
        problem; on success ``self.result`` is set to a new ``Result`` holding
        the status line, the response headers and the protocol version.

        ``q.task_done()`` is always called, even if this method raises, so a
        caller waiting on the queue is never left blocked.

        NOTE(review): ``self.param`` and ``self.recherche`` are overwritten
        with the encoded / parsed values, so a second call on the same
        instance will fail on ``.myParam`` / ``.my_recherche`` -- confirm
        that each instance is only used for one search.
        """
        try:
            self.param = urlencode(self.param.myParam.get())
            self.recherche = urlparse(self.recherche.my_recherche.get())
            url = "%s%s%s" % (
                self.recherche[1],
                self.recherche[2],
                self.recherche[3]
            )
            ok, response, timer = self._requester()
            if not ok:
                # On failure ``response`` carries the error message.
                self.error = Error()
                self.error.add_param(
                    GT_('Erreur'),
                    GT_('Url injoignable : %s\n%s') % (url, response)
                )
            else:
                self.result = Result()
                self.result.unset()
                # Request information (url, execution time)
                self.result.add_param(
                    GT_('http://%s en %d.%06d secondes') % (
                        url,
                        timer.seconds,
                        timer.microseconds
                    ),
                    '%s : %s' % (response.status, response.reason)
                )
                # Response headers
                self.result.add_param(
                    GT_('Entetes'),
                    response.getheaders()
                )
                # ``response.version`` is 11 for HTTP/1.1; anything else is
                # reported as 1.0.
                version = 1.0
                if response.version == 11:
                    version = 1.1

                self.result.add_param(
                    GT_('Version du protocole'),
                    GT_('HTTP/%s' % version)
                )
        finally:
            q.task_done()
Ejemplo n.º 12
0
 def __unexpected_exception__(self, exp):
     """Log ``exp`` through the module logger and return a generic ``Error``."""
     log_line = "Exception : {}".format(exp)
     self.module_logger.error(log_line)
     return Error("Something was wrong")
Ejemplo n.º 13
0
    # NOTE(review): this fragment uses Python 2 ``print`` statements, and the
    # enclosing definition (where X and t come from) is not visible here.

    # X is 2-D (its shape unpacks into two values); n_class is the number of
    # distinct values in t.
    n_data, n_input = X.shape
    n_class = np.unique(t).size
    # create_label is defined elsewhere; presumably builds one target row per
    # sample with n_class columns -- TODO confirm.
    T = create_label(t, n_data, n_class)

    print 'make train/test data'
    n_train, n_test = 1000, 50
    # Draw n_train + n_test distinct random row indices, then split them into
    # the first n_train (training) and the remaining n_test (test).
    i = np.random.permutation(n_data)[:n_train+n_test]
    i_train, i_test = np.hsplit(i, [n_train])
    # Each selected row is reshaped to (1, 28, 28), so rows of X must hold 784 values.
    X_train, X_test = X[i_train, :].reshape(n_train, 1, 28, 28), X[i_test, :].reshape(n_test, 1, 28, 28)
    T_train, T_test = T[i_train, :], T[i_test, :]

    print 'initialize...'
    # Activation objects shared by the layers below.
    linear, sigmoid, softmax, relu = act.linear(), act.sigmoid(), act.softmax(), act.relu()
    # conv / pool / NN / CNN are project classes; the meaning of their
    # positional arguments is not visible here -- see their definitions.
    conv1, conv2 = conv(20, 1, 5, 5, relu), conv(50, 20, 5, 5, relu)
    pool1, pool2 = pool(2, 2, 2), pool(2, 2, 2)
    neural = NN(800, 500, 10, linear, sigmoid, softmax)
    error = err.cross_entropy()
    cnn = CNN(conv1, pool1, conv2, pool2, neural, error)

    print 'train...'
    cnn.train(X_train, T_train, epsilon = 0.005, lam = 0.0001, gamma = 0.9, s_batch = 5, epochs = 50)

    print 'predict...'
    # Evaluate on the held-out samples and report the accuracy.
    Y_test = cnn.predict(X_test)
    accuracy = cnn.accuracy(Y_test, T_test)
    print 'accuracy: {0}'.format(accuracy)

    print 'save figure of loss...'
    cnn.save_lossfig()

Ejemplo n.º 14
0
class Action:
    """
    Controller class for the "Recherche" (search) view.
    """

    def __init__(self):
        """
        Build the controller of the "Recherche" view.

        It is responsible for handling the events of the "vue.Recherche" view.
        """
        self.param = Parametre()
        self.recherche = Recherche()
        self.header = Header()

    def search(self, q):
        """
        Prepare the request elements, execute the request and record the outcome.

        On failure ``self.error`` is set to a new ``Error`` describing the
        problem; on success ``self.result`` is set to a new ``Result`` holding
        the status line, the response headers and the protocol version.

        ``q.task_done()`` is always called, even if this method raises, so a
        caller waiting on the queue is never left blocked.

        NOTE(review): ``self.param`` and ``self.recherche`` are overwritten
        with the encoded / parsed values, so a second call on the same
        instance will fail on ``.myParam`` / ``.my_recherche`` -- confirm
        that each instance is only used for one search.
        """
        try:
            self.param = urlencode(self.param.myParam.get())
            self.recherche = urlparse(self.recherche.my_recherche.get())
            url = "%s%s%s" % (
                self.recherche[1],
                self.recherche[2],
                self.recherche[3]
            )
            ok, response, timer = self._requester()
            if not ok:
                # On failure ``response`` carries the error message.
                self.error = Error()
                self.error.add_param(
                    GT_('Erreur'),
                    GT_('Url injoignable : %s\n%s') % (url, response)
                )
            else:
                self.result = Result()
                self.result.unset()
                # Request information (url, execution time)
                self.result.add_param(
                    GT_('http://%s en %d.%06d secondes') % (
                        url,
                        timer.seconds,
                        timer.microseconds
                    ),
                    '%s : %s' % (response.status, response.reason)
                )
                # Response headers
                self.result.add_param(
                    GT_('Entetes'),
                    response.getheaders()
                )
                # ``response.version`` is 11 for HTTP/1.1; anything else is
                # reported as 1.0.
                version = 1.0
                if response.version == 11:
                    version = 1.1

                self.result.add_param(
                    GT_('Version du protocole'),
                    GT_('HTTP/%s' % version)
                )
        finally:
            q.task_done()

    def _requester(self):
        """
        Execute the request and return its outcome.

        Returns ``(True, response, elapsed)`` on success, where ``elapsed`` is
        the ``datetime`` difference measured around the whole exchange, or
        ``(False, message, None)`` when the host cannot be resolved or the
        URL is invalid. Other exceptions propagate to the caller.
        """
        try:
            # An empty parameter string means there is no body to send.
            methode = 'POST'
            if not self.param:
                methode = 'GET'
            debut = datetime.now()
            connection = httplib.HTTPConnection(self.recherche[1])
            try:
                connection.request(
                    methode,
                    '%s/%s' % (self.recherche[2], self.recherche[3]),
                    self.param,
                    self.header.my_header.get()
                )
                response = connection.getresponse()
            finally:
                # Release the socket even when the request or response fails.
                connection.close()
            timer = datetime.now() - debut
            return True, response, timer
        except socket.gaierror:
            return False, GT_("Socket erreur"), None
        except httplib.InvalidURL:
            # Same module name as the HTTPConnection lookup above, so the
            # handler cannot fail on a name that is not in scope.
            return False, GT_("Url non valide"), None