def fit(self, x, y, feature_sampling=None):
    """Train the tree on ``(x, y)`` and store the result in ``self.root``.

    :param x: sample matrix; ``x.shape[0]`` is the sample count and
        ``x.shape[1]`` the number of feature columns
    :param y: targets; must be non-empty and have ``x.shape[0]`` entries
    :param feature_sampling: passed through unchanged to
        ``build_CART_tree``; not used for "ID3" / "C4.5"
    :raises Error: if x and y disagree in size, y is empty, or
        ``self.type`` is not one of "CART", "ID3", "C4.5"
    """
    same_length = x.shape[0] == y.shape[0]
    if not same_length or len(y) == 0:
        raise Error("Invalid x, y!")

    algorithm = self.type
    if algorithm == "CART":
        self.root = self.build_CART_tree(x, y, feature_sampling)
        return
    if algorithm in ("ID3", "C4.5"):
        # ID3 / C4.5 start with every column index available for splitting.
        self.root = self.build_tree(x, y, np.arange(x.shape[1]))
        return
    raise Error("Invalid type!")
def fit(self, x, y, use_normal_equation=True, regularization=None, lam=0.1, detailed=False):
    """Fit the linear model and store the result in ``self.w`` and ``self.b``.

    Two solvers are available:

    * normal equation (default): a column of ones is prepended to ``x`` and
      ``(X^T X [+ lam * I])^-1 X^T y`` is evaluated; the first coefficient
      becomes ``self.b`` and the rest ``self.w``.
    * mini-batch gradient descent: ``self.w`` / ``self.b`` start at ones and
      are updated for ``self.epoch`` passes with step ``self.eta``, using
      ``self.predict`` to compute the residuals.

    :param x: 2-D sample matrix, one row per sample
    :param y: targets, one per row of x
    :param use_normal_equation: choose the closed-form solver when true
    :param regularization: ``None`` (no penalty), ``1`` (sign-based L1 term,
        gradient descent only) or ``2`` (L2 term)
    :param lam: penalty strength
    :param detailed: print the parameters after every epoch of gradient
        descent
    :raises Error: on an unsupported ``regularization`` value, or when the
        normal-equation matrix has a determinant of exactly zero
    """
    if use_normal_equation:
        # Prepend a ones column so the intercept is solved with the weights.
        design = np.hstack((np.ones([x.shape[0], 1]), x))
        if regularization is None:
            gram = np.dot(design.T, design)
        elif regularization == 2:
            gram = np.dot(design.T, design) + lam * np.eye(design.shape[1])
        else:
            raise Error("invalid regularization!")
        # NOTE(review): exact comparison only catches a determinant that is
        # precisely 0; nearly singular matrices still reach np.linalg.inv.
        if np.linalg.det(gram) == 0:
            raise Error("The matrix is singular, can't do inverse!")
        solution = np.dot(
            np.dot(np.linalg.inv(gram), design.T), np.reshape(y, [-1, 1])
        ).flatten()
        self.b = solution[0]
        self.w = solution[1:]
        return

    n_samples = x.shape[0]
    # Keep the "full batch" default local: writing it back to self.batch_size
    # would freeze the size of the first data set for every later call.
    batch_size = n_samples if self.batch_size is None else self.batch_size
    self.w = np.ones(x.shape[1])
    self.b = np.ones(1)
    # Ceiling division: the last batch may be shorter than the others.
    num_of_batch = n_samples // batch_size
    if n_samples % batch_size != 0:
        num_of_batch += 1

    for i in range(self.epoch):
        for j in range(num_of_batch):
            start = j * batch_size
            end = min(start + batch_size, n_samples)
            batch_x = x[start:end, :]
            error = (self.predict(batch_x) - y[start:end]).reshape([-1, 1])
            delta_w = np.sum(error * batch_x, axis=0)
            delta_b = np.sum(error)
            if regularization == 1:
                delta_w = delta_w + lam * np.sign(self.w)
                delta_b = delta_b + lam * np.sign(self.b)
            elif regularization == 2:
                delta_w = delta_w + 2 * lam * self.w
                delta_b = delta_b + 2 * lam * self.b
            elif regularization is not None:
                raise Error("invalid regularization!")
            self.w -= self.eta * delta_w
            self.b -= self.eta * delta_b
        if detailed:
            print("epoch:", i, " w:", self.w, " b:", self.b)
def build_tree(self, x, y, features):
    """Recursively grow an ID3 / C4.5 subtree for the given samples.

    :param x: 2-D sample matrix; column ``f`` holds the values of feature ``f``
    :param y: labels aligned with the rows of x
    :param features: column indices still available for splitting
    :return: a ``Node`` (internal or leaf), or ``None`` when fewer than
        ``self.pre_pruning[1]`` samples reach this call
    :raises Error: if ``self.type`` is neither "ID3" nor "C4.5"
    """
    if len(y) < self.pre_pruning[1]:
        return None
    is_same_kind, label = self.is_same_kind(y)
    if is_same_kind or len(features) == 0:
        return Node(label, is_leaf=True)

    # `features` is non-empty here, so selecting the criterion up front
    # raises in exactly the cases the per-feature check would.
    if self.type == "ID3":
        criterion = calculate_info_gain
    elif self.type == "C4.5":
        criterion = calculate_info_gain_ratio
    else:
        raise Error("Invalid type!")

    split_feature, split_values, max_info = None, None, 0
    for feature in features:
        info, values = criterion(x[:, feature], y)
        if max_info < info:
            split_feature, split_values, max_info = feature, values, info

    # Stop when no feature scored above zero (nothing to split on) or the
    # best score is below the pre-pruning threshold.
    if split_feature is None or max_info < self.pre_pruning[0]:
        return Node(label, is_leaf=True)

    # Drop the chosen feature by VALUE. np.delete() removes by position,
    # which no longer matches the feature id once any feature has been
    # removed higher up in the tree.
    remaining = np.asarray(features)
    remaining = remaining[remaining != split_feature]

    column = x[:, split_feature]
    node = Node(label, is_leaf=False)
    for value in split_values:
        mask = column == value
        child = self.build_tree(x[mask], y[mask], remaining)
        if child is None:
            # A branch with too few samples collapses this node to a leaf.
            return Node(label, is_leaf=True)
        node.add_child(value, child)
    return node
async def reload_translation(self, word):
    """Request a refresh of the audio recording of an existing word.

    Sends a ``WordKafkaMessage`` for the word through ``self.producer`` when
    the word is present in the database.

    :param word: the word to refresh
    :return: a confirmation string on success, otherwise an ``Error`` object
        (unknown word or unexpected failure)
    """
    try:
        exists = await self.db_layer.word.record_is_exists(word)
        if exists:
            await self.producer.send_message(WordKafkaMessage(word))
            return "The audio recording will be updated!!!"
        return Error("Word not found!!!")
    except Exception as exp:
        return self.__unexpected_exception__(exp)
def __init__(self, input_attr=0, output_attr=0, num=20, feature_sampling=np.log2):
    """Create a forest of ``num`` untrained CART decision trees.

    :param input_attr: must be 0 or 1; forwarded to every ``DecisionTree``
    :param output_attr: must be 0 or 1; forwarded to every ``DecisionTree``
    :param num: number of trees to create
    :param feature_sampling: stored on the instance as given
    :raises Error: if ``input_attr`` or ``output_attr`` is not 0 or 1
    """
    if input_attr not in (0, 1):
        raise Error("Invalid input_attr!")
    if output_attr not in (0, 1):
        raise Error("Invalid output_attr!")

    self.num = num
    self.input_attr = input_attr
    self.output_attr = output_attr
    self.feature_sampling = feature_sampling
    self.trees = [
        DecisionTree(input_attr, output_attr, type="CART") for _ in range(num)
    ]
async def save_word(self, arg_json):
    """Validate a word payload, store it, and announce it via the producer.

    :param arg_json: payload checked against ``Word.get_json_schema()``
    :return: a confirmation string on success, otherwise an ``Error`` object
        (duplicate word, invalid payload, or unexpected failure)
    """
    try:
        payload = validate_json(Word.get_json_schema(), arg_json)
        word_inst = Word.init_form_json(payload)
        already_stored = await self.db_layer.word.record_is_exists(word_inst.word)
        if already_stored:
            duplicate_message = (
                "The dictionary already contains this word. Word - {}".format(
                    word_inst.word
                )
            )
            return Error(duplicate_message)
        await self.db_layer.word.save(vars(word_inst))
        await self.producer.send_message(WordKafkaMessage(word_inst.word))
    except (TypeError, ValidationError) as exp:
        return Error(exp)
    except JSONDecodeError:
        return Error("Not valid JSON was passed.")
    except Exception as exp:
        return self.__unexpected_exception__(exp)
    # Reached only when the try body finished without returning or raising.
    return "Word was added!!!"
async def update_word(self, arg_json):
    """Validate a word payload and apply it to the stored record.

    :param arg_json: payload checked against ``Word.get_json_schema()``;
        must contain a ``'word'`` entry
    :return: a confirmation string on success, otherwise an ``Error`` object
        (unknown word, invalid payload, or unexpected failure)
    """
    try:
        json_dict = validate_json(Word.get_json_schema(), arg_json)
        word = json_dict['word']
        # NOTE(review): exact semantics of exclude_json_fields() are not
        # visible here; its result is used as the update document.
        changes = exclude_json_fields(
            json_dict, fields=['translation', 'phrase', 'synonyms'])
        if changes:
            updated = await self.db_layer.word.find_one_and_update(word, changes)
            if updated is None:
                return Error("Word not found!!!")
    except (TypeError, ValidationError) as exp:
        return Error(exp)
    except JSONDecodeError:
        return Error("Not valid JSON was passed.")
    except Exception as exp:
        return self.__unexpected_exception__(exp)
    # Also reached when `changes` is empty and no database call was made.
    return "Word was updated!!!"
async def get_word(self, word):
    """Look a word up in the database and return its pretty view.

    :param word: the word to look up
    :return: ``Word.init_form_json(record).get_pretty_view()`` when the word
        exists, otherwise an ``Error`` object
    """
    try:
        db_record_dict = await self.db_layer.word.find_one_by_word(word)
    except Exception as exp:
        return self.__unexpected_exception__(exp)

    if db_record_dict is None:
        return Error("Word not found!!!")
    word_inst = Word.init_form_json(db_record_dict)
    return word_inst.get_pretty_view()
async def delete_word(self, word):
    """Delete a word and, if it has one, its sound recording file.

    :param word: the word to delete
    :return: a confirmation string on success, otherwise an ``Error`` object
        (unknown word or unexpected failure)
    """
    try:
        removed = await self.db_layer.word.find_one_and_delete(word)
        if removed is None:
            return Error("Word not found!!!")
        sound_record_path = removed['sound_record_path']
        if sound_record_path is not None:
            remove_file(sound_record_path)
    except Exception as exp:
        return self.__unexpected_exception__(exp)
    return "Word was deleted!!!"
def __init__(self, input_attr=0, output_attr=0, type="CART", pre_pruning=(0, 0)):
    """
    :param input_attr: input attribute kind, 1: continuous, 0: discrete (for CART)
    :param output_attr: output attribute kind, 1: continuous, 0: discrete (for CART)
    :param type: ID3, C4.5, CART
    :param pre_pruning: 2-tuple; the first element is the minimum change in
        loss, the second is the minimum number of samples at a node

    ## Continuous output means a regression problem; the loss is measured
    ## with the mean squared error.
    ## Discrete output means a classification problem; the loss is measured
    ## with the Gini index.
    ## Continuous input: the inputs are sorted, the midpoint of every two
    ## adjacent points is a candidate split point, and the best one splits
    ## the data into two parts.
    ## Discrete input: every possible value is tried in turn, the data set
    ## is split in two by whether a sample equals that value, and the best
    ## feature value is chosen.
    """
    self.root = None
    self.type, self.pre_pruning = type, pre_pruning
    self.input_attr, self.output_attr = input_attr, output_attr

    # Validation happens after the attributes are stored.
    if input_attr not in (0, 1):
        raise Error("Invalid input_attr!")
    if output_attr not in (0, 1):
        raise Error("Invalid output_attr!")
def search(self, q):
    """
    Prepare elements for request and play it

    The outcome is stored on the instance: ``self.error`` when the request
    failed, ``self.result`` otherwise.

    :param q: queue-like object; ``q.task_done()`` is always called once,
        even when an exception escapes, so a waiting ``q.join()`` cannot
        hang on this task
    """
    try:
        # NOTE(review): both attributes are overwritten with derived values
        # that _requester() reads afterwards; a second call on the same
        # instance would no longer find the view objects -- confirm that
        # instances are single-use.
        self.param = urlencode(self.param.myParam.get())
        self.recherche = urlparse(self.recherche.my_recherche.get())
        url = "%s%s%s" % (
            self.recherche[1], self.recherche[2], self.recherche[3]
        )

        ok, response, timer = self._requester()
        if not ok:
            # On failure `response` is the message returned by _requester().
            self.error = Error()
            self.error.add_param(
                GT_('Erreur'),
                GT_('Url injoignable : %s\n%s') % (url, response)
            )
            return

        self.result = Result()
        self.result.unset()
        # Request information (url, execution time)
        self.result.add_param(
            GT_('http://%s en %d.%06d secondes') % (
                url, timer.seconds, timer.microseconds
            ),
            '%s : %s' % (response.status, response.reason)
        )
        # Response headers
        self.result.add_param(
            GT_('Entetes'),
            response.getheaders()
        )
        version = 1.1 if response.version == 11 else 1.0
        # Translate the template first, then format it, like the other
        # GT_() calls in this method.
        self.result.add_param(
            GT_('Version du protocole'),
            GT_('HTTP/%s') % version
        )
    finally:
        q.task_done()
def __unexpected_exception__(self, exp):
    """Log an unexpected exception and return a generic ``Error`` object.

    :param exp: the exception that was caught
    :return: ``Error("Something was wrong")``; the exception text goes only
        to ``self.module_logger``
    """
    log_line = "Exception : {}".format(exp)
    self.module_logger.error(log_line)
    return Error("Something was wrong")
# Train and evaluate a CNN on a random subset of the samples in X / t.
# print() is called with a single parenthesised argument so the script
# prints the same text under both Python 2 and Python 3.
n_data, n_input = X.shape
n_class = np.unique(t).size
T = create_label(t, n_data, n_class)

print('make train/test data')
n_train, n_test = 1000, 50
# Shuffle the sample indices and keep the first n_train + n_test of them.
i = np.random.permutation(n_data)[:n_train+n_test]
i_train, i_test = np.hsplit(i, [n_train])
# Each selected row is reshaped to (1, 28, 28).
X_train = X[i_train, :].reshape(n_train, 1, 28, 28)
X_test = X[i_test, :].reshape(n_test, 1, 28, 28)
T_train, T_test = T[i_train, :], T[i_test, :]

print('initialize...')
linear, sigmoid, softmax, relu = act.linear(), act.sigmoid(), act.softmax(), act.relu()
conv1, conv2 = conv(20, 1, 5, 5, relu), conv(50, 20, 5, 5, relu)
pool1, pool2 = pool(2, 2, 2), pool(2, 2, 2)
neural = NN(800, 500, 10, linear, sigmoid, softmax)
error = err.cross_entropy()
cnn = CNN(conv1, pool1, conv2, pool2, neural, error)

print('train...')
cnn.train(X_train, T_train, epsilon = 0.005, lam = 0.0001, gamma = 0.9, s_batch = 5, epochs = 50)

print('predict...')
Y_test = cnn.predict(X_test)
accuracy = cnn.accuracy(Y_test, T_test)
print('accuracy: {0}'.format(accuracy))

print('save figure of loss...')
cnn.save_lossfig()
class Action:
    """
    Class Action
    Controller class of the "Recherche" (search) view
    """

    def __init__(self):
        """
        Constructor of the controller of the "Recherche" view

        Responsible for handling the events of the "vue.Recherche" view
        """
        self.param = Parametre()
        self.recherche = Recherche()
        self.header = Header()

    def search(self, q):
        """
        Prepare elements for request and play it

        The outcome is stored on the instance: ``self.error`` when the
        request failed, ``self.result`` otherwise.

        :param q: queue-like object; ``q.task_done()`` is always called
            once, even when an exception escapes, so a waiting ``q.join()``
            cannot hang on this task
        """
        try:
            # NOTE(review): both attributes are overwritten with derived
            # values that _requester() reads afterwards; a second call on
            # the same instance would no longer find the view objects --
            # confirm that instances are single-use.
            self.param = urlencode(self.param.myParam.get())
            self.recherche = urlparse(self.recherche.my_recherche.get())
            url = "%s%s%s" % (
                self.recherche[1], self.recherche[2], self.recherche[3]
            )

            ok, response, timer = self._requester()
            if not ok:
                # On failure `response` is the message from _requester().
                self.error = Error()
                self.error.add_param(
                    GT_('Erreur'),
                    GT_('Url injoignable : %s\n%s') % (url, response)
                )
                return

            self.result = Result()
            self.result.unset()
            # Request information (url, execution time)
            self.result.add_param(
                GT_('http://%s en %d.%06d secondes') % (
                    url, timer.seconds, timer.microseconds
                ),
                '%s : %s' % (response.status, response.reason)
            )
            # Response headers
            self.result.add_param(
                GT_('Entetes'),
                response.getheaders()
            )
            version = 1.1 if response.version == 11 else 1.0
            # Translate the template first, then format it, like the other
            # GT_() calls in this method.
            self.result.add_param(
                GT_('Version du protocole'),
                GT_('HTTP/%s') % version
            )
        finally:
            q.task_done()

    def _requester(self):
        """
        Execute request and return result

        :return: ``(True, response, elapsed)`` on success, where ``elapsed``
            is the difference of two ``datetime.now()`` readings, or
            ``(False, message, None)`` when the host cannot be resolved or
            the URL is invalid
        """
        try:
            # POST when there are encoded parameters to send, GET otherwise.
            methode = 'POST' if self.param else 'GET'
            debut = datetime.now()
            connection = httplib.HTTPConnection(self.recherche[1])
            try:
                # NOTE(review): the request target is built from elements 2
                # and 3 of the parsed URL only; the query string (element 4)
                # is not sent -- confirm this is intended.
                connection.request(
                    methode,
                    '%s/%s' % (self.recherche[2], self.recherche[3]),
                    self.param,
                    self.header.my_header.get()
                )
                response = connection.getresponse()
            finally:
                # Close the connection even when the request fails.
                connection.close()
            timer = datetime.now() - debut
            return True, response, timer
        except socket.gaierror:
            return False, GT_("Socket erreur"), None
        except httplib.InvalidURL:
            # Use the same module name as HTTPConnection above.
            return False, GT_("Url non valide"), None