def dimensionReductionWithPCA(data: np.ndarray, n_components=None) -> np.ndarray:
    """Reduce `data` to fewer columns with PCA (TruncatedSVD for non-ndarray input).

    :param data: 2-D sample matrix; anything that is not an ndarray is treated
        as a sparse matrix and routed to TruncatedSVD instead of PCA.
    :param n_components: int -> exact number of components (clamped to the
        matrix's smaller dimension); float in (0, 1) -> keep enough components
        to exceed that fraction of explained variance; None -> half the columns.
    :return: the transformed (reduced) sample matrix.
    :raises Exception: for sparse input with a float ``n_components``
        (TruncatedSVD needs an integer component count).
    """
    LOGGER.info("Dimensionality reduction with PCA")
    # BUG FIX: the original compared against bare `numpy.ndarray` although the
    # file imports it as `np` (see the annotation) — a NameError at runtime.
    # `isinstance` is also the idiomatic check.
    if not isinstance(data, np.ndarray):
        LOGGER.warn(f'PCA data type is {type(data)}')
        if isinstance(n_components, float):
            LOGGER.warn("data is sparse matrix, use integer n_components")
            raise Exception("data is sparse matrix, please confirm n_components use integer")
        # PCA cannot center sparse data; TruncatedSVD works on it directly.
        from sklearn.decomposition import TruncatedSVD
        svd = TruncatedSVD(n_components)
        return svd.fit_transform(data)

    def _cumulative_index(ratios, cp: float):
        # Index of the first component whose cumulative explained-variance
        # ratio exceeds `cp`; len(ratios) if the threshold is never reached.
        total = 0.0
        for i, r in enumerate(ratios):
            total += r
            if total > cp:
                return i
        return len(ratios)

    assert data.ndim == 2
    import math
    if n_components is None:
        # Default: keep half the feature columns (rounded up).
        n_components = math.ceil(data.shape[1] / 2)
    assert isinstance(n_components, (int, float))
    if isinstance(n_components, int):
        if n_components > min(data.shape):
            # PCA cannot produce more components than min(n_samples, n_features).
            n_components = min(data.shape)
            # BUG FIX: the original used JS-style `${n_components}` inside an
            # f-string, which printed a stray literal '$'.
            warnings.warn(f"n_components exceed max size,revise to {n_components}")
        pca = PCA(n_components)
        return pca.fit_transform(data)
    else:
        assert 0 < n_components < 1
        pca = PCA()
        result = pca.fit_transform(data)
        components = _cumulative_index(pca.explained_variance_ratio_, n_components)
        LOGGER.info(f'Dimensionality reduction components is {components}')
        # Keep columns 0..components inclusive (components + 1 columns total).
        return result[:, 0:components + 1]
def calculateHiddenLayerActivation(self, features):
    """Compute the hidden-layer output: apply the activation function to
    features @ inputWeights.T + bias, falling back to sigmoid when the
    configured activation is not callable."""
    pre_activation = np.dot(features, np.transpose(self.inputWeights)) + self.bias
    # Guard clause: a non-callable activation means misconfiguration.
    if not callable(self.activate_function):
        LOGGER.warn("activate_func could not callable,use sigmoid instead")
        return _internal_activation_funcs["sigmoid"](pre_activation)
    return self.activate_function(pre_activation)
def loadSupportedDistros(cls):
    ''' Returns list of supported OS distributions in PDS '''
    LOGGER.debug('loadSupportedDistros: In loadSupportedDistros')
    json_data = []
    # FIX: bind the path outside the try block — the original built it inside
    # `try`, so a failure in getDataFilePath() would raise NameError when the
    # handler formatted its log message.
    distro_data_file = '%s/distros_supported.json' % cls.getDataFilePath()
    try:
        # FIX: the original leaked the file handle via json.load(open(...)).
        with open(distro_data_file) as distro_file:
            json_data = json.load(distro_file)
    # FIX: `except Exception,ex` is Python-2-only syntax; `as` works on 2.6+ and 3.
    except Exception as ex:
        LOGGER.warn('loadSupportedDistros: In loadSupportedDistros %s distro loading resulted in: %s' % (distro_data_file, str(ex)))
    # FIX: the docstring promises a list but the original never returned it.
    return json_data
def trainELMWithoutKNN(self):
    """Iterative ELM training loop without KNN: each round refits on the
    current training set, predicts on the iteration pool, and hands the
    predictions to updateDataWithoutKNN until no more data is obtained."""
    print("-------------------------ELM Without KNN-------------------------")
    round_no = 0
    while self._iter_continue:
        round_no += 1
        print(f'---------------第{round_no}次训练-------------------')
        self.elmc.fit(self.X_train, self.Y_train)
        predicted = self.elmc.predict_with_percentage(self.X_iter)
        # A None prediction batch signals that no iteration data remains.
        if predicted is None:
            LOGGER.warn("未获取迭代数据,迭代训练结束")
            break
        self.updateDataWithoutKNN(predicted)
        LOGGER.debug(f'第{round_no}次迭代训练后测试集的分类正确率为{self.elmc.score(self.X_test, self.Y_test)}')
def trainOSELMWithKNNButBvsb(self):
    """Iterative OS-ELM training with KNN selection but without BVSB:
    predict on the iteration pool, select data via getUpdataWithoutBVSB,
    and incrementally fit until no batch can be produced."""
    print("----------------------OSELM WITH KNN BUT BVSB---------------------------")
    round_no = 0
    while self._iter_continue:
        round_no += 1
        print(f'---------------第{round_no}次训练-------------------')
        predictions = self.elmc.predict(self.X_iter)
        batch = self.getUpdataWithoutBVSB(predictions)
        # No selectable batch left -> the iteration is finished.
        if batch is None:
            LOGGER.warn("未获取迭代数据,迭代训练结束")
            break
        LOGGER.info(f'第{round_no}次训练时进行训练的数据个数:{batch[1].size}')
        print(batch[1].shape)
        # batch[0] = selected samples, batch[1] = their labels.
        self.elmc.fit(batch[0], batch[1])
        LOGGER.debug(f'第{round_no}次迭代训练后测试集的分类正确率为{self.score(self.X_test, self.Y_test)}')
def trainELMWithKNNButBvsb(self):
    """Iterative ELM training with KNN selection but without BVSB: refit on
    the training set each round, select new data from the iteration pool,
    merge it into the training set, and refit."""
    print("------------------------------------ELM WITH KNN BUT BVSB")
    round_no = 0
    while self._iter_continue:
        round_no += 1
        print(f'-------------------------第{round_no}次训练----------------------------')
        self.elmc.fit(self.X_train, self.Y_train)
        predictions = self.elmc.predict_with_percentage(self.X_iter)
        batch = self.getUpdataWithoutBVSB(predictions)
        # Nothing selected -> stop iterating.
        if batch is None:
            LOGGER.warn("未获取迭代数据,迭代结束")
            break
        LOGGER.info(f'第{round_no}次训练时添加的数据个数:{batch[1].size}')
        self.mergeTrainData(batch)
        # Refit after the merge so the score reflects the enlarged set.
        self.elmc.fit(self.X_train, self.Y_train)
        LOGGER.debug(f'第{round_no}次迭代训练后测试集的分类正确率为{self.score(self.X_test, self.Y_test)}')
def trainOSELMWithBvsb(self):
    """Iterative OS-ELM training with BVSB selection: each round predicts on
    the iteration pool, selects data via getUpdateDataWithBvsb, and
    incrementally fits the model until no batch is produced."""
    print("-------------------------------OSELM-BVSB-TRAIN------------------------------------------")
    LOGGER.info(f'迭代训练前算法对测试集的正确率为{self.elmc.score(self.X_test, self.Y_test)}')
    round_no = 0
    while self._iter_continue:
        round_no += 1
        print(f'---------------第{round_no}次训练-------------------')
        predictions = self.elmc.predict(self.X_iter)
        accuracy = self.elmc.scoreWithPredict(self.Y_iter, predictions)
        LOGGER.info(f'第{round_no}次迭代后迭代数据集的正确率为{accuracy}')
        batch = self.getUpdateDataWithBvsb(predictions)
        # BVSB selected nothing -> iteration is over.
        if batch is None:
            LOGGER.warn("未获取迭代数据,迭代训练结束")
            break
        # batch[0] = selected samples, batch[1] = their labels.
        self.elmc.fit(batch[0], batch[1])
        LOGGER.debug(f'第{round_no}次迭代训练后测试集的分类正确率为{self.elmc.score(self.X_test, self.Y_test)}')
def __init__(self, features: np.ndarray, targets: np.ndarray, numHiddenNeurons, active_function="sigmoid"):
    """Build an ELM model sized to the given data and run the initial fit.

    :param features: 2-D training sample matrix (n_samples x n_features).
    :param targets: labels; a 1-D vector is binarized to a -1/1 indicator matrix.
    :param numHiddenNeurons: hidden-layer width.
    :param active_function: key into _internal_activation_funcs; unknown
        names fall back to sigmoid with a warning.
    """
    self.binarizer = LabelBinarizer(-1, 1)
    assert features.ndim == 2
    self.inputs = features.shape[1]
    if targets.ndim == 1:
        targets = self.binarizer.fit_transform(targets)
    self.outputs = targets.shape[1]
    self.numHiddenNeurons = numHiddenNeurons
    # Learned state — populated by initializePhase().
    self.inputWeights = None
    self.bias = None
    self.beta = None
    self.M = None
    chosen = _internal_activation_funcs.get(active_function)
    if chosen is not None:
        LOGGER.info(f"activate function is {active_function}")
    else:
        LOGGER.warn("activate function is not in list, use sigmoid instead")
        chosen = _internal_activation_funcs["sigmoid"]
    self.activate_function = chosen
    self.initializePhase(features, targets)
def loadPackageData(cls):
    ''' Returns list of Packages in PDS '''
    LOGGER.debug('loadPackageData: In loadSupportedDistros')
    distro_data_file = '%s/cached_data.json' % cls.getDataFilePath()
    try:
        # FIX: the original leaked the handle via json.load(open(...)).
        with open(distro_data_file) as cached_file:
            json_data = json.load(cached_file)
    # FIX: narrowed the bare `except:` — a bare except also swallows
    # KeyboardInterrupt/SystemExit; Exception keeps the best-effort behavior.
    except Exception:
        LOGGER.warn('loadPackageData: Loading cached distros data failed generating from scratch')
        LOGGER.debug('loadPackageData: start writing distros data')
        # Cache miss/corruption: rebuild the data and rewrite the cache file.
        json_data = cls.preparePackageData()
        # FIX: use a context manager instead of manual open/close.
        with open(distro_data_file, 'w') as cached_file:
            cached_file.write(json.dumps(json_data))
        LOGGER.debug('loadPackageData: end writing distros data')
    LOGGER.debug('loadPackageData: Loading supported distros data')
    return json_data
def send(portal, message, subject, recipients=[]):
    """Send an HTML email through the portal's MailHost.

    Adapted from Products.eXtremeManagement. Encodes the body with the first
    charset that works (US-ASCII, the portal's email_charset, then UTF-8),
    builds RFC-compliant From/To headers, and logs — rather than raises — on
    DNS failure (gaierror).

    NOTE(review): Python 2 code (`except gaierror, exc`, str.encode('ascii')
    on addresses); the mutable default `recipients=[]` is safe here because
    it is rebound, never mutated.

    :param portal: Plone portal object (provides getProperty and MailHost).
    :param message: email body text (unicode).
    :param subject: subject line.
    :param recipients: addresses, optionally "Name <addr>" formatted; empty
        strings are filtered out, and nothing is sent if none remain.
    """
    # Weed out any empty strings.
    recipients = [r for r in recipients if r]
    if not recipients:
        LOGGER.warn("No recipients to send the mail to, not sending.")
        return
    charset = portal.getProperty('email_charset', 'ISO-8859-1')
    # Header class is smart enough to try US-ASCII, then the charset we
    # provide, then fall back to UTF-8.
    header_charset = charset
    # We must choose the body charset manually: keep the first encoding
    # that can represent the message without raising UnicodeError.
    for body_charset in 'US-ASCII', charset, 'UTF-8':
        try:
            message = message.encode(body_charset)
        except UnicodeError:
            pass
        else:
            break
    # Get the 'From' address from the site-wide registry settings.
    registry = getUtility(IRegistry)
    sender_name = registry.get('plone.email_from_name')
    sender_addr = registry.get('plone.email_from_address')
    # We must always pass Unicode strings to Header, otherwise it will
    # use RFC 2047 encoding even on plain ASCII strings.
    sender_name = str(Header(safe_unicode(sender_name), header_charset))
    # Make sure email addresses do not contain non-ASCII characters
    sender_addr = sender_addr.encode('ascii')
    email_from = formataddr((sender_name, sender_addr))
    formatted_recipients = []
    for recipient in recipients:
        # Split real name (which is optional) and email address parts
        recipient_name, recipient_addr = parseaddr(recipient)
        recipient_name = str(Header(safe_unicode(recipient_name), header_charset))
        recipient_addr = recipient_addr.encode('ascii')
        formatted = formataddr((recipient_name, recipient_addr))
        formatted_recipients.append(formatted)
    email_to = ', '.join(formatted_recipients)
    # Make the subject a nice header
    subject = Header(safe_unicode(subject), header_charset)
    # Create the message ('plain' stands for Content-Type: text/plain)
    # plone4 should use 'text/plain' according to the docs, but this should work for us
    # http://plone.org/documentation/manual/upgrade-guide/version/upgrading-plone-3-x-to-4.0/updating-add-on-products-for-plone-4.0/mailhost.securesend-is-now-deprecated-use-send-instead/
    msg = MIMEText(message, 'html', body_charset)
    msg['From'] = email_from
    msg['To'] = email_to
    msg['Subject'] = subject
    msg = msg.as_string()
    # Finally send it out. gaierror (DNS resolution failure) is logged and
    # swallowed so a broken mail setup does not abort the caller.
    mailhost = getToolByName(portal, 'MailHost')
    try:
        LOGGER.info("Begin sending email to %r " % formatted_recipients)
        LOGGER.info("Subject: %s " % subject)
        mailhost.send(msg)
    except gaierror, exc:
        LOGGER.error("Failed sending email to %r" % formatted_recipients)
        LOGGER.error("Reason: %s: %r" % (exc.__class__.__name__, str(exc)))
def updateTrainDataWithBvsb(self, preData: np.ndarray):
    """Select iteration data via BVSB from `preData` and merge it into the
    training set; returns None (with a warning) when nothing was selected,
    otherwise whatever mergeTrainData returns."""
    selected = self.getUpdateDataWithBvsb(preData)
    if selected is None:
        LOGGER.warn("getUpdateTrain is None")
        return None
    return self.mergeTrainData(selected)