Example #1
0
    def dimensionReductionWithPCA(data: np.ndarray, n_components=None) -> np.ndarray:
        """Project ``data`` onto fewer dimensions with PCA (or TruncatedSVD).

        Input whose type is not exactly ``np.ndarray`` is handed to
        ``TruncatedSVD`` (the log messages treat such input as a sparse
        matrix); plain arrays go through ``PCA``.

        Args:
            data: Samples to reduce. For ndarray input it must be 2-D.
            n_components: Controls the output width for ndarray input:
                ``None`` uses ``ceil(data.shape[1] / 2)``; an ``int`` is the
                number of components, clamped to ``min(data.shape)``; a
                ``float`` strictly between 0 and 1 is a threshold on the
                cumulative explained-variance ratio, and the leading
                components are kept up to and including the first one that
                pushes the cumulative ratio above it. For non-ndarray input
                the value is passed to ``TruncatedSVD`` unchanged and a
                ``float`` is rejected.

        Returns:
            The transformed data.

        Raises:
            Exception: ``n_components`` is a float and ``data`` is not a
                plain ndarray.
            AssertionError: ``data`` is not 2-D, ``n_components`` is neither
                int nor float, or a float ``n_components`` is outside (0, 1).
        """
        import math

        LOGGER.info("Dimensionality reduction  with PCA")
        # Exact type comparison is deliberate: ndarray subclasses keep taking
        # the TruncatedSVD route, as they always have.
        if type(data) is not np.ndarray:
            LOGGER.warning(f'PCA data type is {type(data)}')
            if isinstance(n_components, float):
                LOGGER.warning("data is sparse matrix, use integer n_components")
                raise Exception("data is sparse matrix, please confirm n_components use integer")
            from sklearn.decomposition import TruncatedSVD
            return TruncatedSVD(n_components).fit_transform(data)

        assert data.ndim == 2
        if n_components is None:
            n_components = math.ceil(data.shape[1] / 2)
        assert isinstance(n_components, (int, float))

        if isinstance(n_components, int):
            max_components = min(data.shape)
            if n_components > max_components:
                n_components = max_components
                warnings.warn(f"n_components exceed max size,revise to {n_components}")
            # Fit for every integer request, not only for clamped ones;
            # otherwise a valid n_components would silently yield None.
            return PCA(n_components).fit_transform(data)

        assert 0 < n_components < 1
        pca = PCA()
        result = pca.fit_transform(data)
        ratios = pca.explained_variance_ratio_
        # Keep every component unless the running total crosses the threshold.
        kept = len(ratios)
        cumulative = 0
        for index, ratio in enumerate(ratios):
            cumulative += ratio
            if cumulative > n_components:
                kept = index + 1
                break
        LOGGER.info(f'Dimensionality reduction components is {kept}')
        return result[:, :kept]
Example #2
0
 def calculateHiddenLayerActivation(self, features):
     """Return the activated hidden-layer response for ``features``.

     The pre-activation is ``features . inputWeights^T + bias``. It is passed
     through ``self.activate_function`` when that attribute is callable,
     otherwise through the built-in sigmoid (with a warning).
     """
     weights_t = np.transpose(self.inputWeights)
     pre_activation = np.dot(features, weights_t) + self.bias
     if not callable(self.activate_function):
         # Misconfigured activation: fall back instead of failing.
         LOGGER.warn("activate_func could not callable,use sigmoid instead")
         return _internal_activation_funcs["sigmoid"](pre_activation)
     return self.activate_function(pre_activation)
Example #3
0
    def loadSupportedDistros(cls):
        '''
        Return the list of supported OS distributions in PDS.

        The data is read from ``distros_supported.json`` under
        ``cls.getDataFilePath()``. Loading is best-effort: if anything goes
        wrong a warning is logged and an empty list is returned.
        '''

        LOGGER.debug('loadSupportedDistros: In loadSupportedDistros')

        json_data = []
        # Bound before the try so the handler can always report it, even when
        # getDataFilePath() itself is what raised.
        distro_data_file = None
        try:
            distro_data_file = '%s/distros_supported.json' % cls.getDataFilePath()
            # Context manager so the handle is closed even if parsing fails.
            with open(distro_data_file) as distro_file:
                json_data = json.load(distro_file)
        except Exception as ex:
            LOGGER.warning(
                'loadSupportedDistros: In loadSupportedDistros %s distro loading resulted in: %s',
                distro_data_file, ex)

        return json_data
Example #4
0
 def trainELMWithoutKNN(self):
     """Run the iterative ELM training loop that skips the KNN step.

     Each round refits ``self.elmc`` on the training set, asks it for
     ``predict_with_percentage`` on ``self.X_iter``, passes the result to
     ``updateDataWithoutKNN`` and logs the test-set score. The loop ends when
     ``self._iter_continue`` becomes falsy or the prediction is ``None``.
     """
     print("-------------------------ELM Without KNN-------------------------")
     i = 0
     while self._iter_continue:
         i += 1
         print(f'---------------第{i}次训练-------------------')
         self.elmc.fit(self.X_train, self.Y_train)
         candidates = self.elmc.predict_with_percentage(self.X_iter)
         if candidates is not None:
             self.updateDataWithoutKNN(candidates)
             LOGGER.debug(f'第{i}次迭代训练后测试集的分类正确率为{self.elmc.score(self.X_test, self.Y_test)}')
             continue
         # Nothing came back for this round, so stop iterating.
         LOGGER.warn("未获取迭代数据,迭代训练结束")
         break
Example #5
0
 def trainOSELMWithKNNButBvsb(self):
     """Run the iterative OS-ELM training loop that uses KNN but not BVSB.

     Each round predicts on ``self.X_iter``, asks ``getUpdataWithoutBVSB``
     for the data to train on, fits ``self.elmc`` on that data only
     (``_data[0]``, ``_data[1]``) and logs the test-set score. The loop ends
     when ``self._iter_continue`` becomes falsy or no data is returned.
     """
     print("----------------------OSELM WITH KNN BUT BVSB---------------------------")
     i = 0
     while self._iter_continue:
         i += 1
         print(f'---------------第{i}次训练-------------------')
         _data = self.getUpdataWithoutBVSB(self.elmc.predict(self.X_iter))
         if _data is not None:
             LOGGER.info(f'第{i}次训练时进行训练的数据个数:{_data[1].size}')
             print(_data[1].shape)
             self.elmc.fit(_data[0], _data[1])
             LOGGER.debug(f'第{i}次迭代训练后测试集的分类正确率为{self.score(self.X_test, self.Y_test)}')
             continue
         # No update data for this round, so stop iterating.
         LOGGER.warn("未获取迭代数据,迭代训练结束")
         break
Example #6
0
 def trainELMWithKNNButBvsb(self):
     """Run the iterative ELM training loop that uses KNN but not BVSB.

     Each round fits ``self.elmc`` on the training set, predicts on
     ``self.X_iter`` with ``predict_with_percentage``, asks
     ``getUpdataWithoutBVSB`` for data to add, merges it through
     ``mergeTrainData`` and fits again before logging the test-set score.
     The loop ends when ``self._iter_continue`` becomes falsy or no data is
     returned.
     """
     print("------------------------------------ELM WITH KNN BUT BVSB")
     i = 0
     while self._iter_continue:
         i += 1
         print(f'-------------------------第{i}次训练----------------------------')
         self.elmc.fit(self.X_train, self.Y_train)
         percentages = self.elmc.predict_with_percentage(self.X_iter)
         _data = self.getUpdataWithoutBVSB(percentages)
         if _data is not None:
             LOGGER.info(f'第{i}次训练时添加的数据个数:{_data[1].size}')
             self.mergeTrainData(_data)
             # Second fit of the round, run after the merge above.
             self.elmc.fit(self.X_train, self.Y_train)
             LOGGER.debug(f'第{i}次迭代训练后测试集的分类正确率为{self.score(self.X_test, self.Y_test)}')
             continue
         # No update data for this round, so stop iterating.
         LOGGER.warn("未获取迭代数据,迭代结束")
         break
Example #7
0
 def trainOSELMWithBvsb(self):
     """Run the iterative OS-ELM training loop driven by BVSB selection.

     The test-set score is logged once up front. Each round then predicts on
     ``self.X_iter``, logs the score of that prediction against
     ``self.Y_iter``, asks ``getUpdateDataWithBvsb`` for data to train on,
     fits ``self.elmc`` on that data only and logs the test-set score. The
     loop ends when ``self._iter_continue`` becomes falsy or no data is
     returned.
     """
     print("-------------------------------OSELM-BVSB-TRAIN------------------------------------------")
     LOGGER.info(f'迭代训练前算法对测试集的正确率为{self.elmc.score(self.X_test, self.Y_test)}')
     i = 0
     while self._iter_continue:
         i += 1
         print(f'---------------第{i}次训练-------------------')
         iter_prediction = self.elmc.predict(self.X_iter)
         score = self.elmc.scoreWithPredict(self.Y_iter, iter_prediction)
         LOGGER.info(f'第{i}次迭代后迭代数据集的正确率为{score}')
         _data = self.getUpdateDataWithBvsb(iter_prediction)
         if _data is not None:
             self.elmc.fit(_data[0], _data[1])
             LOGGER.debug(f'第{i}次迭代训练后测试集的分类正确率为{self.elmc.score(self.X_test, self.Y_test)}')
             continue
         # No update data for this round, so stop iterating.
         LOGGER.warn("未获取迭代数据,迭代训练结束")
         break
Example #8
0
 def __init__(self, features: np.ndarray, targets: np.ndarray, numHiddenNeurons, active_function="sigmoid"):
     """Set up the network dimensions and activation, then run ``initializePhase``.

     Args:
         features: 2-D array; its second dimension becomes ``self.inputs``.
         targets: Training targets. A 1-D array is first expanded with a
             ``LabelBinarizer`` using -1/+1 labels; otherwise it is used as
             given. Its second dimension becomes ``self.outputs``.
         numHiddenNeurons: Stored as ``self.numHiddenNeurons``.
         active_function: Key into ``_internal_activation_funcs``; unknown
             keys fall back to ``"sigmoid"`` with a warning.

     Raises:
         AssertionError: ``features`` is not 2-D.
     """
     # Keyword arguments: current scikit-learn releases reject positional
     # neg_label/pos_label.
     self.binarizer = LabelBinarizer(neg_label=-1, pos_label=1)
     assert features.ndim == 2
     self.inputs = features.shape[1]
     if targets.ndim == 1:
         targets = self.binarizer.fit_transform(targets)
     self.outputs = targets.shape[1]
     self.numHiddenNeurons = numHiddenNeurons
     # Placeholders; presumably populated by initializePhase() below -
     # TODO confirm.
     self.inputWeights = None
     self.bias = None
     self.beta = None
     self.M = None
     if active_function in _internal_activation_funcs:
         self.activate_function = _internal_activation_funcs[active_function]
         LOGGER.info(f"activate function is {active_function}")
     else:
         self.activate_function = _internal_activation_funcs["sigmoid"]
         LOGGER.warning("activate function is not in list, use sigmoid instead")
     self.initializePhase(features, targets)
Example #9
0
    def loadPackageData(cls):
        '''
        Return the list of packages in PDS.

        The data is read from ``cached_data.json`` under
        ``cls.getDataFilePath()``. If the cache cannot be loaded, the data is
        rebuilt with ``cls.preparePackageData()`` and written back to the
        same file. Errors raised while rebuilding or writing propagate to
        the caller.
        '''

        LOGGER.debug('loadPackageData: In loadPackageData')
        distro_data_file = '%s/cached_data.json' % cls.getDataFilePath()
        try:
            # Context manager so the handle is closed even if parsing fails.
            with open(distro_data_file) as cached_file:
                json_data = json.load(cached_file)
        except Exception:
            # Still best-effort for any ordinary failure (missing file,
            # corrupt JSON, ...), but no longer intercepts
            # KeyboardInterrupt/SystemExit the way a bare except did.
            LOGGER.warning('loadPackageData: Loading cached distros data failed generating from scratch')
            LOGGER.debug('loadPackageData: start writing distros data')
            json_data = cls.preparePackageData()
            with open(distro_data_file, 'w') as cached_file:
                cached_file.write(json.dumps(json_data))
            LOGGER.debug('loadPackageData: end writing distros data')

        LOGGER.debug('loadPackageData: Loading supported distros data')

        return json_data
Example #10
0
def _format_address(name, addr, header_charset):
    # Build one "Name <addr>" string with an encoded name and ASCII address.
    # We must always pass Unicode strings to Header, otherwise it will
    # use RFC 2047 encoding even on plain ASCII strings.
    encoded_name = str(Header(safe_unicode(name), header_charset))
    # Make sure email addresses do not contain non-ASCII characters
    ascii_addr = addr.encode('ascii')
    return formataddr((encoded_name, ascii_addr))


def send(portal, message, subject, recipients=None):
    """Send an email.

    this is taken from Products.eXtremeManagement

    The message is sent as ``text/html`` through the portal's ``MailHost``
    tool. The sender comes from the ``plone.email_from_name`` and
    ``plone.email_from_address`` registry records.

    :param portal: object providing ``getProperty`` and the ``MailHost`` tool.
    :param message: body text; encoded with the first charset that works out
        of US-ASCII, the portal's ``email_charset`` and UTF-8.
    :param subject: subject line.
    :param recipients: iterable of address strings, each optionally carrying
        a real name; empty entries are dropped. With no usable recipient a
        warning is logged and nothing is sent.
    :returns: ``None``. A ``gaierror`` raised while sending is logged and
        swallowed; other exceptions propagate.

    NOTE(review): the ``encode()`` calls look written for Python 2
    str/unicode semantics - confirm before relying on this under Python 3.
    """
    # Weed out any empty strings.
    recipients = [r for r in (recipients or ()) if r]
    if not recipients:
        LOGGER.warning("No recipients to send the mail to, not sending.")
        return

    charset = portal.getProperty('email_charset', 'ISO-8859-1')
    # Header class is smart enough to try US-ASCII, then the charset we
    # provide, then fall back to UTF-8.
    header_charset = charset

    # We must choose the body charset manually. body_charset keeps the value
    # of the first charset that encoded successfully and is reused for
    # MIMEText below.
    for body_charset in 'US-ASCII', charset, 'UTF-8':
        try:
            message = message.encode(body_charset)
        except UnicodeError:
            pass
        else:
            break

    # Get the 'From' address.
    registry = getUtility(IRegistry)
    sender_name = registry.get('plone.email_from_name')
    sender_addr = registry.get('plone.email_from_address')
    email_from = _format_address(sender_name, sender_addr, header_charset)

    formatted_recipients = []
    for recipient in recipients:
        # Split real name (which is optional) and email address parts
        recipient_name, recipient_addr = parseaddr(recipient)
        formatted_recipients.append(
            _format_address(recipient_name, recipient_addr, header_charset))
    email_to = ', '.join(formatted_recipients)

    # Make the subject a nice header
    subject = Header(safe_unicode(subject), header_charset)

    # Create the message ('plain' stands for Content-Type: text/plain)

    # plone4 should use 'text/plain' according to the docs, but this should work for us
    # http://plone.org/documentation/manual/upgrade-guide/version/upgrading-plone-3-x-to-4.0/updating-add-on-products-for-plone-4.0/mailhost.securesend-is-now-deprecated-use-send-instead/
    msg = MIMEText(message, 'html', body_charset)
    msg['From'] = email_from
    msg['To'] = email_to
    msg['Subject'] = subject
    msg = msg.as_string()

    # Finally send it out.
    mailhost = getToolByName(portal, 'MailHost')
    LOGGER.info("Begin sending email to %r ", formatted_recipients)
    LOGGER.info("Subject: %s ", subject)
    try:
        mailhost.send(msg)
    except gaierror as exc:
        LOGGER.error("Failed sending email to %r", formatted_recipients)
        LOGGER.error("Reason: %s: %r", exc.__class__.__name__, str(exc))
Example #11
0
 def updateTrainDataWithBvsb(self, preData: np.ndarray):
     """Merge the data chosen by ``getUpdateDataWithBvsb`` into the training set.

     Returns whatever ``mergeTrainData`` returns, or ``None`` (after logging
     a warning) when ``getUpdateDataWithBvsb`` yields nothing for ``preData``.
     """
     selected = self.getUpdateDataWithBvsb(preData)
     if selected is not None:
         return self.mergeTrainData(selected)
     LOGGER.warn("getUpdateTrain is None")
     return None