Beispiel #1
0
def calibrateSingleModelRecall(thisModel):
    """
    Perform calibration for single model implementations.

    Runs `segmentTesting` in calibration mode on the data stored in
    `thisModel[0].allDataDict` (`'Y'` data, `'L'` labels) and uses the returned
    variances of known and unknown samples to populate a fresh
    `thisModel[0].classificationDict`. The calibration strategy is selected by
    `thisModel[0].useMaxDistance`:

    1) __useMaxDistance__ is `True`: the mean and variance of every variance
       distribution are computed for known and unknown samples, the
       Bhattacharyya distance between each known/unknown pair is calculated
       and the pair with the largest distance is kept. Stored keys:
       `bestDistanceIDX`, `bestDistance_props`, `segIntersections`,
       `bhattaDistances`, `varianceThreshold` and `varianceDirection`.
    2) __useMaxDistance__ is `False`: normalised histograms of the known and
       unknown variances are built per column with a bin width of
       `thisModel[0].paramsDict['binWidth']`. Stored keys: `binWidth`,
       `method`, `histKnown`, `binEdgesKnown` and `histUnknown`.

    __method__ (`thisModel[0].paramsDict['method']`) is only copied into the
    classification dictionary here; it is interpreted by the code that later
    classifies with the histograms (documented values are `sumProb` and
    `mulProb`).

    Args:
        thisModel: SAMObject model to calibrate. Results are written to
            `thisModel[0]`.

    Returns:
        None

    Raises:
        ValueError: If `useMaxDistance` is set and no variance distributions
            are available to compare.
    """
    yCalib = formatDataFunc(thisModel[0].allDataDict['Y'])
    logging.info('entering segment testing')
    # Only the last two return values (variances of known / unknown samples)
    # are needed for calibration.
    _, _, _, variancesKnown, variancesUnknown = segmentTesting(
        thisModel,
        yCalib,
        thisModel[0].allDataDict['L'],
        thisModel[0].verbose,
        'calib',
        serialMode=False,
        optimise=thisModel[0].optimiseRecall,
        calibrate=True)

    model = thisModel[0]
    model.classificationDict = dict()
    calib = model.classificationDict

    if model.useMaxDistance:
        mk, vk, rk = utils.meanVar_varianceDistribution(variancesKnown)
        muk, vuk, ruk = utils.meanVar_varianceDistribution(variancesUnknown)

        distance = [
            utils.bhattacharyya_distance(mk[j], muk[j], vk[j], vuk[j])
            for j in range(len(mk))
        ]
        # `distance` is always a list, so the previous `is not None` guard
        # could never fail; the real failure mode is an empty list.
        if not distance:
            raise ValueError(
                'calibration produced no variance distributions to compare')

        # Keep the distribution pair that separates known from unknown best.
        maxIdx = distance.index(max(distance))
        calib['bestDistanceIDX'] = maxIdx
        calib['bestDistance_props'] = {
            'KnownMean': mk[maxIdx],
            'UnknownMean': muk[maxIdx],
            'KnownVar': vk[maxIdx],
            'UnknownVar': vuk[maxIdx],
        }

        # NOTE(review): earlier revisions mapped the last two indices to the
        # 'sum' and 'mean' segment operations (bestSegOperation); that mapping
        # is currently disabled - confirm whether it is still needed.

        intersection = utils.solve_intersections(mk[maxIdx], muk[maxIdx],
                                                 np.sqrt(vk[maxIdx]),
                                                 np.sqrt(vuk[maxIdx]))

        # Discard intersections lying outside the combined range of the
        # known and unknown distributions.
        maxLim = max(rk[maxIdx][1], ruk[maxIdx][1])
        minLim = min(rk[maxIdx][0], ruk[maxIdx][0])
        delList = [
            j for j, point in enumerate(intersection)
            if point > maxLim or point < minLim
        ]

        calib['segIntersections'] = np.delete(intersection, delList)
        calib['bhattaDistances'] = distance

        logging.info('Num Intersections: %s', len(calib['segIntersections']))

        calib['varianceThreshold'], calib['varianceDirection'] = \
            calculateVarianceThreshold(calib['segIntersections'],
                                       mk[maxIdx], muk[maxIdx],
                                       vk[maxIdx], vuk[maxIdx])

        logging.info('varianceThreshold %s', calib['varianceThreshold'])
        logging.info('varianceDirection %s', calib['varianceDirection'])
    else:
        variancesKnownArray = np.asarray(variancesKnown)
        variancesUnknownArray = np.asarray(variancesUnknown)
        varianceAllArray = np.vstack(
            [variancesKnownArray, variancesUnknownArray])

        # The last two columns of the variance arrays are not histogrammed.
        numColumns = len(variancesKnownArray[0]) - 2

        binWidth = model.paramsDict['binWidth']
        calib['binWidth'] = binWidth
        calib['method'] = model.paramsDict['method']

        # Shared bin edges: 0, binWidth, 2 * binWidth, ...
        # NOTE(review): the last edge is (numBins - 1) * binWidth, which lies
        # below the largest variance, so np.histogram does not count the
        # samples above it - confirm whether an extra edge is intended.
        numBins = np.ceil(np.max(varianceAllArray) / binWidth)
        bins = np.multiply(range(int(numBins)), binWidth)

        histKnown = []
        binEdges = []
        histUnknown = []
        for j in range(numColumns):
            knownCounts, edges = np.histogram(variancesKnownArray[:, j],
                                              bins=bins)
            unknownCounts, _ = np.histogram(variancesUnknownArray[:, j],
                                            bins=bins)
            # Normalise counts so each histogram sums to one.
            # NOTE(review): yields NaN if no sample falls inside the bins.
            histKnown.append(1.0 * knownCounts / np.sum(knownCounts))
            binEdges.append(edges)
            histUnknown.append(1.0 * unknownCounts / np.sum(unknownCounts))

        calib['histKnown'] = histKnown
        calib['binEdgesKnown'] = binEdges
        calib['histUnknown'] = histUnknown

    model.calibrated = True
Beispiel #2
0
def calibrateSingleModelRecall(thisModel):
    """
    Calibrate a single model to separate known from unknown inputs.

    Calls `segmentTesting` in calibration mode with the `'Y'` data and `'L'`
    labels from `thisModel[0].allDataDict`, then fills a new
    `thisModel[0].classificationDict` from the returned known / unknown
    variances and finally sets `thisModel[0].calibrated` to `True`.

    If `thisModel[0].useMaxDistance` is true, the known/unknown distribution
    pair with the largest Bhattacharyya distance is selected and its
    intersections, variance threshold and variance direction are stored.
    Otherwise per-column normalised histograms of the variances are stored,
    using bins of width `thisModel[0].paramsDict['binWidth']`, together with
    `thisModel[0].paramsDict['method']`.

    Args:
        thisModel: Model container to calibrate; results are written to
            `thisModel[0]`.

    Returns:
        None

    Raises:
        ValueError: If `useMaxDistance` is set and there are no variance
            distributions to compare.
    """
    yCalib = formatDataFunc(thisModel[0].allDataDict['Y'])
    logging.info('entering segment testing')
    # The first three return values are not needed for calibration.
    _, _, _, variancesKnown, variancesUnknown = segmentTesting(thisModel, yCalib,
                                                               thisModel[0].allDataDict['L'],
                                                               thisModel[0].verbose, 'calib',
                                                               serialMode=False,
                                                               optimise=thisModel[0].optimiseRecall,
                                                               calibrate=True)
    target = thisModel[0]
    target.classificationDict = dict()
    results = target.classificationDict

    if target.useMaxDistance:
        mk, vk, rk = utils.meanVar_varianceDistribution(variancesKnown)
        muk, vuk, ruk = utils.meanVar_varianceDistribution(variancesUnknown)

        distance = [utils.bhattacharyya_distance(mk[j], muk[j], vk[j], vuk[j]) for j in range(len(mk))]

        # A list is never None, so the old `is not None` check was dead code;
        # guard against the case that can actually happen: nothing to compare.
        if not distance:
            raise ValueError('calibration produced no variance distributions to compare')

        # Index of the best-separated known/unknown distribution pair.
        maxIdx = distance.index(max(distance))
        results['bestDistanceIDX'] = maxIdx
        results['bestDistance_props'] = {'KnownMean': mk[maxIdx], 'UnknownMean': muk[maxIdx],
                                         'KnownVar': vk[maxIdx], 'UnknownVar': vuk[maxIdx]}

        # NOTE(review): a disabled earlier revision translated the last two
        # indices into the 'sum' / 'mean' values of bestSegOperation - confirm
        # whether that mapping should be restored.

        intersection = utils.solve_intersections(mk[maxIdx], muk[maxIdx], np.sqrt(vk[maxIdx]), np.sqrt(vuk[maxIdx]))

        # Only intersections within the combined known/unknown range are kept.
        maxLim = max(rk[maxIdx][1], ruk[maxIdx][1])
        minLim = min(rk[maxIdx][0], ruk[maxIdx][0])
        delList = [j for j, value in enumerate(intersection) if value > maxLim or value < minLim]

        results['segIntersections'] = np.delete(intersection, delList)
        results['bhattaDistances'] = distance

        logging.info('Num Intersections: %s', len(results['segIntersections']))

        results['varianceThreshold'], results['varianceDirection'] = \
            calculateVarianceThreshold(results['segIntersections'], mk[maxIdx], muk[maxIdx],
                                       vk[maxIdx], vuk[maxIdx])

        logging.info('varianceThreshold %s', results['varianceThreshold'])
        logging.info('varianceDirection %s', results['varianceDirection'])
    else:
        variancesKnownArray = np.asarray(variancesKnown)
        variancesUnknownArray = np.asarray(variancesUnknown)
        varianceAllArray = np.vstack([variancesKnownArray, variancesUnknownArray])

        # Every column except the last two gets its own pair of histograms.
        numHistograms = len(variancesKnownArray[0]) - 2

        binWidth = target.paramsDict['binWidth']
        results['binWidth'] = binWidth
        results['method'] = target.paramsDict['method']

        # Bin edges shared by all histograms: multiples of binWidth from 0.
        # NOTE(review): the top edge is (numBins - 1) * binWidth, so the
        # largest variances fall outside the bins and are not counted by
        # np.histogram - confirm whether this is intended.
        numBins = np.ceil(np.max(varianceAllArray) / binWidth)
        bins = np.multiply(range(int(numBins)), binWidth)

        histKnown, binEdges, histUnknown = [], [], []
        for j in range(numHistograms):
            countsKnown, edgesKnown = np.histogram(variancesKnownArray[:, j], bins=bins)
            countsUnknown, _ = np.histogram(variancesUnknownArray[:, j], bins=bins)

            # Convert counts to relative frequencies.
            # NOTE(review): produces NaN when a histogram has no counts.
            histKnown.append(1.0 * countsKnown / np.sum(countsKnown))
            binEdges.append(edgesKnown)
            histUnknown.append(1.0 * countsUnknown / np.sum(countsUnknown))

        results['histKnown'] = histKnown
        results['binEdgesKnown'] = binEdges
        results['histUnknown'] = histUnknown

    target.calibrated = True