Example #1
def biasMeasGenerator(generator: MeasurementGenerator, priors: Tuple[float],
                      conds: List[List[float]]):
    # Updates the generator's measurement probabilities, biasing them towards those
    # indicated by the class conditional probabilities.
    # No return value. Destructively alters 'generator'.
    newProbs = calcMeasProb(priors, conds)
    generator.updateProbs(newProbs)
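
# A minimal sketch of what calcMeasProb is assumed to compute (its definition is
# not shown here): the marginal measurement distribution via the law of total
# probability, P(x) = sum_k P(k) * P(x|k). The name calcMeasProbSketch is
# hypothetical.
def calcMeasProbSketch(priors, conds):
    # One marginal probability per possible measure in the flattened space.
    return [sum(prior * cond[x] for prior, cond in zip(priors, conds))
            for x in range(len(conds[0]))]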
Example #2
def main():
    from random import seed
    from probability import genProbs
    #### Unit test for training function
    # Initializes the seed so repeated runs are reproducible.
    seed(0)  # Arbitrary choice; any fixed value reproduces the run.
    for _ in range(10):
        dimen = (4, 3, 6, 5, 6)
        classValues = 5

        measures = MeasurementGenerator(dimen)
        classes = ClassAssign(dimen, classValues)

        conds = [list(genProbs(measures.range)) for _ in range(classValues)]
        egain = [[2, 0, 0, 0, 1], [3, 4, 0, 2, 2], [2, 2, 5, 1, 1],
                 [2, 2, 3, 4, 1], [0, 1, -3, 2, 3]]
        classifier = BayesClassifier(
            None, conds, eGain=egain
        )  # Concern: supplying similar priors may skew results, even though vFold updates them.
        y = train(classifier, 20, measures, classes, 6000, delta=.0005)
        z = [y[i] - y[i - 1] for i in range(1, len(y))]
        # Average negative delta, to check whether regressions are just floating-point noise.
        print(y)
        print()
        print(z)
        q = [i for i in z if i < 0]
        q = sum(q) / max(len(q), 1)
        print(q)
        print()
    # Final smoke test on fresh samples.
    x = measures.genMeas(20)

    p = classes.assign(x)  # True tags.
    preds = classifier.assign(x)  # Classifier predictions.
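    # A possible follow-up check (hypothetical, not part of the original test):
    # the fraction of fresh samples on which the classifier agrees with the
    # true tags.
    agreement = sum(1 for a, b in zip(p, preds) if a == b) / len(x)
    print("Agreement on fresh samples: {:.2%}".format(agreement))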
Example #3
def train(tagger: BayesClassifier,
          iters: int,
          measGen: MeasurementGenerator,
          classGen: ClassAssign,
          Z: int,
          V: int = 10,
          delta: float = .05) -> List[float]:
    # Performs 'iters' iterations of vFold testing (default 'V' is ten) with the 'tagger'
    # classifier on 'Z' samples generated by 'measGen' and 'classGen'. After each vFold
    # validation, appends an expected gain value and then optimizes 'tagger' by the
    # 'delta' parameter (default .05).
    # Mutates 'tagger' in place and returns the list of expected gain values, one per iteration.
    expectedGain = []

    for _ in range(iters):
        # Generates measurements
        samples = measGen.genMeas(Z)
        values = classGen.assign(samples)

        # Shuffles values
        samplesSh, valuesSh = shuffle(samples, values)

        # Performs Test
        matrix = vFold(samplesSh, valuesSh, V, tagger)
        # Appends value to list
        expectedGain.append(calcExpGain(matrix, tagger.eGain))

        # Gives class probability over whole data set.
        tagger.priorUpdate(calcClassProb(valuesSh, tagger.range))

        # Updates tagger
        tagger.optimize(delta, samplesSh, valuesSh)

    return expectedGain
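
# A minimal sketch of the paired shuffle assumed above (the real 'shuffle' is
# not shown here): samples and values are permuted in unison so each measure
# keeps its tag. The name pairedShuffleSketch is hypothetical.
import random

def pairedShuffleSketch(samples, values):
    pairs = list(zip(samples, values))
    random.shuffle(pairs)
    samplesSh, valuesSh = zip(*pairs)
    return list(samplesSh), list(valuesSh)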
Example #4
def main():
    from bayes import calcClassProb

    priors = (.6, .4)
    conds = [[.12 / .6, .18 / .6, .3 / .6], [.2 / .4, .16 / .4, .04 / .4]]
    gain = ((1, 0), (0, 2))

    ### Tests for biasCCP
    classifier = BayesClassifier(priors, conds, eGain=gain)
    # Classification should be 1, 1, 0. The update should alter 0|1, 1|1, and 2|0.
    newCCP = biasCCP(classifier, .05)
    # print(newCCP)  # Uncomment to see if the CCP conforms to predictions: it should bias 2|0 and raise 0|1 and 1|1.

    ### Tests for biasMeasGenerator
    generator = MeasurementGenerator((2, 2))
    # print(generator.cmlProbs)
    prev = generator.cmlProbs[1] - generator.cmlProbs[0]

    # Let's give some conditionals that bias one measure
    conds = ((.1, .7, .1, .1), (.25, .25, .25, .25))
    # And feed biased priors
    priors = (.7, .3)

    biasMeasGenerator(generator, priors, conds)
    # print(generator.cmlProbs)
    now = generator.cmlProbs[1] - generator.cmlProbs[0]
    # Should show bias towards second value now.
    assert now > prev

    generator = MeasurementGenerator((2, 2))
    # print(generator.cmlProbs)
    prev1 = generator.cmlProbs[1] - generator.cmlProbs[0]
    prev2 = generator.cmlProbs[-1] - generator.cmlProbs[-2]

    # Let's give some conditionals that bias one measure
    conds = ((.1, .5, .1, .3), (.25, .25, .25, .25))
    # And feed biased priors
    priors = (.7, .3)

    biasMeasGenerator(generator, priors, conds)
    # print(generator.cmlProbs)
    now1 = generator.cmlProbs[1] - generator.cmlProbs[0]
    now2 = generator.cmlProbs[-1] - generator.cmlProbs[-2]

    # Should show bias towards second value now.
    assert now1 > prev1, (prev1, now1)
    assert now2 > prev2, (prev2, now2)
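
# Why these tests compare consecutive differences: cmlProbs is assumed to be a
# cumulative distribution over the flattened measurement space, so
# cmlProbs[i] - cmlProbs[i-1] is the probability mass of measure i. A minimal
# sketch (hypothetical name) recovering per-measure masses:
def massFromCmlSketch(cmlProbs):
    return [cmlProbs[0]] + [cmlProbs[i] - cmlProbs[i - 1]
                            for i in range(1, len(cmlProbs))]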
Example #5
def main():

    priors = (.6, .4)
    conds = [[.12 / .6, .18 / .6, .3 / .6], [.2 / .4, .16 / .4, .04 / .4]]
    gain = ((1, 0), (0, 2))

    ### Tests for biasMeasGenerator
    generator = MeasurementGenerator((2, 2))
    # print(generator.cmlProbs) ## For peeking.
    prev = generator.cmlProbs[1] - generator.cmlProbs[0]

    # Let's give some conditionals that bias one measure
    conds = ((.1, .7, .1, .1), (.25, .25, .25, .25))
    # And feed biased priors
    priors = (.7, .3)

    biasMeasGenerator(generator, priors, conds)
    # print(generator.cmlProbs)
    now = generator.cmlProbs[1] - generator.cmlProbs[0]
    # Should show bias towards second value now.
    assert now > prev

    generator = MeasurementGenerator((2, 2))
    # print(generator.cmlProbs)
    # Records previous probability for second value.
    prev1 = generator.cmlProbs[1] - generator.cmlProbs[0]
    # Records previous probability for final value.
    prev2 = generator.cmlProbs[-1] - generator.cmlProbs[-2]

    # Let's give some conditionals that bias one measure
    conds = ((0.0, .5, 0.0, .5), (.05, .4, .05, .5))
    # And feed biased priors
    priors = (.7, .3)

    biasMeasGenerator(generator, priors, conds)
    # print(generator.cmlProbs)
    now1 = generator.cmlProbs[1] - generator.cmlProbs[0]
    now2 = generator.cmlProbs[-1] - generator.cmlProbs[-2]

    # Should show bias towards the second value now. Margins may vary because the generator's original probabilities are random.
    assert now1 > prev1, (prev1, now1)
    assert now2 > prev2, (prev2, now2)
Example #6
def fitData(classifier: BayesClassifier, generator: MeasurementGenerator,
            Z: int) -> Tuple[Tuple[int], Tuple[int]]:
    # Biases the measurement generator and tag assignment towards the classifier's
    # current priors and conditionals, then generates Z new measure-tag pairs.
    # Returns the measure-tag pairs for another round of testing.
    conds = classifier.cond
    priors = classifier.prior
    posts = calcPosterior(priors, conds)
    postsNorm = normPosterior(posts)

    biasMeasGenerator(generator, priors, conds)

    measures = generator.genMeas(Z)

    tags = genBiasTags(measures, postsNorm)

    return measures, tags
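
# A minimal sketch of the posterior helpers assumed above (hypothetical names;
# the real calcPosterior/normPosterior are not shown here). By Bayes' rule,
# P(k|x) is proportional to P(x|k) * P(k); normalization divides each measure's
# row by its sum so it becomes a proper distribution over classes.
def calcPosteriorSketch(priors, conds):
    # Unnormalized posteriors: one row per measure, one column per class.
    return [[priors[k] * conds[k][x] for k in range(len(priors))]
            for x in range(len(conds[0]))]

def normPosteriorSketch(posts):
    # Each row sums to 1 afterwards.
    return [[p / sum(row) for p in row] for row in posts]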
Example #7
def fitData(classifier: BayesClassifier, generator: MeasurementGenerator,
            Z: int,
            delta: float) -> Tuple[BayesClassifier, Tuple[int], Tuple[int]]:
    # Accepts 'delta' for the conditional probability update, though the update itself
    # is performed by classifier.optimize before this call (see main below); 'delta' is
    # currently unused in this body.
    # Uses the classifier's current values to update the distribution of the measurement
    # space, then generates Z new measures and tags them according to the class
    # conditional probability distribution.
    # Returns the classifier and measure-tag pairs for another round of testing.
    conds = classifier.cond
    priors = classifier.prior
    posts = calcPosterior(priors, conds)
    postsNorm = normPosterior(posts)

    biasMeasGenerator(generator, priors, conds)

    measures = generator.genMeas(Z)

    tags = genBiasTags(measures, postsNorm)

    return classifier, measures, tags
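
# Design note: this variant also returns the classifier. It is the same object
# mutated in place, so the return is for caller convenience; main below rebinds
# all three with 'classifier, measures, tags = fitData(...)'.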
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("samples",
                        help="Number of measurement samples to generate.",
                        type=int)
    parser.add_argument("dimen", help="Measurement space.", type=str)
    parser.add_argument("classes", help="Number of classes.", type=int)
    parser.add_argument("seed",
                        help="Random seed for experiement duplication.",
                        type=int)
    parser.add_argument(
        "--vfolds",
        "-v",
        default=10,
        help=
        "Number of v-folds to partition testing data for v-folds testing. Default is 10.",
        type=int)
    parser.add_argument(
        "--optimization",
        "-o",
        default=0.0,
        help=
        "Improvement factor (delta) for iterative updates of class conditional probabilities. 0 disables optimization.",
        type=float)
    parser.add_argument("--iteration",
                        "-t",
                        default=10,
                        help="Number of iterations for conditional update.",
                        type=int)
    parser.add_argument(
        "--identity",
        "-i",
        action="store_true",
        default=False,
        help="Specify if economic gain matrix should be identity.")
    args = parser.parse_args()

    # Prompts for reader friendliness
    print("Generating testing data for seed {}".format(args.seed))

    # Sets seed
    seed(args.seed)

    # Assigns values
    from ast import literal_eval
    dimen = literal_eval(args.dimen)  # Safer than eval() for parsing a tuple literal from the CLI.
    # Calculates size of domain
    M = 1
    for N in dimen:
        M *= N
    K = args.classes
    V = args.vfolds
    Z = args.samples
    print("Dimensions of Measurement Space: {}".format(dimen))
    print("Number of Samples: {}".format(Z))
    print("Classes: {}".format(K))

    # Checks that the requested measurement space is feasible memory-wise.
    if config.computeLimit(M, K):
        print("Possible measurements exceed memory capabilities.")
        sys.exit()

    print("Generating {0}x{0} Gain Matrix. Identity Matrix: {1}".format(
        K, args.identity))

    gain = genGain(K, identity=args.identity)
    print("{}x{} Economic Gain Matrix Generated".format(
        len(gain), len(gain[0])))

    # Generates measures
    print("Generating {} Measure-Value pairs.".format(Z))
    print("Generating measures.")
    generator = MeasurementGenerator(dimen)
    measures = generator.genMeas(Z)

    assigner = ClassAssign(dimen, K)
    tags = assigner.assign(measures)
    print("{} measures and {} values generated.".format(
        len(measures), len(tags)))

    ## Generates classifier.
    print(
        "Generating class conditional probabilities for {} classes and {} possible measures."
        .format(K, M))

    conditionals = genCCP(K, dimen)
    print(
        "Class conditional probabilities generated for {} classes and {} possible measures"
        .format(len(conditionals), len(conditionals[0])))

    classifier = BayesClassifier(
        None, conditionals,
        eGain=gain)  # No priors given since vFold always assigns.

    print("Testing classifier. V-fold factor: {}".format(V))
    measures, tags = shuffle(measures, tags)
    results = vFold(measures, tags, V, classifier)
    matrix = genConMatrix(tags, results, K)
    norm = normConMatrix(matrix)
    expGain = calcExpGain(norm, classifier.eGain)
    #expGain = test(classifier, measures, tags, V=V)

    print("The expected gain for the given data is: {}".format(expGain))

    #### Iteratively refines class conditional probabilities when optimization is requested.
    if args.optimization:
        print(
            "Fitting data for improved performance. Improvement factor {} used over {} iterations."
            .format(args.optimization, args.iteration))
        gains = []
        # Sets the priors computed from this measurement set as the permanent priors.
        priors = calcClassProb(tags, K)
        classifier.priorUpdate(priors)
        for i in range(args.iteration):
            # print(priors)
            classifier.optimize(args.optimization, measures, tags)

            classifier, measures, tags = fitData(classifier, generator, Z,
                                                 args.optimization)

            measures, tags = shuffle(measures, tags)
            results = vFold(measures, tags, V, classifier)
            matrix = genConMatrix(tags, results, K)
            norm = normConMatrix(matrix)
            expGain = calcExpGain(norm, classifier.eGain)
            #expGain = test(classifier, measures, tags, V=V)
            gains.append(expGain)
            print("Expected Gain from iteration {} is {}".format(
                i + 1, expGain))
        print("The expected gain for fitted data after {} iterations is: {}".
              format(args.iteration, gains[-1]))

    # Writes all data to files
    print("Writing to file.")
    reader.writeData(measures, tags, dimen)
    reader.writePriors(classifier.prior)
    reader.writeGain(gain)
    reader.writeCCP(classifier.cond)
    print("Done.")