Example #1
# Project-specific helpers (BayesClassifier, MeasurementGenerator, ClassAssign,
# shuffle, vFold, calcExpGain, calcClassProb) come from the surrounding project
# and are not shown in this snippet.
from typing import List


def train(tagger: BayesClassifier,
          iter: int,
          measGen: MeasurementGenerator,
          classGen: ClassAssign,
          Z: int,
          V: int = 10,
          delta: float = .05) -> List[float]:
    # Performs 'iter' iterations of v-fold testing (default 'V' is ten) with the 'tagger'
    # classifier on 'Z' samples generated by 'measGen' and 'classGen'. After each v-fold
    # validation it appends the expected gain (computed with tagger.eGain), updates the
    # tagger's priors over the whole data set, and optimizes the tagger by the 'delta'
    # parameter (default .05). Returns the list of expected gain values, one per
    # iteration; 'tagger' is optimized in place. (A usage sketch follows this example.)
    expectedGain = []

    for _ in range(iter):
        # Generates measurements
        samples = measGen.genMeas(Z)
        values = classGen.assign(samples)

        # Shuffles values
        samplesSh, valuesSh = shuffle(samples, values)

        # Performs Test
        matrix = vFold(samplesSh, valuesSh, V, tagger)
        # Appends value to list
        expectedGain.append(calcExpGain(matrix, tagger.eGain))

        # Gives class probability over whole data set.
        tagger.priorUpdate(calcClassProb(valuesSh, tagger.range))

        # Updates tagger
        tagger.optimize(delta, samplesSh, valuesSh)

    return expectedGain
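
A minimal usage sketch for train(). The constructor and helper names used here (MeasurementGenerator, ClassAssign, BayesClassifier, genCCP, genGain) are taken from Example #2 below; their exact signatures and the parameter values chosen are assumptions for illustration only.

# Hypothetical setup mirroring Example #2; not part of the original source.
K = 3                                    # number of classes
dimen = (4, 4)                           # measurement-space dimensions
generator = MeasurementGenerator(dimen)  # produces measurement samples
assigner = ClassAssign(dimen, K)         # assigns a class to each sample
classifier = BayesClassifier(None, genCCP(K, dimen), eGain=genGain(K, identity=True))

# Ten iterations of 1000 samples each, with 10-fold validation and delta = 0.05.
gains = train(classifier, iter=10, measGen=generator, classGen=assigner,
              Z=1000, V=10, delta=0.05)
print("Expected gain after the final iteration:", gains[-1])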
Example #2
import argparse
import sys

# Project-specific helpers (config, reader, seed, shuffle, vFold, genGain, genCCP,
# genConMatrix, normConMatrix, calcExpGain, calcClassProb, fitData,
# MeasurementGenerator, ClassAssign, and BayesClassifier) come from the
# surrounding project and are not shown in this snippet.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("samples",
                        help="Number of measurement samples to generate.",
                        type=int)
    parser.add_argument("dimen", help="Measurement space.", type=str)
    parser.add_argument("classes", help="Number of classes.", type=int)
    parser.add_argument("seed",
                        help="Random seed for experiement duplication.",
                        type=int)
    parser.add_argument(
        "--vfolds",
        "-v",
        default=10,
        help=
        "Number of v-folds to partition testing data for v-folds testing. Default is 10.",
        type=int)
    parser.add_argument(
        "--optimization",
        "-o",
        default=0.0,
        help=
        "Improvement factor for iterative updates of the class conditional probability values; 0.0 (the default) disables optimization.",
        type=float)
    parser.add_argument("--iteration",
                        "-t",
                        default=10,
                        help="Number of iterations for conditional update.",
                        type=int)
    parser.add_argument(
        "--identity",
        "-i",
        action="store_true",
        default=False,
        help="Specify if economic gain matrix should be identity.")
    args = parser.parse_args()
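    # Example invocation (the script name 'experiment.py' is hypothetical):
    #   python experiment.py 1000 "(4, 4)" 3 42 --vfolds 5 --optimization 0.05 --identity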

    # (The memory-feasibility check happens below, once the size of the
    # measurement space is known.)

    # Prompts for reader friendliness.
    print("Generating testing data for seed {}".format(args.seed))

    # Sets seed
    seed(args.seed)

    # Parses the measurement-space dimensions from the command line, e.g. "(3, 4)".
    dimen = eval(args.dimen)
    # Calculates size of domain
    M = 1
    for N in dimen:
        M *= N
    K = args.classes
    V = args.vfolds
    Z = args.samples
    print("Dimensions of Measurement Space: {}".format(dimen))
    print("Number of Samples: {}".format(Z))
    print("Classes: {}".format(K))

    # Checks that the measurement space is small enough to fit in memory.
    if config.computeLimit(M, K):
        print("Possible measurements exceed memory capabilities.")
        sys.exit()

    print("Generating {0}x{0} Gain Matrix. Identity Matrix: {1}".format(
        K, args.identity))

    gain = genGain(K, identity=args.identity)
    print("{}x{} Economic Gain Matrix Generated".format(
        len(gain), len(gain[0])))

    # Generates measures
    print("Generating {} Measure-Value pairs.".format(Z))
    print("Generating measures.")
    generator = MeasurementGenerator(dimen)
    measures = generator.genMeas(Z)

    assigner = ClassAssign(dimen, K)
    tags = assigner.assign(measures)
    print("{} measures and {} values generated.".format(
        len(measures), len(tags)))

    # Generates class conditional probabilities and the classifier.
    print(
        "Generating class conditional probabilities for {} classes and {} possible measures."
        .format(K, M))

    conditionals = genCCP(K, dimen)
    print(
        "Class conditional probabilities generated for {} classes and {} possible measures"
        .format(len(conditionals), len(conditionals[0])))

    classifier = BayesClassifier(
        None, conditionals,
        eGain=gain)  # No priors given since vFold always assigns.

    print("Testing classifier. V-fold factor: {}".format(V))
    measures, tags = shuffle(measures, tags)
    results = vFold(measures, tags, V, classifier)
    matrix = genConMatrix(tags, results, K)
    norm = normConMatrix(matrix)
    expGain = calcExpGain(norm, classifier.eGain)
    #expGain = test(classifier, measures, tags, V=V)

    print("The expected gain for the given data is: {}".format(expGain))

    # Iteratively optimizes the classifier, if an improvement factor was given.
    if args.optimization:
        print(
            "Fitting data for improved performance. Improvement factor {} used over {} iterations."
            .format(args.optimization, args.iteration))
        gains = []
        # Sets the priors computed from this measurement set as the permanent priors.
        priors = calcClassProb(tags, K)
        classifier.priorUpdate(priors)
        for i in range(args.iteration):
            # print(priors)
            classifier.optimize(args.optimization, measures, tags)

            classifier, measures, tags = fitData(classifier, generator, Z,
                                                 args.optimization)

            measures, tags = shuffle(measures, tags)
            results = vFold(measures, tags, V, classifier)
            matrix = genConMatrix(tags, results, K)
            norm = normConMatrix(matrix)
            expGain = calcExpGain(norm, classifier.eGain)
            #expGain = test(classifier, measures, tags, V=V)
            gains.append(expGain)
            print("Expected Gain from iteration {} is {}".format(
                i + 1, expGain))
        print("The expected gain for fitted data after {} iterations is: {}".
              format(args.iteration, gains[-1]))

    # Writes all data to files
    print("Writing to file.")
    reader.writeData(measures, tags, dimen)
    reader.writePriors(classifier.prior)
    reader.writeGain(gain)
    reader.writeCCP(classifier.cond)
    print("Done.")