def main() -> None:
    """Sum the outputs of all residual-champion individuals over a dataset.

    Loads (or generates) a dataset, loads the population of residual
    champions from ./outputs/residualChampion_*, evaluates the sum of all
    individual outputs per sample, and writes an x,target,prediction CSV to
    ./outputs/comparison.csv.
    """
    logging.info("test_sum_of_residuals.py main()")

    variableNameToTypeDict: Dict[str, str] = ast.literal_eval(
        args.variableNameToTypeDict)

    # Load the dataset from a file, or generate (and save) a synthetic one.
    dataset: List[Tuple[Dict[str, float], float]] = []
    # Fixed: was `args.datasetFilepath is not 'None'` — identity comparison
    # against a str literal is implementation-dependent; use equality.
    if args.datasetFilepath != 'None':
        dataset = create_dataset.LoadDataset(args.datasetFilepath,
                                             variableNameToTypeDict,
                                             args.returnType)
    else:
        if args.datasetPrototype.lower().endswith('2d'):
            dataset = create_dataset.CreateDataset_2D(args.datasetPrototype,
                                                      args.numberOfSamples,
                                                      args.noiseStdDev)
        else:
            dataset = create_dataset.CreateDataset(args.datasetPrototype,
                                                   args.numberOfSamples,
                                                   args.noiseStdDev)
        # Keep a copy of the generated data for later inspection
        create_dataset.SaveDataset(
            dataset, './outputs/test_sum_of_residuals_generated_{}.csv'.format(
                args.datasetPrototype))

    # Load the population of residual champions
    population: gpevo.ArithmeticsPopulation = gpevo.ArithmeticsPopulation()
    population.LoadIndividuals('./outputs/residualChampion_')

    # Create the interpreter from the arithmetics domain primitives
    tree_filepath: str = '../../src/genprog/domains/arithmetics.xml'
    domainFunctionsTree: ET.ElementTree = ET.parse(tree_filepath)
    interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter(
        domainFunctionsTree)

    # Evaluate: the prediction for each sample is the sum of all residual
    # champions' outputs on that sample's inputs
    evaluations: List[float] = []
    for inputsDict, _target in dataset:
        evaluationSum: float = 0.0
        for individual in population._individualsList:
            evaluationSum += interpreter.Evaluate(
                individual, variableNameToTypeDict, inputsDict,
                args.returnType)
        evaluations.append(evaluationSum)

    # Write predictions next to targets for offline comparison.
    # NOTE: only the first feature value is written (single 'x' column);
    # a multi-feature header was disabled in the original code.
    with open('./outputs/comparison.csv', 'w', buffering=1) as comparisonFile:
        comparisonFile.write('{},target,prediction\n'.format('x'))
        for index, (featuresDict, target) in enumerate(dataset):
            featureValues = [str(v) for v in featuresDict.values()]
            prediction = evaluations[index]
            comparisonFile.write(featureValues[0] + ',' + str(target) + ',' +
                                 str(prediction) + '\n')
def main():
    """Sum an ensemble's evaluations on one hand-built tic-tac-toe position."""
    logging.info("test_ensemble.py main()")
    # Load the residual individuals
    # NOTE(review): this population appears unused afterward; kept for parity
    residualsPopulation = gpevo.ArithmeticsPopulation()
    residualsPopulation.LoadIndividuals(args.ensembleMembersFilepathPrefix)

    # Restore the position autoencoder from disk
    positionEncoder = autoencoder.position.Net()
    positionEncoder.Load(args.autoencoder)

    # Unpickle the features preprocessor
    # NOTE(review): pickle.load on a path taken from args — safe only for
    # trusted files; never use on untrusted input
    featuresPreprocessor = pickle.load(open(args.featuresPreprocessor, 'rb'))

    # Build a board position by hand and display it
    gameAuthority = tictactoe.Authority()
    boardPosition = gameAuthority.InitialPosition()
    for markIndex in ((0, 0, 1, 1), (1, 0, 0, 1), (0, 0, 0, 2),
                      (1, 0, 2, 0), (0, 0, 2, 2)):
        boardPosition[markIndex] = 1
    #position = authority.SwapPositions(position, 'X', 'O')
    gameAuthority.Display(boardPosition)

    # Encode the position, then preprocess the latent vector
    latentEncoding = positionEncoder.Encode(boardPosition.unsqueeze(0))
    logging.debug("encoding = {}".format(latentEncoding))
    preprocessedEncoding = featuresPreprocessor.transform(
        latentEncoding.detach().numpy())[0]

    print("preprocessedEncoding = {}".format(preprocessedEncoding))

    # Load the population of ensemble members
    ensemblePopulation = gpevo.ArithmeticsPopulation()
    ensemblePopulation.LoadIndividuals(args.ensembleMembersFilepathPrefix)

    # Pair each variable name with its preprocessed encoding value
    # NOTE(review): variableNameToTypeDict is assumed to be a module-level
    # global — confirm against the original script
    variableNameToValueDict = dict(
        zip(list(variableNameToTypeDict), preprocessedEncoding))
    logging.debug(
        "variableNameToValueDict = {}".format(variableNameToValueDict))

    # Build the interpreter from the domain primitives file
    domainFunctionsTree: ET.ElementTree = ET.parse(args.domainPrimitivesFilepath)
    interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter(domainFunctionsTree)

    # Sum the outputs of every member of the ensemble
    outputsSum = ensemblePopulation.SumOfEvaluations(
        [variableNameToValueDict], interpreter, variableNameToTypeDict,
        'float')
    logging.info("outputsSum = {}".format(outputsSum))
# Esempio n. 3
# 0
# (stray example-separator text from the source scrape, commented out so the file parses)
def main():
    """Compare an ensemble's averaged predictions against a generated dataset."""
    logging.info("test_ensemble.py main()")

    primitivesFilepath: str = '../../src/genprog/domains/arithmetics.xml'
    outputType: str = 'float'

    # Load the ensemble members from disk
    ensemble: gpevo.ArithmeticsPopulation = gpevo.ArithmeticsPopulation()
    ensemble.LoadIndividuals(args.ensembleMembersFilepathPrefix)
    logging.debug("len(ensemble._individualsList) = {}".format(
        len(ensemble._individualsList)))

    # Generate a synthetic dataset from the requested prototype
    logging.info("Generating the dataset '{}'...".format(
        args.datasetPrototype))
    if args.datasetPrototype.lower().endswith('2d'):
        samples: List[Tuple[Dict[str, float], float]] = \
            create_dataset.CreateDataset_2D(args.datasetPrototype,
                                            args.numberOfSamples,
                                            args.noiseStdDev)
    else:
        samples = create_dataset.CreateDataset(args.datasetPrototype,
                                               args.numberOfSamples,
                                               args.noiseStdDev)

    # Build the interpreter
    domainTree: ET.ElementTree = ET.parse(primitivesFilepath)
    interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter(
        domainTree)

    # Open the comparison file (line-buffered so rows appear immediately)
    comparisonFile = open('./outputs/comparison.csv', 'w', buffering=1)
    comparisonFile.write('x,target,prediction\n')

    # Average the ensemble's evaluation over every input dictionary
    # NOTE(review): variableNameToTypeDict is assumed to be a module-level global
    inputsList: List[Dict[str, Any]] = [pair[0] for pair in samples]
    inputPredictionList: List[Tuple[Dict[str, Any], Any]] = \
        ensemble.AverageEvaluation(inputsList, interpreter,
                                   variableNameToTypeDict, outputType)

    # Write x, target and prediction for each sample
    for sampleIdx, (inputs, targetValue) in enumerate(samples):
        predicted = inputPredictionList[sampleIdx][1]
        comparisonFile.write('{},{},{}\n'.format(inputs['x'], targetValue,
                                                 predicted))
# Esempio n. 4
# 0
# (stray example-separator text from the source scrape, commented out so the file parses)
def main():
    """Optimize the constants of one loaded individual with backpropagation.

    Loads an individual from args.treeFilepath, evaluates it, then runs
    args.numberOfEpochs epochs of constant training, re-evaluating and
    rewriting ./outputs/comparison.csv after every epoch.
    """
    logging.info("test_backpropagation.py main()")

    # Create the interpreter from the arithmetics domain primitives
    arithmeticsFunctionsFilepaths: str = './genprog/domains/arithmetics.xml'
    arithmeticsFunctionsTree: ET.ElementTree = ET.parse(
        arithmeticsFunctionsFilepaths)
    interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter(
        arithmeticsFunctionsTree)

    # Load the individual to be trained
    # (the original computed an unused `headElm` here; removed)
    individual: gp.Individual = gp.LoadIndividual(args.treeFilepath)

    # Wrap the single individual in a population so the population-level
    # evaluation helper can be reused
    population: gpevo.ArithmeticsPopulation = gpevo.ArithmeticsPopulation(
        [individual])

    # Generate the dataset
    logging.info("Generating the dataset '{}'...".format(
        args.datasetPrototype))
    if args.datasetPrototype.lower().endswith('2d'):
        xDictOutputValueTupleList: List[Tuple[
            Dict[str, float],
            float]] = create_dataset.CreateDataset_2D(args.datasetPrototype,
                                                      args.numberOfSamples)
    else:
        xDictOutputValueTupleList = create_dataset.CreateDataset(
            args.datasetPrototype, args.numberOfSamples)
    # Split dataset
    (trainingDataset, validationDataset) = create_dataset.SplitDataset(
        xDictOutputValueTupleList, trainingProportion=args.trainingProportion)

    # Baseline evaluation before any training
    # NOTE(review): variableNameToTypeDict is assumed to be a module-level global
    (validationChampion, championTrainingCost, championValidationCost,
     medianTrainingCost, medianValidationCost,
     training_individualToCostDict) = \
        create_dataset.EvaluateIndividuals(
            population,
            trainingDataset,
            validationDataset,
            variableNameToTypeDict,
            interpreter,
            args.returnType
        )

    for epoch in range(1, args.numberOfEpochs + 1):
        logging.info("Epoch {}".format(epoch))
        # One epoch of constant optimization over the training set.
        # NOTE(review): if EpochOfTraining returns a new object rather than
        # mutating in place, `population` still holds the original individual
        # — confirm against the interpreter implementation.
        individual = interpreter.EpochOfTraining(individual,
                                                 variableNameToTypeDict,
                                                 args.returnType,
                                                 trainingDataset,
                                                 args.learningRate)

        # Re-evaluate the population after this epoch
        (validationChampion, championTrainingCost, championValidationCost,
         medianTrainingCost, medianValidationCost,
         training_individualToCostDict) = \
            create_dataset.EvaluateIndividuals(
                population,
                trainingDataset,
                validationDataset,
                variableNameToTypeDict,
                interpreter,
                args.returnType
            )

        # Comparison file, rewritten every epoch with the latest predictions
        comparisonFile = open('./outputs/comparison.csv', 'w', buffering=1)
        comparisonFile.write('x,target,prediction\n')
        for xTarget in validationDataset:
            x = xTarget[0]['x']
            target = xTarget[1]
            prediction = interpreter.Evaluate(individual,
                                              variableNameToTypeDict,
                                              xTarget[0], args.returnType)
            comparisonFile.write(
                str(x) + ',' + str(target) + ',' + str(prediction) + '\n')
    # Fixed: the original source ended this block with an unterminated
    # triple-quoted string ('"""elementToEvaluationDict = ...'), which is a
    # SyntaxError; the dangling fragment has been removed.
def main() -> None:
    """Evolve a population of arithmetic individuals on a generated dataset.

    Runs args.numberOfGenerations generations of tournament-based evolution,
    logging champion/median training and validation costs per generation to
    ./outputs/generationsCost.csv, saving each new best validation champion
    to ./outputs/champion_<generation>.xml, and rewriting
    ./outputs/comparison.csv with the current champion's predictions.
    """
    logging.info("test_arithmetics_population.py main()")
    # Generate the dataset and split it into training/validation lists
    logging.info("Generating the dataset '{}'...".format(
        args.datasetPrototype))
    xyDictOutputValueTupleList = CreateDataset(args.datasetPrototype,
                                               args.numberOfSamples)
    (trainingDatasetList,
     validationDatasetList) = SplitDataset(xyDictOutputValueTupleList,
                                           args.trainingProportion)

    # Single input variable 'x'; interpreter reads the arithmetics primitives
    variableNameToTypeDict: Dict[str, str] = {'x': 'float'}  #, 'y': 'float'}
    tree_filepath: str = '../../src/genprog/domains/arithmetics.xml'
    returnType: str = 'float'
    domainFunctionsTree: ET.ElementTree = ET.parse(tree_filepath)
    interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter(
        domainFunctionsTree)
    # Generate the initial random population.
    # NOTE(review): levelToFunctionProbabilityDict and
    # constantCreationParametersList are assumed to be module-level globals.
    population: gpevo.ArithmeticsPopulation = gpevo.ArithmeticsPopulation()
    population.Generate(args.numberOfIndividuals, interpreter, returnType,
                        levelToFunctionProbabilityDict,
                        args.proportionOfConstants,
                        constantCreationParametersList, variableNameToTypeDict)

    logging.info("Evaluating the individuals...")
    # Training cost of every individual in the initial population
    training_individualToCostDict: Dict[
        gp.Individual,
        float] = population.EvaluateIndividualCosts(trainingDatasetList,
                                                    variableNameToTypeDict,
                                                    interpreter, returnType)

    (trainingChampion,
     championTrainingCost) = population.Champion(training_individualToCostDict)
    medianTrainingCost: float = population.MedianCost(
        training_individualToCostDict)

    # Validation cost
    validation_individualToCostDict: Dict[
        gp.Individual,
        float] = population.EvaluateIndividualCosts(validationDatasetList,
                                                    variableNameToTypeDict,
                                                    interpreter, returnType)
    (validationChampion, championValidationCost
     ) = population.Champion(validation_individualToCostDict)
    medianValidationCost = population.MedianCost(
        validation_individualToCostDict)

    logging.info(
        "championTrainingCost = {};    championValidationCost = {};    medianTrainingCost = {};    medianValidationCost = {}"
        .format(championTrainingCost, championValidationCost,
                medianTrainingCost, medianValidationCost))

    # Output monitoring file
    generationsCostFile = open('./outputs/generationsCost.csv',
                               "w",
                               buffering=1)  # Flush the buffer at each line
    generationsCostFile.write(
        "generation,championTrainingCost,championValidationCost,medianTrainingCost,medianValidationCost\n"
    )
    # Generation 0 = the unevolved random population
    generationsCostFile.write('0,' + str(championTrainingCost) + ',' +
                              str(championValidationCost) + ',' +
                              str(medianTrainingCost) + ',' +
                              str(medianValidationCost) + '\n')

    lowestChampionValidationCost = sys.float_info.max

    for generationNdx in range(1, args.numberOfGenerations + 1):
        logging.info("Generation {}".format(generationNdx))
        # Build the next generation by tournament selection + mutation; the
        # returned dict carries training costs into the next iteration.
        training_individualToCostDict = population.NewGenerationWithTournament(
            trainingDatasetList, variableNameToTypeDict, interpreter,
            returnType, args.numberOfTournamentParticipants,
            args.mutationProbability, training_individualToCostDict,
            args.proportionOfConstants, levelToFunctionProbabilityDict, None,
            constantCreationParametersList, args.proportionOfNewIndividuals,
            args.weightForNumberOfElements)
        (trainingChampion, championTrainingCost
         ) = population.Champion(training_individualToCostDict)
        medianTrainingCost = population.MedianCost(
            training_individualToCostDict)

        # Validation cost
        validation_individualToCostDict = population.EvaluateIndividualCosts(
            validationDatasetList, variableNameToTypeDict, interpreter,
            returnType)
        (validationChampion, championValidationCost
         ) = population.Champion(validation_individualToCostDict)
        medianValidationCost = population.MedianCost(
            validation_individualToCostDict)

        logging.info(
            "championTrainingCost = {};    championValidationCost = {};    medianTrainingCost = {};    medianValidationCost = {}"
            .format(championTrainingCost, championValidationCost,
                    medianTrainingCost, medianValidationCost))
        generationsCostFile.write(
            str(generationNdx) + ',' + str(championTrainingCost) + ',' +
            str(championValidationCost) + ',' + str(medianTrainingCost) + ',' +
            str(medianValidationCost) + '\n')
        # Save the champion only when it improves on the best seen so far
        if championValidationCost < lowestChampionValidationCost:
            validationChampion.Save('./outputs/champion_' +
                                    str(generationNdx) + '.xml')
            lowestChampionValidationCost = championValidationCost

        # Comparison file, rewritten each generation with the current champion
        # (xTargetPredictionTuplesList is declared but never filled)
        comparisonFile = open('./outputs/comparison.csv', 'w', buffering=1)
        comparisonFile.write('x,target,prediction\n')
        xTargetPredictionTuplesList: List[Tuple[float, float, float]] = []
        for xTarget in validationDatasetList:
            x = xTarget[0]['x']
            target = xTarget[1]
            prediction = interpreter.Evaluate(validationChampion,
                                              variableNameToTypeDict,
                                              xTarget[0], returnType)
            comparisonFile.write(
                str(x) + ',' + str(target) + ',' + str(prediction) + '\n')
def main() -> None:
    """Evolve populations on successive residuals of the target function.

    Every args.numberOfGenerationsPerResidual generations the best individual
    ("residual champion") is frozen and saved, its predictions are subtracted
    from the dataset targets, and evolution restarts on the residuals with a
    fresh population.

    Outputs:
      - ./outputs/generationsCost.csv: per-generation cost monitoring
      - ./outputs/champion_<g>.xml: new best validation champions
      - ./outputs/residualChampion_<g>.xml: champions frozen per residual stage
      - ./outputs/comparison.csv: optional target-vs-prediction dump
    """
    logging.info("test_evolution_residuals.py main()")

    returnType: str = 'float'

    # Generate (or load) the dataset
    logging.info("Generating the dataset '{}'...".format(
        args.datasetPrototype))
    # Fixed: was `args.datasetFilepath is not 'None'` — identity comparison
    # against a str literal is implementation-dependent; use equality.
    if args.datasetFilepath != 'None':
        xDictOutputValueTupleList = create_dataset.LoadDataset(
            args.datasetFilepath, variableNameToTypeDict, returnType)
    else:
        if args.datasetPrototype.lower().endswith('2d'):
            xDictOutputValueTupleList: List[Tuple[Dict[
                str, float], float]] = create_dataset.CreateDataset_2D(
                    args.datasetPrototype, args.numberOfSamples,
                    args.noiseStdDev)
        else:
            xDictOutputValueTupleList = create_dataset.CreateDataset(
                args.datasetPrototype, args.numberOfSamples, args.noiseStdDev)
        # Keep a copy of the generated data for reproducibility
        create_dataset.SaveDataset(
            xDictOutputValueTupleList,
            './outputs/test_evolution_residuals_generated_{}.csv'.format(
                args.datasetPrototype))
    # Split dataset
    (trainingDataset, validationDataset) = create_dataset.SplitDataset(
        xDictOutputValueTupleList, trainingProportion=args.trainingProportion)

    # Create the interpreter
    domainFunctionsTree: ET.ElementTree = ET.parse(
        args.domainPrimitivesFilepath)
    interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter(
        domainFunctionsTree)

    # Generate (or load) the starting population
    logging.info("Generating the population...")
    if args.originalPopulationFilepathPrefix == 'None':
        population: gpevo.ArithmeticsPopulation = gpevo.ArithmeticsPopulation()
        population.Generate(args.numberOfIndividuals, interpreter, returnType,
                            levelToFunctionProbabilityDict,
                            args.proportionOfConstants,
                            constantCreationParametersList,
                            variableNameToTypeDict)
    else:
        population = gpevo.ArithmeticsPopulation()
        population.LoadIndividuals(args.originalPopulationFilepathPrefix)

    # Baseline evaluation before any training
    (validationChampion, championTrainingCost, championValidationCost,
     medianTrainingCost, medianValidationCost,
     training_individualToCostDict) = \
        evaluate_individuals(
            population,
            trainingDataset,
            validationDataset,
            variableNameToTypeDict,
            interpreter,
            returnType
        )

    # Optimize constants with backpropagation
    logging.info("Optimizing constants with backpropagation...")
    for epoch in range(args.numberOfEpochsPerGeneration):
        logging.debug("Epoch {}".format(epoch + 1))
        for individualNdx in range(len(population._individualsList)):
            population._individualsList[
                individualNdx] = interpreter.EpochOfTraining(
                    population._individualsList[individualNdx],
                    variableNameToTypeDict, returnType, trainingDataset,
                    args.learningRate)

    # Re-evaluate after the constant optimization
    (validationChampion, championTrainingCost, championValidationCost,
     medianTrainingCost, medianValidationCost,
     training_individualToCostDict) = \
        evaluate_individuals(
            population,
            trainingDataset,
            validationDataset,
            variableNameToTypeDict,
            interpreter,
            returnType
        )

    # Output monitoring file
    generationsCostFile = open('./outputs/generationsCost.csv',
                               "w",
                               buffering=1)  # Flush the buffer at each line
    generationsCostFile.write(
        "generation,championTrainingCost,championValidationCost,medianTrainingCost,medianValidationCost\n"
    )
    generationsCostFile.write('0,' + str(championTrainingCost) + ',' +
                              str(championValidationCost) + ',' +
                              str(medianTrainingCost) + ',' +
                              str(medianValidationCost) + '\n')

    lowestChampionValidationCost = sys.float_info.max
    residualChampion = None  # best individual of the current residual stage

    for generationNdx in range(1, args.numberOfGenerations + 1):
        logging.info("Generation {}".format(generationNdx))

        if generationNdx % args.numberOfGenerationsPerResidual == 0:
            # End of a residual stage: freeze the stage champion, replace the
            # targets with the residuals, then restart with a new population.
            logging.info("Generating a new population")
            if residualChampion is None:
                # Robustness fix: the original unconditionally called
                # residualChampion.Save(), which raises AttributeError when
                # no champion improved during this stage (e.g. when
                # numberOfGenerationsPerResidual == 1 on the first stage).
                logging.warning(
                    "No residual champion for this stage; skipping the residual computation")
            else:
                residualChampion.Save(
                    './outputs/residualChampion_{}.xml'.format(generationNdx))
                trainingDataset = compute_residual(trainingDataset,
                                                   residualChampion,
                                                   variableNameToTypeDict,
                                                   returnType, interpreter)
                validationDataset = compute_residual(validationDataset,
                                                     residualChampion,
                                                     variableNameToTypeDict,
                                                     returnType, interpreter)
            # Generate the population for the next stage
            logging.info("Generating the population...")
            population.Generate(args.numberOfIndividuals, interpreter,
                                returnType, levelToFunctionProbabilityDict,
                                args.proportionOfConstants,
                                constantCreationParametersList,
                                variableNameToTypeDict)
            # Reset the stage-tracking state
            lowestChampionValidationCost = sys.float_info.max
            residualChampion = None

        else:
            # Normal generation: tournament selection + mutation
            training_individualToCostDict = population.NewGenerationWithTournament(
                trainingDataset, variableNameToTypeDict, interpreter,
                returnType, args.numberOfTournamentParticipants,
                args.mutationProbability, training_individualToCostDict,
                args.proportionOfConstants, levelToFunctionProbabilityDict,
                None, constantCreationParametersList,
                args.proportionOfNewIndividuals,
                args.weightForNumberOfElements)

        # Optimize constants with backpropagation
        for epoch in range(args.numberOfEpochsPerGeneration):
            for individualNdx in range(len(population._individualsList)):
                population._individualsList[
                    individualNdx] = interpreter.EpochOfTraining(
                        population._individualsList[individualNdx],
                        variableNameToTypeDict, returnType, trainingDataset,
                        args.learningRate)

        # Population evaluation
        (validationChampion, championTrainingCost, championValidationCost,
         medianTrainingCost, medianValidationCost,
         training_individualToCostDict) = \
            evaluate_individuals(
                population,
                trainingDataset,
                validationDataset,
                variableNameToTypeDict,
                interpreter,
                returnType
            )

        # Output monitoring file
        generationsCostFile.write(
            str(generationNdx) + ',' + str(championTrainingCost) + ',' +
            str(championValidationCost) + ',' + str(medianTrainingCost) + ',' +
            str(medianValidationCost) + '\n')

        # Is it a new champion? Keep a deep copy so later generations cannot
        # mutate the frozen residual champion.
        if championValidationCost < lowestChampionValidationCost:
            validationChampion.Save('./outputs/champion_' +
                                    str(generationNdx) + '.xml')
            lowestChampionValidationCost = championValidationCost
            residualChampion = copy.deepcopy(validationChampion)

        # Comparison file
        if args.writeToComparisonFile:
            comparisonFile = open('./outputs/comparison.csv', 'w', buffering=1)
            comparisonFile.write('x,target,prediction\n')
            for xTarget in validationDataset:
                x = xTarget[0]['x']
                target = xTarget[1]
                prediction = interpreter.Evaluate(validationChampion,
                                                  variableNameToTypeDict,
                                                  xTarget[0], returnType)
                comparisonFile.write(
                    str(x) + ',' + str(target) + ',' + str(prediction) + '\n')
def main() -> None:
    """Evolve an ensemble of individuals, each trained on a random subset.

    For each of args.numberOfEnsembleMembers members: sample a bagged
    training subset, generate and evolve a population (with per-generation
    constant backpropagation), and save the member's best validation
    champion to ./outputs/member_<memberNdx>.xml.
    """
    logging.info("evolve_ensemble.py main()")

    tree_filepath: str = '../../src/genprog/domains/arithmetics.xml'
    returnType: str = 'float'

    # Generate the dataset
    logging.info("Generating the dataset '{}'...".format(args.datasetPrototype))
    if args.datasetPrototype.lower().endswith('2d'):
        xDictOutputValueTupleList: List[Tuple[Dict[str, float], float]] = create_dataset.CreateDataset_2D(
            args.datasetPrototype, args.numberOfSamples, args.noiseStdDev
        )
    else:
        xDictOutputValueTupleList = create_dataset.CreateDataset(
            args.datasetPrototype, args.numberOfSamples, args.noiseStdDev
        )
    # Split dataset
    (trainingDataset, validationDataset) = create_dataset.SplitDataset(
        xDictOutputValueTupleList, trainingProportion=args.trainingProportion
    )

    # Create the interpreter
    domainFunctionsTree: ET.ElementTree = ET.parse(tree_filepath)
    interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter(domainFunctionsTree)

    # Output monitoring file
    generationsCostFile = open('./outputs/generationsCost.csv', "w", buffering=1)  # Flush the buffer at each line
    generationsCostFile.write(
        "generation,championTrainingCost,championValidationCost,medianTrainingCost,medianValidationCost\n")

    for memberNdx in range(args.numberOfEnsembleMembers):
        logging.info("Evolving member {}".format(memberNdx))
        # Choose the training samples (bagging: sampled with replacement)
        memberTrainingDataset = random.choices(trainingDataset, k=args.numberOfTrainingSamplesPerMember)

        # Generate the population.
        # NOTE(review): levelToFunctionProbabilityDict,
        # constantCreationParametersList and variableNameToTypeDict are
        # assumed to be module-level globals.
        logging.info("Generating the population...")
        population: gpevo.ArithmeticsPopulation = gpevo.ArithmeticsPopulation()
        population.Generate(
            args.numberOfIndividuals,
            interpreter,
            returnType,
            levelToFunctionProbabilityDict,
            args.proportionOfConstants,
            constantCreationParametersList,
            variableNameToTypeDict
        )

        # Optimize constants with backpropagation
        logging.info("Optimizing constants with backpropagation...")
        for epoch in range(args.numberOfEpochsPerGeneration):
            for individualNdx in range(len(population._individualsList)):
                population._individualsList[individualNdx] = interpreter.EpochOfTraining(
                    population._individualsList[individualNdx],
                    variableNameToTypeDict,
                    returnType,
                    memberTrainingDataset,
                    args.learningRate
                )

        # Per-member evolution state
        lowestChampionValidationCost = sys.float_info.max
        training_individualToCostDict = None
        memberChampion = None

        for generationNdx in range(1, args.numberOfGenerations + 1):
            logging.info("Generation {}".format(generationNdx))
            # Tournament selection + mutation; the returned cost dict feeds
            # the next generation.
            training_individualToCostDict = population.NewGenerationWithTournament(
                memberTrainingDataset,
                variableNameToTypeDict,
                interpreter,
                returnType,
                args.numberOfTournamentParticipants,
                args.mutationProbability,
                training_individualToCostDict,
                args.proportionOfConstants,
                levelToFunctionProbabilityDict,
                None,
                constantCreationParametersList,
                args.proportionOfNewIndividuals,
                args.weightForNumberOfElements
            )

            # Optimize constants with backpropagation
            for epoch in range(args.numberOfEpochsPerGeneration):
                for individualNdx in range(len(population._individualsList)):
                    population._individualsList[individualNdx] = interpreter.EpochOfTraining(
                        population._individualsList[individualNdx],
                        variableNameToTypeDict,
                        returnType,
                        memberTrainingDataset,
                        args.learningRate
                    )
            # Population evaluation
            (validationChampion, championTrainingCost, championValidationCost, medianTrainingCost,
             medianValidationCost,
             training_individualToCostDict) = \
                evaluate_individuals(
                    population,
                    memberTrainingDataset,
                    validationDataset,
                    variableNameToTypeDict,
                    interpreter,
                    returnType
                )

            # Output monitoring file (generation counter is global across members)
            generationsCostFile.write(str(memberNdx * args.numberOfGenerations + generationNdx) + ',' + str(championTrainingCost) + ',' + str(
                championValidationCost) + ',' + str(
                medianTrainingCost) + ',' + str(medianValidationCost) + '\n')

            # Is it a new champion for this member?
            if championValidationCost < lowestChampionValidationCost:
                #validationChampion.Save('./outputs/champion_' + str(generationNdx) + '.xml')
                memberChampion = validationChampion
                lowestChampionValidationCost = championValidationCost

            # Comparison file, rewritten every generation
            comparisonFile = open('./outputs/comparison.csv', 'w', buffering=1)
            comparisonFile.write('x,target,prediction\n')
            for xTarget in validationDataset:
                x = xTarget[0]['x']
                target = xTarget[1]
                prediction = interpreter.Evaluate(validationChampion, variableNameToTypeDict, xTarget[0],
                                                  returnType)
                comparisonFile.write(str(x) + ',' + str(target) + ',' + str(prediction) + '\n')
        # Persist the member's best champion, if any generation produced one
        if memberChampion is not None:
            memberChampion.Save('./outputs/member_{}.xml'.format(memberNdx))
def main():
    """Generate regression samples (position encoding -> win rates).

    Samples random tic-tac-toe positions, runs Monte-Carlo game simulations
    from each (guided by an optional regressor population when
    args.epsilon < 1.0), and appends one CSV row per position with the
    latent encoding followed by player0/draw/player1 win rates.
    """
    logging.info("regressorCreatesSamples.py main()")

    authority = tictactoe.Authority()
    #positionTsrShape = authority.PositionTensorShape()
    playersList = authority.PlayersList()

    # Load the interpreter
    domainFunctionsTree: ET.ElementTree = ET.parse(
        args.domainPrimitivesFilepath)
    interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter(
        domainFunctionsTree)

    # Load the ensemble only if the play policy is not fully random
    # (epsilon >= 1.0 means always-random moves, so no regressor is needed)
    if args.epsilon < 1.0:
        population = RegressorPopulation(
            interpreter, variableNameToTypeDict,
            'float')  #gpevo.ArithmeticsPopulation()
        population.LoadIndividuals(args.populationMembersFilepathPrefix)

    else:
        population = None

    # Create the autoencoder and build the CSV header from its latent size
    encoder = autoencoder.position.Net()
    encoder.Load(args.autoencoderFilepath)
    numberOfLatentVariables = encoder.numberOfLatentVariables
    header = ''
    for latentNdx in range(numberOfLatentVariables):
        header += 'p' + str(latentNdx) + ','

    # Load the features preprocessor.
    # NOTE(review): pickle.load on a path taken from args — safe only for
    # trusted files; never use on untrusted input.
    preprocessor = pickle.load(open(args.featuresPreprocessor, 'rb'))

    # Create the output file
    outputFile = open(args.outputFilepath, "w",
                      buffering=1)  # Flush the buffer at each line
    outputFile.write(header + "player0WinRate,drawRate,player1WinRate\n")

    for positionNdx in range(1, args.numberOfPositions + 1):
        logging.info("Generating position {}...".format(positionNdx))
        # Sample one random starting position
        startingPosition = winRatesRegression.SimulateRandomGames(
            authority,
            encoder=encoder,
            minimumNumberOfMovesForInitialPositions=0,
            maximumNumberOfMovesForInitialPositions=7,
            numberOfPositions=1,
            swapIfOddNumberOfMoves=False)[0]
        authority.Display(startingPosition)
        # Monte-Carlo outcome counters
        numberOfWinsForPlayer0 = 0
        numberOfWinsForPlayer1 = 0
        numberOfDraws = 0
        for simulationNdx in range(args.numberOfSimulationsPerPosition):
            (positionsList, winner) = winRatesRegression.SimulateAGame(
                population,
                encoder,
                authority,
                startingPosition=startingPosition,
                nextPlayer=playersList[1],
                playerToEpsilonDict={
                    playersList[0]: args.epsilon,
                    playersList[1]: args.epsilon
                },
                encodingPreprocessor=preprocessor)
            if winner == playersList[0]:
                numberOfWinsForPlayer0 += 1
            elif winner == playersList[1]:
                numberOfWinsForPlayer1 += 1
            elif winner == 'draw':
                numberOfDraws += 1
            else:
                raise ValueError("Unknown winner '{}'".format(winner))
            # print ("positionsList = \n{}\nwinner = {}".format(positionsList, winner))
        # Convert counts to rates over the simulations
        player0WinRate = numberOfWinsForPlayer0 / args.numberOfSimulationsPerPosition
        player1WinRate = numberOfWinsForPlayer1 / args.numberOfSimulationsPerPosition
        drawRate = numberOfDraws / args.numberOfSimulationsPerPosition
        logging.info(
            "winRateForPlayer0 = {}; drawRate = {}; winRateForPlayer1 = {}".
            format(player0WinRate, drawRate, player1WinRate))

        # Write the flattened latent encoding followed by the three rates
        #positionList = startingPosition.flatten().tolist()
        positionEncoding = encoder.Encode(
            startingPosition.unsqueeze(0)).flatten().tolist()
        print("positionEncoding = {}".format(positionEncoding))
        for encodingNdx in range(len(positionEncoding)):
            outputFile.write("{},".format(positionEncoding[encodingNdx]))
        outputFile.write("{},{},{}\n".format(player0WinRate, drawRate,
                                             player1WinRate))