def exp_run_industrial_datasets(iteration,
                                datasets=('paintcontrol', 'iofrol', 'gsdtsr')):
    """Train every agent / dataset / reward-function combination.

    For each agent factory, each dataset name and each reward function, a
    new agent and a new scenario provider are created, wrapped in a
    ``retecs.PrioLearning`` instance and trained for ``CI_CYCLES`` scenarios.

    Args:
        iteration: Index of this experiment repetition; it is formatted with
            ``%d`` into the output file prefix.
        datasets: Iterable of dataset names, each passed to ``get_scenario``.
            The default is an immutable tuple so it cannot be altered
            between calls.

    Returns:
        A list holding the value returned by each ``train`` call, ordered
        agent -> dataset -> reward function.
        NOTE(review): the original variable name suggests these are average
        NAPFD values -- confirm against ``PrioLearning.train``.
    """

    # Factories rather than instances: every training run below calls the
    # factory again and therefore gets its own newly constructed agent.
    def make_tableau_agent():
        agent = agents.TableauAgent(
            histlen=retecs.DEFAULT_HISTORY_LENGTH,
            learning_rate=retecs.DEFAULT_LEARNING_RATE,
            state_size=retecs.DEFAULT_STATE_SIZE,
            action_size=retecs.DEFAULT_NO_ACTIONS,
            epsilon=retecs.DEFAULT_EPSILON)
        return agent, retecs.preprocess_discrete, reward.timerank

    def make_network_agent():
        agent = agents.NetworkAgent(
            histlen=retecs.DEFAULT_HISTORY_LENGTH,
            state_size=retecs.DEFAULT_STATE_SIZE,
            action_size=1,
            hidden_size=retecs.DEFAULT_NO_HIDDEN_NODES)
        return agent, retecs.preprocess_continuous, reward.tcfail

    agent_factories = [make_tableau_agent, make_network_agent]

    reward_funs = {
        'failcount': reward.failcount,
        'timerank': reward.timerank,
        'tcfail': reward.tcfail
    }

    results = []

    for agent_idx, make_agent in enumerate(agent_factories):
        for dataset in datasets:
            for reward_name, reward_fun in reward_funs.items():
                # The third tuple element (the factory's own reward function)
                # is ignored: every reward in reward_funs is tried instead.
                agent, preprocessor, _ = make_agent()
                file_prefix = 'rq_%s_%s_%s_%d' % (agent.name, dataset,
                                                  reward_name, iteration)

                # A new scenario provider is requested for every run.
                scenario = get_scenario(dataset)

                learner = retecs.PrioLearning(
                    agent=agent,
                    scenario_provider=scenario,
                    reward_function=reward_fun,
                    preprocess_function=preprocessor,
                    file_prefix=file_prefix,
                    dump_interval=100,
                    validation_interval=0,
                    output_dir=DATA_DIR)

                # collect_comparison is only enabled for the first agent
                # factory (index 0).
                outcome = learner.train(no_scenarios=CI_CYCLES,
                                        print_log=True,
                                        plot_graphs=False,
                                        save_graphs=False,
                                        collect_comparison=(agent_idx == 0))
                results.append(outcome)

    return results
Beispiel #2
0
        agent = agents.TableauAgent(learning_rate=args.learning_rate,
                                    state_size=state_size,
                                    action_size=args.actions,
                                    epsilon=args.epsilon,
                                    histlen=args.histlen)

    ## If the action size is 1, we use MLPClassifier;
    ## for an action size of 2, we use MLPRegressor.
    elif args.agent == 'network':
        if args.reward in ('binary'):
            action_size = 1
        else:
            action_size = 2

        agent = agents.NetworkAgent(state_size=state_size,
                                    action_size=action_size,
                                    hidden_size=args.hiddennet,
                                    histlen=args.histlen)
    elif args.agent == 'heur_random':
        agent = agents.RandomAgent(histlen=args.histlen)
    elif args.agent == 'heur_sort':
        agent = agents.HeuristicSortAgent(histlen=args.histlen)
    elif args.agent == 'heur_weight':
        agent = agents.HeuristicWeightAgent(histlen=args.histlen)
    else:
        print('Unknown Agent')
        sys.exit()

    if args.scenario_provider == 'random':
        scenario_provider = scenarios.RandomScenarioProvider()
    elif args.scenario_provider == 'incremental':
        scenario_provider = scenarios.IncrementalScenarioProvider(