def __init__(self, mod, value, seed, dist, directory, full):
    Optimizer.__init__(self, value)
    self.rand = random.Random(seed)
    self.model = mod
    self.dist = dist
    self.directory = directory
    self.full = full
def __init__(self, argv, reactor, objective):
    """ Constructor. """
    # Initialize the Optimizer object.
    Optimizer.__init__(self, argv, reactor, objective)
    # Initialize the PGA object.
    PGA.__init__(self, argv, PGA.DATATYPE_INTEGER,
                 self.reactor.number_bundles(), PGA.MAXIMIZE)
    # Set default operators.
    self.SetCrossover(self.htbx)             # Crossover
    self.SetEndOfGen(self.end_of_iteration)  # End of generation info
    self.SetInitString(self.init)            # String initialization
    self.SetMutation(self.swap)              # Mutation via a single swap
    # Set default values.
    self.maximum_generations = 100           # Maximum generations
    self.population_size = 50                # Population size
    self.number_replaced = 40                # Number replaced each generation
    self.seed = 123                          # PGA random number seed
    self.np_seed = 123                       # NumPy random number seed
    self.binary_sweep = False                # Perform one sweep of binary exchanges
    # Optimizer-specific flags.
    self.track_best = False
    self.fixed_central = True
    # Counter for evaluations on each process.
    self.evals = 0
def main(args, profile=False):
    db = DataBase(args.games)
    results = db.load_game_results(mingames=50)
    print('{} games loaded.'.format(len(results)))
    players = db.load_players()
    optimizer = Optimizer(disp=True)
    optimizer.load_games(results)
    maxiter = 30 if profile else 0
    ratings, f, v = optimizer.run(method='l-bfgs-b', maxiter=maxiter)
    if profile:
        return
    print()
    by_rating = []
    for iplayer, rating in ratings.items():
        print(players[iplayer])
        mr = 0
        for date, r in sorted(rating.items()):
            date = datetime.date.fromtimestamp(date * 24 * 3600)
            print(date.isoformat(), r)
            if r > mr:
                mr = r
        by_rating.append((mr, players[iplayer]))
    print()
    print(f)
    print(f.calc(0.2) - f.calc(-0.2))
    best = list(sorted(by_rating))[-20:]
    for r, p in reversed(best):
        print('{:24} {}'.format(p, r))
def cnn_2d_mnist():
    d = image.mnist()
    d.shuffle()

    def layer_gen():
        l1 = ConvLayer2d(layer_id=0, image_size=d.data_shape, activation=calcutil.relu,
                         c_in=1, c_out=16, k=(2, 2), s=(1, 1), is_dropout=True)
        l2 = MaxPoolLayer2d(layer_id=1, image_size=l1.output_size,
                            activation=calcutil.identity, c_in=16, k=(2, 2))
        l3 = ConvLayer2d(layer_id=2, image_size=l2.output_size, activation=calcutil.relu,
                         c_in=16, c_out=32, k=(2, 2), s=(1, 1), is_dropout=True)
        l4 = MaxPoolLayer2d(layer_id=3, image_size=l3.output_size,
                            activation=calcutil.identity, c_in=32, k=(2, 2))
        l5 = HiddenLayer(layer_id=4, n_in=l4.n_out, n_out=800, activation=calcutil.relu)
        l6 = HiddenLayer(layer_id=5, n_in=l5.n_out, n_out=100, activation=calcutil.relu)
        l7 = SoftMaxLayer(layer_id=6, n_in=l6.n_out, n_out=len(d.classes()))
        layers = [l1, l2, l3, l4, l5, l6, l7]
        return layers

    m = Model(input_dtype='float32', layers_gen_func=layer_gen)
    optimizer = Optimizer(d, m)
    optimizer.optimize(100, 1000)
def test_emit_operation(self):
    opt = Optimizer()
    opt.add_operation(Types.VOID, Operations.FINISH, [])
    ops = opt.build_operations()
    assert len(ops) == 1
    assert ops[0].op == Operations.FINISH
    assert ops[0].getarglist() == []
def end_of_iteration(self):
    """ Do something at the end of each generation.

    In general, this is a very customizable routine.  This is where
    hill-climbing heuristics can be placed.  Additionally, tracking of
    objectives as a function of generation is easily done.

    For this default implementation, the best keff and maxpeak are kept
    for each generation.
    """
    best = self.GetBestIndex(PGA.OLDPOP)
    bestpattern = self.GetIntegerChromosome(best, PGA.OLDPOP)
    it = self.GetGAIterValue()
    # Note, we need to reshuffle and reevaluate for the best pattern.
    # This is *probably* more efficient than keeping k and p for
    # all evaluations and then trying to find the values corresponding
    # to the bestpattern.
    self.reactor.shuffle(bestpattern)
    self.k, self.p = self.reactor.evaluate()
    # Tell mother what we've done.
    Optimizer.fire_signal(self)
    #print "iter = ", iter
    #print " it = ", it, " best = ", self.GetEvaluation(best, PGA.NEWPOP), " k p = ", k, p, bestpattern
    #print " *** ", self.GetEvaluation(best, PGA.OLDPOP)
    if self.track_best:
        self.best_eval[it - 1] = self.GetEvaluation(best, PGA.OLDPOP)
        self.best_k[it - 1] = self.k
        self.best_p[it - 1] = self.p
        self.best_pattern[it - 1, :] = bestpattern
    del bestpattern
def test_simple_new(self, cpu):
    opt = Optimizer([Virtualize])
    struct_descr = cpu.new_struct()
    opt.add_operation(Types.REF, Operations.NEW, [], descr=struct_descr)
    ops = opt.build_operations()
    assert len(ops) == 0
def test_symmetric_wins(self):
    o = Optimizer(rand_seed=239)
    o.load_games([(1, 2, 1, 1), (1, 2, 1, 0), (2, 1, 1, 1), (2, 1, 1, 0)])
    rating, f, v = o.run(method='Newton-CG')
    self.assertAlmostEqual(
        f.calc(convert_rating_diff(rating[1][1] - rating[2][1])), 0.5, 2)
    self.assertLess(abs(rating[1][1] - rating[2][1]), 5)
def test_draw(self):
    o = Optimizer(rand_seed=239)
    o.load_games([(1, 2, 1, 2), (2, 1, 1, 2)])
    rating, f, v = o.run(method='cg')
    self.assertAlmostEqual(f.calc(0), 0.5, 3)
    self.assertAlmostEqual(
        f.calc(convert_rating_diff(rating[1][1] - rating[2][1])), 0.5, 2)
    self.assertLess(abs(rating[1][1] - rating[2][1]), 5)
def test_objective_time_reg(self):
    o = Optimizer(rand_seed=239)
    o.load_games([(1, 2, 1, 1), (1, 2, 2, 0)])
    v = o.create_vars({1: {1: 2200, 2: 1800}, 2: {1: 1800, 2: 2200}}, (0, -1.01))
    (total, likelihood, regularization, _, _, _) = o.objective(v, verbose=True)
    self.assertLess(likelihood, 0)
    self.assertTrue(1E-6 < regularization < 1)
def test_inputs(self):
    opt = Optimizer()
    res = opt.add_input(Types.INT)
    opt.add_operation(Types.VOID, Operations.FINISH, [res])
    ops = opt.build_operations()
    assert len(ops) == 1
    assert ops[0].op == Operations.FINISH
    assert ops[0].getarglist() == [res]
def test_read_unsetfield(self, cpu):
    opt = Optimizer([Virtualize])
    struct_descr = cpu.new_struct()
    field_descr = cpu.new_field(struct_descr)
    p0 = opt.add_operation(Types.REF, Operations.NEW, [], descr=struct_descr)
    i0 = opt.add_operation(Types.INT, Operations.GETFIELD, [p0], descr=field_descr)
    ops = opt.build_operations()
    assert len(ops) == 0
    assert opt.getvalue(i0).getint() == 0
def test_output_format(self):
    o = Optimizer()
    o.load_games(GAMES1)
    ratings, f = o.random_solution()
    self.assertEqual(len(ratings), 3)
    self.assertEqual(list(sorted(ratings.keys())), [1, 2, 3])
    for player_rating in ratings.values():
        for r in player_rating.values():
            self.assertTrue(1000 < r < 3000)
    self.assertEqual(list(sorted(ratings[1].keys())), [1, 2])
    self.assertEqual(list(sorted(ratings[2].keys())), [1, 3, 4])
    self.assertEqual(list(sorted(ratings[3].keys())), [1, 2, 3, 4])
def test_lt(self):
    opt = Optimizer([IntBounds, GuardPropagation])
    i0 = opt.add_input(Types.INT)
    i1 = opt.add_operation(Types.INT, Operations.INT_LT,
                           [i0, opt.new_constant_int(10)])
    opt.add_operation(Types.VOID, Operations.GUARD_TRUE, [i1])
    opt.add_operation(Types.INT, Operations.INT_LT,
                      [i0, opt.new_constant_int(15)])
    ops = opt.build_operations()
    assert len(ops) == 2
def test_objective_single_game(self):
    o = Optimizer()
    o.load_games([(1, 2, 1, 1)])
    v1 = o.create_vars({1: {1: 2200}, 2: {1: 1800}}, [0, -1.01])
    v2 = o.create_vars({1: {1: 2200}, 2: {1: 2200}}, [0, -1.01])
    v3 = o.create_vars({1: {1: 1800}, 2: {1: 2200}}, [0, -1.01])
    (total1, likelihood1, regularization1, smoothness1,
     func_hard_reg, _) = o.objective(v1, verbose=True)
    self.assertLess(likelihood1, 0)
    self.assertTrue(1E-6 < regularization1 < 1)
    self.assertEqual(smoothness1, 0)
    self.assertTrue(func_hard_reg < 1)
    (total2, likelihood2, regularization2, _, _, _) = o.objective(v2, verbose=True)
    self.assertLess(likelihood2, likelihood1)
    (total3, likelihood3, regularization3, _, _, _) = o.objective(v3, verbose=True)
    self.assertAlmostEqual(regularization1, regularization3)
    self.assertLess(total1 / total2, 0.9)
    self.assertLess(total2 / total3, 0.9)
def compilePB(pyBonCode: str, verbosity=0) -> str:
    """Compile Python Bonsai code and return Bonsai code.

    This function combines the various stages of the compiler and optionally
    outputs the interim stages. The stages called are (in order):
    - Lexer
    - Syntactic Analyzer (Parser)
    - Semantic Analyzer
    - Intermediate Code Generator
    - Optimizer
    - Intermediate Code Compiler

    Parameters:
    @param pyBonCode: the Python Bonsai code to be compiled as raw source
    @param verbosity: the verbosity level defines which interim stages to print
    @type pyBonCode: str
    @type verbosity: int
    """
    if verbosity is None:
        verbosity = 0

    tokens = Lexer(pyBonCode).tokens()
    if verbosity > 1:
        print("Tokens:", file=stderr)
        for token in tokens:
            print(token, file=stderr)
        print("", file=stderr)

    ast = SyntacticAnalysis(tokens)
    SemanticAnalysis(ast)

    ic = IntermediateCode()
    ic.fromSyntaxTree(ast)
    if verbosity > 2:
        print("Original instructions:", file=stderr)
        _print_instructions(ic)
        print("\nSymbols:", file=stderr)
        print(ic.symbolTable, file=stderr)
        print("\nRegisters:", file=stderr)
        print(ic.registers, file=stderr)

    oc = Optimizer(ic)
    bonCode = oc.compile()
    if verbosity > 0:
        print("\nOptimized instructions:", file=stderr)
        _print_instructions(oc)
        print("\nSymbols:", file=stderr)
        print(oc.symbolTable, file=stderr)
        print("\nRegisters:", file=stderr)
        print(oc.registers, file=stderr)
        print("\nCompiled Bonsai code:", file=stderr)
        print(bonCode, file=stderr)

    return bonCode
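A minimal usage sketch of compilePB, assuming the Python Bonsai source is read from a file; the file name "example.pyb" and the chosen verbosity level are illustrative assumptions, not taken from the original project.

# Hedged usage sketch: compile a (hypothetical) Python Bonsai source file and
# print the resulting Bonsai code; verbosity=2 also dumps the token stream to stderr.
with open("example.pyb") as src:
    bon_code = compilePB(src.read(), verbosity=2)
print(bon_code)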
def test_guard_false(self):
    opt = Optimizer([ConstantFold, GuardPropagation])
    i0 = opt.add_input(Types.INT)
    opt.add_operation(Types.VOID, Operations.GUARD_FALSE, [i0])
    opt.add_operation(Types.INT, Operations.INT_EQ, [i0, opt.new_constant_int(1)])
    ops = opt.build_operations()
    assert len(ops) == 1
    assert opt.getvalue(i0).getint() == 0
def test_gradient_games1(self):
    o = Optimizer(rand_seed=239, time_delta=0.01, func_hard_reg=0, func_soft_reg=0)
    o.load_games(GAMES1)
    v = o.init()
    grad = o.gradient(v)
    for i in range(len(v)):
        def ocomp(x):
            save_x = v[i]
            v[i] = x
            res = o.objective(v)
            v[i] = save_x
            return res

        self.assertAlmostEqual(derivative(ocomp, v[i]), grad[i])
def generate(generations, population, nn_param_choices, dataset):
    """Generate a network with the genetic algorithm.

    Args:
        generations (int): Number of times to evolve the population
        population (int): Number of networks in each generation
        nn_param_choices (dict): Parameter choices for networks
        dataset (str): Dataset to use for training/evaluating

    """
    optimizer = Optimizer(nn_param_choices)
    networks = optimizer.create_population(population)

    # Evolve the generation.
    for i in range(generations):
        logging.info("")
        logging.info("")
        logging.info("***Doing generation %d of %d***" % (i + 1, generations))
        print("\n\n\n**************************************")
        print("***Generation %d/%d" % (i + 1, generations))
        print("**************************************\n\n")

        # Train and get accuracy for networks.
        train_networks(networks, dataset)

        # Get the average accuracy for this generation.
        average_accuracy, average_loss = get_averages(networks)

        # Print out the average accuracy each generation.
        logging.info("Generation average: %.2f%% (%.4f)" %
                     (average_accuracy * 100, average_loss))
        logging.info('-' * 80)

        # Evolve, except on the last iteration.
        if i != generations - 1:
            # Do the evolution.
            networks = optimizer.evolve(networks)
            copy_accuracies(networks)

    # Sort our final population.
    networks = sorted(networks, key=lambda x: x.accuracy, reverse=True)

    # Print out the top 5 networks.
    print_networks(networks[:5])
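A hedged usage sketch of generate(); the keys in nn_param_choices and the 'mnist' dataset name below are illustrative assumptions, not values taken from the original project.

# Illustrative call only; the parameter-choice keys are assumed, not from the source.
nn_param_choices = {
    'nb_neurons': [64, 128, 256, 512],
    'nb_layers': [1, 2, 3, 4],
    'activation': ['relu', 'elu', 'tanh'],
    'optimizer': ['adam', 'sgd', 'rmsprop'],
}
generate(generations=10, population=20,
         nn_param_choices=nn_param_choices, dataset='mnist')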
def cnn_3d_psb():
    # PSB voxel data (only classes present in both the train and test sets)
    data = PSBVoxel.create(is_co_class=True, is_cached=True, from_cached=True,
                           align_data=True)
    # Augment the data by rotating the voxel data
    data.augment_rotate(start=(-5, 0, 0), end=(5, 0, 0), step=(1, 1, 1),
                        center=(50, 50, 50), is_cached=True, from_cached=True,
                        is_co_class=True)
    # data.augment_translate(start=(0, 0, -5), end=(0, 0, 5), step=(1, 1, 1),
    #                        is_cached=True, from_cached=True, is_co_class=True)
    # Shuffle the order of the data
    data.shuffle()
    # Check the number of elements along each dataset dimension
    print data

    # Function that builds the model layers
    def layer_gen():
        l1 = ConvLayer3d(layer_id=0, shape_size=data.data_shape, activation=calcutil.relu,
                         c_in=1, c_out=16, k=5, s=3, is_dropout=True)
        l2 = MaxPoolLayer3d(layer_id=1, shape_size=l1.output_size,
                            activation=calcutil.identity, c_in=16, k=4)
        l3 = HiddenLayer(layer_id=2, n_in=l2.n_out, n_out=512,
                         activation=calcutil.relu, is_dropout=True)
        l4 = HiddenLayer(layer_id=3, n_in=l3.n_out, n_out=256,
                         activation=calcutil.relu, is_dropout=True)
        l5 = SoftMaxLayer(layer_id=4, n_in=l4.n_out, n_out=len(data.classes()))
        layers = [l1, l2, l3, l4, l5]
        return layers

    # Learning model
    model = Model(input_dtype='float32', layers_gen_func=layer_gen)
    print model

    # Object that optimizes the model's learnable parameters
    optimizer = Optimizer(data, model)

    # Callback invoked after each training batch
    def on_optimized():
        optimizer.result.save()
        optimizer.params_result.save()

    # Start optimization
    optimizer.optimize(n_iter=100, n_batch=len(data.x_train) / 10,
                       is_total_test_enabled=False, on_optimized=on_optimized)
def test_time_regularization(self):
    o = Optimizer(rand_seed=239)
    o.load_games([(1, 2, 1, 1), (2, 1, 1, 0), (1, 2, 2, 0), (2, 1, 2, 1)])
    rating, f, v = o.run()
    (total1, _, reg, smoothness1, func_hard_reg,
     func_soft_reg) = o.objective(v, verbose=True)
    self.assertLess(func_hard_reg, 1)
    self.assertGreater(smoothness1, 0.001)
    self.assertGreater(rating[1][1], rating[1][2])
    self.assertLess(rating[2][1], rating[2][2])
    self.assertGreater(rating[1][1], rating[2][1])
    self.assertLess(rating[1][2], rating[2][2])
    prob1 = f.calc(convert_rating_diff(rating[1][1] - rating[2][1]))
    self.assertGreater(prob1, 0.51)
    prob2 = f.calc(convert_rating_diff(rating[1][2] - rating[2][2]))
    self.assertLess(prob2, 0.49)
def cnn_3d_shrec_usdf(n_fold):
    # PSB voxel data (only classes present in both the train and test sets)
    data = SHRECVoxelUSDF.create_shrec_voxel_usdf(n_fold=n_fold)

    from CubicCNN.src.util.plotutil import plot_voxel
    for x, y in zip(data.x_test[:, 0], data.y_test):
        print y
        plot_voxel(x == 10)

    # Shuffle the order of the data
    data.shuffle()
    # Check the number of elements along each dataset dimension
    print data

    # Function that builds the model layers
    def layer_gen():
        l1 = ConvLayer3d(layer_id=0, shape_size=data.data_shape, activation=calcutil.relu,
                         c_in=1, c_out=16, k=5, s=3, is_dropout=True)
        l2 = MaxPoolLayer3d(layer_id=1, shape_size=l1.output_size,
                            activation=calcutil.identity, c_in=16, k=4)
        l3 = HiddenLayer(layer_id=2, n_in=l2.n_out, n_out=512,
                         activation=calcutil.relu, is_dropout=True)
        l4 = HiddenLayer(layer_id=3, n_in=l3.n_out, n_out=256,
                         activation=calcutil.relu, is_dropout=True)
        l5 = SoftMaxLayer(layer_id=4, n_in=l4.n_out, n_out=len(data.classes()))
        layers = [l1, l2, l3, l4, l5]
        return layers

    # Learning model
    model = Model(input_dtype='float32', layers_gen_func=layer_gen)
    print model

    # Object that optimizes the model's learnable parameters
    optimizer = Optimizer(data, model)

    # Callback invoked after each training batch
    def on_optimized():
        optimizer.result.save()
        optimizer.params_result.save()

    # Start optimization
    optimizer.optimize(n_iter=100, n_batch=len(data.x_train) / 10,
                       is_total_test_enabled=False, on_optimized=on_optimized)
def test_setfield_escapes(self, cpu):
    opt = Optimizer([Virtualize])
    i0 = opt.add_input(Types.INT)
    struct_descr = cpu.new_struct()
    field_descr = cpu.new_field(struct_descr)
    p0 = opt.add_operation(Types.REF, Operations.NEW, [], descr=struct_descr)
    opt.add_operation(Types.VOID, Operations.SETFIELD, [p0, i0], descr=field_descr)
    opt.add_operation(Types.VOID, Operations.FINISH, [p0])
    ops = opt.build_operations()
    assert len(ops) == 3
def test_objective_symmetric_wins(self):
    o = Optimizer(rating_reg=0)
    o.load_games([(1, 2, 1, 1), (2, 1, 1, 1)])
    v1 = o.create_vars({1: {1: 2200}, 2: {1: 1800}}, [0, -1.01])
    v2 = o.create_vars({1: {1: 2000}, 2: {1: 2000}}, [0, -1.01])
    v3 = o.create_vars({1: {1: 1800}, 2: {1: 2200}}, [0, -1.01])
    self.assertLess(o.objective(v2) / o.objective(v1), 0.9)
    self.assertLess(o.objective(v2) / o.objective(v3), 0.9)
def test_objective_draw(self):
    o = Optimizer(rating_reg=0)
    o.load_games([(1, 2, 1, 2), (2, 1, 1, 2)])
    v1 = o.create_vars({1: {1: 2200}, 2: {1: 1800}}, [0, -1.01])
    v2 = o.create_vars({1: {1: 2000}, 2: {1: 2000}}, [0, -1.01])
    v3 = o.create_vars({1: {1: 1800}, 2: {1: 2200}}, [0, -1.01])
    self.assertLess(o.objective(v2), o.objective(v1))
    self.assertLess(o.objective(v2), o.objective(v3))
def test_get_setfield(self, cpu):
    opt = Optimizer([Virtualize])
    i0 = opt.add_input(Types.INT)
    struct_descr = cpu.new_struct()
    field_descr = cpu.new_field(struct_descr)
    p0 = opt.add_operation(Types.REF, Operations.NEW, [], descr=struct_descr)
    opt.add_operation(Types.VOID, Operations.SETFIELD, [p0, i0], descr=field_descr)
    i1 = opt.add_operation(Types.INT, Operations.GETFIELD, [p0], descr=field_descr)
    ops = opt.build_operations()
    assert len(ops) == 0
    assert opt.getvalue(i1) is i0
def test_gradient(self):
    o = Optimizer(func_hard_reg=0, func_soft_reg=0, time_delta=0, rating_reg=0)
    o.load_games([(1, 2, 1, 1)])
    v = o.create_vars({1: {1: 2200}, 2: {1: 1800}}, (0, -1.01))

    def o0(x):
        save_x = v[0]
        v[0] = x
        res = o.objective(v)
        v[0] = save_x
        return res

    def o1(x):
        save_x = v[1]
        v[1] = x
        res = o.objective(v)
        v[1] = save_x
        return res

    self.assertAlmostEqual(derivative(o0, v[0]), o.gradient(v)[0])
    self.assertAlmostEqual(derivative(o1, v[1]), o.gradient(v)[1])
def test_subtraction(self):
    opt = Optimizer([ConstantFold])
    opt.add_operation(Types.INT, Operations.INT_SUB,
                      [opt.new_constant_int(1), opt.new_constant_int(0)])
    ops = opt.build_operations()
    assert len(ops) == 0
class TestOptimizer(unittest.TestCase):
    def setUp(self):
        roster_data = {
            "players": [
                {"name": "Kalpesh Shah", "skill": 3, "division": "West"},
                {"name": "Larry Ward", "skill": 3, "division": "West"},
                {"name": "Trent Miller", "skill": 3, "division": "West"},
                {"name": "Katrina Brinkley", "skill": 2, "division": "West"},
                {"name": "Dan Doepner", "skill": 2, "division": "West"},
                {"name": "Kevin Dahl", "skill": 2, "division": "West"},
                {"name": "Doug Nufer", "skill": 1, "division": "West"},
                {"name": "Bill Schaefermeyer", "skill": 3, "division": "East"},
                {"name": "James Morris", "skill": 3, "division": "East"},
                {"name": "Justin Long", "skill": 3, "division": "East"},
                {"name": "Joe Au", "skill": 2, "division": "East"},
                {"name": "Joseph Hoyal", "skill": 2, "division": "East"},
                {"name": "Eric Prusse", "skill": 2, "division": "East"},
                {"name": "Maria Bates", "skill": 1, "division": "East"},
            ]
        }
        roster = Roster(roster_data)
        matchmaker = Matchmaker()
        teams = matchmaker.get_teams(roster)
        matches = matchmaker.get_matches(teams)
        self.optimizer = Optimizer(matches)

    def tearDown(self):
        self.matches = None

    def test_matches_count(self):
        optimized_matches = self.optimizer.get_optimized_matches()
        self.assertEqual(21, len(optimized_matches))

    def test_match_skill_difference(self):
        optimized_matches = self.optimizer.get_optimized_matches()
        for match in optimized_matches:
            self.assertEqual(0, match.get_skill_difference())
def test_cant_fold(self):
    opt = Optimizer([ConstantFold])
    i0 = opt.add_input(Types.INT)
    opt.add_operation(Types.INT, Operations.INT_ADD,
                      [i0, opt.new_constant_int(1)])
    ops = opt.build_operations()
    assert len(ops) == 1
def __init__(self, argv, reactor, objective):
    """ Constructor. """
    # Initialize the Optimizer object.
    Optimizer.__init__(self, argv, reactor, objective)
class DPG:
    def __init__(self, config, task, directory, callback=None, summary_writer=None):
        self.task = task
        self.directory = directory
        self.callback = callback
        self.summary_writer = summary_writer

        self.config = config
        self.batch_size = config['batch_size']
        self.n_episode = config['num_episode']
        self.capacity = config['capacity']
        self.history_len = config['history_len']
        self.epsilon_decay = config['epsilon_decay']
        self.epsilon_min = config['epsilon_min']
        self.time_between_two_copies = config['time_between_two_copies']
        self.update_interval = config['update_interval']
        self.tau = config['tau']

        self.action_dim = task.get_action_dim()
        self.state_dim = task.get_state_dim() * self.history_len
        self.critic_layers = [50, 50]
        self.actor_layers = [50, 50]
        self.actor_activation = task.get_activation_fn()

        self._init_modules()

    def _init_modules(self):
        # Replay memory
        self.replay_memory = ReplayMemory(history_len=self.history_len,
                                          capacity=self.capacity)
        # Actor critic network
        self.ac_network = ActorCriticNet(
            input_dim=self.state_dim, action_dim=self.action_dim,
            critic_layers=self.critic_layers, actor_layers=self.actor_layers,
            actor_activation=self.actor_activation, scope='ac_network')
        # Target network
        self.target_network = ActorCriticNet(
            input_dim=self.state_dim, action_dim=self.action_dim,
            critic_layers=self.critic_layers, actor_layers=self.actor_layers,
            actor_activation=self.actor_activation, scope='target_network')
        # Optimizer
        self.optimizer = Optimizer(config=self.config,
                                   ac_network=self.ac_network,
                                   target_network=self.target_network,
                                   replay_memory=self.replay_memory)
        # Ops for updating target network
        self.clone_op = self.target_network.get_clone_op(self.ac_network, tau=self.tau)
        # For tensorboard
        self.t_score = tf.placeholder(dtype=tf.float32, shape=[], name='new_score')
        tf.summary.scalar("score", self.t_score, collections=['dpg'])
        self.summary_op = tf.summary.merge_all('dpg')

    def set_summary_writer(self, summary_writer=None):
        self.summary_writer = summary_writer
        self.optimizer.set_summary_writer(summary_writer)

    def choose_action(self, sess, state, epsilon=0.1):
        x = numpy.asarray(numpy.expand_dims(state, axis=0), dtype=numpy.float32)
        action = self.ac_network.get_action(sess, x)[0]
        return action + epsilon * numpy.random.randn(len(action))

    def play(self, action):
        r, new_state, termination = self.task.play_action(action)
        return r, new_state, termination

    def update_target_network(self, sess):
        sess.run(self.clone_op)

    def train(self, sess, saver=None):
        num_of_trials = -1
        for episode in range(self.n_episode):
            frame = self.task.reset()
            for _ in range(self.history_len + 1):
                self.replay_memory.add(frame, 0, 0, 0)
            for _ in range(self.config['T']):
                num_of_trials += 1
                epsilon = self.epsilon_min + \
                    max(self.epsilon_decay - num_of_trials, 0) / \
                    self.epsilon_decay * (1 - self.epsilon_min)
                print("epi {}, frame {}k, epsilon {}".format(
                    episode, num_of_trials // 1000, epsilon))
                if num_of_trials % self.update_interval == 0:
                    self.optimizer.train_one_step(sess, num_of_trials, self.batch_size)
                state = self.replay_memory.phi(frame)
                action = self.choose_action(sess, state, epsilon)
                r, new_frame, termination = self.play(action)
                self.replay_memory.add(frame, action, r, termination)
                frame = new_frame
                if num_of_trials % self.time_between_two_copies == 0:
                    self.update_target_network(sess)
                    self.save(sess, saver)
                if self.callback:
                    self.callback()
                if termination:
                    score = self.task.get_total_reward()
                    summary_str = sess.run(self.summary_op,
                                           feed_dict={self.t_score: score})
                    self.summary_writer.add_summary(summary_str, num_of_trials)
                    self.summary_writer.flush()
                    break

    def evaluate(self, sess):
        for episode in range(self.n_episode):
            frame = self.task.reset()
            for _ in range(self.history_len + 1):
                self.replay_memory.add(frame, 0, 0, 0)
            for _ in range(self.config['T']):
                print("episode {}, total reward {}".format(
                    episode, self.task.get_total_reward()))
                state = self.replay_memory.phi(frame)
                action = self.choose_action(sess, state, self.epsilon_min)
                r, new_frame, termination = self.play(action)
                self.replay_memory.add(frame, action, r, termination)
                frame = new_frame
                if self.callback:
                    self.callback()
                if termination:
                    break

    def save(self, sess, saver, model_name='model.ckpt'):
        if saver:
            try:
                checkpoint_path = os.path.join(self.directory, model_name)
                saver.save(sess, checkpoint_path)
            except:
                pass

    def load(self, sess, saver, model_name='model.ckpt'):
        if saver:
            try:
                checkpoint_path = os.path.join(self.directory, model_name)
                saver.restore(sess, checkpoint_path)
            except:
                pass
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    #### get data
    # with open(os.path.join(args.data_dir, args.data_file), 'rb') as file:
    #     data = pickle.load(file)
    data_obj = _Data()
    train_data, valid_data, vocab_obj = data_obj.f_load_data_amazon(args)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    logger_obj = Logger()
    logger_obj.f_add_writer(args)

    ### add count parameters
    if not args.test:
        now_time = datetime.datetime.now()
        time_name = str(now_time.day) + "_" + str(now_time.month) + "_" + \
            str(now_time.hour) + "_" + str(now_time.minute)
        model_file = os.path.join(args.model_path,
                                  args.model_name + "/model_best_" + time_name + ".pt")
        args.model_file = model_file

    print("vocab_size", len(vocab_obj.m_w2i))

    ### get model
    # user_num = 10
    network = REVIEWDI(vocab_obj, args, device=device)

    total_param_num = 0
    for name, param in network.named_parameters():
        if param.requires_grad:
            param_num = param.numel()
            total_param_num += param_num
            print(name, "\t", param_num)
    print("total parameters num", total_param_num)

    if not args.test:
        optimizer = Optimizer(network.parameters(), args)
        trainer = TRAINER(vocab_obj, args, device)
        trainer.f_train(train_data, valid_data, network, optimizer, logger_obj)

    if args.test:
        print("=" * 10, "eval", "=" * 10)
        # eval_obj = EVAL(vocab_obj, args, device)
        # eval_obj.f_init_eval(network, args.model_file, reload_model=True)
        # eval_obj.f_eval(valid_data)

        print("=" * 10, "inference", "=" * 10)
        infer = INFER(vocab_obj, args, device)
        infer.f_init_infer(network, args.model_file, reload_model=True)
        infer.f_inference(valid_data)

    logger_obj.f_close_writer()
def train(verbose=True, **kwargs):
    args = kwargs['args']
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:{}'.format(cfg.port),
                            world_size=torch.cuda.device_count(),
                            rank=args.local_rank)
    setup_logger(cfg.respth)
    logger = logging.getLogger()

    ## dataset
    ds = CityScapes(cfg, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds, batch_size=cfg.ims_per_gpu, shuffle=False, sampler=sampler,
                    num_workers=cfg.n_workers, pin_memory=True, drop_last=True)

    ## model
    net = Deeplab_v3plus(cfg)
    net.train()
    net.cuda()
    net = nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank, ],
                                              output_device=args.local_rank)
    n_min = cfg.ims_per_gpu * cfg.crop_size[0] * cfg.crop_size[1] // 16
    criteria = OhemCELoss(thresh=cfg.ohem_thresh, n_min=n_min).cuda()

    ## optimizer
    optim = Optimizer(net, cfg.lr_start, cfg.momentum, cfg.weight_decay,
                      cfg.warmup_steps, cfg.warmup_start_lr, cfg.max_iter, cfg.lr_power)

    ## train loop
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    n_epoch = 0
    for it in range(cfg.max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == cfg.ims_per_gpu:
                continue
        except StopIteration:
            n_epoch += 1
            sampler.set_epoch(n_epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        logits = net(im)
        loss = criteria(logits, lb)
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())
        ## print training log message
        if it % cfg.msg_iter == 0 and not it == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((cfg.max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'iter: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it, max_it=cfg.max_iter, lr=lr, loss=loss_avg,
                      time=t_intv, eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed

    ## dump the final model and evaluate the result
    if verbose:
        net.cpu()
        save_pth = osp.join(cfg.respth, 'model_final.pth')
        state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
        if dist.get_rank() == 0:
            torch.save(state, save_pth)
        logger.info('training done, model saved to: {}'.format(save_pth))
        logger.info('evaluating the final model')
        net.cuda()
        net.eval()
        evaluator = MscEval(cfg)
        mIOU = evaluator(net)
        logger.info('mIOU is: {}'.format(mIOU))
def main():
    try:
        os.mkdir(args.snapshot_path)
    except:
        pass

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    print("device", device, "/", comm.size)
    cuda.get_device(device).use()
    xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters()
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    if comm.rank == 0:
        hyperparams.save(args.snapshot_path)
        hyperparams.print()

    model = Model(hyperparams, hdf5_path=args.snapshot_path)
    model.to_gpu()

    optimizer = Optimizer(model.parameters, communicator=comm)

    sigma_t = hyperparams.pixel_sigma_i
    pixel_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                        sigma_t**2, dtype="float32")
    pixel_ln_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                           math.log(sigma_t**2), dtype="float32")

    random.seed(0)
    subset_indices = list(range(len(dataset.subset_filenames)))

    current_training_step = 0
    for iteration in range(args.training_iterations):
        mean_kld = 0
        mean_nll = 0
        total_batch = 0
        subset_size_per_gpu = len(subset_indices) // comm.size
        start_time = time.time()

        for subset_loop in range(subset_size_per_gpu):
            random.shuffle(subset_indices)
            subset_index = subset_indices[comm.rank]
            subset = dataset.read(subset_index)
            iterator = gqn.data.Iterator(subset, batch_size=args.batch_size)

            for batch_index, data_indices in enumerate(iterator):
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                total_views = images.shape[1]

                # sample number of views
                num_views = random.choice(range(total_views))
                query_index = random.choice(range(total_views))

                if current_training_step == 0 and num_views == 0:
                    num_views = 1  # avoid OpenMPI error

                if num_views > 0:
                    r = generate_observation_representation(images, viewpoints,
                                                            num_views, model)
                else:
                    r = xp.zeros((args.batch_size, hyperparams.channels_r) +
                                 hyperparams.chrz_size, dtype="float32")
                    r = chainer.Variable(r)

                query_images = images[:, query_index]
                query_viewpoints = viewpoints[:, query_index]

                # (batch * views, height, width, channels) -> (batch * views, channels, height, width)
                query_images = query_images.transpose((0, 3, 1, 2))

                # transfer to gpu
                query_images = to_gpu(query_images)
                query_viewpoints = to_gpu(query_viewpoints)

                h0_gen, c0_gen, u_0, h0_enc, c0_enc = model.generate_initial_state(
                    args.batch_size, xp)

                loss_kld = 0

                hl_enc = h0_enc
                cl_enc = c0_enc
                hl_gen = h0_gen
                cl_gen = c0_gen
                ul_enc = u_0

                xq = model.inference_downsampler.downsample(query_images)

                for l in range(model.generation_steps):
                    inference_core = model.get_inference_core(l)
                    inference_posterior = model.get_inference_posterior(l)
                    generation_core = model.get_generation_core(l)
                    generation_piror = model.get_generation_prior(l)

                    h_next_enc, c_next_enc = inference_core.forward_onestep(
                        hl_gen, hl_enc, cl_enc, xq, query_viewpoints, r)

                    mean_z_q = inference_posterior.compute_mean_z(hl_enc)
                    ln_var_z_q = inference_posterior.compute_ln_var_z(hl_enc)
                    ze_l = cf.gaussian(mean_z_q, ln_var_z_q)

                    mean_z_p = generation_piror.compute_mean_z(hl_gen)
                    ln_var_z_p = generation_piror.compute_ln_var_z(hl_gen)

                    h_next_gen, c_next_gen, u_next_enc = generation_core.forward_onestep(
                        hl_gen, cl_gen, ul_enc, ze_l, query_viewpoints, r)

                    kld = gqn.nn.chainer.functions.gaussian_kl_divergence(
                        mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)

                    loss_kld += cf.sum(kld)

                    hl_gen = h_next_gen
                    cl_gen = c_next_gen
                    ul_enc = u_next_enc
                    hl_enc = h_next_enc
                    cl_enc = c_next_enc

                mean_x = model.generation_observation.compute_mean_x(ul_enc)
                negative_log_likelihood = gqn.nn.chainer.functions.gaussian_negative_log_likelihood(
                    query_images, mean_x, pixel_var, pixel_ln_var)
                loss_nll = cf.sum(negative_log_likelihood)

                loss_nll /= args.batch_size
                loss_kld /= args.batch_size
                loss = loss_nll + loss_kld

                model.cleargrads()
                loss.backward()
                optimizer.update(current_training_step)

                if comm.rank == 0:
                    printr(
                        "Iteration {}: Subset {} / {}: Batch {} / {} - loss: nll: {:.3f} kld: {:.3f} - lr: {:.4e} - sigma_t: {:.6f}".
                        format(iteration + 1, subset_loop * comm.size + 1,
                               len(dataset), batch_index + 1,
                               len(subset) // args.batch_size,
                               float(loss_nll.data), float(loss_kld.data),
                               optimizer.learning_rate, sigma_t))

                sf = hyperparams.pixel_sigma_f
                si = hyperparams.pixel_sigma_i
                sigma_t = max(
                    sf + (si - sf) * (1.0 - current_training_step / hyperparams.pixel_n),
                    sf)

                pixel_var[...] = sigma_t**2
                pixel_ln_var[...] = math.log(sigma_t**2)

                total_batch += 1
                current_training_step += comm.size
                mean_kld += float(loss_kld.data)
                mean_nll += float(loss_nll.data)

            if comm.rank == 0:
                model.serialize(args.snapshot_path)

        if comm.rank == 0:
            elapsed_time = time.time() - start_time
            print(
                "\033[2KIteration {} - loss: nll: {:.3f} kld: {:.3f} - lr: {:.4e} - sigma_t: {:.6f} - step: {} - elapsed_time: {:.3f} min".
                format(iteration + 1, mean_nll / total_batch, mean_kld / total_batch,
                       optimizer.learning_rate, sigma_t, current_training_step,
                       elapsed_time / 60))
            model.serialize(args.snapshot_path)
def get_pixel(coordinate):
    return pyautogui.screenshot().getpixel(coordinate)


if __name__ == '__main__':
    mouse = detect_mouse()
    print("Mouse coordinates: {}".format(mouse))
    field = Field()
    print("First tetromino:")
    current_tetromino = Tetromino.create(input())
    next_tetromino = None
    time.sleep(2)
    while True:
        next_tetromino = TETROMINO[get_pixel(mouse)]()
        opt = Optimizer.get_optimal_drop(field, current_tetromino)
        rotation = opt['tetromino_rotation']
        column = opt['tetromino_column']
        current_tetromino.rotate(rotation)
        field.drop(current_tetromino, column)
        keys = Optimizer.get_keystrokes(rotation, column, {
            'rotate_right': 'x',
            'rotate_left': 'z',
            'move_left': 'left',
            'move_right': 'right',
            'drop': ' ',
        })
        pyautogui.typewrite(keys)
        print(field)
        current_tetromino = next_tetromino
def train():
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(backend='nccl', init_method='tcp://127.0.0.1:33241',
                            world_size=torch.cuda.device_count(), rank=args.local_rank)
    setup_logger(respth)

    # dataset
    n_classes = 11
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]
    data_root = '/home/ubuntu/zk/FaceParsingData/'

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds, batch_size=n_img_per_gpu, shuffle=False, sampler=sampler,
                    num_workers=n_workers, pin_memory=True, drop_last=True)

    # model
    ignore_idx = -100
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    net.train()
    net = nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank, ],
                                              output_device=args.local_rank)
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net.module, lr0=lr_start, momentum=momentum,
                      wd=weight_decay, warmup_steps=warmup_steps,
                      warmup_start_lr=warmup_start_lr, max_iter=max_iter, power=power)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())

        # print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it + 1, max_it=max_iter, lr=lr, loss=loss_avg,
                      time=t_intv, eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed
        if dist.get_rank() == 0:
            if (it + 1) % 5000 == 0:
                state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
                if dist.get_rank() == 0:
                    torch.save(state, './res/cp/{}_iter.pth'.format(it))
                evaluate(dspth='/home/ubuntu/zk/FaceParsingData/test-img',
                         cp='{}_iter.pth'.format(it))

    # dump the final model
    save_pth = osp.join(respth, 'model_final_diss.pth')
    # net.cpu()
    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
def optimize(self):
    optimizer = Optimizer(self.plant, self.orderList, Simulator(self.plant),
                          Evaluator(self.plant))
    result = optimizer.run()
    print bestSolution(result)
from optimizer import Optimizer
from examples.CAPITA.PART1.nets import ND_net
import torch

train_queries = load('data/train_data_no_dealer.txt')
test_queries = load('data/test_data_no_dealer.txt')

# def neural_pred(network, i1, i2):
#     d = torch.zeros(20)
#     d[int(i1)] = 1.0
#     d[int(i2) + 10] = 1.0
#     d = torch.autograd.Variable(d.unsqueeze(0))
#     output = network.net(d)
#     return output.squeeze(0)
#
#
# fc1 = FC(20, 2)
# adam = torch.optim.Adam(fc1.parameters(), lr=0.5)
# swap_net = Network(fc1, 'no_dealer_net', neural_pred, optimizer=adam)

# with open('compare.pl') as f:
with open('NoDealer.pl') as f:
    problog_string = f.read()

model = Model(problog_string, [ND_net])
optimizer = Optimizer(model, 32)

train_model(model, train_queries, 5, optimizer, test_iter=10,
            test=lambda x: Model.accuracy(x, test_queries),
            snapshot_iter=10000)
class LearnGraphTopolgy:
    def __init__(self, S, is_data_matrix=False, alpha=0, maxiter=10000,
                 abstol=1e-6, reltol=1e-4, record_objective=False,
                 record_weights=False):
        self.S = S
        self.is_data_matrix = is_data_matrix
        self.alpha = alpha
        self.maxiter = maxiter
        self.abstol = abstol
        self.reltol = reltol
        self.record_objective = record_objective
        self.record_weights = record_weights
        self.op = Operators()
        self.obj = Objectives()
        self.optimizer = Optimizer()
        self.bic = 0

    def learn_k_component_graph(self, k=1, rho=1e-2, beta=1e4, w0='naive',
                                fix_beta=True, beta_max=1e6, lb=0, ub=1e10,
                                eigtol=1e-9, eps=1e-4):
        # number of nodes
        n = self.S.shape[0]
        # find an appropriate initial guess
        if self.is_data_matrix or self.S.shape[0] != self.S.shape[1]:
            raise Exception('Not implemented yet!')
        else:
            Sinv = np.linalg.pinv(self.S)
        # if w0 is either "naive" or "qp", compute it, else return w0
        w0 = self.optimizer.w_init(w0, Sinv)
        # compute quantities on the initial guess
        Lw0 = self.op.L(w0)
        # l1-norm penalty factor
        H = self.alpha * (np.eye(n) - np.ones((n, n)))
        K = self.S + H
        U0 = self.optimizer.U_update(Lw=Lw0, k=k)
        lamda0 = self.optimizer.lamda_update(lb=lb, ub=ub, beta=beta, U=U0, Lw=Lw0, k=k)
        # save objective function value at initial guess
        if self.record_objective:
            nll0 = self.obj.negloglikelihood(Lw=Lw0, lamda=lamda0, K=K)
            fun0 = nll0 + self.obj.prior(beta=beta, Lw=Lw0, lamda=lamda0, U=U0)
            fun_seq = [fun0]
            nll_seq = [nll0]

        beta_seq = [beta]
        if self.record_weights:
            w_seq = [w0]
        time_seq = [0]

        start_time = time.time()
        for _ in tqdm(range(self.maxiter)):
            w = self.optimizer.w_update(w=w0, Lw=Lw0, U=U0, beta=beta,
                                        lamda=lamda0, K=K)
            Lw = self.op.L(w)
            U = self.optimizer.U_update(Lw=Lw, k=k)
            lamda = self.optimizer.lamda_update(lb=lb, ub=ub, beta=beta, U=U, Lw=Lw, k=k)
            # compute negloglikelihood and objective function values
            if self.record_objective:
                nll = self.obj.negloglikelihood(Lw=Lw, lamda=lamda, K=K)
                fun = nll + self.obj.prior(beta=beta, Lw=Lw, lamda=lamda, U=U)
                nll_seq.append(nll)
                fun_seq.append(fun)
            if self.record_weights:
                w_seq.append(w)
            # check for convergence
            werr = np.abs(w0 - w)
            has_w_converged = all(werr <= .5 * self.reltol * (w + w0)) or all(werr <= self.abstol)
            time_seq.append(time.time() - start_time)
            if not fix_beta:
                eigvals, _ = np.linalg.eigh(Lw)
            if not fix_beta:
                n_zero_eigenvalues = np.sum(np.abs(eigvals) < eigtol)
                if k <= n_zero_eigenvalues:
                    beta = (1 + rho) * beta
                elif k > n_zero_eigenvalues:
                    beta = beta / (1 + rho)
                if beta > beta_max:
                    beta = beta_max
                beta_seq.append(beta)
            if has_w_converged:
                break
            # update estimates
            w0 = w
            U0 = U
            lamda0 = lamda
            Lw0 = Lw
            K = self.S + H / (-Lw + eps)

        # compute the adjacency matrix
        Aw = self.op.A(w)
        results = {'laplacian': Lw, 'adjacency': Aw, 'w': w, 'lamda': lamda, 'U': U,
                   'elapsed_time': time_seq, 'convergence': has_w_converged,
                   'beta_seq': beta_seq}
        if self.record_objective:
            results['obj_fun'] = fun_seq
            results['nll'] = nll_seq
            results['bic'] = 0
        if self.record_weights:
            results['w_seq'] = w_seq
        return results

    def learn_bipartite_graph(self, z=0, nu=1e4, m=7, w0='naive'):
        # number of nodes
        n = self.S.shape[0]
        # find an appropriate initial guess
        if self.is_data_matrix or self.S.shape[0] != self.S.shape[1]:
            raise Exception('Not implemented yet!')
        else:
            Sinv = np.linalg.pinv(self.S)
        # note now that S is always some sort of similarity matrix
        J = np.ones((n, n)) * (1 / n)
        # l1-norm penalty factor
        H = self.alpha * (2 * np.eye(n) - np.ones((n, n)))
        K = self.S + H
        # if w0 is either "naive" or "qp", compute it, else return w0
        w0 = self.optimizer.w_init(w0, Sinv)
        Lips = 1 / min(np.linalg.eigvals(self.op.L(w0) + J))
        # compute quantities on the initial guess
        Aw0 = self.op.A(w0)
        V0 = self.optimizer.V_update(Aw0, z)
        psi0 = self.optimizer.psi_update(V0, Aw0)
        Lips_seq = [Lips]
        time_seq = [0]
        start_time = time.time()
        ll0 = self.obj.bipartite_nll(Lw=self.op.L(w0), K=K, J=J)
        fun0 = ll0 + self.obj.bipartite_prior(nu=nu, Aw=Aw0, psi=psi0, V=V0)
        fun_seq = [fun0]
        nll_seq = [ll0]
        if self.record_weights:
            w_seq = [w0]

        for _ in tqdm(range(self.maxiter)):
            # we need to make sure that the Lipschitz constant is large enough
            # in order to avoid divergence
            while (1):
                # compute the update for w
                w = self.optimizer.bipartite_w_update(w=w0, Aw=Aw0, V=V0, nu=nu,
                                                      psi=psi0, K=K, J=J, Lips=Lips)
                # compute the objective function at the updated value of w
                fun_t = self.obj.bipartite_obj(Aw=self.op.A(w), Lw=self.op.L(w),
                                               V=V0, psi=psi0, K=K, J=J, nu=nu)
                # check if the previous value of the objective function is
                # smaller than the current one
                Lips_seq.append(Lips)
                if fun0 < fun_t:
                    # in case it is in fact larger, then increase Lips and recompute w
                    Lips = 2 * Lips
                else:
                    # otherwise decrease Lips and get outta here!
                    Lips = .5 * Lips
                    if Lips < 1e-12:
                        Lips = 1e-12
                    break

            Lw = self.op.L(w)
            Aw = self.op.A(w)
            V = self.optimizer.V_update(Aw=Aw, z=z)
            psi = self.optimizer.psi_update(V=V, Aw=Aw)
            # compute negloglikelihood and objective function values
            ll = self.obj.bipartite_nll(Lw=Lw, K=K, J=J)
            fun = ll + self.obj.bipartite_prior(nu=nu, Aw=Aw, psi=psi, V=V)
            # save measurements of time and objective functions
            time_seq.append(time.time() - start_time)
            nll_seq.append(ll)
            fun_seq.append(fun)
            # compute the relative error and check the tolerance on the adjacency
            # matrix and on the objective function
            if self.record_weights:
                w_seq.append(w)
            # check for convergence
            werr = np.abs(w0 - w)
            has_w_converged = all(werr <= .5 * self.reltol * (w + w0)) or all(werr <= self.abstol)
            if has_w_converged:
                break
            # update estimates
            fun0 = fun
            w0 = w
            V0 = V
            psi0 = psi
            Aw0 = Aw

        results = {'laplacian': Lw, 'adjacency': Aw, 'w': w, 'psi': psi, 'V': V,
                   'elapsed_time': time_seq, 'Lips_seq': Lips_seq,
                   'convergence': has_w_converged, 'nu': nu}
        if self.record_objective:
            results['obj_fun'] = fun_seq
            results['nll'] = nll_seq
            results['bic'] = 0
        if self.record_weights:
            results['w_seq'] = w_seq
        return results
def train():
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(backend='nccl', init_method='tcp://127.0.0.1:33241',
                            world_size=torch.cuda.device_count(), rank=args.local_rank)
    setup_logger(respth)

    ## dataset
    n_classes = 19  # 19
    n_img_per_gpu = 5
    n_workers = 10  # 4
    cropsize = [1024, 1024]
    ds = CityScapes('./data/cityscapes', cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds, batch_size=n_img_per_gpu, shuffle=False, sampler=sampler,
                    num_workers=n_workers, pin_memory=True, drop_last=True)

    ## model
    ignore_idx = 255
    device = torch.device("cuda")
    net = ShelfNet(n_classes=n_classes)
    net.load_state_dict(torch.load('./res/model_final_idd.pth'))
    net.to(device)
    # net.load_state_dict(checkpoint['model'].module.state_dict())
    # net.cuda()
    net.train()
    # net.cuda()
    # net.train()
    # net = nn.parallel.DistributedDataParallel(net,
    #                                           device_ids=[args.local_rank, ],
    #                                           output_device=args.local_rank,
    #                                           find_unused_parameters=True)
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net, lr0=lr_start, momentum=momentum, wd=weight_decay,
                      warmup_steps=warmup_steps, warmup_start_lr=warmup_start_lr,
                      max_iter=max_iter, power=power)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())
        ## print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it + 1, max_it=max_iter, lr=lr, loss=loss_avg,
                      time=t_intv, eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed
        if it % 1000 == 0:  # 1000
            ## dump the final model
            save_pth = osp.join(respth, 'shelfnet_model_it_%d.pth' % it)
            # net.cpu()
            # state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
            # if dist.get_rank() == 0: torch.save(state, save_pth)
            torch.save(net.state_dict(), save_pth)
        if it % 1000 == 0 and it > 0:  # 1000
            evaluate(checkpoint=save_pth)

    ## dump the final model
    save_pth = osp.join(respth, 'model_final.pth')
    net.cpu()
    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
def test_premutations(self):
    optimizer = Optimizer(None, None, None)
    all_permutations = optimizer.get_all_permutations(['A', 'B'])
    assert len(all_permutations) == 2
    all_permutations = optimizer.get_all_permutations(['A', 'B', 'C', 'D'])
    assert len(all_permutations) == 24
def test_spherical_distance(self):
    optimizer = Optimizer(None, None, None)
    assert optimizer.spherical_distance(Coordinate(0.0, 0.0), Coordinate(0.0, 0.0)) == 0
    assert optimizer.spherical_distance(Coordinate(0.0, 1.1), Coordinate(1.1, 1.1)) == 122
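The expected value of 122 is consistent with a great-circle distance rounded to whole kilometres (1.1 degrees along a meridian is roughly 122 km). A minimal haversine sketch matching that expectation is below; the Coordinate attribute names lat/lon and the Earth radius used are assumptions, not taken from the original project.

import math

def spherical_distance_sketch(a, b, radius_km=6371.0):
    # Haversine great-circle distance between two coordinates, rounded to whole km.
    # The .lat/.lon attribute names are assumed for illustration only.
    lat1, lon1, lat2, lon2 = map(math.radians, (a.lat, a.lon, b.lat, b.lon))
    h = (math.sin((lat2 - lat1) / 2) ** 2
         + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
    return round(2 * radius_km * math.asin(math.sqrt(h)))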
data_length = data_label.shape[0]
train_data_length = int(data_length * 0.8)
print("train_label_length:", train_data_length)
data_sample_train, data_sample_test = data_sample[:train_data_length], data_sample[train_data_length:]
data_label_train, data_label_test = data_label[:train_data_length], data_label[train_data_length:]

num_iterations = 1000
lr = 0.001
weight_decay = 0.01
train_batch_size = 16
test_batch_size = 100

data_handler = DataHander(16)
opt = Optimizer(lr=lr, momentum=0.9, iteration=0, gamma=0.0005, power=0.75)
initializer = Initializer()
data_handler.get_data(sample=data_sample_train, label=data_label_train)
data_handler.shuffle()

dnn = DNNNet(optimizer=opt.batch_gradient_descent_anneling,
             initializer=initializer.xavier,
             batch_size=train_batch_size,
             weights_decay=weight_decay)
dnn.initial()

train_error = []
max_loss = math.inf
early_stopping_iter = 15
early_stopping_mark = 0

for i in range(num_iterations):
    print('Iteration', i)
    opt.update_iteration(i)
def run_model(Xtest, Xtrain, Ytest, Ytrain, cond_ll, kernel, method, name, run_id,
              num_inducing, num_samples, sparsify_factor, to_optimize, trans_class,
              random_Z, logging_level, export_X,
              latent_noise=0.001, opt_per_iter=None, max_iter=200, n_threads=1,
              model_image_file=None, xtol=1e-3, ftol=1e-5, partition_size=3000):
    """
    Fits a model to the data (Xtrain, Ytrain) using the method provided by 'method',
    makes predictions on 'Xtest' and 'Ytest', and exports the results to csv files.

    Parameters
    ----------
    Xtest : ndarray
        X of test points
    Xtrain : ndarray
        X of training points
    Ytest : ndarray
        Y of test points
    Ytrain : ndarray
        Y of training points
    cond_ll : subclass of likelihood/Likelihood
        Conditional log likelihood function used to build the model.
    kernel : list
        The kernel that the model uses. It should be an array, and the size of the
        array should be the same as the number of latent processes. Each element
        should provide an interface similar to the ``ExtRBF`` class.
    method : string
        The method to use to learn the model. It can be 'full', 'mix1', or 'mix2'.
    name : string
        The name that will be used for logger file names and results file names.
    run_id : object
        ID of the experiment, which can be anything, and it will be included in the
        configuration file. It can provide, for example, a number referring to a
        particular test and train partition.
    num_inducing : integer
        Number of inducing points.
    num_samples : integer
        Number of samples for estimating the objective function and gradients.
    sparsify_factor : float
        Can be any number and will be included in the configuration file. It will
        not determine the number of inducing points.
    to_optimize : list
        The set of parameters to optimize. It should be a list, and it can include
        'll', 'mog', 'hyp', 'inducing'; e.g., it can be ['ll', 'mog'], in which case
        the posterior and ll will be optimised.
    trans_class : subclass of DataTransformation
        The class which will be used to transform data.
    random_Z : boolean
        Whether to initialise inducing points randomly on the training data. If
        False, inducing points will be placed using k-means (or mini-batch k-means)
        clustering. If True, inducing points will be placed randomly on the
        training data.
    logging_level : string
        The logging level to use.
    export_X : boolean
        Whether to export X to csv files.
    latent_noise : integer
        The amount of latent noise to add to the kernel. A white noise of amount
        latent_noise will be added to the kernel.
    opt_per_iter : integer
        Number of updates of each subset of parameters in each iteration, e.g.,
        {'mog': 15000, 'hyp': 25, 'll': 25}.
    max_iter : integer
        Maximum number of global iterations used on optimization.
    n_threads : integer
        Maximum number of threads used.
    model_image_file : string
        The image file from which the model will be initialized.
    xtol : float
        Tolerance of 'X' below which the optimization is determined as converged.
    ftol : float
        Tolerance of 'f' below which the optimization is determined as converged.
    partition_size : integer
        The size which is used to partition training data (this is not the
        partition used for SGD). Training data will be split into partitions of
        size ``partition_size`` and calculations will be done on each partition
        separately. The aim of this partitioning of data is to make the algorithm
        memory efficient.

    Returns
    -------
    folder : string
        The name of the folder in which results are stored.
    model : model
        The fitted model itself.
    """
    if opt_per_iter is None:
        opt_per_iter = {'mog': 40, 'hyp': 40, 'll': 40}
    folder_name = name + '_' + ModelLearn.get_ID()
    transformer = trans_class.get_transformation(Ytrain, Xtrain)
    Ytrain = transformer.transform_Y(Ytrain)
    Ytest = transformer.transform_Y(Ytest)
    Xtrain = transformer.transform_X(Xtrain)
    Xtest = transformer.transform_X(Xtest)

    opt_max_fun_evals = None
    total_time = None
    timer_per_iter = None
    tracker = None
    export_model = False
    git_hash, git_branch = get_git()

    properties = {
        'method': method,
        'sparsify_factor': sparsify_factor,
        'sample_num': num_samples,
        'll': cond_ll.__class__.__name__,
        'opt_max_evals': opt_max_fun_evals,
        'opt_per_iter': opt_per_iter,
        'xtol': xtol,
        'ftol': ftol,
        'run_id': run_id,
        'experiment': name,
        'max_iter': max_iter,
        'git_hash': git_hash,
        'git_branch': git_branch,
        'random_Z': random_Z,
        'latent_noise:': latent_noise,
        'model_init': model_image_file
    }

    logger = ModelLearn.get_logger(ModelLearn.get_output_path() + folder_name,
                                   folder_name, logging_level)
    logger.info('experiment started for:' + str(properties))

    model_image = None
    current_iter = None
    if model_image_file is not None:
        model_image = pickle.load(open(model_image_file + 'model.dump'))
        opt_params = pickle.load(open(model_image_file + 'opt.dump'))
        current_iter = opt_params['current_iter']

    if model_image:
        logger.info('loaded model - iteration started from: ' +
                    str(opt_params['current_iter']) + ' Obj fun: ' +
                    str(opt_params['obj_fun']) + ' fun evals: ' +
                    str(opt_params['total_evals']))

    if method == 'full':
        m = SAVIGP_SingleComponent(Xtrain, Ytrain, num_inducing, cond_ll, kernel,
                                   num_samples, None, latent_noise, False, random_Z,
                                   n_threads=n_threads, image=model_image,
                                   partition_size=partition_size)
        _, timer_per_iter, total_time, tracker, total_evals = \
            Optimizer.optimize_model(m, opt_max_fun_evals, logger, to_optimize, xtol,
                                     opt_per_iter, max_iter, ftol,
                                     ModelLearn.opt_callback(folder_name), current_iter)
    if method == 'mix1':
        m = SAVIGP_Diag(Xtrain, Ytrain, num_inducing, 1, cond_ll, kernel, num_samples,
                        None, latent_noise, False, random_Z, n_threads=n_threads,
                        image=model_image, partition_size=partition_size)
        _, timer_per_iter, total_time, tracker, total_evals = \
            Optimizer.optimize_model(m, opt_max_fun_evals, logger, to_optimize, xtol,
                                     opt_per_iter, max_iter, ftol,
                                     ModelLearn.opt_callback(folder_name), current_iter)
    if method == 'mix2':
        m = SAVIGP_Diag(Xtrain, Ytrain, num_inducing, 2, cond_ll, kernel, num_samples,
                        None, latent_noise, False, random_Z, n_threads=n_threads,
                        image=model_image, partition_size=partition_size)
        _, timer_per_iter, total_time, tracker, total_evals = \
            Optimizer.optimize_model(m, opt_max_fun_evals, logger, to_optimize, xtol,
                                     opt_per_iter, max_iter, ftol,
                                     ModelLearn.opt_callback(folder_name), current_iter)
    if method == 'gp':
        m = GPy.models.GPRegression(Xtrain, Ytrain, kernel[0])
        if 'll' in to_optimize and 'hyp' in to_optimize:
            m.optimize('bfgs')

    logger.debug("prediction started...")
    y_pred, var_pred, nlpd = m.predict(Xtest, Ytest)
    logger.debug("prediction finished")

    if not (tracker is None):
        ModelLearn.export_track(folder_name, tracker)
    ModelLearn.export_train(folder_name, transformer.untransform_X(Xtrain),
                            transformer.untransform_Y(Ytrain), export_X)
    ModelLearn.export_test(folder_name,
                           transformer.untransform_X(Xtest),
                           transformer.untransform_Y(Ytest),
                           [transformer.untransform_Y(y_pred)],
                           [transformer.untransform_Y_var(var_pred)],
                           transformer.untransform_NLPD(nlpd),
                           [''], export_X)
    if export_model and isinstance(m, SAVIGP):
        ModelLearn.export_model(m, folder_name)

    properties['total_time'] = total_time
    properties['time_per_iter'] = timer_per_iter
    properties['total_evals'] = total_evals
    ModelLearn.export_configuration(folder_name, properties)
    return folder_name, m
path = '/Users/michal/PycharmProjects/MRP/datasets/*.tsp'
data = Datasets(path)

# name = 'ali535'
name = 'berlin11_modified'
# name = 'berlin52'
# name = 'fl417'
# name = 'gr666'
# name = 'kroA100'
# name = 'kroA150'
# name = 'nrw1379'
# name = 'pr2392'

aco = Optimizer(ant_count=100, generations=100, alpha=1.0, beta=10.0, rho=0.5, q=10)
graph = Graph(data.datasets[name]['cost_matrix'], data.datasets[name]['rank'])
points_sequence, distance = aco(graph)
print('Found best distance: {} for sequence: {}'.format(distance, points_sequence))


def paint_graph(nodes, edges, title, cost):
    # plot nodes
    x = []
    y = []
    for point in nodes:
        x.append(point[0])
        y.append(point[1])
    y = list(map(operator.sub, [max(y) for i in range(len(nodes))], y))
    plt.plot(x, y, 'co', color='red')
    plt.grid()
def parse_exp1(lexer):
    exp = ExpParser.parse_exp0(lexer)
    if lexer.look_ahead() == TokenKind.OP_POW:
        line, op, _ = lexer.get_next_token()
        exp = lua_exp.BinopExp(line, op, exp, ExpParser.parse_exp2(lexer))
    return Optimizer.optimize_pow(exp)
def test_lt(self):
    opt = Optimizer([IntBounds, GuardPropagation])
    i0 = opt.add_input(Types.INT)
    i1 = opt.add_operation(Operations.INT_LT, [i0, opt.new_constant_int(10)])
    opt.add_operation(Operations.GUARD_TRUE, [i1])
    opt.add_operation(Operations.INT_LT, [i0, opt.new_constant_int(15)])
    ops = opt.build_operations()
    assert len(ops) == 2
def train(opt):
    # saving setting
    opt.saved_path = opt.saved_path + opt.project
    opt.log_path = os.path.join(opt.saved_path, 'tensorboard')
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    # gpu setting
    os.environ["CUDA_VISIBLE_DEVICES"] = '2, 3, 4, 5, 6'
    gpu_number = torch.cuda.device_count()

    # dataset setting
    n_classes = 17
    n_img_all_gpu = opt.batch_size * gpu_number
    cropsize = [448, 448]
    data_root = '/home/data2/DATASET/vschallenge'
    num_workers = opt.num_workers

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    dl = DataLoader(ds, batch_size=n_img_all_gpu, shuffle=True,
                    num_workers=num_workers, drop_last=True)
    ds_eval = FaceMask(data_root, cropsize=cropsize, mode='val')
    dl_eval = DataLoader(ds_eval, batch_size=n_img_all_gpu, shuffle=True,
                         num_workers=num_workers, drop_last=True)
    ignore_idx = -100

    net = BiSeNet(n_classes=n_classes)

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except (IndexError, ValueError):
            last_step = 0

        try:
            ret = net.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print('[Warning] Don\'t panic if you see this; it usually means you loaded pretrained '
                  'weights with a different number of classes. The rest of the weights should be loaded already.')
        print(f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}')
    else:
        last_step = 0
        print('[Info] initializing weights...')

    writer = SummaryWriter(opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    net = net.cuda()
    net = nn.DataParallel(net)

    score_thres = 0.7
    n_min = n_img_all_gpu * cropsize[0] * cropsize[1] // opt.batch_size
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    # optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = opt.lr
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net.module, lr0=lr_start, momentum=momentum, wd=weight_decay,
                      warmup_steps=warmup_steps, warmup_start_lr=warmup_start_lr,
                      max_iter=max_iter, power=power)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim.optim, patience=3, verbose=True)

    # train loop
    loss_avg = []
    step = max(0, last_step)
    steps_per_epoch = len(dl)
    best_epoch = 0
    epoch = 0
    best_loss = 1e5
    net.train()

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // steps_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(dl)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * steps_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    im = data['img'].cuda()
                    lb = torch.squeeze(data['label'], 1).cuda()

                    optim.zero_grad()
                    out, out16, out32 = net(im)
                    lossp = LossP(out, lb)
                    loss2 = Loss2(out16, lb)
                    loss3 = Loss3(out32, lb)
                    loss = lossp + loss2 + loss3

                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    optim.step()

                    loss_avg.append(loss.item())
                    epoch_loss.append(loss.item())

                    # print training log message
                    progress_bar.set_description(
                        'Epoch: {}/{}. Iteration: {}/{}. p_loss: {:.5f}. 2_loss: {:.5f}. '
                        '3_loss: {:.5f}. loss: {:.5f}'.format(
                            epoch, opt.num_epochs, iter + 1, steps_per_epoch,
                            lossp.item(), loss2.item(), loss3.item(), loss.item()))

                    writer.add_scalars('Lossp', {'train': lossp}, step)
                    writer.add_scalars('loss2', {'train': loss2}, step)
                    writer.add_scalars('loss3', {'train': loss3}, step)
                    writer.add_scalars('loss_avg', {'train': loss}, step)

                    # log learning_rate
                    lr = optim.lr
                    writer.add_scalar('learning_rate', lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(net, f'Bisenet_{epoch}_{step}.pth')
                        print('checkpoint...')
                except Exception:
                    print('[Error]', traceback.format_exc())
                    continue

            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                net.eval()
                loss_p = []
                loss_2 = []
                loss_3 = []
                for iter, data in enumerate(dl_eval):
                    with torch.no_grad():
                        im = data['img'].cuda()
                        lb = torch.squeeze(data['label'], 1).cuda()

                        out, out16, out32 = net(im)
                        lossp = LossP(out, lb)
                        loss2 = Loss2(out16, lb)
                        loss3 = Loss3(out32, lb)
                        loss = lossp + loss2 + loss3

                        if loss == 0 or not torch.isfinite(loss):
                            continue
                        loss_p.append(lossp.item())
                        loss_2.append(loss2.item())
                        loss_3.append(loss3.item())

                lossp = np.mean(loss_p)
                loss2 = np.mean(loss_2)
                loss3 = np.mean(loss_3)
                loss = lossp + loss2 + loss3

                print('Val. Epoch: {}/{}. p_loss: {:1.5f}. 2_loss: {:1.5f}. 3_loss: {:1.5f}. Total_loss: {:1.5f}'
                      .format(epoch, opt.num_epochs, lossp, loss2, loss3, loss))
                writer.add_scalars('Total_loss', {'val': loss}, step)
                writer.add_scalars('p_loss', {'val': lossp}, step)
                writer.add_scalars('2_loss', {'val': loss2}, step)
                writer.add_scalars('3_loss', {'val': loss3}, step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    save_checkpoint(net, f'Bisenet_{epoch}_{step}.pth')

                net.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print('[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                          .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(net, f'Bisenet_{epoch}_{step}.pth')
    finally:
        writer.close()
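# The Optimizer used above is assumed to be a warmup + polynomial-decay SGD wrapper
# (the call site passes lr0, momentum, wd, warmup_steps, warmup_start_lr, max_iter,
# power, and exposes .optim, .lr, .zero_grad(), .step()). A minimal sketch of such a
# wrapper -- illustrative only, the project's real class may differ:
import torch


class WarmupPolyOptimizer(object):
    def __init__(self, model, lr0, momentum, wd, warmup_steps,
                 warmup_start_lr, max_iter, power):
        self.lr0 = lr0
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr
        self.max_iter = max_iter
        self.power = power
        self.it = 0
        self.lr = warmup_start_lr
        self.optim = torch.optim.SGD(model.parameters(), lr=self.lr,
                                     momentum=momentum, weight_decay=wd)

    def _current_lr(self):
        if self.it < self.warmup_steps:
            # linear warmup from warmup_start_lr to lr0
            frac = self.it / max(1, self.warmup_steps)
            return self.warmup_start_lr + frac * (self.lr0 - self.warmup_start_lr)
        # polynomial decay over the remaining iterations
        frac = (self.it - self.warmup_steps) / max(1, self.max_iter - self.warmup_steps)
        return self.lr0 * (1.0 - min(frac, 1.0)) ** self.power

    def zero_grad(self):
        self.optim.zero_grad()

    def step(self):
        # refresh the learning rate, push it into the param groups, then update
        self.lr = self._current_lr()
        for group in self.optim.param_groups:
            group['lr'] = self.lr
        self.optim.step()
        self.it += 1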
num_pts = [60]

all_times = []
all_wall_times = []
all_function_evals = []
all_nit = []

#### Iterate over number of points
for num in num_pts:
    all_x = np.linspace(0, 1, num)
    # interior heights of the initial straight-line guess (endpoints fixed at 1 and 0)
    y0 = np.linspace(1, 0, num)
    y0 = y0[1:-1]

    start = time.time()
    opt = Optimizer(brachistochrone, y0, 1, 1)
    # print(opt.gradient_func(y0))
    opt.minimize()
    print(funct_eval)  # funct_eval is presumably a module-level evaluation counter
    # set_trace()
    # fit = minimize(fun, y0, method="BFGS")
    end = time.time()

    #### Saving Values
    all_wall_times.append(end - start)
    # all_function_evals.append(fit.nfev)
    # all_nit.append(fit.nit)

    #### Output for debugging
    print("-------------")
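# The brachistochrone objective passed to Optimizer above is not shown. A plausible
# discretized travel-time functional -- assuming the bead starts at (0, 1), ends at
# (1, 0), and the interior heights are the decision variables -- might look like this
# (name, signature, and constants are illustrative only):
import numpy as np


def brachistochrone_time(y_interior, num=60, g=9.81):
    """Approximate descent time along the piecewise-linear curve through
    (0, 1), the interior points, and (1, 0)."""
    x = np.linspace(0, 1, num)
    y = np.concatenate(([1.0], y_interior, [0.0]))
    # speed from energy conservation, v = sqrt(2 g (1 - y)), averaged per segment
    v = np.sqrt(np.maximum(2.0 * g * (1.0 - y), 1e-12))
    v_seg = 0.5 * (v[:-1] + v[1:])
    ds = np.sqrt(np.diff(x) ** 2 + np.diff(y) ** 2)
    return float(np.sum(ds / v_seg))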
def service_inputs(): cmd = raw_input("> ").strip() import interpretor import optimizer import ops import db import parse_expr import parse_sql from interpretor import PullBasedInterpretor, PushBasedInterpretor from optimizer import Optimizer from ops import Print from db import Database from parse_expr import parse as _parse_expr from parse_sql import parse as _parse_sql _db = Database() if cmd == "q": return elif cmd == "": pass elif cmd.startswith("help"): print(HELPTEXT) elif cmd.lower() == "reload": reload(parse_expr) reload(db) reload(ops) reload(parse_sql) reload(optimizer) reload(interpretor) from parse_expr import parse as _parse_expr from db import Database from ops import Print from parse_sql import parse as _parse_sql from optimizer import Optimizer from interpretor import PullBasedInterpretor, PushBasedInterpretor elif cmd.upper().startswith("TRACE"): traceback.print_exc() elif cmd.upper().startswith("PARSE"): q = cmd[len("PARSE"):] ast = None try: ast = _parse_expr(q) except Exception as err_expr: try: ast = _parse_sql(q) except Exception as err: print("ERROR:", err) if ast: print(ast) elif cmd.upper().startswith("SHOW TABLES"): for tablename in _db.tablenames: print tablename elif cmd.upper().startswith("SHOW "): tname = cmd[len("SHOW "):].strip() if tname in _db: print "Schema for %s" % tname t = _db[tname] for field in t.fields: if t.rows: typ = type(t.rows[0][field]) else: typ = "?" print field, "\t", typ else: print "%s not in database" % tname else: try: plan = _parse_sql(cmd) opt = Optimizer(_db) interp = PullBasedInterpretor(_db) #interp = PushBasedInterpretor(_db) plan = opt(plan) print(plan) for row in interp(plan): print row except Exception as err: print("ERROR:", err) del _db service_inputs()
def main(args): ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime()) #### get data set_seed(1111) data_obj = _Data() train_data, valid_data, vocab_obj = data_obj.f_load_data_amazon(args) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") logger_obj = Logger() logger_obj.f_add_writer(args) ### add count parameters if args.train: now_time = datetime.datetime.now() time_name = str(now_time.month)+"_"+str(now_time.day)+"_"+str(now_time.hour)+"_"+str(now_time.minute) model_file = os.path.join(args.model_path, args.model_name+"/model_best_"+time_name+"_"+args.data_name+".pt") args.model_file = model_file print("vocab_size", len(vocab_obj.m_w2i)) ### get model # user_num = 10 network = REVIEWDI(vocab_obj, args, device=device) total_param_num = 0 for name, param in network.named_parameters(): if param.requires_grad: param_num = param.numel() total_param_num += param_num print(name, "\t", param_num) print("total parameters num", total_param_num) if args.train: optimizer = Optimizer(network.parameters(), args) trainer = TRAINER(vocab_obj, args, device) trainer.f_train(train_data, valid_data, network, optimizer, logger_obj) if args.test or args.eval: print("="*10, "test", "="*10) infer = INFER(vocab_obj, args, device) infer.f_init_infer(network, args.model_file, reload_model=True) infer.f_inference(valid_data) if args.eval: print("="*10, "eval", "="*10) eval_obj = EVAL(vocab_obj, args, device) eval_obj.f_init_eval(network, args.model_file, reload_model=True) eval_obj.f_eval(valid_data) # infer = INFER(vocab_obj, args, device) # infer.f_init_infer(network, args.model_file, reload_model=True) # input_text = "verrry cheaply constructed , not as comfortable as i expected . i have been wearing this brand , but the first time i wore it , it was a little more than a few days ago . i have been wearing this brand before , so far , no complaints . i will be ordering more in different colors . update : after washing & drying , i will update after washing . after washing , this is a great buy . the fabric is not as soft as it appears to be made of cotton material . <eos>" # infer.f_search_text(input_text, train_data) logger_obj.f_close_writer()
def main(): parser = argparse.ArgumentParser() parser.add_argument('--save', '-s', dest='save_model', action='store_true') parser.set_defaults(save=False) args = parser.parse_args() if args.save_model: print "Model will be saved" else: print "Model will not be saved" sess = tf.Session() state_ops = StateOps() opt_net = Optimizer() mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) net = MLP(opt_net) snfs = [] # Generate the set of SNFs print "Generating SNFs..." for i in range(num_SNFs): snf = SNF() snfs.append(snf) print "Initializing replay memory..." replay_memory = [] # Add some initial states to the replay memory for i in range(replay_mem_start_size): snf = random.choice(snfs) # Initializer computes a random point and the SNF loss state = State(snf, state_ops, sess) replay_memory.append(state) init = tf.initialize_all_variables() sess.run(init) best_loss = np.float('inf') best_accuracy = 0 # Training loop for i in range(num_iterations): batch_losses = [] batch_loss_change_sign = [] batch_grads = [] batch_counters = [] for j in range(batch_size): # Retrieve a random starting point from the replay memory state = random.choice(replay_memory) snf = state.snf if state.counter >= episode_length: snf = random.choice(snfs) state = State(snf, state_ops, sess) batch_counters.append(state.counter) prev_counter = state.counter # The RNN state is initially zero but this will become # rarer as old states are put back into the replay memory feed_dict = { opt_net.point: state.point, opt_net.variances: snf.variances, opt_net.weights: snf.weights, opt_net.hyperplanes: snf.hyperplanes, opt_net.initial_rnn_state: state.rnn_state } res = sess.run([ opt_net.new_point, opt_net.rnn_state_out, opt_net.loss_change_sign, opt_net.total_loss ] + [g for g, v in opt_net.gvs], feed_dict=feed_dict) new_point, rnn_state_out, loss_change_sign, loss = res[0:4] grads_out = res[4:] # Prepare a new state to add to the replay memory state = State(snf, state_ops, sess) state.point = new_point state.rnn_state = rnn_state_out state.counter = prev_counter + seq_length # Prevent these attributes from being used until their values are overridden state.loss = None state.grads = None # Only the last state is added. Adding more may result in a loss # of diversity in the replay memory replay_memory.append(state) if len(replay_memory) > replay_memory_max_size: replay_memory = replay_memory[-replay_memory_max_size:] batch_losses.append(loss) batch_loss_change_sign.append(loss_change_sign) batch_grads.append(grads_out) loss = np.mean(batch_losses) avg_loss_change_sign = np.mean(batch_loss_change_sign) avg_counter = np.mean(batch_counters) total_grads = batch_grads[0] for j in range(1, batch_size): for k in range(len(batch_grads[j])): total_grads[k] += batch_grads[j][k] total_grads = [j / batch_size for j in total_grads] #===# Train the optimizer #===# # By the derivative sum rule, the average of the derivatives (calculated here) # is identical to the derivative of the average (the usual method). 
feed_dict = {} for j in range(len(opt_net.grads_input)): feed_dict[opt_net.grads_input[j][0]] = total_grads[j] _ = sess.run([opt_net.train_step], feed_dict=feed_dict) if i % summary_freq == 0 and i > 0: print "{:>3}{:>10.3}{:>10.3}{:>10.3}".format( i, loss, avg_loss_change_sign, avg_counter) # Save model if args.save_model: accuracies = [] for k in range(1): # Evaluate on the MNIST MLP sess.run(net.init) # Reset parameters of the net to be trained rnn_state = np.zeros( [net.num_params, net.opt_net.cell.state_size]) for j in range(net.batches): batch_x, batch_y = mnist.train.next_batch(net.batch_size) # Compute gradients grads = sess.run(net.grads, feed_dict={ net.x: batch_x, net.y_: batch_y }) # Compute update feed_dict = { net.opt_net.input_grads: np.reshape(grads, [1, -1, 1]), net.opt_net.initial_rnn_state: rnn_state } [update, rnn_state] = sess.run([ net.opt_net.update, net.opt_net.rnn_state_out_compare ], feed_dict=feed_dict) # Update MLP parameters _ = sess.run([net.opt_net_train_step], feed_dict={net.update: update}) accuracy = sess.run(net.accuracy, feed_dict={ net.x: mnist.test.images, net.y_: mnist.test.labels }) accuracies.append(accuracy) a = np.mean(accuracies) if a > best_accuracy: best_accuracy = a #best_loss = loss saver = tf.train.Saver(tf.trainable_variables()) saver.save(sess, save_path) print "{:>3}{:>10.3}{:>10.3}{:>10.3}{:>10.3} (S)".format( i, loss, avg_loss_change_sign, avg_counter, a) else: print "{:>3}{:>10.3}{:>10.3}{:>10.3}{:>10.3}".format( i, loss, avg_loss_change_sign, avg_counter, a)
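# The "derivative sum rule" comment above (averaging per-example gradients equals the
# gradient of the averaged loss) can be sanity-checked numerically. A tiny NumPy sketch,
# independent of the TensorFlow graph and purely illustrative:
import numpy as np

# f_j(w) = 0.5 * (w - t_j)^2, so the analytic per-example gradient is (w - t_j)
w = 3.0
targets = np.array([1.0, 2.0, 4.0, 7.0])
mean_of_grads = np.mean(w - targets)

# central-difference gradient of the averaged loss
eps = 1e-6
loss = lambda w_: np.mean(0.5 * (w_ - targets) ** 2)
grad_of_mean = (loss(w + eps) - loss(w - eps)) / (2 * eps)

assert np.isclose(mean_of_grads, grad_of_mean)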
# Load data
data = Data()
data.load_dictionaries()

# Load model
model = Model(data)
model.input_name = "best"
model.output_name = "final"
model.load()

# Model loss function
loss = Loss()

# Optimizer
optimizer = Optimizer(model)

# Begin epochs
for epoch in range(config["num_epochs"]):
    print("[EPOCH]", epoch + 1)

    # Process batches
    for caption, image_feature in data:
        # Pass data through model
        caption, image_feature = model(caption, image_feature)

        # Compute loss
        cost = loss(caption, image_feature)
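# The loop above computes the cost but never backpropagates or updates the model.
# A minimal sketch of the missing step, assuming this Optimizer wraps a torch
# optimizer with the usual zero_grad/step interface (names are hypothetical):
def train_step(model, optimizer, loss, caption, image_feature):
    caption_out, image_out = model(caption, image_feature)
    cost = loss(caption_out, image_out)
    optimizer.zero_grad()   # clear stale gradients
    cost.backward()         # backpropagate the loss
    optimizer.step()        # apply the parameter update
    return cost.item()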
def visualize(network, layer, idx, img_shape=(3, 224, 224), init_range=(0, 1),
              max_iter=400, lr=1, sigma=0, debug=False):
    """
    Perform standard Deep Dream-style visualization on the network.

    Parameters:
    network: the network to be run, a torch module
    layer: the layer of the network that the desired neuron to visualize is part of, also a torch module
    idx: a tuple of indexes into the output of the given layer (like (0,0,0,0) for a BCHW conv layer) that extracts the desired neuron
    img_shape: a tuple specifying the shape of the images the network takes in, in CHW form (a batch dimension is expected by Optimizer and so is automatically added)
    init_range: the range of values to randomly initialize the image to
    max_iter: the maximum number of iterations to run the optimization for
    lr: the 'learning rate' (the multiplier of the gradient as it is added to the image at each step)
    sigma: the standard deviation (or list of stddevs) of a Gaussian filter that smooths the image each iteration, standard for inception loop-style visualization
    debug: prints loss at every iteration if true, useful for finding the right learning rate

    Returns:
    optimized image
    loss for the last iteration
    """
    # partial application, since the index can't be passed in optimizer code
    loss_func = partial(specific_output_loss, idx=idx)

    optimizer = Optimizer(network, layer, loss_func)

    # set the 'target' optimization to be very high -- just want
    # to increase it as much as possible
    # since optimizer is actually gradient descent, make it negative
    # TODO: allow selection of populations, not just single neurons
    target_shape = (1, )
    target = torch.ones(*target_shape) * 100
    target = target.cuda()

    # now start optimization
    rand_img = torch_rand_range(img_shape, init_range).unsqueeze(0).cuda()
    return optimizer.optimize(rand_img, target, max_iter=max_iter, lr=lr,
                              sigma=sigma, debug=debug)
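# Hypothetical usage of visualize() above. The model, layer, and unit index are
# illustrative choices, and a CUDA device is assumed (the function moves both the
# target and the image to the GPU):
import torchvision.models as models

net = models.vgg16(pretrained=True).cuda().eval()
layer = net.features[28]                       # a late conv layer of VGG-16
img, final_loss = visualize(net, layer, idx=(0, 42, 7, 7),
                            max_iter=200, lr=0.5, sigma=1.0, debug=True)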
def main(): try: os.mkdir(args.snapshot_directory) except: pass xp = np using_gpu = args.gpu_device >= 0 if using_gpu: cuda.get_device(args.gpu_device).use() xp = cupy dataset = gqn.data.Dataset(args.dataset_directory) hyperparams = HyperParameters() hyperparams.generator_share_core = args.generator_share_core hyperparams.generator_share_prior = args.generator_share_prior hyperparams.generator_generation_steps = args.generation_steps hyperparams.inference_share_core = args.inference_share_core hyperparams.inference_share_posterior = args.inference_share_posterior hyperparams.pixel_n = args.pixel_n hyperparams.channels_chz = args.channels_chz hyperparams.generator_channels_u = args.channels_u hyperparams.inference_channels_map_x = args.channels_map_x hyperparams.pixel_sigma_i = args.initial_pixel_sigma hyperparams.pixel_sigma_f = args.final_pixel_sigma hyperparams.save(args.snapshot_directory) hyperparams.print() model = Model(hyperparams, snapshot_directory=args.snapshot_directory) if using_gpu: model.to_gpu() optimizer = Optimizer(model.parameters, mu_i=args.initial_lr, mu_f=args.final_lr) optimizer.print() if args.with_visualization: figure = gqn.imgplot.figure() axis1 = gqn.imgplot.image() axis2 = gqn.imgplot.image() axis3 = gqn.imgplot.image() figure.add(axis1, 0, 0, 1 / 3, 1) figure.add(axis2, 1 / 3, 0, 1 / 3, 1) figure.add(axis3, 2 / 3, 0, 1 / 3, 1) plot = gqn.imgplot.window( figure, (500 * 3, 500), "Query image / Reconstructed image / Generated image") plot.show() sigma_t = hyperparams.pixel_sigma_i pixel_var = xp.full((args.batch_size, 3) + hyperparams.image_size, sigma_t**2, dtype="float32") pixel_ln_var = xp.full((args.batch_size, 3) + hyperparams.image_size, math.log(sigma_t**2), dtype="float32") num_pixels = hyperparams.image_size[0] * hyperparams.image_size[1] * 3 current_training_step = 0 for iteration in range(args.training_iterations): mean_kld = 0 mean_nll = 0 mean_mse = 0 total_batch = 0 start_time = time.time() for subset_index, subset in enumerate(dataset): iterator = gqn.data.Iterator(subset, batch_size=args.batch_size) for batch_index, data_indices in enumerate(iterator): # shape: (batch, views, height, width, channels) # range: [-1, 1] images, viewpoints = subset[data_indices] # (batch, views, height, width, channels) -> (batch, views, channels, height, width) images = images.transpose((0, 1, 4, 2, 3)) total_views = images.shape[1] # sample number of views num_views = random.choice(range(total_views + 1)) query_index = random.choice(range(total_views)) if num_views > 0: r = model.compute_observation_representation( images[:, :num_views], viewpoints[:, :num_views]) else: r = xp.zeros((args.batch_size, hyperparams.channels_r) + hyperparams.chrz_size, dtype="float32") r = chainer.Variable(r) query_images = images[:, query_index] query_viewpoints = viewpoints[:, query_index] # transfer to gpu query_images = to_gpu(query_images) query_viewpoints = to_gpu(query_viewpoints) h0_gen, c0_gen, u_0, h0_enc, c0_enc = model.generate_initial_state( args.batch_size, xp) loss_kld = 0 hl_enc = h0_enc cl_enc = c0_enc hl_gen = h0_gen cl_gen = c0_gen ul_enc = u_0 xq = model.inference_downsampler.downsample(query_images) for l in range(model.generation_steps): inference_core = model.get_inference_core(l) inference_posterior = model.get_inference_posterior(l) generation_core = model.get_generation_core(l) generation_piror = model.get_generation_prior(l) h_next_enc, c_next_enc = inference_core.forward_onestep( hl_gen, hl_enc, cl_enc, xq, query_viewpoints, r) mean_z_q = 
inference_posterior.compute_mean_z(hl_enc) ln_var_z_q = inference_posterior.compute_ln_var_z(hl_enc) ze_l = cf.gaussian(mean_z_q, ln_var_z_q) mean_z_p = generation_piror.compute_mean_z(hl_gen) ln_var_z_p = generation_piror.compute_ln_var_z(hl_gen) h_next_gen, c_next_gen, u_next_enc = generation_core.forward_onestep( hl_gen, cl_gen, ul_enc, ze_l, query_viewpoints, r) kld = gqn.nn.chainer.functions.gaussian_kl_divergence( mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p) loss_kld += cf.sum(kld) hl_gen = h_next_gen cl_gen = c_next_gen ul_enc = u_next_enc hl_enc = h_next_enc cl_enc = c_next_enc mean_x = model.generation_observation.compute_mean_x(ul_enc) negative_log_likelihood = gqn.nn.chainer.functions.gaussian_negative_log_likelihood( query_images, mean_x, pixel_var, pixel_ln_var) loss_nll = cf.sum(negative_log_likelihood) loss_mse = cf.mean_squared_error(mean_x, query_images) loss_nll /= args.batch_size loss_kld /= args.batch_size loss = loss_nll + loss_kld model.cleargrads() loss.backward() optimizer.update(current_training_step) if args.with_visualization and plot.closed() is False: axis1.update(make_uint8(query_images[0])) axis2.update(make_uint8(mean_x.data[0])) with chainer.no_backprop_mode(): generated_x = model.generate_image( query_viewpoints[None, 0], r[None, 0], xp) axis3.update(make_uint8(generated_x[0])) printr( "Iteration {}: Subset {} / {}: Batch {} / {} - loss: nll_per_pixel: {:.6f} mse: {:.6f} kld: {:.6f} - lr: {:.4e} - sigma_t: {:.6f}" .format(iteration + 1, subset_index + 1, len(dataset), batch_index + 1, len(iterator), float(loss_nll.data) / num_pixels, float(loss_mse.data), float(loss_kld.data), optimizer.learning_rate, sigma_t)) sf = hyperparams.pixel_sigma_f si = hyperparams.pixel_sigma_i sigma_t = max( sf + (si - sf) * (1.0 - current_training_step / hyperparams.pixel_n), sf) pixel_var[...] = sigma_t**2 pixel_ln_var[...] = math.log(sigma_t**2) total_batch += 1 current_training_step += 1 mean_kld += float(loss_kld.data) mean_nll += float(loss_nll.data) mean_mse += float(loss_mse.data) model.serialize(args.snapshot_directory) elapsed_time = time.time() - start_time print( "\033[2KIteration {} - loss: nll_per_pixel: {:.6f} mse: {:.6f} kld: {:.6f} - lr: {:.4e} - sigma_t: {:.6f} - step: {} - elapsed_time: {:.3f} min" .format(iteration + 1, mean_nll / total_batch / num_pixels, mean_mse / total_batch, mean_kld / total_batch, optimizer.learning_rate, sigma_t, current_training_step, elapsed_time / 60))
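# The pixel-variance annealing inside the loop above follows
# sigma_t = max(sigma_f + (sigma_i - sigma_f) * (1 - step / pixel_n), sigma_f).
# A small helper to inspect that schedule, mirroring the in-loop formula
# (the example values below are placeholders, not the run's actual hyperparameters):
def annealed_pixel_sigma(step, sigma_i, sigma_f, pixel_n):
    """Linearly anneal the pixel-observation sigma from sigma_i to sigma_f, then clamp."""
    return max(sigma_f + (sigma_i - sigma_f) * (1.0 - step / pixel_n), sigma_f)

# e.g. [annealed_pixel_sigma(s, 2.0, 0.7, 2e5) for s in (0, 1e5, 2e5, 3e5)]
#      -> [2.0, 1.35, 0.7, 0.7]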
def gen_one_image(network, layer, image, noise_level, loss_func, constant_area=0, max_iter=1000, lr=np.linspace(10, 0.5, 1000), sigma=0, grayscale=False, debug=False): """ Generate a single modified stimulus from a source image. (This function is primarily for use by other wrappers). Parameters: layer: the actual layer object, part of the network, that you're extracting features from for the generation image: a single image, in BCHW format, on the same device as the network (for now just GPU) grayscale: whether or not the optimization should be done in grayscale (enforcing the RGB channels stay the same) other arguments are same as std_generate """ # constant_area's default is actually dependent on image # so 0 there is just a non-None placeholder # set to the center (max_dim / 5) pixels by default if constant_area == 0: h_center = int(image.shape[2] / 2) w_center = int(image.shape[3] / 2) h_span = int(image.shape[2] / 10) w_span = int(image.shape[3] / 10) constant_area = (h_center - h_span, h_center + h_span, w_center - w_span, w_center + w_span) with torch.no_grad(): acts = [] hook = layer.register_forward_hook(lambda m, i, o: acts.append(o)) _ = network(image) act = acts[0] hook.remove() noisy_act = add_noise(act, noise_level) optimizer = Optimizer(network, layer, loss_func) new_img, loss = optimizer.optimize(image, noisy_act, constant_area=constant_area, max_iter=max_iter, lr=lr, sigma=sigma, clip_image=True, grayscale=grayscale, debug=debug) return new_img.detach().cpu().numpy().transpose(0, 2, 3, 1).squeeze(), loss
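# Hypothetical call to gen_one_image() above. loss_func is presumably applied to the
# current layer activations and the noisy target produced inside the function, so a
# simple feature-matching loss works as a sketch (names and noise level illustrative):
import torch.nn.functional as F


def feature_mse(acts, target):
    return F.mse_loss(acts, target)

# new_img, final_loss = gen_one_image(net, net.features[20], img_bchw,
#                                     noise_level=0.1, loss_func=feature_mse)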
def test_ge_reverse(self): opt = Optimizer([IntBounds, GuardPropagation]) i0 = opt.add_input(Types.INT) i1 = opt.add_operation(Operations.INT_LT, [i0, opt.new_constant_int(5)]) opt.add_operation(Operations.GUARD_TRUE, [i1]) i2 = opt.add_operation(Operations.INT_GE, [i0, opt.new_constant_int(7)]) opt.add_operation(Operations.GUARD_FALSE, [i2]) ops = opt.build_operations() assert len(ops) == 2 assert opt.getvalue(i2).getint() == 0
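# A companion case, assuming the bounds optimizer only removes comparisons it can
# actually prove from the guarded range (hypothetical test, mirrors the API used above):
def test_lt_not_implied(self):
    opt = Optimizer([IntBounds, GuardPropagation])
    i0 = opt.add_input(Types.INT)
    i1 = opt.add_operation(Operations.INT_LT, [i0, opt.new_constant_int(5)])
    opt.add_operation(Operations.GUARD_TRUE, [i1])
    # x < 5 does not decide x < 3, so this comparison must survive
    opt.add_operation(Operations.INT_LT, [i0, opt.new_constant_int(3)])
    ops = opt.build_operations()
    assert len(ops) == 3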
from train import train_model
from data_loader import load
from examples.NIPS.MNIST.mnist import test_MNIST, MNIST_Net, neural_predicate
from model import Model
from optimizer import Optimizer
from network import Network
import torch

queries = load('train_data.txt')

with open('addition.pl') as f:
    problog_string = f.read()

network = MNIST_Net()
net = Network(network, 'mnist_net', neural_predicate)
net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)

model = Model(problog_string, [net], caching=False)
optimizer = Optimizer(model, 2)

train_model(model, queries, 1, optimizer, test_iter=1000, test=test_MNIST,
            snapshot_iter=10000)
import numpy as np  # needed for the parameter initialization below

from Hamiltonian.hamiltonian import Hamiltonian  # noqa: 401
from Wavefunction.wavefunction import Wavefunction  # noqa: 401
from sampler import Sampler  # noqa: 401
# Optimizer is assumed to be imported from this project's optimizer module.

"""
Restricted Boltzmann Machine with Gibbs sampling.
Alternatively, a Metropolis-Hastings algorithm can be used to select configurations.
Optimization uses gradient descent.
"""

step_metropolis = 1.0
step_importance = 0.01
learning_rate = 0.1
gradient_iterations = 100

opt = Optimizer(learning_rate)


def non_interaction_case(monte_carlo_cycles, num_particles, num_dimensions,
                         hidden_nodes):
    """Run the Restricted Boltzmann Machine for the non-interacting case."""

    # Initialize the variational parameters (visible biases, hidden biases, weights)
    visible_nodes = num_particles * num_dimensions
    a_i = np.random.normal(0, 1, visible_nodes)
    b_j = np.random.normal(0, 1, hidden_nodes)
    W_ij = np.random.normal(0, 1, (visible_nodes, hidden_nodes))
    # a_i = np.random.rand(visible_nodes)
    # b_j = np.random.rand(hidden_nodes)
    # W_ij = np.random.rand(visible_nodes, hidden_nodes)
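# "Optimization uses gradient descent" above presumably means plain steepest descent
# on (a_i, b_j, W_ij). A minimal sketch of such an Optimizer, assuming gradients of
# the cost with respect to each parameter set are supplied by the sampler (the real
# class may differ):
class GradientDescent:
    def __init__(self, learning_rate):
        self.learning_rate = learning_rate

    def update(self, params, grads):
        """Return params - learning_rate * grads for arrays of matching shape."""
        return params - self.learning_rate * np.asarray(grads)

# e.g. a_i = GradientDescent(learning_rate).update(a_i, grad_a)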