def batch_learning_update(actor, critic, target_actor, target_critic, params):
    """Run batched actor-critic updates over trajectories stored in MongoDB.

    Pulls every trajectory recorded for the current training round, wraps it
    in a trajectory DataLoader, and performs ``params['learning_rounds']``
    passes of batched actor/critic updates.

    Args:
        actor, critic: networks being trained (``actor`` is put in train mode).
        target_actor, target_critic: target networks used by the update step.
        params: dict with at least 'training_round' and 'learning_rounds'.

    Returns:
        (actor, critic, params) — the (mutated in place) networks and params.
    """
    mongo = MongoDB()
    try:
        actor.train()
        query = {'training_round': params['training_round']}
        projection = {
            'obs': 1,
            'state': 1,
            'betsize_mask': 1,
            'action_mask': 1,
            'action': 1,
            'reward': 1,
            '_id': 0
        }
        db_data = mongo.get_data(query, projection)
        trainloader = return_trajectoryloader(db_data)
        for _ in range(params['learning_rounds']):
            losses = []
            for data in trainloader:
                critic_loss = update_actor_critic_batch(
                    data, actor, critic, target_actor, target_critic, params)
                losses.append(critic_loss)
    finally:
        # Close the connection even when an update raises, so repeated
        # training rounds don't leak MongoDB client handles.
        mongo.close()
    return actor, critic, params
def dual_learning_update(actor, critic, target_actor, target_critic, params, rank):
    """Per-worker actor-critic update over this rank's stored poker rounds.

    Fetches the rounds recorded by worker ``rank`` for the current training
    round, then for each learning round applies a per-sample actor/critic
    update followed by soft target-network updates.

    Args:
        actor, critic: networks being trained (``actor`` is put in train mode).
        target_actor, target_critic: target networks, soft-updated each step.
        params: dict with 'training_round', 'learning_rounds', and 'device'.
        rank: worker rank whose trajectories are selected.

    Returns:
        (actor, critic, params) — the (mutated in place) networks and params.
    """
    mongo = MongoDB()
    try:
        actor.train()
        query = {'training_round': params['training_round'], 'rank': rank}
        projection = {
            'obs': 1,
            'state': 1,
            'betsize_mask': 1,
            'action_mask': 1,
            'action': 1,
            'reward': 1,
            '_id': 0
        }
        # NOTE(review): if get_data returns a live cursor rather than a list,
        # only the first learning round will see any data — confirm upstream.
        data = mongo.get_data(query, projection)
        for _ in range(params['learning_rounds']):
            for poker_round in data:
                update_actor_critic(poker_round, critic, target_critic,
                                    actor, target_actor, params)
                soft_update(critic, target_critic, params['device'])
                soft_update(actor, target_actor, params['device'])
    finally:
        # Release the MongoDB handle even if an update raises.
        mongo.close()
    del data
    return actor, critic, params
def __set_fast_data(img_file_path, lbl):
    """Extract features from an image file and persist them to MongoDB.

    Builds a unique document key from the file's base name, its extension,
    and a millisecond timestamp, then writes a single
    ``{'file', 'label', 'feature'}`` document to the xray collection.
    Insert failures are logged and swallowed (best-effort write).

    Args:
        img_file_path: path to the image to featurize.
        lbl: label stored alongside the feature vector.
    """
    # Extract features BEFORE opening the DB handle so an extraction failure
    # cannot leak an open connection.
    feature_vector = FeatureExtractor().get_features(img_file_path)
    base, ext = os.path.splitext(os.path.basename(img_file_path))
    # Key format: <name>_<ext-without-dot>_<epoch-ms>, with any '.' mapped to
    # '_' so the key is safe as a Mongo identifier.
    key = base + '_' + ext[1:] + '_' + str(int(time.time() * 1000.0))
    key = key.replace('.', '_')
    payload = [{'file': key, 'label': lbl, 'feature': feature_vector}]
    db_handle = MongoDB()
    try:
        db_handle.to_db(payload=payload, key=None,
                        db=MONGO_HOPS_DB, collection=MONGO_XRAY_COLLECTION)
    except Exception as e:
        # Best-effort insert: report the failing file and keep going.
        print(img_file_path)
        print("Ignoring Exception : " + str(e))
    finally:
        # Close exactly once on both success and failure (the original
        # duplicated the close call in the try and except branches).
        db_handle.close()
learning_params['critic_lrscheduler'] = critic_lrscheduler # training loop # generate_trajectories(env,actor,critic,training_params,id=0) # actor,critic,learning_params = dual_learning_update(actor,critic,target_actor,target_critic,learning_params) if args.single: train_dual(0, env, actor, critic, target_actor, target_critic, training_params, learning_params, network_params, validation_params) else: actor.share_memory() critic.share_memory() for e in range(training_params['lr_steps']): # Clean mongo mongo = MongoDB() mongo.clean_db() mongo.close() now = datetime.datetime.now() print( f'Current date and time : {now.strftime("%Y-%m-%d %H:%M:%S")}' ) tic = time.time() mp.spawn(train_dual, args=(env, actor, critic, target_actor, target_critic, training_params, learning_params, network_params, validation_params), nprocs=num_processes) print(f'Training completed in {(time.time()-tic)/60} minutes') learning_params['actor_lrscheduler'].step() learning_params['critic_lrscheduler'].step() training_params['training_round'] = ( e + 1) * training_params['training_epochs']