def aggregate_target_results(result_id):
    # Check that we've completed the background tasks at least once. We need
    # this data in order to make a configuration recommendation (until we
    # implement a sampling technique to generate new training data).
    latest_pipeline_run = PipelineRun.objects.get_latest()
    newest_result = Result.objects.get(pk=result_id)
    if latest_pipeline_run is None or newest_result.session.tuning_session == 'randomly_generate':
        result = Result.objects.filter(pk=result_id)
        knobs_ = KnobCatalog.objects.filter(dbms=result[0].dbms, tunable=True)
        knobs_catalog = {k.name: k for k in knobs_}
        knobs = {k: v for k, v in list(knobs_catalog.items())}
        # generate a config randomly
        random_knob_result = gen_random_data(knobs)
        agg_data = DataUtil.aggregate_data(result)
        agg_data['newest_result_id'] = result_id
        agg_data['bad'] = True
        agg_data['config_recommend'] = random_knob_result
        return agg_data

    # Aggregate all knob config results tried by the target so far in this
    # tuning session and this tuning workload.
    target_results = Result.objects.filter(session=newest_result.session,
                                           dbms=newest_result.dbms,
                                           workload=newest_result.workload)
    if len(target_results) == 0:
        raise Exception('Cannot find any results for session_id={}, dbms_id={}'.format(
            newest_result.session, newest_result.dbms))
    agg_data = DataUtil.aggregate_data(target_results)
    agg_data['newest_result_id'] = result_id
    agg_data['bad'] = False
    return agg_data

def aggregate_target_results(result_id):
    # Check that we've completed the background tasks at least once. We need
    # this data in order to make a configuration recommendation (until we
    # implement a sampling technique to generate new training data).
    latest_pipeline_run = PipelineRun.objects.get_latest()
    if latest_pipeline_run is None:
        result = Result.objects.filter(pk=result_id)
        knobs_ = KnobCatalog.objects.filter(dbms=result[0].dbms, tunable=True)
        knobs_catalog = {k.name: k for k in knobs_}
        knobs = {k: v for k, v in list(knobs_catalog.items())}
        # generate a config randomly
        random_knob_result = gen_random_data(knobs)
        agg_data = DataUtil.aggregate_data(result)
        agg_data['newest_result_id'] = result_id
        agg_data['bad'] = True
        agg_data['config_recommend'] = random_knob_result
        return agg_data

    # Aggregate all knob config results tried by the target so far in this
    # tuning session and this tuning workload.
    newest_result = Result.objects.get(pk=result_id)
    target_results = Result.objects.filter(session=newest_result.session,
                                           dbms=newest_result.dbms,
                                           workload=newest_result.workload)
    if len(target_results) == 0:
        raise Exception('Cannot find any results for session_id={}, dbms_id={}'
                        .format(newest_result.session, newest_result.dbms))
    agg_data = DataUtil.aggregate_data(target_results)
    agg_data['newest_result_id'] = result_id
    agg_data['bad'] = False
    return agg_data

def aggregate_target_results(result_id, algorithm):
    # Check that we've completed the background tasks at least once. We need
    # this data in order to make a configuration recommendation (until we
    # implement a sampling technique to generate new training data).
    newest_result = Result.objects.get(pk=result_id)
    has_pipeline_data = PipelineData.objects.filter(
        workload=newest_result.workload).exists()
    if not has_pipeline_data or newest_result.session.tuning_session == 'randomly_generate':
        if not has_pipeline_data and newest_result.session.tuning_session == 'tuning_session':
            LOG.debug("Background tasks haven't run for this workload yet, "
                      "picking random data.")
        result = Result.objects.filter(pk=result_id)
        knobs = SessionKnob.objects.get_knobs_for_session(newest_result.session)
        # generate a config randomly
        random_knob_result = gen_random_data(knobs)
        agg_data = DataUtil.aggregate_data(result)
        agg_data['newest_result_id'] = result_id
        agg_data['bad'] = True
        agg_data['config_recommend'] = random_knob_result
        LOG.debug('%s: Finished generating a random config.\n\ndata=%s\n',
                  AlgorithmType.name(algorithm), JSONUtil.dumps(agg_data, pprint=True))
    else:
        # Aggregate all knob config results tried by the target so far in this
        # tuning session and this tuning workload.
        target_results = Result.objects.filter(session=newest_result.session,
                                               dbms=newest_result.dbms,
                                               workload=newest_result.workload)
        if len(target_results) == 0:
            raise Exception('Cannot find any results for session_id={}, dbms_id={}'.format(
                newest_result.session, newest_result.dbms))
        agg_data = DataUtil.aggregate_data(target_results)
        agg_data['newest_result_id'] = result_id
        agg_data['bad'] = False

        # Clean knob data
        cleaned_agg_data = clean_knob_data(agg_data['X_matrix'],
                                           agg_data['X_columnlabels'],
                                           newest_result.session)
        agg_data['X_matrix'] = np.array(cleaned_agg_data[0])
        agg_data['X_columnlabels'] = np.array(cleaned_agg_data[1])
        LOG.debug('%s: Finished aggregating target results.\n\ndata=%s\n',
                  AlgorithmType.name(algorithm), JSONUtil.dumps(agg_data, pprint=True))
    return agg_data, algorithm

def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
    LOG.info('Use ddpg to recommend configuration')
    result_id = result_info['newest_result_id']
    result = Result.objects.filter(pk=result_id)
    session = Result.objects.get(pk=result_id).session
    agg_data = DataUtil.aggregate_data(result)
    metric_data = agg_data['y_matrix'].flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_labels = np.array(cleaned_knob_data[1]).flatten()
    knob_num = len(knob_labels)
    metric_num = len(metric_data)
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num)
    if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory is not None:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    knob_data = ddpg.choose_action(normalized_metric_data)
    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
    knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
    conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}
    conf_map_res = {}
    conf_map_res['status'] = 'good'
    conf_map_res['result_id'] = result_id
    conf_map_res['recommendation'] = conf_map
    conf_map_res['info'] = 'INFO: ddpg'
    return conf_map_res

def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
    LOG.info('Use ddpg to recommend configuration')
    result_id = result_info['newest_result_id']
    result = Result.objects.filter(pk=result_id)
    session = Result.objects.get(pk=result_id).session
    agg_data = DataUtil.aggregate_data(result)
    metric_data = agg_data['y_matrix'].flatten()
    cleaned_agg_data = clean_knob_data(agg_data['X_matrix'],
                                       agg_data['X_columnlabels'], session)
    knob_labels = np.array(cleaned_agg_data[1]).flatten()
    knob_num = len(knob_labels)
    metric_num = len(metric_data)
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE,
                tau=TAU)
    if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory is not None:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    knob_data = ddpg.choose_action(metric_data)
    LOG.info('recommended knob: %s', knob_data)
    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
    knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
    conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}
    conf_map_res = {}
    conf_map_res['status'] = 'good'
    conf_map_res['result_id'] = result_id
    conf_map_res['recommendation'] = conf_map
    conf_map_res['info'] = 'INFO: ddpg'
    for k in knob_labels:
        LOG.info('%s: %f', k, conf_map[k])
    return conf_map_res

def aggregate_results():
    unique_clusters = WorkloadCluster.objects.all()
    unique_clusters = filter(lambda x: x.isdefault is False, unique_clusters)
    all_data = {}
    all_labels = {}
    for cluster in unique_clusters:
        results = ResultData.objects.filter(cluster=cluster)
        if len(results) < 2:
            continue
        if cluster.dbms.pk not in all_labels:
            knob_labels = np.asarray(
                sorted(JSONUtil.loads(results[0].param_data).keys()))
            metric_labels = np.asarray(
                sorted(JSONUtil.loads(results[0].metric_data).keys()))
            all_labels[cluster.dbms.pk] = (knob_labels, metric_labels)
        else:
            knob_labels, metric_labels = all_labels[cluster.dbms.pk]
        entry = DataUtil.aggregate_data(results, knob_labels, metric_labels)
        key = (cluster.dbms.pk, cluster.hardware.pk)
        if key not in all_data:
            all_data[key] = {}
        all_data[key][cluster.pk] = entry
    ts = now()
    tsf = ts.strftime("%Y%m%d-%H%M%S")
    # Note: dict.iteritems() is Python 2 only; use items() for Python 3.
    for (dbkey, hwkey), cluster_data in all_data.items():
        task_name = PipelineTaskType.TYPE_NAMES[
            PipelineTaskType.AGGREGATED_DATA].replace(' ', '').upper()
        savepaths = {}
        for clusterkey, entry in cluster_data.items():
            fname = '{}_{}_{}_{}_{}.npz'.format(task_name, dbkey, hwkey,
                                                clusterkey, tsf)
            savepath = os.path.join(PIPELINE_DIR, fname)
            savepaths[clusterkey] = savepath
            np.savez_compressed(savepath, **entry)
        value = {'data': savepaths}
        new_res = PipelineResult()
        new_res.dbms = DBMSCatalog.objects.get(pk=dbkey)
        new_res.hardware = Hardware.objects.get(pk=hwkey)
        new_res.creation_timestamp = ts
        new_res.task_type = PipelineTaskType.AGGREGATED_DATA
        new_res.value = JSONUtil.dumps(value)
        new_res.save()

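# Illustration (not part of the pipeline code): entries written above with
# np.savez_compressed can be recovered later with np.load. The filename below
# is hypothetical and simply mirrors the '{task}_{db}_{hw}_{cluster}_{ts}.npz'
# format used above; the keys follow DataUtil.aggregate_data's output.
import numpy as np

with np.load('AGGREGATEDDATA_1_1_3_20200101-120000.npz', allow_pickle=True) as data:
    X_matrix = data['X_matrix']    # knob matrix, one row per result
    y_matrix = data['y_matrix']    # metric matrix, one row per result
    rowlabels = data['rowlabels']  # result ids corresponding to the rows
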
def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
    start_ts = time.time()
    LOG.info('Use ddpg to recommend configuration')
    result_id = result_info['newest_result_id']
    result_list = Result.objects.filter(pk=result_id)
    result = result_list.first()
    session = result.session
    params = JSONUtil.loads(session.hyperparameters)
    agg_data = DataUtil.aggregate_data(result_list)
    metric_data, _ = clean_metric_data(agg_data['y_matrix'],
                                       agg_data['y_columnlabels'], session)
    metric_data = metric_data.flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_labels = np.array(cleaned_knob_data[1]).flatten()
    knob_num = len(knob_labels)
    metric_num = len(metric_data)
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num,
                a_hidden_sizes=params['DDPG_ACTOR_HIDDEN_SIZES'],
                c_hidden_sizes=params['DDPG_CRITIC_HIDDEN_SIZES'],
                use_default=params['DDPG_USE_DEFAULT'])
    if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory is not None:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    knob_data = ddpg.choose_action(normalized_metric_data)
    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
    knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
    conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}
    conf_map_res = create_and_save_recommendation(recommended_knobs=conf_map,
                                                  result=result, status='good',
                                                  info='INFO: ddpg')
    save_execution_time(start_ts, "configuration_recommendation_ddpg", result)
    return conf_map_res

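# Illustration (not part of the task code): the inverse_transform step above is
# the key trick. The DDPG actor emits actions in [0, 1] per knob, and fitting a
# MinMaxScaler on the per-knob [min, max] bounds lets inverse_transform map the
# actions back to real knob values. A minimal sketch with invented bounds and
# action values:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

knob_bounds = np.vstack([[0.0, 128.0],    # row 0: per-knob minimums
                         [1.0, 4096.0]])  # row 1: per-knob maximums
action = np.array([0.5, 0.25])            # hypothetical actor output in [0, 1]
knob_values = MinMaxScaler().fit(knob_bounds).inverse_transform(action.reshape(1, -1))[0]
# knob_values == [0.5, 1120.0]: each action is linearly rescaled onto [min, max]
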
def aggregate_data(workload):
    # Aggregates both the knob & metric data for the given workload.
    #
    # Parameters:
    #   workload: aggregate data belonging to this specific workload
    #
    # Returns: two dictionaries containing the knob & metric data as
    # a tuple

    # Find the results for this workload
    wkld_results = Result.objects.filter(workload=workload)

    # Now call the aggregate_data helper function to combine all knob &
    # metric data into matrices and also create row/column labels
    # (see the DataUtil class in website/utils.py)
    #
    # The aggregate_data helper function returns a dictionary of the form:
    #   - 'X_matrix': the knob data as a 2D numpy matrix (results x knobs)
    #   - 'y_matrix': the metric data as a 2D numpy matrix (results x metrics)
    #   - 'rowlabels': list of result ids that correspond to the rows in
    #     both X_matrix & y_matrix
    #   - 'X_columnlabels': a list of the knob names corresponding to the
    #     columns in the knob_data matrix
    #   - 'y_columnlabels': a list of the metric names corresponding to the
    #     columns in the metric_data matrix
    aggregated_data = DataUtil.aggregate_data(wkld_results)

    # Separate knob & workload data into two "standard" dictionaries of the
    # same form
    knob_data = {
        'data': aggregated_data['X_matrix'],
        'rowlabels': aggregated_data['rowlabels'],
        'columnlabels': aggregated_data['X_columnlabels']
    }
    metric_data = {
        'data': aggregated_data['y_matrix'],
        'rowlabels': copy.deepcopy(aggregated_data['rowlabels']),
        'columnlabels': aggregated_data['y_columnlabels']
    }

    # Return the knob & metric data
    return knob_data, metric_data

def aggregate_data(wkld_results):
    # Aggregates both the knob & metric data for the given workload.
    #
    # Parameters:
    #   wkld_results: result data belonging to this specific workload
    #
    # Returns: two dictionaries containing the knob & metric data as
    # a tuple

    # Now call the aggregate_data helper function to combine all knob &
    # metric data into matrices and also create row/column labels
    # (see the DataUtil class in website/utils.py)
    #
    # The aggregate_data helper function returns a dictionary of the form:
    #   - 'X_matrix': the knob data as a 2D numpy matrix (results x knobs)
    #   - 'y_matrix': the metric data as a 2D numpy matrix (results x metrics)
    #   - 'rowlabels': list of result ids that correspond to the rows in
    #     both X_matrix & y_matrix
    #   - 'X_columnlabels': a list of the knob names corresponding to the
    #     columns in the knob_data matrix
    #   - 'y_columnlabels': a list of the metric names corresponding to the
    #     columns in the metric_data matrix
    start_ts = time.time()
    aggregated_data = DataUtil.aggregate_data(
        wkld_results, ignore=['range_test', 'default', '*'])

    # Separate knob & workload data into two "standard" dictionaries of the
    # same form
    knob_data = {
        'data': aggregated_data['X_matrix'],
        'rowlabels': aggregated_data['rowlabels'],
        'columnlabels': aggregated_data['X_columnlabels']
    }
    metric_data = {
        'data': aggregated_data['y_matrix'],
        'rowlabels': copy.deepcopy(aggregated_data['rowlabels']),
        'columnlabels': aggregated_data['y_columnlabels']
    }

    # Return the knob & metric data
    save_execution_time(start_ts, "aggregate_data")
    return knob_data, metric_data

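# Illustration (invented toy values): the two dictionaries returned above share
# their rowlabels, so row i of knob_data['data'] and metric_data['data']
# describe the same result and can be paired directly for training.
import numpy as np

knob_data = {
    'data': np.array([[128.0, 4.0], [256.0, 8.0]]),  # 2 results x 2 knobs
    'rowlabels': [101, 102],                          # result ids, one per row
    'columnlabels': ['knob_a', 'knob_b'],             # hypothetical knob names
}
metric_data = {
    'data': np.array([[950.3, 12.1], [1210.7, 9.8]]),  # 2 results x 2 metrics
    'rowlabels': [101, 102],                            # same rows as knob_data
    'columnlabels': ['throughput', 'latency_99'],       # hypothetical metric names
}
assert knob_data['rowlabels'] == metric_data['rowlabels']
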
def aggregate_target_results(result_id):
    # Check that we've completed the background tasks at least once. We need
    # this data in order to make a configuration recommendation (until we
    # implement a sampling technique to generate new training data).
    latest_pipeline_run = PipelineRun.objects.get_latest()
    if latest_pipeline_run is None:
        raise Exception("No previous data available. Implement me!")

    # Aggregate all knob config results tried by the target so far in this
    # tuning session.
    newest_result = Result.objects.get(pk=result_id)
    target_results = Result.objects.filter(session=newest_result.session,
                                           dbms=newest_result.dbms)
    if len(target_results) == 0:
        raise Exception('Cannot find any results for session_id={}, dbms_id={}'
                        .format(newest_result.session, newest_result.dbms))
    agg_data = DataUtil.aggregate_data(target_results)
    agg_data['newest_result_id'] = result_id
    return agg_data

def aggregate_target_results(result_id):
    newest_result = Result.objects.get(pk=result_id)
    target_results = Result.objects.filter(application=newest_result.application,
                                           dbms=newest_result.dbms)
    if len(target_results) == 0:
        raise Exception('Cannot find any results for app_id={}, dbms_id={}'.format(
            newest_result.application, newest_result.dbms))
    target_result_datas = [ResultData.objects.get(result=tres)
                           for tres in target_results]
    knob_labels = np.asarray(
        sorted(JSONUtil.loads(target_result_datas[0].param_data).keys()))
    metric_labels = np.asarray(
        sorted(JSONUtil.loads(target_result_datas[0].metric_data).keys()))
    agg_data = DataUtil.aggregate_data(target_result_datas, knob_labels,
                                       metric_labels)
    agg_data['newest_result_id'] = result_id
    return agg_data

def test_aggregate(self):
    workload2 = Result.objects.filter(workload=2)
    num_results = Result.objects.filter(workload=2).count()
    knobs = list(JSONUtil.loads(workload2[0].knob_data.data).keys())
    metrics = list(JSONUtil.loads(workload2[0].metric_data.data).keys())
    num_knobs = len(knobs)
    num_metrics = len(metrics)

    test_result = DataUtil.aggregate_data(workload2)

    self.assertTrue('X_matrix' in list(test_result.keys()))
    self.assertTrue('y_matrix' in list(test_result.keys()))
    self.assertTrue('rowlabels' in list(test_result.keys()))
    self.assertTrue('X_columnlabels' in list(test_result.keys()))
    self.assertTrue('y_columnlabels' in list(test_result.keys()))
    self.assertEqual(test_result['X_columnlabels'], knobs)
    self.assertEqual(test_result['y_columnlabels'], metrics)
    self.assertEqual(test_result['X_matrix'].shape[0], num_results)
    self.assertEqual(test_result['y_matrix'].shape[0], num_results)
    self.assertEqual(test_result['X_matrix'].shape[1], num_knobs)
    self.assertEqual(test_result['y_matrix'].shape[1], num_metrics)

def train_ddpg(result_id):
    LOG.info('Add training data to ddpg and train ddpg')
    result = Result.objects.get(pk=result_id)
    session = Result.objects.get(pk=result_id).session
    session_results = Result.objects.filter(session=session,
                                            creation_time__lt=result.creation_time)
    result_info = {}
    result_info['newest_result_id'] = result_id
    if len(session_results) == 0:
        LOG.info('No previous result. Abort.')
        return result_info

    # Extract data from result
    result = Result.objects.filter(pk=result_id)
    base_result_id = session_results[0].pk
    base_result = Result.objects.filter(pk=base_result_id)

    agg_data = DataUtil.aggregate_data(result)
    metric_data = agg_data['y_matrix'].flatten()
    base_metric_data = (DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]

    # Clean knob data
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_data = np.array(cleaned_knob_data[0])
    knob_labels = np.array(cleaned_knob_data[1])
    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels.flatten(), session))
    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
    knob_num = len(knob_data)
    metric_num = len(metric_data)
    LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)

    # Filter ys by current target objective metric
    result = Result.objects.get(pk=result_id)
    target_objective = result.session.target_objective
    target_obj_idx = [i for i, n in enumerate(agg_data['y_columnlabels'])
                      if n == target_objective]
    if len(target_obj_idx) == 0:
        raise Exception(('Could not find target objective in metrics '
                         '(target_obj={})').format(target_objective))
    elif len(target_obj_idx) > 1:
        raise Exception(('Found {} instances of target objective in '
                         'metrics (target_obj={})').format(len(target_obj_idx),
                                                           target_objective))
    objective = metric_data[target_obj_idx]
    base_objective = base_metric_data[target_obj_idx]
    metric_meta = db.target_objectives.get_metric_metadata(
        result.session.dbms.pk, result.session.target_objective)

    # Calculate the reward
    objective = objective / base_objective
    if metric_meta[target_objective].improvement == '(less is better)':
        reward = -objective
    else:
        reward = objective
    LOG.info('reward: %f', reward)

    # Update ddpg
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
                clr=CRITIC_LEARNING_RATE, gamma=0, batch_size=DDPG_BATCH_SIZE)
    if session.ddpg_actor_model and session.ddpg_critic_model:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data)
    for _ in range(25):
        ddpg.update()
    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
    session.ddpg_reply_memory = ddpg.replay_memory.get()
    session.save()
    return result_info

def train_ddpg(result_id):
    LOG.info('Add training data to ddpg and train ddpg')
    result = Result.objects.get(pk=result_id)
    session = Result.objects.get(pk=result_id).session
    params = JSONUtil.loads(session.hyperparameters)
    session_results = Result.objects.filter(session=session,
                                            creation_time__lt=result.creation_time)
    result_info = {}
    result_info['newest_result_id'] = result_id

    # Extract data from result and previous results
    result = Result.objects.filter(pk=result_id)
    if len(session_results) == 0:
        base_result_id = result_id
        prev_result_id = result_id
    else:
        base_result_id = session_results[0].pk
        prev_result_id = session_results[len(session_results) - 1].pk
    base_result = Result.objects.filter(pk=base_result_id)
    prev_result = Result.objects.filter(pk=prev_result_id)

    agg_data = DataUtil.aggregate_data(result)
    base_metric_data = (DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
    prev_metric_data = (DataUtil.aggregate_data(prev_result))['y_matrix'].flatten()
    result = Result.objects.get(pk=result_id)
    target_objective = result.session.target_objective
    prev_obj_idx = [i for i, n in enumerate(agg_data['y_columnlabels'])
                    if n == target_objective]

    # Clean metric data
    metric_data, metric_labels = clean_metric_data(agg_data['y_matrix'],
                                                   agg_data['y_columnlabels'],
                                                   session)
    metric_data = metric_data.flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]

    # Clean knob data
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_data = np.array(cleaned_knob_data[0])
    knob_labels = np.array(cleaned_knob_data[1])
    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels.flatten(), session))
    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
    knob_num = len(knob_data)
    metric_num = len(metric_data)
    LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)

    # Filter ys by current target objective metric
    target_obj_idx = [i for i, n in enumerate(metric_labels) if n == target_objective]
    if len(target_obj_idx) == 0:
        raise Exception(('Could not find target objective in metrics '
                         '(target_obj={})').format(target_objective))
    elif len(target_obj_idx) > 1:
        raise Exception(('Found {} instances of target objective in '
                         'metrics (target_obj={})').format(len(target_obj_idx),
                                                           target_objective))
    objective = metric_data[target_obj_idx]
    base_objective = base_metric_data[prev_obj_idx]
    prev_objective = prev_metric_data[prev_obj_idx]
    metric_meta = db.target_objectives.get_metric_metadata(
        result.session.dbms.pk, result.session.target_objective)

    # Calculate the reward
    if params['DDPG_SIMPLE_REWARD']:
        objective = objective / base_objective
        if metric_meta[target_objective].improvement == '(less is better)':
            reward = -objective
        else:
            reward = objective
    else:
        if metric_meta[target_objective].improvement == '(less is better)':
            if objective - base_objective <= 0:  # positive reward
                reward = (np.square((2 * base_objective - objective) / base_objective) - 1)\
                    * abs(2 * prev_objective - objective) / prev_objective
            else:  # negative reward
                reward = -(np.square(objective / base_objective) - 1) * objective / prev_objective
        else:
            if objective - base_objective > 0:  # positive reward
                reward = (np.square(objective / base_objective) - 1) * objective / prev_objective
            else:  # negative reward
                reward = -(np.square((2 * base_objective - objective) / base_objective) - 1)\
                    * abs(2 * prev_objective - objective) / prev_objective
    LOG.info('reward: %f', reward)

    # Update ddpg
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num,
                alr=params['DDPG_ACTOR_LEARNING_RATE'],
                clr=params['DDPG_CRITIC_LEARNING_RATE'],
                gamma=params['DDPG_GAMMA'],
                batch_size=params['DDPG_BATCH_SIZE'],
                a_hidden_sizes=params['DDPG_ACTOR_HIDDEN_SIZES'],
                c_hidden_sizes=params['DDPG_CRITIC_HIDDEN_SIZES'],
                use_default=params['DDPG_USE_DEFAULT'])
    if session.ddpg_actor_model and session.ddpg_critic_model:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data)
    for _ in range(params['DDPG_UPDATE_EPOCHS']):
        ddpg.update()
    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
    session.ddpg_reply_memory = ddpg.replay_memory.get()
    session.save()
    return result_info

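# Worked example (invented numbers) of the non-simple reward above for a
# "less is better" objective such as latency: base_objective = 100,
# prev_objective = 80, objective = 60. Since objective - base_objective <= 0,
# the positive branch applies:
#
#   reward = (((2*100 - 60) / 100)**2 - 1) * abs(2*80 - 60) / 80
#          = (1.4**2 - 1) * 1.25
#          = 1.2
#
# so beating both the baseline and the previous result yields a reward that
# grows with the margin of improvement.
import numpy as np

base_objective, prev_objective, objective = 100.0, 80.0, 60.0
reward = (np.square((2 * base_objective - objective) / base_objective) - 1) \
    * abs(2 * prev_objective - objective) / prev_objective
assert abs(reward - 1.2) < 1e-9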