def solution_submit(request, problem_id):
    if request.method == "POST":
        problem_obj = Problem.objects.get(problem_id=problem_id)
        user_prof = UserProfile.objects.get(user=request.user)
        solution_obj = SolutionCode(
            problem_associated=problem_obj,
            user_associated=user_prof,
            code_text=request.FILES['code_upload'],
        )
        code = request.FILES['code_upload'].read()
        solution_obj.save()
        media_root = settings.MEDIA_ROOT
        sol_dir_path = joindir(media_root, 'solutions',
                               'user_{0}'.format(request.user),
                               'prob_{0}'.format(problem_id))
        old_sol_name = joindir(sol_dir_path, 'sol')
        new_sol_name = joindir(sol_dir_path,
                               'sol_{0}.cpp'.format(solution_obj.pk))
        print('apd ' + str(old_sol_name))
        os.rename(old_sol_name, new_sol_name)
        checker(request, problem_obj, solution_obj)
    return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
def create_topojson(geojson_dict, name, directory='shapes-topojson'):
    # geojson_dict is in GeoJSON format
    tmp_fnm = joindir(directory, '{}.geojson'.format(name))
    with open(tmp_fnm, 'w+') as f:
        f.write(dump_json(geojson_dict))
    # needs the topojson CLI installed
    print('creating {}'.format(name))
    os.system("topojson {tmp} -o {fnm} --properties".format(
        tmp=tmp_fnm, fnm=joindir(directory, name + '.topojson')))
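# A minimal usage sketch (not from the original module): assumes the
# `topojson` CLI is installed and that `dump_json` serializes like
# `json.dumps`; the sample feature collection below is made up.
demo_geojson = {
    'type': 'FeatureCollection',
    'features': [{
        'type': 'Feature',
        'properties': {'name': 'demo'},
        'geometry': {'type': 'Point', 'coordinates': [0.0, 0.0]},
    }],
}
create_topojson(demo_geojson, 'demo')  # writes demo.geojson, then demo.topojson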
def check_test_cases(request):
    """If a test case was added without a corresponding info file,
    generate that file."""
    path = 'data/test_case/'
    count = 0
    test_cases = listdir(path)
    for t in test_cases:
        if isdir(joindir(path, t)):
            if not isfile(joindir(path, t, 'info')):
                process_test_case(joindir(path, t))
                count += 1
    if count > 0:
        messages.success(
            request, 'Generated info files for {} test cases'.format(count))
def checker(request, problem_obj, solution_obj):
    verdict = {
        1: "Accepted",
        2: "Wrong answer",
        3: "Time Limit Exceeded",
        4: "Compilation Error",
    }
    input_dir_path = os.path.dirname(os.path.dirname(__file__))
    input_dir_path = joindir(
        input_dir_path,
        'media/problems/problem_{0}/input'.format(problem_obj.problem_id))
    all_input_list = os.listdir(input_dir_path)
    all_input_list.sort()

    # check for compilation error
    sol_dir_path = os.path.dirname(os.path.dirname(__file__))
    sol_dir_path = joindir(
        sol_dir_path,
        'media/solutions/user_{0}/prob_{1}'.format(request.user.username,
                                                   problem_obj.problem_id))
    compile_proc = subprocess.Popen(shlex.split(
        'g++ sol_{0}.cpp -o sol_{0}'.format(solution_obj.pk)),
        cwd=sol_dir_path, shell=False)
    compile_proc.wait()
    all_sol_list = os.listdir(sol_dir_path)
    # the compile step above produces 'sol_<pk>', so look for that name
    code_object_file = 'sol_{0}'.format(solution_obj.pk)
    if code_object_file in all_sol_list:
        for filename in all_input_list:
            file_path = joindir(input_dir_path, filename)
            with open(file_path, 'r') as file_obj:
                run_proc = subprocess.Popen(
                    shlex.split('./' + code_object_file),
                    stdin=file_obj, cwd=sol_dir_path, shell=False)
                run_proc.wait()
    else:
        return verdict[4]
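# Note that `checker` above can only report verdict 4: the run step waits
# without a timeout, so verdict 3 (Time Limit Exceeded) is never produced.
# A minimal sketch of a timed run; `run_with_time_limit` and `time_limit_sec`
# are hypothetical names, not part of the original code.
def run_with_time_limit(executable, input_path, cwd, time_limit_sec=2.0):
    with open(input_path, 'r') as input_file:
        try:
            proc = subprocess.run(['./' + executable],
                                  stdin=input_file,
                                  cwd=cwd,
                                  timeout=time_limit_sec)
            return proc.returncode  # 0 on normal exit
        except subprocess.TimeoutExpired:
            return None  # caller can map this to verdict[3]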
def examlist_legency(request):
    """Show the exam list: only exams whose classes match the user's
    classes, packed into a dict and passed to render."""
    path = 'data/exams'
    # get user's classes id
    if request.user.is_staff == 1 or request.user.myuser.usertype == 2:
        # None represents a match with any classes
        user_classes = None
    else:
        user_classes = request.user.myuser.classes
    # list all the exams
    exam_list = listdir(path)
    exam_list = [item for item in exam_list if re.match(r'.*\.json', item)]
    exam_jsons = []
    for exam in exam_list:
        with open(joindir(path, exam), 'r') as f:
            exam_json = json.load(f)
        exam_json['id'] = exam.replace('.json', '')
        if user_classes is None or exam_json['classes'] == user_classes:
            exam_jsons.append(exam_json)
    exam_list = [{'name': j['name'], 'id': j['id']} for j in exam_jsons]
    return render(request, 'examlist.html', {'exam_list': exam_list})
def test_record(args):
    record_dfs = []
    for i in range(args.num_parallel_run):
        args.seed += 1
        network, normalized_state, reward_record = ppo(args)
        record = pd.DataFrame(reward_record)
        record['parallel_run'] = i
        record_dfs.append(record)
    record_dfs = pd.concat(record_dfs, axis=0)
    record_dfs.to_csv(
        joindir(RESULT_DIR, 'ppo-record-{}.csv'.format(args.env_name)))
    torch.save(
        {
            'model_state_dict': network.state_dict(),
            'normalized_state': normalized_state
        }, joindir(RESULT_DIR, 'ppo-modelpara-{}.pth'.format(args.env_name)))
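# A minimal sketch of reading back what test_record saves above;
# `load_record_sketch` is a hypothetical helper, shown only to document the
# checkpoint layout (it mirrors the evaluation script elsewhere in this
# collection).
def load_record_sketch(env_name, network):
    checkpoint = torch.load(
        joindir(RESULT_DIR, 'ppo-modelpara-{}.pth'.format(env_name)))
    network.load_state_dict(checkpoint['model_state_dict'])
    return network, checkpoint['normalized_state']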
def process_test_case(path, spj=False):
    size_cache = {}
    md5_cache = {}
    test_case_list = listdir(path)
    test_case_list = sorted([
        item for item in test_case_list
        if item.endswith('.in') or item.endswith('.out')
    ])
    for item in test_case_list:
        with open(joindir(path, item), 'r+b') as f:
            # normalize line endings in place
            content = f.read().replace(b'\r\n', b'\n')
            size_cache[item] = len(content)
            if item.endswith('.out'):
                md5_cache[item] = hashlib.md5(content.rstrip()).hexdigest()
            f.seek(0)
            f.write(content)
            f.truncate()
    test_case_info = {'spj': spj, 'test_cases': {}}
    info = []
    if spj:
        for index, item in enumerate(test_case_list):
            data = {'input_name': item, 'input_size': size_cache[item]}
            info.append(data)
            test_case_info['test_cases'][str(index + 1)] = data
    else:
        # ["1.in", "1.out", "2.in", "2.out"] => [("1.in", "1.out"), ("2.in", "2.out")]
        test_case_list = zip(*[test_case_list[i::2] for i in range(2)])
        for index, item in enumerate(test_case_list):
            data = {
                "stripped_output_md5": md5_cache[item[1]],
                "input_size": size_cache[item[0]],
                "output_size": size_cache[item[1]],
                "input_name": item[0],
                "output_name": item[1]
            }
            info.append(data)
            test_case_info["test_cases"][str(index + 1)] = data
    with open(joindir(path, 'info'), "w", encoding="utf-8") as f:
        f.write(json.dumps(test_case_info, indent=4))
    return test_case_info
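# A standalone illustration of the pairing idiom used above (not part of the
# original module): zip(*[lst[i::2] for i in range(2)]) zips even-indexed
# items against odd-indexed ones.
names = ['1.in', '1.out', '2.in', '2.out']
pairs = list(zip(*[names[i::2] for i in range(2)]))
assert pairs == [('1.in', '1.out'), ('2.in', '2.out')]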
def test(args):
    record_dfs = []
    for i in range(args.num_parallel_run):
        args.seed += 1
        reward_record = pd.DataFrame(ppo(args))
        reward_record['#parallel_run'] = i
        record_dfs.append(reward_record)
    record_dfs = pd.concat(record_dfs, axis=0)
    record_dfs.to_csv(
        joindir(RESULT_DIR, 'ppo-record-{}.csv'.format(args.env_name)))
def run_single_and_plot(config, algo_name='CEM'):
    record_dfs = pd.DataFrame(columns=['steps', 'reward'])
    reward_cols = []
    for i in range(config['num_trials']):
        config['seed'] = np.random.randint(1000)
        master = Master(config)
        master.run()
        reward_record = pd.DataFrame(master.reward_record)
        record_dfs = record_dfs.merge(reward_record,
                                      how='outer',
                                      on='steps',
                                      suffixes=('', '_{}'.format(i)))
        reward_cols.append('reward_{}'.format(i))
    record_dfs = record_dfs.drop(columns='reward').sort_values(
        by='steps', ascending=True).ffill().bfill()
    record_dfs['reward_mean'] = record_dfs[reward_cols].mean(axis=1)
    record_dfs['reward_std'] = record_dfs[reward_cols].std(axis=1)
    record_dfs['reward_smooth'] = record_dfs['reward_mean'].ewm(
        span=1000).mean()
    record_dfs['reward_smooth_std'] = record_dfs['reward_std'].ewm(
        span=1000).mean()
    record_dfs.to_csv(
        joindir(TUNE_DIR,
                '{}-record-{}.csv'.format(algo_name, config['env_name'])))

    # Plot
    plt.figure(figsize=(12, 6))
    plt.plot(record_dfs['steps'], record_dfs['reward_smooth'], label='reward')
    plt.fill_between(
        record_dfs['steps'],
        record_dfs['reward_smooth'] - record_dfs['reward_smooth_std'],
        record_dfs['reward_smooth'] + record_dfs['reward_smooth_std'],
        color='b',
        alpha=0.2)
    plt.legend()
    plt.xlabel('steps of env interaction (sample complexity)')
    plt.ylabel('average reward')
    plt.title('{} on {}'.format(algo_name, config['env_name']))
    plt.savefig(
        joindir(TUNE_DIR,
                '{}-plot-{}.pdf'.format(algo_name, config['env_name'])))
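# A standalone sketch (made-up numbers) of how the outer merge plus
# ffill/bfill above aligns trials recorded at different step counts before
# the row-wise mean/std are taken:
run_a = pd.DataFrame({'steps': [0, 10, 20], 'reward_0': [1.0, 2.0, 3.0]})
run_b = pd.DataFrame({'steps': [0, 15], 'reward_1': [1.5, 2.5]})
aligned = (run_a.merge(run_b, how='outer', on='steps')
                .sort_values(by='steps').ffill().bfill())
# every trial now has a value at steps 0, 10, 15, 20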
def check_exams(request):
    path = 'data/exams'
    exam_list = listdir(path)
    exam_list = [item for item in exam_list if re.match(r'.*\.json', item)]
    count = 0
    for exam in exam_list:
        eid = exam[:-5]
        exam_ress = Exam.objects.filter(eid=eid)
        if len(exam_ress) == 0:
            count += 1
            with open(joindir(path, exam), 'r') as f:
                exam_json = json.load(f)
            exam_json['eid'] = exam.replace('.json', '')
            exam_obj = Exam(**exam_json)
            exam_obj.save()
            dangerousremove(joindir(path, exam))
    if count > 0:
        messages.success(request, 'Added {} exams'.format(count))
def test(args):
    # `config` and `repeat` are expected to come from the enclosing module
    record_dfs = []
    for i in range(args.num_parallel_run):
        args.seed += 1
        reward_record = pd.DataFrame(ppo(args))
        reward_record['#parallel_run'] = i
        record_dfs.append(reward_record)
    record_dfs = pd.concat(record_dfs, axis=0)
    record_dfs.to_csv(
        joindir(
            RESULT_DIR,
            'ppo-record-{0}_{1}hidden_{2}threshold_{3}repeat'.format(
                args.env_name, config.hid_num, config.thres, repeat)))
def train(args):
    # `network`, `policy_buffer` and `config` come from the enclosing module
    record_dfs = []
    assert args.num_parallel_run == 1
    for i in range(args.num_parallel_run):
        args.seed += 1
        reward_df, rwds = ppo(args, network, policy_buffer, config)
        reward_record = pd.DataFrame(reward_df)
        reward_record['#parallel_run'] = i
        record_dfs.append(reward_record)
    record_dfs = pd.concat(record_dfs, axis=0)
    record_dfs.to_csv(
        joindir(RESULT_DIR, 'ppo-record-{}.csv'.format(args.env_name)))
    return rwds
def rollout_render(self, env, network, args, running_state, render,
                   video_folder):
    counter = 0
    state = env.reset()
    if args.state_norm:
        state = running_state(state)
    render.render()
    while counter < 5:
        action_mean = network._forward_actor(Tensor(state).unsqueeze(0))
        action = action_mean.data.numpy()[0]
        next_state, _, done, _ = env.step(action)
        if args.state_norm:  # keep normalization consistent with reset above
            next_state = running_state(next_state)
        render.render()
        if done:
            counter += 1
            state = env.reset()
            if args.state_norm:
                state = running_state(state)
            render.render()
        else:
            state = next_state
    render.to_mp4(
        joindir(video_folder, '{}-{}.mp4'.format(args.label, args.seed)))
times_loop = 1

# select the stock pool
pool_id = '000300'
pool_list = {
    '000001': 'SHCI',
    '000300': 'hs300',
    '000905': 'zz500',
    '000906': 'zz800',
    '399005': 'SmallCap',
    '399006': 'GEMI',
    '399106': 'SZCI',
    '399317': 'ASCI'
}

OUTPUT_RESULT_DIR = './tuning_result_bayes_v1'
OUTPUT_FORMULA_DIR = joindir(OUTPUT_RESULT_DIR, 'formula')
OUTPUT_FORMULA_FILE = joindir(OUTPUT_FORMULA_DIR, 'tuned_formulas_bayes.txt')
OUTPUT_DATA_DIR = joindir(OUTPUT_RESULT_DIR, 'data')
OUTPUT_DATA_IS_DIR = joindir(OUTPUT_DATA_DIR, 'insample')  # empty now
OUTPUT_DATA_OS_DIR = joindir(OUTPUT_DATA_DIR, 'outsample')  # empty now
OUTPUT_AUX_DIR = joindir(OUTPUT_RESULT_DIR, 'auxilliary')
OUTPUT_SCORE_FILE = joindir(OUTPUT_AUX_DIR, 'score.csv')
OUTPUT_FAILURE_FILE = joindir(OUTPUT_AUX_DIR, 'failure.csv')
OUTPUT_LOG_FILE = joindir(OUTPUT_AUX_DIR, 'log.txt')

makedirs(OUTPUT_FORMULA_DIR, exist_ok=True)
makedirs(OUTPUT_DATA_IS_DIR, exist_ok=True)
makedirs(OUTPUT_DATA_OS_DIR, exist_ok=True)
makedirs(OUTPUT_AUX_DIR, exist_ok=True)

logging.basicConfig(filename=OUTPUT_LOG_FILE)
def check_problem_base(request):
    converters = [
        {'id': int, 'description': str, 'A': str, 'B': str, 'C': str,
         'D': str, 'answer': str, 'tag': str},
        {'id': int, 'description': str, 'answer': str, 'tag': str},
        {'id': int, 'description': str, 'answer': str, 'tag': str},
        {'id': int, 'description': str, 'template': str,
         'test_case_id': str, 'tag': str},
        {'id': int, 'description': str, 'answer': str, 'tag': str},
        {'id': int, 'description': str, 'test_case_id': str, 'tag': str},
    ]
    path = 'data/problems'
    counter = 0
    for count, file in enumerate(problem_files):
        if not isfile(joindir(path, file)):
            continue
        data = pd.read_csv(joindir(path, file),
                           index_col=0,
                           converters=converters[count])
        data.rename(columns={
            'id': 'pid',
            'A': 'choice_A',
            'B': 'choice_B',
            'C': 'choice_C',
            'D': 'choice_D'
        }, inplace=True)
        data['problem_type'] = count
        dlist = data.to_dict('records')
        for d in dlist:
            problem_ress = Problem.objects.filter(
                pid=d['pid'], problem_type=d['problem_type'])
            if len(problem_ress) == 0:
                problem = Problem(**d)
                problem.save()
                counter += 1
        if isfile(joindir(path, file)):
            dangerousremove(joindir(path, file))
    if counter > 0:
        messages.success(request, 'Added {} problems'.format(counter))
    plt.fill_between(
        record_dfs['steps'],
        record_dfs['reward_smooth'] - record_dfs['reward_smooth_std'],
        record_dfs['reward_smooth'] + record_dfs['reward_smooth_std'],
        color='b',
        alpha=0.2)
    plt.legend()
    plt.xlabel('steps of env interaction (sample complexity)')
    plt.ylabel('average reward')
    plt.title('{} on {}'.format(algo_name, config['env_name']))
    plt.savefig(
        joindir(TUNE_DIR,
                '{}-plot-{}.pdf'.format(algo_name, config['env_name'])))


if __name__ == '__main__':
    logger = Logger(joindir(LOG_DIR, 'log_cem.txt'))
    trials = grid_search(run_cem, config_hopper)
    best_trial = sorted(trials, key=lambda x: x['score'])[-1]
    best_config = best_trial['config']
    best_score = best_trial['score']
    with open(joindir(TUNE_DIR,
                      'ARS-{}.json'.format(best_config['env_name'])),
              'w') as f:
        json.dump(best_config, f, indent=4, sort_keys=True)
    logger.info('========best solution found========')
    logger.info('best score: {}'.format(best_score))
    logger.info('best config: {}'.format(best_config))
record_dfs = record_dfs.merge(reward_record,
                              how='outer',
                              on='steps',
                              suffixes=('', '_{}'.format(i)))
reward_cols.append('reward_{}'.format(i))

record_dfs = record_dfs.drop(columns='reward').sort_values(
    by='steps', ascending=True).ffill().bfill()
record_dfs['reward_mean'] = record_dfs[reward_cols].mean(axis=1)
record_dfs['reward_std'] = record_dfs[reward_cols].std(axis=1)
record_dfs['reward_smooth'] = record_dfs['reward_mean'].ewm(span=1000).mean()
record_dfs['reward_smooth_std'] = record_dfs['reward_std'].ewm(
    span=1000).mean()
record_dfs.to_csv(
    joindir(RESULT_DIR,
            'cem-record-{}-{}.csv'.format(args.env_name, datestr)))

# Plot
plt.figure(figsize=(12, 6))
plt.plot(record_dfs['steps'], record_dfs['reward_smooth'], label='reward')
plt.fill_between(
    record_dfs['steps'],
    record_dfs['reward_smooth'] - record_dfs['reward_smooth_std'],
    record_dfs['reward_smooth'] + record_dfs['reward_smooth_std'],
    color='b',
    alpha=0.2)
plt.legend()
plt.xlabel('steps of env interaction (sample complexity)')
plt.ylabel('average reward')
plt.title('CEM on {}'.format(args.env_name))
plt.savefig(
import os
from collections import namedtuple
from os.path import join as joindir

import pandas as pd
import numpy as np
import argparse
import datetime
import math

from model import Policy, Value
from optimizer import conjugate_gradient, line_search
from utils import *
from running_state import *

if not os.path.exists('./result'):
    os.mkdir('./result')

Transition = namedtuple('Transition',
                        ('state', 'action', 'mask', 'next_state', 'reward'))
EPS = 1e-10
RESULT_DIR = joindir('./result', '.'.join(__file__.split('.')[:-1]))
if not os.path.exists(RESULT_DIR):
    os.makedirs(RESULT_DIR)


def add_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env_name', type=str, default='Hopper-v2',
                        help='gym environment to test algorithm')
    parser.add_argument('--seed', type=int, default=64, help='random seed')
    parser.add_argument('--num_episode', type=int, default=200, help='')
    parser.add_argument('--batch_size', type=int, default=5000, help='')
    parser.add_argument('--max_step_per_episode', type=int, default=200,
def load_problem_base_legency(exam_json):
    """Load the problem base and generate an exam paper according to
    exam_json. Returns a dict for the paper: one DataFrame per problem
    type."""
    path = 'data/problems'
    # multiple choice
    data_ch = _load_problem(path=joindir(path, 'Choice.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'A': str,
                                'B': str,
                                'C': str,
                                'D': str,
                                'answer': str,
                                'tag': str
                            },
                            tag=exam_json['tag_choice'],
                            num=exam_json['num_choice'])
    # fill in the blank
    data_cp = _load_problem(path=joindir(path, 'Completion.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'answer': str,
                                'tag': str
                            },
                            tag=exam_json['tag_completion'],
                            num=exam_json['num_completion'])
    # true or false
    data_tf = _load_problem(path=joindir(path, 'TrueOrFalse.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'answer': str,
                                'tag': str
                            },
                            tag=exam_json['tag_trueorfalse'],
                            num=exam_json['num_trueorfalse'])
    # program correction
    data_pc = _load_problem(path=joindir(path, 'ProgramCorrection.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'template': str,
                                'test_case_id': str,
                                'tag': str
                            },
                            tag=exam_json['tag_programcorrection'],
                            num=exam_json['num_programcorrection'])
    # program reading
    data_pr = _load_problem(path=joindir(path, 'ProgramReading.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'answer': str,
                                'tag': str
                            },
                            tag=exam_json['tag_programreading'],
                            num=exam_json['num_programreading'])
    # program design
    data_pd = _load_problem(path=joindir(path, 'ProgramDesign.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'test_case_id': str,
                                'tag': str
                            },
                            tag=exam_json['tag_programdesign'],
                            num=exam_json['num_programdesign'])
    ans = {
        'choice': data_ch,
        'completion': data_cp,
        'true_or_false': data_tf,
        'program_correction': data_pc,
        'program_reading': data_pr,
        'program_design': data_pd,
    }
    return ans
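# `_load_problem` is defined elsewhere in the original project; the sketch
# below (a hypothetical implementation, given a distinct name on purpose)
# only documents the interface the calls above rely on.
def _load_problem_sketch(path, converters, tag, num):
    data = pd.read_csv(path, index_col=0, converters=converters)
    data = data[data['tag'] == tag]            # keep problems with this tag
    return data.sample(n=min(num, len(data)))  # draw `num` problems at random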
from collections import namedtuple
from os import makedirs as mkdir
from os.path import join as joindir

import pandas as pd
import numpy as np
import argparse
import datetime
import math
import gym
import env
from scipy import stats
import sys

Transition = namedtuple(
    'Transition',
    ('state', 'value', 'action', 'logproba', 'mask', 'next_state', 'reward'))
EPS = 1e-10
RESULT_DIR = joindir('../result', '.'.join(__file__.split('.')[:-1]))
mkdir(RESULT_DIR, exist_ok=True)


# hyperparameter settings
class args(object):
    seed = 10
    num_episode = 200
    batch_size = 2048
    max_step_per_round = 1000
    gamma = 0.995
    lamda = 0.97
    log_num_episode = 1
    num_epoch = 10
    minibatch_size = 256
    clip = 0.2
        # resize all panes:
        panes = self.filesArea.panes()
        panesize = self.filesArea.winfo_width() / len(panes)
        for pane in panes:
            self.filesArea.paneconfigure(pane, width=panesize)


if __name__ == '__main__':
    app = Application()
    app.master.title('QLC+ Multi-file Helper Utility')
    iconsdir = joindir(dirname(abspath(__file__)), 'icons')
    for functype, _ in FUNCTION_TYPES:
        icon = tk.PhotoImage(file=joindir(iconsdir,
                                          functype.lower() + '.png'))
        icon = icon.subsample(2, 2)
        ICONS[functype] = icon
    for f in sys.argv[1:]:
        try:
            app.load_file(f)
        except Exception as e:
            print(e)
    app.mainloop()
import torch
from torch import Tensor
import pandas as pd
from ppo import ActorCritic
from ppo import ZFilter
from ppo import RunningStat
import gym
from os.path import join as joindir

model_path = '/home/wangxu/PycharmProjects/torchprojects/result/'
game = 'Humanoid-v2'
env = gym.make(game)
num_states = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]
network = ActorCritic(num_states, num_actions)
network.eval()
checkpoint = torch.load(
    joindir(model_path, 'ppo-modelpara-{}.pth'.format(game)))
network.load_state_dict(checkpoint['model_state_dict'])
normalized_state = checkpoint['normalized_state']
# print(normalized_state.rs.mean)
# print(normalized_state.rs.std)

for t in range(3):
    state = env.reset()
    done = False
    i = 0
    while not done:
        env.render()
        state = normalized_state(state)
        action_mean, action_logstd, value = network(
            Tensor(state).unsqueeze(0))
        action, logproba = network.select_action(action_mean, action_logstd)
        action = action.data.numpy()[0]
if __name__ == '__main__':
    datestr = datetime.datetime.now().strftime('%Y-%m-%d')
    args = add_arguments()

    record_dfs = pd.DataFrame(columns=['steps', 'reward'])
    reward_cols = []
    for i in range(args.num_parallel_run):
        args.seed += 1
        reward_record = pd.DataFrame(vpg())
        record_dfs = record_dfs.merge(reward_record,
                                      how='outer',
                                      on='steps',
                                      suffixes=('', '_{}'.format(i)))
        reward_cols.append('reward_{}'.format(i))

    record_dfs = record_dfs.drop(columns='reward').sort_values(
        by='steps', ascending=True).ffill().bfill()
    record_dfs['reward_mean'] = record_dfs[reward_cols].mean(axis=1)
    record_dfs['reward_std'] = record_dfs[reward_cols].std(axis=1)
    record_dfs['reward_smooth'] = record_dfs['reward_mean'].ewm(
        span=1000).mean()
    record_dfs['reward_smooth_std'] = record_dfs['reward_std'].ewm(
        span=1000).mean()
    record_dfs.to_csv(
        joindir(RESULT_DIR,
                'vpg-record-{}-{}.csv'.format(args.env_name, datestr)))

    # Plot
    plt.figure(figsize=(12, 6))
    plt.plot(record_dfs['steps'], record_dfs['reward_smooth'], label='reward')
    plt.fill_between(
        record_dfs['steps'],
        record_dfs['reward_smooth'] - record_dfs['reward_smooth_std'],
        record_dfs['reward_smooth'] + record_dfs['reward_smooth_std'],
        color='b',
        alpha=0.2)
    plt.legend()
    plt.xlabel('steps of env interaction (sample complexity)')
    plt.ylabel('average reward')
    plt.title('VPG on {}'.format(args.env_name))
    plt.savefig(
        joindir(RESULT_DIR, 'vpg-{}-{}.pdf'.format(args.env_name, datestr)))
def exam_legency(request):
    if request.method == 'GET':
        # >>>> generate and load exam problems <<<<
        data = {
            'choice': [],
            'completion': [],
            'true_or_false': [],
            'program_correction': [],
            'program_reading': [],
            'program_design': [],
        }
        # whether this user has permission for this exam
        permission_flag = False
        if 'id' in request.GET:
            exam_id = request.GET['id']
            # get user's classes id
            if request.user.is_staff == 1 or request.user.myuser.usertype == 2:
                # None represents a match with any classes
                user_classes = None
            else:
                user_classes = request.user.myuser.classes
            user_name = request.user.username
            path = 'data/exams'
            filename = joindir(path, '{}.json'.format(exam_id))
            if isfile(filename):
                with open(filename, 'r') as f:
                    exam_json = json.load(f)
                if (user_classes is not None) and (exam_json['classes'] !=
                                                   user_classes):
                    messages.error(request,
                                   'You are not allowed to take this exam')
                else:
                    exam_ress = ExamResult.objects.filter(username=user_name,
                                                          examname=exam_id)
                    if (len(exam_ress) > 0) and (user_classes is not None) \
                            and (exam_ress[0].is_submitted == 1):
                        messages.success(request,
                                         'You have already finished this exam')
                        data = exam_ress[0].data.copy()
                        data.update({'score': exam_ress[0].score})
                        return render(request, 'exam_submit.html', data)
                    else:
                        permission_flag = True
            else:
                messages.error(request,
                               'Exam {} does not exist'.format(exam_id))
        else:
            messages.error(request,
                           'Please choose an exam from the exam list')
        if permission_flag:
            if len(exam_ress) > 0:
                # not the first visit: load this user's problems for the exam
                data = exam_ress[0].data
            else:
                # first visit: generate the exam problems and store them
                # for this user
                problems = load_problem_base(exam_json)
                data = problems_to_data(problems, exam_json)
                exam_res = ExamResult(username=user_name,
                                      examname=exam_id,
                                      data=data,
                                      is_submitted=0,
                                      examjson=exam_json)
                exam_res.save()
        return render(request, 'exam.html', process_for_render(data))
    elif request.method == 'POST':
        # >>>> save answers or submit for grading <<<<
        # example: request.POST = <QueryDict: {'2_A': ['on'], 'save': ['']}>
        exam_id = request.GET['id']
        user_name = request.user.username
        if 'save' in request.POST:
            # save the answers
            exam_res = ExamResult.objects.get(username=user_name,
                                              examname=exam_id)
            exam_res.data = post_to_answers(exam_res.data, request.POST)
            exam_res.save()
            messages.success(request, 'Saved successfully')
            return render(request, 'exam.html',
                          process_for_render(exam_res.data))
        elif 'submit' in request.POST:
            # submit and grade
            exam_res = ExamResult.objects.get(username=user_name,
                                              examname=exam_id)
            exam_res.data = post_to_answers(exam_res.data, request.POST,
                                            exam_res.examjson)
            exam_res.data = score_answers(exam_res.data)
            exam_res.score = generate_total_score(exam_res.data)
            exam_res.is_submitted = 1
            exam_res.save()
            messages.success(request, 'Submitted successfully')
            data = exam_res.data.copy()
            data.update({'score': exam_res.score})
            return render(request, 'exam_submit.html',
                          process_for_render(data))
for i in range(100):
    problem.loc[i, 'id'] = i
    problem.loc[i, 'description'] = hashlib.md5(
        str(i).encode('utf-8')).hexdigest()
    problem.loc[i, 'A'] = hashlib.md5(
        'A{}'.format(i).encode('utf-8')).hexdigest()[:10]
    problem.loc[i, 'B'] = hashlib.md5(
        'B{}'.format(i).encode('utf-8')).hexdigest()[:10]
    problem.loc[i, 'C'] = hashlib.md5(
        'C{}'.format(i).encode('utf-8')).hexdigest()[:10]
    problem.loc[i, 'D'] = hashlib.md5(
        'D{}'.format(i).encode('utf-8')).hexdigest()[:10]
    problem.loc[i, 'answer'] = np.random.choice(list('ABCD'))
    problem.loc[i, 'tag'] = np.random.choice(['C_class', 'Cpp_class'])
problem['id'] = problem['id'].astype(int)
problem.to_csv(joindir(problem_path, 'Choice.csv'))

# Completion
problem = pd.DataFrame()
for i in range(100):
    problem.loc[i, 'id'] = i
    problem.loc[i, 'description'] = \
        hashlib.md5(str(i).encode('utf-8')).hexdigest() + ' ____ ' + \
        hashlib.md5(str(i).encode('utf-8')).hexdigest()
    problem.loc[i, 'answer'] = hashlib.md5(
        str(i).encode('utf-8')).hexdigest()[:5]
    problem.loc[i, 'tag'] = np.random.choice(['C_class', 'Cpp_class'])
problem['id'] = problem['id'].astype(int)
problem.to_csv(joindir(problem_path, 'Completion.csv'))

# TrueOrFalse
problem = pd.DataFrame()
        dd[0:x] = d.values[0:x]
        smooth_data = dd
    else:
        smooth_data = data['mean_ep_reward']
    return smooth_data


# 1 for PPO_simple
# 0 for PPO 5 parallel run
# 10 for PPO and PPO_origin comparison
point = 2
env = 'Pendulum-v0'
RESULT_DIR = '/home/wangxu/PycharmProjects/torchprojects/result/'

if point == 10:
    df0 = pd.read_csv(joindir(RESULT_DIR, 'ppo-record-{}.csv'.format(env)))
    df1 = pd.read_csv(
        joindir(RESULT_DIR, 'ppo-origin-record-{}.csv'.format(env)))
    # no index columns
    df0['mean_ep_reward'] = move_avg(df0, parallel_num=5, sm=11)
    df1['mean_ep_reward'] = move_avg(df1, parallel_num=5, sm=11)
    df0['isorigin'] = 0
    df1['isorigin'] = 1
    df = pd.concat([df0, df1], ignore_index=True)
    sns.set(style='darkgrid', font_scale=1.0)
    sns.tsplot(data=df,
               time='episode',
               value='mean_ep_reward',
               unit='parallel_run',
               condition='isorigin')  # , err_style='ci_band')
    # tsplot(data, time=, value=, unit= compute mean, condition= different Env)
def runner(args):
    # cuda id
    cid = mp.current_process().pid % NUM_TOTAL_GPUS

    # environment
    env = gym.make(args.env_name)
    dim_obs = env.observation_space.shape[0]
    dim_act = env.action_space.shape[0]
    max_episode_steps = env._max_episode_steps

    # seeding
    env.seed(args.seed)
    torch.manual_seed(args.seed)

    # policy and value network
    network = ActorCritic(dim_obs,
                          dim_act,
                          layer_norm=args.layer_norm,
                          append_time=args.append_time,
                          init_std=args.init_std)
    optimizer = opt.Adam(network.parameters(), lr=args.lr)

    # running state normalization
    running_state = ZFilter((dim_obs, ), clip=5.0)

    # total number of steps of interaction with environment
    global_steps = 0

    # render to mp4
    render = Render(env.env.sim, args.env_name)
    video_folder = joindir(RESULT_DIR, args.label)
    mkdir(video_folder, exist_ok=True)

    step_size = Schduler(args.step_size, args.schedule_stepsize,
                         args.num_episode, end_val=0.01)
    lr = Schduler(args.lr, args.schedule_adam, args.num_episode)
    y2_max = Schduler(args.y2_max, args.schedule_y2max, args.num_episode,
                      end_val=0.05)
    mean_ratio = Schduler(args.mean_ratio, args.schedule_meanratio,
                          args.num_episode)
    rollout = Rollout()

    # for direct method
    # y2_max = compute_allowed_mu2(0, args.epsilon, args.delta, dim_act)

    for i_episode in range(args.num_episode):
        # step0: validation
        if i_episode % args.val_num_episode == 0:
            if args.record_KL:
                meanepreward_val, meaneplen_val = rollout.rollout_validate_KL(
                    env, network, args, running_state, max_episode_steps)
            else:
                meanepreward_val, meaneplen_val = rollout.rollout_validate(
                    env, network, args, running_state, max_episode_steps)
        else:
            meanepreward_val, meaneplen_val = np.nan, np.nan

        # step0: save mp4
        if (i_episode + 1) == args.num_episode:
            rollout.rollout_render(env, network, args, running_state, render,
                                   video_folder)

        # step1: perform current policy to collect on-policy transitions
        memory, meanepreward, meaneplen, num_steps = rollout.rollout_train(
            env, network, args, running_state, max_episode_steps)
        global_steps += num_steps

        # step2: extract variables from trajectories
        batch_size = len(memory)
        states, values, action_means, actions, ys, masks, next_states, \
            rewards = memory.tsample()
        returns = Tensor(batch_size)
        deltas = Tensor(batch_size)
        advantages = Tensor(batch_size)
        prev_return = 0
        prev_value = 0
        prev_advantage = 0
        for i in reversed(range(batch_size)):
            returns[i] = rewards[i] + args.gamma * prev_return * masks[i]
            deltas[i] = rewards[i] + args.gamma * prev_value * masks[i] \
                - values[i]
            # ref: https://arxiv.org/pdf/1506.02438.pdf (generalized advantage estimation)
            advantages[i] = deltas[i] \
                + args.gamma * args.lamda * prev_advantage * masks[i]
            prev_return = returns[i]
            prev_value = values[i]
            prev_advantage = advantages[i]
        # we cannot normalize advantages here; moreover, only the sign matters
        if args.advantage_norm:
            advantages = advantages - advantages.median()

        # step3: set targets
        if args.method == 'ES':
            target_means = Variable(action_means + step_size.value() *
                                    (actions - action_means))
        elif args.method == 'direct':
            y2_max_value = y2_max.value()
            ys_norm = ys.pow(2).sum(dim=1, keepdim=True)
            ys_scale = ys / ys_norm.sqrt() * np.sqrt(y2_max_value)
            ys_target = torch.where(ys_norm > y2_max_value, ys_scale, ys)
            ys_target = torch.where(
                advantages.unsqueeze(1) >= 0, ys_target, -ys_target)
            target_means = Variable(action_means +
                                    ys_target * network.action_std)
        elif args.method == 'ES-MA1':
            mask_advantages = advantages.clone().masked_fill_(
                advantages < 0, 0)
            half_width = args.n_points
            ratio = mean_ratio.value()
            new_ys = torch.zeros(ys.shape)
            start_ind = 0
            for i in range(batch_size):
                if mask_advantages[i] > 0:
                    norm = 0.0
                    for j in range(max(start_ind, i - half_width),
                                   i + half_width + 1):
                        weight = mask_advantages[j] * args.gamma**np.abs(i - j)
                        norm += weight
                        new_ys[i] += ys[j] * weight
                        if masks[j] == 0:
                            break
                    if norm > 0:
                        new_ys[i] = new_ys[i] / norm * ratio \
                            + ys[i] * (1 - ratio)
                if masks[i] == 0:
                    start_ind = i + 1
            target_means = Variable(action_means + step_size.value() *
                                    new_ys * network.action_std)
        elif args.method == 'ES-MA2':
            mask_advantages = advantages.clone().masked_fill_(
                advantages < 0, 0)
            half_width = args.n_points
            new_ys = torch.zeros(ys.shape)
            start_ind = 0
            for i in range(batch_size):
                if mask_advantages[i] > 0:
                    norm = 0.0
                    for j in range(max(start_ind, i - half_width),
                                   i + half_width + 1):
                        weight = mask_advantages[j] * args.beta**np.abs(i - j)
                        norm += weight
                        new_ys[i] += ys[j] * weight
                        if masks[j] == 0:
                            break
                    if norm > 0:
                        new_ys[i] = new_ys[i] / norm
                if masks[i] == 0:
                    start_ind = i + 1
            target_means = Variable(action_means + step_size.value() *
                                    new_ys * network.action_std)

        # self adaptation
        multiplier = ys.pow(2)
        multiplier = multiplier.masked_fill_(
            advantages.unsqueeze(1) < 0, 1.0).mean(dim=0).sqrt()
        network.action_std = nn.Parameter(network.action_std * multiplier)

        # step4: learn
        # load the network to GPU and train on GPU, then use the network on CPU
        if args.use_cuda:
            states, target_means, returns, network_train = to_cuda(
                cid, states, target_means, returns, network)
        else:
            network_train = network
        for i_epoch in range(
                int(args.num_epoch * batch_size / args.minibatch_size)):
            # sample from current batch
            minibatch_ind = np.random.choice(batch_size,
                                             args.minibatch_size,
                                             replace=False)
            minibatch_states = states[minibatch_ind]
            minibatch_action_means = network_train._forward_actor(
                minibatch_states[:, :-1])
            minibatch_target_means = target_means[minibatch_ind]
            minibatch_returns = returns[minibatch_ind]
            minibatch_newvalues = network_train._forward_critic(
                minibatch_states).flatten()

            loss_policy = torch.mean(
                (minibatch_target_means - minibatch_action_means).pow(2))

            # not sure whether the value loss should be clipped as well
            # clip example: https://github.com/Jiankai-Sun/Proximal-Policy-Optimization-in-Pytorch/blob/master/ppo.py
            # however, it does not make sense to clip a score-like value by a
            # dimensionless clipping parameter; moreover, the original paper
            # does not mention a clipped value
            if args.lossvalue_norm:
                minibatch_return_6std = 6 * minibatch_returns.std()
                loss_value = torch.mean(
                    (minibatch_newvalues -
                     minibatch_returns).pow(2)) / minibatch_return_6std
            else:
                loss_value = torch.mean(
                    (minibatch_newvalues - minibatch_returns).pow(2))

            if i_epoch > int(30 * batch_size / args.minibatch_size):
                loss_coeff_value = 0
            else:
                loss_coeff_value = args.loss_coeff_value

            # https://en.wikipedia.org/wiki/Differential_entropy
            # entropy of a normal distribution is ln(sig * sqrt(2 * pi * e))
            # torch.mean(network.actor_logstd + .5 * math.log(2.0 * math.pi * math.e))
            # the constant term is of no use and is ignored
            if args.loss_coeff_entropy > 0:
                loss_entropy = torch.mean(torch.log(network_train.action_std))
                total_loss = loss_policy + loss_coeff_value * loss_value \
                    - args.loss_coeff_entropy * loss_entropy
            else:
                loss_entropy = np.nan
                total_loss = loss_policy + loss_coeff_value * loss_value

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        if lr.schedule != 'none':
            lr_now = lr.value()
            # ref: https://stackoverflow.com/questions/48324152/
            for g in optimizer.param_groups:
                g['lr'] = lr_now

        if args.use_cuda:
            network = network_train.cpu()

        if (i_episode % args.val_num_episode == 0) and (args.record_KL):
            mean_KL, max_KL = rollout.calculate_KL(network)
        else:
            mean_KL, max_KL = np.nan, np.nan

        if (i_episode + 1) % args.log_num_episode == 0:
            print('==============================================')
            print('[{}] #ep={} Reward: {:.4f} ({:.4f}) Len: {:.0f}({:.0f})'.
                  format(args.label, i_episode, meanepreward,
                         meanepreward_val, meaneplen, meaneplen_val))
            print('advantages_positive_ratio={:.3f} action_std={} mean_KL={} '
                  'max_KL={}'.format(
                      float((advantages > 0).sum()) / batch_size,
                      network.action_std, mean_KL, max_KL))
            network_action_std = [
                '{:.2f}'.format(i)
                for i in list(network.action_std.detach().numpy()[0])
            ]
            print('action_std={}'.format(network_action_std))

        lr.step()
        step_size.step()
        y2_max.step()
        mean_ratio.step()

    return network
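# A worked example (standalone, made-up numbers) of the return / GAE
# recursion in step2 of `runner` above, on a three-step episode:
gamma, lamda = 0.99, 0.97
rewards = [1.0, 1.0, 1.0]
values = [0.5, 0.5, 0.5]
masks = [1, 1, 0]  # 0 marks the episode end
prev_return = prev_value = prev_advantage = 0.0
returns, advantages = np.zeros(3), np.zeros(3)
for i in reversed(range(3)):
    returns[i] = rewards[i] + gamma * prev_return * masks[i]
    delta = rewards[i] + gamma * prev_value * masks[i] - values[i]
    advantages[i] = delta + gamma * lamda * prev_advantage * masks[i]
    prev_return, prev_value, prev_advantage = \
        returns[i], values[i], advantages[i]
# returns == [1 + 0.99 * 1.99, 1 + 0.99 * 1, 1] == [2.9701, 1.99, 1.0]
assert np.allclose(returns, [2.9701, 1.99, 1.0])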