Example #1
def solution_submit(request, problem_id):
    if request.method == "POST":
        problem_obj = Problem.objects.get(problem_id=problem_id)
        user_prof = UserProfile.objects.get(user=request.user)
        solution_obj = SolutionCode(
            problem_associated=problem_obj,
            user_associated=user_prof,
            code_text=request.FILES['code_upload'],
        )
        code = request.FILES['code_upload'].read()
        solution_obj.save()
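        # the FileField saves the upload under the default name 'sol'; it is renamed below to sol_<pk>.cpp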

        media_root = settings.MEDIA_ROOT
        sol_dir_path = joindir(media_root, 'solutions',
                               'user_{0}'.format(request.user),
                               'prob_{0}'.format(problem_id))
        old_sol_name = joindir(sol_dir_path, 'sol')
        new_sol_name = joindir(sol_dir_path,
                               'sol_{0}.cpp'.format(solution_obj.pk))
        print "apd " + str(old_sol_name)
        os.rename(old_sol_name, new_sol_name)

        checker(request, problem_obj, solution_obj)

    return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
Example #2
def create_topojson(dict, name, directory='shapes-topojson'):
    # dict is in geojson format
    tmp_fnm = joindir(directory, '{}.geojson'.format(name))
    with open(tmp_fnm, 'w+') as f:
        f.write(dump_json(dict))
    # need to have topojson installed
    print('creating {}'.format(name))
    os.system("topojson {tmp} -o {fnm} --properties".format(
        tmp=tmp_fnm, fnm=joindir(directory, name + '.topojson')))
Example #3
def check_test_cases(request):
    """
    如果有新增测试点但是没有生存相应的info文件,那么就生产相应的文件
    """
    path = 'data/test_case/'
    count = 0
    test_cases = listdir(path)
    for t in test_cases:
        if isdir(joindir(path, t)):
            if not isfile(joindir(path, t, 'info')):
                process_test_case(joindir(path, t))
                count += 1
    if count > 0:
        messages.success(request, 'Successfully generated info for {} test cases'.format(count))
Example #4
def checker(request, problem_obj, solution_obj):

    verdict = {
        1: "Accepted",
        2: "Wrong answer",
        3: "Time Limit Exceeded",
        4: "Compilation Error",
    }
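    # locate the problem's input files, compile the submitted C++ source, then run the binary on every input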

    input_dir_path = os.path.dirname(os.path.dirname(__file__))
    input_dir_path = joindir(
        input_dir_path,
        'media/problems/problem_{0}/input'.format(problem_obj.problem_id))
    all_input_list = os.listdir(input_dir_path)
    all_input_list.sort()

    # check for compilation error
    sol_dir_path = os.path.dirname(os.path.dirname(__file__))
    sol_dir_path = joindir(
        sol_dir_path,
        'media/solutions/user_{0}/prob_{1}'.format(request.user.username,
                                                   problem_obj.problem_id))

    compile_proc = subprocess.Popen(shlex.split(
        'g++ sol_{0}.cpp -o sol_{0}'.format(str(solution_obj.pk))),
                                    cwd=sol_dir_path,
                                    shell=False)
    compile_proc.wait()

    all_sol_list = os.listdir(sol_dir_path)

    # name of the binary produced by the g++ command above
    code_object_file = 'sol_{0}'.format(solution_obj.pk)

    if code_object_file in all_sol_list:

        for filename in all_input_list:
            file_path = joindir(input_dir_path, filename)
            # feed each input file to the compiled binary on stdin
            with open(file_path, 'r') as file_obj:
                run_proc = subprocess.Popen(shlex.split('./' +
                                                        str(code_object_file)),
                                            stdin=file_obj,
                                            cwd=sol_dir_path,
                                            shell=False)
                run_proc.wait()
    else:
        return verdict[4]
Example #5
def examlist_legency(request):
    """
    显示考试列表,显示考试班级和用户班级相同的考试,并且装到dict里面传递给render
    """
    path = 'data/exams'
    # get user's classes id
    if request.user.is_staff == 1 or request.user.myuser.usertype == 2:
        # represent match any classes
        user_classes = None
    else:
        user_classes = request.user.myuser.classes
    # list all the exams
    exam_list = listdir(path)
    exam_list = [item for item in exam_list if re.match(r'.*\.json', item)]

    exam_jsons = []
    for exam in exam_list:
        with open(joindir(path, exam), 'r') as f:
            exam_json = json.load(f)
            exam_json['id'] = exam.replace('.json', '')
            if user_classes is None or exam_json['classes'] == user_classes:
                exam_jsons.append(exam_json)

    exam_list = [{'name': j['name'], 'id': j['id']} for j in exam_jsons]

    return render(request, 'examlist.html', {'exam_list': exam_list})
Example #6
def test_record(args):
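    # run ppo() once per parallel run with a fresh seed; save rewards to CSV and the trained network to a .pth checkpoint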
    record_dfs = []
    for i in range(args.num_parallel_run):
        args.seed += 1
        network, normalized_state, reward_record = ppo(args)
        record = pd.DataFrame(reward_record)
        record['parallel_run'] = i
        record_dfs.append(record)
    record_dfs = pd.concat(record_dfs, axis=0)
    record_dfs.to_csv(
        joindir(RESULT_DIR, 'ppo-record-{}.csv'.format(args.env_name)))
    torch.save(
        {
            'model_state_dict': network.state_dict(),
            'normalized_state': normalized_state
        }, joindir(RESULT_DIR, 'ppo-modelpara-{}.pth'.format(args.env_name)))
Example #7
def process_test_case(path, spj=False):
    size_cache = {}
    md5_cache = {}
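    # normalize CRLF to LF in place; cache each file's size and, for .out files, the md5 of the stripped content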

    test_case_list = listdir(path)
    test_case_list = sorted([
        item for item in test_case_list
        if item[-2:] == 'in' or item[-3:] == 'out'
    ])

    for item in test_case_list:
        with open(joindir(path, item), 'r+b') as f:
            content = f.read().replace(b'\r\n', b'\n')
            size_cache[item] = len(content)
            if item.endswith('.out'):
                md5_cache[item] = hashlib.md5(content.rstrip()).hexdigest()
            f.seek(0)
            f.write(content)
            f.truncate()

    test_case_info = {'spj': spj, 'test_cases': {}}

    info = []

    if spj:
        for index, item in enumerate(test_case_list):
            data = {'input_name': item, 'input_size': size_cache[item]}
            info.append(data)
            test_case_info['test_cases'][str(index + 1)] = data
    else:
        # ["1.in", "1.out", "2.in", "2.out"] => [("1.in", "1.out"), ("2.in", "2.out")]
        test_case_list = zip(*[test_case_list[i::2] for i in range(2)])
        for index, item in enumerate(test_case_list):
            data = {
                "stripped_output_md5": md5_cache[item[1]],
                "input_size": size_cache[item[0]],
                "output_size": size_cache[item[1]],
                "input_name": item[0],
                "output_name": item[1]
            }
            info.append(data)
            test_case_info["test_cases"][str(index + 1)] = data

    with open(joindir(path, 'info'), "w", encoding="utf-8") as f:
        f.write(json.dumps(test_case_info, indent=4))

    return test_case_info
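
A minimal usage sketch (the directory name is hypothetical): for a folder containing 1.in and 1.out, the call writes an info file and returns the same structure:

info = process_test_case('data/test_case/example')
# info == {'spj': False,
#          'test_cases': {'1': {'stripped_output_md5': '...',
#                               'input_size': ..., 'output_size': ...,
#                               'input_name': '1.in', 'output_name': '1.out'}}}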
Example #8
def test(args):
    record_dfs = []
    for i in range(args.num_parallel_run):
        args.seed += 1
        reward_record = pd.DataFrame(ppo(args))
        reward_record['#parallel_run'] = i
        record_dfs.append(reward_record)
    record_dfs = pd.concat(record_dfs, axis=0)
    record_dfs.to_csv(joindir(RESULT_DIR, 'ppo-record-{}.csv'.format(args.env_name)))
Example #9
def run_single_and_plot(config, algo_name='CEM'):
    record_dfs = pd.DataFrame(columns=['steps', 'reward'])
    reward_cols = []
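    # run num_trials independent trials and outer-join their reward curves on the shared 'steps' column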
    for i in range(config['num_trials']):
        config['seed'] = np.random.randint(1000)
        master = Master(config)
        master.run()
        reward_record = pd.DataFrame(master.reward_record)
        record_dfs = record_dfs.merge(reward_record,
                                      how='outer',
                                      on='steps',
                                      suffixes=('', '_{}'.format(i)))
        reward_cols.append('reward_{}'.format(i))

    record_dfs = record_dfs.drop(columns='reward').sort_values(
        by='steps', ascending=True).ffill().bfill()
    record_dfs['reward_mean'] = record_dfs[reward_cols].mean(axis=1)
    record_dfs['reward_std'] = record_dfs[reward_cols].std(axis=1)
    record_dfs['reward_smooth'] = record_dfs['reward_mean'].ewm(
        span=1000).mean()
    record_dfs['reward_smooth_std'] = record_dfs['reward_std'].ewm(
        span=1000).mean()
    record_dfs.to_csv(
        joindir(TUNE_DIR, '{}-record-{}.csv'.format(algo_name,
                                                    config['env_name'])))

    # Plot
    plt.figure(figsize=(12, 6))
    plt.plot(record_dfs['steps'], record_dfs['reward_smooth'], label='reward')
    plt.fill_between(
        record_dfs['steps'],
        record_dfs['reward_smooth'] - record_dfs['reward_smooth_std'],
        record_dfs['reward_smooth'] + record_dfs['reward_smooth_std'],
        color='b',
        alpha=0.2)
    plt.legend()
    plt.xlabel('steps of env interaction (sample complexity)')
    plt.ylabel('average reward')
    plt.title('{} on {}'.format(algo_name, config['env_name']))
    plt.savefig(
        joindir(TUNE_DIR, '{}-plot-{}.pdf'.format(algo_name,
                                                  config['env_name'])))
Example #10
def check_exams(request):
    path = 'data/exams'

    exam_list = listdir(path)
    exam_list = [item for item in exam_list if re.match(r'.*\.json', item)]

    count = 0
    for exam in exam_list:
        eid = exam[:-5]
        exam_ress = Exam.objects.filter(eid=eid)
        if len(exam_ress) == 0:
            count += 1
            with open(joindir(path, exam), 'r') as f:
                exam_json = json.load(f)
                exam_json['eid'] = exam.replace('.json', '')
                exam_obj = Exam(**exam_json)
                exam_obj.save()
        dangerousremove(joindir(path, exam))
    if count > 0:
        messages.success(request, 'Successfully added {} exams'.format(count))
Example #11
def test(args):
    record_dfs = []
    for i in range(args.num_parallel_run):
        args.seed += 1
        reward_record = pd.DataFrame(ppo(args))
        reward_record['#parallel_run'] = i
        record_dfs.append(reward_record)
    record_dfs = pd.concat(record_dfs, axis=0)
    record_dfs.to_csv(
        joindir(
            RESULT_DIR,
            'ppo-record-{0}_{1}hidden_{2}threshold_{3}repeat'.format(
                args.env_name, config.hid_num, config.thres, repeat)))
Example #12
def train(args):
    record_dfs = []
    assert args.num_parallel_run == 1
    for i in range(args.num_parallel_run):
        args.seed += 1
        reward_df, rwds = ppo(args, network, policy_buffer, config)
        reward_record = pd.DataFrame(reward_df)
        reward_record['#parallel_run'] = i
        record_dfs.append(reward_record)
    record_dfs = pd.concat(record_dfs, axis=0)
    record_dfs.to_csv(
        joindir(RESULT_DIR, 'ppo-record-{}.csv'.format(args.env_name)))
    return rwds
Example #13
def rollout_render(self, env, network, args, running_state, render,
                   video_folder):
    counter = 0
    state = env.reset()
    if args.state_norm:
        state = running_state(state)
    render.render()
    while counter < 5:
        action_mean = network._forward_actor(Tensor(state).unsqueeze(0))
        action = action_mean.data.numpy()[0]
        next_state, _, done, _ = env.step(action)
        next_state = running_state(next_state)
        render.render()
        if done:
            counter += 1
            state = env.reset()
            if args.state_norm:
                state = running_state(state)
            render.render()
        state = next_state
    render.to_mp4(
        joindir(video_folder, '{}-{}.mp4'.format(args.label, args.seed)))
Example #14
times_loop = 1
# select the stock pool
pool_id = '000300'

pool_list = {
    '000001': 'SHCI',
    '000300': 'hs300',
    '000905': 'zz500',
    '000906': 'zz800',
    '399005': 'SmallCap',
    '399006': 'GEMI',
    '399106': 'SZCI',
    '399317': 'ASCI'
}
OUTPUT_RESULT_DIR = './tuning_result_bayes_v1'
OUTPUT_FORMULA_DIR = joindir(OUTPUT_RESULT_DIR, 'formula')
OUTPUT_FORMULA_FILE = joindir(OUTPUT_FORMULA_DIR, 'tuned_formulas_bayes.txt')
OUTPUT_DATA_DIR = joindir(OUTPUT_RESULT_DIR, 'data')
OUTPUT_DATA_IS_DIR = joindir(OUTPUT_DATA_DIR, 'insample')  # empty now
OUTPUT_DATA_OS_DIR = joindir(OUTPUT_DATA_DIR, 'outsample')  # empty now
OUTPUT_AUX_DIR = joindir(OUTPUT_RESULT_DIR, 'auxilliary')
OUTPUT_SCORE_FILE = joindir(OUTPUT_AUX_DIR, 'score.csv')
OUTPUT_FAILURE_FILE = joindir(OUTPUT_AUX_DIR, 'failure.csv')
OUTPUT_LOG_FILE = joindir(OUTPUT_AUX_DIR, 'log.txt')

makedirs(OUTPUT_FORMULA_DIR, exist_ok=True)
makedirs(OUTPUT_DATA_IS_DIR, exist_ok=True)
makedirs(OUTPUT_DATA_OS_DIR, exist_ok=True)
makedirs(OUTPUT_AUX_DIR, exist_ok=True)

logging.basicConfig(filename=OUTPUT_LOG_FILE)
Example #15
def check_problem_base(request):
    converters = [
        {
            'id': int,
            'description': str,
            'A': str,
            'B': str,
            'C': str,
            'D': str,
            'answer': str,
            'tag': str
        },
        {
            'id': int,
            'description': str,
            'answer': str,
            'tag': str
        },
        {
            'id': int,
            'description': str,
            'answer': str,
            'tag': str
        },
        {
            'id': int,
            'description': str,
            'template': str,
            'test_case_id': str,
            'tag': str
        },
        {
            'id': int,
            'description': str,
            'answer': str,
            'tag': str
        },
        {
            'id': int,
            'description': str,
            'test_case_id': str,
            'tag': str
        },
    ]
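    # converters[count] matches, by position, the CSV file at the same index in problem_files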
    path = 'data/problems'
    counter = 0
    for count, file in enumerate(problem_files):
        if not isfile(joindir(path, file)):
            continue
        data = pd.read_csv(joindir(path, file),
                           index_col=0,
                           converters=converters[count])
        data.rename(columns={
            'id': 'pid',
            'A': 'choice_A',
            'B': 'choice_B',
            'C': 'choice_C',
            'D': 'choice_D'
        },
                    inplace=True)
        data['problem_type'] = count
        dlist = data.to_dict('records')
        for d in dlist:
            problem_ress = Problem.objects.filter(
                pid=d['pid'], problem_type=d['problem_type'])
            if len(problem_ress) == 0:
                problem = Problem(**d)
                problem.save()
                counter += 1
        if isfile(joindir(path, file)):
            dangerousremove(joindir(path, file))
    if counter > 0:
        messages.success(request, 'Successfully added {} problems'.format(counter))
Example #16
        record_dfs['reward_smooth'] - record_dfs['reward_smooth_std'],
        record_dfs['reward_smooth'] + record_dfs['reward_smooth_std'],
        color='b',
        alpha=0.2)
    plt.legend()
    plt.xlabel('steps of env interaction (sample complexity)')
    plt.ylabel('average reward')
    plt.title('{} on {}'.format(algo_name, config['env_name']))
    plt.savefig(
        joindir(TUNE_DIR, '{}-plot-{}.pdf'.format(algo_name,
                                                  config['env_name'])))


if __name__ == '__main__':

    logger = Logger(joindir(LOG_DIR, 'log_cem.txt'))

    trials = grid_search(run_cem, config_hopper)

    best_trial = sorted(trials, key=lambda x: x['score'])[-1]
    best_config = best_trial['config']
    best_score = best_trial['score']

    with open(joindir(TUNE_DIR, 'ARS-{}.json'.format(best_config['env_name'])),
              'w') as f:
        json.dump(best_config, f, indent=4, sort_keys=True)

    logger.info('========best solution found========')
    logger.info('best score: {}'.format(best_score))
    logger.info('best config: {}'.format(best_config))
Example #17
        record_dfs = record_dfs.merge(reward_record,
                                      how='outer',
                                      on='steps',
                                      suffixes=('', '_{}'.format(i)))
        reward_cols.append('reward_{}'.format(i))

    record_dfs = record_dfs.drop(columns='reward').sort_values(
        by='steps', ascending=True).ffill().bfill()
    record_dfs['reward_mean'] = record_dfs[reward_cols].mean(axis=1)
    record_dfs['reward_std'] = record_dfs[reward_cols].std(axis=1)
    record_dfs['reward_smooth'] = record_dfs['reward_mean'].ewm(
        span=1000).mean()
    record_dfs['reward_smooth_std'] = record_dfs['reward_std'].ewm(
        span=1000).mean()
    record_dfs.to_csv(
        joindir(RESULT_DIR,
                'cem-record-{}-{}.csv'.format(args.env_name, datestr)))

    # Plot
    plt.figure(figsize=(12, 6))
    plt.plot(record_dfs['steps'], record_dfs['reward_smooth'], label='reward')
    plt.fill_between(
        record_dfs['steps'],
        record_dfs['reward_smooth'] - record_dfs['reward_smooth_std'],
        record_dfs['reward_smooth'] + record_dfs['reward_smooth_std'],
        color='b',
        alpha=0.2)
    plt.legend()
    plt.xlabel('steps of env interaction (sample complexity)')
    plt.ylabel('average reward')
    plt.title('CEM on {}'.format(args.env_name))
    plt.savefig(
Example #18
import pandas as pd
import numpy as np
import argparse
import datetime
import math
import os
from collections import namedtuple
from os.path import join as joindir
from model import Policy, Value
from optimizer import conjugate_gradient, line_search
from utils import *
from running_state import *

if not os.path.exists('./result'):
    os.mkdir('./result')
    
Transition = namedtuple('Transition', ('state', 'action', 'mask', 'next_state', 'reward'))
EPS = 1e-10
RESULT_DIR = joindir('./result', '.'.join(__file__.split('.')[:-1]))
if not os.path.exists(RESULT_DIR):
    os.makedirs(RESULT_DIR)

    
def add_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env_name', type=str, default='Hopper-v2', 
                        help = 'gym environment to test algorithm')
    parser.add_argument('--seed', type=int, default=64, 
                        help = 'random seed')
    parser.add_argument('--num_episode', type=int, default=200, 
                        help = '')
    parser.add_argument('--batch_size', type=int, default=5000,
                        help = '')
    parser.add_argument('--max_step_per_episode', type=int, default=200,
Example #19
def load_problem_base_legency(exam_json):
    """
    载入题库并且根据exam_json来生成一套试卷
    返回的是该套试卷的一个dict,分别是每种题型的DataFrame
    """
    path = 'data/problems'

    # multiple-choice questions
    data_ch = _load_problem(path=joindir(path, 'Choice.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'A': str,
                                'B': str,
                                'C': str,
                                'D': str,
                                'answer': str,
                                'tag': str
                            },
                            tag=exam_json['tag_choice'],
                            num=exam_json['num_choice'])

    # completion (fill-in-the-blank) questions
    data_cp = _load_problem(path=joindir(path, 'Completion.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'answer': str,
                                'tag': str
                            },
                            tag=exam_json['tag_completion'],
                            num=exam_json['num_completion'])

    # true/false questions
    data_tf = _load_problem(path=joindir(path, 'TrueOrFalse.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'answer': str,
                                'tag': str
                            },
                            tag=exam_json['tag_trueorfalse'],
                            num=exam_json['num_trueorfalse'])

    # program-correction questions
    data_pc = _load_problem(path=joindir(path, 'ProgramCorrection.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'template': str,
                                'test_case_id': str,
                                'tag': str
                            },
                            tag=exam_json['tag_programcorrection'],
                            num=exam_json['num_programcorrection'])

    # program-reading questions
    data_pr = _load_problem(path=joindir(path, 'ProgramReading.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'answer': str,
                                'tag': str
                            },
                            tag=exam_json['tag_programreading'],
                            num=exam_json['num_programreading'])

    # program-design questions
    data_pd = _load_problem(path=joindir(path, 'ProgramDesign.csv'),
                            converters={
                                'id': int,
                                'description': str,
                                'test_case_id': str,
                                'tag': str
                            },
                            tag=exam_json['tag_programdesign'],
                            num=exam_json['num_programdesign'])

    ans = {
        'choice': data_ch,
        'completion': data_cp,
        'true_or_false': data_tf,
        'program_correction': data_pc,
        'program_reading': data_pr,
        'program_design': data_pd,
    }

    return ans
Example #20
from os import makedirs as mkdir
from os.path import join as joindir
from collections import namedtuple
import pandas as pd
import numpy as np
import argparse
import datetime
import math
import gym
import env
from scipy import stats
import sys

Transition = namedtuple(
    'Transition',
    ('state', 'value', 'action', 'logproba', 'mask', 'next_state', 'reward'))
EPS = 1e-10
RESULT_DIR = joindir('../result', '.'.join(__file__.split('.')[:-1]))
mkdir(RESULT_DIR, exist_ok=True)


# hyperparameter settings
class args(object):
    seed = 10
    num_episode = 200
    batch_size = 2048
    max_step_per_round = 1000
    gamma = 0.995
    lamda = 0.97
    log_num_episode = 1
    num_epoch = 10
    minibatch_size = 256
    clip = 0.2
Example #21
        # resize all panes:

        panes = self.filesArea.panes()
        panesize = self.filesArea.winfo_width() / len(panes)
        for pane in panes:
            self.filesArea.paneconfigure(pane, width = panesize)



if __name__ == '__main__':
    app = Application()

    app.master.title('QLC+ Multi-file Helper Utility')

    iconsdir = joindir(dirname(abspath(__file__)), 'icons')
    for functype, _ in FUNCTION_TYPES:
        icon = tk.PhotoImage(file=joindir(iconsdir, functype.lower() + '.png'))
        icon = icon.subsample(2, 2)
        ICONS[functype] = icon


    for f in (sys.argv[1:]):
        try:
            app.load_file(f)
        except Exception as e:
            print(e)

    app.mainloop()

Example #22
import pandas as pd
import torch
from torch import Tensor
from ppo import ActorCritic
from ppo import ZFilter
from ppo import RunningStat
import gym
from os.path import join as joindir

model_path = '/home/wangxu/PycharmProjects/torchprojects/result/'
game = 'Humanoid-v2'
env = gym.make(game)
num_states = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]
network = ActorCritic(num_states, num_actions)
network.eval()
checkpoint = torch.load(
    joindir(model_path, 'ppo-modelpara-{}.pth'.format(game)))
network.load_state_dict(checkpoint['model_state_dict'])
normalized_state = checkpoint['normalized_state']
#print(normalized_state.rs.mean)
#print(normalized_state.rs.std)

for t in range(3):
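    # play three episodes with the loaded policy, normalizing states and rendering each step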
    state = env.reset()
    done = False
    i = 0
    while (not done):
        env.render()
        state = normalized_state(state)
        action_mean, action_logstd, value = network(Tensor(state).unsqueeze(0))
        action, logproba = network.select_action(action_mean, action_logstd)
        action = action.data.numpy()[0]
Example #23
if __name__ == '__main__':
    datestr = datetime.datetime.now().strftime('%Y-%m-%d')
    args = add_arguments()

    record_dfs = pd.DataFrame(columns=['steps', 'reward'])
    reward_cols = []
    for i in range(args.num_parallel_run):
        args.seed += 1
        reward_record = pd.DataFrame(vpg())
        record_dfs = record_dfs.merge(reward_record, how='outer', on='steps', suffixes=('', '_{}'.format(i)))
        reward_cols.append('reward_{}'.format(i))

    record_dfs = record_dfs.drop(columns='reward').sort_values(by='steps', ascending=True).ffill().bfill()
    record_dfs['reward_mean'] = record_dfs[reward_cols].mean(axis=1)
    record_dfs['reward_std'] = record_dfs[reward_cols].std(axis=1)
    record_dfs['reward_smooth'] = record_dfs['reward_mean'].ewm(span=1000).mean()
    record_dfs['reward_smooth_std'] = record_dfs['reward_std'].ewm(span=1000).mean()
    record_dfs.to_csv(joindir(RESULT_DIR, 'vpg-record-{}-{}.csv'.format(args.env_name, datestr)))

    # Plot
    plt.figure(figsize=(12, 6))
    plt.plot(record_dfs['steps'], record_dfs['reward_smooth'], label='reward')
    plt.fill_between(record_dfs['steps'], record_dfs['reward_smooth'] - record_dfs['reward_smooth_std'], 
        record_dfs['reward_smooth'] + record_dfs['reward_smooth_std'], color='b', alpha=0.2)
    plt.legend()
    plt.xlabel('steps of env interaction (sample complexity)')
    plt.ylabel('average reward')
    plt.title('VPG on {}'.format(args.env_name))
    plt.savefig(joindir(RESULT_DIR, 'vpg-{}-{}.pdf'.format(args.env_name, datestr)))
    
Example #24
def exam_legency(request):

    if request.method == 'GET':
        # >>>> generate and load the exam problems <<<<

        data = {
            'choice': [],
            'completion': [],
            'true_or_false': [],
            'program_correction': [],
            'program_reading': [],
            'program_design': [],
        }

        # does this user have permission to take this exam?
        permission_flag = False
        if 'id' in request.GET:
            exam_id = request.GET['id']

            # get user's classes id
            if request.user.is_staff == 1 or request.user.myuser.usertype == 2:
                # represent match any classes
                user_classes = None
            else:
                user_classes = request.user.myuser.classes

            user_name = request.user.username
            path = 'data/exams'
            filename = joindir(path, '{}.json'.format(exam_id))
            if isfile(filename):
                with open(filename, 'r') as f:
                    exam_json = json.load(f)
                if (user_classes is not None) and (exam_json['classes'] !=
                                                   user_classes):
                    messages.error(request, 'You are not allowed to take this exam')
                else:
                    exam_ress = ExamResult.objects.filter(username=user_name,
                                                          examname=exam_id)
                    if (len(exam_ress) > 0) and (
                            user_classes
                            is not None) and (exam_ress[0].is_submitted == 1):
                        messages.success(request, 'You have already completed this exam')
                        data = exam_ress[0].data.copy()
                        data.update({'score': exam_ress[0].score})
                        return render(request, 'exam_submit.html', data)
                    else:
                        permission_flag = True
            else:
                messages.error(request, 'Exam {} does not exist'.format(exam_id))
        else:
            messages.error(request, 'Please choose the exam you want to take from the exam list')

        if permission_flag:
            if len(exam_ress) > 0:
                # if this is not the first visit, load the problems already generated for this user
                data = exam_ress[0].data
            else:
                # on the first visit, generate the exam problems and store them for this user
                problems = load_problem_base(exam_json)

                data = problems_to_data(problems, exam_json)

                exam_res = ExamResult(username=user_name,
                                      examname=exam_id,
                                      data=data,
                                      is_submitted=0,
                                      examjson=exam_json)
                exam_res.save()

        return render(request, 'exam.html', process_for_render(data))

    elif request.method == 'POST':

        # >>>> save answers or submit for grading <<<<
        # example: request.POST = <QueryDict: {'2_A': ['on'], 'save': ['']}>

        exam_id = request.GET['id']
        user_name = request.user.username

        if 'save' in request.POST:
            # save the answers
            exam_res = ExamResult.objects.get(username=user_name,
                                              examname=exam_id)
            exam_res.data = post_to_answers(exam_res.data, request.POST)
            exam_res.save()

            messages.success(request, 'Saved successfully')

            return render(request, 'exam.html',
                          process_for_render(exam_res.data))

        elif 'submit' in request.POST:
            # submit and grade
            exam_res = ExamResult.objects.get(username=user_name,
                                              examname=exam_id)
            exam_res.data = post_to_answers(exam_res.data, request.POST,
                                            exam_res.examjson)
            exam_res.data = score_answers(exam_res.data)
            exam_res.score = generate_total_score(exam_res.data)
            exam_res.is_submitted = 1
            exam_res.save()

            messages.success(request, 'Submitted successfully')

            data = exam_res.data.copy()
            data.update({'score': exam_res.score})

            return render(request, 'exam_submit.html',
                          process_for_render(data))
Example #25
for i in range(100):
    problem.loc[i, 'id'] = i
    problem.loc[i, 'description'] = hashlib.md5(
        str(i).encode('utf-8')).hexdigest()
    problem.loc[i, 'A'] = hashlib.md5(
        'A{}'.format(i).encode('utf-8')).hexdigest()[:10]
    problem.loc[i, 'B'] = hashlib.md5(
        'B{}'.format(i).encode('utf-8')).hexdigest()[:10]
    problem.loc[i, 'C'] = hashlib.md5(
        'C{}'.format(i).encode('utf-8')).hexdigest()[:10]
    problem.loc[i, 'D'] = hashlib.md5(
        'D{}'.format(i).encode('utf-8')).hexdigest()[:10]
    problem.loc[i, 'answer'] = np.random.choice(list('ABCD'))
    problem.loc[i, 'tag'] = np.random.choice(['C_class', 'Cpp_class'])
problem['id'] = problem['id'].astype(int)
problem.to_csv(joindir(problem_path, 'Choice.csv'))

# Completion
problem = pd.DataFrame()
for i in range(100):
    problem.loc[i, 'id'] = i
    problem.loc[i, 'description'] = \
        hashlib.md5(str(i).encode('utf-8')).hexdigest() + ' ____ ' + hashlib.md5(str(i).encode('utf-8')).hexdigest()
    problem.loc[i,
                'answer'] = hashlib.md5(str(i).encode('utf-8')).hexdigest()[:5]
    problem.loc[i, 'tag'] = np.random.choice(['C_class', 'Cpp_class'])
problem['id'] = problem['id'].astype(int)
problem.to_csv(joindir(problem_path, 'Completion.csv'))

# TrueOrFalse
problem = pd.DataFrame()
Example #26
            dd[0:x] = d.values[0:x]
            smooth_data = dd
    else:
        smooth_data = data['mean_ep_reward']
    return smooth_data


# 1 for PPO_simple
# 0 for PPO  5 parallel run
# 10 for PPO and PPO_origin comparison
point = 2
env = 'Pendulum-v0'
RESULT_DIR = '/home/wangxu/PycharmProjects/torchprojects/result/'

if point == 10:
    df0 = pd.read_csv(joindir(RESULT_DIR, 'ppo-record-{}.csv'.format(env)))
    df1 = pd.read_csv(
        joindir(RESULT_DIR,
                'ppo-origin-record-{}.csv'.format(env)))  # no index columns
    df0['mean_ep_reward'] = move_avg(df0, parallel_num=5, sm=11)
    df1['mean_ep_reward'] = move_avg(df1, parallel_num=5, sm=11)
    df0['isorigin'] = 0
    df1['isorigin'] = 1
    df = pd.concat([df0, df1], ignore_index=True)
    sns.set(style='darkgrid', font_scale=1.0)
    sns.tsplot(data=df,
               time='episode',
               value='mean_ep_reward',
               unit='parallel_run',
               condition='isorigin')  #, err_style='ci_band')
    # tsplot(data,  time=    ,value=    ,unit= compute mean ,condition= different Env)
Example #27
def runner(args):
    # cuda id
    cid = mp.current_process().pid % NUM_TOTAL_GPUS

    # environment
    env = gym.make(args.env_name)
    dim_obs = env.observation_space.shape[0]
    dim_act = env.action_space.shape[0]
    max_episode_steps = env._max_episode_steps

    # seeding
    env.seed(args.seed)
    torch.manual_seed(args.seed)

    # policy and value network
    network = ActorCritic(dim_obs,
                          dim_act,
                          layer_norm=args.layer_norm,
                          append_time=args.append_time,
                          init_std=args.init_std)
    optimizer = opt.Adam(network.parameters(), lr=args.lr)

    # running state normalization
    running_state = ZFilter((dim_obs, ), clip=5.0)

    # total number of steps of interaction with environment
    global_steps = 0

    # render to mp4
    render = Render(env.env.sim, args.env_name)
    video_folder = joindir(RESULT_DIR, args.label)
    mkdir(video_folder, exist_ok=True)

    step_size = Schduler(args.step_size,
                         args.schedule_stepsize,
                         args.num_episode,
                         end_val=0.01)
    lr = Schduler(args.lr, args.schedule_adam, args.num_episode)
    y2_max = Schduler(args.y2_max,
                      args.schedule_y2max,
                      args.num_episode,
                      end_val=0.05)
    mean_ratio = Schduler(args.mean_ratio, args.schedule_meanratio,
                          args.num_episode)

    rollout = Rollout()

    # for direct method
    # y2_max = compute_allowed_mu2(0, args.epsilon, args.delta, dim_act)

    for i_episode in range(args.num_episode):
        # step0: validation
        if i_episode % args.val_num_episode == 0:
            if args.record_KL:
                meanepreward_val, meaneplen_val = rollout.rollout_validate_KL(
                    env, network, args, running_state, max_episode_steps)
            else:
                meanepreward_val, meaneplen_val = rollout.rollout_validate(
                    env, network, args, running_state, max_episode_steps)
        else:
            meanepreward_val, meaneplen_val = np.nan, np.nan

        # step0: save mp4
        if (i_episode + 1) == args.num_episode:
            rollout.rollout_render(env, network, args, running_state, render,
                                   video_folder)

        # step1: perform current policy to collect on-policy transitions
        memory, meanepreward, meaneplen, num_steps = rollout.rollout_train(
            env, network, args, running_state, max_episode_steps)
        global_steps += num_steps

        # step2: extract variables from trajectories
        batch_size = len(memory)
        states, values, action_means, actions, ys, masks, next_states, rewards = memory.tsample(
        )
        returns, deltas, advantages = Tensor(batch_size), Tensor(
            batch_size), Tensor(batch_size)

        prev_return = 0
        prev_value = 0
        prev_advantage = 0
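        # walk the batch backwards: discounted returns and GAE advantages; masks[i] == 0 marks an episode end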
        for i in reversed(range(batch_size)):
            returns[i] = rewards[i] + args.gamma * prev_return * masks[i]
            deltas[i] = rewards[
                i] + args.gamma * prev_value * masks[i] - values[i]
            # ref: https://arxiv.org/pdf/1506.02438.pdf (generalization advantage estimate)
            advantages[i] = deltas[
                i] + args.gamma * args.lamda * prev_advantage * masks[i]

            prev_return = returns[i]
            prev_value = values[i]
            prev_advantage = advantages[i]

        # we cannot do advantages normalization here, moreover, only the sign matters
        if args.advantage_norm:
            advantages = advantages - advantages.median()

        # step3: set targets
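        # each branch builds target_means for the actor from the sampled actions / perturbations ys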

        if args.method == 'ES':
            target_means = Variable(action_means + step_size.value() *
                                    (actions - action_means))
        elif args.method == 'direct':
            y2_max_value = y2_max.value()
            ys_norm = ys.pow(2).sum(dim=1, keepdim=True)
            ys_scale = ys / ys_norm.sqrt() * np.sqrt(y2_max_value)
            ys_target = torch.where(ys_norm > y2_max_value, ys_scale, ys)
            ys_target = torch.where(
                advantages.unsqueeze(1) >= 0, ys_target, -ys_target)
            target_means = Variable(action_means +
                                    ys_target * network.action_std)
        elif args.method == 'ES-MA1':
            mask_advantages = advantages.clone().masked_fill_(
                advantages < 0, 0)
            half_width = args.n_points
            ratio = mean_ratio.value()
            new_ys = torch.zeros(ys.shape)
            start_ind = 0
            for i in range(batch_size):
                if mask_advantages[i] > 0:
                    norm = 0.0
                    for j in range(max(start_ind, i - half_width),
                                   i + half_width + 1):
                        weight = mask_advantages[j] * args.gamma**np.abs(i - j)
                        norm += weight
                        new_ys[i] += ys[j] * weight
                        if masks[j] == 0:
                            break
                    if norm > 0:
                        new_ys[i] = new_ys[i] / norm * ratio + ys[i] * (1 -
                                                                        ratio)
                if masks[i] == 0:
                    start_ind = i + 1
            target_means = Variable(action_means + step_size.value() * new_ys *
                                    network.action_std)
        elif args.method == 'ES-MA2':
            mask_advantages = advantages.clone().masked_fill_(
                advantages < 0, 0)
            half_width = args.n_points
            new_ys = torch.zeros(ys.shape)
            start_ind = 0
            for i in range(batch_size):
                if mask_advantages[i] > 0:
                    norm = 0.0
                    for j in range(max(start_ind, i - half_width),
                                   i + half_width + 1):
                        weight = mask_advantages[j] * args.beta**np.abs(i - j)
                        norm += weight
                        new_ys[i] += ys[j] * weight
                        if masks[j] == 0:
                            break
                    if norm > 0:
                        new_ys[i] = new_ys[i] / norm
                if masks[i] == 0:
                    start_ind = i + 1
            target_means = Variable(action_means + step_size.value() * new_ys *
                                    network.action_std)

        # self adaptation
        multiplier = ys.pow(2)
        multiplier = multiplier.masked_fill_(advantages.unsqueeze(1) < 0,
                                             1.0).mean(dim=0).sqrt()
        network.action_std = nn.Parameter((network.action_std * multiplier))

        # step4: learn
        # load the network to GPU and train on GPU, then use the network on CPU
        if args.use_cuda:
            states, target_means, returns, network_train = to_cuda(
                cid, states, target_means, returns, network)
        else:
            network_train = network

        for i_epoch in range(
                int(args.num_epoch * batch_size / args.minibatch_size)):
            # sample from current batch
            minibatch_ind = np.random.choice(batch_size,
                                             args.minibatch_size,
                                             replace=False)
            minibatch_states = states[minibatch_ind]
            minibatch_action_means = network_train._forward_actor(
                minibatch_states[:, :-1])
            minibatch_target_means = target_means[minibatch_ind]
            minibatch_returns = returns[minibatch_ind]
            minibatch_newvalues = network_train._forward_critic(
                minibatch_states).flatten()

            loss_policy = torch.mean(
                (minibatch_target_means - minibatch_action_means).pow(2))

            # not sure the value loss should be clipped as well
            # clip example: https://github.com/Jiankai-Sun/Proximal-Policy-Optimization-in-Pytorch/blob/master/ppo.py
            # however, it does not make sense to clip score-like value by a dimensionless clipping parameter
            # moreover, original paper does not mention clipped value
            if args.lossvalue_norm:
                minibatch_return_6std = 6 * minibatch_returns.std()
                loss_value = torch.mean(
                    (minibatch_newvalues -
                     minibatch_returns).pow(2)) / minibatch_return_6std
            else:
                loss_value = torch.mean(
                    (minibatch_newvalues - minibatch_returns).pow(2))
            if i_epoch > int(30 * batch_size / args.minibatch_size):
                loss_coeff_value = 0
            else:
                loss_coeff_value = args.loss_coeff_value

            # https://en.wikipedia.org/wiki/Differential_entropy
            # entropy of normal distribution should be ln(sig * sqrt(2 * pi * e))
            # torch.mean(network.actor_logstd + .5 * math.log(2.0 * math.pi * math.e))
            # the const term is of no use, ignored
            if args.loss_coeff_entropy > 0:
                loss_entropy = torch.mean(torch.log(network_train.action_std))
                total_loss = loss_policy + loss_coeff_value * loss_value - args.loss_coeff_entropy * loss_entropy
            else:
                loss_entropy = np.nan
                total_loss = loss_policy + loss_coeff_value * loss_value
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        if lr.schedule != 'none':
            lr_now = lr.value()
            # ref: https://stackoverflow.com/questions/48324152/
            for g in optimizer.param_groups:
                g['lr'] = lr_now

        if args.use_cuda:
            network = network_train.cpu()

        if (i_episode % args.val_num_episode == 0) and (args.record_KL):
            mean_KL, max_KL = rollout.calculate_KL(network)
        else:
            mean_KL, max_KL = np.nan, np.nan

        if (i_episode + 1) % args.log_num_episode == 0:
            print('==============================================')
            print('[{}] #ep={} Reward: {:.4f} ({:.4f})  Len: {:.0f}({:.0f})'.
                  format(
                      args.label,
                      i_episode,
                      meanepreward,
                      meanepreward_val,
                      meaneplen,
                      meaneplen_val,
                  ))
            print(
                'advantages_positive_ratio={:3f} action_std={} mean_KL={} max_KL={}'
                .format(
                    float((advantages > 0).sum()) / batch_size,
                    network.action_std, mean_KL, max_KL))
            network_action_std = [
                '{:.2f}'.format(i)
                for i in list(network.action_std.detach().numpy()[0])
            ]
            print('action_std={}'.format(network_action_std))

        lr.step()
        step_size.step()
        y2_max.step()
        mean_ratio.step()

    return network