Ejemplo n.º 1
0
 def initialize(self, trainer):
     """Prepare the extension for a training run.

     Remembers the trainer's output directory, clears the commands file
     stored there, records the job as running, and replaces the trainer's
     stop trigger with a command wrapper around the original trigger.

     Args:
         trainer: Object exposing ``out`` and ``stop_trigger`` attributes.
     """
     out_dir = trainer.out
     self._out = out_dir
     CommandItem.remove_commands_file(out_dir)
     CommandsState.run(out_dir)

     # Interval triggers get the interval-specific wrapper; every other
     # kind of stop trigger falls back to the generic one.
     original_trigger = trainer.stop_trigger
     if isinstance(original_trigger, IntervalTrigger):
         wrapper_cls = _CommandIntervalTrigger
     else:
         wrapper_cls = _CommandTrigger
     trainer.stop_trigger = wrapper_cls(original_trigger)
Ejemplo n.º 2
0
    def test_call_no_command(self):
        """Initialize and finalize the extension without issuing a command."""
        result_dir = os.path.join(self._dir, 'results')
        os.makedirs(result_dir)

        # Leave an empty commands file behind so that initialize() has
        # something to clean up.
        stale_commands = os.path.join(result_dir, 'commands')
        with open(stale_commands, 'w'):
            pass
        assert os.path.isfile(stale_commands)

        target = CommandsExtension()
        trainer = _MockTrainer(result_dir)
        target.initialize(trainer)

        # initialize() wraps the stop trigger, removes the stale file and
        # flags the job as running.
        assert type(trainer.stop_trigger) is _CommandIntervalTrigger
        assert not os.path.isfile(stale_commands)
        assert CommandsState.job_status(result_dir) == JobStatus.RUNNING

        # finalize() flags the job as stopped.
        target.finalize()
        assert CommandsState.job_status(result_dir) == JobStatus.STOPPED
Ejemplo n.º 3
0
    def post(self, result_id, project_id):
        """POST /api/v1/results/<int:id>/commands.

        Append a new open command to the commands of the training job
        behind ``result_id``.

        Args:
            result_id: Id of the ``Result`` row the command is sent to.
            project_id: Not used by this handler.

        Returns:
            On success, a JSON response holding the refreshed ``commands``
            of the result. Otherwise a ``(response, status)`` pair carrying
            a ``message``: 404 when the result does not exist, 400 when the
            job is not running or the request body is invalid.
        """

        result = db.session.query(Result).filter_by(id=result_id).first()

        if result is None:
            return jsonify({
                'result': None,
                'message': 'No interface defined for URL.'
            }), 404

        # Commands are only accepted while the job is running; every other
        # status is rejected with a message explaining why.
        job_status = CommandsState.job_status(result.path_name)
        if job_status != JobStatus.RUNNING:
            if job_status == JobStatus.NO_EXTENSION_ERROR:
                message = '\'CommandsExtension\' is not set or disabled.'
            elif job_status == JobStatus.INITIALIZED:
                message = 'The target training job has not run, yet'
            elif job_status == JobStatus.STOPPED:
                message = 'The target training job has already stopped'
            else:
                message = 'Cannot get the target training job status'
            return jsonify({'message': message}), 400

        request_json = request.get_json()
        if request_json is None:
            return jsonify({'message': 'Empty request.'}), 400
        if not isinstance(request_json, dict):
            # A JSON array, string or number has no ``.get``; reject it
            # here instead of failing with AttributeError further down.
            return jsonify(
                {'message': 'Request body must be a JSON object.'}), 400

        command_name = request_json.get('name', None)
        if command_name is None:
            return jsonify({'message': 'Name is required.'}), 400

        schedule = request_json.get('schedule', None)
        if not CommandItem.is_valid_schedule(schedule):
            return jsonify({'message': 'Schedule is invalid.'}), 400

        command = CommandItem(name=command_name)
        command.set_request(CommandItem.REQUEST_OPEN,
                            request_json.get('body', None),
                            schedule)

        commands = CommandItem.load_commands(result.path_name)
        commands.append(command)
        CommandItem.dump_commands(commands, result.path_name)

        # Re-crawl so the response reflects the command that was just stored.
        new_result = crawl_result(result, force=True)
        new_result_dict = new_result.serialize

        return jsonify({'commands': new_result_dict['commands']})
Ejemplo n.º 4
0
 def finalize(self):
     """Record the job as stopped unless the stored output path is empty."""
     if self._out == '':
         # No output directory recorded, so there is no state to update.
         return
     CommandsState.stop(self._out)
Ejemplo n.º 5
0
def test_post_result_command(func_dir, project, app):
    """Post commands to a result while its job moves through each status.

    Covers: extension not set, state file missing, extension initialized
    but not run, running (commands accepted), stopped, invalid payloads,
    an empty request, and an unknown result id.
    """
    commands_url = '/api/v1/projects/2/results/4/commands'

    project2_path = os.path.join(func_dir, 'test_project2')
    result_path = os.path.join(project2_path, '10003')
    os.makedirs(result_path)
    with open(os.path.join(result_path, 'log'), 'w') as f:
        json.dump([], f)
    Project.create(project2_path, 'command-test-project')

    def post_json(payload):
        # Send ``payload`` as a JSON body to the commands endpoint.
        return app.post(commands_url,
                        data=json.dumps(payload),
                        content_type='application/json')

    def check_rejected(resp, fragment=None):
        # Expect a 400 whose only field is a text message, optionally
        # containing ``fragment``.
        data = assert_json_api(resp, 400)
        assert len(data) == 1
        assert isinstance(data['message'], string_types)
        if fragment is not None:
            assert fragment in data['message']

    valid_payloads = [
        {
            'name': 'adjust_hyperparams',
            'body': {
                'alpha': 0.0007,
                'beta1': 0.8,
                'beat2': 1.0,
            },
            'schedule': {
                'value': 4,
                'key': 'epoch',
            },
            'resultId': 1,
        },
        {
            'name': 'adjust_hyperparams',
            'body': None,
            'schedule': None,
            'resultId': 2,
        },
        {
            'name': 'take_snapshot',
            'schedule': {
                'value': 4800,
                'key': 'iteration',
            },
            'resultId': 3,
        },
    ]

    # not set extension
    check_rejected(post_json(valid_payloads[0]), 'not set')

    # job run on v0.1.0 so .chainerui_commands is not created
    with open(os.path.join(result_path, 'commands'), 'w') as f:
        json.dump([], f)
    check_rejected(post_json(valid_payloads[0]), 'stopped')

    # extension is set up but not run
    os.remove(os.path.join(result_path, '.chainerui_commands'))
    initial_state = CommandsState._load(result_path, initialize=True)
    CommandsState._dump(result_path, initial_state)
    check_rejected(post_json(valid_payloads[0]), 'not run')

    # job has already started
    CommandsState.run(result_path)
    for payload in valid_payloads:
        data = assert_json_api(post_json(payload))
        assert len(data) == 1
        assert len(data['commands']) > 0
        command = data['commands'][0]
        assert isinstance(command['id'], int)
        assert isinstance(command['name'], string_types)
        assert len(command['request']) == 4
        req = command['request']
        assert req['schedule'] is None or isinstance(req['schedule'], dict)
        assert req['body'] is None or isinstance(req['body'], dict)
        assert isinstance(req['created_at'], string_types)
        assert isinstance(req['status'], string_types)
        assert 'response' in command

    # job has stopped
    CommandsState.stop(result_path)
    check_rejected(post_json(valid_payloads[0]), 'stopped')

    invalid_payloads = [
        {
            'name': 'invalid_schedule',
            'schedule': {
                'value': None,
                'key': 'epoch',
            },
            'resultId': 1,
        },
        {
            'name': None,
            'resultId': 2,
        },
    ]
    for payload in invalid_payloads:
        check_rejected(post_json(payload))

    # request without any body
    check_rejected(app.post(commands_url))

    # unknown result id
    resp = app.post('/api/v1/projects/2/results/12345/commands')
    data = assert_json_api(resp, 404)
    assert isinstance(data['message'], string_types)
    assert data['result'] is None
Ejemplo n.º 6
0
    def test_call(self):
        """Run queued commands through the extension as training advances."""
        out_path = os.path.join(self.dir, 'results')
        os.makedirs(out_path)
        commands_path = os.path.join(out_path, 'commands')
        with open(commands_path, 'w'):
            pass
        assert os.path.isfile(commands_path)

        # initialize
        target = CommandsExtension(trigger=(1, 'iteration'))
        trainer = _MockTrainer(out_path)
        target.initialize(trainer)
        assert not trainer.stop_trigger._loop_stop
        assert not os.path.isfile(commands_path)
        assert CommandsState.job_status(out_path) == JobStatus.RUNNING

        # setup valid commands, listed as (name, body, schedule)
        queued = [
            ('take_snapshot', None, None),
            ('stop', None, {'key': 'epoch', 'value': 10}),
            (
                'adjust_hyperparams',
                {
                    'optimizer': 'MomentumSGD',
                    'hyperparam': {
                        'lr': 0.01,
                        'beta': None,
                        'gamma': 1.0
                    }
                },
                {'key': 'iteration', 'value': 10},
            ),
        ]
        commands = CommandItem.load_commands(out_path)
        for name, body, schedule in queued:
            item = CommandItem(name=name)
            item.set_request(CommandItem.REQUEST_OPEN, body, schedule)
            commands.append(item)
        CommandItem.dump_commands(commands, out_path)

        def call_and_reload():
            # Invoke the extension once and read the stored commands back.
            target(trainer)
            reloaded = CommandItem.load_commands(out_path)
            assert len(reloaded) == 3
            return reloaded

        # call but skip by interval trigger
        snapshot_cmd, stop_cmd, adjust_cmd = call_and_reload()
        assert snapshot_cmd.response is None
        assert stop_cmd.response is None
        assert adjust_cmd.response is None

        # call 'take_snapshot'
        trainer.updater.iteration = 1
        snapshot_cmd, stop_cmd, adjust_cmd = call_and_reload()
        res = snapshot_cmd.response
        assert res['epoch'] == 0
        assert res['iteration'] == 1
        assert res['status'] == CommandItem.RESPONSE_SUCCESS
        assert stop_cmd.response is None
        assert adjust_cmd.response is None

        # call 'adjust_hyperparams'
        trainer.updater.iteration = 10
        snapshot_cmd, stop_cmd, adjust_cmd = call_and_reload()
        res = adjust_cmd.response
        assert res['epoch'] == 0
        assert res['iteration'] == 10
        assert res['status'] == CommandItem.RESPONSE_SUCCESS
        assert res['body'] is not None
        assert res['body']['optimizer'] == 'MomentumSGD'
        assert res['body']['hyperparam'] == {'lr': 0.01}
        assert stop_cmd.response is None

        # call 'stop'
        trainer.updater.iteration = 100
        trainer.updater.epoch = 10
        snapshot_cmd, stop_cmd, adjust_cmd = call_and_reload()
        res = stop_cmd.response
        assert res['epoch'] == 10
        assert res['iteration'] == 100
        assert res['status'] == CommandItem.RESPONSE_SUCCESS
        assert res['body'] is None
        assert trainer.stop_trigger._loop_stop

        target.finalize()
        assert CommandsState.job_status(out_path) == JobStatus.STOPPED
Ejemplo n.º 7
0
def setup_test_project(root_path):
    """Create the result directories ``10000``-``10005`` under ``root_path``.

    Each directory receives a growing set of JSON fixture files (``log``,
    then ``args``, then ``commands``); the last three additionally get a
    commands state of run, stop and initialized-only respectively.

    Args:
        root_path: Directory in which the result directories are created.
    """
    log = [{
        "main/loss": 0.1933198869228363,
        "validation/main/loss": 0.09147150814533234,
        "iteration": 600,
        "elapsed_time": 16.052587032318115,
        "epoch": 1,
        "main/accuracy": 0.9421835541725159,
        "validation/main/accuracy": 0.9703000783920288
    }, {
        "main/loss": 0.07222291827201843,
        "validation/main/loss": 0.08141259849071503,
        "iteration": 1200,
        "elapsed_time": 19.54666304588318,
        "epoch": 2,
        "main/accuracy": 0.9771820902824402,
        "validation/main/accuracy": 0.975399911403656
    }]
    args = {
        "resume": "",
        "batchsize": 100,
        "epoch": 20,
        "frequency": -1,
        "gpu": 0,
        "unit": 1000,
        "out": "results"
    }
    commands = [{
        "name": "take_snapshot",
        "request": {
            "created_at": "2017-09-26T16:44:33.410023",
            "status": "OPEN",
            "body": None,
            "schedule": {
                "value": 4,
                "key": "epoch"
            }
        },
        "response": {
            "executed_at": "2017-09-26T16:44:35.730431",
            "epoch": 4,
            "iteration": 2400,
            "elapsed_time": 76.96686792373657,
            "status": "SUCCESS",
            "body": None
        }
    }]

    def make_result(dir_name, contents):
        # Create the directory, dump each (file name, payload) pair into
        # it as JSON in the given order, and return the directory path.
        result_dir = os.path.join(root_path, dir_name)
        os.makedirs(result_dir)
        for file_name, payload in contents:
            with open(os.path.join(result_dir, file_name), 'w') as fp:
                json.dump(payload, fp)
        return result_dir

    log_only = [('log', log)]
    log_args = log_only + [('args', args)]
    log_args_commands = log_args + [('commands', commands)]

    # log only
    make_result('10000', log_only)

    # log, args
    make_result('10001', log_args)

    # log, args, commands (plus an empty snapshot file)
    path = make_result('10002', log_args_commands)
    with open(os.path.join(path, 'snapshot_iter_2400'), 'w'):
        pass

    # log, args, commands, status(run)
    path = make_result('10003', log_args_commands)
    CommandsState.run(path)

    # log, args, commands, status(stop)
    path = make_result('10004', log_args_commands)
    CommandsState.stop(path)

    # log, args, commands, status(not run)
    path = make_result('10005', log_args_commands)
    initial_state = CommandsState._load(path, initialize=True)
    CommandsState._dump(path, initial_state)