Example #1
def crawl_result(result, force=False, commit=True):
    """crawl_results."""
    if not result.crawlable:
        return result
    now = datetime.datetime.now()

    if (not force) and (now - result.updated_at).total_seconds() < 4:
        return result

    # if the log file has not been updated, skip reading its contents
    is_updated = _check_log_updated(result)
    crawled_result = crawl_result_path(result.path_name, is_updated)

    if is_updated:
        current_log_idx = len(result.logs)
        if len(crawled_result['logs']) < current_log_idx:
            current_log_idx = 0
            result.logs = []
            result.args = None
        for log in crawled_result['logs'][current_log_idx:]:
            result.logs.append(Log(log))

    if result.args is None:
        result.args = Argument(json.dumps(crawled_result['args']))

    if result.name is None:
        _update_to_default_name(result)

    # the crawled commands list contains both new commands and commands that
    # are already registered; registered commands may have received a
    # response, so they need to be updated as well
    current_cmd_idx = len(result.commands)
    if len(crawled_result['commands']) < current_cmd_idx:
        current_cmd_idx = 0
        result.commands = []
        result.snapshots = []
    for cmd in crawled_result['commands'][current_cmd_idx:]:
        result.commands.append(Command(**cmd))
    for i, cmd in enumerate(crawled_result['commands'][:current_cmd_idx]):
        result.commands[i].update(cmd.get('response', None))

    # the snapshot file list is sorted, but not in natural (numeric) order:
    # for example, 'iter_900' sorts after 'iter_1000', so check whether each
    # file is already registered before appending it
    registered_snapshot_keys = [ss.iteration for ss in result.snapshots]
    for snapshot in crawled_result['snapshots']:
        number_str = snapshot.split('snapshot_iter_')[1]
        if not is_numberable(number_str):
            continue
        number = int(number_str)
        if number in registered_snapshot_keys:
            continue
        result.snapshots.append(Snapshot(snapshot, number))

    result.updated_at = datetime.datetime.now()
    if commit:
        db.session.commit()

    return result
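As the comment above notes, a plain sort of snapshot file names is lexicographic rather than numeric, so 'snapshot_iter_900' lands after 'snapshot_iter_1000'. The short sketch below illustrates that ordering and the numeric check used to skip already registered files; is_numberable is not shown on this page, so the stand-in here is an assumption about its behavior.

files = ['snapshot_iter_1000', 'snapshot_iter_900']
print(sorted(files))  # ['snapshot_iter_1000', 'snapshot_iter_900'] -- lexicographic, not numeric


def is_numberable(value):
    # assumed behavior of the helper: True if the string parses as an int
    try:
        int(value)
        return True
    except ValueError:
        return False


# extracting the iteration number gives a stable numeric key regardless of sort order
numbers = [int(name.split('snapshot_iter_')[1]) for name in files
           if is_numberable(name.split('snapshot_iter_')[1])]
print(sorted(numbers))  # [900, 1000]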
Example #2
def test_crawl_result_reset(func_dir):
    # the basic behavior is covered in 'test_api.py', so this test only
    # checks the reset logic.
    result = Result(func_dir)
    result.updated_at = datetime.datetime.now()
    result.logs = [Log({'loss': 0.5}), Log({'loss': 0.2}), Log({'loss': 0.01})]
    result.commands = [Command('take_snapshot'), Command('stop')]
    result.snapshots = [
        Snapshot('snapshot_iter_10', 10), Snapshot('snapshot_iter_11', 11)]

    actual = crawl_result(result, force=True, commit=False)
    assert len(actual.logs) == 2
    assert len(actual.commands) == 1
    assert len(actual.snapshots) == 1

    open(os.path.join(func_dir, 'snapshot_iter_200'), 'w').close()

    actual2 = crawl_result(actual, force=True, commit=False)
    assert len(actual2.logs) == 2
    assert len(actual2.commands) == 1
    assert len(actual2.snapshots) == 2
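The early return at the top of crawl_result throttles crawling: if the result was updated less than 4 seconds ago and force is falsy, the cached state is returned as-is. A minimal usage sketch of that behavior, assuming result is a crawlable Result instance as in the examples above:

first = crawl_result(result, commit=False)    # crawls and refreshes updated_at
cached = crawl_result(result, commit=False)   # called again within 4 seconds: returns early
fresh = crawl_result(result, force=True, commit=False)  # force=True bypasses the throttle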
Example #3
def crawl_result(result_id, force=False):
    """crawl_results."""

    current_result = DB_SESSION.query(Result).filter_by(id=result_id).first()

    now = datetime.datetime.now()

    if (not force) and (now - current_result.updated_at).total_seconds() < 4:
        return current_result

    # if the log file has not been updated, skip reading its contents
    is_updated = _check_log_updated(current_result)
    crawled_result = crawl_result_path(current_result.path_name, is_updated)

    if is_updated:
        current_log_idx = len(current_result.logs)
        if len(crawled_result['logs']) < current_log_idx:
            current_log_idx = 0
            current_result.logs = []
            current_result.args = None
        for log in crawled_result['logs'][current_log_idx:]:
            current_result.logs.append(Log(log))

    if current_result.args is None:
        current_result.args = Argument(json.dumps(crawled_result['args']))

    current_result.commands = []
    current_result.snapshots = []

    for cmd in crawled_result['commands']:
        current_result.commands.append(cmd.to_model())

    for snapshot in crawled_result['snapshots']:
        number_str = snapshot.split('snapshot_iter_')[1]
        if is_numberable(number_str):
            current_result.snapshots.append(Snapshot(snapshot,
                                                     int(number_str)))

    current_result.updated_at = datetime.datetime.now()
    DB_SESSION.commit()

    return current_result
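Examples #1, #3 and #5 skip re-reading the log file when _check_log_updated reports no change. That helper is not shown on this page; the sketch below is only a hypothetical illustration of such a check, assuming the Result row remembers the log file's last modification time (both the log_modified_at attribute and the 'log' file name are assumptions).

import os


def _check_log_updated(result):
    # hypothetical sketch: compare the log file's mtime with the value
    # remembered on the result; attribute and file names are assumptions
    log_path = os.path.join(result.path_name, 'log')
    if not os.path.isfile(log_path):
        return False
    modified_at = os.path.getmtime(log_path)
    if result.log_modified_at is None or result.log_modified_at < modified_at:
        result.log_modified_at = modified_at
        return True
    return False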
Example #4
def crawl_result(result_id, force=None):
    """crawl_results."""

    current_result = DB_SESSION.query(Result).filter_by(id=result_id).first()

    now = datetime.datetime.now()

    if force is None and (now - current_result.updated_at).total_seconds() < 4:
        return current_result

    crawled_result = crawl_result_path(current_result.path_name)

    need_reset = len(crawled_result['logs']) < len(current_result.logs)

    if need_reset:
        current_result.logs = []
        current_result.args = None

    current_result.commands = []
    current_result.snapshots = []

    for log in crawled_result['logs'][len(current_result.logs):]:
        current_result.logs.append(Log(json.dumps(log)))

    if current_result.args is None:
        current_result.args = Argument(json.dumps(crawled_result['args']))

    for cmd in crawled_result['commands'][len(current_result.commands):]:
        current_result.commands.append(cmd.to_model())

    registered_count = len(current_result.snapshots)
    for snapshot in crawled_result['snapshots'][registered_count:]:
        number_str = snapshot.split('snapshot_iter_')[1]
        if is_numberable(number_str):
            current_result.snapshots.append(Snapshot(snapshot,
                                                     int(number_str)))

    current_result.updated_at = datetime.datetime.now()
    DB_SESSION.commit()

    return current_result
Example #5
def crawl_result(result, force=False, commit=True):
    """crawl_results."""
    now = datetime.datetime.now()

    if (not force) and (now - result.updated_at).total_seconds() < 4:
        return result

    # if the log file has not been updated, skip reading its contents
    is_updated = _check_log_updated(result)
    crawled_result = crawl_result_path(result.path_name, is_updated)

    if is_updated:
        current_log_idx = len(result.logs)
        if len(crawled_result['logs']) < current_log_idx:
            current_log_idx = 0
            result.logs = []
            result.args = None
        for log in crawled_result['logs'][current_log_idx:]:
            result.logs.append(Log(log))

    if result.args is None:
        result.args = Argument(json.dumps(crawled_result['args']))

    result.commands = []
    result.snapshots = []

    for cmd in crawled_result['commands']:
        result.commands.append(Command(**cmd))

    for snapshot in crawled_result['snapshots']:
        number_str = snapshot.split('snapshot_iter_')[1]
        if is_numberable(number_str):
            result.snapshots.append(Snapshot(snapshot, int(number_str)))

    result.updated_at = datetime.datetime.now()
    if commit:
        db.session.commit()

    return result
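Examples #1 and #5 accept a commit flag, so a caller can crawl several results in one transaction and commit once at the end. A hedged usage sketch, assuming results is an iterable of Result rows and db is the same session object used above:

for result in results:
    crawl_result(result, commit=False)
db.session.commit()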