def submit(user, feature, from_, dry_run, github_token, create_pull, cleanup):
    """Submit one user's feature file to the project on a new branch.

    The file ``user_XX/feature_YY.py`` under ``from_`` is written to the same
    relative location under the project's configured contrib module path,
    then committed and pushed.

    Args:
        user: Integer user id; zero-padded to two digits in the path.
        feature: Integer feature id; zero-padded to two digits in the path.
        from_: Base directory holding the source feature files.
        dry_run: Forwarded to ``push_changes`` and ``create_pull_request``.
        github_token: Forwarded to ``_make_github_client``.
        create_pull: If truthy, create a pull request and print its URL.
        cleanup: If truthy, register an ``atexit`` handler that checks out
            the default branch and deletes the branch created here.
    """
    project = _get_project()
    repo = project.repo
    module_path = project.get("contrib", "module_path")

    # Source and destination share the same relative layout, so build the
    # two path components once and reuse them.
    user_dir = "user_{user:02d}".format(user=user)
    feature_file = "feature_{feature:02d}.py".format(feature=feature)
    src = pathlib.Path(from_, user_dir, feature_file)
    dst = pathlib.Path(module_path, user_dir, feature_file)

    if cleanup:

        def _restore_default_branch():
            # Switch back to the default branch first, then drop the
            # branch named for this user/feature.
            repo.branches[ballet.update.DEFAULT_BRANCH].checkout()
            repo.delete_head(_make_branch_name(user, feature))

        # The handler runs at interpreter exit; funcy.silent suppresses any
        # exception it raises so a failed cleanup does not surface.
        atexit.register(funcy.silent(_restore_default_branch))

    _check_environment(repo)
    create_and_switch_to_new_branch(repo, user, feature)
    create_dirs_if_needed(dst)
    read_format_write(src, dst)
    add_init_if_needed(dst)
    commit_changes(repo)
    push_changes(repo, user, feature, dry_run=dry_run)

    # The client is built whether or not a pull request is requested.
    gh = _make_github_client(github_token)
    if create_pull:
        pr = create_pull_request(gh, project, user, feature, dry_run=dry_run)
        print("Created pull request: {pr.url}".format(pr=pr))

    print("Submission successful.")
Beispiel #2
0
def make_train_test_split(output_dir, seed=641137):
    """Split the loaded dataset into train/test parts and save them as CSV.

    The objects returned by ``load_data()`` are split by index label
    (67% train, 33% test). Each part is written below ``output_dir/train``
    or ``output_dir/test`` at the relative paths configured for the
    entities and targets tables in ``conf``.

    Args:
        output_dir: Directory (string or path-like) under which the
            ``train`` and ``test`` subdirectories are created. Missing
            parent directories are created as needed.
        seed: Value passed as ``random_state`` to ``train_test_split``.

    Returns:
        The tuple ``(X_tr_df, X_te_df, y_tr_df, y_te_df)``.
    """
    # load and split data
    X, y = load_data()
    inds = X.index.copy()
    inds_tr, inds_te = train_test_split(inds,
                                        train_size=0.67,
                                        test_size=0.33,
                                        random_state=seed)
    X_tr_df = X.loc[inds_tr]
    X_te_df = X.loc[inds_te]
    y_tr_df = y.loc[inds_tr]
    y_te_df = y.loc[inds_te]

    # load config
    # NOTE(review): one_or_raise presumably fails unless exactly one table
    # entry matches the configured name -- confirm against its definition.
    tables = conf.get('tables')
    entities_table_name = conf.get('data', 'entities_table_name')
    entities_config = one_or_raise(lwhere(tables, name=entities_table_name))
    entities_path = entities_config['path']
    targets_table_name = conf.get('data', 'targets_table_name')
    targets_config = one_or_raise(lwhere(tables, name=targets_table_name))
    targets_path = targets_config['path']

    # prepare directories
    # parents=True so that a not-yet-existing output_dir does not make
    # mkdir raise FileNotFoundError.
    output_dir = pathlib.Path(output_dir)
    train_dir = output_dir.joinpath('train')
    train_dir.mkdir(parents=True, exist_ok=True)
    test_dir = output_dir.joinpath('test')
    test_dir.mkdir(parents=True, exist_ok=True)

    # save tables
    kwargs = {'header': True}
    X_tr_df.to_csv(train_dir.joinpath(entities_path), **kwargs)
    X_te_df.to_csv(test_dir.joinpath(entities_path), **kwargs)
    y_tr_df.to_csv(train_dir.joinpath(targets_path), **kwargs)
    y_te_df.to_csv(test_dir.joinpath(targets_path), **kwargs)

    return X_tr_df, X_te_df, y_tr_df, y_te_df
Beispiel #3
0
def create_dirs_if_needed(dst):
    """Ensure the directory that will contain ``dst`` exists.

    ``dst`` itself is not created; only its parent directory (and any
    missing ancestors) is. An already existing directory is left alone.
    """
    parent_dir = pathlib.Path(dst).parent
    parent_dir.mkdir(parents=True, exist_ok=True)
 def __init__(self, feature_path, random_seed=1, start=None, end=None):
     """Store the configuration and build the initial queue.

     Args:
         feature_path: Location of the features; stored as a
             ``pathlib.Path``.
         random_seed: Seed for this instance's ``RandomState``.
         start: Stored unchanged on the instance as ``start``.
         end: Stored unchanged on the instance as ``end``.
     """
     self.feature_path = pathlib.Path(feature_path)
     self.rng = np.random.RandomState(seed=random_seed)
     self.start, self.end = start, end
     # Called last; presumably _create_queue reads the attributes set
     # above -- confirm against its definition.
     self._create_queue()
def run_simulation(from_, start, end):
    """Submit repeatedly until the submission client reports completion.

    Logging is configured with the current directory (``"."``), then a
    ``SubmissionClient`` built from ``from_``, ``start`` and ``end`` is
    driven via ``submit_with_delay`` until ``is_completed`` returns a
    truthy value.
    """
    configure_logging(pathlib.Path("."))
    client = SubmissionClient(from_, start=start, end=end)
    while True:
        if client.is_completed():
            break
        client.submit_with_delay()