def submit(user, feature, from_, dry_run, github_token, create_pull, cleanup):
    """Copy a user's feature file into the project, commit and push it.

    The source file ``user_XX/feature_YY.py`` is located under ``from_`` and
    written to the same relative location under the project's
    ``contrib``/``module_path`` setting. The change is committed on a newly
    created branch and pushed; optionally a pull request is opened.

    Args:
        user: Integer user id (formatted as two digits in paths).
        feature: Integer feature id (formatted as two digits in paths).
        from_: Root directory containing the per-user feature sources.
        dry_run: Forwarded to ``push_changes`` and ``create_pull_request``.
        github_token: Passed to ``_make_github_client``.
        create_pull: When truthy, create a pull request after pushing.
        cleanup: When truthy, register an exit hook that checks out the
            default branch and deletes the branch made for this submission.
    """
    project = _get_project()
    repo = project.repo
    module_root = project.get("contrib", "module_path")

    # Source and destination share the same user/feature relative layout.
    relative_path = pathlib.Path(
        "user_{user:02d}".format(user=user),
        "feature_{feature:02d}.py".format(feature=feature),
    )
    src = pathlib.Path(from_, relative_path)
    dst = pathlib.Path(module_root, relative_path)

    if cleanup:

        def _restore_repo():
            # Leave the new branch before deleting it.
            repo.branches[ballet.update.DEFAULT_BRANCH].checkout()
            repo.delete_head(_make_branch_name(user, feature))

        # funcy.silent suppresses exceptions raised by the hook at exit.
        atexit.register(funcy.silent(_restore_repo))

    _check_environment(repo)

    create_and_switch_to_new_branch(repo, user, feature)
    create_dirs_if_needed(dst)
    read_format_write(src, dst)
    add_init_if_needed(dst)
    commit_changes(repo)
    push_changes(repo, user, feature, dry_run=dry_run)

    github = _make_github_client(github_token)
    if create_pull:
        pull_request = create_pull_request(
            github, project, user, feature, dry_run=dry_run
        )
        print("Created pull request: {pr.url}".format(pr=pull_request))

    print("Submission successful.")
def make_train_test_split(output_dir, seed=641137):
    """Split the loaded data into train/test parts and save them as CSV.

    The data returned by ``load_data()`` is split by index (67% train,
    33% test). The entities and targets tables are written under
    ``<output_dir>/train`` and ``<output_dir>/test`` using the relative
    paths configured for the entities and targets tables.

    Args:
        output_dir: Directory under which ``train`` and ``test``
            subdirectories are created. It is created, together with any
            missing parents, if it does not exist yet.
        seed: Random state passed to ``train_test_split``.

    Returns:
        Tuple ``(X_tr_df, X_te_df, y_tr_df, y_te_df)``.
    """
    # load and split data
    X, y = load_data()
    inds = X.index.copy()
    inds_tr, inds_te = train_test_split(
        inds, train_size=0.67, test_size=0.33, random_state=seed)

    X_tr_df = X.loc[inds_tr]
    X_te_df = X.loc[inds_te]
    y_tr_df = y.loc[inds_tr]
    y_te_df = y.loc[inds_te]

    # load config
    tables = conf.get('tables')

    entities_table_name = conf.get('data', 'entities_table_name')
    entities_config = one_or_raise(lwhere(tables, name=entities_table_name))
    entities_path = entities_config['path']

    targets_table_name = conf.get('data', 'targets_table_name')
    targets_config = one_or_raise(lwhere(tables, name=targets_table_name))
    targets_path = targets_config['path']

    # prepare directories
    # parents=True so a not-yet-existing output_dir does not raise
    # FileNotFoundError when creating the train/test subdirectories.
    output_dir = pathlib.Path(output_dir)
    train_dir = output_dir.joinpath('train')
    train_dir.mkdir(parents=True, exist_ok=True)
    test_dir = output_dir.joinpath('test')
    test_dir.mkdir(parents=True, exist_ok=True)

    # save tables
    kwargs = {'header': True}
    X_tr_df.to_csv(train_dir.joinpath(entities_path), **kwargs)
    X_te_df.to_csv(test_dir.joinpath(entities_path), **kwargs)
    y_tr_df.to_csv(train_dir.joinpath(targets_path), **kwargs)
    y_te_df.to_csv(test_dir.joinpath(targets_path), **kwargs)

    return X_tr_df, X_te_df, y_tr_df, y_te_df
def create_dirs_if_needed(dst):
    """Make sure the directory that will contain ``dst`` exists.

    Missing intermediate directories are created; an already existing
    directory is left untouched. ``dst`` itself is not created.
    """
    parent_dir = pathlib.Path(dst).parent
    parent_dir.mkdir(parents=True, exist_ok=True)
def __init__(self, feature_path, random_seed=1, start=None, end=None):
    """Store the configuration, seed the RNG and call ``_create_queue``.

    Args:
        feature_path: Location of the features; stored as a
            ``pathlib.Path``.
        random_seed: Seed for the instance's ``numpy`` ``RandomState``.
        start: Stored unchanged as ``self.start``.
        end: Stored unchanged as ``self.end``.
    """
    self.feature_path = pathlib.Path(feature_path)
    self.start, self.end = start, end
    self.rng = np.random.RandomState(seed=random_seed)

    # Called last, after all attributes above are set
    # (presumably it reads them - confirm against _create_queue).
    self._create_queue()
def run_simulation(from_, start, end):
    """Submit repeatedly through a ``SubmissionClient`` until it is completed.

    Logging is configured against the current directory first. The client
    is built from ``from_`` with the given ``start`` and ``end``.
    """
    configure_logging(pathlib.Path("."))
    client = SubmissionClient(from_, start=start, end=end)
    while True:
        if client.is_completed():
            break
        client.submit_with_delay()