def test_run_work_flow_impl():
    """Drive ``_run_workflow_impl`` on the Falcon fixture with an
    accept-everything callback set.

    FIX(review): the original also built a ``callbacks_reject_issue``
    object that was never used anywhere in the function — removed.
    """

    def show_predictions_handler(predictions: list) -> int:
        """Log each prediction; always pick choice 1 (presumably the
        first/top prediction — confirm against the choice indexing)."""
        LOGGER.debug("=== start predictions handler ===")
        for idx, p in enumerate(predictions):
            LOGGER.debug("(%d): %s", idx, p)
        LOGGER.debug("=== end predictions handler ===")
        return 1

    db = database.Database(FALCON_SETUP["db_file"])
    # Accept adding an issue id, then let the handler choose a prediction.
    callbacks_add_issue = Callbacks(lambda: True, show_predictions_handler)

    # File-system layout: fixture paths plus the 3rd-party weka install.
    fs_paths = {
        k: FALCON_SETUP[k] for k in ["dot_git_dir", "spojitr_dir", "db_file"]
    }
    third_party: Path = Path("/data/spojitr_install") / "3rd"
    fs_paths["weka_run_script"] = third_party / "run_weka.py"
    fs_paths["weka_jar"] = third_party / "weka.jar"

    _run_workflow_impl(
        FALCON_COMMIT_WITHOUT_ISSUE_ID,
        callbacks_add_issue,
        FALCON_SETUP["project_config"],
        db,
        fs_paths,
    )
def _prepare_crunch(weka_run_script: Path, weka_jar: Path):
    """Train a demo model for the "crunch" project using only data up to
    a fixed reference date.

    :param weka_run_script: helper script wrapping the weka invocation
    :param weka_jar: path to weka.jar
    """
    import spojitr_classifier as classifier
    import spojitr_database as database

    reference_t = "2017-03-01T00:00:00Z"

    banner = "=" * 40
    LOGGER.info(banner)
    LOGGER.info(
        """\
Setup demo for project "crunch"
reference training date: %s
""",
        reference_t,
    )
    LOGGER.info(banner)

    crunch_db = database.Database(CRUNCH_CONFIG["db_file"])
    # Limit the data source to history before the reference date so the
    # demo model is trained on a fixed snapshot.
    data_source = classifier._create_spojit_data_source(
        crunch_db, max_date_time=reference_t
    )
    classifier._create_train_profile(
        data_source, CRUNCH_CONFIG["train_file"], crunch_db
    )
    classifier._train_model_with_weka(
        CRUNCH_CONFIG["train_file"],
        CRUNCH_CONFIG["model_file"],
        weka_run_script,
        weka_jar,
        dry_run=False,
    )
def test_case_reject_adding_issue_id():
    """Reject-path smoke test: build the db updater for the latest Falcon
    commit and apply it."""
    falcon_db = database.Database(FALCON_SETUP["db_file"])
    head_commit = _git_get_latest_commit(FALCON_SETUP["dot_git_dir"])
    updater = _case_reject_adding_issue_id(
        head_commit, falcon_db, FALCON_SETUP["dot_git_dir"]
    )
    updater.execute()
def test_build_issue_choices():
    """Smoke test: build issue choices from the crunch demo prediction CSV
    and log them."""
    cfg = CRUNCH_SETUP
    crunch_db = database.Database(cfg["db_file"])
    prediction_csv = cfg["spojitr_dir"] / "demo_prediction.csv"
    choices = _build_issue_choices(prediction_csv, crunch_db)
    LOGGER.debug("Choices: %s", choices)
def test():
    """Ad-hoc exploration: log the real latest commit of a local falcon
    clone, the fake fixture commit, and a db lookup of a known hash."""
    latest = _git_get_latest_commit(Path("~/falcon/.git").expanduser())
    LOGGER.info("LC %s", pprint.pformat(latest))
    LOGGER.info("fake %s", pprint.pformat(FALCON_COMMIT_WITH_ISSUE_ID))

    falcon_db = database.Database(
        Path("~/falcon/.spojitr/db.sqlite3").expanduser()
    )
    commit_info = _db_get_commit_info(
        "00a2b3a95aee3fc68a8adf3f04c988df205fe4fe", falcon_db
    )
    LOGGER.info("query %s", pprint.pformat(commit_info))
def test_create_spojit_data_source():
    """Build both the full and a date-limited spojit data source from the
    Falcon fixture database and log their contents."""
    falcon_db = database.Database(FALCON_SETUP["db_file"])

    full_ds = _create_spojit_data_source(falcon_db)
    LOGGER.debug("Full data source\n%s", _pformat_datasource(full_ds))

    cutoff = "2017-03-01T00:00:00Z"
    limited_ds = _create_spojit_data_source(falcon_db, max_date_time=cutoff)
    LOGGER.debug("Data source at t=%s\n%s", cutoff, _pformat_datasource(limited_ds))
def test_update_jira_issues():
    """Fetch/update FALCON jira issues into a local checkout's database."""
    checkout_root: Path = Path("~/falcon_checkout").expanduser()
    spojitr_dir: Path = checkout_root / ".spojitr"
    db_file = spojitr_dir / "falcon.sqlite3"

    project_config = {
        "jiraProjectKey": "FALCON",
        "jiraRestUri": "https://issues.apache.org/jira/rest/api/2",
    }

    checkout_db = database.Database(db_file)
    _update_jira_issues(checkout_db, project_config)
def test_case_commit_with_existing_issue_id():
    """Apply the existing-issue-id case to the latest Falcon commit.

    The id list contains a duplicate and a non-FALCON key — presumably to
    exercise de-duplication/filtering in the case handler (confirm there).
    """
    falcon_db = database.Database(FALCON_SETUP["db_file"])
    head_commit = _git_get_latest_commit(FALCON_SETUP["dot_git_dir"])

    issue_ids = ["FALCON-2341", "FOO-BAR", "FALCON-2341"]
    updater = _case_commit_with_existing_issue_id(
        issue_ids,
        head_commit,
        falcon_db,
        FALCON_SETUP["dot_git_dir"],
    )
    updater.execute()
def populate_database(
    project_config: dict, db_file: Path, spojitr_dir: Path, dot_git_dir: Path
):
    """Create the spojitr database schema and fill it from git history and
    jira, then derive the link/similarity tables.

    :param project_config: jira project settings (key, REST uri)
    :param db_file: sqlite database location
    :param spojitr_dir: spojitr working directory
    :param dot_git_dir: repository ".git" directory
    """
    LOGGER.info("Build database ...")

    target_db = database.Database(db_file)
    target_db.create_tables()

    # Raw facts first, derived tables afterwards.
    _fetch_all_commits(target_db, spojitr_dir=spojitr_dir, dot_git_dir=dot_git_dir)
    _fetch_all_jira_issues(target_db, project_config)
    _fill_issues_to_change_set_table(target_db)
    _fill_issue_to_commit_similarity_table(target_db)
    _fill_issue_to_code_similarity_table(target_db)
def test_populate_falcon():
    """Partial population run against a local ``~/falcon`` clone: create
    tables and import commits only (no jira fetch, no derived tables).

    FIX(review): the original built a ``project_config`` dict that was
    never used in this function (no ``_fetch_all_jira_issues`` call) —
    removed the dead local.
    """
    repo_root: Path = Path("~/falcon").expanduser()
    dot_git_dir: Path = repo_root / ".git"
    spojitr_dir: Path = repo_root / ".spojitr"
    db_file = spojitr_dir / "falcon_perform.sqlite3"

    LOGGER.setLevel(logging.INFO)

    falcon_db = database.Database(db_file)
    falcon_db.create_tables()
    _fetch_all_commits(falcon_db, spojitr_dir=spojitr_dir, dot_git_dir=dot_git_dir)
def train(db_file: Path, spojitr_dir: Path, weka_run_script: Path, weka_jar: Path):
    """Train the classifier model from the full database contents.

    :param db_file: sqlite database with project history
    :param spojitr_dir: directory receiving the ARFF and model files
    :param weka_run_script: helper script wrapping the weka invocation
    :param weka_jar: path to weka.jar
    """
    LOGGER.info("Training ... ")

    train_file: Path = spojitr_dir / TRAIN_ARFF_FILE_NAME
    model_file: Path = spojitr_dir / MODEL_FILE_NAME

    training_db = database.Database(db_file)
    source = _create_spojit_data_source(training_db)
    _create_train_profile(source, train_file, training_db)
    _train_model_with_weka(
        train_file, model_file, weka_run_script, weka_jar, dry_run=False
    )
def setup_crunch_demo():
    """Train the crunch demo model from data limited to a fixed reference
    date, writing "demo_"-prefixed ARFF and model files."""
    LOGGER.info("Setup Crunch demo")

    cfg = CRUNCH_SETUP
    reference_t = "2017-03-01T00:00:00Z"
    train_file: Path = cfg["spojitr_dir"] / ("demo_" + TRAIN_ARFF_FILE_NAME)
    model_file: Path = cfg["spojitr_dir"] / ("demo_" + MODEL_FILE_NAME)

    demo_db = database.Database(cfg["db_file"])
    # Snapshot the history at the reference date for a reproducible demo.
    source = _create_spojit_data_source(demo_db, max_date_time=reference_t)
    _create_train_profile(source, train_file, demo_db)
    _train_model_with_weka(
        train_file, model_file, WEKA_RUN_SCRIPT, WEKA_JAR, dry_run=False
    )
def test_create_profile():
    """Build a training-profile ARFF for the Falcon fixture.

    FIX(review): the original called ``_create_train_profile`` with only
    ``(path, db)``, but every other call site in this file
    (``train``, ``setup_crunch_demo``, ``_prepare_crunch``) passes
    ``(data_source, train_file, db)`` — the data source argument was
    missing. Build it from the database and pass it through.
    """
    config = FALCON_SETUP
    db = database.Database(config["db_file"])
    data_source = _create_spojit_data_source(db)
    _create_train_profile(data_source, config["spojitr_dir"] / "train.arff", db)
def test_fill_issue_to_code_similarity_table():
    """Smoke test: fill the issue-to-code similarity table of a fixed
    local database file."""
    local_db = database.Database("/data/spojitr_install/db.sqlite3")
    _fill_issue_to_code_similarity_table(local_db)
def test_calculate_commit_to_issue_pairs_for_similarity():
    """Compute candidate (commit, issue) pairs for similarity scoring and
    log the result."""
    local_db = database.Database("/data/spojitr_install/db.sqlite3")
    pairs = _calculate_commit_to_issue_pairs_for_similarity(local_db)
    LOGGER.info("res %s", pairs)
def run_workflow(callbacks: Callbacks, project_config: dict, fs_paths: dict):
    """
    Run the spojitr post-commit workflow for the latest commit.

    :param callbacks: user-interaction callbacks (confirm / choose prediction)
    :param project_config: jira project settings
    :param fs_paths: file system paths
    """
    LOGGER.debug("Starting spojitr workflow")
    LOGGER.debug("path setup %s", pprint.pformat(fs_paths))

    # Demo-mode state is persisted in the spojitr dir; falsy means normal mode.
    demo_mode = _get_demo_config(fs_paths["spojitr_dir"])

    if demo_mode:
        # Track nesting depth so the banner is only printed for the
        # outermost invocation; the incremented level is persisted so
        # nested runs (triggered below) can see it.
        demo_mode["nesting_level"] += 1
        _write_demo_config(demo_mode, fs_paths["spojitr_dir"])

        if demo_mode["nesting_level"] == 1:
            LOGGER.warning("""\
=================================================
=               SPOJITR DEMO MODE               =
=================================================""")

    db = database.Database(fs_paths["db_file"])
    git_latest_commit_info = _git_get_latest_commit(fs_paths["dot_git_dir"])

    if demo_mode:
        # modify commit info to match the original author, date, ...
        db_commit_info = _db_get_commit_info(demo_mode["commit_hash"], db)

        # modify essential fields
        latest_commit_info = copy.deepcopy(git_latest_commit_info)
        latest_commit_info["commit_hash"] = db_commit_info["commit_hash"]
        latest_commit_info["author"] = db_commit_info["author"]
        latest_commit_info["email"] = db_commit_info["email"]
        latest_commit_info["date"] = db_commit_info["date"]

        # TODO: sanity check: db filepaths and git file paths should match!

        LOGGER.debug(
            "DEMO MODE: changed %s -> %s",
            pprint.pformat(git_latest_commit_info),
            pprint.pformat(latest_commit_info),
        )
    else:
        latest_commit_info = git_latest_commit_info

    # -- Begin core workflow

    if not demo_mode:
        # Only refresh jira issues in a real run; demo runs work on the
        # snapshot already in the database.
        _case_update_issues(db, project_config)

    db_updater = _run_workflow_impl(
        latest_commit_info,
        callbacks,
        project_config,
        db=db,
        fs_paths=fs_paths,
        demo_mode=demo_mode,
    )

    if not demo_mode:
        # NOTE(review): other call sites invoke ``execute()`` without an
        # argument — confirm the updater's ``execute`` accepts a db here.
        db_updater.execute(db)

    # -- End core workflow

    if demo_mode:
        # Unwind the nesting counter and persist it for the next run.
        demo_mode["nesting_level"] -= 1
        _write_demo_config(demo_mode, fs_paths["spojitr_dir"])
def test_fill_issues_to_change_set_table():
    """Smoke test: fill the issues-to-change-set table of a fixed local
    database file."""
    local_db = database.Database("/data/spojitr_install/db.sqlite3")
    _fill_issues_to_change_set_table(local_db)