def __init__(self, dataset: str, dir: Path) -> None:
    self.ont = get_ontology(dataset)
    self.sms = get_semantic_models(dataset)
    self.sm_prefix_index = {sm.id[:3]: sm for sm in self.sms}
    self.sm_attr2stypes: Dict[str, Dict[str, List[SemanticType]]] = {}
    assert len(self.sm_prefix_index) == len(self.sms), "No duplicated prefix"

    class_uris = set()
    predicates = set()
    for sm in self.sms:
        for n in sm.graph.iter_data_nodes():
            e = n.get_first_incoming_link()
            class_uri = e.get_source_node().label.decode()
            predicate = e.label.decode()
            class_uris.add(class_uri)
            predicates.add(predicate)

    for file in dir.iterdir():
        if file.name.endswith(".df.csv"):
            prefix = file.name[:3]
            self.sm_attr2stypes[prefix] = self.read_serene_stypes(file)
            for attr_lbl, stypes in self.sm_attr2stypes[prefix].items():
                for stype in stypes:
                    stype.domain = self.recover_class_uris(stype.domain, class_uris)
                    stype.type = self.recover_predicates(stype.type, predicates)
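# Minimal usage sketch (illustrative only), assuming this constructor belongs to
# SereneSemanticTypes as used in create_semantic_typer below. The directory is expected
# to contain one "<prefix>.df.csv" file per source, where the 3-character prefix matches
# a source id; the path mirrors the one hard-coded elsewhere and may not exist locally.
if __name__ == '__main__':
    serene_typer = SereneSemanticTypes(
        "museum_edm",
        Path("/workspace/tmp/serene-python-client/datasets/GOLD/museum_edm_stypes/kfold-s01-s14"))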
def compute_mohsen_stypes(dataset: str, train_sms: List[SemanticModel]):
    sms = get_semantic_models(dataset)
    train_sm_ids = [sm.id for sm in train_sms]
    exec_dir = Path(config.fsys.debug.as_path()) / "tmp" / f"mohsen-styper-{get_short_train_name(train_sms)}"
    if exec_dir.exists():
        shutil.rmtree(exec_dir)
    exec_dir.mkdir(exist_ok=True, parents=True)

    semantic_types = {}
    # run in parallel to save time
    # with ThreadPool(os.cpu_count() // 2) as pool:
    with ThreadPool(6) as pool:
        results = {}
        # Karma re-learns semantic types for every data source, so we parallelize per data source
        for sm in sms:
            if sm.id in train_sm_ids:
                local_train_sms = [s for s in train_sms if s.id != sm.id]
            else:
                local_train_sms = train_sms

            local_exec_dir = exec_dir / sm.id
            local_exec_dir.mkdir(exist_ok=True)
            results[sm.id] = pool.apply_async(worker_get_stype,
                                              (dataset, local_train_sms, sm, local_exec_dir))

        for sid, result in results.items():
            semantic_types[sid] = result.get()

    output_dir = Path(config.datasets[dataset].karma_version.as_path()) / "semantic-types"
    output_dir.mkdir(exist_ok=True)
    serializeJSON(semantic_types,
                  output_dir / f"{get_short_train_name(train_sms)}.json",
                  indent=4)
    return semantic_types
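# Minimal usage sketch for compute_mohsen_stypes (illustrative only): assumes the
# "museum_edm" dataset and Karma's dependencies are configured locally; the variable
# names below are hypothetical.
if __name__ == '__main__':
    example_train_sms = get_semantic_models("museum_edm")[:14]
    # Returns {source_id: learned semantic types}; the same mapping is also serialized
    # under <karma_version>/semantic-types/<train-name>.json.
    example_stypes = compute_mohsen_stypes("museum_edm", example_train_sms)
    print(len(example_stypes), "sources typed")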
def __init__(self,
             dataset: str,
             train_source_ids: List[str],
             load_circular_dependency: bool = True,
             training_examples: Optional[List[Example]] = None):
    """
    :param dataset:
    :param train_source_ids:
    :param load_circular_dependency:
    :param training_examples: list of training examples used to build the weak models;
        not needed at test time (i.e., None) because the weak models have already been built
    """
    self.dataset = dataset
    self.source_models = {sm.id: sm for sm in get_semantic_models(dataset)}
    self.train_source_ids = set(train_source_ids)
    self.top_k_semantic_types = Settings.get_instance().semantic_labeling_top_n_stypes
    self.training_models = [self.source_models[sid] for sid in train_source_ids]
    self.typer: SemanticTyper = create_semantic_typer(dataset, self.training_models)
    self.testing_models = [
        self.source_models[sid]
        for sid in set(self.source_models.keys()).difference(train_source_ids)
    ]
    self.training_examples = training_examples

    # local models
    self.multival_predicate = MultiValuePredicate.get_instance(self.training_models)
    self.statistic = Statistic.get_instance(self.training_models)
    # self.data_constraint = get_data_constraint_model(dataset, self.training_models)
    self.stype_assistant = get_stype_assistant_model(dataset, self.training_models)
    self.local_structure = LocalStructure.get_instance(self.training_models)
    self.attribute_same_scope = AttributeScope.get_instance(self.dataset)
    self.duplication_tensors = DuplicationTensors.get_instance(self.training_models)
    self.primary_key: PrimaryKey = PrimaryKey.get_instance(dataset, self.training_models)
    self.cardinality = CardinalityFeatures.get_instance(dataset)

    # STEP 1: add semantic types
    self.typer.semantic_labeling(self.training_models,
                                 self.testing_models,
                                 self.top_k_semantic_types,
                                 eval_train=True)

    # STEP 2: load circular dependencies such as node_prob
    if load_circular_dependency:
        self.node_prob = NodeProb(self, load_classifier=True)
def create_semantic_typer(dataset: str, train_sms: List[SemanticModel]) -> SemanticTyper:
    settings = Settings.get_instance()
    if settings.semantic_labeling_method == Settings.MohsenJWS:
        # noinspection PyTypeChecker
        return MohsenSemanticTyper.get_instance(dataset, train_sms)

    if settings.semantic_labeling_method == Settings.ReImplMinhISWC:
        return SemanticTyper.get_instance(dataset, train_sms)

    if settings.semantic_labeling_method == Settings.MohsenJWS + "-Oracle":
        # noinspection PyTypeChecker
        return SemiOracleSemanticLabeling(MohsenSemanticTyper.get_instance(dataset, train_sms))

    if settings.semantic_labeling_method == Settings.ReImplMinhISWC + "-Oracle":
        # noinspection PyTypeChecker
        return SemiOracleSemanticLabeling(SemanticTyper.get_instance(dataset, train_sms))

    if settings.semantic_labeling_method == Settings.OracleSL:
        # noinspection PyTypeChecker
        return OracleSemanticLabeling()

    if settings.semantic_labeling_method == "OracleSL-Constraint":
        # noinspection PyTypeChecker
        return ConstraintOracleSemanticLabeling()

    if settings.semantic_labeling_method == "SereneSemanticType":
        sms = get_semantic_models(dataset)
        if dataset == "museum_edm" and train_sms == sms[:14]:
            serene_dir = "/workspace/tmp/serene-python-client/datasets/GOLD/museum_edm_stypes/kfold-s01-s14"
        elif dataset == "museum_edm" and train_sms == sms[14:]:
            serene_dir = "/workspace/tmp/serene-python-client/datasets/GOLD/museum_edm_stypes/kfold-s15-s28"
        elif dataset == "museum_edm" and train_sms == sms[7:21]:
            serene_dir = "/workspace/tmp/serene-python-client/datasets/GOLD/museum_edm_stypes/kfold-s08-s21"
        elif dataset == "museum_crm" and train_sms == sms[:14]:
            serene_dir = "/workspace/tmp/serene-python-client/datasets/GOLD/museum_crm_stypes/kfold-s01-s14"
        elif dataset == "museum_crm" and train_sms == sms[14:]:
            serene_dir = "/workspace/tmp/serene-python-client/datasets/GOLD/museum_crm_stypes/kfold-s15-s28"
        elif dataset == "museum_crm" and train_sms == sms[7:21]:
            serene_dir = "/workspace/tmp/serene-python-client/datasets/GOLD/museum_crm_stypes/kfold-s08-s21"
        else:
            raise Exception("Invalid configuration of serene semantic types")
        # noinspection PyTypeChecker
        return SereneSemanticTypes(dataset, Path(serene_dir))

    raise Exception(f"Invalid semantic typer: {settings.semantic_labeling_method}")
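# Minimal usage sketch (illustrative only): the factory dispatches on
# Settings.semantic_labeling_method, so the method must be set before calling it.
if __name__ == '__main__':
    Settings.get_instance(False).semantic_labeling_method = Settings.ReImplMinhISWC
    example_train_sms = get_semantic_models("museum_edm")[:14]
    typer = create_semantic_typer("museum_edm", example_train_sms)
    print(type(typer).__name__)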
def __init__(self, dataset: str, model: TemplateLogLinearModel,
             tf_domain: GrowableBinaryVectorDomain,
             pairwise_domain: GrowableBinaryVectorDomain) -> None:
    self.dataset = dataset
    self.source_models: Dict[str, SemanticModel] = {
        s.id: s for s in get_semantic_models(dataset)
    }
    self.inference = BeliefPropagation.get_constructor(InferProb.MARGINAL)
    self.map_inference = BeliefPropagation.get_constructor(InferProb.MAP)
    self.model: TemplateLogLinearModel = model
    for template in model.templates:
        if isinstance(template, CachedTemplateFactorConstructor):
            template.disable_cache()
    self.tf_domain: GrowableBinaryVectorDomain = tf_domain
    self.pairwise_domain = pairwise_domain
    self.example_annotator: ExampleAnnotator = None
    self.max_n_tasks = Settings.get_instance().max_n_tasks
def __init__(self, dataset: str) -> None:
    self.dataset = dataset
    self.attribute_same_scope_matrix: Dict[str, Dict[Tuple[bytes, bytes], bool]] = {}

    for sm in get_semantic_models(dataset):
        self.attribute_same_scope_matrix[sm.id] = {}
        attr_paths = [attr.label.split(Schema.PATH_DELIMITER) for attr in sm.attrs]
        for i in range(len(sm.attrs)):
            for j in range(i + 1, len(sm.attrs)):
                is_same_scope = attr_paths[i][:-1] == attr_paths[j][:-1]
                self.attribute_same_scope_matrix[sm.id][(sm.attrs[i].label.encode('utf-8'),
                                                         sm.attrs[j].label.encode('utf-8'))] = is_same_scope
                self.attribute_same_scope_matrix[sm.id][(sm.attrs[j].label.encode('utf-8'),
                                                         sm.attrs[i].label.encode('utf-8'))] = is_same_scope
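# Minimal lookup sketch (illustrative only): two attributes are in the same scope when
# their label paths share the same parent. Assumes AttributeScope exposes a
# get_instance(dataset) accessor, as used elsewhere in the codebase, and that the first
# source has at least two attributes.
if __name__ == '__main__':
    scope = AttributeScope.get_instance("museum_edm")
    sm = get_semantic_models("museum_edm")[0]
    a0, a1 = sm.attrs[0].label, sm.attrs[1].label
    print(scope.attribute_same_scope_matrix[sm.id][(a0.encode('utf-8'), a1.encode('utf-8'))])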
def _init(self):
    self.source_mappings: Dict[str, SemanticModel] = {
        s.id: s for s in get_semantic_models(self.dataset)
    }
    self.train_columns = [col for tbl in self.train_tables for col in tbl.columns]
    self.train_column_stypes: List[str] = []
    for tbl in self.train_tables:
        sm = self.source_mappings[tbl.id]
        for col in tbl.columns:
            dnode = sm.graph.get_node_by_id(sm.get_attr_by_label(col.name).id)
            dlink = dnode.get_first_incoming_link()
            self.train_column_stypes.append(dlink.label.decode("utf-8"))

    self.test_columns = [col for tbl in self.test_tables for col in tbl.columns]
    self.name2table: Dict[str, ColumnBasedTable] = {
        tbl.id: tbl for tbl in chain(self.train_tables, self.test_tables)
    }
    self.col2idx: Dict[str, int] = {
        col.id: i for i, col in enumerate(chain(self.train_columns, self.test_columns))
    }
    self.col2types: Dict[str, Tuple[str, str]] = {}
    self.col2dnodes: Dict[str, GraphNode] = {}

    col: Column
    for col in chain(self.train_columns, self.test_columns):
        sm = self.source_mappings[col.table_name]
        attr = sm.get_attr_by_label(col.name)
        dnode = sm.graph.get_node_by_id(attr.id)
        link = dnode.get_first_incoming_link()
        self.col2types[col.id] = (link.get_source_node().label, link.label)
        self.col2dnodes[col.id] = dnode

    assert len(self.col2types) == len(self.train_columns) + len(self.test_columns), \
        "column name must be unique"
def get_stype_assistant_model(dataset: str, train_sms: List[SemanticModel]):
    global _instance
    if _instance is None:
        cache_file = get_cache_dir(dataset, train_sms) / "weak_models" / "stype_assistant.pkl"
        cache_file.parent.mkdir(exist_ok=True, parents=True)
        need_rebuilt = True

        if cache_file.exists():
            SemanticTypeAssistant.logger.debug("Try to load previous run...")
            model, cache_dataset, cache_train_sm_ids = deserialize(cache_file)
            if cache_dataset == dataset and cache_train_sm_ids == {sm.id for sm in train_sms}:
                need_rebuilt = False
                ont_graph = get_ont_graph(dataset)
                ont = get_ontology(dataset)
                stat = Statistic.get_instance(train_sms)
                ota = EmpiricalTripleAdviser(ont_graph, ont, stat.p_triple, 15)
                model.triple_adviser = ota

        if need_rebuilt:
            ont_graph = get_ont_graph(dataset)
            ont = get_ontology(dataset)
            stat = Statistic.get_instance(train_sms)
            ota = EmpiricalTripleAdviser(ont_graph, ont, stat.p_triple, 15)

            typer = SemanticTyper.get_instance(dataset, train_sms)
            try:
                typer.load_model()
            except Exception:
                sms = get_semantic_models(dataset)
                train_ids = {sm.id for sm in train_sms}
                typer.semantic_labeling(train_sms, [sm for sm in sms if sm.id not in train_ids], 4)

            model = SemanticTypeAssistant(train_sms, typer, ota)
            # drop the adviser while pickling, then restore it
            model.triple_adviser = None
            serialize((model, dataset, {sm.id for sm in train_sms}), cache_file)
            model.triple_adviser = ota

        _instance = model
    return _instance
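# Minimal usage sketch (illustrative only): builds the semantic type assistant for the
# first 14 "museum_edm" sources, or loads it from the cache file on later runs; repeated
# calls return the module-level singleton.
if __name__ == '__main__':
    assistant = get_stype_assistant_model("museum_edm", get_semantic_models("museum_edm")[:14])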
def serialize_rust_input(dataset: str, workdir: str, train_sms: List[SemanticModel],
                         test_sms: List[SemanticModel], foutput: Path):
    primary_key = PrimaryKey.get_instance(dataset, train_sms)
    sms = get_semantic_models(dataset)
    sm_index = {sm.id: i for i, sm in enumerate(sms)}
    train_sm_idxs = [sm_index[sm.id] for sm in train_sms]
    test_sm_idxs = [sm_index[sm.id] for sm in test_sms]
    predicted_parent_stypes = serialize_stype_assistant(dataset, sms, train_sms, test_sms)
    cardinality = CardinalityFeatures.get_instance(dataset)
    semantic_labeling(dataset, train_sms, test_sms)

    data = {
        "dataset": dataset,
        "workdir": str(workdir),
        "semantic_models": [sm.to_dict() for sm in sms],
        "predicted_parent_stypes": {"stype_details": predicted_parent_stypes},
        "train_sm_idxs": train_sm_idxs,
        "test_sm_idxs": test_sm_idxs,
        "feature_primary_keys": primary_key.to_dict(),
        "feature_cardinality_features": {
            sm_id: {"columns": matrix.columns, "matrix": matrix.matrix}
            for sm_id, matrix in cardinality.cardinality_matrices.items()
        },
        "ont_graph": serialize_ont_graph(dataset)
    }
    serializeJSON(data, foutput, indent=4)
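# Minimal usage sketch (illustrative only): bundles everything the Rust component appears
# to need into one JSON file. The work directory and output path below are hypothetical.
if __name__ == '__main__':
    all_sms = get_semantic_models("museum_edm")
    serialize_rust_input("museum_edm", "/tmp/rust-workdir", all_sms[:14], all_sms[14:],
                         Path("/tmp/rust-workdir/input.json"))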
print("Saved!!") if len(self.exec_buffer) > 0: self.proceed_cmd() self.worksheet.save(self.model_file) run_in_terminal(noti) def app_bottom_toolbar(self): return HTML( 'Tips: ctrl-p (render page), ctrl-u (undo), ctrl-r (redo), ctrl-s (save), ctrl-c (abort current prompt)' ) if __name__ == '__main__': dataset = "museum_crm" sm_names = [sm.id for sm in get_semantic_models("museum_edm")] ont = get_ontology(dataset) train_sms = get_semantic_models(dataset) R2RML.load_python_scripts( Path(config.datasets[dataset].python_code.as_path())) dataset_dir = Path("/workspace/semantic-modeling/data/museum-jws-crm") data_files = [] # data_files = [file for file in (dataset_dir / "tmp").iterdir() if file.name.startswith("s")] for file in (dataset_dir / "sources").iterdir(): if file.name.startswith("s"): data_files.append(file) for sm_name in sm_names: # if int(sm_name[1:3]) <= 18: # continue
            eval_results[chuffed_idx] = {'precision': 0, 'recall': 0, 'f1': 0}
        else:
            ssd = ssds[0]
            # ssd.graph.render()
            result = smodel_eval.f1_precision_recall(gold_graph, ssd.graph, DataNodeMode.NO_TOUCH)
            eval_results[chuffed_idx]['precision'] = result['precision']
            eval_results[chuffed_idx]['recall'] = result['recall']
            eval_results[chuffed_idx]['f1'] = result['f1']

    return eval_results


if __name__ == '__main__':
    dataset = "museum_crm"
    sms = get_semantic_models(dataset)
    sms_index = {sm.id[:3]: sm for sm in sms}
    ont = get_ontology(dataset)
    ont.register_namespace("serene", "http://au.csiro.data61/serene/dev#")

    # get serene output by sms
    kfold_results = []
    stype = "ReImplMinhISWC_False_pat"
    for kfold in ["kfold-s01-s14", "kfold-s15-s28", "kfold-s08-s21"]:
        kfold_sms_prefix = {
            sm[:3]
            for sm in get_sm_ids_by_name_range(*kfold.replace("kfold-", "").split("-"),
                                               [sm.id for sm in sms])
        }
        print("==== KFOLD:", kfold, "====")
                                         eid, train_sm_ids)
        test_examples.append(example)

    serializeJSON(test_examples, workdir / "examples" / "test.json")
    return test_examples


if __name__ == '__main__':
    dataset = "museum_edm"
    Settings.get_instance(False).parallel_n_process = 6
    Settings.get_instance().max_n_tasks = 160
    Settings.get_instance().semantic_labeling_top_n_stypes = 4
    Settings.get_instance().searching_beam_width = 5
    Settings.get_instance().log_current_settings()

    source_models = get_semantic_models(dataset)
    train_sms = source_models[:6]
    test_sms = [sm for sm in source_models if sm not in train_sms]

    workdir = Path(config.fsys.debug.as_path()) / dataset / "main_experiments" / get_short_train_name(train_sms)
    workdir.mkdir(exist_ok=True, parents=True)

    create_semantic_typer(dataset, train_sms).semantic_labeling(
        train_sms,
        test_sms,
        top_n=Settings.get_instance().semantic_labeling_top_n_stypes,
        eval_train=True)

    model_dir = workdir / "models" / "exp_no_3"
    model = Model.from_file(dataset, model_dir)
                        help='Experiment directory; must already exist')
    args = parser.parse_args()

    try:
        assert args.dataset is not None
        args.kfold = parse_kfold(args.dataset, args.kfold)
    except AssertionError:
        parser.print_help()
        raise

    return args


if __name__ == '__main__':
    args = get_shell_args()
    source_models: List[SemanticModel] = get_semantic_models(args.dataset)
    train_sms = [sm for sm in source_models if sm.id in args.kfold['train_sm_ids']]
    test_sms = [sm for sm in source_models if sm.id in args.kfold['test_sm_ids']]

    Settings.get_instance(False).semantic_labeling_method = args.semantic_typer
    Settings.get_instance().log_current_settings()

    typer = create_semantic_typer(args.dataset, train_sms)
    typer.semantic_labeling(train_sms, test_sms, 4, eval_train=True)

    exp_dir = Path(args.exp_dir)
    eval_sources(
        # dump result into test_sources
        for source in chain(train_sources, test_sources):
            for col in source.attrs:
                try:
                    if col.label not in result[source.id].columns:
                        # this column is ignored
                        stypes = []
                    else:
                        stypes = result[source.id].columns[col.label]

                    col.semantic_types = [
                        KarmaSemanticType(col.id, stype.domain, stype.type,
                                          "Minhptx-ISWC2016-SemanticLabeling",
                                          stype.weight) for stype in stypes
                    ][:top_n]
                except Exception:
                    self.logger.exception("Hit exception for source: %s, col: %s",
                                          source.get_id(), col.id)
                    raise


if __name__ == '__main__':
    dataset = "museum_crm"
    sources: List[SemanticModel] = get_semantic_models(dataset)[:5]
    train_size = 3

    typer = MinhptxSemanticLabeling(dataset, 200)
    typer.semantic_labeling(sources[:train_size], sources[train_size:], 4)
                key=lambda x: space['children'][x],
                reverse=True)
            self.node_structure_space[n] = NodeStructureSpace(
                n, {x: i for i, x in enumerate(space['parents'].keys())},
                {x: i for i, x in enumerate(children_attrs)},
                [x[1] != b'DATA_NODE' for x in children_attrs],
                [space['children'][x] for x in children_attrs])

    @staticmethod
    def get_instance(train_sms: List[SemanticModel]) -> 'LocalStructure':
        sm_ids = {sm.id for sm in train_sms}
        if LocalStructure.instance is None:
            LocalStructure.instance = LocalStructure(train_sms)
            return LocalStructure.instance

        assert LocalStructure.instance.train_sm_ids == sm_ids
        return LocalStructure.instance


if __name__ == '__main__':
    import ujson

    dataset = "museum_edm"
    train_size = 14
    source_models = get_semantic_models(dataset)[:train_size]

    local_structure = LocalStructure.get_instance(source_models)
    print(ujson.dumps(local_structure.node_structure_space, indent=4))
node_id: crm:E12_Production1
domain: crm:E12_Production
type: karma:dummy
input_attr_path: %s""" % attr_path)


if __name__ == '__main__':
    dataset = "museum_crm"
    ont = get_ontology(dataset)
    dataset_dir = Path(config.datasets[dataset].as_path())
    R2RML.load_python_scripts(Path(config.datasets[dataset].python_code.as_path()))

    # train the model first
    train_sms = get_semantic_models(dataset)[:-1]
    styper = SemanticTyper.get_instance(dataset, train_sms)

    # doing interactive modeling
    for tbl in get_raw_data_tables(dataset):
        if tbl.id in [sm.id for sm in train_sms]:
            continue

        print("Processing table:", tbl.id)
        print(tbl.head(10).to_string("double"))

        r2rml = R2RML.load_from_file(dataset_dir / "models-y2rml" / f"{tbl.id}-model.yml")
        sm = r2rml.apply_cmds(tbl)
        # gen_dummy_sm(sm, tbl)
        with Pool() as p:
            tf_cols = p.map(TfidfDatabase._compute_tf,
                            [(self.tokenizer, col) for col in cols])

        for col, tf_col in zip(cols, tf_cols):
            tfidf = numpy.zeros(len(self.vocab))
            for w, tf in tf_col.items():
                if w in self.vocab:
                    tfidf[self.vocab[w]] = tf * numpy.log(
                        self.n_docs / (1 + self.invert_token_idx[w]))
            self.cache_col2tfidf[col.id] = tfidf

    @staticmethod
    def _compute_tf(args):
        tokenizer, col = args
        counter = Counter()
        sents = (subsent for sent in col.get_textual_data()
                 for subsent in sent.decode('utf-8').split("/"))
        for doc in tokenizer.pipe(sents, batch_size=50, n_threads=4):
            counter.update((str(w) for w in doc))

        number_of_token = sum(counter.values())
        for token, val in counter.items():
            counter[token] = val / number_of_token
        return counter


if __name__ == '__main__':
    stype_db = SemanticTypeDB.create(
        "museum_edm", [sm.id for sm in get_semantic_models("museum_edm")[:14]])
    stype_db._build_db()
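# Standalone sketch (illustrative only) of the weighting used above: term frequencies are
# normalized per document, then scaled by log(n_docs / (1 + df)), assuming
# invert_token_idx holds per-token document counts. The toy documents are hypothetical.
import math
from collections import Counter

toy_docs = [["gold", "coin"], ["silver", "coin"], ["bronze", "statue"], ["marble", "statue"]]
n_docs = len(toy_docs)
doc_freq = Counter(token for doc in toy_docs for token in set(doc))
for doc in toy_docs:
    tf = Counter(doc)
    total = sum(tf.values())
    tfidf = {tok: (cnt / total) * math.log(n_docs / (1 + doc_freq[tok]))
             for tok, cnt in tf.items()}
    print(tfidf)  # e.g. "coin" (df=2) gets a lower weight than "gold" (df=1)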
def run_evaluation_workflow(dataset: str, scenario: Scenario, train_sms, test_sms):
    ont: Ontology = get_ontology(dataset)
    karma_models: List[KarmaModel] = get_karma_models(dataset)
    semantic_models: List[SemanticModel] = get_semantic_models(dataset)
    train_sm_ids = [sm.id for sm in train_sms]

    sdesc_args = dict(
        dataset=dataset,
        train_sm_ids=train_sm_ids,
        # we always put semantic types into learnedSemanticTypes, even for userSetSemanticTypes
        use_correct_type=False,
        use_old_semantic_typer=False,
        exec_dir=get_cache_dir(dataset, train_sms) / "mohsen_jws2015",
        sm_type_dir=Path(config.fsys.debug.as_path()) / "tmp" / "models-json-temp")

    # STEP 1: run semantic typing and put the result into a temporary folder
    if sdesc_args['sm_type_dir'].exists():
        shutil.rmtree(sdesc_args['sm_type_dir'])
    sdesc_args['sm_type_dir'].mkdir(exist_ok=True, parents=True)

    top_k_types = Settings.get_instance().semantic_labeling_top_n_stypes
    typer = create_semantic_typer(dataset, train_sms)
    typer.semantic_labeling(train_sms, test_sms, top_k_types, eval_train=True)

    for sm, ksm in zip(semantic_models, karma_models):
        # assign semantic types to learnedSemanticTypes
        sm_alignment = SemanticModelAlignment(sm, ksm)
        for col in ksm.source_columns:
            attr = sm.get_attr_by_label(
                sm.graph.get_node_by_id(sm_alignment.alignment[col.id]).label.decode('utf-8'))
            node = ksm.karma_graph.get_node_by_id(col.id)
            link = node.get_first_incoming_link()
            node.learned_semantic_types = [
                KarmaSemanticType(node.id, stype.domain, stype.type,
                                  typer.__class__.__name__, stype.confidence_score)
                for stype in attr.semantic_types
            ]
            node.user_semantic_types = [
                KarmaSemanticType(node.id,
                                  link.get_source_node().label.decode(),
                                  link.label.decode(), "User", 1.0)
            ]
        serializeJSON(ksm.to_normalized_json_model(ont),
                      sdesc_args['sm_type_dir'] / f"{ksm.id}-model.json",
                      indent=4)

    # STEP 2: invoke semantic modeling
    modeler = MohsenSemanticModeling(**sdesc_args)
    pred_sms = modeler.sm_prediction(train_sms, test_sms)

    # STEP 3: evaluate the predicted semantic models
    eval_hist = [["source", "precision", "recall", "f1", "stype-acc"]]
    if scenario == Scenario.SCENARIO_1:
        data_node_mode = DataNodeMode.IGNORE_DATA_NODE
    else:
        data_node_mode = DataNodeMode.NO_TOUCH

    for sm, pred_sm in zip(test_sms, pred_sms):
        eval_result = smodel_eval.f1_precision_recall(sm.graph, pred_sm.graph, data_node_mode)
        eval_hist.append([
            sm.id, eval_result["precision"], eval_result["recall"], eval_result["f1"],
            smodel_eval.stype_acc(sm.graph, pred_sm.graph)
        ])
    eval_hist.append([
        'average',
        np.average([float(x[1]) for x in eval_hist[1:]]),
        np.average([float(x[2]) for x in eval_hist[1:]]),
        np.average([float(x[3]) for x in eval_hist[1:]]),
        np.average([float(x[4]) for x in eval_hist[1:]])
    ])

    serializeCSV(eval_hist,
                 sdesc_args["exec_dir"] / f"evaluation_result_{scenario.value}.csv")

    # STEP 4: evaluate the predicted semantic labeling
    pred_stypes = modeler.semantic_labeling(train_sms, test_sms)
    for pred_stype, sm in zip(pred_stypes, test_sms):
        for attr in sm.attrs:
            if attr.label not in pred_stype:
                attr.semantic_types = []
            else:
                attr.semantic_types = pred_stype[attr.label]
    eval_sources(
        test_sms,
        sdesc_args["exec_dir"] / f"evaluation_result_{scenario.value}_stype.csv")

    # STEP 5: visualize the predictions
    (sdesc_args['exec_dir'] / "prediction-viz").mkdir(exist_ok=True)
    need_render_graphs = [
        (colorize_prediction(
            pred_sm.graph,
            AutoLabel.auto_label_max_f1(sm.graph, pred_sm.graph, False)[0]),
         sdesc_args['exec_dir'] / "prediction-viz" / f"{sm.id}.png")
        for sm, pred_sm in zip(test_sms, pred_sms)
    ]
    with ThreadPool(32) as p:
        p.map(render_graph, need_render_graphs)

    return eval_hist
if __name__ == '__main__':
    # HYPER-ARGS
    args = get_shell_args()

    Settings.get_instance(False).semantic_labeling_top_n_stypes = args.semantic_labeling_top_n_stypes
    Settings.get_instance().semantic_labeling_method = args.semantic_typer
    Settings.get_instance().log_current_settings()

    exp_dir = Path(args.exp_dir)
    assert exp_dir.exists()

    source_models = {sm.id: sm for sm in get_semantic_models(args.dataset)}
    train_sms = [source_models[sid] for sid in args.kfold['train_sm_ids']]
    test_sms = [source_models[sid] for sid in args.kfold['test_sm_ids']]

    eval_hist = run_evaluation_workflow(args.dataset, Scenario.SCENARIO_2, train_sms, test_sms)
    serializeCSV(eval_hist, exp_dir / f"kfold-{get_short_train_name(train_sms)}.test.csv")
    serializeJSON(args,
                  exp_dir / f"kfold-{get_short_train_name(train_sms)}.meta.json",
                  indent=4)
    shutil.move(
        get_cache_dir(args.dataset, train_sms) / "mohsen_jws2015",
        exp_dir / f"kfold-{get_short_train_name(train_sms)}")