def evaluate(self):
    """Evaluate the trained fastText model on the evaluation split.

    Predicts categories for the raw titles, decodes the fastText label
    strings back to category names, scores the flat predictions with the
    hierarchical scorer, and persists both predictions and metrics.
    """
    result_collector = ResultCollector(self.dataset_name, self.experiment_type)
    ds_eval = self.prepare_eval_dataset()
    # Capture ground truth before prepare_fasttext rewrites the frame.
    y_true = ds_eval['category'].values

    # Preprocess data into fastText input format.
    # NOTE(review): this prepare_fasttext returns 2 values, while the
    # training variant elsewhere returns 3 — verify both class variants.
    eval_path, ds_eval = self.prepare_fasttext(ds_eval, 'eval')
    y_pred, y_prob = self.model.predict(ds_eval['title'].values.tolist())

    # Postprocess labels: map fastText '__label__...' strings back to categories.
    y_pred = [self.encoder[prediction[0]] for prediction in y_pred]

    # Only the decoder is consumed here; the encoder and leaf count returned
    # by encode_labels() were unused locals in the original code.
    _, normalized_decoder, _ = self.encode_labels()

    evaluator = scorer.HierarchicalScorer(self.experiment_name, self.tree,
                                          transformer_decoder=normalized_decoder)
    result_collector.results[self.experiment_name] = \
        evaluator.compute_metrics_transformers_flat(y_true, y_pred)

    # Persist prediction
    ds_eval.to_pickle(self.prediction_output)
    self.logger.info('Prediction results persisted to {}!'.format(self.prediction_output))

    # Persist results
    timestamp = time.time()
    result_collector.persist_results(timestamp)
def test_compute_metrics_no_encoding(self):
    """Leaf metrics must equal level-3 metrics when every label is a level-3 leaf.

    Scores a fixed set of wdc_ziqi ground-truth/prediction pairs (no label
    encoding) and checks that the level-3 weighted F1 coincides with the
    leaf-level weighted F1.
    """
    dataset_name = 'wdc_ziqi'
    experiment_name = 'test_compute_metrics_no_encoding'

    # Load the pickled category tree shipped with the raw dataset.
    project_dir = Path(__file__).resolve().parents[3]
    path_to_tree = project_dir.joinpath('data', 'raw', dataset_name, 'tree',
                                        'tree_{}.pkl'.format(dataset_name))
    with open(path_to_tree, 'rb') as f:
        tree = pickle.load(f)

    # Ground-truth leaf categories ('<GS1 code>_<name>').
    # NOTE(review): the '�' characters look like mojibake from the original
    # label names — verify these strings against the tree pickle.
    y_true = [
        '51121600_Vitamins/Minerals/Nutritional_Supplements',
        '64010200_Personal_Carriers/Accessories',
        '67010800_Upper_Body_Wear/Tops',
        '67010800_Upper_Body_Wear/Tops',
        '86011100_Toy_Vehicles_�_Non-ride',
        '67010800_Upper_Body_Wear/Tops',
        '75010300_Household/Office_Tables/Desks',
        '75030100_Ornaments',
        '64010200_Personal_Carriers/Accessories',
        '79010700_Plumbing/Heating_Ventilation/Air_Conditioning_Variety_Packs',
        '67010300_Lower_Body_Wear/Bottoms',
        '67010300_Lower_Body_Wear/Bottoms',
        '67010800_Upper_Body_Wear/Tops',
        '50202300_Non_Alcoholic_Beverages_�_Ready_to_Drink',
        '67010800_Upper_Body_Wear/Tops',
        '10101600_Pet_Nutritional_Supplements',
        '77030100_Cars',
        '67010100_Clothing_Accessories',
        '65010100_Computer/Video_Game_Accessories',
        '67010800_Upper_Body_Wear/Tops'
    ]
    # Predicted leaf categories, aligned element-wise with y_true.
    y_pred = [
        '51121600_Vitamins/Minerals/Nutritional_Supplements',
        '51101600_Drug_Administration',
        '67010800_Upper_Body_Wear/Tops',
        '67010100_Clothing_Accessories',
        '70011300_Arts/Crafts_Variety_Packs',
        '67010800_Upper_Body_Wear/Tops',
        '75010300_Household/Office_Tables/Desks',
        '75030200_Pictures/Mirrors/Frames',
        '64010200_Personal_Carriers/Accessories',
        '79010700_Plumbing/Heating_Ventilation/Air_Conditioning_Variety_Packs',
        '67010300_Lower_Body_Wear/Bottoms',
        '67010300_Lower_Body_Wear/Bottoms',
        '67010200_Full_Body_Wear',
        '50193800_Ready-Made_Combination_Meals',
        '67010800_Upper_Body_Wear/Tops',
        '86010400_Developmental/Educational_Toys',
        '77030100_Cars',
        '67010100_Clothing_Accessories',
        '71011600_Sporting_Firearms_Equipment',
        '67010800_Upper_Body_Wear/Tops'
    ]

    evaluator = scorer.HierarchicalScorer(experiment_name, tree)
    results = evaluator.compute_metrics_no_encoding(y_true, y_pred)

    # Lvl3 must match leaf node prediction in this scenario
    self.assertEqual(results['weighted_f1_lvl_3'], results['leaf_weighted_f1'])
def test_compute_metrics(self):
    """Check leaf-level precision/recall/F1 and hierarchical F1 on an icecat fixture.

    Encodes the label/prediction names to tree node ids, derives per-level
    labels, and compares the computed scores (rounded to 2 decimals) against
    pre-computed expected values.
    """
    # Setup
    dataset_name = 'icecat'
    experiment_name = 'test_compute_metrics'

    # Load the pickled category tree shipped with the raw dataset.
    project_dir = Path(__file__).resolve().parents[3]
    path_to_tree = project_dir.joinpath('data', 'raw', dataset_name, 'tree',
                                        'tree_{}.pkl'.format(dataset_name))
    with open(path_to_tree, 'rb') as f:
        tree = pickle.load(f)

    # Expected scores, pre-computed for the fixture below.
    precision = 0.47
    recall = 0.4
    f1 = 0.43
    h_f1 = 0.45

    # Ground-truth leaf category names.
    labels = [
        'Notebooks', 'Ink_Cartridges', 'Toner_Cartridges', 'Notebooks',
        'Notebooks', 'Servers', 'Motherboards', 'Notebook_Spare_Parts',
        'Warranty_&_Support_Extensions', 'Fibre_Optic_Cables',
        'Notebook_Spare_Parts', 'Toner_Cartridges', 'Digital_Photo_Frames',
        'Notebooks', 'Notebook_Spare_Parts', 'Notebooks', 'Notebooks',
        'PCs/Workstations', 'PCs/Workstations', 'Notebook_Cases'
    ]
    # Predicted leaf category names, aligned element-wise with labels.
    preds = [
        'IT_Courses', 'Notebooks', 'Toner_Cartridges', 'AV_Extenders',
        'Notebooks', 'Servers', 'Other_Input_Devices', 'Notebooks',
        'Warranty_&_Support_Extensions', 'Fibre_Optic_Cables',
        'Cable_Splitters_or_Combiners', 'Stick_PCs', 'Digital_Photo_Frames',
        'Notebooks', 'Notebooks', 'Projection_Screens',
        'Cable_Splitters_or_Combiners', 'Cable_Splitters_or_Combiners',
        'Cable_Splitters_or_Combiners', 'Notebook_Cases'
    ]

    # Run Function
    evaluator = scorer.HierarchicalScorer(experiment_name, tree)

    # Build name<->node-id encoder/decoder from the tree's node attributes.
    decoder = dict(tree.nodes(data="name"))
    encoder = dict([(value, key) for key, value in decoder.items()])
    labels = [encoder[label] for label in labels]
    preds = [encoder[pred] for pred in preds]

    # TODO: change input [labels], [preds](!)
    labels_per_lvl, preds_per_lvl = evaluator.determine_label_preds_per_lvl(
        labels, preds)
    scores = evaluator.compute_metrics(labels, preds, labels_per_lvl, preds_per_lvl)

    self.assertEqual(precision, round(scores['leaf_weighted_prec'], 2))
    self.assertEqual(recall, round(scores['leaf_weighted_rec'], 2))
    self.assertEqual(f1, round(scores['leaf_weighted_f1'], 2))
    self.assertEqual(h_f1, round(scores['h_f1'], 2))
def evaluate(self):
    """Evaluate the flat transformer classifier on the evaluation split.

    Scores the model with the hierarchical scorer via a 🤗 Trainer, attaches
    the decoded predictions to the eval frame for error analysis, writes the
    frame to CSV, and persists the aggregated metrics.
    """
    ds_eval = self.prepare_eval_dataset()

    # (sic) helper name typo 'intialize' comes from the class definition.
    normalized_encoder, normalized_decoder, number_of_labels = self.intialize_hierarchy_paths(
    )

    evaluator = scorer.HierarchicalScorer(
        self.experiment_name, self.tree, transformer_decoder=normalized_decoder)
    trainer = Trainer(
        model=self.model,  # the instantiated 🤗 Transformers model to be trained
        compute_metrics=evaluator.compute_metrics_transformers_flat)

    # Choose raw or preprocessed titles depending on the experiment config.
    if self.preprocessing:
        texts = list(ds_eval['preprocessed_title'].values)
    else:
        texts = list(ds_eval['title'].values)

    # Normalize category values to the underscore convention used by the encoder.
    ds_eval['category'] = ds_eval['category'].str.replace(' ', '_')
    labels = list(ds_eval['category'].values)

    tokenizer = utils.roberta_base_tokenizer()
    ds_wdc = CategoryDatasetFlat(texts, labels, tokenizer, normalized_encoder)

    result_collector = ResultCollector(self.dataset_name, self.experiment_type)
    result_collector.results[self.experiment_name] = trainer.evaluate(ds_wdc)

    # Predict values for error analysis
    prediction = trainer.predict(ds_wdc)
    preds = prediction.predictions.argmax(-1)
    ds_eval['prediction'] = [
        normalized_decoder[pred]['value'] for pred in preds
    ]

    full_prediction_output = '{}/{}'.format(self.data_dir, self.prediction_output)
    ds_eval.to_csv(full_prediction_output, index=False, sep=';',
                   encoding='utf-8', quotechar='"', quoting=csv.QUOTE_ALL)

    # Persist results
    timestamp = time.time()
    result_collector.persist_results(timestamp)
def run(self):
    """Run the dictionary-based classification experiments.

    Fits a Naive-Bayes bag-of-words model once as an optional fallback,
    then scores every parameter configuration on the validation split and
    persists the collected metrics.
    """
    result_collector = ResultCollector(self.dataset_name, self.experiment_type)
    dict_classifier = DictClassifier(self.dataset_name, self.most_frequent_leaf, self.tree)

    # Fallback classifier: CountVectorizer + MultinomialNB on training titles.
    pipeline = Pipeline([
        ('vect', CountVectorizer()),
        ('clf', MultinomialNB()),
    ])
    classifier_dictionary_based = pipeline.fit(
        self.dataset['train']['title'].values,
        self.dataset['train']['category'].values)

    for config in self.parameter:
        y_true = self.dataset['validate']['category'].values
        # Only hand the NB model over when this configuration enables fallback.
        fallback_classifier = classifier_dictionary_based if config['fallback'] else None

        y_pred = dict_classifier.classify_dictionary_based(
            self.dataset['validate']['title'],
            fallback_classifier,
            config['lemmatizing'],
            config['synonyms'])

        experiment_name = '{}; title only; synonyms: {}, lemmatizing: {}, fallback: {}'.format(
            self.experiment_type, config['synonyms'],
            config['lemmatizing'], config['fallback'])

        evaluator = scorer.HierarchicalScorer(experiment_name, self.tree)
        result_collector.results[experiment_name] = \
            evaluator.compute_metrics_no_encoding(y_true, y_pred)

    # Persist results
    timestamp = time.time()
    result_collector.persist_results(timestamp)
def run(self):
    """Run the flat transformer training experiments.

    Builds RoBERTa-based datasets for every split, trains with the configured
    hyperparameters, evaluates on train/validate/test, saves the model, and
    persists the collected metrics.
    """
    result_collector = ResultCollector(self.dataset_name, self.experiment_type)

    normalized_encoder, normalized_decoder, number_of_labels = self.encode_labels()

    # Configure RoBERTa with one output per (normalized) label.
    config = RobertaConfig.from_pretrained("roberta-base")
    config.num_labels = number_of_labels
    tokenizer, model = utils.provide_model_and_tokenizer(self.parameter['model_name'],
                                                         self.parameter['pretrained_model_or_path'],
                                                         config)

    # Build one CategoryDatasetFlat per split (train/validate/test).
    tf_ds = {}
    for key in self.dataset:
        df_ds = self.dataset[key]
        if self.test:  # load only subset of the data
            df_ds = df_ds[:20]
            self.logger.warning('Run in test mode - dataset reduced to 20 records!')

        # Optionally concatenate description to the title as model input.
        if self.parameter['description']:
            texts = list((df_ds['title'] + ' - ' + df_ds['description']).values)
        else:
            texts = df_ds['title'].values

        if self.parameter['preprocessing'] == True:
            texts = [preprocess(value) for value in texts]

        # Normalize label values
        labels = [value.replace(' ', '_') for value in df_ds['category'].values]

        tf_ds[key] = CategoryDatasetFlat(texts, labels, tokenizer, normalized_encoder)

    # Timestamp is reused both for the log directory name and result persistence.
    timestamp = time.time()
    string_timestamp = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d_%H-%M-%S')

    training_args = TrainingArguments(
        output_dir='{}/models/{}/transformers/model/{}'
        .format(self.data_dir, self.dataset_name,
                self.parameter['experiment_name']),  # output directory
        num_train_epochs=self.parameter['epochs'],  # total # of training epochs
        learning_rate=self.parameter['learning_rate'],
        per_device_train_batch_size=self.parameter['per_device_train_batch_size'],  # batch size per device during training
        per_device_eval_batch_size=64,  # batch size for evaluation
        warmup_steps=500,  # number of warmup steps for learning rate scheduler
        weight_decay=self.parameter['weight_decay'],  # strength of weight decay
        logging_dir='{}/models/{}/transformers/logs-{}'.format(self.data_dir,
                                                               self.dataset_name,
                                                               string_timestamp),  # directory for storing logs
        save_total_limit=5,  # Save only the last 5 Checkpoints
        metric_for_best_model=self.parameter['metric_for_best_model'],
        load_best_model_at_end=True,
        gradient_accumulation_steps=self.parameter['gradient_accumulation_steps'],
        seed=self.parameter['seed'],
        disable_tqdm=True
    )

    evaluator = scorer.HierarchicalScorer(self.parameter['experiment_name'],
                                          self.tree,
                                          transformer_decoder=normalized_decoder)
    trainer = Trainer(
        model=model,  # the instantiated 🤗 Transformers model to be trained
        args=training_args,  # training arguments, defined above
        train_dataset=tf_ds['train'],  # tensorflow_datasets training dataset
        eval_dataset=tf_ds['validate'],  # tensorflow_datasets evaluation dataset
        compute_metrics=evaluator.compute_metrics_transformers_flat
    )

    self.logger.info('Start training!')
    trainer.train()

    # Evaluate the best model on every split and collect per-split results.
    for split in ['train', 'validate', 'test']:
        result_collector.results['{}+{}'.format(self.parameter['experiment_name'], split)] \
            = trainer.evaluate(tf_ds[split])

    trainer.save_model()

    # Persist results
    result_collector.persist_results(timestamp)
def evaluate(self):
    """Evaluate the RNN-style transformer model on the evaluation split.

    Scores the model with the hierarchical RNN scorer, attaches leaf- and
    per-level labels/predictions to the eval frame for error analysis,
    writes the frame to CSV under the data directory, and persists the
    aggregated metrics.
    """
    ds_eval = self.prepare_eval_dataset()

    normalized_encoder, normalized_decoder, number_of_labels = self.encode_labels()

    evaluator = scorer.HierarchicalScorer(
        self.experiment_name, self.tree, transformer_decoder=normalized_decoder)
    trainer = Trainer(
        model=self.model,  # the instantiated 🤗 Transformers model to be evaluated
        compute_metrics=evaluator.compute_metrics_transformers_rnn)

    # Choose raw or preprocessed titles depending on the experiment config.
    if self.preprocessing:
        texts = list(ds_eval['preprocessed_title'].values)
    else:
        texts = list(ds_eval['title'].values)

    # Normalize category values to the underscore convention used by the encoder.
    ds_eval['category'] = ds_eval['category'].str.replace(' ', '_')
    labels = list(ds_eval['category'].values)

    tokenizer = utils.roberta_base_tokenizer()
    ds_wdc = CategoryDatasetRNN(texts, labels, tokenizer, normalized_encoder)

    result_collector = ResultCollector(self.dataset_name, self.experiment_type)
    result_collector.results[self.experiment_name] = trainer.evaluate(ds_wdc)

    # Predict values for error analysis.
    pred = trainer.predict(ds_wdc)
    labels, preds, labels_per_lvl, preds_per_lvl = evaluator.transpose_rnn_hierarchy(pred)

    ds_eval['Leaf Label'] = list(labels)
    ds_eval['Leaf Prediction'] = list(preds)

    # One label/prediction column pair per hierarchy level (1-based).
    for counter, (labs, predictions) in enumerate(zip(labels_per_lvl, preds_per_lvl),
                                                  start=1):
        ds_eval['Hierarchy Level {} Label'.format(counter)] = list(labs)
        ds_eval['Hierarchy Level {} Prediction'.format(counter)] = list(predictions)

    # BUG FIX: the original computed full_prediction_output but wrote to
    # self.prediction_output, dropping the data_dir prefix (the flat-transformer
    # evaluate writes to the full path).
    full_prediction_output = '{}/{}'.format(self.data_dir, self.prediction_output)
    ds_eval.to_csv(full_prediction_output, index=False, sep=';',
                   encoding='utf-8', quotechar='"', quoting=csv.QUOTE_ALL)

    # Persist results
    timestamp = time.time()
    result_collector.persist_results(timestamp)
def run(self):
    """Run the fastText training experiments.

    Prepares fastText input files per split, trains (optionally with
    autotune against the validation file), scores the validation split
    with the hierarchical scorer, and serializes the model, the label
    encoder, and the collected metrics.
    """
    result_collector = ResultCollector(self.dataset_name, self.experiment_type)

    # Reduce data if run in test mode:
    if self.test:
        for key in self.dataset:
            self.dataset[key] = self.dataset[key][:50]
            self.logger.warning(
                'Run in test mode - dataset reduced to 50 records!')

    # Train Classifier on train and validate
    ds_train = self.dataset['train']
    ds_validate = self.dataset['validate']
    ds_test = self.dataset['test']

    # Prepare data — prepare_fasttext writes the fastText input file and
    # returns (file path, rewritten frame, original category values).
    train_path, ds_train, orig_categories_train = self.prepare_fasttext(
        ds_train, 'train')
    validate_path, ds_validate, orig_categories_validate = self.prepare_fasttext(
        ds_validate, 'validate')
    test_path, ds_test, orig_categories_test = self.prepare_fasttext(
        ds_test, 'test')

    # Ground truth for scoring comes from the validation split.
    y_true = list(orig_categories_validate)

    # Use best performing configuration according to Nils' results! - Run more experiments if necessary
    if self.parameter['autotune'] == "True":
        classifier = fasttext.train_supervised(
            input=train_path,
            autotuneValidationFile=validate_path,
            autotuneMetric="f1")
    else:
        classifier = fasttext.train_supervised(
            input=train_path,
            epoch=self.parameter['epoch'],
            wordNgrams=self.parameter['wordNgrams'],
            loss=self.parameter['loss'],
            minn=self.parameter['minn'],
            maxn=self.parameter['maxn'],
            neg=self.parameter['neg'],
            thread=self.parameter['thread'],
            dim=self.parameter['dim'])

    y_pred, y_prob = classifier.predict(
        ds_validate['title'].values.tolist())

    # Postprocess labels: map fastText '__label__...' strings back to categories.
    y_pred = [self.fasttextencoder[prediction[0]] for prediction in y_pred]

    evaluator = scorer.HierarchicalScorer(
        self.parameter['experiment_name'], self.tree)
    result_collector.results[self.parameter[
        'experiment_name']] = evaluator.compute_metrics_no_encoding(
            y_true, y_pred)

    # Save classifier
    output_file = '{}/models/{}/fasttext/model/{}.bin'\
        .format(self.data_dir, self.dataset_name, self.parameter['experiment_name'])
    classifier.save_model(output_file)
    self.logger.info(
        'Classifier serialized to file {}'.format(output_file))

    # Save label encoder alongside the model (original comment wrongly
    # said "Save classifier" here).
    output_file = '{}/models/{}/fasttext/model/encoder-{}.pkl' \
        .format(self.data_dir, self.dataset_name, self.parameter['experiment_name'])
    with open(output_file, "wb") as file:
        pickle.dump(self.fasttextencoder, file=file)
    self.logger.info('Encoder serialized to file {}'.format(output_file))

    # Persist results
    timestamp = time.time()
    result_collector.persist_results(timestamp)