def test_make_upload_args(self):
    # make_upload_args() must exist and, for the "good upload" YAML fixture,
    # return exactly the keyword arguments listed below.
    self.assertTrue(hasattr(YamlHelper, 'make_upload_args'))

    expected_args = {
        'local_directory': 'tests/resources/test_output_data_dir/',
        's3_bucket': 'some_bucket',
        's3_bucket_dir': 'some/remote/directory/',
        'extra_args': {'ACL': 'public-read'},
    }
    helper = YamlHelper(self.test_yaml_upload_good)
    self.assertDictEqual(helper.make_upload_args(), expected_args)
def test_make_embeddings_metrics_class_list(self):
    # The metrics fixture should yield three metric objects whose classes are
    # keras AUC, Recall and Precision (order is not checked).
    self.assertTrue(hasattr(YamlHelper, 'make_embeddings_metrics_class_list'))

    helper = YamlHelper("tests/resources/test_make_embeddings_metrics.yaml")
    metrics = helper.make_embeddings_metrics_class_list()
    self.assertEqual(3, len(metrics))

    expected_classes = [
        "<class 'keras.metrics.AUC'>",
        "<class 'keras.metrics.Recall'>",
        "<class 'keras.metrics.Precision'>",
    ]
    # Compare on the string form of each object's class.
    observed_classes = [str(metric.__class__) for metric in metrics]
    self.assertCountEqual(expected_classes, observed_classes)
def test_make_graph_embeddings(self):
    # Run make_node_embeddings() with the arguments derived from the
    # BERT/t-SNE fixture, then check that both output files exist and that
    # the history file is JSON with exactly the keys 'loss' and 'lr'.
    helper = YamlHelper("tests/resources/test_graph_embedding_bert_tsne.yaml")
    make_node_embeddings(**helper.make_node_embeddings_args())

    self.assertTrue(os.path.exists(self.expected_embedding_file))
    self.assertTrue(os.path.exists(self.expected_history_file))

    with open(self.expected_history_file) as history_handle:
        history = json.load(history_handle)
    self.assertListEqual(list(history.keys()), ['loss', 'lr'])
def test_node_edge_urls_converted_to_path(self, mock_from_csv, mock_download_file):
    # Node/edge paths given as URLs must be replaced by local file paths once
    # load_graph() has run.
    # NOTE(review): both mock arguments are presumably injected by patch
    # decorators that are not visible here; neither is used in the body.
    helper = YamlHelper('tests/resources/test_urls_for_node_and_edge_paths.yaml')

    # Before loading, both entries are still URLs.
    for key in ('node_path', 'edge_path'):
        self.assertTrue(is_url(helper.main_graph_args()[key]))

    helper.load_graph()

    # After loading, each entry is the expected local path.
    expected_local_paths = (
        ('node_path', 'output_data/https___someremoteurl.com_nodes.tsv'),
        ('edge_path', 'output_data/https___someremoteurl.com_edges.tsv'),
    )
    for key, local_path in expected_local_paths:
        self.assertFalse(is_url(helper.main_graph_args()[key]))
        self.assertEqual(local_path, helper.main_graph_args()[key])
def test_pre_run_check_bad_credentials_but_no_check(
        self, mock_boto_client) -> None:
    # With check_s3_credentials=False, pre_run_checks() should return a
    # truthy value even though the boto client is rigged to fail.
    # NOTE(review): mock_boto_client is presumably a mock injected by a patch
    # decorator that is not visible here.
    credentials_error = ClientError(
        error_response=mock_boto_client,
        operation_name=mock_boto_client)
    mock_boto_client.side_effect = credentials_error

    helper = YamlHelper('tests/resources/test_no_upload.yaml')
    outcome = pre_run_checks(helper, check_s3_credentials=False)

    # returns true if bad creds, but we don't want to check credentials
    self.assertTrue(outcome)
def test_make_tsne(self):
    # Build a graph from the fixture's node/edge files, run make_tsne() on a
    # canned embedding file, and check that the expected t-SNE output exists.
    helper = YamlHelper("tests/resources/test_graph_embedding_bert_tsne.yaml")

    # Node and edge paths in the YAML are relative to its input directory.
    input_dir = helper.yaml['input_directory']
    graph_section = helper.yaml['graph_data']['graph']
    graph = Graph.from_csv(
        nodes_column="id",
        node_list_node_types_column="category",
        default_node_type="biolink:NamedThing",
        node_path=os.path.join(input_dir, graph_section['node_path']),
        edge_path=os.path.join(input_dir, graph_section['edge_path']),
        sources_column="subject",
        destinations_column="object",
        directed=False,
    )

    tsne_kwargs = helper.make_tsne_args(graph=graph)
    # Point t-SNE at a pre-made embedding file from the test resources.
    tsne_kwargs['embedding_file'] = 'tests/resources/test_embeddings.tsv'
    make_tsne(**tsne_kwargs)

    self.assertTrue(os.path.exists(self.expected_tsne_file))
def test_do_upload(self):
    # do_upload() must exist and be truthy for the "good upload" YAML fixture.
    self.assertTrue(hasattr(YamlHelper, 'do_upload'))

    helper = YamlHelper(self.test_yaml_upload_good)
    upload_requested = helper.do_upload()
    self.assertTrue(upload_requested)
def test_do_tsne(self):
    # do_tsne() must exist, be falsy for the shared helper (self.yh) and be
    # truthy for the BERT/t-SNE YAML fixture.
    # NOTE(review): self.yh and self.test_yaml_bert_tsne are presumably set up
    # by the enclosing test class, which is not visible here.
    self.assertTrue(hasattr(YamlHelper, 'do_tsne'))

    # assertFalse states the intent directly and reports the offending value
    # on failure, unlike assertTrue(not ...).
    self.assertFalse(self.yh.do_tsne())

    tsne_helper = YamlHelper(self.test_yaml_bert_tsne)
    self.assertTrue(tsne_helper.do_tsne())
def test_bad_indir(self) -> None:
    # The bad-indir fixture must make helper construction or indir() raise
    # FileNotFoundError. Both calls stay inside the context manager so that
    # either one may be the source of the error.
    with self.assertRaises(FileNotFoundError):
        YamlHelper("tests/resources/test_bad_indir.yaml").indir()
def test_no_indir(self) -> None:
    # For the no-indir fixture, indir() should return the empty string.
    helper = YamlHelper("tests/resources/test_no_indir.yaml")
    input_dir = helper.indir()
    self.assertEqual("", input_dir)
def test_node_edge_urls_file_downloaded(self, mock_from_csv, mock_download_file):
    # Loading a graph whose node and edge paths are URLs should call the
    # download function exactly twice.
    # NOTE(review): both mock arguments are presumably injected by patch
    # decorators that are not visible here; mock_from_csv is unused in the body.
    YamlHelper('tests/resources/test_urls_for_node_and_edge_paths.yaml').load_graph()

    download = mock_download_file
    self.assertTrue(download.called)
    self.assertEqual(2, download.call_count)
def setUp(self) -> None:
    # Fixture paths for the upload tests, plus the upload keyword arguments
    # derived from the "good" YAML.
    self.good_yaml = 'tests/resources/test_good_upload_info.yaml'
    self.bad_yaml = 'tests/resources/test_bad_upload_info.yaml'

    good_helper = YamlHelper(self.good_yaml)
    self.good_kwargs = good_helper.make_upload_args()
def test_catch_keyerror(self):
    # pos_val_graph_args() must not raise for the no-graph fixture.
    # no assertion needed, just testing for no exception
    YamlHelper("tests/resources/test_no_graph.yaml").pos_val_graph_args()
def setUp(self) -> None:
    # Build a fresh helper from the full-upload fixture for each test.
    upload_config = 'tests/resources/test_upload_full.yaml'
    self.upload_yaml = YamlHelper(upload_config)
def run(config: str) -> None:
    """Run a NEAT pipeline using the given YAML file [neat.yaml]
    \f

    Args:
        config: Specify the YAML file containing instructions of what ML tasks to perform

    Returns:
        None.

    Raises:
        RuntimeError: If the pre-run checks fail.
        NotImplementedError: If a classifier's 'type' is not one of the
            supported model types.

    """
    # NOTE(review): the "\f" in the docstring looks like the click marker that
    # truncates help text at that point — confirm before removing it.

    yhelp = YamlHelper(config)

    # pre run checks for failing early
    if not pre_run_checks(yhelp=yhelp):
        raise RuntimeError("Failed pre_run_check")

    # generate embeddings if config has 'embeddings' block
    # (skipped when the embedding output file already exists)
    if yhelp.do_embeddings() and not os.path.exists(yhelp.embedding_outfile()):
        node_embedding_args = yhelp.make_node_embeddings_args()
        make_node_embeddings(**node_embedding_args)

    # same pattern for t-SNE: only run when requested and not already on disk
    if yhelp.do_tsne() and not os.path.exists(yhelp.tsne_outfile()):
        graph: Graph = yhelp.load_graph()
        tsne_kwargs = yhelp.make_tsne_args(graph)
        make_tsne(**tsne_kwargs)

    if yhelp.do_classifier():
        for classifier in tqdm(yhelp.classifiers()):
            classifier_type = classifier['type']

            model: object = None
            if classifier_type == 'neural network':
                model = MLPModel(classifier, outdir=yhelp.outdir())
            elif classifier_type in \
                    ['Decision Tree', 'Logistic Regression', 'Random Forest']:
                model = SklearnModel(classifier, outdir=yhelp.outdir())
            else:
                # Report the unsupported type; `model` is still None here, so
                # interpolating it would hide which classifier was rejected.
                raise NotImplementedError(
                    f"{classifier_type} isn't implemented yet")

            model.compile()
            train_data, validation_data = \
                model.make_link_prediction_data(yhelp.embedding_outfile(),
                                                yhelp.main_graph_args(),
                                                yhelp.pos_val_graph_args(),
                                                yhelp.neg_train_graph_args(),
                                                yhelp.neg_val_graph_args(),
                                                yhelp.edge_embedding_method())
            history_obj = model.fit(train_data, validation_data)

            # Only write the training history when a file name is configured.
            history_file = yhelp.classifier_history_file_name(classifier)
            if history_file:
                with open(history_file, 'w') as f:  # type: ignore
                    json.dump(history_obj.history, f)

            model.save()

    if yhelp.do_upload():
        upload_kwargs = yhelp.make_upload_args()
        upload_dir_to_s3(**upload_kwargs)

    return None
def test_classifier_history_file_name(self):
    # For the first classifier in the test YAML, the history file name should
    # be "mlp_classifier_history.json".
    self.assertTrue(hasattr(YamlHelper, 'classifier_history_file_name'))

    helper = YamlHelper(self.test_yaml)
    first_classifier = helper.yaml['classifier']['classifiers'][0]
    history_name = helper.classifier_history_file_name(first_classifier)
    self.assertEqual(history_name, "mlp_classifier_history.json")
def setUpClass(cls) -> None:
    # Shared fixtures: the test YAML path, a helper built from it, and the
    # node-embedding arguments that helper produces.
    # NOTE(review): presumably decorated with @classmethod on a line not
    # visible here.
    config_path = "tests/resources/test.yaml"
    cls.test_yaml = config_path

    helper = YamlHelper(config_path)
    cls.yh = helper
    cls.embedding_args = helper.make_node_embeddings_args()