コード例 #1
0
 def test_make_upload_args(self):
     """Check the upload kwargs built from ``self.test_yaml_upload_good``."""
     self.assertTrue(hasattr(YamlHelper, 'make_upload_args'))
     helper = YamlHelper(self.test_yaml_upload_good)
     # the exact keyword arguments make_upload_args() is expected to return
     expected_args = {
         'local_directory': 'tests/resources/test_output_data_dir/',
         's3_bucket': 'some_bucket',
         's3_bucket_dir': 'some/remote/directory/',
         'extra_args': {'ACL': 'public-read'},
     }
     self.assertDictEqual(helper.make_upload_args(), expected_args)
コード例 #2
0
 def test_make_embeddings_metrics_class_list(self):
     """Check the metric objects built from test_make_embeddings_metrics.yaml.

     Three objects are expected, and the string forms of their classes must
     match the keras AUC, Recall and Precision classes in any order.
     """
     self.assertTrue(hasattr(YamlHelper, 'make_embeddings_metrics_class_list'))
     helper = YamlHelper("tests/resources/test_make_embeddings_metrics.yaml")
     metrics = helper.make_embeddings_metrics_class_list()
     self.assertEqual(3, len(metrics))

     expected_class_names = [
         "<class 'keras.metrics.AUC'>",
         "<class 'keras.metrics.Recall'>",
         "<class 'keras.metrics.Precision'>",
     ]
     actual_class_names = [str(metric.__class__) for metric in metrics]
     self.assertCountEqual(expected_class_names, actual_class_names)
コード例 #3
0
    def test_make_graph_embeddings(self):
        """Run make_node_embeddings() and check the files it should leave behind.

        The embedding file and the history file must both exist, and the
        history file must be JSON whose keys are exactly 'loss' and 'lr'.
        """
        helper = YamlHelper(
            "tests/resources/test_graph_embedding_bert_tsne.yaml")
        make_node_embeddings(**helper.make_node_embeddings_args())

        self.assertTrue(os.path.exists(self.expected_embedding_file))
        self.assertTrue(os.path.exists(self.expected_history_file))

        with open(self.expected_history_file) as history_fh:
            history = json.load(history_fh)
        self.assertListEqual(list(history.keys()), ['loss', 'lr'])
コード例 #4
0
    def test_node_edge_urls_converted_to_path(self, mock_from_csv, mock_download_file):
        """Check that load_graph() swaps node/edge URLs for local file paths.

        Before load_graph() both graph args are URLs; afterwards each must be
        the expected path under output_data/.
        """
        helper = YamlHelper('tests/resources/test_urls_for_node_and_edge_paths.yaml')
        # (graph arg key, local path expected after load_graph())
        expected_local_paths = (
            ('node_path', 'output_data/https___someremoteurl.com_nodes.tsv'),
            ('edge_path', 'output_data/https___someremoteurl.com_edges.tsv'),
        )

        for arg_key, _ in expected_local_paths:
            self.assertTrue(is_url(helper.main_graph_args()[arg_key]))

        helper.load_graph()

        for arg_key, local_path in expected_local_paths:
            self.assertFalse(is_url(helper.main_graph_args()[arg_key]))
            self.assertEqual(local_path, helper.main_graph_args()[arg_key])
コード例 #5
0
 def test_pre_run_check_bad_credentials_but_no_check(
         self, mock_boto_client) -> None:
     """pre_run_checks() passes when credential checking is switched off.

     The mocked boto client is set to raise ClientError, but the check is
     called with check_s3_credentials=False.
     """
     mock_boto_client.side_effect = ClientError(
         error_response=mock_boto_client, operation_name=mock_boto_client)
     helper = YamlHelper('tests/resources/test_no_upload.yaml')
     # returns true if bad creds, but we don't want to check credentials
     self.assertTrue(pre_run_checks(helper, check_s3_credentials=False))
コード例 #6
0
    def test_make_tsne(self):
        """Run make_tsne() on a graph loaded from the YAML and check its output.

        The graph is read from the node/edge files named in the YAML, the
        embedding file is overridden with a test resource, and the expected
        t-SNE file must exist afterwards.
        """
        helper = YamlHelper(
            "tests/resources/test_graph_embedding_bert_tsne.yaml")
        input_dir = helper.yaml['input_directory']
        graph_cfg = helper.yaml['graph_data']['graph']
        graph = Graph.from_csv(
            node_path=os.path.join(input_dir, graph_cfg['node_path']),
            edge_path=os.path.join(input_dir, graph_cfg['edge_path']),
            nodes_column="id",
            node_list_node_types_column="category",
            default_node_type="biolink:NamedThing",
            sources_column="subject",
            destinations_column="object",
            directed=False,
        )

        tsne_args = helper.make_tsne_args(graph=graph)
        # use the embeddings test resource rather than the file from the YAML
        tsne_args['embedding_file'] = 'tests/resources/test_embeddings.tsv'
        make_tsne(**tsne_args)

        self.assertTrue(os.path.exists(self.expected_tsne_file))
コード例 #7
0
 def test_do_upload(self):
     """do_upload() is truthy for the YAML at ``self.test_yaml_upload_good``."""
     self.assertTrue(hasattr(YamlHelper, 'do_upload'))
     helper = YamlHelper(self.test_yaml_upload_good)
     upload_requested = helper.do_upload()
     self.assertTrue(upload_requested)
コード例 #8
0
 def test_do_tsne(self):
     """do_tsne() is falsy for ``self.yh`` and truthy for the bert/tsne YAML."""
     self.assertTrue(hasattr(YamlHelper, 'do_tsne'))
     self.assertFalse(self.yh.do_tsne())
     tsne_helper = YamlHelper(self.test_yaml_bert_tsne)
     self.assertTrue(tsne_helper.do_tsne())
コード例 #9
0
 def test_bad_indir(self) -> None:
     """Loading test_bad_indir.yaml and calling indir() raises FileNotFoundError."""
     with self.assertRaises(FileNotFoundError):
         # construction and the indir() call both stay inside the block so
         # the error is caught whichever of them raises it
         helper = YamlHelper("tests/resources/test_bad_indir.yaml")
         helper.indir()
コード例 #10
0
 def test_no_indir(self) -> None:
     """indir() returns an empty string for test_no_indir.yaml."""
     helper = YamlHelper("tests/resources/test_no_indir.yaml")
     input_dir = helper.indir()
     self.assertEqual("", input_dir)
コード例 #11
0
 def test_node_edge_urls_file_downloaded(self, mock_from_csv, mock_download_file):
     """load_graph() calls the (mocked) download function exactly twice."""
     helper = YamlHelper('tests/resources/test_urls_for_node_and_edge_paths.yaml')
     helper.load_graph()

     self.assertTrue(mock_download_file.called)
     self.assertEqual(2, mock_download_file.call_count)
コード例 #12
0
 def setUp(self) -> None:
     """Store the upload YAML paths and the kwargs built from the good one."""
     self.good_yaml = 'tests/resources/test_good_upload_info.yaml'
     self.bad_yaml = 'tests/resources/test_bad_upload_info.yaml'
     good_helper = YamlHelper(self.good_yaml)
     self.good_kwargs = good_helper.make_upload_args()
コード例 #13
0
 def test_catch_keyerror(self):
     """pos_val_graph_args() must not raise for test_no_graph.yaml."""
     # no assertion needed, just testing for no exception
     YamlHelper("tests/resources/test_no_graph.yaml").pos_val_graph_args()
コード例 #14
0
 def setUp(self) -> None:
     """Build the YamlHelper for test_upload_full.yaml used by the tests."""
     upload_config_path = 'tests/resources/test_upload_full.yaml'
     self.upload_yaml = YamlHelper(upload_config_path)
コード例 #15
0
def run(config: str) -> None:
    """Run a NEAT pipeline using the given YAML file [neat.yaml]
    \f

    Args:
        config: Specify the YAML file containing instructions of what ML tasks to perform

    Returns:
        None.

    Raises:
        RuntimeError: If the pre-run checks fail.
        NotImplementedError: If a classifier's 'type' is not one of the
            supported classifier types.

    """

    yhelp = YamlHelper(config)

    # pre run checks for failing early
    if not pre_run_checks(yhelp=yhelp):
        raise RuntimeError("Failed pre_run_check")

    # generate embeddings if config has 'embeddings' block
    # (skipped when the embedding output file already exists)
    if yhelp.do_embeddings() and not os.path.exists(yhelp.embedding_outfile()):
        node_embedding_args = yhelp.make_node_embeddings_args()
        make_node_embeddings(**node_embedding_args)

    # make the t-SNE output unless its output file already exists
    if yhelp.do_tsne() and not os.path.exists(yhelp.tsne_outfile()):
        graph: Graph = yhelp.load_graph()
        tsne_kwargs = yhelp.make_tsne_args(graph)
        make_tsne(**tsne_kwargs)

    if yhelp.do_classifier():
        for classifier in tqdm(yhelp.classifiers()):
            classifier_type = classifier['type']
            if classifier_type == 'neural network':
                model = MLPModel(classifier, outdir=yhelp.outdir())
            elif classifier_type in \
                    ['Decision Tree', 'Logistic Regression', 'Random Forest']:
                model = SklearnModel(classifier, outdir=yhelp.outdir())
            else:
                # report the unsupported type; no model exists at this point
                raise NotImplementedError(
                    f"{classifier_type} isn't implemented yet")

            model.compile()
            train_data, validation_data = \
                model.make_link_prediction_data(yhelp.embedding_outfile(),
                                                yhelp.main_graph_args(),
                                                yhelp.pos_val_graph_args(),
                                                yhelp.neg_train_graph_args(),
                                                yhelp.neg_val_graph_args(),
                                                yhelp.edge_embedding_method())
            history_obj = model.fit(train_data, validation_data)

            # write the training history only when a history file is configured
            history_file = yhelp.classifier_history_file_name(classifier)
            if history_file:
                with open(history_file, 'w') as f:  # type: ignore
                    json.dump(history_obj.history, f)

            model.save()

    if yhelp.do_upload():
        upload_kwargs = yhelp.make_upload_args()
        upload_dir_to_s3(**upload_kwargs)

    return None
コード例 #16
0
 def test_classifier_history_file_name(self):
     """The first classifier in the YAML maps to mlp_classifier_history.json."""
     self.assertTrue(hasattr(YamlHelper, 'classifier_history_file_name'))
     helper = YamlHelper(self.test_yaml)
     first_classifier = helper.yaml['classifier']['classifiers'][0]
     history_name = helper.classifier_history_file_name(first_classifier)
     self.assertEqual(history_name, "mlp_classifier_history.json")
コード例 #17
0
 def setUpClass(cls) -> None:
     """Set the class-level YAML path, YamlHelper and node-embedding kwargs."""
     yaml_path = "tests/resources/test.yaml"
     cls.test_yaml = yaml_path
     helper = YamlHelper(yaml_path)
     cls.yh = helper
     cls.embedding_args = helper.make_node_embeddings_args()