def setUp(self):
    """Build a SQLite database from the Stave example SQL script.

    Stores the default table names taken from the reader configs, creates
    a temporary working directory, downloads ``example_db.sql`` into it
    and executes that script against ``db.sqlite3`` in the same directory.
    The resulting database path is kept in ``self.sql_db``.
    """
    sql_url = (
        "https://raw.githubusercontent.com/asyml/stave/master"
        "/simple-backend/example_db.sql"
    )

    self.datapack_table: str = (
        StaveMultiDocSqlReader.default_configs()['datapack_table'])
    self.multipack_table: str = (
        StaveMultiDocSqlReader.default_configs()['multipack_table'])
    self.project_table: str = (
        StaveDataPackSqlReader.default_configs()['project_table'])

    self.temp_dir = tempfile.TemporaryDirectory()
    maybe_download(sql_url, self.temp_dir.name, 'example_db.sql')
    sql_script: str = os.path.join(self.temp_dir.name, 'example_db.sql')
    self.sql_db: str = os.path.join(self.temp_dir.name, 'db.sqlite3')

    # Read the whole script first so the file handle is released before
    # any database work starts.
    with open(sql_script) as q_file:
        q = q_file.read()

    # Build the example database by executing the sample sql script.
    conn = sqlite3.connect(self.sql_db)
    try:
        c = conn.cursor()
        c.executescript(q)
        conn.commit()
    finally:
        # Always release the connection, even when the script fails, so
        # the database file is not left open for the rest of the run.
        conn.close()
def test_reader_with_dir(self):
    """Run the pipeline over a directory of downloaded HTML pages.

    Two pages are downloaded into a temporary directory, the directory is
    passed to ``self.pl1.process_dataset`` and every yielded pack is
    checked to be a ``DataPack``.
    """
    # The context manager removes the directory even when a download or
    # an assertion fails part-way through the test.
    with tempfile.TemporaryDirectory() as tmp_dir:
        maybe_download('https://en.wikipedia.org/wiki/Machine_learning',
                       tmp_dir, 'test_wikipedia.html')
        maybe_download('https://www.yahoo.com/', tmp_dir,
                       'test_yahoo.html')

        for pack in self.pl1.process_dataset(tmp_dir):
            self.assertIsInstance(pack, DataPack)
def main():
    """Download the CliNER pre-trained model and its supporting files."""
    model_path = Path("examples/Cliner/CliNER/models")
    pkg_path = Path("examples/Cliner/CliNER/tools")

    # Make sure the model directory is present before anything is fetched.
    model_path.mkdir(parents=True, exist_ok=True)

    # One row per download, in the order they are performed:
    # (share link, destination directory, name of the saved file).
    downloads = (
        # the pre-trained model
        (
            "https://drive.google.com/file/d/1Jlm2wdmNA-GotTWF60zZRUs1MbnzYox2",
            model_path,
            "train_full.model",
        ),
        # the dependency package for evaluation
        (
            "https://drive.google.com/file/d/1ZVgJ7EQtMjPpg_v-lCycCLdFgVzmdTxI",
            pkg_path,
            "i2b2va-eval.jar",
        ),
        (
            "https://drive.google.com/file/d/1QvenOvRx7R9XjA5tUxeZnDdoroW94R-N"
            "/view?usp=sharing",
            pkg_path,
            "py3_maxent_treebank_pos_tagger.pickle",
        ),
        (
            "https://drive.google.com/file/d/162jCbpzf5Jez8h0zzA-lAyMkCLcjgAep"
            "/view?usp=sharing",
            pkg_path,
            "py2_maxent_treebank_pos_tagger.pickle",
        ),
    )

    for link, target_dir, saved_name in downloads:
        maybe_download(urls=[link], path=target_dir, filenames=[saved_name])
def test_data_utils(self):
    """Download one shared file and check where it landed and what it holds."""
    share_link = (
        "https://drive.google.com/file/d/1YHXMiIne5MjSBePsPHPWO6hdRj4w"
        "EnSk/view?usp=sharing"
    )
    data_utils.maybe_download(
        urls=[share_link],
        path=self.test_path,
        filenames=[self.file_name],
    )

    target_dir = Path(self.test_path)
    self.assertEqual(target_dir.exists(), True)

    # os.walk yields one entry per directory visited, so exactly one entry
    # means the target directory contains no sub-directories.
    walked = list(os.walk(target_dir))
    self.assertEqual(len(walked), 1)

    with open(f"{self.test_path}/{self.file_name}", "r") as handle:
        content = handle.readlines()
    self.assertEqual(content, self.text)
def test_reader_with_filepath(self):
    """Run the pipeline over a single downloaded HTML file.

    One page is downloaded into a temporary directory, the value returned
    by ``maybe_download`` is passed to ``self.pl1.process_dataset`` and
    every yielded pack is checked to be a ``DataPack``.
    """
    # The context manager removes the directory even when the download or
    # an assertion fails part-way through the test.
    with tempfile.TemporaryDirectory() as tmp_dir:
        filepath = maybe_download("https://www.yahoo.com/", tmp_dir,
                                  "test_yahoo.html")

        for pack in self.pl1.process_dataset(filepath):
            self.assertIsInstance(pack, DataPack)
from forte.data.data_utils import maybe_download

# Resources to download. Each row pairs a Google Drive share link with the
# file name it is saved under; row order is the order handed to
# maybe_download.
resource_table = [
    (
        "https://drive.google.com/file/d/15RSfFkW9syQKtx-_fQ9KshN3BJ27Jf8t/"
        "view?usp=sharing",
        "config.json",
    ),
    (
        "https://drive.google.com/file/d/1Nh7D6Xam5JefdoSXRoL7S0DZK1d4i2UK/"
        "view?usp=sharing",
        "pytorch_model.bin",
    ),
    (
        "https://drive.google.com/file/d/1YWcI60lGKtTFH01Ai1HnwOKBsrFf2r29/"
        "view?usp=sharing",
        "special_tokens_map.json",
    ),
    (
        "https://drive.google.com/file/d/1ElHUEMPQIuWmV0GimroqFphbCvFKskYj/"
        "view?usp=sharing",
        "tokenizer_config.json",
    ),
    (
        "https://drive.google.com/file/d/1EhMXlieoEg-bGUbbQ2vN-iyNJvC4Dajl/"
        "view?usp=sharing",
        "vocab.txt",
    ),
]

# Split the table back into the two parallel lists the downloader expects.
urls = [link for link, _ in resource_table]
filenames = [saved_name for _, saved_name in resource_table]

maybe_download(urls=urls, path="resources/NCBI-disease", filenames=filenames)
"url from where the model will be directly" "downloaded", ) parser.add_argument( "--model_dir", default=default_model_dir, help="Directory to which the model will be downloaded", ) args = parser.parse_args() resource_path = Path(args.model_dir) # create the path if it doesn't exist resource_path.mkdir(parents=True, exist_ok=True) # download data if args.pretrained_model_name.lower() == "bert-base-uncased": gd_id = "1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX" url = f"https://drive.google.com/file/d/{gd_id}/view" elif args.pretrained_model_name.lower() == "bert-large-uncased": gd_id = "1crlASTMlsihALlkabAQP6JTYIZwC1Wm8" url = f"https://drive.google.com/file/d/{gd_id}/view" else: url = args.pretrained_model_name maybe_download(urls=[url], path=resource_path, extract=True)
from forte.data.data_utils import maybe_download

# Resources to download, keyed by Google Drive share link; each value is the
# file name the download is saved under. Insertion order is the order
# handed to maybe_download.
link_to_filename = {
    "https://drive.google.com/file/d/1j3i5U1YffYqKTdSbnlsrMAm9j86nLjxC/view"
    "?usp=sharing": "model.pkl",
    "https://drive.google.com/file/d/1aRPS_b4AFaZTsk9uZ41tkWIBvWbO_s_V/"
    "view?usp=sharing": "word_embedding_table.pkl",
    "https://drive.google.com/file/d/1SYpcWEDeTlbOsXlRevS8YS_dyP_k29g0/"
    "view?usp=sharing": "word_alphabet.pkl",
    "https://drive.google.com/file/d/1S2UMDBX7Ci-Mrm30434t0LOBL__Db92Y/"
    "view?usp=sharing": "ner_alphabet.pkl",
    "https://drive.google.com/file/d/1O4iFhBPuogwEgz7bpJjEqDqAlYf5caP4/"
    "view?usp=sharing": "char_alphabet.pkl",
}

# The downloader takes two parallel lists, so unpack the mapping into them.
urls = list(link_to_filename.keys())
filenames = list(link_to_filename.values())

maybe_download(urls=urls, path="resources/", filenames=filenames)
embedding_path / "char_vocab.english.txt", "https://drive.google.com/file/d/1hgwmUBk8Mb3iZYiHi1UpCpPFOCfOQLLB/" "view?usp=sharing": embedding_path / "glove.840B.300d.05.filtered", "https://drive.google.com/file/d/1H4PZhJhGoFBqrSMRPufjJ-9zwROw8hAK/" "view?usp=sharing": embedding_path / "glove_50_300_2.filtered", "https://drive.google.com/file/d/1uoA5EnZMWl5m5DMevGcI7UjiXxQRlD9W/" "view?usp=sharing": embedding_path / "word_vocab.english.txt", "https://drive.google.com/file/d/1UZc8x-mhdXg7Rtt6FSBDlEoJb_nHxDAQ/" "view?usp=sharing": pretrained_path / "model.pt" }) maybe_download(urls=list(urls_to_file_names.keys()), path=model_path, filenames=list(urls_to_file_names.values())) # download indexer model elif args.model_name.lower() == "indexer": urls_to_file_names = OrderedDict({ "https://drive.google.com/file/d/14lL6AoyjdCp-fj8DOlyZhQZrNwoJBfQm/" "view?usp=sharing": "index.faiss", "https://drive.google.com/file/d/1DdgMA7jttgA113EIlebVb33JpGCGbEYf/" "view?usp=sharing": "index.meta_data" }) maybe_download(urls=list(urls_to_file_names.keys()), path=model_path / "chatbot", filenames=list(urls_to_file_names.values()))
from pathlib import Path from forte.data.data_utils import maybe_download if __name__ == "__main__": model_path = Path("examples/Cliner/CliNER/models") pkg_path = Path("examples/Cliner/CliNER/tools") # create the model path if it doesn't exist model_path.mkdir(parents=True, exist_ok=True) # download the pre-trained model maybe_download(urls=[ "https://drive.google.com/file/d/1Jlm2wdmNA-GotTWF60zZRUs1MbnzYox2"], path=model_path, filenames=["train_full.model"]) # download the dependency package for evaluation maybe_download(urls=[ "https://drive.google.com/file/d/1ZVgJ7EQtMjPpg_v-lCycCLdFgVzmdTxI"], path=pkg_path, filenames=["i2b2va-eval.jar"]) maybe_download(urls=[ "https://drive.google.com/file/d/1QvenOvRx7R9XjA5tUxeZnDdoroW94R-N" "/view?usp=sharing"], path=pkg_path, filenames=["py3_maxent_treebank_pos_tagger.pickle"]) maybe_download(urls=[ "https://drive.google.com/file/d/162jCbpzf5Jez8h0zzA-lAyMkCLcjgAep"
os.path.join(str(embedding_path), "char_vocab.english.txt"), "https://drive.google.com/file/d/1hgwmUBk8Mb3iZYiHi1UpCpPFOCfOQLLB/" "view?usp=sharing": os.path.join(str(embedding_path), "glove.840B.300d.05.filtered"), "https://drive.google.com/file/d/1H4PZhJhGoFBqrSMRPufjJ-9zwROw8hAK/" "view?usp=sharing": os.path.join(str(embedding_path), "glove_50_300_2.filtered"), "https://drive.google.com/file/d/1uoA5EnZMWl5m5DMevGcI7UjiXxQRlD9W/" "view?usp=sharing": os.path.join(str(embedding_path), "word_vocab.english.txt"), "https://drive.google.com/file/d/1UZc8x-mhdXg7Rtt6FSBDlEoJb_nHxDAQ/" "view?usp=sharing": os.path.join(str(pretrained_path), "model.pt") }) maybe_download(urls=list(urls_to_file_names.keys()), path=model_path, filenames=list(urls_to_file_names.values())) # download ner model elif args.model_name.lower() == "ner": urls = [ "https://drive.google.com/file/d/1j3i5U1YffYqKTdSbnlsrMAm9j86nL" "jxC/view?usp=sharing", "https://drive.google.com/file/d/1aRPS_b4AFaZTsk9uZ41tkWIBvWbO" "_s_V/view?usp=sharing", "https://drive.google.com/file/d/1SYpcWEDeTlbOsXlRevS8YS_dyP_k2" "9g0/view?usp=sharing", "https://drive.google.com/file/d/1S2UMDBX7Ci-Mrm30434t0LOBL__Db" "92Y/view?usp=sharing", "https://drive.google.com/file/d/1O4iFhBPuogwEgz7bpJjEqDqAlYf5" "caP4/view?usp=sharing"