def __init__(
    self,
    parse: bool = True,
    rerank_model: PathLike = None,
    asp_thresh: int = 3,
    op_thresh: int = 2,
    max_iter: int = 3,
):
    """Set up the ABSA term-acquisition pipeline.

    Args:
        parse: when True, construct a SpacyBISTParser for raw-text input;
            when False, input is assumed to be pre-parsed.
        rerank_model: path to a reranking model; when falsy, the
            pre-trained model is downloaded and used instead.
        asp_thresh: aspect-candidate frequency threshold.
        op_thresh: opinion-candidate frequency threshold.
        max_iter: maximum number of acquisition iterations.
    """
    self.acquire_lexicon = AcquireTerms(asp_thresh, op_thresh, max_iter)

    # Parser is optional: import lazily so the heavy dependency is only
    # loaded when parsing is actually requested.
    self.parser = None
    if parse:
        from nlp_architect.pipelines.spacy_bist import SpacyBISTParser

        self.parser = SpacyBISTParser()

    if not rerank_model:
        print("using pre-trained reranking model")
        rerank_model = _download_pretrained_rerank_model(RERANK_MODEL_DEFAULT_PATH)

    # Word embeddings are required by the reranker regardless of model source.
    download_unzip(*EMBEDDING_URL, EMBEDDING_PATH, license_msg="Glove word embeddings.")
    self.rerank = RerankTerms(
        vector_cache=True, rerank_model=rerank_model, emb_model_path=EMBEDDING_PATH
    )
def _download_pretrained_model():
    """Fetch and unpack the pre-trained BIST model if it is not present locally."""
    model_file = SpacyBISTParser.dir / "bist.model"
    if path.isfile(model_file):
        return  # already installed — nothing to do
    print("Downloading pre-trained BIST model..")
    target_dir = SpacyBISTParser.dir / "bist-pretrained"
    makedirs(SpacyBISTParser.dir, exist_ok=True)
    download_unzip(
        "https://d2zs9tzlek599f.cloudfront.net/models/dep_parse/",
        "bist-pretrained.zip",
        target_dir,
    )
    print("Done.")
def test_solution(generate_new=False):
    """Regression test for the ABSA sentiment solution pipeline.

    Runs SentimentSolution on pre-parsed TripAdvisor review data with the
    example lexicons and compares the trimmed, rounded prediction stats
    against the stored fixture CSV.

    Args:
        generate_new: when True, write a fresh ``expected.csv`` fixture to
            the working directory and then fail deliberately, so a fixture
            regeneration run can never be mistaken for a passing test.
    """
    lexicons_dir = Path(LIBRARY_ROOT) / "examples" / "absa"
    expected_dir = Path(LIBRARY_ROOT) / "tests" / "fixtures" / "data" / "absa_solution"
    data_url = "https://d2zs9tzlek599f.cloudfront.net/tests/"
    parsed_data = download_unzip(
        data_url,
        "tripadvisor_test_parsed.zip",
        SENTIMENT_OUT / "test" / "tripadvisor_test_parsed",
    )
    predicted_stats = SentimentSolution().run(
        parsed_data=parsed_data,
        aspect_lex=lexicons_dir / "aspects.csv",
        opinion_lex=lexicons_dir / "opinions.csv",
    )
    predicted_stats.to_csv("predicted.csv", encoding="utf-8")
    try:
        # Round-trip through CSV so the comparison sees exactly what a
        # serialized report would contain, then keep only Aspect..Score.
        predicted_trimmed = pd.read_csv("predicted.csv", encoding="utf-8").loc[
            :, "Aspect":"Score"
        ]
    finally:
        # Fix: remove the scratch file even if reading/trimming raises,
        # so a failed run does not leave predicted.csv behind.
        os.remove("predicted.csv")
    predicted_trimmed.loc[:, "Score"] = np.around(predicted_trimmed.loc[:, "Score"], 2)
    if generate_new:
        with open("expected.csv", "w", encoding="utf-8", newline="") as f:
            predicted_trimmed.to_csv(f)
        assert False  # force failure so regenerated fixtures are reviewed manually
    else:
        with open(expected_dir / "expected.csv", encoding="utf-8") as expected_fp:
            assert predicted_trimmed.to_csv() == expected_fp.read()
def _download_pretrained_model():
    """Download and unpack the pre-trained BIST model if it is not present locally.

    No-op when ``bist.model`` already exists under ``SpacyBISTParser.dir``.
    """
    # Fix: deleted the large block of commented-out legacy download/unzip
    # code — dead code that duplicated what download_unzip() now does.
    if not path.isfile(SpacyBISTParser.dir / "bist.model"):
        print("Downloading pre-trained BIST model..")
        zip_path = SpacyBISTParser.dir / "bist-pretrained"
        makedirs(SpacyBISTParser.dir, exist_ok=True)
        download_unzip(
            "https://d2zs9tzlek599f.cloudfront.net/models/dep_parse/",
            "bist-pretrained.zip",
            zip_path,
        )
        print("Done.")
def test_solution():
    """Regression test for the ABSA sentiment solution (inference lexicons, no UI).

    Runs SentimentSolution on pre-parsed TripAdvisor data and compares the
    trimmed, rounded prediction stats against the stored fixture CSV.
    """
    lexicons_dir = Path(LIBRARY_ROOT) / 'examples' / 'absa' / 'inference'
    expected_dir = Path(
        LIBRARY_ROOT) / 'tests' / 'fixtures' / 'data' / 'absa_solution'
    data_url = 'https://s3-us-west-2.amazonaws.com/nlp-architect-data/tests/'
    parsed_data = download_unzip(
        data_url, 'tripadvisor_test_parsed.zip',
        SENTIMENT_OUT / 'test' / 'tripadvisor_test_parsed')
    predicted_stats = SentimentSolution().run(
        parsed_data=parsed_data,
        aspect_lex=lexicons_dir / 'aspects.csv',
        opinion_lex=lexicons_dir / 'opinions.csv',
        ui=False)
    predicted_stats.to_csv('predicted.csv', encoding='utf-8')
    try:
        # Round-trip through CSV, then keep only the Aspect..Score columns.
        predicted_trimmed = pd.read_csv(
            'predicted.csv', encoding='utf-8').loc[:, 'Aspect':'Score']
    finally:
        # Fix: remove the scratch file even if reading/trimming raises,
        # so a failed run does not leave predicted.csv behind.
        os.remove('predicted.csv')
    predicted_trimmed.loc[:, 'Score'] = np.around(
        predicted_trimmed.loc[:, 'Score'], 2)
    with open(expected_dir / 'expected.csv', encoding='utf-8') as expected_fp:
        assert predicted_trimmed.to_csv() == expected_fp.read()