예제 #1
0
    def __init__(
        self,
        parse: bool = True,
        rerank_model: PathLike = None,
        asp_thresh: int = 3,
        op_thresh: int = 2,
        max_iter: int = 3,
    ):
        """Set up the term-acquisition, parsing and re-ranking components.

        Args:
            parse: When true, a ``SpacyBISTParser`` is created and stored on
                ``self.parser``; otherwise ``self.parser`` is ``None``.
            rerank_model: Re-ranking model handed to ``RerankTerms``. A falsy
                value makes the constructor obtain one through
                ``_download_pretrained_rerank_model``.
            asp_thresh: First positional argument forwarded to
                ``AcquireTerms``.
            op_thresh: Second positional argument forwarded to
                ``AcquireTerms``.
            max_iter: Third positional argument forwarded to
                ``AcquireTerms``.
        """
        self.acquire_lexicon = AcquireTerms(asp_thresh, op_thresh, max_iter)

        if not parse:
            self.parser = None
        else:
            # Imported only in this branch, so the parser module is not
            # loaded when parsing is disabled.
            from nlp_architect.pipelines.spacy_bist import SpacyBISTParser

            self.parser = SpacyBISTParser()

        if rerank_model:
            model_for_rerank = rerank_model
        else:
            print("using pre-trained reranking model")
            model_for_rerank = _download_pretrained_rerank_model(
                RERANK_MODEL_DEFAULT_PATH
            )

        # The embeddings are fetched before the re-ranker is built, since the
        # re-ranker is given the same EMBEDDING_PATH.
        download_unzip(
            *EMBEDDING_URL,
            EMBEDDING_PATH,
            license_msg="Glove word embeddings.",
        )
        self.rerank = RerankTerms(
            vector_cache=True,
            rerank_model=model_for_rerank,
            emb_model_path=EMBEDDING_PATH,
        )
예제 #2
0
def _download_pretrained_model():
    """Fetch the pre-trained BIST model unless ``bist.model`` already exists.

    The check and the download target are both located under
    ``SpacyBISTParser.dir``, which is created if it is missing.
    """
    model_dir = SpacyBISTParser.dir
    if path.isfile(model_dir / "bist.model"):
        # Model file already present: nothing to do.
        return

    print("Downloading pre-trained BIST model..")
    makedirs(model_dir, exist_ok=True)
    download_unzip(
        "https://d2zs9tzlek599f.cloudfront.net/models/dep_parse/",
        "bist-pretrained.zip",
        model_dir / "bist-pretrained",
    )
    print("Done.")
예제 #3
0
def test_solution(generate_new=False):
    """Check ``SentimentSolution`` output against the stored CSV fixture.

    The parsed TripAdvisor test data is downloaded, run through
    ``SentimentSolution().run`` with the example aspect/opinion lexicons, and
    the resulting table (columns ``Aspect`` through ``Score``, scores rounded
    to two decimals) is compared with ``expected.csv`` from the fixtures
    directory.

    Args:
        generate_new: When true, the freshly computed table is written to
            ``expected.csv`` in the current working directory and the test
            then fails on purpose, so a regeneration run can never be
            mistaken for a passing test.
    """
    import tempfile

    lexicons_dir = Path(LIBRARY_ROOT) / "examples" / "absa"
    expected_dir = Path(
        LIBRARY_ROOT) / "tests" / "fixtures" / "data" / "absa_solution"
    data_url = "https://d2zs9tzlek599f.cloudfront.net/tests/"
    parsed_data = download_unzip(
        data_url, "tripadvisor_test_parsed.zip",
        SENTIMENT_OUT / "test" / "tripadvisor_test_parsed")

    predicted_stats = SentimentSolution().run(
        parsed_data=parsed_data,
        aspect_lex=lexicons_dir / "aspects.csv",
        opinion_lex=lexicons_dir / "opinions.csv",
    )

    # The table is written to CSV and read back before the columns are sliced
    # (presumably so it has the same shape as the stored fixture - confirm).
    # A private temporary directory is used for the scratch file so that it
    # is removed even when reading/slicing fails, and so that concurrent runs
    # do not clash over a shared "predicted.csv" in the working directory.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_csv = Path(scratch_dir) / "predicted.csv"
        predicted_stats.to_csv(scratch_csv, encoding="utf-8")
        predicted_trimmed = pd.read_csv(
            scratch_csv, encoding="utf-8").loc[:, "Aspect":"Score"]

    predicted_trimmed.loc[:, "Score"] = np.around(
        predicted_trimmed.loc[:, "Score"], 2)

    if generate_new:
        with open("expected.csv", "w", encoding="utf-8", newline="") as f:
            predicted_trimmed.to_csv(f)
        raise AssertionError(
            "generate_new=True: expected.csv was regenerated; "
            "this run is not a real comparison")

    with open(expected_dir / "expected.csv",
              encoding="utf-8") as expected_fp:
        assert predicted_trimmed.to_csv() == expected_fp.read()
예제 #4
0
def _download_pretrained_model():
    """Download the pre-trained BIST model when ``bist.model`` is absent.

    Looks for ``bist.model`` under ``SpacyBISTParser.dir``; if it is not a
    file, the directory is created (when needed) and ``download_unzip`` is
    called for the ``bist-pretrained.zip`` archive.
    """
    bist_dir = SpacyBISTParser.dir
    model_missing = not path.isfile(bist_dir / "bist.model")

    if model_missing:
        print("Downloading pre-trained BIST model..")
        makedirs(bist_dir, exist_ok=True)

        # NOTE(review): an earlier variant did this in three manual steps
        # (download_unlicensed_file, uncompress_file into the model
        # directory, then remove the archive). download_unzip presumably
        # covers all of that - confirm against its implementation.
        download_unzip(
            "https://d2zs9tzlek599f.cloudfront.net/models/dep_parse/",
            "bist-pretrained.zip",
            bist_dir / "bist-pretrained",
        )
        print("Done.")
def test_solution():
    """Check ``SentimentSolution`` output against the stored CSV fixture.

    The parsed TripAdvisor test data is downloaded, run through
    ``SentimentSolution().run`` (with ``ui=False``) using the example
    inference lexicons, and the resulting table (columns ``Aspect`` through
    ``Score``, scores rounded to two decimals) is compared with
    ``expected.csv`` from the fixtures directory.
    """
    import tempfile

    lexicons_dir = Path(LIBRARY_ROOT) / 'examples' / 'absa' / 'inference'
    expected_dir = Path(
        LIBRARY_ROOT) / 'tests' / 'fixtures' / 'data' / 'absa_solution'
    data_url = 'https://s3-us-west-2.amazonaws.com/nlp-architect-data/tests/'
    parsed_data = download_unzip(
        data_url, 'tripadvisor_test_parsed.zip',
        SENTIMENT_OUT / 'test' / 'tripadvisor_test_parsed')

    predicted_stats = SentimentSolution().run(
        parsed_data=parsed_data,
        aspect_lex=lexicons_dir / 'aspects.csv',
        opinion_lex=lexicons_dir / 'opinions.csv',
        ui=False)

    # The table is written to CSV and read back before the columns are sliced
    # (presumably so it has the same shape as the stored fixture - confirm).
    # A private temporary directory is used for the scratch file so that it
    # is removed even when reading/slicing fails, and so that concurrent runs
    # do not clash over a shared 'predicted.csv' in the working directory.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_csv = Path(scratch_dir) / 'predicted.csv'
        predicted_stats.to_csv(scratch_csv, encoding='utf-8')
        predicted_trimmed = pd.read_csv(
            scratch_csv, encoding='utf-8').loc[:, 'Aspect':'Score']

    predicted_trimmed.loc[:, 'Score'] = np.around(
        predicted_trimmed.loc[:, 'Score'], 2)

    with open(expected_dir / 'expected.csv', encoding='utf-8') as expected_fp:
        assert predicted_trimmed.to_csv() == expected_fp.read()