Example no. 1
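Score a few hard-coded restaurant reviews with SentimentInference, using aspect and opinion lexicons stored next to the script, and print each resulting SentimentDocument.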
from os import path
from pathlib import Path

# Import path assumes Intel's nlp-architect package layout.
from nlp_architect.models.absa.inference.inference import SentimentInference


def main() -> list:
    # Lexicon files are expected to sit next to this script.
    current_dir = Path(path.dirname(path.realpath(__file__)))
    inference = SentimentInference(current_dir / 'aspects.csv',
                                   current_dir / 'opinions.csv')
    print('\n' + '=' * 40 + '\n' +
          'Running inference on examples from sample test set:\n')

    docs = [
        "The food was very fresh and flavoursome the service was very attentive. Would go back"
        " to this restaurant if visiting London again.",
        "The food was wonderful and fresh, I really enjoyed it and will definitely go back. "
        "Staff were friendly.",
        "The ambiance is charming. Uncharacteristically, the service was DREADFUL. When we"
        " wanted to pay our bill at the end of the evening, our waitress was nowhere to be "
        "found..."
    ]

    sentiment_docs = []

    for doc_raw in docs:
        print('Raw Document: \n{}'.format(doc_raw))
        sentiment_doc = inference.run(doc=doc_raw)
        sentiment_docs.append(sentiment_doc)
        print('SentimentDocument: \n{}\n'.format(sentiment_doc) + '=' * 40 +
              '\n')
    return sentiment_docs
Example no. 2
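A regression test: run inference over pre-parsed CoreNLPDoc fixtures (parse=False avoids loading a parser) and assert that each prediction matches the stored SentimentDoc.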
import json
from pathlib import Path

# Import paths assume nlp-architect's package layout.
from nlp_architect import LIBRARY_ROOT
from nlp_architect.common.core_nlp_doc import CoreNLPDoc
from nlp_architect.models.absa.inference.data_types import SentimentDoc
from nlp_architect.models.absa.inference.inference import SentimentInference


def test_inference():
    lexicons_dir = Path(LIBRARY_ROOT) / 'examples' / 'absa'
    inference = SentimentInference(lexicons_dir / 'aspects.csv', lexicons_dir / 'opinions.csv',
                                   parse=False)
    data_dir = Path(LIBRARY_ROOT) / 'tests' / 'fixtures' / 'data' / 'absa'
    for i in range(1, 4):
        with open(data_dir / 'core_nlp_doc_{}.json'.format(i)) as f:
            predicted_doc = inference.run(parsed_doc=json.load(f, object_hook=CoreNLPDoc.decoder))
        with open(data_dir / 'sentiment_doc_{}.json'.format(i)) as f:
            expected_doc = json.load(f, object_hook=SentimentDoc.decoder)
        assert expected_doc == predicted_doc
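Example no. 3
A solution-level entry point: run accepts raw data, pre-parsed data, or previously saved inference results; it parses and scores documents where needed, persists the results as JSON, and returns summary statistics as a DataFrame.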
    def run(
        self,
        aspect_lex: Optional[PathLike] = None,
        opinion_lex: Optional[PathLike] = None,
        data: Optional[PathLike] = None,
        parsed_data: Optional[PathLike] = None,
        inference_results: Optional[PathLike] = None,
    ) -> Optional[pd.DataFrame]:

        opinions = load_opinion_lex(opinion_lex)
        if not opinions:
            raise ValueError("Empty opinion lexicon!")
        aspects = pd.read_csv(aspect_lex, header=None, encoding="utf-8")[0]
        if aspects.empty:
            raise ValueError("Empty aspect lexicon!")
        if inference_results:
            with open(inference_results, encoding="utf-8") as f:
                results = json.loads(f.read(),
                                     object_hook=SentimentDoc.decoder)
        elif data or parsed_data:
            inference = SentimentInference(aspect_lex, opinions, parse=False)
            parse = None
            if not parsed_data:  # source data is raw text, need to parse
                from nlp_architect.pipelines.spacy_bist import SpacyBISTParser

                parse = SpacyBISTParser().parse

            results = {}
            print("Running inference on data files... (Iterating data files)")
            data_source = parsed_data if parsed_data else data
            for file, doc in self._iterate_docs(data_source):
                parsed_doc = (parse(doc) if parse else json.loads(
                    doc, object_hook=CoreNLPDoc.decoder))
                sentiment_doc = inference.run(parsed_doc=parsed_doc)
                if sentiment_doc:
                    results[file] = sentiment_doc
            with open(SENTIMENT_OUT / "inference_results.json",
                      "w",
                      encoding="utf-8") as f:
                json.dump(results,
                          f,
                          cls=SentimentDocEncoder,
                          indent=4,
                          sort_keys=True)
        else:
            print(
                "No input given. Please supply one of: "
                "data directory, parsed data directory, or inference results.")
            return None

        print("\nComputing statistics...")
        stats = self._compute_stats(results, aspects, opinions)
        print("Done.")
        return stats
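Because the results are written to SENTIMENT_OUT / "inference_results.json", a later call can pass that file as inference_results and recompute the statistics without re-running inference.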
Example no. 4
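A minimal interactive CLI: the lexicon paths are validated by argparse, then sentences typed at the prompt are scored one at a time.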
import argparse

# Import paths assume nlp-architect's layout; validate_existing_path is
# assumed to be the argparse `type` helper from nlp_architect.utils.io.
from nlp_architect.models.absa.inference.inference import SentimentInference
from nlp_architect.utils.io import validate_existing_path


def main() -> None:
    parser = argparse.ArgumentParser(description='ABSA Inference')
    parser.add_argument('--aspects', type=validate_existing_path,
                        help='Path to aspect lexicon (csv)', required=True)
    parser.add_argument('--opinions', type=validate_existing_path, required=True,
                        help='Path to opinion lexicon (csv)')
    args = parser.parse_args()

    inference = SentimentInference(aspect_lex=args.aspects, opinion_lex=args.opinions)

    # Score sentences interactively; interrupt (Ctrl+C) to exit.
    while True:
        doc = input('\nEnter sentence >> ')
        print(inference.run(doc))
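If validate_existing_path is not available in your environment, a minimal stand-in (an assumption, not nlp-architect's actual implementation) could look like:

import argparse
import os


def validate_existing_path(path_str: str) -> str:
    """argparse `type` callable: accept a path only if it exists on disk."""
    if not os.path.exists(path_str):
        raise argparse.ArgumentTypeError('{} does not exist'.format(path_str))
    return path_str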
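Example no. 6
Batch scoring of a tab-separated news dump (sent_id, vendor name, document text per line), appending one enhanced label record per document to a JSONL file. doc2label and labels_enhancer are project-specific helpers not shown in the excerpt.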
import json

from tqdm import tqdm

# Import path assumes nlp-architect's package layout.
from nlp_architect.models.absa.inference.inference import SentimentInference


def main() -> None:
    # doc2label and labels_enhancer are project-specific helpers (not shown).
    inference = SentimentInference(
        '/home/coeuser/Desktop/absa/examples/generated_aspect_lex_updated_v3.csv',
        '/home/coeuser/Desktop/absa/examples/generated_opinion_lex_reranked_v3.csv',
        parse=True)
    out_path = ('/home/coeuser/Desktop/absa/examples/'
                'step2_competitor_news_data_sentiment_scores.json')
    with open('/home/coeuser/Desktop/absa/examples/step1_competitor_news_data.txt',
              encoding='latin-1') as txt_file:
        lines = txt_file.readlines()
    # Each line is tab-separated: sent_id, vendor name, document text.
    with open(out_path, 'a') as json_file:
        for line in tqdm(lines):
            if not line.strip():
                continue
            fields = line.split('\t')
            sentiment_doc = inference.run(doc=fields[2])
            if sentiment_doc is not None:
                labels = doc2label(sentiment_doc)
                labels['sent_id'] = fields[0]
                labels['_vendor_name'] = fields[1]
                labels = labels_enhancer(labels)
                # One JSON record per line (JSONL).
                json_file.write(json.dumps(labels) + '\n')
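Example no. 7
Lexicon evaluation inside an AzureML run: score a clothing validation set, convert predictions to IO tags, and log a weighted F1 score. LEXICONS_OUT, args, aspect_lex, opinion_lex, doc2IO and flatten come from the surrounding script.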
import json
import os

from azureml.core import Run
from sklearn.metrics import f1_score

inference = SentimentInference(
    LEXICONS_OUT / 'generated_aspect_lex.csv',
    LEXICONS_OUT / 'generated_opinion_lex_reranked.csv')

clothing_val = os.path.join(args.data_folder,
                            'clothing_data/clothing-absa-validation.json')

with open(clothing_val) as json_file:
    val = json.load(json_file)

# doc2IO and flatten are helpers from the surrounding script (not shown).
predictions = []
vals = []
for doc in val["data"]:
    # Each validation doc is a list of (token, label) pairs; rebuild raw text.
    doc_raw = " ".join([token[0] for token in doc])
    sentiment_doc = inference.run(doc=doc_raw)
    if sentiment_doc is not None:
        predictions.append(doc2IO(sentiment_doc))
        vals.append(doc)

# The flattened lists interleave tokens and labels; [1::2] keeps the labels.
y_pred = flatten(predictions)[1::2]
y_true = flatten(vals)[1::2]

# Log metrics to the AzureML run; aspect_lex and opinion_lex come from
# the surrounding script.
run = Run.get_context()
run.log('Aspect Lexicon Size', len(aspect_lex))
run.log('Opinion Lexicon Size', len(opinion_lex))
run.log('f1_weighted', float(f1_score(y_true, y_pred, average='weighted')))
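flatten is not shown in the excerpt; given the [1::2] slicing above, it presumably flattens the nested (token, label) pairs into one interleaved list. A minimal sketch under that assumption:

from itertools import chain


def flatten(docs):
    """Flatten [[(token, label), ...], ...] (an assumed shape) into
    [token, label, token, label, ...], so that [1::2] selects the labels."""
    return list(chain.from_iterable(chain.from_iterable(docs)))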
Example no. 8
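Copy the lexicons to the job's outputs folder, then score each line of a news-content file and append the labels as JSONL. aspect_path, opinion_path, args and doc2label come from the surrounding script.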
inference = SentimentInference(aspect_path, opinion_path)
shutil.copyfile(aspect_path, './outputs/news_content_aspect.csv')
shutil.copyfile(opinion_path, './outputs/news_content_opinion.csv')

input_file_path = os.path.join(args.data_folder,
                               'news_data/all_news_content.csv')
print(
    f'Aspect and Opinion lexicons files loaded from {aspect_path} and {opinion_path}'
)
print(f'Input file loaded from {input_file_path}')

# Get inference results: score each line and append one JSON record (JSONL).
# doc2label is a project-specific helper (not shown).
with open(input_file_path) as csv_file:
    lines = csv_file.readlines()

with open('./outputs/sentiment_labels_v1.json', 'a') as json_file:
    for idx, line in enumerate(lines):
        if not line.strip():
            continue
        sentiment_doc = inference.run(doc=line)
        if sentiment_doc is not None:
            labels = doc2label(sentiment_doc)
            labels['sent_id'] = idx + 1
            json_file.write(json.dumps(labels) + '\n')