def run(
    self,
    aspect_lex: PathLike = None,
    opinion_lex: PathLike = None,
    data: PathLike = None,
    parsed_data: PathLike = None,
    inference_results: PathLike = None,
) -> Optional[pd.DataFrame]:
    """Run sentiment inference end-to-end and return aggregate statistics.

    Exactly one input source is expected: pre-computed ``inference_results``
    (JSON), raw ``data`` files (parsed on the fly), or ``parsed_data``
    (pre-parsed CoreNLP-style JSON docs).

    Args:
        aspect_lex: Path to the aspect lexicon CSV (single column, no header).
        opinion_lex: Path to the opinion lexicon file.
        data: Directory of raw-text documents to parse and analyze.
        parsed_data: Directory of already-parsed documents (JSON).
        inference_results: Path to a previously saved results JSON file.

    Returns:
        A DataFrame of computed statistics, or ``None`` when no input
        source was supplied.

    Raises:
        ValueError: If either lexicon loads empty.
    """
    opinions = load_opinion_lex(opinion_lex)
    if not opinions:
        raise ValueError("Empty opinion lexicon!")
    # Aspect lexicon: first (only) column of a header-less CSV.
    aspects = pd.read_csv(aspect_lex, header=None, encoding="utf-8")[0]
    if aspects.empty:
        raise ValueError("Empty aspect lexicon!")
    if inference_results:
        # Reuse previously saved results; decoder rebuilds SentimentDoc objects.
        with open(inference_results, encoding="utf-8") as f:
            results = json.loads(f.read(), object_hook=SentimentDoc.decoder)
    elif data or parsed_data:
        # parse=False: parsing (if needed) is handled locally below.
        inference = SentimentInference(aspect_lex, opinions, parse=False)
        parse = None
        if not parsed_data:  # source data is raw text, need to parse
            # Lazy import: the BIST parser is heavyweight and only needed here.
            from nlp_architect.pipelines.spacy_bist import SpacyBISTParser
            parse = SpacyBISTParser().parse
        results = {}
        print("Running inference on data files... (Iterating data files)")
        data_source = parsed_data if parsed_data else data
        for file, doc in self._iterate_docs(data_source):
            # Raw text goes through the parser; pre-parsed docs are decoded
            # from JSON into CoreNLPDoc objects.
            parsed_doc = (parse(doc) if parse else json.loads(
                doc, object_hook=CoreNLPDoc.decoder))
            sentiment_doc = inference.run(parsed_doc=parsed_doc)
            # Docs with no detected sentiment are omitted from the results.
            if sentiment_doc:
                results[file] = sentiment_doc
        # Persist results so later runs can pass them as inference_results.
        with open(SENTIMENT_OUT / "inference_results.json", "w",
                  encoding="utf-8") as f:
            json.dump(results, f, cls=SentimentDocEncoder, indent=4,
                      sort_keys=True)
    else:
        print(
            "No input given. Please supply one of: "
            "data directory, parsed data directory, or inference results.")
        return None
    print("\nComputing statistics...")
    stats = self._compute_stats(results, aspects, opinions)
    print("Done.")
    return stats
def __init__(self, aspect_lex: PathLike, opinion_lex: Union[PathLike, dict], parse: bool = True):
    """Initialize SentimentInference with the given aspect and opinion lexicons.

    Args:
        aspect_lex: Path to the aspect lexicon file.
        opinion_lex: Path to the opinion lexicon file, or an
            already-loaded lexicon dict.
        parse: When True, construct a SpacyBISTParser for parsing raw
            text; when False, callers must supply pre-parsed documents.
    """
    # Fix: the annotation was `PathLike or dict`, which evaluates to just
    # PathLike at runtime — Union expresses the actual contract.
    INFERENCE_OUT.mkdir(parents=True, exist_ok=True)
    # isinstance (not `type(...) is dict`) also accepts dict subclasses
    # such as OrderedDict.
    self.opinion_lex = (
        opinion_lex if isinstance(opinion_lex, dict) else load_opinion_lex(opinion_lex)
    )
    self.aspect_lex = _load_aspect_lexicon(aspect_lex)
    self.intensifier_lex = _read_lexicon_from_csv('IntensifiersLex.csv')
    self.negation_lex = _read_lexicon_from_csv('NegationSentLex.csv')
    if parse:
        # Lazy import: the BIST parser pulls in heavy dependencies.
        from nlp_architect.pipelines.spacy_bist import SpacyBISTParser
        self.parser = SpacyBISTParser()
    else:
        self.parser = None
def __init__(
    self,
    aspect_lex: Union[str, PathLike],
    opinion_lex: Union[str, PathLike, dict],
    parse: bool = True,
):
    """Initialize SentimentInference with the given aspect and opinion lexicons.

    Args:
        aspect_lex: Path (or path string) to the aspect lexicon file.
        opinion_lex: Path (or path string) to the opinion lexicon file,
            or an already-loaded lexicon dict.
        parse: When True, construct a SpacyBISTParser for parsing raw
            text; when False, callers must supply pre-parsed documents.
    """
    INFERENCE_OUT.mkdir(parents=True, exist_ok=True)
    # isinstance (not `type(...) is dict`) also accepts dict subclasses
    # such as OrderedDict.
    self.opinion_lex = (
        opinion_lex if isinstance(opinion_lex, dict) else load_opinion_lex(Path(opinion_lex))
    )
    self.aspect_lex = _load_aspect_lexicon(Path(aspect_lex))
    self.intensifier_lex = _read_lexicon_from_csv("IntensifiersLex.csv")
    self.negation_lex = _read_lexicon_from_csv("NegationSentLex.csv")
    if parse:
        # Lazy import: the BIST parser pulls in heavy dependencies.
        from nlp_architect.pipelines.spacy_bist import SpacyBISTParser
        self.parser = SpacyBISTParser(spacy_model="en")
    else:
        self.parser = None
def __init__(
    self,
    aspect_lex: Union[str, PathLike],
    opinion_lex: Union[str, PathLike, dict],
    parse: bool = True,
    parser="spacy",
    spacy_model="en_core_web_sm",
):
    """Initialize SentimentInference with the given aspect and opinion lexicons.

    Args:
        aspect_lex: Path (or path string) to the aspect lexicon file.
        opinion_lex: Path (or path string) to the opinion lexicon file,
            or an already-loaded lexicon dict.
        parse: When True, construct a parser for raw text; when False,
            callers must supply pre-parsed documents.
        parser: Which parser backend to use: "spacy" or "bist".
        spacy_model: Name of the spaCy model to load.

    Raises:
        ValueError: If ``parse`` is True and ``parser`` is not one of
            "spacy" or "bist".
    """
    INFERENCE_OUT.mkdir(parents=True, exist_ok=True)
    # isinstance (not `type(...) is dict`) also accepts dict subclasses.
    self.opinion_lex = (
        opinion_lex if isinstance(opinion_lex, dict) else load_opinion_lex(Path(opinion_lex))
    )
    self.aspect_lex = _load_aspect_lexicon(Path(aspect_lex))
    self.intensifier_lex = _read_lexicon_from_csv("IntensifiersLex.csv")
    self.negation_lex = _read_lexicon_from_csv("NegationSentLex.csv")
    self.parser_name = parser
    if not parse:
        self.parser = None
    elif parser == "bist":
        # Lazy import: the BIST parser pulls in heavy dependencies.
        from nlp_architect.pipelines.spacy_bist import SpacyBISTParser

        self.parser = SpacyBISTParser(spacy_model=spacy_model)
    elif parser == "spacy":
        from nlp_architect.utils.text import SpacyInstance

        # Disable pipeline components not needed for lexicon-based ABSA.
        disable = [
            "merge_noun_chunks",
            "ner",
            "entity_linker",
            "textcat",
            "entity_ruler",
            "sentencizer",
            "merge_entities",
        ]
        self.parser = SpacyInstance(
            model=spacy_model, disable=disable, ptb_pos=True, n_jobs=1
        )
    else:
        # Fix: previously an unrecognized parser name (with parse=True) left
        # self.parser unset, causing a deferred AttributeError. Fail fast.
        raise ValueError(f"Unknown parser: {parser!r} (expected 'spacy' or 'bist')")