def __init__(self, dict_paths=None):
    """Initialise the sentiment pipeline: splitter, POS tagger, dictionary tagger.

    Args:
        dict_paths: optional list of sentiment-dictionary YAML file paths to
            load into the DictionaryTagger. Defaults to the original
            hard-coded dictionary set, so existing callers are unaffected.
    """
    self.splitter = Splitter()
    self.postagger = POSTagger()
    if dict_paths is None:
        # NOTE(review): these defaults are absolute, machine-specific paths —
        # consider making them relative to the package at deploy time.
        dict_paths = [
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml',
        ]
    self.dicttagger = DictionaryTagger(dict_paths)
def processQuestion(gloveModel, question, minLen=1, maxLen=3, useAPI=False, useSynonyms=False):
    """Convert a question into embedding vectors plus its split parts.

    Returns a 6-tuple: (vectors, parts, pos, gen_question, labels,
    resultsExists). `parts` are sub-phrases of the generalized question
    (or API-extracted predicates when useAPI is True); `vectors` are the
    corresponding glove embeddings; `labels` are camelCased variants of
    multi-word parts; `resultsExists` reports whether the API path produced
    any parts.

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    source; the nesting level of the labels/synonyms/fallback sections is a
    best-effort reading — confirm against version control.
    """
    tagger = POSTagger()
    pos = tagger.parse(question)
    # create splitter and generalizer
    splitter = Splitter()
    # Strip a trailing '?' or '.' before generalizing.
    # NOTE(review): raises IndexError when `question` is an empty string.
    if question[-1] == '?' or question[-1] == '.':
        question = question[:-1]
    gen_question = splitter.generalize(question, pos)
    labels = []
    resultsExists = False
    if not useAPI:
        # Local path: n-gram style split of the generalized question.
        parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    else:
        resultsExists = True
        apiResult, _ = api.getBinaryRelations(question)
        # Keep only predicates that span more than one token position.
        parts = [
            rel.predicate for rel in apiResult
            if len(rel.predicate_positions_) > 1
        ]
    # Build a camelCase label for every multi-word part, e.g.
    # "was born" -> "wasBorn" (first word kept as-is, rest TitleCased).
    for part in parts:
        if len(part.split()) > 1:
            labels.append(part.split()[0] + ''.join(''.join([w[0].upper(), w[1:].lower()]) for w in part.split()[1:]))
    if useSynonyms:
        # Use the longest word of each part as its predicate, then expand
        # `parts` with synonym substitutions from the glove model.
        predicates = [max(part.split(), key=len) for part in parts]
        # NOTE(review): `predicates is not None` is always true here (it is a
        # list comprehension result); the truthiness check alone would do.
        if predicates is not None and len(predicates) > 0:
            for predicate in predicates:
                # Iterate a snapshot of `parts` because the inner loop
                # appends to it.
                for part in list(parts):
                    if predicate in part:
                        for syn in gloveModel.gloveModel.most_similar(
                                predicate.lower()):
                            parts.append(part.replace(predicate, syn[0]))
    # Fall back to the splitter when nothing survived the filters above.
    if len(parts) == 0:
        resultsExists = False
        parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    # create embedder part
    vectors = []
    for part in parts:
        vectors.append(gloveModel.getVector(part))
    return vectors, parts, pos, gen_question, labels, resultsExists
def __init__(self):
    """Wire up the sentence splitter, POS tagger and dictionary tagger."""
    self.splitter = Splitter()
    self.postagger = POSTagger()
    # All sentiment dictionaries live in one directory; derive each path
    # from the shared base so the list stays easy to extend.
    base = "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts"
    names = ("positive", "negative", "morePositive", "moreNegative", "invert")
    self.dicttagger = DictionaryTagger([f"{base}/{name}.yml" for name in names])
class SentimentAnalyzingService(object):
    """Dictionary-based sentiment scorer.

    Pipeline: split raw text into sentences, POS-tag them, tag tokens
    against sentiment dictionaries, then accumulate a numeric score where
    'positive' tags count +1, 'negative' tags count -1, and a preceding
    'inc'/'dec'/'inv' tag doubles/halves/negates the next token's score.
    """

    def __init__(self):
        self.splitter = Splitter()
        self.postagger = POSTagger()
        # NOTE(review): absolute, machine-specific dictionary paths — make
        # configurable or package-relative when convenient.
        self.dicttagger = DictionaryTagger([
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml'
        ])

    def valueOf(self, sentiment):
        """Return +1 for 'positive', -1 for 'negative', 0 for anything else."""
        if sentiment == 'positive':
            return 1
        if sentiment == 'negative':
            return -1
        return 0

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        """Score one sentence of dictionary-tagged tokens.

        Each token is a sequence whose third element (index 2) is its list
        of dictionary tags. The previous token's 'inc'/'dec'/'inv' tag
        doubles, halves or negates the current token's base score.

        Rewritten iteratively: the original tail-recursive version could hit
        Python's recursion limit on very long sentences. Arithmetic and
        ordering are unchanged.
        """
        score = acum_score
        prev = previous_token
        for current_token in sentence_tokens:
            token_score = sum(self.valueOf(tag) for tag in current_token[2])
            if prev is not None:
                previous_tags = prev[2]
                if 'inc' in previous_tags:
                    token_score *= 2.0
                elif 'dec' in previous_tags:
                    token_score /= 2.0
                elif 'inv' in previous_tags:
                    token_score *= -1.0
            score += token_score
            prev = current_token
        return score

    def sentiment_score(self, dictTaggedSentences):
        """Sum the scores of all dictionary-tagged sentences."""
        return sum(self.sentence_score(sentence, None, 0.0)
                   for sentence in dictTaggedSentences)

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        """Run the full split/tag/score pipeline on raw text."""
        sentences = self.splitter.splitParagraphToListOfSentences(
            textToBeAnalysed)
        pos_tagged_sentences = self.postagger.pos_tag(sentences)
        dict_tagged_sentences = self.dicttagger.tag(pos_tagged_sentences)
        score = self.sentiment_score(dict_tagged_sentences)
        return score
class SentimentAnalyzingService(object):
    """Scores text sentiment via dictionary-tagged tokens.

    Splits text into sentences, POS-tags them, tags tokens against the
    sentiment dictionaries, and totals the per-sentence scores.
    """

    def __init__(self):
        self.splitter = Splitter()
        self.postagger = POSTagger()
        base = "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts"
        self.dicttagger = DictionaryTagger(
            [
                f"{base}/positive.yml",
                f"{base}/negative.yml",
                f"{base}/morePositive.yml",
                f"{base}/moreNegative.yml",
                f"{base}/invert.yml",
            ]
        )

    def valueOf(self, sentiment):
        """Map a sentiment tag to its numeric weight (+1 / -1 / 0)."""
        return {"positive": 1, "negative": -1}.get(sentiment, 0)

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        """Recursively accumulate the score of one tagged sentence.

        A token's third element (index 2) holds its dictionary tags; an
        'inc'/'dec'/'inv' tag on the previous token doubles, halves or
        negates the current token's contribution.
        """
        if not sentence_tokens:
            return acum_score
        head, tail = sentence_tokens[0], sentence_tokens[1:]
        token_score = sum(self.valueOf(tag) for tag in head[2])
        if previous_token is not None:
            prior_tags = previous_token[2]
            if "inc" in prior_tags:
                token_score *= 2.0
            elif "dec" in prior_tags:
                token_score /= 2.0
            elif "inv" in prior_tags:
                token_score *= -1.0
        return self.sentence_score(tail, head, acum_score + token_score)

    def sentiment_score(self, dictTaggedSentences):
        """Total the scores of all tagged sentences."""
        return sum(
            self.sentence_score(sentence, None, 0.0)
            for sentence in dictTaggedSentences
        )

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        """Run the full split/tag/score pipeline over raw text."""
        sentences = self.splitter.splitParagraphToListOfSentences(textToBeAnalysed)
        tagged_pos = self.postagger.pos_tag(sentences)
        tagged_dict = self.dicttagger.tag(tagged_pos)
        return self.sentiment_score(tagged_dict)
def extract_text(arg, punkt):
    """Extract sentences from the text file at `arg` and report passives.

    Tokenizes the file's contents into sentences with the given Punkt
    tokenizer and runs find_passives on each, printing a separator between
    sentences.
    """
    # Open with an explicit encoding so decoding does not depend on the
    # platform's locale default (fixes UnicodeDecodeError on some systems).
    with open(arg, encoding="utf-8") as f:
        logger.info("Reading Text file")
        text = f.read()
    sentences = punkt.tokenize(text)
    logger.info(f"{len(sentences)} sentences detected")
    for sent in sentences:
        find_passives(sent)
        print("-" * 60)


if __name__ == "__main__":
    TAGGER = POSTagger().get()
    if len(sys.argv) > 1:
        # pre-trained version of PunktSentenceTokenizer
        punkt = nltk.tokenize.punkt.PunktSentenceTokenizer()
        for arg in sys.argv[1:]:
            extract_text(arg, punkt)
    else:
        print("No sentences")