Example #1
0
 def __init__(self):
     """Set up the sentiment pipeline: sentence splitter, POS tagger, and a
     dictionary tagger loaded from YAML word lists."""
     self.splitter = Splitter()
     self.postagger = POSTagger()
     # NOTE(review): absolute, machine-specific paths — consider making the
     # dictionary directory configurable.
     self.dicttagger = DictionaryTagger([
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml'
     ])
Example #2
0
def processQuestion(gloveModel,
                    question,
                    minLen=1,
                    maxLen=3,
                    useAPI=False,
                    useSynonyms=False):
    """Split a question into candidate relation parts and embed each part.

    Parameters:
        gloveModel: wrapper exposing ``getVector(part)`` and
            ``gloveModel.most_similar(word)``.
        question: the raw question string.
        minLen, maxLen: n-gram bounds forwarded to the splitter.
        useAPI: if True, extract predicates via the external binary-relation
            API instead of the local splitter.
        useSynonyms: if True (API mode), expand predicates with GloVe
            nearest neighbours.

    Returns:
        (vectors, parts, pos, gen_question, labels, resultsExists)
    """
    tagger = POSTagger()
    pos = tagger.parse(question)
    # create splitter and generalizer
    splitter = Splitter()
    # Strip a single trailing '?' or '.'. Guard against an empty question:
    # the original indexed question[-1] unconditionally and raised
    # IndexError on "".
    if question and question[-1] in '?.':
        question = question[:-1]
    gen_question = splitter.generalize(question, pos)
    labels = []
    resultsExists = False
    if not useAPI:
        parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    else:
        resultsExists = True
        apiResult, _ = api.getBinaryRelations(question)
        # Keep only multi-word predicates.
        parts = [
            rel.predicate for rel in apiResult
            if len(rel.predicate_positions_) > 1
        ]
        for part in parts:
            words = part.split()
            if len(words) > 1:
                # camelCase label: first word verbatim, rest capitalized.
                labels.append(words[0] +
                              ''.join(w.capitalize() for w in words[1:]))
        if useSynonyms:
            # The longest word of each part is treated as its predicate.
            predicates = [max(part.split(), key=len) for part in parts]
            if predicates:  # a list comprehension is never None
                for predicate in predicates:
                    # Iterate a snapshot: parts grows inside this loop.
                    for part in list(parts):
                        if predicate in part:
                            for syn in gloveModel.gloveModel.most_similar(
                                    predicate.lower()):
                                parts.append(part.replace(predicate, syn[0]))
        if len(parts) == 0:
            # API produced nothing usable; fall back to the local splitter.
            resultsExists = False
            parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    # create embedder part
    vectors = [gloveModel.getVector(part) for part in parts]
    return vectors, parts, pos, gen_question, labels, resultsExists
 def __init__(self):
     """Set up the sentiment pipeline: sentence splitter, POS tagger, and a
     dictionary tagger loaded from YAML word lists."""
     self.splitter = Splitter()
     self.postagger = POSTagger()
     # NOTE(review): absolute, machine-specific paths — consider making the
     # dictionary directory configurable.
     self.dicttagger = DictionaryTagger(
         [
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml",
         ]
     )
Example #4
0
class SentimentAnalyzingService(object):
    """Scores the sentiment of free text using dictionary-tagged tokens.

    Tokens are triples whose third element is a list of tags: 'positive'
    and 'negative' contribute +1/-1, while the *previous* token's
    'inc'/'dec'/'inv' tags double, halve, or negate the current token's
    contribution.
    """

    def __init__(self):
        # Pipeline: sentence splitter -> POS tagger -> dictionary tagger
        # backed by machine-specific YAML word lists.
        self.splitter = Splitter()
        self.postagger = POSTagger()
        self.dicttagger = DictionaryTagger([
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml'
        ])

    def valueOf(self, sentiment):
        """Return +1 for 'positive', -1 for 'negative', 0 otherwise."""
        if sentiment == 'positive':
            return 1
        if sentiment == 'negative':
            return -1
        return 0

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        """Accumulate the sentiment score of one tagged sentence.

        Rewritten iteratively: the original recursed once per token and
        sliced the remaining list on every call — O(n^2) work and a
        RecursionError on sentences longer than the recursion limit.
        This version produces identical results in O(n).
        """
        score = acum_score
        prev = previous_token
        for token in sentence_tokens:
            tags = token[2]
            token_score = sum(self.valueOf(tag) for tag in tags)
            if prev is not None:
                previous_tags = prev[2]
                # The previous token may intensify, dampen, or invert.
                if 'inc' in previous_tags:
                    token_score *= 2.0
                elif 'dec' in previous_tags:
                    token_score /= 2.0
                elif 'inv' in previous_tags:
                    token_score *= -1.0
            score += token_score
            prev = token
        return score

    def sentiment_score(self, dictTaggedSentences):
        """Sum of per-sentence scores over all tagged sentences."""
        return sum(self.sentence_score(sentence, None, 0.0)
                   for sentence in dictTaggedSentences)

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        """Split, POS-tag, dictionary-tag, and score the given text."""
        sentences = self.splitter.splitParagraphToListOfSentences(
            textToBeAnalysed)
        pos_tagged_sentences = self.postagger.pos_tag(sentences)
        dict_tagged_sentences = self.dicttagger.tag(pos_tagged_sentences)
        return self.sentiment_score(dict_tagged_sentences)
class SentimentAnalyzingService(object):
    """Dictionary-driven sentiment scorer for plain-text paragraphs."""

    def __init__(self):
        # Pipeline components: sentence splitting, POS tagging, and
        # dictionary tagging backed by YAML word lists.
        self.splitter = Splitter()
        self.postagger = POSTagger()
        self.dicttagger = DictionaryTagger(
            [
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml",
            ]
        )

    def valueOf(self, sentiment):
        """Map a sentiment tag to its numeric contribution (+1/-1/0)."""
        return {"positive": 1, "negative": -1}.get(sentiment, 0)

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        """Recursively accumulate the sentiment score of one sentence.

        The tags of the previous token may double ("inc"), halve ("dec"),
        or negate ("inv") the current token's contribution.
        """
        if not sentence_tokens:
            return acum_score
        head, *tail = sentence_tokens
        token_score = sum(self.valueOf(tag) for tag in head[2])
        if previous_token is not None:
            modifiers = previous_token[2]
            if "inc" in modifiers:
                token_score *= 2.0
            elif "dec" in modifiers:
                token_score /= 2.0
            elif "inv" in modifiers:
                token_score *= -1.0
        return self.sentence_score(tail, head, acum_score + token_score)

    def sentiment_score(self, dictTaggedSentences):
        """Total score across all dictionary-tagged sentences."""
        return sum(self.sentence_score(sentence, None, 0.0) for sentence in dictTaggedSentences)

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        """Run the full split -> POS-tag -> dict-tag -> score pipeline."""
        split = self.splitter.splitParagraphToListOfSentences(textToBeAnalysed)
        tagged = self.dicttagger.tag(self.postagger.pos_tag(split))
        return self.sentiment_score(tagged)
Example #6
0
def extract_text(arg, punkt):
    """
    Extract sentences from argument text and find the passive sentences
    """

    # Read the whole file first, then process — the file handle is only
    # needed for the read itself.
    with open(arg) as fh:
        logger.info("Reading Text file")
        contents = fh.read()

    detected = punkt.tokenize(contents)
    logger.info(f"{len(detected)} sentences detected")

    separator = "-" * 60
    for sentence in detected:
        find_passives(sentence)
        print(separator)


if __name__ == "__main__":

    # Shared POS tagger — presumably consumed by find_passives via this
    # module-level global; TODO confirm, it is not referenced below.
    TAGGER = POSTagger().get()

    if len(sys.argv) > 1:

        # pre-trained version of PunktSentenceTokenizer
        punkt = nltk.tokenize.punkt.PunktSentenceTokenizer()

        # Treat every command-line argument as a text-file path.
        for arg in sys.argv[1:]:
            extract_text(arg, punkt)
    else:
        print("No sentences")