Esempio n. 1
0
    def _read_dict(self, a_fname, a_class, a_dict):
        """
        Read lexicon entries from a file and merge them into a dictionary

        Every non-empty line is split with TAB_RE into exactly six fields:
        form, lemma, tag, class, scores, and one trailing field that is
        ignored.  The score of the entry is the element of the SLASH_RE-split
        scores field at position CLASS2IDX[a_class]; a score of '-' is read
        as 0.0.  The tag "AD" is expanded to the two tags "ADJA" and "ADJD".
        The normalized form is also recorded in self.lemma2forms under its
        normalized lemma.

        @param a_fname - source file to read from
        @param a_class - expected target class of the entries
        @param a_dict - dictionary to be populated (modified in place)

        @return void

        @raise AssertionError if the class of an entry differs from a_class
        """
        with codecs.open(a_fname, 'r', encoding=ENCODING) as ifile:
            for raw_line in ifile:
                line = raw_line.strip()
                if not line:
                    continue
                form, lemma, tag, cls, raw_scores, _ = TAB_RE.split(line)
                assert a_class == cls, \
                    "Mismatching classes: '{:s}' vs. '{:s}'".format(a_class, cls)
                raw_score = SLASH_RE.split(raw_scores)[CLASS2IDX[a_class]]
                if raw_score == '-':
                    score = 0.0
                else:
                    score = float(raw_score)
                tags = ["ADJA", "ADJD"] if tag == "AD" else [tag]
                form = normalize(form)
                lemma = normalize(lemma)
                # remember which forms have been seen for every lemma
                if lemma not in self.lemma2forms:
                    self.lemma2forms[lemma] = set()
                self.lemma2forms[lemma].add(form)
                # the lemma itself is only registered as a key the first time
                # it is encountered
                if lemma in a_dict:
                    candidates = [form]
                else:
                    candidates = {form, lemma}
                for cand_tag in tags:
                    entry = (cand_tag, score, cls)
                    for cand in candidates:
                        # insert new keys; overwrite existing ones only when
                        # the new score has a strictly larger absolute value
                        if cand not in a_dict or \
                           abs(a_dict[cand][1]) < abs(entry[1]):
                            a_dict[cand] = entry
Esempio n. 2
0
    def _read_dict(self, a_dict, a_class):
        """
        Read dictionary entries from a file

        Every non-empty line is split with TAB_RE.  The first field has the
        shape word<BAR>tag (split with BAR_RE), the second field is the score,
        and an optional third field holds further forms separated according
        to COMMA_RE.  The tag "ADJX" is expanded to the tags "ADJA" and
        "ADJD".  When the same normalized form occurs more than once, the
        entry with the largest absolute score is kept.

        @param a_dict - file containing dictionary entries
        @param a_class - expected target class of the entries

        @return dictionary mapping normalized forms to
          (tag, score, class) tuples
        """
        ret = dict()
        with codecs.open(a_dict, 'r', encoding=ENCODING) as ifile:
            for iline in ifile:
                iline = iline.strip()
                if not iline:
                    continue
                fields = TAB_RE.split(iline)
                iword, iscore = fields[:2]
                # additional forms are only present on three-field lines
                iforms = fields[-1] if len(fields) == 3 else ""
                word, tag = BAR_RE.split(iword)
                score = float(iscore)
                tags = ["ADJA", "ADJD"] if tag == "ADJX" else [tag]
                forms = [normalize(f) for f in COMMA_RE.split(iforms) if f]
                forms.append(normalize(word))
                for itag in tags:
                    ivalue = (itag, score, a_class)
                    for iform in forms:
                        # compare against the entries collected so far in
                        # `ret` -- `a_dict` is the path of the input file,
                        # not a dictionary
                        if iform not in ret or \
                           abs(ret[iform][1]) < abs(score):
                            ret[iform] = ivalue
        return ret
Esempio n. 3
0
    def _read_dict(self, a_fname, a_class, a_dict):
        """
        Read lexicon entries from a file and merge them into a dictionary

        Every non-empty line is split with TAB_RE into exactly six fields:
        form, lemma, tag, class, scores, and one trailing field that is
        ignored.  The score of the entry is the element of the SLASH_RE-split
        scores field at position CLASS2IDX[a_class]; a score of '-' is read
        as 0.0.  The tag "AD" is expanded to the two tags "ADJA" and "ADJD".
        The normalized form is also recorded in self.lemma2forms under its
        normalized lemma.

        @param a_fname - source file to read from
        @param a_class - expected target class of the entries
        @param a_dict - dictionary to be populated (modified in place)

        @return void

        @raise AssertionError if the class of an entry differs from a_class
        """
        with codecs.open(a_fname, 'r', encoding=ENCODING) as ifile:
            for raw_line in ifile:
                line = raw_line.strip()
                if not line:
                    continue
                form, lemma, tag, cls, raw_scores, _ = TAB_RE.split(line)
                assert a_class == cls, \
                    "Mismatching classes: '{:s}' vs. '{:s}'".format(a_class, cls)
                raw_score = SLASH_RE.split(raw_scores)[CLASS2IDX[a_class]]
                if raw_score == '-':
                    score = 0.0
                else:
                    score = float(raw_score)
                tags = ["ADJA", "ADJD"] if tag == "AD" else [tag]
                form = normalize(form)
                lemma = normalize(lemma)
                # remember which forms have been seen for every lemma
                if lemma not in self.lemma2forms:
                    self.lemma2forms[lemma] = set()
                self.lemma2forms[lemma].add(form)
                # the lemma itself is only registered as a key the first time
                # it is encountered
                if lemma in a_dict:
                    candidates = [form]
                else:
                    candidates = {form, lemma}
                for cand_tag in tags:
                    entry = (cand_tag, score, cls)
                    for cand in candidates:
                        # insert new keys; overwrite existing ones only when
                        # the new score has a strictly larger absolute value
                        if cand not in a_dict or \
                           abs(a_dict[cand][1]) < abs(entry[1]):
                            a_dict[cand] = entry
Esempio n. 4
0
    def _read_dict(self, a_dict, a_class):
        """
        Read dictionary entries from a file

        Every non-empty line is split with TAB_RE.  The first field has the
        shape word<BAR>tag (split with BAR_RE), the second field is the score,
        and an optional third field holds further forms separated according
        to COMMA_RE.  The tag "ADJX" is expanded to the tags "ADJA" and
        "ADJD".  When the same normalized form occurs more than once, the
        entry with the largest absolute score is kept.

        @param a_dict - file containing dictionary entries
        @param a_class - expected target class of the entries

        @return dictionary mapping normalized forms to
          (tag, score, class) tuples
        """
        ret = dict()
        with codecs.open(a_dict, 'r', encoding=ENCODING) as ifile:
            for iline in ifile:
                iline = iline.strip()
                if not iline:
                    continue
                fields = TAB_RE.split(iline)
                iword, iscore = fields[:2]
                # additional forms are only present on three-field lines
                iforms = fields[-1] if len(fields) == 3 else ""
                word, tag = BAR_RE.split(iword)
                score = float(iscore)
                tags = ["ADJA", "ADJD"] if tag == "ADJX" else [tag]
                forms = [normalize(f) for f in COMMA_RE.split(iforms) if f]
                forms.append(normalize(word))
                for itag in tags:
                    ivalue = (itag, score, a_class)
                    for iform in forms:
                        # compare against the entries collected so far in
                        # `ret` -- `a_dict` is the path of the input file,
                        # not a dictionary
                        if iform not in ret or \
                           abs(ret[iform][1]) < abs(score):
                            ret[iform] = ivalue
        return ret
Esempio n. 5
0
    def check_word(self, a_word):
        """
        Look up a word in the negative and positive dictionaries

        The word is normalized before the lookup.

        @param a_word - word to be checked

        @return list of the entries stored for the word; the entry from the
          negative dictionary (if any) precedes the one from the positive
          dictionary (if any); the list is empty if the word is unknown
        """
        key = normalize(a_word)
        return [lexicon[key]
                for lexicon in (self.negative, self.positive)
                if key in lexicon]
Esempio n. 6
0
    def check_word(self, a_word):
        """
        Look up a word in the negative and positive dictionaries

        The word is normalized before the lookup.

        @param a_word - word to be checked

        @return list of the entries stored for the word; the entry from the
          negative dictionary (if any) precedes the one from the positive
          dictionary (if any); the list is empty if the word is unknown
        """
        key = normalize(a_word)
        return [lexicon[key]
                for lexicon in (self.negative, self.positive)
                if key in lexicon]
Esempio n. 7
0
    def _read_dict(self, a_fname):
        """
        Read polarity entries from a file into the class-specific dictionaries

        Empty lines and lines matching COMMENT_RE are skipped.  The first two
        SPACE_RE-separated fields of every other line are the form and a
        class/score pair, which is split with EQUAL_RE.  Entries are stored
        as ("NONE", score, class) tuples under the normalized form in
        self.positive, self.negative, or self.neutral, depending on the
        class.  An existing entry is only replaced by one with a strictly
        larger absolute score.

        @param a_fname - source file to read from

        @return void

        @raise AssertionError if the class of an entry is not in KNOWN_CLASSES
        """
        with codecs.open(a_fname, 'r', encoding=ENCODING) as ifile:
            for raw_line in ifile:
                line = raw_line.strip()
                if not line or COMMENT_RE.match(line):
                    continue
                form, class_score = SPACE_RE.split(line)[:2]
                polarity, raw_score = EQUAL_RE.split(class_score)
                assert polarity in KNOWN_CLASSES, \
                    "Unknown polarity class: {:s}".format(polarity).encode(ENCODING)
                score = float(raw_score)
                form = normalize(form)
                # pick the dictionary that corresponds to the polarity class
                if polarity == POSITIVE:
                    target = self.positive
                elif polarity == NEGATIVE:
                    target = self.negative
                elif polarity == NEUTRAL:
                    target = self.neutral
                else:
                    continue

                # insert new forms; overwrite known ones only when the new
                # score has a strictly larger absolute value
                if form not in target or abs(target[form][1]) < abs(score):
                    target[form] = ("NONE", score, polarity)
Esempio n. 8
0
    def _read_dict(self, a_fname):
        """
        Read polarity entries from a file into the class-specific dictionaries

        Empty lines and lines matching COMMENT_RE are skipped.  The first two
        SPACE_RE-separated fields of every other line are the form and a
        class/score pair, which is split with EQUAL_RE.  Entries are stored
        as ("NONE", score, class) tuples under the normalized form in
        self.positive, self.negative, or self.neutral, depending on the
        class.  An existing entry is only replaced by one with a strictly
        larger absolute score.

        @param a_fname - source file to read from

        @return void

        @raise AssertionError if the class of an entry is not in KNOWN_CLASSES
        """
        with codecs.open(a_fname, 'r', encoding=ENCODING) as ifile:
            for raw_line in ifile:
                line = raw_line.strip()
                if not line or COMMENT_RE.match(line):
                    continue
                form, class_score = SPACE_RE.split(line)[:2]
                polarity, raw_score = EQUAL_RE.split(class_score)
                assert polarity in KNOWN_CLASSES, \
                    "Unknown polarity class: {:s}".format(polarity).encode(ENCODING)
                score = float(raw_score)
                form = normalize(form)
                # pick the dictionary that corresponds to the polarity class
                if polarity == POSITIVE:
                    target = self.positive
                elif polarity == NEGATIVE:
                    target = self.negative
                elif polarity == NEUTRAL:
                    target = self.neutral
                else:
                    continue

                # insert new forms; overwrite known ones only when the new
                # score has a strictly larger absolute value
                if form not in target or abs(target[form][1]) < abs(score):
                    target[form] = ("NONE", score, polarity)