def _read_dict(self, a_fname, a_class, a_dict):
    """
    Populate a polarity dictionary from a lexicon file.

    Every non-empty line is split with TAB_RE into exactly six fields:
    form, lemma, tag, class, scores, and a trailing field that is ignored.
    The scores field is split with SLASH_RE and the element at index
    CLASS2IDX[a_class] is used; a value of '-' is read as 0.0.  When a key
    is already present in a_dict, it is only overwritten by an entry whose
    absolute score is strictly greater.

    @param a_fname - source file to read from
    @param a_class - expected target class of the entries
    @param a_dict - dictionary to be populated

    @return None (a_dict and self.lemma2forms are modified in place)
    """
    with codecs.open(a_fname, 'r', encoding=ENCODING) as ifile:
        for iline in ifile:
            iline = iline.strip()
            if not iline:
                continue
            # a line with a different number of fields raises ValueError
            iform, ilemma, itag, iclass, iscores, _ = TAB_RE.split(iline)
            # kept as an assert so that the exception type seen by callers
            # does not change
            assert a_class == iclass, \
                "Mismatching classes: '{:s}' vs. '{:s}'".format(a_class,
                                                                iclass)
            score = SLASH_RE.split(iscores)[CLASS2IDX[a_class]]
            score = 0.0 if score == '-' else float(score)
            # the tag "AD" is expanded to both adjective tags
            tags = ["ADJA", "ADJD"] if itag == "AD" else [itag]
            iform = normalize(iform)
            ilemma = normalize(ilemma)
            # remember every normalized form seen for this lemma
            if ilemma in self.lemma2forms:
                self.lemma2forms[ilemma].add(iform)
            else:
                self.lemma2forms[ilemma] = {iform}
            # a lemma that has no entry yet is registered together with
            # the form; otherwise only the form itself is considered
            if ilemma not in a_dict:
                keys = {iform, ilemma}
            else:
                keys = (iform,)
            for tag in tags:
                ivalue = (tag, score, iclass)
                for key in keys:
                    # strict comparison: on equal absolute scores the
                    # entry stored first is kept
                    if key not in a_dict or \
                            abs(a_dict[key][1]) < abs(score):
                        a_dict[key] = ivalue
def _read_dict(self, a_dict, a_class):
    """
    Read lexicon entries of one polarity class from a file.

    Every non-empty line is split with TAB_RE.  The first two fields are
    the word item and its score; if the line has exactly three fields, the
    last one is taken as a list of additional forms (split with COMMA_RE).
    The word item is split with BAR_RE into the word and its tag.

    @param a_dict - file containing dictionary entries
    @param a_class - expected target class of the entries

    @return dictionary read; it maps each normalized form to a tuple
      (tag, score, a_class)
    """
    ret = dict()
    with codecs.open(a_dict, 'r', encoding=ENCODING) as ifile:
        for iline in ifile:
            iline = iline.strip()
            if not iline:
                continue
            fields = TAB_RE.split(iline)
            # fewer than two fields raises ValueError here
            iword, iscore = fields[:2]
            iforms = fields[-1] if len(fields) == 3 else ""
            word, tag = BAR_RE.split(iword)
            score = float(iscore)
            # the tag "ADJX" is expanded to both adjective tags
            tags = ["ADJA", "ADJD"] if tag == "ADJX" else [tag]
            forms = [normalize(f) for f in COMMA_RE.split(iforms) if f]
            forms.append(normalize(word))
            for itag in tags:
                ivalue = (itag, score, a_class)
                for iform in forms:
                    # Duplicates must be looked up in the result being
                    # built: `a_dict` is the input file name, so testing
                    # membership in it was a substring check followed by
                    # a TypeError on indexing.  An existing entry is only
                    # replaced by one with a strictly greater absolute
                    # score.
                    if iform not in ret or abs(ret[iform][1]) < abs(score):
                        ret[iform] = ivalue
    return ret
def _read_dict(self, a_fname, a_class, a_dict):
    """
    Fill a polarity dictionary with the entries of a lexicon file.

    Each non-empty line must split (TAB_RE) into six fields: form, lemma,
    tag, class, scores, plus one trailing field that is discarded.  The
    score for a_class is the element at index CLASS2IDX[a_class] of the
    scores field split with SLASH_RE; '-' stands for 0.0.  Existing keys
    of a_dict are replaced only by entries with a strictly greater
    absolute score.

    @param a_fname - source file to read from
    @param a_class - expected target class of the entries
    @param a_dict - dictionary to be populated

    @return None (a_dict and self.lemma2forms are modified in place)
    """
    with codecs.open(a_fname, 'r', encoding=ENCODING) as ifile:
        for iline in ifile:
            iline = iline.strip()
            if not iline:
                continue
            # unpacking fails with ValueError unless there are six fields
            iform, ilemma, itag, iclass, iscores, _ = TAB_RE.split(iline)
            # left as an assert to keep the exception type unchanged
            assert a_class == iclass, \
                "Mismatching classes: '{:s}' vs. '{:s}'".format(a_class,
                                                                iclass)
            score = SLASH_RE.split(iscores)[CLASS2IDX[a_class]]
            score = 0.0 if score == '-' else float(score)
            if itag == "AD":
                # "AD" covers both adjective tags
                tags = ["ADJA", "ADJD"]
            else:
                tags = [itag]
            iform = normalize(iform)
            ilemma = normalize(ilemma)
            # collect the normalized forms observed for every lemma
            if ilemma in self.lemma2forms:
                self.lemma2forms[ilemma].add(iform)
            else:
                self.lemma2forms[ilemma] = {iform}
            # an unseen lemma is stored alongside the form; a known one
            # is left to compete only through its own lines
            if ilemma in a_dict:
                keys = (iform,)
            else:
                keys = {iform, ilemma}
            for tag in tags:
                ivalue = (tag, score, iclass)
                for key in keys:
                    if key in a_dict and \
                            abs(a_dict[key][1]) >= abs(score):
                        # the stored entry is at least as strong
                        continue
                    a_dict[key] = ivalue
def _read_dict(self, a_dict, a_class):
    """
    Load the entries of one polarity class from a lexicon file.

    Non-empty lines are split with TAB_RE into the word item, its score
    and, when exactly three fields are present, a list of further forms
    that is split with COMMA_RE.  BAR_RE separates the word from its tag
    inside the word item.

    @param a_dict - file containing dictionary entries
    @param a_class - expected target class of the entries

    @return dictionary read; keys are normalized forms, values are tuples
      (tag, score, a_class)
    """
    ret = dict()
    with codecs.open(a_dict, 'r', encoding=ENCODING) as ifile:
        for iline in ifile:
            iline = iline.strip()
            if not iline:
                continue
            fields = TAB_RE.split(iline)
            # raises ValueError if the line has fewer than two fields
            iword, iscore = fields[:2]
            if len(fields) == 3:
                iforms = fields[-1]
            else:
                iforms = ""
            word, tag = BAR_RE.split(iword)
            score = float(iscore)
            # "ADJX" stands for both adjective tags
            tags = ["ADJA", "ADJD"] if tag == "ADJX" else [tag]
            forms = [normalize(f) for f in COMMA_RE.split(iforms) if f]
            forms.append(normalize(word))
            for itag in tags:
                ivalue = (itag, score, a_class)
                for iform in forms:
                    # Compare against `ret`, not `a_dict`: the latter is
                    # the file name, so the former check was a substring
                    # test that ended in a TypeError when it matched.
                    if iform in ret and abs(ret[iform][1]) >= abs(score):
                        # keep the stored entry unless the new one is
                        # strictly stronger
                        continue
                    ret[iform] = ivalue
    return ret
def check_word(self, a_word):
    """
    Look a word up in the negative and positive dictionaries.

    @param a_word - word to be checked

    @return list with the entries stored for the normalized word; the
      entry from self.negative (if any) precedes the one from
      self.positive, and the list is empty when neither contains it
    """
    key = normalize(a_word)
    hits = []
    # the lookup order determines the order of the returned entries
    for lexicon in (self.negative, self.positive):
        if key in lexicon:
            hits.append(lexicon[key])
    return hits
def _read_dict(self, a_fname):
    """
    Load polarity entries from a lexicon file into the class dictionaries.

    Blank lines and lines matching COMMENT_RE are skipped.  Of every other
    line, the first two SPACE_RE-separated tokens are used: the word form
    and a class/score pair that is split with EQUAL_RE.  The entry is
    stored in self.positive, self.negative, or self.neutral according to
    its class; an existing entry is only replaced by one with a strictly
    greater absolute score.

    @param a_fname - source file to read from

    @return None (the dictionaries of this instance are updated in place)
    """
    with codecs.open(a_fname, 'r', encoding=ENCODING) as ifile:
        for iline in ifile:
            iline = iline.strip()
            if not iline or COMMENT_RE.match(iline):
                continue
            # a line with a single token raises ValueError here
            iform, iclass_score = SPACE_RE.split(iline)[:2]
            iclass, iscore = EQUAL_RE.split(iclass_score)
            assert iclass in KNOWN_CLASSES, \
                "Unknown polarity class: {:s}".format(iclass).encode(ENCODING)
            score = float(iscore)
            iform = normalize(iform)
            if iclass == POSITIVE:
                trg_dict = self.positive
            elif iclass == NEGATIVE:
                trg_dict = self.negative
            elif iclass == NEUTRAL:
                trg_dict = self.neutral
            else:
                # no dictionary is kept for any other class
                continue
            # entries read here carry the placeholder tag "NONE"
            if iform not in trg_dict or \
                    abs(trg_dict[iform][1]) < abs(score):
                trg_dict[iform] = ("NONE", score, iclass)
def _read_dict(self, a_fname):
    """
    Read a lexicon file and store its entries by polarity class.

    Lines that are empty or match COMMENT_RE are ignored.  The remaining
    lines are split with SPACE_RE; the first token is the word form and
    the second a class/score pair separated according to EQUAL_RE.
    Entries go to self.positive, self.negative, or self.neutral; a stored
    entry is overwritten only when the new absolute score is strictly
    greater.

    @param a_fname - source file to read from

    @return None (the dictionaries of this instance are updated in place)
    """
    with codecs.open(a_fname, 'r', encoding=ENCODING) as ifile:
        for iline in ifile:
            iline = iline.strip()
            if not iline or COMMENT_RE.match(iline):
                continue
            # unpacking raises ValueError if the line has only one token
            iform, iclass_score = SPACE_RE.split(iline)[:2]
            iclass, iscore = EQUAL_RE.split(iclass_score)
            assert iclass in KNOWN_CLASSES, \
                "Unknown polarity class: {:s}".format(iclass).encode(ENCODING)
            score = float(iscore)
            iform = normalize(iform)
            if iclass == POSITIVE:
                trg_dict = self.positive
            elif iclass == NEGATIVE:
                trg_dict = self.negative
            elif iclass == NEUTRAL:
                trg_dict = self.neutral
            else:
                # classes without a dictionary of their own are skipped
                continue
            if iform in trg_dict and abs(trg_dict[iform][1]) >= abs(score):
                # the stored entry is at least as strong; keep it
                continue
            # entries read here carry the placeholder tag "NONE"
            trg_dict[iform] = ("NONE", score, iclass)