Ejemplo n.º 1
0
class PythonSeg(object):
  """Wrap a ``Segment`` tokenizer together with a keyword -> score table.

  On construction every ``Tag`` returned by ``Tag.objects()`` is registered
  both with the segmenter and in ``self.keywords``.
  """

  # String base type accepted by parse(). Resolved once so the class works
  # on both Python 2 (str and unicode) and Python 3 (str only).
  try:
    _STRING_TYPES = basestring
  except NameError:
    _STRING_TYPES = str

  def __init__(self):
    """Create the segmenter and keyword table, then populate them via load()."""
    self.seg = Segment()
    self.keywords = {}
    self.load()

  def load(self):
    """Register every tag from ``Tag.objects()`` as a word.

    A tag without a ``score`` attribute gets
    ``settings.NEW_WORD_DEFAULT_VALUE`` as its score.
    """
    for tag in Tag.objects():
      score = getattr(tag, 'score', settings.NEW_WORD_DEFAULT_VALUE)
      self.add_word(tag.name, score)

  def add_word(self, name, score=settings.NEW_WORD_DEFAULT_VALUE):
    """Add ``name`` (whitespace-stripped) to the segmenter and keyword table.

    Args:
      name: the word to register; surrounding whitespace is removed.
      score: value stored for the word in ``self.keywords``.
    """
    word = name.strip()
    self.seg.add(word)
    self.add_keyword(word, score)

  def add_keyword(self, word, score=settings.NEW_WORD_DEFAULT_VALUE):
    """Store ``score`` for ``word`` (whitespace-stripped) in ``self.keywords``.

    An existing entry for the same word is overwritten. The segmenter is
    not touched; use add_word() to register the word there as well.
    """
    self.keywords[word.strip()] = score

  def is_keyword(self, word):
    """Return True if ``word`` (compared as given, not stripped) has a score."""
    # `in` replaces dict.has_key(), which no longer exists on Python 3.
    return word in self.keywords

  def parse(self, words, weight=1, TF_IDF=True):
    """Segment ``words`` and accumulate a value per token.

    Tokens come from ``self.seg.seg_text(words)``; tokens of length 1 or
    less are ignored. Each remaining occurrence of a token adds
    ``weight * self.keywords.get(token, 1)`` to that token's total when
    ``TF_IDF`` is true, or plain ``weight`` otherwise.

    Args:
      words: the text to segment.
      weight: multiplier applied to every token occurrence.
      TF_IDF: whether to scale each occurrence by the token's keyword score.

    Returns:
      A dict mapping token -> accumulated value. If ``words`` is not a
      string, an empty *list* is returned instead; that historical return
      value is kept so existing callers are unaffected.
    """
    if not isinstance(words, self._STRING_TYPES):
      return []

    totals = {}
    for token in self.seg.seg_text(words):
      if len(token) <= 1:
        continue
      # Tokens without a stored score count with a neutral factor of 1.
      increment = weight * self.keywords.get(token, 1) if TF_IDF else weight
      if token in totals:
        totals[token] += increment
      else:
        totals[token] = increment

    return totals
Ejemplo n.º 2
0
 def __init__(self):
   """Create a Segment, start with an empty keywords dict, then call load().

   NOTE(review): Segment and load() are defined outside this excerpt;
   confirm what load() populates before relying on instance state.
   """
   self.seg = Segment()
   self.keywords = {}
   self.load()