Ejemplos de WordDF.df en Python

Lenguaje de programación: Python

Namespace/Package Name: worddf

Clase / Tipo: WordDF

Método / Función: df

Ejemplos en hotexamples.com: 2

Python WordDF.df - 2 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de worddf.WordDF.df extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

WordDF(1)

close(1)

df(1)

doc_size(1)

Métodos usados con frecuencia

WordDF (1)

close (1)

df (1)

doc_size (1)

Ejemplo n.º 1

Mostrar archivo

Archivo: wordweight.py Proyecto: piaofu110/docclustering

class WordWeightEvaluation:
    """Rank the keywords of a POS-tagged document with a TF-IDF style score.

    Term frequencies are collected from whitespace-separated ``word/tag``
    tokens: tag ``n`` counts 2 per occurrence, tag ``v`` counts 1, and every
    other tag is ignored. Document frequencies are read from a ``WordDF``
    store.
    """

    # Per-occurrence weight of each part-of-speech tag that is counted.
    _TAG_WEIGHTS = {'n': 2, 'v': 1}

    def __init__(self, kw_num=20, worddfdir='../data/worddf'):
        """Open the document-frequency store.

        Args:
            kw_num: maximum number of keywords returned by ``extract_kw``.
            worddfdir: location handed to ``WordDF``, which is opened with
                mode ``'r'`` (presumably read-only -- confirm in WordDF).
        """
        self.worddf = WordDF('r', worddfdir)
        # Presumably the total number of documents in the corpus; it is the
        # numerator of the idf ratio in ``extract_kw``.
        self.docsize = self.worddf.doc_size()
        self.kwnum = kw_num

    def close(self):
        """Close the underlying ``WordDF`` store."""
        self.worddf.close()

    def extract_kw(self, title, content, seg_text=False):
        """Return the top keywords of a document with normalized weights.

        Args:
            title: title text; may be empty/None, in which case it is skipped.
                Title occurrences count twice as much as content occurrences.
            content: body text.
            seg_text: if True the inputs have not been word-segmented yet and
                are first passed through ``seg_text``.

        Returns:
            A list of at most ``kwnum`` ``(word, weight)`` tuples ordered by
            descending weight. Weights are divided by their sum; if that sum
            is zero the raw weights are returned unchanged, because the
            normalization is undefined there.
        """
        if seg_text:
            if title:
                title = self.seg_text(title)
            content = self.seg_text(content)

        word_dict = dict()
        if title:
            self.__stats_tf(word_dict, title, coef=2.0)
        self.__stats_tf(word_dict, content, coef=1.0)

        # Build a fresh mapping instead of rewriting word_dict while it is
        # being iterated; items() works on both Python 2 and Python 3.
        scores = {}
        for w, v in word_dict.items():
            # +1.0 smooths unseen words and forces float division.
            df = self.worddf.df(w) + 1.0
            idf = math.log(self.docsize / df, 2)
            scores[w] = v * idf

        wwlist = heapq.nlargest(self.kwnum,
                                scores.items(),
                                key=lambda x: x[1])
        sumw = sum(ww[1] for ww in wwlist)
        if sumw == 0:
            # Either no keywords at all or every idf cancelled out; dividing
            # would raise ZeroDivisionError, so hand back the raw weights.
            return list(wwlist)
        return [(word, weight / sumw) for word, weight in wwlist]

    def __stats_tf(self, word_dict, wordtagstr, coef=1.0):
        """Accumulate weighted term frequencies into ``word_dict`` in place.

        Args:
            word_dict: mapping word -> accumulated weight; updated in place.
            wordtagstr: whitespace-separated ``word/tag`` tokens; empty or
                None input contributes nothing.
            coef: multiplier applied to every occurrence in this text.
        """
        if not wordtagstr:
            return
        for wotstr in wordtagstr.split():
            wot = wotstr.split('/')
            if len(wot) < 2:
                # Token carries no tag, so it cannot be a counted noun/verb.
                continue
            weight = self._TAG_WEIGHTS.get(wot[1])
            if weight is None:
                continue
            word_dict[wot[0]] = word_dict.get(wot[0], 0) + coef * weight

    def seg_text(self, text):
        """Word-segment ``text``; must be provided by a subclass."""
        raise NotImplementedError()

Ejemplo n.º 2

Mostrar archivo

Archivo: wordweight.py Proyecto: Calvin-he/docclustering

class WordWeightEvaluation:
    """Keyword extractor that scores POS-tagged words by tf * idf.

    Input text is a sequence of whitespace-separated ``word/tag`` tokens.
    Only tags ``n`` (weight 2) and ``v`` (weight 1) contribute to the term
    frequency; document frequencies come from a ``WordDF`` store.
    """

    def __init__(self, kw_num=20, worddfdir = '../data/worddf'):
        """Open the ``WordDF`` store and remember the keyword limit.

        Args:
            kw_num: upper bound on the number of keywords ``extract_kw``
                returns.
            worddfdir: path passed to ``WordDF`` together with mode ``'r'``
                (presumably read-only -- verify against WordDF).
        """
        self.worddf = WordDF('r', worddfdir)
        # Used as the numerator of the idf ratio; presumably the corpus size.
        self.docsize = self.worddf.doc_size()
        self.kwnum = kw_num

    def close(self):
        """Release the ``WordDF`` store."""
        self.worddf.close()

    def extract_kw(self, title, content, seg_text=False):
        """Given a title and content, return keywords with their weights.

        Args:
            title: title text, skipped when empty or None. Words in the title
                are counted with coefficient 2.0, content words with 1.0.
            content: body text.
            seg_text: True means the text has not been word-segmented yet,
                so it is run through ``seg_text`` first.

        Returns:
            Up to ``kwnum`` ``(word, weight)`` tuples, heaviest first, with
            weights scaled to sum to 1. When the weights sum to zero they
            are returned unscaled rather than dividing by zero.
        """
        if seg_text:
            if title:
                title = self.seg_text(title)
            content = self.seg_text(content)

        word_dict = dict()
        if title:
            self.__stats_tf(word_dict, title, coef=2.0)
        self.__stats_tf(word_dict, content, coef=1.0)

        # items() exists on both Python 2 and 3 (iteritems() is Python 2
        # only); the scores go into a new dict so nothing is mutated while
        # it is iterated.
        weighted = {}
        for w, v in word_dict.items():
            # Add-one smoothing; the float also forces true division.
            df = self.worddf.df(w) + 1.0
            weighted[w] = v * math.log(self.docsize / df, 2)

        wwlist = heapq.nlargest(self.kwnum, weighted.items(), key=lambda x: x[1])
        sumw = sum(ww[1] for ww in wwlist)
        result = []
        for word, weight in wwlist:
            # A zero total (e.g. every idf is 0) makes normalization
            # undefined, so the raw weight is kept in that case.
            if sumw != 0:
                weight = weight / sumw
            result.append((word, weight))
        return result

    def __stats_tf(self, word_dict, wordtagstr, coef=1.0):
        """Add the weighted term counts of ``wordtagstr`` to ``word_dict``.

        Args:
            word_dict: mapping word -> accumulated weight, modified in place.
            wordtagstr: whitespace-separated ``word/tag`` tokens; empty or
                None input is ignored.
            coef: factor applied to each counted occurrence.
        """
        tag_weights = {'n': 2, 'v': 1}
        for wotstr in (wordtagstr or '').split():
            wot = wotstr.split('/')
            # Untagged tokens and tags other than n/v are not counted.
            if len(wot) < 2 or wot[1] not in tag_weights:
                continue
            increment = coef * tag_weights[wot[1]]
            word_dict[wot[0]] = word_dict.get(wot[0], 0) + increment

    def seg_text(self, text):
        """Word-segment ``text``; subclasses must implement this."""
        raise NotImplementedError()