Ejemplos de Lemmer en Python

Lenguaje de programación: Python

Namespace/Package Name: lemmer

Clase / Tipo: Lemmer

Ejemplos en hotexamples.com: 3

Python Lemmer: se encontraron 3 ejemplos. Estos son los ejemplos reales en Python mejor valorados de lemmer.Lemmer, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Métodos usados con frecuencia

Mostrar Ocultar

translate(2)

Ejemplo n.º 1

Mostrar archivo

Archivo: search.py Proyecto: aromanovich/simple-tf-idf-implementation

class SearchEngine(object):
    """Rank indexed documents against a query using TF-IDF weights.

    ``documents`` is indexed by document id to obtain a path, and
    ``dictionary`` maps a lemma to its postings: a sequence of document ids
    with one entry per occurrence of the lemma.
    """

    def __init__(self, mystem_path, documents, dictionary):
        """Store the index and create the lemmatizer.

        Args:
            mystem_path: Passed unchanged to ``Lemmer``.
            documents: Sequence of document paths, indexed by document id.
            dictionary: Mapping from lemma to its postings.
        """
        self._documents = documents
        self._N = len(documents)
        self._dictionary = dictionary
        self._lemmer = Lemmer(mystem_path)

    def _get_df(self, postings):
        """Return the document frequency: the count of distinct ids in *postings*."""
        return len(set(postings))

    def _get_top(self, scores):
        """Return up to ten ``(path, score)`` pairs with the highest scores.

        Args:
            scores: Mapping from document id to score.

        Returns:
            A list in ascending score order, so the best match comes last.
        """
        # A single-argument lambda keeps this valid on both Python 2 and 3
        # (tuple parameters were removed from the language in Python 3).
        ranked = sorted(scores.items(), key=lambda item: item[1])
        return [(self._documents[document_id], score)
                for (document_id, score) in ranked[-10:]]

    def search(self, *args):
        """Pretty-print the best-matching documents for the query words.

        Every word is lemmatized, each document is scored with the sum of
        the TF-IDF weights of the query lemmas, and the result of
        ``_get_top`` is printed.  Lemmas that are absent from the index add
        nothing to any score instead of raising.

        Args:
            *args: The query words.
        """
        query = [self._lemmer.translate(word) for word in args]

        # weights[word][document_id] is the TF-IDF weight of `word` in that
        # document; documents that do not contain the word are left out.
        weights = {}
        for word in set(query):
            # `get` yields None for lemmas missing from the index.
            postings = self._dictionary.get(word) or ()
            df = self._get_df(postings)
            if not df:
                # No document contains the lemma: avoid dividing by zero.
                weights[word] = {}
                continue
            idf = math.log(self._N / float(df))

            # One pass over the postings instead of list.count() per document.
            term_frequency = defaultdict(int)
            for document_id in postings:
                term_frequency[document_id] += 1
            weights[word] = dict(
                (document_id, idf * tf)
                for (document_id, tf) in term_frequency.items())

        scores = {}
        for document_id in range(self._N):
            score = 0
            # Iterate the raw query so that a repeated word counts repeatedly.
            for word in query:
                score += weights[word].get(document_id, 0.0)
            scores[document_id] = score

        pprint.pprint(self._get_top(scores))

Ejemplo n.º 2

Mostrar archivo

Archivo: create_index.py Proyecto: aromanovich/simple-tf-idf-implementation

# Both options are mandatory: report an error through the parser when the
# collection directory was left as None.
if options.collection_dir is None:
    parser.error('Collection directory option is required!')

# NOTE(review): `parser` is defined outside this excerpt; presumably its
# error() call terminates the script -- confirm against its definition.
if options.index_path is None:
    parser.error('Index file is not specified!')


def walk(path):
    """Lazily yield the path of every file found beneath *path*.

    Sub-directories are descended into via ``os.walk``; each yielded path is
    the containing directory joined with the file name.
    """
    for current_dir, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            yield os.path.join(current_dir, filename)

dictionary = defaultdict(list)
documents = []

lemmer = Lemmer(options.mystem_path)
scanner = Scanner()

for (document_id, document_path) in enumerate(walk(options.collection_dir)):
    with codecs.open(document_path, 'r', 'cp1251') as f:
        words = scanner.scan(f.read())
        for word in words:
            if word:
                stem = lemmer.translate(word)
                dictionary[stem].append(document_id)
        documents.append(document_path)
        print '.',

items = dictionary.items()
items.sort(key=lambda (stem, postings): len(documents))

Ejemplo n.º 3

Mostrar archivo

Archivo: search.py Proyecto: aromanovich/simple-tf-idf-implementation

 def __init__(self, mystem_path, documents, dictionary):
     """Store the index data and create the lemmatizer.

     Args:
         mystem_path: Passed unchanged to ``Lemmer``.
         documents: Collection of documents; its length is cached as ``_N``.
         dictionary: Stored as-is in ``_dictionary``.
             NOTE(review): presumably maps lemma -> postings; confirm
             against the index builder.
     """
     self._documents = documents
     # Cache the collection size once at construction time.
     self._N = len(documents)
     self._dictionary = dictionary
     self._lemmer = Lemmer(mystem_path)