def dump(): for f in config.CACHE.files(): d = Document(f) m = d.parse_notes() if not m['author']: # skip skid marks with out annotated authors. continue author = ' '.join(map(lastname, m['author'])) title = remove_stopwords(m['title']) title = re.findall('\w+', title) year = m['year'][-2:] title = ' '.join(title) author = author.replace('-', ' ') title = title.replace('-', ' ') year = year.replace('-', ' ') key = '%s-%s-%s' % (author, year, title) key = key.lower() print key.encode('utf8')
def search(q, limit=None):
    """Yield Whoosh hits for the UTF-8 byte-string query ``q``.

    ``limit`` caps the number of results (``None`` means unlimited).

    NOTE(review): a second ``search`` defined later in this module
    shadows this one at import time; only the later one is reachable.
    """
    query_text = unicode(q.decode('utf8'))
    ix = open_dir(DIRECTORY, NAME)
    with ix.searcher() as searcher:
        fields = ['title', 'author', 'tags', 'notes', 'text', 'source',
                  'year']
        boosts = {'title': 7, 'year': 6, 'author': 10,
                  'tags': 4, 'notes': 2, 'text': 1}
        parser = MultifieldParser(fieldnames=fields,
                                  fieldboosts=boosts,
                                  schema=ix.schema)
        # Whoosh chokes on queries containing stop words; strip them first.
        query_text = remove_stopwords(query_text)
        parsed = parser.parse(query_text)
        for hit in searcher.search(parsed, limit=limit):
            yield hit
def search(q, limit=None):
    """Yield Whoosh hits for the text query ``q`` (no decoding applied).

    ``limit`` caps the number of results (``None`` means unlimited).

    NOTE(review): this re-definition shadows an earlier ``search`` in
    this module (which additionally UTF-8-decodes its argument); decide
    which one should survive and delete the other.
    """
    ix = open_dir(DIRECTORY, NAME)
    with ix.searcher() as searcher:
        parser = MultifieldParser(
            fieldnames=['title', 'author', 'tags', 'notes', 'text',
                        'source', 'year'],
            fieldboosts={'title': 7, 'year': 6, 'author': 10,
                         'tags': 4, 'notes': 2, 'text': 1},
            schema=ix.schema,
        )
        # Strip stop words before parsing -- Whoosh chokes on them.
        parsed = parser.parse(remove_stopwords(q))
        results = searcher.search(parsed, limit=limit)
        for hit in results:
            yield hit