Python noun_phrases 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: chunking

메소드/함수: noun_phrases

hotexamples.com에서의 예제들: 6

Python noun_phrases - 6개의 예제를 찾았습니다. 다음은 오픈소스 프로젝트에서 추출한 Python chunking.noun_phrases의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: build_sdb_database.py 프로젝트: dpfried/mocs

def update_terms(grant):
    """Rebuild ``grant.terms`` from the grant's title and abstract.

    Noun phrases are extracted from the lower-cased title (when the title is
    truthy) and from the abstract as-is (when the abstract is truthy).  The
    combined list is passed to ``sdb_db.stringify_terms`` and the result is
    stored on ``grant.terms``.
    """
    phrases = []

    title = grant.title
    if title:
        phrases.extend(noun_phrases(title.lower()))

    abstract = grant.abstract
    if abstract:
        phrases.extend(noun_phrases(abstract))

    grant.terms = sdb_db.stringify_terms(phrases)

예제 #2

파일 보기

파일: build_sdb_database.py 프로젝트: zzx88991/mocs

def update_terms(grant):
    """Set ``grant.terms`` to the stringified noun phrases of the grant.

    The title contributes the noun phrases of its lower-cased text and the
    abstract contributes the noun phrases of its unmodified text; a falsy
    title or abstract contributes nothing.  Title phrases come first.
    """
    from_title = noun_phrases(grant.title.lower()) if grant.title else []
    from_abstract = noun_phrases(grant.abstract) if grant.abstract else []
    grant.terms = sdb_db.stringify_terms(list(from_title) + list(from_abstract))

예제 #3

파일 보기

파일: build_dblp_database.py 프로젝트: dpfried/mocs

            # Check to see if we've reached the end of a document tag; if so,
            # turn the attributes collected in `data` into a db.Document and
            # add it to the session.
            if elem.tag in CATEGORIES:
                # Store attribute info, do preprocessing if necessary.
                # Missing keys come back as None (an empty list for
                # 'author_names').
                title = data.get('title')
                year = data.get('year')
                author_names = data.get('author_names', [])
                journal_name = data.get('journal_name')
                conference_name = data.get('conference_name')

                # Clear out attribute info so the next document starts from an
                # empty dict, then build the row to write.
                data = {}
                doc = db.Document(title=title, year=year)
                # If this item has a title, memoize the terms (noun phrases of
                # the preprocessed title, words joined by spaces and phrases
                # joined by commas) and check if it's clean (aka usable).
                # NOTE(review): `title is not None` is the idiomatic None test.
                if title != None:
                    doc.terms = ','.join([' '.join(phrase) for phrase in noun_phrases(preprocess(title))])
                    doc.clean = ok_title(title)
                else:
                    # doc doesn't have a title, so mark it as unusable
                    doc.clean = False
                # Take care of authors, journal and conference.  Each related
                # row is obtained through memoized_row with a per-table memo;
                # presumably this reuses one row per distinct name -- confirm
                # against memoized_row.
                for author_name in author_names:
                    doc.authors.append(memoized_row(db.Author, author_memo, author_name))
                if journal_name != None:
                    doc.journal = memoized_row(db.Journal, journal_memo, journal_name)
                if conference_name != None:
                    doc.conference = memoized_row(db.Conference, conference_memo, conference_name)

                session.add(doc)
                count += 1
                # commit changes periodically (the commit code is outside this excerpt)

예제 #4

파일 보기

파일: build_dblp_database.py 프로젝트: zzx88991/mocs

                # Store attribute info, do preprocessing if necessary.
                # Missing keys come back as None (an empty list for
                # 'author_names').
                title = data.get('title')
                year = data.get('year')
                author_names = data.get('author_names', [])
                journal_name = data.get('journal_name')
                conference_name = data.get('conference_name')

                # Clear out attribute info so the next document starts from an
                # empty dict, then build the row to write.
                data = {}
                doc = db.Document(title=title, year=year)
                # If this item has a title, memoize the terms (noun phrases of
                # the preprocessed title, words joined by spaces and phrases
                # joined by commas) and check if it's clean (aka usable).
                # NOTE(review): `title is not None` is the idiomatic None test.
                if title != None:
                    doc.terms = ','.join([
                        ' '.join(phrase)
                        for phrase in noun_phrases(preprocess(title))
                    ])
                    doc.clean = ok_title(title)
                else:
                    # doc doesn't have a title, so mark it as unusable
                    doc.clean = False
                # Take care of authors and journal.  Each related row is
                # obtained through memoized_row with a per-table memo;
                # presumably this reuses one row per distinct name -- confirm
                # against memoized_row.
                for author_name in author_names:
                    doc.authors.append(
                        memoized_row(db.Author, author_memo, author_name))
                if journal_name != None:
                    doc.journal = memoized_row(db.Journal, journal_memo,
                                               journal_name)
                if conference_name != None:
                    doc.conference = memoized_row(db.Conference,
                                                  conference_memo,

예제 #5

파일 보기

파일: memoize_terms.py 프로젝트: zzx88991/mocs

import mocs_database as db
from chunking import noun_phrases
from build_dblp_database import ok_title
from database import ManagedSession


def preprocess(title):
    """Return *title* converted to lower case."""
    lowered = title.lower()
    return lowered


if __name__ == "__main__":
    # Recompute the memoized ``terms`` and ``clean`` fields of every
    # db.Document row, reporting progress every 1000 records.
    #
    # The print calls use the single-argument parenthesised form so the
    # script parses on both Python 2 and Python 3.
    with ManagedSession() as session:
        query = session.query(db.Document)
        N = query.count()
        count = 0  # stays 0 when the query yields no records
        # NOTE(review): sliced_query presumably walks the query in slices and
        # writes changes through `session` -- confirm against mocs_database.
        records = db.sliced_query(query, session_to_write=session)
        for count, record in enumerate(records, 1):
            if record.title:
                # Terms are stored as comma-separated phrases, each phrase
                # being its words joined by single spaces.
                record.terms = ",".join(
                    " ".join(phrase)
                    for phrase in noun_phrases(preprocess(record.title))
                )
                record.clean = ok_title(record.title)
            else:
                # No title: nothing to extract, so mark the record unusable.
                record.clean = False
            if count % 1000 == 0:
                print("updated %s records (%.f%%)" % (count, float(count) * 100 / N))
    print("finished, updated %s records" % count)

예제 #6

파일 보기

파일: memoize_terms.py 프로젝트: zzx88991/mocs

import mocs_database as db
from chunking import noun_phrases
from build_dblp_database import ok_title
from database import ManagedSession


def preprocess(title):
    """Normalize a title for term extraction by lower-casing it."""
    normalized = title.lower()
    return normalized


if __name__ == "__main__":
    # Refresh the memoized ``terms`` and ``clean`` fields on every
    # db.Document row, printing a progress line every 1000 records.
    #
    # print is called with one parenthesised argument so the same source
    # parses under both Python 2 and Python 3.
    with ManagedSession() as session:
        query = session.query(db.Document)
        N = query.count()
        count = 0  # remains 0 if there are no records to iterate
        # NOTE(review): sliced_query presumably iterates the query in slices
        # and persists updates via `session` -- confirm against mocs_database.
        for count, record in enumerate(
                db.sliced_query(query, session_to_write=session), 1):
            if record.title:
                phrases = noun_phrases(preprocess(record.title))
                # Words of a phrase are space-joined; phrases are comma-joined.
                record.terms = ','.join(' '.join(phrase) for phrase in phrases)
                record.clean = ok_title(record.title)
            else:
                # A record without a title cannot be used downstream.
                record.clean = False
            if count % 1000 == 0:
                print('updated %s records (%.f%%)' % (count,
                                                      float(count) * 100 / N))
    print('finished, updated %s records' % count)