コード例 #1
0
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    """Process every article directory in parallel and write the results.

    Selects the terms for ``entity_type``, collects the distinct, non-empty
    ``Entity.sep_dir`` values, maps ``process_wrapper`` over them in a
    worker pool and writes the lines returned for each article to
    ``output_filename``.

    :param entity_type: passed to ``select_terms`` and forwarded to every
        worker call.
    :param output_filename: path of the file to write; it is opened in
        ``'w'`` mode, so existing content is replaced.
    :param corpus_root: forwarded unchanged to every worker call.
    """
    terms = select_terms(entity_type)

    # Presumably detaches the selected terms from the session so they can be
    # handed to the worker processes -- confirm against select_terms.
    Session.expunge_all()
    Session.close()

    # ``!= None`` is intentional: on a mapped column it builds a SQL
    # expression, so it must not be rewritten as ``is not None``.
    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir != None)
    articles = articles.filter(Entity.sep_dir != '')
    # Each row is a 1-tuple; keep only the sep_dir value.
    articles = [row[0] for row in articles.distinct().all()]

    # Parallel processing of articles. For debugging, the same work can be
    # done serially by calling
    # process_article(title, terms, entity_type, None, corpus_root)
    # for each title and collecting the results in a list.
    args = [(title, terms, entity_type, None, corpus_root)
            for title in articles]
    pool = Pool()
    try:
        doc_lines = pool.map(process_wrapper, args)
    finally:
        # Always shut the pool down and wait for the workers to exit, even
        # when a task raised, so no worker processes are left behind.
        pool.close()
        pool.join()

    # write graph output to file
    print(output_filename)
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
コード例 #2
0
ファイル: sep.py プロジェクト: we1l1n/inpho
def process_articles(entity_type=Entity,
                     output_filename='output-all.txt',
                     corpus_root='corpus/'):
    """Run ``process_wrapper`` over all article directories and save output.

    The terms for ``entity_type`` are selected first; then every distinct,
    non-empty ``Entity.sep_dir`` value is processed in a worker pool and the
    lines produced for each article are written to ``output_filename``.

    :param entity_type: passed to ``select_terms`` and forwarded to every
        worker call.
    :param output_filename: path of the file to write; it is opened in
        ``'w'`` mode, so existing content is replaced.
    :param corpus_root: forwarded unchanged to every worker call.
    """
    terms = select_terms(entity_type)

    # Presumably detaches the selected terms from the session so they can be
    # handed to the worker processes -- confirm against select_terms.
    Session.expunge_all()
    Session.close()

    # ``!= None`` is intentional: on a mapped column it builds a SQL
    # expression, so it must not be rewritten as ``is not None``.
    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir != None)
    articles = articles.filter(Entity.sep_dir != '')
    # Each row is a 1-tuple; keep only the sep_dir value.
    articles = [row[0] for row in articles.distinct().all()]

    # Parallel processing of articles. For debugging, the same work can be
    # done serially by calling
    # process_article(title, terms, entity_type, None, corpus_root)
    # for each title and collecting the results in a list.
    args = [(title, terms, entity_type, None, corpus_root)
            for title in articles]
    pool = Pool()
    try:
        doc_lines = pool.map(process_wrapper, args)
    finally:
        # Always shut the pool down and wait for the workers to exit, even
        # when a task raised, so no worker processes are left behind.
        pool.close()
        pool.join()

    # write graph output to file
    print(output_filename)
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
コード例 #3
0
def filter_apriori_input(occur_filename, output_filename, entity_type=Idea,
                         doc_terms=None):
    """Write the apriori input prepared from an occurrence file.

    Selects the terms for ``entity_type``, passes them together with
    ``occur_filename`` and ``doc_terms`` to ``dm.prepare_apriori_input`` and
    writes the returned lines to ``output_filename`` (opened in ``'w'``
    mode).
    """
    selected_terms = select_terms(entity_type)

    # Release the session once the terms have been selected.
    Session.expunge_all()
    Session.close()

    # Build the lines before opening the output file, so the file is only
    # touched once the preparation step has returned.
    prepared = dm.prepare_apriori_input(occur_filename, selected_terms,
                                        doc_terms)

    with open(output_filename, 'w') as out:
        out.writelines(prepared)
コード例 #4
0
ファイル: sep.py プロジェクト: we1l1n/inpho
def filter_apriori_input(occur_filename,
                         output_filename,
                         entity_type=Idea,
                         doc_terms=None):
    """Prepare apriori input from ``occur_filename`` and save it.

    The terms selected for ``entity_type`` are handed, along with
    ``occur_filename`` and ``doc_terms``, to ``dm.prepare_apriori_input``;
    whatever lines it returns are written to ``output_filename``, which is
    opened in ``'w'`` mode.
    """
    chosen = select_terms(entity_type)

    # The session is cleared and closed as soon as the terms are selected.
    Session.expunge_all()
    Session.close()

    # Preparation happens before the output file is opened.
    output_lines = dm.prepare_apriori_input(occur_filename, chosen,
                                            doc_terms)

    with open(output_filename, 'w') as sink:
        sink.writelines(output_lines)
コード例 #5
0
ファイル: sep.py プロジェクト: camerontt2000/inpho
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    """Process every article directory in parallel and write the results.

    Selects the terms for ``entity_type``, rewrites their search patterns to
    use word-boundary anchors, collects the distinct, non-empty
    ``Entity.sep_dir`` values, maps ``process_wrapper`` over them in a
    worker pool and writes the lines returned for each article to
    ``output_filename``.

    :param entity_type: passed to ``select_terms`` and forwarded to every
        worker call.
    :param output_filename: path of the file to write; it is opened in
        ``'w'`` mode, so existing content is replaced.
    :param corpus_root: forwarded unchanged to every worker call.
    """
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    # Fix search patterns. This happens after the session has been cleared
    # and closed; the new lists are only assigned on the in-memory terms.
    for term in terms:
        newpatterns = []
        for pattern in term.searchpatterns:
            if '(' in pattern and ')' in pattern:
                # Grouped pattern: turn the space just inside each
                # parenthesis into a word-boundary anchor.
                pattern = pattern.replace('( ', '(\\b')
                pattern = pattern.replace(' )', '\\b)')
            else:
                # Plain pattern: trim it and anchor both ends.
                pattern = '\\b%s\\b' % pattern.strip()

            newpatterns.append(pattern)

        term.searchpatterns = newpatterns

    # ``!= None`` is intentional: on a mapped column it builds a SQL
    # expression, so it must not be rewritten as ``is not None``.
    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir != None)
    articles = articles.filter(Entity.sep_dir != '')
    # Each row is a 1-tuple; keep only the sep_dir value.
    articles = [row[0] for row in articles.distinct().all()]

    # Parallel processing of articles. For debugging, the same work can be
    # done serially by calling
    # process_article(title, terms, entity_type, None, corpus_root)
    # for each title and collecting the results in a list.
    args = [(title, terms, entity_type, None, corpus_root)
            for title in articles]
    pool = Pool()
    try:
        doc_lines = pool.map(process_wrapper, args)
    finally:
        # Always shut the pool down and wait for the workers to exit, even
        # when a task raised, so no worker processes are left behind.
        pool.close()
        pool.join()

    # write graph output to file
    print(output_filename)
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
コード例 #6
0
ファイル: sep.py プロジェクト: etboggs/inpho
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    """Process all ``entity_type`` rows in parallel and write the results.

    Selects the terms for ``entity_type``, loads every ``entity_type`` row
    whose ``sep_dir`` is not the empty string, maps ``process_wrapper`` over
    those rows in a worker pool and writes the lines returned for each one
    to ``output_filename``.

    :param entity_type: mapped class used for term selection and for the
        article query; also forwarded to every worker call.
    :param output_filename: path of the file to write; it is opened in
        ``'w'`` mode, so existing content is replaced.
    :param corpus_root: forwarded unchanged to every worker call.
    """
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    # NOTE(review): this yields mapped objects, not sep_dir strings, and rows
    # with a NULL sep_dir are not explicitly excluded -- confirm that
    # process_wrapper expects objects here.
    articles = Session.query(entity_type).filter(
        entity_type.sep_dir != '').all()

    # parallel processing of articles
    args = [(article, terms, entity_type, None, corpus_root)
            for article in articles]
    pool = Pool()
    try:
        doc_lines = pool.map(process_wrapper, args)
    finally:
        # Always shut the pool down and wait for the workers to exit, even
        # when a task raised, so no worker processes are left behind.
        pool.close()
        pool.join()

    # write graph output to file
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
コード例 #7
0
ファイル: sep.py プロジェクト: etboggs/inpho
def process_articles(entity_type=Entity,
                     output_filename='output-all.txt',
                     corpus_root='corpus/'):
    """Run ``process_wrapper`` over all ``entity_type`` rows and save output.

    The terms for ``entity_type`` are selected first; then every
    ``entity_type`` row whose ``sep_dir`` is not the empty string is
    processed in a worker pool and the lines produced for each one are
    written to ``output_filename``.

    :param entity_type: mapped class used for term selection and for the
        article query; also forwarded to every worker call.
    :param output_filename: path of the file to write; it is opened in
        ``'w'`` mode, so existing content is replaced.
    :param corpus_root: forwarded unchanged to every worker call.
    """
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    # NOTE(review): this yields mapped objects, not sep_dir strings, and rows
    # with a NULL sep_dir are not explicitly excluded -- confirm that
    # process_wrapper expects objects here.
    articles = Session.query(entity_type).filter(
        entity_type.sep_dir != '').all()

    # parallel processing of articles
    args = [(article, terms, entity_type, None, corpus_root)
            for article in articles]
    pool = Pool()
    try:
        doc_lines = pool.map(process_wrapper, args)
    finally:
        # Always shut the pool down and wait for the workers to exit, even
        # when a task raised, so no worker processes are left behind.
        pool.close()
        pool.join()

    # write graph output to file
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)