Example #1
0
File: sep.py Project: etboggs/inpho
def process_article(article,
                    terms=None,
                    entity_type=Idea,
                    output_filename=None,
                    corpus_root='corpus/'):
    if terms is None:
        terms = select_terms(entity_type)

    lines = []

    filename = article.get_filename(corpus_root)
    if filename and os.path.isfile(filename):
        print "processing:", article.sep_dir, filename
        try:
            doc = extract_article_body(filename)
            lines = dm.prepare_apriori_input(doc, terms, article)
        except:
            print "ERROR PROCESSING:", article.sep_dir, filename
    else:
        print "BAD SEP_DIR:", article.sep_dir

    if output_filename:
        with open(output_filename, 'w') as f:
            f.writelines(lines)
    else:
        return lines
Example #2
0
def filter_apriori_input(occur_filename, output_filename, entity_type=Idea,
                         doc_terms=None):
    """Write apriori input built from ``occur_filename`` to ``output_filename``.

    Terms are chosen with ``select_terms(entity_type)``; the lines come from
    ``dm.prepare_apriori_input(occur_filename, terms, doc_terms)`` and are
    written with ``writelines``. The output file is opened in 'w' mode, so
    existing content is replaced. Returns None.
    """
    selected_terms = select_terms(entity_type)

    # Empty and close the Session once the terms are selected, before the
    # apriori preparation step runs.
    # NOTE(review): presumably this detaches the terms from the session --
    # confirm.
    Session.expunge_all()
    Session.close()

    apriori_lines = dm.prepare_apriori_input(
        occur_filename, selected_terms, doc_terms)

    with open(output_filename, 'w') as out:
        out.writelines(apriori_lines)
Example #3
0
File: sep.py Project: we1l1n/inpho
def filter_apriori_input(occur_filename,
                         output_filename,
                         entity_type=Idea,
                         doc_terms=None):
    """Generate apriori input lines and save them to ``output_filename``.

    The terms returned by ``select_terms(entity_type)`` are passed, together
    with ``occur_filename`` and ``doc_terms``, to
    ``dm.prepare_apriori_input``; the resulting lines are written to
    ``output_filename`` (opened in 'w' mode). Returns None.
    """
    chosen_terms = select_terms(entity_type)

    # The Session is emptied and closed right after term selection.
    # NOTE(review): looks intended to release the session before the
    # preparation step -- confirm.
    Session.expunge_all()
    Session.close()

    output_lines = dm.prepare_apriori_input(
        occur_filename, chosen_terms, doc_terms)

    with open(output_filename, 'w') as handle:
        handle.writelines(output_lines)
Example #4
0
File: sep.py Project: etboggs/inpho
def process_article(article, terms=None, entity_type=Idea, output_filename=None,
                    corpus_root='corpus/'):
    if terms is None:
        terms = select_terms(entity_type)
    

    lines = []

    filename = article.get_filename(corpus_root)
    if filename and os.path.isfile(filename):
        print "processing:", article.sep_dir, filename
        try: 
            doc = extract_article_body(filename)
            lines = dm.prepare_apriori_input(doc, terms, article)
        except:
            print "ERROR PROCESSING:", article.sep_dir, filename
    else:
        print "BAD SEP_DIR:", article.sep_dir

    if output_filename:
        with open(output_filename, 'w') as f:
            f.writelines(lines)
    else:
        return lines