Code example #1
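
The sep.py snippets below share a common set of imports. A minimal sketch of
what they assume (the module paths are guesses inferred from the identifiers
used, not confirmed against the actual inpho layout; select_terms,
process_wrapper and doc_terms_list are further helpers defined in the same
sep.py):

import os
from multiprocessing import Pool

# Assumed module paths (hypothetical -- inferred from usage only):
from inpho.model import Session, Entity, Idea, Thinker
import inpho.datamining as dm   # provides apriori, process_edges, etc.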
def update_graph(entity_type, sql_filename):
    """
    Performs a complete update of the database graph information, adding
    jweight, entropy and occurrence data from the SQL file generated by
    complete_mining. This will remove ALL previous graph data.
    """
    # Choose the edge table for this entity type
    if entity_type == Idea:
        table = "idea_graph_edges"
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
    else:
        table = "idea_thinker_graph_edges"

    connection = Session.connection()

    print "deleting old graph information ..."
    connection.execute("""
    TRUNCATE TABLE %(table)s;
    """ % {'filename' : sql_filename, 'table' : table })
    
    print "inserting new graph information"
    connection.execute("""
    SET foreign_key_checks=0;
    LOCK TABLES %(table)s WRITE;
    LOAD DATA INFILE '%(filename)s'
    INTO TABLE %(table)s
    FIELDS TERMINATED BY '::'
    (ante_id, cons_id, confidence, jweight, weight, occurs_in);
    UNLOCK TABLES;
    SET foreign_key_checks=1;
    """ % {'filename' : sql_filename, 'table' : table })
    Session.close()
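
A minimal usage sketch for update_graph (the path is hypothetical; the plain
LOAD DATA INFILE resolves paths on the MySQL server, so an absolute path is
safest):

# Hypothetical invocation: rebuild the idea edge table from a previously
# generated sql- file (complete_mining writes these with os.path.abspath).
update_graph(Idea, '/var/data/inpho/sql-graph.txt')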
Code example #2
File: sep.py Project: camerontt2000/inpho
def update_graph(entity_type, sql_filename):
    # Choose the edge table for this entity type
    if entity_type == Idea:
        table = "idea_graph_edges"
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
    else:
        table = "idea_thinker_graph_edges"

    connection = Session.connection()

    print "deleting old graph information ..."
    connection.execute("""
    TRUNCATE TABLE %(table)s;
    """ % {'filename' : sql_filename, 'table' : table })
    
    print "inserting new graph information"
    connection.execute("""
    SET foreign_key_checks=0;
    LOCK TABLES %(table)s WRITE;
    LOAD DATA INFILE '%(filename)s'
    INTO TABLE %(table)s
    FIELDS TERMINATED BY '::'
    (ante_id, cons_id, confidence, jweight, weight, occurs_in);
    UNLOCK TABLES;
    SET foreign_key_checks=1;
    """ % {'filename' : sql_filename, 'table' : table })
    Session.close()
Code example #3
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)
    
    # Detach the term objects from the session so they can be safely
    # pickled and shipped to the multiprocessing workers below.
    Session.expunge_all()
    Session.close()
    
    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir!=None)
    articles = articles.filter(Entity.sep_dir!='')
    articles = articles.distinct().all()
    articles = [a[0] for a in articles]
   
    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root) for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    #serial processing for tests
    '''
    doc_lines = []
    for title in articles:
        lines = process_article(title, terms, entity_type, None, corpus_root)
        doc_lines.append(lines)
    '''

    # write graph output to file
    print output_filename
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
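
A usage sketch for process_articles (filenames are placeholders): scan every
article with a sep_dir set and write the term-occurrence lines to one file,
which the mining steps below consume.

# Hypothetical run over a local corpus/ checkout of the SEP articles.
process_articles(Entity, 'occurrences.txt', corpus_root='corpus/')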
Code example #4
File: sep.py Project: we1l1n/inpho
def process_articles(entity_type=Entity,
                     output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)

    Session.expunge_all()
    Session.close()

    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir != None)
    articles = articles.filter(Entity.sep_dir != '')
    articles = articles.distinct().all()
    articles = [a[0] for a in articles]

    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root)
            for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    #serial processing for tests
    '''
    doc_lines = []
    for title in articles:
        lines = process_article(title, terms, entity_type, None, corpus_root)
        doc_lines.append(lines)
    '''

    # write graph output to file
    print output_filename
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
Code example #5
File: sep.py Project: we1l1n/inpho
def complete_mining(entity_type=Idea,
                    filename='graph.txt',
                    root='./',
                    corpus_root='corpus/',
                    update_entropy=False,
                    update_occurrences=False,
                    update_db=False):
    occur_filename = os.path.abspath(root + "occurrences.txt")
    graph_filename = os.path.abspath(root + "graph-" + filename)
    edge_filename = os.path.abspath(root + "edge-" + filename)
    sql_filename = os.path.abspath(root + "sql-" + filename)

    doc_terms = doc_terms_list()

    if update_occurrences:
        print "processing articles..."
        process_articles(entity_type, occur_filename, corpus_root=corpus_root)

    print "filtering occurrences..."
    filter_apriori_input(occur_filename, graph_filename, entity_type,
                         doc_terms)

    print "running apriori miner..."
    dm.apriori(graph_filename, edge_filename)

    print "processing edges..."
    edges = dm.process_edges(graph_filename, edge_filename, occur_filename,
                             doc_terms)
    ents = dm.calculate_node_entropy(edges)
    edges = dm.calculate_edge_weight(edges, ents)

    print "creating sql files..."

    with open(sql_filename, 'w') as f:
        for edge, props in edges.iteritems():
            ante, cons = edge
            row = "%s::%s" % edge
            row += ("::%(confidence)s::%(jweight)s::%(weight)s"
                    "::%(occurs_in)s\n" % props)
            f.write(row)

    if update_entropy:
        print "updating term entropy..."

        for term_id, entropy in ents.iteritems():
            term = Session.query(Idea).get(term_id)
            if term:
                term.entropy = entropy

        Session.flush()
        Session.commit()
        Session.close()

    if update_db:
        print "updating the database..."
        update_graph(entity_type, sql_filename)
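
A usage sketch for the full pipeline (paths are placeholders):

# Hypothetical end-to-end run: regenerate occurrences, recompute entropy,
# and reload the idea edge table from the generated sql- file.
complete_mining(Idea, filename='graph.txt', root='/var/data/inpho/',
                corpus_root='/var/data/inpho/corpus/',
                update_entropy=True, update_occurrences=True,
                update_db=True)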
Code example #6
def filter_apriori_input(occur_filename, output_filename, entity_type=Idea,
                         doc_terms=None):
    #select terms
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    lines = dm.prepare_apriori_input(occur_filename, terms, doc_terms)
    
    with open(output_filename, 'w') as f:
        f.writelines(lines)
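
A standalone usage sketch (filenames are placeholders; the occurrences file
must already exist, e.g. written by process_articles):

# doc_terms_list() is the same helper complete_mining uses for doc_terms.
filter_apriori_input('occurrences.txt', 'graph-input.txt', Idea,
                     doc_terms_list())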
Code example #7
def complete_mining(entity_type=Idea, filename='graph.txt', root='./',
                    corpus_root='corpus/', update_entropy=False,
                    update_occurrences=False, update_db=False): 
    occur_filename = os.path.abspath(root + "occurrences.txt")
    graph_filename = os.path.abspath(root + "graph-" + filename)
    edge_filename = os.path.abspath(root + "edge-" + filename)
    sql_filename = os.path.abspath(root + "sql-" + filename)

    doc_terms = doc_terms_list()

    if update_occurrences:
        print "processing articles..."
        process_articles(entity_type, occur_filename, corpus_root=corpus_root)

    print "filtering occurrences..."
    filter_apriori_input(
        occur_filename, graph_filename, entity_type, doc_terms)

    print "running apriori miner..."
    dm.apriori(graph_filename, edge_filename)
    
    print "processing edges..."
    edges = dm.process_edges(
        graph_filename, edge_filename, occur_filename, doc_terms)
    ents = dm.calculate_node_entropy(edges)
    edges = dm.calculate_edge_weight(edges, ents)
    
    print "creating sql files..."

    with open(sql_filename, 'w') as f:
        for edge, props in edges.iteritems():
            ante,cons = edge
            row = "%s::%s" % edge
            row += ("::%(confidence)s::%(jweight)s::%(weight)s"
                    "::%(occurs_in)s\n" % props)
            f.write(row)

    if update_entropy:
        print "updating term entropy..."

        for term_id, entropy in ents.iteritems():
            term = Session.query(Idea).get(term_id)
            if term:
                term.entropy = entropy

        Session.flush()
        Session.commit()
        Session.close()

    if update_db:
        print "updating the database..."
        update_graph(entity_type, sql_filename)
Code example #8
File: sep.py Project: we1l1n/inpho
def filter_apriori_input(occur_filename,
                         output_filename,
                         entity_type=Idea,
                         doc_terms=None):
    #select terms
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    lines = dm.prepare_apriori_input(occur_filename, terms, doc_terms)

    with open(output_filename, 'w') as f:
        f.writelines(lines)
Code example #9
File: sep.py Project: camerontt2000/inpho
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    # fix search patterns
    for term in terms:
        newpatterns = []
        for pattern in term.searchpatterns:
            if '(' in pattern and ')' in pattern:
                pattern = pattern.replace('( ', '(\\b')
                pattern = pattern.replace(' )', '\\b)')
            else:
                pattern = '\\b%s\\b' % pattern.strip()

            newpatterns.append(pattern)

        term.searchpatterns = newpatterns

    
    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir!=None)
    articles = articles.filter(Entity.sep_dir!='')
    articles = articles.distinct().all()
    articles = [a[0] for a in articles]
   
    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root) for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    #serial processing for tests
    '''
    doc_lines = []
    for title in articles:
        lines = process_article(title, terms, entity_type, None, corpus_root)
        doc_lines.append(lines)
    '''

    # write graph output to file
    print output_filename
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
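
The search-pattern rewrite above anchors parenthesized alternations just
inside the parens and wraps bare terms in \b word boundaries. A standalone
sketch of the same transformation (fix_pattern is a name introduced here for
illustration):

import re

def fix_pattern(pattern):
    # Same rewrite as the loop in example #9.
    if '(' in pattern and ')' in pattern:
        pattern = pattern.replace('( ', '(\\b')
        pattern = pattern.replace(' )', '\\b)')
    else:
        pattern = '\\b%s\\b' % pattern.strip()
    return pattern

print re.search(fix_pattern('kant'), 'kantian')      # None: no boundary match
print re.search(fix_pattern('kant'), 'work of kant') # matches 'kant' exactly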
Code example #10
File: sep.py Project: etboggs/inpho
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()
    
    articles = Session.query(entity_type).filter(entity_type.sep_dir!='').all()
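    # NOTE: this variant passes Entity objects, not sep_dir strings, to
    # process_wrapper (compare examples #3, #4 and #9).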
   
    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root) for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    # write graph output to file
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
Code example #11
File: sep.py Project: etboggs/inpho
def process_articles(entity_type=Entity,
                     output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    articles = Session.query(entity_type).filter(
        entity_type.sep_dir != '').all()

    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root)
            for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    # write graph output to file
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
Code example #12
File: sep.py Project: we1l1n/inpho
def update_graph(entity_type, sql_filename):
    """
    Performs a complete update of the database graph information, adding
    jweight, entropy and occurrence data from the SQL file generated by
    complete_mining. This will remove ALL previous graph data.
    """
    # Choose the edge table for this entity type
    if entity_type == Idea:
        table = "idea_graph_edges"
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
    else:
        table = "idea_thinker_graph_edges"

    connection = Session.connection()

    print "deleting old graph information ..."
    connection.execute("""
    TRUNCATE TABLE %(table)s;
    """ % {
        'filename': sql_filename,
        'table': table
    })

    print "inserting new graph information"
    connection.execute("""
    SET foreign_key_checks=0;
    LOCK TABLES %(table)s WRITE;
    LOAD DATA LOCAL INFILE '%(filename)s'
    INTO TABLE %(table)s
    FIELDS TERMINATED BY '::'
    (ante_id, cons_id, confidence, jweight, weight, occurs_in);
    UNLOCK TABLES;
    SET foreign_key_checks=1;
    """ % {
        'filename': sql_filename,
        'table': table
    })
    Session.close()
Code example #13
File: sep.py Project: etboggs/inpho
def complete_mining(entity_type=Idea,
                    filename='graph.txt',
                    root='./',
                    corpus_root='corpus/',
                    update_entropy=False):
    occur_filename = os.path.abspath(root + "graph-" + filename)
    edge_filename = os.path.abspath(root + "edge-" + filename)
    sql_filename = os.path.abspath(root + "sql-" + filename)

    print "processing articles..."
    process_articles(entity_type, occur_filename, corpus_root=corpus_root)

    print "running apriori miner..."
    dm.apriori(occur_filename, edge_filename)

    print "processing edges..."
    edges = dm.process_edges(occur_filename, edge_filename)
    ents = dm.calculate_node_entropy(edges)
    edges = dm.calculate_edge_weight(edges, ents)

    print "creating sql files..."

    with open(sql_filename, 'w') as f:
        for edge, props in edges.iteritems():
            ante, cons = edge
            row = "%s::%s" % edge
            row += "::%(confidence)s::%(jweight)s::%(weight)s\n" % props
            f.write(row)

    print "updating term entropy..."

    if update_entropy:
        for term_id, entropy in ents.iteritems():
            term = Session.query(Idea).get(term_id)
            if term:
                term.entropy = entropy

        Session.flush()
        Session.commit()
        Session.close()

    # Choose the edge table for this entity type
    if entity_type == Idea:
        table = "idea_graph_edges"
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
    else:
        table = "idea_thinker_graph_edges"

    connection = Session.connection()

    print "deleting old graph information ..."
    connection.execute("""
    DELETE FROM %(table)s;
    """ % {
        'filename': sql_filename,
        'table': table
    })

    print "inserting new graph information"
    connection.execute("""
    SET foreign_key_checks=0;
    LOAD DATA INFILE '%(filename)s'
    INTO TABLE %(table)s
    FIELDS TERMINATED BY '::'
    (ante_id, cons_id, confidence, jweight, weight);
    SET foreign_key_checks=1;
    """ % {
        'filename': sql_filename,
        'table': table
    })
    Session.close()
Code example #14
File: sep.py Project: etboggs/inpho
def complete_mining(entity_type=Idea, filename='graph.txt', root='./',
                    corpus_root='corpus/', update_entropy=False):
    occur_filename = os.path.abspath(root + "graph-" + filename)
    edge_filename = os.path.abspath(root + "edge-" + filename)
    sql_filename = os.path.abspath(root + "sql-" + filename)


    print "processing articles..."
    process_articles(entity_type, occur_filename, corpus_root=corpus_root)

    print "running apriori miner..."
    dm.apriori(occur_filename, edge_filename)
    
    print "processing edges..."
    edges = dm.process_edges(occur_filename, edge_filename)
    ents = dm.calculate_node_entropy(edges)
    edges = dm.calculate_edge_weight(edges, ents)
    
    print "creating sql files..."

    with open(sql_filename, 'w') as f:
        for edge, props in edges.iteritems():
            ante,cons = edge
            row = "%s::%s" % edge
            row += "::%(confidence)s::%(jweight)s::%(weight)s\n" % props
            f.write(row)

    print "updating term entropy..."

    if update_entropy:
        for term_id, entropy in ents.iteritems():
            term = Session.query(Idea).get(term_id)
            if term:
                term.entropy = entropy

        Session.flush()
        Session.commit()
        Session.close()


    # Choose the edge table for this entity type
    if entity_type == Idea:
        table = "idea_graph_edges"
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
    else:
        table = "idea_thinker_graph_edges"

    connection = Session.connection()

    print "deleting old graph information ..."
    connection.execute("""
    DELETE FROM %(table)s;
    """ % {'filename' : sql_filename, 'table' : table })
    
    print "inserting new graph information"
    connection.execute("""
    SET foreign_key_checks=0;
    LOAD DATA INFILE '%(filename)s'
    INTO TABLE %(table)s
    FIELDS TERMINATED BY '::'
    (ante_id, cons_id, confidence, jweight, weight);
    SET foreign_key_checks=1;
    """ % {'filename' : sql_filename, 'table' : table })
    Session.close()