Example #1
0
# Input files for the similarity ("sim") and LDA ("lda") features; every path
# is built relative to BASE_DIR. Judging by the names, each feature has one
# file for single words and one for word pairs -- TODO confirm.
fn_sim_word = os.path.join( BASE_DIR, 'sim/sim.gz' )
fn_sim_pair = os.path.join( BASE_DIR, 'sim/sim_flipped.gz' )
fn_lda_word = os.path.join( BASE_DIR, 'lda/model_final_doc2topic_5_5' )
fn_lda_pair = os.path.join( BASE_DIR, 'lda/model_final_doc2topic_5_5_flipped' )

# Run switches. `refresh` is forwarded as the `reload=` argument of the matrix
# loader below and `ctx` gates the context-feature section; `sim` and `lda`
# are not used in this part of the script (presumably they gate the
# corresponding feature sections further down -- TODO confirm).
refresh = False
ctx = True
sim = False
lda = False

# instantiate logger
# The logging module is reloaded before basicConfig is called; presumably this
# discards handlers left over from an earlier run in the same interpreter
# session -- TODO confirm.
reload( logging )
logging.basicConfig( format='%(asctime)s - %(message)s', level=logging.DEBUG )

logging.info( 'loading labels from file' )
# The loader hands back two values, unpacked as (y_true, d_triples).
y_true, d_triples = tm.arg_l_arg_r_pairs_vector( fn_labels, \
    file_contains_context=False, has_header=False )

# Passed to the matrix loader below as a positional argument.
num_triples = len( d_triples )

# Note: the prefix d_ indicates a dictionary, m_ a matrix, mb_ a boolean matrix

if ctx:

    logging.info( 'loading context features for word pairs' )
    # Fresh td.Dict handed to the loader as `col_indices`; presumably the
    # loader fills it with the column index of each feature -- TODO confirm.
    d_ctx_pair = td.Dict()
    m_ctx_pair = tm.arg_l_arg_r_asjo_matrix( d_triples._rtuple2ids, fn_ctx_pair, 
        num_triples, col_indices=d_ctx_pair, mmfile_presuffix='_pairs', reload=refresh )

    logging.info( 'loading context features for words' )
    # Separate td.Dict for the word-level features loaded next.
    d_ctx_word = td.Dict()
    m_ctx_w1 = tm.arg_asjo_matrix( d_triples._m2ids, d_ctx_word, fn_ctx_word, num_triples,
Example #2
0
def load_classification_data():
    """Load the true labels together with the triples dictionary.

    Calls ``tm.arg_l_arg_r_pairs_vector`` on ``labels`` (a name taken from the
    enclosing scope; presumably the path of the label file -- TODO confirm),
    passing ``file_contains_context=False`` and ``has_header=False``.

    Returns:
        A ``(d_triples, true_labels)`` tuple. This is the reverse of the order
        in which the loader hands the two values back.
    """
    logging.info("loading true labels")
    loader_options = dict(file_contains_context=False, has_header=False)
    label_vector, triples = tm.arg_l_arg_r_pairs_vector(labels, **loader_options)
    # Callers receive the triples first, the label vector second.
    return triples, label_vector
Example #3
0
# Similarity feature files, resolved against BASE_DIR.
fn_sim_word, fn_sim_pair = (
    os.path.join(BASE_DIR, 'sim/sim.gz'),
    os.path.join(BASE_DIR, 'sim/sim_flipped.gz'),
)
# LDA feature files, resolved against BASE_DIR.
fn_lda_word, fn_lda_pair = (
    os.path.join(BASE_DIR, 'lda/model_final_doc2topic_5_5'),
    os.path.join(BASE_DIR, 'lda/model_final_doc2topic_5_5_flipped'),
)

# Run switches: only the context features are enabled, and `refresh` (passed
# to the matrix loader as `reload=`) is off.
refresh = sim = lda = False
ctx = True

# Set up logging: reload the module first, then configure the root logger.
reload(logging)
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')

logging.info('loading labels from file')
y_true, d_triples = tm.arg_l_arg_r_pairs_vector(
    fn_labels, has_header=False, file_contains_context=False)

num_triples = len(d_triples)

# Naming convention: d_ marks a dictionary, m_ a matrix, mb_ a boolean matrix.

if ctx:

    logging.info('loading context features for word pairs')
    # Fresh td.Dict handed to the loader through `col_indices`.
    d_ctx_pair = td.Dict()
    m_ctx_pair = tm.arg_l_arg_r_asjo_matrix(
        d_triples._rtuple2ids, fn_ctx_pair, num_triples,
        col_indices=d_ctx_pair, mmfile_presuffix='_pairs', reload=refresh)
Example #4
0
def load_classification_data():
    """Read the true labels and return them alongside the triples dictionary.

    The loader ``tm.arg_l_arg_r_pairs_vector`` is invoked on ``labels`` (taken
    from the enclosing scope) with ``file_contains_context=False`` and
    ``has_header=False``.

    Returns:
        ``(d_triples, true_labels)`` -- the loader's two results in swapped
        order.
    """
    logging.info('loading true labels')
    loaded = tm.arg_l_arg_r_pairs_vector(
        labels,
        file_contains_context=False,
        has_header=False
    )
    # The loader yields (labels, triples); hand them back triples-first.
    gold_labels, triples = loaded
    return triples, gold_labels