fn_sim_word = os.path.join( BASE_DIR, 'sim/sim.gz' ) fn_sim_pair = os.path.join( BASE_DIR, 'sim/sim_flipped.gz' ) fn_lda_word = os.path.join( BASE_DIR, 'lda/model_final_doc2topic_5_5' ) fn_lda_pair = os.path.join( BASE_DIR, 'lda/model_final_doc2topic_5_5_flipped' ) refresh = False ctx = True sim = False lda = False # instantiate logger reload( logging ) logging.basicConfig( format='%(asctime)s - %(message)s', level=logging.DEBUG ) logging.info( 'loading labels from file' ) y_true, d_triples = tm.arg_l_arg_r_pairs_vector( fn_labels, \ file_contains_context=False, has_header=False ) num_triples = len( d_triples ) # Note: the prefix d_ indicates a dictionary, m_ a matrix, mb_ a boolean matrix if ctx: logging.info( 'loading context features for word pairs' ) d_ctx_pair = td.Dict() m_ctx_pair = tm.arg_l_arg_r_asjo_matrix( d_triples._rtuple2ids, fn_ctx_pair, num_triples, col_indices=d_ctx_pair, mmfile_presuffix='_pairs', reload=refresh ) logging.info( 'loading context features for words' ) d_ctx_word = td.Dict() m_ctx_w1 = tm.arg_asjo_matrix( d_triples._m2ids, d_ctx_word, fn_ctx_word, num_triples,
def load_classification_data():
    """Load the label vector and the triple dictionary.

    Logs a progress message, then calls ``tm.arg_l_arg_r_pairs_vector`` on
    the ``labels`` name taken from the enclosing scope (presumably the
    labels file -- confirm against the caller), with
    ``file_contains_context=False`` and ``has_header=False``.

    Returns:
        A ``(d_triples, true_labels)`` tuple. Note that this is the
        reverse of the order in which the helper returns the two values.
    """
    logging.info("loading true labels")
    loaded = tm.arg_l_arg_r_pairs_vector(
        labels,
        file_contains_context=False,
        has_header=False,
    )
    # The helper yields (labels, triples); callers here want triples first.
    gold_labels, triples = loaded
    return triples, gold_labels
# Input files for the similarity and LDA feature sets, relative to BASE_DIR.
fn_sim_word = os.path.join(BASE_DIR, 'sim/sim.gz')
fn_sim_pair = os.path.join(BASE_DIR, 'sim/sim_flipped.gz')
fn_lda_word = os.path.join(BASE_DIR, 'lda/model_final_doc2topic_5_5')
fn_lda_pair = os.path.join(BASE_DIR, 'lda/model_final_doc2topic_5_5_flipped')

# Run-time switches.
# `refresh` is forwarded as the `reload` argument of the matrix loader below;
# `ctx` gates loading of the context features.
# NOTE(review): `sim` and `lda` are not read in this part of the script --
# presumably they gate the similarity / LDA features further down; confirm.
refresh = False
ctx = True
sim = False
lda = False

# Set up logging: re-import the module first, then configure a timestamped
# format at DEBUG level.
reload(logging)
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.DEBUG,
)

logging.info('loading labels from file')
y_true, d_triples = tm.arg_l_arg_r_pairs_vector(
    fn_labels,
    file_contains_context=False,
    has_header=False,
)
num_triples = len(d_triples)

# Naming convention: d_ marks a dictionary, m_ a matrix, mb_ a boolean matrix.
if ctx:
    logging.info('loading context features for word pairs')
    d_ctx_pair = td.Dict()
    m_ctx_pair = tm.arg_l_arg_r_asjo_matrix(
        d_triples._rtuple2ids,
        fn_ctx_pair,
        num_triples,
        col_indices=d_ctx_pair,
        mmfile_presuffix='_pairs',
        reload=refresh,
    )
def load_classification_data():
    """Read the true labels and return them together with the triples.

    The work is delegated to ``tm.arg_l_arg_r_pairs_vector``, which is
    called on the ``labels`` name from the enclosing scope (presumably the
    labels file -- confirm against the caller) with both
    ``file_contains_context`` and ``has_header`` set to ``False``.

    Returns:
        ``(d_triples, true_labels)`` -- the helper's two results with
        their order swapped.
    """
    logging.info('loading true labels')
    label_vector, triple_dict = tm.arg_l_arg_r_pairs_vector(
        labels,
        file_contains_context=False,
        has_header=False,
    )
    result = (triple_dict, label_vector)
    return result