Example #1
0
# Append the remaining feature blocks to `mat` in ONE hstack call.
# Each sparse.hstack call builds a brand-new matrix, so adding the blocks one
# at a time re-copies the ever-growing matrix on every step.  Passing all
# blocks at once produces the same columns in the same order with one copy.
mat = sparse.hstack((
    mat,
    # mat_arg_ctx_* set-operation blocks
    mat_arg_ctx_union_arg.astype(np.float64),
    mat_arg_ctx_diff_arg.astype(np.float64),
    mat_arg_ctx_inters_arg.astype(np.float64),
    mat_arg_ctx_l_minus_r_arg.astype(np.float64),
    mat_arg_ctx_r_minus_l_arg.astype(np.float64),
    # feat from similar context examples
    mat_arg_l_ctx_sim_paths.astype(np.float64),
    mat_arg_r_ctx_sim_paths.astype(np.float64),
    mat_arg_ctx_sim_union_arg.astype(np.float64),
    mat_arg_ctx_sim_diff_arg.astype(np.float64),
    mat_arg_ctx_sim_inters_arg.astype(np.float64),
    mat_arg_ctx_sim_l_minus_r_arg.astype(np.float64),
    mat_arg_ctx_sim_r_minus_l_arg.astype(np.float64),
)).tocsr()

# remove the first dummy vector (column 0); CSR format allows the column slice
mat = mat[:, 1:]

# Run the classification test on the stacked features with a fixed seed so
# the run is repeatable.
model = tc.run_classification_test(
    mat,
    true_labels,
    binarize=True,
    percentage_train=0.8,
    print_train_test_set_stat=True,
    test_thresholds=False,
    random_seed=623519,
    d_args=d_triples,
)

# Pair feature names with the learned coefficients and rank them from the
# largest coefficient to the smallest.
# NOTE(review): `names` comes from d_paths_ctx alone while `mat` stacks many
# feature blocks; zip() stops at the shorter input, so if `names` is shorter
# than the coefficient vector only the leading columns get labelled -- confirm
# the name/column alignment.
names = d_paths_ctx._id2w
l = zip(names, model.coef_[0])
ls = sorted(l, key=lambda pair: pair[1], reverse=True)
Example #2
0
# Disabled feature blocks (kept for reference):
# mat = sparse.hstack(( mat, mb_sim_intersect_w1_w2.astype( np.float64 )))
# mat = sparse.hstack(( mat, mb_sim_minus_w1_w2.astype( np.float64 )))
# mat = sparse.hstack(( mat, mb_sim_minus_w2_w1.astype( np.float64 )))

# pairs <set operation> single noun

# Seed passed to the classification test below.
# Alternative seed tried previously: 234123
rand_seed = 623519

logging.info("training classifier and predicting labels")

# Convert to CSR first, then slice away column 0 before training.
mat = mat.tocsr()
mat = mat[:, 1:]

model = tc.run_classification_test(
    mat, y_true,
    binarize=True, percentage_train=0.8,
    print_train_test_set_stat=True, test_thresholds=False,
    random_seed=rand_seed, d_triples=d_triples,
)

# Disabled: rank feature names by their learned coefficient.
# names = d_ctx_word._id2w
# l = zip( names, model.coef_[0] )
# ls = sorted( l, reverse=True, key= lambda x: x[1] )

# Disabled: list the non-zero features of a single row, largest value first.
# interesting_id = 10
# names = d_ctx_word._id2w
# x = zip( names, np.squeeze( np.array( mat[interesting_id,:].todense() )))
# x = sorted([ (x,i) for (i, x) in enumerate(x) if x[1] > 0 ], key= lambda x: x[0][1], reverse=True )

logging.info("inspect some of the features")
Example #3
0
# Disabled feature blocks (kept for reference):
# mat = sparse.hstack(( mat, mb_sim_w2.astype( np.float64 )))
# mat = sparse.hstack(( mat, mb_sim_union_w1_w2.astype( np.float64 )))
# mat = sparse.hstack(( mat, mb_sim_diff_w1_w2.astype( np.float64 )))
# mat = sparse.hstack(( mat, mb_sim_intersect_w1_w2.astype( np.float64 )))
# mat = sparse.hstack(( mat, mb_sim_minus_w1_w2.astype( np.float64 )))
# mat = sparse.hstack(( mat, mb_sim_minus_w2_w1.astype( np.float64 )))

# pairs <set operation> single noun

# Seed passed to the classification test below.
# Alternative seed tried previously: 234123
rand_seed = 623519

logging.info("training classifier and predicting labels")

# Convert to CSR first, then slice away column 0 before training.
mat = mat.tocsr()
mat = mat[:, 1:]

model = tc.run_classification_test(
    mat,
    y_true,
    binarize=True,
    percentage_train=0.8,
    print_train_test_set_stat=True,
    test_thresholds=False,
    random_seed=rand_seed,
    d_triples=d_triples,
)

# Disabled: rank feature names by their learned coefficient.
# names = d_ctx_word._id2w
# l = zip( names, model.coef_[0] )
# ls = sorted( l, reverse=True, key= lambda x: x[1] )

# Disabled: list the non-zero features of a single row, largest value first.
# interesting_id = 10
# names = d_ctx_word._id2w
# x = zip( names, np.squeeze( np.array( mat[interesting_id,:].todense() )))
# x = sorted([ (x,i) for (i, x) in enumerate(x) if x[1] > 0 ], key= lambda x: x[0][1], reverse=True )

logging.info("inspect some of the features")

# replicate hstacking here to attain names:
# the pair vocabulary first, then the word vocabulary twice.
# names = d_ctx_word._id2w
names = d_ctx_pair._id2w + d_ctx_word._id2w
names = names + d_ctx_word._id2w
Example #4
0
# Append all remaining feature blocks to `mat` in ONE hstack call.
# Each sparse.hstack call builds a brand-new matrix, so adding the blocks one
# at a time re-copies the ever-growing matrix on every step.  Passing all
# blocks at once produces the same columns in the same order with one copy,
# and a single CSR conversion at the end replaces the intermediate ones.
mat = sparse.hstack((
    mat,
    # mat_sim_* blocks
    mat_sim_arg_l.astype(np.float64),
    mat_sim_arg_r.astype(np.float64),
    mat_sim_union_arg.astype(np.float64),
    mat_sim_diff_arg.astype(np.float64),
    mat_sim_inters_arg.astype(np.float64),
    mat_sim_l_minus_r_arg.astype(np.float64),
    mat_sim_r_minus_l_arg.astype(np.float64),
    # feat from example contexts
    mat_arg_l_ctx_paths.astype(np.float64),
    mat_arg_r_ctx_paths.astype(np.float64),
    mat_arg_ctx_union_arg.astype(np.float64),
    mat_arg_ctx_diff_arg.astype(np.float64),
    mat_arg_ctx_inters_arg.astype(np.float64),
    mat_arg_ctx_l_minus_r_arg.astype(np.float64),
    mat_arg_ctx_r_minus_l_arg.astype(np.float64),
    # feat from similar context examples
    mat_arg_l_ctx_sim_paths.astype(np.float64),
    mat_arg_r_ctx_sim_paths.astype(np.float64),
    mat_arg_ctx_sim_union_arg.astype(np.float64),
    mat_arg_ctx_sim_diff_arg.astype(np.float64),
    mat_arg_ctx_sim_inters_arg.astype(np.float64),
    mat_arg_ctx_sim_l_minus_r_arg.astype(np.float64),
    mat_arg_ctx_sim_r_minus_l_arg.astype(np.float64),
)).tocsr()

# remove the first dummy vector (column 0); CSR format allows the column slice
mat = mat[:, 1:]

# Run the classification test on the stacked features with a fixed seed so
# the run is repeatable.
model = tc.run_classification_test(
    mat,
    true_labels,
    binarize=True,
    percentage_train=0.8,
    print_train_test_set_stat=True,
    test_thresholds=False,
    random_seed=623519,
    d_args=d_triples,
)

# Pair feature names with the learned coefficients and rank them from the
# largest coefficient to the smallest.
# NOTE(review): `names` comes from d_paths_ctx alone while `mat` stacks many
# feature blocks; zip() stops at the shorter input, so if `names` is shorter
# than the coefficient vector only the leading columns get labelled -- confirm
# the name/column alignment.
names = d_paths_ctx._id2w
l = zip(names, model.coef_[0])
ls = sorted(l, key=lambda pair: pair[1], reverse=True)