Beispiel #1
0
    logging.info( "computing set operations on context matrices " )
    mb_ctx_w1               = m_ctx_w1.astype( bool )
    mb_ctx_w2               = m_ctx_w2.astype( bool )
    mb_ctx_union_w1_w2      = mb_ctx_w1 + mb_ctx_w2
    mb_ctx_diff_w1_w2       = mb_ctx_w1 != mb_ctx_w2
    mb_ctx_intersect_w1_w2  = mb_ctx_union_w1_w2 - mb_ctx_diff_w1_w2
    mb_ctx_minus_w1_w2      = mb_ctx_union_w1_w2 - mb_ctx_w2
    mb_ctx_minus_w2_w1      = mb_ctx_union_w1_w2 - mb_ctx_w1

if lda:
    # Topic (LDA) features, loaded only when the `lda` flag is truthy:
    # one matrix for the word pair (read from fn_lda_pair) and one per
    # single word (both read from fn_lda_word).
    logging.info('loading topic features ( LDA ) for words and word pairs')
    m_topic_pair = tm.arg_l_arg_r_to_topic_matrix(
        d_triples._rtuple2ids,
        fn_lda_pair,
        num_triples,
        mmfile_presuffix='_pairs',
        reload=refresh,
    )
    m_topic_w1 = tm.arg_to_topic_matrix(
        d_triples._m2ids,
        fn_lda_word,
        num_triples,
        mmfile_presuffix='_w1',
        reload=refresh,
    )
    m_topic_w2 = tm.arg_to_topic_matrix(
        d_triples._r2ids,
        fn_lda_word,
        num_triples,
        mmfile_presuffix='_w2',
        reload=refresh,
    )

if sim:

    logging.info( 'loading similarity features for word pairs' )
    d_sim_pair = td.Dict()
    m_sim_pair = tm.arg_l_arg_r_asjo_matrix( d_triples._rtuple2ids, fn_sim_pair, 
        num_triples, col_indices = d_sim_pair, 
        transform_w2sig=lambda w2sig: sorted( list(w2sig), key=lambda x: float( x[1] ), reverse=True )[:20],
        mmfile_presuffix='_pairs', reload=refresh )

    logging.info( 'loading similarity features for words' )
    d_sim_word = td.Dict()
    m_sim_w1 = tm.arg_asjo_matrix(d_triples._m2ids, d_sim_word, fn_sim_word, num_triples,
Beispiel #2
0
def _top_20_by_score(w2sig):
    """Return the 20 highest-scoring entries of *w2sig*, best first.

    The score of an entry is ``float(entry[1])``.
    """
    return sorted(w2sig, key=lambda x: float(x[1]), reverse=True)[:20]


def _align_shapes(mat_l, mat_r):
    """Give both sparse matrices the same shape and return them as COO.

    Whichever matrix has fewer columns is resized to the other one's shape.
    A COO input is converted to DOK before resizing; any other format is
    resized in place, so the caller's object is modified in that case.
    """
    if mat_l.shape[1] < mat_r.shape[1]:
        if sparse.isspmatrix_coo(mat_l):
            mat_l = mat_l.todok()
        mat_l.resize(mat_r.shape)
    elif mat_r.shape[1] < mat_l.shape[1]:
        if sparse.isspmatrix_coo(mat_r):
            mat_r = mat_r.todok()
        mat_r.resize(mat_l.shape)

    if not sparse.isspmatrix_coo(mat_l):
        mat_l = mat_l.tocoo()
    if not sparse.isspmatrix_coo(mat_r):
        mat_r = mat_r.tocoo()
    return mat_l, mat_r


def _boolean_set_matrices(mat_l, mat_r):
    """Derive boolean set-operation matrices from two sparse matrices.

    Returns a 7-tuple in this order: left, right, union, intersection,
    difference (``left != right``), left-minus-right, right-minus-left.
    All inputs are first aligned in shape and cast to ``bool``.
    """
    mat_l, mat_r = _align_shapes(mat_l, mat_r)
    mat_l = mat_l.astype(bool)
    mat_r = mat_r.astype(bool)

    # NOTE(review): the expressions below rely on how scipy.sparse implements
    # `+` and `-` for bool dtype (union minus a subset of it) -- confirm on
    # the scipy version in use.
    union = mat_l + mat_r
    diff = mat_l != mat_r
    inters = union - diff
    l_minus_r = union - mat_r
    r_minus_l = union - mat_l
    return mat_l, mat_r, union, inters, diff, l_minus_r, r_minus_l


def load_matrices(d_triples):
    """Build the list of feature matrices for the triples in *d_triples*.

    Besides *d_triples*, this reads the module-level names
    ``svo_flipped_counts``, ``svo_flipped_dt``, ``svo_counts``,
    ``svo_flipped_lda_w2t``, ``svo_lda_w2t`` and ``svo_dt``, which are not
    defined in this function. Every loader is called with ``reload=False``.

    Returns:
        A list of ``(label, matrix, column_dict)`` tuples, where
        ``column_dict`` is the ``td.Dict`` passed to the loader, or ``None``
        for the topic matrices.
    """
    matrices = []

    ## just the left argument as feature
    logging.info("creating w1 as feature matrix")
    d_w1 = td.Dict()
    w1_mat = tm.w1Asfeature(d_triples, d_w1)
    matrices.append(("W1 as Feature", w1_mat, d_w1))

    ## just the right argument as feature
    logging.info("creating w2 as feature matrix")
    d_w2 = td.Dict()
    w2_mat = tm.w2Asfeature(d_triples, d_w2)
    matrices.append(("W2 as Feature", w2_mat, d_w2))

    ## relation pair features
    logging.info("loading paths between argument pairs")
    d_paths = td.Dict()
    mat_paths = tm.arg_l_arg_r_asjo_matrix(
        d_triples._rtuple2ids,
        svo_flipped_counts,
        len(d_triples),
        col_indices=d_paths,
        mmfile_presuffix=".paths",
        reload=False,
    )
    matrices.append(("paths between ArgL and ArgR", mat_paths, d_paths))

    logging.info("loading similar argument pairs")
    d_sim_pairs = td.Dict()
    mat_sim_pairs = tm.arg_l_arg_r_asjo_matrix(
        d_triples._rtuple2ids,
        svo_flipped_dt,
        len(d_triples),
        col_indices=d_sim_pairs,
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix=".simpairs",
        reload=False,
    )
    matrices.append(("similar ArgL - ArgR pairs", mat_sim_pairs, d_sim_pairs))

    ## context features
    logging.info("loading argument context matrices")
    # Both loads share d_ctx, so the two matrices can end up with different
    # column counts; _boolean_set_matrices evens that out.
    d_ctx = td.Dict()
    mat_arg_l_ctx = tm.arg_asjo_matrix(
        d_triples._m2ids,
        d_ctx,
        svo_counts,
        len(d_triples),
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix=".ctx_w1",
        reload=False,
    )
    mat_arg_r_ctx = tm.arg_asjo_matrix(
        d_triples._r2ids,
        d_ctx,
        svo_counts,
        len(d_triples),
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix=".ctx_w2",
        reload=False,
    )

    ## create some extra matrices
    logging.info("creating argument context intersection and set minus matrices.")
    ctx_labels = (
        "Contexts of ArgL",
        "Contexts of ArgR",
        "Contexts of ArgL or ArgR",
        "Contexts of ArgL and ArgR",
        "Contexts difference of ArgL and ArgR",
        "Contexts of ArgL but not ArgR",
        "Contexts of ArgR but not ArgL",
    )
    ctx_mats = _boolean_set_matrices(mat_arg_l_ctx, mat_arg_r_ctx)
    for label, mat in zip(ctx_labels, ctx_mats):
        matrices.append((label, mat.astype(np.float64), d_ctx))

    # topic features
    logging.info("loading lda feature matrices.")
    mat_topic = tm.arg_l_arg_r_to_topic_matrix(
        d_triples._rtuple2ids,
        svo_flipped_lda_w2t,
        len(d_triples),
        mmfile_presuffix=".bless.topic_pairs",
        reload=False,
    )
    matrices.append(("Topic of ArgL - ArgR pair", mat_topic, None))

    mat_arg_l_topic = tm.arg_to_topic_matrix(
        d_triples._m2ids,
        svo_lda_w2t,
        len(d_triples),
        mmfile_presuffix=".bless.topic_w1",
        reload=False,
    )
    matrices.append(("Topic of ArgL", mat_arg_l_topic, None))

    mat_arg_r_topic = tm.arg_to_topic_matrix(
        d_triples._r2ids,
        svo_lda_w2t,
        len(d_triples),
        mmfile_presuffix=".bless.topic_w2",
        reload=False,
    )
    matrices.append(("Topic of ArgR", mat_arg_r_topic, None))

    # distributionally similar args for each arg
    logging.info("loading similar arguments.")
    d_arg = td.Dict()
    mat_sim_arg_l = tm.arg_asjo_matrix(
        d_triples._m2ids,
        d_arg,
        svo_dt,
        len(d_triples),
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix=".sim_w1",
        reload=False,
    )
    mat_sim_arg_r = tm.arg_asjo_matrix(
        d_triples._r2ids,
        d_arg,
        svo_dt,
        len(d_triples),
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix=".sim_w2",
        reload=False,
    )

    ### create some extra matrices
    logging.info("creating similar arguments intersection and set minus matrices.")
    sim_labels = (
        "Similar Args to ArgL",
        "Similar Args to ArgR",
        "Similar Args to ArgL or ArgR",
        "Similar Args to ArgL and ArgR",
        "Difference of similar Args to ArgL and ArgR",
        "Similar Args to ArgL but not to ArgR",
        "Similar Args to ArgR but not to ArgL",
    )
    sim_mats = _boolean_set_matrices(mat_sim_arg_l, mat_sim_arg_r)
    # NOTE(review): unlike the context matrices above, these are appended
    # with bool dtype (no float64 cast); kept as in the original -- confirm
    # this difference is intended.
    for label, mat in zip(sim_labels, sim_mats):
        matrices.append((label, mat, d_arg))

    return matrices
Beispiel #3
0
    mb_ctx_diff_w1_w2 = mb_ctx_w1 != mb_ctx_w2
    mb_ctx_intersect_w1_w2 = mb_ctx_union_w1_w2 - mb_ctx_diff_w1_w2
    mb_ctx_minus_w1_w2 = mb_ctx_union_w1_w2 - mb_ctx_w2
    mb_ctx_minus_w2_w1 = mb_ctx_union_w1_w2 - mb_ctx_w1

if lda:
    # Topic (LDA) features, loaded only when the `lda` flag is truthy:
    # the pair matrix is read from fn_lda_pair, the two single-word
    # matrices from fn_lda_word.
    logging.info('loading topic features ( LDA ) for words and word pairs')
    m_topic_pair = tm.arg_l_arg_r_to_topic_matrix(
        d_triples._rtuple2ids, fn_lda_pair, num_triples,
        mmfile_presuffix='_pairs', reload=refresh)
    m_topic_w1 = tm.arg_to_topic_matrix(
        d_triples._m2ids, fn_lda_word, num_triples,
        mmfile_presuffix='_w1', reload=refresh)
    m_topic_w2 = tm.arg_to_topic_matrix(
        d_triples._r2ids, fn_lda_word, num_triples,
        mmfile_presuffix='_w2', reload=refresh)

if sim:

    logging.info('loading similarity features for word pairs')
    d_sim_pair = td.Dict()
    m_sim_pair = tm.arg_l_arg_r_asjo_matrix(
        d_triples._rtuple2ids,
        fn_sim_pair,
Beispiel #4
0
def _top_20_by_score(w2sig):
    """Return the 20 highest-scoring entries of *w2sig*, best first.

    The score of an entry is ``float(entry[1])``.
    """
    return sorted(w2sig, key=lambda x: float(x[1]), reverse=True)[:20]


def _align_shapes(mat_l, mat_r):
    """Give both sparse matrices the same shape and return them as COO.

    Whichever matrix has fewer columns is resized to the other one's shape.
    A COO input is converted to DOK before resizing; any other format is
    resized in place, so the caller's object is modified in that case.
    """
    if mat_l.shape[1] < mat_r.shape[1]:
        if sparse.isspmatrix_coo(mat_l):
            mat_l = mat_l.todok()
        mat_l.resize(mat_r.shape)
    elif mat_r.shape[1] < mat_l.shape[1]:
        if sparse.isspmatrix_coo(mat_r):
            mat_r = mat_r.todok()
        mat_r.resize(mat_l.shape)

    if not sparse.isspmatrix_coo(mat_l):
        mat_l = mat_l.tocoo()
    if not sparse.isspmatrix_coo(mat_r):
        mat_r = mat_r.tocoo()
    return mat_l, mat_r


def _boolean_set_matrices(mat_l, mat_r):
    """Derive boolean set-operation matrices from two sparse matrices.

    Returns a 7-tuple in this order: left, right, union, intersection,
    difference (``left != right``), left-minus-right, right-minus-left.
    All inputs are first aligned in shape and cast to ``bool``.
    """
    mat_l, mat_r = _align_shapes(mat_l, mat_r)
    mat_l = mat_l.astype(bool)
    mat_r = mat_r.astype(bool)

    # NOTE(review): the expressions below rely on how scipy.sparse implements
    # `+` and `-` for bool dtype (union minus a subset of it) -- confirm on
    # the scipy version in use.
    union = mat_l + mat_r
    diff = mat_l != mat_r
    inters = union - diff
    l_minus_r = union - mat_r
    r_minus_l = union - mat_l
    return mat_l, mat_r, union, inters, diff, l_minus_r, r_minus_l


def load_matrices(d_triples):
    """Build the list of feature matrices for the triples in *d_triples*.

    Besides *d_triples*, this reads the module-level names
    ``svo_flipped_counts``, ``svo_flipped_dt``, ``svo_counts``,
    ``svo_flipped_lda_w2t``, ``svo_lda_w2t`` and ``svo_dt``, which are not
    defined in this function. Every loader is called with ``reload=False``.

    Returns:
        A list of ``(label, matrix, column_dict)`` tuples, where
        ``column_dict`` is the ``td.Dict`` passed to the loader, or ``None``
        for the topic matrices.
    """
    matrices = []

    ## just the left argument as feature
    logging.info('creating w1 as feature matrix')
    d_w1 = td.Dict()
    w1_mat = tm.w1Asfeature(d_triples, d_w1)
    matrices.append(('W1 as Feature', w1_mat, d_w1))

    ## just the right argument as feature
    logging.info('creating w2 as feature matrix')
    d_w2 = td.Dict()
    w2_mat = tm.w2Asfeature(d_triples, d_w2)
    matrices.append(('W2 as Feature', w2_mat, d_w2))

    ## relation pair features
    logging.info('loading paths between argument pairs')
    d_paths = td.Dict()
    mat_paths = tm.arg_l_arg_r_asjo_matrix(
        d_triples._rtuple2ids,
        svo_flipped_counts,
        len(d_triples),
        col_indices=d_paths,
        mmfile_presuffix='.paths',
        reload=False)
    matrices.append(('paths between ArgL and ArgR', mat_paths, d_paths))

    logging.info('loading similar argument pairs')
    d_sim_pairs = td.Dict()
    mat_sim_pairs = tm.arg_l_arg_r_asjo_matrix(
        d_triples._rtuple2ids,
        svo_flipped_dt,
        len(d_triples),
        col_indices=d_sim_pairs,
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix='.simpairs',
        reload=False)
    matrices.append(('similar ArgL - ArgR pairs', mat_sim_pairs, d_sim_pairs))

    ## context features
    logging.info('loading argument context matrices')
    # Both loads share d_ctx, so the two matrices can end up with different
    # column counts; _boolean_set_matrices evens that out.
    d_ctx = td.Dict()
    mat_arg_l_ctx = tm.arg_asjo_matrix(
        d_triples._m2ids,
        d_ctx,
        svo_counts,
        len(d_triples),
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix='.ctx_w1',
        reload=False)
    mat_arg_r_ctx = tm.arg_asjo_matrix(
        d_triples._r2ids,
        d_ctx,
        svo_counts,
        len(d_triples),
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix='.ctx_w2',
        reload=False)

    ## create some extra matrices
    logging.info(
        'creating argument context intersection and set minus matrices.')
    ctx_labels = (
        'Contexts of ArgL',
        'Contexts of ArgR',
        'Contexts of ArgL or ArgR',
        'Contexts of ArgL and ArgR',
        'Contexts difference of ArgL and ArgR',
        'Contexts of ArgL but not ArgR',
        'Contexts of ArgR but not ArgL',
    )
    ctx_mats = _boolean_set_matrices(mat_arg_l_ctx, mat_arg_r_ctx)
    for label, mat in zip(ctx_labels, ctx_mats):
        matrices.append((label, mat.astype(np.float64), d_ctx))

    # topic features
    logging.info('loading lda feature matrices.')
    mat_topic = tm.arg_l_arg_r_to_topic_matrix(
        d_triples._rtuple2ids,
        svo_flipped_lda_w2t,
        len(d_triples),
        mmfile_presuffix='.bless.topic_pairs',
        reload=False)
    matrices.append(('Topic of ArgL - ArgR pair', mat_topic, None))

    mat_arg_l_topic = tm.arg_to_topic_matrix(
        d_triples._m2ids,
        svo_lda_w2t,
        len(d_triples),
        mmfile_presuffix='.bless.topic_w1',
        reload=False)
    matrices.append(('Topic of ArgL', mat_arg_l_topic, None))

    mat_arg_r_topic = tm.arg_to_topic_matrix(
        d_triples._r2ids,
        svo_lda_w2t,
        len(d_triples),
        mmfile_presuffix='.bless.topic_w2',
        reload=False)
    matrices.append(('Topic of ArgR', mat_arg_r_topic, None))

    # distributionally similar args for each arg
    logging.info('loading similar arguments.')
    d_arg = td.Dict()
    mat_sim_arg_l = tm.arg_asjo_matrix(
        d_triples._m2ids,
        d_arg,
        svo_dt,
        len(d_triples),
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix='.sim_w1',
        reload=False)
    mat_sim_arg_r = tm.arg_asjo_matrix(
        d_triples._r2ids,
        d_arg,
        svo_dt,
        len(d_triples),
        transform_w2sig=_top_20_by_score,
        mmfile_presuffix='.sim_w2',
        reload=False)

    ### create some extra matrices
    logging.info(
        'creating similar arguments intersection and set minus matrices.')
    sim_labels = (
        'Similar Args to ArgL',
        'Similar Args to ArgR',
        'Similar Args to ArgL or ArgR',
        'Similar Args to ArgL and ArgR',
        'Difference of similar Args to ArgL and ArgR',
        'Similar Args to ArgL but not to ArgR',
        'Similar Args to ArgR but not to ArgL',
    )
    sim_mats = _boolean_set_matrices(mat_sim_arg_l, mat_sim_arg_r)
    # NOTE(review): unlike the context matrices above, these are appended
    # with bool dtype (no float64 cast); kept as in the original -- confirm
    # this difference is intended.
    for label, mat in zip(sim_labels, sim_mats):
        matrices.append((label, mat, d_arg))

    return matrices