Esempio n. 1
0
def fungo_test_wrapper(name='cellcycle_FUN'):
    """Load a dataset through ``read_fungo`` and return array features and binarized labels.

    Args:
        name: Dataset name handed to ``read_fungo``.

    Returns:
        A ``(X_train, Y_train, X_test, Y_test)`` tuple. The feature values
        are converted with ``np.array``; the label values are the output of
        a ``MultiLabelBinarizer`` whose classes are ``tree.class_idx``.
    """
    train_feats, test_feats, train_ids, test_ids, id2doc, nodes = read_fungo(name)
    train_feats = np.array(train_feats)
    test_feats = np.array(test_feats)
    args = conf()
    # No ancestor filtering here: the same mapping is passed as both
    # ``id2doc`` and ``id2doc_a``.
    tree = Tree(args, train_ids, test_ids, id2doc=id2doc, id2doc_a=id2doc,
                nodes=nodes, rootname='Top')

    def class_indices(doc_ids):
        # Per-document class indices as stored on the tree.
        return [tree.id2doc_ancestors[doc_id]['class_idx'] for doc_id in doc_ids]

    binarizer = MultiLabelBinarizer(classes=tree.class_idx)
    train_targets = binarizer.fit_transform(class_indices(train_ids))
    test_targets = binarizer.transform(class_indices(test_ids))
    return train_feats, train_targets, test_feats, test_targets
Esempio n. 2
0
def fungo_test(data_name):
    """Load a dataset through ``read_fungo`` and binarize its category labels.

    The binarizer is fitted on the train and test labels together, so both
    returned label matrices share the same columns.

    Args:
        data_name: Dataset name handed to ``read_fungo``.

    Returns:
        A ``(X_train, Y_train, X_test, Y_test, train_ids, test_ids)`` tuple,
        where ``Y_train`` / ``Y_test`` are the rows of the binarized label
        matrix belonging to the train / test ids respectively.
    """
    X_train, X_test, train_ids, test_ids, id2doc, _ = read_fungo(data_name)
    train_labels = [id2doc[bid]['categories'] for bid in train_ids]
    test_labels = [id2doc[bid]['categories'] for bid in test_ids]
    # Actually here Y is not used. We use id2doc for labels.
    mlb = MultiLabelBinarizer()
    # Concatenate as plain lists: np.concatenate would first coerce the
    # per-document label collections into rectangular arrays, which fails
    # when documents carry different numbers of labels.
    Y = mlb.fit_transform(train_labels + test_labels)
    # Split on the train length. Slicing with ``-len(test_labels)`` would
    # return every row when the test split is empty, because ``Y[-0:]`` is
    # the same as ``Y[0:]``.
    n_train = len(train_labels)
    Y_train = Y[:n_train]
    Y_test = Y[n_train:]

    return X_train, Y_train, X_test, Y_test, train_ids, test_ids
Esempio n. 3
0
    # Hand the test inputs and ids to save_minibatch under the name
    # 'test_nyt'. NOTE(review): 32 is presumably the minibatch size --
    # confirm against save_minibatch's signature.
    save_minibatch(logger,
                   args,
                   word_index,
                   X_test,
                   test_ids,
                   32,
                   name='test_nyt')
    # Build the Tree for this branch; id2doc_train is passed as id2doc and
    # the full id2doc mapping as id2doc_a.
    tree = Tree(args,
                train_ids,
                test_ids,
                id2doc=id2doc_train,
                id2doc_a=id2doc,
                nodes=nodes,
                rootname='Top')
elif 'FUN' in args.dataset or 'GO' in args.dataset:
    # Dataset names containing 'FUN' or 'GO' are loaded with read_fungo;
    # the second returned value is discarded in this branch.
    X_train, _, train_ids, test_ids, id2doc, nodes = read_fungo(args.dataset)
    # Optionally pass id2doc through filter_ancestors for the id2doc
    # argument; the unfiltered mapping is still given to Tree as id2doc_a.
    if args.filter_ancestors:
        id2doc_train = filter_ancestors(id2doc, nodes)
    else:
        id2doc_train = id2doc
    tree = Tree(args,
                train_ids,
                test_ids,
                id2doc=id2doc_train,
                id2doc_a=id2doc,
                nodes=nodes,
                rootname='Top')
else:
    # Unrecognized dataset name: log the error and exit with status 1.
    logger.error('No such dataset: {}'.format(args.dataset))
    exit(1)
if args.stat_check: