Example #1
def main(args):
    init_property_of_dataset()
    load_dataset()
    print('relation size:', args.rel_size, 'entity size:', args.entity_size)

    xp = Backend(args)
    m = model.Model(args)
    opt = get_opt(args)
    opt.setup(m)
    for epoch in range(args.epoch_size):
        opt.alpha = args.beta0 / (1.0 + args.beta1 * epoch)
        trLoss, Ntr = train(args, m, xp, opt)
        trace('epoch:', epoch, 'tr Loss:', trLoss, Ntr)
        dump_current_scores_of_devtest(args, m, xp)
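
Every example drives the same inverse-time decay on Adam's learning rate, opt.alpha = args.beta0 / (1.0 + args.beta1 * epoch). A standalone sketch of just that schedule; the beta0/beta1 values below are illustrative, not the repo's defaults:

# Sketch of the inverse-time decay shared by all six examples.
# beta0 and beta1 are illustrative values, not the repo's defaults.
beta0, beta1 = 0.001, 0.1
for epoch in range(5):
    alpha = beta0 / (1.0 + beta1 * epoch)  # epoch 0 yields beta0, then decays
    print('epoch', epoch, 'alpha', round(alpha, 6))
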
Example #2
def main(args):
    loadflag = False
    initilize_dataset()
    args.rel_size, args.entity_size = get_sizes(args)
    print('relation size:', args.rel_size, 'entity size:', args.entity_size)

    RelCovVal = [1.0] * args.rel_size
    RelCovValTmp = [0.0] * args.rel_size
    EntCovVal = [1.0] * args.entity_size
    EntCovValTmp = [0.0] * args.entity_size

    xp = Backend(args)
    m = get_model(args)
    opt = get_opt(args)
    if loadflag:
        serializers.load_npz(
            './savedWN/ModelA3CNN_OOKBtest_' + args.pooling_method + '.model',
            m)
    opt.setup(m)

    for epoch in range(args.epoch_size):
        # Disabled: dump the first 100 entity coverage values each epoch.
        # fileTmp = open('Ent100Eval_v4.txt', 'a')
        # for i in range(100):
        #     fileTmp.write(str(EntCovVal[i]) + '\t')
        # fileTmp.write('\n')
        # fileTmp.close()

        opt.alpha = args.beta0 / (1.0 + args.beta1 * epoch)
        trLoss, Ntr = train(args, m, xp, opt, RelCovVal, RelCovValTmp,
                            EntCovVal, EntCovValTmp)
        # Disabled: roll the per-epoch coverage accumulators over and reset them.
        # for i in range(args.rel_size):
        #     RelCovVal[i] = RelCovValTmp[i]
        #     RelCovValTmp[i] = 0.0
        # for i in range(args.entity_size):
        #     EntCovVal[i] = EntCovValTmp[i]
        #     EntCovValTmp[i] = 0.0
        tool.trace('epoch:', epoch, 'tr Loss:', tool.dress(trLoss), Ntr)
        if epoch % 5 == 0:
            serializers.save_npz(
                './savedWN/ModelA3CNN_OOKBtest_' + args.pooling_method +
                '.model', m)
        dump_current_scores_of_devtest(args, m, xp, RelCovVal, EntCovVal)
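
Examples #2, #3, and #5 checkpoint with Chainer's npz serializers. A minimal sketch of that save/load pattern; the Linear link and file name are placeholders, only save_npz/load_npz are the real API:

import chainer.links as L
from chainer import serializers

model = L.Linear(3, 10)  # placeholder link; any Chain works the same way
serializers.save_npz('checkpoint.model', model)  # write parameters to .npz
serializers.load_npz('checkpoint.model', model)  # restore them in place
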
Example #3
def main(args):
	loadflag = False
	initilize_dataset()
	args.rel_size, args.entity_size = get_sizes(args)
	print('relation size:', args.rel_size, 'entity size:', args.entity_size)

	# Load pretrained relation and entity embeddings (tab-separated floats).
	W1lists = []
	with open(args.pretrain_file + '/relation2vec.vec', 'r') as file1:
		for line in file1:
			W1lists.append(line.strip().split('\t'))
	W1 = np.array(W1lists).astype(np.float32)

	W2lists = []
	with open(args.pretrain_file + '/entity2vec.vec', 'r') as file2:
		for line in file2:
			W2lists.append(line.strip().split('\t'))
	W2 = np.array(W2lists).astype(np.float32)

	# print(W1.shape, W2.shape)

	RelCovVal = [1.0] * args.rel_size
	RelCovValTmp = [0.0] * args.rel_size
	EntCovVal = [1.0] * args.entity_size
	EntCovValTmp = [0.0] * args.entity_size

	xp = Backend(args)
	m = get_model(args, W1, W2)
	opt = get_opt(args)
	if loadflag:
		serializers.load_npz('./savedFB/ModelA3CNN_ATT_0817_head.model', m)
	opt.setup(m)
	normalizeR = m.Normal(xp)

	# random.shuffle(train_data)

	for epoch in range(args.epoch_size):
		opt.alpha = args.beta0 / (1.0 + args.beta1 * epoch)
		trLoss, Ntr = train(args, m, xp, opt, RelCovVal, RelCovValTmp, EntCovVal, EntCovValTmp)
		normalizeR = m.Normal(xp)
		tool.trace('epoch:', epoch, 'tr Loss:', tool.dress(trLoss), Ntr)
		if epoch % 5 == 0:
			serializers.save_npz('./savedFB/ModelA3CNN_ATT_0817_head.model', m)
		dump_current_scores_of_devtest(args, m, xp, RelCovVal, EntCovVal)
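
The two read loops in Example #3 build float32 matrices from tab-separated .vec files. np.loadtxt does the same in one call, assuming the rows hold nothing but tab-separated floats (no trailing tabs); the file names follow that example:

import numpy as np

# Compact equivalent of Example #3's embedding-loading loops.
W1 = np.loadtxt('relation2vec.vec', delimiter='\t', dtype=np.float32)
W2 = np.loadtxt('entity2vec.vec', delimiter='\t', dtype=np.float32)
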
Example #4
def main(args):
    # Read in the rules
    rules_addition()
    # Initialize all datasets
    initilize_dataset()
    # Override the default relation and entity sizes from the file contents
    args.rel_size, args.entity_size = get_sizes(args)
    print('relation size:', args.rel_size, 'entity size:', args.entity_size)

    xp = Backend(args)  # returns a callable backend object
    m = get_model(args)  # return A0(args)
    # Setup an optimizer
    # The optimization method used for training; Adam by default
    opt = get_opt(args)  # return optimizers.Adam()
    # setup() just gives the optimizer a link (the model)
    opt.setup(m)
    for epoch in range(args.epoch_size):
        opt.alpha = args.beta0 / (1.0 + args.beta1 * epoch)
        trLoss, Ntr = train(args, m, xp, opt)  # corresponds to train in main.py
        tool.trace('epoch:', epoch, 'tr Loss:', tool.dress(trLoss), Ntr)
        dump_current_scores_of_devtest(args, m, xp)
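
Example #4's comments spell out the Chainer contract: get_opt(args) returns optimizers.Adam() and setup() binds the model link so update() can step its parameters. A self-contained sketch of one such step; the tiny Linear model and the data are placeholders:

import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import optimizers

model = L.Linear(3, 1)   # placeholder for get_model(args)
opt = optimizers.Adam()  # what get_opt(args) returns, per the comment
opt.setup(model)         # bind the link to the optimizer

x = np.ones((2, 3), dtype=np.float32)
t = np.zeros((2, 1), dtype=np.float32)
loss = F.mean_squared_error(model(x), t)
model.cleargrads()  # clear accumulated gradients
loss.backward()     # populate gradients
opt.update()        # one Adam step on the bound link
opt.alpha = 0.001 / (1.0 + 0.1 * 1)  # the per-epoch schedule, at epoch 1
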
Example #5
def main(args):
    loadflag = False
    initilize_dataset()
    args.rel_size, args.entity_size = get_sizes(args)
    print('relation size:', args.rel_size, 'entity size:', args.entity_size)

    xp = Backend(args)
    m = get_model(args)
    opt = get_opt(args)
    if loadflag:
        serializers.load_npz(
            './savedWN/ModelA0_OOKBtest_' + args.pooling_method, m)
    opt.setup(m)
    for epoch in range(args.epoch_size):
        # dump_current_scores_of_devtest(args, m, xp)
        opt.alpha = args.beta0 / (1.0 + args.beta1 * epoch)
        trLoss, Ntr = train(args, m, xp, opt)
        tool.trace('epoch:', epoch, 'tr Loss:', tool.dress(trLoss), Ntr)
        if epoch % 10 == 0:
            serializers.save_npz(
                './savedWN/ModelA0_OOKBtest_' + args.pooling_method, m)
        dump_current_scores_of_devtest(args, m, xp)
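
The xp handle returned by Backend(args) (XP(args) in Example #6) is used as the array module. The repo's implementation isn't shown here; a common pattern from the Chainer era, with the args.gpu attribute as an assumption, would be:

import numpy as np

def Backend(args):
    # Assumed sketch: return cupy when a GPU id is configured, else numpy.
    # args.gpu is a hypothetical flag, not taken from the repo.
    if getattr(args, 'gpu', -1) >= 0:
        import cupy
        return cupy
    return np
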
Example #6
def main(args):
    global candidate_heads, gold_heads, candidate_tails, gold_tails, black_set
    xp = XP(args)
    args.rel_size, args.entity_size = get_sizes(args)
    print('relation size:', args.rel_size, 'entity size:', args.entity_size)
    m = get_model(args)
    opt = get_opt(args)
    opt.setup(m)

    # Build adjacency lists, a (head, tail) -> relation lookup, and the
    # gold/candidate sets from the training triples; an optional fourth
    # column is a 0/1 label, and label-0 triples are skipped.
    relations = dict()
    links = defaultdict(set)
    for line in tool.read(args.train_file):
        items = list(map(int, line.strip().split('\t')))
        if len(items) == 4:
            h, r, t, l = items
            if l == 0: continue
        else:
            h, r, t = items
        relations[(h, t)] = r
        links[t].add(h)
        links[h].add(t)
        gold_heads[(r, t)].add(h)
        gold_tails[(h, r)].add(t)
        candidate_heads[r].add(h)
        candidate_tails[r].add(t)
        tail_per_head[h].add(t)
        head_per_tail[t].add(h)
    for e in links:
        links[e] = list(links[e])

    # Blacklist queries whose every candidate is already a gold answer;
    # ranking them would leave no negatives to compare against. Head
    # queries are stored with a negated relation id to keep them
    # distinct from tail queries.
    for p in gold_heads:
        if len(candidate_heads[p[0]] - gold_heads[p]) == 0:
            p = (-p[0], p[1])
            black_set.add(p)
    for p in gold_tails:
        if len(candidate_tails[p[1]] - gold_tails[p]) == 0:
            black_set.add(p)
    print('black list size:', len(black_set))
    for r in candidate_heads:
        candidate_heads[r] = list(candidate_heads[r])
    for r in candidate_tails:
        candidate_tails[r] = list(candidate_tails[r])
    # Distinct-neighbour counts per entity, stored as floats
    # (tails per head and heads per tail).
    for h in tail_per_head:
        tail_per_head[h] = float(len(tail_per_head[h]))
    for t in head_per_tail:
        head_per_tail[t] = float(len(head_per_tail[t]))

    if args.train_file == args.auxiliary_file:
        tool.trace('use: edges=links')
        edges = links
    else:
        tool.trace('use: different edges')
        edges = defaultdict(set)
        for line in tool.read(args.auxiliary_file):
            items = list(map(int, line.strip().split('\t')))
            if len(items) == 4:
                h, r, t, l = items
                if l == 0: continue
            else:
                h, r, t = items
            relations[(h, t)] = r
            edges[t].add(h)
            edges[h].add(t)
        for e in edges:
            edges[e] = list(edges[e])

    for epoch in range(args.epoch_size):
        opt.alpha = args.beta0 / (1.0 + args.beta1 * epoch)
        trLoss, Ntr = train(args, m, xp, opt, links, relations, edges)
        evaluate(args, m, xp, links, relations, edges)
        tool.trace('epoch:', epoch, 'tr Loss:', tool.dress(trLoss), Ntr)
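
Example #6's black set marks ranking queries with no usable negatives: when every candidate head for a relation is already a gold head of the query, filtered ranking has nothing left to compare against, and head queries get a negated relation id to stay distinct from tail queries. A self-contained sketch with made-up triples:

from collections import defaultdict

triples = [(0, 1, 5), (2, 1, 5), (3, 2, 6)]  # made-up (h, r, t) triples

candidate_heads = defaultdict(set)
gold_heads = defaultdict(set)
for h, r, t in triples:
    candidate_heads[r].add(h)
    gold_heads[(r, t)].add(h)

black_set = set()
for r, t in gold_heads:
    if not candidate_heads[r] - gold_heads[(r, t)]:
        black_set.add((-r, t))  # negated r marks a head query

print(black_set)  # {(-1, 5), (-2, 6)}: every candidate is already gold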