Example #1
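# These snippets are excerpts from a larger project; module-level imports and
# globals (args, nnargs, net_copy, get_score_representations, Tensorboard,
# Logger, DataReader, network, performance, utils) are defined elsewhere.
# A likely import header for Example #1 (an assumption, for reference only):
#
#     import sys
#     import heapq
#     import timeit
#     import numpy
#     import torch
#     import torch.nn as nn
#     import torch.nn.functional as F
#     import torch.optim as optim
#     from torch import autograd
#     from torch.distributions import Categorical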
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    worker = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(worker, best_network_model)

    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    manager = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(manager, best_network_model)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    #train_docs_iter = DataReader.DataGnerater("train"+reduced)
    train_docs_iter = DataReader.DataGnerater("dev" + reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs_iter = DataReader.DataGnerater("dev" + reduced)
    test_docs_iter = DataReader.DataGnerater("test" + reduced)

    print "Performance after pretraining..."
    print "DEV"
    metric = performance.performance(dev_docs_iter, worker, manager)
    print "Average:", metric["average"]
    print "TEST"
    metric = performance.performance(test_docs_iter, worker, manager)
    print "Average:", metric["average"]
    print "***"
    print
    sys.stdout.flush()

    lr = nnargs["lr"]
    top_k = nnargs["top_k"]

    model_save_dir = "./model/reinforce/"
    utils.mkdir(model_save_dir)

    score_softmax = nn.Softmax()

    optimizer_manager = optim.RMSprop(manager.parameters(), lr=lr, eps=1e-6)
    optimizer_worker = optim.RMSprop(worker.parameters(), lr=lr, eps=1e-6)

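    # Window size for the running averages logged to TensorBoard.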
    MAX_AVE = 2048

    for echo in range(nnargs["epoch"]):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo

        reward_log = Logger(Tensorboard + args.tb +
                            "/acl2018/%d/reward/" % echo,
                            flush_secs=3)
        entropy_log_manager = Logger(Tensorboard + args.tb +
                                     "/acl2018/%d/entropy/manager" % echo,
                                     flush_secs=3)
        entropy_log_worker = Logger(Tensorboard + args.tb +
                                    "/acl2018/%d/entropy/worker" % echo,
                                    flush_secs=3)

        #train_docs = utils.load_pickle(args.DOCUMENT + 'train_docs.pkl')
        train_docs = utils.load_pickle(args.DOCUMENT + 'dev_docs.pkl')
        docs_by_id = {doc.did: doc for doc in train_docs}

        ave_reward = []
        ave_manager_entropy = []
        ave_worker_entropy = []

        print >> sys.stderr, "Link docs ..."
        tmp_data = []
        cluster_info = {0: [0]}
        cluster_list = [0]
        current_new_cluster = 1
        predict_action_embedding = []
        choose_action = []
        mid = 1

        step = 0

        statistic = {
            "worker_hits": 0,
            "manager_hits": 0,
            "total": 0,
            "manager_predict_last": 0,
            "worker_predict_last": 0
        }

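        # Manager rollout: for each anaphor, sample a coreference action from
        # the manager's policy and record the mean embedding of the cluster
        # the mention was assigned to.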
        for data in train_docs_iter.rl_case_generater(shuffle=True):

            rl = data["rl"]

            scores_manager, representations_manager = get_score_representations(
                manager, data)

            for s, e in zip(rl["starts"], rl["ends"]):
                action_embeddings = representations_manager[s:e]

                probs = F.softmax(torch.transpose(scores_manager[s:e], 0, 1))

                m = Categorical(probs)
                this_action = m.sample()
                index = this_action.data.cpu().numpy()[0]

                if index == (e - s - 1):
                    should_cluster = current_new_cluster
                    cluster_info[should_cluster] = []
                    current_new_cluster += 1
                else:
                    should_cluster = cluster_list[index]

                choose_action.append(index)
                cluster_info[should_cluster].append(mid)
                cluster_list.append(should_cluster)
                mid += 1

                cluster_indexs = torch.cuda.LongTensor(
                    cluster_info[should_cluster])
                action_embedding_predict = torch.mean(
                    action_embeddings[cluster_indexs], 0, keepdim=True)
                predict_action_embedding.append(action_embedding_predict)

            tmp_data.append(data)

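            # Once the document's last batch arrives, replay the buffered
            # batches so the worker can pick its own actions near the
            # manager's choices.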
            if rl["end"] == True:

                inside_index = 0
                manager_path = []
                worker_path = []

                doc = docs_by_id[rl["did"]]

                for data in tmp_data:

                    rl = data["rl"]
                    pair_target = data["pair_target"]
                    anaphoricity_target = 1 - data["anaphoricity_target"]
                    target = numpy.concatenate(
                        (pair_target, anaphoricity_target))[rl["reindex"]]

                    scores_worker, representations_worker = get_score_representations(
                        worker, data)

                    for s, e in zip(rl["starts"], rl["ends"]):
                        action_embeddings = representations_worker[s:e]
                        score = score_softmax(
                            torch.transpose(scores_worker[s:e], 0,
                                            1)).data.cpu().numpy()[0]

                        action_embedding_choose = predict_action_embedding[
                            inside_index]
                        similarities = torch.sum(
                            torch.abs(action_embeddings -
                                      action_embedding_choose), 1)
                        similarities = similarities.data.cpu().numpy()

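                        # Keep the top_k candidate actions whose worker
                        # embeddings are closest (smallest L1 distance) to the
                        # manager's chosen cluster embedding.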
                        action_probabilities = []
                        action_list = []
                        action_candidates = heapq.nlargest(
                            top_k, -similarities)
                        for action in action_candidates:
                            action_index = numpy.argwhere(
                                similarities == -action)[0][0]
                            action_probabilities.append(score[action_index])
                            action_list.append(action_index)

                        manager_action = choose_action[inside_index]
                        if manager_action not in action_list:
                            action_list.append(manager_action)
                            action_probabilities.append(score[manager_action])

                        this_target = target[s:e]

                        sample_action = utils.sample_action(
                            numpy.array(action_probabilities))
                        worker_action = action_list[sample_action]

                        if this_target[worker_action] == 1:
                            statistic["worker_hits"] += 1
                        if this_target[manager_action] == 1:
                            statistic["manager_hits"] += 1
                        if worker_action == (e - s - 1):
                            statistic["worker_predict_last"] += 1
                        if manager_action == (e - s - 1):
                            statistic["manager_predict_last"] += 1
                        statistic["total"] += 1

                        inside_index += 1

                        #link = manager_action
                        link = worker_action
                        m1, m2 = rl['ids'][s + link]
                        doc.link(m1, m2)

                        manager_path.append(manager_action)
                        worker_path.append(worker_action)

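                # Coreference F1 over the fully linked document is the shared
                # reward for both policies.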
                reward = doc.get_f1()
                for data in tmp_data:
                    rl = data["rl"]
                    for s, e in zip(rl["starts"], rl["ends"]):
                        ids = rl['ids'][s:e]
                        ana = ids[0, 1]
                        old_ant = doc.ana_to_ant[ana]
                        doc.unlink(ana)
                        costs = rl['costs'][s:e]
                        for ant_ind in range(e - s):
                            costs[ant_ind] = doc.link(ids[ant_ind, 0],
                                                      ana,
                                                      hypothetical=True,
                                                      beta=1)
                        doc.link(old_ant, ana)
                        #costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))

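                # REINFORCE update for the worker; the expected cost under the
                # current policy serves as the baseline.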
                inside_index = 0
                worker_entropy = 0.0

                for data in tmp_data:
                    new_step = step
                    rl = data["rl"]
                    # worker
                    scores_worker, representations_worker = get_score_representations(
                        worker, data, dropout=nnargs["dropout_rate"])
                    optimizer_worker.zero_grad()
                    worker_loss = None
                    for s, e in zip(rl["starts"], rl["ends"]):
                        costs = rl['costs'][s:e]
                        costs = autograd.Variable(
                            torch.from_numpy(costs).type(
                                torch.cuda.FloatTensor))
                        action = worker_path[inside_index]
                        score = F.softmax(
                            torch.transpose(scores_worker[s:e], 0, 1))
                        if score.size()[1] != costs.size()[0]:
                            continue
                        score = torch.squeeze(score)

                        baseline = torch.sum(costs * score)
                        this_cost = torch.log(
                            score[action]) * -1.0 * (reward - baseline)

                        if worker_loss is None:
                            worker_loss = this_cost
                        else:
                            worker_loss += this_cost
                        worker_entropy += torch.sum(
                            score * torch.log(score + 1e-7)
                        ).data.cpu().numpy()[
                            0]  #+ 0.001*torch.sum(score*torch.log(score+1e-7))
                        inside_index += 1

                    worker_loss.backward()
                    torch.nn.utils.clip_grad_norm(worker.parameters(),
                                                  nnargs["clip"])
                    optimizer_worker.step()

                    ave_worker_entropy.append(worker_entropy)
                    if len(ave_worker_entropy) >= MAX_AVE:
                        ave_worker_entropy = ave_worker_entropy[1:]
                    entropy_log_worker.log_value(
                        'entropy',
                        float(sum(ave_worker_entropy)) /
                        float(len(ave_worker_entropy)), new_step)
                    new_step += 1

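                # REINFORCE update for the manager, mirroring the worker
                # update above.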
                inside_index = 0
                manager_entropy = 0.0
                for data in tmp_data:
                    new_step = step
                    rl = data["rl"]

                    ave_reward.append(reward)
                    if len(ave_reward) >= MAX_AVE:
                        ave_reward = ave_reward[1:]
                    reward_log.log_value(
                        'reward',
                        float(sum(ave_reward)) / float(len(ave_reward)),
                        new_step)

                    scores_manager, representations_manager = get_score_representations(
                        manager, data, dropout=nnargs["dropout_rate"])

                    optimizer_manager.zero_grad()
                    manager_loss = None
                    for s, e in zip(rl["starts"], rl["ends"]):
                        score = F.softmax(
                            torch.transpose(scores_manager[s:e], 0, 1))
                        costs = rl['costs'][s:e]
                        costs = autograd.Variable(
                            torch.from_numpy(costs).type(
                                torch.cuda.FloatTensor))
                        if score.size()[1] != costs.size()[0]:
                            continue

                        action = manager_path[inside_index]
                        score = torch.squeeze(score)

                        baseline = torch.sum(costs * score)
                        this_cost = torch.log(score[action]) * -1.0 * (
                            reward - baseline
                        )  # + 0.001*torch.sum(score*torch.log(score+1e-7))

                        #this_cost = torch.sum(score*costs) + 0.001*torch.sum(score*torch.log(score+1e-7))

                        if manager_loss is None:
                            manager_loss = this_cost
                        else:
                            manager_loss += this_cost

                        manager_entropy += torch.sum(
                            score *
                            torch.log(score + 1e-7)).data.cpu().numpy()[0]
                        inside_index += 1

                    manager_loss.backward()
                    torch.nn.utils.clip_grad_norm(manager.parameters(),
                                                  nnargs["clip"])
                    optimizer_manager.step()

                    ave_manager_entropy.append(manager_entropy)
                    if len(ave_manager_entropy) >= MAX_AVE:
                        ave_manager_entropy = ave_manager_entropy[1:]
                    entropy_log_manager.log_value(
                        'entropy',
                        float(sum(ave_manager_entropy)) /
                        float(len(ave_manager_entropy)), new_step)
                    new_step += 1

                step = new_step
                tmp_data = []
                cluster_info = {0: [0]}
                cluster_list = [0]
                current_new_cluster = 1
                mid = 1
                predict_action_embedding = []
                choose_action = []

        end_time = timeit.default_timer()
        print >> sys.stderr, "TRAINING Use %.3f seconds" % (end_time -
                                                            start_time)
        print >> sys.stderr, "save model ..."
        #print "Top k",top_k
        print "Worker Hits", statistic[
            "worker_hits"], "Manager Hits", statistic[
                "manager_hits"], "Total", statistic["total"]
        print "Worker predict last", statistic[
            "worker_predict_last"], "Manager predict last", statistic[
                "manager_predict_last"]
        #torch.save(network_model, model_save_dir+"network_model_rl_worker.%d"%echo)
        #torch.save(ana_network, model_save_dir+"network_model_rl_manager.%d"%echo)

        print "DEV"
        metric = performance.performance(dev_docs_iter, worker, manager)
        print "Average:", metric["average"]
        print "DEV manager"
        metric = performance_manager.performance(dev_docs_iter, worker,
                                                 manager)
        print "Average:", metric["average"]
        print "TEST"
        metric = performance.performance(test_docs_iter, worker, manager)
        print "Average:", metric["average"]
        print
        sys.stdout.flush()
Example #2
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/network_model_pretrain.best"
    print >> sys.stderr,"Read model from",best_network_file
    best_network_model = torch.load(best_network_file)
        
    embedding_matrix = numpy.load(embedding_file)

    "Building torch model"
    network_model = network.Network(nnargs["pair_feature_dimention"],nnargs["mention_feature_dimention"],nnargs["word_embedding_dimention"],nnargs["span_dimention"],1000,nnargs["embedding_size"],nnargs["embedding_dimention"],embedding_matrix).cuda()
    print >> sys.stderr,"save model ..."

    net_copy(network_model,best_network_model)

    reduced=""
    if args.reduced == 1:
        reduced="_reduced"

    print >> sys.stderr,"prepare data for train ..."
    train_docs = DataReader.DataGnerater("train"+reduced)
    print >> sys.stderr,"prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev"+reduced)
    test_docs = DataReader.DataGnerater("test"+reduced)


    l2_lambda = 1e-6
    lr = nnargs["lr"]
    dropout_rate = nnargs["dropout_rate"]
    epoch = nnargs["epoch"]

    model_save_dir = "./model/bp/"
   
    last_cost = 0.0
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
        }
  
    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5)

    for echo in range(epoch):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:",echo
        
        scheduler.step()

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

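        # Wrap each numpy batch in autograd Variables (pre-0.4 PyTorch API).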
        for data in train_docs.train_generater(shuffle=True):

            mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor))
            
            reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor))
            start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor))
            top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor))

            anaphoricity_target = data["anaphoricity_target"]
            anaphoricity_gold = anaphoricity_target.tolist()
            ana_lable = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold]))

            optimizer.zero_grad()

            output,output_reindex = network_model.forward_top_pair(nnargs["word_embedding_dimention"],mention_index,mention_span,candi_index,candi_spans,pair_feature,anaphors,antecedents,reindex,start_index,end_index,dropout_rate)
            loss = F.binary_cross_entropy(output,top_gold,size_average=False)/train_docs.scale_factor_top

            ana_output,_,_ = network_model.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)
            ana_loss = F.binary_cross_entropy(ana_output,ana_lable,size_average=False)/train_docs.anaphoricity_scale_factor_top

            loss_all = loss + ana_loss    
            
            loss_all.backward()
            pair_cost_this_turn += loss.data[0]
            optimizer.step()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain",echo,"Pair total cost:",pair_cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds"%(end_time-start_time)
        print >> sys.stderr, "Learning Rate",lr

        gold = []
        predict = []

        ana_gold = []
        ana_predict = []

        for data in dev_docs.train_generater(shuffle=False):

            mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor))

            
            reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor))
            start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor))
            top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor))

            anaphoricity_target = data["anaphoricity_target"]
            anaphoricity_gold = anaphoricity_target.tolist()
            ana_lable = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold]))
            
            gold += data["top_gold"].tolist()
            ana_gold += anaphoricity_target.tolist()
        
            output,output_reindex = network_model.forward_top_pair(nnargs["word_embedding_dimention"],mention_index,mention_span,candi_index,candi_spans,pair_feature,anaphors,antecedents,reindex,start_index,end_index,0.0)

            predict += output.data.cpu().numpy().tolist()

            ana_output,_,_ = network_model.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()
        
        gold = numpy.array(gold,dtype=numpy.int32)
        predict = numpy.array(predict)

        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

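        # Sweep the decision threshold on dev and keep the best F1.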
        thresh_list = [0.3,0.35,0.4,0.45,0.5,0.55,0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
 
        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush() 

        if best_results["f1"] >= all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model, model_save_dir+"network_model_pretrain.best.top")

        ana_gold = numpy.array(ana_gold,dtype=numpy.int32)
        ana_predict = numpy.array(ana_predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        for thresh in thresh_list:
            evaluation_results = get_metrics(ana_gold, ana_predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush() 

        if (echo+1)%10 == 0:
            best_network_model = torch.load(model_save_dir+"network_model_pretrain.best.top") 
            print "DEV:"
            performance.performance(dev_docs,best_network_model)
            print "TEST:"
            performance.performance(test_docs,best_network_model)
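
# get_metrics is defined elsewhere in the project and not shown in these
# snippets. A minimal sketch of the thresholded binary metrics it appears to
# return, matching the result dict used above (an assumption, not the
# project's actual implementation):
#
#     def get_metrics(gold, predict, thresh):
#         pred = (predict >= thresh).astype(numpy.int32)
#         tp = float(numpy.sum((pred == 1) & (gold == 1)))
#         precision = tp / max(float(numpy.sum(pred == 1)), 1.0)
#         recall = tp / max(float(numpy.sum(gold == 1)), 1.0)
#         f1 = 2.0 * precision * recall / max(precision + recall, 1e-7)
#         accuracy = float(numpy.sum(pred == gold)) / len(gold)
#         return {"thresh": thresh, "accuracy": accuracy,
#                 "precision": precision, "recall": recall, "f1": f1}
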
Example #3
    metrics = {}
    metrics["muc"] = (mr, mp, mf)
    metrics["b3"] = (br, bp, bf)
    metrics["ceaf"] = (cr, cp, cf)
    return metrics


def print_performance(m):
    mp, mr, mf = m["muc"]
    print "MUC: recall: %f precision: %f  f1: %f" % (mr, mp, mf)
    bp, br, bf = m["b3"]
    print "BCUBED: recall: %f precision: %f  f1: %f" % (br, bp, bf)
    cp, cr, cf = m["ceaf"]
    print "CEAF: recall: %f precision: %f  f1: %f" % (cr, cp, cf)


if __name__ == "__main__":

    #network_file = "./model/pretrain/network_model_pretrain.best"
    network_file = "./model/pretrain/network_model_pretrain.top.best"
    #network_file = "./model/model.pkl"
    print >> sys.stderr, "Read model from ./model/model.pkl"
    network_model = torch.load(network_file)

    #dev_docs = DataReader.DataGnerater("dev")
    dev_docs = DataReader.DataGnerater("test")

    best_thres = 0.4

    best_thres = evaluate(network_model, dev_docs, best_thres)
Example #4
File: test.py Project: yqy/torch
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    network_file = "./model/model.pkl"
    if os.path.isfile(network_file):
        print >> sys.stderr,"Read model from ./model/model.pkl"
        network_model = torch.load(network_file)
    else:
        embedding_matrix = numpy.load(embedding_file)

        "Building torch model"
        network_model = network.Network(pair_feature_dimention,mention_feature_dimention,word_embedding_dimention,span_dimention,1000,embedding_size,embedding_dimention,embedding_matrix).cuda()
        print >> sys.stderr,"save model ..."
        torch.save(network_model,network_file)

    reduced=""
    if args.reduced == 1:
        reduced="_reduced"

    train_docs = DataReader.DataGnerater("train"+reduced)
    dev_docs = DataReader.DataGnerater("dev"+reduced)
    test_docs = DataReader.DataGnerater("test"+reduced)


    l2_lambda = 1e-5
    lr = 0.002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    model_save_dir = "./model/pretrain/"
   
    last_cost = 0.0
     
    for echo in range(30):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:",echo

        optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)

        cost_this_turn = 0.0

        pos_num = 0
        neg_num = 0
        inside_time = 0.0
    
        loss = None

        for data,doc_end in train_docs.generater(shuffle):
            ana_word_index,ana_span,ana_feature,candi_word_index,candi_span,pair_feature_array,target,mention_ids = data


            if len(pair_feature_array) >= 500:
                continue
            if len(target) == 0:
                continue
                

            mention_index = autograd.Variable(torch.from_numpy(ana_word_index).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(ana_span).type(torch.cuda.FloatTensor))
            mention_feature = autograd.Variable(torch.from_numpy(ana_feature).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(pair_feature_array).type(torch.cuda.FloatTensor))

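            # Slot 0 is a dummy "no antecedent" action; it becomes the gold
            # class when the anaphor has no true antecedent.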
            gold = [0] + target.tolist()
            if sum(target) == 0:
                neg_num += 1
                gold[0] = 1
            else:
                pos_num += 1

            inside_time_start = timeit.default_timer()

            lable = autograd.Variable(torch.cuda.FloatTensor([gold]))
            output,scores = network_model.forward(word_embedding_dimention,mention_index,mention_span,mention_feature,mention_index,mention_span,candi_index,candi_spans,pair_feature,dropout_rate)
            optimizer.zero_grad()
            loss = F.binary_cross_entropy(output,lable)
            loss.backward()
            optimizer.step()
            inside_time += (timeit.default_timer()-inside_time_start)
            cost_this_turn += loss.data[0]


        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain",echo,"Total cost:",cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds"%(end_time-start_time)
        print >> sys.stderr, "Inside Use %.3f seconds"%(inside_time)
        print >> sys.stderr, "Neg:Pos",neg_num,pos_num
        print >> sys.stderr, "Learning Rate",lr

        if cost_this_turn > last_cost:
            lr = lr*0.7 
        last_cost = cost_this_turn

        print >> sys.stderr,"save model ..."

        best_thres = Evaluate.evaluate(network_model,dev_docs,best_thres)
Example #5
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    manager = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(manager, best_network_model)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    #dev_docs = DataReader.DataGnerater("dev"+reduced)
    test_docs = DataReader.DataGnerater("test" + reduced)

    metric = performance(test_docs, worker, manager)
    print "Ave", metric["average"]

    #network_file = "./model/network_model_pretrain.top.best"
    #network_model = torch.load(network_file)

    #ana_network_file = "./model/network_model_pretrain.top.best"
    #ana_network_model = torch.load(ana_network_file)

    #reduced=""
    #if args.reduced == 1:
    #    reduced="_reduced"

    #metric = performance(test_docs,network_model,ana_network_model)
Example #6
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    network_model = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    test_docs = DataReader.DataGnerater("test" + reduced)

    l2_lambda = 1e-6
    #lr = 0.00009
    lr = 0.0001
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    model_save_dir = "./model/"

    last_cost = 0.0
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
    }

    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5)

    for echo in range(100):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo
        scheduler.step()

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        inside_time = 0.0

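        # One pass over the training set; pair and anaphoricity outputs are
        # computed per batch, but only the pair loss is backpropagated here.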
        for data in train_docs.train_generater(shuffle=shuffle):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target = data
            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            gold = target.tolist()
            anaphoricity_gold = anaphoricity_target.tolist()

            pair_nums += len(gold)
            ana_nums += len(anaphoricity_gold)

            lable = autograd.Variable(torch.cuda.FloatTensor([gold]))
            ana_lable = autograd.Variable(
                torch.cuda.FloatTensor([anaphoricity_gold]))

            output, _ = network_model.forward_all_pair(
                nnargs["word_embedding_dimention"], mention_index,
                mention_span, candi_index, candi_spans, pair_feature, anaphors,
                antecedents, dropout_rate)
            ana_output, _ = network_model.forward_anaphoricity(
                nnargs["word_embedding_dimention"], anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)

            optimizer.zero_grad()

            #loss = get_pair_loss(output,positive,negative,train_docs.scale_factor)
            loss = F.binary_cross_entropy(
                output, lable, size_average=False) / train_docs.scale_factor
            #ana_loss = F.binary_cross_entropy(ana_output,ana_lable,size_average=False)/train_docs.anaphoricity_scale_factor

            pair_cost_this_turn += loss.data[0] * train_docs.scale_factor

            loss_all = loss
            loss_all.backward()
            optimizer.step()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "Learning Rate", lr

        #print >> sys.stderr,"save model ..."
        #torch.save(network_model, model_save_dir+"network_model_pretrain.%d"%echo)

        gold = []
        predict = []

        ana_gold = []
        ana_predict = []

        for data in dev_docs.train_generater(shuffle=False):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative, anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target = data

            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            gold += target.tolist()
            ana_gold += anaphoricity_target.tolist()

            output, _ = network_model.forward_all_pair(
                nnargs["word_embedding_dimention"], mention_index,
                mention_span, candi_index, candi_spans, pair_feature, anaphors,
                antecedents, 0.0)
            predict += output.data.cpu().numpy()[0].tolist()

            ana_output, _ = network_model.forward_anaphoricity(
                nnargs["word_embedding_dimention"], anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()

        gold = numpy.array(gold, dtype=numpy.int32)
        predict = numpy.array(predict)

        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

        thresh_list = [0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results

        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()

        if best_results["f1"] >= all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model,
                       model_save_dir + "network_model_pretrain.best.pair")

        sys.stdout.flush()

    ## output best
    print "In sum, best pair accuracy: %f and Fscore: %f with thresh: %f"\
        %(all_best_results["accuracy"],all_best_results["f1"],all_best_results["thresh"])
    sys.stdout.flush()
Example #7
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/pretrain/network_model_pretrain.best"
    print >> sys.stderr, "Read model from ./model/model.pkl"
    best_network_model = torch.load(best_network_file)

    embedding_matrix = numpy.load(embedding_file)

    "Building torch model"
    network_model = network.Network(pair_feature_dimention,
                                    mention_feature_dimention,
                                    word_embedding_dimention, span_dimention,
                                    1000, embedding_size, embedding_dimention,
                                    embedding_matrix).cuda()
    print >> sys.stderr, "save model ..."
    #torch.save(network_model,network_file)

    net_copy(network_model, best_network_model)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    test_docs = DataReader.DataGnerater("test" + reduced)

    l2_lambda = 1e-6
    lr = 0.0002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    model_save_dir = "./model/pretrain/"

    last_cost = 0.0
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
    }

    for echo in range(100):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo

        #if echo == 100:
        #    lr = lr/2.0
        #if echo == 150:
        #    lr = lr/2.0

        #optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, network_model.parameters()), lr=lr, weight_decay=l2_lambda)
        #optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)
        optimizer = optim.RMSprop(network_model.parameters(),
                                  lr=lr,
                                  eps=1e-5,
                                  weight_decay=l2_lambda)

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        pos_num = 0
        neg_num = 0
        inside_time = 0.0

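        # Train only on the top-scoring candidate pairs (top=True batches
        # carry the reindexing info in top_x).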
        for data in train_docs.train_generater(shuffle=shuffle, top=True):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,top_x = data
            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            reindex = autograd.Variable(
                torch.from_numpy(top_x["score_index"]).type(
                    torch.cuda.LongTensor))

            start_index = autograd.Variable(
                torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(
                torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor))

            top_gold = autograd.Variable(
                torch.from_numpy(top_x["top_gold"]).type(
                    torch.cuda.FloatTensor))

            anaphoricity_gold = anaphoricity_target.tolist()
            ana_lable = autograd.Variable(
                torch.cuda.FloatTensor([anaphoricity_gold]))

            optimizer.zero_grad()

            output, output_reindex = network_model.forward_top_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                reindex, start_index, end_index, dropout_rate)
            loss = F.binary_cross_entropy(
                output, top_gold,
                size_average=False) / train_docs.scale_factor_top

            ana_output, _ = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)
            ana_loss = F.binary_cross_entropy(
                ana_output, ana_lable,
                size_average=False) / train_docs.anaphoricity_scale_factor_top

            loss_all = loss + ana_loss

            loss_all.backward()
            pair_cost_this_turn += loss.data[0]
            optimizer.step()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain", echo, "Pair total cost:", pair_cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "Learning Rate", lr

        print >> sys.stderr, "save model ..."
        torch.save(network_model,
                   model_save_dir + "network_model_pretrain.%d.top" % echo)

        #if cost_this_turn > last_cost:
        #    lr = lr*0.7
        gold = []
        predict = []

        ana_gold = []
        ana_predict = []

        for data in dev_docs.train_generater(shuffle=False, top=True):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative, anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target, top_x = data

            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            reindex = autograd.Variable(
                torch.from_numpy(top_x["score_index"]).type(
                    torch.cuda.LongTensor))
            start_index = autograd.Variable(
                torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(
                torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor))

            gold += top_x["top_gold"].tolist()
            ana_gold += anaphoricity_target.tolist()

            output, output_reindex = network_model.forward_top_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                reindex, start_index, end_index, 0.0)

            predict += output.data.cpu().numpy().tolist()

            ana_output, _ = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()

        gold = numpy.array(gold, dtype=numpy.int32)
        predict = numpy.array(predict)

        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

        thresh_list = [0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results

        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()

        if best_results["f1"] > all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model,
                       model_save_dir + "network_model_pretrain.top.best")

        ana_gold = numpy.array(ana_gold, dtype=numpy.int32)
        ana_predict = numpy.array(ana_predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        for thresh in thresh_list:
            evaluation_results = get_metrics(ana_gold, ana_predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()

        if (echo + 1) % 10 == 0:
            best_network_model = torch.load(model_save_dir +
                                            "network_model_pretrain.top.best")
            print "DEV:"
            performance.performance(dev_docs, best_network_model)
            print "TEST:"
            performance.performance(test_docs, best_network_model)
Example #8
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    #network_file = "./model/model.pkl"
    #network_file = "./model/pretrain/network_model_pretrain.20"
    network_file = "./model/pretrain/network_model_pretrain.top.best"
    if os.path.isfile(network_file):
        print >> sys.stderr, "Read model from ./model/model.pkl"
        network_model = torch.load(network_file)
    else:
        embedding_matrix = numpy.load(embedding_file)
        #print len(embedding_matrix)

        "Building torch model"
        network_model = network.Network(pair_feature_dimention,
                                        mention_feature_dimention,
                                        word_embedding_dimention,
                                        span_dimention, 1000, embedding_size,
                                        embedding_dimention,
                                        embedding_matrix).cuda()
        print >> sys.stderr, "save model ..."
        torch.save(network_model, network_file)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    #train_docs = DataReader.DataGnerater("dev"+reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    #test_docs = DataReader.DataGnerater("test"+reduced)

    l2_lambda = 1e-6
    lr = 0.00002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    reinforce = True

    model_save_dir = "./model/pretrain/"

    metrics = performance.performance(dev_docs, network_model)

    p, r, f = metrics["b3"]

    f_b = [f]

    #for echo in range(30,200):
    for echo in range(20):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo

        #if echo == 100:
        #    lr = lr/2.0
        #if echo == 150:
        #    lr = lr/2.0

        #optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, network_model.parameters()), lr=lr, weight_decay=l2_lambda)
        #optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)
        cost = 0.0
        optimizer = optim.RMSprop(network_model.parameters(),
                                  lr=lr,
                                  eps=1e-5,
                                  weight_decay=l2_lambda)

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        pos_num = 0
        neg_num = 0
        inside_time = 0.0

        score_softmax = nn.Softmax()

        cluster_info = []
        new_cluster_num = 0
        cluster_info.append(-1)
        action_list = []
        new_cluster_info = []
        tmp_data = []

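        # Policy rollout: sample a cluster action for every anaphor; reward is
        # computed once the whole document has been processed.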
        #for data in train_docs.rl_case_generater():
        for data in train_docs.rl_case_generater(shuffle=True):
            inside_time += 1

            this_doc = train_docs
            tmp_data.append(data)

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data

            gold_chain = this_doc.gold_chain[rl["did"]]
            gold_dict = {}
            for chain in gold_chain:
                for item in chain:
                    gold_dict[item] = chain

            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            output, pair_score = network_model.forward_all_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                dropout_rate)
            ana_output, ana_score = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)

            reindex = autograd.Variable(
                torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))

            scores_reindex = torch.transpose(
                torch.cat((pair_score, ana_score), 1), 0, 1)[reindex]
            #scores_reindex = torch.transpose(torch.cat((pair_score,-1-0.3*ana_score),1),0,1)[reindex]

            for s, e in zip(rl["starts"], rl["ends"]):
                #action_prob: scores_reindex[s:e][1]
                score = score_softmax(
                    torch.transpose(scores_reindex[s:e], 0,
                                    1)).data.cpu().numpy()[0]
                this_action = utils.sample_action(score)
                #this_action = ac_list.index(max(score.tolist()))
                action_list.append(this_action)

                if this_action == len(score) - 1:
                    should_cluster = new_cluster_num
                    new_cluster_num += 1
                    new_cluster_info.append(1)
                else:
                    should_cluster = cluster_info[this_action]
                    new_cluster_info.append(0)

                cluster_info.append(should_cluster)

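            # Once the whole document has been consumed, score the induced
            # clustering with B-cubed and replay the stored batches to apply the
            # policy-gradient update below.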
            if rl["end"] == True:
                ev_document = utils.get_evaluation_document(
                    cluster_info, this_doc.gold_chain[rl["did"]],
                    candi_ids_return, new_cluster_num)
                p, r, f = evaluation.evaluate_documents([ev_document],
                                                        evaluation.b_cubed)
                trick_reward = utils.get_reward_trick(cluster_info, gold_dict,
                                                      new_cluster_info,
                                                      action_list,
                                                      candi_ids_return)

                #reward = f + trick_reward
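                # Baseline: running mean of B-cubed F1 over at most the last 128
                # documents; the advantage (f - average_f) is scaled by 10.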
                average_f = float(sum(f_b)) / len(f_b)

                reward = (f - average_f) * 10

                f_b.append(f)
                if len(f_b) > 128:
                    f_b = f_b[1:]

                index = 0
                for data in tmp_data:
                    mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
                    target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data

                    mention_index = autograd.Variable(
                        torch.from_numpy(mention_word_index).type(
                            torch.cuda.LongTensor))
                    mention_span = autograd.Variable(
                        torch.from_numpy(mention_span).type(
                            torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(
                        torch.from_numpy(candi_word_index).type(
                            torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(
                        torch.from_numpy(candi_span).type(
                            torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(
                        torch.from_numpy(feature_pair).type(
                            torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(
                        torch.from_numpy(pair_anaphors).type(
                            torch.cuda.LongTensor))
                    antecedents = autograd.Variable(
                        torch.from_numpy(pair_antecedents).type(
                            torch.cuda.LongTensor))

                    anaphoricity_index = autograd.Variable(
                        torch.from_numpy(anaphoricity_word_indexs).type(
                            torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(
                        torch.from_numpy(anaphoricity_spans).type(
                            torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(
                        torch.from_numpy(anaphoricity_features).type(
                            torch.cuda.FloatTensor))

                    rl_costs = autograd.Variable(
                        torch.from_numpy(rl["costs"]).type(
                            torch.cuda.FloatTensor))
                    rl_costs = torch.transpose(rl_costs, 0, 1)
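                    # Note: rl_costs is prepared here but unused in this variant;
                    # the document-level advantage alone drives the loss below.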

                    output, pair_score = network_model.forward_all_pair(
                        word_embedding_dimention, mention_index, mention_span,
                        candi_index, candi_spans, pair_feature, anaphors,
                        antecedents, dropout_rate)
                    ana_output, ana_score = network_model.forward_anaphoricity(
                        word_embedding_dimention, anaphoricity_index,
                        anaphoricity_span, anaphoricity_feature, dropout_rate)

                    reindex = autograd.Variable(
                        torch.from_numpy(rl["reindex"]).type(
                            torch.cuda.LongTensor))

                    optimizer.zero_grad()
                    loss = None
                    scores_reindex = torch.transpose(
                        torch.cat((pair_score, ana_score), 1), 0, 1)[reindex]
                    #scores_reindex = torch.transpose(torch.cat((pair_score,-1-0.3*ana_score),1),0,1)[reindex]

                    for s, e in zip(rl["starts"], rl["ends"]):
                        #action_prob: scores_reindex[s:e][1]
                        this_action = action_list[index]
                        #current_reward = reward + trick_reward[index]
                        current_reward = reward

                        #this_loss = -reward*(torch.transpose(F.log_softmax(torch.transpose(scores_reindex[s:e],0,1)),0,1)[this_action])
                        this_loss = -current_reward * (torch.transpose(
                            F.log_softmax(
                                torch.transpose(scores_reindex[s:e], 0, 1)), 0,
                            1)[this_action])

                        if loss is None:
                            loss = this_loss
                        else:
                            loss += this_loss
                        index += 1
                    #loss /= len(rl["starts"])
                    loss /= len(rl["starts"])
                    #loss = loss/train_docs.scale_factor
                    ## policy gradient
                    cost += loss.data[0]
                    loss.backward()
                    optimizer.step()

                new_cluster_num = 0
                cluster_info = []
                cluster_info.append(-1)
                tmp_data = []
                action_list = []
                new_cluster_info = []
            #if inside_time%50 == 0:
            #    performance.performance(dev_docs,network_model)
            #    print
            #    sys.stdout.flush()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "cost:", cost
        #print >> sys.stderr,"save model ..."
        #torch.save(network_model, model_save_dir+"network_model_pretrain.%d"%echo)

        performance.performance(dev_docs, network_model)

        sys.stdout.flush()
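
A note on the sampling helper used above: utils.sample_action is not shown in
this listing. From its call sites it takes a probability vector and returns one
sampled action index, so a minimal stand-in (an assumption, not the project's
actual implementation) could be:

    import numpy

    def sample_action(probs):
        # Draw one index according to the given probability vector; renormalize
        # defensively to absorb float drift from the softmax.
        probs = numpy.asarray(probs, dtype=numpy.float64)
        return int(numpy.random.choice(len(probs), p=probs / probs.sum()))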
Example #9
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/network_model_pretrain.best.top.pair"
    print >> sys.stderr,"Read model from ",best_network_file
    best_network_model = torch.load(best_network_file)

    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    network_model = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(network_model,best_network_model)

    best_network_file = "./model/network_model_pretrain.best.top.ana"
    print >> sys.stderr,"Read model from ",best_network_file
    best_network_model = torch.load(best_network_file)

    ana_network = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(ana_network,best_network_model)

    reduced=""
    if args.reduced == 1:
        reduced="_reduced"

    print >> sys.stderr,"prepare data for train ..."
    train_docs_iter = DataReader.DataGnerater("train"+reduced)
    print >> sys.stderr,"prepare data for dev and test ..."
    dev_docs_iter = DataReader.DataGnerater("dev"+reduced)
    test_docs_iter = DataReader.DataGnerater("test"+reduced)

    print "Performance after pretraining..."
    print "DEV"
    metric = performance.performance(dev_docs_iter,network_model,ana_network) 
    print "Average:",metric["average"]
    print "TEST"
    metric = performance.performance(test_docs_iter,network_model,ana_network) 
    print "Average:",metric["average"]
    print "***"
    print
    sys.stdout.flush()

    l2_lambda = 1e-6
    #lr = 0.00001
    #lr = 0.000005
    lr = 0.000002
    #lr = 0.0000009
    dropout_rate = 0.5
    shuffle = True
    times = 0

    reinforce = True

    model_save_dir = "./model/reinforce/"
    utils.mkdir(model_save_dir)

    score_softmax = nn.Softmax()
    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps = 1e-6)
    ana_optimizer = optim.RMSprop(ana_network.parameters(), lr=lr, eps = 1e-6)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.5)
    ana_scheduler = lr_scheduler.StepLR(ana_optimizer, step_size=15, gamma=0.5)
   
    for echo in range(30):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:",echo

        scheduler.step()
        ana_scheduler.step()

        train_docs = utils.load_pickle(args.DOCUMENT + 'train_docs.pkl')

        docs_by_id = {doc.did: doc for doc in train_docs}
       
        print >> sys.stderr,"Link docs ..."
        tmp_data = []
        path = []
        for data in train_docs_iter.rl_case_generater(shuffle=True):
            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data

            mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
            mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))

            output, pair_score = network_model.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,0.0)
            ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0)
            ana_pair_output, ana_pair_score = ana_network.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents, 0.0)

            reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))

            scores_reindex = torch.transpose(torch.cat((pair_score,ana_score),1),0,1)[reindex]
            ana_scores_reindex = torch.transpose(torch.cat((ana_pair_score,ana_score),1),0,1)[reindex]

            doc = docs_by_id[rl['did']]

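            # Two-stage sampling: the anaphoricity scores decide first (last
            # index = "start a new entity"); only if the mention is judged
            # anaphoric is an antecedent sampled, with the pair probabilities
            # reweighted by the anaphoricity mass of each candidate.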
            for s,e in zip(rl["starts"],rl["ends"]):
                score = score_softmax(torch.transpose(ana_scores_reindex[s:e],0,1)).data.cpu().numpy()[0]
                # Renamed from pair_score to avoid shadowing the network output above.
                pair_probs = score_softmax(torch.transpose(scores_reindex[s:e-1],0,1)).data.cpu().numpy()[0]

                ana_action = utils.sample_action(score)
                if ana_action == (e-s-1):
                    action = ana_action
                else:
                    pair_action = utils.sample_action(pair_probs*score[:-1])
                    action = pair_action
                path.append(action)
                link = action
                m1, m2 = rl['ids'][s + link]
                doc.link(m1, m2)

            tmp_data.append(
                (mention_word_index, mention_span, candi_word_index, candi_span,
                 feature_pair, pair_antecedents, pair_anaphors, target, positive,
                 negative, anaphoricity_word_indexs, anaphoricity_spans,
                 anaphoricity_features, anaphoricity_target, rl,
                 candi_ids_return))
                
            if rl["end"] == True:
                doc = docs_by_id[rl['did']]
                reward = doc.get_f1()
                inside_index = 0
                for (mention_word_index, mention_span, candi_word_index,
                     candi_span, feature_pair, pair_antecedents, pair_anaphors,
                     target, positive, negative, anaphoricity_word_indexs,
                     anaphoricity_spans, anaphoricity_features,
                     anaphoricity_target, rl, candi_ids_return) in tmp_data:

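                    # Recompute per-candidate costs for this batch: unlink each
                    # anaphor, hypothetically link it to every candidate to
                    # measure the F1 cost, then restore the original link.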
                    for (start, end) in zip(rl['starts'], rl['ends']):
                        ids = rl['ids'][start:end]
                        ana = ids[0, 1]
                        old_ant = doc.ana_to_ant[ana]
                        doc.unlink(ana)
                        costs = rl['costs'][start:end]
                        for ant_ind in range(end - start):
                            costs[ant_ind] = doc.link(ids[ant_ind, 0], ana, hypothetical=True, beta=1)
                        doc.link(old_ant, ana) 

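                    # Note: cost is re-initialized per batch and never
                    # accumulated in this example, so the epoch-level print
                    # below always reports 0.0.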
                    cost = 0.0
                    mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
                    mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
                    antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))
                    anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))
        
                    ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)
                    ana_pair_output, ana_pair_score = ana_network.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,dropout_rate)
        
                    reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))
        
                    ana_scores_reindex = torch.transpose(torch.cat((ana_pair_score,ana_score),1),0,1)[reindex]
        
                    ana_optimizer.zero_grad()
                    ana_loss = None
                    i = inside_index
                    for s,e in zip(rl["starts"],rl["ends"]):
                        costs = rl["costs"][s:e]
                        costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))
                        score = torch.squeeze(score_softmax(torch.transpose(ana_scores_reindex[s:e],0,1)))
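                        # Baseline: expected cost under the current policy; the
                        # advantage used below is (reward - baseline).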
                        baseline = torch.sum(score*costs) 

                        action = path[i]
                        this_cost = torch.log(score[action])*-1.0*(reward-baseline)
                        
                        if ana_loss is None:
                            ana_loss = this_cost
                        else:
                            ana_loss += this_cost
                        i += 1
                    ana_loss.backward()
                    torch.nn.utils.clip_grad_norm(ana_network.parameters(), 5.0)
                    ana_optimizer.step()
        
                    mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
                    mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
                    antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))
        
                    anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))
        
                    output, pair_score = network_model.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,dropout_rate)
        
                    ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)
        
                    reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))
        
                    scores_reindex = torch.transpose(torch.cat((pair_score,ana_score),1),0,1)[reindex]
        
                    pair_loss = None
                    optimizer.zero_grad()
                    i = inside_index
                    index = 0
                    for s,e in zip(rl["starts"],rl["ends"]):
                        action = path[i]
                        if (not (action == (e-s-1))) and (anaphoricity_target[index] == 1):
                            costs = rl["costs"][s:e-1]
                            costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))
                            score = torch.squeeze(score_softmax(torch.transpose(scores_reindex[s:e-1],0,1)))
                            baseline = torch.sum(score*costs)
                            this_cost = torch.log(score[action])*-1.0*(reward-baseline)
                            if pair_loss is None:
                                pair_loss = this_cost
                            else:
                                pair_loss += this_cost
                        i += 1
                        index += 1
                    if pair_loss is not None:
                        pair_loss.backward()
                        torch.nn.utils.clip_grad_norm(network_model.parameters(), 5.0)
                        optimizer.step()
                    inside_index = i

                tmp_data = []
                path = []
                        
        end_time = timeit.default_timer()
        print >> sys.stderr, "TRAINING Use %.3f seconds"%(end_time-start_time)
        print >> sys.stderr, "cost:",cost
        print >> sys.stderr,"save model ..."
        torch.save(network_model, model_save_dir+"network_model_rl_worker.%d"%echo)
        torch.save(ana_network, model_save_dir+"network_model_rl_manager.%d"%echo)
        
        print "DEV"
        metric = performance.performance(dev_docs_iter,network_model,ana_network) 
        print "Average:",metric["average"]
        print "DEV Ana: ",metric["ana"]
        print "TEST"
        metric = performance.performance(test_docs_iter,network_model,ana_network) 
        print "Average:",metric["average"]
        print "TEST Ana: ",metric["ana"]
        print

        sys.stdout.flush()
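
The per-decision update in this example is REINFORCE with an expected-cost
baseline: the baseline is the policy-weighted sum of the candidate costs, and
the loss is -log pi(action) * (reward - baseline). An isolated sketch of that
loss term, written against the current PyTorch API (explicit softmax dim)
rather than the legacy Variable-era calls above:

    import torch
    import torch.nn.functional as F

    def reinforce_loss(logits, action, reward, costs):
        # logits: 1-D scores over the candidate actions of one decision.
        # costs:  1-D per-candidate costs, same length as logits.
        probs = F.softmax(logits, dim=0)
        baseline = torch.sum(probs * costs)  # expected cost under the policy
        return -torch.log(probs[action]) * (reward - baseline)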
Example #10
            ev_document = utils.get_evaluation_document(
                cluster_info, this_doc.gold_chain[rl["did"]], candi_ids_return,
                new_cluster_num)
            test_document.append(ev_document)
            cluster_info = []
            new_cluster_num = 0
            cluster_info.append(-1)

    metrics = Evaluate.Output_Result(test_document)
    r, p, f = metrics["muc"]
    print "MUC: recall: %f precision: %f  f1: %f" % (r, p, f)
    r, p, f = metrics["b3"]
    print "B3: recall: %f precision: %f  f1: %f" % (r, p, f)
    r, p, f = metrics["ceaf"]
    print "CEAF: recall: %f precision: %f  f1: %f" % (r, p, f)
    return metrics


if __name__ == "__main__":
    DIR = args.DIR
    network_file = "./model/pretrain/network_model_pretrain.top.best"
    network_model = torch.load(network_file)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    dev_docs = DataReader.DataGnerater("dev" + reduced)
    #test_docs = DataReader.DataGnerater("test"+reduced)

    performance(dev_docs, network_model)
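
Output_Result above reports MUC, B-cubed and CEAF separately. A common single
selection criterion is the CoNLL score, the unweighted mean of the three F1
values; a small helper over the same metrics dict shape used here (each entry
an (r, p, f) triple) might look like:

    def conll_score(metrics):
        # Average the F1 entries (index 2) of MUC, B-cubed and CEAF.
        return sum(metrics[k][2] for k in ("muc", "b3", "ceaf")) / 3.0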