Example #1
    def __init__(self, world, body, pos=(0, 0), brain=None):
        """
        Shouldnt be used directly, use world.create_entity instead
        """
        self.id = Entity.NEXT_ID
        Entity.NEXT_ID += 1

        self.alive = True
        self.colour = ENTITY_DEFAULT_COLOUR

        self.world = world
        self.body = body
        self.pos = pos
        self.velocity = b2Vec2()

        self.sensors = []
        self._add_sensor(0.5, 0.25, EntityType.FOOD_SENSOR)  # 90 degrees on left

        self.total_food_eaten = 0
        self.cumulative_food = 0

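        # No brain supplied: fall back to a fresh network built from the
        # module-level NET_LAYERS topology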
        if brain is None:
            self.brain = net.Network(NET_LAYERS)
        else:
            self.brain = brain
Example #2
 def get_network(self):
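     """Build an AP for every item in self.data and assemble the network.

     Items missing latitude or longitude are skipped; altitude defaults
     to 10.0 and radius to 100 when absent.
     """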
     for item in self.data:
         if 'latitude' in item:
             lat = float(item['latitude'])
         else:
             continue
         if 'longitude' in item:
             lon = float(item['longitude'])
         else:
             continue
         if 'altitude' in item:
             alt = float(item['altitude'])
         else:
             alt = 10.0
         if 'radius' in item:
             rad = int(item['radius'])
         else:
             rad = 100
         self.ap_list.append(ap.AP(lat, lon, alt, rad))
         self.ap_list[-1].get_cover()
     self.net = net.Network(self.ap_list)
Example #3
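        # fragment: the tail of a net_copy helper, copying each parameter
        # tensor from the source model (mp) into the target (mcp)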
        mcp[i].data[:] = mp[i].data[:]


if __name__ == "__main__":
    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    worker = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(worker, best_network_model)

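    # The manager below is initialized from the same pretrained checkpoint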
    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    manager = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(manager, best_network_model)
Example #4
 def _clone_with_zero_pots(bn, psi=1):
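     """Return a structural copy of bn whose potential entries are all
     psi - 1 (all zeros for the default psi=1)."""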
     network_type = str(bn.type)
     var_sizes = list(bn.var_sizes)
     pot_domains = [list(domain) for domain in bn.pot_domains]
     pots = [[psi - 1] * len(pot) for pot in bn.pots]
     return net.Network(network_type, var_sizes, pot_domains, pots)
Example #5
import net
from pong import game
import time
import random as r
import numpy as np
import matplotlib.pyplot as plt


winner_genome_a = net.Network([5, 3, 1], weights_path="Saves_little_04/best.txt")
winner_genome_b = net.Network([5, 3, 1], weights_path="Saves_little_04/best.txt")

input('START GAME')
init = True
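# Self-play loop: both paddles are driven by the same saved genome; side B's
# horizontal inputs are negated so its network sees a mirrored board.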
while True:
    if init: up_a, down_a, up_b, down_b = False, False, False, False
    dx_a, dy_a, dx_b, dy_b, ball_y, ball_dx, ball_dy = game(up_a, down_a, up_b, down_b)
    up_a = winner_genome_a.forward_propagation([dx_a, dy_a, ball_y, ball_dx, ball_dy])>0.5
    down_a = not up_a
    up_b = winner_genome_b.forward_propagation([-dx_b, dy_b, ball_y, -ball_dx, ball_dy])>0.5
    down_b = not up_b
    init = False
Example #6
import net
from pong_copy import game
import time
import random as r
import numpy as np
import matplotlib.pyplot as plt

popolation = []
GENOME_SIZE = 50
for i in range(GENOME_SIZE):
    popolation.append(
        net.Network([5, 3, 1], weights_path=f"Saves_little_02/save_{i}.txt"))


def log_change(best_five):
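    """Produce the next generation: perturbed copies of the best genomes
    (noise scaled logarithmically from 1e-5 to 1e-1), 25 fresh random
    genomes, and the unmodified parents themselves."""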
    vectors = []

    for i, j in zip(np.logspace(-5, -1, 4), range(len(best_five))):
        vectors.append(best_five[j] +
                       (np.random.uniform(-1, 1, len(best_five[j])) * i) *
                       best_five[j])

    for _ in range(25):
        vectors.append(np.random.uniform(-1, 1, len(best_five[0])))

    for i in range(len(best_five)):
        vectors.append(best_five[i])

    return np.array(vectors)

Example #7
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/network_model_pretrain.best.top.pair"
    print >> sys.stderr,"Read model from ",best_network_file
    best_network_model = torch.load(best_network_file)

    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    network_model = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(network_model,best_network_model)

    best_network_file = "./model/network_model_pretrain.best.top.ana"
    print >> sys.stderr,"Read model from ",best_network_file
    best_network_model = torch.load(best_network_file)

    ana_network = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(ana_network,best_network_model)

    reduced=""
    if args.reduced == 1:
        reduced="_reduced"

    print >> sys.stderr,"prepare data for train ..."
    train_docs_iter = DataReader.DataGnerater("train"+reduced)
    print >> sys.stderr,"prepare data for dev and test ..."
    dev_docs_iter = DataReader.DataGnerater("dev"+reduced)
    test_docs_iter = DataReader.DataGnerater("test"+reduced)

    print "Performance after pretraining..."
    print "DEV"
    metric = performance.performance(dev_docs_iter,network_model,ana_network) 
    print "Average:",metric["average"]
    print "TEST"
    metric = performance.performance(test_docs_iter,network_model,ana_network) 
    print "Average:",metric["average"]
    print "***"
    print
    sys.stdout.flush()

    l2_lambda = 1e-6
    #lr = 0.00001
    #lr = 0.000005
    lr = 0.000002
    #lr = 0.0000009
    dropout_rate = 0.5
    shuffle = True
    times = 0

    reinforce = True

    model_save_dir = "./model/reinforce/"
    utils.mkdir(model_save_dir)

    score_softmax = nn.Softmax()
    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps = 1e-6)
    ana_optimizer = optim.RMSprop(ana_network.parameters(), lr=lr, eps = 1e-6)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.5)
    ana_scheduler = lr_scheduler.StepLR(ana_optimizer, step_size=15, gamma=0.5)
   
    for echo in range(30):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:",echo

        scheduler.step()
        ana_scheduler.step()

        train_docs = utils.load_pickle(args.DOCUMENT + 'train_docs.pkl')

        docs_by_id = {doc.did: doc for doc in train_docs}
       
        print >> sys.stderr,"Link docs ..."
        tmp_data = []
        path = []
        for data in train_docs_iter.rl_case_generater(shuffle=True):
            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data

            mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
            mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))

            output, pair_score = network_model.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,0.0)
            ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0)
            ana_pair_output, ana_pair_score = ana_network.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents, 0.0)

            reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))

            scores_reindex = torch.transpose(torch.cat((pair_score,ana_score),1),0,1)[reindex]
            ana_scores_reindex = torch.transpose(torch.cat((ana_pair_score,ana_score),1),0,1)[reindex]

            doc = docs_by_id[rl['did']]

            for s,e in zip(rl["starts"],rl["ends"]):
                score = score_softmax(torch.transpose(ana_scores_reindex[s:e],0,1)).data.cpu().numpy()[0]
                pair_score = score_softmax(torch.transpose(scores_reindex[s:e-1],0,1)).data.cpu().numpy()[0]

                ana_action = utils.sample_action(score)
                if ana_action == (e-s-1):
                    action = ana_action
                else:
                    pair_action = utils.sample_action(pair_score*score[:-1])
                    action = pair_action
                path.append(action)
                link = action
                m1, m2 = rl['ids'][s + link]
                doc.link(m1, m2)

            tmp_data.append((mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return))
                
            if rl["end"] == True:
                doc = docs_by_id[rl['did']]
                reward = doc.get_f1()
                inside_index = 0
                for mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return in tmp_data:

                    for (start, end) in zip(rl['starts'], rl['ends']):
                        ids = rl['ids'][start:end]
                        ana = ids[0, 1]
                        old_ant = doc.ana_to_ant[ana]
                        doc.unlink(ana)
                        costs = rl['costs'][start:end]
                        for ant_ind in range(end - start):
                            costs[ant_ind] = doc.link(ids[ant_ind, 0], ana, hypothetical=True, beta=1)
                        doc.link(old_ant, ana) 

                    cost = 0.0
                    mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
                    mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
                    antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))
                    anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))
        
                    ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)
                    ana_pair_output, ana_pair_score = ana_network.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,dropout_rate)
        
                    reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))
        
                    ana_scores_reindex = torch.transpose(torch.cat((ana_pair_score,ana_score),1),0,1)[reindex]
        
                    ana_optimizer.zero_grad()
                    ana_loss = None
                    i = inside_index
                    for s,e in zip(rl["starts"],rl["ends"]):
                        costs = rl["costs"][s:e]
                        costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))
                        score = torch.squeeze(score_softmax(torch.transpose(ana_scores_reindex[s:e],0,1)))
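                        # REINFORCE with a baseline: weight the log-prob of the
                        # sampled action by (reward - expected cost under policy)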
                        baseline = torch.sum(score*costs) 

                        action = path[i]
                        this_cost = torch.log(score[action])*-1.0*(reward-baseline)
                        
                        if ana_loss is None:
                            ana_loss = this_cost
                        else:
                            ana_loss += this_cost
                        i += 1
                    ana_loss.backward()
                    torch.nn.utils.clip_grad_norm(ana_network.parameters(), 5.0)
                    ana_optimizer.step()
        
                    mention_index = autograd.Variable(torch.from_numpy(mention_word_index).type(torch.cuda.LongTensor))
                    mention_spans = autograd.Variable(torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
                    antecedents = autograd.Variable(torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))
        
                    anaphoricity_index = autograd.Variable(torch.from_numpy(anaphoricity_word_indexs).type(torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(torch.from_numpy(anaphoricity_spans).type(torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(torch.from_numpy(anaphoricity_features).type(torch.cuda.FloatTensor))
        
                    output, pair_score = network_model.forward_all_pair(nnargs["word_embedding_dimention"],mention_index,mention_spans,candi_index,candi_spans,pair_feature,anaphors,antecedents,dropout_rate)
        
                    ana_output, ana_score = ana_network.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)
        
                    reindex = autograd.Variable(torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))
        
                    scores_reindex = torch.transpose(torch.cat((pair_score,ana_score),1),0,1)[reindex]
        
                    pair_loss = None
                    optimizer.zero_grad()
                    i = inside_index
                    index = 0
                    for s,e in zip(rl["starts"],rl["ends"]):
                        action = path[i]
                        if (not (action == (e-s-1))) and (anaphoricity_target[index] == 1):
                            costs = rl["costs"][s:e-1]
                            costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))
                            score = torch.squeeze(score_softmax(torch.transpose(scores_reindex[s:e-1],0,1)))
                            baseline = torch.sum(score*costs)
                            this_cost = torch.log(score[action])*-1.0*(reward-baseline)
                            if pair_loss is None:
                                pair_loss = this_cost
                            else:
                                pair_loss += this_cost
                        i += 1
                        index += 1
                    if pair_loss is not None:
                        pair_loss.backward()
                        torch.nn.utils.clip_grad_norm(network_model.parameters(), 5.0)
                        optimizer.step()
                    inside_index = i

                tmp_data = []
                path = []
                        
        end_time = timeit.default_timer()
        print >> sys.stderr, "TRAINING Use %.3f seconds"%(end_time-start_time)
        print >> sys.stderr, "cost:",cost
        print >> sys.stderr,"save model ..."
        torch.save(network_model, model_save_dir+"network_model_rl_worker.%d"%echo)
        torch.save(ana_network, model_save_dir+"network_model_rl_manager.%d"%echo)
        
        print "DEV"
        metric = performance.performance(dev_docs_iter,network_model,ana_network) 
        print "Average:",metric["average"]
        print "DEV Ana: ",metric["ana"]
        print "TEST"
        metric = performance.performance(test_docs_iter,network_model,ana_network) 
        print "Average:",metric["average"]
        print "TEST Ana: ",metric["ana"]
        print

        sys.stdout.flush()
Example #8
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    worker = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(worker, best_network_model)

    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    manager = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(manager, best_network_model)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    #train_docs_iter = DataReader.DataGnerater("train"+reduced)
    train_docs_iter = DataReader.DataGnerater("dev" + reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs_iter = DataReader.DataGnerater("dev" + reduced)
    test_docs_iter = DataReader.DataGnerater("test" + reduced)

    print "Performance after pretraining..."
    print "DEV"
    metric = performance.performance(dev_docs_iter, worker, manager)
    print "Average:", metric["average"]
    print "TEST"
    metric = performance.performance(test_docs_iter, worker, manager)
    print "Average:", metric["average"]
    print "***"
    print
    sys.stdout.flush()

    lr = nnargs["lr"]
    top_k = nnargs["top_k"]

    model_save_dir = "./model/reinforce/"
    utils.mkdir(model_save_dir)

    score_softmax = nn.Softmax()

    optimizer_manager = optim.RMSprop(manager.parameters(), lr=lr, eps=1e-6)
    optimizer_worker = optim.RMSprop(worker.parameters(), lr=lr, eps=1e-6)

    MAX_AVE = 2048

    for echo in range(nnargs["epoch"]):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo

        reward_log = Logger(Tensorboard + args.tb +
                            "/acl2018/%d/reward/" % echo,
                            flush_secs=3)
        entropy_log_manager = Logger(Tensorboard + args.tb +
                                     "/acl2018/%d/entropy/manager" % echo,
                                     flush_secs=3)
        entropy_log_worker = Logger(Tensorboard + args.tb +
                                    "/acl2018/%d/entropy/worker" % echo,
                                    flush_secs=3)

        #train_docs = utils.load_pickle(args.DOCUMENT + 'train_docs.pkl')
        train_docs = utils.load_pickle(args.DOCUMENT + 'dev_docs.pkl')
        docs_by_id = {doc.did: doc for doc in train_docs}

        ave_reward = []
        ave_manager_entropy = []
        ave_worker_entropy = []

        print >> sys.stderr, "Link docs ..."
        tmp_data = []
        cluster_info = {0: [0]}
        cluster_list = [0]
        current_new_cluster = 1
        predict_action_embedding = []
        choose_action = []
        mid = 1

        step = 0

        statistic = {
            "worker_hits": 0,
            "manager_hits": 0,
            "total": 0,
            "manager_predict_last": 0,
            "worker_predict_last": 0
        }

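        # Manager pass: sample a cluster for each anaphor and keep the mean
        # representation of the chosen cluster as the goal for the worker.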
        for data in train_docs_iter.rl_case_generater(shuffle=True):

            rl = data["rl"]

            scores_manager, representations_manager = get_score_representations(
                manager, data)

            for s, e in zip(rl["starts"], rl["ends"]):
                action_embeddings = representations_manager[s:e]

                probs = F.softmax(torch.transpose(scores_manager[s:e], 0, 1))

                m = Categorical(probs)
                this_action = m.sample()
                index = this_action.data.cpu().numpy()[0]

                if index == (e - s - 1):
                    should_cluster = current_new_cluster
                    cluster_info[should_cluster] = []
                    current_new_cluster += 1
                else:
                    should_cluster = cluster_list[index]

                choose_action.append(index)
                cluster_info[should_cluster].append(mid)
                cluster_list.append(should_cluster)
                mid += 1

                cluster_indexs = torch.cuda.LongTensor(
                    cluster_info[should_cluster])
                action_embedding_predict = torch.mean(
                    action_embeddings[cluster_indexs], 0, keepdim=True)
                predict_action_embedding.append(action_embedding_predict)

            tmp_data.append(data)

            if rl["end"] == True:

                inside_index = 0
                manager_path = []
                worker_path = []

                doc = docs_by_id[rl["did"]]

                for data in tmp_data:

                    rl = data["rl"]
                    pair_target = data["pair_target"]
                    anaphoricity_target = 1 - data["anaphoricity_target"]
                    target = numpy.concatenate(
                        (pair_target, anaphoricity_target))[rl["reindex"]]

                    scores_worker, representations_worker = get_score_representations(
                        worker, data)

                    for s, e in zip(rl["starts"], rl["ends"]):
                        action_embeddings = representations_worker[s:e]
                        score = score_softmax(
                            torch.transpose(scores_worker[s:e], 0,
                                            1)).data.cpu().numpy()[0]

                        action_embedding_choose = predict_action_embedding[
                            inside_index]
                        similarities = torch.sum(
                            torch.abs(action_embeddings -
                                      action_embedding_choose), 1)
                        similarities = similarities.data.cpu().numpy()

                        action_probabilities = []
                        action_list = []
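                        # Restrict the worker to the top_k actions whose
                        # representations are closest (L1) to the manager's goal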
                        action_candidates = heapq.nlargest(
                            top_k, -similarities)
                        for action in action_candidates:
                            action_index = numpy.argwhere(
                                similarities == -action)[0][0]
                            action_probabilities.append(score[action_index])
                            action_list.append(action_index)

                        manager_action = choose_action[inside_index]
                        if not manager_action in action_list:
                            action_list.append(manager_action)
                            action_probabilities.append(score[manager_action])

                        this_target = target[s:e]
                        manager_action = choose_action[inside_index]

                        sample_action = utils.sample_action(
                            numpy.array(action_probabilities))
                        worker_action = action_list[sample_action]

                        if this_target[worker_action] == 1:
                            statistic["worker_hits"] += 1
                        if this_target[manager_action] == 1:
                            statistic["manager_hits"] += 1
                        if worker_action == (e - s - 1):
                            statistic["worker_predict_last"] += 1
                        if manager_action == (e - s - 1):
                            statistic["manager_predict_last"] += 1
                        statistic["total"] += 1

                        inside_index += 1

                        #link = manager_action
                        link = worker_action
                        m1, m2 = rl['ids'][s + link]
                        doc.link(m1, m2)

                        manager_path.append(manager_action)
                        worker_path.append(worker_action)

                reward = doc.get_f1()
                for data in tmp_data:
                    rl = data["rl"]
                    for s, e in zip(rl["starts"], rl["ends"]):
                        ids = rl['ids'][s:e]
                        ana = ids[0, 1]
                        old_ant = doc.ana_to_ant[ana]
                        doc.unlink(ana)
                        costs = rl['costs'][s:e]
                        for ant_ind in range(e - s):
                            costs[ant_ind] = doc.link(ids[ant_ind, 0],
                                                      ana,
                                                      hypothetical=True,
                                                      beta=1)
                        doc.link(old_ant, ana)
                        #costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))

                inside_index = 0
                worker_entropy = 0.0

                for data in tmp_data:
                    rl = data["rl"]
                    new_step = step
                    # worker
                    scores_worker, representations_worker = get_score_representations(
                        worker, data, dropout=nnargs["dropout_rate"])
                    optimizer_worker.zero_grad()
                    worker_loss = None
                    for s, e in zip(rl["starts"], rl["ends"]):
                        costs = rl['costs'][s:e]
                        costs = autograd.Variable(
                            torch.from_numpy(costs).type(
                                torch.cuda.FloatTensor))
                        action = worker_path[inside_index]
                        score = F.softmax(
                            torch.transpose(scores_worker[s:e], 0, 1))
                        if not score.size()[1] == costs.size()[0]:
                            continue
                        score = torch.squeeze(score)

                        baseline = torch.sum(costs * score)
                        this_cost = torch.log(
                            score[action]) * -1.0 * (reward - baseline)

                        if worker_loss is None:
                            worker_loss = this_cost
                        else:
                            worker_loss += this_cost
                        worker_entropy += torch.sum(
                            score * torch.log(score + 1e-7)
                        ).data.cpu().numpy()[
                            0]  #+ 0.001*torch.sum(score*torch.log(score+1e-7))
                        inside_index += 1

                    worker_loss.backward()
                    torch.nn.utils.clip_grad_norm(worker.parameters(),
                                                  nnargs["clip"])
                    optimizer_worker.step()

                    ave_worker_entropy.append(worker_entropy)
                    if len(ave_worker_entropy) >= MAX_AVE:
                        ave_worker_entropy = ave_worker_entropy[1:]
                    entropy_log_worker.log_value(
                        'entropy',
                        float(sum(ave_worker_entropy)) /
                        float(len(ave_worker_entropy)), new_step)
                    new_step += 1

                inside_index = 0
                manager_entropy = 0.0
                for data in tmp_data:
                    new_step = step
                    rl = data["rl"]

                    ave_reward.append(reward)
                    if len(ave_reward) >= MAX_AVE:
                        ave_reward = ave_reward[1:]
                    reward_log.log_value(
                        'reward',
                        float(sum(ave_reward)) / float(len(ave_reward)),
                        new_step)

                    scores_manager, representations_manager = get_score_representations(
                        manager, data, dropout=nnargs["dropout_rate"])

                    optimizer_manager.zero_grad()
                    manager_loss = None
                    for s, e in zip(rl["starts"], rl["ends"]):
                        score = F.softmax(
                            torch.transpose(scores_manager[s:e], 0, 1))
                        costs = rl['costs'][s:e]
                        costs = autograd.Variable(
                            torch.from_numpy(costs).type(
                                torch.cuda.FloatTensor))
                        if not score.size()[1] == costs.size()[0]:
                            continue

                        action = manager_path[inside_index]
                        score = torch.squeeze(score)

                        baseline = torch.sum(costs * score)
                        this_cost = torch.log(score[action]) * -1.0 * (
                            reward - baseline
                        )  # + 0.001*torch.sum(score*torch.log(score+1e-7))

                        #this_cost = torch.sum(score*costs) + 0.001*torch.sum(score*torch.log(score+1e-7))

                        if manager_loss is None:
                            manager_loss = this_cost
                        else:
                            manager_loss += this_cost

                        manager_entropy += torch.sum(
                            score *
                            torch.log(score + 1e-7)).data.cpu().numpy()[0]
                        inside_index += 1

                    manager_loss.backward()
                    torch.nn.utils.clip_grad_norm(manager.parameters(),
                                                  nnargs["clip"])
                    optimizer_manager.step()

                    ave_manager_entropy.append(manager_entropy)
                    if len(ave_manager_entropy) >= MAX_AVE:
                        ave_manager_entropy = ave_manager_entropy[1:]
                    entropy_log_manager.log_value(
                        'entropy',
                        float(sum(ave_manager_entropy)) /
                        float(len(ave_manager_entropy)), new_step)
                    new_step += 1

                step = new_step
                tmp_data = []
                cluster_info = {0: [0]}
                cluster_list = [0]
                current_new_cluster = 1
                mid = 1
                predict_action_embedding = []
                choose_action = []

        end_time = timeit.default_timer()
        print >> sys.stderr, "TRAINING Use %.3f seconds" % (end_time -
                                                            start_time)
        print >> sys.stderr, "save model ..."
        #print "Top k",top_k
        print "Worker Hits", statistic[
            "worker_hits"], "Manager Hits", statistic[
                "manager_hits"], "Total", statistic["total"]
        print "Worker predict last", statistic[
            "worker_predict_last"], "Manager predict last", statistic[
                "manager_predict_last"]
        #torch.save(network_model, model_save_dir+"network_model_rl_worker.%d"%echo)
        #torch.save(ana_network, model_save_dir+"network_model_rl_manager.%d"%echo)

        print "DEV"
        metric = performance.performance(dev_docs_iter, worker, manager)
        print "Average:", metric["average"]
        print "DEV manager"
        metric = performance_manager.performance(dev_docs_iter, worker,
                                                 manager)
        print "Average:", metric["average"]
        print "TEST"
        metric = performance.performance(test_docs_iter, worker, manager)
        print "Average:", metric["average"]
        print
        sys.stdout.flush()
Example #9
import numpy as np
import cv2
from net import load
from net2 import loadNet

if __name__ == '__main__':
    
#    import csv_loader
    import net as network
    #training_data, validation_data, test_data = csv_loader.load_data()
    #training_data, validation_data, testing_data = loadPokemons()
    training_data, validation_data, testing_data = loadImagesSimple()
    validation_data = list(validation_data)
    training_data = list(training_data)
    
    #net = network.Network([100, 10, 2], cost=network.QuadraticCost)
    net = network.Network([100, 10, 2], cost=network.QuadraticCost)
    #net.SGD(training_data, 10, 10, 0.2, lmbda=1.0, evaluation_data=list(validation_data), monitor_evaluation_accuracy=True)
    net.SGD(training_data, 50, 10, 0.1, 0.0, evaluation_data=list(validation_data), monitor_evaluation_cost=True, monitor_evaluation_accuracy=True, monitor_training_cost=True, monitor_training_accuracy=True)
    net.save("DANN_distance.txt")
    
    # best result so far in DANN_100.txt
    
    #net = load("DANN_distance.txt")
    a = toSepia(cv2.imread("resized/images109.png"))
    a = processImage(a)
    print(net.feedforward(a))
    print(np.argmax(net.feedforward(a)))
    
    a = toSepia(cv2.imread("resized/images109_sepia.png"))
    a = processImage(a)
    print(net.feedforward(a))
Example #10
import net
from pong import game
import time
import random as r
import numpy as np
import matplotlib.pyplot as plt

popolation = []
GENOME_SIZE = 50
for i in range(GENOME_SIZE):
    popolation.append(net.Network([5, 3, 1]))  # , weights_path=f"Saves_little/save_{i}.txt"

def log_change(best_five):
    vectors = []

    for i, j in zip(np.logspace(-5, -1, 4), range(len(best_five))):
        vectors.append(best_five[j] + (np.random.uniform(-1, 1, len(best_five[j]))*i)*best_five[j]  )

    for _ in range(25):
        vectors.append(np.random.uniform(-1, 1, len(best_five[0])))

    for i in range(len(best_five)):
        vectors.append(best_five[i])

    return np.array(vectors)    

best = 0
#a = net.Network([2, 3, 2], weights_path="best.txt")
epochs = 5

std_all = []
Example #11
import net
import sys, os, json
import scipy.stats  # for entropy
import setting

# arg
workdir = sys.argv[1]
itercnt = int(sys.argv[2])

#
# load generated graph file
#
net_base = net.Network()
n_rootnei = 0
with open(os.path.join(workdir, 'cydata.json'), 'r') as f:
    net_base.loadJson(f)
with open(os.path.join(workdir, 'cydata.json'), 'r') as f:
    j = json.load(f)
    n_rootnei = j['root_nei_total_cnt']
net_base.gene_targets.sort(key=lambda x: abs(x['pval']))  # sort targets first
# load metadata of whole graph
# TODO: make data when netserver.py runs
with open('net_meta.json', 'r') as f:
    net_meta = json.load(f)


##
# input: arrays
# output: p-value
#
def calcpv(samplearr, v):
Example #12
def load_model(path_model, input_size, output_size, gpu):
    model = net.Network(input_size, output_size, gpu)
    model.load_state_dict(torch.load(path_model))
    return model
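
# Example usage (hypothetical path; sizes match the training script in Example #13):
# model = load_model("./model/model.pt", input_size=400, output_size=1, gpu=True)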
Example #13
train, validation, test, verbose, path, file, modelf, gpu = main(path)
gpu = True
training = True
n_epochs = len(train)
n_iters = train[0].n_item
input_size = 400
output_size = 1
lr = 0.005
cycle = 5

if training:
    if verbose:
        print('###\tCreate the net')

    # create the net
    network = net.Network(input_size, output_size, gpu)
    if gpu:
        network.cuda()
    criterion = nn.L1Loss()
    optimizer = optim.SGD(network.parameters(), lr)

    if verbose:
        print('###\tStart training')
        start = timer()

    losses = np.zeros(n_epochs)
    hid = network.initHidden()
    for k in range(cycle):
        for epoch in range(n_epochs):
            input = []
            target = []
Example #14
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    #network_file = "./model/model.pkl"
    #network_file = "./model/pretrain/network_model_pretrain.20"
    network_file = "./model/pretrain/network_model_pretrain.top.best"
    if os.path.isfile(network_file):
        print >> sys.stderr, "Read model from ./model/model.pkl"
        network_model = torch.load(network_file)
    else:
        embedding_matrix = numpy.load(embedding_file)
        #print len(embedding_matrix)

        "Building torch model"
        network_model = network.Network(pair_feature_dimention,
                                        mention_feature_dimention,
                                        word_embedding_dimention,
                                        span_dimention, 1000, embedding_size,
                                        embedding_dimention,
                                        embedding_matrix).cuda()
        print >> sys.stderr, "save model ..."
        torch.save(network_model, network_file)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    #train_docs = DataReader.DataGnerater("dev"+reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    #test_docs = DataReader.DataGnerater("test"+reduced)

    l2_lambda = 1e-6
    lr = 0.00002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    reinforce = True

    model_save_dir = "./model/pretrain/"

    metrics = performance.performance(dev_docs, network_model)

    p, r, f = metrics["b3"]

    f_b = [f]

    #for echo in range(30,200):
    for echo in range(20):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo

        #if echo == 100:
        #    lr = lr/2.0
        #if echo == 150:
        #    lr = lr/2.0

        #optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, network_model.parameters()), lr=lr, weight_decay=l2_lambda)
        #optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)
        cost = 0.0
        optimizer = optim.RMSprop(network_model.parameters(),
                                  lr=lr,
                                  eps=1e-5,
                                  weight_decay=l2_lambda)

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        pos_num = 0
        neg_num = 0
        inside_time = 0.0

        score_softmax = nn.Softmax()

        cluster_info = []
        new_cluster_num = 0
        cluster_info.append(-1)
        action_list = []
        new_cluster_info = []
        tmp_data = []

        #for data in train_docs.rl_case_generater():
        for data in train_docs.rl_case_generater(shuffle=True):
            inside_time += 1

            this_doc = train_docs
            tmp_data.append(data)

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data

            gold_chain = this_doc.gold_chain[rl["did"]]
            gold_dict = {}
            for chain in gold_chain:
                for item in chain:
                    gold_dict[item] = chain

            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            output, pair_score = network_model.forward_all_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                dropout_rate)
            ana_output, ana_score = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)

            reindex = autograd.Variable(
                torch.from_numpy(rl["reindex"]).type(torch.cuda.LongTensor))

            scores_reindex = torch.transpose(
                torch.cat((pair_score, ana_score), 1), 0, 1)[reindex]
            #scores_reindex = torch.transpose(torch.cat((pair_score,-1-0.3*ana_score),1),0,1)[reindex]

            for s, e in zip(rl["starts"], rl["ends"]):
                #action_prob: scores_reindex[s:e][1]
                score = score_softmax(
                    torch.transpose(scores_reindex[s:e], 0,
                                    1)).data.cpu().numpy()[0]
                this_action = utils.sample_action(score)
                #this_action = ac_list.index(max(score.tolist()))
                action_list.append(this_action)

                if this_action == len(score) - 1:
                    should_cluster = new_cluster_num
                    new_cluster_num += 1
                    new_cluster_info.append(1)
                else:
                    should_cluster = cluster_info[this_action]
                    new_cluster_info.append(0)

                cluster_info.append(should_cluster)

            if rl["end"] == True:
                ev_document = utils.get_evaluation_document(
                    cluster_info, this_doc.gold_chain[rl["did"]],
                    candi_ids_return, new_cluster_num)
                p, r, f = evaluation.evaluate_documents([ev_document],
                                                        evaluation.b_cubed)
                trick_reward = utils.get_reward_trick(cluster_info, gold_dict,
                                                      new_cluster_info,
                                                      action_list,
                                                      candi_ids_return)

                #reward = f + trick_reward
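                # Reward: this rollout's B-cubed F1 relative to a running
                # average over the last 128 rollouts (a moving baseline)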
                average_f = float(sum(f_b)) / len(f_b)

                reward = (f - average_f) * 10

                f_b.append(f)
                if len(f_b) > 128:
                    f_b = f_b[1:]

                index = 0
                for data in tmp_data:
                    mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
                    target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,rl,candi_ids_return = data

                    mention_index = autograd.Variable(
                        torch.from_numpy(mention_word_index).type(
                            torch.cuda.LongTensor))
                    mention_span = autograd.Variable(
                        torch.from_numpy(mention_span).type(
                            torch.cuda.FloatTensor))
                    candi_index = autograd.Variable(
                        torch.from_numpy(candi_word_index).type(
                            torch.cuda.LongTensor))
                    candi_spans = autograd.Variable(
                        torch.from_numpy(candi_span).type(
                            torch.cuda.FloatTensor))
                    pair_feature = autograd.Variable(
                        torch.from_numpy(feature_pair).type(
                            torch.cuda.FloatTensor))
                    anaphors = autograd.Variable(
                        torch.from_numpy(pair_anaphors).type(
                            torch.cuda.LongTensor))
                    antecedents = autograd.Variable(
                        torch.from_numpy(pair_antecedents).type(
                            torch.cuda.LongTensor))

                    anaphoricity_index = autograd.Variable(
                        torch.from_numpy(anaphoricity_word_indexs).type(
                            torch.cuda.LongTensor))
                    anaphoricity_span = autograd.Variable(
                        torch.from_numpy(anaphoricity_spans).type(
                            torch.cuda.FloatTensor))
                    anaphoricity_feature = autograd.Variable(
                        torch.from_numpy(anaphoricity_features).type(
                            torch.cuda.FloatTensor))

                    rl_costs = autograd.Variable(
                        torch.from_numpy(rl["costs"]).type(
                            torch.cuda.FloatTensor))
                    rl_costs = torch.transpose(rl_costs, 0, 1)

                    output, pair_score = network_model.forward_all_pair(
                        word_embedding_dimention, mention_index, mention_span,
                        candi_index, candi_spans, pair_feature, anaphors,
                        antecedents, dropout_rate)
                    ana_output, ana_score = network_model.forward_anaphoricity(
                        word_embedding_dimention, anaphoricity_index,
                        anaphoricity_span, anaphoricity_feature, dropout_rate)

                    reindex = autograd.Variable(
                        torch.from_numpy(rl["reindex"]).type(
                            torch.cuda.LongTensor))

                    optimizer.zero_grad()
                    loss = None
                    scores_reindex = torch.transpose(
                        torch.cat((pair_score, ana_score), 1), 0, 1)[reindex]
                    #scores_reindex = torch.transpose(torch.cat((pair_score,-1-0.3*ana_score),1),0,1)[reindex]

                    for s, e in zip(rl["starts"], rl["ends"]):
                        #action_prob: scores_reindex[s:e][1]
                        this_action = action_list[index]
                        #current_reward = reward + trick_reward[index]
                        current_reward = reward

                        #this_loss = -reward*(torch.transpose(F.log_softmax(torch.transpose(scores_reindex[s:e],0,1)),0,1)[this_action])
                        this_loss = -current_reward * (torch.transpose(
                            F.log_softmax(
                                torch.transpose(scores_reindex[s:e], 0, 1)), 0,
                            1)[this_action])

                        if loss is None:
                            loss = this_loss
                        else:
                            loss += this_loss
                        index += 1
                    loss /= len(rl["starts"])
                    #loss = loss/train_docs.scale_factor
                    ## policy gradient
                    cost += loss.data[0]
                    loss.backward()
                    optimizer.step()

                new_cluster_num = 0
                cluster_info = []
                cluster_info.append(-1)
                tmp_data = []
                action_list = []
                new_cluster_info = []
            #if inside_time%50 == 0:
            #    performance.performance(dev_docs,network_model)
            #    print
            #    sys.stdout.flush()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "cost:", cost
        #print >> sys.stderr,"save model ..."
        #torch.save(network_model, model_save_dir+"network_model_pretrain.%d"%echo)

        performance.performance(dev_docs, network_model)

        sys.stdout.flush()
Example #15
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/network_model_pretrain.best"
    print >> sys.stderr,"Read model from",best_network_file
    best_network_model = torch.load(best_network_file)
        
    embedding_matrix = numpy.load(embedding_file)

    "Building torch model"
    network_model = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    print >> sys.stderr,"save model ..."

    net_copy(network_model,best_network_model)

    reduced=""
    if args.reduced == 1:
        reduced="_reduced"

    print >> sys.stderr,"prepare data for train ..."
    train_docs = DataReader.DataGnerater("train"+reduced)
    print >> sys.stderr,"prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev"+reduced)
    test_docs = DataReader.DataGnerater("test"+reduced)


    l2_lambda = 1e-6
    lr = nnargs["lr"]
    dropout_rate = nnargs["dropout_rate"]
    epoch = nnargs["epoch"]

    model_save_dir = "./model/bp/"
   
    last_cost = 0.0
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
        }
  
    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5)

    for echo in range(epoch):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:",echo
        
        scheduler.step()

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        for data in train_docs.train_generater(shuffle=True):

            mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor))
            
            reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor))
            start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor))
            top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor))

            anaphoricity_target = data["anaphoricity_target"]
            anaphoricity_gold = anaphoricity_target.tolist()
            ana_label = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold]))

            optimizer.zero_grad()

            output,output_reindex = network_model.forward_top_pair(nnargs["word_embedding_dimention"],mention_index,mention_span,candi_index,candi_spans,pair_feature,anaphors,antecedents,reindex,start_index,end_index,dropout_rate)
            loss = F.binary_cross_entropy(output,top_gold,size_average=False)/train_docs.scale_factor_top

            ana_output,_,_ = network_model.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)
            ana_loss = F.binary_cross_entropy(ana_output,ana_label,size_average=False)/train_docs.anaphoricity_scale_factor_top

            loss_all = loss + ana_loss    
            
            loss_all.backward()
            pair_cost_this_turn += loss.data[0]
            optimizer.step()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain",echo,"Pair total cost:",pair_cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds"%(end_time-start_time)
        print >> sys.stderr, "Learning Rate",lr

        gold = []
        predict = []

        ana_gold = []
        ana_predict = []

        for data in dev_docs.train_generater(shuffle=False):

            mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor))

            
            reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor))
            start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor))
            top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor))

            anaphoricity_target = data["anaphoricity_target"]
            anaphoricity_gold = anaphoricity_target.tolist()
            ana_label = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold]))
            
            gold += data["top_gold"].tolist()
            ana_gold += anaphoricity_target.tolist()
        
            output, output_reindex = network_model.forward_top_pair(
                nnargs["word_embedding_dimention"], mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                reindex, start_index, end_index, 0.0)

            predict += output.data.cpu().numpy().tolist()

            ana_output, _, _ = network_model.forward_anaphoricity(
                nnargs["word_embedding_dimention"], anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()
        
        gold = numpy.array(gold,dtype=numpy.int32)
        predict = numpy.array(predict)

        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

        thresh_list = [0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
 
        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush() 

        if best_results["f1"] >= all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model, model_save_dir+"network_model_pretrain.best.top")

        ana_gold = numpy.array(ana_gold,dtype=numpy.int32)
        ana_predict = numpy.array(ana_predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        for thresh in thresh_list:
            evaluation_results = get_metrics(ana_gold, ana_predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush() 

        if (echo+1)%10 == 0:
            best_network_model = torch.load(model_save_dir+"network_model_pretrain.best.top") 
            print "DEV:"
            performance.performance(dev_docs,best_network_model)
            print "TEST:"
            performance.performance(test_docs,best_network_model)
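
# Editor's sketch: the examples above and below repeatedly call
# get_metrics(gold, predict, thresh) without defining it. The helper below is
# a hypothetical reconstruction inferred from usage alone -- it must accept
# integer gold labels, real-valued scores and a threshold, and return a dict
# with the keys 'thresh', 'accuracy', 'precision', 'recall' and 'f1'. It is
# not the repository's actual implementation.
import numpy


def get_metrics(gold, predict, thresh):
    # Binarize the scores at the given threshold.
    pred = (numpy.asarray(predict).ravel() >= thresh).astype(numpy.int32)
    gold = numpy.asarray(gold).ravel().astype(numpy.int32)
    tp = float(numpy.sum((pred == 1) & (gold == 1)))
    fp = float(numpy.sum((pred == 1) & (gold == 0)))
    fn = float(numpy.sum((pred == 0) & (gold == 1)))
    accuracy = float(numpy.mean(pred == gold))
    precision = tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = tp / (tp + fn) if tp + fn > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) \
        if precision + recall > 0 else 0.0
    return {'thresh': thresh, 'accuracy': accuracy, 'precision': precision,
            'recall': recall, 'f1': f1}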
Ejemplo n.º 16
0
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/pretrain/network_model_pretrain.best"
    print >> sys.stderr, "Read model from ./model/model.pkl"
    best_network_model = torch.load(best_network_file)

    embedding_matrix = numpy.load(embedding_file)

    "Building torch model"
    network_model = network.Network(pair_feature_dimention,
                                    mention_feature_dimention,
                                    word_embedding_dimention, span_dimention,
                                    1000, embedding_size, embedding_dimention,
                                    embedding_matrix).cuda()
    print >> sys.stderr, "save model ..."
    #torch.save(network_model,network_file)

    net_copy(network_model, best_network_model)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    test_docs = DataReader.DataGnerater("test" + reduced)

    l2_lambda = 1e-6
    lr = 0.0002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    model_save_dir = "./model/pretrain/"

    last_cost = 0.0
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
    }

    for echo in range(100):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo

        #if echo == 100:
        #    lr = lr/2.0
        #if echo == 150:
        #    lr = lr/2.0

        #optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, network_model.parameters()), lr=lr, weight_decay=l2_lambda)
        #optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)
        optimizer = optim.RMSprop(network_model.parameters(),
                                  lr=lr,
                                  eps=1e-5,
                                  weight_decay=l2_lambda)
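        # Note: re-creating the RMSprop optimizer at the top of every epoch
        # resets its running gradient statistics; only lr and weight_decay
        # carry over between epochs.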

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        pos_num = 0
        neg_num = 0
        inside_time = 0.0

        for data in train_docs.train_generater(shuffle=shuffle, top=True):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,top_x = data
            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            reindex = autograd.Variable(
                torch.from_numpy(top_x["score_index"]).type(
                    torch.cuda.LongTensor))

            start_index = autograd.Variable(
                torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(
                torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor))

            top_gold = autograd.Variable(
                torch.from_numpy(top_x["top_gold"]).type(
                    torch.cuda.FloatTensor))

            anaphoricity_gold = anaphoricity_target.tolist()
            ana_label = autograd.Variable(
                torch.cuda.FloatTensor([anaphoricity_gold]))

            optimizer.zero_grad()

            output, output_reindex = network_model.forward_top_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                reindex, start_index, end_index, dropout_rate)
            loss = F.binary_cross_entropy(
                output, top_gold,
                size_average=False) / train_docs.scale_factor_top

            ana_output, _ = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)
            ana_loss = F.binary_cross_entropy(
                ana_output, ana_label,
                size_average=False) / train_docs.anaphoricity_scale_factor_top

            loss_all = loss + ana_loss

            loss_all.backward()
            pair_cost_this_turn += loss.data[0]
            optimizer.step()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain", echo, "Pair total cost:", pair_cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "Learning Rate", lr

        print >> sys.stderr, "save model ..."
        torch.save(network_model,
                   model_save_dir + "network_model_pretrain.%d.top" % echo)

        #if cost_this_turn > last_cost:
        #    lr = lr*0.7
        gold = []
        predict = []

        ana_gold = []
        ana_predict = []

        for data in dev_docs.train_generater(shuffle=False, top=True):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative, anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target, top_x = data

            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            reindex = autograd.Variable(
                torch.from_numpy(top_x["score_index"]).type(
                    torch.cuda.LongTensor))
            start_index = autograd.Variable(
                torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(
                torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor))

            gold += top_x["top_gold"].tolist()
            ana_gold += anaphoricity_target.tolist()

            output, output_reindex = network_model.forward_top_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                reindex, start_index, end_index, 0.0)

            predict += output.data.cpu().numpy().tolist()

            ana_output, _ = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()

        gold = numpy.array(gold, dtype=numpy.int32)
        predict = numpy.array(predict)

        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

        thresh_list = [0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results

        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()

        if best_results["f1"] > all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model,
                       model_save_dir + "network_model_pretrain.top.best")

        ana_gold = numpy.array(ana_gold, dtype=numpy.int32)
        ana_predict = numpy.array(ana_predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        for thresh in thresh_list:
            evaluation_results = get_metrics(ana_gold, ana_predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()

        if (echo + 1) % 10 == 0:
            best_network_model = torch.load(model_save_dir +
                                            "network_model_pretrain.top.best")
            print "DEV:"
            performance.performance(dev_docs, best_network_model)
            print "TEST:"
            performance.performance(test_docs, best_network_model)
Ejemplo n.º 17
0
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    network_model = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    test_docs = DataReader.DataGnerater("test" + reduced)

    l2_lambda = 1e-6
    #lr = 0.00009
    lr = 0.0001
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    model_save_dir = "./model/"

    last_cost = 0.0
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
    }

    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5)
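    # StepLR halves the learning rate (gamma=0.5) every 75 epochs, so with the
    # 100-epoch loop below the decay fires once, at epoch 75. Calling
    # scheduler.step() at the top of each epoch follows the pre-1.1 PyTorch
    # convention used throughout these examples.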

    for echo in range(100):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo
        scheduler.step()

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        inside_time = 0.0

        for data in train_docs.train_generater(shuffle=shuffle):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target = data
            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            gold = target.tolist()
            anaphoricity_gold = anaphoricity_target.tolist()

            pair_nums += len(gold)
            ana_nums += len(anaphoricity_gold)

            label = autograd.Variable(torch.cuda.FloatTensor([gold]))
            ana_label = autograd.Variable(
                torch.cuda.FloatTensor([anaphoricity_gold]))

            output, _ = network_model.forward_all_pair(
                nnargs["word_embedding_dimention"], mention_index,
                mention_span, candi_index, candi_spans, pair_feature, anaphors,
                antecedents, dropout_rate)
            ana_output, _ = network_model.forward_anaphoricity(
                nnargs["word_embedding_dimention"], anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)

            optimizer.zero_grad()

            #loss = get_pair_loss(output,positive,negative,train_docs.scale_factor)
            loss = F.binary_cross_entropy(
                output, label, size_average=False) / train_docs.scale_factor
            #ana_loss = F.binary_cross_entropy(ana_output,ana_label,size_average=False)/train_docs.anaphoricity_scale_factor
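            # With size_average=False the BCE is summed over all pairs; the
            # division by the data reader's scale_factor then normalizes that
            # sum by a corpus-level factor rather than per batch.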

            pair_cost_this_turn += loss.data[0] * train_docs.scale_factor

            loss_all = loss
            loss_all.backward()
            optimizer.step()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "Learning Rate", lr

        #print >> sys.stderr,"save model ..."
        #torch.save(network_model, model_save_dir+"network_model_pretrain.%d"%echo)

        gold = []
        predict = []

        ana_gold = []
        ana_predict = []

        for data in dev_docs.train_generater(shuffle=False):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative, anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target = data

            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            gold += target.tolist()
            ana_gold += anaphoricity_target.tolist()

            output, _ = network_model.forward_all_pair(
                nnargs["word_embedding_dimention"], mention_index,
                mention_span, candi_index, candi_spans, pair_feature, anaphors,
                antecedents, 0.0)
            predict += output.data.cpu().numpy()[0].tolist()

            ana_output, _ = network_model.forward_anaphoricity(
                nnargs["word_embedding_dimention"], anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()

        gold = numpy.array(gold, dtype=numpy.int32)
        predict = numpy.array(predict)

        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

        thresh_list = [0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results

        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()

        if best_results["f1"] >= all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model,
                       model_save_dir + "network_model_pretrain.best.pair")

        sys.stdout.flush()

    ## output best
    print "In sum, pair accuracy: %f and Fscore: %f with thresh: %f"\
        %(all_best_results["accuracy"],all_best_results["f1"],all_best_results["thresh"])
    sys.stdout.flush()
Ejemplo n.º 18
0
train, validation, test, verbose, path, file, modelf = main(path)
n_epochs = len(train)
n_iters = train[0].n_item
input_size = 2
h1 = 256
h2 = 128
h3 = 64
output_size = 3 # fixed
lr = 0.005


if verbose:
    print('###\tCreate the net')

# create the net
rnn = net.Network(input_size, n_iters, output_size, False, lr, h1, h2)
criterion = nn.MSELoss()
optimizer = optim.SGD(rnn.parameters(), lr)


if verbose:
    print('###\tStart training')
    start = timer()

losses = np.zeros(n_epochs)

for epoch in range(n_epochs):
    inputs = []
    targets = []
    for i in range(len(train[epoch].items)):
        inputs.append(train[epoch].items[i][0])
Ejemplo n.º 19
0
def main():

    data_dir = "../data/"
    os.makedirs(data_dir, exist_ok=True)

    np.set_printoptions(suppress=True)

    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    x_train, y_train = sklearn.utils.shuffle(x_train, y_train)
    x_test, y_test = sklearn.utils.shuffle(x_test, y_test)

    logger = utilities.get_logger(os.path.join(data_dir, "mnist.html"))

    log_size = 10

    # Log a few samples
    utilities.log_samples(logger, x_test[:log_size], y_test[:log_size])

    # Reshape the 28x28 matrices into 784-element column vectors
    x_train_flat = x_train.reshape(60000, 784, 1)
    x_test_flat = x_test.reshape(10000, 784, 1)

    # ys are scalars, convert them to one-hot encoded vectors
    y_train_categorical = keras.utils.to_categorical(y_train,
                                                     num_classes=10).reshape(
                                                         60000, 10, 1)
    y_test_categorical = keras.utils.to_categorical(y_test,
                                                    num_classes=10).reshape(
                                                        10000, 10, 1)
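    # e.g. the scalar label 3 becomes the (10, 1) column vector
    # [[0], [0], [0], [1], [0], [0], [0], [0], [0], [0]]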

    model = net.Network(layers=[784, 100, 50, 10])

    # Log untrained model predictions
    log_predictions(logger,
                    model,
                    x_test[:log_size],
                    y_test[:log_size],
                    header="Untrained model")

    train_cost, train_accuracy = net.get_statistics(model, x_train_flat,
                                                    y_train_categorical)
    print("Initial training cost: {:.3f}, training accuracy: {:.3f}".format(
        train_cost, train_accuracy))

    test_cost, test_accuracy = net.get_statistics(model, x_test_flat,
                                                  y_test_categorical)
    print("Initial test cost: {:.3f}, test accuracy: {:.3f}".format(
        test_cost, test_accuracy))

    model.fit(x_train_flat,
              y_train_categorical,
              epochs=10,
              learning_rate=0.1,
              x_test=x_test_flat,
              y_test=y_test_categorical)

    # Log trained model predictions
    log_predictions(logger,
                    model,
                    x_test[:log_size],
                    y_test[:log_size],
                    header="Trained model")