Example #1
 def __init__(self,
              vocab_size,
              key_size,
              query_size,
              value_size,
              num_hiddens,
              norm_shape,
              ffn_num_input,
              ffn_num_hiddens,
              num_heads,
              num_layers,
              dropout,
              use_bias=False,
              **kwargs):
     super(TransformerEncoder, self).__init__(**kwargs)
     self.num_hiddens = num_hiddens
     self.embedding = nn.Embedding(vocab_size, num_hiddens)
     self.pos_encoding = am.PositionalEncoding(num_hiddens, dropout)
     self.blks = nn.Sequential()
     for i in range(num_layers):
         self.blks.add_module(
             "block" + str(i),
             am.EncoderBlock(key_size, query_size, value_size, num_hiddens,
                             norm_shape, ffn_num_input, ffn_num_hiddens,
                             num_heads, dropout, use_bias))
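A minimal instantiation sketch for the encoder above. The hyperparameter values are illustrative rather than taken from the example, and the commented forward call assumes a d2l-style signature (token ids plus valid lengths):

import torch

# Illustrative sizes; norm_shape is the shape LayerNorm normalizes over.
encoder = TransformerEncoder(vocab_size=10000, key_size=256, query_size=256,
                             value_size=256, num_hiddens=256, norm_shape=[256],
                             ffn_num_input=256, ffn_num_hiddens=512,
                             num_heads=4, num_layers=2, dropout=0.1)
tokens = torch.randint(0, 10000, (2, 50))   # (batch_size, num_steps) token ids
valid_lens = torch.tensor([50, 32])         # non-padding tokens per sequence
# states = encoder(tokens, valid_lens)      # assumed forward; would yield (2, 50, 256)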
Example #2
 def __init__(self,
              vocab_size,
              num_hiddens,
              norm_shape,
              ffn_num_input,
              ffn_num_hiddens,
              num_heads,
              num_layers,
              dropout,
              max_len=1000,
              key_size=768,
              query_size=768,
              value_size=768,
              hid_in_features=768,
              mlm_in_features=768,
              nsp_in_features=768):
     super(BERTModel, self).__init__()
     self.encoder = am.BERTEncoder(vocab_size,
                                   num_hiddens,
                                   norm_shape,
                                   ffn_num_input,
                                   ffn_num_hiddens,
                                   num_heads,
                                   num_layers,
                                   dropout,
                                   max_len=max_len,
                                   key_size=key_size,
                                   query_size=query_size,
                                   value_size=value_size)
     self.hidden = nn.Sequential(nn.Linear(hid_in_features, num_hiddens),
                                 nn.Tanh())
     self.mlm = am.MaskLM(vocab_size, num_hiddens, mlm_in_features)
     # Next-sentence-prediction head; assumes the companion module mirrors the
     # d2l NextSentencePred class, which is what nsp_in_features is passed in for.
     self.nsp = am.NextSentencePred(nsp_in_features)
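A minimal instantiation sketch for this BERT wrapper; the values are illustrative, and the commented calls assume d2l-style forward signatures for the encoder and the MLM head:

net = BERTModel(vocab_size=10000, num_hiddens=768, norm_shape=[768],
                ffn_num_input=768, ffn_num_hiddens=1024, num_heads=4,
                num_layers=2, dropout=0.2, max_len=512)
# encoded = net.encoder(tokens_X, segments_X, valid_lens_x)   # contextual token states (assumed signature)
# mlm_Y_hat = net.mlm(encoded, pred_positions_X)              # masked-token logits (assumed signature)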
def atn_exams(sequence_fix_length, foresight_steps, class_num, data, training_sample_ids, test_sample_ids, random_seed=None):
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    print('atn_model test start')

    atn = AttentionModel.CNN_Attention(sequence_fix_length, data['features'].shape[1], class_num=class_num,
                                       network_hyperparameters='./data/attention_network_hyperparameters_v2.json')

    # atn.train(data['features'], data['labels'], 1, 1024, training_sample_ids)
    # atn.test(data['features'], data['labels'], test_sample_ids)
    # atn.save_model('./model/atn_model_new')
    start_time = time.time()
    atn.train_v2(data['features'], data['labels'], data['samples_length'], 1, 1024, training_sample_ids, test_sample_ids,
                 foresight_steps=0, reset_flag=True, record_flag=False, random_seed=random_seed)
    end_time = time.time()
    print('training time cost: %fs' % (end_time - start_time))
    atn.test_v2(data['features'], data['labels'], data['samples_length'], test_sample_ids)
    # atn.save_model('./model/atn_model_v2')

    whole_time_on_test = atn.test_time(data['features'], data['labels'], data['samples_length'], test_sample_ids)
    whole_time_on_test_str = 'sum_a_cnn time cost on predicting %d test samples: %fs\n' % (
        len(test_sample_ids), whole_time_on_test)

    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    # print('cost time %f' % (end_time - start_time))
    print('atn_model test over\n')
    return whole_time_on_test_str
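A minimal calling sketch for atn_exams, assuming data is a dict of NumPy arrays keyed the way the function reads them; every name and size below is an illustrative placeholder:

import numpy as np

data = {'features': np.zeros((100000, 12), dtype=np.float32),   # (total_timesteps, feature_dim)
        'labels': np.zeros(100000, dtype=np.int64),              # class labels (layout assumed)
        'samples_length': np.full(1000, 100, dtype=np.int64)}    # length of each sample
training_ids, test_ids = list(range(800)), list(range(800, 1000))
report = atn_exams(sequence_fix_length=100, foresight_steps=0, class_num=4,
                   data=data, training_sample_ids=training_ids,
                   test_sample_ids=test_ids, random_seed=42)
print(report)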
Example #4
 def __init__(self,
              vocab_size,
              num_hiddens,
              norm_shape,
              ffn_num_input,
              ffn_num_hiddens,
              num_heads,
              num_layers,
              dropout,
              max_len=1000,
              key_size=768,
              query_size=768,
              value_size=768,
              hid_in_features=768,
              mlm_in_features=768,
              nsp_in_features=768):
     super(BERTDecoder, self).__init__()
     self.decoder = am.BERTDecoder(vocab_size,
                                   num_hiddens,
                                   norm_shape,
                                   ffn_num_input,
                                   ffn_num_hiddens,
                                   num_heads,
                                   num_layers,
                                   dropout,
                                   max_len=max_len,
                                   key_size=key_size,
                                   query_size=query_size,
                                   value_size=value_size)
     self.num_hiddens = num_hiddens
     self.num_layers = num_layers
     self.dense = nn.Linear(num_hiddens, vocab_size)
 def __init__(self, vocab_size, key_size, query_size, value_size,
              num_hiddens, norm_shape, ffn_num_input, ffn_num_hiddens,
              num_heads, num_layers, dropout, **kwargs):
     super(TransformerDecoder, self).__init__(**kwargs)
     self.num_hiddens = num_hiddens
     self.num_layers = num_layers
     self.embedding = nn.Embedding(vocab_size, num_hiddens)
     self.pos_encoding = am.PositionalEncoding(num_hiddens, dropout)
     self.blks = nn.Sequential()
     for i in range(num_layers):
         self.blks.add_module("block"+str(i),
             am.DecoderBlock(key_size, query_size, value_size, num_hiddens,
                          norm_shape, ffn_num_input, ffn_num_hiddens,
                          num_heads, dropout, i))
     self.dense = nn.Linear(num_hiddens, vocab_size)
 def __init__(self, vocab_size, key_size, query_size, value_size,
              num_hiddens, norm_shape, ffn_num_input, ffn_num_hiddens,
              num_heads, num_layers, dropout, max_len=1000, **kwargs):
     super(BERTDecoder, self).__init__(**kwargs)
     self.decoder = am.BERTDecoder(vocab_size, num_hiddens, norm_shape,
                 ffn_num_input, ffn_num_hiddens, num_heads, num_layers,
                 dropout, max_len=max_len, key_size=key_size,
                 query_size=query_size, value_size=value_size)
     self.num_hiddens = num_hiddens
     self.num_layers = num_layers
     self.dense = nn.Linear(num_hiddens, vocab_size)
Example #7
    def create_graph():

        print "Start graph creation"
        # Creating Model object
        model = AttentionModel.AttentionModel(
            mapping, representation, FLAGS.max_seq_length,
            FLAGS.embedding_size, FLAGS.enc_rnn_size, FLAGS.dec_rnn_size,
            FLAGS.enc_type, FLAGS.separate_output_embedding)

        ## Create placeholders for the input sequences and their lengths
        batch_sequences = tf.placeholder(shape=[None, FLAGS.max_seq_length],
                                         dtype=tf.int32)
        batch_sequence_lengths = tf.placeholder(shape=[None], dtype=tf.float32)

        # Predict output for test sequences
        o_enc_outputs = compute_hidden_representation(model, batch_sequences,
                                                      batch_sequence_lengths,
                                                      FLAGS.lang)

        print("Done with creating graph. Starting session")
        return batch_sequences, batch_sequence_lengths, o_enc_outputs
def train_bert(net, data_iter, lr, num_epochs, batch_size, tgt_vocab, device):
    """Train a model for sequence to sequence."""
    def xavier_init_weights(m):
        if type(m) == nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)
        if type(m) == nn.GRU:
            for param in m._flat_weights_names:
                if "weight" in param:
                    torch.nn.init.xavier_uniform_(m._parameters[param])
    # net.apply(xavier_init_weights)
    try:
        checkpoint_prefix = os.path.join("model_data/model_bert.pt")
        checkpoint = torch.load(checkpoint_prefix)
        net.load_state_dict(checkpoint['model_state_dict'])
        net.to(device)
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
        optimizer.load_state_dict(checkpoint['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.to(device)
    except Exception as e:
        net.apply(xavier_init_weights)
        net.to(device)
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
        print("Can not load the model with error:", e)
    
    
    loss = am.MaskedSoftmaxCELoss()
    net.train()
    animator = am.Animator(xlabel='epoch', ylabel='loss',
                            xlim=[1, num_epochs*batch_size])

    
    checkpoint_prefix = os.path.join("model_data/model_bert.pt")
    # ratio = 100 / len(data_iter)
    # print("ratio=", ratio)
    num_trained = 0
    for epoch in range(num_epochs):
        timer = Utility.Timer()
        metric = am.Accumulator(2)  # Sum of training loss, no. of tokens
        # print("epoch ...", epoch)
        for i, batch in enumerate(data_iter):
            # if random.random() < (1 - ratio * 1.5):
            #     continue
            num_trained += 1
            optimizer.zero_grad()
            X, X_valid_len, Y, Y_valid_len = [x.to(device) for x in batch]
            bos = torch.tensor([tgt_vocab['<bos>']] * Y.shape[0],
                               device=device).reshape(-1, 1)
            dec_input = torch.cat([bos, Y[:, :-1]], 1)  # Teacher forcing
            Y_hat, _ = net(X, dec_input, X_valid_len)
            l = loss(Y_hat, Y, Y_valid_len)
            l.sum().backward()  # Make the loss scalar for `backward`
            # Utility.grad_clipping(net, 1)
            num_tokens = Y_valid_len.sum()
            optimizer.step()
            with torch.no_grad():
                metric.add(l.sum(), num_tokens)
            # if (i + 1) % 100 == 0:
            # print("    batch>>>", i)
            if (num_trained + 1) % 100 == 0:
                animator.add(num_trained + 1, (metric[0] / metric[1],))
                # print(f'epoch = {epoch}, loss = {metric[0] / metric[1]:.3f}')
                torch.save({'model_state_dict': net.state_dict(), "optimizer": optimizer.state_dict()},checkpoint_prefix)
        # if (epoch + 1) % 10 == 0:
        # animator.add(epoch + 1, (metric[0] / metric[1],))
        # # print(f'epoch = {epoch}, loss = {metric[0] / metric[1]:.3f}')
        # torch.save({'model_state_dict': net.state_dict(), "optimizer": optimizer.state_dict()},checkpoint_prefix)
        # sys.stdout.flush()
    print(f'loss {metric[0] / metric[1]:.3f}, {metric[1] / timer.stop():.1f} '
          f'tokens/sec on {str(device)}')
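A sketch of how the seq2seq-style train_bert above might be driven; the data pipeline is not part of the example, so net, data_iter, and tgt_vocab below stand in for whatever the surrounding project provides:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lr, num_epochs, batch_size = 0.005, 10, 64
# data_iter is expected to yield (X, X_valid_len, Y, Y_valid_len) batches and
# tgt_vocab to map target tokens (including '<bos>') to ids.
train_bert(net, data_iter, lr, num_epochs, batch_size, tgt_vocab, device)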
def train_bert(train_iter, net, loss, vocab_size, device, num_steps, lr):
    net = net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    try:
        checkpoint_prefix = os.path.join(
            "model_data/model_BERT_pretraining_single.pt")
        checkpoint = torch.load(checkpoint_prefix)
        net.load_state_dict(checkpoint['model_state_dict'])
        net.to(device)
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
        optimizer.load_state_dict(checkpoint['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.to(device)
    except Exception as e:
        print("Can not load the model with error:", e)

    checkpoint_prefix = os.path.join(
        "model_data/model_BERT_pretraining_single.pt")

    step, timer = 0, Utility.Timer()
    animator = am.Animator(xlabel='step',
                           ylabel='loss',
                           xlim=[1, num_steps],
                           legend=['mlm'])
    # Sum of masked language modeling losses, no. of sentence pairs
    metric = am.Accumulator(2)
    num_steps_reached = False
    while step < num_steps and not num_steps_reached:
        for tokens_X, segments_X, valid_lens_x, pred_positions_X,\
            mlm_weights_X, mlm_Y in train_iter:
            tokens_X = tokens_X.to(device)
            segments_X = segments_X.to(device)
            valid_lens_x = valid_lens_x.to(device)
            pred_positions_X = pred_positions_X.to(device)
            mlm_weights_X = mlm_weights_X.to(device)
            mlm_Y = mlm_Y.to(device)
            optimizer.zero_grad()
            timer.start()
            l = _get_batch_loss_bert(net, loss, vocab_size, tokens_X,
                                     segments_X, valid_lens_x,
                                     pred_positions_X, mlm_weights_X, mlm_Y)
            l.backward()
            optimizer.step()
            timer.stop()
            with torch.no_grad():
                metric.add(l, tokens_X.shape[0])
                animator.add(step + 1, (metric[0] / metric[1]))
            if (step + 1) % 50 == 0:
                torch.save(
                    {
                        'model_state_dict': net.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }, checkpoint_prefix)

            step += 1
            if step == num_steps:
                num_steps_reached = True
                break

    print(f'MLM loss {metric[0] / metric[1]:.3f}')
    print(f'{metric[1] / timer.sum():.1f} sentence pairs/sec on '
          f'{str(device)}')
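The helper _get_batch_loss_bert is not shown in the example. A minimal sketch of what it might compute for this MLM-only loop, assuming the model returns (encoded states, MLM logits, pooled output) as in the d2l reference and that loss is a per-element cross entropy (reduction='none'):

def _get_batch_loss_bert(net, loss, vocab_size, tokens_X, segments_X,
                         valid_lens_x, pred_positions_X, mlm_weights_X, mlm_Y):
    # Forward pass; only the masked-LM logits are used here.
    _, mlm_Y_hat, _ = net(tokens_X, segments_X, valid_lens_x.reshape(-1),
                          pred_positions_X)
    # Per-position cross entropy over the vocabulary, weighted so that padded
    # prediction slots contribute nothing to the average.
    mlm_l = loss(mlm_Y_hat.reshape(-1, vocab_size), mlm_Y.reshape(-1))
    mlm_l = (mlm_l * mlm_weights_X.reshape(-1)).sum() / (mlm_weights_X.sum() + 1e-8)
    return mlm_l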
Example #10
def main_loop(args):
    print(args)

    settings = Settings.Settings(args)
    history = History.History(settings)
    connection = Connection.Connection(settings, history)
    #if connection.failed: return -1
    if connection.hard_stop: return -1

    cropscoordinates = CropsCoordinates.CropsCoordinates(settings, history)
    videocapture = VideoCapture.VideoCapture(settings, history)
    evaluation = Evaluation.Evaluation(settings, connection, cropscoordinates,
                                       history)
    attentionmodel = AttentionModel.AttentionModel(settings, cropscoordinates,
                                                   evaluation, history)
    postprocess = Postprocess.Postprocess(settings, history)

    renderer = Renderer.Renderer(settings, history)
    debugger = Debugger.Debugger(settings, cropscoordinates, evaluation)

    settings.save_settings()
    settings.set_debugger(debugger)

    for frame, next_frames, frame_number in videocapture.frame_generator_thread_loading():
        settings.frame_number = frame_number

        print("frame: ", frame[2])
        for i in range(len(next_frames)):
            print("next_frames", i, ": ", next_frames[i][2], next_frames[i][0],
                  next_frames[i][2:])

        attention_coordinates = cropscoordinates.get_crops_coordinates(
            'attention')
        #debugger.debug_coordinates_in_frame(attention_coordinates, frame[1],'attention')

        attention_evaluation = evaluation.evaluate_attention_with_precomputing(
            frame_number, attention_coordinates, frame, 'attention',
            next_frames)
        # attention_evaluation starts in attention-crop space (the frame is downscaled for attention
        # evaluation so that 608x608 crops can be cut from it easily)

        projected_evaluation = cropscoordinates.project_evaluation_back(
            attention_evaluation, 'attention')
        #debugger.debug_evaluation_to_bboxes_after_reprojection(projected_evaluation, frame[1], 'attention', 'afterRepro')
        # projected_evaluation are now in original image space

        evaluation_coordinates = cropscoordinates.get_crops_coordinates(
            'evaluation')
        # evaluation_coordinates are in evaluation space. (size of frame downscaled for regular evaluation
        # so that we can cut crops of 608x608 from it easily)
        #debugger.debug_coordinates_in_frame(evaluation_coordinates, frame[1], 'evaluation')

        active_coordinates = attentionmodel.get_active_crops_intersections(
            projected_evaluation, evaluation_coordinates, frame)
        #debugger.debug_coordinates_in_frame(active_coordinates, frame[1], 'evaluation', "__"+str(settings.frame_number)+'activeonly')

        if len(active_coordinates) == 0:
            print("Nothing left active - that's possibly ok, skip")
            renderer.render([], frame)
            history.report_skipped_final_evaluation(frame_number)
            continue

        final_evaluation = evaluation.evaluate(active_coordinates, frame,
                                               'evaluation', frame_number)
        # final_evaluation is in evaluation space
        projected_final_evaluation = cropscoordinates.project_evaluation_back(
            final_evaluation, 'evaluation')
        # projected back to original space

        projected_active_coordinates = cropscoordinates.project_coordinates_back(
            active_coordinates, 'evaluation')

        processed_evaluations = postprocess.postprocess(
            projected_active_coordinates, projected_final_evaluation)
        #debugger.debug_evaluation_to_bboxes_after_reprojection(processed_evaluations, frame[1], 'finalpostprocessed'+frame[0][-8:-4])

        renderer.render(processed_evaluations, frame)

    history.tick_loop(frame_number, True)

    history.save_whole_history_and_settings()
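A minimal invocation sketch; the concrete command-line flags live elsewhere in the project (Settings consumes them), so only the wiring is shown:

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Attention-model video evaluation pipeline')
    # Project-specific flags would be registered here before parsing.
    args = parser.parse_args()
    main_loop(args)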
Example #11
    batch_sequences = tf.placeholder(shape=[None, max_sequence_length], dtype=tf.int32)
    batch_sequence_masks = tf.placeholder(shape=[None, max_sequence_length], dtype=tf.float32)
    batch_sequence_lengths = tf.placeholder(shape=[None], dtype=tf.float32)
    beam_size = tf.placeholder(dtype=tf.int32)
    topn = tf.placeholder(dtype=tf.int32)

    ## Create session 
    print "Creating Session"
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    #### Create the Translation Model graph
    print("Start graph creation for Translation Model")
    model = AttentionModel.AttentionModel(mapping, representation, max_sequence_length,
            embedding_size, enc_rnn_size, dec_rnn_size,
            enc_type, separate_output_embedding)

    # Predict output for test sequences
    ### A secondary purpose of creating this graph is to allow loading of variables
    outputs, outputs_scores = model.transliterate_beam(
                lang_pair[0], batch_sequences, batch_sequence_lengths, lang_pair[1], beam_size, topn)
    
    ### Now restore variables from the translation model
    saver_trans = tf.train.Saver()
    saver_trans.restore(sess, model_fname)
    print("Loaded translation model parameters")

    # Create the Language Model graph
    if fuse_lm is not None:
        pass  # language-model fusion graph construction is not included in this excerpt