Esempio n. 1
0
    parser.add_argument('--seperate', action='store_true', default=False)
    parser.add_argument('--choice', type=str, default='melody',
                        choices = ['melody', 'harmony'])
    args = parser.parse_args()

    with open(args.config_file, 'r') as f: 
        config = cPickle.load(f)

    if config.dataset == 'softmax':
        config.time_batch_len = 1
        config.max_time_batches = -1
        with open(nottingham_util.PICKLE_LOC, 'r') as f:
            pickle = cPickle.load(f)
        if args.seperate:
            model_class = NottinghamSeparate
            test_data = util.batch_data(pickle['test'], time_batch_len = 1, 
                max_time_batches = -1, softmax = True)
            r = nottingham_util.NOTTINGHAM_MELODY_RANGE
            if args.choice == 'melody':
                print "Using only melody"
                new_data = []
                for batch_data, batch_targets in test_data:
                    new_data.append(([tb[:, :, :r] for tb in batch_data],
                                     [tb[:, :, 0] for tb in batch_targets]))
                test_data = new_data
            else:
                print "Using only harmony"
                new_data = []
                for batch_data, batch_targets in test_data:
                    new_data.append(([tb[:, :, r:] for tb in batch_data],
                                     [tb[:, :, 1] for tb in batch_targets]))
                test_data = new_data
Esempio n. 2
0
def test():
    """Evaluate the most recent AGRU checkpoint on the CROHME 2013 test files.

    All settings are read from ``cfg``.  The lexicographically last file
    matching ``cfg.MODEL_FOLDER + cfg.SAVE_NAME + '*.dat'`` is loaded into the
    network (if one exists), every ``.inkml`` file under
    ``cfg.DATASET_PATH + 'CROHME2013_data/TestINKML/'`` is decoded with beam
    search, and for each file the exact-match flag and the Levenshtein
    distance between prediction and ground truth are recorded.

    For every iteration whose index is not a multiple of 4, the prediction and
    target are printed and one attention overlay image per predicted token is
    written below ``cfg.VIS_PATH``.

    The lists ``[em, wer, all_pred, all_gt]`` are finally stored with
    ``util.save_list`` at ``cfg.MODEL_FOLDER + cfg.TEST_NAME + '.dat'``.

    Only the first sample of each batch is scored; the batch size is fixed
    to 1 here, so every test file is covered.
    """
    # Getting settings from config.py
    max_len = cfg.MAX_TOKEN_LEN

    # Evaluation params
    is_train = False
    batch_size = 1

    # Tracking/Saving
    save_name = cfg.SAVE_NAME
    test_name = cfg.TEST_NAME
    vis_path = cfg.VIS_PATH

    use_cuda = cfg.CUDA and torch.cuda.is_available()
    save_path = cfg.MODEL_FOLDER
    dataset_path = cfg.DATASET_PATH + 'CROHME2013_data/TestINKML/'
    scale_factor = cfg.TEST_SCALE_FACTOR

    # Load the vocab dictionary for display purpose
    word_to_id, id_to_word = get_gt.build_vocab('mathsymbolclass.txt')
    start_id = word_to_id['<s>']
    stop_id = word_to_id['</s>']

    # Initialize the network and load its weights
    net = AGRU()
    save_files = glob.glob(save_path + save_name + '*.dat')
    if save_files:
        save_file = sorted(save_files)[-1]
        print('Loading network weights saved at %s...' % save_file)
        loadobj = torch.load(save_file)
        net.load_state_dict(loadobj['state_dict'])
        print('Loading done.')
    else:
        # Make the fallback visible instead of silently scoring a network
        # that never had a checkpoint loaded into it.
        print('Warning: no checkpoint matching %s*.dat was found; '
              'evaluating the network as constructed.' %
              (save_path + save_name))

    if use_cuda:
        net.cuda()

    # Switch the network out of training mode
    if not is_train:
        net.train(False)

    # Get full paths to test inkml files, create a list of scale factors to
    # be used for rendering test images
    inkml_list = glob.glob(dataset_path + '*.inkml')
    scale_list = [scale_factor] * len(inkml_list)
    inkml_list = np.asarray(inkml_list)
    scale_list = np.asarray(scale_list)

    num_test = len(inkml_list)
    num_ite = int(np.ceil(1.0 * num_test / batch_size))

    # Exact match and word error rate
    em = []
    wer = []
    all_pred = []
    all_gt = []

    # Main test loop
    for i in range(num_ite):
        # Clamp the last batch to the end of the data.  The nominal
        # batch_size is left untouched so batch offsets stay correct.
        first = i * batch_size
        batch_idx = range(first, min(first + batch_size, num_test))
        cur_batch_size = len(batch_idx)

        batch_x = util.batch_data(inkml_list[batch_idx], scale_list[batch_idx],
                                  is_train)
        batch_y_np = util.batch_target(inkml_list[batch_idx])

        pred_y, attention = net.beam_search(batch_x, start_id, stop_id)
        pred_y = util.var_to_np(pred_y, use_cuda)
        pred_y = np.argmax(pred_y, 2)
        gt_y = np.reshape(batch_y_np, (cur_batch_size, max_len))

        print('Finished ite %d/%d.' % (i, num_ite))

        # Only the first sample of the batch is scored and visualised
        j = 0
        pred_string = [id_to_word[idx] for idx in list(pred_y[j, :])]
        gt_string = [id_to_word[idx] for idx in list(gt_y[j, :])]
        all_pred.append(pred_string)
        all_gt.append(gt_string)
        em.append(util.exact_match(pred_string, gt_string))

        # Cut both sequences right after their stop token (when present)
        # before measuring the edit distance.
        if '</s>' in pred_string:
            pred_string = pred_string[0:pred_string.index('</s>') + 1]
        if '</s>' in gt_string:
            gt_string = gt_string[0:gt_string.index('</s>') + 1]
        wer.append(util.levenshtein_distance(pred_string, gt_string))

        # No console output / visualisation on every fourth iteration
        if i % 4 == 0:
            continue

        # Printing stuffs to console
        print('Prediction: %s' % ' '.join(pred_string))
        print('Target: %s\n' % ' '.join(gt_string))

        # Save attention to files for visualization; the slice drops the
        # 6-character '.inkml' extension from the file name.
        file_name = ntpath.basename(inkml_list[batch_idx[j]])[:-6]
        vis_path_j = vis_path + file_name + '/'
        if not os.path.exists(vis_path_j):
            os.makedirs(vis_path_j)

        tmp_x = np.sum(batch_x.data.cpu().numpy()[j, :, :, :], axis=0)
        # Skip the first attention step and the first predicted token
        attention_np = attention.data.cpu().numpy()[j, 1:, :, :]
        vis_tokens = pred_string[1:]
        for k, word in enumerate(vis_tokens):
            # '/' cannot appear in a file name ('</s>' becomes '<slash_s>')
            word = word.replace('/', 'slash_')
            attention_k = attention_np[k, :, :] / np.max(
                attention_np[k, :, :]) * 0.8
            attention_k = (scipy.misc.imresize(attention_k, 16.0)) / 255.0
            tmp_x = scipy.misc.imresize(tmp_x, attention_k.shape)
            attention_k += tmp_x
            attention_k[attention_k > 1] = 1
            try:
                scipy.misc.imsave(vis_path_j + ('%02d_%s.jpg' % (k, word)),
                                  attention_k)
            except FileNotFoundError as err:
                # Report and keep going rather than stopping the whole
                # evaluation in a debugger.
                print('Warning: could not save attention map for token %r: %s'
                      % (word, err))
            if word == '<slash_s>':
                break

    print("Exact match count: %d/%d" % (sum(em), len(em)))
    print("Word error rate: %.5f" % (np.mean(wer)))
    util.save_list([em, wer, all_pred, all_gt], save_path + test_name + '.dat')
Esempio n. 3
0
def train():
    """Train the AGRU network on the configured CROHME 2013 training subsets.

    All hyper-parameters and paths are read from ``cfg``.  If a checkpoint
    matching ``cfg.MODEL_FOLDER + cfg.SAVE_NAME + '*.dat'`` exists, the
    lexicographically last one is loaded together with the last metadata file
    (last epoch, running loss, loss history, learning rate) and training
    continues from the following epoch.

    Each epoch shuffles the training files, runs the network batch by batch
    and back-propagates a cross-entropy loss (target id 1 is ignored).
    Gradients are accumulated over ``cfg.NUM_ITE_TO_UPDATE`` iterations, then
    clipped with ``util.grad_clip`` before the Adam step.  Progress is logged
    every ``cfg.NUM_ITE_TO_LOG`` steps, attention overlays and the loss curve
    are written to ``cfg.VIS_PATH`` every ``cfg.NUM_ITE_TO_VIS`` steps, and a
    checkpoint plus metadata file is saved every ``cfg.NUM_EPOCH_TO_SAVE``
    epochs.
    """
    # Getting settings from config.py
    max_len = cfg.MAX_TOKEN_LEN
    num_token = cfg.NUM_OF_TOKEN

    # Training params
    is_train = True
    batch_size_const = cfg.GPU_BATCH_SIZE
    num_ite_to_update = cfg.NUM_ITE_TO_UPDATE
    lr = cfg.LR
    lr_decay = cfg.LR_DECAY
    max_grad = cfg.MAX_GRAD_CLIP
    num_e = cfg.NUM_EPOCH

    # Tracking/Saving
    last_e = -1
    global_step = 0
    running_loss = 0
    num_ite_to_log = cfg.NUM_ITE_TO_LOG
    num_ite_to_vis = cfg.NUM_ITE_TO_VIS
    num_epoch_to_save = cfg.NUM_EPOCH_TO_SAVE
    all_loss = []
    save_name = cfg.SAVE_NAME
    meta_name = cfg.META_NAME
    vis_path = cfg.VIS_PATH

    use_cuda = cfg.CUDA and torch.cuda.is_available()
    save_path = cfg.MODEL_FOLDER
    dataset_path = cfg.DATASET_PATH + 'CROHME2013_data/TrainINKML/'
    subset_list = cfg.SUBSET_LIST
    scale_factors = cfg.SCALE_FACTORS

    # Number of tokens of the first sample shown when logging
    num_tokens_to_show = 40
    # Maximum number of attention steps rendered per visualisation
    num_attention_to_vis = 10

    # Load the vocab dictionary for display purpose
    _, id_to_word = get_gt.build_vocab('mathsymbolclass.txt')

    # Initialize the network and load its weights
    net = AGRU()
    save_files = glob.glob(save_path + save_name + '*.dat')
    meta_files = glob.glob(save_path + meta_name + '*.dat')
    if save_files:
        save_file = sorted(save_files)[-1]
        print('Loading network weights saved at %s...' % save_file)
        loadobj = torch.load(save_file)
        net.load_state_dict(loadobj['state_dict'])
        last_e, running_loss, all_loss, lr = util.load_list(
            sorted(meta_files)[-1])
        print('Loading done.')
        # NOTE(review): the checkpoint also stores the optimizer state under
        # 'opt', but it is not restored here - confirm whether that is
        # intended.

    if use_cuda:
        net.cuda()

    # For debugging
    if not is_train:
        net.train(False)

    # Get a list of convolutional layers
    conv_layers = util.get_layers(net, lambda x: type(x) == type(net.conv1_3))

    # Get conv parameters (trainable ones only)
    conv_params = [
        p for c in conv_layers for p in c.parameters() if p.requires_grad
    ]

    # Get a list of trainable layers that are not convolutional
    other_layers = util.get_layers(
        net,
        lambda x: type(x) != type(net.conv1_3) and hasattr(x, 'parameters'))
    other_layers = other_layers[1:]  # The first layer is attend_GRU.AGRU

    # Get GRU parameters
    gru_params = [p for l in other_layers for p in l.parameters()]

    # Two parameter groups so conv layers and GRU layers can get different
    # learning rates (both currently use the same value)
    optimizer = optim.Adam([{
        'params': gru_params
    }, {
        'params': conv_params,
        'lr': lr
    }],
                           lr=lr)

    # Loss function
    criterion = nn.CrossEntropyLoss(ignore_index=1)

    # Get full paths to train inkml files, create a list of scale factors to
    # be used for rendering train images
    inkml_list = []
    scale_list = []

    for i, subset in enumerate(subset_list):
        subset_inkml_list = glob.glob(dataset_path + subset + '*.inkml')
        inkml_list += subset_inkml_list
        scale_list += [scale_factors[i]] * len(subset_inkml_list)
    inkml_list = np.asarray(inkml_list)
    scale_list = np.asarray(scale_list)

    num_train = len(inkml_list)
    num_ite = int(np.ceil(1.0 * num_train / batch_size_const))

    # Main train loop
    optimizer.zero_grad()
    for e in range(last_e + 1, num_e):
        permu_ind = np.random.permutation(range(num_train))
        inkml_list = inkml_list[permu_ind.astype(int)]
        scale_list = scale_list[permu_ind.astype(int)]

        if (e % cfg.NUM_EPOCH_TO_DECAY == cfg.NUM_EPOCH_TO_DECAY - 1):
            lr = lr * lr_decay
            print('Current learning rate: %.8f' % lr)
            optimizer.param_groups[0]['lr'] = lr
            optimizer.param_groups[1]['lr'] = lr

        for i in range(num_ite):

            batch_idx = range(i * batch_size_const, (i + 1) * batch_size_const)
            if (batch_idx[-1] >= num_train):
                batch_idx = range(i * batch_size_const, num_train)
            batch_size = len(batch_idx)
            batch_x = util.batch_data(inkml_list[batch_idx],
                                      scale_list[batch_idx], is_train)
            batch_y_np = util.batch_target(inkml_list[batch_idx])
            batch_y = util.np_to_var(batch_y_np, use_cuda)

            pred_y, attention = net(batch_x, batch_y)

            # Convert the 3D tensor to 2D matrix of shape
            # (batch_size*MAX_TOKEN_LEN, NUM_OF_TOKEN) to compute log loss
            pred_y = pred_y.view(-1, num_token)
            # Remove the <start> token from target vector & prediction vector
            batch_y = batch_y.view(batch_size, max_len)
            batch_y = batch_y[:, 1:].contiguous()
            batch_y = batch_y.view(-1)
            pred_y = pred_y.view(batch_size, max_len, num_token)
            pred_y = pred_y[:, 1:].contiguous()
            pred_y = pred_y.view(batch_size * (max_len - 1), num_token)

            loss = criterion(pred_y, batch_y)
            loss.backward()
            running_loss += loss.data[0]

            # Gradients accumulate across iterations; step only every
            # num_ite_to_update iterations
            if (global_step % num_ite_to_update == (num_ite_to_update - 1)):
                util.grad_clip(net, max_grad)
                optimizer.step()
                optimizer.zero_grad()
                running_loss /= num_ite_to_update
                all_loss.append(running_loss)
                running_loss = 0

            # Printing stuffs to console
            if (global_step % num_ite_to_log == (num_ite_to_log - 1)):
                # all_loss stays empty until the first optimizer update, so
                # do not index it blindly.
                shown_loss = all_loss[-1] if all_loss else float('nan')
                print('Finished ite %d/%d, epoch %d/%d, loss: %.5f' %
                      (i, num_ite, e, num_e, shown_loss))

                # Printing prediction and target
                pred_y_np = util.var_to_np(pred_y, use_cuda)
                pred_y_np = np.reshape(pred_y_np,
                                       (batch_size, max_len - 1, num_token))
                # Only display the first sample in the batch
                pred_y_np = pred_y_np[0, 0:num_tokens_to_show, :]
                pred_y_np = np.argmax(pred_y_np, axis=1)
                pred_list = [id_to_word[idx] for idx in list(pred_y_np)]
                print('Prediction: %s' % ' '.join(pred_list))

                # The prediction already had its first position removed, so
                # shift the target window by one to show the same positions.
                target_np = np.reshape(batch_y_np, (batch_size, max_len))
                target_np = target_np[0, 1:num_tokens_to_show + 1]
                target_list = [id_to_word[idx] for idx in list(target_np)]
                print('Target: %s\n' % ' '.join(target_list))

            if (global_step % num_ite_to_vis == (num_ite_to_vis - 1)):
                tmp_x = util.var_to_np(batch_x, use_cuda)[0, :, :, :]
                tmp_x = np.transpose(tmp_x, (1, 2, 0))[:, :, 0:3]
                attention_np = attention.data.cpu().numpy()[0, 2:, :, :]
                # Never index past the attention steps actually available
                num_vis = min(num_attention_to_vis, attention_np.shape[0])
                for k in range(num_vis):
                    attention_k = attention_np[k, :, :] / np.max(
                        attention_np[k, :, :]) * 0.8
                    attention_k = (scipy.misc.imresize(
                        attention_k, 16.0, interp='bicubic')) / 255.0
                    tmp_x = scipy.misc.imresize(tmp_x, attention_k.shape)
                    attention_k = np.repeat(np.expand_dims(attention_k, 2), 3,
                                            2)
                    attention_k = attention_k * 255
                    attention_k += tmp_x
                    attention_k /= 2.0
                    attention_k[attention_k > 255] = 255
                    attention_k = (attention_k).astype(np.uint8)
                    scipy.misc.imsave(vis_path + ('%02d.jpg' % k), attention_k)

                plt.clf()
                plt.plot(all_loss)
                # Save before showing: once show() returns the current figure
                # may no longer hold the plot, which would save an empty
                # image.
                plt.savefig(vis_path + 'loss.png')
                plt.show()

            global_step += 1

        if (e % num_epoch_to_save == (num_epoch_to_save - 1)):
            print('Saving at epoch %d/%d' % (e, num_e))
            torch.save(
                {
                    'state_dict': net.state_dict(),
                    'opt': optimizer.state_dict()
                }, save_path + save_name + ('_%03d' % e) + '.dat')
            metadata = [e, running_loss, all_loss, lr]
            util.save_list(metadata,
                           save_path + meta_name + ('_%03d' % e) + '.dat')

        last_e = e
Esempio n. 4
0
    with open(args.config_file, 'r') as f: 
        config = cPickle.load(f)

    if config.dataset == 'softmax':
        config.time_batch_len = 1
        config.max_time_batches = -1
        model_class = NottinghamModel
        with open(nottingham_util.PICKLE_LOC, 'r') as f:
            pickle = cPickle.load(f)
        chord_to_idx = pickle['chord_to_idx']

        time_step = 120
        resolution = 480

        # use time batch len of 1 so that every target is covered
        test_data = util.batch_data(pickle['test'], time_batch_len = 1, 
            max_time_batches = -1, softmax = True)
    else:
        raise Exception("Other datasets not yet implemented")

    print config

    with tf.Graph().as_default(), tf.Session() as session:
        with tf.variable_scope("model", reuse=None):
            sampling_model = model_class(config)

        saver = tf.train.Saver(tf.all_variables())
        model_path = os.path.join(os.path.dirname(args.config_file), 
            config.model_name)
        saver.restore(session, model_path)

        state = sampling_model.get_cell_zero_state(session, 1)