Ejemplo n.º 1
0
def get_prediction(image):
    """Generate a caption for ``image`` with the pre-trained encoder/decoder.

    The image is resized, center-cropped and normalized, encoded by
    ``ResNetEncoder`` and decoded with ``RNNDecoder.greedy_search``. The
    vocabulary and the checkpoint are re-read from fixed file names on every
    call.

    Args:
        image: Image accepted by the torchvision transforms below
            (presumably a PIL image -- confirm against callers).

    Returns:
        str: The predicted words joined by single spaces, without the start
        token and cut at the first end token.
    """
    transform_test = transforms.Compose([
        transforms.Resize(256),  # smaller edge of image resized to 256
        transforms.CenterCrop(224),  # get 224x224 crop from the center
        transforms.ToTensor(),  # convert the PIL Image to a tensor
        transforms.Normalize(
            (0.485, 0.456, 0.406),  # normalize image for pre-trained model
            (0.229, 0.224, 0.225))
    ])

    test_img = transform_test(image)
    sample_vocab = Vocabulary(threshold=5,
                              load_vocab=True,
                              anns_file="captions_train2014.json")
    vocab_size = len(sample_vocab)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # map_location lets a checkpoint saved on a GPU be restored on a
    # CPU-only host as well.
    checkpoint = torch.load('train-model-1-9900.pkl', map_location=device)

    # Specify values for embed_size and hidden_size - we use the same values
    # as in training step
    embed_size = 256
    hidden_size = 512

    # Initialize the encoder and decoder, and set each to inference mode
    encoder = ResNetEncoder(embed_size)
    encoder.eval()
    decoder = RNNDecoder(embed_size, hidden_size, vocab_size)
    decoder.eval()

    # Load the pre-trained weights
    encoder.load_state_dict(checkpoint['encoder'])
    decoder.load_state_dict(checkpoint['decoder'])

    # Models and the input tensor must live on the same device; it is the
    # transformed tensor (not the raw input image) that gets moved.
    encoder.to(device)
    decoder.to(device)
    test_img = test_img.unsqueeze(0).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        features = encoder(test_img).unsqueeze(1)
        output = decoder.greedy_search(features)

    cleaned_pred = []
    for vocab_id in output:
        word = sample_vocab.id2word[vocab_id]
        if word == sample_vocab.end_seq:
            break
        if word != sample_vocab.start_seq:
            cleaned_pred.append(word)

    return " ".join(cleaned_pred)
Ejemplo n.º 2
0
def main():
    """Train, prune-and-retrain, or evaluate the model per ``args.mode``.

    * ``'train'``: resets ``args.round`` to 0 and trains a freshly built
      model.
    * ``'prune'``: restores weights from a fixed checkpoint, prunes the
      model, prints it with its parameter count, then trains it again.
    * ``'test'``: loads the whole saved model for ``args.round`` and prints
      its parameter count and accuracy.

    Any other mode does nothing beyond building the data loaders and model.
    """
    data_path = '../five-video-classification-methods/data/data_file_ordinal_logistic_regression_pytorch.csv'
    testdata_path = '../five-video-classification-methods/data/3_combined_test.csv'
    train_loader, test_loader = get_dataloader(data_path, testdata_path)

    # Remap saved storages onto whatever hardware is available so that
    # checkpoints written on a GPU can also be opened on a CPU-only host.
    if torch.cuda.is_available():
        map_location = lambda storage, loc: storage.cuda()
    else:
        map_location = 'cpu'

    model = nn.Sequential(CNNEncoder(**config.cnn_encoder_params),
                          RNNDecoder(**config.rnn_decoder_params))
    if args.mode == 'train':
        args.round = 0
        train_model(model, train_loader, test_loader)
    elif args.mode == 'prune':
        previous_ckpt = 'checkpoints/ep-3794-0.193.pth'
        print("Pruning round %d, load model from %s" %
              (args.round, previous_ckpt))
        ckpt = torch.load(previous_ckpt, map_location=map_location)
        model.load_state_dict(ckpt['model_state_dict'])
        prune_model(model)
        print(model)
        params = sum(p.numel() for p in model.parameters())
        print("Number of Parameters: %.1fM" % (params / 1e6))
        train_model(model, train_loader, test_loader)
    elif args.mode == 'test':
        ckpt = 'resnet18-round%d.pth' % (args.round)
        print("Load model from %s" % (ckpt))
        # Same device remapping as the 'prune' branch.
        model = torch.load(ckpt, map_location=map_location)
        params = sum(p.numel() for p in model.parameters())
        print("Number of Parameters: %.1fM" % (params / 1e6))
        # NOTE(review): `eval` is called with (model, loader), so it is
        # presumably a project function shadowing the builtin -- confirm.
        acc = eval(model, test_loader)
        print("Acc=%.4f\n" % (acc))
Ejemplo n.º 3
0
def build_model(config, gpu_id, checkpoint=None):
    """Assemble a ``UIModel`` from an encoder, a decoder and a generator.

    Args:
        config: Object supplying the hyper-parameters read below
            (encoder/decoder sizes, dropout, vocabulary size, ``input_feed``
            and ``param_init``).
        gpu_id: The model is moved to ``'cuda'`` when this is ``>= 0`` and to
            ``'cpu'`` otherwise.
        checkpoint: Optional mapping; when given, its ``'model'`` entry is
            loaded as the state dict. When ``None`` every parameter is drawn
            uniformly from ``[-config.param_init, config.param_init]``.

    Returns:
        The assembled model, already moved to the selected device.
    """
    # Image encoder.
    image_encoder = ImageEncoder(
        config.encoder_num_layers,
        True,
        config.encoder_num_hidden,
        config.dropout,
        config.image_channel_size,
    )

    # The decoder shares the encoder's hidden size.
    hidden_size = config.encoder_num_hidden
    rnn_decoder = RNNDecoder(
        True,
        config.target_embedding_size,
        config.decoder_num_layers,
        hidden_size,
        config.dropout,
        config.target_vocab_size,
        attn_type='general',
        input_feed=config.input_feed,
    )

    target_device = torch.device('cuda' if gpu_id >= 0 else 'cpu')

    # Generator: projection onto the vocabulary followed by log-softmax.
    projection = nn.Linear(hidden_size, config.target_vocab_size)
    log_softmax = nn.LogSoftmax(dim=-1)
    generator = nn.Sequential(projection, log_softmax)

    model = UIModel(image_encoder, rnn_decoder, generator)

    # Either start from random weights or restore the saved ones.
    if checkpoint is None:
        for param in model.parameters():
            param.data.uniform_(-config.param_init, config.param_init)
    else:
        model.load_state_dict(checkpoint['model'])

    model.to(target_device)
    return model
Ejemplo n.º 4
0
def train_on_epochs(train_loader: DataLoader,
                    test_loader: DataLoader,
                    restore_from: str = None):
    """Train the CNN-encoder/RNN-decoder model for ``config.epoches`` epochs.

    Each epoch runs ``train`` and ``validation``. Every
    ``config.save_interval`` epochs a checkpoint is written under
    ``./checkpoints``. After the last epoch the collected losses and scores
    are dumped to ``./train_info.json``.

    Args:
        train_loader: Loader passed to ``train``; its ``dataset.labels`` is
            stored in every checkpoint as ``'label_map'``.
        test_loader: Loader passed to ``validation``.
        restore_from: Optional checkpoint path. When given, the model and
            optimizer states are restored and training resumes at the epoch
            after the one stored in the checkpoint.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    model = nn.Sequential(CNNEncoder(**config.cnn_encoder_params),
                          RNNDecoder(**config.rnn_decoder_params))
    model.to(device)

    device_count = torch.cuda.device_count()
    if device_count > 1:
        print('Use {} GPU training'.format(device_count))
        model = nn.DataParallel(model)

    ckpt = {}
    if restore_from is not None:
        # map_location keeps GPU-saved checkpoints loadable on a CPU-only
        # host (and vice versa).
        ckpt = torch.load(restore_from, map_location=device)
        model.load_state_dict(ckpt['model_state_dict'])
        print('Model is loaded from %s' % (restore_from))

    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    # The optimizer state can only be restored once the optimizer exists.
    if restore_from is not None:
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])

    info = {
        'train_losses': [],
        'train_scores': [],
        'test_losses': [],
        'test_scores': []
    }

    # Resume after the stored epoch, or start from scratch.
    start_ep = ckpt['epoch'] + 1 if 'epoch' in ckpt else 0

    save_path = './checkpoints'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(save_path, exist_ok=True)

    for ep in range(start_ep, config.epoches):
        train_losses, train_scores = train(model, train_loader, optimizer, ep,
                                           device)
        test_loss, test_score = validation(model, test_loader, optimizer, ep,
                                           device)

        info['train_losses'].append(train_losses)
        info['train_scores'].append(train_scores)
        info['test_losses'].append(test_loss)
        info['test_scores'].append(test_score)

        if (ep + 1) % config.save_interval == 0:
            ckpt_path = os.path.join(save_path, 'ep-%d.pth' % ep)
            torch.save(
                {
                    'epoch': ep,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'label_map': train_loader.dataset.labels
                }, ckpt_path)
            print('Model of Epoch %3d has been saved to: %s' % (ep, ckpt_path))

    with open('./train_info.json', 'w') as f:
        json.dump(info, f)

    print('End of training')
Ejemplo n.º 5
0
                      os.path.splitext(os.path.basename(args.data_path))[0] +
                      '.npy')
    accessed = np.load('y_gd' +
                       os.path.splitext(os.path.basename(args.data_path))[0] +
                       '.npy')
    plt.plot(modeled, label="modeled")
    plt.plot(accessed, label='accessed')
    plt.legend()
    plt.show()
    print('Loading model from {}'.format(args.checkpoint))
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Build model
    model = nn.Sequential(CNNEncoder(**config.cnn_encoder_params),
                          RNNDecoder(**config.rnn_decoder_params))
    model.to(device)
    model.eval()

    if torch.cuda.is_available():
        map_location = lambda storage, loc: storage.cuda()
    else:
        map_location = 'cpu'
    # Load model
    ckpt = torch.load(args.checkpoint, map_location=map_location)
    model.load_state_dict(ckpt['model_state_dict'])
    print('Model has been loaded from {}'.format(args.checkpoint))

    label_map = [-1] * config.rnn_decoder_params['num_classes']
    # load label map
    if 'label_map' in ckpt:
Ejemplo n.º 6
0
                          batch_size=batch_size,
                          threshold=vocab_threshold,
                          load_vocab=load_vocab)

# Validation data loader.
val_loader = get_loader(
    transform=transform_val,
    mode='val',
    batch_size=batch_size,
    threshold=vocab_threshold,
    load_vocab=load_vocab,
)

# Vocabulary size, taken from the training dataset
vocab_size = len(train_loader.dataset.vocab)

# Build the encoder and the decoder
encoder = ResNetEncoder(embedding_size)
decoder = RNNDecoder(embedding_size, hidden_size, vocab_size)

# Put both models on the GPU when CUDA is available
if torch.cuda.is_available():
    encoder.cuda()
    decoder.cuda()

# Loss function, placed on the GPU as well when CUDA is available
criterion = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    criterion = criterion.cuda()

# Learnable parameters: the whole decoder plus the encoder's `embed` and `bn`
# sub-modules only
params = [
    *decoder.parameters(),
    *encoder.embed.parameters(),
    *encoder.bn.parameters(),
]

# Define the optimizer
Ejemplo n.º 7
0
def _eval(checkpoint: str, video_path: str, labels=None) -> list:
    """Run inference on every entry of ``video_path`` and save the results.

    Args:
        checkpoint(str): The checkpoint where the model restore from.
        video_path(str): Directory whose entries are the videos to classify.
        labels(list): Optional ground-truth labels. When non-empty, an
            accuracy against the predicted class indices is printed.

    Returns:
        A list with one ``[video_name, class_index, mapped_label]`` row per
        video. The same rows are written to ``result.csv``.

    Raises:
        ValueError: If ``video_path`` does not exist.
    """
    if not os.path.exists(video_path):
        raise ValueError('Invalid path! which is: {}'.format(video_path))

    print('Loading model from {}'.format(checkpoint))
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Build model
    model = nn.Sequential(CNNEncoder(**config.cnn_encoder_params),
                          RNNDecoder(**config.rnn_decoder_params))
    model.to(device)
    model.eval()

    # Load model; map_location keeps GPU-saved checkpoints usable on a
    # CPU-only host.
    ckpt = torch.load(checkpoint, map_location=device)
    model.load_state_dict(ckpt['model_state_dict'])
    print('Model has been loaded from {}'.format(checkpoint))

    # Fall back to -1 placeholders when the checkpoint carries no label map.
    label_map = [-1] * config.rnn_decoder_params['num_classes']
    if 'label_map' in ckpt:
        label_map = ckpt['label_map']

    # Do inference
    pred_labels = []
    video_names = os.listdir(video_path)
    with torch.no_grad():
        for video in tqdm(video_names, desc='Inferencing'):
            # read images from video
            images = load_imgs_from_video(os.path.join(video_path, video))
            # apply transform
            images = [Dataset.transform(None, img) for img in images]
            # stack to tensor, batch size = 1
            images = torch.stack(images, dim=0).unsqueeze(0)
            # do inference
            images = images.to(device)
            pred_y = model(images)  # type: torch.Tensor
            pred_y = pred_y.argmax(dim=1).cpu().numpy().tolist()
            pred_labels.append([video, pred_y[0], label_map[pred_y[0]]])
            print(pred_labels[-1])

    if labels is not None and len(labels) > 0:
        # accuracy_score needs two flat label sequences, so score the
        # predicted class indices rather than the [name, index, label] rows.
        # NOTE(review): assumes `labels` holds class indices in the same
        # order as os.listdir(video_path) -- confirm against callers.
        pred_classes = [row[1] for row in pred_labels]
        acc = accuracy_score(labels, pred_classes)
        print('Accuracy: %0.2f' % acc)

    # Save results
    pandas.DataFrame(pred_labels).to_csv('result.csv', index=False)
    print('Results has been saved to {}'.format('result.csv'))

    return pred_labels