Example #1
def build_decoder(n_vocabs):
    model = Decoder(model_name=C.decoder_model,
                    n_layers=C.decoder_n_layers,
                    encoder_size=C.encoder_output_size,
                    embedding_size=C.embedding_size,
                    embedding_scale=C.embedding_scale,
                    hidden_size=C.decoder_hidden_size,
                    attn_size=C.decoder_attn_size,
                    output_size=n_vocabs,
                    embedding_dropout=C.embedding_dropout,
                    dropout=C.decoder_dropout,
                    out_dropout=C.decoder_out_dropout)
    model = model.to(C.device)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=C.decoder_learning_rate,
                                 weight_decay=C.decoder_weight_decay,
                                 amsgrad=C.decoder_use_amsgrad)
    # Create the learnable regularization weight directly on the target device so it
    # stays a leaf tensor (torch.autograd.Variable is deprecated).
    lambda_reg = torch.tensor(0.001, requires_grad=True, device=C.device)

    decoder = {
        'model': model,
        'loss': loss,
        'optimizer': optimizer,
        'lambda_reg': lambda_reg,
    }
    return decoder
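A minimal usage sketch for the factory above, assuming the same global config C and an already-imported torch; the vocabulary size, batch size, and dummy tensors are illustrative only, and in real training the logits would come from decoder['model'] rather than torch.randn.

decoder = build_decoder(n_vocabs=10000)

# Dummy logits/targets standing in for a real forward pass of decoder['model'].
logits = torch.randn(32, 10000, device=C.device, requires_grad=True)
targets = torch.randint(0, 10000, (32,), device=C.device)

loss = decoder['loss'](logits, targets)
decoder['optimizer'].zero_grad()
loss.backward()
decoder['optimizer'].step()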
Example #2
def detect(path, encoder=None, decoder=None):
    torch.backends.cudnn.benchmark = True

    dataset = LoadImages(path, img_size=config.IMAGE_SIZE, used_layers=config.USED_LAYERS)

    if not encoder or not decoder:
        in_channels = num_channels(config.USED_LAYERS)
        encoder = Encoder(in_channels=in_channels)
        decoder = Decoder(num_classes=config.NUM_CLASSES+1)
        encoder = encoder.to(config.DEVICE)
        decoder = decoder.to(config.DEVICE)

        _, encoder, decoder = load_checkpoint(encoder, decoder, config.CHECKPOINT_FILE, config.DEVICE)

    encoder.eval()
    decoder.eval()

    for _, layers, img_path in dataset:
        with torch.no_grad():
            layers = torch.from_numpy(layers).to(config.DEVICE, non_blocking=True)
            if layers.ndimension() == 3:
                layers = layers.unsqueeze(0)

            features = encoder(layers)
            predictions = decoder(features)
            out = predictions.sigmoid()

            plot_volumes(to_volume(out, config.VOXEL_THRESH).cpu(), [img_path], config.NAMES)
Example #3
def run(ckpt_fpath):
    checkpoint = torch.load(ckpt_fpath)
    """ Load Config """
    config = dict_to_cls(checkpoint['config'])
    """ Build Data Loader """
    if config.corpus == "MSVD":
        corpus = MSVD(config)
    elif config.corpus == "MSR-VTT":
        corpus = MSRVTT(config)
    train_iter, val_iter, test_iter, vocab = \
        corpus.train_data_loader, corpus.val_data_loader, corpus.test_data_loader, corpus.vocab
    print(
        '#vocabs: {} ({}), #words: {} ({}). Trim words which appear less than {} times.'
        .format(vocab.n_vocabs, vocab.n_vocabs_untrimmed, vocab.n_words,
                vocab.n_words_untrimmed, config.loader.min_count))
    """ Build Models """
    decoder = Decoder(rnn_type=config.decoder.rnn_type,
                      num_layers=config.decoder.rnn_num_layers,
                      num_directions=config.decoder.rnn_num_directions,
                      feat_size=config.feat.size,
                      feat_len=config.loader.frame_sample_len,
                      embedding_size=config.vocab.embedding_size,
                      hidden_size=config.decoder.rnn_hidden_size,
                      attn_size=config.decoder.rnn_attn_size,
                      output_size=vocab.n_vocabs,
                      rnn_dropout=config.decoder.rnn_dropout)
    decoder.load_state_dict(checkpoint['decoder'])
    model = CaptionGenerator(decoder, config.loader.max_caption_len, vocab)
    model = model.cuda()
    """ Train Set """
    """
    train_vid2pred = get_predicted_captions(train_iter, model, model.vocab, beam_width=5, beam_alpha=0.)
    train_vid2GTs = get_groundtruth_captions(train_iter, model.vocab)
    train_scores = score(train_vid2pred, train_vid2GTs)
    print("[TRAIN] {}".format(train_scores))
    """
    """ Validation Set """
    """
    val_vid2pred = get_predicted_captions(val_iter, model, model.vocab, beam_width=5, beam_alpha=0.)
    val_vid2GTs = get_groundtruth_captions(val_iter, model.vocab)
    val_scores = score(val_vid2pred, val_vid2GTs)
    print("[VAL] scores: {}".format(val_scores))
    """
    """ Test Set """
    test_vid2pred = get_predicted_captions(test_iter,
                                           model,
                                           model.vocab,
                                           beam_width=5,
                                           beam_alpha=0.)
    test_vid2GTs = get_groundtruth_captions(test_iter, model.vocab)
    test_scores = score(test_vid2pred, test_vid2GTs)
    print("[TEST] {}".format(test_scores))

    test_save_fpath = os.path.join(C.result_dpath,
                                   "{}_{}.csv".format(config.corpus, 'test'))
    save_result(test_vid2pred, test_vid2GTs, test_save_fpath)
Example #4
def evaluate_hand_draw_net(cfg):
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True

    IMG_SIZE = cfg.CONST.IMG_H, cfg.CONST.IMG_W
    CROP_SIZE = cfg.CONST.CROP_IMG_H, cfg.CONST.CROP_IMG_W

    eval_transforms = utils.data_transforms.Compose([
        utils.data_transforms.CenterCrop(IMG_SIZE, CROP_SIZE),
        utils.data_transforms.RandomBackground(cfg.TEST.RANDOM_BG_COLOR_RANGE),
        utils.data_transforms.Normalize(mean=cfg.DATASET.MEAN,
                                        std=cfg.DATASET.STD),
        utils.data_transforms.ToTensor(),
    ])

    # Set up networks
    encoder = Encoder(cfg)
    decoder = Decoder(cfg)
    azi_classes = int(360 / cfg.CONST.BIN_SIZE)
    ele_classes = int(180 / cfg.CONST.BIN_SIZE)
    view_estimater = ViewEstimater(cfg,
                                   azi_classes=azi_classes,
                                   ele_classes=ele_classes)

    if torch.cuda.is_available():
        encoder = torch.nn.DataParallel(encoder).cuda()
        decoder = torch.nn.DataParallel(decoder).cuda()
        view_estimater = torch.nn.DataParallel(view_estimater).cuda()

    # Load weight
    # Load weight for encoder, decoder
    print('[INFO] %s Loading reconstruction weights from %s ...' %
          (dt.now(), cfg.EVALUATE_HAND_DRAW.RECONSTRUCTION_WEIGHTS))
    rec_checkpoint = torch.load(cfg.EVALUATE_HAND_DRAW.RECONSTRUCTION_WEIGHTS)
    encoder.load_state_dict(rec_checkpoint['encoder_state_dict'])
    decoder.load_state_dict(rec_checkpoint['decoder_state_dict'])
    print('[INFO] Best reconstruction result at epoch %d ...' %
          rec_checkpoint['epoch_idx'])

    # Load weight for view estimater
    print('[INFO] %s Loading view estimation weights from %s ...' %
          (dt.now(), cfg.EVALUATE_HAND_DRAW.VIEW_ESTIMATION_WEIGHTS))
    view_checkpoint = torch.load(
        cfg.EVALUATE_HAND_DRAW.VIEW_ESTIMATION_WEIGHTS)
    view_estimater.load_state_dict(
        view_checkpoint['view_estimator_state_dict'])
    print('[INFO] Best view estimation result at epoch %d ...' %
          view_checkpoint['epoch_idx'])

    for img_path in os.listdir(cfg.EVALUATE_HAND_DRAW.INPUT_IMAGE_FOLDER):
        eval_id = int(img_path[:-4])
        input_img_path = os.path.join(
            cfg.EVALUATE_HAND_DRAW.INPUT_IMAGE_FOLDER, img_path)
        print(input_img_path)
        evaluate_hand_draw_img(cfg, encoder, decoder, view_estimater,
                               input_img_path, eval_transforms, eval_id)
Example #5
 def __init__(self, config):
     super().__init__()
     self.encoder_word = Encoder(config, config.src_vocab_size)
     self.encoder_char = Encoder(config, config.tgt_vocab_size)
     self.pointer = Pointer(config)
     self.attention = Luong_Attention(config)
     self.decoder = Decoder(config)
     self.linear_out = nn.Linear(config.model_size, config.tgt_vocab_size)
     self.softmax = nn.Softmax(dim=-1)
     self.s_len = config.s_len
     self.bos = config.bos
Example #6
class Visualization_demo():
    def __init__(self, cfg, output_dir):
        self.encoder = Encoder(cfg)
        self.decoder = Decoder(cfg)
        self.refiner = Refiner(cfg)
        self.merger = Merger(cfg)

        checkpoint = torch.load(cfg.CHECKPOINT)
        encoder_state_dict = clean_state_dict(checkpoint['encoder_state_dict'])
        self.encoder.load_state_dict(encoder_state_dict)
        decoder_state_dict = clean_state_dict(checkpoint['decoder_state_dict'])
        self.decoder.load_state_dict(decoder_state_dict)
        if cfg.NETWORK.USE_REFINER:
            refiner_state_dict = clean_state_dict(
                checkpoint['refiner_state_dict'])
            self.refiner.load_state_dict(refiner_state_dict)
        if cfg.NETWORK.USE_MERGER:
            merger_state_dict = clean_state_dict(
                checkpoint['merger_state_dict'])
            self.merger.load_state_dict(merger_state_dict)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir

    def run_on_images(self, imgs, sid, mid, iid, sampled_idx):
        dir1 = os.path.join(self.output_dir, str(sid), str(mid))
        if not os.path.exists(dir1):
            os.makedirs(dir1)

        deprocess = imagenet_deprocess(rescale_image=False)
        image_features = self.encoder(imgs)
        raw_features, generated_volume = self.decoder(image_features)
        generated_volume = self.merger(raw_features, generated_volume)
        generated_volume = self.refiner(generated_volume)

        mesh = cubify(generated_volume, 0.3)
        #         mesh = voxel_to_world(meshes)
        save_mesh = os.path.join(dir1, "%s_%s.obj" % (iid, sampled_idx))
        verts, faces = mesh.get_mesh_verts_faces(0)
        save_obj(save_mesh, verts, faces)

        generated_volume = generated_volume.squeeze()
        img = image_to_numpy(deprocess(imgs[0][0]))
        save_img = os.path.join(dir1, "%02d.png" % (iid))
        #         cv2.imwrite(save_img, img[:, :, ::-1])
        cv2.imwrite(save_img, img)
        img1 = image_to_numpy(deprocess(imgs[0][1]))
        save_img1 = os.path.join(dir1, "%02d.png" % (sampled_idx))
        cv2.imwrite(save_img1, img1)
        #         cv2.imwrite(save_img1, img1[:, :, ::-1])
        get_volume_views(generated_volume, dir1, iid, sampled_idx)
Example #7
 def __init__(self, vocabulary_size, sos_token, eos_token, pad_token,
              max_string_length=default_eda['string_max_length'], attention_size=default_attention['size'],
              embedding_size=default_embedding['size'], hidden_size=default_gru['hidden_size'],
              num_layers=default_gru['num_layers'], dropout=default_gru['dropout']):
     super().__init__()
     self.max_string_length = max_string_length
     self.attention_size = attention_size
     self.vocabulary_size = vocabulary_size
     self.encoder = Encoder(vocabulary_size, embedding_size, hidden_size, num_layers, dropout)
     self.decoder = Decoder(vocabulary_size, embedding_size, hidden_size, num_layers, dropout, attention_size,
                            pad_token)
     self.sos_token = sos_token
     self.eos_token = eos_token
Example #8
    def __init__(self, num_classes, in_channels=3, backbone='xception', pretrained=True,
                 output_stride=16, freeze_bn=False, **_):
        super(DeepLabV3Plus, self).__init__()
        assert 'xception' in backbone or 'resnet' in backbone
        self.backbone, low_level_channels = getBackBone(backbone, in_channels=in_channels, output_stride=output_stride,
                                                        pretrained=pretrained)

        self.ASSP = ASSP(in_channels=2048, output_stride=output_stride)

        self.decoder = Decoder(low_level_channels, num_classes)

        if freeze_bn:
            self.freeze_bn()
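A hedged instantiation sketch for the constructor above; num_classes=21 and the xception backbone are illustrative choices, and DeepLabV3Plus together with its helpers (getBackBone, ASSP, Decoder) is assumed to be importable from the same project as this snippet.

model = DeepLabV3Plus(num_classes=21,
                      in_channels=3,
                      backbone='xception',
                      pretrained=True,
                      output_stride=16)
model.eval()  # put backbone BatchNorm/Dropout layers into inference mode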
Example #9
 def __init__(self, config):
     super().__init__()
     self.encoder = Encoder(config)
     self.decoder = Decoder(config)
     self.bert = Bert(config)
     self.decoder_ae = Decoder(config)
     self.t_len = config.t_len
     self.s_len = config.s_len
     self.pad = config.pad
     self.bos = config.bos
     self.model_size = config.model_size
     self.linear_bert = nn.Linear(768, config.model_size)
     self.linear_out = nn.Linear(config.model_size, config.tgt_vocab_size)
     self.linear_ae = nn.Linear(config.model_size, config.tgt_vocab_size)
Example #10
    def test_train_method(self):
        file_name = 'test/test_data/attention_test.txt'
        fine_tune_model_name = '../models/glove_model_40.pth'
        self.test_data_loader_attention = DataLoaderAttention(
            file_name=file_name)
        source2index, index2source, target2index, index2target, train_data = \
            self.test_data_loader_attention.load_data()
        EMBEDDING_SIZE = 50
        HIDDEN_SIZE = 32

        encoder = Encoder(len(source2index), EMBEDDING_SIZE, HIDDEN_SIZE, 3,
                          True)
        decoder = Decoder(len(target2index), EMBEDDING_SIZE, HIDDEN_SIZE * 2)

        self.trainer = Trainer(fine_tune_model=fine_tune_model_name)

        self.trainer.train_attention(
            train_data=train_data,
            source2index=source2index,
            target2index=target2index,
            index2source=index2source,
            index2target=index2target,
            encoder_model=encoder,
            decoder_model=decoder,
        )
Example #11
 def __init__(self, config):
     super().__init__()
     self.encoder = Encoder(config, config.src_vocab_size)
     self.decoder = Decoder(config)
     self.bos = config.bos
     self.s_len = config.s_len
     self.linear_out = nn.Linear(config.model_size, config.tgt_vocab_size)
Example #12
def build_model(config, vocab):
    visual_encoder = VisualEncoder(
        app_feat=config.vis_encoder.app_feat,
        mot_feat=config.vis_encoder.mot_feat,
        app_input_size=config.vis_encoder.app_feat_size,
        mot_input_size=config.vis_encoder.mot_feat_size,
        app_output_size=config.vocab.embedding_size,
        mot_output_size=config.vocab.embedding_size)

    phrase_encoder = PhraseEncoder(
        len_max_seq=config.loader.max_caption_len + 2,
        d_word_vec=config.vocab.embedding_size,
        n_layers=config.phr_encoder.SA_num_layers,
        n_head=config.phr_encoder.SA_num_heads,
        d_k=config.phr_encoder.SA_dim_k,
        d_v=config.phr_encoder.SA_dim_v,
        d_model=config.vocab.embedding_size,
        d_inner=config.phr_encoder.SA_dim_inner,
        dropout=config.phr_encoder.SA_dropout)

    decoder = Decoder(
        num_layers=config.decoder.rnn_num_layers,
        vis_feat_size=2 * config.vocab.embedding_size,
        feat_len=config.loader.frame_sample_len,
        embedding_size=config.vocab.embedding_size,
        sem_align_hidden_size=config.decoder.sem_align_hidden_size,
        sem_attn_hidden_size=config.decoder.sem_attn_hidden_size,
        hidden_size=config.decoder.rnn_hidden_size,
        output_size=vocab.n_vocabs)

    model = SGN(visual_encoder, phrase_encoder, decoder, config.loader.max_caption_len, vocab,
                config.PS_threshold)
    return model
Example #13
    def __init__(self,
                 channels,
                 h_dim,
                 res_h_dim,
                 n_res_layers,
                 n_embeddings,
                 embedding_dim,
                 beta,
                 save_img_embedding_map=False):
        super(VQVAE, self).__init__()
        # encode image into continuous latent space
        self.encoder = Encoder(channels, h_dim, n_res_layers, res_h_dim)
        self.pre_quantization_conv = nn.Conv2d(h_dim,
                                               embedding_dim,
                                               kernel_size=1,
                                               stride=1)
        # pass continuous latent vector through discretization bottleneck
        self.vector_quantization = VectorQuantizer(n_embeddings, embedding_dim,
                                                   beta)
        # decode the discrete latent representation
        self.decoder = Decoder(channels, embedding_dim, h_dim, n_res_layers,
                               res_h_dim)

        if save_img_embedding_map:
            self.img_to_embedding_map = {i: [] for i in range(n_embeddings)}
        else:
            self.img_to_embedding_map = None
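A hedged construction sketch for the VQ-VAE above; the hyperparameter values are illustrative rather than taken from the source project, and the forward pass is omitted because it is not part of this example.

vqvae = VQVAE(channels=3,          # RGB input
              h_dim=128,           # hidden width of encoder/decoder
              res_h_dim=32,        # hidden width of the residual blocks
              n_res_layers=2,
              n_embeddings=512,    # codebook size
              embedding_dim=64,    # codebook vector dimension
              beta=0.25)           # commitment loss weight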
Example #14
    def __init__(self, args):
        """
        Basic initialization of Transformer.

        Arguments
        ---------
        args: <argparse.Namespace> Arguments used for overall process.
        """
        super().__init__()

        self.args = args
        self.num_stacks = self.args.num_stacks
        self.d_model = self.args.d_model
        self.vocab_size = self.args.vocab_size

        self.emb = EmbeddingLayer(self.args)

        encoders = [Encoder(self.args) for _ in range(self.num_stacks)]
        self.encoder_stack = nn.Sequential(*encoders)

        decoders = [Decoder(self.args) for _ in range(self.num_stacks)]
        self.decoder_stack = nn.ModuleList(decoders)

        self.output_linear = nn.Linear(in_features=self.d_model,
                                       out_features=self.vocab_size,
                                       bias=False)
        self.output_linear.weight = self.emb.embedding_layer.weight

        self.softmax = nn.LogSoftmax(dim=-1)
        self.dropout = nn.Dropout(p=0.1)
Example #15
File: vqvae.py  Project: yifr/brainqa
    def __init__(self,
                 h_dim,
                 res_h_dim,
                 n_res_layers,
                 n_embeddings,
                 embedding_dim,
                 beta,
                 restart=True):
        super(VQVAE, self).__init__()
        # encode image into continuous latent space
        self.encoder = Encoder(in_dim=256,
                               h_dim=h_dim,
                               n_res_layers=n_res_layers,
                               res_h_dim=res_h_dim)
        self.pre_quantization_conv = nn.Conv1d(h_dim,
                                               embedding_dim,
                                               kernel_size=3,
                                               stride=1,
                                               padding=1)

        # Define discretization bottleneck
        if not restart:
            self.vector_quantization = VectorQuantizer(n_embeddings,
                                                       embedding_dim, beta)
        else:
            self.vector_quantization = VectorQuantizerRandomRestart(
                n_embeddings, embedding_dim, beta)

        # decode the discrete latent representation
        self.decoder = Decoder(embedding_dim, h_dim, n_res_layers, res_h_dim)

        # e_indices are used in sampling; only the last batch's indices are kept to represent the latest latent state
        self.e_indices = None
Example #16
    def __init__(self,
                 enc_in,
                 dec_in,
                 c_out,
                 seq_len,
                 label_len,
                 out_len,
                 factor=5,
                 d_model=512,
                 n_heads=8,
                 e_layers=3,
                 d_layers=2,
                 d_ff=512,
                 dropout=0.0,
                 attn='prob',
                 embed='fixed',
                 data='ETTh',
                 activation='gelu',
                 device=torch.device('cuda:0')):
        super(Informer, self).__init__()
        self.pred_len = out_len
        self.attn = attn

        # Encoding
        self.enc_embedding = DataEmbedding(enc_in, d_model, embed, data,
                                           dropout)
        self.dec_embedding = DataEmbedding(dec_in, d_model, embed, data,
                                           dropout)
        # Attention
        Attn = ProbAttention if attn == 'prob' else FullAttention
        # Encoder
        self.encoder = Encoder([
            EncoderLayer(AttentionLayer(
                Attn(False, factor, attention_dropout=dropout), d_model,
                n_heads),
                         d_model,
                         d_ff,
                         dropout=dropout,
                         activation=activation) for _ in range(e_layers)
        ], [ConvLayer(d_model) for _ in range(e_layers - 1)],
                               norm_layer=torch.nn.LayerNorm(d_model))
        # Decoder
        self.decoder = Decoder([
            DecoderLayer(
                AttentionLayer(
                    FullAttention(True, factor, attention_dropout=dropout),
                    d_model, n_heads),
                AttentionLayer(
                    FullAttention(False, factor, attention_dropout=dropout),
                    d_model, n_heads),
                d_model,
                d_ff,
                dropout=dropout,
                activation=activation,
            ) for _ in range(d_layers)
        ],
                               norm_layer=torch.nn.LayerNorm(d_model))
        # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
        # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
        self.projection = nn.Linear(d_model, c_out, bias=True)
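A hedged construction sketch for the Informer above; the ETT-style sizes (7 input variates, 96/48/24 sequence, label, and prediction lengths) are illustrative assumptions, and the device argument is overridden to CPU so the sketch does not require a GPU.

model = Informer(enc_in=7, dec_in=7, c_out=7,
                 seq_len=96, label_len=48, out_len=24,
                 factor=5, d_model=512, n_heads=8,
                 e_layers=3, d_layers=2, d_ff=512,
                 dropout=0.05, attn='prob', embed='fixed',
                 data='ETTh', activation='gelu',
                 device=torch.device('cpu'))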
Example #17
    def __init__(self, config, device):
        super(MedicalFSS, self).__init__()
        self.config = config
        resize_dim = self.config['input_size']
        self.encoded_h = int(resize_dim[0] / 2**self.config['n_pool'])
        self.encoded_w = int(resize_dim[1] / 2**self.config['n_pool'])

        self.s_encoder = SupportEncoder(self.config['path']['init_path'],
                                        device)  # .to(device)
        self.q_encoder = QueryEncoder(self.config['path']['init_path'],
                                      device)  # .to(device)
        self.ConvBiGRU = ConvBGRU(in_channels=512,
                                  hidden_channels=256,
                                  kernel_size=(3, 3),
                                  num_layers=self.config['n_layer'],
                                  device=device).to(device)
        self.decoder = Decoder(input_res=(self.encoded_h, self.encoded_w),
                               output_res=resize_dim).to(device)
        self.q_slice_n = self.config['q_slice']
        self.ch = 256  # number of channels of embedding vector
        self.n_shot = self.config['n_shot']
        self.reversed_idx = list(reversed(range(self.q_slice_n)))

        self.is_attention = self.config['is_attention']
        if self.is_attention:
            self.avgpool3d = nn.AvgPool3d(
                (self.ch * 2, self.encoded_w, self.encoded_h))
            self.softmax = nn.Softmax(dim=1)
Example #18
def build_model(C, vocab):

    decoder = Decoder(rnn_type=C.decoder.rnn_type,
                      num_layers=C.decoder.rnn_num_layers,
                      num_directions=C.decoder.rnn_num_directions,
                      feat_size=C.feat.size,
                      feat_len=C.loader.frame_sample_len,
                      embedding_size=C.vocab.embedding_size,
                      hidden_size=C.decoder.rnn_hidden_size,
                      attn_size=C.decoder.rnn_attn_size,
                      output_size=vocab.n_vocabs,
                      rnn_dropout=C.decoder.rnn_dropout)
    if C.pretrained_decoder_fpath is not None:
        decoder.load_state_dict(
            torch.load(C.pretrained_decoder_fpath)['decoder'])
        print("Pretrained decoder is loaded from {}".format(
            C.pretrained_decoder_fpath))
    # Global and local reconstructors
    if C.reconstructor is None:
        reconstructor = None
    elif C.reconstructor.type == 'global':
        reconstructor = GlobalReconstructor(
            rnn_type=C.reconstructor.rnn_type,
            num_layers=C.reconstructor.rnn_num_layers,
            num_directions=C.reconstructor.rnn_num_directions,
            decoder_size=C.decoder.rnn_hidden_size,
            hidden_size=C.reconstructor.rnn_hidden_size,
            rnn_dropout=C.reconstructor.rnn_dropout)
    else:
        reconstructor = LocalReconstructor(
            rnn_type=C.reconstructor.rnn_type,
            num_layers=C.reconstructor.rnn_num_layers,
            num_directions=C.reconstructor.rnn_num_directions,
            decoder_size=C.decoder.rnn_hidden_size,
            hidden_size=C.reconstructor.rnn_hidden_size,
            attn_size=C.reconstructor.rnn_attn_size,
            rnn_dropout=C.reconstructor.rnn_dropout)
    if C.pretrained_reconstructor_fpath is not None:
        reconstructor.load_state_dict(
            torch.load(C.pretrained_reconstructor_fpath)['reconstructor'])
        print("Pretrained reconstructor is loaded from {}".format(
            C.pretrained_reconstructor_fpath))

    model = CaptionGenerator(decoder, reconstructor, C.loader.max_caption_len,
                             vocab)
    model.cuda()
    return model
Example #19
def test(encoder=None, decoder=None):
    torch.backends.cudnn.benchmark = True

    _, dataloader = create_dataloader(config.IMG_DIR + "/test", config.MESH_DIR + "/test",
                                            batch_size=config.BATCH_SIZE, used_layers=config.USED_LAYERS,
                                            img_size=config.IMAGE_SIZE, map_size=config.MAP_SIZE,
                                            augment=config.AUGMENT, workers=config.NUM_WORKERS,
                                            pin_memory=config.PIN_MEMORY, shuffle=False)
    if not encoder or not decoder:
        in_channels = num_channels(config.USED_LAYERS)
        encoder = Encoder(in_channels=in_channels)
        decoder = Decoder(num_classes=config.NUM_CLASSES+1)
        encoder = encoder.to(config.DEVICE)
        decoder = decoder.to(config.DEVICE)

        _, encoder, decoder = load_checkpoint(encoder, decoder, config.CHECKPOINT_FILE, config.DEVICE)

    loss_fn = LossFunction()

    loop = tqdm(dataloader, leave=True)
    losses = []
    ious = []

    encoder.eval()
    decoder.eval()

    for i, (_, layers, volumes, img_files) in enumerate(loop):
        with torch.no_grad():
            layers = layers.to(config.DEVICE, non_blocking=True)
            volumes = volumes.to(config.DEVICE, non_blocking=True)

            features = encoder(layers)
            predictions = decoder(features)

            loss = loss_fn(predictions, volumes)
            losses.append(loss.item())

            iou = predictions_iou(to_volume(predictions, config.VOXEL_THRESH), volumes)
            ious.append(iou)

            mean_iou = sum(ious) / len(ious)
            mean_loss = sum(losses) / len(losses)
            loop.set_postfix(loss=mean_loss, mean_iou=mean_iou)

            if i == 0 and config.PLOT:
                plot_volumes(to_volume(predictions, config.VOXEL_THRESH).cpu(), img_files, config.NAMES)
                plot_volumes(volumes.cpu(), img_files, config.NAMES)
Example #20
 def __init__(self, d_model, d_ff, d_K, d_V, n_heads, n_layers,
              sourceVocabSize, sourceLength, targetVocabSize, targetLength):
     super(Transformer, self).__init__()
     self.encoder = Encoder(sourceVocabSize, sourceLength, d_model, d_ff,
                            d_K, d_V, n_heads, n_layers)
     self.decoder = Decoder(targetVocabSize, targetLength, d_model, d_ff,
                            d_K, d_V, n_heads, n_layers)
     self.projection = nn.Linear(d_model, targetVocabSize, bias=False)
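A hedged construction sketch for the Transformer above; the dimensions and vocabulary sizes are toy values chosen for illustration, and Encoder and Decoder are assumed to come from the same project as this snippet.

model = Transformer(d_model=512, d_ff=2048,
                    d_K=64, d_V=64,
                    n_heads=8, n_layers=6,
                    sourceVocabSize=8000, sourceLength=50,
                    targetVocabSize=8000, targetLength=50)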
Example #21
 def __init__(self, num_classes, fixed_height = 48, net='efficientnet'):
     super(Model, self).__init__()
     self.encoder = Encoder(net = net)
     self.decoder = Decoder(input_dim=int(fixed_height * 288 / 8), num_class=num_classes)
     self.crnn = nn.Sequential(
         self.encoder,
         self.decoder
     )
     self.log_softmax = nn.LogSoftmax(dim=2)
Example #22
def test(test_loader, modelID, showAttn=True):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP,
                      FLIP).cuda()
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).cuda()
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda()
    model_file = 'save_weights/seq2seq-' + str(modelID) + '.model'
    pretrain_dict = torch.load(model_file)
    seq2seq_dict = seq2seq.state_dict()
    pretrain_dict = {
        k: v
        for k, v in pretrain_dict.items() if k in seq2seq_dict
    }
    seq2seq_dict.update(pretrain_dict)
    seq2seq.load_state_dict(seq2seq_dict)  #load
    print('Loading ' + model_file)

    seq2seq.eval()
    total_loss_t = 0
    start_t = time.time()
    for num, (test_index, test_in, test_in_len, test_out,
              test_domain) in enumerate(test_loader):
        lambd = LAMBD
        test_in, test_out = Variable(test_in, volatile=True).cuda(), Variable(
            test_out, volatile=True).cuda()
        test_domain = Variable(test_domain, volatile=True).cuda()
        output_t, attn_weights_t, out_domain_t = seq2seq(test_in,
                                                         test_out,
                                                         test_in_len,
                                                         lambd,
                                                         teacher_rate=False,
                                                         train=False)
        batch_count_n = writePredict(modelID, test_index, output_t, 'test')
        test_label = test_out.permute(1, 0)[1:].contiguous().view(-1)
        if LABEL_SMOOTH:
            loss_t = crit(log_softmax(output_t.view(-1, vocab_size)),
                          test_label)
        else:
            loss_t = F.cross_entropy(output_t.view(-1, vocab_size),
                                     test_label,
                                     ignore_index=tokens['PAD_TOKEN'])

        total_loss_t += loss_t.data[0]
        if showAttn:
            global_index_t = 0
            for t_idx, t_in in zip(test_index, test_in):
                visualizeAttn(t_in.data[0], test_in_len[0],
                              [j[global_index_t] for j in attn_weights_t],
                              modelID, batch_count_n[global_index_t],
                              'test_' + t_idx.split(',')[0])
                global_index_t += 1

    total_loss_t /= (num + 1)
    writeLoss(total_loss_t, 'test')
    print('       TEST loss=%.3f, time=%.3f' %
          (total_loss_t, time.time() - start_t))
Example #23
def train():
    torch.backends.cudnn.benchmark = True

    _, dataloader = create_dataloader(config.IMG_DIR + "/train", config.MESH_DIR + "/train",
                                            batch_size=config.BATCH_SIZE, used_layers=config.USED_LAYERS,
                                            img_size=config.IMAGE_SIZE, map_size=config.MAP_SIZE,
                                            augment=config.AUGMENT, workers=config.NUM_WORKERS,
                                            pin_memory=config.PIN_MEMORY, shuffle=True)

    in_channels = num_channels(config.USED_LAYERS)
    encoder = Encoder(in_channels=in_channels)
    decoder = Decoder(num_classes=config.NUM_CLASSES+1)
    encoder.apply(init_weights)
    decoder.apply(init_weights)
    encoder_solver = torch.optim.Adam(filter(lambda p: p.requires_grad, encoder.parameters()),
                                      lr=config.ENCODER_LEARNING_RATE,
                                      betas=config.BETAS)
    decoder_solver = torch.optim.Adam(decoder.parameters(),
                                      lr=config.DECODER_LEARNING_RATE,
                                      betas=config.BETAS)
    encoder_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(encoder_solver,
                                                                milestones=config.ENCODER_LR_MILESTONES,
                                                                gamma=config.GAMMA)
    decoder_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(decoder_solver,
                                                                milestones=config.DECODER_LR_MILESTONES,
                                                                gamma=config.GAMMA)
    encoder = encoder.to(config.DEVICE)
    decoder = decoder.to(config.DEVICE)

    loss_fn = LossFunction()

    init_epoch = 0
    if config.CHECKPOINT_FILE and config.LOAD_MODEL:
        init_epoch, encoder, decoder = load_checkpoint(encoder, decoder, config.CHECKPOINT_FILE, config.DEVICE)

    output_dir = os.path.join(config.OUT_PATH, re.sub("[^0-9a-zA-Z]+", "-", dt.now().isoformat()))

    for epoch_idx in range(init_epoch, config.NUM_EPOCHS):
        encoder.train()
        decoder.train()
        train_one_epoch(encoder, decoder, dataloader, loss_fn, encoder_solver, decoder_solver, epoch_idx)
        encoder_lr_scheduler.step()
        decoder_lr_scheduler.step()

        if config.TEST:
            test(encoder, decoder)
        if config.SAVE_MODEL:
            save_checkpoint(epoch_idx, encoder, decoder, output_dir)

    if not config.TEST:
        test(encoder, decoder)
    if not config.SAVE_MODEL:
        save_checkpoint(config.NUM_EPOCHS - 1, encoder, decoder, output_dir)
Example #24
class Quantitative_analysis_demo():
    def __init__(self, cfg, output_dir):
        self.encoder = Encoder(cfg)
        self.decoder = Decoder(cfg)
        self.refiner = Refiner(cfg)
        self.merger = Merger(cfg)
        #         self.thresh = cfg.VOXEL_THRESH
        self.th = cfg.TEST.VOXEL_THRESH

        checkpoint = torch.load(cfg.CHECKPOINT)
        encoder_state_dict = clean_state_dict(checkpoint['encoder_state_dict'])
        self.encoder.load_state_dict(encoder_state_dict)
        decoder_state_dict = clean_state_dict(checkpoint['decoder_state_dict'])
        self.decoder.load_state_dict(decoder_state_dict)
        if cfg.NETWORK.USE_REFINER:
            refiner_state_dict = clean_state_dict(
                checkpoint['refiner_state_dict'])
            self.refiner.load_state_dict(refiner_state_dict)
        if cfg.NETWORK.USE_MERGER:
            merger_state_dict = clean_state_dict(
                checkpoint['merger_state_dict'])
            self.merger.load_state_dict(merger_state_dict)

        self.output_dir = output_dir

    def calculate_iou(self, imgs, GT_voxels, sid, mid, iid):
        dir1 = os.path.join(self.output_dir, str(sid), str(mid))
        if not os.path.exists(dir1):
            os.makedirs(dir1)

        image_features = self.encoder(imgs)
        raw_features, generated_volume = self.decoder(image_features)
        generated_volume = self.merger(raw_features, generated_volume)
        generated_volume = self.refiner(generated_volume)
        generated_volume = generated_volume.squeeze()

        sample_iou = []
        for th in self.th:
            _volume = torch.ge(generated_volume, th).float()
            intersection = torch.sum(_volume.mul(GT_voxels)).float()
            union = torch.sum(torch.ge(_volume.add(GT_voxels), 1)).float()
            sample_iou.append((intersection / union).item())
        return sample_iou
Example #25
def main():
    parser = argparse.ArgumentParser(description="Training attention model")

    parser.add_argument(
        "-t",
        "--train_data",
        metavar="train_data",
        type=str,
        default='../data/processed/source_replay_twitter_data.txt',
        dest="train_data",
        help="set the training data ")
    parser.add_argument("-e",
                        "--embedding_size",
                        metavar="embedding_size",
                        type=int,
                        default=50,
                        dest="embedding_size",
                        help="set the embedding size ")
    parser.add_argument("-H",
                        "--hidden_size",
                        metavar="hidden_size",
                        type=int,
                        default=512,
                        dest="hidden_size",
                        help="set the hidden size ")
    parser.add_argument("-f",
                        "--fine_tune_model_name",
                        metavar="fine_tune_model_name",
                        type=str,
                        default='../models/glove_wiki/glove_model_40.pth',
                        dest="fine_tune_model_name",
                        help="set the fine tune model name ")
    args = parser.parse_args()

    data_loader_attention = DataLoaderAttention(file_name=args.train_data)
    source2index, index2source, target2index, index2target, train_data = \
        data_loader_attention.load_data()
    EMBEDDING_SIZE = args.embedding_size
    HIDDEN_SIZE = args.hidden_size

    encoder = Encoder(len(source2index), EMBEDDING_SIZE, HIDDEN_SIZE, 3, True)
    decoder = Decoder(len(target2index), EMBEDDING_SIZE, HIDDEN_SIZE * 2)

    trainer = Trainer(epoch=600,
                      batch_size=64,
                      fine_tune_model=args.fine_tune_model_name)

    trainer.train_attention(train_data=train_data,
                            source2index=source2index,
                            target2index=target2index,
                            index2source=index2source,
                            index2target=index2target,
                            encoder_model=encoder,
                            decoder_model=decoder)
Example #26
def test(test_loader, modelID, showAttn=True):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP,
                      FLIP).to(device)
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).to(device)
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).to(device)
    model_file = 'save_weights/seq2seq-' + str(modelID) + '.model'
    print('Loading ' + model_file)
    seq2seq.load_state_dict(torch.load(model_file))  #load

    seq2seq.eval()
    total_loss_t = 0
    start_t = time.time()
    with torch.no_grad():
        for num, (test_index, test_in, test_in_len,
                  test_out) in enumerate(test_loader):
            #test_in = test_in.unsqueeze(1)
            test_in, test_out = test_in.to(device), test_out.to(device)
            if test_in.requires_grad or test_out.requires_grad:
                print(
                    'ERROR! test_in, test_out should have requires_grad=False')
            output_t, attn_weights_t = seq2seq(test_in,
                                               test_out,
                                               test_in_len,
                                               teacher_rate=False,
                                               train=False)
            batch_count_n = writePredict(modelID, test_index, output_t, 'test')
            test_label = test_out.permute(1, 0)[1:].reshape(-1)
            #loss_t = F.cross_entropy(output_t.view(-1, vocab_size),
            #                        test_label, ignore_index=tokens['PAD_TOKEN'])
            #loss_t = loss_label_smoothing(output_t.view(-1, vocab_size), test_label)
            if LABEL_SMOOTH:
                loss_t = crit(log_softmax(output_t.reshape(-1, vocab_size)),
                              test_label)
            else:
                loss_t = F.cross_entropy(output_t.reshape(-1, vocab_size),
                                         test_label,
                                         ignore_index=tokens['PAD_TOKEN'])

            total_loss_t += loss_t.item()

            if showAttn:
                global_index_t = 0
                for t_idx, t_in in zip(test_index, test_in):
                    visualizeAttn(t_in.detach()[0], test_in_len[0],
                                  [j[global_index_t] for j in attn_weights_t],
                                  modelID, batch_count_n[global_index_t],
                                  'test_' + t_idx.split(',')[0])
                    global_index_t += 1

        total_loss_t /= (num + 1)
        writeLoss(total_loss_t, 'test')
        print('    TEST loss=%.3f, time=%.3f' %
              (total_loss_t, time.time() - start_t))
Example #27
    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams
        # Encoder
        self.encoder = Encoder(ngf=self.hparams.ngf, z_dim=self.hparams.z_dim)
        self.encoder.apply(weights_init)
        device = "cuda" if isinstance(self.hparams.gpus, int) else "cpu"
        # Decoder
        self.decoder = Decoder(ngf=self.hparams.ngf, z_dim=self.hparams.z_dim)
        self.decoder.apply(weights_init)
        # Discriminator
        self.discriminator = Discriminator()
        self.discriminator.apply(weights_init)

        # Losses
        self.criterionFeat = torch.nn.L1Loss()
        self.criterionGAN = GANLoss(gan_mode="lsgan")

        if self.hparams.use_vgg:
            self.criterion_perceptual_style = [Perceptual_Loss(device)]
Example #28
def build_model(vocab):
    decoder = Decoder(rnn_type=C.decoder.rnn_type,
                      num_layers=C.decoder.rnn_num_layers,
                      num_directions=C.decoder.rnn_num_directions,
                      feat_size=C.feat.size,
                      feat_len=C.loader.frame_sample_len,
                      embedding_size=C.vocab.embedding_size,
                      hidden_size=C.decoder.rnn_hidden_size,
                      attn_size=C.decoder.rnn_attn_size,
                      output_size=vocab.n_vocabs,
                      rnn_dropout=C.decoder.rnn_dropout)
    if C.pretrained_decoder_fpath is not None:
        decoder.load_state_dict(
            torch.load(C.pretrained_decoder_fpath)['decoder'])
        print("Pretrained decoder is loaded from {}".format(
            C.pretrained_decoder_fpath))

    model = CaptionGenerator(decoder, C.loader.max_caption_len, vocab)
    model.cuda()
    return model
Example #29
    def __init__(self, cfg_network: DictConfig, cfg_tester: DictConfig):
        super().__init__()
        self.cfg_network = cfg_network
        self.cfg_tester = cfg_tester

        # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
        torch.backends.cudnn.benchmark = True

        # Set up networks
        self.encoder = Encoder(cfg_network)
        self.decoder = Decoder(cfg_network)
        self.refiner = Refiner(cfg_network)
        self.merger = Merger(cfg_network)
        
        # Initialize weights of networks
        self.encoder.apply(utils.network_utils.init_weights)
        self.decoder.apply(utils.network_utils.init_weights)
        self.refiner.apply(utils.network_utils.init_weights)
        self.merger.apply(utils.network_utils.init_weights)
        
        self.bce_loss = nn.BCELoss()
Example #30
    def __init__(self, cfg, output_dir):
        self.encoder = Encoder(cfg)
        self.decoder = Decoder(cfg)
        self.refiner = Refiner(cfg)
        self.merger = Merger(cfg)
        #         self.thresh = cfg.VOXEL_THRESH
        self.th = cfg.TEST.VOXEL_THRESH
        
        checkpoint = torch.load(cfg.CHECKPOINT)
        encoder_state_dict = clean_state_dict(checkpoint['encoder_state_dict'])
        self.encoder.load_state_dict(encoder_state_dict)
        decoder_state_dict = clean_state_dict(checkpoint['decoder_state_dict'])
        self.decoder.load_state_dict(decoder_state_dict)
        if cfg.NETWORK.USE_REFINER:
            refiner_state_dict = clean_state_dict(checkpoint['refiner_state_dict'])
            self.refiner.load_state_dict(refiner_state_dict)
        if cfg.NETWORK.USE_MERGER:
            merger_state_dict = clean_state_dict(checkpoint['merger_state_dict'])
            self.merger.load_state_dict(merger_state_dict)
        
        self.output_dir = output_dir