def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    """Helper: Construct a model from hyperparameters.

    Args:
        src_vocab: Passed to the source-side ``Embeddings`` as its second
            argument.
        tgt_vocab: Passed to the target-side ``Embeddings`` and to
            ``Generator`` as their second argument.
        N: Passed to ``Encoder`` and ``Decoder`` as their second argument
            (presumably the number of stacked layers -- TODO confirm).
        d_model: Size argument shared by the attention, feed-forward,
            positional-encoding, layer, embedding and generator constructors.
        d_ff: Second argument of ``PositionwiseFeedForward``.
        h: First argument of ``MultiHeadedAttention``.
        dropout: Forwarded to the feed-forward block, the positional
            encoding and both layer types.

    Returns:
        The assembled ``EncoderDecoder`` instance, after every parameter with
        more than one dimension has been re-initialized with Xavier uniform.
    """
    # Prototype modules are deep-copied wherever they are used, so no two
    # sub-modules end up holding the very same object.
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        # Parameters with at most one dimension are skipped and keep the
        # values their constructors gave them.
        if p.dim() > 1:
            # Weight initialization: Xavier uniform distribution.
            # See https://zhuanlan.zhihu.com/p/74274453
            nn.init.xavier_uniform_(p)
    return model
Example #2
0
	def _make_model(self, num_tgt_chars, N, d_model, d_ff, h, dropout):
		"""
		Assemble the encoder-decoder network and initialise its weights.

		:param num_tgt_chars: output space
		:param N: number of decoder and encoder layers
		:param d_model: model dimensionality
		:param d_ff: hidden size of the feed-forward neural network
		:param h: number of attention heads
		:param dropout: dropout rate
		:return: the constructed ``EncoderDecoder`` model

		"""
		clone = copy.deepcopy
		attention = MultiHeadedAttention(h, d_model)
		feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
		pos_encoding = PositionalEncoding(d_model, dropout)

		# Pick the feature extractor according to the configuration.
		# NOTE(review): both extractors are sized from self.config.D_MODEL
		# rather than from the d_model argument -- confirm they always agree.
		if not self.config.USE_RESNET:
			backbone = FeatureExtractionNetwork(d_model=self.config.D_MODEL)
		else:
			backbone = ResNet(block=BasicBlock, layers=self.config.RESNET_LAYERS, d_model=self.config.D_MODEL)

		# Embedding table with two entries, handed to the model as direction_embed.
		direction_embedding = Embeddings(d_model, 2)

		# Build the parts in the same order the model receives them.
		encoder = Encoder(EncoderLayer(d_model, clone(attention), clone(feed_forward), dropout), N)
		decoder = Decoder(
			DecoderLayer(d_model, clone(attention), clone(attention), clone(feed_forward), dropout),
			N,
		)
		target_embedding = nn.Sequential(Embeddings(d_model, num_tgt_chars), clone(pos_encoding))
		generator = PredictionLayer(d_model, num_tgt_chars)
		prediction_layer = PredictionLayer(d_model, len(Dataset.CHAR_ID_MAP))
		bidirectional = self.config.BIDIRECTIONAL_DECODING

		model = EncoderDecoder(
			encoder=encoder,
			decoder=decoder,
			tgt_embed=target_embedding,
			generator=generator,
			feature_extractor=backbone,
			prediction_layer=prediction_layer,
			bidirectional_decoding=bidirectional,
			direction_embed=direction_embedding,
			device=self.device
		)

		# Xavier-normal initialisation for every parameter with more than one
		# dimension; the remaining parameters are left as constructed.
		for weight in model.parameters():
			if weight.dim() <= 1:
				continue
			nn.init.xavier_normal_(weight)

		logging.info("Model created")

		return model
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    """Helper: Construct a model from hyperparameters.

    Args:
        src_vocab: Passed to the source-side ``Embeddings`` as its second
            argument.
        tgt_vocab: Passed to the target-side ``Embeddings`` and to
            ``Generator`` as their second argument.
        N: Passed to ``Encoder`` and ``Decoder`` as their second argument
            (presumably the number of stacked layers -- TODO confirm).
        d_model: Size argument shared by the attention, feed-forward,
            positional-encoding, layer, embedding and generator constructors.
        d_ff: Currently unused by this function (see the review note below).
        h: First argument of ``MultiHeadedAttention``.
        dropout: Forwarded to the feed-forward block, the positional
            encoding and both layer types.

    Returns:
        The assembled ``EncoderDecoder`` instance, after every parameter with
        more than one dimension has been re-initialized with Xavier uniform.
    """
    # Prototype modules are deep-copied wherever they are used, so no two
    # sub-modules end up holding the very same object.
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    # NOTE(review): d_ff is accepted by make_model but never forwarded, and
    # `dropout` sits in the second positional slot here. If
    # PositionWiseFeedForward expects (d_model, d_ff, dropout), this call
    # needs d_ff inserted -- confirm against the class definition.
    ff = PositionWiseFeedForward(d_model, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        # NOTE(review): only one attention module is handed to DecoderLayer;
        # if it expects separate self- and source-attention modules a second
        # c(attn) is missing -- confirm against the class definition.
        Decoder(DecoderLayer(d_model, c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        # Fixed: the positional encoding is the second stage of the
        # Sequential (as on the source side), not a third argument to
        # Embeddings.
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    for p in model.parameters():
        # Parameters with at most one dimension are skipped and keep the
        # values their constructors gave them.
        if p.dim() > 1:
            # In-place initializer; the un-suffixed nn.init.xavier_uniform
            # is deprecated in favour of this one.
            nn.init.xavier_uniform_(p)
    return model
Example #4
0
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    """Build an ``EncoderDecoder`` from hyperparameters and initialize it.

    The attention, feed-forward and positional-encoding prototypes are
    created once and deep-copied into every place they are used. All
    parameters with more than one dimension are then re-initialized with
    Xavier (Glorot) uniform; the others are left as constructed.

    Returns:
        The assembled and initialized ``EncoderDecoder`` instance.
    """
    clone = copy.deepcopy

    # Prototype building blocks.
    attention = MultiHeadedAttention(h, d_model)
    feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
    pos_encoding = PositionalEncoding(d_model, dropout)

    # Components, created in the order the model constructor receives them.
    encoder = Encoder(
        EncoderLayer(d_model, clone(attention), clone(feed_forward), dropout),
        N,
    )
    decoder = Decoder(
        DecoderLayer(d_model, clone(attention), clone(attention),
                     clone(feed_forward), dropout),
        N,
    )
    src_embed = nn.Sequential(Embeddings(d_model, src_vocab), clone(pos_encoding))
    tgt_embed = nn.Sequential(Embeddings(d_model, tgt_vocab), clone(pos_encoding))
    generator = Generator(d_model, tgt_vocab)

    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed, generator)

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for weight in model.parameters():
        if weight.dim() <= 1:
            continue
        nn.init.xavier_uniform_(weight)
    return model
Example #5
0
    img_dir="../data/train/",
    mask=mask2,
    is_train=False,
    image_size=IMAGE_SIZE,
    use_cuda=GPU_AVAILABLE)

# Batch the training set in shuffled batches of BATCH_SIZE samples.
train_loader = Data.DataLoader(dataset=train_data,
                               batch_size=BATCH_SIZE,
                               shuffle=True)
# The validation loader uses a quarter of the training batch size.
# NOTE(review): int(BATCH_SIZE / 4) evaluates to 0 when BATCH_SIZE < 4 --
# confirm BATCH_SIZE is always at least 4.
val_loader = Data.DataLoader(dataset=val_data,
                             batch_size=int(BATCH_SIZE / 4),
                             shuffle=True)
# A single iterator over the validation loader; the training loop below pulls
# one batch at a time from it with next().
val_iterator = iter(val_loader)

# Training and testing setup: Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# NOTE(review): utils.iou is used directly as the loss that the training loop
# back-propagates and steps on -- confirm it returns a value where lower is
# better (e.g. 1 - IoU) rather than the raw overlap score.
loss_func = utils.iou

print("Beginning training phase..")
for epoch in range(EPOCH):
    for i, batch in enumerate(train_loader):
        output = model(batch['image'])
        loss = loss_func(output, batch['pixel_classes'])  # compute loss
        optimizer.zero_grad()  # clear gradients for this training step
        loss.backward()  # backpropagation, compute gradients
        optimizer.step()  # apply gradients

        if i % 5 == 0:  # Display progress every N batches
            try:
                val_batch = next(val_iterator)
            except StopIteration: