Example #1
def train(code_encoder, desc_encoder, code_pooler, desc_pooler, iterator,
          optimizer, criterion):

    epoch_loss = 0
    epoch_mrr = 0

    code_encoder.train()
    desc_encoder.train()
    code_pooler.train()
    desc_pooler.train()

    for batch in tqdm(iterator, desc='Training...'):

        optimizer.zero_grad()

        code, code_lengths = batch.code
        desc, desc_lengths = batch.desc

        #code/desc = [seq len, batch size]

        code_mask = utils.make_mask(code, CODE.vocab.stoi[CODE.pad_token])
        desc_mask = utils.make_mask(desc, DESC.vocab.stoi[DESC.pad_token])

        #mask = [batch size, seq len]

        encoded_code = code_encoder(code)
        encoded_code = code_pooler(encoded_code, code_mask)

        encoded_desc = desc_encoder(desc)
        encoded_desc = desc_pooler(encoded_desc, desc_mask)

        #encoded_code/desc = [batch size, emb dim/hid dim/hid dim * 2 (bow/rnn/bi-rnn)]

        loss, mrr = criterion(encoded_code, encoded_desc)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(code_encoder.parameters(),
                                       args.grad_clip)
        torch.nn.utils.clip_grad_norm_(desc_encoder.parameters(),
                                       args.grad_clip)
        torch.nn.utils.clip_grad_norm_(code_pooler.parameters(),
                                       args.grad_clip)
        torch.nn.utils.clip_grad_norm_(desc_pooler.parameters(),
                                       args.grad_clip)

        optimizer.step()

        epoch_loss += loss.item()
        epoch_mrr += mrr.item()

    return epoch_loss / len(iterator), epoch_mrr / len(iterator)
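Note: the examples on this page only show how utils.make_mask is called, not its definition. For the retrieval examples like the one above, where code/desc have shape [seq len, batch size] and the mask is handed to a pooler, a padding mask built from the pad index would be consistent with the calls. The output layout follows the comment in this example and is only an assumption:

import torch

def make_mask(seq, pad_idx):
    # seq = [seq len, batch size]; True where the token is not padding.
    # Returned as [batch size, seq len], matching the comment above (assumed layout).
    return (seq != pad_idx).permute(1, 0)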
Example #2
def train(code_encoder, desc_encoder, code_pooler, desc_pooler, iterator,
          optimizer, criterion):

    epoch_loss = 0
    epoch_mrr = 0

    code_encoder.train()
    desc_encoder.train()
    code_pooler.train()
    desc_pooler.train()

    for code, code_lengths, desc, desc_lengths, is_var in tqdm(
            iterator, desc='Training...'):

        code = code.to(device)
        desc = desc.to(device)

        optimizer.zero_grad()

        #code/desc = [seq len, batch size]

        code_mask = utils.make_mask(code, code_vocab.pad_idx)
        desc_mask = utils.make_mask(desc, desc_vocab.pad_idx)

        #mask = [seq len, batch size]

        encoded_code = code_encoder(code, code_lengths, code_mask)
        encoded_code, _ = code_pooler(encoded_code, code_lengths, code_mask)

        encoded_desc = desc_encoder(desc, desc_lengths, desc_mask)
        encoded_desc, _ = desc_pooler(encoded_desc, desc_lengths, desc_mask)

        loss, mrr = criterion(encoded_code, encoded_desc)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(code_encoder.parameters(),
                                       args.grad_clip)
        torch.nn.utils.clip_grad_norm_(desc_encoder.parameters(),
                                       args.grad_clip)
        torch.nn.utils.clip_grad_norm_(code_pooler.parameters(),
                                       args.grad_clip)
        torch.nn.utils.clip_grad_norm_(desc_pooler.parameters(),
                                       args.grad_clip)

        optimizer.step()

        epoch_loss += loss.item()
        epoch_mrr += mrr.item()

    return epoch_loss / len(iterator), epoch_mrr / len(iterator)
Example #3
def evaluate(code_encoder, code_predictor, iterator, criterion):

    epoch_loss = 0
    epoch_mrr = 0

    code_encoder.eval()
    code_predictor.eval()

    with torch.no_grad():

        for batch in tqdm(iterator, desc='Evaluating...'):

            code, code_lengths = batch.code
            label = batch.label

            code_mask = utils.make_mask(code, code_vocab[PAD_TOKEN])

            encoded_code = code_encoder(code)
            encoded_code = code_predictor(encoded_code, code_mask)

            loss, mrr = criterion(encoded_code, label)

            epoch_loss += loss.item()
            epoch_mrr += mrr.item()

    return epoch_loss / len(iterator), epoch_mrr / len(iterator)
Example #4
    def gen_item(self, image_id):
        image_path = os.path.join(self.image_fold, image_id)
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = make_mask(self.df, image_id)
        augmented = self.transform(image=img, mask=mask)
        img = np.transpose(augmented['image'], [2, 0, 1]).astype('float')
        mask = np.transpose(augmented['mask'], [2, 0, 1]).astype('float')
        return img, mask
Example #5
    def __getitem__(self, idx):
        image_name = self.img_ids[idx]

        if image_name in self.pseudo_imgs:
            image_path = f"{settings.DATA_DIR}/test/{image_name}"
            mask = make_mask(self.df, image_name, (350, 525))
        else:
            image_path = os.path.join(self.data_folder, image_name)
            mask = make_mask(self.df, image_name)
        #print(image_path)
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']
        if self.preprocessing:
            preprocessed = self.preprocessing(image=img, mask=mask)
            img = preprocessed['image']
            mask = preprocessed['mask']
        return img, mask
Example #6
    def __getitem__(self, idx):
        image_id, mask = make_mask(idx, self.df)
        image_path = os.path.join(self.root, "train_images", image_id)
        img = cv2.imread(image_path)

        augmented = self.transforms(image=img, mask=mask)
        ###
        img = augmented['image']
        mask = augmented['mask']  # 1x256x1600x4
        mask = mask[0].permute(2, 0, 1)  # 1x4x256x1600
        return img, mask
Example #7
    def __getitem__(self, idx):
        image_id, mask = make_mask(idx, self.df)
        image_path = os.path.join(self.root, "train_images", image_id)
        # img = Image.open(image_path)
        # img = np.array(img)[:, :, 0]
        img = cv2.imread(image_path)[:, :, 0]
        img = img[:, :, np.newaxis]
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']  # 1x256x1600x4
        mask = mask[0].permute(2, 0, 1)  # 1x4x256x1600
        return img, mask
Example #8
    def __getitem__(self, item):
        image_id = self.ids[item]
        image_path = os.path.join(self.image_fold, image_id)
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = make_mask(self.df, image_id)
        augmented = self.transform(image=img, mask=mask)
        img = np.transpose(augmented['image'], [2, 0, 1]).astype(np.float32)
        mask = np.transpose(augmented['mask'], [2, 0, 1]).astype(np.float32)
        # img:  [3, 1400, 2100]
        # mask: [4, 1400, 2100]
        # for torch
        return img, mask
Example #9
    def __getitem__(self, idx):
        image_name = self.img_ids[idx]
        mask = make_mask(self.df, image_name)
        image_path = os.path.join(self.data_folder, image_name)
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']
        if self.preprocessing:
            preprocessed = self.preprocessing(image=img, mask=mask)
            img = preprocessed['image']
            mask = preprocessed['mask']
        return img, mask
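The segmentation-dataset examples above call make_mask(self.df, image_name) and expect an image-sized mask with one channel per class. A rough sketch of such a helper is shown below, assuming the DataFrame stores run-length encodings as in the submission files handled elsewhere on this page; the column names 'im_id' and 'EncodedPixels' and the default shape are guesses, not taken from the original projects:

import numpy as np

def rle_decode(rle, shape):
    # Decode one run-length-encoded string into a binary (H, W) mask.
    mask = np.zeros(shape[0] * shape[1], dtype=np.float32)
    if isinstance(rle, str):
        s = list(map(int, rle.split()))
        starts, lengths = s[0::2], s[1::2]
        for start, length in zip(starts, lengths):
            mask[start - 1:start - 1 + length] = 1
    return mask.reshape(shape, order='F')  # RLE is column-major in these datasets

def make_mask(df, image_name, shape=(1400, 2100)):
    # Stack one channel per class label into an [H, W, 4] float mask.
    encoded = df.loc[df['im_id'] == image_name, 'EncodedPixels']  # column names assumed
    masks = np.zeros((shape[0], shape[1], 4), dtype=np.float32)
    for i, rle in enumerate(encoded.values[:4]):
        masks[:, :, i] = rle_decode(rle, shape)
    return masks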
Example #10
def evaluate(code_encoder, desc_encoder, code_pooler, desc_pooler, iterator,
             criterion):

    epoch_loss = 0
    epoch_mrr = 0

    code_encoder.eval()
    desc_encoder.eval()
    code_pooler.eval()
    desc_pooler.eval()

    for code, code_lengths, desc, desc_lengths, is_var in tqdm(
            iterator, desc='Evaluating...'):

        code = code.to(device)
        desc = desc.to(device)

        #code/desc = [seq len, batch size]

        code_mask = utils.make_mask(code, code_vocab.pad_idx)
        desc_mask = utils.make_mask(desc, desc_vocab.pad_idx)

        #mask = [seq len, batch size]

        encoded_code = code_encoder(code, code_lengths, code_mask)
        encoded_code, _ = code_pooler(encoded_code, code_lengths, code_mask)

        encoded_desc = desc_encoder(desc, desc_lengths, desc_mask)
        encoded_desc, _ = desc_pooler(encoded_desc, desc_lengths, desc_mask)

        loss, mrr = criterion(encoded_code, encoded_desc)

        epoch_loss += loss.item()
        epoch_mrr += mrr.item()

    return epoch_loss / len(iterator), epoch_mrr / len(iterator)
Example #11
    def __getitem__(self, idx):
        image_id, mask = make_mask(idx, self.df)
        image_path = expand_path(image_id)
        img = jpeg.JPEG(str(image_path)).decode()
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']  # 1x256x1600x4
        mask = mask[0].permute(2, 0, 1)  # 1x4x256x1600
        if self.num_classes == 5:
            mask_0 = (mask.sum(axis=0, keepdims=True) == 0).float()
            mask = torch.cat([mask_0, mask], axis=0)

        if self.return_fnames:
            return img, mask, image_id
        else:
            return img, mask
Example #12
    def __getitem__(self, i):
        index = self.indexs[i]
        img_name = self.id2name[index]
        label = self.train_df.iloc[index][:4]
        cls = torch.from_numpy(np.array(~pd.isna(label), dtype=np.int64))
        mask = utils.make_mask(label, self.img_size)
        img = cv2.imread(os.path.join(self.img_path,
                                      img_name)).astype(np.float64)

        augmented = self.transformer(image=img, mask=mask)
        img = augmented['image'] / 255
        img = self.normalize(img)
        mask = augmented['mask']
        mask = mask.permute(2, 0, 1)

        return img_name, img, mask, cls
Example #13
    def __getitem__(self, idx):
        image_name = self.img_ids[idx]
        mask = make_mask(self.df, image_name)
        image_path = os.path.join(self.data_folder, image_name)
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #         augmented = self.transforms(image=img, mask=mask)
        #         img = augmented['image']
        #         mask = augmented['mask']
        # if self.preprocessing:
        #     preprocessed = self.preprocessing(image=img, mask=mask)
        #     img = preprocessed['image']
        #     mask = preprocessed['mask']
        img = img.transpose(2, 0, 1).astype('float32')
        mask = mask.transpose(2, 0, 1).astype('float32')

        return img, mask
Example #14
    def __getitem__(self, idx):
        image_id, mask = make_mask(
            idx, self.df, height=self.img_size[0], width=self.img_size[1])
        image_path = os.path.join(self.root, image_id)
        img = jpeg.JPEG(str(image_path)).decode()
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']
        mask = mask[0].permute(2, 0, 1)
        if self.num_classes == 5:
            mask_0 = (mask.sum(axis=0, keepdims=True) == 0).float()
            mask = torch.cat([mask_0, mask], axis=0)

        if self.return_fnames:
            return img, mask, image_id
        else:
            return img, mask
Example #15
def Eval(data, cnt):

    perp = 0.
    avg_loss = 0.
    test_batches = range(0, len(data), batch)
    test_minbatches = [data[idx:idx + batch] for idx in test_batches]

    for minbatch in test_minbatches:

        x_padded = utils.make_mask(minbatch)
        inp.set(x_padded)
        loss, score = BuildModel()
        edf.Forward()
        avg_loss += loss.value
        perp += CalPerp(score)

    perp = np.exp(perp / cnt)
    avg_loss /= len(test_batches)
    return perp, avg_loss
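In the edf-based language-model examples, utils.make_mask(minbatch) returns the padded batch itself (x_padded), which is fed to inp.set and later compared against 0 to build the loss mask. A minimal sketch consistent with that usage, with the zero-padding convention assumed:

import numpy as np

def make_mask(minbatch):
    # Pad a list of token-index sequences with 0 into a (batch, max_len) array.
    max_len = max(len(seq) for seq in minbatch)
    x_padded = np.zeros((len(minbatch), max_len), dtype=np.int32)
    for i, seq in enumerate(minbatch):
        x_padded[i, :len(seq)] = seq
    return x_padded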
Example #16
    def __getitem__(self, idx):

        image_id, mask = make_mask(
            idx, self.df, height=self.img_size[0], width=self.img_size[1])
        image_path = os.path.join(self.root, image_id)
        image_path = image_path + '.jpg'
        img = jpeg.JPEG(str(image_path)).decode()
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']
        mask = mask[0].permute(2, 0, 1)
        #print("size of the output tensor is ",mask.size())
        #return torch.randn(3,128,256),torch.randn(8,128,256)
        mask = mask / 255

        if self.return_fnames:
            return img, mask, image_id
        else:
            return img, mask
Example #17
def train(code_encoder, code_predictor, iterator, optimizer, criterion):

    epoch_loss = 0
    epoch_mrr = 0

    code_encoder.train()
    code_predictor.train()

    for batch in tqdm(iterator, desc='Training...'):

        optimizer.zero_grad()

        code, code_lengths = batch.code
        label = batch.label

        #code/desc = [seq len, batch size]

        code_mask = utils.make_mask(code, code_vocab[PAD_TOKEN])

        #mask = [batch size, seq len]

        encoded_code = code_encoder(code)
        encoded_code = code_predictor(encoded_code, code_mask)

        #encoded_code/desc = [batch size, emb dim/hid dim/hid dim * 2 (bow/rnn/bi-rnn)]

        loss, mrr = criterion(encoded_code, label)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(code_encoder.parameters(),
                                       args.grad_clip)
        torch.nn.utils.clip_grad_norm_(code_predictor.parameters(),
                                       args.grad_clip)

        optimizer.step()

        epoch_loss += loss.item()
        epoch_mrr += mrr.item()

    return epoch_loss / len(iterator), epoch_mrr / len(iterator)
Example #18
def Eval(data, cnt, model):

    perp = 0.
    avg_loss = 0.
    test_batches = range(0, len(data) - batch, batch)
    test_minbatches = [data[idx:idx + batch] for idx in test_batches]

    for minbatch in test_minbatches:

        x_padded = utils.make_mask(minbatch)

        x_padded = repackage_variable(x_padded, True)
        x_padded = torch.cat(x_padded, 1)
        T = x_padded.size(0)
        B = x_padded.size(1)
        inp = x_padded[:T - 1, :].long()
        target = x_padded[1:, :].long().view(-1, 1)
        if use_cuda:
            inp = inp.cuda()
            target = target.cuda()

        mask = (inp != 0).float().view(-1, 1)

        hidden = model.init_hidden(batch)
        model.zero_grad()
        output, hidden = model(inp, hidden)
        output = output.view(-1, n_vocab)

        loss = output.gather(1, target) * mask
        loss = -torch.sum(loss) / torch.sum(mask)

        avg_loss += loss.item()
        perp += CalPerp(output, target, mask)
        #print("finish iteration")

    perp = np.exp(perp / cnt)
    avg_loss /= len(test_batches)
    return perp, avg_loss
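CalPerp is not defined on this page. Given that output holds log-probabilities, the loss above is the masked mean negative log-likelihood, and perp is later exponentiated as np.exp(perp / cnt), a sketch that would fit this loop accumulates the masked negative log-likelihood sum (an assumption, not the original helper):

import torch

def CalPerp(output, target, mask):
    # output: [N, vocab] log-probabilities, target: [N, 1] indices, mask: [N, 1].
    log_probs = output.gather(1, target) * mask
    return -torch.sum(log_probs).item()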
Example #19
def mask_ensemble_csv(csvs):
    sample_sub = '/data/Clouds_Classify/sample_submission.csv'
    sample_sub = pd.read_csv(open(sample_sub))
    sample_sub.head()

    sample_sub['label'] = sample_sub['Image_Label'].apply(
        lambda x: x.split('_')[1])
    sample_sub['im_id'] = sample_sub['Image_Label'].apply(
        lambda x: x.split('_')[0])

    image_name_list = np.unique(sample_sub['im_id'].values).tolist()

    sub_list = []
    for i in range(len(csvs)):
        sub = pd.read_csv(open(csvs[i]))
        sub['im_id'] = sub['Image_Label'].apply(lambda x: x.split('_')[0])
        sub_list.append(sub)

    encoded_pixels = []
    for index, image_name in enumerate(tqdm.tqdm(image_name_list)):
        # image = utils.get_img(image_name, file_name='test_images')
        mask_sum = np.zeros((350, 525, 4), dtype=np.float32)
        for sub in sub_list:
            mask = utils.make_mask(sub,
                                   image_name=image_name,
                                   shape=(350, 525))  # [H, W, 4]
            mask_sum += mask
        ensemble_mask = np.where(mask_sum < len(sub_list) // 2 + 1, 0, 1)
        # utils.visualize(image_name, image, ensemble_mask)
        for i in range(4):
            rle = utils.mask2rle(ensemble_mask[:, :, i])
            encoded_pixels.append(rle)

    sample_sub['EncodedPixels'] = encoded_pixels
    sample_sub.to_csv('./sub/tta_ensemble_submission_5unet_3fpn_1resnet34.csv',
                      columns=['Image_Label', 'EncodedPixels'],
                      index=False)
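utils.mask2rle is the inverse of the decoding step: it turns one binary channel of the ensembled mask back into the run-length string written to EncodedPixels. The threshold len(sub_list) // 2 + 1 keeps a pixel only when a strict majority of the submissions predict it. A common implementation of the encoder, shown here as an assumption rather than the project's actual code:

import numpy as np

def mask2rle(img):
    # Flatten in column-major order, then emit alternating (start, length) pairs
    # for runs of ones; starts are 1-indexed as in the submission format.
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)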
Example #20
    data_replica.delete_data("gas", field)
# ==============================================================================
#
# Make the mask
#
# ==============================================================================
# make one mask for gas cells, one for particles. I think this is the best thing
# to do because large DM displacements can take particles outside their host
# cells (at small grid sizes). Doing it separately will help make sure both the
# gas and particles from the original sim are included.
print(" - making the particle mask")
mask_size = int(
    np.ceil(np.max(get_data_wrap(data_root, "x", "all", "particle"))))
mask_part = utils.make_mask(
    mask_size, 0, get_data_wrap(data_original, "x", original_subset,
                                "particle"),
    get_data_wrap(data_original, "y", original_subset, "particle"),
    get_data_wrap(data_original, "z", original_subset, "particle"))
if has_baryons:
    print(" - making the gas mask")
    mask_gas = utils.make_mask(
        mask_size, 0, get_data_wrap(data_original, "x", original_subset,
                                    "gas"),
        get_data_wrap(data_original, "y", original_subset, "gas"),
        get_data_wrap(data_original, "z", original_subset, "gas"))

# ==============================================================================
#
# Check new root against the mask
#
# ==============================================================================
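Here make_mask is called as utils.make_mask(mask_size, 0, x, y, z) on particle and gas coordinates. A plausible reading, not confirmed by the source, is that it flags the unit cells of a mask_size cubed grid that contain at least one of the given positions; the second argument (0 in the calls above) looks like an optional cell buffer and is left unimplemented in this sketch:

import numpy as np

def make_mask(mask_size, buffer_cells, x, y, z):
    # Boolean grid; True for cells containing at least one coordinate triple.
    # buffer_cells is assumed to pad the mask and is not implemented here.
    mask = np.zeros((mask_size, mask_size, mask_size), dtype=bool)
    ix = np.clip(np.floor(x).astype(int), 0, mask_size - 1)
    iy = np.clip(np.floor(y).astype(int), 0, mask_size - 1)
    iz = np.clip(np.floor(z).astype(int), 0, mask_size - 1)
    mask[ix, iy, iz] = True
    return mask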
Example #21
print("Initial: Perplexity: %0.5f Avg loss = %0.5f" % (perp, loss))
best_loss = loss
prefix = 'the agreements bring'
generation = Predict(400, utils.to_idxs(prefix))
print("Initial generated sentence ")
print(utils.to_string(generation))

for ep in range(epoch):

    perm = np.random.permutation(len(minbatches)).tolist()
    stime = time()

    for k in range(len(minbatches)):

        minbatch = minbatches[perm[k]]
        x_padded = utils.make_mask(minbatch)
        inp.set(x_padded)
        loss, score = BuildModel()
        edf.Forward()
        edf.Backward(loss)
        edf.GradClip(10)
        edf.SGD(eta)

    duration = (time() - stime) / 60.

    perp, loss = Eval(valid_data, vacnt)
    print("Epoch %d: Perplexity: %0.5f Avg loss = %0.5f [%.3f mins]" %
          (ep, perp, loss, duration))

    # generate some text given the prefix and trained model
    prefix = 'the agreements bring'
Example #22
intent_optimizer = optim.Adam(intent_model.parameters(),
                              lr=cfg.learning_rate)  # optim.Adamax

best_correct_num = 0
best_epoch = -1
best_F1_score = 0.0
best_epoch_slot = -1
for epoch in range(epoch_num):
    slot_loss_history = []
    intent_loss_history = []
    for batch_index, data in enumerate(utils.get_batch(train_data)):

        # Preparing data
        sentence, real_len, slot_label, intent_label = data

        mask = utils.make_mask(real_len).to(device)
        x = torch.tensor(sentence).to(device)
        y_slot = torch.tensor(slot_label).to(device)
        y_slot = utils.one_hot(y_slot).to(device)
        y_intent = torch.tensor(intent_label).to(device)
        y_intent = utils.one_hot(y_intent, Num=18).to(device)

        # Calculate compute graph
        slot_optimizer.zero_grad()
        intent_optimizer.zero_grad()

        hs = slot_model.enc(x)
        slot_model.share_memory = hs.clone()

        hi = intent_model.enc(x)
        intent_model.share_memory = hi.clone()
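In this example, utils.make_mask(real_len) builds the mask directly from the batch's sequence lengths before moving it to the device. A minimal length-to-mask sketch under that assumption (the maximum length is taken from the batch; the real helper may use a fixed config value instead):

import torch

def make_mask(real_len, max_len=None):
    # real_len: iterable of sequence lengths -> [batch, max_len] float mask.
    lengths = torch.as_tensor(real_len)
    max_len = max_len or int(lengths.max())
    positions = torch.arange(max_len).unsqueeze(0)     # [1, max_len]
    return (positions < lengths.unsqueeze(1)).float()  # [batch, max_len]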
Example #23
def MyRMSProp(eta, g, epoch=10):
    Log("RMSProp With Learning Rate %.6f Decay Rate:%.4f \n" % (eta, g))
    hidden_dim = 200
    n_vocab = utils.n_vocab
    batch = 50
    parameters = []
    model = 'Models/RMSProp/model_RMSProp_%.6f_%.4f_.pkl' % (eta, g)
    #print(model)
    eta = eta
    decay = 0.9

    inp = edf.Value()

    edf.params = []
    C2V = edf.Param(edf.xavier((n_vocab, hidden_dim)))

    # forget gate
    Wf = edf.Param(edf.xavier((2 * hidden_dim, hidden_dim)))
    bf = edf.Param(np.zeros((hidden_dim)))
    # input gate
    Wi = edf.Param(edf.xavier((2 * hidden_dim, hidden_dim)))
    bi = edf.Param(np.zeros((hidden_dim)))
    # carry cell
    Wc = edf.Param(edf.xavier((2 * hidden_dim, hidden_dim)))
    bc = edf.Param(np.zeros((hidden_dim)))
    # output cell
    Wo = edf.Param(edf.xavier((2 * hidden_dim, hidden_dim)))
    bo = edf.Param(np.zeros((hidden_dim)))

    V = edf.Param(edf.xavier((hidden_dim, n_vocab)))

    parameters.extend([C2V, Wf, bf, Wi, bi, Wc, bc, Wo, bo, V])

    # load the trained model if exist
    if os.path.exists(model):
        with open(model, 'rb') as f:
            p_value = pickle.load(f)
            idx = 0
            for p in p_value:
                parameters[idx].value = p
                idx += 1

    def LSTMCell(xt, h, c):

        f = edf.Sigmoid(edf.Add(edf.VDot(edf.ConCat(xt, h), Wf), bf))
        i = edf.Sigmoid(edf.Add(edf.VDot(edf.ConCat(xt, h), Wi), bi))
        o = edf.Sigmoid(edf.Add(edf.VDot(edf.ConCat(xt, h), Wo), bo))
        c_hat = edf.Tanh(edf.Add(edf.VDot(edf.ConCat(xt, h), Wc), bc))
        c_next = edf.Add(edf.Mul(f, c), edf.Mul(i, c_hat))
        h_next = edf.Mul(o, edf.Tanh(c_next))

        return h_next, c_next

    def BuildModel():

        edf.components = []

        B = inp.value.shape[0]
        T = inp.value.shape[1]
        h = edf.Value(np.zeros((B, hidden_dim)))
        c = edf.Value(np.zeros((B, hidden_dim)))

        score = []

        for t in range(T - 1):

            wordvec = edf.Embed(edf.Value(inp.value[:, t]), C2V)
            xt = edf.Reshape(wordvec, [-1, hidden_dim])
            h_next, c_next = LSTMCell(xt, h, c)
            p = edf.SoftMax(edf.VDot(h_next, V))
            logloss = edf.Reshape(
                edf.LogLoss(edf.Aref(p, edf.Value(inp.value[:, t + 1]))),
                (B, 1))

            if t == 0:
                loss = logloss
            else:
                loss = edf.ConCat(loss, logloss)

            score.append(p)
            h = h_next
            c = c_next

        masks = np.zeros((B, T - 1), dtype=np.int32)
        masks[inp.value[:, 1:] != 0] = 1
        loss = edf.MeanwithMask(loss, edf.Value(masks))

        return loss, score

    def CalPerp(score):

        prob = [p.value for p in score]
        prob = np.transpose(np.stack(prob, axis=0), (1, 0, 2))

        B = prob.shape[0]
        T = prob.shape[1]
        V = prob.shape[2]

        masks = np.zeros((B, T), dtype=np.int32)
        masks[inp.value[:, 1:] != 0] = 1

        prob = prob.reshape(-1)
        idx = np.int32(inp.value[:, 1:].reshape(-1))
        outer_dim = len(idx)
        inner_dim = len(prob) // outer_dim
        pick = np.int32(np.array(range(outer_dim)) * inner_dim + idx)
        prob = prob[pick].reshape(B, T)

        return -np.sum(np.log(prob[np.nonzero(prob * masks)]))

    def Predict(max_step, prefix):

        edf.components = []

        T = max_step
        h = edf.Value(np.zeros((1, hidden_dim)))
        c = edf.Value(np.zeros((1, hidden_dim)))

        prediction = []

        for t in range(T):

            if t < len(prefix):
                pred = edf.Value(prefix[t])
                prediction.append(pred)
            else:
                prediction.append(pred)

            wordvec = edf.Embed(pred, C2V)
            xt = edf.Reshape(wordvec, [-1, hidden_dim])
            h_next, c_next = LSTMCell(xt, h, c)
            p = edf.SoftMax(edf.VDot(h_next, V))
            pred = edf.ArgMax(p)
            h = h_next
            c = c_next

        edf.Forward()

        idx = [pred.value for pred in prediction]
        stop_idx = utils.to_index('}')

        if stop_idx in idx:
            return idx[0:idx.index(stop_idx) + 1]
        else:
            return idx

    def Eval(data, cnt):

        perp = 0.
        avg_loss = 0.
        test_batches = range(0, len(data), batch)
        test_minbatches = [data[idx:idx + batch] for idx in test_batches]

        for minbatch in test_minbatches:

            x_padded = utils.make_mask(minbatch)
            inp.set(x_padded)
            loss, score = BuildModel()
            edf.Forward()
            avg_loss += loss.value
            perp += CalPerp(score)

        perp = np.exp(perp / cnt)
        avg_loss /= len(test_batches)
        return perp, avg_loss

    ############################################### training loop #####################################################

    batches = range(0, len(train_data), batch)
    minbatches = [train_data[idx:idx + batch] for idx in batches]

    epoch = epoch

    # initial Perplexity and loss
    #perp, loss = Eval(valid_data, vacnt)
    #print("Initial: Perplexity: %0.5f Avg loss = %0.5f" % (perp, loss))
    #best_loss = loss
    #prefix = 'the agreements bring'
    #generation = Predict(400, utils.to_idxs(prefix))
    #print("Initial generated sentence ")
    #print (utils.to_string(generation))

    for ep in range(epoch):

        perm = np.random.permutation(len(minbatches)).tolist()
        stime = time()

        for k in range(len(minbatches)):

            minbatch = minbatches[perm[k]]
            x_padded = utils.make_mask(minbatch)
            inp.set(x_padded)
            loss, score = BuildModel()
            edf.Forward()
            edf.Backward(loss)
            edf.GradClip(10)
            edf.RMSProp(eta, g)

        duration = (time() - stime) / 60.

        perp, loss = Eval(valid_data, vacnt)
        Log("Epoch %d: Perplexity: %0.5f Avg loss = %0.5f [%.3f mins]" %
            (ep, perp, loss, duration))

        if (ep == epoch - 1):
            # generate some text given the prefix and trained model
            prefix = 'the agreements bring'
            generation = Predict(400, utils.to_idxs(prefix))
            Log("Epoch %d: generated sentence " % ep)
            Log(utils.to_string(generation))

        #if loss < best_loss:
        # save the model
        best_loss = loss
        f = open(model, 'wb')
        p_value = []
        for p in parameters:
            p_value.append(p.value)
        pickle.dump(p_value, f)

        #Save the hyperparameters
        f_hyper = open("HyperParameters.txt", "a")
        f_hyper.write(
            "RMSProp LearningRate: %.6f Decay_Rate: %.4f Epoch: %d BestLoss: %0.5f Perplexity: %0.5f\n"
            % (eta, g, ep, best_loss, perp))
        if (ep == epoch - 1):
            f_hyper.write("\n\n")
        f_hyper.close()

        Log("\n")
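MyRMSProp bundles model construction, checkpoint loading, the training loop, evaluation and logging into a single function, so a run is just one call; the hyperparameter values below are only illustrative:

# learning rate 1e-3, RMSProp decay 0.9, 10 epochs (illustrative values)
MyRMSProp(1e-3, 0.9, epoch=10)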
Example #24
SlotLoss = nn.CrossEntropyLoss()

optim_intent = optim.Adam(my_model.parameters(), lr=cfg.learning_rate)
optim_slot = optim.Adam(my_model.parameters(), lr=cfg.learning_rate)

for epoch in range(cfg.total_epoch):
    losses = []
    for i, batch in enumerate(getBatch(cfg.batch_size, train_data)):

        # ----------------------------- prepare data -------------------------
        x, y_1, y_2 = zip(*batch)  # sentence, slot label, intent label

        x = torch.cat(x)
        tag_target = torch.cat(y_1)

        slot_mask = utils.make_mask(tag_target, len(tag2index), cfg.max_length,
                                    cfg.device)

        intent_target = torch.cat(y_2)
        tag_target = utils.one_hot(tag_target, len(tag2index), cfg.max_length,
                                   cfg.device)
        intent_target = utils.one_hot(intent_target, len(intent2index),
                                      cfg.max_length, cfg.device)

        # ----------------------------- compute graph ------------------------

        hi = my_model.intent_enc(x)
        my_model.intent_share_hidden = hi.clone()
        intent_logits = my_model.intent_dec(
            hi, my_model.slot_share_hidden.detach())

        intent_loss = -1.0 * torch.sum(