def train_model(features, stroke_names_id, model, dataloaders, criterion,
                optimizer, scheduler, labs_keys, labs_values, num_epochs=25):
    '''
    Train `model` for `num_epochs` epochs, evaluate it on the 'test' loader
    after every epoch and return it with the best-scoring weights loaded.

    Parameters:
    -----------
    features, stroke_names_id :
        not read by this function; kept so existing call sites keep working
    model : torch.nn.Module
        called as model(inputs); its output is permuted with (1, 0, 2) and
        flattened to 2D before the loss is computed
    dataloaders : dict
        maps 'train' and 'test' to iterables yielding 5-tuples
        (inputs, vid_path, stroke, _, labels)
    criterion : callable
        loss, called as criterion(output, labels)
    optimizer : torch.optim.Optimizer
        optimizer stepped once per training batch
    scheduler :
        learning rate scheduler, stepped once per epoch after the train phase
    labs_keys, labs_values :
        forwarded unchanged to attn_utils.get_batch_labels
    num_epochs : int
        number of train + test passes

    Returns:
    --------
    model, with the weights of the epoch that had the highest 'test' accuracy

    '''
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and an evaluation phase
        for phase in ['train', 'test']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Number of individual predictions scored in this phase; counted
            # directly so the accuracy does not depend on the last batch shape.
            num_preds = 0
            # Iterate over data.
            for bno, (inputs, vid_path, stroke, _, labels) in enumerate(dataloaders[phase]):
                # The labels yielded by the loader are replaced by the ones
                # built from vid_path / stroke.
                labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                                     labs_values, inputs.shape[1])
                inputs = inputs.float()
                inp_emb = attn_utils.get_long_tensor(inputs)    # comment out for SA
                inputs = inp_emb.to(device)                     # comment out for SA
                inputs = inputs.t().contiguous()       # Convert to (SEQ, BATCH)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Only build the autograd graph while training; the test
                # phase does not need it.
                with torch.set_grad_enabled(is_train):
                    output = model(inputs)  # output size (SEQ_SIZE, BATCH, NCLASSES)
                    output = output.permute(1, 0, 2).contiguous()

                    # NOTE(review): softmax is applied before the criterion.
                    # If the criterion is nn.CrossEntropyLoss this applies
                    # softmax twice -- confirm against the caller.
                    output = F.softmax(output.view(-1, output.shape[-1]), dim=1)
                    loss = criterion(output, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                        optimizer.step()

                _, preds = torch.max(output, 1)

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data).item()
                num_preds += preds.numel()

            # Loss is averaged over batches, accuracy over single predictions.
            epoch_loss = running_loss / len(dataloaders[phase])
            epoch_acc = float(running_corrects) / num_preds

            # Read the LR from the optimizer: scheduler.get_lr() is deprecated
            # and may report a wrong value when called outside step().
            print('{} Loss: {:.4f} Acc: {:.4f} LR: {}'.format(
                phase, epoch_loss, epoch_acc, optimizer.param_groups[0]['lr']))

            if is_train:
                scheduler.step()
            # deep copy the model for best test accuracy
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                        time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
def predict(features, stroke_names_id, model, dataloaders, labs_keys, labs_values,
            seq, phase="val"):
    '''
    Run `model` over dataloaders[phase], majority-vote the per-step
    predictions of each stroke and report stroke-level accuracy.

    The flattened ground truth and predictions are pickled into `log_path`
    (a module-level name, as is `cluster_size`) before the vote is computed.

    Parameters:
    -----------
    features, stroke_names_id :
        not read by this function; kept so existing call sites keep working
    model : torch.nn.Module
        called as model(inputs); must expose `decoder.out_features`, which
        sizes the confusion matrix
    dataloaders : dict
        maps the phase name to an iterable yielding 5-tuples
        (inputs, vid_path, stroke, _, labels)
    labs_keys, labs_values :
        forwarded unchanged to attn_utils.get_batch_labels
    seq : int
        NOTE: overwritten with inputs.shape[1] for every batch, so the value
        passed in is only used when the loader yields nothing
    phase : str
        'val' / 'test'

    Returns:
    --------
    float : fraction of strokes whose majority-voted prediction is correct

    '''
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for inputs, vid_path, stroke, _, labels in dataloaders[phase]:
        # The sequence length of the batch replaces the `seq` argument; the
        # last value also ends up in the name of the predictions file.
        seq = inputs.shape[1]
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys, labs_values, seq)
        inputs = inputs.float()
        inp_emb = attn_utils.get_long_tensor(inputs)    # comment out for SA
        inputs = inp_emb.to(device)                     # comment out for SA
        inputs = inputs.t().contiguous()
        labels = labels.to(device)

        # forward; gradients are never needed during prediction
        with torch.no_grad():
            outputs = model(inputs)
            outputs = outputs.permute(1, 0, 2).contiguous()
            outputs = F.softmax(outputs.view(-1, outputs.shape[-1]), dim=1)

        # Accumulate flat lists directly instead of flattening them later.
        gt_list.extend(labels.tolist())
        pred_list.extend(torch.max(outputs, 1)[1].tolist())
        for i, vid in enumerate(vid_path):
            # One id per prediction: <video>_<stroke[0]>_<stroke[1]>
            stroke_id = vid + "_" + str(stroke[0][i].item()) + "_" + str(stroke[1][i].item())
            stroke_ids.extend([stroke_id] * seq)

    ###########################################################################

    confusion_mat = np.zeros((model.decoder.out_features, model.decoder.out_features))

    predictions = {"gt": gt_list, "pred": pred_list}

    # Save prediction and ground truth labels. The lists are used as they are
    # afterwards; reading the file straight back would only repeat the I/O.
    with open(os.path.join(log_path, "preds_Seq"+str(seq)+"_C"+str(cluster_size)+".pkl"), "wb") as fp:
        pickle.dump(predictions, fp)

    # Majority vote over each run of consecutive predictions that share a
    # stroke id.
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # close the previous stroke: most frequent predicted category
            val_labels.append(gt_list[i-1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)

    # close the last stroke
    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))

    ###########################################################################

    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        # rows are indexed by prediction, columns by ground truth
        confusion_mat[pred_labels[i], true_val] += 1
    print('#'*30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format( float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))
def extract_trans_feats(model,
                        DATASET,
                        LABELS,
                        CLASS_IDS,
                        BATCH_SIZE,
                        SEQ_SIZE=16,
                        STEP=16,
                        partition='train',
                        nstrokes=-1,
                        base_name=""):
    '''
    Run `model.get_vec` over one partition of the dataset and collect the
    resulting vectors stroke by stroke.

    Parameters:
    -----------
    model :
        object exposing eval() and get_vec(inputs)
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS :
        forwarded unchanged to StrokeFeatureSequenceDataset
    BATCH_SIZE : int
        batch size of the DataLoader
    SEQ_SIZE : int
        passed to the dataset as frames_per_clip
    STEP : int
        passed to the dataset as step_between_clips
    partition : str
        'train' / 'val' / 'test' : which split of the videos to use
    nstrokes : int
        stop after the batch in which this many strokes were completed;
        a negative value processes the whole partition
    base_name : str
        directory of the pickled feature dump "C<cluster_size>_<partition>.pkl"

    Returns:
    --------
    (features, stroke_names) : features maps each stroke name to a numpy
    array of its vectors, stroke_names lists the names in extraction order.
    Returns None (after printing a message) for an unknown partition.

    '''

    ###########################################################################
    # Split the dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Pick the video list and the feature dump matching the partition
    for split_name, split_lst in (('train', train_lst),
                                  ('val', val_lst),
                                  ('test', test_lst)):
        if partition == split_name:
            partition_lst = split_lst
            ft_path = os.path.join(
                base_name, "C" + str(cluster_size) + "_" + split_name + ".pkl")
            break
    else:
        print("Partition should be : train / val / test")
        return

    ###########################################################################
    # Dataset and (unshuffled) loader
    part_dataset = StrokeFeatureSequenceDataset(ft_path,
                                                partition_lst,
                                                DATASET,
                                                LABELS,
                                                CLASS_IDS,
                                                frames_per_clip=SEQ_SIZE,
                                                extracted_frames_per_clip=2,
                                                step_between_clips=STEP,
                                                train=True)
    data_loader = DataLoader(dataset=part_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=False)

    ###########################################################################
    # Extraction
    model.eval()
    stroke_names, trajectories = [], []
    current_traj = []       # vectors of the stroke currently being collected
    finished = 0            # strokes completed so far
    prev_stroke = None
    print("Total Batches : {} :: BATCH_SIZE : {}".format(
        len(data_loader), BATCH_SIZE))

    for bno, (inputs, vid_path, stroke, labels) in enumerate(data_loader):
        inputs = inputs.float()
        inp_emb = attn_utils.get_long_tensor(inputs)  # comment out for SA
        inputs = inp_emb.t().contiguous().to(device)  # comment out for SA

        # forward pass without gradient tracking
        with torch.no_grad():
            outputs = model.get_vec(inputs)
            outputs = outputs.transpose(0, 1).contiguous()

        # stroke boundaries: tensors -> plain lists
        stroke = [pts.tolist() for pts in stroke]
        inputs_lst, batch_stroke_names = autoenc_utils.separate_stroke_tensors(
            outputs, vid_path, stroke)

        if bno == 0:
            prev_stroke = batch_stroke_names[0]

        for enc_idx, enc_input in enumerate(inputs_lst):
            this_stroke = batch_stroke_names[enc_idx]
            # A change of name closes the stroke collected so far
            if prev_stroke != this_stroke and current_traj:
                finished += 1
                trajectories.append(current_traj)
                stroke_names.append(prev_stroke)
                current_traj = []

            enc_output = enc_input.detach().cpu().numpy()

            # Append one entry per (first axis, second axis) position
            for i in range(enc_output.shape[0]):
                for j in range(enc_output.shape[1]):
                    current_traj.append(enc_output[i, j, :])
            prev_stroke = this_stroke

        if -1 < nstrokes <= finished:
            break

    # The stroke still open at the end is kept only for a full extraction
    if current_traj and nstrokes < 0:
        trajectories.append(current_traj)
        stroke_names.append(batch_stroke_names[-1])

    # Dictionary of features keyed by stroke name
    features = {
        name: np.array(traj)
        for name, traj in zip(stroke_names, trajectories)
    }

    return features, stroke_names
def train_model(features,
                stroke_names_id,
                model,
                dataloaders,
                criterion,
                optimizer,
                scheduler,
                labs_keys,
                labs_values,
                num_epochs=25):
    '''
    Train `model` for `num_epochs` epochs, evaluate it on the 'test' loader
    after every epoch and return it with the best-scoring weights loaded.

    Parameters:
    -----------
    features, stroke_names_id :
        not read by this function; kept so existing call sites keep working
    model : torch.nn.Module
        must expose init_hidden(batch_size) and be callable as
        model(inputs, hidden) -> (outputs, hidden)
    dataloaders : dict
        maps 'train' and 'test' to loaders yielding 4-tuples
        (inputs, vid_path, stroke, labels); each loader needs a `.dataset`
    criterion : callable
        loss, called as criterion(outputs, labels)
    optimizer : torch.optim.Optimizer
        optimizer stepped once per training batch
    scheduler :
        learning rate scheduler, stepped once per epoch after the train phase
    labs_keys, labs_values :
        forwarded unchanged to attn_utils.get_batch_labels
    num_epochs : int
        number of train + test passes

    Returns:
    --------
    model, with the weights of the epoch that had the highest 'test' accuracy

    '''
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and an evaluation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Samples seen per label. Starts with 5 slots (the original
            # layout) and grows when a larger label id shows up, instead of
            # failing with an IndexError.
            count = [0.] * 5

            # Iterate over data.
            for bno, (inputs, vid_path, stroke,
                      labels) in enumerate(dataloaders[phase]):
                # The labels yielded by the loader are replaced by the ones
                # built from vid_path / stroke.
                labels = attn_utils.get_batch_labels(vid_path, stroke,
                                                     labs_keys, labs_values, 1)
                inputs = inputs.float()
                inp_emb = attn_utils.get_long_tensor(
                    inputs)  # comment out for SA
                inputs = inp_emb.to(device)  # comment out for SA
                labels = labels.to(device)
                for k, v in Counter(labels.tolist()).items():
                    if k >= len(count):
                        count.extend([0.] * (k + 1 - len(count)))
                    count[k] += v

                # zero the parameter gradients
                optimizer.zero_grad()
                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    hidden = model.init_hidden(inputs.size(0))

                    outputs, hidden = model(inputs, hidden)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics; the batch loss is weighted by the batch size so
                # that the division below gives a per-sample average
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()
                print("Category Weights : {}".format(count))

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(
                dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))
            # deep copy the model for best test accuracy
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, \
          time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
# Example #5
# 0
def predict(model, dataloaders, seq, phase="val"):
    '''
    Run `model` over dataloaders[phase], majority-vote the predictions of
    each group of consecutive samples sharing a video path and report the
    resulting accuracy.

    The flattened ground truth and predictions are pickled into `log_path`
    (a module-level name, as is `cluster_size`) before the vote is computed.

    Parameters:
    -----------
    model : torch.nn.Module
        must expose init_hidden(batch_size) and `n_classes`, and be callable
        as model(inputs, hidden) -> (outputs, hidden)
    dataloaders : dict
        maps the phase name to an iterable yielding 4-tuples
        (inputs, vid_path, start_pts, labels)
    seq : int
        only used in the name of the predictions file
    phase : str
        'val' / 'test'

    Returns:
    --------
    float : fraction of groups whose majority-voted prediction is correct

    '''
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Occurrences of every input token id, printed below as "Clusters"
    count = [0.] * cluster_size
    # Iterate over data.
    for inputs, vid_path, start_pts, labels in dataloaders[phase]:
        inputs = inputs.float()
        inp_emb = attn_utils.get_long_tensor(inputs)  # comment out for SA
        inputs = inp_emb.to(device)  # comment out for SA
        labels = labels.to(device)
        for k, v in Counter(inp_emb.flatten().tolist()).items():
            count[k] += v

        # forward; gradients are never needed during prediction
        with torch.no_grad():
            hidden = model.init_hidden(inputs.size(0))
            outputs, hidden = model(inputs, hidden)

        # Accumulate flat lists directly instead of flattening them later.
        gt_list.extend(labels.tolist())
        pred_list.extend(torch.max(outputs, 1)[1].tolist())
        # NOTE(review): start_pts is not part of the id, so consecutive
        # samples from the same vid_path entry are voted on as one group --
        # confirm that vid_path is unique per stroke.
        stroke_ids.extend(vid_path)

    ###########################################################################
    print("Clusters : ")
    print(count)
    confusion_mat = np.zeros((model.n_classes, model.n_classes))

    predictions = {"gt": gt_list, "pred": pred_list}

    # Save prediction and ground truth labels. The lists are used as they are
    # afterwards; reading the file straight back would only repeat the I/O.
    preds_file = os.path.join(
        log_path, "preds_Seq" + str(seq) + "_C" + str(cluster_size) + ".pkl")
    with open(preds_file, "wb") as fp:
        pickle.dump(predictions, fp)

    # Majority vote over each run of consecutive predictions that share an id.
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # close the previous group: most frequent predicted category
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)

    # close the last group
    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))

    ###########################################################################

    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        # rows are indexed by prediction, columns by ground truth
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))