Example no. 1
def sample(out_len, tweet):
    with open('models/word2vec.p', 'rb') as f:
        word2vec = pickle.load(f)
        word2vec = torch.tensor(word2vec)

    with open('models/translators.p', 'rb') as f:
        translators = pickle.load(f)

    inx2word = {int(k): v for k, v in translators['inx2word'].items()}
    word2inx = {k: int(v) for k, v in translators['word2inx'].items()}

    dict_size = len(inx2word)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    model = RNN(embedding_matrix=word2vec,
                dict_size=dict_size,
                hidden_dim=100,
                n_layers=1)
    model.load_state_dict(torch.load('models/rnn', map_location=device))
    model.eval()
    model = model.to(device)

    size = out_len - len(tweet)
    # Feed the words generated so far and ask the model for the next one
    for _ in range(size):
        word, _ = predict(model, tweet, device, inx2word, word2inx)
        if word != '<UNK>':
            tweet.append(word)

    return ' '.join(tweet)
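Note: sample() relies on a predict() helper that is not part of this example. A minimal sketch of what it might look like, assuming the model takes a (1, seq_len) tensor of word indices and returns (logits, hidden) with per-step scores flattened as in the training code of Example no. 13; only the call signature comes from the snippet above, the body is an assumption.

import torch

def predict(model, tweet, device, inx2word, word2inx):
    # Hypothetical greedy next-word step (not the original implementation).
    unk_inx = word2inx.get('<UNK>', 0)
    indices = [word2inx.get(w, unk_inx) for w in tweet]
    inputs = torch.tensor([indices], dtype=torch.long, device=device)
    with torch.no_grad():
        logits, hidden = model(inputs)
    # Pick the most likely word from the last row of the flattened logits.
    next_inx = torch.argmax(logits[-1]).item()
    return inx2word[next_inx], hidden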
Example no. 2
def predict(cfg, model_path, loader, device, save_path):
    print(f'Saving predictions @ {save_path}')
    # define model
    model = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model = model.to(device)
    # load the model
    model.load_state_dict(torch.load(model_path))

    # just to be sure
    model.eval()

    predictions = {
        'Object': [],
        'Sequence': [],
    }

    for c in range(cfg.output_dim):
        predictions[f'{cfg.task}_prob_{c}'] = []

    for batch in loader:
        inputs = batch['inputs'].to(device)

        with torch.set_grad_enabled(False):
            outputs, hiddens = model(inputs)
            _, preds = torch.max(outputs, 1)
            softmaxed = torch.nn.functional.softmax(outputs, dim=-1)

        for i in range(len(batch['paths'])):
            sequence = pathlib.Path(batch['paths'][i]).stem.replace(
                '_audio', '')
            predictions['Object'].append(batch['containers'][i])
            predictions['Sequence'].append(sequence.replace('_vggish', ''))
            for c in range(cfg.output_dim):
                predictions[f'{cfg.task}_prob_{c}'].append(softmaxed[i,
                                                                     c].item())

    predictions_dataset = pd.DataFrame.from_dict(predictions).sort_values(
        ['Object', 'Sequence'])
    predictions_dataset.to_csv(save_path, index=False)
    # returning the dataset because it will be useful for test-time prediction averaging
    return predictions_dataset
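The closing comment mentions test-time prediction averaging. A hedged sketch of how the returned DataFrames could be combined, assuming several prediction tables share the same Object/Sequence rows and the probability columns produced above; average_predictions is an illustrative name, not part of the original code.

import pandas as pd

def average_predictions(frames, prob_columns):
    # Concatenate per-model prediction tables and average the class
    # probabilities per (Object, Sequence) pair.
    stacked = pd.concat(frames, ignore_index=True)
    return (stacked
            .groupby(['Object', 'Sequence'], as_index=False)[prob_columns]
            .mean())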
Example no. 3
def main():
    model = RNN()
    model = model.to('cuda:0')
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)  # Adam takes no momentum argument
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[30, 50, 60],
                                                     gamma=0.1)
    train_dataset = HoleDataset(batch_size=128, dataset_size=128, steps=5)
    val_dataset = HoleDataset(batch_size=512, dataset_size=5120, steps=20)

    ckpt_path = os.path.abspath('./checkpoints')
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    trainer = Trainer(model,
                      optimizer,
                      scheduler,
                      train_dataset,
                      val_dataset,
                      checkpoint_path='./checkpoints')
    history = trainer.fit(num_epochs=70,
                          num_train_batch=1000,
                          num_val_batch=10)
Example no. 4
def main(args):
    # prepare data
    train_texts, train_labels = read_data(os.path.join(args.data_dir, 'train'))
    test_texts, test_labels = read_data(os.path.join(args.data_dir, 'test'))
    training_set = list(zip(train_texts, train_labels))
    test_set = list(zip(test_texts, test_labels))
    random.shuffle(training_set)
    random.shuffle(test_set)

    vocab_counter = Counter(flatten([get_words(text) for text in train_texts]))
    word2vec = vocab.Vocab(vocab_counter, max_size=20000, min_freq=3, vectors='glove.6B.100d')

    model = RNN(args.input_size, args.hidden_size, args.nb_class)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    train(training_set, model, criterion, optimizer, args.batch_size, args.nb_epoch, word2vec)
    evaluate(test_set, model, args.batch_size, word2vec)

    torch.save(model.state_dict(), args.weights_file)
    writer.close()
Example no. 5
parser.add_argument('--device',
                    default='cuda',
                    type=str,
                    help="device to train on")

args = parser.parse_args()

device = getattr(args, "device")

exp_name = f"Exp_optim_{args.optimizer}_lr_{args.learning_rate}_hu_{args.hidden_units}"
writer = SummaryWriter(f'runs/{exp_name}')

print(device)
### Create Model here
Model = RNN(args)
net = Model.to(torch.device(device))

### create or instantiate data loader
_data = TorchvisionDataLoader(args)
_data.prepare_data()

# optimizer stuff here
Optimizer = getattr(optim, getattr(args, "optimizer"))
optimizer = Optimizer(net.parameters(), lr=getattr(args, "learning_rate"))

# loss function
loss_fn = nn.CrossEntropyLoss()

_data.setup(stage='fit')

for i in range(getattr(args, "epochs")):
Example no. 6
parser.add_argument('--device', default='cpu', type=str, help="device to train on")




args = parser.parse_args()

device = getattr(args, "device")

exp_name = f"Exp_optim_{args.optimizer}_lr_{args.learning_rate}"
writer = SummaryWriter(f'runs/{exp_name}')


### Create Model here
Model = RNN(args)
net = Model.to(device)

### create or instantiate data loader
_data = TorchvisionDataLoader(args)
_data.prepare_data()

# optimizer stuff here
Optimizer = getattr(optim, getattr(args, "optimizer"))
optimizer = Optimizer(net.parameters(), lr=getattr(args, "learning_rate"))

# loss function
loss_fn = nn.CrossEntropyLoss()

_data.setup(stage='fit')

for i in range(getattr(args, "epochs")):
Example no. 7
def train(cfg, datasets, dataloaders, device, save_model_path):
    model = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    criterion = torch.nn.CrossEntropyLoss()
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    best_metric = 0.0
    best_epoch = 0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(cfg.num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            # running_corrects = 0
            y_pred = []
            y_true = []

            # Iterate over data.
            for batch in dataloaders[phase]:
                inputs = batch['inputs'].to(device)
                targets = batch['targets'][cfg.task].to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs, hiddens = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, targets)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                # running_corrects += torch.sum(preds == targets.data)
                y_pred.extend(preds.tolist())
                y_true.extend(targets.tolist())

            # if phase == 'train':
            #     scheduler.step()

            # epoch_acc = running_corrects.double() / len(datasets[phase])
            epoch_loss = running_loss / len(datasets[phase])
            f1_ep = f1_score(y_true, y_pred, average='weighted')
            precision_ep = precision_score(y_true, y_pred, average='weighted')
            recall_ep = recall_score(y_true, y_pred, average='weighted')
            accuracy_ep = accuracy_score(y_true, y_pred)

            # print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            print(
                f'({phase} @ {epoch+1}): L: {epoch_loss:.3f}; A: {accuracy_ep:.3f}; R: {recall_ep:.3f}; '
                + f'P: {precision_ep:.3f}; F1: {f1_ep:.3f}')

            # deep copy the model
            if phase == 'valid' and f1_ep > best_metric:
                best_metric = f1_ep
                best_epoch = epoch
                best_model_wts = copy.deepcopy(model.state_dict())

    print(f'Best val Metric {best_metric:.3f} @ {best_epoch+1}\n')

    # load best model weights and saves it
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), save_model_path)
    print(f'model is saved @ {save_model_path}')
    return best_metric
Example no. 8
def main():

    start_epoch = 0
    max_loss = math.inf
    epochs_since_improvement = 0

    dataset = GaitSequenceDataset(root_dir=data_dir,
                                  longest_sequence=85,
                                  shortest_sequence=55)

    train_sampler, validation_sampler = generate_train_validation_samplers(
        dataset, validation_split=0.2)

    print('Building dataloaders..')
    train_dataloader = data.DataLoader(dataset,
                                       batch_size=batch_size,
                                       sampler=train_sampler)
    validation_dataloader = data.DataLoader(dataset,
                                            batch_size=1,
                                            sampler=validation_sampler,
                                            drop_last=True)

    model = RNN(num_features, hidden_dimension, num_classes,
                num_layers=2).to(device)

    if load_pretrained is True:
        print('Loading pretrained model..')
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer = checkpoint['optimizer']

    else:
        print('Creating model..')
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    criterion = nn.CrossEntropyLoss().to(device)

    if mode == 'train':

        summary = SummaryWriter()
        #summary = None

        model.to(device)
        print('###########    ', model)

        for epoch in range(start_epoch, start_epoch + num_epochs):

            if epochs_since_improvement == 20:
                break

            if epochs_since_improvement > 0 and epochs_since_improvement % 4 == 0:
                adjust_learning_rate(optimizer, 0.8)

            train(model, train_dataloader, optimizer, criterion, clip_gradient,
                  device, epoch, num_epochs, summary, loss_display_interval)

            current_loss = validate(model, validation_dataloader, criterion,
                                    device, epoch, num_epochs, summary,
                                    loss_display_interval)

            is_best = max_loss > current_loss
            max_loss = min(max_loss, current_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                            is_best)

            print('Current loss : ', current_loss, ' Max loss : ', max_loss)

    else:
        print('testing...')
        model = RNN(num_features, hidden_dimension, num_classes, num_layers=2)
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.to(device)
        print(model)
        for batch_idx, val_data in enumerate(validation_dataloader):
            sequence = val_data['sequence'].permute(1, 0, 2).to(device)
            piano_roll = val_data['piano_roll'].permute(1, 0,
                                                        2).squeeze(1).to('cpu')
            sequence_length = val_data['sequence_length']
            file_name = val_data['file_name']
            frame = val_data['frame']
            leg = val_data['leg']
            sonify_sequence(model, sequence, sequence_length)
            plt.imshow(piano_roll)
            plt.show()
            print(file_name, frame, leg)
            break
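adjust_learning_rate() is called above but not defined in the snippet. A common implementation is sketched below as an assumption about what it does (scale every parameter group's learning rate by the given factor):

def adjust_learning_rate(optimizer, shrink_factor):
    # Multiply the learning rate of every parameter group by shrink_factor.
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * shrink_factor
    print('New learning rate: {:.6f}'.format(optimizer.param_groups[0]['lr']))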
Example no. 9
def test(args, ckpt_file):
    print("========== In the test step ==========")

    iterator, TEXT, LABEL, tabular_dataset = load_data(stage="test",
                                                       args=args,
                                                       indices=None)

    INPUT_DIM = len(TEXT.vocab)
    OUTPUT_DIM = 1
    BIDIRECTIONAL = True

    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = RNN(
        INPUT_DIM,
        args["EMBEDDING_DIM"],
        args["HIDDEN_DIM"],
        OUTPUT_DIM,
        args["N_LAYERS"],
        BIDIRECTIONAL,
        args["DROPOUT"],
        PAD_IDX,
    )

    model.load_state_dict(
        torch.load(os.path.join(args["EXPT_DIR"],
                                ckpt_file + ".pth"))["model"])

    model = model.to(device=device)

    model.eval()

    predix = 0
    predictions = {}
    truelabels = {}

    n_val = len(tabular_dataset)
    with tqdm(total=n_val, desc="Testing round", unit="batch",
              leave=False) as pbar:

        for batch in iterator:
            text, text_length = batch.review
            labels = batch.sentiment

            with torch.no_grad():

                text = text.to(device)
                text_length = text_length.to(device)
                labels = labels.to(device)

                prediction = model(text, text_length)

            for logit, label in zip(prediction, labels):
                # print("logit",logit)
                # print("label",label)
                # print("logit.cpu()",logit.cpu())
                predictions[predix] = torch.sigmoid(logit.cpu())
                truelabels[predix] = label.cpu().numpy().tolist()
                predix += 1

            pbar.update()

    truelabels_ = []
    predictions_ = []

    for key in predictions:
        if predictions[key][0] > 0.5:
            predictions_.append(1)
        else:
            predictions_.append(0)

    for key in truelabels:
        truelabels_.append(truelabels[key])

    truelabels = truelabels_
    predictions = predictions_

    return {"predictions": predictions, "labels": truelabels}
Example no. 10
class re3Tracker():
    def __init__(
            self,
            loss_flag=False,
            checkpoint_name='./final_checkpoint/re3_final_checkpoint.pth'):

        self.device = device
        self.CNN = CNN(1, 1).to(self.device)
        self.RNN = RNN(CNN_OUTPUT_SIZE, 1, 1, True).to(self.device)
        if os.path.isfile(checkpoint_name):
            checkpoint = torch.load(checkpoint_name, map_location='cpu')
            self.CNN.load_state_dict(checkpoint['cnn_model_state_dict'])
            self.RNN.load_state_dict(checkpoint['rnn_model_state_dict'])

        else:
            print("Invalid/No Checkpoint. Aborting...!!")
            sys.exit()
        self.CNN = self.CNN.to(device)
        self.RNN = self.RNN.to(device)
        self.forward_count = -1
        self.previous_frame = None
        self.cropped_input = np.zeros((2, 3, CROP_SIZE, CROP_SIZE),
                                      dtype=np.float32)
        self.calculate_loss = loss_flag
        self.criterion = nn.MSELoss()
        self.MSE_loss = 0

    def track(self, image, starting_box=None, gt_labels=None):
        if starting_box is not None:
            prev_image = image
            past_box = starting_box
            self.forward_count = 0
        else:
            prev_image, past_box = self.previous_frame

        image_0, output_box0 = im_util.get_crop_input(prev_image, past_box,
                                                      CROP_PAD, CROP_SIZE)
        # print('output_box0')
        # print(output_box0)
        self.cropped_input[0, ...] = data_preparation(image_0)

        image_1, _ = im_util.get_crop_input(image, past_box, CROP_PAD,
                                            CROP_SIZE)
        self.cropped_input[1, ...] = data_preparation(image_1)

        cropped_input_tensor = torch.from_numpy((self.cropped_input))
        cropped_input_tensor = cropped_input_tensor.view(
            -1, 3, CROP_SIZE, CROP_SIZE)
        with torch.no_grad():
            features = self.CNN(cropped_input_tensor.to(self.device))
            predicted_bbox = self.RNN(features)
        # Loss Calculation
        if starting_box is None and self.calculate_loss:
            gt_labels = torch.from_numpy(gt_labels).float()
            gt_labels = gt_labels.to(self.device)
            loss = self.criterion(predicted_bbox, gt_labels)
            # Running average loss
            self.MSE_loss = (self.MSE_loss * self.forward_count +
                             loss) / (self.forward_count + 1)
            print(self.MSE_loss)
        predicted_bbox_array = predicted_bbox.cpu().numpy()
        #print(predicted_bbox_array.squeeze())

        # Save initial LSTM states
        if self.forward_count == 0:
            self.RNN.lstm_state_init()

        output_bbox = im_util.from_crop_coordinate_system(
            predicted_bbox_array.squeeze() / 10.0, output_box0, 1, 1)

        # Reset LSTM states to initial state once #MAX_TRACK_LENGTH frames are processed and perform one forward pass
        if self.forward_count > 0 and self.forward_count % MAX_TRACK_LENGTH == 0:
            cropped_input, _ = im_util.get_crop_input(image, output_bbox,
                                                      CROP_PAD, CROP_SIZE)
            cropped_input = data_preparation(cropped_input)
            input_image = np.tile(cropped_input[np.newaxis, ...], (2, 1, 1, 1))
            input_tensor = torch.from_numpy(np.float32(input_image)).to(
                self.device)
            #input_tensor = input_tensor.view(-1,3,CROP_SIZE,CROP_SIZE)
            self.RNN.reset()
            features = self.CNN(input_tensor)
            prediction = self.RNN(features)
        if starting_box is not None:
            output_bbox = starting_box

        self.forward_count += 1
        self.previous_frame = (image, output_bbox)
        return output_bbox
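A hypothetical driver for the tracker above, assuming a sequence of frames (numpy arrays) and an initial bounding box; the function and variable names are illustrative only.

def run_tracker(frames, initial_box):
    # Initialise on the first frame, then track through the rest.
    tracker = re3Tracker()
    boxes = [tracker.track(frames[0], starting_box=initial_box)]
    for frame in frames[1:]:
        boxes.append(tracker.track(frame))
    return boxes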
Example no. 11
'''
batch data

X,Y = train_data.getBatch(i)
train_data.n_batches

'''

n_exp = 1
accs = []
f1s = []

for n in range(n_exp):
    model = RNN(args)
    print("Setting Model Complete")
    model.to(args.device)

    optimizer = "optim." + args.optim
    optimizer = eval(optimizer)(model.parameters(), lr=args.lr)
    #optimizer = optim.RMSprop(model.parameters())
    #optimizer = optim.Adam(model.parameters())
    #oprimizer = optim.SGD(model.parameters())

    print("Setting optimizer Complete")

    train_main(model, args, train_data, valid_data, optimizer)
    acc, f1 = test(model, args, test_data)

    accs.append(acc)
    f1s.append(f1)
Example no. 12
vocab_size = len(word_to_index)
embedding_size = 128
num_output = len(label_to_index)

model = RNN(vocab_size=vocab_size,
            embed_size=embedding_size,
            num_output=num_output,
            rnn_model="LSTM",
            use_last=True,
            hidden_size=128,
            embedding_tensor=None,
            num_layers=2,
            batch_first=True)

model.to("cuda:0")

import torch
import torch.nn as nn
import torch.optim as optim
from utils import accuracy, AverageMeter

optimizer = optim.Adam(model.parameters(), lr=0.005)

criterion = nn.CrossEntropyLoss()
clip = 0.25


def train(epoches):
    losses = AverageMeter()
    top1 = AverageMeter()
Example no. 13
def train():

    train_seq, test_seq, inx2word, word2inx, word2vec, batch_size = load_data()

    translators = {'inx2word': inx2word, 'word2inx': word2inx}
    with open('models/translators.p', 'wb') as f:
        pickle.dump(translators, f)
    with open('models/word2vec.p', 'wb') as f:
        pickle.dump(word2vec, f)

    dict_size = len(word2inx)

    word2vec = torch.tensor(word2vec)

    # check for GPU
    is_cuda = torch.cuda.is_available()

    if is_cuda:
        device = torch.device("cuda")
        print("GPU is available")
    else:
        device = torch.device("cpu")
        print("GPU not available, CPU used")

    # Instantiate the model with hyperparameters
    model = RNN(embedding_matrix=word2vec,
                dict_size=dict_size,
                hidden_dim=100,
                n_layers=1)
    model.to(device)

    # Define hyperparameters
    batch_size = 2000
    n_epochs = 100
    lr = 0.01

    # Define Loss, Optimizer
    lossfunction = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Training Run
    for epoch in range(1, n_epochs + 1):
        epoch_loss = 0
        for _, (input_seq, target_seq) in enumerate(train_seq):

            optimizer.zero_grad()  # clear gradients left over from the previous step
            input_seq = input_seq.to(device)
            target_seq = target_seq.to(device)
            output, _ = model(input_seq)
            loss = lossfunction(output, target_seq.view(-1).long())
            loss.backward()  # Does backpropagation and calculates gradients
            optimizer.step()  # Updates the weights accordingly
            epoch_loss += loss.item()

        if epoch % 10 == 0 or epoch == 1:
            loss_test_total = 0
            for input_test, target_test in test_seq:
                input_test = input_test.to(device)
                target_test = target_test.to(device)

                output_test, _ = model(input_test)
                loss_test = lossfunction(output_test,
                                         target_test.view(-1).long())
                loss_test_total += loss_test.item()

            norm_loss = epoch_loss / (len(train_seq) * batch_size)
            norm_loss_test = loss_test_total / (len(test_seq) * batch_size)

            print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
            print("Train loss: {:.4f}".format(norm_loss), end=' | ')
            print("Test loss: {:.4f}".format(norm_loss_test))
            torch.save(model.state_dict(), 'models/rnn')

    print('Training done')
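This training routine writes the artifacts (models/rnn, models/word2vec.p, models/translators.p) that Example no. 1's sample() reads back, so the two snippets appear to come from the same project. A hypothetical call, assuming sample() from Example no. 1 is importable here and using an arbitrary seed word:

if __name__ == '__main__':
    train()
    # The seed list is illustrative; sample() extends it up to out_len words.
    print(sample(out_len=20, tweet=['the']))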
Example no. 14
        end_time = time.time()
        data_loading_time = round(end_time - start_time, 3)
        data_prep_mins, data_prep_secs = epoch_time(start_time, end_time)
        print(f'Data loading Time: {data_prep_mins}m {data_prep_secs}s')


        pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
        model = RNN(input_dim, args.embedding_dim,
                    args.hidden_dim, 1, args.n_layers, 
                    args.bidirectional, args.dropout, pad_idx)
        model.embedding.weight.data[pad_idx] = torch.zeros(args.embedding_dim)

        optimizer = optim.Adam(model.parameters())
        criterion = nn.BCEWithLogitsLoss()

        model = model.to(device)
        criterion = criterion.to(device)

        best_test_loss = float('inf')

        loss_result = []
        acc_result = []
        elapsed_time = []

        print(f'Training with {tokenizer_name} tokenizer...')
        for epoch in range(args.n_epochs):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)
            test_loss, test_acc = evaluate(model, test_iterator, criterion, device)
Example no. 15
def main():
    parse = argparse.ArgumentParser()

    parse.add_argument("--train_data_dir",
                       default='./cnews/cnews.train.txt',
                       type=str,
                       required=False)
    parse.add_argument("--dev_data_dir",
                       default='./cnews/cnews.val.txt',
                       type=str,
                       required=False)
    parse.add_argument("--test_data_dir",
                       default='./cnews/cnews.test.txt',
                       type=str,
                       required=False)
    parse.add_argument("--output_file",
                       default='deep_model.log',
                       type=str,
                       required=False)
    parse.add_argument("--batch_size", default=1, type=int)
    parse.add_argument("--do_train",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--do_test",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--learnning_rate", default=5e-4, type=float)
    parse.add_argument("--num_epoch", default=10, type=int)
    parse.add_argument("--max_vocab_size", default=150000, type=int)
    parse.add_argument("--min_freq", default=2, type=int)
    parse.add_argument("--embed_size", default=300, type=int)
    parse.add_argument("--hidden_size", default=256, type=int)
    parse.add_argument("--dropout_rate", default=0.2, type=float)
    parse.add_argument("--warmup_steps",
                       default=0,
                       type=int,
                       help="Linear warmup over warmup_steps.")
    parse.add_argument("--GRAD_CLIP", default=1, type=float)
    parse.add_argument("--vocab_path", default='./vocab.json', type=str)
    parse.add_argument("--do_cnn",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--do_rnn",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--do_avg",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")

    parse.add_argument("--num_filter",
                       default=100,
                       type=int,
                       help="CNN模型一个filter的输出channels")

    args = parse.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device
    set_seed()

    if os.path.exists('./cnews/cache_train_data'):
        train_data = torch.load('./cnews/cache_train_data')
    else:
        train_data = read_corpus(args.train_data_dir)
        train_data = [(text, labs) for text, labs in zip(*train_data)]
        torch.save(train_data, './cnews/cache_train_data')

    if os.path.exists('./cnews/cache_dev_data'):
        dev_data = torch.load('./cnews/cache_dev_data')
    else:
        dev_data = read_corpus(args.dev_data_dir)
        dev_data = [(text, labs) for text, labs in zip(*dev_data)]
        torch.save(dev_data, './cnews/cache_dev_data')

    vocab = build_vocab(args)
    label_map = vocab.labels

    if args.do_train:
        # if args.do_cnn:
        #     cnn_model = CNN(len(vocab.vocab),args.embed_size,args.num_filter,[2,3,4],len(label_map),dropout=args.dropout_rate)
        #     cnn_model.to(device)
        #     train(args,cnn_model,train_data,dev_data,vocab,dtype='CNN')
        #
        # if args.do_avg:
        #     avg_model = WordAVGModel(len(vocab.vocab),args.embed_size,len(label_map),dropout=args.dropout_rate)
        #     avg_model.to(device)
        #     train(args, avg_model, train_data, dev_data, vocab, dtype='AVG')

        if args.do_rnn:
            rnn_model = RNN(len(vocab.vocab),
                            args.embed_size,
                            args.hidden_size,
                            len(label_map),
                            n_layers=1,
                            bidirectional=True,
                            dropout=args.dropout_rate)
            rnn_model.to(device)
            train(args, rnn_model, train_data, dev_data, vocab, dtype='RNN')

    if args.do_test:

        if os.path.exists('./cnews/cache_test_data'):
            test_data = torch.load('./cnews/cache_test_data')
        else:
            test_data = read_corpus(args.test_data_dir)
            test_data = [(text, labs) for text, labs in zip(*test_data)]
            torch.save(test_data, './cnews/cache_test_data')

        criterion = nn.CrossEntropyLoss()

        cnn_model = CNN(len(vocab.vocab),
                        args.embed_size,
                        args.num_filter, [2, 3, 4],
                        len(label_map),
                        dropout=args.dropout_rate)
        cnn_model.load_state_dict(torch.load('classifa-best-CNN.th'))
        cnn_model.to(device)
        cnn_test_loss, cnn_result = evaluate(args, criterion, cnn_model,
                                             test_data, vocab)

        avg_model = WordAVGModel(len(vocab.vocab),
                                 args.embed_size,
                                 len(label_map),
                                 dropout=args.dropout_rate)
        avg_model.load_state_dict(torch.load('classifa-best-AVG.th'))
        avg_model.to(device)
        avg_test_loss, avg_result = evaluate(args, criterion, avg_model,
                                             test_data, vocab)

        rnn_model = RNN(len(vocab.vocab),
                        args.embed_size,
                        args.hidden_size,
                        len(label_map),
                        n_layers=1,
                        bidirectional=True,
                        dropout=args.dropout_rate)
        rnn_model.load_state_dict(torch.load('classifa-best-RNN.th'))
        rnn_model.to(device)
        rnn_test_loss, rnn_result = evaluate(args, criterion, rnn_model,
                                             test_data, vocab)

        with open(args.output_file, "a") as fout:
            fout.write('\n')
            fout.write('=============== test result ============\n')
            fout.write("test model of {}, loss: {},result: {}\n".format(
                'CNN', cnn_test_loss, cnn_result))
            fout.write("test model of {}, loss: {},result: {}\n".format(
                'AVG', avg_test_loss, avg_result))
            fout.write("test model of {}, loss: {},result: {}\n".format(
                'RNN', rnn_test_loss, rnn_result))
Example no. 16
def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")

    iterator, TEXT, LABEL, tabular_dataset = load_data(stage="train",
                                                       args=args,
                                                       indices=labeled)

    print("Created the iterators")
    INPUT_DIM = len(TEXT.vocab)
    OUTPUT_DIM = 1
    BIDIRECTIONAL = True

    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = RNN(
        INPUT_DIM,
        args["EMBEDDING_DIM"],
        args["HIDDEN_DIM"],
        OUTPUT_DIM,
        args["N_LAYERS"],
        BIDIRECTIONAL,
        args["DROPOUT"],
        PAD_IDX,
    )

    model = model.to(device=device)

    pretrained_embeddings = TEXT.vocab.vectors

    model.embedding.weight.data.copy_(pretrained_embeddings)

    unk_idx = TEXT.vocab.stoi["<unk>"]
    pad_idx = TEXT.vocab.stoi["<pad>"]

    model.embedding.weight.data[unk_idx] = torch.zeros(args["EMBEDDING_DIM"])
    model.embedding.weight.data[pad_idx] = torch.zeros(args["EMBEDDING_DIM"])

    optimizer = optim.Adam(model.parameters())

    criterion = nn.BCEWithLogitsLoss()

    model = model.to("cuda")

    criterion = criterion.to("cuda")

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        model.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    model.train()  # turn on dropout, etc
    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):

        running_loss = 0
        i = 0

        for batch in iterator:

            # print("Batch is", batch.review[0])

            text, text_length = batch.review

            labels = batch.sentiment

            text = text.to(device)
            text_length = text_length.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(text, text_length)

            loss = criterion(torch.squeeze(output).float(), labels.float())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if (i + 1) % 10 == 0:
                # report the mean loss over the last 10 batches
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 10),
                    end="\r",
                )
                running_loss = 0
            i += 1

    print("Finished Training. Saving the model as {}".format(ckpt_file))

    ckpt = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))

    return
Example no. 17
max_length = 20
WEIGHT_PATH = "./weights/text_gen11.23178537686666.pth"
device = "cuda" if torch.cuda.is_available() else "cpu"
all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1
categories = [
    'Arabic', 'Chinese', 'Korean', 'Japanese', 'French', 'English', 'Czech',
    'Irish', 'Portuguese', 'German', 'Scottish', 'Polish', 'Italian',
    'Vietnamese', 'Dutch', 'Spanish', 'Russian', 'Greek'
]
n_categories = len(categories)
cate2index = {v: i for i, v in enumerate(categories)}
rnn = RNN(categories, n_letters, 128, n_letters)
checkpoint = torch.load(WEIGHT_PATH, map_location=torch.device('cpu'))
rnn.load_state_dict(checkpoint)
rnn.to(device)


# Sample from a category and starting letter
def sample(category, start_letter='A', rnn=rnn):
    with torch.no_grad():  # no need to track history in sampling
        category_tensor = categoryTensor(cate2index[category])
        input = inputTensor(start_letter)
        hidden = rnn.initHidden()
        output_name = start_letter

        for i in range(max_length):
            output, hidden = rnn(category_tensor, input[0], hidden)
            topv, topi = output.topk(1)
            topi = topi[0][0]
            if topi == n_letters - 1:
Example no. 18
def infer(args, unlabeled, ckpt_file):
    print("========== In the inference step ==========")
    iterator, TEXT, LABEL, tabular_dataset = load_data(stage="infer",
                                                       args=args,
                                                       indices=unlabeled)

    INPUT_DIM = len(TEXT.vocab)
    OUTPUT_DIM = 1
    BIDIRECTIONAL = True

    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = RNN(
        INPUT_DIM,
        args["EMBEDDING_DIM"],
        args["HIDDEN_DIM"],
        OUTPUT_DIM,
        args["N_LAYERS"],
        BIDIRECTIONAL,
        args["DROPOUT"],
        PAD_IDX,
    )

    model.load_state_dict(
        torch.load(os.path.join(args["EXPT_DIR"],
                                ckpt_file + ".pth"))["model"])

    model = model.to(device=device)

    model.eval()

    predix = 0
    predictions = {}
    truelabels = {}

    n_val = len(tabular_dataset)

    with tqdm(total=n_val, desc="Inference round", unit="batch",
              leave=False) as pbar:
        for batch in iterator:
            text, text_length = batch.review
            labels = batch.sentiment

            with torch.no_grad():
                text = text.to(device)
                text_length = text_length.to(device)
                prediction = model(text, text_length)

            for logit in prediction:
                predictions[unlabeled[predix]] = {}

                sig_prediction = torch.sigmoid(logit)

                # use a distinct name so the batch output `prediction` is not shadowed
                pred_label = 1 if sig_prediction > 0.5 else 0

                predictions[unlabeled[predix]]["prediction"] = pred_label

                predictions[unlabeled[predix]]["pre_softmax"] = [[
                    logit_fn(sig_prediction.cpu()),
                    logit_fn(1 - sig_prediction.cpu())
                ]]

                # print(predictions[unlabeled[predix]]["pre_softmax"])

                predix += 1

            pbar.update()

    print("The predictions are", predictions)

    return {"outputs": predictions}
Example no. 19
    shutil.rmtree(checkpoint_path)
os.mkdir(checkpoint_path)
model_name = "rnn.pt"

# do text parsing, get vocab size and class count
build_vocab(args.train, args.output_vocab_label, args.output_vocab_word)
label2id, id2label = load_vocab(args.output_vocab_label)
word2id, id2word = load_vocab(args.output_vocab_word)

vocab_size = len(word2id)
num_class = len(label2id)

# set model
model = RNN(vocab_size=vocab_size,
            num_class=num_class,
            emb_dim=args.embedding_dim,
            emb_droprate=args.embedding_droprate,
            sequence_len=args.sequence_len,
            rnn_droprate=args.rnn_droprate,
            rnn_cell_hidden=args.rnn_cell_hidden,
            rnn_cell_type=args.rnn_cell_type,
            birnn=args.birnn,
            num_layers=args.num_layers)
model.build()
model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-6)
dummy_input = torch.randint(low=0, high=1000,
                            size=(args.batch_size, args.sequence_len),
                            dtype=torch.long).to(device)
writer.add_graph(model, dummy_input)
print(summary(model, dummy_input))

# padding sequence with <PAD>
def padding(data, fix_length, pad, add_first="", add_last=""):
    if add_first:
        data.insert(0, add_first)
    if add_last:
        data.append(add_last)
    pad_data = []
    data_len = len(data)
    for idx in range(fix_length):
        if idx < data_len:
Example no. 20
def main():

    parser = argparse.ArgumentParser(description="==========[RNN]==========")
    parser.add_argument("--mode",
                        default="train",
                        help="available modes: train, test, eval")
    parser.add_argument("--model",
                        default="rnn",
                        help="available models: rnn, lstm")
    parser.add_argument("--dataset",
                        default="all",
                        help="available datasets: all, MA, MI, TN")
    parser.add_argument("--rnn_layers",
                        default=3,
                        type=int,
                        help="number of stacked rnn layers")
    parser.add_argument("--hidden_dim",
                        default=16,
                        type=int,
                        help="number of hidden dimensions")
    parser.add_argument("--lin_layers",
                        default=1,
                        type=int,
                        help="number of linear layers before output")
    parser.add_argument("--epochs",
                        default=100,
                        type=int,
                        help="number of max training epochs")
    parser.add_argument("--dropout",
                        default=0.0,
                        type=float,
                        help="dropout probability")
    parser.add_argument("--learning_rate",
                        default=0.01,
                        type=float,
                        help="learning rate")
    parser.add_argument("--verbose",
                        default=2,
                        type=int,
                        help="how much training output?")

    options = parser.parse_args()
    verbose = options.verbose

    if torch.cuda.is_available():
        device = torch.device("cuda")
        if verbose > 0:
            print("GPU available, using cuda...")
            print()
    else:
        device = torch.device("cpu")
        if verbose > 0:
            print("No available GPU, using CPU...")
            print()

    params = {
        "MODE": options.mode,
        "MODEL": options.model,
        "DATASET": options.dataset,
        "RNN_LAYERS": options.rnn_layers,
        "HIDDEN_DIM": options.hidden_dim,
        "LIN_LAYERS": options.lin_layers,
        "EPOCHS": options.epochs,
        "DROPOUT_PROB": options.dropout,
        "LEARNING_RATE": options.learning_rate,
        "DEVICE": device,
        "OUTPUT_SIZE": 1
    }

    params["PATH"] = "models/" + params["MODEL"] + "_" + params[
        "DATASET"] + "_" + str(params["RNN_LAYERS"]) + "_" + str(
            params["HIDDEN_DIM"]) + "_" + str(
                params["LIN_LAYERS"]) + "_" + str(
                    params["LEARNING_RATE"]) + "_" + str(
                        params["DROPOUT_PROB"]) + "_" + str(
                            params["EPOCHS"]) + "_model.pt"

    #if options.mode == "train":
    #    print("training placeholder...")

    train_data = utils.DistrictData(params["DATASET"], "train")
    val_data = utils.DistrictData(params["DATASET"], "val")

    params["INPUT_SIZE"] = train_data[0]['sequence'].size()[1]

    if params["MODEL"] == "rnn":
        model = RNN(params)
    elif params["MODEL"] == "lstm":
        model = LSTM(params)
    model.to(params["DEVICE"])
    criterion = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params["LEARNING_RATE"])

    if verbose == 0:
        print(params["PATH"])
    else:
        utils.print_params(params)
        print("Beginning training...")
        print()
    since = time.time()
    best_val_loss = 10.0

    for e in range(params["EPOCHS"]):

        running_loss = 0.0
        #model.zero_grad()
        model.train()
        train_loader = DataLoader(train_data,
                                  batch_size=32,
                                  shuffle=True,
                                  num_workers=4)

        for batch in train_loader:
            x = batch['sequence'].to(device)
            y = batch['target'].to(device)
            seq_len = batch['size'].to(device)

            optimizer.zero_grad()
            y_hat, hidden = model(x, seq_len)
            loss = criterion(y_hat, y)

            running_loss += loss.item()

            loss.backward()
            optimizer.step()

        mean_loss = running_loss / len(train_data)
        val_loss = evaluate(val_data,
                            model,
                            params,
                            criterion,
                            validation=True)

        if verbose == 2 or (verbose == 1 and (e + 1) % 100 == 0):
            print('=' * 25 + ' EPOCH {}/{} '.format(e + 1, params["EPOCHS"]) +
                  '=' * 25)
            print('Training Loss: {}'.format(mean_loss))
            print('Validation Loss: {}'.format(val_loss))
            print()

        if e > params["EPOCHS"] / 3:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = model.state_dict()
                torch.save(best_model, params["PATH"])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Final Training Loss: {:4f}'.format(mean_loss))
    print('Best Validation Loss: {:4f}'.format(best_val_loss))

    test_data = utils.DistrictData(params["DATASET"], "test")
    test_loss = evaluate(test_data, model, params, criterion)
    print('Test Loss: {}'.format(test_loss))
    print()
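evaluate() is used above for validation and test but is not included in the example. A minimal sketch consistent with the call sites (sum-reduced MSE normalised by the dataset size); the body is an assumption, and the validation flag is kept only to match the signature.

import torch
from torch.utils.data import DataLoader

def evaluate(data, model, params, criterion, validation=False):
    # Hypothetical evaluation loop mirroring the training loop above.
    model.eval()
    loader = DataLoader(data, batch_size=32, shuffle=False, num_workers=4)
    total_loss = 0.0
    with torch.no_grad():
        for batch in loader:
            x = batch['sequence'].to(params["DEVICE"])
            y = batch['target'].to(params["DEVICE"])
            seq_len = batch['size'].to(params["DEVICE"])
            y_hat, _ = model(x, seq_len)
            total_loss += criterion(y_hat, y).item()
    return total_loss / len(data)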
Example no. 21
def predict(cfg, model_path_c1, model_path_c2, model_path_c3, model_path_c4,
            loader, device, save_path):
    print(f'Saving predictions @ {save_path}')
    model_c1 = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                   cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model_c2 = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                   cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model_c3 = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                   cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model_c4 = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                   cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model_c1 = model_c1.to(device)
    model_c2 = model_c2.to(device)
    model_c3 = model_c3.to(device)
    model_c4 = model_c4.to(device)

    # load the model
    model_c1.load_state_dict(torch.load(model_path_c1))
    model_c2.load_state_dict(torch.load(model_path_c2))
    model_c3.load_state_dict(torch.load(model_path_c3))
    model_c4.load_state_dict(torch.load(model_path_c4))

    # just to be sure
    model_c1.eval()
    model_c2.eval()
    model_c3.eval()
    model_c4.eval()

    predictions = {
        'Object': [],
        'Sequence': [],
    }

    for c in range(cfg.output_dim):
        predictions[f'{cfg.task}_prob_{c}'] = []

    for batch in loader:
        inputs = batch['inputs'].to(device)

        with torch.set_grad_enabled(False):
            # (B, T, D)
            outputs_c1, hiddens = model_c1(inputs[:, 0, :, :])
            outputs_c2, hiddens = model_c2(inputs[:, 1, :, :])
            outputs_c3, hiddens = model_c3(inputs[:, 2, :, :])
            outputs_c4, hiddens = model_c4(inputs[:, 3, :, :])

            outputs = outputs_c1 + outputs_c2 + outputs_c3 + outputs_c4

            _, preds = torch.max(outputs, 1)
            softmaxed = torch.nn.functional.softmax(outputs, dim=-1)

        for i in range(len(batch['paths'])):
            sequence = pathlib.Path(batch['paths'][i]).stem
            predictions['Object'].append(batch['containers'][i])
            predictions['Sequence'].append(sequence)
            for c in range(cfg.output_dim):
                predictions[f'{cfg.task}_prob_{c}'].append(softmaxed[i,
                                                                     c].item())

    predictions_dataset = pd.DataFrame.from_dict(predictions).sort_values(
        ['Object', 'Sequence'])
    predictions_dataset.to_csv(save_path, index=False)
    # returning the dataset because it will be useful for test-time prediction averaging
    return predictions_dataset
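The four per-camera outputs above are fused by summing logits before the softmax. A closely related alternative (not what this example does) is to softmax each model's output first and average the probabilities; a small sketch:

import torch
import torch.nn.functional as F

def average_probabilities(logit_list):
    # Softmax each model's logits, then average across models.
    probs = [F.softmax(logits, dim=-1) for logits in logit_list]
    return torch.stack(probs, dim=0).mean(dim=0)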