def predict_fn(input_data, model):
    print('Inferring sentiment of input data.')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if model.word_dict is None:
        raise Exception('Model has not been loaded properly, no word_dict.')

    # Process input_data so that it is ready to be sent to our model. This produces
    # two variables:
    #   data_X   - a sequence of length 500 which represents the converted review
    #   data_len - the original length of the review

    data_words = review_to_words(input_data)
    data_X, data_len = convert_and_pad(model.word_dict, data_words, pad=500)

    # Using data_X and data_len we construct an appropriate input tensor. Remember
    # that our model expects input data of the form 'len, review[500]'.
    data_pack = np.hstack((data_len, data_X))
    data_pack = data_pack.reshape(1, -1)

    data = torch.from_numpy(data_pack)
    data = data.to(device)

    # Make sure to put the model into evaluation mode
    model.eval()

    # Apply the model to the input data. The returned value is a numpy array
    # containing a single integer which is either 1 or 0.
    with torch.no_grad():
        result = model(data.long())
    result = result.detach().cpu().numpy()
    result = 1 if result > 0.5 else 0

    return np.array([result], dtype=np.int32)
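
For context, a minimal local smoke test of predict_fn might look like the following sketch. It assumes a SageMaker-style serving script in which a companion model_fn loads the trained model and its word_dict; the 'model/' path and the sample review are placeholders, not part of the original code.

# Hypothetical smoke test for predict_fn; model_fn and the 'model/' path are assumptions.
if __name__ == '__main__':
    model = model_fn('model/')  # assumed companion loader returning a model with word_dict set
    review = "This movie was surprisingly good, I would watch it again."
    prediction = predict_fn(review, model)
    print('Predicted label:', int(prediction[0]))  # 1 or 0
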
def train(args, model, device, train_loader, optimizer, epoch, timestamp,
          ngrams, id2w):
    model.train()
    count = 0
    train_loss = 0
    total = 0
    sos = 0
    criterion = nn.CrossEntropyLoss()

    for batch_idx, data in enumerate(train_loader):

        loss = 0
        data = data.long().to(device)

        inputs = [
            n_gram_batchify(sos, n_gram, device, data) for n_gram in ngrams
        ]

        for inp in inputs:
            output = model(inp)
            loss += criterion(output, data)

        optimizer.zero_grad()
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
        total += len(data)

        if batch_idx % args.log_interval == 1:

            print('Train Epoch: {} [{}/{}] \t Loss: {:.6f}  '.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                train_loss / (batch_idx + 1)))

    cpkt_fol_name = '/home/hatzis/Desktop/stim_ctc/multigram/checkpoints/test_day_' + timestamp

    if not os.path.exists(cpkt_fol_name):
        print(
            "Checkpoint Directory does not exist! Making directory {}".format(
                cpkt_fol_name))
        os.mkdir(cpkt_fol_name)
    logger(cpkt_fol_name + '/training.txt',
           [str(epoch), str(float(train_loss / (batch_idx + 1)))])
def validate(args, model, device, test_loader, optimizer, epoch, timestamp,
             ngrams, id2w):
    model.eval()
    eval_loss = 0
    # correct = 0

    sentences = []
    criterion = nn.CTCLoss(blank=0, reduction='mean')
    sos = 0  # start-of-sequence token id, mirroring train()

    with torch.no_grad():
        for batch_idx, data in enumerate(test_loader):
            data = data.long().to(device)
            inputs = [
                n_gram_batchify(sos, n_gram, device, data) for n_gram in ngrams
            ]

            loss = 0  # accumulate the loss over the n-gram views of this batch
            for inp in inputs:
                output = model(inp)
                loss += criterion(output, data)

            eval_loss += loss.item()  # sum up batch loss

            probs = nn.functional.softmax(output, dim=-1)
            pred = probs.argmax(dim=-1, keepdim=True).squeeze().cpu().numpy()

            ref = ''
            # The loader yields only `data`, which also serves as the reference target here.
            refs = data.squeeze().cpu().numpy()

            for i in range(data.size(1)):
                ref += id2w[refs[i]] + ' '

            s = greedy_decode(id2w, pred, output.size(0), ' ')

            sentences.append(s)

    #model_out_path = model.get_name() +'no_pad' +'_loss_' +str(float(eval_loss / len(test_loader.dataset)))+'_epoch_'+str(epoch) + ".pth"
    cpkt_fol_name = '/home/hatzis/Desktop/stim_ctc/multigram/checkpoints/test_day_' + timestamp
    if not os.path.exists(cpkt_fol_name):
        print(
            "Checkpoint Directory does not exist! Making directory {}".format(
                cpkt_fol_name))
        os.mkdir(cpkt_fol_name)
    pred_name = cpkt_fol_name + '/subunetseval_greedy_predictions_epoch' + str(
        epoch) + 'loss_' + str(float(
            eval_loss / len(test_loader.dataset))) + '_' + timestamp + '_.csv'
    write_csv(sentences, pred_name)
    wer = calc_wer(
        "/home/hatzis/Desktop/teo/ctc_last2/files/dev_phoenixv1.csv",
        pred_name)
    val_loss = eval_loss / len(test_loader.dataset)
    print('Evaluation : Average loss: {:.4f} Word error rate {}%'.format(
        eval_loss / len(test_loader.dataset), wer))

    global best_wer

    for_checkpoint = {
        'epoch': epoch,
        'model_dict': model.state_dict(),
        'optimizer_dict': optimizer.state_dict(),
        'validation_loss': str(val_loss),
        'word error rate': wer
    }
    is_best = wer < best_wer
    if is_best:
        print("BEST WER")
        best_wer = wer
        save_checkpoint(for_checkpoint, is_best, cpkt_fol_name,
                        'best_wer' + str(wer))
    else:
        save_checkpoint(for_checkpoint, is_best, cpkt_fol_name, 'last')

    with open(cpkt_fol_name + '/params_args.txt', 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    logger(cpkt_fol_name + '/validation.txt', [
        str(epoch),
        str(float(eval_loss / len(test_loader.dataset))),
        str(wer)
    ])
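
A minimal sketch of how train() and validate() above might be driven epoch by epoch. The argument names match the two functions; args.epochs, the timestamp format, and the initialisation of the global best_wer are assumptions about the surrounding script, which is not shown here.

import datetime

best_wer = float('inf')  # assumed initial value for the global that validate() updates

def run_training(args, model, device, train_loader, test_loader, optimizer,
                 ngrams, id2w):
    # Hypothetical driver; the originals only receive timestamp, so its format is an assumption.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch, timestamp,
              ngrams, id2w)
        validate(args, model, device, test_loader, optimizer, epoch, timestamp,
                 ngrams, id2w)
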
Example #4
    def train(self, iteration, epochs=1):
        #move from write cache to db
        self.clearSampleCache()

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        #device = torch.device('cpu')

        if config.newIterNets:
            newNet = Net(softmax=self.softmax)
            #embedding should be preserved across iterations
            #but we want a fresh start for the strategy
            #actually, the deep cfr paper said don't do this
            #newNet.load_state_dict(self.net.state_dict())
            #I'm still going to copy over the embedding
            #newNet.embeddings.load_state_dict(self.net.embeddings.state_dict())
            self.net = newNet

        self.net = self.net.to(device)
        #self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        self.optimizer = OPTIMIZER(self.net.parameters(), lr=self.lr)
        self.net, self.optimizer = amp.initialize(self.net,
                                                  self.optimizer,
                                                  opt_level=AMP_OPT_LEVEL)
        #self.optimizer = optim.SGD(self.net.parameters(), lr=self.lr, momentum=0.9)
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer,
            'min',
            patience=config.schedulerPatience,
            verbose=False)
        self.net.train(True)

        #used for scheduling
        lowestLoss = 999
        lowestLossIndex = -1
        lastResetLoss = None
        runningLoss = []

        #we don't really use the dataset, but we use it to read some files
        #we should fix this, but it works and doesn't really hurt anything
        dataset = dataStorage.Dataset(self.name, self.sharedDict,
                                      self.outputSize)

        #validation split based on
        #https://stackoverflow.com/questions/50544730/how-do-i-split-a-custom-dataset-into-training-and-test-datasets
        indices = list(range(dataset.size))
        split = int(
            np.floor(config.valSplit *
                     min(dataset.size, config.epochMaxNumSamples)))
        np.random.shuffle(indices)
        trainIndices, testIndices = indices[split:min(
            dataset.size, config.epochMaxNumSamples)], indices[:split]
        #trainSampler = SubsetRandomSampler(trainIndices)
        #testSampler = SubsetRandomSampler(testIndices)

        #we could scale the minibatch size by the number of samples, but this slows things down
        #miniBatchSize = min(config.miniBatchSize, len(trainIndices) // config.numWorkers)
        miniBatchSize = config.miniBatchSize

        #we could instead scale the number of workers by the number of minibatches
        #numWorkers = min(config.numWorkers, len(trainIndices) // miniBatchSize)
        numWorkers = config.numWorkers

        trainingLoader = dataStorage.BatchDataLoader(
            id=self.name,
            indices=trainIndices,
            batch_size=miniBatchSize,
            num_threads_in_mt=config.numWorkers)
        baseTrainingLoader = trainingLoader
        if numWorkers > 1:
            trainingLoader = MultiThreadedAugmenter(trainingLoader, None,
                                                    numWorkers, 2, None)

        testingLoader = dataStorage.BatchDataLoader(
            id=self.name,
            indices=testIndices,
            batch_size=miniBatchSize,
            num_threads_in_mt=numWorkers)
        baseTestingLoader = testingLoader
        if numWorkers > 1:
            testingLoader = MultiThreadedAugmenter(testingLoader, None,
                                                   numWorkers)

        print(file=sys.stderr)
        shuffleStride = 1  #TODO move to config
        for j in range(epochs):
            if epochs > 1:
                print('\repoch', j, end=' ', file=sys.stderr)

            if j == 0:
                print('training size:',
                      len(trainIndices),
                      'val size:',
                      len(testIndices),
                      file=sys.stderr)

            totalLoss = 0

            if (j + 1) % shuffleStride == 0:
                baseTrainingLoader.shuffle()

            i = 0
            sampleCount = 0
            chunkSize = dataset.size / (miniBatchSize * 10)
            for data, dataLengths, labels, iters in trainingLoader:
                sampleCount += 1  #dataLengths.shape[0]
                i += 1

                labels = labels.float().to(device)
                iters = iters.float().to(device)
                data = data.long().to(device)
                dataLengths = dataLengths.long().to(device)

                #evaluate on network
                self.optimizer.zero_grad()
                ys = self.net(data, lengths=dataLengths, trace=False).squeeze()

                #loss function from the paper, except we mask out ignored values
                #loss = iters.view(labels.shape[0],-1) * ((labels - ys) ** 2)
                #mask = loss == IGNORE_LABEL
                #loss[mask] = 0
                #loss = torch.sum(loss) / (torch.sum(iters).item())
                loss = DeepCfrModel._loss(labels, ys, iters)
                #get gradient of loss
                #use amp because nvidia said it's better
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
                #loss.backward()

                #clip gradient norm, which was done in the paper
                nn.utils.clip_grad_norm_(self.net.parameters(), 5)

                if config.gradPlotStride and (
                        j + 1) % config.gradPlotStride == 0 and i == 1:
                    gradPlot.plot_grad_flow(self.net.named_parameters())

                #train the network
                self.optimizer.step()
                totalLoss += loss.item()

            avgLoss = totalLoss / sampleCount
            with open('trainloss.csv', 'a') as file:
                print(avgLoss, end=',', file=file)

            #get validation loss
            #testLoader = torch.utils.data.DataLoader(dataset, batch_size=miniBatchSize, num_workers=config.numWorkers, collate_fn=myCollate, sampler=testSampler)
            self.net.train(False)
            baseTestingLoader.shuffle()
            totalValLoss = 0
            valCount = 0
            stdTotal = 0
            stdCount = 0
            #for data, dataLengths, labels, iters in testLoader:
            for data, dataLengths, labels, iters in testingLoader:
                labels = labels.float().to(device)
                #print('labels', np.round(100 * labels.cpu().numpy()) / 100, file=sys.stderr)
                iters = iters.float().to(device)
                data = data.long().to(device)
                dataLengths = dataLengths.long().to(device)
                ys = self.net(data, lengths=dataLengths, trace=False).squeeze()
                if config.verboseValidation and valCount == 0:
                    #print('data', data[0:min(10, len(data))])
                    print('labels', labels[0:min(10, len(labels))])
                    print('output', ys[0:min(10, len(labels))])
                    print('stddev', ys[:, 0].std())  # first column is good enough
                stdTotal += ys.std().item()
                stdCount += 1

                #loss = torch.sum(iters.view(labels.shape[0],-1) * ((labels - ys) ** 2)) / (torch.sum(iters).item())
                loss = DeepCfrModel._loss(labels, ys, iters)
                totalValLoss += loss.item()
                valCount += 1  #dataLengths.shape[0]

            self.net.train(True)

            with open('stddev.csv', 'a') as file:
                print(stdTotal / stdCount, end=',', file=file)

            avgValLoss = totalValLoss / valCount

            #running average of last 3 validation losses
            runningLoss.append(avgValLoss)
            if len(runningLoss) > 3:
                runningLoss = runningLoss[-3:]
            schedLoss = sum(runningLoss) / len(runningLoss)

            if config.useScheduler:
                self.scheduler.step(schedLoss)

            if schedLoss < lowestLoss:
                lowestLoss = schedLoss
                lowestLossIndex = j
            """
            if schedLoss < 0.35:
                print('eh,', schedLoss, 'is good enough', file=sys.stderr)
                break
            """
            """
            if j - lowestLossIndex > 3 * config.schedulerPatience:#avoid saddle points
                #print('resetting learn rate to default', j, lowestLossIndex, lowestLoss, schedLoss, lastResetLoss, file=sys.stderr)
                #self.optimizer = optim.Adam(self.net.parameters(), lr=config.learnRate)
                #self.optimizer = optim.SGD(self.net.parameters(), lr=self.lr, momentum=0.9)
                #self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min', patience=config.schedulerPatience, verbose=False)
                print('stopping epoch early')
                break
                lowestLossIndex = j

                #if we've reset before and made no progress, just stop
                if lastResetLoss is not None and (schedLoss - lastResetLoss) / lastResetLoss > -0.01:
                    print('stopping epoch early, (schedLoss - lastResetLoss) / lastResetLoss) is', (schedLoss - lastResetLoss) / lastResetLoss, file=sys.stderr)
                    break
                lastResetLoss = schedLoss
            """

            #show in console and output to csv
            print('val Loss', avgValLoss, end='', file=sys.stderr)
            with open('valloss.csv', 'a') as file:
                #print(avgValLoss, end=',', file=file)
                print(schedLoss, end=',', file=file)

        with open('valloss.csv', 'a') as file:
            print(file=file)
        with open('trainloss.csv', 'a') as file:
            print(file=file)
        with open('stddev.csv', 'a') as file:
            print(file=file)
        print('\n', file=sys.stderr)

        self.net.train(False)

        self.saveModel(iteration)

        #warPoker examples
        """
        exampleInfoSets = [
            ['start', 'hand', '2', '0', 'deal', '1', 'raise'],
            ['start', 'hand', '7', '0', 'deal', '1', 'raise'],
            ['start', 'hand', '14', '0', 'deal', '1', 'raise'],
            ['start', 'hand', '2', '1', 'deal'],
            ['start', 'hand', '7', '1', 'deal'],
            ['start', 'hand', '14', '1', 'deal'],
        ]
        for example in exampleInfoSets:
            print('example input:', example, file=sys.stderr)
            probs, expVal = self.predict(example, trace=False)
            print('exampleOutput (deal, fold, call, raise)', np.round(100 * probs), 'exp value', round(expVal * 100), file=sys.stderr)
        """

        #ace example
        """
Example #5
File: utils.py Project: uhh-lt/lttc
def dataAsLongDeviceTensor(data, device):
    """Return `data` as a long tensor on `device`, converting only when necessary."""
    if isinstance(data, torch.LongTensor):
        return data.to(device)
    if isinstance(data, torch.Tensor):
        return data.long().to(device)
    return torch.tensor(data, dtype=torch.long, device=device)
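
Illustrative usage of dataAsLongDeviceTensor covering the three branches above; the device selection line is an assumption for the sketch, not part of utils.py.

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataAsLongDeviceTensor([1, 2, 3], device)                  # Python list -> new long tensor on device
dataAsLongDeviceTensor(torch.tensor([1.0, 2.0]), device)   # float tensor -> cast to long, then moved
dataAsLongDeviceTensor(torch.LongTensor([1, 2]), device)   # already long -> just moved to device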