Esempio n. 1
0
def use_model(state_dict_file, image_file, device):
    """Classify one image as cat or dog with a saved CNN and print the verdict.

    Args:
        state_dict_file: Location of the saved ``state_dict`` passed to
            ``torch.load``.
        image_file: Path of the image to classify.
        device: Device the network and the input tensor are placed on.

    Raises:
        FileNotFoundError: If ``image_file`` is not an existing file.
        OSError: If OpenCV cannot decode ``image_file``.
    """
    print("Creating neural net...")
    cnn = CNN(IMG_SIZE, KERNEL_SIZE, device)
    # map_location lets a checkpoint written on a GPU be restored on a
    # CPU-only host (and vice versa).
    cnn.load_state_dict(torch.load(state_dict_file, map_location=device))
    # No-op if the constructor already placed the weights on `device`.
    cnn.to(device)
    cnn.eval()

    if not os.path.isfile(image_file):
        raise FileNotFoundError(f"File {image_file} does not exist")
    img = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals an unreadable file by returning None
        raise OSError(f"Cannot load file {image_file}")
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

    # Scale pixels to [0, 1] and shape as (batch, channel, height, width)
    img_data = torch.Tensor(img).view(-1, 1, IMG_SIZE, IMG_SIZE) / 255.0
    img_data = img_data.to(device)

    # Inference only: no autograd graph is needed
    with torch.no_grad():
        result = cnn(img_data)

    # Output index 0 is reported as "cat", index 1 as "dog"; ties go to dog.
    # NOTE(review): the "% confidence" figure assumes the network outputs
    # probabilities -- confirm against the CNN definition.
    cat_score, dog_score = result[0][0], result[0][1]
    if cat_score > dog_score:
        print(
            f"{image_file} is a cat ({round(float(cat_score*100), 2)}% confidence)"
        )
    else:
        print(
            f"{image_file} is a dog ({round(float(dog_score*100), 2)}% confidence)"
        )
def model_fn(model_dir):
    """Rebuild the CNN from the artefacts stored in ``model_dir``.

    Reads ``model_info.pth`` (constructor arguments) and ``model.pth``
    (weights) from ``model_dir``, builds the network and returns it in
    evaluation mode.

    Args:
        model_dir: Directory containing ``model_info.pth`` and ``model.pth``.

    Returns:
        The ``CNN`` instance with the stored weights loaded, in eval mode.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # map_location makes artefacts saved on a GPU loadable on a CPU-only host
    with open(os.path.join(model_dir, 'model_info.pth'), 'rb') as f:
        model_info = torch.load(f, map_location=device)

    print('model_info: {}'.format(model_info))
    logger.info('Current device: {}'.format(device))

    # NOTE(review): the key is spelled 'simililarity-dims'; it has to match
    # whatever the training script wrote, so it is kept verbatim here.
    model = CNN(similarity_dims=model_info['simililarity-dims'])

    with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
        model.load_state_dict(torch.load(f, map_location=device))

    # NOTE(review): the model itself is not moved to `device` (unchanged from
    # the original) -- confirm which device callers expect.
    model.eval()
    return model
Esempio n. 3
0
class TrainModel:
    """Mini-batch trainer for ``CNN`` with patience-based early stopping.

    Training runs over ``training_data`` in shuffled mini-batches. Every
    ``validation_frequency`` mini-batches the accuracy on ``validation_data``
    is measured; training stops once the global mini-batch index reaches
    ``patience``, which is extended whenever validation accuracy improves
    noticeably.
    """

    def __init__(self, training_data=None,
                 test_data=None, validation_data=None,
                 learning_rate=1.0, mini_batch_size=4, epochs=1, num_class=1):
        """Store the datasets and hyper-parameters and build model/optimizers.

        Args:
            training_data: Dataset wrapped in a shuffling ``DataLoader``.
            test_data: Iterable of ``(x, y)`` pairs scored after training.
            validation_data: Iterable of ``(x, y)`` pairs used for early
                stopping.
            learning_rate: Learning rate for both optimizers.
            mini_batch_size: Number of samples per mini-batch.
            epochs: Maximum number of passes over ``training_data``.
            num_class: Stored on the instance; not used by this class.
        """
        self.num_class = num_class
        self.training_data = training_data
        self.test_data = test_data
        self.validation_data = validation_data
        self.mini_batch_size = mini_batch_size
        self.mini_batches = DataLoader(self.training_data, batch_size=self.mini_batch_size, shuffle=True)

        self.learning_rate = learning_rate
        self.epochs = epochs

        self.model = CNN()
        # Mean squared error, no square root: torch.sum((a - b) * (a - b)) / n
        self.loss_fn = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.1)
        # Alternative optimizer; train() only steps self.optimizer
        self.optimizer2 = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)

        # Early stopping: stop once the global mini-batch index reaches
        # `patience`; validate every `validation_frequency` mini-batches.
        self.patience = 5000
        self.validation_frequency = 500  # iterations [0, 499] make up 500

    def train(self):
        """Train until ``epochs`` is reached or early stopping triggers.

        Prints the number of epochs run and the accuracy on ``test_data``
        when training ends.
        """
        n = len(self.training_data)
        # (n - 1) // bs + 1 == math.ceil(n / bs)
        n_train_batches = (n - 1) // self.mini_batch_size + 1
        best_validation_accuracy = 0
        stop = False
        epoch = 0

        while epoch < self.epochs and not stop:
            for i, (xb, yb) in enumerate(self.mini_batches):
                self.model.train()
                pred = self.model(xb)
                loss = self.loss_fn(pred, yb)

                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

                t, best_validation_accuracy = self.validation(i, n_train_batches, epoch, best_validation_accuracy)
                if t >= self.patience:
                    stop = True
                    print("early-stop")
                    break
            epoch += 1
        # `epoch` already counts the epochs that were run, so it is printed
        # as-is (adding 1 again would over-report by one).
        print("Epoch {0} Test accuracy : {1}".format(epoch, self.evaluate(self.test_data)))

    def validation(self, i, n_train_batches, epoch, best_validation_accuracy):
        """Run periodic validation and update the early-stopping patience.

        Args:
            i: Index of the current mini-batch within the epoch.
            n_train_batches: Number of mini-batches per epoch.
            epoch: Zero-based index of the current epoch.
            best_validation_accuracy: Best validation accuracy seen so far.

        Returns:
            ``(t, best_validation_accuracy)`` where ``t`` is the global
            mini-batch index (how many mini-batches came before this one).
        """
        # A relative gain above 0.1% counts as significant and extends patience
        improvement_threshold = 1.001
        # On a significant gain, keep training for at least this many further
        # validation periods
        patience_increase = 5

        t = epoch * n_train_batches + i
        if (t + 1) % self.validation_frequency == 0:
            # evaluate() switches to eval mode itself and restores the mode
            this_validation_accuracy = self.evaluate(self.validation_data)
            print("[{0}, {1:5d}] accuracy: {2}".format(epoch + 1, t + 1, this_validation_accuracy))
            if this_validation_accuracy > best_validation_accuracy:
                if this_validation_accuracy > best_validation_accuracy * improvement_threshold:
                    self.patience = max(self.patience, t + patience_increase * self.validation_frequency)
                    print("patience increase:", self.patience)
                best_validation_accuracy = this_validation_accuracy
        return t,  best_validation_accuracy

    def _in_eval_mode(self, compute):
        """Call ``compute()`` with the model in eval mode and autograd off.

        The model's previous train/eval mode is restored afterwards, even if
        ``compute`` raises.
        """
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                return compute()
        finally:
            self.model.train(was_training)

    def evaluate(self, test_data):
        """Return the fraction of ``(x, y)`` pairs whose argmax prediction is ``y``."""
        # Sum a list of 0/1 match flags, e.g. [1, 0, 1, ...]
        correct = self._in_eval_mode(
            lambda: sum(int(torch.argmax(self.model(x)).item() == y.item())
                        for (x, y) in test_data))
        return correct / len(test_data)

    def evaluate_loss(self, test_data):
        """Return the mean loss over ``test_data`` against vectorized targets."""
        total = self._in_eval_mode(
            lambda: sum(self.loss_fn(self.model(x), MNIST.vectorized_result(y.item())).item()
                        for x, y in test_data))
        return total / len(test_data)
Esempio n. 4
0
def main():
    """Train a CNN sentiment classifier on IMDB and report test metrics.

    Builds the vocabulary with GloVe vectors, trains for ``ARGS.epochs``
    epochs, saves the weights with the lowest validation loss to
    ``model.pt``, then evaluates on the test split and shows a heatmap of
    the four confusion counts returned by ``run_epoch``.
    """
    print('Starting process...')

    SEED = 111
    torch.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    TEXT = torchtext.data.Field(tokenize='spacy', batch_first=True)
    LABEL = torchtext.data.LabelField(dtype=torch.float)

    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
    # random.seed() returns None, so passing its result as random_state only
    # worked through the seeding side effect. Seed first, then hand over the
    # resulting generator state explicitly.
    random.seed(SEED)
    train_data, valid_data = train_data.split(random_state=random.getstate())

    max_vocab_size = 25_000
    TEXT.build_vocab(
        train_data,
        max_size=max_vocab_size,
        vectors="glove.6B.100d",
        unk_init=torch.Tensor.normal_,
    )
    LABEL.build_vocab(train_data)

    train_iter, valid_iter, test_iter = torchtext.data.BucketIterator.splits(
        (train_data, valid_data, test_data),
        batch_size=ARGS.batch_size,
        device=DEVICE)

    vocab_size = len(TEXT.vocab)
    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    filter_sizes = np.array(ARGS.filter_sizes.split(','), dtype=int)
    model = CNN(vocab_size, ARGS.binary_neuron, ARGS.embed_dim, ARGS.n_filters,
                filter_sizes, ARGS.output_dim, ARGS.dropout_rate, pad_idx)

    # Start from the pretrained vectors, with the <unk> and <pad> rows zeroed
    pretrained_embeddings = TEXT.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)
    unk_idx = TEXT.vocab.stoi[TEXT.unk_token]
    model.embedding.weight.data[unk_idx] = torch.zeros(ARGS.embed_dim)
    model.embedding.weight.data[pad_idx] = torch.zeros(ARGS.embed_dim)

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())

    model.to(DEVICE)
    criterion.to(DEVICE)

    min_valid_loss = float('inf')
    best_checkpoint_saved = False
    for epoch in range(1, ARGS.epochs + 1):
        start_time = time.time()

        model.train()
        train_loss, train_acc, *_ = run_epoch(
            model, train_iter, criterion, optimizer)

        model.eval()
        with torch.no_grad():
            valid_loss, valid_acc, *_ = run_epoch(
                model, valid_iter, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < min_valid_loss:
            min_valid_loss = valid_loss
            torch.save(model.state_dict(), 'model.pt')
            best_checkpoint_saved = True

        print(
          f'Epoch: {epoch:02} | Epoch Time: {epoch_mins}m {epoch_secs}s\n'
          f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%\n'
          f'\tVal. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
        )

    # Score the test set with the best validation checkpoint rather than with
    # whatever weights the last epoch happened to leave behind.
    if best_checkpoint_saved:
        model.load_state_dict(torch.load('model.pt', map_location=DEVICE))
    model.eval()
    with torch.no_grad():
        test_loss, test_acc, test_tp, test_tn, test_fp, test_fn = run_epoch(
            model, test_iter, criterion)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

    confusion = np.array([[test_tp, test_fp], [test_fn, test_tn]])
    sns.heatmap(confusion,
                vmax=.5,
                linewidth=0.5,
                cmap="Blues",
                xticklabels=["Positive", "Negative"],
                yticklabels=["True", "False"])
    print(confusion)
    plt.show()
def train(args):
    """Train the CNN text classifier with periodic validation and LR decay.

    Every ``args.valid_iter`` mini-batches the model is scored on the
    validation lines. A new best accuracy saves the model and optimizer
    state. After ``args.patience`` consecutive non-improving validations the
    best model is reloaded and the learning rate is multiplied by
    ``args.lr_decay``; after ``args.max_num_trials`` such restarts training
    stops early.

    Args:
        args: Namespace providing ``device``, ``train_file``, ``val_file``,
            ``vocab_file``, ``log_every``, ``valid_iter``, ``use_embed``,
            ``lr``, ``lr_decay``, ``max_epochs``, ``batch_size``,
            ``max_sent_len``, ``clip_grad``, ``patience``,
            ``max_num_trials`` and ``model_save_path``.
    """
    device = args.device
    # Context managers so the data files are closed once they are read
    with open(args.train_file) as f:
        train_lines = f.readlines()
    with open(args.val_file) as f:
        val_lines = f.readlines()

    log_every = args.log_every
    valid_iter = args.valid_iter
    patience = 0
    num_trial = 0
    hist_valid_scores = []
    begin_time = time.time()

    vocab = get_vocab(args.vocab_file)
    model = CNN(args, vocab)
    if args.use_embed == 1:
        model.load_vector(args, vocab)
    if args.device == 'cuda':
        model.cuda()

    lr = args.lr
    optim = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss().to(device=device)

    model.train()

    for ep in range(args.max_epochs):

        # The iteration counter restarts every epoch, so the running loss
        # sums must restart with it; otherwise leftovers from the previous
        # epoch inflate the next reported average.
        train_iter = 0
        avg_loss = 0
        cum_loss = 0

        for examples, labels in batch_iter(train_lines, vocab, args.batch_size,
                                           args.max_sent_len, shuffle=True):

            train_iter += 1
            optim.zero_grad()
            labels = torch.tensor(labels).to(device=device)
            examples = torch.tensor(examples).to(device=device)
            output = model(examples)

            loss = criterion(output, labels)
            avg_loss += loss.item()
            cum_loss += loss.item()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.clip_grad)
            optim.step()

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg.loss %.2f, time elapsed %.2f'\
                     % (ep + 1, train_iter, avg_loss / log_every, time.time() - begin_time), file=sys.stderr)

                begin_time = time.time()
                avg_loss = 0

            if train_iter % valid_iter == 0:

                print('epoch %d, iter %d, cum.loss %.2f, time elapsed %.2f'\
                     % (ep + 1, train_iter, cum_loss / valid_iter, time.time() - begin_time), file=sys.stderr)

                cum_loss = 0

                print("Begin Validation ", file=sys.stderr)

                model.eval()
                acc = test(val_lines, model, vocab, args)
                model.train()

                print('validation: iter %d, acc %f' % (train_iter, acc),
                      file=sys.stderr)

                is_better = (not hist_valid_scores) or (acc > max(hist_valid_scores))
                hist_valid_scores.append(acc)

                if is_better:
                    patience = 0
                    print("Save the current model and optimiser state")
                    torch.save(model, args.model_save_path)

                    torch.save(optim.state_dict(),
                               args.model_save_path + '.optim')

                elif patience < args.patience:

                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == args.patience:
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == args.max_num_trials:
                            print('early stop!', file=sys.stderr)
                            return

                        lr = lr * args.lr_decay

                        print(
                            'load previously best model and decay learning rate to %f'
                            % lr,
                            file=sys.stderr)
                        # NOTE(review): load() is defined elsewhere; it is
                        # assumed to return the model on the right device.
                        model = load(args.model_save_path)
                        # Training continues with the reloaded model, so make
                        # sure it is in train mode whatever load() left it in.
                        model.train()

                        print('restore parameters of the optimizers',
                              file=sys.stderr)

                        optim = torch.optim.Adam(model.parameters(),
                                                 lr=lr)
                        optim.load_state_dict(
                            torch.load(args.model_save_path + '.optim'))
                        # Move the restored optimizer state to the training device
                        for state in optim.state.values():
                            for k, v in state.items():
                                if isinstance(v, torch.Tensor):
                                    state[k] = v.to(args.device)
                        # load_state_dict restored the old learning rate;
                        # re-apply the decayed one
                        for group in optim.param_groups:
                            group['lr'] = lr

                        patience = 0

    print("Training Finished", file=sys.stderr)
Esempio n. 6
0
import paddle
from PIL import Image
import numpy as np
from cnn import CNN

# Run in imperative mode on the CPU place
place = paddle.CPUPlace()
paddle.enable_imperative(place)

# Build the network structure
cnn_infer = CNN()
# Load the saved model parameters
param_dict, _ = paddle.imperative.load("models/cnn")
# Copy the parameters into the network
cnn_infer.load_dict(param_dict)
# Switch the network to evaluation mode before predicting
cnn_infer.eval()


# Preprocess the input data
def load_image(file):
    """Load an image as a normalised ``(1, 1, 28, 28)`` float32 array.

    The image is converted to 8-bit greyscale, resized to 28x28 and its
    pixel values are rescaled from [0, 255] to [-1, 1].

    Args:
        file: Path or file object accepted by ``PIL.Image.open``.

    Returns:
        A ``numpy.ndarray`` of shape ``(1, 1, 28, 28)`` and dtype float32.
    """
    # The context manager closes the underlying file once it is decoded
    with Image.open(file) as src:
        im = src.convert('L')
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    im = im.resize((28, 28), Image.LANCZOS)
    im = np.array(im).reshape(1, 1, 28, 28).astype(np.float32)
    im = im / 255.0 * 2.0 - 1.0
    return im


# Load the image to run the prediction on
tensor_img = load_image('image/infer_3.png')
# Run the prediction
results = cnn_infer(paddle.imperative.to_variable(tensor_img))
Esempio n. 7
0
class Node:
    def __init__(self, parentId, nodeId, device, isTrain, level):
        self.parentId = parentId
        self.nodeId = nodeId
        self.device = device
        self.isTrain = isTrain
        self.level = level

    def setInput(self, trainInputDict, valInputDict, numClasses, giniValue,
                 isLeaf):
        """Attach the node's data and build its CNN and MLP models.

        The CNN is sized from the first training sample: ``shape[0]`` is used
        as the channel count and ``shape[2]`` as the image size. The MLP takes
        its input width from the CNN's ``features`` attribute.

        Args:
            trainInputDict: Dict whose ``"data"`` entry holds the training
                samples.
            valInputDict: Validation counterpart, stored as-is.
            numClasses: Number of classes the CNN distinguishes.
            giniValue: Stored on the node as-is.
            isLeaf: Whether this node is a leaf.
        """
        self.trainInputDict, self.valInputDict = trainInputDict, valInputDict

        firstSample = trainInputDict["data"][0]
        imgSize, inChannels = firstSample.shape[2], firstSample.shape[0]
        print("nodeId: ", self.nodeId, ", imgSize : ", imgSize)

        self.cnnModel = CNN(img_size=imgSize,
                            in_channels=inChannels,
                            out_channels=16,
                            num_class=numClasses,
                            kernel=5)
        self.mlpModel = MLP(self.cnnModel.features)

        self.numClasses, self.giniValue, self.isLeaf = (numClasses, giniValue,
                                                        isLeaf)

    def trainCNN(self, labelMap, reverseLabelMap):
        loss_fn = nn.CrossEntropyLoss()
        loss_fn_mse = nn.MSELoss()
        optimizer = torch.optim.Adam(self.cnnModel.parameters(), lr=0.001)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 25, 0.4)
        self.cnnModel.to(self.device)
        trainLabels = self.trainInputDict["label"]
        trainInputs = self.trainInputDict["data"]
        trainLabels = trainLabels.to(self.device)
        trainInputs = trainInputs.to(self.device)

        numBatches = 100
        batchSize = int((len(trainInputs) + numBatches - 1) / numBatches)
        numEpochs = 100

        st_btch = 0
        batch_sep = []
        for i in range(numBatches):
            end_btch = min(st_btch + batchSize, len(trainInputs))
            batch_sep.append([st_btch, end_btch])
            st_btch = end_btch

        self.cnnModel.train()
        for epoch in range(numEpochs):
            total = 0
            correct = 0
            train_loss = 0
            random.shuffle(batch_sep)
            for batch in range(numBatches):
                st_btch, end_btch = batch_sep[batch]
                optimizer.zero_grad()
                _, _, est_labels, feat_same = self.cnnModel(
                    trainInputs[st_btch:end_btch])
                batch_loss_label = loss_fn(est_labels,
                                           trainLabels[st_btch:end_btch])
                # print(feat_same.shape)
                # print(trainInputs[st_btch:end_btch].shape)
                batch_loss_featr = loss_fn_mse(feat_same,
                                               trainInputs[st_btch:end_btch])
                batch_loss = batch_loss_featr + batch_loss_label
                batch_loss.backward()
                optimizer.step()
                # print(batch_loss_featr.item(), batch_loss_label.item())
                train_loss += batch_loss.item()
                _, predicted = est_labels.max(1)
                total += end_btch - st_btch
                correct += predicted.eq(
                    trainLabels[st_btch:end_btch]).sum().item()
            scheduler.step()

            #TODO: Add validation iteration here(first change mode to eval)

            print(
                epoch, 'Train Loss: %.3f | Train Acc: %.3f' %
                (train_loss, 100. * correct / total))

            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': self.cnnModel.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'train_loss': train_loss,
                    'labelMap': labelMap,
                    'reverseLabelMap': reverseLabelMap,
                }, 'ckpt/node_cnn_' + str(self.parentId) + '_' +
                str(self.nodeId) + '.pth')

    def trainMLP(self, trainInputs, trainTargets):
        # loss_fn = nn.CrossEntropyLoss()
        loss_fn = nn.BCELoss()
        optimizer = torch.optim.Adam(self.mlpModel.parameters(), lr=0.001)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 10, 0.4)

        trainInputs = trainInputs.to(self.device)
        trainTargets = trainTargets.to(self.device)
        self.mlpModel.to(self.device)

        numBatches = 50
        batchSize = int((len(trainInputs) + numBatches - 1) / numBatches)

        st_btch = 0
        batch_sep = []
        for i in range(numBatches):
            end_btch = min(st_btch + batchSize, len(trainInputs))
            batch_sep.append([st_btch, end_btch])
            st_btch = end_btch

        numEpochs = 60
        self.mlpModel.train()
        for epoch in range(numEpochs):
            train_loss = 0
            correct = 0
            total = 0
            random.shuffle(batch_sep)

            for batch in range(numBatches):
                st_btch, end_btch = batch_sep[batch]
                optimizer.zero_grad()
                est_labels = self.mlpModel(trainInputs[st_btch:end_btch])
                # print(est_labels.shape)
                est_labels = est_labels.view(-1)
                # batch_loss = loss_fn(est_labels, trainTargets[st_btch:end_btch]) #if crossentropy
                batch_loss = loss_fn(
                    est_labels,
                    trainTargets[st_btch:end_btch].float())  #if bce
                batch_loss.backward()
                optimizer.step()

                train_loss += batch_loss.item()
                # _, predicted = est_labels.max(1) #if cross entropy
                predicted = est_labels.detach()  #if bce
                predicted += 0.5
                predicted = predicted.long()
                # total += trainTargets.size(0)
                total += end_btch - st_btch
                correct += predicted.eq(
                    trainTargets[st_btch:end_btch]).sum().item()
            scheduler.step()
            #TODO: add validation testing of MLP here

            print(
                epoch, 'Loss: %.3f | Acc: %.3f' %
                (train_loss, 100. * correct / total))

            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': self.mlpModel.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'train_loss': train_loss,
                }, 'ckpt/node_mlp_' + str(self.parentId) + '_' +
                str(self.nodeId) + '.pth')

    def balanceData(self):
        """Oversample the training set with SMOTE and return the result.

        Each sample is flattened to one row, passed with the labels through
        ``sv.MulticlassOversampling(sv.SMOTE(...))`` and reshaped back to the
        original per-sample shape. ``self.trainInputDict`` is not modified.

        Returns:
            ``(newData, newLabel)``: the resampled data as a float tensor
            with the original trailing three dimensions, and the resampled
            labels as a tensor. Both are CPU tensors.
        """
        data = self.trainInputDict["data"]
        shape = data.shape
        print("trainInputDict[data].shape : ", shape)
        flatData = data.reshape(shape[0], -1)
        print("copy.shape : ", flatData.shape)
        # The tensors may already live on a GPU (work() moves them to
        # self.device first); .numpy() only works on CPU tensors.
        npDict = flatData.detach().cpu().numpy()
        labels = self.trainInputDict["label"]
        print("copyLabel.shape : ", labels.shape)
        npLabel = labels.detach().cpu().numpy()

        oversampler = sv.MulticlassOversampling(sv.SMOTE(n_jobs=6))
        X_resampled, y_resampled = oversampler.sample(npDict, npLabel)
        for label, count in zip(*np.unique(y_resampled, return_counts=True)):
            print('Class {} has {} instances after oversampling'.format(
                label, count))

        newData = torch.from_numpy(
            X_resampled.reshape(len(X_resampled), shape[1], shape[2],
                                shape[3])).float()
        newLabel = torch.from_numpy(y_resampled)
        return newData, newLabel

    def work(self):
        labelsList = []
        labelMap = {}
        reverseLabelMap = {}
        self.trainInputDict["label"] = self.trainInputDict["label"].to(
            self.device)
        self.trainInputDict["data"] = self.trainInputDict["data"].to(
            self.device)

        oldData = self.trainInputDict["data"]
        oldLabel = self.trainInputDict["label"]
        # if self.parentId != 0:
        if self.isTrain:
            for i in self.trainInputDict["label"]:
                if (i.item() not in labelsList):
                    labelsList.append(i.item())
                if len(labelsList) == self.numClasses:
                    break

            for i, val in enumerate(sorted(labelsList)):
                labelMap[val] = i
                reverseLabelMap[i] = val

            for i, val in enumerate(self.trainInputDict["label"]):
                self.trainInputDict["label"][i] = labelMap[val.item()]

            newData, newLabel = self.balanceData()
            self.trainInputDict["data"] = newData
            self.trainInputDict["label"] = newLabel

        self.cnnModel.to(self.device)
        self.mlpModel.to(self.device)
        if self.isTrain:
            self.trainCNN(labelMap, reverseLabelMap)
            print("CNN trained successfully...")

        if self.isTrain and not self.isLeaf:
            final_dict = {}
            ckpt = torch.load('ckpt/node_cnn_' + str(self.parentId) + '_' +
                              str(self.nodeId) + '.pth')
            self.cnnModel.load_state_dict(ckpt['model_state_dict'])
            self.cnnModel.eval()

            # self.trainInputDict["data"].to(self.device)
            image_next, image_next_flat, _, _ = self.cnnModel(
                self.trainInputDict["data"].to(self.device))
            # image_next, image_next_flat, _ = self.cnnModel(self.trainInputDict["data"].to(self.device))
            image_next = image_next.detach()
            image_next_flat = image_next_flat.detach()
            img_flat_nmpy = image_next_flat.to("cpu")
            print("image_next_flat.shape : ", image_next_flat.shape)
            # cluster_ids, _ = kmeans_output(image_next_flat, self.device)
            img_flat_nmpy = img_flat_nmpy.numpy()
            # print("img_flat_nmpy.shape : " ,img_flat_nmpy.shape)
            # print("Starting K-Means...")
            kmeans = KMeans(n_clusters=2, n_jobs=-1).fit(img_flat_nmpy)
            # print("K-Means successfully completed...")
            # print("len(kmeans.labels_) : ", len(kmeans.labels_))
            #TODO: do
            cluster_ids = kmeans.labels_

            leftCnt = {}
            rightCnt = {}

            for i in range(len(self.trainInputDict["data"])):
                label = self.trainInputDict["label"][i].item()
                if cluster_ids[i] == 0:
                    if label in leftCnt:
                        leftCnt[label] += 1
                    else:
                        leftCnt[label] = 1
                else:
                    if label in rightCnt:
                        rightCnt[label] += 1
                    else:
                        rightCnt[label] = 1

            expected_dict = {}

            for label, count in leftCnt.items():
                if label in rightCnt:
                    if count >= rightCnt[label]:
                        expected_dict[label] = 0
                    else:
                        expected_dict[label] = 1
                else:
                    expected_dict[label] = 0

            for label, count in rightCnt.items():
                if not (label in expected_dict):
                    expected_dict[label] = 1

            print("printing expected split from k means")
            print(expected_dict)

            leftSortedListOfTuples = sorted(leftCnt.items(),
                                            reverse=True,
                                            key=lambda x: x[1])
            rightSortedListOfTuples = sorted(rightCnt.items(),
                                             reverse=True,
                                             key=lambda x: x[1])

            for ind, element in enumerate(leftSortedListOfTuples):
                if ind >= self.numClasses / 2:
                    final_dict[element[0]] = 1
                else:
                    final_dict[element[0]] = 0

            for ind, element in enumerate(rightSortedListOfTuples):
                if ind >= self.numClasses / 2:
                    final_dict[element[0]] = 0
                else:
                    final_dict[element[0]] = 1

            # for key, value in rightCnt.items():
            #     if not (key in final_dict):
            #         final_dict[key] = 1
            # final_dict=expected_dict
            print("Printing final_dict items...")
            print(final_dict)
            # for key, value in final_dict.items():
            #     print(key, " ", value)

            #TODO: separate for validation set too
            torch.save({
                'splittingDict': final_dict,
            }, 'ckpt/node_split_' + str(self.parentId) + '_' +
                       str(self.nodeId) + '.pth')

            expectedMlpLabels = []
            for i in range(len(self.trainInputDict["data"])):
                label = self.trainInputDict["label"][i].item()
                expectedMlpLabels.append(final_dict[label])
            expectedMlpLabels = torch.tensor(expectedMlpLabels,
                                             device=self.device)
            print("expectedMlpLabels.shape : ", expectedMlpLabels.shape)

        if self.isTrain and not self.isLeaf:
            self.trainMLP(image_next_flat, expectedMlpLabels)
            print("MLP trained successfully...")

        final_dict = {}
        if not self.isLeaf:
            ckpt = torch.load('ckpt/node_split_' + str(self.parentId) + '_' +
                              str(self.nodeId) + '.pth')
            final_dict = ckpt['splittingDict']
        #doing CNN on original tensors as those only goes to children
        self.trainInputDict["data"] = oldData
        self.trainInputDict["label"] = oldLabel
        ckpt = torch.load('ckpt/node_cnn_' + str(self.parentId) + '_' +
                          str(self.nodeId) + '.pth')
        reverseLabelMap = ckpt['reverseLabelMap']
        labelMap = ckpt['labelMap']
        imgSize = self.trainInputDict["data"][0].shape[2]
        inChannels = self.trainInputDict["data"][0].shape[0]
        # print("nodeId: ", self.nodeId, ", imgSize : ", imgSize)
        outChannels = 16
        kernel = 5
        self.cnnModel = CNN(img_size=imgSize,
                            in_channels=inChannels,
                            out_channels=outChannels,
                            num_class=len(reverseLabelMap),
                            kernel=kernel)
        self.cnnModel.load_state_dict(ckpt['model_state_dict'])
        self.cnnModel.eval()
        self.cnnModel.to(self.device)

        # self.trainInputDict["data"].to(self.device)
        # image_next, image_next_flat, est_labels = self.cnnModel(self.trainInputDict["data"].to(self.device))
        image_next, image_next_flat, est_labels, _ = self.cnnModel(
            self.trainInputDict["data"].to(self.device))

        if not self.isTrain:
            _, predicted = est_labels.max(1)
            predicted = predicted.to(self.device)
            for i, val in enumerate(predicted):
                predicted[i] = reverseLabelMap[val.item()]

            correct = predicted.eq(self.trainInputDict["label"].to(
                self.device)).sum().item()
            total = len(est_labels)

            if not self.isLeaf:
                print('Root Node Acc: %.3f' % (100. * correct / total))

            if self.isLeaf:
                ckpt = torch.load('ckpt/testPred.pth')
                testPredDict = ckpt['testPredDict']
                testPredDict['actual'] = testPredDict['actual'].to(self.device)
                testPredDict['pred'] = testPredDict['pred'].to(self.device)
                # print(testPredDict['pred'].dtype, testPredDict['actual'].dtype, self.trainInputDict["label"].dtype)
                testPredDict['actual'] = torch.cat(
                    (testPredDict['actual'], self.trainInputDict["label"]), 0)
                testPredDict['pred'] = torch.cat(
                    (testPredDict['pred'], predicted), 0)
                torch.save({
                    'testPredDict': testPredDict,
                }, 'ckpt/testPred.pth')

        if self.isLeaf:
            return

        ckpt = torch.load('ckpt/node_mlp_' + str(self.parentId) + '_' +
                          str(self.nodeId) + '.pth')
        self.mlpModel.load_state_dict(ckpt['model_state_dict'])
        self.mlpModel.eval()
        self.mlpModel.to(self.device)

        est_labels = self.mlpModel(image_next_flat)
        # print(est_labels.shape)
        est_labels = est_labels.view(-1)
        mlpPrediction = est_labels.detach()
        mlpPrediction += 0.5
        mlpPrediction = mlpPrediction.long()

        trainLimages = []
        trainRimages = []
        trainLLabels = []
        trainRLabels = []
        lclasses = [0] * 10
        rclasses = [0] * 10
        for i, val in enumerate(mlpPrediction):
            # print("i : ", i)
            if val <= 0.5:
                # trainLimages.append((image_next[i].detach()).tolist())
                lclasses[self.trainInputDict["label"][i].item()] += 1
                # trainLLabels.append(self.trainInputDict["label"][i].item())
            else:
                # trainRimages.append((image_next[i].detach()).tolist())
                rclasses[self.trainInputDict["label"][i].item()] += 1
                # trainRLabels.append(self.trainInputDict["label"][i].item())

        # lTrainDict = {"data":torch.tensor(trainLimages), "label":torch.tensor(trainLLabels)}
        # rTrainDict = {"data":torch.tensor(trainRimages), "label":torch.tensor(trainRLabels)}

        print(final_dict)
        totalLeftImages = 0.0
        totalRightImages = 0.0
        maxLeftClasses = 0.0
        maxRightClasses = 0.0
        testCorrectResults = 0.0
        for i, val in enumerate(lclasses):
            totalLeftImages += val
            if not self.isTrain and (
                    i in labelMap) and final_dict[labelMap[i]] == 0:
                testCorrectResults += val
            maxLeftClasses = max(maxLeftClasses, val)

        for i, val in enumerate(rclasses):
            totalRightImages += val
            if not self.isTrain and (
                    i in labelMap) and final_dict[labelMap[i]] == 1:
                testCorrectResults += val
            maxRightClasses = max(maxRightClasses, val)

        if not self.isTrain:
            total = float(len(self.trainInputDict["label"]))
            print('Split Acc: %.3f' % (100. * testCorrectResults / total))

        leftClassesToBeRemoved = []
        rightClassesToBeRemoved = []

        threshold = 10.0
        for i, val in enumerate(lclasses):
            if float(100 * val) / maxLeftClasses < threshold:
                leftClassesToBeRemoved.append(i)

        for i, val in enumerate(rclasses):
            if float(100 * val) / maxRightClasses < threshold:
                rightClassesToBeRemoved.append(i)

        lclasses = [0] * 10
        rclasses = [0] * 10
        for i, val in enumerate(mlpPrediction):
            # print("i : ", i)
            if val <= 0.5:
                if self.isTrain:
                    if not (self.trainInputDict["label"][i].item()
                            in leftClassesToBeRemoved):
                        trainLimages.append((image_next[i].detach()).tolist())
                        lclasses[self.trainInputDict["label"][i].item()] += 1
                        trainLLabels.append(reverseLabelMap[
                            self.trainInputDict["label"][i].item()])
                else:
                    trainLimages.append((image_next[i].detach()).tolist())
                    lclasses[self.trainInputDict["label"][i].item()] += 1
                    trainLLabels.append(self.trainInputDict["label"][i].item())
            else:
                if self.isTrain:
                    if not (self.trainInputDict["label"][i].item()
                            in rightClassesToBeRemoved):
                        trainRimages.append((image_next[i].detach()).tolist())
                        rclasses[self.trainInputDict["label"][i].item()] += 1
                        trainRLabels.append(reverseLabelMap[
                            self.trainInputDict["label"][i].item()])
                else:
                    trainRimages.append((image_next[i].detach()).tolist())
                    rclasses[self.trainInputDict["label"][i].item()] += 1
                    trainRLabels.append(self.trainInputDict["label"][i].item())

        lTrainDict = {
            "data": torch.tensor(trainLimages),
            "label": torch.tensor(trainLLabels)
        }
        rTrainDict = {
            "data": torch.tensor(trainRimages),
            "label": torch.tensor(trainRLabels)
        }

        giniLeftRatio = 0.0
        giniRightRatio = 0.0

        lcheck = 0.0
        rcheck = 0.0

        print("# of Left images: ", totalLeftImages)
        print("# of Right images: ", totalRightImages)
        noOfLeftClasses = 0
        noOfRightClasses = 0
        for i in lclasses:
            if i != 0:
                noOfLeftClasses += 1
            pi = float(i) / totalLeftImages
            lcheck += pi
            giniLeftRatio += pi * (1 - pi)

        print("---")
        for i in rclasses:
            if i != 0:
                noOfRightClasses += 1
            pi = float(i) / totalRightImages
            rcheck += pi
            giniRightRatio += pi * (1 - pi)

        print("giniRightRatio: ", giniRightRatio)
        print("giniLeftRatio: ", giniLeftRatio)

        leftChildrenRatio = totalLeftImages / totalRightImages

        impurityDrop = leftChildrenRatio * float(giniLeftRatio) + (
            1 - leftChildrenRatio) * float(giniRightRatio)

        print("impurityDrop: ", impurityDrop)
        print("giniGain: ", self.giniValue - impurityDrop)
        print("lclasses: ", lclasses)
        print("rclasses: ", rclasses)
        print("noOfLeftClasses: ", noOfLeftClasses)
        print("noOfRightClasses: ", noOfRightClasses)
        print("lTrainDict[data].shape: ", lTrainDict["data"].shape,
              "  lTrainDict[label].shape: ", lTrainDict["label"].shape)
        print("rTrainDict[data].shape: ", rTrainDict["data"].shape,
              "  rTrainDict[label].shape: ", rTrainDict["label"].shape)

        lValDict = {}
        rValDict = {}
        print("RETURNING FROM WORK...")
        #TODO: populate validation dictionaries too
        # return lTrainDict, lValDict, rTrainDict, rValDict, giniLeftRatio, giniRightRatio
        if self.isTrain and not self.isLeaf:
            return lTrainDict, lValDict, rTrainDict, rValDict, giniLeftRatio, giniRightRatio, noOfLeftClasses, noOfRightClasses
        elif not self.isTrain and not self.isLeaf:
            return lTrainDict, rTrainDict, giniLeftRatio, giniRightRatio, noOfLeftClasses, noOfRightClasses
        elif self.isTrain and self.isLeaf:
            return ""
        else:
            return ""
Esempio n. 8
0
        current_gray = cv.cvtColor(current_image, cv.COLOR_BGR2GRAY)

        start_time = time.time()

        for _ in range(num_iterations):
            flow = cv.calcOpticalFlowFarneback(prev_gray, current_gray, None,
                                               0.5, 1, 15, 2, 5, 1.3, 0)

        end_time = time.time()

    elif args.mode == 'forward':

        model = CNN(640, 360, 3)

        model.load_state_dict(torch.load(model_path))
        model.eval()

        resize_transform = transforms.Resize((640, 360))

        PIL_Image = Image.open(optical_flow_image_path)

        PIL_Image = resize_transform(PIL_Image)

        PIL_Image = np.transpose(PIL_Image, (2, 1, 0))

        PIL_Image = np.expand_dims(PIL_Image, axis=0)

        tensor_image = torch.Tensor(PIL_Image)

        start_time = time.time()
Esempio n. 9
0
def main():
    """Score the pickled test set with the trained CNN and plot metric curves.

    The function loads the constructor parameters and the weights of the
    model from ``trained_models``, runs the model once over the collated
    test data found in ``datasets`` and plots F1, precision and recall as
    returned by ``_metrics_wrt_t`` against the threshold values it reports.
    """
    # Trained weights and the pickled keyword arguments used to build the CNN.
    path = 'trained_models'
    model_file = join(
        path, 'cnn_300_200_[100]_12_[3]_zeros_60-20-20_polisis_state.pt')
    params_file = join(
        path, 'cnn_300_200_[100]_12_[3]_zeros_60-20-20_polisis_params.pkl')

    # Data that has already been prepared for prediction.
    data_folder = 'datasets'
    data_file = join(data_folder, 'test_dataset_label6.pkl')

    # NOTE(review): pickle.load executes arbitrary code; only use trusted files.
    with open(params_file, 'rb') as f:
        params = pickle.load(f)

    model = CNN(**params)
    # map_location lets a checkpoint saved from a GPU load on a CPU-only host;
    # load_state_dict copies the values into the model's own parameters.
    model.load_state_dict(torch.load(model_file, map_location='cpu'))
    model.eval()

    # Label names, kept for reference (index -> category):
    # ('First Party Collection/Use', 'Third Party Sharing/Collection',
    #  'User Access, Edit and Deletion', 'Data Retention', 'Data Security',
    #  'International and Specific Audiences', 'Do Not Track', 'Policy Change',
    #  'User Choice/Control', 'Introductory/Generic', 'Practice not covered',
    #  'Privacy contact information')

    data = PPD.unpickle_dataset(data_file)
    x = PPD.collate_data(data)[0]

    # A single forward pass is enough; gradients are not needed for scoring.
    with torch.no_grad():
        predictions = model(x)

    # Metrics as a function of the decision threshold.
    f1s, ps, rs, ts = _metrics_wrt_t(data.labels_tensor, predictions)

    figure = plt.figure(figsize=(18, 5))
    figure.suptitle('Micro Averages with respect to threshold')
    ax_f1 = figure.add_subplot(131)
    ax_f1.set_ylim(0.2, 0.72)
    ax_p = figure.add_subplot(132)
    ax_p.set_ylim(0, 1)
    ax_r = figure.add_subplot(133)
    ax_r.set_ylim(0, 1)
    ax_f1.plot(ts, f1s)
    ax_p.plot(ts, ps)
    ax_r.plot(ts, rs)
    plt.show()
Esempio n. 10
0
def main():
    """Train the relation-classification CNN and save its weights.

    Command-line flags configure the model and the training loop (see the
    ``add_argument`` calls below). The training file is loaded through
    ``SemEvalDataset``; the test file reuses the dictionaries built from the
    training set. Every ``--eval_every`` epochs the model is scored on the
    test loader. After the last epoch the best validation accuracy is
    printed and the final ``state_dict`` is written to ``--model_file``.

    A CUDA device is required: the model and every batch are moved to the
    GPU selected by ``--gpu``.
    """
    parser = argparse.ArgumentParser("CNN")
    parser.add_argument("--dp", type=int, default=5)
    parser.add_argument("--dc", type=int, default=32)
    # The optimizer used to hard-code 0.01 and ignore this flag. The default
    # is now 0.01 so a run without --lr behaves exactly as before, while an
    # explicit --lr finally takes effect.
    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--seq_len", type=int, default=120)
    parser.add_argument("--vac_len_rel", type=int, default=19)
    parser.add_argument("--nepoch", type=int, default=100)
    parser.add_argument("--num_workers", type=int, default=4)
    parser.add_argument("--eval_every", type=int, default=10)
    parser.add_argument("--dropout_rate", type=float, default=0.4)
    parser.add_argument("--bz", type=int, default=128)
    parser.add_argument("--kernel_sizes", type=str, default="3,4,5")
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--train_filename', default='./data/train.txt')
    parser.add_argument('--test_filename', default='./data/test.txt')
    parser.add_argument('--model_file', default='./cnn.pt')
    parser.add_argument('--embedding_filename',
                        default='./data/embeddings.txt')
    parser.add_argument('--embedding_wordlist_filename',
                        default='./data/words.lst')

    args = parser.parse_args()
    args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]

    # Initialization
    torch.cuda.set_device(args.gpu)

    # Load data
    dataset = SemEvalDataset(args.train_filename, max_len=args.seq_len)
    dataloader = DataLoader(dataset,
                            batch_size=args.bz,
                            shuffle=True,
                            num_workers=args.num_workers)
    dataset_val = SemEvalDataset(args.test_filename,
                                 max_len=args.seq_len,
                                 d=(dataset.d, dataset.rel_d))
    # Evaluation does not benefit from shuffling; a fixed order makes the
    # per-batch accuracy average reproducible between evaluations.
    dataloader_val = DataLoader(dataset_val,
                                batch_size=args.bz,
                                shuffle=False,
                                num_workers=args.num_workers)

    # Model hyper-parameters derived from the data are attached to ``args``
    # because the CNN constructor receives the whole namespace.
    args.word_embedding = load_embedding(args.embedding_filename,
                                         args.embedding_wordlist_filename,
                                         dataset.d)
    args.vac_len_pos = 122  # hard-coded in the original; meaning not visible here
    args.vac_len_word = len(dataset.d.word2id)
    args.vac_len_rel = len(dataset.rel_d.word2id)  # overrides --vac_len_rel
    args.dw = args.word_embedding.shape[1]

    print(args)

    model = CNN(args).cuda()
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    best_eval_acc = 0.

    for i in range(args.nepoch):
        # Training
        total_loss = 0.
        total_acc = 0.
        ntrain_batch = 0
        model.train()
        for batch in dataloader:
            ntrain_batch += 1
            seq, e1, e2, dist1, dist2, r = (t.cuda() for t in batch)
            r = r.view(r.size(0))

            pred = model(seq, dist1, dist2, e1, e2)
            l = loss_func(pred, r)
            acc = accuracy(pred, r)
            total_acc += acc
            # The loss is a 0-dim tensor: ``.data[0]`` raises on PyTorch >= 0.4.
            total_loss += l.item()

            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        print("Epoch: {}, Training loss : {:.4}, acc: {:.4}".format(
            i, total_loss / ntrain_batch, total_acc / ntrain_batch))

        # Evaluation
        if i % args.eval_every == args.eval_every - 1:
            val_total_acc = 0.
            nval_batch = 0
            model.eval()
            # No gradients are needed for scoring; skipping them saves memory.
            with torch.no_grad():
                for batch in dataloader_val:
                    nval_batch += 1
                    seq, e1, e2, dist1, dist2, r = (t.cuda() for t in batch)
                    r = r.view(r.size(0))

                    pred = model(seq, dist1, dist2, e1, e2)
                    acc = accuracy(pred, r)
                    val_total_acc += acc
            best_eval_acc = max(best_eval_acc, val_total_acc / nval_batch)
            print("Epoch: {}, Val acc: {:.4f}".format(
                i, val_total_acc / nval_batch))
    print(best_eval_acc)
    torch.save(model.state_dict(), args.model_file)
Esempio n. 11
0
                                   feature_size=len(features),
                                   target_size=len(targets))
    LSTM = LSTM.load_from_checkpoint(lstm_path,
                                     input_size=len(features),
                                     hidden_size=32,
                                     target_size=len(targets),
                                     num_layers=3)
    CNN = CNN.load_from_checkpoint(cnn_path,
                                   feature_size=len(features),
                                   target_size=len(targets),
                                   kernel_size=2)

    # set models in eval mode
    MLP.eval()
    LSTM.eval()
    CNN.eval()

    # init trainer
    trainer = pl.Trainer(gpus=0, logger=logger, progress_bar_refresh_rate=30)

    # create test dataloader
    test_dataloader = DataLoader(sub_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=0)  # num_workers

    # test
    for model in [MLP, LSTM, CNN]:
        result = trainer.test(test_dataloaders=test_dataloader,
                              verbose=True,
                              model=model)
Esempio n. 12
0
    def parse_individual(self, indi):
        """Build a CNN from ``indi``, train it for one pass and validate it.

        The network is trained once over the loader returned by
        ``get_data.get_mixed_train_loader`` with an MSE loss and Adam, then
        scored on ``get_data.get_mixed_validate_loader``. A CUDA device is
        required.

        Args:
            indi: Individual description handed to the ``CNN`` constructor.

        Returns:
            Tuple ``(mean_loss, std_loss, complexity)``: mean and standard
            deviation of the per-batch validation losses, and the value
            returned by ``get_total_params`` for the network.
        """
        torch_device = torch.device('cuda')
        cnn = CNN(indi)
        cnn.cuda()
        print(cnn)
        complexity = get_total_params(cnn, (220, 30, 30))

        train_loader = get_data.get_mixed_train_loader(self.batch_size)

        # Loss: mean squared error.
        criterion = nn.MSELoss()
        criterion = criterion.to(torch_device)

        # Optimizer: Adam (the original author notes that SGD did not work).
        learning_rate = 0.004
        optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
        loss_dict = []
        # Number of batches in one pass; shown as the total in the log line.
        num_batches = len(train_loader)

        # Train the model: a single pass over the training loader.
        cnn.train()
        for i, (inputs, labels) in enumerate(train_loader):
            # Prepare the input tensors and the size labels on the GPU.
            labels = get_data.get_size_labels(1, labels)
            inputs = inputs.cuda()
            labels = labels.cuda()

            # Forward pass.
            optimizer.zero_grad()
            outputs = cnn(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

            # Record the loss and log progress every 100 batches.
            loss_dict.append(loss.item())
            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Loss: {:.4f}'.format(i + 1, num_batches, loss.item()))

        # Evaluate. Gradients are not needed, so skip building the graph.
        cnn.eval()
        eval_loss_dict = []
        valid_loader = get_data.get_mixed_validate_loader(self.batch_size)
        with torch.no_grad():
            for inputs, labels in valid_loader:
                labels = get_data.get_size_labels(1, labels)
                inputs = inputs.cuda()
                labels = labels.cuda()
                outputs = cnn(inputs)
                loss = criterion(outputs, labels)
                eval_loss_dict.append(loss.item())

        mean_test_loss = np.mean(eval_loss_dict)
        std_test_loss = np.std(eval_loss_dict)
        print("valid mean:{},std:{}".format(mean_test_loss, std_test_loss))
        return mean_test_loss, std_test_loss, complexity
Esempio n. 13
0
            img = fluid.dygraph.base.to_variable(dy_x_data)
            label = fluid.dygraph.base.to_variable(y_data)
            # 不需要训练label
            label.stop_gradient = True
            # 获取网络输出
            predict = cnn(img)
            # 获取准确率函数和损失函数
            accuracy = fluid.layers.accuracy(input=predict, label=label)
            loss = fluid.layers.cross_entropy(predict, label)
            avg_loss = fluid.layers.mean(loss)
            # 计算梯度
            avg_loss.backward()
            momentum.minimize(avg_loss)
            # 将参数梯度清零以保证下一轮训练的正确性
            cnn.clear_gradients()
            # 打印一次信息
            if batch_id % 100 == 0:
                print(
                    "Epoch:%d, Batch:%d, Loss:%f, Accuracy:%f" % (epoch, batch_id, avg_loss.numpy(), accuracy.numpy()))
        # 开始执行测试
        cnn.eval()
        test_cost, test_acc = test_train(test_reader, cnn, BATCH_SIZE)
        # 准备重新恢复训练
        cnn.train()
        print("Test:%d, Loss:%f, Accuracy:%f" % (epoch, test_cost, test_acc))

        if not os.path.exists('models'):
            os.makedirs('models')
        # 保存模型
        fluid.dygraph.save_dygraph(state_dict=cnn.state_dict(), model_path="models/cnn")
Esempio n. 14
0
class VoiceActivityDetector:
    """Speech/noise classifier built around a ``CNN`` instance.

    The class offers three groups of functionality:

    * training and batch inference (``fit``, ``train_epoch``,
      ``validate_epoch``, ``predict``);
    * checkpointing (``save`` / ``load``);
    * incremental processing of an audio stream through a caller-supplied
      ``stream_buffer`` object (``setup``, ``append``, ``query``).

    ``params`` is a mapping; the streaming methods read the keys
    ``window_size``, ``step_size_ratio``, ``n_filters``, ``net_window_size``
    and ``net_step_size_ratio`` from it.

    The ``stream_buffer`` passed to the streaming methods must provide an
    ``append`` method and the attributes ``frames_buffer``,
    ``last_prev_frame_signal``, ``spectrogram`` (``None`` until the first
    spectrogram slice exists), ``total_votes``, ``speech_votes`` and
    ``labels``.
    """

    DEFAULT_RATE = 16000

    IDX_TO_LABEL = {
        0: 'noise',
        1: 'speech'
    }

    LABEL_TO_IDX = {
        'noise': 0,
        'speech': 1
    }

    # Per-channel normalisation constants (these are the values commonly
    # used as ImageNet statistics).
    MEAN = np.array([0.485, 0.456, 0.406])
    STD = np.array([0.229, 0.224, 0.225])
    DEVICE = torch.device('cpu')

    def __init__(self, params=None):
        self.params = params
        self.net = CNN()
        self.rate = VoiceActivityDetector.DEFAULT_RATE

    @staticmethod
    def from_picture_to_tensor(picture):
        """Convert an image into a normalised tensor for the network.

        Only the first three channels of the last axis are kept, the image
        is resized to ``CNN.IMG_SIZE``, scaled by 1/255 and normalised with
        ``MEAN`` / ``STD``.
        """
        picture = np.array(picture)[:, :, :3]
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(VoiceActivityDetector.MEAN, VoiceActivityDetector.STD)
        ])
        picture = Image.fromarray(picture).resize(CNN.IMG_SIZE)
        return transform(np.array(picture, dtype='float32') / 255)

    @staticmethod
    def from_tensor_to_picture(tensor):
        """Undo the normalisation and return a channels-last integer array.

        The tensor is transposed with ``(1, 2, 0)``, de-normalised with
        ``STD`` / ``MEAN``, scaled to 0..255, clipped and cast to ``int``.
        """
        tensor = tensor.numpy().transpose((1, 2, 0))
        tensor = VoiceActivityDetector.STD * tensor + VoiceActivityDetector.MEAN
        tensor *= 255
        tensor = np.clip(tensor, 0, 255).astype(dtype=int)
        return tensor

    def save(self, path: str):
        """Write ``params`` and the network weights to ``path``."""
        torch.save(
            {
                'params': self.params,
                'model_state_dict': self.net.state_dict()
            },
            path
        )

    def load(self, path: str):
        """Restore ``params`` and the network weights written by ``save``."""
        dump = torch.load(path, map_location=VoiceActivityDetector.DEVICE)
        self.params = dump['params']
        self.net.load_state_dict(dump['model_state_dict'])

    def __str__(self):
        return str(self.params) + '\n' + str(self.net)

    def train_epoch(self, train_loader, optimizer, loss_f):
        """Run one optimisation pass over ``train_loader``.

        Returns:
            ``(loss, accuracy)`` where the loss is the batch losses weighted
            by batch size and divided by the sample count, and the accuracy
            is the fraction of correctly predicted samples.
        """
        self.net.train()

        cum_loss = 0.0
        samples_count = 0
        accuracy = 0

        for inputs, targets in train_loader:
            inputs = inputs.to(device=VoiceActivityDetector.DEVICE, dtype=torch.float)
            targets = targets.to(device=VoiceActivityDetector.DEVICE, dtype=torch.long)

            optimizer.zero_grad()
            outputs = self.net(inputs)
            loss = loss_f(outputs, targets)
            loss.backward()
            optimizer.step()
            train_preds = torch.argmax(outputs, 1)

            # Weight by batch size so a smaller final batch is not over-counted
            # (assumes loss_f averages over the batch -- TODO confirm).
            cum_loss += loss.item() * inputs.size(0)

            accuracy += torch.sum(train_preds == targets).item()
            samples_count += inputs.size(0)

        cum_loss /= samples_count
        accuracy /= samples_count

        return cum_loss, accuracy

    def validate_epoch(self, val_loader, loss_f):
        """Score the network on ``val_loader`` without updating it.

        Returns the same ``(loss, accuracy)`` pair as ``train_epoch``.
        """
        self.net.eval()

        cum_loss = 0.0
        samples_count = 0
        accuracy = 0

        for inputs, targets in val_loader:
            inputs = inputs.to(device=VoiceActivityDetector.DEVICE, dtype=torch.float)
            targets = targets.to(device=VoiceActivityDetector.DEVICE, dtype=torch.long)

            with torch.no_grad():
                outputs = self.net(inputs)
                loss = loss_f(outputs, targets)
                val_preds = torch.argmax(outputs, 1)

            cum_loss += loss.item() * inputs.size(0)
            accuracy += torch.sum(val_preds == targets).item()
            samples_count += inputs.size(0)

        cum_loss /= samples_count
        accuracy /= samples_count

        return cum_loss, accuracy

    def fit(self, train_loader, val_loader, epochs, sessions_dir, session_id, verbose=True):
        """Train for ``epochs`` epochs with Adam and cross-entropy loss.

        Whenever the validation loss improves, the detector is saved through
        ``utils.save_model(sessions_dir, session_id, self)``.

        Returns:
            A list with one ``(train_loss, train_accuracy, val_loss,
            val_accuracy)`` tuple per epoch.
        """
        self.net = self.net.to(VoiceActivityDetector.DEVICE)

        optimizer = torch.optim.Adam(self.net.parameters())
        loss_f = nn.CrossEntropyLoss()

        history = []
        best_val_loss = np.inf

        for epoch in tqdm(range(epochs)):
            train_loss, train_accuracy = self.train_epoch(train_loader, optimizer, loss_f)
            val_loss, val_accuracy = self.validate_epoch(val_loader, loss_f)
            history.append((train_loss, train_accuracy, val_loss, val_accuracy))

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                utils.save_model(sessions_dir, session_id, self)

            if verbose:
                print(f'epoch {epoch + 1}: train loss = {train_loss}, train accuracy = {train_accuracy}, '
                      f'val loss = {val_loss}, val accuracy = {val_accuracy}')

        return history

    def predict(self, data_loader):
        """Return the predicted class index for every sample in ``data_loader``.

        ``data_loader`` yields input batches only (no targets). The result
        is a flat integer NumPy array; it is empty when the loader yields
        nothing.
        """
        self.net.eval()
        self.net = self.net.to(VoiceActivityDetector.DEVICE)

        # Collect per-batch results and join them once: calling np.append for
        # every batch would re-copy the accumulated array each time.
        # The empty int seed keeps the result dtype and the empty-loader
        # result identical to the incremental version.
        batches = [np.array([], dtype=int)]

        for inputs in data_loader:
            inputs = inputs.to(device=VoiceActivityDetector.DEVICE, dtype=torch.float)

            with torch.no_grad():
                outputs = self.net(inputs)
                val_preds = torch.argmax(outputs, 1)
                batches.append(val_preds.cpu().numpy().ravel())

        return np.concatenate(batches)

    def setup(self, rate):
        """Configure the detector for a stream sampled at ``rate``.

        Must be called before ``append``; it derives the window and step
        sizes (suffix ``_f``) that the streaming methods rely on.
        """
        self.rate = rate

        self.window_size_f, self.step_size_f = utils.calculate_spectrogram_params(
            self.params['window_size'],
            self.params['step_size_ratio'],
            self.rate
        )

        self.net_window_size_f = int(rate * self.params['net_window_size'])
        self.net_step_size_f = int(self.net_window_size_f * self.params['net_step_size_ratio'])

    def append_votes(self, stream_buffer):
        """Classify the complete network windows of the buffered spectrogram.

        Each window's prediction is recorded in ``total_votes`` and
        ``speech_votes`` as a difference array (+1 at the start index, -1 at
        the end index); ``flush_predictions`` later turns these into counts
        with a cumulative sum. The consumed part of the spectrogram is
        dropped, keeping the overlap needed by the next window.
        """
        # append_spectrogram leaves the spectrogram as None until enough
        # frames for one window have arrived; there is nothing to vote on yet.
        if stream_buffer.spectrogram is None:
            return

        spectrogram_size_f = utils.calculate_coverage_size(
            stream_buffer.spectrogram.shape[1],
            self.step_size_f,
            self.window_size_f
        )

        # Spectrogram columns per covered input unit; used to convert the
        # network window/step sizes into spectrogram columns and back.
        ratio = stream_buffer.spectrogram.shape[1] / spectrogram_size_f

        net_window_size_pxl = int(np.ceil(self.net_window_size_f * ratio))
        net_step_size_pxl = int(np.ceil(self.net_step_size_f * ratio))

        cut_num_windows, cut_size_pxl = VoiceActivityDetector.calculate_cut_part_params(
            stream_buffer.spectrogram.shape[1],
            net_window_size_pxl,
            net_step_size_pxl
        )

        if cut_num_windows > 0:
            cut_part = stream_buffer.spectrogram[:, :cut_size_pxl, :]
            stream_buffer.spectrogram = stream_buffer.spectrogram[:, cut_size_pxl - net_window_size_pxl + net_step_size_pxl:, :]

            pxl_ls = np.arange(0, cut_part.shape[1], net_step_size_pxl)

            dataset = RealTimeVadDataset(cut_part, net_window_size_pxl, pxl_ls)
            data_loader = DataLoader(dataset, batch_size=len(pxl_ls), shuffle=False)
            pred_labels = self.predict(data_loader)

            mxl = 0
            for pred_label, pxl_l in zip(pred_labels, pxl_ls):
                pxl_r = pxl_l + net_step_size_pxl
                l = max(0, int(np.floor(pxl_l / ratio)))
                r = min(len(stream_buffer.speech_votes) - 1, int(np.ceil(pxl_r / ratio)))
                if l >= r:
                    continue
                mxl = l
                stream_buffer.total_votes[l] += 1
                stream_buffer.total_votes[r] -= 1
                if pred_label:
                    stream_buffer.speech_votes[l] += 1
                    stream_buffer.speech_votes[r] -= 1

            # Everything before the last window start has its final votes.
            self.flush_predictions(stream_buffer, mxl)

    def append_spectrogram(self, stream_buffer):
        """Turn the complete windows of buffered frames into spectrogram columns.

        The consumed frames are removed from ``frames_buffer`` (keeping the
        overlap required by the next window) and the new image is appended
        to ``stream_buffer.spectrogram`` along axis 1.
        """
        cut_num_windows, cut_size_f = VoiceActivityDetector.calculate_cut_part_params(
            len(stream_buffer.frames_buffer),
            self.window_size_f,
            self.step_size_f
        )
        if cut_num_windows > 0:
            cut_part = stream_buffer.frames_buffer[:cut_size_f]

            stream_buffer.frames_buffer = stream_buffer.frames_buffer[cut_size_f - self.window_size_f + self.step_size_f:]

            img = utils.build_spectrogram(
                cut_part,
                self.rate,
                self.params['n_filters'],
                self.params['window_size'],
                self.params['step_size_ratio'],
                last_prev_frame_signal=stream_buffer.last_prev_frame_signal
            )

            stream_buffer.last_prev_frame_signal = cut_part[-1]

            if stream_buffer.spectrogram is None:
                stream_buffer.spectrogram = img
            else:
                stream_buffer.spectrogram = np.concatenate((stream_buffer.spectrogram, img), axis=1)

    # should be called after 'setup(rate)'
    def append(self, added_frames, stream_buffer):
        """Feed new frames into the buffer and update spectrogram and votes."""
        stream_buffer.append(added_frames)
        self.append_spectrogram(stream_buffer)
        self.append_votes(stream_buffer)

    def query(self, stream_buffer):
        """Return the labels produced so far and reset the label buffer."""
        result = stream_buffer.labels
        stream_buffer.labels = np.array([], dtype=int)
        return result

    def flush_predictions(self, stream_buffer, c=np.inf):
        """Finalise the first ``c`` vote slots and append their labels.

        The first ``c`` entries (all of them by default) are split off both
        vote arrays and accumulated with a cumulative sum. A slot is
        labelled 1 when its accumulated speech votes divided by
        (accumulated total votes + 1) is positive, otherwise 0. The last
        accumulated value is carried into the first remaining slot so the
        running sums continue across flushes.
        """
        c = min(c, len(stream_buffer.total_votes))

        cut_total_votes, stream_buffer.total_votes = np.split(stream_buffer.total_votes, [c])
        cut_speech_votes, stream_buffer.speech_votes = np.split(stream_buffer.speech_votes, [c])

        cut_total_votes = np.cumsum(cut_total_votes)
        cut_speech_votes = np.cumsum(cut_speech_votes)

        stream_buffer.labels = np.append(stream_buffer.labels, (cut_speech_votes / (cut_total_votes + 1) > 0).astype(dtype=int))

        if len(cut_total_votes) > 0 and len(stream_buffer.total_votes) > 0:
            stream_buffer.total_votes[0] += cut_total_votes[-1]
        if len(cut_speech_votes) > 0 and len(stream_buffer.speech_votes) > 0:
            stream_buffer.speech_votes[0] += cut_speech_votes[-1]

    @staticmethod
    def calculate_cut_part_params(size, window_size, step_size):
        """Return ``(num_windows, covered_size)`` for the usable prefix of ``size``.

        Starts from ``utils.calculate_num_windows`` and lowers the window
        count until the size reported by ``utils.calculate_coverage_size``
        no longer exceeds ``size``. Returns ``(0, 0)`` when the initial
        window count is not positive.
        """
        cut_num_windows = utils.calculate_num_windows(size, window_size, step_size)
        cut_size = 0
        while cut_num_windows > 0:
            cut_size = utils.calculate_coverage_size(cut_num_windows, step_size, window_size)
            if cut_size > size:
                cut_num_windows -= 1
            else:
                break
        return cut_num_windows, cut_size
Esempio n. 15
0
                                  weight_decay=5e-4)
    criterion = nn.CrossEntropyLoss()

    loaders = loaders
    criterion = criterion
    best_loss = 1000.0
    running_losses = {'train': [], 'eval': []}

    start_time = time.time()
    for epoch in range(1, epochs + 1):
        phases = ['train']
        if epoch % validate_every == 0:
            phases.append('eval')

        for phase in phases:
            model.eval() if phase == 'eval' else model.train()
            gc.collect()  # prevent OOM problems

            print("Epoch {}/{} Phase {}".format(epoch, epochs, phase))
            for idx, (imgs, labels) in enumerate(tqdm(loaders[phase])):
                # print(f'Phase: {phase}, current step: {idx}')
                imgs, labels = imgs.float().to(
                    device='cuda'), labels.float().to(device='cuda')
                optimizer.zero_grad()
                outputs = model(imgs)
                loss = criterion(outputs, labels.long())
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                running_losses[phase].append(loss.cpu().detach().numpy())
Esempio n. 16
0
def load_model(path_to_model, **model_kwargs):
    """Build a ``CNN`` and load trained weights into it for inference.

    Args:
        path_to_model: Path of a file holding a ``state_dict`` saved with
            ``torch.save``.
        **model_kwargs: Keyword arguments forwarded to the ``CNN``
            constructor.

    Returns:
        The model with the weights loaded, switched to evaluation mode.
    """
    model = CNN(**model_kwargs)
    # Map the stored tensors to CPU so a checkpoint written from a GPU also
    # loads on a host without CUDA; load_state_dict copies the values into
    # the model's own parameters, so the model's device is unaffected.
    state_dict = torch.load(path_to_model, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model