Example #1
0
def main(model_name: str):
    """Train ``SimpleNet`` on image pairs with a contrastive loss.

    Builds the datasets and loaders, the network, loss, optimizer and LR
    scheduler, then runs 100 epochs over the training loader.  Every 10
    batches the mean loss of the window is printed and written to
    TensorBoard under the ``TrainLoss`` tag.

    Args:
        model_name: Base name used to build the ``models/<name>.pth`` path.
    """
    # NOTE(review): output_model, val_loader, final_loader and scheduler are
    # built here but never used in this function — confirm whether the
    # validation / checkpointing half of the loop is still to be written.
    output_model = "".join(("models/", model_name, ".pth"))
    writer = SummaryWriter()

    # Datasets.
    # NOTE(review): the training set is built from SetType.val — confirm that
    # this is a deliberate stop-gap and not a typo for the training split.
    train_set = PneumoniaDataset(SetType.val)
    val_set = PneumoniaDataset(SetType.val, shuffle=False)
    final_check_set = PneumoniaDataset(SetType.test, shuffle=False)

    # Loaders: pairs are trained one at a time, evaluation runs in batches of 8.
    eval_loader_kwargs = dict(batch_size=8, shuffle=False, num_workers=8)
    train_loader = DataLoader(train_set,
                              batch_size=1,
                              shuffle=True,
                              num_workers=8)
    val_loader = DataLoader(val_set, **eval_loader_kwargs)
    final_loader = DataLoader(final_check_set, **eval_loader_kwargs)

    for split in (train_set, val_set, final_check_set):
        print(f"Size of {split.set_type} set: {len(split)}")

    # Network, loss, optimizer and scheduler.
    net = SimpleNet().cuda()
    criterion = ContrastiveLoss(margin=1.0)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode="max",
                                  factor=0.3,
                                  patience=15,
                                  verbose=True)

    # Training loop.
    log_every = 10  # batches per reporting window
    logged_points = 0  # x-axis position of the next TensorBoard point
    for epoch in range(100):
        loss_sum = 0.0
        for step, (left, right, distance) in enumerate(train_loader):
            net.train()
            optimizer.zero_grad()
            emb_left, emb_right = net(left.float().cuda(),
                                      right.float().cuda())
            # The loss is evaluated on the CPU, where `distance` already is.
            loss = criterion(emb_left.cpu(), emb_right.cpu(), distance)
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()

            # Report only on non-zero multiples of the window size.
            if step == 0 or step % log_every != 0:
                continue
            mean_loss = loss_sum / log_every
            print("[%d, %5d] loss %.3f" % (epoch, step, mean_loss))
            writer.add_scalar("TrainLoss", mean_loss, logged_points)
            logged_points += 1
            loss_sum = 0.0
Example #2
0
    def __init__(
        self, model_name: str, epochs: int = 100, config: Optional[dict] = None
    ):
        """Build the data loaders, network, loss, optimizer and LR scheduler.

        Args:
            model_name: Base name of the checkpoint file; ``.pth`` is appended.
            epochs: Number of training epochs.
            config: Optional hyper-parameter overrides.  Keys given here
                replace the defaults listed below; missing keys keep their
                default value.
        """
        self.output_model = model_name + ".pth"

        # Build the training dataset once and share it with its loader.
        self.train_set = PneumoniaDataset(SetType.train)
        self.train_loader = DataLoader(
            self.train_set, batch_size=16, shuffle=True, num_workers=8
        )
        self.val_loader = DataLoader(
            PneumoniaDataset(SetType.val, shuffle=False),
            batch_size=16,
            shuffle=False,
            num_workers=8,
        )
        self.test_loader = DataLoader(
            PneumoniaDataset(SetType.test, shuffle=False),
            batch_size=16,
            shuffle=False,
            num_workers=8,
        )

        defaults = {
            "pos_weight_bias": 0.5,
            "starting_lr": 1e-2,
            "momentum": 0.9,
            "decay": 5e-4,
            "lr_adjustment_factor": 0.3,
            "scheduler_patience": 15,
            "print_cadence": 100,
            "comment": "Added large dense layer.",
            "pos_weight": 1341 / 3875,  # Number of negatives / positives.
        }
        # Previously the `config` argument was accepted but ignored; overlay
        # the caller's overrides on the defaults instead.
        self.config: dict = {**defaults, **(config or {})}

        self.epochs = epochs
        # Prefer the first GPU, but stay usable on CPU-only hosts.
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu"
        )
        self.writer = SummaryWriter(comment=self.config["comment"])
        self.net = SimpleNet(1).to(self.device)
        # Move the criterion too, so its pos_weight buffer lives on the same
        # device as the logits it is applied to.
        self.criterion = nn.BCEWithLogitsLoss(
            pos_weight=torch.tensor(self.config["pos_weight"])
        ).to(self.device)
        self.optimizer = optim.SGD(
            self.net.parameters(),
            lr=self.config["starting_lr"],
            momentum=self.config["momentum"],
            weight_decay=self.config["decay"],
        )
        # mode="max": the scheduler is meant to be stepped with a metric that
        # should increase (e.g. accuracy).
        self.scheduler = ReduceLROnPlateau(
            self.optimizer,
            factor=self.config["lr_adjustment_factor"],
            mode="max",
            verbose=True,
            patience=self.config["scheduler_patience"],
        )

        print("Trainer Initialized.")
        # len() of a DataLoader is its number of batches, not of samples.
        for loader in [self.train_loader, self.test_loader, self.val_loader]:
            print(f"Size of set: {len(loader)}")
def execute_similarity_search(img_path, num_algs=32):
    """Classify one image and fold its hooked feature vector into a MinHash.

    Loads the saved 10-class ``SimpleNet``, registers a ``SaveFeatureVectors``
    hook on ``model.conv13[2]``, runs the batches produced by
    ``_get_single_image_batch(img_path)`` through the model, and updates a
    ``MinHash`` with the flattened hooked features.

    Args:
        img_path: Passed through to ``_get_single_image_batch``.
        num_algs: Number of permutations for the ``MinHash``.
    """
    # TODO: the model/hook setup is duplicated in execute_inference and could
    # move into a shared "setup model" helper with a flag for the hook.
    path = "../saved_models/cifar_model_300_epochs.pth"
    model = SimpleNet(10)
    # The model runs on the CPU here, so map the checkpoint onto the CPU;
    # otherwise a checkpoint saved from a CUDA model fails to load on a host
    # without CUDA.
    model.load_state_dict(torch.load(path, map_location="cpu"))
    # Forward hook exposing the activations of model.conv13[2].
    sf = SaveFeatureVectors(model.conv13[2])
    model.eval()
    is_correct = 0.0
    path_list = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for inputs, labels, paths in _get_single_image_batch(img_path):
            outputs = model(inputs.float())
            _, pred = outputs.max(1)
            is_correct += (pred == labels.long()).sum().item()
            print(f"Num correct: {is_correct}")
            path_list.extend(paths)

    # Generate Hash: todo-try stuff here.
    # NOTE(review): sf.features presumably holds the activations captured on
    # the last forward pass; the resulting MinHash is not returned or stored
    # in this function — confirm what the caller expects.
    m = MinHash(num_perm=num_algs)
    m.update(sf.features.flatten())
def execute_inference(create_hashes=False,
                      num_algs=32,
                      hash_index="lsh-demo-3"):
    """Run the saved CIFAR model over the test batches and optionally hash.

    Loads the saved 10-class ``SimpleNet``, registers a ``SaveFeatureVectors``
    hook on ``model.conv13[2]``, runs every batch from ``_get_test_batch()``
    through the model while printing the running count of correct
    predictions, and, when requested, hands the collected paths and hooked
    features to ``generate_hash_tables``.

    Args:
        create_hashes: When true, call ``generate_hash_tables`` at the end.
        num_algs: Not used in this function body.
        hash_index: Not used in this function body.
    """
    path = "../saved_models/cifar_model_300_epochs.pth"
    model = SimpleNet(10)
    # The model runs on the CPU here, so map the checkpoint onto the CPU;
    # otherwise a checkpoint saved from a CUDA model fails to load on a host
    # without CUDA.
    model.load_state_dict(torch.load(path, map_location="cpu"))

    # Forward hook exposing the activations of model.conv13[2].
    sf = SaveFeatureVectors(model.conv13[2])
    model.eval()
    is_correct = 0.0
    path_list = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for inputs, labels, paths in _get_test_batch():
            outputs = model(inputs.float())
            _, pred = outputs.max(1)
            is_correct += (pred == labels.long()).sum().item()
            print(f"Num correct: {is_correct}")
            path_list.extend(paths)

    print("Extracting Vectors")
    if create_hashes:
        # NOTE(review): sf.features is read once after the loop — confirm the
        # hook accumulates features for every batch rather than only the last.
        generate_hash_tables(path_list, sf.features)
Example #5
0
                                                    random_state=0)
# Debug aid (disabled): print the shapes of the test split.
#print(X_test.shape , y_test.shape  )

# Cast every split to float32 and force a 2-D layout: three feature columns
# for the inputs and a single column for the targets.
x_train = np.array(x_train, dtype=np.float32).reshape(len(x_train), 3)
y_train = np.array(y_train, dtype=np.float32).reshape(len(y_train), 1)
x_test = np.array(x_test, dtype=np.float32).reshape(len(x_test), 3)
y_test = np.array(y_test, dtype=np.float32).reshape(len(y_test), 1)

# Sanity output: shapes of both splits and the first ten training rows.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
print(x_train[:10])
# Debug aids (disabled).
#print(type(x_train ))
#quit()

# Network sized to match the reshaped data: input_size equals the three
# feature columns, output_size the single target column.
network = SimpleNet(input_size=3, hidden_size=10, output_size=1)

# Number of training iterations; adjust as appropriate.
#iters_num = 30000
iters_num = 10000  # adjust the iteration count as appropriate

# Number of training rows.
train_size = x_train.shape[0]
print(train_size)

# Wall-clock start time; presumably used later to report elapsed time.
global_start_time = time.time()

# Mini-batch size (alternatives tried: 100, 32).
#    batch_size = 100
#batch_size = 32
batch_size = 8
Example #6
0
class PneumoniaTrainer:
    """Train, evaluate and checkpoint a single-logit ``SimpleNet`` classifier.

    The trainer owns the train/val/test loaders, the network, a
    ``BCEWithLogitsLoss`` criterion, an SGD optimizer and a
    ``ReduceLROnPlateau`` scheduler, and logs its metrics through a
    TensorBoard ``SummaryWriter``.
    """

    # Default hyper-parameters.  ``__init__`` overlays caller overrides on a
    # copy, so this dict itself is never mutated.
    # NOTE(review): "pos_weight_bias" is not read anywhere in this class.
    DEFAULT_CONFIG = {
        "pos_weight_bias": 0.5,
        "starting_lr": 1e-2,
        "momentum": 0.9,
        "decay": 5e-4,
        "lr_adjustment_factor": 0.3,
        "scheduler_patience": 15,
        "print_cadence": 100,
        "comment": "Added large dense layer.",
        "pos_weight": 1341 / 3875,  # Number of negatives / positives.
    }

    def __init__(
        self, model_name: str, epochs: int = 100, config: Optional[dict] = None
    ):
        """Build the data loaders, network, loss, optimizer and scheduler.

        Args:
            model_name: Base name of the checkpoint file; ``.pth`` is appended.
            epochs: Number of training epochs.
            config: Optional hyper-parameter overrides.  Keys given here
                replace the entries of ``DEFAULT_CONFIG``; missing keys keep
                their default value.
        """
        self.output_model = model_name + ".pth"

        # Build the training dataset once and share it with its loader.
        self.train_set = PneumoniaDataset(SetType.train)
        self.train_loader = DataLoader(
            self.train_set, batch_size=16, shuffle=True, num_workers=8
        )
        self.val_loader = DataLoader(
            PneumoniaDataset(SetType.val, shuffle=False),
            batch_size=16,
            shuffle=False,
            num_workers=8,
        )
        self.test_loader = DataLoader(
            PneumoniaDataset(SetType.test, shuffle=False),
            batch_size=16,
            shuffle=False,
            num_workers=8,
        )

        # Previously the `config` argument was accepted but ignored.
        self.config: dict = {**self.DEFAULT_CONFIG, **(config or {})}

        self.epochs = epochs
        # Prefer the first GPU, but stay usable on CPU-only hosts.
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu"
        )
        self.writer = SummaryWriter(comment=self.config["comment"])
        self.net = SimpleNet(1).to(self.device)
        # Move the criterion too, so its pos_weight buffer lives on the same
        # device as the logits it is applied to.
        self.criterion = nn.BCEWithLogitsLoss(
            pos_weight=torch.tensor(self.config["pos_weight"])
        ).to(self.device)
        self.optimizer = optim.SGD(
            self.net.parameters(),
            lr=self.config["starting_lr"],
            momentum=self.config["momentum"],
            weight_decay=self.config["decay"],
        )
        # mode="max": the scheduler is stepped with an accuracy, which should
        # increase.
        self.scheduler = ReduceLROnPlateau(
            self.optimizer,
            factor=self.config["lr_adjustment_factor"],
            mode="max",
            verbose=True,
            patience=self.config["scheduler_patience"],
        )

        print("Trainer Initialized.")
        # len() of a DataLoader is its number of batches, not of samples.
        for loader in [self.train_loader, self.test_loader, self.val_loader]:
            print(f"Size of set: {len(loader)}")

    def train(self):
        """Run the full training schedule, then test and save the model.

        Each epoch trains over ``train_loader``, reports the mean loss every
        ``print_cadence`` batches, logs train and validation metrics and
        steps the LR scheduler.  After the last epoch the test-set accuracy
        and metrics are written as TensorBoard text and the weights are
        saved.
        """
        print_cadence = self.config["print_cadence"]
        training_pass = 0
        for epoch in range(self.epochs):
            # calculate_accuracy() leaves the net in eval mode at the end of
            # every epoch, so switch back before training again.
            self.net.train()
            running_loss = 0.0
            for i, (inputs, labels, _metadata) in enumerate(self.train_loader):
                self.optimizer.zero_grad()
                outputs = self.net(inputs.float().to(self.device))
                loss = self.criterion(
                    outputs, labels.unsqueeze(1).float().to(self.device)
                )
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()
                # Report after every `print_cadence` batches so the divisor
                # matches the number of losses summed (the former
                # `i > 0 and i % cadence == 0` test summed cadence + 1 losses
                # in the first window of each epoch).
                if (i + 1) % print_cadence == 0:
                    mean_loss = running_loss / print_cadence
                    print(
                        f'Epoch: {epoch}\tBatch: {i}\tLoss: {mean_loss}'
                    )
                    self.writer.add_scalar(
                        "Train/RunningLoss",
                        mean_loss,
                        training_pass,
                    )
                    running_loss = 0.0
                training_pass += 1
            train_accuracy = self.log_training_metrics(epoch)
            self.log_validation_metrics(epoch)
            # NOTE(review): the scheduler is driven by *training* accuracy;
            # confirm that validation accuracy was not intended.
            self.scheduler.step(train_accuracy)

        accuracy, metrics = self.calculate_accuracy(self.test_loader)
        self.writer.add_text("Test/Accuracy", f"{accuracy}")
        for key, val in metrics.items():
            self.writer.add_text(f"Test/{key}", f"{val}")
        self.save_model()

    def log_training_metrics(self, epoch: int):
        """Log accuracy and metrics on the training set; return the accuracy."""
        accuracy, metrics = self.calculate_accuracy(self.train_loader)
        self.writer.add_scalar("Train/Accuracy", accuracy, epoch)
        for key, val in metrics.items():
            self.writer.add_scalar(f"Train/{key}", val, epoch)
        return accuracy

    def log_validation_metrics(self, epoch: int):
        """Log accuracy and metrics on the validation set; return the accuracy."""
        accuracy, metrics = self.calculate_accuracy(self.val_loader)
        self.writer.add_scalar("Validation/Accuracy", accuracy, epoch)
        for key, val in metrics.items():
            self.writer.add_scalar(f"Validation/{key}", val, epoch)
        return accuracy

    @staticmethod
    def _binary_metrics(tn, fp, fn, tp) -> dict:
        """Derive rate metrics from binary confusion-matrix counts.

        A ratio whose denominator is zero is reported as 0.0 instead of
        raising or producing NaN.
        """

        def ratio(numerator, denominator) -> float:
            return float(numerator) / float(denominator) if denominator else 0.0

        return {
            "Recall": ratio(tp, tp + fn),
            "Precision": ratio(tp, tp + fp),
            # FNR = FN / (FN + TP): share of actual positives that were missed.
            "FalseNegativeRate": ratio(fn, fn + tp),
            # FPR = FP / (FP + TN): share of actual negatives that were flagged.
            "FalsePositiveRate": ratio(fp, fp + tn),
        }

    def calculate_accuracy(self, loader: DataLoader):
        """Evaluate the network on ``loader``.

        Puts the network in eval mode, thresholds the sigmoid of each logit
        by rounding, and compares the result with the labels.

        Args:
            loader: Yields ``(inputs, labels, metadata)`` batches.

        Returns:
            ``(accuracy, metrics)`` where ``metrics`` maps "Recall",
            "Precision", "FalseNegativeRate" and "FalsePositiveRate" to
            floats.  An empty loader yields ``0.0`` and all-zero metrics.
        """
        truth_list: list = []
        pred_list: list = []
        correct = 0.0
        total = 0.0
        self.net.eval()
        with torch.no_grad():
            for inputs, labels, _metadata in loader:
                outputs = self.net(inputs.float().to(self.device))
                # Logit -> probability -> hard 0/1 prediction, one per sample.
                preds = torch.round(torch.sigmoid(outputs)).squeeze(1).long().cpu()
                labels = labels.long().cpu()
                pred_list.extend(preds.tolist())
                truth_list.extend(labels.tolist())
                total += labels.size(0)
                correct += preds.eq(labels).sum().item()
        print(f"Correct:\t{correct}, Incorrect:\t{total-correct}")

        if not total:
            return 0.0, self._binary_metrics(0, 0, 0, 0)

        # labels=[0, 1] pins the matrix to 2x2 even when a split contains a
        # single class, so the four-way unpack below cannot fail.
        tn, fp, fn, tp = confusion_matrix(
            truth_list, pred_list, labels=[0, 1]
        ).ravel()
        return correct / total, self._binary_metrics(tn, fp, fn, tp)

    def save_model(self):
        """Write the network's state dict to ``self.output_model``."""
        print("saving...")
        torch.save(self.net.state_dict(), self.output_model)
Example #7
0
 def _load_model(self, model_path: str) -> SimpleNet:
     """Build a single-output ``SimpleNet`` and load its weights from disk.

     Args:
         model_path: Path to a state dict saved with ``torch.save``.

     Returns:
         The network with the loaded weights.
     """
     model = SimpleNet(1)
     # The freshly built model lives on the CPU, so map the checkpoint there
     # too; without this a checkpoint saved from a CUDA model cannot be
     # loaded on a host without CUDA.
     state_dict = torch.load(model_path, map_location="cpu")
     model.load_state_dict(state_dict)
     return model
Example #8
0
                                                        test_size=0.25,
                                                        random_state=0)
    # Debug aid (disabled): print the shapes of the test split.
    #print(X_test.shape , y_test.shape  )

    # Cast every split to float32 and force a 2-D layout: three feature
    # columns for the inputs and a single column for the targets.
    x_train = np.array(x_train, dtype=np.float32).reshape(len(x_train), 3)
    y_train = np.array(y_train, dtype=np.float32).reshape(len(y_train), 1)
    x_test = np.array(x_test, dtype=np.float32).reshape(len(x_test), 3)
    y_test = np.array(y_test, dtype=np.float32).reshape(len(y_test), 1)

    # Sanity output: shapes of both splits and the first ten training rows.
    print(x_train.shape, y_train.shape)
    print(x_test.shape, y_test.shape)
    print(x_train[:10])
    #quit()

    # Load the model: rebuild the network with the same sizes as the reshaped
    # data and restore its parameters from "params.pkl".
    #    network = SimpleNet(input_size=1 , hidden_size=10, output_size=1 )
    network = SimpleNet(input_size=3, hidden_size=10, output_size=1)
    network.load_params("params.pkl")
    #print( network.params["W1"] )

    # Score the restored network on both splits.
    train_acc = network.accuracy(x_train, y_train)
    test_acc = network.accuracy(x_test, y_test)
    print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

    # Disabled: conversion of the numeric inputs back to dates.
    #    x_test_dt= conv_num_date(x_test_pred )
    #    x_train_dt= conv_num_date(x_train )
    #print(x_test_dt.shape )

    # Predict the first ten test rows and rescale the output by num_max_y.
    # NOTE(review): num_max_y is defined outside this view; presumably it is
    # the factor the targets were normalised by — confirm.
    print(x_test[:10])
    y_val = network.predict(x_test[:10])
    y_val = y_val * num_max_y
    print(y_val)
Example #9
0
from simple_net import SimpleNet
from predictFromArchive import predictFromArchive
import torch
from glob import glob
import os

# File holding the trained SimpleNet weights.
networkWeights = 'trained_simplenet.torch'

# Pre-processing constants.
# NOTE(review): wantedShape, crop and resizeFactor are not referenced below;
# presumably they document what predictFromArchive applies — confirm.
wantedShape = (41, 53, 38, 6)
crop = tuple(slice(start, start + 24) for start in (4, 20, 7))
resizeFactor = 2

# Network dimensions: a 12 x 12 x 12 volume with 6 values each, 4 outputs.
dimIn = 12 ** 3 * 6
dimOut = 4

# Build the network on the CPU and restore its weights there as well.
net = SimpleNet(dimIn=dimIn, dimOut=dimOut)
net = net.cpu()
net.load_state_dict(torch.load(networkWeights, map_location='cpu'))

# Directory containing this script (not used further down).
archivPath = os.path.abspath(__file__)
archivPath = os.path.dirname(archivPath)

# Run the prediction for every matching archive; each call is given the same
# report path.
for archiv in glob("/flywheel/v0/input/dtiInitArchive/dti*.zip"):
    predictFromArchive(
        archiv,
        "/flywheel/v0/output/neuro-detect_report.txt",
        net,
    )
Example #10
0
import numpy as np
import sys
sys.path.append('./src')
sys.path.append('./src/lib')
from simple_net import SimpleNet
from gradient import numerical_gradient

# Walk through SimpleNet: inspect the weights, predict, compute the loss and
# estimate the loss gradient with respect to the weights numerically.
# The sample outputs in the comments below are illustrative only; they do not
# all come from the same run.
net = SimpleNet()
print(net.W)
# e.g. [[-0.44439281  0.30789016 -1.50579685]
#       [-0.93170709  0.08170439 -0.12740328]]

# Forward pass for a single two-feature input.
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
# e.g. [ 1.00824761 -1.47819523  0.03650346]

# Index of the largest score.
print(np.argmax(p))
# e.g. 0 for the sample scores shown above

# Loss against a one-hot target whose correct class is index 2.
t = np.array([0, 0, 1])
print(net.loss(x, t))
# e.g. 1.704819611629646


def f(w):
    # `w` is deliberately unused: the loss is always evaluated through `net`.
    # NOTE(review): this relies on numerical_gradient perturbing net.W in
    # place — confirm against its implementation.
    return net.loss(x, t)


dW = numerical_gradient(f, net.W)
print(dW)
# e.g. [[ 0.09999078  0.39092591 -0.49091668]
#       [ 0.14998616  0.58638886 -0.73637502]]
def main():
    """Train ``SimpleNet`` on the CIFAR metadata staged in Elasticsearch.

    Reads every document from the staging index, splits the records by their
    ``set_type`` field, and trains for 300 epochs with SGD.  The mean loss is
    logged every 100 batches and the test accuracy every 10 epochs, both to
    stdout and to the Elasticsearch logging index.  The weights are finally
    saved to ``<logging index>.pth``.
    """
    es_staged_data_index = "cifar-metadata-1"
    es_logging_index = "custom-net-cifar-12"
    output_model = es_logging_index + ".pth"
    es = Elasticsearch("localhost:9200")
    data = [doc["_source"] for doc in scan(es, index=es_staged_data_index)]

    # Fixed seed so the shuffled order is reproducible between runs.
    np.random.seed(42)
    np.random.shuffle(data)
    training_data = [x for x in data if "train" in x["set_type"]]
    testing_data = [x for x in data if "test" in x["set_type"]]
    print(f"Size of training set: {len(training_data)}")
    print(f"Size of testing set: {len(testing_data)}")

    train_dataset_loader = _get_dataset_loader(training_data,
                                               transform=transform_train,
                                               shuffle=True)
    test_dataset_loader = _get_dataset_loader(testing_data)

    net = SimpleNet(10).cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(),
                          lr=1e-3,
                          momentum=0.9,
                          weight_decay=5e-4)

    print_on = 100  # batches per loss report
    test_every = 10  # epochs between test passes

    # Train
    print("training...")
    for epoch in range(300):
        # The test pass below switches to eval mode; switch back here.
        net.train()
        running_loss = 0.0

        for i, (inputs, labels) in enumerate(train_dataset_loader):
            optimizer.zero_grad()
            outputs = net(inputs.float().cuda())
            loss = criterion(outputs, labels.long().cuda())
            loss.backward()
            optimizer.step()

            # print stats
            running_loss += loss.item()
            if (i + 1) % print_on == 0:
                # One mean for both sinks; the console value used to divide
                # by print_on + 1 and so disagreed with the logged record.
                mean_loss = running_loss / print_on
                record = {
                    "timestamp": datetime.utcnow().isoformat(),
                    "cross-entropy-loss": mean_loss,
                    "model-name": "train-simplenet-8"
                }
                es.index(index=es_logging_index, body=record)
                print('[%d, %5d] loss %.3f' % (epoch + 1, i + 1, mean_loss))
                running_loss = 0.0

        # Test every `test_every` epochs.  The former condition
        # `epoch + 1 % 10` parsed as `epoch + (1 % 10)` and was always true.
        if (epoch + 1) % test_every != 0:
            continue

        print("testing...")
        net.eval()
        correct = 0.0
        total = 0.0
        with torch.no_grad():
            for inputs, labels in test_dataset_loader:
                outputs = net(inputs.float().cuda())
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels.cuda()).sum().item()

        # Guard against an empty test split.
        test_accuracy = correct / total if total else 0.0
        print(f"Test Accuracy: {test_accuracy}")
        print(f"Correct: {correct}, Incorrect: {total-correct}")
        record = {
            "accuracy": test_accuracy,
            "correct": correct,
            "incorrect": total - correct,
            "timestamp": datetime.utcnow().isoformat()
        }
        es.index(index=es_logging_index, body=record)

    # Save
    print("saving...")
    torch.save(net.state_dict(), output_model)
Example #12
0
# Cast the splits to float32 and force a 2-D layout: five feature columns for
# the inputs and a single column for the targets.
y_train = np.array(y_train, dtype=np.float32).reshape(len(y_train), 1)
x_test = np.array(x_test, dtype=np.float32).reshape(len(x_test), 5)
y_test = np.array(y_test, dtype=np.float32).reshape(len(y_test), 1)

# Disabled float64 variant of the same conversion.
#x_train =np.array(x_train, dtype = np.float64 ).reshape(len(x_train), 5)
#y_train =np.array(y_train, dtype = np.float64).reshape(len(y_train), 1)
#x_test  =np.array(x_test, dtype  = np.float64).reshape(len(x_test), 5 )
#y_test =np.array(y_test, dtype   = np.float64).reshape(len(y_test), 1)

# Sanity output: shapes of both splits.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
#quit()

# Load the model: rebuild the network with the same sizes as the reshaped
# data and restore its parameters from "params.pkl".
#network = SimpleNet(input_size=1 , hidden_size=10, output_size=1 )
network = SimpleNet(input_size=5, hidden_size=10, output_size=1)
network.load_params("params.pkl")

# Predict: print the first ten targets and the first ten predictions, both
# rescaled by num_max_y.
# NOTE(review): num_max_y is defined outside this view; presumably it is the
# factor the targets were normalised by — confirm.
print(y_test[:10] * num_max_y)
y_val = network.predict(x_test)
y_val = y_val * num_max_y
print(y_val[:10])
#    quit()

#y_train = y_train * num_max_y
#y_val   = y_val * num_max_y

# Elapsed wall-clock time since global_start_time (set outside this view).
print('time : ', time.time() - global_start_time)
#quit()

#print(y_val[:10] )
Example #13
0
    return m


# Load the MNIST splits and one-hot encode the labels with one_hot().
(x_train, t_train), (x_test, t_test) = mnist.load_data()
t_train_one_hot = one_hot(t_train)
t_test_one_hot = one_hot(t_test)
# Flatten every image into a 784-element row to match the network's
# input_size.
# NOTE(review): the pixel values are not rescaled here; if load_data()
# returns raw 0-255 intensities, that may be why training stalls — confirm.
x_train_reshape = x_train.reshape(60000, 784)
x_test_reshape = x_test.reshape(10000, 784)  # convert to the data layout I want

# Training hyper-parameters.
train_loss_list = []  # loss after every iteration
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
rate = 0.1  # learning rate used in the update step below

network = SimpleNet(input_size=784, hidden_size=50, output_size=10)

# Mini-batch gradient descent using numerically estimated gradients.
for i in range(iters_num):
    # Draw batch_size random row indices (np.random.choice samples with
    # replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train_reshape[batch_mask]  # square brackets index; parentheses call a function
    t_batch = t_train_one_hot[batch_mask]  # randomly pick a small batch from the samples
    grads = network.numerical_gradient(x_batch, t_batch)
    for key in ('w1', 'b1', 'w2', 'b2'):
        network.param[key] -= rate * grads[key]  # update the parameters; author's note: the gradient values turned out to be too small
    loss = network.loss_function(x_batch, t_batch)
    print(loss)
    train_loss_list.append(loss)
    print(network.accuracy(x_batch, t_batch))  # author's note: stays at chance level, no improvement at all

# Loss curve data: y holds the losses, x the iteration indices.
y = np.array(train_loss_list)
x = np.array(range(len(train_loss_list)))
Example #14
0
# Cast the splits to float32 and force a 2-D layout: five feature columns for
# the inputs and a single column for the targets.
y_train = np.array(y_train, dtype=np.float32).reshape(len(y_train), 1)
x_test = np.array(x_test, dtype=np.float32).reshape(len(x_test), 5)
y_test = np.array(y_test, dtype=np.float32).reshape(len(y_test), 1)

# Disabled float64 variant of the same conversion.
#x_train =np.array(x_train, dtype = np.float64 ).reshape(len(x_train), 5)
#y_train =np.array(y_train, dtype = np.float64).reshape(len(y_train), 1)
#x_test  =np.array(x_test, dtype  = np.float64).reshape(len(x_test), 5 )
#y_test =np.array(y_test, dtype   = np.float64).reshape(len(y_test), 1)

# Sanity output: shapes of both splits.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
# Debug aids (disabled).
#print(x_train[: 10])
#print(type(x_train ))
#quit()

# Network sized to match the reshaped data: input_size equals the five
# feature columns, output_size the single target column.
network = SimpleNet(input_size=5, hidden_size=10, output_size=1)

# Number of training iterations; adjust as appropriate.
#iters_num = 30000
iters_num = 10000  # adjust the iteration count as appropriate

# Number of training rows.
train_size = x_train.shape[0]
print(train_size)
#quit()

# Wall-clock start time; presumably used later to report elapsed time.
global_start_time = time.time()

# Mini-batch size (alternatives tried: 100, 32) and learning rate.
#batch_size = 100
#batch_size = 32
batch_size = 16
learning_rate = 0.1