def calcRPPlotsAndReconsError(iDataset):
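    """Sweep the number of Random Projection components for each input dataset,
    averaging the reconstruction SSE over 20 random restarts per setting, and
    save an SSE-vs-components plot with a one-standard-deviation band. Then
    return new dataset objects with the two inputs reduced by RP to 80 and 9
    components respectively."""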
    retDSs = []

    for ds in iDataset:
        noOfFeaturesList = range(2, len(ds.training_x[0]),
                                 int(len(ds.training_x[0]) / 10))
        sseList = []
        sseStdList = []
        for nFeatures in noOfFeaturesList:
            sse = []
            for i in range(20):
                rp = RP(n_components=nFeatures, tol=0.001)
                rp.getReducer().fit(ds.training_x)
                sse.append(reconstructionError(rp, ds.training_x))
            sseList.append(np.mean(sse))
            sseStdList.append(np.std(sse))

        plt.style.use('seaborn-whitegrid')
        sseArr = np.array(sseList)
        sseStdArr = np.array(sseStdList)
        plt.plot(noOfFeaturesList, sseArr, marker='o')
        plt.fill_between(noOfFeaturesList,
                         sseArr - sseStdArr,
                         sseArr + sseStdArr,
                         alpha=0.25,
                         color='b')
        plt.ylabel('Reconstruction SSE', fontsize=12)
        plt.xlabel('No. of Features', fontsize=12)
        plt.title('Reconstruction SSE Plot for ' + ds.name + ' using RP',
                  fontsize=12,
                  y=1.03)
        plt.savefig('Figures/DR/Reconstruction SSE Plot for ' + ds.name +
                    ' using RP.png')
        plt.close()

    retDS = dataset()
    rp = RP(n_components=80, tol=0.001)
    rp.getReducer().fit(iDataset[0].training_x)
    xTransformed = rp.getReducer().transform(iDataset[0].training_x)
    retDS.training_x = xTransformed
    retDS.training_y = iDataset[0].training_y
    retDS.name = iDataset[0].name + ' Reduced by RP'
    retDS.build_train_test_splitSecond()
    retDSs.append(retDS)

    retDS = dataset()
    rp = RP(n_components=9, tol=0.001)
    rp.getReducer().fit(iDataset[1].training_x)
    xTransformed = rp.getReducer().transform(iDataset[1].training_x)
    retDS.training_x = xTransformed
    retDS.training_y = iDataset[1].training_y
    retDS.name = iDataset[1].name + ' Reduced by RP'
    retDS.build_train_test_splitSecond()
    retDSs.append(retDS)

    return retDSs
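
# Note: reconstructionError() is used above but defined elsewhere in this
# project. A minimal sketch of a common implementation (an assumption, not
# necessarily the author's version) projects X onto the fitted components,
# maps it back through the pseudo-inverse, and returns the mean squared
# reconstruction error:
#
#     def reconstructionError(reducer, X):
#         components = reducer.getReducer().components_
#         reconstructed = (np.linalg.pinv(components) @ (components @ X.T)).T
#         return np.square(X - reconstructed).mean()
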
def calcFAPlotsAndReconsError(iDataset):
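    """Sweep the number of Feature Agglomeration clusters for each input
    dataset, plot the reconstruction error against the number of features,
    and return new dataset objects with the two inputs reduced by FA to 40
    and 8 clusters respectively."""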
    retDSs = []

    for ds in iDataset:
        noOfFeaturesList = range(2, len(ds.training_x[0]),
                                 int(len(ds.training_x[0]) / 10))
        sseList = []
        for nFeatures in noOfFeaturesList:
            fa = FA(n_clusters=nFeatures)
            fa.getReducer().fit(ds.training_x)
            xTransformed = fa.getReducer().transform(ds.training_x)
            xRevTransformed = fa.getReducer().inverse_transform(xTransformed)
            sse = np.square(np.subtract(ds.training_x, xRevTransformed)).mean()
            sseList.append(sse)

        plt.style.use('seaborn-whitegrid')
        plt.plot(noOfFeaturesList, sseList, marker='o',
                 label='Reconstruction SSE')
        plt.ylabel('Reconstruction SSE', fontsize=12)
        plt.xlabel('No. of Features', fontsize=12)
        plt.title('Reconstruction SSE Plot for ' + ds.name + ' using FA',
                  fontsize=12,
                  y=1.03)
        plt.legend()
        plt.savefig('Figures/DR/Reconstruction SSE Plot for ' + ds.name +
                    ' using FA.png')
        plt.close()

    retDS = dataset()
    fa = FA(n_clusters=40)
    fa.getReducer().fit(iDataset[0].training_x)
    xTransformed = fa.getReducer().transform(iDataset[0].training_x)
    retDS.training_x = xTransformed
    retDS.training_y = iDataset[0].training_y
    retDS.name = iDataset[0].name + ' Reduced by FA'
    retDS.build_train_test_splitSecond()
    retDSs.append(retDS)

    retDS = dataset()
    fa = FA(n_clusters=8)
    fa.getReducer().fit(iDataset[1].training_x)
    xTransformed = fa.getReducer().transform(iDataset[1].training_x)
    retDS.training_x = xTransformed
    retDS.training_y = iDataset[1].training_y
    retDS.name = iDataset[1].name + ' Reduced by FA'
    retDS.build_train_test_splitSecond()
    retDSs.append(retDS)

    return retDSs
def loadData():
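    """Load the training and test CSV datasets and wrap each of them in a
    shuffling DataLoader using the globally configured batch size."""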
    trainingData = dataset(root='trainning.csv',
                           transform=transforms.Compose(
                               [transforms.ToTensor()]))
    training = DataLoader(trainingData,
                          batch_size=batchSize,
                          shuffle=True,
                          num_workers=0)

    testData = dataset(root='test.csv',
                       transform=transforms.Compose([transforms.ToTensor()]))
    test = DataLoader(testData,
                      batch_size=batchSize,
                      shuffle=True,
                      num_workers=0)
    return training, test
def calcPCAPlotsAndReconsError(iDataset):
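    """Run PCA on each dataset, plot the normalised explained variance and its
    cumulative sum, pick the number of components that covers 95% of the
    variance, report the reconstruction error at that cutoff, and return the
    reduced datasets."""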
    retDSs = []

    for ds in iDataset:
        retDS = dataset()
        pca = PCAreducer(n_components=len(ds.training_x[0]))
        pca.getReducer().fit(ds.training_x)
        xTransformed = pca.getReducer().transform(ds.training_x)
        varTransformed = pd.Series(pca.getReducer().explained_variance_)
        cumVar = np.cumsum(varTransformed)
        cumVarNorm = cumVar / cumVar[len(cumVar) - 1]
        varTransformedNorm = varTransformed / cumVar[len(cumVar) - 1]
        nintyFiveVarArg = np.argmax(cumVarNorm > 0.95)

        plt.style.use('seaborn-whitegrid')
        ax = varTransformedNorm.plot(kind='bar', label='Norm. Variance')
        cumVarNorm.plot(label='Norm. Cumulative Variance')
        ticks = ax.xaxis.get_ticklocs()
        ticklabels = [l.get_text() for l in ax.xaxis.get_ticklabels()]
        ax.xaxis.set_ticks(ticks[::10])
        ax.xaxis.set_ticklabels(ticklabels[::10])
        plt.axvline(nintyFiveVarArg, color='k', linestyle='--')
        plt.plot(nintyFiveVarArg,
                 cumVarNorm[nintyFiveVarArg],
                 color='k',
                 marker='o')
        plt.xlabel("Features")
        plt.ylabel("Variance")
        plt.title('Components Calculated using PCA for ' + ds.name,
                  fontsize=12,
                  y=1.03)
        plt.legend()
        plt.savefig('Figures/DR/PCA for ' + ds.name + '.png')
        plt.close()

        pca = PCAreducer(n_components=nintyFiveVarArg)
        pca.getReducer().fit(ds.training_x)
        xTransformed = pca.getReducer().transform(ds.training_x)
        sse = reconstructionError(pca, ds.training_x)

        print(
            'PCA - Number of new features considering 95% cumulative variance for '
            + ds.name + ' is: ', nintyFiveVarArg)
        print(
            'PCA - The reconstruction SSE considering 95% cumulative variance for '
            + ds.name + ' is: ', sse)

        retDS.training_x = xTransformed[:, :nintyFiveVarArg]
        retDS.training_y = ds.training_y
        retDS.name = ds.name + ' Reduced by PCA'
        retDS.build_train_test_splitSecond()
        retDSs.append(retDS)

    return retDSs
def main():
    args = get_parser().parse_args()
    # Arguments by hand
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.target_name = "LST_status"

    table = pd.read_csv(args.table_data)
    list_wsi = os.listdir(args.wsi)
    list_lst = [
        table[table['ID'] == x][args.target_name].item() for x in list_wsi
    ]
    list_dataset = []

    ## Initialise the model
    model = Classifier(args=args)

    ## Create the datasets
    for path in list_wsi:
        args.wsi = os.path.join(args.wsi, path)
        list_dataset.append(dataset(args))
        args.wsi = os.path.dirname(args.wsi)
    list_dataset = np.array(list_dataset)

    ## Kfold_validation
    splitter = StratifiedKFold(n_splits=3)
    for r_eval, (id_train,
                 id_val) in enumerate(splitter.split(list_lst, list_lst)):

        model.name = 'repeat_val_{}'.format(r_eval)
        dataset_train = list_dataset[id_train]
        dataset_val = list_dataset[id_val]
        for db in dataset_train:
            db.transform = get_transform(train=True)
        for db in dataset_val:
            db.transform = get_transform(train=False)
        dataset_train = torch.utils.data.ConcatDataset(dataset_train)
        dataset_val = torch.utils.data.ConcatDataset(dataset_val)
        dataloader_train = DataLoader(dataset=dataset_train,
                                      batch_size=args.batch_size,
                                      num_workers=24)
        dataloader_val = DataLoader(dataset=dataset_val,
                                    batch_size=args.batch_size,
                                    num_workers=24)

        # Initialize the dataloaders (this call replaces the loaders built above).
        # Creates 2 datasets; careful: to load everything in memory, this will
        # have to change so that there is only one dataset.
        dataloader_train, dataloader_val = make_loaders(args=args)

        while model.counter['epochs'] < args.epochs:
            print("Begin training")
            train(model=model, dataloader=dataloader_train)
            val(model=model, dataloader=dataloader_val)
            if model.early_stopping.early_stop:
                break
        model.writer.close()
    def prepare_dataloaders(self, config):
        data = Rossler(config)
        train_data = dataset(data.train_X, data.train_Y, data.train_Z,
                             config['w_size'])
        self.train_dataloader = DataLoader(train_data,
                                           batch_size=config['batch_size'],
                                           shuffle=True,
                                           drop_last=True)
        valid_data = dataset(data.valid_X, data.valid_Y, data.valid_Z,
                             config['w_size'])
        self.valid_dataloader = DataLoader(valid_data,
                                           batch_size=config['batch_size'],
                                           shuffle=False,
                                           drop_last=True)
        test_data = dataset(data.test_X, data.test_Y, data.test_Z,
                            config['w_size'])
        self.test_dataloader = DataLoader(test_data,
                                          batch_size=config['batch_size'],
                                          shuffle=False,
                                          drop_last=True)
        self.data = data
def makeData(srcFile, tgtDicts):
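    """Read dialogues from srcFile (one Python dict literal per line), map
    every token to its index in tgtDicts, and return a dataset built from the
    hierarchical system/user/belief inputs and the label sequences."""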

    src1, src2, src3, tgt, srcv, tgtv = [], [], [], [], [], []
    count, ignored = 0, 0

    print('Processing %s  ...' % (srcFile))
    srcF = open(srcFile, 'r')
    for l in srcF:  # for each dialogue
        l = eval(l)
        src1_tmp, src2_tmp, src3_tmp, tgt_tmp, tgt_vtmp, src_vtmp = [], [], [], [], [], []

        # hierarchical input for a whole dialogue with multiple turns
        slines = l['system_input']
        ulines = l['user_input']
        plines = l['belief_input']
        pvlines = l['labeld']
        tlines = l['labels']
        tvlines = l['labelv']

        for sWords, uWords, pWords, tWords, tvWords, pvWords in zip(
                slines, ulines, plines, tlines, tvlines, pvlines):

            # src vocab
            if bert:
                src1_tmp += [[tgtDicts[w] for w in uWords]]
                src2_tmp += [[tgtDicts[w] for w in sWords]]
            # tgt vocab
            src3_tmp += [[tgtDicts[w] for w in pWords]]
            tt = [tgtDicts[w] for w in pvWords]
            tgt_tmp += [tt]
            tv = [[tgtDicts[w] for w in ws] for ws in tWords]
            tgt_vtmp += [tv]

            tpv = [[[tgtDicts[w] for w in ws] for ws in wss]
                   for wss in tvWords]
            src_vtmp += [tpv]

        count += 1

        src1.append(src1_tmp)
        src2.append(src2_tmp)
        src3.append(src3_tmp)
        srcv.append(src_vtmp)
        tgt.append(tgt_tmp)
        tgtv.append(tgt_vtmp)

    srcF.close()
    print(srcv[:5])

    print('Prepared %d dialogues' % (len(src1)))

    return dataset(src1, src2, src3, tgt, tgtv, srcv)
def save_loader(tweet_pair_data, distance_vector_data, trigger_word_pos_data,
                labels_data, common_words_data, day_difference_data, type):
    """
    Creates dataset and Dataloader objects from provided data and stores in pickle file
    """
    dataset_ = dataset(tweet_pair_data, distance_vector_data,
                       trigger_word_pos_data, common_words_data,
                       day_difference_data, labels_data)
    loader = data.DataLoader(dataset_,
                             batch_size=128,
                             collate_fn=collate_fn,
                             shuffle=True)
    with open(f"{type}_loader.pkl", "wb") as f:
        pickle.dump(loader, f)
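
# Hedged usage sketch (the *_train variables follow the splits built later in
# this script; "train" only names the output pickle):
#   save_loader(tweet_pair_data_train, distance_vector_data_train,
#               trigger_word_pos_data_train, labels_data_train,
#               common_words_data_train, day_difference_data_train, "train")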
    def save_whole_test_image(self, idx, epoch=None):
        """ Pass through whole test image idx and save it.
        """
        dataset = dloader.dataset(self.opt,
                                  mode='test',
                                  return_mode='all',
                                  img_size=512)
        horizontal, vertical, diagonal = dataset.get_all_directions(idx)
        name = "Test" + str(idx) if epoch == None else "{:03d}_Test".format(
            epoch) + str(idx)

        aux.save_full_image_grid(horizontal,
                                 self.pass_through_image(horizontal),
                                 name + "_horizontal", self.opt)
        aux.save_full_image_grid(vertical, self.pass_through_image(vertical),
                                 name + "_vertical", self.opt)
        aux.save_full_image_grid(diagonal, self.pass_through_image(diagonal),
                                 name + "_diagonal", self.opt)
        aux.save_full_image_grid(
            diagonal, self.pass_through_image_sum(horizontal, vertical),
            name + "_diagonal_sum", self.opt)
def load_dict_stuff(name):
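    """Load a previously saved generation dictionary for `name`, rebuild the
    matching dataset, and return the setup object, save directory, stored
    metadata and the image grids needed downstream."""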
    opt = aux.extract_setup_info('setup_valen.txt')[0]
    save_directory = os.path.dirname(opt.Paths['save_path'] +
                                     opt.Paths['load_network_path'])
    dictionary = np.load(save_directory + '/save_generated/' + name + '.npy',
                         allow_pickle=True).item()

    mode = dictionary["mode"]
    idx = dictionary["idx"]
    ratios = dictionary["ratios"]
    angles = dictionary["angles"]
    generated_angles = dictionary["generated_angles"]
    generated_diagonal = dictionary["generated_diagonal"]
    positions = dictionary["positions"]
    positions_diagonal = dictionary["positions_diagonal"]

    dataset = dloader.dataset(opt, mode=mode, return_mode='all', img_size=512)
    horizontal, vertical, diagonal = dataset.get_all_directions(idx)
    grid_tensor = dataset.get_whole_grid(idx)[..., 16:-16, 16:-16]
    grid_tensor_full = dataset.get_whole_grid(idx)

    return opt, save_directory, mode, idx, ratios, angles, generated_angles, generated_diagonal, positions, positions_diagonal, horizontal, vertical, diagonal, grid_tensor, grid_tensor_full
tweet_pair_data_test = [[data_[i[0]], data_[i[1]]] for i in X_test]
distance_vector_data_test = [[distance_vectors[i[0]], distance_vectors[i[1]]]
                             for i in X_test]
trigger_word_pos_data_test = [[trigger_word_pos[i[0]], trigger_word_pos[i[1]]]
                              for i in X_test]
labels_data_test = [i[2] for i in X_test]
common_words_data_test = [i[3] for i in X_test]
day_difference_data_test = [i[4] for i in X_test]

# ## Set up Dataloader for train, test and validation splits

# In[10]:

dataset_ = dataset(tweet_pair_data_train, distance_vector_data_train,
                   trigger_word_pos_data_train, common_words_data_train,
                   day_difference_data_train, labels_data_train)
loader_train = data.DataLoader(dataset_,
                               batch_size=128,
                               collate_fn=collate_fn,
                               shuffle=True)

# In[11]:

dataset_ = dataset(tweet_pair_data_val, distance_vector_data_val,
                   trigger_word_pos_data_val, common_words_data_val,
                   day_difference_data_val, labels_data_val)
loader_val = data.DataLoader(dataset_,
                             batch_size=128,
                             collate_fn=collate_fn,
                             shuffle=True)
    '-device',
    type=int,
    default=2,
    help='device to use for iterating over the data; -1 means cpu')
# option
parser.add_argument('-predict',
                    type=str,
                    default=None,
                    help='predict the sentence given')
parser.add_argument('-test', action='store_true', help='train or test')
args = parser.parse_args()

args.kernel_size = [int(k) for k in args.kernel_size.split(',')]
args.class_num = 8

training_set = dataset(args)
label_weight = training_set.labelweight()
training_iter = DataLoader(dataset=training_set,
                           batch_size=args.batch_size,
                           num_workers=args.device,
                           shuffle=True)

embed = training_set.getembed()
if args.nn_kind == 'cnn':
    model = TextCNN(args, embed)
else:
    model = LSTM(args, embed)

test_set = dataset(args, train=False)
test_iter = DataLoader(dataset=test_set,
                       batch_size=1,
    def train(self):
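        """Run the CycleGAN-style training loop: each epoch rebuilds the
        noisy/clean dataset, updates both generators and both discriminators
        with adversarial, cycle and identity losses, logs to wandb or
        TensorBoard, and checkpoints/validates every fifth epoch."""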
        n_samples = min(len(os.listdir(self.noisy_dir)),
                        len(os.listdir(self.clean_dir)))
        print("started training")
        for epoch in range(self.start_epoch, self.num_epochs):
            torch.cuda.empty_cache()
            self.generator_clean2noisy.train()
            self.generator_noisy2clean.train()
            self.discriminator_clean.train()
            self.discriminator_noisy.train()
            dataset = dataloader.dataset(self.noisy_dir,
                                         self.clean_dir,
                                         n_frames=128)
            train_loader = torch.utils.data.DataLoader(
                dataset=dataset,
                batch_size=self.batch_size,
                shuffle=True,
                drop_last=False,
                num_workers=1)
            for i, (noisy, clean) in enumerate(train_loader):
                num_iterations = (n_samples // self.batch_size) * epoch + i
                if num_iterations > self.identity_loss_stop:
                    self.identity_loss_lambda = 0
                if num_iterations > self.start_decay:
                    self.adjust_lr_rate(self.generator_optim, name="generator")
                    self.adjust_lr_rate(self.discriminator_optim,
                                        name="discriminator")

                # do some stuff with the noisy data!
                noisy = noisy.to(self.device).float()
                fake_clean = self.generator_noisy2clean(noisy)
                d_fake_clean = self.discriminator_clean(fake_clean)
                cycle_noisy = self.generator_clean2noisy(fake_clean)
                d_cycle_noisy = self.discriminator_noisy(cycle_noisy)
                identity_noisy = self.generator_clean2noisy(noisy)

                clean = clean.to(self.device).float()
                fake_noisy = self.generator_clean2noisy(clean)
                d_fake_noisy = self.discriminator_noisy(fake_noisy)
                cycle_clean = self.generator_noisy2clean(fake_noisy)
                d_cycle_clean = self.discriminator_clean(cycle_clean)
                identity_clean = self.generator_noisy2clean(clean)

                cycle_loss = torch.mean(
                    torch.abs(noisy - cycle_noisy)) + torch.mean(
                        torch.abs(clean - cycle_clean))
                identity_loss = torch.mean(
                    torch.abs(noisy - identity_noisy)) + torch.mean(
                        torch.abs(clean - identity_clean))

                g_forward_noisy2clean = torch.mean((1 - d_fake_clean)**2)
                g_backward_clean2noisy = torch.mean((1 - d_cycle_noisy)**2)
                g_forward_clean2noisy = torch.mean((1 - d_fake_noisy)**2)
                g_backward_noisy2clean = torch.mean((1 - d_cycle_clean)**2)
                generator_loss_noisy2clean = (g_forward_noisy2clean +
                                              g_backward_noisy2clean) / 2.0
                generator_loss_clean2noisy = (g_forward_clean2noisy +
                                              g_backward_clean2noisy) / 2.0
                generator_loss = (generator_loss_noisy2clean +
                                  generator_loss_clean2noisy +
                                  self.cycle_loss_lambda * cycle_loss +
                                  self.identity_loss_lambda * identity_loss)
                self.generator_loss.append(generator_loss.item())

                self.generator_optim.zero_grad()
                self.discriminator_optim.zero_grad()
                generator_loss.backward()
                self.generator_optim.step()

                d_real_noisy = self.discriminator_noisy(noisy)
                d_real_clean = self.discriminator_clean(clean)
                fake_clean = self.generator_noisy2clean(noisy)
                fake_noisy = self.generator_clean2noisy(clean)
                d_fake_noisy = self.discriminator_noisy(fake_noisy)
                d_fake_clean = self.discriminator_clean(fake_clean)

                d_loss_noisy_real = torch.mean((1 - d_real_noisy)**2)
                d_loss_noisy_fake = torch.mean((0 - d_fake_noisy)**2)
                d_loss_noisy = (d_loss_noisy_real + d_loss_noisy_fake) / 2.0
                d_loss_clean_real = torch.mean((1 - d_real_clean)**2)
                d_loss_clean_fake = torch.mean((0 - d_fake_clean)**2)
                d_loss_clean = (d_loss_clean_real + d_loss_clean_fake) / 2.0
                d_loss = (d_loss_noisy + d_loss_clean) / 2.0
                self.discriminator_loss.append(d_loss.item())

                self.generator_optim.zero_grad()
                self.discriminator_optim.zero_grad()
                d_loss.backward()
                self.discriminator_optim.step()

                if self.wandb:
                    log = {
                        "Generator Loss": generator_loss.item(),
                        "Discriminator Loss": d_loss.item(),
                        "Noisy2Clean": generator_loss_noisy2clean,
                        "Clean2Noisy": generator_loss_clean2noisy,
                        "Identity Loss": identity_loss,
                        "Cycle Loss": cycle_loss,
                        "D Loss Noisy": d_loss_noisy,
                        "D Loss Clean": d_loss_clean
                    }
                    wandb.log(log)
                else:
                    log = {
                        "Generator Loss": generator_loss.item(),
                        "Discriminator Loss": d_loss.item()
                    }
                    self.writer.add_scalars("logging/G,D-loss/", log,
                                            num_iterations)
                    log = {
                        "Noisy2Clean": generator_loss_noisy2clean,
                        "Clean2Noisy": generator_loss_clean2noisy
                    }
                    self.writer.add_scalars("Logging/conv-loss/", log,
                                            num_iterations)
                    log = {
                        "Identity Loss": identity_loss,
                        "Cycle Loss": cycle_loss
                    }
                    self.writer.add_scalars("Logging/other-loss/", log,
                                            num_iterations)
                    log = {
                        "D Loss Noisy": d_loss_noisy,
                        "D Loss Clean": d_loss_clean
                    }
                    self.writer.add_scalars("Logging/d-loss/", log,
                                            num_iterations)

            msg = "Iter:{}\t Generator Loss:{:.4f} Discrimator Loss:{:.4f} \tGA2B:{:.4f} GB2A:{:.4f} G_id:{:.4f} G_cyc:{:.4f} D_A:{:.4f} D_B:{:.4f}".format(
                num_iterations, generator_loss.item(), d_loss.item(),
                generator_loss_noisy2clean, generator_loss_clean2noisy,
                identity_loss, cycle_loss, d_loss_noisy, d_loss_clean)
            msg = f"{common.bcolors.RED}{msg}{common.bcolors.ENDC}"
            print("{}".format(msg))

            if epoch % 5 == 0 and epoch != 0:
                self.save_model_ckpt(
                    epoch,
                    "{}.tar".format(os.path.join(self.model_dir, str(epoch))))
                print(
                    f"{common.bcolors.GREEN}MODEL SAVED!{common.bcolors.ENDC}")
                self.valid(epoch)
def Train(data_dir, EPOCH, BATCH_SIZE, SIZE, LR, MOM):
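    """Train the global `model` on the partition/label pickles in data_dir,
    evaluate on the test split every epoch, print per-epoch metrics, save the
    final model to a hard-coded path, and return the per-epoch statistics."""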
    partition = pickle.load(open(os.path.join(data_dir, 'partition.p'), 'rb'))
    label = pickle.load(open(os.path.join(data_dir, 'label.p'), 'rb'))

    training_dataset = dataset(partition['train'],
                               label,
                               data_dir,
                               transform=transform)
    training_generator = DataLoader(training_dataset,
                                    batch_size=BATCH_SIZE,
                                    shuffle=True)
    test_dataset = dataset(partition['test'],
                           label,
                           data_dir,
                           transform=transform)
    test_generator = DataLoader(test_dataset, batch_size=SIZE, shuffle=True)

    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=MOM)
    data = []
    for epoch in range(EPOCH):
        Losss = []
        acc = []
        test_loss = []
        test_acc = []

        model.train()
        for batch_idx, batch_info in tqdm(enumerate(training_generator)):
            batch_data, batch_labels = batch_info[0].to(
                device), batch_info[1].to(device)
            optimizer.zero_grad()
            outputs = model(batch_data)
            loss = loss_function(outputs, batch_labels)
            Losss.append(loss.item())
            loss.backward()
            optimizer.step()

            acc.append(eval(batch_data, batch_labels, outputs, train=False))

        model.eval()
        with torch.no_grad():
            for batch_idx, batch_info in tqdm(enumerate(test_generator)):
                batch_data, batch_labels = batch_info[0].to(
                    device), batch_info[1].to(device)
                outputs = model(batch_data)
                loss = loss_function(outputs, batch_labels)
                test_loss.append(loss.item())
                test_acc.append(
                    eval(batch_data, batch_labels, outputs, train=False))

        train_acc = float(sum(acc) / len(acc)) * 100
        train_loss = float(sum(Losss) / len(Losss))
        Val_Acc = float(sum(test_acc) / len(test_acc)) * 100
        Val_Loss = float(sum(test_loss) / len(test_loss))

        print(
            f"epoch = {epoch+1}   Acc = {train_acc}   Loss = {train_loss}   val_acc = {Val_Acc}   val_loss = {Val_Loss}"
        )
        data.append([epoch + 1, train_acc, train_loss, Val_Acc, Val_Loss])

    torch.save(
        model,
        '/data/plant_domain_classification/dataset/server_task_2/model_resnet34_25_0.01_0.01.pth'
    )

    return data
import pandas as pd
import numpy as np
import os
from keras.layers import Reshape, Flatten, LeakyReLU, Activation, LSTM
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras_adversarial import AdversarialModel, simple_gan, gan_targets
from keras_adversarial import normal_latent_sampling, AdversarialOptimizerSimultaneous
from keras_adversarial.legacy import l1l2, Dense, fit
from dataloader import dataset

# load dataset
db = dataset(seq_len=5)


def model_generator(latent_dim,
                    input_shape,
                    hidden_dim=1024,
                    reg=lambda: l1l2(1e-5, 1e-5)):
    return Sequential([
        Dense(int(hidden_dim / 4),
              name="generator_h1",
              input_dim=latent_dim,
              W_regularizer=reg()),
        LeakyReLU(0.2),
        Dense(int(hidden_dim / 2), name="generator_h2", W_regularizer=reg()),
        LeakyReLU(0.2),
        Dense(int(hidden_dim), name="generator_h3", W_regularizer=reg()),
        LeakyReLU(0.2),
        Dense(int(np.prod(input_shape)),
              name="generator_x_flat",
def main(opt):
    """============================================"""

    seed = opt.Training['global_seed']
    print(f'\nSetting everything to seed {seed}')
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

    ### Create Network
    network = net.VAE(opt.Network, opt).to(opt.Training['device'])
    if opt.Network['load_trained']:
        save_dict = torch.load(opt.Paths['save_path'] +
                               opt.Paths['load_network_path'])
        network.load_state_dict(save_dict['state_dict'])
        print('Loaded model from ' + opt.Paths['load_network_path'])

    ###### Define Optimizer ######
    loss_func = Loss.Loss(opt.Training).to(opt.Training['device'])
    optimizer = torch.optim.Adam(network.parameters(),
                                 lr=opt.Training['lr'],
                                 weight_decay=opt.Training['weight_decay'])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=opt.Training['sched_factor'],
        patience=opt.Training['sched_patience'],
        min_lr=1e-8,
        threshold=0.0001,
        threshold_mode='abs')

    ###### Create Dataloaders ######
    train_dataset = dloader.dataset(opt, mode='train')
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        num_workers=opt.Training['workers'],
        batch_size=opt.Training['bs'])
    train_sum_dataset = dloader.dataset(opt, mode='train', return_mode='all')
    train_sum_data_loader = torch.utils.data.DataLoader(
        train_sum_dataset,
        num_workers=opt.Training['workers'],
        batch_size=opt.Training['bs'])
    test_dataset = dloader.dataset(
        opt,
        mode='test',
        return_mode='all' if opt.Misc['use_full_validate'] else '')
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset,
        num_workers=opt.Training['workers'],
        batch_size=opt.Training['bs'])

    ###### Set Logging Files ######
    dt = datetime.now()
    dt = '{}-{}-{}-{}-{}'.format(dt.year, dt.month, dt.day, dt.hour, dt.minute)
    opt.Training['name'] = 'Model' + '_Date-' + dt  # +str(opt.iter_idx)+
    if opt.Training['savename'] != "":
        opt.Training['name'] += '_' + opt.Training['savename']

    save_path = opt.Paths['save_path'] + "/" + opt.Training['name']

    ### Make the saving directory
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    else:
        count = 1
        while os.path.exists(save_path):
            count += 1
            svn = opt.Training['name'] + "_" + str(count)
            save_path = opt.Paths['save_path'] + "/" + svn
        opt.Training['name'] = svn
        os.makedirs(save_path)
    opt.Paths['save_path'] = save_path

    # Make summary plots, images, segmentation and videos folder
    save_summary = save_path + '/summary_plots'
    Path(save_path + '/summary_plots').mkdir(parents=True, exist_ok=True)
    Path(save_path + '/images').mkdir(parents=True, exist_ok=True)
    if opt.Misc['use_full_validate']:
        Path(save_path + '/images_validate').mkdir(parents=True, exist_ok=True)

    ### Copy Code !!
    if opt.Misc["copy_code"]:
        copy_tree(
            './', save_path + '/code/'
        )  # Does not work for me; I think the paths are too long for Windows
    save_str = aux.gimme_save_string(opt)

    ### Save rudimentary info parameters to text-file and pkl.
    with open(opt.Paths['save_path'] + '/Parameter_Info.txt', 'w') as f:
        f.write(save_str)
    pkl.dump(opt, open(opt.Paths['save_path'] + "/hypa.pkl", "wb"))

    ## The loss tracker is implemented so that the first two elements are added every iteration
    logging_keys = ["Loss", "L_recon", 'L_kl']
    logging_keys_test = [
        "L_recon_hor", "L_recon_vert", "L_recon_diag", "L_recon_diag_sum",
        "D_hor_vert", "D_diag"
    ] if opt.Misc['use_full_validate'] else logging_keys

    loss_track_train = aux.Loss_Tracking(logging_keys)
    loss_track_test = aux.Loss_Tracking(logging_keys_test)

    ### Setting up CSV writers
    full_log_train = aux.CSVlogger(save_path + "/log_per_epoch_train.csv",
                                   ["Epoch", "Time", "LR"] + logging_keys)
    full_log_test = aux.CSVlogger(save_path + "/log_per_epoch_test.csv",
                                  ["Epoch", "Time", "LR"] + logging_keys_test)

    epoch_iterator = tqdm(range(0, opt.Training['n_epochs']),
                          ascii=True,
                          position=1)
    best_loss = np.inf

    for epoch in epoch_iterator:
        epoch_time = time.time()

        ##### Training ########
        epoch_iterator.set_description("Training with lr={}".format(
            np.round([group['lr'] for group in optimizer.param_groups][0], 6)))
        trainer(network, opt, epoch, train_data_loader, train_sum_data_loader,
                loss_track_train, optimizer, loss_func, scheduler,
                opt.Training['use_sched'])

        ###### Validation #########
        epoch_iterator.set_description('Validating...          ')
        if epoch % opt.Training['validate_every'] == 0:
            if opt.Misc['use_full_validate']:
                validator_full(network, opt, epoch, test_data_loader,
                               loss_track_test, loss_func)
            else:
                validator(network, opt, epoch, test_data_loader,
                          loss_track_test, loss_func)

        ## Best Validation Loss
        current_loss = loss_track_test.get_current_mean()[0]
        if current_loss < best_loss:
            ###### SAVE CHECKPOINTS ########
            save_dict = {
                'epoch': epoch + 1,
                'state_dict': network.state_dict(),
                'optim_state_dict': optimizer.state_dict()
            }
            torch.save(save_dict,
                       opt.Paths['save_path'] + '/checkpoint_best_val.pth.tar')
            best_loss = current_loss

        ## Always save occasionally
        if epoch != 0 and epoch % opt.Training['save_every'] == 0:
            ###### SAVE CHECKPOINTS ########
            save_dict = {
                'epoch': epoch + 1,
                'state_dict': network.state_dict(),
                'optim_state_dict': optimizer.state_dict()
            }
            torch.save(
                save_dict, opt.Paths['save_path'] +
                '/checkpoint_epoch_{}.pth.tar'.format(epoch))

        ###### Logging Epoch Data ######
        epoch_time = time.time() - epoch_time
        full_log_train.write([
            epoch, epoch_time,
            [group['lr'] for group in optimizer.param_groups][0],
            *loss_track_train.get_current_mean()
        ])
        full_log_test.write([
            epoch, epoch_time,
            [group['lr'] for group in optimizer.param_groups][0],
            *loss_track_test.get_current_mean()
        ])

        ## Full Image Test
        if epoch != 0 and epoch % opt.Training['full_test_every'] == 0:
            epoch_iterator.set_description('Saving test images     ')
            _ = network.eval()
            if opt.Training['full_test_which'] == 'any':
                network.save_whole_test_image(np.random.randint(0, 4), epoch)
            elif opt.Training['full_test_which'] == 'all':
                for i in range(4):
                    network.save_whole_test_image(i, epoch)
            else:
                network.save_whole_test_image(opt.Training['full_test_which'],
                                              epoch)

        ###### Generating Summary Plots #######
        # aux.summary_plots(loss_track_train.get_hist(), loss_track_test.get_hist(), epoch, save_summary)
        _ = gc.collect()
def calcICAPlotsAndReconsError(iDataset):
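    """Run ICA on each dataset, order the components by the absolute kurtosis
    of their projections, keep the components with kurtosis of at least 2.0,
    plot the kurtosis spectrum, report the reconstruction error at that
    cutoff, and return the reduced datasets."""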
    retDSs = []

    for ds in iDataset:
        retDS = dataset()
        ica = ICA(n_components=len(ds.training_x[0]), tol=0.001)
        ica.getReducer().fit(ds.training_x)
        xTransformedNotOrdered = ica.getReducer().transform(ds.training_x)
        order = [
            -abs(kurtosis(xTransformedNotOrdered[:, i]))
            for i in range(xTransformedNotOrdered.shape[1])
        ]
        xTransformed = xTransformedNotOrdered[:, np.array(order).argsort()]
        ica_resNorOrdered = pd.Series([
            abs(kurtosis(xTransformedNotOrdered[:, i]))
            for i in range(xTransformedNotOrdered.shape[1])
        ])
        ica_res = pd.Series([
            abs(kurtosis(xTransformed[:, i]))
            for i in range(xTransformed.shape[1])
        ])
        featuresNumberCutoff = np.argmax(ica_res.values < 2.)

        plt.style.use('seaborn-whitegrid')
        ax = ica_resNorOrdered.plot(kind='bar',
                                    logy=True,
                                    label='Not Ordered Kurtosis',
                                    color='r')
        ax = ica_res.plot(kind='bar', logy=True, label='Kurtosis')
        ticks = ax.xaxis.get_ticklocs()
        ticklabels = [l.get_text() for l in ax.xaxis.get_ticklabels()]
        ax.xaxis.set_ticks(ticks[::10])
        ax.xaxis.set_ticklabels(ticklabels[::10])
        plt.axvline(featuresNumberCutoff, color='k', linestyle='--')
        plt.plot(featuresNumberCutoff,
                 ica_res[featuresNumberCutoff],
                 color='k',
                 marker='o')
        plt.xlabel("Features")
        plt.ylabel("Kurtosis")
        plt.title('Components Calculated using ICA for ' + ds.name,
                  fontsize=12,
                  y=1.03)
        plt.savefig('Figures/DR/ICA for ' + ds.name + '.png')
        plt.close()

        ica = ICA(n_components=featuresNumberCutoff, tol=0.001)
        ica.getReducer().fit(ds.training_x)
        sse = reconstructionError(ica, ds.training_x)

        print(
            'ICA - Number of new features considering a minimum kurtosis of 2.0 for '
            + ds.name + ' is: ', featuresNumberCutoff)
        print(
            'ICA - The reconstruction SSE considering a minimum kurtosis of 2.0 for '
            + ds.name + ' is: ', sse)

        retDS.training_x = xTransformed[:, :featuresNumberCutoff]
        retDS.training_y = ds.training_y
        retDS.name = ds.name + ' Reduced by ICA'
        retDS.build_train_test_splitSecond()
        retDSs.append(retDS)

    return retDSs
def calcClusterAdded(iDataset):
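    """For each dataset, pick the K-Means and EM cluster counts previously
    tuned for that dataset/reduction combination, append the predicted cluster
    label as an extra feature, and return the augmented datasets grouped per
    input dataset (the function expects exactly two inputs)."""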
    retDSs = []
    for ds in iDataset:
        if 'Income' in ds.name:
            clusterKM = 3
            clusterEM = 2

            if 'FA' in ds.name:
                clusterKM = 2
                clusterEM = 2

            if 'ICA' in ds.name:
                clusterKM = 2
                clusterEM = 2

            if 'PCA' in ds.name:
                clusterKM = 3
                clusterEM = 3

            if 'RP' in ds.name:
                clusterKM = 2
                clusterEM = 3

        elif 'Wine' in ds.name:
            clusterKM = 2
            clusterEM = 2

            if 'FA' in ds.name:
                clusterKM = 4
                clusterEM = 2

            if 'ICA' in ds.name:
                clusterKM = 2
                clusterEM = 2

            if 'PCA' in ds.name:
                clusterKM = 2
                clusterEM = 2
            if 'RP' in ds.name:
                clusterKM = 3
                clusterEM = 2

        retDS = dataset()
        kmLearner = Clustering.KM(n_clusters=clusterKM)
        kmLearner.getLearner().fit(ds.training_x)
        clusteringY_KM = kmLearner.getLearner().predict(ds.training_x)
        xTransformed = pd.concat(
            [pd.DataFrame(ds.training_x),
             pd.DataFrame(clusteringY_KM)], axis=1).to_numpy()
        retDS.training_x = xTransformed
        retDS.training_y = ds.training_y
        retDS.name = ds.name + ' with KM Clusters Added'
        retDS.build_train_test_splitSecond()
        retDSs.append(retDS)

        retDS = dataset()
        emLearner = Clustering.EM(n_components=clusterEM)
        emLearner.getLearner().fit(ds.training_x)
        clusteringY_EM = emLearner.getLearner().predict(ds.training_x)
        xTransformed = pd.concat(
            [pd.DataFrame(ds.training_x),
             pd.DataFrame(clusteringY_EM)], axis=1).to_numpy()
        retDS.training_x = xTransformed
        retDS.training_y = ds.training_y
        retDS.name = ds.name + ' with EM Clusters Added'
        retDS.build_train_test_splitSecond()
        retDSs.append(retDS)

    return retDSs[0:2], retDSs[2:4]
else:

    class DistributedWrapper(torch.nn.Module):
        def __init__(self, module):
            super().__init__()
            self.module = module

        def forward(self, input):
            return self.module(input)

    model = DistributedWrapper(model)

Log('loading dataset')
trainset = dataset(valid=False,
                   verbose=True,
                   train_dir=args.train_dir,
                   gt_dir=args.train_gt,
                   cropsize=(crop_size_w * 16, crop_size_h * 16))

if distributed:
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainset, shuffle=True)

dl_args = {}

if distributed:
    dl_args['sampler'] = train_sampler

dataloader = DataLoader(
    trainset,
    batch_size=batch_size,