Example #1
def store_queries(source: str, target):
    """
    Stores all queries from source dataset into target folder.
    Assumes that source dataset is separated by folds 1 to 5.
    """
    print( "Storing grouped queries locally...")

    for i in range(1, 6):
        print("\t - Processing fold: " + str(i))
        for dataset_type in ['train', 'vali', 'test']:
            source_folder = '{}/Fold{}/{}.txt'.format(source, i, dataset_type)
            target_folder = '{}/Fold{}/{}/'.format(target, i, dataset_type)
            if not os.path.exists(target_folder):
                os.makedirs(target_folder)
            get_data(source_folder, target_folder, strip_docid=False)
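Example #1 depends on a `get_data(source_file, target_folder, strip_docid)` helper that the page does not show. As a rough sketch only, assuming LETOR-style input where every line carries a `qid:<id>` token, such a helper might split the file into one file per query:

import os
from collections import defaultdict

def get_data(source_file, target_folder, strip_docid=False):
    """Split an SVMLight/LETOR-style file into one file per query id (assumed format)."""
    queries = defaultdict(list)
    with open(source_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            # Assumed line layout: "<label> qid:<id> 1:0.3 2:0.1 ... # docid = X"
            qid = line.split()[1].split(':')[1]
            if strip_docid and '#' in line:
                line = line.split('#')[0].rstrip()
            queries[qid].append(line)
    os.makedirs(target_folder, exist_ok=True)
    for qid, lines in queries.items():
        with open(os.path.join(target_folder, qid + '.txt'), 'w') as out:
            out.write('\n'.join(lines) + '\n')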
Example #2
    def __init__(self, params):
        self.prepare_output_folder(params)

        self.global_iter = 0

        self.max_iter = params.max_iter
        self.params = params
        self.beta = params.beta

        self.net = self.get_model(params)
        self.viz_on = params.viz_on

        if self.viz_on:
            self.writer = SummaryWriter(params.summary)

        self.load_checkpoint(params.checkpoint_file)

        self.save_output = params.save_output

        self.gather_step = params.gather_step
        self.display_step = params.display_step
        self.save_step = params.save_step

        self.dataset = params.dataset
        self.batch_size = params.batch_size
        self.data_loader = get_data(params.dataset, params.batch_size,
                                    params.image_size)

        self.gather = DataGather()

        self.set_optimizer(params)
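The constructor in Example #2 reads many attributes off a `params` object. A hypothetical smoke-test setup (attribute names copied from the snippet; the values, and any extra fields consumed by `prepare_output_folder`/`get_model`, are guesses):

from types import SimpleNamespace

params = SimpleNamespace(
    max_iter=10000, beta=4.0, viz_on=False, summary='runs/exp0',
    checkpoint_file='', save_output=True,
    gather_step=1000, display_step=500, save_step=2000,
    dataset='dsprites', batch_size=64, image_size=64,
)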
Example #3
def start():
    global com1_selected, com2_selected
    sets = dt.get_data(com1_selected)
    for i in sets:
        show.insert('end', "当前正在运算:" + str(i['id']) + '\n')
        d = i['size']
        cubage = i['capacity']
        profit_in = i['profit']
        weight_in = i['weight']
        bp = Back_pack(d, cubage, profit_in, weight_in)
        bp.run(com2_selected)
Example #4
def main(argv):
    data = get_data(FLAGS.train_data, FLAGS.num_classes)
    train_data = data.batch(16, drop_remainder=True)

    model = UNet(num_classes=FLAGS.num_classes)
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='binary_crossentropy')
    model.fit(train_data, epochs=25)

    for index, (image, label) in enumerate(data.batch(1).take(5)):
        prediction = model.predict(image)
        plot_result(f'results/{index}.png', image, label, prediction)
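Example #4 assumes `get_data(path, num_classes)` returns an unbatched `tf.data.Dataset` of (image, mask) pairs. A minimal sketch under that assumption; the directory layout, image size, and PNG format are made up for illustration:

import tensorflow as tf

IMAGE_SIZE = (128, 128)  # illustrative; the snippet's FLAGS do not specify a size

def get_data(image_dir, num_classes):
    """Yield (image, mask) pairs from <image_dir>/images/*.png and <image_dir>/masks/*.png (assumed layout)."""
    # num_classes is accepted for signature parity with the snippet but unused here.
    def load_pair(image_path):
        mask_path = tf.strings.regex_replace(image_path, 'images', 'masks')
        image = tf.image.decode_png(tf.io.read_file(image_path), channels=3)
        mask = tf.image.decode_png(tf.io.read_file(mask_path), channels=1)
        image = tf.image.resize(image, IMAGE_SIZE) / 255.0
        mask = tf.image.resize(mask, IMAGE_SIZE, method='nearest')
        mask = tf.cast(mask, tf.float32) / 255.0  # assumes binary 0/255 masks
        return image, mask

    files = tf.data.Dataset.list_files(image_dir + '/images/*.png', shuffle=True)
    return files.map(load_pair, num_parallel_calls=tf.data.AUTOTUNE)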
Example #5
def train(max_length, model_size, epochs, learning_rate, device, num_heads,
          num_blocks, dropout, train_word_embeddings, batch_size, save_path):
    """
        Trains the classifier on the IMDB sentiment dataset
    """
    # train: train iterator
    # test: test iterator
    # vectors: train data word vector
    # vocab: train data vocab
    train, test, vectors, vocab = get_data(batch_size, max_length=max_length)
    # create the transformer net
    device = torch.device(device)
    model = Net(model_size=model_size,
                embeddings=vectors,
                max_length=max_length,
                num_heads=num_heads,
                num_blocks=num_blocks,
                dropout=dropout,
                train_word_embeddings=train_word_embeddings).to(device)

    optimizer = optim.Adam((p for p in model.parameters() if p.requires_grad),
                           lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    best_correct = 0
    best_model = model
    with open(save_path + '_train_results', 'a', encoding='utf-8') as file_re:
        for i in range(0, epochs + 1):
            loss_sum = 0.0
            model.train()
            # the training data is split into many batches; tqdm prints a progress bar
            for j, b in enumerate(iter(tqdm(train))):
                optimizer.zero_grad()
                # print('\nreview is \n\n', b.review[0])
                model_out = model(b.review[0].to(device))
                # calculate loss
                # print('\nrating is \n\n', b.rating)
                loss = criterion(model_out, b.rating.to(device))
                loss.backward()
                optimizer.step()
                loss_sum += loss.item()
            print('\n **********************************************')
            loss_temp = "Epoch: {}, Loss mean: {}\n".format(i, loss_sum / j)
            file_re.write(loss_temp + '\n')
            print(loss_temp)
            # Validate on the test set every 5 epochs
            if i % 5 == 0:
                val_correct = val(model, test, vocab, device, i, save_path)
                if val_correct > best_correct:
                    best_correct = val_correct
                    best_model = model
    torch.save(best_model, save_path + '_model.pkl')
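A hypothetical invocation of the `train` function above; every value here is illustrative rather than taken from the source:

train(max_length=500, model_size=128, epochs=20, learning_rate=1e-4,
      device='cuda', num_heads=4, num_blocks=2, dropout=0.1,
      train_word_embeddings=True, batch_size=32,
      save_path='checkpoints/imdb_transformer')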
Example #6
def test(modelname, modelfilename):
    with open(os.path.join('models/', modelfilename), 'rb') as modelfile:
        checkpoint = torch.load(modelfile)
    dict_args = checkpoint['dict_args']
    if modelname == 'mlp':
        model = MLP(dict_args)
    model.load_state_dict(checkpoint['state_dict'])
    if USE_CUDA:
        model = model.cuda()

    test_batch_size = 200
    testloader, dataiter, lexicon, test_data_size = loader.get_data(
        batch_size=test_batch_size)

    return evaluator.evaluate(model, testloader, dataiter)
Example #7
def run():
    num_epochs = 10
    output_period = 1

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = FeatureModel()
    model.to(device)

    loss_func = nn.NLLLoss().to(device)

    optimizer = optim.SGD(model.parameters(), lr=0.1)

    epoch = 1
    while epoch <= num_epochs:
        print('Epoch: ' + str(epoch))

        data_loader = dataloader.get_data('val', {})
        running_loss = 0.

        model.train()

        for batch_num, (images, patches, labels) in enumerate(data_loader, 1):
            images = images.to(device)
            patches = patches.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images, patches)
            outputs = outputs.contiguous().view((outputs.size(0), -1))
            loss = loss_func(outputs, labels)
            loss.backward()

            optimizer.step()
            running_loss += loss.item()

            gc.collect()
            if batch_num % output_period == 0:
                print('Loss: {0:.4f}'.format(running_loss / output_period))
                running_loss = 0.

        torch.save(model.state_dict(), 'models/model.{0:}'.format(epoch))
        epoch += 1
Example #8
def get_model(sh_path):
    if sh_path.count(".", 0, 2) == 2:
        arguments = " ".join([s.strip() for s in Path(sh_path).read_text().replace("\\", "").replace('"', "").replace("./", "../").splitlines()[1:-1]])
    else:
        arguments = " ".join([s.strip() for s in Path(sh_path).read_text().replace("\\", "").replace('"', "").splitlines()[1:-1]])
    parser = argument_parsing(preparse=True)
    args = parser.parse_args(arguments.split())

    device = "cuda" if (torch.cuda.is_available() and args.use_cuda) else "cpu"
    (src, trg), (train, _, test), (train_loader, _, test_loader) = get_data(args)
    src_vocab_len = len(src.vocab.stoi)
    trg_vocab_len = len(trg.vocab.stoi)
    enc_max_seq_len = args.max_length
    dec_max_seq_len = args.max_length
    pad_idx = src.vocab.stoi.get("<pad>") if args.pad_idx is None else args.pad_idx
    pos_pad_idx = 0 if args.pos_pad_idx is None else args.pos_pad_idx

    model = Transformer(enc_vocab_len=src_vocab_len, 
                        enc_max_seq_len=enc_max_seq_len, 
                        dec_vocab_len=trg_vocab_len, 
                        dec_max_seq_len=dec_max_seq_len, 
                        n_layer=args.n_layer, 
                        n_head=args.n_head, 
                        d_model=args.d_model, 
                        d_k=args.d_k, 
                        d_v=args.d_v, 
                        d_f=args.d_f, 
                        pad_idx=pad_idx,
                        pos_pad_idx=pos_pad_idx, 
                        drop_rate=args.drop_rate, 
                        use_conv=args.use_conv, 
                        linear_weight_share=args.linear_weight_share, 
                        embed_weight_share=args.embed_weight_share).to(device)
    if device == "cuda":
        model.load_state_dict(torch.load(args.save_path))
    else:
        model.load_state_dict(torch.load(args.save_path, map_location=torch.device(device)))
    
    return model, (src, trg), (test, test_loader)
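A possible call to the loader above; the shell-script path is a placeholder, and switching to evaluation mode is the usual next step rather than anything shown in the source:

model, (src, trg), (test, test_loader) = get_model('../scripts/train_base.sh')
model.eval()  # the returned Transformer is typically used for inference from here on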
Example #9
def train(**kwargs:dict) -> None:

    for k, v in kwargs.items():
        setattr(opt, k, v)
    
    #vis = Visdom(env=opt.env)

    # load data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(
        data,
        batch_size=opt.batch_size,
        shuffle=True,
    )
    
    model = PoetryModel(len(word2ix), 2, 2)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))

    if opt.user_gpu:
        model.cuda()
        criterion.cuda()
    
    for epoch in range(opt.epoch):
        
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.user_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            input_, target = V(data_[:-1, :]), V(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
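A hypothetical call; the keyword names mirror the `opt` attributes the snippet reads (`batch_size`, `lr`, `epoch`, `user_gpu`, `model_path`) since each kwarg is copied onto `opt` via `setattr`, and the values are illustrative:

train(batch_size=128, lr=1e-3, epoch=20, user_gpu=False, model_path=None)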
Example #10
File: main.py  Project: gxwangupc/FAAL
"""
opt.use_PCA = True
opt.use_SuperPCA = False
print(opt.use_PCA)
print(opt.use_SuperPCA)
assert not (opt.use_PCA and opt.use_SuperPCA)
data, label = load(opt.DATASET)
if opt.use_PCA:
    data, pca = apply_pca(data)

init_tr_labeled_idx, init_tr_unlabeled_idx, te_idx = get_init_indices(
    data, label)

init_trl_data, init_trl_label = get_data(data, label, init_tr_labeled_idx)
init_trunl_data, init_trunl_label = get_data(data, label,
                                             init_tr_unlabeled_idx)
te_data, te_label = get_data(data, label, te_idx)

init_trl_data = np.expand_dims(init_trl_data, axis=4)
init_trl_label = keras.utils.to_categorical(init_trl_label)
init_trunl_data = np.expand_dims(init_trunl_data, axis=4)
init_trunl_label = keras.utils.to_categorical(init_trunl_label)
te_data = np.expand_dims(te_data, axis=4)
te_label = keras.utils.to_categorical(te_label)

init_trl_set = tf.data.Dataset.from_tensor_slices(
    (init_trl_data,
     init_trl_label)).shuffle(len(init_trl_data)).batch(opt.BATCH_SIZE)
te_set = tf.data.Dataset.from_tensor_slices(
Example #11
from dataloader import get_data
from sklearn.linear_model import LogisticRegression
from augmentation import plot_image
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
from sklearn.model_selection import validation_curve, learning_curve
from majority_vote import majority_vote
from confusion_matrix import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

np.random.seed(42)

train_set_1, train_labels_1 = get_data('IXI-T1-Preprocessed', 'mean', 0, 256)
test_set_1, test_labels = get_data('IXI-T1-Preprocessed', 'mean', 0, 256,
                                   False)

train_set_2, train_labels_2 = get_data('IXI-T1-Preprocessed', 'mean', 1, 256)
test_set_2, test_labels = get_data('IXI-T1-Preprocessed', 'mean', 1, 256,
                                   False)

train_set_3, train_labels_3 = get_data('IXI-T1-Preprocessed', 'mean', 2, 256)
test_set_3, test_labels = get_data('IXI-T1-Preprocessed', 'mean', 2, 256,
                                   False)

train_set_4, train_labels_4 = get_data('IXI-T1-Preprocessed', 'slice', 0, 256)
test_set_4, test_labels = get_data('IXI-T1-Preprocessed', 'slice', 0, 256,
                                   False)

train_set_5, train_labels_5 = get_data('IXI-T1-Preprocessed', 'slice', 1, 256)
test_set_5, test_labels = get_data('IXI-T1-Preprocessed', 'slice', 1, 256,
                                   False)
Example #12
def main(args):
    # configs path to load data & save model
    from pathlib import Path
    if not Path(args.root_dir).exists():
        Path(args.root_dir).mkdir()

    p = Path(args.save_path).parent
    if not p.exists():
        p.mkdir()

    device = "cuda" if (torch.cuda.is_available() and args.use_cuda) else "cpu"
    import sys
    print(sys.version)
    print(f"Using {device}")
    print("Loading Data...")
    (src, trg), (train, valid, _), (train_loader, valid_loader,
                                    _) = get_data(args)
    src_vocab_len = len(src.vocab.stoi)
    trg_vocab_len = len(trg.vocab.stoi)
    # check vocab size
    print(f"SRC vocab {src_vocab_len}, TRG vocab {trg_vocab_len}")
    enc_max_seq_len = args.max_length
    dec_max_seq_len = args.max_length
    pad_idx = src.vocab.stoi.get(
        "<pad>") if args.pad_idx is None else args.pad_idx
    enc_sos_idx = src.vocab.stoi.get(
        "<s>") if args.enc_sos_idx is None else args.enc_sos_idx
    enc_eos_idx = src.vocab.stoi.get(
        "</s>") if args.enc_eos_idx is None else args.enc_eos_idx
    dec_sos_idx = trg.vocab.stoi.get(
        "<s>") if args.dec_sos_idx is None else args.dec_sos_idx
    dec_eos_idx = trg.vocab.stoi.get(
        "</s>") if args.dec_eos_idx is None else args.dec_eos_idx
    pos_pad_idx = 0 if args.pos_pad_idx is None else args.pos_pad_idx

    print("Building Model...")
    model = Transformer(enc_vocab_len=src_vocab_len,
                        enc_max_seq_len=enc_max_seq_len,
                        dec_vocab_len=trg_vocab_len,
                        dec_max_seq_len=dec_max_seq_len,
                        n_layer=args.n_layer,
                        n_head=args.n_head,
                        d_model=args.d_model,
                        d_k=args.d_k,
                        d_v=args.d_v,
                        d_f=args.d_f,
                        pad_idx=pad_idx,
                        pos_pad_idx=pos_pad_idx,
                        drop_rate=args.drop_rate,
                        use_conv=args.use_conv,
                        linear_weight_share=args.linear_weight_share,
                        embed_weight_share=args.embed_weight_share).to(device)

    if args.load_path is not None:
        print(f"Load Model {args.load_path}")
        model.load_state_dict(torch.load(args.load_path))

    # build loss function using LabelSmoothing
    loss_function = LabelSmoothing(trg_vocab_size=trg_vocab_len,
                                   pad_idx=pad_idx,
                                   eps=args.smooth_eps)

    optimizer = WarmUpOptim(warmup_steps=args.warmup_steps,
                            d_model=args.d_model,
                            optimizer=optim.Adam(model.parameters(),
                                                 betas=(args.beta1,
                                                        args.beta2),
                                                 eps=10e-9))

    trainer = Trainer(optimizer=optimizer,
                      train_loader=train_loader,
                      test_loader=valid_loader,
                      n_step=args.n_step,
                      device=device,
                      save_path=args.save_path,
                      enc_sos_idx=enc_sos_idx,
                      enc_eos_idx=enc_eos_idx,
                      dec_sos_idx=dec_sos_idx,
                      dec_eos_idx=dec_eos_idx,
                      metrics_method=args.metrics_method,
                      verbose=args.verbose)
    print("Start Training...")
    trainer.main(model=model, loss_function=loss_function)
Example #13
# Capture one reference point for "today"
# (In case the script starts just before midnight, but then
#  rolls over halfway through)
today = datetime.date.today()

#%% Load some data

selected_data = [
    ('datetime', ('datetime', 'datetime64[ms]'), 0),
    ('CPU temp (°C)', ('cpu_temperature', 'f4'), np.NaN),
    ('sun_ambient (°C)', ('sunambient', 'f4'), np.NaN),
    ('spa1 (raw)', ('spa1', 'f4'), np.NaN),
    ('spa2 (raw)', ('spa2', 'f4'), np.NaN),
    ('spa3 (raw)', ('spa3', 'f4'), np.NaN),
]

d = dataloader.get_data(hours_to_load=7 * 24, selected_channels=selected_data)

# Helper function


@ticker.FuncFormatter
def c2f_formatter(x, pos):
    deg_F = ((x / 5) * 9) + 32
    return f"{deg_F:0.1f}"


#%% Plot function


def make_plot(fnum,
              x,
Example #14
def train():
    cur_dir = os.getcwd()
    save_dir, modeltype = 'models/MLP', 'mlp'
    save_dir_path = os.path.join(cur_dir, save_dir)
    if not os.path.exists(save_dir_path):
        os.makedirs(save_dir_path)
    print(save_dir)

    train_batch_size = 200
    val_batch_size = 200
    trainloader, dataiter, lexicon, train_data_size = loader.get_data(
        batch_size=train_batch_size)
    valloader = loader.get_data(mode='val',
                                dataiter=dataiter,
                                lexicon=lexicon,
                                batch_size=val_batch_size)

    with open(os.path.join(save_dir, 'lexicon.pkl'), 'wb') as lexiconfile:
        pickle.dump(lexicon, lexiconfile)

    dict_args = {
        "gender_vocab_size": len(lexicon.lexicon["Gender"].cat2index),
        "ethnicity_vocab_size": len(lexicon.lexicon["Ethnicity"].cat2index),
        "admtype_vocab_size": len(lexicon.lexicon["AdmissionType"].cat2index),
        "embedding_dim": 3,
        "hidden_dim": 200,
        "numerical_dim": 18,
        "num_steps": 6,
        "output_dim": 1
    }

    model = MLP(dict_args)
    print(dict_args)

    learning_rate = 1
    #criterion = nn.NLLLoss()
    #criterion = nn.CrossEntropyLoss()
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adadelta(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=learning_rate,
                               rho=0.9,
                               eps=1e-06,
                               weight_decay=0)
    #learning_rate = 0.1
    #optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.1)

    if USE_CUDA:
        model = model.cuda()
        criterion = criterion.cuda()

    print("Start training...")
    num_epochs = 10
    for epoch in range(num_epochs):

        start_time = time.time()
        for i, batch in enumerate(trainloader):
            numericalfeatures = batch[0]  #batch_size, num_steps, num_features
            categoricalfeatures = batch[
                1]  #batch_size, num_steps, num_features
            labels = batch[2]  #batch_size

            categoricalfeatures = categoricalfeatures.permute(2, 1, 0)
            gfeatures = categoricalfeatures[
                dataiter.categoricfeatureindex["Gender"]][0]
            efeatures = categoricalfeatures[
                dataiter.categoricfeatureindex["Ethnicity"]][0]
            afeatures = categoricalfeatures[
                dataiter.categoricfeatureindex["AdmissionType"]][0]

            load_time = time.time()
            #######Load Data
            if USE_CUDA:
                numericalfeatures = numericalfeatures.cuda()
                gfeatures = gfeatures.cuda()
                efeatures = efeatures.cuda()
                afeatures = afeatures.cuda()
                labels = labels.cuda()
            cuda_time = time.time()

            #######Forward
            model = model.train()
            optimizer.zero_grad()
            logprobs = model(numericalfeatures, gfeatures, efeatures,
                             afeatures)
            model_time = time.time()

            loss = criterion(logprobs.squeeze(), labels.float())

            loss_time = time.time()

            #######Backward
            loss.backward(retain_graph=False)
            optimizer.step()

            opt_time = time.time()

            #######Report
            if ((i + 1) % 50 == 0):
                accuracy, bayes, _, _, _ = evaluate(model, valloader, dataiter)
                print("Accuracy : ", accuracy, bayes)
                #torch.cuda.empty_cache()
                print('Epoch: [{0}/{1}], Step: [{2}/{3}], Train Loss: {4}'.format(
                    epoch + 1, num_epochs, i + 1,
                    train_data_size // train_batch_size, loss.item()))

                if not os.path.isdir(
                        os.path.join(save_dir, "epoch{}_{}".format(epoch, i))):
                    os.makedirs(
                        os.path.join(save_dir, "epoch{}_{}".format(epoch, i)))
                filename = modeltype + '.pth'
                file = open(
                    os.path.join(save_dir, "epoch{}_{}".format(epoch, i),
                                 filename), 'wb')
                torch.save(
                    {
                        'state_dict': model.state_dict(),
                        'dict_args': dict_args
                    }, file)
                print('Saving the model to {}'.format(
                    save_dir + "epoch{}_{}".format(epoch, i)))
                file.close()

            #print("Load : {0}, Cuda : {1}, Model : {2}, Loss : {3}, Opt : {4}".format(start_time-load_time, load_time - cuda_time, cuda_time - model_time, model_time - loss_time, loss_time-opt_time))
            start_time = time.time()
        '''if(epoch%1 == 0): #After how many epochs
Example #15
def train(args):
    try:
        """
        ['se_resnet_18','se_resnet_34','se_resnet_50','se_resnet_101','se_resnet_152',]
        """
        # ---------------------    Save the training results to excel    -----------------------
        path = os.path.join(args.save_result_path,
                            args.model_name).replace("\\", "/")
        if not os.path.exists(path):
            os.makedirs(path)

        best_model_acc = 0
        best_matrix = None
        best_model = None
        Writer = pd.ExcelWriter('{}/train_{}_cross_validation.xlsx'.format(
            path, args.k))
        Matrix = pd.ExcelWriter('{}/test_{}_cross_validation.xlsx'.format(
            path, args.k))

        for i in range(args.k):
            try:
                net = importlib.import_module("models.{}".format(
                    args.model_name.split(".")[0]))
                model = get_model(args=args, net=net).to(args.device)
                start_time = time.time()
                # ----------------------------     Load dataset    -------------------------------------
                train_dataset, train_dataloader, train_datasize = dataloader.get_data(
                    args, i, flag='train')
                test_dataset, test_dataloader, test_datasize = dataloader.get_data(
                    args, i, flag='test')

                print('[Training data]:[{}]\n[Testing data]:[{}]'.format(
                    train_datasize, test_datasize))

                # -------------------------     Model Initialization    --------------------------------
                model.train()
                criterion = nn.CrossEntropyLoss()
                optimizer = optim.Adam(model.parameters(), lr=args.lr)
                lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer,
                                                         step_size=1,
                                                         gamma=0.95)
                # decay the lr by gamma every step_size epochs

                # ---------------------------     Steps computaion    ----------------------------------
                total_steps = len(train_dataloader)  # steps per epoch, used for progress printing
                total_trained_samples = 0  # running sample count, used for the average accuracy
                total_right_pred = 0
                total_loss = 0
                epochs = []
                learning_rates = []
                train_losses = []
                train_accuracies = []
                test_accuracies = []
                for epoch in range(args.epochs):
                    for step, (batch_image,
                               batch_label) in enumerate(train_dataloader):
                        try:
                            inputs = Variable(batch_image.to(args.device))
                            labels = Variable(batch_label.to(args.device))

                            total_trained_samples += labels.size(0)
                            optimizer.zero_grad()  # param initialization

                            outputs = model(inputs)
                            _, prediction = torch.max(outputs.data, 1)
                            step_loss = criterion(outputs, labels)
                            total_right_pred += (
                                prediction == labels).sum().item()
                            total_loss += step_loss.item()
                            acc = total_right_pred / total_trained_samples
                            print(
                                'Train:[{}/{}]-Epoch:[{}/{}]-Lr[{:.4e}]-Step:[{}/{}]--Loss:[{:.4f}]--Acc[{:.2%}]'
                                .format(i, args.k, epoch, args.epochs,
                                        lr_scheduler.get_lr()[0], step,
                                        total_steps, step_loss.item(), acc))

                            step_loss.backward()
                            optimizer.step()
                        except Exception as err:
                            print('Error : train step  ----- >>', err)
                            print("Error : train step  ---- >>",
                                  err.__traceback__.tb_lineno)

                    test_acc, statistic_matrix = test.test(
                        args, test_dataloader, model, epoch, i)
                    if test_acc > best_model_acc:
                        best_model_acc = test_acc
                        best_model = copy.deepcopy(model.state_dict())
                        best_matrix = statistic_matrix

                    epochs.append(epoch)
                    learning_rates.append(lr_scheduler.get_lr()[0])
                    train_losses.append(total_loss / total_trained_samples)
                    train_accuracies.append(total_right_pred /
                                            total_trained_samples)
                    test_accuracies.append(test_acc)
                    lr_scheduler.step()
                    model.to(args.device)
                    model.train()
                train_epoch_pd = pd.DataFrame({
                    "epoch": epochs,
                    'learning_rate': learning_rates,
                    "train_loss": train_losses,
                    "train_acc": train_accuracies,
                    "test_acc": test_accuracies
                })
                train_epoch_pd.to_excel(Writer,
                                        sheet_name='folder_{}'.format(i))

                test_matrix = pd.DataFrame(best_matrix)
                test_matrix.to_excel(Matrix,
                                     sheet_name='folder_{}_{}'.format(
                                         i, best_model_acc))

                Writer.save()
                Matrix.save()
                torch.save(
                    model,
                    '{}/{}_{}_{}.pth'.format(args.save_model_path,
                                             args.model_name, args.k, i))
            except Exception as err:
                print("Error:train epoch  ---- >>", err)
                print("Error:train epoch  ---- >>",
                      err.__traceback__.tb_lineno)
    except Exception as err:
        print("Error:train folder  ---- >>", err)
        print("Error:train folder ---- >>", err.__traceback__.tb_lineno)
Example #16
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))

#%% Load some data

selected_data = [
    ('datetime', ('datetime', 'datetime64[ms]'), 0),
    ('CPU temp (°C)', ('cpu_temperature', 'f4'), np.NaN),
    ('sun_ambient (°C)', ('sunambient', 'f4'), np.NaN),
    ('spa1 (raw)', ('spa1', 'f4'), np.NaN),
    ('spa2 (raw)', ('spa2', 'f4'), np.NaN),
    ('spa3 (raw)', ('spa3', 'f4'), np.NaN),
]

d = dataloader.get_data(
    hours_to_load=27, selected_channels=selected_data
)  # We want the last 24 hours, +2 more to correlate trends

#%% Inner plot function


@ticker.FuncFormatter
def c2f_formatter(
    x, pos
):  # Argument names x and pos chosen for clarity w.r.t. the matplotlib documentation
    deg_F = ((x / 5) * 9) + 32
    return f"{deg_F:0.1f}"


def make_plot(fnum,
              x,
Example #17
elif (params['dataset'] == 'CelebA'):
    from models.celeba_model import Generator, Discriminator, DHead, QHead
elif (params['dataset'] == 'FashionMNIST'):
    from models.mnist_model import Generator, Discriminator, DHead, QHead

# Set random seed for reproducibility.
seed = 1123
random.seed(seed)
torch.manual_seed(seed)
print("Random Seed: ", seed)

# Use GPU if available.
device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
print(device, " will be used.\n")

dataloader = get_data(params['dataset'], params['batch_size'])

# Set appropriate hyperparameters depending on the dataset used.
# The values given in the InfoGAN paper are used.
# num_z : dimension of incompressible noise.
# num_dis_c : number of discrete latent code used.
# dis_c_dim : dimension of discrete latent code.
# num_con_c : number of continuous latent code used.
if (params['dataset'] == 'MNIST'):
    params['num_z'] = 62
    params['num_dis_c'] = 1
    params['dis_c_dim'] = 10
    params['num_con_c'] = 2
elif (params['dataset'] == 'SVHN'):
    params['num_z'] = 124
    params['num_dis_c'] = 4
Example #18
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from model import Net
from dataloader import get_data
import torch.nn as nn


def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


train, test, classes = get_data()
dataiter = iter(train)
images, labels = next(dataiter)

imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(1)))

net = Net()
net = net.train()

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
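Example #18 stops right after the loss and optimizer are created. A standard loop that could follow, assuming `train` yields `(inputs, labels)` batches (the epoch count and logging interval are illustrative):

for epoch in range(2):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train):
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if (i + 1) % 200 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0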
Example #19
        "Default : 'trained_model'+ current datetime (datetime is added itself)"
    )  # Model Save
    parser.add_argument(
        '--save_epoch',
        type=int,
        default=5,
        help='Epoch at which model checkpoint is saved, default: 5'
    )  # Saving epochs after

    args = parser.parse_args()
    print(args)

    model_name = args.model_path + str(datetime.datetime.now())
    os.mkdir(model_name)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    train_loader = get_data(model_name, args.batch_size, args.num_workers)

    S = SharedNetwork().to(device)
    D = Discriminator().to(device)
    Q = Recogniser().to(device)
    G = Generator().to(device)
    S.apply(init_weights)
    D.apply(init_weights)
    Q.apply(init_weights)
    G.apply(init_weights)

    criterionD = nn.BCELoss().to(device)
    classifyQ = nn.CrossEntropyLoss().to(device)
    contiQ = NormalNLLLoss()

    optimD = optim.Adam([{
Example #20
# Capture one reference point for "today"
# (In case the script starts just before midnight, but then
#  rolls over halfway through)
today = datetime.date.today()

#%% Load some data, and setup

water_volume = 128e3  # liters, which is taken to be kg in this script
heat_capacity = 4186  # J/kg/K

pre_filter_cutoff_freq_per_hour = 0.8
post_filter_cutoff_freq_per_hour = 6.2
samples_per_hour = 20

d = dataloader.get_data(hours_to_load=7 * 24,
                        samples_per_hour=samples_per_hour)

#%% For the 'muricans, show degrees F


@ticker.FuncFormatter
def c2f_formatter(x, pos):
    deg_F = ((x / 5) * 9) + 32
    return f"{deg_F:0.1f}"


#%% Plot function


def make_plot(fnum, x, ydata, ylab, xlim=None, fill_in=True):
Example #21
import datetime

import dataloader

# Path to current folder for output files
import os
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

# Capture one reference point for "today" 
# (In case the script starts just before midnight, but then
#  rolls over halfway through)
today = datetime.date.today()

#%% Load some data

d = dataloader.get_data(hours_to_load=7*24)

@ticker.FuncFormatter
def c2f_formatter(x, pos):
    deg_F = ((x/5)*9)+32
    return f"{deg_F:0.1f}"

#%% Plot function

def make_plot(fnum, x, ydata, ylab, ylab2, xlim=None):


    major_locator = mdates.HourLocator(interval=12)   # every 12 hours
    minor_locator = mdates.HourLocator(interval=2)  # every 2 hours
    x_major_fmt = mdates.DateFormatter('%H:%M')
    
Example #22
def main(counter, prediction_interval, loss_function, data_root_path):

    print('prediction_interval:', prediction_interval)
    print('loss_function:', loss_function)
    print('data:', data_root_path)

    out_dir = 'results/' + datetime.now().strftime(
        '%Y%m%d%H%M%S') + 'p_' + str(prediction_interval) + 'out_' + str(
            counter) + '/'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    rng = np.random.RandomState()

    # load data
    sequence_length = 500
    n_train = 2400
    n_test = 100
    n_data = sequence_length * (n_train + n_test)
    input_length = 18 + 3 * prediction_interval
    percept_length = 18
    sigma_min = 1e-4
    n_epochs = 400

    print('load data ({0})...'.format(n_data))
    data = dataloader.get_data(0,
                               n_data + 1,
                               n_actions=3,
                               prediction_interval=prediction_interval,
                               data_path_lab=data_root_path + 'labels.dat',
                               data_path_act=data_root_path + 'actions.dat')
    inputs, percepts = dataloader.make_batches(
        data,
        sequence_length,
        n_actions=3,
        prediction_interval=prediction_interval,
        crop_end=2)
    x_train = inputs[:n_train]
    y_train = percepts[:n_train]

    x_test = inputs[n_train:]
    y_test = percepts[n_train:]

    # n_hidden = rng.choice([100, 200, 500])
    n_hidden = rng.randint(500, 800)
    early_stopping_patience = 3
    n_additional = rng.randint(800, 1000)
    # n_additional = rng.choice([0, 100, 200])
    # n_additional = 0

    print('build model...')
    model = Sequential()
    model.add(LSTM(n_hidden, input_dim=input_length, return_sequences=True))

    if n_additional > 0:
        model.add(LSTM(n_additional, return_sequences=True))

    # model.add(Activation('tanh'))
    # model.add(Dropout(p=0.5))
    output_length = 2 * percept_length if loss_function in (
        'gauss', 'laplace') else percept_length
    model.add(TimeDistributedDense(output_length))
    model.add(Activation('sigmoid'))

    # model = Sequential()
    # model.add(TimeDistributedDense(n_hidden,
    #                input_dim=input_length))
    # model.add(Activation('tanh'))
    # # model.add(Dropout(p=0.5))
    # output_length = 2*percept_length if loss_function in ('gauss', 'laplace') else percept_length
    # model.add(TimeDistributedDense(output_length))
    # model.add(Activation('sigmoid'))

    tic = time.time()

    def sigma_mu_loss_(y_true, y_pred):
        return sigma_mu_loss(percept_length, sigma_min, y_true, y_pred)

    def laplace_loss_(y_true, y_pred):
        return laplace_loss(percept_length, sigma_min, y_true, y_pred)

    if loss_function == 'gauss':
        loss = sigma_mu_loss_
    elif loss_function == 'laplace':
        loss = laplace_loss_
    else:
        loss = loss_function

    # optimizer = SGD(lr=0.01, momentum=0.8, nesterov=True, clipnorm=5.0)
    optimizer = Adam(clipnorm=5.0)
    model.compile(loss=loss, optimizer=optimizer)
    compile_time = time.time() - tic
    print('Compile time: {0} sec'.format(compile_time))

    early_stopping = EarlyStopping(monitor='val_loss',
                                   patience=early_stopping_patience)
    model_checkpoint = ModelCheckpoint(out_dir + 'model_checkpoint.h5',
                                       monitor='val_loss',
                                       save_best_only=True)

    choice_list = [
        prediction_interval, n_hidden, early_stopping_patience, n_additional,
        compile_time
    ]
    print('choices:', choice_list)
    save_list(choice_list, out_dir + '_choice.dat')
    json_string = model.to_json()
    save_list([json_string], out_dir + 'architecture.json')

    print('start training...')

    tic = time.time()
    model.fit(x_train,
              y_train,
              batch_size=1,
              nb_epoch=n_epochs,
              validation_split=0.1,
              callbacks=[early_stopping, model_checkpoint],
              shuffle=False)
    training_duration = time.time() - tic

    score = model.evaluate(x_test, y_test, batch_size=1)

    save_list([training_duration, score], out_dir + 'result.dat')

    model.save_weights(out_dir + 'weights.h5', overwrite=True)
Example #23
    # methods = ['hierarchical/euclidean', 'hierarchical/cityblock', 'hierarchical/DTW', 'kmeans']
    methods = ['kmeans']

    # data_sets = ['Irish_2010', 'London_2013']
    data_sets = ['London_2013']

    path = os.path.abspath(os.path.join(os.getcwd()))

    attr = pd.read_csv(os.path.join(path, 'data',
                                    'London_2013_attr_final.csv'))
    attr['Cate'] = attr['Cate'] - 1

    for times in range(1, 11):
        for data_set in data_sets:

            data = get_data(path, data_set)

            for method in methods:
                for n_clusters in range(5, 6):
                    for month in range(1, 13):

                        weather = get_weather(path, data_set, month)
                        week = get_dow(data_set, month)
                        day = get_hod(month)

                        # path_cluster = os.path.join(path, 'result', data_set, 'clustering', 'point', method, f'n_clusters_{n_clusters}.csv')
                        # clusters = pd.read_csv(path_cluster, header=None)

                        series = data[:, month - 1, :months[month - 1] * 24]

                        print('times:', times, ', data_set:', data_set,
Example #24
    for model in models:
        parameters += list(model.parameters())

    return torch.optim.Adam(parameters, lr)

device = current_device()

n_features = 256
latent_dim = 1
latent_dim_cont = 0
categorical_dim = 10
batch_size = 100
lr = 1e-4
n_epochs = 100

dataloader, _, _ = get_data(batch_size)

models = {
    'classifier': Classifier(latent_dim, categorical_dim),
}

for model in models.values():
    model.to(device)
    model.apply(weights_init_normal)

optimizers = {
    'all': create_optimizer([
        models['classifier'],
    ], lr)
}
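The combined optimizer above updates every registered model at once. A rough sketch of how a training step might use it, assuming the loader yields `(images, labels)` batches and that `Classifier` maps an image batch to `categorical_dim` logits (neither is confirmed by the snippet):

import torch.nn.functional as F

for images, labels in dataloader:
    images, labels = images.to(device), labels.to(device)
    optimizers['all'].zero_grad()
    logits = models['classifier'](images)
    loss = F.cross_entropy(logits, labels)
    loss.backward()
    optimizers['all'].step()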
Example #25
    'epoch_num': 50,  # Number of epochs to train for.
    'learning_rate': 1e-3,  # Learning rate.
    'beta1': 0.5,
    'clip': 5.0,
    'save_epoch': 1,  # After how many epochs to save checkpoints and generate test output.
    'channel': None  # Number of channels in the image (3 for RGB, etc.).
}

# Use GPU if available, else use CPU.
device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
print(device, " will be used.\n")

params['device'] = device

train_loader = get_data(params)
params['channel'] = 3
"""
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data/', train='train', download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor()])),
    batch_size=params['batch_size'], shuffle=True)

params['channel'] = 1
"""

# Plot the training images.
sample_batch = next(iter(train_loader))
plt.figure(figsize=(16, 16))
plt.axis("off")
Example #26
File: plot.py  Project: zheyuye/TwitterSA
model_dir = './B96_lr1e-06_s1.0_0903_1905/'
config = BertConfig(num_labels=3, output_attentions=True)
# PRETRAINED_WEIGHTS = "bert-base-cased"
config.from_pretrained('bert-base-cased')
model = BertAttn(config,
                 option='emoji',
                 dropout=0.1,
                 gpu=False,
                 seed=0,
                 do_lower_case=False)
# model.set_focal_loss(alpha=class_weights,gamma=-1)
model.load_model(True, model_dir)
# model.bert.save_pretrained('./bert-cased/')

class_weights, train, dev, test = get_data(option='emoji',
                                           dataset_size=1,
                                           unbalanced=False)

# In[8]:

x_train, _, y_train = train
x_dev, _, y_dev = dev
x_test, emoji_test, y_test = test

# In[9]:

model.set_focal_loss(alpha=class_weights, gamma=-1)
print(model.device)
# test_predictions(model, test,"test.csv", batch_size=96)

# In[10]:
Example #27
nz = 100
# Number of training epochs
epochs = 100
save_epoch = 10
# Learning rate for optimizers
lr = 0.0002
# Hyper-parameters for Adam optimizers
beta1 = 0.5
beta2 = 0.999

# Decide which device we want to run on
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device, " will be used.\n")

# Create the dataloader
dataloader = get_data(dataset, batch_size, image_size)

# Plot some training images
real_batch = next(iter(dataloader))
fig = plt.figure(figsize=(8, 8))
plt.axis("off")
plt.title("Training Images")
plt.imshow(np.transpose(
    vutils.make_grid(real_batch[0].to(device)[:64], padding=2,
                     normalize=True).cpu(), (1, 2, 0)))
plt.show()
plt.close(fig)

model = DCGAN(Generator, Discriminator, weights_init, epochs, batch_size, nc,
              image_size, save_epoch, nz, lr, beta1, beta2, 1, 0, device)
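The DCGAN script above expects `get_data(dataset, batch_size, image_size)` to return a PyTorch `DataLoader`. A plausible sketch, not the project's actual loader (the dataset names, root paths, and normalization constants are assumptions):

import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms

def get_data(dataset, batch_size, image_size):
    """Return a DataLoader of training images for the named dataset (assumed behaviour)."""
    if dataset == 'MNIST':
        transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,)),
        ])
        data = dsets.MNIST('data/', train=True, download=True, transform=transform)
    else:
        # Generic folder-of-images fallback; 'data/celeba' is a placeholder path.
        transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        data = dsets.ImageFolder('data/celeba', transform=transform)
    return torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)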
Example #28
def main(args):

    class_weights, train, dev, test = get_data(option=args.option,
                                               dataset_size=args.dataset_size,
                                               unbalanced=args.unbalanced)

    option = args.option
    using_GPU = torch.cuda.is_available() and args.using_GPU

    config = BertConfig(num_labels=len(TASK_LABELS[option]),
                        output_attentions=True)
    # config = BertConfig()

    if args.model_type == 'BertOrigin':
        from pretrained.BertOrigin import BertOrigin
        modelcreator = BertOrigin
    elif args.model_type == 'BertCNN':
        from pretrained.BertCNN import BertCNN
        modelcreator = BertCNN
    elif args.model_type == 'BertAttn':
        from pretrained.BertAttn import BertAttn
        modelcreator = BertAttn

    if args.do_train:
        # create and train model
        #BertConfig
        config.from_pretrained(PRETRAINED_WEIGHTS)
        # print('before load',config)
        model = modelcreator(config,
                             option=option,
                             dropout=args.dropout,
                             gpu=using_GPU,
                             seed=args.seed,
                             do_lower_case=args.do_lower_case)
        # freeze the BERT parameters
        if args.frozen:
            for param in model.bert.parameters():
                param.requires_grad = False
            print_params(model)

        # optimizer and Warmup Schedule
        model_params = list(model.named_parameters())
        # print_params(model)
        # set the weight decay of LayerNorm and bias to zero
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                param for name, param in model_params
                if not any(nd in name for nd in no_decay)
            ],
            'weight_decay': 0.01
        }, {
            'params': [
                param for name, param in model_params
                if any(nd in name for nd in no_decay)
            ],
            'weight_decay': 0.0
        }]

        if args.optimizer == 'Adam':
            optimizer = torch.optim.Adam(optimizer_grouped_parameters,
                                         lr=args.learning_rate)
        elif args.optimizer == 'AdamW':
            optimizer = AdamW(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              correct_bias=args.correct_bias)
        elif args.optimizer == 'SGD':
            optimizer = torch.optim.SGD(optimizer_grouped_parameters,
                                        lr=args.learning_rate,
                                        momentum=0.5)

        scheduler = None
        if args.warmup_proportion != 0:
            num_total_steps = int(len(train) / args.batch_size) * args.epochs
            # 1. implements AdamW without compensation for the bias
            # 2. implements a weight decay fix
            if args.warmup_schedules == 'linear':
                scheduler = WarmupLinearSchedule(optimizer,
                                                 warmup_steps=num_total_steps *
                                                 args.warmup_proportion,
                                                 t_total=num_total_steps,
                                                 last_epoch=-1)
            elif args.warmup_schedules == 'constant':
                scheduler = WarmupConstantSchedule(
                    optimizer,
                    warmup_steps=num_total_steps * args.warmup_proportion,
                    t_total=num_total_steps,
                    last_epoch=-1)
            elif args.warmup_schedules == 'cosine':
                scheduler = WarmupCosineSchedule(optimizer,
                                                 warmup_steps=num_total_steps *
                                                 args.warmup_proportion,
                                                 t_total=num_total_steps,
                                                 cycles=0.5,
                                                 last_epoch=-1)

        # build the data loaders (with optional sampling)
        train_dataloader = dataloader(train,
                                      MAX_SEQ_LENGTH,
                                      model.tokenizer,
                                      args.batch_size,
                                      is_sample=args.sample)
        dev_dataloader = dataloader(dev, MAX_SEQ_LENGTH, model.tokenizer,
                                    args.batch_size)

        # reload for pretraining
        model.set_focal_loss(alpha=class_weights, gamma=args.gamma)
        model.load_model(args.model_load, args.model_dir)

        model_saved_path = do_train(model,
                                    train_dataloader,
                                    dev_dataloader,
                                    args.epochs,
                                    optimizer,
                                    scheduler,
                                    args.dataset_size,
                                    args.early_stop,
                                    args.print_step,
                                    args.gradient_accumulation_steps,
                                    args.batch_size,
                                    args.learning_rate,
                                    model_path=PATH_CONFIG)

        test_predictions(model, test, model_saved_path[:-1] + ".csv",
                         args.batch_size)
    elif args.model_dir:
        config.from_pretrained(PRETRAINED_WEIGHTS)
        model = modelcreator(config,
                             option=option,
                             dropout=args.dropout,
                             gpu=using_GPU,
                             seed=args.seed,
                             do_lower_case=args.do_lower_case)

        model.set_focal_loss(alpha=class_weights, gamma=args.gamma)
        model.load_model(args.model_load, args.model_dir)
        # model_dir = "./results/B64_lr1e-05_s0.01_0819_2023/"
        test_predictions(model,
                         test,
                         args.model_dir[:-1] + ".csv",
                         batch_size=args.batch_size)
Example #29
import dataloader

# Path to current folder for output files
import os
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))

# Capture one reference point for "today"
# (In case the script starts just before midnight, but then
#  rolls over halfway through)
today = datetime.date.today()

#%% Load some data

d = dataloader.get_data(hours_to_load=12 * 31 * 24)


@ticker.FuncFormatter
def c2f_formatter(x, pos):
    deg_F = ((x / 5) * 9) + 32
    return f"{deg_F:0.1f}"


#%% Plot function


def make_plot(fnum, x, ydata, ylab, ylab2, xlim=None):

    major_locator = mdates.MonthLocator()
    minor_locator = mdates.WeekdayLocator(byweekday=1, interval=1)
Example #30
from dataloader import get_data
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from augmentation import plot_image
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
from sklearn.model_selection import validation_curve, learning_curve
import pickle
from sklearn.model_selection import GridSearchCV
from confusion_matrix import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

np.random.seed(42)

train_set_1, train_labels = get_data('IXI-T1-Preprocessed', 'mean', 0, 256)
test_set_1, test_labels = get_data('IXI-T1-Preprocessed', 'mean', 0, 256, False)

train_set_2, _ = get_data('IXI-T1-Preprocessed', 'mean', 1, 256)
test_set_2, _ = get_data('IXI-T1-Preprocessed', 'mean', 1, 256, False)

train_set_3, _ = get_data('IXI-T1-Preprocessed', 'mean', 2, 256)
test_set_3, _ = get_data('IXI-T1-Preprocessed', 'mean', 2, 256, False)

train_set_4, _ = get_data('IXI-T1-Preprocessed', 'slice', 0, 256)
test_set_4, _ = get_data('IXI-T1-Preprocessed', 'slice', 0, 256, False)

train_set_5, _ = get_data('IXI-T1-Preprocessed', 'slice', 1, 256)
test_set_5, _ = get_data('IXI-T1-Preprocessed', 'slice', 1, 256, False)

train_set_6, _ = get_data('IXI-T1-Preprocessed', 'slice', 2, 256)
test_set_6, _ = get_data('IXI-T1-Preprocessed', 'slice', 2, 256, False)
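Example #30 is cut off before any classifier is fitted. A possible continuation using the names already defined above (the hyperparameters and the choice of averaging are illustrative, not from the source):

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(train_set_1, train_labels)
predictions = clf.predict(test_set_1)
print('accuracy:', accuracy_score(test_labels, predictions))
print('weighted F1:', f1_score(test_labels, predictions, average='weighted'))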