Ejemplo n.º 1
0
 def __init__(self, model, use_cuda_if_available=True):
     """Keep a reference to the model and start with empty bookkeeping.

     Args:
         model: the model object this instance will operate on.
         use_cuda_if_available: when truthy, the ``cuda`` flag is switched
             on provided ``cuda.is_available()`` reports a usable device.
     """
     self.model = model
     # True only when the caller allows CUDA *and* a device is present.
     self.cuda = bool(use_cuda_if_available and cuda.is_available())
     self.loss_history = list()
     self.__input_hooks = list()
Ejemplo n.º 2
0
    def __init__(self, gen_model, disc_model, use_cuda_if_available=True):
        """Keep references to both networks and start with empty histories.

        Args:
            gen_model: the generator network.
            disc_model: the discriminator network.
            use_cuda_if_available: when truthy, the ``cuda`` flag is switched
                on provided ``cuda.is_available()`` reports a usable device.
        """
        # The working directory at construction time is recorded as model_dir.
        self.model_dir = os.getcwd()
        self.gen_model, self.disc_model = gen_model, disc_model

        # True only when the caller allows CUDA *and* a device is present.
        self.cuda = bool(use_cuda_if_available and cuda.is_available())

        self.gen_loss_history, self.disc_loss_history = [], []
        self.__input_hooks = []
Ejemplo n.º 3
0
 def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=False, momentum=0.01,
              dilation=1, groups=1, device='cuda:0' if cuda.is_available() else 'cpu', norm_layer=nn.BatchNorm2d, **kwargs):
     """Compose a ``ConvBN`` stage with a ``Mish`` activation.

     Every named argument is forwarded positionally, in declaration order,
     to ``ConvBN``. Extra keyword arguments are accepted but not used.
     The ``device`` default is evaluated once, when the method is defined.
     """
     super().__init__()
     conv_bn_args = (
         in_channels, out_channels, kernel_size, stride, padding,
         bias, momentum, dilation, groups, device, norm_layer,
     )
     self.cb = ConvBN(*conv_bn_args)
     self.act = Mish()
Ejemplo n.º 4
0
    # Remaining edges of the crop box; top/bottom are placed symmetrically
    # around height / 2 (``left`` is computed above this excerpt).
    top = (height - new_height) / 2
    right = (width + new_width) / 2
    bottom = (height + new_height) / 2
    img = img.crop((left, top, right, bottom))

    # Move the last axis to the front (presumably HWC -> CHW — confirm) and
    # scale the values by 1/256.
    # NOTE(review): 8-bit pixel data tops out at 255, so this maps to
    # [0, 255/256] rather than [0, 1] — confirm the divisor is intended.
    img = np.array(img).transpose((2, 0, 1)) / 256

    # Per-channel mean / std, shaped (3, 1, 1) so they broadcast over the two
    # trailing axes. The values match the commonly used ImageNet statistics.
    means = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
    stds = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

    # Standardise each channel: (x - mean) / std.
    img = img - means
    img = img / stds

    # Convert the NumPy array into a tensor of torch's default tensor type.
    img_tensor = torch.Tensor(img)

    return img_tensor


# Checkpoint file to restore the model from.
checkpoint_path = 'vgg16-transfer-4.pth'
train_on_gpu = cuda.is_available()

# ``multi_gpu`` is always bound now. Previously it was only assigned inside
# the GPU branch, so any code reading it on a CPU-only host hit a NameError.
multi_gpu = False
if train_on_gpu:
    gpu_count = cuda.device_count()
    multi_gpu = gpu_count > 1

# load_checkpoint is expected to return a (model, optimizer) pair.
loaded_model, optimizer = load_checkpoint(path=checkpoint_path)

print(predict("pictures/test.jpg", loaded_model))
# Name of the constructor that is looked up on the ``models`` module below.
model_name = 'resnet50V4'

# Dataset directories and their file extensions (presumably the image
# extension used in each directory — confirm against the loading code).
data_root = '/Users/yefeichen/Database/furniture/collect_from_matterport_all0823/'
data_root_ext = '.png'
test_root = '/Users/yefeichen/Database/furniture/chair_from_digital/'
test_root_ext = '.jpg'
# Number of output classes passed to the model constructor.
# N_Cls = 109
N_Cls = 253

# NOTE(review): ``model_root`` must already be bound before this point; its
# definition is outside this excerpt.
model_root = Path(model_root)

# Build the network by name, passing the class count and pretrained='imagenet'.
model = getattr(models, model_name)(num_classes=N_Cls, pretrained='imagenet')

# model = torch.nn.DataParallel(model)

use_cuda = cuda.is_available()

# Move the model to the GPU when one is available.
if use_cuda:
    model = model.cuda()

# NOTE(review): ``ckpt`` is defined outside this excerpt.
model_path = os.path.join(model_root, ckpt)
# Presumably loads weights saved from a DataParallel/GPU model into ``model``
# (inferred from the helper's name only — confirm).
load_par_gpu_model_cpu(model, model_path)

print('weights loaded!')

# Counters initialised to zero; their use is outside this excerpt.
fcnt = 0
ecnt = 0

# loading npy
# Accumulators, filled by code outside this excerpt.
featList = []
nameList = []
            preprocess.process_fact(fact)
print(f"Vocab size = {preprocess.vocab_size}")

# Vocabulary-driven dimensions: the input and output vocabularies are the
# same size, taken from the preprocessing object.
INPUT_DIM = OUTPUT_DIM = preprocess.vocab_size
# Embedding and hidden dimensions share one value.
EMBEDDING_DIM = HIDDEN_DIM = 100

# Maximum lengths for context, fact and description sequences.
MAX_CONTEXT_LEN = MAX_FACT_LEN = MAX_DESC_LEN = 60
BATCH_SIZE = 100

# Use CUDA whenever torch reports a usable device.
USE_CUDA = bool(cuda.is_available())
#****************************** INPUT MODULE ********************************************
#****************************************************************************************
class PositionalFactEncoder(nn.Module):
    """Module that builds a position-dependent weight grid for embedded facts.

    NOTE(review): only the beginning of ``forward`` is visible in this
    excerpt; how the weight grid is combined with the input is not shown.
    The formula looks like the position-encoding weighting used in
    end-to-end memory networks — confirm.
    """
    def __init__(self):
        # Only the base-class initialiser runs; nothing else is set up here.
        super(PositionalFactEncoder, self).__init__()

    def forward(self, embedded_sentence, fact_lengths):
        """Start of the forward pass: compute the positional weight grid.

        Args:
            embedded_sentence: tensor whose ``size()`` unpacks into exactly
                three values; the last two are used as ``slen`` and ``elen``
                (presumably sentence length and embedding size — confirm).
            fact_lengths: not referenced in the visible part of this method.
        """

        # The leading dimension is ignored when building the grid.
        _, slen, elen = embedded_sentence.size()

        # l[s][e] = (1 - s/(slen-1)) - (e/(elen-1)) * (1 - 2*s/(slen-1))
        # NOTE(review): this divides by (slen-1) and (elen-1), so an input
        # with slen == 1 or elen == 1 raises ZeroDivisionError.
        l = [[(1 - s/(slen-1)) - (e/(elen-1)) * (1 - 2*s/(slen-1)) for e in range(elen)] for s in range(slen)]
        l = torch.FloatTensor(l)
        # Add a leading axis of size 1, giving shape (1, slen, elen).
        l = l.unsqueeze(0)
Ejemplo n.º 7
0
    from utils.datasets import UsageBasedDataset, RecognitionDataset

    # Unpack the parsed command-line options into descriptive local names.
    train_root = args.t
    eval_root = args.e
    eval_indices = args.ei
    eval_files = args.ef
    logfile = args.l
    save = args.w
    best = args.b
    # NOTE(review): the option is called ``nc`` but is used below as
    # "use CUDA when available" — confirm the flag's polarity against the
    # argument parser.
    force_cuda = args.nc

    # Construct model, optimizer, and criterion
    darknet19: RPYOLOv2 = RPYOLOv2()
    # Presumably switches the network into its recognition configuration
    # (inferred from the method name only — confirm).
    darknet19.recognizing()

    # Move the model to the GPU only when requested and a device exists.
    if force_cuda and cuda.is_available():
        darknet19 = darknet19.cuda()
    # The optimizer is created after the optional move to the GPU.
    adam = Adam(darknet19.parameters(), lr=.0001, weight_decay=.0005)
    crit = CrossEntropyLoss()

    # Construct dataset variables
    image_resolution = (224, 224)

    # Construct training dataset and loader
    train_transform = Compose([
        Lambda(maybe_blur),
        Lambda(maybe_darken_a_lot),
        Lambda(maybe_rotate),
        Lambda(maybe_random_perspective),
        Lambda(maybe_random_crop),
        Lambda(maybe_random_erase),
Ejemplo n.º 8
0
def validate_model(Net,
                   seed,
                   mini_batch_size=100,
                   optimizer=optim.Adam,
                   criterion=nn.CrossEntropyLoss(),
                   n_epochs=40,
                   eta=1e-3,
                   lambda_l2=0,
                   alpha=0.5,
                   beta=0.5,
                   plot=True,
                   rotate=False,
                   translate=False,
                   swap_channel=False,
                   GPU=False):
    """Train one network from a given seed and report its test metrics.

    General:

        - Train a network whose weights are initialized with a specific
          seed over ``n_epochs``.
        - Build the train / validation / test data and record the train and
          validation loss and accuracy at every epoch.
        - Optionally display the learning curves.

    Input:

        - Net : network description dictionary from the <Nets> class. It is
          read only; this function does not modify it.
        - seed : seed for the torch pseudo random number generators.
        - mini_batch_size, optimizer, criterion, n_epochs, lambda_l2, alpha,
          beta : forwarded to ``train_model`` (see training.py).
        - eta : accepted for interface compatibility but not used; the
          learning rate is taken from ``Net['learning rate']`` (or
          ``Net['learning rate augm']`` for an augmented
          'LeNet_sharing_aux' run).
        - plot : if true, plot the learning curves -> default true.
        - rotate, translate, swap_channel : data augmentation switches
          (see loader.py).
        - GPU : if true and CUDA is available, run on the GPU.

    Output: prints the loss and accuracy on the test set, and shows the
    learning curves when ``plot`` is true.

    Raises:
        ValueError: if ``Net['net_type']`` is not a supported network kind.
    """

    # Fail early, with a clear message, on an unknown network kind.
    # Previously this surfaced later as an unbound ``model`` variable.
    net_type = Net['net_type']
    supported = ('Net2c', 'LeNet_sharing', 'LeNet_sharing_aux', 'Google_Net')
    if net_type not in supported:
        raise ValueError(
            "Unsupported net_type {!r}; expected one of {}".format(
                net_type, ', '.join(supported)))

    # set the pytorch seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # The python RNG is always seeded with 0 here, independently of ``seed``,
    # before the dataset objects are built.
    random.seed(0)

    # create the dataset
    data = PairSetMNIST()
    train_data = Training_set(data)
    test_data = Test_set(data)
    train_data_split = Training_set_split(train_data, rotate, translate,
                                          swap_channel)
    validation_data = Validation_set(train_data)

    augmented = bool(rotate or translate or swap_channel)
    build = Net['net']
    # The learning rate lives in a local so the caller's dictionary is never
    # mutated (an augmented run used to overwrite Net['learning rate']).
    learning_rate_key = 'learning rate'

    # construct the net type with its tuned parameters
    if net_type == 'Net2c':
        model = build(nb_hidden=Net['hidden_layers'],
                      dropout_prob=Net['drop_prob'])
    elif net_type == 'LeNet_sharing':
        model = build(nb_hidden=Net['hidden_layers'],
                      dropout_ws=Net['drop_prob_ws'],
                      dropout_comp=Net['drop_prob_comp'])
    elif net_type == 'LeNet_sharing_aux':
        # with data augmentation use the parameters tuned for augmentation,
        # otherwise the ones tuned without it
        if augmented:
            learning_rate_key = 'learning rate augm'
            drop_aux_key = 'drop_prob_aux_augm'
            drop_comp_key = 'drop_prob_comp_augm'
        else:
            drop_aux_key = 'drop_prob_aux'
            drop_comp_key = 'drop_prob_comp'
        model = build(nbhidden_aux=Net['hidden_layers_aux'],
                      nbhidden_comp=Net['hidden_layers_comp'],
                      drop_prob_aux=Net[drop_aux_key],
                      drop_prob_comp=Net[drop_comp_key])
    else:  # 'Google_Net'
        model = build(channels_1x1=Net['channels_1x1'],
                      channels_3x3=Net['channels_3x3'],
                      channels_5x5=Net['channels_5x5'],
                      pool_channels=Net['pool_channels'],
                      nhidden=Net['hidden_layers'],
                      drop_prob_comp=Net['drop_prob_comp'],
                      drop_prob_aux=Net['drop_prob_aux'])
    learning_rate = Net[learning_rate_key]

    use_gpu = GPU and cuda.is_available()
    device = torch.device('cuda' if use_gpu else 'cpu')

    model = model.to(device)

    # train the model on the train set and validate at each epoch
    train_losses, train_acc, valid_losses, valid_acc = train_model(
        model, train_data_split, validation_data, device, mini_batch_size,
        optimizer, criterion, n_epochs, learning_rate, lambda_l2, alpha,
        beta)

    if plot:
        learning_curve(train_losses, train_acc, valid_losses, valid_acc)

    # loss and accuracy of the network on the test
    test_loss, test_accuracy = compute_metrics(model, test_data, device)

    print('\nTest Set | Loss: {:.4f} | Accuracy: {:.2f}%\n'.format(
        test_loss, test_accuracy))
Ejemplo n.º 9
0
import torch
import dataset
import model
import cv2
import os
import torch.nn as nn
import numpy as np
import time
import torch.cuda as cuda
import glob

if __name__ == '__main__':

    # Pick the physical GPU *before* the first CUDA query. The CUDA runtime
    # reads CUDA_VISIBLE_DEVICES when it initialises, so setting the variable
    # after cuda.is_available() can leave the mask without effect.
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    gpu_avail = cuda.is_available()

    scale = 0.8
    img_dir = '/home/zhangtk/data/test/0/'
    # img_dir = r'E:\data\rain_full\rain_imgs\test\2'
    # img_list = os.listdir(img_dir)
    # Collect the base names of every .jpg directly inside img_dir.
    img_list_ = glob.glob(os.path.join(img_dir, '*.jpg'))
    img_list = [os.path.basename(i) for i in img_list_]

    # save_dir = f'/home/zhangtk/data/test/0_test_ztkvgg1_{scale}/'
    save_dir = os.path.join(img_dir, 'result_mobilenet_conv1x1_new_930')

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    start = time.time()
Ejemplo n.º 10
0
def main():
    """Run the ``input_grad`` explanation pass of the location recommender.

    Parses the command line, loads the company and location feature tables,
    builds the model named by ``--model``, restores weights from
    ``<run_root>/<ckpt>`` when that file exists, freezes the model, and then
    calls ``predict_with_reason`` on the candidate pairs of the first city.
    The parsed arguments are also dumped to ``<run_root>/params.json``.
    """
    # cmd and arg parser
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--mode', choices=['input_grad'], default='input_grad')
    arg('--run_root', default='result/location_recommend_model_v6_5city_191113')
    arg('--fold', type=int, default=0)
    arg('--model', default='location_recommend_model_v6')
    arg('--ckpt', type=str, default='model_loss_best.pt')
    arg('--pretrained', type=str, default='imagenet')  # resnet 1, resnext imagenet
    arg('--batch-size', type=int, default=1)
    # Update the gradients every `step` batches
    # (sample num = step * batch-size * inner_size).
    # Declared as int: with type=str a value given on the command line was
    # parsed as a string while the default stayed the integer 8.
    arg('--step', type=int, default=8)
    arg('--workers', type=int, default=16)
    arg('--lr', type=float, default=3e-4)
    arg('--patience', type=int, default=4)
    arg('--clean', action='store_true')
    arg('--n-epochs', type=int, default=1)
    arg('--epoch-size', type=int)
    arg('--tta', type=int, default=1)
    arg('--use-sample', action='store_true', help='use a sample of the dataset')
    arg('--debug', action='store_true')
    arg('--imgsize', type=int, default=256)
    arg('--finetuning', action='store_true')
    arg('--cos_sim_loss', action='store_true')
    arg('--ensemble', action='store_true')
    # sample part of testing data for evaluating during training
    arg('--sample_rate', type=float, default=1.0)
    arg('--testStep', type=int, default=500000)
    arg('--query_location', action='store_true', help='use location as query')
    arg('--apps', type=str, default='_191113.csv')
    arg('--pre_name', type=str, default='sampled_ww_')

    # cuda version T/F
    use_cuda = cuda.is_available()

    args = parser.parse_args()
    # run_root: model/weights root
    run_root = Path(args.run_root)

    # The chosen model name is published as a module-level global.
    global model_name
    model_name = args.model

    df_comp_feat = pd.read_csv(pjoin(TR_DATA_ROOT, 'company_feat' + args.apps), index_col=0)
    df_loc_feat = pd.read_csv(pjoin(TR_DATA_ROOT, 'location_feat' + args.apps), index_col=0)

    # Feature names: every column but the first of each table, company first.
    feat_name = list(df_comp_feat.columns)[1:] + list(df_loc_feat.columns)[1:]
    print(len(feat_name))

    clfile = ['PA', 'SF', 'SJ', 'LA', 'NY']
    cfile = ['dnb_pa.csv', 'dnb_sf.csv', 'dnb_sj.csv', 'dnb_Los_Angeles.csv', 'dnb_New_York.csv']
    lfile = 'location_scorecard_191113.csv'

    clfile = [c + args.apps for c in clfile]
    pre_name = args.pre_name
    # e.g. 'sampled_ww_PA_similarity_191113.csv' with the default options
    pred_save_name = [pre_name + c.replace(args.apps, '') + '_similarity' + args.apps for c in clfile]

    # Dont use ensemble score
    df_ensemble = pd.DataFrame(columns=['Blank'])

    loc_name_dict = translocname2dict(df_loc_feat)
    print('Location Embedding Number: %d' % len(loc_name_dict))

    ##::DataLoader
    def make_loader(df_comp_feat: pd.DataFrame, df_loc_feat: pd.DataFrame, df_pair: pd.DataFrame, emb_dict: dict, df_ensemble,
                    name='train', flag_ensemble=args.ensemble, testStep=args.testStep, shuffle=True) -> DataLoader:
        """Wrap a TrainDatasetLocationRS over the given tables in a DataLoader."""
        return DataLoader(
            TrainDatasetLocationRS(df_comp_feat=df_comp_feat, df_loc_feat=df_loc_feat, df_pair=df_pair, df_ensemble_score=df_ensemble,
                                   emb_dict=emb_dict, name=name, flag_ensemble=flag_ensemble,
                                   negN=nNegTr, posN=nPosTr, testStep=testStep),
            shuffle=shuffle,
            batch_size=args.batch_size,
            num_workers=args.workers,
            collate_fn=collate_TrainDatasetLocationRS
        )

    # Not used in this version
    criterion = softmax_loss
    lossType = 'softmax'

    # se- ception dpn can only use finetuned model from imagenet
    # model = getattr(models, args.model)(feat_comp_dim=102, feat_loc_dim=23) #location_recommend_model_v1
    model = getattr(rsmodels, args.model)(feat_comp_dim=102, feat_loc_dim=23, embedding_num=len(loc_name_dict))  # location_recommend_model_v3

    md_path = run_root / args.ckpt
    if md_path.exists():
        print('load weights from %s' % md_path)
        load_model(model, md_path)
    else:
        # Make the fallback visible instead of silently running without the
        # checkpoint's weights.
        print('no checkpoint found at %s; model weights were not loaded' % md_path)

    model.freeze()

    # gpu first
    if use_cuda:
        model = model.cuda()

    # print(model)
    if args.mode == 'input_grad':
        # NOTE(review): --clean removes run_root, which also holds the
        # checkpoint file that was just loaded above.
        if run_root.exists() and args.clean:
            shutil.rmtree(run_root)
        run_root.mkdir(exist_ok=True, parents=True)
        (run_root / 'params.json').write_text(
            json.dumps(vars(args), indent=4, sort_keys=True))

        # Only the first city is processed in this version.
        for ind_city in range(1):
            print('Operating %s...' % pred_save_name[ind_city])
            testing_pair = pd.read_csv(pjoin(TR_DATA_ROOT, pred_save_name[ind_city]))[['atlas_location_uuid', 'duns_number']]
            # Every candidate pair gets a label column set to 0.
            testing_pair['label'] = 0
            testing_pair = testing_pair[['duns_number', 'atlas_location_uuid', 'label']]

            predict_loader = make_loader(df_comp_feat=df_comp_feat, df_loc_feat=df_loc_feat, df_pair=testing_pair,
                                         emb_dict=loc_name_dict, df_ensemble=df_ensemble, name='valid', shuffle=False)

            predict_with_reason(model=model, criterion=criterion, predict_loader=predict_loader, use_cuda=use_cuda, test_pair=testing_pair,
                                feat_name=feat_name, save_name=pred_save_name[ind_city], pre_name='dlsub2_')
Ejemplo n.º 11
0
    def get_model_ddp(
        cls,
        rank,
        local_leader_rank,
        log,
        net_version,
        pretrained,
        freeze,
    ):  # @DontTrace
        '''
        Obtain a resnet18 or resnet50 model for
        this process. On a GPU machine the local
        leader process loads the model first (downloading
        it if it is not cached yet); the other
        processes wait at a barrier and then load
        it as well.

        :param rank: this process' rank
            in the distributed data processing sense
        :type rank: int
        :param local_leader_rank: the lowest rank on this machine
        :type local_leader_rank: int
        :param log: logging service to log to
        :type log: LoggingService
        :param net_version: which resnet version to obtain
        :type net_version: int
        :param pretrained: if true, the pre-trained version
            is obtained. Else initial weights are undefined
        :type pretrained: bool
        :param freeze: how many layers to
            freeze, protecting them from training.
        :type freeze: int
        :return: the model after cls.freeze_model_layers()
            was applied to it
        :raise ValueError: if net_version is neither 18 nor 50
        '''

        if net_version not in (18, 50):
            raise ValueError("Resnet version must be 18 or 50")

        hostname = socket.gethostname()
        arch = 'resnet18' if net_version == 18 else 'resnet50'

        def load_resnet():
            # Single place for the hub call shared by all three cases.
            return hub.load('pytorch/vision:v0.6.0',
                            arch,
                            pretrained=pretrained)

        # Let the local leader download
        # the model from the Internet,
        # in case it is not already cached
        # locally:

        # Case 1: not on a GPU machine.
        # The availability flag is tested directly. The earlier form,
        # `device = device(...)`, made `device` a local name and therefore
        # raised UnboundLocalError on every call.
        if not cuda.is_available():
            model = load_resnet()

        # Case2a: GPU machine, and this is this machine's
        #         leader process. So it is responsible for
        #         downloading the model if it is not cached:
        elif rank == local_leader_rank:
            log.info(f"Procss with rank {rank} on {hostname} loading model")
            model = load_resnet()

            # Allow the others on this machine
            # to load the model (guaranteed to
            # be locally cached now):
            log.info(
                f"Procss with rank {rank} on {hostname} waiting for others to laod model"
            )
            dist.barrier()
        # Case 2b: GPU machine, but not the local leader. Just
        #          wait for the local leader to be done downloading:
        else:
            # Wait for leader to download the
            # model for everyone on this machine:
            log.info(
                f"Process with rank {rank} on {hostname} waiting for leader to laod model"
            )
            dist.barrier()
            # Get the cached version:
            log.info(f"Procss with rank {rank} on {hostname} laoding model")
            model = load_resnet()

        model = cls.freeze_model_layers(model, freeze)

        return model
Ejemplo n.º 12
0
def test():
    """Smoke-run the input summary and both one-batch training helpers.

    Builds small discriminator / generator / reconstructor / scorer models
    with plain SGD optimizers and feeds them random integer batches.
    """
    vocab = 233
    seq_len = 13
    n_rows = 311
    hidden = 141
    depth = 2
    eps = Epsilon(0, 0)
    replay = Storage(0, multistep=True, tdlambda=0.9)
    stepper = Step(1)
    reward = 0
    on = "cuda" if cuda.is_available() else "cpu"
    print(f"on: {on}")
    rate = 1e-3
    SOS, PAD = 0, 1

    # Models are created in the same order as before so that their random
    # initialisation draws from the RNG in the same sequence.
    dis = D(vocab, hidden, on=on, num_layers=1).to(on)
    gen = G(vocab, hidden, seq_len, on=on, sos_pad=(SOS, PAD),
            num_layers=depth).to(on)
    rec = R(vocab, hidden, seq_len, on=on, sos=SOS, num_layers=depth).to(on)
    scr = S(vocab, hidden, on=on, num_layers=1).to(on)

    dis_opt = SGD(dis.parameters(), lr=rate)
    gen_opt = SGD(gen.parameters(), lr=rate)
    rec_opt = SGD(rec.parameters(), lr=rate)
    scr_opt = SGD(scr.parameters(), lr=rate)

    print("S")
    word_to_index = {"0": 0, "1": 1, "2": 2, "__UNK__": "u", "__PAD__": 99}
    index_to_word = [str(i) for i in range(vocab)]
    summarize_input(gen, wi_iw=[word_to_index, index_to_word])

    print("F")
    first = torch.randint(0, vocab, (n_rows, seq_len)).to(on)
    second = torch.randint(0, vocab, (n_rows, seq_len)).to(on)
    third = torch.randint(0, 5, (n_rows, )).to(on)
    F_train_one_batch(
        (first, second, third),
        dis, gen, gen, rec, scr,
        dis_opt, gen_opt, rec_opt, scr_opt,
        F.binary_cross_entropy, F.mse_loss, F.cross_entropy, F.cross_entropy,
        eps, stepper, replay, reward,
        "weight",
    )

    print("N")

    class TestDataset(Dataset):
        """Dataset over one random integer tensor."""

        def __init__(self, low, high, shape):
            self.data = torch.randint(low, high, shape)

        def __len__(self):
            return len(self.data)

        def __getitem__(self, index):
            return self.data[index]

        def to(self, device):
            # Moves the backing tensor and returns self so calls can chain.
            self.data = self.data.to(device)
            return self

    dataset = TestDataset(0, vocab, (seq_len, n_rows)).to(on)
    print(dataset[:].shape)
    train_one_batch(
        dataset[:],
        dis, gen, gen, rec,
        dis_opt, gen_opt, rec_opt,
        F.binary_cross_entropy, F.mse_loss, F.cross_entropy,
        eps, stepper, replay, reward,
        "weight",
    )
Ejemplo n.º 13
0
 def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False, momentum=0.01, dilation=1, groups=1,
              device='cuda:0' if cuda.is_available() else 'cpu', norm_layer=nn.BatchNorm2d, **kwargs):
     """Build a convolution followed by batch normalisation.

     The two layers are passed positionally to the parent constructor
     (presumably ``nn.Sequential`` — the base class is outside this excerpt).

     Args:
         in_channels, out_channels, kernel_size, stride, padding, dilation,
             groups: forwarded to ``nn.Conv2d``.
         momentum: forwarded to ``nn.BatchNorm2d``.
         bias, device, norm_layer, **kwargs: accepted but not used; the
             convolution is always bias-free and the normalisation layer is
             always ``nn.BatchNorm2d``.
             NOTE(review): honouring ``bias`` / ``norm_layer`` would change
             the module's parameters and therefore its state_dict layout, so
             they are deliberately left as they were — confirm the intent.
     """
     super(ConvBN, self).__init__(
         # `dilation` is now forwarded; it used to be dropped, so callers
         # asking for a dilated convolution silently got dilation=1.
         nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding,
                   dilation=dilation, groups=groups, bias=False),
         nn.BatchNorm2d(out_channels, momentum=momentum),
     )
Ejemplo n.º 14
0
def train_model(model: Module, optimizer: optim.Optimizer, epochs: int,
                data_dir: str = '../../data/AsianSampleCategory/',
                batch_size: int = 32):
    """Train vanilla cnn pytorch model.

    Images are read with ``ImageFolder`` from ``<data_dir>/train`` and
    ``<data_dir>/val``. Every epoch runs a training phase and a validation
    phase; the weights with the best validation accuracy are restored into
    the model before it is returned.

    Args:
        model: Module, pytorch model
        optimizer: Optimizer, already bound to ``model``'s parameters
        epochs: int, training epochs
        data_dir: str, directory containing the ``train`` and ``val`` folders
        batch_size: int, mini-batch size of both data loaders
    Returns:
        model: Module, trained model
        history: dict, per-epoch ``loss`` and ``acc`` lists (python floats)
            for the ``train`` and ``val`` phases
    """
    # NOTE(review): the validation pipeline also applies random flips and
    # rotations, so validation scores are not deterministic — confirm this
    # is intended.
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((128, 128)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(120),
            transforms.Resize((128, 128)),
            transforms.RandomRotation(40),
            transforms.ToTensor(),
        ]),
        'val':
        transforms.Compose([
            transforms.Resize((128, 128)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ToTensor(),
        ])
    }
    phases = ['train', 'val']
    image_datasets = {
        x: ImageFolder(os.path.join(data_dir, x), data_transforms[x])
        for x in phases
    }
    dataloaders = {
        x: DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True)
        for x in phases
    }
    data_sizes = {x: len(image_datasets[x]) for x in phases}

    # Move the model once, up front, instead of on every batch. Moving a
    # module keeps its Parameter objects, so the optimizer stays valid.
    device = torch.device('cuda' if cuda.is_available() else 'cpu')
    model = model.to(device)

    criterion = CrossEntropyLoss()

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    history = {
        'loss': {
            'train': [],
            'val': []
        },
        'acc': {
            'train': [],
            'val': []
        }
    }

    for epoch in range(epochs):
        print('Epoch {}/{}:\t'.format(epoch, epochs - 1), end='')

        # Each epoch has a training and validation phase
        for phase in phases:
            training = phase == 'train'
            model.train(training)  # train mode / evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradients are only tracked while training, so the
                # validation phase does not build an autograd graph
                with torch.set_grad_enabled(training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if training:
                        loss.backward()
                        optimizer.step()

                # statistics, accumulated as plain python numbers:
                # `loss.data[0]` fails on 0-dim tensors (PyTorch >= 0.4) and
                # tensor counters turned the accuracy into a tensor.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            epoch_loss = running_loss / data_sizes[phase]
            epoch_acc = running_corrects / data_sizes[phase]

            history['loss'][phase].append(epoch_loss)
            history['acc'][phase].append(epoch_acc)
            print('{} Loss: {:.4f} Acc: {:.4f}\t'.format(
                phase, epoch_loss, epoch_acc),
                  end='')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # '{:.4f}' (precision) rather than '{:4f}' (width), matching the
    # per-epoch report above.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, history
Ejemplo n.º 15
0
def linux_with_gpu():
    """Tell whether the machine runs Linux and reports an available GPU.

    The GPU probe (``is_available``) is evaluated first, then the OS check
    (``is_linux``); the result is ``is_linux() and <gpu probe result>``.
    """
    gpu_found = is_available()
    on_linux = is_linux()
    return on_linux and gpu_found
Ejemplo n.º 16
0
    def train(
        self,
        base_path: Union[Path, str],
        sequence_length: int,
        learning_rate: float = 20,
        mini_batch_size: int = 100,
        anneal_factor: float = 0.25,
        patience: int = 10,
        clip=0.25,
        max_epochs: int = 1000,
        checkpoint: bool = False,
        grow_to_sequence_length: int = 0,
        num_workers: int = 2,
        use_amp: bool = False,
        amp_opt_level: str = "O1",
        **kwargs,
    ):
        """Train the language model split by split, then evaluate on the test data.

        For every training split the model is trained, evaluated on the
        validation data, the LR scheduler is stepped with the validation loss,
        an optional checkpoint is written, and the model is saved to
        ``best-lm.pt`` whenever the validation loss improves. A summary line is
        appended to ``loss.txt`` after each split and after the final test run.
        A ``KeyboardInterrupt`` stops training early but the final test
        evaluation is still performed.

        Args:
            base_path: directory for ``training.log``, ``loss.txt``,
                ``best-lm.pt`` and checkpoints (created if missing).
            sequence_length: number of positions per training batch slice.
            learning_rate: initial learning rate handed to the optimizer.
            mini_batch_size: batch size used by ``_batchify`` / ``init_hidden``.
            anneal_factor: ``factor`` passed to the plateau scheduler.
            patience: ``patience`` passed to the plateau scheduler.
            clip: maximum gradient norm for ``clip_grad_norm_``.
            max_epochs: training runs for epochs ``self.epoch .. max_epochs - 1``.
            checkpoint: if True, write ``checkpoint.pt`` after every split.
            grow_to_sequence_length: while ``sequence_length`` is below this
                value it is increased by one at the start of every split.
            num_workers: worker count for the training ``DataLoader``.
            use_amp: enable apex mixed-precision training.
            amp_opt_level: apex optimisation level passed to ``amp.initialize``.
            **kwargs: forwarded to the optimizer constructor.

        Raises:
            RuntimeError: if ``use_amp`` is set but apex is unusable, or if the
                validation data batchifies to a single row.
            Exception: if a training batch is not on CUDA although CUDA is
                available.
        """

        if use_amp:
            if sys.version_info < (3, 0):
                raise RuntimeError(
                    "Apex currently only supports Python 3. Aborting.")
            if amp is None:
                raise RuntimeError(
                    "Failed to import apex. Please install apex from https://www.github.com/nvidia/apex "
                    "to enable mixed-precision training.")

        # cast string to Path (isinstance also accepts str subclasses)
        if isinstance(base_path, str):
            base_path = Path(base_path)

        add_file_handler(log, base_path / "training.log")

        number_of_splits: int = len(self.corpus.train)

        val_data = self._batchify(self.corpus.valid, mini_batch_size)

        # error message if the validation dataset is too small
        if val_data.size(0) == 1:
            raise RuntimeError(
                f"ERROR: Your validation dataset is too small. For your mini_batch_size, the data needs to "
                f"consist of at least {mini_batch_size * 2} characters!")

        base_path.mkdir(parents=True, exist_ok=True)
        loss_txt = base_path / "loss.txt"
        savefile = base_path / "best-lm.pt"

        try:
            best_val_loss = self.loss
            optimizer = self.optimizer(self.model.parameters(),
                                       lr=learning_rate,
                                       **kwargs)
            if self.optimizer_state is not None:
                optimizer.load_state_dict(self.optimizer_state)

            # Optimizers with decoupled weight decay get the matching scheduler.
            if isinstance(optimizer, (AdamW, SGDW)):
                scheduler: ReduceLRWDOnPlateau = ReduceLRWDOnPlateau(
                    optimizer,
                    verbose=True,
                    factor=anneal_factor,
                    patience=patience)
            else:
                scheduler: ReduceLROnPlateau = ReduceLROnPlateau(
                    optimizer,
                    verbose=True,
                    factor=anneal_factor,
                    patience=patience)

            if use_amp:
                self.model, optimizer = amp.initialize(self.model,
                                                       optimizer,
                                                       opt_level=amp_opt_level)

            # First pass walks the splits in order; later epochs shuffle them.
            training_generator = DataLoader(self.corpus.train,
                                            shuffle=False,
                                            num_workers=num_workers)

            for epoch in range(self.epoch, max_epochs):
                epoch_start_time = time.time()
                # Shuffle training files randomly after serially iterating through corpus one
                if epoch > 0:
                    training_generator = DataLoader(self.corpus.train,
                                                    shuffle=True,
                                                    num_workers=num_workers)
                    self.model.save_checkpoint(
                        base_path / f"epoch_{epoch}.pt",
                        optimizer,
                        epoch,
                        0,
                        best_val_loss,
                    )

                # iterate through training data, starting at self.split (for checkpointing)
                # NOTE(review): enumerate only offsets the counter; it does not
                # skip the first self.split slices — confirm this is intended.
                for curr_split, train_slice in enumerate(
                        training_generator, self.split):

                    if sequence_length < grow_to_sequence_length:
                        sequence_length += 1
                    log.info(f"Sequence length is {sequence_length}")

                    split_start_time = time.time()
                    # off by one for printing
                    curr_split += 1
                    train_data = self._batchify(train_slice.flatten(),
                                                mini_batch_size)

                    log.info(
                        "Split %d" % curr_split +
                        "\t - ({:%H:%M:%S})".format(datetime.datetime.now()))

                    # Remember the current LR (last param group wins) for the summary line.
                    for group in optimizer.param_groups:
                        learning_rate = group["lr"]

                    # go into train mode
                    self.model.train()

                    # reset variables
                    hidden = self.model.init_hidden(mini_batch_size)

                    # dictionary size; used to flatten the output for the loss
                    ntokens = len(self.corpus.dictionary)

                    total_loss = 0
                    start_time = time.time()

                    for batch, i in enumerate(
                            range(0,
                                  train_data.size(0) - 1, sequence_length)):
                        data, targets = self._get_batch(
                            train_data, i, sequence_length)

                        if not data.is_cuda and cuda.is_available():
                            log.info(
                                "Batch %d is not on CUDA, training will be very slow"
                                % (batch))
                            raise Exception("data isnt on cuda")

                        self.model.zero_grad()
                        optimizer.zero_grad()

                        # do the forward pass in the model
                        output, rnn_output, hidden = self.model.forward(
                            data, hidden)

                        # try to predict the targets
                        loss = self.loss_function(output.view(-1, ntokens),
                                                  targets)
                        # Backward
                        if use_amp:
                            with amp.scale_loss(loss,
                                                optimizer) as scaled_loss:
                                scaled_loss.backward()
                        else:
                            loss.backward()

                        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                       clip)

                        optimizer.step()

                        total_loss += loss.data

                        # We detach the hidden state from how it was previously produced.
                        # If we didn't, the model would try backpropagating all the way to start of the dataset.
                        hidden = self._repackage_hidden(hidden)

                        # explicitly remove loss to clear up memory
                        del loss, output, rnn_output

                        if batch % self.log_interval == 0 and batch > 0:
                            cur_loss = total_loss.item() / self.log_interval
                            elapsed = time.time() - start_time
                            log.info(
                                "| split {:3d} /{:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | "
                                "loss {:5.2f} | ppl {:8.2f}".format(
                                    curr_split,
                                    number_of_splits,
                                    batch,
                                    len(train_data) // sequence_length,
                                    elapsed * 1000 / self.log_interval,
                                    cur_loss,
                                    math.exp(cur_loss),
                                ))
                            total_loss = 0
                            start_time = time.time()

                    log.info("%d seconds for train split %d" %
                             (time.time() - split_start_time, curr_split))

                    ###############################################################################
                    self.model.eval()

                    val_loss = self.evaluate(val_data, mini_batch_size,
                                             sequence_length)
                    scheduler.step(val_loss)

                    log.info("best loss so far {:5.2f}".format(best_val_loss))

                    log.info(self.model.generate_text())

                    if checkpoint:
                        self.model.save_checkpoint(
                            base_path / "checkpoint.pt",
                            optimizer,
                            epoch,
                            curr_split,
                            best_val_loss,
                        )

                    # Save the model if the validation loss is the best we've seen so far.
                    if val_loss < best_val_loss:
                        # Update the best loss first so the score attached to
                        # the saved model is this model's own validation loss,
                        # not the previous best.
                        best_val_loss = val_loss
                        self.model.best_score = best_val_loss
                        self.model.save(savefile)

                    ###############################################################################
                    # print info
                    ###############################################################################
                    log.info("-" * 89)

                    summary = (
                        "| end of split {:3d} /{:3d} | epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | "
                        "valid ppl {:8.2f} | learning rate {:3.4f}".format(
                            curr_split,
                            number_of_splits,
                            epoch + 1,
                            (time.time() - split_start_time),
                            val_loss,
                            math.exp(val_loss),
                            learning_rate,
                        ))

                    with open(loss_txt, "a") as myfile:
                        myfile.write("%s\n" % summary)

                    log.info(summary)
                    log.info("-" * 89)

                log.info("Epoch time: %.2f" % (time.time() - epoch_start_time))

        except KeyboardInterrupt:
            # Ctrl-C ends training but still falls through to the final test run.
            log.info("-" * 89)
            log.info("Exiting from training early")

        ###############################################################################
        # final testing
        ###############################################################################
        test_data = self._batchify(self.corpus.test, mini_batch_size)
        test_loss = self.evaluate(test_data, mini_batch_size, sequence_length)

        summary = "TEST: valid loss {:5.2f} | valid ppl {:8.2f}".format(
            test_loss, math.exp(test_loss))
        with open(loss_txt, "a") as myfile:
            myfile.write("%s\n" % summary)

        log.info(summary)
        log.info("-" * 89)
Ejemplo n.º 17
0
    def evaluate(
        self,
        num_samples: int = 10,
        report_every: int = 100,
        train_or_test: str = "test",
    ) -> dict:
        """Run inference over the train or test split and print accuracy figures.

        Args:
            num_samples: number of samples (clips) drawn per video; written to
                ``ds.dataset.num_samples`` before inference.
            report_every: print a progress line every this many examples
                (0 disables progress output).
            train_or_test: ``"train"`` or ``"test"`` — which split to evaluate.

        Returns:
            dict with the lists ``infer_times``, ``video_preds``,
            ``video_trues``, ``clip_preds`` and ``clip_trues``.
        """
        # assert train or test valid
        assert train_or_test in ["train", "test"]

        # set device and num_gpus
        num_gpus = num_devices()
        device = torch_device()
        torch.backends.cudnn.benchmark = cuda.is_available()

        # init model with gpu (or not)
        self.model.to(device)
        # Wrap only once: repeated calls must not nest DataParallel wrappers.
        if num_gpus > 1 and not isinstance(self.model, nn.DataParallel):
            self.model = nn.DataParallel(self.model)
        self.model.eval()

        # set train or test
        ds = (
            self.dataset.test_ds
            if train_or_test == "test"
            else self.dataset.train_ds
        )

        # set num_samples
        ds.dataset.num_samples = num_samples
        # Report the split that is actually evaluated, not always the test split.
        print(f"{len(ds)} samples of {ds[0][0][0].shape}")

        # Loop over the examples in the selected split and compute accuracies
        ret = dict(
            infer_times=[],
            video_preds=[],
            video_trues=[],
            clip_preds=[],
            clip_trues=[],
        )

        # inference
        with torch.no_grad():
            # NOTE(review): iteration starts at index 1, so example 0 is never
            # evaluated — kept as in the original; confirm whether intended.
            for i in range(1, len(ds)):
                if report_every and i % report_every == 0:
                    print(f"Processsing {i} of {len(ds)} samples..")

                # Get model inputs
                inputs, label = ds[i]
                inputs = inputs.to(device, non_blocking=True)

                # Run inference (timing includes the copy back to host memory)
                start_time = time()
                outputs = self.model(inputs)
                outputs = outputs.cpu().numpy()
                infer_time = time() - start_time
                ret["infer_times"].append(infer_time)

                # Store results: the video prediction sums the per-clip
                # outputs; clip predictions are taken row by row.
                ret["video_preds"].append(outputs.sum(axis=0).argmax())
                ret["video_trues"].append(label)
                ret["clip_preds"].extend(outputs.argmax(axis=1))
                ret["clip_trues"].extend([label] * num_samples)

        print(
            f"Avg. inference time per video ({num_samples} clips) =",
            round(np.array(ret["infer_times"]).mean() * 1000, 2),
            "ms",
        )
        print(
            "Video prediction accuracy =",
            round(accuracy_score(ret["video_trues"], ret["video_preds"]), 2),
        )
        print(
            "Clip prediction accuracy =",
            round(accuracy_score(ret["clip_trues"], ret["clip_preds"]), 2),
        )
        return ret
Ejemplo n.º 18
0
# Transformations and data augmentation.
# MNIST images are single-channel, so Normalize takes one mean/std value;
# three-value tuples fail with a shape error on current torchvision.
transformations = transforms.Compose([
    transforms.Resize(28),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load the training and test sets
train_set = MNIST(root="./data", train=True, transform=transformations, download=True)
test_set = MNIST(root="./data", train=False, transform=transformations, download=True)

train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size, num_workers=4)
test_loader = DataLoader(test_set, shuffle=False, batch_size=batch_size, num_workers=4)

# Move to GPU if available
if cuda.is_available():
    net.cuda()

# Set up the optimizer and a loss function
optimizer = Adam(net.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

# Top 1 train accuracy
train_metrics = tf.Accuracy(topK=1)

# Top 1 and Top 2 test accuracy
test_metrics_top1 = tf.Accuracy(name="Top 1 Acc ", topK=1)
test_metrics_top2 = tf.Accuracy(name="Top 2 Acc ", topK=2)

# Create an instance of the StandardModel
model = tf.StandardModel(net)
Ejemplo n.º 19
0
    def fit(self, train_cfgs):
        """Train ``self.model`` with SGD and save a checkpoint after every epoch.

        ``train_cfgs`` is wrapped in ``Config``. Keys read here: ``model_dir``,
        ``batch_size``, ``lr``, ``momentum``, ``weight_decay``, ``mixed_prec``,
        ``warmup_pct``, ``lr_decay_steps``, ``lr_decay_factor``, ``epochs``,
        ``grad_steps`` and ``model_name``.

        A one-cycle schedule is used when ``warmup_pct`` is set, a step decay
        when only ``lr_decay_steps`` is set, otherwise no scheduler.
        """
        train_cfgs = Config(train_cfgs)

        model_dir = train_cfgs.get('model_dir', "checkpoints")
        os.makedirs(model_dir, exist_ok=True)

        if not cuda.is_available():
            device = torch.device("cpu")
            num_devices = 1
        else:
            device = torch.device("cuda")
            num_devices = cuda.device_count()
            # Look for the optimal set of algorithms to use in cudnn. Use this only with fixed-size inputs.
            torch.backends.cudnn.benchmark = True

        # One loader per available split; only the training split is shuffled.
        data_loaders = {}
        for split_name, shuffle_split in (('train', True), ('valid', False)):
            split_ds = getattr(self, split_name + '_ds')
            if split_ds is None:
                continue
            data_loaders[split_name] = DataLoader(
                split_ds,
                batch_size=train_cfgs.get('batch_size', 8) * num_devices,
                shuffle=shuffle_split,
                # Torch 1.2 has a bug when num-workers > 0 (0 means run a main-processor worker)
                num_workers=0,
                pin_memory=True,
            )

        # Move model to gpu before constructing optimizers and amp.initialize
        self.model.to(device)

        # Collect the parameters that still require gradients.
        all_named_params = list(self.model.named_parameters())
        trainable = {
            param_name: tensor
            for param_name, tensor in all_named_params
            if tensor.requires_grad
        }

        print("Params to learn:")
        if len(trainable) != len(all_named_params):
            for param_name in trainable:
                print("\t{}".format(param_name))
        else:
            print("\tfull network")

        optimizer = optim.SGD(
            list(trainable.values()),
            lr=train_cfgs.lr,
            momentum=train_cfgs.momentum,
            weight_decay=train_cfgs.weight_decay,
        )

        # Use mixed-precision if available
        # Currently, only O1 works with DataParallel: See issues https://github.com/NVIDIA/apex/issues/227
        wants_mixed_prec = train_cfgs.get('mixed_prec', False)
        if wants_mixed_prec and AMP_AVAILABLE:
            # 'O0': Full FP32, 'O1': Conservative, 'O2': Standard, 'O3': Full FP16
            # (keep_batchnorm_fp32=True doesn't work on 'O1')
            self.model, optimizer = amp.initialize(
                self.model,
                optimizer,
                opt_level="O1",
                loss_scale="dynamic",
            )

        # Learning rate scheduler
        warmup_pct = train_cfgs.get('warmup_pct', None)
        lr_decay_steps = train_cfgs.get('lr_decay_steps', None)
        if warmup_pct is not None:
            # Use warmup with the one-cycle policy
            if lr_decay_steps is None:
                lr_decay_total_steps = train_cfgs.epochs
            else:
                lr_decay_total_steps = lr_decay_steps
            scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=train_cfgs.lr,
                total_steps=lr_decay_total_steps,
                pct_start=train_cfgs.get('warmup_pct', 0.3),
                base_momentum=0.9 * train_cfgs.momentum,
                max_momentum=train_cfgs.momentum,
                final_div_factor=1 / train_cfgs.get('lr_decay_factor', 0.0001),
            )
        elif lr_decay_steps is not None:
            # Simple step-decay
            lr_decay_total_steps = train_cfgs.epochs
            scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer,
                step_size=lr_decay_steps,
                gamma=train_cfgs.get('lr_decay_factor', 0.1),
            )
        else:
            scheduler = None

        # DataParallel after amp.initialize
        model = nn.DataParallel(self.model) if num_devices > 1 else self.model

        criterion = nn.CrossEntropyLoss().to(device)

        for epoch_no in range(1, train_cfgs.epochs + 1):
            print("Epoch {} ==========".format(epoch_no))
            if scheduler is not None:
                print("lr={}".format(scheduler.get_lr()))

            self.train_an_epoch(
                model,
                data_loaders,
                device,
                criterion,
                optimizer,
                grad_steps=train_cfgs.grad_steps,
                mixed_prec=train_cfgs.mixed_prec,
            )
            # Stop stepping once the schedule's total step count is reached.
            if scheduler is not None and epoch_no < lr_decay_total_steps:
                scheduler.step()

            checkpoint_name = "{model_name}_{epoch}.pt".format(
                model_name=train_cfgs.get('model_name', self.model_name),
                epoch=str(epoch_no).zfill(3),
            )
            self.save(os.path.join(model_dir, checkpoint_name))
Ejemplo n.º 20
0
 def __init__(self, out_planes, number, kernel_size=1, bias=True,
              device = 'cuda:0' if cuda.is_available() else 'cpu',
              norm_layer = nn.BatchNorm2d):
     """Build the pooling layer and the two-convolution gating stack.

     Args:
         out_planes: channel count entering the first and leaving the second conv.
         number: channel count between the two convs.
         kernel_size: accepted but not used here; both convs use kernel size 1.
         bias: whether the two convs carry a bias term.
         device: accepted but not used in this constructor.
         norm_layer: stored as-is (not instantiated) on ``self.bn``.
     """
     super(SEModule, self).__init__()
     self.bn = norm_layer
     self.avg_pool = nn.AdaptiveAvgPool2d(1)
     # Layers are created in the same order as they are applied.
     first_conv = nn.Conv2d(out_planes, number, kernel_size=1, bias=bias)
     second_conv = nn.Conv2d(number, out_planes, kernel_size=1, bias=bias)
     gate = Sigmoid()
     self.fc = nn.Sequential(first_conv, second_conv, gate)
Ejemplo n.º 21
0
def evaluate_model(Net,
                   seeds,
                   mini_batch_size=100,
                   optimizer=optim.Adam,
                   criterion=nn.CrossEntropyLoss(),
                   n_epochs=40,
                   eta=1e-3,
                   lambda_l2=0,
                   alpha=0.5,
                   beta=0.5,
                   plot=True,
                   statistics=True,
                   rotate=False,
                   translate=False,
                   swap_channel=False,
                   GPU=False):
    """
    General : one round of network training / validation per seed, with statistics

         - Repeat the training/validation procedure once for every seed in seeds
             1) At every seed -> reinitializes a network and a dataset : train, validation and test
             2) Weights initialization and data loading are using the seed
             3) Record the train and validation accuracy and loss and can display their evolution curve
             4) Compute the statistics at the end of each training for performance evaluation
                 i)  Training accuracy for each seed -> value at the end of the last epoch
                 ii) Validation accuracy for each seed -> value at the end of the last epoch
                 iii) Test accuracy for each seed -> computed on the test set after each training
                 -> display a boxplot of the statistics if statistics is true

     Input :

         - Net : A network dictionary from the <Nets> class (not modified by this function)
         - seeds : a list of seeds to iterate over for the pseudo random number generator
         -> mini_batch_size, optimizer, criterion, n_epochs, lambda_l2, alpha, beta see training.py
         - eta : accepted for interface compatibility; the learning rate actually used
                 comes from Net['learning rate'] (or Net['learning rate augm'])
         - plot : if true plot the learning curve evolution over the epochs -> default true
         - statistics : if true display the boxplot of the train, validation and test accuracies
         -> rotate, translate and swap_channel -> data augmentation see loader.py
         - GPU : if true and CUDA is available, run on the GPU

     Output :

         - train_results : A (len(seeds) x 4 x n_epochs) tensor
                             len(seeds) -> seed
                             4 -> train loss, train accuracy, validation loss, validation accuracy
                             n_epochs -> evolution during training
         - test_losses : A tensor of shape (len(seeds),) containing the test loss at each seed
         - test_accuracies : A tensor of shape (len(seeds),) containing the test accuracy at each seed

     Raises :

         - ValueError : if Net['net_type'] is not one of the supported network types

    """
    n_seeds = len(seeds)

    # tensor initialization to store the metrics
    train_results = torch.empty(n_seeds, 4, n_epochs)
    test_losses = []
    test_accuracies = []

    # any data augmentation requested?
    augmented = bool(rotate or translate or swap_channel)

    for n, seed in enumerate(seeds):

        # set the pytorch seed
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        # set the seed for random splitting of the dataset in training and validation
        random.seed(0)

        # load the dataset train, validation and test
        data = PairSetMNIST()
        train_data = Training_set(data)
        test_data = Test_set(data)
        train_data_split = Training_set_split(train_data, rotate, translate,
                                              swap_channel)
        validation_data = Validation_set(train_data)

        # Kept in a local so the caller's Net dictionary is never overwritten.
        learning_rate = Net['learning rate']

        # construct the net type with default parameter
        net_type = Net['net_type']
        if net_type == 'Net2c':
            model = Net['net'](nb_hidden=Net['hidden_layers'],
                               dropout_prob=Net['drop_prob'])
        elif net_type == 'LeNet_sharing':
            model = Net['net'](nb_hidden=Net['hidden_layers'],
                               dropout_ws=Net['drop_prob_ws'],
                               dropout_comp=Net['drop_prob_comp'])
        elif net_type == 'LeNet_sharing_aux':
            # without augmentation: parameters tuned without data augmentation
            # with augmentation: parameters (and learning rate) tuned with it
            if not augmented:
                model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                                   nbhidden_comp=Net['hidden_layers_comp'],
                                   drop_prob_aux=Net['drop_prob_aux'],
                                   drop_prob_comp=Net['drop_prob_comp'])
            else:
                learning_rate = Net['learning rate augm']
                model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                                   nbhidden_comp=Net['hidden_layers_comp'],
                                   drop_prob_aux=Net['drop_prob_aux_augm'],
                                   drop_prob_comp=Net['drop_prob_comp_augm'])
        elif net_type == 'Google_Net':
            model = Net['net'](channels_1x1=Net['channels_1x1'],
                               channels_3x3=Net['channels_3x3'],
                               channels_5x5=Net['channels_5x5'],
                               pool_channels=Net['pool_channels'],
                               nhidden=Net['hidden_layers'],
                               drop_prob_comp=Net['drop_prob_comp'],
                               drop_prob_aux=Net['drop_prob_aux'])
        else:
            raise ValueError(
                'Unsupported net_type: {!r}'.format(net_type))

        if GPU and cuda.is_available():
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

        model = model.to(device)

        # train the model on the train set and validate at each epoch
        train_losses, train_acc, valid_losses, valid_acc = train_model(
            model, train_data_split, validation_data, device, mini_batch_size,
            optimizer, criterion, n_epochs, learning_rate, lambda_l2,
            alpha, beta)
        # store the training and validation accuracies and losses during the training
        train_results[n] = torch.tensor(
            [train_losses, train_acc, valid_losses, valid_acc])
        # compute the loss and accuracy of the model on the test set
        test_loss, test_acc = compute_metrics(model, test_data, device)
        # store the test metrics in the list
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)

        # learning curve
        if plot:
            learning_curve(train_losses, train_acc, valid_losses, valid_acc)

        print(
            'Seed {:d} | Test Loss: {:.4f} | Test Accuracy: {:.2f}%\n'.format(
                n, test_loss, test_acc))

    # store the train, validation and test accuracies in a tensor for the boxplot
    last_epoch = n_epochs - 1
    data = torch.stack([
        train_results[:, 1, last_epoch],
        train_results[:, 3, last_epoch],
        torch.tensor(test_accuracies)
    ])
    # one model, three metrics, one value per seed (was hard-coded to 10 seeds)
    data = data.view(1, 3, n_seeds)
    # boxplot
    if statistics:
        Title = " Models accuracies"
        models = [Net['net_type']]
        boxplot(data, Title, models, True)

    return train_results, torch.tensor(test_losses), torch.tensor(
        test_accuracies)
def main(train_args):
    """Train and validate an FCN8s network on VOC, logging to text files.

    Args:
        train_args: mapping read here for ``'lr'``, ``'momentum'`` (used as
            Adam's first beta) and ``'epoch_num'``; it is also forwarded to
            ``train`` and ``validate``.

    Side effects:
        Creates ``<root>/logs/voc-fcn`` if needed and writes one train log and
        one validation log there; the mean learning rate is appended to the
        validation log after every epoch.
    """
    use_cuda = cuda.is_available()
    if use_cuda:
        net = fcn8s.FCN8s(num_classes=voc.num_classes, pretrained=False).cuda()
    else:
        print('cuda is not available')
        net = fcn8s.FCN8s(num_classes=voc.num_classes, pretrained=True)

    net.train()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    target_transform = extended_transforms.MaskToTensor()
    restore_transform = standard_transforms.Compose([
        extended_transforms.DeNormalize(*mean_std),
        standard_transforms.ToPILImage(),
    ])
    visualize = standard_transforms.Compose([
        standard_transforms.Scale(400),
        standard_transforms.CenterCrop(400),
        standard_transforms.ToTensor()
    ])

    train_set = voc.VOC('train',
                        set='benchmark',
                        transform=input_transform,
                        target_transform=target_transform)
    train_loader = DataLoader(train_set,
                              batch_size=bsz,
                              num_workers=8,
                              shuffle=True)

    val_set = voc.VOC('val',
                      set='voc',
                      transform=input_transform,
                      target_transform=target_transform)
    val_loader = DataLoader(val_set,
                            batch_size=1,
                            num_workers=4,
                            shuffle=False)

    criterion = CrossEntropyLoss2d(size_average=False,
                                   ignore_index=voc.ignore_label)
    # Only move the loss to the GPU when there is one; an unconditional
    # .cuda() would crash on the CPU fallback path above.
    if use_cuda:
        criterion = criterion.cuda()

    # Bias and non-bias parameters live in separate groups (same lr for both).
    bias_params = [
        param for name, param in net.named_parameters()
        if name[-4:] == 'bias'
    ]
    other_params = [
        param for name, param in net.named_parameters()
        if name[-4:] != 'bias'
    ]
    optimizer = optim.Adam(
        [
            {'params': bias_params, 'lr': train_args['lr']},
            {'params': other_params, 'lr': train_args['lr']},
        ],
        betas=(train_args['momentum'], 0.999))

    lr0 = 1e-7
    max_epoch = 50
    max_iter = max_epoch * len(train_loader)
    # Halve the learning rate every 2 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

    log_dir = os.path.join(root, 'logs', 'voc-fcn')
    os.makedirs(log_dir, exist_ok=True)
    timestamp = datetime.datetime.now().strftime('%d-%m-%H-%M')
    train_file = 'train_log' + timestamp + '.txt'
    val_file = 'val_log' + timestamp + '.txt'

    # Context managers make sure both logs are flushed and closed, even if
    # training raises.
    with open(os.path.join(log_dir, train_file), 'w') as training_log, \
            open(os.path.join(log_dir, val_file), 'w') as val_log:
        curr_epoch = 1
        for epoch in range(curr_epoch, train_args['epoch_num'] + 1):
            train(train_loader, net, criterion, optimizer, epoch, train_args,
                  training_log, max_iter, lr0)
            validate(val_loader, net, criterion, optimizer, epoch,
                     train_args, restore_transform, visualize, val_log)

            # StepLR is epoch-driven: step() takes no metric. Passing the
            # validation loss would be interpreted as the epoch index.
            scheduler.step()

            group_lrs = [group['lr'] for group in optimizer.param_groups]
            mean_lr = sum(group_lrs, 0.0) / len(group_lrs)
            val_log.write('learning rate = {}'.format(str(mean_lr)) + '\n')
Ejemplo n.º 23
0
from torch import cuda, device

# Torch device used by the project: the first CUDA GPU when one is visible,
# otherwise the CPU.
DEVICE = device('cuda:0' if cuda.is_available() else 'cpu')
# Dataset locations. The absolute paths are machine-specific; the relative
# ones are resolved against the current working directory.
DATASET_PATH = '/run/timeshift/backup/thesis/EnhanceIt/src/Datasets/DIV2K/Train'
TEST_DATAPATH = '../src/Datasets/DIV2K/Validation'
ARCNN_DATASET = '../src/Datasets/BSDS500'
# Training hyper-parameters.
BATCH_SIZE = 4
UPSCALE_FACTOR = 4
EPOCHS = 500
ARCNN_MODEL = '../src/models/ARCNN.pth'
# NOTE(review): presumably the training patch size in pixels - confirm with the data loader.
SIZE = 128
# Model / checkpoint files.
MODEL_SAVE_PATH = '/run/timeshift/backup/thesis/EnhanceIt/src/experiment_models/SRGAN_with_blur/models/bestSRGAN.pth'
DIS_PATH = '/run/timeshift/backup/thesis/EnhanceIt/src/experiment_models/disTest.pth'
# NOTE: 10e-5 is 1e-4 (not 1e-5).
LEARNING_RATE = 10e-5
CHECKPOINT_DIR = '../src/models/testSrgan/cp/'
MODEL_X2_DIR = '../src/model_x2.pth'
MODEL_X4_DIR = '../src/model_x4.pth'
CP = '../src/models/checkpoints/testcp'
# Directories for extracted, enhanced and baseline frames / images.
EXTRACTED_FRAMES_DIR = '../src/extracted_frames/'
ENHANCED_FRAMES_DIR = '../src/enhanced_frames/'
ENHANCED_IMG_DIR = '../src/Single_Image_Results/'
ARCNN_FRAMES_DIR = '../src/arcnn_frames/'
BICUBIC_FRAMES_DIR = '../src/bicubic_resampling/'
Ejemplo n.º 24
0
    arg('--num_classes', type=int, default=4,
        help='num tags to predict')  # Fixed
    arg('--model', type=str, default='Resnet50')
    arg('--input_size', type=int, default=296)
    arg('--test_augments', default='resize, horizontal_flip', type=str)
    arg('--augment_ratio',
        default=0.5,
        type=float,
        help='probability of implementing transforms')
    arg('--device', type=int, default=0)
    arg('--hidden_size', type=int, default=128)
    args = parser.parse_args()

    device = args.device
    use_gpu = cuda.is_available()

    SEED = 2019
    seed_everything(SEED)

    global model
    model = models.densenet201(pretrained=False)
    model.classifier = nn.Linear(1920, args.num_classes)
    bind_model(model)
    if args.mode == 'train':
        nsml.save('last')

    if use_gpu:
        model = model.to(device)

    if args.pause:
Ejemplo n.º 25
0
def main():
    """Command-line entry point: parse arguments, build the model, run one mode.

    Positional arguments are ``mode`` (``train``, ``validate``,
    ``predict_valid`` or ``predict_test``) and ``run_root``, the directory
    that holds the run's artifacts (``params.json``, the TensorBoard ``tbx``
    folder, model checkpoints and the ``val.h5`` / ``test.h5`` predictions).
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('mode', choices=['train', 'validate', 'predict_valid', 'predict_test'])
    arg('run_root')
    arg('--model', default='se_resnet50')
    # NOTE: --pretrained is not forwarded to the model constructor below; in
    # this function it only selects the two-stage training schedule.
    arg('--pretrained', type=int, default=1)
    arg('--batch-size', type=int, default=64)
    arg('--step', type=int, default=1)
    arg('--workers', type=int, default=2 if ON_KAGGLE else 4)
    arg('--lr', type=float, default=1e-4)
    arg('--patience', type=int, default=4)
    arg('--clean', action='store_true')
    arg('--n-epochs', type=int, default=100)
    arg('--epoch-size', type=int)
    arg('--tta', type=int, default=4)
    arg('--use-sample', action='store_true', help='use a sample of the dataset')
    arg('--debug', action='store_true')
    arg('--limit', type=int)
    arg('--fold', type=int, default=0)
    arg('--multi', type=int, default=0)
    arg('--half', type=int, default=0)
    args = parser.parse_args()
 
    run_root = Path(args.run_root)
    # The fold assignment is read from a CSV one directory above the CWD.
    folds = pd.read_csv('../folds.csv')
    train_root = DATA_ROOT / ('train_sample' if args.use_sample else 'train')
    if args.use_sample:
        # Keep only the rows whose image is present in the sample directory.
        folds = folds[folds['Id'].isin(set(get_ids(train_root)))]
    # The selected fold is held out for validation; all others are training data.
    train_fold = folds[folds['fold'] != args.fold]
    valid_fold = folds[folds['fold'] == args.fold]
    if args.limit:
        train_fold = train_fold[:args.limit]
        valid_fold = valid_fold[:args.limit]

    def make_loader(df: pd.DataFrame, image_transform) -> DataLoader:
        # NOTE: shuffle=True applies to every loader built here, including
        # the validation loaders.
        return DataLoader(
            TrainDataset(train_root, df, image_transform, debug=args.debug),
            shuffle=True,
            batch_size=args.batch_size,
            num_workers=args.workers,
        )
    criterion = nn.BCEWithLogitsLoss(reduction='none')
    # criterion = FocalLoss()
    model = getattr(models, args.model)()
    feature_dim = model.last_linear.in_features
    class AvgPool(nn.Module):
        # Average-pools over the full spatial extent of the input (x.shape[2:]).
        def forward(self, x):
            # print (x.size())
            return F.avg_pool2d(x, x.shape[2:])
    # Both attribute spellings are assigned, presumably to cover the pooling
    # attribute name used by different architectures - confirm.
    model.avg_pool = AvgPool()
    model.avgpool = AvgPool()
    # Replace the classification head so it outputs N_CLASSES logits.
    model.last_linear = nn.Linear(feature_dim, N_CLASSES)
    use_cuda = cuda.is_available()
    # Parameter lists are captured before the model is wrapped / moved below.
    fresh_params = list(model.last_linear.parameters())
    all_params = list(model.parameters())
    if use_cuda:
        if args.multi:
            model = torch.nn.DataParallel(model).cuda()
        else:
            model = model.cuda()

    if args.mode == 'train':
        # --clean wipes a previous run stored in the same directory.
        if run_root.exists() and args.clean:
            shutil.rmtree(run_root)
        run_root.mkdir(exist_ok=True, parents=True)
        # Persist the parsed arguments next to the run's outputs.
        (run_root / 'params.json').write_text(
            json.dumps(vars(args), indent=4, sort_keys=True))
        writer = SummaryWriter(str(run_root/'tbx'))

        train_loader = make_loader(train_fold, train_transform)
        valid_loader = make_loader(valid_fold, test_transform)
        print(f'{len(train_loader.dataset):,} items in train, '
              f'{len(valid_loader.dataset):,} in valid')

        train_kwargs = dict(
            args=args,
            model=model,
            criterion=criterion,
            train_loader=train_loader,
            valid_loader=valid_loader,
            patience=args.patience,
            init_optimizer=lambda params, lr: Adam(params, lr),
            use_cuda=use_cuda,
        )
        
        if args.pretrained:
            # Stage 1: one epoch on the new head only. Stage 2 (all
            # parameters) runs only when stage 1 returns a truthy value.
            if train(params=fresh_params, writer=writer, n_epochs=1, **train_kwargs):
                train(params=all_params, writer=writer, **train_kwargs)
        else:
            train(params=all_params, writer=writer, **train_kwargs)
        writer.export_scalars_to_json(str(run_root/'tbx/all_scalars.json'))
        writer.close()

    elif args.mode == 'validate':
        valid_loader = make_loader(valid_fold, test_transform)
        # Validation loads 'model.pt', whereas prediction loads 'best-model.pt'.
        load_model(model, run_root / 'model.pt')
        validation(model, criterion, tqdm.tqdm(valid_loader, desc='Validation'),
                   use_cuda=use_cuda)

    elif args.mode.startswith('predict'):
        load_model(model, run_root / 'best-model.pt')
        predict_kwargs = dict(
            batch_size=args.batch_size,
            tta=args.tta,
            use_cuda=use_cuda,
            workers=args.workers,
        )
        if args.mode == 'predict_valid':
            # Predictions for the held-out fold are written to val.h5.
            predict(model, df=valid_fold, root=train_root,
                    out_path=run_root / 'val.h5',
                    **predict_kwargs)
        elif args.mode == 'predict_test':
            test_root = DATA_ROOT / (
                'test_sample' if args.use_sample else 'test')
            # The sample submission supplies the list of test ids
            # (column 'id' here, unlike 'Id' in the folds table).
            ss = pd.read_csv(DATA_ROOT / 'sample_submission.csv')
            if args.use_sample:
                ss = ss[ss['id'].isin(set(get_ids(test_root)))]
            if args.limit:
                ss = ss[:args.limit]
            predict(model, df=ss, root=test_root,
                    out_path=run_root / 'test.h5',
                    **predict_kwargs)
Ejemplo n.º 26
0
sys.path.append(os.getcwd())

import torch as tc
from torch import cuda

import wargs
from tools.inputs_handler import *
from tools.inputs import Input
from tools.optimizer import Optim
from models.losser import Classifier
from models.embedding import WordEmbedding
from models.model_builder import build_NMT
from tools.utils import init_dir, wlog

# Report up front whether a GPU can be used at all.
wlog('CUDA is available, specify device by gpu_id argument (i.e. gpu_id=[0, 1, 2])'
     if cuda.is_available() else
     'Warning: CUDA is not available, train on CPU')

# Bind `device` to the first configured GPU id, or to the CPU when CUDA is
# missing. `device` is left undefined when no gpu_id is configured.
if wargs.gpu_id is not None:
    #cuda.set_device(wargs.gpu_id[0])
    if cuda.is_available():
        device = tc.device('cuda:{}'.format(wargs.gpu_id[0]))
    else:
        device = tc.device('cpu')
    wlog('Set device {}, will use {} GPUs {}'.format(
        wargs.gpu_id[0], len(wargs.gpu_id), wargs.gpu_id))

from trainer import *
Ejemplo n.º 27
0
# Dataset root and the two image-domain sub-folders (horse2zebra: A and B).
DATA = "/data/horse2zebra/"
trainA = "trainA/"
trainB = "trainB/"
# Machine-specific working directories.
forge = "/data/forge/"
HOME = "/home/ubuntu/"
# NOTE(review): presumably a clipping bound used during training - confirm where it is consumed.
CLIP_LIMIT = 1e-3
# NOTE(review): presumably the batch size - confirm against the data loader.
BS = 4

# In[3]:

from models import discriminative
from models import generative_chimney as generative

from torch import cuda

# True when PyTorch can see a CUDA device.
CUDA = cuda.is_available()

# In[4]:

from torch.optim import Adam


class cycle(nn.Module):
    # NOTE(review): both defaults are mutable lists created once at definition
    # time and shared by every call that omits the argument; do not mutate
    # them in place.
    def __init__(self,
                 g_fn=[64, 64, 64, 64, 128, 128, 128, 128, 64],
                 d_fn=[64, 64, 64, 128, 128, 128]):
        """
        g_fn: filter numbers for generative model, a list of int
        d_fn: filter numbers for discriminative model, a list of int, downsampling count is len(self.d_fn)-1
        """
        super(cycle, self).__init__()
Ejemplo n.º 28
0
# Confirm PyTorch sees the GPU: stop with an AssertionError when no CUDA
# device is usable, otherwise print the name of the current device.
# NOTE: assert statements are skipped when Python runs with -O.
from torch import cuda
assert cuda.is_available()
assert cuda.device_count() > 0
print(cuda.get_device_name(cuda.current_device()))
Ejemplo n.º 29
0
def train(model, optimizer, criterion, epoch, train_data, val_data, save_file,
          best_file):
    """Train for one epoch, validate, and write both checkpoints.

    If ``best_file`` exists, the model weights and the best validation
    accuracy stored in it are loaded before training. After the epoch the
    model is evaluated on ``val_data``; ``best_file`` is rewritten when the
    validation accuracy improves (or when no previous best exists), and
    ``save_file`` always receives the current model / optimizer / criterion
    state together with the per-iteration loss history accumulated so far.

    Returns:
        The ``(model, optimizer, criterion)`` triple that was passed in.
    """
    global force_cuda

    # Start from the best known weights when a "best" checkpoint is present.
    best_val_acc = None
    if os.path.exists(best_file):
        out('Loading best model')
        checkpoint = torch.load(best_file)
        model.load_state_dict(checkpoint['state_dict'])
        best_val_acc = checkpoint['best_val']

    model.train()

    loss_sum = 0.0
    correct_sum = 0.0
    sample_count = 0
    iteration_losses = []

    for batch in train_data:
        inputs, labels = batch
        if force_cuda and cuda.is_available():
            inputs = inputs.cuda()
            labels = labels.cuda()

        optimizer.zero_grad()

        outputs = model(inputs)
        # Index of the highest score along dim 1 is the predicted class.
        predictions = torch.max(outputs.data, 1)[1]
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        batch_loss = loss.data.item()
        iteration_losses.append(batch_loss)
        loss_sum += batch_loss
        correct_sum += torch.sum(predictions == labels.data).item()
        sample_count += labels.size(0)

        # Drop the batch tensors before asking CUDA to release cached memory.
        del inputs, labels, outputs, predictions
        cuda.empty_cache()

    train_accuracy = correct_sum / sample_count
    train_loss = loss_sum / sample_count
    out(f'Training #{epoch}: {train_accuracy} accuracy and {train_loss} loss')
    val_loss, val_acc = eval(model, criterion, val_data)
    out(f'Validation #{epoch}: {val_acc} accuracy and {val_loss} loss')

    if best_val_acc is None or best_val_acc < val_acc:
        best_val_acc = val_acc
        best_state = {
            'state_dict': model.state_dict(),
            'best_val': best_val_acc
        }
        torch.save(best_state, best_file)

    # Save per epoch: extend the loss history of an existing checkpoint.
    if os.path.exists(save_file):
        saved_iteration_losses = torch.load(save_file)['iteration_losses']
        saved_iteration_losses.extend(iteration_losses)
    else:
        saved_iteration_losses = iteration_losses

    epoch_state = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'criterion': criterion.state_dict(),
        'iteration_losses': saved_iteration_losses,
        'last_epoch': epoch
    }
    torch.save(epoch_state, save_file)

    return model, optimizer, criterion
Ejemplo n.º 30
0
def setup():
    parser = argparse.ArgumentParser()
    parser.add_argument('--actions',
                        default='../actions_bb.txt',
                        help='define actions txt')
    parser.add_argument('--strategy',
                        default='../strategy_bb.txt',
                        help='define strategy txt')
    parser.add_argument('--dataset',
                        default='nt4096_ls128_nzf8_nzd32.pth',
                        help='folder | synth | pth | stead | ann2bb | deepbns')
    parser.add_argument(
        '--dataroot', default='../database/stead', help='Path to dataset'
    )  # '/home/filippo/Data/Filippo/aeolus/ann2bb_as4_') # '/home/filippo/Data/Filippo/aeolus/STEAD/waveforms_11_13_19.hdf5',help='path to dataset')
    parser.add_argument('--inventory',
                        default='RM07.xml,LXRA.xml,SRN.xml',
                        help='inventories')
    parser.add_argument('--workers',
                        type=int,
                        help='number of data loading workers',
                        default=2)
    parser.add_argument('--batchSize',
                        type=int,
                        default=5,
                        help='input batch size')
    parser.add_argument('--batchPercent',
                        type=int,
                        nargs='+',
                        default=[0.8, 0.1, 0.1],
                        help='train/test/validation %')
    parser.add_argument('--niter',
                        type=int,
                        default=2,
                        help='number of epochs to train for')
    parser.add_argument(
        '--imageSize',
        type=int,
        default=4096,
        help='the height / width of the input image to network')
    parser.add_argument(
        '--latentSize',
        type=int,
        default=128,
        help='the height / width of the input image to network')
    parser.add_argument('--cutoff',
                        type=float,
                        default=1.,
                        help='cutoff frequency')
    parser.add_argument('--nzd',
                        type=int,
                        default=32,
                        help='size of the latent space')
    parser.add_argument('--nzf',
                        type=int,
                        default=8,
                        help='size of the latent space')
    parser.add_argument('--ngf',
                        type=int,
                        default=32,
                        help='size of G input layer')
    parser.add_argument('--ndf',
                        type=int,
                        default=32,
                        help='size of D input layer')
    parser.add_argument('--glr',
                        type=float,
                        default=0.0001,
                        help='AE learning rate, default=0.0001')
    parser.add_argument('--rlr',
                        type=float,
                        default=0.0001,
                        help='GAN learning rate, default=0.00005')
    parser.add_argument('--b1',
                        type=float,
                        default=0.5,
                        help='beta1 for Adam. default=0.5')
    parser.add_argument('--cuda', action='store_true', help='enables cuda')
    parser.add_argument('--ngpu',
                        type=int,
                        default=2,
                        help='number of GPUs to use')
    parser.add_argument('--plot',
                        action='store_true',
                        help="flag for plotting")
    parser.add_argument('--outf',
                        default='./imgs',
                        help='folder to output images and model checkpoints')
    parser.add_argument('--manualSeed', type=int, help='manual seed')
    parser.add_argument('--mw', type=float, default=4.5, help='magnitude [Mw]')
    parser.add_argument('--dtm',
                        type=float,
                        default=0.01,
                        help='time-step [s]')
    parser.add_argument('--dep',
                        type=float,
                        default=50.,
                        help='epicentral distance [km]')
    parser.add_argument('--scc', type=int, default=0, help='site-class')
    parser.add_argument('--sst', type=int, default=1, help='site')
    parser.add_argument('--scl', type=int, default=1, help='scale [1]')
    parser.add_argument('--nsy',
                        type=int,
                        default=83,
                        help='number of synthetics [1]')
    parser.add_argument('--save_checkpoint',
                        type=int,
                        default=1,
                        help='Number of epochs for each checkpoint')
    parser.set_defaults(stack=False, ftune=False, feat=False, plot=True)
    opt = parser.parse_args()
    u'''Set-up GPU and CUDA'''
    opt.cuda = True if (tcuda.is_available() and opt.cuda) else False
    device = tdev("cuda:0" if opt.cuda else "cpu")
    FloatTensor = tcuda.FloatTensor if opt.cuda else tFT
    LongTensor = tcuda.LongTensor if opt.cuda else tLT
    ngpu = int(opt.ngpu)

    try:
        os.makedirs(opt.outf)
    except OSError:
        pass

    if opt.manualSeed is None:
        opt.manualSeed = random.randint(1, 10000)
    print("Random Seed: ", opt.manualSeed)
    random.seed(opt.manualSeed)
    mseed(opt.manualSeed)

    cudnn.benchmark = True

    if tcuda.is_available() and not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    if opt.dataset == 'synth':
        sd_mw = np.log(1. + (2. / opt.mw)**2)
        mu_mw = np.log(opt.mw) - 0.5 * sd_mw
        l=get_truncated_normal(mean=mu_mw,\
                               sd=sd_mw,\
                               low=np.log(3.5),\
                               upp=np.log(8))
        x = l.ppf(np.random.rand(opt.nsy))
        opt.mw = np.round(np.exp(x), 1)
        opt.dep = get_truncated_normal(mean=opt.dep,\
                                       sd=50.,low=10.,upp=200.).ppf(np.random.rand(opt.nsy))
        opt.dep = opt.dep.round(0)
        md = {'mw':opt.mw,'dep':opt.dep,'scc':opt.scc,\
          'sst':opt.sst,'dtm':opt.dtm,'scl':opt.scl,'cutoff':opt.cutoff}

        md['vTn'] = np.arange(0.0, 3.05, 0.05, dtype=np.float64)
        md['nTn'] = md['vTn'].size
        ths_trn,ths_tst,ths_vld,\
        vtm,fsc = synth_dataset(opt.batchPercent,opt.imageSize,opt.latentSize,\
                                opt.nzd,opt.nzf,md=md,nsy=opt.nsy,device=device)
        md['fsc'] = fsc
        opt.ncls = md['fsc']['ncat']
        # Create natural period vector
        opt.vTn = np.arange(0.0, 3.05, 0.05, dtype=np.float64)
        opt.nTn = md['vTn'].size

        tsave(ths_trn, './ths_trn.pth')
        tsave(ths_tst, './ths_tst.pth')
        tsave(ths_vld, './ths_vld.pth')
        tsave(vtm, './vtm.pth')
        with open('md.p', 'wb') as handle:
            pickle.dump(md, handle)
        handle.close()
        with open('opt.p', 'wb') as handle:
            pickle.dump(opt, handle)
        handle.close()

    elif opt.dataset == 'folder':
        src = opt.dataroot.split(',')
        for n in range(len(src)):
            src[n] = osj(src[n], '*.*.*.mseed')
        print('dataroots:')
        print(src)

        inv = opt.inventory.split(',')
        for n in range(len(inv)):
            inv[n] = osj(opt.dataroot, 'sxml', inv[n])
        print('inventories:')
        print(inv)

        ths_trn,ths_tst,ths_vld,thf_trn,thf_tst,thf_vld,\
        vtm,fsc = load_dataset(opt.batchPercent,\
                               source=src,inventory=inv)

        md = {'mw':[],'dep':[],'scc':[],\
          'sst':[],'dtm':vtm[1]-vtm[0],\
          'ntm':vtm.size,'scl':[]}
        opt.ncls = 1
        tsave(ths_trn, './ths_trn.pth')
        tsave(ths_tst, './ths_tst.pth')
        tsave(ths_vld, './ths_vld.pth')
        tsave(thf_trn, './thf_trn.pth')
        tsave(thf_tst, './thf_tst.pth')
        tsave(thf_vld, './thf_vld.pth')
        tsave(vtm, './vtm.pth')

    elif '.pth' in opt.dataset:
        ths_trn = tload(osj(opt.dataroot, 'ths_trn_' + opt.dataset))
        ths_tst = tload(osj(opt.dataroot, 'ths_tst_' + opt.dataset))
        ths_vld = tload(osj(opt.dataroot, 'ths_vld_' + opt.dataset))
        vtm = tload(osj(opt.dataroot, 'vtm.pth'))
        pickle_off = open(osj(opt.dataroot, "md.p"), "rb")
        md = pickle.load(pickle_off)
        pickle_off.close()
        pickle_off = open(osj(opt.dataroot, "opt.p"), "rb")
        optt = pickle.load(pickle_off)
        pickle_off.close()

        for k, v in optt.__dict__.items():
            flag = False
            if k in opt.__dict__:
                try:
                    if type(v).__module__ == 'numpy' and (
                            v != opt.__dict__[k]).any():
                        flag = True
                except:
                    pass
                try:
                    if any(v != opt.__dict__[k]):
                        flag = True
                except:
                    pass
                try:
                    if v != opt.__dict__[k]:
                        flag = True
                except:
                    pass
                if flag:
                    optt.__dict__[k] = opt.__dict__[k]

        for k, v in opt.__dict__.items():
            flag = False
            if k not in optt.__dict__:
                flag = True
            if flag:
                optt.__dict__[k] = opt.__dict__[k]
        opt = optt

    elif opt.dataset == 'deepbns':
        src = osj(opt.dataroot, "Hybrid_Database.h5")
        print('dataroots:')
        print(src)
        md = {
            'dtm': 9.452707692307693e-06,
            'cutoff': opt.cutoff,
            'ntm': opt.imageSize
        }
        md['vTn'] = np.arange(0.0, 3.05, 0.05, dtype=np.float64)
        md['nTn'] = md['vTn'].size
        ths_trn,ths_tst,ths_vld,\
        vtm,fsc = deepbns_dataset(src,opt.batchPercent,opt.imageSize,opt.latentSize,\
                                  opt.nzd,opt.nzf,md=md,nsy=opt.nsy,device=device)
        md['fsc'] = fsc
        opt.ncls = md['fsc']['ncat']
        # Create natural period vector
        opt.vTn = np.arange(0.0, 3.05, 0.05, dtype=np.float64)
        opt.nTn = md['vTn'].size
        tsave(ths_trn, './ths_trn.pth')
        tsave(ths_tst, './ths_tst.pth')
        tsave(ths_vld, './ths_vld.pth')
        tsave(vtm, './vtm.pth')
        with open('md.p', 'wb') as handle:
            pickle.dump(md, handle)
        handle.close()
        with open('opt.p', 'wb') as handle:
            pickle.dump(opt, handle)
        handle.close()

    elif opt.dataset == 'stead':
        src = opt.dataroot
        print('dataroots:')
        print(src)
        md = {'dtm': 0.01, 'cutoff': opt.cutoff, 'ntm': opt.imageSize}
        md['vTn'] = np.arange(0.0, 3.05, 0.05, dtype=np.float64)
        md['nTn'] = md['vTn'].size
        ths_trn,ths_tst,ths_vld,\
        vtm,fsc = stead_dataset(src,opt.batchPercent,opt.imageSize,opt.latentSize,\
                                opt.nzd,opt.nzf,md=md,nsy=opt.nsy,device=device)
        md['fsc'] = fsc
        opt.ncls = md['fsc']['ncat']
        # Create natural period vector
        opt.vTn = np.arange(0.0, 3.05, 0.05, dtype=np.float64)
        opt.nTn = md['vTn'].size
        tsave(ths_trn, './ths_trn.pth')
        tsave(ths_tst, './ths_tst.pth')
        tsave(ths_vld, './ths_vld.pth')
        tsave(vtm, './vtm.pth')
        with open('md.p', 'wb') as handle:
            pickle.dump(md, handle)
        handle.close()
        with open('opt.p', 'wb') as handle:
            pickle.dump(opt, handle)
        handle.close()

    elif opt.dataset == 'ann2bb':
        src = opt.dataroot
        print('dataroots:')
        print(src)
        md = {'dtm': 0.005, 'cutoff': opt.cutoff, 'ntm': opt.imageSize}
        md['vTn'] = np.arange(0.0, 3.05, 0.05, dtype=np.float64)
        md['nTn'] = md['vTn'].size
        ths_trn,ths_tst,ths_vld,\
        vtm,fsc,md = ann2bb_dataset(src,opt.batchPercent,opt.imageSize,opt.latentSize,\
                                    opt.nzd,opt.nzf,md=md,nsy=opt.nsy,device=device)
        md['fsc'] = fsc
        opt.ncls = md['fsc']['ncat']
        # Create natural period vector
        opt.vTn = np.arange(0.0, 3.05, 0.05, dtype=np.float64)
        opt.nTn = md['vTn'].size
        tsave(ths_trn, './ths_trn.pth')
        tsave(ths_tst, './ths_tst.pth')
        tsave(ths_vld, './ths_vld.pth')
        tsave(vtm, './vtm.pth')
        with open('md.p', 'wb') as handle:
            pickle.dump(md, handle)
        handle.close()
        with open('opt.p', 'wb') as handle:
            pickle.dump(opt, handle)
        handle.close()

    params = {'batch_size': opt.batchSize,\
              'shuffle': True,'num_workers':int(opt.workers)}

    trn_loader,tst_loader,vld_loader = \
        dataset2loader(ths_trn,ths_tst,ths_vld,**params)
    #         dataset2loader(ths_trn,thf_trn,wnz_trn,\
    #                        ths_tst,thf_tst,wnz_tst,\
    #                        ths_vld,thf_vld,wnz_vld,\
    #                        **params)
    actions = pd.read_csv(filepath_or_buffer=opt.actions,
                          sep=',',
                          true_values='True',
                          false_values='False')
    tract = {'tract': actions['tract'].to_dict()}
    trplt = {'trplt': actions['trplt'].to_dict()}
    trcmp = {'trcmp': actions['trcmp'].to_dict()}
    trdis = {'trdis': actions['trdis'].to_dict()}
    strategy = pd.read_csv(filepath_or_buffer=opt.strategy,
                           sep=',',
                           na_values='None')
    strategy = strategy.where((pd.notnull(strategy)), None)
    strategy['strategy'] = strategy.values.tolist()
    strategy = dict(strategy['strategy'].to_dict(), **tract, **trplt, **trcmp,
                    **trdis)
    opt.strategy = strategy
    cv = {'parser':parser,'opt':opt,'vtm':vtm,\
          'trn_loader':trn_loader,\
          'tst_loader':tst_loader,\
          'vld_loader':vld_loader,\
          'params':params,\
          'device':device,\
          'FloatTensor':FloatTensor,\
          'LongTensor':LongTensor,\
          'md':md}
    return cv
Ejemplo n.º 31
0
from torchvision.utils import save_image, make_grid
from os import path
from google.colab import drive

# Google Drive is mounted at /content/gdrive. The notebooks folder below is
# built from a relative path ('./gdrive/...'), so the existence check only
# matches the mount when the working directory is /content.
notebooks_dir_name = 'notebooks'
drive.mount('/content/gdrive')
notebooks_base_dir = path.join('./gdrive/My Drive/', notebooks_dir_name)
if not path.exists(notebooks_base_dir):
    print('Check your google drive directory. See you file explorer')
# Settings
download_root = 'mnist'
stored_path = 'images'
# Convert images to tensors and normalize the single channel with
# mean 0.5 / std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(mean=(0.5, ), std=(0.5, ))])
device = 'cuda' if cuda.is_available() else 'cpu'

# NOTE(review): the name is misspelled ("learning_rate"); it is left as is
# because code outside this excerpt may refer to it.
leraing_rate = 0.0002
# Dataset
train_set = MNIST(download_root,
                  train=True,
                  transform=transform,
                  download=True)

# Dataloader
train_loader = DataLoader(train_set, batch_size=60, shuffle=True)

# Image_dir
import os
import imageio
Ejemplo n.º 32
0
# Data path setup
datadir, traindir, validdir, testdir, image_transforms, data, dataloaders = train_util.init_dataset(
)
cat_df, image_df = train_util.category_dataframe(traindir, validdir, testdir)

# Choose the file names used when saving the trained model
save_file_name = './ModelSave/' + model_choice + '-transfer.pt'
checkpoint_path = './ModelSave/' + model_choice + '-transfer_bts' + str(
    batch_size) + "_ep" + str(
        training_epoch) + "_" + train_util.get_date() + '.pth'

# Save the image-count distribution plot as an image
# train_util.save_distribution_of_images(cat_df, model_choice)

# Whether to train on a gpu
train_on_gpu = cuda.is_available()  # check whether a GPU can be used
if train_on_gpu:
    print('학습 모드 : GPU\n')
else:
    print('학습 모드 : CPU\n')

# Order the categories by number of training images, largest first.
cat_df.sort_values('n_train', ascending=False, inplace=True)
# print(f'{cat_df.head()}\n')
# print(f'{cat_df.tail()}\n')

# iter(): returns an iterator over the given data.
# trainiter = iter(dataloaders['train'])
# next(): takes an iterator and returns the next element it yields.
# features, labels = next(trainiter) # apparently added to pull out a single batch
# print(f'{features.shape} , {labels.shape}') # apparently just shows what shape the data has
    s.collapse_unary(True, True)

# get the structure IDs from the dictionary keys
conditions = list(structures.keys())

# keep only the rows of data whose condition has a structure
data = data[data.condition.isin(conditions)]

# build the vocab list up from the structures (unique leaf words; the set
# makes the resulting order arbitrary)
vocab = list({word
              for tree in structures.values()
              for word in tree.leaves()})

# load the glove embedding
embeddings = load_glove_embedding(args.embeddings, vocab)
device_to_use = device("cuda:0" if is_available() else "cpu")

# Cut inputs (x) and responses (y) into consecutive chunks of args.batch
# items. NOTE: x, y and rnntype stay undefined for any other args.rnntype.
if args.rnntype == "tree":
    x_raw = [structures[c] for c in data.condition.values]
    x = [x_raw[i:i + args.batch] for i in range(0, len(x_raw), args.batch)]
    y_raw = data.response.values
    y = [y_raw[i:i + args.batch] for i in range(0, len(y_raw), args.batch)]
    rnntype = ChildSumConstituencyTreeLSTM
    # args.batch is reset only after the chunks above were cut with the
    # original value.
    args.batch = 1
elif args.rnntype == "linear":
    # Implement mini-batching
    x_raw = [structures[c].words() for c in data.condition.values]
    x = [x_raw[i:i + args.batch] for i in range(0, len(x_raw), args.batch)]
    y_raw = data.response.values
    y = [y_raw[i:i + args.batch] for i in range(0, len(y_raw), args.batch)]
    rnntype = LSTM
Ejemplo n.º 34
0
def train(config: Config):
    """Run the training pipeline described by ``config``.

    Builds the datasets and data loaders, seeds the RNGs, creates the network
    and its ``VAETrainer`` wrapper, fits it with a Lightning trainer and then
    exports the network weights of every ``*.ckpt`` file found in the cache
    directory to a ``.pth`` file with the same stem.

    Unless ``config.resume`` is set, an existing experiment directory and all
    ``*.ckpt`` / ``*.pth`` files in the cache directory are deleted first.
    """
    transforms = get_transforms(config.resize_image)
    dataset_train, dataset_valid = get_dataset(
        DatasetName.value_of(config.dataset_name), transforms
    )

    # Options shared by the training and validation loaders.
    loader_options = dict(
        num_workers=config.num_workers,
        pin_memory=True,
        worker_init_fn=_worker_init_random,
    )
    dataloader_train = td.DataLoader(
        dataset_train, config.batch_size, shuffle=True, **loader_options
    )
    dataloader_valid = td.DataLoader(
        dataset_valid, config.batch_size, shuffle=False, **loader_options
    )

    params = dc.asdict(config)
    pl.seed_everything(config.random_seed)

    network_name = NetworkName.value_of(config.network_name)
    network = get_network(
        network_name,
        in_channels=config.in_channels,
        out_channels=config.out_channels,
        image_size=config.resize_image,
    )
    model = VAETrainer(network, params)
    model.set_dataloader(dataloader_train, dataloader_valid)

    cache_dir = directories.get_processed().joinpath(config.cache_dir)
    cache_dir.mkdir(exist_ok=True)
    cache_dir_str = str(cache_dir)
    model_checkpoint = pl_callbacks.ModelCheckpoint(
        filepath=cache_dir_str,
        monitor="val_loss",
        save_last=True,
        save_top_k=config.save_top_k,
        save_weights_only=config.save_weights_only,
        mode="min",
        period=1,
    )

    version_dir_name = f"version_{config.experiment_version}"
    experiment_dir = cache_dir.joinpath("default", version_dir_name)
    pl_logger = pl_logging.TensorBoardLogger(
        save_dir=cache_dir_str, version=config.experiment_version
    )

    trainer_params = {}
    if config.resume:
        # Continue from the "last" checkpoint written by ModelCheckpoint.
        last_checkpoint = cache_dir.joinpath("last.ckpt")
        trainer_params["resume_from_checkpoint"] = str(last_checkpoint)
    elif experiment_dir.exists():
        # Fresh run: drop the old logs and every stale checkpoint / export.
        shutil.rmtree(experiment_dir)
        for pattern in ("*.ckpt", "*.pth"):
            for stale_file in cache_dir.glob(pattern):
                stale_file.unlink()

    gpus = None
    if config.use_gpu and tc.is_available():
        gpus = [0]

    pl_trainer = pl.Trainer(
        early_stop_callback=config.early_stop,
        default_root_dir=cache_dir_str,
        fast_dev_run=False,
        min_epochs=config.min_epochs,
        max_epochs=config.max_epochs,
        gpus=gpus,
        progress_bar_refresh_rate=config.progress_bar_refresh_rate,
        profiler=config.profiler,
        checkpoint_callback=model_checkpoint,
        logger=pl_logger,
        log_gpu_memory=True,
        **trainer_params,
    )
    pl_trainer.fit(model)

    # Export plain network weights next to each Lightning checkpoint.
    for checkpoint_path in cache_dir.glob("*.ckpt"):
        weights_path = cache_dir.joinpath(checkpoint_path.stem + ".pth")
        model = model.load_from_checkpoint(str(checkpoint_path), network, params)
        torch.save(model.network.state_dict(), weights_path)