def __init__(self, root_dir, model_path, save_dir=None):
    self.root_dir = root_dir
    self.transform = transforms.Compose([Normalize(), ToTensor()])
    self.model_path = model_path
    self.model = model.GoNet()
    if use_gpu:
        self.model = self.model.cuda()
    self.model.load_state_dict(torch.load(model_path))
    # build the list of consecutive frame pairs from the sequence's img/ directory
    frames = os.listdir(root_dir + '/img')
    frames = [root_dir + '/img/' + frame for frame in frames]
    self.len = len(frames) - 1
    frames = np.array(frames)
    frames.sort()
    self.x = []
    for i in range(self.len):
        self.x.append([frames[i], frames[i + 1]])
    self.x = np.array(self.x)
    # uncomment to select the initial rectangle manually
    # init_bbox = bbox_coordinates(self.x[0][0])
    # read the initial bounding box [x, y, w, h] from the ground-truth file
    # and convert it to corner form [x1, y1, x2, y2]
    with open(root_dir + '/groundtruth_rect.txt') as f:
        lines = f.readlines()
    init_bbox = lines[0].strip().split('\t')
    init_bbox = [float(x) for x in init_bbox]
    init_bbox = [init_bbox[0],
                 init_bbox[1],
                 init_bbox[0] + init_bbox[2],
                 init_bbox[1] + init_bbox[3]]
    init_bbox = np.array(init_bbox)
    print(init_bbox)
    self.prev_rect = init_bbox
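# Worked example of the conversion above (illustrative values, not from the
# source): the annotation file stores one tab-separated [x, y, width, height]
# row per frame (OTB-style), while the tracker keeps corner form
# [x1, y1, x2, y2].
row = '198\t214\t34\t81'
x, y, w, h = [float(v) for v in row.strip().split('\t')]
print(np.array([x, y, x + w, y + h]))  # -> [198. 214. 232. 295.]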
def main():
    args = parser.parse_args()
    print(args)
    # load dataset
    transform = transforms.Compose([Normalize(), ToTensor()])
    alov = datasets.ALOVDataset('../data/alov300/imagedata++/',
                                '../data/alov300/alov300++_rectangleAnnotation_full/',
                                transform)
    dataloader = DataLoader(alov, batch_size=args.batch_size,
                            shuffle=True, num_workers=4)
    # load model
    net = model.GoNet()
    # summed L1 loss over the batch; reduction='sum' is the modern
    # equivalent of the deprecated size_average=False
    loss_fn = torch.nn.L1Loss(reduction='sum')
    if use_gpu:
        net = net.cuda()
        loss_fn = loss_fn.cuda()
    # only the classifier (regression head) parameters are optimized
    optimizer = optim.SGD(net.classifier.parameters(),
                          lr=args.learning_rate, momentum=args.momentum)
    if os.path.exists(args.save_directory):
        print('Directory %s already exists' % (args.save_directory))
    else:
        os.makedirs(args.save_directory)
    # start training
    net = train_model(net, dataloader, loss_fn, optimizer, args.epochs,
                      args.learning_rate, args.save_directory)
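# Standard entry-point guard (assumed; flag names follow the parser defined
# in this module), e.g. if the script is saved as train.py:
#   python train.py -lr 1e-6 -n 500000
if __name__ == '__main__':
    main()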
def __init__(self, root_dir, model_path, save_dir=None):
    self.root_dir = root_dir
    self.transform = transforms.Compose([Normalize(), ToTensor()])
    self.model_path = model_path
    self.model = model.GoNet()
    if use_gpu:
        self.model = self.model.cuda()
    self.model.load_state_dict(torch.load(model_path))
    # build the list of consecutive frame pairs from the sequence directory
    frames = os.listdir(root_dir)
    self.len = len(frames) - 1
    frames = [root_dir + '/' + frame for frame in frames]
    frames = np.array(frames)
    frames.sort()
    self.x = []
    for i in range(self.len):
        self.x.append([frames[i], frames[i + 1]])
    self.x = np.array(self.x)
    # select the initial rectangle manually on the first frame
    init_bbox = bbox_coordinates(self.x[0][0])
    print(init_bbox)
    self.prev_rect = init_bbox
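# A minimal sketch of what a manual selector like bbox_coordinates could do
# (hypothetical: the real helper is defined elsewhere in the repo). It lets
# the user draw a rectangle on the first frame with OpenCV and returns
# corner coordinates [x1, y1, x2, y2].
import cv2

def bbox_coordinates_sketch(img_path):
    img = cv2.imread(img_path)
    x, y, w, h = cv2.selectROI('select object', img)
    cv2.destroyAllWindows()
    return np.array([x, y, x + w, y + h], dtype=np.float64)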
def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks,
             attn_resolutions, dropout=0.0, resamp_with_conv=True,
             in_channels, resolution, z_channels, give_pre_end=False,
             **ignorekwargs):
    super().__init__()
    self.ch = ch
    self.temb_ch = 0
    self.num_resolutions = len(ch_mult)
    self.num_res_blocks = num_res_blocks
    self.resolution = resolution
    self.in_channels = in_channels
    self.give_pre_end = give_pre_end

    # compute in_ch_mult, block_in and curr_res at lowest res
    in_ch_mult = (1,) + tuple(ch_mult)
    block_in = ch * ch_mult[self.num_resolutions - 1]
    curr_res = resolution // 2**(self.num_resolutions - 1)
    self.z_shape = (1, z_channels, curr_res, curr_res)
    print("Working with z of shape {} = {} dimensions.".format(
        self.z_shape, np.prod(self.z_shape)))

    # z to block_in
    self.conv_in = torch.nn.Conv2d(z_channels, block_in,
                                   kernel_size=3, stride=1, padding=1)

    # middle
    self.mid = nn.Module()
    self.mid.block_1 = ResnetBlock(in_channels=block_in,
                                   out_channels=block_in,
                                   temb_channels=self.temb_ch,
                                   dropout=dropout)
    self.mid.attn_1 = AttnBlock(block_in)
    self.mid.block_2 = ResnetBlock(in_channels=block_in,
                                   out_channels=block_in,
                                   temb_channels=self.temb_ch,
                                   dropout=dropout)

    # upsampling
    self.up = nn.ModuleList()
    for i_level in reversed(range(self.num_resolutions)):
        block = nn.ModuleList()
        attn = nn.ModuleList()
        block_out = ch * ch_mult[i_level]
        for i_block in range(self.num_res_blocks + 1):
            block.append(ResnetBlock(in_channels=block_in,
                                     out_channels=block_out,
                                     temb_channels=self.temb_ch,
                                     dropout=dropout))
            block_in = block_out
            if curr_res in attn_resolutions:
                attn.append(AttnBlock(block_in))
        up = nn.Module()
        up.block = block
        up.attn = attn
        if i_level != 0:
            up.upsample = Upsample(block_in, resamp_with_conv)
            curr_res = curr_res * 2
        self.up.insert(0, up)  # prepend to get consistent order

    # end
    self.norm_out = Normalize(block_in)
    self.conv_out = torch.nn.Conv2d(block_in, out_ch,
                                    kernel_size=3, stride=1, padding=1)
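# Minimal usage sketch (parameter values are illustrative and the enclosing
# class is assumed to be a Decoder with the usual forward()): decode a
# latent z back to a 128x128 image tensor.
dec = Decoder(ch=64, out_ch=3, ch_mult=(1, 2, 4, 8), num_res_blocks=2,
              attn_resolutions=[16], in_channels=3, resolution=128,
              z_channels=256)
z = torch.randn(*dec.z_shape)  # (1, 256, 16, 16): 128 // 2**3 = 16
# img = dec(z)                 # -> (1, 3, 128, 128), given a standard forward pass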
import os
import argparse

import torch
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms
from multiprocessing.dummy import Pool as ThreadPool
from tensorboardX import SummaryWriter

import model     # local module defining GoNet
import datasets  # local module defining ALOVDataset
from helper import ToTensor, Normalize, show_batch
from helper import *

# constants
use_gpu = torch.cuda.is_available()
kSaveModel = 20000  # save model after every 20000 steps
batchSize = 50  # number of samples in a batch
kGeneratedExamplesPerImage = 10  # generate 10 synthetic samples per image in a dataset

transform = transforms.Compose([Normalize(), ToTensor()])
writer = SummaryWriter()
args = None

parser = argparse.ArgumentParser(description='GOTURN Training')
parser.add_argument('-n', '--num-batches', default=500000, type=int,
                    help='number of total batches to run')
parser.add_argument('-lr', '--learning-rate', default=1e-6, type=float,
                    help='initial learning rate')
parser.add_argument('--gamma',
def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks,
             attn_resolutions, dropout=0.0, resamp_with_conv=True,
             in_channels, resolution, z_channels, double_z=True,
             convd="torch.nn.Conv1d", **ignore_kwargs):
    super().__init__()
    self.ch = ch
    self.temb_ch = 0
    self.num_resolutions = len(ch_mult)
    self.num_res_blocks = num_res_blocks
    self.resolution = resolution
    self.in_channels = in_channels
    # resolve the convolution class from its dotted name; safer and more
    # idiomatic than the original exec(f"self.convd={convd}")
    self.convd = getattr(torch.nn, convd.rsplit('.', 1)[-1])

    # downsampling
    self.conv_in = self.convd(in_channels, self.ch,
                              kernel_size=3, stride=1, padding=1)
    curr_res = resolution
    in_ch_mult = (1,) + tuple(ch_mult)
    self.down = nn.ModuleList()
    for i_level in range(self.num_resolutions):
        block = nn.ModuleList()
        attn = nn.ModuleList()
        block_in = ch * in_ch_mult[i_level]
        block_out = ch * ch_mult[i_level]
        for i_block in range(self.num_res_blocks):
            block.append(ResnetBlock(in_channels=block_in,
                                     out_channels=block_out,
                                     temb_channels=self.temb_ch,
                                     dropout=dropout,
                                     convd=self.convd))
            block_in = block_out
            if curr_res in attn_resolutions:
                attn.append(AttnBlock(block_in, convd=self.convd))
        down = nn.Module()
        down.block = block
        down.attn = attn
        if i_level != self.num_resolutions - 1:
            down.downsample = Downsample(block_in, resamp_with_conv,
                                         convd=self.convd)
            curr_res = curr_res // 2
        self.down.append(down)

    # middle
    self.mid = nn.Module()
    self.mid.block_1 = ResnetBlock(in_channels=block_in,
                                   out_channels=block_in,
                                   temb_channels=self.temb_ch,
                                   dropout=dropout,
                                   convd=self.convd)
    self.mid.attn_1 = AttnBlock(block_in, convd=self.convd)
    self.mid.block_2 = ResnetBlock(in_channels=block_in,
                                   out_channels=block_in,
                                   temb_channels=self.temb_ch,
                                   dropout=dropout,
                                   convd=self.convd)

    # end
    self.norm_out = Normalize(block_in)
    self.conv_out = self.convd(block_in,
                               2 * z_channels if double_z else z_channels,
                               kernel_size=3, stride=1, padding=1)
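# Minimal usage sketch (illustrative values; the enclosing class is assumed
# to be an Encoder with the usual forward()). With double_z=True the output
# carries 2 * z_channels channels, e.g. mean and log-variance halves in a
# VAE-style setup.
enc = Encoder(ch=64, out_ch=3, ch_mult=(1, 2, 4), num_res_blocks=2,
              attn_resolutions=[16], in_channels=3, resolution=64,
              z_channels=128, convd="torch.nn.Conv2d")
x = torch.randn(1, 3, 64, 64)
# h = enc(x)  # -> (1, 256, 16, 16): two downsamples, 2 * z_channels channels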