def load_dataset(dataset):
    print("\nLoading dataset...\n")
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    # image_transform = ext_transforms.RandomCrop(336)
    image_transform = transforms.ToTensor()
    val_transform = transforms.ToTensor()

    train_set = dataset(args.dataset_dir, transform=image_transform)
    train_loader = data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir, transform=val_transform, mode='val')
    val_loader = data.DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir, transform=val_transform, mode='test')
    test_loader = data.DataLoader(
        test_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers)

    return train_loader, val_loader, test_loader
def load_training_dataset(path, inputs_filename=TRAIN_INPUTS,
                          labels_filename=TRAIN_LABELS, rescale=True,
                          training_set_size=50000):
    """Load the training inputs and labels from `path` and split them into a
    training set of `training_set_size` examples and a validation set."""
    inputs = load_inputs(path, inputs_filename, rescale)
    labels = load_labels(path, labels_filename)
    targets = data.targets_from_labels(labels, NUM_CLASSES)
    n = training_set_size
    train = data.dataset(inputs[0:n], targets[0:n], labels[0:n])
    valid = data.dataset(inputs[n:], targets[n:], labels[n:])
    return train, valid
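# A hedged usage sketch for the loader above (not from the original source):
# the path below is a hypothetical placeholder, and the default
# TRAIN_INPUTS/TRAIN_LABELS constants are assumed to resolve to files there.
train, valid = load_training_dataset('/path/to/data', training_set_size=50000)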
def generate_images():
    data = dataset()
    shops = get_shops(data)
    items = get_items(data)
    applied_image_item = map(lambda x: partial(image_item, x), shops)
    mapnp(lambda f: pmap(f, items), applied_image_item)
def main(cfgs):
    trans_in_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    dataset_train = data.dataset(cfgs, flag='train', trans=trans_in_train)
    trainer.trainer(cfgs, dataset_train)
def test_tau_variances_weighted_small():
    ys = [np.array([20., 20., 20., 20., 20., 20., 30., 30., 30., 30., 30., 30.]),
          np.array([20., 20., 20., 20., 20., 20., 30., 30., 30., 30., 30., 30.]),
          np.array([40., 60.])]
    treatment = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    W = np.vstack([np.ones(12), treatment]).T
    W2 = np.vstack([np.ones(2), np.array([0, 1])]).T
    dats = [dataset(W, X(12), ys[0]),
            dataset(W, X(12), ys[1]),
            dataset(W2, X(2), ys[2])]
    dist = c._tau_variances(dats)
    assert np.isclose(dist, 7.101, 1e-3)
def test_feature_importance_works_with_weights():
    tree = Node(Leaf(0, 0, 0, 0), 0, 4, gain=.15, tot_gain=.40,
                left=Node(Leaf(0, 0, 0, 0), 1, 8, gain=.25, tot_gain=.25,
                          left=Leaf(0, 0, 0, 0), right=Leaf(0, 0, 0, 0)),
                right=Leaf(0, 0, 0, 0))
    X = np.array([[1, 10], [2, 9], [3, 8], [4, 7],
                  [5, 6], [6, 5], [7, 4], [8, 3]])
    y = np.array([10, 20, 30, 40, 50, 60, 70, 80], dtype=np.float64)
    w = np.array([1, 1, 1, 1, 3, 3, 3, 3], dtype=np.float64).reshape(-1, 1)
    w /= w.sum()
    dat = dataset(w, X, y)
    importance = t.feature_importance(tree, dat)
    expected = np.array([1.0 * .15, 0.25 * .25])
    expected /= expected.sum()
    assert np.all(importance == expected)
def fiff_events(source_path=None, name=None):
    """
    Returns a dataset containing events from a raw fiff file. Use
    :func:`fiff_epochs` to load MEG data corresponding to those events.

    source_path : str (path)
        the location of the raw file (if ``None``, a file dialog will be
        displayed).
    name : str
        A name for the dataset.

    """
    if source_path is None:
        source_path = ui.ask_file("Pick a Fiff File", "Pick a Fiff File",
                                  ext=[('fif', 'Fiff')])
    if name is None:
        name = os.path.basename(source_path)

    raw = mne.fiff.Raw(source_path)
    events = mne.find_events(raw)
    if any(events[:, 1] != 0):
        raise NotImplementedError("Events starting with ID other than 0")
        # this was the case in the raw-eve file, which contained all event
        # offsets, but not in the raw file created by kit2fiff. For handling
        # see :func:`fiff_event_file`
    istart = _data.var(events[:, 0], name='i_start')
    event = _data.var(events[:, 2], name='eventID')
    info = {'source': source_path}
    return _data.dataset(event, istart, name=name, info=info)
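# A hedged usage sketch for fiff_events (not from the original source): the
# .fif path below is a hypothetical placeholder; with no arguments a file
# dialog would be opened instead.
events_ds = fiff_events('/path/to/subject_raw.fif', name='subject_events')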
def self_play(self, dataset=dataset(), iterations=400):
    # Dirichlet noise
    if (self.root.is_terminal):
        print("TERMINAL")
        self.root.display()
        nod = self.root
        print()
        print("visits: ", nod.visits, "reward: ", nod.total_reward)
        return -(self.root.state.get_reward())
    initial_state = copy.deepcopy(self.root.state)
    for _ in range(iterations):
        self.current_node = self.root
        self.current_node.state.copy(initial_state)
        self.MCTS_to_reward()
    self.current_node = self.root
    self.current_node.state.copy(initial_state)
    # policy = self.policy_policy()
    # if (DEBUG > 2):
    #     print("policy", policy)
    #     print(dataset)
    # dataset_index = dataset.add_point(state=self.root.state, policy=policy)
    # action = np.random.choice(7, 1, p=policy)[0]
    action = self.select_highest_UCB1()
    self.play_action(action)
    self.root = self.current_node
    # self.root.state.display()
    v = self.self_play(dataset)
    # dataset.data[dataset_index].V = np.array([v])
    return -v
def load_test_dataset(path, inputs_filename=TEST_INPUTS,
                      labels_filename=TEST_LABELS, rescale=True):
    """Load the test inputs and labels from `path` and return them as a
    single dataset."""
    inputs = load_inputs(path, inputs_filename, rescale)
    labels = load_labels(path, labels_filename)
    targets = data.targets_from_labels(labels, NUM_CLASSES)
    test = data.dataset(inputs, targets, labels)
    return test
def test_tau_variances_same():
    ys = [np.array([10., 10., 10., 10., 30., 30., 30., 30.]),
          np.array([10., 10., 10., 10., 30., 30., 30., 30.]),
          np.array([10., 10., 10., 10., 30., 30., 30., 30.])]
    treatment = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    W = np.vstack([np.ones(8), treatment]).T
    dats = [dataset(W, X(8), ys[i]) for i in range(3)]
    dist = c._tau_variances(dats)
    assert np.isclose(dist, 0.0, 1e-6)
def get_permutated_dataset(variables, count='caseID', randomize=False):
    # sort variables
    perm_rand = []     # permutated and randomized
    perm_nonrand = []  # permutated and not randomized
    for v in variables:
        if v.is_rand:
            perm_rand.append(v)
        else:
            perm_nonrand.append(v)
    # variables = perm_rand + perm_nonrand

    # set the variables' IDs
    for i, v in enumerate(variables):
        v._set_list_ID(i)

    perm_n = [v.Ndraw for v in variables]
    n_trials = np.prod(perm_n)
    n_properties = len(variables)
    out = np.empty((n_trials, n_properties), dtype=np.uint8)

    # permutate the variables
    for i, v in enumerate(variables):
        t = np.prod(perm_n[:i])
        r = np.prod(perm_n[i + 1:])
        if len(v.urn) == 0:
            out[:, i] = np.tile(np.arange(v.N), t).repeat(r)
        else:
            base = np.arange(v.N)
            for v0 in variables[:i]:
                if v0 in v.urn:
                    base = np.ravel([base[base != j] for j in xrange(v.N)])
                else:
                    base = np.tile(base, v.Ndraw)
            out[:, i] = np.repeat(base, r)

    if randomize:
        # shuffle those perm factors that should be shuffled
        n_rand_bins = np.prod([v.Ndraw for v in perm_nonrand])
        rand_bin_len = int(n_trials / n_rand_bins)
        for i in xrange(0, n_trials, rand_bin_len):
            np.random.shuffle(out[i:i + rand_bin_len])

    # create dataset
    ds = _data.dataset(name='Design')
    for v in variables:
        x = out[:, v.ID]
        f = _data.factor(x, v.name, labels=v.cells)
        ds.add(f)
    if count:
        ds.add(_data.var(np.arange(ds.N), count))
    return ds
def validate(valid=1):
    if isinstance(val_vars, int):
        print('loading validation data')
        val_data_f = os.path.join(data_path, 'V{}Test.txt'.format(val_vars))
        val_data = dataset(val_data_f, val_vars, False)
    else:
        val_data = val_vars
    val_data.UpdBatchesSchedule(test_batch_size, seed)
    for stps in test_rnn_steps:
        net.test(val_data, valid_batch_size, train_epochs, stps, tsw,
                 valid=valid)
def fiff_event_file(path, labels={}):
    events = mne.read_events(path).reshape((-1, 6))
    name = os.path.basename(path)
    assert all(events[:, 1] == events[:, 5])
    assert all(events[:, 2] == events[:, 4])
    istart = _data.var(events[:, 0], name='i_start')
    istop = _data.var(events[:, 3], name='i_stop')
    event = _data.var(events[:, 2], name='eventID')
    dataset = _data.dataset(event, istart, istop, name=name)
    if labels:
        dataset.add(_data.factor(events[:, 2], name='event', labels=labels))
    return dataset
def __init__(self):
    self.data = dataset()
    self.data.reset()
    self.reset()
    # self.load(1)
    self.setLR()
    self.time = time.time()
    self.dataRate = xp.float32(0.8)
    self.mado = xp.hanning(442).astype(xp.float32)
    # n = 10
    # load_npz(f"param/gen/gen_{n}.npz", self.generator)
    # load_npz(f"param/dis/dis_{n}.npz", self.discriminator)
    self.training(batchsize=6)
def test_split_data():
    X = np.array([[1, 10], [2, 20], [3, 30], [4, 40]])
    y = np.array([10, 20, 30, 40], dtype=np.float64)
    dat = dataset(None, X, y)
    dl, dr = t.split_data_by_idx(dat, 2)
    assert np.all(dl.X == np.array([[1, 10], [2, 20]]))
    assert np.all(dl.y == np.array([10, 20]))
    assert np.all(dr.X == np.array([[3, 30], [4, 40]]))
    assert np.all(dr.y == np.array([30, 40]))
def test_split_data_by_thresh():
    X = np.array([[1, 10], [20, 9], [30, 8], [4, 7],
                  [5, 6], [60, 5], [7, 4], [8, 3]])
    y = np.array([10, 20, 30, 40, 50, 60, 70, 80], dtype=np.float64)
    dat = dataset(None, X, y)
    dl, dr = t.split_data_by_thresh(dat, 1, 5.5)
    assert np.all(dl.X == np.array([[8, 3], [7, 4], [60, 5]]))
    assert np.all(dl.y == np.array([80, 70, 60]))
    assert np.all(
        dr.X == np.array([[5, 6], [4, 7], [30, 8], [20, 9], [1, 10]]))
    assert np.all(dr.y == np.array([50, 40, 30, 20, 10]))
def train():
    train_data = dataset(parameters.train_path)
    # quit()
    val_data = dataset(parameters.validate_path)
    train_data.run_thread()
    val_data.run_thread()
    myfitter = fitter(num_gpus, model_path, save_path, parameters.we_name)
    h = myfitter.m.fit_generator(
        generator=train_data.generate_data(myfitter.gpu_nums * batch_size,
                                           train_times_each_data),
        steps_per_epoch=train_times_each_data // batch_size *
        train_data.files_number // myfitter.gpu_nums // epoch_scale_factor,
        epochs=epochs * epoch_scale_factor,
        callbacks=myfitter.callback_func(),
        validation_data=val_data.generate_data(myfitter.gpu_nums * batch_size,
                                               val_times_each_data),
        validation_steps=val_times_each_data // batch_size *
        val_data.files_number // myfitter.gpu_nums,
        initial_epoch=init_epoch)
    myfitter.save_final()
    hh = h.history
    with open('history.json', 'w') as f:
        json.dump(hh, f, ensure_ascii=False, indent=2)
def __call__(self):
    print("training start!")
    self.model.train()
    for epoch in range(EPOCH):
        loss_sum = 0.
        for i, (input, target) in enumerate(self.train_data):
            input = input.permute(0, 3, 1, 2)
            input, target = input.to(DEVICE), target.to(DEVICE)
            output = self.model(input)
            loss = F.mse_loss(output, target)
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()
            loss_sum += loss.detach().item()  # loss of one batch
            if i % 10 == 0:
                print("Epoch {},batch {},loss:{:.6f}".format(
                    epoch, i, loss.detach().item()))
        # len(train_data) is the number of batches; len(dataset) is the size
        # of the whole dataset
        avg_loss = loss_sum / len(self.train_data)
        print("\033[1;45m Train Epoch:{}\tavg_Loss:{:.6f} \33[0m".format(
            epoch, avg_loss))
        torch.save(self.model.state_dict(), f'./saved/{epoch}.t')
        if epoch == EPOCH - 1:
            train_data = DataLoader(dataset(DATAPATH), batch_size=16,
                                    shuffle=True, num_workers=0)
            for i, (x, y) in enumerate(train_data):
                x = x.permute(0, 3, 1, 2)
                imgdata, label = x.to(DEVICE), y.to(DEVICE)
                out = self.model(imgdata)
                # draw the prediction and the label on the image
                x = x.permute(0, 2, 3, 1)
                x.cpu()
                output = out.cpu().detach().numpy() * 300
                y = y.cpu().numpy() * 300
                img_data = np.array((x[0] + 0.5) * 255, dtype=np.int8)
                img = Image.fromarray(img_data, 'RGB')
                draw = ImageDraw.Draw(img)
                draw.rectangle(output[0], outline="red", width=2)  # network output
                draw.rectangle(y[0], outline="yellow", width=2)  # original label
                img.show()
def test_sort_for_dim():
    X = np.array([[1, 10], [2, 9], [3, 8], [4, 7],
                  [5, 6], [6, 5], [7, 4], [8, 3]])
    y = np.array([10, 20, 30, 40, 50, 60, 70, 80], dtype=np.float64)
    dat = dataset(None, X, y)
    # sorts by given x and returns 2-d array
    do = t.sort_for_dim(dat, 0)
    assert np.all(do.X == X)
    assert np.all(do.y == y)
    do = t.sort_for_dim(dat, 1)
    assert np.all(do.X == np.array([[8, 3], [7, 4], [6, 5], [5, 6],
                                    [4, 7], [3, 8], [2, 9], [1, 10]]))
    assert np.all(do.y == np.flip(y))
def __init__(self):
    self.train_data = DataLoader(dataset(path=DATAPATH),
                                 batch_size=BATCH_SIZE,
                                 shuffle=True,
                                 num_workers=0)
    self.resnet = resnet18()
    self.resnet.fc = nn.Linear(512, 4)
    self.model = self.resnet
    self.model.to(DEVICE)
    self.opt = optim.Adam(self.model.parameters())
    ckpt_path = "./saved"
    ckpt_file = os.listdir(ckpt_path)
    # print(ckpt_file)
    # exit()
    if len(ckpt_file) > 1:
        ckpt_file = os.path.join(ckpt_path, ckpt_file[-1])
        self.model.load_state_dict(torch.load(ckpt_file))
def __init__(self):
    self.model = Model_()
    self.model.to_gpu()
    self.model_opt = optimizers.Adam(alpha=0.0001)
    self.model_opt.setup(self.model)
    i = 37000
    load_npz(f"param/model_/model{i}.npz", self.model)
    # self.model_opt.add_hook(optimizer.WeightDecay(0.0001))
    self.data = dataset()
    self.data.reset()
    # self.reset()
    # self.load(1)
    # self.setLR()
    self.time = time.time()
    self.training(batchsize=16)
def pretty_print():
    for x in data.dataset():
        if x.latlon is not None:
            print "======================================\n"
            print "Record: \t", x.accession_number, \
                "({})".format(x.js_safe_id())
            print "Year: \t\t", x.year
            print "Species: \t", x.species
            print "Location: \t", x.country,
            if x.latlon is not None:
                print "({}, {})".format(x.latlon[0], x.latlon[1])
            else:
                print ''
            if x.locality:
                print "Locality: \t", x.locality
            print "\n======================================\n"
def run(self):
    self.minC = float(self.minC_input.get())
    self.minS = float(self.minS_input.get())
    # delete all
    self.display_info.delete(0, tkinter.END)
    # get dataset
    inputFile = data.dataset('goods.csv')
    # apriori
    items, rules = Apriori.run(inputFile, self.minS, self.minC)
    self.display_info.insert(0, '----------Items-----------')
    line = 0
    for item, support in sorted(items):
        line += 1
        self.display_info.insert(
            line, 'item: {}, {}'.format(str(item), str(support)))
    line += 1
    self.display_info.insert(line, '----------Rules-----------')
    for rule, confidence in sorted(rules):
        line += 1
        self.display_info.insert(
            line, 'rule: {}, {}'.format(str(rule), str(confidence)))
def __init__(self, node=node(), dataset=dataset(), tree_policy=None,
             rollout_policy=None):
    '''
    tree_policy takes a node and returns an action;
    rollout_policy takes a node and returns a value.
    '''
    self.current_node = node
    self.root = self.current_node
    self.tree_root = self.current_node
    self.size = 0
    self.dataset = dataset
    if (tree_policy != None):
        self.tree_policy = tree_policy
    else:
        self.tree_policy = lambda: self.select()
    if (rollout_policy != None):
        self.rollout_policy = rollout_policy
    else:
        self.rollout_policy = lambda: self.simulate()
    self.dnn = Deep_Neural_Net()
def main():
    ds = dataset(batch_size=BATCH_SIZE, image_dim=IMAGE_DIM,
                 file_path=C_IMGS_DIR)
    train_dataset = ds.GetDataset()

    tf.keras.backend.clear_session()
    gan = BigGAN(noise_dim=NOISE_DIM, image_dim=IMAGE_DIM,
                 channel_width_multiplier=CHANNEL_MULTIPLIER,
                 Generator_init_size=G_INIT_SIZE)
    generator = gan.GeneratorNetwork()
    discriminator = gan.DiscriminatorNetwork()

    if GENERATOR_PRETRAIN_PATH:
        print('Load generator pretrain weights')
        generator.load_weights(GENERATOR_PRETRAIN_PATH)
    if DISCRIMINATOR_PRETRAIN_PATH:
        print('Load discriminator pretrain weights')
        discriminator.load_weights(DISCRIMINATOR_PRETRAIN_PATH)

    G_optimizer = tf.keras.optimizers.Adam(lr=G_LR, beta_1=0.0, beta_2=0.9)
    D_optimizer = tf.keras.optimizers.Adam(lr=D_LR, beta_1=0.0, beta_2=0.9)

    train(train_dataset, int(ds.__len__()))

    print('*' * 20)
    print('Model training finished')
    print('Saving trained weights...')
    print('*' * 20)
    generator.save_weights(GENERATOR_CHECKPOINT_PATH)
    discriminator.save_weights(DISCRIMINATOR_CHECKPOINT_PATH)
def fiff(raw, events, conditions, varname='condition', dataname='MEG',
         tstart=-.2, tstop=.6, properties=None, name=None, c_colors={},
         sensorsname='fiff-sensors'):
    """
    Loads data directly when two files (raw and events) are provided
    separately.

    conditions : dict
        ID->name dictionary of conditions that should be imported
    events : str
        path to the event file
    properties : dict
        set properties in addition to the defaults
    raw : str
        path to the raw file
    varname : str
        variable name that will contain the condition value

    """
    if name is None:
        name = os.path.basename(raw)

    raw = mne.fiff.Raw(raw)

    # parse sensor net
    sensor_list = []
    for ch in raw.info['chs']:
        ch_name = ch['ch_name']
        if ch_name.startswith('MEG'):
            x, y, z = ch['loc'][:3]
            sensor_list.append([x, y, z, ch_name])
    sensor_net = sensors.sensor_net(sensor_list, name=sensorsname)

    events = mne.read_events(events)

    picks = mne.fiff.pick_types(raw.info, meg=True, eeg=False, stim=False,
                                eog=False, include=[], exclude=[])

    data = []
    c_x = []

    # read the data
    for ID in conditions:
        epochs = mne.Epochs(raw, events, ID, tstart, tstop, picks=picks)
        samplingrate = epochs.info['sfreq'][0]

        # data
        c_data = epochs.get_data()  # n_ep, n_ch, n_t
        for epoch in c_data:
            data.append(epoch.T)
        # data.append(c_data.T)
        T = epochs.times

        # conditions variable
        n_ep = len(c_data)
        c_x.extend([ID] * n_ep)

    # construct the dataset
    c_factor = _data.factor(c_x, name=varname, labels=conditions,
                            colors=c_colors, retain_label_codes=True)

    props = {'samplingrate': samplingrate}
    props.update(_default_fiff_properties)
    if properties is not None:
        props.update(properties)

    data = np.array(data)
    # data = np.concatenate(data, axis=0)
    timevar = _data.var(T, 'time')
    dims = (timevar, sensor_net)
    Y = _data.ndvar(dims, data, properties=props, name=dataname)

    dataset = _data.dataset(Y, c_factor, name=name, default_DV=dataname)
    return dataset
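# A hedged usage sketch for fiff() (not from the original source): the file
# paths and the condition ID->name mapping below are hypothetical placeholders.
ds = fiff('/path/to/subject_raw.fif', '/path/to/subject-eve.fif',
          conditions={1: 'standard', 2: 'deviant'}, tstart=-.2, tstop=.6)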
import numpy as np
import pickle
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# hyperparameters
batch_size = 8
validate_every_no_of_batches = 80
epochs = 100000
input_size = 10
output_size = 1
hidden_shapes = [16]
lr = 0.0085
has_dropout = True
dropout_perc = 0.5
output_log = r"runs/diabetes_log.txt"

# diabetes dataset
diabetes_dataset = load_diabetes()
X = diabetes_dataset['data']
data = dataset(X, diabetes_dataset['target'], batch_size)
splitter = dataset_splitter(data.compl_x, data.compl_y, batch_size, 0.6, 0.2)
ds_train = splitter.ds_train
ds_val = splitter.ds_val
ds_test = splitter.ds_test
VALREP = 2

saver = ut.ckpter('wts/model*.npz')
if saver.iter >= MAXITER:
    MAXITER = 550e3
    LR = 1e-5
if saver.iter >= MAXITER:
    MAXITER = 600e3
    LR = 1e-6

#### Build Graph

# Build phase2
d = data.dataset(BSZ)
net = model.Net()
output = net.predict(d.limgs, d.cv, d.lrl)
tloss, loss, l1, pc, pc3 = dops.metrics(output, d.disp, d.mask)

vals = [loss, pc, l1, pc3]
tnms = ['loss.t', 'pc.t', 'L1.t', 'pc3.t']
vnms = ['loss.v', 'pc.v', 'L1.v', 'pc3.v']

opt = tf.train.AdamOptimizer(LR)
tstep = opt.minimize(tloss + WD * net.wd, var_list=list(net.weights.values()))

sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=4))
sess.run(tf.global_variables_initializer())

# Load Data File Names
__author__ = 'Jordan Guerin'

import numpy
import math
import random

from data import dataset

data = dataset()
data.load()

x_train = data.points[0:80]
# x_valid = data.points[60:80]
x_test = data.points[80:]


def shuffle(pts, nb=100):
    i1 = random.randint(0, len(pts) - 1)
    i2 = random.randint(0, len(pts) - 1)
    pt1 = pts[i1]
    pt2 = pts[i2]
    pts[i2] = pt1
    pts[i1] = pt2
    return pts


def entrainerModele(pts, deg=0):
    a_x = []
    a_y = []
parser.add_argument('--evaluate', action='store_true',
                    help='evaluate the model')
args = parser.parse_args()
print('==> Options:', args)

# set the seed
torch.manual_seed(1)
torch.cuda.manual_seed(1)

# prepare the data
if not os.path.isfile(args.data + '/train_data'):
    # check the data path
    raise Exception \
        ('Please assign the correct data path with --data <DATA_PATH>')

trainset = data.dataset(root=args.data, train=True)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=2)

testset = data.dataset(root=args.data, train=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                         shuffle=False, num_workers=2)

# define classes
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# define the model
print('==> building model', args.arch, '...')
if args.arch == 'nin':
    model = nin.Net()
def as_json():
    return json.dumps(list(row._asdict() for row in data.dataset()))
from data import dataset
from analytics import analytics


def parse_commandline():
    parser = argparse.ArgumentParser(description='Fashion Dataset Viewer')
    parser.add_argument('-r', '--dataroot', help='Path to stored data',
                        required=True)
    parser.add_argument(
        '-o', '--output',
        help='Path to a pickle file to save intermediate results',
        required=True)
    parser.add_argument('-s', '--set', help='Name of the point set',
                        required=True)
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_commandline()
    cfg = config(args.dataroot, args.set)
    d = dataset(cfg)
    a = analytics(cfg, d)
    a.validate(args.output)
def dataset(hr_flist, lr_flist, scale):
    return data.dataset(hr_flist, lr_flist, scale, resize, residual)
def train(args):
    ##########################
    # Relevant config values #
    ##########################

    log_interval = 1  # print losses every epoch
    checkpoint_interval = eval(args['checkpoints']['checkpoint_interval'])
    checkpoint_overwrite = eval(args['checkpoints']['checkpoint_overwrite'])
    checkpoint_on_error = eval(args['checkpoints']['checkpoint_on_error'])
    figures_interval = eval(args['checkpoints']['figures_interval'])
    figures_overwrite = eval(args['checkpoints']['figures_overwrite'])
    no_progress_bar = not eval(args['checkpoints']['epoch_progress_bar'])

    N_epochs = eval(args['training']['N_epochs'])
    output_dir = args['checkpoints']['output_dir']
    n_gpus = eval(args['training']['parallel_GPUs'])

    checkpoint_resume = args['checkpoints']['resume_checkpoint']
    cond_net_resume = args['checkpoints']['resume_cond_net']

    checkpoints_dir = join(output_dir, 'checkpoints')
    figures_dir = join(output_dir, 'figures')
    os.makedirs(checkpoints_dir, exist_ok=True)
    os.makedirs(figures_dir, exist_ok=True)

    #######################################
    # Construct and load network and data #
    #######################################

    cinn = model.CINN(args)
    cinn.train()
    cinn.cuda()

    if checkpoint_resume:
        cinn.load(checkpoint_resume)
    if cond_net_resume:
        cinn.load_cond_net(cond_net_resume)

    if n_gpus > 1:
        cinn_parallel = nn.DataParallel(cinn, list(range(n_gpus)))
    else:
        cinn_parallel = cinn

    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        cinn.optimizer, gamma=0.1,
        milestones=eval(args['training']['milestones_lr_decay']))

    dataset = data.dataset(args)
    val_x = dataset.val_x.cuda()
    val_y = dataset.val_y.cuda()

    x_std, y_std = [], []
    x_mean, y_mean = [], []
    with torch.no_grad():
        for x, y in tqdm(dataset.train_loader):
            x_std.append(torch.std(x, dim=(0, 2, 3)).numpy())
            y_std.append(torch.std(y, dim=(0, 2, 3)).numpy())
            x_mean.append(torch.mean(x, dim=(0, 2, 3)).numpy())
            y_mean.append(torch.mean(y, dim=(0, 2, 3)).numpy())
            break

    print(np.mean(x_std, axis=0))
    print(np.mean(x_mean, axis=0))
    print(np.mean(y_std, axis=0))
    print(np.mean(y_mean, axis=0))

    ####################
    # Logging business #
    ####################

    logfile = open(join(output_dir, 'losses.dat'), 'w')

    def log_write(string):
        logfile.write(string + '\n')
        logfile.flush()
        print(string, flush=True)

    log_header = '{:>8s}{:>10s}{:>12s}{:>12s}'.format('Epoch', 'Time (m)',
                                                      'NLL train', 'NLL val')
    log_fmt = '{:>8d}{:>10.1f}{:>12.5f}{:>12.5f}'
    log_write(log_header)

    if figures_interval > 0:
        checkpoint_figures(join(figures_dir, 'init.pdf'), cinn, dataset, args)

    t_start = time.time()

    ####################
    #   V Training V   #
    ####################

    for epoch in range(N_epochs):
        progress_bar = tqdm(total=dataset.epoch_length, ascii=True, ncols=100,
                            leave=False, disable=True)  # no_progress_bar)
        loss_per_batch = []

        for i, (x, y) in enumerate(dataset.train_loader):
            x, y = x.cuda(), y.cuda()

            nll = cinn_parallel(x, y).mean()
            nll.backward()
            # _check_gradients_per_block(cinn.inn)

            loss_per_batch.append(nll.item())
            print('{:03d}/445 {:.6f}'.format(i, loss_per_batch[-1]), end='\r')

            cinn.optimizer.step()
            cinn.optimizer.zero_grad()
            progress_bar.update()

        # from here: end of epoch
        scheduler.step()
        progress_bar.close()

        if (epoch + 1) % log_interval == 0:
            with torch.no_grad():
                time_delta = (time.time() - t_start) / 60.
                train_loss = np.mean(loss_per_batch)
                val_loss = cinn_parallel(val_x, val_y).mean()
                log_write(log_fmt.format(epoch + 1, time_delta, train_loss,
                                         val_loss))

        if figures_interval > 0 and (epoch + 1) % figures_interval == 0:
            checkpoint_figures(
                join(figures_dir, 'epoch_{:05d}.pdf'.format(epoch + 1)),
                cinn, dataset, args)

        if checkpoint_interval > 0 and (epoch + 1) % checkpoint_interval == 0:
            cinn.save(join(checkpoints_dir,
                           'checkpoint_{:05d}.pt'.format(epoch + 1)))

    logfile.close()
    cinn.save(join(output_dir, 'checkpoint_end.pt'))
def test(args):
    out_dir = args['checkpoints']['output_dir']
    figures_output_dir = join(out_dir, 'testing')
    os.makedirs(figures_output_dir, exist_ok=True)

    batch_norm_mode = args['testing']['average_batch_norm']

    print('. Loading the dataset')
    dataset = data.dataset(args)

    print('. Constructing the model')
    cinn = model.CINN(args)
    cinn.cuda()

    print('. Loading the checkpoint')
    if batch_norm_mode == 'NONE':
        cinn.load(join(out_dir, 'checkpoint_end.pt'))
    elif batch_norm_mode == 'FORWARD':
        try:
            cinn.load(join(out_dir, 'checkpoint_end_avg.pt'))
        except FileNotFoundError:
            print('. Averaging BatchNorm layers')
            cinn.load(join(out_dir, 'checkpoint_end.pt'))
            _average_batch_norm(cinn, dataset, args, tot_iterations=500)
            cinn.save(join(out_dir, 'checkpoint_end_avg.pt'))
    elif batch_norm_mode == 'INVERSE':
        try:
            cinn.load(join(out_dir, 'checkpoint_end_avg_inv.pt'))
        except FileNotFoundError:
            print('. Averaging BatchNorm layers')
            cinn.load(join(out_dir, 'checkpoint_end.pt'))
            _average_batch_norm(cinn, dataset, args, inverse=True)
            cinn.save(join(out_dir, 'checkpoint_end_avg_inv.pt'))
    else:
        raise ValueError(
            'average_batch_norm ini value must be FORWARD, INVERSE or NONE')

    cinn.eval()

    do_test_loss = False
    do_samples = False
    do_features = True

    if do_test_loss:
        print('. Computing test loss')
        loss = _test_loss(cinn, dataset, args, test_data=True)
        print('TEST LOSS', loss)
        with open(join(figures_output_dir, 'test_loss'), 'w') as f:
            f.write(str(loss))

    if do_samples:
        print('. Generating samples')
        os.makedirs(join(figures_output_dir, 'samples'), exist_ok=True)
        # for t in [0.7, 0.9, 1.0]:
        for t in [1.0]:
            sampling.sample(cinn, dataset, args, temperature=t,
                            test_data=False, big_size=False,
                            N_examples=353, N_samples_per_y=24,
                            save_separate_ims=join(
                                figures_output_dir,
                                'samples/val_{:.3f}'.format(t)))

    if do_features:
        print('. Visualizing feature pyramid')
        from .features_pca import features_pca
        features_pca(cinn, dataset, args, join(figures_output_dir, 'c_pca'))
parser.add_argument("--num_cols", default=640, type=int)
parser.add_argument("--imgdepth", default=1, type=int)
parser.add_argument("--cropsize", default=32, type=int)
parser.add_argument("--batchsize", default=16, type=int)
parser.add_argument("--layers", default=16, type=int)
parser.add_argument("--filters", default=256, type=int)
parser.add_argument("--epochs", default=1, type=int)
parser.add_argument("--resume", default=0, type=int)
parser.add_argument("--test", default=True, type=bool)
args = parser.parse_args()

# Initialize dataset.
training_data = dataset(args.dataset, args.imgdepth, args.num_rows,
                        args.num_cols, args.cropsize, args.batchsize)

edsr = edsr_model(training_data.num_train_iterations,
                  training_data.num_test_iterations, args.batchsize,
                  args.layers, args.filters, args.imgdepth, args.cropsize,
                  args.test)
edsr.set_functions(training_data.get_train_batch,
                   training_data.get_test_batch, training_data.shuffle)
def load_dataset(dataset):
    print("\nLoading dataset...\n")

    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])

    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width),
                          transforms.InterpolationMode.NEAREST),
        ext_transforms.PILToLongTensor()
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(
        args.dataset_dir,
        transform=image_transform,
        label_transform=label_transform)
    train_loader = data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers)

    # Load the validation set as tensors
    val_set = dataset(
        args.dataset_dir,
        mode='val',
        transform=image_transform,
        label_transform=label_transform)
    val_loader = data.DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(
        args.dataset_dir,
        mode='test',
        transform=image_transform,
        label_transform=label_transform)
    test_loader = data.DataLoader(
        test_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Remove the road_marking class from the CamVid dataset as it's merged
    # with the road class
    if args.dataset.lower() == 'camvid':
        del class_encoding['road_marking']

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a batch of samples to display
    if args.mode.lower() == 'test':
        images, labels = iter(test_loader).next()
    else:
        images, labels = iter(train_loader).next()
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights from the selected weighing technique
    print("\nWeighing technique:", args.weighing)
    print("Computing class weights...")
    print("(this can take a while depending on the dataset size)")
    class_weights = 0
    if args.weighing.lower() == 'enet':
        class_weights = enet_weighing(train_loader, num_classes)
    elif args.weighing.lower() == 'mfb':
        class_weights = median_freq_balancing(train_loader, num_classes)
    else:
        class_weights = None

    if class_weights is not None:
        class_weights = torch.from_numpy(class_weights).float().to(device)
        # Set the weight of the unlabeled class to 0
        if args.ignore_unlabeled:
            ignore_index = list(class_encoding).index('unlabeled')
            class_weights[ignore_index] = 0

    print("Class weights:", class_weights)

    return (train_loader, val_loader,
            test_loader), class_weights, class_encoding
#         # print("Save last model...")
#         # discriminator.save(cfg.DISC_SAVE_DIR + "last.h5", save_format='h5')
#         # generator.save(cfg.GEN_SAVE_DIR + "last.h5", save_format='h5')
#
#         if epoch % 1 == 0:
#             print("Save model...")
#             discriminator.save(cfg.DISC_SAVE_DIR + str(epoch) + ".h5", save_format='h5')
#             generator.save(cfg.GEN_SAVE_DIR + str(epoch) + ".h5", save_format='h5')


if __name__ == '__main__':
    # load my own dataset
    # train_data = Dataset(istrain=True)
    # test_data = Dataset(istrain=False)

    # load tf.data.Dataset, more efficiently
    train_data, train_num = dataset(istrain=True)
    test_data, test_num = dataset(istrain=False)
    train_steps_per_epoch = int(train_num / cfg.BATCH_SIZE)
    test_steps_per_epoch = int(test_num / cfg.BATCH_SIZE)
    # train_ds = [train_data, train_steps_per_epoch]
    # test_ds = [test_data, test_steps_per_epoch]

    # load target model
    tmodel = target_model()

    check_dir(cfg.GEN_SAVE_DIR)
    check_dir(cfg.DISC_SAVE_DIR)

    # function advgan