def TrainTestValHoldout(dataset, sample_size, random_seed, return_holdouts=False):
    name = '_'.join(dataset.split(' ') + [str(sample_size), str(random_seed)])
    ds = Dataset(dataset)
    names = ds.names
    df = ds.df
    # Manually selected label combinations for holdouts A-E.
    combinations = [['GO:0008144', 'GO:0022857'],
                    ['GO:0003677', 'GO:0003723'],
                    ['GO:0043169', 'GO:0015075'],
                    ['GO:0036094', 'GO:0016301', 'GO:0140096', 'GO:0038023'],
                    ['GO:0003824', 'GO:0043168', 'GO:0048037']]
    # Collect each holdout from the full dataframe first, so rows matching
    # more than one combination appear in every matching holdout.
    holdouts = []
    for comb in combinations:
        _labels = set(comb)
        mask = df['labels'].map(lambda x: _labels.issubset(x))
        holdouts.append(df[mask])
    # Then drop all holdout rows from the training dataframe.
    for comb in combinations:
        _labels = set(comb)
        mask = df['labels'].map(lambda x: _labels.issubset(x))
        df = df[~mask]
    ds = BaseDataset().from_df(df)
    ds.names = names
    if return_holdouts:
        return combinations, holdouts
    return _TrainTestVal(ds, sample_size, random_seed)
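# Minimal, self-contained sketch (toy data, not the real GO dataset) of the
# subset-mask logic above: rows whose label set contains a combination go
# into the holdout, and the inverted mask removes them from the training frame.
import pandas as pd

toy = pd.DataFrame({'labels': [{'GO:0008144', 'GO:0022857', 'GO:0003677'},
                               {'GO:0008144'},
                               {'GO:0008144', 'GO:0022857'}]})
combination = {'GO:0008144', 'GO:0022857'}
mask = toy['labels'].map(lambda x: combination.issubset(x))
holdout = toy[mask]   # rows 0 and 2
train = toy[~mask]    # row 1
print(len(holdout), len(train))  # 2 1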
def __init__(self, task_config):
    self.model_config = load_json(task_config['model_config_file'])
    self.model_config['voc_data_dir'] = task_config['data_path']
    self.opt = opt
    self.opt.log_filename = task_config['log_filename']
    self.opt._parse(self.model_config)
    self.dataset = Dataset(self.opt)
    logging.info('load data')
    self.dataloader = DataLoader(self.dataset,
                                 batch_size=self.model_config['batch_size'],
                                 shuffle=True,
                                 num_workers=self.opt.num_workers)
    # TODO: add a separate validation set; the test set doubles as one for now
    self.testset = TestDataset(self.opt)
    self.test_dataloader = DataLoader(self.testset,
                                      batch_size=self.model_config['batch_size'],
                                      num_workers=self.opt.test_num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    self.train_size = len(self.dataset)
    self.valid_size = len(self.testset)
    self.faster_rcnn = FasterRCNNVGG16()
    logging.info('model construct completed')
    self.trainer = FasterRCNNTrainer(self.faster_rcnn,
                                     self.opt.log_filename).cuda()
    if self.opt.load_path:
        self.trainer.load(self.opt.load_path)
        logging.info('load pretrained model from %s', self.opt.load_path)
    self.best_map = 0
    self.lr_ = self.opt.lr
class PairwiseSampler(object):

    def __init__(self, batch_size=512, data_name="ml_100k", num_neg=1):
        self.batch_size = batch_size
        self.data_name = data_name
        self.data = Data()
        self.num_neg = num_neg
        self.dataset = Dataset(data_name=self.data_name)

    def get_train_data(self):
        """Replace the third column of each (user, item, _) row with a
        sampled negative item the user has not interacted with."""
        user_movie = self.dataset.get_user_movie()
        num_item = self.dataset.get_max_movie_id()
        data_value = self.data.get_train_data(data_name=self.data_name)
        for idx in range(len(data_value)):
            j = np.random.choice(num_item) + 1
            while j in user_movie[data_value[idx, 0]]:
                j = np.random.choice(num_item) + 1
            data_value[idx, 2] = j
        return data_value

    def get_train_batch(self):
        data_value = self.get_train_data()
        for start in range(0, len(data_value), self.batch_size):
            end = min(start + self.batch_size, len(data_value))
            yield data_value[start:end]

    def get_test_batch(self):
        data_value = self.data.get_test_data(data_name=self.data_name)
        for start in range(0, len(data_value), self.batch_size):
            end = min(start + self.batch_size, len(data_value))
            yield data_value[start:end]

    def get_batch_number(self):
        data_value = self.data.get_train_data(data_name=self.data_name)
        return (len(data_value) + self.batch_size - 1) // self.batch_size
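# Hypothetical usage sketch for PairwiseSampler; it assumes the Data and
# Dataset helpers it wraps are importable and that "ml_100k" is available.
sampler = PairwiseSampler(batch_size=256, data_name="ml_100k", num_neg=1)
print("batches per epoch:", sampler.get_batch_number())
for batch in sampler.get_train_batch():
    users, pos_items, neg_items = batch[:, 0], batch[:, 1], batch[:, 2]
    # feed (users, pos_items, neg_items) into a BPR-style pairwise loss here
    break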
def _filter_matches(self, matches, jm_dataset: Dataset, wm_dataset,
                    jmdf_prefix='jmdf_', wmdf_prefix='wmdf_'):
    timestamp_metrics = [Metric.START_TIME, Metric.STOP_TIME]
    for metric in timestamp_metrics:
        jmdf_ts_col = jmdf_prefix + jm_dataset.col(metric)
        wmdf_ts_col = wmdf_prefix + wm_dataset.col(metric)
        # Keep only matches whose timestamps agree within the tolerance.
        matches = matches[
            self._timestamp_diff_series(matches[jmdf_ts_col],
                                        matches[wmdf_ts_col])
            < self.timestamp_tolerance
            # To also accept rows with missing timestamps, add:
            # | matches[jmdf_ts_col].isnull() | matches[wmdf_ts_col].isnull()
        ]
    # Only accept jobs that match in their workflow
    jm_workflow_col = jmdf_prefix + jm_dataset.col(Metric.WORKFLOW)
    wm_workflow_col = wmdf_prefix + wm_dataset.col(Metric.WORKFLOW)
    matches = matches[matches[jm_workflow_col] == matches[wm_workflow_col]]
    return matches
def _match_on_cpu_time(self, jm_dataset: Dataset, wm_dataset: Dataset,
                       jm_subset=None, wm_subset=None):
    jmdf = jm_subset if jm_subset is not None else jm_dataset.df
    wmdf = wm_subset if wm_subset is not None else wm_dataset.df
    # Round CPU time to absorb rounding errors while matching float values
    jmdf['cpuApprox'] = jmdf[jm_dataset.col(Metric.CPU_TIME)].round()
    wmdf['cpuApprox'] = wmdf[wm_dataset.col(Metric.CPU_TIME)].round()
    jmdf_index = jmdf.index.name
    wmdf_index = wmdf.index.name
    self._prefix_columns(jmdf, 'jmdf_')
    self._prefix_columns(wmdf, 'wmdf_')
    matches = jmdf.reset_index().merge(wmdf.reset_index(),
                                       left_on='jmdf_cpuApprox',
                                       right_on='wmdf_cpuApprox')
    filtered = self._filter_matches(matches, jm_dataset, wm_dataset,
                                    jmdf_prefix='jmdf_', wmdf_prefix='wmdf_')
    # A "perfect" match is a job matched to exactly one counterpart.
    perfect_matches = filtered.groupby(jmdf_index).filter(lambda x: len(x) == 1)
    return perfect_matches[[jmdf_index, wmdf_index]]
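# Self-contained pandas sketch of the matching strategy above: round CPU time,
# merge on the rounded value, then keep only one-to-one ("perfect") matches.
# Column names here are toy; the real code derives them via Dataset.col(...).
import pandas as pd

jm = pd.DataFrame({'cpu': [10.01, 20.02, 30.0]})
wm = pd.DataFrame({'cpu': [9.99, 20.01, 20.03]})
jm['cpuApprox'] = jm['cpu'].round()
wm['cpuApprox'] = wm['cpu'].round()
merged = jm.reset_index().merge(wm.reset_index(), on='cpuApprox',
                                suffixes=('_jm', '_wm'))
perfect = merged.groupby('index_jm').filter(lambda g: len(g) == 1)
print(perfect[['index_jm', 'index_wm']])  # only the 10-second job matches 1:1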
def sample_mini_dataset(self, num_classes, num_shots, test_shots, classes=None):
    if classes is None:
        # Sample num_classes distinct classes from the 10 available
        # (previously hard-coded to 5 and allowed repeats).
        classes = np.random.choice(10, num_classes, replace=False)
    X, y, X_test, y_test = [], [], [], []
    for idx, c in enumerate(classes):
        X_c = self.X[self.y == c]
        p = np.random.choice(X_c.shape[0], size=num_shots + test_shots,
                             replace=False)
        X.append(X_c[p[:num_shots]])
        X_test.append(X_c[p[num_shots:]])
        y.append(np.ones(num_shots) * idx)
        y_test.append(np.ones(test_shots) * idx)
    X = np.concatenate(X, axis=0)
    X_test = np.concatenate(X_test, axis=0)
    y = np.concatenate(y, axis=0)
    y_test = np.concatenate(y_test, axis=0)
    if self.one_hot:
        y = helpers.one_hot(y, num_classes)
        y_test = helpers.one_hot(y_test, num_classes)
    train_set = Dataset(batch_size=self.inner_batch_size, X=X, y=y, shuffle=True)
    test_set = Dataset(batch_size=self.inner_batch_size, X=X_test, y=y_test,
                       shuffle=False)
    return train_set, test_set
def create_data_loaders(opt, split='train'):
    """Create the data loader for the requested split ('train', 'val' or 'test')."""
    if split == 'train':
        tr_dataset = Dataset(opt.data, opt, 'train')
        return torch.utils.data.DataLoader(tr_dataset,
                                           batch_size=opt.batchSize,
                                           shuffle=True,
                                           drop_last=True,
                                           num_workers=opt.nThreads,
                                           pin_memory=True)
    elif split == 'val':
        val_dataset = Dataset(opt.data, opt, 'val')
        return torch.utils.data.DataLoader(val_dataset,
                                           batch_size=opt.batchSize,
                                           shuffle=False,
                                           num_workers=opt.nThreads,
                                           pin_memory=True)
    elif split == 'test':
        te_dataset = Dataset(opt.data, opt, 'test')
        return torch.utils.data.DataLoader(te_dataset,
                                           batch_size=opt.batchSize,
                                           shuffle=False,
                                           num_workers=opt.nThreads,
                                           pin_memory=True)
    # Previously an unknown split silently returned None
    raise ValueError('Unknown split: %s' % split)
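# Hypothetical usage sketch; `opt` is assumed to carry the fields the factory
# reads (data, batchSize, nThreads), as in the surrounding training script.
train_loader = create_data_loaders(opt, split='train')
val_loader = create_data_loaders(opt, split='val')
for batch in train_loader:
    break  # one optimization step per batch would go here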
def sample_mini_dataset(self, num_classes, num_shots, test_shots):
    # Draw num_classes distinct classes by shuffling the class list.
    shuffled = list(self.data_source)
    random.shuffle(shuffled)
    cs = shuffled[:num_classes]
    X, y, X_test, y_test = [], [], [], []
    for idx, c in enumerate(cs):
        inputs = c.sample(num_shots + test_shots)
        targets = np.full(num_shots + test_shots, idx)
        X.append(inputs[:num_shots])
        y.append(targets[:num_shots])
        X_test.append(inputs[num_shots:])
        y_test.append(targets[num_shots:])
    X = np.concatenate(X, axis=0)
    y = np.concatenate(y, axis=0)
    X_test = np.concatenate(X_test, axis=0)
    y_test = np.concatenate(y_test, axis=0)
    if self.one_hot:
        y = helpers.one_hot(y, num_classes)
        y_test = helpers.one_hot(y_test, num_classes)
    train_set = Dataset(batch_size=self.inner_batch_size, X=X, y=y, shuffle=True)
    test_set = Dataset(batch_size=self.inner_batch_size, X=X_test, y=y_test,
                       shuffle=False)
    return train_set, test_set
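# Hypothetical episode loop for the few-shot sampler above; `meta_dataset`
# names an instance of the class that owns sample_mini_dataset.
for episode in range(10):
    train_set, test_set = meta_dataset.sample_mini_dataset(
        num_classes=5, num_shots=1, test_shots=15)
    for data in train_set:
        pass  # inner-loop adaptation step on the support (train) set
    # evaluate the adapted model on test_set (the query set) here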
def predict(self, sine=None, z_value=None):
    if sine is None:
        sine = self.eval_set.sample(1)[0]
    samples = sine.sample(2000)
    train_set, val_set = samples[:1000], samples[1000:]
    train_set = Dataset(batch_size=100, X=train_set[:, 0:1], y=train_set[:, 1])
    val_set = Dataset(batch_size=100, X=val_set[:, 0:1], y=val_set[:, 1])
    Xs, ys, ps = [], [], []
    for data in train_set:
        if z_value is None:
            feed_dict = self._make_feed_dict(data, is_training=False)
        else:
            feed_dict = self._make_feed_dict(data, is_training=False,
                                             z_value=z_value, use_z_ph=True)
        X, y = self._data_preprocessing(data)
        p = self.session.run([m.predictions for m in self.parallel_models],
                             feed_dict=feed_dict)
        Xs.append(X)
        ys.append(y)
        ps += p
    Xs = np.concatenate(Xs, axis=0)
    ys = np.concatenate(ys, axis=0)
    ps = np.concatenate(ps, axis=0)
    return Xs, ys, ps
def load_data(self, *args):
    x_train, y_train, x_valid, y_valid = get_data(*args)
    # Normalize the validation set with the training statistics.
    x_train_normalized, x_valid_normalized = normalize_train_eval(x_train, x_valid)
    train_ds = Dataset(x_train_normalized, y_train)
    valid_ds = Dataset(x_valid_normalized, y_valid)
    c = y_train.max().item() + 1  # number of classes
    return train_ds, valid_ds, c
def main():
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    # run name
    name = '{}-{}_{}-{}-{}-{}'.format(args.env, args.model, args.hidden_dim,
                                      args.num_layers, args.T, args.lr)
    wandb.init(name=name, project="finance", entity="liuyuezhang", config=args)

    # input/output dimension per environment
    dims = {'single': 6, 'pair': 4}
    dim = dims[args.env]

    # data
    train_dataset = Dataset(dir=args.dir + args.env + '/train.pkl', T=args.T)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True)

    # model
    model = VanillaLSTM(input_dim=dim, hidden_dim=args.hidden_dim,
                        output_dim=dim, num_layers=args.num_layers).to("cuda")
    loss_fn = torch.nn.MSELoss()
    optimiser = torch.optim.Adam(model.parameters(), lr=args.lr)

    for e in range(args.epochs):
        # train
        print("\n================Epoch: {}================\n".format(e))
        model.train()
        for data in tqdm(train_loader):
            x_train, y_train = data
            x_train = x_train.float().to("cuda")
            y_train = y_train.float().to("cuda")
            # forward
            y_pred = model(x_train)
            loss = loss_fn(y_pred, y_train)
            # backward
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            # log
            wandb.log({"loss": loss.item()})

        # save (every epoch; a periodic condition is left disabled)
        # if (e % args.save_epochs) == 9:
        print("model saved.")
        torch.save(model.state_dict(), os.path.join(wandb.run.dir, 'model.pt'))

    # test
    test_dataset = Dataset(dir=args.dir + args.env + '/test.pkl', T=args.T)
    scale = np.array(pd.read_pickle(args.dir + args.env + '/test_max.pkl'))
    test_loader = torch.utils.data.DataLoader(test_dataset)
    model.eval()
    test(model, test_loader, scale)
def __init__(self):
    self.data_path = path_params['data_path']
    self.tfrecord_dir = path_params['tfrecord_dir']
    self.train_tfrecord_name = path_params['train_tfrecord_name']
    self.input_width = model_params['input_width']
    self.input_height = model_params['input_height']
    self.channels = model_params['channels']
    self.class_num = len(model_params['classes'])
    self.batch_size = solver_params['batch_size']
    self.dataset = Dataset()
def train_epoch(self):
    for k in range(100):
        sines = self.train_set.sample(1)
        samples = sines[0].sample(200)
        train_set, val_set = samples[:100], samples[100:]
        train_set = Dataset(batch_size=100, X=train_set[:, 0:1], y=train_set[:, 1])
        val_set = Dataset(batch_size=100, X=val_set[:, 0:1], y=val_set[:, 1])
        data = next(train_set)
        feed_dict = self._make_feed_dict(data, is_training=True)
        self.session.run(self.optimize_op, feed_dict=feed_dict)
def __init__(self, trainer_type, **kwargs):
    self.dataset = Dataset(**kwargs)
    # Forward the dataset-derived arguments into the trainer configuration.
    kwargs.update({
        "tot_speakers": self.dataset.tot_speakers,
        "type": trainer_type,
    })
    self.args = kwargs
class PointSampler(object):

    def __init__(self, batch_size=512, data_name="ml_100k", num_neg=1):
        self.batch_size = batch_size
        self.data = Data()
        self.data_name = data_name
        self.num_neg = num_neg
        self.dataset = Dataset(data_name=self.data_name)

    def get_train_data(self, keep_label=False, value_for_negative=0.):
        self.data_value = self.data.get_train_data(data_name=self.data_name)
        user_movie_train = self.dataset.get_user_movie_for_train()
        num_item = self.dataset.get_max_movie_id()
        new_data_value = []
        for user_item in self.data_value:
            user, item, label = user_item[0], user_item[1], user_item[2]
            if keep_label:
                new_data_value.append([user, item, label])
            else:
                new_data_value.append([user, item, 1])
            # Sample num_neg negatives the user has not interacted with.
            for i in range(self.num_neg):
                j = np.random.choice(num_item)
                while j in user_movie_train[user]:
                    j = np.random.choice(num_item)
                new_data_value.append([user, j, value_for_negative])
        # astype returns a copy, so cast the full array rather than
        # calling it on a column view and discarding the result.
        return np.array(new_data_value).astype(np.float32)

    def get_train_batch(self, shuffle=False, keep_label=False,
                        value_for_negative=0.):
        self.data_value_batch = self.get_train_data(
            keep_label=keep_label, value_for_negative=value_for_negative)
        if shuffle:
            index = list(range(len(self.data_value_batch)))
            random.shuffle(index)
            self.data_value_batch = self.data_value_batch[index]
        for start in range(0, len(self.data_value_batch), self.batch_size):
            end = min(start + self.batch_size, len(self.data_value_batch))
            yield self.data_value_batch[start:end]

    def get_test_batch(self):
        test_data = self.data.get_test_data()
        batch_index = np.random.choice(len(test_data), size=self.batch_size)
        return test_data[batch_index, :]

    def get_batch_number(self):
        # Each interaction yields one positive plus num_neg negatives.
        data_value = self.data.get_train_data(data_name=self.data_name)
        return len(data_value) // self.batch_size * (1 + self.num_neg)
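# Hypothetical usage sketch for PointSampler: each interaction yields one
# positive row plus num_neg negative rows labelled value_for_negative.
sampler = PointSampler(batch_size=256, data_name="ml_100k", num_neg=4)
for batch in sampler.get_train_batch(shuffle=True):
    users, items, labels = batch[:, 0], batch[:, 1], batch[:, 2]
    # feed (users, items, labels) into a pointwise loss (e.g. BCE) here
    break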
def __init__(self):
    self.data_path = path_params['data_path']
    self.tfrecord_dir = path_params['tfrecord_dir']
    self.train_tfrecord_name = path_params['train_tfrecord_name']
    self.test_tfrecord_name = path_params['test_tfrecord_name']
    self.image_size = model_params['image_size']
    self.cell_size = model_params['cell_size']
    self.class_num = model_params['num_classes']
    self.class_ind = dict(zip(CLASSES, range(self.class_num)))
    self.batch_size = solver_params['batch_size']
    self.flipped = solver_params['flipped']
    self.dataset = Dataset()
def evaluate(self):
    ls = []
    for k in range(100):
        sines = self.eval_set.sample(1)
        samples = sines[0].sample(200)
        train_set, val_set = samples[:100], samples[100:]
        train_set = Dataset(batch_size=100, X=train_set[:, 0:1], y=train_set[:, 1])
        val_set = Dataset(batch_size=100, X=val_set[:, 0:1], y=val_set[:, 1])
        data = next(train_set)
        feed_dict = self._make_feed_dict(data, is_training=False)
        l = self.session.run([m.loss for m in self.parallel_models],
                             feed_dict=feed_dict)
        ls.append(l)
    return np.mean(ls)
def main():
    opt = Config(os.getcwd())
    if opt.backbone == 'resnet18':
        model = resnet_face18(opt.use_se)
    elif opt.backbone == 'resnet34':
        model = resnet34()
    elif opt.backbone == 'resnet50':
        model = resnet50()

    model = DataParallel(model)
    # load_model(model, opt.test_model_path)
    model.load_state_dict(torch.load(opt.test_model_path,
                                     map_location={'cuda:0': 'cpu'}))
    model.to(torch.device(device))
    model.eval()

    global args
    train_dataset = Dataset(opt.train_root, opt.train_list, phase='train',
                            input_shape=opt.input_shape)
    trainloader = data.DataLoader(train_dataset,
                                  batch_size=opt.train_batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    # centroid_map = create_centroid(model, trainloader)

    test_dataset = Dataset(opt.test_root, opt.test_list, phase='test',
                           input_shape=opt.input_shape)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=1000,  # opt.test_batch_size
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    for x, y in test_loader:
        # inference only; skip building the autograd graph
        with torch.no_grad():
            latent_vecs = model(x)
        print(latent_vecs.shape, y.shape)
        target = y
        plot3d_tsne(latent_vecs, target)
        show_umap(latent_vecs, target)
        t_sne(latent_vecs, target)
def _test(self):
    print("testing ......")
    for k in range(1):
        sines = self.eval_set.sample(1)
        samples = sines[0].sample(200)
        train_set, val_set = samples[:100], samples[100:]
        train_set = Dataset(batch_size=100, X=train_set[:, 0:1], y=train_set[:, 1])
        val_set = Dataset(batch_size=100, X=val_set[:, 0:1], y=val_set[:, 1])
        data = next(train_set)
        feed_dict = self._make_feed_dict(data, is_training=False)
        mean = self.session.run([m.z_mu for m in self.parallel_models],
                                feed_dict=feed_dict)
        std = self.session.run([m.z_sigma for m in self.parallel_models],
                               feed_dict=feed_dict)
        print(mean)
        print(std)
def data_processor(bs, url="MNIST_URL"):
    x_train, y_train, x_valid, y_valid = get_data(url)
    train_mean, train_std = x_train.mean(), x_train.std()
    x_train = normalize(x_train, train_mean, train_std)
    # NB: use the training mean/std, not the validation statistics,
    # to normalize the validation set
    x_valid = normalize(x_valid, train_mean, train_std)
    train_ds, valid_ds = Dataset(x_train, y_train), Dataset(x_valid, y_valid)
    train_samp = Sampler(train_ds, bs, shuffle=True)
    valid_samp = Sampler(valid_ds, bs, shuffle=False)
    train_dl = DataLoader(train_ds, sampler=train_samp)
    valid_dl = DataLoader(valid_ds, sampler=valid_samp)
    return train_dl, valid_dl
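# Hypothetical usage sketch; get_data/normalize/Dataset/Sampler/DataLoader are
# the helpers this codebase defines elsewhere.
train_dl, valid_dl = data_processor(bs=64)
xb, yb = next(iter(train_dl))  # xb is normalized with the *training* mean/std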
def predict(_):
    data = Dataset(feature_sets, files)
    data.split(batch_size, sequence_length, test_size=test_size,
               shuffle=False, include_std=False)
    predictor_a = TestModel(model_a, batch_size, 'model_A')
    predictor_v = TestModel(model_v, batch_size, 'model_V')
    # Normalized per-sample errors for arousal/valence, plus angle errors.
    rmse_a = []
    rmse_v = []
    angle_err = []
    angle_no_small = []
    for _ in range(0, data.test.num_batches):
        data.test.next_batch()
        for seq_x, seq_y in data.test.sequences:
            pa = predictor_a.predict(seq_x)
            pv = predictor_v.predict(seq_x)
            ya = seq_y[0, 0, 0]
            yv = seq_y[0, 0, 1]
            rmse_a.append((pa - ya) / 1490)
            rmse_v.append((pv - yv) / 1430)
            p_angle = renormalize_angle(np.angle(pa + 1j * pv), deg=False)
            y_angle = renormalize_angle(np.angle(ya + 1j * yv), deg=False)
            err = p_angle - y_angle
            if -np.pi < err < np.pi:
                angle_err.append(err)
                if (abs(ya) > 90) and (abs(yv) > 90):
                    angle_no_small.append(err)
            else:
                # Wrap errors outside (-pi, pi) back into range.
                angle_err.append(2 * np.pi - abs(err))
                if (abs(ya) > 90) and (abs(yv) > 90):
                    angle_no_small.append(2 * np.pi - abs(err))
    return rmse_a, rmse_v, angle_err, angle_no_small
def runCV():
    config.new_experiment()
    start = timeit.default_timer()
    # -----------------
    model = AE(n_input=1, n_hidden=config.n_hidden, n_output=1, n_layers=1)
    dataset = Dataset()
    # Autoencoder setup: the selected channel is both input and target.
    data = dataset[:, [config.CHANNEL]]
    target = dataset[:, [config.CHANNEL]]
    mean = cv(model, data, target,
              temperature=config.temperature,
              weight_decay=config.weight_decay,
              learning_rate=config.learning_rate,
              sparsity=config.sparsity,
              sparsity_penalty=config.sparsity_penalty,
              n_epochs=config.MAX_TRAINING_EPOCHS,
              n_splits=config.CV_N_SPLITS,
              seed=config.SEED,
              batch_size=config.batch_size,
              shuffle=False)
    stop = timeit.default_timer()
    # -----------------
    print(stop - start)
    # save_result(mean)
    print('OK')
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    print('Loading Model')
    # faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    # trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    lr_ = opt.lr
    extractor, classifier = decom_vgg16()

    img, bbox_, label_, scale = dataset[1]
    _, H, W = img.shape
    img_size = (H, W)
    img, bbox_, label_ = to_tensor(img), to_tensor(bbox_), to_tensor(label_)
    scale = at.scalar(scale)
    img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
    img, bbox, label = Variable(img), Variable(bbox), Variable(label)
    pdb.set_trace()
    features = extractor(img)
    # The VGG16 extractor has a feature stride of 16; run the RPN built here
    # (this scratch function has no `self` to pull these from).
    rpn = RegionProposalNetwork(512, 512, ratios=ratios,
                                anchor_scales=anchor_scales,
                                feat_stride=16)
    rpn_locs, rpn_scores, rois, roi_indices, anchor = rpn(features, img_size, scale)
def merge_datasets(self, matches, left: Dataset, right: Dataset,
                   left_index, right_index,
                   left_suffix='left', right_suffix='right'):
    left_df = left.df
    right_df = right.df

    # Join with matches, preserving all entries in the base (right) dataframe,
    # hence the right join first
    half_joined = matches.join(left_df, on=left_index, how='right')
    # Then preserve all entries, even those without a match, via an outer join
    joined = half_joined.join(right_df, on=right_index, how='outer',
                              lsuffix=left_suffix, rsuffix=right_suffix)
    # Todo Reset index to the original index (ID of the left dataframe)?
    # joined = joined.set_index(left_index)

    # Suffixed columns are overlapping, i.e. present in both data sets;
    # merge each pair back into a single column
    for left_col in [col for col in joined.columns if col.endswith(left_suffix)]:
        col_name = self.remove_trailing(left_col, left_suffix)
        right_col = col_name + right_suffix
        # If the suffixed column exists, it also exists in the other data set
        joined = self.merge_cols(joined, col_name, left_col, right_col)

    # Put the result back into a dataset, using the union of both date ranges
    start_date = min(left.start, right.start)
    end_date = max(left.end, right.end)
    # Only retain extra data frames of the base dataset
    extra_dfs = left.extra_dfs
    return Dataset(joined, left.name, start=start_date, end=end_date,
                   sep=self.part_sep, extra_dfs=extra_dfs)
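# Self-contained pandas sketch of the suffix handling above: after an outer
# join, overlapping columns appear twice and are coalesced back into one
# (one plausible behavior for the merge_cols helper, assumed here).
import pandas as pd

left = pd.DataFrame({'cpu': [1.0, None]}, index=[0, 1])
right = pd.DataFrame({'cpu': [None, 2.0]}, index=[0, 1])
joined = left.join(right, how='outer', lsuffix='_l', rsuffix='_r')
# coalesce: prefer the left value, fall back to the right one
joined['cpu'] = joined['cpu_l'].fillna(joined['cpu_r'])
print(joined['cpu'].tolist())  # [1.0, 2.0]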
def get_test_inputs(self):
    """Get the inputs of all test samples.

    Returns:
        An np.chararray, where each row corresponds to an image file name.
    """
    return Dataset.get_test_inputs(self)
def get_feature_vectors(real, fakes):
    train_data = Dataset(namedtuple('Conf', 'batch_size')(50),
                         only_plain=True).get_plain_values()
    labels = tf.placeholder(tf.int64, [None])
    # renamed from `input` to avoid shadowing the builtin
    inputs, keep_prob, feature_vectors, logits = classifier()
    cross_entropy = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state('ckpts', latest_filename='metric')
        if ckpt and ckpt.model_checkpoint_path:
            print('restoring classifier')
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())
            print('training classifier')
            for i in range(20000):
                X, y = sess.run(train_data)
                train_step.run(feed_dict={inputs: X, labels: y, keep_prob: 0.5})
                if i % 100 == 0:
                    print(' iteration', i, 'of', 20000)
            saver.save(sess, 'ckpts/metric.ckpt', latest_filename='metric')
        print('evaluating feature vectors')
        real = feature_vectors.eval(feed_dict={inputs: real})
        fakes = [feature_vectors.eval(feed_dict={inputs: fake})
                 for fake in fakes]
    return real, fakes
def input_to_torch_tensor(self, x, device, mode='inference',
                          force_no_preprocessing=False, sample_ids=None):
    """This method can be used to map the internal numpy arrays to PyTorch
    tensors.

    Note, this method has been overwritten from the base class.

    The input images are preprocessed if data augmentation is enabled.
    Preprocessing involves normalization and (for training mode) random
    perturbations.

    Args:
        (....): See docstring of method
            :meth:`data.dataset.Dataset.input_to_torch_tensor`.

    Returns:
        (torch.Tensor): The given input ``x`` as PyTorch tensor.
    """
    # FIXME Method copied from `CIFAR100Data`.
    if self._augment_inputs and not force_no_preprocessing:
        if mode == 'inference':
            transform = self._test_transform
        elif mode == 'train':
            transform = self._train_transform
        else:
            raise ValueError('"%s" not a valid value for argument "mode".'
                             % mode)
        return CIFAR10Data.torch_augment_images(x, device, transform)
    return Dataset.input_to_torch_tensor(
        self, x, device, mode=mode,
        force_no_preprocessing=force_no_preprocessing,
        sample_ids=sample_ids)
def main(_):
    # create global configuration object
    model_config = Configuration(FLAGS.config)
    model = create_model(FLAGS, model_config)

    # Two identical sets of placeholders: one for the training graph,
    # one for the inference graph.
    placeholders = {
        'l': tf.placeholder(tf.float32, (1, None, None, 3)),
        'r': tf.placeholder(tf.float32, (1, None, None, 3)),
        'd': tf.placeholder(tf.float32, (1, None, None, 1)),
    }
    x = {
        'l': tf.placeholder(tf.float32, (1, None, None, 3)),
        'r': tf.placeholder(tf.float32, (1, None, None, 3)),
        'd': tf.placeholder(tf.float32, (1, None, None, 1)),
    }
    p = namedtuple('Placeholders', placeholders.keys())(**placeholders)
    px = namedtuple('Placeholders', x.keys())(**x)
    model.build(px, True, None, build_loss=False)
    model.build(p, False, True, build_loss=False)

    session = tf.Session()
    saver = tf.train.Saver()

    # init variables
    session.run(tf.local_variables_initializer())
    session.run(tf.global_variables_initializer())

    # restore model if provided a checkpoint
    if model_config.checkpoint is not None:
        print("Restoring model from {}".format(model_config.checkpoint))
        saver.restore(session, model_config.checkpoint)

    # init dataset
    paths = get_paths_for_dataset(FLAGS.dataset)
    ratios = {
        'train_ratio': FLAGS.train_ratio,
        'train_valid_ratio': FLAGS.train_valid_ratio,
        'valid_ratio': FLAGS.valid_ratio,
        'test_ratio': FLAGS.test_ratio,
    }
    paths = split_dataset_paths(paths, **ratios)
    dataset = Dataset(get_example_class(FLAGS.dataset), paths, FLAGS.dataset)

    results = {}
    fd = lambda ex: {p.l: ex.left, p.r: ex.right}
    phases = ['valid', 'train', 'train_valid']
    reconstructions = os.path.join(model_config.directory, 'results')
    directories = [os.path.join(reconstructions, phase) for phase in phases]
    for dirname in directories:
        os.makedirs(dirname, exist_ok=True)

    f = open(os.path.join(model_config.directory, 'results.txt'), 'w')
    sys.stdout = Logger(sys.stdout, f)

    subset_iterator = zip(phases,
                          [dataset.valid, dataset.train, dataset.train_valid],
                          directories)
    for phase, subset, store_dir in subset_iterator:
        for example in subset:
            gt = example.disparity.squeeze()
            start = time()
            d = session.run(model.outputs[p], fd(example)).squeeze()
            print("Time: {}".format(1000 * (time() - start)), file=sys.stderr)
            hits, total = disp_precision(gt, d,
                                         model_config.get('max_disp', FLAGS.max_disp),
                                         3)
            all_hits, all_total = results.get(phase, (0, 0))
            results[phase] = (hits + all_hits, total + all_total)
            store_disparity(d, os.path.join(store_dir,
                                            '{}.png'.format(example.name)))
            print('{} {} {}%'.format(phase, example.name, 100 * hits / total))

    for phase in results:
        print('Total {} {}'.format(phase,
                                   100 * results[phase][0] / results[phase][1]))
class ControladorDataset(QMainWindow):
    '''Controller that updates the screens based on data obtained from the
    model classes.

    Attributes:
        _view : MainGui controlled by this controller
        _dataset : Dataset with the data loaded through the screen
    '''

    def __init__(self, view, parent=None):
        '''Controller constructor. Initializes its attributes and connects
        the screen's button signals to methods.

        Parameters:
            view : MainGui controlled by this controller
        '''
        super(ControladorDataset, self).__init__(parent)
        # connect the controller to the view
        self._view = view
        self._view.setupUi(self)
        self._dataset = Dataset()
        # connect the controller to the data model used by the view elements
        self._datasetmodel = DatasetModel(self._dataset)
        # connect button signals and on-screen user actions to functions
        self._view.abrirButton.clicked.connect(self.abrir_janela_para_escolher_arquivo)
        self._view.tabelaAtributos.entered.connect(self.atualizar_atributo_selecionado)
        self._view.tabelaAtributos.clicked.connect(self.atualizar_atributo_selecionado)
        self._view.removerButton.clicked.connect(self.remover_atributos)
        # tweak view parameters
        self._view.tabelaAtributos.setSelectionBehavior(QTableView.SelectRows)
        self._view.tabelaAtributos.setSelectionMode(QTableView.SingleSelection)
        self._view.tabelaEstatistica.setFocusPolicy(Qt.NoFocus)
        # assign the models to the screen elements
        self._view.tabelaAtributos.setModel(self._datasetmodel)

    def abrir_janela_para_escolher_arquivo(self):
        '''Opens a dialog for the user to choose a file and calls abrir'''
        # the file path + name is in the first returned field
        nome = QFileDialog.getOpenFileName(self, "Abrir", "",
                                           "Arquivos de Texto (*.csv)")[0]
        self.abrir(nome)

    def abrir(self, nome):
        '''Opens the file named *nome*, loads it into the DatasetModel and
        refreshes the Data tab on screen.

        Parameters:
            nome: string with the full file name (name + path)
        Returns:
            True if the csv dataset could be opened
            False if the csv file could not be opened
        '''
        self._view.statusbar.showMessage(u"Abrindo dataset...")
        if self._dataset.ler_csv(nome):
            # signal that the model is about to change
            self._datasetmodel.beginResetModel()
            self.atualizar_grupo_dados()
            self._view.statusbar.showMessage(u"Dataset aberto")
            # signal that the model change is finished
            self._datasetmodel.endResetModel()
            return True
        self._view.statusbar.showMessage(u"Nenhum dataset selecionado")
        return False

    def atualizar_grupo_dados(self):
        '''Refreshes the on-screen dataset info (name, instances, attributes)'''
        self._view.nomeLabel.setText(self._dataset.get_nomearquivo())
        self._view.atributosLabel.setText(str(self._dataset.get_natributos()))
        self._view.instanciaLabel.setText(str(self._dataset.get_ninstancias()))

    def atualizar_botao_remover(self):
        '''Enables/disables *removerButton* according to how many checkboxes
        are checked'''
        # if no checkbox is checked, disable the remove button
        self._view.removerButton.setEnabled(sum(self._dataset.get_marcados()) != 0)

    def atualizar_atributo_selecionado(self, indiceClicado=None):
        '''Refreshes the on-screen attribute info (name, missing, distinct,
        type, statistics) and calls atualizar_botao_remover when a checkbox
        is checked or unchecked.

        Parameters:
            indiceClicado: QModelIndex with the position of the clicked item
        '''
        if indiceClicado is None:
            # triggered by removing data: show the first row's attribute
            row = 0
            column = 1
        else:
            row = indiceClicado.row()
            column = indiceClicado.column()  # fixed: was indiceClicado.row()
        # the user checked/unchecked a checkbox
        if column == 0:
            self.atualizar_botao_remover()
        # name of the selected attribute
        atributo = self._dataset.get_atributos()[row]
        # dataframe column for that attribute
        dados_atributo = self._dataset.get_dados()[atributo]
        # column statistics
        estatisticas = dados_atributo.describe()
        # number and percentage of missing values
        ausentes = int(len(dados_atributo) - estatisticas['count'])
        pct_ausentes = int(100 * ausentes / len(dados_atributo))
        if 'unique' in estatisticas:
            # column holds strings
            distintos = estatisticas['unique']
            tipo = "Nominal"
            self._view.tabelaEstatistica.setModel(
                EstatisticaNominalModel(dados_atributo))
        else:
            # column holds numbers
            distintos = len(dados_atributo.unique())
            tipo = "Numérico"
            self._view.tabelaEstatistica.setModel(
                EstatisticaNumericaModel(dados_atributo))
        # refresh the attribute info on screen
        self._view.nomeAtributoLabel.setText(atributo)
        self._view.ausentesLabel.setText("%d (%d%%)" % (ausentes, pct_ausentes))
        self._view.tipoLabel.setText(tipo)
        self._view.distintosLabel.setText(str(distintos))

    def remover_atributos(self):
        '''Removes the attributes whose checkboxes are checked'''
        self._datasetmodel.beginResetModel()
        self._dataset.remover_atributos()
        self._datasetmodel.endResetModel()
        self.atualizar_grupo_dados()
        self.atualizar_atributo_selecionado()
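# Hypothetical launcher sketch for ControladorDataset; MainGui is the view
# class referenced in the docstrings above, and a PyQt5-style app is assumed.
if __name__ == '__main__':
    import sys
    from PyQt5.QtWidgets import QApplication

    app = QApplication(sys.argv)
    controlador = ControladorDataset(MainGui())
    controlador.show()
    sys.exit(app.exec_())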