def prepare_data(self):
    download_dataset(self.hparams.data_name, self.hparams.data_root)
    if self.hparams.data_name == "METABRIC":
        alldata = import_dataset_METABRIC(self.hparams.data_root)
    elif self.hparams.data_name == "SYNTHETIC":
        alldata = import_dataset_SYNTHETIC(self.hparams.data_root)
    full_dataset = SurvivalDataset(alldata)

    # make sure the validation set is balanced
    full_indices = range(len(full_dataset))
    full_targets = full_dataset.label
    train_indices, test_indices = train_test_split(
        full_indices, test_size=0.2, stratify=full_targets)
    train_indices, val_indices = train_test_split(
        train_indices, test_size=0.2, stratify=full_targets[train_indices])
    train_dataset, val_dataset, test_dataset = (
        Subset(full_dataset, train_indices),
        Subset(full_dataset, val_indices),
        Subset(full_dataset, test_indices))

    self.full_dataset = full_dataset
    self.train_dataset = train_dataset
    self.val_dataset = val_dataset
    self.test_dataset = test_dataset
    self.train_indices = train_indices
    self.val_indices = val_indices
    self.test_indices = test_indices
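# Hedged sketch (not part of the original module): a self-contained illustration
# of the two-stage stratified split used in prepare_data above. The toy labels
# and sizes are invented for demonstration only.
import numpy as np
from sklearn.model_selection import train_test_split

toy_labels = np.array([0] * 80 + [1] * 20)       # imbalanced event indicator
toy_indices = np.arange(len(toy_labels))

tr_idx, te_idx = train_test_split(toy_indices, test_size=0.2, stratify=toy_labels)
tr_idx, va_idx = train_test_split(tr_idx, test_size=0.2, stratify=toy_labels[tr_idx])

# every split keeps roughly the original 80/20 class ratio
print(toy_labels[tr_idx].mean(), toy_labels[va_idx].mean(), toy_labels[te_idx].mean())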
def load_twins(dataroot=DATA_FOLDER, data_format=NUMPY, return_sketchy_ites=False,
               return_sketchy_ate=False, observe_sketchy_counterfactuals=False):
    """
    Load the Twins dataset

    :param dataroot: path to folder for data
    :param data_format: NUMPY or PANDAS
    :param return_sketchy_ites: if True, return sketchy ITEs
    :param return_sketchy_ate: if True, return sketchy ATE
    :param observe_sketchy_counterfactuals: TODO (currently raises NotImplementedError)
    :return: dictionary of results
    """
    if observe_sketchy_counterfactuals:
        raise NotImplementedError('Let Brady know if you need this.')
    download_dataset(TWINS_URL, 'Twins', dataroot=dataroot, filename=TWINS_FILENAME)
    full_df = pd.read_csv(os.path.join(dataroot, TWINS_FILENAME), index_col=0)

    if data_format == NUMPY:
        d = {
            'w': full_df.drop(['T', 'y0', 'y1', 'yf', 'y_cf', 'Propensity'],
                              axis='columns').to_numpy(),
            't': full_df['T'].to_numpy(),
            'y': full_df['yf'].to_numpy(),
        }
    elif data_format == PANDAS:
        d = {
            'w': full_df.drop(['T', 'y0', 'y1', 'yf', 'y_cf', 'Propensity'],
                              axis='columns'),
            't': full_df['T'],
            'y': full_df['yf'],
        }
    else:
        raise ValueError('Invalid data_format: {}'.format(data_format))

    if return_sketchy_ites or return_sketchy_ate:
        ites = full_df['y1'] - full_df['y0']
        ites_np = ites.to_numpy()
        if return_sketchy_ites:
            d['ites'] = ites if data_format == PANDAS else ites_np
        if return_sketchy_ate:
            d['ate'] = ites_np.mean()
    return d
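# Hedged usage sketch for load_twins above (not from the original repo): the
# keys and flags follow the dictionary the loader builds; DATA_FOLDER and
# PANDAS are the module-level constants it already assumes exist.
twins = load_twins(data_format=PANDAS, return_sketchy_ites=True, return_sketchy_ate=True)
print(twins['w'].shape, twins['t'].mean())      # covariates and treated fraction
print('naive ATE (mean of y1 - y0):', twins['ate'])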
def main(_):
    if config.mode == "prepare":
        download_dataset(config)
    elif config.mode == "train":
        train(config)
    elif config.mode == "debug":
        config.epochs = 2
        train(config, debug=True)
    elif config.mode == "inpaint":
        config.batch_size = 5
        inpaint(config)
    elif config.mode == "impute":
        pass
def main(_):
    config = flags.FLAGS
    if config.mode == "train":
        assert config.dataset in ("mnist", "cifar10")
        config.in_shape = (config.batch_size, 32, 32, 3)
        config.block_list = [eval(x) for x in config.block_list]
        config.stride_list = [eval(x) for x in config.stride_list]
        config.channel_list = [eval(x) for x in config.channel_list]
        train(config)
    elif config.mode == "debug":
        config.train_steps = 1
        config.viz_steps = 1
        config.block_list = [2, 2, 2]
        config.channel_list = [3, 4, 5]
        config.stride_list = [1, 1, 2]
        config.in_shape = (config.batch_size, 28, 28, 1)
        train(config, debug=True)
    elif config.mode == "prepare":
        download_dataset(config)
    elif config.mode == "sn":
        test_spectral_norm()
    elif config.mode == "iresnet":
        test_iresnet()
    elif config.mode == "trace":
        test_trace_approximation()
    elif config.mode == "inverse":
        test_block_inversion()
    elif config.mode == "squeeze":
        test_squeeze()
    elif config.mode == "trace_sn":
        test_trace_sn()
    elif config.mode == "generate":
        generate(config)
    elif config.mode == "reconstruct":
        reconstruct(config)
def main(*args):
    base_model_path = "/tmp/jax2tf/tf_js_quickdraw"
    dataset_path = os.path.join(base_model_path, "data")
    classes = utils.download_dataset(dataset_path, NB_CLASSES)
    assert len(classes) == NB_CLASSES, classes
    print(f"Classes are: {classes}")
    train_ds, test_ds = utils.load_classes(dataset_path, classes)
    flax_params = train(train_ds, test_ds, classes)
    model_dir = os.path.join(base_model_path, "saved_models")
    # the model must be converted with with_gradient set to True to be able to
    # convert the saved model to TF.js, as "PreventGradient" is not supported
    saved_model_lib.convert_and_save_model(
        predict, flax_params, model_dir,
        input_signatures=[tf.TensorSpec([1, 28, 28, 1])],
        with_gradient=True, compile_model=False, enable_xla=False)
    conversion_dir = os.path.join(base_model_path, 'tfjs_models')
    convert_tf_saved_model(model_dir, conversion_dir)
def load_ihdp_datasets(split='train', n_realizations=100, dataroot=None):
    """
    Load the IHDP data with the nonlinear response surface ("B") that was used
    by Shalit et al. (2017).

    Description of variables:
        x: covariates (25: 6 continuous and 19 binary)
        t: treatment (binary)
        yf: "factual" (observed) outcome
        ycf: "counterfactual" outcome (random)
        mu0: noiseless potential outcome under control
        mu1: noiseless potential outcome under treatment
        ate: I guess just what was reported in the Hill (2011) paper...
            Not actually accurate. The actual SATEs for the data are the
            following (using (mu1 - mu0).mean()):
                train100:   4.54328871735309
                test100:    4.269906127209613
                all100:     4.406597422281352
                train1000:  4.402550421661204
                test1000:   4.374712690625632
                all1000:    4.388631556143418
        yadd: ???
        ymul: ???

    :param split: 'train', 'test', or 'all'
    :param n_realizations: 100 or 1000 (the two options that the data source provides)
    :return: NpzFile with all the data ndarrays in the 'f' attribute
    """
    split = split.lower()
    if split not in SPLIT_OPTIONS:
        raise ValueError('Invalid "split" option {} ... valid options: {}'
                         .format(split, SPLIT_OPTIONS))
    if isinstance(n_realizations, str):
        n_realizations = int(n_realizations)
    if n_realizations not in N_REALIZATIONS_OPTIONS:
        raise ValueError('Invalid "n_realizations" option {} ... valid options: {}'
                         .format(n_realizations, N_REALIZATIONS_OPTIONS))

    if n_realizations == 100:
        if split == 'train' or split == 'all':
            path = download_dataset(IHDP_100_TRAIN_URL, 'IHDP train 100',
                                    dataroot=dataroot)
            train = np.load(path)
        if split == 'test' or split == 'all':
            path = download_dataset(IHDP_100_TEST_URL, 'IHDP test 100',
                                    dataroot=dataroot)
            test = np.load(path)
    elif n_realizations == 1000:
        if split == 'train' or split == 'all':
            path = download_dataset(IHDP_1000_TRAIN_URL, 'IHDP train 1000',
                                    dataroot=dataroot)
            unzip_path = unzip(path)
            train = np.load(unzip_path)
        if split == 'test' or split == 'all':
            path = download_dataset(IHDP_1000_TEST_URL, 'IHDP test 1000',
                                    dataroot=dataroot)
            unzip_path = unzip(path)
            test = np.load(unzip_path)

    if split == 'train':
        return train
    elif split == 'test':
        return test
    elif split == 'all':
        return train, test
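# Hedged usage sketch for load_ihdp_datasets above (not from the original repo):
# it recomputes the SATE exactly the way the docstring describes, via
# (mu1 - mu0).mean() on the returned NpzFile.
train_npz = load_ihdp_datasets(split='train', n_realizations=100)
sate_train = (train_npz['mu1'] - train_npz['mu0']).mean()
print('train100 SATE:', sate_train)             # docstring quotes ~4.5433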
    return best_acc


if __name__ == "__main__":
    start = time.time()
    if args.model == 'lstmcnn':
        vocab_list = list(
            """abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{} """
        )
    else:
        vocab_list = list(
            """abcdefghijklmnopqrstuvwxyz0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{} """
        )
    print('==> download dataset ' + args.dataset)
    download_dataset(args.data_path)
    print('==> make dataset')
    train_dataset = TextDataset(args.data_path, args.seq_length, vocab_list,
                                is_train=True)
    test_dataset = TextDataset(args.data_path, args.seq_length, vocab_list,
                               is_train=False)
    train_loader = data_utils.DataLoader(train_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers)
    # assumed completion of the truncated call: mirrors the train loader,
    # but without shuffling
    test_loader = data_utils.DataLoader(test_dataset,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        num_workers=args.num_workers)
def main():
    download_dataset()
    vectorize_dataset()
    train_model()
    evaluate_model()
def main(benchmark, force_download, overwrite):
    config_path = os.path.join('benchmarks', benchmark, 'config.yml')
    config = resolve_config(config_path)
    source_folder = config.sources.root
    videos = scan_videos(source_folder, '**')

    if len(videos) == 0 or force_download:
        download_dataset(source_folder,
                         url='https://winnowpre.s3.amazonaws.com/augmented_dataset.tar.xz')
        videos = scan_videos(source_folder, '**')
        print(f'Videos found after download: {len(videos)}')

    if len(videos) > 0:
        print('Video files found. Checking for existing signatures...')
        signatures_path = os.path.join(
            config.repr.directory, 'video_signatures', '**', '**.npy')
        signatures = glob(signatures_path, recursive=True)

        if len(signatures) == 0 or overwrite:
            # Load signatures and labels
            command = f'python extract_features.py -cp {config_path}'
            command = shlex.split(command)
            subprocess.run(command, check=True)

        # Check if signatures were generated properly
        signatures = glob(signatures_path, recursive=True)
        assert len(signatures) > 0, 'No signature files were found.'

        available_df = pd.read_csv(
            os.path.join('benchmarks', benchmark, 'labels.csv'))
        frame_level = glob(
            os.path.join(config.repr.directory, 'frame_level', '**', '**.npy'),
            recursive=True)
        signatures_permutations = get_frame_sampling_permutations(
            list(range(1, 6)), frame_level)

        scoreboard = dict()
        for fs, sigs in signatures_permutations.items():
            results_analysis = dict()
            for r in np.linspace(0.1, 0.25, num=10):
                results = []
                for i in range(5):
                    mAP, pr_curve = get_result(
                        available_df, sigs, ratio=r, file_index=frame_level)
                    results.append(mAP)
                results_analysis[r] = results
            scoreboard[fs] = results_analysis

        with open('benchmarks/scoreboard.json', "w") as results_file:
            json.dump(scoreboard, results_file)
        print('Saved scoreboard to {}'.format('benchmarks/scoreboard.json'))
    else:
        print(f'Please review the dataset (@ {source_folder})')
def download(): """Download and prepare dataset for training usaging kiwi Challenge structure of data""" ut.download_dataset()
def initialize_model():
    model = download_model(model_name)
    # normalize to the ImageNet mean and standard deviation
    preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
                         axis=-3)
    return PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)


if __name__ == "__main__":
    model = initialize_model()
    # download the dataset and evaluate the model
    images, labels = download_dataset(base_path=data_path, examples=examples,
                                      data_format=model.data_format,
                                      bounds=model.bounds, dimension=(224, 224))
    # results on the unmodified data
    initial_accuracy = accuracy(model, images, labels)
    # run the attack for every epsilon and measure the attack time
    atak = FGSM()
    duration = {}
    start_attack_time = time.time()
    modified_images = []
    for epsilon in epsilons:
        modified_images.append(atak(model, images, labels, epsilon))
    end_attack_time = time.time()
    # assumed completion of the truncated call: round to 2 decimal places
    duration["czas_ataku__fgsm"] = round(end_attack_time - start_attack_time, 2)
    weight_prune_perc=tune.grid_search([0, 0.1, 0.2, 0.3, 0.4]),
    grad_prune_perc=tune.grid_search([0, 0.1, 0.2, 0.3, 0.4]),
)

tune_config = dict(
    name="SET_DSNN_GS1",
    num_samples=1,
    local_dir=os.path.expanduser("~/nta/results"),
    config=exp_config,
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 300},
    resources_per_trial={"cpu": 1, "gpu": 1},
    loggers=DEFAULT_LOGGERS,
    verbose=1,
)

# TODO: automatically count how many GPUs there are
# override when running locally for testing
if not torch.cuda.is_available():
    exp_config["device"] = "cpu"
    tune_config["resources_per_trial"] = {"cpu": 1}

download_dataset(exp_config)
ray.init()
tune.run(Trainable, **tune_config)
# -*- coding: utf-8 -*-
import os

import matplotlib.pyplot as plt
import numpy as np

from utils import download_dataset


def save_org_images(path, filename, images_org):
    images_org = np.transpose(images_org, axes=(0, 2, 3, 1))
    for i, image in enumerate(images_org):
        plt.imsave(path + os.sep + f"{filename}_{i+1}_original.jpg", image.numpy())


data_path = os.path.abspath(
    os.path.join(os.path.dirname(os.path.dirname(__file__)), "imagenet"))
destination_path = data_path + os.sep + "preprocessed"

images, _ = download_dataset(base_path=data_path, examples=1000,
                             data_format="channels_first", bounds=(0, 1),
                             dimension=(224, 224))
save_org_images(destination_path, "imagenet", images)
import torch
from google.cloud import storage

from train_gan import train, get_arguments
from utils import download_blob, upload_local_directory_to_gcs, download_dataset

if __name__ == "__main__":
    args = get_arguments()
    print("on GPU:", torch.cuda.is_available())
    print(args)

    print("downloading dataset...")
    download_dataset()
    print("dataset downloaded")

    print("training model...")
    train(args)
    print("training done")

    print("uploading results...")
    storage_client = storage.Client()
    bucket = storage_client.bucket("semantic_inpainting")
    upload_local_directory_to_gcs("./checkpoints", bucket, "checkpoints")
    print("results uploaded")