import os
import sys

import requests

import datasets


def main():
    os.makedirs('working', exist_ok=True)
    for name, url in datasets.DATASETS.items():
        print('Downloading {} dataset... '.format(name), end='', flush=True)
        req = requests.get(url)
        if req.status_code != 200:
            print('Error getting {} dataset'.format(name),
                  file=sys.stderr, flush=True)
            continue
        print('Done', flush=True)
        outpath = datasets.path(name)
        with open(outpath, 'w') as outfile:
            outfile.write(req.text)
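
# The downloader above relies on a sibling `datasets` module that maps
# dataset names to source URLs (DATASETS) and resolves where each raw file
# lives on disk (path(name)). That module is not shown here; the following
# is a hypothetical sketch of the interface the script assumes -- the
# entries and on-disk layout are placeholders, not the real values:

import os

WORKING_DIR = 'working'

DATASETS = {
    # 'dataset_name': 'https://example.com/dataset.json',  # placeholder
}


def path(name):
    """Return the working-directory path for the named raw dataset."""
    return os.path.join(WORKING_DIR, name)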
import json
import os

import datasets


def main() -> None:
    os.makedirs('output', exist_ok=True)
    fns = [(datasets.US_DAILY_NAME, process_us_daily),
           (datasets.STATES_DAILY_NAME, process_states_daily)]
    for name, process_fn in fns:
        with open(datasets.path(name)) as json_file:
            data = json.load(json_file)
        if not isinstance(data, list):
            fatal('Error with {}: expected list of data, got {}'.format(
                name, type(data)))
        if len(data) == 0:
            fatal('Error with {}: got no data'.format(name))
        for datum in data:
            if not isinstance(datum, dict):
                fatal('Error with {}: expected list of dicts, but got '
                      'type {} in list'.format(name, type(datum)))
        processed_data = process_fn(data)
        output_path = os.path.join('output', '{}.js'.format(name))
        # Wrap the US-wide series under a 'US' key.
        if name == datasets.US_DAILY_NAME:
            us_data = {'US': processed_data}
            json_str = json.dumps(us_data, indent=2)
        else:
            json_str = json.dumps(processed_data, indent=2)
        with open(output_path, 'w') as output_file:
            print('export const {} = {};'.format(name, json_str),
                  file=output_file)

    # Dump the data indices as well
    index_strs = ['  "{}": {},'.format(key, i)
                  for i, key in enumerate(DATA_ORDER)]
    index_str = '\n'.join(index_strs)
    data_js = ('export const DATA_INDICES = {{\n'
               '{}\n'
               '}};').format(index_str)
    index_path = os.path.join('output', 'data_indices.js')
    with open(index_path, 'w') as output_file:
        output_file.write(data_js)
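
# main() reports unrecoverable input problems through a `fatal` helper that
# is defined elsewhere in this script. A minimal sketch of the behavior the
# calls above rely on (log to stderr, then exit non-zero); the real helper
# may differ:

import sys


def fatal(message):
    """Print an error message and abort with a failure exit code."""
    print(message, file=sys.stderr, flush=True)
    sys.exit(1)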
def generate_hp(self, path):
    # Seed random number generator
    np.random.seed(datetime.now().microsecond)

    # Hyperparameters
    hp = dd()

    # Set debug mode
    hp.debug = False

    # Experiment parameters
    hp.meta = dd()
    hp.meta.path = path
    hp.meta.result_folder = './'
    hp.meta.export_folder = 'export'

    # Dataset parameters
    hp.dataset = dd()
    hp.dataset.path = join(datasets.path(), 'catsanddogs')

    # Feature extraction parameters
    hp.iterator = dd()
    hp.iterator.patch_sz = (90, 90, 3)
    hp.iterator.reshape_sz = (100, 100, 3)

    # Feature learning layers
    hp.model = dd()
    hp.model.name = 'convnet'
    hp.model.layers = dd()

    # Preprocess layer
    i = 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_preprocess'
    hp.model.layers[i].nb_channels = 3
    hp.model.layers[i].nb_pretrain_iterations = 1

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 16
    hp.model.layers[i].filter_sz = (3, 3)

    # Max pooling layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_maxpool'
    hp.model.layers[i].downsample_sz = 2

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 32
    hp.model.layers[i].filter_sz = (3, 3)

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 32
    hp.model.layers[i].filter_sz = (3, 3)

    # Max pooling layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_maxpool'
    hp.model.layers[i].downsample_sz = 2

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 64
    hp.model.layers[i].filter_sz = (5, 5)

    # Max pooling layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_maxpool'
    hp.model.layers[i].downsample_sz = 2

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 64
    hp.model.layers[i].filter_sz = (5, 5)

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Fully connected layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'hidden'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_hid = 256  # int(10**np.random.uniform(log(128)/log(10), log(512)/log(10)))

    # Logistic layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'logistic'
    hp.model.layers[i].nb_out = 2

    # Trainer
    hp.trainer = dd()
    hp.trainer.max_epoch = 1000
    hp.trainer.lookback = randint(10, 30)
    hp.trainer.minibatch_sz = 100
    hp.trainer.init_lr = None
    hp.trainer.incr_lr = None
    hp.trainer.lr = 10**np.random.uniform(
        log(0.01) / log(10), log(0.1) / log(10))
    hp.trainer.decay_rate = uniform(0.985, 1.0)
    hp.trainer.momentum = 10**uniform(
        log(0.8) / log(10), log(0.99) / log(10))
    hp.trainer.momentum_reset_prob = 0

    print("Save hyperparameters to file")
    hp.dump(join(hp.meta.path, 'hp.pkl'), save_pretty_textfile=True)
    print(hp)

    return hp
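
# generate_hp() builds the whole hyperparameter tree out of `dd`, a
# dot-access dictionary defined elsewhere in this codebase. Below is a
# minimal sketch of the behavior the method assumes (attribute-style access
# plus a dump() that pickles the tree); the real class may differ:

import pickle


class dd(dict):
    """Dictionary allowing attribute-style access, e.g. hp.trainer.lr."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value

    def dump(self, path, save_pretty_textfile=False):
        # Pickle the whole tree; optionally write a readable text copy too.
        with open(path, 'wb') as pkl_file:
            pickle.dump(self, pkl_file)
        if save_pretty_textfile:
            with open(path + '.txt', 'w') as txt_file:
                txt_file.write(repr(self))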
        # Extract key and value
        key = split[0]
        val = split[len(split) == 2]

        # Add class description if not in label mapping
        # (from class description to class number)
        if val not in labelmap:
            currentlabel += 1
            labelmap[val] = currentlabel

        # Add key to classmap
        if key not in classmap:
            classmap[key] = dd()

        # Assign description and label to key
        classmap[key].label = labelmap[val]
        classmap[key].desc = val

    return classmap


if __name__ == '__main__':
    import datasets
    from os.path import join
    from cellavision_ import cellavision
    from clemex_ import clemex

    clmx = clemex(join(datasets.path(), 'leuko', 'clemex'))
    clvs = cellavision(join(datasets.path(), 'leuko',
                            'cellavision', 'images'))
    classmap_path = join(datasets.path(), 'leuko', 'classmap.txt')
    marshall(classmap_path, s1=clmx, s2=clvs)
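
# marshall() parses each classmap line with a compact boolean-index idiom:
# `split[len(split) == 2]` evaluates to split[1] when the line carries a
# separate description (True == 1) and to split[0] otherwise (False == 0),
# so a key with no description becomes its own description. A standalone
# illustration with hypothetical classmap lines:

for line in ['NE neutrophil', 'MO']:
    split = line.split()
    key = split[0]
    val = split[len(split) == 2]
    print('{} -> {}'.format(key, val))
# Prints:
#   NE -> neutrophil
#   MO -> MO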
            # Numeric index sits between the 4-char class prefix
            # (e.g. 'cat.') and the '.jpg' extension.
            idx = int(fn[4:-4])

            # Add class to class list
            if cls not in self.classes:
                self.classes.append(cls)

            example = element(self.path, fn, label=self.classes.index(cls),
                              desc=cls, index=idx)
            self.examples.append(example)

            # Indices run per class: the first 10000 images of each class go
            # to train, the next 1250 to valid, and the rest to test.
            if idx < 20000 // 2:
                self.splits.train.append(example)
            elif idx < 22500 // 2:
                self.splits.valid.append(example)
            else:
                self.splits.test.append(example)

        print("Cats and dogs dataset is loaded with:")
        for split_id in self.splits:
            print("    {:7d} examples in {} set".format(
                len(self.splits[split_id]), split_id))


if __name__ == '__main__':
    import datasets

    s = source(join(datasets.path(), 'catsanddogs'))
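
# Each image is wrapped in an `element` record defined elsewhere in this
# codebase. A minimal sketch of a container with the constructor signature
# inferred from the call site above (the real class may carry more state):

class element(object):
    """One dataset example: file location plus label metadata."""

    def __init__(self, path, filename, label, desc, index):
        self.path = path          # dataset root directory
        self.filename = filename  # image file name, e.g. 'cat.1234.jpg'
        self.label = label        # integer class label
        self.desc = desc          # class name string
        self.index = index        # numeric index parsed from the filename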