def _init_TRADE_TAQ(self):
    self.TRADE_df, self.TAQ_df = pd.DataFrame(), pd.DataFrame()
    for i in progress_bar(self.TAQ_dict):
        df = self.TAQ_dict[i]
        df = df[[
            'Symbol', 'Market', 'BuyPrice01', 'SellPrice01', 'BuyVolume01',
            'SellVolume01', 'TotalBuyOrderVolume', 'TotalSellOrderVolume',
            'WtAvgSellPrice', 'WtAvgBuyPrice'
        ]]
        self.TAQ_df = self.TAQ_df.append(df)
    for i in progress_bar(self.TRADE_dict):
        df = self.TRADE_dict[i]
        self.TRADE_df = self.TRADE_df.append(df)
    self.TRADE_df = self.TRADE_df.sort_index()
    self.TAQ_df = self.TAQ_df.sort_index()
    start = datetime.datetime(year=int(self.date[:4]),
                              month=int(self.date[4:6]),
                              day=int(self.date[6:8]),
                              hour=9, minute=30, second=0, microsecond=0)
    self.TRADE_df = self.TRADE_df.loc[
        self.TRADE_df.index.values >= start.strftime('%Y-%m-%d %H:%M:%S.000')]
    self.TAQ_df = self.TAQ_df.loc[
        self.TAQ_df.index.values >= start.strftime('%Y-%m-%d %H:%M:%S.000')]
def fit(self, epochs):
    self.logger.log_info(epochs, self.lr)
    mb = master_bar(range(epochs))
    for epoch in mb:
        self.model.train()
        for xb, yb in progress_bar(self.train_dl, parent=mb):
            loss = self.loss_func(self.model(xb), yb)
            loss.backward()
            self.opt.step()
            self.opt.zero_grad()
        self.model.eval()
        with torch.no_grad():
            tot_loss, tot_acc = 0., 0.
            for xb, yb in progress_bar(self.valid_dl, parent=mb):
                pred = self.model(xb)
                temp = self.loss_func(pred, yb)
                tot_loss += temp
                tot_acc += self.metric(pred, yb) if self.metric else 1 - temp
            nv = len(self.valid_dl)
            val_loss = tot_loss / nv
            acc = (tot_acc / nv) * 100.
        mb.write('Epoch: {:3}, train loss: {: .4f}, val loss: {: .4f}, '
                 'Acc: {: .4f}%'.format(epoch + 1, loss, val_loss, acc))
        self.logger.log([loss.cpu(), val_loss.cpu(), acc.cpu()])
    self.logger.done()
    io.save(self.model, self.logger.full_path)
def fit(self, train_dl, valid_dl, epochs, lr, metrics=None, optimizer=None, scheduler=None):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.model.to(device)
    optimizer = optimizer or Adam(self.model.parameters(), lr)
    if scheduler is not False:
        scheduler = scheduler or OneCycleLR(optimizer, lr, epochs * len(train_dl))
    else:
        scheduler = None

    self.train_stats = TrainTracker(metrics, validate=(valid_dl is not None))
    bar = master_bar(range(epochs))
    bar.write(self.train_stats.metrics_names, table=True)

    for epoch in bar:
        self.model.train()
        for batch in progress_bar(train_dl, parent=bar):
            batch = batch_to_device(batch, device)
            loss = self._train_batch(batch, optimizer, scheduler)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if scheduler:
                scheduler.step()
            self.train_stats.update_train_loss(loss)

        valid_outputs = []
        if valid_dl:
            self.model.eval()
            for batch in progress_bar(valid_dl, parent=bar):
                batch = batch_to_device(batch, device)
                output = self._valid_batch(batch)
                valid_outputs.append(output)

        self.train_stats.log_epoch_results(valid_outputs)
        bar.write(self.train_stats.get_metrics_values(), table=True)
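# Usage sketch for the `fit` method above (hedged: `Trainer` is an illustrative name for
# whatever class defines this method and wraps `self.model`; it is not taken from the source).
# Leaving optimizer/scheduler as None falls back to Adam + OneCycleLR, while scheduler=False
# disables LR scheduling entirely.
#
# trainer = Trainer(model)                                           # hypothetical wrapper class
# trainer.fit(train_dl, valid_dl, epochs=3, lr=1e-3)                 # defaults: Adam + OneCycleLR
# trainer.fit(train_dl, None, epochs=3, lr=1e-3, scheduler=False)    # no validation, no scheduler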
def fit_siamese(self, epochs):
    mb = master_bar(range(epochs))
    for epoch in mb:
        self.model.train()
        for x1b, x2b, rdm in progress_bar(self.train_dl, parent=mb):
            mb.child.comment = 'Train loop'
            out1 = self.model(x1b)
            out2 = self.model(x2b)
            loss = self.loss_func(out1, out2, rdm)
            loss.backward()
            self.opt.step()
            self.opt.zero_grad()
        self.model.eval()
        with torch.no_grad():
            tot_loss = 0.
            for x1b, x2b, rdm in progress_bar(self.valid_dl, parent=mb):
                out1 = self.model(x1b)
                out2 = self.model(x2b)
                temp = self.loss_func(out1, out2, rdm)
                tot_loss += temp
            nv = len(self.valid_dl)
            val_loss = tot_loss / nv
        mb.write('Epoch: {}, train loss: {: .6f}, val loss: {: .6f}'.format(
            epoch + 1, loss, val_loss))
        self.logger.log([loss.cpu(), val_loss.cpu()])
def fit_supervised(self, epochs):
    mb = master_bar(range(epochs))
    for epoch in mb:
        self.model.train()
        for xb, yb in progress_bar(self.train_dl, parent=mb):
            mb.child.comment = 'Train loop'
            loss = self.loss_func(self.model(xb), yb)
            loss.backward()
            self.opt.step()
            self.opt.zero_grad()
        self.model.eval()
        with torch.no_grad():
            tot_loss, tot_acc = 0., 0.
            for xb, yb in progress_bar(self.valid_dl, parent=mb):
                mb.child.comment = 'Valid loop'
                pred = self.model(xb)
                temp = self.loss_func(pred, yb)
                tot_loss += temp
                tot_acc += self.metric(pred, yb) if self.metric else 1 - temp
            nv = len(self.valid_dl)
            val_loss = tot_loss / nv
            acc = (tot_acc / nv) * 100.
        mb.write('Epoch: {:3}, train loss: {: .4f}, val loss: {: .4f}, '
                 'Acc: {: .4f}%'.format(epoch + 1, loss, val_loss, acc))
        self.logger.log([loss.cpu(), val_loss.cpu(), acc.cpu()])
def parallel(func, arr: Collection, max_workers: int = None):
    "Call `func` on every element of `arr` in parallel using `max_workers`."
    max_workers = ifnone(max_workers, defaults.cpus)
    if max_workers < 2:
        results = [func(o, i) for i, o in progress_bar(enumerate(arr), total=len(arr))]
    else:
        with ProcessPoolExecutor(max_workers=max_workers) as ex:
            futures = [ex.submit(func, o, i) for i, o in enumerate(arr)]
            results = []
            for f in progress_bar(concurrent.futures.as_completed(futures), total=len(arr)):
                results.append(f.result())
    if any([o is not None for o in results]):
        return results
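# Usage sketch for `parallel` above (hedged: `ifnone` and `defaults.cpus` come from fastai's
# core utilities; the worker below is illustrative). The callable receives (element, index),
# and results are only returned when at least one call yields a non-None value. Note that
# with several workers the results arrive in completion order, not input order.

def _square(o, i):
    # toy worker: ignore the index, return the square of the element
    return o * o

squares = parallel(_square, list(range(10)), max_workers=2)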
def _pre_process(self):
    x, y = self.x, self.y
    cfg = x.config
    if len(x.items) > 0:
        if not cfg.resample_to:
            _set_sr(x.items[0], x.config, x.path)
        if cfg._nchannels is None:
            _set_nchannels(x.items[0], x.config, x.path)
        if cfg.downmix or cfg.remove_silence or cfg.segment_size or cfg.resample_to:
            items = list(zip(x.items, y.items))

            def concat(x, y):
                return np.concatenate((x, y)) if len(y) > 0 else x

            if x.config.downmix:
                print("Preprocessing: Downmixing to Mono")
                cfg._nchannels = 1
                items = [downmix_item(i, x.config, x.path) for i in progress_bar(items)]
                items = reduce(concat, items, np.empty((0, 2)))
            if x.config.resample_to:
                print("Preprocessing: Resampling to", x.config.resample_to)
                cfg._sr = x.config.resample_to
                items = [resample_item(i, x.config, x.path) for i in progress_bar(items)]
                items = reduce(concat, items, np.empty((0, 2)))
            if x.config.remove_silence:
                print("Preprocessing: Removing Silence")
                items = [remove_silence(i, x.config, x.path) for i in progress_bar(items)]
                items = reduce(concat, items, np.empty((0, 2)))
            if x.config.segment_size:
                print("Preprocessing: Segmenting Items")
                items = [segment_items(i, x.config, x.path) for i in progress_bar(items)]
                items = reduce(concat, items, np.empty((0, 2)))
            nx, ny = tuple(zip(*items))
            x.items, y.items = np.array(nx), np.array(ny)
    self.x, self.y = x, y
    self.y.x = x
def __init__(self, tokenizer, file_path, cache_path, logger, block_size=512):
    assert os.path.isfile(file_path)

    if os.path.exists(cache_path):
        logger.info("Loading features from cached file %s", cache_path)
        with open(cache_path, "rb") as handle:
            self.examples = pickle.load(handle)
    else:
        logger.info("Creating features from dataset file %s", file_path)
        self.examples = []
        text = (line.strip() for line in open(file_path, encoding="utf-8"))
        text = progress_bar(list(text))
        text = map(lambda x: tokenizer.convert_tokens_to_ids(tokenizer.tokenize(x)), text)
        text = itertools.chain.from_iterable(text)
        text = more_itertools.chunked(text, block_size)
        self.examples = list(text)[:-1]
        # Note that we are losing the last truncated example here for the sake of simplicity (no padding).
        # If your dataset is small, you should first look for a bigger one :-) and second you
        # can change this behavior by adding (model-specific) padding.

        logger.info("Saving features into cached file %s", cache_path)
        with open(cache_path, "wb") as handle:
            pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL)
def dtlz2_test():
    # Run the DTLZ2 benchmark
    errors = 0
    num_inputs = 6
    num_objectives = 2
    lab = DTLZ2(num_inputs=num_inputs, num_objectives=num_objectives)
    models = {
        f'y_{i}': GPyModel(Exponential(input_dim=num_inputs, ARD=True))
        for i in range(num_objectives)
    }
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    tsemo = TSEMO(lab.domain, models=models, random_rate=0.00)
    experiments = tsemo.suggest_experiments(5 * num_inputs)

    mb = master_bar(range(1))
    for j in mb:
        mb.main_bar.comment = f'Repeats'
        for i in progress_bar(range(100), parent=mb):
            mb.child.comment = f'Iteration'
            # Run experiments
            experiments = lab.run_experiments(experiments)
            # Get suggestions
            try:
                experiments = tsemo.suggest_experiments(1, experiments, **tsemo_options)
            except Exception as e:
                print(e)
                errors += 1
        tsemo.save(f'new_tsemo_params_{j}.json')
def _download_images(cat_list, path_images, max_images, remove_crowded):
    cat_ids = CocoData.coco.getCatIds(catNms=cat_list)
    idx2cat = {e['id']: e['name'] for e in CocoData.coco.loadCats(CocoData.coco.getCatIds())}
    img_id2fn = {}
    print(f"Found {len(cat_ids)} valid categories.")
    print([idx2cat[e] for e in cat_ids])
    print("Starting download.")
    mb = master_bar(range(len(cat_ids)))
    for i in mb:
        c_id = cat_ids[i]
        print(f"Downloading images of category {idx2cat[c_id]}")
        img_ids = CocoData.coco.getImgIds(catIds=c_id)

        # small function to filter images with crowded objects
        def _f(iid):
            annos = CocoData.coco.loadAnns(CocoData.coco.getAnnIds(imgIds=iid))
            annos = [a for a in annos if idx2cat[a["category_id"]] in cat_list]
            is_crowd = [a["iscrowd"] for a in annos]
            return 1 in is_crowd

        if remove_crowded:
            img_ids = [i for i in img_ids if not _f(i)]
        if max_images is not None:
            img_ids = img_ids[:max_images]
        for i in img_ids:
            img_id2fn[i] = path_images / (str(i).zfill(12) + ".jpg")
        for i in progress_bar(range(len(img_ids)), parent=mb):
            with contextlib.redirect_stdout(io.StringIO()):
                CocoData.coco.download(path_images, [img_ids[i]])
    print(len([fn for fn in path_images.ls()]), "images downloaded.")
def clear_cache(self):
    '''Delete the files and empty dirs in the cache folder'''
    num_removed = 0
    parent_dirs = set()
    if not os.path.exists(self.cache_dir / "cache_contents.txt"):
        print("Cache contents not found, try calling again after creating your AudioList")
        return

    with open(self.cache_dir / "cache_contents.txt", 'r') as f:
        pb = progress_bar(f.read().split('\n')[:-1])
        for line in pb:
            if not os.path.exists(line):
                continue
            try:
                os.remove(line)
            except Exception as e:
                print(f"Warning: Failed to remove {line}, due to error {str(e)}...continuing")
            else:
                parent = Path(line).parents[0]
                parent_dirs.add(parent)
                num_removed += 1
    for parent in parent_dirs:
        if os.path.exists(parent) and len(parent.ls()) == 0:
            try:
                os.rmdir(str(parent))
            except Exception as e:
                print(f"Warning: Unable to remove empty dir {parent}, due to error {str(e)}...continuing")
    os.remove(self.cache_dir / "cache_contents.txt")
    print(f"{num_removed} files removed")
def run(self, photos):
    crop_photos = self.rec_model.get_crops(photos)
    for c in progress_bar(crop_photos):
        c.embedding = self.rec_model.get_embedding(c)
    crop_embeddings = np.stack([x.embedding[256:] for x in crop_photos])
    cluster_labels = self.get_cluster_labels(crop_embeddings)
    return crop_photos, cluster_labels
def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb, amp=False):
    if amp:
        scaler = torch.cuda.amp.GradScaler()

    model.train()
    # Iterate over the batches of the dataset
    for images, targets in progress_bar(train_loader, parent=mb):
        if torch.cuda.is_available():
            images = images.cuda()
            targets = targets.cuda()
        images = batch_transforms(images)

        optimizer.zero_grad()
        if amp:
            with torch.cuda.amp.autocast():
                out = model(images)
                train_loss = cross_entropy(out, targets)
            scaler.scale(train_loss).backward()
            # Update the params
            scaler.step(optimizer)
            scaler.update()
        else:
            out = model(images)
            train_loss = cross_entropy(out, targets)
            train_loss.backward()
            optimizer.step()
        scheduler.step()

        mb.child.comment = f"Training loss: {train_loss.item():.6}"
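# Driver sketch for `fit_one_epoch` above (hedged: the surrounding script's objects --
# `model`, `train_loader`, `batch_transforms`, `optimizer`, `scheduler` -- are assumed to be
# built elsewhere; only the epoch loop and progress-bar wiring are shown here).
#
# num_epochs = 10  # illustrative value
# mb = master_bar(range(num_epochs))
# for epoch in mb:
#     fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb, amp=False)
#     mb.write(f"Epoch {epoch + 1}/{num_epochs} finished")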
def train_epoch(self, mb: MasterBar) -> List[torch.tensor]:
    "One epoch used for training"
    self.mdl.train()
    trn_loss = SmoothenValue(0.9)
    trn_acc = SmoothenValue(0.9)
    for batch in progress_bar(self.data.train_dl, parent=mb):
        # Increment number of iterations
        self.num_it += 1
        for b in batch.keys():
            batch[b] = batch[b].to(self.device)
        self.optimizer.zero_grad()
        out = self.mdl(batch)
        loss = self.loss_fn(out, batch)
        loss = loss.mean()
        loss.backward()
        self.optimizer.step()
        metric = self.eval_fn(out, batch)
        trn_loss.add_value(loss.detach().cpu())
        trn_acc.add_value(metric.detach().cpu())
        mb.child.comment = (
            f'LossB {loss: .4f} | SmLossB {trn_loss.smooth: .4f} | AccB {trn_acc.smooth: .4f}')
        del batch
    self.optimizer.zero_grad()
    return trn_loss.smooth, trn_acc.smooth
def compute_miou(model, dl, mean, num_classes, show_progress, ignore_mapped_class=[]):
    ious = []
    model.learn.model.eval()
    with torch.no_grad():
        for input, target in progress_bar(dl, display=show_progress):
            pred = model.learn.model(input)
            target = target.squeeze(1)
            if ignore_mapped_class != []:
                _, total_classes, _, _ = pred.shape
                for k in ignore_mapped_class:
                    pred[:, k] = -1
                pred = pred.argmax(dim=1)
            else:
                pred = pred.argmax(dim=1)
            mask1 = []
            mask2 = []
            for i in range(pred.shape[0]):
                mask1.append(pred[i].to(model._device) == num_classes[:, None, None].to(model._device))
                mask2.append(target[i].to(model._device) == num_classes[:, None, None].to(model._device))
            mask1 = torch.stack(mask1)
            mask2 = torch.stack(mask2)
            iou = mask_iou(mask1, mask2)
            ious.append(iou.tolist())
    return np.mean(ious, 0)
def fit_one_epoch(model, train_loader, optimizer, scheduler, mb, amp=False):
    if amp:
        scaler = torch.cuda.amp.GradScaler()

    model.train()
    # Iterate over the batches of the dataset
    for images, targets in progress_bar(train_loader, parent=mb):
        targets = convert_to_abs_coords(targets, images.shape)
        if torch.cuda.is_available():
            images = images.cuda()
            targets = [{k: v.cuda() for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        if amp:
            with torch.cuda.amp.autocast():
                loss_dict = model(images, targets)
                loss = sum(v for v in loss_dict.values())
            scaler.scale(loss).backward()
            # Update the params
            scaler.step(optimizer)
            scaler.update()
        else:
            loss_dict = model(images, targets)
            loss = sum(v for v in loss_dict.values())
            loss.backward()
            optimizer.step()

        mb.child.comment = f'Training loss: {loss.item()}'
        scheduler.step()
def test_tsemo(test_num_improve_iter=2, save=False):
    num_inputs = 2
    num_objectives = 2
    lab = VLMOP2()
    strategy = TSEMO(lab.domain)
    experiments = strategy.suggest_experiments(5 * num_inputs)

    warnings.filterwarnings("ignore", category=RuntimeWarning)
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    num_improve_iter = 0
    best_hv = None
    pb = progress_bar(range(20))
    for i in pb:
        # Run experiments
        experiments = lab.run_experiments(experiments)
        # Get suggestions
        experiments = strategy.suggest_experiments(1, experiments)
        if save:
            strategy.save("tsemo_settings.json")
        y_pareto, _ = pareto_efficient(lab.data[["y_0", "y_1"]].to_numpy(), maximize=False)
        hv = hypervolume(y_pareto, [11, 11])
        if best_hv is None:
            best_hv = hv
        elif hv > best_hv:
            best_hv = hv
            num_improve_iter += 1
        pb.comment = f"Hypervolume: {hv}"
        if num_improve_iter >= test_num_improve_iter:
            print("Requirement to improve fbest in at least {} satisfied, test stopped."
                  .format(test_num_improve_iter))
            break
def validate(self, mb, model, device):
    model.eval()
    valid_loss = 0
    with torch.no_grad():
        for ind, xy in enumerate(progress_bar(self.valid_dl, parent=mb)):
            y_tag = xy.pop('target_tag')
            y_pos = xy.pop('target_pos')
            x = xy
            # move inputs and targets to the computation device
            inputs, target_tag, target_pos = (
                {key: x_.to(device) for key, x_ in x.items()},
                y_tag.to(device),
                y_pos.to(device),
            )
            *out, loss = model(**inputs, target_tag=target_tag, target_pos=target_pos)
            valid_loss += loss.item()
            if ind % 500 == 0:
                self.log(f'Batch: {ind}, Valid loss: {valid_loss / (ind+1) :.3f}')
            mb.child.comment = f'{valid_loss / (ind+1) :.3f}'
    return valid_loss / len(self.valid_dl)
def train(self, mb, model, opt, device, sched=None):
    model.train()
    train_loss = 0
    for ind, xy in enumerate(progress_bar(self.train_dl, parent=mb)):
        y_tag = xy.pop('target_tag')
        y_pos = xy.pop('target_pos')
        x = xy
        # move inputs and targets to the computation device
        inputs, target_tag, target_pos = (
            {key: x_.to(device) for key, x_ in x.items()},
            y_tag.to(device),
            y_pos.to(device),
        )
        opt.zero_grad()
        *out, loss = model(**inputs, target_tag=target_tag, target_pos=target_pos)
        loss.backward()
        opt.step()
        if sched is not None:
            sched.step()
        train_loss += loss.item()
        if ind % 500 == 0:
            # report the running average over the batches seen so far
            self.log(f'Batch: {ind}, Train loss: {train_loss / (ind+1) :.3f}')
        mb.child.comment = f'{train_loss / (ind+1) :.3f}'
    return train_loss / len(self.train_dl)
def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb, amp=False):
    if amp:
        scaler = torch.cuda.amp.GradScaler()

    model.train()
    # Iterate over the batches of the dataset
    for images, targets in progress_bar(train_loader, parent=mb):
        if torch.cuda.is_available():
            images = images.cuda()
        images = batch_transforms(images)

        optimizer.zero_grad()
        if amp:
            with torch.cuda.amp.autocast():
                train_loss = model(images, targets)['loss']
            scaler.scale(train_loss).backward()
            # Gradient clipping
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            # Update the params
            scaler.step(optimizer)
            scaler.update()
        else:
            train_loss = model(images, targets)['loss']
            train_loss.backward()
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()
        scheduler.step()

        mb.child.comment = f'Training loss: {train_loss.item():.6}'
def get_preds_cyclegan(learn, test_path, pred_path, convert_to='B', bs=4, num_workers=4,
                       device='cuda', suffix='tif'):
    """
    A prediction function that takes the Learner object `learn` with the trained model,
    the `test_path` folder with the images to perform batch inference on, and the output
    folder `pred_path` where the predictions will be saved. The function will convert
    images to the domain specified by `convert_to` (default is 'B'). The other arguments
    are the batch size `bs` (default=4), `num_workers` (default=4), the `device` to run
    inference on (default='cuda') and the suffix of the prediction images `suffix`
    (default='tif').
    """
    assert os.path.exists(test_path)

    if not os.path.exists(pred_path):
        os.mkdir(pred_path)

    test_dl = load_dataset(test_path, bs, num_workers)
    if convert_to == 'B':
        model = learn.model.G_B.to(device)
    else:
        model = learn.model.G_A.to(device)

    for i, xb in progress_bar(enumerate(test_dl), total=len(test_dl)):
        fn, im = xb
        preds = (model(im.to(device)) / 2 + 0.5)
        for i in range(len(fn)):
            new_fn = os.path.join(
                pred_path,
                '.'.join([os.path.basename(fn[i]).split('.')[0] + f'_fake{convert_to}', suffix]))
            torchvision.utils.save_image(preds[i], new_fn)
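# Usage sketch (hedged: assumes `learn` is a CycleGAN Learner whose model exposes `G_A` and
# `G_B` generators, as the function above requires; the paths below are illustrative).
#
# get_preds_cyclegan(learn,
#                    test_path='data/testA',
#                    pred_path='preds/fakeB',
#                    convert_to='B',
#                    bs=4,
#                    device='cuda',
#                    suffix='tif')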
def predict(self, device='cuda:0', pbar=None):
    """
    Evaluate the model on a validation set
    :param device: str (defaults to 'cuda:0')
    :param pbar: fast_progress progress bar (defaults to None)
    :returns: overall_val_loss (float), preds_dict (dict with 'y_true' and 'y_pred' arrays)
    """
    current_size = len(self.val_data_loader.dataset)
    preds_dict = {
        'y_true': np.zeros([current_size, 11]),
        'y_pred': np.zeros([current_size, 11])
    }
    overall_val_loss = 0.0
    self.model.eval()
    with torch.no_grad():
        index_dict = 0
        for step, batch in enumerate(
                progress_bar(self.val_data_loader, parent=pbar, leave=(pbar is not None))):
            loss, num_rows, y_pred, targets = self.model(batch, device)
            overall_val_loss += loss.item() * num_rows
            current_index = index_dict
            preds_dict['y_true'][current_index:current_index + num_rows, :] = targets
            preds_dict['y_pred'][current_index:current_index + num_rows, :] = y_pred
            index_dict += num_rows
    overall_val_loss = overall_val_loss / len(self.val_data_loader.dataset)
    return overall_val_loss, preds_dict
def download_url(url, dest, overwrite=False, pbar=None, show_progress=True,
                 chunk_size=1024 * 1024, timeout=4, retries=5):
    "Download `url` to `dest` unless it exists and not `overwrite`"
    if os.path.exists(dest) and not overwrite:
        return

    s = requests.Session()
    s.mount('http://', requests.adapters.HTTPAdapter(max_retries=retries))
    # additional line to identify as a firefox browser, see fastai/#2438
    s.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0'})
    u = s.get(url, stream=True, timeout=timeout)
    try:
        file_size = int(u.headers["Content-Length"])
    except:
        show_progress = False

    with open(dest, 'wb') as f:
        nbytes = 0
        if show_progress:
            pbar = progress_bar(range(file_size), leave=False, parent=pbar)
        try:
            if show_progress:
                pbar.update(0)
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress:
                    pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError as e:
            fname = url.split('/')[-1]
            data_dir = dest.parent
            print(f'\n Download of {url} has failed after {retries} retries\n'
                  f' Fix the download manually:\n'
                  f'$ mkdir -p {data_dir}\n'
                  f'$ cd {data_dir}\n'
                  f'$ wget -c {url}\n'
                  f'$ tar xf {fname}\n'
                  f' And re-run your code once the download is successful\n')
def download_url(url: str, dest: str, overwrite: bool = False, show_progress=True,
                 chunk_size=1024 * 1024, timeout=4, retries=5) -> None:
    "Download `url` to `dest` unless it exists and not `overwrite`."
    if os.path.exists(dest) and not overwrite:
        return

    s = requests.Session()
    s.mount('http://', requests.adapters.HTTPAdapter(max_retries=retries))
    u = s.get(url, stream=True, timeout=timeout)
    try:
        file_size = int(u.headers["Content-Length"])
    except:
        show_progress = False

    with open(dest, 'wb') as f:
        nbytes = 0
        if show_progress:
            pbar = progress_bar(range(file_size), auto_update=False, leave=False)
        try:
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress:
                    pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError as e:
            print(f'Try downloading your file manually from {url}')
            import sys
            sys.exit(1)
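# Usage sketch for the variant above (the URL and destination below are illustrative).
# The bar is created with auto_update=False, so progress is advanced manually via
# pbar.update(nbytes); when the server sends no Content-Length header, show_progress is
# switched off and the file is still downloaded.
#
# download_url('https://example.com/files/archive.tgz', 'archive.tgz', overwrite=False)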
def validate_w_dropout(
    model: nn.Module,
    dl: DataLoader,
    loss_func: OptLossFunc = None,
    cb_handler: Optional[CallbackHandler] = None,
    pbar: Optional[PBar] = None,
    average=True,
    n_batch: Optional[int] = None
) -> Iterator[Tuple[Union[Tensor, int], ...]]:
    "Calculate `loss_func` of `model` on `dl` with dropout kept active."
    # keep the model in train mode so dropout stays active during validation
    model.train()
    with torch.no_grad():
        val_losses, nums = [], []
        if cb_handler:
            cb_handler.set_dl(dl)
        for xb, yb in progress_bar(dl, parent=pbar, leave=(pbar is not None)):
            if cb_handler:
                xb, yb = cb_handler.on_batch_begin(xb, yb, train=False)
            val_loss = loss_batch(model, xb, yb, loss_func, cb_handler=cb_handler)
            val_losses.append(val_loss)
            if not is_listy(yb):
                yb = [yb]
            nums.append(yb[0].shape[0])
            if cb_handler and cb_handler.on_batch_end(val_losses[-1]):
                break
            if n_batch and (len(nums) >= n_batch):
                break
        nums = np.array(nums, dtype=np.float32)
        if average:
            result = (to_np(torch.stack(val_losses)) * nums).sum() / nums.sum()
        else:
            result = val_losses
    # restore evaluation mode before returning
    model.eval()
    return result
def download_url(url: str, dest: str, overwrite: bool = False, pbar: ProgressBar = None,
                 show_progress=True, chunk_size=1024 * 1024, timeout=4, retries=5) -> None:
    "Download `url` to `dest` unless it exists and not `overwrite`."
    if os.path.exists(dest) and not overwrite:
        return

    s = requests.Session()
    s.mount('http://', requests.adapters.HTTPAdapter(max_retries=retries))
    u = s.get(url, stream=True, timeout=timeout)
    try:
        file_size = int(u.headers["Content-Length"])
    except:
        show_progress = False

    with open(dest, 'wb') as f:
        nbytes = 0
        if show_progress:
            pbar = progress_bar(range(file_size), auto_update=False, leave=False, parent=pbar)
        try:
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress:
                    pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError as e:
            fname = url.split('/')[-1]
            from fastai.datasets import Config
            data_dir = Config().data_path()
            timeout_txt = (f'\n Download of {url} has failed after {retries} retries\n'
                           f' Fix the download manually:\n'
                           f'$ mkdir -p {data_dir}\n'
                           f'$ cd {data_dir}\n'
                           f'$ wget -c {url}\n'
                           f'$ tar -zxvf {fname}\n\n'
                           f'And re-run your code once the download is successful\n')
            print(timeout_txt)
            import sys
            sys.exit(1)
def validate_old(self):
    self.logger.info("Running evaluation")

    all_logits = None
    all_labels = None

    self.model.eval()
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0

    validation_scores = {metric['name']: 0. for metric in self.metrics}
    validation_scores2 = {metric['name']: 0. for metric in self.metrics}

    for step, batch in enumerate(progress_bar(self.data.val_dl)):
        batch = tuple(t.to(self.device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids = batch

        if self.is_fp16 and self.multi_label:
            label_ids = label_ids.half()

        with torch.no_grad():
            outputs = self.model(input_ids, segment_ids, input_mask, label_ids)
            tmp_eval_loss, logits = outputs[:2]
            # logits = self.model(input_ids, segment_ids, input_mask)

        tmp_eval_accuracy = self.metrics[0]['function'](logits, label_ids)

        if all_logits is None:
            all_logits = logits
        else:
            all_logits = torch.cat((all_logits, logits), 0)

        if all_labels is None:
            all_labels = label_ids
        else:
            all_labels = torch.cat((all_labels, label_ids), 0)

        eval_loss += tmp_eval_loss.mean().item()

        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1

    eval_loss = eval_loss / nb_eval_steps

    # Evaluation metrics
    for metric in self.metrics:
        validation_scores[metric['name']] = metric['function'](all_logits, all_labels)

    result = {'eval_loss': eval_loss, 'metrics': validation_scores}

    self.logger.info("Eval results:")
    for key in sorted(result.keys()):
        self.logger.info(" %s = %s", key, str(result[key]))
    self.logger.info(
        "--------------------------------------------------------------------------------")

    return result
def _df_predict(self, dataframe):
    # from fastai.data_block import split_kwargs_by_func, grab_idx, DatasetType
    # ds_type = DatasetType.Valid
    # ds = self.learn.dl(ds_type).dataset
    #
    # current_databunch = self.learn.data
    # databunch_half, databunch_second_half = self._data._prepare_validation_databunch(dataframe)
    # return dataframe, databunch_half, databunch_second_half
    # try:
    #     self.learn.data = databunch
    #     preds = self.learn.get_preds(ds_type)[0]
    # except Exception as e:
    #     raise e
    # finally:
    #     self.learn.data = current_databunch
    #
    # analyze_kwargs, kwargs = split_kwargs_by_func({}, ds.y.analyze_pred)
    # preds = [ds.y.analyze_pred(grab_idx(preds, i), **analyze_kwargs) for i in range(len(preds))]
    # preds = [ds.y.reconstruct(z) for z in preds]

    if not HAS_NUMPY:
        raise Exception("This function requires numpy.")

    preds = []
    for i in progress_bar(range(len(dataframe))):
        prediction = self._predict(dataframe.iloc[i])[0].obj
        if isinstance(prediction, (list, np.ndarray)):
            prediction = prediction[0]
        preds.append(prediction)
    return preds
def get_preds_cyclegan(learn, test_path, pred_path, bs=4, num_workers=4, suffix='tif'):
    """
    A prediction function that takes the Learner object `learn` with the trained model,
    the `test_path` folder with the images to perform batch inference on, and the output
    folder `pred_path` where the predictions will be saved, with a batch size `bs`,
    `num_workers`, and suffix of the prediction images `suffix` (default='tif').
    """
    assert os.path.exists(test_path)

    if not os.path.exists(pred_path):
        os.mkdir(pred_path)

    test_dl = load_dataset(test_path, bs, num_workers)
    model = learn.model.G_B.cuda()
    for i, xb in progress_bar(enumerate(test_dl), total=len(test_dl)):
        fn, im = xb
        preds = (model(im.cuda()) / 2 + 0.5)
        for i in range(len(fn)):
            new_fn = os.path.join(
                pred_path,
                '.'.join([os.path.basename(fn[i]).split('.')[0] + '_fakeB', suffix]))
            torchvision.utils.save_image(preds[i], new_fn)
def run_chains_parallel(chains, progressbar, to_run, params, random_seed, kernel_kwargs, cores):
    pbar = progress_bar((), total=100, display=progressbar)
    pbar.update(0)
    pbars = [pbar] + [None] * (chains - 1)

    pool = mp.Pool(cores)

    # "manually" (de)serialize params before/after multiprocessing
    params = tuple(cloudpickle.dumps(p) for p in params)
    kernel_kwargs = {key: cloudpickle.dumps(value) for key, value in kernel_kwargs.items()}
    results = _starmap_with_kwargs(
        pool,
        to_run,
        [(*params, random_seed[chain], chain, pbars[chain]) for chain in range(chains)],
        repeat(kernel_kwargs),
    )
    results = tuple(cloudpickle.loads(r) for r in results)
    pool.close()
    pool.join()
    return results