from functools import partial
from typing import Callable, Tuple

from torch import FloatTensor, tensor


def normalize_triplet_funcs(mean: FloatTensor, std: FloatTensor,
                            do_x: bool = True, do_y: bool = False) -> Tuple[Callable, Callable]:
    "Create normalize/denormalize funcs using `mean` and `std`; `do_x`/`do_y` select which images are affected."
    mean, std = tensor(mean), tensor(std)
    # `_normalize_triplet_batch` and `denormalize_triplet` are defined elsewhere in this codebase.
    return (
        partial(_normalize_triplet_batch, mean=mean, std=std, do_x=do_x, do_y=do_y),
        partial(denormalize_triplet, mean=mean, std=std, do_x=do_x),
    )
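# Usage sketch (assumption: the returned pair plugs into a fastai-style data
# pipeline; the ImageNet statistics below are illustrative, not from the
# original source).
imagenet_mean, imagenet_std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
norm_fn, denorm_fn = normalize_triplet_funcs(imagenet_mean, imagenet_std,
                                             do_x=True, do_y=False)
# norm_fn normalizes the x images of each batch; denorm_fn undoes it for display.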
from functools import partial
from pathlib import Path

from fastai.core import parallel
from fastai.vision import ImageList


def create_dataset(path_fullRes: Path, path_list, downsize=True):
    il = ImageList.from_folder(path_fullRes)
    for p, size, qf in path_list:
        # only build a low-res set if its folder does not exist yet
        if not p.exists():
            print(f"Creating {p}")
            print(f"Size: {size} with {qf} quality factor")
            # `create_training_images` is a project-level helper defined elsewhere.
            parallel(partial(create_training_images, p_hr=path_fullRes, p_lr=p,
                             size=size, qualityFactor=qf, downsize=downsize),
                     il.items)
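# Usage sketch (hypothetical paths and settings; each tuple is
# (target dir, image size, JPEG quality factor) per the loop above).
path_fullRes = Path('data/hr')
path_list = [
    (Path('data/lr_96'), 96, 60),
    (Path('data/lr_128'), 128, 70),
]
create_dataset(path_fullRes, path_list, downsize=True)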
import logging
from functools import partial

import torch
from tqdm import tqdm

# `find_max_issue_num`, `get_issue_text`, and `parallel` are project-level helpers.


def get_all_issue_text(owner, repo, inf_wrapper, workers=64):
    """
    Prepare embedding features of all issues in a given repository.

    Returns
    -------
    dict
        {'features': list, 'labels': list, 'nums': list}
    """
    # prepare list of issue nums
    max_num = find_max_issue_num(owner, repo)
    get = partial(get_issue_text, owner=owner, repo=repo, skip_issue=True)
    issues = parallel(get, list(range(1, max_num + 1)), max_workers=workers)

    # filter out issues with problems
    filtered_issues = [issue for issue in issues if issue]
    logging.info(f'Retrieved {len(filtered_issues)} issues.')

    features = []
    labels = []
    nums = []
    for issue in tqdm(filtered_issues):
        labels.append(issue['labels'])
        nums.append(issue['num'])
        # calculate embedding
        text = inf_wrapper.process_dict(issue)['text']
        feature = inf_wrapper.get_pooled_features(text).detach().cpu()
        # only need the first 1600 dimensions
        features.append(feature[:, :1600])

    assert len(features) == len(labels), \
        'Error: mismatch between the number of observations and labels.'

    return {'features': torch.cat(features).numpy(),
            'labels': labels,
            'nums': nums}
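# Shape sketch (illustrative): each get_pooled_features call is assumed to
# return a (1, d) row with d >= 1600, so torch.cat stacks the sliced rows
# into an (n_issues, 1600) matrix.
rows = [torch.randn(1, 2400) for _ in range(3)]   # stand-ins for pooled features
stacked = torch.cat([r[:, :1600] for r in rows])
assert stacked.shape == (3, 1600)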
# Revised version of get_all_issue_text: embeds all issues in one batched
# df_to_embedding call instead of one get_pooled_features call per issue.
import pandas as pd


def get_all_issue_text(owner, repo, inf_wrapper, workers=64):
    """
    Prepare embedding features of all issues in a given repository.

    Returns
    -------
    dict
        {'features': list, 'labels': list, 'nums': list}
    """
    # prepare list of issue nums
    max_num = find_max_issue_num(owner, repo)
    get = partial(get_issue_text, owner=owner, repo=repo, skip_issue=True)
    issues = parallel(get, list(range(1, max_num + 1)), max_workers=workers)

    if not issues:
        raise ValueError(f"No issues retrieved for {owner}/{repo}")

    # filter out issues with problems
    filtered_issues = [issue for issue in issues if issue]
    logging.info(f'Repo {owner}/{repo}: retrieved {len(filtered_issues)} issues.')

    labels = []
    nums = []
    issues_dict = {'title': [], 'body': []}
    for issue in tqdm(filtered_issues):
        labels.append(issue['labels'])
        nums.append(issue['num'])
        issues_dict['title'].append(issue['title'])
        issues_dict['body'].append(issue['body'])

    # embed every issue in a single batched call
    features = inf_wrapper.df_to_embedding(pd.DataFrame.from_dict(issues_dict))

    assert len(features) == len(labels), \
        'Error: mismatch between the number of observations and labels.'

    # only need the first 1600 dimensions
    return {'features': features[:, :1600], 'labels': labels, 'nums': nums}
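# Design-change sketch (illustrative data; inf_wrapper is assumed to be the
# same inference wrapper as above): batching all titles/bodies through one
# df_to_embedding call avoids the per-issue inference round-trips of the
# previous version.
demo = pd.DataFrame.from_dict({
    'title': ['Fix crash on startup', 'Add dark mode'],
    'body': ['The app crashes when ...', 'It would be nice to ...'],
})
# features = inf_wrapper.df_to_embedding(demo)  # expected shape: (2, d), d >= 1600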
import numpy as np
from torch import Tensor
from fastai.vision import get_transforms, ImageList, FloatList
from fastai.vision import cnn_learner, Learner
from fastai.core import partial
from optim.ranger import Ranger


def accuracy_thresh(y_pred, y_true, thresh: float = 0.5, sigmoid: bool = False):
    "Computes accuracy when `y_pred` and `y_true` are the same size."
    if sigmoid:
        y_pred = y_pred.sigmoid()
    y_pred = np.array(y_pred).flatten()
    y_true = np.array(y_true).flatten()
    return Tensor([((y_pred > thresh) == y_true).mean()])


optar = partial(Ranger)  # optimizer factory handed to the Learner

CV = 1
seed = CV
bs = 20

tfms = get_transforms(flip_vert=True, do_flip=True, max_zoom=1.05,
                      max_lighting=0.2, max_warp=0.05, max_rotate=5.)

# `image_df` and `DATA_BASE_PATH` are defined earlier in the notebook.
data = (ImageList.from_df(df=image_df, path=DATA_BASE_PATH / 'train_images', cols='ImageId')
        .split_from_df()
        .label_from_df(cols='Detected', label_cls=FloatList)
        .transform(tfms)
        .databunch(bs=bs, num_workers=4))
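# A hedged continuation: wiring the databunch, Ranger optimizer, and custom
# metric into a Learner. models.resnet34 is an illustrative backbone choice,
# not taken from the original source.
from fastai.vision import models

learn = cnn_learner(data, models.resnet34, opt_func=optar,
                    metrics=[partial(accuracy_thresh, thresh=0.5)])
learn.fit_one_cycle(5)  # epoch count is illustrative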