Example #1
from functools import partial
from typing import Callable, Tuple

from torch import FloatTensor, tensor


# `_normalize_triplet_batch` and `denormalize_triplet` are project-local helpers.
def normalize_triplet_funcs(mean: FloatTensor,
                            std: FloatTensor,
                            do_x: bool = True,
                            do_y: bool = False) -> Tuple[Callable, Callable]:
    "Create normalize/denormalize func using `mean` and `std`, can specify `do_y` and `device`."
    mean, std = tensor(mean), tensor(std)
    return (
        partial(_normalize_triplet_batch,
                mean=mean,
                std=std,
                do_x=do_x,
                do_y=do_y),
        partial(denormalize_triplet, mean=mean, std=std, do_x=do_x),
    )
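A minimal usage sketch, assuming the standard ImageNet statistics for `mean` and `std` (the values are well known, but their use here is an illustration, not part of the source):

# Hypothetical usage: bind ImageNet stats into a normalize/denormalize pair.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

norm_fn, denorm_fn = normalize_triplet_funcs(imagenet_mean, imagenet_std,
                                             do_x=True, do_y=False)
# norm_fn and denorm_fn are now plain callables that can be passed around,
# e.g. as batch transforms, without re-supplying mean/std at every call site.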
Example #2
from functools import partial
from pathlib import Path

from fastai.core import parallel
from fastai.vision import ImageList


# `create_training_images` is a project-local helper defined elsewhere.
def create_dataset(path_fullRes: Path, path_list, downsize=True):
    il = ImageList.from_folder(path_fullRes)

    for p, size, qf in path_list:
        if not p.exists():
            print(f"Creating {p}")
            print(f"Size: {size} with {qf} quality factor")
            parallel(partial(create_training_images,
                             p_hr=path_fullRes,
                             p_lr=p,
                             size=size,
                             qualityFactor=qf,
                             downsize=downsize), il.items)
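A hedged usage sketch; the folder names and the (path, size, quality-factor) triples below are placeholder assumptions, not values from the source:

# Hypothetical call: generate two downsized, JPEG-compressed training sets
# from one full-resolution folder. Existing output folders are skipped.
path_fullRes = Path('data/full_res')
path_list = [
    (Path('data/lr_96_q40'), 96, 40),    # small, heavily compressed
    (Path('data/lr_192_q75'), 192, 75),  # larger, lighter compression
]
create_dataset(path_fullRes, path_list, downsize=True)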
Example #3
import logging
from functools import partial

import torch
from tqdm import tqdm


# `find_max_issue_num`, `get_issue_text`, and `parallel` are helpers from the
# surrounding project.
def get_all_issue_text(owner, repo, inf_wrapper, workers=64):
    """
    Prepare embedding features of all issues in a given repository.

    Returns
    -------
    dict
        {'features': np.ndarray, 'labels': list, 'nums': list}
    """
    # prepare list of issue nums
    max_num = find_max_issue_num(owner, repo)

    get = partial(get_issue_text, owner=owner, repo=repo, skip_issue=True)
    issues = parallel(get, list(range(1, max_num + 1)), max_workers=workers)
    # filter out issues with problems
    filtered_issues = [issue for issue in issues if issue]

    logging.info(f'Retrieved {len(filtered_issues)} issues.')

    features = []
    labels = []
    nums = []
    for issue in tqdm(filtered_issues):
        labels.append(issue['labels'])
        nums.append(issue['num'])
        # calculate embedding
        text = inf_wrapper.process_dict(issue)['text']
        feature = inf_wrapper.get_pooled_features(text).detach().cpu()
        # only need the first 1600 dimensions
        features.append(feature[:, :1600])

    assert len(features) == len(labels), \
        'Mismatch between number of observations and labels.'

    return {
        'features': torch.cat(features).numpy(),
        'labels': labels,
        'nums': nums
    }
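A usage sketch under stated assumptions: the owner/repo values are placeholders, and `inf_wrapper` stands in for the project's inference-wrapper object, whose construction is not shown in the source:

# Hypothetical usage of the function above; owner/repo are placeholders.
data = get_all_issue_text(owner='some-org', repo='some-repo',
                          inf_wrapper=inf_wrapper, workers=32)
print(data['features'].shape)              # (n_issues, 1600) np.ndarray
print(len(data['labels']), len(data['nums']))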
Example #4
def get_all_issue_text(owner, repo, inf_wrapper, workers=64):
    """
    Prepare embedding features of all issues in a given repository.

    Returns
    -------
    dict
        {'features': array, 'labels': list, 'nums': list}
    """
    # prepare list of issue nums
    max_num = find_max_issue_num(owner, repo)

    get = partial(get_issue_text, owner=owner, repo=repo, skip_issue=True)
    issues = parallel(get, list(range(1, max_num + 1)), max_workers=workers)
    if not issues:
        raise ValueError(f"No issues retrieved for {owner}/{repo}")

    # filter out issues with problems
    filtered_issues = [issue for issue in issues if issue]

    logging.info(
        f'Repo {owner}/{repo} Retrieved {len(filtered_issues)} issues.')

    labels = []
    nums = []
    issues_dict = {'title': [], 'body': []}
    for issue in tqdm(filtered_issues):
        labels.append(issue['labels'])
        nums.append(issue['num'])
        issues_dict['title'].append(issue['title'])
        issues_dict['body'].append(issue['body'])

    features = inf_wrapper.df_to_embedding(pd.DataFrame.from_dict(issues_dict))

    assert len(features) == len(labels), \
        'Mismatch between number of observations and labels.'

    return {'features': features[:, :1600], 'labels': labels, 'nums': nums}
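The design change relative to Example #3: instead of calling `get_pooled_features` once per issue, this variant collects all titles and bodies into a DataFrame and embeds them in a single batched `df_to_embedding` call. A minimal sketch of that step in isolation, with toy rows assumed:

import pandas as pd

# Toy inputs; in the function above these come from the filtered issues.
issues_dict = {'title': ['Crash on startup', 'Docs typo'],
               'body': ['Stack trace attached.', 'Fix spelling in README.']}

# One batched embedding call instead of a per-issue Python loop.
features = inf_wrapper.df_to_embedding(pd.DataFrame.from_dict(issues_dict))
features = features[:, :1600]  # keep the first 1600 dimensions, as above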
Example #5

import numpy as np
from fastai.vision import get_transforms, ImageList, FloatList
from fastai.vision import cnn_learner, Learner
from optim.ranger import Ranger
from fastai.core import partial
from torch import Tensor
def accuracy_thresh(y_pred, y_true, thresh:float=0.5, sigmoid:bool=False):
    "Computes accuracy when `y_pred` and `y_true` are the same size."
    if sigmoid: y_pred = y_pred.sigmoid()
    y_pred = np.array(y_pred).flatten()
    y_true = np.array(y_true).flatten()
    return Tensor([((y_pred>thresh)==y_true).mean()])


optar = partial(Ranger)


CV=1
seed = CV
bs = 20
tfms = get_transforms(flip_vert=True, do_flip=True, max_zoom=1.05, max_lighting=0.2,
                      max_warp=0.05, max_rotate=5.)
# `image_df` and `DATA_BASE_PATH` are defined elsewhere in the source.
data = (ImageList.from_df(df=image_df, path=DATA_BASE_PATH / 'train_images', cols='ImageId')
        .split_from_df()
        .label_from_df(cols='Detected', label_cls=FloatList)
        .transform(tfms)
        .databunch(bs=bs, num_workers=4))
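A sketch of how the pieces above would typically be wired together in fastai v1; the ResNet-34 backbone, epoch count, and learning rate are illustrative assumptions, not values from the source:

from fastai.vision import models

# Hypothetical continuation: a CNN learner driven by the Ranger partial and
# scored with the thresholded accuracy defined above.
learn = cnn_learner(data, models.resnet34, opt_func=optar,
                    metrics=[partial(accuracy_thresh, thresh=0.5)])
learn.fit_one_cycle(5, max_lr=1e-3)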