def test_from_df():
    """Check that ``ImageList.from_df`` does not raise ``TypeError`` for keyword arguments.

    The call passes ``path`` and ``df`` by keyword. Any exception other than
    ``TypeError`` is caught and ignored by this test.
    """
    this_tests(ItemList.from_df)
    frame = pd.DataFrame(["123.png"], columns=["name"])
    try:
        ImageList.from_df(path="dummy_path", df=frame)
    except Exception as err:
        # Only a TypeError makes the test fail; every other exception type
        # is swallowed here.
        raised_type_error = isinstance(err, TypeError)
        assert not raised_type_error
def preprocess(dicom_paths):
    """Build an ``ImageList`` from the given paths with ``.png`` appended.

    Args:
        dicom_paths: iterable of path stems; each one is formatted into
            ``'<stem>.png'``.

    Returns:
        The object returned by ``ImageList.from_df`` for a one-column
        (``'name'``) frame of those file names, rooted at ``'.'``.
    """
    png_names = ['{}.png'.format(stem) for stem in dicom_paths]
    name_frame = pd.DataFrame(png_names, columns=['name'])
    # NOTE(review): the loaded stats are not used anywhere in this function;
    # the load is kept so the function still touches 'normal_stats'.
    train_data_stats = torch.load('normal_stats')
    return ImageList.from_df(name_frame, '.')
# Notebook-style script fragment whose statements are collapsed onto one line.
# The `# +` tokens look like notebook cell markers (TODO confirm export tool).
# Steps visible in the code below:
#   1. Read the label table from LABELS with pandas, then print the per-column
#      unique counts with the first and last entries dropped.
#   2. Build a fastai pipeline from that frame: ImageList.from_df with
#      folder=TRAIN, suffix='.png', cols='image_id', convert_mode='L';
#      split_by_idx receives the `fold`-th of `nfolds` contiguous index ranges
#      over the frame; labels come from the columns 'grapheme_root',
#      'vowel_diacritic' and 'consonant_diacritic'.
#   3. Apply get_transforms(do_flip=False, max_warp=0.1) at size `sz` with
#      padding_mode='zeros', batch with `bs`, normalize with `stats`, and show
#      a batch.
#   4. Start the definition of class Head(nn.Module); that definition is cut
#      off at the end of this fragment.
# `fold`, `nfolds`, `sz`, `bs`, LABELS and TRAIN are not defined in this fragment.
df = pd.read_csv(LABELS) nunique = list(df.nunique())[1:-1] print(nunique) df.head() range(fold * len(df) // nfolds, (fold + 1) * len(df) // nfolds) # + stats = ([0.0692], [0.2051]) data = (ImageList.from_df( df, path='.', folder=TRAIN, suffix='.png', cols='image_id', convert_mode='L').split_by_idx( range(fold * len(df) // nfolds, (fold + 1) * len(df) // nfolds)).label_from_df(cols=[ 'grapheme_root', 'vowel_diacritic', 'consonant_diacritic' ]).transform( transform.get_transforms(do_flip=False, max_warp=0.1), size=sz, padding_mode='zeros').databunch(bs=bs)).normalize(stats) data.show_batch() # + class Head(nn.Module): def __init__(self, nc, n, ps=0.5): super().__init__() layers = [AdaptiveConcatPool2d(), Mish(), Flatten()] + \
def create_covidx_databunch(self):
    """Build the COVIDx databunch and the COVID-Net test item lists.

    Reads ``train_split_{version}.txt`` and ``test_split_{version}.txt``
    (``version`` is ``self.version``) from ``DATA_DIR / "COVIDx"``, flags the
    test split rows with ``is_valid=True``, and assembles a fastai databunch
    with batch size ``self.bs``. The ``test_COVIDx4.txt`` listing is loaded
    separately, checked to be disjoint from the train split and contained
    in the test split, and attached to the databunch via ``add_test``.

    Side effects: sets ``self.train_df`` and ``self.test_df``, seeds NumPy's
    global RNG with 42, and logs the class proportions of each split.

    Returns:
        tuple: ``(data, test)`` - the normalized databunch and the labelled
        item lists built from ``test_COVIDx4.txt``.

    Raises:
        AssertionError: if the data directory does not exist or a sanity
            check on the COVID-Net test listing fails.
    """
    bs = self.bs
    data_path = DATA_DIR / "COVIDx"
    assert data_path.exists(), f"COVIDx data directory not found: {data_path}"

    def read_listing(listing_path, columns, subdir):
        # Parse one space-delimited listing file and prefix every file name
        # with `subdir`.
        frame = pd.read_csv(listing_path,
                            header=None,
                            delimiter=" ",
                            index_col=0,
                            names=columns).reset_index(drop=True)
        frame["name"] = [subdir + f for f in frame["name"]]
        return frame

    def class_proportions(labels):
        # Relative frequency of each distinct label value.
        counts = np.unique(labels, return_counts=True)[1]
        return counts / counts.sum()

    # train set
    train_df_path = data_path / f"train_split_{self.version}.txt"
    LOGGER.info(f'Reading train_df from {train_df_path}')
    self.train_df = read_listing(train_df_path,
                                 ["name", "label", "dataset"], "train/")
    self.train_df["is_valid"] = False

    # validation set
    test_df_path = data_path / f"test_split_{self.version}.txt"
    LOGGER.info(f'Reading test_df from {test_df_path}')
    # Read test_df_path directly: it already contains data_path, and joining
    # the two again duplicates the directory when data_path is relative.
    self.test_df = read_listing(test_df_path,
                                ["name", "label", "dataset"], "test/")
    self.test_df["is_valid"] = True

    # merge
    data_df = pd.concat([self.train_df, self.test_df]).reset_index(drop=True)
    data_df = data_df.drop("dataset", axis=1)

    # import covidnet test set which is included in testset
    covidnet_test_df = read_listing(data_path / "test_COVIDx4.txt",
                                    ["name", "label"], "test/")

    # sanity check: the COVID-Net test files must not appear in the train
    # split and must all be part of the test split
    covidnet_names = set(covidnet_test_df["name"])
    assert covidnet_names.isdisjoint(self.train_df["name"]), \
        "COVID-Net test files overlap the train split"
    assert covidnet_names <= set(self.test_df["name"]), \
        "COVID-Net test files are missing from the test split"

    # create fastai databunch
    tfms = get_dataaug_transformations()
    np.random.seed(42)
    src = (ImageList.from_df(data_df, data_path)
           .split_from_df()
           .label_from_df()
           .transform(tfms, size=224))
    test = (ImageList.from_df(covidnet_test_df, data_path)
            .split_none()
            .label_from_df()
            .transform(None, size=224))
    data = src.databunch(bs=bs).normalize(imagenet_stats)
    data.add_test(test.train.x)

    # check that proportion classes are same in train and valid
    LOGGER.info(
        f'prop in train set: \n{class_proportions(data.train_ds.y.items)}')
    LOGGER.info(
        f'prop in valid set: {class_proportions(data.valid_ds.y.items)}')
    LOGGER.info(
        f'prop in test set: {class_proportions(test.y.items)}')
    return data, test
# Optimizer factory wrapping Ranger with no pre-bound arguments.
optar = partial(Ranger)

# In[25]:

# `seed` takes its value from `CV`.
CV = 1
seed = CV
bs = 20

# Augmentations passed to the data pipeline below.
tfms = get_transforms(
    do_flip=True,
    flip_vert=True,
    max_rotate=5.0,
    max_zoom=1.05,
    max_lighting=0.2,
    max_warp=0.05,
)

# Data pipeline: items from image_df's 'ImageId' column, split via
# split_from_df, float labels from the 'Detected' column, then transformed,
# batched and normalized.
data = (
    ImageList.from_df(
        df=image_df,
        path=DATA_BASE_PATH / 'train_images',
        cols='ImageId',
    )
    .split_from_df()
    .label_from_df(cols='Detected', label_cls=FloatList)
    .transform(tfms)
    .databunch(bs=bs, num_workers=4)
    .normalize(IMAGE_STATS_GLOBAL2)
)

from fastai.vision.models import resnet50
from models.efficientnet import EfficientNet

# making model
arch = 'efficientnet-b0'
model_name = '{}-v1'.format(arch)

# Parameters for the entire model (stem, all blocks, and head)
md_ef = EfficientNet.from_pretrained(
    arch,
    num_classes=1,
    dropout_rate=0.5,
)
# md_ef = resnet50(pretrained=False, num_classes=1)
from densenet import densenet121, densenet161, densenet169
from resnet import (resnet50, resnet101, resnet152, resnext50_32x4d,
                    resnext101_32x8d)
from vgg import vgg13_bn, vgg16_bn, vgg19_bn
from se_resnet import se_resnet50, se_resnet101, se_resnet152

# In[1]

print(os.listdir("./input/"))

# Input locations and tables read at import time.
train_dir = "./input/train/train"
test_dir = "./input/test/train"
train = pd.read_csv('./input/train.csv')
test = pd.read_csv("./input/sample_submission.csv")
path = Path("./input")
device = torch.device('cuda:0')
test_img = ImageList.from_df(test, path=path / 'test', folder='test')


# In[2]


def predict(learn: Learner, name: str):
    """Write test-set predictions to ``submission_<name>.csv``.

    Takes the first column of the learner's test-set predictions, stores it
    in the module-level ``test`` frame as ``'has_cactus'``, writes that frame
    to CSV without the index, and prints a confirmation. A frame of the
    recorded losses is built afterwards.

    Args:
        learn: learner providing ``get_preds`` and ``recorder.losses``.
        name: tag inserted into the submission file name.
    """
    # submission.csv
    preds, _ = learn.get_preds(ds_type=DatasetType.Test)
    test['has_cactus'] = preds.numpy()[:, 0]
    submission_file = f'submission_{name}.csv'
    test.to_csv(submission_file, index=False)
    print(f'Finish creating {submission_file}')

    # loss.csv
    # NOTE(review): loss_df is built but not used within the visible code.
    recorded_losses = learn.recorder.losses
    id_ = range(len(recorded_losses))
    loss_df = pd.DataFrame({
        'id': id_,
        'loss': np.array(recorded_losses),
    })
def get_label_list(self, df_val, image_path):
    """Return the unsplit, labelled item lists for ``df_val``.

    Items are read from the ``'filename'`` column of ``df_val`` relative to
    ``image_path``, kept unsplit via ``split_none``, and labelled with
    ``TestCnnLearners.get_label``.
    """
    items = ImageList.from_df(df_val, image_path, cols='filename')
    unsplit = items.split_none()
    return unsplit.label_from_func(TestCnnLearners.get_label)