def test_from_df():
    """Check that ``ImageList.from_df`` accepts ``path``/``df`` keywords.

    The path is a dummy, so the call is allowed to fail for any reason
    other than a ``TypeError``; only a ``TypeError`` fails the test.
    """
    this_tests(ItemList.from_df)
    frame = pd.DataFrame(data=["123.png"], columns=["name"])
    try:
        ImageList.from_df(path="dummy_path", df=frame)
    except TypeError:
        # A TypeError is the one failure mode this test guards against.
        assert False
    except Exception:
        # Any other failure (e.g. caused by the dummy path) is tolerated.
        pass
def preprocess(dicom_paths):
    """Build an ``ImageList`` from ``dicom_paths`` with ``.png`` appended.

    Each entry of ``dicom_paths`` is formatted into a string with a
    ``.png`` suffix and placed in a one-column (``name``) data frame, which
    is handed to ``ImageList.from_df`` with ``'.'`` as the base path.

    Returns:
        The object returned by ``ImageList.from_df``.
    """
    png_names = [f'{path}.png' for path in dicom_paths]
    frame = pd.DataFrame(data=png_names, columns=['name'])
    # NOTE(review): the loaded stats are not used in the visible code; the
    # call is kept because it still reads 'normal_stats' from disk.
    _train_data_stats = torch.load('normal_stats')
    return ImageList.from_df(frame, '.')
# Load the label table; report distinct-value counts per column, dropping
# the first and last entries.
df = pd.read_csv(LABELS)
nunique = list(df.nunique())[1:-1]
print(nunique)
df.head()

range(fold * len(df) // nfolds, (fold + 1) * len(df) // nfolds)

# +
# Single-channel normalization statistics passed to normalize() below.
stats = ([0.0692], [0.2051])
# Rows in the current fold's index range are passed to split_by_idx.
data = (
    ImageList.from_df(df, path='.', folder=TRAIN, suffix='.png',
                      cols='image_id', convert_mode='L')
    .split_by_idx(range(fold * len(df) // nfolds,
                        (fold + 1) * len(df) // nfolds))
    .label_from_df(cols=['grapheme_root', 'vowel_diacritic',
                         'consonant_diacritic'])
    .transform(transform.get_transforms(do_flip=False, max_warp=0.1),
               size=sz, padding_mode='zeros')
    .databunch(bs=bs)
).normalize(stats)

data.show_batch()


# +
class Head(nn.Module):
    def __init__(self, nc, n, ps=0.5):
        super().__init__()
        layers = [AdaptiveConcatPool2d(), Mish(), Flatten()] + \
# Example no. 4
# 0
    def create_covidx_databunch(self):
        """Build the fastai databunch for COVIDx plus the COVID-Net test list.

        Reads the train and test split files for ``self.version`` from
        ``DATA_DIR / "COVIDx"`` and stores them on ``self.train_df`` and
        ``self.test_df``. Test-split rows are flagged ``is_valid=True`` so
        they act as the validation set. The images listed in
        ``test_COVIDx4.txt`` are attached to the databunch as its test set.

        Returns:
            tuple: ``(data, test)`` where ``data`` is the normalized
            databunch and ``test`` is the labelled list built from
            ``test_COVIDx4.txt``.

        Raises:
            AssertionError: if the data directory does not exist, or if the
                COVID-Net test files overlap the train split or are not all
                contained in the test split.
        """
        bs = self.bs

        data_path = DATA_DIR / "COVIDx"
        assert data_path.exists()

        def read_split(split_path, names, prefix):
            # All split files share the same space-delimited, header-less
            # layout; ``prefix`` is the sub-folder prepended to file names.
            frame = (pd.read_csv(split_path,
                                 header=None,
                                 delimiter=" ",
                                 index_col=0,
                                 names=names).reset_index(drop=True))
            frame["name"] = [prefix + f for f in frame["name"]]
            return frame

        def log_proportions(split_name, labels):
            # Log the relative frequency of each distinct label value.
            counts = np.unique(labels, return_counts=True)[1]
            LOGGER.info(f'prop in {split_name} set: {counts / counts.sum()}')

        # train set
        train_df_path = data_path / f"train_split_{self.version}.txt"
        LOGGER.info(f'Reading train_df from {train_df_path}')
        self.train_df = read_split(train_df_path,
                                   ["name", "label", "dataset"], "train/")
        self.train_df["is_valid"] = False

        # validation set
        test_df_path = data_path / f"test_split_{self.version}.txt"
        LOGGER.info(f'Reading test_df from {test_df_path}')
        # ``test_df_path`` already includes ``data_path``; joining it onto
        # ``data_path`` again would double the directory whenever DATA_DIR
        # is relative, so read the path exactly as logged.
        self.test_df = read_split(test_df_path,
                                  ["name", "label", "dataset"], "test/")
        self.test_df["is_valid"] = True

        # merge
        data_df = pd.concat([self.train_df,
                             self.test_df]).reset_index(drop=True)
        data_df = data_df.drop("dataset", axis=1)

        # import covidnet test set which is included in testset
        covidnet_test_df = read_split(data_path / "test_COVIDx4.txt",
                                      ["name", "label"], "test/")

        # sanity check: the COVID-Net test files must be disjoint from the
        # train split and fully contained in the test split
        covidnet_names = set(covidnet_test_df.name)
        train_names = set(self.train_df.name)
        test_names = set(self.test_df.name)
        assert covidnet_names.intersection(train_names) == set()
        assert covidnet_names.intersection(test_names) == covidnet_names

        # create fastai databunch
        tfms = get_dataaug_transformations()

        np.random.seed(42)
        src = (ImageList.from_df(data_df, data_path)
               .split_from_df()
               .label_from_df()
               .transform(tfms, size=224))
        test = (ImageList.from_df(covidnet_test_df, data_path)
                .split_none()
                .label_from_df()
                .transform(None, size=224))

        data = (src.databunch(bs=bs).normalize(imagenet_stats))
        data.add_test(test.train.x)

        # check that proportion classes are same in train and valid
        log_proportions("train", data.train_ds.y.items)
        log_proportions("valid", data.valid_ds.y.items)
        log_proportions("test", test.y.items)

        return data, test
# Example no. 5
# 0

# Optimizer factory wrapping Ranger (no arguments are pre-bound here).
optar = partial(Ranger)


# In[25]:


CV = 1
seed = CV
bs = 20

# Augmentation settings handed to .transform() below.
tfms = get_transforms(
    flip_vert=True,
    do_flip=True,
    max_zoom=1.05,
    max_lighting=0.2,
    max_warp=0.05,
    max_rotate=5.,
)

# Float targets are read from the 'Detected' column via FloatList.
data = (
    ImageList.from_df(
        df=image_df,
        path=DATA_BASE_PATH / 'train_images',
        cols='ImageId',
    )
    .split_from_df()
    .label_from_df(cols='Detected', label_cls=FloatList)
    .transform(tfms)
    .databunch(bs=bs, num_workers=4)
    .normalize(IMAGE_STATS_GLOBAL2)
)


from fastai.vision.models import resnet50
from models.efficientnet import EfficientNet

# making model
arch = 'efficientnet-b0'
model_name = f'{arch}-v1'

# Parameters for the entire model (stem, all blocks, and head)
md_ef = EfficientNet.from_pretrained(
    arch,
    num_classes=1,
    dropout_rate=0.5,
)
# md_ef = resnet50(pretrained=False, num_classes=1)
# Example no. 6
# 0
from densenet import densenet121, densenet161, densenet169
from resnet import (resnet50, resnet101, resnet152, resnext50_32x4d,
                    resnext101_32x8d)
from vgg import vgg13_bn, vgg16_bn, vgg19_bn
from se_resnet import se_resnet50, se_resnet101, se_resnet152

# In[1]
# Show what is available under the input directory before loading anything.
print(os.listdir("./input/"))

# Locations and device used by the rest of the script.
path = Path("./input")
train_dir = "./input/train/train"
test_dir = "./input/test/train"
device = torch.device('cuda:0')

# Training CSV and the sample-submission CSV.
train = pd.read_csv('./input/train.csv')
test = pd.read_csv("./input/sample_submission.csv")

# Image list built from the rows of the sample-submission frame.
test_img = ImageList.from_df(
    test,
    path=path / 'test',
    folder='test',
)


# In[2]
def predict(learn: Learner, name: str):
    """Write test-set predictions of ``learn`` to ``submission_<name>.csv``.

    The first prediction column is stored in the module-level ``test``
    frame as ``has_cactus`` before the frame is written out. A table of the
    recorder's losses is then assembled.

    NOTE(review): ``loss_df`` is built but not used in the visible portion
    of this function — confirm whether the remainder was cut off.
    """
    # submission.csv
    test_preds, _ = learn.get_preds(ds_type=DatasetType.Test)
    test['has_cactus'] = test_preds.numpy()[:, 0]
    submission_file = 'submission_{}.csv'.format(name)
    test.to_csv(submission_file, index=False)
    print('Finish creating submission_{}.csv'.format(name))
    # loss.csv
    recorded = learn.recorder.losses
    loss_df = pd.DataFrame({
        'id': range(len(recorded)),
        'loss': np.array(recorded),
    })
 def get_label_list(self, df_val, image_path):
     """Return the unsplit image list for ``df_val`` labelled by function.

     File names come from the ``filename`` column of ``df_val`` and are
     resolved against ``image_path``; labels are produced by
     ``TestCnnLearners.get_label``.
     """
     unsplit = ImageList.from_df(
         df_val,
         image_path,
         cols='filename',
     ).split_none()
     return unsplit.label_from_func(TestCnnLearners.get_label)