コード例 #1
0
def test_set_random_seed(tiny_ic_data_path):
    """Seeding with the same value twice must give identical batch labels."""

    def labels_of_first_batch():
        # Re-seed right before building the databunch so both pipelines
        # start from the same seed.
        set_random_seed(1)
        items = ImageList.from_folder(tiny_ic_data_path)
        labelled = items.split_by_rand_pct().label_from_folder()
        bunch = labelled.transform().databunch(bs=5).normalize()
        # A batch is indexed as [inputs, labels]; only the labels are compared
        return bunch.one_batch()[1].tolist()

    first_labels = labels_of_first_batch()
    second_labels = labels_of_first_batch()
    assert first_labels == second_labels
def test_model_to_learner(tmp):
    """Check that model_to_learner wraps a pretrained ResNet into a learner."""
    arch = models.resnet18

    # The wrapped pretrained model should be a ResNet exposing 1000 classes
    learn = model_to_learner(arch(pretrained=True))
    assert len(learn.data.classes) == 1000  # Check Image net classes
    assert isinstance(learn.model, models.ResNet)

    # Download one very simple image and check it is classified correctly
    IM_URL = "https://cvbp.blob.core.windows.net/public/images/cvbp_cup.jpg"
    local_image = os.path.join(tmp, "cvbp_cup.jpg")
    urllib.request.urlretrieve(IM_URL, local_image)

    image = open_image(local_image, convert_mode="RGB")
    category, ind, predict_output = learn.predict(image)
    assert learn.data.classes[ind] == str(category) == "coffee_mug"

    # .predict() and .get_preds() must yield the same output for that image.
    # label_const() is used because label_empty cannot be used due to a
    # fastai bug: https://github.com/fastai/fastai/issues/1908
    unlabelled = ImageList.from_folder(tmp).split_none().label_const()
    resized = unlabelled.transform(tfms=None, size=IMAGENET_IM_SIZE)
    one_data = resized.databunch(bs=1).normalize(imagenet_stats)
    learn.data.train_dl = one_data.train_dl
    get_preds_output = learn.get_preds(ds_type=DatasetType.Train)

    # Note, get_preds() produces a batch (list) output, so take its first row
    batch_first_row = np.array(get_preds_output[0].tolist()[0])
    single_prediction = np.array(predict_output.tolist())
    assert np.allclose(batch_first_row,
                       single_prediction,
                       rtol=1e-05,
                       atol=1e-08)
コード例 #3
0
    def prediction(self, directorio, num_batch=8):
        """Predict a class for every image in a folder and capture features.

        Args:
            directorio: folder containing the images to classify.
            num_batch: value forwarded as ``n_batch`` to ``learn.get_preds``.

        Results are stored on the instance:
            predictions_dict: maps each image file name to its predicted class.
            features: squeezed ``features`` of the ``SaveFeatures`` hook.
            labels: the distinct predicted classes.

        Raises:
            ValueError: if ``self.arquitecture`` is neither 'resnet18' nor
                'effB4', because no feature layer is known for it.
        """
        # NOTE(review): `learn` is not defined in this method; presumably a
        # module-level learner - confirm against the rest of the file.
        data = ImageList.from_folder(
            directorio)  # build the ImageList from the folder
        learn.data.add_test(data)  # add data to the test set of learn

        learn.to_fp32()  # make sure the model runs in FP32

        # Hook the layer whose output is kept as the feature vector. Fail
        # here, before the expensive inference, when the layer is unknown.
        if self.arquitecture == 'resnet18':
            sf = SaveFeatures(learn.model[1][4])
        elif self.arquitecture == 'effB4':
            sf = SaveFeatures(learn.model._avg_pooling)
        else:
            raise ValueError(
                f"Unsupported architecture: {self.arquitecture!r}")

        # get the probabilities of images
        preds, _ = learn.get_preds(ds_type=DatasetType.Test, n_batch=num_batch)

        # Get the predictions (integer indexes) as the arguments of the
        # max probabilities
        y_pred = preds.argmax(dim=1).tolist()

        self.predictions_dict = {
            n.name: learn.data.classes[y]
            for n, y in zip(data.items, y_pred)
        }
        self.features = sf.features.squeeze()
        self.labels = list(set(self.predictions_dict.values()))
コード例 #4
0
def get_data_from_folder(path: Union[Path, str],
                         bs: int,
                         img_size: int,
                         tfms: Transform = None,
                         extensions: List[str] = [".jpg"]) -> ImageDataBunch:
    """Build a normalized fastai DataBunch from an Imagenet-style folder.

    The folder is expected to follow the test/train/valid layout; batch and
    image sizes are configurable so the data can be used to train with PyTorch.

    Args:
        path : path to folder with data in train/valid/test folder structure
        bs : batch size
        img_size : size the images are resized to for training
        tfms : transformations to apply; ``get_transforms()`` is used when None
        extensions : file extensions to grab from the folder path

    Returns:
        data : DataBunch normalized with ``imagenet_stats``, created with
        ``num_workers=0``

    """
    transforms = get_transforms() if tfms is None else tfms

    items = ImageList.from_folder(path, extensions=extensions)
    labelled = items.split_by_folder().label_from_folder()
    resized = labelled.transform(transforms, size=img_size)
    bunch = resized.databunch(bs=bs, num_workers=0)
    return bunch.normalize(imagenet_stats)
コード例 #5
0
 def get_data(train_sampler=None):
     """Build the train/val databunch for chip_dir with an optional sampler."""
     # chip_dir, tfms, size, num_workers and self come from the enclosing scope
     split = ImageList.from_folder(chip_dir).split_by_folder(train='train',
                                                             valid='val')
     prepared = split.label_from_folder().transform(tfms, size=size)
     return prepared.databunch(bs=self.train_opts.batch_sz,
                               num_workers=num_workers,
                               train_sampler=train_sampler)
コード例 #6
0
ファイル: predict.py プロジェクト: lemonwaffle/VroomNet
def main(ensemble, tta, output):
    """Run inference on the test images and save the predictions as CSV.

    Args:
        ensemble: when truthy, the 'dpn92', 'inceptionv4' and 'se_resnext101'
            learners are combined through ``Ensemble``; otherwise only
            'se_resnext101' is used.
        tta: forwarded to ``Ensemble.predict``; for the single learner it
            selects ``learn.TTA`` over ``learn.get_preds``.
        output: destination passed to ``DataFrame.to_csv``.
    """
    # Read in test data images from the DATA_DIR/TEST_FOLDER folder
    print("Loading test data.")
    test_dir = os.path.join(DATA_DIR, TEST_FOLDER)
    test_imgs = ImageList.from_folder(path=test_dir)

    if not ensemble:
        learner_name = 'se_resnext101'

        # Single learner
        print(f"Loading {learner_name}")
        reference = load_learner(SAVED_DIR,
                                 f'{learner_name}.pkl',
                                 test=test_imgs)

        print("Performing inference...")
        predict = reference.TTA if tta else reference.get_preds
        preds, _ = predict(ds_type=DatasetType.Test)
        print("Predictions done.")
    else:
        # Ensemble of learners, all loaded with the same test set
        learners = []
        for name in ('dpn92', 'inceptionv4', 'se_resnext101'):
            print(f"Loading {name}")
            learners.append(
                load_learner(SAVED_DIR, f'{name}.pkl', test=test_imgs))

        print("Initializing ensemble.")
        combined = Ensemble(learners)

        print("Performing inference...")
        preds = combined.predict(tta)
        print("Predictions done.")

        # The first learner provides the class and image name lists
        reference = learners[0]

    classes = reference.data.classes
    img_names = [item.name for item in reference.data.test_ds.items]

    # One column per class, preceded by the image name column
    df = pd.DataFrame(np.array(preds), columns=classes)
    df.insert(0, 'img_name', img_names)

    # Save predictions as csv file
    df.to_csv(output, index=False)
    print(f"Predictions saved to {output}")
コード例 #7
0
def get_data(data_path: PathOrStr,
             bs: int = 16,
             img_size: int = 160,
             pct_partial: float = 1.0,
             num_workers: int = 0,
             seed: int = 42) -> ImageDataBunch:
    """
    Build an unshuffled, normalized data object from an Imagenet-style directory.

    Thin wrapper around fastai's Data Block API that packages together datasets
    and dataloaders, transforms, splitting the data, etc.

    :param data_path: path to data in Imagenet-style folder structure.
    :param bs: batch size; lowered to the number of images when there are fewer images than `bs`
    :param img_size: target image size
    :param pct_partial: proportion of all data to use
    :param num_workers: number of workers used to parallelize data transformations when feeding into the model
    :param seed: seed passed to `use_partial_data` when selecting the partial data
    :return: data object containing data set and data loader (in PyTorch sense)

    .. note:: more on Data Block API here: https://docs.fast.ai/data_block.html
    .. note:: Imagenet-style directory structure: https://docs.fast.ai/vision.data.html#ImageDataBunch.from_folder
    .. note:: `num_workers` anything from 0 crashes on my laptop, ideally, should equal the number of cores of your CPU
    .. note:: all of the data will be used as training set, even images in `valid` folder
    """
    items = ImageList.from_folder(data_path)  # -> ImageList
    subset = items.use_partial_data(pct_partial, seed=seed)  # -> ImageList
    # split_none -> ItemLists, label_from_folder -> LabelLists
    label_lists: LabelLists = (
        subset.split_none().label_from_folder().transform(size=img_size))

    # fastai gives a warning and throws an error when showing a batch if the
    # number of images is smaller than the batch size, so shrink the batch
    n_images = len(label_lists.train)
    if bs > n_images:
        print(
            f"Too few images. Decreasing batch size from {bs} to {n_images}.")
        bs = n_images

    bunch = label_lists.databunch(bs=bs, num_workers=num_workers)
    data: ImageDataBunch = bunch.normalize(imagenet_stats)

    # keep the images in order so the right images can be found easily
    data.train_dl = data.train_dl.new(shuffle=False)
    # data object needs to know its image size
    data.img_size = img_size
    return data
コード例 #8
0
def upload_file():
    """Save an uploaded image, classify the upload folder and render results.

    On a POST request the file is stored under the IMAGE_UPLOADS folder, all
    images found in ``upload_path`` are run through the learner, and each one
    is reported as [image path, label, confidence, top-3 message]. Label and
    confidence are "NA" when the top confidence is below ``threshold``.
    Requests with any other method fall through and return None.
    """
    if request.method != 'POST':
        return None

    image = request.files['file']
    filename = secure_filename(image.filename)

    # save the file in the upload path
    image.save(Path(app.config["IMAGE_UPLOADS"]+"/"+ filename))

    # load the images from the upload path
    img_list_loader = ImageList.from_folder(upload_path)

    # nothing usable was uploaded
    if len(img_list_loader.items) == 0:
        return "ERROR: Invalid image. Go back to upload new image"

    # load the model and run inference on the uploaded images
    load_model = load_learner(model, test=img_list_loader)
    all_preds, _ = load_model.get_preds(ds_type=DatasetType.Test)

    # process the results for the UI, one entry per image
    my_dict = {}
    for index, (img_preds, img_src) in enumerate(
            zip(all_preds, img_list_loader.items)):
        top3_return_msg, top_pred = print_top_3_pred(img_preds)

        confidence = np.round(img_preds[top_pred].numpy()*100, 2)
        if confidence < threshold:
            custom_msg = "NA"
            Prediction_percent = "NA"
        else:
            custom_msg = str(get_label(int(top_pred)))
            Prediction_percent = "{:.2f}%".format(confidence)

        my_dict[index] = [
            img_src, custom_msg, Prediction_percent, top3_return_msg
        ]

    return render_template('result.html', mydict=my_dict)
コード例 #9
0
    def _get_data_bunch(path: Union[Path, str], transform: bool, im_size: int,
                        bs: int) -> ImageDataBunch:
        """
        Create ImageDataBunch and return it. TODO in future version is to allow
        users to pass in their own image bunch or their own Transformation
        objects (instead of using fastai's <get_transforms>)

        The data is split randomly with ``valid_pct=0.33`` and normalized with
        ``imagenet_stats``.

        Args:
            path (Union[Path, str]): path to data to create databunch with
            transform (bool): a flag to set fastai default transformations (get_transforms())
            im_size (int): image size of databunch
            bs (int): batch size of databunch
        Returns:
            ImageDataBunch
        """
        # Path() accepts str and Path alike, so normalise unconditionally
        path = Path(path)
        tfms = get_transforms() if transform else None
        return (ImageList.from_folder(path).split_by_rand_pct(
            valid_pct=0.33).label_from_folder().transform(
                tfms=tfms,
                size=im_size).databunch(bs=bs).normalize(imagenet_stats))
コード例 #10
0
from model_utils import get_data, create_gen_learner

# Paths used to load the model
path = Path('')  # Path to data folder to load your model
path_lr = path / ''  # Path to model weights

# Paths used for inference
path_t = Path('')  # Path to undamaged files
dmgpath = Path('')  # Path to damage templates
inf_path = Path('')  # Directory for files to be inferenced

# Create the generator learner and load the saved weights
src = ImageImageList.from_folder(path_lr).split_by_rand_pct(0.1, seed=42)
data_gen = get_data(1, 500, src, path_lr)
learn_gen = create_gen_learner(data_gen).load('')  # LOAD MODEL HERE
test_list = ImageList.from_folder(inf_path)

# Streamlit app header
st.markdown('# **ML for Photo Repair**')
st.markdown('### Choose an Image and Damage Template:')
st.markdown('Click **Generate** to Create a damaged photo')

# Undamaged files whose name contains '.png' or '.jpg'
names = [
    filename for filename in os.listdir(path_t)
    if '.png' in filename or '.jpg' in filename
]
# Damage templates whose name contains '.png'
dmgnames = [
    filename for filename in os.listdir(dmgpath) if '.png' in filename
]
コード例 #11
0
from fastai.callbacks import SaveModelCallback
#from fastai.vision import *
from fastai.train import ClassificationInterpretation, DatasetType, load_learner
from fastai.vision import get_transforms, ImageList, cnn_learner, accuracy, jitter, open_image, learner
from torchvision import models as tv_models
from matplotlib import pyplot as plt
from pathlib import Path

# Dataset location and training settings
base_path = Path('data', 'dataset-15')
dataset_path = base_path
img_size = 224
bs = 128
arch = tv_models.resnext50_32x4d

# Augmentations: flips in both directions, max_warp=0.0 and max_zoom=1.0
tfms = get_transforms(do_flip=True, flip_vert=True, max_warp=0.0, max_zoom=1.0)

# Train on the 'images' folder and validate on 'testset-15-cropped'
data = (
    ImageList.from_folder(dataset_path)
    .split_by_folder(train='images', valid='testset-15-cropped')
    .label_from_folder()
    .transform(tfms)
    .databunch(bs=bs)
    .normalize()
)
data.valid_dl = data.valid_dl.new(shuffle=True)

# plot one image with transformations
# example_img = open_image('/home/hoth/Desktop/lego-brick-recognition/data/datasets/train-15/images/3008/3008_0.jpg')
# example_img.apply_tfms(tfms[0], size=224).show(figsize=(10, 10))
# plt.show()

# Folder that receives the example batch plots
classification_dir = base_path / 'classification'
classification_dir.mkdir(exist_ok=True)

# view data: save one example batch of each dataset as SVG
data.show_batch(rows=10, ds_type=DatasetType.Train)
plt.savefig(classification_dir / 'batch_example_train.svg')
data.show_batch(rows=10, ds_type=DatasetType.Valid)
plt.savefig(classification_dir / 'batch_example_valid.svg')
コード例 #12
0
ファイル: test.py プロジェクト: titipakorn/s_docker
# local modules

print(f"Fast.ai version = {fastai.__version__}")
which_processor()

# Training settings
EPOCHS = 10
LEARNING_RATE = 1e-4
IM_SIZE = 300

BATCH_SIZE = 16
ARCHITECTURE = models.resnet18
path = Path('/app/classifier_data/')

# Random split with valid_pct=0.2 and a fixed seed, labels taken from the
# folder names
data = (
    ImageList.from_folder(path)
    .split_by_rand_pct(valid_pct=0.2, seed=10)
    .label_from_folder()
    .transform(size=IM_SIZE)
    .databunch(bs=BATCH_SIZE, num_workers=db_num_workers())
    .normalize(imagenet_stats)
)

print(f'number of classes: {data.c}')
print(data.classes)

# Unfreeze the learner before fitting, then export it
learn = cnn_learner(
    data,
    ARCHITECTURE,
    metrics=[accuracy],
    callback_fns=[partial(TrainMetricsRecorder, show_graph=True)],
)
learn.unfreeze()
learn.fit(EPOCHS, LEARNING_RATE)
learn.export(file=Path("/app/classifier_model.pkl"))

# Evaluate on the validation set
_, validation_accuracy = learn.validate(
    learn.data.valid_dl,
    metrics=[accuracy],
)
コード例 #13
0
# Settings for fine-tuning. The two learning rates are defined here but not
# used in the visible part of this script.
HEAD_LEARNING_RATE = 0.01
BODY_LEARNING_RATE = 0.0001
BATCH_SIZE = 32
IM_SIZE = 224
DROPOUT = 0
ARCHITECTURE = models.resnet50

# Desired embedding dimension. Higher dimensions slow down retrieval but often provide better accuracy.
EMBEDDING_DIM = 4096
# Only 4096, or any value up to 2048, is accepted
assert EMBEDDING_DIM == 4096 or EMBEDDING_DIM <= 2048

# Load images into fast.ai's ImageDataBunch object
random.seed(642)
# Random split with valid_pct=0.05 and a fixed seed; labels come from the
# folder names and the default fastai transforms are applied
data_finetune = (ImageList.from_folder(DATA_FINETUNE_PATH).split_by_rand_pct(
    valid_pct=0.05, seed=20).label_from_folder().transform(
        tfms=fastai.vision.transform.get_transforms(), size=IM_SIZE).databunch(
            bs=BATCH_SIZE,
            num_workers=db_num_workers()).normalize(imagenet_stats))

print(
    f"Data for fine-tuning: {len(data_finetune.train_ds.x)} training images and {len(data_finetune.valid_ds.x)} validation images."
)

# No metrics are tracked; DROPOUT is passed as the `ps` argument
learn = cnn_learner(data_finetune, ARCHITECTURE, metrics=[], ps=DROPOUT)

# Show the second element of the model (presumably the head - confirm)
print(learn.model[1])

# By default uses the 2048 dimensional pooling layer as implemented in the paper.
# Optionally can instead keep the 4096-dimensional pooling layer from the ResNet-50 model.
# NOTE(review): the body of this branch appears to be cut off after this line.
if EMBEDDING_DIM != 4096:
    modules = []
コード例 #14
0
def test_wrong_order():
    """Labelling an ImageList before splitting it must raise an exception."""
    this_tests('na')
    mnist_tiny = untar_data(URLs.MNIST_TINY)
    with pytest.raises(Exception, match="Your data isn't split*"):
        # label first, split second: the wrong order on purpose
        items = ImageList.from_folder(mnist_tiny)
        items.label_from_folder().split_by_folder()
コード例 #15
0
def test_wrong_order():
    """Calling label_from_folder before splitting must raise an exception."""
    this_tests('na')
    data_dir = untar_data(URLs.MNIST_TINY)
    with pytest.raises(Exception, match="Your data isn't split*"):
        # deliberately label the unsplit list, then try to split it
        unsplit = ImageList.from_folder(data_dir)
        unsplit.label_from_folder().split_by_folder()
コード例 #16
0
## helpful way to initially get folders
# import split_folders
# split_folders.ratio('<path>', output='<path>/split', seed=1337, ratio=(.8, .2)) # uses default values
# sys.exit()

path = Path("data/CNN/-released/split")

################################################################################
# fastai uses databunches
################################################################################
# Random split of the "train" folder with a fixed seed; images are squished
# to 150 px with zero padding and normalized with imagenet_stats
data = (
    ImageList.from_folder(path / "train")
    .split_by_rand_pct(0.1, seed=33)
    .label_from_folder()
    # .add_test_folder('..'/path/'test')
    .transform(
        get_transforms(do_flip=True, flip_vert=True),
        size=150,
        resize_method=ResizeMethod.SQUISH,
        padding_mode="zeros",
    )
    .databunch(bs=64)
    .normalize(imagenet_stats)
)

# ## turn this on for regular testing
# option_name = 'CNN__original'
# data_test = (ImageList.from_folder(path)
#                 .split_by_folder(train='train', valid='test')
#                 .label_from_folder()
#                 .transform(get_transforms(do_flip=True,flip_vert=True),size=150,resize_method=ResizeMethod.SQUISH,padding_mode='zeros')
#                 .databunch(bs=64)
#                 .normalize(imagenet_stats))

## turn this on for test_suite
コード例 #17
0
import fastai
from fastai.vision import ImageList, ImageImageList
from pathlib import Path
from model_utils import get_data
from model_utils import create_gen_learner

# Data folders
path = Path('../data/')
path_hr = path / 'preprocessed'
path_lr = path / 'processed'
path_test = path / 'test_imgs'

# Batch size and output size
bs = 1
size = 500

# Gather the data
src = ImageImageList.from_folder(path_lr).split_by_rand_pct(0.1, seed=42)
data_gen = get_data(bs, size, src, path_hr)

# Load the model to run inference with
learn_gen = create_gen_learner(data_gen).load('')  # Input model to load

# Open the file to be inferenced
test_list = ImageList.from_folder(path_test)
test_list.open(test_list.items[0])

# Run inference on the first test image and display it with the output
prediction = learn_gen.predict(test_list[0])[0]
test_list[0].show(figsize=(7, 7), y=prediction)

# Save File if desired
#y = learn_gen.predict(test_list[8])[0]
#y.save(path_test/'inf1.png')
コード例 #18
0
from fastai.core import Path
from fastai.vision import load_learner, defaults, ImageList,DatasetType
import torch as torch
import os
import yaml

pic_name = 'GK_RDR_PG3_2'

# Project root and the folder with the letter images to classify
mp = Path('/Users/nicholasbangs/Notebooks/personal/greek_reader_master')
im_path = mp/'lgi_data'/'gk_letter_imgs'/'ω'

# Run on the CPU
defaults.device = torch.device('cpu')
model_path = mp/'models'
model_name = 'rn_34.pkl'

# Load the exported learner with the image folder attached as its test set
model = load_learner(model_path, model_name, test=ImageList.from_folder(im_path))
preds,y = model.get_preds(ds_type=DatasetType.Test)
classes = model.data.classes

# Pair every class with its score, one iterator per image
zipped = [zip(classes, p) for p in preds]
# Keep the best (class, score) pair per image; max() finds it in a single
# pass instead of sorting all classes
sorted_preds = [max(z, key=lambda x: x[1]) for z in zipped]
print(sorted_preds)
コード例 #19
0
    def train(self, tmp_dir):
        """Train a model.

        This downloads any previous output saved to the train_uri,
        starts training (or resumes from a checkpoint), periodically
        syncs contents of train_dir to train_uri, and syncs once more
        after training finishes.

        Args:
            tmp_dir: (str) path to temp directory
        """
        self.log_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        size = self.task_config.chip_size
        class_map = self.task_config.class_map
        classes = class_map.get_class_names()
        # No worker processes in debug mode
        num_workers = 0 if self.train_opts.debug else 4
        tfms = get_transforms(flip_vert=self.train_opts.flip_vert)

        data = (ImageList.from_folder(chip_dir).split_by_folder(train='train',
                                                                valid='val'))
        # Optionally subsample the training chips: an absolute train_count
        # takes precedence over the train_prop fraction.
        train_count = None
        if self.train_opts.train_count is not None:
            train_count = min(len(data.train), self.train_opts.train_count)
        elif self.train_opts.train_prop != 1.0:
            train_count = int(
                round(self.train_opts.train_prop * len(data.train)))
        train_items = data.train.items
        if train_count is not None:
            # Random subset: the first train_count indices of a permutation
            train_inds = np.random.permutation(np.arange(len(
                data.train)))[0:train_count]
            train_items = train_items[train_inds]
        # The validation items are always kept in full
        items = np.concatenate([train_items, data.valid.items])

        # Rebuild the data from the (possibly subsampled) item list
        data = ImageList(items, chip_dir) \
            .split_by_folder(train='train', valid='val') \
            .label_from_folder(classes=classes) \
            .transform(tfms, size=size) \
            .databunch(bs=self.train_opts.batch_size, num_workers=num_workers)
        log.info(str(data))

        if self.train_opts.debug:
            make_debug_chips(data, class_map, tmp_dir, train_uri)

        # Setup learner.
        ignore_idx = -1
        metrics = [
            Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            FBeta(average='weighted',
                  clas_idx=1,
                  beta=1,
                  ignore_idx=ignore_idx)
        ]
        # Look the architecture up by name on the `models` module
        model_arch = getattr(models, self.train_opts.model_arch)
        learn = cnn_learner(data,
                            model_arch,
                            metrics=metrics,
                            wd=self.train_opts.weight_decay,
                            path=train_dir)
        learn.unfreeze()

        if self.train_opts.mixed_prec and torch.cuda.is_available():
            # This loss_scale works for Resnet 34 and 50. You might need to
            # adjust this for other models.
            learn = learn.to_fp16(loss_scale=256)

        # Setup callbacks and train model.
        model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            log.info('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            # NOTE(review): torch.load unpickles the file, so pretrained_uri
            # must point to a trusted source. The whole model object stored
            # under the 'model' key replaces learn.model.
            learn.model = torch.load(pretrained_path,
                                     map_location=learn.data.device)['model']

        # Save every epoch so that resume functionality provided by
        # TrackEpochCallback will work.
        callbacks = [
            TrackEpochCallback(learn),
            MySaveModelCallback(learn, every='epoch'),
            MyCSVLogger(learn, filename='log'),
            ExportCallback(learn, model_path, monitor='f_beta'),
            SyncCallback(train_dir, self.backend_opts.train_uri,
                         self.train_opts.sync_interval)
        ]

        if self.train_opts.log_tensorboard:
            callbacks.append(TensorboardLogger(learn, 'run'))

        if self.train_opts.run_tensorboard:
            log.info('Starting tensorboard process')
            log_dir = join(train_dir, 'logs', 'run')
            tensorboard_process = Popen(
                ['tensorboard', '--logdir={}'.format(log_dir)])
            terminate_at_exit(tensorboard_process)

        lr = self.train_opts.lr
        num_epochs = self.train_opts.num_epochs
        if self.train_opts.one_cycle:
            if lr is None:
                # No learning rate configured: run the LR finder and use
                # recorder.min_grad_lr (presumably populated by
                # plot(suggestion=True) - confirm against fastai).
                learn.lr_find()
                learn.recorder.plot(suggestion=True, return_fig=True)
                lr = learn.recorder.min_grad_lr
                log.info('lr_find() found lr: {}'.format(lr))
            learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
        else:
            learn.fit(num_epochs, lr, callbacks=callbacks)

        if self.train_opts.run_tensorboard:
            tensorboard_process.terminate()

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)
コード例 #20
0
from fastprogress.fastprogress import force_console_behavior
import fastprogress
# Turn the progress bar off and use fastprogress' console implementations
fastprogress.fastprogress.NO_BAR = True
master_bar, progress_bar = force_console_behavior()
# fastai's training loop is pointed at the same console implementations
fastai.basic_train.master_bar = master_bar
fastai.basic_train.progress_bar = progress_bar



def get_file(aString):
    """Return the part of *aString* after its last '/' (all of it if none)."""
    _, _, tail = aString.rpartition('/')
    return tail

image_folder = 'images/'

# Load the exported learner with the image folder attached as its test set
path = untar_data(URLs.DOGS)
learn = load_learner(path, test=ImageList.from_folder(image_folder), bs=1)
preds, y = learn.get_preds(ds_type=DatasetType.Test)
# First column of the predictions, reported below as 'prob_dog'
predList = list(preds.numpy()[:, 0])

# Take the file names from the test set itself so that every name lines up
# with its prediction. A directory listing has no guaranteed order and may
# contain entries that are not part of the test set.
f_names = [item.name for item in learn.data.test_ds.items]

pred_df = pd.DataFrame(list(zip(f_names, predList)),
                       columns=['f_name', 'prob_dog'])

# Registry of downloaded files; its 'file' column is reduced to the bare
# file name so it can be joined with the predictions
registry = 'registry/downloaded_files.csv'
regDF = pd.read_csv(registry)

regDF['f_name'] = regDF.file.apply(get_file)

out_df = pd.merge(regDF, pred_df, on=['f_name'])