Example #1
def diagnose():
    IMAGE_WIDTH = 128
    IMAGE_HEIGHT = 128
    IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)
    IMAGE_CHANNELS = 3

    test_filenames = os.listdir(
        "/home/sheila/Desktop/Learning_data_science/crop check/images/test1")

    test_df = pd.DataFrame({'filename': test_filenames})
    nb_samples = test_df.shape[0]

    batch_size = 15
    test_gen = ImageDataGenerator(rescale=1. / 255)
    test_generator = test_gen.flow_from_dataframe(
        test_df,
        "/home/sheila/Desktop/Learning_data_science/crop check/images/test1/",
        x_col='filename',
        y_col=None,
        class_mode=None,
        target_size=IMAGE_SIZE,
        batch_size=batch_size,
        shuffle=False)
    predict = model.predict_generator(
        test_generator, steps=int(np.ceil(nb_samples / batch_size)))

    test_df['category'] = np.argmax(predict, axis=-1)

    d = test_df.to_dict(orient='records')
    j = json.dumps(d)

    submission_df = test_df.copy()
    submission_df['id'] = submission_df['filename'].str.split('.').str[0]
    submission_df['label'] = submission_df['category']
    submission_df.drop(['filename', 'category'], axis=1, inplace=True)
    submission_df.to_csv('submission_test1.csv', index=False)
    print(j)

    # return [j[key] for key in sorted(j.keys())]
    return j
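The function above leans on several module-level names that the snippet does not show: os, json, np, pd, ImageDataGenerator and a trained model. A minimal sketch of that surrounding setup, assuming a hypothetical saved model file:

import os
import json

import numpy as np
import pandas as pd
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator

model = load_model('model.h5')  # hypothetical path to the trained classifier
print(diagnose())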
Example #2
def get_train_generator(df, image_dir, x_col, y_cols, shuffle=True,
                        batch_size=8, seed=1, target_w=320, target_h=320):
    print("getting train generator...") 
    # normalize images
    image_generator = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True)
    
    # flow from directory with specified batch size
    # and target image size
    generator = image_generator.flow_from_dataframe(
            dataframe=df,
            directory=image_dir,
            x_col=x_col,
            y_col=y_cols,
            class_mode="raw",
            batch_size=batch_size,
            shuffle=shuffle,
            seed=seed,
            target_size=(target_w,target_h))
    
    return generator
Example #3
def input_loads():
    df = pd.read_csv(label_dir)
    df['file_id'] = df['file_id'].astype('str')
    df['file_id'] = df['file_id'].apply(lambda x: x + '.png')

    df['accent'] = df['accent'].astype('str')
    # df['accent'] = df['accent'].map(onehot)

    # label = pd.DataFrame(df['accent'].map(onehot).tolist(), columns=['0', '1', '2'])
    # df = pd.concat([df['file_id'], label], axis=1)
    print(df.head())

    datagen = ImageDataGenerator(rescale=1. / 255)
    train_generator = datagen.flow_from_dataframe(dataframe=df,
                                                  directory=train_dir,
                                                  x_col='file_id',
                                                  y_col='accent',
                                                  class_mode='categorical',
                                                  target_size=(128, 174),
                                                  batch_size=32)
    return train_generator
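input_loads() similarly relies on module-level label_dir and train_dir. A hedged setup sketch (both paths are assumptions for illustration):

import pandas as pd
from keras.preprocessing.image import ImageDataGenerator

label_dir = 'labels.csv'     # hypothetical CSV with file_id and accent columns
train_dir = 'spectrograms/'  # hypothetical image directory

train_generator = input_loads()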
Example #4
def trainGenerator(batch_size, train_path, data_frame, aug_dict, image_color_mode="grayscale",
                    mask_color_mode="grayscale", flag_multi_class=False,
                    save_to_dir=None, target_size=(256,256), seed=1):
    '''
    if you want to visualize the results of generator, set save_to_dir = "your path"
    '''
    image_datagen = ImageDataGenerator(**aug_dict)
    
    image_generator = image_datagen.flow_from_dataframe(
        data_frame,
        directory=train_path,
        x_col='field',
        y_col='lai',
        target_size=target_size,
        color_mode=image_color_mode,
        class_mode='other',
        batch_size=batch_size,
        seed=seed,
        save_to_dir=save_to_dir)
    return image_generator
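trainGenerator passes aug_dict straight through to ImageDataGenerator, so the caller controls augmentation. An illustrative call; the dictionary values and paths are assumptions, not taken from the source project:

aug_dict = dict(rescale=1. / 255,
                rotation_range=10,
                horizontal_flip=True)

gen = trainGenerator(batch_size=8,
                     train_path='images/',  # hypothetical directory
                     data_frame=df,         # assumed to have 'field' and 'lai' columns
                     aug_dict=aug_dict)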
Example #5
def test_model(data, model, class_cols, model_path):
    """Tests models that have already been initialized and trained.

    Parameters
    ----------
        data : :obj:`pandas.core.frame.DataFrame`
            The data frame containing testing data formatted for use
            with :func:`ImageDataGenerator.flow_from_dataframe`
        model : :obj:`keras.models.Model`
            The model to test.
        class_cols : list of str
            The dataframe columns containing the classes. For use with
            multi-output models.
        model_path : str
            Path where model is saved. Used to store test results with
            model.

    Returns
    -------
    score : list of float
        The model's test scores.

    """
    testgen = ImageDataGenerator()
    testgen = testgen.flow_from_dataframe(data,
                                          directory=MODULE_PATH,
                                          x_col='imgpath',
                                          y_col=class_cols,
                                          batch_size=BATCH_SIZE,
                                          target_size=INPUT_DIM,
                                          class_mode='other')

    test_step_size = testgen.n // testgen.batch_size

    score = model.evaluate_generator(testgen, steps=test_step_size, verbose=1)
    with open(os.path.join(model_path, 'test.results'), 'a') as f_res:
        print("loss: {0:.4f}".format(score[0]), file=f_res)
        print("acc: {0:.4f}".format(score[1]), file=f_res)

    return score
Example #6
def get_train_generator(df, image_dir, x_col, y_cols, shuffle=True,
                        batch_size=8, seed=1, target_w=320, target_h=320):

    """
    Return generator for training set, normalizing using batch
    statistics.

    Args:
      df (dataframe): dataframe specifying training data.
      image_dir (str): directory where image files are held.
      x_col (str): name of column in df that holds filenames.
      y_cols (list): list of strings that hold y labels for images.
      batch_size (int): images per batch to be fed into model during training.
      seed (int): random seed.
      target_w (int): final width of input images.
      target_h (int): final height of input images.
    
    Returns:
        train_generator (DataFrameIterator): iterator over training set
    """        
    print("getting train generator...") 
    # normalize images
    image_generator = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True)
    
    # flow from directory with specified batch size
    # and target image size
    generator = image_generator.flow_from_dataframe(
            dataframe=df,
            directory=image_dir,
            x_col=x_col,
            y_col=y_cols,
            class_mode="raw",
            batch_size=batch_size,
            shuffle=shuffle,
            seed=seed,
            target_size=(target_w,target_h))
    
    return generator
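A hedged usage sketch for get_train_generator, assuming a CSV whose 'Image' column holds filenames and whose label columns are binary indicators (the column names here are illustrative):

import pandas as pd

train_df = pd.read_csv('train.csv')  # hypothetical labels file
train_gen = get_train_generator(train_df, 'images/', x_col='Image',
                                y_cols=['Mass', 'Nodule'], batch_size=8)
x_batch, y_batch = train_gen[0]      # first batch of images and raw label rows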
Example #7
def extract_train_features(pitcher, sample_count, train_df, pitch_type):
    
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        fill_mode='nearest')
    batch_size = 20

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=f'../data/{pitcher}/image/combined/',
        x_col='file_name',
        y_col=pitch_type,
        class_mode='binary',
        batch_size=batch_size,
        shuffle=False,
        # target_size=(1280, 720)
    )
    
    features = np.zeros(shape=(sample_count, 8, 8, 512))
    labels = np.zeros(shape=(sample_count))
    
    i = 0
    for inputs_batch, labels_batch in train_generator:
        features_batch = conv_base.predict(inputs_batch)
        features[i * batch_size : (i + 1) * batch_size] = features_batch
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            # Note that since generators yield data indefinitely in a loop,
            # we must `break` after every image has been seen once.
            break
    
    features = np.reshape(features, (sample_count, 8*8*512))
    
    return features, labels
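The pre-allocated (sample_count, 8, 8, 512) arrays imply that conv_base maps each image to an 8x8x512 feature map, which matches VGG16 without its top on the default 256x256 inputs; that pairing is an assumption, not something the snippet confirms. The slice assignment also expects sample_count to be a multiple of the internal batch_size of 20, since a final partial batch would not fit its slot. A sketch of the assumed setup:

from keras.applications import VGG16

conv_base = VGG16(weights='imagenet', include_top=False)

# pitcher name and pitch-type column are hypothetical; 200 is a
# multiple of the batch size of 20 used inside the function
features, labels = extract_train_features('some_pitcher', 200, train_df, 'FF')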
Example #8
def get_data_generator(dataframe, x_col, y_col, subset=None, shuffle=True, batch_size=16, class_mode="categorical"):
    datagen = ImageDataGenerator(
        rotation_range=15,
        rescale=1. / 255,
        shear_range=0.1,
        zoom_range=0.2,
        horizontal_flip=False,
        width_shift_range=0.1,
        height_shift_range=0.1)
    
    data_generator = datagen.flow_from_dataframe(
        dataframe=dataframe,
        x_col=x_col,
        y_col=y_col,
        subset=subset,
        target_size=(width, height),
        class_mode=class_mode,
        # color_mode="rgb",
        batch_size=batch_size,
        shuffle=shuffle,
    )
    return data_generator
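get_data_generator reads width and height from module scope rather than taking them as parameters; illustrative values (an assumption):

width, height = 128, 128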
Example #9
    def df_to_generators(self, df: DataFrame, path: str, mode: str):
        y_col = None if mode == 'test' else 'class'
        class_mode = None if mode == 'test' else 'categorical'

        preprocessing_function = get_random_eraser(v_l=0, v_h=255) if mode == 'train' else None

        datagen = ImageDataGenerator(preprocessing_function=preprocessing_function)

        generator = datagen.flow_from_dataframe(
            dataframe=df,
            directory=os.path.abspath(path),
            x_col='img_file',
            y_col=y_col,
            target_size=self.img_size,
            color_mode='rgb',
            class_mode=class_mode,
            batch_size=self.batch_size,
            seed=self.seed,
            shuffle=mode != 'test'
        )

        return generator
Example #10
def test(args, ckpt_file):
    print("Currently processing fold ", FOLD)
    output_directory = "{}{}/".format(args["OUTPUT_DIRECTORY"], FOLD)
    test_label_file = "{}test_subset{}.csv".format(args["LABEL_DIRECTORY"],
                                                   FOLD)
    test_dataframe = pd.read_csv(test_label_file)
    test_dataframe["Absolutefilename"] = (args["IMG_DIRECTORY"] +
                                          test_dataframe["Filename"])
    test_image_count = test_dataframe.shape[0]

    # No testing image augmentation (except for converting pixel values to floats)
    test_data_generator = ImageDataGenerator(rescale=1.0 / 255)

    # Load test images in batches from directory and apply rescaling
    test_data_generator = test_data_generator.flow_from_dataframe(
        test_dataframe,
        args["IMG_DIRECTORY"],
        x_col="Absolutefilename",
        y_col="Label",
        target_size=args["IMG_SIZE"],
        batch_size=args["BATCH_SIZE"],
        has_ext=True,
        shuffle=False,
        classes=args["CLASSES_STR"],
        class_mode="categorical",
    )

    # Load the last best model
    model = load_model(output_directory + ckpt_file)
    # Evaluate model on test subset for kth fold
    predictions = model.predict_generator(
        test_data_generator, test_image_count // args["BATCH_SIZE"] + 1)
    y_true = test_data_generator.classes
    y_pred = np.argmax(predictions, axis=1)
    # Assign predictions worse than a random guess (p < 1/9) to the negative class
    y_pred[np.max(predictions, axis=1) < 1 / 9] = 8

    return {"predictions": y_pred, "labels": y_true}
Example #11
def inference(csv_file):
    json_file = open('model_isolation.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights("model_wieghts_isolation.h5")
    df = pd.read_csv(csv_file, header=None, names=["id", "label"], dtype=str)
    df = df.replace({
        'true': 'isolation',
        'false': 'noaction',
        'True': 'isolation',
        'False': 'noaction'
    })
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    valid_generator = test_datagen.flow_from_dataframe(
        dataframe=df,
        directory="./",
        x_col="id",
        y_col="label",
        class_mode="categorical",
        batch_size=1,
        shuffle=False,  # keep row order aligned with df for the accuracy check below
        target_size=(320, 180))
    loaded_model.compile(loss='binary_crossentropy',
                         optimizer=optimizers.RMSprop(lr=2e-5),
                         metrics=['acc'])
    result1 = loaded_model.predict_generator(valid_generator,
                                             steps=valid_generator.samples)
    result = []
    result1 = result1.argmax(axis=1)
    for i in result1:
        if i == 0:
            result.append('isolation')
        else:
            result.append('noaction')
    accuracy = (df['label'].values == result).mean()
    ans = precision_recall_fscore_support(df['label'].values,
                                          result,
                                          average='macro')
    return {'accuracy': accuracy, 'recall': ans[1], 'precision': ans[0]}
Example #12
def train_generator_from_dataframe(dataframe_keras_master, batch_size,
                                   classes):
    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       rotation_range=20,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       horizontal_flip=True,
                                       validation_split=0.2)

    train_gen = train_datagen.flow_from_dataframe(
        dataframe=dataframe_keras_master,
        directory=None,
        x_col="origin",
        y_col="class_name",
        classes=classes,
        batch_size=batch_size,
        target_size=(299, 299),
        class_mode="categorical",
        subset="training",
        shuffle=True,
        validate_filenames=False)

    return [train_datagen, train_gen]
Example #13
File: train.py  Project: ptd006/WeedML
def img_flow(csv_file, base_path):
    datagen = ImageDataGenerator(
                rescale=1. / 255,
                fill_mode="reflect",
                shear_range=0.2,
                # zoom_range=(0.5, 1),
                horizontal_flip=True,
                rotation_range=10,
                channel_shift_range=10,
                brightness_range=(0.85, 1.15))

    return datagen.flow_from_dataframe(
        dataframe=pd.read_csv(csv_file),
        directory=base_path,
        x_col='Filename', 
        y_col='Label', 
        class_mode='categorical',
        target_size=RAW_IMG_SIZE, 
        batch_size=BATCH_SIZE,
        classes=CLASS_NAMES,
        shuffle=True,
        seed=123
        )
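img_flow depends on module-level constants from train.py that are not shown here; plausible stand-ins, assumptions for illustration only:

RAW_IMG_SIZE = (256, 256)
BATCH_SIZE = 32
CLASS_NAMES = ['negative', 'positive']  # hypothetical class list

train_it = img_flow('labels.csv', 'images/')  # hypothetical paths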
Example #14
def get_prediction(filepath):
    img_dir = os.path.dirname(filepath)  # avoid shadowing the builtin dir
    file_name = os.path.basename(filepath)

    df = pd.DataFrame([{'file_path': file_name}])

    loaded_model = load_model(os.path.abspath('./model.h5'))

    test_datagen = ImageDataGenerator(rescale=1. / 255.)
    test_gen = test_datagen.flow_from_dataframe(dataframe=df,
                                                directory=img_dir,
                                                x_col="file_path",
                                                y_col=None,
                                                batch_size=1,
                                                class_mode=None,
                                                target_size=(64, 64))

    pred = loaded_model.predict_generator(test_gen, steps=1, verbose=1)

    emotions = [
        'angry', 'fearful', 'disgust', 'sad', 'surprised', 'happy', 'calm',
        'neutral'
    ]

    # TODO: Find a better solution for this. Google Cloud will probably do this for us anyway...
    mapped_emotions = {
        'angry': round_pred(pred[0][0]),
        'fearful': round_pred(pred[0][1]),
        'disgust': round_pred(pred[0][2]),
        'sad': round_pred(pred[0][3]),
        'surprised': round_pred(pred[0][4]),
        'happy': round_pred(pred[0][5]),
        'calm': round_pred(pred[0][6]),
        'neutral': round_pred(pred[0][7])
    }

    return emotions[np.argmax(pred, axis=1)[0]], mapped_emotions
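round_pred is not defined in this snippet; a plausible minimal stand-in (an assumption) would be:

def round_pred(p):
    # hypothetical helper: truncate a softmax probability for display
    return round(float(p), 4)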
Example #15
    def generate_validation_iterator(self):
        # IMAGES
        validation_images_path = './res/ILSVRC2012_img_val/val/'
        validations_labels_path = './res/ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt'

        validation_x = [
            f for f in listdir(validation_images_path)
            if isfile(join(validation_images_path, f))
        ]
        validation_x.sort()

        # LABELS
        with open(validations_labels_path) as f:
            content = f.readlines()
        # you may also want to remove whitespace characters like `\n` at the end of each line
        validation_y = [in_classes[int(x.strip())][0] for x in content]

        # merge images and labels
        validation_sequence = [[validation_x[i], validation_y[i]]
                               for i in range(0, len(validation_x))]
        validation_dataframe = pd.DataFrame(validation_sequence,
                                            columns=['x', 'y'])

        # create generator
        datagen = ImageDataGenerator()

        valid_it = datagen.flow_from_dataframe(
            dataframe=validation_dataframe,
            directory=validation_images_path,
            x_col='x',
            y_col='y',
            target_size=TARGET_SIZE,
            class_mode="categorical",
            color_mode='rgb',
            batch_size=BATCH_SIZE)

        return valid_it
Example #16
def image_feature_extraction(df):
    '''
    images: A numpy 4D array of shape (no. of examples,299,299,3)
    '''
    if os.path.exists("../data/image_features.npy"):
        ans = input("Features already exist, do you want to re-run? (y/n)\n")
        if ans == "n":
            return
    file_list = os.listdir('../data/train_images')
    print(len(file_list))
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    test_generator = test_datagen.flow_from_dataframe(
        dataframe=df,
        directory="../data/train_images/",
        x_col="image",
        y_col=None,
        batch_size=8,
        shuffle=False,
        seed=123,
        class_mode=None,
        target_size=(299, 299))
    filenames = test_generator.filenames
    nb_samples = len(filenames)
    print(nb_samples)
    model = keras.applications.inception_v3.InceptionV3(include_top=True,
                                                        weights='imagenet',
                                                        pooling='avg')
    # drop the final classification layer so the generator yields pooled features
    model.layers.pop()
    model = Model(model.input, model.layers[-1].output)
    image_features = model.predict_generator(test_generator,
                                             steps=np.ceil(nb_samples / 8),
                                             use_multiprocessing=False,
                                             verbose=1)
    print(image_features.shape)
    np.save("../data/image_features.npy", image_features)
Example #17
def get_predictions(model_path, draws_path):
    result = ''
    model = load_model(model_path)
    draws = os.listdir(draws_path)
    filenames_test = []
    categories_test = []

    for file in draws:
        filenames_test.append(file)
        categories_test.append('')

    df_draws = pd.DataFrame({
        'filename': filenames_test,
        'category': categories_test
    })
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    for i in range(df_draws.shape[0]):
        with suppress_stdout():
            df_draw = df_draws.iloc[[i]]
            draw_generator = test_datagen.flow_from_dataframe(
                df_draw,
                draws_path,
                x_col='filename',
                y_col='category',
                target_size=(28, 28),
                class_mode='categorical',
                color_mode='grayscale')

        predict = model.predict_generator(draw_generator, steps=1)
        prediction = predict.argmax()
        balance = ''
        for b in range(len(predict[0])):
            balance += str('{:0.2f}'.format(predict[0, b] * 100)) + '\n'

        result += df_draw.iloc[0, 0] + '\n' + str(
            prediction) + '\n' + balance + '\n'
    return result
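suppress_stdout is an undefined helper here; a common minimal implementation (an assumption, shown for completeness) redirects sys.stdout to the null device so Keras' per-call "Found N images..." message is silenced:

import os
import sys
from contextlib import contextmanager

@contextmanager
def suppress_stdout():
    with open(os.devnull, 'w') as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = old_stdout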
Example #18
    def create_image_generator(self,
                               df,
                               x_col,
                               base_directory,
                               batch_size=32,
                               target_size=(64, 64),
                               rotation_range=15,
                               width_shift_range=0.05,
                               height_shift_range=0.05,
                               shear_range=0.05,
                               zoom_range=0.1,
                               horizontal_flip=True):

        image_generator_settings = ImageDataGenerator(
            rescale=1. / 255.,
            rotation_range=rotation_range,
            width_shift_range=width_shift_range,
            height_shift_range=height_shift_range,
            shear_range=shear_range,
            zoom_range=zoom_range,
            horizontal_flip=horizontal_flip,
            fill_mode='nearest')

        y_col = list(df.columns)
        y_col.remove(x_col)

        image_generator = image_generator_settings.flow_from_dataframe(
            dataframe=df,
            directory=base_directory,
            x_col=x_col,
            y_col=y_col,
            target_size=target_size,
            batch_size=batch_size,
            class_mode='raw')

        return image_generator
Example #19
def predictGenerator(batch_size,
                     dataframe,
                     x_col='file_gfp',
                     image_color_mode="grayscale",
                     target_size=(256, 256)):
    '''
    Generate unlabeled image batches for prediction; shuffle=False keeps
    the output order aligned with the dataframe rows.
    '''
    image_datagen = ImageDataGenerator(
        rescale=1. / 255)  # ,samplewise_center=True,samplewise_std_normalization=True
    image_generator = image_datagen.flow_from_dataframe(
        dataframe,
        directory=None,
        class_mode=None,
        x_col=x_col,
        color_mode=image_color_mode,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=False)

    return image_generator
Example #20
def get_test_and_valid_generator(valid_df,
                                 test_df,
                                 train_df,
                                 image_dir,
                                 x_col,
                                 y_cols,
                                 sample_size=100,
                                 batch_size=8,
                                 seed=1,
                                 target_w=320,
                                 target_h=320):
    """
    Return generator for validation set and test test set using 
    normalization statistics from training set.

    Args:
      valid_df (dataframe): dataframe specifying validation data.
      test_df (dataframe): dataframe specifying test data.
      train_df (dataframe): dataframe specifying training data.
      image_dir (str): directory where image files are held.
      x_col (str): name of column in df that holds filenames.
      y_cols (list): list of strings that hold y labels for images.
      sample_size (int): size of sample to use for normalization statistics.
      batch_size (int): images per batch to be fed into model during training.
      seed (int): random seed.
      target_w (int): final width of input images.
      target_h (int): final height of input images.
    
    Returns:
        valid_generator and test_generator (DataFrameIterator): iterators over the validation set and test set respectively
    """
    print("getting train and valid generators...")
    # get generator to sample dataset
    raw_train_generator = ImageDataGenerator().flow_from_dataframe(
        dataframe=train_df,
        directory=image_dir,
        x_col=x_col,
        y_col=y_cols,
        class_mode="raw",
        batch_size=sample_size,
        shuffle=True,
        target_size=(target_w, target_h))

    # get data sample
    batch = raw_train_generator.next()
    data_sample = batch[0]

    # use sample to fit mean and std for test set generator
    image_generator = ImageDataGenerator(featurewise_center=True,
                                         featurewise_std_normalization=True)

    # fit generator to sample from training data
    image_generator.fit(data_sample)

    # get validation generator
    valid_generator = image_generator.flow_from_dataframe(
        dataframe=valid_df,
        directory=image_dir,
        x_col=x_col,
        y_col=y_cols,
        class_mode="raw",
        batch_size=batch_size,
        shuffle=False,
        seed=seed,
        target_size=(target_w, target_h))

    test_generator = image_generator.flow_from_dataframe(
        dataframe=test_df,
        directory=image_dir,
        x_col=x_col,
        y_col=y_cols,
        class_mode="raw",
        batch_size=batch_size,
        shuffle=False,
        seed=seed,
        target_size=(target_w, target_h))
    return valid_generator, test_generator
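A hedged usage sketch, mirroring the column names from the train-generator example above; the dataframes and directory are assumptions:

valid_gen, test_gen = get_test_and_valid_generator(
    valid_df, test_df, train_df,
    image_dir='nih/images-small/',  # hypothetical directory
    x_col='Image', y_cols=['Mass'])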
Example #21
all_data_info_true300_count = all_data_info_true300.groupby('artist').count()
print(all_data_info_true300_count.shape)
artist_list = all_data_info_true300_count.index.values.tolist()

## Image processing to get the starting data for training the model
from keras.preprocessing.image import ImageDataGenerator

df = all_data_info_true300.loc[:, ['artist', 'new_filename']]

train_datagen = ImageDataGenerator(horizontal_flip=True)
valid_datagen = ImageDataGenerator(horizontal_flip=False)
# featurewise_center=True            zero-centers the data
# featurewise_std_normalization=True normalizes by the std

train_generator = train_datagen.flow_from_dataframe(df,
                                                     "data/train",
                                                     target_size=(224, 224),
                                                     x_col='new_filename',
                                                     y_col='artist',
                                                     has_ext=True,
                                                     seed=100)
# Found 13680 images belonging to 57 classes.

valid_generator = valid_datagen.flow_from_dataframe(df,
                                                    "data/valid",
                                                    target_size=(224, 224),
                                                    x_col='new_filename',
                                                    y_col='artist',
                                                    has_ext=True,
                                                    seed=100)
# Found 1710 images belonging to 57 classes.

# color_mode='rgb' is the default
# has_ext has been deprecated; extensions are included in filenames
# class_mode defaults to 'categorical'
# batch_size: size of the batches of data (default: 32)

STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
Example #22
    shuffle=True
    batch_size=16
    img_size = 224
    bntk_input = (img_size, img_size, 3)
    kelas = len(np.unique(label))
    
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
    # sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
    for train_index, val_index in sss.split(filename, label):
        training_data   = imgData.iloc[train_index]
        validation_data = imgData.iloc[val_index]
        clear_session()
        train_generator = image_generator.flow_from_dataframe(
            training_data,
            # directory = image_dir,
            batch_size=batch_size,
            x_col="filepaths",
            y_col="labels",
            class_mode="categorical",
            shuffle=True)

        validation_generator = image_generator2.flow_from_dataframe(
            validation_data,
            # directory = image_dir,
            batch_size=batch_size,
            x_col="filepaths",
            y_col="labels",
            class_mode="categorical",
            shuffle=True)
                 
        print('------------------------------------------------------------------------')
        print(f'Training for fold {fold_no} ...')
Example #23
import numpy as np
from keras.layers import InputLayer

traindf = pd.read_csv("Horror_Test.csv", dtype=str)
valid_datagen = ImageDataGenerator(rescale=1. / 255., validation_split=0.05)
test_datagen = ImageDataGenerator(rescale=1. / 255., validation_split=0.15)
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   validation_split=0.05)  # match valid_datagen so subset="training" excludes the validation 5%

train_generator = train_datagen.flow_from_dataframe(dataframe=traindf,
                                                    directory="Horror_images",
                                                    x_col='Image_cropped',
                                                    y_col='Emotion',
                                                    subset="training",
                                                    batch_size=64,
                                                    seed=42,
                                                    shuffle=True,
                                                    class_mode="categorical",
                                                    target_size=(299, 299))

valid_generator = valid_datagen.flow_from_dataframe(dataframe=traindf,
                                                    directory="Horror_images",
                                                    x_col="Image_cropped",
                                                    y_col="Emotion",
                                                    subset="validation",
                                                    batch_size=64,
                                                    seed=42,
                                                    shuffle=True,
                                                    class_mode="categorical",
                                                    target_size=(299, 299))
Example #24
model.add(Dropout(0.5))
model.add(Dense(3))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',  # 3-way softmax output
              optimizer='adam',
              metrics=['accuracy'])

train_datagen = ImageDataGenerator(fill_mode='nearest', rescale=1. / 255)

test_datagen = ImageDataGenerator(fill_mode='nearest', rescale=1. / 255)

train_generator = train_datagen.flow_from_dataframe(dataframe=csv,
                                                    directory="./dataset",
                                                    x_col="image",
                                                    y_col="category",
                                                    target_size=(img_height,
                                                                 img_width),
                                                    batch_size=batch_size,
                                                    class_mode='categorical')

label_map = train_generator.class_indices
print(label_map)

validation_generator = test_datagen.flow_from_dataframe(
    dataframe=csv,
    directory="./dataset",
    x_col="image",
    y_col="category",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')
Example #25
def cnn_model(channels, nb_epoch, batch_size, nb_classes, nb_gpus, cl_weights,
              leakiness, w_regu, b_regu, initializer, img_height, img_width,
              labels_train, train_data_dir):
    '''
  #Sample data+labels path
  sample_data_dir = r"D:\Final Year Project\sample_cnn\sample.npy"
  sample_label = r"D:\Final Year Project\sample_cnn\sample.csv"

  sample_data= np.load(sample_data_dir)
  sam_labels = pd.read_csv(sample_label)
  sam_labels = sam_labels.values
  '''

    input_shape = (img_height, img_width, channels)
    '''
  Conv2D takes a 4D tensor as input_shape but we need to pass only
  3D while keras takes care of batch size on its own
  so pass (img_height,img_width,channels) not (batch_size,img_height,img_width,channels)
  '''

    model = Sequential()

    model.add(
        Conv2D(
            32,
            (3, 3),
            strides=(1, 1),
            input_shape=input_shape,
            padding='same',
        ))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    model.add(Conv2D(
        64,
        (3, 3),
        strides=(1, 1),
        padding='same',
    ))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    #model.add(Dropout(0.20))

    model.add(Conv2D(
        128,
        (3, 3),
        strides=(1, 1),
        padding='same',
    ))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))

    #model.add(Dropout(0.20))
    model.add(Conv2D(
        64,
        (1, 1),
        strides=(1, 1),
        padding='same',
    ))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))

    model.add(Conv2D(
        128,
        (3, 3),
        strides=(1, 1),
        padding='same',
    ))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))

    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    model.add(Conv2D(
        256,
        (3, 3),
        strides=(1, 1),
        padding='same',
    ))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))

    model.add(Conv2D(
        128,
        (1, 1),
        strides=(1, 1),
        padding='same',
    ))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))

    #model.add(Dropout(0.20))

    model.add(Conv2D(
        256,
        (3, 3),
        strides=(1, 1),
        padding='same',
    ))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

    #model.add(Dropout(0.4))
    model.add(Conv2D(
        512,
        (3, 3),
        strides=(1, 1),
        padding='same',
    ))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))

    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    ##  model.add(Dropout(0.25))

    model.add(Flatten())

    model.add(Dense(1024))
    model.add(LeakyReLU(alpha=leakiness))
    model.add(Dropout(0.3))
    model.add(Dense(1024))
    model.add(LeakyReLU(alpha=leakiness))
    model.add(Dropout(0.3))

    model.add(Dense(nb_classes, activation='softmax'))

    model.summary()

    sgd = SGD(lr=0.0001, momentum=0.9, decay=0, nesterov=True)

    model.compile(optimizer=sgd,
                  loss='mean_squared_error',
                  metrics=['accuracy'])
    tensorboard = TensorBoard(log_dir='log/',
                              histogram_freq=0,
                              write_graph=True,
                              write_images=True)
    stop = EarlyStopping(monitor='loss', patience=0, verbose=2, mode='auto')
    model_chkpt = ModelCheckpoint(
        filepath='saved_model/best_model/'
        'best_model_weights_Epoch_{epoch:02d}-ValLoss_{val_loss:.2f}.h5',
        monitor='val_loss',
        save_best_only=True)

    train_datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.2)

    train_generator = train_datagen.flow_from_dataframe(
        labels_train,
        train_data_dir,
        x_col='train_image_name',
        y_col='level',
        has_ext=True,
        target_size=(img_height, img_width),
        class_mode='categorical',
        batch_size=batch_size,
        subset='training')
    validation_generator = train_datagen.flow_from_dataframe(
        labels_train,
        train_data_dir,
        x_col='train_image_name',
        y_col='level',
        has_ext=True,
        target_size=(img_height, img_width),
        class_mode='categorical',
        batch_size=batch_size,
        subset='validation')
    STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
    STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size
    model.fit_generator(train_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_data=validation_generator,
                        validation_steps=STEP_SIZE_VALID,
                        epochs=nb_epoch,
                        class_weight=cl_weights,
                        verbose=1,
                        callbacks=[stop, tensorboard, model_chkpt])

    return model, validation_generator, train_generator
Example #26
batch_size = 15

# In[15]:

# training generator
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   shear_range=0.1,
                                   zoom_range=0.2,
                                   rotation_range=15,
                                   horizontal_flip=True,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1)
train_generator = train_datagen.flow_from_dataframe(train_df,
                                                    'data/train/train/',
                                                    x_col='filename',
                                                    y_col='category',
                                                    target_size=(64, 64),
                                                    class_mode='categorical',
                                                    batch_size=15)

# In[16]:

# Validation generator
validate_datagen = ImageDataGenerator(rescale=1. / 255)
validate_generator = validate_datagen.flow_from_dataframe(
    validate_df,
    directory='data/train/train/',
    x_col='filename',
    y_col='category',
    target_size=(64, 64),
    batch_size=15,
    class_mode='categorical')
Example #27
# The `image_generator` you created above will act to adjust your image data such that the new mean of the data will be zero, and the standard deviation of the data will be 1.
#
# In other words, the generator will replace each pixel value in the image with a new value calculated by subtracting the mean and dividing by the standard deviation.
#
# $$\frac{x_i - \mu}{\sigma}$$
#
# Run the next cell to pre-process your data using the `image_generator`. In this step you will also be reducing the image size down to 320x320 pixels.

# In[12]:

# Flow from directory with specified batch size and target image size
generator = image_generator.flow_from_dataframe(
    dataframe=train_df,
    directory="nih/images-small/",
    x_col="Image",  # features
    y_col=['Mass'],  # labels
    class_mode="raw",  # 'Mass' column should be in train_df
    batch_size=1,  # images per batch
    shuffle=False,  # shuffle the rows or not
    target_size=(320, 320)  # width and height of output image
)

# Run the next cell to plot up an example of a pre-processed image

# In[13]:

# Plot a processed image
sns.set_style("white")
generated_image, label = generator.__getitem__(0)
plt.imshow(generated_image[0], cmap='gray')
plt.colorbar()
plt.title('Raw Chest X Ray Image')
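As a quick sanity check that the samplewise standardization behaved as described, each pre-processed image should have mean close to 0 and standard deviation close to 1 (a sketch in the notebook's own cell style):

# In[14]:

print(generated_image[0].mean(), generated_image[0].std())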
Example #28
    pd_1 = pd.DataFrame(list(
        zip(col_1, col_3, col_4, col_5, col_6, col_7, col_8)),
                        columns=['id_1', 'x', 'y', 'z', 'qx', 'qy', 'qz'])
    pd_2 = pd.DataFrame(list(zip(col_2, col_3)), columns=['id_2', 'labels'])
    return pd_1, pd_2


df_1, df_2 = create_df()
in_gen_1 = ImageDataGenerator()
in_gen_2 = ImageDataGenerator()
in_gen_1 = in_gen_1.flow_from_dataframe(
    df_1,
    directory='/home/diego/my_project_dir/my_proj_env/flowfrom/images/',
    x_col="id_1",
    y_col=['x', 'y', 'z', 'qx', 'qy', 'qz'],
    target_size=(100, 300),
    batch_size=2,
    shuffle=False,
    class_mode='multi_output',
    color_mode='rgb')
in_gen_2 = in_gen_2.flow_from_dataframe(
    df_2,
    directory='/home/diego/my_project_dir/my_proj_env/flowfrom/images/',
    x_col="id_2",
    y_col='labels',
    target_size=(100, 300),
    batch_size=2,
    shuffle=False,
    class_mode='raw',
    color_mode='rgb')
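Because both iterators use shuffle=False and the same batch size, their batches stay aligned with the rows of df_1 and df_2. One hedged way to consume them together, e.g. for a model with two image inputs and seven outputs (a sketch, assuming that output ordering):

def combined_generator(gen_1, gen_2):
    # yields ([img_batch_1, img_batch_2], [x, y, z, qx, qy, qz, labels])
    while True:
        x1, y1 = next(gen_1)  # y1 is a list of six arrays (class_mode='multi_output')
        x2, y2 = next(gen_2)  # y2 is a single label array (class_mode='raw')
        yield [x1, x2], y1 + [y2]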
Example #29
def main(config=None):
    trial_name = os.path.splitext(__file__)[0]
    model_filename = os.path.sep.join(["output", trial_name, "model.h5"])
    checkpoint_folder = os.path.sep.join(["output", trial_name])
    from pathlib import Path
    Path(checkpoint_folder).mkdir(parents=True, exist_ok=True)

    import pandas as pd
    from keras.models import Sequential, load_model
    from keras.layers import Dense, Flatten, Dropout
    from keras.preprocessing.image import ImageDataGenerator
    from keras.optimizers import Adam
    from keras.applications import ResNet50V2
    from keras.applications.resnet_v2 import preprocess_input
    import tensorflow as tf

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        _ = tf.config.experimental.set_memory_growth(physical_devices[0], True)

    import wandb
    from wandb.keras import WandbCallback
    if config is None:
        wandb.init(project="minibar")
        config = wandb.config
    else:
        wandb.init(project="minibar", config=config)

    df_train = pd.read_csv('data/train_labels.csv')

    from helpers.decouple import decouple
    matrix_train, _ = decouple(df_train)
    from helpers.matrix_to_df import matrix_to_dfcount
    df_train_agg = matrix_to_dfcount(matrix_train)

    train_datagen = ImageDataGenerator(validation_split=0.2,
                                       horizontal_flip=True,
                                       preprocessing_function=preprocess_input)

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=df_train_agg,
        directory='data/train',
        x_col='filename',
        y_col='count',
        target_size=(config['input_shape_height'],
                     config['input_shape_width']),
        batch_size=config['batch_size'],
        class_mode='raw',
        subset="training",
    )

    validation_generator = train_datagen.flow_from_dataframe(
        dataframe=df_train_agg,
        directory='data/train',
        x_col='filename',
        y_col='count',
        target_size=(config['input_shape_height'],
                     config['input_shape_width']),
        batch_size=config['batch_size'],
        class_mode='raw',
        subset="validation",
    )

    if os.path.isfile(model_filename) and config['continue_training']:
        model = load_model(model_filename)
    else:
        model = Sequential()

        model.add(
            ResNet50V2(include_top=False,
                       input_shape=(config['input_shape_height'],
                                    config['input_shape_width'], 3)))

        model.add(Flatten())
        model.add(Dense(units=512, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(units=1))

        model.compile(optimizer=Adam(learning_rate=config['learning_rate']),
                      loss='mean_squared_error',
                      metrics=['accuracy'])
        model.save(model_filename)

    # construct the set of callbacks
    from helpers.epochcheckpoint import EpochCheckpoint
    callbacks = [
        EpochCheckpoint(checkpoint_folder, every=1, startAt=0),
        WandbCallback(save_model=False)
    ]

    model.fit(
        train_generator,
        #steps_per_epoch=100,
        epochs=config['epoch'],
        #steps_per_epoch=100,
        validation_data=validation_generator,
        #validation_steps=100
        callbacks=callbacks,
        verbose=1,
        initial_epoch=config['initial_epoch'])
    model.save(model_filename)
Example #30
def createModel(df, dir, savename, columns, types_num, epo=10, batch=32):
    DATASET_LOCATION = dir
    BATCH_SIZE = batch
    IMAGE_SIZE = (128, 128)
    INPUT_SHAPE = (128, 128, 3)
    EPOCHS = epo
    conv_base = VGG19(weights='imagenet',
                      include_top=False,
                      input_shape=(128, 128, 3))
    conv_base.trainable = False
    # Build the base from the conv model without its top classifier

    # Instantiating a Convolutional Neural Network (CNN) Classifier
    model = Sequential()
    for layer in conv_base.layers:
        layer.trainable = False
    # ------frozen base-------------
    model.add(conv_base)

    # model.add(Conv2D(filters=32,kernel_size=3,padding='same',activation='relu', input_shape=INPUT_SHAPE))
    # model.add(Conv2D(filters=32,kernel_size=3,padding='same',activation='relu'))
    # model.add(MaxPooling2D(2, 2))
    # model.add(Conv2D(64, (3, 3), activation= 'relu',padding='same'))
    # model.add(Conv2D(64, (3, 3), activation= 'relu',padding='same'))
    # model.add(MaxPooling2D(2, 2))

    # model.add(Conv2D(128, (3, 3), activation='relu'))
    # model.add(MaxPooling2D(2, 2))
    # conv_base.summary()

    model.add(
        Flatten())  # this converts our 3D feature maps to 1D feature vectors
    # model.add(GlobalAveragePooling2D())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(types_num, activation='softmax'))

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=["accuracy"],
    )
    print('Initialized model\n')
    # separate in training and testing
    train_df, test_df = train_test_split(df, test_size=0.35, random_state=40)
    # data augmentation - to provide more samples
    train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
    )
    # cannot change validation data!
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    print(
        'Created data augmentation method, now we have more data! Cool huh?\n')
    # read files of a directory using flow_from_dataframe
    # FIRST FOR TRAIN SECOND FOR TEST
    try:
        train_generator = train_datagen.flow_from_dataframe(
            train_df,
            DATASET_LOCATION,
            x_col=columns[0],
            y_col=columns[1],
            target_size=IMAGE_SIZE,
            class_mode="categorical",
            batch_size=BATCH_SIZE,
        )
        print('Created set for teaching\n')
        test_generator = test_datagen.flow_from_dataframe(
            test_df,
            DATASET_LOCATION,
            x_col=columns[0],
            y_col=columns[1],
            target_size=IMAGE_SIZE,
            class_mode="categorical",
            batch_size=BATCH_SIZE,
        )
        print('Created set for validation\n')
        # NOW WE TRAIN THE MODEL
        history = model.fit_generator(
            train_generator,
            epochs=EPOCHS,
            validation_data=test_generator,
            validation_steps=test_df.shape[0] // BATCH_SIZE,
            steps_per_epoch=train_df.shape[0] // BATCH_SIZE,
            verbose=1,
        )
        print('Trained frozen model. Now unfreezing some layers\n')
        set_trainable = False
        for layer in conv_base.layers:
            if layer.name == 'block5_conv1':
                set_trainable = True
            if set_trainable:
                layer.trainable = True
            else:
                layer.trainable = False

        model.compile(
            loss=keras.losses.categorical_crossentropy,
            optimizer=keras.optimizers.RMSprop(lr=1e-5),
            metrics=["accuracy"],
        )

        history = model.fit_generator(
            train_generator,
            epochs=EPOCHS,
            validation_data=test_generator,
            validation_steps=test_df.shape[0] // BATCH_SIZE,
            steps_per_epoch=train_df.shape[0] // BATCH_SIZE,
            verbose=1,
        )

        print('Trained model\n')
        # save model and architecture to single file
        model.save(savename)
        print("Saved model to disk\n")
        return history
    except Exception as e:
        print(e)