Example #1
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    print('selecting gpu  :', args.gpu_id)
    create_folders(args.path_results)

    list_image, list_mask = get_paths(args.path_train)

    df_train, df_val = generate_dataframe(path_mask=list_mask,
                                          out_size=args.size,
                                          stride=args.stride,
                                          classes=args.classes,
                                          out_directory=args.path_train)

    train_generator = DataGenerator(list_IDs=df_train,
                                    batch_size=args.batch,
                                    dim=(args.size, args.size),
                                    n_channels=3,
                                    n_classes=args.classes,
                                    norm=args.norm,
                                    transformations=True,
                                    shuffle=True)

    validation_generator = DataGenerator(list_IDs=df_val,
                                         batch_size=args.batch,
                                         dim=(args.size, args.size),
                                         n_channels=3,
                                         norm=args.norm,
                                         n_classes=args.classes)

    model = get_model(args=args, initial_lr=0.0001)
    history = start_train(args, model, train_generator, validation_generator)
    if args.no_plot == True:
        save_training_graph(history, args.path_results)
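
Note: every example on this page constructs some repository-specific DataGenerator without showing its definition. For orientation, the sketch below shows the general shape such a generator usually has, i.e. a keras.utils.Sequence subclass with __len__ and __getitem__. The class name, constructor arguments, shapes and the _load_sample helper are illustrative assumptions, not the API of any repository quoted here.

import numpy as np
from tensorflow import keras


class MinimalDataGenerator(keras.utils.Sequence):
    """Illustrative sketch only: yields (X, y) batches for model.fit()."""

    def __init__(self, list_IDs, labels, batch_size=32, dim=(64, 64),
                 n_channels=3, n_classes=2, shuffle=True):
        self.list_IDs = list_IDs      # sample identifiers, e.g. file names
        self.labels = labels          # dict: identifier -> integer class
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        # number of batches per epoch
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        batch_idx = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        ids = [self.list_IDs[k] for k in batch_idx]
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size,), dtype=int)
        for i, sample_id in enumerate(ids):
            X[i] = self._load_sample(sample_id)   # hypothetical loading helper
            y[i] = self.labels[sample_id]
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _load_sample(self, sample_id):
        # placeholder: real generators read an image or signal from disk here
        return np.random.rand(*self.dim, self.n_channels)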
Example #2
def TTA(sess, test_lists, dir_path, ckpt_dir, augment_times=1):
    probs = np.zeros((augment_times, len(test_lists), len(category_df)))
    # ckpt config
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        saver.restore(sess, ckpt.model_checkpoint_path)
    for t in range(augment_times):
        test_datagen = DataGenerator(augment=True, random_erasing=True, horizontal_flip=True)
        test_generator = test_datagen.flow_from_list_prediction(
            lists=test_lists,
            batch_size=1,
            image_size=336,
            dir_path=dir_path)
        _probs = np.zeros((len(test_lists), len(category_df)))
        print(t+1, ": times")
        for i, v in tqdm(enumerate(test_lists)):
            inputs = next(test_generator)
            _prob = sess.run([prob], feed_dict={x: inputs, training_flag: False})
            _probs[i, :] = np.asarray(_prob)
        probs[t, :, :] = _probs

    # create pseudo_probs and predictions
    pseudo_probs = np.zeros((len(test_lists), len(category_df)))
    predictions = []
    for i in range(probs.shape[1]):
        pseudo_prob = np.mean(probs[:, i, :], axis=0)
        pseudo_probs[i, :] = pseudo_prob
        predictions.append(np.argmax(pseudo_prob))

    predictions = np.asarray(predictions)
    return pseudo_probs, predictions
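
The heart of the TTA routine above is averaging the class probabilities over the augment_times prediction rounds and taking the argmax of the mean. A self-contained NumPy sketch of that reduction, with made-up array sizes:

import numpy as np

# probs[t, i, c]: probability of class c for sample i in augmentation round t
augment_times, n_samples, n_classes = 4, 3, 5          # illustrative sizes
rng = np.random.default_rng(0)
probs = rng.random((augment_times, n_samples, n_classes))
probs /= probs.sum(axis=-1, keepdims=True)             # normalize each round

pseudo_probs = probs.mean(axis=0)                      # average over augmentation rounds
predictions = pseudo_probs.argmax(axis=1)              # final class per sample
print(pseudo_probs.shape, predictions)                 # (3, 5) and three class indices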
Example #3
def main():

    config = yaml.safe_load(open("config.yaml", 'r'))

    # =========================================== #
    # =============== PREPARE DATA ============== #
    # =========================================== #
    train_x, train_y, val_x, val_y = get_data(config=config)

    train_generator = DataGenerator(images=train_x,
                                    labels=train_y,
                                    config=config,
                                    gen_type='train')
    val_generator = DataGenerator(images=val_x,
                                  labels=val_y,
                                  config=config,
                                  gen_type='val')

    # =========================================== #
    # =============== CREATE MODEL ============== #
    # =========================================== #
    model = GoftNet(config=config)

    # =========================================== #
    # =============== TRAIN MODEL =============== #
    # =========================================== #
    model.train(train_data=train_generator, val_data=val_generator)
Example #4
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    print('selecting gpu  :', args.gpu_id)
    create_folders(args.path_results)
    dataset_distribution_path = join(args.path_results, 'dataset_distribution.csv')
    if os.path.exists(dataset_distribution_path):
        print('{}    loaded!'.format(dataset_distribution_path))
        dataset_distribution = pd.read_csv(dataset_distribution_path)
        train, val, test = dataset_distribution['train'], dataset_distribution['val'], dataset_distribution['test']
    else:
        train, val, test = get_paths(args.path_train)
        dataset_distribution = list(zip(train, val, test))
        dataset_distribution = pd.DataFrame(dataset_distribution, columns=['train', 'val', 'test'])
        dataset_distribution.to_csv(dataset_distribution_path, index=False, header=True)

    train_generator = DataGenerator(list_IDs=train,
                                    batch_size=args.batch,
                                    dim=(args.size, args.size),
                                    n_channels=3,
                                    n_classes=args.classes,
                                    norm=args.norm,
                                    transformations=True,
                                    shuffle=True)

    validation_generator = DataGenerator(list_IDs=val,
                                         batch_size=args.batch,
                                         dim=(args.size, args.size),
                                         n_channels=3,
                                         norm=args.norm,
                                         n_classes=args.classes)

    model = get_model(args=args, initial_lr=0.0001)
    history = start_train(args, model, train_generator, validation_generator)
    if args.no_plot == True:
        save_training_graph(history, args.path_results)
Example #5
    def get_train_validation_generator(self,
                                       validation_percentage=0.15,
                                       sessions_per_batch=256,
                                       class_weights=[]):
        # return the training generator and, optionally, a validation generator
        # (set validation_percentage to 0 to skip validation)
        # if sessions_per_batch == 'auto':
        #     sessions_per_batch = self._get_auto_samples_per_batch()
        self.class_weights = class_weights

        tot_sessions = int(self.train_len / self.rows_per_sample)
        #tot_batches = math.ceil(tot_sessions / sessions_per_batch)

        number_of_validation_sessions = int(tot_sessions *
                                            validation_percentage)
        number_of_train_sessions = tot_sessions - number_of_validation_sessions
        train_rows = number_of_train_sessions * self.rows_per_sample

        #batches_in_train = math.ceil(number_of_train_sessions / sessions_per_batch)
        #batches_in_val = tot_batches - batches_in_train

        print('Train generator:')
        train_gen = DataGenerator(self,
                                  pre_fit_fn=self.prefit_xy,
                                  rows_to_read=train_rows)
        #train_gen.name = 'train_gen'
        print('Validation generator:')
        val_gen = DataGenerator(self,
                                pre_fit_fn=self.prefit_xy,
                                skip_rows=train_rows)
        #val_gen.name = 'val_gen'

        return train_gen, val_gen
Example #6
def example_generator():

    train_gen = DataGenerator(
        data_path=
        "/Users/anqin/Desktop/GSM1865005_allC.MethylC-seq_WT_rods_rep1.tsv/data.sort.clean.bed",
        ref_fasta=
        "/Users/anqin/Desktop/GSM1865005_allC.MethylC-seq_WT_rods_rep1.tsv/GRCm38.primary_assembly.genome.fa.gz",
        genome_size_file="./mm10.genome.size",
        epi_track_files=[
            "/Users/anqin/Desktop/GSM1865005_allC.MethylC-seq_WT_rods_rep1.tsv/AM_R2_allChr_CpG_noL.txt"
        ],
        tasks=["TARGET"],
        upsample=False)

    valid_gen = DataGenerator(
        data_path=
        "/Users/anqin/Desktop/GSM1865005_allC.MethylC-seq_WT_rods_rep1.tsv/data_valid_sort.clean.bed",
        ref_fasta=
        "/Users/anqin/Desktop/GSM1865005_allC.MethylC-seq_WT_rods_rep1.tsv/GRCm38.primary_assembly.genome.fa.gz",
        genome_size_file="./mm10.genome.size",
        epi_track_files=[
            "/Users/anqin/Desktop/GSM1865005_allC.MethylC-seq_WT_rods_rep1.tsv/AM_R2_allChr_CpG_noL.txt"
        ],
        tasks=["TARGET"],
        upsample=False)

    one_filter_keras_model = Sequential()
    one_filter_keras_model.add(
        Conv2D(filters=5,
               kernel_size=(1, 15),
               padding="same",
               input_shape=(1, 1000, 5)))
    one_filter_keras_model.add(BatchNormalization(axis=-1))
    one_filter_keras_model.add(Activation('relu'))
    one_filter_keras_model.add(MaxPooling2D(pool_size=(1, 35)))
    one_filter_keras_model.add(Flatten())
    one_filter_keras_model.add(Dense(1))
    one_filter_keras_model.add(Activation("sigmoid"))
    one_filter_keras_model.summary()

    one_filter_keras_model.compile(optimizer='adam',
                                   loss='binary_crossentropy')

    #metrics_callback=MetricsCallback(train_data=(train_X,train_Y),
    #                             validation_data=(valid_X,valid_Y))

    #print(one_filter_keras_model.get_weights())

    history_regression = one_filter_keras_model.fit_generator(
        train_gen,
        validation_data=valid_gen,
        steps_per_epoch=500,
        validation_steps=100,
        epochs=150,
        verbose=1,
        use_multiprocessing=False,
        workers=1,
        max_queue_size=50,
        callbacks=[History()])
Example #7
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--train-txt', type=str, required=True)
    parser.add_argument('--test-txt', type=str, required=True)
    parser.add_argument('--save-dir', type=str, required=True)
    parser.add_argument('--num-bins', type=int, required=True)
    parser.add_argument('--lr', type=float, required=False, default = 0.001)
    parser.add_argument('--batch-size', type=int, required=False, default=50)
    parser.add_argument('--epochs', type=int, required=False, default=10)
    parser.add_argument('--data-dir', type=str, required=True)
    parser.add_argument('--shape', type=int, required=True, nargs=3, help="height width channels")
    parser.add_argument('--message', type=str, required=True)

    args        = parser.parse_args()
    data_dir    = args.data_dir
    image_dir   = os.path.join(data_dir, "images/")
    anno_dir    = os.path.join(data_dir, "annotations/")
    train_path  = args.train_txt
    test_path   = args.test_txt

    # Load list of image names for train and test
    raw_train   = load_dataset(train_path)
    raw_test    = load_dataset(test_path)


    # Create train and test generators
    num_bins    = args.num_bins
    batch_size  = args.batch_size
    train_gen   = DataGenerator(batch_size=batch_size,
                      data_set=raw_train[:200],
                      image_dir=image_dir,
                      anno_dir=anno_dir,
                      preprocess_fn=preprocess_normalize_images_bin_annos,
                      prepare_batch_fn=prepare_batch_images_and_labels)
    test_gen    = DataGenerator(batch_size=batch_size,
                      data_set=raw_test[:50],
                      image_dir=image_dir,
                      anno_dir=anno_dir,
                      preprocess_fn=preprocess_normalize_images_bin_annos,
                      prepare_batch_fn=prepare_batch_images_and_labels)

    # Kick-off
    #name        = args.name
    save_dir    = args.save_dir
    epochs      = args.epochs
    in_shape    = args.shape
    lr          = args.lr
    classes     = [i for i in range(num_bins)]
    message     = args.message

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    best_ckpt   = "must have crashed during training :-("
    save_config(save_dir, data_dir, num_bins, lr, batch_size, epochs,
                in_shape, best_ckpt,  message)
    car_brain   = Model(in_shape, classes=classes)
    best_ckpt   = car_brain.train(train_gen, test_gen, save_dir, epochs=epochs)
Example #8
def train():

    # Reading train and test csv file
    train_df = pd.read_csv(os.path.join(PATH, TRAIN_CSV))
    test_df = pd.read_csv(os.path.join(PATH, TEST_CSV))

    print(f"train shape : {train_df.shape} and test shape : {test_df.shape}")

    train_generator = DataGenerator(train_df,
                                    BATCH_SIZE,
                                    input_size=INPUT_SIZE,
                                    path='',
                                    is_valid=False)

    valid_generator = DataGenerator(test_df,
                                    BATCH_SIZE * 2,
                                    input_size=INPUT_SIZE,
                                    path='',
                                    is_valid=True)

    # Initialize  Model
    print("Loading Model ...")
    model = segmentation_model(input_shape=(INPUT_SIZE, INPUT_SIZE, 3))
    # print(model.summary(110))

    learning_rate = 0.001
    adam = optimizers.Adam(lr=learning_rate)
    model.compile(optimizer=adam, loss=bce_dice_loss, metrics=[IOU])

    cbks = [
        ModelCheckpoint(f"./weights/{WEIGHT_FILENAME}",
                        monitor='val_loss',
                        verbose=1,
                        save_best_only=True,
                        mode='min'),
        ReduceLROnPlateau(monitor='val_loss',
                          factor=0.5,
                          patience=3,
                          verbose=1,
                          mode='min',
                          min_delta=0.0001,
                          min_lr=1e-5),
        EarlyStopping(monitor='val_loss',
                      patience=5,
                      verbose=1,
                      restore_best_weights=False)
    ]

    model.fit(train_generator,
              steps_per_epoch=len(train_generator),
              epochs=50,
              verbose=1,
              callbacks=cbks,
              validation_data=valid_generator,
              validation_steps=len(valid_generator),
              shuffle=False,
              workers=multiprocessing.cpu_count())
Example #9
def main(env_var):
    logger.info(env_var)
    img_size, epochs, mask_channels, mask_type, metric_sel, loss_sel, freezed_layers = _parse_env_var(
        env_var)

    if env_var['--VGG16']:
        if img_size is None:
            img_size = (224, 224)
        model_class = vgg16_unet.VGG16Unet

    elif env_var['--InceptionV3']:
        if img_size is None:
            img_size = (299, 299)
        model_class = inception_v3_unet.InceptionV3Unet

    else:
        return None

    image_encoding = read_csv_encoding(length=1280)

    # ids = image_encoding.keys()
    ids = balanced_ids(image_encoding)

    training_gen = DataGenerator(ids=ids[:640],
                                 img_encodings=image_encoding,
                                 mask_type=mask_type,
                                 out_dim_img=img_size,
                                 classification=env_var['--Classification'])

    validati_gen = DataGenerator(ids=ids[640:680],
                                 img_encodings=image_encoding,
                                 mask_type=mask_type,
                                 out_dim_img=img_size,
                                 classification=env_var['--Classification'])

    model = model_class(img_size=img_size,
                        classification=env_var['--Classification'],
                        skip_connections=env_var['--SkipConnections'],
                        mask_channels=mask_channels)
    model.set_net()
    model.freeze_encoder_blocks(depth=freezed_layers)
    model.compile(loss=loss_sel, metrics=metric_sel)
    model.neural_net.summary(print_fn=logger.info)
    model.fit(training_generator=training_gen,
              validation_generator=validati_gen,
              epochs=epochs,
              ref=TIME)

    predicti_gen = DataGenerator(ids=ids[:8],
                                 img_encodings=image_encoding,
                                 mask_type=mask_type,
                                 out_dim_img=img_size,
                                 classification=env_var['--Classification'],
                                 shuffle=False)

    model.predict(pred_generator=predicti_gen)
Example #10
def DoTrain(train_list, val_list):
    # parameters
    train_batchsize = 4
    val_batchsize = 1
    class_num = 2
    epochs_num = 30
    initial_epoch_num = 0

    # when train a new model -----------------------------------------

    #model = new_models.ADSNet_Plain()
    #model = new_models.ADSNet_W()
    #model = new_models.ADSNet_O()
    model = new_models.ADSNet()
    #model = new_models.StepDeep_model()

    # print(model.summary())

    dt_now = datetime.datetime.now().strftime('%Y%m%d%H%M')
    print(dt_now)

    adam = optimizers.Adam(lr=0.0001)
    model.compile(loss=weight_loss,
                  optimizer=adam,
                  metrics=[POD, FAR, TS, binary_acc])
    modelfilename = "%s-%s-{epoch:02d}.hdf5" % (dt_now, model.name)

    global modelrecordname
    modelrecordname = dt_now + '_' + model.name

    checkpoint = ModelCheckpoint(modelfileDir + modelfilename,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=False,
                                 mode='min')

    train_gen = DataGenerator(train_list,
                              train_batchsize,
                              class_num,
                              generator_type='train')
    val_gen = DataGenerator(val_list,
                            val_batchsize,
                            class_num,
                            generator_type='val')

    RMAE = RecordMetricsAfterEpoch()
    model.fit_generator(
        train_gen,
        validation_data=val_gen,
        epochs=epochs_num,
        initial_epoch=initial_epoch_num,
        # use_multiprocessing=True,
        workers=3,
        max_queue_size=50,
        callbacks=[RMAE, checkpoint])
Example #11
def main():
    labels = json.load(open(os.path.join('data', 'labels.json')))
    partition = {'training': None, 'validation': None}
    for x in partition.keys():
        partition[x] = [
            f for f in os.listdir(os.path.join('data', x))
            if os.path.isfile(os.path.join(os.path.join('data', x), f))
        ]
        partition[x].sort()
    print('Indices read.')

    n_classes = len({labels[x] for x in labels})
    l = {labels[x] for x in labels}
    l = {x: i for i, x in enumerate(sorted(list(l)))}
    labels = {x: l[labels[x]] for x in labels.keys()}
    json.dump(l, open('mapping.json', 'w'))
    print('Mappings written.')

    training_generator = DataGenerator(partition['training'], 'training',
                                       labels, 28, 1, n_classes, True, True)
    validation_generator = DataGenerator(partition['validation'], 'validation',
                                         labels, 28, 1, n_classes, True, True)

    model = None
    with tf.device('/cpu:0'):
        model = FullNetwork.model()
    if os.path.exists('weights.h5'):
        model.load_weights('weights.h5')

    initial_epoch = 0
    if os.path.exists('epochs.json'):
        initial_epoch = len(json.load(open('epochs.json')).keys())

    cbk = SaveCallback(model)
    parallel_model = multi_gpu_model(model, gpus=2)
    parallel_model.compile(optimizer='adadelta',
                           loss={
                               'color_model': 'mean_squared_error',
                               'clf_model': 'categorical_crossentropy'
                           },
                           metrics={
                               'color_model': 'accuracy',
                               'clf_model': 'accuracy'
                           })
    parallel_model.fit_generator(generator=training_generator,
                                 epochs=1000,
                                 verbose=1,
                                 callbacks=[cbk],
                                 validation_data=validation_generator,
                                 use_multiprocessing=True,
                                 workers=4,
                                 initial_epoch=initial_epoch)
    print('Training done.')
Example #12
def example_generator():

    separate_dataset("regions_for_learning_with_head.clean.equal_size.bed",
                     ["chr1"], "valid.bed")
    separate_dataset("regions_for_learning_with_head.clean.equal_size.bed",
                     ["chr2", "chr19"], "test.bed")
    separate_dataset("regions_for_learning_with_head.clean.equal_size.bed", [
        "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10",
        "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18",
        "chr20", "chr21", "chr22"
    ], "train.bed")

    train_gen = DataGenerator(
        data_path="train.bed",
        ref_fasta=
        "../GSM1865005_allC.MethylC-seq_WT_rods_rep1.tsv/GRCm38.primary_assembly.genome.fa.gz",
        genome_size_file="./mm10.genome.size",
        epi_track_files=["MethylC-seq_WT_cones_rep1_CpG.clean.plus.sorted.bw"],
        tasks=["TARGET"],
        upsample=True,
        upsample_ratio=0.3)

    valid_gen = DataGenerator(
        data_path="valid.bed",
        ref_fasta=
        "../GSM1865005_allC.MethylC-seq_WT_rods_rep1.tsv/GRCm38.primary_assembly.genome.fa.gz",
        genome_size_file="./mm10.genome.size",
        epi_track_files=["MethylC-seq_WT_cones_rep1_CpG.clean.plus.sorted.bw"],
        tasks=["TARGET"],
        upsample=True,
        upsample_ratio=0.3)

    model = initialize_model()

    trainning_history = model.fit_generator(
        train_gen,
        validation_data=valid_gen,
        steps_per_epoch=5000,
        validation_steps=500,
        epochs=10,
        verbose=1,
        use_multiprocessing=False,
        workers=4,
        max_queue_size=50,
        callbacks=[
            History(),
            ModelCheckpoint("ATAC_peak_Classification_positive_constrain.h5",
                            monitor='val_loss',
                            verbose=1,
                            save_best_only=True,
                            mode='min')
        ])
Example #13
def train():

    # Reading train and test csv file
    train_df = pd.read_csv(os.path.join(PATH, TRAIN_CSV))
    test_df = pd.read_csv(os.path.join(PATH, TEST_CSV))

    train_df, test_df = str_to_list(train_df), str_to_list(test_df)

    train_df['pts'] = train_df.apply(
        lambda x: combine_list(x.pts_x, x.pts_y), axis=1)
    test_df['pts'] = test_df.apply(
        lambda x: combine_list(x.pts_x, x.pts_y), axis=1)

    train_df.pts = train_df.pts.apply(lambda x: correction(x))
    test_df.pts = test_df.pts.apply(lambda x: correction(x))

    print(f"train shape : {train_df.shape} and test shape : {test_df.shape}")

    train_generator = DataGenerator(train_df,
                                    BATCH_SIZE,
                                    path=os.path.join(PATH, TRAIN_FOLDER),
                                    is_valid=False)

    test_generator = DataGenerator(test_df,
                                   BATCH_SIZE*2,
                                   path=os.path.join(PATH, TEST_FOLDER),
                                   is_valid=True)

    # Initialize  Model
    print("Loading Model ...")
    model = KeypointModel()
    print(model.summary(110))

    learning_rate = 0.001
    adam = optimizers.Adam(lr=learning_rate)
    model.compile(optimizer=adam, loss='mae', metrics=['mse'])

    cbks = [ModelCheckpoint(f"./weights/{WEIGHT_FILENAME}", monitor='val_loss', verbose=1,
                            save_best_only=True, mode='min'),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1,
                              mode='min', min_delta=0.0001, min_lr=1e-5),
            EarlyStopping(monitor='val_loss', patience=5, verbose=1,
                          restore_best_weights=False)]

    model.fit_generator(
        generator=train_generator,
        steps_per_epoch=len(train_generator),
        epochs=50,
        verbose=1,
        callbacks=cbks,
        validation_data=test_generator,
        validation_steps=len(test_generator))
Example #14
def prediction_and_evaluation():
    from tensorflow.python.keras.models import load_model

    model = initialize_model()
    model.load_weights("ATAC_peak_Classification_positive_constrain.h5")

    #Get predictions on the test set

    test_gen = DataGenerator(
        data_path="test.bed",
        ref_fasta=
        "../GSM1865005_allC.MethylC-seq_WT_rods_rep1.tsv/GRCm38.primary_assembly.genome.fa.gz",
        genome_size_file="./mm10.genome.size",
        epi_track_files=["MethylC-seq_WT_cones_rep1_CpG.clean.plus.sorted.bw"],
        tasks=["TARGET"],
        upsample=False)

    model_predictions = model.predict_generator(test_gen,
                                                workers=4,
                                                use_multiprocessing=False,
                                                verbose=1)

    model_predictions_bool = model_predictions > 0.5

    test_db_observed = get_labels_from_target_files("test.bed", ["TARGET"])

    print(ClassificationResult(test_db_observed, model_predictions_bool))
Example #15
def main():
    # env
    env_path = find_dotenv()
    load_dotenv(dotenv_path=env_path, verbose=True)
    processed_p = Path(os.environ.get('PATH_PROCESSED')).resolve()
    models_p = Path(os.environ.get('PATH_MODELS')).resolve()
    img_h = int(os.environ.get('IMAGE_HEIGHT'))
    img_w = int(os.environ.get('IMAGE_WIDTH'))
    batch_size = int(os.environ.get('BATCH_SIZE'))
    downsample_factor = int(os.environ.get('DOWNSAMPLE_FACTOR'))
    min_lr = float(os.environ.get('MIN_LEARNING_RATE'))
    max_lr = float(os.environ.get('MAX_LEARNING_RATE'))
    # according to how Keras' multi_gpu_model() handles mini-batches
    # logging
    logging.root.removeHandler(absl.logging._absl_handler)
    absl.logging._warn_preinit_stderr = False
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    logger.info('TensorFlow version: ' + tf.__version__)
    logger.info('Keras version: ' + tf.keras.__version__)
    # parameters
    train_p = processed_p.joinpath('train')
    assert train_p.exists()
    # generators
    logger.info('loading data')
    train_gen = DataGenerator(train_p, img_w, img_h, batch_size, downsample_factor)
    max_text_len = train_gen.max_text_len
    logger.info('alphabet: \'' + str(train_gen.alphabet) + '\'')
    logger.info('alphabet size: ' + str(len(train_gen.alphabet)))
    logger.info('max text length: ' + str(max_text_len))
    logger.info('image shape: height=' + str(img_h) + ' width=' + str(img_w))
    logger.info('batch size: ' + str(batch_size))
    logger.info('output size: ' + str(train_gen.output_size))
    logger.info('training samples: ' + str(train_gen.n))
    logger.info('train steps per epoch: ' + str(len(train_gen)))
    logger.info('min. learning-rate: ' + str(min_lr))
    logger.info('max. learning-rate: ' + str(max_lr))
    # create model
    model = OCRNet(train_gen.output_size, img_w, img_h, max_text_len)
    model.summary()
    # find best learning rate
    # initialize optimizer
    adam = Adam(lr=min_lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    # compile model
    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=adam, metrics=['accuracy'])
    lrf = LRFinder(model)
    lrf.find(train_gen,
             min_lr, max_lr,
             stepsPerEpoch=len(train_gen),
             batchSize=batch_size)
    # plot the loss for the various learning rates and save the
    # resulting plot to disk
    if not models_p.exists():
        models_p.mkdir()
    lrf.plot_loss(models_p.joinpath('loss_plot.png'), title='loss')
    lrf.plot_loss_change(models_p.joinpath('loss_change_plot.png'), title='loss change')
    # adjust the learning-rate bounds in the config and then train the network for the full set of epochs
    logger.info('learning rate finder complete')
    logger.info('best LR: %f' % lrf.get_best_lr())
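
Both OCR examples (this one and Example #27) compile the model with loss={'ctc': lambda y_true, y_pred: y_pred}. That pattern assumes the CTC loss is already computed inside the graph, typically by a Lambda layer whose output named 'ctc' is the per-sample loss, so the compile-time loss only has to pass that value through. A named, equivalent form of the lambda, shown for readability:

def ctc_passthrough_loss(y_true, y_pred):
    # y_pred already holds the per-sample CTC loss computed inside the model,
    # so the "loss function" simply forwards it; y_true is ignored.
    return y_pred

# model.compile(loss={'ctc': ctc_passthrough_loss}, optimizer=adam, metrics=['accuracy'])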
Example #16
def evaluate_dir_random_rotation(args):
    '''
    Evaluates an image directory, randomly rotating images on-the-fly
    '''
    images = glob.glob(os.path.join(args['image_dir'], "*.jpg"))

    # Creating test generator
    test_gen = DataGenerator(
        images,
        rotate=True,
        preprocess_function=preprocess_input,
        shuffle=True,
        show_intermediate=False,
        batch_size=args['batch_size'],
        dim=args['img_size'],
        regress=args['regress']
    )

    # Loading model
    if args['regress']:
        model = load_model(args['model_dir'], custom_objects={"angle_loss_regress": angle_loss_regress})
    else:
        model = load_model(args['model_dir'], custom_objects={"angle_loss": angle_loss})

    # Running evaluation
    out = model.evaluate(
        test_gen,
        steps = int(len(images) / args['batch_size'])
    )

    print(f"Test Loss: {out[0]} ; Angle Loss: {out[1]}")
Example #17
def fitModel(model, input_size, categorical, trainDb, trainPaths, trainAge,
             trainGender, testDb, testPaths, testAge, testGender, epoch,
             batch_size, num_worker, callbacks, GPU):
    return model.fit_generator(
        DataGenerator(model, trainDb, trainPaths, trainAge, trainGender,
                      batch_size, input_size, categorical),
        validation_data=DataGenerator(model, testDb, testPaths, testAge,
                                      testGender, batch_size, input_size,
                                      categorical),
        epochs=epoch,
        verbose=2,
        steps_per_epoch=len(trainAge) // (batch_size * GPU),
        validation_steps=len(testAge) // (batch_size * GPU),
        workers=num_worker,
        use_multiprocessing=True,
        max_queue_size=int(batch_size * 2),
        callbacks=callbacks)
Example #18
def process(src, out, suffix):
    print 'compile', src, 'to', out

    chips = []

    # load the file
    f = open(src, 'r')
    for s in f:
        s = s.strip()
        l = len(s)
        if l == 0:
            continue
        if l == 1 and (s[0] == '\n' or s[0] == '\r'):
            continue
        if s[0] == '#':
            continue

        if s[l - 1] == '\n':
            s = s[:l - 1]

        if s.startswith('CHIP['):
            chip = Chip()
            load_line(chip, s)
            chips.append(chip)
        else:
            load_line(chips[len(chips) - 1], s)

    f.close()


    g = DataGenerator(suffix)

    for chip in chips:
        #chip.show()
        compile_chip(chip, g)

    g.generate(out)

    print '-------------[Chips]--------------------'
    for chip in chips:
        print chip.name.decode('cp1251').encode('utf8')
    print '----------------------------------------'


    print 'Total chips: ', len(chips)
    print 'Data size: ', g.size
Example #19
    def get_test_generator(self, sessions_per_batch=256):
        # return the test generator

        #def prefit(Xchunk_df, index):
        """ Preprocess a chunk of the sequence dataset """
        #Xchunk_df = self._preprocess_x_df(Xchunk_df, partial=True)
        #return Xchunk_df

        return DataGenerator(self, for_train=False)  #, pre_fit_fn=prefit)
Example #21
def initialize_data():
    global gh_scraper, generator, logger

    # scraping COVID-19 data
    gh_scraper.scrape()
    reports, countries = gh_scraper.cache, gh_scraper.valid_countries
    dates = process_dates(reports)
    data = process_data(reports, countries)

    generator = DataGenerator(dates, data, countries)
Example #22
    def __init__(self, modelname):
        self.modelname = modelname
        params = {'dim': (29, 29),
          'batch_size': 1024,
          'n_classes': 2,
          'n_channels': 1,
          'shuffle': True}

        Config.DATAPATH = 'data/train/Fuzzy/'

        data = os.listdir(Config.DATAPATH)
        data.remove('labels.npy')
        labels = np.load(Config.DATAPATH+"labels.npy")
        data_train = data[:int(len(data)/10*8)]
        data_valid = data[int(len(data)/10*8):]
        self.gen_train = DataGenerator(data_train, labels, **params)
        self.gen_valid = DataGenerator(data_valid, labels, **params)

        params['shuffle'] = False
        Config.DATAPATH = 'data/test/Fuzzy/'

        data_test = os.listdir(Config.DATAPATH)
        data_test.remove('labels.npy')
        data_test = data_test[int(len(data_test)/10*8):]
        labels_test = np.load(Config.DATAPATH+"labels.npy")
        self.gen_test = DataGenerator(data_test, labels_test, **params)

        self.model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(29, 29, 1)),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])

        self.model.compile(
                    optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'],
        )
Example #23
    def get_train_validation_generator(self,
                                       validation_percentage=0.15,
                                       sessions_per_batch=256,
                                       class_weights=[]):
        # return the training generator and, optionally, a validation generator
        # (set validation_percentage to 0 to skip validation)
        # if sessions_per_batch == 'auto':
        #     sessions_per_batch = self._get_auto_samples_per_batch()

        def prefit(Xchunk_df, Ychunk_df, index):
            """ Preprocess a chunk of the sequence dataset """
            #Xchunk_df = self._preprocess_x_df(Xchunk_df, partial=True)
            #Ychunk_df = self._preprocess_y_df(Ychunk_df)

            if len(class_weights) > 0:
                # weight only the last interaction (clickout item) by the class_weight
                weights = np.zeros(Xchunk_df.shape[:2])
                weights[:, -1] = Ychunk_df[:, -1, :] @ class_weights
                return Xchunk_df, Ychunk_df, weights
            else:
                return Xchunk_df, Ychunk_df

        tot_sessions = int(self.train_len / self.rows_per_sample)
        #tot_batches = math.ceil(tot_sessions / sessions_per_batch)

        number_of_validation_sessions = int(tot_sessions *
                                            validation_percentage)
        number_of_train_sessions = tot_sessions - number_of_validation_sessions
        train_rows = number_of_train_sessions * self.rows_per_sample

        #batches_in_train = math.ceil(number_of_train_sessions / sessions_per_batch)
        #batches_in_val = tot_batches - batches_in_train

        print('Train generator:')
        train_gen = DataGenerator(self,
                                  pre_fit_fn=prefit,
                                  rows_to_read=train_rows)
        #train_gen.name = 'train_gen'
        print('Validation generator:')
        val_gen = DataGenerator(self, pre_fit_fn=prefit, skip_rows=train_rows)
        #val_gen.name = 'val_gen'

        return train_gen, val_gen
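
In the prefit hook above, the matrix product Ychunk_df[:, -1, :] @ class_weights turns the one-hot label of each session's last interaction (the clickout) into a per-sample weight, while all earlier timesteps keep weight zero. A self-contained NumPy sketch of that step, with made-up sizes:

import numpy as np

# Y[b, t, c]: one-hot targets for 2 sessions, 3 timesteps, 4 classes
Y = np.zeros((2, 3, 4))
Y[0, -1, 1] = 1.0                         # last interaction of session 0 is class 1
Y[1, -1, 3] = 1.0                         # last interaction of session 1 is class 3
class_weights = np.array([1.0, 2.0, 1.0, 0.5])

weights = np.zeros(Y.shape[:2])           # one weight per (session, timestep)
weights[:, -1] = Y[:, -1, :] @ class_weights   # weight only the final clickout
print(weights)                            # rows: [0. 0. 2.] and [0. 0. 0.5]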
Example #24
    def train(self):
        self.load_weights()
        train_gen = DataGenerator(self.ddir + '/train',
                                  self.image_size,
                                  self.batch_size,
                                  train=True)
        dev_gen = DataGenerator(self.ddir + '/dev', self.image_size,
                                self.batch_size)
        checkpoint_callback = ModelCheckpoint(os.path.join(self.wdir, 'weights.h5'),
                                              save_best_only=True,
                                              verbose=1)
        earlystopping_callback = EarlyStopping(verbose=1, patience=5)
        callbacks = [checkpoint_callback, earlystopping_callback]
        self.vae.fit_generator(train_gen,
                               validation_data=dev_gen,
                               epochs=999,
                               shuffle='batch',
                               callbacks=callbacks,
                               verbose=1)
Example #25
def load_generators(data_dir):
    # Parameters
    params = {'dim': (96,96),
              'batch_size': 100,
              'n_classes': 2,
              'n_channels': 3,
              'shuffle': True}

    # Data
    data = pd.read_csv(data_dir + 'train_labels.csv')
    train, val = train_test_split(data, test_size = 0.1, random_state=42)
    partition = {"train":list(train['id']), "validation":list(val['id'])}
    labels = dict(zip(data['id'], data['label']))

    train_dir = data_dir + "train/"

    # Generators
    train_gen = DataGenerator(partition['train'], labels, train_dir, **params)
    val_gen = DataGenerator(partition['validation'], labels, train_dir, **params)

    return train_gen, val_gen
Example #26
    def password_probability(self, password):
        """
        Calculate the probability of a given password. This works by
        taking the product of the probabilities of each character
        conditional on the appearance of the preceding characters.
        The Keras model, tokenizer and index-to-character mapping are
        taken from the instance rather than passed as arguments.

        Parameters
        ----------
        password : str
            The password whose probability is to be calculated.

        Returns
        -------
        float
            The length-normalized probability (per-character perplexity)
            of the password.

        """

        # tokenize the password
        token = self.tokenizer.texts_to_sequences([password])[0]
        x_test = DataGenerator.slide_window(token)
        x_test = np.array(x_test)
        y_test = np.asarray(token) - 1

        # determine the probabilities of the permutations of the characters
        probabilities = self.model.predict(x_test, verbose=0)

        # multiply all of the conditional probabilities together in the password
        password_probability = 0
        for index, probability in enumerate(probabilities):
            char_probability = probability[
                y_test[index]]  # get the probability from the model
            password_probability += np.log(
                char_probability)  # use log to avoid roundoff errors

        # calculate the perplexity to account for varying password lengths
        password_length = len(password)
        password_probability /= -password_length
        password_probability = np.exp(
            password_probability)  # map back out of log space

        return password_probability
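
A small numeric sketch of the length normalization used at the end of password_probability: summing log-probabilities, dividing by the negative password length and exponentiating gives a per-character perplexity-style score, which keeps short and long passwords comparable. The per-character probabilities below are made up for illustration:

import numpy as np

# hypothetical conditional probabilities for the 4 characters of a password
char_probs = np.array([0.05, 0.20, 0.10, 0.30])

log_prob = np.sum(np.log(char_probs))        # log of the product, avoids underflow
score = np.exp(log_prob / -len(char_probs))  # length-normalized score
print(score)                                 # approx. 7.6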
Example #27
def main():
    # env
    env_path = find_dotenv()
    load_dotenv(dotenv_path=env_path, verbose=True)
    processed_p = Path(os.environ.get('PATH_PROCESSED')).resolve()
    models_p = Path(os.environ.get('PATH_MODELS')).resolve()
    img_h = int(os.environ.get('IMAGE_HEIGHT'))
    img_w = int(os.environ.get('IMAGE_WIDTH'))
    batch_size = int(os.environ.get('BATCH_SIZE'))
    downsample_factor = int(os.environ.get('DOWNSAMPLE_FACTOR'))
    lr = float(os.environ.get('LEARNING_RATE'))
    # logging
    logging.root.removeHandler(absl.logging._absl_handler)
    absl.logging._warn_preinit_stderr = False
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    logger.info('TensorFlow version: ' + tf.__version__)
    logger.info('Keras version: ' + tf.keras.__version__)
    # parameters
    test_p = processed_p.joinpath('test')
    assert test_p.exists()
    logger.info('load data')
    test_gen = DataGenerator(test_p, img_w, img_h, batch_size,
                             downsample_factor)
    alphabet = test_gen.alphabet
    logger.info('image shape: height=' + str(img_h) + ' width=' + str(img_w))
    logger.info('batch size: ' + str(batch_size))
    logger.info('test samples: ' + str(test_gen.n))
    logger.info('test steps per epoch: ' + str(len(test_gen)))
    logger.info('learning rate: ' + str(lr))
    # model
    checkpoint_p = models_p.joinpath('model.h5')
    assert checkpoint_p.exists()
    model = load_model(str(checkpoint_p), compile=False)
    model.summary()
    logger.info('model loaded')
    # optimizer
    adam = Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={
        'ctc': lambda y_true, y_pred: y_pred
    },
                  optimizer=adam,
                  metrics=['accuracy'])
    logger.info('model compiled')
    # test data
    score = model.evaluate_generator(generator=test_gen,
                                     steps=len(test_gen),
                                     verbose=1)
    logger.info('loss %.3f accuracy: %.3f' % (score[0], score[1]))
Example #28
    def encode(self):
        self.load_weights()
        for split in ['test', 'dev', 'train']:
            print('Encoding {0}'.format(split))
            dpath = os.path.join(self.ddir, split)
            spath = os.path.join(self.wdir, split + '_encodings.h5')
            gen = DataGenerator(dpath, self.image_size, self.batch_size)
            z = self.encoder.predict_generator(gen, verbose=1)
            class_dict = {v: k for k, v in gen.generator.class_indices.items()}
            labels = [class_dict[x] for x in gen.generator.classes]
            with h5py.File(spath, 'w') as f:
                f.create_dataset('encodings', data=z)
                f.create_dataset('filenames',
                                 data=np.array(gen.generator.filenames, dtype='S'))
                f.create_dataset('labels', data=np.array(labels, dtype='S'))
Example #29
def gen_data():
    params = {
        'dim': (Config.NUM_ID, 2 * Config.NUM_INTVL),
        'batch_size': 64,
        'n_classes': 2,
        'n_channels': 1,
        'shuffle': True
    }
    Config.DATAPATH = 'data/test/'
    make_dataset("DoS_variation.csv")
    data = os.listdir(Config.DATAPATH)
    data.remove('labels.npy')
    data = data[int(len(data) / 10 * 8.5):]
    labels = np.load(Config.DATAPATH + "labels.npy")
    gen_test = DataGenerator(data, labels, **params)

    return gen_test
Example #30
def main(_):

    # initialize settings
    settings = Settings(FLAGS.data_path, FLAGS.data_file, FLAGS.images_path,
                        FLAGS.batch_size, FLAGS.epochs, FLAGS.learning_rate,
                        FLAGS.epoch_sample_size)

    # print settings
    print(settings)

    # load, consolidate and augment data
    data_loader = DataLoader(settings)

    # data generator for train and test
    data_generator = DataGenerator(settings, data_loader)

    # setup & fit model
    DataModel(settings).fit(data_generator)

    print("Finally, done!")
Example #31
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    print('selecting gpu  :', args.gpu_id)
    create_folders(args)
    model = get_model(args=args, weights=None)
    model.load_weights(join(args.model_dir))
    print('model:', args.model_dir)
    model = fit_model(model=model)
    path = args.path_test
    path_image = join(path, 'images')
    path_mask = join(path, 'masks')
    list_image = glob(os.path.join(path_image, '*.tif'))
    list_mask = glob(os.path.join(path_mask, '*.tif'))
    min_dataset_values, max_dataset_values = 0, 255  #get_min_max(args.path_train)
    df_path_test = join(
        path, 'dataframe_test_dataset_{}_standarizate.csv'.format(args.size))
    if os.path.exists(df_path_test):
        print('{}    loaded!'.format(df_path_test))
        df_test = pd.read_csv(df_path_test)
    else:
        print('{}    saved!'.format(df_path_test))
        df_test = get_coordinates(paths=list_mask,
                                  out_size=(args.size, args.size),
                                  stride=0.8,
                                  classes=args.classes,
                                  stride_minor_class=0.1)
        df_test.to_csv(df_path_test, index=False, header=True)
    test_generator = DataGenerator(list_IDs=df_test,
                                   batch_size=args.batch,
                                   dim=(args.size, args.size),
                                   n_channels=3,
                                   n_classes=args.classes,
                                   min_max=(min_dataset_values,
                                            max_dataset_values),
                                   shuffle=True)
    report_metrics(model, test_generator)