Ejemplo n.º 1
0
def train_model(model, train, val):
    """Compile *model* and train it on the train/val datasets with checkpointing.

    Args:
        model: a Keras model to train; when None, a fresh one is built via
            ``create_model()``.
        train: training dataset exposing ``labels_list()`` and ``size()``.
        val: validation dataset with the same interface.

    Returns:
        The trained model.
    """
    # The original unconditionally rebuilt the model, silently discarding the
    # ``model`` argument; only build a new one when none was supplied.
    if model is None:
        model = create_model()
    model.compile(optimizer='adam',
                  loss=LOSSES,
                  loss_weights=LOSS_WEIGHTS,
                  metrics=['accuracy'])
    train_labels = train.labels_list()
    val_labels = val.labels_list()

    training_generator = data_generator.DataGenerator(train,
                                                      range(train.size()),
                                                      train_labels, **PARAMS)
    validation_generator = data_generator.DataGenerator(
        val, range(val.size()), val_labels, **PARAMS)

    # Keep only the best epoch (minimum validation loss) on disk.
    filepath = "completemodel_weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')

    #TODO fit generator (ensure generator returns two outputs)
    model.fit_generator(generator=training_generator,
                        validation_data=validation_generator,
                        use_multiprocessing=True,
                        callbacks=[checkpoint],
                        epochs=15)
    return model
def test_if_images_after_preprocessing_are_fine():
    """Smoke test: pull batches from the train/test generators and save them
    to disk so the preprocessed images can be inspected by eye.

    NOTE(review): the original assigned ``tf.ConfigProto`` to a local named
    ``config``, shadowing the module-level ``config`` whose
    ``BATCH_SIZE``/``TF_*`` constants are read below (a ConfigProto has no
    such attributes).  The session config now has its own name.
    """
    graph = tf.Graph()
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess = tf.InteractiveSession(graph=graph, config=sess_config)

    dataset_filenames, dataset_sizes = dataset_name_factory.new_get_noncol_train_data_sorted_by_direction_noncol_test_data(
    )

    # Train generator without augmentation flag, test generator with it
    # (last positional argument) -- presumably a "is_test" switch; confirm.
    train_data_gen = data_generator.DataGenerator(
        config.BATCH_SIZE, config.TF_NUM_CLASSES,
        dataset_sizes['train_dataset'], config.TF_INPUT_SIZE, sess,
        dataset_filenames['train_dataset'], config.TF_INPUT_AFTER_RESIZE,
        False)

    test_data_gen = data_generator.DataGenerator(
        config.BATCH_SIZE, config.TF_NUM_CLASSES,
        dataset_sizes['test_dataset'], config.TF_INPUT_SIZE, sess,
        dataset_filenames['test_dataset'], config.TF_INPUT_AFTER_RESIZE, True)

    tf_train_img_ids, tf_train_images, tf_train_labels = train_data_gen.tf_augment_data_with(
    )
    tf_test_img_ids, tf_test_images, tf_test_labels = test_data_gen.tf_augment_data_with(
    )

    for env_idx in range(4):

        tr_img_id, tr_images, tr_labels = train_data_gen.sample_a_batch_from_data(
            env_idx, shuffle=True)
        ts_img_id, ts_images, ts_labels = test_data_gen.sample_a_batch_from_data(
            env_idx, shuffle=False)

        save_batch_of_data('train_env_%d' % env_idx, tr_images, tr_labels)
        save_batch_of_data('test_env_%d' % env_idx, ts_images, ts_labels)
Ejemplo n.º 3
0
def load_mnist_data():
    """Build the MNIST train/val generators and return their first batches.

    Returns:
        Tuple of (train_batch, val_batch, train_sample_count, val_sample_count).
    """
    save_path = '/data/data/mnist_train_data/images'
    json_train_path = '/data/data/mnist_train_data/labels/train.json'
    json_val_path = '/data/data/mnist_train_data/labels/val.json'

    def _make_generator(json_path):
        # Both generators share every setting except the label file.
        gen = data_generator.DataGenerator(img_dirpath=save_path,
                                           json_path=json_path,
                                           img_w=params.img_w,
                                           img_h=params.img_h,
                                           batch_size=params.batch_size)
        gen.build_data()
        return gen

    train_data = _make_generator(json_train_path)
    train_sample_num = train_data.n

    val_data = _make_generator(json_val_path)
    val_sample_num = val_data.n

    return (train_data.next_batch(), val_data.next_batch(),
            train_sample_num, val_sample_num)
Ejemplo n.º 4
0
def main(model_path=None, data_path=None):
    """Train a ResidualCNN, optionally resuming weights and loading data.

    Args:
        model_path: optional path to pre-trained weights to load first.
        data_path: optional HDF5 file with datasets 'X' and 'y'; when None,
            training data is produced by the simple-agent generator.
    """
    model = ResidualCNN()
    if model_path is not None:  # `is not None`, never `!= None`
        model.load(model_path)

    training_data_X = None
    training_data_y = None
    if data_path is not None:
        # Context manager closes the HDF5 file even on error (the original
        # leaked the handle); np.copy detaches the arrays from the file.
        with h5py.File(data_path, 'r') as h5file:
            training_data_X = np.copy(h5file['X'])
            training_data_y = np.copy(h5file['y'])
    else:
        generator = data_generator.DataGenerator(
            num_workers=constants.NUM_WORKERS)
        training_data_X, training_data_y = generator.generate_simple_agent_data(
        )

    # One model snapshot per epoch, with an increasing version suffix.
    version = 0
    for i in range(constants.EPOCHS):
        model.model.fit(x=training_data_X,
                        y=training_data_y,
                        batch_size=constants.BATCH_SIZE,
                        epochs=1,
                        shuffle=True,
                        validation_split=0.2)
        model.save(constants.SAVE_MODELS_DIR, constants.MODEL_PREFIX, version)
        version += 1
Ejemplo n.º 5
0
    def refresh(self):
        """Regenerate the dataset and retrain the KNN model from scratch."""
        # Positional construction from the configured data settings.
        self.data_gen = data_generator.DataGenerator(
            *configurations.config["data"].values())
        self.features, self.labels = self.data_gen.get_data()

        model = knn.Knn(self.features, self.labels)
        self.knn_model = model
        model.best_params()
        model.train()
Ejemplo n.º 6
0
    def __init__(self, mes, trainable=True):
        """Build the model/data-generator pair described by *mes* and open a session.

        Args:
            mes: configuration object carrying model name/paths/type and
                training hyper-parameters (read through ``mes.config``).
            trainable: when True, prepare training state and restore or
                initialise variables; when False, only restore saved weights.
        """
        self.mes = mes
        self.name = mes.model_name
        self.model_path = mes.model_path
        self.model_save_path = mes.model_save_path
        self.model_log_path = mes.model_log_path
        self.model_type = mes.model_type
        self.col_name = mes.train_col
        self.graph = tf.Graph()
        self.trainable = trainable
        # Select the generator/model pair matching the configured model type.
        if self.model_type == 'LSTM':
            self.data_generator = data_generator_LSTM.DataGeneratorLSTM(mes, trainable)
            self.model = model_cnnlstmpl.LSTMModel(self.mes, self.graph)
        elif self.model_type == 'NOLSTM':
            print("NOLSTM, prepare 4 data_generator")
            self.data_generator = data_generator.DataGenerator(self.mes, trainable, True)
            self.model = model_cnnpl.NOLSTMModel(self.mes, self.graph)
            print("NOLSTM, load model")

        print("start session")
        self.session = tf.Session(graph=self.graph)
        print("started session")
        if trainable:
            self.docs = utils.get_docs(self.col_name)
            self.good_accuracy = self.mes.config['PRE_GOOD_RATE']
            self.best_accuracy_valid = self.good_accuracy
            self.best_accuracy_test = -1.0
            self.dropout_keep_prob_rate = self.mes.config['PRE_DROPOUT_KEEP_PROB']
            self.step_num = self.mes.config['PRE_STEP_NUM']
            self.valid_time = self.mes.config['PRE_VALID_TIME']
            # Integer division: these look like whole batch counts (the
            # original Python-2 `/` floored ints); `//` keeps that on py3.
            self.validate_times = self.data_generator.valid_sz // self.data_generator.test_batch_sz
            self.test_times = self.data_generator.test_sz // self.data_generator.test_batch_sz
            restore_path = self.mes.config.get('MODEL_RESTORE_PATH', None)
            with self.model.graph.as_default():
                if restore_path is not None and os.path.exists(restore_path):
                    self.model.saver.restore(self.session, restore_path)
                    print('Restored from', restore_path)
                else:
                    init = tf.global_variables_initializer()
                    self.session.run(init)
        else:
            with self.model.graph.as_default():
                if self.mes.config['MODEL_RESTORE_PATH'] is not None:
                    self.model.saver.restore(self.session, self.mes.config['MODEL_RESTORE_PATH'])
                    print('Restored from', self.mes.config['MODEL_RESTORE_PATH'])
                else:
                    self.model.saver.restore(self.session, self.model_save_path)
        self.writer = tf.summary.FileWriter(self.model_log_path, self.session.graph)
Ejemplo n.º 7
0
    def setUp(self):
        """Create training/validation generators over the OK/NOK image sets."""
        # Parameters
        params = {'dim': (1363200, 1),
                  'batch_size': 64,
                  'n_classes': 6,
                  'n_channels': 1,
                  'shuffle': True}

        # Directories
        ok_directory = 'C:/Users/Tony/Downloads/Dataset2/OK/'
        nok_directory = 'C:/Users/Tony/Downloads/Dataset2/NOK/'

        labels = data_generator.DataGenerator.build_label_list(ok_directory=ok_directory, nok_directory=nok_directory)

        # 30% of the labelled IDs go to the validation partition.
        partition = data_generator.DataGenerator.build_partition(validation_amount=0.3, labels=labels)

        # Generators (the stray debugging assignment `r=1` was removed)
        self.training_generator = data_generator.DataGenerator(partition['train'], labels, **params)
        self.validation_generator = data_generator.DataGenerator(partition['validation'], labels, **params)
Ejemplo n.º 8
0
def train_classifier(model, train_set, val_set):
    """Compile and fit *model* on the given datasets, checkpointing the best
    weights by validation loss and saving the final model to disk."""
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    print(model.summary())

    training_generator = data_generator.DataGenerator(
        train_set, range(train_set.size()), train_set.labels_list(), **PARAMS)
    validation_generator = data_generator.DataGenerator(
        val_set, range(val_set.size()), val_set.labels_list(), **PARAMS)

    # Keep only the best epoch (minimum validation loss) on disk.
    best_weights = ModelCheckpoint(
        "weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5",
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min')

    model.fit_generator(generator=training_generator,
                        validation_data=validation_generator,
                        use_multiprocessing=True,
                        epochs=20,
                        callbacks=[best_weights])
    model.save("resnetsaveus.h5")
    def __init__(self,
                 model_sub_dir,
                 epoch=None,
                 model_base_dir=DEFAULT_MODEL_BASE_DIR):
        """Load a model checkpoint and build the evaluation data generators.

        Args:
            model_sub_dir: sub-directory (under *model_base_dir*) holding the
                model weights to evaluate.
            epoch: checkpoint epoch to load; None presumably picks a default
                inside ``update_model`` -- TODO confirm.
            model_base_dir: base directory containing model sub-directories.
        """
        self.update_model(model_sub_dir,
                          epoch=epoch,
                          model_base_dir=model_base_dir)

        # Small fixed subset of validation images, served as one batch so the
        # whole set comes out of a single generator step.
        representative_set_df = pd.read_pickle(
            os.path.join(DEFAULT_PICKLE_PATH, 'representative_set.pkl'))
        self.representative_set_gen = data_generator.DataGenerator(
            df=representative_set_df,
            base_dir=DEFAULT_VAL_IMG_PATH,
            input_dim=INPUT_DIM,
            output_dim=OUTPUT_DIM,
            num_hg_blocks=1,  # does not matter
            shuffle=False,
            batch_size=len(representative_set_df),  # single batch
            online_fetch=False,
            is_eval=True)
        # Full validation set, filtered the same way training data is
        # (ratio 1.0 keeps every annotation that passes the filter).
        h = hourglass.HourglassNet(NUM_COCO_KEYPOINTS, DEFAULT_NUM_HG,
                                   INPUT_CHANNELS, INPUT_DIM, OUTPUT_DIM)
        _, val_df = h.load_and_filter_annotations(DEFAULT_TRAIN_ANNOT_PATH,
                                                  DEFAULT_VAL_ANNOT_PATH, 1.0)
        self.val_gen = data_generator.DataGenerator(
            df=val_df,
            base_dir=DEFAULT_VAL_IMG_PATH,
            input_dim=INPUT_DIM,
            output_dim=OUTPUT_DIM,
            num_hg_blocks=1,  # does not matter
            shuffle=False,
            batch_size=DEFAULT_BATCH_SIZE,
            online_fetch=False,
            is_eval=True)
        # Ground-truth COCO annotations used for metric computation.
        self.cocoGt = COCO(DEFAULT_VAL_ANNOT_PATH)
        print("Initialized Evaluation Wrapper!")
Ejemplo n.º 10
0
def global_store():
    """Load the baseline database tables and intersect the cases shared by
    every group in the 'Plan' table.

    NOTE(review): the function builds `case` (and the intermediate tables)
    but never returns anything here -- this excerpt looks truncated; confirm
    against the full file before relying on it.
    """

    database = data_generator.DataGenerator('baseline')

    dfpl = database.table_extract('Plan')

    dfcs = database.table_extract('Case')

    dfmn = database.table_extract('Main')

    dfnl3 = database.table_extract('Nltrd3')

    key_dict = database.key_dict

    # Map each GROUP to the row indices of its plan entries.
    groups2case = dfpl.groupby('GROUP').groups

    groups = sorted(list(groups2case.keys()))

    case_in_group = {}

    for group in groups:

        cases = list(dfpl.loc[list(groups2case[group])]['CASE'])

        case_in_group[group] = cases

    case = set()

    # Running intersection: keep only the cases present in every group.
    for i in range(len(groups)):

        if i == 0:

            case = set(case_in_group[groups[i]])

        else:

            case = case & set(case_in_group[groups[i]])
Ejemplo n.º 11
0
def evaluate_patch_based_network(eval_params, imdb):
    """Evaluate a patch-based segmentation network on every image in *imdb*.

    Per image the pipeline has two resumable steps: (1) predict per-patch
    labels with the loaded network(s) and convert them to boundary
    probability maps; (2) segment those maps with a graph search.  Progress
    is checkpointed to disk (unless saving is disabled) so interrupted runs
    skip already-completed steps.

    Args:
        eval_params: evaluation configuration -- model(s), batch size,
            augmentation function/args, save params, verbosity, and
            ``eval_mode`` in {'both', 'network', 'gs'}.
        imdb: image database providing images, patch labels, names and segs.

    Returns:
        A list when ``eval_params.save_params.output_var`` is True, else None.
        NOTE(review): per-image ``eval_output`` objects are filled in below
        but never appended to ``eval_outputs`` in this excerpt -- confirm
        against the full file.
    """
    # patches need to be constructed and passed to the generator for one image at a time
    if eval_params.save_params.output_var is True:
        eval_outputs = []
    else:
        eval_outputs = None

    for ind in imdb.image_range:
        if eval_params.save_params.output_var is True:
            eval_output = eoutput.EvaluationOutput()
        else:
            eval_output = None

        cur_full_image = imdb.get_image(ind)
        cur_patch_labels = imdb.get_patch_label(ind)
        cur_image_name = imdb.get_image_name(ind)
        cur_seg = imdb.get_seg(ind)

        if eval_params.save_params.output_var is True:
            eval_output.raw_image = cur_full_image
            eval_output.raw_label = cur_patch_labels
            eval_output.image_name = cur_image_name
            eval_output.raw_seg = cur_seg

        if eval_params.verbosity >= 2:
            print("Evaluating image number: " + str(ind + 1) + " (" +
                  cur_image_name + ")...")

        # Resume support: create (or detect) the per-image save file and
        # query how far a previous run got for this image.
        if eval_params.save_params.disable is False:
            if eval_helper.check_exists(eval_params.save_foldername,
                                        cur_image_name):
                # if the file for this image exists then we have already begun this at some point
                print("File already exists")
            else:
                eval_helper.save_initial_attributes(eval_params,
                                                    cur_image_name)

            status = eval_helper.get_complete_status(
                eval_params.save_foldername,
                cur_image_name,
                boundaries=eval_params.boundaries)
        else:
            status = 'none'

        if status == 'none' and (eval_params.eval_mode == 'both'
                                 or eval_params.eval_mode == 'network'):
            # PERFORM STEP 1: evaluate/predict patches with network

            if eval_params.verbosity >= 2:
                print("Augmenting data using augmentation: " +
                      eval_params.aug_desc + "...")

            aug_fn = eval_params.aug_fn_arg[0]
            aug_arg = eval_params.aug_fn_arg[1]

            # augment raw full sized image and label
            augment_image, augment_patch_labels, augment_seg, _, augment_time = \
                aug_fn(cur_full_image, cur_patch_labels, cur_seg, aug_arg)

            if eval_params.save_params.output_var is True:
                eval_output.aug_image = augment_image
                eval_output.aug_label = augment_patch_labels
                eval_output.aug_seg = augment_seg

            if eval_params.verbosity >= 2:
                print("Constructing patches...")

            # construct patches
            input_patches, input_labels, patch_time = \
                datacon.construct_patches_whole_image(augment_image, augment_patch_labels,
                                                             eval_params.patch_size)

            patch_imdb = image_db.ImageDatabase(images=input_patches,
                                                labels=input_labels)

            if eval_params.verbosity >= 2:
                print("Running network predictions...")

            # use a generator to supply data to model (predict_generator)
            # we have already previously augmented to image so need to augment the individual patches

            start_predict_time = time.time()

            import keras

            # Resets the generator's internal counters at the start of each
            # predict call so consecutive images read from the beginning.
            class CustomCallback(keras.callbacks.Callback):
                def __init__(self, gen):
                    keras.callbacks.Callback.__init__(self)
                    self.gen = gen

                def on_predict_begin(self, logs=None):
                    self.gen.batch_gen.batch_counter = 0
                    self.gen.batch_gen.full_counter = 0
                    self.gen.batch_gen.aug_counter = 0

            # Single-model path: one generator, one predict pass.
            if not eval_params.ensemble:
                start_gen_time = time.time()
                gen = data_generator.DataGenerator(
                    patch_imdb,
                    eval_params.batch_size,
                    aug_fn_args=[],
                    aug_mode='none',
                    aug_probs=[],
                    aug_fly=False,
                    shuffle=False,
                    normalise=eval_params.normalise_input,
                    transpose=eval_params.transpose)
                end_gen_time = time.time()
                gen_time = end_gen_time - start_gen_time

                cust_callback = CustomCallback(gen)
                predicted_labels = eval_params.loaded_model.predict_generator(
                    gen,
                    verbose=eval_params.predict_verbosity,
                    callbacks=[cust_callback])
                print(predicted_labels.shape)
            else:
                # Ensemble path: one prediction array per loaded model.
                predicted_labels = []

                for i in range(len(eval_params.loaded_models)):
                    start_gen_time = time.time()
                    gen = data_generator.DataGenerator(
                        patch_imdb,
                        eval_params.batch_size,
                        aug_fn_args=[],
                        aug_mode='none',
                        aug_probs=[],
                        aug_fly=False,
                        shuffle=False,
                        normalise=eval_params.normalise_input,
                        transpose=eval_params.transpose)
                    end_gen_time = time.time()
                    gen_time = end_gen_time - start_gen_time

                    predicted_labels.append(
                        eval_params.loaded_models[i].predict_generator(
                            gen, verbose=eval_params.predict_verbosity))

            end_predict_time = time.time()
            predict_time = end_predict_time - start_predict_time

            if eval_params.verbosity >= 2:
                print("Converting predictions to boundary maps...")

            # convert predictions to usable probability maps
            start_convert_time = time.time()

            if eval_params.boundaries is True and eval_params.save_params.boundary_maps is True:

                if not eval_params.ensemble:

                    prob_maps = convert_predictions_to_maps_patch_based(
                        predicted_labels, imdb.image_width, imdb.image_height)
                else:
                    # Average the per-model maps into one ensemble map.
                    prob_maps = []

                    for i in range(len(predicted_labels)):
                        prob_maps.append(
                            np.expand_dims(
                                convert_predictions_to_maps_patch_based(
                                    predicted_labels[i], imdb.image_width,
                                    imdb.image_height),
                                axis=0))

                    prob_maps = eval_helper.perform_ensemble_patch(prob_maps)
            else:
                prob_maps = None

            if eval_params.save_params.output_var is True:
                eval_output.boundary_maps = prob_maps

            end_convert_time = time.time()
            convert_time = end_convert_time - start_convert_time

            # save data to file
            if eval_params.save_params.disable is False:
                eval_helper.intermediate_save_patch_based(
                    eval_params, imdb, cur_image_name, prob_maps, predict_time,
                    augment_time, gen_time, convert_time, patch_time,
                    augment_image, augment_patch_labels, augment_seg,
                    cur_full_image, cur_patch_labels, cur_seg)

        # Re-query the checkpoint status now that step 1 may have run.
        if eval_params.save_params.disable is False:
            status = eval_helper.get_complete_status(
                eval_params.save_foldername,
                cur_image_name,
                boundaries=eval_params.boundaries)
        else:
            status = 'predict'

        if status == 'predict' and eval_params.boundaries is True and \
                (eval_params.eval_mode == 'both' or eval_params.eval_mode == 'gs'):
            aug_fn = eval_params.aug_fn_arg[0]
            aug_arg = eval_params.aug_fn_arg[1]

            # augment raw full sized image and label
            augment_image, augment_patch_labels, augment_seg, _, augment_time = \
                aug_fn(cur_full_image, cur_patch_labels, cur_seg, aug_arg)

            # load probability maps from previous step
            # NOTE(review): if saving (or boundary-map saving) is disabled,
            # `prob_maps` here relies on step 1 having run in this same
            # iteration; otherwise it may be unbound -- confirm upstream.
            if eval_params.save_params.disable is False and eval_params.save_params.boundary_maps is True:
                prob_maps = eval_helper.load_dataset_extra(
                    eval_params, cur_image_name, "boundary_maps")

            # PERFORM STEP 2: segment probability maps using graph search
            boundary_maps = get_boundary_maps_only(imdb, prob_maps)
            eval_helper.eval_second_step(eval_params,
                                         boundary_maps,
                                         augment_seg,
                                         cur_image_name,
                                         augment_image,
                                         augment_patch_labels,
                                         imdb,
                                         dices=None,
                                         eval_output=eval_output)
        elif eval_params.boundaries is False:
            if eval_params.save_params.disable is False and eval_params.save_params.attributes is True:
                eval_helper.save_final_attributes(eval_params,
                                                  cur_image_name,
                                                  graph_time=None)

        # Optionally drop the bulky intermediate data now that we're done.
        if eval_params.save_params.disable is False and eval_params.save_params.temp_extra is True:
            eval_helper.delete_loadsaveextra_file(eval_params, cur_image_name)

        if eval_params.verbosity >= 2:
            print("DONE image number: " + str(ind + 1) + " (" +
                  cur_image_name + ")...")
            print("______________________________")

    return eval_outputs
Ejemplo n.º 12
0
def train(generated_data_path=None, epochs=1, examples_cap=None):
    """Function to train lstm cvae model.

  Args:
    generated_data_path: If not None, load pre generated data from this path
        instead of generating it anew.
    epochs: Epochs to train the model.
    examples_cap: If not None, restrict to only this many training examples.

  Return:
    Nothing.
  """

    # Prepare ModelConfig
    base_dir = os.getcwd().replace("/contrastive_vae", "")
    glove_dir = base_dir.replace("/code", "/data/glove.twitter.27B/")
    embedding_path = os.path.join(glove_dir, 'glove.twitter.27B.200d.txt')
    short_jokes_path = os.path.join(
        base_dir.replace("/code", '/data/short-jokes-dataset/'),
        "shortjokes.csv")
    hacker_news_path = os.path.join(
        base_dir.replace("/code", '/data/hacker-news-dataset/'),
        "hacker_news_subset_10_to_200.csv")
    model_dir = (base_dir +
                 "/model_checkpoints/contrastive_vae/{:%Y%m%d_%H%M%S}".format(
                     datetime.now()))

    model_config = contrastive_vae_model.ModelConfig(
        positive_data_path=short_jokes_path,
        contrastive_data_path=hacker_news_path,
        embedding_path=embedding_path,
        model_dir=model_dir,
        embedding_dim=200,
        batch_size=32,
        max_nb_words=100000,
        max_nb_examples=None,
        max_sequence_length=50,
        encoder_lstm_dims=[256, 128],
        decoder_lstm_dims=[128, 256],
        latent_dim=64,
        kl_weight=1.,
        optimizer="RMSprop")

    # Set up logging.  The logger must exist *before* its first use (the
    # original referenced the local `logger` inside the except clause one
    # line before assigning it -> UnboundLocalError), and the model dir must
    # exist before basicConfig opens the log file inside it.
    logger = logging.getLogger("contrastive_vae")
    logger.setLevel(logging.INFO)
    try:
        os.makedirs(model_config.model_dir)
    except OSError:  # narrow: dir creation failure, not every exception
        logger.info("Did not successfully make new model dir")
    logging.basicConfig(filename=(model_config.model_dir + '/model_log.log'),
                        level=logging.INFO)

    # Load or generate data
    logger.info("Loading or generating data...")
    if generated_data_path:
        # Pickles are binary: open in "rb" (text mode fails on Python 3) and
        # close the handle deterministically.
        with open(generated_data_path, "rb") as pickle_file:
            x_train, s_train, x_val, s_val, tokenizer, _, _ = pickle.load(
                pickle_file)
    else:
        data_gen = data_generator.DataGenerator(
            positive_data_path=short_jokes_path,
            contrastive_data_path=hacker_news_path)
        x_train, s_train, x_val, s_val, tokenizer, _, _ = data_gen.generate()
    logger.info("Done loading or generating data.")

    # Build and fit model
    contra_vae = contrastive_vae_model.ContraVAE(model_config, tokenizer)
    hist = contra_vae.fit(x_train=x_train,
                          s_train=s_train,
                          x_val=x_val,
                          s_val=s_val,
                          epochs=epochs,
                          examples_cap=examples_cap)
    logger.info(hist.history)
    print("Done.")
Ejemplo n.º 13
0

# ## Premier réseau : Nom des notes

#%%

if __name__ == "__main__":
    # Script entry point: build train/validation generators over the image
    # directories and prepare the CTC-style network (continues past this
    # excerpt).
    #nb_labels = 23 # 23 symbols for the note names
    nb_labels = 15  # 15 symbols for the octaves
    nb_epochs = 50
    ids = dict()
    ids['train'] = os.listdir(os.path.abspath("../data/train_out_x/"))
    ids['valid'] = os.listdir(os.path.abspath("../data/validation_out_x/"))
    batch_size_eval = 16
    train_generator = datas.DataGenerator(ids['train'],
                                          "train",
                                          batch_size=12,
                                          aug_rate=0.25)
    valid_generator = datas.DataGenerator(ids['valid'],
                                          "validation",
                                          batch_size=batch_size_eval,
                                          aug_rate=0.25)
    nb_train = len(ids['train'])
    nb_eval = len(ids['valid'])
    # Probe one validation batch to infer the input feature size.
    # assumes x_valid[1][0] holds (images, ..., targets) -- TODO confirm
    # against DataGenerator.__getitem__.
    x_valid = valid_generator[0]
    y_valid = np.zeros(len(x_valid[1][0][2]))
    nb_features = int(x_valid[1][0][0].shape[2])  # height of the images
    padding_value = 255
    #%%
    network = create_network(nb_features, padding_value, lr=0.0001)
    #%%
    checkout_path = "../models/checkout/test2"
Ejemplo n.º 14
0
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import data_generator as d
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pprint import pprint
import accuracy as ac

i = 1

gen = d.DataGenerator()

plt.figure(figsize=(12, 12))

gen.clearDatabase()
gen.generateDatabase(1 * 5 / 100, 20)

infected_list = np.array(gen.getInfectedList())
km = KMeans(n_clusters=5, n_init=1000, algorithm="full", tol=1e-8)
kmp = km.fit_predict(infected_list)

li = np.asarray(infected_list)
pca = PCA(n_components=2)  #2-dim ensional PCA
transformed = pd.DataFrame(pca.fit_transform(infected_list))
cent_reduc = pd.DataFrame(pca.fit_transform(km.cluster_centers_))

plt.subplot(320 + i)
plt.scatter(transformed[:][0], transformed[:][1], s=20, c=kmp)
plt.scatter(cent_reduc[:][0],
            cent_reduc[:][1],
def rAtk(pred, target, k):
    """Recall@k: fraction of *target* items found in the top-k predictions.

    Args:
        pred: ranked sequence of predicted items (best first).
        target: collection of relevant items.
        k: number of top predictions to consider.

    Returns:
        hits / len(target) as a float; 0.0 when *target* is empty (the
        original raised ZeroDivisionError in that case).
    """
    if not target:
        return 0.0
    relevant = set(target)  # O(1) membership instead of O(n) per item
    correct = sum(1 for item in pred[:k] if item in relevant)
    return correct / float(len(target))


# Load the trained sentence-generation model for the keyword-indexed corpus.
gen = SentenceGeneration()
gen.readModel('keyword_f')

data_gen = data_generator.DataGenerator(
    "../data/code_f_keyword_indexed.txt",
    "../data/comment_f_keyword_indexed.txt", 0.20, 600, 20)

codes, keywords, raw_comment = data_gen.getTestData()

# Reseeding before each shuffle applies the same permutation to all three
# arrays, keeping codes/comments/keywords aligned index-by-index.
np.random.seed(30)
np.random.shuffle(codes)
np.random.seed(30)
np.random.shuffle(raw_comment)
np.random.seed(30)
np.random.shuffle(keywords)

# Accumulators for the evaluation loop (continues beyond this excerpt).
sens = []
co = []
comm = []
r = 0
Ejemplo n.º 16
0
def train_test(ds_name,
               K,
               mode,
               ds_path='Datasets/',
               W=None,
               max_epochs=100,
               test_percent=0.20,
               val_percent=0.10,
               batch_size=20,
               savefig=False,
               showfig=True):
    """Train and evaluate the chosen model on a chosen dataset.

    Args:
        ds_name: dataset name, as the directory of the dataset.
        K: number of receptive-field inputs to the model, of size W each.
        mode: the type of features fed to the classifier; one of
            ['vertex', 'edge', 'comb', 'vertex_channels'].
        ds_path: the path containing the dataset directory.
        W: size of receptive field -- the number of relative graph vertices
            fed into one CNN kernel.  None selects the recommended values;
            an int (or 1-tuple) applies one custom value to both vertex and
            edge fields; a 2-tuple supplies (vertex_width, edge_width) for
            'comb' mode.
        max_epochs: maximum number of epochs.
        test_percent: the test-set percent of the whole dataset.
        val_percent: the validation percent of the train set.
        batch_size: batch size.
        savefig: whether to save the result graphs to pdf files.
        showfig: whether to show the graphs automatically after the run.

    Returns:
        The trained model.

    Raises:
        Exception: when *mode* is not one of the supported values.
    """

    data, labels = prepare_paths(Datasets_dict[ds_name], overwrite=True)
    num_of_classes = len(set(labels.values()))
    rands1 = np.random.random(len(data))
    # Resolve receptive-field widths (wv: vertices, we: edges).
    # NOTE: the original `type(W) == int or type(W) == tuple and len(W) == 1`
    # parsed as `int or (tuple and len==1)`, so a 1-tuple assigned the tuple
    # itself to wv/we; each case is now handled explicitly.
    if isinstance(W, int):
        wv = we = W
    elif isinstance(W, tuple) and len(W) == 1:
        wv = we = W[0]
    elif isinstance(W, tuple):
        wv = W[0]
        we = W[1]
    else:
        rec_width = get_recommended_width(ds_name, ds_path)
        wv = rec_width['V']
        we = rec_width['E']
        print(
            'Chosen Recommended width values are {} for verteces and {} for edges'
            .format(wv, we))
    # Build the model for the requested feature mode; W is normalised to a
    # tuple afterwards so it can be embedded in the run-directory name.
    if mode == 'comb':
        m = create_1DdoubleCnn2(K, wv, we, num_of_classes)
        W = (wv, we)
    elif mode == 'vertex':
        m = create_1Dcnn(K, wv, num_of_classes, n_channels=1)
        W = (wv, )
    elif mode == 'edge':
        m = create_1Dcnn(K, we, num_of_classes)
        W = (we, )
    elif mode == 'vertex_channels':
        m = create_1Dcnn(K, wv, num_of_classes, n_channels=4)
        W = (wv, )
    else:
        raise Exception(
            "'mode' parameter should be in ['vertex','edge','comb','vertex_channels'] "
        )
    # Random train/test split, then a validation split carved from train.
    X_train_ids = data[rands1 > test_percent]
    X_test_ids = data[rands1 <= test_percent]
    rands2 = np.random.random(len(X_train_ids))
    X_val_ids = X_train_ids[rands2 <= val_percent]
    X_train_ids = X_train_ids[rands2 > val_percent]

    dg_train = data_generator.DataGenerator(X_train_ids,
                                            labels,
                                            Datasets_dict[ds_name]['path'],
                                            len(set(labels.values())),
                                            W=W,
                                            k=K,
                                            mode=mode,
                                            batch_size=batch_size)
    dg_test = data_generator.DataGenerator(X_test_ids,
                                           labels,
                                           Datasets_dict[ds_name]['path'],
                                           len(set(labels.values())),
                                           W=W,
                                           k=K,
                                           mode=mode)
    dg_val = data_generator.DataGenerator(X_val_ids,
                                          labels,
                                          Datasets_dict[ds_name]['path'],
                                          len(set(labels.values())),
                                          W=W,
                                          k=K,
                                          mode=mode)
    dirname = 'TB_Dataset-{}__Mode-{}__K-{}__Width-{}'.format(
        ds_name, mode, K, '_'.join([str(w) for w in W]))
    # Train with TensorBoard logging and early stopping on validation accuracy.
    h = m.fit_generator(dg_train,
                        epochs=max_epochs,
                        verbose=2,
                        callbacks=[
                            TensorBoard(dirname),
                            EarlyStopping(patience=10, monitor='val_acc')
                        ],
                        validation_data=dg_val.getallitems(),
                        workers=1)
    X_test, y_test = dg_test.getallitems()
    ev = m.evaluate(X_test, y_test)
    with open(dirname + '/history.json', 'w') as file:
        file.write(json.dumps(h.history))
    plot_graph(dirname, ds_name, 'val_acc', 'acc', 'Accuracy', h, K, mode,
               len(h.epoch), savefig, showfig, W, ev[1])
    plot_graph(dirname, ds_name, 'val_loss', 'loss', 'Loss', h, K, mode,
               len(h.epoch), savefig, showfig, W, ev[0])
    return m
Ejemplo n.º 17
0
                log_embedding = True if epoch % 20 == 0 else False
                write_log(iter_i, log_embedding, dg)

            if (epoch % SAVE_CKP_EVERY) == 0:
                checkpoint = {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }
                net.save_ckp(checkpoint, './models', epoch)

            if epoch % UPDATE_LR_EVERY == 0:
                scheduler.step()
                for param_group in optimizer.param_groups:
                    lr = param_group['lr']
                    print(f'Learning rate updated to: {lr}')

    timeElapsed = datetime.now() - start_time
    print('Finished Training! Time elapsed (hh:mm:ss.ms) {}'.format(timeElapsed))
    print("\nHistory:")
    print(running_loss_history)
    print(running_acc_history)
    write_log(iter_i - 1, log_embedding=True, dg=dg)
    writer.close()


if __name__ == '__main__':
    # Script entry point: build a generator over ./dataset and launch training.
    BATCH_SIZE = 32
    dataset_gen = data_generator.DataGenerator(root='./dataset',
                                               batch_size=BATCH_SIZE)
    run(dataset_gen, batch_size=BATCH_SIZE, num_epochs=50, lr=5e-4)
Ejemplo n.º 18
0
def main():
    """Train a (Conv-)VAE, optionally in a two-stage weakly-supervised
    regime, then produce all evaluation artefacts (plots, GIFs, stats).

    Stage 1 trains with labelled data only.  In ``--mode weakly`` a second
    stage reloads the stage-1 weights and continues training with labelled
    and unlabelled data.  Everything is written under ``--out``.

    Fixes over the previous revision:
    * ``reshape(len(x) / 2, 2)`` used float division (TypeError in
      Python 3) -- now integer division.
    * ``no_std`` was only defined inside the "weakly" branch but used
      unconditionally later -- now defined once up front.
    * the stage-2 model reload hard-coded ``result/models/final.model``,
      which broke with a custom ``--out`` -- now uses ``models_folder``.
    """
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--gpu',
                        default=0,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result/',
                        help='Directory to output the result')
    parser.add_argument(
        '--epoch_labelled',
        '-e',
        default=100,
        type=int,
        help='Number of epochs to learn only with labelled data')
    parser.add_argument(
        '--epoch_unlabelled',
        '-u',
        default=100,
        type=int,
        help='Number of epochs to learn with labelled and unlabelled data')
    parser.add_argument('--dimz',
                        '-z',
                        default=2,
                        type=int,
                        help='Dimention of encoded vector')
    parser.add_argument('--batchsize',
                        '-batch',
                        type=int,
                        default=128,
                        help='Learning minibatch size')
    parser.add_argument('--data',
                        '-d',
                        default='sprites',
                        help='Name of the dataset to be used for experiments')
    parser.add_argument('--model',
                        '-m',
                        default='conv',
                        help='Convolutional or linear model')
    # type=float so values supplied on the command line arrive numeric,
    # not as strings (they previously broke the loss arithmetic).
    parser.add_argument('--beta',
                        '-b',
                        default=100,
                        type=float,
                        help='Beta coefficient for the KL loss')
    parser.add_argument('--gamma',
                        '-g',
                        default=100000,
                        type=float,
                        help='Gamma coefficient for the classification loss')
    parser.add_argument(
        '--labels',
        '-l',
        default="composite",
        help='Determined how to treat the labels for the different images')
    parser.add_argument(
        '--freq',
        '-f',
        default=10,
        type=int,
        help='Frequency at which snapshots of the model are saved.')
    parser.add_argument(
        '--mode',
        default="weakly",
        help='Mode of training - weakly supervised or unsupervised')
    args = parser.parse_args()

    print('\n###############################################')
    print('# GPU: \t\t\t{}'.format(args.gpu))
    print('# dim z: \t\t{}'.format(args.dimz))
    print('# Minibatch-size: \t{}'.format(args.batchsize))
    print('# Epochs Labelled: \t{}'.format(args.epoch_labelled))
    print('# Epochs Unabelled: \t{}'.format(args.epoch_unlabelled))
    print('# Dataset: \t\t{}'.format(args.data))
    print('# Model Architecture: \t{}'.format(args.model))
    print('# Beta: \t\t{}'.format(args.beta))
    print('# Gamma: \t\t{}'.format(args.gamma))
    print('# Frequency: \t\t{}'.format(args.freq))
    print('# Trainign model: \t{}'.format(args.model))
    print('# Out Folder: \t\t{}'.format(args.out))
    print('###############################################\n')

    # Per-epoch statistics collected by training_loop.
    stats = {
        'train_loss': [],
        'train_accs': [],
        'valid_loss': [],
        'valid_rec_loss': [],
        'valid_label_loss': [],
        'valid_label_acc': [],
        'valid_kl': []
    }

    models_folder = os.path.join(args.out, "models")
    manifold_gif = os.path.join(args.out, "gifs/manifold_gif")
    scatter_gif = os.path.join(args.out, "gifs/scatter_gif")
    scatter_folder = os.path.join(args.out, "scatter")
    eval_folder = os.path.join(args.out, "eval")
    # Recreate the checkpoint folder from scratch for this run; guard the
    # rmtree so a fresh output directory does not crash the script.
    if os.path.exists(models_folder):
        shutil.rmtree(models_folder)
    os.mkdir(models_folder)

    # Manifold boundaries are drawn this many diagonal-covariance units
    # around the latent mean.  Defined once here because it is needed both
    # in the "weakly" branch and in the unconditional analysis below.
    no_std = 2

    # In unsupervised mode the unlabelled split participates from stage 1.
    if args.mode == "unsupervised":
        ignore = []
    else:
        ignore = ["unlabelled"]

    generator = data_generator.DataGenerator()
    (train, train_labels, train_concat, train_vectors, test, test_labels,
     test_concat, test_vectors, unseen, unseen_labels, unseen_concat,
     unseen_vectors, groups) = generator.generate_dataset(ignore=ignore,
                                                          args=args)

    data_dimensions = train.shape
    print('\n###############################################')
    print("DATA_LOADED")
    print("# Training: \t\t{0}".format(train.shape))
    print("# Training labels: \t{0}".format(set(train_labels)))
    print("# Training labels: \t{0}".format(train_labels.shape))
    print("# Training vectors: \t{0}".format(train_vectors.shape))
    print("# Testing: \t\t{0}".format(test.shape))
    print("# Testing labels: \t{0}".format(set(test_labels)))
    print("# Testing labels: \t{0}".format(test_labels.shape))
    print("# Testing vectors: \t{0}".format(test_vectors.shape))
    print("# Unseen: \t\t{0}".format(unseen.shape))
    print("# Unseen labels: \t{0}".format(set(unseen_labels)))
    print('###############################################\n')

    train_iter = chainer.iterators.SerialIterator(train_concat, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_concat,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Prepare VAE model, defined in net.py
    if args.model == "conv":
        if args.data == "sprites":
            model = net.Conv_VAE(train.shape[1],
                                 n_latent=args.dimz,
                                 groups=groups,
                                 beta=args.beta,
                                 gamma=args.gamma)
        else:
            model = net.Conv_VAE_MNIST(train.shape[1],
                                       args.dimz,
                                       beta=args.beta)
    else:
        model = net.VAE(train.shape[1], args.dimz, 500)

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    lf = model.get_loss_func()

    # Stage 1: labelled data only.
    stats, model, optimizer, epochs_so_far = training_loop(
        model=model,
        optimizer=optimizer,
        stats=stats,
        epochs=args.epoch_labelled,
        train_iter=train_iter,
        test_iter=test_iter,
        lf=lf,
        models_folder=models_folder,
        args=args)

    print("Save Model\n")
    serializers.save_npz(os.path.join(models_folder, 'final.model'), model)

    print("Save Optimizer\n")
    serializers.save_npz(os.path.join(models_folder, 'final.state'), optimizer)

    print("Clear Images from Last experiment\n")
    clear_last_results(folder_name=args.out)

    if args.mode == "weakly":
        # Intermediate (stage-1) evaluation plots, then stage 2 training
        # with the unlabelled split included.
        model.to_cpu()

        config_parser = ConfigParser("config/config.json")
        groups = config_parser.parse_groups()

        # calculate manifold boundaries
        latent = model.get_latent(test).data
        mean = np.mean(latent, axis=0)
        cov = np.cov(latent.T)
        # boundaries are [[min_x, min_y],[max_x, max_y]]
        boundaries = np.array(
            [mean - no_std * cov.diagonal(), mean + no_std * cov.diagonal()])

        # assign colors to each label for plotting purposes
        all_labels = np.append(test_labels, unseen_labels, axis=0)
        colors = attach_colors(labels=all_labels)

        # visualise the learnt data manifold in the latent space
        print("Plot Reconstructed images sampeld from a standart Normal\n")
        data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
        figure_title = "Manifold Visualisation"
        plot_sampled_images(model=model,
                            data=data,
                            boundaries=boundaries,
                            image_size=data_dimensions[-1],
                            image_channels=data_dimensions[1],
                            filename=os.path.join(args.out, "manifold"),
                            figure_title=figure_title)

        print("Performing Reconstructions\n")
        perform_reconstructions(model=model,
                                train=train,
                                test=test,
                                unseen=unseen,
                                no_images=25,
                                name_suffix="supervised",
                                args=args)

        os.mkdir(os.path.join(scatter_folder, "supervised"))
        print("Plot Latent Testing Distribution for Singular Labels\n")
        data = np.repeat(test, 2, axis=0)
        plot_labels = test_labels
        plot_separate_distributions(data=data,
                                    labels=plot_labels,
                                    groups=groups,
                                    boundaries=boundaries,
                                    colors=colors["singular"],
                                    model=model,
                                    filename=os.path.join(
                                        scatter_folder, "supervised",
                                        "singular_separate"))
        plot_overall_distribution(data=data,
                                  labels=plot_labels,
                                  boundaries=boundaries,
                                  colors=colors["singular"],
                                  model=model,
                                  filename=os.path.join(
                                      scatter_folder, "supervised",
                                      "singular_together"))

        # Regenerate the dataset WITHOUT ignoring the unlabelled split.
        generator = data_generator.DataGenerator()
        (train, train_labels, train_concat, train_vectors, test, test_labels,
         test_concat, test_vectors, unseen, unseen_labels, unseen_concat,
         unseen_vectors, groups) = generator.generate_dataset(args=args)

        data_dimensions = train.shape
        print('\n###############################################')
        print("DATA_LOADED")
        print("# Training: \t\t{0}".format(train.shape))
        print("# Training labels: \t{0}".format(set(train_labels)))
        print("# Training labels: \t{0}".format(train_labels.shape))
        print("# Training vectors: \t{0}".format(train_vectors.shape))
        print("# Testing: \t\t{0}".format(test.shape))
        print("# Testing labels: \t{0}".format(set(test_labels)))
        print("# Testing labels: \t{0}".format(test_labels.shape))
        print("# Testing vectors: \t{0}".format(test_vectors.shape))
        print("# Unseen: \t\t{0}".format(unseen.shape))
        print("# Unseen labels: \t{0}".format(set(unseen_labels)))
        print('###############################################\n')

        train_iter = chainer.iterators.SerialIterator(train_concat,
                                                      args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test_concat,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

        model = net.Conv_VAE(train.shape[1],
                             n_latent=args.dimz,
                             groups=groups,
                             beta=args.beta,
                             gamma=args.gamma)
        # Reload stage-1 weights from the configured output folder (was
        # previously hard-coded to "result/models/final.model").
        serializers.load_npz(os.path.join(models_folder, 'final.model'),
                             model)
        model.gamma = 10000
        # model.beta = 1

        if args.gpu >= 0:
            # Make a specified GPU current
            chainer.cuda.get_device_from_id(args.gpu).use()
            model.to_gpu()

        optimizer = chainer.optimizers.Adam()
        optimizer.setup(model)

        lf = model.get_loss_func()
        # Stage 2: labelled + unlabelled data.
        stats, model, optimizer, _ = training_loop(
            model=model,
            optimizer=optimizer,
            stats=stats,
            epochs=args.epoch_unlabelled,
            train_iter=train_iter,
            test_iter=test_iter,
            lf=lf,
            models_folder=models_folder,
            epochs_so_far=epochs_so_far,
            args=args)


########################################
########### RESULTS ANALYSIS ###########
########################################

    model.to_cpu()

    config_parser = ConfigParser("config/config.json")
    groups = config_parser.parse_groups()

    # calculate manifold boundaries
    latent = model.get_latent(test).data
    mean = np.mean(latent, axis=0)
    cov = np.cov(latent.T)
    # boundaries are [[min_x, min_y],[max_x, max_y]]
    boundaries = np.array(
        [mean - no_std * cov.diagonal(), mean + no_std * cov.diagonal()])

    # assign colors to each label for plotting purposes
    all_labels = np.append(test_labels, unseen_labels, axis=0)
    colors = attach_colors(labels=all_labels)

    # visualise the learnt data manifold in the latent space
    print("Plot Reconstructed images sampeld from a standart Normal\n")
    data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
    figure_title = "Manifold Visualisation"
    plot_sampled_images(model=model,
                        data=data,
                        boundaries=boundaries,
                        image_size=data_dimensions[-1],
                        image_channels=data_dimensions[1],
                        filename=os.path.join(args.out, "manifold_1"),
                        figure_title=figure_title)

    print("Test time Classification\n")
    tmp_labels = test_time_classification(data_test=np.repeat(test, 2, axis=0),
                                          data_all=np.append(test,
                                                             unseen,
                                                             axis=0),
                                          labels=test_labels,
                                          unseen_labels=unseen_labels,
                                          groups=groups,
                                          boundaries=boundaries,
                                          model=model,
                                          colors=colors,
                                          folder_name=eval_folder)

    print("Label Analisys\n")
    true_labels = np.append(test_labels, unseen_labels, axis=0)
    label_analysis(labels=true_labels,
                   predictions=tmp_labels,
                   groups=groups,
                   model=model,
                   folder_name=eval_folder)

    print("Saving the loss plots\n")
    plot_loss_curves(stats=stats, args=args)

    print("Evaluate Axes Alignment\n")
    data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
    plot_labels = np.append(test_labels, unseen_labels, axis=0)
    axes_alignment(data=data,
                   labels=plot_labels,
                   model=model,
                   folder_name=eval_folder)

    print("Performing Reconstructions\n")
    perform_reconstructions(model=model,
                            train=train,
                            test=test,
                            unseen=unseen,
                            no_images=25,
                            name_suffix="weakly_supervised",
                            args=args)

    print("Plot Latent Testing Distribution for Singular Labels\n")
    data = np.repeat(test, 2, axis=0)
    plot_labels = test_labels
    plot_separate_distributions(data=data,
                                labels=plot_labels,
                                groups=groups,
                                boundaries=boundaries,
                                colors=colors["singular"],
                                model=model,
                                filename=os.path.join(scatter_folder,
                                                      "singular_separate"))
    plot_overall_distribution(data=data,
                              labels=plot_labels,
                              boundaries=boundaries,
                              colors=colors["singular"],
                              model=model,
                              filename=os.path.join(scatter_folder,
                                                    "singular_together"))

    print(
        "Plot Latent Testing Distribution for Singular Labels + Unseen Distribution\n"
    )
    data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
    plot_labels = np.append(test_labels, unseen_labels, axis=0)
    plot_separate_distributions(data=data,
                                labels=plot_labels,
                                boundaries=boundaries,
                                colors=colors["singular"],
                                model=model,
                                filename=os.path.join(
                                    scatter_folder,
                                    "singular_separate_unseen"))
    plot_overall_distribution(data=data,
                              labels=plot_labels,
                              boundaries=boundaries,
                              colors=colors["singular"],
                              model=model,
                              filename=os.path.join(
                                  scatter_folder, "singular_together_unseen"))

    if args.labels == "composite":
        print("Plot Latent Testing Distribution for Composite Labels\n")
        # compose the composite labels: consecutive label pairs are joined
        # into one "a_b" label.  Integer division -- "/" produced a float
        # here, which numpy.reshape rejects in Python 3.
        data = test
        test_labels_tmp = test_labels.reshape(len(test_labels) // 2, 2)
        plot_labels = np.array(["_".join(x) for x in test_labels_tmp])
        plot_separate_distributions(data=data,
                                    labels=plot_labels,
                                    boundaries=boundaries,
                                    colors=colors["composite"],
                                    model=model,
                                    filename=os.path.join(
                                        scatter_folder, "composite_separate"))
        plot_overall_distribution(data=data,
                                  labels=plot_labels,
                                  boundaries=boundaries,
                                  colors=colors["composite"],
                                  model=model,
                                  filename=os.path.join(
                                      scatter_folder, "composite_together"))

        print(
            "Plot Latent Testing Distribution for Composite Labels + Unseen Distribution\n"
        )
        data = np.append(test, unseen, axis=0)
        test_labels_tmp = np.append(test_labels, unseen_labels, axis=0)
        test_labels_tmp = test_labels_tmp.reshape(len(test_labels_tmp) // 2, 2)
        plot_labels = np.array(["_".join(x) for x in test_labels_tmp])
        plot_separate_distributions(data=data,
                                    labels=plot_labels,
                                    boundaries=boundaries,
                                    colors=colors["composite"],
                                    model=model,
                                    filename=os.path.join(
                                        scatter_folder,
                                        "composite_separate_unseen"))
        plot_overall_distribution(data=data,
                                  labels=plot_labels,
                                  boundaries=boundaries,
                                  colors=colors["composite"],
                                  model=model,
                                  filename=os.path.join(
                                      scatter_folder,
                                      "composite_together_unseen"))

    # Replay every intermediate checkpoint (all but "final.*") to build the
    # per-epoch frames used by the GIFs below.
    print("Generating data for retrospective model evaluation\n")
    for model_name in list(
            filter(lambda name: "final" not in name,
                   os.listdir(models_folder))):
        serializers.load_npz(os.path.join(models_folder, model_name), model)
        filename = model_name.replace(".model", "")

        figure_title = "Manifold Visualisation for epoch {0}".format(filename)
        data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
        plot_sampled_images(model=model,
                            data=data,
                            boundaries=boundaries,
                            image_size=data_dimensions[-1],
                            image_channels=data_dimensions[1],
                            filename=os.path.join(manifold_gif, filename),
                            figure_title=figure_title)

        data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
        plot_labels = np.append(test_labels, unseen_labels, axis=0)
        for key in groups:
            if not os.path.exists(os.path.join(scatter_gif, key)):
                os.mkdir(os.path.join(scatter_gif, key))
            plot_group_distribution(data=data,
                                    labels=plot_labels,
                                    boundaries=boundaries,
                                    colors=colors["singular"],
                                    model=model,
                                    group_id=key,
                                    filename=os.path.join(
                                        scatter_gif, key, filename))

    # Frame files are "<epoch>_<rest>"; sort numerically by epoch before
    # handing them to ImageMagick's `convert`.
    print("Making the Latent Manifold GIF\n")
    samples = [x.split('_')[0] for x in os.listdir(manifold_gif)]
    rests = ['_'.join(x.split('_')[1:]) for x in os.listdir(manifold_gif)]
    samples.sort(key=int)
    samples = [
        os.path.join(manifold_gif, x + "_" + rest)
        for (x, rest) in zip(samples, rests)
    ]
    result_name = os.path.join(manifold_gif, "samples_animation.gif")
    subprocess.call(["convert", "-loop", "5", "-delay", "50"] + samples +
                    [result_name])

    for key in groups:
        print("Making the Composite Label Distribution GIF for group" + key +
              "\n")
        folder_name = os.path.join(scatter_gif, key)
        distr = [x.replace(".png", "") for x in os.listdir(folder_name)]
        distr.sort(key=int)
        distr = [os.path.join(folder_name, x + ".png") for x in distr]
        result_name = os.path.join(folder_name, "distr_animation.gif")
        subprocess.call(["convert", "-loop", "5", "-delay", "50"] + distr +
                        [result_name])
Ejemplo n.º 19
0
]
def _sorted_file_paths(folder):
    """Alphabetically sorted full paths of every entry directly in *folder*."""
    return [folder + name for name in sorted(os.listdir(folder))]


list_with_train_labels_paths = _sorted_file_paths(DATA_FOLDER + 'train/' +
                                                  'labels/')
list_with_val_imgs_paths = _sorted_file_paths(DATA_FOLDER + 'test/' + 'data/')
list_with_labels_paths = _sorted_file_paths(DATA_FOLDER + 'test/' + 'labels/')

# Training pipeline: cached samples with augmentation, reshuffled each epoch.
train_set = DataLoader(data_generator.DataGenerator(
    list_with_train_imgs_paths,
    list_with_train_labels_paths,
    cache=True,
    augmentation=True),
                       batch_size=BATCH_SIZE,
                       shuffle=True)

# Evaluation pipeline: no augmentation, deterministic order.
test_set = DataLoader(data_generator.DataGenerator(list_with_val_imgs_paths,
                                                   list_with_labels_paths,
                                                   cache=True,
                                                   augmentation=False),
                      batch_size=BATCH_SIZE)


def sliding_window(top, step=10, window_size=(20, 20)):
    """ Slide a window_shape window across the image with a stride of step """
    for x in range(0, top.shape[0], step):
        if x + window_size[0] > top.shape[0]:
            x = top.shape[0] - window_size[0]
Ejemplo n.º 20
0
        dataset_info['train_size'] = 50000
    if datatype == 'cifar-100':
        image_size = 24
        n_iterations = 400
        num_labels = 100
        dataset_info['dataset_name'] = 'cifar-100'
        dataset_info['n_channels'] = 3
        dataset_info['resize_to'] = 0
        dataset_info['n_slices'] = 1
        dataset_info['train_size'] = 50000

    batch_size = dataset_info['train_size'] // 10
    train_dataset, train_labels = read_data_file(datatype)

    data_gen = data_generator.DataGenerator(
        batch_size, num_labels, dataset_info['train_size'],
        dataset_info['n_slices'], image_size, dataset_info['n_channels'],
        dataset_info['resize_to'], dataset_info['dataset_name'], session)

    if datatype != 'imagenet-250':
        tf_train_images = tf.placeholder(tf.float32,
                                         shape=(batch_size, image_size,
                                                image_size,
                                                dataset_info['n_channels']),
                                         name='TrainDataset')
    else:
        train_train_images = tf.placeholder(
            tf.float32,
            shape=(batch_size, dataset_info['resize_to'],
                   dataset_info['resize_to'], dataset_info['n_channels']),
            name='TrainDataset')
Ejemplo n.º 21
0
    epochs=args.epochs,
    loss_weight=args.loss_weight,
    checkpoint_dir=args.checkpoint_dir,
    logs=args.tensorboard_dir,
)

# Resume from a previous checkpoint when one was supplied on the CLI.
# (`is not None` instead of `!= None`: identity check is the Python idiom
# and is robust against types overriding __eq__.)
if args.prev_checkpoint is not None:
    AnoVAEGAN1.load_model_checkpoint(args.prev_checkpoint)

train_path_list = os.listdir(args.dataset + '/train')
test_path_list = os.listdir(args.dataset + '/test')

# Training generator: batch/image parameters taken from the CLI arguments.
train_generator = data_generator.DataGenerator(
    list_IDs=train_path_list,
    directory=args.dataset + '/train',
    batch_size=args.batch_size,
    image_size=(args.image_size, args.image_size),
    n_channels=args.n_channels,
    shuffle=args.shuffle_data)

# Test generator serves the whole test split as a single batch.
test_generator = data_generator.DataGenerator(list_IDs=test_path_list,
                                              directory=args.dataset + '/test',
                                              batch_size=len(test_path_list))

# generated_images = AnoVAEGAN1.generator(train_generator.__getitem__(0), training = True)
# # print(generated_images.shape)

#Adding data to JSON file.
#metadata = {}
#metadata['dataset'] = args.dataset
#metadata['batch_size'] = batch_size
Ejemplo n.º 22
0
import data_generator
import json
import sys
# Stream five million generated records to disk in chunks of 10 000,
# naming each chunk file after the loop index at which it was flushed.
gen = data_generator.DataGenerator('fields.json')

total = 5000000
suffix = str(0)
batch = []
for index in range(total + 1):
    if index % 10000 == 0 and index > 2:
        # Persist the accumulated chunk, then start a fresh one.
        with open('testfiles/testdata_' + suffix, 'a') as fh:
            json.dump(batch, fh, indent=1)
        batch = []
        suffix = str(index)
        print('{}%'.format(index / total * 100), end='\r')
        if index == total:
            break
    batch.append(gen.generate())
Ejemplo n.º 23
0
# Record the hyper-parameter configuration of this run in the log file.
flog.write('leaky:{}, dropout:{}, rnnlen: {}, segment_size:{}\n'.format(
    leaky, dropout, rnn_len, segment_size))
flog.write('version:{}, existing model:{}\n'.format(version, existing_model))
flog.write('train data: {}, {}\n'.format(file_train_data, file_train_label))
flog.flush()

########## Loading data
# Keyword arguments shared by every DataGenerator built below.
params = dict(
    dim=(segment_size, nbr_feature),
    batch_size=batch_size,
    n_channels=1,
    rnn_len=rnn_len,
)

# Training batches are shuffled; validation/test keep a fixed order so
# that AUC can be computed against the label files.
train_generator = data_generator.DataGenerator(
    file_train_data, file_train_label, shuffle=True, **params)
val_generator = data_generator.DataGenerator(
    file_val_data, file_val_label, shuffle=False,
    **params)  #set shuffle=False to calculate AUC
test_generator1 = data_generator.DataGenerator(
    file_test_data1,
    file_test_label1,
    shuffle=False,
    use_reverse=False,
    **params)  #set shuffle=False to calculate AUC
test_generator2 = data_generator.DataGenerator(
    file_test_data2,
    file_test_label2,
    shuffle=False,
    use_reverse=False,
Ejemplo n.º 24
0
Archivo: eval.py Proyecto: romty/PhU
import matplotlib.pyplot as plt
# Load a wrapped-phase image from the first .mat file in `temp` and unwrap
# it without wrap-around on any axis, to sanity-check the unwrapping step.
img = scipy.io.loadmat(temp[0])['wrap']
mask_x = unwrap(img,
                wrap_around_axis_0=False,
                wrap_around_axis_1=False,
                wrap_around_axis_2=False)
# Side-by-side view: wrapped input (left) vs. unwrapped result (right),
# both passed through dg.normalize_angle for display.
plt.figure(figsize=(10, 10))
plt.subplot(121)
plt.imshow(dg.normalize_angle(img), cmap='jet')
plt.subplot(122)
plt.imshow(dg.normalize_angle(mask_x), cmap='jet')
plt.show()
# NOTE(review): class_map's meaning comes from dg.DataGenerator -- presumably
# a class/label mapping id; confirm against the generator implementation.
class_map = 1
test_generator = dg.DataGenerator(test_pair,
                                  class_map,
                                  batch_size=20,
                                  dim=(256, 256, 1),
                                  shuffle=True)
# Number of batches the generator yields (len(test_generator) would be
# the idiomatic spelling).
test_steps = test_generator.__len__()
# Bare expression: notebook-style echo of the value above.
test_steps


class eval_denoising:
    def __init__(
            self,
            I1,
            I2,  # I1 and I2 are the two images to compare
            I3=None,  # Image bruitée
            PSNR_peak=255):  # default value for PSNR
        self.I1 = I1  # result
        self.I2 = I2  # objective
Ejemplo n.º 25
0
import imp

import evaluation
import data_generator
# Hot-reload the project modules so notebook edits take effect without a
# kernel restart (imp is deprecated; importlib.reload is the modern form).
imp.reload(evaluation)
imp.reload(data_generator)
from constants import *

# Fixed validation subset used for qualitative epoch-by-epoch comparison.
representative_set_df = pd.read_pickle(os.path.join(DEFAULT_PICKLE_PATH, 'representative_set.pkl'))
subdir = '2021-03-31-08h-54m_batchsize_16_hg_4_loss_weighted_mse_aug_light_sigma4_learningrate_5.0e-03_opt_rmsProp_gt-4kp_activ_sigmoid_subset_0.50_wmse-1-5'

# Single batch covering the whole representative set, served in a fixed
# order (shuffle=False) from local disk (online_fetch=False).
generator = data_generator.DataGenerator(
            df=representative_set_df,
            base_dir=DEFAULT_VAL_IMG_PATH,
            input_dim=INPUT_DIM,
            output_dim=OUTPUT_DIM,
            num_hg_blocks=1, # doesn't matter for evaluation b/c we take one stack for GT
            shuffle=False,
            batch_size=len(representative_set_df),
            online_fetch=False)

# %% Run visualization on epoch range and save images to disk

epochs_to_visualize = [27, 28] #range(34,45)
print("\n\nEval start:   {}\n".format(time.ctime()))
for epoch in epochs_to_visualize:
    eval = evaluation.Evaluation(
        model_sub_dir=subdir,
        epoch=epoch)
    X_batch, y_stacked = generator[0] # There is only one batch in the generator
    y_batch = y_stacked[0] # take first hourglass section
Ejemplo n.º 26
0
def run(dg, batch_size=128, num_epochs=5, lr=0.001):
    """Train the module-level ``model`` on batches drawn from *dg*.

    Parameters
    ----------
    dg : data_generator.DataGenerator or None
        Source of training batches; a default one is built when ``None``.
    batch_size : int
        Samples per batch (also used when building the default generator).
    num_epochs : int
        Number of passes over the training set.
    lr : float
        Initial Adam learning rate; decayed by 0.95 every
        ``UPDATE_LR_EVERY`` epochs via the StepLR scheduler.

    Side effects: writes scalars/embeddings via ``writer``/``write_log``,
    saves checkpoints under ``./models``, and closes ``writer`` at the end.
    """
    if dg is None:
        dg = data_generator.DataGenerator(root='./dataset', batch_size=batch_size)

    # betas=(0, 0) disables both Adam moment estimates.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0, 0))
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.95, last_epoch=-1)

    # -(-a // b) is ceiling division: partial batches count as one iteration.
    total_iters = -(-len(dg.train_dataset) // batch_size) * num_epochs
    print("NUM_EPOCHS = {}, BATCH_SIZE = {}, len(train_set) = {} "
          "--> #Iterations = {}\n".format(num_epochs, batch_size, len(dg.train_dataset), total_iters))
    start_time = datetime.now()
    iter_i = 1
    # Note: set DEBUG=True to see classification acc. after every epoch, at
    # the cost of computing the histogram, which takes up some time and
    # therefore leads to a longer training time.  Hoisted out of the loop:
    # it is a constant.
    DEBUG = True
    for epoch in range(1, num_epochs + 1):
        print(f'Epoch {epoch}')
        running_loss = 0.0

        for anchor in dg.train_loader:
            inputs = dg.make_batch(anchor)
            inputs = inputs.to(device)

            # forward + backward + optimize
            outputs = model(inputs)
            loss = total_loss(outputs)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            iter_i += 1

            if iter_i % 10 == 0:
                writer.add_scalar("Loss", loss.item(), iter_i)
            if iter_i % 1000 == 0:
                write_log(iter_i, log_embedding=True, dg=dg)

        # End-of-epoch bookkeeping.  The previous revision put this in a
        # ``for ... else`` clause; the inner loop has no ``break``, so the
        # else-block always ran -- plain post-loop code is equivalent.
        print_loss_acc(DEBUG, running_loss, dg)
        if total_iters < 1000 and epoch % 5 == 0:
            log_embedding = epoch % 20 == 0
            write_log(iter_i, log_embedding, dg)

        if epoch % SAVE_CKP_EVERY == 0:
            checkpoint = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            net.save_ckp(checkpoint, './models', epoch)

        if epoch % UPDATE_LR_EVERY == 0:
            scheduler.step()
            # Report the decayed rate (a fresh name so the ``lr`` parameter
            # is not silently rebound).
            for param_group in optimizer.param_groups:
                new_lr = param_group['lr']
                print(f'Learning rate updated to: {new_lr}')

    timeElapsed = datetime.now() - start_time
    print('Finished Training! Time elapsed (hh:mm:ss.ms) {}'.format(timeElapsed))
    print("\nHistory:")
    print(running_loss_history)
    print(running_acc_history)
    write_log(iter_i - 1, log_embedding=True, dg=dg)
    writer.close()
Ejemplo n.º 27
0
# %%
# Fixed validation subset used for qualitative heatmap comparison.
representative_set_df = pd.read_pickle(
    os.path.join(DEFAULT_PICKLE_PATH, 'representative_set.pkl'))
subdir = '2021-04-01-21h-59m_batchsize_16_hg_4_loss_weighted_mse_aug_light_sigma4_learningrate_5.0e-03_opt_rmsProp_gt-4kp_activ_sigmoid_subset_0.50_lrfix'
# Renamed from `eval`, which shadowed the builtin of the same name.
evaluator = evaluation.Evaluation(model_sub_dir=subdir, epoch=26)

# %% Save stacked evaluation heatmaps
import data_generator
imp.reload(data_generator)
import time

# One batch covering the whole representative set, served in fixed order
# from local disk.
generator = data_generator.DataGenerator(df=representative_set_df,
                                         base_dir=DEFAULT_VAL_IMG_PATH,
                                         input_dim=INPUT_DIM,
                                         output_dim=OUTPUT_DIM,
                                         num_hg_blocks=evaluator.num_hg_blocks,
                                         shuffle=False,
                                         batch_size=len(representative_set_df),
                                         online_fetch=False)

# Select image to predict heatmaps
X_batch, y_stacked = generator[0]  # There is only one batch in the generator
# X_batch, y_stacked = evaluation.load_and_preprocess_img('data/skier.jpg', eval.num_hg_blocks)
y_batch = y_stacked[0]  # take first hourglass section
# Save stacked heatmap images to disk
m_batch = representative_set_df.to_dict(
    'records'
)  # TODO: eventually this will be passed from data generator as metadata
print("\n\nEval start:   {}\n".format(time.ctime()))
evaluator.visualize_batch(X_batch, y_batch, m_batch)
print("\n\nEval end:   {}\n".format(time.ctime()))
Ejemplo n.º 28
0
import data_generator
from keras.preprocessing import sequence
import keras

from keras.models import Model

from bleu import computeMaps, bleuFromMaps
from text_generator import SentenceGeneration

# Sequence-length limits: captions are capped at 26 tokens, source code at
# 500 tokens; mem_size presumably sizes a memory component of the model
# defined later — TODO confirm against the downstream model code.
max_caption_len = 26
maxlen = 500
mem_size = 30

# Generator over pre-indexed (code, comment) pairs with a 20% held-out split.
data_gen = data_generator.DataGenerator(
    "../qnaData/code_f_keyword_indexed.txt",
    "../qnaData/comment_f_indexed.txt", 0.20, maxlen, max_caption_len)

# Training split: code sequences, partial captions, and next-word targets.
codes, partial_captions, next_words = data_gen.MakeDataset3(train=True)
#
# Zero-pad all sequences to fixed lengths so they batch into dense arrays.
codes = sequence.pad_sequences(codes, maxlen=maxlen)
partial_captions = sequence.pad_sequences(partial_captions,
                                          maxlen=max_caption_len)

# Test split, padded the same way (the T suffix marks test-set arrays).
codesT, partial_captionsT, next_wordsT = data_gen.MakeDataset3(train=False)
codesT = sequence.pad_sequences(codesT, maxlen=maxlen)
partial_captionsT = sequence.pad_sequences(partial_captionsT,
                                           maxlen=max_caption_len)

# Vocabulary size used by the model built further below (outside this view).
vocab_size = 5000
Ejemplo n.º 29
0
import tensorflow as tf
from tensorflow import keras

import numpy as np

import data_generator as dg

# Draw synthetic samples from the project's generator: 1000 for training,
# 10 for testing.
data_generator = dg.DataGenerator()
training_samples = data_generator.training_data(1000)
num_test_samples = 10
testing_samples = data_generator.testing_data(num_test_samples)

# Input width is inferred from one sample; the task has four classes.
input_dim = len(training_samples[0]['input'])
num_classes = 4

# SGD with Nesterov momentum (lr=0.1, momentum=0.9).
optimizer = keras.optimizers.SGD(0.1, 0.9, nesterov=True)

# One small ReLU hidden layer feeding a per-class sigmoid output.
model = keras.Sequential([
    keras.layers.Dense(4, activation='relu', input_dim=input_dim),
    keras.layers.Dense(num_classes, activation='sigmoid'),
])
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
training_data, training_labels = data_generator.samples_to_keras(
        # Attach the per-environment file handler and emit the CSV header row
        # (accuracy, soft accuracy, loss, plus per-direction precision/recall
        # for the L/S/R non-collision classes).
        accuracy_logger.addHandler(accuracyFH)
        accuracy_logger.info('#Train EnvID, Epoch, Test EnvID, Non-collision Accuracy,Non-collision Accuracy(Soft),Non-collision loss,' +
                         'Preci-NC-L,Preci-NC-S,Preci-NC-R,,Rec-NC-L,Rec-NC-S,Rec-NC-R')
        accuracy_loggers.append(accuracy_logger)

    # Fresh TF1 graph/session; soft placement lets ops fall back to CPU.
    graph = tf.Graph()
    configp = tf.ConfigProto(allow_soft_placement=True,log_device_placement=False)
    sess = tf.InteractiveSession(graph=graph,config=configp)

    # NOTE(review): `A and B` evaluates to B here, so only graph.as_default()
    # is actually entered as a context manager — likely intended to be
    # `with sess.as_default(), graph.as_default():`; confirm before changing.
    with sess.as_default() and graph.as_default():
        # Build the network variables and wire the session into the helpers.
        cnn_variable_initializer.set_from_main(sess)
        cnn_variable_initializer.build_tensorflw_variables_detached()
        models_utils.set_from_main(sess,logger)

        # Train-split pipeline; final False/True flag presumably toggles
        # test-mode behavior in the generator — TODO confirm.
        train_data_gen = data_generator.DataGenerator(
            config.BATCH_SIZE, config.TF_NUM_CLASSES, dataset_sizes['train_dataset'],
            config.TF_INPUT_SIZE, sess, dataset_filenames['train_dataset'], config.TF_INPUT_AFTER_RESIZE,False
        )

        test_data_gen = data_generator.DataGenerator(
            config.BATCH_SIZE, config.TF_NUM_CLASSES, dataset_sizes['test_dataset'],
            config.TF_INPUT_SIZE, sess, dataset_filenames['test_dataset'], config.TF_INPUT_AFTER_RESIZE, True
        )

        # Augmented input tensors (ids, images, labels) for both splits.
        tf_train_img_ids, tf_train_images, tf_train_labels = train_data_gen.tf_augment_data_with()
        tf_test_img_ids, tf_test_images, tf_test_labels = test_data_gen.tf_augment_data_with()

        # Define the training/eval ops on the graph, then initialize variables.
        define_tf_ops(tf_train_images, tf_train_labels, tf_test_images, tf_test_labels)

        tf.global_variables_initializer().run(session=sess)

        # Outer loop over three main training episodes (body continues below).
        for main_ep in range(3):