def train(model):
    """Train the BioElmo text model with checkpointing, cyclic LR and early
    stopping, then save the final weights to disk.

    Fixes: EarlyStopping was given ``mode=min`` (the builtin function, not
    the string ``'min'``); the callback list referenced the undefined name
    ``checkpoint``; ``fit_generator`` was passed an invalid
    ``callbacks_list=`` keyword plus the undefined name ``call_list``.
    """
    dataGen = DataGenerator("train", 169728, BATCH_SIZE, True)
    dataGenval = DataGenerator("val", 42432, BATCH_SIZE, False)
    dataGentest = DataGenerator("test", 48832, 64, False)
    # Checkpoint every epoch (save_best_only=False), epoch/val_loss in name.
    filepath = "BioElmoTextModel-{epoch:02d}-{val_loss:.2f}.hdf5"
    modelckp = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                               save_best_only=False, mode='min')
    clr = CyclicLR(base_lr=0.0001, max_lr=0.0006, step_size=2000.)
    es = EarlyStopping(monitor="val_loss", mode='min', verbose=1)
    callbacks_list = [
        modelckp,
        clr,
        es,
        # ROC metrics are computed on the test generator after each epoch.
        roc_callback(dataGentest, np.array(dataGentest.labels)),
    ]
    model.fit_generator(dataGen,
                        callbacks=callbacks_list,
                        validation_data=dataGenval,
                        use_multiprocessing=False,
                        verbose=1,
                        epochs=EPOCHS,
                        workers=4)
    model.save_weights("BioElmoTextModel.h5")
class DataProviderService:
    """In-memory provider of generated candidate and project data."""

    def __init__(self, nr_of_items):
        self.data_generator = DataGenerator()
        self.CANDIDATES = self.data_generator.generate_candidates(nr_of_items)

    def get_candidates(self):
        """Return every candidate held by this provider."""
        return self.CANDIDATES

    def get_candidate(self, id):
        """Return the candidate whose id matches, or None for a falsy or unknown id."""
        if not id:
            return None
        for candidate in self.CANDIDATES:
            if str(candidate["id"]) == id:
                return candidate
        return None

    def get_random_candidates(self, nr_of_candidates):
        """Generate nr_of_candidates fresh random candidates."""
        return self.data_generator.generate_candidates(nr_of_candidates)

    def update_name(self, id, new_name):
        """Rename the first matching candidate; return how many were updated (0 or 1)."""
        for candidate in self.CANDIDATES:
            if str(candidate["id"]) == id:
                candidate["first_name"] = new_name
                return 1
        return 0

    def delete_candidate(self, id):
        """Delete the first matching candidate; True when one was removed."""
        for candidate in self.CANDIDATES:
            if str(candidate["id"]) == id:
                self.CANDIDATES.remove(candidate)
                return True
        return False

    def add_candidate(self, first_name, last_name):
        """Store a new serialized candidate and return its id as text."""
        candidate = Candidate(first_name, last_name, [])
        self.CANDIDATES.append(candidate.serialize())
        return str(candidate.id)

    def add_project(self, project_name, project_description):
        """Append a new project to the first candidate's first experience entry."""
        project = Project(project_name, datetime.datetime.utcnow(),
                          datetime.datetime.utcnow(), project_description)
        self.CANDIDATES[0]['experience'][0]['projects'].append(project.serialize())
        return str(project.id)

    def get_random_projects(self, nr_of_projects):
        """Generate nr_of_projects fresh random projects."""
        return self.data_generator.generate_projects(nr_of_projects, True)
class DataProviderService:
    """Service layer over generated candidate data (lookup, mutate, extend)."""

    def __init__(self, nr_of_items):
        self.data_generator = DataGenerator()
        self.CANDIDATES = self.data_generator.generate_candidates(nr_of_items)

    def _locate(self, id):
        # First candidate whose stringified id matches, else None.
        return next((c for c in self.CANDIDATES if str(c["id"]) == id), None)

    def get_candidates(self):
        """All candidates currently held."""
        return self.CANDIDATES

    def get_candidate(self, id):
        """Candidate matching id, or None (also when id is falsy)."""
        return self._locate(id) if id else None

    def get_random_candidates(self, nr_of_candidates):
        """A freshly generated batch of random candidates."""
        return self.data_generator.generate_candidates(nr_of_candidates)

    def update_name(self, id, new_name):
        """Update first_name of the matching candidate; 1 when updated, else 0."""
        match = self._locate(id)
        if match is None:
            return 0
        match["first_name"] = new_name
        return 1

    def delete_candidate(self, id):
        """Remove the matching candidate; True on success, False when not found."""
        match = self._locate(id)
        if match is None:
            return False
        self.CANDIDATES.remove(match)
        return True

    def add_candidate(self, first_name, last_name):
        """Append a serialized candidate with empty experience; return its id."""
        created = Candidate(first_name, last_name, [])
        self.CANDIDATES.append(created.serialize())
        return str(created.id)

    def add_project(self, project_name, project_description):
        """Attach a new project to the first candidate's first experience entry."""
        created = Project(project_name, datetime.datetime.utcnow(),
                          datetime.datetime.utcnow(), project_description)
        self.CANDIDATES[0]['experience'][0]['projects'].append(created.serialize())
        return str(created.id)

    def get_random_projects(self, nr_of_projects):
        """A freshly generated batch of random projects."""
        return self.data_generator.generate_projects(nr_of_projects, True)
def run(self):
    """Generate train/validation/test splits and execute the three phases in order."""
    generator = DataGenerator()
    (train_x, train_y,
     val_x, val_y,
     test_x, test_y) = generator.make_cases(NUMBER_OF_CASES, SPLIT)
    # generator.display_N_pictures(10)
    self.training_phase(train_x, train_y)
    self.validation_phase(val_x, val_y)
    self.test_phase(test_x, test_y)
def train(self, datasets=(0, 1)):
    """Train the similarity model on a generated pair dataset, checkpointing
    the best weights (by val_loss) under the data prefix.
    """
    # /home/etienne/data/default_experiment/models/model.{epoch:06d}.h5
    current_model_directory = data_prefix + '/data/similarities/%s/%s' % (
        self.type, self.feat)
    import os
    if not os.path.exists(current_model_directory):
        os.makedirs(current_model_directory)
    # Epoch number is zero-padded into the checkpoint filename.
    current_model_path = current_model_directory + '/model.{epoch:06d}.h5'
    lr_scheduler = LearningRateScheduler(lr_schedule)
    lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0,
                                   patience=5, min_lr=1e-7)
    print(current_model_path)
    # tensorboard = keras.callbacks.TensorBoard(log_dir=logs_path, write_graph=False)
    checkpoint = keras.callbacks.ModelCheckpoint(current_model_path,
                                                 save_best_only=True,
                                                 monitor='val_loss',
                                                 mode='min', verbose=1)
    callbacks_list = [
        lr_reducer,
        lr_scheduler,
        # """tensorboard,"""
        checkpoint
    ]
    dataset = generate_dataset((datasets[0], datasets[1]))
    print(dataset.label.value_counts())
    # Hold out 20% of rows (sampled without replacement) for validation.
    validation_set = dataset.sample(int(dataset.shape[0] * 0.2), replace=False)
    training_set = dataset.drop(validation_set.index)
    # Contrastive loss uses a single similarity output; otherwise a 2-way output.
    if self.type == 'contrastive':
        n_classes = 1
    else:
        n_classes = 2
    train_datagen = DataGenerator(training_set, batch_size=BATCH_SIZE,
                                  augment=True, n_classes=n_classes)
    valid_datagen = DataGenerator(validation_set, batch_size=BATCH_SIZE,
                                  n_classes=n_classes)
    # Caps: at most 5000 train steps and 500 validation steps per epoch.
    history = self.model.fit_generator(
        train_datagen,
        epochs=50,
        steps_per_epoch=min(5000, int(training_set.shape[0] / BATCH_SIZE)),
        # steps_per_epoch=1,
        callbacks=callbacks_list,
        validation_steps=min(int(validation_set.shape[0] / BATCH_SIZE), 500),
        # validation_steps=1,
        validation_data=valid_datagen,
        shuffle=True)
    self.history.append(history)
def train():
    """Run n_folds-fold cross-validation training of the LinkNet model.

    Fix: the Adam optimizer configured with the intended learning rate and
    decay was previously built and then discarded — ``compile`` was passed
    the string ``"adam"`` (default settings) instead of the ``optimizer``
    object. The configured optimizer is now actually used.
    """
    ids = np.arange(1, 604)
    kf = KFold(n_splits=n_folds)
    fold = 0
    epochs = 1000
    cvscores = []
    for train_index, test_index in kf.split(ids):
        # NOTE(review): these step counts are floats — confirm the Keras
        # version in use accepts non-integer steps.
        valid_steps = len(test_index) / batch_size
        steps_per_epoch = len(train_index) / (2 * batch_size)  # 2
        #print( train_index[[1,2,3,4,5,6]])
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=5),
            ModelCheckpoint(filepath=prefix_model + "best_m_{}".format(fold),
                            monitor='val_loss', save_best_only=True)
        ]
        training_generator = DataGenerator(**params).generate(
            prefix, ids[train_index], ids[train_index])
        valid_generator = DataGenerator(**params2).generate(
            prefix, ids[test_index], ids[test_index])
        K.clear_session()
        linknet = model.get_model2()
        #print( linknet.summary() )
        learning_rate = 1e-4
        decay_rate = learning_rate / epochs
        optimizer = optimizers.Adam(lr=learning_rate, decay=decay_rate)
        linknet.compile(loss=model.loss, optimizer=optimizer,
                        metrics=['accuracy', model.dice])
        #tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
        linknet.fit_generator(generator=training_generator,
                              steps_per_epoch=steps_per_epoch,
                              callbacks=callbacks,
                              validation_data=valid_generator,
                              validation_steps=valid_steps,
                              epochs=epochs)
        scores = linknet.evaluate_generator(generator=valid_generator,
                                            steps=valid_steps)
        cvscores.append(scores[0])
        predictions = linknet.predict_generator(generator=training_generator,
                                                steps=100)
        save_preds(predictions, fold)
        fold = fold + 1
        del linknet
    for i, sc in enumerate(cvscores):
        print("loss fold {}:{} ".format(i, sc))
def main():
    """Train the pointer network: restore or create the model, loop over
    training batches, and periodically log, validate and checkpoint.
    """
    train_data = DataGenerator(FLAGS, 'train', 40000)
    valid_data = DataGenerator(FLAGS, 'valid', 5000)
    # test_data = DataGenerator(FLAGS, 'test')
    my_model = pointer_net.PointerNet(batch_size=FLAGS.batch_size,
                                      max_input_sequence_len=FLAGS.max_input_sequence_len,
                                      max_output_sequence_len=FLAGS.max_output_sequence_len,
                                      rnn_size=FLAGS.rnn_size,
                                      attention_size=FLAGS.attention_size,
                                      num_layers=FLAGS.num_layers,
                                      beam_width=FLAGS.beam_width,
                                      learning_rate=FLAGS.learning_rate,
                                      max_gradient_norm=FLAGS.max_gradient_norm,
                                      )
    trainsummary = open('trainingsummary.txt', 'w+')
    with tf.Session() as sess:
        writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
        # Resume from the latest checkpoint in log_dir when one exists.
        ckpt = tf.train.get_checkpoint_state(FLAGS.log_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Load model parameters from %s" % ckpt.model_checkpoint_path)
            my_model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            sess.run(tf.global_variables_initializer())
        print('start!!!!!!!!!!!!!!!!!')
        step_time = 0.0  # running mean step time over one checkpoint window
        loss = 0.0       # running mean loss over one checkpoint window
        current_step = 0
        train_flag_var = False
        for _ in range(FLAGS.train_epoch * (train_data.data_size // FLAGS.batch_size)):
            start_time = time.time()
            inputs, enc_input_weights, outputs, dec_input_weights = \
                train_data.get_batch(True)
            summary, step_loss, predicted_ids_with_logits, targets, debug_var = \
                my_model.step(sess, inputs, enc_input_weights, outputs,
                              dec_input_weights, train_flag_var)
            # Window averages accumulate incrementally, one step at a time.
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1
            # Time to print statistic and save model
            if current_step % FLAGS.steps_per_checkpoint == 0:
                train_flag_var = True
                with sess.as_default():
                    gstep = my_model.global_step.eval()
                print("global step %d step-time %.2f loss %.2f" % (gstep, step_time, loss))
                trainsummary.write('Epoch %d \n' % (current_step / FLAGS.steps_per_checkpoint))
                trainsummary.write("global step %d step-time %.2f loss %.2f \n" % (gstep,
                                                                                   step_time, loss))
                ####
                eval_valid(valid_data, my_model, sess, train_flag_var, trainsummary)
                writer.add_summary(summary, gstep)
                checkpoint_path = os.path.join(FLAGS.log_dir, "wdp.ckpt")
                my_model.saver.save(sess, checkpoint_path, global_step=my_model.global_step)
                # Reset the window accumulators for the next checkpoint period.
                step_time, loss = 0.0, 0.0
                train_flag_var = False
    trainsummary.close()
def __init__(self, checkpoint_path):
    """Store training hyper-parameters and build the data generator."""
    self.checkpoint_path = checkpoint_path
    # training schedule
    self.learning_rate = 0.01
    self.num_iter = 100000
    self.batch_size = 16
    # periodic actions (in iterations)
    self.save_iter = 1000
    self.val_iter = 1000
    self.log_iter = 100
    # data layer
    size = (224, 224)
    self.image_size = size
    self.data_generator = DataGenerator(size)
class DataProviderService:
    """In-memory candidate store seeded from a DataGenerator."""

    def __init__(self, nr_of_items):
        self.data_generator = DataGenerator()
        self.CANDIDATES = self.data_generator.generate_candidates(nr_of_items)

    def _find_by_id(self, id):
        # Return the first candidate whose id matches, or None.
        for candidate in self.CANDIDATES:
            if id == str(candidate['id']):
                return candidate
        return None

    def get_candidates(self):
        """Return all candidates."""
        return self.CANDIDATES

    def get_candidate(self, id):
        """Return the matching candidate, or None when id is falsy or unknown."""
        if not id:
            return None
        return self._find_by_id(id)

    def get_random_candidates(self, nr_of_candidates):
        """Generate a fresh random batch of candidates."""
        return self.data_generator.generate_candidates(nr_of_candidates)

    def update_name(self, id, new_name):
        """Set first_name on the matching candidate; return count updated (0 or 1)."""
        candidate = self._find_by_id(id)
        if candidate is None:
            return 0
        candidate['first_name'] = new_name
        return 1

    def delete_candidate(self, id):
        """Delete the matching candidate; True when something was removed."""
        candidate = self._find_by_id(id)
        if candidate is None:
            return False
        self.CANDIDATES.remove(candidate)
        return True

    def add_candidate(self, first_name, last_name):
        """Append a new serialized candidate and return its id as a string."""
        new_candidate = Candidate(first_name, last_name, [])
        self.CANDIDATES.append(new_candidate.serialize())
        return str(new_candidate.id)
def __init__(self, account: Account, max_limit=500):
    """Initialise trading-session state for the given account."""
    self.account = account
    self.max_limit = max_limit
    # data source and the full symbol universe it exposes
    self.dataloader = DataGenerator()
    self.all_tickers = self.dataloader.all_syms
    # iteration state
    self.index = 0
    self.max_idx = None
    self.current_day = None
    self.generator = None
    # per-session tracking collections
    self.tickers = []
    self.tech_indicators = []
    self.minute_account_balances = []
    self.daily_account_balances = []
def iterable_helper(self_data, other_data):
    """Deep-compare two iterables element-wise; True when structurally equal."""
    if len(self_data) != len(other_data):
        return False
    for left, right in zip(self_data, other_data):
        # Elements must share a concrete type before comparing contents.
        if type(left) != type(right):
            return False
        if DataGenerator.is_dict(left):
            if not dicts_helper(left, right):
                return False
        elif DataGenerator.is_iterable(left):
            if not iterable_helper(left, right):
                return False
        elif left != right:
            return False
    return True
def __init__(self, state, year):
    """Hold state/year context plus the generator used to build report tables."""
    self.state = state
    self.year = year
    self._data_generator = DataGenerator(state, year)
    # relationship codes treated as domestic-violence-like
    self.dv_like_relationships = {27, 3, 4, 6, 12, 21, 26}
    # offense codes treated as violent
    self.violent_offenses = {1, 3, 4, 27, 32, 36, 38, 43, 51}
    self._tables = {}
def test__add_to_dict_with__map_filter_cols(self):
    """Agency rows should be stored keyed by id with the mapped column set."""
    generator = DataGenerator('TX', 2000)
    for name, file in generator.extract_zip():
        if 'cde_agencies.csv' not in name.lower():
            continue
        generator._add_to_dict(filename_map.get_data(name), file)
    self.maxDiff = None
    expected_columns = [
        'agency_id', 'ori', 'ncic_agency_name', 'state_id', 'state_abbr',
        'population', 'population_group_code', 'population_group_desc',
        'nibrs_start_date', 'county_name',
    ]
    actual_columns = list(generator._dict['agencies']['19089'][0].keys())
    self.assertCountEqual(expected_columns, actual_columns)
def load_go_data(self, data_type='train', num_samples=1000, use_generator=False):
    """Draw num_samples games, encode any missing archives to disk chunks, and
    return either a DataGenerator over them or the grouped features/labels.
    """
    splitter = Splitter(data_dir=self.data_dir)
    data = splitter.draw_data(data_type, num_samples)
    # Group the sgf file indices by the zip archive that contains them.
    zip_names = set()
    indices_by_zip_name = {}
    for filename, index in data:
        zip_names.add(filename)
        indices_by_zip_name.setdefault(filename, []).append(index)
    for zip_name in zip_names:
        data_file_name = zip_name.replace('.tar.gz', '') + data_type  # train or test
        # Only extract/encode archives whose chunk files are not on disk yet.
        if not os.path.isfile(self.data_dir + '/' + data_file_name):
            self.process_zip(zip_name, data_file_name, indices_by_zip_name[zip_name])
    if use_generator:
        return DataGenerator(self.data_dir, data)
    return self.group_games(data_type, data)
def predict_eval():
    """Evaluate each of the four fold-best checkpoints on the fixed validation split."""
    all_scores = []
    ids = np.arange(1, 604)
    test_index = np.arange(500)
    for fold in range(4):
        K.clear_session()
        weights_path = "../models/best_m_{}".format(fold)
        valid_generator = DataGenerator(**params2).generate(
            prefix, ids[test_index], ids[test_index])
        net = model.get_model2()
        net.load_weights(weights_path)
        net.compile(loss=model.loss, optimizer="adam",
                    metrics=['accuracy', model.dice])
        scores = net.evaluate_generator(generator=valid_generator, steps=50)
        print(scores)
        all_scores.append(scores)
        del net
    #return None
    for entry in all_scores:
        print(entry)
def train(self):
    """Train the landmark model: load sources, split train/validation, build
    generators, restore the latest checkpoint if present, and fit.

    Fix: the validation slice previously started at ``split_data + 1``,
    silently dropping one sample from both sets; it now starts at
    ``split_data`` so every sample is used exactly once.
    """
    # load and save configuration
    config = self._config
    config.save()
    # load training data
    loader = SourceLoader("train", config.get_parameter("landmarks"), config.is_debug())
    sources = loader.get_sources()
    input_shape = config.input_shape()
    # split the data into validation and training set
    len_data = len(sources)
    numbers = list(range(len_data))
    np.random.shuffle(numbers)
    split_data = math.floor(len_data * config.get_parameter('validation_split'))
    # NOTE(review): the first `validation_split` fraction goes to training —
    # confirm the configured fraction is intended for the *training* side.
    train_ids = numbers[0:split_data]
    val_ids = numbers[split_data:]
    validation_sources = sources[val_ids]
    training_sources = sources[train_ids]
    # build data generators
    training_generator = DataGenerator(training_sources, **config.get_bundle("generator"))
    validation_generator = DataGenerator(validation_sources, **config.get_bundle("generator"))
    model = self._generate_model(input_shape)
    # define callback function to save state of the trained model
    cp_callback = tensorflow.keras.callbacks.ModelCheckpoint(**config.get_bundle("checkpoint"))
    # load existing weights if present, resuming the epoch count from the
    # checkpoint id embedded in the filename
    initial_epoch = 0
    latest = tensorflow.train.latest_checkpoint(config.checkpoint_path())
    if latest:
        print("found existing weights, loading...")
        model.load_weights(latest)
        found_num = re.search(r'\d+', os.path.basename(latest))
        if found_num:
            checkpoint_id = int(found_num.group(0))
            initial_epoch = checkpoint_id
    # fit the model
    model.fit(training_generator,
              validation_data=validation_generator,
              epochs=config.get_parameter("epochs"),
              initial_epoch=initial_epoch,
              callbacks=[cp_callback])
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=2,
               learning_rate=5e-4):
    """Train the encoder/decoder on motion->claim pairs with Adam, printing
    and plotting running-average losses at the configured intervals.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    # One batch covering the whole training set (batch_size == num_train_data).
    # NOTE(review): `.next()` is the Python 2 iterator protocol — confirm
    # this module runs under Python 2 (or change to next(...)).
    motions, claims = DataGenerator(lexicon_count=lexicon_count,
                                    motion_length=motion_length,
                                    claim_length=claim_length,
                                    num_data=num_train_data,
                                    data_dir=train_data_dir,
                                    batch_size=num_train_data,
                                    shuffle=True).generate().next()
    # print(training_pairs)
    # training_pairs = [variablesFromPair(random.choice(pairs))
    #                   for i in range(n_iters)]
    criterion = nn.NLLLoss()
    for iter in range(1, num_train_data + 1):
        # training_pair = training_pairs[iter - 1]
        # print(training_pair)
        motion = np.asarray(motions[iter - 1])
        claim = np.asarray(claims[iter - 1])
        # Add a trailing batch dimension of 1, as the models expect.
        motion = Variable(torch.LongTensor(motion.tolist()).unsqueeze(1))
        claim = Variable(torch.LongTensor(claim.tolist()).unsqueeze(1))
        input_variable = motion.cuda() if use_cuda else motion
        target_variable = claim.cuda() if use_cuda else claim
        # print(training_pair)
        # print(input_variable)
        # print(target_variable)
        # exit()
        loss = train(input_variable, target_variable, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss
        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / num_train_data),
                                         iter, iter / num_train_data * 100,
                                         print_loss_avg))
        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
    showPlot(plot_losses)
def train(model, train_img_ids, val_img_ids, args):
    """Fit the model on patch generators, checkpointing after every epoch."""
    train_generator = DataGenerator(train_img_ids, args.train_images_folder,
                                    args.batch_size, args.patch_size)
    checkpointer = keras.callbacks.ModelCheckpoint(filepath=args.model_path,
                                                   verbose=1,
                                                   save_best_only=False)
    # Validation is optional: only build a generator when ids were provided.
    val_generator = None
    if len(val_img_ids) > 0:
        val_generator = DataGenerator(val_img_ids, args.val_images_folder,
                                      args.batch_size, args.patch_size)
    model.fit_generator(generator=train_generator,
                        validation_data=val_generator,
                        epochs=args.epochs,
                        verbose=1,
                        callbacks=[checkpointer],
                        use_multiprocessing=False)
def test__add_to_dict(self):
    """All nibrs_victim rows should be loaded and keyed with the expected columns."""
    generator = DataGenerator('TX', 2013)
    for name, file in generator.extract_zip():
        if 'nibrs_victim.csv' not in name.lower():
            continue
        generator._add_to_dict(filename_map.get_data(name), file)
    self.assertEqual(189139, len(generator._dict['nibrs_victim']))
    expected_columns = [
        'incident_id', 'age_num', 'sex_code', 'race_id', 'ethnicity_id',
        'victim_id', 'victim_type_id', 'victim_seq_num',
    ]
    actual_columns = list(generator._dict['nibrs_victim']['68950600'][0].keys())
    self.assertCountEqual(expected_columns, actual_columns)
def test__get_cols(self):
    """_filter_cols should keep only the columns named by the lookup."""
    lookup = filename_map.get_data('nibrs_location_type.csv')
    row = {
        'merp': 'herro',
        'location_id': 47,
        'mop': 788888,
        'location_name': 'bad one',
    }
    filtered = DataGenerator('KY', 2016)._filter_cols(lookup, row)
    self.assertDictEqual({'location_id': 47, 'location_name': 'bad one'},
                         filtered)
def test__map_filter_cols(self):
    """_map_filter_cols should drop unknown columns and rename mapped ones."""
    lookup = filename_map.get_data('cde_agencies.csv')
    row = {
        'merp': 'herro',
        'ori': 47,
        'mop': 788888,
        'primary_county': 'Mexicaliand',
    }
    mapped = DataGenerator('TX', 2015)._map_filter_cols(lookup, row)
    self.assertDictEqual({'ori': 47, 'county_name': 'Mexicaliand'}, mapped)
def trainer(model_params):
    """Build generators from the dataset splits and train the discriminator."""
    datasets = load_dataset('dataset', model_params)
    # Each split concatenates its two halves.
    train_set = datasets[0][0] + datasets[0][1]
    valid_set = datasets[1][0] + datasets[1][1]
    test_set = datasets[2][0] + datasets[2][1]
    # Map each dataset name to an integer class label.
    labels = {name: idx for idx, name in enumerate(model_params.data_set)}
    train_generator = DataGenerator(train_set, labels,
                                    batch_size=model_params.batch_size,
                                    shuffle=True)
    valid_generator = DataGenerator(valid_set, labels,
                                    batch_size=model_params.batch_size,
                                    shuffle=True)
    model = discriminator.Discriminator(model_params)
    model.train(train_generator, valid_generator)
    model.save()
    print('Done!')
def helper(data, keys):
    """Recursively collect the key structure of a nested dict into `keys`.

    Plain keys are appended as-is; a nested dict becomes a [key, inner_keys]
    pair whose inner list is filled by recursion. Returns `keys`.
    """
    for key in data:
        value = data[key]
        if DataGenerator.is_dict(value):
            nested = []  # inner keys of the sub-dict accumulate here
            keys.append([key, nested])
            helper(value, nested)
        else:
            keys.append(key)
    return keys
def run_trial(self, trial, data=None, labels=None, users=None, indices=None,
              batch_size=32):
    """Run one tuner trial with user-grouped inner cross-validation and
    report the mean validation loss back to the oracle.
    """
    self.ntrial += 1
    hp = trial.hyperparameters
    print('Trial {:d}'.format(self.ntrial))
    print(hp.values)
    # tuner/* bookkeeping: resume a past trial's model when continuing,
    # otherwise build a fresh one from the hyperparameters.
    if "tuner/trial_id" in hp:
        past_trial = self.oracle.get_trial(hp['tuner/trial_id'])
        model = self.load_model(past_trial)
    else:
        model = self.hypermodel.build(hp)
    initial_epoch = hp['tuner/initial_epoch']
    epochs = hp['tuner/epochs']
    # Cross-validation based on users
    fold = 0
    val_losses = []
    cv = GroupKFold(n_splits=self.cv)
    trial_users = users[indices]
    # X and y are placeholders: GroupKFold splits purely on the groups.
    X = np.zeros((len(trial_users), 10))
    y = np.zeros(len(trial_users))  # dummy for splitting
    for train_indices, val_indices in cv.split(X, y, trial_users):
        fold += 1
        print('Inner CV fold {:d}'.format(fold))
        train_gen = DataGenerator(indices[train_indices], data, labels, self.states, partition='train',\
                                  batch_size=batch_size, seqlen=self.seqlen, n_channels=self.num_channels, feat_channels=self.feat_channels,\
                                  n_classes=self.num_classes, shuffle=True, balance=True, mean=self.mean, std=self.std)
        val_gen = DataGenerator(indices[val_indices], data, labels, self.states, partition='test',\
                                batch_size=batch_size, seqlen=self.seqlen, n_channels=self.num_channels, feat_channels=self.feat_channels,\
                                n_classes=self.num_classes, mean=self.mean, std=self.std)
        # NOTE(review): this rebuilds the model every fold, discarding the
        # model loaded/built above — confirm whether resuming weights across
        # Hyperband brackets was intended here.
        model = self.hypermodel.build(trial.hyperparameters)
        model.fit(train_gen, epochs=epochs, validation_data=val_gen,\
                  verbose=1, shuffle=False, initial_epoch=initial_epoch,
                  workers=2, max_queue_size=20, use_multiprocessing=False)
        val_losses.append(model.evaluate(val_gen))
    self.oracle.update_trial(trial.trial_id, {'val_loss': np.mean(val_losses)})
    self.save_model(trial.trial_id, model)
def __init__(self):
    """Capture module-level configuration and build/compile the autoencoder."""
    # image geometry
    self.height = height
    self.width = width
    self.channels = channels
    # training configuration
    self.batch_size = batch_size
    self.epochs = epochs
    self.line = line
    self.n_show_image = n_show_image
    self.number = number
    self.vgg = vgg
    self.optimizer = optimizer
    # data pipelines (128x128 samples)
    dims = (128, 128)
    self.DG = DataGenerator(X_train, Y_train, batch_size=batch_size, dim=dims)
    self.DGP = DataGenerator_predict(X_predict, batch_size=batch_size, dim=dims)
    # autoencoder trained with MSE reconstruction loss
    self.AE = self.build_AE()
    self.AE.compile(loss='mse', optimizer=self.optimizer)
def dicts_helper(self_data, other_data):
    """Deep-compare two dicts pairwise in iteration order; True when equal.

    Keys are compared positionally via zip, so insertion order matters.
    """
    if len(self_data) != len(other_data):
        return False
    for (left_key, left_val), (right_key, right_val) in zip(self_data.items(),
                                                            other_data.items()):
        if left_key != right_key:
            return False
        if type(left_val) != type(right_val):
            return False
        if DataGenerator.is_dict(left_val):
            if not dicts_helper(left_val, right_val):
                return False
        elif DataGenerator.is_iterable(left_val):
            if not iterable_helper(left_val, right_val):
                return False
        elif left_val != right_val:
            return False
    return True
def __init__(self, checkpoint_path='./checkpoints/'):
    """Set training hyper-parameters and create the data generator."""
    self.checkpoint_path = checkpoint_path
    # training schedule
    self.learning_rate = 0.0001
    self.num_iter = 1000000
    self.log_iter = 500
    self.save_iter = 5000
    # use batch re-norm which allows batch_size = 1
    self.batch_size = 1
    # data pipeline
    size = (1024, 1024)
    self.image_size = size
    self.data_generator = DataGenerator(size)
    self.num_labels = 3
def load_generators(data_dir):
    """Split the labelled training images 90/10 and wrap both splits in generators."""
    generator_params = {
        'dim': (96, 96),
        'batch_size': 100,
        'n_classes': 2,
        'n_channels': 3,
        'shuffle': True,
    }
    # Labels come from the CSV; id -> label lookup shared by both generators.
    labels_df = pd.read_csv(data_dir + 'train_labels.csv')
    train_df, val_df = train_test_split(labels_df, test_size=0.1, random_state=42)
    labels = dict(zip(labels_df['id'], labels_df['label']))
    train_dir = data_dir + "train/"
    train_gen = DataGenerator(list(train_df['id']), labels, train_dir, **generator_params)
    val_gen = DataGenerator(list(val_df['id']), labels, train_dir, **generator_params)
    return train_gen, val_gen
def __init__(self):
    """Wire up the GAN: a discriminator over (front, image) pairs and a
    generator mapping side views to front views, combined for training.
    """
    # hyper-parameters pulled from module-level globals
    self.height = height
    self.width = width
    self.channels = channels
    self.batch_size = batch_size
    self.epochs = epochs
    self.line = line
    self.n_show_image = n_show_image
    self.vgg = vgg
    self.optimizerD = optimizerD  # discriminator optimizer
    self.optimizerC = optimizerC  # combined-model optimizer
    self.DG = DataGenerator(X_train, Y_train, batch_size=batch_size)
    self.DGP = DataGenerator_predict(X_predict, batch_size=batch_size)
    self.number = number
    # Discriminator output patch: half the image side, 3 channels.
    patch = int(self.height / 2**1)
    self.disc_patch = (patch, patch, 3)
    self.discriminator = self.build_discriminator()
    self.discriminator.compile(loss='binary_crossentropy',
                               optimizer=self.optimizerD)
    self.generator = self.build_generator()
    # Freeze the discriminator inside the combined model so only the
    # generator trains against the adversarial loss.
    self.discriminator.trainable = False
    side = Input(shape=(self.height, self.width, self.channels))
    front = Input(shape=(self.height, self.width, self.channels))
    image = self.generator(side)
    valid = self.discriminator([front, image])
    # Combined model: L1 reconstruction (weight 100) + adversarial MSE (weight 1).
    self.combined = Model([side, front], [image, valid])
    self.combined.compile(loss=['mae', "mse"],
                          loss_weights=[100, 1],
                          optimizer=self.optimizerC)
def read_data(self, data_dir):
    """Walk data_dir (one subdirectory per class), collect file paths and
    integer labels, and build the internal data generator.
    """
    patterns = []
    labels = []
    label = -1
    for root, dirs, files in os.walk(data_dir):
        if label < 0:
            # First walk entry is data_dir itself: skip it, classes start at 0.
            label += 1
            continue
        for file_name in files:
            patterns.append(root + '/' + file_name)
            labels.append(label)
        label += 1
    self.__generator = DataGenerator(patterns, labels, self.__scale_size,
                                     self.__shuffle, self.__input_channels,
                                     self.__n_classes)
def makePredictions(model):
    """Predict over the test generator and save predictions with ground truth."""
    test_generator = DataGenerator("test", 48832, BATCH_SIZE, False)
    predictions = model.predict_generator(test_generator,
                                          test_generator.n / 64,
                                          verbose=1,
                                          workers=8)
    LABELS = ['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
              'Airspace Opacity', 'Lung Lesion', 'Edema', 'Consolidation',
              'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion',
              'Pleural Other', 'Fracture', 'Support Devices']
    # Ground-truth label matrix for the same 48832 test rows.
    ground_truth = pd.read_csv(TEST, nrows=48832)[LABELS].values
    np.save("Elmo_Predictions", np.array([predictions, ground_truth]))
class TestPipelineFramework(unittest.TestCase):
    """Pipeline-construction tests over DataGenerator's fixture graphs.

    Fix: ``assertEquals`` and ``assertRaisesRegexp`` are deprecated aliases
    that were removed in Python 3.12 — replaced with ``assertEqual`` and
    ``assertRaisesRegex``.
    """

    def setUp(self):
        self.dg = DataGenerator()

    def test_create_empty_framework(self):
        with self.assertRaises(Exception):
            self.dg.get_empty_pipeline()

    def test_single_node_framework(self):
        p = self.dg.get_single_node_pipeline()
        self.assertEqual(len(p), 1)
        self.assertTrue(nx.is_tree(p.dag))

    def test_linear_framework(self):
        p = self.dg.get_linear_pipeline()
        self.assertEqual(len(p), 3)
        self.assertTrue(nx.is_tree(p.dag))

    def test_tree_framework(self):
        p = self.dg.get_tree_pipeline()
        self.assertEqual(len(p), 7)
        self.assertTrue(nx.is_tree(p.dag))

    def test_dag_framework(self):
        p = self.dg.get_dag_pipeline()
        self.assertEqual(len(p), 7)
        self.assertTrue(nx.is_directed_acyclic_graph(p.dag))

    def test_cyclic_framework(self):
        with self.assertRaises(Exception):
            self.dg.get_cyclic_pipeline()

    def test_disconnected_framework(self):
        p = self.dg.get_disconnected_pipeline()
        self.assertEqual(len(p), 5)
        self.assertTrue(nx.is_directed_acyclic_graph(p.dag))

    def test_unbalanced_framework(self):
        p = self.dg.get_unbalanced_pipeline()
        self.assertEqual(len(p), 7)
        self.assertTrue(nx.is_tree(p.dag))

    def test_ranktree_framework(self):
        p = self.dg.get_ranktree_pipeline()
        self.assertEqual(len(p), 14)
        self.assertTrue(nx.is_tree(p.dag))

    def test_loose_framework(self):
        p = self.dg.get_loose_pipeline()
        self.assertEqual(len(p), 6)
        self.assertTrue(nx.is_directed_acyclic_graph(p.dag))

    def test_self_ref_framework(self):
        with self.assertRaises(Exception):
            self.dg.get_self_ref_pipeline()

    def test_duplicate_node_framework(self):
        with self.assertRaises(Exception):
            self.dg.get_duplicate_node_pipeline()

    def test_unknown_uid_framework(self):
        with self.assertRaisesRegex(KeyError,
                                    "Unknown UID C set as requirement for B"):
            self.dg.get_unknown_uid_framework()
def __init__(self, N, T, random_network,
             sigma_x=0.1, sigma_y=0.001, sigma_baseline=0.0001,
             renormalize_sigma=False,
             time_weights=None, time_weights_parameters=None,
             cued_feature_time=0, enforce_min_distance=0.17,
             stimuli_generation='random', enforce_first_stimulus=True,
             stimuli_to_use=None,
             specific_stimuli_random_centers=False,
             specific_stimuli_asymmetric=False,
             enforce_distance_cued_feature_only=False,
             debug=False):
    # Build an N x T stimulus dataset on top of the base DataGenerator,
    # then initialise noise levels and construct the dataset itself.
    # NOTE(review): uses Python 2 print statements — this module is
    # Python 2 only.
    if time_weights_parameters is None:
        # Default temporal weighting profile.
        time_weights_parameters = dict(weighting_alpha=0.3,
                                       weighting_beta=1.0,
                                       specific_weighting=0.3,
                                       weight_prior='uniform')
    DataGenerator.__init__(self, N, T, random_network,
                           sigma_y=sigma_y,
                           time_weights=time_weights,
                           time_weights_parameters=time_weights_parameters)
    # This is the noise on specific memories. Belongs here.
    self.init_all_sigma(sigma_x, sigma_y, sigma_baseline,
                        renormalize=renormalize_sigma)
    self.enforce_min_distance = enforce_min_distance
    # Build the correct stimuli
    if stimuli_to_use is not None:
        # Use the provided stimuli
        self.set_stimuli(stimuli_to_use)
    else:
        if stimuli_generation == 'specific_stimuli':
            # Use our specifically built function, to get the special
            # stimuli combination allowing to verify some biases
            self.generate_specific_stimuli(
                asymmetric=specific_stimuli_asymmetric,
                centre=np.array([0., 0.]),
                specific_stimuli_random_centers=specific_stimuli_random_centers)
        elif stimuli_generation is not None:
            # Generate it randomly
            self.generate_stimuli(
                stimuli_generation=stimuli_generation,
                enforce_first_stimulus=enforce_first_stimulus,
                cued_feature_R=1,
                enforce_distance_cued_feature_only=enforce_distance_cued_feature_only)
        else:
            raise ValueError("No data generation possible.")
    if debug:
        print "== DataGeneratorRfn =="
        print "Size: %d, %d items/times." % (N, T)
        print "sigma_x %.3g, sigma_y %.3g sigma_baseline %.3g renormalized: %d" % (self.sigma_x, self.sigma_y, self.sigma_baseline, renormalize_sigma)
    # Build the dataset
    self.build_dataset(cued_feature_time=cued_feature_time)
def __init__(self, nr_of_items):
    """Seed the provider with nr_of_items generated candidates."""
    generator = DataGenerator()
    self.data_generator = generator
    self.CANDIDATES = generator.generate_candidates(nr_of_items)
class TestRank(unittest.TestCase):
    """Tests for the ranker module over DataGenerator's fixture pipelines.

    Fix: ``test_rank_by_successors_single_node`` previously invoked
    ``rank_by_total_successors`` (copy-paste slip). For a single node both
    rankers yield rank 0, so the expected value is unchanged.
    """

    def setUp(self):
        self.dag = DataGenerator()

    def test_rank_by_total_successors_linear(self):
        p = self.dag.get_linear_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 2, 'B': 1, 'C': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_tree(self):
        p = self.dag.get_tree_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 6, 'B': 2, 'C': 2, 'D': 0, 'E': 0, 'F': 0, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_dag(self):
        p = self.dag.get_dag_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 6, 'B': 2, 'C': 5, 'D': 1, 'E': 0, 'F': 1, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_dag_disconected(self):
        p = self.dag.get_disconnected_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 1, 'B': 0, 'C': 2, 'D': 0, 'E': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_single_node(self):
        p = self.dag.get_single_node_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_unbalanced_pipeline(self):
        p = self.dag.get_unbalanced_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 6, 'B': 4, 'C': 0, 'D': 3, 'E': 0, 'F': 0, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_ranktree_pipeline(self):
        p = self.dag.get_ranktree_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 13, 'B': 6, 'C': 5, 'D': 2, 'E': 2, 'F': 0, 'G': 0,
                    'H': 0, 'I': 0, 'J': 0, 'K': 0, 'L': 0, 'M': 0, 'N': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_loose_pipeline(self):
        p = self.dag.get_loose_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_linear(self):
        p = self.dag.get_linear_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 1, 'B': 1, 'C': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_tree(self):
        p = self.dag.get_tree_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 2, 'B': 2, 'C': 2, 'D': 0, 'E': 0, 'F': 0, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_dag(self):
        p = self.dag.get_dag_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 1, 'B': 1, 'C': 3, 'D': 1, 'E': 0, 'F': 1, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_dag_disconected(self):
        p = self.dag.get_disconnected_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 1, 'B': 0, 'C': 2, 'D': 0, 'E': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_single_node(self):
        p = self.dag.get_single_node_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_unbalanced_pipeline(self):
        p = self.dag.get_unbalanced_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 2, 'B': 1, 'C': 0, 'D': 3, 'E': 0, 'F': 0, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_ranktree_pipeline(self):
        p = self.dag.get_ranktree_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 2, 'B': 2, 'C': 5, 'D': 2, 'E': 2, 'F': 0, 'G': 0,
                    'H': 0, 'I': 0, 'J': 0, 'K': 0, 'L': 0, 'M': 0, 'N': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_loose_pipeline(self):
        p = self.dag.get_loose_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
        self.assertEqual(ranks, expected)

    # FIFO tests invoke no ranker: ranks should still hold their default None.

    def test_rank_by_fifo_tree(self):
        p = self.dag.get_tree_pipeline()
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': None, 'B': None, 'C': None, 'D': None, 'E': None,
                    'F': None, 'G': None}
        self.assertEqual(ranks, expected)

    def test_rank_by_fifo_unbalanced_pipeline(self):
        p = self.dag.get_unbalanced_pipeline()
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': None, 'B': None, 'C': None, 'D': None, 'E': None,
                    'F': None, 'G': None}
        self.assertEqual(ranks, expected)

    def test_rank_by_fifo_ranktree_pipeline(self):
        p = self.dag.get_ranktree_pipeline()
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': None, 'B': None, 'C': None, 'D': None, 'E': None,
                    'F': None, 'G': None, 'H': None, 'I': None, 'J': None,
                    'K': None, 'L': None, 'M': None, 'N': None}
        self.assertEqual(ranks, expected)
def setUp(self):
    """Build a fresh DataGenerator DAG fixture before each test."""
    self.dag = DataGenerator()
class TestConcretePipeline(unittest.TestCase):
    """Tests for ConcretePipeline: $$var$$ substitution, task completion
    tracking, and the mc (max concurrency) property.

    Fix: replaced the deprecated ``assertEquals`` alias with ``assertEqual``
    (deprecated since Python 3.2 and removed in 3.12).
    """

    def setUp(self):
        """Build a fresh DataGenerator fixture before each test."""
        self.dg = DataGenerator()

    def test_replace_none(self):
        """A task with no $$var$$ placeholders passes through unchanged."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("A", False, False, "exe", None, "path", "none", "are", "replaced"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "exe none are replaced")
        self.assertEqual(task.wd, "path")
        self.assertFalse(task.skip)

    def test_replace_simple(self):
        """A single $$a1$$ placeholder is replaced with its argument value."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("B", "$$skip1$$", "f", "exe", None, "path", "-blah", "$$a1$$"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "exe -blah val_for_a1")
        self.assertEqual(task.wd, "path")
        self.assertTrue(task.skip)

    def test_replace_multiple(self):
        """Distinct placeholders in every field are all substituted."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("C", "$$skip2$$", False, "$$a2$$", None, "$$a1$$", "$$a3$$", "$$a4$$"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "secondParameter a_3rd_one 4")
        self.assertEqual(task.wd, "val_for_a1")
        self.assertTrue(task.skip)

    def test_replace_repeated(self):
        """The same placeholder may appear many times and is replaced each time."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("D", "$$skip3$$", False, "$$a1$$", None, "$$a1$$", "$$a1$$", "$$a1$$"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "val_for_a1 val_for_a1 val_for_a1")
        self.assertEqual(task.wd, "val_for_a1")
        self.assertTrue(task.skip)

    def test_replace_partial(self):
        """An unbalanced $$ delimiter ($$a4$$a4$$) must raise."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("E", "$$skip4$$", False, "$$a1$$", None, "$$a4$$a4$$"), [])])
        with self.assertRaises(Exception):
            ConcretePipeline(0, p, data, "blah")

    def test_replace_back_to_back(self):
        """Adjacent placeholders ($$a4$$$$a4$$) are both substituted."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("F", "$$skip5$$", False, "$$a4$$$$a4$$", None, "$$a2$$"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "44")
        self.assertEqual(task.wd, "secondParameter")
        self.assertFalse(task.skip)

    def test_replace_substring(self):
        """Placeholders embedded inside larger strings are replaced in place."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("G", "$$skip6$$", False, "exe", None, "/path/$$a2$$/more", "-$$a3$$", "\"$$a4$$\""), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "exe -a_3rd_one \"4\"")
        self.assertEqual(task.wd, "/path/secondParameter/more")
        self.assertFalse(task.skip)

    def test_replace_unicode(self):
        """Substitution also works on unicode template strings."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("G", False, False, u"exe", None, u"/path/$$a2$$/more", u"-$$a3$$", u"\"$$a4$$\""), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "exe -a_3rd_one \"4\"")
        self.assertEqual(task.wd, "/path/secondParameter/more")
        self.assertFalse(task.skip)

    def test_replace_invalid_var(self):
        """A placeholder with no matching argument ($$a5$$) must raise."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("G", "$$skip6$$", False, "exe", None, "/path/$$a2$$/more", "-$$a3$$", "$$a5$$"), [])])
        with self.assertRaises(Exception):
            ConcretePipeline(0, p, data, "blah")

    def test_replace_invalid_type(self):
        """A None working directory is rejected during substitution."""
        data = self.dg.get_args()
        p = PipelineFramework([(Task("G", "$$skip6$$", False, "exe", None, None, "-$$a3$$", "$$a4$$"), [])])
        with self.assertRaises(Exception):
            ConcretePipeline(0, p, data, "blah")

    def test_done(self):
        """Completing a task unlocks its ready successors and is counted."""
        data = self.dg.get_args()
        p = self.dg.get_dag_pipeline()
        cp = ConcretePipeline(0, p, data, "blah")
        ready = list(cp.get_ready_tasks())
        self.assertEqual(len(ready), 2)
        # Get the "A" task
        task = [t for t in ready if t._uid == "A"][0]
        self.assertFalse(list(cp.get_ready_successors(task)))
        self.assertFalse(cp.is_done(task))
        cp.set_done(task)
        self.assertTrue(cp.is_done(task))
        self.assertEqual(list(cp.get_ready_successors(task))[0]._uid, "C")
        self.assertEqual(cp.get_completed_tasks(), 1)

    def test_mc(self):
        """mc reports the maximum width (concurrency) of each pipeline shape."""
        data = self.dg.get_args()

        def get_mc(p):
            return ConcretePipeline(0, p, data, "blah").mc

        self.assertEqual(get_mc(PipelineFramework([])), 0)
        self.assertEqual(get_mc(self.dg.get_single_node_pipeline()), 1)
        self.assertEqual(get_mc(self.dg.get_linear_pipeline()), 1)
        self.assertEqual(get_mc(self.dg.get_tree_pipeline()), 4)
        self.assertEqual(get_mc(self.dg.get_dag_pipeline()), 3)
        self.assertEqual(get_mc(self.dg.get_disconnected_pipeline()), 3)
        self.assertEqual(get_mc(self.dg.get_unbalanced_pipeline()), 4)
        self.assertEqual(get_mc(self.dg.get_ranktree_pipeline()), 9)
        self.assertEqual(get_mc(self.dg.get_loose_pipeline()), 6)