def _predict_loop(self, f, ins, batch_size=128, verbose=0):
    '''Run `f` over `ins` batch by batch and stitch the outputs together.

    # Arguments
        f: callable invoked as `f(*ins_batch)`; it may return a single
            array or a list of arrays.
        ins: list of inputs; `len(ins[0])` is used as the sample count.
        batch_size: number of samples handed to `f` per call.
        verbose: when equal to 1 a progress bar is displayed.

    # Returns
        A list with one array per output of `f`; each array has
        `nb_sample` rows followed by the trailing dimensions of the
        corresponding batch output. Empty when there are no batches.
    '''
    nb_sample = len(ins[0])
    outs = []
    if verbose == 1:
        progbar = Progbar(target=nb_sample)
    batches = make_batches(nb_sample, batch_size)
    index_array = np.arange(nb_sample)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        batch_ids = index_array[batch_start:batch_end]
        ins_batch = slice_X(ins, batch_ids)

        batch_outs = f(*ins_batch)
        # isinstance (rather than an exact type comparison) also accepts
        # list subclasses as multi-output results.
        if not isinstance(batch_outs, list):
            batch_outs = [batch_outs]

        if batch_index == 0:
            # Output shapes are only known after the first call to `f`,
            # so the result buffers are allocated here.
            outs.extend(np.zeros((nb_sample,) + batch_out.shape[1:])
                        for batch_out in batch_outs)

        for i, batch_out in enumerate(batch_outs):
            outs[i][batch_start:batch_end] = batch_out
        if verbose == 1:
            progbar.update(batch_end)
    return outs
def train_model_embed(train, dev, glove, model, model_dir = 'models/curr_model', nb_epochs = 20, batch_size = 64, hs=True, ci = True): X_dev_p, X_dev_h, y_dev = load_data.prepare_split_vec_dataset(dev, glove=glove) word_index = load_data.WordIndex(glove) if not os.path.exists(model_dir): os.makedirs(model_dir) for e in range(nb_epochs): print "Epoch ", e mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True) p = Progbar(len(train)) for i, train_index in mb: if len(train_index) != batch_size: continue X_train_p, X_train_h , y_train = load_data.prepare_split_vec_dataset([train[k] for k in train_index], word_index.index) padded_p = load_data.pad_sequences(X_train_p, maxlen = PREM_LEN, dim = -1, padding = 'pre') padded_h = load_data.pad_sequences(X_train_h, maxlen = HYPO_LEN, dim = -1, padding = 'post') data = {'premise_input': padded_p, 'embed_input': np.expand_dims(np.array(train_index), axis=1), 'output' : padded_h} if ci: data['class_input'] = y_train if hs: data['train_input'] = padded_h data['output'] = np.ones((batch_size, HYPO_LEN, 1)) #sw = (padded_h != 0).astype(float) #train_loss = float(model.train_on_batch(data, sample_weight={'output':sw})[0]) train_loss = float(model.train_on_batch(data)[0]) p.add(len(train_index),[('train_loss', train_loss)]) sys.stdout.write('\n') model.save_weights(model_dir + '/model~' + str(e))
def test_progbar():
    """Smoke test: feed the planes of a random cube to Progbar.update."""
    size = 2
    cube = np.random.random((size, size, size))
    progress = Progbar(size)
    step = 0
    for plane in cube:
        progress.update(step, list(plane))
        step += 1
class TrainIntervalLogger(Callback):
    """Callback that shows a progress bar per interval and prints a summary
    of episode rewards, metrics and infos whenever an interval is over."""

    def __init__(self, interval=10000):
        self.interval = interval
        self.step = 0
        self.reset()

    def reset(self):
        """Start a fresh interval: new timer, new progress bar, empty stats."""
        self.interval_start = timeit.default_timer()
        self.progbar = Progbar(target=self.interval)
        self.metrics = []
        self.infos = []
        self.info_names = None
        self.episode_rewards = []

    def on_train_begin(self, logs):
        """Remember the start time and the model's metric names."""
        self.train_start = timeit.default_timer()
        self.metrics_names = self.model.metrics_names
        print('Training for {} steps ...'.format(self.params['nb_steps']))

    def on_train_end(self, logs):
        """Print how long training took."""
        duration = timeit.default_timer() - self.train_start
        print('done, took {:.3f} seconds'.format(duration))

    def on_step_begin(self, step, logs):
        """At an interval boundary, print the summary and reset the stats."""
        if self.step % self.interval != 0:
            return

        if len(self.episode_rewards) > 0:
            metrics = np.array(self.metrics)
            assert metrics.shape == (self.interval, len(self.metrics_names))
            formatted_metrics = ''
            if not np.isnan(metrics).all():  # at least one value is not NaN
                means = np.nanmean(self.metrics, axis=0)
                assert means.shape == (len(self.metrics_names),)
                formatted_metrics = ''.join(
                    ' - {}: {:.3f}'.format(name, mean)
                    for name, mean in zip(self.metrics_names, means))

            formatted_infos = ''
            if len(self.infos) > 0:
                infos = np.array(self.infos)
                if not np.isnan(infos).all():  # at least one value is not NaN
                    means = np.nanmean(self.infos, axis=0)
                    assert means.shape == (len(self.info_names),)
                    formatted_infos = ''.join(
                        ' - {}: {:.3f}'.format(name, mean)
                        for name, mean in zip(self.info_names, means))

            rewards = self.episode_rewards
            print('{} episodes - episode_reward: {:.3f} [{:.3f}, {:.3f}]{}{}'.format(
                len(rewards), np.mean(rewards), np.min(rewards), np.max(rewards),
                formatted_metrics, formatted_infos))
            print('')

        self.reset()
        print('Interval {} ({} steps performed)'.format(self.step // self.interval + 1, self.step))

    def on_step_end(self, step, logs):
        """Advance the progress bar and record this step's metrics and infos."""
        if self.info_names is None:
            # The info keys are taken from the first step seen after a reset.
            self.info_names = logs['info'].keys()
        position = (self.step % self.interval) + 1
        self.progbar.update(position, values=[('reward', logs['reward'])], force=True)
        self.step += 1
        self.metrics.append(logs['metrics'])
        if len(self.info_names) > 0:
            self.infos.append([logs['info'][k] for k in self.info_names])

    def on_episode_end(self, episode, logs):
        """Record the reward of the finished episode."""
        self.episode_rewards.append(logs['episode_reward'])
def validate(dev, gen_test, beam_size, hypo_len, samples, noise_size, glove, cmodel = None, adverse = False, diverse = False): vgen = val_generator(dev, gen_test, beam_size, hypo_len, noise_size) p = Progbar(samples) batchez = [] while p.seen_so_far < samples: batch = next(vgen) preplexity = np.mean(np.power(2, batch[2])) loss = np.mean(batch[2]) losses = [('hypo_loss',loss),('perplexity', preplexity)] if cmodel is not None: ceval = cmodel.evaluate([batch[0], batch[1]], batch[4], verbose = 0) losses += [('class_loss', ceval[0]), ('class_acc', ceval[1])] probs = cmodel.predict([batch[0], batch[1]], verbose = 0) losses += [('class_entropy', np.mean(-np.sum(probs * np.log(probs), axis=1)))] p.add(len(batch[0]), losses) batchez.append(batch) batchez = merge_result_batches(batchez) res = {} if adverse: val_loss = adverse_validation(dev, batchez, glove) print 'adverse_loss:', val_loss res['adverse_loss'] = val_loss if diverse: div, _, _, _ = diversity(dev, gen_test, beam_size, hypo_len, noise_size, 64, 32) res['diversity'] = div print for val in p.unique_values: arr = p.sum_values[val] res[val] = arr[0] / arr[1] return res
def fit_model(self, X, y):
    """Fit the model to `X`, `y` for `self.nb_epoch` epochs.

    Every batch produced by `self.datagen.flow` is passed through
    `self.batch_warp` and trained on. A progress bar shows the per-batch
    loss/accuracy and, at the end of the epoch, the sample-weighted epoch
    averages. Weights are checkpointed to `self.save_weights_file` after
    every epoch.

    Changes from the previous version: the running averages are weighted by
    the number of samples each batch actually delivered (a short last batch
    used to count as a full `self.batch_size`), the sample count is read
    from the `X` argument (the bar target) rather than `self.X`, and an
    epoch with no batches no longer divides by zero.
    """
    nb_samples = X.shape[0]
    for e in range(self.nb_epoch):
        print('Epoch: ', e, ' of ', self.nb_epoch)
        progbar = Progbar(target=nb_samples, verbose=True)

        # batch train with realtime data augmentation
        total_accuracy = 0
        total_loss = 0
        current = 0
        for X_batch, y_batch in self.datagen.flow(X, y, self.batch_size):
            # Samples drawn from X in this batch, measured before warping.
            batch_len = len(X_batch)

            # prepare the batch with random augmentations
            X_batch, y_batch = self.batch_warp(X_batch, y_batch)

            # train on the batch
            loss, accuracy = self.model.train(X_batch, y_batch, accuracy=True)

            # update the progress bar
            total_loss += loss * batch_len
            total_accuracy += accuracy * batch_len
            current += batch_len
            if current > nb_samples:
                current = nb_samples
            else:
                progbar.update(current, [('loss', loss), ('acc.', accuracy)])

        if current:
            progbar.update(current, [('loss', total_loss / current),
                                     ('acc.', total_accuracy / current)])

        # checkpoints between epochs
        self.model.save_weights(self.save_weights_file, overwrite=True)
def adverse_generate2(gen_model, ad_model, cmodel, train, word_index, glove, threshold = 0.95, batch_size = 64, ci = False):
    """Generate hypotheses for shuffled minibatches of `train` and collect
    those that both the classifier and the adversarial model score highly.

    A candidate is kept when `cpred * ad_pred > threshold`. Each kept item is
    a tuple `(cpred, ad_pred, premise, generated_sequence, class_label)`.
    Generation stops early, printing the number of examples processed so
    far, as soon as more than 200 results were collected.

    Changes from the previous version: the per-example loop no longer reuses
    the minibatch counter `i` (which corrupted the printed count), the
    filtered list is computed once instead of consuming `zip` twice, the
    print works on Python 2 and 3, and the deprecated
    `np.random.random_integers` is replaced by its documented `randint`
    equivalent.
    """
    mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    p = Progbar(len(train))
    results = []
    for i, train_index in mb:
        if len(train_index) != batch_size:
            continue
        orig_batch = [train[k] for k in train_index]
        class_indices = [load_data.LABEL_LIST.index(train[k][2]) for k in train_index]

        # randint's upper bound is exclusive, so `len(train) + 1` keeps the
        # inclusive range of the former random_integers call.
        # NOTE(review): confirm that index len(train) is valid for the
        # embedding input.
        embed_ids = np.random.randint(0, len(train) + 1, len(orig_batch))
        probs = generation.generation_predict_embed(gen_model, word_index.index, orig_batch,
                                                    embed_ids, class_indices = class_indices)
        gen_batch = generation.get_classes(probs)
        ad_preds = ad_model.predict_on_batch(gen_batch)[0].flatten()

        # Classifier input: premise tokens, a "--" separator, generated tokens.
        X = []
        for j, example in enumerate(orig_batch):
            concat = example[0] + ["--"] + word_index.get_seq(gen_batch[j])
            X.append(load_data.load_word_vecs(concat, glove))
        X = np.array(X)
        X_padded = load_data.pad_sequences(X, dim = len(X[0][0]))
        cpreds = cmodel.predict_on_batch(X_padded)[0][np.arange(len(X_padded)), class_indices]

        pred_seq = [word_index.print_seq(gen) for gen in gen_batch]
        premises = [" ".join(ex[0]) for ex in orig_batch]
        classes = np.array(load_data.LABEL_LIST)[class_indices]

        accepted = [el for el in zip(cpreds, ad_preds, premises, pred_seq, classes)
                    if el[0] * el[1] > threshold]
        results += accepted
        p.add(len(train_index), [('added', float(len(accepted)))])

        if len(results) > 200:
            # presumably `i` is the running minibatch number — verify
            print((i + 1) * batch_size)
            return results
    return results
def _test_loop(self, f, ins, batch_size=128, verbose=0):
    '''Evaluate `f` over `ins` in batches and average the results.

    # Arguments
        f: callable invoked as `f(*ins_batch)`; it may return a single
            value or a list of values.
        ins: list of inputs; `len(ins[0])` is used as the sample count.
        batch_size: number of samples handed to `f` per call.
        verbose: when equal to 1 a progress bar is displayed.

    # Returns
        A list with one entry per output of `f`: the mean of that output
        over all samples, each batch weighted by its size. Empty when there
        are no batches.
    '''
    nb_sample = len(ins[0])
    outs = []
    if verbose == 1:
        progbar = Progbar(target=nb_sample)
    batches = make_batches(nb_sample, batch_size)
    index_array = np.arange(nb_sample)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        batch_ids = index_array[batch_start:batch_end]
        ins_batch = slice_X(ins, batch_ids)

        batch_outs = f(*ins_batch)
        # Treat a single output as a one-element list so both cases share
        # the same accumulation code.
        if not isinstance(batch_outs, list):
            batch_outs = [batch_outs]

        if batch_index == 0:
            outs.extend(0. for _ in batch_outs)
        for i, batch_out in enumerate(batch_outs):
            outs[i] += batch_out * len(batch_ids)

        if verbose == 1:
            progbar.update(batch_end)
    return [out / nb_sample for out in outs]
def learn(self, env, epoch=1, batch_size=1, exp_batch_size=0, gamma=0.9, reset_memory=False, verbose=1, callbacks=None):
    """Train the agent to play the environment `env`.

    Parameters
    ----------
    env : :obj:`Enviroment`
        The environment the agent learns to play.
    epoch : int
        Number of complete episodes to play.
    batch_size : int
        Number of experiences to replay per step.
    exp_batch_size : int
        Number of experiences to replay from the consolidated
        :attr:`ExperienceReplay.experience`.
    gamma : float
        Discount factor.
    reset_memory : bool
        Whether to call ``self.reset()`` before starting
        (documented as restarting :attr:`ExperienceReplay.memory`).
    verbose : int
        When equal to 1 the progress bar is advanced after every episode.
    callbacks : list of callables
        Currently unused. TODO: add callback support.

    Notes
    -----
    The reported ``loss`` is summed over the steps of one episode, while
    ``rewards`` keeps accumulating across all episodes.
    """
    print("Learning started!")
    print("[Environment]: {}".format(env.description))
    print("[Model]: {}".format(self.model.description))
    print("[Memory]: {}".format(self.memory.description))
    if reset_memory:
        self.reset()
    progbar = Progbar(epoch)
    rewards = 0
    # `range` instead of the Python 2-only `xrange`: same iteration on both
    # major versions.
    for e in range(epoch):
        # reset environment
        env.reset()
        game_over = False
        loss = 0
        # get initial observation, start game
        obs_t = env.observe()
        # run an episode
        while not game_over:
            obs_tm1 = obs_t
            action = self.policy(obs_tm1)
            # apply action, get reward and new state
            obs_t, reward, game_over = env.update(action)
            rewards += reward
            # store experience
            self.remember(obs_tm1, action, reward, obs_t, game_over)
            # adapt model
            loss += self.update(batch_size=batch_size,
                                exp_batch_size=exp_batch_size,
                                gamma=gamma)
        if verbose == 1:
            progbar.add(1, values=[("loss", loss), ("rewards", rewards)])
def run_epoch(self, split, train=False, batch_size=128, return_pred=False):
    """Run one pass over `split` and return aggregate metrics.

    Each batch is passed to `train_on_batch` (when `train` is true) or
    `test_on_batch`, then predicted again to obtain `p_relation`
    probabilities, which are multiplied by the type checker's output before
    taking the argmax.

    Returns a dict with micro-averaged 'f1', 'precision' and 'recall' over
    `self.labels`, 'accuracy', and 'loss' (mean of the per-batch losses).
    With `return_pred` the dict also carries 'ids', 'preds' and 'targs' as
    plain lists.
    """
    total = total_loss = 0
    func = self.model.train_on_batch if train else self.model.test_on_batch
    ids, preds, targs = [], [], []
    prog = Progbar(split.num_examples)
    for idx, X, Y, types in split.batches(batch_size):
        # Fold the targets (with the type columns appended) into the input
        # dict that is fed to the model.
        X.update({k: np.concatenate([v, types], axis=1) for k, v in Y.items()})
        loss = func(X)
        prob = self.model.predict(X, verbose=0)['p_relation']
        prob *= self.typechecker.get_valid_cpu(types[:, 0], types[:, 1])
        pred = prob.argmax(axis=1)

        targ = Y['p_relation'].argmax(axis=1)
        ids.append(idx)
        targs.append(targ)
        preds.append(pred)
        total_loss += loss
        total += 1
        prog.add(idx.size, values=[('loss', loss), ('acc', np.mean(pred==targ))])
    preds = np.concatenate(preds).astype('int32')
    targs = np.concatenate(targs).astype('int32')
    ids = np.concatenate(ids).astype('int32')

    ret = {
        'f1': f1_score(targs, preds, average='micro', labels=self.labels),
        'precision': precision_score(targs, preds, average='micro', labels=self.labels),
        'recall': recall_score(targs, preds, average='micro', labels=self.labels),
        'accuracy': accuracy_score(targs, preds),
        'loss': total_loss / float(total),
    }
    if return_pred:
        ret.update({'ids': ids.tolist(), 'preds': preds.tolist(), 'targs': targs.tolist()})
    return ret
def play(self, env, epoch=1, batch_size=1, visualize=None, verbose=1):
    """Let the agent play `epoch` episodes on `env` without training.

    Every step's image observation is collected; when `visualize` is given
    (a dict with 'n_frames', 'filepath' and 'gray') the frames are written
    out through `make_gif`. `batch_size` is not used. The reported `loss`
    is always 0 because nothing is trained here.

    Changes from the previous version: uses `range` instead of the
    Python 2-only `xrange`, and concatenates the frames once instead of
    growing the array on every step (which was quadratic in the number of
    frames).
    """
    print("Free play started!")
    # Seed with an empty array of the right trailing shape so the final
    # concatenation has the same dtype/shape as before, even with no steps.
    empty = np.zeros((0, ) + env.observe_image().shape[1:])
    frame_chunks = [empty.transpose(0, 2, 3, 1)]
    rewards = 0
    progbar = Progbar(epoch)

    for e in range(epoch):
        env.reset()
        game_over = False
        loss = 0
        # get initial observation, start game
        obs_t = env.observe()
        while not game_over:
            obs_tm1 = obs_t

            # get next action
            action = self.policy(obs_tm1, train=False)

            # apply action, get reward and new state
            obs_t, reward, game_over = env.update(action)
            rewards += reward

            frame_chunks.append(env.observe_image().transpose(0, 2, 3, 1))

        if verbose == 1:
            progbar.add(1, values=[("loss", loss), ("rewards", rewards)])

    if visualize:
        print("Making gif!")
        frames = np.concatenate(frame_chunks, axis=0)
        frames = np.repeat(frames, 3, axis=-1)
        make_gif(frames[:-visualize['n_frames']],
                 filepath=visualize['filepath'], gray=visualize['gray'])
        print("See your gif at {}".format(visualize['filepath']))
def make_predictions(conf,shot_list,loader,custom_path=None):
    """Predict every shot in `shot_list` in parallel with a saved model.

    The model is loaded from `custom_path` or, when that is None, from
    `conf['paths']['model_save_path'] + model_filename`.

    Returns three lists with one entry per shot: predictions and targets
    (each expanded with a trailing axis) and the per-shot `disr` values.

    Changes from the previous version: `custom_path` is compared with
    `is None`, and the worker pool is terminated and joined when a worker
    raises instead of being leaked.
    """
    feature_extractor = FeatureExtractor(loader)
    # Result unused; the call is kept in case it has side effects.
    feature_extractor.get_save_prepath()
    if custom_path is None:
        model_path = conf['paths']['model_save_path'] + model_filename
    else:
        model_path = custom_path
    # NOTE(review): joblib.load unpickles its input; only load model files
    # from trusted locations.
    model = joblib.load(model_path)

    y_prime = []
    y_gold = []
    disruptive = []

    pbar = Progbar(len(shot_list))
    fn = partial(predict_single_shot,model=model,feature_extractor=feature_extractor)
    pool = mp.Pool()
    try:
        print('predicting in parallel on {} processes'.format(pool._processes))
        for (y_p,y,disr) in pool.imap(fn,shot_list):
            y_prime.append(np.expand_dims(y_p,axis=1))
            y_gold.append(np.expand_dims(y,axis=1))
            disruptive.append(disr)
            pbar.add(1.0)
    except BaseException:
        # Do not leave workers running when prediction fails part-way.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    return y_prime,y_gold,disruptive
def preprocess(X):
    """Denoise every `X[i, j]` in place with total-variation denoising
    (weight 0.1) and return `X`."""
    n_rows = X.shape[0]
    bar = Progbar(n_rows)  # one tick per first-axis entry
    row = 0
    while row < n_rows:
        for col in range(X.shape[1]):
            X[row, col] = denoise_tv_chambolle(X[row, col], weight=0.1, multichannel=False)
        bar.add(1)
        row += 1
    return X
def rotation_augmentation(X, angle_range):
    """Return a copy of `X` in which all `X[i, j]` of one `i` are rotated by
    the same random integer angle drawn from [-angle_range, angle_range)."""
    progbar = Progbar(X.shape[0])
    rotated = np.copy(X)
    for sample in range(len(X)):
        # One angle per sample, shared by all of its slices.
        theta = np.random.randint(-angle_range, angle_range)
        for plane in range(X.shape[1]):
            source = X[sample, plane]
            rotated[sample, plane] = ndimage.rotate(source, theta, reshape=False, order=1)
        progbar.add(1)
    return rotated
def preprocess(X):
    """Pre-process images that are fed to the neural network.

    Every `X[i, j]` is replaced in place by its total-variation denoised
    version; the denoising weight (0.1) is the regularization parameter.
    """
    tracker = Progbar(X.shape[0])  # pre-processing status
    for outer in range(X.shape[0]):
        for inner in range(X.shape[1]):
            cleaned = denoise_tv_chambolle(X[outer, inner], weight=0.1, multichannel=False)
            X[outer, inner] = cleaned
        tracker.add(1)
    return X
def generation_test(train, glove, model, batch_size = 64, prem_len = 22, hypo_len = 12):
    """Train `model` for one shuffled pass over `train`, mapping padded
    premises to padded hypotheses and reporting the loss per minibatch."""
    minibatches = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    bar = Progbar(len(train))
    for _, batch_ids in minibatches:
        examples = [train[k] for k in batch_ids]
        X_prem, X_hypo, _ = load_data.prepare_split_vec_dataset(examples, glove)
        X_p = load_data.pad_sequences(X_prem, maxlen=prem_len, dim=50)
        X_h = load_data.pad_sequences(X_hypo, maxlen=hypo_len, dim=50)
        batch_loss = model.train_on_batch(X_p, X_h)[0]
        bar.add(len(X_p), [('train_loss', batch_loss)])
def test_progbar():
    """Smoke test: drive Progbar with and without a target at every
    verbosity level, including a step whose values are None."""
    values_s = [None,
                [['key1', 1], ['key2', 1e-4]],
                [['key3', 1], ['key2', 1e-4]]]
    configs = [(target, verbose)
               for target in (len(values_s) - 1, None)
               for verbose in (0, 1, 2)]
    for target, verbose in configs:
        bar = Progbar(target, width=30, verbose=verbose, interval=0.05)
        current = 0
        for values in values_s:
            bar.update(current, values=values)
            current += 1
def batchwise_function(func, X, batch_size=100, verbose=1):
    """Apply `func` to consecutive full batches of `X` and concatenate the
    results along axis 0.

    `func` receives a one-element list holding the batch. Only
    `X.shape[0] // batch_size` full batches are processed; a trailing
    partial batch is not.
    """
    outputs = []
    progbar = Progbar(X.shape[0])
    n_batches = X.shape[0] // batch_size
    for b in range(0, n_batches):
        lo = b * batch_size
        hi = (b + 1) * batch_size
        outputs.append(func([X[lo:hi]]))
        if verbose > 0:
            progbar.add(batch_size)
    return np.concatenate(outputs, axis=0)
class TrainIntervalLogger(Callback):
    """Callback that shows a progress bar per interval and prints a summary
    of episode rewards and metric means whenever an interval is over."""

    def __init__(self, interval=10000):
        self.interval = interval
        self.step = 0
        self.reset()

    def reset(self):
        """ Reset statistics """
        self.interval_start = timeit.default_timer()
        self.progbar = Progbar(target=self.interval)
        self.metrics = []
        self.infos = []
        self.info_names = None
        self.episode_rewards = []

    def on_train_begin(self, logs):
        """ Initialize training statistics at beginning of training """
        self.train_start = timeit.default_timer()
        self.metrics_names = metrics_names()
        print('Training for {} steps ...'.format(self.params['nb_steps']))

    def on_train_end(self, logs):
        """ Print training duration at end of training """
        elapsed = timeit.default_timer() - self.train_start
        print('done, took {:.3f} seconds'.format(elapsed))

    def on_step_begin(self, step, logs):
        """ Print metrics if interval is over """
        if self.step % self.interval != 0:
            return

        if len(self.episode_rewards) > 0:
            metrics = np.array(self.metrics)
            assert metrics.shape == (self.interval, len(self.metrics_names))
            formatted_metrics = ''
            if not np.isnan(metrics).all():  # at least one value is not NaN
                means = np.nanmean(self.metrics, axis=0)
                assert means.shape == (len(self.metrics_names),)
                formatted_metrics = ''.join(
                    ' - {}: {:.3f}'.format(name, mean)
                    for name, mean in zip(self.metrics_names, means))
            formatted_infos = ''
            rewards = self.episode_rewards
            print('{} episodes - episode_reward: {:.3f} [{:.3f}, {:.3f}]{}{}'.format(
                len(rewards), np.mean(rewards), np.min(rewards), np.max(rewards),
                formatted_metrics, formatted_infos))
            print('')

        self.reset()
        print('Interval {} ({} steps performed)'.format(self.step // self.interval + 1, self.step))

    def on_step_end(self, step, logs):
        """ Update progression bar at the end of each step """
        position = (self.step % self.interval) + 1
        self.progbar.update(position, values=[('reward', logs['reward'])])
        self.step += 1
        self.metrics.append(logs['metrics'])

    def on_episode_end(self, episode, logs):
        """ Update reward value at the end of each episode """
        self.episode_rewards.append(logs['episode_reward'])
def main():
    """Downsample the 'wav' files of every corpus directory named on the
    command line into a sibling `<corpus>_downsampled` directory.

    The progress bar counts every directory entry, not only the files that
    get converted.
    """
    corpora = [path[:-1] if path.endswith(os.sep) else path
               for path in argv[1:]]
    num_files = sum([len(os.listdir(path)) for path in corpora])
    pb = Progbar(num_files)
    for corpus in corpora:
        mkdir_p(corpus + '_downsampled')
        for filename in os.listdir(corpus):
            if filename.endswith('wav'):
                source = os.path.join(corpus, filename)
                target = os.path.join(corpus + '_downsampled', filename)
                downsample(source, target, verbose=False)
            pb.add(1)
def zoom_augmentation(X, y, k_min):
    """Return zoomed copies of `X` and correspondingly rescaled copies of `y`.

    For every sample a factor `k` is drawn uniformly from (k_min, 1]; each
    `X[i, j]` is passed through `zoom` with that factor and `y[i]` is
    multiplied by `1 / k**2`.
    """
    progbar = Progbar(X.shape[0])  # augmentation status
    X_zoom, y_zoom = np.copy(X), np.copy(y)
    for sample in range(len(X)):
        k_random = 1.0 - (np.random.rand() * (1.0 - k_min))
        for plane in range(X.shape[1]):
            X_zoom[sample, plane] = zoom(X[sample, plane], k_random)
        y_zoom[sample] *= 1 / (k_random * k_random)
        progbar.add(1)
    return X_zoom, y_zoom
def sampling_augmentation(X, n):
    """For every entry of `X`, pick `n` of its slices without replacement,
    in random order, and return the picks stacked into one array."""
    progbar = Progbar(X.shape[0])
    picked = []
    for sample in range(len(X)):
        slices = np.copy(X[sample])
        order = np.random.choice(range(len(slices)), n, replace=False)
        np.random.shuffle(order)
        picked.append(slices[order,])
        progbar.add(1)
    return np.array(picked)
def test_points(premises, labels, noises, gtest, cmodel, hypo_len):
    """Generate hypotheses for the premises in batches of 64 and print the
    classifier's evaluation on the generated pairs.

    Only full batches are generated; the classifier is evaluated on the
    first `len(hypos)` premises and labels.

    Changes from the previous version: the batch count uses floor division
    (`/` yields a float on Python 3, which `range` rejects) and the print
    works on Python 2 and 3.
    """
    p = Progbar(len(premises))
    hypos = []
    bs = 64
    for i in range(len(labels) // bs):
        lo, hi = i * bs, (i + 1) * bs
        words, _ = generative_predict_beam(gtest, premises[lo:hi],
                                           noises[lo:hi, None, :], labels[lo:hi],
                                           True, hypo_len)
        hypos.append(words)
        p.add(len(words))
    hypos = np.vstack(hypos)
    cpreds = cmodel.evaluate([premises[:len(hypos)], hypos], labels[:len(hypos)])
    print(cpreds)
def preprocess1(X,weight=0.1):
    """
    Pre-process images that are fed to neural network.

    Each `X[i, j]` is overwritten with its total-variation denoised version.

    :param X: array indexed as X[i, j]; modified in place and returned
    :param weight: denoising weight forwarded to denoise_tv_chambolle
    """
    status = Progbar(X.shape[0])  # pre-processing status
    for first in range(X.shape[0]):
        second = 0
        while second < X.shape[1]:
            X[first, second] = denoise_tv_chambolle(X[first, second], weight=weight, multichannel=False)
            second += 1
        status.add(1)
    return X
def predict_general_(self, model, X, size, load_func): queue = Queue.Queue() #generate the progress bar if self.verbose>0: progbar = Progbar(size, width=80, verbose=self.verbose) batch_idx = range(min(size, self.memory_batch_size)); self.matrix_load_into_queue(X, batch_idx, queue, load_func); X_batch,_,_ = queue.get() p = [] samples = 0 last_update = time.time()-1000 for _, i in enumerate(xrange(0, size, self.memory_batch_size)): next_start = i+len(batch_idx) next_end = min(size, next_start+self.memory_batch_size) if next_end>next_start: #spin the thread up batch_idx_next = range(next_start,next_end); thread = threading.Thread(target=self.matrix_load_into_queue, args=(X,batch_idx_next,queue,load_func)) thread.start() else: batch_idx_next = None thread = None #predict the value if X_batch.shape[0]>0: p_curr = model.predict(X_batch, batch_size=self.batch_size, verbose=0) p.append(p_curr) #increment the counter samples+= len(batch_idx) curr_update = time.time() if self.verbose>0 and (curr_update-last_update>=0.5 or (samples)>=size): progbar.update(samples, []) last_update = curr_update #wait for the next load to happen if thread is not None: thread.join() X_batch,_,_ = queue.get() #now add the next batch batch_idx = batch_idx_next p = np.vstack(p) return p
def test_adverse(dev, ad_model, gen_model, word_index, glove, train_len, batch_size=64, ci = False):
    """Evaluate the adversarial model on `dev`, reporting binary
    cross-entropy and accuracy per full minibatch on a progress bar.

    With `ci` set, every example of minibatch `i` is given class index
    `i % 3`.
    """
    minibatches = load_data.get_minibatches_idx(len(dev), batch_size, shuffle=False)
    bar = Progbar(len(dev) * 2)
    for i, batch_ids in minibatches:
        if len(batch_ids) != batch_size:
            continue  # incomplete minibatch
        if ci:
            class_indices = [i % 3] * batch_size
        else:
            class_indices = None
        examples = [dev[k] for k in batch_ids]
        X, y = adverse_batch(examples, word_index, gen_model, train_len, class_indices = class_indices)
        pred = ad_model.predict_on_batch(X)[0].flatten()
        loss = binary_crossentropy(y.flatten(), pred).eval()
        # A prediction counts as correct when it is within 0.5 of the target.
        acc = sum(np.abs(y - pred) < 0.5) / float(len(y))
        bar.add(len(X), [('test_loss', loss), ('test_acc', acc)])
def make_predictions_gpu(conf,shot_list,loader,custom_path=None):
    """Predict all shots in `shot_list` with the model built from `conf`.

    The loader is switched to inference mode for the duration of the call
    and switched back before returning. Weights are loaded via
    `load_model_weights(model, custom_path)`.

    Returns `(y_prime, y_gold, disruptive)`: per-shot predictions, targets
    and `disr` values, each truncated to `len(shot_list)` entries.
    """
    loader.set_inference_mode(True)

    # Backend setup: the environment variables are set next to the backend
    # import and before the keras imports below.
    if backend == 'tf' or backend == 'tensorflow':
        first_time = "tensorflow" not in sys.modules
        if first_time:
            import tensorflow as tf
            os.environ['KERAS_BACKEND'] = 'tensorflow'
            from keras.backend.tensorflow_backend import set_session
            config = tf.ConfigProto(device_count={"GPU":1})
            set_session(tf.Session(config=config))
    else:
        os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'
        import theano

    from keras.utils.generic_utils import Progbar
    from plasma.models.builder import ModelBuilder
    specific_builder = ModelBuilder(conf)

    y_prime = []
    y_gold = []
    disruptive = []

    model = specific_builder.build_model(True)
    model.compile(optimizer=optimizer_class(),loss=conf['data']['target'].loss)

    specific_builder.load_model_weights(model,custom_path)
    model.reset_states()

    pbar = Progbar(len(shot_list))
    shot_sublists = shot_list.sublists(conf['model']['pred_batch_size'],do_shuffle=False,equal_size=True)
    for (i,shot_sublist) in enumerate(shot_sublists):
        X,y,shot_lengths,disr = loader.load_as_X_y_pred(shot_sublist)
        # predict on the loaded batch, then clear the model state before
        # the next sublist
        y_p = model.predict(X, batch_size=conf['model']['pred_batch_size'])
        model.reset_states()
        y_p = loader.batch_output_to_array(y_p)
        y = loader.batch_output_to_array(y)
        # cut every array back to its shot's length
        y_p = [arr[:shot_lengths[j]] for (j,arr) in enumerate(y_p)]
        y = [arr[:shot_lengths[j]] for (j,arr) in enumerate(y)]

        pbar.add(1.0*len(shot_sublist))
        loader.verbose=False#True during the first iteration
        y_prime += y_p
        y_gold += y
        disruptive += disr
    # Drop surplus entries — presumably padding added by equal_size=True;
    # verify against shot_list.sublists.
    y_prime = y_prime[:len(shot_list)]
    y_gold = y_gold[:len(shot_list)]
    disruptive = disruptive[:len(shot_list)]
    loader.set_inference_mode(False)
    return y_prime,y_gold,disruptive
def make_evaluations_gpu(conf,shot_list,loader):
    """Evaluate the model built from `conf` on all shots in `shot_list`.

    The loader is switched to inference mode for the duration of the call
    and switched back before returning. A model is built, compiled and
    loaded for every sublist, using that sublist's length as the batch size.

    Returns the average of the per-sublist evaluation results, weighted by
    sublist size.
    """
    loader.set_inference_mode(True)

    # Backend setup: the environment variables are set next to the backend
    # import and before the keras imports below.
    if backend == 'tf' or backend == 'tensorflow':
        first_time = "tensorflow" not in sys.modules
        if first_time:
            import tensorflow as tf
            os.environ['KERAS_BACKEND'] = 'tensorflow'
            from keras.backend.tensorflow_backend import set_session
            config = tf.ConfigProto(device_count={"GPU":1})
            set_session(tf.Session(config=config))
    else:
        os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'
        import theano

    from keras.utils.generic_utils import Progbar
    from plasma.models.builder import ModelBuilder
    specific_builder = ModelBuilder(conf)

    # NOTE(review): these three lists are never filled or returned here.
    y_prime = []
    y_gold = []
    disruptive = []
    batch_size = min(len(shot_list),conf['model']['pred_batch_size'])

    pbar = Progbar(len(shot_list))
    print('evaluating {} shots using batchsize {}'.format(len(shot_list),batch_size))

    shot_sublists = shot_list.sublists(batch_size,equal_size=False)
    all_metrics = []
    all_weights = []
    for (i,shot_sublist) in enumerate(shot_sublists):
        # equal_size=False: the last sublist may be shorter, so the batch
        # size is taken from the sublist itself.
        batch_size = len(shot_sublist)
        model = specific_builder.build_model(True,custom_batch_size=batch_size)
        model.compile(optimizer=optimizer_class(),loss=conf['data']['target'].loss)

        specific_builder.load_model_weights(model)
        model.reset_states()
        X,y,shot_lengths,disr = loader.load_as_X_y_pred(shot_sublist,custom_batch_size=batch_size)
        # evaluate on the loaded batch
        all_metrics.append(model.evaluate(X,y,batch_size=batch_size,verbose=False))
        all_weights.append(batch_size)
        model.reset_states()

        pbar.add(1.0*len(shot_sublist))
        loader.verbose=False#True during the first iteration

    if len(all_metrics) > 1:
        print('evaluations all: {}'.format(all_metrics))
    # Weight each sublist's result by the number of shots it contained.
    loss = np.average(all_metrics,weights = all_weights)
    print('Evaluation Loss: {}'.format(loss))
    loader.set_inference_mode(False)
    return loss
def new_generate_dataset(dataset, samples, gen_test, beam_size, hypo_len, noise_size, cmodel):
    """Draw generated batches until `samples` examples were seen, append the
    classifier's predictions to every batch and return the merged result."""
    batch_source = val_generator(dataset, gen_test, beam_size, hypo_len, noise_size)
    bar = Progbar(samples)
    collected = []
    while bar.seen_so_far < samples:
        batch = next(batch_source)
        class_probs = cmodel.predict([batch[0], batch[1]], verbose = 0)
        batch = batch + (class_probs,)
        bar.add(len(batch[0]))
        collected.append(batch)
    return merge_result_batches(collected)
def adverse_model_train(train, ad_model, gen_model, word_index, glove, nb_epochs = 20, batch_size=64, ci=False): for e in range(nb_epochs): print "Epoch ", e mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True) p = Progbar(2 * len(train)) for i, train_index in mb: if len(train_index) != batch_size: continue class_indices = [i % 3] * batch_size if ci else None X, y = adverse_batch([train[k] for k in train_index], word_index, gen_model, len(train), class_indices = class_indices) loss = ad_model.train_on_batch(X, y)[0] p.add(len(X),[('train_loss', loss)])
def preprocess1(X, weight=0.1):
    """
    Pre-process images that are fed to neural network.

    Applies total-variation denoising to each `X[i, j]`, writing the result
    back into `X`.

    :param X: array indexed as X[i, j]; modified in place and returned
    :param weight: denoising weight forwarded to denoise_tv_chambolle
    """
    progress = Progbar(X.shape[0])  # pre-processing status
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            image = X[i, j]
            X[i, j] = denoise_tv_chambolle(image, weight=weight, multichannel=False)
        progress.add(1)
    return X
def equalize(X):
    """
    Pre-process images that are fed to neural network.

    Replaces every `X[i]` in place with its histogram-equalized version.

    :param X: array of images; modified in place and returned
    """
    print('Equalizing images...')
    total = X.shape[0]
    bar = Progbar(total)  # pre-processing status
    for index in range(total):
        equalized = exposure.equalize_hist(X[index])
        X[index] = equalized
        bar.add(1)
    return X
def nldenoise(X):
    """
    Pre-process images that are fed to neural network.

    Replaces every `X[i]` in place with its bilateral-filtered version
    (sigma_range=0.05, sigma_spatial=4).

    :param X: array of images; modified in place and returned
    """
    print('Denoising images...')
    count = X.shape[0]
    bar = Progbar(count)  # pre-processing status
    index = 0
    while index < count:
        X[index] = denoise_bilateral(X[index], sigma_range=0.05, sigma_spatial=4)
        bar.add(1)
        index += 1
    return X
def shift_augmentation(X, h_range, w_range):
    """Return a copy of `X` in which all `X[i, j]` of one `i` are shifted by
    the same random offset.

    The offset along each image axis is a random fraction in
    [-range, range) of that axis' size, truncated to an integer.
    """
    progbar = Progbar(X.shape[0])  # augmentation status
    shifted = np.copy(X)
    size = X.shape[2:]
    for sample in range(len(X)):
        h_random = np.random.rand() * h_range * 2. - h_range
        w_random = np.random.rand() * w_range * 2. - w_range
        offset = (int(h_random * size[0]), int(w_random * size[1]))
        for plane in range(X.shape[1]):
            shifted[sample, plane] = ndimage.shift(X[sample, plane], offset, order=0)
        progbar.add(1)
    return shifted
def train(generator, discriminator, combined, epochs=50):
    """Train a GAN on the combined MNIST train and test images.

    Per batch the discriminator is trained on one real and one generated
    batch, then the generator is trained through `combined` on twice as
    many noise samples labelled as real. After every epoch the losses of
    the last batch are printed; images are saved on every second epoch.

    Returns the trained generator.

    Changes from the previous version: the progress bar is advanced after
    each batch with the number of batches completed, so it reaches its
    target at the end of the epoch (it previously stopped one short), and
    the constant generator target array is built once.
    """
    # Load and normalize data:
    (x_train, _), (x_test, _) = mnist.load_data()
    x_train = np.concatenate((x_train, x_test), axis=0)
    x_train = x_train.reshape((NUM_IMGS, IMG_ROWS, IMG_COLS, CHANNELS))
    x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # scale to [-1, 1]

    # Label arrays for positive/negative examples
    positive_examples = np.ones((BATCH_SIZE, 1))
    negative_examples = np.zeros((BATCH_SIZE, 1))
    # Generator targets: all "real", for a double-sized noise batch.
    positive = np.concatenate((positive_examples, positive_examples), axis=0)

    # Number of batch loops:
    batch_loops = int(NUM_IMGS // BATCH_SIZE)

    for epoch in range(epochs):
        progress_bar = Progbar(target=batch_loops)
        shuffle_idx = np.random.permutation(NUM_IMGS)
        real_imgs = x_train[shuffle_idx]

        for batch_i in range(batch_loops):
            # Discriminator:
            img_batch = real_imgs[batch_i * BATCH_SIZE:(batch_i + 1) * BATCH_SIZE]
            noise = np.random.normal(0, 1, (BATCH_SIZE, LATENT_SIZE))
            fake_img_batch = generator.predict(noise)

            d_loss_real = discriminator.train_on_batch(img_batch, positive_examples)
            d_loss_fake = discriminator.train_on_batch(fake_img_batch, negative_examples)
            d_loss_total = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Generator:
            noise = np.random.normal(0, 1, (2 * BATCH_SIZE, LATENT_SIZE))
            g_loss = combined.train_on_batch(noise, positive)

            progress_bar.update(batch_i + 1)

        print("Epoch: %d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss_total[0], 100 * d_loss_total[1], g_loss))

        if epoch % 2 == 0:
            save_images(generator, epoch)

    return generator
def compute_loss(sess, learner_object, generator, steps, verbose=0):
    """Generates predictions for the input samples from a data generator
    and computes metrics on the predictions.

    # Arguments
        sess: passed through to `learner_object.inference`.
        learner_object: object whose `inference(inputs, sess)` returns a
            dict with 'stds' (at least three entries), 'vel_loss' and
            'pnt_loss'.
        generator: iterator yielding `(x, gt_labels)` tuples.
        steps: number of generator items to consume.
        verbose: when equal to 1 a progress bar is displayed.

    # Returns
        Dict with the float means 'pnt_std', 'vel_std', 'pnt_mse',
        'vel_mse' and 'total_loss' (square root of the mean over the point
        and velocity losses of all steps taken together).

    # Raises
        ValueError: if the generator yields anything but a 2-tuple.

    The progress bar is now only touched when it was created; the previous
    version raised NameError for every `verbose` other than 1, including
    the default.
    """
    steps_done = 0
    all_vel_mse = []
    all_pnt_mse = []
    all_vel_std = []
    all_pnt_std = []
    inputs = {}
    outputs = {}

    progbar = Progbar(target=steps) if verbose == 1 else None

    while steps_done < steps:
        generator_output = next(generator)

        if isinstance(generator_output, tuple):
            if len(generator_output) == 2:
                x, gt_lab = generator_output
            else:
                raise ValueError('output of generator should be '
                                 'a tuple `(x, y, sample_weight)` '
                                 'or `(x, y)`. Found: ' +
                                 str(generator_output))
        else:
            raise ValueError('Output not valid for current evaluation')

        inputs['images'] = x
        inputs['gt_labels'] = gt_lab
        results = learner_object.inference(inputs, sess)

        # The first two stds go to the point statistics, the third to the
        # velocity statistics.
        all_pnt_std.append(results['stds'][:2])
        all_vel_std.append(results['stds'][2])
        all_vel_mse.append(results['vel_loss'])
        all_pnt_mse.append(results['pnt_loss'])

        steps_done += 1
        if progbar is not None:
            progbar.update(steps_done)

    outputs['pnt_std'] = float(np.mean(all_pnt_std))
    outputs['vel_std'] = float(np.mean(all_vel_std))
    outputs['pnt_mse'] = float(np.mean(all_pnt_mse))
    outputs['vel_mse'] = float(np.mean(all_vel_mse))
    # `+` concatenates the two lists, so this is the mean over both sets of
    # per-step losses.
    outputs['total_loss'] = float(np.sqrt(np.mean(all_pnt_mse + all_vel_mse)))
    return outputs
def rotaterandom(X):
    """
    Pre-process images that are fed to neural network.

    Each `X[i]` is, with probability one half, rotated in place by a random
    angle drawn from [-6, 6).

    :param X: array of images; modified in place and returned
    """
    print('Rotating images...')
    count = X.shape[0]
    bar = Progbar(count)  # pre-processing status
    for index in range(count):
        rotate_this = np.random.rand() > 0.5
        if rotate_this:
            angle = (np.random.rand() - 0.5) * 12
            X[index] = rotate(X[index], angle, mode='nearest', reshape=False)
        bar.add(1)
    return X
def dl_progress(count, block_size, total_size):
    """Download progress hook.

    The first call creates the shared progress bar (a `total_size` of -1 is
    passed on as None); every later call moves it to `count * block_size`.
    """
    if ProgressTracker.progbar is not None:
        ProgressTracker.progbar.update(count * block_size)
        return
    target = None if total_size == -1 else total_size
    ProgressTracker.progbar = Progbar(target)
def train(generator, discriminator, combined, epochs=50):
    """Train an ACGAN on the combined MNIST train and test sets.

    Per batch the discriminator is trained on a real and a generated batch
    with noisy ("smoothed") real/fake targets plus class labels, then the
    generator is trained through `combined`. After every epoch the losses
    of the last batch are printed and images are saved.

    The progress bar is advanced after each batch with the number of
    batches completed, so it reaches its target at the end of the epoch
    (it previously stopped one short).
    """
    # Load and normalize data:
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = np.concatenate((x_train, x_test), axis=0)
    x_train = x_train.reshape((NUM_IMGS, IMG_ROWS, IMG_COLS, CHANNELS))
    y_train = np.concatenate((y_train, y_test), axis=0)
    x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # scale to [-1, 1]

    # Number of batch loops:
    batch_loops = int(NUM_IMGS // BATCH_SIZE)

    # Train ACGAN:
    for epoch in range(epochs):
        # Shuffle images and labels with the same permutation.
        shuffle_idx = np.random.permutation(NUM_IMGS)
        real_imgs = x_train[shuffle_idx]
        labels = y_train[shuffle_idx]
        progress_bar = Progbar(target=batch_loops)

        for batch_i in range(batch_loops):
            lo, hi = batch_i * BATCH_SIZE, (batch_i + 1) * BATCH_SIZE

            pos_examples_smooth = np.random.normal(0.7, 0.12, (BATCH_SIZE,))
            neg_examples_smooth = np.random.normal(0.0, 0.3, (BATCH_SIZE,))

            # Discriminator:
            real_img_batch = real_imgs[lo:hi]
            real_label_batch = labels[lo:hi].reshape(-1,)

            noise_batch = np.random.normal(0, 1, (BATCH_SIZE, LATENT_SIZE))
            fake_label_batch = np.random.randint(0, NUM_CLASSES, BATCH_SIZE)
            fake_img_batch = generator.predict([noise_batch, fake_label_batch.reshape((-1, 1))])

            d_loss_r = discriminator.train_on_batch(real_img_batch, [pos_examples_smooth, real_label_batch])
            d_loss_f = discriminator.train_on_batch(fake_img_batch, [neg_examples_smooth, fake_label_batch])
            d_loss_total = np.add(d_loss_r, d_loss_f) * 0.5

            # Generator:
            noise = np.random.normal(0, 1, (BATCH_SIZE, LATENT_SIZE))
            fake_labels = np.random.randint(0, NUM_CLASSES, BATCH_SIZE)
            pos_examples_smooth = np.random.normal(0.7, 0.12, (BATCH_SIZE,))
            g_loss = combined.train_on_batch([noise, fake_labels.reshape(-1, 1)], [pos_examples_smooth, fake_labels])

            progress_bar.update(batch_i + 1)

        print(d_loss_total)
        print(g_loss)

        save_images(generator, epoch)
class TrainIntervalLogger(Callback):
    """Callback that shows a progress bar per interval of training steps.

    Every ``interval`` steps a fresh ``Progbar`` is started. After each step
    the bar is advanced and shown with the step reward and, when they contain
    no NaN, the model metrics.
    """

    def __init__(self, interval=10000):
        self.interval = interval
        self.step = 0
        self.reset()

    def reset(self):
        """Start a new interval: restart the timer, the bar and the metric history."""
        self.interval_start = timeit.default_timer()
        self.progbar = Progbar(target=self.interval)
        self.metrics = []

    def on_train_begin(self, logs):
        self.train_start = timeit.default_timer()
        self.metrics_names = self.model.metrics_names
        print('Training for {} steps ...'.format(self.params['nb_steps']))

    def on_train_end(self, logs):
        duration = timeit.default_timer() - self.train_start
        print('done, took {:.3f} seconds'.format(duration))

    def on_step_begin(self, step, logs):
        if self.step % self.interval == 0:
            self.reset()
            # Floor division keeps the interval number an integer on Python 3
            # ("Interval 2", not "Interval 2.0").
            print('Interval {} ({} steps performed)'.format(
                self.step // self.interval + 1, self.step))

    def on_step_end(self, step, logs):
        # TODO: work around nan's in metrics. This isn't really great yet and
        # probably not 100% accurate
        filtered_metrics = []
        means = None  # column means of this interval's metrics, computed lazily
        for idx, value in enumerate(logs['metrics']):
            if not np.isnan(value):
                filtered_metrics.append(value)
            else:
                # Replace a NaN metric by the interval mean of that metric, if
                # any non-NaN history exists; otherwise it stays NaN.
                mean = np.nan
                if len(self.metrics) > 0 and not np.isnan(self.metrics).all():
                    if means is None:
                        means = np.nanmean(self.metrics, axis=0)
                        assert means.shape == (len(self.metrics_names),)
                    mean = means[idx]
                filtered_metrics.append(mean)

        values = [('reward', logs['reward'])]
        # Metrics are only shown when none of them is still NaN.
        if not np.isnan(filtered_metrics).any():
            values += list(zip(self.metrics_names, filtered_metrics))
        self.progbar.update((self.step % self.interval) + 1, values=values, force=True)
        self.step += 1
        self.metrics.append(logs['metrics'])
def run_epoch(self, split, train=False, batch_size=128, return_pred=False):
    """Run one pass over ``split`` and return aggregate metrics.

    Args:
        split: Object exposing ``num_examples`` and ``batches(batch_size)``,
            which yields ``(idx, X, Y, types)`` tuples.
        train: If true each batch goes through ``model.train_on_batch``,
            otherwise through ``model.test_on_batch``.
        batch_size: Passed to ``split.batches``.
        return_pred: If true, also return example ids, predictions and
            targets as lists.

    Returns:
        Dict with ``f1``, ``precision``, ``recall`` (micro-averaged over
        ``self.labels``), ``accuracy`` and mean per-batch ``loss``; plus
        ``ids``, ``preds`` and ``targs`` when ``return_pred`` is set.
    """
    total = total_loss = 0
    func = self.model.train_on_batch if train else self.model.test_on_batch
    ids, preds, targs = [], [], []
    prog = Progbar(split.num_examples)
    for idx, X, Y, types in split.batches(batch_size):
        # Each target is fed alongside the inputs, with `types` appended as
        # extra columns.
        X.update({k: np.concatenate([v, types], axis=1) for k, v in Y.items()})
        loss = func(X)
        prob = self.model.predict(X, verbose=0)['p_relation']
        # Scale by whatever the typechecker returns for the two type columns
        # (presumably a validity mask over relations -- confirm).
        prob *= self.typechecker.get_valid_cpu(types[:, 0], types[:, 1])
        pred = prob.argmax(axis=1)
        targ = Y['p_relation'].argmax(axis=1)

        ids.append(idx)
        targs.append(targ)
        preds.append(pred)
        total_loss += loss
        total += 1
        prog.add(idx.size, values=[('loss', loss), ('acc', np.mean(pred == targ))])

    preds = np.concatenate(preds).astype('int32')
    targs = np.concatenate(targs).astype('int32')
    ids = np.concatenate(ids).astype('int32')

    micro = {'average': 'micro', 'labels': self.labels}
    ret = {
        'f1': f1_score(targs, preds, **micro),
        'precision': precision_score(targs, preds, **micro),
        'recall': recall_score(targs, preds, **micro),
        'accuracy': accuracy_score(targs, preds),
        'loss': total_loss / float(total),
    }
    if return_pred:
        ret.update({
            'ids': ids.tolist(),
            'preds': preds.tolist(),
            'targs': targs.tolist(),
        })
    return ret
class TrainIntervalLogger(Callback):
    """Progress-bar logger that summarises training once per step interval.

    While an interval runs, the bar shows the per-step reward. When the next
    interval starts, a one-line summary of the finished one is printed
    (episode count, mean/min/max episode reward, NaN-aware metric means),
    provided at least one episode ended in it.
    """

    def __init__(self, interval=10000):
        self.interval = interval
        self.step = 0
        self.reset()

    def reset(self):
        """Drop all per-interval state and start a fresh bar."""
        self.interval_start = timeit.default_timer()
        self.progbar = Progbar(target=self.interval)
        self.metrics = []
        self.episode_rewards = []

    def on_train_begin(self, logs):
        self.train_start = timeit.default_timer()
        self.metrics_names = self.model.metrics_names
        print('Training for {} steps ...'.format(self.params['nb_steps']))

    def on_train_end(self, logs):
        elapsed = timeit.default_timer() - self.train_start
        print('done, took {:.3f} seconds'.format(elapsed))

    def on_step_begin(self, step, logs):
        # Only the first step of an interval has anything to do here.
        if self.step % self.interval != 0:
            return

        rewards = self.episode_rewards
        if len(rewards) > 0:
            metrics = np.array(self.metrics)
            assert metrics.shape == (self.interval, len(self.metrics_names))
            formatted_metrics = ''
            if not np.isnan(metrics).all():  # at least one value is not NaN
                means = np.nanmean(self.metrics, axis=0)
                assert means.shape == (len(self.metrics_names),)
                formatted_metrics = ''.join(
                    ' - {}: {:.3f}'.format(name, mean)
                    for name, mean in zip(self.metrics_names, means))
            summary = '{} episodes - episode_reward: {:.3f} [{:.3f}, {:.3f}]{}'
            print(summary.format(len(rewards), np.mean(rewards),
                                 np.min(rewards), np.max(rewards),
                                 formatted_metrics))
            print('')

        self.reset()
        interval_number = self.step // self.interval + 1
        print('Interval {} ({} steps performed)'.format(interval_number, self.step))

    def on_step_end(self, step, logs):
        position = (self.step % self.interval) + 1
        self.progbar.update(position, values=[('reward', logs['reward'])], force=True)
        self.step += 1
        self.metrics.append(logs['metrics'])

    def on_episode_end(self, episode, logs):
        self.episode_rewards.append(logs['episode_reward'])
class DLProgbar:
    """Manage progress bar state for use in urlretrieve."""

    def __init__(self):
        self.progbar = None   # created lazily on the first report
        self.finished = False  # set once the final update has been shown

    def __call__(self, block_num, block_size, total_size):
        """Report-hook entry point.

        Args:
            block_num: Number of blocks transferred so far.
            block_size: Size of one block.
            total_size: Total size, or -1 when it is unknown.
        """
        # Normalise on every call, not only the first: the hook keeps
        # receiving -1 for an unknown size.
        if total_size == -1:
            total_size = None
        if not self.progbar:
            self.progbar = Progbar(total_size)
        current = block_num * block_size

        if total_size is None:
            # Unknown size: there is no end to compare against (comparing an
            # int with None raises TypeError on Python 3), so just report the
            # running count.
            self.progbar.update(current)
            return

        if current < total_size:
            self.progbar.update(current)
        elif not self.finished:
            # The last block may overshoot; clamp to the target and show the
            # final state only once.
            self.progbar.update(self.progbar.target)
            self.finished = True
def __init__(self, logger, rnn, db):
    """Store the collaborators and zero the counters.

    Args:
        logger: Kept as ``self.logger``.
        rnn: Kept as ``self.rnn``.
        db: Kept as ``self.db``.
    """
    super(ResetStateCallback, self).__init__()
    # Collaborators handed in by the caller.
    self.logger, self.rnn, self.db = logger, rnn, db
    # Progress bookkeeping; the bar uses a fixed target of 2,000,000.
    self.batches = 0
    self.songs_learned = 0
    self.bar = Progbar(2000000)
def train(BATCH_SIZE):
    """Train the DCGAN on MNIST for 100 epochs.

    Args:
        BATCH_SIZE: Number of real images (and of generated images) per
            discriminator update.

    Every 20th batch a grid of generated images is written under
    ``hackDCGAN/``; generator and discriminator weights are saved there after
    each epoch.
    """
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # Scale pixels to [-1, 1] and add a trailing channel axis.
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    X_train = X_train[:, :, :, None]
    X_test = X_test[:, :, :, None]

    d = discriminator_model()
    g = generator_model()
    d_on_g = generator_containing_discriminator(g, d)
    d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
    g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
    g.compile(loss='binary_crossentropy', optimizer="SGD")
    d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim)
    d.trainable = True
    d.compile(loss='binary_crossentropy', optimizer=d_optim)

    for epoch in range(100):
        print("Epoch is", epoch)
        BATCH_COUNT = int(X_train.shape[0] / BATCH_SIZE)
        print("Number of batches", BATCH_COUNT)
        progress_bar = Progbar(target=BATCH_COUNT)
        for index in range(BATCH_COUNT):
            noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))
            image_batch = X_train[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]
            generated_images = g.predict(noise, verbose=0)

            if index % 20 == 0:
                image = combine_images(generated_images)
                # Map [-1, 1] back to [0, 255] for saving.
                image = image * 127.5 + 127.5
                # Original note (translated): "do not generate images while
                # debugging" -- this save is the line to disable in that case.
                Image.fromarray(image.astype(np.uint8)).save(
                    "hackDCGAN/" + str(epoch) + "_" + str(index) + ".png")

            # Discriminator step: real images labelled 1, generated ones 0.
            X = np.concatenate((image_batch, generated_images))
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
            d_loss = d.train_on_batch(X, y)

            # Generator step through the stacked model, discriminator frozen.
            noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))
            d.trainable = False
            g_loss = d_on_g.train_on_batch(noise, [1] * BATCH_SIZE)
            d.trainable = True

            # index + 1 batches are done now; reporting `index` left the bar
            # one step short of its target at the end of every epoch.
            progress_bar.update(index + 1, values=[("d_loss", d_loss),
                                                   ("g_loss", g_loss),
                                                   ("epoch", epoch + 1)])

        g.save_weights('hackDCGAN/gan_generator.h5', True)
        d.save_weights('hackDCGAN/gan_discriminator.h5', True)
def process(set, images_path, labels, image_res=32):
    """Normalize and resize a set of images and pickle the result.

    Args:
        set: Unused by the current code (kept for interface compatibility).
        images_path: Image names; the last two characters of each name are
            stripped to form the ``.jpg`` file name, and the full name is the
            key into ``labels``.
        labels: Mapping from image name to its label.
        image_res: Side length of the square output images.

    Writes a pickle with keys ``'data'``, ``'label'`` and ``'center'`` to
    ``test-<image_res>data-tiny.save`` inside the ``Dataset`` directory.
    """
    full_images_path = [get_image_path("Color", "%s.jpg" % (image[:-2]))
                        for image in images_path]
    images = load_images(full_images_path)
    labels = [labels[name] for name in images_path]

    p = Progbar(len(labels))
    data = np.empty((len(labels), image_res, image_res, 3), 'uint8')
    target = np.empty((len(labels), 21, 2), 'float')
    image_center = np.empty((len(labels), 2), 'float')

    # The leftover pdb.set_trace() breakpoints were removed: they stopped the
    # run twice per image.
    for index, image in enumerate(images):
        label = labels[index]
        newimage, newlabel, center = normalize(image, label)
        newimage1, newlabel1 = resize(newimage, newlabel, (image_res, image_res))
        data[index] = newimage1
        target[index] = newlabel1
        image_center[index] = center
        # Report the number of finished images so the bar can reach its target.
        p.update(index + 1)

    datas = {'data': data, 'label': target, 'center': image_center}
    out_path = os.path.join(Dataset, "test-" + str(image_res) + "data-tiny.save")
    # Context manager guarantees the file is flushed and closed.
    with open(out_path, 'wb') as out_file:
        pickle.dump(datas, out_file)
def run(tag_dist, output_fname, force, nb_samples):
    """Sanity-check a tag distribution and write a sampled dataset to HDF5.

    Args:
        tag_dist: Distribution passed to ``generator``, the plot helpers and
            the dataset; must provide ``to_json()``.
        output_fname: Path of the HDF5 file to create.
        force: If true, an existing ``output_fname`` is deleted first.
        nb_samples: Number of samples to write.

    Raises:
        AssertionError: If ``output_fname`` exists and ``force`` is false, or
            if the mean of any sampled label field exceeds 0.03 in magnitude.
    """
    out_dir = os.path.dirname(output_fname)
    if out_dir:
        # A bare file name has an empty dirname, which makedirs rejects.
        os.makedirs(out_dir, exist_ok=True)
    if os.path.exists(output_fname) and force:
        print("Deleted {}".format(output_fname))
        os.remove(output_fname)
    else:
        assert not os.path.exists(output_fname), \
            "File {} already exists. Use --force to override it".format(output_fname)

    basename, _ = os.path.splitext(output_fname)
    anit_name = basename + "_anti_{}.png"
    hist_name = basename + "_hist_{}.png"
    for factor in (1, 2, 4, 8):
        plot_anitaliasing(tag_dist, anit_name, factor)

    # Draw one large batch and check every label field is roughly zero-mean.
    labels, masks, _ = next(generator(tag_dist, 10000, antialiasing=2))
    for key in labels.dtype.names:
        m = labels[key].mean()
        s = labels[key].std()
        print("{}: {:.3f}, {:.3f}".format(key, m, s))
        assert abs(m) <= 0.03

    for label_name in sorted(set(labels.dtype.names) - set(['bits'])):
        x = labels[label_name]
        # NOTE(review): `normed` was replaced by `density` in newer
        # matplotlib releases -- confirm against the pinned version.
        plt.hist(x.flatten(), bins=40, normed=True)
        plt.savefig(hist_name.format(label_name))
        plt.clf()

    dset = DistributionHDF5Dataset(output_fname, distribution=tag_dist,
                                   nb_samples=nb_samples, mode='w')
    progbar = Progbar(nb_samples)
    batch_size = min(25000, nb_samples)
    for labels, tags, depth_map in generator(tag_dist, batch_size, antialiasing=4):
        pos = dset.append(labels=labels, tag3d=tags, depth_map=depth_map)
        progbar.update(pos)
        if pos == nb_samples:
            break
    print("Saved tag 3d dataset to: {}".format(output_fname))

    dist_fname = basename + "_distribution.json"
    with open(dist_fname, "w+") as dist_f:
        dist_f.write(tag_dist.to_json())
    print("Saved distribution to: {}".format(dist_fname))
def sample(self, tf, length, out_fp=None):
    """Sample ``length`` tokens from the prediction model and save them as text.

    Args:
        tf: Object providing ``vocab_size`` and ``indexes_to_string``.
        length: Number of tokens to sample.
        out_fp: Output path; defaults to ``samples/<self.name>.txt``.

    Returns:
        The sampled string, as produced by ``tf.indexes_to_string``.
    """
    sampled_hots = []
    # Sample with the weights of the training model, starting from clean state.
    self.pred_model.set_weights(self.model.get_weights())
    self.pred_model.reset_states()

    # Random seed token.
    # NOTE(review): randint's upper bound is exclusive, so index
    # vocab_size - 1 is never drawn -- confirm that is intended.
    rows = np.random.randint(0, tf.vocab_size-1, size=(1,1))
    b = Progbar(length)
    for i in np.arange(0, length):
        next_row_index = self.sample_softmax(
            np.squeeze(self.pred_model.predict(rows), 0)[0], self.temp)
        sampled_hots.append(next_row_index)
        # Feed the sampled token back in as the next input.
        rows = np.array([next_row_index]).reshape((1,1))
        # i + 1 tokens are done; reporting `i` left the bar one step short.
        b.update(i + 1)

    sampled_hots = np.array(sampled_hots)
    s_string = tf.indexes_to_string(sampled_hots)
    if out_fp is None:
        out_fp = 'samples/{}.txt'.format(self.name)
    with open(out_fp, 'w') as fp:
        fp.write(s_string)
    return s_string
def test_progbar():
    """Smoke-test ``Progbar.update`` with a known target and with ``None``."""
    n = 2
    input_arr = np.random.random((n, n, n))
    # Same update sequence against a bounded bar, then an unbounded one.
    for target in (n, None):
        bar = Progbar(target)
        for step in range(len(input_arr)):
            bar.update(step, list(input_arr[step]))
def _producer(self):
    """Fill the sample cache forever, one pass of ``cache_size`` slots at a time.

    The first pass holds ``self.cache_lock`` for its whole duration and sets
    ``self._ready`` when it completes; later passes only take the lock while
    writing a single slot. The loop exits once ``self._stop`` is set, but
    never during the first pass.
    """
    N = self.cache_size
    passes = 0
    if self.verbose > 0:
        prog = Progbar(N)

    while True:
        # Acquire the lock for the whole first pass
        if passes == 0:
            self.cache_lock.acquire()

        for idx in range(N):
            # We're done, stop now (only honoured after the first pass)
            if self._stop and passes > 0:
                return

            # Keep drawing until the generator yields a complete sample
            while True:
                sample = self._s_generator.get_sample(self._dataset)
                if sample.X is None or sample.y is None:
                    continue
                break

            # Do the copy to the cache but make sure you lock first and
            # unlock afterwards
            # NOTE(review): during the first pass the lock is already held by
            # this thread, so this only works if cache_lock is re-entrant --
            # confirm.
            with self.cache_lock:
                try:
                    for i, xi in enumerate(sample.X):
                        self.X[i][idx] = xi
                    for i, yi in enumerate(sample.y):
                        self.y[i][idx] = yi
                except Exception as e:
                    # Best effort: a failed copy is reported but does not
                    # stop the producer.
                    sys.stderr.write("Exception caught in producer thread")

            # Show progress if it is the first pass
            if passes == 0 and self.verbose > 0:
                prog.update(idx + 1)

        # Release the lock if it was the first pass
        if passes == 0:
            self._ready = True
            self.cache_lock.release()

        # Count the passes
        passes += 1
def validate(dev, gen_test, beam_size, hypo_len, samples, noise_size, glove, cmodel=None, adverse=False, diverse=False): vgen = val_generator(dev, gen_test, beam_size, hypo_len, noise_size) p = Progbar(samples) batchez = [] while p.seen_so_far < samples: batch = next(vgen) preplexity = np.mean(np.power(2, batch[2])) loss = np.mean(batch[2]) losses = [('hypo_loss', loss), ('perplexity', preplexity)] if cmodel is not None: ceval = cmodel.evaluate([batch[0], batch[1]], batch[4], verbose=0) losses += [('class_loss', ceval[0]), ('class_acc', ceval[1])] probs = cmodel.predict([batch[0], batch[1]], verbose=0) losses += [('class_entropy', np.mean(-np.sum(probs * np.log(probs), axis=1)))] p.add(len(batch[0]), losses) batchez.append(batch) batchez = merge_result_batches(batchez) res = {} if adverse: val_loss = adverse_validation(dev, batchez, glove) print 'adverse_loss:', val_loss res['adverse_loss'] = val_loss if diverse: div, _, _, _ = diversity(dev, gen_test, beam_size, hypo_len, noise_size, 64, 32) res['diversity'] = div print for val in p.unique_values: arr = p.sum_values[val] res[val] = arr[0] / arr[1] return res
def diversity(dev, gen_test, beam_size, hypo_len, noise_size, per_premise, samples):
    """Measure how varied the generated hypotheses are for sampled premises.

    ``samples`` premises are taken at a regular stride from ``dev[0]``. For
    each, at least ``per_premise`` hypotheses are generated and four
    statistics are accumulated: entropy of the hypothesis counts, entropy of
    the word counts, and one minus the mean Jaccard similarity between
    hypotheses, respectively between hypotheses and the premise.

    Returns:
        Tuple ``(diversity, word_entropy, avg_dist_hypo, avg_dist_prem)`` of
        running averages over all premises.
    """
    # Floor division: the stride is used to build indices, and plain `/`
    # yields a float on Python 3.
    step = len(dev[0]) // samples
    sind = [i * step for i in range(samples)]
    p = Progbar(per_premise * samples)
    for i in sind:
        hypos = []
        unique_words = []
        hypo_list = []
        premise = dev[0][i]
        prem_list = set(cut_zeros(list(premise)))
        # The label does not change while generating for this premise.
        label = np.argmax(dev[2][i])
        while len(hypos) < per_premise:
            words = single_generate(premise, label, gen_test, beam_size,
                                    hypo_len, noise_size)
            hypos += [str(ex) for ex in words]
            unique_words += [int(w) for ex in words for w in ex if w > 0]
            hypo_list += [set(cut_zeros(list(ex))) for ex in words]

        jacks = []       # pairwise hypothesis/hypothesis Jaccard similarities
        prem_jacks = []  # hypothesis/premise Jaccard similarities
        for u in range(len(hypo_list)):
            sim_prem = len(hypo_list[u] & prem_list) / float(
                len(hypo_list[u] | prem_list))
            prem_jacks.append(sim_prem)
            for v in range(u + 1, len(hypo_list)):
                sim = len(hypo_list[u] & hypo_list[v]) / float(
                    len(hypo_list[u] | hypo_list[v]))
                jacks.append(sim)
        avg_dist_hypo = 1 - np.mean(jacks)
        avg_dist_prem = 1 - np.mean(prem_jacks)

        d = entropy(Counter(hypos).values())
        w = entropy(Counter(unique_words).values())
        p.add(len(hypos), [('diversity', d), ('word_entropy', w),
                           ('avg_dist_hypo', avg_dist_hypo),
                           ('avg_dist_prem', avg_dist_prem)])

    # The bar keeps (weighted sum, weight) per metric.
    arrd = p.sum_values['diversity']
    arrw = p.sum_values['word_entropy']
    arrj = p.sum_values['avg_dist_hypo']
    arrp = p.sum_values['avg_dist_prem']
    return (arrd[0] / arrd[1], arrw[0] / arrw[1],
            arrj[0] / arrj[1], arrp[0] / arrp[1])
def anno_to_data(anno_dct, attr_id_to_idx, target_img_size=(250, 250)):
    """Convert an annotation dict into image and multi-label arrays.

    Args:
        anno_dct: Mapping ``image_id -> entry``; each entry has an
            ``'attributes'`` list (items carrying ``'attr_id'``) and an
            ``'image_path'``.
        attr_id_to_idx: Mapping from attribute id to its column in ``Y``.
        target_img_size: ``(height, width)`` the images are loaded at.

    Returns:
        Tuple ``(X, Y, image_id_list)``: images of shape
        ``(n_items, height, width, 3)``, multi-hot labels of shape
        ``(n_items, n_attr)``, and the image ids in row order.
    """
    n_items = len(anno_dct)
    n_attr = len(attr_id_to_idx)

    X = np.zeros(shape=(n_items, target_img_size[0], target_img_size[1], 3))
    Y = np.zeros(shape=(n_items, n_attr))
    image_id_list = []

    pbar = Progbar(n_items)
    # .items() works on Python 2 and 3; .iteritems() exists only on Python 2.
    for idx, (image_id, entry) in enumerate(anno_dct.items()):
        # ----- Labels -> Vec
        this_attr_ids = {attr_entry['attr_id'] for attr_entry in entry['attributes']}
        label_vec = np.zeros(n_attr)
        for attr_id in this_attr_ids:
            label_vec[attr_id_to_idx[attr_id]] = 1
        Y[idx] = label_vec

        # ----- Image -> Mat
        this_image_path = entry['image_path']
        # Prefer the pre-resized copy when it exists on disk.
        resized_img_path = this_image_path.replace('images', 'images_250')
        resized_img_path = osp.join('/BS/orekondy2/work/datasets/VISPR2017',
                                    resized_img_path)
        if osp.exists(resized_img_path):
            this_image_path = resized_img_path
        else:
            this_image_path = osp.join(SEG_ROOT, this_image_path)

        img = load_img(this_image_path, target_size=target_img_size)
        X[idx] = img_to_array(img)

        image_id_list.append(image_id)
        # idx + 1 items are done; reporting `idx` left the bar one step short.
        pbar.update(idx + 1)

    return X, Y, image_id_list
def train(self, X_train):
    """Alternate discriminator and generator updates over ``X_train``.

    Runs ``self.epochs`` epochs of ``self.batch_size`` batches, writes a
    preview image every 50th batch to ``./result/`` and saves both networks'
    weights to ``./saved_model/`` after each epoch.

    Returns:
        Tuple ``(self.d, self.g)``.
    """
    batch = self.batch_size
    for epoch in range(self.epochs):
        print("Epoch is ", epoch)
        n_iter = int(X_train.shape[0] / batch)
        progress_bar = Progbar(target=n_iter)
        for index in range(n_iter):
            # Uniform(0, 1) latent vectors, one per sample in the batch.
            noise = np.random.uniform(0, 1, size=(batch, self.z_dim))

            # Real batch from the data, fake batch from the generator.
            real_images = X_train[index * batch:(index + 1) * batch]
            fake_images = self.g.predict(noise, verbose=0)

            # Periodic preview of what the generator currently produces.
            if index % 50 == 0:
                preview = self.plot_generate_images(fake_images)
                preview = preview * 127.5 + 127.5
                cv2.imwrite('./result/' + str(epoch) + "_" + str(index) + ".png", preview)

            # Discriminator step: real images are labelled 1, fakes 0.
            X = np.concatenate((real_images, fake_images))
            y = np.array([1] * batch + [0] * batch)
            d_loss = self.d.train_on_batch(X, y)

            # Generator step through the stacked model, discriminator frozen.
            self.d.trainable = False
            g_loss = self.d_on_g.train_on_batch(noise, np.array([1] * batch))
            self.d.trainable = True

            progress_bar.update(index, values=[('g', g_loss), ('d', d_loss)])
        print('')

        # Save weights after every epoch.
        if not os.path.exists('./saved_model/'):
            os.makedirs('./saved_model/')
        self.g.save_weights('./saved_model/generator.h5', True)
        self.d.save_weights('./saved_model/discriminator.h5', True)
    return self.d, self.g
def main_sequences(data_config_file=None, prefix=None, n_jobs=None, data=None, regions=None, labels=None, num_intervals=None, **kwargs):
    """Stream sequence batches for one epoch and report memory use.

    Extracts batches of 128 intervals from the genome FASTA until 2,000,000
    samples have been produced, showing the process's non-shared resident
    memory on the progress bar.

    Args:
        data: Object providing ``genome_data_dir``.
        regions: Interval collection; supports ``len``, indexing and ``at``.
        num_intervals: If given, only the first ``num_intervals`` regions are
            used.
        data_config_file, prefix, n_jobs, labels, **kwargs: Accepted but not
            used by this function.
    """
    from tfdragonn.io_utils import infinite_batch_iter
    from keras.utils.generic_utils import Progbar
    import psutil

    if num_intervals is not None:
        logger.info(
            "Using total of {} intervals to test sequence batch streaming".
            format(num_intervals))
        intervals = regions.at(range(num_intervals))
    else:
        logger.info(
            "Using total of {} intervals to test sequence batch streaming".
            format(len(regions)))
        intervals = regions
    interval_length = intervals[0].length

    # set up extractor and interval generation
    fasta_extractor = MemmappedFastaExtractor(data.genome_data_dir)

    print("starting batch generation...")
    process = psutil.Process(os.getpid())
    samples_per_epoch = 2000000
    batch_size = 128
    # Floor division keeps the batch count an integer on Python 3 as well.
    batches_per_epoch = samples_per_epoch // batch_size
    # Output buffer reused for every batch.
    out = np.zeros((batch_size, 1, 4, interval_length), dtype=np.float32)
    interval_batch_iterator = infinite_batch_iter(intervals, batch_size)
    progbar = Progbar(target=samples_per_epoch)
    for batch_indxs in range(1, batches_per_epoch + 1):
        out = fasta_extractor(next(interval_batch_iterator), out=out)
        # One snapshot so rss and shared come from the same measurement.
        mem = process.memory_info()
        progbar.update(
            batch_indxs * batch_size,
            values=[("Non-shared RSS (Mb)", (mem.rss - mem.shared) / 10**6)])
    logger.info("Done!")
def on_epoch_begin(self, epoch, logs=None):
    """Record the epoch and, on first use, create the progress bar.

    The per-epoch target (``steps`` or ``samples`` from ``self.params``) is
    stored on ``self.target``; the bar itself counts up to ``self.epochs``.
    ``self.seen`` is reset on every call.
    """
    self.epoch = epoch
    if not self.initialized:
        self.initialized = True
        target_key = 'steps' if self.use_steps else 'samples'
        self.target = self.params[target_key]
        self.progbar = Progbar(target=self.epochs, verbose=1)
    self.seen = 0
def load_subsets(self, subsets):
    """
    Loads specified subsets of the data for the code jam.

    Each subset is read from ``<self.root_path>/<subset>.npz`` and must
    contain ``images`` and ``labels`` arrays.

    Returns tuple: ( images, labels, subset membership number )
    (three ``None`` values when ``subsets`` is empty)

    You can use the subset membership number to select the data from
    particular subset: e.g. result[(indices == 4).flatten()]
    """
    def stack(parts):
        # No subsets -> None; a single subset is returned as loaded; several
        # are stacked vertically in one go.
        if not parts:
            return None
        if len(parts) == 1:
            return parts[0]
        return np.vstack(parts)

    image_parts, label_parts, index_parts = [], [], []

    p = Progbar(len(subsets))
    p.update(0)
    for index, subsetIndex in enumerate(subsets):
        # The context manager closes the archive once the arrays are read.
        with np.load("{}/{}.npz".format(self.root_path, subsetIndex)) as data:
            images = data['images']
            labels = data['labels']
        image_parts.append(images)
        label_parts.append(labels)
        # Membership marker: same shape as the labels, filled with the id.
        index_parts.append(np.ones(labels.shape) * subsetIndex)
        p.update(index + 1)

    # Stacking once at the end avoids re-copying everything loaded so far on
    # every iteration.
    return (stack(image_parts), stack(label_parts), stack(index_parts))
def build_vocab(self, texts, verbose=1, **kwargs):
    """Builds the internal vocabulary and computes various statistics.

    Args:
        texts: The list of text items to encode.
        verbose: The verbosity level for progress. Can be 0, 1, 2. (Default value = 1)
        **kwargs: The kwargs for `token_generator`.
    """
    if self.has_vocab:
        logger.warn(
            "Tokenizer already has existing vocabulary. Overriding and building new vocabulary."
        )

    n_texts = len(texts)
    progbar = Progbar(n_texts, verbose=verbose, interval=0.25)
    tracker = _CountTracker()

    # Start from a clean slate.
    self._token_counts.clear()
    self._num_texts = n_texts

    for item in self.token_generator(texts, **kwargs):
        # Each item is (*indices, token); the first index drives the bar.
        token = item[-1]
        indices = item[:-1]
        tracker.update(indices)
        self._token_counts[token] += 1
        progbar.update(indices[0])

    # Derive the token <-> index mappings from the counted tokens.
    self.create_token_indices(self._token_counts.keys())

    # Close out the statistics and force the bar to its final state.
    tracker.finalize()
    self._counts = tracker.counts
    progbar.update(n_texts, force=True)
def play(self, env, epoch=1, batch_size=1, visualize=None, verbose=1):
    """Let the agent play ``epoch`` games without training.

    Args:
        env: Environment providing ``reset``, ``observe``, ``observe_image``
            and ``update(action) -> (observation, reward, game_over)``.
        epoch: Number of games to play.
        batch_size: Unused by the current code.
        visualize: Optional dict with ``'n_frames'``, ``'filepath'`` and
            ``'gray'``; when given, the collected frames are written as a gif.
        verbose: 1 shows a progress bar with per-game loss and rewards.
    """
    print("Free play started!")
    # Zero-length seed array: fixes frame shape/dtype and keeps the final
    # concatenate valid even when no frame is recorded.
    frames = np.zeros((0, ) + env.observe_image().shape[1:])
    frames = frames.transpose(0, 2, 3, 1)
    frame_chunks = [frames]
    progbar = Progbar(epoch)

    for e in range(epoch):
        # reset environment on each epoch
        env.reset()
        game_over = False
        loss = 0
        rewards = 0

        # get initial observation, start game
        obs_t = env.observe()
        while not game_over:
            obs_tm1 = obs_t

            # get next action
            action = self.policy(obs_tm1, train=False)

            # apply action, get rewards and new state
            obs_t, reward, game_over = env.update(action)
            rewards += reward

            frame_chunks.append(env.observe_image().transpose(0, 2, 3, 1))

        if verbose == 1:
            progbar.add(1, values=[("loss", loss), ("rewards", rewards)])

    # Join once at the end instead of re-copying all frames on every step.
    frames = np.concatenate(frame_chunks, axis=0)

    if visualize:
        from agnez.video import make_gif
        print("Making gif!")
        frames = np.repeat(frames, 3, axis=-1)
        make_gif(frames[:visualize['n_frames']],
                 filepath=visualize['filepath'], gray=visualize['gray'],
                 interpolation='none')
        print("See your gif at {}".format(visualize['filepath']))
def evaluate_by_datasets(self, model):
    """Compute the validation accuracy of ``model`` on each single dataset.

    For dataset ``i``, ``len(X_valid)`` batches are drawn from its
    ``valid_generator``; every input is passed through
    ``util.random_unify_3d_mels`` and the ``i``-th output of ``model.predict``
    is compared (by argmax) with the target.

    Returns:
        List of accuracies, one per entry of ``self.single_datasets``.
    """
    from keras.utils.generic_utils import Progbar

    accuracies = []
    for i, single in enumerate(self.single_datasets):
        result = []
        n_valid = len(single.X_valid)
        print('Evaluating', single.name)
        progbar = Progbar(n_valid)
        for j in range(n_valid):
            X, y = next(single.valid_generator)
            Xtemp = np.array([util.random_unify_3d_mels(x_one, self.duration)
                              for x_one in X])
            # Output head i of the model belongs to dataset i.
            result.append(
                np.argmax(y) == np.argmax(model.predict(Xtemp)[i]))
            progbar.update(j)
        progbar.update(n_valid)
        # Computed once and reused for both the print and the return value.
        accuracy = np.sum(result) / len(result)
        accuracies.append(accuracy)
        print(' =', accuracy)

    for s, acc in zip(self.single_datasets, accuracies):
        print('Accuracy with %s = %f' % (s.name, acc))
    return accuracies