def adverse_generate2(gen_model, ad_model, cmodel, train, word_index, glove,
                      threshold = 0.95, batch_size = 64, ci = False):
    """Generate hypotheses and keep those that both scoring models rate highly.

    For every full-size minibatch of ``train`` one hypothesis is generated per
    example with ``gen_model``.  Each generated sequence is scored by
    ``ad_model`` and, after being joined to its premise with a ``"--"``
    separator and embedded with ``glove``, by ``cmodel`` (the score taken is
    the one at the example's own class index).  An example is kept when the
    product of the two scores is greater than ``threshold``.

    Generation stops as soon as more than 200 examples have been kept; the
    number of training examples consumed so far is printed at that point.

    ``ci`` is accepted but not used by this function.

    Returns:
        A list of ``(cmodel_score, ad_score, premise_text, generated_text,
        class_label)`` tuples.
    """
    mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    p = Progbar(len(train))
    results = []
    for batch_no, train_index in mb:
        # Incomplete trailing minibatches are skipped.
        if len(train_index) != batch_size:
            continue

        orig_batch = [train[k] for k in train_index]
        class_indices = [load_data.LABEL_LIST.index(train[k][2]) for k in train_index]

        probs = generation.generation_predict_embed(
            gen_model, word_index.index, orig_batch,
            np.random.random_integers(0, len(train), len(orig_batch)),
            class_indices = class_indices)
        gen_batch = generation.get_classes(probs)
        ad_preds = ad_model.predict_on_batch(gen_batch)[0].flatten()

        # Use a dedicated loop variable: the original reused ``i`` here, which
        # clobbered the minibatch index needed by the early-exit report below.
        X = []
        for j, example in enumerate(orig_batch):
            concat = example[0] + ["--"] + word_index.get_seq(gen_batch[j])
            X.append(load_data.load_word_vecs(concat, glove))
        X = np.array(X)
        X_padded = load_data.pad_sequences(X, dim = len(X[0][0]))
        cpreds = cmodel.predict_on_batch(X_padded)[0][np.arange(len(X_padded)), class_indices]

        pred_seq = [word_index.print_seq(gen) for gen in gen_batch]
        premises = [" ".join(ex[0]) for ex in orig_batch]
        classes = np.array(load_data.LABEL_LIST)[class_indices]

        # Filter once and reuse the result for both the output and the
        # progress metric (a zip object cannot be iterated twice on Python 3).
        accepted = [el for el in zip(cpreds, ad_preds, premises, pred_seq, classes)
                    if el[0] * el[1] > threshold]
        results += accepted
        p.add(len(train_index), [('added', float(len(accepted)))])

        if len(results) > 200:
            print((batch_no + 1) * batch_size)
            return results
    return results
def validate(dev, gen_test, beam_size, hypo_len, samples, noise_size, glove, cmodel = None, adverse = False, diverse = False): vgen = val_generator(dev, gen_test, beam_size, hypo_len, noise_size) p = Progbar(samples) batchez = [] while p.seen_so_far < samples: batch = next(vgen) preplexity = np.mean(np.power(2, batch[2])) loss = np.mean(batch[2]) losses = [('hypo_loss',loss),('perplexity', preplexity)] if cmodel is not None: ceval = cmodel.evaluate([batch[0], batch[1]], batch[4], verbose = 0) losses += [('class_loss', ceval[0]), ('class_acc', ceval[1])] probs = cmodel.predict([batch[0], batch[1]], verbose = 0) losses += [('class_entropy', np.mean(-np.sum(probs * np.log(probs), axis=1)))] p.add(len(batch[0]), losses) batchez.append(batch) batchez = merge_result_batches(batchez) res = {} if adverse: val_loss = adverse_validation(dev, batchez, glove) print 'adverse_loss:', val_loss res['adverse_loss'] = val_loss if diverse: div, _, _, _ = diversity(dev, gen_test, beam_size, hypo_len, noise_size, 64, 32) res['diversity'] = div print for val in p.unique_values: arr = p.sum_values[val] res[val] = arr[0] / arr[1] return res
def build_doc_id_to_url_map(profile_name='cogcomp', bucket_name='finer-annotation', prefix='annotation/by_length'):
    """Map the ``doc_id`` of every JSON document under an S3 prefix to its URL.

    Every key listed under ``prefix`` in ``bucket_name`` is fetched over
    HTTPS and parsed as JSON; its ``'doc_id'`` field becomes the dictionary
    key and the document URL the value.  Documents that cannot be fetched or
    parsed, or that lack a ``'doc_id'`` field, are counted as errors and
    skipped (best effort); the error count is printed at the end.

    Returns:
        dict mapping doc_id -> URL.
    """
    from contextlib import closing

    connection = boto.connect_s3(profile_name=profile_name)
    bucket = connection.get_bucket(bucket_name)

    # URLs always use '/', so build them with string formatting rather than
    # os.path.join, which uses the platform path separator.
    url_paths = ['https://s3.amazonaws.com/%s/%s' % (key_obj.bucket.name, key_obj.name)
                 for key_obj in bucket.list(prefix)]

    print('Found %d docs. Fetching, parsing jsons and building map... ' % len(url_paths))
    progbar = Progbar(len(url_paths))
    errors = 0
    doc_id_to_url_map = {}
    for url_path in url_paths:
        try:
            # closing() guarantees the HTTP response is released even when
            # reading or parsing fails.
            with closing(urllib2.urlopen(url_path)) as response:
                doc_id = json.loads(response.read())['doc_id']
            doc_id_to_url_map[doc_id] = url_path
        except Exception:
            # Deliberately best-effort: one bad document must not abort the scan.
            errors += 1
        progbar.add(1)
    print('Done with %d errors' % errors)
    return doc_id_to_url_map
def make_predictions(conf, shot_list, loader, custom_path=None):
    """Run the torch inference model over ``shot_list``.

    Batches are pulled from ``loader.inference_batch_generator_full_shot``
    until at least ``len(shot_list)`` shots have been collected; any surplus
    is discarded.  Model weights are loaded from ``custom_path`` when given,
    otherwise from ``get_model_path(conf)``.

    Returns:
        ``(y_prime, y_gold, disruptive)`` - three lists with one entry per
        shot: the model output and the target, each cut to the shot's length
        (channel 0 only), and the shot's disruptivity flag.
    """
    batches = loader.inference_batch_generator_full_shot(shot_list)
    inference_model = build_torch_model(conf)
    model_path = get_model_path(conf) if custom_path is None else custom_path
    inference_model.load_state_dict(torch.load(model_path))

    y_prime, y_gold, disruptive = [], [], []
    num_shots = len(shot_list)
    pbar = Progbar(num_shots)

    collecting = True
    while collecting:
        x, y, mask, disr, lengths, num_so_far, num_total = next(batches)
        output = apply_model_to_np(inference_model, x)
        for row in range(x.shape[0]):
            valid = lengths[row]
            y_prime.append(output[row, :valid, 0])
            y_gold.append(y[row, :valid, 0])
            disruptive.append(disr[row])
            pbar.add(1.0)
        # The generator may deliver more rows than requested; stop once enough.
        collecting = len(disruptive) < num_shots

    return y_prime[:num_shots], y_gold[:num_shots], disruptive[:num_shots]
def load_shots(self, shot_list, is_inference=False, as_list=False,
               num_samples=np.inf):
    """Load every shot in ``shot_list`` in parallel worker processes.

    Sampling probabilities are obtained once from ``self.get_sample_probs``
    and passed, together with ``is_inference``, to ``self.load_shot`` for
    each shot.  Results are collected in the order of ``shot_list``.

    ``as_list`` is accepted but not used by this method.

    Returns:
        ``(X, Y, Disr)`` where ``X`` and ``Y`` are lists with one entry per
        shot and ``Disr`` is a NumPy array built from the per-shot third
        return value of ``load_shot``.
    """
    # NOTE: the default was spelled ``np.Inf``; that alias was removed in
    # NumPy 2.0, ``np.inf`` is the same value and works everywhere.
    X = []
    Y = []
    Disr = []
    print("loading...")
    pbar = Progbar(len(shot_list))

    sample_prob_d, sample_prob_nd = self.get_sample_probs(
        shot_list, num_samples)
    fn = partial(self.load_shot, is_inference=is_inference,
                 sample_prob_d=sample_prob_d, sample_prob_nd=sample_prob_nd)

    pool = mp.Pool()
    print('loading data in parallel on {} processes'.format(
        pool._processes))
    try:
        for x, y, disr in pool.imap(fn, shot_list):
            X.append(x)
            Y.append(y)
            Disr.append(disr)
            pbar.add(1.0)
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when loading fails.
        pool.terminate()
        raise
    finally:
        pool.join()
    return X, Y, np.array(Disr)
def make_predictions(conf, shot_list, loader, custom_path=None):
    """Predict every shot in ``shot_list`` with a joblib-stored model.

    The model is loaded from ``custom_path`` when given, otherwise from
    ``conf['paths']['model_save_path'] + model_filename``.  Each shot is
    handled by ``predict_single_shot`` in a worker process.

    Returns:
        ``(y_prime, y_gold, disruptive)`` - per-shot predictions and targets
        (each with a trailing axis added) and the per-shot third return value
        of ``predict_single_shot``.
    """
    feature_extractor = FeatureExtractor(loader)
    if custom_path is not None:
        model_path = custom_path
    else:
        model_path = conf['paths']['model_save_path'] + model_filename
    model = joblib.load(model_path)

    y_prime, y_gold, disruptive = [], [], []
    pbar = Progbar(len(shot_list))
    predict_one = partial(predict_single_shot, model=model,
                          feature_extractor=feature_extractor)

    pool = mp.Pool()
    print('predicting in parallel on {} processes'.format(pool._processes))
    for prediction, target, disr in pool.imap(predict_one, shot_list):
        y_prime.append(np.expand_dims(prediction, axis=1))
        y_gold.append(np.expand_dims(target, axis=1))
        disruptive.append(disr)
        pbar.add(1.0)
    pool.close()
    pool.join()
    return y_prime, y_gold, disruptive
def learn(self, env, epoch=1, batch_size=1, exp_batch_size=0, gamma=0.9,
          reset_memory=False, verbose=1, callbacks=None):
    """Train the agent by playing ``env`` for ``epoch`` episodes.

    Parameters
    ----------
    env : :obj:`Enviroment`
        Environment the agent learns to play.
    epoch : int
        Number of complete episodes to play.
    batch_size : int
        Forwarded to ``self.update`` on every step.
    exp_batch_size : int
        Forwarded to ``self.update`` on every step.
    gamma : float
        Discount factor, forwarded to ``self.update``.
    reset_memory : bool
        When true, ``self.reset()`` is called before the first episode.
    verbose : int
        When equal to 1 a progress bar is advanced after every episode.
    callbacks : list of callables
        Not used yet (callback support is still to be added).

    Notes
    -----
    ``rewards`` accumulates over all episodes, ``loss`` is reset at the
    start of each episode.
    """
    print("Learning started!")
    print("[Environment]: {}".format(env.description))
    print("[Model]: {}".format(self.model.description))
    print("[Memory]: {}".format(self.memory.description))
    if reset_memory:
        self.reset()

    progbar = Progbar(epoch)
    rewards = 0
    for _episode in xrange(epoch):
        env.reset()
        loss = 0
        # First observation of the new episode.
        obs_t = env.observe()

        game_over = False
        while not game_over:
            previous_obs = obs_t
            action = self.policy(previous_obs)

            # Act, then collect the reward and the next observation.
            obs_t, reward, game_over = env.update(action)
            rewards += reward

            # Remember the transition, then fit on replayed experience.
            self.remember(previous_obs, action, reward, obs_t, game_over)
            loss += self.update(batch_size=batch_size,
                                exp_batch_size=exp_batch_size,
                                gamma=gamma)

        if verbose == 1:
            progbar.add(1, values=[("loss", loss), ("rewards", rewards)])
def detect_defects(self, validation_generator, verbose=1):
    """Score the discriminator on one pass over ``validation_generator``.

    Images are rescaled from ``[0, 255]`` to ``[-1, 1]`` before prediction
    and discriminator outputs are thresholded at 0.5.  Class 1 is treated as
    "non-defect" and class 0 as "defect".

    Parameters
    ----------
    validation_generator
        Object exposing ``samples``, ``batch_size`` and ``next()``; ``next()``
        must return ``(image_batch, labels)``.
    verbose : int
        Any non-zero value shows a progress bar.

    Returns
    -------
    tuple
        ``(precision, recall, specificity, negative_predictive_value)``.
        A ratio whose denominator is zero is reported as ``0.0``.
    """
    total_samples = validation_generator.samples
    batch_size = validation_generator.batch_size
    # float() forces true division so the final partial batch is not lost
    # when this runs under Python 2 integer division.
    num_batches = int(np.ceil(total_samples / float(batch_size)))

    progress_bar = Progbar(target=total_samples) if verbose != 0 else None

    label_chunks = []
    score_chunks = []
    for _ in range(num_batches):
        image_batch, lbls = validation_generator.next()
        label_chunks.append(lbls.reshape(lbls.shape[0]))

        image_batch = (image_batch.astype(np.float32) - 127.5) / 127.5
        tmp_rslt = self.discriminator.model.predict(
            x=image_batch, batch_size=image_batch.shape[0], verbose=0)
        if progress_bar is not None:
            progress_bar.add(image_batch.shape[0])
        score_chunks.append(tmp_rslt.reshape(tmp_rslt.shape[0]))

    # Concatenate once instead of growing an array with np.append per batch.
    labels = np.concatenate(label_chunks) if label_chunks else np.array([])
    scores = np.concatenate(score_chunks) if score_chunks else np.array([])
    results = [1 if x >= 0.5 else 0 for x in scores]

    # Pinning labels=[0, 1] always yields a 2x2 matrix, so the unpacking
    # below also works when only one class occurs in the data.
    tn, fp, fn, tp = confusion_matrix(labels, results, labels=[0, 1]).ravel()

    def _ratio(numerator, denominator):
        # Guarded true division (also correct under Python 2).
        if denominator == 0:
            return 0.0
        return numerator / float(denominator)

    # Non-defect (positive class) metrics.
    recall = _ratio(tp, tp + fn)        # share of non-defects detected
    precision = _ratio(tp, tp + fp)     # share of "non-defect" calls that are right

    # Defect (negative class) metrics.
    specificity = _ratio(tn, tn + fp)   # share of defects detected
    negative_predictive_value = _ratio(tn, tn + fn)  # share of "defect" calls that are right

    return precision, recall, specificity, negative_predictive_value
def play(self, env, epoch=1, batch_size=1, visualize=None, verbose=1):
    """Let the agent play ``env`` for ``epoch`` episodes without training.

    Every step's image observation is collected.  When ``visualize`` is
    given (a dict with keys ``'n_frames'``, ``'filepath'`` and ``'gray'``)
    the collected frames, with the last ``n_frames`` dropped and the last
    axis repeated three times, are handed to ``make_gif``.

    ``batch_size`` is accepted but not used.  With ``verbose == 1`` a
    progress bar is advanced once per episode.
    """
    print("Free play started!")
    # Empty frame buffer, laid out channels-last like the per-step frames.
    image_shape = env.observe_image().shape[1:]
    frames = np.zeros((0, ) + image_shape).transpose(0, 2, 3, 1)
    rewards = 0
    progbar = Progbar(epoch)

    for _episode in xrange(epoch):
        env.reset()
        game_over = False
        loss = 0
        obs_t = env.observe()

        while not game_over:
            previous_obs = obs_t
            # Greedy / evaluation-mode action selection.
            action = self.policy(previous_obs, train=False)
            obs_t, reward, game_over = env.update(action)
            rewards += reward

            current_frame = env.observe_image().transpose(0, 2, 3, 1)
            frames = np.concatenate([frames, current_frame], axis=0)

        if verbose == 1:
            progbar.add(1, values=[("loss", loss), ("rewards", rewards)])

    if visualize:
        print("Making gif!")
        frames = np.repeat(frames, 3, axis=-1)
        cutoff = -visualize['n_frames']
        make_gif(frames[:cutoff],
                 filepath=visualize['filepath'], gray=visualize['gray'])
        print("See your gif at {}".format(visualize['filepath']))
def train_model_embed(train, dev, glove, model, model_dir = 'models/curr_model', nb_epochs = 20, batch_size = 64, hs=True, ci = True): X_dev_p, X_dev_h, y_dev = load_data.prepare_split_vec_dataset(dev, glove=glove) word_index = load_data.WordIndex(glove) if not os.path.exists(model_dir): os.makedirs(model_dir) for e in range(nb_epochs): print "Epoch ", e mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True) p = Progbar(len(train)) for i, train_index in mb: if len(train_index) != batch_size: continue X_train_p, X_train_h , y_train = load_data.prepare_split_vec_dataset([train[k] for k in train_index], word_index.index) padded_p = load_data.pad_sequences(X_train_p, maxlen = PREM_LEN, dim = -1, padding = 'pre') padded_h = load_data.pad_sequences(X_train_h, maxlen = HYPO_LEN, dim = -1, padding = 'post') data = {'premise_input': padded_p, 'embed_input': np.expand_dims(np.array(train_index), axis=1), 'output' : padded_h} if ci: data['class_input'] = y_train if hs: data['train_input'] = padded_h data['output'] = np.ones((batch_size, HYPO_LEN, 1)) #sw = (padded_h != 0).astype(float) #train_loss = float(model.train_on_batch(data, sample_weight={'output':sw})[0]) train_loss = float(model.train_on_batch(data)[0]) p.add(len(train_index),[('train_loss', train_loss)]) sys.stdout.write('\n') model.save_weights(model_dir + '/model~' + str(e))
def run_epoch(self, split, train=False, batch_size=128, return_pred=False):
    """Run one pass over ``split`` and return aggregate metrics.

    Each batch is trained on (``train=True``) or only evaluated.  Predicted
    relation probabilities are multiplied by the type-checker mask for the
    batch's two type columns before taking the argmax.

    Returns:
        dict with ``'f1'``, ``'precision'``, ``'recall'`` (micro-averaged
        over ``self.labels``), ``'accuracy'`` and mean per-batch ``'loss'``;
        with ``return_pred`` also ``'ids'``, ``'preds'`` and ``'targs'`` as
        plain lists.
    """
    step = self.model.train_on_batch if train else self.model.test_on_batch
    total = 0
    total_loss = 0
    ids, preds, targs = [], [], []
    prog = Progbar(split.num_examples)

    for idx, X, Y, types in split.batches(batch_size):
        # Targets are fed alongside the inputs, each extended with the types.
        X.update({k: np.concatenate([v, types], axis=1) for k, v in Y.items()})
        batch_end = time()
        loss = step(X)

        prob = self.model.predict(X, verbose=0)['p_relation']
        prob *= self.typechecker.get_valid_cpu(types[:, 0], types[:, 1])
        pred = prob.argmax(axis=1)
        targ = Y['p_relation'].argmax(axis=1)

        ids.append(idx)
        targs.append(targ)
        preds.append(pred)
        total_loss += loss
        total += 1
        prog.add(idx.size, values=[('loss', loss), ('acc', np.mean(pred==targ))])

    preds = np.concatenate(preds).astype('int32')
    targs = np.concatenate(targs).astype('int32')
    ids = np.concatenate(ids).astype('int32')

    ret = {}
    ret['f1'] = f1_score(targs, preds, average='micro', labels=self.labels)
    ret['precision'] = precision_score(targs, preds, average='micro', labels=self.labels)
    ret['recall'] = recall_score(targs, preds, average='micro', labels=self.labels)
    ret['accuracy'] = accuracy_score(targs, preds)
    ret['loss'] = total_loss / float(total)
    if return_pred:
        ret['ids'] = ids.tolist()
        ret['preds'] = preds.tolist()
        ret['targs'] = targs.tolist()
    return ret
def make_predictions(conf,shot_list,loader,custom_path=None):
    """Predict every shot in ``shot_list`` with a joblib-stored model.

    The model is loaded from ``custom_path`` when given, otherwise from
    ``conf['paths']['model_save_path'] + model_filename``.  Each shot is
    handled by ``predict_single_shot`` in a worker process; the pool is
    always shut down, including when a worker raises.

    Returns:
        ``(y_prime, y_gold, disruptive)`` - per-shot predictions and targets
        (each with a trailing axis added) and the per-shot third return value
        of ``predict_single_shot``.
    """
    feature_extractor = FeatureExtractor(loader)
    # Result is unused; the call is kept in case it has side effects - TODO confirm.
    save_prepath = feature_extractor.get_save_prepath()
    if custom_path is None:
        model_path = conf['paths']['model_save_path'] + model_filename
    else:
        model_path = custom_path
    model = joblib.load(model_path)

    y_prime = []
    y_gold = []
    disruptive = []

    pbar = Progbar(len(shot_list))
    fn = partial(predict_single_shot,model=model,feature_extractor=feature_extractor)
    pool = mp.Pool()
    print('predicting in parallel on {} processes'.format(pool._processes))
    try:
        for (y_p,y,disr) in pool.imap(fn,shot_list):
            y_prime += [np.expand_dims(y_p,axis=1)]
            y_gold += [np.expand_dims(y,axis=1)]
            disruptive += [disr]
            pbar.add(1.0)
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when prediction fails.
        pool.terminate()
        raise
    finally:
        pool.join()
    return y_prime,y_gold,disruptive
def tensorise_smiles_mp(smiles, max_degree=5, max_atoms=None, workers=cpu_count() - 1, chunksize=3000, verbose=True):
    ''' Multiprocess implementation of `tensorise_smiles`

    # Arguments:
        See `tensorise_smiles` documentation

    # Additional arguments:
        workers: int, number of parallel processes. Values below 1 are raised
            to 1 (the default evaluates to 0 on a single-core machine);
            `None` is passed through to `Pool` unchanged.
        chunksize: int, number of molecules tensorised per worker call; a
            bigger chunksize is preferred as each process preallocates
            np.arrays
        verbose: bool, show a progress bar and status messages

    # Returns:
        See `tensorise_smiles` documentation

    # TODO:
        - fix python keyboardinterrupt bug:
          https://noswap.com/blog/python-multiprocessing-keyboardinterrupt
        - replace progbar with proper logging
    '''
    # Pool(processes=0) raises ValueError, so never ask for fewer than one.
    if workers is not None:
        workers = max(1, workers)
    pool = Pool(processes=workers)

    # http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
    def chunks(l, n):
        """Yield successive n-sized chunks from l."""
        for i in range(0, len(l), n):
            yield l[i:i + n]

    smiles_chunks = chunks(smiles, chunksize)

    # MAP: tensorise in parallel
    map_function = partial(tensorise_smiles, max_degree=max_degree, max_atoms=max_atoms)

    try:
        if verbose:
            print('Tensorising molecules in batches...')
            pbar = Progbar(len(smiles), width=50)
            tensor_list = []
            for tensors in pool.imap(map_function, smiles_chunks):
                pbar.add(tensors[0].shape[0])
                tensor_list.append(tensors)
            print('Merging batch tensors...    ', end='')
        else:
            tensor_list = pool.map(map_function, smiles_chunks)
        if verbose:
            print('[DONE]')
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when tensorising fails.
        pool.terminate()
        raise
    finally:
        pool.join()

    # REDUCE: concatenate the obtained tensors
    return concat_mol_tensors(tensor_list,
                              match_degree=max_degree is not None,
                              match_max_atoms=max_atoms is not None)
def train_model(self):
    """Train the seq2seq chatbot model with randomly applied teacher forcing.

    Each epoch the training indices are permuted; ``int(0.8 * train_num / bs)``
    batches are taken from the front of the permutation for training and the
    last ``int(0.2 * train_num)`` indices are used for validation.  For every
    batch, with probability ``self.tfr`` the ground-truth decoder input is
    used (teacher forcing); otherwise the decoder input is produced by greedy
    decoding with the encoder/decoder inference models.

    Weights are written to ``self.ckpt_dir + 'weights.hdf5'``.
    """
    cbs = []
    cbs.append(EarlyStopping(patience=2))
    cbs.append(LearningRateScheduler(lambda e: self.lr * 0.999**(e / 10)))
    cb = CallBacks(cbs)
    cb.set_model(self.model)

    print('Start training chatbot...')
    train_num = len(self.en_ipt)
    cb.on_train_begin()
    for itr in range(self.epoch):
        print('Epoch %s/%s' % (itr + 1, self.epoch))
        cb.on_epoch_begin(itr)
        # Fresh shuffle per epoch; the train/validation split therefore
        # changes from epoch to epoch.
        indexes = np.random.permutation(train_num)
        progbar = Progbar(train_num)
        losses = []
        for idx in range(int(0.8 * train_num / self.bs)):
            batch_idx = indexes[idx * self.bs:(idx + 1) * self.bs]
            en_ipt_bc = self.en_ipt[batch_idx]
            de_ipt_bc = self.de_ipt[batch_idx]
            de_opt_bc = self.de_opt[batch_idx]
            if np.random.rand() < self.tfr:
                # apply teacher forcing
                bc_loss = self.model.train_on_batch([en_ipt_bc, de_ipt_bc],
                                                    de_opt_bc)
            else:
                # do not apply teacher forcing
                # Per sample: number of positions along axis 1 that have any
                # non-zero entry on the last axis of the target
                # (presumably the target length - TODO confirm layout).
                ipt_len = [sum(i) for i in np.any(de_opt_bc, axis=-1)]
                # Filled step-major here, transposed to sample-major below.
                de_ipt_nt = np.zeros((self.max_de_seq, self.bs),
                                     dtype='int64')
                en_out, h, c = self.encoder_model.predict(
                    en_ipt_bc, batch_size=self.bs)
                # Every sample starts decoding from the 'bos' token.
                de_in = np.asarray([[self.word2idx['bos']]] * self.bs)
                for i in range(self.max_de_seq):
                    de_out, h, c = self.decoder_model.predict(
                        [en_out, de_in, h, c], batch_size=self.bs)
                    # Greedy choice; it becomes the next decoder input.
                    sampled_idxs = np.argmax(de_out[:, -1, :], axis=-1)
                    de_ipt_nt[i] = sampled_idxs
                    de_in = sampled_idxs.reshape((-1, 1))
                de_ipt_nt = de_ipt_nt.T
                # Zero out everything past each sample's counted length.
                for i in range(self.bs):
                    de_ipt_nt[i, ipt_len[i]:] = 0
                bc_loss = self.model.train_on_batch([en_ipt_bc, de_ipt_nt],
                                                    de_opt_bc)
            losses.append(bc_loss)
            progbar.add(self.bs, [('loss', np.mean(losses))])

        val_idx = indexes[-int(0.2 * train_num):]
        val_loss = self.model.evaluate(
            [self.en_ipt[val_idx], self.de_ipt[val_idx]],
            self.de_opt[val_idx],
            batch_size=self.bs,
            verbose=0)
        progbar.update(train_num, [('val_loss', np.mean(val_loss))])
        cb.on_epoch_end(itr,
                        logs={
                            'loss': np.mean(losses),
                            'val_loss': np.mean(val_loss)
                        })
        self.model.save_weights(self.ckpt_dir + 'weights.hdf5')
    cb.on_train_end()
    print('Chatbot training complete.')
def preprocess(X):
    """Denoise every 2-D slice ``X[i, j]`` in place with TV-Chambolle.

    ``X`` is modified in place and also returned.  The progress bar
    advances once per first-axis entry.
    """
    n_items = X.shape[0]
    progbar = Progbar(n_items)
    for item in range(n_items):
        for frame in range(X.shape[1]):
            denoised = denoise_tv_chambolle(X[item, frame], weight=0.1,
                                            multichannel=False)
            X[item, frame] = denoised
        progbar.add(1)
    return X
def rotation_augmentation(X, angle_range):
    """Return a copy of ``X`` with every first-axis entry rotated by a random angle.

    One integer angle is drawn per entry from
    ``np.random.randint(-angle_range, angle_range)`` (upper bound excluded)
    and applied to all of that entry's slices ``X[i, j]``.  ``X`` itself is
    not modified.
    """
    progbar = Progbar(X.shape[0])
    rotated = np.copy(X)
    for sample in range(len(X)):
        angle = np.random.randint(-angle_range, angle_range)
        for frame in range(X.shape[1]):
            rotated[sample, frame] = ndimage.rotate(
                X[sample, frame], angle, reshape=False, order=1)
        progbar.add(1)
    return rotated
def make_evaluations_gpu(conf, shot_list, loader):
    """Evaluate the model on ``shot_list`` and return the weighted mean loss.

    The shots are split into sublists of at most
    ``conf['model']['pred_batch_size']`` shots.  For every sublist a model is
    built for exactly that batch size, compiled, loaded with the stored
    weights and evaluated.  The per-sublist metrics are averaged with the
    sublist sizes as weights.

    The loader is switched to inference mode for the duration of the call.
    """
    loader.set_inference_mode(True)

    # Backend setup must run before the Keras imports below.
    if backend == 'tf' or backend == 'tensorflow':
        first_time = "tensorflow" not in sys.modules
        if first_time:
            import tensorflow as tf
            os.environ['KERAS_BACKEND'] = 'tensorflow'
            from keras.backend.tensorflow_backend import set_session
            config = tf.ConfigProto(device_count={"GPU": 1})
            set_session(tf.Session(config=config))
    else:
        os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'
        import theano

    from keras.utils.generic_utils import Progbar
    from plasma.models.builder import ModelBuilder
    specific_builder = ModelBuilder(conf)

    # NOTE: these three lists are never filled or returned in this function.
    y_prime = []
    y_gold = []
    disruptive = []
    batch_size = min(len(shot_list), conf['model']['pred_batch_size'])

    pbar = Progbar(len(shot_list))
    print('evaluating {} shots using batchsize {}'.format(
        len(shot_list), batch_size))

    shot_sublists = shot_list.sublists(batch_size, equal_size=False)
    all_metrics = []
    all_weights = []
    for (i, shot_sublist) in enumerate(shot_sublists):
        # Sublists may differ in size, so the model is rebuilt per sublist
        # with a matching batch size.
        batch_size = len(shot_sublist)
        model = specific_builder.build_model(True,
                                             custom_batch_size=batch_size)
        model.compile(optimizer=optimizer_class(),
                      loss=conf['data']['target'].loss)
        specific_builder.load_model_weights(model)
        model.reset_states()
        X, y, shot_lengths, disr = loader.load_as_X_y_pred(
            shot_sublist, custom_batch_size=batch_size)

        # evaluate on the loaded data
        all_metrics.append(
            model.evaluate(X, y, batch_size=batch_size, verbose=False))
        all_weights.append(batch_size)
        model.reset_states()

        pbar.add(1.0 * len(shot_sublist))
        loader.verbose = False  # True during the first iteration

    if len(all_metrics) > 1:
        print('evaluations all: {}'.format(all_metrics))
    # Weight each sublist's metric by the number of shots it contained.
    loss = np.average(all_metrics, weights=all_weights)
    print('Evaluation Loss: {}'.format(loss))
    loader.set_inference_mode(False)
    return loss
def preprocess(X):
    """Pre-process images that are fed to the neural network.

    Every slice ``X[i, j]`` is replaced in place by its TV-Chambolle
    denoised version; ``weight`` is the denoising regularization parameter.
    Returns the same (modified) ``X``.
    """
    status = Progbar(X.shape[0])  # one tick per first-axis entry
    outer = 0
    while outer < X.shape[0]:
        for inner in range(X.shape[1]):
            X[outer, inner] = denoise_tv_chambolle(
                X[outer, inner], weight=0.1, multichannel=False)
        status.add(1)
        outer += 1
    return X
def generation_test(train, glove, model, batch_size = 64, prem_len = 22, hypo_len = 12):
    """Train ``model`` for one shuffled pass over ``train``.

    Each minibatch is vectorised with ``glove``, premises and hypotheses are
    padded to ``prem_len`` / ``hypo_len`` (vector size 50), and the model is
    trained to map padded premises to padded hypotheses.  The first element
    of the ``train_on_batch`` result is reported as 'train_loss'.
    """
    minibatches = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    p = Progbar(len(train))
    for _, train_index in minibatches:
        examples = [train[k] for k in train_index]
        X_prem, X_hypo, _ = load_data.prepare_split_vec_dataset(examples, glove)
        X_p = load_data.pad_sequences(X_prem, maxlen = prem_len, dim = 50)
        X_h = load_data.pad_sequences(X_hypo, maxlen = hypo_len, dim = 50)
        batch_result = model.train_on_batch(X_p, X_h)
        p.add(len(X_p),[('train_loss', batch_result[0])])
def batchwise_function(func, X, batch_size=100, verbose=1,
                       drop_remainder=True):
    """Apply ``func`` to ``X`` in batches and concatenate the results.

    ``func`` is called as ``func([batch])`` with consecutive first-axis
    slices of ``X`` of length ``batch_size``.

    Parameters
    ----------
    func : callable
        Receives a one-element list holding the batch.
    X : array-like with ``.shape`` and slicing
    batch_size : int
    verbose : int
        A progress bar is shown when greater than 0.
    drop_remainder : bool
        When true (the default, and the historical behaviour) the trailing
        ``X.shape[0] % batch_size`` rows are NOT processed, so the result can
        be shorter than ``X``.  Pass ``False`` to process them as a final,
        smaller batch.

    Returns
    -------
    The per-batch results concatenated along axis 0.

    Raises
    ------
    ValueError
        From ``np.concatenate`` when no batch was processed at all.
    """
    n_samples = X.shape[0]
    n_full = n_samples // batch_size
    bounds = [(i * batch_size, (i + 1) * batch_size) for i in range(n_full)]
    if not drop_remainder and n_full * batch_size < n_samples:
        bounds.append((n_full * batch_size, n_samples))

    # Only build the progress bar when it will actually be shown.
    progbar = Progbar(n_samples) if verbose > 0 else None

    Y = []
    for start, stop in bounds:
        Y.append(func([X[start:stop]]))
        if progbar is not None:
            progbar.add(stop - start)
    return np.concatenate(Y, axis=0)
def batchwise_function(func, X, batch_size=100, verbose=1,
                       drop_remainder=True):
    """Run ``func`` over ``X`` batch by batch and join the outputs.

    ``func`` is invoked as ``func([X[a:b]])`` for consecutive windows of
    ``batch_size`` rows.

    Args:
        func: callable taking a one-element list that holds the batch.
        X: array-like supporting ``.shape`` and first-axis slicing.
        batch_size: rows per call.
        verbose: show a progress bar when greater than 0.
        drop_remainder: when true (default; matches the historical
            behaviour) rows beyond the last full batch are skipped, so the
            output may have fewer rows than ``X``.  When false they are
            processed as one final, smaller batch.

    Returns:
        ``np.concatenate`` of all batch outputs along axis 0.

    Raises:
        ValueError: propagated from ``np.concatenate`` if there was no batch
            to process.
    """
    total = X.shape[0]
    full_batches = total // batch_size
    windows = [(k * batch_size, (k + 1) * batch_size)
               for k in range(full_batches)]
    covered = full_batches * batch_size
    if covered < total and not drop_remainder:
        windows.append((covered, total))

    # The bar is created lazily so silent runs do not need it at all.
    progbar = Progbar(total) if verbose > 0 else None

    Y = []
    for lo, hi in windows:
        Y.append(func([X[lo:hi]]))
        if progbar is not None:
            progbar.add(hi - lo)
    return np.concatenate(Y, axis=0)
def preprocess(X):
    """Replace every slice ``X[i, j]`` in place by its Canny edge map.

    ``feature.canny`` is applied with ``sigma=1.2``.  ``X`` is modified in
    place and returned; the progress bar advances once per first-axis entry.
    """
    n_items = X.shape[0]
    progbar = Progbar(n_items)  # pre-processing status tracking
    for item in range(n_items):
        for frame in range(X.shape[1]):
            edges = feature.canny(X[item, frame], sigma=1.2)
            X[item, frame] = edges
        progbar.add(1)
    return X
def rotation_augmentation(X, angle_range):
    """Rotate each first-axis entry of ``X`` by its own random integer angle.

    The angle comes from ``np.random.randint(-angle_range, angle_range)``
    (upper bound excluded) and is shared by all slices of one entry.
    Rotation keeps the slice shape (``reshape=False``) and uses first-order
    interpolation.  A rotated copy is returned; ``X`` is left untouched.
    """
    tracker = Progbar(X.shape[0])  # augmentation status tracking
    X_rot = np.copy(X)
    n_frames = None
    for i in range(len(X)):
        angle = np.random.randint(-angle_range, angle_range)
        n_frames = X.shape[1]
        j = 0
        while j < n_frames:
            X_rot[i, j] = ndimage.rotate(X[i, j], angle, reshape=False,
                                         order=1)
            j += 1
        tracker.add(1)
    return X_rot
def make_predictions_gpu(conf, shot_list, loader, custom_path=None):
    """Predict all shots in ``shot_list`` with a single Keras model.

    The model is built once, loaded with weights (``custom_path`` is
    forwarded to ``load_model_weights``) and applied to equal-size sublists
    of ``conf['model']['pred_batch_size']`` shots.  Per-shot outputs and
    targets are cut back to each shot's length.

    The loader is switched to inference mode for the duration of the call.

    Returns ``(y_prime, y_gold, disruptive)``, each truncated to
    ``len(shot_list)`` entries.
    """
    loader.set_inference_mode(True)

    # Backend setup must run before the Keras imports below.
    if backend == 'tf' or backend == 'tensorflow':
        first_time = "tensorflow" not in sys.modules
        if first_time:
            import tensorflow as tf
            os.environ['KERAS_BACKEND'] = 'tensorflow'
            from keras.backend.tensorflow_backend import set_session
            config = tf.ConfigProto(device_count={"GPU": 1})
            set_session(tf.Session(config=config))
    else:
        os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'
        import theano

    from keras.utils.generic_utils import Progbar
    from plasma.models.builder import ModelBuilder
    specific_builder = ModelBuilder(conf)

    y_prime = []
    y_gold = []
    disruptive = []

    model = specific_builder.build_model(True)
    model.compile(optimizer=optimizer_class(),
                  loss=conf['data']['target'].loss)

    specific_builder.load_model_weights(model, custom_path)
    model.reset_states()

    pbar = Progbar(len(shot_list))
    shot_sublists = shot_list.sublists(conf['model']['pred_batch_size'],
                                       do_shuffle=False,
                                       equal_size=True)
    for (i, shot_sublist) in enumerate(shot_sublists):
        X, y, shot_lengths, disr = loader.load_as_X_y_pred(shot_sublist)

        # predict on the loaded data
        y_p = model.predict(X, batch_size=conf['model']['pred_batch_size'])
        model.reset_states()
        y_p = loader.batch_output_to_array(y_p)
        y = loader.batch_output_to_array(y)

        # cut arrays back to the individual shot lengths
        y_p = [arr[:shot_lengths[j]] for (j, arr) in enumerate(y_p)]
        y = [arr[:shot_lengths[j]] for (j, arr) in enumerate(y)]

        pbar.add(1.0 * len(shot_sublist))
        loader.verbose = False  # True during the first iteration
        y_prime += y_p
        y_gold += y
        disruptive += disr

    # More entries than shots can accumulate above (sublists are requested
    # with equal_size=True); keep only the first len(shot_list).
    y_prime = y_prime[:len(shot_list)]
    y_gold = y_gold[:len(shot_list)]
    disruptive = disruptive[:len(shot_list)]
    loader.set_inference_mode(False)
    return y_prime, y_gold, disruptive
def sampling_augmentation(X, n):
    """Pick ``n`` distinct slices, in random order, from every entry of ``X``.

    For each first-axis entry, ``n`` slice indices are drawn without
    replacement, shuffled, and used to index a copy of that entry.

    Returns:
        NumPy array built from the per-entry selections.
    """
    progbar = Progbar(X.shape[0])
    picked_per_entry = []
    for entry in range(len(X)):
        slices = np.copy(X[entry])
        chosen = np.random.choice(range(len(slices)), n, replace=False)
        np.random.shuffle(chosen)
        picked_per_entry.append(slices[chosen])
        progbar.add(1)
    return np.array(picked_per_entry)
def zoom_augmentation(X, y, k_min):
    """Zoom every entry of ``X`` by a random factor and rescale ``y`` to match.

    One factor ``k`` is drawn per first-axis entry as
    ``1.0 - rand() * (1.0 - k_min)``, i.e. between ``k_min`` and 1.  All
    slices of the entry are passed through ``zoom`` with that factor and the
    entry's target is multiplied by ``1 / k**2``.

    Returns:
        ``(X_zoom, y_zoom)`` - modified copies; the inputs are not changed.
    """
    progbar = Progbar(X.shape[0])  # augmentation status tracking
    X_zoom, y_zoom = np.copy(X), np.copy(y)
    for sample in range(len(X)):
        k_random = 1.0 - (np.random.rand() * (1.0 - k_min))
        for frame in range(X.shape[1]):
            X_zoom[sample, frame] = zoom(X[sample, frame], k_random)
        # Targets scale with the inverse of the squared zoom factor.
        y_zoom[sample] *= 1 / (k_random * k_random)
        progbar.add(1)
    return X_zoom, y_zoom
def main():
    """Downsample every ``wav`` file of the corpora named on the command line.

    Each command-line argument is a corpus directory (a trailing path
    separator is stripped).  For every corpus a sibling directory
    ``<corpus>_downsampled`` is created and each file whose name ends in
    ``wav`` is passed to ``downsample`` with the same file name in that
    directory.  The progress bar counts exactly the files that are processed.
    """
    corpora = [x[:-1] if x.endswith(os.sep) else x for x in argv[1:]]

    # List each directory once and keep only the files that will actually be
    # converted, so the progress target matches the work done.
    wav_files = [
        (corpus, [name for name in os.listdir(corpus) if name.endswith('wav')])
        for corpus in corpora
    ]

    pb = Progbar(sum(len(names) for _, names in wav_files))
    for corpus, names in wav_files:
        target_dir = corpus + '_downsampled'
        mkdir_p(target_dir)
        for filename in names:
            downsample(os.path.join(corpus, filename),
                       os.path.join(target_dir, filename),
                       verbose=False)
            pb.add(1)
def sampling_augmentation(X, n):
    """Return, for each entry of ``X``, ``n`` of its slices in random order.

    Indices are drawn without replacement from the entry's slices and then
    shuffled before indexing a copy of the entry.  The result is stacked
    into a NumPy array.
    """
    tracker = Progbar(X.shape[0])
    X_sampled = []
    position = 0
    total = len(X)
    while position < total:
        entry_copy = np.copy(X[position])
        ix = np.random.choice(range(len(entry_copy)), n, replace=False)
        np.random.shuffle(ix)
        X_sampled.append(entry_copy[ix])
        tracker.add(1)
        position += 1
    return np.array(X_sampled)
def test_points(premises, labels, noises, gtest, cmodel, hypo_len):
    """Generate hypotheses in batches of 64 and print the classifier's evaluation.

    Only full batches are generated (``len(labels) // 64`` of them); the
    classifier is then evaluated on the premises and labels that have a
    generated hypothesis.  The result of ``cmodel.evaluate`` is printed,
    nothing is returned.
    """
    p = Progbar(len(premises))
    hypos = []
    bs = 64
    # Floor division keeps the batch count an int under true division too.
    for i in range(len(labels) // bs):
        window = slice(i * bs, (i + 1) * bs)
        words, _ = generative_predict_beam(gtest, premises[window],
                                           noises[window, None, :],
                                           labels[window], True, hypo_len)
        hypos.append(words)
        p.add(len(words))
    hypos = np.vstack(hypos)
    cpreds = cmodel.evaluate([premises[:len(hypos)], hypos], labels[:len(hypos)])
    print(cpreds)
def make_predictions_gpu(conf,shot_list,loader,custom_path=None):
    """Predict all shots in ``shot_list`` with a single Keras model.

    The model is built once, loaded with weights (``custom_path`` is
    forwarded to ``load_model_weights``) and applied to equal-size sublists
    of ``conf['model']['pred_batch_size']`` shots.  Per-shot outputs and
    targets are cut back to each shot's length.

    The loader is switched to inference mode for the duration of the call.

    Returns ``(y_prime, y_gold, disruptive)``, each truncated to
    ``len(shot_list)`` entries.
    """
    loader.set_inference_mode(True)

    # Backend setup must run before the Keras imports below.
    if backend == 'tf' or backend == 'tensorflow':
        first_time = "tensorflow" not in sys.modules
        if first_time:
            import tensorflow as tf
            os.environ['KERAS_BACKEND'] = 'tensorflow'
            from keras.backend.tensorflow_backend import set_session
            config = tf.ConfigProto(device_count={"GPU":1})
            set_session(tf.Session(config=config))
    else:
        os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'
        import theano

    from keras.utils.generic_utils import Progbar
    from plasma.models.builder import ModelBuilder
    specific_builder = ModelBuilder(conf)

    y_prime = []
    y_gold = []
    disruptive = []

    model = specific_builder.build_model(True)
    model.compile(optimizer=optimizer_class(),loss=conf['data']['target'].loss)

    specific_builder.load_model_weights(model,custom_path)
    model.reset_states()

    pbar = Progbar(len(shot_list))
    shot_sublists = shot_list.sublists(conf['model']['pred_batch_size'],do_shuffle=False,equal_size=True)
    for (i,shot_sublist) in enumerate(shot_sublists):
        X,y,shot_lengths,disr = loader.load_as_X_y_pred(shot_sublist)

        # predict on the loaded data
        y_p = model.predict(X,
                          batch_size=conf['model']['pred_batch_size'])
        model.reset_states()
        y_p = loader.batch_output_to_array(y_p)
        y = loader.batch_output_to_array(y)

        # cut arrays back to the individual shot lengths
        y_p = [arr[:shot_lengths[j]] for (j,arr) in enumerate(y_p)]
        y = [arr[:shot_lengths[j]] for (j,arr) in enumerate(y)]

        pbar.add(1.0*len(shot_sublist))
        loader.verbose=False  # True during the first iteration
        y_prime += y_p
        y_gold += y
        disruptive += disr

    # More entries than shots can accumulate above (sublists are requested
    # with equal_size=True); keep only the first len(shot_list).
    y_prime = y_prime[:len(shot_list)]
    y_gold = y_gold[:len(shot_list)]
    disruptive = disruptive[:len(shot_list)]
    loader.set_inference_mode(False)
    return y_prime,y_gold,disruptive
def test_adverse(dev, ad_model, gen_model, word_index, glove, train_len, batch_size=64, ci = False):
    """Report loss and accuracy of the adversarial model on ``dev``.

    For each full-size minibatch an adversarial batch ``(X, y)`` is built
    with ``adverse_batch`` and scored with ``ad_model``.  Binary
    cross-entropy and accuracy (prediction within 0.5 of the target) are
    reported through the progress bar; nothing is returned.

    With ``ci`` every example of a batch gets the class index
    ``batch_number % 3``.  ``glove`` is accepted but not used here.
    """
    mb = load_data.get_minibatches_idx(len(dev), batch_size, shuffle=False)
    # The bar target is 2 * len(dev) and advances by len(X) per batch
    # (presumably adverse_batch returns two rows per example - TODO confirm).
    p = Progbar(len(dev) * 2)
    for i, train_index in mb:
        if len(train_index) != batch_size:
            continue
        class_indices = [i % 3] * batch_size if ci else None
        X, y = adverse_batch([dev[k] for k in train_index], word_index, gen_model, train_len, class_indices = class_indices)
        pred = ad_model.predict_on_batch(X)[0].flatten()

        # Flatten the targets once and use them for both metrics: comparing
        # an (n, 1) target with the flat (n,) prediction would broadcast to
        # an (n, n) matrix and yield a meaningless accuracy.
        y_flat = y.flatten()
        loss = binary_crossentropy(y_flat, pred).eval()
        acc = np.mean(np.abs(y_flat - pred) < 0.5)
        p.add(len(X),[('test_loss', loss), ('test_acc', acc)])
def zoom_augmentation(X, y, k_min):
    """Apply a random zoom per entry of ``X`` and adjust the targets ``y``.

    The zoom factor of an entry is ``1. - rand() * (1. - k_min)`` and is
    shared by all of its slices; the entry's target is multiplied by the
    inverse of the squared factor.  Copies of ``X`` and ``y`` are modified
    and returned as ``(X_zoom, y_zoom)``.
    """
    bar = Progbar(X.shape[0])  # augmentation status tracking
    X_zoom = np.copy(X)
    y_zoom = np.copy(y)
    for i in range(len(X)):
        shrink = np.random.rand() * (1. - k_min)
        k_random = 1. - shrink
        j = 0
        while j < X.shape[1]:
            X_zoom[i, j] = zoom(X[i, j], k_random)
            j += 1
        y_zoom[i] *= 1 / (k_random * k_random)
        bar.add(1)
    return X_zoom, y_zoom
def preprocess1(X,weight=0.1):
    """
    Pre-process images that are fed to neural network.

    Every slice ``X[i, j]`` is denoised in place with TV-Chambolle.

    :param X: array indexed as ``X[i, j]``; modified in place
    :param weight: denoising weight forwarded to ``denoise_tv_chambolle``
    :return: the same ``X``
    """
    n_items = X.shape[0]
    progbar = Progbar(n_items)  # pre-processing status tracking
    for item in range(n_items):
        for frame in range(X.shape[1]):
            cleaned = denoise_tv_chambolle(X[item, frame], weight=weight,
                                           multichannel=False)
            X[item, frame] = cleaned
        progbar.add(1)
    return X
def make_evaluations_gpu(conf,shot_list,loader):
    """Evaluate the model on ``shot_list`` and return the weighted mean loss.

    The shots are split into sublists of at most
    ``conf['model']['pred_batch_size']`` shots.  For every sublist a model is
    built for exactly that batch size, compiled, loaded with the stored
    weights and evaluated.  The per-sublist metrics are averaged with the
    sublist sizes as weights.

    The loader is switched to inference mode for the duration of the call.
    """
    loader.set_inference_mode(True)

    # Backend setup must run before the Keras imports below.
    if backend == 'tf' or backend == 'tensorflow':
        first_time = "tensorflow" not in sys.modules
        if first_time:
            import tensorflow as tf
            os.environ['KERAS_BACKEND'] = 'tensorflow'
            from keras.backend.tensorflow_backend import set_session
            config = tf.ConfigProto(device_count={"GPU":1})
            set_session(tf.Session(config=config))
    else:
        os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'
        import theano

    from keras.utils.generic_utils import Progbar
    from plasma.models.builder import ModelBuilder
    specific_builder = ModelBuilder(conf)

    # NOTE: these three lists are never filled or returned in this function.
    y_prime = []
    y_gold = []
    disruptive = []
    batch_size = min(len(shot_list),conf['model']['pred_batch_size'])

    pbar = Progbar(len(shot_list))
    print('evaluating {} shots using batchsize {}'.format(len(shot_list),batch_size))

    shot_sublists = shot_list.sublists(batch_size,equal_size=False)
    all_metrics = []
    all_weights = []
    for (i,shot_sublist) in enumerate(shot_sublists):
        # Sublists may differ in size, so the model is rebuilt per sublist
        # with a matching batch size.
        batch_size = len(shot_sublist)
        model = specific_builder.build_model(True,custom_batch_size=batch_size)
        model.compile(optimizer=optimizer_class(),loss=conf['data']['target'].loss)
        specific_builder.load_model_weights(model)
        model.reset_states()
        X,y,shot_lengths,disr = loader.load_as_X_y_pred(shot_sublist,custom_batch_size=batch_size)

        # evaluate on the loaded data
        all_metrics.append(model.evaluate(X,y,batch_size=batch_size,verbose=False))
        all_weights.append(batch_size)
        model.reset_states()

        pbar.add(1.0*len(shot_sublist))
        loader.verbose=False  # True during the first iteration

    if len(all_metrics) > 1:
        print('evaluations all: {}'.format(all_metrics))
    # Weight each sublist's metric by the number of shots it contained.
    loss = np.average(all_metrics,weights = all_weights)
    print('Evaluation Loss: {}'.format(loss))
    loader.set_inference_mode(False)
    return loss
def adverse_model_train(train, ad_model, gen_model, word_index, glove, nb_epochs = 20, batch_size=64, ci=False): for e in range(nb_epochs): print "Epoch ", e mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True) p = Progbar(2 * len(train)) for i, train_index in mb: if len(train_index) != batch_size: continue class_indices = [i % 3] * batch_size if ci else None X, y = adverse_batch([train[k] for k in train_index], word_index, gen_model, len(train), class_indices = class_indices) loss = ad_model.train_on_batch(X, y)[0] p.add(len(X),[('train_loss', loss)])
def new_generate_dataset(dataset, samples, gen_test, beam_size, hypo_len, noise_size, cmodel):
    """Generate at least ``samples`` examples and attach classifier predictions.

    Batches are drawn from ``val_generator`` until the progress bar has seen
    ``samples`` examples.  Each batch tuple is extended with
    ``cmodel.predict`` on its first two elements, and all batches are merged
    with ``merge_result_batches``.
    """
    source = val_generator(dataset, gen_test, beam_size, hypo_len, noise_size)
    p = Progbar(samples)
    collected = []
    while p.seen_so_far < samples:
        batch = next(source)
        probs = cmodel.predict([batch[0], batch[1]], verbose = 0)
        batch += (probs,)
        p.add(len(batch[0]))
        collected.append(batch)
    merged = merge_result_batches(collected)
    return merged
def preprocess(X):
    """
    Pre-process images that are fed to neural network.

    Each slice ``X[i, j]`` is overwritten with its TV-Chambolle denoised
    version (``weight=0.1``).

    :param X: array indexed as ``X[i, j]``; modified in place
    :return: the same ``X``
    """
    tracker = Progbar(X.shape[0])  # pre-processing status tracking
    for row in range(X.shape[0]):
        columns = range(X.shape[1])
        for col in columns:
            X[row, col] = denoise_tv_chambolle(X[row, col], weight=0.1,
                                               multichannel=False)
        tracker.add(1)
    return X
def shift_augmentation(X, h_range, w_range):
    """Return a copy of ``X`` with every entry shifted by a random offset.

    Per first-axis entry, a relative offset is drawn uniformly from
    ``[-h_range, h_range)`` and ``[-w_range, w_range)``, scaled by the slice
    height/width (``X.shape[2:]``) and truncated to whole pixels.  The same
    offset is applied to all slices of the entry with ``ndimage.shift``
    (``order=0``).  ``X`` is not modified.
    """
    progbar = Progbar(X.shape[0])  # augmentation status tracking
    shifted = np.copy(X)
    height, width = X.shape[2:][0], X.shape[2:][1]
    for sample in range(len(X)):
        h_random = np.random.rand() * h_range * 2. - h_range
        w_random = np.random.rand() * w_range * 2. - w_range
        offset = (int(h_random * height), int(w_random * width))
        for frame in range(X.shape[1]):
            shifted[sample, frame] = ndimage.shift(X[sample, frame], offset,
                                                   order=0)
        progbar.add(1)
    return shifted
def shift_random(X, h_range, v_range):
    """Return a copy of ``X`` with every sample randomly translated.

    One random offset pair is drawn per index along the first axis and
    applied to all ``X[i, j]`` entries of that sample.  Offsets are uniform
    fractions in ``[-h_range, h_range)`` of ``X.shape[2]`` and
    ``[-v_range, v_range)`` of ``X.shape[3]``, truncated to whole pixels.

    :param X: 4-D array (it is indexed as ``X[i, j, :, :]``); not modified
    :param h_range: maximum shift along axis 2, as a fraction of its size
    :param v_range: maximum shift along axis 3, as a fraction of its size
    :return: array of the same shape as ``X`` holding the shifted data
    """
    X_shift = np.copy(X)
    print("random shifting")
    status = Progbar(X.shape[0])
    for i in range(X.shape[0]):
        h_shift = np.random.rand() * h_range * 2 - h_range
        v_shift = np.random.rand() * v_range * 2 - v_range
        h_shift = int(h_shift * X.shape[2])
        v_shift = int(v_shift * X.shape[3])
        for j in range(X.shape[1]):
            # Write into the matching slot of the output.  Rebinding X_shift
            # itself would discard the copy and return only the last shifted
            # 2-D image.
            X_shift[i, j, :, :] = ndimage.shift(X[i, j, :, :], (h_shift, v_shift), order=0)
        status.add(1)
    return X_shift
def rotation(X, angle_range):
    """Return a copy of ``X`` with every sample rotated by a random angle.

    One integer angle is drawn per index along the first axis from
    ``np.random.randint(-angle_range, angle_range)`` and applied to all
    ``X[i, j]`` entries of that sample, keeping the original shape
    (``reshape=False``) and using spline order 2.  ``X`` is not modified.
    """
    print("rotation augmentation")
    n_samples = X.shape[0]
    tracker = Progbar(n_samples)
    rotated = np.copy(X)
    for sample in range(n_samples):
        theta = np.random.randint(-angle_range, angle_range)
        for plane in range(X.shape[1]):
            rotated[sample, plane, :, :] = ndimage.rotate(
                X[sample, plane, :, :], theta, reshape=False, order=2)
        tracker.add(1)
    return rotated
def read_images(img_size, img_list, with_progbar=True):
    """Load the photos named by ``img_list`` into one float16 array.

    Each id is formatted into ``data/train_photos/<id>.jpg``, the image is
    resized to ``img_size`` x ``img_size`` with nearest-neighbour resampling
    and stored at the matching row of the result.

    :param img_size: edge length of the resized square images
    :param img_list: sequence of integer photo ids
    :param with_progbar: show a progress bar while loading
    :return: array of shape ``(len(img_list), 3, img_size, img_size)``
    """
    count = len(img_list)
    tracker = Progbar(count) if with_progbar else None
    pixels = np.ones((count, 3, img_size, img_size), dtype=np.float16)
    target_size = (img_size, img_size)
    for row, photo_id in enumerate(img_list):
        picture = image.load_img('data/train_photos/%d.jpg' % photo_id)
        picture = picture.resize(target_size, PIL.Image.NEAREST)
        pixels[row] = image.img_to_array(picture)
        if tracker is not None:
            tracker.add(1)
    return pixels
def nldenoise(X):
    """
    Apply bilateral denoising to every ``X[i]`` in place.

    :param X: array indexed along its first axis; modified in place
    :return: the same ``X`` object, after denoising
    """
    print('Denoising images...')
    n_images = X.shape[0]
    tracker = Progbar(n_images)  # one tick per image
    for idx in range(n_images):
        cleaned = denoise_bilateral(X[idx], sigma_range=0.05, sigma_spatial=4)
        X[idx] = cleaned
        tracker.add(1)
    return X
def equalize(X):
    """
    Apply histogram equalization to every ``X[i]`` in place.

    :param X: array indexed along its first axis; modified in place
    :return: the same ``X`` object, after equalization
    """
    print('Equalizing images...')
    n_images = X.shape[0]
    tracker = Progbar(n_images)  # one tick per image
    for idx in range(n_images):
        balanced = exposure.equalize_hist(X[idx])
        X[idx] = balanced
        tracker.add(1)
    return X
def predict_over_augmentations(model, X, y, n_aug=10, evaluate=True):
    """Average the model's predictions over ``n_aug`` augmented copies of ``X``.

    Uses the module-level ``datagen`` to produce full-size, unshuffled
    augmented batches of ``X``.  Column 0 of ``model.predict`` is summed over
    the batches and divided by ``n_aug``.

    :param model: object with a ``predict`` method returning a 2-D array
    :param X: input samples
    :param y: reference labels; only read when ``evaluate`` is true
    :param n_aug: number of augmented passes to average over
    :param evaluate: when true, print the accuracy of the rounded averages
    :return: 1-D array with one averaged prediction per sample
    """
    progbar = Progbar(n_aug)  # one tick per augmentation pass
    probs = np.zeros(len(X), )
    gen_iter = datagen.flow(X, batch_size=len(X), shuffle=False)
    for _ in range(n_aug):
        progbar.add(1)
        # Builtin next() works on Python 2 and 3; the ``.next()`` method
        # spelling is Python 2 only.
        X_aug = next(gen_iter)
        preds = model.predict(X_aug)
        probs += preds[:, 0]
    # ``np.float`` was just an alias of the builtin and has been removed from
    # current NumPy releases.
    probs /= float(n_aug)
    if evaluate:
        pred_classes = np.round(probs)
        accuracy = 1.0 - (np.abs(y - pred_classes).sum()) / len(y)
        print("Accuracy: " + str(accuracy))
    return probs
def generation_test(train, glove, model, batch_size=64, prem_len=22, hypo_len=12):
    """Run one shuffled pass over ``train``, fitting ``model`` batch by batch.

    Each minibatch is converted to premise and hypothesis vector sequences
    with ``load_data.prepare_split_vec_dataset`` and padded to ``prem_len``
    and ``hypo_len`` (vector dimension 50).  ``model.train_on_batch`` is then
    called with the premises as input and the hypotheses as target.  Nothing
    is returned; the running ``train_loss`` is shown on a Progbar.
    """
    minibatches = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    progress = Progbar(len(train))
    for _, indices in minibatches:
        examples = [train[k] for k in indices]
        premises, hypotheses, _ = load_data.prepare_split_vec_dataset(examples, glove)
        padded_prem = load_data.pad_sequences(premises, maxlen=prem_len, dim=50)
        padded_hypo = load_data.pad_sequences(hypotheses, maxlen=hypo_len, dim=50)
        batch_loss = model.train_on_batch(padded_prem, padded_hypo)[0]
        progress.add(len(padded_prem), [('train_loss', batch_loss)])
def adverse_model2_train(train, ad_model, gen_model, word_index, glove, nb_epochs = 20, batch_size=64, ci=False): for e in range(nb_epochs): print "Epoch ", e mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True) p = Progbar(len(train)) for i, train_index in mb: if len(train_index) != batch_size: continue class_indices = [i % 3] * batch_size if ci else None train_b, gen_b, y = adverse_batch([train[k] for k in train_index], word_index, gen_model, len(train), class_indices = class_indices, separate = False) data = {'train_hypo' : train_b, 'gen_hypo': gen_b, 'output2': y} loss = ad_model.train_on_batch(data)[0] p.add(len(train_b),[('train_loss', loss)])
def pre_generate(train, gen_model, discriminator, class_model, word_index, beam_size, target_size):
    """Accumulate batches from ``ca_generator`` into one dict of arrays.

    Batches (dicts) are drawn until the progress bar has seen at least
    ``target_size`` items, counted as ``len(batch['hypo'])`` per batch.  The
    values collected under each key are then joined with ``np.concatenate``.

    :return: dict mapping every batch key to the concatenated array
    """
    progress = Progbar(target_size)
    source = ca_generator(train, gen_model, discriminator, class_model, word_index, beam_size)
    pieces = {}
    while progress.seen_so_far < target_size:
        batch = next(source)
        for key, value in batch.iteritems():
            if key not in pieces:
                pieces[key] = []
            pieces[key].append(value)
        progress.add(len(batch['hypo']))
    # Only existing keys are reassigned, so the dict size stays constant
    # while it is being walked.
    for key, chunks in pieces.iteritems():
        pieces[key] = np.concatenate(chunks)
    return pieces
def test_hierarchical_softmax(timesteps = 15, input_dim = 50, batch_size = 32, output_dim = 3218, batches = 300, epochs = 30):
    # Trains a Graph model with a HierarchicalSoftmax output ('hs') side by
    # side with a plain TimeDistributedDense + softmax model on data from
    # generate_batch, then runs a final pass on freshly generated data.
    # Nothing is returned; losses are only reported through Progbar.
    #
    # NOTE(review): the source this was recovered from had lost its
    # indentation.  The final "test" section is placed after the epoch loop
    # here -- confirm against the original file.

    # Model 1: hierarchical softmax fed by a real-valued input and an int32
    # input of shape (batch_size, timesteps).
    model = Graph()
    model.add_input(name='real_input', batch_input_shape=(batch_size, timesteps, input_dim))
    model.add_input(name='train_input', batch_input_shape=(batch_size, timesteps), dtype='int32')
    model.add_node(HierarchicalSoftmax(output_dim, input_dim = input_dim, input_length = timesteps), name = 'hs', inputs=['real_input','train_input'], merge_mode = 'join', create_output=True)
    model.compile(loss={'hs':hs_categorical_crossentropy}, optimizer='adam')
    print "hs model compiled"

    # Model 2: baseline full softmax over output_dim classes.
    model2 = Sequential()
    model2.add(TimeDistributedDense(output_dim, batch_input_shape=(batch_size, timesteps, input_dim)))
    model2.add(Activation('softmax'))
    model2.compile(loss='categorical_crossentropy', optimizer='adam')
    print "softmax model compiled"

    # Random (input_dim, output_dim) matrix, each row divided by its norm;
    # it is passed to generate_batch to produce the data.
    learn_f = np.random.normal(size = (input_dim, output_dim))
    learn_f = np.divide(learn_f, norm(learn_f, axis=1)[:,None])
    print "learn_f generated"

    for j in range(epochs):
        # NOTE(review): batch_data is iterated twice below, which only works
        # if generate_batch returns a re-iterable sequence rather than a
        # one-shot generator -- confirm.
        batch_data= generate_batch(learn_f, batch_size, timesteps, input_dim, output_dim, batches)
        print "Epoch", j, "data genrated"

        # Hierarchical-softmax model: inputs b[0], b[1]; target b[2].
        p = Progbar(batches * batch_size)
        for b in batch_data:
            data_train = {'real_input': b[0], 'train_input': b[1], 'hs':b[2]}
            loss = float(model.train_on_batch(data_train)[0])
            p.add(batch_size,[('hs_loss', loss)])

        # Baseline softmax model: input b[0]; target b[3].
        p2 = Progbar(batches * batch_size)
        for b in batch_data:
            loss, acc = model2.train_on_batch(b[0], b[3], accuracy=True)
            p2.add(batch_size,[('softmax_loss', loss),('softmax_acc', acc)])

    test_data = generate_batch(learn_f, batch_size, timesteps, input_dim, output_dim, batches)

    # NOTE(review): the 'hs' target here is b[3], whereas training used b[2]
    # -- confirm which element generate_batch intends for this model.
    p = Progbar(batches * batch_size)
    for b in test_data:
        data_test = {'real_input': b[0], 'train_input': b[1], 'hs':b[3]}
        loss = float(model.test_on_batch(data_test)[0])
        p.add(batch_size,[('hs__test_loss', loss)])

    # NOTE(review): this loop walks batch_data (the last training data), not
    # test_data, and calls train_on_batch rather than test_on_batch -- looks
    # like it was meant to be the baseline's test pass; confirm.
    p2 = Progbar(batches * batch_size)
    for b in batch_data:
        loss = float(model2.train_on_batch(b[0], b[3])[0])
        p2.add(batch_size,[('softmax_loss', 
                            loss)])
def rotaterandom(X):
    """
    Rotate roughly half of the ``X[i]`` entries in place by a small angle.

    For each entry a uniform draw decides whether it is rotated; if so, the
    angle is drawn uniformly from [-6, 6) and the entry is replaced by its
    rotation (same shape, ``mode='nearest'``).

    :param X: array indexed along its first axis; modified in place
    :return: the same ``X`` object
    """
    print('Rotating images...')
    n_images = X.shape[0]
    tracker = Progbar(n_images)  # one tick per image, rotated or not
    for idx in range(n_images):
        rotate_this = np.random.rand() > 0.5
        if rotate_this:
            theta = (np.random.rand() - 0.5) * 12
            X[idx] = rotate(X[idx], theta, mode='nearest', reshape=False)
        tracker.add(1)
    return X
def preprocess3(X, weight=0.1):
    """
    Denoise and then adaptively equalize every ``X[i, j]`` entry in place.

    :param X: array indexed along its first two axes; modified in place
    :param weight: weight passed to the total-variation denoiser
    :return: the same ``X`` object, after processing
    """
    outer_count = X.shape[0]
    tracker = Progbar(outer_count)  # one tick per entry along the first axis
    for outer in range(outer_count):
        for inner in range(X.shape[1]):
            # The denoised result is written back before equalization reads
            # it, matching a two-step in-place pipeline.
            X[outer, inner] = denoise_tv_chambolle(
                X[outer, inner], weight=weight, multichannel=False)
            X[outer, inner] = equalize_adapthist(X[outer, inner])
        tracker.add(1)
    return X
def run_epoch(self, split, train=False, batch_size=128, return_pred=False):
    """Run one pass over ``split`` and return aggregate metrics.

    For every batch the targets in ``Y`` (each concatenated with ``types``
    along axis 1) are merged into the input dict, the model is stepped with
    ``train_on_batch`` or ``test_on_batch``, and the ``p_relation``
    predictions are multiplied by ``self.typechecker.get_valid_cpu`` of the
    two type columns before taking the argmax.

    :param split: data split exposing ``num_examples`` and
        ``batches(batch_size)`` yielding ``(idx, X, Y, types)``
    :param train: update the model when true, otherwise only evaluate
    :param batch_size: batch size passed to ``split.batches``
    :param return_pred: also return example ids, predictions and targets
    :return: dict with ``f1``, ``precision``, ``recall``, ``accuracy`` and
        ``loss`` (mean of the per-batch losses); plus ``ids``, ``preds`` and
        ``targs`` lists when ``return_pred`` is true
    """
    step = self.model.train_on_batch if train else self.model.test_on_batch
    num_batches = 0
    loss_sum = 0
    all_ids, all_preds, all_targs = [], [], []
    progress = Progbar(split.num_examples)
    for idx, X, Y, types in split.batches(batch_size):
        X.update({k: np.concatenate([v, types], axis=1) for k, v in Y.items()})
        loss = step(X)
        prob = self.model.predict(X, verbose=0)['p_relation']
        # Presumably zeroes out relations that are invalid for the pair of
        # types (confirm against the typechecker).
        prob *= self.typechecker.get_valid_cpu(types[:, 0], types[:, 1])
        pred = prob.argmax(axis=1)
        targ = Y['p_relation'].argmax(axis=1)
        all_ids.append(idx)
        all_targs.append(targ)
        all_preds.append(pred)
        loss_sum += loss
        num_batches += 1
        progress.add(idx.size, values=[('loss', loss), ('acc', np.mean(pred == targ))])

    preds = np.concatenate(all_preds).astype('int32')
    targs = np.concatenate(all_targs).astype('int32')
    ids = np.concatenate(all_ids).astype('int32')

    ret = {
        'f1': f1_score(targs, preds, average='micro', labels=self.labels),
        'precision': precision_score(targs, preds, average='micro', labels=self.labels),
        'recall': recall_score(targs, preds, average='micro', labels=self.labels),
        'accuracy': accuracy_score(targs, preds),
        'loss': loss_sum / float(num_batches),
    }
    if return_pred:
        ret.update({
            'ids': ids.tolist(),
            'preds': preds.tolist(),
            'targs': targs.tolist(),
        })
    return ret
def _arena_play(self, num, verbose=True):
    """
    Play ``num`` arena games, alternating which player moves first.

    ``num`` is halved (``int(num/2)``); in every round one game is played
    with ``self`` passed first and one with ``self.pnet`` passed first.

    Parameters
    ----------
    num : int
        Total number of games requested.
    verbose : bool
        Forwarded to ``_arena_play_once``.

    Returns
    -------
    oneWon : int
        games won by player1
    twoWon : int
        games won by player2
    draws : int
        games won by nobody
    """
    rounds = int(num/2)
    progress = Progbar(2*rounds, stateful_metrics=["wins", "draws", "losses"])
    progress.update(0, values=[("wins",0),("draws",0),("losses",0)])

    # [player-one wins, player-two wins, draws]
    tally = [0, 0, 0]

    def record(outcome, player_one_code):
        # ``player_one_code`` is the outcome value that means player one won
        # for the seating used in this game; its negation means player two.
        if outcome == player_one_code:
            tally[0] += 1
        elif outcome == -player_one_code:
            tally[1] += 1
        else:
            tally[2] += 1
        progress.add(1, [("wins", tally[0]), ("losses", tally[1]), ("draws", tally[2])])

    for _ in range(rounds):
        record(self._arena_play_once(self, self.pnet, verbose=verbose), 1)
        record(self._arena_play_once(self.pnet, self, verbose=verbose), -1)

    return tally[0], tally[1], tally[2]
def rotation_augmentation(X, angle_range):
    """Return a copy of ``X`` with every sample rotated by a random angle.

    One integer angle is drawn per index along the first axis from
    ``np.random.randint(-angle_range, angle_range)`` and applied to all
    ``X[i, j]`` entries (same shape, spline order 1).  The four values at
    ``[0, 0]`` .. ``[0, 3]`` of every entry are copied over unchanged from
    the input; presumably they carry metadata (age, dist, sex, type) rather
    than pixel data.  ``X`` is not modified.
    """
    tracker = Progbar(X.shape[0])  # one tick per sample
    rotated = np.copy(X)
    for sample in range(len(X)):
        theta = np.random.randint(-angle_range, angle_range)
        for plane in range(X.shape[1]):
            source = X[sample, plane]
            # Remember the four leading row-0 values before rotating.
            preserved = tuple(source[0, col] for col in range(4))
            rotated[sample, plane] = ndimage.rotate(source, theta, reshape=False, order=1)
            for col, value in enumerate(preserved):
                rotated[sample, plane][0, col] = value
        tracker.add(1)
    return rotated
def load_shots(self,shot_list,is_inference=False,as_list=False,num_samples=np.Inf):
    """Load every shot in ``shot_list`` in parallel worker processes.

    ``self.load_shot`` is applied to each shot through a multiprocessing
    pool; results are collected in the order of ``shot_list``.

    :param shot_list: iterable of shots supporting ``len()``
    :param is_inference: forwarded to ``self.load_shot``
    :param as_list: accepted for interface compatibility; not used here
    :param num_samples: forwarded to ``self.get_sample_probs``
    :return: ``(X, Y, Disr)`` where ``X`` and ``Y`` are lists with one item
        per shot and ``Disr`` is a NumPy array of the third values returned
        by ``self.load_shot``
    """
    X = []
    Y = []
    Disr = []
    print("loading...")
    pbar = Progbar(len(shot_list))

    sample_prob_d,sample_prob_nd = self.get_sample_probs(shot_list,num_samples)
    fn = partial(self.load_shot,is_inference=is_inference,sample_prob_d=sample_prob_d,sample_prob_nd=sample_prob_nd)

    pool = mp.Pool()
    try:
        print('loading data in parallel on {} processes'.format(pool._processes))
        for x,y,disr in pool.imap(fn,shot_list):
            X.append(x)
            Y.append(y)
            Disr.append(disr)
            pbar.add(1.0)
        pool.close()
    finally:
        # Always reap the workers.  After a clean close() every result has
        # already been consumed, so terminate() only stops idle processes;
        # after an error it prevents the pool from leaking.
        pool.terminate()
        pool.join()
    return X,Y,np.array(Disr)
def preprocess(X):
    """
    Denoise every ``X[i, j]`` entry in place, keeping four row-0 values.

    The values at ``[0, 0]`` .. ``[0, 3]`` of each entry are saved before
    total-variation denoising and written back afterwards; presumably they
    carry metadata (age, dist, sex, type) rather than pixel data.

    :param X: array indexed along its first two axes; modified in place
    :return: the same ``X`` object, after denoising
    """
    outer_count = X.shape[0]
    tracker = Progbar(outer_count)  # one tick per entry along the first axis
    for outer in range(outer_count):
        for inner in range(X.shape[1]):
            # Scalars are read out first so the in-place overwrite below
            # cannot clobber them.
            preserved = tuple(X[outer, inner][0, col] for col in range(4))
            X[outer, inner] = denoise_tv_chambolle(
                X[outer, inner], weight=0.1, multichannel=False)
            for col, value in enumerate(preserved):
                X[outer, inner][0, col] = value
        tracker.add(1)
    return X
def make_predictions_gpu(conf,shot_list,loader):
    """Predict on every shot in ``shot_list`` using the saved model weights.

    The shots are processed in equal-sized, unshuffled sublists of
    ``conf['model']['pred_batch_size']`` shots.  Model output and targets
    are converted with ``loader.batch_output_to_array`` and each array is
    cut back to its shot length.  The accumulated results are finally
    truncated to ``len(shot_list)`` entries; presumably the equal-sized
    sublists can contain padding shots.

    :return: ``(y_prime, y_gold, disruptive)``: predictions, targets and the
        per-shot values returned fourth by ``loader.load_as_X_y_pred``
    """
    os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'  # or device=cpu
    # Unused by name; presumably imported here so Theano starts up with the
    # flags set on the previous line.
    import theano
    from keras.utils.generic_utils import Progbar
    from model_builder import ModelBuilder

    builder = ModelBuilder(conf)
    _, model = builder.build_train_test_models()
    builder.load_model_weights(model)
    model.reset_states()

    predictions = []
    targets = []
    disruptive = []
    n_shots = len(shot_list)
    progress = Progbar(n_shots)
    pred_batch_size = conf['model']['pred_batch_size']

    for shot_sublist in shot_list.sublists(pred_batch_size, shuffle=False, equal_size=True):
        X, y, shot_lengths, disr = loader.load_as_X_y_pred(shot_sublist)

        batch_pred = model.predict(X, batch_size=pred_batch_size)
        model.reset_states()
        batch_pred = loader.batch_output_to_array(batch_pred)
        batch_true = loader.batch_output_to_array(y)

        # Cut every array back to the real length of its shot.
        batch_pred = [arr[:shot_lengths[k]] for (k, arr) in enumerate(batch_pred)]
        batch_true = [arr[:shot_lengths[k]] for (k, arr) in enumerate(batch_true)]

        progress.add(1.0 * len(shot_sublist))
        # The loader is left verbose for the first iteration only.
        loader.verbose = False

        predictions.extend(batch_pred)
        targets.extend(batch_true)
        disruptive.extend(disr)

    return predictions[:n_shots], targets[:n_shots], disruptive[:n_shots]