def __init__(self, fields, reader=None, sep='\t', seed=None):
    self.fields = fields
    self.reader = reader  # expected to be a GlobReader
    self.sep = sep
    if seed is None or seed <= 0:
        self.seed = U.get_seed()
    else:
        self.seed = seed
    self.rng = np.random.RandomState(self.seed + U.string2rand('FieldParser'))
def split_data(data, target, seed=None, test_size=.1):
    '''Split the data and target into train and test sets.

    params:
        data: the data to be split
        target: the target dataset that will be split
        seed (optional): seed for the split; defaults to the common seed
        test_size (optional): test fraction. Default 10%
    '''
    seed = seed if seed else get_seed()
    return cv.train_test_split(data, target, test_size=test_size,
                               random_state=seed)
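# Usage sketch (illustrative; X and y are hypothetical arrays): a fixed seed
# makes the split reproducible across calls.
X_train, X_test, y_train, y_test = split_data(X, y, seed=42, test_size=0.2)
X_train2, X_test2, _, _ = split_data(X, y, seed=42, test_size=0.2)
assert (X_train == X_train2).all()  # same seed -> identical split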
def __init__(self, file_name, chunk_size=1000, shuf=True, regex=None,
             seed=None, bucket=None, verbose=True):
    self.file_name = file_name
    self.chunk_size = chunk_size
    if chunk_size is None or chunk_size <= 0:
        # read the entire file as one chunk
        self.chunk_size = np.iinfo(np.int32).max
    self.shuf = shuf
    self.bucket = bucket
    self.verbose = verbose
    self.regex = regex
    if regex:
        self.regex = re.compile(regex)
    if seed is None or seed <= 0:
        self.seed = U.get_seed()
    else:
        self.seed = seed
    self.rng = np.random.RandomState(self.seed + U.string2rand('ChunkReader'))
def __init__(self, file_pattern, chunk_size=1000, shuf=True, regex=None,
             seed=None, bucket=None, verbose=True):
    self.file_pattern = file_pattern
    self.file_names = glob.glob(self.file_pattern)
    self.file_names.sort()
    self.chunk_size = chunk_size
    self.shuf = shuf
    self.bucket = bucket
    self.verbose = verbose
    self.regex = regex
    if seed is None or seed <= 0:
        self.seed = U.get_seed()
    else:
        self.seed = seed
    self.rng = np.random.RandomState(self.seed + U.string2rand('GlobReader'))
    self.num_files = None
    self.bpf = None
    self.prev_file = ''
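# The readers above derive per-class RNG streams from one base seed by
# adding U.string2rand(<class name>). A minimal sketch of such a helper
# (assumption; the real U.string2rand may differ) hashes the name to a
# stable integer, unlike built-in hash(), which is salted per process:
import hashlib

def string2rand(s, mod=2**16):
    return int(hashlib.md5(s.encode()).hexdigest(), 16) % mod

# e.g. two readers built from the same seed still draw distinct, individually
# reproducible streams: RandomState(seed + string2rand('GlobReader')).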
def train_regression(data, target, models=None, scorer=None, verbose=False,
                     seed=None):
    '''Train and compare models on a reproducible train/validation split.

    :return: list of tuples (model id, dict of model)
    :params:
        data: data to be trained on, without target values
        target: model target values
        models: list of models to be trained
        scorer: the scorer used to rank the best results
        verbose: print information
    '''
    seed = seed if seed else get_seed()
    # default models to test
    if not models:
        models = [ExtraTreesClassifier(random_state=seed),
                  GradientBoostingClassifier(random_state=seed),
                  RandomForestClassifier(random_state=seed),
                  LogisticRegressionCV(),
                  RidgeClassifierCV(),
                  LinearSVC(random_state=seed),
                  SVC(random_state=seed),
                  SGDClassifier(random_state=seed),
                  GaussianNB()]
    # choose accuracy if no scorer was passed
    if not scorer:
        scorer = accuracy_score
    # split into train and validation sets
    X_train, X_val, y_train, y_val = split_data(data, target, seed)
    print_time('Created train and validation')
    print_time('Size train: {} test: {}'.format(X_train.shape, X_val.shape))
    return train_all_models(X_train, X_val, y_train, y_val, models, scorer,
                            verbose)
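# Usage sketch (illustrative): run the default model sweep on a toy dataset.
# Assumes the sklearn estimators referenced above are imported in this module.
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, n_features=20, random_state=0)
results = train_regression(X, y, seed=42, verbose=True)
# results are ordered by the scorer (accuracy_score unless one is passed)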
# Load config parameters
locals().update(config)

# Set up model and prediction function
x = tensor.tensor3('inputs', dtype='float64')
y = tensor.tensor3('targets', dtype='float64')
model = 'bs'  # placeholder, replaced by the pickled model below
with open('gru_best.pkl', 'rb') as picklefile:
    model = load(picklefile)
y_hat, cost, cells = nn_fprop(x, y, frame_length, hidden_size, num_layers,
                              model)
predict_fn = theano.function([x], y_hat)

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [seed_index])
sec = 16000  # samples per second
samples_to_generate = sec * secs_to_generate
num_frames_to_generate = samples_to_generate / frame_length + seq_length  # don't include seed
predictions = []
prev_input = seed
for i in range(num_frames_to_generate):
    prediction = predict_fn(prev_input)
    predictions.append(prediction)
    pred_min = numpy.min(predictions)
    pred_max = numpy.max(predictions)
    prev_input = rescale(prediction, pred_max, pred_min)
actually_generated = numpy.asarray(predictions)[seq_length:, :, :, :]  # cut off seed
last_frames = actually_generated[:, :, -1, :]
make_wav(output_filename, actually_generated.flatten())
print str(secs_to_generate) + ' seconds of audio generated'
# Fourier magnitude map
modlmap = enmap.modlmap(shape, wcs)

# Unlensed CMB theory
theory = cosmology.default_theory()
cltt2d = theory.uCl('TT', modlmap)
clte2d = theory.uCl('TE', modlmap)
clee2d = theory.uCl('EE', modlmap)
power = np.zeros((3, 3, shape[0], shape[1]))
power[0, 0] = cltt2d
power[1, 1] = clee2d
power[1, 2] = clte2d
power[2, 1] = clte2d

# Unlensed CMB generator
mgen = maps.MapGen((3,) + shape, wcs, power)

for j, task in enumerate(my_tasks):
    print(f'Rank {rank} performing task {task} as index {j}')
    # unlensed map; you can save it if you want, before lensing it
    cmb = mgen.get_map(seed=cutils.get_seed('lensed', task, False))
    cmb = lens_map(cmb)  # do the lensing
    # downsample the lensed CMB map (if saving unlensed, do the same to it)
    dcmb = cmb.resample((3, dNpix, dNpix))
    # For some reason, I was saving the Fourier transforms... probably since
    # it is easier to apply a beam later
    kmap = enmap.map2harm(dcmb, iau=False)
    enmap.write_map(f'{savedir}lensed_kmap_real_{task:06d}.fits', kmap.real)
    enmap.write_map(f'{savedir}lensed_kmap_imag_{task:06d}.fits', kmap.imag)
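# Sketch (assumption; the real cutils.get_seed may differ): a per-task seed
# helper maps a sim tag and task index to a distinct, reproducible seed so
# every MPI rank draws an independent but repeatable realization. numpy
# accepts sequences of ints as seeds, so a tuple works:
def get_seed(tag, task, flag):
    tags = {'lensed': 0, 'unlensed': 1}  # hypothetical tag table
    return (tags[tag], int(flag), task)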
    saveload.Load(load_path),
    plotter,
    saveload.Checkpoint(last_path, save_separately=['log']),
] + track_best('dev_cost', save_path)

if learning_rate_decay not in (0, 1):
    extensions.append(SharedVariableModifier(
        step_rules[0].learning_rate,
        lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr),
        after_epoch=True, after_batch=False))

print 'number of parameters in the model: ' + str(
    tensor.sum([p.size for p in cg.parameters]).eval())

# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [400])
seed_influence_length = frame_length * 3
sec = 16000  # samples per second
samples_to_generate = sec * secs_to_generate
num_frames_to_generate = samples_to_generate / frame_length + seed_influence_length
generated_seq = []
prev_input = seed
for x in range(0, num_frames_to_generate):
    prediction = predict_fn(prev_input)
    generated_seq.append(prediction)  # NEED TO OVERLAP/AVG?
    prev_input = prediction
actually_generated = numpy.asarray(generated_seq).flatten()[seed_influence_length:]
filename = str(frame_length) + str(seq_length) + '.wav'
make_wav(filename, actually_generated)
def sign_up(total_count, needed_count):
    dict_path = os.path.join(utils.root_path(), 'english.txt')
    seed = utils.get_seed(dict_path, 12)  # 12 words drawn from the dictionary
    keys = shamir.crypto_keys(needed_count, total_count, seed)
    WalletManager.create_wallet(seed.encode())
    return keys
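# Usage sketch (illustrative; key_holders and deliver() are hypothetical):
# split wallet recovery across 5 holders, any 3 of whom can reconstruct
# the seed (threshold secret sharing).
keys = sign_up(total_count=5, needed_count=3)
for holder, key in zip(key_holders, keys):
    deliver(holder, key)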
"-m", "--model", help="Save model in different file", action="store_true", ) parser.add_argument( "--split", metavar="[1-99]", help="Choose size of split", choices=(range(1, 100)), type=split_size, default=80, ) args = parser.parse_args() get_seed(args.seed) input_n = 13 output_n = 2 hidden_layers = args.hidden_layer n = neuralNetwork(input_n, output_n, hidden_layers, args.learningrate, sigmoid, args.bias) fit(args, n) print() if args.visu is True: fig1, ax1 = display(n.loss, n.val_loss, "Loss Trend", "loss", "val_loss") fig2, ax2 = display(n.acc, n.val_acc, "Accuracy Trend", "acc", "val_acc") plt.show() save_model(args.model, n)
def _train(args, pretrain_args):
    """Train the language model.

    Creates train/valid/test models, runs training epochs, saves the model
    and writes results to the database if specified.
    """
    start_time = time.time()
    print('Training', ', '.join(args.speakers), '...')

    # randomly sample the validation set monte_carlo_cv_num times
    for num in range(args.monte_carlo_cv_num):
        # get seed used to sub-sample validation dataset (use 42 for 1st run)
        seed = utils.get_seed(num)

        # get train/valid/test data and convert to sequences
        train_data, valid_data, test_data, id_to_word = data_reader.get_data(
            args, seed=seed)

        # set configurations/hyperparameters for model
        config, test_config = utils.set_config(args, id_to_word)

        # initialize word embeddings
        init_embed = utils.init_embedding(id_to_word, dim=args.embed_size,
                                          init_scale=args.init_scale,
                                          embed_path=args.embed_path)

        with tf.Graph().as_default():
            # initializer used to initialize TensorFlow variables
            initializer = tf.random_uniform_initializer(
                -config['init_scale'], config['init_scale'])

            # create Train model
            with tf.name_scope('Train'):
                with tf.variable_scope('Model', reuse=None,
                                       initializer=initializer):
                    m_train = model.Model(args, is_training=True,
                                          config=config,
                                          init_embed=init_embed, name='Train')
                    m_train.build_graph()

            # create Valid model
            with tf.name_scope('Valid'):
                with tf.variable_scope('Model', reuse=True,
                                       initializer=initializer):
                    m_valid = model.Model(args, is_training=False,
                                          config=config,
                                          init_embed=init_embed, name='Valid')
                    m_valid.build_graph()

            # create Test model
            with tf.name_scope('Test'):
                with tf.variable_scope('Model', reuse=True,
                                       initializer=initializer):
                    m_test = model.Model(args, is_training=False,
                                         config=test_config,
                                         init_embed=init_embed, name='Test')
                    m_test.build_graph()

            # create summaries to be viewed in TensorBoard
            tb_summaries = utils.TensorBoardSummaries()
            tb_summaries.create_ops()

            init = tf.global_variables_initializer()

            # if pretrained, must create dict to initialize TF Saver
            if bool(pretrain_args):
                # get trainable variables and convert to dict for Saver
                reuse_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES)
                reuse_vars_dict = dict(
                    [(var.op.name, var) for var in reuse_vars])
                # create saver for TF session (see function for addl details)
                saver = utils.create_tf_saver(args, pretrain_args,
                                              reuse_vars_dict)
            else:
                saver = tf.train.Saver()

            # ppls dict has perplexities that are stored in results database
            ppls = {}
            ppls, _ = _update_ppls(ppls, initialize=True)

            with tf.Session() as sess:
                sess.run(init)
                if args.load_path != '':
                    print('Restoring model...')
                    saver.restore(sess, args.load_path)

                for epoch in range(config['max_epoch']):
                    print('Epoch: {0} Learning rate: {1:.3f}\n'.format(
                        epoch + 1, sess.run(m_train.lr)))
                    for i, speaker in enumerate(args.speakers):
                        print('Training {0} ...'.format(speaker))
                        # run epoch on training data
                        train_perplexity = _run_epoch(
                            sess, m_train, args, train_data, i, tb_summaries,
                            id_to_word, train_op=m_train.train_op,
                            verbose=True)
                        print('Epoch: {0} Train Perplexity: {1:.3f}'.format(
                            epoch + 1, train_perplexity))
                        ppls, _ = _update_ppls(ppls, epoch=epoch + 1,
                                               speaker=speaker,
                                               ppl=train_perplexity,
                                               dataset='train')

                        print('Validating...')
                        # run epoch on validation data
                        valid_perplexity = _run_epoch(
                            sess, m_valid, args, valid_data, i, tb_summaries,
                            id_to_word, verbose=True)
                        print('Epoch: {0} Valid Perplexity: {1:.3f}'.format(
                            epoch + 1, valid_perplexity))
                        ppls, improved = _update_ppls(ppls, epoch=epoch + 1,
                                                      speaker=speaker,
                                                      ppl=valid_perplexity,
                                                      dataset='valid')

                        # save model if valid ppl is lower than current
                        # best valid ppl
                        if improved:
                            if args.save_path != '':
                                print('Saving model to {0}.'.format(
                                    args.save_path))
                                saver.save(sess, args.save_path)

                for i, speaker in enumerate(args.speakers):
                    print('Testing {0} ...'.format(speaker))
                    print('Restoring best model for testing...')
                    saver.restore(sess, args.save_path)
                    # run model on test data
                    test_perplexity = _run_epoch(sess, m_test, args,
                                                 test_data, i)
                    ppls['test_ppl_' + speaker] = test_perplexity
                    print('Test Perplexity: {0:.3f}'.format(test_perplexity))

        if args.insert_db == 'True':
            # write params/config/results to sql database
            results_db.insert_results(args, config, start_time, ppls)
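# Sketch (assumption; not the source's utils.get_seed): per the comment in
# _train, the first Monte Carlo CV run uses seed 42 and later runs get
# distinct deterministic seeds, e.g.:
def get_seed(num):
    return 42 + num  # run 0 -> 42, run 1 -> 43, ...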