def train():
    epochs = 40
    # Batch sizes: int values smaller than 32
    batch_sizes = [10]
    # List of lists of relative numbers of nodes for the intermediate layers:
    # []    .. no intermediate layer
    # [0.5] .. one intermediate layer with 50% of the nodes of the input layer
    # The output layer always has one node
    layers_list = [[], [0.5], [0.6, 0.3], [0.7, 0.5, 0.2]]
    # Possible values: 'relu', 'sigmoid', 'tanh', 'softmax', ...
    activations = ["relu"]
    # Possible values: fdat.read_train_data(), sdat.read_train_data()
    trainsets = [fdat.read_train_data()]
    for batch_size in batch_sizes:
        for layers in layers_list:
            for activation in activations:
                for ts in trainsets:
                    complexity = len(layers)
                    layer_configs = [hlp.LayerConfig(size) for size in layers]
                    model_config = hlp.ModelConfig(activation=activation,
                                                   optimizer='adam',
                                                   loss='mean_squared_error',
                                                   layers=layer_configs)
                    input_size = ts.x.shape[1]
                    # Bind the loop-local values via default arguments so the
                    # factory is not affected by Python's late-binding closures.
                    model = DeepModel(
                        f'{activation}_{complexity}',
                        lambda mc=model_config, n=input_size: hlp.create_model(mc, n))
                    training = Training(id=f'bs{batch_size}',
                                        batch_size=batch_size,
                                        deepModel=model,
                                        epochs=epochs,
                                        trainset=ts)
                    plot_loss_during_training(training=training)
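# hlp.create_model is not shown in this section. A minimal sketch of what it
# might look like, assuming Keras and that LayerConfig.size holds a node count
# relative to the input layer; the helper name and fields are assumptions:
def create_keras_model_sketch(model_config, input_size):
    from tensorflow import keras
    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(input_size,)))
    # Each intermediate layer gets a fraction of the input layer's node count.
    for layer_config in model_config.layers:
        units = max(1, int(round(layer_config.size * input_size)))
        model.add(keras.layers.Dense(units, activation=model_config.activation))
    # The output layer always has one node (single-target regression).
    model.add(keras.layers.Dense(1))
    model.compile(optimizer=model_config.optimizer, loss=model_config.loss)
    return model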
def cv_one(nn: NN) -> float:
    x_train, x_test, y_train, y_test = ms.train_test_split(
        trainset.x, trainset.y)
    print("-- x train", x_train.shape)
    print("-- y train", y_train.shape)
    print("-- x test", x_test.shape)
    print("-- y test", y_test.shape)
    layer_configs = [hlp.LayerConfig(size) for size in nn.layers]
    model_config = hlp.ModelConfig(activation=nn.activation,
                                   optimizer='adam',
                                   loss='mean_squared_error',
                                   layers=layer_configs)
    model = hlp.create_model(model_config, x_train.shape[1])
    history = model.fit(x_train, y_train,
                        epochs=run_config.epochs,
                        batch_size=run_config.batch_size)
    for loss in history.history['loss']:
        print(f'-- {loss:.6f}')
    err = model.evaluate(x_test, y_test)
    return err
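# Because train_test_split shuffles, repeating the split-and-fit cycle and
# averaging the returned test errors gives a simple Monte Carlo
# cross-validation. A hedged sketch of such a driver; the repeat count and
# function name are assumptions, not part of the original module:
def cv_mean_sketch(nn: NN, repeats: int = 5) -> float:
    errors = [cv_one(nn) for _ in range(repeats)]
    return sum(errors) / len(errors)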
# Get the unique characters.
chars = helpers.get_unique_characters(text)
# Get the number of unique characters.
chars_length = len(chars)
# Create sequences that are the input values and the next characters that are the labels.
values, labels = helpers.create_sequences(text, SEQUENCE_LENGTH, SEQUENCE_STEP)
char_to_index, index_to_char = helpers.create_dictionaries(chars)
# Convert to one-hot arrays.
x, y = helpers.convert_to_one_hot(values, SEQUENCE_LENGTH, chars_length,
                                  char_to_index, labels)
# Create the model.
model = helpers.create_model(SEQUENCE_LENGTH, chars_length)
# Train the model and save it to disk.
model.fit(x, y, batch_size=512, epochs=EPOCHS)
model.save_weights("model_weights.h5")
# To reuse existing model weights instead, comment out the two lines above and
# uncomment the next line. The script must have been run at least once before,
# so that saved weights exist on disk.
# model.load_weights("model_weights.h5")
# Create a first 80-character seed.
print('_____________')
seed = u"Σύρε να ειπής της μάννας σου να μη σε καταρειέται\nνα πέσεις στο βουνό και να σου".lower()
sys.stdout.write(seed)  # Python 3: no unicode()/encode('utf8') needed
for i in range(400):
    x = np.zeros((1, SEQUENCE_LENGTH, chars_length))
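    # The original section is truncated here. The rest of the loop body is a
    # sketch of the usual char-RNN sampling step, under the assumption that the
    # seed length equals SEQUENCE_LENGTH and the model outputs a softmax over
    # the character vocabulary:
    for t, char in enumerate(seed):
        x[0, t, char_to_index[char]] = 1.0
    predictions = model.predict(x, verbose=0)[0]
    next_char = index_to_char[int(np.argmax(predictions))]
    sys.stdout.write(next_char)
    seed = seed[1:] + next_char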
if os.path.exists(TRAINING_MODEL_PATH):
    print(
        "Model already exists. If you continue, this model will be overwritten. Type 'yes' to continue. "
        "Type 'no' to exit and keep the existing model.")
    resume = ""
    while resume not in ("yes", "no"):
        resume = input("yes/no: ")
    if resume == "no":
        sys.exit()
    else:
        print(
            "Starting training from scratch. Existing model will be overwritten."
        )

training_model = helpers.create_model(batch_size=BATCH_SIZE,
                                      input_length=SEQ_LENGTH,
                                      num_chars=num_chars,
                                      batch_momentum=batch_momentum,
                                      rnn_depth=RNN_DEPTH,
                                      dropout=DROPOUT_R)
training_model.compile(
    loss=helpers.sparse_softmax_cross_entropy_with_logits,
    optimizer=Nadam())
print("Model compiled and ready for training. Here is its summary:")
print(training_model.summary())

predict_model = helpers.create_model(batch_size=1,
                                     input_length=1,
                                     num_chars=num_chars,
                                     rnn_depth=RNN_DEPTH)
generator = helpers.GenerateJoke(char_dict)

# Now the training loop.
epoch = 0
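# The section ends at the loop header above. A hedged sketch of how the loop
# presumably continues: train one epoch at a time, copy the weights into the
# batch_size=1 prediction model, and sample from it. NUM_EPOCHS, the training
# arrays x/y, and GenerateJoke's interface are assumptions, not shown here.
while epoch < NUM_EPOCHS:
    training_model.fit(x, y, batch_size=BATCH_SIZE, epochs=1, shuffle=False)
    training_model.reset_states()
    # Both models share the same architecture, so weights transfer directly.
    predict_model.set_weights(training_model.get_weights())
    # (A sampling call through `generator` and `predict_model` would
    # typically go here.)
    epoch += 1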
def main(config, tr_stream, dev_stream, use_bokeh=False):
    logger.info('Building RNN encoder-decoder')
    cost, samples, search_model = create_model(config)
    # cost, samples, search_model = create_multitask_model(config)

    logger.info("Building model")
    cg = ComputationGraph(cost)
    training_model = Model(cost)

    # Apply dropout for regularization
    if config['dropout'] < 1.0:
        # Dropout is applied to the output of maxout in ghog
        logger.info('Applying dropout')
        dropout_inputs = [
            x for x in cg.intermediary_variables
            if x.name == 'maxout_apply_output'
        ]
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    # Set up extensions
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'], every_n_batches=config['save_freq'])
    ]

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append(
            Sampler(model=search_model,
                    data_stream=tr_stream,
                    src_vocab=config['src_vocab'],
                    trg_vocab=config['trg_vocab'],
                    phones_vocab=config['phones'],
                    hook_samples=config['hook_samples'],
                    every_n_batches=config['sampling_freq'],
                    src_vocab_size=config['src_vocab_size']))

    # Add early stopping based on F1
    if config['f1_validation'] is not None:
        logger.info("Building F1 validator")
        extensions.append(
            F1Validator(samples=samples,
                        config=config,
                        model=search_model,
                        data_stream=dev_stream,
                        normalize=config['normalized_f1'],
                        every_n_batches=config['f1_val_freq']))

    # Reload the model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Set up the training algorithm
    logger.info("Initializing training algorithm")
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=CompositeRule([
                                    StepClipping(config['step_clipping']),
                                    eval(config['step_rule'])(),
                                    RemoveNotFinite()
                                ]),
                                on_unused_sources='warn')

    # Initialize the main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model,
                         algorithm=algorithm,
                         data_stream=tr_stream,
                         extensions=extensions)

    # Train!
    main_loop.run()
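# A hypothetical entry point, modeled on the blocks-examples NMT scripts; the
# configurations module and the stream constructors are assumptions, not
# confirmed by this section:
if __name__ == '__main__':
    config = configurations.get_config()
    tr_stream = get_tr_stream(**config)
    dev_stream = get_dev_stream(**config)
    main(config, tr_stream, dev_stream)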
def ncc_section_range(start_section, end_section, path_template):
    img_in_out_mip = [(6, 6), (6, 7), (7, 8)]
    for img_in_mip, img_out_mip in img_in_out_mip:
        pyramid_name = "ncc_m{}".format(img_out_mip)
        if img_out_mip == 6:
            cv_src_path = path_template + 'm6_normalized'
            cv_dst_path = path_template + 'ncc/ncc_m{}'.format(img_out_mip)
        elif img_out_mip in [7, 8]:
            cv_src_path = path_template + 'ncc/ncc_m{}'.format(img_in_mip)
            cv_dst_path = path_template + 'ncc/ncc_m{}'.format(img_out_mip)
        else:
            raise Exception("Unknown mip")

        cv_src = cv.CloudVolume(cv_src_path,
                                mip=img_in_mip,
                                fill_missing=True,
                                bounded=False,
                                progress=False)
        cv_dst = cv.CloudVolume(cv_dst_path,
                                mip=img_out_mip,
                                fill_missing=True,
                                bounded=False,
                                progress=False,
                                parallel=5,
                                info=deepcopy(cv_src.info),
                                non_aligned_writes=True)
        cv_dst.info['data_type'] = 'float32'
        cv_dst.commit_info()

        cv_xy_start = [0, 0]
        crop = 256
        if img_in_mip == 6:
            cv_xy_start = [256 * 0, 1024 * 0]
            cv_xy_end = [8096, 8096]  # [1024 * 8 - 256*0, 1024 * 8 - 256*0]
            patch_size = 8096 // 4
        elif img_in_mip == 7:
            cv_xy_start = [256 * 0, 1024 * 0]
            cv_xy_end = [4048, 4048]  # [1024 * 8 - 256*0, 1024 * 8 - 256*0]
            patch_size = 4048 // 2
        elif img_in_mip == 8:
            cv_xy_end = [2024, 2048]  # [1024 * 8 - 256*0, 1024 * 8 - 256*0]
            patch_size = 2024
        global_start = 0
        scale_factor = 2**(img_out_mip - img_in_mip)

        encoder = create_model(
            "model", checkpoint_folder="./models/{}".format(pyramid_name))

        for z in range(start_section, end_section):
            print("MIP {} Section {}".format(img_out_mip, z))
            s = time.time()
            cv_src_data = cv_src[cv_xy_start[0]:cv_xy_end[0],
                                 cv_xy_start[1]:cv_xy_end[1], z].squeeze()
            src_data = torch.cuda.FloatTensor(cv_src_data)
            src_data = src_data.unsqueeze(0)
            in_shape = src_data.shape
            dst = torch.zeros((1, in_shape[-2] // scale_factor,
                               in_shape[-1] // scale_factor),
                              device=src_data.device)
            # Process the section in patches, padding each patch by `crop`
            # pixels and trimming the same margin from the encoder output to
            # hide boundary artifacts.
            for i in range(0, src_data.shape[-2] // patch_size):
                for j in range(0, src_data.shape[-1] // patch_size):
                    x = [
                        global_start + i * patch_size,
                        global_start + (i + 1) * patch_size
                    ]
                    y = [
                        global_start + j * patch_size,
                        global_start + (j + 1) * patch_size
                    ]
                    x_padded = copy.copy(x)
                    y_padded = copy.copy(y)
                    if i != 0:
                        x_padded[0] = x[0] - crop
                    if i != src_data.shape[-2] // patch_size - 1:
                        x_padded[1] = x[1] + crop
                    if j != 0:
                        y_padded[0] = y[0] - crop
                    if j != src_data.shape[-1] // patch_size - 1:
                        y_padded[1] = y[1] + crop

                    patch = src_data[..., x_padded[0]:x_padded[1],
                                     y_padded[0]:y_padded[1]].squeeze()
                    with torch.no_grad():
                        processed_patch = encoder(
                            patch.unsqueeze(0).unsqueeze(0)).squeeze()
                    if i != 0:
                        processed_patch = processed_patch[crop // scale_factor:, :]
                    if i != src_data.shape[-2] // patch_size - 1:
                        processed_patch = processed_patch[:-crop // scale_factor, :]
                    if j != 0:
                        processed_patch = processed_patch[:, crop // scale_factor:]
                    if j != src_data.shape[-1] // patch_size - 1:
                        processed_patch = processed_patch[:, :-crop // scale_factor]

                    dst[..., x[0] // scale_factor:x[1] // scale_factor,
                        y[0] // scale_factor:y[1] // scale_factor] = processed_patch
                    # x != x is true only for NaN entries.
                    if torch.any(processed_patch != processed_patch):
                        raise Exception("NaN result occurred")

            with torch.no_grad():
                # Build a mask of non-black source pixels, downsampled to the
                # output mip by repeated max-pooling.
                if scale_factor == 2:
                    black_mask = src_data != 0
                    black_frac = float(torch.sum(black_mask == False)) / float(
                        torch.sum(src_data > -10000))
                    black_mask = torch.nn.MaxPool2d(2)(
                        black_mask.unsqueeze(0).float()) != 0
                    black_mask = black_mask.squeeze(0)
                elif scale_factor == 4:
                    black_mask = src_data != 0
                    black_frac = float(torch.sum(black_mask == False)) / float(
                        torch.sum(src_data > -10000))
                    black_mask = torch.nn.MaxPool2d(2)(
                        black_mask.unsqueeze(0).float()) != 0
                    black_mask = black_mask.squeeze(0)
                    black_mask = torch.nn.MaxPool2d(2)(
                        black_mask.unsqueeze(0).float()) != 0
                    black_mask = black_mask.squeeze(0)
                elif scale_factor == 1:
                    black_mask = (src_data > -10) * (src_data != 0)
                    black_frac = float(torch.sum(black_mask == False)) / float(
                        torch.sum(src_data > -10000))
                else:
                    raise Exception("Unimplemented")

                if torch.any(dst != dst):
                    raise Exception("NaN result occurred")
                dst_norm = normalize(dst, mask=black_mask, mask_fill=0)
                if torch.any(dst_norm != dst_norm):
                    raise Exception("NaN result occurred")
                cv_data = get_np(
                    dst_norm.squeeze().unsqueeze(2).unsqueeze(2)).astype(
                        np.float32)
                cv_dst[cv_xy_start[0] // scale_factor:cv_xy_end[0] // scale_factor,
                       cv_xy_start[1] // scale_factor:cv_xy_end[1] // scale_factor,
                       z] = cv_data
            e = time.time()
            print(e - s, " sec")
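# normalize() and get_np() are helpers defined elsewhere. For reference, a
# minimal sketch of a masked normalization consistent with the call site above
# (zero mean and unit variance over masked-in pixels, masked-out pixels set to
# mask_fill); this is an assumption, not the actual implementation:
def normalize_sketch(img, mask, mask_fill=0):
    masked = img[mask]
    result = (img - masked.mean()) / (masked.std() + 1e-8)
    result[~mask] = mask_fill
    return result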
indexes = [line[headers['IdAfter']] for line in lines]  # Keep IdAfter as the id
comments = [line[headers['Comment']] for line in lines]
texts = [line[headers['TextBefore']] for line in lines]
textdiffs = [line[headers['TextAdditions']] for line in lines]
textdiffsd = [line[headers['TextDeletions']] for line in lines]
codes = [line[headers['CodeBefore']] for line in lines]
codediffs = [line[headers['CodeAdditions']] for line in lines]
codediffsd = [line[headers['CodeDeletions']] for line in lines]
codesequences = [line[headers['CodeSequenceBefore']] for line in lines]
codesequencediffs = [line[headers['CodeSequenceAdditions']] for line in lines]
codesequencediffsd = [line[headers['CodeSequenceDeletions']] for line in lines]

# Create the models if they do not exist
if not model_exists('tfidf_comments'):
    create_model('tfidf_comments',
                 [line[headers['Comment']] for line in lines])
if not model_exists('tfidf_texts'):
    create_model('tfidf_texts', [
        line[headers['TextBefore']] + ' ' + line[headers['TextAdditions']] +
        ' ' + line[headers['TextDeletions']] for line in lines
    ])
if not model_exists('tfidf_codes'):
    create_model('tfidf_codes', [
        line[headers['CodeBefore']] + ' ' + line[headers['CodeAdditions']] +
        ' ' + line[headers['CodeDeletions']] for line in lines
    ])

# Load the models
comment_vectorizer = load_model('tfidf_comments')
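# model_exists, create_model, and load_model are external helpers not shown in
# this section. A plausible minimal sketch, assuming scikit-learn TF-IDF
# vectorizers pickled under a models/ directory (paths and names are guesses):
import os
import pickle

from sklearn.feature_extraction.text import TfidfVectorizer


def model_exists_sketch(name):
    return os.path.exists('models/{}.pkl'.format(name))


def create_model_sketch(name, documents):
    vectorizer = TfidfVectorizer()
    vectorizer.fit(documents)
    with open('models/{}.pkl'.format(name), 'wb') as f:
        pickle.dump(vectorizer, f)


def load_model_sketch(name):
    with open('models/{}.pkl'.format(name), 'rb') as f:
        return pickle.load(f)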