def get_pipeline(period, data_path, target_inclusion_prob, windows,
                 appliances, target_appliance, activations, seq_length,
                 num_seq_per_batch):
    """Build a ``DataPipeline`` backed by a single ``RealAggregateSource``.

    The activations are first filtered with ``filter_activations``.  One batch
    is then drawn from the source, and the standard deviations of its
    unprocessed input and target become the divisors used by the pipeline's
    input and target processing.

    Args:
        period: Forwarded to the source as ``sample_period``.
        data_path: Forwarded to the source as ``filename``.
        target_inclusion_prob: Forwarded to the source unchanged.
        windows: Passed to ``filter_activations`` and to the source.
        appliances: Passed to ``filter_activations``.
        target_appliance: Forwarded to the source unchanged.
        activations: Raw activations, filtered before use.
        seq_length: Forwarded to the source; also used as the size of the
            batch drawn to estimate the scaling factors.
        num_seq_per_batch: Batch size of the returned pipeline.

    Returns:
        The configured ``DataPipeline``.
    """
    # Adding a and b to be coherent with buildings chosen in WINDOWS
    filtered_activations = filter_activations(windows, appliances, activations)

    real_agg_source = RealAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=data_path,
        windows=windows,
        sample_period=period,
        target_inclusion_prob=target_inclusion_prob)

    # NOTE(review): the scaling batch is sized by ``seq_length`` rather than
    # by ``num_seq_per_batch`` or a fixed count -- confirm this is intended.
    # The built-in next() is used instead of the generator's ``.next()``
    # method, which only exists on Python 2.
    sample = next(real_agg_source.get_batch(num_seq_per_batch=seq_length))
    sample = sample.before_processing
    input_std = sample.input.flatten().std()
    target_std = sample.target.flatten().std()

    pipeline = DataPipeline(
        [real_agg_source],
        num_seq_per_batch=num_seq_per_batch,
        input_processing=[DivideBy(input_std), IndependentlyCenter()],
        target_processing=[DivideBy(target_std)])
    return pipeline
def get_pipeline(target_appliance, activations):
    """Build the training ``DataPipeline`` for one target appliance.

    The pipeline mixes three sources (synthetic aggregate, real aggregate and
    stride) and scales input and target by the standard deviations of one
    1024-sequence batch drawn from the real aggregate source.

    Args:
        target_appliance: One of ``'kettle'``, ``'microwave'``,
            ``'washing machine'``, ``'fridge'`` or ``'dish washer'``.
        activations: Raw activations; filtered to the selected windows
            before use.

    Returns:
        The configured ``DataPipeline`` (64 sequences per batch).

    Raises:
        ValueError: If ``target_appliance`` is not a supported appliance.
    """
    num_seq_per_batch = 64

    # appliance -> (seq_length, train_buildings, unseen_buildings)
    appliance_config = {
        'kettle': (128, [1, 2, 4], [5]),
        'microwave': (288, [1, 2], [5]),
        'washing machine': (1024, [1, 5], [2]),
        'fridge': (512, [1, 2, 4], [5]),
        'dish washer': (1024 + 512, [1, 2], [5]),
    }
    # Fail early and clearly: previously an unknown appliance fell through
    # the if/elif chain and crashed later on unbound local variables.
    if target_appliance not in appliance_config:
        raise ValueError(
            'Unsupported target appliance {!r}; expected one of {}'.format(
                target_appliance, sorted(appliance_config)))
    seq_length, train_buildings, unseen_buildings = (
        appliance_config[target_appliance])

    filtered_windows = select_windows(train_buildings, unseen_buildings)
    filtered_activations = filter_activations(filtered_windows, activations)

    synthetic_agg_source = SyntheticAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        sample_period=SAMPLE_PERIOD)

    real_agg_source = RealAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD)

    stride_source = StrideSource(
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD,
        stride=STRIDE)

    # Estimate the scaling factors from one large batch of real data.
    # next() works on Python 2 and 3; the ``.next()`` method is Python 2 only.
    sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
    sample = sample.before_processing
    input_std = sample.input.flatten().std()
    target_std = sample.target.flatten().std()

    pipeline = DataPipeline(
        [synthetic_agg_source, real_agg_source, stride_source],
        num_seq_per_batch=num_seq_per_batch,
        input_processing=[DivideBy(input_std), IndependentlyCenter()],
        target_processing=[DivideBy(target_std)])
    return pipeline
def get_pipeline(self, num_seq_per_batch, start_date, building_id,
                 valid_range, validate_length):
    """Build a validation ``DataPipeline`` using stored scaling parameters.

    One ``ValidationSource`` is created per entry in ``self.appliances``.
    The scaling factors are read from
    ``proc_params_<self.model_name>.npz`` in ``dirs.MODELS_DIR``; this method
    never regenerates them, so the file must already exist.

    Args:
        num_seq_per_batch: Batch size of the returned pipeline.
        start_date: Forwarded to every ``ValidationSource``.
        building_id: Forwarded to every ``ValidationSource``.
        valid_range: Forwarded to every ``ValidationSource``.
        validate_length: Forwarded to every ``ValidationSource``.

    Returns:
        Tuple ``(pipeline, multi_input_std, multi_target_std)``.
    """
    valid_agg_source = []
    for task_appliance in self.appliances:
        # buildings
        buildings = self.building[task_appliance]
        train_buildings = buildings['train_buildings']
        unseen_buildings = buildings['unseen_buildings']

        # windows
        filtered_windows = select_windows(
            train_buildings, unseen_buildings, self.window)

        # data sources
        valid_agg_source.append(
            ValidationSource(appliances=self.appliances,
                             filename=self.nilm_filename,
                             windows=filtered_windows,
                             sample_period=self.sample_period,
                             start_date=start_date,
                             valid_range=valid_range,
                             building_id=building_id,
                             validate_length=validate_length,
                             format=self.format))

    # Load the stored processing parameters. Unlike the training pipelines,
    # no existence check or regeneration happens here.
    print('Looking for existing processing parameters ... ')
    proc_params_filename = os.path.join(
        dirs.MODELS_DIR, 'proc_params_' + self.model_name + '.npz')
    print('Found; using them ...')
    # Open the archive once and close it deterministically; it used to be
    # opened twice and never closed.
    with np.load(proc_params_filename) as proc_params:
        multi_input_std = proc_params['multi_input_std']
        multi_target_std = proc_params['multi_target_std']
    # (A disabled snippet here once hard-coded multi_input_std = 1000 and
    # multi_target_std = 400.)

    # generate pipeline
    pipeline = DataPipeline(
        valid_agg_source,
        num_seq_per_batch=num_seq_per_batch,
        input_processing=[DivideBy(multi_input_std), IndependentlyCenter()],
        target_processing=[DivideBy(multi_target_std)])

    return pipeline, multi_input_std, multi_target_std
def get_pipeline(activations):
    """Build the training ``DataPipeline`` for ``TARGET_APPLIANCE``.

    Creates synthetic, real-aggregate and stride sources for the configured
    appliance. The input/target scaling factors are read from a cached
    ``.npz`` file when one exists and ``OVERRIDE`` is off; otherwise they are
    computed from one 1024-sequence batch of real data and saved.

    Args:
        activations: Raw activations; filtered to the selected windows and
            to ``TARGET_APPLIANCE`` before use.

    Returns:
        Tuple ``(pipeline, input_std, target_std)``.
    """
    # sequence periods
    seq_period = SEQ_PERIODS[TARGET_APPLIANCE]
    seq_length = seq_period // SAMPLE_PERIOD

    # buildings
    buildings = BUILDINGS[TARGET_APPLIANCE]
    train_buildings = buildings['train_buildings']
    unseen_buildings = buildings['unseen_buildings']

    # windows
    filtered_windows = select_windows(
        train_buildings, unseen_buildings, WINDOWS)
    filtered_activations = filter_activations(
        filtered_windows, activations, [TARGET_APPLIANCE])

    # data sources
    synthetic_agg_source = SyntheticAggregateSource(
        activations=filtered_activations,
        target_appliance=TARGET_APPLIANCE,
        seq_length=seq_length,
        sample_period=SAMPLE_PERIOD
    )
    real_agg_source = realaggregatesource.RealAggregateSource(
        activations=filtered_activations,
        target_appliance=TARGET_APPLIANCE,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        allow_multiple_target_activations_in_aggregate=True,
        sample_period=SAMPLE_PERIOD
    )
    stride_source = stridesource.StrideSource(
        target_appliance=TARGET_APPLIANCE,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD,
        stride=None
    )

    # look for existing processing parameters only when OVERRIDE is not on; if
    # none, generate new ones
    print('Looking for existing processing parameters ... \n', end='')
    # NOTE(review): '%m' is the month directive, so the name ends in
    # "<hour>_<month>"; '%M' (minutes) may have been intended. Confirm before
    # changing -- the timestamp decides which cached files can be found.
    proc_params_filename = os.path.join(
        dirs.MODELS_DIR,
        'proc_params_' + DATASET + '_' + TARGET_APPLIANCE + '_' +
        strftime('%Y-%m-%d_%H_%m') + '.npz')
    if not OVERRIDE and os.path.exists(proc_params_filename):
        print('Found; using them ...')
        # Close the archive once the values have been read.
        with np.load(proc_params_filename) as proc_params:
            input_std, target_std = proc_params['arr_0']
    else:
        if OVERRIDE:
            print('Overridden; generating new ones ...')
        else:
            print('Not found; generating new ones ...')
        # next() works on Python 2 and 3; ``.next()`` is Python 2 only.
        sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
        sample = sample.before_processing
        input_std = sample.input.flatten().std()
        target_std = sample.target.flatten().std()
        print('Saving the processing parameters ...')
        np.savez(proc_params_filename, [input_std, target_std])

    # generate pipeline
    pipeline = DataPipeline(
        [synthetic_agg_source, real_agg_source, stride_source],
        num_seq_per_batch=NUM_SEQ_PER_BATCH,
        input_processing=[DivideBy(input_std), IndependentlyCenter()],
        target_processing=[DivideBy(target_std)]
    )

    return pipeline, input_std, target_std
filename=NILMTK_FILENAME, windows=filtered_windows, sample_period=SAMPLE_PERIOD) # ------------ # needed to rescale the input aggregated data # rescaling is done using the a first batch of num_seq_per_batch sequences sample = real_agg_source.get_batch(num_seq_per_batch=1024).next() sample = sample.before_processing input_std = sample.input.flatten().std() target_std = sample.target.flatten().std() # ------------ pipeline = DataPipeline( [synthetic_agg_source, real_agg_source], num_seq_per_batch=num_seq_per_batch, input_processing=[DivideBy(input_std), IndependentlyCenter()], target_processing=[DivideBy(target_std)]) num_test_seq = 101 # create a validation set X_valid = np.empty((num_test_seq * num_seq_per_batch, seq_length)) Y_valid = np.empty((num_test_seq * num_seq_per_batch, 3)) for i in range(num_test_seq): (x_valid, y_valid) = pipeline.train_generator(fold='unseen_appliances', source_id=1).next() X_valid[i * num_seq_per_batch:(i + 1) * num_seq_per_batch, :] = x_valid[:, :, 0] Y_valid[i * num_seq_per_batch:(i + 1) * num_seq_per_batch, :] = y_valid
# ------------ # needed to rescale the input aggregated data # rescaling is done using the a first batch of num_seq_per_batch sequences sample = real_agg_source.get_batch(num_seq_per_batch=1024).next() sample = sample.before_processing input_std = sample.input.flatten().std() target_std = sample.target.flatten().std() # ------------ pipeline = DataPipeline( [synthetic_agg_source, real_agg_source], num_seq_per_batch=num_seq_per_batch, input_processing=[DivideBy(input_std), IndependentlyCenter()], target_processing=[DivideBy(target_std)] ) num_test_seq = 101 # create a validation set X_valid = np.empty((num_test_seq*num_seq_per_batch, seq_length)) Y_valid = np.empty((num_test_seq*num_seq_per_batch, 3)) for i in range(num_test_seq): (x_valid,y_valid) = pipeline.train_generator(fold = 'unseen_appliances', source_id = 1).next()
# ------------ # needed to rescale the input aggregated data # rescaling is done using the a first batch of num_seq_per_batch sequences sample = real_agg_source.get_batch(num_seq_per_batch=1024).next() sample = sample.before_processing input_std = sample.input.flatten().std() target_std = sample.target.flatten().std() # ------------ pipeline = DataPipeline( [synthetic_agg_source, real_agg_source], num_seq_per_batch=num_seq_per_batch, input_processing=[DivideBy(input_std), IndependentlyCenter()], target_processing=[DivideBy(target_std)] ) from keras.layers import Input, Dense, Flatten from keras.models import Model starting_time = time.time() # define the network architecture = Conv Net input_seq = Input(shape = (1, seq_length)) conv1_layer = Convolution1D(nb_filter = 16, filter_length = 3, border_mode='same', init = 'normal', activation = 'relu') conv1 = conv1_layer(input_seq) conv2 = Convolution1D(nb_filter = 16, filter_length = 3, border_mode='same',
def run_experiment(dataset, INPUT_MEAN, INPUT_STD, SOURCE_TYPES,
                   VALIDATION_SOURCE_TYPES, DOWNSAMPLE_FACTOR, SEQ_LENGTH,
                   TARGET_SEQ_LENGTH, MAX_TARGET_POWER, TARGET_APPLIANCE,
                   TRAINING_SEED, VERBOSE_TRAINING, LEARNING_RATE,
                   NUM_SEQ_PER_BATCH, EPOCHS, STEPS_PER_EPOCH, USE_CUDA,
                   CHECKPOINT_BEST_MSE, CHECKPOINTING_EVERY_N_EPOCHS,
                   TEST_DISAGGREGATE_EVERY_N_EPOCHS, _run):
    """Train ``_Net`` and log metrics, checkpoints and a history CSV.

    Each epoch runs ``STEPS_PER_EPOCH`` optimisation steps on batches pulled
    from a background ``DataProcess``, evaluates every validation fold,
    writes the metrics to TensorBoard and to an in-memory history, and
    optionally saves checkpoints and test-disaggregation scores.

    Args:
        dataset: Mapping that provides ``TRAIN_BUILDINGS``,
            ``ON_POWER_THRESHOLD`` and, when test disaggregation is enabled,
            ``EVALUATION_DATA_PATH``.
        INPUT_MEAN, INPUT_STD: Offset and divisor applied to both the input
            and the target processing steps.
        SOURCE_TYPES, VALIDATION_SOURCE_TYPES: Source names passed to
            ``get_sources``.
        DOWNSAMPLE_FACTOR: When > 1 a ``DownSample`` step is prepended to the
            input processing of the training and validation pipelines.
        SEQ_LENGTH, TARGET_SEQ_LENGTH: Lengths of the input and target
            sequences.
        MAX_TARGET_POWER: Passed to the metrics accumulator and the test
            disaggregator.
        TARGET_APPLIANCE: Appliance name, used for logging and the test
            disaggregator.
        TRAINING_SEED: Seed for torch, the sources and the down-sampling RNG.
        VERBOSE_TRAINING: Enables tqdm progress bars; forced off when a
            ``FileStorageObserver`` is attached to the run.
        LEARNING_RATE: Adam learning rate.
        NUM_SEQ_PER_BATCH: Batch size.
        EPOCHS, STEPS_PER_EPOCH: Training length.
        USE_CUDA: Move the network, criterion and tensors to the GPU.
        CHECKPOINT_BEST_MSE: Save the model whenever the validation loss
            improves on the best value seen so far.
        CHECKPOINTING_EVERY_N_EPOCHS: Periodic checkpoint interval, or None.
        TEST_DISAGGREGATE_EVERY_N_EPOCHS: Test-scoring interval, or None.
        _run: Run object; its observers and ``_id`` select the output folder.

    Returns:
        ``-1`` if ``history.csv`` already exists in the output folder,
        otherwise the mean training loss of the last epoch.
    """
    torch.manual_seed(TRAINING_SEED)

    OUTPUT_FOLDER = os.path.join(ex.get_experiment_info()['name'], "output")
    for observer in _run.observers:
        if isinstance(observer, FileStorageObserver):
            OUTPUT_FOLDER = os.path.join(observer.basedir, str(_run._id))
            # Progress bars are disabled when output goes to file storage.
            VERBOSE_TRAINING = 0
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    writer = SummaryWriter(log_dir=OUTPUT_FOLDER)
    running_data_processes = []  # stop these at the end

    try:
        # From dataset Ingredient
        TRAIN_BUILDINGS = dataset["TRAIN_BUILDINGS"]
        ON_POWER_THRESHOLD = dataset["ON_POWER_THRESHOLD"]

        ######################################################################
        # PREPARE DATASET (DATALOADERs)
        ######################################################################
        sources, validation_sources = get_sources(
            training_source_names=SOURCE_TYPES,
            validation_source_names=VALIDATION_SOURCE_TYPES,
            seq_length=SEQ_LENGTH,
            sources_seed=TRAINING_SEED,
            validation_stride=128)

        input_processing_steps = [
            Add(-INPUT_MEAN), DivideBy(INPUT_STD), Transpose((0, 2, 1))
        ]
        target_processing_steps = [
            Add(-INPUT_MEAN), DivideBy(INPUT_STD), Transpose((0, 2, 1))
        ]

        if DOWNSAMPLE_FACTOR > 1:
            downsample_rng = np.random.RandomState(TRAINING_SEED)
            input_processing_steps_training = [
                DownSample(DOWNSAMPLE_FACTOR, downsample_rng)
            ] + input_processing_steps
        else:
            input_processing_steps_training = input_processing_steps

        # NOTE(review): validation also uses the training input steps (with
        # DownSample when enabled) -- confirm this is intended.
        validation_pipeline = DataPipeline(
            sources=validation_sources,
            num_seq_per_batch=NUM_SEQ_PER_BATCH,
            input_processing=input_processing_steps_training,
            target_processing=target_processing_steps)
        validation_batches = get_validation_batches(validation_pipeline)
        print("appliance {} has {} validation batches".format(
            TARGET_APPLIANCE,
            sum(len(v) for v in validation_batches.values())))

        data_pipeline = DataPipeline(
            sources=sources,
            num_seq_per_batch=NUM_SEQ_PER_BATCH,
            input_processing=input_processing_steps_training,
            target_processing=target_processing_steps)
        data_thread = DataProcess(data_pipeline)
        data_thread.start()
        running_data_processes.append(data_thread)

        net = _Net(SEQ_LENGTH)
        print(net)

        metrics_accu = MetricsAccumulator(
            on_power_threshold=ON_POWER_THRESHOLD,
            max_power=MAX_TARGET_POWER)

        # note: MSE - Mean Squared Error
        criterion = torch.nn.MSELoss()

        stop_training = False
        best_mse = None

        # PREPARE TEST DISAGGREGATOR
        if TEST_DISAGGREGATE_EVERY_N_EPOCHS is not None:
            test_disaggregator = Disaggregator(
                EVALUATION_DATA_PATH=dataset['EVALUATION_DATA_PATH'],
                TARGET_APPLIANCE=TARGET_APPLIANCE,
                ON_POWER_THRESHOLD=ON_POWER_THRESHOLD,
                MAX_TARGET_POWER=MAX_TARGET_POWER,
                pad_mains=True,
                pad_appliance=False,
                disagg_func=disag_seq2seq,
                downsample_factor=DOWNSAMPLE_FACTOR,
                disagg_kwargs=dict(
                    model=net,
                    input_processing=input_processing_steps,
                    target_processing=target_processing_steps,
                    n_seq_per_batch=NUM_SEQ_PER_BATCH,
                    seq_length=SEQ_LENGTH,
                    target_seq_length=TARGET_SEQ_LENGTH,
                    USE_CUDA=USE_CUDA,
                    stride=1))

        # PREPARE TENSORS, WHICH WILL BE USED DURING TRAINING AND VALIDATION
        # (named *_buf so the builtin ``input`` is not shadowed)
        input_buf = torch.FloatTensor(NUM_SEQ_PER_BATCH, 1, SEQ_LENGTH)
        target_buf = torch.FloatTensor(
            NUM_SEQ_PER_BATCH, 1, TARGET_SEQ_LENGTH)

        if USE_CUDA:
            # note: push to GPU
            net.cuda()
            criterion.cuda()
            input_buf, target_buf = input_buf.cuda(), target_buf.cuda()

        # setup optimizer.  TODO: Should we use 'Adam' for disaggregator?
        optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE,
                               betas=(0.9, 0.999))
        # optimizer = optim.SGD(net.parameters(), momentum=0.9,
        #                       nesterov=True, lr=LEARNING_RATE)
        scheduler = lr_scheduler.MultiStepLR(
            optimizer, milestones=[50, 75], gamma=0.1)

        history = {}
        csvpath = os.path.join(OUTPUT_FOLDER, "history.csv")
        if os.path.exists(csvpath):
            print("Already exists: {}".format(csvpath))
            return -1

        progbar_epoch = tqdm(desc="Epoch", total=EPOCHS, unit="epoch",
                             disable=(not VERBOSE_TRAINING))
        for epoch in range(EPOCHS):
            # TRAINING
            metrics_log = {'training': {}}
            training_loss = 0.0
            progbar = tqdm(desc="Train", total=STEPS_PER_EPOCH, leave=False,
                           disable=(not VERBOSE_TRAINING))
            for _ in range(STEPS_PER_EPOCH):
                net.zero_grad()
                # Poll until the background process delivers a batch.
                batch = data_thread.get_batch()
                while batch is None:
                    batch = data_thread.get_batch()
                qsize = data_thread._queue.qsize()

                aggregated_signal = torch.from_numpy(
                    batch.after_processing.input)
                target_signal = torch.from_numpy(
                    batch.after_processing.target)
                if USE_CUDA:
                    aggregated_signal = aggregated_signal.cuda()
                    target_signal = target_signal.cuda()
                input_buf.resize_as_(aggregated_signal).copy_(
                    aggregated_signal)
                target_buf.resize_as_(target_signal).copy_(target_signal)
                inputv = Variable(input_buf, requires_grad=False)
                targetv = Variable(target_buf, requires_grad=False)
                output = net(inputv)
                loss = criterion(output, targetv)
                loss.backward()
                optimizer.step()
                training_loss += loss.item()

                progbar.set_postfix(
                    dict(loss="{:.4f}".format(loss.item()), qsize=qsize),
                    refresh=False)
                progbar.update()

            metrics_log['training']['loss'] = float(
                training_loss / STEPS_PER_EPOCH)
            metrics_log['training']['lr'] = optimizer.param_groups[0]['lr']

            # VALIDATION
            # (Precision/recall-curve accumulation is currently disabled.)
            for fold in validation_batches:
                metrics_accu.reset_accumulator()
                for batch in validation_batches[fold]:
                    aggregated_signal = torch.from_numpy(
                        batch.after_processing.input)
                    target_signal = torch.from_numpy(
                        batch.after_processing.target)
                    if USE_CUDA:
                        aggregated_signal = aggregated_signal.cuda()
                        target_signal = target_signal.cuda()
                    input_buf.resize_as_(aggregated_signal).copy_(
                        aggregated_signal)
                    target_buf.resize_as_(target_signal).copy_(target_signal)
                    with torch.no_grad():
                        inputv = Variable(input_buf)
                        targetv = Variable(target_buf)
                        output = net(inputv)
                        batch_loss = criterion(output, targetv)
                        loss_value = batch_loss.item()
                    # other metrics
                    pred_y = data_pipeline.apply_inverse_processing(
                        output.cpu().data.numpy(), 'target')
                    true_y = batch.before_processing.target
                    metrics_accu.accumulate_metrics(
                        true_y, pred_y, val_loss=loss_value)

                # ``fold`` is indexed as a pair: fold[0] is the top-level
                # key in metrics_log, fold[1] the innermost key.
                for key, value in metrics_accu.finalize_metrics().items():
                    metrics_log.setdefault(fold[0], {}).setdefault(
                        key, {})[fold[1]] = value

            # LR Scheduler
            val_loss = metrics_log['unseen_activations']['val_loss']['rss']
            # scheduler.step(val_loss)
            scheduler.step()

            # PRINT STATS
            if not VERBOSE_TRAINING:
                print('[{:d}/{:d}] {}'.format(epoch + 1, EPOCHS,
                                              metrics_log['training']))
            else:
                progbar_epoch.set_postfix(
                    dict(loss=metrics_log['training']['loss']),
                    refresh=False)
                progbar_epoch.update()
            progbar.close()

            # store in history / tensorboard
            global_step = (epoch + 1) * STEPS_PER_EPOCH
            for fold, metrics_for_fold in metrics_log.items():
                for metric_name, value in metrics_for_fold.items():
                    if isinstance(value, dict):
                        SW_add_scalars2(
                            writer, "{}/{}".format(fold, metric_name),
                            value, global_step)
                        for k, v in value.items():
                            name = "{}/{}/{}".format(fold, metric_name, k)
                            history.setdefault(name, []).append(v)
                    else:
                        name = "{}/{}".format(fold, metric_name)
                        writer.add_scalar(name, value, global_step)
                        history.setdefault(name, []).append(value)

            # CHECKPOINTING
            if CHECKPOINT_BEST_MSE:
                mse = val_loss
                if best_mse is None:
                    best_mse = mse
                if best_mse > mse:
                    msg = "[{:d}/{:d}] MSE improved from {:.4f} to {:.4f} (d={:f}), saving model...".format(
                        epoch + 1, EPOCHS, best_mse, mse, best_mse - mse)
                    if not VERBOSE_TRAINING:
                        print(msg)
                    else:
                        progbar_epoch.write(msg)
                    torch.save(
                        {
                            'epoch': epoch + 1,
                            'step': global_step,
                            'mse': mse,
                            'model': net.state_dict()
                        },
                        '{}/net_best_mse.pth.tar'.format(OUTPUT_FOLDER))
                    best_mse = mse

            if CHECKPOINTING_EVERY_N_EPOCHS is not None:
                if (epoch + 1) % CHECKPOINTING_EVERY_N_EPOCHS == 0:
                    torch.save(
                        net.state_dict(),
                        '{}/net_step_{:06d}.pth'.format(
                            OUTPUT_FOLDER, global_step))

            if TEST_DISAGGREGATE_EVERY_N_EPOCHS is not None:
                if (epoch + 1) % TEST_DISAGGREGATE_EVERY_N_EPOCHS == 0:
                    scores = test_disaggregator.calculate_metrics()
                    # Regroup building -> metric into metric -> building.
                    scores_by_metric = {}
                    for building_i, building in scores.items():
                        for metric, value in building.items():
                            scores_by_metric.setdefault(
                                metric, {})[building_i] = value
                    for metric, building_d in scores_by_metric.items():
                        SW_add_scalars2(
                            writer, "test_score/{}".format(metric),
                            building_d, global_step)

            if stop_training:
                break

        # CHECKPOINTING at end
        torch.save(
            {
                'epoch': epoch + 1,
                'step': (epoch + 1) * STEPS_PER_EPOCH,
                'model': net.state_dict(),
                'optimizer': optimizer.state_dict(),
                # 'scheduler': scheduler.state_dict()
                # TODO: scheduler is not saved this way,
                # scheduler.state_dict() does not exist
            },
            '{}/net_step_{:06d}.pth.tar'.format(
                OUTPUT_FOLDER, (epoch + 1) * STEPS_PER_EPOCH))

        df = pd.DataFrame(history)
        df.to_csv(csvpath)

        return metrics_log['training']['loss']
    finally:
        # Always release the background data processes and the TensorBoard
        # writer. Previously the early "already exists" return and any
        # exception left them running or open.
        for p in running_data_processes:
            p.stop()
        writer.close()
def get_pipeline(activations):
    """Build the multi-appliance training ``DataPipeline``.

    One ``RealAggregateSource`` is created per entry in ``APPLIANCES``, each
    sampled with probability ``1 / NUM_APPLIANCE``. Scaling factors are read
    from a cached ``.npz`` file when one exists and ``OVERRIDE`` is off;
    otherwise they are averaged over one 1024-sequence batch per source and
    saved.

    Side effect: the module-level ``seq_length`` is overwritten for every
    appliance and ends up holding the value of the last one.

    Args:
        activations: Raw activations; filtered per appliance before use.

    Returns:
        Tuple ``(pipeline, multi_input_std, multi_target_std)``.
    """
    global seq_length

    agg_source = []
    prob = []
    target_inclusion_prob = 0.48 + len(APPLIANCES) * 0.1

    for task_appliance in APPLIANCES:
        seq_period = SEQ_PERIODS[task_appliance]
        seq_length = seq_period // SAMPLE_PERIOD

        # buildings
        buildings = BUILDINGS[task_appliance]
        train_buildings = buildings['train_buildings']
        unseen_buildings = buildings['unseen_buildings']

        # windows
        filtered_windows = select_windows(
            train_buildings, unseen_buildings, WINDOWS)
        filtered_activations = filter_activations(
            filtered_windows, activations, BUILDINGS_APPLIANCES)

        # data sources: the inclusion probability is capped at 0.82, except
        # for the fridge, which is always included.
        real_source_prob = min(0.82, target_inclusion_prob)
        if task_appliance == 'fridge':
            real_source_prob = 1.0

        agg_source.append(
            RealAggregateSource(activations=filtered_activations,
                                target_appliance=task_appliance,
                                appliances=APPLIANCES,
                                target_inclusion_prob=real_source_prob,
                                seq_length=seq_length,
                                filename=NILMTK_FILENAME,
                                windows=filtered_windows,
                                sample_period=SAMPLE_PERIOD))
        prob.append(1.0 / NUM_APPLIANCE)
        # A SyntheticAggregateSource and a StrideSource (with probability
        # 0.5 / NUM_APPLIANCE) used to be appended here; both are disabled.

    # look for existing processing parameters only when OVERRIDE is not on; if
    # none, generate new ones
    print('Looking for existing processing parameters ... \n')
    # NOTE(review): '%m' is the month directive, so the name ends in
    # "<hour>_<month>"; '%M' (minutes) may have been intended. Confirm before
    # changing -- the timestamp decides which cached files can be found.
    proc_params_filename = path.join(
        dirs.MODELS_DIR,
        'proc_params_' + DATASET + '_[' + TARGET_APPLIANCE + ']_' +
        strftime('%Y-%m-%d_%H_%m') + '.npz')

    if not OVERRIDE and path.exists(proc_params_filename):
        print('Found; using them ...')
        # Open the archive once and close it after reading. Note that
        # multi_input_std was saved wrapped in a list, so it loads as a
        # one-element array.
        with np.load(proc_params_filename) as proc_params:
            multi_input_std = proc_params['multi_input_std']
            multi_target_std = proc_params['multi_target_std']
    else:
        if OVERRIDE:
            print('Overridden; generating new ones ...')
        else:
            print('Not found; generating new ones ...')
        multi_input_std = np.array([])
        multi_target_std = np.array([])

        batch_size = 1024
        for sample_source in agg_source:
            # next() works on Python 2 and 3; ``.next()`` is Python 2 only.
            sample = next(
                sample_source.get_batch(num_seq_per_batch=batch_size))
            sample = sample.before_processing
            multi_input_std = np.append(
                multi_input_std, sample.input.flatten().std())
            multi_target_std = np.append(multi_target_std, [
                sample.target[:, idx].flatten().std()
                for idx in range(NUM_APPLIANCE)
            ])

        # Average over sources: one scalar for the input and one value per
        # appliance for the targets.
        multi_input_std = np.mean(multi_input_std)
        multi_target_std = multi_target_std.reshape(-1, NUM_APPLIANCE)
        multi_target_std = np.mean(multi_target_std, axis=0)

        print('=' * 10)
        print('Input std = ', multi_input_std)
        for idx, appliance in enumerate(APPLIANCES):
            print(appliance, 'std = ', multi_target_std[idx])
        print('=' * 10)

        print('Saving the processing parameters ...')
        np.savez(proc_params_filename,
                 multi_input_std=[multi_input_std],
                 multi_target_std=multi_target_std)

    # generate pipeline
    pipeline = DataPipeline(
        agg_source,
        num_seq_per_batch=NUM_SEQ_PER_BATCH,
        input_processing=[DivideBy(multi_input_std), IndependentlyCenter()],
        target_processing=[DivideBy(multi_target_std)],
        source_probabilities=prob,
    )

    return pipeline, multi_input_std, multi_target_std
filename=NILMTK_FILENAME, windows=filtered_windows, sample_period=SAMPLE_PERIOD) # ------------ # needed to rescale the input aggregated data # rescaling is done using the a first batch of num_seq_per_batch sequences sample = real_agg_source.get_batch(num_seq_per_batch=1024).next() sample = sample.before_processing input_std = sample.input.flatten().std() target_std = sample.target.flatten().std() # ------------ pipeline = DataPipeline( [synthetic_agg_source, real_agg_source], num_seq_per_batch=num_seq_per_batch, input_processing=[DivideBy(input_std), IndependentlyCenter()], target_processing=[DivideBy(target_std)]) # get the shape of X_train (X_train, Y_train) = pipeline.train_generator().next() starting_time = time.time() nb_epoch = 1 # define the network architecture = Conv Net model = Sequential() model.add( Convolution1D(64, 3, border_mode='same', input_shape=(1, X_train.shape[2]),
# ------------
# Needed to rescale the input aggregated data.
# The scaling factors are the standard deviations of the unprocessed input
# and target of a first batch of 1024 sequences from the real source.
# next() works on Python 2 and 3; the ``.next()`` method is Python 2 only.
sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
sample = sample.before_processing
input_std = sample.input.flatten().std()
target_std = sample.target.flatten().std()
# ------------

pipeline = DataPipeline(
    [synthetic_agg_source, real_agg_source],
    num_seq_per_batch=num_seq_per_batch,
    input_processing=[DivideBy(input_std), IndependentlyCenter()],
    target_processing=[DivideBy(target_std)]
)

# Draw one training batch to get the shape of X_train.
(X_train, Y_train) = next(pipeline.train_generator())

starting_time = time.time()
nb_epoch = 1