def custom_NMF(V, W=None, H=None, num_components=10, init="random", num_iter=20, beta=2):
    '''
    Factorise V into W and H with multiplicative updates, showing a progress
    widget while iterating and plotting the beta divergence per iteration.

    parameters:
        V - matrix to be factorised (indexed through V.shape[0] and V.shape[1])
        W, H - initial factors, only used when init is "custom"
        num_components - number of columns of W and number of rows of H
        init - set initialisation, must be a string in: {"random", "custom"}
               if "custom", W,H must be given
        num_iter - number of iterations
        beta - beta of divergence, must be an integer(beta>=0)
    return:
        updated W,H
    raises:
        ValueError - if init is not one of the two supported strings, or if
                     init is "custom" and W/H are missing or mis-shaped
    side effects:
        switches NumPy floating point error reporting off through np.seterr
        and does not restore it
    '''
    np.seterr(all='ignore')
    n_samples = V.shape[0]
    n_features = V.shape[1]

    # initialisation
    if init == "random":
        W = np.random.random((n_samples, num_components))
        H = np.random.random((num_components, n_features))
    elif init == "custom":
        if W is None or H is None:
            raise ValueError('init="custom" requires both W and H')
        # Fail early: continuing with wrong shapes would only break later
        # inside the update rules with a less helpful message.
        if W.shape != (n_samples, num_components) or H.shape != (
                num_components, n_features):
            raise ValueError("error!W or H does not have a right shape!")
    else:
        raise ValueError('init must be "random" or "custom", got %r' % (init,))

    # multiplicative update
    # visualise progress
    pro = IntProgress(max=num_iter)
    pro.description = "updating"
    display(pro)
    start = time()
    b_div = []
    for _ in range(num_iter):
        # H is updated first, then W is updated against the new H
        H = update_H(W, H, V, beta)
        W = update_W(W, H, V, beta)
        # limit W and H to avoid overflow
        H = np.clip(H, 10**-150, None)
        W = np.clip(W, 10**-150, None)
        # record the divergence reached after this iteration
        b_div.append(beta_divergence(V, np.dot(W, H), beta))
        pro.value += 1
    end = time()
    pro.description = "done(%.1fs)" % (end - start)

    # plot the curve of divergence
    iterations = np.linspace(1, num_iter, num_iter)
    plt.figure(figsize=(6, 4))
    plt.plot(iterations, b_div)
    plt.title("beta=%d" % (beta))
    plt.xlabel("iterations")
    plt.ylabel("beta divergence")
    plt.show()
    return W, H
def perceptron_pass2(lines, classifier):
    """Run one shuffled perceptron pass and return position-weighted weights.

    ``lines`` is shuffled in place. For every misclassified line (label times
    score not strictly positive) the live weights in ``classifier`` receive
    ``label * feature[word]`` and the returned dict receives the same update
    scaled by ``(n - count) / n``, where ``count`` is the 1-based position of
    the line in the shuffled order.

    Parameters
    ----------
    lines : list
        Training lines, parsed with ``get_label_feature``.
    classifier : dict
        Current weights; updated in place.

    Returns
    -------
    dict
        A separate dict that starts as a copy of ``classifier`` and
        accumulates the scaled updates.
    """
    n = len(lines)
    p = IntProgress(max=n)
    display(p)
    # Keep the accumulator separate from the live weights. Binding both names
    # to the same dict made every scaled update land on ``classifier`` too.
    c = classifier.copy()
    random.shuffle(lines)
    for count, line in enumerate(lines, 1):
        label, feature = get_label_feature(line)
        value = get_value(classifier, feature)
        if not label * value > 0:
            for word in feature:
                update = label * feature[word]
                classifier[word] = classifier.get(word, 0) + update
                c[word] = c.get(word, 0) + update * (n - count) / n
        p.value = count
        p.description = "{}%".format(round(100 * p.value / p.max, 2))
    p.description = "Done"
    return c
def progress_bar(job):
    '''Show a progress widget for ``job`` and block until no task is pending or running.

    The status dict returned by ``get_status(job)`` is polled once per second;
    the bar switches to the warning style as soon as a failed task is reported.
    '''

    def _refresh(widget, snapshot):
        # Finished tasks are everything that is neither pending nor running.
        widget.value = snapshot['Total'] - snapshot['Pending'] - snapshot['Running']
        widget.description = "{:1.0f} tasks left ".format(
            snapshot['Pending'] + snapshot['Running'])

    status = get_status(job)
    bar = IntProgress(min=0, max=status['Total'], bar_style='success')
    _refresh(bar, status)
    display(bar)

    while bar.value < status['Total']:
        status = get_status(job)
        _refresh(bar, status)
        if status['Failed'] > 0:
            bar.bar_style = 'warning'
        sleep(1)
def progress_bar(dirs, path, extension, data):
    """Outer-merge every ``*.<extension>`` CSV found under ``path + dir`` into ``data``.

    One progress step is shown per directory. Files that cannot be merged on
    the ``SEQN`` column are reported by printing their path and are skipped.

    Parameters
    ----------
    dirs : sequence of str
        Directory names, each appended to ``path`` by plain concatenation.
    path : str
        Prefix for every entry of ``dirs``.
    extension : str
        File extension to look for, without the dot.
    data : pandas.DataFrame
        Frame to merge into; it is not modified in place.

    Returns
    -------
    pandas.DataFrame
        The merged frame. The merge result was previously dropped when the
        function returned.

    Notes
    -----
    The working directory is changed to each visited directory and is not
    restored.
    """
    progress = IntProgress()
    progress.max = len(dirs)
    progress.description = '(Init)'
    display(progress)
    for mydir in dirs:
        os.chdir(path + mydir)
        for file in glob.glob('*.{}'.format(extension)):
            df_temp = pd.read_csv(file, encoding='utf8')
            try:
                data = pd.merge(data, df_temp, how='outer', on='SEQN')
            except Exception:
                # Best effort: report the offending file and keep going.
                print(path + mydir + '/' + file)
        progress.value += 1
        progress.description = mydir
    progress.description = '(Done)'
    return data
def create_movie(self, path, size, image_format='png', animation_frames=None, quality=100, samples_per_pixel=1, start_frame=0, end_frame=0, interpupillary_distance=0.0, exportIntermediateFrames=True):
    """
    Export a sequence of frames and block until the export reports completion.

    The renderer and application parameters are switched to export settings
    for the duration of the call and restored afterwards, also when the
    export raises.

    :path: Folder handed to export_frames
    :size: Frame buffer size, also used as the temporary viewport
    :image_format: Not used by this method
    :animation_frames: Optional list of animation frames (defaults to an empty list)
    :quality: Forwarded to export_frames
    :samples_per_pixel: Forwarded to export_frames and used as max_accum_frames
    :start_frame: Forwarded to export_frames
    :end_frame: Forwarded to export_frames
    :interpupillary_distance: Forwarded to export_frames
    :exportIntermediateFrames: Forwarded to export_frames
    """
    import time
    from ipywidgets import IntProgress

    if animation_frames is None:
        animation_frames = []

    application_params = self._client.get_application_parameters()
    renderer_params = self._client.get_renderer()
    old_image_stream_fps = application_params['image_stream_fps']
    old_viewport_size = application_params['viewport']
    old_samples_per_pixel = renderer_params['samples_per_pixel']
    old_max_accum_frames = renderer_params['max_accum_frames']

    try:
        self._client.set_renderer(samples_per_pixel=1, max_accum_frames=samples_per_pixel)
        self._client.set_application_parameters(viewport=size)
        self._client.set_application_parameters(image_stream_fps=0)

        progress_widget = IntProgress(description='In progress...', min=0, max=100, value=0)
        display(progress_widget)

        self.export_frames(
            path=path, animation_frames=animation_frames,
            start_frame=start_frame, end_frame=end_frame, size=size,
            samples_per_pixel=samples_per_pixel, quality=quality,
            interpupillary_distance=interpupillary_distance,
            exportIntermediateFrames=exportIntermediateFrames)

        done = False
        while not done:
            time.sleep(1)
            # Read progress and completion from the same status snapshot.
            status = self.get_export_frames_progress()
            progress_widget.value = int(status['progress'] * 100)
            done = status['done']
    finally:
        # Always hand the renderer back in its original configuration.
        self._client.set_application_parameters(
            image_stream_fps=old_image_stream_fps, viewport=old_viewport_size)
        self._client.set_renderer(samples_per_pixel=old_samples_per_pixel,
                                  max_accum_frames=old_max_accum_frames)

    progress_widget.description = 'Done'
    progress_widget.value = 100
def create_snapshot(self, size, path, samples_per_pixel, export_intermediate_frames=False):
    """
    Create a snapshot of the current frame

    The renderer/application parameters and the smoothed key frames are
    replaced for the duration of the export and restored afterwards, also
    when the export raises.

    :size: Frame buffer size
    :path: Full path of the snapshot image
    :samples_per_pixel: Samples per pixel
    :export_intermediate_frames: If True, intermediate samples are stored to disk. Otherwise,
    only the final accumulation is exported
    """
    application_params = self._client.get_application_parameters()
    renderer_params = self._client.get_renderer()
    old_image_stream_fps = application_params['image_stream_fps']
    old_viewport_size = application_params['viewport']
    old_samples_per_pixel = renderer_params['samples_per_pixel']
    old_max_accum_frames = renderer_params['max_accum_frames']
    old_smoothed_key_frames = copy.deepcopy(self._smoothed_key_frames)

    try:
        self._client.set_renderer(samples_per_pixel=1, max_accum_frames=samples_per_pixel)
        self._client.set_application_parameters(viewport=size)
        self._client.set_application_parameters(image_stream_fps=0)

        # A one-point camera path at the current animation frame
        control_points = [self.get_camera()]
        current_animation_frame = int(self._client.get_animation_parameters()['current'])
        animation_frames = [current_animation_frame]
        self.build_camera_path(
            control_points=control_points, nb_steps_between_control_points=1,
            smoothing_size=1)

        progress_widget = IntProgress(description='In progress...', min=0, max=100, value=0)
        display(progress_widget)

        base_dir = os.path.dirname(path)
        self.export_frames(
            path=base_dir, animation_frames=animation_frames, size=size,
            samples_per_pixel=samples_per_pixel,
            export_intermediate_frames=export_intermediate_frames)

        done = False
        while not done:
            time.sleep(1)
            # Read progress and completion from the same status snapshot.
            status = self.get_export_frames_progress()
            progress_widget.value = int(status['progress'] * 100)
            done = status['done']

        progress_widget.description = 'Done'
        progress_widget.value = 100

        # The single exported frame is looked up as 00000.png in the target
        # folder and moved to the requested file name. os.path.join keeps the
        # lookup relative when ``path`` has no directory part.
        frame_path = os.path.join(base_dir, '00000.png')
        if os.path.exists(frame_path):
            os.rename(frame_path, path)
    finally:
        self._client.set_application_parameters(image_stream_fps=old_image_stream_fps,
                                                viewport=old_viewport_size)
        self._client.set_renderer(samples_per_pixel=old_samples_per_pixel,
                                  max_accum_frames=old_max_accum_frames)
        self._smoothed_key_frames = copy.deepcopy(old_smoothed_key_frames)
def create_movie(
        self, path, size, animation_frames=None, quality=100, samples_per_pixel=1, start_frame=0,
        end_frame=0, interpupillary_distance=0.0, exportIntermediateFrames=True):
    """
    Create and export a set of PNG frames for later movie generation

    The call blocks until the export reports completion. Renderer and
    application parameters are restored afterwards, also when the export
    raises.

    :path: Full path of the snapshot folder
    :size: Frame buffer size
    :animation_frames: Optional list of animation frames (defaults to an empty list)
    :quality: PNG quality
    :samples_per_pixel: Samples per pixel
    :start_frame: Start frame to export in the provided sequence
    :end_frame: Last frame to export in the provided sequence
    :interpupillary_distance: Interpupillary distance for stereo rendering. If set to 0, stereo
    is disabled
    :exportIntermediateFrames: If True, intermediate samples are stored to disk. Otherwise, only
    the final accumulation is exported
    """
    if animation_frames is None:
        animation_frames = []

    application_params = self._client.get_application_parameters()
    renderer_params = self._client.get_renderer()
    old_image_stream_fps = application_params['image_stream_fps']
    old_viewport_size = application_params['viewport']
    old_samples_per_pixel = renderer_params['samples_per_pixel']
    old_max_accum_frames = renderer_params['max_accum_frames']

    try:
        self._client.set_renderer(samples_per_pixel=1, max_accum_frames=samples_per_pixel)
        self._client.set_application_parameters(viewport=size)
        self._client.set_application_parameters(image_stream_fps=0)

        progress_widget = IntProgress(description='In progress...', min=0, max=100, value=0)
        display(progress_widget)

        self.export_frames(
            path=path, animation_frames=animation_frames, start_frame=start_frame,
            end_frame=end_frame, size=size, samples_per_pixel=samples_per_pixel, quality=quality,
            interpupillary_distance=interpupillary_distance,
            exportIntermediateFrames=exportIntermediateFrames)

        done = False
        while not done:
            time.sleep(1)
            # Read progress and completion from the same status snapshot.
            status = self.get_export_frames_progress()
            progress_widget.value = int(status['progress'] * 100)
            done = status['done']
    finally:
        # Always hand the renderer back in its original configuration.
        self._client.set_application_parameters(image_stream_fps=old_image_stream_fps,
                                                viewport=old_viewport_size)
        self._client.set_renderer(samples_per_pixel=old_samples_per_pixel,
                                  max_accum_frames=old_max_accum_frames)

    progress_widget.description = 'Done'
    progress_widget.value = 100
def progressify(iterable, n):
    """Yield the items of ``iterable`` while displaying a progress bar and an elapsed-time label.

    ``n`` is used as the bar maximum and as the denominator of the displayed
    percentage. The widgets are refreshed after the consumer has asked for the
    next item, i.e. once the previous item has been handled.
    """
    began = time.time()
    bar = IntProgress(min=0, max=n, layout=Layout(width='100%'))
    elapsed_label = Label(layout=Layout(width='100%'))
    for widget in (bar, elapsed_label):
        display(widget)

    for element in iterable:
        yield element
        bar.value += 1
        seconds = time.time() - began
        bar.description = '%.1f%% (%s / %s)' % (bar.value * 100.0 / n, bar.value, n)
        elapsed_label.value = 'elapsed %s' % datetime.timedelta(seconds=seconds)

    # Only reached when the iterable is exhausted
    bar.bar_style = 'success'
def from_geodataframe_to_map(portal: Portal, gdf: GeoDataFrame, data_name: str, map_title: str, layer_name: str):
    """Upload ``gdf`` as JSON, build a one-layer map from it and display its thumbnail.

    An upload progress bar is displayed first; the created map is then fetched
    back from the portal and shown through ``PortalThumbnail``.

    Returns the id of the created map.
    """
    upload_bar = IntProgress()
    upload_bar.max = 100
    upload_bar.value = 0
    upload_bar.description = '上传文件:'
    display(upload_bar)

    def refresh_progress(read, total):
        # NOTE(review): ``read`` is written to a bar whose maximum is 100 and
        # ``total`` is ignored - confirm the callback reports a percentage.
        upload_bar.value = read

    uploaded_id = portal.upload_dataframe_as_json(data_name, gdf, callback=refresh_progress)
    geojson_layer = portal.prepare_geojson_layer(uploaded_id, layer_name)
    # 3857 is passed to create_map as its second positional argument
    map_id = portal.create_map([geojson_layer], 3857, map_title)
    display(PortalThumbnail(portal.get_map(map_id)))
    return map_id
def in_progress(seq, msg="Progress: [%(processed)d / %(total)d]", length=None):
    """ Iterate over sequence, yielding item with progress widget displayed.
        This is useful if you need to process a sequence of items with some
        time consuming operations

        The widget is closed when iteration ends, including when the consumer
        stops early or an exception is raised while an item is processed.

        .. note::

            This works only in Jupyter Notebook

        .. note::

            This function requires *ipywidgets* package to be installed

        :param seq: sequence to iterate on.
        :param str msg: (optional) message template to display.
                        available to use 'processed' and 'total' integer vars,
                        where 'processed' is number of items processed and
                        'total' is total number of items in seq.
        :param int length: (optional) if seq is generator, or it is not
                           possible to apply 'len(seq)' function to 'seq',
                           then this argument is required and it's value will
                           be used as total number of items in seq.

        Example example::

            import time
            for i in in_progress(range(10)):
                time.sleep(1)
    """
    from IPython.display import display
    from ipywidgets import IntProgress

    if length is None:
        length = len(seq)

    progress = IntProgress(
        value=0, min=0, max=length,
        description=msg % {'processed': 0, 'total': length})
    display(progress)

    try:
        for i, item in enumerate(seq, 1):
            progress.value = i
            progress.description = msg % {'processed': i, 'total': length}
            yield item
    finally:
        # Runs on exhaustion, on generator close (early break) and on errors
        progress.close()
def perceptron_pass1(lines, classifier):
    """Run one shuffled perceptron pass over ``lines``, updating ``classifier`` in place.

    ``lines`` is shuffled in place. A line whose label times score is not
    strictly positive adds ``label * feature[word]`` to the weight of every
    word in its feature mapping. Returns the same ``classifier`` object.
    """
    bar = IntProgress(max=len(lines))
    display(bar)
    random.shuffle(lines)
    for sample in lines:
        label, feature = get_label_feature(sample)
        score = get_value(classifier, feature)
        if not label * score > 0:
            # Misclassified (or zero margin): move the weights towards the label
            for word in feature:
                delta = label * feature[word]
                previous = classifier.get(word)
                if previous is None:
                    classifier[word] = delta
                else:
                    classifier[word] = previous + delta
        bar.value += 1
        bar.description = "{}%".format(round(100 * (bar.value / bar.max), 1))
    return classifier
def extract_features(model, filenames, path_to_images, display_bar=True):
    '''
    Extracts output from model on list of filenames.

    Each image is loaded at 224x224, preprocessed with ``preprocess_input``
    and passed through ``model.predict``; the squeezed prediction is stored
    as one row of the result.

    Parameters
    ----------
    model : Keras model.
        Must produce 4096 values per image, since the result array is
        allocated with 4096 columns.
    filenames : list
        List of filenames to process.
    path_to_images : path to folder with images
    display_bar : bool
        Show a progress widget while processing.

    Returns
    -------
    features : ndarray
        Extracted features, shape ``(len(filenames), 4096)``, dtype float64.
    '''
    n_files = len(filenames)

    bar = None
    if display_bar:
        # max is kept >= min so an empty file list does not produce invalid bounds
        bar = IntProgress(value=1, min=1, max=max(n_files, 1), step=1,
                          description='Initializing...')
        display(bar)

    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; float64 is the same dtype.
    features = np.zeros((n_files, 4096), dtype=np.float64)
    for i, filename in enumerate(filenames):
        img_path = os.path.join(path_to_images, filename)
        img = image.load_img(img_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        # Use the model that was passed in rather than a module-level global
        features[i] = model.predict(x).squeeze()
        if bar is not None:
            bar.value = i + 1
            bar.description = '[{:>{tab}} / {}]'.format(i + 1, bar.max, tab=len(str(bar.max)))

    if bar is not None:
        bar.bar_style = 'success'
    return features
def create_snapshot(self, size, path, samples_per_pixel, exportIntermediateFrames=True):
    """
    Create a snapshot of the current frame and store it at ``path``.

    The renderer/application parameters and the smoothed key frames are
    replaced for the duration of the export and restored afterwards, also
    when the export raises.

    :size: Frame buffer size, also used as the temporary viewport
    :path: Full path of the snapshot image
    :samples_per_pixel: Forwarded to export_frames and used as max_accum_frames
    :exportIntermediateFrames: Forwarded to export_frames
    """
    import copy
    import os
    import time
    from ipywidgets import IntProgress

    application_params = self._client.get_application_parameters()
    renderer_params = self._client.get_renderer()
    old_image_stream_fps = application_params['image_stream_fps']
    old_viewport_size = application_params['viewport']
    old_samples_per_pixel = renderer_params['samples_per_pixel']
    old_max_accum_frames = renderer_params['max_accum_frames']
    old_smoothed_key_frames = copy.deepcopy(self._smoothed_key_frames)

    try:
        self._client.set_renderer(samples_per_pixel=1, max_accum_frames=samples_per_pixel)
        self._client.set_application_parameters(viewport=size)
        self._client.set_application_parameters(image_stream_fps=0)

        # A one-point camera path at the current animation frame
        control_points = [self.get_camera()]
        current_animation_frame = int(
            self._client.get_animation_parameters()['current'])
        animation_frames = [current_animation_frame]
        self.build_camera_path(control_points=control_points,
                               nb_steps_between_control_points=1,
                               smoothing_size=1)

        progress_widget = IntProgress(description='In progress...', min=0, max=100, value=0)
        display(progress_widget)

        base_dir = os.path.dirname(path)
        self.export_frames(path=base_dir, animation_frames=animation_frames,
                           size=size, samples_per_pixel=samples_per_pixel,
                           exportIntermediateFrames=exportIntermediateFrames)

        done = False
        while not done:
            time.sleep(1)
            # Read progress and completion from the same status snapshot.
            status = self.get_export_frames_progress()
            progress_widget.value = int(status['progress'] * 100)
            done = status['done']

        progress_widget.description = 'Done'
        progress_widget.value = 100

        # The single exported frame is looked up as 00000.png in the target
        # folder and moved to the requested file name. os.path.join keeps the
        # lookup relative when ``path`` has no directory part.
        frame_path = os.path.join(base_dir, '00000.png')
        if os.path.exists(frame_path):
            os.rename(frame_path, path)
    finally:
        self._client.set_application_parameters(
            image_stream_fps=old_image_stream_fps, viewport=old_viewport_size)
        self._client.set_renderer(samples_per_pixel=old_samples_per_pixel,
                                  max_accum_frames=old_max_accum_frames)
        self._smoothed_key_frames = copy.deepcopy(old_smoothed_key_frames)
def in_progress(seq, msg="Progress: [%(processed)d / %(total)d]", length=None, close=True):
    """ Iterate over sequence, yielding item with progress widget displayed.
        This is useful if you need to process a sequence of items with some
        time consuming operations

        .. note::

            This works only in Jupyter Notebook

        .. note::

            This function requires *ipywidgets* package to be installed

        :param seq: sequence to iterate on.
        :param str msg: (optional) message template to display.
                        Following variables could be used in this template:
                            - processed
                            - total
                            - time_total
                            - time_per_item
                            - time_remaining
        :param int length: (optional) if seq is generator, or it is not
                           possible to apply 'len(seq)' function to 'seq',
                           then this argument is required and it's value will
                           be used as total number of items in seq.
        :param bool close: (optional) close the widget when iteration ends,
                           including when the consumer stops early or an
                           exception is raised. Default: True

        Example example::

            import time
            for i in in_progress(range(10)):
                time.sleep(1)
    """
    from IPython.display import display
    from ipywidgets import IntProgress
    import time

    if length is None:
        length = len(seq)

    start_time = time.time()

    progress = IntProgress(
        value=0, min=0, max=length,
        description=msg % {
            'processed': 0,
            'total': length,
            'time_total': 0.0,
            'time_per_item': 0.0,
            'time_remaining': 0.0,
        })
    display(progress)

    try:
        for i, item in enumerate(seq, 1):
            progress.value = i

            yield item  # Do the job

            # Timing is refreshed after the consumer has handled the item
            elapsed = time.time() - start_time
            per_item = elapsed / i
            progress.description = msg % {
                'processed': i,
                'total': length,
                'time_total': elapsed,
                'time_per_item': per_item,
                'time_remaining': per_item * (length - i),
            }
    finally:
        # Runs on exhaustion, on generator close (early break) and on errors
        if close:
            progress.close()
def train(self, batch_size=64, learning_rate=1e-3, num_epochs=5, max_num=-1,
          best_path='keyboard_model_best.tar', current_path='keyboard_model_latest.tar',
          decay_every=10, save_model=True, dirs=None):
    """Train ``self.model`` with MSE loss and Adam, validating after every epoch.

    Batches come from the module-level ``dataset`` and are moved to the
    module-level ``device``. The weights with the lowest validation loss are
    loaded back into the model before it is stored on ``self.model``.

    :param batch_size: batch size requested from ``dataset.data_batch``
    :param learning_rate: initial Adam learning rate
    :param num_epochs: number of train + val epochs
    :param max_num: cap on training samples per epoch, -1 for no cap
                    (validation is never capped)
    :param best_path: file receiving the best weights so far
    :param current_path: file receiving the weights after every epoch
    :param decay_every: StepLR step size (gamma is fixed at 0.05)
    :param save_model: when False, nothing is written to disk
    :param dirs: forwarded to ``dataset.data_batch``; defaults to ``[0]``
    """
    if dirs is None:
        dirs = [0]

    model = self.model
    criterion = nn.MSELoss()
    optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=decay_every,
                                          gamma=0.05)

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = None

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                # NOTE(review): the scheduler is stepped before the optimizer;
                # kept as is because moving it changes the learning-rate schedule.
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0

            # Only the training phase honours max_num, so the bar total has to
            # follow the per-phase limit rather than max_num itself.
            max_num_for_this_epoch = max_num if is_training else -1
            if max_num_for_this_epoch == -1:
                total = dataset.get_num_of_data(phase)
            else:
                total = max_num_for_this_epoch

            bar = IntProgress(max=total)
            display(bar)

            for inputs, labels in dataset.data_batch(
                    type=phase, batch_size=batch_size,
                    max_num=max_num_for_this_epoch, dirs=dirs):
                inputs = torch.Tensor(inputs).to(device)
                labels = torch.Tensor(labels).to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradients are only tracked while training
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    labels = torch.reshape(labels, [-1, 8])
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics
                # NOTE(review): every batch is weighted with batch_size, also a
                # shorter final batch - confirm this is acceptable.
                running_loss += loss.item() * batch_size

                # free unoccupied memory; there is no CPU counterpart to call
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

                bar.value += batch_size
                bar.description = f'{bar.value} / {total}'

            bar.close()

            epoch_loss = running_loss / dataset.get_num_of_data(phase)
            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

            # remember (and optionally persist) the best validation weights
            if phase == 'val' and (best_loss is None or epoch_loss < best_loss):
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
                if save_model:
                    torch.save(model.state_dict(), best_path)
                    print(f'The best model has been saved to {best_path} ...')

        if save_model:
            torch.save(model.state_dict(), current_path)
            print(f'Current mode has been saved to {current_path} ...')
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # best_loss stays None when no validation epoch ran (num_epochs == 0)
    if best_loss is not None:
        print('Best val loss: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    self.model = model
def status_printer(file, total=None, desc=None):
    """
    Build and display an IPython/Jupyter Notebook progress bar widget and
    return the callback used to refresh it.

    With a truthy ``total`` the bar runs from 0 to ``total``; otherwise a
    full 'info' style bar is shown. The returned ``print_status(s, close,
    bar_style)`` updates the bar value, the status text and the bar style.
    """
    fp = file
    if not getattr(fp, 'flush', False):  # pragma: no cover
        fp.flush = lambda: None

    # Progress bar widget
    if total:
        pbar = IntProgress(min=0, max=total)
    else:
        # No total? Show info style bar with no progress tqdm status
        pbar = IntProgress(min=0, max=1)
        pbar.value = 1
        pbar.bar_style = 'info'
    if desc:
        pbar.description = desc

    # Status text; a container is the only way to place it right of the bar
    ptext = HTML()
    container = HBox(children=[pbar, ptext])
    display(container)

    def print_status(s='', close=False, bar_style=None):
        # Note: contrary to native tqdm, s='' does NOT clear bar
        # goal is to keep all infos if error happens so user knows
        # at which iteration the loop failed.

        # The current iteration is carried as a prefix of the meter string,
        # cause we use bar_format=r'{n}|...'
        if total and s:
            marker = s.find(r'/|/')
            # The marker may be absent (else n > total)
            if marker >= 0:
                current = int(s[:marker])
                s = s[marker + 3:]
                pbar.value = current

        # Print stats; never clear the bar (signal: s='')
        if s:
            # drop inesthetical pipes, then html escape special characters
            ptext.value = escape(s.replace('||', ''))

        # Change bar style, but never let 'success' override 'danger': the
        # bar gets closed after an error
        if bar_style and (pbar.bar_style != 'danger' or bar_style != 'success'):
            pbar.bar_style = bar_style

        # Special signal to close the bar; hide only if no error
        if close and pbar.bar_style != 'danger':
            container.visible = False

    return print_status
def train(FLAG):
    """Train a VGG16 model on CIFAR-10/100, adding one task loss at a time.

    Reads ``FLAG.dataset``, ``FLAG.init_from``, ``FLAG.prof_type``,
    ``FLAG.save_dir`` and ``FLAG.log_dir``. For each task key the matching
    entry of ``vgg16.loss_dict`` is added to the running objective, which is
    then optimised with Adam until the validation loss has failed to improve
    by more than ``min_delta`` for ``early_stop_patience`` epochs in a row.
    A checkpoint is written whenever a later task starts and once at the end.

    Raises
    ------
    ValueError
        If ``FLAG.dataset`` is neither 'CIFAR-10' nor 'CIFAR-100'.
    """
    print("Reading dataset...")
    if FLAG.dataset == 'CIFAR-10':
        train_data = CIFAR10(train=True)
        test_data = CIFAR10(train=False)
    elif FLAG.dataset == 'CIFAR-100':
        train_data = CIFAR100(train=True)
        test_data = CIFAR100(train=False)
    else:
        raise ValueError("dataset should be either CIFAR-10 or CIFAR-100.")

    Xtrain, Ytrain = train_data.train_data, train_data.train_labels
    Xtest, Ytest = test_data.test_data, test_data.test_labels

    print("Build VGG16 models...")
    vgg16 = VGG16(FLAG.init_from, prof_type=FLAG.prof_type)

    # build model using dp
    dp = [(i + 1) * 0.05 for i in range(1, 20)]
    vgg16.build(dp=dp)

    # define tasks
    # Task keys index vgg16.loss_dict and vgg16.accu_dict below
    tasks = ['100', '50']
    print(tasks)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=len(tasks))

    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    # Everything listed in vgg16.gamma_var is excluded from optimisation
    tvars_trainable = tf.trainable_variables()
    for rm in vgg16.gamma_var:
        tvars_trainable.remove(rm)
        print('%s is not trainable.' % rm)

    # useful function
    def initialize_uninitialized(sess):
        """Initialise only the global variables that are not yet initialised."""
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        learning_rate = 2e-4
        batch_size = 32
        alpha = 0.5  # only referenced by the commented-out aggregation below
        early_stop_patience = 4
        min_delta = 0.0001

        # optimizer
        # opt = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        # recorder
        epoch_counter = 0

        # tensorboard writer
        writer = tf.summary.FileWriter(FLAG.log_dir, sess.graph)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        # NOTE(review): this maximum is derived from batch_size, while the
        # validation loop below steps in chunks of 200 - confirm intended.
        pval.max = int(Xtest.shape[0] / batch_size)

        # initial task
        obj = vgg16.loss_dict[tasks[0]]

        while (len(tasks)):

            # acquire a new task
            cur_task = tasks[0]
            tasks = tasks[1:]
            new_obj = vgg16.loss_dict[cur_task]

            # just finished a task
            if epoch_counter > 0:
                # save models
                saver.save(sess, checkpoint_path, global_step=epoch_counter)

                # task-wise loss aggregation
                # obj = tf.add(tf.multiply(1-alpha,obj), tf.multiply(alpha,new_obj))
                obj = tf.add(obj, new_obj)

            # optimizer
            # A new train op is built for every task
            train_op = opt.minimize(obj, var_list=tvars_trainable)

            # re-initialize
            initialize_uninitialized(sess)

            # reset due to adding a new task
            patience_counter = 0
            current_best_val_loss = 100000  # a large number

            # optimize when the aggregated obj
            while (patience_counter < early_stop_patience):
                stime = time.time()
                bar_train = Bar(
                    'Training',
                    max=int(Xtrain.shape[0] / batch_size),
                    suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
                # bar_val is created and finished but never advanced below
                bar_val = Bar(
                    'Validation',
                    max=int(Xtest.shape[0] / batch_size),
                    suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')

                # training an epoch
                for i in range(int(Xtrain.shape[0] / batch_size)):
                    st = i * batch_size
                    ed = (i + 1) * batch_size
                    sess.run([train_op],
                             feed_dict={
                                 vgg16.x: Xtrain[st:ed, :, :, :],
                                 vgg16.y: Ytrain[st:ed, :]
                             })
                    ptrain.value += 1
                    ptrain.description = "Training %s/%s" % (i, ptrain.max)
                    bar_train.next()

                # validation
                val_loss = 0
                val_accu = 0
                for i in range(int(Xtest.shape[0] / 200)):
                    st = i * 200
                    ed = (i + 1) * 200
                    loss, accu, epoch_summary = sess.run(
                        [obj, vgg16.accu_dict[cur_task], vgg16.summary_op],
                        feed_dict={
                            vgg16.x: Xtest[st:ed, :],
                            vgg16.y: Ytest[st:ed, :]
                        })
                    val_loss += loss
                    val_accu += accu
                    pval.value += 1
                    pval.description = "Testing %s/%s" % (i, pval.value)
                # NOTE(review): the averages use the widget counter as the
                # batch count; presumably it equals the number of validation
                # batches - verify it cannot be 0 or capped by pval.max.
                val_loss = val_loss / pval.value
                val_accu = val_accu / pval.value

                # early stopping check
                if (current_best_val_loss - val_loss) > min_delta:
                    current_best_val_loss = val_loss
                    patience_counter = 0
                else:
                    patience_counter += 1

                # shuffle Xtrain and Ytrain in the next epoch
                idx = np.random.permutation(Xtrain.shape[0])
                Xtrain, Ytrain = Xtrain[idx, :, :, :], Ytrain[idx, :]

                # epoch end
                # epoch_summary is the summary of the last validation batch
                writer.add_summary(epoch_summary, epoch_counter)
                epoch_counter += 1

                ptrain.value = 0
                pval.value = 0
                bar_train.finish()
                bar_val.finish()

                print(
                    "Epoch %s (%s), %s sec >> obj loss: %.4f, task at %s: %.4f"
                    % (epoch_counter, patience_counter,
                       round(time.time() - stime, 2), val_loss, cur_task,
                       val_accu))
        saver.save(sess, checkpoint_path, global_step=epoch_counter)

        writer.close()
def train(self, phase=['train', 'val'], color='black', learning_rate=1e-3,
          weight_lambda=0.0005, num_epoch=5, max_num=-1,
          best_path='model_best.tar', current_path='model_latest.tar',
          tsb_writer=None, tag='', decay_every=10, save_model=True):
    """Train ``self.model`` with MSE loss and Adam, one sample per step.

    Samples come from the module-level ``dataset`` and are moved to the
    module-level ``device``. Labels are rescaled with ``label / 63.5 - 1``
    before the loss; the reported "L1 Diff" maps predictions back with
    ``(output + 1) * 63.5``.

    :param phase: phases to run every epoch, any of 'train' and 'val'; a
                  single phase name is accepted as well. Not modified.
    :param color: forwarded to the dataset helpers
    :param learning_rate: initial Adam learning rate
    :param weight_lambda: Adam weight decay
    :param num_epoch: number of epochs
    :param max_num: cap on samples per phase, -1 for everything
    :param best_path: suffix of the timestamped file for the best weights
    :param current_path: suffix of the timestamped file for the latest weights
    :param tsb_writer: optional writer with ``add_scalars``; only used when
                       both 'train' and 'val' are run
    :param tag: prefix of the scalar names written to ``tsb_writer``
    :param decay_every: StepLR step size (gamma is fixed at 0.05)
    :param save_model: when False, nothing is written to disk
    """
    # Keep the requested phases under their own name. Reusing ``phase`` as the
    # loop variable left it bound to the last phase name, so the next epoch
    # iterated over the characters of that string.
    phases = [phase] if isinstance(phase, str) else list(phase)

    model = self.model
    criterion = nn.MSELoss()
    optimizer = optim.Adam(self.model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_lambda)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=decay_every,
                                          gamma=0.05)

    since = time.time()
    best_loss = None

    # One timestamp for both files so their prefixes cannot differ
    timestamp = time.strftime('[%Y%m%d]%H-%M-%S')
    best_path = timestamp + best_path
    current_path = timestamp + current_path
    print(f'The best model will be saved to {best_path} ...')
    print(f'Thhe latest model will be saved to {current_path} ...')

    for epoch in range(num_epoch):
        print('Epoch {}/{}'.format(epoch + 1, num_epoch), end='')
        self.epoch_total += 1

        _loss = dict()
        _diff = dict()

        for current_phase in phases:
            is_training = current_phase == 'train'
            if is_training:
                # NOTE(review): the scheduler is stepped before the optimizer;
                # kept as is because moving it changes the learning-rate schedule.
                scheduler.step()
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_diff = 0.0

            if max_num == -1:
                total = dataset.get_lstm_data_num(current_phase, color)
            else:
                total = max_num

            bar = IntProgress(max=total)
            display(bar)

            batches = dataset.lstm_data_batch(type=current_phase,
                                              color=color,
                                              max_num=total,
                                              need_bar=False)
            for i, (inputs, labels) in enumerate(batches):
                _labels = labels  # raw label, used for the L1 difference
                inputs = torch.Tensor(inputs).to(device)
                labels = torch.Tensor(np.array([labels]) / 63.5 - 1).to(device)

                optimizer.zero_grad()

                # gradients are only tracked while training
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    labels = torch.reshape(labels, [1])
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item()
                running_diff += np.abs(
                    (outputs.cpu().detach().numpy()[0] + 1) * 63.5 - _labels)

                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

                bar.value += 1
                # refresh the label only every 32 samples
                if i % 32 == 0:
                    bar.description = f'{bar.value} / {total}'

            bar.close()

            epoch_loss = running_loss / total
            epoch_diff = running_diff / total
            if epoch % 5 == 0:
                print('{} Loss: {:.4f}, L1 Diff: {:.4f}'.format(
                    current_phase, epoch_loss, epoch_diff))

            _loss[current_phase] = epoch_loss
            _diff[current_phase] = epoch_diff

            if current_phase == 'val' and (best_loss is None
                                           or epoch_loss < best_loss):
                best_loss = epoch_loss
                if save_model:
                    torch.save(model.state_dict(), best_path)

        if save_model:
            torch.save(model.state_dict(), current_path)

        if tsb_writer and 'val' in phases and 'train' in phases:
            tsb_writer.add_scalars(f'{tag}/Loss', {
                'val': _loss['val'],
                'train': _loss['train']
            }, self.epoch_total)
            tsb_writer.add_scalars(f'{tag}/L1 Diff', {
                'val': _diff['val'],
                'train': _diff['train']
            }, self.epoch_total)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # best_loss stays None when no validation phase ran
    if best_loss is not None:
        print('Best val loss: {:4f}'.format(best_loss))

    self.model = model
def in_progress(seq, msg="Progress: [%(processed)d / %(total)d]", length=None, close=True):
    """ Iterate over sequence, yielding item with progress widget displayed.
        This is useful if you need to process a sequence of items with some
        time consuming operations

        .. note::

            This works only in Jupyter Notebook

        .. note::

            This function requires *ipywidgets* package to be installed

        :param seq: sequence to iterate on.
        :param str msg: (optional) message template to display.
                        Following variables could be used in this template:
                            - processed
                            - total
                            - time_total
                            - time_per_item
                            - time_remaining
        :param int length: (optional) if seq is generator, or it is not
                           possible to apply 'len(seq)' function to 'seq',
                           then this argument is required and it's value will
                           be used as total number of items in seq.
        :param bool close: (optional) close the widget when iteration ends,
                           including when the consumer stops early or an
                           exception is raised. Default: True

        Example example::

            import time
            for i in in_progress(range(10)):
                time.sleep(1)
    """
    from IPython.display import display
    from ipywidgets import IntProgress
    import time

    if length is None:
        length = len(seq)

    start_time = time.time()

    progress = IntProgress(
        value=0,
        min=0,
        max=length,
        description=msg % {
            'processed': 0,
            'total': length,
            'time_total': 0.0,
            'time_per_item': 0.0,
            'time_remaining': 0.0,
        }
    )
    display(progress)

    try:
        for i, item in enumerate(seq, 1):
            progress.value = i

            yield item  # Do the job

            # Timing is refreshed after the consumer has handled the item
            elapsed = time.time() - start_time
            per_item = elapsed / i
            progress.description = msg % {
                'processed': i,
                'total': length,
                'time_total': elapsed,
                'time_per_item': per_item,
                'time_remaining': per_item * (length - i),
            }
    finally:
        # Runs on exhaustion, on generator close (early break) and on errors
        if close:
            progress.close()
def interpolate(self, tile_size=256, n_workers=1, threads_per_worker=8, memory_limit='14GB', progressBar=None):
    """
    Interpolate the selected data variable along ``time`` with every selected
    method and save each result as a ``.tif`` file.

    In notebook mode (``self.isNotebook is True``) the data variable and the
    methods are read from the widgets ``self.data_vars`` and
    ``self.interpolation_methods`` and a progress bar is shown; otherwise
    ``self.selected_data_var`` and ``self.selected_interpolation_method``
    are used.

    Output goes to ``<source_dir>/<data_var[1:]>/interpolated``, which is
    created (with parents) when missing.

    :param tile_size: forwarded to save_dask_array
    :param n_workers: forwarded to save_dask_array
    :param threads_per_worker: forwarded to save_dask_array
    :param memory_limit: forwarded to save_dask_array
    :param progressBar: forwarded to save_dask_array
    """
    # NOTE(review): a missing mask (self.mask is None) is not handled here;
    # the mask is multiplied into the data below.
    in_notebook = self.isNotebook is True

    progress_bar = None
    if in_notebook:
        # Set up progress bar: one step for every interpol method selected
        # by the user
        _items = len(self.interpolation_methods.value)
        _item = 0

        progress_bar = IntProgress(
            value=0,
            min=0,
            max=_items,
            step=1,
            description='',
            bar_style='',  # 'success', 'info', 'warning', 'danger' or ''
            orientation='horizontal',
            style={'description_width': 'initial'},
            layout={'width': '75%'})
        display(progress_bar)
        progress_bar.value = _item

        data_var = self.data_vars.value
        interpolation_methods = self.interpolation_methods.value
    else:
        data_var = self.selected_data_var
        interpolation_methods = [self.selected_interpolation_method]

    try:
        # Get temp dataset to perform the interpolation
        tmp_ds = getattr(self.ts.data, data_var).copy(deep=True)

        # Store original data type
        dtype = tmp_ds.data.dtype

        # Get fill value and remember where it occurs
        fill_value = tmp_ds.attrs['nodatavals'][0]
        mask_fill_value = (tmp_ds == fill_value)
        mask_fill_value = (mask_fill_value * fill_value).astype(dtype)

        # Apply mask
        tmp_ds *= self.mask

        # Set NaN where there are zeros
        tmp_ds = tmp_ds.where(tmp_ds != 0)

        # Where there were fill values, set the value again to
        # fill value to avoid not having data to interpolate
        tmp_ds += mask_fill_value

        # Where are less than 20% of observations, use fill value
        min_n_obs = int(tmp_ds.shape[0] * 0.2)
        tmp_ds = tmp_ds.where(self.mask.sum(axis=0) > min_n_obs, fill_value)

        for method in interpolation_methods:
            if in_notebook:
                progress_bar.value = _item
                progress_bar.description = (f"Interpolation of {data_var}"
                                            f" using {method}")

            if method == 'smoothn':
                # First, we need a linear interpolation
                tmp_interpol_ds = tmp_ds.interpolate_na(dim='time',
                                                        method='linear')

                # Smoothing
                s = float(self.smooth_factor.value)

                tmp_masked = np.ma.masked_equal(
                    tmp_interpol_ds.data * self.mask, 0)

                tmp_smoothed = smoothn(
                    tmp_masked,
                    isrobust=True,
                    s=s,
                    TolZ=1e-6,
                    axis=0)[0]

                # Release the masked copy before the data is overwritten
                del tmp_masked

                # Overwrite data
                tmp_interpol_ds.data = tmp_smoothed
            else:
                tmp_interpol_ds = tmp_ds.interpolate_na(dim='time',
                                                        method=method)

            # Set data type to match the original (non-interpolated)
            tmp_interpol_ds.data = tmp_interpol_ds.data.astype(dtype)

            # Copy metadata attributes
            tmp_interpol_ds.attrs = tmp_ds.attrs

            # Save to file
            fname = f"{self.product}.{self.version}.{data_var}.{method}.tif"
            output_dir = os.path.join(self.source_dir, data_var[1::],
                                      'interpolated')
            # Creates missing parents too and tolerates an existing folder
            os.makedirs(output_dir, exist_ok=True)

            fname = os.path.join(output_dir, fname)
            save_dask_array(fname=fname, data=tmp_interpol_ds,
                            data_var=data_var, method=method,
                            tile_size=tile_size, n_workers=n_workers,
                            threads_per_worker=threads_per_worker,
                            memory_limit=memory_limit,
                            progressBar=progressBar)

            if in_notebook:
                _item += 1
                progress_bar.value = _item
    finally:
        # Remove progress bar, also when a step above failed
        if progress_bar is not None:
            progress_bar.close()
            del progress_bar
def process_folder(path_in, path_out, path_to_annotations, predictor_path):
    '''
    Detects faces and landmarks on every image in the folder, and saves
    cropped faces to the output folder. Landmarks are saved to a
    DataFrame, or appended to an existing one.

    Parameters
    ----------
    path_in : str
        Folder with input images.
    path_out : str
        Output folder for extracted faces.
    path_to_annotations : str
        Path to annotations csv file. Read first if it exists, then
        (re)written with the newly found faces added.
    predictor_path : str
        Path to a trained dlib shape (landmarks) predictor model.

    Returns
    -------
    None
    '''
    # Create output dir, if needed.
    os.makedirs(os.path.abspath(path_out), exist_ok=True)

    # Get list of image paths.
    imgs = [
        os.path.join(path_in, filename) for filename in os.listdir(path_in)
    ]

    # Configure dlib face detector and landmarks predictor
    face_detector = dlib.get_frontal_face_detector()
    # Read trained landmarks predictor model
    predictor = dlib.shape_predictor(predictor_path)

    landmarks = {}
    count = 0

    # Progress bar
    bar = IntProgress(value=1, min=1, max=len(imgs), step=1)
    print('Processing folder: ' + path_in)
    display(bar)

    # Read the annotations file
    annotations = os.path.exists(path_to_annotations)
    if annotations:
        df = pd.read_csv(path_to_annotations, header=0, index_col=0)

    for filename in imgs:
        # Update progress bar
        bar.description = '%d / %d' % (bar.value, len(imgs))

        # Try to find the image in already processed files. Faces are
        # stored as "<basename>_<i><ext>", so the separator is part of the
        # prefix; otherwise "img1" would be hidden by "img10_0.jpg".
        img_processed = False
        if annotations:
            img_basename = os.path.splitext(os.path.basename(filename))[0]
            if df.index.str.startswith(img_basename + '_').sum() > 0:
                img_processed = True

        if not img_processed:
            # Read the image
            img = imread(filename)
            # Detect faces
            dets = face_detector(img, 1)
            base, ext = os.path.splitext(filename)

            # Iterate over all detected faces
            for i, d in enumerate(dets):
                count += 1
                # Predict landmark points
                shape = predictor(img, d)
                # Crop face
                face, points = crop_by_landmarks(img,
                                                 shape_to_array(shape),
                                                 pad=10)
                # Save face and landmarks
                face_filename = os.path.basename(base) + '_' + str(i) + ext
                imsave(os.path.join(path_out, face_filename), face)
                landmarks[face_filename] = array_to_str(points)

        bar.value += 1

    # Convert landmarks dictionary to DataFrame and add to csv
    landmarks_df = pd.DataFrame.from_dict(landmarks,
                                          columns=['points'],
                                          orient='index')
    landmarks_df.index.name = 'filename'
    landmarks_df['smile'] = 0
    landmarks_df['mouth_open'] = 0
    landmarks_df['labeled'] = False

    if annotations:
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported equivalent.
        df = pd.concat([df, landmarks_df], sort=False)
        df.to_csv(path_to_annotations)
    else:
        landmarks_df.to_csv(path_to_annotations)

    # Print statistics
    print('Found %d faces' % count)
def _analytics(self, b):
    """
    Uses the self.user_qa_selection OrderedDictionary to extract
    the corresponding QA values and create a mask of dimensions:
        (number of qa layers, time steps, cols(lat), rows(lon))
    The per-layer masks are then combined with a logical AND into
    ``self.mask``. Additionally computes ``self.pct_data_available``
    and triggers the max gap length computation.

    :param b: a ``QProgressBar`` used to report progress; for any other
              value an ``IntProgress`` widget is displayed instead. It is
              also forwarded to ``__get_max_gap_length``.
    """
    # isinstance also accepts QProgressBar subclasses, which an exact
    # type comparison would wrongly route to the notebook widget
    use_qt_bar = isinstance(b, QProgressBar)

    if not use_qt_bar:
        progress_bar = IntProgress(
            value=0,
            min=0,
            max=len(self.user_qa_selection),
            step=1,
            description='',
            bar_style='',  # 'success', 'info', 'warning', 'danger' or ''
            orientation='horizontal',
            style={'description_width': 'initial'},
            layout={'width': '50%'})
        display(progress_bar)

    n_qa_layers = len(self.user_qa_selection)

    # Use the first data var to extract shape, coordinates and attributes
    k, v = next(iter(self.ts.data.data_vars.items()))

    # Create mask array, one layer per selected QA
    _time, _latitude, _longitude = self.ts.data.data_vars[k].shape
    mask = np.zeros((n_qa_layers, _time, _latitude, _longitude), np.int8)

    qa_layer = self.qa_def.QualityLayer.unique()
    # QA layer used to create mask
    _qa_layer = getattr(self.ts.qa, f"qa{qa_layer[0]}")

    for i, user_qa in enumerate(self.user_qa_selection):
        if use_qt_bar:
            b.setValue(i)
            b.setFormat(f"Masking by QA {user_qa}")
        else:
            progress_bar.value = i
            progress_bar.description = f"Masking by QA {user_qa}"

        user_qa_fieldname = user_qa.replace(" ", "_").replace("/", "_")

        for j, qa_value in enumerate(self.user_qa_selection[user_qa]):
            qa_flag_val = self.qa_def[
                (self.qa_def.Name == user_qa) &
                (self.qa_def.Description == qa_value)].Value.iloc[0]

            matches = (_qa_layer[user_qa_fieldname] == qa_flag_val)
            if j == 0:
                mask[i] = matches
            else:
                # Any of the selected values of this QA is acceptable
                mask[i] = np.logical_or(mask[i], matches)

    if use_qt_bar:
        b.setValue(0)
        b.setEnabled(False)
    else:
        # Remove progress bar
        progress_bar.close()
        del progress_bar

    # A pixel is kept only when every QA layer accepts it
    mask = xr.DataArray(np.all(mask, axis=0),
                        coords=[v.time.data,
                                v.latitude.data,
                                v.longitude.data],
                        dims=['time', 'latitude', 'longitude'])
    mask.attrs = v.attrs
    self.mask = mask
    # Remove local mask variable
    del mask

    # Create the percentage of data available mask
    pct_data_available = (self.mask.sum(axis=0) * 100.0) / _time
    pct_data_available.latitude.data = v.latitude.data
    pct_data_available.longitude.data = v.longitude.data
    # Set the pct_data_available object
    self.pct_data_available = pct_data_available

    # Using the computed mask get the max gap length
    self.__get_max_gap_length(b)
def vgg16_train(model, train, test, init_from, save_dir, batch_size=64,
                epoch=300, early_stop_patience=25):
    """
    Train ``model`` with Adam and early stopping on the validation loss.

    parameters:
        model - object exposing ``loss`` and the placeholders ``x``,
                ``y`` and ``w``
        train - object exposing ``images``, ``labels``, ``weights`` and
                ``shuffle()``
        test - object exposing ``images`` and ``labels``
        init_from - not used by this function
        save_dir - folder for checkpoints and ``history.csv`` (created
                   when missing)
        batch_size - samples per step; a trailing partial batch is dropped
        epoch - maximum number of epochs
        early_stop_patience - epochs without validation improvement
                              tolerated before stopping
    return:
        None; a checkpoint is saved on every improvement and the loss
        history is written to ``history.csv``
    """
    os.makedirs(save_dir, exist_ok=True)
    checkpoint_path = os.path.join(save_dir, 'model.ckpt')

    # Loop-invariant values, computed once
    n_train_batches = train.images.shape[0] // batch_size
    n_val_batches = test.images.shape[0] // batch_size
    bar_suffix = '%(index)d/%(max)d - %(percent).1f%% - %(eta)ds'
    # Every validation sample gets the same weight of 1
    val_weights = np.expand_dims(np.repeat(1.0, batch_size), axis=1)

    with tf.Session() as sess:
        print(tf.trainable_variables())

        # hyper parameters
        learning_rate = 5e-4  # adam
        min_delta = 0.0001

        # recorder
        epoch_counter = 0
        loss_history = []
        val_loss_history = []

        # optimizer
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = opt.minimize(model.loss)

        # saver
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        sess.run(tf.global_variables_initializer())

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = n_train_batches
        pval.max = n_val_batches

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_loss = 100000  # a large number

        # train start
        while patience_counter < early_stop_patience:
            stime = time.time()
            bar_train = Bar('Training', max=n_train_batches,
                            suffix=bar_suffix)
            bar_val = Bar('Validation', max=n_val_batches,
                          suffix=bar_suffix)

            # training an epoch
            train_loss = 0
            for i in range(n_train_batches):
                st = i * batch_size
                ed = (i + 1) * batch_size
                _, loss = sess.run(
                    [train_op, model.loss],
                    feed_dict={model.x: train.images[st:ed, :],
                               model.y: train.labels[st:ed, :],
                               model.w: train.weights[st:ed, :]})
                train_loss += loss
                ptrain.value += 1
                # 1-based so the last step reads max/max
                ptrain.description = "Training %s/%s" % (i + 1, ptrain.max)
                bar_train.next()
            train_loss /= n_train_batches

            # validating an epoch
            val_loss = 0
            for i in range(n_val_batches):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss = sess.run(
                    model.loss,
                    feed_dict={model.x: test.images[st:ed, :],
                               model.y: test.labels[st:ed, :],
                               model.w: val_weights})
                val_loss += loss
                pval.value += 1
                pval.description = "Validation %s/%s" % (i + 1, pval.max)
                bar_val.next()
            val_loss /= n_val_batches

            # early stopping check
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("reset early stopping and save model into %s at epoch %s"
                      % (checkpoint_path, epoch_counter))
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            train.shuffle()

            loss_history.append(train_loss)
            val_loss_history.append(val_loss)
            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()
            print("Epoch %s (%s), %s sec >> train-loss: %.4f, val-loss: %.4f"
                  % (epoch_counter, patience_counter,
                     round(time.time() - stime, 2), train_loss, val_loss))

            # epoch end
            epoch_counter += 1
            if epoch_counter >= epoch:
                break

        res = pd.DataFrame({"epoch": range(0, len(loss_history)),
                            "loss": loss_history,
                            "val_loss": val_loss_history})
        res.to_csv(os.path.join(save_dir, "history.csv"), index=False)
        print("end training")
def getTS(self, startDate, endDate):
    """
    Download one dataset per day from ``startDate`` to ``endDate``
    (inclusive) and collect the hourly values in ``self.df``.

    For every day the variables are read at ``self.ilat`` / ``self.ilon``
    and stored together with 24 hourly timestamps. Progress is shown
    inside the ``self.out_cp`` output widget. On the first call
    (``self.init`` is true) the session is initialised first, which counts
    as one extra progress step.
    """
    total_steps = (endDate - startDate).days + 1
    if self.init:
        total_steps += 1

    day = startDate
    one_day = timedelta(days=1)
    # Mass variables are multiplied by this factor (unit conversion,
    # presumably kg -> ug -- confirm)
    scale = 1000000000.0

    self.df = pd.DataFrame(columns=[
        'datetime', 'AOD', 'DUST_PM', 'SALT_PM', 'ORG_CARB', 'BLK_CARB',
        'SO4', 'PM2.5'
    ])

    def percent(done):
        # Text shown next to the progress bar
        return '{:.1f}%'.format((float(done) / float(total_steps)) * 100.0)

    def at_point(dataset, field):
        # All time steps of ``field`` at the selected grid cell
        return np.squeeze(dataset[field][:, self.ilat, self.ilon])

    with self.out_cp:
        self.out_cp.clear_output()

        pbar = IntProgress(min=0, max=int(total_steps))
        pbar.description = 'Progress:'
        info1 = Label('0%')
        info2 = Label(' ')
        top_row = HBox([pbar, info1])
        bottom_row = HBox([info2], layout=Layout(justify_content='center'))
        display(VBox([top_row, bottom_row]))

        steps_done = 0
        if self.init:
            info2.value = 'Initializing NASA Earth Data Connection..'
            self.initSession()
            self.init = False
            pbar.value += 1
            steps_done += 1
            info1.value = percent(steps_done)

        self.lonlatToIndex(self.plon, self.plat)

        while day <= endDate:
            url = self.getUrlMERRA(day)
            info2.value = 'Accessing data for {}'.format(day)
            dataset = open_url(url, session=self.session)

            aod = at_point(dataset, 'TOTEXTTAU')
            dust_pm = at_point(dataset, 'DUSMASS25') * scale
            salt_pm = at_point(dataset, 'SSSMASS25') * scale
            org_carb = at_point(dataset, 'OCSMASS') * scale
            blk_carb = at_point(dataset, 'BCSMASS') * scale
            so4 = at_point(dataset, 'SO4SMASS') * scale
            # PM2.5 is a weighted sum of the individual species
            pm25 = (1.375 * so4 + 1.6 * org_carb + blk_carb + dust_pm +
                    salt_pm)

            hours = pd.date_range(day, periods=24, freq='H')
            daily = pd.DataFrame({
                'datetime': hours,
                'AOD': aod,
                'DUST_PM': dust_pm,
                'SALT_PM': salt_pm,
                'ORG_CARB': org_carb,
                'BLK_CARB': blk_carb,
                'SO4': so4,
                'PM2.5': pm25
            })
            self.df = pd.concat([self.df, daily])

            day += one_day
            steps_done += 1
            info1.value = percent(steps_done)
            pbar.value += 1

    self.stateChange = False
def train(FLAG):
    """
    Train the 7-class VGG16 segmentation model with early stopping.

    Reads ``FLAG.init_from``, ``FLAG.mode``, ``FLAG.keep_prob``,
    ``FLAG.lr`` and ``FLAG.save_dir``. A checkpoint is saved whenever the
    validation loss improves by more than ``min_delta``. Every 10 epochs
    the predictions for the tracked validation images are written as PNG
    files into ``FLAG.save_dir``.
    """
    print("Reading dataset...")
    # load data
    Xtrain, Ytrain = read_images(TRAIN_DIR), read_masks(TRAIN_DIR,
                                                        onehot=True)
    Xtest, Ytest = read_images(VAL_DIR), read_masks(VAL_DIR, onehot=True)
    track = [
        "hw3-train-validation/validation/0008",
        "hw3-train-validation/validation/0097",
        "hw3-train-validation/validation/0107"
    ]
    Xtrack, Ytrack = read_list(track)

    vgg16 = VGG16(classes=7, shape=(256, 256, 3))
    vgg16.build(vgg16_npy_path=FLAG.init_from,
                mode=FLAG.mode,
                keep_prob=FLAG.keep_prob)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    def initialize_uninitialized(sess):
        # Initialise only the variables created after the global
        # initializer ran (optimizer slots, global_step)
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = 32
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # Only full batches are used; a trailing partial batch is dropped
        n_train_batches = Xtrain.shape[0] // batch_size
        n_val_batches = Xtest.shape[0] // batch_size
        bar_suffix = '%(index)d/%(max)d - %(percent).1f%% - %(eta)ds'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        # The learning rate is halved every half_cycle steps.
        start_learning_rate = FLAG.lr
        half_cycle = 2000
        learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                   global_step,
                                                   half_cycle,
                                                   0.5,
                                                   staircase=True)
        # Compare by value: identity with a string literal is unreliable
        if opt_type == 'sgd':
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9,
                                             use_nesterov=True)
        else:
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        obj = vgg16.loss
        train_op = opt.minimize(obj, global_step=global_step)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = n_train_batches
        pval.max = n_val_batches

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        # np.float was removed from NumPy; the builtin is equivalent
        current_best_val_loss = float('inf')

        # optimize when the aggregated obj
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            # start training
            stime = time.time()
            bar_train = Bar('Training', max=n_train_batches,
                            suffix=bar_suffix)
            bar_val = Bar('Validation', max=n_val_batches,
                          suffix=bar_suffix)

            train_loss, train_accu = 0.0, 0.0
            for i in range(n_train_batches):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, accu, _ = sess.run(
                    [obj, vgg16.accuracy, train_op],
                    feed_dict={
                        vgg16.x: Xtrain[st:ed, :],
                        vgg16.y: Ytrain[st:ed, :],
                        vgg16.is_train: True
                    })
                train_loss += loss
                train_accu += accu
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value,
                                                         ptrain.max)
            train_loss = train_loss / ptrain.value
            train_accu = train_accu / ptrain.value

            # validation
            val_loss = 0
            val_accu = 0
            for i in range(n_val_batches):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, accu = sess.run(
                    [obj, vgg16.accuracy],
                    feed_dict={
                        vgg16.x: Xtest[st:ed, :],
                        vgg16.y: Ytest[st:ed, :],
                        vgg16.is_train: False
                    })
                val_loss += loss
                val_accu += accu
                pval.value += 1
                # Show progress as done/total
                pval.description = "Testing %s/%s" % (pval.value, pval.max)
            val_loss = val_loss / pval.value
            val_accu = val_accu / pval.value

            # plot
            if epoch_counter % 10 == 0:
                Xplot = sess.run(vgg16.pred,
                                 feed_dict={
                                     vgg16.x: Xtrack[:, :],
                                     vgg16.y: Ytrack[:, :],
                                     vgg16.is_train: False
                                 })
                for i, fname in enumerate(track):
                    # order=0 keeps the resized labels discrete
                    saveimg = skimage.transform.resize(Xplot[i],
                                                       output_shape=(512,
                                                                     512),
                                                       order=0,
                                                       preserve_range=True,
                                                       clip=False)
                    saveimg = label2rgb(saveimg)
                    out_png = os.path.join(
                        FLAG.save_dir,
                        os.path.basename(fname) + "_pred_" +
                        str(epoch_counter) + ".png")
                    imageio.imwrite(out_png, saveimg)
                    print(out_png)

            # early stopping check
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("save in %s" % checkpoint_path)
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain, Ytrain = Xtrain[idx, :, :, :], Ytrain[idx, :]

            # epoch end
            epoch_counter += 1

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, val loss: %.4f, val accu: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime, 2), train_loss, train_accu,
                   val_loss, val_accu))
def train(FLAG):
    """
    Train a VGG16 model on CIFAR-10 or CIFAR-100 with early stopping on
    the validation accuracy, then sparsify the best parameters.

    Reads ``FLAG.dataset``, ``FLAG.init_from``, ``FLAG.prof_type``,
    ``FLAG.lambda_s``, ``FLAG.lambda_m``, ``FLAG.decay``,
    ``FLAG.keep_prob`` and ``FLAG.save_dir``. The best parameters are
    saved to ``para_dict.npy``, the sparsified ones to
    ``sparse_dict.npy``. The run settings are stored back on ``FLAG`` and
    appended as one row to the ``model.csv`` summary file.

    Raises ``ValueError`` when ``FLAG.dataset`` is not supported.
    """
    print("Reading dataset...")
    if FLAG.dataset == 'CIFAR-10':
        train_data = CIFAR10(train=True)
        test_data = CIFAR10(train=False)
        vgg16 = VGG16(classes=10)
    elif FLAG.dataset == 'CIFAR-100':
        train_data = CIFAR100(train=True)
        test_data = CIFAR100(train=False)
        vgg16 = VGG16(classes=100)
    else:
        raise ValueError("dataset should be either CIFAR-10 or CIFAR-100.")
    print("Build VGG16 models for %s..." % FLAG.dataset)

    Xtrain, Ytrain = train_data.train_data, train_data.train_labels
    Xtest, Ytest = test_data.test_data, test_data.test_labels

    vgg16.build(vgg16_npy_path=FLAG.init_from,
                prof_type=FLAG.prof_type,
                conv_pre_training=True,
                fc_pre_training=False)
    vgg16.sparsity_train(l1_gamma=FLAG.lambda_s,
                         l1_gamma_diff=FLAG.lambda_m,
                         decay=FLAG.decay,
                         keep_prob=FLAG.keep_prob)

    # define tasks
    tasks = ['var_dp']
    print(tasks)

    # initial task
    cur_task = tasks[0]
    obj = vgg16.loss_dict[tasks[0]]

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=len(tasks))
    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    tvars_trainable = tf.trainable_variables()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = 64
        # Validation runs on larger chunks than training
        val_batch_size = 200
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        n_train_batches = Xtrain.shape[0] // batch_size
        n_val_batches = Xtest.shape[0] // val_batch_size
        bar_suffix = '%(index)d/%(max)d - %(percent).1f%% - %(eta)ds'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        # Compare by value: identity with a string literal is unreliable
        if opt_type == 'sgd':
            start_learning_rate = 1e-4  # adam # 4e-3 #sgd
            half_cycle = 20000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9,
                                             use_nesterov=True)
        else:
            start_learning_rate = 1e-4  # adam # 4e-3 #sgd
            half_cycle = 10000
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       global_step,
                                                       half_cycle,
                                                       0.5,
                                                       staircase=True)
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        train_op = opt.minimize(obj,
                                global_step=global_step,
                                var_list=tvars_trainable)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = n_train_batches
        # The validation bar counts validation chunks, not training batches
        pval.max = n_val_batches

        spareness = vgg16.spareness(thresh=0.05)
        print("initial spareness: %s" % sess.run(spareness))

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        current_best_val_accu = 0

        # optimize when the aggregated obj
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            def load_batches():
                # Yield the full training batches of the current epoch
                for i in range(n_train_batches):
                    st = i * batch_size
                    ed = (i + 1) * batch_size
                    batch = ia.Batch(images=Xtrain[st:ed, :, :, :],
                                     data=Ytrain[st:ed, :])
                    yield batch

            # Augment batches in background workers
            batch_loader = ia.BatchLoader(load_batches)
            bg_augmenter = ia.BackgroundAugmenter(batch_loader=batch_loader,
                                                  augseq=transform,
                                                  nb_workers=4)

            # start training
            stime = time.time()
            bar_train = Bar('Training', max=n_train_batches,
                            suffix=bar_suffix)
            bar_val = Bar('Validation', max=n_val_batches,
                          suffix=bar_suffix)

            train_loss, train_accu = 0.0, 0.0
            while True:
                batch = bg_augmenter.get_batch()
                if batch is None:
                    print("Finished epoch.")
                    break
                x_images_aug = batch.images_aug
                y_images = batch.data
                loss, accu, _ = sess.run(
                    [obj, vgg16.accu_dict[cur_task], train_op],
                    feed_dict={
                        vgg16.x: x_images_aug,
                        vgg16.y: y_images,
                        vgg16.is_train: True
                    })
                bar_train.next()
                train_loss += loss
                train_accu += accu
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value,
                                                         ptrain.max)
            train_loss = train_loss / ptrain.value
            train_accu = train_accu / ptrain.value
            batch_loader.terminate()
            bg_augmenter.terminate()

            # validation
            val_loss = 0
            val_accu = 0
            for i in range(n_val_batches):
                st = i * val_batch_size
                ed = (i + 1) * val_batch_size
                loss, accu = sess.run(
                    [obj, vgg16.accu_dict[cur_task]],
                    feed_dict={
                        vgg16.x: Xtest[st:ed, :],
                        vgg16.y: Ytest[st:ed, :],
                        vgg16.is_train: False
                    })
                val_loss += loss
                val_accu += accu
                pval.value += 1
                # Show progress as done/total
                pval.description = "Testing %s/%s" % (pval.value, pval.max)
            val_loss = val_loss / pval.value
            val_accu = val_accu / pval.value

            print("\nspareness: %s" % sess.run(spareness))

            # early stopping check
            if (val_accu - current_best_val_accu) > min_delta:
                current_best_val_accu = val_accu
                patience_counter = 0

                para_dict = sess.run(vgg16.para_dict)
                np.save(os.path.join(FLAG.save_dir, "para_dict.npy"),
                        para_dict)
                print("save in %s" %
                      os.path.join(FLAG.save_dir, "para_dict.npy"))
            else:
                patience_counter += 1

            # shuffle Xtrain and Ytrain in the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain, Ytrain = Xtrain[idx, :, :, :], Ytrain[idx, :]

            # epoch end
            epoch_counter += 1

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, val loss: %.4f, val accu at %s: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime, 2), train_loss, train_accu,
                   val_loss, cur_task, val_accu))

        saver.save(sess, checkpoint_path, global_step=epoch_counter)

        # NOTE(review): para_dict only exists once the validation accuracy
        # improved at least once -- confirm this always holds.
        sp, rcut = gammaSparsifyVGG16(para_dict, thresh=0.02)
        np.save(os.path.join(FLAG.save_dir, "sparse_dict.npy"), sp)
        print("sparsify %s in %s" %
              (np.round(1 - rcut, 3),
               os.path.join(FLAG.save_dir, "sparse_dict.npy")))

        arr_spareness.append(1 - rcut)
        np.save(os.path.join(FLAG.save_dir, "sprocess.npy"), arr_spareness)

    # Record the run settings on FLAG so they end up in the summary row
    FLAG.optimizer = opt_type
    FLAG.lr = start_learning_rate
    FLAG.batch_size = batch_size
    FLAG.epoch_end = epoch_counter
    FLAG.val_accu = current_best_val_accu

    # One CSV column per FLAG attribute, in sorted order
    keys = sorted(vars(FLAG))
    header = ",".join(keys) + "\n"
    row = ",".join(str(getattr(FLAG, key)) for key in keys) + "\n"

    # The header is written only when the summary file is created
    if os.path.exists("/home/cmchang/new_CP_CNN/model.csv"):
        with open("/home/cmchang/new_CP_CNN/model.csv", "a") as myfile:
            myfile.write(row)
    else:
        with open("/home/cmchang/new_CP_CNN/model.csv", "w") as myfile:
            myfile.write(header)
            myfile.write(row)
def train(FLAG):
    """
    Train the VAE with early stopping on the validation loss.

    Reads ``FLAG.lambda_KL``, ``FLAG.n_dim``, ``FLAG.batch_size``,
    ``FLAG.lr`` and ``FLAG.save_dir``. A checkpoint is saved whenever the
    validation loss improves by more than ``min_delta``. The run settings
    are stored back on ``FLAG`` and appended as one row to the
    ``model.csv`` summary file.
    """
    print("Reading dataset...")
    # load data
    Xtrain, df_train = read_dataset(TRAIN_CSV, TRAIN_DIR)
    Xtest, df_test = read_dataset(TEST_CSV, TEST_DIR)

    vae = VAE()
    vae.build(lambda_KL=FLAG.lambda_KL,
              n_dim=FLAG.n_dim,
              batch_size=FLAG.batch_size,
              shape=Xtrain.shape[1:])

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    def initialize_uninitialized(sess):
        # Initialise only the variables created after the global
        # initializer ran (optimizer slots, global_step)
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # hyper parameters
        batch_size = FLAG.batch_size
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # Only full batches are used; a trailing partial batch is dropped
        n_train_batches = Xtrain.shape[0] // batch_size
        n_val_batches = Xtest.shape[0] // batch_size
        bar_suffix = '%(index)d/%(max)d - %(percent).1f%% - %(eta)ds'

        # recorder
        epoch_counter = 0

        # optimizer
        global_step = tf.Variable(0, trainable=False)

        # Passing global_step to minimize() will increment it at each step.
        # The learning rate is halved every half_cycle steps.
        start_learning_rate = FLAG.lr
        half_cycle = 2000
        learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                   global_step,
                                                   half_cycle,
                                                   0.5,
                                                   staircase=True)
        # Compare by value: identity with a string literal is unreliable
        if opt_type == 'sgd':
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9,
                                             use_nesterov=True)
        else:
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        obj = vae.train_op
        train_op = opt.minimize(obj, global_step=global_step)

        # progress bar
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = n_train_batches
        pval.max = n_val_batches

        # re-initialize
        initialize_uninitialized(sess)

        # reset due to adding a new task
        patience_counter = 0
        # np.float was removed from NumPy; the builtin is equivalent
        current_best_val_loss = float('inf')

        # optimize when the aggregated obj
        while (patience_counter < early_stop_patience
               and epoch_counter < epoch):

            # start training
            stime = time.time()
            bar_train = Bar('Training', max=n_train_batches,
                            suffix=bar_suffix)
            bar_val = Bar('Validation', max=n_val_batches,
                          suffix=bar_suffix)

            train_loss = 0.0
            train_reconstruction_loss = 0.0
            train_kl_loss = 0.0
            for i in range(n_train_batches):
                st = i * batch_size
                ed = (i + 1) * batch_size
                # The input is also the reconstruction target
                loss, reconstruction_loss, kl_loss, _ = sess.run(
                    [
                        obj, vae.loss['reconstruction'],
                        vae.loss['KL_loss'], train_op
                    ],
                    feed_dict={
                        vae.x: Xtrain[st:ed, :],
                        vae.y: Xtrain[st:ed, :],
                        vae.is_train: True
                    })
                print(loss)
                print(reconstruction_loss)
                print(kl_loss)
                train_loss += loss
                train_reconstruction_loss += reconstruction_loss
                train_kl_loss += kl_loss
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value,
                                                         ptrain.max)

            # Dump the reconstruction of the first 64 training samples
            output = sess.run(
                [vae.output],
                feed_dict={
                    vae.x: Xtrain[0:64, :],
                    vae.y: Xtrain[0:64, :],
                    vae.is_train: False
                })
            print("=== train data ====")
            print(output)

            train_loss = train_loss / ptrain.value
            train_reconstruction_loss = (train_reconstruction_loss /
                                         ptrain.value)
            train_kl_loss = train_kl_loss / ptrain.value

            # validation
            val_loss = 0
            val_reconstruction_loss = 0.0
            val_kl_loss = 0.0
            for i in range(n_val_batches):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, reconstruction_loss, kl_loss = sess.run(
                    [obj, vae.loss['reconstruction'], vae.loss['KL_loss']],
                    feed_dict={
                        vae.x: Xtest[st:ed, :],
                        vae.y: Xtest[st:ed, :],
                        vae.is_train: False
                    })
                val_loss += loss
                val_reconstruction_loss += reconstruction_loss
                val_kl_loss += kl_loss
                pval.value += 1
                # Show progress as done/total
                pval.description = "Testing %s/%s" % (pval.value, pval.max)
            val_loss = val_loss / pval.value
            val_reconstruction_loss = val_reconstruction_loss / pval.value
            val_kl_loss = val_kl_loss / pval.value

            # early stopping check
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("save in %s" % checkpoint_path)
            else:
                patience_counter += 1

            # shuffle Xtrain in the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain = Xtrain[idx, :, :, :]

            # epoch end
            epoch_counter += 1

            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, train recon loss: %.4f, train kl loss: %.4f, val loss: %.4f, val recon loss: %.4f, val kl loss: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime, 2), train_loss,
                   train_reconstruction_loss, train_kl_loss, val_loss,
                   val_reconstruction_loss, val_kl_loss))

    # Record the run settings on FLAG so they end up in the summary row
    FLAG.optimizer = opt_type
    FLAG.lr = start_learning_rate
    FLAG.batch_size = batch_size
    FLAG.epoch_end = epoch_counter
    FLAG.val_loss = current_best_val_loss

    # One CSV column per FLAG attribute, in sorted order. The header needs
    # its own newline so the first row does not end up on the same line.
    keys = sorted(vars(FLAG))
    header = ",".join(keys) + "\n"
    row = ",".join(str(getattr(FLAG, key)) for key in keys) + "\n"

    # The header is written only when the summary file is created
    if os.path.exists("/home/cmchang/DLCV2018SPRING/hw4/model.csv"):
        with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv",
                  "a") as myfile:
            myfile.write(row)
    else:
        with open("/home/cmchang/DLCV2018SPRING/hw4/model.csv",
                  "w") as myfile:
            myfile.write(header)
            myfile.write(row)
def get_press_series(spliter, color, difference, paddings=2):
    """
    Turn the per-frame key labels of every set in ``spliter`` into
    keypress series.

    For each set the images are pre-loaded (padded by ``paddings``) into
    the global ``tmp_imgs``. For each of the 88 keys whose colour is in
    ``color``, every run of consecutive frames with a positive label is
    handed to ``add_series`` together with its first and last frame.
    ``tmp_imgs`` is reset after each set.
    """
    global tmp_imgs

    key_height = 106
    white_key_width = 2 * paddings + 17
    black_key_width = 2 * paddings + 16

    print('Start extracting keypress series ...')
    print(f' White width: {white_key_width}px')
    print(f' Black width: {black_key_width}px')
    print(f' Height: {key_height}px')
    print('')

    for name in spliter:
        n_frames = y_org[name].shape[0]

        # Stop at the first image that yields 36 black-key boundaries;
        # otherwise the result for the last image is kept
        black_coor = None
        for path in X_path[name]:
            black_coor = get_black_boundaries(cv2.imread(path))
            if len(black_coor) == 36:
                break

        # One row per key instead of one row per frame
        per_key = np.transpose(y_org[name], (1, 0))

        print('Pre-loading images ...')
        bar = IntProgress(max=n_frames)
        display(bar)
        for frame_idx in range(n_frames):
            frame = cv2.imread(X_path[name][frame_idx])
            tmp_imgs.append(pad_img(frame, paddings))
            bar.value += 1
        bar.close()

        bar = IntProgress(max=88)
        display(bar)
        for k in range(88):
            col = 'black' if k in black_mask else 'white'
            if col not in color:
                continue

            # Frame indices at which key k is pressed
            pressed = np.argwhere(per_key[k] > 0).flatten()
            if pressed.shape[0] == 0:
                continue

            # Black keys additionally receive the boundary coordinates
            if col == 'black':
                extra = (black_coor, difference)
            else:
                extra = (difference,)

            n_pressed = len(pressed)
            run_start = pressed[0]
            for pos, frame_idx in enumerate(pressed):
                if pos % 32 == 0:
                    bar.description = f'{pos}/{n_pressed}'
                # A gap in the frame indices closes the current run
                if pos != 0 and frame_idx != pressed[pos - 1] + 1:
                    add_series(name, col, run_start, pressed[pos - 1], k,
                               paddings, *extra)
                    run_start = frame_idx
            # The run that is still open ends at the last pressed frame
            add_series(name, col, run_start, pressed[-1], k, paddings,
                       *extra)

            bar.value += 1
        bar.close()

        # Release the pre-loaded images of this set
        tmp_imgs = []
        print(f'{name} set loading finished ...')
        print(' Pressed white keys: ' + str(len(X_series[name]['white'])))
        print(' Pressed black keys: ' + str(len(X_series[name]['black'])))
# 2015-2016
# Outer-merge every CSV of the component folders (all sub-folders except
# "Demographics" and "Dietary") onto the demographics table, keyed by SEQN.
mypath = "/Users/Tim/Desktop/scor_test/Data/2015-2016/"
mydirs = [f for f in listdir(mypath) if isdir(join(mypath, f))]
mydirs.remove("Demographics")
mydirs.remove("Dietary")

df9 = pd.read_csv(
    "/Users/Tim/Desktop/scor_test/Data/2015-2016/Demographics/DEMO_I.csv",
    encoding='utf8')
extension = 'csv'

# Initialize a progress bar with one step per folder
progress = IntProgress()
progress.max = len(mydirs)
progress.description = '(Init)'
display(progress)

for mydir in mydirs:
    # The working directory is switched so the glob pattern stays relative
    os.chdir(mypath + mydir)
    all_filenames = glob.glob('*.{}'.format(extension))
    for file in all_filenames:
        df_temp = pd.read_csv(file, encoding='utf8')
        try:
            df9 = pd.merge(df9, df_temp, how='outer', on='SEQN')
        except Exception:
            # Report the file that could not be merged and carry on;
            # KeyboardInterrupt / SystemExit are no longer swallowed
            print(mypath + mydir + '/' + file)
    progress.value += 1
    progress.description = mydir

progress.description = '(Done)'
k = 0