def get_all_post_data(cookie):
    """Collect post data for every account listed in ``weibo_ids.csv``.

    Rows are appended to ``<YYYYMMDD>_post_data.csv`` in the working
    directory; the CSV header is written only when that file did not exist
    before this call.

    Args:
        cookie: Object exposing ``get_cookie()``. Its value is logged and the
            object is handed on to ``get_post_data``.
    """
    today = datetime.date.today().strftime("%Y%m%d")
    filename = "%s_post_data.csv" % today

    accounts = []
    with open("weibo_ids.csv") as id_file:
        for row in id_file:
            fields = row.strip().split(",")
            # First column is decoded from GB2312 and re-encoded as UTF-8.
            display_name = fields[0].decode("GB2312").encode("utf-8")
            account_id = fields[1]
            # The header row carries the literal "id" in the second column.
            if account_id != "id":
                accounts.append((display_name, account_id))

    # Must be evaluated before open(..., "a") creates the file.
    needs_header = not os.path.isfile(filename)
    with open(filename, "a") as out:
        if needs_header:
            print(
                "date,time,name,username,mid,comment,up,forward,price,address",
                file=out)
        for display_name, account_id in accounts:
            utils.log_print(
                "[** LOG **] Get post data %s, %s, %s" %
                (display_name, account_id, cookie.get_cookie()))
            get_post_data(cookie, account_id, display_name, out)
def test(model_folder='/home/stormlab/seg/LSTM-UNet-Outputs/Retrained/LSTMUNet/MyRun_SIM/2020-03-03_191130',
         image_name='image7.png'):
    """Run a saved model on a single image and plot the prediction.

    Args:
        model_folder: Directory holding ``model_params.pickle``,
            ``model.ckpt`` and the input image. Defaults to the run folder
            that used to be hard-coded here.
        image_name: File name of the image inside ``model_folder``.

    Returns:
        The prediction with axes 0, 1 and 4 squeezed out.

    Raises:
        IOError: If the image cannot be read by OpenCV.
    """
    with open(os.path.join(model_folder, 'model_params.pickle'), 'rb') as fobj:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at run folders you trust.
        model_dict = pickle.load(fobj)
    model_cls = get_model(model_dict['name'])

    device = '/gpu:0'
    with tf.device(device):
        model = model_cls(*model_dict['params'],
                          data_format='NHWC',
                          pad_image=False)
        weights_path = os.path.join(model_folder, 'model.ckpt')
        model.load_weights(weights_path)
        # Log the path that was actually loaded.
        log_print("Restored from {}".format(weights_path))

        image_path = os.path.join(model_folder, image_name)
        image = cv2.imread(image_path, -1)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Could not read image: {}".format(image_path))
        plt.imshow(image, cmap='gray')

        img = cv2.resize(image, (64, 64), interpolation=cv2.INTER_AREA)
        image = cv2.normalize(img.astype(np.float32), None, 0.0, 1.0,
                              cv2.NORM_MINMAX)
        # Add two leading axes and one trailing axis for tensorflow, turning
        # the 2-D image into a 5-D array.
        np_image = np.expand_dims(image, axis=0)
        np_image = np.expand_dims(np_image, axis=0)
        np_image = np.expand_dims(np_image, axis=-1)

        logits, pred = model(np_image, False)
        pred = np.squeeze(pred, (0, 1, 4))
        plt.imshow(pred, cmap='gray')
    return pred
def get_post_data(cookie, username, name, f):
    """Scrape one account's posts from weibo.cn and append one CSV row each.

    For every ``div.c`` element that has an ``id``, the promotion price is
    requested from ``pay.biz.weibo.com`` and a row
    ``date,time,name,username,mid,comment,up,forward,price,address`` is
    printed to ``f``. Posts whose price cannot be read are logged and skipped.

    Args:
        cookie: Object exposing ``get_cookie()``; sent as the ``Cookie``
            header of the price request.
        username: Account id used to build the page and price URLs.
        name: Display name written to the row and to the log.
        f: Open, writable file object the rows are printed to.

    Returns:
        None.
    """
    profile_url = "https://weibo.cn/%s" % username
    response = requests.get(profile_url)
    if not response:
        utils.log_print("[** ERROR LOG**] Failed to get weibo page %s" %
                        profile_url)
        return

    soup = BeautifulSoup(response.content, "lxml")
    for div in soup.find_all("div", class_="c"):
        div_id = div.get("id")
        if div_id is None:
            continue
        assert div_id.startswith("M_")
        post_key = div_id[2:]
        weibo_url = "https://www.weibo.com/%s/%s" % (username, post_key)
        mid = url_to_mid(post_key)

        price_url = (
            "https://pay.biz.weibo.com/aj/getprice/advance?mid=%s&touid=%s" %
            (mid, username))
        headers = {
            "Host": "pay.biz.weibo.com",
            "Accept-Encoding": "gzip, deflate, br",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36",
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
            "Cookie": cookie.get_cookie()
        }
        price_response = requests.get(price_url, headers=headers)
        if not price_response:
            utils.log_print("[** ERROR LOG**] Failed to get weibo details %s" %
                            price_url)
            continue
        data = json.loads(price_response.content)

        # Reset per post: previously a post without one of these links either
        # raised NameError or silently reused the previous post's counts.
        comment = up = forward = ""
        # NOTE(review): the byte-string prefix checks assume Python 2 str
        # semantics, like the rest of this function - confirm before porting.
        for a in div.find_all("a"):
            text = a.text.encode("utf-8")
            # Keep digits only; the old pattern "[(\d+)]" was a character
            # class that also let "(", ")" and "+" through.
            digits = "".join(re.findall(r"\d+", text))
            if text.startswith("评论"):
                comment = digits
            elif text.startswith("赞"):
                up = digits
            elif text.startswith("转发"):
                forward = digits

        try:
            price = data["data"]["price"]
        except (KeyError, TypeError):
            utils.log_print(
                "[** ERROR LOG **] Price is not available %s with mid=%s" %
                (name, mid))
            continue

        timestamp = datetime.datetime.now().strftime("%Y%m%d,%H:%M:%S.%f")
        # Success is logged only once the price is known to be present.
        utils.log_print("[** LOG **] Succeed getting post data %s" % name)
        print("%s,%s,%s,%s,%s,%s,%s,%s,%s" %
              (timestamp, name, username, mid, comment, up, forward, price,
               weibo_url),
              file=f)
def get_all_followers(cookie):
    """Fetch follower data for every account listed in ``weibo_ids.csv``.

    Ensures ``<YYYYMMDD>_follower_counts.csv`` exists with its header, then
    calls ``get_followers`` once per account. Failures are logged and appended
    to the hidden ``.<YYYYMMDD>_error.log`` file; processing continues with
    the next account.

    Args:
        cookie: Passed through unchanged to ``get_followers``.

    Returns:
        Name of the follower-count CSV file.
    """
    date = datetime.date.today().strftime("%Y%m%d")
    filename = "%s_follower_counts.csv" % date

    usernames = []
    with open("weibo_ids.csv") as f:
        for line in f:
            segs = line.strip().split(",")
            name = segs[0].decode("GB2312").encode("utf-8")
            # This variant reads the account id from the third column.
            username = segs[2]
            if username == "id":
                continue
            usernames.append((name, username))

    # Must be evaluated before open(..., "a") creates the file.
    is_file = os.path.isfile(filename)
    with open(filename, "a") as f:
        if not is_file:
            print_follower_count_header(f)
        # NOTE(review): the return value of get_followers is not written to
        # ``f`` here; presumably get_followers persists it itself - confirm.
        for name, username in usernames:
            utils.log_print("[** LOG **] Get followers %s" % name)
            try:
                get_followers(cookie, name, username)
                utils.log_print("[** LOG **] Succeed getting followers %s" %
                                name)
            except Exception:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit and logged them as failures.
                utils.log_print(
                    "[** ERROR LOG **] Failed getting followers %s" % name)
                date = datetime.date.today().strftime("%Y%m%d")
                timestamp = datetime.datetime.now().strftime(
                    "%Y%m%d,%H:%M:%S.%f")
                with open(".%s_error.log" % date, "a") as f_err:
                    print("Failed", timestamp, name, username, file=f_err)
    return filename
def get_all_followers(cookie):
    """Append the follower count of every listed account to today's CSV.

    Accounts come from ``weibo_ids.csv`` (display name in the first column,
    account id in the second). Each successful lookup adds a
    ``date,time,name,username,followers`` row to
    ``<YYYYMMDD>_follower_counts.csv``; a lookup that returns ``None`` is
    logged and recorded in ``.<YYYYMMDD>_error.log`` instead.

    Args:
        cookie: Passed through unchanged to ``get_followers``.

    Returns:
        Name of the follower-count CSV file.
    """
    today = datetime.date.today().strftime("%Y%m%d")
    filename = "%s_follower_counts.csv" % today

    accounts = []
    with open("weibo_ids.csv") as id_file:
        for row in id_file:
            fields = row.strip().split(",")
            display_name = fields[0].decode("GB2312").encode("utf-8")
            account_id = fields[1]
            # The header row carries the literal "id" in the second column.
            if account_id != "id":
                accounts.append((display_name, account_id))

    # Must be evaluated before open(..., "a") creates the file.
    write_header = not os.path.isfile(filename)
    with open(filename, "a") as out:
        if write_header:
            print("date,time,name,username,followers", file=out)

        for display_name, account_id in accounts:
            utils.log_print("[** LOG **] Get followers %s" % display_name)
            result = get_followers(cookie, display_name, account_id)

            if result is None:
                utils.log_print(
                    "[** ERROR LOG **] Failed getting followers %s" %
                    display_name)
                error_day = datetime.date.today().strftime("%Y%m%d")
                failed_at = datetime.datetime.now().strftime(
                    "%Y%m%d,%H:%M:%S.%f")
                with open(".%s_error.log" % error_day, "a") as err_out:
                    print("Failed", failed_at, display_name, account_id,
                          file=err_out)
                continue

            utils.log_print("[** LOG **] Succeed getting followers %s" %
                            display_name)
            # The row uses the values returned by get_followers, not the
            # ones read from the id file.
            row_name, row_username, followers = result
            fetched_at = datetime.datetime.now().strftime(
                "%Y%m%d,%H:%M:%S.%f")
            print("%s,%s,%s,%s" %
                  (fetched_at, row_name, row_username, followers),
                  file=out)
    return filename
def get_chart(cookie):
    """Scrape the Weibo chart pages and append their figures to today's CSV.

    Requests pages 1 and 2 of rank types 3, 5 and 6 from ``chart.weibo.com``
    and appends one ``date,time,name,read_number,interaction_number,
    affection_number,loveness_number`` row per chart entry to
    ``<YYYYMMDD>_chart.csv``. A page whose request fails is logged and
    skipped.

    Args:
        cookie: Object exposing ``get_cookie()``; sent as the ``Cookie``
            request header.
    """

    def pro_num(tag):
        # UTF-8 text of the first ``span.pro_num`` found inside ``tag``.
        return tag.find_all("span", class_="pro_num")[0].text.encode("utf-8")

    request_headers = {
        "Host": "chart.weibo.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
        "Cookie": cookie.get_cookie()
    }
    today = datetime.date.today().strftime("%Y%m%d")
    filename = "%s_chart.csv" % today

    # Must be evaluated before open(..., "a") creates the file.
    write_header = not os.path.isfile(filename)
    with open(filename, "a") as out:
        if write_header:
            print(
                "date,time,name,read_number,interaction_number,affection_number,loveness_number",
                file=out)

        for rank in (3, 5, 6):
            for page in (1, 2):
                url = "http://chart.weibo.com/chart?rank_type=%s&page=%s" % (
                    rank, page)
                response = requests.get(url, headers=request_headers)
                if not response:
                    utils.log_print(
                        "[** ERROR LOG **] Failed getting chart with rank=%s and page=%s"
                        % (rank, page))
                    continue

                soup = BeautifulSoup(response.content, "lxml")
                name_cells = soup.find_all(
                    "div", class_=re.compile("sr_name S_func1"))
                # One list of <li> cells per statistic, in output-column order.
                stat_columns = [
                    soup.find_all("li", class_=re.compile(css_class))
                    for css_class in ("arr1 clearfix", "arr2 clearfix",
                                      "arr3 clearfix", "arr4 clearfix")
                ]

                for cells in zip(name_cells, *stat_columns):
                    name = cells[0].text.encode("utf-8")
                    read_num = pro_num(cells[1])
                    interaction_num = pro_num(cells[2])
                    affection_num = pro_num(cells[3])
                    loveness_num = pro_num(cells[4])
                    stamp = datetime.datetime.now().strftime(
                        "%Y%m%d,%H:%M:%S.%f")
                    utils.log_print("[** LOG **] Succeed getting chart %s" %
                                    name)
                    print("%s,%s,%s,%s,%s,%s" %
                          (stamp, name, read_num, interaction_num,
                           affection_num, loveness_num),
                          file=out)
def train():
    """Train the 2D LSTM-UNet with per-epoch validation and a final test.

    Reads all configuration from the module-level ``params`` object. For each
    iteration a training batch is processed; at the end of every epoch the
    validation set is evaluated, console/TensorBoard summaries are written and
    the early-stopping tracker is updated. When the tracker fires, the best
    weights are saved and evaluated on the ``best_test`` split, after which
    training continues with the current weights. At the last iteration the
    ``test`` split is evaluated. Unless ``params.dry_run`` is set, the final
    weights, the pickled model parameters and ``params_list.csv`` are written
    in the ``finally`` clause.

    Metrics at indices 4, 5 and 6 of each metric list are reported as
    accuracy, precision and recall; indices 0-3 are written to TensorBoard as
    true/false positive/negative values.

    Raises:
        Exception: Anything other than ``KeyboardInterrupt``, ``ValueError``
            or ``AWSError`` is re-raised after the ``finally`` clause ran.
    """
    device = '/gpu:0' if params.gpu_id >= 0 else '/cpu:0'
    with tf.device(device):
        # Initialization of the data
        data_provider = params.data_provider

        # Initialization of the model and parameters
        recurrent_dropout = 0.3
        dropout = 0.3
        l1 = 0
        l2 = 0
        kernel_init = 'he_uniform'
        net_type = 'original_net'
        pretraining = False
        lstm_type = 'enc'
        step_per_epoch = data_provider.num_steps_per_epoch
        step_val = data_provider.num_steps_per_val
        step_gif = step_per_epoch * 10
        num_epoch = 0
        patience = 1000
        discriminator = False
        attention_gate = True

        # Initialization neural network
        net_kernel_params = Net_type(recurrent_dropout, (l1, l2),
                                     kernel_init)[net_type]
        model = Nets.ULSTMnet2D(net_kernel_params, params.data_format, False,
                                dropout, pretraining, lstm_type,
                                attention_gate)
        if discriminator:
            disc = Nets.Discriminator(params.data_format)

        # Initialization of Losses and Metrics
        loss_fn = LossFunction()
        train_loss = k.metrics.Mean(name='train_loss')
        train_metrics = METRICS_TRAIN
        val_loss = k.metrics.Mean(name='val_loss')
        val_metrics = METRICS_VAL
        test_loss = k.metrics.Mean(name='test_loss')
        test_metrics = METRICS_TEST
        best_test_loss = k.metrics.Mean(name='best_test_loss')
        best_test_metrics = METRICS_BEST_TEST
        final_test_loss = 0
        final_test_prec = 0
        final_test_acc = 0
        final_test_rec = 0

        # Learning rate step decay
        class decay_lr(tf.keras.optimizers.schedules.LearningRateSchedule):
            def __init__(self):
                print('Learning rate initialized')

            @tf.function
            def __call__(self, step):
                # Branches are tested in order, so only the upper bound is
                # needed. The former strict greater/less pairs matched none
                # of the ranges at exactly 500, 2000, 5000 and 8000, dropping
                # the rate to the final value for that step.
                if tf.less(step, 500):
                    return 0.0001
                elif tf.less(step, 2000):
                    return 0.00005
                elif tf.less(step, 5000):
                    return 0.00001
                elif tf.less(step, 8000):
                    return 0.000005
                else:
                    return 0.000001

        # Early stopping control
        class EarlyStoppingAtMinLoss(tf.keras.callbacks.Callback):
            def __init__(self, patience=0):
                self.patience = patience
                # best_weights stores the weights at which the minimum loss
                # occurs.
                self.best_weights = None
                self.wait = 0
                # The epoch the training stops at.
                self.stopped_epoch = 0
                # Initialize the best as infinity (np.Inf was removed in
                # NumPy 2.0; np.inf works on every version).
                self.best = np.inf
                self.stop = False

            def step_end(self, epoch, val_loss):
                current = np.array(val_loss.result())
                if np.less(current, self.best):
                    self.best = current
                    self.wait = 0
                    self.stopped_epoch = epoch
                    # Record the best weights if the current result is
                    # better (less).
                    self.best_weights = model.get_weights()
                else:
                    self.wait += 1
                    if self.wait >= self.patience:
                        self.stop = True
                return self.stop, self.best_weights

            def on_train_end(self):
                if self.stopped_epoch > 0:
                    print('Epoch %05d: early stopping' %
                          (self.stopped_epoch + 1))
                return self.stopped_epoch

        # Adam optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=decay_lr())
        if discriminator:
            optimizer_disc = tf.keras.optimizers.Adam(
                learning_rate=decay_lr())

        # Checkpoint
        ckpt = tf.train.Checkpoint(step=tf.Variable(0, dtype=tf.int64),
                                   optimizer=optimizer,
                                   net=model)
        # Early Stopping callback
        early_stopping = EarlyStoppingAtMinLoss(patience)

        # Load checkpoint if there is one
        if params.load_checkpoint:
            if os.path.isdir(params.load_checkpoint_path):
                latest_checkpoint = tf.train.latest_checkpoint(
                    params.load_checkpoint_path)
            else:
                latest_checkpoint = params.load_checkpoint_path
            try:
                print(latest_checkpoint)
                if latest_checkpoint is None or latest_checkpoint == '':
                    log_print("Initializing from scratch.")
                else:
                    ckpt.restore(latest_checkpoint)
                    log_print("Restored from {}".format(latest_checkpoint))
            except tf.errors.NotFoundError:
                raise ValueError(
                    "Could not load checkpoint: {}".format(latest_checkpoint))
        else:
            log_print("Initializing from scratch.")

        manager = tf.train.CheckpointManager(
            ckpt,
            os.path.join(params.experiment_save_dir, 'tf_ckpts'),
            max_to_keep=params.save_checkpoint_max_to_keep,
            keep_checkpoint_every_n_hours=params.save_checkpoint_every_N_hours)

        @tf.function
        def train_step(image, label):
            # NOTE(review): the discriminator branches reference ``disc_tape``
            # which is never created (its tape is commented out). They are
            # unreachable while ``discriminator`` is False; enabling it
            # requires opening a second GradientTape here.
            with tf.GradientTape() as gen_tape:
                logits, output = model(image, True)
                if discriminator:
                    real = disc(label[:, -1])
                    fake = disc(output[:, -1])
                    d_loss = tf.reduce_mean(
                        tf.math.log(real) + tf.math.log(1 - fake))
                loss = loss_fn.bce_dice_loss(label, output, logits)
            gradients = gen_tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
            if discriminator:
                gradients_disc = disc_tape.gradient(d_loss,
                                                    disc.trainable_variables)
                optimizer_disc.apply_gradients(
                    zip(gradients_disc, disc.trainable_variables))
            ckpt.step.assign_add(1)
            train_loss(loss)
            if params.channel_axis == 1:
                output = tf.transpose(output, (0, 1, 3, 4, 2))
                label = tf.transpose(label, (0, 1, 3, 4, 2))
            for metric in train_metrics:
                metric(label[:, -1], output[:, -1])
            return output, loss

        def make_eval_step(loss_metric, metrics):
            # Build a forward-only step that updates exactly one loss
            # accumulator and one metric list.
            @tf.function
            def eval_step(image, label):
                logits, output = model(image, False)
                step_loss = loss_fn.bce_dice_loss(label, output, logits)
                loss_metric(step_loss)
                if params.channel_axis == 1:
                    output = tf.transpose(output, (0, 1, 3, 4, 2))
                    label = tf.transpose(label, (0, 1, 3, 4, 2))
                for metric in metrics:
                    metric(label[:, -1], output[:, -1])
                return output, step_loss

            return eval_step

        val_step = make_eval_step(val_loss, val_metrics)
        test_step = make_eval_step(test_loss, test_metrics)
        # The best-model evaluation previously also fed ``test_loss``, which
        # is never reset and therefore skewed the final reported test loss.
        best_test_step = make_eval_step(best_test_loss, best_test_metrics)

        @tf.function
        def test_epoch(image):
            logits, output = model(image, False)
            return output

        # Initialize directories and dictionaries used for tensorboard
        train_summary_writer = val_summary_writer = None
        test_summary_writer = best_test_summary_writer = None
        train_scalars_dict = val_scalars_dict = None
        best_test_scalars_dict = test_scalars_dict = None
        train_log_dir = val_log_dir = None
        test_log_dir = best_test_log_dir = None
        if not params.dry_run:
            train_log_dir = os.path.join(params.experiment_log_dir, 'train')
            val_log_dir = os.path.join(params.experiment_log_dir, 'val')
            test_log_dir = os.path.join(params.experiment_log_dir, 'test')
            best_test_log_dir = os.path.join(params.experiment_log_dir,
                                             'best_test')
            train_summary_writer = tf.summary.create_file_writer(
                train_log_dir)
            val_summary_writer = tf.summary.create_file_writer(val_log_dir)
            test_summary_writer = tf.summary.create_file_writer(test_log_dir)
            best_test_summary_writer = tf.summary.create_file_writer(
                best_test_log_dir)
            train_scalars_dict = {
                'Loss': train_loss,
                'LUT values': train_metrics[0:4],
                'Model evaluation': train_metrics[4:7]
            }
            val_scalars_dict = {
                'Loss': val_loss,
                'LUT values': val_metrics[0:4],
                'Model evaluation': val_metrics[4:7]
            }
            test_scalars_dict = {
                'Loss': test_loss,
                'LUT values': test_metrics[0:4],
                'Model evaluation': test_metrics[4:7]
            }
            best_test_scalars_dict = {
                'Loss': best_test_loss,
                'LUT values': best_test_metrics[0:4],
                'Model evaluation': best_test_metrics[4:7]
            }

        # Write the values in tensorboard
        def tboard(writer, log_dir, step, scalar_loss_dict, images_dict,
                   factor):
            lut_dirs = ('TruePositive', 'FalsePositive', 'TrueNegative',
                        'FalseNegative')
            eval_dirs = ('Accuracy', 'Precision', 'Recall')
            with tf.device('/cpu:0'):
                with writer.as_default():
                    for scalar_loss_name, scalar_loss in \
                            scalar_loss_dict.items():
                        if scalar_loss_name == 'LUT values':
                            # One sub-directory per confusion-matrix entry.
                            for idx, sub_dir in enumerate(lut_dirs):
                                with tf.summary.create_file_writer(
                                        os.path.join(
                                            log_dir,
                                            sub_dir)).as_default():
                                    tf.summary.scalar(
                                        scalar_loss_name,
                                        scalar_loss[idx].result().numpy() /
                                        step_per_epoch * factor,
                                        step=step)
                        elif scalar_loss_name == 'Model evaluation':
                            for idx, sub_dir in enumerate(eval_dirs):
                                with tf.summary.create_file_writer(
                                        os.path.join(
                                            log_dir,
                                            sub_dir)).as_default():
                                    tf.summary.scalar(
                                        scalar_loss_name,
                                        scalar_loss[idx].result() * 100,
                                        step=step)
                        else:
                            tf.summary.scalar(scalar_loss_name,
                                              scalar_loss.result(),
                                              step=step)
                    for image_name, image in images_dict.items():
                        if params.channel_axis == 1:
                            image = tf.transpose(image, (0, 2, 3, 1))
                        tf.summary.image(image_name,
                                         image,
                                         max_outputs=1,
                                         step=step)

        # Binarization of the output followed by morphological open/close
        def post_processing(images):
            images_shape = images.shape
            im_reshaped = np.reshape(
                images, (images.shape[0], images.shape[1], images.shape[2]))
            bw_predictions = np.zeros(
                (images.shape[0], images.shape[1],
                 images.shape[2])).astype(np.float32)
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
            for i in range(0, images.shape[0]):
                ret, bw_predictions[i] = cv2.threshold(
                    im_reshaped[i], 0.8, 1, cv2.THRESH_BINARY)
                bw_predictions[i] = cv2.morphologyEx(bw_predictions[i],
                                                     cv2.MORPH_OPEN, kernel)
                bw_predictions[i] = cv2.morphologyEx(bw_predictions[i],
                                                     cv2.MORPH_CLOSE, kernel)
            bw_predictions = np.reshape(bw_predictions, images_shape)
            return bw_predictions

        # Visualize images and labels of a batch (can also be used to
        # visualize predictions and labels)
        def show_dataset_labels(x_train, y_train):
            num_train = x_train.shape[0] * x_train.shape[1]
            x_train = np.reshape(
                x_train, (x_train.shape[0] * x_train.shape[1],
                          x_train.shape[2], x_train.shape[3]))
            y_train = np.reshape(
                y_train, (y_train.shape[0] * y_train.shape[1],
                          y_train.shape[2], y_train.shape[3]))
            # plt.subplot needs an integer row count; round up so an odd
            # number of images still fits on the two-column grid.
            rows = (num_train + 1) // 2
            plt.figure(figsize=(15, 15))
            for i in range(0, num_train):
                plt.subplot(rows, 2, i + 1)
                plt.imshow(x_train[i, :, :], cmap='gray')
                plt.title("Original Image")
            plt.show()
            for j in range(0, num_train):
                plt.subplot(rows, 2, j + 1)
                plt.imshow(y_train[j, :, :], cmap='gray')
                plt.title("Masked Image")
            plt.suptitle("Examples of Images and their Masks")
            plt.show()

        template = '{}: Step {}, Loss: {}, Accuracy: {}, Precision: {}, Recall: {}'

        def log_metrics(tag, step, loss_metric, metrics):
            # Print loss plus accuracy/precision/recall (in percent).
            log_print(
                template.format(tag, step, loss_metric.result(),
                                metrics[4].result() * 100,
                                metrics[5].result() * 100,
                                metrics[6].result() * 100))

        def reset_metrics(loss_metric, metrics):
            # Clear the first seven metrics and the loss accumulator.
            for i in range(0, 7):
                metrics[i].reset_states()
            loss_metric.reset_states()

        def normalize_for_display(batch):
            # Shift and scale each sample by its own min and max.
            batch = batch - tf.reduce_min(
                batch, axis=(1, 2, 3), keepdims=True)
            return batch / tf.reduce_max(
                batch, axis=(1, 2, 3), keepdims=True)

        def save_inference_model(weights_name, params_name):
            # Save the current weights plus the pickled constructor params.
            model_fname = os.path.join(params.experiment_save_dir,
                                       weights_name)
            model.save_weights(model_fname, save_format='tf')
            with open(
                    os.path.join(params.experiment_save_dir, params_name),
                    'wb') as fobj:
                pickle.dump(
                    {
                        'name': model.__class__.__name__,
                        'params': (net_kernel_params, )
                    },
                    fobj,
                    protocol=pickle.HIGHEST_PROTOCOL)
            return model_fname

        def run_test(split, step_fn, loss_metric, metrics, writer, log_dir,
                     scalars_dict):
            # Evaluate every sample of ``split`` and report each step.
            num_testing = data_provider.num_test()
            data_provider.enqueue_index(split, None)
            for i in range(0, num_testing):
                image_seq, seg_seq = data_provider.read_new_image(split)
                output_sequence, _ = step_fn(image_seq, seg_seq)
                log_metrics('Testing', int(i), loss_metric, metrics)
                # Writers and log dirs only exist outside dry_run; the
                # unguarded call used to crash in dry_run mode.
                if not params.dry_run:
                    imgs_dict = {
                        'Image': normalize_for_display(image_seq[:, -1]),
                        'GT': seg_seq[:, -1],
                        'Output': output_sequence[:, -1]
                    }
                    tboard(writer, log_dir, i, scalars_dict, imgs_dict,
                           step_per_epoch / 1)
                    log_print(
                        'Printed Testing Step: {} to Tensorboard'.format(i))

        log_print('Start of training')
        try:
            train_imgs_dict = {}
            val_imgs_dict = {}
            minimum_found = False
            stopped_epoch = None
            stop = False
            # Initialization for gif
            output_batch_list = {}
            for i in range(0, params.batch_size):
                output_batch_list[str(i)] = []
            first = True
            log_print('Starting of epoch: {}'.format(int(num_epoch)))
            progbar = tf.keras.utils.Progbar(step_per_epoch)

            # Iterate along the number of iterations
            for _ in range(int(ckpt.step), params.num_iterations + 1):
                if params.aws:
                    r = requests.get(
                        'http://169.254.169.254/latest/meta-data/spot/instance-action'
                    )
                    if not r.status_code == 404:
                        raise AWSError('Quitting Spot Instance Gracefully')

                # Read batch
                image_sequence, seg_sequence, is_last_batch = \
                    data_provider.read_batch('train', False, None)
                # Train batch
                train_output_sequence, train_loss_value = train_step(
                    image_sequence, seg_sequence)
                # Postprocessing prediction
                train_bw_predictions = post_processing(
                    train_output_sequence[:, -1])
                progbar.update(int(ckpt.step) - num_epoch * step_per_epoch)

                # If epoch is finished
                if not int(ckpt.step) % step_per_epoch:
                    # Validation steps are performed
                    for i in range(0, step_val):
                        (val_image_sequence, val_seg_sequence,
                         is_last_batch) = data_provider.read_batch(
                             'val', False, None)
                        # If profile is true, write the network graph to
                        # tensorboard
                        if params.profile:
                            graph_dir = os.path.join(
                                params.experiment_log_dir, 'graph/'
                            ) + datetime.now().strftime("%Y%m%d-%H%M%S")
                            tf.summary.trace_on(graph=True, profiler=True)
                            graph_summary_writer = \
                                tf.summary.create_file_writer(graph_dir)
                        val_output_sequence, val_loss_value = val_step(
                            val_image_sequence, val_seg_sequence)
                        if params.profile:
                            with graph_summary_writer.as_default():
                                tf.summary.trace_export(
                                    'train_step',
                                    step=int(ckpt.step),
                                    profiler_outdir=params.experiment_log_dir)
                        val_bw_predictions = post_processing(
                            val_output_sequence[:, -1])

                    # If the best loss is not found yet, update early stopping
                    if not minimum_found:
                        stop, best_weights = early_stopping.step_end(
                            num_epoch, val_loss)

                    # Training values: console, then tensorboard
                    log_metrics('Training', int(ckpt.step), train_loss,
                                train_metrics)
                    if not params.dry_run:
                        train_imgs_dict['Image'] = normalize_for_display(
                            image_sequence[:, -1])
                        train_imgs_dict['GT'] = seg_sequence[:, -1]
                        train_imgs_dict['Output'] = \
                            train_output_sequence[:, -1]
                        train_imgs_dict['Output_bw'] = train_bw_predictions
                        tboard(train_summary_writer, train_log_dir,
                               int(ckpt.step), train_scalars_dict,
                               train_imgs_dict, 1)
                        log_print(
                            'Printed Training Step: {} to Tensorboard'.format(
                                int(ckpt.step)))
                    else:
                        log_print(
                            "WARNING: dry_run flag is ON! Not saving checkpoints or tensorboard data"
                        )
                    reset_metrics(train_loss, train_metrics)

                    # Validation values: console, then tensorboard
                    log_metrics('Validation', int(ckpt.step), val_loss,
                                val_metrics)
                    if not params.dry_run:
                        val_imgs_dict['Image'] = normalize_for_display(
                            val_image_sequence[:, -1])
                        val_imgs_dict['GT'] = val_seg_sequence[:, -1]
                        val_imgs_dict['Output'] = val_output_sequence[:, -1]
                        val_imgs_dict['Output_bw'] = val_bw_predictions
                        tboard(val_summary_writer, val_log_dir,
                               int(ckpt.step), val_scalars_dict,
                               val_imgs_dict, step_per_epoch / step_val)
                        log_print('Printed Validation Step: {} to Tensorboard'.
                                  format(int(ckpt.step)))
                    else:
                        log_print(
                            "WARNING: dry_run flag is ON! Not saving checkpoints or tensorboard data"
                        )
                    reset_metrics(val_loss, val_metrics)

                    num_epoch = num_epoch + 1
                    log_print('Starting of epoch: {}'.format(int(num_epoch)))
                    progbar = tf.keras.utils.Progbar(step_per_epoch)

                # Save the prediction in order to create a gif file
                if not int(ckpt.step) % step_gif:
                    image_seq, seg_seq = data_provider.read_new_image(
                        'gif_test')
                    # Save images and labels just at the first iteration
                    if first:
                        for i in range(0, image_seq.shape[0]):
                            image = np.squeeze(np.array(image_seq[i, -1]))
                            image = cv2.normalize(image, None, 0, 255,
                                                  cv2.NORM_MINMAX)
                            img = Image.fromarray(image.astype(np.uint8))
                            img.save(params.experiment_save_dir + '/image' +
                                     str(i) + '.png')
                            seg = np.squeeze(np.array(seg_seq[i, -1]))
                            seg = cv2.normalize(seg, None, 0, 255,
                                                cv2.NORM_MINMAX)
                            seg = Image.fromarray(seg.astype(np.uint8))
                            seg.save(params.experiment_save_dir + '/label' +
                                     str(i) + '.png')
                        first = False
                    # Perform prediction on the images
                    output = test_epoch(image_seq)
                    # Save gif
                    for i in range(0, image_seq.shape[0]):
                        image = cv2.normalize(np.array(output[i, -1]), None,
                                              0, 255, cv2.NORM_MINMAX)
                        image = image.astype(np.uint8)
                        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
                        image = np.moveaxis(image, -1, 0)
                        output_batch_list[str(i)].append(image)
                        write_gif(output_batch_list[str(i)],
                                  params.experiment_save_dir + '/prediction' +
                                  str(i) + '.gif',
                                  fps=5)
                    # After epoch 200 a gif frame is saved every 50 epochs.
                    if num_epoch == 200:
                        step_gif = step_per_epoch * 50

                # Save checkpoints
                if (int(ckpt.step) % params.save_checkpoint_iteration == 0
                        or int(ckpt.step) == params.num_iterations):
                    if not params.dry_run:
                        save_path = manager.save(int(ckpt.step))
                        log_print("Saved checkpoint for step {}: {}".format(
                            int(ckpt.step), save_path))
                    else:
                        log_print(
                            "WARNING: dry_run flag is ON! Not saving checkpoints or tensorboard data"
                        )

                # If minimum loss found
                if stop:
                    # Save best model, then restore the current weights so
                    # training continues from where it was.
                    actual_weights = model.get_weights()
                    model.set_weights(best_weights)
                    stopped_epoch = early_stopping.on_train_end()
                    log_print('Saving Best Model of inference:')
                    save_inference_model('best_model.ckpt',
                                         'best_model_params.pickle')
                    log_print('Saved. Continue training')
                    stop = False
                    minimum_found = True
                    # Perform test predictions with best model
                    run_test('best_test', best_test_step, best_test_loss,
                             best_test_metrics, best_test_summary_writer,
                             best_test_log_dir, best_test_scalars_dict)
                    reset_metrics(best_test_loss, best_test_metrics)
                    model.set_weights(actual_weights)

                # When it comes to the end
                if int(ckpt.step) == params.num_iterations:
                    run_test('test', test_step, test_loss, test_metrics,
                             test_summary_writer, test_log_dir,
                             test_scalars_dict)
                    # Save final test values
                    final_test_loss = test_loss.result()
                    final_test_acc = test_metrics[4].result() * 100
                    final_test_prec = test_metrics[5].result() * 100
                    final_test_rec = test_metrics[6].result() * 100

        except (KeyboardInterrupt, ValueError, AWSError) as err:
            # Deliberately not re-raised: save a checkpoint and fall through
            # to the ``finally`` clause.
            if not params.dry_run:
                log_print(
                    'Saving Model Before closing due to error: {}'.format(
                        str(err)))
                save_path = manager.save(int(ckpt.step))
                log_print("Saved checkpoint for step {}: {}".format(
                    int(ckpt.step), save_path))
        except Exception as err:
            raise err
        finally:
            if not params.dry_run:
                # Save model's weights
                log_print('Saving Model of inference:')
                model_fname = save_inference_model('model.ckpt',
                                                   'model_params.pickle')
                # Save parameter values and final loss and metrics values
                with open(
                        os.path.join(params.experiment_save_dir,
                                     'params_list.csv'), 'w') as fobj:
                    writer = csv.writer(fobj)
                    model_dict = {
                        'Pretraining': pretraining,
                        # Was the undefined name ``pretraining_type``, which
                        # raised NameError here; ``lstm_type`` is the value
                        # handed to the network in that position.
                        'Mode': lstm_type,
                        'Stopping_epoch': stopped_epoch,
                        'Attention gate': attention_gate,
                        'Dropout': dropout,
                        'Recurrent dropout': recurrent_dropout,
                        'L1': l1,
                        'L2': l2,
                        'Kernel init': kernel_init,
                        'Net type': net_type
                    }
                    model_dict.update({
                        'Loss': np.array(final_test_loss),
                        'Accuracy': np.array(final_test_acc),
                        'Precision': np.array(final_test_prec),
                        'Recall': np.array(final_test_rec)
                    })
                    for key, value in model_dict.items():
                        writer.writerow([key, value])
                log_print('Saved Model to file: {}'.format(model_fname))
                end_time = time.time()
                # NOTE(review): ``start_time`` is not defined in this
                # function; presumably a module-level global - confirm.
                log_print('Program execution time:', end_time - start_time)
            else:
                log_print('WARNING: dry_run flag is ON! Not Saving Model')
            log_print('Closing gracefully')
            log_print('Done')
def train(run_folder, hparams, params_value):
    """Train, validate and test one network for a single hyper-parameter trial.

    The function builds the ULSTM network from the trial's hyper-parameters,
    trains it for ``params.num_iterations`` steps, runs the validation set at
    the end of every epoch, runs the test set once the last iteration is
    reached and finally stores the weights, the network parameters and a CSV
    with the trial values and the final test metrics.

    Args:
        run_folder: directory handed to ``tf.summary.create_file_writer`` for
            the hyper-parameter summary of this trial.
        hparams: mapping indexed with ``HP_DROPOUT``, ``HP_DROPINPUT``,
            ``HP_L1`` and ``HP_L2``; also passed to ``hp.hparams``.
        params_value: sequence of the trial's values. ``params_value[0]``
            names the ``NN_<id>`` output sub-directory and the entries are
            written, in order, to ``params_list.csv``.

    Raises:
        Exception: any error other than ``KeyboardInterrupt``, ``ValueError``
            or ``AWSError`` is re-raised after the ``finally`` clean-up.
    """
    device = '/gpu:0' if params.gpu_id >= 0 else '/cpu:0'
    with tf.device(device):
        # Initialization of the data
        data_provider = params.data_provider

        # Initialization of the model and parameters
        recurrent_dropout = hparams[HP_DROPOUT]
        dropout = hparams[HP_DROPINPUT]
        l1 = hparams[HP_L1]
        l2 = hparams[HP_L2]
        kernel_init = 'he_uniform'
        net_type = 'original_net'
        pretraining = False
        pretraining_type = 'full'
        step_per_epoch = data_provider.num_steps_per_epoch
        step_val = data_provider.num_steps_per_val
        num_epoch = 0
        discriminator = False
        attention_gate = False
        # Every artifact of this trial lives under the same 'NN_<id>' name
        trial_name = 'NN_' + str(params_value[0])
        trial_save_dir = os.path.join(params.experiment_save_dir, trial_name)

        # Initialization neural network
        net_kernel_params = Net_type(recurrent_dropout, (l1, l2),
                                     kernel_init)[net_type]
        model = Nets.ULSTMnet2D(net_kernel_params, params.data_format, False,
                                dropout, pretraining, pretraining_type,
                                attention_gate)
        if discriminator:
            disc = Nets.Discriminator(params.data_format)

        # Initialization of Losses and Metrics
        loss_fn = LossFunction()
        train_loss = k.metrics.Mean(name='train_loss')
        train_metrics = METRICS_TRAIN
        val_loss = k.metrics.Mean(name='val_loss')
        val_metrics = METRICS_VAL
        test_loss = k.metrics.Mean(name='test_loss')
        test_metrics = METRICS_TEST
        # Defaults written to the CSV when the test phase is never reached
        final_test_loss = 0
        final_test_prec = 0
        final_test_acc = 0
        final_test_rec = 0

        # Define learning rate step decay
        class decay_lr(tf.keras.optimizers.schedules.LearningRateSchedule):
            """Piecewise-constant learning rate selected by the step count."""

            def __init__(self):
                print('Learning rate initialized')

            @tf.function
            def __call__(self, step):
                # Earlier branches have already returned, so only the upper
                # bound needs testing. This also makes the boundary steps
                # (500, 2000, ...) use the next interval's rate instead of
                # falling through to the final, smallest rate.
                if tf.less(step, 500):
                    return 0.0001
                elif tf.less(step, 2000):
                    return 0.00005
                elif tf.less(step, 5000):
                    return 0.00001
                elif tf.less(step, 10000):
                    return 0.000005
                elif tf.less(step, 20000):
                    return 0.0000025
                else:
                    return 0.000001

        # Adam optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=decay_lr())
        if discriminator:
            optimizer_disc = tf.keras.optimizers.Adam(
                learning_rate=decay_lr())

        # Checkpoint
        ckpt = tf.train.Checkpoint(step=tf.Variable(0, dtype=tf.int64),
                                   optimizer=optimizer,
                                   net=model)

        # Load checkpoint if there is one
        if params.load_checkpoint:
            if os.path.isdir(params.load_checkpoint_path):
                latest_checkpoint = tf.train.latest_checkpoint(
                    params.load_checkpoint_path)
            else:
                latest_checkpoint = params.load_checkpoint_path
            try:
                print(latest_checkpoint)
                if latest_checkpoint is None or latest_checkpoint == '':
                    log_print("Initializing from scratch.")
                else:
                    ckpt.restore(latest_checkpoint)
                    log_print("Restored from {}".format(latest_checkpoint))
            except tf.errors.NotFoundError:
                raise ValueError(
                    "Could not load checkpoint: {}".format(latest_checkpoint))
        else:
            log_print("Initializing from scratch.")

        manager = tf.train.CheckpointManager(
            ckpt,
            os.path.join(trial_save_dir, 'tf_ckpts'),
            max_to_keep=params.save_checkpoint_max_to_keep,
            keep_checkpoint_every_n_hours=params.save_checkpoint_every_N_hours)

        @tf.function
        def train_step(image, label):
            """Run one optimisation step and update the training metrics."""
            with tf.GradientTape() as gen_tape:
                logits, output = model(image, True)
                if discriminator:
                    real = disc(label[:, -1])
                    fake = disc(output[:, -1])
                    d_loss = tf.reduce_mean(
                        tf.math.log(real) + tf.math.log(1 - fake))
                loss = loss_fn.bce_dice_loss(label, output, logits)
            gradients = gen_tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
            if discriminator:
                # NOTE(review): no tape named disc_tape is ever opened, so
                # this branch would raise NameError if discriminator were
                # switched on; it is unreachable while discriminator is False.
                gradients_disc = disc_tape.gradient(d_loss,
                                                    disc.trainable_variables)
                optimizer_disc.apply_gradients(
                    zip(gradients_disc, disc.trainable_variables))
            ckpt.step.assign_add(1)
            train_loss(loss)
            if params.channel_axis == 1:
                # Move the channel axis last before feeding the metrics
                output = tf.transpose(output, (0, 1, 3, 4, 2))
                label = tf.transpose(label, (0, 1, 3, 4, 2))
            # Metrics are computed on the last element along axis 1 only
            for metric in train_metrics:
                metric(label[:, -1], output[:, -1])
            return output, loss

        @tf.function
        def val_step(image, label):
            """Run the model in inference mode and update validation metrics."""
            logits, output = model(image, False)
            t_loss = loss_fn.bce_dice_loss(label, output, logits)
            val_loss(t_loss)
            if params.channel_axis == 1:
                output = tf.transpose(output, (0, 1, 3, 4, 2))
                label = tf.transpose(label, (0, 1, 3, 4, 2))
            for metric in val_metrics:
                metric(label[:, -1], output[:, -1])
            return output, t_loss

        @tf.function
        def test_step(image, label):
            """Run the model in inference mode and update the test metrics."""
            logits, output = model(image, False)
            tt_loss = loss_fn.bce_dice_loss(label, output, logits)
            # The loss is accumulated exactly once per step
            test_loss(tt_loss)
            if params.channel_axis == 1:
                output = tf.transpose(output, (0, 1, 3, 4, 2))
                label = tf.transpose(label, (0, 1, 3, 4, 2))
            for metric in test_metrics:
                metric(label[:, -1], output[:, -1])
            return output, tt_loss

        # Initialize directories and dictionaries to use on tensorboard
        train_summary_writer = val_summary_writer = test_summary_writer = None
        train_scalars_dict = val_scalars_dict = test_scalars_dict = None
        train_log_dir = val_log_dir = test_log_dir = None
        if not params.dry_run:
            # Initialization of tensorboard's writers and dictionaries
            train_log_dir = os.path.join(params.experiment_log_dir,
                                         trial_name, 'train')
            val_log_dir = os.path.join(params.experiment_log_dir, trial_name,
                                       'val')
            test_log_dir = os.path.join(params.experiment_log_dir, trial_name,
                                        'test')
            train_summary_writer = tf.summary.create_file_writer(train_log_dir)
            val_summary_writer = tf.summary.create_file_writer(val_log_dir)
            test_summary_writer = tf.summary.create_file_writer(test_log_dir)
            train_scalars_dict = {
                'Loss': train_loss,
                'LUT values': train_metrics[0:4],
                'Model evaluation': train_metrics[4:7]
            }
            val_scalars_dict = {
                'Loss': val_loss,
                'LUT values': val_metrics[0:4],
                'Model evaluation': val_metrics[4:7]
            }
            test_scalars_dict = {
                'Loss': test_loss,
                'LUT values': test_metrics[0:4],
                'Model evaluation': test_metrics[4:7]
            }

        # Sub-directories used for the individual curves of grouped metrics,
        # in the order the metrics appear in the corresponding slices above
        lut_dirs = ('TruePositive', 'FalsePositive', 'TrueNegative',
                    'FalseNegative')
        eval_dirs = ('Accuracy', 'Precision', 'Recall')

        def tboard(writer, log_dir, step, scalar_loss_dict, images_dict,
                   factor):
            """Write the given scalars and images to tensorboard at step."""
            with tf.device('/cpu:0'):
                with writer.as_default():
                    for scalar_loss_name, scalar_loss in \
                            scalar_loss_dict.items():
                        if scalar_loss_name == 'LUT values':
                            for sub_dir, metric in zip(lut_dirs, scalar_loss):
                                with tf.summary.create_file_writer(
                                        os.path.join(
                                            log_dir, sub_dir)).as_default():
                                    tf.summary.scalar(
                                        scalar_loss_name,
                                        metric.result().numpy() /
                                        step_per_epoch * factor,
                                        step=step)
                        elif scalar_loss_name == 'Model evaluation':
                            for sub_dir, metric in zip(eval_dirs, scalar_loss):
                                with tf.summary.create_file_writer(
                                        os.path.join(
                                            log_dir, sub_dir)).as_default():
                                    tf.summary.scalar(scalar_loss_name,
                                                      metric.result() * 100,
                                                      step=step)
                        else:
                            tf.summary.scalar(scalar_loss_name,
                                              scalar_loss.result(),
                                              step=step)
                    for image_name, image in images_dict.items():
                        if params.channel_axis == 1:
                            image = tf.transpose(image, (0, 2, 3, 1))
                        tf.summary.image(image_name,
                                         image,
                                         max_outputs=1,
                                         step=step)

        def post_processing(images):
            """Binarize the output and apply morphological open and close."""
            images_shape = images.shape
            plane_shape = (images.shape[0], images.shape[1], images.shape[2])
            im_reshaped = np.reshape(images, plane_shape)
            bw_predictions = np.zeros(plane_shape).astype(np.float32)
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
            for i in range(0, images.shape[0]):
                ret, bw_predictions[i] = cv2.threshold(im_reshaped[i], 0.8, 1,
                                                       cv2.THRESH_BINARY)
                bw_predictions[i] = cv2.morphologyEx(bw_predictions[i],
                                                     cv2.MORPH_OPEN, kernel)
                bw_predictions[i] = cv2.morphologyEx(bw_predictions[i],
                                                     cv2.MORPH_CLOSE, kernel)
            bw_predictions = np.reshape(bw_predictions, images_shape)
            return bw_predictions

        def normalize_for_display(frames):
            """Shift and scale each sample by its own minimum and maximum."""
            frames = frames - tf.reduce_min(
                frames, axis=(1, 2, 3), keepdims=True)
            return frames / tf.reduce_max(
                frames, axis=(1, 2, 3), keepdims=True)

        template = '{}: Step {}, Loss: {}, Accuracy: {}, Precision: {}, Recall: {}'
        try:
            train_imgs_dict = {}
            val_imgs_dict = {}
            test_imgs_dict = {}
            log_print('Starting of epoch: {}'.format(int(num_epoch)))
            progbar = tf.keras.utils.Progbar(step_per_epoch)
            # Iterate along the number of iterations
            for _ in range(int(ckpt.step), params.num_iterations + 1):
                if params.aws:
                    # Any answer other than 404 from the spot instance-action
                    # endpoint aborts training via AWSError
                    r = requests.get(
                        'http://169.254.169.254/latest/meta-data/spot/instance-action'
                    )
                    if not r.status_code == 404:
                        raise AWSError('Quitting Spot Instance Gracefully')
                # Read batch
                image_sequence, seg_sequence, is_last_batch = \
                    data_provider.read_batch('train', False, None)
                # Train batch
                train_output_sequence, train_loss_value = train_step(
                    image_sequence, seg_sequence)
                # Postprocessing prediction
                train_bw_predictions = post_processing(
                    train_output_sequence[:, -1])
                progbar.update(int(ckpt.step) - num_epoch * step_per_epoch)

                # If epoch is finished
                if not int(ckpt.step) % step_per_epoch:
                    # Validation steps are performed
                    for i in range(0, step_val):
                        (val_image_sequence, val_seg_sequence,
                         is_last_batch) = data_provider.read_batch(
                             'val', False, None)
                        val_output_sequence, val_loss_value = val_step(
                            val_image_sequence, val_seg_sequence)
                        val_bw_predictions = post_processing(
                            val_output_sequence[:, -1])

                    # Print training values to console
                    log_print(
                        template.format('Training', int(ckpt.step),
                                        train_loss.result(),
                                        train_metrics[4].result() * 100,
                                        train_metrics[5].result() * 100,
                                        train_metrics[6].result() * 100))
                    # Write the training dictionaries on tensorboard
                    if not params.dry_run:
                        train_imgs_dict['Image'] = normalize_for_display(
                            image_sequence[:, -1])
                        train_imgs_dict['GT'] = seg_sequence[:, -1]
                        train_imgs_dict['Output'] = \
                            train_output_sequence[:, -1]
                        train_imgs_dict['Output_bw'] = train_bw_predictions
                        tboard(train_summary_writer, train_log_dir,
                               int(ckpt.step), train_scalars_dict,
                               train_imgs_dict, 1)
                        log_print(
                            'Printed Training Step: {} to Tensorboard'.format(
                                int(ckpt.step)))
                    else:
                        log_print(
                            "WARNING: dry_run flag is ON! Not saving checkpoints or tensorboard data"
                        )
                    # Reset train metrics
                    for i in range(0, 7):
                        train_metrics[i].reset_states()
                    train_loss.reset_states()

                    # Print validation values to console
                    log_print(
                        template.format('Validation', int(ckpt.step),
                                        val_loss.result(),
                                        val_metrics[4].result() * 100,
                                        val_metrics[5].result() * 100,
                                        val_metrics[6].result() * 100))
                    # Write the validation dictionaries on tensorboard
                    if not params.dry_run:
                        val_imgs_dict['Image'] = normalize_for_display(
                            val_image_sequence[:, -1])
                        val_imgs_dict['GT'] = val_seg_sequence[:, -1]
                        val_imgs_dict['Output'] = val_output_sequence[:, -1]
                        val_imgs_dict['Output_bw'] = val_bw_predictions
                        tboard(val_summary_writer, val_log_dir,
                               int(ckpt.step), val_scalars_dict,
                               val_imgs_dict, step_per_epoch / step_val)
                        log_print('Printed Validation Step: {} to Tensorboard'.
                                  format(int(ckpt.step)))
                    else:
                        log_print(
                            "WARNING: dry_run flag is ON! Not saving checkpoints or tensorboard data"
                        )
                    # Reset validation metrics
                    for i in range(0, 7):
                        val_metrics[i].reset_states()
                    val_loss.reset_states()

                    num_epoch = num_epoch + 1
                    log_print('Starting of epoch: {}'.format(int(num_epoch)))
                    progbar = tf.keras.utils.Progbar(step_per_epoch)

                # Save checkpoints
                if (int(ckpt.step) % params.save_checkpoint_iteration == 0
                        or int(ckpt.step) == params.num_iterations):
                    if not params.dry_run:
                        save_path = manager.save(int(ckpt.step))
                        log_print("Saved checkpoint for step {}: {}".format(
                            int(ckpt.step), save_path))
                    else:
                        log_print(
                            "WARNING: dry_run flag is ON! Mot saving checkpoints or tensorboard data"
                        )

                # When it comes to the end
                if int(ckpt.step) == params.num_iterations:
                    # Create the dataset
                    num_testing = data_provider.num_test()
                    data_provider.enqueue_index('test', None)
                    # Perform test on the new samples
                    for i in range(0, num_testing):
                        image_seq, seg_seq = data_provider.read_new_image(
                            'test')
                        test_output_sequence, test_loss_value = test_step(
                            image_seq, seg_seq)
                        log_print(
                            template.format('Testing', int(i),
                                            test_loss.result(),
                                            test_metrics[4].result() * 100,
                                            test_metrics[5].result() * 100,
                                            test_metrics[6].result() * 100))
                        # The tensorboard writer and log directory exist only
                        # when dry_run is off
                        if not params.dry_run:
                            test_imgs_dict['Image'] = normalize_for_display(
                                image_seq[:, -1])
                            test_imgs_dict['GT'] = seg_seq[:, -1]
                            test_imgs_dict['Output'] = \
                                test_output_sequence[:, -1]
                            tboard(test_summary_writer, test_log_dir, i,
                                   test_scalars_dict, test_imgs_dict,
                                   step_per_epoch / 1)
                            log_print(
                                'Printed Testing Step: {} to Tensorboard'.
                                format(i))
                    # Save final test values
                    final_test_loss = test_loss.result()
                    final_test_acc = test_metrics[4].result() * 100
                    final_test_prec = test_metrics[5].result() * 100
                    final_test_rec = test_metrics[6].result() * 100
                    # Save the values on tensorboard
                    with tf.summary.create_file_writer(
                            run_folder).as_default():
                        # Record the hyperparameters values used in this trial
                        hp.hparams(hparams)
                        precision = test_metrics[5].result() * 100
                        loss = test_loss.result()
                        # Recall comes from the test metrics like the other
                        # three values (the validation metrics were reset at
                        # the end of the epoch).
                        # NOTE(review): precision is scaled by 100 while
                        # recall and accuracy are not - confirm intended.
                        recall = test_metrics[6].result()
                        accuracy = test_metrics[4].result()
                        tf.summary.scalar(METRIC_PRECISION,
                                          precision,
                                          step=ckpt.step)
                        tf.summary.scalar(METRIC_LOSS, loss, step=ckpt.step)
                        tf.summary.scalar(METRIC_RECALL,
                                          recall,
                                          step=ckpt.step)
                        tf.summary.scalar(METRIC_ACCURACY,
                                          accuracy,
                                          step=ckpt.step)

        except (KeyboardInterrupt, ValueError, AWSError) as err:
            # These errors end training quietly after a last checkpoint
            if not params.dry_run:
                log_print(
                    'Saving Model Before closing due to error: {}'.format(
                        str(err)))
                save_path = manager.save(int(ckpt.step))
                log_print("Saved checkpoint for step {}: {}".format(
                    int(ckpt.step), save_path))
        except Exception:
            # Anything else propagates once the finally block has run
            raise
        finally:
            if not params.dry_run:
                # Save model's weights
                log_print('Saving Model of inference:')
                model_fname = os.path.join(trial_save_dir, 'model.ckpt')
                model.save_weights(model_fname, save_format='tf')
                with open(os.path.join(trial_save_dir, 'model_params.pickle'),
                          'wb') as fobj:
                    pickle.dump(
                        {
                            'name': model.__class__.__name__,
                            'params': (net_kernel_params, )
                        },
                        fobj,
                        protocol=pickle.HIGHEST_PROTOCOL)
                # Save hyperparameters values and test results
                # (newline='' is what the csv module expects for its files)
                with open(os.path.join(trial_save_dir, 'params_list.csv'),
                          'w',
                          newline='') as fobj:
                    writer = csv.writer(fobj)
                    model_dict = {'Model': []}
                    model_dict.update(
                        model_dict.fromkeys(dict_param.keys(), []))
                    # Fill the placeholders with the trial values, by position
                    for i, key in enumerate(list(model_dict)):
                        model_dict[key] = params_value[i]
                    model_dict.update({
                        'Loss': np.array(final_test_loss),
                        'Accuracy': np.array(final_test_acc),
                        'Precision': np.array(final_test_prec),
                        'Recall': np.array(final_test_rec)
                    })
                    for key, value in model_dict.items():
                        writer.writerow([key, value])
                log_print('Saved Model to file: {}'.format(model_fname))
                end_time = time.time()
                log_print('Program execution time:', end_time - start_time)
            else:
                log_print('WARNING: dry_run flag is ON! Not Saving Model')
            log_print('Closing gracefully')
            log_print('Done')
def train(train_index, test_index, kfold_dir):
    """Train, validate and test one network for a single cross-validation fold.

    ``train_index`` is split 80/20 into training and validation indices, the
    network is trained for ``params.num_iterations`` steps with a validation
    pass at the end of every epoch, and the test indices are evaluated once
    the last iteration is reached. Weights, network parameters, the final
    test metrics and the per-epoch validation history are written under
    ``params.experiment_k_fold_dir/NN_<kfold_dir>``.

    Args:
        train_index: indices handed to ``train_test_split`` to obtain the
            training and validation subsets.
        test_index: indices enqueued in the data provider for the test pass.
        kfold_dir: identifier used to name the ``NN_<kfold_dir>`` output
            directory.

    Raises:
        Exception: any error other than ``KeyboardInterrupt``, ``ValueError``
            or ``AWSError`` is re-raised after the ``finally`` clean-up.
    """
    device = '/gpu:0' if params.gpu_id >= 0 else '/cpu:0'
    with tf.device(device):
        # Initialization of the data
        data_provider = params.data_provider

        # Initialization of the model and parameters
        recurrent_dropout = 0.3
        dropout = 0.3
        l1 = 0
        l2 = 0
        kernel_init = 'he_uniform'
        net_type = 'original_net'
        pretraining = False
        lstm_type = 'full'
        num_epoch = 0
        discriminator = False
        attention_gate = False
        # Every artifact of this fold lives in the same directory
        fold_save_dir = os.path.join(params.experiment_k_fold_dir,
                                     'NN_' + str(kfold_dir))

        # Initialization neural network
        net_kernel_params = Net_type(recurrent_dropout, (l1, l2),
                                     kernel_init)[net_type]
        model = Nets.ULSTMnet2D(net_kernel_params, params.data_format, False,
                                dropout, pretraining, lstm_type,
                                attention_gate)
        if discriminator:
            disc = Nets.Discriminator(params.data_format)

        # Initialization of Losses and Metrics
        loss_fn = LossFunction()
        jaccard = JaccardIndex()
        train_loss = k.metrics.Mean(name='train_loss')
        train_metrics = METRICS_TRAIN
        val_loss = k.metrics.Mean(name='val_loss')
        val_metrics = METRICS_VAL
        test_loss = k.metrics.Mean(name='test_loss')
        test_jindx = k.metrics.Mean(name='jaccard_index')
        test_metrics = METRICS_TEST
        # Defaults written to the CSV when the test phase is never reached
        final_test_loss = 0
        final_test_prec = 0
        final_test_acc = 0
        final_test_rec = 0
        final_test_jac = 0

        # Define learning rate step decay: the step thresholds depend on the
        # lstm_type, the rates are the same for both variants
        if lstm_type == 'enc':
            lr_bounds = (500, 2000, 5000, 8000)
        else:
            lr_bounds = (700, 2800, 7000, 11200)

        class decay_lr(tf.keras.optimizers.schedules.LearningRateSchedule):
            """Piecewise-constant learning rate selected by the step count."""

            def __init__(self):
                print('Learning rate initialized')

            @tf.function
            def __call__(self, step):
                # Earlier branches have already returned, so only the upper
                # bound needs testing. This also makes a step that equals a
                # threshold use the next interval's rate instead of falling
                # through to the final, smallest rate.
                if tf.less(step, lr_bounds[0]):
                    return 0.0001
                elif tf.less(step, lr_bounds[1]):
                    return 0.00005
                elif tf.less(step, lr_bounds[2]):
                    return 0.00001
                elif tf.less(step, lr_bounds[3]):
                    return 0.000005
                else:
                    return 0.000001

        # Set optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=decay_lr())
        if discriminator:
            optimizer_disc = tf.keras.optimizers.Adam(
                learning_rate=decay_lr())

        # Checkpoint
        ckpt = tf.train.Checkpoint(step=tf.Variable(0, dtype=tf.int64),
                                   optimizer=optimizer,
                                   net=model)

        # Load checkpoint if there is one
        if params.load_checkpoint:
            if os.path.isdir(params.load_checkpoint_path):
                latest_checkpoint = tf.train.latest_checkpoint(
                    params.load_checkpoint_path)
            else:
                latest_checkpoint = params.load_checkpoint_path
            try:
                print(latest_checkpoint)
                if latest_checkpoint is None or latest_checkpoint == '':
                    log_print("Initializing from scratch.")
                else:
                    ckpt.restore(latest_checkpoint)
                    log_print("Restored from {}".format(latest_checkpoint))
            except tf.errors.NotFoundError:
                raise ValueError(
                    "Could not load checkpoint: {}".format(latest_checkpoint))
        else:
            log_print("Initializing from scratch.")

        manager = tf.train.CheckpointManager(
            ckpt,
            os.path.join(fold_save_dir, 'tf_ckpts'),
            max_to_keep=params.save_checkpoint_max_to_keep,
            keep_checkpoint_every_n_hours=params.save_checkpoint_every_N_hours)

        @tf.function
        def train_step(image, label):
            """Run one optimisation step and update the training metrics."""
            with tf.GradientTape() as gen_tape:
                logits, output = model(image, True)
                if discriminator:
                    real = disc(label[:, -1])
                    fake = disc(output[:, -1])
                    d_loss = tf.reduce_mean(
                        tf.math.log(real) + tf.math.log(1 - fake))
                loss = loss_fn.bce_dice_loss(label, output, logits)
            gradients = gen_tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
            if discriminator:
                # NOTE(review): no tape named disc_tape is ever opened, so
                # this branch would raise NameError if discriminator were
                # switched on; it is unreachable while discriminator is False.
                gradients_disc = disc_tape.gradient(d_loss,
                                                    disc.trainable_variables)
                optimizer_disc.apply_gradients(
                    zip(gradients_disc, disc.trainable_variables))
            ckpt.step.assign_add(1)
            train_loss(loss)
            if params.channel_axis == 1:
                # Move the channel axis last before feeding the metrics
                output = tf.transpose(output, (0, 1, 3, 4, 2))
                label = tf.transpose(label, (0, 1, 3, 4, 2))
            # Metrics are computed on the last element along axis 1 only
            for metric in train_metrics:
                metric(label[:, -1], output[:, -1])
            return output, loss

        @tf.function
        def val_step(image, label):
            """Run the model in inference mode and update validation metrics."""
            logits, output = model(image, False)
            t_loss = loss_fn.bce_dice_loss(label, output, logits)
            val_loss(t_loss)
            if params.channel_axis == 1:
                output = tf.transpose(output, (0, 1, 3, 4, 2))
                label = tf.transpose(label, (0, 1, 3, 4, 2))
            for metric in val_metrics:
                metric(label[:, -1], output[:, -1])
            return output, t_loss

        @tf.function
        def test_step(image, label):
            """Run the model in inference mode and update the test metrics."""
            logits, output = model(image, False)
            tt_loss = loss_fn.bce_dice_loss(label, output, logits)
            # The loss is accumulated exactly once per step
            test_loss(tt_loss)
            if params.channel_axis == 1:
                output = tf.transpose(output, (0, 1, 3, 4, 2))
                label = tf.transpose(label, (0, 1, 3, 4, 2))
            for metric in test_metrics:
                metric(label[:, -1], output[:, -1])
            jaccard_ind = jaccard.loss(label, output)
            test_jindx(jaccard_ind)
            return output, tt_loss

        template = '{}: Step {}, Loss: {}, Accuracy: {}, Precision: {}, Recall: {}'
        log_print('Start of training')
        # Bound before the try so the finally block can always read it, even
        # when training stops before the first epoch completes
        val_history = []
        try:
            log_print('Starting of epoch: {}'.format(int(num_epoch)))
            # Divide in train and val set
            training_index, val_index = train_test_split(train_index,
                                                         test_size=0.2)
            # Define number of train and val step per epoch
            step_per_epoch = int(
                np.floor(len(training_index) / params.batch_size))
            step_val = int(np.floor(len(val_index) / params.batch_size))
            progbar = tf.keras.utils.Progbar(step_per_epoch)
            for _ in range(int(ckpt.step), params.num_iterations + 1):
                if params.aws:
                    # Any answer other than 404 from the spot instance-action
                    # endpoint aborts training via AWSError
                    r = requests.get(
                        'http://169.254.169.254/latest/meta-data/spot/instance-action'
                    )
                    if not r.status_code == 404:
                        raise AWSError('Quitting Spot Instance Gracefully')
                # Read batch
                image_sequence, seg_sequence, is_last_batch = \
                    data_provider.read_batch('train', True, training_index)
                # Train batch
                train_output_sequence, train_loss_value = train_step(
                    image_sequence, seg_sequence)
                progbar.update(int(ckpt.step) - num_epoch * step_per_epoch)

                # Save checkpoints
                if (int(ckpt.step) % params.save_checkpoint_iteration == 0
                        or int(ckpt.step) == params.num_iterations):
                    if not params.dry_run:
                        save_path = manager.save(int(ckpt.step))
                        log_print("Saved checkpoint for step {}: {}".format(
                            int(ckpt.step), save_path))
                    else:
                        log_print(
                            "WARNING: dry_run flag is ON! Mot saving checkpoints or tensorboard data"
                        )

                # If epoch is finished
                if not int(ckpt.step) % step_per_epoch:
                    # Validation steps are performed
                    for i in range(0, step_val):
                        (val_image_sequence, val_seg_sequence,
                         is_last_batch) = data_provider.read_batch(
                             'val', True, val_index)
                        val_output_sequence, val_loss_value = val_step(
                            val_image_sequence, val_seg_sequence)
                    # Validation metrics are stored in a dictionary
                    val_dict = {
                        'Accuracy ': np.array(val_metrics[4].result()) * 100,
                        'Precision': np.array(val_metrics[5].result()) * 100,
                        'Recall': np.array(val_metrics[6].result()) * 100
                    }
                    val_history.append(val_dict)

                    # Print training values to console
                    log_print(
                        template.format('Training', int(ckpt.step),
                                        train_loss.result(),
                                        train_metrics[4].result() * 100,
                                        train_metrics[5].result() * 100,
                                        train_metrics[6].result() * 100))
                    # Reset train metrics
                    for i in range(0, 7):
                        train_metrics[i].reset_states()
                    train_loss.reset_states()

                    # Print validation values to console
                    log_print(
                        template.format('Validation', int(ckpt.step),
                                        val_loss.result(),
                                        val_metrics[4].result() * 100,
                                        val_metrics[5].result() * 100,
                                        val_metrics[6].result() * 100))
                    # Reset val metrics
                    for i in range(0, 7):
                        val_metrics[i].reset_states()
                    val_loss.reset_states()

                    num_epoch = num_epoch + 1
                    log_print('Starting of epoch: {}'.format(int(num_epoch)))
                    progbar = tf.keras.utils.Progbar(step_per_epoch)

                # When training is finished
                if int(ckpt.step) == params.num_iterations:
                    # Define number of test steps
                    num_testing = int(
                        np.floor(len(test_index) / params.batch_size))
                    # Create the dataset for test
                    data_provider.enqueue_index('test', test_index)
                    # Perform testing on new data; the metrics accumulate
                    # over all test steps
                    for i in range(0, num_testing):
                        image_seq, seg_seq = data_provider.read_new_image(
                            'test')
                        test_output_sequence, test_loss_value = test_step(
                            image_seq, seg_seq)
                        log_print(
                            template.format('Testing', int(i),
                                            test_loss.result(),
                                            test_metrics[4].result() * 100,
                                            test_metrics[5].result() * 100,
                                            test_metrics[6].result() * 100))
                    # Save test values
                    final_test_loss = test_loss.result()
                    final_test_acc = test_metrics[4].result() * 100
                    final_test_prec = test_metrics[5].result() * 100
                    final_test_rec = test_metrics[6].result() * 100
                    final_test_jac = test_jindx.result()

        except (KeyboardInterrupt, ValueError, AWSError) as err:
            # These errors end training quietly after a last checkpoint
            if not params.dry_run:
                log_print(
                    'Saving Model Before closing due to error: {}'.format(
                        str(err)))
                save_path = manager.save(int(ckpt.step))
                log_print("Saved checkpoint for step {}: {}".format(
                    int(ckpt.step), save_path))
        except Exception:
            # Anything else propagates once the finally block has run
            raise
        finally:
            if not params.dry_run:
                # Save the model
                log_print('Saving Model of inference:')
                model_fname = os.path.join(fold_save_dir, 'model.ckpt')
                model.save_weights(model_fname, save_format='tf')
                with open(os.path.join(fold_save_dir, 'model_params.pickle'),
                          'wb') as fobj:
                    pickle.dump(
                        {
                            'name': model.__class__.__name__,
                            'params': (net_kernel_params, )
                        },
                        fobj,
                        protocol=pickle.HIGHEST_PROTOCOL)
                # Save final loss and metrics values
                # (newline='' is what the csv module expects for its files)
                with open(os.path.join(fold_save_dir, 'metrics_list.csv'),
                          'w',
                          newline='') as fobj:
                    writer = csv.writer(fobj)
                    model_dict = {
                        'Loss': np.array(final_test_loss),
                        'Accuracy': np.array(final_test_acc),
                        'Precision': np.array(final_test_prec),
                        'Recall': np.array(final_test_rec),
                        'Jaccard': np.array(final_test_jac)
                    }
                    for key, value in model_dict.items():
                        writer.writerow([key, value])
                # The history is empty when no epoch completed; there is then
                # no header to derive, so the file is not written
                if val_history:
                    with open(os.path.join(fold_save_dir, 'val_history.csv'),
                              'w',
                              newline='') as f:
                        writer = csv.DictWriter(f, val_history[0].keys())
                        writer.writeheader()
                        writer.writerows(val_history)
                else:
                    log_print('No validation history to save')
                log_print('Saved Model to file: {}'.format(model_fname))
                end_time = time.time()
                log_print('Program execution time:', end_time - start_time)
            else:
                log_print('WARNING: dry_run flag is ON! Not Saving Model')
            log_print('Closing gracefully')
            log_print('Done')
# Remaining command-line options of the inference script
arg_parser.add_argument(
    '--save_intermediate',
    dest='save_intermediate',
    action='store_const',
    const=True,
    help="Save intermediate files",
)
arg_parser.add_argument(
    '--save_intermediate_path',
    dest='save_intermediate_path',
    type=str,
    help="Path to save intermediate files, used only with --save_intermediate",
)
arg_parser.add_argument(
    '--dry_run',
    dest='dry_run',
    action='store_const',
    const=True,
    help="Do not write any outputs: for debugging only",
)

sys_args = sys.argv
input_args = arg_parser.parse_args()
# Forward only the options whose parsed value is not None
args_dict = {
    name: value
    for name, value in vars(input_args).items()
    if value is not None
}
params = CTCInferenceParams(args_dict)
tf_eps = tf.constant(1E-8, name='epsilon')

# 'Done' is logged whether or not inference raises
try:
    inference()
finally:
    log_print('Done')
def _store_cookies(response, cookie, prefixes):
    """Copy selected entries of a response's Set-Cookie header into cookie.

    Args:
        response: HTTP response whose "Set-Cookie" header is scanned.
        cookie: object exposing ``set(key, value)``; updated in place.
        prefixes: tuple of strings; a whitespace-separated header token is
            stored when it starts with one of them.

    Raises:
        KeyError: if the response carries no "Set-Cookie" header.
    """
    for info in response.headers["Set-Cookie"].split():
        if not info.startswith(prefixes):
            continue
        # Split on the first "=" only: the value itself may contain "="
        key, sep, value = info.partition("=")
        if sep:
            cookie.set(key, value)


def get_cookie(username, password):
    """Log in to Weibo and collect the cookies needed for later requests.

    Three requests are made in sequence: a POST to the passport SSO login
    endpoint, a GET of the mobile home page and a GET of one fixed topic
    page. After each one, selected entries of the "Set-Cookie" response
    header are stored in the returned cookie object.

    Args:
        username: account name posted in the login form.
        password: account password posted in the login form.

    Returns:
        The ``Cookie`` object filled with the collected entries.

    Raises:
        KeyError: if one of the responses has no "Set-Cookie" header.
    """
    cookie = Cookie()

    # Step 1: SSO login
    payload = {
        "username": username,
        "password": password,
        "savestate": "1",
        "ec": "0",
        "entry": "mweibo",
        "mainpageflag": "1"
    }
    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        # NOTE(review): fixed length although the payload size depends on
        # the credentials - confirm this header is needed at all
        "Content-Length": "162",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "passport.weibo.cn",
        "Origin": "https://passport.weibo.cn",
        "Referer": "https://passport.weibo.cn/signin/login",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36"
    }
    url = "https://passport.weibo.cn/sso/login"
    response = requests.post(url, data=payload, headers=headers)
    _store_cookies(response, cookie,
                   ("SUB=", "SUHB=", "SCF=", "SSOLoginState="))

    # Step 2: mobile home page, sent with the login cookies.
    # (A captured curl command used to be kept here as a string literal; it
    # had no effect at runtime and embedded session cookie values, so it was
    # removed.)
    headers = {
        "authority": "m.weibo.cn",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "referer": "https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=https%3A%2F%2Fm.weibo.cn%2F",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
        "cookie": cookie.get_cookie()
    }
    url = "https://m.weibo.cn"
    response = requests.get(url, headers=headers)
    _store_cookies(response, cookie, ("M_WEIBOCN_PARAMS", "_T_WM"))
    cookie.set("MLOGIN", "1;")

    # Step 3: an arbitrarily chosen super-topic page, used just to try it out
    page_id = "100808066f8f58c6a0520a79d77ce704ab5ae6"
    headers = {
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36",
        "accept": "application/json, text/plain, */*",
        "referer": "https://m.weibo.cn/p/%s" % page_id,
        "authority": "m.weibo.cn",
        "x-requested-with": "XMLHttpRequest",
        "Cookie": cookie.get_cookie()
    }
    utils.log_print("[** LOG **] Get Cookies %r" % cookie.get_cookie())
    url = "https://m.weibo.cn/p/%s" % page_id
    response = requests.get(url, headers=headers)
    _store_cookies(response, cookie, ("M_WEIBOCN_PARAMS", "WEIBOCN_FROM"))
    utils.log_print("[** LOG **] Get Current Cookie %r" % cookie.get_cookie())
    return cookie
def inference():
    """Run the saved model over an image sequence and write label masks.

    The model class name and constructor parameters are read from
    ``model_params.pickle`` under ``params.model_path`` and the weights from
    ``model.ckpt`` in the same directory. Every frame yielded by
    ``params.data_reader(...).dataset`` is passed through the model; frames
    with a negative time index (the ``params.pre_sequence_frames`` lead-in)
    are run but not saved. Unless ``params.dry_run`` is set, each remaining
    frame is post-processed into an instance label image and written as
    ``mask{time:03d}.tif`` under ``params.output_path``.

    ``KeyboardInterrupt`` and ``ValueError`` raised inside the frame loop are
    printed rather than propagated.
    """
    # Load Model
    # NOTE: pickle.load executes arbitrary code; only load trusted model dirs.
    with open(os.path.join(params.model_path, 'model_params.pickle'),
              'rb') as fobj:
        model_dict = pickle.load(fobj)
    model_cls = get_model(model_dict['name'])

    device = '/gpu:0' if params.gpu_id >= 0 else '/cpu:0'
    with tf.device(device):
        model = model_cls(*model_dict['params'],
                          data_format=params.data_format,
                          pad_image=True)
        model.load_weights(os.path.join(params.model_path, 'model.ckpt'))
        log_print("Restored from {}".format(
            os.path.join(params.model_path, 'model.ckpt')))

        base_out_temp_vis_fname = base_out_temp_label_fname = base_out_fname = None
        if not params.dry_run:
            # NOTE(review): the templates are created when
            # `save_intermediate_path` is set but used below when
            # `save_intermediate` is set -- confirm the two always agree.
            if params.save_intermediate_path:
                base_out_temp_vis_fname = os.path.join(
                    params.save_intermediate_vis_path,
                    'softmax{time:03d}.tif')
                base_out_temp_label_fname = os.path.join(
                    params.save_intermediate_label_path,
                    'mask{time:03d}.tif')
            base_out_fname = os.path.join(params.output_path,
                                          'mask{time:03d}.tif')

        dataset = params.data_reader(
            params.sequence_path,
            params.filename_format,
            pre_sequence_frames=params.pre_sequence_frames).dataset

        try:
            for T, image in enumerate(dataset):
                t = T - params.pre_sequence_frames
                image_shape = image.shape

                # Add the leading singleton dimensions the model call expects.
                if len(image_shape) == 2:
                    if params.data_format == 'NCHW':
                        image = tf.reshape(
                            image, [1, 1, 1, image_shape[0], image_shape[1]])
                    else:
                        image = tf.reshape(
                            image, [1, 1, image_shape[0], image_shape[1], 1])
                elif len(image_shape) == 3:
                    image = tf.reshape(image, [
                        1, 1, image_shape[0], image_shape[1], image_shape[2]
                    ])
                else:
                    raise ValueError()

                _, image_softmax = model(image, training=False)
                image_softmax_np = np.squeeze(image_softmax.numpy(), (0, 1))

                # Lead-in frames are fed to the model but never written out.
                if t < 0:
                    continue

                if not params.dry_run:
                    # NOTE(review): axis 0 is indexed as the class axis here,
                    # which presumably assumes NCHW output -- confirm for NHWC.
                    seg_edge = np.greater_equal(image_softmax_np[2], 0.2)
                    seg_cell = np.logical_and(
                        np.equal(np.argmax(image_softmax_np, 0),
                                 1).astype(np.float32),
                        np.logical_not(seg_edge))
                    seg_edge = seg_edge.astype(np.float32)
                    seg_cell = scipy.ndimage.morphology.binary_fill_holes(
                        seg_cell).astype(np.float32)
                    seg_edge = np.maximum((seg_edge - seg_cell), 0)

                    cc_out = cv2.connectedComponentsWithStats(
                        seg_cell.astype(np.uint8), 8, cv2.CV_32S)
                    num_cells = cc_out[0]
                    labels = cc_out[1]
                    stats = cc_out[2]

                    # Give edge pixels within `edge_dist` of a cell the label
                    # of their nearest cell pixel.
                    dist, ind = scipy.ndimage.morphology.distance_transform_edt(
                        1 - seg_cell, return_indices=True)
                    labels = labels[ind[0, :], ind[1, :]] * seg_edge * (
                        dist < params.edge_dist) + labels

                    # Fill the holes of every labelled component.
                    for n in range(1, num_cells):
                        bw = labels == n
                        if not np.any(bw):
                            continue
                        bw_crop, loc = bbox_crop(bw)
                        fill_crop = scipy.ndimage.morphology.binary_fill_holes(
                            bw_crop).astype(np.float32)
                        fill_diff = fill_crop - bw_crop
                        bw_fill = bbox_fill(bw, fill_diff, loc)
                        labels = labels + bw_fill * n

                    # filter by fov: labels that only appear inside the
                    # `FOV`-pixel border band are scheduled for removal.
                    if params.FOV:
                        fov_im = np.ones_like(labels)
                        fov_im[:params.FOV, :] = 0
                        fov_im[-params.FOV:, :] = 0
                        # Fixed: was `[:, params.FOV]`, which cleared a single
                        # column instead of the left border band.
                        fov_im[:, :params.FOV] = 0
                        fov_im[:, -params.FOV:] = 0
                        fov_labels = labels * fov_im
                        unique_fov_labels = np.unique(fov_labels.flatten())
                        remove_ind = np.setdiff1d(np.arange(num_cells),
                                                  unique_fov_labels)
                    else:
                        remove_ind = []

                    if params.save_intermediate:
                        if params.data_format == 'NCHW':
                            image_softmax_np = np.transpose(
                                image_softmax_np, (1, 2, 0))
                        out_fname = base_out_temp_vis_fname.format(time=t)
                        sigoutnp_vis = np.flip(
                            np.round(image_softmax_np * (2**16 - 1)).astype(
                                np.uint16), 2)
                        cv2.imwrite(filename=out_fname,
                                    img=sigoutnp_vis.astype(np.uint16))
                        log_print("Saved File: {}".format(out_fname))

                    # Relabel the surviving components consecutively from 1.
                    labels_out = np.zeros_like(labels, dtype=np.uint16)
                    # isbi_out_dict = {}
                    p = 0
                    for n in range(1, num_cells):
                        area = stats[n, cv2.CC_STAT_AREA]
                        if params.min_cell_size <= area <= params.max_cell_size and not (
                                n in remove_ind):
                            p += 1
                            # isbi_out_dict[p] = [p, 0, 0, 0]
                            labels_out[labels == n] = p
                        else:
                            labels[labels == n] = 0

                    out_fname = base_out_fname.format(time=t)
                    cv2.imwrite(filename=out_fname,
                                img=labels_out.astype(np.uint16))
                    log_print("Saved File: {}".format(out_fname))

                    if params.save_intermediate:
                        out_fname = base_out_temp_label_fname.format(time=t)
                        cv2.imwrite(filename=out_fname,
                                    img=labels_out.astype(np.uint16))
                        log_print("Saved File: {}".format(out_fname))
        except (KeyboardInterrupt, ValueError) as err:
            print('Error: {}'.format(str(err)))
        finally:
            print('Done!')
def _append_results(history, results):
    """Append every value of *results* to the list stored under its key in *history*.

    Tensor values are converted with ``.detach().item()`` before being stored.
    """
    for key in results:
        res = results[key]
        if torch.is_tensor(res):
            res = res.detach().item()
        history.setdefault(key, []).append(res)


def run_train(dataset_path,
              exp_name,
              max_shapes,
              epochs,
              hidden_dim,
              eval_per,
              variational,
              loss_config,
              enc_lr,
              dec_lr,
              enc_step,
              dec_step,
              enc_decay,
              dec_decay,
              batch_size,
              holdout_perc,
              rd_seed,
              print_per,
              num_gen,
              num_eval,
              keep_missing,
              category,
              load_epoch=None):
    """Train the encoder/decoder pair and periodically evaluate, plot and save.

    Programs are loaded with ``load_progs``, shuffled, converted with
    ``progToData`` and split into train/validation sets using the index files
    ``data_splits/{category}/train.txt`` and ``.../val.txt``. A sample listed
    in neither file is dropped, unless ``keep_missing`` is true, in which case
    it goes to validation with probability ``holdout_perc`` and to training
    otherwise.

    Every epoch runs ``model_train_results`` on the training loader and steps
    both learning-rate schedulers. Every ``eval_per`` epochs the validation
    pass and ``model_eval`` are run, results are logged to
    ``{outpath}/{exp_name}/log.txt`` and the plots under
    ``{outpath}/{exp_name}/plots`` are refreshed.

    Args:
        rd_seed: seed for ``random``, ``numpy`` and ``torch``.
        print_per: the per-epoch training summary is logged every this many epochs.
        num_eval: number of train/val samples used by the evaluation loaders.
        load_epoch: when given, the epoch loop starts at ``load_epoch + 1``.
        (Remaining arguments are forwarded to the model, optimizer, scheduler
        and loader constructors as named.)
    """
    random.seed(rd_seed)
    np.random.seed(rd_seed)
    torch.manual_seed(rd_seed)

    raw_indices, progs = load_progs(dataset_path, max_shapes)
    inds_and_progs = list(zip(raw_indices, progs))
    random.shuffle(inds_and_progs)
    inds_and_progs = inds_and_progs[:max_shapes]

    decoder = FDGRU(hidden_dim)
    decoder.to(device)
    encoder = ENCGRU(hidden_dim)
    encoder.to(device)

    print('Converting progs to tensors')
    samples = []
    for ind, prog in tqdm(inds_and_progs):
        nprog = progToData(prog)
        samples.append((nprog, ind))

    dec_opt = torch.optim.Adam(decoder.parameters(), lr=dec_lr, eps=ADAM_EPS)
    enc_opt = torch.optim.Adam(encoder.parameters(), lr=enc_lr, eps=ADAM_EPS)
    dec_sch = torch.optim.lr_scheduler.StepLR(dec_opt,
                                              step_size=dec_step,
                                              gamma=dec_decay)
    enc_sch = torch.optim.lr_scheduler.StepLR(enc_opt,
                                              step_size=enc_step,
                                              gamma=enc_decay)

    train_ind_file = f'data_splits/{category}/train.txt'
    val_ind_file = f'data_splits/{category}/val.txt'
    train_samples = []
    val_samples = []
    train_inds = getInds(train_ind_file)
    val_inds = getInds(val_ind_file)

    misses = 0.
    # Fixed: `kept` was incremented below without ever being initialised,
    # raising NameError on the first unlisted sample when keep_missing is set.
    kept = 0
    for (prog, ind) in samples:
        if ind in train_inds:
            train_samples.append((prog, ind))
        elif ind in val_inds:
            val_samples.append((prog, ind))
        else:
            if keep_missing:
                kept += 1
                if random.random() < holdout_perc:
                    val_samples.append((prog, ind))
                else:
                    train_samples.append((prog, ind))
            else:
                misses += 1
    print(f"Samples missed: {misses}")

    train_num = len(train_samples)
    val_num = len(val_samples)

    train_dataset = DataLoader(train_samples,
                               batch_size,
                               shuffle=True,
                               collate_fn=_col)
    eval_train_dataset = DataLoader(train_samples[:num_eval],
                                    batch_size=1,
                                    shuffle=False,
                                    collate_fn=_col)
    val_dataset = DataLoader(val_samples,
                             batch_size,
                             shuffle=False,
                             collate_fn=_col)
    eval_val_dataset = DataLoader(val_samples[:num_eval],
                                  batch_size=1,
                                  shuffle=False,
                                  collate_fn=_col)

    log_file = f"{outpath}/{exp_name}/log.txt"
    utils.log_print(f"Training size: {train_num}", log_file)
    utils.log_print(f"Validation size: {val_num}", log_file)

    with torch.no_grad():
        gt_gen_results, _ = metrics.gen_metrics(
            [s[0] for s in val_samples[:num_eval]], '', '', '', VERBOSE,
            False)
    utils.log_print(
        f" GT Val Number of parts = {gt_gen_results['num_parts']} "
        f"GT Val Variance = {gt_gen_results['variance']} "
        f"GT Val Rootedness = {gt_gen_results['rootedness']} "
        f"GT Val Stability = {gt_gen_results['stability']} ", log_file)

    aepochs = []
    train_res_plots = {}
    val_res_plots = {}
    gen_res_plots = {}
    eval_res_plots = {'train': {}, 'val': {}}

    # (count key, metrics averaged by that count) for each evaluation result.
    eval_averages = (
        ('nc', ('cub_prm', )),
        ('na', ('xyz_prm', 'cubc')),
        ('count', ('bb', )),
        ('nl', ('cmdc', )),
        ('ns', ('sym_cubc', 'axisc')),
        ('np', ('corr_line_num', 'bad_leaf')),
        ('nsq', ('uv_prm', 'sq_cubc', 'facec')),
        ('nap', ('palignc', )),
        ('nan', ('nalignc', )),
    )
    # Bookkeeping entries removed before logging/plotting.
    eval_dropped = ('nc', 'nan', 'nap', 'na', 'ns', 'nsq', 'nl', 'count',
                    'np', 'cub', 'sym_cub', 'axis', 'cmd', 'miss_hier_prog')

    print('training ...')
    if load_epoch is None:
        start = 0
    else:
        start = load_epoch + 1

    for e in range(start, epochs):
        do_print = (e + 1) % print_per == 0
        t = time.time()
        if do_print:
            utils.log_print(f"\nEpoch {e}:", log_file)

        train_ep_result = model_train_results(train_dataset, encoder, decoder,
                                              dec_opt, enc_opt, variational,
                                              loss_config, 'train', do_print,
                                              exp_name)
        dec_sch.step()
        enc_sch.step()

        if do_print:
            utils.log_print(f" Train Epoch Time = {time.time() - t}",
                            log_file)

        if (e + 1) % eval_per == 0:
            with torch.no_grad():
                t = time.time()
                utils.log_print(f"Doing Evaluation", log_file)
                val_ep_result = model_train_results(val_dataset, encoder,
                                                    decoder, None, None,
                                                    False, loss_config, 'val',
                                                    True, exp_name)
                eval_results, gen_results = model_eval(
                    eval_train_dataset, eval_val_dataset, encoder, decoder,
                    exp_name, e, num_gen)

                for name, named_results in eval_results:
                    for count_key, metric_keys in eval_averages:
                        if named_results[count_key] > 0:
                            for metric_key in metric_keys:
                                named_results[metric_key] /= named_results[
                                    count_key]
                    for dropped_key in eval_dropped:
                        named_results.pop(dropped_key)

                    utils.log_print(
                        f" Evaluation on {name} set: "
                        f"Eval {name} F-score = {named_results['fscores']} "
                        f"Eval {name} IoU = {named_results['iou_shape']} "
                        f"Eval {name} PD = {named_results['param_dist_parts']} "
                        f"Eval {name} Prog Creation Perc: {named_results['prog_creation_perc']} "
                        f"Eval {name} Cub Prm Loss = {named_results['cub_prm']} "
                        f"Eval {name} XYZ Prm Loss = {named_results['xyz_prm']} "
                        f"Eval {name} UV Prm Loss = {named_results['uv_prm']} "
                        f"Eval {name} Sym Prm Loss = {named_results['sym_prm']} "
                        f"Eval {name} BBox Loss = {named_results['bb']} "
                        f"Eval {name} Cmd Corr % {named_results['cmdc']} "
                        f"Eval {name} Cub Corr % {named_results['cubc']} "
                        f"Eval {name} Squeeze Cub Corr % {named_results['sq_cubc']} "
                        f"Eval {name} Face Corr % {named_results['facec']} "
                        f"Eval {name} Pos Align Corr % {named_results['palignc']} "
                        f"Eval {name} Neg Align Corr % {named_results['nalignc']} "
                        f"Eval {name} Sym Cub Corr % {named_results['sym_cubc']} "
                        f"Eval {name} Sym Axis Corr % {named_results['axisc']} "
                        f"Eval {name} Corr Line # % {named_results['corr_line_num']} "
                        f"Eval {name} Bad Leaf % {named_results['bad_leaf']} ",
                        log_file)

                utils.log_print(
                    f" Gen Prog creation % = {gen_results['prog_creation_perc']} "
                    f"Gen Number of parts = {gen_results['num_parts']} "
                    f"Gen Variance = {gen_results['variance']} "
                    f"Gen Rootedness = {gen_results['rootedness']} "
                    f"Gen Stability = {gen_results['stability']} ", log_file)
                utils.log_print(f"Eval Time = {time.time() - t}", log_file)

                # Plotting logic: one history entry per evaluated epoch.
                _append_results(train_res_plots, train_ep_result)
                _append_results(val_res_plots, val_ep_result)
                _append_results(gen_res_plots, gen_results)
                for name, named_results in eval_results:
                    _append_results(eval_res_plots[name], named_results)
                aepochs.append(e)

                for key in train_res_plots:
                    plt.clf()
                    plt.plot(aepochs, train_res_plots[key], label='train')
                    if key in val_res_plots:
                        plt.plot(aepochs, val_res_plots[key], label='val')
                    plt.legend()
                    if key == "recon":
                        plt.yscale('log')
                    plt.grid()
                    plt.savefig(f"{outpath}/{exp_name}/plots/train/{key}.png")

                for key in gen_res_plots:
                    plt.clf()
                    plt.plot(aepochs, gen_res_plots[key])
                    if key == "variance":
                        plt.yscale('log')
                    plt.grid()
                    plt.savefig(f"{outpath}/{exp_name}/plots/gen/{key}.png")

                for key in eval_res_plots['train']:
                    plt.clf()
                    t_p, = plt.plot(aepochs,
                                    eval_res_plots['train'][key],
                                    label='train')
                    if 'val' in eval_res_plots and key in eval_res_plots['val']:
                        v_p, = plt.plot(aepochs,
                                        eval_res_plots['val'][key],
                                        label='val')
                        # The legend needs both handles, so it is only drawn
                        # when the validation curve exists.
                        plt.legend(handles=[t_p, v_p])
                    plt.grid()
                    plt.savefig(f"{outpath}/{exp_name}/plots/eval/{key}.png")

            # NOTE(review): indentation was lost in the reviewed source; models
            # are saved at evaluation cadence here -- confirm this was not
            # meant to run every epoch.
            try:
                if SAVE_MODELS:
                    utils.log_print("Saving Models", log_file)
                    # TODO: torch.save(x.state_dict(), so only the model parameters get saved (along with their names))
                    torch.save(decoder,
                               f"{outpath}/{exp_name}/models/decoder_{e}.pt")
                    torch.save(encoder,
                               f"{outpath}/{exp_name}/models/encoder_{e}.pt")
            except Exception as err:
                # Bound to `err`, not `e`, so the epoch counter is not shadowed.
                utils.log_print(f"Couldnt save models for {err}", log_file)
def main():
    """Fetch a login cookie, then collect follower and hotness data.

    The process exits with status 1 when no cookie can be obtained, because
    both later steps need it. A failure while collecting followers or
    hotness is logged and does not stop the other step.
    """
    utils.log_print("[** LOG **] Get cookie")
    # NOTE(review): get_cookie is called without arguments here although the
    # definition visible in this file takes (username, password) -- confirm
    # which signature this entry point is meant to use.
    try:
        cookie = get_cookie()
    except Exception:
        cookie = None
    if cookie is None:
        utils.log_print("[** ERROR LOG **] Failed getting cookie")
        # Raised outside any try block: the original bare `except:` caught
        # the SystemExit from exit(1), so the script kept running.
        raise SystemExit(1)
    utils.log_print("[** LOG **] Succeed getting cookie")

    try:
        filename = get_all_followers(cookie)
        utils.log_print("[** LOG **] Succeed getting followers")
        print(filename)
    except Exception:
        utils.log_print("[** ERROR LOG **] Failed getting followers")

    utils.log_print("[** LOG **] Get hotness")
    try:
        filename = get_all_hotness(cookie)
        utils.log_print("[** LOG **] Succeed getting hotness")
        print(filename)
    except Exception:
        utils.log_print("[** ERROR LOG **] Failed getting hotness")
def train():
    """Train the network configured in ``params``, with periodic validation.

    Builds the model, losses, metrics, optimizer and checkpoint manager,
    optionally restores a checkpoint, then iterates from the restored step up
    to ``params.num_iterations``. TensorBoard summaries, checkpoints, console
    logs and validation steps run at their configured intervals. Training and
    validation keep separate recurrent states, which are swapped around each
    validation step.

    On ``KeyboardInterrupt``, ``ValueError`` or ``AWSError`` a checkpoint is
    saved (unless ``params.dry_run``) and the error is not propagated. Any
    other exception propagates. In every case the final weights and
    ``model_params.pickle`` are written (unless ``params.dry_run``) and the
    data-provider coordinator is stopped.

    Raises:
        ValueError: when the requested checkpoint cannot be found.
    """
    device = '/gpu:0' if params.gpu_id >= 0 else '/cpu:0'
    with tf.device(device):
        # Data input
        train_data_provider = params.train_data_provider
        val_data_provider = params.val_data_provider
        coord = tf.train.Coordinator()
        train_data_provider.start_queues(coord)
        val_data_provider.start_queues(coord)

        # Model
        model = params.net_model(params.net_kernel_params, params.data_format,
                                 False)

        # Losses and Metrics
        ce_loss = losses.WeightedCELoss(params.channel_axis + 1,
                                        params.class_weights)
        seg_measure = losses.seg_measure(params.channel_axis + 1,
                                         three_d=False)
        train_loss = k.metrics.Mean(name='train_loss')
        train_seg_measure = k.metrics.Mean(name='train_seg_measure')
        train_accuracy = k.metrics.SparseCategoricalAccuracy(
            name='train_accuracy')
        val_loss = k.metrics.Mean(name='val_loss')
        val_accuracy = k.metrics.SparseCategoricalAccuracy(name='val_accuracy')
        val_seg_measure = k.metrics.Mean(name='val_seg_measure')

        # Save Checkpoints
        optimizer = tf.compat.v2.keras.optimizers.Adam(lr=params.learning_rate)
        ckpt = tf.train.Checkpoint(step=tf.Variable(0, dtype=tf.int64),
                                   optimizer=optimizer,
                                   net=model)
        if params.load_checkpoint:
            if os.path.isdir(params.load_checkpoint_path):
                latest_checkpoint = tf.train.latest_checkpoint(
                    params.load_checkpoint_path)
            else:
                latest_checkpoint = params.load_checkpoint_path
            try:
                print(latest_checkpoint)
                if latest_checkpoint is None or latest_checkpoint == '':
                    log_print("Initializing from scratch.")
                else:
                    ckpt.restore(latest_checkpoint)
                    log_print("Restored from {}".format(latest_checkpoint))
            except tf.errors.NotFoundError:
                raise ValueError(
                    "Could not load checkpoint: {}".format(latest_checkpoint))
        else:
            log_print("Initializing from scratch.")

        manager = tf.train.CheckpointManager(
            ckpt,
            os.path.join(params.experiment_save_dir, 'tf_ckpts'),
            max_to_keep=params.save_checkpoint_max_to_keep,
            keep_checkpoint_every_n_hours=params.save_checkpoint_every_N_hours)

        @tf.function
        def train_step(image, label):
            """Run one optimisation step and update the training metrics."""
            with tf.GradientTape() as tape:
                predictions, softmax = model(image, True)
                loss = ce_loss(label, predictions)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))
            ckpt.step.assign_add(1)
            train_loss(loss)
            seg_value = seg_measure(label, predictions)
            if params.channel_axis == 1:
                predictions = tf.transpose(predictions, (0, 1, 3, 4, 2))
                label = tf.transpose(label, (0, 1, 3, 4, 2))
            train_accuracy(label, predictions)
            train_seg_measure(seg_value)
            return softmax, predictions, loss

        @tf.function
        def val_step(image, label):
            """Run one forward pass and update the validation metrics."""
            predictions, softmax = model(image, False)
            t_loss = ce_loss(label, predictions)
            val_loss(t_loss)
            seg_value = seg_measure(label, predictions)
            if params.channel_axis == 1:
                predictions = tf.transpose(predictions, (0, 1, 3, 4, 2))
                label = tf.transpose(label, (0, 1, 3, 4, 2))
            val_accuracy(label, predictions)
            val_seg_measure(seg_value)
            return softmax, predictions, t_loss

        train_summary_writer = val_summary_writer = train_scalars_dict = val_scalars_dict = None
        if not params.dry_run:
            train_log_dir = os.path.join(params.experiment_log_dir, 'train')
            val_log_dir = os.path.join(params.experiment_log_dir, 'val')
            train_summary_writer = tf.summary.create_file_writer(train_log_dir)
            val_summary_writer = tf.summary.create_file_writer(val_log_dir)
            train_scalars_dict = {'Loss': train_loss, 'SEG': train_seg_measure}
            val_scalars_dict = {'Loss': val_loss, 'SEG': val_seg_measure}

        def tboard(writer, step, scalar_loss_dict, images_dict):
            """Write the given scalar metrics and images to *writer* at *step*."""
            with tf.device('/cpu:0'):
                with writer.as_default():
                    for scalar_loss_name, scalar_loss in scalar_loss_dict.items(
                    ):
                        tf.summary.scalar(scalar_loss_name,
                                          scalar_loss.result(),
                                          step=step)
                    for image_name, image in images_dict.items():
                        if params.channel_axis == 1:
                            image = tf.transpose(image, (0, 2, 3, 1))
                        tf.summary.image(image_name,
                                         image,
                                         max_outputs=1,
                                         step=step)

        template = '{}: Step {}, Loss: {}, Accuracy: {}'
        try:
            # if True:
            val_states = model.get_states()
            train_imgs_dict = {}
            val_imgs_dict = {}
            for _ in range(int(ckpt.step), params.num_iterations + 1):
                if params.aws:
                    # Anything but 404 from this metadata endpoint is treated
                    # as a pending spot-instance action.
                    r = requests.get(
                        'http://169.254.169.254/latest/meta-data/spot/instance-action'
                    )
                    if not r.status_code == 404:
                        raise AWSError('Quitting Spot Instance Gracefully')

                image_sequence, seg_sequence, _, is_last_batch = train_data_provider.get_batch(
                )
                if params.profile:
                    tf.summary.trace_on(graph=True, profiler=True)
                train_output_sequence, train_predictions, train_loss_value = train_step(
                    image_sequence, seg_sequence)
                # ckpt.step only changes inside train_step, so it is read once
                # per iteration from here on.
                step = int(ckpt.step)
                # q_stats = [qs().numpy() for qs in params.train_data_provider.q_stat_list]
                # print(q_stats)
                if params.profile:
                    with train_summary_writer.as_default():
                        tf.summary.trace_export(
                            'train_step',
                            step=step,
                            profiler_outdir=params.experiment_log_dir)
                model.reset_states_per_batch(
                    is_last_batch)  # reset states for sequences that ended

                if not step % params.write_to_tb_interval:
                    if not params.dry_run:
                        seg_onehot = tf.one_hot(tf.cast(
                            tf.squeeze(seg_sequence[:, -1],
                                       params.channel_axis), tf.int32),
                                                depth=3)
                        if params.channel_axis == 1:
                            seg_onehot = tf.transpose(seg_onehot,
                                                      (0, 3, 1, 2))
                        # Rescale the displayed image to start at 0 and peak at 1.
                        display_image = image_sequence[:, -1]
                        display_image = display_image - tf.reduce_min(
                            display_image, axis=(1, 2, 3), keepdims=True)
                        display_image = display_image / tf.reduce_max(
                            display_image, axis=(1, 2, 3), keepdims=True)
                        train_imgs_dict['Image'] = display_image
                        train_imgs_dict['GT'] = seg_onehot
                        train_imgs_dict['Output'] = train_output_sequence[:,
                                                                          -1]
                        tboard(train_summary_writer, step, train_scalars_dict,
                               train_imgs_dict)
                        log_print(
                            'Printed Training Step: {} to Tensorboard'.format(
                                step))
                    else:
                        log_print(
                            "WARNING: dry_run flag is ON! Not saving checkpoints or tensorboard data"
                        )

                if step % params.save_checkpoint_iteration == 0 or step == params.num_iterations:
                    if not params.dry_run:
                        save_path = manager.save(step)
                        log_print("Saved checkpoint for step {}: {}".format(
                            step, save_path))
                    else:
                        # Fixed typo in the message ("Mot" -> "Not").
                        log_print(
                            "WARNING: dry_run flag is ON! Not saving checkpoints or tensorboard data"
                        )

                if not step % params.print_to_console_interval:
                    log_print(
                        template.format('Training', step, train_loss.result(),
                                        train_accuracy.result() * 100))

                if not step % params.validation_interval:
                    # Swap in the validation states for the validation step.
                    train_states = model.get_states()
                    model.set_states(val_states)
                    (
                        val_image_sequence,
                        val_seg_sequence,
                        _,
                        val_is_last_batch,
                    ) = val_data_provider.get_batch()
                    val_output_sequence, val_predictions, val_loss_value = val_step(
                        val_image_sequence, val_seg_sequence)
                    model.reset_states_per_batch(
                        val_is_last_batch
                    )  # reset states for sequences that ended
                    if not params.dry_run:
                        seg_onehot = tf.one_hot(tf.cast(
                            tf.squeeze(val_seg_sequence[:, -1],
                                       params.channel_axis), tf.int32),
                                                depth=3)
                        if params.channel_axis == 1:
                            seg_onehot = tf.transpose(seg_onehot,
                                                      (0, 3, 1, 2))
                        display_image = val_image_sequence[:, -1]
                        display_image = display_image - tf.reduce_min(
                            display_image, axis=(1, 2, 3), keepdims=True)
                        display_image = display_image / tf.reduce_max(
                            display_image, axis=(1, 2, 3), keepdims=True)
                        val_imgs_dict['Image'] = display_image
                        val_imgs_dict['GT'] = seg_onehot
                        val_imgs_dict['Output'] = val_output_sequence[:, -1]
                        tboard(val_summary_writer, step, val_scalars_dict,
                               val_imgs_dict)
                        log_print('Printed Validation Step: {} to Tensorboard'.
                                  format(step))
                    else:
                        log_print(
                            "WARNING: dry_run flag is ON! Not saving checkpoints or tensorboard data"
                        )
                    log_print(
                        template.format('Validation', step, val_loss.result(),
                                        val_accuracy.result() * 100))
                    # Store the validation states and restore the training ones.
                    val_states = model.get_states()
                    model.set_states(train_states)

        except (KeyboardInterrupt, ValueError, AWSError) as err:
            if not params.dry_run:
                log_print(
                    'Saving Model Before closing due to error: {}'.format(
                        str(err)))
                save_path = manager.save(int(ckpt.step))
                log_print("Saved checkpoint for step {}: {}".format(
                    int(ckpt.step), save_path))
            # raise err
        except Exception:
            # Fixed: this handler's body was only a commented-out `raise err`,
            # which is a syntax error. Unexpected errors are re-raised; the
            # `finally` block below still runs first.
            # NOTE(review): confirm re-raising (not swallowing) is intended.
            raise
        finally:
            if not params.dry_run:
                log_print('Saving Model of inference:')
                model_fname = os.path.join(params.experiment_save_dir,
                                           'model.ckpt')
                model.save_weights(model_fname, save_format='tf')
                with open(
                        os.path.join(params.experiment_save_dir,
                                     'model_params.pickle'), 'wb') as fobj:
                    pickle.dump(
                        {
                            'name': model.__class__.__name__,
                            'params': (params.net_kernel_params, )
                        },
                        fobj,
                        protocol=pickle.HIGHEST_PROTOCOL)
                log_print('Saved Model to file: {}'.format(model_fname))
            else:
                log_print('WARNING: dry_run flag is ON! Not Saving Model')
            log_print('Closing gracefully')
            coord.request_stop()
            coord.join()
            log_print('Done')
# Script setup: log file, character vocabulary, model, loss and optimiser.
cwd = os.getcwd()
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(os.path.join(cwd, 'logs'), exist_ok=True)

# Timestamp for the log file name: colons removed, whitespace runs joined by a
# single underscore. time.ctime() pads single-digit days with two spaces, and
# the previous `.replace(' ', ' ')` was a no-op that let those through as "__".
_log_stamp = "_".join(time.ctime().replace(':', '').split())
logging.basicConfig(filename="logs/logfile_rnn_" + _log_stamp + ".log",
                    format='%(message)s',
                    level=logging.INFO)

# Set params
all_letters = string.ascii_letters + "0123456789 .,:!?'[]()/+-="
n_letters = len(all_letters) + 1  # Plus EOS marker
n_layers = 3
hidden_size = 300
log_print(f'No. of layers: {n_layers}', logging)
log_print(f'Hidden size: {hidden_size}', logging)

stLSTM = StarTrekLSTM(input_size=n_letters,
                      hidden_size=hidden_size,
                      output_size=n_letters,
                      n_layers=n_layers)
# stLSTM = torch.load(os.path.join(os.getcwd(),'model','best_model.pth'))

criterion = torch.nn.CrossEntropyLoss(
    ignore_index=-1)  # Ignore the padding index -1
learning_rate = 0.005
optimizer = torch.optim.Adam(stLSTM.parameters(), lr=learning_rate)
# Learning rate is multiplied by 0.5 at milestones 10 and 18.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [10, 18],
                                                 gamma=0.5)
def get_all_hotness(cookie):
    """Append hotness data to today's CSV file and return the file name.

    The file is named ``<YYYYMMDD>_hotness.csv`` after today's date and is
    opened in append mode. ``print_hotness_header`` is called only when the
    file did not exist before it was opened; ``get_hotness`` is then called
    with *cookie* and the open file.
    """
    date = datetime.date.today().strftime("%Y%m%d")
    filename = "%s_hotness.csv" % date
    # Checked before open(): opening in append mode creates the file.
    is_file = os.path.isfile(filename)
    with open(filename, "a") as f:
        if not is_file:
            print_hotness_header(f)
        get_hotness(cookie, f)
    return filename


if __name__ == "__main__":
    utils.log_print("[** LOG **] Get cookie")
    try:
        cookie = get_cookie()
        if cookie is None:
            utils.log_print("[** ERROR LOG **] Failed getting cookie")
            exit(1)
        utils.log_print("[** LOG **] Succeed getting cookie")
    # NOTE(review): a bare except also catches the SystemExit raised by
    # exit(1) above, so the script does not actually stop there.
    except:
        utils.log_print("[** ERROR LOG **] Failed getting cookie")
        print("Error")
    try:
        filename = get_all_followers(cookie)
        utils.log_print("[** LOG **] Succeed getting followers")
        print(filename)
    except:
        # NOTE(review): the handler body lies outside the reviewed chunk.
name = option["text"].encode( "utf-8").strip() # encode utf-8 for chinese character strings showNum = option["showNum"] vipJoins = option["vipJoins"] return start_time, finish_time, name, showNum, vipJoins def print_header(f): print(utils.to_csv_line("start_date", "start_time", "finish_date", "finish_time", "name", "gift", "vip_gift"), file=f) if __name__ == "__main__": utils.log_print("[** LOG **] Run vote with") try: url = "http://vote.i.iqiyi.com/eagle/outer/get_votes?uid=null&vids=0536210296010472&t=1518343644386" response = utils.request(url, "json") data = response.get_json_data() options = get_options(data) time = datetime.date.today().strftime("%Y%m%d") filename = "%s_gift_counts.csv" % time is_file = os.path.isfile(filename) with open(filename, "a") as f: if not is_file: print_header(f) for option in options: print(utils.to_csv_line(*extract_option(option)), file=f)
def model_train_results(dataset, encoder, decoder, dec_opt, enc_opt,
                        variational, loss_config, name, do_print, exp_name):
    """Run ``model_train`` over every batch and aggregate the results.

    Per-batch results are summed key by key. Each loss named in
    ``loss_config`` is then divided by the number of batches, and the sum of
    those averages (excluding ``'kl'``) is stored under ``'recon'``.
    Correctness counters are divided by their matching totals, and the totals
    are removed from the result. When ``do_print`` is true a summary is
    written to ``{outpath}/{exp_name}/log.txt``.

    Returns:
        The aggregated result dict, or ``{}`` when nothing was accumulated.
    """
    totals = {}
    num_batches = 0.
    for batch in dataset:
        num_batches += 1.
        batch_result = model_train(batch, encoder, decoder, dec_opt, enc_opt,
                                   variational, loss_config)
        for key, value in batch_result.items():
            if torch.is_tensor(value):
                value = value.detach()
            if key in totals:
                totals[key] += value
            else:
                totals[key] = value

    if not totals:
        return {}

    # Average the configured losses; everything except KL feeds 'recon'.
    recon_total = 0.
    for loss_name in loss_config:
        totals[loss_name] /= num_batches
        if loss_name != 'kl':
            averaged = totals[loss_name]
            if torch.is_tensor(averaged):
                recon_total += averaged.detach().item()
            else:
                recon_total += averaged
    totals['recon'] = recon_total

    # (total key, counters divided by that total), applied only when total > 0.
    ratio_table = (
        ('nl', ('cmdc', )),
        ('na', ('cubc', )),
        ('nc', ('cleaf', )),
        ('nap', ('palignc', )),
        ('nan', ('nalignc', )),
        ('ns', ('sym_cubc', 'axisc')),
        ('nsq', ('sq_cubc', 'facec')),
    )
    for total_key, counter_keys in ratio_table:
        if totals[total_key] > 0:
            for counter_key in counter_keys:
                totals[counter_key] /= totals[total_key]

    # The raw totals are bookkeeping only and are not part of the result.
    for total_key in ('na', 'nl', 'nc', 'nap', 'nan', 'np', 'ns', 'nsq'):
        totals.pop(total_key)

    if do_print:
        summary = (
            f" TF Results for {name} "
            f"Recon Loss = {totals['recon']} "
            f"Cmd Loss = {totals['cmd']} "
            f"Cub Prm Loss = {totals['cub_prm']} "
            f"XYZ Prm Loss = {totals['xyz_prm']} "
            f"UV Prm Loss = {totals['uv_prm']} "
            f"Sym Prm Loss = {totals['sym_prm']} "
            f"Cub Loss = {totals['cub']} "
            f"Squeeze Cub Loss = {totals['sq_cub']} "
            f"Sym Cub Loss = {totals['sym_cub']} "
            f"Sym Axis Loss = {totals['axis']} "
            f"Face Loss = {totals['face']} "
            f"Leaf Loss = {totals['leaf']} "
            f"Align Loss = {totals['align']} "
            f"KL Loss = {totals.get('kl')} "
            f"BBox Loss = {totals['bb']} "
            f"Cmd Corr % {totals['cmdc']} "
            f"Cub Corr % {totals['cubc']} "
            f"Sq Cubb Corr % {totals['sq_cubc']} "
            f"Face Corr % {totals['facec']} "
            f"Leaf Corr % {totals['cleaf']} "
            f"Align Pos Corr = {totals['palignc']} "
            f"Align Neg Corr = {totals['nalignc']} "
            f"Sym Cub Corr % {totals['sym_cubc']} "
            f"Sym Axis Corr % {totals['axisc']}")
        utils.log_print(summary, f"{outpath}/{exp_name}/log.txt")

    return totals