def __init__(self, content_img, style_img, img_width, img_height):
    """
    Initialize.
    :param content_img: image whose content is preserved (the image to be re-styled)
    :param style_img: image whose style is preserved (the style reference)
    :param img_width: image width
    :param img_height: image height
    """
    # Basic information
    self.content_name = str(content_img.split("/")[-1].split(".")[0])
    self.style_name = str(style_img.split("/")[-1].split(".")[0])
    self.img_width = img_width
    self.img_height = img_height

    # Normalize the pixel dimensions of the images
    self.content_img = utils.get_resized_image(content_img, img_width, img_height)
    self.style_img = utils.get_resized_image(style_img, img_width, img_height)
    self.initial_img = utils.generate_noise_image(self.content_img, img_width, img_height)

    # Layers used to extract features
    self.content_layer = "conv4_2"
    self.style_layers = ["conv1_1", "conv2_1", "conv3_1", "conv4_1", "conv5_1"]

    # Weights for the content loss and the style loss
    self.content_w = 0.001
    self.style_w = 1

    # Weights for the individual style layers; deeper layers get larger weights
    self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]

    # Global step and learning rate
    self.gstep = tf.Variable(0, dtype=tf.int32, trainable=False, name="global_step")  # global step
    self.lr = 2.0

    utils.safe_mkdir("outputs/%s_%s" % (self.content_name, self.style_name))
def plot_output(pred_baseline, pred_variation, varID_0, varID_1,
                weights_baseline, weights_variation, model_name, t=''):
    '''Plot weighted NN output histograms for the baseline and variation predictions.'''
    safe_mkdir('plots')
    plt.figure(figsize=(8, 8))
    min_ = min(pred_baseline.min(), pred_variation.min())
    max_ = max(pred_baseline.max(), pred_variation.max())
    bins = np.linspace(min_, max_, 30)
    _ = plt.hist(pred_baseline, bins=bins, histtype='step',
                 label=r'test - {}'.format(varID_0.replace('_', ' ')),
                 weights=weights_baseline)
    _ = plt.hist(pred_variation, bins=bins, histtype='step',
                 label=r'test - {}'.format(varID_1.replace('_', ' ')),
                 weights=weights_variation)
    plt.legend(loc='upper left')
    plt.xlabel('Weighted NN Output')
    plt.savefig(
        os.path.join(
            'plots',
            '{}_{}_{}_output_{}.pdf'.format(model_name, varID_0, varID_1, t)))
    plt.close()
def train(self, n_epochs):
    '''
    The train function alternates between training one epoch and evaluating
    '''
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('checkpoints/convnet_layers')
    train_writer = tf.summary.FileWriter('./graphs/convnet_layers/train', tf.get_default_graph())
    test_writer = tf.summary.FileWriter('./graphs/convnet_layers/eval', tf.get_default_graph())

    # Configure GPU memory usage, otherwise CUDA OUT OF MEMORY errors may occur
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        # Initialize all variables before the session starts training
        sess.run(tf.global_variables_initializer())
        # Keep up to 10000 checkpoints instead of the default 5
        saver = tf.train.Saver(max_to_keep=10000)
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname('checkpoints/convnet_layers/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        step = self.gstep.eval()
        for epoch in range(n_epochs):
            step = self.train_one_epoch(sess, saver, self.train_init, train_writer, epoch, step)
            self.eval_once(sess, self.test_init, test_writer, epoch, step)

    train_writer.close()
    test_writer.close()
def train(self, n_epochs):
    '''
    The train function alternates between training one epoch and evaluating
    '''
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('checkpoints/convnet_mnist')
    writer = tf.summary.FileWriter('./graphs/convnet', tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        exp.set_model_graph(sess.graph)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname('checkpoints/convnet_mnist/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        step = self.gstep.eval()
        for epoch in range(n_epochs):
            step = self.train_one_epoch(sess, saver, self.train_init, writer, epoch, step)
            self.eval_once(sess, self.test_init, writer, epoch, step)
            exp.log_epoch_end(epoch)

    writer.close()
def train(self, n_epochs):
    safe_mkdir('checkpoints')
    safe_mkdir('checkpoints/' + self.checkpoint_dir)

    # To plot two different curves on the same graph we need two different writers
    # that write the same group of summaries.
    train_writer = tf.summary.FileWriter('./graphs/' + self.checkpoint_dir + '/train',
                                         tf.get_default_graph())
    test_writer = tf.summary.FileWriter('./graphs/' + self.checkpoint_dir + '/test',
                                        tf.get_default_graph())

    # self.sess.run(tf.global_variables_initializer())
    # saver = tf.train.Saver(max_to_keep=None)
    # ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/' + self.checkpoint_dir + '/checkpoint'))
    # if ckpt and ckpt.model_checkpoint_path:
    #     saver.restore(self.sess, ckpt.model_checkpoint_path)
    saver = self.load_weights()

    step = self.gstep.eval(session=self.sess)
    cprint("[!] Restarting at iteration {}".format(step), color="yellow")

    for epoch in range(n_epochs):
        step = self.train_one_epoch(saver, train_writer, test_writer, epoch, step)
    return step
def create_spkr_folder(data_dir, spkr_list):
    '''Make one folder per speaker, e.g., ./data/train/FAEM0'''
    for s in spkr_list:
        folder = os.path.join(data_dir, s)
        safe_mkdir(folder)
def __init__(self, content_img, style_img, img_width, img_height):
    self.content_name = str(content_img.split("/")[-1].split(".")[0])
    self.style_name = str(style_img.split("/")[-1].split(".")[0])
    self.img_width = img_width
    self.img_height = img_height
    self.content_img = utils.get_resized_image(content_img, img_width, img_height)
    self.style_img = utils.get_resized_image(style_img, img_width, img_height)
    self.initial_img = utils.generate_noise_image(self.content_img, img_width, img_height)

    self.content_layer = "conv4_2"
    self.style_layers = ["conv1_1", "conv2_1", "conv3_1", "conv4_1", "conv5_1"]

    # Weights for the content loss and the style loss
    self.content_w = 0.001
    self.style_w = 1
    self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]

    self.gstep = tf.Variable(0, dtype=tf.int32, trainable=False, name="global_step")  # global step
    self.lr = 2.0
    utils.safe_mkdir("outputs/%s_%s" % (self.content_name, self.style_name))
def main(args):
    model = init_model(args.network_file, args.weight_file, gpu=args.gpu)
    print args
    dbs = open_dbs(args.lmdbs.split(args.delimiter))
    max_images = min(args.max_images, dbs[0][0].stat()['entries'])
    max_iters = (max_images + args.batch_size - 1) / args.batch_size

    image_num = 0
    safe_mkdir(args.out_dir)
    for iter_num in xrange(max_iters):
        ims, labels = get_batch(dbs, args)
        fprop(model, ims, args)
        for idx in xrange(len(labels)):
            reconstruction = model.blobs[args.blob].data[idx]
            save_image(reconstruction, image_num, args)
            image_num += 1
        if iter_num > 0 and iter_num % 10 == 0:
            print "%.2f%% (%d/%d) Batches" % (100. * iter_num / max_iters, iter_num, max_iters)

    close_dbs(dbs)
def plot_batch_features(features_baseline, features_variation, varID_0, varID_1,
                        weights_baseline, weights_variation, model_name):
    safe_mkdir('plots')
    for fn, (f0, f1) in enumerate(zip(features_baseline.T, features_variation.T)):
        plt.figure(figsize=(8, 8))
        bins = np.linspace(min(f0.min(), f1.min()), max(f0.max(), f1.max()), 30)
        _ = plt.hist(f0, bins=bins, histtype='step',
                     label=r'test - {}'.format(varID_0.replace('_', ' ')),
                     weights=weights_baseline)
        _ = plt.hist(f1, bins=bins, histtype='step',
                     label=r'test - {}'.format(varID_1.replace('_', ' ')),
                     weights=weights_variation)
        plt.legend(loc='upper left')
        plt.savefig(
            os.path.join(
                'plots',
                '{}_{}_{}_{}.pdf'.format(model_name, varID_0, varID_1, fn)))
        plt.close()
def check(self, n_epochs):
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('checkpoints/initcheck')
    writer = tf.summary.FileWriter('./graphs/initcheck', tf.get_default_graph())

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        # Initialize all variables before the session starts
        sess.run(tf.global_variables_initializer())
        # Keep up to 10000 checkpoints instead of the default 5
        saver = tf.train.Saver(max_to_keep=10000)
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname('checkpoints/convnet_layers/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        step = self.gstep.eval()
        for epoch in range(n_epochs):
            step = self.train_one_epoch(sess, saver, self.train_init, writer, epoch, step)
            self.eval_once(sess, self.test_init, writer, epoch, step)

    writer.close()
def train(self, num_train_steps):
    saver = tf.train.Saver()  # defaults to saving all variables - in this case embed_matrix, nce_weight, nce_bias

    initial_step = 0
    utils.safe_mkdir('checkpoints')
    with tf.Session() as sess:
        sess.run(self.iterator.initializer)
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))

        # if that checkpoint exists, restore from checkpoint
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        total_loss = 0.0  # we use this to calculate the average loss over the last SKIP_STEP steps
        writer = tf.summary.FileWriter('graphs/word2vec/lr' + str(self.lr), sess.graph)
        initial_step = self.global_step.eval()

        for index in range(initial_step, initial_step + num_train_steps):
            try:
                loss_batch, _, summary = sess.run([self.loss, self.optimizer, self.summary_op])
                writer.add_summary(summary, global_step=index)
                total_loss += loss_batch
                if (index + 1) % self.skip_step == 0:
                    print('Average loss at step {}: {:5.1f}'.format(index, total_loss / self.skip_step))
                    total_loss = 0.0
                    saver.save(sess, 'checkpoints/skip-gram', index)
            except tf.errors.OutOfRangeError:
                sess.run(self.iterator.initializer)
        writer.close()
def backup_configs(backup_path, skip=False):
    """
    Creates `configs` directory and places config backups there.
    Configs are application settings, generally. .plist files count.
    """
    print_section_header("CONFIGS", Fore.BLUE)
    overwrite_dir_prompt_if_needed(backup_path, skip)
    config = get_config()
    configs_dir_mapping = config["config_path_to_dest_map"]
    plist_files = config["plist_path_to_dest_map"]

    print(Fore.BLUE + Style.BRIGHT + "Backing up configs..." + Style.RESET_ALL)

    # backup config dirs in backup_path/<target>/
    for config, target in configs_dir_mapping.items():
        src_dir = home_prefix(config)
        configs_backup_path = os.path.join(backup_path, target)
        if os.path.isdir(src_dir):
            # TODO: Exclude Sublime/Atom/VS Code Packages here to speed things up
            copytree(src_dir, configs_backup_path, symlinks=True)

    # backup plist files in backup_path/configs/plist/
    print(Fore.BLUE + Style.BRIGHT + "Backing up plist files..." + Style.RESET_ALL)
    plist_backup_path = os.path.join(backup_path, "plist")
    safe_mkdir(plist_backup_path)
    for plist, dest in plist_files.items():
        plist_path = home_prefix(plist)
        if os.path.exists(plist_path):
            copyfile(plist_path, os.path.join(backup_path, dest))
def word2vec(dataset):
    """ Build the graph for word2vec model and train it """
    # Step 1: create iterator and get input, output from the dataset
    iterator = dataset.make_initializable_iterator()
    center_words, target_words = iterator.get_next()

    # Step 2: define weights.
    # In word2vec, it's the weights that we care about
    embed_matrix = tf.get_variable('embed_matrix',
                                   shape=[VOCAB_SIZE, EMBED_SIZE],
                                   initializer=tf.random_uniform_initializer())

    # Step 3: define the inference (embedding lookup)
    embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embed')

    # Step 4: define loss function
    # construct variables for NCE loss
    nce_weight = tf.get_variable('nce_weight',
                                 shape=[VOCAB_SIZE, EMBED_SIZE],
                                 initializer=tf.truncated_normal_initializer(
                                     stddev=1.0 / (EMBED_SIZE ** 0.5)))
    nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))

    # define loss function to be NCE loss function
    loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
                                         biases=nce_bias,
                                         labels=target_words,
                                         inputs=embed,
                                         num_sampled=NUM_SAMPLED,
                                         num_classes=VOCAB_SIZE),
                          name='loss')

    # Step 5: define optimizer that follows the gradient descent update rule to minimize loss
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

    utils.safe_mkdir('checkpoints')

    with tf.Session() as sess:
        # Step 6: initialize iterator and variables
        sess.run(tf.global_variables_initializer())
        sess.run(iterator.initializer)

        total_loss = 0.0  # we use this to calculate the average loss over the last SKIP_STEP steps
        writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph)

        for index in range(NUM_TRAIN_STEPS):
            try:
                # Step 7: execute optimizer and fetch loss
                _, loss_batch = sess.run([optimizer, loss])
                total_loss += loss_batch
                if (index + 1) % SKIP_STEP == 0:
                    print('Average loss at step {}: {:5.1f}'.format(
                        index, total_loss / SKIP_STEP))
                    total_loss = 0.0
            except tf.errors.OutOfRangeError:
                sess.run(iterator.initializer)
        writer.close()
def chk_servers(p: Path) -> int:
    jars_path: Path = Path(str(p) + JAR_DIR)
    servers_path: Path = Path(str(p) + SERVER_DIR)
    if not servers_path.exists() and utils.safe_mkdir(servers_path):
        return 2
    if not jars_path.exists() and utils.safe_mkdir(jars_path):
        return 2
    if chk_latest(p):
        return 1
    return 0
def save_to_file(self, directory_name, name=None):
    if name is not None:
        filename = os.path.join(directory_name, '{}_model.p'.format(name))
    else:
        filename = os.path.join(directory_name, 'model.p')
    utils.safe_mkdir(directory_name)
    utils.pickle_to_file(self.model, filename)
def main():
    model = 'trump_tweets'
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('checkpoints/' + model)
    lm = CharRNN(model)
    lm.create_model()
    lm.train()
def _install(self):
    """Install to the builder's specified install directory"""
    os.chdir(self.build_dir)
    safe_mkdir(self.install_dir)
    self.system(['make', 'install'])
def main():
    model = 'trump_tweets'
    utils.safe_mkdir('checkpoints/' + model)
    lm = CharRNN(model, HIDDEN_SIZE, BATCH_SIZE, SKIP_STEP, LENGTH, NUM_STEP, LR)
    lm.create_model()
    lm.train()
def word2vec(dataset):
    # Step 1: get input and output from the dataset
    with tf.name_scope('data'):
        iterator = dataset.make_initializable_iterator()
        center_words, target_words = iterator.get_next()

    # Steps 2 + 3: define weights and the embedding lookup
    with tf.name_scope('embed'):
        embed_matrix = tf.get_variable(
            'embed_matrix',
            shape=[VOCAB_SIZE, EMBED_SIZE],
            initializer=tf.random_uniform_initializer())
        embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embedding')

    # Step 4: create the variables for NCE loss and define the loss function
    with tf.name_scope('loss'):
        nce_weight = tf.get_variable(
            'nce_weight',
            shape=[VOCAB_SIZE, EMBED_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=1.0 / (EMBED_SIZE ** 0.5)))
        nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))

        # define the loss function
        loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
                                             biases=nce_bias,
                                             labels=target_words,
                                             inputs=embed,
                                             num_sampled=NUM_SAMPLED,
                                             num_classes=VOCAB_SIZE),
                              name='loss')

    # Step 5: define the optimizer
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

    utils.safe_mkdir('checkpoints')

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())

        total_loss = 0.0
        writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph)

        for index in range(NUM_TRAIN_STEPS):
            try:
                loss_batch, _ = sess.run([loss, optimizer])
                total_loss += loss_batch
                if (index + 1) % SKIP_STEP == 0:
                    print("Average loss at step {}: {:5.1f}".format(
                        index, total_loss / SKIP_STEP))
                    total_loss = 0.0
            except tf.errors.OutOfRangeError:
                sess.run([iterator.initializer])
        writer.close()
def test(self, file_checkpoint=None):
    if not self.loaded_weights:
        self.sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=40)
        if file_checkpoint:
            if os.path.isfile('{0}.index'.format(file_checkpoint)):
                print('Taking the specified checkpoint...')
                saver.restore(self.sess, file_checkpoint)
            else:
                print('Checkpoint {0} not found.'.format(file_checkpoint))
        else:
            print('Taking the last checkpoint...')
            # Restore the session from the latest checkpoint
            self.sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname('checkpoints/' + self.checkpoint_dir + '/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(self.sess, ckpt.model_checkpoint_path)

    out_posterior = np.zeros_like(self.images_test_clipped[0:, :, :, :], dtype='float32')
    out_prior = np.zeros_like(self.images_test_clipped[0:, :, :, :], dtype='float32')
    out_alpha = np.zeros_like(self.images_test_clipped[0:, :, :, :], dtype='float32')
    out_beta = np.zeros_like(self.images_test_clipped[0:, :, :, :], dtype='float32')

    for i in range(np.shape(self.images_test_clipped[0:, :, :, :])[0]):
        out_posterior1, out_prior1, out_alpha1, out_beta1, out_L = self.sess.run(
            [self.X_posterior, self.X_prior, self.alpha, self.beta, self.L_holder],
            feed_dict={self.X_noisy: self.images_test_clipped[i:i + 1, :, :, :],
                       self.L_holder: self.L,
                       self.is_train: False,
                       self.shift: 1})
        out_posterior[i:i + 1, :, :, :] = out_posterior1
        out_prior[i:i + 1, :, :, :] = out_prior1
        out_alpha[i:i + 1, :, :, :] = out_alpha1
        out_beta[i:i + 1, :, :, :] = out_beta1

    # denormalize
    out_posterior *= self.norm
    out_prior *= self.norm

    # copy point targets back > clip
    mask_outliers = np.logical_xor(self.mask_test, True)
    self.mask_outliers = mask_outliers
    out_posterior[mask_outliers] = self.images_test[mask_outliers]
    out_prior[mask_outliers] = self.images_test[mask_outliers]

    dir_test = 'test'
    safe_mkdir(dir_test)
    dir_final = os.path.join(dir_test, self.checkpoint_dir)
    safe_mkdir(dir_final)
    step = self.gstep.eval(session=self.sess)
    sio.savemat(os.path.join(dir_final, '{0}_{1}.mat').format(self.checkpoint_dir, step),
                {'posterior': out_posterior[:, :, :, 0],
                 'prior': out_prior[:, :, :, 0],
                 'alpha': out_alpha[:, :, :, 0],
                 'beta': out_beta[:, :, :, 0],
                 'noisy': self.images_test[:, :, :, 0],
                 'L': out_L})
def filter_documents(self):
    """Read documents from input_dir, filter and write into a filtered dir."""
    logging.info('Filtering documents')
    utils.safe_mkdir(self.output_dirpath)
    for input_filepath, output_filepath in zip(self.input_filepaths, self.output_filepaths):
        logging.info('Reading file: {}'.format(input_filepath))
        self.write_file(input_filepath, output_filepath)
def main():
    model = 'trump_tweets'
    # model = "arvix_abstracts"
    utils.safe_mkdir('data/checkpoints')
    utils.safe_mkdir('data/checkpoints/' + model)
    lm = CharRNN(model)
    lm.create_model()
    lm.train()
def download(self):
    """Fetch the package source from its URL and save it in our source directory."""
    safe_mkdir(self.archive_dir)
    full_target_name = os.path.join(self.archive_dir, self.packed_name)
    utils.download_and_save(self.url, full_target_name)
def plot_invariances(invariance_sequences, out_dir, labels, title_prefix):
    out_dir = os.path.join(out_dir, 'invariance_plots')
    safe_mkdir(out_dir)
    for split in SPLITS:
        sdir = os.path.join(out_dir, split)
        safe_mkdir(sdir)
        for metric in ALL_METRICS:
            line_dict = {metric: invariance_sequences[split][metric]}
            out_file = os.path.join(sdir, metric + '.png')
            plot_lines(labels, "%s %s Invariance" % (title_prefix, metric), line_dict, out_file)
def manip(args, test_list, u_model):
    if args.test_weights_path == '':
        weights_path = os.path.join(args.check_dir,
                                    args.output_name + '_model_' + args.time + '.hdf5')
    else:
        weights_path = os.path.join(args.data_root_dir, args.test_weights_path)

    output_dir = os.path.join(args.data_root_dir, 'results', args.net)
    manip_out_dir = os.path.join(output_dir, 'manip_output')
    try:
        safe_mkdir(manip_out_dir)
    except:
        pass

    # Compile the loaded model
    manip_model = compile_model(args=args, uncomp_model=u_model)
    try:
        manip_model.load_weights(weights_path)
    except:
        raise NotImplementedError('Unable to find weights path.')

    # Manipulating capsule vectors
    print('Testing... This will take some time...')
    for i, img in enumerate(tqdm(test_list)):
        sitk_img = sitk.ReadImage(os.path.join(args.data_root_dir, 'imgs', img[0]))
        img_data = sitk.GetArrayFromImage(sitk_img)
        num_slices = img_data.shape[0]
        sitk_mask = sitk.ReadImage(os.path.join(args.data_root_dir, 'masks', img[0]))
        gt_data = sitk.GetArrayFromImage(sitk_mask)

        x, y = img_data[num_slices // 2, :, :], gt_data[num_slices // 2, :, :]
        x, y = np.expand_dims(np.expand_dims(x, -1), 0), np.expand_dims(np.expand_dims(y, -1), 0)

        noise = np.zeros([1, 512, 512, 1, 16])
        x_recons = []
        for dim in trange(16):
            for r in [-0.25, -0.125, 0, 0.125, 0.25]:
                tmp = np.copy(noise)
                tmp[:, :, :, :, dim] = r
                x_recon = manip_model.predict([x, y, tmp])
                x_recons.append(x_recon)
        x_recons = np.concatenate(x_recons)

        out_img = combine_images(x_recons, height=16)
        out_image = out_img * 4096
        out_image[out_image > 574] = 574
        out_image = out_image / 574 * 255

        Image.fromarray(out_image.astype(np.uint8)).save(
            os.path.join(manip_out_dir, img[0][:-4] + '_manip_output.png'))
    print('Done.')
def plot_loss_equivariance_compare(equivariance_sequences, out_dir, labels, title_prefix):
    out_dir = os.path.join(out_dir, 'loss_compare_equivariance_plots')
    safe_mkdir(out_dir)
    for model_type in MODEL_TYPES:
        for split in SPLITS:
            sdir = os.path.join(out_dir, model_type, split)
            safe_mkdir(sdir)
            for metric in ALL_METRICS:
                line_dict = {"%s_%s" % (metric, loss):
                             equivariance_sequences[model_type][loss][split][metric]
                             for loss in LOSS_TYPES}
                out_file = os.path.join(sdir, metric + '.png')
                plot_lines(labels, "%s %s Equivariance Loss Compare" % (title_prefix, metric),
                           line_dict, out_file)
def build_the_vocab(dir_path, vocab_size, output_dir):
    """
    :param dir_path: directory where the text files used for building the vocab are stored
    :param vocab_size: size of the vocabulary to be constructed
    :param output_dir: directory where the resulting vocab.tsv is written
    :return: path to the written vocab.tsv file
    """
    # create the .tsv file that will hold vocab_size entries
    utils.safe_mkdir(output_dir)
    output_file = open(os.path.join(output_dir, "vocab.tsv"), 'w', encoding="utf8")

    # read all the words
    all_words = []
    for txt_file in glob.glob(dir_path + "\\*.txt"):
        print(txt_file)
        words = open(txt_file, 'r', encoding="utf8").read()
        words = words.lower()
        words = ' '.join(words.split())
        words = words.replace('""', " ")
        words = words.replace(",", " ")
        words = words.replace("“", " ")
        words = words.replace("”", " ")
        words = words.replace(".", " ")
        words = words.replace(";", " ")
        words = words.replace("!", " ")
        words = words.replace("?", " ")
        words = words.replace("’", " ")
        words = words.replace("—", " ")
        words = words.split(' ')
        # skip empty strings left over after the replacements
        for word in words:
            if word:
                all_words.append(word)
    print("Number of words in all files is {}".format(len(all_words)))

    # count all the words
    count = [('UNK', -1)]
    count.extend(Counter(all_words).most_common(vocab_size - 1))
    print("Number of unique words: {}".format(len(count)))
    print(count[:10])

    # write them to disk
    for word, _ in count:
        output_file.write(word + '\n')
    output_file.close()

    return os.path.join(output_dir, "vocab.tsv")
def save_to_files(self, directory_name, name=None):
    """Saves all the sample files into the directory directory_name.

    Args:
        directory_name (string): Name of directory to save files.
        name (string, optional): additional name to add into the dataset files.
    """
    utils.safe_mkdir(directory_name)
    super(BaseSampledDataset, self).save_to_files(directory_name)
    utils.pickle_to_file(self._sample_indices,
                         self._get_objective_filename(directory_name, 'sample_indices', name))
def write_modulefile(args):
    package_name = args.name
    full_path = get_modulefile_path(package_name)
    dirname, filename = os.path.split(full_path)
    safe_mkdir(dirname)
    with open(full_path, 'w') as f:
        file_text = generate_modulefile_text(package_name)
        f.write(file_text)
def word2vec(dataset):
    """ Build the graph for word2vec model and train it """
    # Step 1: get input, output from the dataset
    with tf.name_scope('data'):
        iterator = dataset.make_initializable_iterator()
        center_words, target_words = iterator.get_next()

    # Step 2 + 3: define weights and embedding lookup.
    # In word2vec, it's actually the weights that we care about
    with tf.name_scope('embed'):
        embed_matrix = tf.get_variable('embed_matrix',
                                       shape=[VOCAB_SIZE, EMBED_SIZE],
                                       initializer=tf.random_uniform_initializer())
        embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embedding')

    # Step 4: construct variables for NCE loss and define loss function
    with tf.name_scope('loss'):
        nce_weight = tf.get_variable('nce_weight',
                                     shape=[VOCAB_SIZE, EMBED_SIZE],
                                     initializer=tf.truncated_normal_initializer(
                                         stddev=1.0 / (EMBED_SIZE ** 0.5)))
        nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))

        # define loss function to be NCE loss function
        loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
                                             biases=nce_bias,
                                             labels=target_words,
                                             inputs=embed,
                                             num_sampled=NUM_SAMPLED,
                                             num_classes=VOCAB_SIZE),
                              name='loss')

    # Step 5: define optimizer
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

    utils.safe_mkdir('checkpoints')

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())

        total_loss = 0.0  # we use this to calculate the average loss over the last SKIP_STEP steps
        writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph)

        for index in range(NUM_TRAIN_STEPS):
            try:
                loss_batch, _ = sess.run([loss, optimizer])
                total_loss += loss_batch
                if (index + 1) % SKIP_STEP == 0:
                    print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP))
                    total_loss = 0.0
            except tf.errors.OutOfRangeError:
                sess.run(iterator.initializer)
        writer.close()
def plot_equivariances(equivariance_sequences, invariance_sequences, out_dir, labels, title_prefix):
    out_dir = os.path.join(out_dir, 'equivariance_plots')
    safe_mkdir(out_dir)
    for model_type in MODEL_TYPES:
        for loss in LOSS_TYPES:
            for split in SPLITS:
                sdir = os.path.join(out_dir, model_type, loss, split)
                safe_mkdir(sdir)
                for metric in ALL_METRICS:
                    line_dict = {metric: equivariance_sequences[model_type][loss][split][metric],
                                 "invariance_%s" % metric: invariance_sequences[split][metric]}
                    out_file = os.path.join(sdir, metric + '.png')
                    plot_lines(labels, "%s %s Equivariance" % (title_prefix, metric),
                               line_dict, out_file)
def split_data(root_path, num_splits=4):
    with open(os.path.join(root_path, 'file_lists', 'master_nodule_list.csv'), 'r') as f:
        reader = csv.reader(f)
        img_list = np.asarray(list(reader))

    labels_list = []
    indices = [0]
    nodule_list = []
    mal_score_list = []
    mal_scores = []
    curr_nodule = os.path.dirname(img_list[0][0])
    for i, img_label in enumerate(img_list):
        if os.path.dirname(img_label[0]) != curr_nodule:
            nodule_list.append(curr_nodule)
            mal_score_list.append(np.rint(np.mean(mal_scores)))
            indices.append(i)
            mal_scores = []
            curr_nodule = os.path.dirname(img_label[0])
        split_name = os.path.basename(img_label[0]).split('_')
        mal_scores.append(int(split_name[-1][-1]))
        labels_list.append([int(n[-1]) for n in split_name[1:]])

    outdir = os.path.join(root_path, 'file_lists')
    safe_mkdir(outdir)
    skf = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=12)
    n = 0
    for train_index, test_index in skf.split(nodule_list, mal_score_list):
        with open(os.path.join(outdir, 'train_split_{:02d}.csv'.format(n)), 'w') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for i in train_index:
                for j in range(indices[i], indices[i + 1]):
                    writer.writerow([img_list[j][0].split(root_path)[1][1:]] +
                                    labels_list[j] + list(img_list[j][1:]))
        with open(os.path.join(outdir, 'test_split_{:02d}.csv'.format(n)), 'w') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for i in test_index:
                for j in range(indices[i], indices[i + 1]):
                    writer.writerow([img_list[j][0].split(root_path)[1][1:]] +
                                    labels_list[j] + list(img_list[j][1:]))
        n += 1
def main():
    # check command line arguments
    assert len(sys.argv) >= 2, \
        '\n[Usage] python3 "%s" <URL to download> ' \
        '[directory to save images]' % __file__

    # get the URL to download from the command line argument and download the page;
    # exit if the download fails
    source_url = sys.argv[1]
    source_html_filename = download_from_url(source_url, output_doc='index.html',
                                             exit_on_error=True)

    # get image URLs and their descriptions
    images_info = get_images_info(source_html_filename)
    num_total_images = len(images_info)
    print('%s image(s) to download: ' % num_total_images)
    index = 0
    for image_info in images_info:
        index += 1
        print('[%s] %s' % (index, vars(image_info)))

    # remove the downloaded html
    safe_remove(source_html_filename)

    # download images
    target_dir = str(sys.argv[2]) if len(sys.argv) >= 3 else '.'
    safe_mkdir(target_dir)
    index = 0
    num_success = 0
    num_failure = 0
    for image_info in images_info:
        index += 1
        print('Downloading image %s of %s' % (index, num_total_images))
        downloaded_filename = download_from_url(image_info.src, target_dir=target_dir)
        if downloaded_filename is not None:
            num_success += 1
            print('[Download success: %s / %s]\n%s'
                  % (num_success, num_total_images, downloaded_filename))
        else:
            num_failure += 1
            print('[Download failed: %s / %s]\n%s'
                  % (num_failure, num_total_images, image_info.src))

    # print final results
    print('[Download results]')
    print('Success: %s / %s' % (num_success, num_total_images))
    print('Failure: %s / %s' % (num_failure, num_total_images))
def main(transform_file, in_dir, out_dir):
    safe_mkdir(out_dir)
    transforms, _ = get_transforms(transform_file)
    transforms = reorder_transforms(transforms)
    all_metrics, transforms = load_metrics(transforms, in_dir)
    label_names, title_prefix = format_labels(transforms)

    invariance_sequences = format_invariances(all_metrics)
    equivariance_sequences = format_equivariances(all_metrics)

    plot_invariances(invariance_sequences, out_dir, label_names, title_prefix)
    plot_equivariances(equivariance_sequences, invariance_sequences, out_dir, label_names, title_prefix)
    plot_reductions(equivariance_sequences, invariance_sequences, out_dir, label_names, title_prefix)
    #plot_loss_equivariance_compare(equivariance_sequences, out_dir, label_names, title_prefix)
    #plot_model_equivariance_compare(equivariance_sequences, out_dir, label_names, title_prefix)
    plot_split_equivariance_compare(equivariance_sequences, out_dir, label_names, title_prefix)
def main(relation, limit, offset, directory_name):
    """Main script function."""
    utils.safe_mkdir(directory_name)
    query = """SELECT DISTINCT ?related ?wikiPage WHERE {
        ?movie rdf:type <http://yago-knowledge.org/resource/%s> .
        ?related <http://yago-knowledge.org/resource/%s> ?movie .
        ?related <http://yago-knowledge.org/resource/hasWikipediaUrl> ?wikiPage
    } LIMIT %s OFFSET %s""" % (MOVIE_CATEGORY_NAME, relation, limit, offset)
    response = utils.query_sparql(query, utils.YAGO_ENPOINT_URL)
    print 'Reading {} objects.'.format(len(response))
    filename = '{}-{}.pickle'.format(relation, offset)
    utils.pickle_to_file(response, os.path.join(directory_name, filename))
def build_vocab(words, vocab_size):
    """ Build vocabulary of VOCAB_SIZE most frequent words """
    dictionary = dict()
    count = [('UNK', -1)]
    count.extend(Counter(words).most_common(vocab_size - 1))
    index = 0
    utils.safe_mkdir('processed')
    with open('processed/vocab_1000.tsv', "w") as f:
        for word, _ in count:
            dictionary[word] = index
            if index < 1000:
                f.write(word + "\n")
            index += 1
    index_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, index_dictionary
def mk_srv_dir(servers_path: Path) -> Path:
    userin: str = NONE
    server_path: Path = None
    while userin != EXIT:
        userin = str(input(strs.NEW_SRV_NAME))
        if userin == EXIT:
            return None
        if userin == NONE:
            continue
        # reject names containing a path separator
        if "/" in userin:
            print(strs.E_NAME_SLASH)
            continue
        server_path = Path("{0}/{1}".format(str(servers_path), userin))
        if utils.is_in_dir(servers_path, server_path):
            print(strs.E_SRV_EXISTS.format(userin))
            continue
        if utils.safe_mkdir(server_path):
            try:
                server_path.rmdir()
            except:
                pass
            continue
        break
    return server_path
def plot_reductions(equivariance_sequences, invariance_sequences, out_dir, labels, title_prefix):
    out_dir = os.path.join(out_dir, 'reduction_plots')
    safe_mkdir(out_dir)
    for model_type in MODEL_TYPES:
        for loss in LOSS_TYPES:
            for split in SPLITS:
                sdir = os.path.join(out_dir, model_type, loss, split)
                safe_mkdir(sdir)
                for metric in NORM_METRICS:
                    eq_seq = equivariance_sequences[model_type][loss][split][metric]
                    in_seq = invariance_sequences[split][metric]

                    # compute relative reduction in metric error
                    red_seq = compute_reduction(in_seq, eq_seq)

                    line_dict = {metric: red_seq}
                    out_file = os.path.join(sdir, metric + '.png')
                    plot_lines(labels, "%s %s Equivariance" % (title_prefix, metric),
                               line_dict, out_file)
def build_vocab(words, vocab_size, visual_fld):
    """ Build vocabulary of VOCAB_SIZE most frequent words and write it to
    visualization/vocab.tsv
    """
    utils.safe_mkdir(visual_fld)
    file = open(os.path.join(visual_fld, 'vocab.tsv'), 'w')

    dictionary = dict()
    count = [('UNK', -1)]
    index = 0
    count.extend(Counter(words).most_common(vocab_size - 1))

    for word, _ in count:
        dictionary[word] = index
        index += 1
        file.write(word + '\n')

    index_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    file.close()
    return dictionary, index_dictionary
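# Usage sketch for the build_vocab variant above (illustrative only; the toy corpus and the
# 'visualization' folder name are assumptions, not taken from the original project).
corpus = "the quick brown fox jumps over the lazy dog the end".split()
dictionary, index_dictionary = build_vocab(corpus, vocab_size=8, visual_fld='visualization')
print(dictionary['UNK'])      # 'UNK' is always inserted first, so it gets index 0
print(index_dictionary[0])    # and index 0 maps back to 'UNK'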
def train(self, n_epochs):
    '''
    The train function alternates between training one epoch and evaluating
    '''
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('checkpoints/convnet_layers')
    writer = tf.summary.FileWriter('./graphs/convnet_layers', tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname('checkpoints/convnet_layers/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        step = self.gstep.eval()
        for epoch in range(n_epochs):
            step = self.train_one_epoch(sess, saver, self.train_init, writer, epoch, step)
            self.eval_once(sess, self.test_init, writer, epoch, step)

    writer.close()
def setup_loggers():
    logdir = os.path.join(get_base_dir(), 'log')
    safe_mkdir(logdir)

    def setup_file_logger(logger):
        short_name = logger.name[len(LOGGER_NAME_PREFIX):]
        file = os.path.join(logdir, '{name}.log'.format(name=short_name))
        fileHandler = logging.FileHandler(file)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        fileHandler.setFormatter(formatter)
        logger.addHandler(fileHandler)

    setup_file_logger(app.logger)

    # set logger class for future loggers
    class RequestLogger(logging.Logger):
        def __init__(self, name):
            super(RequestLogger, self).__init__(name)
            setup_file_logger(self)
            self.setLevel(logging.INFO)

    logging.setLoggerClass(RequestLogger)
def write_splits(self):
    """Re-reads the input files and writes documents into the split files."""
    if not self.output_dirname:
        return
    utils.safe_mkdir(self.output_dirname)
    logging.info("Writing {} documents".format(len(self.documents)))
    logging.info("Train dataset size {}".format(len(self.train_doc_index)))
    logging.info("Test dataset size {}".format(len(self.test_doc_index)))
    logging.info("Validation dataset size {}".format(
        len(self.validation_doc_index)))
    current_document_index = 0

    if not self.indices_filepath:
        logging.info("Saving absolute indices")
        indices_filename = os.path.join(self.output_dirname, 'split_indices.pickle')
        with open(indices_filename, 'wb') as indices_file:
            pickle.dump((self.train_doc_index, self.test_doc_index,
                         self.validation_doc_index), indices_file)

    train_filename = os.path.join(self.output_dirname, 'train.conll')
    test_filename = os.path.join(self.output_dirname, 'test.conll')
    val_filename = os.path.join(self.output_dirname, 'validation.conll')
    with open(train_filename, 'w') as train_f, \
            open(test_filename, 'w') as test_f, \
            open(val_filename, 'w') as val_f:
        for file_path in self.file_paths:
            logging.info("Writing file: {}".format(file_path))
            parser = WikipediaCorpusColumnParser(file_path=file_path)
            for document in tqdm(parser):
                if current_document_index in self.train_doc_index:
                    self.write_document(document, train_f)
                elif current_document_index in self.test_doc_index:
                    self.write_document(document, test_f)
                elif current_document_index in self.validation_doc_index:
                    self.write_document(document, val_f)
                current_document_index += 1
def scatter(fin='../data/original.tsv', out='../data/scatter/20110902/', figname='some',
            ellipses=False, contours=False, conv_hull=True, conv_thresh=[0.2],
            xnum=10, ynum=10, norm=None, mew=1, legend=False, special=None,
            simple=True, typevec=None, colorvec=None):
    print figname
    utils.safe_mkdir(out)
    z = files.filter_hack(tb.tabarray(SVfile=fin))
    names = np.array([n for n in z.dtype.names
                      if n not in ['type', 'names', 'name', 'ingredients']])
    a = z[names].extract()
    if norm == None:
        a = utils.normalize(a)    # normalize data
    if figname.startswith('some'):
        typevec = ['chocolate-cakes', 'angel-food-cakes', 'brownies', 'sugar-cookies',
                   'scones', 'loaves', 'pancakes', 'crepes']
        colorvec = ['brown', 'g', 'm', 'b', 'k', 'r', 'y', 'c']
    elif figname.startswith('all'):
        typevec = ['']
        z['type'] = ''
        colorvec = ['g']
    ingredientvec = ['white sugar', 'all-purpose flour']
    name_dict = dict(zip(names, range(len(names))))
    idict = {}
    for i in ['egg', 'flour', 'sugar', 'oil']:
        idict[i] = [n for n in z.dtype.names if i in n]
    idict['liquid'] = ['water']
    idict['liquid'] += [n for n in z.dtype.names
                        if ('milk' in n) and ('powder' not in n) and ('chip' not in n)]
    idict['liquid'] += [n for n in z.dtype.names if ('juice' in n) and ('with' not in n)]
    idict['sugar'] += ['corn syrup', 'light corn syrup']
    idict['butter'] = ['butter', 'margarine', 'butter or margarine',
                       'butter or stick margarine']
    idict['oil'] += [n for n in z.dtype.names if 'shortening' in n]
    idict['fat'] = idict['butter'] + idict['oil']
    idict.pop('butter')
    idict.pop('oil')
    print idict
    columns = []
    ingredientvec = idict.keys()
    for i in ingredientvec:
        name_list = np.array([name_dict[j] for j in idict[i]])
        columns += [a[:, name_list].sum(axis=1)]
    data = tb.tabarray(columns=columns, names=ingredientvec)
    print data
    n = len(ingredientvec)
    if norm is not None:
        d = data.extract()
        i = list(data.dtype.names).index(norm)
        array = d / np.repeat(d[:, i], d.shape[1]).reshape(d.shape[0], d.shape[1])
        data = tb.tabarray(array=array, names=data.dtype.names)
    for j1 in range(n - 1):
        i1 = ingredientvec[j1]
        for j2 in range(j1 + 1, n):
            i2 = ingredientvec[j2]
            k = 0
            pylab.clf()
            for kind in typevec:
                color = colorvec[k]
                #p = a[z['type']==kind][:,name_dict[i1]]
                #q = a[z['type']==kind][:,name_dict[i2]]
                p = data[z['type'] == kind][i1]
                q = data[z['type'] == kind][i2]
                if simple:
                    pylab.plot(p, q, '+', color=color, markeredgewidth=mew)
                if conv_hull:
                    for ct in conv_thresh:
                        x = p.mean()
                        y = q.mean()
                        d = np.sqrt((x - p)**2 + (y - q)**2)
                        ind = d.argsort()[:-int(len(p) * ct)]
                        pts = [(p[j], q[j]) for j in ind]
                        if pts:
                            hull = np.array(convexHull(pts))
                            pylab.fill(hull[:, 0], hull[:, 1], color=color, alpha=0.2)
                        #else:
                        #    print t
                k += 1
            pylab.xlabel(i1)
            pylab.ylabel(i2)
            if special is not None:
                p = data[z['name'] == special][i1]
                q = data[z['name'] == special][i2]
                pylab.plot(p, q, '*', color='y', markersize=20, mew=2)
            if legend:
                if special is not None:
                    pylab.legend(typevec + [special])
                else:
                    pylab.legend(typevec)
            if norm is None:
                pylab.axis([0, 1, 0, 1])
            pylab.savefig(out + figname + '_' + i1 + '_' + i2 + '.pdf')
    if special is not None:
        pylab.legend(typevec + [special])
    else:
        pylab.legend(typevec)
    pylab.savefig(out + figname + '_legend.pdf')
    data = z[['type', 'name']].colstack(data)
    if figname.startswith('some'):
        data.saveSV('../data/words/ingredients-basic.tsv')
    return (z, data)
def main(args):
    log(args, str(args))
    safe_mkdir(args.out_dir)

    all_transforms, _ = get_transforms(args.transform_file)

    # don't redo work that we have already done
    all_transforms, do_first = filter_existing(all_transforms, args.out_dir)
    if len(all_transforms) <= 1:
        log(args, "No transforms to do. Exiting...")
        exit()
    log(args, "Loaded Transforms. %d transforms" % len(all_transforms))

    model = init_model(args.network_file, args.weight_file, gpu=args.gpu)
    train_lmdbs = args.train_lmdbs.split(args.delimiter)
    test_lmdbs = args.test_lmdbs.split(args.delimiter)

    base_transform = all_transforms[0]
    log(args, "Starting on Baseline Transform: %r\n" % base_transform)
    base_train_features, base_train_output_probs, base_train_classifications, _ = \
        get_activations(model, [base_transform], train_lmdbs, args)
    base_test_features, base_test_output_probs, base_test_classifications, _ = \
        get_activations(model, [base_transform], test_lmdbs, args)

    transform_partitions = partition_transforms(all_transforms, args.num_transforms)
    log(args, "Transform Partitions: %r" % transform_partitions)
    for transforms in transform_partitions:
        log(args, "Starting on Transforms: %r\n" % transforms)
        train_features, train_output_probs, train_classifications, train_labels = \
            get_activations(model, transforms[1:], train_lmdbs, args)
        train_features.update(base_train_features)
        train_output_probs.update(base_train_output_probs)
        train_classifications.update(base_train_classifications)

        test_features, test_output_probs, test_classifications, test_labels = \
            get_activations(model, transforms[1:], test_lmdbs, args)
        test_features.update(base_test_features)
        test_output_probs.update(base_test_output_probs)
        test_classifications.update(base_test_classifications)

        log(args, "Measuring invariances...")
        train_invariance_metrics = measure_invariances(train_features, train_output_probs,
                                                       train_classifications, train_labels,
                                                       transforms, do_first, args)
        test_invariance_metrics = measure_invariances(test_features, test_output_probs,
                                                      test_classifications, test_labels,
                                                      transforms, do_first, args)
        log(args, "Done...")

        setup_scratch_space(args)
        log(args, "Measuring equivariances...")
        train_equivariance_metrics, test_equivariance_metrics = measure_equivariances(
            train_features, train_labels, train_classifications, train_output_probs,
            test_features, test_labels, test_classifications, test_output_probs,
            transforms, model, do_first, args)

        for transform in transforms[(0 if do_first else 1):]:
            write_output(args.out_dir, transform,
                         train_invariance_metrics[transform],
                         test_invariance_metrics[transform],
                         train_equivariance_metrics[transform],
                         test_equivariance_metrics[transform])

        do_first = False
        log(args, "Done Measure Equivariances")

    cleanup_scratch_space(args)
    log(args, "Exiting...")

    if args.log_file:
        args.log.close()
labels.append("S") elif "mirror" in net_dir: labels.append("M") else: labels.append("") return labels for split in SPLITS: print "Starting Split:", split for loss in LOSS_TYPES: for model_type in MODEL_TYPES: #out_dir = os.path.join(root_out_dir, split, loss, model_type) out_dir = os.path.join(root_out_dir, split) safe_mkdir(out_dir) for metric, is_distance in METRICS: print "Starting Metric:", metric dist_mat = np.zeros( (num_net_dirs, num_net_dirs), dtype=float) for idx1, net_dir1 in enumerate(net_dirs): result_dir = os.path.join(ROOT, net_dir1, 'equivalence/results') for idx2, net_dir2 in enumerate(net_dirs): fn = net_dir2.replace('/', '_') + '.txt' result_file = os.path.join(result_dir, fn) results = ast.literal_eval(open(result_file, 'r').read()) if is_distance: dist_mat[idx1,idx2] = SCALE * results[split][model_type][loss][metric] else: dist_mat[idx1,idx2] = SCALE * (1 - results[split][model_type][loss][metric])
def simplex(fin='../data/words/ingredients-basic.tsv', out='../data/simplex/20110920/',
            figname='some', ellipses=False, contours=False, conv_hull=True,
            conv_thresh=[0.2], xnum=10, ynum=10, norm=True, linewidth=0.2,
            text=True, mew=1.5, special=None):
    utils.safe_mkdir(out)
    z = tb.tabarray(SVfile=fin)
    names = [n for n in z.dtype.names if n not in ['type', 'names', 'name', 'ingredients']]
    n = len(names)
    array = utils.normalize(z[names].extract())
    data = tb.tabarray(array=array, names=names)
    if figname.startswith('some'):
        typevec = ['chocolate-cakes', 'angel-food-cakes', 'brownies', 'sugar-cookies',
                   'scones', 'loaves', 'pancakes', 'crepes']
        colorvec = ['brown', 'g', 'm', 'b', 'k', 'r', 'y', 'c']
    elif figname.startswith('all'):
        typevec = ['']
        z['type'] = ''
        colorvec = ['g']
    for j1 in range(n):
        i1 = names[j1]
        for j2 in range(n):
            i2 = names[j2]
            for j3 in range(n):
                i3 = names[j3]
                k = 0
                pylab.clf()
                for kind in typevec:
                    color = colorvec[k]
                    p = data[z['type'] == kind][i1] - data[z['type'] == kind][i2]
                    q = np.sqrt(3) * data[z['type'] == kind][i3]
                    pylab.plot(p, q, '+', color=color, markeredgewidth=mew)
                    if conv_hull:
                        for ct in conv_thresh:
                            x = p.mean()
                            y = q.mean()
                            d = np.sqrt((x - p)**2 + (y - q)**2)
                            ind = d.argsort()[:-int(len(p) * ct)]
                            pts = [(p[j], q[j]) for j in ind]
                            if pts:
                                hull = np.array(convexHull(pts))
                                pylab.fill(hull[:, 0], hull[:, 1], color=color, alpha=0.2)
                    k += 1
                if special is not None:
                    p = data[z['name'] == special][i1] - data[z['name'] == special][i2]
                    q = np.sqrt(3) * data[z['name'] == special][i3]
                    pylab.plot(p, q, '*', color='y', markersize=20, mew=2)
                pylab.plot([-1, 1], [0, 0], 'k-.', linewidth=linewidth)
                pylab.plot([-1, 0], [0, np.sqrt(3)], 'k-.', linewidth=linewidth)
                pylab.plot([0, 1], [np.sqrt(3), 0], 'k-.', linewidth=linewidth)
                if text:
                    pylab.text(1.1, -0.1, i1, fontsize=16)
                    pylab.text(-1.1, -0.1, i2, fontsize=16)
                    pylab.text(-0.05, 1.8, i3, fontsize=16)
                pylab.axis('equal')
                pylab.axis('off')
                pylab.savefig(out + figname + '_' + '_'.join([i1, i2, i3]) + '.pdf',
                              transparent=True)
    if special is not None:
        pylab.legend(typevec + [special])
    else:
        pylab.legend(typevec)
    pylab.savefig(out + figname + '_legend.pdf', transparent=True)
    return (z, data)
def setup(): """ 新建存储模型的文件夹checkpoints和存储合成图片结果的文件夹outputs """ utils.safe_mkdir("checkpoints") utils.safe_mkdir("outputs")
def setup():
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('outputs')
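# Every snippet above relies on a project-specific safe_mkdir helper. The exact implementations
# differ; the version below is a minimal sketch (an assumption, not taken from any of the sources
# above) of the common pattern: create the directory and ignore the error if it already exists.
import os


def safe_mkdir(path):
    """Create a directory if it does not already exist."""
    try:
        os.makedirs(path)
    except OSError:
        pass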