Example #1
    def __init__(self, content_img, style_img, img_width, img_height):
        """
        初始化
        
        :param content_img: 待转换风格的图片(保留内容的图片)
        :param style_img: 风格图片(保留风格的图片)
        :param img_width: 图片的width
        :param img_height: 图片的height
        """
        # 获取基本信息
        self.content_name = str(content_img.split("/")[-1].split(".")[0])
        self.style_name = str(style_img.split("/")[-1].split(".")[0])
        self.img_width = img_width
        self.img_height = img_height
        # Resize both images to a common pixel size
        self.content_img = utils.get_resized_image(content_img, img_width, img_height)
        self.style_img = utils.get_resized_image(style_img, img_width, img_height)
        self.initial_img = utils.generate_noise_image(self.content_img, img_width, img_height)

        # Layers used for feature extraction
        self.content_layer = "conv4_2"
        self.style_layers = ["conv1_1", "conv2_1", "conv3_1", "conv4_1", "conv5_1"]

        # Weights for the content loss and the style loss
        self.content_w = 0.001
        self.style_w = 1

        # Per-layer weights for the style layers; deeper layers get larger weights
        self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]

        # Global step and learning rate
        self.gstep = tf.Variable(0, dtype=tf.int32, trainable=False, name="global_step")  # global step
        self.lr = 2.0

        utils.safe_mkdir("outputs/%s_%s" % (self.content_name, self.style_name))
Example #2
def plot_output(pred_baseline,
                pred_variation,
                varID_0,
                varID_1,
                weights_baseline,
                weights_variation,
                model_name,
                t=''):
    '''
    '''
    safe_mkdir('plots')
    plt.figure(figsize=(8, 8))
    min_ = min(pred_baseline.min(), pred_variation.min())
    max_ = max(pred_baseline.max(), pred_variation.max())
    bins = np.linspace(min_, max_, 30)
    _ = plt.hist(pred_baseline,
                 bins=bins,
                 histtype='step',
                 label=r'test - {}'.format(varID_0.replace('_', ' ')),
                 weights=weights_baseline)
    _ = plt.hist(pred_variation,
                 bins=bins,
                 histtype='step',
                 label=r'test - {}'.format(varID_1.replace('_', ' ')),
                 weights=weights_variation)
    plt.legend(loc='upper left')
    plt.xlabel('Weighted NN Output')
    plt.savefig(
        os.path.join(
            'plots', '{}_{}_{}_output_{}.pdf'.format(model_name, varID_0,
                                                     varID_1, t)))
    plt.close()
Example #3
    def train(self, n_epochs):
        '''
        The train function alternates between training one epoch and evaluating
        '''
        utils.safe_mkdir('checkpoints')
        utils.safe_mkdir('checkpoints/convnet_layers')
        train_writer = tf.summary.FileWriter('./graphs/convnet_layers/train',
                                             tf.get_default_graph())
        test_writer = tf.summary.FileWriter('./graphs/convnet_layers/eval',
                                            tf.get_default_graph())
        # Configure the GPU so memory grows as needed, avoiding CUDA out-of-memory errors
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # Initialize all variables before training starts
            sess.run(tf.global_variables_initializer())

            # Keep effectively all checkpoints (the Saver default is only 5)
            saver = tf.train.Saver(max_to_keep=10000)

            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname('checkpoints/convnet_layers/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            step = self.gstep.eval()

            for epoch in range(n_epochs):
                step = self.train_one_epoch(sess, saver, self.train_init,
                                            train_writer, epoch, step)
                self.eval_once(sess, self.test_init, test_writer, epoch, step)
        train_writer.close()
        test_writer.close()
Example #4
    def train(self, n_epochs):
        '''
        The train function alternates between training one epoch and evaluating
        '''
        utils.safe_mkdir('checkpoints')
        utils.safe_mkdir('checkpoints/convnet_mnist')
        writer = tf.summary.FileWriter('./graphs/convnet',
                                       tf.get_default_graph())

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            exp.set_model_graph(sess.graph)
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname('checkpoints/convnet_mnist/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            step = self.gstep.eval()

            for epoch in range(n_epochs):
                step = self.train_one_epoch(sess, saver, self.train_init,
                                            writer, epoch, step)
                self.eval_once(sess, self.test_init, writer, epoch, step)
                exp.log_epoch_end(epoch)
        writer.close()
Example #5
 def train(self,n_epochs):
     
                           
     
     safe_mkdir('checkpoints')
     safe_mkdir('checkpoints/'+self.checkpoint_dir)
     #To plot two different curves on the same graph we need two different writers that write the
     #same group of summaries.
     train_writer = tf.summary.FileWriter('./graphs/'+self.checkpoint_dir + '/train', tf.get_default_graph())
     test_writer = tf.summary.FileWriter('./graphs/'+self.checkpoint_dir + '/test',tf.get_default_graph())
     #self.sess.run(tf.global_variables_initializer())
     #
     #
     #
     #saver = tf.train.Saver(max_to_keep=None)
     #ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/'+self.checkpoint_dir+'/checkpoint'))
     #if ckpt and ckpt.model_checkpoint_path:
     #    saver.restore(self.sess, ckpt.model_checkpoint_path)
         
     
     saver = self.load_weights()
               
                             
     step = self.gstep.eval(session=self.sess)
     
     cprint("[!] Restarting at iteration {}".format(step), color="yellow")
                           
     for epoch in range(n_epochs):
         step = self.train_one_epoch(saver, train_writer,test_writer, epoch, step)
     
     return step
Example #6
def create_spkr_folder(data_dir, spkr_list):
    '''Make speaker folder
    e.g., ./data/train/FAEM0
    '''
    for s in spkr_list:
        folder = os.path.join(data_dir, s)
        safe_mkdir(folder)
Example #7
    def __init__(self, content_img, style_img, img_width, img_height):

        self.content_name = str(content_img.split("/")[-1].split(".")[0])
        self.style_name = str(style_img.split("/")[-1].split(".")[0])
        self.img_width = img_width
        self.img_height = img_height

        self.content_img = utils.get_resized_image(content_img, img_width,
                                                   img_height)
        self.style_img = utils.get_resized_image(style_img, img_width,
                                                 img_height)
        self.initial_img = utils.generate_noise_image(self.content_img,
                                                      img_width, img_height)

        self.content_layer = "conv4_2"
        self.style_layers = [
            "conv1_1", "conv2_1", "conv3_1", "conv4_1", "conv5_1"
        ]

        # Weights for the content loss and the style loss
        self.content_w = 0.001
        self.style_w = 1

        self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]

        self.gstep = tf.Variable(0,
                                 dtype=tf.int32,
                                 trainable=False,
                                 name="global_step")  # global step
        self.lr = 2.0

        utils.safe_mkdir("outputs/%s_%s" %
                         (self.content_name, self.style_name))
Example #8
def main(args):
	model = init_model(args.network_file, args.weight_file, gpu=args.gpu)
	print args
	dbs = open_dbs(args.lmdbs.split(args.delimiter))
	max_images = min(args.max_images, dbs[0][0].stat()['entries'])
	max_iters = (max_images + args.batch_size - 1) / args.batch_size

	image_num = 0

	safe_mkdir(args.out_dir)
	for iter_num in xrange(max_iters):
		ims, labels = get_batch(dbs, args)

		fprop(model, ims, args)
		for idx in xrange(len(labels)):
			reconstruction = model.blobs[args.blob].data[idx]
			save_image(reconstruction, image_num, args)

			image_num += 1

		if iter_num > 0 and iter_num % 10 == 0:
			print "%.2f%% (%d/%d) Batches" % (100. * iter_num / max_iters, iter_num, max_iters)


	close_dbs(dbs)
Example #9
def plot_batch_features(features_baseline, features_variation, varID_0,
                        varID_1, weights_baseline, weights_variation,
                        model_name):
    safe_mkdir('plots')
    for fn, (f0,
             f1) in enumerate(zip(features_baseline.T, features_variation.T)):
        plt.figure(figsize=(8, 8))
        bins = np.linspace(min(f0.min(), f1.min()), max(f0.max(), f1.max()),
                           30)
        _ = plt.hist(f0,
                     bins=bins,
                     histtype='step',
                     label=r'test - {}'.format(varID_0.replace('_', ' ')),
                     weights=weights_baseline)
        _ = plt.hist(f1,
                     bins=bins,
                     histtype='step',
                     label=r'test - {}'.format(varID_1.replace('_', ' ')),
                     weights=weights_variation)
        plt.legend(loc='upper left')
        plt.savefig(
            os.path.join(
                'plots', '{}_{}_{}_{}.pdf'.format(model_name, varID_0, varID_1,
                                                  fn)))
        plt.close()
Example #10
    def check(self, n_epochs):
        utils.safe_mkdir('checkpoints')
        utils.safe_mkdir('checkpoints/initcheck')
        writer = tf.summary.FileWriter('./graphs/initcheck',
                                       tf.get_default_graph())

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # Initialize all variables before running
            sess.run(tf.global_variables_initializer())
            # Keep effectively all checkpoints (the Saver default is only 5)
            saver = tf.train.Saver(max_to_keep=10000)
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname('checkpoints/convnet_layers/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            step = self.gstep.eval()

            for epoch in range(n_epochs):
                step = self.train_one_epoch(sess, saver, self.train_init,
                                            writer, epoch, step)
                self.eval_once(sess, self.test_init, writer, epoch, step)
        writer.close()
Example #11
    def train(self, num_train_steps):
        saver = tf.train.Saver() # defaults to saving all variables - in this case embed_matrix, nce_weight, nce_bias

        initial_step = 0
        utils.safe_mkdir('checkpoints')
        with tf.Session() as sess:
            sess.run(self.iterator.initializer)
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))

            # if that checkpoint exists, restore from checkpoint
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            total_loss = 0.0  # we use this to calculate the average loss over the last SKIP_STEP steps
            writer = tf.summary.FileWriter('graphs/word2vec/lr' + str(self.lr), sess.graph)
            initial_step = self.global_step.eval()

            for index in range(initial_step, initial_step + num_train_steps):
                try:
                    loss_batch, _, summary = sess.run([self.loss, self.optimizer, self.summary_op])
                    writer.add_summary(summary, global_step=index)
                    total_loss += loss_batch
                    if (index + 1) % self.skip_step == 0:
                        print('Average loss at step {}: {:5.1f}'.format(index, total_loss / self.skip_step))
                        total_loss = 0.0
                        saver.save(sess, 'checkpoints/skip-gram', index)
                except tf.errors.OutOfRangeError:
                    sess.run(self.iterator.initializer)
            writer.close()
Example #12
def backup_configs(backup_path, skip=False):
	"""
	Creates `configs` directory and places config backups there.
	Configs are application settings, generally. .plist files count.
	"""
	print_section_header("CONFIGS", Fore.BLUE)
	overwrite_dir_prompt_if_needed(backup_path, skip)
	config = get_config()
	configs_dir_mapping = config["config_path_to_dest_map"]
	plist_files = config["plist_path_to_dest_map"]

	print(Fore.BLUE + Style.BRIGHT + "Backing up configs..." + Style.RESET_ALL)

	# backup config dirs in backup_path/<target>/
	for config, target in configs_dir_mapping.items():
		src_dir = home_prefix(config)
		configs_backup_path = os.path.join(backup_path, target)
		if os.path.isdir(src_dir):
			# TODO: Exclude Sublime/Atom/VS Code Packages here to speed things up
			copytree(src_dir, configs_backup_path, symlinks=True)

	# backup plist files in backup_path/configs/plist/
	print(Fore.BLUE + Style.BRIGHT + "Backing up plist files..." + Style.RESET_ALL)
	plist_backup_path = os.path.join(backup_path, "plist")
	safe_mkdir(plist_backup_path)
	for plist, dest in plist_files.items():
		plist_path = home_prefix(plist)
		if os.path.exists(plist_path):
			copyfile(plist_path, os.path.join(backup_path, dest))
Example #13
    def train(self, num_train_steps):
        saver = tf.train.Saver() # defaults to saving all variables - in this case embed_matrix, nce_weight, nce_bias

        initial_step = 0
        utils.safe_mkdir('checkpoints')
        with tf.Session() as sess:
            sess.run(self.iterator.initializer)
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))

            # if that checkpoint exists, restore from checkpoint
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            total_loss = 0.0  # we use this to calculate the average loss over the last SKIP_STEP steps
            writer = tf.summary.FileWriter('graphs/word2vec/lr' + str(self.lr), sess.graph)
            initial_step = self.global_step.eval()

            for index in range(initial_step, initial_step + num_train_steps):
                try:
                    loss_batch, _, summary = sess.run([self.loss, self.optimizer, self.summary_op])
                    writer.add_summary(summary, global_step=index)
                    total_loss += loss_batch
                    if (index + 1) % self.skip_step == 0:
                        print('Average loss at step {}: {:5.1f}'.format(index, total_loss / self.skip_step))
                        total_loss = 0.0
                        saver.save(sess, 'checkpoints/skip-gram', index)
                except tf.errors.OutOfRangeError:
                    sess.run(self.iterator.initializer)
            writer.close()
Example #14
def word2vec(dataset):
    """ Build the graph for word2vec model and train it """
    # Step 1: create iterator and get input, output from the dataset
    iterator = dataset.make_initializable_iterator()
    center_words, target_words = iterator.get_next()

    # Step 2: define weights.
    # In word2vec, it's the weights that we care about
    embed_matrix = tf.get_variable('embed_matrix',
                                   shape=[VOCAB_SIZE, EMBED_SIZE],
                                   initializer=tf.random_uniform_initializer())

    # Step 3: define the inference (embedding lookup)
    embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embed')

    # Step 4: define loss function
    # construct variables for NCE loss
    nce_weight = tf.get_variable('nce_weight',
                                 shape=[VOCAB_SIZE, EMBED_SIZE],
                                 initializer=tf.truncated_normal_initializer(
                                     stddev=1.0 / (EMBED_SIZE**0.5)))
    nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))

    # define loss function to be NCE loss function
    loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
                                         biases=nce_bias,
                                         labels=target_words,
                                         inputs=embed,
                                         num_sampled=NUM_SAMPLED,
                                         num_classes=VOCAB_SIZE),
                          name='loss')

    # Step 5: define optimizer that follows gradient descent update rule
    # to minimize loss
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

    utils.safe_mkdir('checkpoints')
    with tf.Session() as sess:

        # Step 6: initialize iterator and variables
        sess.run(tf.global_variables_initializer())
        sess.run(iterator.initializer)

        total_loss = 0.0  # we use this to calculate the average loss over the last SKIP_STEP steps
        writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph)

        for index in range(NUM_TRAIN_STEPS):
            try:
                # Step 7: execute optimizer and fetch loss
                _, loss_batch = sess.run([optimizer, loss])
                total_loss += loss_batch

                if (index + 1) % SKIP_STEP == 0:
                    print('Average loss at step {}: {:5.1f}'.format(
                        index, total_loss / SKIP_STEP))
                    total_loss = 0.0
            except tf.errors.OutOfRangeError:
                sess.run(iterator.initializer)
        writer.close()
Example #15
def chk_servers(p: Path) -> int:
    jars_path: Path = Path(str(p) + JAR_DIR)
    servers_path: Path = Path(str(p) + SERVER_DIR)

    if not servers_path.exists() and utils.safe_mkdir(servers_path): return 2
    if not jars_path.exists() and utils.safe_mkdir(jars_path): return 2
    if chk_latest(p): return 1
    return 0
Example #16
    def save_to_file(self, directory_name, name=None):
        if name is not None:
            filename = os.path.join(directory_name, '{}_model.p'.format(name))
        else:
            filename = os.path.join(directory_name, 'model.p')

        utils.safe_mkdir(directory_name)
        utils.pickle_to_file(self.model, filename)
Example #17
def main():
    model = 'trump_tweets'
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('checkpoints/' + model)

    lm = CharRNN(model)
    lm.create_model()
    lm.train()
Example #18
    def _install(self):
        """Install to the builder's specified install directory"""

        os.chdir(self.build_dir)

        safe_mkdir(self.install_dir)

        self.system(['make', 'install'])
Example #19
def main():
    model = 'trump_tweets'
    utils.safe_mkdir('checkpoints/' + model)

    lm = CharRNN(model, HIDDEN_SIZE, BATCH_SIZE, SKIP_STEP, LENGTH, NUM_STEP,
                 LR)
    lm.create_model()
    lm.train()
Example #20
def main():
    model = 'trump_tweets'
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('checkpoints/' + model)

    lm = CharRNN(model)
    lm.create_model()
    lm.train()
Example #21
def word2vec(dataset):
    # Step 1: get the input and output from the dataset
    with tf.name_scope('data'):
        iterator = dataset.make_initializable_iterator()
        center_words, target_words = iterator.get_next()
    # Steps 2 + 3: define the weights and the embedding lookup
    with tf.name_scope('embed'):
        embed_matrix = tf.get_variable(
            'embed_matrix',
            shape=[VOCAB_SIZE, EMBED_SIZE],
            initializer=tf.random_uniform_initializer())
        embed = tf.nn.embedding_lookup(embed_matrix,
                                       center_words,
                                       name='embedding')

    # Step 4: create the NCE loss variables and define the loss function
    with tf.name_scope('loss'):
        nce_weight = tf.get_variable(
            'nce_weight',
            shape=[VOCAB_SIZE, EMBED_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=1.0 /
                                                        (EMBED_SIZE**0.5)))
        nce_bias = tf.get_variable('nce_bias',
                                   initializer=tf.zeros([VOCAB_SIZE]))

        # Define the NCE loss function
        loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
                                             biases=nce_bias,
                                             labels=target_words,
                                             inputs=embed,
                                             num_sampled=NUM_SAMPLED,
                                             num_classes=VOCAB_SIZE),
                              name='loss')
    # Step 5: define the optimizer
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
            loss)

    utils.safe_mkdir('checkpoints')

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())

        total_loss = 0.0
        writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph)

        for index in range(NUM_TRAIN_STEPS):
            try:
                loss_batch, _ = sess.run([loss, optimizer])
                total_loss += loss_batch
                if (index + 1) % SKIP_STEP == 0:
                    print("Average loss at step: {}: {: 5.1f}".format(
                        index, total_loss / SKIP_STEP))
                    total_loss = 0.0
            except tf.errors.OutOfRangeError:
                sess.run([iterator.initializer])
        writer.close()
Example #22
 def test(self,file_checkpoint=None):
     #return                      
     if not self.loaded_weights:
         self.sess.run(tf.global_variables_initializer())
         saver = tf.train.Saver(max_to_keep=40)
         if file_checkpoint:
             if os.path.isfile('{0}.index'.format(file_checkpoint)):
                 print('Taking the specified checkpoint...')
                 saver.restore(self.sess,file_checkpoint )
             else:
                 print('Checkpoint {0} not found.'.format(file_checkpoint))
         else:
             print('Taking the last checkpoint...')
             #Restore the session from checkpoint
             self.sess.run(tf.global_variables_initializer())
             saver = tf.train.Saver()
             ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/'+self.checkpoint_dir+'/checkpoint'))
             if ckpt and ckpt.model_checkpoint_path:
                 saver.restore(self.sess, ckpt.model_checkpoint_path)
     
     out_posterior = np.zeros_like(self.images_test_clipped[0:,:,:,:],dtype='float32')
     out_prior = np.zeros_like(self.images_test_clipped[0:,:,:,:],dtype='float32')
     out_alpha = np.zeros_like(self.images_test_clipped[0:,:,:,:],dtype='float32')
     out_beta = np.zeros_like(self.images_test_clipped[0:,:,:,:],dtype='float32')
     
     for i in range(np.shape(self.images_test_clipped[0:,:,:,:])[0]):
         out_posterior1, out_prior1, out_alpha1,out_beta1,out_L = self.sess.run([self.X_posterior,self.X_prior,self.alpha,self.beta,self.L_holder],feed_dict={self.X_noisy:self.images_test_clipped[i:i+1,:,:,:],
                                                                                                           self.L_holder:self.L,
                                                                                                           self.is_train:False,
                                                                                                           self.shift:1})
         out_posterior[i:i+1,:,:,:] = out_posterior1
         out_prior[i:i+1,:,:,:] = out_prior1
         out_alpha[i:i+1,:,:,:] = out_alpha1
         out_beta[i:i+1,:,:,:] = out_beta1
     
     #denormalize
     out_posterior *= self.norm
     out_prior *= self.norm
    
     #copy point targets back > clip
     mask_outliers = np.logical_xor(self.mask_test,True)
     self.mask_outliers=mask_outliers
     out_posterior[mask_outliers] = self.images_test[mask_outliers]
     out_prior[mask_outliers] = self.images_test[mask_outliers]
     
     dir_test = 'test'
     safe_mkdir(dir_test)
     dir_final = os.path.join(dir_test,self.checkpoint_dir)
     safe_mkdir(dir_final)
     
     step=self.gstep.eval(session=self.sess)
     sio.savemat(os.path.join(dir_final,'{0}_{1}.mat').format(self.checkpoint_dir,step), {'posterior':out_posterior[:,:,:,0],
                                                                    'prior':out_prior[:,:,:,0],
                                                                     'alpha': out_alpha[:,:,:,0],
                                                                     'beta': out_beta[:,:,:,0],                      
                                                                    'noisy':self.images_test[:,:,:,0],
                                                                    'L':out_L})
Example #23
 def filter_documents(self):
     """Read documents from input_dir, filter and write into a filtered dir.
     """
     logging.info('Filtering documents')
     utils.safe_mkdir(self.output_dirpath)
     for input_filepath, output_filepath in zip(self.input_filepaths,
                                                self.output_filepaths):
         logging.info('Reading file: {}'.format(input_filepath))
         self.write_file(input_filepath, output_filepath)
Example #24
def main():
    model = 'trump_tweets'
    # model = "arvix_abstracts"
    utils.safe_mkdir('data/checkpoints')
    utils.safe_mkdir('data/checkpoints/' + model)

    lm = CharRNN(model)
    lm.create_model()
    lm.train()
Example #25
 def filter_documents(self):
     """Read documents from input_dir, filter and write into a filtered dir.
     """
     logging.info('Filtering documents')
     utils.safe_mkdir(self.output_dirpath)
     for input_filepath, output_filepath in zip(
         self.input_filepaths, self.output_filepaths):
         logging.info('Reading file: {}'.format(input_filepath))
         self.write_file(input_filepath, output_filepath)
Example #26
    def download(self):
        """Fetch the package source from its URL and save it in our source
        directory."""

        safe_mkdir(self.archive_dir)

        full_target_name = os.path.join(self.archive_dir, self.packed_name)

        utils.download_and_save(self.url, full_target_name)
Example #27
def plot_invariances(invariance_sequences, out_dir, labels, title_prefix):
	out_dir = os.path.join(out_dir, 'invariance_plots')
	safe_mkdir(out_dir)
	for split in SPLITS:
		sdir = os.path.join(out_dir, split)
		safe_mkdir(sdir)
		for metric in ALL_METRICS:
			line_dict = {metric: invariance_sequences[split][metric]}
			out_file = os.path.join(sdir, metric + '.png')
			plot_lines(labels, "%s %s Invariance" % (title_prefix, metric), line_dict, out_file)
Example #28
def manip(args, test_list, u_model):
    if args.test_weights_path == '':
        weights_path = os.path.join(args.check_dir, args.output_name + '_model_' + args.time + '.hdf5')
    else:
        weights_path = os.path.join(args.data_root_dir, args.test_weights_path)

    output_dir = os.path.join(args.data_root_dir, 'results', args.net)
    manip_out_dir = os.path.join(output_dir, 'manip_output')
    try:
        safe_mkdir(manip_out_dir)
    except:
        pass

    # Compile the loaded model
    manip_model = compile_model(args=args, uncomp_model=u_model)

    try:
        manip_model.load_weights(weights_path)
    except:
        raise NotImplementedError('Unable to find weights path.')


    # Manipulating capsule vectors
    print('Testing... This will take some time...')

    for i, img in enumerate(tqdm(test_list)):
        sitk_img = sitk.ReadImage(os.path.join(args.data_root_dir, 'imgs', img[0]))
        img_data = sitk.GetArrayFromImage(sitk_img)
        num_slices = img_data.shape[0]
        sitk_mask = sitk.ReadImage(os.path.join(args.data_root_dir, 'masks', img[0]))
        gt_data = sitk.GetArrayFromImage(sitk_mask)

        x, y = img_data[num_slices//2, :, :], gt_data[num_slices//2, :, :]
        x, y = np.expand_dims(np.expand_dims(x, -1), 0), np.expand_dims(np.expand_dims(y, -1), 0)

        noise = np.zeros([1, 512, 512, 1, 16])
        x_recons = []
        for dim in trange(16):
            for r in [-0.25, -0.125, 0, 0.125, 0.25]:
                tmp = np.copy(noise)
                tmp[:, :, :, :, dim] = r
                x_recon = manip_model.predict([x, y, tmp])
                x_recons.append(x_recon)

        x_recons = np.concatenate(x_recons)

        out_img = combine_images(x_recons, height=16)
        out_image = out_img * 4096
        out_image[out_image > 574] = 574
        out_image = out_image / 574 * 255

        Image.fromarray(out_image.astype(np.uint8)).save(os.path.join(manip_out_dir, img[0][:-4] + '_manip_output.png'))

    print('Done.')
Example #29
def plot_loss_equivariance_compare(equivariance_sequences, out_dir, labels, title_prefix):
	out_dir = os.path.join(out_dir, 'loss_compare_equivariance_plots')
	safe_mkdir(out_dir)
	for model_type in MODEL_TYPES:
		for split in SPLITS:
			sdir = os.path.join(out_dir, model_type, split)
			safe_mkdir(sdir)
			for metric in ALL_METRICS:
				line_dict = {"%s_%s" % (metric, loss): equivariance_sequences[model_type][loss][split][metric] for loss in LOSS_TYPES}
				out_file = os.path.join(sdir, metric + '.png')
				plot_lines(labels, "%s %s Equivariance Loss Compare" % (title_prefix, metric), line_dict, out_file)
Example #30
def build_the_vocab(dir_path, vocab_size, output_dir):
    """

    :param dir_path: directory where text files are stored to be used for building vocab
    :param vocab_size: size of the vocabulary to be constructed
    :return:
    """
    # create .tsv file with vocab_size
    utils.safe_mkdir(output_dir)
    output_file = open(os.path.join(output_dir, "vocab.tsv"),
                       'w',
                       encoding="utf8")

    # read all the words
    all_words = []
    for txt_file in glob.glob(dir_path + "\\*.txt"):
        print(txt_file)
        words = open(txt_file, 'r', encoding="utf8").read()
        words = words.lower()
        words = ' '.join(words.split())

        words = words.replace('""', " ")
        words = words.replace(",", " ")
        words = words.replace("“", " ")
        words = words.replace("”", " ")
        words = words.replace(".", " ")
        words = words.replace(";", " ")
        words = words.replace("!", " ")
        words = words.replace("?", " ")
        words = words.replace("’", " ")
        words = words.replace("—", " ")

        words = words.split(' ')
        # check if empty words
        for word in words:
            if word:
                all_words.append(word)

    print("Number of words in all files is {}".format(len(all_words)))

    # Count all the words
    count = [('UNK', -1)]
    count.extend(Counter(all_words).most_common(vocab_size - 1))

    print("Number of unique words: {}".format(len(count)))
    print(count[:10])
    # write them to disk
    for word, _ in count:
        output_file.write(word + '\n')

    output_file.close()

    return os.path.join(output_dir, "vocab.tsv")
Example #31
    def save_to_files(self, directory_name, name=None):
        """Saves all the sample files into the directory directory_name.

        Args:
            directory_name (string): Name of directory to save files.
            name (string, optional): additional name to add into the dataset
                files.
        """
        utils.safe_mkdir(directory_name)
        super(BaseSampledDataset, self).save_to_files(directory_name)
        utils.pickle_to_file(self._sample_indices, self._get_objective_filename(
            directory_name, 'sample_indices', name))
Example #32
def write_modulefile(args):
    package_name = args.name
    
    full_path = get_modulefile_path(package_name)

    dirname, filename = os.path.split(full_path)

    safe_mkdir(dirname)

    with open(full_path, 'w') as f:
        file_text = generate_modulefile_text(package_name)
        f.write(file_text)
Example #33
def word2vec(dataset):
    """ Build the graph for word2vec model and train it """
    # Step 1: get input, output from the dataset
    with tf.name_scope('data'):
        iterator = dataset.make_initializable_iterator()
        center_words, target_words = iterator.get_next()

    """ Step 2 + 3: define weights and embedding lookup.
    In word2vec, it's actually the weights that we care about 
    """
    with tf.name_scope('embed'):
        embed_matrix = tf.get_variable('embed_matrix', 
                                        shape=[VOCAB_SIZE, EMBED_SIZE],
                                        initializer=tf.random_uniform_initializer())
        embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embedding')

    # Step 4: construct variables for NCE loss and define loss function
    with tf.name_scope('loss'):
        nce_weight = tf.get_variable('nce_weight', shape=[VOCAB_SIZE, EMBED_SIZE],
                        initializer=tf.truncated_normal_initializer(stddev=1.0 / (EMBED_SIZE ** 0.5)))
        nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))

        # define loss function to be NCE loss function
        loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight, 
                                            biases=nce_bias, 
                                            labels=target_words, 
                                            inputs=embed, 
                                            num_sampled=NUM_SAMPLED, 
                                            num_classes=VOCAB_SIZE), name='loss')

    # Step 5: define optimizer
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)
    
    utils.safe_mkdir('checkpoints')

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())

        total_loss = 0.0  # we use this to calculate the average loss over the last SKIP_STEP steps
        writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph)

        for index in range(NUM_TRAIN_STEPS):
            try:
                loss_batch, _ = sess.run([loss, optimizer])
                total_loss += loss_batch
                if (index + 1) % SKIP_STEP == 0:
                    print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP))
                    total_loss = 0.0
            except tf.errors.OutOfRangeError:
                sess.run(iterator.initializer)
        writer.close()
Example #34
def plot_equivariances(equivariance_sequences, invariance_sequences, out_dir, labels, title_prefix):
	out_dir = os.path.join(out_dir, 'equivariance_plots')
	safe_mkdir(out_dir)
	for model_type in MODEL_TYPES:
		for loss in LOSS_TYPES:
			for split in SPLITS:
				sdir = os.path.join(out_dir, model_type, loss, split)
				safe_mkdir(sdir)
				for metric in ALL_METRICS:
					line_dict = {metric: equivariance_sequences[model_type][loss][split][metric],
								 "invariance_%s" % metric: invariance_sequences[split][metric]}
					out_file = os.path.join(sdir, metric + '.png')
					plot_lines(labels, "%s %s Equivariance" % (title_prefix, metric), line_dict, out_file)
Example #35
def split_data(root_path, num_splits=4):
    with open(os.path.join(root_path, 'file_lists', 'master_nodule_list.csv'),
              'r') as f:
        reader = csv.reader(f)
        img_list = np.asarray(list(reader))

    labels_list = []
    indices = [0]
    nodule_list = []
    mal_score_list = []
    mal_scores = []
    curr_nodule = os.path.dirname(img_list[0][0])
    for i, img_label in enumerate(img_list):
        if os.path.dirname(img_label[0]) != curr_nodule:
            nodule_list.append(curr_nodule)
            mal_score_list.append(np.rint(np.mean(mal_scores)))
            indices.append(i)
            mal_scores = []
            curr_nodule = os.path.dirname(img_label[0])

        split_name = os.path.basename(img_label[0]).split('_')
        mal_scores.append(int(split_name[-1][-1]))
        labels_list.append([int(n[-1]) for n in split_name[1:]])

    outdir = os.path.join(root_path, 'file_lists')
    safe_mkdir(outdir)

    skf = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=12)
    n = 0
    for train_index, test_index in skf.split(nodule_list, mal_score_list):
        with open(os.path.join(outdir, 'train_split_{:02d}.csv'.format(n)),
                  'w') as csvfile:
            writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
            for i in train_index:
                for j in range(indices[i], indices[i + 1]):
                    writer.writerow([img_list[j][0].split(root_path)[1][1:]] +
                                    labels_list[j] + list(img_list[j][1:]))
        with open(os.path.join(outdir, 'test_split_{:02d}.csv'.format(n)),
                  'w') as csvfile:
            writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
            for i in test_index:
                for j in range(indices[i], indices[i + 1]):
                    writer.writerow([img_list[j][0].split(root_path)[1][1:]] +
                                    labels_list[j] + list(img_list[j][1:]))
        n += 1
Example #36
def main():
    # check command line arguments
    assert len(sys.argv) >= 2,\
        '\n[Usage] python3 "%s" <URL to download> ' \
        '[directory to save images]' % __file__

    # get the URL to download from using command line argument, and download the page
    # exit if failed to download
    source_url = sys.argv[1]
    source_html_filename = download_from_url(source_url,
                                             output_doc='index.html',
                                             exit_on_error=True)

    # get images URL and their descriptions
    images_info = get_images_info(source_html_filename)
    num_total_images = len(images_info)
    print('%s image(s) to download: ' % num_total_images)
    index = 0
    for image_info in images_info:
        index += 1
        print('[%s] %s' % (index, vars(image_info)))

    # remove the downloaded html
    safe_remove(source_html_filename)

    # download images
    target_dir = str(sys.argv[2]) if len(sys.argv) >= 3 else '.'
    safe_mkdir(target_dir)
    index = 0
    num_success = 0
    num_failure = 0
    for image_info in images_info:
        index += 1
        print('Downloading image %s of %s' % (index, num_total_images))
        downloaded_filename = download_from_url(image_info.src,
                                                target_dir=target_dir)
        if downloaded_filename is not None:
            num_success += 1
            print('[Download success: %s / %s]\n%s' %
                  (num_success, num_total_images, downloaded_filename))
        else:
            num_failure += 1
            print('[Download failed: %s / %s]\n%s' %
                  (num_failure, num_total_images, image_info.src))

    # print final results
    print('[Download results]')
    print('Success: %s / %s' % (num_success, num_total_images))
    print('Failure: %s / %s' % (num_failure, num_total_images))
Example #37
def main(transform_file, in_dir, out_dir):
	safe_mkdir(out_dir)
	transforms, _ = get_transforms(transform_file)
	transforms = reorder_transforms(transforms)
	all_metrics, transforms = load_metrics(transforms, in_dir)
	label_names, title_prefix = format_labels(transforms)
	invariance_sequences = format_invariances(all_metrics)
	equivariance_sequences = format_equivariances(all_metrics)

	plot_invariances(invariance_sequences, out_dir, label_names, title_prefix)
	plot_equivariances(equivariance_sequences, invariance_sequences, out_dir, label_names, title_prefix)
	plot_reductions(equivariance_sequences, invariance_sequences, out_dir, label_names, title_prefix)
	#plot_loss_equivariance_compare(equivariance_sequences, out_dir, label_names, title_prefix)
	#plot_model_equivariance_compare(equivariance_sequences, out_dir, label_names, title_prefix)
	plot_split_equivariance_compare(equivariance_sequences, out_dir, label_names, title_prefix)
Example #38
def main(relation, limit, offset, directory_name):
    """Main script function."""

    utils.safe_mkdir(directory_name)

    query = """SELECT DISTINCT ?related ?wikiPage WHERE {
        ?movie rdf:type <http://yago-knowledge.org/resource/%s> .
        ?related <http://yago-knowledge.org/resource/%s> ?movie .
        ?related <http://yago-knowledge.org/resource/hasWikipediaUrl> ?wikiPage
        } LIMIT %s OFFSET %s""" % (MOVIE_CATEGORY_NAME, relation, limit,
                                   offset)
    response = utils.query_sparql(query, utils.YAGO_ENPOINT_URL)
    print 'Reading {} objects.'.format(len(response))
    filename = '{}-{}.pickle'.format(relation, offset)
    utils.pickle_to_file(response, os.path.join(directory_name, filename))
Example #39
def build_vocab(words, vocab_size):
    """ Build vocabulary of VOCAB_SIZE most frequent words """
    dictionary = dict()
    count = [('UNK', -1)]
    count.extend(Counter(words).most_common(vocab_size - 1))
    index = 0
    utils.safe_mkdir('processed')
    with open('processed/vocab_1000.tsv', "w") as f:
        for word, _ in count:
            dictionary[word] = index
            if index < 1000:
                f.write(word + "\n")
            index += 1
    index_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, index_dictionary
Example #40
def mk_srv_dir(servers_path: Path) -> Path:
  userin: str = NONE
  server_path: Path = None

  while userin != EXIT:
    userin = str(input(strs.NEW_SRV_NAME))
    if userin == EXIT: return None
    if userin == NONE: continue
    try: userin.index("/")
    except: pass
    else:
      print(strs.E_NAME_SLASH)
      continue

    server_path = Path("{0}/{1}".format(str(servers_path), userin))
    if utils.is_in_dir(servers_path, server_path):
      print(strs.E_SRV_EXISTS.format(userin))
      continue
    server_path = Path("{0}/{1}".format(str(servers_path), userin))
    if utils.safe_mkdir(server_path): 
      try: server_path.rmdir()
      except: pass
      continue
    break
  return server_path
Example #41
def plot_reductions(equivariance_sequences, invariance_sequences, out_dir, labels, title_prefix):
	out_dir = os.path.join(out_dir, 'reduction_plots')
	safe_mkdir(out_dir)
	for model_type in MODEL_TYPES:
		for loss in LOSS_TYPES:
			for split in SPLITS:
				sdir = os.path.join(out_dir, model_type, loss, split)
				safe_mkdir(sdir)
				for metric in NORM_METRICS:
					eq_seq = equivariance_sequences[model_type][loss][split][metric]
					in_seq = invariance_sequences[split][metric]

					# compute relative reduction in metric error
					red_seq = compute_reduction(in_seq, eq_seq)
					line_dict = {metric: red_seq}
					out_file = os.path.join(sdir, metric + '.png')
					plot_lines(labels, "%s %s Equivariance" % (title_prefix, metric), line_dict, out_file)
Example #42
def build_vocab(words, vocab_size, visual_fld):
    """ Build vocabulary of VOCAB_SIZE most frequent words and write it to
    visualization/vocab.tsv
    """
    utils.safe_mkdir(visual_fld)
    file = open(os.path.join(visual_fld, 'vocab.tsv'), 'w')
    
    dictionary = dict()
    count = [('UNK', -1)]
    index = 0
    count.extend(Counter(words).most_common(vocab_size - 1))
    
    for word, _ in count:
        dictionary[word] = index
        index += 1
        file.write(word + '\n')
    
    index_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    file.close()
    return dictionary, index_dictionary
Example #43
    def train(self, n_epochs):
        '''
        The train function alternates between training one epoch and evaluating
        '''
        utils.safe_mkdir('checkpoints')
        utils.safe_mkdir('checkpoints/convnet_layers')
        writer = tf.summary.FileWriter('./graphs/convnet_layers', tf.get_default_graph())

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/convnet_layers/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            
            step = self.gstep.eval()

            for epoch in range(n_epochs):
                step = self.train_one_epoch(sess, saver, self.train_init, writer, epoch, step)
                self.eval_once(sess, self.test_init, writer, epoch, step)
        writer.close()
Example #44
def setup_loggers():
    logdir = os.path.join(get_base_dir(), 'log')
    safe_mkdir(logdir)

    def setup_file_logger(logger):
        short_name = logger.name[len(LOGGER_NAME_PREFIX):]
        file = os.path.join(logdir, '{name}.log'.format(name=short_name))
        fileHandler = logging.FileHandler(file)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        fileHandler.setFormatter(formatter)
        logger.addHandler(fileHandler)

    setup_file_logger(app.logger)

    # set logger class for future loggers
    class RequestLogger(logging.Logger):
        def __init__(self, name):
            super(RequestLogger, self).__init__(name)
            setup_file_logger(self)
            self.setLevel(logging.INFO)
    logging.setLoggerClass(RequestLogger)
Example #45
    def write_splits(self):
        """Re-reads the input files and writes documents into the split files.
        """
        if not self.output_dirname:
            return
        utils.safe_mkdir(self.output_dirname)
        logging.info("Writing {} documents".format(len(self.documents)))
        logging.info("Train dataset size {}".format(len(self.train_doc_index)))
        logging.info("Test dataset size {}".format(len(self.test_doc_index)))
        logging.info("Validation dataset size {}".format(
            len(self.validation_doc_index)))
        current_document_index = 0

        if not self.indices_filepath:
            logging.info("Saving absolute indices")
            indices_filename = os.path.join(self.output_dirname,
                                            'split_indices.pickle')
            with open(indices_filename, 'wb') as indices_file:
                pickle.dump((self.train_doc_index, self.test_doc_index,
                             self.validation_doc_index), indices_file)

        train_filename = os.path.join(self.output_dirname, 'train.conll')
        test_filename = os.path.join(self.output_dirname, 'test.conll')
        val_filename = os.path.join(self.output_dirname, 'validation.conll')
        with open(train_filename, 'w') as train_f, \
                open(test_filename, 'w') as test_f, \
                open(val_filename, 'w') as val_f:
            for file_path in self.file_paths:
                logging.info("Writing file: {}".format(file_path))
                parser = WikipediaCorpusColumnParser(file_path=file_path)
                for document in tqdm(parser):
                    if current_document_index in self.train_doc_index:
                        self.write_document(document, train_f)
                    elif current_document_index in self.test_doc_index:
                        self.write_document(document, test_f)
                    elif current_document_index in self.validation_doc_index:
                        self.write_document(document, val_f)
                    current_document_index += 1
Example #46
def scatter(fin='../data/original.tsv', out='../data/scatter/20110902/', figname='some', ellipses=False, contours=False, conv_hull=True, conv_thresh=[0.2], xnum=10, ynum=10, norm=None, mew=1, legend=False, special=None, simple=True, typevec=None, colorvec=None):

	print figname

	utils.safe_mkdir(out)
	z = files.filter_hack(tb.tabarray(SVfile=fin))
	
	names = np.array([n for n in z.dtype.names if n not in ['type', 'names', 'name', 'ingredients']])
	
	a = z[names].extract()
	
	if norm == None:
		a = utils.normalize(a)	# normalize data 

	if figname.startswith('some'):
		typevec = ['chocolate-cakes', 'angel-food-cakes', 'brownies', 'sugar-cookies', 'scones', 'loaves', 'pancakes', 'crepes']
		colorvec = ['brown', 'g', 'm', 'b', 'k', 'r', 'y', 'c']
	elif figname.startswith('all'):
		typevec = ['']
		z['type'] = ''
		colorvec = ['g']
	
	ingredientvec = ['white sugar', 'all-purpose flour']
	name_dict = dict(zip(names, range(len(names))))	
	
	idict = {}	
	for i in ['egg', 'flour', 'sugar', 'oil']:
		idict[i] = [n for n in z.dtype.names if i in n]

	idict['liquid'] = ['water']
	idict['liquid'] += [n for n in z.dtype.names if ('milk' in n) and ('powder' not in n) and ('chip' not in n)]
	idict['liquid'] += [n for n in z.dtype.names if ('juice' in n) and ('with' not in n)]
	
	idict['sugar'] += ['corn syrup', 'light corn syrup']

	idict['butter'] = ['butter', 'margarine', 'butter or margarine', 'butter or stick margarine']
	idict['oil'] += [n for n in z.dtype.names if 'shortening' in n]	
	idict['fat'] = idict['butter'] + idict['oil']
	idict.pop('butter')
	idict.pop('oil')
	
	print idict
	
	columns = []
	ingredientvec = idict.keys()
	for i in ingredientvec:
		name_list = np.array([name_dict[j] for j in idict[i]])
		columns += [a[:, name_list].sum(axis=1)]
	data = tb.tabarray(columns=columns, names=ingredientvec)
	print data
	n = len(ingredientvec)
	
	if norm is not None:
		d = data.extract()
		i = list(data.dtype.names).index(norm)
		array = d / np.repeat(d[:,i], d.shape[1]).reshape(d.shape[0], d.shape[1])
		data = tb.tabarray(array=array, names=data.dtype.names)
	
	for j1 in range(n-1):
		i1 = ingredientvec[j1]
		for j2 in range(j1+1, n):
			i2 = ingredientvec[j2]
			k = 0
			pylab.clf()
			for kind in typevec:		
				color = colorvec[k]
		
		
				#p = a[z['type']==kind][:,name_dict[i1]]
				#q = a[z['type']==kind][:,name_dict[i2]]
				p = data[z['type']==kind][i1]
				q = data[z['type']==kind][i2]
				
				if simple:
					pylab.plot(p, q, '+', color=color, markeredgewidth=mew)
					
				if conv_hull:
					
					for ct in conv_thresh:
				
						x = p.mean()
						y = q.mean()
						d = np.sqrt((x - p)**2 + (y - q)**2)
						ind = d.argsort()[:-int(len(p) * ct)]							
						pts = [(p[j], q[j]) for j in ind]
						if pts:
							hull = np.array(convexHull(pts))
							pylab.fill(hull[:,0], hull[:,1], color=color, alpha=0.2)
						#else:
						#	print t		
					
				k += 1

			pylab.xlabel(i1)
			pylab.ylabel(i2)

			if special is not None:
				p = data[z['name']==special][i1]
				q = data[z['name']==special][i2]
				pylab.plot(p, q, '*', color='y', markersize=20, mew=2)
			
			if legend:
				if special is not None:
					pylab.legend(typevec + [special])	
				else:
					pylab.legend(typevec)	
				
			if norm is None:
				pylab.axis([0, 1, 0, 1])
			
			pylab.savefig(out + figname + '_' + i1 + '_' + i2 +'.pdf')

	if special is not None:
		pylab.legend(typevec + [special])	
	else:
		pylab.legend(typevec)	
	pylab.savefig(out + figname + '_legend.pdf')

	data = z[['type', 'name']].colstack(data)
	
	if figname.startswith('some'):
		data.saveSV('../data/words/ingredients-basic.tsv')
		
	return (z, data)
Example #47
def main(args):
	log(args, str(args))

	safe_mkdir(args.out_dir)
	all_transforms, _ = get_transforms(args.transform_file)

	# don't redo work that we have already done
	all_transforms, do_first = filter_existing(all_transforms, args.out_dir)
	if len(all_transforms) <= 1:
		log(args, "No transforms to do.  Exiting...")
		exit()

	log(args, "Loaded Transforms.  %d transforms" % len(all_transforms))
	model = init_model(args.network_file, args.weight_file, gpu=args.gpu)

	train_lmdbs = args.train_lmdbs.split(args.delimiter)
	test_lmdbs = args.test_lmdbs.split(args.delimiter)

	base_transform = all_transforms[0]
	log(args, "Starting on Baseline Transform: %r\n" % base_transform)

	base_train_features, base_train_output_probs, base_train_classifications, _ = get_activations(model, [base_transform], train_lmdbs, args)
	base_test_features, base_test_output_probs, base_test_classifications, _ = get_activations(model, [base_transform], test_lmdbs, args)

	transform_partitions = partition_transforms(all_transforms, args.num_transforms)
	log(args, "Transform Partitions: %r" % transform_partitions)
	for transforms in transform_partitions:
		log(args, "Starting on Transforms: %r\n" % transforms)

		train_features, train_output_probs, train_classifications, train_labels = get_activations(model, transforms[1:], train_lmdbs, args)
		train_features.update(base_train_features)
		train_output_probs.update(base_train_output_probs)
		train_classifications.update(base_train_classifications)

		test_features, test_output_probs, test_classifications, test_labels = get_activations(model, transforms[1:], test_lmdbs, args)
		test_features.update(base_test_features)
		test_output_probs.update(base_test_output_probs)
		test_classifications.update(base_test_classifications)

		log(args, "Measuring invariances...")
		train_invariance_metrics = measure_invariances(train_features, train_output_probs, train_classifications, train_labels, transforms, do_first, args)
		test_invariance_metrics = measure_invariances(test_features, test_output_probs, test_classifications, test_labels, transforms, do_first, args)
		log(args, "Done...")

		setup_scratch_space(args)
		log(args, "Measuring equivariances...")
		train_equivariance_metrics, test_equivariance_metrics = measure_equivariances(train_features, train_labels, train_classifications, train_output_probs, 
				test_features, test_labels, test_classifications, test_output_probs, transforms, model, do_first, args)

		for transform in transforms[(0 if do_first else 1):]:
			write_output(args.out_dir, transform, train_invariance_metrics[transform], test_invariance_metrics[transform],
					train_equivariance_metrics[transform], test_equivariance_metrics[transform])

		do_first = False

	log(args, "Done Measure Equivariances")


	cleanup_scratch_space(args)
	log(args, "Exiting...")
		
	if args.log_file:
		args.log.close()
Example #48
			labels.append("S")
		elif "mirror" in net_dir:
			labels.append("M")
		else:
			labels.append("")

	return labels


for split in SPLITS:
	print "Starting Split:", split
	for loss in LOSS_TYPES:
		for model_type in MODEL_TYPES:
			#out_dir = os.path.join(root_out_dir, split, loss, model_type)
			out_dir = os.path.join(root_out_dir, split)
			safe_mkdir(out_dir)
			for metric, is_distance in METRICS:
				print "Starting Metric:", metric

				dist_mat = np.zeros( (num_net_dirs, num_net_dirs), dtype=float)
				for idx1, net_dir1 in enumerate(net_dirs):
					result_dir = os.path.join(ROOT, net_dir1, 'equivalence/results')
					for idx2, net_dir2 in enumerate(net_dirs):
						fn = net_dir2.replace('/', '_') + '.txt'
						result_file = os.path.join(result_dir, fn)
						results = ast.literal_eval(open(result_file, 'r').read())
						if is_distance:
							dist_mat[idx1,idx2] = SCALE * results[split][model_type][loss][metric]
						else:
							dist_mat[idx1,idx2] = SCALE * (1 - results[split][model_type][loss][metric])
Example #49
def simplex(fin='../data/words/ingredients-basic.tsv', out='../data/simplex/20110920/', figname='some', ellipses=False, contours=False, conv_hull=True, conv_thresh=[0.2], xnum=10, ynum=10, norm=True, linewidth=0.2, text=True, mew=1.5, special=None):

	utils.safe_mkdir(out)
	z = tb.tabarray(SVfile=fin)
	
	names = [n for n in z.dtype.names if n not in ['type', 'names', 'name', 'ingredients']]
	
	n = len(names)

	array = utils.normalize(z[names].extract())
	data = tb.tabarray(array=array, names=names)	

	if figname.startswith('some'):
		typevec = ['chocolate-cakes', 'angel-food-cakes', 'brownies', 'sugar-cookies', 'scones', 'loaves', 'pancakes', 'crepes']
		colorvec = ['brown', 'g', 'm', 'b', 'k', 'r', 'y', 'c']
	elif figname.startswith('all'):
		typevec = ['']
		z['type'] = ''
		colorvec = ['g']
	
	for j1 in range(n):
		i1 = names[j1]
		for j2 in range(n):
			i2 = names[j2]
			for j3 in range(n):
				i3 = names[j3]
				k = 0
				pylab.clf()
				for kind in typevec:		
					color = colorvec[k]
			
					p = data[z['type']==kind][i1] - data[z['type']==kind][i2]
					q = np.sqrt(3) * data[z['type']==kind][i3]

					pylab.plot(p, q, '+', color=color, markeredgewidth=mew)
					
					if conv_hull:
						
						for ct in conv_thresh:
					
							x = p.mean()
							y = q.mean()
							d = np.sqrt((x - p)**2 + (y - q)**2)
							ind = d.argsort()[:-int(len(p) * ct)]							
							pts = [(p[j], q[j]) for j in ind]
							if pts:
								hull = np.array(convexHull(pts))
								pylab.fill(hull[:,0], hull[:,1], color=color, alpha=0.2)
						
					k += 1

				if special is not None:
					p = data[z['name']==special][i1] - data[z['name']==special][i2]
					q = np.sqrt(3) * data[z['name']==special][i3]				
					pylab.plot(p, q, '*', color='y', markersize=20, mew=2)

				pylab.plot([-1, 1], [0, 0], 'k-.', linewidth=linewidth)
				pylab.plot([-1, 0], [0, np.sqrt(3)], 'k-.', linewidth=linewidth)
				pylab.plot([0, 1], [np.sqrt(3), 0], 'k-.', linewidth=linewidth)				
	
				if text:
					pylab.text(1.1, -0.1, i1, fontsize=16)
					pylab.text(-1.1, -0.1, i2, fontsize=16)
					pylab.text(-0.05, 1.8, i3, fontsize=16)
				
				pylab.axis('equal')
				pylab.axis('off')
				
				pylab.savefig(out + figname + '_' + '_'.join([i1, i2, i3]) +'.pdf', transparent=True)

	if special is not None:
		pylab.legend(typevec + [special])	
	else:
		pylab.legend(typevec)		
	pylab.savefig(out + figname + '_legend.pdf', transparent=True)
		
	return (z, data)
Example #50
def setup():
    """
    Create the checkpoints directory for saved models and the outputs directory for the synthesized images.
    """
    utils.safe_mkdir("checkpoints")
    utils.safe_mkdir("outputs")
Example #51
def setup():
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('outputs')
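All of the examples above call a safe_mkdir helper from their own project's utils module, whose definition is not part of this listing. As a rough point of reference only, a minimal sketch of such a helper might look like the following (assuming the common behavior of creating the directory and silently ignoring the case where it already exists; a few examples, such as #15 and #40, instead expect a truthy return value on failure, so their real implementations must differ):

import os


def safe_mkdir(path):
    """Create a directory, doing nothing if it already exists.

    Sketch of the helper assumed by the examples above; the actual
    implementations live in each project's own utils module and may differ.
    """
    try:
        os.makedirs(path)
    except OSError:
        pass  # directory already exists (or could not be created)

On Python 3 the same effect can be achieved with os.makedirs(path, exist_ok=True) or pathlib.Path(path).mkdir(parents=True, exist_ok=True).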