예제 #1
0
def export_graph(model_name, XtoY=True):
    """Freeze the latest checkpoint into a standalone .pb inference graph.

    Builds an inference graph for one translation direction (generator G,
    X->Y, when ``XtoY`` is True; otherwise F, Y->X), restores the newest
    checkpoint from ``FLAGS.checkpoints_dir``, folds variables into
    constants, and writes the result to ``pretrained/<model_name>``.
    """
    graph = tf.Graph()

    with graph.as_default():
        cycle_gan = CycleGAN(norm='instance')

        # Single 128x128 RGB image; a batch axis is added before sampling.
        input_image = tf.placeholder(tf.float32,
                                     shape=[128, 128, 3],
                                     name='input_image')
        cycle_gan.model()
        generator = cycle_gan.G if XtoY else cycle_gan.F
        output_image = generator.sample(tf.expand_dims(input_image, 0))

        # Stable node name so consumers can fetch the output by name.
        output_image = tf.identity(output_image, name='output_image')
        restore_saver = tf.train.Saver()
        export_saver = tf.train.Saver()  # kept for parity with the original

    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        restore_saver.restore(
            sess, tf.train.latest_checkpoint(FLAGS.checkpoints_dir))
        frozen = tf.graph_util.convert_variables_to_constants(
            sess, graph.as_graph_def(), [output_image.op.name])
        tf.train.write_graph(frozen, 'pretrained', model_name, as_text=False)
예제 #2
0
 def test_CycleGAN(self):
     """Smoke-test graph construction for the apple<->orange sample data."""
     model = CycleGAN(
         X_train_file='data/tfrecords/apple.tfrecord',
         Y_train_file='data/tfrecords/orange.tfrecord',
     )
     g_loss, d_y_loss, f_loss, d_x_loss, fake_y, fake_x = model.model()
     print(g_loss)
def export_graph(model_name, XtoY=True):
  """Freeze the newest checkpoint into pretrained/<model_name>.

  ``XtoY`` selects which generator is exported: G (X->Y) when True,
  F (Y->X) otherwise.
  """
  graph = tf.Graph()

  with graph.as_default():
    cycle_gan = CycleGAN(ngf=FLAGS.ngf, norm=FLAGS.norm,
                         image_size=FLAGS.image_size)

    # Single square RGB image; batch axis is added before sampling.
    input_image = tf.placeholder(
        tf.float32,
        shape=[FLAGS.image_size, FLAGS.image_size, 3],
        name='input_image')
    cycle_gan.model()
    generator = cycle_gan.G if XtoY else cycle_gan.F
    output_image = generator.sample(tf.expand_dims(input_image, 0))

    # Give the output a stable, well-known node name.
    output_image = tf.identity(output_image, name='output_image')
    restore_saver = tf.train.Saver()
    export_saver = tf.train.Saver()  # kept for parity with the original

  with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    restore_saver.restore(
        sess, tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
    frozen = tf.graph_util.convert_variables_to_constants(
        sess, graph.as_graph_def(), [output_image.op.name])
    tf.train.write_graph(frozen, 'pretrained', model_name, as_text=False)
예제 #4
0
def conversion(model_filepath, img_dir, conversion_direction, output_dir):
    """Translate every readable image in ``img_dir`` with a trained CycleGAN.

    Each image is resized to the network input size, converted in the given
    ``conversion_direction``, resized back to its original dimensions, and
    written under ``output_dir`` with the same filename.

    BUG FIX: the original crashed with an AttributeError on ``img.shape``
    whenever ``img_dir`` contained a non-image entry (subdirectory, hidden
    file, ...) because ``cv2.imread`` returns None for those; such entries
    are now skipped.
    """
    input_size = [256, 256, 3]  # (height, width, channels) fed to the model
    num_filters = 64

    model = CycleGAN(input_size=input_size,
                     num_filters=num_filters,
                     mode='test')
    model.load(filepath=model_filepath)

    os.makedirs(output_dir, exist_ok=True)

    for file in os.listdir(img_dir):
        filepath = os.path.join(img_dir, file)
        img = cv2.imread(filepath)
        if img is None:
            # Not a readable image; skip instead of crashing below.
            continue
        img_height, img_width, _ = img.shape
        # Note: cv2.resize takes (width, height).
        img = cv2.resize(img, (input_size[1], input_size[0]))
        img = image_scaling(imgs=img)
        img_converted = model.test(inputs=np.array([img]),
                                   direction=conversion_direction)[0]
        img_converted = image_scaling_inverse(imgs=img_converted)
        # Restore the original resolution before saving.
        img_converted = cv2.resize(img_converted, (img_width, img_height))
        cv2.imwrite(os.path.join(output_dir, os.path.basename(file)),
                    img_converted)
예제 #5
0
def train():
    """Run the CycleGAN training loop (TF1 queue-runner style).

    Creates a timestamped checkpoint directory, builds the model graph and
    its optimizers, then trains until the input queue stops or the user
    interrupts: losses are logged every 100 steps, a checkpoint is written
    every 1000 steps, and a final checkpoint is always saved on exit.
    """
    current_time = datetime.now().strftime("%Y%m%d-%H%M")
    checkpoints_dir = "checkpoints/{}".format(current_time)
    os.makedirs(checkpoints_dir, exist_ok=True)

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN()
        G_loss, D_Y_loss, F_loss, D_X_loss = cycle_gan.model()
        optimizers = cycle_gan.optimize(G_loss, D_Y_loss, F_loss, D_X_loss)
        # TensorBoard event files are written next to the checkpoints.
        train_writer = tf.summary.FileWriter(checkpoints_dir, graph)

    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())

        # Start the TF1 input pipeline threads; the coordinator lets us
        # stop them cleanly from the except/finally blocks below.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            step = 0
            while not coord.should_stop():
                # One optimizer step; fetch losses and the merged summary
                # in the same run so they describe the same batch.
                _, G_loss_val, D_Y_loss_val, F_loss_val, D_X_loss_val, summary = (
                    sess.run([
                        optimizers, G_loss, D_Y_loss, F_loss, D_X_loss,
                        cycle_gan.summary
                    ]))

                train_writer.add_summary(summary, step)
                train_writer.flush()

                if step % 100 == 0:
                    print('-----------Step %d:-------------' % step)
                    print('  G_loss   : {}'.format(G_loss_val))
                    print('  D_Y_loss   : {}'.format(D_Y_loss_val))
                    print('  F_loss   : {}'.format(F_loss_val))
                    print('  D_X_loss   : {}'.format(D_X_loss_val))

                if step % 1000 == 0:
                    # Periodic snapshot; global_step suffixes the filename.
                    save_path = cycle_gan.saver.save(sess,
                                                     checkpoints_dir +
                                                     "/model.ckpt",
                                                     global_step=step)
                    print("Model saved in file: %s" % save_path)

                step += 1

        except KeyboardInterrupt:
            print('Interrupted')
            coord.request_stop()
        except Exception as e:
            # Forward the error to the coordinator so worker threads stop.
            coord.request_stop(e)
        finally:
            # Always save a final checkpoint, even on error or interrupt.
            save_path = cycle_gan.saver.save(sess,
                                             checkpoints_dir + "/model.ckpt")
            print("Model saved in file: %s" % save_path)
            # When done, ask the threads to stop.
            coord.request_stop()
            coord.join(threads)
예제 #6
0
def main(_):
    """Entry point: pin the GPU from args, then run CycleGAN train/test."""
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    is_train = args.phase == 'train'
    with tf.Session() as sess:
        model = CycleGAN(sess, args, is_train=is_train)
        model.run()
예제 #7
0
def conversion(model_dir, model_name, data_dir, conversion_direction, output_dir):
    """Convert every wav in ``data_dir`` with a trained voice CycleGAN.

    For each file: WORLD-decompose, normalize MCEPs with the source-domain
    statistics, run the model in ``conversion_direction`` ('A2B', anything
    else means B2A), de-normalize with the target-domain statistics,
    convert pitch, re-synthesize and write the wav to ``output_dir``.

    Improvement over the original: the two direction branches were
    near-identical copies; the source/target statistics are now selected
    once up front (behavior is unchanged).
    """
    num_features = 24
    sampling_rate = 16000
    frame_period = 5.0

    model = CycleGAN(num_features=num_features, mode='test')
    model.load(filepath=os.path.join(model_dir, model_name))

    mcep_stats = np.load(os.path.join(model_dir, 'mcep_normalization.npz'))
    logf0s_stats = np.load(os.path.join(model_dir, 'logf0s_normalization.npz'))

    # Pick source/target sides once instead of duplicating the whole
    # conversion block per direction. Any value other than 'A2B' is
    # treated as B2A, exactly as in the original if/else.
    src, tgt = ('A', 'B') if conversion_direction == 'A2B' else ('B', 'A')
    logf0s_mean_src, logf0s_std_src = logf0s_stats['mean_' + src], logf0s_stats['std_' + src]
    logf0s_mean_tgt, logf0s_std_tgt = logf0s_stats['mean_' + tgt], logf0s_stats['std_' + tgt]
    mcep_mean_src, mcep_std_src = mcep_stats['mean_' + src], mcep_stats['std_' + src]
    mcep_mean_tgt, mcep_std_tgt = mcep_stats['mean_' + tgt], mcep_stats['std_' + tgt]

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for file in os.listdir(data_dir):
        filepath = os.path.join(data_dir, file)
        wav, _ = librosa.load(filepath, sr=sampling_rate, mono=True)
        # Pad so the frame count is a multiple of 4 (model requirement).
        wav = wav_padding(wav=wav, sr=sampling_rate, frame_period=frame_period, multiple=4)
        f0, timeaxis, sp, ap = world_decompose(wav=wav, fs=sampling_rate, frame_period=frame_period)
        coded_sp = world_encode_spectral_envelop(sp=sp, fs=sampling_rate, dim=num_features)
        coded_sp_transposed = coded_sp.T

        # Log-Gaussian normalized pitch transform source -> target.
        f0_converted = pitch_conversion(f0=f0,
                                        mean_log_src=logf0s_mean_src,
                                        std_log_src=logf0s_std_src,
                                        mean_log_target=logf0s_mean_tgt,
                                        std_log_target=logf0s_std_tgt)
        # Normalize with source stats, convert, de-normalize with target stats.
        coded_sp_norm = (coded_sp_transposed - mcep_mean_src) / mcep_std_src
        coded_sp_converted_norm = model.test(inputs=np.array([coded_sp_norm]),
                                             direction=conversion_direction)[0]
        coded_sp_converted = coded_sp_converted_norm * mcep_std_tgt + mcep_mean_tgt

        coded_sp_converted = coded_sp_converted.T
        # WORLD requires a C-contiguous array.
        coded_sp_converted = np.ascontiguousarray(coded_sp_converted)
        decoded_sp_converted = world_decode_spectral_envelop(coded_sp=coded_sp_converted, fs=sampling_rate)
        wav_transformed = world_speech_synthesis(f0=f0_converted, decoded_sp=decoded_sp_converted, ap=ap, fs=sampling_rate, frame_period=frame_period)
        librosa.output.write_wav(os.path.join(output_dir, os.path.basename(file)), wav_transformed, sampling_rate)
예제 #8
0
    def __init__(self, model_dir, model_name):
        """Load a trained voice-conversion CycleGAN and its normalization
        statistics, and export the graph in two formats as a side effect.

        Args:
            model_dir: directory holding the checkpoint and the
                mcep/logf0s normalization .npz files.
            model_name: checkpoint filename inside ``model_dir``.
        """
        # Fixed preprocessing configuration the model was trained with.
        self.num_features = 24
        self.sampling_rate = 16000
        self.frame_period = 5.0

        self.model = CycleGAN(num_features = self.num_features, mode = 'test')

        self.model.load(filepath = os.path.join(model_dir, model_name))

        # NB: Save the graph
        # Dumps the raw (un-frozen) GraphDef as text for inspection.
        definition = self.model.sess.graph_def
        directory = 'saved_model_2'
        tf.train.write_graph(definition, directory, 'saved_model_2.pb', as_text=True)

        # Also export a SavedModel for serving; see:
        # https://github.com/tensorflow/models/issues/3530#issuecomment-395968881
        output_dir = './saved_model/'
        builder = tf.saved_model.builder.SavedModelBuilder(output_dir)

        # NOTE(review): exported without a signature_def_map — consumers
        # must address tensors by name; confirm this is intended.
        builder.add_meta_graph_and_variables(
            self.model.sess,
            [tf.saved_model.tag_constants.SERVING],
            main_op=tf.tables_initializer(),
        )

        builder.save()

        """
        builder.add_meta_graph_and_variables(
            self.model.sess,
            [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                'predict_images':
                    prediction_signature,
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    classification_signature,
            },
            main_op=tf.tables_initializer())
        """

        # Per-domain (A/B) MCEP mean/std used to (de)normalize features.
        self.mcep_normalization_params = np.load(os.path.join(model_dir, 'mcep_normalization.npz'))
        self.mcep_mean_A = self.mcep_normalization_params['mean_A']
        self.mcep_std_A = self.mcep_normalization_params['std_A']
        self.mcep_mean_B = self.mcep_normalization_params['mean_B']
        self.mcep_std_B = self.mcep_normalization_params['std_B']

        # Per-domain log-F0 statistics used for pitch conversion.
        self.logf0s_normalization_params = np.load(os.path.join(model_dir, 'logf0s_normalization.npz'))
        self.logf0s_mean_A = self.logf0s_normalization_params['mean_A']
        self.logf0s_std_A = self.logf0s_normalization_params['std_A']
        self.logf0s_mean_B = self.logf0s_normalization_params['mean_B']
        self.logf0s_std_B = self.logf0s_normalization_params['std_B']
예제 #9
0
def main(_):
    """Create the output directories and run CycleGAN training or testing.

    Reads everything from the module-level ``args``: the phase selects
    between ``model.train`` and ``model.test``.
    """
    # exist_ok collapses the original three check-then-create pairs.
    for directory in (args.checkpoint_dir, args.sample_dir, args.test_dir):
        os.makedirs(directory, exist_ok=True)

    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    with tf.Session(config=tfconfig) as sess:
        model = CycleGAN(sess, args)
        # The original used a conditional *expression* purely for its side
        # effects — an anti-idiom; a plain if/else states the intent.
        if args.phase == 'train':
            model.train(args)
        else:
            model.test(args)
예제 #10
0
def test(imglist_a,
         imglist_b,
         model_path,
         base_net='resnet',
         batch_size=1,
         image_save_path_a='../output/cat_2_dog_test_cl3/testA/',
         image_save_path_b='../output/cat_2_dog_test_cl3/testB/',
         show_image_every_step=50,
         show_image=False):
    """Run a trained CycleGAN on both test sets (A->B and B->A).

    Args:
        imglist_a / imglist_b: image lists for the two domains.
        model_path: path of the trained model to restore.
        base_net: backbone architecture name.
        batch_size: inference batch size (now actually honored, see below).
        image_save_path_a / image_save_path_b: output dirs per direction.
        show_image_every_step / show_image: preview options forwarded to
            ``gan.test``.

    BUG FIX: the original hard-coded ``batch_size=1`` in both ``gan.test``
    calls, silently ignoring the ``batch_size`` parameter; it is now
    forwarded.
    """
    gan = CycleGAN(mode='test', base=base_net, verbose=False)
    gan.build()
    # Both directions share every setting except the image list, the
    # direction flag, and the output directory.
    for imglist, a2b, save_path in ((imglist_a, True, image_save_path_a),
                                    (imglist_b, False, image_save_path_b)):
        gan.test(imglist,
                 model_path,
                 is_a2b=a2b,
                 batch_size=batch_size,
                 image_save_path=save_path,
                 show_image=show_image,
                 show_image_every_step=show_image_every_step)
예제 #11
0
def main(unused_argv):
    """Build a CycleGAN from the parsed command-line args and train it."""
    with tf.Session() as sess:
        print("==================================")
        print("[*] Start initializing cyclegan...")
        model = CycleGAN(
            sess,
            "cyclegan",
            dataset=args.dataset,
            image_size=args.image_size,
            batch_size=args.batch_size,
            ngf=args.ngf,
            ndf=args.ndf,
            lambda1=args.lambda1,
            lambda2=args.lambda2,
        )
        print("[*] Start training...")
        model.train(args.epoches, load_model=None, save_freq=0.25)
예제 #12
0
def export_graph(model_name, XtoY=True):
    """Freeze *every* checkpoint in FLAGS.checkpoint_dir into .pb files.

    For each checkpoint, the selected generator (G: X->Y when ``XtoY``,
    else F: Y->X) is restored and written to ``FLAGS.save_model_dir`` as
    ``<iteration>_<model_name>``.
    """
    graph = tf.Graph()

    with graph.as_default():
        cycle_gan = CycleGAN(ngf=FLAGS.ngf,
                             norm=FLAGS.norm,
                             image_size=FLAGS.image_size)

        input_image = tf.placeholder(
            tf.float32,
            shape=[FLAGS.image_size, FLAGS.image_size, 3],
            name='input_image')
        cycle_gan.model()
        if XtoY:
            output_image = cycle_gan.G.sample(tf.expand_dims(input_image, 0))
        else:
            output_image = cycle_gan.F.sample(tf.expand_dims(input_image, 0))

        # Stable node name for downstream consumers of the frozen graph.
        output_image = tf.identity(output_image, name='output_image')

        restore_saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        ckpt_list = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir).all_model_checkpoint_paths

        for ckpt_path in ckpt_list:
            print("ckpt: " + ckpt_path)

            # BUG FIX: the original used split("-")[1], which picks the
            # wrong token whenever the checkpoint *path* contains another
            # dash (e.g. 'runs/2020-01/model.ckpt-5000'). Checkpoint paths
            # end in '-<global_step>', so take the last token.
            iteration = ckpt_path.rsplit("-", 1)[-1]

            restore_saver.restore(sess, ckpt_path)

            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, graph.as_graph_def(), [output_image.op.name])

            tf.train.write_graph(output_graph_def,
                                 FLAGS.save_model_dir,
                                 str(iteration) + "_" + model_name,
                                 as_text=False)
예제 #13
0
def sample():
  """Run one image through a restored CycleGAN and write samples/sample.jpg.

  Decodes IMG_PATH, resizes it to the 128x128 network input, translates it
  with the restored generator, and writes the (already JPEG-encoded) result.
  """
  graph = tf.Graph()

  with graph.as_default():
    # BUG FIX: JPEG bytes must be read in binary mode; 'r' decodes the file
    # as text on Python 3 and corrupts/raises on non-UTF-8 bytes.
    with tf.gfile.FastGFile(IMG_PATH, 'rb') as f:
      image_data = f.read()
    in_image = tf.image.decode_jpeg(image_data, channels=3)
    in_image = tf.image.resize_images(in_image, size=(128, 128))
    in_image = utils.convert2float(in_image)
    in_image.set_shape([128, 128, 3])

    # BUG FIX: the original constructed CycleGAN() twice; the first
    # instance was discarded but could still add nodes to the graph.
    cycle_gan = CycleGAN()
    cycle_gan.model()
    out_image = cycle_gan.sample(tf.expand_dims(in_image, 0))

  with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    cycle_gan.saver.restore(sess, CKPT_PATH)
    # out_image is expected to be encoded JPEG bytes, written verbatim.
    generated = out_image.eval()
    samples_dir = 'samples'
    os.makedirs(samples_dir, exist_ok=True)
    samples_file = os.path.join(samples_dir, 'sample.jpg')
    with open(samples_file, 'wb') as f:
      f.write(generated)
예제 #14
0
def eval():
    """Greedy-decode the test set with a restored text CycleGAN and score it.

    Loads the test pairs, restores the latest checkpoint from
    ``hp.checkpoint_dir``, decodes each batch token-by-token, writes
    ``results/fake.txt`` lines of "<hypothesis>,<target>,<label>", and
    appends a corpus BLEU score at the end.

    NOTE(review): the function name shadows the builtin ``eval`` — kept for
    interface compatibility with existing callers.
    """
    x, y, label = data_helpers.load_data_label(hp.source_test)
    x_test = utils.text2token(x, hp.checkpoint_dir)
    y_test = utils.text2token(y, hp.checkpoint_dir)

    # Generate evaluation batches (single pass, no shuffling).
    batches = data_helpers.batch_iter(
        list(zip(x_test, y_test, label)), hp.batch_size, num_epochs=1, shuffle=False)

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN(
            vocab_size=3961,
            LAMBDA=10,
            is_training=True)
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint(hp.checkpoint_dir))

        if not os.path.exists('results'):
            os.mkdir('results')
        with codecs.open("results/" + 'fake.txt', "w", "utf-8") as fout:
            list_of_refs, hypotheses = [], []
            i = 1
            for batch in batches:
                # NOTE(review): rebinding `label` here shadows the dataset-level
                # `label` loaded above; within the loop it is the batch's labels.
                x_batch, y_batch, label = zip(*batch)

                # Greedy autoregressive decoding: feed the partial prediction
                # back in and fix one position per iteration.
                preds = np.zeros((hp.batch_size, hp.maxlen), np.int32)
                for j in range(hp.maxlen):
                    _preds = sess.run(cycle_gan.preds_y, feed_dict={cycle_gan.x: x_batch, cycle_gan.y: preds})
                    preds[:, j] = _preds[:, j]

                for labels, targets, pred in zip(label, y_batch, preds):  # sentence-wise
                    got = utils.token2text(pred, hp.checkpoint_dir)
                    target = utils.token2text(targets, hp.checkpoint_dir)
                    print(str(i))

                    fout.write(got + "," + target + ',' + str(labels) + "\n")
                    fout.flush()
                    i += 1

                    # bleu score: only keep pairs long enough to be meaningful
                    ref = target.split()
                    hypothesis = got.split()
                    if len(ref) > 3 and len(hypothesis) > 3:
                        list_of_refs.append([ref])
                        hypotheses.append(hypothesis)

            # Calculate bleu score
            score = corpus_bleu(list_of_refs, hypotheses)
            fout.write("Bleu Score = " + str(100 * score))
예제 #15
0
def run(args):
    """Train and/or test a CycleGAN under a tf.train.Supervisor.

    Loads the dataset for ``args.task``, builds the model, sets up periodic
    checkpointing/summaries under a Drive-mounted log directory, then runs
    a training session (if ``args.train``) followed by a testing session.
    """
    logger.info('Read data:')
    train_A, train_B, test_A, test_B = get_data(args.task, args.image_size)

    logger.info('Build graph:')
    model = CycleGAN(args)

    variables_to_save = tf.global_variables()
    init_op = tf.variables_initializer(variables_to_save)
    init_all_op = tf.global_variables_initializer()
    saver = FastSaver(variables_to_save)

    logger.info('Trainable vars:')
    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    # Resume into an existing run directory, or start a fresh timestamped one.
    if args.load_model != '':
        model_name = args.load_model
    else:
        model_name = '{}_{}'.format(
            args.task,
            datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    logdir = '/content/drive/My Drive/Colab Notebooks/Cycle/logs'
    makedirs(logdir)
    logdir = os.path.join(logdir, model_name)
    logger.info('Events directory: %s', logdir)
    summary_writer = tf.summary.FileWriter(logdir)

    def init_fn(sess):
        # Runs only when no checkpoint exists in logdir.
        logger.info('Initializing all parameters.')
        sess.run(init_all_op)

    # Supervisor handles checkpoint restore/save (every 300s) and summary
    # writing (every 30s); summary_op=None means the model emits its own.
    sv = tf.train.Supervisor(
        is_chief=True,
        logdir=logdir,
        saver=saver,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(variables_to_save),
        global_step=model.global_step,
        save_model_secs=300,
        save_summaries_secs=30)

    if args.train:
        logger.info("Starting training session.")
        with sv.managed_session() as sess:
            model.train(sess, summary_writer, train_A, train_B)

    # Testing always runs, reusing the latest checkpoint via the Supervisor.
    logger.info("Starting testing session.")
    with sv.managed_session() as sess:
        base_dir = os.path.join(
            '/content/drive/My Drive/Colab Notebooks/Cycle/results',
            model_name)
        makedirs(base_dir)
        model.test(sess, test_A, test_B, base_dir)
    def __init__(self, model_dir, model_name):
        """Load a trained voice-conversion CycleGAN plus the MCEP and
        log-F0 normalization statistics stored next to the checkpoint."""
        # Fixed preprocessing configuration the model was trained with.
        self.num_features = 24
        self.sampling_rate = 16000
        self.frame_period = 5.0

        self.model = CycleGAN(num_features=self.num_features, mode='test')
        self.model.load(filepath=os.path.join(model_dir, model_name))

        # Per-domain (A/B) MCEP mean/std for feature (de)normalization.
        mcep = np.load(os.path.join(model_dir, 'mcep_normalization.npz'))
        self.mcep_normalization_params = mcep
        self.mcep_mean_A = mcep['mean_A']
        self.mcep_std_A = mcep['std_A']
        self.mcep_mean_B = mcep['mean_B']
        self.mcep_std_B = mcep['std_B']

        # Per-domain log-F0 statistics for pitch conversion.
        logf0s = np.load(os.path.join(model_dir, 'logf0s_normalization.npz'))
        self.logf0s_normalization_params = logf0s
        self.logf0s_mean_A = logf0s['mean_A']
        self.logf0s_std_A = logf0s['std_A']
        self.logf0s_mean_B = logf0s['mean_B']
        self.logf0s_std_B = logf0s['std_B']
예제 #17
0
def get_result(XtoY=True):
    """Translate every image in FLAGS.test_image with a restored CycleGAN.

    Restores the latest checkpoint from ``FLAGS.checkpoint_dir``, runs the
    selected generator (G when ``XtoY``, else F) on each test image, resizes
    the output to (36, 136) and writes it to ``FLAGS.output_dir``.
    """
    graph = tf.Graph()
    # BUG FIX: the original wrapped os.mkdir in a bare `except:` that
    # swallowed *every* error (permissions, bad path, ...) and continued;
    # exist_ok only tolerates the directory already existing.
    os.makedirs(FLAGS.output_dir, exist_ok=True)
    with tf.Session(graph=graph) as sess:
        cycle_gan = CycleGAN(ngf=FLAGS.ngf,
                             norm=FLAGS.norm,
                             image_size_w=FLAGS.image_size_w,
                             image_size_h=FLAGS.image_size_h)
        input_image = tf.placeholder(
            tf.float32,
            shape=[FLAGS.image_size_w, FLAGS.image_size_h, 3],
            name='input_image')
        cycle_gan.model()

        if XtoY:
            output_image = cycle_gan.G.sample(tf.expand_dims(input_image, 0))
        else:
            output_image = cycle_gan.F.sample(tf.expand_dims(input_image, 0))
        fixed_output_img = tf.image.resize_images(
            output_image, (FLAGS.output_image_w, FLAGS.output_image_h))
        latest_ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, latest_ckpt)
        for index, fname in enumerate(os.listdir(FLAGS.test_image)):
            # os.path.join works whether or not the dirs have a trailing '/'
            # (the original relied on string concatenation).
            img = imread(os.path.join(FLAGS.test_image, fname))
            img = imresize(img, (128, 128))  # network input size
            gen_img = sess.run(fixed_output_img, feed_dict={input_image: img})
            image_path = os.path.join(FLAGS.output_dir, fname)
            imsave(image_path, imresize(gen_img, (36, 136)))
            if index % 25 == 0:
                print(index)
예제 #18
0
def train(imglist_a,
          imglist_b,
          base_net='resnet',
          lr=2e-4,
          cycle_loss_weight=3,
          identity_loss_weight=0,
          disc_loss_weight=0.5,
          disc_2_loss_weight=0.5,
          epochs=200,
          decay_from=100,
          steps_per_epoch=3000,
          true_label_value=1,
          batch_size=1,
          image_save_path='../output/20190325/',
          model_save_path='../models/20190325/',
          save_image_every_step=100,
          save_model_every_epoch=1,
          show_image=False,
          load_model=False,
          model_load_path='../models/20190325/'):
    """Build, compile and train a CycleGAN, forwarding every option.

    Thin convenience wrapper: constructs the model for ``base_net``,
    compiles it with the given learning rate and loss weights, then
    delegates the training loop (and all checkpoint/preview settings)
    to ``CycleGAN.train``.
    """
    model = CycleGAN(mode='train', base=base_net, verbose=False)
    model.build()
    model.compile(
        learning_rate=lr,
        cycle_loss_weight=cycle_loss_weight,
        identity_loss_weight=identity_loss_weight,
        disc_loss_weight=disc_loss_weight,
        disc_2_loss_weight=disc_2_loss_weight,
    )
    model.train(
        imglist_a,
        imglist_b,
        epochs=epochs,
        decay_from=decay_from,
        steps_per_epoch=steps_per_epoch,
        true_label_value=true_label_value,
        batch_size=batch_size,
        image_save_path=image_save_path,
        model_save_path=model_save_path,
        save_image_every_step=save_image_every_step,
        save_model_every_epoch=save_model_every_epoch,
        show_image=show_image,
        load_model=load_model,
        model_load_path=model_load_path,
    )
예제 #19
0
def conversion(model_dir, model_name, data_dir, conversion_direction, output_dir):
    """Convert every wav in ``data_dir`` with a trained voice CycleGAN.

    For each file: WORLD-decompose, normalize MCEPs with the source-domain
    statistics, run the model in ``conversion_direction`` ('A2B'; any other
    value means B2A), de-normalize with the target-domain statistics,
    convert pitch, re-synthesize, and write the wav to ``output_dir``.
    """
    # Fixed preprocessing configuration the model was trained with.
    num_features = 24
    sampling_rate = 16000
    frame_period = 5.0

    model = CycleGAN(num_features = num_features, mode = 'test')

    model.load(filepath = os.path.join(model_dir, model_name))

    # Per-domain (A/B) MCEP mean/std for feature (de)normalization.
    mcep_normalization_params = np.load(os.path.join(model_dir, 'mcep_normalization.npz'))
    mcep_mean_A = mcep_normalization_params['mean_A']
    mcep_std_A = mcep_normalization_params['std_A']
    mcep_mean_B = mcep_normalization_params['mean_B']
    mcep_std_B = mcep_normalization_params['std_B']

    # Per-domain log-F0 statistics for pitch conversion.
    logf0s_normalization_params = np.load(os.path.join(model_dir, 'logf0s_normalization.npz'))
    logf0s_mean_A = logf0s_normalization_params['mean_A']
    logf0s_std_A = logf0s_normalization_params['std_A']
    logf0s_mean_B = logf0s_normalization_params['mean_B']
    logf0s_std_B = logf0s_normalization_params['std_B']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for file in os.listdir(data_dir):

        filepath = os.path.join(data_dir, file)
        wav, _ = librosa.load(filepath, sr = sampling_rate, mono = True)
        # Pad so the frame count is a multiple of 4 (model requirement).
        wav = wav_padding(wav = wav, sr = sampling_rate, frame_period = frame_period, multiple = 4)
        f0, timeaxis, sp, ap = world_decompose(wav = wav, fs = sampling_rate, frame_period = frame_period)
        coded_sp = world_encode_spectral_envelop(sp = sp, fs = sampling_rate, dim = num_features)
        coded_sp_transposed = coded_sp.T

        if conversion_direction == 'A2B':
            # A -> B: normalize with A stats, de-normalize with B stats.
            f0_converted = pitch_conversion(f0 = f0, mean_log_src = logf0s_mean_A, std_log_src = logf0s_std_A, mean_log_target = logf0s_mean_B, std_log_target = logf0s_std_B)
            #f0_converted = f0
            coded_sp_norm = (coded_sp_transposed - mcep_mean_A) / mcep_std_A
            coded_sp_converted_norm = model.test(inputs = np.array([coded_sp_norm]), direction = conversion_direction)[0]
            coded_sp_converted = coded_sp_converted_norm * mcep_std_B + mcep_mean_B
        else:
            # B -> A: mirror of the branch above.
            f0_converted = pitch_conversion(f0 = f0, mean_log_src = logf0s_mean_B, std_log_src = logf0s_std_B, mean_log_target = logf0s_mean_A, std_log_target = logf0s_std_A)
            #f0_converted = f0
            coded_sp_norm = (coded_sp_transposed - mcep_mean_B) / mcep_std_B
            coded_sp_converted_norm = model.test(inputs = np.array([coded_sp_norm]), direction = conversion_direction)[0]
            coded_sp_converted = coded_sp_converted_norm * mcep_std_A + mcep_mean_A

        coded_sp_converted = coded_sp_converted.T
        # WORLD requires a C-contiguous array.
        coded_sp_converted = np.ascontiguousarray(coded_sp_converted)
        decoded_sp_converted = world_decode_spectral_envelop(coded_sp = coded_sp_converted, fs = sampling_rate)
        wav_transformed = world_speech_synthesis(f0 = f0_converted, decoded_sp = decoded_sp_converted, ap = ap, fs = sampling_rate, frame_period = frame_period)
        librosa.output.write_wav(os.path.join(output_dir, os.path.basename(file)), wav_transformed, sampling_rate)
예제 #20
0
def load_checkpoint():
    """Restore a CycleGAN from the configured checkpoint and return it."""
    ckpt_dir = os.path.join(C.EXP_DIR, C.TAG)
    model_path = os.path.join(ckpt_dir, C.CKPT_PREFIX % str(C.n_ckpt))
    print("load model at %s" % model_path)

    def make_generator():
        # Both generators share the same architecture hyper-parameters.
        return Generator(C.g_conv_ch, C.g_trans_ch, C.g_kernels,
                         C.g_strides, C.g_n_res_block, C.g_leaky_slop)

    cycle_gan = CycleGAN(
        make_generator(),
        make_generator(),
        Discriminator(C.nc_input),
        Discriminator(C.nc_input),
    )
    cycle_gan.load_checkpoint(model_path)
    if C.use_cuda:
        cycle_gan.cuda()

    return cycle_gan
예제 #21
0
def train(epochs=100, batch_size=1):
    """Train a Keras CycleGAN (two generators + two discriminators).

    Args:
        epochs: number of passes over the dataloader.
        batch_size: images per training batch.

    Fixes over the original:
    - netD_Y weights are actually loaded on resume (the original wrote
      ``netD_Y_train.load_weights`` — an attribute access, never called).
    - netD_Y weights are saved with the intended '.h5' extension (the
      original's '.hs' typo meant they could never be reloaded).
    - the progress message had 6 placeholders for 7 arguments; the format
      string now prints every value.
    """
    # Generators: X->Y and Y->X.
    netG = CycleGAN()
    netG_XY, real_X, fake_Y = netG.generator()
    netG_YX, real_Y, fake_X = netG.generator()

    # Cycle reconstructions used by the generator loss.
    reconstruct_X = netG_YX(fake_Y)
    reconstruct_Y = netG_XY(fake_X)
    # Discriminators.
    netD = CycleGAN()
    netD_X = netD.discriminator()
    netD_Y = netD.discriminator()

    netD_X_predict_fake = netD_X(fake_X)
    netD_Y_predict_fake = netD_Y(fake_Y)
    netD_X_predict_real = netD_X(real_X)
    netD_Y_predict_real = netD_Y(real_Y)
    # One optimizer instance shared by all three trainable models.
    optimizer = Adam(lr=0.001,
                     beta_1=0.5,
                     beta_2=0.999,
                     epsilon=None,
                     decay=0.01)

    # Freeze the discriminators while training the generators (GAN setup).
    netD_X.trainable = False
    netD_Y.trainable = False
    netG_loss_inputs = [
        netD_X_predict_fake, reconstruct_X, real_X, netD_Y_predict_fake,
        reconstruct_Y, real_Y
    ]
    netG_train = Model([real_X, real_Y], Lambda(netG_loss)(netG_loss_inputs))
    netG_train.compile(loss='mae', optimizer=optimizer, metrics=['accuracy'])

    # Separate inputs for pooled (historical) fakes fed to the discriminators.
    _fake_X_inputs = Input(shape=(256, 256, 3))
    _fake_Y_inputs = Input(shape=(256, 256, 3))
    _netD_X_predict_fake = netD_X(_fake_X_inputs)
    _netD_Y_predict_fake = netD_Y(_fake_Y_inputs)
    netD_X.trainable = True
    netD_X_train = Model(
        [real_X, _fake_X_inputs],
        Lambda(netD_loss)([netD_X_predict_real, _netD_X_predict_fake]))
    netD_X_train.compile(loss='mae', optimizer=optimizer,
                         metrics=['accuracy'])

    netD_X.trainable = False
    netD_Y.trainable = True
    netD_Y_train = Model(
        [real_Y, _fake_Y_inputs],
        Lambda(netD_loss)([netD_Y_predict_real, _netD_Y_predict_fake]))
    netD_Y_train.compile(loss='mae', optimizer=optimizer, metrics=['accuracy'])

    dataloader = Dataloader()
    fake_X_pool = ImagePool()
    fake_Y_pool = ImagePool()

    netG_X_function = get_G_function(netG_XY)
    netG_Y_function = get_G_function(netG_YX)
    # Resume from previous weights if any exist.
    if len(os.listdir('./weights')):
        netG_train.load_weights('./weights/netG.h5')
        netD_X_train.load_weights('./weights/netD_X.h5')
        # BUG FIX: was `netD_Y_train.load_weights` without parentheses,
        # so the Y-discriminator was never restored.
        netD_Y_train.load_weights('./weights/netD_Y.h5')

    print('Info: Start Training\n')
    for epoch in range(epochs):

        target_label = np.zeros((batch_size, 1))

        for batch_i, (imgs_X,
                      imgs_Y) in enumerate(dataloader.load_batch(batch_size)):
            start_time = time.time()
            num_batch = 0
            tmp_fake_X = netG_X_function([imgs_X])[0]
            tmp_fake_Y = netG_Y_function([imgs_Y])[0]

            # Draw (possibly historical) fakes from the image pools.
            _fake_X = fake_X_pool.action(tmp_fake_X)
            _fake_Y = fake_Y_pool.action(tmp_fake_Y)
            if batch_i % 2 == 0:
                save_image('fake_X_' + str(epoch) + '_' + str(batch_i),
                           _fake_X[0])
                save_image('fake_Y_' + str(epoch) + '_' + str(batch_i),
                           _fake_Y[0])
            _netG_loss = netG_train.train_on_batch([imgs_X, imgs_Y],
                                                   target_label)
            netD_X_loss = netD_X_train.train_on_batch([imgs_X, _fake_X],
                                                      target_label)
            netD_Y_loss = netD_Y_train.train_on_batch([imgs_Y, _fake_Y],
                                                      target_label)
            num_batch += 1
            diff = time.time() - start_time
            # BUG FIX: the original format string had 6 placeholders for 7
            # arguments, silently dropping the per-batch time.
            print('Epoch:{}/{}, netG_loss:{}, netD_loss:{},{}, '
                  'time:{}s, time_per_batch:{}s'.format(
                      epoch + 1, epochs, _netG_loss, netD_X_loss,
                      netD_Y_loss, diff, diff / num_batch))

        netG_train.save_weights('./weights/netG.h5')
        netD_X_train.save_weights('./weights/netD_X.h5')
        # BUG FIX: extension typo '.hs' made the file invisible to the
        # resume logic above.
        netD_Y_train.save_weights('./weights/netD_Y.h5')
        print('Model saved!\n')
예제 #22
0
def conversion(model_dir, model_name, data_dir, conversion_direction,
               output_dir, pc, generation_model):
    """Convert every wav file in *data_dir* with a trained voice CycleGAN.

    Args:
        model_dir: directory holding the model checkpoint and the
            ``mcep_normalization.npz`` / ``logf0s_normalization.npz`` stats.
        model_name: checkpoint filename inside *model_dir*.
        data_dir: directory of input wav files to convert.
        conversion_direction: ``'A2B'`` or ``'B2A'``.
        output_dir: directory where converted wavs are written (created if
            missing).
        pc: when truthy, also convert pitch (log-F0) statistics.
        generation_model: generator architecture name passed to CycleGAN.
    """
    num_features = 32
    sampling_rate = 44000
    frame_period = 5.0
    frame_size = 128  # the generator consumes fixed 128-frame segments

    model = CycleGAN(num_features=num_features,
                     mode='test',
                     gen_model=generation_model)
    model.load(filepath=os.path.join(model_dir, model_name))

    # Per-domain normalization statistics saved at training time.
    mcep_normalization_params = np.load(
        os.path.join(model_dir, 'mcep_normalization.npz'))
    mcep_stats = {
        'A': (mcep_normalization_params['mean_A'],
              mcep_normalization_params['std_A']),
        'B': (mcep_normalization_params['mean_B'],
              mcep_normalization_params['std_B']),
    }

    logf0s_normalization_params = np.load(
        os.path.join(model_dir, 'logf0s_normalization.npz'))
    logf0_stats = {
        'A': (logf0s_normalization_params['mean_A'],
              logf0s_normalization_params['std_A']),
        'B': (logf0s_normalization_params['mean_B'],
              logf0s_normalization_params['std_B']),
    }

    # Source/target domains follow the conversion direction.
    src, tgt = ('A', 'B') if conversion_direction == 'A2B' else ('B', 'A')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for file in os.listdir(data_dir):

        filepath = os.path.join(data_dir, file)
        wav, _ = librosa.load(filepath, sr=sampling_rate, mono=True)
        f0, timeaxis, sp, ap = world_decompose(wav=wav,
                                               fs=sampling_rate,
                                               frame_period=frame_period)
        coded_sp = world_encode_spectral_envelop(sp=sp,
                                                 fs=sampling_rate,
                                                 dim=num_features)
        coded_sp_transposed = coded_sp.T

        print("AtoB" if src == 'A' else "BtoA")
        if pc:
            print("pitch convert")
            # BUG FIX: the original passed the A statistics as source and B
            # as target for BOTH directions; for B2A the source must be B
            # and the target A (matches the B2A validation path in training).
            f0_converted = pitch_conversion(
                f0=f0,
                mean_log_src=logf0_stats[src][0],
                std_log_src=logf0_stats[src][1],
                mean_log_target=logf0_stats[tgt][0],
                std_log_target=logf0_stats[tgt][1])
        else:
            print("pitch same")
            f0_converted = f0

        # Normalize with source-domain statistics.
        src_mean, src_std = mcep_stats[src]
        coded_sp_norm = (coded_sp_transposed - src_mean) / src_std

        # Segment-wise inference (pads to a multiple of frame_size).
        coded_sp_converted_norm = _convert_segments(model, coded_sp_norm,
                                                    conversion_direction,
                                                    frame_size, num_features)

        # De-normalize with target-domain statistics.
        tgt_mean, tgt_std = mcep_stats[tgt]
        coded_sp_converted = coded_sp_converted_norm * tgt_std + tgt_mean

        # Output translation value processing.
        coded_sp_converted = np.ascontiguousarray(coded_sp_converted.T)
        decoded_sp_converted = world_decode_spectral_envelop(
            coded_sp=coded_sp_converted, fs=sampling_rate)

        # World vocoder synthesis.
        wav_transformed = world_speech_synthesis(
            f0=f0_converted,
            decoded_sp=decoded_sp_converted,
            ap=ap,
            fs=sampling_rate,
            frame_period=frame_period)
        librosa.output.write_wav(
            os.path.join(output_dir, os.path.basename(file)), wav_transformed,
            sampling_rate)


def _convert_segments(model, coded_sp_norm, direction, frame_size,
                      num_features):
    """Run ``model.test`` over fixed-size frame segments and re-join them.

    Pads the frame axis with zeros up to a multiple of *frame_size*, converts
    each segment independently, concatenates the results, and trims the
    padding off again.
    """
    remain = frame_size - coded_sp_norm.shape[1] % frame_size
    padded = coded_sp_norm.shape[1] % frame_size != 0
    if padded:
        # BUG FIX: the original hard-coded 32 rows here; use num_features so
        # the zero padding always matches the feature dimension.
        coded_sp_norm = np.concatenate(
            (coded_sp_norm, np.zeros((num_features, remain))), axis=1)

    segments = [
        model.test(inputs=np.array(
            [coded_sp_norm[:, i * frame_size:(i + 1) * frame_size]]),
                   direction=direction)[0]
        for i in range(coded_sp_norm.shape[1] // frame_size)
    ]
    converted = np.concatenate(segments, axis=1)
    if padded:
        converted = converted[:, :-remain]
    return converted
예제 #23
0
def train():
    """Train an image CycleGAN configured by FLAGS.

    Resumes from the latest checkpoint when ``--load_model`` is given,
    otherwise starts a fresh run under ``checkpoints/<timestamp>``.  Writes
    TensorBoard summaries every step and saves a checkpoint every 5000 steps
    (and always once more on shutdown).
    """
    if FLAGS.load_model is not None:
        # BUG FIX: the original used FLAGS.load_model.lstrip("checkpoints/").
        # str.lstrip takes a *character set*, not a prefix, so run names
        # starting with any of c,h,e,k,p,o,i,n,t,s,/ were mangled.  Strip
        # the literal directory prefix instead.
        run_name = FLAGS.load_model
        if run_name.startswith("checkpoints/"):
            run_name = run_name[len("checkpoints/"):]
        checkpoints_dir = "checkpoints/" + run_name
    else:
        current_time = datetime.now().strftime("%Y%m%d-%H%M")
        checkpoints_dir = "checkpoints/{}".format(current_time)
        try:
            os.makedirs(checkpoints_dir)
        except os.error:
            pass

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN(X_train_file=FLAGS.X,
                             Y_train_file=FLAGS.Y,
                             batch_size=FLAGS.batch_size,
                             image_size=FLAGS.image_size,
                             use_lsgan=FLAGS.use_lsgan,
                             norm=FLAGS.norm,
                             lambda1=FLAGS.lambda1,
                             lambda2=FLAGS.lambda2,
                             learning_rate=FLAGS.learning_rate,
                             beta1=FLAGS.beta1,
                             ngf=FLAGS.ngf)
        G_loss, D_Y_loss, F_loss, D_X_loss, fake_y, fake_x = cycle_gan.model()
        optimizers = cycle_gan.optimize(G_loss, D_Y_loss, F_loss, D_X_loss)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(checkpoints_dir, graph)
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        if FLAGS.load_model is not None:
            checkpoint = tf.train.get_checkpoint_state(checkpoints_dir)
            meta_graph_path = checkpoint.model_checkpoint_path + ".meta"
            restore = tf.train.import_meta_graph(meta_graph_path)
            restore.restore(sess, tf.train.latest_checkpoint(checkpoints_dir))
            # Checkpoint paths look like .../model.ckpt-<step>.meta; recover
            # the global step from the trailing "-<step>" component.
            step = int(meta_graph_path.split("-")[2].split(".")[0])
        else:
            sess.run(tf.global_variables_initializer())
            step = 0

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            # Pools of previously generated images for discriminator updates.
            fake_Y_pool = ImagePool(FLAGS.pool_size)
            fake_X_pool = ImagePool(FLAGS.pool_size)

            while not coord.should_stop():
                # Get previously generated images.
                fake_y_val, fake_x_val = sess.run([fake_y, fake_x])

                # One combined optimization step for G, D_Y, F and D_X.
                _, G_loss_val, D_Y_loss_val, F_loss_val, D_X_loss_val, summary = (
                    sess.run(
                        [
                            optimizers, G_loss, D_Y_loss, F_loss, D_X_loss,
                            summary_op
                        ],
                        feed_dict={
                            cycle_gan.fake_y: fake_Y_pool.query(fake_y_val),
                            cycle_gan.fake_x: fake_X_pool.query(fake_x_val)
                        }))

                train_writer.add_summary(summary, step)
                train_writer.flush()

                if step % 100 == 0:
                    logging.info('-----------Step %d:-------------' % step)
                    logging.info('  G_loss   : {}'.format(G_loss_val))
                    logging.info('  D_Y_loss : {}'.format(D_Y_loss_val))
                    logging.info('  F_loss   : {}'.format(F_loss_val))
                    logging.info('  D_X_loss : {}'.format(D_X_loss_val))

                if step % 5000 == 0:
                    save_path = saver.save(sess,
                                           checkpoints_dir + "/model.ckpt",
                                           global_step=step)
                    logging.info("Model saved in file: %s" % save_path)

                step += 1

        except KeyboardInterrupt:
            logging.info('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            # Always persist the latest weights before shutting down.
            save_path = saver.save(sess,
                                   checkpoints_dir + "/model.ckpt",
                                   global_step=step)
            logging.info("Model saved in file: %s" % save_path)
            # When done, ask the threads to stop.
            coord.request_stop()
            coord.join(threads)
예제 #24
0
def train(img_A_dir, img_B_dir, model_dir, model_name, random_seed,
          batch_size_maximum, validation_A_dir, validation_B_dir, output_dir,
          tensorboard_log_dir):
    """Train an image CycleGAN and convert validation images every epoch.

    Args:
        img_A_dir / img_B_dir: training image directories for both domains.
        model_dir / model_name: where the checkpoint is saved each epoch.
        random_seed: seed for numpy's RNG (data sampling reproducibility).
        batch_size_maximum: cap on the per-epoch sample count.
        validation_A_dir / validation_B_dir: optional directories whose
            images are translated (A2B / B2A) after each epoch.
        output_dir: root for ``converted_A`` / ``converted_B`` outputs.
        tensorboard_log_dir: TensorBoard log directory for the model.
    """
    np.random.seed(random_seed)

    num_epochs = 1000
    mini_batch_size = 1  # mini_batch_size = 1 is better
    learning_rate = 0.0002
    input_size = [256, 256, 3]
    num_filters = 64  # Tried num_filters = 8 still not good for 200 epochs

    validation_A_output_dir = None
    if validation_A_dir is not None:
        validation_A_output_dir = os.path.join(output_dir, 'converted_A')
        if not os.path.exists(validation_A_output_dir):
            os.makedirs(validation_A_output_dir)

    validation_B_output_dir = None
    if validation_B_dir is not None:
        validation_B_output_dir = os.path.join(output_dir, 'converted_B')
        if not os.path.exists(validation_B_output_dir):
            os.makedirs(validation_B_output_dir)

    model = CycleGAN(input_size=input_size,
                     num_filters=num_filters,
                     mode='train',
                     log_dir=tensorboard_log_dir)

    dataset_A_raw = load_data(img_dir=img_A_dir, load_size=256)
    dataset_B_raw = load_data(img_dir=img_B_dir, load_size=256)

    for epoch in range(num_epochs):
        print('Epoch: %d' % epoch)

        start_time_epoch = time.time()

        # Re-sample (load 286, output 256) every epoch for augmentation.
        dataset_A, dataset_B = sample_train_data(
            dataset_A_raw,
            dataset_B_raw,
            load_size=286,
            output_size=256,
            batch_size_maximum=batch_size_maximum)

        n_samples = dataset_A.shape[0]
        for i in range(n_samples // mini_batch_size):

            start = i * mini_batch_size
            end = (i + 1) * mini_batch_size

            generator_loss, discriminator_loss = model.train(
                input_A=dataset_A[start:end],
                input_B=dataset_B[start:end],
                learning_rate=learning_rate)

            if i % 50 == 0:
                print(
                    'Minibatch: %d, Generator Loss : %f, Discriminator Loss : %f'
                    % (i, generator_loss, discriminator_loss))

        model.save(directory=model_dir, filename=model_name)

        # The two validation passes were duplicated code; share one helper.
        if validation_A_dir is not None:
            _convert_validation_images(model, validation_A_dir,
                                       validation_A_output_dir, 'A2B',
                                       input_size)

        if validation_B_dir is not None:
            _convert_validation_images(model, validation_B_dir,
                                       validation_B_output_dir, 'B2A',
                                       input_size)

        end_time_epoch = time.time()
        time_elapsed_epoch = end_time_epoch - start_time_epoch

        print('Time Elapsed for This Epoch: %02d:%02d:%02d' %
              (time_elapsed_epoch // 3600, (time_elapsed_epoch % 3600 // 60),
               (time_elapsed_epoch % 60 // 1)))


def _convert_validation_images(model, input_dir, output_dir, direction,
                               input_size):
    """Translate every image in *input_dir* and write results to *output_dir*.

    Each image is resized to the model input size, scaled, converted in the
    given direction ('A2B' or 'B2A'), un-scaled, resized back to its original
    dimensions, and written under its original basename.
    """
    for file in os.listdir(input_dir):
        filepath = os.path.join(input_dir, file)
        img = cv2.imread(filepath)
        img_height, img_width, _ = img.shape
        img = cv2.resize(img, (input_size[1], input_size[0]))
        img = image_scaling(imgs=img)
        img_converted = model.test(inputs=np.array([img]),
                                   direction=direction)[0]
        img_converted = image_scaling_inverse(imgs=img_converted)
        img_converted = cv2.resize(img_converted, (img_width, img_height))
        cv2.imwrite(
            os.path.join(output_dir, os.path.basename(file)), img_converted)
예제 #25
0
def train():
    """Train the text (sequence) CycleGAN on the hp.source_train corpus.

    Builds a shared vocabulary over both text domains, batches the data,
    and for every batch runs five discriminator updates per generator
    update in each direction, checkpointing every 100 global steps.
    """
    if not os.path.exists(hp.checkpoint_dir):
        os.mkdir(hp.checkpoint_dir)

    x, y = data_helpers.load_data(hp.source_train)

    # Build ONE vocabulary over both domains plus start/end markers.
    max_document_length = hp.maxlen
    vocab_processor = learn.preprocessing.VocabularyProcessor(
        max_document_length)
    vocab_processor.fit(['<s>', '<e>'] + x + y)

    # BUG FIX: the original called fit_transform() again on x and y, which
    # keeps *fitting* (extending) the vocabulary rather than just mapping
    # tokens through the vocabulary built above; transform() is the
    # read-only mapping.
    x_train = np.array(list(vocab_processor.transform(x)))
    y_train = np.array(list(vocab_processor.transform(y)))

    vocab_size = len(vocab_processor.vocabulary_)
    vocab_processor.save(os.path.join(hp.checkpoint_dir, 'vocab'))

    # Generate batches.
    batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                      hp.batch_size, hp.num_epochs)

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN(vocab_size=vocab_size,
                             LAMBDA=10,
                             is_training=True)
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            gs = 0  # global step counter
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                feed = {cycle_gan.x: x_batch, cycle_gan.y: y_batch}

                # Five D_Y updates, then one G update (5:1 schedule).
                for _ in range(5):
                    _, D_Y_loss = sess.run(
                        [cycle_gan.D_Y_opt, cycle_gan.D_Y_loss],
                        feed_dict=feed)
                _, G_loss, fake_y_val = sess.run(
                    [cycle_gan.G_opt, cycle_gan.G_loss, cycle_gan.preds_y],
                    feed_dict=feed)

                # Five D_X updates, then one F update.
                for _ in range(5):
                    _, D_X_loss = sess.run(
                        [cycle_gan.D_X_opt, cycle_gan.D_X_loss],
                        feed_dict=feed)
                _, F_loss, fake_x_val = sess.run(
                    [cycle_gan.F_opt, cycle_gan.F_loss, cycle_gan.preds_x],
                    feed_dict=feed)

                if gs % 100 == 0:
                    # Logging and checkpointing share the same cadence
                    # (the original had two separate gs % 100 blocks).
                    print(
                        '********step: {}, DY_loss = {:.8f}, G_loss = {:.8f}, '
                        'DX_loss = {:.8f}, F_loss = {:.8f}********'.format(
                            gs, D_Y_loss, G_loss, D_X_loss, F_loss))
                    saver.save(sess,
                               hp.checkpoint_dir + "/model.ckpt",
                               global_step=gs)

                gs += 1

        except KeyboardInterrupt:
            logging.info('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            # Always persist the latest weights before shutting down.
            save_path = saver.save(sess,
                                   hp.checkpoint_dir + "/model.ckpt",
                                   global_step=gs)
            logging.info("Model saved in file: %s" % save_path)
            coord.request_stop()
            coord.join(threads)
예제 #26
0
def train(train_A_dir, train_B_dir, model_dir, model_name, random_seed,
          validation_A_dir, validation_B_dir, output_dir,
          tensorboard_log_dir):
    """Train a voice-conversion CycleGAN on WORLD features.

    Preprocesses both domains (F0 statistics + normalized MCEPs), saves the
    normalization stats next to the model, then trains for up to 5000 epochs
    with an identity-loss/learning-rate schedule keyed on iteration count.
    Every 50 epochs, validation wavs (when provided) are converted in both
    directions and written under *output_dir*.
    """
    np.random.seed(random_seed)

    num_epochs = 5000
    mini_batch_size = 1  # mini_batch_size = 1 is better
    generator_learning_rate = 0.0002
    generator_learning_rate_decay = generator_learning_rate / 200000
    discriminator_learning_rate = 0.0001
    discriminator_learning_rate_decay = discriminator_learning_rate / 200000
    sampling_rate = 16000
    num_mcep = 24
    frame_period = 5.0
    n_frames = 128
    lambda_cycle = 10
    lambda_identity = 5

    print('Preprocessing Data...')

    start_time = time.time()

    wavs_A = load_wavs(wav_dir=train_A_dir, sr=sampling_rate)
    wavs_B = load_wavs(wav_dir=train_B_dir, sr=sampling_rate)

    f0s_A, timeaxes_A, sps_A, aps_A, coded_sps_A = world_encode_data(
        wavs=wavs_A, fs=sampling_rate, frame_period=frame_period,
        coded_dim=num_mcep)
    f0s_B, timeaxes_B, sps_B, aps_B, coded_sps_B = world_encode_data(
        wavs=wavs_B, fs=sampling_rate, frame_period=frame_period,
        coded_dim=num_mcep)

    log_f0s_mean_A, log_f0s_std_A = logf0_statistics(f0s_A)
    log_f0s_mean_B, log_f0s_std_B = logf0_statistics(f0s_B)

    print('Log Pitch A')
    print('Mean: %f, Std: %f' % (log_f0s_mean_A, log_f0s_std_A))
    print('Log Pitch B')
    print('Mean: %f, Std: %f' % (log_f0s_mean_B, log_f0s_std_B))

    coded_sps_A_transposed = transpose_in_list(lst=coded_sps_A)
    coded_sps_B_transposed = transpose_in_list(lst=coded_sps_B)

    # NOTE: the helper name is misspelled ("transoform") in the project's
    # preprocessing module; keep the existing spelling.
    coded_sps_A_norm, coded_sps_A_mean, coded_sps_A_std = \
        coded_sps_normalization_fit_transoform(
            coded_sps=coded_sps_A_transposed)
    print("Input data fixed.")
    coded_sps_B_norm, coded_sps_B_mean, coded_sps_B_std = \
        coded_sps_normalization_fit_transoform(
            coded_sps=coded_sps_B_transposed)

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Persist normalization stats so conversion can reproduce them.
    np.savez(os.path.join(model_dir, 'logf0s_normalization.npz'),
             mean_A=log_f0s_mean_A, std_A=log_f0s_std_A,
             mean_B=log_f0s_mean_B, std_B=log_f0s_std_B)
    np.savez(os.path.join(model_dir, 'mcep_normalization.npz'),
             mean_A=coded_sps_A_mean, std_A=coded_sps_A_std,
             mean_B=coded_sps_B_mean, std_B=coded_sps_B_std)

    validation_A_output_dir = None
    if validation_A_dir is not None:
        validation_A_output_dir = os.path.join(output_dir, 'converted_A')
        if not os.path.exists(validation_A_output_dir):
            os.makedirs(validation_A_output_dir)

    validation_B_output_dir = None
    if validation_B_dir is not None:
        validation_B_output_dir = os.path.join(output_dir, 'converted_B')
        if not os.path.exists(validation_B_output_dir):
            os.makedirs(validation_B_output_dir)

    end_time = time.time()
    time_elapsed = end_time - start_time

    print('Preprocessing Done.')

    print('Time Elapsed for Data Preprocessing: %02d:%02d:%02d' %
          (time_elapsed // 3600, (time_elapsed % 3600 // 60),
           (time_elapsed % 60 // 1)))

    model = CycleGAN(num_features=num_mcep)

    for epoch in range(num_epochs):
        print('Epoch: %d' % epoch)

        start_time_epoch = time.time()

        dataset_A, dataset_B = sample_train_data(dataset_A=coded_sps_A_norm,
                                                 dataset_B=coded_sps_B_norm,
                                                 n_frames=n_frames)

        n_samples = dataset_A.shape[0]

        for i in range(n_samples // mini_batch_size):

            num_iterations = n_samples // mini_batch_size * epoch + i

            # Schedule: drop the identity loss after 10k iterations and
            # linearly decay both learning rates after 200k iterations.
            if num_iterations > 10000:
                lambda_identity = 0
            if num_iterations > 200000:
                generator_learning_rate = max(
                    0,
                    generator_learning_rate - generator_learning_rate_decay)
                discriminator_learning_rate = max(
                    0, discriminator_learning_rate -
                    discriminator_learning_rate_decay)

            start = i * mini_batch_size
            end = (i + 1) * mini_batch_size

            generator_loss, discriminator_loss = model.train(
                input_A=dataset_A[start:end],
                input_B=dataset_B[start:end],
                lambda_cycle=lambda_cycle,
                lambda_identity=lambda_identity,
                generator_learning_rate=generator_learning_rate,
                discriminator_learning_rate=discriminator_learning_rate)

            if i % 50 == 0:
                print('Iteration: {:07d}, Generator Learning Rate: {:.7f}, '
                      'Discriminator Learning Rate: {:.7f}, Generator Loss '
                      ': {:.3f}, Discriminator Loss : {:.3f}'.format(
                          num_iterations, generator_learning_rate,
                          discriminator_learning_rate, generator_loss,
                          discriminator_loss))

        model.save(directory=model_dir, filename=model_name)

        end_time_epoch = time.time()
        time_elapsed_epoch = end_time_epoch - start_time_epoch

        print('Time Elapsed for This Epoch: %02d:%02d:%02d' %
              (time_elapsed_epoch // 3600, (time_elapsed_epoch % 3600 // 60),
               (time_elapsed_epoch % 60 // 1)))

        # The two validation passes were duplicated code; share one helper.
        if validation_A_dir is not None and epoch % 50 == 0:
            print('Generating Validation Data B from A...')
            _generate_validation_wavs(
                model=model,
                input_dir=validation_A_dir,
                output_dir=validation_A_output_dir,
                direction='A2B',
                sampling_rate=sampling_rate,
                frame_period=frame_period,
                num_mcep=num_mcep,
                f0_src_stats=(log_f0s_mean_A, log_f0s_std_A),
                f0_tgt_stats=(log_f0s_mean_B, log_f0s_std_B),
                mcep_src_stats=(coded_sps_A_mean, coded_sps_A_std),
                mcep_tgt_stats=(coded_sps_B_mean, coded_sps_B_std))

        if validation_B_dir is not None and epoch % 50 == 0:
            print('Generating Validation Data A from B...')
            _generate_validation_wavs(
                model=model,
                input_dir=validation_B_dir,
                output_dir=validation_B_output_dir,
                direction='B2A',
                sampling_rate=sampling_rate,
                frame_period=frame_period,
                num_mcep=num_mcep,
                f0_src_stats=(log_f0s_mean_B, log_f0s_std_B),
                f0_tgt_stats=(log_f0s_mean_A, log_f0s_std_A),
                mcep_src_stats=(coded_sps_B_mean, coded_sps_B_std),
                mcep_tgt_stats=(coded_sps_A_mean, coded_sps_A_std))


def _generate_validation_wavs(model, input_dir, output_dir, direction,
                              sampling_rate, frame_period, num_mcep,
                              f0_src_stats, f0_tgt_stats, mcep_src_stats,
                              mcep_tgt_stats):
    """Convert every wav in *input_dir* with *model* and write to *output_dir*.

    Each ``*_stats`` argument is a ``(mean, std)`` pair: F0 stats are in the
    log domain, MCEP stats normalize/de-normalize the spectral features.
    """
    for file in os.listdir(input_dir):
        filepath = os.path.join(input_dir, file)
        wav, _ = librosa.load(filepath, sr=sampling_rate, mono=True)
        wav = wav_padding(wav=wav, sr=sampling_rate,
                          frame_period=frame_period, multiple=4)
        f0, timeaxis, sp, ap = world_decompose(wav=wav, fs=sampling_rate,
                                               frame_period=frame_period)
        f0_converted = pitch_conversion(f0=f0,
                                        mean_log_src=f0_src_stats[0],
                                        std_log_src=f0_src_stats[1],
                                        mean_log_target=f0_tgt_stats[0],
                                        std_log_target=f0_tgt_stats[1])
        coded_sp = world_encode_spectral_envelop(sp=sp, fs=sampling_rate,
                                                 dim=num_mcep)
        coded_sp_norm = (coded_sp.T - mcep_src_stats[0]) / mcep_src_stats[1]
        coded_sp_converted_norm = model.test(inputs=np.array([coded_sp_norm]),
                                             direction=direction)[0]
        coded_sp_converted = (coded_sp_converted_norm * mcep_tgt_stats[1] +
                              mcep_tgt_stats[0])
        coded_sp_converted = np.ascontiguousarray(coded_sp_converted.T)
        decoded_sp_converted = world_decode_spectral_envelop(
            coded_sp=coded_sp_converted, fs=sampling_rate)
        wav_transformed = world_speech_synthesis(
            f0=f0_converted, decoded_sp=decoded_sp_converted, ap=ap,
            fs=sampling_rate, frame_period=frame_period)
        librosa.output.write_wav(
            os.path.join(output_dir, os.path.basename(file)),
            wav_transformed, sampling_rate)
예제 #27
0
def train():
    """Train an image CycleGAN configured by FLAGS (variant #27).

    Resumes from the latest checkpoint when ``--load_model`` is given,
    otherwise starts a fresh run under ``checkpoints/<timestamp>``.
    Summaries and loss logging happen every 100 steps; checkpoints every
    10000 steps and once more on shutdown.
    """
    if FLAGS.load_model is not None:
        checkpoints_dir = "checkpoints/" + FLAGS.load_model
    else:
        current_time = datetime.now().strftime("%Y%m%d-%H%M")
        checkpoints_dir = "checkpoints/{}".format(current_time)
        try:
            os.makedirs(checkpoints_dir)
        except os.error:
            pass

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN(
            X_train_file=FLAGS.X,
            Y_train_file=FLAGS.Y,
            batch_size=FLAGS.batch_size,
            image_size=FLAGS.image_size,
            use_lsgan=FLAGS.use_lsgan,
            norm=FLAGS.norm,
            lambda1=FLAGS.lambda1,
            # BUG FIX: lambda2 was passed FLAGS.lambda1 (copy-paste error),
            # silently ignoring the configured --lambda2 cycle weight.
            lambda2=FLAGS.lambda2,
            learning_rate=FLAGS.learning_rate,
            beta1=FLAGS.beta1,
            ngf=FLAGS.ngf
        )
        G_loss, D_Y_loss, F_loss, D_X_loss, fake_y, fake_x = cycle_gan.model()
        optimizers = cycle_gan.optimize(G_loss, D_Y_loss, F_loss, D_X_loss)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(checkpoints_dir, graph)
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        if FLAGS.load_model is not None:
            checkpoint = tf.train.get_checkpoint_state(checkpoints_dir)
            meta_graph_path = checkpoint.model_checkpoint_path + ".meta"
            restore = tf.train.import_meta_graph(meta_graph_path)
            restore.restore(sess, tf.train.latest_checkpoint(checkpoints_dir))
            # Recover the global step from ".../model.ckpt-<step>.meta".
            step = int(meta_graph_path.split("-")[2].split(".")[0])
        else:
            sess.run(tf.global_variables_initializer())
            step = 0

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            # Pools of previously generated images for discriminator updates.
            fake_Y_pool = ImagePool(FLAGS.pool_size)
            fake_X_pool = ImagePool(FLAGS.pool_size)

            while not coord.should_stop():
                # Get previously generated images.
                fake_y_val, fake_x_val = sess.run([fake_y, fake_x])

                # One combined optimization step for G, D_Y, F and D_X.
                _, G_loss_val, D_Y_loss_val, F_loss_val, D_X_loss_val, summary = (
                    sess.run(
                        [optimizers, G_loss, D_Y_loss, F_loss, D_X_loss,
                         summary_op],
                        feed_dict={
                            cycle_gan.fake_y: fake_Y_pool.query(fake_y_val),
                            cycle_gan.fake_x: fake_X_pool.query(fake_x_val)
                        }))

                if step % 100 == 0:
                    # Summaries and console logging share the same cadence
                    # (the original had two separate step % 100 blocks).
                    train_writer.add_summary(summary, step)
                    train_writer.flush()
                    logging.info('-----------Step %d:-------------' % step)
                    logging.info('  G_loss   : {}'.format(G_loss_val))
                    logging.info('  D_Y_loss : {}'.format(D_Y_loss_val))
                    logging.info('  F_loss   : {}'.format(F_loss_val))
                    logging.info('  D_X_loss : {}'.format(D_X_loss_val))

                if step % 10000 == 0:
                    save_path = saver.save(sess,
                                           checkpoints_dir + "/model.ckpt",
                                           global_step=step)
                    logging.info("Model saved in file: %s" % save_path)

                step += 1

        except KeyboardInterrupt:
            logging.info('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            save_path = saver.save(sess,
                                   checkpoints_dir + "/model.ckpt",
                                   global_step=step)
            logging.info("Model saved in file: %s" % save_path)
            # When done, ask the threads to stop.
            coord.request_stop()
            coord.join(threads)
예제 #28
0
def main(args):
    """Entry point: build the CycleGAN generators/discriminators, optionally
    resume from a checkpoint, then train and/or evaluate.
    """
    train_loader, test_loader = load_data(args)

    GeneratorA2B = CycleGAN()
    GeneratorB2A = CycleGAN()
    DiscriminatorA = Discriminator()
    DiscriminatorB = Discriminator()

    if args.cuda:
        GeneratorA2B = GeneratorA2B.cuda()
        GeneratorB2A = GeneratorB2A.cuda()
        DiscriminatorA = DiscriminatorA.cuda()
        DiscriminatorB = DiscriminatorB.cuda()

    # Joint optimizers: one over both generators, one over both discriminators.
    gen_params = itertools.chain(GeneratorA2B.parameters(),
                                 GeneratorB2A.parameters())
    disc_params = itertools.chain(DiscriminatorA.parameters(),
                                  DiscriminatorB.parameters())
    optimizerG = optim.Adam(gen_params, lr=args.lr, betas=(0.5, 0.999))
    optimizerD = optim.Adam(disc_params, lr=args.lr, betas=(0.5, 0.999))

    if args.training:
        # NOTE(review): checkpoint path and resume epoch are hard-coded here
        # (model_285_200.pth, resume at epoch 285) — presumably a local
        # resume setup; confirm before reuse.
        path = 'E:/cyclegan/checkpoints/model_{}_{}.pth'.format(285, 200)

        state = torch.load(path)
        GeneratorA2B.load_state_dict(state['generatorA'])
        GeneratorB2A.load_state_dict(state['generatorB'])
        DiscriminatorA.load_state_dict(state['discriminatorA'])
        DiscriminatorB.load_state_dict(state['discriminatorB'])
        optimizerG.load_state_dict(state['optimizerG'])
        optimizerD.load_state_dict(state['optimizerD'])

        start_epoch = 285
    else:
        # Fresh run: normal weight init on GPU 0 for all four networks.
        for net in (GeneratorA2B, GeneratorB2A, DiscriminatorA,
                    DiscriminatorB):
            init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[0])
        start_epoch = 1

    if args.evaluation:
        evaluation(test_loader, GeneratorA2B, GeneratorB2A, args)
    else:
        cycle = nn.L1Loss()
        gan = nn.BCEWithLogitsLoss()
        identity = nn.L1Loss()

        for epoch in range(start_epoch, args.epochs):
            train(train_loader, GeneratorA2B, GeneratorB2A, DiscriminatorA,
                  DiscriminatorB, optimizerG, optimizerD, cycle, gan,
                  identity, args, epoch)
        evaluation(test_loader, GeneratorA2B, GeneratorB2A, args)
def train():
    """Train the CycleGAN with teacher/student auxiliary losses (TF1 graph mode).

    Builds the graph from ``FLAGS``, optionally restores a checkpoint, then
    loops: fetch one batch per domain, run a joint optimizer step, log the
    losses every 100 steps, and evaluate fake-Y classification accuracy over
    the whole Y test set.  The model is saved once in the ``finally`` block
    when the loop stops (interrupt, error, or coordinator stop).
    """
    if FLAGS.load_model is not None:
        # NOTE(review): the original used FLAGS.load_model.lstrip("checkpoints/"),
        # but str.lstrip strips a *character set*, not a prefix — e.g.
        # "checkpoints/check1".lstrip("checkpoints/") == "1".  Strip the
        # literal prefix instead.
        load_model = FLAGS.load_model
        if load_model.startswith("checkpoints/"):
            load_model = load_model[len("checkpoints/"):]
        checkpoints_dir = "checkpoints/" + load_model
    else:
        current_time = datetime.now().strftime("%Y%m%d-%H%M")
        checkpoints_dir = "checkpoints/{}".format(current_time)
        os.makedirs(checkpoints_dir, exist_ok=True)

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN(
            batch_size=FLAGS.batch_size,
            image_size=FLAGS.image_size,
            use_lsgan=FLAGS.use_lsgan,
            norm=FLAGS.norm,
            lambda1=FLAGS.lambda1,
            lambda2=FLAGS.lambda2,
            learning_rate=FLAGS.learning_rate,
            beta1=FLAGS.beta1,
            ngf=FLAGS.ngf)
        # model() returns the four GAN losses plus the auxiliary
        # teacher/student losses and per-sample correctness ops.
        G_loss, D_Y_loss, F_loss, D_X_loss, teacher_loss, student_loss, \
            learning_loss, x_correct, y_correct, fake_y_correct = cycle_gan.model()
        optimizers = cycle_gan.optimize(G_loss, D_Y_loss, F_loss, D_X_loss,
                                        teacher_loss, student_loss,
                                        learning_loss)
        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(checkpoints_dir, graph)
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        if FLAGS.load_model is not None:
            # Restore from a hard-coded checkpoint.
            # TODO(review): derive this from
            # tf.train.latest_checkpoint(checkpoints_dir) instead.
            meta_graph_path = "checkpoints/20190224-1130/model.ckpt-7792.meta"
            print('meta_graph_path', meta_graph_path)
            restore = tf.train.import_meta_graph(meta_graph_path)
            restore.restore(sess, "checkpoints/20190224-1130/model.ckpt-7792")
            step = 7792
        else:
            sess.run(tf.global_variables_initializer())
            step = 0

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                x_image, x_label = get_train_batch("X", FLAGS.batch_size,
                                                   FLAGS.image_size,
                                                   FLAGS.image_size,
                                                   "./dataset/")
                y_image, y_label = get_train_batch("Y", FLAGS.batch_size,
                                                   FLAGS.image_size,
                                                   FLAGS.image_size,
                                                   "./dataset/")

                # One joint optimization step; fetch losses and summaries.
                _, G_loss_val, D_Y_loss_val, F_loss_val, D_X_loss_val, \
                    teacher_loss_eval, student_loss_eval, learning_loss_eval, \
                    summary = sess.run(
                        [
                            optimizers, G_loss, D_Y_loss, F_loss, D_X_loss,
                            teacher_loss, student_loss, learning_loss,
                            summary_op
                        ],
                        feed_dict={
                            cycle_gan.x: x_image,
                            cycle_gan.y: y_image,
                            cycle_gan.x_label: x_label,
                            cycle_gan.y_label: y_label
                        })

                train_writer.add_summary(summary, step)
                train_writer.flush()

                if step % 100 == 0:
                    print('-----------Step %d:-------------' % step)
                    print('  G_loss   : {}'.format(G_loss_val))
                    print('  D_Y_loss : {}'.format(D_Y_loss_val))
                    print('  F_loss   : {}'.format(F_loss_val))
                    print('  D_X_loss : {}'.format(D_X_loss_val))
                    print('teacher_loss: {}'.format(teacher_loss_eval))
                    print('student_loss: {}'.format(student_loss_eval))
                    print('learning_loss: {}'.format(learning_loss_eval))

                if step % 100 == 0 and step >= 10:
                    # Evaluate how often the classifier is correct on the
                    # translated ("fake Y") test images, one sample at a time.
                    print('Now is in testing! Please wait result...')
                    test_images_y, test_labels_y = get_test_batch(
                        "Y", FLAGS.image_size, FLAGS.image_size, "./dataset/")
                    fake_y_correct_cout = 0
                    for i in range(len(test_images_y)):
                        y_correct_eval, fake_y_correct_eval = sess.run(
                            [y_correct, fake_y_correct],
                            feed_dict={
                                cycle_gan.y: [test_images_y[i]],
                                cycle_gan.y_label: [test_labels_y[i]]
                            })
                        if fake_y_correct_eval:
                            fake_y_correct_cout += 1

                    print('fake_y_accuracy: {}'.format(fake_y_correct_cout /
                                                       len(test_labels_y)))

                step += 1

        except KeyboardInterrupt:
            logging.info('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            # Always persist the latest weights before shutting down threads.
            save_path = saver.save(sess,
                                   checkpoints_dir + "/model.ckpt",
                                   global_step=step)
            print("Model saved in file: %s" % save_path)
            # When done, ask the queue-runner threads to stop and join them.
            coord.request_stop()
            coord.join(threads)
def train():
    """Run an occlusion-sensitivity analysis with a trained CycleGAN (TF1).

    Restores (or initializes) the model, then for one Y-domain test image
    slides a 16x16 white occluder over every pixel position, measures how
    much the classifier's top softmax probability drops, and saves the
    occlusion map plus a heatmap overlay under ./result/occ_test.
    """
    if FLAGS.load_model is not None:
        # NOTE(review): str.lstrip strips a character set, not a prefix;
        # strip the literal "checkpoints/" prefix instead.
        load_model = FLAGS.load_model
        if load_model.startswith("checkpoints/"):
            load_model = load_model[len("checkpoints/"):]
        checkpoints_dir = "checkpoints/" + load_model
    else:
        current_time = datetime.now().strftime("%Y%m%d-%H%M")
        checkpoints_dir = "checkpoints/{}".format(current_time)
        os.makedirs(checkpoints_dir, exist_ok=True)

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN(
            batch_size=FLAGS.batch_size,
            image_size=FLAGS.image_size,
            use_lsgan=FLAGS.use_lsgan,
            norm=FLAGS.norm,
            lambda1=FLAGS.lambda1,
            lambda2=FLAGS.lambda2,
            learning_rate=FLAGS.learning_rate,
            beta1=FLAGS.beta1,
            ngf=FLAGS.ngf
        )
        # Only softmax3 (the classifier's probability output) is evaluated
        # below; the other outputs of model() are unpacked but unused here.
        G_loss, D_Y_loss, F_loss, D_X_loss, teacher_loss, student_loss, learning_loss, \
            x_correct, y_correct, fake_x_correct, softmax3, fake_x_pre, f_fakeX, fake_x, fake_y = cycle_gan.model()

    with tf.Session(graph=graph) as sess:
        if FLAGS.load_model is not None:
            # Restore from a hard-coded checkpoint.
            # TODO(review): derive from tf.train.latest_checkpoint(checkpoints_dir).
            meta_graph_path = "checkpoints/20190611-1650/model.ckpt-30000.meta"
            print('meta_graph_path', meta_graph_path)
            restore = tf.train.import_meta_graph(meta_graph_path)
            restore.restore(sess, "checkpoints/20190611-1650/model.ckpt-30000")
        else:
            sess.run(tf.global_variables_initializer())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            # NOTE(review): this loop has no break, so the analysis repeats
            # until the process is interrupted — presumably a single pass was
            # intended; confirm before adding a break.
            while not coord.should_stop():
                # Prepare every output directory (prepare_dir is presumably
                # idempotent — TODO confirm against utils).
                result_dir = './result'
                fake_dir = os.path.join(result_dir, 'fake_xy')
                roc_dir = os.path.join(result_dir, 'roc_curves')
                plot_dir = os.path.join(result_dir, 'tsne_pca')
                conv_dir = os.path.join(result_dir, 'convs')
                occ_dir = os.path.join(result_dir, 'occ_test')
                Xconv_dir = os.path.join(result_dir, 'Xconv_dir')
                fakeXconv_dir = os.path.join(result_dir, 'fakeXconv_dir')
                Y_VGGconv_dir = os.path.join(result_dir, 'Y_VGGconv_dir')
                fakeY_VGGconv_dir = os.path.join(result_dir, 'fakeY_VGGconv_dir')
                rconv_dir = os.path.join(result_dir, 'resconvs')
                for d in (result_dir, occ_dir, fake_dir, roc_dir, plot_dir,
                          conv_dir, rconv_dir, Xconv_dir, fakeXconv_dir,
                          Y_VGGconv_dir, fakeY_VGGconv_dir):
                    utils.prepare_dir(d)

                x_image, x_label, oximage = get_test_batch2("X", 1, FLAGS.image_size, FLAGS.image_size, "./dataset/")
                y_image, y_label, oyimage = get_test_batch2("Y", 1, FLAGS.image_size, FLAGS.image_size, "./dataset/")

                # Analyze the second Y test image.
                # NOTE(review): assumes the batch holds at least 2 images and
                # that they are 256x256 — confirm against get_test_batch2.
                image = y_image[1]
                width = height = 256
                occluded_size = 16

                # data[0] is the clean image; data[k] (k >= 1) is the image
                # with a white occluded_size x occluded_size square centred
                # on pixel k-1 (clamped at the borders).
                data = np.empty((width * height + 1, width, height, 3), dtype="float32")
                data[0, :, :, :] = image
                cnt = 1
                for i in range(height):
                    for j in range(width):
                        i_min = max(int(i - occluded_size / 2), 0)
                        i_max = min(int(i + occluded_size / 2), height)
                        j_min = max(int(j - occluded_size / 2), 0)
                        j_max = min(int(j + occluded_size / 2), width)
                        data[cnt, :, :, :] = image
                        data[cnt, i_min:i_max, j_min:j_max, :] = 255
                        cnt += 1

                occ_map = np.empty((width, height), dtype='float64')
                print('occ_map.shape', occ_map.shape)

                # Baseline: top softmax probability on the unoccluded image.
                # (softmax outputs are non-negative, so np.max matches the
                # original manual scan that started from 0.)
                feature_y_eval = sess.run(softmax3,
                                          feed_dict={cycle_gan.y: [data[0]]})
                baseline_prob = float(np.max(feature_y_eval[0]))
                print('feature_y_eval', feature_y_eval)
                print('max', baseline_prob)

                cnt = 0
                for i in range(width):
                    for j in range(height):
                        feature_y_eval = sess.run(
                            softmax3,
                            feed_dict={cycle_gan.y: [data[cnt + 1]]})
                        occluded_prob = float(np.max(feature_y_eval[0]))
                        # Confidence drop caused by occluding this region.
                        occ_value = baseline_prob - occluded_prob
                        occ_map[i, j] = occ_value
                        print(str(cnt) + ':' + str(occ_value))
                        cnt += 1

                occ_map_path = os.path.join(occ_dir, 'occlusion_map_{}.txt'.format('1'))
                np.savetxt(occ_map_path, occ_map, fmt='%0.8f')
                cv2.imwrite(os.path.join(occ_dir, '{}.png'.format('1')), oyimage[1])
                draw_heatmap(occ_map_path=occ_map_path, ori_img=oyimage[1],
                             save_dir=os.path.join(occ_dir, 'heatmap_{}.png'.format('1')))

        except KeyboardInterrupt:
            logging.info('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            # When done, ask the queue-runner threads to stop and join them.
            coord.request_stop()
            coord.join(threads)
def train():
    """Train the fuzzy-clustering CycleGAN (FCM + GAN, TF1 graph mode).

    Initializes (or reloads) the FCM membership matrices Ux/Uy and cluster
    centres Cx/Cy from deep features, then alternates GAN optimization with
    a periodic FCM update on the Y domain.  The best-accuracy snapshot goes
    to ``<checkpoints_dir>/max``; the final model and U/C matrices are
    written in the ``finally`` block.
    """
    if FLAGS.load_model is not None:
        # NOTE(review): str.lstrip strips a character set, not a prefix;
        # strip the literal "checkpoints/" prefix instead.
        load_model = FLAGS.load_model
        if load_model.startswith("checkpoints/"):
            load_model = load_model[len("checkpoints/"):]
        checkpoints_dir = "checkpoints/" + load_model
    else:
        current_time = datetime.now().strftime("%Y%m%d-%H%M")
        checkpoints_dir = "checkpoints/{}".format(current_time)
        os.makedirs(checkpoints_dir, exist_ok=True)

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN(
            batch_size=FLAGS.batch_size,
            image_size=FLAGS.image_size,
            use_lsgan=FLAGS.use_lsgan,
            norm=FLAGS.norm,
            lambda1=FLAGS.lambda1,
            lambda2=FLAGS.lambda2,
            learning_rate=FLAGS.learning_rate,
            learning_rate2=FLAGS.learning_rate2,
            beta1=FLAGS.beta1,
            ngf=FLAGS.ngf
        )
        G_loss, D_Y_loss, F_loss, D_X_loss, fake_y, fake_x, Disperse_loss, Fuzzy_loss, \
            feature_x, feature_y, _, _ = cycle_gan.model()
        # Two optimizer groups: one for the GAN/disperse losses, a second
        # (with learning_rate2) for the fuzzy loss.
        optimizers = cycle_gan.optimize(G_loss, D_Y_loss, F_loss, D_X_loss, Disperse_loss)
        optimizers2 = cycle_gan.optimize2(Fuzzy_loss)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(checkpoints_dir, graph)
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        if FLAGS.load_model is not None:
            checkpoint = tf.train.get_checkpoint_state(checkpoints_dir)
            meta_graph_path = checkpoint.model_checkpoint_path + ".meta"
            restore = tf.train.import_meta_graph(meta_graph_path)
            restore.restore(sess, tf.train.latest_checkpoint(checkpoints_dir))
            # Recover the global step from the checkpoint file name
            # ("model.ckpt-<step>.meta").
            step = int(meta_graph_path.split("-")[2].split(".")[0])
        else:
            sess.run(tf.global_variables_initializer())
            step = 1

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            x_path = FLAGS.X + FLAGS.UC_name
            print('now is in FCM initializing!')
            if FLAGS.load_model is None:
                # Fresh run: extract deep features for every image and
                # initialize the fuzzy membership/centre matrices from them.
                x_images, x_id_list, x_len, x_labels, _, _ = get_source_batch(0, 256, 256, source_dir=x_path)
                y_images, y_id_list, y_len, y_labels, _, _ = get_target_batch(0, 256, 256, target_dir=FLAGS.Y)
                print('x_len', len(x_images))
                print('y_len', len(y_images))
                x_data = []
                y_data = []
                for x in x_images:
                    feature_x_eval = sess.run(feature_x, feed_dict={cycle_gan.x: [x]})
                    x_data.append(feature_x_eval[0])
                for y in y_images:
                    feature_y_eval = sess.run(feature_y, feed_dict={cycle_gan.y: [y]})
                    y_data.append(feature_y_eval[0])
                Ux, Uy, Cx, Cy = fuzzy.initialize_UC_test(x_len, x_data, y_len, y_data, FLAGS.UC_name, checkpoints_dir)
                np.savetxt(checkpoints_dir + "/Ux" + FLAGS.UC_name + '.txt', Ux, fmt="%.20f", delimiter=",")
                np.savetxt(checkpoints_dir + "/Uy" + FLAGS.UC_name + '.txt', Uy, fmt="%.20f", delimiter=",")
                np.savetxt(checkpoints_dir + "/Cx" + FLAGS.UC_name + '.txt', Cx, fmt="%.20f", delimiter=",")
                np.savetxt(checkpoints_dir + "/Cy" + FLAGS.UC_name + '.txt', Cy, fmt="%.20f", delimiter=",")
            else:
                # Resumed run: reload U/C from the checkpoint directory.
                # Ux/Cx are re-wrapped to match the shapes produced above.
                Ux = np.loadtxt(checkpoints_dir + "/Ux" + FLAGS.UC_name + '.txt', delimiter=",")
                Ux = [[x] for x in Ux]
                Uy = np.loadtxt(checkpoints_dir + "/Uy" + FLAGS.UC_name + '.txt', delimiter=",")
                Cx = np.loadtxt(checkpoints_dir + "/Cx" + FLAGS.UC_name + '.txt', delimiter=",")
                Cx = [Cx]
                Cy = np.loadtxt(checkpoints_dir + "/Cy" + FLAGS.UC_name + '.txt', delimiter=",")
            print('FCM initialization is ended! Go to train')
            max_accuracy = 0
            while not coord.should_stop():
                images_x, idx_list, len_x, labels_x, _, _ = get_source_batch(FLAGS.batch_size, FLAGS.image_size,
                                                                             FLAGS.image_size, source_dir=x_path)
                subUx = fuzzy.getSubU(Ux, idx_list)
                label_x = [x[0] for x in subUx]
                images_y, idy_list, len_y, labels_y, _, _ = get_target_batch(FLAGS.batch_size, FLAGS.image_size,
                                                                             FLAGS.image_size, target_dir=FLAGS.Y)
                subUy = fuzzy.getSubU(Uy, idy_list)
                label_y = [x[0] for x in subUy]
                # One GAN step and one fuzzy-loss step per iteration.
                _, _, Fuzzy_loss_val, G_loss_val, D_Y_loss_val, F_loss_val, D_X_loss_val, summary, \
                    Disperse_loss_val, feature_x_eval, feature_y_eval = sess.run(
                        [optimizers, optimizers2, Fuzzy_loss, G_loss, D_Y_loss, F_loss, D_X_loss, summary_op,
                         Disperse_loss, feature_x, feature_y],
                        feed_dict={cycle_gan.x: images_x, cycle_gan.y: images_y,
                                   cycle_gan.Uy2x: subUy, cycle_gan.Ux2y: subUx,
                                   cycle_gan.x_label: label_x, cycle_gan.y_label: label_y,
                                   cycle_gan.ClusterX: Cx, cycle_gan.ClusterY: Cy})
                train_writer.add_summary(summary, step)
                train_writer.flush()

                # Periodic loss logging (re-enabled: it was disabled inside a
                # stray triple-quoted string, leaving the fetched values unused).
                if step % 10 == 0:
                    print('-----------Step %d:-------------' % step)
                    logging.info('  G_loss   : {}'.format(G_loss_val))
                    logging.info('  D_Y_loss : {}'.format(D_Y_loss_val))
                    logging.info('  F_loss   : {}'.format(F_loss_val))
                    logging.info('  D_X_loss : {}'.format(D_X_loss_val))
                    logging.info('  Disperse_loss : {}'.format(Disperse_loss_val))
                    logging.info('  Fuzzy_loss : {}'.format(Fuzzy_loss_val))

                if step % 100 == 0:
                    # FCM update on fresh Y features, then evaluate clustering
                    # accuracy and snapshot the best model so far.
                    print('Now is in FCM training!')
                    y_images, y_id_list, y_len, y_labels, _, _ = get_target_batch(0, 256, 256, target_dir=FLAGS.Y)
                    print('y_len', len(y_images))
                    y_data = []
                    for y in y_images:
                        feature_y_eval = sess.run(feature_y, feed_dict={cycle_gan.y: [y]})
                        y_data.append(feature_y_eval[0])

                    Uy, Cy = fuzzy.updata_U(checkpoints_dir, y_data, Uy, FLAGS.UC_name)
                    accuracy, tp, tn, fp, fn, f1_score, recall, precision, specificity = computeAccuracy(Uy, y_labels)
                    print("accuracy:%.4f\ttp:%.4f\ttn:%.4f\tfp %d\tfn:%d" %
                          (accuracy, tp, tn, fp, fn))
                    if accuracy == 1:
                        break
                    if accuracy >= max_accuracy:
                        max_accuracy = accuracy
                        os.makedirs(checkpoints_dir + "/max", exist_ok=True)
                        # Record the step and accuracy of the best model.
                        with open(checkpoints_dir + "/max/step.txt", 'w') as f:
                            f.write(str(step) + '\n')
                            f.write(str(accuracy) + '\taccuracy\n')
                        np.save(checkpoints_dir + "/max/feature_fcgan.npy", y_data)
                        np.savetxt(checkpoints_dir + "/max/" + "/Uy" + FLAGS.UC_name + '.txt', Uy, fmt="%.20f", delimiter=",")
                        np.savetxt(checkpoints_dir + "/max/" + "/Cy" + FLAGS.UC_name + '.txt', Cy, fmt="%.20f", delimiter=",")
                        np.savetxt(checkpoints_dir + "/max/" + "/Ux" + FLAGS.UC_name + '.txt', Ux, fmt="%.20f", delimiter=",")
                        np.savetxt(checkpoints_dir + "/max/" + "/Cx" + FLAGS.UC_name + '.txt', Cx, fmt="%.20f", delimiter=",")
                        save_path = saver.save(sess, checkpoints_dir + "/max/model.ckpt", global_step=step)
                        print("Max model saved in file: %s" % save_path)
                    print('max_accuracy:', max_accuracy)
                    print('mean_U', np.min(Uy, 0))
                step += 1
                if step > 10000:
                    logging.info('train stop!')
                    break

        except KeyboardInterrupt:
            logging.info('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            # Persist the final model and FCM matrices before shutdown.
            save_path = saver.save(sess, checkpoints_dir + "/model.ckpt", global_step=step)
            np.savetxt(checkpoints_dir + "/Uy" + FLAGS.UC_name + '.txt', Uy, fmt="%.20f", delimiter=",")
            np.savetxt(checkpoints_dir + "/Cy" + FLAGS.UC_name + '.txt', Cy, fmt="%.20f", delimiter=",")
            np.savetxt(checkpoints_dir + "/Ux" + FLAGS.UC_name + '.txt', Ux, fmt="%.20f", delimiter=",")
            np.savetxt(checkpoints_dir + "/Cx" + FLAGS.UC_name + '.txt', Cx, fmt="%.20f", delimiter=",")
            logging.info("Model saved in file: %s" % save_path)
            # When done, ask the queue-runner threads to stop and join them.
            coord.request_stop()
            coord.join(threads)
class Converter():
    """Voice converter wrapping a trained CycleGAN-VC model.

    Loads the model plus the MCEP/log-F0 normalization statistics saved at
    training time, and converts raw 16 kHz mono waveforms between speakers
    A and B, returning 48 kHz stereo output.
    """

    def __init__(self, model_dir, model_name):
        """Load the model and the normalization .npz files from model_dir."""
        self.num_features = 24       # MCEP dimensionality
        self.sampling_rate = 16000   # model's native sample rate (Hz)
        self.frame_period = 5.0      # WORLD analysis frame period (ms)

        self.model = CycleGAN(num_features=self.num_features, mode='test')
        self.model.load(filepath=os.path.join(model_dir, model_name))

        # Per-speaker feature statistics computed during training.
        self.mcep_normalization_params = np.load(
            os.path.join(model_dir, 'mcep_normalization.npz'))
        self.mcep_mean_A = self.mcep_normalization_params['mean_A']
        self.mcep_std_A = self.mcep_normalization_params['std_A']
        self.mcep_mean_B = self.mcep_normalization_params['mean_B']
        self.mcep_std_B = self.mcep_normalization_params['std_B']

        self.logf0s_normalization_params = np.load(
            os.path.join(model_dir, 'logf0s_normalization.npz'))
        self.logf0s_mean_A = self.logf0s_normalization_params['mean_A']
        self.logf0s_std_A = self.logf0s_normalization_params['std_A']
        self.logf0s_mean_B = self.logf0s_normalization_params['mean_B']
        self.logf0s_std_B = self.logf0s_normalization_params['std_B']

    def _direction_params(self, conversion_direction):
        """Return (f0 src mean/std, f0 tgt mean/std, mcep src mean/std,
        mcep tgt mean/std) for the given direction.

        Any value other than 'A2B' selects B->A, matching the original
        if/else behavior.
        """
        if conversion_direction == 'A2B':
            return (self.logf0s_mean_A, self.logf0s_std_A,
                    self.logf0s_mean_B, self.logf0s_std_B,
                    self.mcep_mean_A, self.mcep_std_A,
                    self.mcep_mean_B, self.mcep_std_B)
        return (self.logf0s_mean_B, self.logf0s_std_B,
                self.logf0s_mean_A, self.logf0s_std_A,
                self.mcep_mean_B, self.mcep_std_B,
                self.mcep_mean_A, self.mcep_std_A)

    def convert_to_pcm_data(self, wav, conversion_direction='A2B'):
        """Convert a waveform and return 48 kHz stereo PCM samples (N, 2)."""
        wav = wav_padding(wav=wav,
                          sr=self.sampling_rate,
                          frame_period=self.frame_period,
                          multiple=4)
        f0, timeaxis, sp, ap = world_decompose(wav=wav,
                                               fs=self.sampling_rate,
                                               frame_period=self.frame_period)
        coded_sp = world_encode_spectral_envelop(sp=sp,
                                                 fs=self.sampling_rate,
                                                 dim=self.num_features)
        coded_sp_transposed = coded_sp.T

        (f0_mean_src, f0_std_src, f0_mean_tgt, f0_std_tgt,
         mcep_mean_src, mcep_std_src, mcep_mean_tgt, mcep_std_tgt) = \
            self._direction_params(conversion_direction)

        f0_converted = pitch_conversion(f0=f0,
                                        mean_log_src=f0_mean_src,
                                        std_log_src=f0_std_src,
                                        mean_log_target=f0_mean_tgt,
                                        std_log_target=f0_std_tgt)
        # Normalize with source stats, convert, denormalize with target stats.
        coded_sp_norm = (coded_sp_transposed - mcep_mean_src) / mcep_std_src
        coded_sp_converted_norm = self.model.test(
            inputs=np.array([coded_sp_norm]),
            direction=conversion_direction)[0]
        coded_sp_converted = coded_sp_converted_norm * mcep_std_tgt + mcep_mean_tgt

        coded_sp_converted = np.ascontiguousarray(coded_sp_converted.T)
        decoded_sp_converted = world_decode_spectral_envelop(
            coded_sp=coded_sp_converted, fs=self.sampling_rate)
        wav_transformed = world_speech_synthesis(
            f0=f0_converted,
            decoded_sp=decoded_sp_converted,
            ap=ap,
            fs=self.sampling_rate,
            frame_period=self.frame_period)

        # For debugging model output, uncomment the following line:
        # librosa.output.write_wav('model_output.wav', wav_transformed, self.sampling_rate)

        # TODO: Perhaps ditch this. It's probably unnecessary work.
        # Upsample to 48 kHz and duplicate the channel to stereo.
        upsampled = librosa.resample(wav_transformed, self.sampling_rate,
                                     48000)
        pcm_data = upsampled.astype(np.float64)
        stereo_pcm_data = np.tile(pcm_data, (2, 1)).T
        return stereo_pcm_data

    def convert_pcm_to_wav(self, stereo_pcm_data):
        """Encode stereo PCM into an in-memory 48 kHz float32 WAV file."""
        buf = io.BytesIO()
        scipy.io.wavfile.write(buf, 48000, stereo_pcm_data.astype(np.float32))
        # Rewind so callers can read() the WAV immediately; getvalue() callers
        # are unaffected.  (The original returned the buffer at EOF.)
        buf.seek(0)
        return buf

    def convert(self, wav, conversion_direction='A2B'):
        """Full pipeline: waveform -> converted in-memory WAV buffer."""
        stereo_pcm_data = self.convert_to_pcm_data(
            wav, conversion_direction=conversion_direction)
        return self.convert_pcm_to_wav(stereo_pcm_data)
예제 #33
0
from utils import *
from config import *

# configure full paths: nest each output directory under the experiment id
checkpoints_dir = os.path.join(checkpoints_dir, experiment_id)
samples_dir = os.path.join(samples_dir, experiment_id)
logs_dir = os.path.join(logs_dir, experiment_id)

# make directories; os.makedirs(..., exist_ok=True) replaces the original
# os.system('mkdir -p ...') — portable, no shell, no injection risk from
# paths containing shell metacharacters
os.makedirs(checkpoints_dir, exist_ok=True)
os.makedirs(samples_dir, exist_ok=True)
os.makedirs(logs_dir, exist_ok=True)

## create models
# CycleGAN() returns the two generators and the patch-level (Dp) and
# global-level (Dg) discriminators for both domains.
G_XtoY, G_YtoX, Dp_X, Dp_Y, Dg_X, Dg_Y = CycleGAN(n_res_blocks=2)

# Both generators are optimized jointly, so gather their parameters
# into one list for a single Adam optimizer.
g_params = [*G_XtoY.parameters(), *G_YtoX.parameters()]

# One Adam optimizer for the generators, and one per discriminator.
g_optimizer = optim.Adam(g_params, g_lr, [beta1, beta2])
dp_x_optimizer = optim.Adam(Dp_X.parameters(), d_lr, [beta1, beta2])
dp_y_optimizer = optim.Adam(Dp_Y.parameters(), d_lr, [beta1, beta2])
dg_x_optimizer = optim.Adam(Dg_X.parameters(), d_lr, [beta1, beta2])
dg_y_optimizer = optim.Adam(Dg_Y.parameters(), d_lr, [beta1, beta2])


# count number of parameters in a model
def count_model_parameters(model):