Example 1
    def build_model(self):
        if self.args.model_type == 'AdaVAEd':
            speaker_encoder = VoiceEncoder()
            self.model = cc(AdaVAEd(self.config, speaker_encoder))
        elif self.args.model_type == 'AdaVAE' or self.args.model_type == 'AdaVAEaug':
            self.model = cc(AdaVAE(self.config))
        elif self.args.model_type == 'AdaVAEavg':
            self.model = cc(AdaVAEavg(self.config))
        elif self.args.model_type == 'AdaAE':
            self.model = cc(AdaAE(self.config))
        elif self.args.model_type == 'AdaVAEVAE':
            self.model = cc(AdaVAEVAE(self.config))
        else:
            raise ValueError(f'Unknown model_type: {self.args.model_type}')

        self.model.eval()
        if self.args.use_wavenet:
            from wavenet import build_model
            self.vocoder = cc(build_model())
        
        if self.args.infer_sproofing:
            from classify import SpeakerClassifier
            self.speaker_classifier = cc(SpeakerClassifier(n_class=len(self.speaker2id)))
            self.speaker_classifier.eval()
        return
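The cc() helper used throughout these snippets is never shown in the excerpts. A minimal sketch, assuming it simply moves a module to the available device (the name and behavior are an assumption, not confirmed by the source):

import torch

# Hypothetical sketch of the cc() wrapper seen above: place a module
# (or tensor) on the GPU when one is available, otherwise on the CPU.
def cc(net):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return net.to(device)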
Example 2
    def build_model(self):
        if self.args.model_type == 'AE':
            self.model = cc(AE(self.config))
        elif self.args.model_type == 'VQVAE':
            speaker_encoder = VoiceEncoder()
            self.model = cc(VQVAE(self.config, speaker_encoder))
        elif self.args.model_type == 'DAE':
            speaker_encoder = VoiceEncoder()
            self.model = cc(AE_D(self.config, speaker_encoder))
        elif self.args.model_type == 'AutoVC':
            speaker_encoder = VoiceEncoder()
            self.model = cc(AutoVC(32, 256, 512, 32, speaker_encoder))
        elif self.args.model_type == 'Prosody':
            speaker_encoder = VoiceEncoder()
            self.model = cc(VQVAE_Prosody(self.config, speaker_encoder))
        elif self.args.model_type == 'MBV':
            speaker_encoder = VoiceEncoder()
            self.model = cc(MBV(self.config, speaker_encoder))
        elif self.args.model_type == 'NORM':
            speaker_encoder = VoiceEncoder()
            self.model = cc(MultipleNorm(self.config, speaker_encoder))
        elif self.args.model_type == 'Attn':
            speaker_encoder = VoiceEncoder()
            self.model = cc(VQVAE_attn(self.config, speaker_encoder))
        else:
            raise ValueError(f'Unknown model_type: {self.args.model_type}')

        self.model.eval()

        if self.args.use_wavenet:
            from wavenet import build_model
            self.vocoder = cc(build_model())

        return
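VoiceEncoder appears to be Resemblyzer's pretrained speaker encoder (an assumption; the import is not visible in the excerpt). If so, extracting a speaker embedding on its own would look like this:

from pathlib import Path
from resemblyzer import VoiceEncoder, preprocess_wav

# Assuming VoiceEncoder is Resemblyzer's speaker encoder: preprocess a
# waveform and extract a fixed-size speaker embedding ('example.wav' is
# a placeholder file name).
wav = preprocess_wav(Path('example.wav'))
encoder = VoiceEncoder()
embedding = encoder.embed_utterance(wav)  # numpy array of shape (256,)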
Example 3
    def build_model(self):
        speaker_encoder = VoiceEncoder()
        self.model = cc(AE_D(self.config, speaker_encoder))
        self.model.eval()
        if self.args.use_wavenet:
            from wavenet import build_model
            self.vocoder = cc(build_model())

        return
Example 4
    def build_model(self):
        if self.args.model_type == 'AdaVAEd':
            speaker_encoder = VoiceEncoder()
            self.model = cc(AdaVAEd(self.config, speaker_encoder))
        elif self.args.model_type == 'AdaVAE':
            self.model = cc(AdaVAE(self.config))
        elif self.args.model_type == 'AdaVAEavg':
            self.model = cc(AdaVAEavg(self.config))
        else:
            raise ValueError(f'Unknown model_type: {self.args.model_type}')

        self.model.eval()
        if self.args.use_wavenet:
            from wavenet import build_model
            self.vocoder = cc(build_model())

        return
Example 5
    def build_model(self):
        if self.args.model_type == 'AdaVAEGAN':
            self.model = cc(AdaVAEGAN(self.config))
            self.patch_model = cc(Decoder(**self.config['Decoder']))
            self.patch_model.eval()
        elif self.args.model_type == 'AdaVAE':
            self.model = cc(AdaVAEGAN(self.config))
        else:
            raise ValueError(f'Unknown model_type: {self.args.model_type}')

        self.model.eval()
        if self.args.use_wavenet:
            from wavenet import build_model
            self.vocoder = cc(build_model())
        if self.args.infer_sproofing:
            from classify import SpeakerClassifier
            self.speaker_classifier = cc(SpeakerClassifier(n_class=len(self.speaker2id)))
            self.speaker_classifier.eval()
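The excerpt builds the spoofing classifier but does not show how it is applied. A hypothetical inference sketch, assuming SpeakerClassifier maps a batch of mel spectrograms to per-speaker logits:

import torch

# Hypothetical check of whether a converted utterance is attributed to the
# target speaker; the classifier's input layout and the id mapping are
# assumptions, not taken from the excerpt.
def fools_classifier(speaker_classifier, converted_mel, target_speaker_id):
    with torch.no_grad():
        logits = speaker_classifier(converted_mel.unsqueeze(0))
        return logits.argmax(dim=-1).item() == target_speaker_id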
Example 6
    sorted_datas = sorted(zip(all_datas, all_labels),
                          key=lambda x: x[0].shape[1])
    all_datas = [data for data, _ in sorted_datas]
    all_labels = [label for _, label in sorted_datas]
    mkdirs(args.output_dir)
    #ocr = OCR(args.weights_path, label_text_path, config=config)

    from wavenet import build_model
    # config = json.load(open(config_path, 'r'))
    old_config_path = './logdir/15_mar/sompo/keras/basemodel.json'
    label_text = json.load(open('./datasets/label_text_4855.json', 'r'))
    basemodel = build_model(len(label_text) + 1, old_config_path)

    basemodel.load_weights(
        './logdir/dc3/test/keras/basemodel_retrain_485000.h5')
    model = tf.keras.models.Model(basemodel.input, basemodel.outputs[-1])

    ocr = OCR(None, label_text_path, config=None, model=model)

    eval_ocr = Eval(ocr)
    output = eval_ocr.export_report_jp(all_datas,
                                       all_labels,
                                       name=name,
                                       max_sample_per_file=1000,
                                       output_dir_report=args.output_dir,
                                       output_dir_image='{}/eval_imgs'.format(
                                           args.output_dir))
Example 7
                   1,2,4,8,16,32,64,128,256,512,
                   1,2,4,8,16,32,64,128,256,512]
impulse = {0: './impulse/impulse.wav'}

generated_sample = np.load(seed_audio_path)
generated_sample = generated_sample.tolist()
generated_sample = q_to_one_hot(generated_sample, input_dim).astype(np.uint8)
generated_sample = generated_sample[-sample_len:]
# sample_list = []
# sample_list.append(generated_sample)
# sample_list = pad_sequences(sample_list, maxlen=sample_len, padding='post')
# generated_sample = sample_list[0]
generated_sample = np.reshape(generated_sample, (1, sample_len, input_dim))
pred_seed = np.reshape(generated_sample, (-1, input_dim))

model = build_model(sample_len, dilation_factor)
model.load_weights(weight_path)

generation_step = sr * sec
prev_sample = -1
equal_cnt = 0
impulse_idx = 0

for i in range(generation_step):
    preds = model.predict(np.expand_dims(pred_seed, 0))  # prediction with the model
    sampled = sample(preds[0][-1])  # multinomial sampling
    # To prevent dead silence.
    if sampled == prev_sample:
        equal_cnt += 1
    else:
        equal_cnt = 0
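The sample() helper called in the loop above is not defined in this excerpt. A common temperature-based multinomial sampler over the model's softmax output looks like this (a sketch, not necessarily the author's implementation):

import numpy as np

# Draw one class index from a softmax distribution, optionally sharpened
# or flattened by a temperature; the original sample() may differ.
def sample(preds, temperature=1.0):
    preds = np.asarray(preds, dtype=np.float64)
    preds = np.log(preds + 1e-8) / temperature
    probs = np.exp(preds) / np.sum(np.exp(preds))
    return int(np.argmax(np.random.multinomial(1, probs)))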
Example 8
run_data_init([train_initer], shuffle=True, sess=sess, batch_size=batch_size)

# In[9]:

# %%time
imgs, labels, train_paths, train_losses, train_preds = sess.run(train_tensors)
print(imgs.shape)
imgs = np.clip(imgs, 0, 1)
plot_images(imgs[..., 0], mxn=[3, 1], dpi=200)

# # 2. Build model

# In[10]:

from wavenet import build_model
basemodel = build_model(len(label_text) + 1, ngf=64)
# basemodel = Model.from_config(read_json('../lib-ocr/ocr/anson/model_config_files'))
basemodel.summary()

# ## 2.2 Build predict tensors
# For the CTC loss we take the 2nd output.
#
# For the entropy loss we take the 1st output.

# In[11]:

logits_train, preds_train = basemodel(train_tensors[0])
preds_test = basemodel(test_tensors[0])[1]

# ## 2.3 Build training ops
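The excerpt ends at the "Build training ops" header. A minimal sketch of a CTC training loss with the Keras backend helper, matching the comment that the 2nd model output feeds the CTC loss (the placeholder names and shapes are assumptions):

import tensorflow as tf

# Hypothetical CTC loss op for the truncated section: y_pred is the
# (batch, time, num_classes) softmax output, y_true the padded label ids,
# and the two length tensors give valid time steps / label lengths.
def ctc_loss(y_true, y_pred, input_length, label_length):
    return tf.keras.backend.ctc_batch_cost(y_true, y_pred,
                                           input_length, label_length)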