Example no. 1
def main(_):
    # Download the dataset; the Tsinghua THCHS-30 dataset is downloaded by default
    download_and_untar(FLAGS.data_url.split(','), FLAGS.data_dir)

    # Scan the training set
    train_wav_files = load_wav_file(FLAGS.data_dir + 'data_thchs30/train')
    train_labels_dict = load_label_file(FLAGS.data_dir +
                                        'resource/trans/train.word.txt')

    # Extract MFCC features, build the lexicon, and vectorize the labels
    train_sample_files = preapre_wav_list(train_wav_files,
                                          FLAGS.dct_coefficient_count,
                                          FLAGS.mfcc_dir + 'train/')
    lexicon, train_labels, train_sample_files = prepare_label_list(
        train_sample_files, train_labels_dict)
    train_vector_labels = labels_to_vector(train_labels, lexicon)

    test_wav_files = load_wav_file(FLAGS.data_dir + 'data_thchs30/test')
    test_labels_dict = load_label_file(FLAGS.data_dir +
                                       'resource/trans/test.word.txt')

    test_sample_files = preapre_wav_list(test_wav_files,
                                         FLAGS.dct_coefficient_count,
                                         FLAGS.mfcc_dir + 'test/')
    _, test_labels, test_sample_files = prepare_label_list(
        test_sample_files, test_labels_dict)
    test_vector_labels = labels_to_vector(test_labels, lexicon)

    # Start training
    train(train_sample_files, train_vector_labels, test_sample_files,
          test_vector_labels, lexicon, FLAGS.dct_coefficient_count,
          FLAGS.num_contexts, FLAGS.how_many_training_steps,
          FLAGS.learning_rate, FLAGS.batch_size, FLAGS.summaries_dir,
          FLAGS.train_dir, FLAGS.eval_step_interval, FLAGS.model_architecture,
          FLAGS.model_size_info)
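
The helper functions used above (prepare_label_list, labels_to_vector, etc.) are not part of this excerpt. As a rough illustration of the label-vectorization step, the sketch below maps each token of a transcript to its index in the lexicon; the name labels_to_vector_sketch and the handling of unknown tokens are assumptions, not the project's actual implementation.

# Hypothetical sketch of the label-vectorization step: turn tokenized
# transcripts into integer ID sequences using a lexicon {token: id}.
# Unknown tokens are simply skipped here; the real project may differ.
def labels_to_vector_sketch(labels, lexicon):
    vectors = []
    for tokens in labels:  # one list of tokens (e.g. Chinese words) per utterance
        vectors.append([lexicon[t] for t in tokens if t in lexicon])
    return vectors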
Example no. 2
def main(_):
    # Download the dataset; the Tsinghua THCHS-30 dataset is downloaded by default
    maybe_download_and_untar(FLAGS.data_url.split(','), FLAGS.data_dir)
    # Scan the dataset, extract MFCC features, build the lexicon, and vectorize the labels
    audio_processer = AudioPorcesser(FLAGS.data_dir, FLAGS.num_filters,
                                     FLAGS.downsampling_ratio,
                                     FLAGS.num_contexts, FLAGS.output_dir)

    lexicon = audio_processer.prepare(
        os.path.basename(FLAGS.data_url).split('.')[0])
    # Persist the lexicon as a tab-separated symbol table (one "token\tid" per line)
    with open(FLAGS.output_dir + 'symbol_table.txt', 'w') as f:
        f.write(
            re.sub(r"[\s'{}]", '',
                   str(lexicon)).replace(',', '\n').replace(':', '\t'))

    num_inputs = FLAGS.num_filters + 2 * FLAGS.num_filters * FLAGS.num_contexts
    # Start training
    train(audio_processer, num_inputs, len(lexicon), FLAGS.model_architecture,
          FLAGS.model_size_info, FLAGS.learning_rate, FLAGS.training_steps,
          FLAGS.batch_size, FLAGS.aligning, FLAGS.eval_step_interval,
          FLAGS.output_dir)

    decoder(audio_processer, FLAGS.output_dir + 'train/', lexicon)
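
The input width fed to train follows from the context stacking: each time step carries the current frame's num_filters coefficients plus num_contexts past and num_contexts future frames. A quick check with illustrative values (26 filters, 9 context frames per side; the actual flag defaults are not shown in this excerpt):

# Per-frame input width = current frame + left context + right context.
# The values 26 and 9 are only illustrative.
num_filters, num_contexts = 26, 9
num_inputs = num_filters + 2 * num_filters * num_contexts
print(num_inputs)  # 26 + 2*26*9 = 494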
Example no. 3
                                  collate_fn=collate_function)

    if args.model == "Conv":
        model = ConvModel(args.conv_channels,
                          "ReLU",
                          pos_emb=args.conv_pos_emb)
    elif args.model == "TransformerEncoder":
        model = TransformerEncoder(args, 100)
    elif args.model == "ConvTransformerEncoder":
        model = ConvTransformerEncoder(args, 21 * 2)
    elif args.model == "TransformerEnc":
        model = TransformerEnc(ninp=12 * 2,
                               nhead=4,
                               nhid=100,
                               nout=21 * 2,
                               nlayers=4,
                               dropout=0.0)
    else:
        raise ValueError("unknown model: " + args.model)
    print(args.resume)
    if not args.resume:
        if os.path.isdir(args.exp):
            raise Exception("Experiment name " + args.exp + " already exists.")
        os.mkdir(args.exp)
        os.mkdir(args.exp + "/models")

    with open(args.exp + "/args.pckl", "wb") as f:
        pickle.dump(args, f)

    train(model, train_dataloader, valid_dataloader, args)
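
The experiment directory and the pickled args make a run reproducible and resumable. A minimal sketch of the reload side, assuming the same file layout (the original resume logic is not part of this excerpt):

# Sketch: restore the argument namespace saved above when resuming a run.
import pickle

def load_args_sketch(exp_dir):
    with open(exp_dir + "/args.pckl", "rb") as f:
        return pickle.load(f)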
Example no. 4
                                           drop_last=True,
                                           shuffle=True,
                                           num_workers=args.workers,
                                           collate_fn=pad_collate_t,
                                           worker_init_fn=worker_init_fn)
val_loader = torch.utils.data.DataLoader(dataset_test,
                                         batch_size=args.batch_size,
                                         drop_last=True,
                                         shuffle=False,
                                         num_workers=args.workers,
                                         collate_fn=pad_collate)

image_model = ImageModels.Resnet101(args)
audio_model = AudioModels.Resnet(args)
trans_model = AudioModels.Linear_transform(args)
class_model = classification.CLASSIFIER(args)
discr_model = classification.DISCRIMINATOR(args)

train(image_model, audio_model, trans_model, class_model, discr_model,
      train_loader, val_loader, args)
'''
aweight = torch.load('asl_audio.pth')
iweight = torch.load('asl_image.pth')
tweight = torch.load('asl_trans.pth')
audio_model.load_state_dict(aweight)
image_model.load_state_dict(iweight)
trans_model.load_state_dict(tweight)

eva.att(audio_model,image_model,trans_model,val_loader,args)
'''
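
pad_collate and pad_collate_t are referenced but not shown. Below is a minimal sketch of a padding collate_fn for variable-length feature sequences, assuming each batch item is a single [T, feature_dim] tensor; the real functions likely handle (image, audio, ...) tuples and may differ.

# Hypothetical padding collate_fn: zero-pad every sequence in the batch to the
# longest one and return the original lengths alongside the padded batch.
import torch

def pad_collate_sketch(batch):
    lengths = torch.tensor([x.shape[0] for x in batch])
    max_len = int(lengths.max())
    padded = torch.zeros(len(batch), max_len, batch[0].shape[1])
    for i, x in enumerate(batch):
        padded[i, :x.shape[0]] = x
    return padded, lengths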
Example no. 5
def main(argv):

    tf.random.set_seed(FLAGS.seed)

    if FLAGS.tbdir is not None:
        summary_writers = utils.create_summary_writers(
            utils.fix_path(FLAGS.tbdir))

    # prepare dataset
    dataset = datasets.get_dataset()()
    input_shape = dataset.get_input_shape()

    # Create Nets and Optimizers
    encoder_decoder = nets.encoder_decoder(
        input_shape=input_shape,
        msg_length=FLAGS.msg_length,
        noise_layers=FLAGS.noise_layers,
        n_convbnrelu_encoder=FLAGS.n_convbnrelu_encoder,
        n_convbnrelu_decoder=FLAGS.n_convbnrelu_decoder)

    discriminator = nets.discriminator(
        input_shape=input_shape, n_convbnrelu=FLAGS.n_convbnrelu_discriminator)

    optimizer_encoder_decoder = tf.keras.optimizers.Adam(1e-3)
    optimizer_discriminator = tf.keras.optimizers.Adam(1e-3)

    # global step / epoch variables
    step = tf.Variable(0, dtype=tf.int64)
    epoch = tf.Variable(0, dtype=tf.int64)

    # prepare checkpointer
    ckpt = tf.train.Checkpoint(
        step=step,
        epoch=epoch,
        optimizer_encoder_decoder=optimizer_encoder_decoder,
        optimizer_discriminator=optimizer_discriminator,
        encoder_decoder=encoder_decoder,
        discriminator=discriminator)

    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              utils.fix_path(FLAGS.ckptdir),
                                              max_to_keep=FLAGS.keep_ckpts)

    if ckpt_manager.latest_checkpoint is not None:
        if FLAGS.load_from_ckpt:
            ckpt.restore(ckpt_manager.latest_checkpoint)
            logging.info("Loading model from checkpoint: {}".format(
                ckpt_manager.latest_checkpoint))

    # Metrics Tracker
    metrics_train = metrics.MetricsTracker()
    metrics_val = metrics.MetricsTracker()

    while epoch < FLAGS.epochs:

        dataset_train = dataset.create_train_dataset()

        for epoch_step, cover_images in enumerate(dataset_train):

            messages = tf.random.uniform([FLAGS.batch_size, FLAGS.msg_length],
                                         minval=0,
                                         maxval=2,
                                         dtype=tf.int32)
            messages = tf.cast(messages, dtype=tf.float32)

            time_start = time.time()
            outputs = steps.train(
                cover_images=cover_images,
                messages=messages,
                encoder_decoder=encoder_decoder,
                discriminator=discriminator,
                training=True,
                optimizer_encoder_decoder=optimizer_encoder_decoder,
                optimizer_discriminator=optimizer_discriminator)

            ms_per_step = (time.time() - time_start) * 1000.0
            ms_per_sample = ms_per_step / FLAGS.batch_size

            # Write step summaries
            is_summary_step = (step.numpy() % FLAGS.summary_freq) == 0
            if is_summary_step:

                step_losses = losses.step_loss(
                    cover_images,
                    messages,
                    encoder_decoder_output=outputs['encoder_decoder'],
                    discriminator_on_cover=outputs['discriminator_on_cover'],
                    discriminator_on_encoded=outputs[
                        'discriminator_on_encoded'])

                metrics_train.update(
                    step_losses,
                    messages,
                    encoder_decoder_output=outputs['encoder_decoder'],
                    discriminator_on_cover=outputs['discriminator_on_cover'],
                    discriminator_on_encoded=outputs[
                        'discriminator_on_encoded'])

                metrics_train_results = metrics_train.results()
                metrics_train.reset()

                with summary_writers['train'].as_default():
                    for _name, _value in metrics_train_results.items():
                        tf.summary.scalar(_name, _value, step=step)

                    tf.summary.scalar('ms_per_step', ms_per_step, step=step)

                    tf.summary.scalar('ms_per_sample',
                                      ms_per_sample,
                                      step=step)

            step.assign_add(1)

        ckpt_save_path = ckpt_manager.save()
        logging.info("Saved model after epoch {} to {}".format(
            epoch.numpy(), ckpt_save_path))

        # Training Loss
        logging.info("Epoch {} Stats".format(epoch.numpy()))
        logging.info("Training Stats ===========================")
        for _name, _value in metrics_train_results.items():
            logging.info("{}: {:.4f}".format(_name, _value))

        # Evaluate
        dataset_val = dataset.create_val_dataset()

        for cover_images in dataset_val:

            messages = utils.create_messages(batch_size=cover_images.shape[0],
                                             msg_length=FLAGS.msg_length)

            # messages = tf.random.uniform(
            #     [FLAGS.batch_size, FLAGS.msg_length],
            #     minval=0, maxval=2, dtype=tf.int32)
            # messages = tf.cast(messages, dtype=tf.float32)

            outputs = steps.train(cover_images=cover_images,
                                  messages=messages,
                                  encoder_decoder=encoder_decoder,
                                  discriminator=discriminator,
                                  training=False)

            losses_val_step = losses.step_loss(
                cover_images,
                messages,
                encoder_decoder_output=outputs['encoder_decoder'],
                discriminator_on_cover=outputs['discriminator_on_cover'],
                discriminator_on_encoded=outputs['discriminator_on_encoded'])

            metrics_val.update(
                losses_val_step,
                messages,
                encoder_decoder_output=outputs['encoder_decoder'],
                discriminator_on_cover=outputs['discriminator_on_cover'],
                discriminator_on_encoded=outputs['discriminator_on_encoded'])

        metrics_val_results = metrics_val.results()
        metrics_val.reset()

        logging.info("Validation Stats ===========================")
        with summary_writers['val'].as_default():
            for _name, _value in metrics_val_results.items():
                tf.summary.scalar(_name, _value, step=step)
                logging.info("{}: {:.4f}".format(_name, _value))

        messages = utils.create_messages(batch_size=cover_images.shape[0],
                                         msg_length=FLAGS.msg_length)

        encoder_decoder_output = encoder_decoder(inputs={
            'cover_image': cover_images,
            'message': messages
        },
                                                 training=False)

        # write example images to Summaries
        with summary_writers['val'].as_default():

            transform_fn = None

            if FLAGS.to_yuv:
                transform_fn = tf.image.yuv_to_rgb

            utils.summary_images(
                cover=cover_images,
                encoded=encoder_decoder_output['encoded_image'],
                transmitted_encoded=encoder_decoder_output[
                    'transmitted_encoded_image'],
                transmitted_cover=encoder_decoder_output[
                    'transmitted_cover_image'],
                step=step,
                transform_fn=transform_fn)

        epoch.assign_add(1)
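
utils.create_messages is not shown, but the commented-out block inside the validation loop suggests it draws random 0/1 bits and casts them to float32. A minimal sketch under that assumption:

# Sketch of create_messages, mirroring the commented-out tf.random.uniform call:
# a [batch_size, msg_length] tensor of random 0/1 bits as float32.
import tensorflow as tf

def create_messages_sketch(batch_size, msg_length):
    bits = tf.random.uniform([batch_size, msg_length],
                             minval=0, maxval=2, dtype=tf.int32)
    return tf.cast(bits, tf.float32)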
Example no. 6
        
print(args)

para = {"num_workers":8, "pin_memory":True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    dataloaders.ImageCaptionDataset(args.data_train),
    batch_size=args.batch_size, shuffle=True, **para)

val_loader = torch.utils.data.DataLoader(
    dataloaders.ImageCaptionDataset(args.data_val, image_conf={'center_crop':True}),
    batch_size=args.batch_size, shuffle=False, **para)

audio_model = models.Davenet()
image_model = models.VGG16(pretrained=args.pretrained_image_model)

if not bool(args.exp_dir):
    print("exp_dir not specified, automatically creating one...")
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    args.exp_dir = "exp/Data-%s/AudioModel-%s_ImageModel-%s_Optim-%s_LR-%s_Epochs-%s_%s" % (
        os.path.basename(args.data_train), args.audio_model, args.image_model, args.optim,
        args.lr, args.n_epochs, timestamp)

if not args.resume:
    print("\nexp_dir: %s" % args.exp_dir)
    os.makedirs("%s/models" % args.exp_dir)
    with open("%s/args.pkl" % args.exp_dir, "wb") as f:
        pickle.dump(args, f)

train(audio_model, image_model, train_loader, val_loader, args)