Example 1
def main(config, base_model, fine_tuned_model_dir, target_class, seed):

    # evaluate with prediction accuracy only
    config['metrics'] = ["pred_acc"]

    model, data_loader, loss_fn, metrics = load_model(config, base_model,
                                                      target_class, seed)

    # when all 10 classes are targeted, also evaluate the unmodified base model
    if len(target_class) == 10:
        cp.print_progress("< Base Model >")
        util.print_setting(data_loader, None, model, loss_fn, metrics, None,
                           None)

        base_model_evaluation = evaluate(model, data_loader, loss_fn, metrics)

    model = combine_model(model, fine_tuned_model_dir, target_class)

    cp.print_progress("< Combined Model >")

    util.print_setting(data_loader, None, model, loss_fn, metrics, None, None)

    combined_model_evaluation = evaluate(model, data_loader, loss_fn, metrics)
Example 2
def evaluate(model, data_loader, loss_fn, metrics):
    # prepare model for testing
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    total_loss = 0.0
    total_metrics = torch.zeros(len(metrics))

    with torch.no_grad():
        for i, (data, target) in enumerate(tqdm(data_loader)):
            # convert integer labels to one-hot vectors for the loss function
            one_hot_target = torch.eye(model.output_size)[target]

            data, target, one_hot_target = data.to(device), target.to(
                device), one_hot_target.to(device)
            output = model(data)

            # computing loss, metrics on test set
            loss = loss_fn(output, one_hot_target)
            # loss = loss_fn(output, target)
            batch_size = data.shape[0]
            total_loss += loss.item() * batch_size
            for j, metric in enumerate(metrics):
                total_metrics[j] += metric(output, target) * batch_size

    n_samples = len(data_loader.sampler)
    log = {'loss': total_loss / n_samples}
    log.update({
        met.__name__: total_metrics[i].item() / n_samples
        for i, met in enumerate(metrics)
    })

    test_result_str = 'TEST RESULTS\n'
    for key, val in log.items():
        test_result_str += ('\t' + str(key) + ' : ' + str(val) + '\n')

    cp.print_progress(test_result_str)

    return log
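
A note on the metrics contract used above: each entry in metrics is called as metric(output, target) and its result is scaled by the batch size, so a metric should return the per-batch average as a plain number. A minimal sketch of a compatible accuracy metric (the name pred_acc is taken from Example 1; the body below is an assumption, not the repository's implementation):

import torch

def pred_acc(output, target):
    # output: (batch, n_class) scores; target: (batch,) integer class labels
    with torch.no_grad():
        pred = torch.argmax(output, dim=1)
        correct = torch.sum(pred == target).item()
    return correct / len(target)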
Example 3
def fine_tune_model(config, base_model):
    target_class = config['target_class']
    cp.print_progress('Fine-tuning model for', target_class)

    config['data_loader']['args']['target_class'] = target_class

    train_logger = Logger()

    # setup data_loader instances
    data_loader = util.get_instance(data_loaders, 'data_loader', config)
    valid_data_loader = data_loader.split_validation()

    # build model architecture
    model = util.get_instance(models, 'model', config)

    # get function handles of loss and metrics
    loss_fn = getattr(loss_functions, config['loss'])
    metrics = [getattr(metric_functions, met) for met in config['metrics']]

    util.print_setting(data_loader, valid_data_loader, model, loss_fn, metrics,
                       None, None)

    # build fine-tuner on top of the given base model
    trainer = FineTuner(model,
                        loss_fn,
                        metrics,
                        base_model=base_model,
                        config=config,
                        data_loader=data_loader,
                        valid_data_loader=valid_data_loader,
                        train_logger=train_logger,
                        target_class=target_class)

    trainer.train()

    cp.print_progress('Fine-tuning completed for ', target_class)

    return os.path.join(trainer.checkpoint_dir, 'model_best.pth')
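
For reference, the config keys read in this function imply a dict of roughly the following shape (every value below is a hypothetical placeholder, not the repository's actual settings):

config = {
    'target_class': [0, 1],                   # hypothetical
    'loss': 'mse_loss',                       # a function name in loss_functions
    'metrics': ['pred_acc'],                  # function names in metric_functions
    'data_loader': {
        'type': 'MnistDataLoader',            # hypothetical class in data_loaders
        'args': {'data_dir': './data', 'target_class': None},
    },
    'model': {'type': 'LeNet', 'args': {}},   # hypothetical
}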
Example 4
def train_base_model(config):
    cp.print_progress('Training base model')
    train_logger = Logger()

    # setup data_loader instances
    data_loader = util.get_instance(data_loaders, 'data_loader', config)
    valid_data_loader = data_loader.split_validation()

    # build model architecture
    model = util.get_instance(models, 'model', config)

    # get function handles of loss and metrics
    loss_fn = getattr(loss_functions, config['loss'])
    metrics = [getattr(metric_functions, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler
    # (delete every line mentioning lr_scheduler to disable the scheduler)
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = util.get_instance(torch.optim, 'optimizer', config,
                                  trainable_params)

    lr_scheduler = util.get_instance(torch.optim.lr_scheduler, 'lr_scheduler',
                                     config, optimizer)

    util.print_setting(data_loader, valid_data_loader, model, loss_fn, metrics,
                       optimizer, lr_scheduler)

    trainer = Trainer(model,
                      loss_fn,
                      metrics,
                      optimizer,
                      resume=None,
                      config=config,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler,
                      train_logger=train_logger)

    cp.print_progress('TRAINER\n', trainer)

    trainer.train()

    cp.print_progress('Training base model completed')

    return os.path.join(trainer.checkpoint_dir, 'model_best.pth')
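
util.get_instance(module, name, config, *args) appears throughout these examples. A minimal sketch of how such a helper is commonly written, inferred from the call sites (the repository's own implementation may differ):

def get_instance(module, name, config, *args):
    # look up config[name]['type'] in the given module and construct it,
    # passing the positional args plus config[name]['args'] as keyword args
    return getattr(module, config[name]['type'])(*args, **config[name]['args'])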
Example 5
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("-k",
                        "--keyword",
                        type=str,
                        required=True,
                        help="keyword for the given evaluation data list")

    parser.add_argument(
        "-f",
        "--summary_file",
        type=str,
        help="file containing list of evaluation data to be generated")

    parser.add_argument("-e",
                        "--extractor",
                        type=str,
                        default="edit_distance_extractor",
                        help="type of extraction algorithm to use")

    parser.add_argument("-th",
                        "--threshold",
                        type=float,
                        default=0.95,
                        help="threshold for retrieving a window")

    args = parser.parse_args()
    data_folder_path = "./kws-gen-data"
    if not os.path.exists(data_folder_path):
        cp.print_error("please clone kws-gen-data folder using git submodule")
        exit()

    keyword = args.keyword.lower()
    audio_dir = os.path.join(data_folder_path, "audio_data/" + keyword)

    if not os.path.exists(audio_dir):
        cp.print_error("audio data is missing - ", audio_dir)
        exit()

    with open(args.summary_file, "r") as summary:
        total = sum(1 for line in summary if line.strip())

    cp.print_progress("evaluation data file - ", args.summary_file)

    # load pre recorded target audios
    target_audios = []
    target_audio_dir = os.path.join(data_folder_path,
                                    "target_audio/" + keyword)

    if not os.path.exists(target_audio_dir):
        cp.print_error("target audio data is missing - ", target_audio_dir)
        exit()

    for file_name in os.listdir(target_audio_dir):
        target_audios.append(
            librosa.core.load(os.path.join(target_audio_dir, file_name))[0])

    # instantiate extractor
    extractor = None
    if args.extractor == "edit_distance_extractor":
        cp.print_progress("extractor type :", args.extractor, "( threshold :",
                          args.threshold, ", number of target audios : ",
                          len(target_audios), ")")
        extractor = EditDistanceExtractor(target_audios, args.threshold)

    if extractor is None:
        cp.print_error("unknown extractor type - ", args.extractor)
        exit()

    # extract similar audio from each audio blocks
    with open(args.summary_file, "r") as file:
        reader = csv.reader(file, delimiter=",")

        for i, line in enumerate(reader):
            vid = line[0]
            start_time = line[1]
            end_time = line[2]
            wav_file = os.path.join(
                audio_dir, vid + "~" + start_time + "~" + end_time + ".wav")

            start_time = int(start_time)
            end_time = int(end_time)

            cp.print_progress(i + 1, " / ", total, " - ", wav_file)

            if not os.path.exists(wav_file):
                cp.print_warning("audio file is missing - ", wav_file)
                continue

            data = librosa.core.load(wav_file, sr=SAMPLE_RATE)[0]

            extracted_audio_times = extractor.extract_keywords(data)

            # TODO :: count how many windows have been extracted and compare against the true count

            # TODO :: might be a good idea to update the threshold if the accuracy is way too low

    cp.print_progress("evaluation is completed for ", keyword, " - ", total)
Example 6
def evaluate_models(saved_model_dir, num_iter, step_size):
    cp.print_progress("< evaluate base model >")
    num_base_model, base_model_acc = evaluate_base_model(saved_model_dir)
    cp.print_progress("< base model acc >")
    cp.print_progress(json.dumps(base_model_acc, indent=4))

    cp.print_progress("< evaluate fine tuned model >")
    num_fine_tuned_model, fine_tuned_model_acc = evaluate_fine_tuned_model(
        saved_model_dir)
    cp.print_progress("< fine tuned model acc >")
    cp.print_progress(json.dumps(fine_tuned_model_acc, indent=4))

    cp.print_progress("< evaluate combined model >")
    num_combined_model, combined_model_acc = evaluate_combined_model(
        saved_model_dir, num_iter, step_size)
    cp.print_progress("< combined model acc >")
    cp.print_progress(json.dumps(combined_model_acc, indent=4))

    assert num_base_model == num_combined_model
    assert num_base_model == num_fine_tuned_model

    results = {}

    for loss in EXP_LOSS:
        results[loss] = {}
        results[loss]['base_model'] = base_model_acc[loss]
        results[loss]['fine_tuned_model'] = fine_tuned_model_acc[loss]
        results[loss]['combined_model'] = combined_model_acc[loss]

    results['num_model'] = num_base_model
    return results
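
EXP_LOSS is a module-level list of the loss names being compared; its contents are not shown in this example. With a hypothetical EXP_LOSS = ['mse', 'cross_entropy'], the returned dict would look like this (accuracy values are made up):

results = {
    'mse': {
        'base_model': 0.91,
        'fine_tuned_model': 0.94,
        'combined_model': 0.93,
    },
    'cross_entropy': {
        'base_model': 0.92,
        'fine_tuned_model': 0.95,
        'combined_model': 0.94,
    },
    'num_model': 10,
}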
Example 7
    # this example begins mid-way through argument parsing; the opening of the
    # call below is reconstructed (the flag name '-t/--target_class' is an
    # assumption based on args.target_class being read further down)
    parser.add_argument(
        '-t',
        '--target_class',
        nargs='+',
        type=int,
        help="target class to fine tune (default: all classes)",
        default=None)
    parser.add_argument('-s',
                        '--seed',
                        default=None,
                        type=int,
                        help="random seed")

    args = parser.parse_args()

    # pick the lexicographically-latest run directory (assumes timestamped names)
    latest_model = max(os.listdir(args.base_model))

    base_model = os.path.join(args.base_model, latest_model, 'model_best.pth')
    cp.print_progress("base model : ", base_model)

    if not torch.cuda.is_available():
        config = torch.load(base_model, map_location='cpu')['config']
    else:
        config = torch.load(base_model)['config']

    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.target_class is None:
        args.target_class = np.arange(config['n_class']).tolist()

    main(config, base_model, args.fine_tuned_model_dir, args.target_class,
         args.seed)
Example 8
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("-k",
                        "--keyword",
                        type=str,
                        required=True,
                        help="target keyword to generate data for")

    parser.add_argument("-s",
                        "--size",
                        type=int,
                        default=100,
                        help="number of url to collect")

    parser.add_argument("-a",
                        "--api_key",
                        type=str,
                        required=True,
                        help="API key for youtube data v3 API")

    data_folder_path = "./kws-gen-data/evaluation_data"
    if not os.path.exists(data_folder_path):
        cp.print_error("please clone kws-gen-data folder using git submodule")
        exit()

    args = parser.parse_args()
    keyword = args.keyword.lower()
    cp.print_progress("keyword is ", keyword)

    url_fetcher = YoutubeSearcher(args.api_key, keyword)
    urls = []

    plural = inflect.engine()

    while len(urls) < args.size:
        url = url_fetcher.next()[0]

        if not url:
            cp.print_warning("there are no more urls to process")
            break

        if url in urls:
            cp.print_warning("video is already added", url)
            continue

        try:
            video = PyTube(util.get_youtube_url(url))
        except Exception:
            cp.print_error(
                "failed to generate PyTube representation for video - ", url)
            continue

        caption = video.captions.get_by_language_code('en')
        if not caption:
            cp.print_warning("no caption available for video - ", url)
            continue

        try:
            srt_captions = caption.generate_srt_captions().lower().split(
                '\n\n')
        except Exception:
            cp.print_error("failed to retrieve captions for video - ", url)
            continue

        keyword_exist = False
        for captions in srt_captions:
            if keyword in captions or plural.plural(keyword) in captions:
                keyword_exist = True
                break

        if not keyword_exist:
            cp.print_warning("keywords never appear in the video - ", url)
            continue

        urls.append(url)
        cp.print_progress(len(urls), " / ", args.size, " - ", url)

    cp.print_warning(len(urls), "urls are collected for ", keyword)

    file_path = os.path.join(data_folder_path,
                             keyword + "_url_" + str(args.size) + ".txt")
    with open(file_path, 'w') as output_file:
        for url in urls:
            output_file.write(url + "\n")
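
util.get_youtube_url is assumed here to turn a video id returned by the searcher into a full watch URL. A minimal sketch of such a helper, inferred from the call site (not necessarily the repository's code):

def get_youtube_url(vid):
    # build a full watch URL from a YouTube video id
    return "https://www.youtube.com/watch?v=" + vid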
Example 9
def main(config, resume):

    # setup data_loader instances
    data_loader = getattr(data_loaders, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=512,
        shuffle=False,
        validation_split=0.0,
        training=False,
        num_workers=2
    )

    # TODO :: use generic function for printing out model setting
    cp.print_progress('test DATASET\n', data_loader)

    # build model architecture
    model = util.get_instance(models, 'model', config)
    cp.print_progress('MODEL\n', model)

    # get function handles of loss and metrics
    loss_fn = getattr(loss_functions, config['loss'])
    cp.print_progress('LOSS FUNCTION\n', loss_fn.__name__)

    metrics = [getattr(metric_functions, met) for met in config['metrics']]
    cp.print_progress('METRICS\n', [metric.__name__ for metric in metrics])

    # load state dict (map to CPU when no GPU is available, as in Example 7)
    checkpoint = torch.load(
        resume, map_location=None if torch.cuda.is_available() else 'cpu')
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    # prepare model for testing
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    total_loss = 0.0
    total_metrics = torch.zeros(len(metrics))

    with torch.no_grad():
        for i, (data, target) in enumerate(tqdm(data_loader)):
            data, target = data.to(device), target.to(device)
            output = model(data)

            # computing loss, metrics on test set
            loss = loss_fn(output, target)
            batch_size = data.shape[0]
            total_loss += loss.item() * batch_size
            for j, metric in enumerate(metrics):
                total_metrics[j] += metric(output, target) * batch_size

    n_samples = len(data_loader.sampler)
    log = {'loss': total_loss / n_samples}
    log.update({
        met.__name__: total_metrics[i].item() / n_samples
        for i, met in enumerate(metrics)
    })

    test_result_str = 'TEST RESULTS\n'
    for key, val in log.items():
        test_result_str += ('\t' + str(key) + ' : ' + str(val) + '\n')

    cp.print_progress(test_result_str)
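
A sketch of how this entry point might be driven, reusing the pattern from Example 7 where the training config is stored inside the checkpoint (the flag name below is an assumption):

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--resume', type=str, required=True,
                        help="path to the checkpoint to evaluate")
    args = parser.parse_args()

    # the config dict is saved alongside the weights (see Example 7)
    if not torch.cuda.is_available():
        config = torch.load(args.resume, map_location='cpu')['config']
    else:
        config = torch.load(args.resume)['config']

    main(config, args.resume)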