예제 #1
0
def loadCPCFeatureMaker(pathCheckpoint,
                        gru_level=-1,
                        get_encoded=False,
                        keep_hidden=True,
                        load_nullspace=False):
    """
    Build a FeatureModule from a CPC checkpoint file.

    Args:
        pathCheckpoint: path of the checkpoint, handed to ``loadModel``.
        gru_level: when not None and strictly positive, passed to
            ``loadModel`` as the ``nLevelsGRU`` override; otherwise no
            configuration override is applied.
        get_encoded: forwarded to ``FeatureModule``.
        keep_hidden: value assigned to the ``keepHidden`` attribute of the
            model's ``gAR`` module.
        load_nullspace: forwarded to ``loadModel``; when truthy the ``gAR``
            module is reached through ``model.cpc`` instead of ``model``.

    Returns:
        The ``FeatureModule`` wrapping the loaded model.
    """
    # Optional override of the number of recurrent levels
    updateConfig = None
    if gru_level is not None and gru_level > 0:
        updateConfig = argparse.Namespace(nLevelsGRU=gru_level)

    # loadModel returns a 3-tuple; only the model itself is needed here
    model, _, _ = loadModel([pathCheckpoint],
                            updateConfig=updateConfig,
                            load_nullspace=load_nullspace)

    # The auto-regressive module sits one level deeper for nullspace models
    auto_regressive = model.cpc.gAR if load_nullspace else model.gAR
    auto_regressive.keepHidden = keep_hidden

    return FeatureModule(model, get_encoded=get_encoded)
예제 #2
0
def main(argv):
    """
    Run the ABX evaluation and dump the scores and the arguments as JSON.

    Features are either computed from a checkpoint
    (``args.load == 'from_checkpoint'``) or read from pre-computed tensors
    with ``torch.load`` (``args.load == 'from_pre_computed'``).

    The output directory is ``args.out`` when given, otherwise the directory
    containing ``args.path_checkpoint``. Two files are written there:
    ``ABX_scores.json`` and ``ABX_args.json``.

    Args:
        argv: command-line arguments, handed to ``parse_args``.

    Raises:
        ValueError: if ``args.load`` is not one of the two supported modes.
    """
    args = parse_args(argv)

    if args.load == 'from_checkpoint':
        # Checkpoint
        model = loadModel([args.path_checkpoint])[0]
        model.gAR.keepHidden = True
        # Feature maker
        feature_maker = FeatureModule(model, args.get_encoded).cuda().eval()

        def feature_function(x):
            return buildFeature(feature_maker, x,
                                seqNorm=args.seq_norm,
                                strict=args.strict,
                                maxSizeSeq=args.max_size_seq)
    elif args.load == 'from_pre_computed':
        def feature_function(x):
            return torch.load(x, 'cpu')
    else:
        # Without this branch feature_function would stay unbound and the
        # failure would only surface later as an UnboundLocalError.
        raise ValueError(
            f"Unsupported load mode {args.load!r}: expected "
            "'from_checkpoint' or 'from_pre_computed'")

    # Modes
    if args.mode == 'all':
        modes = ["within", "across"]
    else:
        modes = [args.mode]

    distance_mode = 'cosine'

    step_feature = 1 / args.feature_size

    # Get the list of sequences as (stem, full path) pairs
    seq_list, _ = findAllSeqs(args.path_dataset, extension=args.file_extension)
    seq_list = [(str(Path(x).stem), str(Path(args.path_dataset) / x))
                for (_, x) in seq_list]

    if args.debug:
        seq_list = seq_list[:1000]

    scores = ABX(feature_function, args.path_item_file,
                 seq_list, distance_mode,
                 step_feature, modes,
                 cuda=args.cuda,
                 seq_norm=args.seq_norm,
                 max_x_across=args.max_x_across,
                 max_size_group=args.max_size_group)

    if args.out is None:
        out_dir = Path(args.path_checkpoint).parent
    else:
        out_dir = Path(args.out)
    # parents=True so a nested --out path does not fail after the scores
    # have already been computed
    out_dir.mkdir(parents=True, exist_ok=True)

    path_score = out_dir / 'ABX_scores.json'
    with open(path_score, 'w') as f:
        json.dump(scores, f, indent=2)

    path_args = out_dir / 'ABX_args.json'
    with open(path_args, 'w') as f:
        json.dump(vars(args), f, indent=2)
def main(argv):
    """
    Quantize the audio files found under ``args.pathDB`` with a clustering
    checkpoint and write the result to a text file in ``args.pathOutput``.

    Steps:
      1. List the sequences with ``findAllSeqs``, optionally keeping only
         those whose file stem is listed in ``args.pathSeq``.
      2. Optionally keep a single split (``args.split`` has the form
         ``idxSplit-numSplits``) and, in debug mode, only the first 20 files.
      3. Load the clustering module and the CPC feature maker described by
         the args file stored next to ``args.pathCheckpoint``.
      4. For each file, compute the features and take the argmin of the
         clustering module output.
      5. Write one line per file: ``<file stem>\\t<quantized units>``, where
         the units are rows joined by "," and the values inside a row are
         joined by "-".

    Args:
        argv: command-line arguments, handed to ``parseArgs``.

    Raises:
        NotImplementedError: if ``args.separate_speaker`` is anything other
            than ``False``. No output path is defined for that mode, so the
            run used to crash only after every file had been quantized.
        AssertionError: if ``args.split`` is malformed, the output file
            already exists, the checkpoint path does not end with ".pt", or
            no args file is found next to the checkpoint.
    """
    # Args parser
    args = parseArgs(argv)

    # Fail before doing any work: the single output file below is the only
    # output path this function knows how to write.
    if args.separate_speaker is not False:
        raise NotImplementedError(
            "args.separate_speaker is set, but no per-speaker output is "
            "implemented: the quantized units can only be written to a "
            "single output file.")

    print("=============================================================")
    print(f"Quantizing data from {args.pathDB}")
    print("=============================================================")

    # Check if directory exists
    if not os.path.exists(args.pathOutput):
        print("")
        print(f"Creating the output directory at {args.pathOutput}")
        Path(args.pathOutput).mkdir(parents=True, exist_ok=True)

    # Get splits
    if args.split:
        split_parts = args.split.split("-")
        assert len(split_parts) == 2 and int(split_parts[1]) >= int(split_parts[0]) >= 1, \
            "SPLIT must be under the form idxSplit-numSplits (numSplits >= idxSplit >= 1), eg. --split 1-20"
        idx_split, num_splits = (int(part) for part in split_parts)

    # Find all sequences
    print("")
    print(f"Looking for all {args.file_extension} files in {args.pathDB} with speakerLevel {args.recursionLevel}")
    seqNames, speakers = findAllSeqs(args.pathDB,
                                     speaker_level=args.recursionLevel,
                                     extension=args.file_extension,
                                     loadCache=True)

    if args.pathSeq:
        # Keep only the sequences whose file stem is listed in pathSeq
        with open(args.pathSeq, 'r') as f:
            seqs = {line.strip() for line in f}
        seqNames = [s for s in seqNames
                    if s[1].split('/')[-1].split('.')[0] in seqs]

    print(f"Done! Found {len(seqNames)} files and {len(speakers)} speakers!")

    # Check if output file exists
    if not args.split:
        nameOutput = "quantized_outputs.txt"
    else:
        nameOutput = f"quantized_outputs_split_{idx_split}-{num_splits}.txt"
    outputFile = os.path.join(args.pathOutput, nameOutput)
    assert not os.path.exists(outputFile), \
        f"Output file {outputFile} already exists !!!"

    # Get splits
    if args.split:
        startIdx = len(seqNames) // num_splits * (idx_split-1)
        if idx_split == num_splits:
            # The last split also takes the remainder of the division
            endIdx = len(seqNames)
        else:
            endIdx = min(len(seqNames) // num_splits * idx_split, len(seqNames))
        seqNames = seqNames[startIdx:endIdx]
        print("")
        print(f"Quantizing split {idx_split} out of {num_splits} splits, with {len(seqNames)} files (idx in range({startIdx}, {endIdx})).")

    # Debug mode
    if args.debug:
        nsamples = 20
        print("")
        print(f"Debug mode activated, only load {nsamples} samples!")
        seqNames = seqNames[:nsamples]

    # Load Clustering args
    assert args.pathCheckpoint[-3:] == ".pt"
    checkpointDir = os.path.dirname(args.pathCheckpoint)
    if os.path.exists(args.pathCheckpoint[:-3] + "_args.json"):
        pathConfig = args.pathCheckpoint[:-3] + "_args.json"
    elif os.path.exists(os.path.join(checkpointDir, "checkpoint_args.json")):
        pathConfig = os.path.join(checkpointDir, "checkpoint_args.json")
    else:
        assert False, \
            f"Args file not found in the directory {os.path.dirname(args.pathCheckpoint)}"
    clustering_args = readArgs(pathConfig)
    print("")
    print(f"Clutering args:\n{json.dumps(vars(clustering_args), indent=4, sort_keys=True)}")
    print('-' * 50)

    # Load ClusterModule
    clusterModule = loadClusterModule(args.pathCheckpoint, norm_vec_len=args.norm_vec_len)
    clusterModule.cuda()

    # Load FeatureMaker
    print("")
    print("Loading CPC FeatureMaker")
    if 'level_gru' in vars(clustering_args) and clustering_args.level_gru is not None:
        updateConfig = argparse.Namespace(nLevelsGRU=clustering_args.level_gru)
    else:
        updateConfig = None
    model = loadModel([clustering_args.pathCheckpoint], updateConfig=updateConfig)[0]
    ## If we don't apply batch implementation, we can set LSTM model to keep hidden units
    ## making the quality of the quantized units better
    if args.nobatch:
        model.gAR.keepHidden = True
    featureMaker = FeatureModule(model, clustering_args.encoder_layer)
    if clustering_args.dimReduction is not None:
        dimRed = loadDimReduction(clustering_args.dimReduction, clustering_args.centroidLimits)
        featureMaker = torch.nn.Sequential(featureMaker, dimRed)
    if not clustering_args.train_mode:
        featureMaker.eval()
    featureMaker.cuda()

    def feature_function(x):
        # Batched or sequential feature extraction, same post-processing
        if args.nobatch is False:
            res0 = buildFeature_batch(featureMaker, x,
                                      seqNorm=False,
                                      strict=args.strict,
                                      maxSizeSeq=args.max_size_seq,
                                      batch_size=args.batch_size)
        else:
            res0 = buildFeature(featureMaker, x,
                                seqNorm=False,
                                strict=args.strict)
        if args.norm_vec_len:
            # [!] we actually used CPC_audio/scripts/quantize_audio.py for that in the end
            # Divide each feature vector by its Euclidean length (last axis)
            res0Lengths = torch.sqrt((res0*res0).sum(2))
            res0 = res0 / res0Lengths.view(*(res0Lengths.shape), 1)
        return res0
    print("CPC FeatureMaker loaded!")

    # Quantization of files
    print("")
    print("Quantizing audio files...")
    seqQuantLines = []
    bar = progressbar.ProgressBar(maxval=len(seqNames))
    bar.start()
    start_time = time()
    for index, vals in enumerate(seqNames):
        bar.update(index)

        file_path = os.path.join(args.pathDB, vals[1])

        # Get features & quantizing
        cFeatures = feature_function(file_path).cuda()

        nGroups = cFeatures.size(-1)//clusterModule.Ck.size(-1)

        cFeatures = cFeatures.view(1, -1, clusterModule.Ck.size(-1))

        # Only the argmin index is kept, so no autograd graph is needed
        with torch.no_grad():
            if len(vals) > 2 and int(vals[-1]) > 9400000: # Librilight, to avoid OOM
                clusterModule = clusterModule.cpu()
                cFeatures = cFeatures.cpu()
                qFeatures = torch.argmin(clusterModule(cFeatures), dim=-1)
                clusterModule = clusterModule.cuda()
            else:
                qFeatures = torch.argmin(clusterModule(cFeatures), dim=-1)
        qFeatures = qFeatures[0].detach().cpu().numpy()

        # Transform to quantized line
        quantLine = ",".join(["-".join([str(i) for i in item]) for item in qFeatures.reshape(-1, nGroups)])
        seqQuantLines.append(quantLine)

    bar.finish()
    print(f"...done {len(seqQuantLines)} files in {time()-start_time} seconds.")

    # Saving outputs
    print("")
    print(f"Saving outputs to {outputFile}")
    outLines = [
        "\t".join([os.path.splitext(os.path.basename(vals[1]))[0], quantln])
        for vals, quantln in zip(seqNames, seqQuantLines)
    ]
    with open(outputFile, "w") as f:
        f.write("\n".join(outLines))
예제 #4
0
                                        False,
                                        numWorkers=0)
    print(f"Length of dataLoader: {len(trainLoader)}")
    print("")

    if args.level_gru is None:
        updateConfig = None
    else:
        updateConfig = argparse.Namespace(nLevelsGRU=args.level_gru)

    model = loadModel([args.pathCheckpoint],
                      updateConfig=updateConfig,
                      load_nullspace=args.nullspace)[0]
    #model = loadModel([args.pathCheckpoint])[0]#, updateConfig=updateConfig)[0]

    featureMaker = FeatureModule(model, args.encoder_layer)
    print("Checkpoint loaded!")
    print("")

    if not args.train_mode:
        featureMaker.eval()
    featureMaker.cuda()

    # Check if dir exists
    if not os.path.exists(os.path.dirname(
            args.pathOutput)) and os.path.dirname(args.pathOutput):
        Path(os.path.dirname(args.pathOutput)).mkdir(parents=True,
                                                     exist_ok=True)

    pathConfig = f"{os.path.splitext(args.pathOutput)[0]}_args.json"
    with open(pathConfig, 'w') as file:
        os.mkdir(args.pathOut)

    with open(os.path.join(os.path.dirname(args.pathOut),
                           f"{os.path.basename(args.pathOut)}.json"), 'w') \
            as file:
        json.dump(vars(args), file, indent=2)

    outData = [
        x[1] for x in findAllSeqs(
            args.pathDB, extension=args.extension, loadCache=False)[0]
    ]

    featureMaker = loadModel([args.pathCheckpoint])[0]
    stepSize = featureMaker.gEncoder.DOWNSAMPLING / 16000
    print(f"stepSize : {stepSize}")
    featureMaker = FeatureModule(featureMaker, args.getEncoded)
    featureMaker.collapse = False

    if args.addCriterion:
        criterion, nPhones = loadSupervisedCriterion(args.pathCheckpoint)
        featureMaker = ModelPhoneCombined(featureMaker, criterion, nPhones,
                                          args.oneHot)
    if device == "cuda":
        featureMaker = featureMaker.cuda(device=0)

    if not args.train_mode:
        featureMaker.eval()

    buildAllFeature(featureMaker,
                    args.pathDB,
                    args.pathOut,