Beispiel #1
0
def main():
    """Fit an LDA model on the input d-vectors and score the test trials.

    All settings are read from the module-level ``args`` object:

    * ``args.debug`` -- logging level handed to ``log.basicConfig``.
    * ``args.mlf`` -- if true, ``args.testref`` is parsed with ``mlffile``,
      otherwise with ``test_ref``.
    * ``args.binary`` -- if true, ``args.inputdata`` and ``args.testutts``
      are loaded with ``checkBinary``; otherwise they are parsed with
      ``parseinputfiletomodels`` and ``parsepaths(readFeats(...))``.
    * ``args.delimiter`` / ``args.indices`` -- forwarded to the helpers that
      derive the speaker label.
    * ``args.scoreoutfile`` -- open file object that receives one line per
      scored trial: ``enrolemodel targetmdl-testutt score``.

    Trials whose enrolment model or test utterance is unknown are skipped,
    counted, and reported in a final warning.
    """
    lda = LDA(solver='svd')
    log.basicConfig(
        level=args.debug,
        format='%(asctime)s %(levelname)s %(message)s',
        datefmt='%d/%m %H:%M:%S')

    # Parse the trial list; both parsers are used below as a mapping of
    # enrolment model -> iterable of (testutt, targetmdl) pairs.
    if args.mlf:
        log.info("Parsing input label file %s as mlf file", args.testref)
        testreferences = mlffile(args.testref)
    else:
        log.info(
            "Parsing input label file %s as test_ref file( TARGETMDL ENROLEMODEL-TESTUTT LABEL )",
            args.testref)
        testreferences = test_ref(args.testref)

    # Check if the given data is in marshal format or cPickle
    if args.binary:
        log.info("Try to read input as a binary file")
        inputdata, testutts = checkBinary([args.inputdata, args.testutts])
        # list(...)[0] instead of .values()[0]: dict views are not
        # indexable on Python 3, a list works on both major versions.
        datadim = len(list(inputdata.values())[0])
        dvectors = np.zeros((len(inputdata), datadim))
        labels = []
        log.info("Getting dvectors from input data")
        for i, (spk, v) in enumerate(inputdata.items()):
            dvectors[i] = v
            labels.append(getspkmodel(spk, args.delimiter, args.indices))
        log.debug("Data which was loaded in (%s) has %i labels and %i dvectors",
                  args.inputdata, len(labels), len(dvectors))
        testtofeature = testutts
    else:
        log.info("Given data is either a folder or a filelist. Trying to read")
        inputdata = parseinputfiletomodels(
            args.inputdata, args.delimiter, args.indices)
        testtofeature = parsepaths(readFeats(args.testutts))
        labels = []
        dvectors = []
        log.info("Extracting dvectors for input data")
        for spk, v in inputdata.items():
            # One extracted vector per entry of v, each labelled with spk.
            dvectors.extend(extractmethod(item) for item in v)
            labels.extend([spk] * len(v))

    # Map every distinct speaker label to a consecutive integer class id.
    spktonum = {spk: num for num, spk in enumerate(np.unique(labels))}
    dvectors = np.array(dvectors)
    labelsnum = np.array([spktonum[i] for i in labels])

    log.debug("Overall we have %i labels", len(labelsnum))
    log.debug("Number of speakers: %i", len(spktonum))
    log.debug("Dvector size is (%i,%i)", dvectors.shape[0], dvectors.shape[1])

    log.info("Fitting LDA model")
    lda.fit(dvectors, labelsnum)

    errors = 0
    log.info("Starting test")
    for enrolemodel, vals in testreferences.items():
        if enrolemodel not in spktonum:
            errors += 1
            log.warning("Enrolemodel %s not found in the labels", enrolemodel)
            continue
        curspk = spktonum[enrolemodel]
        for testutt, targetmdl in vals:
            if testutt not in testtofeature:
                log.warning("Utterance %s not found in the testset", testutt)
                errors += 1
                continue
            # LDA needs a two-dimensional input, so add a leading axis.
            testdvector = testtofeature[testutt][np.newaxis, :]
            score = lda.predict_log_proba(testdvector)[0]
            # score holds one log-probability per class; pick the entry of
            # the enrolment speaker of this trial.
            finalscore = score[curspk]
            args.scoreoutfile.write(
                "{} {}-{} {:.3f}\n".format(
                    enrolemodel, targetmdl, testutt, finalscore))
    if errors > 0:
        log.warning(
            "Overall %i errors happened while processing the testutterances. The scores may not be complete",
            errors)
    # NOTE(review): the line written above starts with enrolemodel, followed
    # by targetmdl-testutt and the score, which does not match the structure
    # named in this message -- confirm which of the two is intended.
    log.info(
        "LDA estimation done, output file is: %s. Output file has the following structure: TARGETMODEL ENROLEMODEL-TESTUTT",
        args.scoreoutfile.name)
Beispiel #2
0
##############################################################################

# TODO

##############################################################################
# Part II' : LDA
##############################################################################


# The LDA estimator lives in sklearn.discriminant_analysis in current
# scikit-learn; the old sklearn.lda module was deprecated and later removed.
# Fall back to the old location for old installations.
try:
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
except ImportError:
    from sklearn.lda import LDA

# Fit the LDA classifier on the data X with labels y.
clf = LDA()
clf.fit(X, y)

display_3 = [2.5, 2.5]

# Class probabilities of the three display points. The points are wrapped with
# np.atleast_2d because predict_log_proba expects a 2-D (n_samples,
# n_features) array; [0] then selects the row of the single sample.
values_proba_lda_1 = np.exp(
    clf.predict_log_proba(np.atleast_2d(display_1)))[0]
values_proba_lda_2 = np.exp(
    clf.predict_log_proba(np.atleast_2d(display_2)))[0]
values_proba_lda_3 = np.exp(
    clf.predict_log_proba(np.atleast_2d(display_3)))[0]

# Plot the data together with the decision frontier of the classifier.
fig2 = plt.figure()
plot_2d(X, y)
frontiere(lambda xx: clf.predict(xx), X, step=resolution_param)

# Annotate the first display point with its first three class probabilities.
plt.annotate('(%.2f, %.2f, %.2f)' % (values_proba_lda_1[0],
                                     values_proba_lda_1[1],
                                     values_proba_lda_1[2]),
             xy=(display_1[0], display_1[1]), xycoords='data',
             color=color_text, xytext=(-150, 100), textcoords='offset points',
             fontsize=12,
             arrowprops=dict(arrowstyle="->",
                             connectionstyle="arc3,rad=.2",
                             color=color_text))

# plt.annotate(r'' + '(%.2f' % values_proba_lda_1[0] + ', %.2f'
Beispiel #3
0
def main():
    """Fit an LDA model on the input d-vectors and score the test trials.

    All settings are read from the module-level ``args`` object:

    * ``args.debug`` -- logging level handed to ``log.basicConfig``.
    * ``args.mlf`` -- if true, ``args.testref`` is parsed with ``mlffile``,
      otherwise with ``test_ref``.
    * ``args.binary`` -- if true, ``args.inputdata`` and ``args.testutts``
      are loaded with ``checkBinary``; otherwise they are parsed with
      ``parseinputfiletomodels`` and ``parsepaths(readFeats(...))``.
    * ``args.delimiter`` / ``args.indices`` -- forwarded to the helpers that
      derive the speaker label.
    * ``args.scoreoutfile`` -- open file object that receives one line per
      scored trial: ``enrolemodel targetmdl-testutt score``.

    Trials whose enrolment model or test utterance is unknown are skipped,
    counted, and reported in a final warning.
    """
    lda = LDA(solver='svd')
    log.basicConfig(level=args.debug,
                    format='%(asctime)s %(levelname)s %(message)s',
                    datefmt='%d/%m %H:%M:%S')

    # Parse the trial list; both parsers are used below as a mapping of
    # enrolment model -> iterable of (testutt, targetmdl) pairs.
    if args.mlf:
        log.info("Parsing input label file %s as mlf file", args.testref)
        testreferences = mlffile(args.testref)
    else:
        log.info(
            "Parsing input label file %s as test_ref file( TARGETMDL ENROLEMODEL-TESTUTT LABEL )",
            args.testref)
        testreferences = test_ref(args.testref)

    # Check if the given data is in marshal format or cPickle
    if args.binary:
        log.info("Try to read input as a binary file")
        inputdata, testutts = checkBinary([args.inputdata, args.testutts])
        # list(...)[0] instead of .values()[0]: dict views are not
        # indexable on Python 3, a list works on both major versions.
        datadim = len(list(inputdata.values())[0])
        dvectors = np.zeros((len(inputdata), datadim))
        labels = []
        log.info("Getting dvectors from input data")
        for i, (spk, v) in enumerate(inputdata.items()):
            dvectors[i] = v
            labels.append(getspkmodel(spk, args.delimiter, args.indices))
        log.debug(
            "Data which was loaded in (%s) has %i labels and %i dvectors",
            args.inputdata, len(labels), len(dvectors))
        testtofeature = testutts
    else:
        log.info("Given data is either a folder or a filelist. Trying to read")
        inputdata = parseinputfiletomodels(args.inputdata, args.delimiter,
                                           args.indices)
        testtofeature = parsepaths(readFeats(args.testutts))
        labels = []
        dvectors = []
        log.info("Extracting dvectors for input data")
        for spk, v in inputdata.items():
            # One extracted vector per entry of v, each labelled with spk.
            dvectors.extend(extractmethod(item) for item in v)
            labels.extend([spk] * len(v))

    # Map every distinct speaker label to a consecutive integer class id.
    spktonum = {spk: num for num, spk in enumerate(np.unique(labels))}
    dvectors = np.array(dvectors)
    labelsnum = np.array([spktonum[i] for i in labels])

    log.debug("Overall we have %i labels", len(labelsnum))
    log.debug("Number of speakers: %i", len(spktonum))
    log.debug("Dvector size is (%i,%i)", dvectors.shape[0], dvectors.shape[1])

    log.info("Fitting LDA model")
    lda.fit(dvectors, labelsnum)

    errors = 0
    log.info("Starting test")
    for enrolemodel, vals in testreferences.items():
        if enrolemodel not in spktonum:
            errors += 1
            log.warning("Enrolemodel %s not found in the labels", enrolemodel)
            continue
        curspk = spktonum[enrolemodel]
        for testutt, targetmdl in vals:
            if testutt not in testtofeature:
                log.warning("Utterance %s not found in the testset", testutt)
                errors += 1
                continue
            # LDA needs a two-dimensional input, so add a leading axis.
            testdvector = testtofeature[testutt][np.newaxis, :]
            score = lda.predict_log_proba(testdvector)[0]
            # score holds one log-probability per class; pick the entry of
            # the enrolment speaker of this trial.
            finalscore = score[curspk]
            args.scoreoutfile.write("{} {}-{} {:.3f}\n".format(
                enrolemodel, targetmdl, testutt, finalscore))
    if errors > 0:
        log.warning(
            "Overall %i errors happened while processing the testutterances. The scores may not be complete",
            errors)
    # NOTE(review): the line written above starts with enrolemodel, followed
    # by targetmdl-testutt and the score, which does not match the structure
    # named in this message -- confirm which of the two is intended.
    log.info(
        "LDA estimation done, output file is: %s. Output file has the following structure: TARGETMODEL ENROLEMODEL-TESTUTT",
        args.scoreoutfile.name)