Esempio n. 1
0
                break
            # Keep running totals over the data seen so far: the sum of the
            # samples (xsum), the sum of their squares (xxsum) and the number
            # of samples (ntot).
            # presumably data is a 2-D numpy array, so the built-in sum() adds
            # its rows element-wise -- TODO confirm
            if ntot == 0:
                # Nothing counted yet: create the accumulators.
                xsum = sum(data)
                xxsum = sum(data**2)
                ntot += len(data)
            else:
                # Accumulators already exist: add this batch to them.
                xsum += sum(data)
                xxsum += sum(data**2)
                ntot += len(data)
        # Pack the statistics into one float32 array: the first entry is the
        # mean (xsum / ntot), the second is the standard deviation computed
        # as sqrt(E[x^2] - E[x]^2).
        # NOTE(review): floating-point rounding can make E[x^2] - E[x]^2
        # slightly negative, in which case numpy.sqrt yields NaN -- confirm
        # whether that can happen with the data used here.
        mnstd = numpy.array(
            [xsum / ntot,
             numpy.sqrt(xxsum / ntot - (xsum / ntot)**2)],
            dtype="float32")

    # Input side: only the two scp list formats are accepted; anything else
    # is reported on stderr and terminates the program with status 1.
    if xType not in ("htkscp", "arkscp"):
        print >> sys.stderr, "Right now, only htkscp and arkscp are supported as xType."
        sys.exit(1)
    # NOTE: loading other xType values through nnet.loadCFile is disabled.
    trainXLst = nnet.loadscpf(trainXfile)
    trainXall = numpy.array([], dtype=theano.config.floatX)

    # Target side: scp formats get a list from loadscpf plus an empty floatX
    # array; any other yType is loaded through nnet.loadCFile and gets an
    # empty list dictionary.
    if yType not in ("htkscp", "arkscp"):
        trainYLst = {}
        trainYall = nnet.loadCFile(trainYfile, yType)
    else:
        trainYLst = nnet.loadscpf(trainYfile)
        trainYall = numpy.array([], dtype=theano.config.floatX)

    # prepare development data
Esempio n. 2
0
# Defaults for the options that may be overridden on the command line.
xType = "htkscp"
bbsize = 0  # big batch size (0 means all data)

# Arguments from index 3 onward must be "--xType=..." or "--bb=..."; any
# other argument is reported on stderr and ends the script with status 1.
for ag in sys.argv[3:]:
    if ag.startswith("--xType="):
        xType = ag.replace("--xType=", "")
        continue
    if ag.startswith("--bb="):
        bbsize = int(ag.replace("--bb=", ""))
        continue
    print >> sys.stderr, "Error: Unknown option: " + ag
    sys.exit(1)

# prepare training data
if xType not in ("htkscp", "arkscp"):
    print >> sys.stderr, "Right now, only htkscp and arkscp are supported as xType."
    sys.exit(1)
trainXLst = nnet.loadscpf(datfilename)  # read the contents of the list file
# trainXall starts out as an empty array of Theano's floatX dtype.
trainXall = numpy.array([], dtype=theano.config.floatX)

# Counters used by the statistics loop that follows.
bblot = ntot = 0
# Fetch data with nnet.prepX repeatedly until it returns an empty result.
while True:
    # NOTE(review): the third argument is an empty array; presumably this
    # slot carries mean/std normalization statistics, so none are applied
    # here -- confirm against nnet.prepX.
    data = nnet.prepX(trainXLst, trainXall, numpy.array([]), xType, bbsize,
                      bblot, 0, 0, False)
    bblot += 1  # presumably the big-batch index for the next prepX call -- confirm
    if len(data) == 0:
        break  # an empty result ends the loop
    if ntot == 0:
        # Nothing counted yet: start the running sum from this batch.
        xsum = sum(data)
Esempio n. 3
0
    # Optionally let the network print its debug information.
    if debug:
        nn.debuginfo()

    # Build the transformation symbolically and compile it: x is a symbolic
    # float32 matrix (fmatrix), nnout is the network's forward output for x,
    # and featrans is the compiled function from x to nnout.
    x = T.fmatrix("x")
    nnout = nn.forward(x)
    featrans = theano.function(inputs=[x], outputs=nnout)

    # prepare training data
    # The statistics file is optional: an empty array is used when no file
    # name was given.
    mnstd = nnet.loadMVFile(mvnormf) if mvnormf != "" else numpy.array([])

    # Only the two scp list formats are accepted; anything else is reported
    # on stderr and terminates the program with status 1.
    if xType not in ("htkscp", "arkscp"):
        print >> sys.stderr, "Right now, only htkscp and arkscp are supported as xType."
        sys.exit(1)
    trainXLst = nnet.loadscpf(datfile)
    trainXall = numpy.array([], dtype=theano.config.floatX)

    idlist = nnet.scp2idlist(trainXLst, xType)
    # Visit every entry of idlist by index, calling nnet.prepX once per index.
    for ut in range(len(idlist)):
        # set bb size to 1 and process one utterance by one utterance.
        # no shuffling.
        data = nnet.prepX(trainXLst, trainXall, mnstd, 
                          xType, 1, ut, spl, trmspl, False)

        # Number of minibatches of size mbsize needed to cover data: divide,
        # then add one when the division left a remainder.
        # NOTE(review): this relies on "/" being integer division (Python 2
        # with integer mbsize); under Python 3 it would produce a float --
        # confirm before porting.
        mbnum=len(data)/mbsize
        if mbnum*mbsize < len(data):
            mbnum+=1