Example #1
0
    sequence['dayofweek'] = (sequence['dayofweek'] -
                             meanDayOfWeek) / stdDayOfWeek

    ds = getPyBrainDataSet(sequence, nTrain, predictionStep, useTimeOfDay,
                           useDayOfWeek)

    print "train LSTM with " + str(rptNum) + " epochs"
    random.seed(6)
    net = initializeLSTMnet(nDimInput=len(ds.getSample()[0]),
                            nDimOutput=1,
                            nLSTMcells=20)

    trainer = RPropMinusTrainer(net, dataset=ds, verbose=True)
    error = []
    for rpt in xrange(rptNum):
        err = trainer.train()
        error.append(err)

    print "test LSTM"
    net.reset()

    predictedInput = np.zeros((len(sequence), ))
    targetInput = np.zeros((len(sequence), ))
    trueData = np.zeros((len(sequence), ))
    for i in xrange(1, len(sequence) - predictionStep):
        if useTimeOfDay and useDayOfWeek:
            sample = np.array([
                sequence['data'][i], sequence['timeofday'][i],
                sequence['dayofweek'][i]
            ])
        elif useTimeOfDay:
def main():
    """Train a recurrent sigmoid network on time-series data from a config file.

    Usage: <script> <config-name>, where configs/<config-name> is readable by
    MU.ConfigReader.  Training runs indefinitely (interrupt to stop), writing
    a network snapshot and printing train/validation errors after every
    'epochs_per_update' epochs.
    """
    # Load the experiment configuration named by the first CLI argument.
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    # Per-run log directory "<script>-<config>"; os.mkdir raises if it already
    # exists, which prevents silently overwriting a previous run's logs.
    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    # Record the exact configuration used for this run alongside its outputs.
    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    # Read the raw TSV input, keeping only the configured columns.
    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']

    outLabels = config['output_columns']

    # Split into training and validation sequential datasets, keyed by the
    # sequence-label column and the configured test/validation sequence ids.
    tds, vds = seqDataSetPair(data, inLabels, outLabels, config['seq_label_column'],
            config['test_seqno'], config['validation_seqno'])

    # Use the same input/output scales for both sets so validation error is
    # directly comparable to training error.
    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    normalizeDataSet(tds, ins = inScale, outs = outScale)
    normalizeDataSet(vds, ins = inScale, outs = outScale)

    trainData = tds
    validationData = vds

    # Hidden layer sizes (Python 2 integer division here is intentional).
    fdim = tds.indim / 2 + 15
    xdim = tds.outdim * 2

    # Recurrent net: input -> fdim -> fdim -> xdim -> output, sigmoid hidden
    # units.  PyBrain names the hidden layers hidden0..hidden2.
    rnn = buildNetwork(tds.indim,
            fdim, fdim, xdim,
            tds.outdim,
            hiddenclass=SigmoidLayer,
            recurrent=True)

    # Feed the last hidden layer back into the first; sortModules() must run
    # after adding connections so PyBrain rebuilds its execution order.
    rnn.addRecurrentConnection(FullConnection(rnn['hidden2'], rnn['hidden0']))
    rnn.sortModules()

    # RProp- batch training with weight decay; earlier trainer variants kept
    # below for reference.
    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True, verbose=True, weightdecay=0.005)
    #trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True, verbose=True)
    #trainer = BackpropTrainer(rnn, dataset=trainData, learningrate=0.0001,
    #        lrdecay=1.0, momentum=0.4, verbose=True, batchlearning=False,
    #        weightdecay=0)

    # Error history for the live error-curve plot (figure 3).
    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0
    while True:  # train forever; stop with Ctrl-C

        for i in range(config['epochs_per_update']):
            trainer.train()

        # Snapshot the network both under its epoch number and as "Latest".
        epochNo += config['epochs_per_update']
        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # Rescale errors back to physical units (degC, per the print labels).
        # tScaler is presumably the multiplicative scale of the output column
        # -- TODO confirm against getDataScale's return format.
        tScaler = config.getDataScale([config['output_scalar_label']])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData['target'] - vOut) ** 2)) * tScaler

        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        print "Training error:      avg %5.3f degC      max %5.3f degC" % (tAvgErr, tMaxErr)
        print "Validation error:    avg %5.3f degC      max %5.3f degC" % (vAvgErr, vMaxErr)
        print "------------------------------------------------------------------------------"

        if (config['visualize_on_training'] == 'yes'):

            # Figure 1: network output vs. target on the training data.
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                    config['visualized_columns']['input'],
                    config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 2: same visualization on the validation data.
            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                    config['visualized_columns']['input'],
                    config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 3: train/validation error curves over epochs (cleared and
            # redrawn each round).
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label = 'Train')
            PL.plot(errTime, errValidation, label = 'Validation')
            PL.legend()
            PL.ion()
            PL.draw()
def main():
    """Train a deep recurrent sigmoid network (three self-recurrent hidden
    layers) on time-series data described by a config file.

    Usage: <script> <config-name>, where configs/<config-name> is readable by
    MU.ConfigReader.  Training runs indefinitely (interrupt to stop), writing
    a network snapshot and printing train/validation errors after every
    'epochs_per_update' epochs.
    """
    # Load the experiment configuration named by the first CLI argument.
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    # Per-run log directory "<script>-<config>"; os.mkdir raises if it already
    # exists, which prevents silently overwriting a previous run's logs.
    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    # Record the exact configuration used for this run alongside its outputs.
    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    # Read the raw TSV input, keeping only the configured columns.
    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']

    outLabels = config['output_columns']

    # Split into training and validation sequential datasets, keyed by the
    # sequence-label column and the configured test/validation sequence ids.
    tds, vds = seqDataSetPair(data, inLabels, outLabels,
                              config['seq_label_column'], config['test_seqno'],
                              config['validation_seqno'])

    # Use the same input/output scales for both sets so validation error is
    # directly comparable to training error.
    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    # Hidden layer sizes (Python 2 integer division here is intentional).
    fdim = tds.indim / 5 + 5
    xdim = tds.outdim * 2

    # Recurrent net: input -> fdim -> fdim -> fdim -> xdim -> output, sigmoid
    # hidden units.  PyBrain names the hidden layers hidden0..hidden3.
    rnn = buildNetwork(tds.indim,
                       fdim,
                       fdim,
                       fdim,
                       xdim,
                       tds.outdim,
                       hiddenclass=SigmoidLayer,
                       recurrent=True)

    # Give each of the first three hidden layers a self-recurrent connection;
    # sortModules() must run after adding connections so PyBrain rebuilds its
    # execution order.
    rnn.addRecurrentConnection(FullConnection(rnn['hidden0'], rnn['hidden0']))
    rnn.addRecurrentConnection(FullConnection(rnn['hidden1'], rnn['hidden1']))
    rnn.addRecurrentConnection(FullConnection(rnn['hidden2'], rnn['hidden2']))
    rnn.sortModules()

    # RProp- batch training with weight decay.
    trainer = RPropMinusTrainer(rnn,
                                dataset=trainData,
                                batchlearning=True,
                                verbose=True,
                                weightdecay=0.005)

    # Error history for the live error-curve plot (figure 3).
    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0
    while True:  # train forever; stop with Ctrl-C

        for i in range(config['epochs_per_update']):
            trainer.train()

        # Snapshot the network both under its epoch number and as "Latest".
        epochNo += config['epochs_per_update']
        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # Rescale errors back to the output column's original units.
        # tScaler is presumably the multiplicative scale of the output column
        # -- TODO confirm against getDataScale's return format.
        tScaler = config.getDataScale([config['output_scalar_label']])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut)**2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean(
            (validationData['target'] - vOut)**2)) * tScaler

        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        print "Training error:      avg %5.3f       max %5.3f" % (tAvgErr,
                                                                  tMaxErr)
        print "Validation error:    avg %5.3f       max %5.3f" % (vAvgErr,
                                                                  vMaxErr)
        print "------------------------------------------------------------------------------"

        if (config['visualize_on_training'] == 'yes'):

            # Figure 1: network output vs. target on the training data.
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 2: same visualization on the validation data.
            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 3: train/validation error curves over epochs (cleared and
            # redrawn each round).
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label='Train')
            PL.plot(errTime, errValidation, label='Validation')
            PL.legend()
            PL.ion()
            PL.draw()
  meanDayOfWeek = np.mean(sequence['dayofweek'])
  stdDayOfWeek = np.std(sequence['dayofweek'])
  sequence['dayofweek'] = (sequence['dayofweek'] - meanDayOfWeek)/stdDayOfWeek

  ds = getPyBrainDataSetScalarEncoder(sequence, nTrain, encoderInput, encoderOutput,
                                      predictionStep, useTimeOfDay, useDayOfWeek)

  print "train LSTM with "+str(rptNum)+" repeats"

  net = initializeLSTMnet(nDimInput=len(ds.getSample()[0]), nDimOutput=len(ds.getSample()[1]), nLSTMcells=20)

  trainer = RPropMinusTrainer(net, dataset=ds, verbose=True)
  error = []
  for rpt in xrange(rptNum):
    err = trainer.train()
    error.append(err)

  print "test LSTM"
  net.reset()

  targetInput = np.zeros((len(sequence),))
  trueData = np.zeros((len(sequence),))
  predictedInput = np.zeros((len(sequence),))

  bucketValues = encoderOutput.getBucketValues()

  if encoderOutput is not None:
    predictedDistribution = np.zeros((len(sequence), encoderOutput.n))
    targetDistribution = np.zeros((len(sequence), encoderOutput.n))
def main():
    """Train a recurrent sigmoid network on time-series data from a config file.

    Usage: <script> <config-name>, where configs/<config-name> is readable by
    MU.ConfigReader.  Training runs indefinitely (interrupt to stop), writing
    a network snapshot and printing train/validation errors after every
    'epochs_per_update' epochs.
    """
    # Load the experiment configuration named by the first CLI argument.
    config = MU.ConfigReader("configs/%s" % sys.argv[1])
    config.read()

    # Per-run log directory "<script>-<config>"; os.mkdir raises if it already
    # exists, which prevents silently overwriting a previous run's logs.
    logDir = "%s-%s" % (__file__, sys.argv[1])
    os.mkdir(logDir)

    # Record the exact configuration used for this run alongside its outputs.
    with open("%s/config.txt" % logDir, "w") as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    # Read the raw TSV input, keeping only the configured columns.
    dr = MU.DataReader(config["input_tsv_path"])
    data = dr.read(config["interested_columns"])

    inLabels = config["input_columns"]

    outLabels = config["output_columns"]

    # Split into training and validation sequential datasets, keyed by the
    # sequence-label column and the configured test/validation sequence ids.
    tds, vds = seqDataSetPair(
        data, inLabels, outLabels, config["seq_label_column"], config["test_seqno"], config["validation_seqno"]
    )

    # Use the same input/output scales for both sets so validation error is
    # directly comparable to training error.
    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    # Hidden layer sizes (Python 2 integer division here is intentional).
    fdim = tds.indim / 5 + 5
    xdim = tds.outdim * 2

    # Recurrent net: input -> fdim -> fdim -> xdim -> output, sigmoid hidden
    # units.  PyBrain names the hidden layers hidden0..hidden2.
    rnn = buildNetwork(tds.indim, fdim, fdim, xdim, tds.outdim, hiddenclass=SigmoidLayer, recurrent=True)

    # Self-recurrent connections on the first two hidden layers only (hidden2
    # is left feed-forward); sortModules() must run after adding connections
    # so PyBrain rebuilds its execution order.
    rnn.addRecurrentConnection(FullConnection(rnn["hidden0"], rnn["hidden0"]))
    rnn.addRecurrentConnection(FullConnection(rnn["hidden1"], rnn["hidden1"]))
    rnn.sortModules()

    # RProp- batch training (no weight decay in this variant).
    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True, verbose=True)

    # Error history for the live error-curve plot (figure 3).
    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0
    while True:  # train forever; stop with Ctrl-C

        for i in range(config["epochs_per_update"]):
            trainer.train()

        # Snapshot the network both under its epoch number and as "Latest".
        epochNo += config["epochs_per_update"]
        NetworkWriter.writeToFile(rnn, "%s/Epoch_%d.xml" % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, "%s/Latest.xml" % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # Rescale errors back to physical units (degC, per the print labels).
        # tScaler is presumably the multiplicative scale of the output column
        # -- TODO confirm against getDataScale's return format.
        tScaler = config.getDataScale([config["output_scalar_label"]])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData["target"] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData["target"] - vOut) ** 2)) * tScaler

        tMaxErr = NP.max(NP.abs(trainData["target"] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData["target"] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        print "Training error:      avg %5.3f degC      max %5.3f degC" % (tAvgErr, tMaxErr)
        print "Validation error:    avg %5.3f degC      max %5.3f degC" % (vAvgErr, vMaxErr)
        print "------------------------------------------------------------------------------"

        if config["visualize_on_training"] == "yes":

            # Figure 1: network output vs. target on the training data.
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(
                rnn, trainData, 0, config["visualized_columns"]["input"], config["visualized_columns"]["output"]
            )
            PL.ion()
            PL.draw()

            # Figure 2: same visualization on the validation data.
            PL.figure(2)
            PL.ioff()
            visulizeDataSet(
                rnn, validationData, 0, config["visualized_columns"]["input"], config["visualized_columns"]["output"]
            )
            PL.ion()
            PL.draw()

            # Figure 3: train/validation error curves over epochs (cleared and
            # redrawn each round).
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label="Train")
            PL.plot(errTime, errValidation, label="Validation")
            PL.legend()
            PL.ion()
            PL.draw()