def visulizeDataSet(network, data, seqno, in_labels, out_labels):
    """Plot one sequence of ``data`` next to the network's output for it.

    Sequence ``seqno`` is copied into a temporary single-sequence
    ``SequentialDataSet``.  One subplot row is drawn per entry of
    ``in_labels`` (input column ``i``), followed by one row per entry of
    ``out_labels`` (target column ``i`` labelled 'train' and the network's
    output column ``i`` labelled 'sim').  Everything is drawn through ``PL``
    on the current figure; nothing is returned.

    :param network: module handed to ``ModuleValidator.calculateModuleOutput``.
    :param data: dataset providing ``getSequence``, ``getSequenceLength``,
        ``indim`` and ``outdim``.
    :param seqno: index of the sequence to plot.
    :param in_labels: y-axis labels; the i-th label goes with input column i.
    :param out_labels: y-axis labels; the i-th label goes with target/output
        column i.
    """
    seq = data.getSequence(seqno)
    tmpDs = SequentialDataSet(data.indim, data.outdim)
    tmpDs.newSequence()

    # ``range`` (not ``xrange``) so the function runs on Python 2 and 3.
    for i in range(data.getSequenceLength(seqno)):
        tmpDs.addSample(seq[0][i], seq[1][i])

    n_inputs = len(in_labels)
    nplots = n_inputs + len(out_labels)

    for i, label in enumerate(in_labels):
        p = PL.subplot(nplots, 1, i + 1)
        p.clear()
        p.plot(tmpDs['input'][:, i])
        p.set_ylabel(label)

    if len(out_labels) == 0:
        # Nothing to compare against, so skip the forward pass entirely.
        return

    # The module output does not depend on which column is being plotted,
    # so compute it once instead of once per output label.
    output = ModuleValidator.calculateModuleOutput(network, tmpDs)

    for i, label in enumerate(out_labels):
        # Output rows are stacked below the input rows.
        p = PL.subplot(nplots, 1, i + 1 + n_inputs)
        p.clear()

        p.plot(tmpDs['target'][:, i], label='train')
        p.plot(output[:, i], label='sim')

        p.legend()
        p.set_ylabel(label)
def visulizeDataSet(network, data, seqno, in_labels, out_labels):
    """Plot one sequence of ``data`` next to the network's output for it.

    Sequence ``seqno`` is copied into a temporary single-sequence
    ``SequentialDataSet``.  One subplot row is drawn per entry of
    ``in_labels`` (input column ``i``), followed by one row per entry of
    ``out_labels`` (target column ``i`` labelled 'train' and the network's
    output column ``i`` labelled 'sim').  Everything is drawn through ``PL``
    on the current figure; nothing is returned.

    :param network: module handed to ``ModuleValidator.calculateModuleOutput``.
    :param data: dataset providing ``getSequence``, ``getSequenceLength``,
        ``indim`` and ``outdim``.
    :param seqno: index of the sequence to plot.
    :param in_labels: y-axis labels; the i-th label goes with input column i.
    :param out_labels: y-axis labels; the i-th label goes with target/output
        column i.
    """
    seq = data.getSequence(seqno)
    tmpDs = SequentialDataSet(data.indim, data.outdim)
    tmpDs.newSequence()

    # ``range`` (not ``xrange``) so the function runs on Python 2 and 3.
    for i in range(data.getSequenceLength(seqno)):
        tmpDs.addSample(seq[0][i], seq[1][i])

    n_inputs = len(in_labels)
    nplots = n_inputs + len(out_labels)

    for i, label in enumerate(in_labels):
        p = PL.subplot(nplots, 1, i + 1)
        p.clear()
        p.plot(tmpDs['input'][:, i])
        p.set_ylabel(label)

    if len(out_labels) == 0:
        # Nothing to compare against, so skip the forward pass entirely.
        return

    # The module output does not depend on which column is being plotted,
    # so compute it once instead of once per output label.
    output = ModuleValidator.calculateModuleOutput(network, tmpDs)

    for i, label in enumerate(out_labels):
        # Output rows are stacked below the input rows.
        p = PL.subplot(nplots, 1, i + 1 + n_inputs)
        p.clear()

        p.plot(tmpDs['target'][:, i], label='train')
        p.plot(output[:, i], label='sim')

        p.legend()
        p.set_ylabel(label)
Beispiel #3
0
def testOnSequenceData(module, dataset):
    """
    Score the module's per-sequence classification on a sequential dataset.

    Both the targets stored in the dataset and the module's outputs are
    treated as one-of-many vectors. For every sequence ONLY ITS LAST ITEM is
    looked at, and the class is the argmax of that item. The predicted and
    target classes are handed to Validator.classificationPerformance, whose
    result is returned unchanged.
    """
    expected = dataset.getField("target")
    predicted = ModuleValidator.calculateModuleOutput(module, dataset)

    # index of the final sample of every sequence inside dataset
    last_positions = SequenceHelper.getSequenceEnds(dataset)

    def classes_at_sequence_ends(rows):
        # argmax of the row found at each sequence end
        return array([argmax(rows[pos]) for pos in last_positions])

    predicted_classes = classes_at_sequence_ends(predicted)
    expected_classes = classes_at_sequence_ends(expected)

    return Validator.classificationPerformance(predicted_classes,
                                               expected_classes)
def get_segregation(
        file_path= "/computed/sentence_analysis_reg.pkl.gz",
        error_metric = regression_metric):
    """Group per-sample prediction errors by true rating, sorted per group.

    Loads ``(best_module, testData, Y_test)`` from ``root + file_path``,
    computes the module output on ``testData`` and evaluates
    ``error_metric(prediction, truth)`` for every sample.

    :param file_path: path, relative to ``root``, of the stored bundle.
    :param error_metric: callable ``(predicted, expected) -> error``.
    :returns: dict with keys 0..4; each value is the ascending-sorted list of
        ``(error, sample_index)`` pairs whose ``Y_test`` entry equals the key.
        Only the first half of the samples is included (see below).
    :raises KeyError: if a ``Y_test`` entry in the first half is not one of
        0..4.
    """
    # Loading necessary data.
    best_module, testData, Y_test = data.load( root + file_path )
    n_samples = len(Y_test)

    # Computing error and sorting and grouping errors by rating groups
    Y_pred = ModuleValidator.calculateModuleOutput(best_module, testData)

    error = [error_metric(Y_pred[i], Y_test[i]) for i in range(n_samples)]
    # Materialize the pairs: on Python 3 ``zip`` is lazy and cannot be indexed.
    err_and_revidx = list(zip(error, range(n_samples)))

    sorted_err = {0: [], 1: [], 2: [], 3: [], 4: []}
    # For some reason the last n_samples/2 are corrupted and are not aligned
    # to the reviews, so only the first half is used.  ``//`` keeps the bound
    # an integer on Python 3 as well.
    for idx in range(n_samples // 2):
        sorted_err[Y_test[idx]].append(err_and_revidx[idx])
    for rating in range(5):
        sorted_err[rating] = sorted(sorted_err[rating])

    return sorted_err
def main():
    """Train a recurrent network on sequence data, logging progress forever.

    Reads the configuration ``configs/<sys.argv[1]>``, creates the log
    directory ``<__file__>-<sys.argv[1]>`` (``os.mkdir`` raises if it already
    exists) and dumps the configuration there as ``config.txt``.  Training
    and validation datasets are built from the configured TSV file,
    normalized with the configured scales and used to train a recurrent
    network with sigmoid hidden layers via ``RPropMinusTrainer``.

    The training loop never terminates on its own.  After every
    ``epochs_per_update`` epochs it writes the network to ``Epoch_<n>.xml``
    and ``Latest.xml``, prints the RMS and the maximum absolute error on both
    datasets (multiplied by the configured output scale) and, when
    ``visualize_on_training`` is ``'yes'``, refreshes three ``PL`` figures.
    """
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']

    outLabels = config['output_columns']

    tds, vds = seqDataSetPair(data, inLabels, outLabels, config['seq_label_column'],
            config['test_seqno'], config['validation_seqno'])

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    normalizeDataSet(tds, ins = inScale, outs = outScale)
    normalizeDataSet(vds, ins = inScale, outs = outScale)

    trainData = tds
    validationData = vds

    # ``//`` keeps the layer size an integer on Python 3 as well.
    fdim = tds.indim // 2 + 15
    xdim = tds.outdim * 2

    rnn = buildNetwork(tds.indim,
            fdim, fdim, xdim,
            tds.outdim,
            hiddenclass=SigmoidLayer,
            recurrent=True)

    # Feed the third hidden layer back into the first one.
    rnn.addRecurrentConnection(FullConnection(rnn['hidden2'], rnn['hidden0']))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True, verbose=True, weightdecay=0.005)
    # Alternative trainers that were tried:
    #trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True, verbose=True)
    #trainer = BackpropTrainer(rnn, dataset=trainData, learningrate=0.0001,
    #        lrdecay=1.0, momentum=0.4, verbose=True, batchlearning=False,
    #        weightdecay=0)

    # The output scale only depends on the configuration, so look it up once
    # instead of once per update.
    tScaler = config.getDataScale([config['output_scalar_label']])[0][1]

    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0
    while True:

        for _ in range(config['epochs_per_update']):
            trainer.train()

        epochNo += config['epochs_per_update']
        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # RMS error, scaled by the configured output scale.
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData['target'] - vOut) ** 2)) * tScaler

        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        # Single-argument print(...) produces the same output as a Python 2
        # print statement and as the Python 3 print function.
        print("Training error:      avg %5.3f degC      max %5.3f degC" % (tAvgErr, tMaxErr))
        print("Validation error:    avg %5.3f degC      max %5.3f degC" % (vAvgErr, vMaxErr))
        print("------------------------------------------------------------------------------")

        if (config['visualize_on_training'] == 'yes'):

            # Figure 1: first training sequence.  Interactive mode is switched
            # off while the subplots are rebuilt and back on before drawing.
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                    config['visualized_columns']['input'],
                    config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 2: first validation sequence.
            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                    config['visualized_columns']['input'],
                    config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 3: error history over the epochs trained so far.
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label = 'Train')
            PL.plot(errTime, errValidation, label = 'Validation')
            PL.legend()
            PL.ion()
            PL.draw()
Beispiel #6
0
    plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
    pylab.savefig(plotname)


# set-up the neural network
nneuron = 5
mom = 0.98
# name under which the network is looked up in the XML file
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()

# create the test DataSet: x runs from 0.0 to about 1.0 in steps of 0.01 and
# the target is 0.5 + 0.4 * sin(2 * pi * x); both are stored as column vectors
x = numpy.arange(0.0, 1.0 + 0.01, 0.01)
s = 0.5 + 0.4 * numpy.sin(2 * numpy.pi * x)
tsts = SupervisedDataSet(1, 1)
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))
# read the train DataSet from the file 'trndata' in the working directory
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

# the evaluation only runs when a saved network exists in the working directory
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    n = NetworkReader.readFrom(myneuralnet, name=netname)
    # calculate the test DataSet based on the trained Neural Network
    ctsts = mv.calculateModuleOutput(n, tsts)
    tserr = v.MSE(ctsts, tsts['target'])
    # single-argument print(...) gives the same output on Python 2 and 3
    print('MSE error on TSTS: %s' % (tserr,))
    myplot(trndata, tsts=tsts, ctsts=ctsts)

    pylab.show()
Beispiel #7
0
tsts.setField('target', s.reshape(len(s), 1))

# read the train DataSet from the file 'trndata' in the working directory
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

# create the trainer
t = BackpropTrainer(n, learningrate=0.01,
                    momentum=mom)

# train the neural network from the train DataSet: at most 25 rounds of
# trainOnDataset(trndata, 1000), with the MSE on the training data measured
# and plotted after each round
cterrori = 1.0  # MSE of the previous round (1.0 before the first one)
# single-argument print(...) gives the same output on Python 2 and 3
print("trainer momentum:" + str(mom))
for iter in range(25):
    t.trainOnDataset(trndata, 1000)
    ctrndata = mv.calculateModuleOutput(n, trndata)
    cterr = v.MSE(ctrndata, trndata['target'])
    relerr = abs(cterr - cterrori)  # change in MSE since the previous round
    cterrori = cterr
    print('iteration: %s MSE error: %s' % (iter + 1, cterr))
    myplot(trndata, ctrndata, iter=iter + 1)
    # stop once the error is small enough or has stopped changing
    if cterr < 1.e-5 or relerr < 1.e-7:
        break

# write the network using xml file: append when the file already exists,
# otherwise create it
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    NetworkWriter.appendToFile(n, myneuralnet)
else:
    NetworkWriter.writeToFile(n, myneuralnet)

#calculate the test DataSet based on the trained Neural Network
def main():
    """Train a recurrent network on sequence data, logging progress forever.

    Reads the configuration ``configs/<sys.argv[1]>``, creates the log
    directory ``<__file__>-<sys.argv[1]>`` (``os.mkdir`` raises if it already
    exists) and dumps the configuration there as ``config.txt``.  Training
    and validation datasets are built from the configured TSV file,
    normalized with the configured scales and used to train a recurrent
    network with four sigmoid hidden layers via ``RPropMinusTrainer``.

    The training loop never terminates on its own.  After every
    ``epochs_per_update`` epochs it writes the network to ``Epoch_<n>.xml``
    and ``Latest.xml``, prints the RMS and the maximum absolute error on both
    datasets (multiplied by the configured output scale) and, when
    ``visualize_on_training`` is ``'yes'``, refreshes three ``PL`` figures.
    """
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']

    outLabels = config['output_columns']

    tds, vds = seqDataSetPair(data, inLabels, outLabels,
                              config['seq_label_column'], config['test_seqno'],
                              config['validation_seqno'])

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    # ``//`` keeps the layer size an integer on Python 3 as well.
    fdim = tds.indim // 5 + 5
    xdim = tds.outdim * 2

    rnn = buildNetwork(tds.indim,
                       fdim,
                       fdim,
                       fdim,
                       xdim,
                       tds.outdim,
                       hiddenclass=SigmoidLayer,
                       recurrent=True)

    # Each of the first three hidden layers feeds back into itself.
    rnn.addRecurrentConnection(FullConnection(rnn['hidden0'], rnn['hidden0']))
    rnn.addRecurrentConnection(FullConnection(rnn['hidden1'], rnn['hidden1']))
    rnn.addRecurrentConnection(FullConnection(rnn['hidden2'], rnn['hidden2']))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn,
                                dataset=trainData,
                                batchlearning=True,
                                verbose=True,
                                weightdecay=0.005)

    # The output scale only depends on the configuration, so look it up once
    # instead of once per update.
    tScaler = config.getDataScale([config['output_scalar_label']])[0][1]

    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0
    while True:

        for _ in range(config['epochs_per_update']):
            trainer.train()

        epochNo += config['epochs_per_update']
        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # RMS error, scaled by the configured output scale.
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut)**2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean(
            (validationData['target'] - vOut)**2)) * tScaler

        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        # Single-argument print(...) produces the same output as a Python 2
        # print statement and as the Python 3 print function.
        print("Training error:      avg %5.3f       max %5.3f" % (tAvgErr,
                                                                  tMaxErr))
        print("Validation error:    avg %5.3f       max %5.3f" % (vAvgErr,
                                                                  vMaxErr))
        print("------------------------------------------------------------------------------")

        if (config['visualize_on_training'] == 'yes'):

            # Figure 1: first training sequence.  Interactive mode is switched
            # off while the subplots are rebuilt and back on before drawing.
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 2: first validation sequence.
            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 3: error history over the epochs trained so far.
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label='Train')
            PL.plot(errTime, errValidation, label='Validation')
            PL.legend()
            PL.ion()
            PL.draw()
Beispiel #9
0
# store x and s as single-column 'input' and 'target' fields of the test DataSet
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))

# read the train DataSet from the file 'trndata' in the working directory
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

# create the trainer

t = BackpropTrainer(n, learningrate=0.01, momentum=mom)
# train the neural network from the train DataSet: at most 25 rounds of
# trainOnDataset(trndata, 1000), measuring and plotting the MSE on the
# training data after each round

# MSE of the previous round (1.0 before the first one)
cterrori = 1.0
print("trainer momentum:" + str(mom))
for iter in range(25):
    t.trainOnDataset(trndata, 1000)
    ctrndata = mv.calculateModuleOutput(n, trndata)
    cterr = v.MSE(ctrndata, trndata['target'])
    # change in MSE since the previous round
    relerr = abs(cterr - cterrori)
    cterrori = cterr
    print('iteration:', iter + 1, 'MSE error:', cterr)
    myplot(trndata, ctrndata, iter=iter + 1)
    # stop once the error is small enough or has stopped changing
    if cterr < 1.e-5 or relerr < 1.e-7:
        break
# write the network using xml file: append when the file already exists,
# otherwise create it
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    NetworkWriter.appendToFile(n, myneuralnet)
else:
    NetworkWriter.writeToFile(n, myneuralnet)

# calculate the test DataSet based on the trained Neural Network
# NOTE(review): the code for this step is not present after this comment
Beispiel #10
0
  pylab.savefig(plotname)


# set-up the neural network
nneuron = 5
mom = 0.98
# name under which the network is looked up in the XML file
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()


# create the test DataSet: x runs from 0.0 to about 1.0 in steps of 0.01 and
# the target is 0.5 + 0.4 * sin(2 * pi * x); both are stored as column vectors
x = numpy.arange(0.0, 1.0 + 0.01, 0.01)
s = 0.5 + 0.4 * numpy.sin(2 * numpy.pi * x)
tsts = SupervisedDataSet(1, 1)
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))
# read the train DataSet from the file 'trndata' in the working directory
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

# the evaluation only runs when a saved network exists in the working directory
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    n = NetworkReader.readFrom(myneuralnet, name=netname)
    # calculate the test DataSet based on the trained Neural Network
    ctsts = mv.calculateModuleOutput(n, tsts)
    tserr = v.MSE(ctsts, tsts['target'])
    # single-argument print(...) gives the same output on Python 2 and 3
    print('MSE error on TSTS: %s' % (tserr,))
    myplot(trndata, tsts=tsts, ctsts=ctsts)

    pylab.show()
def main():
    """Train a recurrent network on sequence data, logging progress forever.

    Reads the configuration ``configs/<sys.argv[1]>``, creates the log
    directory ``<__file__>-<sys.argv[1]>`` (``os.mkdir`` raises if it already
    exists) and dumps the configuration there as ``config.txt``.  Training
    and validation datasets are built from the configured TSV file,
    normalized with the configured scales and used to train a recurrent
    network with three sigmoid hidden layers via ``RPropMinusTrainer``.

    The training loop never terminates on its own.  After every
    ``epochs_per_update`` epochs it writes the network to ``Epoch_<n>.xml``
    and ``Latest.xml``, prints the RMS and the maximum absolute error on both
    datasets (multiplied by the configured output scale) and, when
    ``visualize_on_training`` is ``"yes"``, refreshes three ``PL`` figures.
    """
    config = MU.ConfigReader("configs/%s" % sys.argv[1])
    config.read()

    logDir = "%s-%s" % (__file__, sys.argv[1])
    os.mkdir(logDir)

    with open("%s/config.txt" % logDir, "w") as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config["input_tsv_path"])
    data = dr.read(config["interested_columns"])

    inLabels = config["input_columns"]

    outLabels = config["output_columns"]

    tds, vds = seqDataSetPair(
        data, inLabels, outLabels, config["seq_label_column"], config["test_seqno"], config["validation_seqno"]
    )

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    # ``//`` keeps the layer size an integer on Python 3 as well.
    fdim = tds.indim // 5 + 5
    xdim = tds.outdim * 2

    rnn = buildNetwork(tds.indim, fdim, fdim, xdim, tds.outdim, hiddenclass=SigmoidLayer, recurrent=True)

    # Each of the first two hidden layers feeds back into itself.
    rnn.addRecurrentConnection(FullConnection(rnn["hidden0"], rnn["hidden0"]))
    rnn.addRecurrentConnection(FullConnection(rnn["hidden1"], rnn["hidden1"]))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True, verbose=True)

    # The output scale only depends on the configuration, so look it up once
    # instead of once per update.
    tScaler = config.getDataScale([config["output_scalar_label"]])[0][1]

    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0
    while True:

        for _ in range(config["epochs_per_update"]):
            trainer.train()

        epochNo += config["epochs_per_update"]
        NetworkWriter.writeToFile(rnn, "%s/Epoch_%d.xml" % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, "%s/Latest.xml" % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # RMS error, scaled by the configured output scale.
        tAvgErr = NP.sqrt(NP.mean((trainData["target"] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData["target"] - vOut) ** 2)) * tScaler

        tMaxErr = NP.max(NP.abs(trainData["target"] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData["target"] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        # Single-argument print(...) produces the same output as a Python 2
        # print statement and as the Python 3 print function.
        print("Training error:      avg %5.3f degC      max %5.3f degC" % (tAvgErr, tMaxErr))
        print("Validation error:    avg %5.3f degC      max %5.3f degC" % (vAvgErr, vMaxErr))
        print("------------------------------------------------------------------------------")

        if config["visualize_on_training"] == "yes":

            # Figure 1: first training sequence.  Interactive mode is switched
            # off while the subplots are rebuilt and back on before drawing.
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(
                rnn, trainData, 0, config["visualized_columns"]["input"], config["visualized_columns"]["output"]
            )
            PL.ion()
            PL.draw()

            # Figure 2: first validation sequence.
            PL.figure(2)
            PL.ioff()
            visulizeDataSet(
                rnn, validationData, 0, config["visualized_columns"]["input"], config["visualized_columns"]["output"]
            )
            PL.ion()
            PL.draw()

            # Figure 3: error history over the epochs trained so far.
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label="Train")
            PL.plot(errTime, errValidation, label="Validation")
            PL.legend()
            PL.ion()
            PL.draw()