def buildDS(n, num, dur):
    """Build a SequentialDataSet of `num` random pulse-train sequences.

    Each sequence is a decaying-pulse signal of duration `dur`, sampled at
    the GFNN's time step (n['gfnn'].dt); the training target at every step
    is the signal one sample ahead.
    """
    dataset = SequentialDataSet(1, 1)
    step = n['gfnn'].dt
    times = np.arange(0, dur, step)
    n_samples = len(times)
    for _ in range(num):
        signal = np.zeros(n_samples)
        # random tempo between 1 and 2 beats per second
        beat_rate = 1 + np.random.random()
        period = 1. / beat_rate
        # random phase offset for the first pulse
        prev_pulse = np.random.random() * period
        for idx, now in enumerate(times):
            if now > prev_pulse and now >= prev_pulse + period:
                # emit a pulse and remember when it happened
                signal[idx] = 0.1
                prev_pulse = now
            elif idx > 0 and signal[idx - 1] >= 1e-5:
                # exponential decay of the previous pulse; values below
                # 1e-5 stay at the array's zero initialization
                signal[idx] = signal[idx - 1] * 0.5
        # target: predict the next sample (np.roll wraps the final one)
        shifted = np.roll(signal, -1)
        dataset.newSequence()
        for idx in range(n_samples):
            dataset.addSample(signal[idx], shifted[idx])
    return dataset
def generateSuperimposedSineData( sinefreqs, space, yScales=None ):
    """Sample a superposition of sine waves over `space` into a dataset.

    The resulting SequentialDataSet has no input field (dimension 0) and a
    one-dimensional target; optional `yScales` rescales the components.
    """
    generator = SuperimposedSine( sinefreqs )
    if yScales is not None:
        generator.yScales = array(yScales)
    ds = SequentialDataSet(0, 1)
    values = generator.getFuncValues(space)
    ds.newSequence()
    # one target sample per function value; the input slot stays empty
    for value in values:
        ds.addSample([], value)
    return ds
def createSequentialDataSets(self, testRatio=0.7):
    """Split self.data into sequential train/test datasets.

    Bug fix: ``testRatio`` was previously ignored and 0.7 hard-coded; it is
    now honored (the default reproduces the old behavior).  NOTE(review):
    despite its name, ``testRatio`` is the fraction of rows assigned to the
    *training* set — renaming would break callers, so only documented here.
    """
    ixSeparator = int(self.data.shape[0] * testRatio)
    trainData = self.data[:ixSeparator]
    testData = self.data[ixSeparator:]
    # columns 0-4 are the inputs, column 5 the target
    self.trainData = SequentialDataSet(5, 1)
    self.testData = SequentialDataSet(5, 1)
    for row in trainData:
        self.trainData.addSample(row[0:5], row[5])
    for row in testData:
        self.testData.addSample(row[0:5], row[5])
def makeMelodyDataSet(melodies, inspirationFunc=randomInspiration, inspirationLength=8):
    """Build a SequentialDataSet of (note, next-note) training pairs.

    One dataset sequence per melody; each sample pairs the current note
    (plus a cyclic 'inspiration' value and its bar) with the next note.
    """
    dataset = SequentialDataSet(sampleSize(), outputSize())
    for melody in melodies:
        numBars = melody.bars[-1] + 1
        assert numBars <= 8, "Bar counts greater than 8 unsupported"
        inspiration = inspirationFunc(inspirationLength, melody)
        dataset.newSequence()
        # pair each note with its successor; the last note has no target
        lastIndex = len(melody.pitches) - 1
        for idx in range(lastIndex):
            sample = makeNoteSample(melody.pitches[idx],
                                    melody.durations[idx],
                                    inspiration[idx % inspirationLength],
                                    melody.bars[idx])
            target = makeNoteTarget(melody.pitches[idx + 1],
                                    melody.durations[idx + 1])
            dataset.addSample(sample, target)
    return dataset
def list_to_dataset(inputs, outputs, dataset=None):
    """List to Dataset

    Convert a standard list (or array) to a dataset. The data must be given
    in the following format:

        Inputs:  2 dimension list (N x M)
        Outputs: 2 dimension list (N x K)

        N: Number of time steps in data series
        M: Number of inputs per time step
        K: Number of outputs per time step

    One-dimensional (N,) data is treated as N steps of dimension 1.

    Arguments:
        inputs: The input list given under the above conditions.
        outputs: The output list given under the above conditions.
        dataset: A SequentialDataSet object to add a new sequence. New
            dataset generated if None. (Default: None)

    Returns:
        A SequentialDataSet object built from the retrieved input/output
        data.
    """
    # Bug fix: plain Python lists previously crashed on `.shape`; normalize
    # to ndarrays so the documented "standard list" contract actually holds.
    inputs = np.asarray(inputs)
    outputs = np.asarray(outputs)

    assert len(inputs) > 0
    assert len(outputs) > 0
    assert len(inputs) == len(outputs)

    # Determine the input and output sizes from the (possibly 1-D) data.
    num_samples = len(inputs)
    in_dim = 1 if len(inputs.shape) == 1 else inputs.shape[1]
    out_dim = 1 if len(outputs.shape) == 1 else outputs.shape[1]

    # If the dataset does not exist, create it. Otherwise, use the dataset
    # given.
    if not dataset:
        dataset = SequentialDataSet(in_dim, out_dim)

    # Make a new sequence for the given input/output pair.
    dataset.newSequence()
    for i in range(num_samples):
        dataset.addSample(inputs[i], outputs[i])

    return dataset
def generateSuperimposedSineData(sinefreqs, space, yScales=None):
    """Evaluate a sum of sines over `space` and return it as target data.

    Produces a SequentialDataSet with zero input dimensions and one target
    dimension, holding a single sequence of the sampled function values.
    """
    wave = SuperimposedSine(sinefreqs)
    if yScales is not None:
        wave.yScales = array(yScales)
    ds = SequentialDataSet(0, 1)
    samples = wave.getFuncValues(space)
    ds.newSequence()
    # each function value becomes one target sample with an empty input
    for sample in samples:
        ds.addSample([], sample)
    return ds
def __init__(self, indim, targetdim):
    """Sequential dataset extended with a per-sample 'importance' field.

    Parameters:
        indim: input dimension passed through to SequentialDataSet.
        targetdim: target dimension; the importance field has the same
            width, i.e. one weight per target component.
    """
    SequentialDataSet.__init__(self, indim, targetdim)
    # extra field holding one importance weight per target dimension
    self.addField('importance', targetdim)
    # link it so it advances in lock-step with the sample/target fields
    self.link.append('importance')
class StockData:
    """Download, persist, preprocess and window daily stock quotes.

    Typical pipeline: downloadData -> saveData / readData -> normalizeData
    -> delayInputs -> createSequentialDataSets.  After readData, self.data
    rows look like [open, high, low, close, volume, target-close]
    (layout inferred from saveData's format string — TODO confirm).
    """

    def __init__(self):
        self.data = []
        self.trainData = []
        self.testData = []

    def downloadData(self, stock, collapse, start="2012-12-01", end="2013-01-01"):
        """Fetch quotes for `stock` from Quandl into self.data (numpy records)."""
        self.stock = stock
        self.start = start
        self.end = end
        self.data = Quandl.get(ibexStocks[self.stock],
                               authtoken="4bosWLqsiGqMtuuuYAcq",
                               collapse=collapse,
                               trim_start=self.start,
                               trim_end=self.end,
                               returns='numpy')

    def saveData(self, name):
        """Write tab-separated rows to `name`.

        Column 4 (close) is written twice: once as a feature and once as a
        provisional target that delayInputs later shifts.  Rows whose
        column 5 (presumably volume) is falsy are skipped.
        """
        with open(name, 'w') as f:
            for row in self.data:
                if row[5]:
                    f.write("%.3f\t%.3f\t%.3f\t%.3f\t%d\t%.3f\n" %
                            (row[1], row[2], row[3], row[4], row[5], row[4]))

    def readData(self, name, delimiter='\t'):
        """Load a file written by saveData back into self.data as a float array."""
        with open(name) as f:
            for line in f:
                self.data.append([float(v) for v in line.strip().split(delimiter)])
        self.data = np.array(self.data)

    def normalizeData(self):
        """Scale every column to [0, 1] by dividing by its column maximum.

        NOTE(review): assumes strictly positive column maxima — a zero max
        would divide by zero.
        """
        for col in range(self.data.shape[1]):
            self.data[:, col] = self.data[:, col] / self.data[:, col].max()

    def delayInputs(self):
        """Shift the target column one step ahead.

        Row i's target becomes row i+1's close; the last row, which has no
        future value, is dropped.
        """
        m = len(self.data)
        self.data[:m - 1, -1] = self.data[1:, -1]
        self.data = np.delete(self.data, m - 1, axis=0)

    def createSequentialDataSets(self, testRatio=0.7):
        """Split self.data into sequential train/test datasets.

        Bug fix: ``testRatio`` was previously ignored and 0.7 hard-coded;
        it is now honored (default reproduces the old behavior).
        NOTE(review): despite its name it is the *training* fraction.
        """
        ixSeparator = int(self.data.shape[0] * testRatio)
        trainData = self.data[:ixSeparator]
        testData = self.data[ixSeparator:]
        # columns 0-4 are inputs, column 5 the (delayed) target
        self.trainData = SequentialDataSet(5, 1)
        self.testData = SequentialDataSet(5, 1)
        for row in trainData:
            self.trainData.addSample(row[0:5], row[5])
        for row in testData:
            self.testData.addSample(row[0:5], row[5])

    def plotData(self):
        """Plot the target column (index 5) in blue."""
        plt.plot(self.data[:, 5], 'b')
        pylab.show()
#Then, make a simple time series: data = [1] * 3 + [2] * 3 data *= 3 print(data) #Now put this timeseries into a supervised dataset, where the target for each sample is the next sample:from pybrain.datasets import SequentialDataSet from itertools import cycle from pybrain.datasets.sequential import SequentialDataSet ds = SequentialDataSet(1, 1) for sample, next_sample in zip(data, cycle(data[1:])): ds.addSample(sample, next_sample) print ds #Build a simple LSTM network with 1 input node, 5 LSTM cells and 1 output node: from pybrain.tools.shortcuts import buildNetwork from pybrain.structure.modules import LSTMLayer net = buildNetwork(1, 5, 1, hiddenclass=LSTMLayer, outputbias=False, recurrent=True) #Train the network: from pybrain.supervised import RPropMinusTrainer from sys import stdout trainer = RPropMinusTrainer(net, dataset=ds) train_errors = [] # save errors for plotting later EPOCHS_PER_CYCLE = 5 CYCLES = 100 EPOCHS = EPOCHS_PER_CYCLE * CYCLES for i in xrange(CYCLES): trainer.trainEpochs(EPOCHS_PER_CYCLE)
# Network topology: linear input -> tanh preprocessing -> peephole LSTM
# (with a fully-connected recurrent self-loop) -> tanh postprocessing ->
# softmax output.
rnn.addInputModule(LinearLayer(dim=inputSize, name='in'))
rnn.addModule(TanhLayer(dim=hiddenSize, name = 'in_proc'))
rnn.addModule(LSTMLayer(dim=hiddenSize, peepholes=True, name='hidden'))
rnn.addModule(TanhLayer(dim=hiddenSize, name = 'out_proc'))
rnn.addOutputModule(SoftmaxLayer(dim=outputSize, name='out'))
rnn.addConnection(FullConnection(rnn['in'], rnn['in_proc'], name='c1'))
rnn.addConnection(FullConnection(rnn['in_proc'], rnn['hidden'], name='c2'))
rnn.addRecurrentConnection(FullConnection(rnn['hidden'], rnn['hidden'], name='c3'))
rnn.addConnection(FullConnection(rnn['hidden'], rnn['out_proc'], name='c4'))
rnn.addConnection(FullConnection(rnn['out_proc'], rnn['out'], name='c5'))
rnn.sortModules()  # finalize module ordering before the net is usable

# Construct dataset: one sequence per dataframe row; the row's index letter
# selects a one-hot class in outputVector.
trainingData = SequentialDataSet(inputSize, outputSize)
for index, row in df.iterrows():
    trainingData.newSequence()
    # assumes the row's first cell holds the whole input sequence — TODO confirm
    inputSequence = list((row.values)[0])
    # NOTE(review): a 4th class slot exists but no branch ever sets it — confirm
    outputVector = [0, 0, 0, 0]
    if index == 'A':
        outputVector[0] = 1
    if index == 'B':
        outputVector[1] = 1
    if index == 'C':
        outputVector[2] = 1
    # NOTE(review): chunk appears truncated here — samples are presumably
    # added to trainingData below this view.
# df.plot() # plt.show() # i1 = np.sin(np.arange(0, 20)) # i2 = np.sin(np.arange(0, 20)) * 2 # # t1 = np.ones([1, 20]) # t2 = np.ones([1, 20]) * 2 # # input = np.array([i1, i2, i1, i2]).reshape(20 * 4, 1) # target = np.array([t1, t2, t1, t2]).reshape(20 * 4, 1) # Create datasets print 'Preparing dataset ...' # ts = sampler.load_csv('data/series.csv') ds = SequentialDataSet(inLayerCount, outLayerCount) ds.newSequence() # ds = SupervisedDataSet(inLayerCount, outLayerCount) for row in df.itertuples(index=False): ds.addSample(row[0:columns-2], row[columns-2]) ds.endOfData() # Create bp trainer trainer = BackpropTrainer(net, ds) # Trains the datasets print 'Training ...' epoch = 1000 error = 1.0
from pybrain import LinearLayer, SigmoidLayer, FullConnection
from pybrain.datasets.sequential import SequentialDataSet
from pybrain.supervised.trainers.backprop import BackpropTrainer
from pybrain.tools.customxml.networkreader import NetworkReader
from pybrain.tools.customxml.networkwriter import NetworkWriter
from time import time
import pickle

# Resume a previously trained network and its accumulated training stats.
n = NetworkReader.readFrom('net.xml')
stats = pickle.load( open( "stats.p", "rb" ) )
epochs = stats["epochs"]
totaltime = stats["time"]
# round(t/36)/100 == t/3600 rounded to 2 decimals, i.e. seconds -> hours
print("Loaded network from net.xml.", epochs, "epochs have already been run.", round(totaltime/36)/100, "hours spent training.")

# Build a dataset of binary addition problems: inputs are bit pairs of two
# random numbers, the target is their sum's bits.
ds = SequentialDataSet(2,1)
from random import randint
start = time()
for i in range(1000):
    an = randint(0, 1000000)
    bn = randint(0, 1000000)
    a = [int(x) for x in bin(an)[2:]]
    b = [int(x) for x in bin(bn)[2:]]
    r = [int(x) for x in bin(an+bn)[2:]]
    # left-pad both addends with zeros so all three bit lists share the
    # sum's length (the sum can be one bit longer than either addend)
    while len(a) < len(r):
        a = [0] + a
    while len(b) < len(r):
        b = [0] + b
    # NOTE(review): loop body appears truncated here — the samples are
    # presumably added to `ds` below this view.
# Hand-wired LSTM-style hidden layer using sliced connections: the first
# `dim` units of h act as the cell output feeding the tanh output layer,
# while the slice beyond 4*dim carries raw state forward via 'rstate'.
o = TanhLayer(1, name = 'o')
b = BiasUnit('bias')
net.addModule(b)
net.addOutputModule(o)
net.addInputModule(i)
net.addModule(h)
net.addConnection(FullConnection(i, h, outSliceTo = 4*dim, name = 'f1'))
net.addConnection(FullConnection(b, h, outSliceTo = 4*dim, name = 'f2'))
net.addRecurrentConnection(FullConnection(h, h, inSliceTo = dim, outSliceTo = 4*dim, name = 'r1'))
net.addRecurrentConnection(IdentityConnection(h, h, inSliceFrom = dim, outSliceFrom = 4*dim, name = 'rstate'))
net.addConnection(FullConnection(h, o, inSliceTo = dim, name = 'f3'))
net.sortModules()  # finalize ordering before use
print net

# Training data: CSV rows with 15 input columns and a scalar target.
# NOTE(review): inputs are row[:15] but the target is row[16], so column 15
# is skipped entirely — confirm this is intentional.
ds = SequentialDataSet(15, 1)
ds.newSequence()
# NOTE(review): `input` shadows the builtin and the file is never closed
input = open(sys.argv[1], 'r')
for line in input.readlines():
    row = np.array(line.split(','))
    ds.addSample([float(x) for x in row[:15]], float(row[16]))
print ds

# Optional held-out test set from a second CSV path argument.
if len(sys.argv) > 2:
    test = SequentialDataSet(15, 1)
    test.newSequence()
    input = open(sys.argv[2], 'r')
    for line in input.readlines():
        row = np.array(line.split(','))
        test.addSample([float(x) for x in row[:15]], float(row[16]))
# Network topology (input module added above this view): tanh preprocessing
# -> peephole LSTM with a recurrent self-loop -> tanh postprocessing ->
# softmax output.
rnn.addModule(TanhLayer(dim=hiddenSize, name='in_proc'))
rnn.addModule(LSTMLayer(dim=hiddenSize, peepholes=True, name='hidden'))
rnn.addModule(TanhLayer(dim=hiddenSize, name='out_proc'))
rnn.addOutputModule(SoftmaxLayer(dim=outputSize, name='out'))
rnn.addConnection(FullConnection(rnn['in'], rnn['in_proc'], name='c1'))
rnn.addConnection(FullConnection(rnn['in_proc'], rnn['hidden'], name='c2'))
rnn.addRecurrentConnection(
    FullConnection(rnn['hidden'], rnn['hidden'], name='c3'))
rnn.addConnection(FullConnection(rnn['hidden'], rnn['out_proc'], name='c4'))
rnn.addConnection(FullConnection(rnn['out_proc'], rnn['out'], name='c5'))
rnn.sortModules()  # finalize module ordering before the net is usable

# Construct dataset: one sequence per dataframe row; the row's index letter
# selects a one-hot class in outputVector.
trainingData = SequentialDataSet(inputSize, outputSize)
for index, row in df.iterrows():
    trainingData.newSequence()
    # assumes the row's first cell holds the whole input sequence — TODO confirm
    inputSequence = list((row.values)[0])
    # NOTE(review): a 4th class slot exists but no branch ever sets it — confirm
    outputVector = [0, 0, 0, 0]
    if index == 'A':
        outputVector[0] = 1
    if index == 'B':
        outputVector[1] = 1
    if index == 'C':
        outputVector[2] = 1
    # NOTE(review): chunk appears truncated here — samples are presumably
    # added to trainingData below this view.
def makeMelodyDataSet(melodies):
    """Collect training samples from every melody into one dataset.

    Starts a fresh dataset sequence per melody and lets each melody append
    its own samples via its addSamples hook.
    """
    ds = SequentialDataSet(sampleSize(), outputSize())
    for melody in melodies:
        ds.newSequence()
        melody.addSamples(ds)
    return ds