def buildDS(n, num, dur):
    ds = SequentialDataSet(1, 1)
    dt = n['gfnn'].dt
    t = np.arange(0, dur, dt)
    length = len(t)
    for i in range(num):
        x = np.zeros(length)
        # tempo between 1 and 2 bps
        bps = 1 + np.random.random()
        p = 1. / bps
        lastPulse = np.random.random() * p
        for j in range(length):
            if t[j] > lastPulse and t[j] >= lastPulse + p:
                x[j] = 0.1
                lastPulse = t[j]
            else:
                if j > 0:
                    if x[j-1] < 1e-5:
                        x[j] = 0
                    else:
                        x[j] = x[j-1] * 0.5
        # try to predict the next sample
        target = np.roll(x, -1)
        ds.newSequence()
        for j in range(length):
            ds.addSample(x[j], target[j])
    return ds
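# A hedged usage sketch: the only attribute of n['gfnn'] that buildDS reads is
# dt, so a stand-in object with that attribute is enough to exercise it here
# (the 0.01 step and the counts below are illustrative, not from the source):
from types import SimpleNamespace

n = {'gfnn': SimpleNamespace(dt=0.01)}   # hypothetical stand-in for the real gfnn object
ds = buildDS(n, num=3, dur=4.0)          # 3 pulse-train sequences, 4 seconds each
print(ds.getNumSequences())              # -> 3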
def generateSuperimposedSineData(sinefreqs, space, yScales=None):
    sine = SuperimposedSine(sinefreqs)
    if yScales is not None:
        sine.yScales = array(yScales)
    dataset = SequentialDataSet(0, 1)
    data = sine.getFuncValues(space)
    dataset.newSequence()
    for i in range(len(data)):
        dataset.addSample([], data[i])
    return dataset
def makeMelodyDataSet(melodies, inspirationFunc=randomInspiration, inspirationLength=8):
    seqDataSet = SequentialDataSet(sampleSize(), outputSize())
    for m in melodies:
        barCount = m.bars[-1] + 1
        assert barCount <= 8, "Bar counts greater than 8 unsupported"
        inspiration = inspirationFunc(inspirationLength, m)
        seqDataSet.newSequence()
        for s in range(len(m.pitches) - 1):
            seqDataSet.addSample(
                makeNoteSample(m.pitches[s], m.durations[s],
                               inspiration[s % inspirationLength], m.bars[s]),
                makeNoteTarget(m.pitches[s+1], m.durations[s+1]))
    return seqDataSet
def list_to_dataset(inputs, outputs, dataset=None):
    """List to Dataset

    Convert a standard list to a dataset. The list must be given in the
    following format:

    Inputs:  2 dimension list (N x M)
    Outputs: 2 dimension list (N x K)

    N: Number of time steps in data series
    M: Number of inputs per time step
    K: Number of outputs per time step

    Arguments:
        inputs:  The input list given under the above conditions.
        outputs: The output list given under the above conditions.
        dataset: A SequentialDataSet object to add a new sequence. New
                 dataset generated if None. (Default: None)

    Returns:
        A SequentialDataSet object built from the retrieved input/output data.
    """
    assert len(inputs) > 0
    assert len(outputs) > 0
    assert len(inputs) == len(outputs)

    # The dataset object has not been initialized. We must determine the
    # input and output size based on the unpacked data
    num_samples = len(inputs)
    in_dim = 1 if len(inputs.shape) == 1 else inputs.shape[1]
    out_dim = 1 if len(outputs.shape) == 1 else outputs.shape[1]

    # If the dataset does not exist, create it. Otherwise, use the dataset given
    if not dataset:
        dataset = SequentialDataSet(in_dim, out_dim)

    # Make a new sequence for the given input/output pair
    dataset.newSequence()

    for i in range(num_samples):
        dataset.addSample(inputs[i], outputs[i])

    return dataset
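# Despite the name, list_to_dataset reads inputs.shape and outputs.shape, so
# NumPy arrays are the safest thing to pass. A minimal usage sketch with
# made-up values:
import numpy as np

inputs = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8]])   # 4 steps x 2 inputs
outputs = np.array([0.2, 0.4, 0.6, 0.8])                               # 4 steps x 1 output

ds = list_to_dataset(inputs, outputs)
print(ds.indim, ds.outdim)        # -> 2 1
print(ds.getNumSequences())       # -> 1

# Passing the dataset back in appends the samples as a second sequence.
ds = list_to_dataset(inputs, outputs, dataset=ds)
print(ds.getNumSequences())       # -> 2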
class StockData:

    def __init__(self):
        self.data = []
        self.trainData = []
        self.testData = []

    def downloadData(self, stock, collapse, start="2012-12-01", end="2013-01-01"):
        self.stock = stock
        self.start = start
        self.end = end
        self.data = Quandl.get(ibexStocks[self.stock], authtoken="4bosWLqsiGqMtuuuYAcq",
                               collapse=collapse, trim_start=self.start, trim_end=self.end,
                               returns='numpy')

    def saveData(self, name):
        with open(name, 'w') as f:
            for i in range(len(self.data)):
                if self.data[i][5]:
                    f.write("%.3f\t%.3f\t%.3f\t%.3f\t%d\t%.3f\n" %
                            (self.data[i][1], self.data[i][2], self.data[i][3],
                             self.data[i][4], self.data[i][5], self.data[i][4]))

    def readData(self, name, delimiter='\t'):
        with open(name) as f:
            for line in f:
                self.data.append(line.strip().split(delimiter))
        for item in self.data:
            for i in range(len(item)):
                item[i] = float(item[i])
        self.data = np.array(self.data)

    def normalizeData(self):
        def normalize(vector):
            maximo = max(vector)
            for i in range(len(vector)):
                vector[i] = vector[i] / maximo
            return vector
        for i in range(self.data.shape[1]):
            self.data[:, i] = normalize(self.data[:, i])

    def delayInputs(self):
        m = len(self.data)
        for i in range(1, m):
            self.data[i-1, -1] = self.data[i, -1]
        self.data = np.delete(self.data, m-1, axis=0)

    def createSequentialDataSets(self, testRatio=0.7):
        # the first testRatio fraction of the rows is used for training
        ixSeparator = int(self.data.shape[0] * testRatio)
        trainData = self.data[0:ixSeparator]
        testData = self.data[ixSeparator:]
        self.trainData = SequentialDataSet(5, 1)
        self.testData = SequentialDataSet(5, 1)
        for i in range(len(trainData)):
            self.trainData.addSample(trainData[i, 0:5], trainData[i, 5])
        for i in range(len(testData)):
            self.testData.addSample(testData[i, 0:5], testData[i, 5])

    def plotData(self):
        plt.plot(self.data[:, 5], 'b')
        pylab.show()
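# A hedged sketch of how the pipeline above might be driven end to end,
# assuming a tab-separated file with six numeric columns per row (the file
# name is hypothetical; the exact columns depend on what saveData wrote):
sd = StockData()
sd.readData('stock.tsv')
sd.normalizeData()
sd.delayInputs()               # shift the target column so each row predicts the next value
sd.createSequentialDataSets()  # fills sd.trainData / sd.testData as SequentialDataSet(5, 1)
print(len(sd.trainData), len(sd.testData))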
# Then, make a simple time series:
data = [1] * 3 + [2] * 3
data *= 3
print(data)

# Now put this time series into a supervised dataset, where the target for
# each sample is the next sample:
from itertools import cycle
from pybrain.datasets import SequentialDataSet

ds = SequentialDataSet(1, 1)
for sample, next_sample in zip(data, cycle(data[1:])):
    ds.addSample(sample, next_sample)
print(ds)

# Build a simple LSTM network with 1 input node, 5 LSTM cells and 1 output node:
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import LSTMLayer

net = buildNetwork(1, 5, 1, hiddenclass=LSTMLayer, outputbias=False, recurrent=True)

# Train the network:
from pybrain.supervised import RPropMinusTrainer
from sys import stdout

trainer = RPropMinusTrainer(net, dataset=ds)
train_errors = []  # save errors for plotting later
EPOCHS_PER_CYCLE = 5
CYCLES = 100
EPOCHS = EPOCHS_PER_CYCLE * CYCLES
for i in range(CYCLES):
    trainer.trainEpochs(EPOCHS_PER_CYCLE)
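# The loop above never actually fills train_errors. A minimal sketch, assuming
# matplotlib is available, of recording the error after each cycle with
# trainer.testOnData() and plotting the learning curve afterwards:
import matplotlib.pyplot as plt

train_errors = []
for i in range(CYCLES):
    trainer.trainEpochs(EPOCHS_PER_CYCLE)
    # mean error of the network on the training sequences after this cycle
    train_errors.append(trainer.testOnData())

plt.plot(range(EPOCHS_PER_CYCLE, EPOCHS + 1, EPOCHS_PER_CYCLE), train_errors)
plt.xlabel('epoch')
plt.ylabel('error')
plt.show()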
#
# t1 = np.ones([1, 20])
# t2 = np.ones([1, 20]) * 2
#
# input = np.array([i1, i2, i1, i2]).reshape(20 * 4, 1)
# target = np.array([t1, t2, t1, t2]).reshape(20 * 4, 1)

# Create datasets
print('Preparing dataset ...')
# ts = sampler.load_csv('data/series.csv')
ds = SequentialDataSet(inLayerCount, outLayerCount)
ds.newSequence()
# ds = SupervisedDataSet(inLayerCount, outLayerCount)
for row in df.itertuples(index=False):
    ds.addSample(row[0:columns-2], row[columns-2])
ds.endOfData()

# Create bp trainer
trainer = BackpropTrainer(net, ds)

# Train on the dataset
print('Training ...')
epoch = 1000
error = 1.0
while error > delta_error and epoch >= 0:
    error = trainer.train()
    epoch -= 1
    print('Epoch = %d, Error = %f' % (epoch, error))
net.addConnection(FullConnection(i, h, outSliceTo=4*dim, name='f1'))
net.addConnection(FullConnection(b, h, outSliceTo=4*dim, name='f2'))
net.addRecurrentConnection(FullConnection(h, h, inSliceTo=dim, outSliceTo=4*dim, name='r1'))
net.addRecurrentConnection(IdentityConnection(h, h, inSliceFrom=dim, outSliceFrom=4*dim, name='rstate'))
net.addConnection(FullConnection(h, o, inSliceTo=dim, name='f3'))
net.sortModules()
print(net)

ds = SequentialDataSet(15, 1)
ds.newSequence()
input = open(sys.argv[1], 'r')
for line in input.readlines():
    row = np.array(line.split(','))
    ds.addSample([float(x) for x in row[:15]], float(row[16]))
print(ds)

if len(sys.argv) > 2:
    test = SequentialDataSet(15, 1)
    test.newSequence()
    input = open(sys.argv[2], 'r')
    for line in input.readlines():
        row = np.array(line.split(','))
        test.addSample([float(x) for x in row[:15]], float(row[16]))
else:
    test = ds
print(test)

net.reset()
trainer = RPropMinusTrainer(net, dataset=ds, verbose=True)
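# The snippet stops after constructing the trainer. A minimal sketch of how a
# training/evaluation step might look, reusing the net, ds and test objects
# defined above (the epoch count is illustrative):
for epoch in range(100):
    train_error = trainer.train()        # one pass over the training sequences
    print('epoch %d: train error %f' % (epoch, train_error))

net.reset()
print('test error: %f' % trainer.testOnData(test))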