def main(): usage = ''' Usage: ------------------------------------------------ parallelized cross-validation. Prints misclassification rate and standard deviation python %s [OPTIONS] infile trainshapefile Options: -h this help -a <int> algorithm 1=MaxLike(def fault) 2=Gausskernel 3=NNet(backprop) 4=NNet(congrad) 5=NNet(Kalman) 6=Dnn(tensorflow) 7=SVM -p <list> band positions (default all) e.g. -p [1,2,3] -L <list> hidden neurons (default [10]) e.g. [10,10] -e <int> epochs (default 100) -------------------------------------------------''' % sys.argv[0] options, args = getopt.getopt(sys.argv[1:], 'hp:a:e:L:') pos = None L = [10] trainalg = 1 epochs = 100 for option, value in options: if option == '-h': print usage return elif option == '-p': pos = eval(value) elif option == '-e': epochs = eval(value) elif option == '-a': trainalg = eval(value) elif option == '-L': L = eval(value) if len(args) != 2: print 'Incorrect number of arguments' print usage sys.exit(1) infile = args[0] trnfile = args[1] gdal.AllRegister() inDataset = gdal.Open(infile, GA_ReadOnly) bands = inDataset.RasterCount if pos is None: pos = range(1, bands + 1) N = len(pos) if trainalg == 1: algorithm = 'MaxLike' elif trainalg == 2: algorithm = 'Gausskernel' elif trainalg == 3: algorithm = 'NNet(Backprop)' elif trainalg == 4: algorithm = 'NNet(Congrad)' elif trainalg == 5: algorithm = 'NNet(Kalman)' elif trainalg == 6: algorithm = 'Dnn(Tensorflow)' else: algorithm = 'SVM' print 'Algorithm: %s' % algorithm # get the training data Gs, ls, K, _ = rs.readshp(trnfile, inDataset, pos) m = ls.shape[0] print str(m) + ' training pixel vectors were read in' # stretch the pixel vectors to [-1,1] (for ffn) maxx = np.max(Gs, 0) minx = np.min(Gs, 0) for j in range(N): Gs[:,j]=2*(Gs[:,j]-minx[j])/(maxx[j]-minx[j]) \ - 1.0 # random permutation of training data idx = np.random.permutation(m) Gs = Gs[idx, :] ls = ls[idx, :] # cross-validation start = time.time() traintest = [] for i in range(10): sl = slice(i * m // 10, (i + 1) * m // 10) traintest.append( (np.delete(Gs,sl,0),np.delete(ls,sl,0), \ Gs[sl,:],ls[sl,:],L,epochs,trainalg) ) try: print 'attempting parallel calculation ...' c = Client() print 'available engines %s' % str(c.ids) v = c[:] result = v.map_sync(crossvalidate, traintest) except Exception as e: print '%s \nfailed, running sequentially ...' % e result = map(crossvalidate, traintest) print 'execution time: %s' % str(time.time() - start) print 'misclassification rate: %f' % np.mean(result) print 'standard deviation: %f' % np.std(result)
def main(): usage = ''' Usage: -------------------------------------- Supervised classification of multispectral images python %s [OPTIONS] filename shapefile Options: -h this help -p <list> RGB band positions to be included (default all) e.g. -p [1,2,3] -a <int> algorithm 1=MaxLike 2=Gausskernel 3=NNet(backprop) 4=NNet(congrad) 5=NNet(Kalman) 6=Dnn(tensorflow) 7=SVM -e <int> number of epochs (default 100) -t <float> fraction for training (default 0.67) -v use validation (reserve half of training data for validation) -P generate class probability image (not available for MaxLike) -n suppress graphical output -L <list> list of hidden neurons in each hidden layer (default [10]) If the input file is named path/filenbasename.ext then The output classification file is named path/filebasename_class.ext the class probabilities output file is named path/filebasename_classprobs.ext and the test results file is named path/filebasename_<classifier>.tst -------------------------------------'''%sys.argv[0] outbuffer = 100 options, args = getopt.getopt(sys.argv[1:],'hnvPp:t:e:a:L:') pos = None probs = False L = [10] trainalg = 1 epochs = 100 graphics = True validation = False trainfrac = 0.67 for option, value in options: if option == '-h': print usage return elif option == '-p': pos = eval(value) elif option == '-n': graphics = False elif option == '-v': validation = True elif option == '-t': trainfrac = eval(value) elif option == '-e': epochs = eval(value) elif option == '-a': trainalg = eval(value) elif option == '-L': L = eval(value) elif option == '-P': probs = True if len(args) != 2: print 'Incorrect number of arguments' print usage sys.exit(1) if trainalg == 1: algorithm = 'MaxLike' elif trainalg == 2: algorithm = 'Gausskernel' elif trainalg == 3: algorithm = 'NNet(Backprop)' elif trainalg == 4: algorithm = 'NNet(Congrad)' elif trainalg == 5: algorithm = 'NNet(Kalman)' elif trainalg == 6: algorithm = 'Dnn(tensorflow)' else: algorithm = 'SVM' print 'Training with %s'%algorithm infile = args[0] trnfile = args[1] gdal.AllRegister() if infile: inDataset = gdal.Open(infile,GA_ReadOnly) cols = inDataset.RasterXSize rows = inDataset.RasterYSize bands = inDataset.RasterCount geotransform = inDataset.GetGeoTransform() else: return if pos is None: pos = range(1,bands+1) N = len(pos) rasterBands = [] for b in pos: rasterBands.append(inDataset.GetRasterBand(b)) # output files path = os.path.dirname(infile) basename = os.path.basename(infile) root, ext = os.path.splitext(basename) outfile = '%s/%s_class%s'%(path,root,ext) tstfile = '%s/%s_%s.tst'%(path,root,algorithm) if (trainalg in (2,3,4,5,6)) and probs: # class probabilities file probfile = '%s/%s_classprobs%s'%(path,root,ext) else: probfile = None # get the training data Xs,Ls,K,classnames = rs.readshp(trnfile,inDataset,pos) m = Ls.shape[0] # stretch the pixel vectors to [-1,1] for ffn, dnn maxx = np.max(Xs,0) minx = np.min(Xs,0) for j in range(len(pos)): Xs[:,j] = 2*(Xs[:,j]-minx[j])/(maxx[j]-minx[j]) - 1.0 # random permutation of training data idx = np.random.permutation(m) Xs = Xs[idx,:] Ls = Ls[idx,:] # train on trainfrac of training examples, rest for testing Xstrn = Xs[:int(trainfrac*m),:] Lstrn = Ls[:int(trainfrac*m),:] Xstst = Xs[int(trainfrac*m):,:] Lstst = Ls[int(trainfrac*m):,:] # setup output datasets driver = inDataset.GetDriver() outDataset = driver.Create(outfile,cols,rows,1,GDT_Byte) projection = inDataset.GetProjection() if geotransform is not None: outDataset.SetGeoTransform(geotransform) if projection is not None: outDataset.SetProjection(projection) outBand = outDataset.GetRasterBand(1) if probfile: probDataset = driver.Create(probfile,cols,rows,K,GDT_Byte) if geotransform is not None: probDataset.SetGeoTransform(geotransform) if projection is not None: probDataset.SetProjection(projection) probBands = [] for k in range(K): probBands.append(probDataset.GetRasterBand(k+1)) # initialize classifier if trainalg == 1: classifier = sc.Maxlike(Xstrn,Lstrn) elif trainalg == 2: classifier = sc.Gausskernel(Xstrn,Lstrn) elif trainalg == 3: classifier = sc.Ffnbp(Xstrn,Lstrn,L,epochs,validation) elif trainalg == 4: classifier = sc.Ffncg(Xstrn,Lstrn,L,epochs,validation) elif trainalg == 5: classifier = sc.Ffnekf(Xstrn,Lstrn,L,epochs,validation) elif trainalg == 6: # classifier = sc.Dnn_learn(Xstrn,Lstrn,L,epochs) # classifier = sc.Dnn_core(Xstrn,Lstrn,L,epochs) classifier = sc.Dnn_keras(Xstrn,Lstrn,L,epochs) elif trainalg == 7: classifier = sc.Svm(Xstrn,Lstrn) # train it print 'training on %i pixel vectors...' % np.max(classifier._Gs.shape) print 'classes: %s'%str(classnames) start = time.time() result = classifier.train() print 'elapsed time %s' %str(time.time()-start) if result is not None: if (trainalg in [3,4,5]) and graphics: # the cost arrays are returned in result cost = np.log(result[0]) costv = np.log(result[1]) ymax = np.max(cost) #ymin = np.min(cost)-1 ymin = 5.0 xmax = len(cost) plt.plot(range(xmax),costv,'r',range(xmax),cost,'b') plt.axis([0,xmax,ymin,ymax]) plt.title('Log(Cross entropy)') plt.xlabel('Epoch') # classify the image print 'classifying...' start = time.time() tile = np.zeros((outbuffer*cols,N),dtype=np.float32) for row in range(rows/outbuffer): print 'row: %i'%(row*outbuffer) for j in range(N): tile[:,j] = rasterBands[j].ReadAsArray(0,row*outbuffer,cols,outbuffer).ravel() tile[:,j] = 2*(tile[:,j]-minx[j])/(maxx[j]-minx[j]) - 1.0 cls, Ms = classifier.classify(tile) outBand.WriteArray(np.reshape(cls,(outbuffer,cols)),0,row*outbuffer) if probfile and Ms is not None: Ms = np.byte(Ms*255) for k in range(K): probBands[k].WriteArray(np.reshape(Ms[:,k],(outbuffer,cols)),0,row*outbuffer) outBand.FlushCache() print 'elapsed time %s' %str(time.time()-start) outDataset = None inDataset = None if probfile: for probBand in probBands: probBand.FlushCache() probDataset = None print 'class probabilities written to: %s'%probfile print 'thematic map written to: %s'%outfile if (trainalg in [3,4,5]) and graphics: plt.show() if tstfile: with open(tstfile,'w') as f: print >>f, algorithm +'test results for %s'%infile print >>f, time.asctime() print >>f, 'Classification image: %s'%outfile print >>f, 'Class probabilities image: %s'%probfile print >>f, Lstst.shape[0],Lstst.shape[1] classes, _ = classifier.classify(Xstst) labels = np.argmax(Lstst,axis=1)+1 for i in range(len(classes)): print >>f, classes[i], labels[i] f.close() print 'test results written to: %s'%tstfile print 'done' else: print 'an error occured' return
def main(): usage = ''' Usage: --------------------------------------------------------- python %s [-a algorithm] [-p bands] [-L hidden neurons] [-e epochs] infile trainShapefile bandPositions is a list, e.g., -p [1,2,4] algorithm 1=MaxLike 2=Gausskernel 3=NNet(backprop) 4=NNet(congrad) 5=NNet(Kalman) 6=Dnn(tensorflow) 7=SVM prints misclassification rate and standard deviation --------------------------------------------------------''' %sys.argv[0] options, args = getopt.getopt(sys.argv[1:],'hp:a:e:L:') pos = None L = [10] trainalg = 1 epochs = 100 for option, value in options: if option == '-h': print usage return elif option == '-p': pos = eval(value) elif option == '-e': epochs = eval(value) elif option == '-a': trainalg = eval(value) elif option == '-L': L = eval(value) if len(args) != 2: print 'Incorrect number of arguments' print usage sys.exit(1) infile = args[0] trnfile = args[1] gdal.AllRegister() inDataset = gdal.Open(infile,GA_ReadOnly) bands = inDataset.RasterCount if pos is None: pos = range(1,bands+1) N = len(pos) if trainalg == 1: algorithm = 'MaxLike' elif trainalg == 2: algorithm = 'Gausskernel' elif trainalg == 3: algorithm = 'NNet(Backprop)' elif trainalg == 4: algorithm = 'NNet(Congrad)' elif trainalg == 5: algorithm = 'NNet(Kalman)' elif trainalg == 6: algorithm = 'Dnn(Tensorflow)' else: algorithm = 'SVM' print 'Algorithm: %s'%algorithm # get the training data Gs,ls,K,classnames = rs.readshp(trnfile,inDataset,pos) m = ls.shape[0] print str(m) + ' training pixel vectors were read in' # stretch the pixel vectors to [-1,1] (for ffn) maxx = np.max(Gs,0) minx = np.min(Gs,0) for j in range(N): Gs[:,j] = 2*(Gs[:,j]-minx[j])/(maxx[j]-minx[j]) - 1.0 # random permutation of training data idx = np.random.permutation(m) Gs = Gs[idx,:] ls = ls[idx,:] # cross-validation start = time.time() rc = ipyparallel.Client() print 'running %i IPython engines'%len(rc) traintest = [] for i in range(10): sl = slice(i*m//10,(i+1)*m//10) traintest.append( (np.delete(Gs,sl,0),np.delete(ls,sl,0), \ Gs[sl,:],ls[sl,:],L,epochs,trainalg) ) v = rc[:] v.execute('import auxil.supervisedclass as sc') result = v.map(crossvalidate,traintest).get() print 'execution time: %s' %str(time.time()-start) print 'misclassification rate: %f' %np.mean(result) print 'standard deviation: %f' %np.std(result)
def main(): usage = ''' Usage: ------------------------------------------------ supervised classification of multispectral images with ADABOOST.M1 python %s [OPTIONS] filename trainShapefile Options: -h this help -p <list> band positions e.g. -p [1,2,3,4] -L <int> number of hidden neurons (default 10) -n <int> number of nnet instances (default 50) -e <int> epochs for ekf training (default 3) If the input file is named path/filenbasename.ext then The output classification file is named path/filebasename_class.ext ------------------------------------------------''' % sys.argv[0] outbuffer = 100 options, args = getopt.getopt(sys.argv[1:], 'hp:n:e:L:') pos = None L = [10] epochs = 3 instances = 50 for option, value in options: if option == '-h': print usage return elif option == '-p': pos = eval(value) elif option == '-e': epochs = eval(value) elif option == '-n': instances = eval(value) elif option == '-L': L = [eval(value)] if len(args) != 2: print 'Incorrect number of arguments' print usage sys.exit(1) print 'Training with ADABOOST.M1 and %i epochs per ffn' % epochs infile = args[0] trnfile = args[1] gdal.AllRegister() if infile: inDataset = gdal.Open(infile, GA_ReadOnly) cols = inDataset.RasterXSize rows = inDataset.RasterYSize bands = inDataset.RasterCount geotransform = inDataset.GetGeoTransform() else: return if pos is None: pos = range(1, bands + 1) N = len(pos) rasterBands = [] for b in pos: rasterBands.append(inDataset.GetRasterBand(b)) # output file path = os.path.dirname(infile) basename = os.path.basename(infile) root, ext = os.path.splitext(basename) outfile = '%s/%s_class%s' % (path, root, ext) # setup output class image dataset driver = inDataset.GetDriver() outDataset = driver.Create(outfile, cols, rows, 1, GDT_Byte) projection = inDataset.GetProjection() if geotransform is not None: outDataset.SetGeoTransform(geotransform) if projection is not None: outDataset.SetProjection(projection) outBand = outDataset.GetRasterBand(1) # get the training data Xs, Ls, K, _ = rs.readshp(trnfile, inDataset, pos) m = Ls.shape[0] # stretch the pixel vectors to [-1,1] maxx = np.max(Xs, 0) minx = np.min(Xs, 0) for j in range(len(pos)): Xs[:, j] = 2 * (Xs[:, j] - minx[j]) / (maxx[j] - minx[j]) - 1.0 # random permutation of training data idx = np.random.permutation(m) Xs = Xs[idx, :] Ls = Ls[idx, :] # train on 2/3 of training examples, rest for testing mtrn = int(0.67 * m) mtst = m - mtrn Xstrn = Xs[:mtrn, :] Lstrn = Ls[:mtrn, :] Xstst = Xs[mtrn:, :] Lstst = Ls[mtrn:, :] labels_train = np.argmax(Lstrn, 1) labels_test = np.argmax(Lstst, 1) # list of network instances, weights and errors ffns = [] alphas = [] errtrn = [] errtst = [] # initial probability distribution p = np.ones(mtrn) / mtrn # loop through the network instance start = time.time() instance = 1 while instance < instances: trial = 1 while trial < 6: print 'running instance: %i trial: %i' \ %(instance,trial) # instantiate a ffn and train it ffn = Ffnekfab(Xstrn, Lstrn, p, L, epochs) ffn.train() # determine beta labels, _ = ffn.classify(Xstrn) labels -= 1 idxi = np.where(labels != labels_train)[0] idxc = np.where(labels == labels_train)[0] epsilon = np.sum(p[idxi]) beta = epsilon / (1 - epsilon) if beta < 1.0: # continue ffns.append(ffn) alphas.append(np.log(1.0 / beta)) # update distribution p[idxc] = p[idxc] * beta p = p / np.sum(p) # train error labels, _ = seq_class(ffns, Xstrn, alphas, K) tmp = np.where(labels != labels_train, 1, 0) errtrn.append(np.sum(tmp) / float(mtrn)) # test error labels, _ = seq_class(ffns, Xstst, alphas, K) tmp = np.where(labels != labels_test, 1, 0) errtst.append(np.sum(tmp) / float(mtst)) print 'train error: %f test error: %f'\ %(errtrn[-1],errtst[-1]) # this instance is done trial = 6 instance += 1 else: trial += 1 # break off training if trial == 6: instance = instances print 'elapsed time %s' % str(time.time() - start) # plot errors n = len(errtrn) errtrn = np.array(errtrn) errtst = np.array(errtst) x = np.arange(1, n + 1, 1) ax = plt.subplot(111) ax.semilogx(x, errtrn, label='train') ax.semilogx(x, errtst, label='test') ax.legend() ax.set_xlabel('number of networks') ax.set_ylabel('classification error') plt.savefig('/home/mort/LaTeX/new projects/CRC4/Chapter7/fig7_3.eps', bbox_inches='tight') plt.show() # classify the image print 'classifying...' start = time.time() tile = np.zeros((outbuffer * cols, N), dtype=np.float32) for row in range(rows / outbuffer): print 'row: %i' % (row * outbuffer) for j in range(N): tile[:, j] = rasterBands[j].ReadAsArray(0, row * outbuffer, cols, outbuffer).ravel() tile[:, j] = 2 * (tile[:, j] - minx[j]) / (maxx[j] - minx[j]) - 1.0 cls, _ = seq_class(ffns, tile, alphas, K) outBand.WriteArray(np.reshape(cls, (outbuffer, cols)), 0, row * outbuffer) outBand.FlushCache() print 'thematic map written to: %s' % outfile print 'elapsed time %s' % str(time.time() - start)