def readCROHMEdB(locationList):
    """Build the labelled symbol database from CROHME InkML files.

    Every file whose name contains '.inkml' inside the directories of
    ``locationList`` is listed in 'toTrain.txt'.  Each listed file is passed
    to ``sg.main`` together with the output prefix
    'segmentedData/Folder000<n>/trainFile' (``n`` counts from 1; an existing
    folder with that name is wiped first).  Afterwards every folder under
    'segmentedData' is scanned: its 'trainFile<k>.inkml' files are paired,
    in numeric order, with the lines of its 'trainFile_GT.txt' and loaded
    through ``ink2Traces.i2trained``.  Finally the symbols are drawn,
    preprocessed with ``spp.preprocessing`` and get their features computed.

    Args:
        locationList: iterable of directory paths to scan for InkML files.

    Returns:
        A ``(psdB, tagClassification)`` pair: the preprocessed symbols and a
        dict mapping each symbol tag to the list of symbols carrying it.
    """
    sdB = []
    tagClassification = {}

    # List the files to import.  A missing list file (e.g. on the very first
    # run) must not abort the import, so only remove it when it is there.
    if os.path.exists('toTrain.txt'):
        os.remove('toTrain.txt')
    with open('toTrain.txt', 'w') as trainListFile:
        for destiny in locationList:
            for filenom in ls(destiny):
                if '.inkml' in filenom:
                    trainListFile.write(destiny + '/' + filenom + '\n')

    # Segment every listed file into its own numbered output folder.
    with open('toTrain.txt', 'r') as trainListFile:
        for lcont, linea in enumerate(trainListFile, 1):
            outFolder = 'segmentedData/' + 'Folder000' + str(lcont)
            if os.path.exists(outFolder):
                shutil.rmtree(outFolder)
            os.makedirs(outFolder)
            # NOTE(review): linea still carries its trailing newline, exactly
            # as before; presumably sg.main copes with it -- confirm.
            sg.main(['GO', linea, outFolder + '/trainFile'])

    prefix = 'trainFile'
    suffix = '.inkml'

    def symbolIndex(name):
        # 'trainFile12.inkml' -> 12, so files sort numerically rather than
        # lexicographically ('trainFile10' would otherwise precede
        # 'trainFile2').
        return int(name[len(prefix):-len(suffix)])

    # Scan the segmented folders and load every symbol with its label.
    for cases in os.listdir('segmentedData'):
        caseDir = 'segmentedData/' + cases
        filesHere = os.listdir(caseDir)
        filesHere.remove('trainFile_GT.txt')
        filesHere.sort(key=symbolIndex)  # sorted once, not once per element
        with open(caseDir + '/trainFile_GT.txt') as Gfile:
            # The k-th ground-truth line labels the k-th symbol file; the
            # label is whatever follows the first comma on the line.
            for filco, linea in enumerate(Gfile):
                etiq = linea[linea.index(',') + 1:].rstrip('\n')
                sdB.append(
                    ink2Traces.i2trained(caseDir + '/' + filesHere[filco],
                                         etiq))

    # Preprocess the symbols, compute their features and group them by tag.
    for symbol in sdB:
        symbol.draw()
    psdB = spp.preprocessing(sdB)
    for symbol in psdB:
        symbol.computeFeatures()
        tagClassification.setdefault(symbol.tag, []).append(symbol)
    return psdB, tagClassification
def readUNIPENdB(location):
    """Build the labelled symbol database from a UNIPEN directory tree.

    Looks for ``<location>/data/<folder>/aga/<file>`` and loads every such
    file with ``tds.mountDS(path, 'UNIPEN')``.  The collected symbols are
    drawn, preprocessed with ``spp.preprocessing`` and get their features
    computed.  Each visited file path and the type of the resulting tag
    dictionary are printed, as before.

    Args:
        location: root directory expected to contain a 'data' entry.

    Returns:
        A ``(psdB, tagClassification)`` pair: the preprocessed symbols and a
        dict mapping each symbol tag to the list of symbols carrying it.
        When ``location`` has no 'data' entry an empty database
        ``([], {})`` is returned; previously that case left the result
        names unbound and the caller without a usable pair.
    """
    sdB = []
    tagClassification = {}
    if 'data' not in ls(location):
        return sdB, tagClassification

    dataRoot = location + '/data'
    for curFolder in ls(dataRoot):
        folderPath = dataRoot + '/' + curFolder
        for curAltFold in ls(folderPath):
            # Only the 'aga' sub-folder of each data folder is imported.
            if curAltFold != 'aga':
                continue
            agaPath = folderPath + '/' + curAltFold
            for curFile in ls(agaPath):
                fullPath = agaPath + '/' + curFile
                # Parenthesised single-argument form prints the same text
                # under the Python 2 print statement.
                print(fullPath + ':')
                # extend() appends in place instead of copying the whole
                # list for every file.
                sdB.extend(tds.mountDS(fullPath, 'UNIPEN'))

    # Preprocess the symbols, compute their features and group them by tag.
    for symbol in sdB:
        symbol.draw()
    psdB = spp.preprocessing(sdB)
    for symbol in psdB:
        symbol.computeFeatures()
        tagClassification.setdefault(symbol.tag, []).append(symbol)
    print(type(tagClassification))
    return psdB, tagClassification
def readUNIPENdB(location):
    """Build the labelled symbol database from a UNIPEN directory tree.

    Looks for ``<location>/data/<folder>/aga/<file>`` and loads every such
    file with ``tds.mountDS(path, 'UNIPEN')``.  The collected symbols are
    drawn, preprocessed with ``spp.preprocessing`` and get their features
    computed.  Each visited file path and the type of the resulting tag
    dictionary are printed, as before.

    Args:
        location: root directory expected to contain a 'data' entry.

    Returns:
        A ``(psdB, tagClassification)`` pair: the preprocessed symbols and a
        dict mapping each symbol tag to the list of symbols carrying it.
        When ``location`` has no 'data' entry an empty database
        ``([], {})`` is returned; previously that case left the result
        names unbound and the caller without a usable pair.
    """
    sdB = []
    tagClassification = {}
    if 'data' not in ls(location):
        return sdB, tagClassification

    dataRoot = location + '/data'
    for curFolder in ls(dataRoot):
        folderPath = dataRoot + '/' + curFolder
        for curAltFold in ls(folderPath):
            # Only the 'aga' sub-folder of each data folder is imported.
            if curAltFold != 'aga':
                continue
            agaPath = folderPath + '/' + curAltFold
            for curFile in ls(agaPath):
                fullPath = agaPath + '/' + curFile
                # Parenthesised single-argument form prints the same text
                # under the Python 2 print statement.
                print(fullPath + ':')
                # extend() appends in place instead of copying the whole
                # list for every file.
                sdB.extend(tds.mountDS(fullPath, 'UNIPEN'))

    # Preprocess the symbols, compute their features and group them by tag.
    for symbol in sdB:
        symbol.draw()
    psdB = spp.preprocessing(sdB)
    for symbol in psdB:
        symbol.computeFeatures()
        tagClassification.setdefault(symbol.tag, []).append(symbol)
    print(type(tagClassification))
    return psdB, tagClassification
def readCROHMEdB(locationList):
    """Build the labelled symbol database from CROHME InkML files.

    Every file whose name contains '.inkml' inside the directories of
    ``locationList`` is listed in 'toTrain.txt'.  Each listed file is passed
    to ``sg.main`` together with the output prefix
    'segmentedData/Folder000<n>/trainFile' (``n`` counts from 1; an existing
    folder with that name is wiped first).  Afterwards every folder under
    'segmentedData' is scanned: its 'trainFile<k>.inkml' files are paired,
    in numeric order, with the lines of its 'trainFile_GT.txt' and loaded
    through ``ink2Traces.i2trained``.  Finally the symbols are drawn,
    preprocessed with ``spp.preprocessing`` and get their features computed.

    Args:
        locationList: iterable of directory paths to scan for InkML files.

    Returns:
        A ``(psdB, tagClassification)`` pair: the preprocessed symbols and a
        dict mapping each symbol tag to the list of symbols carrying it.
    """
    sdB = []
    tagClassification = {}

    # List the files to import.  A missing list file (e.g. on the very first
    # run) must not abort the import, so only remove it when it is there.
    if os.path.exists('toTrain.txt'):
        os.remove('toTrain.txt')
    with open('toTrain.txt', 'w') as trainListFile:
        for destiny in locationList:
            for filenom in ls(destiny):
                if '.inkml' in filenom:
                    trainListFile.write(destiny + '/' + filenom + '\n')

    # Segment every listed file into its own numbered output folder.
    with open('toTrain.txt', 'r') as trainListFile:
        for lcont, linea in enumerate(trainListFile, 1):
            outFolder = 'segmentedData/' + 'Folder000' + str(lcont)
            if os.path.exists(outFolder):
                shutil.rmtree(outFolder)
            os.makedirs(outFolder)
            # NOTE(review): linea still carries its trailing newline, exactly
            # as before; presumably sg.main copes with it -- confirm.
            sg.main(['GO', linea, outFolder + '/trainFile'])

    prefix = 'trainFile'
    suffix = '.inkml'

    def symbolIndex(name):
        # 'trainFile12.inkml' -> 12, so files sort numerically rather than
        # lexicographically ('trainFile10' would otherwise precede
        # 'trainFile2').
        return int(name[len(prefix):-len(suffix)])

    # Scan the segmented folders and load every symbol with its label.
    for cases in os.listdir('segmentedData'):
        caseDir = 'segmentedData/' + cases
        filesHere = os.listdir(caseDir)
        filesHere.remove('trainFile_GT.txt')
        filesHere.sort(key=symbolIndex)  # sorted once, not once per element
        with open(caseDir + '/trainFile_GT.txt') as Gfile:
            # The k-th ground-truth line labels the k-th symbol file; the
            # label is whatever follows the first comma on the line.
            for filco, linea in enumerate(Gfile):
                etiq = linea[linea.index(',') + 1:].rstrip('\n')
                sdB.append(
                    ink2Traces.i2trained(caseDir + '/' + filesHere[filco],
                                         etiq))

    # Preprocess the symbols, compute their features and group them by tag.
    for symbol in sdB:
        symbol.draw()
    psdB = spp.preprocessing(sdB)
    for symbol in psdB:
        symbol.computeFeatures()
        tagClassification.setdefault(symbol.tag, []).append(symbol)
    return psdB, tagClassification
# Group the character variants by their base symbol: a tag is the base symbol
# followed by one trailing variant character, so tag[:-1] is the grouping key.
# Each entry is [tag, number of samples of that tag].
genTags = {}
for character in tagClassification:
    genTags.setdefault(character[:-1], []).append(
        [character, len(tagClassification[character])])

# Treat a variant holding fewer than 5% of its base symbol's samples as noise
# and drop it from both the sample table and the templates.
for onlySym in genTags:
    variants = genTags[onlySym]
    # The total is the same for every variant of the symbol: compute it once.
    noiseThreshold = 0.05 * sum(count for _, count in variants)
    for variantTag, count in variants:
        if count < noiseThreshold:
            del tagClassification[variantTag]
            del average[variantTag]

filenom = 'algb02.inkml'
Coord = ink2Traces.i2t(filenom)  # Extract the coordinates given by the file
img, byAxis, difs = drawTraces.draw(Coord)  # Show the result and build the image
Symb, groupedStrokes = fileSeg.segment(Coord, byAxis, difs)  # Group traces into symbols
Symb = drawRegions.drawS(Symb)
symbol = spp.preprocessing([Symb[1]])
symbol[0].computeFeatures()

# Work on a copy so computing features does not touch the stored templates.
auxAverage = copy.deepcopy(average)
auxAverage['21'].computeFeatures()
auxAverage['r1'].computeFeatures()

# Start a fresh report; the file may not exist yet (e.g. on the first run),
# which must not abort the script.
if os.path.exists('features.txt'):
    os.remove('features.txt')
# NOTE(review): report is left open on purpose -- code following this fragment
# may keep writing to it; confirm and close it once the report is complete.
report = open('features.txt', 'w')
report.write('||||||||||||||||||||||||||||||||||||||||||||||||||||||\n')
report.write('LP:\n')
# One line per sample of tag '21', listing its LP values.
for sample in tagClassification['21']:
    for value in sample.LP:
        report.write(str(value) + ', ')
    report.write('\n')
# genTags maps a base symbol to a list of [tag, sample count] entries (built
# by the code preceding this fragment).  Treat a variant holding fewer than 5%
# of its base symbol's samples as noise and drop it from both the sample table
# and the templates.
for onlySym in genTags:
    variants = genTags[onlySym]
    # The total is the same for every variant of the symbol: compute it once.
    noiseThreshold = 0.05 * sum(count for _, count in variants)
    for variantTag, count in variants:
        if count < noiseThreshold:
            del tagClassification[variantTag]
            del average[variantTag]

filenom = 'algb02.inkml'
Coord = ink2Traces.i2t(filenom)  # Extract the coordinates given by the file
img, byAxis, difs = drawTraces.draw(Coord)  # Show the result and build the image
Symb, groupedStrokes = fileSeg.segment(Coord, byAxis, difs)  # Group traces into symbols
Symb = drawRegions.drawS(Symb)
symbol = spp.preprocessing([Symb[1]])
symbol[0].computeFeatures()

# Work on a copy so computing features does not touch the stored templates.
auxAverage = copy.deepcopy(average)
auxAverage['21'].computeFeatures()
auxAverage['r1'].computeFeatures()

# Start a fresh report; the file may not exist yet (e.g. on the first run),
# which must not abort the script.
if os.path.exists('features.txt'):
    os.remove('features.txt')
# NOTE(review): report is left open on purpose -- code following this fragment
# may keep writing to it; confirm and close it once the report is complete.
report = open('features.txt', 'w')
report.write('||||||||||||||||||||||||||||||||||||||||||||||||||||||\n')
report.write('LP:\n')
# One line per sample of tag '21', listing its LP values.
for sample in tagClassification['21']:
    for value in sample.LP:
        report.write(str(value) + ', ')
    report.write('\n')
from pprint import pprint import elasticMatching as eM import pytemplate as temp import featurePonderation as fp import pyStructural as stru #Sistema sencer, llegeix nom del fitxer inkml a analitzar i retorna l'expressio (ex: overAll.py fitxerAClassificar.inkml) filenom = sys.argv[1] #Llegir el nom del fitxer InkML de la consola Coord = ink2Traces.i2t(filenom) #Extreure les coordenades donades pel fitxer img, byAxis, difs = drawTraces.draw( Coord) #Mostrar resultat obtingut i montar imatge Symb, groupedStrokes = fileSeg.segment(Coord, byAxis, difs) #Agrupar traces en simbols Symb = drawRegions.drawS( Symb) #Buscar la bounding box i el centre de cada simbol Symb = spp.preprocessing(Symb) #Preprocessar tots els simbols print 'Computing features..........' for i in range(len(Symb)): Symb[i].computeFeatures() #Calcular les features de cada simbol print 'Features extracted.' symboldB, tagClassification, averages = temp.readTemplate( ) #Llegeix la base de dades per extreure tots els simbols etiquetats, totes les mostres ordenades per caracter(etiqueta) i el template de cada caracter genTags = { } #Busca el significat independent de cada caracter i si la proporcio del caracter respecte el significat independent es molt baixa ho considera soroll i elimina el caracter #Elimina components sorollosos de la base de dades for character in tagClassification: if character[:-1] not in genTags: genTags[character[:-1]] = [] genTags[character[:-1]].append( [character, len(tagClassification[character])]) for onlySym in genTags: