Example #1
0
def _absDiffCost(templateValues,symbolValues):
	"""Return the sum of |template - symbol| over each index of ``symbolValues``.

	``symbolValues`` must expose ``shape`` (its first dimension drives the
	loop); ``templateValues`` only needs to be indexable.
	"""
	return sum([abs(templateValues[i]-symbolValues[i]) for i in range(symbolValues.shape[0])])


def elasticMatching(charTemplates,symbolToRec,weights,amp,alt):
	"""Label ``symbolToRec`` with the key of the best matching template.

	Every template whose key ends in '1', '2' or '3' and whose digit equals
	``symbolToRec.tE.shape[0]`` is a candidate.  For each candidate the symbol
	is resampled with ``spp.altArcLengthResampling`` against the template's
	``tE`` and ten feature costs are computed.  Per feature, the costs are
	turned into normalised inverse-cost scores, the scores are combined with
	``weights`` and the candidate with the highest combined score wins.
	Bounding-box rules applied afterwards may override that label.

	Args:
		charTemplates: mapping from template key to template object.  The
			last character of the key is read as the stroke count.  Templates
			must expose ``tE``, ``Coord``, ``LP``, ``turningAngle``,
			``turningAngleDifference``, ``liS``, ``accAngle``,
			``quadraticError``, ``relStrokeLength``, ``coG`` and ``Style``.
		symbolToRec: symbol to recognise; ``tE``, ``Coord``, ``bBox`` and
			``coG`` are read here.
		weights: mapping from feature name (the keys of ``kindsOfCost``
			below) to the weight of that feature.
		amp: scale for the ``bBox[1]-bBox[0]`` threshold (``0.0125*amp``).
		alt: scale for the ``bBox[3]-bBox[2]`` threshold (``0.017*alt``).

	Returns:
		The selected label (a template key or one of the hard-coded
		bounding-box labels).

	Raises:
		ValueError: if no template is a usable candidate and no bounding-box
			rule assigns a label.
	"""
	strokesBySuffix={'1':1,'2':2,'3':3}
	# One list per feature; entry k belongs to the candidate reference[k].
	kindsOfCost={'Coord':[],'LP':[],'turningAngle':[],'turningAngleDifference':[],'liS':[],'accAngle':[],'quadraticError':[],'relStrokeLength':[],'coG':[],'Style':[]}
	reference=[]
	adaptedSymbol=None
	for character in charTemplates:
		# Only templates with the same number of strokes take part.  The
		# stroke count is derived afresh for every key, so a key with another
		# suffix is skipped instead of inheriting the previous key's count.
		tNum=strokesBySuffix.get(character[-1:])
		# symbolToRec.tE is re-read on every pass on purpose: whether the
		# resampling helper modifies its input cannot be seen from here.
		if tNum is None or tNum!=symbolToRec.tE.shape[0]:
			continue
		template=charTemplates[character]
		adaptedSymbol=spp.altArcLengthResampling([symbolToRec],template.tE)[0]
		adaptedSymbol.computeFeatures()
		strokeCount=adaptedSymbol.tE.shape[0]
		# Cost of every feature
		kindsOfCost['Coord'].append(sum([math.sqrt(((template.Coord[i,0]-adaptedSymbol.Coord[i,0])**2)+((template.Coord[i,1]-adaptedSymbol.Coord[i,1])**2)) for i in range(adaptedSymbol.Coord.shape[0])]))
		kindsOfCost['LP'].append(_absDiffCost(template.LP,adaptedSymbol.LP))
		kindsOfCost['turningAngle'].append(_absDiffCost(template.turningAngle,adaptedSymbol.turningAngle))
		kindsOfCost['turningAngleDifference'].append(_absDiffCost(template.turningAngleDifference,adaptedSymbol.turningAngleDifference))
		# For the remaining features a fixed penalty replaces the
		# element-wise cost when template and symbol differ in length.
		if adaptedSymbol.liS.shape[0]!=len(template.liS):
			kindsOfCost['liS'].append(spp.lcomp(adaptedSymbol.Coord,adaptedSymbol.Coord.shape[0]))
		else:
			kindsOfCost['liS'].append(_absDiffCost(template.liS,adaptedSymbol.liS))
		if adaptedSymbol.accAngle.shape[0]!=len(template.accAngle):
			kindsOfCost['accAngle'].append(strokeCount*2*math.pi)
		else:
			kindsOfCost['accAngle'].append(_absDiffCost(template.accAngle,adaptedSymbol.accAngle))
		if adaptedSymbol.quadraticError.shape[0]!=len(template.quadraticError):
			kindsOfCost['quadraticError'].append(math.sqrt(8)*strokeCount)
		else:
			kindsOfCost['quadraticError'].append(_absDiffCost(template.quadraticError,adaptedSymbol.quadraticError))
		if adaptedSymbol.relStrokeLength.shape[0]!=len(template.relStrokeLength):
			kindsOfCost['relStrokeLength'].append(1)
		else:
			kindsOfCost['relStrokeLength'].append(_absDiffCost(template.relStrokeLength,adaptedSymbol.relStrokeLength))
		if adaptedSymbol.coG.shape[0]!=len(template.coG):
			kindsOfCost['coG'].append(strokeCount*math.sqrt(8))
		else:
			kindsOfCost['coG'].append(sum([math.sqrt(((template.coG[i][0]-adaptedSymbol.coG[i][0])**2)+((template.coG[i][1]-adaptedSymbol.coG[i][1])**2)) for i in range(adaptedSymbol.coG.shape[0])]))
		if template.Style==adaptedSymbol.Style:
			kindsOfCost['Style'].append(0)
		else:
			kindsOfCost['Style'].append(1)
		reference.append(character)
	# Turn each cost into a score (total cost / own cost), normalise the
	# scores of one feature to sum 1 and weight them.
	# NOTE(review): a cost of exactly 0 yields inf (or nan) here, which the
	# normalisation turns into nan and nansum then ignores, so a perfect match
	# appears to contribute nothing for that feature - confirm this is wanted.
	fiProb={}
	for kind in kindsOfCost:
		allThisCost=np.nansum(kindsOfCost[kind])
		probByCost=[allThisCost/float(ch) for ch in kindsOfCost[kind]]
		totProb=np.nansum(probByCost)
		fiProb[kind]=[ch/float(totProb) for ch in probByCost]
	probPonderada=[np.nansum([weights[kind]*fiProb[kind][i] for kind in fiProb]) for i in range(len(reference))]
	# Take the best candidate whose stroke count agrees with the last
	# resampled symbol; discard better-scored candidates that do not.
	etiqBelongs=None
	while reference:
		best=np.argmax(probPonderada)
		if charTemplates[reference[best]].tE.shape[0]==adaptedSymbol.tE.shape[0]:
			etiqBelongs=reference[best]
			break
		del reference[best]
		del probPonderada[best]
	# Special cases with a small bounding box.  They are evaluated even when
	# no template was usable, because they do not depend on the templates.
	# presumably bBox[1]-bBox[0] is the horizontal and bBox[3]-bBox[2] the
	# vertical extent - verify against the code that fills bBox.
	bboxWidth=symbolToRec.bBox[1]-symbolToRec.bBox[0]
	bboxHeight=symbolToRec.bBox[3]-symbolToRec.bBox[2]
	widthLimit=0.0125*amp
	heightLimit=0.017*alt
	if bboxWidth<widthLimit and bboxHeight<heightLimit:
		etiqBelongs='.1'
	elif bboxWidth>widthLimit and bboxHeight<heightLimit:
		etiqBelongs='-1'
	elif bboxWidth<widthLimit and bboxHeight>heightLimit:
		if symbolToRec.tE.shape[0]==1:
			etiqBelongs='11'
		elif symbolToRec.tE.shape[0]==2:
			# Extent of each stroke along Coord column 1, from its first to
			# its last point, scaled by the bounding-box height.
			firstSpan=abs(symbolToRec.Coord[0,1]-symbolToRec.Coord[symbolToRec.tE[0],1])*bboxHeight
			secondSpan=abs(symbolToRec.Coord[symbolToRec.tE[0]+1,1]-symbolToRec.Coord[symbolToRec.tE[1],1])*bboxHeight
			if firstSpan<heightLimit and secondSpan<heightLimit:
				etiqBelongs='\\ldots2'
			elif (firstSpan>heightLimit and secondSpan<heightLimit) or (firstSpan<heightLimit and secondSpan>heightLimit):
				# Exactly one stroke is short: the order of the two centres
				# of gravity decides between 'i' and '!'.
				if symbolToRec.coG[0,1]<symbolToRec.coG[1,1]:
					etiqBelongs='i2'
				else:
					etiqBelongs='!2'
		else:
			etiqBelongs='\\ldots3'
	if bboxWidth>bboxHeight*8:
		etiqBelongs='-1'
	if etiqBelongs is None:
		raise ValueError('no template matches the symbol and no bounding-box rule applies')
	return etiqBelongs
Example #2
0
def _meanStrokeEnds(symbols, nStrokes):
    """Return, for each of the first ``nStrokes`` positions, the mean of the
    ``tE`` entries of ``symbols`` at that position, truncated with ``int``."""
    return np.asarray([
        int(sum([symbol.tE[j] for symbol in symbols]) / len(symbols))
        for j in range(nStrokes)
    ])


def _meanCoords(symbols):
    """Return the point-wise mean of the first two ``Coord`` columns.

    All symbols are expected to hold the same number of points; the caller
    resamples them against a common stroke layout first.
    """
    total = np.zeros([len(symbols[0].Coord), 2], np.float64)
    for symbol in symbols:
        total = total + symbol.Coord[:, :2]
    return total / len(symbols)


def _meanFeature(symbols, name):
    """Return the element-wise mean (NaNs ignored in the sum) of the
    sequence attribute ``name`` over ``symbols``."""
    count = len(symbols)
    return [
        np.nansum([getattr(symbol, name)[j] for symbol in symbols]) / count
        for j in range(len(getattr(symbols[0], name)))
    ]


def _meanCentres(symbols):
    """Return the element-wise mean of the two-component ``coG`` entries."""
    count = len(symbols)
    return [[
        np.nansum([symbol.coG[j][0] for symbol in symbols]) / count,
        np.nansum([symbol.coG[j][1] for symbol in symbols]) / count
    ] for j in range(len(symbols[0].coG))]


def _dominantStyle(symbols):
    """Return the most frequent ``Style`` among ``symbols`` (first one wins
    on ties, in the order listed below)."""
    styles = ['horizontal', 'vertical', 'diagonal', 'closed']
    observed = [symbol.Style for symbol in symbols]
    return styles[np.argmax([observed.count(style) for style in styles])]


def _splitByStrokeCount(tagClassification):
    """Replace every entry ``tag`` by entries ``tag + str(n)``, one per
    distinct stroke count ``n = len(symbol.tE)`` found among its symbols."""
    for character in list(tagClassification):
        seenCounts = []
        for symbol in tagClassification[character]:
            n = len(symbol.tE)
            key = character + str(n)
            if n not in seenCounts:
                seenCounts.append(n)
                tagClassification[key] = []
            tagClassification[key].append(symbol)
        del tagClassification[character]


def _dropNoisySamples(tagClassification):
    """Remove hard-coded noisy entries and samples of the database (in place).

    The keys and indices are specific to the training data that is read by
    ``templateGenerator``; a missing key raises ``KeyError``.
    """
    del tagClassification['\\exists2'][1]
    for key in ('\\pi2', '\\forall3', 'Y2', 'x2', 'Y3'):
        del tagClassification[key]
    validSamples = (
        ('k1',
         [0, 1, 2, 3, 8, 12, 20, 24, 29, 30, 34, 35, 42, 43, 58, 59, 61]),
        ('\\sum2', [1, 5, 13, 18, 24, 25, 28, 32, 35, 37, 38, 45, 61]),
        (']2', [3, 4]),
        ('[2', [0, 8]),
        ('\\theta2', [
            1, 2, 5, 7, 9, 12, 13, 28, 29, 31, 32, 34, 37, 39, 40, 41, 52,
            61, 65, 66, 67, 69
        ]),
        ('\\div3', [10, 24, 38]),
        ('\\tan3', [0, 4, 5, 16, 22, 34, 41, 45, 50, 53, 72, 86, 144]),
        ('\\lim3', [9]),
    )
    for key, valid in validSamples:
        tagClassification[key] = [tagClassification[key][i] for i in valid]


def _writeReportEntry(report, character, symbols, average):
    """Append the stroke ends and points of every symbol of one tag, followed
    by the averaged points, to the open text file ``report``."""
    report.write('-----------------------------------------------\n')
    report.write(character + '      |\n')
    report.write('---------\n')
    for symbol in symbols:
        report.write(str(symbol.tE) + '       :\n')
        for j in range(symbol.Coord.shape[0]):
            report.write(str(symbol.Coord[j]) + ', ')
        report.write('\n')
    report.write('average:\n               ')
    for j in range(average.shape[0]):
        report.write(str(average[j]) + ', ')
    report.write('\n')


def _replacePickle(path, obj):
    """Pickle ``obj`` to ``path``, deleting a previous file of that name."""
    if os.path.isfile(path):
        os.remove(path)
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def templateGenerator(option=3):
    """Build one template per tag from the training database and store it.

    The database is read with ``dB.readCROHMEdB``.  Depending on ``option``:

    * 1 - every tag is reduced to the minimum number of strokes found among
      its symbols; the averaged points are stored and plotted.
    * 2 - every tag is reduced to the rounded mean number of strokes; the
      averaged points are stored.
    * 3 - every tag is split by stroke count, hard-coded noisy samples are
      dropped, and a ``nsi.taggedSymbol`` with averaged features is stored
      per resulting tag.  A textual dump is written to ``results.txt``.

    In all cases ``varSimbdB.txt``, ``varTagClass.txt`` and
    ``varAverages.txt`` are (re)written with pickle in the working
    directory and ``plt.show()`` is called at the end.

    Args:
        option: 1, 2 or 3 as described above (default 3).

    Raises:
        ValueError: if ``option`` is not 1, 2 or 3.
    """
    if option not in (1, 2, 3):
        raise ValueError('option must be 1, 2 or 3, got %r' % (option, ))
    symboldB, tagClassification = dB.readCROHMEdB([
        'trainData/CROHME_training', 'trainData/trainData_v2',
        'trainData/TrainINKML'
    ])
    tagAverages = {}
    if option in (1, 2):
        # Options 1 and 2 only differ in the target number of strokes and in
        # the plotting done by option 1.
        for figureNumber, character in enumerate(tagClassification, 1):
            symbols = tagClassification[character]
            print('%s : %s' % (character, len(symbols)))
            numStrokes = [len(symbol.tE) for symbol in symbols]
            if option == 1:
                # Option 1: minimum number of strokes found
                nStrokesTemp = min(numStrokes)
            else:
                # Option 2: rounded mean number of strokes found
                nStrokesTemp = int(
                    round(sum(numStrokes) / float(len(numStrokes))))
            symbols = spp.strokeReduction(symbols, nStrokesTemp, True)
            tagClassification[character] = symbols
            eachStroke = _meanStrokeEnds(symbols, nStrokesTemp)
            symbols = spp.altArcLengthResampling(symbols, eachStroke)
            tagClassification[character] = symbols
            average = _meanCoords(symbols)
            tagAverages[character] = average
            if option == 1:
                plt.figure(figureNumber)
                start = 0
                for j in range(nStrokesTemp):
                    # One red line per stroke; the second coordinate is
                    # negated for display.
                    stop = int(eachStroke[j]) + 1
                    points = list(range(start, stop))
                    plt.plot(average[points, 0], -average[points, 1], 'r')
                    start = stop
    else:
        # Option 3: symbols are separated by their number of strokes
        _splitByStrokeCount(tagClassification)
        print(tagClassification['-1'][0].LP)
        # Noise of the database
        _dropNoisySamples(tagClassification)
        # A missing results.txt (first run) must not abort the generation.
        if os.path.isfile('results.txt'):
            os.remove('results.txt')
        with open('results.txt', 'w') as report:
            for character in tagClassification:
                symbols = tagClassification[character]
                # eachStroke describes how the points are distributed over
                # the strokes: the mean of that distribution in the database.
                eachStroke = _meanStrokeEnds(symbols,
                                             symbols[0].tE.shape[0])
                symbols = spp.altArcLengthResampling(symbols, eachStroke)
                tagClassification[character] = symbols
                print('%s : %s' % (character, len(symbols)))
                # Every feature of a tag's template is the mean of that
                # feature over the symbols of the tag.
                average = _meanCoords(symbols)
                template = nsi.taggedSymbol(average, eachStroke, character)
                template.computeFeatures()
                template.LP = _meanFeature(symbols, 'LP')
                template.accAngle = _meanFeature(symbols, 'accAngle')
                template.coG = _meanCentres(symbols)
                template.liS = _meanFeature(symbols, 'liS')
                template.quadraticError = _meanFeature(
                    symbols, 'quadraticError')
                template.relStrokeLength = _meanFeature(
                    symbols, 'relStrokeLength')
                template.turningAngle = _meanFeature(symbols, 'turningAngle')
                template.turningAngleDifference = _meanFeature(
                    symbols, 'turningAngleDifference')
                template.Style = _dominantStyle(symbols)
                tagAverages[character] = template
                _writeReportEntry(report, character, symbols, average)
    # Store the results on disk
    _replacePickle('varSimbdB.txt', symboldB)
    _replacePickle('varTagClass.txt', tagClassification)
    _replacePickle('varAverages.txt', tagAverages)
    plt.show()
Example #3
0
def _absDiffCost(templateValues, symbolValues):
    """Return the sum of |template - symbol| over each index of ``symbolValues``.

    ``symbolValues`` must expose ``shape`` (its first dimension drives the
    loop); ``templateValues`` only needs to be indexable.
    """
    return sum([
        abs(templateValues[i] - symbolValues[i])
        for i in range(symbolValues.shape[0])
    ])


def elasticMatching(charTemplates, symbolToRec, weights, amp, alt):
    """Label ``symbolToRec`` with the key of the best matching template.

    Every template whose key ends in '1', '2' or '3' and whose digit equals
    ``symbolToRec.tE.shape[0]`` is a candidate.  For each candidate the
    symbol is resampled with ``spp.altArcLengthResampling`` against the
    template's ``tE`` and ten feature costs are computed.  Per feature, the
    costs are turned into normalised inverse-cost scores, the scores are
    combined with ``weights`` and the candidate with the highest combined
    score wins.  Bounding-box rules applied afterwards may override that
    label.

    Args:
        charTemplates: mapping from template key to template object.  The
            last character of the key is read as the stroke count.
            Templates must expose ``tE``, ``Coord``, ``LP``,
            ``turningAngle``, ``turningAngleDifference``, ``liS``,
            ``accAngle``, ``quadraticError``, ``relStrokeLength``, ``coG``
            and ``Style``.
        symbolToRec: symbol to recognise; ``tE``, ``Coord``, ``bBox`` and
            ``coG`` are read here.
        weights: mapping from feature name (the keys of ``kindsOfCost``
            below) to the weight of that feature.
        amp: scale for the ``bBox[1] - bBox[0]`` threshold
            (``0.0125 * amp``).
        alt: scale for the ``bBox[3] - bBox[2]`` threshold
            (``0.017 * alt``).

    Returns:
        The selected label (a template key or one of the hard-coded
        bounding-box labels).

    Raises:
        ValueError: if no template is a usable candidate and no
            bounding-box rule assigns a label.
    """
    strokesBySuffix = {'1': 1, '2': 2, '3': 3}
    # One list per feature; entry k belongs to the candidate reference[k].
    kindsOfCost = {
        'Coord': [],
        'LP': [],
        'turningAngle': [],
        'turningAngleDifference': [],
        'liS': [],
        'accAngle': [],
        'quadraticError': [],
        'relStrokeLength': [],
        'coG': [],
        'Style': []
    }
    reference = []
    adaptedSymbol = None
    for character in charTemplates:
        # Only templates with the same number of strokes take part.  The
        # stroke count is derived afresh for every key, so a key with
        # another suffix is skipped instead of inheriting the previous
        # key's count.
        tNum = strokesBySuffix.get(character[-1:])
        # symbolToRec.tE is re-read on every pass on purpose: whether the
        # resampling helper modifies its input cannot be seen from here.
        if tNum is None or tNum != symbolToRec.tE.shape[0]:
            continue
        template = charTemplates[character]
        adaptedSymbol = spp.altArcLengthResampling([symbolToRec],
                                                   template.tE)[0]
        adaptedSymbol.computeFeatures()
        strokeCount = adaptedSymbol.tE.shape[0]
        # Cost of every feature
        kindsOfCost['Coord'].append(
            sum([
                math.sqrt(
                    ((template.Coord[i, 0] - adaptedSymbol.Coord[i, 0])**2) +
                    ((template.Coord[i, 1] - adaptedSymbol.Coord[i, 1])**2))
                for i in range(adaptedSymbol.Coord.shape[0])
            ]))
        kindsOfCost['LP'].append(_absDiffCost(template.LP, adaptedSymbol.LP))
        kindsOfCost['turningAngle'].append(
            _absDiffCost(template.turningAngle, adaptedSymbol.turningAngle))
        kindsOfCost['turningAngleDifference'].append(
            _absDiffCost(template.turningAngleDifference,
                         adaptedSymbol.turningAngleDifference))
        # For the remaining features a fixed penalty replaces the
        # element-wise cost when template and symbol differ in length.
        if adaptedSymbol.liS.shape[0] != len(template.liS):
            kindsOfCost['liS'].append(
                spp.lcomp(adaptedSymbol.Coord, adaptedSymbol.Coord.shape[0]))
        else:
            kindsOfCost['liS'].append(
                _absDiffCost(template.liS, adaptedSymbol.liS))
        if adaptedSymbol.accAngle.shape[0] != len(template.accAngle):
            kindsOfCost['accAngle'].append(strokeCount * 2 * math.pi)
        else:
            kindsOfCost['accAngle'].append(
                _absDiffCost(template.accAngle, adaptedSymbol.accAngle))
        if adaptedSymbol.quadraticError.shape[0] != len(
                template.quadraticError):
            kindsOfCost['quadraticError'].append(math.sqrt(8) * strokeCount)
        else:
            kindsOfCost['quadraticError'].append(
                _absDiffCost(template.quadraticError,
                             adaptedSymbol.quadraticError))
        if adaptedSymbol.relStrokeLength.shape[0] != len(
                template.relStrokeLength):
            kindsOfCost['relStrokeLength'].append(1)
        else:
            kindsOfCost['relStrokeLength'].append(
                _absDiffCost(template.relStrokeLength,
                             adaptedSymbol.relStrokeLength))
        if adaptedSymbol.coG.shape[0] != len(template.coG):
            kindsOfCost['coG'].append(strokeCount * math.sqrt(8))
        else:
            kindsOfCost['coG'].append(
                sum([
                    math.sqrt(
                        ((template.coG[i][0] - adaptedSymbol.coG[i][0])**2) +
                        ((template.coG[i][1] - adaptedSymbol.coG[i][1])**2))
                    for i in range(adaptedSymbol.coG.shape[0])
                ]))
        if template.Style == adaptedSymbol.Style:
            kindsOfCost['Style'].append(0)
        else:
            kindsOfCost['Style'].append(1)
        reference.append(character)
    # Turn each cost into a score (total cost / own cost), normalise the
    # scores of one feature to sum 1 and weight them.
    # NOTE(review): a cost of exactly 0 yields inf (or nan) here, which the
    # normalisation turns into nan and nansum then ignores, so a perfect
    # match appears to contribute nothing for that feature - confirm this
    # is wanted.
    fiProb = {}
    for kind in kindsOfCost:
        allThisCost = np.nansum(kindsOfCost[kind])
        probByCost = [allThisCost / float(ch) for ch in kindsOfCost[kind]]
        totProb = np.nansum(probByCost)
        fiProb[kind] = [ch / float(totProb) for ch in probByCost]
    probPonderada = [
        np.nansum([weights[kind] * fiProb[kind][i] for kind in fiProb])
        for i in range(len(reference))
    ]
    # Take the best candidate whose stroke count agrees with the last
    # resampled symbol; discard better-scored candidates that do not.
    etiqBelongs = None
    while reference:
        best = np.argmax(probPonderada)
        if charTemplates[
                reference[best]].tE.shape[0] == adaptedSymbol.tE.shape[0]:
            etiqBelongs = reference[best]
            break
        del reference[best]
        del probPonderada[best]
    # Special cases with a small bounding box.  They are evaluated even when
    # no template was usable, because they do not depend on the templates.
    # presumably bBox[1]-bBox[0] is the horizontal and bBox[3]-bBox[2] the
    # vertical extent - verify against the code that fills bBox.
    bboxWidth = symbolToRec.bBox[1] - symbolToRec.bBox[0]
    bboxHeight = symbolToRec.bBox[3] - symbolToRec.bBox[2]
    widthLimit = 0.0125 * amp
    heightLimit = 0.017 * alt
    if bboxWidth < widthLimit and bboxHeight < heightLimit:
        etiqBelongs = '.1'
    elif bboxWidth > widthLimit and bboxHeight < heightLimit:
        etiqBelongs = '-1'
    elif bboxWidth < widthLimit and bboxHeight > heightLimit:
        if symbolToRec.tE.shape[0] == 1:
            etiqBelongs = '11'
        elif symbolToRec.tE.shape[0] == 2:
            # Extent of each stroke along Coord column 1, from its first to
            # its last point, scaled by the bounding-box height.
            firstSpan = abs(symbolToRec.Coord[0, 1] - symbolToRec.Coord[
                symbolToRec.tE[0], 1]) * bboxHeight
            secondSpan = abs(
                symbolToRec.Coord[symbolToRec.tE[0] + 1, 1] -
                symbolToRec.Coord[symbolToRec.tE[1], 1]) * bboxHeight
            if firstSpan < heightLimit and secondSpan < heightLimit:
                etiqBelongs = '\\ldots2'
            elif (firstSpan > heightLimit and secondSpan < heightLimit) or (
                    firstSpan < heightLimit and secondSpan > heightLimit):
                # Exactly one stroke is short: the order of the two centres
                # of gravity decides between 'i' and '!'.
                if symbolToRec.coG[0, 1] < symbolToRec.coG[1, 1]:
                    etiqBelongs = 'i2'
                else:
                    etiqBelongs = '!2'
        else:
            etiqBelongs = '\\ldots3'
    if bboxWidth > bboxHeight * 8:
        etiqBelongs = '-1'
    if etiqBelongs is None:
        raise ValueError(
            'no template matches the symbol and no bounding-box rule applies')
    return etiqBelongs
Example #4
0
# Whole stroke-count classes that are dropped from the database as noise.
_NOISY_CLASSES=('\\pi2','\\forall3','Y2','x2','Y3')

# Classes for which only the listed example indices are kept (database noise).
_VALID_EXAMPLES=(
	('k1',(0,1,2,3,8,12,20,24,29,30,34,35,42,43,58,59,61)),
	('\\sum2',(1,5,13,18,24,25,28,32,35,37,38,45,61)),
	(']2',(3,4)),
	('[2',(0,8)),
	('\\theta2',(1,2,5,7,9,12,13,28,29,31,32,34,37,39,40,41,52,61,65,66,67,69)),
	('\\div3',(10,24,38)),
	('\\tan3',(0,4,5,16,22,34,41,45,50,53,72,86,144)),
	('\\lim3',(9,)),
)


def _removeIfPresent(path):
	"""Delete path when it is an existing regular file; do nothing otherwise."""
	if os.path.isfile(path):
		os.remove(path)


def _dumpPickle(path,obj):
	"""Pickle obj into a fresh file at path, closing the file even on error."""
	_removeIfPresent(path)
	with open(path,'wb') as handle:
		pickle.dump(obj,handle)


def _roundedMean(values):
	"""Return the arithmetic mean of values rounded to the nearest int."""
	return int(round(sum(values)/float(len(values))))


def _meanStrokeEnds(symbols,nStrokes):
	"""Return, for each of the first nStrokes strokes, the tE entry averaged over symbols and truncated to int."""
	return np.asarray([int(sum([symbol.tE[j] for symbol in symbols])/len(symbols)) for j in range(nStrokes)])


def _meanCoord(symbols):
	"""Return the mean of the first two Coord columns over symbols as a float64 array.

	The running sum is truncated to each symbol's row count, as the original
	element-by-element loop did.
	NOTE(review): assumes every symbol has the same number of points after
	resampling -- confirm against spp.altArcLengthResampling.
	"""
	total=np.zeros([len(symbols[0].Coord),2],np.float64)
	for symbol in symbols:
		rows=symbol.Coord.shape[0]
		total=total[:rows]+np.asarray(symbol.Coord,np.float64)[:rows,:2]
	return total/len(symbols)


def _nanMeanByIndex(symbols,attr):
	"""Return a list whose j-th entry is the NaN-ignoring sum of symbol.<attr>[j] divided by len(symbols).

	The list length is taken from the first symbol's attribute.
	"""
	count=len(symbols)
	length=len(getattr(symbols[0],attr))
	return [np.nansum([getattr(symbol,attr)[j] for symbol in symbols])/count for j in range(length)]


def _averageWithCommonStrokeCount(tagClassification,tagAverages,chooseStrokeCount,plot):
	"""Shared body of options 1 and 2: reduce every label to one stroke count and average its coordinates.

	chooseStrokeCount maps the list of per-example stroke counts to the target
	count. tagClassification is updated in place with the reduced and resampled
	symbols, and tagAverages receives the raw mean coordinate array per label.
	When plot is true each averaged stroke is drawn in red on its own figure.
	"""
	for figureId,character in enumerate(tagClassification,1):
		symbols=tagClassification[character]
		print('%s : %d'%(character,len(symbols)))
		nStrokesTemp=chooseStrokeCount([len(symbol.tE) for symbol in symbols])
		symbols=spp.strokeReduction(symbols,nStrokesTemp,True)
		eachStroke=_meanStrokeEnds(symbols,nStrokesTemp)
		symbols=spp.altArcLengthResampling(symbols,eachStroke)
		tagClassification[character]=symbols
		average=_meanCoord(symbols)
		tagAverages[character]=average
		if plot:
			plt.figure(figureId)
			for j in range(nStrokesTemp):
				# A stroke starts right after the previous stroke's end index.
				first=0 if j==0 else int(eachStroke[j-1])+1
				points=list(range(first,int(eachStroke[j])+1))
				# y is negated when plotting, as in the original code.
				plt.plot(average[points,0],-average[points,1],'r')


def _splitByStrokeCount(tagClassification):
	"""Replace every label by '<label><n>' entries grouping its examples by stroke count n (len(tE))."""
	for label in list(tagClassification):
		created=set()
		for symbol in tagClassification[label]:
			n=len(symbol.tE)
			key=label+str(n)
			if n not in created:
				created.add(n)
				tagClassification[key]=[]
			tagClassification[key].append(symbol)
		del tagClassification[label]


def _dropNoisyExamples(tagClassification):
	"""Remove the hand-picked noisy examples and classes from the split database."""
	del tagClassification['\\exists2'][1]
	for key in _NOISY_CLASSES:
		del tagClassification[key]
	for key,keep in _VALID_EXAMPLES:
		examples=tagClassification[key]
		tagClassification[key]=[examples[i] for i in keep]


def _buildTemplate(character,symbols,eachStroke,average):
	"""Build the template symbol of a label: mean coordinates plus per-feature means over symbols."""
	template=nsi.taggedSymbol(average,eachStroke,character)
	template.computeFeatures()
	# Every feature computed above is then overwritten by its mean over the examples.
	count=len(symbols)
	template.LP=_nanMeanByIndex(symbols,'LP')
	template.accAngle=_nanMeanByIndex(symbols,'accAngle')
	template.coG=[[np.nansum([symbol.coG[j][0] for symbol in symbols])/count,np.nansum([symbol.coG[j][1] for symbol in symbols])/count] for j in range(len(symbols[0].coG))]
	template.liS=_nanMeanByIndex(symbols,'liS')
	template.quadraticError=_nanMeanByIndex(symbols,'quadraticError')
	template.relStrokeLength=_nanMeanByIndex(symbols,'relStrokeLength')
	template.turningAngle=_nanMeanByIndex(symbols,'turningAngle')
	template.turningAngleDifference=_nanMeanByIndex(symbols,'turningAngleDifference')
	# The template style is the most frequent one; ties go to the earliest entry of styles.
	styles=['horizontal','vertical','diagonal','closed']
	observed=[symbol.Style for symbol in symbols]
	template.Style=styles[np.argmax([observed.count(style) for style in styles])]
	return template


def _writeReport(report,character,symbols,average):
	"""Append one label's section (each example's tE and points, then the average) to the report file."""
	report.write('-----------------------------------------------\n')
	report.write(character+'      |\n')
	report.write('---------\n')
	for symbol in symbols:
		report.write(str(symbol.tE)+'       :\n')
		for j in range(symbol.Coord.shape[0]):
			report.write(str(symbol.Coord[j])+', ')
		report.write('\n')
	report.write('average:\n               ')
	for j in range(average.shape[0]):
		report.write(str(average[j])+', ')
	report.write('\n')


def templateGenerator():
	"""Build per-label symbol templates from the CROHME training data and save them.

	Reads the database with dB.readCROHMEdB, builds one template per label
	according to the hard-coded option (currently 3), then pickles the symbol
	database, the processed classification and the templates to
	'varSimbdB.txt', 'varTagClass.txt' and 'varAverages.txt' in the working
	directory. Option 3 additionally writes a text report to 'results.txt'.
	Finishes by calling plt.show(). Returns None.
	"""
	symboldB,tagClassification=dB.readCROHMEdB(['trainData/CROHME_training','trainData/trainData_v2','trainData/TrainINKML'])
	option=3
	tagAverages={}
	# Option 1: database symbols are adapted to the minimum number of strokes found.
	if option==1:
		_averageWithCommonStrokeCount(tagClassification,tagAverages,min,True)
	# Option 2: database symbols are adapted to the rounded mean number of strokes
	# (the original comment said "maximum", but the code uses the rounded mean).
	elif option==2:
		_averageWithCommonStrokeCount(tagClassification,tagAverages,_roundedMean,False)
	# Option 3: symbols are split according to their number of strokes.
	elif option==3:
		_splitByStrokeCount(tagClassification)
		print(tagClassification['-1'][0].LP)
		# Database noise.
		_dropNoisyExamples(tagClassification)
		# Guarded removal: the report does not exist yet on a first run.
		_removeIfPresent('results.txt')
		with open('results.txt','w') as report:
			for character in tagClassification:
				symbols=tagClassification[character]
				# eachStroke describes how the points are distributed into strokes,
				# taken as the mean of that distribution over the database.
				eachStroke=_meanStrokeEnds(symbols,symbols[0].tE.shape[0])
				symbols=spp.altArcLengthResampling(symbols,eachStroke)
				tagClassification[character]=symbols
				print('%s : %d'%(character,len(symbols)))
				# All template features of a label are computed as their mean
				# over the symbols carrying that label.
				average=_meanCoord(symbols)
				tagAverages[character]=_buildTemplate(character,symbols,eachStroke,average)
				_writeReport(report,character,symbols,average)
	# Save the results to disk.
	_dumpPickle('varSimbdB.txt',symboldB)
	_dumpPickle('varTagClass.txt',tagClassification)
	_dumpPickle('varAverages.txt',tagAverages)
	plt.show()