def getTotalDistance(vps, vp):
    """Return the variance of the pairwise Hamming distances of ``vps`` plus ``vp``.

    Every ordered pair of distinct points (i, j) contributes one distance
    obtained from ``utils.hammingDistance``; the result is the mean of the
    squared distances minus the square of the mean distance.

    Args:
        vps: sequence of points, each itself a sequence of symbols.
        vp: one additional point, treated as the last point of the set.

    Returns:
        The variance as a float.  With fewer than two points there are no
        pairs to measure, so 0.0 is returned instead of dividing by zero.
    """
    # Copy every point into a plain list so the distance function always
    # receives the same sequence type, whatever the callers passed in.
    points = [list(point) for point in vps]
    points.append(list(vp))

    pairCount = len(points) * (len(points) - 1)
    if pairCount == 0:
        # A single point has no pairwise distances, hence no spread.
        return 0.0

    total = 0.0
    totalOfSquares = 0.0
    for i, first in enumerate(points):
        for j, second in enumerate(points):
            if i == j:
                continue
            dist = utils.hammingDistance(first, second)
            total += dist
            totalOfSquares += dist * dist

    mean = total / pairCount
    return float(totalOfSquares / pairCount - mean * mean)
def makeOneRun():
    """Hash two 256-bit strings that differ in one bit and compare the results.

    A random 256-bit value is drawn together with a copy in which exactly one
    randomly chosen bit is complemented.  Both are hashed with ``hashIt`` and
    the Hamming distance between the two hash results is returned.
    """
    # Pick the bit position to complement.
    flipIndex = random.randint(0, 255)
    # Draw the random 256-bit value.
    original = random.getrandbits(256)
    # XOR with a single-bit mask produces the almost identical value.
    flipped = original ^ (1 << flipIndex)

    # Turn both integers into 256-bit arrays.
    messages = [BitArray(uint=value, length=256)
                for value in (original, flipped)]
    # Hash the byte form of each message.
    hashes = [hashIt(message.bytes) for message in messages]
    # Wrap the hashes as bit arrays so they can be compared bit by bit.
    firstBits, secondBits = [BitArray(digest) for digest in hashes]
    return hammingDistance(firstBits, secondBits)
def generateHeuristicVantagePoints(options):
    """Generate random vantage points by rejection sampling and save them.

    A candidate from ``generateUniformRandomVP`` is accepted only when, for
    every vantage point chosen so far, ``dim - hammingDistance`` does not
    exceed ``dim * 0.4``.  Sampling repeats until ``numberOfVP`` points have
    been accepted, after which they are written to the ``vp/`` file named
    after the options.

    Args:
        options: mapping providing 'numberOfDimension', 'numberOfVP',
            'numberOfAlphabet' and 'typeOfVP'.
    """
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    maxAgreement = dim * 0.4

    chosen = []
    while len(chosen) < numberOfVP:
        candidate = generateUniformRandomVP(dim, cardinality)
        # The very first candidate is always accepted: there is nothing to
        # compare it against, so all() over an empty sequence is True.
        if all(dim - utils.hammingDistance(previous, candidate) <= maxAgreement
               for previous in chosen):
            chosen.append(candidate)

    outputName = 'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP)
    utils.writeDataToFile(outputName, chosen)
def generateVantagePointsWithManyAlgorithm(options):
    """Pick vantage points from the data set so pairwise distances stay varied.

    The first vantage point is the "major pattern": the most frequent symbol
    of every dimension.  The data set is then scanned repeatedly; a data point
    is added when it differs from every chosen point and every distance it has
    to the chosen points has been used at most ``threshold`` times.  The
    threshold starts at 0 and grows by one after each full scan.  A scan is
    not interrupted when ``numberOfVP`` is reached, so the result may hold
    more points than requested.  The points are written to the ``vp/`` file
    named after the options.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'.
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))

    # Build the major pattern one dimension at a time.
    seed = []
    for column in xrange(dim):
        occurrences = {}
        for row in xrange(numberOfData):
            symbol = datas[row][column]
            # The first sighting is stored as 0; the uniform offset does not
            # change which symbol ranks first.
            occurrences[symbol] = occurrences[symbol] + 1 if symbol in occurrences else 0
        ranked = sorted(occurrences.items(), key=lambda item: item[1], reverse=True)
        seed.append(ranked[0][0])

    vps = [seed]
    # usage[k] counts how many chosen pairs lie at distance k.
    usage = [0] * (dim + 1)
    threshold = 0
    while len(vps) < numberOfVP:
        print(len(vps))
        for candidate in datas:
            if any(candidate == vp
                   or usage[utils.hammingDistance(candidate, vp)] > threshold
                   for vp in vps):
                continue
            for vp in vps:
                usage[utils.hammingDistance(candidate, vp)] += 1
            vps.append(candidate)
        # Nothing more fits at this level: tolerate one more reuse per distance.
        threshold += 1

    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP),
        vps)
def calculateMany(vps):
    """Return how many distinct Hamming distances occur between different points.

    Every ordered pair of distinct positions in ``vps`` is measured with
    ``utils.hammingDistance``.
    """
    distinct = {
        utils.hammingDistance(first, second)
        for i, first in enumerate(vps)
        for j, second in enumerate(vps)
        if i != j
    }
    return len(distinct)
def calculateFx(vps):
    """Return the highest number of ordered point pairs sharing one distance.

    A histogram indexed by Hamming distance (0 .. len(vps[0])) is filled from
    every ordered pair of distinct positions in ``vps``; its largest bucket is
    returned.
    """
    histogram = [0] * (len(vps[0]) + 1)
    for i, first in enumerate(vps):
        for j, second in enumerate(vps):
            if i == j:
                continue
            histogram[utils.hammingDistance(first, second)] += 1
    return max(histogram)
def calculateMany(vps):
    """Count the different Hamming distances found among the points of ``vps``.

    Each point is compared, via ``utils.hammingDistance``, with every point at
    another position; the number of distinct values seen is returned.
    """
    seen = set()
    for i, current in enumerate(vps):
        seen.update(
            utils.hammingDistance(current, other)
            for j, other in enumerate(vps)
            if j != i
        )
    return len(seen)
def calculateAllPairDistance(options):
    """Print the Hamming distance of every unordered pair of stored vantage points.

    The vantage points are read from the file named by
    ``utils.getVPFileName(options)``; one line ``i j distance`` is printed for
    each pair with i < j.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'.
    """
    # None of these values is used below, but looking them up keeps the
    # KeyError raised when the options mapping is incomplete.
    for requiredKey in ('numberOfData', 'numberOfDimension', 'numberOfVP',
                        'numberOfAlphabet', 'typeOfVP'):
        options[requiredKey]

    vps = utils.readDataFromFile(utils.getVPFileName(options))
    for i, first in enumerate(vps):
        for j in xrange(i + 1, len(vps)):
            print('%s %s %s' % (i, j, utils.hammingDistance(first, vps[j])))
def calculateMany(vps):
    """Summarise the Hamming distances between the points of ``vps``.

    Every ordered pair of distinct positions is measured with
    ``utils.hammingDistance``.

    Returns:
        A tuple ``(distinctCount, histogram)``: the number of different
        distances seen, and a list of length ``len(vps[0]) + 1`` whose entry k
        is the number of ordered pairs at distance k.
    """
    histogram = [0] * (len(vps[0]) + 1)
    distances = [
        utils.hammingDistance(first, second)
        for i, first in enumerate(vps)
        for j, second in enumerate(vps)
        if i != j
    ]
    for dist in distances:
        histogram[dist] += 1
    return len(set(distances)), histogram
def convertNDDSToCDS(options):
    """Re-express data and query points as vectors of distances to the vantage points.

    The data, query and vantage-point files are located from the options.
    Every data point and every query point is replaced by the list of its
    Hamming distances to each vantage point, and the two resulting tables are
    written to the files named by ``utils.getCDSDataFileName`` and
    ``utils.getCDSQueryFileName``.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'distribution', 'numberOfAlphabet', 'numberOfVP' and 'typeOfVP'.
    """
    size = options['numberOfData']
    dim = options['numberOfDimension']
    distribution = options['distribution']
    cardinality = options['numberOfAlphabet']
    numberOfVP = options['numberOfVP']
    typeOfVP = options['typeOfVP']

    # Data and query files share the same naming fields.
    sourceFields = (size, dim, distribution, cardinality)
    dataFileName = 'data/data_%d_%d_%s_%d.txt' % sourceFields
    queryFileName = 'query/query_%d_%d_%s_%d.txt' % sourceFields
    vpFileName = 'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP)
    cdsDataFileName = utils.getCDSDataFileName(options)
    cdsQueryFileName = utils.getCDSQueryFileName(options)

    datas = utils.getDataInFile(dataFileName)
    querys = utils.readDataFromFile(queryFileName)
    vps = utils.readDataFromFile(vpFileName)
    print('%s %s %s' % (len(datas), len(querys), len(vps)))

    cdsDatas = [[utils.hammingDistance(point, vp) for vp in vps]
                for point in datas]
    utils.writeDataToFile(cdsDataFileName, cdsDatas)

    cdsQuerys = [[utils.hammingDistance(query, vp) for vp in vps]
                 for query in querys]
    utils.writeDataToFile(cdsQueryFileName, cdsQuerys)

    print('%s %s' % (cdsDataFileName, cdsQueryFileName))
def convertNDDSToCDS(options):
    """Map data and query points onto their distances to the vantage points.

    Reads the data, query and vantage-point files selected by the options,
    converts each data/query point into the list of its Hamming distances to
    all vantage points, and stores both converted tables in the files named by
    ``utils.getCDSDataFileName`` and ``utils.getCDSQueryFileName``.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'distribution', 'numberOfAlphabet', 'numberOfVP' and 'typeOfVP'.
    """
    def toDistanceRows(points, pivots):
        # One row per point: its distance to every pivot, in pivot order.
        rows = []
        for point in points:
            rows.append([utils.hammingDistance(point, pivot) for pivot in pivots])
        return rows

    size = options['numberOfData']
    dim = options['numberOfDimension']
    distribution = options['distribution']
    cardinality = options['numberOfAlphabet']
    numberOfVP = options['numberOfVP']
    typeOfVP = options['typeOfVP']

    dataFileName = 'data/data_%d_%d_%s_%d.txt' % (
        size, dim, distribution, cardinality)
    queryFileName = 'query/query_%d_%d_%s_%d.txt' % (
        size, dim, distribution, cardinality)
    vpFileName = 'vp/vp_%d_%d_%d_%s.txt' % (
        dim, numberOfVP, cardinality, typeOfVP)
    cdsDataFileName = utils.getCDSDataFileName(options)
    cdsQueryFileName = utils.getCDSQueryFileName(options)

    datas = utils.getDataInFile(dataFileName)
    querys = utils.readDataFromFile(queryFileName)
    vps = utils.readDataFromFile(vpFileName)
    print('%s %s %s' % (len(datas), len(querys), len(vps)))

    utils.writeDataToFile(cdsDataFileName, toDistanceRows(datas, vps))
    utils.writeDataToFile(cdsQueryFileName, toDistanceRows(querys, vps))
    print('%s %s' % (cdsDataFileName, cdsQueryFileName))
def calculateAllPairDistance(options):
    """Print all pairwise vantage-point distances followed by summary statistics.

    The vantage points come from the file named by
    ``utils.getVPFileName(options)``.  Output, in order:
      * one line ``i j distance`` per pair with i < j,
      * one line ``distance count`` for every distance from 0 to dim,
      * the number of distinct distances observed.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'.
    """
    # Only dim is used; the other lookups keep the KeyError raised for an
    # incomplete options mapping.
    numberOfData, dim, numberOfVP, cardinality, typeOfVP = [
        options[key]
        for key in ('numberOfData', 'numberOfDimension', 'numberOfVP',
                    'numberOfAlphabet', 'typeOfVP')
    ]
    vps = utils.readDataFromFile(utils.getVPFileName(options))

    histogram = [0] * (dim + 1)
    distinct = set()
    for i, first in enumerate(vps):
        for j in xrange(i + 1, len(vps)):
            dist = utils.hammingDistance(first, vps[j])
            histogram[dist] += 1
            distinct.add(dist)
            print('%s %s %s' % (i, j, dist))

    for dist, count in enumerate(histogram):
        print('%s %s' % (dist, count))
    print(len(distinct))
def _algorithm(N, M, K):
    """Greedily grow a set of vantage points with many distinct mutual distances.

    Starting from the value of ``getMajorPattern()``, each round collects
    candidates via ``generateCandidateSet`` for every chosen point and keeps
    the candidate whose Hamming distances to the chosen points take the most
    distinct values.  Rounds repeat until M points are chosen.

    Args:
        N: forwarded unchanged to ``generateCandidateSet``.
        M: number of vantage points to collect.
        K: forwarded unchanged to ``generateCandidateSet``.

    Returns:
        The set of chosen vantage points.
    """
    V = set()
    dist = set()
    candidateSet = set()
    V.add(getMajorPattern())
    while len(V) < M:
        for vp in V:
            # NOTE(review): this stores each generateCandidateSet() result as
            # ONE element, so it must be hashable (e.g. a tuple) - confirm
            # whether update() was intended.
            candidateSet.add(generateCandidateSet(vp, N, len(V), K))

        maxDistCardinality = 0
        nextPoint = ()
        nextDist = set()
        for candidate in candidateSet:
            newDist = set()
            for vp in V:
                # set.add() returns None, so its result must not be assigned
                # back to newDist (doing so broke the later add()/len() calls).
                newDist.add(utils.hammingDistance(vp, candidate))
            if len(newDist) > maxDistCardinality:
                maxDistCardinality = len(newDist)
                nextPoint = candidate
                nextDist = newDist

        # NOTE(review): the winner is indexed with [1], so candidates are
        # presumably (something, point) pairs; this raises IndexError when no
        # candidate was selected - confirm against generateCandidateSet.
        V.add(nextPoint[1])
        # A set cannot be an element of another set (unhashable), so merge the
        # winner's distances instead; presumably that is what was meant.
        dist.update(nextDist)
        candidateSet = eraseCandidateSet(candidateSet)
    return V
if __name__ == '__main__':
    # For every vantage point, plot how many data points lie at each Hamming
    # distance from it and save the plot under the figure directory.
    utils.createDirectory('figure')
    options = utils.getOptions()
    dataFileName = utils.getDataFileName(options)
    vpFileName = utils.getVPFileName(options)
    datas = utils.getDataInFile(dataFileName)
    vps = utils.readDataFromFile(vpFileName)

    for i, vp in enumerate(vps):
        print(i)
        imageFileName = utils.getImageFileName(options, i)
        # Check for an existing figure first, so the histogram is not
        # computed only to be thrown away.
        if os.path.exists(imageFileName):
            print('%s is exists' % imageFileName)
            continue

        # counts[k] is the number of data points at distance k from vp.
        counts = [0] * (len(vp) + 1)
        for data in datas:
            counts[utils.hammingDistance(vp, data)] += 1

        distances = list(range(len(counts)))
        utils.saveGraphWithHighValue(imageFileName, distances, counts, max(counts))
if __name__ == '__main__':
    # Save, for each vantage point, a graph of the number of data points found
    # at every Hamming distance from that vantage point.
    utils.createDirectory('figure')
    options = utils.getOptions()
    dataFileName = utils.getDataFileName(options)
    vpFileName = utils.getVPFileName(options)
    datas = utils.getDataInFile(dataFileName)
    vps = utils.readDataFromFile(vpFileName)

    for i in xrange(len(vps)):
        print(i)
        imageFileName = utils.getImageFileName(options, i)
        if os.path.exists(imageFileName):
            # The figure is already on disk: skip the distance computation
            # entirely rather than computing a histogram nobody uses.
            print('%s is exists' % (imageFileName))
            continue

        vp = vps[i]
        xp = list(range(len(vp) + 1))   # every possible distance, 0 .. len(vp)
        yp = [0] * len(xp)              # number of data points per distance
        for data in datas:
            yp[utils.hammingDistance(vp, data)] += 1
        mx = max(yp)                    # tallest bar, passed on to the plot

        utils.saveGraphWithHighValue(imageFileName, xp, yp, mx)
# For every pair of vantage points, scatter-plot each data point by its
# distances to the two vantage points, labelled with the absolute correlation
# coefficient of the pair.  Pairs whose figure already exists are skipped.
dataFileName = utils.getDataFileName(options)
vpFileName = utils.getVPFileName(options)
datas = utils.getDataInFile(dataFileName)
vps = utils.readDataFromFile(vpFileName)

for i, firstVP in enumerate(vps):
    for j in xrange(i + 1, len(vps)):
        secondVP = vps[j]
        cc = abs(utils.calculateCorrelationCoefficient(firstVP, secondVP, datas))
        imageFileName = utils.getFigurePairName(options, i, j, cc)
        print(imageFileName)
        if os.path.exists(imageFileName):
            print('%s is exists' % imageFileName)
            continue

        xp = []
        yp = []
        # zcnt[y][x] counts the data points that land on coordinate (x, y).
        zcnt = [[0] * (dim + 1) for _ in xrange(dim + 1)]
        for data in datas:
            x = utils.hammingDistance(firstVP, data)
            y = utils.hammingDistance(secondVP, data)
            zcnt[y][x] += 1
            xp.append(x)
            yp.append(y)

        # Multiplicity of each plotted point; only the 3D-surface variant
        # below consumes it.
        zp = [zcnt[py][px] for px, py in zip(xp, yp)]

        utils.saveGraphUsingPointWithCC(imageFileName, xp, yp, cc, dim)
        # Alternative rendering:
        # utils.saveGraphUsing3DSurfaceWithCC(imageFileName, xp, yp, zp, cc, dim)
def generateVantagePointsWithManyAlgorithm(options):
    """Select vantage points from the data set, evicting the worst one when stuck.

    The first vantage point is the "major pattern" (the most frequent symbol
    of every dimension).  The data set is then scanned repeatedly: an
    unselected data point is added when it differs from every chosen point and
    none of its distances to the chosen points has already been used more
    than ``threshold`` (2) times.  When a full scan adds nothing, the point
    reported by ``getWorstVP`` is removed through ``popDataAtIndex`` and its
    distances are subtracted from the usage counts.

    The function prints progress, then the final number of points and the
    distance usage counts; it neither returns nor writes the selected points.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost - confirm against the original file.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'.
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))
    # Maximum number of times one distance value may already be in use.
    threshold = 2
    # Most frequent symbol per dimension.
    majorPattern = []
    for j in xrange(dim):
        d = {}
        for i in xrange(numberOfData):
            if datas[i][j] in d:
                d[datas[i][j]] += 1
            else:
                # First sighting is stored as 0; the uniform offset does not
                # change which symbol ranks first.
                d[datas[i][j]] = 0
        d = sorted(d.items(), key=lambda x: x[1], reverse=True)
        majorPattern.append(d[0][0])
    vps = []
    vps.append(majorPattern)
    # From here on d[k] counts the chosen pairs lying at distance k.
    d = [0 for i in xrange(dim + 1)]
    # Data points equal to the major pattern are treated as already chosen.
    isSelected = [False for i in xrange(len(datas))]
    for i in xrange(len(datas)):
        if datas[i] == majorPattern:
            isSelected[i] = True
    notChangedCount = 0
    while len(vps) < numberOfVP:
        print len(vps)
        changed = False
        for i in xrange(len(datas)):
            if isSelected[i]:
                continue
            # is_pass means "skip this data point".
            is_pass = False
            for j in xrange(len(vps)):
                if datas[i] == vps[j]:
                    is_pass = True
                dist = utils.hammingDistance(datas[i], vps[j])
                if d[dist] > threshold:
                    is_pass = True
            if is_pass:
                continue
            # Accept the point: record its distance to every chosen point.
            for j in xrange(len(vps)):
                dist = utils.hammingDistance(datas[i], vps[j])
                d[dist] += 1
            vps.append(datas[i])
            isSelected[i] = True
            changed = True
        if not changed:
            # A whole scan added nothing: drop the worst vantage point and
            # take its distances back out of the usage counts.
            print 'not changed so pop worst (%d)' % notChangedCount
            worstIdx = getWorstVP(vps)
            for j in xrange(len(vps)):
                if j == worstIdx:
                    continue
                dist = utils.hammingDistance(vps[j], vps[worstIdx])
                d[dist] -= 1
            nextVPS = popDataAtIndex(vps, worstIdx)
            vps = nextVPS
            # isSelected is not reset for the removed point, so it is never
            # reconsidered by later scans.
            notChangedCount += 1
            if notChangedCount > numberOfVP / 2:
                # NOTE(review): the counter is reset before it is used as the
                # loop bound, so xrange(-1) is empty and this extra eviction
                # loop never runs - confirm whether the reset belongs after it.
                notChangedCount = 0
                for k in xrange(notChangedCount - 1):
                    worstIdx = getWorstVP(vps)
                    for j in xrange(len(vps)):
                        if j == worstIdx:
                            continue
                        dist = utils.hammingDistance(vps[j], vps[worstIdx])
                        d[dist] -= 1
                    nextVPS = popDataAtIndex(vps, worstIdx)
                    vps = nextVPS
    print len(vps)
    print d
# Draw one scatter plot per pair of vantage points: each data point is placed
# at (distance to first VP, distance to second VP) and the figure is labelled
# with the absolute correlation coefficient.  Existing figures are left alone.
dim = options['numberOfDimension']
dataFileName = utils.getDataFileName(options)
vpFileName = utils.getVPFileName(options)
datas = utils.getDataInFile(dataFileName)
vps = utils.readDataFromFile(vpFileName)

for i in xrange(len(vps)):
    for j in xrange(i + 1, len(vps)):
        cc = abs(utils.calculateCorrelationCoefficient(vps[i], vps[j], datas))
        imageFileName = utils.getFigurePairName(options, i, j, cc)
        print(imageFileName)
        if os.path.exists(imageFileName):
            print('%s is exists' % imageFileName)
            continue

        # Grid of hit counts, indexed [distance to vps[j]][distance to vps[i]].
        zcnt = [[0 for _column in xrange(dim + 1)] for _row in xrange(dim + 1)]
        xp, yp = [], []
        for data in datas:
            x = utils.hammingDistance(vps[i], data)
            y = utils.hammingDistance(vps[j], data)
            zcnt[y][x] = zcnt[y][x] + 1
            xp.append(x)
            yp.append(y)

        # How many data points share each plotted coordinate; used only by
        # the 3D-surface rendering kept below for reference.
        zp = []
        for px, py in zip(xp, yp):
            zp.append(zcnt[py][px])

        utils.saveGraphUsingPointWithCC(imageFileName, xp, yp, cc, dim)
        # utils.saveGraphUsing3DSurfaceWithCC(imageFileName, xp, yp, zp, cc, dim)
def generateVantagePointsWithHybridAlgorithm(options):
    """Build vantage points by filling unused distance values, then save them.

    The first vantage point is the "major pattern" (the most frequent symbol
    of every dimension).  Each round looks for the smallest distance value
    that no chosen pair uses yet; for every chosen point a trial point is
    obtained from ``generateVpWithDist`` and scored with ``calculateMany``,
    and the best trial is kept.  When every distance value is already in use,
    a random data point is appended instead.  The result is written to the
    ``vp/`` file named after the options.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost - confirm against the original file.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'.
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))
    # Only majorPattern[0] is ever filled (see xrange(1) below).
    majorPattern = [[] for i in xrange(numberOfVP + 1)]
    for j in xrange(dim):
        d = {}
        for i in xrange(numberOfData):
            if datas[i][j] in d:
                d[datas[i][j]] += 1
            else:
                # First sighting is stored as 0; the uniform offset does not
                # change which symbol ranks first.
                d[datas[i][j]] = 0
        d = sorted(d.items(), key=lambda x: x[1], reverse=True)
        for k in xrange(1):
            majorPattern[k].append(d[k][0])
    vps = []
    vps.append(majorPattern[0])
    # From here on d[k] counts the chosen pairs lying at distance k.
    d = [0 for i in xrange(dim + 1)]
    # NOTE(review): one_pass starts False and is only ever assigned False, so
    # the data-scanning branch below never executes - confirm intent.
    one_pass = False
    while len(vps) < numberOfVP:
        print len(vps)
        ans, ansDataIndex = -1, -1
        if one_pass:
            # Scan the data set and add every point that is new and whose
            # distances to the chosen points are each used at most once.
            for i in xrange(len(datas)):
                ok = False
                for j in xrange(len(vps)):
                    dist = utils.hammingDistance(datas[i], vps[j])
                    if d[dist] > 1:
                        ok = True
                    if datas[i] == vps[j]:
                        ok = True
                if ok:
                    continue
                for j in xrange(len(vps)):
                    dist = utils.hammingDistance(datas[i], vps[j])
                    d[dist] += 1
                vps.append(datas[i])
            one_pass = False
        else:
            change = False
            for i in xrange(dim + 1):
                if d[i] == 0:
                    # Distance i is unused so far: try to create it.
                    change = True
                    ans, ans_vp = -1, ''
                    # Large sentinel so the first tie comparison always wins.
                    fx = 987654321
                    for j in xrange(len(vps)):
                        # presumably a point at distance i from vps[j] -
                        # verify against generateVpWithDist
                        cur_vp = generateVpWithDist(dim, cardinality, vps[j], i)
                        # Append the trial point temporarily to score the set.
                        vps.append(cur_vp)
                        # calculateMany must return a (count, distances) pair here.
                        cur, dists = calculateMany(vps)
                        if cur > ans:
                            ans, ans_vp = cur, cur_vp
                            fx = max(dists)
                        elif cur == ans:
                            # Tie on the count: prefer the smaller maximum.
                            if max(dists) < fx:
                                ans, ans_vp = cur, cur_vp
                                fx = max(dists)
                        # Remove the trial point again.
                        vps = vps[:-1]
                    for j in xrange(len(vps)):
                        dist = utils.hammingDistance(vps[j], ans_vp)
                        d[dist] += 1
                    vps.append(ans_vp)
                    # Only one point is added per round.
                    break
            if not change:
                # No unused distance value is left: fall back to a random
                # data point (d is not updated for it).
                vps.append(datas[random.randrange(0, numberOfData)])
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP),
        vps)
#!/usr/bin/python
#-*- coding:utf-8 -*-
import utils

if __name__ == '__main__':
    # Answer every range query by a linear scan over the data set: a data
    # point matches when its Hamming distance to the query is at most
    # queryRange.  Each query is written as a '#<index>' header line followed
    # by one line per matching data point.
    utils.createDirectory('rq_result')
    options = utils.getOptions()
    queryRange = options['queryRange']
    dataFileName = utils.getDataFileName(options)
    queryFileName = utils.getQueryFileName(options)
    rqResultFileName = utils.getRQResultFileName(options)
    datas = utils.getDataInFile(dataFileName)
    querys = utils.readDataFromFile(queryFileName)

    with open(rqResultFileName, 'w') as fp:
        for queryIndex, query in enumerate(querys):
            header = '#%d' % queryIndex
            print(header)
            fp.write(header + '\n')

            matches = [data for data in datas
                       if utils.hammingDistance(query, data) <= queryRange]
            for match in matches:
                # Every symbol is followed by a single space, then the line ends.
                fp.write(''.join('%c ' % symbol for symbol in match) + '\n')