コード例 #1
0
def getTotalDistance(vps,vp):
    """Return the variance of the pairwise Hamming distances of ``vps`` plus ``vp``.

    Every ordered pair of distinct points taken from ``vps`` followed by
    ``vp`` is measured with ``utils.hammingDistance``.  The result is the
    population variance of those distances (mean of the squares minus the
    square of the mean).

    Args:
        vps: Sequence of points; each point is an iterable of symbols.
        vp: One additional point that is evaluated together with ``vps``.

    Returns:
        The variance as a float.  ``0.0`` is returned when fewer than two
        points are available, because no distance can be measured then
        (the previous implementation divided by zero in that case).
    """
    # Work on list copies, as before, so every point handed to
    # hammingDistance has the same container type.
    points = [list(point) for point in vps]
    points.append(list(vp))

    if len(points) < 2:
        return 0.0

    total = 0.0
    totalOfSquares = 0.0
    pairCount = 0
    for i, first in enumerate(points):
        for j, second in enumerate(points):
            if i == j:
                continue
            dist = utils.hammingDistance(first, second)
            total += dist
            totalOfSquares += dist * dist
            pairCount += 1

    mean = total / pairCount
    return float(totalOfSquares / pairCount - mean * mean)
コード例 #2
0
def makeOneRun():
    '''Hash a random 256-bit string and a copy of it with one bit flipped,
    then return the Hamming distance between the two hash results.'''

    # The bit position is drawn before the 256-bit value; the order is kept
    # so that a seeded generator is consumed exactly as before.
    flipPosition = random.randint(0,255)
    original = random.getrandbits(256)

    # XOR with a single-bit mask yields the almost identical second value.
    flipped = original ^ (1 << flipPosition)

    # Integers -> fixed-width bit strings -> hashes -> bit strings again.
    messages = [BitArray(uint=value, length=256) for value in (original, flipped)]
    digests = [hashIt(message.bytes) for message in messages]
    digestBits = [BitArray(digest) for digest in digests]

    return hammingDistance(digestBits[0], digestBits[1])
コード例 #3
0
def generateHeuristicVantagePoints(options):
    """Generate random vantage points by rejection sampling and write them to a file.

    Candidates come from ``generateUniformRandomVP(dim, cardinality)``.  A
    candidate is rejected when, for any already accepted point,
    ``dim - hammingDistance`` exceeds ``dim * 0.4``; otherwise it is
    accepted.  Sampling for one slot repeats until a candidate is accepted.

    Args:
        options: Mapping providing 'numberOfDimension', 'numberOfVP',
            'numberOfAlphabet' and 'typeOfVP'.

    Returns:
        None.  The accepted points are passed to ``utils.writeDataToFile``.
    """
    dim          = options['numberOfDimension']
    numberOfVP   = options['numberOfVP']
    cardinality  = options['numberOfAlphabet']
    typeOfVP     = options['typeOfVP']

    limit = dim*0.4

    accepted = []
    for _ in xrange(numberOfVP):
        # With no accepted point yet the check below is vacuous, so the
        # very first candidate is always taken.
        while True:
            candidate = generateUniformRandomVP(dim,cardinality)
            rejected = any(
                dim - utils.hammingDistance(chosen, candidate) > limit
                for chosen in accepted)
            if not rejected:
                accepted.append(candidate)
                break
    utils.writeDataToFile('vp/vp_%d_%d_%d_%s.txt'%(dim,numberOfVP,cardinality,typeOfVP),accepted)
コード例 #4
0
def generateHeuristicVantagePoints(options):
    """Pick ``numberOfVP`` random vantage points and store them in the vp file.

    Each slot is filled by drawing candidates from
    ``generateUniformRandomVP`` until one is found for which
    ``dim - utils.hammingDistance(existing, candidate)`` stays at or below
    ``dim * 0.4`` for every point accepted so far.

    Args:
        options: Mapping with the keys 'numberOfDimension', 'numberOfVP',
            'numberOfAlphabet' and 'typeOfVP'.

    Returns:
        None; the result is written through ``utils.writeDataToFile``.
    """
    dim          = options['numberOfDimension']
    numberOfVP   = options['numberOfVP']
    cardinality  = options['numberOfAlphabet']
    typeOfVP     = options['typeOfVP']

    threshold = dim*0.4
    outputName = 'vp/vp_%d_%d_%d_%s.txt'%(dim,numberOfVP,cardinality,typeOfVP)

    chosen = []
    for _ in xrange(numberOfVP):
        while True:
            candidate = generateUniformRandomVP(dim,cardinality)
            for existing in chosen:
                if dim - utils.hammingDistance(existing, candidate) > threshold:
                    # Rejected: draw a new candidate for this slot.
                    break
            else:
                # No existing point objected (always true for the first slot).
                chosen.append(candidate)
                break
    utils.writeDataToFile(outputName, chosen)
コード例 #5
0
def getTotalDistance(vps,vp):
    """Compute the variance of all pairwise Hamming distances of ``vps`` and ``vp``.

    The points of ``vps`` and the extra point ``vp`` are compared pairwise
    (both orderings of each pair) with ``utils.hammingDistance``; the
    population variance ``E[d^2] - E[d]^2`` of the distances is returned.

    Args:
        vps: Sequence of points, each an iterable of symbols.
        vp: Additional point considered together with ``vps``.

    Returns:
        The variance as a float, or ``0.0`` when there are fewer than two
        points.  Without this guard the pair count is zero and the mean
        computation raised ``ZeroDivisionError``.
    """
    # Copy each point into a list (the original did this element by element).
    allPoints = [list(existing) for existing in vps] + [list(vp)]

    if len(allPoints) < 2:
        return 0.0

    distances = []
    for i, left in enumerate(allPoints):
        for j, right in enumerate(allPoints):
            if i != j:
                distances.append(utils.hammingDistance(left, right))

    # Accumulate in the same order as before, starting from floats.
    distSum = 0.0
    squareSum = 0.0
    for dist in distances:
        distSum += dist
        squareSum += dist * dist

    count = len(distances)
    mean = distSum / count
    return float(squareSum / count - mean * mean)
コード例 #6
0
def generateVantagePointsWithManyAlgorithm(options):
    """Select vantage points from the data set and write them to the vp file.

    The first vantage point is the "major pattern": for each dimension, the
    symbol that occurs most often among the first ``numberOfData`` rows.
    Further vantage points are data rows picked in passes over the data.
    ``d[k]`` counts how often Hamming distance ``k`` has been recorded
    between a newly accepted point and the points accepted before it.  A row
    is skipped when it equals an accepted point or when any of its distances
    to the accepted points already has a count above ``threshold``.  After
    each pass ``threshold`` grows by one, relaxing the rule.

    Changes compared with the previous revision:
      * Selection stops as soon as ``numberOfVP`` points are collected.
        Before, a pass kept appending until the end of the data, so the file
        named after ``numberOfVP`` could hold more points than that.
      * Symbol frequencies start at 1 instead of 0 (ranking is unchanged).
      * Unused locals were removed.

    Args:
        options: Mapping with 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'; it is also
            passed to ``utils.getDataFileName``.

    Returns:
        None.  The result is written with ``utils.writeDataToFile``.
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))

    # Most frequent symbol of every dimension.
    majorPattern = []
    for j in xrange(dim):
        counts = {}
        for i in xrange(numberOfData):
            symbol = datas[i][j]
            counts[symbol] = counts.get(symbol, 0) + 1
        ranked = sorted(counts.items(), key=lambda x: x[1], reverse=True)
        majorPattern.append(ranked[0][0])

    vps = [majorPattern]
    d = [0] * (dim + 1)
    threshold = 0
    while len(vps) < numberOfVP:
        print(len(vps))
        for data in datas:
            if len(vps) >= numberOfVP:
                # Target reached: do not overshoot the requested count.
                break
            dists = [utils.hammingDistance(data, vp) for vp in vps]
            if data in vps or any(d[dist] > threshold for dist in dists):
                continue
            for dist in dists:
                d[dist] += 1
            vps.append(data)
        threshold += 1

    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP),
        vps)
コード例 #7
0
def calculateMany(vps):
    """Return how many distinct Hamming distances occur between different entries of ``vps``.

    Entries are paired by position; both orderings of every pair are passed
    to ``utils.hammingDistance``.
    """
    distinct = set(
        utils.hammingDistance(left, right)
        for a, left in enumerate(vps)
        for b, right in enumerate(vps)
        if a != b)
    return len(distinct)
コード例 #8
0
def calculateFx(vps):
    """Return the largest number of ordered pairs of ``vps`` sharing one Hamming distance.

    A histogram with ``len(vps[0]) + 1`` buckets is filled with the distance
    of every ordered pair of entries at different positions; the maximum
    bucket value is returned.
    """
    histogram = [0] * (len(vps[0]) + 1)
    for a, left in enumerate(vps):
        for b, right in enumerate(vps):
            if a == b:
                continue
            histogram[utils.hammingDistance(left, right)] += 1
    return max(histogram)
コード例 #9
0
def calculateMany(vps):
    """Count the different Hamming distance values found among the entries of ``vps``.

    Every ordered pair of entries at different positions is measured with
    ``utils.hammingDistance``; the number of distinct results is returned.
    """
    seen = set()
    for a, left in enumerate(vps):
        for b, right in enumerate(vps):
            if a == b:
                continue
            seen.add(utils.hammingDistance(left, right))
    return len(seen)
コード例 #10
0
def calculateAllPairDistance(options):
    """Print ``i j distance`` for every unordered pair of stored vantage points.

    The vantage points are read from the file named by
    ``utils.getVPFileName(options)``.

    Args:
        options: Mapping with 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'.
    """
    # None of these values is used below; the lookups are kept so that a
    # missing key still raises KeyError before any file is read.
    for key in ('numberOfData', 'numberOfDimension', 'numberOfVP',
                'numberOfAlphabet', 'typeOfVP'):
        options[key]

    vps = utils.readDataFromFile(utils.getVPFileName(options))
    count = len(vps)
    for i, first in enumerate(vps):
        for j in xrange(i+1, count):
            print('%s %s %s' % (i, j, utils.hammingDistance(first, vps[j])))
コード例 #11
0
def calculateMany(vps):
    """Summarise the pairwise Hamming distances of ``vps``.

    Returns:
        A tuple ``(distinctCount, histogram)``: the number of distinct
        distance values, and a list with ``len(vps[0]) + 1`` buckets where
        bucket ``k`` counts the ordered pairs at distance ``k``.
    """
    distinct = set()
    histogram = [0] * (len(vps[0]) + 1)
    for a, left in enumerate(vps):
        for b, right in enumerate(vps):
            if a == b:
                continue
            dist = utils.hammingDistance(left, right)
            histogram[dist] += 1
            distinct.add(dist)
    return len(distinct), histogram
コード例 #12
0
ファイル: convert_ndds_to_cds.py プロジェクト: sisobus/NDDS
def convertNDDSToCDS(options):
    """Rewrite data and query points as vectors of distances to the vantage points.

    Every data row and every query row is replaced by the list of its
    Hamming distances to each vantage point, in vantage-point order.  The
    two resulting tables are written to the files named by
    ``utils.getCDSDataFileName`` and ``utils.getCDSQueryFileName``.

    Args:
        options: Mapping with 'numberOfData', 'numberOfDimension',
            'distribution', 'numberOfAlphabet', 'numberOfVP' and 'typeOfVP'.
    """
    size = options['numberOfData']
    dim = options['numberOfDimension']
    distribution = options['distribution']
    cardinality = options['numberOfAlphabet']
    numberOfVP = options['numberOfVP']
    typeOfVP = options['typeOfVP']

    # Data and query files share the same naming parameters.
    commonArgs = (size, dim, distribution, cardinality)
    dataFileName = 'data/data_%d_%d_%s_%d.txt' % commonArgs
    queryFileName = 'query/query_%d_%d_%s_%d.txt' % commonArgs
    vpFileName = 'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality,
                                            typeOfVP)
    cdsDataFileName = utils.getCDSDataFileName(options)
    cdsQueryFileName = utils.getCDSQueryFileName(options)

    datas = utils.getDataInFile(dataFileName)
    querys = utils.readDataFromFile(queryFileName)
    vps = utils.readDataFromFile(vpFileName)
    print('%d %d %d' % (len(datas), len(querys), len(vps)))

    cdsDatas = [[utils.hammingDistance(data, vp) for vp in vps]
                for data in datas]
    utils.writeDataToFile(cdsDataFileName, cdsDatas)

    cdsQuerys = [[utils.hammingDistance(query, vp) for vp in vps]
                 for query in querys]
    utils.writeDataToFile(cdsQueryFileName, cdsQuerys)
    print('%s %s' % (cdsDataFileName, cdsQueryFileName))
コード例 #13
0
ファイル: convert_ndds_to_cds.py プロジェクト: sisobus/NDDS
def convertNDDSToCDS(options):
    """Convert stored data and queries into distance-to-vantage-point rows.

    Reads the data, query and vantage-point files whose names are derived
    from ``options``, maps every row to the list of its Hamming distances
    to all vantage points, and writes both tables to the CDS file names
    supplied by ``utils``.

    Args:
        options: Mapping with 'numberOfData', 'numberOfDimension',
            'distribution', 'numberOfAlphabet', 'numberOfVP' and 'typeOfVP'.
    """
    size            = options['numberOfData']
    dim             = options['numberOfDimension']
    distribution    = options['distribution']
    cardinality     = options['numberOfAlphabet']
    numberOfVP      = options['numberOfVP']
    typeOfVP        = options['typeOfVP']

    dataFileName    = 'data/data_%d_%d_%s_%d.txt'%(size,dim,distribution,cardinality)
    queryFileName   = 'query/query_%d_%d_%s_%d.txt'%(size,dim,distribution,cardinality)
    vpFileName      = 'vp/vp_%d_%d_%d_%s.txt'%(dim,numberOfVP,cardinality,typeOfVP)
    cdsDataFileName = utils.getCDSDataFileName(options)
    cdsQueryFileName= utils.getCDSQueryFileName(options)

    datas   = utils.getDataInFile(dataFileName)
    querys  = utils.readDataFromFile(queryFileName)
    vps     = utils.readDataFromFile(vpFileName)
    print('%d %d %d' % (len(datas), len(querys), len(vps)))

    def distancesToVantagePoints(rows):
        # One output row per input row: its distance to every vantage point.
        converted = []
        for row in rows:
            converted.append([utils.hammingDistance(row, vp) for vp in vps])
        return converted

    utils.writeDataToFile(cdsDataFileName, distancesToVantagePoints(datas))
    utils.writeDataToFile(cdsQueryFileName, distancesToVantagePoints(querys))
    print('%s %s' % (cdsDataFileName, cdsQueryFileName))
コード例 #14
0
def calculateAllPairDistance(options):
    """Print all pairwise vantage-point distances and a summary of them.

    Output, in order: one ``i j distance`` line per unordered pair, then one
    ``distance count`` line for every distance from 0 to ``dim``, then the
    number of distinct distances seen.

    Args:
        options: Mapping with 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'; it is also
            passed to ``utils.getVPFileName``.
    """
    # Only the dimension is used; the other lookups are kept (in the
    # original order) so that a missing key still raises KeyError.
    options['numberOfData']
    dim = options['numberOfDimension']
    for key in ('numberOfVP', 'numberOfAlphabet', 'typeOfVP'):
        options[key]

    vps = utils.readDataFromFile(utils.getVPFileName(options))
    histogram = [0] * (dim + 1)
    distinct = set()
    for i, first in enumerate(vps):
        for j in xrange(i + 1, len(vps)):
            dist = utils.hammingDistance(first, vps[j])
            histogram[dist] += 1
            distinct.add(dist)
            print('%s %s %s' % (i, j, dist))
    for value, count in enumerate(histogram):
        print('%s %s' % (value, count))
    print(len(distinct))
コード例 #15
0
def _algorithm(N, M, K):
    """Greedily grow a set of vantage points until it holds ``M`` entries.

    Starting from ``getMajorPattern()``, each round adds the results of
    ``generateCandidateSet`` for every current point to the candidate pool.
    It then picks the candidate whose Hamming distances to the current
    points take the largest number of distinct values.

    Fixes compared with the previous revision:
      * ``newDist = newDist.add(...)`` rebound ``newDist`` to ``None``
        (``set.add`` returns ``None``), so the distance set was lost after
        the first point.  The set is now built directly.
      * ``dist.add(newDist)`` tried to add an unhashable ``set`` and used
        the distances of the last candidate examined instead of the chosen
        one.  The chosen candidate's distances are now merged into ``dist``.

    Args:
        N, K: Forwarded unchanged to ``generateCandidateSet``.
        M: Number of vantage points to collect.

    Returns:
        The set ``V`` of selected points.
    """
    V = set()
    dist = set()
    candidateSet = set()
    V.add(getMajorPattern())
    while len(V) < M:
        for vp in V:
            # NOTE(review): this requires generateCandidateSet to return a
            # hashable value; confirm against its definition.
            candidateSet.add(generateCandidateSet(vp, N, len(V), K))
        maxDistCadinality = 0
        nextPoint = ()
        nextDist = set()
        for candidate in candidateSet:
            newDist = set(utils.hammingDistance(vp, candidate) for vp in V)
            if len(newDist) > maxDistCadinality:
                maxDistCadinality = len(newDist)
                nextPoint = candidate
                nextDist = newDist
        # NOTE(review): candidates are indexed with [1] here but passed whole
        # to hammingDistance above; with an empty candidate pool nextPoint
        # stays () and this raises IndexError.  Confirm the candidate layout.
        V.add(nextPoint[1])
        dist.update(nextDist)
        candidateSet = eraseCandidateSet(candidateSet)
    return V
コード例 #16
0
ファイル: draw_deep_graph.py プロジェクト: sisobus/NDDS
if __name__ == '__main__':
    # For every vantage point, plot how many data points lie at each Hamming
    # distance from it and save the plot under the name given by
    # utils.getImageFileName.  Existing images are reported and left alone.
    utils.createDirectory('figure')
    options = utils.getOptions()

    dataFileName    = utils.getDataFileName(options)
    vpFileName      = utils.getVPFileName(options)
    datas           = utils.getDataInFile(dataFileName)
    vps             = utils.readDataFromFile(vpFileName)

    for i, vp in enumerate(vps):
        print(i)
        imageFileName = utils.getImageFileName(options,i)
        if os.path.exists(imageFileName):
            # Checked before the distance scan so nothing is computed for an
            # image that will not be written (the scan used to run first).
            print('%s is exists' % imageFileName)
            continue

        # counts[k] = number of data points at distance k from this point.
        counts = [0] * (len(vp) + 1)
        for data in datas:
            counts[utils.hammingDistance(vp, data)] += 1

        xp = list(range(len(counts)))
        yp = counts
        mx = max(counts)
        utils.saveGraphWithHighValue(imageFileName,xp,yp,mx)
        
コード例 #17
0
ファイル: draw_deep_graph.py プロジェクト: sisobus/NDDS
if __name__ == '__main__':
    # Save one distance histogram per vantage point: the x axis holds the
    # Hamming distances 0..len(vp), the y axis the number of data points at
    # that distance.  Images that already exist are not regenerated.
    utils.createDirectory('figure')
    options = utils.getOptions()

    dataFileName = utils.getDataFileName(options)
    vpFileName = utils.getVPFileName(options)
    datas = utils.getDataInFile(dataFileName)
    vps = utils.readDataFromFile(vpFileName)

    for i, vp in enumerate(vps):
        print(i)
        imageFileName = utils.getImageFileName(options, i)
        if os.path.exists(imageFileName):
            # Decide up front: previously the whole data set was scanned and
            # the existence check was then evaluated twice (if / elif not).
            print('%s is exists' % imageFileName)
            continue

        bucketCount = len(vp) + 1
        yp = [0] * bucketCount
        for data in datas:
            yp[utils.hammingDistance(vp, data)] += 1
        xp = [distance for distance in xrange(bucketCount)]

        # Height of the tallest bar, passed on to the plotting helper.
        mx = max(yp)
        utils.saveGraphWithHighValue(imageFileName, xp, yp, mx)
コード例 #18
0
ファイル: draw_pair_graph.py プロジェクト: sisobus/NDDS
    # Fragment: the enclosing header and the definitions of `options` and
    # `dim` are not visible here.
    # For every unordered pair of vantage points, plot the data points by
    # their Hamming distances to the two vantage points.
    dataFileName    = utils.getDataFileName(options)
    vpFileName      = utils.getVPFileName(options)
    datas           = utils.getDataInFile(dataFileName)
    vps             = utils.readDataFromFile(vpFileName)

    for i in xrange(len(vps)):
        for j in xrange(i+1,len(vps)):
            # The absolute value of the coefficient is part of the image
            # file name, so it is computed even when the image exists.
            cc = utils.calculateCorrelationCoefficient(vps[i],vps[j],datas)
            cc = abs(cc)
            imageFileName = utils.getFigurePairName(options,i,j,cc)
            print imageFileName
            if os.path.exists(imageFileName):
                print '%s is exists'%imageFileName
                continue
            xp = []
            yp = []
            zp = []
            # zcnt[y][x]: number of data points at distance x from vps[i]
            # and distance y from vps[j].
            zcnt = [ [ 0 for ii in xrange(dim+1) ] for jj in xrange(dim+1) ]
            for k in xrange(len(datas)):
                x = utils.hammingDistance(vps[i],datas[k])
                y = utils.hammingDistance(vps[j],datas[k])
                zcnt[y][x] = zcnt[y][x] + 1
                xp.append(x)
                yp.append(y)
            # zp holds, per data point, the count of its (x, y) cell; it is
            # only consumed by the disabled 3D call below.
            for k in xrange(len(xp)):
                zp.append(zcnt[yp[k]][xp[k]])
            utils.saveGraphUsingPointWithCC(imageFileName,xp,yp,cc,dim)
            #utils.saveGraphUsing3DSurfaceWithCC(imageFileName,xp,yp,zp,cc,dim)


コード例 #19
0
def generateVantagePointsWithManyAlgorithm(options):
    """Select vantage points from the data, dropping "worst" ones when stuck.

    The first vantage point is the major pattern (per dimension, the
    top-ranked symbol among the first ``numberOfData`` rows).  Data rows are
    then added in passes; ``d[k]`` counts how often distance ``k`` was
    recorded between an added row and the points present at that moment.
    A row is skipped when it equals a selected point or when one of its
    distances already has a count above ``threshold`` (fixed at 2).  When a
    full pass adds nothing, points chosen by ``getWorstVP`` are removed and
    their distances are subtracted from ``d``.

    Args:
        options: Mapping with 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'; it is also
            passed to ``utils.getDataFileName``.

    NOTE(review): no return statement or file write is visible in this
    block; the end of the function may be missing from this view.
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))
    threshold = 2

    # Major pattern: the top-ranked symbol of each dimension.
    majorPattern = []
    for j in xrange(dim):
        d = {}
        for i in xrange(numberOfData):
            if datas[i][j] in d:
                d[datas[i][j]] += 1
            else:
                # The first occurrence is stored as 0, so every count is one
                # below the true frequency; the ranking is not affected.
                d[datas[i][j]] = 0
        d = sorted(d.items(), key=lambda x: x[1], reverse=True)
        majorPattern.append(d[0][0])

    vps = []
    vps.append(majorPattern)
    # From here on d is the histogram of recorded distances (index = distance).
    d = [0 for i in xrange(dim + 1)]
    # isSelected[i] marks rows that are already used (or equal the major
    # pattern) so that later passes skip them.
    isSelected = [False for i in xrange(len(datas))]
    for i in xrange(len(datas)):
        if datas[i] == majorPattern:
            isSelected[i] = True
    notChangedCount = 0
    while len(vps) < numberOfVP:
        print len(vps)
        changed = False
        for i in xrange(len(datas)):
            if isSelected[i]:
                continue
            # is_pass == True means "skip this row".
            is_pass = False
            for j in xrange(len(vps)):
                if datas[i] == vps[j]:
                    is_pass = True
                dist = utils.hammingDistance(datas[i], vps[j])
                if d[dist] > threshold:
                    is_pass = True
            if is_pass:
                continue
            # Accept the row: record its distance to every current point.
            for j in xrange(len(vps)):
                dist = utils.hammingDistance(datas[i], vps[j])
                d[dist] += 1
            vps.append(datas[i])
            isSelected[i] = True
            changed = True
        if not changed:
            # Nothing could be added in this pass: remove the point reported
            # by getWorstVP and take its distances out of the histogram.
            print 'not changed so pop worst (%d)' % notChangedCount
            worstIdx = getWorstVP(vps)
            for j in xrange(len(vps)):
                if j == worstIdx: continue
                dist = utils.hammingDistance(vps[j], vps[worstIdx])
                d[dist] -= 1
            # presumably returns vps without the element at worstIdx -- TODO confirm
            nextVPS = popDataAtIndex(vps, worstIdx)
            vps = nextVPS
            notChangedCount += 1
            # The counter wraps back to 0 once it exceeds numberOfVP / 2.
            if notChangedCount > numberOfVP / 2:
                notChangedCount = 0
            # Remove notChangedCount - 1 further points the same way.
            for k in xrange(notChangedCount - 1):
                worstIdx = getWorstVP(vps)
                for j in xrange(len(vps)):
                    if j == worstIdx: continue
                    dist = utils.hammingDistance(vps[j], vps[worstIdx])
                    d[dist] -= 1
                nextVPS = popDataAtIndex(vps, worstIdx)
                vps = nextVPS

        print len(vps)
        print d
コード例 #20
0
    # Fragment: the enclosing header and the definition of `options` are not
    # visible here.
    # For every unordered pair of vantage points, plot the data points by
    # their Hamming distances to the two vantage points.
    dim = options['numberOfDimension']

    dataFileName = utils.getDataFileName(options)
    vpFileName = utils.getVPFileName(options)
    datas = utils.getDataInFile(dataFileName)
    vps = utils.readDataFromFile(vpFileName)

    for i in xrange(len(vps)):
        for j in xrange(i + 1, len(vps)):
            # The absolute value of the coefficient is part of the image
            # file name, so it is computed even when the image exists.
            cc = utils.calculateCorrelationCoefficient(vps[i], vps[j], datas)
            cc = abs(cc)
            imageFileName = utils.getFigurePairName(options, i, j, cc)
            print imageFileName
            if os.path.exists(imageFileName):
                print '%s is exists' % imageFileName
                continue
            xp = []
            yp = []
            zp = []
            # zcnt[y][x]: number of data points at distance x from vps[i]
            # and distance y from vps[j].
            zcnt = [[0 for ii in xrange(dim + 1)] for jj in xrange(dim + 1)]
            for k in xrange(len(datas)):
                x = utils.hammingDistance(vps[i], datas[k])
                y = utils.hammingDistance(vps[j], datas[k])
                zcnt[y][x] = zcnt[y][x] + 1
                xp.append(x)
                yp.append(y)
            # zp holds, per data point, the count of its (x, y) cell; it is
            # only consumed by the disabled 3D call below.
            for k in xrange(len(xp)):
                zp.append(zcnt[yp[k]][xp[k]])
            utils.saveGraphUsingPointWithCC(imageFileName, xp, yp, cc, dim)
            #utils.saveGraphUsing3DSurfaceWithCC(imageFileName,xp,yp,zp,cc,dim)
コード例 #21
0
def generateVantagePointsWithHybridAlgorithm(options):
    """Build ``numberOfVP`` vantage points and write them to the vp file.

    The first point is the major pattern (per dimension, the top-ranked
    symbol among the first ``numberOfData`` rows).  ``d[k]`` counts how
    often distance ``k`` was recorded when a point was added.  While points
    are missing, the smallest distance ``i`` with ``d[i] == 0`` is looked
    up.  For it, one trial point per existing vantage point is created with
    ``generateVpWithDist`` and scored with ``calculateMany``; the best trial
    is kept.  When no distance has a zero count, a random data row is
    appended instead.

    Args:
        options: Mapping with 'numberOfData', 'numberOfDimension',
            'numberOfVP', 'numberOfAlphabet' and 'typeOfVP'; it is also
            passed to ``utils.getDataFileName``.

    Returns:
        None.  The result is written with ``utils.writeDataToFile``.
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))

    # Only majorPattern[0] is ever filled (the inner loop runs for k == 0).
    majorPattern = [[] for i in xrange(numberOfVP + 1)]
    for j in xrange(dim):
        d = {}
        for i in xrange(numberOfData):
            if datas[i][j] in d:
                d[datas[i][j]] += 1
            else:
                # The first occurrence is stored as 0, so every count is one
                # below the true frequency; the ranking is not affected.
                d[datas[i][j]] = 0
        d = sorted(d.items(), key=lambda x: x[1], reverse=True)
        for k in xrange(1):
            majorPattern[k].append(d[k][0])

    vps = []
    vps.append(majorPattern[0])
    # From here on d is the histogram of recorded distances (index = distance).
    d = [0 for i in xrange(dim + 1)]
    # NOTE(review): one_pass starts as False and is only ever assigned False,
    # so the `if one_pass:` branch below is never executed as written.
    one_pass = False
    while len(vps) < numberOfVP:
        print len(vps)
        # ans / ansDataIndex set here are not read before being reassigned.
        ans, ansDataIndex = -1, -1
        if one_pass:
            # Data-driven pass: add every row that differs from all selected
            # points and whose distances all have a count of at most 1.
            for i in xrange(len(datas)):
                ok = False
                for j in xrange(len(vps)):
                    dist = utils.hammingDistance(datas[i], vps[j])
                    if d[dist] > 1:
                        ok = True
                    if datas[i] == vps[j]:
                        ok = True
                if ok:
                    continue
                for j in xrange(len(vps)):
                    dist = utils.hammingDistance(datas[i], vps[j])
                    d[dist] += 1
                vps.append(datas[i])
            one_pass = False
        else:
            change = False
            for i in xrange(dim + 1):
                if d[i] == 0:
                    # Distance i has not been recorded yet: try to create a
                    # point for it from each existing vantage point.
                    change = True
                    ans, ans_vp = -1, ''
                    fx = 987654321
                    for j in xrange(len(vps)):
                        cur_vp = generateVpWithDist(dim, cardinality, vps[j],
                                                    i)
                        # Append the trial point, score the whole set, then
                        # remove it again (vps[:-1] below).
                        vps.append(cur_vp)
                        # calculateMany is unpacked into two values here; the
                        # second one is reduced with max().
                        cur, dists = calculateMany(vps)
                        if cur > ans:
                            ans, ans_vp = cur, cur_vp
                            fx = max(dists)
                        elif cur == ans:
                            # Tie on the first score: prefer the smaller
                            # max(dists).
                            if max(dists) < fx:
                                ans, ans_vp = cur, cur_vp
                                fx = max(dists)
                        vps = vps[:-1]
                    # Keep the best trial and record its distances.
                    for j in xrange(len(vps)):
                        dist = utils.hammingDistance(vps[j], ans_vp)
                        d[dist] += 1
                    vps.append(ans_vp)
                    break
            if not change:
                # Every distance already has a non-zero count: fall back to
                # a random data row (its distances are not added to d).
                vps.append(datas[random.randrange(0, numberOfData)])

    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP),
        vps)
コード例 #22
0
#!/usr/bin/python
#-*- coding:utf-8 -*-
import utils

if __name__ == '__main__':
    # Range query by linear scan: for each query, write a '#<index>' header
    # followed by every data row whose Hamming distance to the query is at
    # most options['queryRange'], one row per line.
    utils.createDirectory('rq_result')
    options = utils.getOptions()
    queryRange = options['queryRange']

    dataFileName = utils.getDataFileName(options)
    queryFileName = utils.getQueryFileName(options)
    rqResultFileName = utils.getRQResultFileName(options)
    datas = utils.getDataInFile(dataFileName)
    querys = utils.readDataFromFile(queryFileName)

    with open(rqResultFileName, 'w') as fp:
        for i, query in enumerate(querys):
            header = '#%d' % i
            print(header)
            fp.write(header + '\n')
            # All matches are collected before anything is written for them.
            matches = [data for data in datas
                       if utils.hammingDistance(query, data) <= queryRange]
            for data in matches:
                # Each symbol is followed by one space, then a newline.
                fp.write(''.join('%c ' % symbol for symbol in data) + '\n')