Example #1
File: sbwt.py Project: il2255/SBWT
def checkFirstCodonPos(refSequence, refSeqPosition, querySequence, queryLength, mask_array, threshold, reverse):
	if DEBUG: print "Ref Pos   :", refSeqPosition
	#if DEBUG: print "Reference :", refSequence
	if DEBUG: print "Query1    :", querySequence

	if reverse:
		# Get the reverse complementary
		querySequence = str(Seq(querySequence).reverse_complement())
		refSeqPosition = abs(refSeqPosition)
	# Convert to bitarray
	querySequenceBitarray = bitarray()
	querySequenceBitarray.encode(const.BIT_DICT, querySequence)
	
	bitStart = abs(refSeqPosition * 4)
	bitEnd = bitStart + queryLength * 4
	
	refSeqPosition = refSeqPosition
	refBitArray = refSequence[0][refSeqPosition*4:(refSeqPosition+queryLength)*4]
	if DEBUG: print "Query2    :", querySequence
	if DEBUG: print "Ref Corres:", refSequence[1][refSeqPosition:refSeqPosition+queryLength]
	if DEBUG: print "Ref Bit   :", refBitArray
	if DEBUG: print "Query Bit :", querySequenceBitarray
	try:
		diffPercent = float(queryLength - bitdiff(refBitArray, querySequenceBitarray)/2)/queryLength
		if DEBUG: print "Diff Bit  :", diffPercent, bitdiff(refBitArray, querySequenceBitarray)/2
		if diffPercent >= threshold:
			return True
		else:
			return False
	except:
		return False
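
The comparison above packs each base into four bits, so bitdiff/2 counts mismatched bases. A minimal sketch of that idea, assuming a one-hot code standing in for const.BIT_DICT (newer bitarray releases expose the same count as bitarray.util.count_xor, so the sketch uses the equivalent (a ^ b).count()):

from bitarray import bitarray

# Assumed one-hot encoding: two different bases always differ in exactly 2 bits,
# so bitdiff(a, b) / 2 equals the number of mismatched bases.
BIT_DICT = {
    'A': bitarray('0001'),
    'C': bitarray('0010'),
    'G': bitarray('0100'),
    'T': bitarray('1000'),
}

def encode(seq):
    ba = bitarray()
    ba.encode(BIT_DICT, seq)
    return ba

a = encode("ACGT")
b = encode("ACGA")              # one mismatched base
print((a ^ b).count() // 2)     # -> 1, the same value as bitdiff(a, b) / 2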
Example #2
def rNN_OutLSH(TABLEs, HLLs, LSHs, query, data):

    L = len(LSHs)
    hashVALUEs = [0] * L

    #start = time.clock()
    for idxTable in range(L):

        hashVALUEs[idxTable] = hashComputation(LSHs[idxTable], query)

    #print("Hash Computation time: ", time.clock() - start, " in second")

    # Estimate output size
    #start = time.clock()
    candidateSizeEst, numCollision = outputSizeEstimate(HLLs, hashVALUEs)

    #print("Output Size Estimation Time: ", time.clock() - start, " in second")
    #print("Candidate Size Estimate:", candidateSizeEst, " Number of Collisions: ", numCollision)

    # Searching
    rNN = list()
    N = getN()

    if candidateSizeEst + numCollision * getCPURatio() > N:

        #print("Use Linear Search for OutputSensitive LSH...")
        for idxPoint in range(N):

            if bitdiff(query, data[idxPoint]) <= 5:
                rNN.append(idxPoint)

    else:

        #print("Use standard LSH for OutputSensitive LSH...")
        candidateNN = set()

        # Remove duplicates
        for idxTable in range(L):

            hashValue = hashVALUEs[idxTable]

            if TABLEs[idxTable][hashValue]:
                candidateNN = candidateNN.union(TABLEs[idxTable][hashValue])

        # Compute distance
        for idxPoint in candidateNN:

            if bitdiff(query, data[idxPoint]) <= 5:
                rNN.append(idxPoint)

    return rNN  #, candidateSizeEst, numCollision
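
The branch above encodes a simple cost model: when the estimated candidate volume plus collision overhead (weighted by a CPU ratio) exceeds a linear scan over all N points, the function scans linearly instead of probing the hash tables. A toy illustration of that decision with made-up numbers (getN and getCPURatio are the project's own helpers):

# Made-up numbers, only to illustrate the comparison performed above.
N = 100000                   # stands in for getN()
candidateSizeEst = 80000     # estimated candidate set size (from outputSizeEstimate)
numCollision = 30000         # estimated hash-bucket collisions
cpuRatio = 1.5               # stands in for getCPURatio()

print(candidateSizeEst + numCollision * cpuRatio > N)   # -> True: fall back to a linear scan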
Example #3
def dist(x, y):

    if getType() == "bit_sampling":
        return bitdiff(x, y)

    elif getType() == "basic_covering":
        return bitdiff(x, y)

    elif getType() == "l1":
        return scipy.spatial.distance.cityblock(x, y)

    elif getType() == "l2":
        return scipy.spatial.distance.euclidean(x, y)

    elif getType() == "cosine":
        return scipy.spatial.distance.cosine(x, y)
Example #4
def hamming_distance(bv1, bv2):
    """
    Return the Hamming distance between `bv1` and `bv2` bitvectors as the
    number of differing bits across all positions (i.e. the count of bits set
    to one in an XOR between the two bit strings).

    `bv1` and `bv2` must both be either hash-like Halohash instances (with a
    hash() function) or bit array instances (that can be manipulated as-is).

    See http://en.wikipedia.org/wiki/Hamming_distance

    For example:

    >>> b1 = bitarray('0001010111100001111')
    >>> b2 = bitarray('0001010111100001111')
    >>> hamming_distance(b1, b2)
    0
    >>> b1 = bitarray('11110000')
    >>> b2 = bitarray('00001111')
    >>> hamming_distance(b1, b2)
    8
    >>> b1 = bitarray('11110000')
    >>> b2 = bitarray('00110011')
    >>> hamming_distance(b1, b2)
    4
    """
    return int(bitdiff(bv1, bv2))
Example #5
 def showPos(self):
     length = self.table.rowCount()
     curstate = bitarray.bitarray(length)
     curstate.setall(False)
     for i in range(self.table.rowCount()):
         if self.table.item(i, 1).checkState():
             curstate[i] = 1
     if bitarray.bitdiff(curstate, self.laststate):
         self.laststate = curstate
Example #6
def rNN_Linear(query, data):

    # Searching
    rNN = list()
    for idxPoint in range(getN()):

        if bitdiff(query, data[idxPoint]) <= 5:
            rNN.append(idxPoint)

    return rNN
Example #7
    def hamming_distance(self, fingerprint1, fingerprint2):
        """
        Return the Hamming distance between two given fingerprints.
        The Hamming distance is the number of bit positions at which two
        binary strings differ; files whose fingerprints have a smaller
        Hamming distance tend to be more similar.
        """
        distance = bitdiff(fingerprint1, fingerprint2)
        result = int(distance)

        return result
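
A hedged usage sketch with two made-up bitarray fingerprints, showing the quantity bitdiff returns here:

from bitarray import bitarray

fp1 = bitarray('1100101011110000')
fp2 = bitarray('1100101011010001')
# bitdiff counts positions where the fingerprints disagree; equivalent to XOR + count
print((fp1 ^ fp2).count())   # -> 2, i.e. bitdiff(fp1, fp2) == 2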
Example #8
def hamming_distance(h1, h2):
    """
    Computes the hamming distance between hashes h1 and h2
    :param h1: A locality sensitive hash
    :param h2: A second locality sensitive hash produced from the same basis as h1
    :return: The hamming distance between h1 and h2
    """
    b1 = bitarray.bitarray()
    b1.frombytes(h1)
    b2 = bitarray.bitarray()
    b2.frombytes(h2)
    return bitarray.bitdiff(b1, b2)
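
For example, with two hypothetical 8-byte digests (values made up just to show the call):

h1 = bytes([0x0f] * 8)
h2 = bytes([0x0f] * 7 + [0x00])
# Only the last byte differs, in four bit positions
print(hamming_distance(h1, h2))   # -> 4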
Example #9
def dehamming(bits):
    G = [
        '00000000', '11010010', '01010101', '10000111', '10011001', '01001011',
        '11001100', '00011110', '11100001', '00110011', '10110100', '01100110',
        '01111000', '10101010', '00101101', '11111111'
    ]
    for c in G:
        bc = bitarray(c, endian='little')
        n = bitdiff(bc, bits)
        if n == 0 or n == 1:
            return ([bc[2], bc[4], bc[5], bc[6]], 0)
        elif n == 2:
            return ([0 == 1, bool([]), not 1, False], 1)  # XD
    return ([0 == 1, not 1, bool([]), False], 1)  # XDD
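
The table G holds 8-bit codewords that appear to differ pairwise in at least four bits (an extended Hamming-style code), so a single-bit error still decodes to the same nibble. A toy check, assuming bits is a little-endian bitarray of length 8 as the function expects:

from bitarray import bitarray

clean = bitarray('11010010', endian='little')
noisy = bitarray('11000010', endian='little')   # the same codeword with bit 3 flipped

# Both decode to the same data bits with error flag 0: the single-bit error is corrected.
print(dehamming(clean) == dehamming(noisy))     # -> True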
Example #10
def rNN_LSH(TABLEs, LSHs, query, data):

    L = len(LSHs)
    candidateNN = set()

    # Removing duplicates
    for idxTable in range(L):

        hashValue = hashComputation(LSHs[idxTable], query)

        if TABLEs[idxTable][hashValue]:
            candidateNN = candidateNN.union(TABLEs[idxTable][hashValue])

    # Compute distance
    rNN = list()
    for idxPoint in candidateNN:

        if bitdiff(query, data[idxPoint]) <= 5:
            rNN.append(idxPoint)

    return rNN
Example #11
def _bit_candidates(qbitvector, bitvectors_by_rid, min_score=100, ignore_empty=True):
    """
    Return rule candidates for further matching based on matching bitvectors
    """
    if ignore_empty and not qbitvector.any():
        return []

    matchable_len = len(qbitvector)

    candidates = []
    candidates_append = candidates.append
    for rid, ibitvector in bitvectors_by_rid:
        # we compute the AND that tells us all tokenids that exist in both. then
        # the hamming distance of that bitarray to the rule array
        distance = bitdiff(qbitvector & ibitvector, ibitvector)
        # a difference means we have some common bits (i.e. tokenids)
        if distance != matchable_len:
            if min_score == 100:
                if distance == 0:
                    # only keep possible 100% matches
                    candidates_append((distance, rid,))
            else:
                candidates_append((distance, rid,))
    return candidates
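
On toy vectors the AND-then-distance step reads: bitdiff(qbitvector & ibitvector, ibitvector) counts rule bits that are missing from the query, so a distance of 0 means every rule token also appears in the query. A minimal sketch with made-up vectors:

from bitarray import bitarray

q = bitarray('1011')   # token positions present in the query
r = bitarray('1010')   # token positions required by a rule

# bits set in r but absent from q; 0 means the rule is fully covered by the query
print(((q & r) ^ r).count())   # -> 0, i.e. bitdiff(q & r, r) == 0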
Example #12
    # plt.ylim(0, 6)
    plt.legend()
    plt.show()


if __name__ == '__main__':
    b1 = 0
    b2 = 99
    t = 10
    s = 512

    ran = []
    for i in range(s):
        ran.append(np.random.random() * (b2 - b1 + 2 * t) + b1 - t)

    # inputData = np.random.random(1000) * 100
    # outputData = [distanceEncoding(x, b1, b2, t, s) for x in inputData]
    inputNum = [i for i in range(b2 + 1)]
    outputData = [distanceEncoding(x, b1, b2, t, s, ran) for x in inputNum]
    onesNum = [x.count() for x in outputData]
    factEw = np.mean(onesNum)
    Ew = s * 2. * t / (b2 - b1)

    factDist = [
        bitarray.bitdiff(outputData[0], outputData[i])
        for i in range(1, b2 + 1)
    ]
    de = [(b2 - b1) * dh / (2. * s) for dh in factDist]

    draw(b2, de, s)
Example #13
            output[i] = True
    return output




if __name__ == '__main__':
    b1 = 0
    b2 = 999
    t = 50
    s = 512

    # ran = []
    # for i in range(s):
    #     ran.append(np.random.random() * (b2 - b1) + b1)

    # inputData = np.random.random(1000) * 100
    # outputData = [distanceEncoding(x, b1, b2, t, s) for x in inputData]
    inputNum = [i for i in range(b2 + 1)]
    outputData = [distanceEncoding(x, b1, b2, t, s) for x in inputNum]
    onesNum = [x.count() for x in outputData]
    factEw = np.mean(onesNum)
    Ew = s * 2. * t / (b2 - b1)

    factDist = [bitarray.bitdiff(outputData[0], outputData[i]) for i in range(1, b2 + 1)]
    de = [(b2 - b1) * dh / (2. * s) for dh in factDist]


    draw(b2, de, s)
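
The distanceEncoding helper exercised by the two driver blocks above is truncated in this listing. A plausible reconstruction, offered only as an assumption consistent with the random centres drawn in [b1 - t, b2 + t] and with the decoding formula (b2 - b1) * dh / (2 * s):

import numpy as np
from bitarray import bitarray

def distanceEncoding(x, b1, b2, t, s, ran=None):
    # Hypothetical reconstruction of the truncated helper: one bit per random
    # centre, set when x lies within t of that centre. The Hamming distance
    # between two encodings then grows roughly as 2 * s * |x - y| / (b2 - b1),
    # which the drivers decode back with (b2 - b1) * dh / (2 * s).
    if ran is None:
        ran = np.random.random(s) * (b2 - b1 + 2 * t) + b1 - t
    output = bitarray(s)
    output.setall(False)
    for i in range(s):
        if abs(x - ran[i]) <= t:
            output[i] = True
    return output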

Example #14
    
    #bitmask to keep track of kmers that can be eliminated
    junk = bitarray(total)
    junk.setall(False)
    for i in xrange(0, len(encodedklist)):
        if progress_count % math.ceil(total/10000.0) == 0:
            p = (float(progress_count)/total)*100
            reduction = total - junk.count()
            sys.stdout.write("\r%.2f%% progress. " % p + "Feature set reduced to %d kmers ... " % reduction)
            sys.stdout.flush()
        progress_count += 1
    
        if junk[i]: continue
        for j in xrange(i+1, len(encodedklist)):
            if junk[j]: continue
            if bitdiff(encodedklist[i],encodedklist[j])/2 <= args.threshold:
                junk[j] = 1
    print
    print "Done reducing kmer set ... "

    for i in xrange(0,len(encodedklist)):
        if junk[i]: continue
        featureMap[''.join(encodedklist[i].decode(encoding))] = f 
        f += 1



print "Creating positive feature vectors in svm-light format ... "
output = "pos_svm_light_" + repr(args.k) + "_" + repr(args.threshold) + "_" + args.distance

print "Feature vectors will be saved in " + output + " ... "
Example #15
 def areImageSigsSimilar(sig1, sig2):
   """ Compare 2 image "signatures" and return True if they seem to come from a similar image, False otherwise. """
   return bitarray.bitdiff(sig1, sig2) < 100
Example #16
 def areImageSigsSimilar(sig1, sig2):
   """ Compare 2 image "signatures" and return True if they seem to come from a similar image, False otherwise. """
   return bitarray.bitdiff(sig1, sig2) < 100
Example #17
def scoreHash(h):
    return 128 - bitdiff(h, getTargetHash())
Example #18
def test_ber():
    # Generate a random bit steam
    Nbits = 1000
    bits=bitarray.bitarray((random.rand(Nbits)>0.5).tolist())

    # modulate
    sig = afsk1200(bits,fs=44100.0)


    # add noise
    sig_n = sig + 1*random.randn(len(sig))

    # with noise or without?
    # demodulate and decode bits with non-coherent and FM demodulators
    NRZa_nc = nc_afskDemod(sig_n, tbw=2.0)
    NRZa_fm = fm_afskDemod(sig_n, TBW=4)

    # Compute Error Bit Rate Curves
    BER_nc = []
    BER_fm = []

    for sigma in r_[0.1:8.0:0.2]:
        bits_temp=bitarray.bitarray((random.rand(Nbits)>0.5).tolist())

        # modulate and add noise
        sig = afsk1200(bits_temp,fs=44100.0)
        sig_n = sig + sigma*random.randn(len(sig))

        # demodulate and decode bits with non-coherent and FM demodulators
        NRZa_nc = nc_afskDemod(sig_n, tbw=2.0, fs=44100.0)
        NRZa_fm = fm_afskDemod(sig_n, TBW=4, N=74)
        NRZ_nc = np.sign(NRZa_nc)
        NRZ_fm = np.sign(NRZa_fm)

        E_nc = 0
        E_fm = 0

        fs = 44100.0

        bits_dec_nc = decode_bits(NRZ_nc, Nbits, 56, fs=fs)
        E_nc = int(bitarray.bitdiff(bits_temp, bits_dec_nc[0: min(len(bits_temp), len(bits_dec_nc)) ]))

        bits_dec_fm = decode_bits(NRZ_fm, Nbits, 92, fs=fs)
        E_fm = int(bitarray.bitdiff(bits_temp, bits_dec_fm[0:
            min(len(bits_temp), len(bits_dec_fm)) ]))

        BER_nc.append(1.0*E_nc/len(bits_temp))
        BER_fm.append(1.0*E_fm/len(bits_temp))

    print BER_nc
    print BER_fm

    # plot
    f = plt.figure()
    plt.loglog(1/(r_[0.1:8.1:0.2]),BER_nc)
    plt.loglog(1/(r_[0.1:8.1:0.2]),BER_fm,'r')
    plt.title("empirical BER for AFSK demodulation")
    plt.xlabel("SNR")
    plt.ylabel("BER")
    plt.legend(("non-coherent","FM"))
    plt.show()
Example #19
    junk = bitarray(total)
    junk.setall(False)
    for i in xrange(0, len(encodedklist)):
        if progress_count % math.ceil(total / 10000.0) == 0:
            p = (float(progress_count) / total) * 100
            reduction = total - junk.count()
            sys.stdout.write("\r%.2f%% progress. " % p +
                             "Feature set reduced to %d kmers ... " %
                             reduction)
            sys.stdout.flush()
        progress_count += 1

        if junk[i]: continue
        for j in xrange(i + 1, len(encodedklist)):
            if junk[j]: continue
            if bitdiff(encodedklist[i], encodedklist[j]) / 2 <= args.threshold:
                junk[j] = 1
    print
    print "Done reducing kmer set ... "

    for i in xrange(0, len(encodedklist)):
        if junk[i]: continue
        featureMap[''.join(encodedklist[i].decode(encoding))] = f
        f += 1

print "Creating positive feature vectors in svm-light format ... "
output = "pos_svm_light_" + repr(args.k) + "_" + repr(
    args.threshold) + "_" + args.distance

print "Feature vectors will be saved in " + output + " ... "
Example #20
 def distance(self, other):
     """
     Return the Hamming distance between this hash and another hash.
     """
     return int(bitdiff(self.hash(), other.hash()))
Example #21
 def equalBitArrays(ba1, ba2):
     return bitdiff(ba1, ba2) == 0