def predict(self, input_F):
        """Label ``input_F`` by the nearer prototype in each of two prototype pairs.

        ``utils.hamming`` is evaluated against the four stored prototypes.
        Each pair of distances is appended to the matching history list
        (``self.distance_v_history`` / ``self.distance_a_history``).

        Returns a 2-tuple ``(v_label, a_label)``.  ``v_label`` is 0 when the
        input is strictly closer to ``prototype_v_plus`` than to
        ``prototype_v_min`` and 1 otherwise (ties give 1); ``a_label`` is 0
        when it is strictly closer to ``prototype_a_high`` than to
        ``prototype_a_low`` and 1 otherwise.
        """
        v_pair = [utils.hamming(input_F, self.prototype_v_plus),
                  utils.hamming(input_F, self.prototype_v_min)]
        a_pair = [utils.hamming(input_F, self.prototype_a_high),
                  utils.hamming(input_F, self.prototype_a_low)]

        # Keep a record of every distance pair that was evaluated.
        self.distance_v_history.append(v_pair)
        self.distance_a_history.append(a_pair)

        if v_pair[0] < v_pair[1]:
            v_label = 0
        else:
            v_label = 1

        if a_pair[0] < a_pair[1]:
            a_label = 0
        else:
            a_label = 1

        return (v_label, a_label)
Example #2
0
File: cme.py Project: poneill/amic
def rate_matrix(q,koffs,verbose=False):
    """Generate the stochastic rate matrix for the given system.

    States come from ``enumerate_states(len(koffs), q)`` and are handled in
    that order.  Only pairs of states at Hamming distance 1 get an
    off-diagonal rate:

    * one more occupied site in the target state ("on-reaction"):
      rate ``q - sum(state_i)``;
    * one fewer occupied site ("off-reaction"): rate ``koffs[site]`` for
      the site that differs.

    Each diagonal entry is then set to minus the sum of its row.  With
    ``verbose`` true, every considered pair is reported on stdout;
    "finished rate matrix" is always printed before returning.
    """
    def report(*parts):
        # One line, parts separated by single spaces.
        print(" ".join("%s" % (part,) for part in parts))

    # Chromosome states can be represented by binary numerals; the states
    # are ordered this way.
    G = len(koffs)
    states = enumerate_states(G,q)
    num_states = len(states)
    assert len(states) == sum(choose(G,i) for i in range(q+1))
    R = np.zeros((num_states,num_states))

    for i,state_i in enumerate(states):
        for j,state_j in enumerate(states):
            if verbose:
                report("considering:",i,state_i,"->",j,state_j)
            dist = hamming(state_i,state_j)
            if dist != 1:
                # The diagonal is filled in once all other rates are known.
                if verbose:
                    report("distance is:",dist,"continuing...")
                continue

            if sum(state_j) == sum(state_i) + 1:
                R[i][j] = q - sum(state_i)
                if verbose:
                    report(i,state_i,"->",j,state_j,
                           "is an on-reaction, rate:",R[i][j])
            elif sum(state_j) == sum(state_i) - 1:
                # Locate the single site where the two states disagree.
                diff_idx,diff_site = find(
                    lambda entry: entry[1][0] != entry[1][1],
                    enumerate(zip(state_i,state_j)))
                R[i][j] = koffs[diff_idx]
                if verbose:
                    report(i,state_i,"->",j,state_j,
                           "is an off-reaction (at site",diff_idx,
                           ")  rate:",R[i][j])

    # Diagonal elements: each row must sum to zero.
    for row in range(num_states):
        R[row][row] = -sum(R[row])
    print("finished rate matrix")
    return R
Example #3
0
def ddpl_dHdt_singular(ps,l):
    """Return the sum of three terms evaluated at index ``l`` of ``ps``.

    ``ps`` is indexed in the same order as ``make_kmers(L)``, where
    ``L = num_cols_from_vector(ps)``.

    * term1: ``-one_point_avg_inner(ps, l) / ps[l]``
    * term2: minus the sum of ``log(ps[k])`` over every kmer ``k`` at
      Hamming distance 1 from kmer ``l``, divided by ``3*L``
    * term3: ``log(ps[l]) + 1``

    NOTE(review): presumably this is the derivative of dH/dt with respect
    to ``ps[l]``; confirm against the derivation.
    """
    K = len(ps)
    L = num_cols_from_vector(ps)
    kmers = list(make_kmers(L))
    s = kmers[l]
    term1 = -one_point_avg_inner(ps,l)/(ps[l])
    term2 = -sum(log(ps[k]) for k in range(K) if hamming(kmers[k],s) == 1)/(3*L)
    # The original indexed with `k`, which only exists inside the generator
    # expression above and is unbound here; the term belongs to index `l`.
    term3 = log(ps[l]) + 1
    return term1 + term2 + term3
Example #4
0
 def compute_hamming(self, sample, train):
     """
     Measure the hamming distance from one sample to every train-set entry.

     :param sample: sample from the test-set
     :param train: the train-set; each entry is indexed at positions 0, 1 and 2
     :return: one tuple per train entry, in train order:
              (hamming(entry[0], sample), entry[1], entry[2])
     """
     return [(hamming(entry[0], sample), entry[1], entry[2])
             for entry in train]
    def hamming_distance_to_true_naive(self, true_line, line, query_name, restrict_to_region='', normalize=False, debug=False):
        """
        Hamming distance between the inferred naive sequence and the true naive sequence.

        Both naive sequences are obtained from utils.get_full_naive_seq().
        <restrict_to_region> if set, restrict the comparison to the section of the *true* sequence assigned to the given region.
        NOTE this will not in general correspond to the similarly-assigned region in the inferred naive sequence.
        <normalize> if set, return int(100 * distance / length) instead of the raw distance.
        <debug> if set, print the sequences being compared.

        Returns 0 (after printing a warning) when the compared sequences are empty.
        Prints an error and calls sys.exit() when the two sequences still differ in length.
        """

        true_naive_seq = utils.get_full_naive_seq(self.germlines, true_line)
        inferred_naive_seq = utils.get_full_naive_seq(self.germlines, line)

        true_seq = true_line['seq']
        inferred_seq = line['seq']
        if len(true_seq) > len(inferred_seq):
            # ihhhmmm doesn't report the bits of the sequence it erodes off the
            # ends, so pad the inferred naive sequence with one 'x' per eroded base
            start = true_seq.find(inferred_seq)
            assert start >= 0
            end = start + len(inferred_seq)
            n_left = len(true_seq[:start])
            n_right = len(true_seq[end:])
            inferred_naive_seq = 'x' * n_left + inferred_naive_seq + 'x' * n_right
            if debug:
                print('  adding to inferred naive seq')

        bounds = None
        if restrict_to_region != '':
            # bounds of this *true* region, applied to both sequences
            bounds = utils.get_regional_naive_seq_bounds(restrict_to_region, self.germlines, true_line)
            region_start, region_end = bounds[0], bounds[1]
            true_naive_seq = true_naive_seq[region_start:region_end]
            inferred_naive_seq = inferred_naive_seq[region_start:region_end]

        if debug:
            print('%s %s %s' % (restrict_to_region, 'region, bounds', bounds))
            print('%s %s' % ('  true ', true_naive_seq))
            print('%s %s' % ('  infer', inferred_naive_seq))

        seq_length = len(true_naive_seq)
        if seq_length != len(inferred_naive_seq):
            print('ERROR still not the same lengths for %s' % query_name)
            print('%s %s' % ('  true ', true_naive_seq))
            print('%s %s' % ('  infer', inferred_naive_seq))
            sys.exit()

        total_distance = utils.hamming(true_naive_seq, inferred_naive_seq)
        if seq_length == 0:
            print('WARNING zero length sequence in hamming_distance_to_true_naive')
            return 0
        if not normalize:
            return total_distance
        return int(100 * (float(total_distance) / seq_length))
Example #6
0
def one_point_avg_inner(ps,k):
    """Average ``ps`` over the kmers at Hamming distance 1 from kmer ``k``.

    The kmer length is ``L = int(log(len(ps), 4))`` and ``ps`` is indexed
    in the order produced by ``make_kmers(L)``.  Asserts that exactly
    ``3*L`` such neighbours exist and returns their sum divided by ``3*L``.
    """
    L = int(log(len(ps),4))
    kmers = list(make_kmers(L))
    centre = kmers[k]
    neighbour_ps = [ps[idx] for idx,kmer in enumerate(kmers)
                    if hamming(kmer,centre) == 1]
    assert len(neighbour_ps) == 3*L
    return sum(neighbour_ps)/(3*L)
    def get_hamming_distances(self, pairs):  #, return_info):
        """
        Score each pair of queries by the fraction of positions that differ.

        <pairs> iterable of (query_a, query_b) keys into self.input_info.
        If self.args.truncate_pairs is set, both sequences are first cut down to the
        length of the shorter one, keeping their right-hand ends.

        Returns a list with one dict per pair: {'id_a', 'id_b', 'score'}, where
        'score' is utils.hamming(seq_a, seq_b) / len(seq_a).
        NOTE(review): an empty seq_a raises ZeroDivisionError -- confirm callers never pass one.
        """
        # NOTE duplicates a function in utils
        return_info = []
        for query_a, query_b in pairs:
            seq_a = self.input_info[query_a]['seq']
            seq_b = self.input_info[query_b]['seq']
            if self.args.truncate_pairs:  # chop off the left side of the longer one if they're not the same length
                min_length = min(len(seq_a), len(seq_b))
                seq_a = seq_a[-min_length : ]
                seq_b = seq_b[-min_length : ]
            mutation_frac = utils.hamming(seq_a, seq_b) / float(len(seq_a))
            return_info.append({'id_a':query_a, 'id_b':query_b, 'score':mutation_frac})

        return return_info
Example #8
0
 def calcsimilarity(known, table, id1, id2, comparison):
     """Compare ``comparison`` with every row of ``known`` and store close matches.

     Rows are indexed positionally: ``[0]`` is the value written to the
     ``id1``/``id2`` columns, ``[1]`` is a tag string split on ``/``, ``,``
     and ``;``, and ``[2]`` is compared for plain equality.

     Similarity is 1.0 when the ``[2]`` fields are equal.  Otherwise each
     pair of tags gets a normalised distance (hamming for equal-length tags,
     levenshtein otherwise) and the similarity is
     ``1 - geometric_mean(distances) + sametags / (sametags + len(distances))``.
     Pairs scoring above 0.33 that are not yet in ``table`` are inserted
     through the module-level ``db`` connection.

     Returns True when at least one row was inserted, else False.
     """
     tokens = re.split("[/,;]", comparison[1])
     ret = False
     for j in xrange(0, len(known)):
         # NOTE(review): `i` is not defined in this function, so it must come
         # from an enclosing or global scope -- presumably the index of
         # `comparison` within `known`.  Confirm.
         if i == j:
             continue
         if comparison[2] == known[j][2]:
             similarity = 1.0
         else:
             compared_genre = re.split("[/,;]", known[j][1])
             distance = {}
             sametags = 0
             for a in tokens:
                 if not a:
                     continue
                 for b in compared_genre:
                     if not b or b in distance:
                         continue
                     if len(a) == len(b):
                         h = hamming(a, b) / float(len(a))
                         if h:
                             distance[b] = h
                         else:
                             sametags = sametags + 1
                     else:
                         distance[b] = levenshtein(a, b) /  \
                             float(max(len(a), len(b)))
             if distance:
                 # geometric mean + weighted equal tags
                 product = reduce(lambda x, y: x * y, distance.values())
                 geometric_mean = product ** (1.0 / len(distance))
                 # float() is required: with two ints, Python 2 floor-divides
                 # and this weight would always be 0.
                 equal_weight = float(sametags) / (sametags + len(distance))
                 similarity = 1.0 - geometric_mean + equal_weight
             else:
                 similarity = 0.0
         if similarity > 0.33:
             # Table and column names cannot be bound as SQL parameters, so
             # they are interpolated; the row values are bound with `?`.
             if not db.execute(
                 "select * from %s  where %s  = ? and %s = ?" %
                 (table, id1, id2),
                     (comparison[0], known[j][0])).fetchall():
                 db.execute(
                     "insert or ignore into %s "
                     "(%s, %s, similarity) values ( ?, ?, ?)" %
                     (table, id1, id2),
                     (comparison[0], known[j][0], similarity))
                 ret = True
     return ret
Example #9
0
 def calcsimilarity(known, table, id1, id2, comparison):
     """Compare ``comparison`` with every row of ``known`` and store close matches.

     Rows are indexed positionally: ``[0]`` is the value written to the
     ``id1``/``id2`` columns, ``[1]`` is a tag string split on ``/``, ``,``
     and ``;``, and ``[2]`` is compared for plain equality.

     Similarity is 1.0 when the ``[2]`` fields are equal.  Otherwise each
     pair of tags gets a normalised distance (hamming for equal-length tags,
     levenshtein otherwise) and the similarity is
     ``1 - geometric_mean(distances) + sametags / (sametags + len(distances))``.
     Pairs scoring above 0.33 that are not yet in ``table`` are inserted
     through the module-level ``db`` connection.

     Returns True when at least one row was inserted, else False.
     """
     tokens = re.split("[/,;]", comparison[1])
     ret = False
     for j in xrange(0, len(known)):
         # NOTE(review): `i` is not defined in this function, so it must come
         # from an enclosing or global scope -- presumably the index of
         # `comparison` within `known`.  Confirm.
         if i == j:
             continue
         if comparison[2] == known[j][2]:
             similarity = 1.0
         else:
             compared_genre = re.split("[/,;]", known[j][1])
             distance = {}
             sametags = 0
             for a in tokens:
                 if not a:
                     continue
                 for b in compared_genre:
                     if not b or b in distance:
                         continue
                     if len(a) == len(b):
                         h = hamming(a, b) / float(len(a))
                         if h:
                             distance[b] = h
                         else:
                             sametags = sametags + 1
                     else:
                         distance[b] = levenshtein(a, b) /  \
                             float(max(len(a), len(b)))
             if distance:
                 # geometric mean + weighted equal tags
                 product = reduce(lambda x, y: x * y, distance.values())
                 geometric_mean = product ** (1.0 / len(distance))
                 # float() is required: with two ints, Python 2 floor-divides
                 # and this weight would always be 0.
                 equal_weight = float(sametags) / (sametags + len(distance))
                 similarity = 1.0 - geometric_mean + equal_weight
             else:
                 similarity = 0.0
         if similarity > 0.33:
             # Table and column names cannot be bound as SQL parameters, so
             # they are interpolated; the row values are bound with `?`.
             if not db.execute(
                     "select * from %s  where %s  = ? and %s = ?" %
                 (table, id1, id2),
                 (comparison[0], known[j][0])).fetchall():
                 db.execute(
                     "insert or ignore into %s "
                     "(%s, %s, similarity) values ( ?, ?, ?)" %
                     (table, id1, id2),
                     (comparison[0], known[j][0], similarity))
                 ret = True
     return ret
    def get_hamming_distances(self, pairs):  #, return_info):
        """
        Score each pair of queries by the fraction of positions that differ.

        <pairs> iterable of (query_a, query_b) keys into self.input_info.
        If self.args.truncate_pairs is set, both sequences are first cut down to the
        length of the shorter one, keeping their right-hand ends.

        Returns a list with one dict per pair: {'id_a', 'id_b', 'score'}, where
        'score' is utils.hamming(seq_a, seq_b) / len(seq_a).
        NOTE(review): an empty seq_a raises ZeroDivisionError -- confirm callers never pass one.
        """
        # NOTE duplicates a function in utils
        return_info = []
        for query_a, query_b in pairs:
            seq_a = self.input_info[query_a]['seq']
            seq_b = self.input_info[query_b]['seq']
            if self.args.truncate_pairs:  # chop off the left side of the longer one if they're not the same length
                min_length = min(len(seq_a), len(seq_b))
                seq_a = seq_a[-min_length:]
                seq_b = seq_b[-min_length:]
            mutation_frac = utils.hamming(seq_a, seq_b) / float(len(seq_a))
            return_info.append({
                'id_a': query_a,
                'id_b': query_b,
                'score': mutation_frac
            })

        return return_info
Example #11
0
def checkGolomg(seq):
    """
        Check the three postulates below on the "0"/"1" string ``seq``:

        1. The counts of 0s and 1s differ by at most one
        2. The number of runs of a given length should halve when the length
           is increased by one (runs come from ``Runs(seq)``; a count of 1 on
           either side is tolerated)
        3. The out-of-phase autocorrelation should be constant (delegated to
           ``hamming(seq)``)

        Returns True only when all three checks hold.
    """

    ## Postulate 1
    balanced = abs(seq.count("0") - seq.count("1")) <= 1

    ## Postulate 2
    r = Runs(seq)
    keys = r.keys()

    runs_ok = True
    if not r:
        runs_ok = False
    else:
        for idx in xrange(len(keys) - 1):
            this_len = keys[idx]
            next_len = keys[idx + 1]
            # consecutive keys must be consecutive run lengths
            if this_len - next_len != -1:
                runs_ok = False
                break
            if (r[this_len] != 2 * r[next_len]
                    and r[this_len] != 1 and r[next_len] != 1):
                runs_ok = False
                break

    ## Postulate 3
    constant_autocorrelation = hamming(seq)

    return bool(sum([balanced, runs_ok, constant_autocorrelation]) == 3)
Example #12
0
    def run(self):
        """Process queued songs: fingerprint, AcoustID lookup, database update.

        Does nothing when ``self.fpcalc`` is falsy.  Otherwise connects to
        ``self.dbpath`` and, while ``self.running`` is true, takes
        ``(path, title, artist, album)`` items from ``self.queue``.  For each
        item it runs ``/usr/bin/fpcalc`` on the path and sends the duration
        and fingerprint to ``api.acoustid.org``.  When the first result's
        score is at least 0.7 it stores ``puid``/``mbid`` on the matching
        ``song`` row, and also the looked-up title if it differs from the
        queued one.  The database connection is closed when the loop ends.
        """

        if not self.fpcalc:
            return

        logging.debug("fpcalc: %s" % self.fpcalc)

        self.db = dbapi.connect(self.dbpath)
        # lastrelease = ""
        lastdata = []
        lastquery = ""
        laststatus = 0
        starttime = time()
        stoptime = starttime + 1
        requests = 0
        while self.running:
            try:
                path, title, artist, album = self.queue.get()
            except Empty as e:
                logging.warning(e)
                continue
            except Exception as e:
                logging.error(e)
                continue
            if not path or not album:
                logging.warning("No path/album name provided")
                continue

            # Crude rate limit.  After a pause starttime equals stoptime until
            # the next completed request, so the elapsed time can be zero and
            # must not be divided by.
            elapsed = stoptime - starttime
            if elapsed > 0 and requests / elapsed > 3:
                sleep(1)
                starttime = stoptime

            logging.info("Getting infos for %s %s" % (artist, album))
            fingerprint = ''
            duration = 0
            try:
                logging.info("Analyzing %s file" % path)
                if self.fpcalc:
                    logging.debug("fingerprint for %s" % path)
                    fpcalc_process = subprocess.Popen(
                        ["/usr/bin/fpcalc", path],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
                    fpcalc_output = fpcalc_process.communicate()[0].split('\n')

                    # Drop the 9- and 12-character line prefixes (presumably
                    # "DURATION=" and "FINGERPRINT=" -- confirm).
                    duration = fpcalc_output[1][9:]
                    fingerprint = fpcalc_output[2][12:]
            except Exception as e:
                logging.error(e)

            if fingerprint:
                query = u"/v2/lookup?" \
                    "client=8XaBELgH" \
                    "&meta=recording+releasegroups" \
                    "+tracks+puids+usermeta+compress" \
                    "&duration=%s&format=json&fingerprint=%s" % \
                    (duration, fingerprint)
                # NOTE(review): despite the message, nothing is skipped here;
                # the request below is still sent.  Confirm the intent.
                if query == lastquery and laststatus == 200:
                    logging.info("Same request already occurred - skipping")

                try:
                    conn = HTTPConnection("api.acoustid.org", 80)
                    conn.request("GET", query)
                    response = conn.getresponse()
                except Exception as e:
                    # Was a bare `except:` that dropped the error silently.
                    logging.error(e)
                    continue
                puid = ""
                mb_title = ""
                mb_artists = ""
                if response.status != 200:
                    continue
                try:
                    lastquery = query
                    laststatus = 200
                    # Read the body once: a second read() on the same response
                    # returns nothing, which left the debug log below empty.
                    body = response.read()
                    results = json.loads(body)
                    lastdata = results["results"][0]
                    logging.debug(lastdata)
                    release = "releasegroups" in lastdata \
                        and len(lastdata) and lastdata["releasegroups"][0]
                    recording = "recordings" in lastdata \
                        and len(lastdata) and lastdata["recordings"][0]
                    score = "score" in lastdata and lastdata['score']
                    logging.debug(release)
                    logging.debug(recording)
                    if len(lastdata):
                        logging.debug("%s results found" % len(lastdata))
                        puid = 'puids' in lastdata and lastdata["puids"][0]
                        mbid = release and release['id'] \
                            or recording \
                            and recording[0]['releasegroups'][0]['id']
                        mb_title = release and release["title"] \
                            or recording \
                            and recording[0]['title']
                        mb_artists = " ".join([
                            i['name']
                            for i in (release and release["artists"]
                                      or recording and recording[0]['artists'])
                        ])
                    logging.debug("Response status: %d %s" %
                                  (response.status, body))
                except Exception as e:
                    # Log before skipping; the original `continue` came first
                    # and made the logging call unreachable.
                    logging.error(e)
                    continue

                stoptime = time()
                requests = (requests + 1) % 3

                if score < 0.7:
                    continue

                if len(title) == len(mb_title):
                    title_distance = hamming(title, mb_title) / float(
                        len(title))
                else:
                    title_distance = levenshtein(title, mb_title) / float(
                        max(len(title), len(mb_title)))

                if len(artist) == len(mb_artists):
                    author_distance = hamming(artist, mb_artists) / float(
                        len(artist))
                else:
                    author_distance = levenshtein(artist, mb_artists) / float(
                        max(len(artist), len(mb_artists)))

                # if title_distance > 0.33 and author_distance > 0.5:
                logging.debug("distances: %s %s %s" %
                              (score, title_distance, author_distance))
                #     continue

                logging.debug("puid: %s, mbid %s" % (puid, mbid))
                with self.condition:
                    try:
                        song_id, album_id = self.db.execute(
                            "select id, album_id from song "
                            "where path = ?;", (path, )).fetchone()
                        self.db.execute(
                            "update song set puid = ?, mbid = ? "
                            "where id = ?", (puid, mbid, song_id))
                        if title_distance > 0:
                            self.db.execute(
                                "update song set title = ? "
                                "where id = ?", (mb_title, song_id))
                        self.db.commit()
                    except Exception as e:
                        logging.error(e)
        self.db.close()
Example #13
0
    def run(self):
        """Process queued songs: fingerprint, AcoustID lookup, database update.

        Does nothing when ``self.fpcalc`` is falsy.  Otherwise connects to
        ``self.dbpath`` and, while ``self.running`` is true, takes
        ``(path, title, artist, album)`` items from ``self.queue``.  For each
        item it runs ``/usr/bin/fpcalc`` on the path and sends the duration
        and fingerprint to ``api.acoustid.org``.  When the first result's
        score is at least 0.7 it stores ``puid``/``mbid`` on the matching
        ``song`` row, and also the looked-up title if it differs from the
        queued one.  The database connection is closed when the loop ends.
        """

        if not self.fpcalc:
            return

        logging.debug("fpcalc: %s" % self.fpcalc)

        self.db = dbapi.connect(self.dbpath)
        # lastrelease = ""
        lastdata = []
        lastquery = ""
        laststatus = 0
        starttime = time()
        stoptime = starttime + 1
        requests = 0
        while self.running:
            try:
                path, title, artist, album = self.queue.get()
            except Empty as e:
                logging.warning(e)
                continue
            except Exception as e:
                logging.error(e)
                continue
            if not path or not album:
                logging.warning("No path/album name provided")
                continue

            # Crude rate limit.  After a pause starttime equals stoptime until
            # the next completed request, so the elapsed time can be zero and
            # must not be divided by.
            elapsed = stoptime - starttime
            if elapsed > 0 and requests / elapsed > 3:
                sleep(1)
                starttime = stoptime

            logging.info("Getting infos for %s %s" % (artist, album))
            fingerprint = ''
            duration = 0
            try:
                logging.info("Analyzing %s file" % path)
                if self.fpcalc:
                    logging.debug("fingerprint for %s" % path)
                    fpcalc_process = subprocess.Popen(
                        ["/usr/bin/fpcalc", path],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
                    fpcalc_output = fpcalc_process.communicate()[0].split('\n')

                    # Drop the 9- and 12-character line prefixes (presumably
                    # "DURATION=" and "FINGERPRINT=" -- confirm).
                    duration = fpcalc_output[1][9:]
                    fingerprint = fpcalc_output[2][12:]
            except Exception as e:
                logging.error(e)

            if fingerprint:
                query = u"/v2/lookup?" \
                    "client=8XaBELgH" \
                    "&meta=recording+releasegroups" \
                    "+tracks+puids+usermeta+compress" \
                    "&duration=%s&format=json&fingerprint=%s" % \
                    (duration, fingerprint)
                # NOTE(review): despite the message, nothing is skipped here;
                # the request below is still sent.  Confirm the intent.
                if query == lastquery and laststatus == 200:
                    logging.info("Same request already occurred - skipping")

                try:
                    conn = HTTPConnection("api.acoustid.org", 80)
                    conn.request("GET", query)
                    response = conn.getresponse()
                except Exception as e:
                    # Was a bare `except:` that dropped the error silently.
                    logging.error(e)
                    continue
                puid = ""
                mb_title = ""
                mb_artists = ""
                if response.status != 200:
                    continue
                try:
                    lastquery = query
                    laststatus = 200
                    # Read the body once: a second read() on the same response
                    # returns nothing, which left the debug log below empty.
                    body = response.read()
                    results = json.loads(body)
                    lastdata = results["results"][0]
                    logging.debug(lastdata)
                    release = "releasegroups" in lastdata \
                        and len(lastdata) and lastdata["releasegroups"][0]
                    recording = "recordings" in lastdata \
                        and len(lastdata) and lastdata["recordings"][0]
                    score = "score" in lastdata and lastdata['score']
                    logging.debug(release)
                    logging.debug(recording)
                    if len(lastdata):
                        logging.debug("%s results found" % len(lastdata))
                        puid = 'puids' in lastdata and lastdata["puids"][0]
                        mbid = release and release['id'] \
                            or recording \
                            and recording[0]['releasegroups'][0]['id']
                        mb_title = release and release["title"] \
                            or recording \
                            and recording[0]['title']
                        mb_artists = " ".join(
                            [i['name'] for i in (release and release["artists"]
                             or recording and recording[0]['artists'])])
                    logging.debug(
                        "Response status: %d %s" %
                        (response.status, body))
                except Exception as e:
                    # Log before skipping; the original `continue` came first
                    # and made the logging call unreachable.
                    logging.error(e)
                    continue

                stoptime = time()
                requests = (requests + 1) % 3

                if score < 0.7:
                    continue

                if len(title) == len(mb_title):
                    title_distance = hamming(
                        title, mb_title) / float(len(title))
                else:
                    title_distance = levenshtein(
                        title, mb_title) / float(
                        max(len(title), len(mb_title)))

                if len(artist) == len(mb_artists):
                    author_distance = hamming(
                        artist, mb_artists) / float(len(artist))
                else:
                    author_distance = levenshtein(
                        artist, mb_artists) / float(
                        max(len(artist), len(mb_artists)))

                # if title_distance > 0.33 and author_distance > 0.5:
                logging.debug(
                    "distances: %s %s %s" %
                    (score, title_distance, author_distance))
                #     continue

                logging.debug("puid: %s, mbid %s" % (puid, mbid))
                with self.condition:
                    try:
                        song_id, album_id = self.db.execute(
                            "select id, album_id from song "
                            "where path = ?;", (path,)).fetchone()
                        self.db.execute(
                            "update song set puid = ?, mbid = ? "
                            "where id = ?", (puid, mbid, song_id))
                        if title_distance > 0:
                            self.db.execute(
                                "update song set title = ? "
                                "where id = ?", (mb_title, song_id))
                        self.db.commit()
                    except Exception as e:
                        logging.error(e)
        self.db.close()
Example #14
0
        desired_states = []
        desired_unclamped_states = []
        diffs = []
        unclamped_diffs = []
        for i in trange(trials):
            rstate = random_state(self.V)
            init_state = clamp(rstate,init_obs)
            final_state = self.sample_from_clamped_equilibrium(init_state,treatment)
            final_unclamped_state = self.sample_from_equilibrium(init_state)
            final_states.append(final_state)
            final_unclamped_states.append(final_unclamped_state)
            desired_state = clamp(final_state,final_obs)
            desired_states.append(desired_state)
            desired_unclamped_state = clamp(final_unclamped_state,final_obs)
            desired_unclamped_states.append(desired_unclamped_state)
            discrepancy = hamming(final_state,desired_state)
            unclamped_discrepancy = hamming(final_unclamped_state,desired_state)
            diff = final_state - desired_state
            unclamped_diff = final_unclamped_state - desired_unclamped_state
            diffs.append(diff)
            unclamped_diffs.append(unclamped_diff)
            discrepancies.append(discrepancy)
        print "distinct final states:",len(set(map(tuple,final_states)))
        print "distinct desired states:",len(set(map(tuple,desired_states)))
        print "distinct final unclamped states:",len(set(map(tuple,final_unclamped_states)))
        print "distinct desired unclamped states:",len(set(map(tuple,desired_unclamped_states)))
        print "distinct diffs:",len(set(map(tuple,diffs)))
        print "distinct unclamped diffs:",len(set(map(tuple,unclamped_diffs)))
        print [i for i,d in enumerate(diffs[0]) if d != 0]
        return discrepancies
Example #15
0
#  block. Put them together and you have the key.
#

import sys
sys.path.insert(1, "../common") # Want to locate modules in our 'common' directory


import string
import binascii
import utils
import ltrfreq

# Self-check for utils.hamming: these two 14-character strings must be at
# distance 37.  (37 exceeds the string length, so the distance is presumably
# counted in bits rather than characters -- confirm in utils.)
s1 = "this is a test"
s2 = "wokka wokka!!!"

hamster = utils.hamming(s1, s2)
print hamster

assert hamster == 37


# That's all for now - below here doesn't work

# Placeholders for the unfinished part of the script.
decoded = []
bytes1 = []
bytes2 = []

# Smallest and largest key length; not used within the lines shown here.
minkeylength = 2
maxkeylength = 40

Example #16
0
def test_hamming():
    """The local ``hamming`` must agree with ``np.hamming`` for a length of 10."""
    window_length = 10
    actual = hamming(window_length)
    expected = np.hamming(window_length)
    assert np.allclose(actual, expected)