Esempio n. 1
0
def _generate_codons(AAseq, bias, cutoff=0.):
    """Generate unordered codons to use for each amino acid present in AAseq
    such that the codon usage is as close as possible to bias.

    AAseq:  iterable of amino acid symbols (compared against util.AA entries).
    bias:   codon usage indexed by codon; normalised with util.normalise first.
    cutoff: codons whose normalised bias is not strictly greater than this
            value are never used.

    Returns a dict mapping every amino acid in util.AA to a shuffled list of
    codons.
    """
    bias = util.normalise(bias)

    out = {}
    for aa in util.AA:
        # list all codons which could be used for this aa
        cdn_list = [c for c in util.codon_table[aa] if bias[c] > cutoff]
        # how many codons do we need for this aa?
        n_needed = sum(1 for aas in AAseq if aas == aa)
        # what number of each codon should we have?
        counts = (bias[cdn_list] / np.sum(bias[cdn_list])) * n_needed

        # sort by smallest residual
        counts = pd.DataFrame({'c': counts,
                               'r': np.abs(counts - np.around(counts))
                               }).sort_values(by='r')['c']

        # assign integers, carrying the rounding error over to the next codon.
        # The Series is labelled by codon, so access must be explicitly
        # positional (.iloc); plain integer keys are deprecated by pandas.
        overflow = 0.
        icounts = pd.Series(np.zeros(len(counts), dtype=int), index=counts.index)
        for i in range(len(counts)):
            icounts.iloc[i] = int(np.round(counts.iloc[i] + overflow))
            overflow = overflow + counts.iloc[i] - icounts.iloc[i]

        # list of codons (Series.items() replaces the removed iteritems())
        out[aa] = []
        for cdn, n_cdn in icounts.items():
            out[aa] = out[aa] + [cdn] * n_cdn
            # shuffle the list (in some schemes, the codons are taken in list
            # order when the genome lacks information); done after every
            # extension so the random number consumption is unchanged
            np.random.shuffle(out[aa])

    return out
Esempio n. 2
0
    def from_seqrecord(cls, sr, featuretype='CDS', name=None):
        """Build an instance from the features of type featuretype found in sr.

        sr:          record exposing .features and .name.
        featuretype: only features whose .type equals this value are used.
        name:        name for the new instance; falls back to sr.name when
                     empty or None.
        """
        name = name or sr.name

        features = [feat for feat in sr.features if feat.type == featuretype]
        n_features = len(features)

        # one row of 64 counts per selected feature
        per_feature = pd.DataFrame(np.zeros((n_features, 64), dtype=int),
                                   columns=util.list_codons())

        # 64 x 64 table accumulated over all features
        pair_counts = pd.DataFrame(np.zeros((64, 64), dtype=int),
                                   index=util.list_codons(),
                                   columns=util.list_codons())

        scores = pd.DataFrame(np.zeros((n_features, 2)),
                              columns=['first', 'second'])

        seqs = [util._extract(sr, feat) for feat in features]
        for row, seq in enumerate(seqs):
            per_feature.loc[row, :] = util.get_bias(seq)
            util.add_second_order(pair_counts, seq)

        # score every sequence against the normalised totals
        norm_first = util.normalise(per_feature.sum(0))
        norm_second = util.so_normalise(pair_counts)
        for row, seq in enumerate(seqs):
            scores.at[row, 'first'] = util.score(norm_first, seq)
            scores.at[row, 'second'] = util.so_score(norm_second, seq)

        return cls(name, per_feature, pair_counts, scores, norm_first, norm_second)
Esempio n. 3
0
def auto_PCA(gs,
             AAseq,
             rare_codon_cutoff=0.,
             GMM_components=3,
             prior_weight=1.,
             PCA_components=3,
             mode='rand'):
    """Return one verified sequence per cluster found by PCA + GMM.

    gs.fo() is clustered with PCA.PrincipalComponentAnalysis.from_GMM; for
    each resulting cluster the first order bias of its members is used to
    pick codons for AAseq, which are then ordered using gs.so() and passed
    through _verify.
    """
    # Perform PCA and GMM/EM clustering
    pca = PCA.PrincipalComponentAnalysis.from_GMM(
        gs.fo(),
        K=GMM_components,
        PCA_components=PCA_components,
        prior_weight=prior_weight)

    sequences = []
    for _label, members in pca.labels().items():
        # first order bias of this cluster only
        cluster_bias = util.normalise(gs.fo().loc[members].sum(0))

        # codon lists matching that bias
        codon_pool = _generate_codons(AAseq, cluster_bias,
                                      cutoff=rare_codon_cutoff)

        # order codons according to whole genome so preference
        ordered = _second(gs.so(), AAseq, codon_pool, mode)

        sequences.append(_verify(AAseq, ordered))

    return sequences
Esempio n. 4
0
def convolve(x1, x2):
    """Return the normalised full linear convolution of x1 and x2.

    x1, x2: one-dimensional numpy arrays.

    The result has x1.size + x2.size - 1 samples and is passed through
    util.normalise before being returned. The sum is computed by np.convolve
    instead of a nested Python loop, so values may differ from a manual
    accumulation by floating-point rounding only.
    """
    N = x1.size
    M = x2.size

    if N == 0 or M == 0:
        # np.convolve rejects empty input; keep the historical result here
        # (all zeros, or ValueError from np.zeros when both are empty).
        y = np.zeros(N + M - 1)
    else:
        # Cast to float so the output dtype matches the float accumulator
        # used previously, whatever the input dtype is.
        y = np.convolve(np.asarray(x1, dtype=float),
                        np.asarray(x2, dtype=float))

    return util.normalise(y)
Esempio n. 5
0
    def collide(self, other, normal, local_a, local_b):
        """Handle a collision with other; return True when a jump constraint
        was created, False otherwise.

        normal, local_a, local_b are forwarded to JumpConstraint unchanged.
        """
        if getattr(other, 'lethal', False):
            self.dead = max(self.dead, 0)

        # Only objects whose checkpoint attribute is a plain dict are recorded
        # (this is meant to exclude other players).
        if hasattr(other, 'checkpoint') and type(other.checkpoint) is dict:
            self.checkpoint = other

        gravity_dir = util.normalise(np.array(self.world.gravity, dtype=float))
        against_gravity = np.dot(normal, gravity_dir) < -0.7
        if not (against_gravity and self.jump is True):
            return False

        self.jump = JumpConstraint(normal, local_a, local_b, -self.action[1])
        self.constraints.append((other, self.jump))
        # Returning True ensures no contact constraint interferes
        return True
Esempio n. 6
0
    def set_interface_mac(self, device, mac, port=None):
        """Write a new MAC address for device into the registry and restart
        the adapter.

        device: identifier passed to self.find_interface.
        mac:    MAC address; stored after being passed through normalise().
        port:   accepted but not used by this method.

        The sub keys of self.WIN_REGISTRY_PATH are scanned for one whose
        "DriverDesc" equals the interface description. If none is found the
        method returns without changing anything. All registry handles are
        released through context managers, including when an error is raised.
        """
        description, adapter_name, address, current_address = self.find_interface(
            device)

        adapter_path = None

        # Locate adapter's registry and update network address (mac)
        with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as reg_hdl:
            with winreg.OpenKey(reg_hdl, self.WIN_REGISTRY_PATH) as key:
                info = winreg.QueryInfoKey(key)

                # Find adapter key based on sub keys
                for x in range(info[0]):
                    subkey = winreg.EnumKey(key, x)
                    if subkey == 'Properties':
                        break
                    path = self.WIN_REGISTRY_PATH + "\\" + subkey

                    # Check for adapter match for appropriate interface
                    with winreg.OpenKey(reg_hdl, path) as new_key:
                        try:
                            adapterDesc = winreg.QueryValueEx(new_key, "DriverDesc")
                        except WindowsError as err:
                            if err.errno == 2:
                                # register value not found, ok to ignore
                                continue
                            raise

                    if adapterDesc[0] == description:
                        adapter_path = path
                        break

                if adapter_path is None:
                    return

                # Registry path found update mac addr
                with winreg.OpenKey(reg_hdl, adapter_path, 0,
                                    winreg.KEY_WRITE) as adapter_key:
                    winreg.SetValueEx(adapter_key, "NetworkAddress", 0,
                                      winreg.REG_SZ, normalise(mac))

        # Adapter must be restarted in order for change to take effect
        self.restart_adapter(adapter_name)
Esempio n. 7
0
def run(p_method, p_normalise = True, p_reverse_results = True):
    """Load the vectors of every train/test directory, optionally normalise
    them, run the selected comparison method and print the results.

    p_method:          one of the Method constants; any other value runs no
                       comparison.
    p_normalise:       when true, util.normalise is applied to the vectors.
    p_reverse_results: forwarded to util.print_results.
    """
    for directory in current.train + [current.test]:
        vectors[directory] = {}
        files = util.get_files(dataset.DATA_DIRECTORY + directory)
        util.read_files(files, directory, vectors)

    if p_normalise:
        util.normalise(vectors)

    # (method constant, handler) pairs, checked in order; first match wins
    handlers = (
        (Method.DOT_PRODUCT, dot.compute_dot_product),
        (Method.DIRICHLET, bayes.calculate),
        (Method.DIFFERENCE, dot.compute_difference),
        (Method.PEARSON, pearson.compute),
    )
    for method, handler in handlers:
        if p_method == method:
            handler(current, vectors, results)
            break

    util.print_results(results, p_reverse_results, decimal_numbers)
Esempio n. 8
0
def run(p_method, p_normalise=True, p_reverse_results=True):
    """Load the vectors of every train/test directory, optionally normalise
    them, run the selected comparison method and print the results.

    p_method:          one of the Method constants; any other value runs no
                       comparison.
    p_normalise:       when true, util.normalise is applied to the vectors.
    p_reverse_results: forwarded to util.print_results.
    """
    for directory in current.train + [current.test]:
        vectors[directory] = {}
        files = util.get_files(dataset.DATA_DIRECTORY + directory)
        util.read_files(files, directory, vectors)

    if p_normalise:
        util.normalise(vectors)

    # (method constant, handler) pairs, checked in order; first match wins
    handlers = (
        (Method.DOT_PRODUCT, dot.compute_dot_product),
        (Method.DIRICHLET, bayes.calculate),
        (Method.DIFFERENCE, dot.compute_difference),
        (Method.PEARSON, pearson.compute),
    )
    for method, handler in handlers:
        if p_method == method:
            handler(current, vectors, results)
            break

    util.print_results(results, p_reverse_results, decimal_numbers)
    def set_interface_mac(self, device, mac, port=None):
        """Write a new MAC address for device into the registry and restart
        the adapter.

        device: identifier passed to self.find_interface.
        mac:    MAC address; stored after being passed through normalise().
        port:   accepted but not used by this method.

        The sub keys of self.WIN_REGISTRY_PATH are scanned for one whose
        "DriverDesc" equals the interface description. If none is found the
        method returns without changing anything. All registry handles are
        released through context managers, including when an error is raised.
        """
        description, adapter_name, address, current_address = self.find_interface(device)

        adapter_path = None

        # Locate adapter's registry and update network address (mac)
        with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as reg_hdl:
            with winreg.OpenKey(reg_hdl, self.WIN_REGISTRY_PATH) as key:
                info = winreg.QueryInfoKey(key)

                # Find adapter key based on sub keys
                for x in range(info[0]):
                    subkey = winreg.EnumKey(key, x)
                    if subkey == "Properties":
                        break
                    path = self.WIN_REGISTRY_PATH + "\\" + subkey

                    # Check for adapter match for appropriate interface
                    with winreg.OpenKey(reg_hdl, path) as new_key:
                        try:
                            adapterDesc = winreg.QueryValueEx(new_key, "DriverDesc")
                        except WindowsError as err:
                            if err.errno == 2:
                                # register value not found, ok to ignore
                                continue
                            raise

                    if adapterDesc[0] == description:
                        adapter_path = path
                        break

                if adapter_path is None:
                    return

                # Registry path found update mac addr
                with winreg.OpenKey(reg_hdl, adapter_path, 0, winreg.KEY_WRITE) as adapter_key:
                    winreg.SetValueEx(adapter_key, "NetworkAddress", 0, winreg.REG_SZ, normalise(mac))

        # Adapter must be restarted in order for change to take effect
        self.restart_adapter(adapter_name)
Esempio n. 10
0
    def __init__(self, _name, _data, _second_order, _scores, _ndata=None, _nso=None):
        """Store the given tables and derive the normalised ones when needed.

        _name, _data, _second_order, _scores: stored unchanged on the instance.
        _ndata: precomputed normalised bias; when None it is computed with
                util.normalise from _data.sum(0).
        _nso:   precomputed normalised second order table; when None it is
                computed with util.so_normalise from _second_order.
        """
        self._name = _name
        self._data = _data
        self._second_order = _second_order
        self._scores = _scores

        # totals over axis 0 of the data table
        self._bias = self._data.sum(0)

        self._normed = util.normalise(self._bias) if _ndata is None else _ndata
        self._so_normed = (util.so_normalise(self._second_order)
                           if _nso is None else _nso)
Esempio n. 11
0
    def deconvolve(self):
        """Deconvolve the current line and return the resulting packet string.

        A first pass uses Vbi.possible_bytes. If one of self.finders matches
        the result, a second pass restricted to that finder's possible bytes
        is run and the finder's fixed-up packet is returned. Otherwise the
        row number decoded by mrag() from the first two bytes decides whether
        another pass is made.
        """
        self.target = normalise(gauss(self.vbi, self.gauss_sd))

        self.make_guess_mask()
        self.make_possible_bytes(Vbi.possible_bytes)

        self._oldbytes = np.zeros(42, dtype=np.uint8)

        self._deconvolve()

        packet = "".join(chr(b) for b in self.g.bytes)

        finder = finders.test(self.finders, packet)
        if finder:
            sys.stderr.write("matched by finder "+finder.name+"\n")
            sys.stderr.flush()
            self.make_possible_bytes(finder.possible_bytes)
            self._deconvolve()
            finder.find(self.g.bytes)
            return finder.fixup()

        # No finder matched, so this should not be a packet 0 (or 30). A
        # packet that still claims to be packet 0 would mess up the page
        # splitter, so the deconvolution is redone with the packet 0 (and 30)
        # header removed from the possible bytes.
        #
        # Original author's note: this doesn't work, reason unknown. A packet
        # in 63322 does not match the finders but still passes through the
        # check below with r=0, which should be impossible.
        (_, r), _ = mrag(self.g.bytes[:2])
        if r == 0:
            sys.stderr.write("packet falsely claimed to be packet %d\n" % r)
            sys.stderr.flush()
            if not self.allow_unmatched:
                self._nzdeconvolve()
            packet = "".join(chr(b) for b in self.g.bytes)
        elif r == 27:
            # a link packet is completely hammed
            self.make_possible_bytes([hammbytes]*42)
            self._deconvolve()
            packet = "".join(chr(b) for b in self.g.bytes)

        return packet
Esempio n. 12
0
def make_graph(sentences):
    """Build a sentence similarity matrix and return (sentences, graph.T).

    sentences: sequence of sentences, each a sequence of terms where term[0]
               is compared for equality and term[1] is the term's value.

    For every sentence only the terms whose value reaches the threshold given
    by get_kth_min(values, p=0.2) are kept. Its similarity to another
    sentence is the number of kept terms that also occur in the other
    sentence, divided by the sum of both sentence lengths (the length before
    filtering for the current one). Each row is normalised with
    normalise(row, method='normal'); rows that still do not sum to more than
    0.99 are replaced by a uniform row of 1/len(graph).
    """
    rows = []
    for i, current_sentence in enumerate(sentences):
        # A real list, not map(): on Python 3 map() is a one-shot iterator
        # without len(), which is fragile to hand to get_kth_min.
        values = [term[1] for term in current_sentence]
        useful_limit = get_kth_min(values, p=0.2)
        # measured before filtering; used in the denominator below
        full_length = len(current_sentence)
        useful_terms = [term for term in current_sentence
                        if term[1] >= useful_limit]

        row = []
        for j, sentence in enumerate(sentences):
            if i == j:
                row.append(0)
                continue
            # each useful term counts once if it occurs in the other sentence
            shared = sum(
                1 for useful in useful_terms
                if any(useful[0] == term[0] for term in sentence)
            )
            row.append(float(shared) / (full_length + len(sentence)))

        rows.append(normalise(row, method='normal'))

    graph = np.array(rows)
    for idx in range(len(graph)):
        # rows without (almost) unit mass become a uniform distribution
        if not sum(graph[idx]) > 1 - 0.01:
            graph[idx] = 1.0 / len(graph)

    return sentences, graph.T
Esempio n. 13
0
def random_pts(im, edges=True):
    '''
    Return an array of random point coordinates within the image's bounds.

    Arguments:

    im: the image whose bounds the points should conform to.

    edges: when False, edge points are not added to the point set.
    '''
    height, width = im.shape[:2]

    # One point per pixel of the longer image side; every point is an
    # (x, y) pair, hence the two columns.
    n_points = max(width, height)
    unit_points = np.random.rand(n_points, 2)

    # np.random.rand yields values in [0, 1); normalise before adding the
    # borders because the borders use a larger range.
    scaled = normalise(unit_points, im)

    if not edges:
        return scaled
    # now add the edges to the random points list
    return add_edges(scaled, width, height)
Esempio n. 14
0
 def bits(self):
     """Chop and average the raw samples into self.bits_array, an array where one entry stands for one bit of the original signal."""
     # sum the samples of each segment starting at the offsets in Line.config.bits
     segment_sums = numpy.add.reduceat(self.line, Line.config.bits, dtype=numpy.float32)
     # drop the last segment, then turn the sums into averages
     averages = segment_sums[:-1] / Line.config.bit_lengths
     self.bits_array = normalise(averages)
Esempio n. 15
0
# Estimate the fundamental of a synthetic harmonic signal from the spectrum
# of its log-magnitude spectrum.
fs = 44100.0  # presumably the sample rate in Hz -- confirm
N = 1024  # number of samples in the test signal
f0 = 440.0  # fundamental used to build the test signal
H = 10  # number of harmonics
harmonics = np.arange(1, H+1)
# NOTE(review): phi is not used anywhere in this snippet; it still draws H
# values from the global NumPy random state.
phi = np.random.random(H) * np.pi

# Test signal: sum of H cosines at multiples of f0, harmonic h scaled by 1/h
x = np.zeros(N)

for n in range(N):
    for h in harmonics:
        x[n] += np.cos(2.0 * np.pi * n / fs * h * f0) / h

x = util.normalise(x)

# Compute magnitude spectrum
X = np.fft.rfft(x)
M = abs(X)

# Fourier transform of log(magnitude spectrum)
# NOTE(review): np.log2 yields -inf for any bin of M that is exactly zero.
C = np.fft.rfft(np.log2(M))
Mc = abs(C)
Mc = util.normalise(Mc)

# Find highest bin (ignore first 10 bins)
DC = 10
maxbin = np.argmax(Mc[DC:]) + DC  # add DC back to index into the full Mc
f0_estimate = (fs / 2.0) / maxbin
Esempio n. 16
0
def YIN(x):
    """Return the squared-difference function of x for every lag.

    For each lag n in 1..N-1 the result holds
    sum((x[m] - x[m + n]) ** 2 for m in range(N - n)); entry 0 is 0.

    x: one-dimensional sequence of samples. It is converted to float64
       first, so integer sample types (e.g. int16 audio) cannot overflow in
       the subtraction or the square.

    Returns a float array with the same number of entries as x.
    """
    x = np.asarray(x, dtype=np.float64)
    N = x.size
    y = np.zeros(N)

    for n in range(1, N):
        # difference between the signal and itself shifted by n samples
        d = x[:N - n] - x[n:]
        y[n] = np.dot(d, d)

    return y


if __name__ == "__main__":
    # Plot the normalised magnitude spectrum of a wav file on log-log axes.
    fn = "/Users/olafwisselink/Code/sms-tools/sounds/flute-A4.wav"
    fs, x = util.readwav(fn)
    N = x.size

    # Magnitude spectrum of x
    X = np.fft.rfft(x, norm="ortho")
    M = abs(X)
    M = util.normalise(M)

    # Matplotlib
    # The basex/basey keywords were removed from Matplotlib; base 10 is the
    # default for both axes, so they are simply omitted.
    plt.loglog(M)
    # NOTE(review): a lower limit of 0 is not valid on a log axis -- confirm
    # the intended lower bounds.
    plt.xlim(0, X.size)
    plt.ylim(0, 1)
    plt.show()
Esempio n. 17
0
    def read_zmatrix(self,fname):
        """Read a Z-matrix from fname into self.mol and return it.

        The first line names the first atom, placed at the origin. Every
        following line holds: atom name, bonded atom number, bond length and,
        from the third atom on, angle atom number and angle, and from the
        fourth atom on, dihedral atom number and dihedral. Angles are in
        degrees. Lengths and angles may be numeric literals or names that
        are resolved through self.lookup_var. Reading stops at the first
        empty line or at end of file.

        The opened file is stored in self.file and is not closed here.
        NOTE(review): presumably lookup_var or a later step reads from it --
        confirm before adding a close().
        """
        def value_of(token):
            # A field is either a number or the name of a variable.
            try:
                return float(token)
            except ValueError:
                return self.lookup_var(token)

        f = open(fname,'r')
        self.file = f
        name = f.readline().strip()
        self.mol = Molecule()
        self.mol.add_atom(Atom(name.lower(),1,0,0,0,connect=0))

        # Second atom: only a bond length, stored as the last coordinate.
        sp = f.readline().split()
        if not sp:
            return self.mol
        sp[0] = sp[0].lower()
        bl = value_of(sp[2])
        self.mol.add_atom(Atom(sp[0],2,0,0,bl,int(sp[1])))
        self.mol.add_bond(2,int(sp[1]))

        # Third atom: bond length and bond angle.
        sp = f.readline().split()
        if not sp:
            return self.mol
        sp[0] = sp[0].lower()
        bl = value_of(sp[2])
        ba = value_of(sp[4])
        a = Atom(sp[0],3,math.sin(ba*math.pi/180.0)*bl,0,self.mol.atoms[int(sp[1]) -1].z - math.cos(ba*math.pi/180.0)*bl,int(sp[1]))

        self.mol.add_atom(a)
        self.mol.add_bond(3,int(sp[1]))

        # Remaining atoms: bond length, bond angle and dihedral angle.
        atom_number = 4
        while True:
            sp = f.readline().split()
            if not sp:
                # parenthesised form prints the same text on Python 2 and 3
                print('end of matrix')
                break
            sp[0] = sp[0].lower()
            bl = value_of(sp[2])
            ba = value_of(sp[4])
            dh = value_of(sp[6])

            connect = int(sp[1])
            angle_connect = int(sp[3])
            dihed_connect = int(sp[5])

            atoms = self.mol.atoms
            # vectors from the angle atom and from the dihedral atom to the
            # bonded atom
            vector1 = vec_minus(atoms[connect-1].xyz,atoms[angle_connect-1].xyz)
            vector2 = vec_minus(atoms[connect-1].xyz,atoms[dihed_connect-1].xyz)
            norm1 = vec_cross(vector1,vector2)
            norm2 = vec_cross(vector1,norm1)
            norm1 = normalise(norm1)
            norm2 = normalise(norm2)

            # combine the two normals according to the dihedral angle
            norm1 = vec_times(norm1,-1*math.sin(dh*math.pi/180))
            norm2 = vec_times(norm2,math.cos(dh*math.pi/180))

            vector3 = vec_add(norm1,norm2)
            vector3 = normalise(vector3)

            # component scaled by sin(bond angle)
            vector3 = vec_times(vector3,bl*math.sin(ba*math.pi/180.0))

            vector1 = normalise(vector1)

            # component along the connect/angle axis scaled by cos(bond angle)
            vector1 = vec_times(vector1,bl*math.cos(ba*math.pi/180.0))

            vector2 = vec_add(atoms[connect - 1].xyz,vector3)
            vector2 = vec_minus(vector2,vector1)

            a = Atom(sp[0],atom_number,vector2[0],vector2[1],vector2[2],connect)
            self.mol.add_atom(a)
            self.mol.add_bond(atom_number,connect)

            atom_number += 1
        return self.mol
Esempio n. 18
0
 def get_bias(self, indexes):
     """Return util.normalise applied to the axis-0 sum of the rows of self._data selected by indexes."""
     selected = self._data.loc[indexes]
     totals = selected.sum(0)
     return util.normalise(totals)
Esempio n. 19
0
 def write_url(self):
     """Insert or update the urn2url row for this record's URN and source.

     The URN is normalised first; an invalid URN (normalise returns None) is
     reported and nothing is written. Otherwise:
       * URN not stored yet: insert a row and record history.
       * URN stored, but not for this source: insert a row and record history.
       * URN stored for this source with a different URL: update the URL and
         record history.

     NOTE(review): every statement below is built by string interpolation,
     so a quote in the URN, URL, source id or type breaks the statement and
     allows SQL injection. Switch to parameterised queries once the
     database driver's paramstyle is confirmed.
     """
     urn = normalise(self.urn.strip())
     if urn is None:
         print('Invalid URN: %s' % self.urn)
         return

     self.cursor.execute("SELECT * FROM urn2url WHERE urn = '%s'" % urn)
     existing_rows = list(self.cursor.fetchall())
     # assumes SELECT * returns urn, url, source_id in columns 0, 1, 2 -- TODO confirm
     existing_urns = [row[0] for row in existing_rows]
     existing_source_ids = [row[2] for row in existing_rows]

     if urn not in existing_urns:
         # New URN!
         print('< new urn')
         self.cursor.execute(
             "INSERT INTO urn2url (urn, url, source_id, url_type) VALUES ('%s', '%s', '%s', '%s')"
             % (urn, self.url, self.source_id, self.urn_type))
         print('---')
         print('urn: %s' % urn)
         print('url: %s' % self.url)
         print('urn_type: %s' % self.urn_type)
         print('<<<')
         self._update_history(urn=urn,
                              r_component=None,
                              url_old=None,
                              url_new=self.url,
                              url_type_old=None,
                              url_type_new=self.urn_type)
         print('>>>')
         print(self.cursor, self.source_id, self.title, self.urn, self.url)
         return

     if self.source_id not in existing_source_ids:
         print('< (another) urn from new source')
         # We have already harvested URN from some other source.
         self.cursor.execute(
             "INSERT INTO urn2url (urn, url, source_id, url_type) VALUES ('%s', '%s', '%s', '%s')"
             % (urn, self.url, self.source_id, self.urn_type))
         self._update_history(urn=urn,
                              r_component=None,
                              url_old=None,
                              url_new=self.url,
                              url_type_old=None,
                              url_type_new=self.urn_type)
         print('insert another')
         return

     for row in existing_rows:
         if row[2] == self.source_id and row[1] != self.url:
             # The URL for this URN in this source has changed!
             print('< urn change')
             self.cursor.execute(
                 "UPDATE urn2url SET url='%s' WHERE urn='%s' AND source_id = '%s'"
                 % (self.url, urn, self.source_id))
             self._update_history(urn=urn,
                                  r_component=None,
                                  url_old=row[1],
                                  url_new=self.url,
                                  url_type_old=None,
                                  url_type_new=self.urn_type)
             print('update')