def _generate_codons(AAseq, bias, cutoff=0.):
    """Build, per amino acid, a shuffled list of codons matching ``bias``.

    For every amino acid in ``util.AA`` the number of occurrences in
    ``AAseq`` is distributed over the codons allowed for that amino acid in
    proportion to their (normalised) bias.  Fractional targets are turned
    into integers with a running carry so the rounding error of one codon is
    passed on to the next one.

    :param AAseq: iterable of amino-acid symbols.
    :param bias: codon-indexed weights; normalised with ``util.normalise``.
    :param cutoff: codons whose normalised bias is not strictly greater than
        this value are excluded.
    :return: dict mapping amino acid -> list of codons in random order.
    """
    bias = util.normalise(bias)
    out = {}
    for aa in util.AA:
        # Codons that may be used for this amino acid.
        allowed = [c for c in util.codon_table[aa] if bias[c] > cutoff]

        # Number of codons needed for this amino acid.
        needed = sum(1 for residue in AAseq if residue == aa)

        # Ideal (fractional) number of each codon.
        targets = (bias[allowed] / np.sum(bias[allowed])) * needed

        # Handle the codons with the smallest rounding residual first.
        targets = pd.DataFrame({
            'c': targets,
            'r': np.abs(targets - np.around(targets)),
        }).sort_values(by='r')['c']

        # Assign integer counts.  Positional access goes through .iloc: the
        # index holds codon labels, so plain integer [] indexing depends on a
        # deprecated pandas fallback.
        carry = 0.
        assigned = pd.Series(np.zeros(len(targets), dtype=int),
                             index=targets.index)
        for pos in range(len(targets)):
            want = targets.iloc[pos]
            got = int(np.round(want + carry))
            assigned.iloc[pos] = got
            carry = carry + want - got

        # Expand the counts into a flat codon list.  Series.items() replaces
        # iteritems(), which was removed in pandas 2.0.
        codons = []
        for cdn, n_copies in assigned.items():
            codons.extend([cdn] * int(n_copies))

        # Shuffle the list (in some schemes, the codons are taken in list
        # order when the genome lacks information).
        np.random.shuffle(codons)
        out[aa] = codons
    return out
def from_seqrecord(cls, sr, featuretype='CDS', name=None):
    """Alternate constructor: collect codon statistics from a sequence record.

    :param sr: record exposing ``name`` and ``features``; every feature whose
        ``type`` equals ``featuretype`` is analysed.
    :param featuretype: feature type to select.
    :param name: name for the result; falls back to ``sr.name`` when falsy.
    :return: ``cls(name, data, second_order, scores, normed, so_normed)``.
    """
    name = name or sr.name

    features = [feat for feat in sr.features if feat.type == featuretype]
    n_features = len(features)

    # One row of 64 codon counts per selected feature.
    first_order = pd.DataFrame(
        np.zeros((n_features, 64), dtype=int),
        columns=util.list_codons(),
    )
    # Codon-by-codon table, updated in place by util.add_second_order.
    second_order = pd.DataFrame(
        np.zeros((64, 64), dtype=int),
        index=util.list_codons(),
        columns=util.list_codons(),
    )
    scores = pd.DataFrame(
        np.zeros((n_features, 2)),
        columns=['first', 'second'],
    )

    sequences = [util._extract(sr, feat) for feat in features]

    for row, sequence in enumerate(sequences):
        first_order.loc[row, :] = util.get_bias(sequence)
        util.add_second_order(second_order, sequence)

    # Scores are computed against the normalised totals of all features.
    normed = util.normalise(first_order.sum(0))
    so_normed = util.so_normalise(second_order)

    for row, sequence in enumerate(sequences):
        scores.at[row, 'first'] = util.score(normed, sequence)
        scores.at[row, 'second'] = util.so_score(so_normed, sequence)

    return cls(name, first_order, second_order, scores, normed, so_normed)
def auto_PCA(gs, AAseq, rare_codon_cutoff=0., GMM_components=3, prior_weight=1., PCA_components=3, mode='rand'):
    """Generate one sequence for ``AAseq`` per cluster found in ``gs.fo()``.

    The first-order data of ``gs`` is clustered with
    ``PCA.PrincipalComponentAnalysis.from_GMM``; for each resulting label the
    first-order bias of the member rows selects the codons, which are then
    ordered using the second-order data ``gs.so()`` and passed through
    ``_verify``.

    :return: list with one verified sequence per cluster label.
    """
    # Perform PCA and GMM/EM clustering.
    clustering = PCA.PrincipalComponentAnalysis.from_GMM(
        gs.fo(),
        K=GMM_components,
        PCA_components=PCA_components,
        prior_weight=prior_weight,
    )

    def build_for(member_indexes):
        # First-order bias of this cluster only.
        cluster_bias = util.normalise(gs.fo().loc[member_indexes].sum(0))
        # Codon lists that follow the cluster's first-order bias.
        codon_pool = _generate_codons(AAseq, cluster_bias,
                                      cutoff=rare_codon_cutoff)
        # Order the codons by the whole-genome second-order preference.
        ordered = _second(gs.so(), AAseq, codon_pool, mode)
        return _verify(AAseq, ordered)

    return [build_for(members) for _label, members in clustering.labels().items()]
def convolve(x1, x2):
    """Return the normalised full linear convolution of ``x1`` and ``x2``.

    Both arguments are indexed element by element and must expose ``size``.
    The raw result has ``x1.size + x2.size - 1`` samples and is passed
    through ``util.normalise`` before being returned.
    """
    len_a, len_b = x1.size, x2.size
    acc = np.zeros(len_a + len_b - 1)
    for i in range(len_a):
        a_i = x1[i]
        for j in range(len_b):
            # Every product of x1[i] and x2[j] lands on output sample i + j.
            acc[i + j] += a_i * x2[j]
    return util.normalise(acc)
def collide(self, other, normal, local_a, local_b):
    """Handle a contact with ``other``.

    Updates ``self.dead`` for lethal objects, records ``other`` as the
    current checkpoint when it carries a ``dict`` checkpoint, and starts a
    jump when the contact normal points against gravity while a jump is
    pending (``self.jump is True``).

    :return: ``True`` when a jump constraint was created, else ``False``.
    """
    if getattr(other, 'lethal', False):
        self.dead = max(self.dead, 0)

    # Exact dict check: make sure it's not another player.
    if type(getattr(other, 'checkpoint', None)) == dict:
        self.checkpoint = other

    down = util.normalise(np.array(self.world.gravity, dtype=float))
    wants_jump = np.dot(normal, down) < -0.7 and self.jump is True
    if not wants_jump:
        return False

    constraint = JumpConstraint(normal, local_a, local_b, -self.action[1])
    self.jump = constraint
    self.constraints.append((other, constraint))
    # Ensure no contact constraint interferes.
    return True
def set_interface_mac(self, device, mac, port=None):
    """Write a new MAC address for ``device`` into the Windows registry.

    The adapter is located by comparing the ``DriverDesc`` value of each
    sub key of ``self.WIN_REGISTRY_PATH`` with the description returned by
    ``self.find_interface(device)``.  When a match is found its
    ``NetworkAddress`` value is set to ``normalise(mac)`` and the adapter is
    restarted.  When no sub key matches, nothing is written and the adapter
    is not restarted.

    Every registry handle is closed on all paths, including when a registry
    call raises.

    :param device: identifier passed to ``self.find_interface``.
    :param mac: new MAC address; passed through ``normalise``.
    :param port: unused.
    :raises WindowsError: registry errors other than errno 2 while reading
        ``DriverDesc`` are propagated, as are errors from the other calls.
    """
    description, adapter_name, address, current_address = self.find_interface(device)

    # Locate adapter's registry and update network address (mac)
    reg_hdl = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
    try:
        key = winreg.OpenKey(reg_hdl, self.WIN_REGISTRY_PATH)
        try:
            info = winreg.QueryInfoKey(key)

            # Find adapter key based on sub keys
            adapter_path = None
            for x in range(info[0]):
                subkey = winreg.EnumKey(key, x)
                path = self.WIN_REGISTRY_PATH + "\\" + subkey

                if subkey == 'Properties':
                    break

                # Check for adapter match for appropriate interface
                new_key = winreg.OpenKey(reg_hdl, path)
                try:
                    adapter_desc = winreg.QueryValueEx(new_key, "DriverDesc")
                except WindowsError as err:
                    if err.errno != 2:
                        raise
                    # register value not found, ok to ignore
                    adapter_desc = None
                finally:
                    winreg.CloseKey(new_key)

                if adapter_desc is not None and adapter_desc[0] == description:
                    adapter_path = path
                    break

            if adapter_path is None:
                return

            # Registry path found update mac addr
            adapter_key = winreg.OpenKey(reg_hdl, adapter_path, 0,
                                         winreg.KEY_WRITE)
            try:
                winreg.SetValueEx(adapter_key, "NetworkAddress", 0,
                                  winreg.REG_SZ, normalise(mac))
            finally:
                winreg.CloseKey(adapter_key)
        finally:
            winreg.CloseKey(key)
    finally:
        winreg.CloseKey(reg_hdl)

    # Adapter must be restarted in order for the change to take effect
    self.restart_adapter(adapter_name)
def run(p_method, p_normalise = True, p_reverse_results = True):
    """Load all data directories, run the selected method and print results.

    Vectors for every directory in ``current.train`` plus ``current.test``
    are read into the module-level ``vectors``; they are optionally
    normalised, then the computation chosen by ``p_method`` fills
    ``results``, which is finally printed.  An unrecognised ``p_method``
    runs no computation but still prints.
    """
    for directory in current.train + [current.test]:
        vectors[directory] = {}
        files = util.get_files(dataset.DATA_DIRECTORY + directory)
        util.read_files(files, directory, vectors)

    if p_normalise:
        util.normalise(vectors)

    # Pick the computation first, call it once below.
    compute = None
    if p_method == Method.DOT_PRODUCT:
        compute = dot.compute_dot_product
    elif p_method == Method.DIRICHLET:
        compute = bayes.calculate
    elif p_method == Method.DIFFERENCE:
        compute = dot.compute_difference
    elif p_method == Method.PEARSON:
        compute = pearson.compute

    if compute is not None:
        compute(current, vectors, results)

    util.print_results(results, p_reverse_results, decimal_numbers)
def run(p_method, p_normalise=True, p_reverse_results=True):
    """Read the train and test vectors, apply ``p_method`` and print results.

    Works on the module-level ``current``, ``vectors`` and ``results``.
    ``p_normalise`` toggles ``util.normalise(vectors)`` before the
    computation; ``p_reverse_results`` is forwarded to
    ``util.print_results``.  If ``p_method`` matches none of the known
    methods, only loading and printing happen.
    """
    directories = current.train + [current.test]
    for name in directories:
        vectors[name] = {}
        util.read_files(
            util.get_files(dataset.DATA_DIRECTORY + name),
            name,
            vectors,
        )

    if p_normalise:
        util.normalise(vectors)

    # Resolve the handler with the same comparison order as before, then
    # invoke it with the shared arguments.
    handler = None
    if p_method == Method.DOT_PRODUCT:
        handler = dot.compute_dot_product
    elif p_method == Method.DIRICHLET:
        handler = bayes.calculate
    elif p_method == Method.DIFFERENCE:
        handler = dot.compute_difference
    elif p_method == Method.PEARSON:
        handler = pearson.compute

    if handler is not None:
        handler(current, vectors, results)

    util.print_results(results, p_reverse_results, decimal_numbers)
def set_interface_mac(self, device, mac, port=None):
    """Write a new MAC address for ``device`` into the Windows registry.

    The adapter is located by comparing the ``DriverDesc`` value of each
    sub key of ``self.WIN_REGISTRY_PATH`` with the description returned by
    ``self.find_interface(device)``.  When a match is found its
    ``NetworkAddress`` value is set to ``normalise(mac)`` and the adapter is
    restarted.  When no sub key matches, nothing is written and the adapter
    is not restarted.

    Every registry handle is closed on all paths, including when a registry
    call raises.

    :param device: identifier passed to ``self.find_interface``.
    :param mac: new MAC address; passed through ``normalise``.
    :param port: unused.
    :raises WindowsError: registry errors other than errno 2 while reading
        ``DriverDesc`` are propagated, as are errors from the other calls.
    """
    description, adapter_name, address, current_address = self.find_interface(device)

    # Locate adapter's registry and update network address (mac)
    reg_hdl = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
    try:
        key = winreg.OpenKey(reg_hdl, self.WIN_REGISTRY_PATH)
        try:
            info = winreg.QueryInfoKey(key)

            # Find adapter key based on sub keys
            adapter_path = None
            for x in range(info[0]):
                subkey = winreg.EnumKey(key, x)
                path = self.WIN_REGISTRY_PATH + "\\" + subkey

                if subkey == "Properties":
                    break

                # Check for adapter match for appropriate interface
                new_key = winreg.OpenKey(reg_hdl, path)
                try:
                    adapter_desc = winreg.QueryValueEx(new_key, "DriverDesc")
                except WindowsError as err:
                    if err.errno != 2:
                        raise
                    # register value not found, ok to ignore
                    adapter_desc = None
                finally:
                    winreg.CloseKey(new_key)

                if adapter_desc is not None and adapter_desc[0] == description:
                    adapter_path = path
                    break

            if adapter_path is None:
                return

            # Registry path found update mac addr
            adapter_key = winreg.OpenKey(reg_hdl, adapter_path, 0,
                                         winreg.KEY_WRITE)
            try:
                winreg.SetValueEx(adapter_key, "NetworkAddress", 0,
                                  winreg.REG_SZ, normalise(mac))
            finally:
                winreg.CloseKey(adapter_key)
        finally:
            winreg.CloseKey(key)
    finally:
        winreg.CloseKey(reg_hdl)

    # Adapter must be restarted in order for the change to take effect
    self.restart_adapter(adapter_name)
def __init__(self, _name, _data, _second_order, _scores, _ndata=None, _nso=None):
    """Store codon statistics and their normalised forms.

    :param _name: name of the data set.
    :param _data: first-order table; its column sums become ``self._bias``.
    :param _second_order: second-order table.
    :param _scores: score table.
    :param _ndata: precomputed normalised bias; computed with
        ``util.normalise`` from ``self._bias`` when ``None``.
    :param _nso: precomputed normalised second-order table; computed with
        ``util.so_normalise`` when ``None``.
    """
    self._name = _name
    self._data = _data
    self._second_order = _second_order
    self._scores = _scores
    self._bias = self._data.sum(0)

    # Only compute the normalised forms the caller did not supply.
    self._normed = util.normalise(self._bias) if _ndata is None else _ndata
    self._so_normed = (
        util.so_normalise(self._second_order) if _nso is None else _nso
    )
def deconvolve(self):
    """Deconvolve the current line and return the resulting packet string.

    A first pass uses ``Vbi.possible_bytes``.  If one of ``self.finders``
    matches the result, a second pass restricted to that finder's possible
    bytes is run and the finder's fixed-up packet is returned.  Otherwise
    the row number decoded from the first two bytes decides whether a
    further pass is made.
    """
    def packet_from_guess():
        # String made of one character per byte of the current guess.
        return "".join([chr(x) for x in self.g.bytes])

    self.target = normalise(gauss(self.vbi, self.gauss_sd))

    self.make_guess_mask()
    self.make_possible_bytes(Vbi.possible_bytes)
    self._oldbytes = np.zeros(42, dtype=np.uint8)
    self._deconvolve()

    packet = packet_from_guess()

    F = finders.test(self.finders, packet)
    if F:
        sys.stderr.write("matched by finder "+F.name+"\n")
        sys.stderr.flush()
        self.make_possible_bytes(F.possible_bytes)
        self._deconvolve()
        F.find(self.g.bytes)
        return F.fixup()

    # if the packet did not match any of the finders then it isn't
    # a packet 0 (or 30). if the packet still claims to be a packet 0 it
    # will mess up the page splitter. so redo the deconvolution but with
    # packet 0 (and 30) header removed from possible bytes.

    # note: this doesn't work. i am not sure why. a packet in 63322
    # does not match the finders but still passes through this next check
    # with r=0. which should be impossible.
    (_m, r), _e = mrag(self.g.bytes[:2])

    if r == 0:
        sys.stderr.write("packet falsely claimed to be packet %d\n" % r)
        sys.stderr.flush()
        if not self.allow_unmatched:
            self._nzdeconvolve()
            packet = packet_from_guess()
    elif r == 27:
        # if it's a link packet, it is completely hammed
        self.make_possible_bytes([hammbytes]*42)
        self._deconvolve()
        packet = packet_from_guess()

    return packet
def make_graph(sentences):
    """Build a sentence-similarity matrix from weighted term lists.

    Each sentence is a sequence of terms where ``term[0]`` is compared for
    equality and ``term[1]`` is a weight.  For a row sentence only the terms
    whose weight reaches the threshold returned by
    ``get_kth_min(weights, p=0.2)`` are kept; the similarity to another
    sentence is the number of kept terms also present in that sentence,
    divided by the sum of both sentence lengths (the row sentence's length
    is taken before filtering).  Rows are normalised with
    ``normalise(row, method='normal')``.

    :return: ``(sentences, matrix)`` where ``matrix`` is the transposed
        array of rows.
    """
    rows = []
    for i, row_sentence in enumerate(sentences):
        weights = map(lambda x:x[1], row_sentence)
        threshold = get_kth_min(weights, p=0.2)
        full_length = len(row_sentence)
        kept_terms = [term for term in row_sentence if term[1] >= threshold]

        row = []
        for j, other in enumerate(sentences):
            if i == j:
                row.append(0)
                continue
            # Count kept terms that occur at least once in the other sentence.
            shared = sum(
                1 for kept in kept_terms
                if any(kept[0] == term[0] for term in other)
            )
            row.append(float(shared) / (full_length + len(other)))

        rows.append(normalise(row, method='normal'))

    graph = np.array(rows)

    # Rows that do not sum to (almost) one are replaced by a uniform row.
    # Removing such rows and their sentences was tried and is disabled.
    position = 0
    while position < len(graph):
        if sum(graph[position]) > 1 - 0.01:
            position += 1
        else:
            graph[position] = 1.0/len(graph)

    return sentences, graph.T
def random_pts(im, edges=True):
    '''
    Return an array of random point coordinates within the image's bounds.

    Arguments:
        im: the image whose bounds the points should conform to.
        edges: when False, edge points are not added to the point set.
    '''
    rows, cols = im.shape[:2]

    # The larger image dimension decides how many points are drawn; each
    # point is an (x, y) pair, hence the second dimension of 2.
    n_points = max(cols, rows)
    pts = np.random.rand(n_points, 2)

    # Normalise before adding the borders, as the borders use a larger range
    # than np.random.rand, which draws from [0, 1).
    pts = normalise(pts, im)

    if not edges:
        return pts

    # Now add the edges to the random point list.
    return add_edges(pts, cols, rows)
def bits(self):
    """Chop and average the raw samples so that one element of
    ``self.bits_array`` corresponds to one bit of the original signal.

    ``self.line`` is summed over the segments that start at the offsets in
    ``Line.config.bits``, the last segment is dropped, each sum is divided
    by ``Line.config.bit_lengths`` and the result is normalised.
    """
    config = Line.config
    # Sum of the samples in every segment, accumulated as float32.
    segment_sums = numpy.add.reduceat(self.line, config.bits,
                                      dtype=numpy.float32)
    averages = segment_sums[:-1] / config.bit_lengths
    self.bits_array = normalise(averages)
# Cepstrum-style fundamental-frequency estimate on a synthetic harmonic tone.
fs = 44100.0   # sample rate in Hz
N = 1024       # number of samples in the test signal
f0 = 440.0     # fundamental frequency of the test signal in Hz
H = 10         # number of harmonics
harmonics = np.arange(1, H+1)
# NOTE(review): phi is not used anywhere in the visible code; kept so the
# module-level name and the random-generator state stay as before.
phi = np.random.random(H) * np.pi

# Test signal: harmonic h has amplitude 1/h.  Each harmonic is evaluated for
# all samples at once; harmonics are still accumulated one after another.
sample_index = np.arange(N)
x = np.zeros(N)
for h in harmonics:
    x += np.cos(2.0 * np.pi * sample_index / fs * h * f0) / h
x = util.normalise(x)

# Compute magnitude spectrum
X = np.fft.rfft(x)
M = abs(X)

# Fourier transform of log(magnitude spectrum)
C = np.fft.rfft(np.log2(M))
Mc = abs(C)
Mc = util.normalise(Mc)

# Find highest bin (ignore first 10 bins)
DC = 10
maxbin = np.argmax(Mc[DC:]) + DC
# maxbin is at least DC, so the division cannot hit zero.
f0_estimate = (fs / 2.0) / maxbin
def YIN(x):
    """Return the squared-difference function of the 1-D signal ``x``.

    For every lag ``n`` in ``1 .. N-1`` the result holds
    ``sum((x[m] - x[m + n]) ** 2 for m in range(N - n))``; element 0 is 0.

    The input is converted to float first, so integer sample data (for
    example 16-bit audio) cannot overflow in the subtraction or the square.

    :param x: one-dimensional sequence of samples.
    :return: float array with the same number of elements as ``x``.
    """
    x = np.asarray(x, dtype=float)
    N = x.size
    y = np.zeros(N)
    for n in range(1, N):
        # Difference between the signal and a copy shifted by n samples.
        delta = x[:N - n] - x[n:]
        y[n] = np.dot(delta, delta)
    return y


if __name__ == "__main__":
    fn = "/Users/olafwisselink/Code/sms-tools/sounds/flute-A4.wav"
    fs, x = util.readwav(fn)
    N = x.size

    # Magnitude spectrum of x
    X = np.fft.rfft(x, norm="ortho")
    M = abs(X)
    M = util.normalise(M)

    # Matplotlib: loglog uses base 10 on both axes by default.  The former
    # basex/basey keywords no longer exist in current Matplotlib releases.
    plt.loglog(M)
    plt.xlim(0, X.size)
    plt.ylim(0, 1)
    plt.show()
def read_zmatrix(self,fname):
    """Read a Z-matrix from ``fname`` and build ``self.mol`` atom by atom.

    The first line holds only the name of atom 1.  Each following line is
    split on whitespace and read by position: field 0 is the atom name,
    field 1 the number of the atom it is bonded to, field 2 the bond length,
    field 3 / field 4 the angle reference atom and bond angle, and
    field 5 / field 6 the dihedral reference atom and dihedral angle.
    Angles are converted from degrees to radians in the code.  A length or
    angle that is not a number is resolved through ``self.lookup_var``.

    Reading stops at the first line that splits into no fields.

    :param fname: path of the file to read.
    :return: ``self.mol``.
    """
    f = open(fname,'r')
    # The open handle is stored on the instance and is not closed here.
    # NOTE(review): presumably lookup_var() reads from self.file - confirm.
    self.file = f;
    name = f.readline().strip()
    self.mol = Molecule()
    # Atom 1 is placed at the origin.
    self.mol.add_atom(Atom(name.lower(),1,0,0,0,connect=0));
    str = f.readline();
    sp = str.split();
    if (sp == []):
        return self.mol
    sp[0] = sp[0].lower()
    # NOTE(review): this second emptiness check can never be true here.
    if (sp==[]):
        return self.mol
    try:
        bl = float(sp[2])
    except:
        # Not a number: treat the field as a variable name.
        bl = self.lookup_var(sp[2])
    # Atom 2 is placed at (0, 0, bond length).
    self.mol.add_atom(Atom(sp[0],2,0,0,bl,int(sp[1])))
    self.mol.add_bond(2,int(sp[1]))
    str = f.readline();
    sp = str.split();
    if (sp == []):
        return self.mol
    sp[0] = sp[0].lower()
    try:
        bl = float(sp[2]);
    except:
        bl = self.lookup_var(sp[2])
    try:
        ba = float(sp[4]);
    except:
        ba = self.lookup_var(sp[4])
    # Atom 3 lies in the y = 0 plane; its z is offset from the z of the atom
    # it is bonded to.
    a = Atom(sp[0],3,math.sin(ba*math.pi/180.0)*bl,0,self.mol.atoms[int(sp[1]) -1].z - math.cos(ba*math.pi/180.0)*bl,int(sp[1]))# BIG CHANGE
    self.mol.add_atom(a)
    self.mol.add_bond(3,int(sp[1]))
    atom_number = 4;
    # Atoms 4 and up need bond length, bond angle and dihedral angle.
    while (True):
        #print atom_number
        str = f.readline();
        sp = str.split();
        if (sp==[]):
            print 'end of matrix'
            break
        sp[0] = sp[0].lower()
        try:
            bl = float(sp[2]);
        except:
            bl = self.lookup_var(sp[2])
        try:
            ba = float(sp[4]);
        except:
            ba = self.lookup_var(sp[4])
        try:
            dh = float(sp[6]);
        except:
            dh = self.lookup_var(sp[6])
        # Atom numbers in the file are 1-based; self.mol.atoms is 0-based.
        connect = int(sp[1])
        angle_connect = int(sp[3])
        dihed_connect = int(sp[5])
        atoms = self.mol.atoms;
        #print 'connect: %i angle_connect: %i' %(connect,angle_connect)
        # Vectors from the two reference atoms to the bonded atom.
        vector1 = vec_minus(atoms[connect-1].xyz,atoms[angle_connect-1].xyz );
        vector2 = vec_minus(atoms[connect-1].xyz,atoms[dihed_connect-1].xyz );
        # norm1 and norm2 are built from vector1 with vec_cross (presumably
        # the cross product - confirm) and mixed with the sine and cosine of
        # the dihedral angle.
        norm1 = vec_cross(vector1,vector2)
        norm2 = vec_cross(vector1,norm1)
        norm1 = normalise(norm1)
        norm2 = normalise(norm2)
        norm1 =vec_times(norm1,-1*math.sin(dh*math.pi/180))
        norm2 = vec_times(norm2,math.cos(dh*math.pi/180))
        vector3 =vec_add(norm1,norm2)
        vector3 =normalise(vector3)
        # Part of the bond built from norm1/norm2: length bl*sin(ba).
        vector3 = vec_times(vector3,bl*math.sin(ba*math.pi/180.0))
        # Part of the bond along vector1: length bl*cos(ba).
        vector1 = normalise(vector1)
        vector1 = vec_times(vector1,bl*math.cos(ba*math.pi/180.0))
        # vector2 is reused for the new position: bonded atom's position
        # plus vector3 minus vector1.
        vector2 = vec_add(atoms[connect - 1].xyz,vector3)
        vector2 = vec_minus(vector2,vector1)
        a = Atom(sp[0],atom_number,vector2[0],vector2[1],vector2[2],int(sp[1]))
        self.mol.add_atom(a)
        self.mol.add_bond(atom_number,int(sp[1]))
        atom_number+=1;
    return self.mol
def get_bias(self, indexes):
    """Return the normalised column sums of the rows selected by ``indexes``.

    :param indexes: row labels passed to ``self._data.loc``.
    """
    selected_rows = self._data.loc[indexes]
    column_totals = selected_rows.sum(0)
    return util.normalise(column_totals)
def write_url(self):
    """Store this record's URN -> URL mapping in the ``urn2url`` table.

    The URN is stripped and passed through ``normalise``; a ``None`` result
    is reported as invalid and nothing is written.  Otherwise:

    * if rows for this URN and this source exist, each such row whose URL
      differs from ``self.url`` is updated;
    * if the URN is known only from other sources, a new row is inserted;
    * if the URN is unknown, a new row is inserted.

    Every write is also recorded with ``self._update_history``.
    """
    # NOTE(review): all statements below are built with string formatting
    # from harvested values (urn, url, source_id, urn_type).  This allows SQL
    # injection and fails on values containing quotes.  They should be
    # switched to parameterized queries; the placeholder style depends on the
    # database driver, which is not visible here.
    urn = normalise(self.urn.strip())
    if urn is None:
        print('Invalid URN: %s' % self.urn)
        return

    self.cursor.execute("SELECT * FROM urn2url WHERE urn = '%s'" % urn)
    existing_rows = list(self.cursor.fetchall())
    # Column positions used below: 0 = urn, 1 = url, 2 = source_id.
    existing_urns = [row[0] for row in existing_rows]
    existing_source_ids = [row[2] for row in existing_rows]

    if urn in existing_urns:
        if self.source_id in existing_source_ids:
            for row in existing_rows:
                if row[2] != self.source_id:
                    continue
                if row[1] != self.url:
                    # The URL for this URN in this source has changed!
                    print('< urn change')
                    self.cursor.execute(
                        "UPDATE urn2url SET url='%s' WHERE urn='%s' AND source_id = '%s'"
                        % (self.url, urn, self.source_id))
                    self._update_history(urn=urn, r_component=None,
                                         url_old=row[1], url_new=self.url,
                                         url_type_old=None,
                                         url_type_new=self.urn_type)
                    print('update')
        else:
            # We have already harvested URN from some other source.
            print('< (another) urn from new source')
            self.cursor.execute(
                "INSERT INTO urn2url (urn, url, source_id, url_type) VALUES ('%s', '%s', '%s', '%s')"
                % (urn, self.url, self.source_id, self.urn_type))
            self._update_history(urn=urn, r_component=None,
                                 url_old=None, url_new=self.url,
                                 url_type_old=None,
                                 url_type_new=self.urn_type)
            print('insert another')
    else:
        # New URN!
        print('< new urn')
        self.cursor.execute(
            "INSERT INTO urn2url (urn, url, source_id, url_type) VALUES ('%s', '%s', '%s', '%s')"
            % (urn, self.url, self.source_id, self.urn_type))
        print('---')
        print('urn: %s' % urn)
        print('url: %s' % self.url)
        print('urn_type: %s' % self.urn_type)
        print('<<<')
        self._update_history(urn=urn, r_component=None,
                             url_old=None, url_new=self.url,
                             url_type_old=None,
                             url_type_new=self.urn_type)
        print('>>>')
        print(self.cursor, self.source_id, self.title, self.urn, self.url)