def update_boids(boids, time):
    for index in range(len(boids)):
        b = boids[index]
        b[2] += cohesion(index, boids)[0]*10
        b[3] += cohesion(index, boids)[1]*10
        b[2] += separate(index, boids)[0]*5
        b[3] += separate(index, boids)[1]*5
        b[2] += align(index, boids)[0]
        b[3] += align(index, boids)[1]
        # Limit velocity to 500 pixels per second horizontally and vertically
        b[2] = speed_limit(b[2], 500)
        b[3] = speed_limit(b[3], 500)
        # Update the boid's position based on its velocity and the
        # time that has passed since the last update.
        b[0] += float(b[2])/1000 * time
        b[1] += float(b[3])/1000 * time
        # Make the boid bounce off the walls.
        if b[0] < 0:
            b[0] = 0
            b[2] = -b[2]
        elif b[0] > WIDTH:
            b[0] = WIDTH
            b[2] = -b[2]
        if b[1] < 0:
            b[1] = 0
            b[3] = -b[3]
        elif b[1] > HEIGHT:
            b[1] = HEIGHT
            b[3] = -b[3]

def update_boids(boids, time, SEPARATION_MULTIPLIER, COHESION_MULTIPLIER, align_on, sl):
    for index in range(len(boids)):
        b = boids[index]
        b[2] += cohesion(index, boids)[0] * COHESION_MULTIPLIER
        b[3] += cohesion(index, boids)[1] * COHESION_MULTIPLIER
        b[2] += separate(index, boids, random_color, b[4])[0] * SEPARATION_MULTIPLIER
        b[3] += separate(index, boids, random_color, b[4])[1] * SEPARATION_MULTIPLIER
        if align_on == "y" or align_on == "Y":
            b[2] += align(index, boids)[0]
            b[3] += align(index, boids)[1]
        b[4] = separate(index, boids, random_color, b[4])[2]
        # Limit velocity to sl pixels per second horizontally and vertically
        b[2] = speed_limit(b[2], sl)
        b[3] = speed_limit(b[3], sl)
        # Update the boid's position based on its velocity and the
        # time that has passed since the last update.
        b[0] += float(b[2]) / 1000 * time
        b[1] += float(b[3]) / 1000 * time
        # Make the boid bounce off the walls.
        if b[0] < 0:
            b[0] = 0
            b[2] = -b[2]
        elif b[0] > WIDTH:
            b[0] = WIDTH
            b[2] = -b[2]
        if b[1] < 0:
            b[1] = 0
            b[3] = -b[3]
        elif b[1] > HEIGHT:
            b[1] = HEIGHT
            b[3] = -b[3]

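# Note: speed_limit() is not defined in either update_boids() snippet above.
# A plausible stand-in -- an assumption, not the original helper -- is a
# simple clamp of one velocity component to [-limit, limit]:
def speed_limit(velocity, limit):
    # clamp a single velocity component to the allowed range
    return max(-limit, min(limit, velocity))
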
def main():
    # parse command
    command_log = 'CIRCexplorer parameters: ' + ' '.join(sys.argv)
    if len(sys.argv) == 1:
        sys.exit(help_doc)
    elif sys.argv[1] == '--version' or sys.argv[1] == '-v':
        sys.exit(__version__)
    elif sys.argv[1] == 'align':
        import align
        align.align(docopt(align.__doc__, version=__version__),
                    command=command_log, name='align')
    elif sys.argv[1] == 'parse':
        import parse
        parse.parse(docopt(parse.__doc__, version=__version__),
                    command=command_log, name='parse')
    elif sys.argv[1] == 'annotate':
        import annotate
        annotate.annotate(docopt(annotate.__doc__, version=__version__),
                          command=command_log, name='annotate')
    elif sys.argv[1] == 'assemble':
        import assemble
        assemble.assemble(docopt(assemble.__doc__, version=__version__),
                          command=command_log, name='assemble')
    elif sys.argv[1] == 'denovo':
        import denovo
        denovo.denovo(docopt(denovo.__doc__, version=__version__),
                      command=command_log, name='denovo')
    else:
        sys.exit(help_doc)

def align_command(options, command_log):
    from align import align
    options['--bw'] = True
    options['--scale'] = True
    options['--skip-tophat'] = False
    options['--skip-tophat-fusion'] = False
    align(options, command=command_log, name='align')

def zstruct2(mol1, mol2, r1, r2, b1, doOne=True, doTwo=False):
    obconversion = ob.OBConversion()
    obconversion.SetOutFormat("xyz")
    i = 0

    # generate all one-permutations of r1 and r2
    if doOne:
        z = []
        for atom in r2:
            for atom2 in r1:
                z.append([(atom, atom2)])
        #print(z)
        for isomer in z:
            output_path = os.getcwd() + "/initial%04d.xyz" % i
            omol = align.align(mol1, mol2, 1, list(isomer[0]))
            fname = str("ISOMERS%04d" % i)
            s1 = " ".join([str(j) for j in isomer[0]])
            i += 1
            with open(fname, 'w') as f:
                f.write("ADD " + s1 + "\n")
            with open(output_path, 'w') as f:
                f.write(obconversion.WriteString(omol))

        # ====== generate all one-permutations of add and break ===== #
        if b1 is not None:
            zb = [[x[0], b1] for x in z]
            #print(zb)
            for isomer in zb:
                output_path = os.getcwd() + "/initial%04d.xyz" % i
                fname = str("ISOMERS%04d" % i)
                s1 = " ".join([str(j) for j in isomer[0]])
                s2 = " ".join([str(j) for j in isomer[1]])
                i += 1
                omol = align.align(mol1, mol2, 1, list(isomer[0]))
                with open(fname, 'w') as f:
                    f.write("ADD " + s1 + "\n")
                    f.write("BREAK " + s2 + "\n")
                with open(output_path, 'w') as f:
                    f.write(obconversion.WriteString(omol))

    # ======== generate all two-permutations of r1 and r2 ====== #
    if doTwo:
        # list() is needed so each pairing can be indexed (zip is lazy in Python 3)
        z2 = [list(zip(x, r1)) for x in itertools.permutations(r2, len(r1))]
        for isomer in z2:
            output_path = os.getcwd() + "/initial%04d.xyz" % i
            fname = str("ISOMERS%04d" % i)
            s1 = " ".join([str(j) for j in isomer[0]])
            s2 = " ".join([str(j) for j in isomer[1]])
            isom_list = list(isomer[0]) + list(isomer[1])
            i += 1
            omol = align.align(mol1, mol2, 2, isom_list)
            with open(fname, 'w') as f:
                f.write("ADD " + s1 + "\n")
                f.write("ADD " + s2 + "\n")
            with open(output_path, 'w') as f:
                f.write(obconversion.WriteString(omol))

def smith_waterman(seq0, seq1):
    """
    gap_score: # of gaps / length of aligned sequence
    (lower number => better alignment)
    """
    score, gap_score, a0, a1 = align.align(seq0, seq1, local=False)
    return gap_score

def realign_filter(rec, inslib):
    S = -np.ones((256, 256)) + 2 * np.identity(256)
    S = S.astype(np.int16)

    seqn = rec['Superfamily'] + ':' + rec['Subfamily']
    if seqn not in inslib:
        return False

    seq_headers = ['Genomic_Consensus_5p', 'Genomic_Consensus_3p',
                   'Insert_Consensus_5p', 'Insert_Consensus_3p']

    for seqtype in seq_headers:
        s1 = align.string_to_alignment(rec[seqtype])
        s2 = align.string_to_alignment(inslib[seqn])

        (s, a1, a2) = align.align(s1, s2, -2, -2, S, local=True)
        a1 = align.alignment_to_string(a1)
        a2 = ''.join([b for b in list(align.alignment_to_string(a2)) if b != '-'])

        score = 0.0
        if len(a1) > 0:
            score = float(len(a1) - (len(a1)-s)) / float(len(a1))

        #print seqtype, score, len(a1)

        if score > 0.9 and len(a1) > 25:
            return False

    return True

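# Note on the scoring matrix used in realign_filter() above (and in the
# consensus builders further down): -np.ones((256, 256)) + 2 * np.identity(256)
# is +1 on the diagonal and -1 elsewhere, i.e. match = +1, mismatch = -1,
# indexed by raw byte value. The score expression
# float(len(a1) - (len(a1) - s)) / float(len(a1)) simplifies to s / len(a1).
# A minimal self-contained sketch of the matrix construction:
import numpy as np

S_demo = (-np.ones((256, 256)) + 2 * np.identity(256)).astype(np.int16)
assert S_demo[ord('A'), ord('A')] == 1   # match scores +1
assert S_demo[ord('A'), ord('C')] == -1  # mismatch scores -1
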
def compare_fingerprint_to_database(filename):
    file1 = fingerprint.location_fingerprint(filename)
    fingerprints = session.query(model.Fingerprint)
    database_iteration = []
    max_offset = 0
    song_match = {}  # initialized once so an empty database cannot raise NameError
    for row in fingerprints:
        file2 = pickle.loads(row.fingerprint)
        ranked_matches = align.align(file1, file2)
        current_song = {}
        # assorted song information
        current_song["title"] = row.title
        current_song["artist"] = row.artist
        current_song["album"] = row.album
        current_song["offset"] = ranked_matches[0][1]
        database_iteration.append(current_song)
    for current_song in database_iteration:
        if current_song["offset"] > max_offset:
            max_offset = current_song["offset"]
    # information for most likely match
    for current_song in database_iteration:
        if current_song["offset"] == max_offset:
            song_match["title"] = current_song["title"]
            song_match["artist"] = current_song["artist"]
            song_match["album"] = current_song["album"]
    return song_match

def test_align1():
    print 'peak: ' + str(peak1)
    print 'mean: ' + str(mean1)
    (i, score) = align.align(peak1, meanWords1, -1)
    print 'index is ' + str(i) + ', should be 0?'
    print 'score is ' + str(score)
    return

def test_padded_align_odd() -> None:
    """
    Test the padded cross-correlation method with
    two gaussian pulses of odd length.
    """
    # make some time arrays and define a time delta
    t = np.linspace(-2 * np.pi, 2 * np.pi, 100)
    dt = 3.675 * (t[1] - t[0])
    t = np.linspace(-1, 1, 199, endpoint=False)

    # construct two identical gaussian pulses
    x = np.real(signal.gausspulse(t, fc=5))
    y = np.real(signal.gausspulse(t + dt, fc=5))

    # compute the delay with upsampling by a factor of 30
    d = align.xcorr_delay(x, y, 30)

    # and apply the delay
    y = align.apply_delay(y, d)

    # and test the top-level call
    ynew = align.align(x, y, method="xcorr", factor=30)

    # and check that the alignment is within a certain known accuracy
    assert np.std(np.abs(x - y)) < 0.005
    assert np.std(np.abs(y - ynew)) < 5e-5

def align_test(s, m):
    S = np.array([[1 if i == j else -1 for i in range(256)] for j in range(256)],
                 dtype=np.short)
    score, p, _ = align.align(list(align.string_to_alignment(s)),
                              list(align.string_to_alignment(m)),
                              -1, -1, S, True, True)
    p = align.alignment_to_string(p).replace('-', '')
    return score, s.find(p)

def testUnalignedText(self):
    left_text = \
"""シャーロックホームズにとって、彼女はいつも「あの女」である。ホームズが彼女を他の名前で呼ぶのはほとんど聞いたことがない。彼の目には、 彼女がそびえ立って女という性全体を覆い隠している。しかし、彼はアイリーン・アドラーに愛のような激情は一切感じていなかった。すべての激情は、そして特に愛というものは、 相容れなかった、彼の冷静で厳格だが見事に調整された心とは。
"""
    right_text = \
"""TO SHERLOCK HOLMES she is always the woman. I have seldom heard him mention her under any other name. In his eyes she eclipses and predominates the whole of her sex. It was not that he felt any emotion akin to love for Irene Adler. All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind.
"""
    split_text = align(left_text, right_text)
    split_text = list(split_text)
    self.assertEqual(split_text, [
        ('シャーロックホームズにとって、彼女はいつも「あの女」である。',
         'TO SHERLOCK HOLMES she is always the woman.'),
        ('ホームズが彼女を他の名前で呼ぶのはほとんど聞いたことがない。',
         'I have seldom heard him mention her under any other name.'),
        ('彼の目には、 彼女がそびえ立って女という性全体を覆い隠している。',
         'In his eyes she eclipses and predominates the whole of her sex.'),
        ('しかし、彼はアイリーン・アドラーに愛のような激情は一切感じていなかった。',
         'It was not that he felt any emotion akin to love for Irene Adler.'),
        ('すべての激情は、そして特に愛というものは、 相容れなかった、彼の冷静で厳格だが見事に調整された心とは。',
         'All emotions, and that one particularly, were abhorrent to his cold, '
         'precise but admirably balanced mind.'),
        ('', '')
    ])

def test_gauss_align() -> None:
    """
    Test Gaussian interpolation of unpadded cross correlation.
    """
    # make some time arrays and define a time delta
    t = np.linspace(-2 * np.pi, 2 * np.pi, 100)
    dt = 3.675 * (t[1] - t[0])
    t = np.linspace(-1, 1, 200, endpoint=False)

    # construct two identical gaussian pulses
    x = np.real(signal.gausspulse(t, fc=5))
    y = np.real(signal.gausspulse(t + dt, fc=5))

    # compute the delay with upsampling
    d_padded = align.xcorr_delay(x, y, 30)

    # compute the delay without any upsampling
    d = align.xcorr_delay(x, y, fit="gauss")

    # compute the new signal
    y = align.apply_delay(y, d)

    # and test the top-level call
    ynew = align.align(x, y, method="xcorr", factor=30)

    # and check that the alignment is within a certain known accuracy
    assert np.abs(d - d_padded) < 0.05
    assert np.mean(np.abs(x - y)) < 7e-4
    assert np.mean(np.abs(x - ynew)) < 7e-4
    assert np.mean(np.abs(y - ynew)) < 5e-5

def pnp_3d3d_with_num(model_points, image_points, image_pts_3d, K, n,
                      dists=np.zeros((4, 1))):
    _, rotation_vector, translation_vector, mask = cv2.solvePnPRansac(
        model_points, image_points, K, dists)
    pt3d = model_points[mask.ravel()]
    pt2d = image_points[mask.ravel()]
    pt3d_img = image_pts_3d[mask.ravel()]
    if pt3d.shape[0] < n:
        print('3d-2d few inliers number {} < N = {}'.format(pt3d.shape[0], n))
        return None, None
    s = np.int32(pt3d.shape[0] / n)
    pt3d = pt3d[::s]
    pt2d = pt2d[::s]
    pt3d_img = pt3d_img[::s]
    R, t, _ = al.align(np.asmatrix(pt3d.transpose()),
                       np.asmatrix(pt3d_img.transpose()))
    return np.array(R), np.array(t)

def test_fft_align() -> None:
    """
    Test FFT phase shift alignment.
    """
    for N in [178, 179, 200, 201]:
        # make some time arrays and define a time delta
        t = np.linspace(-2 * np.pi, 2 * np.pi, N // 2)
        dt = 3.675 * (t[1] - t[0])
        t = np.linspace(-1, 1, N, endpoint=False)
        true = dt / (t[1] - t[0])

        # construct two identical gaussian pulses
        x = np.real(signal.gausspulse(t, fc=5))
        y = np.real(signal.gausspulse(t + dt, fc=5))

        # compute the delay via the FFT phase shift
        delay = align.fft_delay(x, y)

        # and apply the delay
        y = align.apply_delay(y, delay)

        # use the top-level call
        ynew = align.align(x, y, method="fft")

        # and check that the alignment is within a certain known accuracy
        assert np.abs(true - delay) < 1e-3
        assert np.mean(np.abs(x - y)) < 1e-3
        assert np.mean(np.abs(x - ynew)) < 2e-4
        assert np.mean(np.abs(y - ynew)) < 3e-5

def download(person, url, bb):
    imgName = os.path.basename(url)
    rawPersonPath = os.path.join(args.raw, person)
    rawImgPath = os.path.join(rawPersonPath, imgName)

    alignedPersonPath = os.path.join(args.aligned, person)
    alignedImgPath = os.path.join(alignedPersonPath,
                                  hashlib.md5(imgName).hexdigest() + ".png")

    mkdirP(rawPersonPath)
    mkdirP(alignedPersonPath)

    if not os.path.isfile(rawImgPath):
        print url
        urlF = urllib2.urlopen(url, timeout=5)
        with open(rawImgPath, 'wb') as f:
            f.write(urlF.read())

    if not os.path.isfile(alignedImgPath):
        bgr = cv2.imread(rawImgPath)
        if bgr is None:
            return
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        dlibBB = dlib.rectangle(*bb)
        outRgb = align.align(64, rgb, bb=dlibBB,
                             landmarkIndices=landmarkIndices)
        if outRgb is not None:
            outBgr = cv2.cvtColor(outRgb, cv2.COLOR_RGB2BGR)
            cv2.imwrite(alignedImgPath, outBgr)

def scsp(s, t):
    # extend
    #
    # Add missing characters to start
    #
    # Parameters: extend_me    List from alignment
    #             original     Original before alignment
    #             pad_me       Companion - will be padded with leading spaces
    #
    # Returns:    two strings, the first extended, the second padded
    def extend(extend_me, original, pad_me):
        extension = []
        i = 0
        while extend_me[0] != original[i] and extend_me[0] != '-':
            extension.append(original[i])
            i += 1
        return extension + extend_me, i * ['-'] + pad_me

    _, b, c = align(s, t,
                    replace_score=createSimpleDNASubst(subst=len(s) + len(t)),
                    indel_cost=0)
    b1, c1 = extend(b, s, c)
    c2, b2 = extend(c1, t, b1)
    super_sequence = [aa if bb == '-' else bb for aa, bb in zip(b2, c2)]
    return ''.join(super_sequence)

def compareDomStrs(cluster1, cluster2, match, mismatch, gap, scale):
    # first index each cluster
    clus1Strs = dict(enumerate(cluster1))
    clus2Strs = dict(enumerate(cluster2))
    clus1Size = len(cluster1)
    clus2Size = len(cluster2)
    scoreMatrix, word2num, num2word = aligntools.buildScoringDictScaled(
        chain(*(cluster1 + cluster2)), match, mismatch, scale)
    alignScores = np.ndarray((clus1Size, clus2Size))
    # score each pairwise alignment of domain strings to populate an alignment scores matrix
    for i, domStr1 in enumerate(cluster1):
        for j, domStr2 in enumerate(cluster2):
            num1 = [word2num[x] for x in domStr1]
            num2 = [word2num[x] for x in domStr2]
            alignScore, a1, a2 = align.align(num1, num2, gap, gap, scoreMatrix, local=True)
            alignScores[i, j] = alignScore
    # prepare scoring matrix for hungarian algorithm: negate scores and pad matrix
    if clus1Size < clus2Size:
        costMatrix = -np.vstack((copy(alignScores),
                                 np.zeros((clus2Size - clus1Size, clus2Size))))
    elif clus2Size < clus1Size:
        costMatrix = -np.hstack((copy(alignScores),
                                 np.zeros((clus1Size, clus1Size - clus2Size))))
    else:
        costMatrix = -copy(alignScores)
    # apply hungarian algorithm for matching
    pairings = [(x, y) for x, y in zip(*linear_sum_assignment(costMatrix))
                if (x < clus1Size) and (y < clus2Size)]
    clusterScore = sum(alignScores[pairing] for pairing in pairings)
    pairStrings = [(alignScores[(x, y)], clus1Strs[x], clus2Strs[y]) for x, y in pairings]
    pairStrings.sort(reverse=True)
    return clusterScore, pairStrings

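# The padding above squares the cost matrix before the Hungarian step; recent
# SciPy versions of linear_sum_assignment also accept rectangular matrices
# directly. A toy sketch of the negate-and-assign idea (hypothetical scores):
import numpy as np
from scipy.optimize import linear_sum_assignment

toy_scores = np.array([[5.0, 1.0],
                       [2.0, 4.0],
                       [3.0, 3.0]])              # 3 strings vs 2 strings
rows, cols = linear_sum_assignment(-toy_scores)  # negate to maximize total score
pairs = [(int(r), int(c)) for r, c in zip(rows, cols)]
print(pairs)                                     # [(0, 0), (1, 1)]
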
def consensus(self, minscore=0.9):
    ''' build consensus from sorted aligned reads iteratively '''
    S = -np.ones((256, 256)) + 2 * np.identity(256)
    S = S.astype(np.int16)

    minqual = self.reads[0].minqual

    sortable_reads = [SortableRead(sr.read) for sr in self.reads]
    seqs = [qualtrim(sorted_read.read, minqual=minqual)
            for sorted_read in sorted(sortable_reads)]
    seqs = [s for s in seqs if len(s) > 20]

    if len(seqs) == 0:
        return '', 0.0

    if len(seqs) == 1:  # no consensus necessary
        return seqs[0], 1.0

    uniq_seqs = [seqs[0]]
    for i, seq in enumerate(seqs[1:], start=1):
        if seq != seqs[i-1]:
            uniq_seqs.append(seq)

    if len(uniq_seqs) == 1:  # all seqs were the same!
        return uniq_seqs[0], 1.0

    cons = uniq_seqs[0]
    scores = []

    if len(uniq_seqs) > 1000:
        uniq_seqs = [uniq_seqs[u]
                     for u in sorted(np.random.choice(range(len(uniq_seqs)), size=1000))]

    for seq in uniq_seqs[1:]:
        s1 = align.string_to_alignment(cons)
        s2 = align.string_to_alignment(seq)

        (s, a1, a2) = align.align(s1, s2, -2, -2, S, local=True)
        a1 = align.alignment_to_string(a1)
        a2 = ''.join([b for b in list(align.alignment_to_string(a2)) if b != '-'])

        score = 0.0
        if len(a1) > 0:
            score = float(len(a1) - (len(a1)-s)) / float(len(a1))

        if re.search(a1, cons):
            cons_start, cons_end = locate_subseq(cons, a1)

            if score >= minscore and cons_end > len(cons)-5:
                scores.append(score)
                align_end = locate_subseq(seq, a2)[1]
                cons += seq[align_end:]

    #print self.start, self.end, cons

    if scores:
        return cons, np.mean(scores)
    else:
        return cons, 0.0

def main():
    # Upload images; the second uploader and the alignment button are nested
    # so nothing runs until both files are present
    uploaded_file = st.file_uploader("Choose a picture", type=['jpg', 'png'])
    if uploaded_file is not None:
        st.image(uploaded_file, width=200)
        second_uploaded_file = st.file_uploader("Choose another picture", type=['jpg', 'png'])
        if second_uploaded_file is not None:
            st.image(second_uploaded_file, width=200)
            img1 = np.array(uploaded_file)
            img2 = np.array(second_uploaded_file)
            image1 = PIL.Image.open(uploaded_file)
            image2 = PIL.Image.open(second_uploaded_file)
            images = [image1, image2]
            if st.button('Align images'):
                align(images)

def test_basic(self):
    """Verify that the aligner is putting out the correct score.

    Example on smith-waterman wikipedia page.
    """
    s0 = 'ACACACTA'
    s1 = 'AGCACACA'
    score, normalized_score, a0, a1 = align.align(s0, s1, local=True)
    self.assertEqual(score, 12)

def test_long_align(self):
    d = _fasta_dict('example.fa')
    r0 = 'm150213_074729_42177R_c100777662550000001823160908051505_s1_p0/70715/9957_22166'
    r1 = 'm150126_093705_42156_c100779662550000001823165208251525_s1_p0/144605/28461_40297'
    s0 = d[r0]
    s1 = d[r1]
    score, normalized_score, a0, a1 = align.align(s0, s1, local=False)
    print(score)
    print(normalized_score)

def align_shortlist(seq, shortlist):
    result = []
    nseq = list(align.string_to_alignment(seq))
    for sh in shortlist:
        score, nr, shr = align.align(nseq, list(align.string_to_alignment(sh)),
                                     -1, -1, S, True, True)
        result.append((align.alignment_to_string(nr),
                       align.alignment_to_string(shr)))
    return result

def _build_aligned_data(data, limit):
    limited_data = [d[:limit] for d in data]
    aligned_data = []
    # list comprehension instead of map(): np.argmax cannot consume a lazy map
    # object in Python 3
    longest = limited_data[np.argmax([len(d) for d in limited_data])][::-1]
    for d in limited_data:
        (_, _, a) = align.align(longest, d[::-1], 0, -2, scoring.S, mutual=False)
        aligned_data.append(a[::-1])
    return aligned_data

def intake(geometry):
    with open(geometry, "r") as f:
        text = f.read()
    words = text.split()
    # each of the 6 atom records spans 4 whitespace-separated fields: a label
    # (presumably an element symbol) followed by three coordinates
    coords = []
    for i in range(6):
        coords.append([])
        for j in range(3):
            coords[i].append(float(words[i*4 + j + 1]))
    return align.align(coords)

def test_fix1():
    data = align.load_from_file(open('fixtures/1.points'))
    expected = fixture_from_file(open("fixtures/1.solution"))
    res = align.align(data)
    if not np.array_equal(res, expected):
        print(res)
        print("---")
        print(expected)
        assert False

def step2(dist_mapping, profile):
    f_main_log = profile["main_log_fd"]
    f_main_log.write("\n[*] Further investigate the following TIDs:\n")
    for k2, v2 in dist_mapping.iteritems():
        if v2["match"] == 0:
            f_main_log.write("[*] Compromised TID (%s)\n\t%s\n"
                             % (k2, profile["groups"][1][k2]))
            f_main_log.write("    with TID=%s and Dist=%s\n"
                             % (v2["all"][0][0], v2["all"][0][1]))

    candidates = [[k2, v2["all"][0]] for k2, v2 in dist_mapping.iteritems()
                  if v2["match"] == 0]
    sort_by_min_dist = sorted(candidates, key=lambda (x): x[1][1], reverse=True)

    f_main_log.write("\n\n")
    f_main_log.write("[*] TOP 3 TID Candidates are:\n\t")
    f_main_log.write("\n\t".join([
        "Mal_TID(%s) Closest TID(%s) w/ Dist=%s" % (p[0], p[1][0], p[1][1])
        for p in sort_by_min_dist[:3]
    ]))
    f_main_log.write("\n\n")

    return None

    # NOTE: the early return above makes everything below unreachable
    for k2, v2 in dist_mapping.iteritems():
        if len(v2["exact"]) == 0:
            sort_dist = sorted(v2["all"].iteritems(), key=lambda (k, v): v)
            rtn = " ".join(["%s:%s" % (kk, vv) for kk, vv in sort_dist])
            f_main_log.write("[*] Compromised TID (%s)\n\tDistance [%s]\n" % (k2, rtn))
            if sort_dist[0][1] > 10000:
                continue
            try:
                f_main_log.write("(%s - %s)\n" % (k2, sort_dist[0][0]))
                f_main_log.write(" ".join(profile["groups"][1][k2]) + "\n")
                f_main_log.write(" ".join(profile["groups"][0][sort_dist[0][0]]) + "\n")
                enc1, enc2, v = align.align(profile["groups"][1][k2],
                                            profile["groups"][0][sort_dist[0][0]])
                score, encodeds = align.score(enc1, enc2, v)
                for encoded in encodeds[0:1]:
                    alignment = v.decodeSequenceAlignment(encoded)
                    print "(%s - %s)\n" % (k2, sort_dist[0][0])
                    print alignment
                    f_main_log.write("\tSimilarity:%s (%s - %s)\n"
                                     % (alignment.percentIdentity(), k2, sort_dist[0][0]))
                    f_main_log.write(str(alignment))
                    f_main_log.write("\n\n")
            except:
                break

def run_single_test(data_dir, output_dir):
    from align import align
    from skimage.io import imread, imsave

    parts = open(join(data_dir, 'g_coord.csv')).read().rstrip('\n').split(',')
    g_coord = (int(parts[0]), int(parts[1]))
    img = imread(join(data_dir, 'img.png'), plugin='matplotlib')
    aligned_img, (b_row, b_col), (r_row, r_col) = align(img, g_coord)
    with open(join(output_dir, 'output.csv'), 'w') as fhandle:
        print('%d,%d,%d,%d' % (b_row, b_col, r_row, r_col), file=fhandle)
    imsave(join(output_dir, 'aligned_img.png'), aligned_img)

def local_check(*zipped):
    for x, y in list(*zipped):
        nx = list(align.string_to_alignment(x.replace("-", "")))
        ny = list(align.string_to_alignment(y))
        score, xr, yr = align.align(nx, ny, -1, -1, S, True, True)
        xr = align.alignment_to_string(xr)
        yr = align.alignment_to_string(yr)
        # diff = abs(len(y.replace("-", "")) - len(xr.replace("-", "")))
        # if diff > 1:
        #     return False
        if hamming_dist(y, xr) > 1:
            # print(y, xr, yr, hamming_dist(y, xr))
            return False
    return True

def main():
    parser = argparse.ArgumentParser(description='The seqAnn application')
    parser.add_argument('-i', action="store", dest="i",
                        help="The input file: a fasta file containing a single sequence")
    parser.add_argument('-g', action="store", dest="g",
                        help="The gene type to process. It includes most HLA and KIR types. For example: HLA-A, KIR")
    options = parser.parse_args()

    config.geneType = GeneType(options.g)

    print("Annotating\n")
    blast.blast(options.i, config.get_blast_file())
    print("The input sequence best matched with " + blast.get_match_gene() + "\n")

    alignHelp = align(options.i)
    alignHelp.searchGene(blast.get_match_gene())
    alignHelp.writeInputFile()
    alignHelp.runAlign()

    splitGeneHelp = splitGene(alignHelp.gene)
    splitGeneHelp.runSplit()

def align(self, other,
          _pad=lambda d: di.Dimension._NullDimension(d[0]),
          _key=lambda t: t[0]):
    di00 = other.labels
    di10 = self.labels
    di01, di11 = al.align(di00, di10, key=_key, pad=_pad)

    def maybe_update(brick, olddims, newdims):
        return brick if olddims == newdims \
            else HyperBrick(brick._data, newdims)

    return tuple(maybe_update(b, d0, d1)
                 for b, d0, d1 in zip((self, other), (di10, di00), (di11, di01)))

def align(self, seq1, seq2, local=False):
    s1 = align.string_to_alignment(seq1)
    s2 = align.string_to_alignment(seq2)
    score, a1, a2 = align.align(s1, s2, self.gap_open, self.gap_extend,
                                self.subs, local)
    res1, res2 = align.alignment_to_string(a1), align.alignment_to_string(a2)
    if local:
        strip1, strip2 = res1.replace("-", ""), res2.replace("-", "")
        start1, start2 = seq1.index(strip1), seq2.index(strip2)
        start_flank = max(start1, start2)
        end_flank = max(len(seq1) - len(strip1) - start1,
                        len(seq2) - len(strip2) - start2)
        res1 = "-" * start_flank + res1 + "-" * end_flank
        res2 = "-" * start_flank + res2 + "-" * end_flank
    return res1, res2, score

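# The local branch above re-pads the stripped local alignment with leading and
# trailing gaps so it lines up positionally with the full inputs. A toy trace
# of just that padding arithmetic (made-up alignment result):
seq1, seq2 = "TTTACGT", "ACGTAAA"
res1 = res2 = "ACGT"                              # pretend local alignment output
strip1, strip2 = res1.replace("-", ""), res2.replace("-", "")
start1, start2 = seq1.index(strip1), seq2.index(strip2)   # 3, 0
start_flank = max(start1, start2)                         # 3
end_flank = max(len(seq1) - len(strip1) - start1,
                len(seq2) - len(strip2) - start2)         # 3
print("-" * start_flank + res1 + "-" * end_flank)         # ---ACGT---
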
def main(left, right):
    score, traceback = align(left, right)
    print 'Score: {0}'.format(score)

    sequence_l, sequence_r = construct_alignment(left, right, traceback)
    sequence_l = ''.join(sequence_l)
    sequence_r = ''.join(sequence_r)

    print 'Alignment:'
    while len(sequence_l) > 0:
        head_l, head_r = sequence_l[:60], sequence_r[:60]
        sequence_l, sequence_r = sequence_l[60:], sequence_r[60:]
        print head_l
        print head_r
        print

def item_iter(self):
    for index in range(len(self.st_sents)):
        st_sent = self.st_sents[index]
        vecs = self.vec_manager.get_vector(st_sent['sentid'])
        old_toks = [wd['word'] for wd in st_sent['words']]
        new_toks = vecs['tokens']
        alignment = align(old_toks, new_toks)
        if alignment is not None:
            for i, word in enumerate(st_sent['words']):
                if 'sense' in word:
                    sense_inst = SenseInstance(old_toks, i, word['sense'], word['tag'])
                    (projection_start, projection_stop) = alignment[i]
                    embeddings = []
                    for j in range(projection_start, projection_stop):
                        embeddings.append(tensor(vecs['vecs'][j]))
                    embedding = torch.stack(embeddings).sum(dim=0).tolist()
                    sense_inst.add_embedding('embed', embedding)
                    yield sense_inst

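# The span summing in item_iter() is the standard subword-to-token projection:
# each original token owns a [start, stop) range of new tokens, and its
# embedding is the sum of the vectors in that range. A standalone sketch with
# toy tensors and a hypothetical alignment:
import torch

subword_vecs = torch.randn(5, 8)               # 5 subword vectors of dim 8
alignment = {0: (0, 1), 1: (1, 3), 2: (3, 5)}  # token index -> subword span

token_embeddings = torch.stack(
    [subword_vecs[start:stop].sum(dim=0) for start, stop in alignment.values()])
print(token_embeddings.shape)                  # torch.Size([3, 8])
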
def consensus(seqs, minscore=0.95):
    ''' build consensus from sorted aligned reads iteratively,
        expects seqs to be sorted in ref genome order '''
    S = -np.ones((256, 256)) + 2 * np.identity(256)
    S = S.astype(np.int16)

    if len(seqs) == 1:  # no consensus necessary
        return seqs[0], 1.0

    uniq_seqs = [seqs[0]]
    for i, seq in enumerate(seqs[1:], start=1):
        if seq != seqs[i-1]:
            uniq_seqs.append(seq)

    if len(uniq_seqs) == 1:  # all seqs were the same!
        return uniq_seqs[0], 1.0

    cons = uniq_seqs[0]
    scores = []

    if len(uniq_seqs) > 1000:
        uniq_seqs = np.random.choice(uniq_seqs, size=1000)

    for seq in uniq_seqs[1:]:
        s1 = align.string_to_alignment(cons)
        s2 = align.string_to_alignment(seq)

        (s, a1, a2) = align.align(s1, s2, -2, -2, S, local=True)
        a1 = align.alignment_to_string(a1)
        a2 = ''.join([b for b in list(align.alignment_to_string(a2)) if b != '-'])

        score = float(len(a1) - (len(a1)-s)) / float(len(a1))
        scores.append(score)

        if re.search(a1, cons):
            cons_start, cons_end = locate_subseq(cons, a1)

            if score >= minscore and cons_end > len(cons)-5:
                align_end = locate_subseq(seq, a2)[1]
                cons += seq[align_end:]

    return cons, np.mean(scores)

def run_single_test(data_dir, output_dir):
    from align import align
    from numpy import ndarray
    from skimage.io import imread, imsave

    parts = open(join(data_dir, 'g_coord.csv')).read().rstrip('\n').split(',')
    g_coord = (int(parts[0]), int(parts[1]))
    img = imread(join(data_dir, 'img.png'), plugin='matplotlib')
    n_rows, n_cols = img.shape[:2]
    min_n_rows, min_n_cols = n_rows / 4.5, n_cols / 1.5
    aligned_img, (b_row, b_col), (r_row, r_col) = align(img, g_coord)
    assert type(aligned_img) is ndarray, 'aligned image is not ndarray'
    n_rows, n_cols = aligned_img.shape[:2]
    assert n_rows > min_n_rows and n_cols > min_n_cols, 'aligned image is too small'

    with open(join(output_dir, 'output.csv'), 'w') as fhandle:
        print('%d,%d,%d,%d' % (b_row, b_col, r_row, r_col), file=fhandle)
    imsave(join(output_dir, 'aligned_img.png'), aligned_img)

def upload_file():
    if request.method == 'POST':
        # check if the post request has the file part
        # if ('filewav' or 'filemid') not in request.files:
        #     print('No file part')
        #     return redirect(request.url)
        filewav = request.files['filewav']
        filexml = request.files['filexml']
        # if the user does not select a file, the browser also
        # submits an empty part without a filename
        # if filewav.filename == '':
        #     print('No selected file')
        #     return redirect(request.url)
        wavname = secure_filename(filewav.filename)
        filewav.save(os.path.join(app.config['UPLOAD_FOLDER'], wavname))
        xmlname = secure_filename(filexml.filename)
        filexml.save(os.path.join(app.config['UPLOAD_FOLDER'], xmlname))
        (f, wavext) = os.path.splitext(wavname)
        (xname, xmlext) = os.path.splitext(xmlname)
        midi_file = xname + '.mid'
        if (wavext == '.wav') and (xmlext == '.xml'):
            os.chdir(app.config['UPLOAD_FOLDER'])
            res1 = subprocess.call(['C:/Program Files/MuseScore 3/bin/MuseScore3.exe',
                                    '-o' + midi_file, xmlname])
            res2 = subprocess.call(['C:/Program Files/MuseScore 3/bin/MuseScore3.exe',
                                    '-o' + xname + '.mpos', xmlname])
            print(res1, res2)
            Y_pred = transcript(wavname)
            print(Y_pred.shape)
            ld = listdir(app.config['UPLOAD_FOLDER'])
            print(ld)
            p, q = align(midi_file, Y_pred.T)
            p = list(p)
            q = list(q)
            return render_template('audio_TFG.html', filename=wavname, p=p, q=q)

def register(face_detector, model, input_shape, frame, detections):
    print("Registering")
    l = len(detections)
    if l > 1:
        print("You should only register one user")
        return None
    elif l < 1:
        print("No face is found")
        return None
    cv2.imwrite(f"{img_dir}{os.path.sep}registered.jpg", frame)
    print(detections[0])
    img_aln = align.align(detections[0], frame)
    img_rep = represent.represent(img_aln, input_shape)
    retrieved_img = img_rep[0][:, :, ::-1]
    plt.imshow(retrieved_img)
    plt.show()
    return "Registered"

def gen_vector(landmark_predictor, origin_img, vector_predictor):
    h, w, _ = origin_img.shape
    img = cv2.resize(origin_img, (RECOG_SIZE, RECOG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img - 127.5
    img = img * 0.0078125
    landmarks = landmark_predictor.predict(np.expand_dims(img, axis=0))[0]
    landmarks = np.asarray([(int(landmark[0] * w), int(landmark[1] * h))
                            for landmark in landmarks])
    align_face_img = align(origin_img, landmarks, LANDMARK_SIZE)
    # cv2.imwrite('output.jpg', align_face_img)
    # exit(0)
    align_face_img = cv2.cvtColor(align_face_img, cv2.COLOR_BGR2RGB)
    align_face_img = align_face_img - 127.5
    img = align_face_img * 0.0078125
    img = img.transpose([2, 0, 1])
    img = np.expand_dims(img, axis=0)
    img = torch.from_numpy(img).float()
    img = torch.autograd.Variable(img, requires_grad=False).to('cpu')
    with torch.no_grad():
        vector = vector_predictor.forward(img)
    return np.asarray(vector)

def register(face_detector, model, input_shape, frame, detections):
    print("Registering")
    l = len(detections)
    if l > 1:
        print("You should only register one user")
        return None
    elif l < 1:
        print("No face is found")
        return None
    cv2.imwrite("C:\\development\\surveilance\\code\\deepface\\img\\image_registered.jpg",
                frame)
    print(detections[0])
    img_aln = align.align(detections[0], frame)
    img_rep = represent.represent(img_aln, input_shape)
    retrieved_img = img_rep[0][:, :, ::-1]
    plt.imshow(retrieved_img)
    plt.show()
    return "Registered"

## python script that aligns to the reference outputting a sam file
import sys
import os
import logging

logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
sys.path.insert(0, os.path.abspath('../src'))

from optparse import OptionParser
import align

parser = OptionParser()
parser.add_option("-r", "--ref", dest="refFilename",
                  help="fasta input ref file", default="../data/ref.fa")
parser.add_option("-i", "--id", dest="simID",
                  help="simulation identifier",
                  default=os.path.abspath(os.path.join(os.path.curdir, "../data/ref.fa")))
(opt, args) = parser.parse_args()

## Make run ID mandatory
if not opt.simID:
    logging.error("Please specify a run ID with -i '''id'''")
    raise ValueError("-i option is mandatory")

## Index to the reference
opt.readFilename = opt.refFilename[:-3] + '.subsampled.' + opt.simID + '.fq'
logging.info("Indexing reference")
# align.refIndex(file=opt.refFilename)

## Align reads to the reference
logging.info("Aligning reads to reference")
samfileName = opt.refFilename[:-3] + '.subsampled.' + opt.simID + '.sam'
aligned = align.align(reference=opt.refFilename, read_file=opt.readFilename,
                      stdout=samfileName, algorithm='stampy')

def nwalign_wrapper(seq1, seq2, matrix=NUCMATRIX):
    s1 = align.string_to_alignment(seq1)
    s2 = align.string_to_alignment(seq2)
    (score, a1, a2) = align.align(s1, s2, -1, -1, matrix)
    return float(score) / len(a1)

def test_align(self):
    for case in self.cases:
        self.assertEqual(align.align(case[0][0], case[0][1]), case[1])

def test_align_with_japanese(self):
    for case in self.jcases:
        self.assertEqual(align.align(case[0][0], case[0][1]), case[1])

#!/usr/bin/python

from sys import argv, stdout, exit
from os.path import basename
from glob import iglob

from align import align
from skimage.io import imread, imsave

if len(argv) != 3:
    stdout.write('Usage: %s input_dir_path output_dir_path\n' % argv[0])
    exit(1)

input_dir_path = argv[1]
output_dir_path = argv[2]

for filename in iglob(input_dir_path + '/*.png'):
    img = imread(filename)
    img = align(img)
    img = img.astype('float64') / 255
    imsave(output_dir_path + '/' + basename(filename), img)

def testAlign1(self):
    self.assertEqual(hw.align("GCGGAA", "GCAA", -3, dnamat),
                     [14, 'GCggAA', 'GC--AA'])

def consensus(seqs, minscore=0.95):
    ''' build consensus from sorted aligned reads iteratively,
        expects seqs to be sorted in ref genome order '''
    S = -np.ones((256, 256)) + 2 * np.identity(256)
    S = S.astype(np.int16)

    if len(seqs) == 0:
        return '', 0.0

    if len(seqs) == 1:  # no consensus necessary
        return seqs[0], 1.0

    uniq_seqs = [seqs[0]]
    for i, seq in enumerate(seqs[1:], start=1):
        if seq != seqs[i-1]:
            uniq_seqs.append(seq)

    if len(uniq_seqs) == 1:  # all seqs were the same!
        return uniq_seqs[0], 1.0

    start_index = 0
    cons = uniq_seqs[start_index]
    scores = []
    align_init = False

    for i, seq in enumerate(uniq_seqs[1:]):
        #print 'oldcons:', cons
        #print 'seq    :', seq
        s1 = align.string_to_alignment(cons)
        s2 = align.string_to_alignment(seq)

        (s, a1, a2) = align.align(s1, s2, -2, -2, S, local=True)
        a1 = align.alignment_to_string(a1)
        a2 = ''.join([b for b in list(align.alignment_to_string(a2)) if b != '-'])

        score = 0.0
        if len(a1) > 0:
            score = float(len(a1) - (len(a1)-s)) / float(len(a1))

        #print 'score  :', score
        scores.append(score)

        if re.search(a1, cons):
            cons_start, cons_end = locate_subseq(cons, a1)

            if score >= minscore and cons_end > len(cons)-5:
                align_end = locate_subseq(seq, a2)[1]
                cons += seq[align_end:]
                align_init = True
                #print 'newcons:', cons
        elif not align_init:  # haven't found a scaffold yet
            start_index += 1
            cons = uniq_seqs[start_index]

    #print '****'
    return cons, np.mean(scores)

pars['flavor'] = 'passthru'  # symbolic link, no fastqc
pp.preprocess(pars)

# *** 03 combine ***
# no need to run if we don't have fastq files to combine
pars['flavor'] = 'combine'
# pars['flavor'] = 'skip'
# c.combine(pars)

# *** 04 align ***
# star - generate bams
pars['alignerIndexDir'] = alignerIndexDir1
pars['flavor'] = 'star'
# pars['flavor'] = 'skip'
a.align(pars)

# pars['alignerIndexDir'] = alignerIndexDir2
# pars['flavor'] = 'salmon'
# a.align(pars)

# *** 05 quantify ***
# cufflinks - generate quantifications from bams
# pars['flavor'] = 'cufflinks'
pars['flavor'] = 'stringtie'
q.quantify(pars)

# *** count ***
# featureCounts
pars['flavor'] = 'featureCounts'
co.count(pars)

def testAlign2(self):
    self.assertEqual(hw.align("GATC", "AT", -9, dnamat),
                     [-8, 'gATc', '-AT-'])

def testAlign3(self):
    self.assertEqual(hw.align("CCAA", "AT", -5, dnamat),
                     [-9, 'ccAa', '--At'])

def testAlign4(self):
    self.assertEqual(hw.align("ATAACAGA", "GAACGAA", -9, dnamat),
                     [8, 'atAACagA', 'g-AACgaA'])

from parse import parsingFunc, toFasta
from align import align

##### Config variables #####
filePath = 'rand.500.4.fq'
wordLength = 30  # See readme if running on 32-bit machine
writePath = 'output.fasta'
############################

reads = parsingFunc(filePath)
result = align(reads, wl=wordLength)
contigs = result[0]
N50 = result[1]
toFasta(contigs, writePath)
print("Complete. n50: " + str(N50))

'''
rand.500.1.fq:
wordLength = 7   n_contigs = 2   n50 = 1   time ~= 540
wordLength = 15  n_contigs = 2   n50 = 1   time ~= 345
wordLength = 30  n_contigs = 2   n50 = 1   time ~= 325

rand.500.2.fq:
'''

def testAlign5(self):
    self.assertEqual(hw.align("CAGA", "CGAA", -4, dnamat),
                     [8, 'CagA', 'CgaA'])