def comparewebes(sntc1,sntc2):
    """Return a symmetric alignment similarity for two Spanish sentences.

    Every ``+`` in the inputs is replaced by a space (presumably the
    sentences arrive URL-encoded -- confirm against the caller).  The
    sentences are aligned in both directions with ``align`` and each
    alignment is scored with ``prop_al``; the result is the harmonic mean of
    the two scores.

    :param sntc1: first sentence, with ``+`` standing in for spaces
    :param sntc2: second sentence, with ``+`` standing in for spaces
    :return: the similarity converted with ``str``
    """
    sntc1 = sntc1.replace('+', ' ')
    sntc2 = sntc2.replace('+', ' ')

    prop_1 = prop_al(align(sntc1, sntc2, lang='spanish'))
    # The reverse direction must be aligned with the same language as the
    # forward one, otherwise the two scores are not comparable.
    prop_2 = prop_al(align(sntc2, sntc1, lang='spanish'))

    total = prop_1 + prop_2
    if total == 0:
        # Nothing aligned in either direction: similarity is zero rather
        # than a ZeroDivisionError.
        sim = 0.0
    else:
        # Harmonic mean of the two directional scores.
        sim = 2 * prop_1 * prop_2 / total
    return str(sim)
def verify():
    """Grab camera frames until a face with at least two eyes is seen, then identify it.

    Hides the ``ipreg`` widget, resets the ``count`` / ``callCounter``
    globals and shows ``opreg``.  Up to 500 frames are read from ``cap``; the
    loop stops at the first frame in which ``face_detector`` finds a face and
    ``eye_detector`` finds at least two eyes in the last face region.  The
    most recent successfully read frame is then passed through ``align`` and
    ``recognizer`` and the resulting name is written to ``opreg``.

    :raises RuntimeError: if no frame at all could be read from the camera
    """
    global count, callCounter
    ipreg.grid_forget()
    count = False
    callCounter = 0
    opreg.grid(row=2, column=0, pady=(5, 0))

    frame = None
    for i in range(500):
        grabbed, candidate = cap.read()
        if not grabbed:
            # cap.read() reports failure through its first return value; the
            # accompanying frame is unusable and would crash cvtColor.
            print('No frame. Fail - ', i)
            continue
        frame = candidate
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_detector.detectMultiScale(gray, 1.1, 5)
        if len(faces) == 0:
            print('No faces. Fail - ', i)
            continue
        eyes = []
        for (x, y, w, h) in faces:
            # Only the eyes found in the last face region are kept.
            roi = gray[y:y+h, x:x+w]
            eyes = eye_detector.detectMultiScale(roi, 1.3, 7)
        if len(eyes) < 2:
            print('No eyes. Fail - ', i)
            continue
        break

    if frame is None:
        raise RuntimeError('Could not read any frame from the camera')

    # As before, recognition still runs on the last frame when no frame
    # passed the face/eye check within 500 attempts.
    name = recognizer(database, align(frame))
    opreg['text'] = 'Identification - ' + name
def processAll(audio_text_folder, audio_file, text_file):
    """Split a transcript, align it with its audio and cut the audio into pieces.

    All input paths are built from ``audio_text_folder``.  A ``syncmap_file``
    sub-folder is created there if missing.  The output goes to
    ``<output root>/<title>/`` where ``<title>`` is ``text_file`` with every
    ``.txt`` removed; if that folder already exists the function returns
    immediately without doing anything.

    :param audio_text_folder: folder containing both the audio and the text file
    :param audio_file: file name of the audio inside ``audio_text_folder``
    :param text_file: file name of the transcript inside ``audio_text_folder``
    :return: None
    """
    ######################## IO Arguments Here ###########################
    syncmap_file = text_file
    main_title = text_file.replace('.txt', '')
    ######################################################################

    audio_file_path = os.path.join(audio_text_folder, audio_file)
    text_file_path = audio_text_folder
    oritext_file_path = os.path.join(text_file_path, text_file)
    splittext_file_path = os.path.join(text_file_path, text_file.replace('.txt', '_split.txt'))

    syncmap_file_path = os.path.join(audio_text_folder, 'syncmap_file')
    if not os.path.exists(syncmap_file_path):
        os.makedirs(syncmap_file_path)
    syncmap_file_path = os.path.join(syncmap_file_path, syncmap_file)

    out_dir = r'output directory here'
    out_dir = os.path.join(out_dir, main_title)
    if os.path.exists(out_dir):
        # An existing output folder is treated as "already processed".
        return
    # makedirs (not mkdir) so a missing output root is created as well
    # instead of raising.
    os.makedirs(out_dir)
    # The last argument handed to split() is <out_dir>/<title>.
    out_dir = os.path.join(out_dir, main_title)

    print("Text processed: ", splittext(oritext_file_path))
    print("Aligned: ", align(audio_file_path, splittext_file_path, syncmap_file_path))
    print("Split: ", split(audio_file_path, syncmap_file_path, out_dir, main_title))
    print("Done.")
def retText():
    """Register the name typed into ``ipregentry`` against the current frame.

    Reads the name from the entry widget, passes the aligned global ``frame``
    and the name to ``modify_database``, reloads the global ``database`` from
    ``./vitals/database.npy``, clears the entry, swaps the ``ipreg`` widget
    for ``opreg`` with a confirmation text and resets the ``count`` /
    ``callCounter`` globals.
    """
    global name, frame, count, callCounter, database
    name = ipregentry.get()
    print(name)
    modify_database(align(frame), name)
    # .item() unwraps a 0-d object array, i.e. the file holds a pickled
    # Python object.  NumPy refuses to load those unless allow_pickle=True is
    # passed explicitly.
    # NOTE: unpickling executes arbitrary code -- only load files this
    # application wrote itself.
    database = np.load('./vitals/database.npy', allow_pickle=True).item()
    ipregentry.delete(0, len(name))
    ipreg.grid_forget()
    opreg['text'] = 'Identity Validated'
    opreg.grid(row=1, column=0, pady=(5, 0))
    count = False
    callCounter = 0
def align(self, lang1, lang2, no_hand=False):
    """Align the sentences of two languages, dump the result and return it.

    The sentence sequences come from ``self.get_sentences``.  Unless
    ``no_hand`` is true, the existing alignment for the language pair is
    fetched as a ladder and composed into the new alignment; otherwise the
    aligner runs on the raw sequences.  The result is dumped to the path
    ``self._p('<lang1>-<lang2>.my')`` and progress is written to ``log``.

    :param lang1: identifier of the first language
    :param lang2: identifier of the second language
    :param no_hand: when true, ignore the hand alignment
    :return: the alignment produced by ``aligner``
    """
    first = self.get_sentences(lang1)
    second = self.get_sentences(lang2)

    if not no_hand:
        ladder = self.get_alignment([lang1, lang2]).as_ladder(with_costs=False)
        # Python 2 "print chevron": the message goes to ``log``.
        print >> log, "%d hand-aligned pairs found." % len(ladder)
        result = aligner.make_composed_alignment(first, second, ladder)
    else:
        result = aligner.align(first, second)

    target = self._p('%s-%s.my' % (lang1, lang2))
    Alignment(result).dump(target)
    print >> log, "Wrote %s." % target
    return result
def get_alignment_complexity_scores(s0, s1):
    """
    Run Sultan's aligner on two sentences and return an array that, for each
    word in the first sentence, specifies whether it was changed/simplified
    (``COMPLEX``), kept unchanged (``SIMPLE``) or cannot be linked to any
    word in the second sentence (``UNK``).

    Results are memoised in ``ALIGN_DICT``; the cached array itself is
    returned, so callers must not modify it in place.  If tokenisation,
    lemmatisation or alignment fails, an all-``UNK`` array is cached and
    returned and ``ALIGNMENT_STATS["unsuccessful"]`` is incremented.

    :param s0: the first sentence as a list of tokens
    :param s1: the second sentence as a string
    :return: array of length ``len(s0)``, see above
    """
    s0 = [x.lower() for x in s0]
    s1 = s1.lower()

    # check if the alignment has been performed before
    dict_key = " ".join(s0) + SEPARATOR + s1
    if dict_key in ALIGN_DICT:
        return ALIGN_DICT[dict_key]

    result = np.full(len(s0), UNK)
    ALIGNMENT_STATS["total"] += 1
    try:
        # tokenize and lemmatize the sentences
        s0_tok = tokenize(" ".join(s0))
        s1_tok = tokenize(s1)
        s0_lem = lemmatize(s0_tok)
        s1_lem = lemmatize(s1_tok)
        pairs = align(s0_tok, s1_tok)  # pairs of sentences aligned by Sultan's word aligner
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        ALIGN_DICT[dict_key] = result
        ALIGNMENT_STATS["unsuccessful"] += 1
        return result

    # iterate over aligned pairs and fill the result array
    for i in range(len(pairs[0])):
        w0, w1 = pairs[1][i][0].lower(), pairs[1][i][1].lower()
        if w0 in STOPWORDS or w1 in STOPWORDS:
            # such an alignment doesn't matter
            continue
        index = get_index(s0, w0, i, pairs)
        if index == -1:
            continue
        # The differing .get() defaults ensure two missing lemmas never
        # compare equal.
        if w0 == w1 or s0_lem.get(w0, 'w0') == s1_lem.get(w1, 'w1'):
            # the alignment is valid but it only indicates that the word was kept as it is
            result[index] = SIMPLE
        else:
            result[index] = COMPLEX
    ALIGN_DICT[dict_key] = result
    return result
def pairFeatures(self, sentenceA, sentenceB):
    """Return the two similarity features for a pair of sentences.

    Feature 0 is the summed length of every group returned by
    ``aligner.align`` divided by the total number of word tokens (``\\w+``
    runs) in both sentences; it is ``0.0`` when neither sentence contains a
    word, in which case the aligner is not called.  Feature 1 is
    ``self.bow.sentence_similarity`` of the raw sentences.

    :param sentenceA: first sentence as a string
    :param sentenceB: second sentence as a string
    :return: ``[aligned_ratio, bow_similarity]``
    """
    sentA = re.findall(r"[\w]+", sentenceA)
    sentB = re.findall(r"[\w]+", sentenceB)

    denominator = len(sentA) + len(sentB)
    if denominator == 0:
        # No words on either side: nothing to align and nothing to divide by.
        aligned_ratio = 0.0
    else:
        alignedWords = aligner.align(sentA, sentB)
        numerator = sum(len(group) for group in alignedWords)
        aligned_ratio = float(numerator) / float(denominator)

    return [aligned_ratio, self.bow.sentence_similarity(sentenceA, sentenceB)]
def learn_weights(training_set, learning_epochs, burn_in_epochs,
                  learning_rate, learning_rate_multiplier):
    """Learn aligner feature weights with an averaged perceptron-style update.

    For every epoch the learning rate is multiplied by
    ``learning_rate_multiplier``, ``training_set`` is shuffled in place, and
    for each problem the weights are moved by
    ``learning_rate * (gold_features - predicted_features)``.  The weights are
    then L2-normalised and recorded; the result is the mean of the recorded
    weights after skipping the first ``burn_in_epochs`` entries.

    NOTE(review): the original indentation was lost.  Normalisation and the
    history append are assumed to happen once per epoch, which is what the
    division by ``learning_epochs - burn_in_epochs`` implies -- confirm.

    :param training_set: mutable sequence of problems exposing
        ``p_str_tokens`` and ``h_str_tokens``
    :param learning_epochs: number of passes over the training set
    :param burn_in_epochs: number of leading epochs excluded from the average
    :param learning_rate: initial step size
    :param learning_rate_multiplier: factor applied to the step size each epoch
    :return: the averaged weight vector
    """
    weights = [0] * 25
    weights_history = []
    for epoch in range(learning_epochs):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('*** Starting epoch %s of %s ***' % (epoch, learning_epochs))
        learning_rate *= learning_rate_multiplier
        shuffle(training_set)
        for index, problem in enumerate(training_set):
            print('* Starting problem %s of %s in epoch %s*' %
                  (index, len(training_set), epoch))
            print(problem.p_str_tokens)
            print(problem.h_str_tokens)
            gold_features = gold_featurizer.featurize(problem)
            predicted_alignment, predicted_features = aligner.align(
                problem.p_str_tokens, problem.h_str_tokens, weights)
            print(predicted_features)
            weights = weights + (learning_rate *
                                 (gold_features - predicted_features))
        # L2 normalisation.  The comprehension variable is deliberately not
        # ``i``/``epoch``: on Python 2 it leaks into the enclosing scope.
        norm = sqrt(sum([w ** 2 for w in weights]))
        if norm:
            # An all-zero vector is left as is instead of dividing by zero.
            weights = weights / norm
        weights_history.append(weights)
    # 1.0 forces true division; on Python 2 ``1 / n`` is 0 for any n > 1 and
    # the average would collapse to zero.
    weights_averaged = (1.0 / (learning_epochs - burn_in_epochs) *
                        sum(weights_history[burn_in_epochs:]))
    return weights_averaged
def linear_align(s1, s2):
    """Align two sentences and merge runs of consecutive identical tokens.

    The first element of ``align(s1, s2)`` is taken as a list of index
    pairs, each index is decremented by one and the pairs are sorted.  A pair
    whose two tokens (from splitting the sentences on single spaces) are
    equal either starts a run or extends the current run when it directly
    follows the run's last pair on both sides.  Any other pair closes the
    current run and is emitted on its own.

    :param s1: first sentence as a space-separated string
    :param s2: second sentence as a space-separated string
    :return: list of ``(indices1, indices2)`` tuples of index strings
    """
    tokens1 = s1.split(' ')
    tokens2 = s2.split(' ')
    alignments = sorted((src - 1, tgt - 1) for src, tgt in align(s1, s2)[0])
    identical = {
        (src, tgt)
        for src, tgt in alignments
        if src < len(tokens1) and tgt < len(tokens2) and tokens1[src] == tokens2[tgt]
    }

    def as_group(pairs):
        # Turn [(a, b), (c, d)] into (('a', 'c'), ('b', 'd')).
        left, right = zip(*pairs)
        return (tuple(map(str, left)), tuple(map(str, right)))

    grouped = []
    run = []
    for src, tgt in alignments:
        extends_run = (src, tgt) in identical and (not run or run[-1] == (src - 1, tgt - 1))
        if extends_run:
            run.append((src, tgt))
            continue
        if run:
            grouped.append(as_group(run))
        grouped.append(((str(src),), (str(tgt),)))
        run = []
    if run:
        grouped.append(as_group(run))
    return grouped
def read_audio(audio_file, transcript):
    """Recognise an audio file and return timings for the words kept by ``align``.

    Runs ``pocketsphinx_continuous -time yes`` on ``audio_file`` and parses
    its output: lines outside an ``<s>`` ... ``</s>`` section are collected
    as recognised text, lines inside it are read as ``word start end``
    records.  The upper-cased recognised text is passed to ``align`` together
    with ``transcript``, and every word it returns is matched, in order,
    against the timed records.

    :param audio_file: path of the audio file to recognise
    :param transcript: reference transcript handed to ``align``
    :return: list of ``(word, start, end)`` tuples
    :raises subprocess.CalledProcessError: if pocketsphinx exits with an error
    """
    args = ["pocketsphinx_continuous", "-time", "yes", "-infile", audio_file]
    # universal_newlines=True makes check_output return text instead of
    # bytes, so the string parsing below also works on Python 3.
    out = subprocess.check_output(args, stderr=DEVNULL, universal_newlines=True)

    is_text = True
    reconized_text = ""
    words = []
    # Parse ugly output
    for line in out.split("\n"):
        if "!!!" in line:
            continue
        if "<s>" in line:
            is_text = False
        if is_text:
            reconized_text += " " + line
        if "</s>" in line:
            is_text = True
        if not is_text:
            data = line.split(" ")
            if len(data) < 3:
                # Not a "word start end" record (e.g. a blank line).
                continue
            word = data[0].strip("(0123456789)")
            start = float(data[1])
            end = float(data[2])
            if word.isalpha():
                words.append((word, start, end))

    # Remove unreconized word
    known_words = align(reconized_text.upper(), transcript)
    i = 0
    res_words = []
    for word in known_words:
        while i < len(words) and word != words[i][0].upper():
            i += 1
        if i == len(words):
            break
        res_words.append(words[i])
        # Consume the matched record so a repeated word gets its own timing
        # instead of reusing this one.
        i += 1
    return res_words
def read_audio(audio_file, transcript):
    """Recognise an audio file and return timings for the words kept by ``align``.

    Runs ``pocketsphinx_continuous -time yes`` on ``audio_file`` and parses
    its output: lines outside an ``<s>`` ... ``</s>`` section are collected
    as recognised text, lines inside it are read as ``word start end``
    records.  The upper-cased recognised text is passed to ``align`` together
    with ``transcript``, and every word it returns is matched, in order,
    against the timed records.

    :param audio_file: path of the audio file to recognise
    :param transcript: reference transcript handed to ``align``
    :return: list of ``(word, start, end)`` tuples
    :raises subprocess.CalledProcessError: if pocketsphinx exits with an error
    """
    args = ['pocketsphinx_continuous', '-time', 'yes', '-infile', audio_file]
    # universal_newlines=True makes check_output return text instead of
    # bytes, so the string parsing below also works on Python 3.
    out = subprocess.check_output(args, stderr=DEVNULL, universal_newlines=True)

    is_text = True
    reconized_text = ""
    words = []
    # Parse ugly output
    for line in out.split('\n'):
        if '!!!' in line:
            continue
        if '<s>' in line:
            is_text = False
        if is_text:
            reconized_text += " " + line
        if '</s>' in line:
            is_text = True
        if not is_text:
            data = line.split(' ')
            if len(data) < 3:
                # Not a "word start end" record (e.g. a blank line).
                continue
            word = data[0].strip('(0123456789)')
            start = float(data[1])
            end = float(data[2])
            if word.isalpha():
                words.append((word, start, end))

    # Remove unreconized word
    known_words = align(reconized_text.upper(), transcript)
    i = 0
    res_words = []
    for word in known_words:
        while i < len(words) and word != words[i][0].upper():
            i += 1
        if i == len(words):
            break
        res_words.append(words[i])
        # Consume the matched record so a repeated word gets its own timing
        # instead of reusing this one.
        i += 1
    return res_words
#normalisation and data format changed to channels_first img = np.around(np.transpose(img, (2, 0, 1)) / 255.0, decimals=15) #preprocess data format x_train = np.array([img]) #feed to neural net input embedding = model.predict_on_batch(x_train) return embedding #driver code if __name__ == "__main__": print('Initialised Model') print('Reading reference image') #replace with full reference image path. example - 'D:/img/image_1.jpg' image = cv2.imread('Reference Image Path', 1) image = align(image) cv2.imshow('Labelled Image', image) print('Generating embedding') database = modify_database(image, 'Prabodh') print('Embedding generated') print('Reading test image') #replace with full test image path. example - 'D:/img/image_2.jpg' image = cv2.imread('Test Image Path', 1) image = align(image) cv2.imshow('Test Image', image) print('Recognition started...') recognizer(database, image)
def alignweb(sntc1,sntc2):
    """Align two sentences and return the outcome as a JSON response.

    :param sntc1: first sentence, passed unchanged to ``align``
    :param sntc2: second sentence, passed unchanged to ``align``
    :return: ``jsonify`` of a dict with the single key ``'result'``
    """
    return jsonify({'result': align(sntc1, sntc2)})
# coding=utf-8 # run.py import aligner as fa import formants as fm fa.dict() fa.align() fm.extract()
# Load the parser output for both sides.
sent1_parse_lst = read_json_file(args.sent1parsepath)
sent2_parse_lst = read_json_file(args.sent2parsepath)

if args.sentalignspath is None:
    # assume 1-to-1 alignment
    sent_aligns = ['{}\t{}'.format(i, i) for i in range(len(sent1_parse_lst))]
else:
    sent_aligns = read_text_file(args.sentalignspath)

sents_info = group_sentence_alignments(sent1_parse_lst, sent2_parse_lst,
                                       sent_aligns)

# One output line of word alignments per aligned sentence pair.
word_aligns = []
for sent1_parse_json, sent2_parse_json in sents_info:
    sent1_parse_result = coreNlpUtil.format_json_parser_results(
        sent1_parse_json)
    sent2_parse_result = coreNlpUtil.format_json_parser_results(
        sent2_parse_json)
    # get the alignments (only indices)
    aligns, _ = aligner.align(sent1_parse_result, sent2_parse_result)
    # convert to pharaoh format: [[1, 1], [2, 2]] -> ['1-1', '2-2']
    aligns_pharaoh = ['-'.join([str(p[0]), str(p[1])]) for p in aligns]
    # create a single line to write: ['1-1', '2-2'] -> '1-1 2-2'
    aligns_line = ' '.join(aligns_pharaoh)
    word_aligns.append(aligns_line)

aligns_file_path = os.path.join(args.outputfolder, args.outputfilename)
# The handle gets its own name so aligns_file_path keeps holding the path
# (it used to be rebound to the file object by the with statement).
with open(aligns_file_path, 'w') as aligns_file:
    aligns_file.write('\n'.join(word_aligns))
import os
import pickle

from aligner import align
from model.utils import get_dataset, get_tokenized_lemmas


def _get_unaligned_tokens(tokens, alignment):
    """Return the tokens that no alignment pair points at.

    :param tokens: list of tokens
    :param alignment: iterable of pairs whose first element is a 1-based
        index into ``tokens``
    :return: the tokens, in order, whose position is not referenced
    """
    aligned = set(a - 1 for (a, _) in alignment)
    return [tok for i, tok in enumerate(tokens) if i not in aligned]


if __name__ == "__main__":
    # Align every claim headline with its article headline and pickle the
    # 0-based index pairs keyed by (claimId, articleId).
    df = get_dataset()
    data = {}
    for _, row in df.iterrows():
        article_hl_tok = get_tokenized_lemmas(row.articleHeadline)
        claim_hl_tok = get_tokenized_lemmas(row.claimHeadline)
        try:
            alignment = align(claim_hl_tok, article_hl_tok)
            data[(row.claimId, row.articleId)] = [(s - 1, t - 1)
                                                  for (s, t) in alignment[0]]
        except Exception:
            # Best effort: report the pair and carry on.  Exception (not a
            # bare except) so Ctrl-C still stops the run.
            # Single-argument print(...) gives the same output on Python 2
            # and 3.
            print('Unable to align %s and %s' % (article_hl_tok, claim_hl_tok))
            print('%s %s' % (row.articleId, row.claimId))

    with open(os.path.join('..', 'data', 'pickled', 'aligned-data.pickle'), 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
import os
import pickle

from aligner import align
from model.utils import get_dataset, get_tokenized_lemmas


def _get_unaligned_tokens(tokens, alignment):
    """Return the tokens that no alignment pair points at.

    :param tokens: list of tokens
    :param alignment: iterable of pairs whose first element is a 1-based
        index into ``tokens``
    :return: the tokens, in order, whose position is not referenced
    """
    aligned = set(a - 1 for (a, _) in alignment)
    return [tok for i, tok in enumerate(tokens) if i not in aligned]


if __name__ == "__main__":
    # Align every claim headline with its article headline and pickle the
    # 0-based index pairs keyed by (claimId, articleId).
    df = get_dataset()
    data = {}
    for _, row in df.iterrows():
        article_hl_tok = get_tokenized_lemmas(row.articleHeadline)
        claim_hl_tok = get_tokenized_lemmas(row.claimHeadline)
        try:
            alignment = align(claim_hl_tok, article_hl_tok)
            data[(row.claimId, row.articleId)] = [(s - 1, t - 1)
                                                  for (s, t) in alignment[0]]
        except Exception:
            # Best effort: report the pair and carry on.  Exception (not a
            # bare except) so Ctrl-C still stops the run.
            # Single-argument print(...) gives the same output on Python 2
            # and 3.
            print('Unable to align %s and %s' % (article_hl_tok, claim_hl_tok))
            print('%s %s' % (row.articleId, row.claimId))

    with open(os.path.join('..', 'data', 'pickled', 'aligned-data.pickle'), 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)