def _predict_v1_batch(alignment_method, samples, threshold):
    """Run ``predict_v1`` on every sample and threshold the resulting scores.

    Returns a ``(labels, scores, masks)`` tuple of parallel lists, where a
    label is ``1.0`` when the score is ``>= threshold`` and ``0.0`` otherwise.
    """
    scores = []
    masks = []
    for words in samples:
        score, mask = alignment_method.predict_v1(words)
        scores.append(score)
        masks.append(mask)
    labels = [float(score >= threshold) for score in scores]
    return labels, scores, masks


def alignment_v1(w2v, threshold, training_positive_annots, training_negative_annots, test_positive_annots, test_negative_annots):
    """Train an ``alignment.Alignment`` model (v1) and score train and test data.

    Each annotation is indexed with ``entry["words"]``; training annotations
    are additionally indexed with ``entry["mask"]``.  The masks of the test
    annotations are not needed and are therefore not read.

    Args:
        w2v: Passed unchanged to ``alignment.Alignment``.
        threshold: A sample is predicted positive when its score is
            ``>= threshold``.
        training_positive_annots: Positive training annotations.
        training_negative_annots: Negative training annotations.
        test_positive_annots: Positive test annotations.
        test_negative_annots: Negative test annotations.

    Returns:
        A tuple of eight ``numpy`` arrays, in this order: predicted training
        labels, predicted test labels, true training labels, true test
        labels, training scores, training masks, test scores, test masks.
        Within every array the positive samples come first, followed by the
        negative samples.
    """
    train_positive_words = [entry["words"] for entry in training_positive_annots]
    train_negative_words = [entry["words"] for entry in training_negative_annots]
    test_positive_words = [entry["words"] for entry in test_positive_annots]
    test_negative_words = [entry["words"] for entry in test_negative_annots]
    train_positive_masks = [entry["mask"] for entry in training_positive_annots]
    train_negative_masks = [entry["mask"] for entry in training_negative_annots]

    alignment_method = alignment.Alignment(w2v)
    alignment_method.train_v1(
        train_positive_words,
        train_positive_masks,
        train_negative_words,
        train_negative_masks,
    )

    # Ground truth: 1.0 for every positive sample, 0.0 for every negative one.
    train_labels = [1.0] * len(train_positive_words) + [0.0] * len(train_negative_words)
    test_labels = [1.0] * len(test_positive_words) + [0.0] * len(test_negative_words)

    training_pred_labels, training_pred_scores, training_pred_masks = _predict_v1_batch(
        alignment_method, train_positive_words + train_negative_words, threshold)
    testing_pred_labels, testing_pred_scores, testing_pred_masks = _predict_v1_batch(
        alignment_method, test_positive_words + test_negative_words, threshold)

    return (
        np.array(training_pred_labels),
        np.array(testing_pred_labels),
        np.array(train_labels),
        np.array(test_labels),
        np.array(training_pred_scores),
        np.array(training_pred_masks),
        np.array(testing_pred_scores),
        np.array(testing_pred_masks),
    )
def _predict_v2_batch(alignment_method, samples):
    """Run ``predict_v2`` on every sample and pick the winning class.

    ``predict_v2`` yields a positive and a negative (score, mask) pair.  The
    positive class wins only when its score is strictly greater; in that case
    the positive score and mask are reported with label ``1.0``.  Otherwise
    the label is ``0.0``, the negative mask is reported, and the score is
    ``1 - neg_score``.

    Returns a ``(labels, scores, masks)`` tuple of parallel lists.
    """
    labels = []
    scores = []
    masks = []
    for words in samples:
        pos_score, pos_mask, neg_score, neg_mask = alignment_method.predict_v2(words)
        if pos_score > neg_score:
            score, mask, label = pos_score, pos_mask, 1.0
        else:
            score, mask, label = 1 - neg_score, neg_mask, 0.0
        scores.append(score)
        masks.append(mask)
        labels.append(label)
    return labels, scores, masks


def alignment_v2(w2v,training_positive_annots,training_negative_annots,test_positive_annots,test_negative_annots):
    """Train an ``alignment.Alignment`` model (v2) and score train and test data.

    Each annotation is indexed with ``entry["words"]``; training annotations
    are additionally indexed with ``entry["mask"]``.  The masks of the test
    annotations are not needed and are therefore not read.

    Args:
        w2v: Passed unchanged to ``alignment.Alignment``.
        training_positive_annots: Positive training annotations.
        training_negative_annots: Negative training annotations.
        test_positive_annots: Positive test annotations.
        test_negative_annots: Negative test annotations.

    Returns:
        A tuple of eight ``numpy`` arrays, in this order: predicted training
        labels, predicted test labels, true training labels, true test
        labels, training scores, training masks, test scores, test masks.
        Within every array the positive samples come first, followed by the
        negative samples.
    """
    train_positive_words = [entry["words"] for entry in training_positive_annots]
    train_negative_words = [entry["words"] for entry in training_negative_annots]
    test_positive_words = [entry["words"] for entry in test_positive_annots]
    test_negative_words = [entry["words"] for entry in test_negative_annots]
    train_positive_masks = [entry["mask"] for entry in training_positive_annots]
    train_negative_masks = [entry["mask"] for entry in training_negative_annots]

    alignment_method = alignment.Alignment(w2v)
    alignment_method.train_v2(
        train_positive_words,
        train_positive_masks,
        train_negative_words,
        train_negative_masks,
    )

    # Ground truth: 1.0 for every positive sample, 0.0 for every negative one.
    train_labels = [1.0] * len(train_positive_words) + [0.0] * len(train_negative_words)
    test_labels = [1.0] * len(test_positive_words) + [0.0] * len(test_negative_words)

    training_pred_labels, training_pred_scores, training_pred_masks = _predict_v2_batch(
        alignment_method, train_positive_words + train_negative_words)
    testing_pred_labels, testing_pred_scores, testing_pred_masks = _predict_v2_batch(
        alignment_method, test_positive_words + test_negative_words)

    return (
        np.array(training_pred_labels),
        np.array(testing_pred_labels),
        np.array(train_labels),
        np.array(test_labels),
        np.array(training_pred_scores),
        np.array(training_pred_masks),
        np.array(testing_pred_scores),
        np.array(testing_pred_masks),
    )
def parseGumby(gumbyFile, exonFile, baseSeq):
    """Parse a gumby output file into an ``alignment.Alignment`` of blocks.

    Every block begins with a line starting with ``"start"`` (from which the
    p-value, length and score are read) and is followed by sequence lines of
    the form ``name start end sequence``.  Each completed block is handed to
    ``procData`` and whatever it returns is appended to the result.
    According to the original author's note, blocks overlapping exons and
    blocks consisting only of gaps on ``baseSeq`` are removed; that filtering
    is presumably done inside ``procData`` -- it is not visible here.

    Args:
        gumbyFile: Path of the gumby output file.
        exonFile: Path of a whitespace-separated exon file, or ``None``.
            Only rows whose first column, lower-cased, equals ``baseSeq`` are
            used; columns 4 and 5 are read as integers (presumably exon
            start/end -- confirm against the file format).
        baseSeq: Name of the base sequence.

    Returns:
        The ``alignment.Alignment`` object extended with the processed
        blocks; it is left empty when the file contains no "start" line.
    """
    exons = []
    if exonFile is not None:
        with open(exonFile, "r") as fh:
            for l in fh:
                fs = l.split()
                # Skip blank lines (previously an IndexError) and other sequences.
                if not fs or fs[0].lower() != baseSeq:
                    continue
                exons.append([int(fs[3]), int(fs[4])])

    # NOTE(review): `compile` is assumed to be `re.compile` imported at file level.
    re1 = compile("[a-z]+[ ]+[0-9]+[ ]+[0-9]+")
    seqs = {}
    pos = {}
    i = -1  # index of the block currently being collected; -1 = none seen yet
    pval = length = score = None
    resultLst = alignment.Alignment()

    with open(gumbyFile, "r") as infile:
        for l in infile:
            l = l.strip()
            # Normalise the alternative marker characters to plain gaps.
            l = l.replace("*", "-")
            l = l.replace("<", "-")
            l = l.replace(">", "-")

            if l.startswith("start"):
                # A new header closes the previous block, if there was one.
                if i != -1:
                    resultLst.extend(procData(baseSeq, exons, i, seqs, pos, pval, length, score))
                f = l.split()
                pval = float(f[-1])
                length = int(f[6].strip(","))
                score = int(f[8].strip(","))
                i += 1
                seqs = {}

            if re1.match(l):
                f = l.split()
                name = f[0]
                start = int(f[1]) - 1
                end = int(f[2]) - 1
                seq = f[3]
                if name not in seqs:
                    faseq = Fasta.FastaSeq(name, seq)
                    faseq.chrom = name
                    faseq.start = start
                    faseq.end = end
                    seqs[name] = faseq
                else:
                    # Continuation line: append to the sequence collected so far.
                    faseq = seqs[name]
                    faseq.nucl += seq
                pos[name] = (name, start, end)

    # Flush the last block; without any header there is nothing to flush
    # (and pval/length/score would be undefined).
    if i != -1:
        resultLst.extend(procData(baseSeq, exons, i, seqs, pos, pval, length, score))
    return resultLst
def get_best_alignment(self):
    """Trace back from the end cell and return the best alignment.

    The score is the ``value`` of the cell returned by ``get_end_cell()``.
    If it is not greater than ``-INFINITY`` the alignment is built with a
    state path of ``None``.  Otherwise the ``parent`` links are followed
    from the end cell's parent until a cell whose state ``is_begin()``,
    collecting each state's ``short_name``; the collected path is reversed
    so that it runs from begin to end.

    Returns:
        ``alignment.Alignment(self.sequence, state_path, score)``.
    """
    path = seq.Sequence("State path", "")
    end_cell = self.get_end_cell()
    best_score = end_cell.value

    if not (best_score > -INFINITY):
        # No usable path reaches the end cell.
        return alignment.Alignment(self.sequence, None, best_score)

    cell = end_cell.parent
    while True:
        if cell.state.is_begin():
            break
        path.append(cell.state.short_name)
        cell = cell.parent
    # States were collected end-to-begin; flip them into reading order.
    path.reverse()
    return alignment.Alignment(self.sequence, path, best_score)
def sample(hmm, observations):
    """
    Walk the given HMM for at most ``observations`` transitions.

    Starting from ``hmm.begin_state()``, a transition is sampled at every
    step.  Reaching a state whose ``is_end()`` is true stops the walk early;
    otherwise the state's ``short_name`` and one sampled emission are
    recorded.

    returns an ``alignment.Alignment`` built from the emission sequence and
    the state path.
    """
    random.seed()  # called without a seed on purpose: force reseeding
    visited_states = seq.Sequence("State path", "")
    emitted = seq.Sequence("Sequence", "")

    state = hmm.begin_state()
    for _ in range(observations):
        state = state.sample_transition()
        if not state.is_end():
            visited_states.append(state.short_name)
            emitted.append(state.sample_emission())
            continue
        break

    return alignment.Alignment(emitted, visited_states)
def using_constraint_solver(original_csv, output_csv, tempo=120, threshold=7000, offset=0.00, leniency=0.3, num_agents=2):
    """Compare the constraint-solver solutions of two recordings.

    Both CSV files are converted to note lists (``mt.create_note_list`` then
    ``mt.number_converter``) and solved with ``cs.get_solutions``.  Leading
    ``-1`` entries are stripped from every output solution, the first
    solutions of both sides are aligned with ``al.Hirschberg.align`` and
    printed, and finally the solutions are compared position by position.

    Args:
        original_csv: CSV of the reference recording.
        output_csv: CSV of the recording to evaluate.
        tempo: Passed to ``mt.create_note_list`` for both files.
        threshold: Passed to ``mt.create_note_list`` for the reference file
            only; the output file always uses ``500``.
        offset: Currently unused -- both files are read with ``offset=-0.08``.
        leniency: Currently unused.
        num_agents: Passed to ``cs.get_solutions``.

    Returns:
        The fraction of mismatching positions, measured over
        ``len(original_solutions) * len(original_solutions[0])`` positions,
        or ``-1`` when the reference has no solutions or its first solution
        is empty.  Positions that are missing from the output (it may be
        shorter once leading ``-1`` entries are stripped) count as errors.
    """
    original_freq_list = mt.create_note_list(original_csv, tempo, threshold, offset=-0.08)
    original_freq_list = mt.number_converter(original_freq_list)
    original_solutions = cs.get_solutions(original_freq_list, num_agents)

    # Check for an empty reference before indexing it anywhere below.
    if not original_solutions or not original_solutions[0]:
        return -1
    print("Original :", original_solutions[0])

    # May need to change threshold for output freq list
    output_freq_list = mt.create_note_list(output_csv, tempo, threshold=500, offset=-0.08)
    output_freq_list = mt.number_converter(output_freq_list)
    raw_output_solutions = cs.get_solutions(output_freq_list, num_agents)

    # Drop the leading run of -1 entries from every output solution.
    output_solutions = []
    for output_solution in raw_output_solutions:
        index = 0
        while index < len(output_solution) and output_solution[index] == -1:
            index += 1
        output_solutions.append(output_solution[index:])

    first_output = output_solutions[0] if output_solutions else []
    print("Output:", first_output)
    print("Test: ")
    tester = al.Alignment()
    a, b = al.Hirschberg.align(tester, seq_a=list(original_solutions[0]), seq_b=list(first_output))
    print(a)
    print(b)

    reference_length = len(original_solutions[0])
    num_errors = 0.0
    for agent, reference in enumerate(original_solutions):
        produced = output_solutions[agent] if agent < len(output_solutions) else []
        for j in range(reference_length):
            # A position the output does not have is a mismatch, not a crash.
            if j >= len(produced) or reference[j] != produced[j]:
                num_errors += 1

    return num_errors / float(len(original_solutions) * reference_length)
def __init__(self, file_path):
    """Set up a speech object from its corpus-relative path.

    ``file_path`` is first normalised with
    ``file_loader.format_file_path_for_speech_object``.  The alignment,
    transcript, audio and applause-time file names are then derived from it
    under fixed ``/data/corpora/cspan/`` directories, after which the stored
    predictions, the alignment, the applause list and the phrase audio
    features are loaded, in that order.
    """
    file_path = file_loader.format_file_path_for_speech_object(file_path)
    self.file_path = file_path

    # The alignment file is named after the second component of the path.
    speech_name = file_path.split('/')[1]
    self.alignment_file = "".join((
        "/data/corpora/cspan/alignments/",
        file_path,
        "/",
        speech_name,
        "_single_speaker.json",
    ))
    self.transcript_file = "".join(
        ("/data/corpora/cspan/transcripts_clean/", file_path, ".txt"))
    self.audio_file = "".join(
        ("/data/corpora/cspan/audio/", file_path, ".mp3"))
    self.applause_times_file = "".join(
        ("/data/corpora/cspan/applause_times/", file_path, ".txt"))

    # Loading order is kept as-is: later steps may read attributes set by
    # earlier ones.
    self.load_stored_applause_predictions(file_path)
    self.load_stored_crowd_rmse()

    self.alignment = alignment.Alignment(self.alignment_file)
    self.alignment.speech = self  # back-reference from the alignment

    self.applause_preds_by_second = self.get_preds_by_second()
    self.applause_list = applause_list.ApplauseList(self.applause_times_file)
    self.phrase_audio_features = self.get_phrase_audio_features()

    # Audio analysis constants; frame_rate equals sr / hop_size.
    self.frame_rate = 43.06640625
    self.hop_size = 512
    self.sr = 22050
studentTitle = 'weiguojia_section_amateur' teacherMonoNoteOutFilename = segmentFileFolder + teacherTitle + '_monoNoteOut_midi.txt' studentMonoNoteOutFilename = segmentFileFolder + studentTitle + '_monoNoteOut_midi.txt' teacherRepresentationFilename = segmentFileFolder + teacherTitle + '_representation.json' studentRepresentationFilename = segmentFileFolder + studentTitle + '_representation.json' teacherNoteAlignedFilename = outputFileFolder + teacherTitle + '_noteAligned.csv' studentNoteAlignedFilename = outputFileFolder + studentTitle + '_noteAligned.csv' teacherSegAlignedFilename = outputFileFolder + teacherTitle + '_segAligned.csv' studentSegAlignedFilename = outputFileFolder + studentTitle + '_segAligned.csv' cs1 = cs.ConcatenateSegment() align1 = align.Alignment() #################################################### note alignment #################################################### # read note file noteStartingTime_t, noteDurTime_t, midiNote_t = cs1.readPyinMonoNoteOutMidi( teacherMonoNoteOutFilename) noteStartingTime_s, noteDurTime_s, midiNote_s = cs1.readPyinMonoNoteOutMidi( studentMonoNoteOutFilename) noteStartingFrame_t, noteEndingFrame_t = cs1.getNoteFrameBoundary( noteStartingTime_t, noteDurTime_t) noteStartingFrame_s, noteEndingFrame_s = cs1.getNoteFrameBoundary( noteStartingTime_s, noteDurTime_s) # get concatenated pitch track notePts_t, noteStartingFrameConcatenate_t, noteEndingFrameConcatenate_t = \
from xml_handling import UniprotXMLHandler from ete3 import NCBITaxa import db_handling import alignment import operator import numpy as np import os import sys #Accession.download_uniprot_xml() # uniprotHandler = UniprotXMLHandler(iterate=True) ncbi = NCBITaxa() align = alignment.Alignment() protDB = db_handling.ProteinDatabase() filtering_states = protDB.get_filtering_states() for state in filtering_states: current_state = protDB.get_current_state() rank = state[0] if rank == current_state[0]: if current_state[2]: if not current_state[4]: align.iterate_ellection_taxon_rank_representatives( taxon_rank=rank, current_state=current_state) protDB.set_next_filtering_state()
parser.add_argument('--num', type=int, default=1000, help='num of images') parser.add_argument('--dim', type=int, default=96, help='alignment dimension') args = parser.parse_args() print(args) videoCapture = cv2.VideoCapture(os.path.join(args.dir, '001.mp4')) if not videoCapture.isOpened(): sys.exit('video not opened') template = np.load(os.path.join(fileDir, 'template.npy')) delaunay = scipy.spatial.Delaunay(template) facePredictor = os.path.join(fileDir, 'shape_predictor_68_face_landmarks.dat') alignDlib = openface.AlignDlib(facePredictor) alignment = alignment.Alignment(args.dim, template, delaunay.simplices) print('processing images...') for index in range(args.num): ret, rawImage = videoCapture.read() if not ret: break boundingBox = alignDlib.getLargestFaceBoundingBox(rawImage) landmarks = alignDlib.findLandmarks(rawImage, boundingBox) alignedImage = alignment.align(rawImage, landmarks) convertedImage = cv2.cvtColor(alignedImage, cv2.COLOR_RGB2GRAY)
def print_alignment(hyp, ref):
    """Align ``hyp`` against ``ref`` and print the result.

    Both arguments must be strings.  After ``Alignment.align`` has run, the
    formatted hypothesis and reference strings and the substitution,
    insertion and deletion counts are printed.

    Each line is printed as a single pre-formatted string, so the function
    is valid on Python 2 and Python 3 and produces the same text as the
    former Python 2 ``print`` statements (items separated by one space).
    """
    a = alignment.Alignment()
    a.align(hyp, ref)  # input is string!!!
    print("%s %s" % ("Formatted hypothesis: ", a.hstring))
    print("%s %s" % ("Formatted reference: ", a.rstring))
    print("%s %s %s %s %s %s" % (
        "Substitution: ", a.numSub,
        "Insertion: ", a.numIns,
        "Deletion: ", a.numDel,
    ))
LOGFILE.write("%s%s\n" % ("Line width is ", width)) MSSAFILE = open(mssaFile, "w") # Set up data structure for holding alignments pairwise = { # dict holding next pairwise alignment "matchName": "", # name of sequence aligned to reference "referenceLine": "", # reference sequence (complete) with possible gaps as '-' "correspondenceLine": "", # string of characters: blank, '.', or ':' "matchLine": "", # match sequence (maybe incomplete) with possible gaps as '-' } failure = 0 # will be test result from method call # Create an Alignment object myAlignment = alignment.Alignment(format) if CHATTY: print "Acquiring R-R correspondences from input file..." # Switches that control handling of input file data lines FASTA = False ALIGN = False fLines = INFILE.read().splitlines() lineCount = len(fLines) for i in xrange(0, lineCount): nextLine = fLines[i] # Get next data line if (nextLine == '' or nextLine == '\^#'): # comment line starts with '#' continue # skip blank and comment lines
if etree is None: log.write("bad tree, skipped line\n") continue #espans = label_spans(etree) eleaves = list(etree.frontier()) ewords = [leaf.label for leaf in eleaves] etags = [leaf.parent.label for leaf in eleaves] ecb = crossing_brackets(etree) except: log.write("bad etree, skipped: %s\n" % estr) continue else: ewords = estr.split() a = alignment.Alignment(fwords, ewords) good = True if astr.strip() == "": log.write("skipping sentence with empty alignment\n") continue for ij in astr.split(): i,j = (int(x) for x in ij.split('-',1)) try: a.align(i,j) except IndexError: log.write("alignment point %s-%s out of bounds\n" % (i,j)) good = False break if not good: log.write("french: %s\n" % " ".join(fwords)) log.write("english: %s\n" % " ".join(ewords))