def alignment_score(ref, hyp):
    """Mean absolute difference between the relative positions of matched
    words in the reference and the hypothesis (0 = identical ordering)."""
    if len(hyp) == 0:
        return 0
    n, m = float(len(ref)), float(len(hyp))
    alignment = align.alignment(ref, hyp)
    n_matches = len(alignment)
    if n_matches == 0:
        return 0
    return sum(abs(i / n - j / m) for i, j, _, _, _ in alignment) / float(n_matches)
def blaster(input, pident, qcovs):
    '''
    Iterates over a FASTA file and runs a blastp of each query against a
    multi-FASTA file that acts as the subject. It also builds a phylogenetic
    tree (.nw and .png formats) and searches for protein domains in every
    sequence returned by the BLAST (both query and subject).
    '''
    dir_results = fl.main_folder()
    for record in SeqIO.parse(input, "fasta"):
        dir_query = fl.query_folder(dir_results, record.id)

        # BLAST and filtering
        bls.blast(record, qcovs, pident, dir_query, "multifasta.txt")
        blast_aligned = record.id + "aligned.fasta"
        blast_fasta = record.id + "blast_fasta.fa"

        # Alignment and tree building, if there is more than one sequence
        try:
            al.alignment(blast_fasta, blast_aligned)
            nw_tree = dir_query + record.id + ".nw"
            al.tree(blast_aligned, nw_tree)
            tree_img = dir_query + record.id + ".png"
            if save_pngtree.get() == 1:
                al.tree_drawer(nw_tree, tree_img)
        except Exception:
            print("Not enough sequences to build a tree")

        # Protein domain search, if a valid database was provided
        try:
            domain_file = dir_query + record.id + "_domains.txt"
            pst.domain_scanner(blast_fasta, domain_file, prosite_db)
            print("PROSITE database found")
        except Exception:
            print(
                'No Prosite database was provided; domains will not be searched.\n'
            )

        # Remove temporary files
        os.remove(blast_aligned)
        os.remove(blast_fasta)
    return
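# A minimal invocation sketch for blaster. Two module-level names it relies
# on are not defined in this snippet: `save_pngtree` (its .get() == 1 check
# suggests a Tkinter IntVar from a GUI) and `prosite_db` (a path to a PROSITE
# database). Both are stubbed here; the file name and thresholds below are
# hypothetical, and the fl/bls/al/pst helper modules are assumed importable.
class _IntVarStub:
    """Stands in for a Tkinter IntVar so blaster can run without a GUI."""
    def __init__(self, value):
        self.value = value

    def get(self):
        return self.value


if __name__ == "__main__":
    save_pngtree = _IntVarStub(1)   # 1 = also render the .png trees
    prosite_db = "prosite.dat"      # hypothetical PROSITE database path
    # blastp every query in queries.fasta, keeping hits with >= 30% identity
    # (pident) and >= 50% query coverage (qcovs)
    blaster("queries.fasta", 30.0, 50.0)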
def features_student(wav_t, wav_s, midi_t, annotation_txt_s, plot_align=False):
    # load wavs
    y_t, sr_t = librosa.load(wav_t, sr=None)
    y_s, sr_s = librosa.load(wav_s, sr=None)

    # parse the teacher MIDI score and the Sonic Visualiser annotation of the student
    score_t = mid_note_parser(midi_t)
    score_s = sv_score_parser(annotation_txt_s)

    # align teacher and student scores; output format:
    # [ [[teacher_onset_0, teacher_pitch_0, teacher_duration_0, teacher_number_0],
    #    [student_onset_0, student_pitch_0, student_duration_0, student_number_0]],
    #   [[teacher_onset_1, teacher_pitch_1, teacher_duration_1, teacher_number_1],
    #    [student_onset_1, student_pitch_1, student_duration_1, student_number_1]], ... ]
    list_score_aligned = alignment(y_t, y_s, sr_t, sr_s, score_t, score_s,
                                   plot=plot_align)

    # segment the student score according to the rules
    score_s_segmented = notes_segmenation(score_s, list_score_aligned)

    # find the indices of list_score_aligned corresponding to the student score segments
    segment_start_end = indices_segment_start_end(list_score_aligned,
                                                  score_s_segmented)

    # stretch the student score notes in each segment so that the tempo matches
    # the teacher's, and split the aligned score into sub-lists (segments)
    list_score_aligned_seg, _, list_tempo_s = streching_student_notes(
        list_score_aligned, segment_start_end)

    # calculate features for each segment
    list_features = []
    for ii in range(len(list_score_aligned_seg)):
        list_features_seg = all_features(list_score_aligned_seg[ii], list_tempo_s[ii])
        list_features += list_features_seg

    # list_features_student ignores the missing notes (no student note aligned)
    list_features_student = []
    for ii in range(len(list_score_aligned)):
        if list_score_aligned[ii][1]:
            list_features_student.append(list_features[ii])

    return list_features_student
    else:
        list_tempo_t.append(None)
        list_tempo_s.append(None)

    return list_score_aligned_seg, list_tempo_t, list_tempo_s


if __name__ == "__main__":
    y_t, sr_t = librosa.load("./test/seconds(t).wav", sr=None)
    y_s, sr_s = librosa.load("./test/seconds1(s).wav", sr=None)

    # load the teacher MIDI score and the Sonic Visualiser annotation of the student
    score_t = mid_note_parser("./test/seconds(t).mid")
    score_s = sv_score_parser("./test/seconds1(s).txt")

    list_score_aligned = alignment(y_t, y_s, sr_t, sr_s, score_t, score_s)
    score_s_segmented = notes_segmenation(score_s, list_score_aligned)
    segment_start_end = indices_segment_start_end(list_score_aligned,
                                                  score_s_segmented)
    list_score_aligned_seg, list_tempo_t, list_tempo_s = streching_student_notes(
        list_score_aligned, segment_start_end)

    for seg in list_score_aligned_seg:
        print(seg)
    print(list_tempo_t)
    print(list_tempo_s)
# 3. render midi to wav
save_midi_2_audio(filename_xml_midi, filename_xml_wav)

# video processing in pipeline.sh

# 8. alignment
score_t = sv_score_parser(filename_xml_txt)
score_s = mid_note_parser(filename_video_midi)
y_t, sr_t = librosa.load(filename_xml_wav, sr=None)
y_s, sr_s = librosa.load(filename_video_wav, sr=None)
list_score_aligned = alignment(y_t, y_s, sr_t, sr_s, score_t, score_s,
                               plot=False)

# write the aligned notes as a tab-separated file: four teacher columns
# (onset, pitch, duration, number) followed by the four student columns
with open(filename_alignment, "w") as f:
    for note_t_s in list_score_aligned:
        if not note_t_s[0]:
            note_t_s[0] = [None, None, None, None]
        if not note_t_s[1]:
            note_t_s[1] = [None, None, None, None]
        f.write('\t'.join(str(v) for v in note_t_s[0] + note_t_s[1]) + '\n')
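# A minimal sketch of reading the alignment file back, under the assumptions
# that the 8-column TSV layout written above is used, that the literal string
# "None" marks a missing teacher or student note, and that all note fields
# are numeric (the float parsing here is an assumption about the parsers'
# output, not something the source confirms).
def read_alignment_file(filename_alignment):
    list_score_aligned = []
    with open(filename_alignment) as f:
        for line in f:
            fields = [None if v == "None" else float(v)
                      for v in line.rstrip('\n').split('\t')]
            # first four columns: teacher note; last four: student note
            list_score_aligned.append([fields[:4], fields[4:]])
    return list_score_aligned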
def word_similarity(ref, hyp):
    alignment = align.alignment(ref, hyp)
    n_matches = len(alignment)
    if n_matches == 0:
        return 0
    return sum(score for _, _, _, _, score in alignment) / float(n_matches)
def max_chunk(ref, hyp):
    """Length of the longest contiguous chunk of matches, relative to the
    total number of matches."""
    n_matches = len(align.alignment(ref, hyp))
    if n_matches == 0:
        return 0
    return max(len(chunk) for chunk in align.chunks(ref, hyp)) / float(n_matches)
def fragmentation(ref, hyp):
    """Number of contiguous chunks per match: 1/n_matches means one unbroken
    chunk, 1.0 means every match is its own chunk."""
    n_matches = len(align.alignment(ref, hyp))
    if n_matches == 0:
        return 0
    n_chunks = sum(1 for c in align.chunks(ref, hyp))
    return n_chunks / float(n_matches)
def n_matches(ref, hyp):
    return len(align.alignment(ref, hyp))
def align_recall(ref, hyp):
    if len(ref) == 0:
        return 0
    return len(align.alignment(ref, hyp)) / float(len(ref))
def align_precision(ref, hyp):
    if len(hyp) == 0:
        return 0
    return len(align.alignment(ref, hyp)) / float(len(hyp))
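# A runnable sketch of the metric family above, under two assumptions taken
# from how the functions unpack their inputs: align.alignment(ref, hyp)
# returns 5-tuples (ref_index, hyp_index, ref_word, hyp_word, score), and
# align.chunks(ref, hyp) returns the contiguous runs of those matches. The
# stub below fakes that interface; it is not the real `align` module.
if __name__ == "__main__":
    from types import SimpleNamespace

    def _stub_alignment(ref, hyp):
        # "the cat sat" vs "the sat cat": all three words match,
        # but only "the" keeps its position
        return [(0, 0, "the", "the", 1.0),
                (1, 2, "cat", "cat", 1.0),
                (2, 1, "sat", "sat", 0.9)]

    def _stub_chunks(ref, hyp):
        # the matches split into three single-word chunks
        return [[(0, 0)], [(1, 2)], [(2, 1)]]

    align = SimpleNamespace(alignment=_stub_alignment, chunks=_stub_chunks)

    ref = ["the", "cat", "sat"]
    hyp = ["the", "sat", "cat"]
    print(alignment_score(ref, hyp))   # (0 + 1/3 + 1/3) / 3 ~= 0.22
    print(word_similarity(ref, hyp))   # (1.0 + 1.0 + 0.9) / 3 ~= 0.97
    print(max_chunk(ref, hyp))         # 1 / 3
    print(fragmentation(ref, hyp))     # 3 chunks / 3 matches = 1.0
    print(align_recall(ref, hyp))      # 3 / 3 = 1.0
    print(align_precision(ref, hyp))   # 3 / 3 = 1.0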