def correct_bxb(annots_dir=None, records=None):
    """
    Generates a '.bxd' annotation file from the '.bxb' file of each record.

    For every record, the annotations in ``<annots_dir><rec>.bxb`` are read
    and a new list is saved to ``<annots_dir><rec>.bxd`` containing:

    - every annotation accepted by ``MIT.is_qrs_annotation``, unchanged;
    - for every NOTE annotation whose auxiliary text starts with a character
      other than 'O' or 'X', a new annotation located at the same time and
      whose code is ``CODES.CHARMAP`` of that first character.

    Any other annotation is dropped.

    Parameters
    ----------
    annots_dir:
        Path prefix (it is concatenated directly with the record name, so it
        must include the trailing separator). Defaults to the directory that
        was originally hard-coded in this function.
    records:
        Iterable with the record names to process. Defaults to the list of
        48 record numbers that was originally hard-coded in this function.
    """
    if annots_dir is None:
        annots_dir = '/home/remoto/tomas.teijeiro/Escritorio/anots_dani/'
    if records is None:
        records = [
            100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 111, 112, 113,
            114, 115, 116, 117, 118, 119, 121, 122, 123, 124, 200, 201, 202,
            203, 205, 207, 208, 209, 210, 212, 213, 214, 215, 217, 219, 220,
            221, 222, 223, 228, 230, 231, 232, 233, 234
        ]
    for rec in records:
        in_file = annots_dir + str(rec) + '.bxb'
        out_file = annots_dir + str(rec) + '.bxd'
        out = []
        #Corrections according to the -o flag of the bxb utility.
        for ann in MIT.read_annotations(in_file):
            if MIT.is_qrs_annotation(ann):
                out.append(ann)
            #Missed beats. A NOTE without auxiliary text carries no beat
            #character, so it is skipped instead of failing on aux[0].
            elif (ann.code == CODES.NOTE and ann.aux
                  and ann.aux[0] not in ('O', 'X')):
                new = MIT.MITAnnotation()
                new.code = CODES.CHARMAP[ann.aux[0]]
                new.time = ann.time
                out.append(new)
        MIT.save_annotations(out, out_file)
        print('Record {0} processed'.format(rec))
    print('The full database was successfully processed')
def _merge_annots(annlst, interp, reftime):
    """
    Merges an annotations list and an interpretation by selecting on the
    overlap interval the sequence with highest coverage.

    *annlst* is modified in place: every annotation located at or after the
    selected join point is removed from its end, plus as many additional
    trailing annotations as needed to drop the WFON matching each removed
    WFOFF. The function returns the selected join point relative to
    *reftime* (that is, ``join_point - reftime``).

    An empty *annlst* is accepted; in that case nothing is removed and the
    interpretation start is returned.
    """
    #Start of the overlap: first rhythm observation of the interpretation,
    #shifted to absolute time, with a margin of ms2sp(150).
    beg = next((ob.earlystart + reftime
                for ob in interp.get_observations(o.Cardiac_Rhythm)),
               np.inf) - ms2sp(150)
    #Ventricular flutter episodes change the reference point.
    vflut = next((a for a in reversed(annlst)
                  if a.code == ECGCodes.VFOFF and a.time >= beg), None)
    if vflut is not None:
        beg = vflut.time + 1
    #Index of the first annotation inside the overlap interval.
    bidx = next((i for i, a in enumerate(annlst) if a.time >= beg),
                len(annlst))
    #End of the overlap: last rhythm-end annotation, or the last annotation
    #if there is none. The fallback is guarded so that an empty list does
    #not raise IndexError.
    end = next((a.time for a in reversed(annlst)
                if a.code == ECGCodes.RHYTHM and a.aux == ')'),
               annlst[-1].time if annlst else beg)
    #First we calculate the possible 'join points' of the two sequences.
    ann_rhythm_times = set(
        a.time for a in annlst[bidx:]
        if a.time <= end and a.code == ECGCodes.RHYTHM)
    interp_rhythm_times = set(
        reftime + r.earlystart for r in interp.get_observations(
            o.Cardiac_Rhythm,
            filt=lambda rh: beg - reftime <= rh.earlystart <= end - reftime))
    jpts = ann_rhythm_times & interp_rhythm_times
    #If there are no join points, we give priority to the interpretation.
    if not jpts:
        jpt = beg
    else:
        overlap = annlst[bidx:]

        def coverage(pt):
            """
            Number of wave/beat annotations kept up to *pt* plus number of
            QRS/P/T observations of the interpretation from *pt* on.
            """
            kept = sum(1 for a in overlap
                       if a.time <= pt
                       and (a.code in (ECGCodes.TWAVE, ECGCodes.PWAVE)
                            or MITAnnotation.is_qrs_annotation(a)))
            added = sum(1 for _ in interp.get_observations(
                (o.QRS, o.PWave, o.TWave), pt - reftime, end - reftime))
            return kept + added

        #We select the join point with highest coverage.
        jpt = max(jpts, key=coverage)
    #We remove the discarded annotations (those after the selected join point),
    #ensuring the WFON/WFOFF pairs are consistent.
    offsets = 0
    while annlst and annlst[-1].time >= jpt:
        code = annlst.pop().code
        if code == ECGCodes.WFOFF:
            offsets += 1
        elif code == ECGCodes.WFON:
            offsets -= 1
    #Every removed WFOFF without its WFON forces removing annotations until
    #the matching WFON is dropped. We stop if the list is exhausted.
    while offsets > 0 and annlst:
        if annlst.pop().code == ECGCodes.WFON:
            offsets -= 1
    return jpt - reftime
def correct_bxc_bxd(annots_dir=None, records=None, sampling_freq=360.0,
                    tolerance_ms=150):
    """
    Copies the beat codes of the reference annotations to the test ones.

    For every record, the '.atr' (reference) and '.bxd' (test) annotation
    files are read. Each test annotation takes the code of the first
    reference annotation accepted by ``MIT.is_qrs_annotation`` that lies
    within *tolerance_ms* of it; test annotations without such a reference
    keep their code. The result is saved with the '.bxD' extension.

    Parameters
    ----------
    annots_dir:
        Path prefix (concatenated directly with the record name). Defaults
        to the directory that was originally hard-coded in this function.
    records:
        Iterable with the record names to process. Defaults to the list of
        48 record numbers that was originally hard-coded in this function.
    sampling_freq:
        Value passed to ``UNITS.set_sampling_freq`` before any conversion.
    tolerance_ms:
        Matching tolerance, converted with ``UNITS.msec2samples``.
    """
    UNITS.set_sampling_freq(sampling_freq)
    if annots_dir is None:
        annots_dir = '/home/remoto/tomas.teijeiro/Escritorio/anots_dani/'
    if records is None:
        records = [
            100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 111, 112, 113,
            114, 115, 116, 117, 118, 119, 121, 122, 123, 124, 200, 201, 202,
            203, 205, 207, 208, 209, 210, 212, 213, 214, 215, 217, 219, 220,
            221, 222, 223, 228, 230, 231, 232, 233, 234
        ]
    #The tolerance does not change between annotations, so it is converted
    #only once.
    window = UNITS.msec2samples(tolerance_ms)
    for rec in records:
        prefix = annots_dir + str(rec)
        ref = SortedList(MIT.read_annotations(prefix + '.atr'))
        test = MIT.read_annotations(prefix + '.bxd')
        for tann in test:
            #A dummy annotation is used to locate the beginning of the
            #tolerance window in the reference list.
            dummy = MIT.MITAnnotation()
            dummy.time = int(tann.time - window)
            i = ref.bisect_left(dummy)
            #We scan by index instead of slicing, to avoid copying the tail
            #of the list for every test annotation, and we stop as soon as
            #the window is exceeded. This assumes the reference list is
            #ordered by time, which the bisection above already requires.
            while i < len(ref):
                rann = ref[i]
                delta = rann.time - tann.time
                if delta > window:
                    break
                if MIT.is_qrs_annotation(rann) and abs(delta) <= window:
                    tann.code = rann.code
                    break
                i += 1
        MIT.save_annotations(test, prefix + '.bxD')
        print('Record {0} processed'.format(rec))
    print('The full database was successfully processed')
def _clean_artifacts_redundancy(annots):
    """
    Removes those artifact annotations that are close to a QRS annotation,
    as well as redundant rhythm annotations.

    An artifact (ARFCT) annotation is removed when the previous or the next
    annotation in the sequence of beat and artifact annotations is closer
    than ms2sp(150). A rhythm annotation is removed when the preceding
    rhythm annotation has the same *aux* value.

    *annots* is modified in place (it must support ``bisect_left`` and
    ``pop(index)``) and it is also returned.
    """
    DISTANCE = ms2sp(150)
    banns = [a for a in annots
             if MITAnnotation.is_qrs_annotation(a)
             or a.code == ECGCodes.ARFCT]
    i = 0
    while i < len(banns):
        curr = banns[i]
        near_prev = i > 0 and curr.time - banns[i - 1].time < DISTANCE
        near_next = (i < len(banns) - 1
                     and banns[i + 1].time - curr.time < DISTANCE)
        if curr.code == ECGCodes.ARFCT and (near_prev or near_next):
            #We cannot use 'remove' due to a bug in SortedList, so the
            #exact object is searched from its bisection point.
            j = annots.bisect_left(curr)
            while annots[j] is not curr:
                j += 1
            annots.pop(j)
            #The index is not advanced: the next annotation takes the
            #place of the removed one and is checked against its new
            #neighbors.
            banns.pop(i)
        else:
            i += 1
    #Redundant rhythms. The last rhythm annotation kept is tracked along a
    #single pass, instead of searching backwards for each new rhythm.
    prev = (annots[0]
            if annots and annots[0].code == ECGCodes.RHYTHM else None)
    i = 1
    while i < len(annots):
        curr = annots[i]
        if curr.code == ECGCodes.RHYTHM:
            if prev is not None and prev.aux == curr.aux:
                annots.pop(i)
                continue
            prev = curr
        i += 1
    return annots
#The record names are read from the 'RECORDS' file of the database, one per
#line. The file is explicitly closed after reading.
with open(DATABASE_DIR + 'RECORDS') as _recfile:
    RECORDS = [l.strip() for l in _recfile]
#Records to be interpreted can be selected from command line
SLC_STR = '0:{0}'.format(len(RECORDS)) if len(sys.argv) < 2 else sys.argv[1]
#We get a slice from the input string. The string is split by ':' and padded
#to three fields; empty fields are mapped to None and the rest to integers.
SLC = slice(*[int(x) if x else None
              for x in (SLC_STR.split(':') + ['', '', ''])[:3]])
#Constant parameters passed to process_record for every record.
TFACTOR = 5.0
MIN_DELAY = 1750
MAX_DELAY = 20.0
for rec in RECORDS[SLC]:
    fname = OUTPUT_DIR + str(rec) + '.i' + ANNOTATOR
    #Records with an existing output file are not processed again.
    if os.path.isfile(fname):
        print('Output file "{0}" already exists. Skipping record {1}'.format(
            fname, rec))
        continue
    #Time check
    T0 = time.time()
    #The overlap is evaluated for each record, as in the original code.
    FR_OVERLAP = int(ms2sp(3000))
    print('Processing record {0} at 250.0 Hz '.format(rec))
    ANNOTS = process_record(DATABASE_DIR + str(rec), ANNOTATOR, TFACTOR,
                            FR_LEN, FR_OVERLAP, MIN_DELAY, MAX_DELAY,
                            KFACTOR)
    MITAnnotation.save_annotations(ANNOTS, fname)
    print('Record '+ str(rec) +' processed in '+ str(time.time() - T0) +'s')
print('The full database was sucessfully processed. Total branches: {0}'.format(
    Interpretation.counter))
print('Reasoning statistics:')
pprint.pprint(reasoning.STATS.most_common())
#Now we write the .hea and the .dat files.
heafmt = ('{0} 1 {1} {2} {3}\n'
          '{0}.dat 16 {4} 16 0 0 0 0 MLII\n')
with open(PATH + NAME + '.hea', 'w') as hea:
    hea.write(heafmt.format(NAME, FREQ, len(sig),
                            tp.strftime('%H:%M:%S %d/%m/%Y'), GAIN))
#The samples are packed as little-endian 16-bit integers, so the file has to
#be opened in binary mode: a text-mode file rejects bytes in Python 3 and may
#translate newline bytes on some platforms.
with open(PATH + NAME + '.dat', 'wb') as dat:
    fmt = '<{0}h'.format(len(sig))
    dat.write(struct.pack(fmt, *sig))
#And we create the (AFIB annotations according to the loaded episodes.
#The end time of the record is obtained from the number of samples and the
#sampling frequency (instead of assuming 4 ms per sample), consistently with
#the conversion of the annotation times below.
etp = tp + dt.timedelta(milliseconds=len(sig)*1000.0/FREQ)
#The episodes are selected by the first key of AF_EPISODES that is a prefix
#of the record name.
devid = next((d for d in AF_EPISODES if NAME.startswith(d)), None)
annots = []
if devid is not None:
    #Only the episodes starting within the record are considered.
    afibs = [ep for ep in AF_EPISODES[devid] if tp <= ep.start <= etp]
    for af in afibs:
        #Two annotations for each episode
        bann = MITAnnotation.MITAnnotation()
        bann.code = ECGCodes.RHYTHM
        bann.time = int((af.start-tp).total_seconds()*FREQ)
        bann.aux = b'(AFIB'
        eann = MITAnnotation.MITAnnotation()
        eann.code = ECGCodes.RHYTHM
        #Episodes ending after the record are truncated to the record end.
        eann.time = int((min(etp, af.end)-tp).total_seconds()*FREQ)
        #The end of AF is encoded as 'back to normality'
        eann.aux = b'(N'
        annots.append(bann)
        annots.append(eann)
#Annotations are stored in a file with the '.mbg' extension.
MITAnnotation.save_annotations(annots, PATH+NAME+'.mbg')
def interp2ann(interp, btime=0, offset=0, include_format=True):
    """
    Generates a list of annotations representing the observations from
    an interpretation. The *btime* optional parameter allows to include only
    the observations after a specific time point, and *offset* allows to
    define a constant time to be added to the time point of each annotation.
    An optional format annotation of type NOTE can be included at the
    beginning.

    NOTE: A first annotation is included at the beginning of the list, with
    time=*offset*, to indicate that the annotations are created with the
    specific format for Construe interpretations. This format includes the
    following features (for version 17.01):
        - Beat annotations include the specific delineation information for
        each lead in a dictionary in JSON format. The keys in this dictionary
        are the lead names, and the values are a sequence of integer numbers.
        Each triple in this sequence determines a wave within the QRS complex.
        - WFON and WFOFF annotations include the type of wave they delimit in
        the *subtyp* field. QRS complexes are described by the SYSTOLE code,
        while P and T waves limits have the PWAVE or TWAVE code, respectively.
        - PWAVE and TWAVE annotations include the amplitude of each lead, in a
        dictionary in JSON format in the AUX field.

    The result is a ``sortedcontainers.SortedList`` with the generated
    annotations.
    """
    annots = sortedcontainers.SortedList()

    def _new_ann(code, time, **fields):
        """
        Creates an annotation with the given code, the integer part of
        *time*, and any additional attribute passed by keyword.
        """
        ann = MITAnnotation.MITAnnotation()
        ann.code = code
        ann.time = int(time)
        for name, value in fields.items():
            setattr(ann, name, value)
        return ann

    if include_format:
        annots.add(_new_ann(C.NOTE, offset, aux=FMT_STRING))
    beats = list(interp.get_observations(
        o.QRS, filt=lambda q: q.time.start >= btime))
    #We get the beat observations in the best explanation branch.
    for beat in beats:
        #We tag all beats as normal, and we include the delineation. The
        #delineation on each lead is included as a json string in the peak
        #annotation.
        beg = _new_ann(C.WFON, offset + beat.earlystart, subtype=C.SYSTOLE)
        peak = _new_ann(beat.tag, offset + beat.time.start)
        delin = {}
        displ = beg.time - peak.time
        for lead in beat.shape:
            shape = beat.shape[lead]
            #The shape is moved by the displacement between the onset and
            #the peak annotations only while its points are read, and it is
            #always moved back, even if the reading fails.
            shape.move(displ)
            try:
                waveseq = sum((w.pts for w in shape.waves), tuple())
                delin[lead] = tuple(int(w) for w in waveseq)
            finally:
                shape.move(-displ)
        peak.aux = json.dumps(delin)
        end = _new_ann(C.WFOFF, offset + beat.lateend, subtype=C.SYSTOLE)
        annots.add(beg)
        annots.add(peak)
        annots.add(end)
    #P and T wave annotations
    pstart = beats[0].earlystart - ms2sp(400) if beats else 0
    tend = beats[-1].lateend + ms2sp(400) if beats else 0
    for wtype in (o.PWave, o.TWave):
        code = C.PWAVE if wtype is o.PWave else C.TWAVE
        for wave in interp.get_observations(wtype, pstart, tend):
            if wave.earlystart >= btime:
                beg = _new_ann(C.WFON, offset + wave.earlystart,
                               subtype=code)
                end = _new_ann(C.WFOFF, offset + wave.lateend,
                               subtype=code)
                #The peak is placed at the midpoint of the wave limits.
                peak = _new_ann(code, (end.time + beg.time) / 2.,
                                aux=json.dumps(wave.amplitude))
                annots.add(beg)
                annots.add(peak)
                annots.add(end)
    #Flutter annotations
    for flut in interp.get_observations(o.Ventricular_Flutter, btime):
        annots.add(_new_ann(C.VFON, offset + flut.earlystart))
        for vfw in interp.get_observations(o.Deflection, flut.earlystart,
                                           flut.lateend):
            annots.add(_new_ann(C.FLWAV, offset + vfw.time.start))
        annots.add(_new_ann(C.VFOFF, offset + flut.lateend))
    #All rhythm annotations
    last_rhythm = None
    for rhythm in interp.get_observations(o.Cardiac_Rhythm, btime):
        last_rhythm = rhythm
        if not isinstance(rhythm, o.RhythmStart):
            annots.add(_new_ann(C.RHYTHM, offset + rhythm.earlystart,
                                aux=C.RHYTHM_AUX[type(rhythm)]))
    #The end of the last rhythm is also added as an annotation. If there are
    #no rhythms, we go on.
    if last_rhythm is not None:
        annots.add(_new_ann(C.RHYTHM, offset + last_rhythm.earlyend,
                            aux=')'))
    #Unintelligible R-Deflections
    for rdef in interp.get_observations(
            o.RDeflection, btime,
            filt=lambda a: a in interp.unintelligible or a in interp.focus):
        #We store unintelligible annotations as artifacts
        annots.add(_new_ann(C.ARFCT, offset + rdef.earlystart))
    return annots
def _standardize_rhythm_annots(annots):
    """
    Standardizes a set of annotations obtained from the interpretation
    procedure to make them compatible with the criteria applied in the
    MIT-BIH Arrhythmia database in the labeling of rhythms.

    A new ``sortedcontainers.SortedList`` is returned. The annotation
    objects are shared with *annots*, and the *aux* field of some rhythm
    annotations is overwritten in place. All the rhythm labels are handled
    as byte strings, consistently with the labels assigned here.
    """
    dest = sortedcontainers.SortedList()
    for ann in annots:
        code = ann.code
        if code in (ECGCodes.RHYTHM, ECGCodes.VFON):
            #TODO remove this if not necessary
            if code == ECGCodes.VFON:
                newann = MITAnnotation.MITAnnotation()
                newann.code = ECGCodes.RHYTHM
                newann.aux = b'(VFL'
                newann.time = ann.time
                dest.add(newann)
            ############################################################
            #For convention with original annotations, we only admit   #
            #bigeminies with more than two pairs, and trigeminies with #
            #more than two triplets,                                   #
            ############################################################
            #NOTE(review): the same threshold of 7 beats is applied to
            #both rhythms, as in the original code.
            if ann.aux in (b'(B', b'(T'):
                end = next((a for a in annots if a.time > ann.time
                            and a.code in (ECGCodes.RHYTHM, ECGCodes.VFON)),
                           annots[-1])
                nbeats = searching.ilen(
                    a for a in annots
                    if a.time >= ann.time and a.time <= end.time
                    and MITAnnotation.is_qrs_annotation(a))
                if nbeats < 7:
                    continue
            #############################################################
            # Pauses and missed beats are replaced by bradycardias (for #
            # consistency with the reference annotations).              #
            #############################################################
            if ann.aux in (b'(BK', b'P'):
                ann.aux = b'(SBR'
            #Extrasystoles and couplets are discarded, and a rhythm is only
            #added if it differs from the last rhythm already added.
            if ann.aux not in (b'(EXT', b'(CPT'):
                prev = next((a for a in reversed(dest)
                             if a.code == ECGCodes.RHYTHM), None)
                if prev is None or prev.aux != ann.aux:
                    dest.add(ann)
        else:
            dest.add(ann)
    #################################
    #Atrial fibrillation correction #
    #################################
    #Total time between each (AFIB annotation and the following rhythm
    #annotation (or the last annotation, if there are no more rhythms).
    iterator = iter(dest)
    afibtime = 0
    while True:
        start = next((a.time for a in iterator
                      if a.code == ECGCodes.RHYTHM and a.aux == b'(AFIB'),
                     None)
        if start is None:
            break
        end = next((a.time for a in iterator if a.code == ECGCodes.RHYTHM),
                   dest[-1].time)
        afibtime += end - start
    #If more than 1/20 of the time of atrial fibrillation...
    if annots and afibtime > (annots[-1].time - annots[0].time) / 20.0:
        iterator = iter(dest)
        rhythms = (b'(N', b'(SVTA')
        start = next((a for a in iterator
                      if a.code == ECGCodes.RHYTHM and a.aux in rhythms),
                     None)
        while start is not None:
            end = next((a for a in iterator if a.code == ECGCodes.RHYTHM),
                       dest[-1])
            #All normal rhythms that satisfy the Lian method to identify
            #afib by rhythm are now considered afib. We also check the
            #method considering alternate RRs to avoid false positives with
            #bigeminies.
            fragment = dest[dest.bisect_left(start):dest.bisect_right(end)]
            rrs = np.diff([a.time for a in fragment
                           if MITAnnotation.is_qrs_annotation(a)])
            if (is_afib_rhythm_lian(rrs) and is_afib_rhythm_lian(rrs[0::2])
                    and is_afib_rhythm_lian(rrs[1::2])):
                start.aux = b'(AFIB'
            #Next rhythm. If 'end' is the same annotation as 'start' (the
            #fallback to the last annotation), it cannot be taken as the
            #next start, or the loop would never finish.
            start = (end if end is not start and end.aux in rhythms
                     else next((a for a in iterator
                                if a.code == ECGCodes.RHYTHM
                                and a.aux in rhythms), None))
    ##############################
    #Paced rhythm identification #
    ##############################
    #To consider the presence of paced rhythms in a record, we require at
    #least a mean of one paced beat each 10 seconds.
    pacedrec = sum(1 for a in dest if a.code == ECGCodes.PACE) > 180
    if pacedrec:
        iterator = iter(dest)
        rhythms = (b'(AFIB', b'(N', b'(SBR', b'(SVTA')
        start = next((a for a in iterator
                      if a.code == ECGCodes.RHYTHM and a.aux in rhythms),
                     None)
        while start is not None:
            end = next((a for a in iterator if a.code == ECGCodes.RHYTHM),
                       dest[-1])
            #If there are paced beats in a rhythm fragment, the full
            #rhythm is identified as paced.
            if any(start.time < a.time < end.time
                   and a.code == ECGCodes.PACE
                   for a in dest[dest.index(start):dest.index(end)]):
                start.aux = b'(P'
            #Next rhythm (see the note in the atrial fibrillation loop).
            start = (end if end is not start and end.aux in rhythms
                     else next((a for a in iterator
                                if a.code == ECGCodes.RHYTHM
                                and a.aux in rhythms), None))
    #########################################
    # Redundant rhythm description removing #
    #########################################
    #The last rhythm annotation kept is tracked along a single pass, instead
    #of searching backwards for each new rhythm.
    prev = dest[0] if dest and dest[0].code == ECGCodes.RHYTHM else None
    i = 1
    while i < len(dest):
        curr = dest[i]
        if curr.code == ECGCodes.RHYTHM:
            if prev is not None and prev.aux == curr.aux:
                dest.pop(i)
                continue
            prev = curr
        i += 1
    return dest
#Records whose beat annotations are compared.
RECORDS = [
    100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 111, 112, 113, 114, 115,
    116, 117, 118, 119, 121, 122, 123, 124, 200, 201, 202, 203, 205, 207, 208,
    209, 210, 212, 213, 214, 215, 217, 219, 220, 221, 222, 223, 228, 230, 231,
    232, 233, 234
]
set_sampling_freq(360.0)
#Dictionary to save the discrepancy at record-level
dist = {}
miss = 0
for rec in RECORDS:
    dist[rec] = []
    REF_FILE = ANNOTS_DIR + str(rec) + '.atr'
    TEST_FILE = ANNOTS_DIR + str(rec) + '.wbr'
    #Only the time points of the beat annotations are compared.
    reference = np.array([
        anot.time for anot in MIT.read_annotations(REF_FILE)
        if MIT.is_qrs_annotation(anot)
    ])
    test = np.array([
        anot.time for anot in MIT.read_annotations(TEST_FILE)
        if MIT.is_qrs_annotation(anot)
    ])
    #Missing beat search
    for b in reference:
        #Signed distance from the reference beat to the test beats. The
        #search stops as soon as the absolute distance starts to grow, so
        #'err' keeps the smallest distance found up to that point.
        #np.inf is used because the np.Inf alias is not available in
        #NumPy 2.
        err = np.inf
        for t in test:
            bdist = t - b
            if abs(bdist) > abs(err):
                break
            else:
                err = bdist