def process_files(files, opts):
    """ Create a BILOU tier for each requested tier of each annotation file

    For each file: read it, choose the destination transcription according to
    opts.keep_tiers, build one BILOU tier per name of opts.tiers_names, then
    write the destination transcription into a new file named with
    opts.out_file_format (same extension as the input file).

    @param files: the file(s) to process
    @param opts: the options; attributes read here are keep_tiers, tiers_names,
        out_tier_format, base_time, radius, labels, bilu_format, o_label
        and out_file_format
    """
    annotationdata.aio = sppas_tools.getAnnotationdataAio()  # import annotationdata.aio or annotationdata.io
    from annotationdata import Transcription, Tier

    for f in files:
        print("[%s] Loading annotation file..." % f)
        # Read an annotated file
        trs = annotationdata.aio.read(f)
        print("[%s] Number of tiers:%d" % (f, trs.GetSize()))

        # Prepare the output Transcription
        destTrs = trs  # default/'all' => work directly on trs
        destAppendProcessed = False
        if opts.keep_tiers.startswith('process'):
            # empty copy of trs, the processed tiers are appended to it
            destTrs = Transcription(trs.GetName(), trs.GetMinTime(),
                                    trs.GetMaxTime())
            destAppendProcessed = True
        elif opts.keep_tiers in ('any', 'bilou'):
            # empty copy of trs, only the BILOU tiers are appended to it
            destTrs = Transcription(trs.GetName(), trs.GetMinTime(),
                                    trs.GetMaxTime())

        # Look for the tier(s) to process
        for tier_name in opts.tiers_names:
            tier = sppas_tools.tierFind(trs, tier_name)
            if tier is None:
                # report the name we were actually looking for (the loop
                # variable), not the unrelated opts.tier_name attribute
                print("[%s] Any tier with name similar to '%s' ;-(" %
                      (f, tier_name))
                print("[%s] Tiers are : %s" % (f, ''.join([
                    "{}[{}] '{}'".format("\n " if (i % 4) == 0 else ", ", i,
                                         t.GetName())
                    for i, t in enumerate(trs)
                ])))
                # stop looking for the following tiers; the file is still
                # saved with what has been built so far
                break
            print("[%s] Searched tier '%s' has %d annotations" %
                  (f, tier.GetName(), tier.GetSize()))
            if destAppendProcessed:
                destTrs.Append(tier)

            # Create the BILOU tier
            bilouName = opts.out_tier_format.format(tier_name, opts.base_time)
            bilouTier = Tier(bilouName)
            # NOTE(review): max time is passed before min time, as in the
            # original call - confirm against the splitIntervals() signature
            splitIntervals(bilouTier, opts.base_time, trs.GetMaxTime(),
                           trs.GetMinTime(), opts.radius)
            bilouTags(tier, bilouTier, opts.labels, opts.bilu_format,
                      opts.o_label)
            print("[%s] BILOU tier '%s' has %d annotations" %
                  (f, bilouTier.GetName(), bilouTier.GetSize()))
            destTrs.Append(bilouTier)

        # Saving file
        (root, ext) = os.path.splitext(f)
        of = opts.out_file_format.format(root,
                                         "+".join(opts.tiers_names)) + ext
        print("[%s] Saving annotations into %s" % (f, of))
        annotationdata.aio.write(of, destTrs)
def getTier(trs, tierName=None, errorMsg=None):
    """ Resolve a tier from an annotation file (a tier is passed through)

    @param trs: an annotation file or a tier
    @param tierName: the name of the tier to look for (not used when trs is
        already a tier)
    @param errorMsg: (optional) message template for the ValueError raised
        when no tier is found; it is formatted with the local names of this
        function (trs, tierName, errorMsg, tier); if falsy, no ValueError is
        raised and None is returned instead
    @return: the tier found, or None
    """
    from annotationdata import Tier

    if not isinstance(trs, Tier):
        # an annotation file => search the tier by name
        tier = sppas_tools.tierFind(trs, tierName)
        if errorMsg and tier is None:
            raise ValueError(errorMsg.format(**locals()))
        return tier
    # already a tier #TODO? check tierName
    return trs
def process_files(files, opts):
    """ Export the tier named opts.tier_name of each annotation file into a CSV file

    The output file name is the input file name with its extension replaced
    by "-<tier_name>.csv". When the input name has no extension the suffix is
    appended instead, so the input file is never overwritten.
    A file without the searched tier is reported and skipped; the following
    files are still processed.

    @param files: the file(s) to process
    @param opts: the options; only opts.tier_name is read here
    """
    annotationdata.aio = sppas_tools.getAnnotationdataAio()  # import annotationdata.aio or annotationdata.io
    from annotationdata import Transcription

    suffix = "-" + opts.tier_name + ".csv"
    for f in files:
        print("[%s] Loading annotation file..." % f)
        # Read an annotated file, put content in a Transcription object.
        trs = annotationdata.aio.read(f)
        print("[%s] Number of tiers:%d" % (f, trs.GetSize()))

        tier = sppas_tools.tierFind(trs, opts.tier_name)
        if tier is None:
            print("[%s] Any tier with name similar to '%s' ;-(" %
                  (f, opts.tier_name))
            print("[%s] Tiers are : %s" % (f, ''.join([
                "{}[{}] '{}'".format("\n " if (i % 4) == 0 else ", ", i,
                                     t.GetName())
                for i, t in enumerate(trs)
            ])))
            # skip this file only (a 'break' here would silently drop all
            # the remaining files)
            continue
        print("[%s] Searched tier '%s' has %d annotations" %
              (f, tier.GetName(), tier.GetSize()))

        # A transcription holding only the searched tier
        csvTrs = Transcription()
        csvTrs.Append(tier)

        # Output name: a function is used as replacement so that backslashes
        # or group references in the tier name are taken literally
        of, replaced = re.subn(r"\.\w+$", lambda _match: suffix, f)
        if not replaced:
            # no extension to replace => append, never write over the input
            of = f + suffix
        print("[%s] Saving tier into %s" % (f, of))
        annotationdata.aio.write(of, csvTrs)
def process_feedback_per_phases(trs,
                                fb_tierName='P-Feedback',
                                phases_tierName='Script',
                                most_common=False):
    """ Process relation between feedbacks and 'phases' (Script, eye's directions, ...)

    The distinct labels of the phases tier are the 'phases'. For each phase,
    the feedbacks that are during / start / finish / are overlapped by an
    annotation of that phase are selected, and counts, frequencies and
    duration statistics are printed.

    @param trs: the annotation file in which both tiers are searched
    @param fb_tierName: name of the feedbacks tier
    @param phases_tierName: name of the phases tier
    @param most_common: if true, phases are processed by decreasing number of
        occurrences, else by order of first appearance in the phases tier
    @return: the 'res' attribute bag (phases_tier, fb_tier, phases,
        phases_counter, perphases), or None when one of the tiers is missing
    """
    from annotationdata import Filter, SingleFilter, Sel, RelationFilter, Rel
    from annotationdata.filter.delay_relations import OrPredicates

    def ratio(num, den):
        """ num/den, or 0. when den is zero (e.g. only zero-length annotations) """
        return num / den if den else 0.

    #TODO? parameters
    fb_phases_min_overlap = 1.  # minimum overlap time for overlaps/overlappedby relations

    # nota: the namedtuple *class* itself is used as a bag of attributes
    # (it is never instantiated), the field list is only informative
    res = namedtuple(
        'res', "phases_tier, phases, phases_counter, phases_radius, perphases"
        + ", fb_tier, fb_durations, fb_duration_stats, fb_radius")

    # Search the tiers
    missing = []
    # (a) Phases
    res.phases_tier = sppas_tools.tierFind(trs, phases_tierName)
    if res.phases_tier is None:
        print("\t[{phases_tierName}] No phases tier found ;-(".format(
            phases_tierName=phases_tierName))
        missing.append(phases_tierName)
    # (b) 'P-Feedback'
    res.fb_tier = sppas_tools.tierFind(trs, fb_tierName)
    if res.fb_tier is None:
        print("[{fb_tierName}] No feedbacks tier found ;-(".format(
            fb_tierName=fb_tierName))
        missing.append(fb_tierName)
    if missing:
        # the placeholders of this message were never filled in
        print("[%s] %d unfound tier(s) => skip this file" %
              (", ".join(str(name) for name in missing), len(missing)))
        return

    # Look for the various phases
    res.phases = []  # phases, by order of first appearance
    res.perphases = dict()  # phase => attribute bag
    res.phases_counter = Counter()  # phase => number of annotations
    for ph_ann in res.phases_tier:
        phase = ph_ann.GetLabel().GetValue()
        res.phases_counter[phase] += 1
        if res.phases_counter[phase] == 1:  # first occurrence => init phase
            res.phases.append(phase)
            res.perphases[phase] = namedtuple(
                'perph', "phase, count, tier, durations, sumduration")
            res.perphases[phase].phase = phase

    # sum of annotations/durations of phases_tier
    ptSize = len(res.phases_tier)
    ptSumDurations = sum(durations(res.phases_tier))
    print(
        "\t[{fb_tierName}/{phases_tierName}] {nbphases} phases (#annots:{ptSize}, sum_durations={ptSumDurations:.3f}), {fbsize} feedbacks"
        .format(fb_tierName=fb_tierName,
                phases_tierName=phases_tierName,
                nbphases=len(res.phases),
                ptSize=ptSize,
                ptSumDurations=ptSumDurations,
                fbsize=len(res.fb_tier)))
    if not res.phases:
        return res  # any phases

    # sort phases by occurences
    if most_common:
        res.phases = [ph for (ph, _cnt) in res.phases_counter.most_common()]

    # phases_tier filter
    ptFilter = Filter(res.phases_tier)
    # split feedback tier by phases
    fbFilter = Filter(res.fb_tier)
    phRel = OrPredicates(
        Rel('during'),  # fb during the phase
        Rel('starts'),  # fb starts with the phase (and is shorter)
        Rel('finishes'),  # fb ends with the phase (and is shorter)
        # fb overlapped by the phase (i.e. start during the phase but end after)
        Rel(overlappedby=fb_phases_min_overlap)
        #?, Rel('startedby')  #? fb starts with the phase and is longer
    )

    for phase in res.phases:
        perph = res.perphases[phase]
        perph.count = res.phases_counter[phase]
        # annotations of the phases tier having exactly this label
        phaseFilter = SingleFilter(Sel(exact=phase), ptFilter)
        perph.tier = phaseFilter.Filter()
        perph.durations = durations(perph.tier)
        perph.sum_durations = sum(perph.durations)
        print(
            "\t Phase:'{phase}' => #annot={perph.count} ({pannot:.0%}), sum_durations={perph.sum_durations} ({psdur:.0%}) (mean={s.Mean:.3f}, min={s.Min:.3f}, max={s.Max:.3f})"
            .format(phase=phase,
                    perph=perph,
                    s=stats(perph.durations),
                    pannot=ratio(float(perph.count), ptSize),
                    psdur=ratio(perph.sum_durations, ptSumDurations)))

        # feedbacks linked to this phase
        rf = RelationFilter(phRel, fbFilter, phaseFilter)
        perph.fb_tier = rf.Filter()
        perph.fb_count = len(perph.fb_tier)
        perph.fb_durations = durations(perph.fb_tier)
        perph.fb_sum_durations = sum(perph.fb_durations)
        # ratio() also covers a phase whose total duration is zero
        perph.fb_per_sec = ratio(perph.fb_count, perph.sum_durations)
        perph.sec_per_fb = ratio(perph.sum_durations, perph.fb_count)
        print(
            "\t #feedback={perph.fb_count}, freq={perph.fb_per_sec:.3f}/s (every {perph.sec_per_fb:.3f}s), sum_durations={perph.fb_sum_durations:.3f} ({sdurpercent:.0%})"
            .format(perph=perph,
                    sdurpercent=ratio(perph.fb_sum_durations,
                                      perph.sum_durations)))
        if perph.fb_count:
            print(
                "\t durations: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f}, {s.Max:.3f}]"
                .format(s=stats(perph.fb_durations)))
            statsPerLabel(perph.fb_tier, "\t\t", normLabelWithSep
                          #TODO(pb repeated phases)# , intraDelays
                          )
    return res
def process_feedback_eyes(trs, fb_tierName='Feedback', eyes_tierName='Regard'):
    """ Process relation between feedbacks and eyes direction

    Each feedback (X) is linked to the eyes-direction annotations (Y) it
    (partly) overlaps. Feedbacks are then grouped by the number of eyes
    direction changes they cover (none / one / various), and the eyes
    direction labels and the transitions between them are counted and printed.

    @param trs: the annotation file in which both tiers are searched
    @param fb_tierName: name of the feedbacks tier
    @param eyes_tierName: name of the eyes direction tier
    @return: None (results are only printed)
    """
    from annotationdata import Filter, RelationFilter
    from annotationdata.filter.delay_relations import IntervalsDelay, OrPredicates

    # nota: the namedtuple *class* itself is used as a bag of attributes
    res = namedtuple(
        'res', "eyes_tier, eyes_radius" +
        ", fb_tier, fb_durations, fb_duration_stats, fb_radius")

    # Search the eyes direction and feedbacks tiers
    missing = []
    # (a) eyes direction
    res.eyes_tier = sppas_tools.tierFind(trs, eyes_tierName)
    if res.eyes_tier is None:
        print("\t[{eyes_tierName}] No eyes direction tier found ;-(".format(
            eyes_tierName=eyes_tierName))
        missing.append(eyes_tierName)
    # (b) feedbacks
    res.fb_tier = sppas_tools.tierFind(trs, fb_tierName)
    if res.fb_tier is None:
        print("[{fb_tierName}] No feedbacks tier found ;-(".format(
            fb_tierName=fb_tierName))
        missing.append(fb_tierName)
    if missing:
        # the placeholders of this message were never filled in
        print("[%s] %d unfound tier(s) => skip this file" %
              (", ".join(str(name) for name in missing), len(missing)))
        return

    # Combine the 2 tiers
    # - create the predicates (X is the feedback, Y the eyes direction)
    # X during Y <=> X starts after Y and X ends before Y <=> (Xs>Ys) and (Xe<Ye)
    pStable = IntervalsDelay.create('start_start_after_min', 0,
                                    'end_end_min', 0,
                                    name='stable')
    # Y ends during X (Y before X) <=> (Xs>Ys) and (Xs<Ye) and (Xe>Ye)
    pBefore = IntervalsDelay.create('start_start_after_min', 0,
                                    'start_end_min', 0,
                                    'end_end_after_min', 0,
                                    name='before')
    # Y inside X <=> X starts before Y and X ends after Y <=> (Xs<Ys) and (Xe>Ye)
    pInside = IntervalsDelay.create('start_start_min', 0,
                                    'end_end_after_min', 0,
                                    name='inside')
    # Y starts during X (Y after X) <=> (Xs<Ys) and (Xe>Ys) and (Xe<Ye)
    pAfter = IntervalsDelay.create('start_start_min', 0,
                                   'end_start_after_min', 0,
                                   'end_end_min', 0,
                                   name='after')
    # nota: a former version combined Allen relations instead (during,
    #   overlappedby, finishes, startedby, contains, finishedby, starts,
    #   overlaps, plus start_end/end_start delays); sbia() below still knows
    #   how to classify those relation names
    # nota: OrPredicates orders
    #   if X=Y, all predicates are true
    #   if X finishes Y, pBefore and pStable are true
    #   if X started by Y, pAfter and pStable are true
    pConv = OrPredicates(pInside, pBefore, pAfter, pStable)

    def sbia(rel):
        """ Classify relation in 'stable', 'before'/'inside'/'after' (None if unknown) """
        name = str(rel)
        if name in ('stable', 'before', 'inside', 'after'):
            return name
        if name == 'during':
            return 'stable'  # X during Y => stable
        if name in ('overlappedby', 'finishes'):
            return 'before'  # X rel Y => Y starts before X
        if name in ('startedby', 'contains', 'finishedby'):
            return 'inside'  # X rel Y => Y inside X
        if name in ('starts', 'overlaps'):
            return 'after'  # X rel Y => Y ends after X
        if name.startswith('start_end'):
            return 'before'  # -max < X start_end Y < 0 => Y ends just before X
        if name.startswith('end_start'):
            return 'after'  # 0 < X end_start Y < max => Y starts just after X
        return None  #ERROR

    def print_eyes_directions(ygr, links):
        """ Print the eyes direction labels (with percents) of a list of (x, rel, y) """
        labels_cnt = Counter(
            str(y.GetLabel().GetValue()) for (_x, _rel, y) in links)
        print("\t {ygr} eyes-direction: {cnt}".format(
            ygr=ygr, cnt=counterWithPercent(labels_cnt)))

    fEyes = Filter(res.eyes_tier)
    fFb = Filter(res.fb_tier)
    rf = RelationFilter(pConv, fFb, fEyes)
    rConv = [(x, rel, y) for (x, rel, y) in rf]  # evaluate the filter once
    print(
        "\t[{fb_tierName}|{eyes_tierName}] {tier_len} feedbacks-eyes links (for {fb_len} feedbacks and {eyes_len} eyes)"
        .format(fb_tierName=fb_tierName,
                eyes_tierName=eyes_tierName,
                tier_len=len(rConv),
                fb_len=len(res.fb_tier),
                eyes_len=len(res.eyes_tier)))
    if not rConv:
        return  # any feedback linked to eyes direction

    # group relations by 1st interval (the feedback)
    xrels = {}
    for link in rConv:
        xrels.setdefault(link[0], []).append(link)

    # organize the relations associated to x
    xgroups = {}  # x => {'stable'/'before'/'inside'/'after' => [(x, rel, y)]}
    groups = {}  # 'any'/'one'/'various' => [x]
    xtransitions = {}  # x => eye's direction transitions ("label -> label")
    for (x, lst) in xrels.items():
        # sort based on y order (each item is (x, rel, y))
        lst.sort(key=lambda link: link[2])
        xtransitions[x] = []
        xgroups[x] = {}
        # 3 cases:
        # (a) any eyes direction change <=> X during Y ('stable' group)
        # (b) only one change 'during' X => one Y 'before' and one Y 'after'
        # (c) various changes 'during' X => one Y 'before', one Y 'after',
        #     and others Ys 'inside'
        lastY = None
        for (_xi, rel, y) in lst:
            # - transition
            if lastY is not None:
                xtransitions[x].append(' -> '.join([
                    str(lastY.GetLabel().GetValue()),
                    str(y.GetLabel().GetValue())
                ]))
            lastY = y
            # - group
            xgroups[x].setdefault(sbia(rel), []).append((x, rel, y))
        # In which case we are ?
        if 'stable' in xgroups[x]:  # case (a)
            xgr = 'any'
        elif 'inside' in xgroups[x]:  # case (c)
            xgr = 'various'
        else:  # case (b)
            xgr = 'one'
        groups.setdefault(xgr, []).append(x)

    allTransitionsCnt = Counter()
    allGrRels = {'stable': [], 'before': [], 'inside': [], 'after': []}
    for y1 in ('before', 'inside', 'after'):
        allGrRels["(%s + stable)" % y1] = []

    # (group, title, classes of Y expected in this group)
    changes = [
        # 0 change, only 1 interval <=> X during Y
        ('any',
         "\t No change : {lst_len} feedbacks-eyes links ({percent:.1%})",
         ['stable']),
        # 1 change, 2 intervals => before/after
        ('one',
         "\t One change : {lst_len} feedbacks-eyes links ({percent:.1%})",
         ['before', 'after']),
        # nb changes, nb+1 intervals => before/inside+/after
        ('various',
         "\t Various changes : {lst_len} feedbacks-eyes links ({percent:.1%})",
         ['before', 'inside', 'after']),
    ]
    for (nb, title, ygroups) in changes:
        if nb not in groups:
            continue
        xAnnots = groups[nb]
        print(title.format(lst_len=len(xAnnots),
                           percent=float(len(xAnnots)) / len(res.fb_tier)))
        for ygr in ygroups:
            links = []
            for x in xAnnots:
                links += xgroups[x].get(ygr, [])  #TODO else: print warning
            allGrRels[ygr] += links
            if ygr == 'stable':
                # a stable Y is at the same time before, inside and after
                for y1 in ('before', 'inside', 'after'):
                    allGrRels["(%s + stable)" % y1] += links
            else:
                allGrRels["(%s + stable)" % ygr] += links
            print_eyes_directions(ygr, links)
        # transitions
        if nb != 'any':
            transitionsCnt = Counter()
            for x in xAnnots:
                transitionsCnt.update(xtransitions.get(x, []))
            print("\t transitions: {cnt}".format(
                cnt=counterWithPercent(transitionsCnt, sep="\n\t ")))
            allTransitionsCnt.update(transitionsCnt)

    # ALL
    print("\t All : {x_len} feedbacks-eyes".format(x_len=len(xrels)))
    for ygr in ('stable', 'before', '(before + stable)', 'inside',
                '(inside + stable)', 'after', '(after + stable)'):
        print_eyes_directions(ygr, allGrRels[ygr])
    # transitions of all the groups (not only those of the last group)
    print("\t transitions: {cnt}".format(
        cnt=counterWithPercent(allTransitionsCnt, sep="\n\t ")))
def process_feedback_after(trs,
                           pFb_tierName='P-Feedback',
                           mVoc_tierName='Vocabulaire',
                           after_Max=1.,
                           perLabel=False,
                           after_tierAppend=False,
                           after_tierName=None):
    """ Process relation between (patient) feedbacks during/after another tier (b.e. Vocabulaire, ...)

    A feedback (X) is linked to an annotation (Y) of the other tier when X
    starts after the start of Y and at the latest after_Max after the end
    of Y. Statistics on the delays and durations of the linked annotations
    are printed for all the links, and separately for the feedbacks starting
    'during' Y and strictly 'after' Y.

    @param trs: the annotation file in which both tiers are searched
    @param pFb_tierName: name of the feedbacks tier (X)
    @param mVoc_tierName: name of the other tier (Y)
    @param after_Max: max time between the end of Y and the start of X
    @param perLabel: if true, statistics per feedback label are also printed
    @param after_tierAppend: if true, the tier of linked feedbacks is named
        after_tierName and appended to trs
    @param after_tierName: name (format) of the tier of linked feedbacks; a
        string is formatted with the local names of this function (i.e. its
        parameters); default is "<pFb_tierName> after <mVoc_tierName>"
    @return: None (results are only printed)
    """
    from annotationdata import Filter, RelationFilter  #, Rel
    from annotationdata.filter.delay_relations import IntervalsDelay, AndPredicates

    #TODO? parameters
    if isinstance(after_tierName, basestring):
        # caller's template => may reference any parameter of this function
        after_tierName = after_tierName.format(**locals())
    else:
        after_tierName = "{pFb_tierName} after {mVoc_tierName}".format(
            pFb_tierName=pFb_tierName, mVoc_tierName=mVoc_tierName)  # default

    # nota: the namedtuple *class* itself is used as a bag of attributes
    res = namedtuple(
        'res', "mVoc_tier, mVoc_durations, mVoc_duration_stats, mVoc_radius" +
        ", pFb_tier, pFb_durations, pFb_duration_stats, pFb_radius")

    # Search Vocabulaire and P-Feedback tiers
    missing = []
    # (a) 'Vocabulaire'
    res.mVoc_tier = sppas_tools.tierFind(trs, mVoc_tierName)
    if res.mVoc_tier is None:
        print("\t[{mVoc_tierName}] No tier found ;-(".format(
            mVoc_tierName=mVoc_tierName))
        missing.append(mVoc_tierName)
    # (b) 'P-Feedback'
    res.pFb_tier = sppas_tools.tierFind(trs, pFb_tierName)
    if res.pFb_tier is None:
        print("[{pFb_tierName}] No tier found ;-(".format(
            pFb_tierName=pFb_tierName))
        missing.append(pFb_tierName)
    if missing:
        # the placeholders of this message were never filled in
        print("[%s] %d unfound tier(s) => skip this file" %
              (", ".join(str(name) for name in missing), len(missing)))
        return

    # Combine the 2 tiers
    # - create the predicates
    # [1] pDuringAfter <=> P-feedback(X) start during or a few time(1s) after Vocabulaire (Y)
    # (a) pStartStart : X (P-feedback) starts after Y (Vocabulaire) starts
    #     <=> delay(Xstart - Ystart) >= 0
    pStartStart = IntervalsDelay.create('after', 'start_start_min', 0)
    # (b) pStartEnd : X (P-feedback) starts at the latest after_Max after Y
    #     (Vocabulaire) ends <=> -infinity < delay(Xstart - Yends) <= after_Max
    # nota: -infinity as X can start during Y, the pStartStart allow to
    #     eliminate the case of X start before Y
    pStartEnd = IntervalsDelay.create('after', 'start_end', (None, after_Max))
    # =>
    pDuringAfter = AndPredicates(pStartStart, pStartEnd)

    fMVoc = Filter(res.mVoc_tier)
    fPFb = Filter(res.pFb_tier)
    rf = RelationFilter(pDuringAfter, fPFb, fMVoc)
    newtier = rf.Filter(annotformat="{x} [after({y})]")
    res.pFb_mVoc_tier = newtier
    if after_tierAppend:
        newtier.SetName(after_tierName)
        trs.Append(newtier)  # ?
    print(
        "\t[{after_tierName}] {tier_len} (of {pFb_len}) {pFb_tierName} during/after({after_Max}s) a {mVoc_tierName}"
        .format(after_tierName=after_tierName,
                tier_len=len(res.pFb_mVoc_tier),
                pFb_len=len(res.pFb_tier),
                pFb_tierName=pFb_tierName,
                after_Max=after_Max,
                mVoc_tierName=mVoc_tierName))

    # Analyse rf results
    # - group result between after/during
    groups = {'after': [], 'during': [], 'all': []}
    for link in rf:
        (_x, rel, _y) = link
        groups['all'].append(link)
        # rel[1] correspond to pStartEnd => give use the Xstart-Yend delay
        if rel[1].delay > 0:
            groups['after'].append(link)  # feedback start strictly after the vocabulaire
        else:
            groups['during'].append(link)  # feedback start during the vocabulaire

    # - print the statistics of each (non empty) group
    delaysFormat = "mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f~},{s.Max:.3f~}]"
    durationsFormat = "mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f},{s.Max:.3f}]"
    for gkey in ('all', 'during', 'after'):
        group = groups[gkey]
        if not group:
            continue
        ssStats = stats([rel[0].delay for (_x, rel, _y) in group])  # rel[0] is pStartStart
        seStats = stats([rel[1].delay for (_x, rel, _y) in group])  # rel[1] is pStartEnd
        xDurStats = stats(durations([x for (x, _rel, _y) in group]))  # p-Feedback durations
        yDurStats = stats(durations([y for (_x, _rel, y) in group]))  # Vocabulary durations
        linked_to = "'linked' to" if gkey == 'all' else gkey
        print("\t {gkey}: {gsize} {pFb_tierName} {linked_to} a {mVoc_tierName}"
              .format(gkey=gkey,
                      gsize=len(group),
                      pFb_tierName=pFb_tierName,
                      linked_to=linked_to,
                      mVoc_tierName=mVoc_tierName))
        print(("\t Start-Start delays: " + delaysFormat).format(s=ssStats))
        if gkey != 'during':
            print(("\t End-Start delays: " + delaysFormat).format(s=seStats))
        print("\t {mVoc_tierName} durations: ".format(
            mVoc_tierName=mVoc_tierName) + durationsFormat.format(s=yDurStats))
        print("\t {pFb_tierName} durations: ".format(
            pFb_tierName=pFb_tierName) + durationsFormat.format(s=xDurStats))
        if perLabel:
            statsPerLabel([x for (x, _rel, _y) in group], "\t\t",
                          normLabelWithSep)
def process_pFb_mVoc(trs,
                     pFb_tierName='P-Feedback',
                     mVoc_tierName='Vocabulaire',
                     perLabel=False):
    """ Process relation between patient feedbacks and medical vocabulary

    A feedback (X) is linked to a vocabulary annotation (Y) when X starts
    after the start of Y and at the latest 1s after the end of Y. The tier of
    linked feedbacks ('P-fb-after-M-Voc') is appended to trs, and statistics
    are printed for all the links, for the feedbacks starting 'during' Y and
    for those starting strictly 'after' Y.

    @param trs: the annotation file in which both tiers are searched (the
        new tier is appended to it)
    @param pFb_tierName: name of the patient's feedbacks tier (X)
    @param mVoc_tierName: name of the medical vocabulary tier (Y)
    @param perLabel: if true, statistics per feedback label are also printed
    @return: None (results are only printed)
    """
    from annotationdata import Filter, RelationFilter  #, Rel
    from annotationdata.filter.delay_relations import IntervalsDelay, AndPredicates

    #TODO? parameters
    pFb_mVoc_tierName = 'P-fb-after-M-Voc'
    pFbStart_mVocEnd_Max = 1.  # max time between feedback and previous vocabulary

    # nota: the namedtuple *class* itself is used as a bag of attributes
    res = namedtuple(
        'res', "mVoc_tier, mVoc_durations, mVoc_duration_stats, mVoc_radius" +
        ", pFb_tier, pFb_durations, pFb_duration_stats, pFb_radius")

    # Search Vocabulaire and P-Feedback tiers
    missing = []
    # (a) 'Vocabulaire'
    res.mVoc_tier = sppas_tools.tierFind(trs, mVoc_tierName)
    if res.mVoc_tier is None:
        print(
            "\t[{mVoc_tierName}] No medecin's medical vocabulary tier found ;-("
            .format(mVoc_tierName=mVoc_tierName))
        missing.append(mVoc_tierName)
    # (b) 'P-Feedback'
    res.pFb_tier = sppas_tools.tierFind(trs, pFb_tierName)
    if res.pFb_tier is None:
        print("[{pFb_tierName}] No patient's feedbacks tier found ;-(".format(
            pFb_tierName=pFb_tierName))
        missing.append(pFb_tierName)
    if missing:
        # the placeholders of this message were never filled in
        print("[%s] %d unfound tier(s) => skip this file" %
              (", ".join(str(name) for name in missing), len(missing)))
        return

    # Combine the 2 tiers
    # - create the predicates
    # [1] pDuringAfter <=> P-feedback(X) start during or a few time(1s) after Vocabulaire (Y)
    # (a) pStartStart : X (P-feedback) starts after Y (Vocabulaire) starts
    #     <=> delay(Xstart - Ystart) >= 0
    pStartStart = IntervalsDelay.create('after', 'start_start_min', 0)
    # (b) pStartEnd : X (P-feedback) starts at the latest 1s after Y
    #     (Vocabulaire) ends <=> -infinity < delay(Xstart - Yends) <= 1s
    # nota: -infinity as X can start during Y, the pStartStart allow to
    #     eliminate the case of X start before Y
    pStartEnd = IntervalsDelay.create('after', 'start_end',
                                      (None, pFbStart_mVocEnd_Max))
    # =>
    pDuringAfter = AndPredicates(pStartStart, pStartEnd)

    fMVoc = Filter(res.mVoc_tier)
    fPFb = Filter(res.pFb_tier)
    rf = RelationFilter(pDuringAfter, fPFb, fMVoc)
    newtier = rf.Filter(annotformat="{x} [after({y})]")
    newtier.SetName(pFb_mVoc_tierName)
    res.pFb_mVoc_tier = newtier
    print(
        "\t[{pFb_mVoc_tierName}] {tier_len} (of {pFb_len}) patient feedbacks during/after a medic vocabulary"
        .format(pFb_mVoc_tierName=pFb_mVoc_tierName,
                tier_len=len(res.pFb_mVoc_tier),
                pFb_len=len(res.pFb_tier)))
    trs.Append(newtier)  # ?

    # Analyse rf results
    # - group result between after/during
    groups = {'after': [], 'during': [], 'all': []}
    for link in rf:
        (_x, rel, _y) = link
        groups['all'].append(link)
        # rel[1] correspond to pStartEnd => give use the Xstart-Yend delay
        if rel[1].delay > 0:
            groups['after'].append(link)  # feedback start strictly after the vocabulaire
        else:
            groups['during'].append(link)  # feedback start during the vocabulaire

    delaysFormat = "mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f~},{s.Max:.3f~}]"
    durationsFormat = "mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f},{s.Max:.3f}]"

    # 'all' annotations
    group = groups['all']
    if group:
        ssStats = stats([rel[0].delay for (_x, rel, _y) in group])  # rel[0] is pStartStart
        xDurStats = stats(durations([x for (x, _rel, _y) in group]))  # p-Feedback durations
        yDurStats = stats(durations([y for (_x, _rel, y) in group]))  # Vocabulary durations
        print("\t all: {} feedbacks 'linked' to vocabulaire".format(len(group))
              + ("\n\t Start-Start delays: " + delaysFormat).format(s=ssStats)
              + ("\n\t Vocabulary durations: " + durationsFormat).format(s=yDurStats)
              + ("\n\t Feedback durations: " + durationsFormat).format(s=xDurStats))
        if perLabel:
            statsPerLabel([x for (x, _rel, _y) in group], "\t\t",
                          normLabelWithSep)

    # feedbacks starting 'during' the vocabulaire
    group = groups['during']
    if group:
        ssStats = stats([rel[0].delay for (_x, rel, _y) in group])  # rel[0] is pStartStart
        # at each percent of the Vocabulaire starts the Feedback
        # NOTE(review): raises if a vocabulary annotation has a zero duration - confirm it cannot happen
        ssPercents = [
            float((rel[0].delay) / y.GetLocation().GetDuration())
            for (_x, rel, y) in group
        ]
        ssPStats = stats(ssPercents)
        print("\t during: {} feedbacks starts during the vocabulaire".format(
            len(group))
              + ("\n\t Start-Start delays: " + delaysFormat).format(s=ssStats)
              + "\n\t Percent of Vocabulary when P-Feedback starts: mean={s.Mean:.0%}, std.dev.={s.StdDev:.3f} [{s.Min:.0%},{s.Max:.0%}]"
              .format(s=ssPStats))
        if perLabel:
            statsPerLabel([x for (x, _rel, _y) in group], "\t\t",
                          normLabelWithSep)

    # feedbacks starting strictly 'after' the vocabulaire
    group = groups['after']
    if group:
        ssStats = stats([rel[0].delay for (_x, rel, _y) in group])  # rel[0] is pStartStart
        seStats = stats([rel[1].delay for (_x, rel, _y) in group])  # rel[1] is pStartEnd
        print(
            "\t after: {} feedbacks starts (at most {:.3f}s) after the vocabulaire"
            .format(len(group), pFbStart_mVocEnd_Max)
            + ("\n\t Start-Start delays: " + delaysFormat).format(s=ssStats)
            + ("\n\t Vocabulaire end - P-Feedback start delays: " +
               delaysFormat).format(s=seStats))
        if perLabel:
            statsPerLabel([x for (x, _rel, _y) in group], "\t\t",
                          normLabelWithSep)
def process_files(files, opts):
    """ Create a boundaries tier for each requested tier of each annotation file

    For each file: read it, choose the destination transcription according to
    opts.keep_tiers, build one boundaries tier per name of opts.tiers_names
    (and, if opts.equals_tier, one 'equals' tier comparing each following
    tier with the first one), then write the destination transcription into a
    new file named with opts.out_file_format (same extension as the input).

    @param files: the file(s) to process
    @param opts: the options; attributes read here are keep_tiers, tiers_names,
        out_tier_format, radius, bound_type, begin_format, end_format,
        equals_tier, equals_label_format, equals_tier_format
        and out_file_format
    """
    annotationdata.aio = sppas_tools.getAnnotationdataAio()  # import annotationdata.aio or annotationdata.io
    from annotationdata import Transcription, Tier

    for f in files:
        print("[%s] Loading annotation file..." % f)
        # Read an annotated file
        trs = annotationdata.aio.read(f)
        print("[%s] Number of tiers:%d" % (f, trs.GetSize()))

        # Prepare the output Transcription
        destTrs = trs  # default/'all' => work directly on trs
        destAppendProcessed = False
        if opts.keep_tiers.startswith('process'):
            # empty copy of trs, the processed tiers are appended to it
            destTrs = Transcription(trs.GetName(), trs.GetMinTime(),
                                    trs.GetMaxTime())
            destAppendProcessed = True
        elif opts.keep_tiers == 'any' or opts.keep_tiers.startswith('bound'):
            # ('startwith' was a typo raising an AttributeError)
            # empty copy of trs, only the new tiers are appended to it
            destTrs = Transcription(trs.GetName(), trs.GetMinTime(),
                                    trs.GetMaxTime())

        # Look for the tier(s) to process
        equalsRefBoundTier = None  # boundaries tier of the first (reference) tier
        for tier_name in opts.tiers_names:
            tier = sppas_tools.tierFind(trs, tier_name)
            if tier is None:
                # report the name we were actually looking for (the loop
                # variable), not the unrelated opts.tier_name attribute
                print("[%s] Any tier with name similar to '%s' ;-(" %
                      (f, tier_name))
                print("[%s] Tiers are : %s" % (f, ''.join([
                    "{}[{}] '{}'".format("\n " if (i % 4) == 0 else ", ", i,
                                         t.GetName())
                    for i, t in enumerate(trs)
                ])))
                # stop looking for the following tiers; the file is still
                # saved with what has been built so far
                break
            print("[%s] Searched tier '%s' has %d annotations" %
                  (f, tier.GetName(), tier.GetSize()))
            if destAppendProcessed:
                destTrs.Append(tier)

            # Create the Boundaries tier
            boundName = opts.out_tier_format.format(tier_name)
            boundTier = Tier(boundName)
            bounds = boundaries(tier, opts.radius, opts.bound_type,
                                trs.GetMinTime(), trs.GetMaxTime(),
                                opts.begin_format, opts.end_format)
            for bound in bounds:
                boundTier.Append(bound)
            print("[%s] Boundaries tier '%s' has %d annotations" %
                  (f, boundTier.GetName(), boundTier.GetSize()))
            destTrs.Append(boundTier)

            # Create the 'equals' tier
            if opts.equals_tier:
                if tier_name == opts.tiers_names[0]:
                    # first => reference for the following tiers
                    equalsRefBoundTier = boundTier
                else:
                    equalsTier = filterEquals(boundTier, equalsRefBoundTier,
                                              opts.bound_type,
                                              opts.equals_label_format)
                    equalsName = opts.equals_tier_format.format(
                        tier_name, opts.tiers_names[0])
                    equalsTier.SetName(equalsName)
                    print("[%s] Equals tier '%s' has %d annotations" %
                          (f, equalsTier.GetName(), equalsTier.GetSize()))
                    destTrs.Append(equalsTier)

        # Saving file
        (root, ext) = os.path.splitext(f)
        of = opts.out_file_format.format(root,
                                         "+".join(opts.tiers_names)) + ext
        print("[%s] Saving annotations into %s" % (f, of))
        annotationdata.aio.write(of, destTrs)