interjection_skip = False cnt_brackets_opening = 0 cnt_brackets_closing = 0 continue else: line = line.replace('<interjection_begin>', '').replace('<interjection_end>', '') if line: interjection_text.append(line) interjection_length += 1 continue if current_speaker is not None and not endend_with_interjection: if interjection_complete: interjection_complete = None text = [] line = helper.cleans_line(line) text.append(line) continue else: current_role = current_role.strip() line = helper.cleans_line(line) text.append(line) continue if s is not None: if not endend_with_interjection: if ":* " in line: line = line.split(':* ', 1)[-1] elif ":" in line: line = line.split(':', 1)[-1]
cnt_brackets_opening = 0 cnt_brackets_closing = 0 else: line = line.replace('<interjection_begin>', '').replace('<interjection_end>', '') if line and not line.isspace(): interjection_text.append(line) interjection_length += 1 continue if current_speaker is not None and not endend_with_interjection: if interjection_complete: interjection_complete = None text = [] if line and not line.isspace() and not INTERJECTION_END.search( line): line = helper.cleans_line(line) text.append(line) continue else: current_role = current_role.strip() line = helper.cleans_line(line) if line and not line.isspace(): text.append(line) continue if s is not None: if ":* " in line: line = line.split(':* ', 1)[-1] elif ":" in line: line = line.split(':', 1)[-1]
} table.insert(speech_dict) #import pdb; pdb.set_trace() sub += 1 interjection = True interjection_text = [] if interjection: cnt_brackets_opening += line.count('(') cnt_brackets_closing += line.count(')') #import pdb; pdb.set_trace() if INTERJECTION_END.search( line ) and cnt_brackets_opening <= cnt_brackets_closing or CHAIR_MARK.match( line): if current_speaker is not None: interjection_text.append(helper.cleans_line(line)) interjection_text = [ i + ' ' if not i.endswith('-') else i for i in interjection_text ] interjection_text = ''.join(interjection_text) # removes whitespace duplicates interjection_text = re.sub(' +', ' ', interjection_text) # removes whitespaces at the beginning and end interjection_text = interjection_text.strip() interjection_text = re.sub('-(?=[a-z])', '', interjection_text) if debug: speech = pd.DataFrame({ 'speaker': [current_speaker],