Ejemplo n.º 1
0
                interjection_skip = False
                cnt_brackets_opening = 0
                cnt_brackets_closing = 0
                continue
            else:
                line = line.replace('<interjection_begin>',
                                    '').replace('<interjection_end>', '')
                if line:
                    interjection_text.append(line)
                    interjection_length += 1
                continue
        if current_speaker is not None and not endend_with_interjection:
            if interjection_complete:
                interjection_complete = None
                text = []
                line = helper.cleans_line(line)
                text.append(line)
                continue
            else:
                current_role = current_role.strip()

                line = helper.cleans_line(line)
                text.append(line)
                continue

        if s is not None:
            if not endend_with_interjection:
                if ":* " in line:
                    line = line.split(':* ', 1)[-1]
                elif ":" in line:
                    line = line.split(':', 1)[-1]
                cnt_brackets_opening = 0
                cnt_brackets_closing = 0
            else:
                line = line.replace('<interjection_begin>',
                                    '').replace('<interjection_end>', '')
                if line and not line.isspace():
                    interjection_text.append(line)
                    interjection_length += 1
                continue
        if current_speaker is not None and not endend_with_interjection:
            if interjection_complete:
                interjection_complete = None
                text = []
                if line and not line.isspace() and not INTERJECTION_END.search(
                        line):
                    line = helper.cleans_line(line)
                    text.append(line)
                continue
            else:
                current_role = current_role.strip()

                line = helper.cleans_line(line)
                if line and not line.isspace():
                    text.append(line)
                continue

        if s is not None:
            if ":* " in line:
                line = line.split(':* ', 1)[-1]
            elif ":" in line:
                line = line.split(':', 1)[-1]
                            }
                            table.insert(speech_dict)
                #import pdb; pdb.set_trace()
                sub += 1
                interjection = True
                interjection_text = []
        if interjection:
            cnt_brackets_opening += line.count('(')
            cnt_brackets_closing += line.count(')')
            #import pdb; pdb.set_trace()
            if INTERJECTION_END.search(
                    line
            ) and cnt_brackets_opening <= cnt_brackets_closing or CHAIR_MARK.match(
                    line):
                if current_speaker is not None:
                    interjection_text.append(helper.cleans_line(line))
                    interjection_text = [
                        i + ' ' if not i.endswith('-') else i
                        for i in interjection_text
                    ]
                    interjection_text = ''.join(interjection_text)
                    # removes whitespace duplicates
                    interjection_text = re.sub(' +', ' ', interjection_text)
                    # removes whitespaces at the beginning and end
                    interjection_text = interjection_text.strip()
                    interjection_text = re.sub('-(?=[a-z])', '',
                                               interjection_text)
                    if debug:

                        speech = pd.DataFrame({
                            'speaker': [current_speaker],