Example #1
0
def prepareA0(segments, questions):
    """Render the confirmed translations as a markdown sheet plus a score CSV.

    Segments for which ``prefixMap(seg, 'CONFIRM')`` is empty are ignored.
    For the remaining ones the last CONFIRM entry supplies the ``sid`` used
    for grouping and the ``text2`` that is listed.  When ``firstViableTrg``
    returns a truthy object, its ``text2`` is listed as well under the
    label ``v<usid>``.

    Args:
        segments: iterable of segment objects accepted by ``prefixMap`` and
            ``firstViableTrg``; each must provide ``seg['usid']``.
        questions: mapping from ``sid`` to the question text shown above
            each group (``*`` is rewritten to ``__``).

    Returns:
        A ``(markdown, csv)`` tuple of strings.  The CSV contains one row
        per listed item with the score pre-filled as ``0``.
    """
    # sid -> [(label, target text), ...] in first-seen order
    grouped = {}
    for seg in segments:
        confirms = prefixMap(seg, 'CONFIRM')
        if len(confirms) == 0:
            continue
        lastConfirm = confirms[-1]
        entries = grouped.setdefault(lastConfirm['sid'], [])
        entries.append((str(seg['usid']), lastConfirm['text2']))
        viable = firstViableTrg(seg)
        if viable:
            entries.append((f'v{seg["usid"]}', viable['text2']))

    mdParts = []
    csvParts = ['USID, Score\n']
    for sid, entries in grouped.items():
        question = questions[sid].replace('*', '__')
        # sids starting with 't' get the tech-support prompt, others the QA prompt
        if sid.startswith('t'):
            helpText = 'Popište daný problém technické podpoře.'
        else:
            helpText = 'Položte dotaz, na který odpovídá vyznačená část v textu.'
        mdParts.append(f'\n\n\n## {sid}\n')
        mdParts.append(f'_{helpText}_\n\n')
        mdParts.append(f'{question}\n\n')
        for label, text in entries:
            padded = label.rjust(7)
            mdParts.append(f'- `{padded}` {text}\n')
            csvParts.append(f'"{padded}",0\n')

    # HTML line breaks are flattened to spaces in the markdown only
    markdown = ''.join(mdParts).replace('<br>', ' ').replace('</br>', ' ')
    return markdown, ''.join(csvParts)
Example #2
0
def domainKeyMap(logs, key, func=lambda x: x):
    """Return the segments whose first NEXT ``sid`` matches the regex ``key``.

    Args:
        logs: iterable of segments accepted by ``prefixMap``.
        key: regular expression matched (``re.match``, i.e. anchored at the
            start) against the first ``sid`` yielded for the 'NEXT' prefix.
        func: accepted for interface compatibility; not used by this
            function.

    Returns:
        A list with the matching segments, in their original order.
    """
    return [
        seg
        for seg in logs
        if re.match(key, prefixMap(seg, 'NEXT', lambda x: x['sid'])[0])
    ]
Example #3
0
def createBlog(segments):
    """Convert raw line groups into enriched per-segment dictionaries.

    Each element of ``segments`` is a sequence of line dicts that all carry
    a ``usid`` key.  The ``usid`` of the first line is hoisted to the
    segment level and removed from the copied lines, which are stored
    under ``items``.

    Returns:
        A list of dicts with the keys ``usid``, ``items``, ``sid``,
        ``domain`` (first element of ``sid``), ``rating`` (empty dict),
        ``first_viable_src``, ``first_viable_trg``, ``final`` (last CONFIRM
        entry or ``None``) and ``backtracking``.
    """
    blog = []
    for lines in segments:
        entry = {'usid': lines[0]['usid'], 'items': []}
        for line in lines:
            item = dict(line)  # copy so the caller's line is left untouched
            item.pop('usid')
            entry['items'].append(item)

        entry['sid'] = getSID(entry)
        entry['domain'] = entry['sid'][0]
        entry['rating'] = {}

        # First viable objects, judged by source and by target respectively
        entry['first_viable_src'] = firstViableSrc(entry)
        entry['first_viable_trg'] = firstViableTrg(entry)

        # The final object is the last confirmation, if there was any
        confirms = prefixMap(entry, 'CONFIRM')
        entry['final'] = confirms[-1] if len(confirms) != 0 else None

        entry['backtracking'] = not isWithoutBacktracking(entry)
        blog.append(entry)
    return blog
Example #4
0
def withoutBacktracking(segments):
    """Collect the segments for which ``isWithoutBacktracking`` holds.

    For every such segment the result holds a two-element list: the last
    ``text1`` obtained for the 'TRANSLATE1' prefix and ``len(segment)``.
    Two summary counts are printed to stdout.

    Returns:
        List of ``[last_translate1_text, segment_length]`` pairs.
    """
    selected = []
    for seg in segments:
        if not isWithoutBacktracking(seg):
            continue
        sources = prefixMap(seg, 'TRANSLATE1', lambda x: x['text1'])
        selected.append([sources[-1], len(seg)])
    print('Number of all segments:', len(segments))
    print('Number of segments without backtracking:', len(selected))
    return selected
Example #5
0
def firstViableTrgEditsDistribution(segment):
    """Measure how the first viable target differs from the confirmed one.

    The tokenized ``text2`` of the first viable target is compared with the
    tokenized ``text2`` of the last CONFIRM entry using ``SequenceMatcher``.

    Args:
        segment: a segment accepted by ``firstViableTrg`` and ``prefixMap``.

    Returns:
        ``None`` when ``firstViableTrg`` returns ``None``; otherwise a tuple
        ``(ratio, equal, replace, insert, delete)`` where ``ratio`` is
        ``SequenceMatcher.ratio()`` and the remaining four values are the
        shares of tokens covered by each opcode type.  When neither side
        has any token, all four shares are ``0.0``.

    Raises:
        IndexError: if no CONFIRM entry is found (assuming ``prefixMap``
            returns an empty list in that case).
    """
    viable = firstViableTrg(segment)
    if viable is None:
        return None
    viableTrg = viable['text2']
    lastConfirmTrg = prefixMap(segment, 'CONFIRM', lambda x: x['text2'])[-1]
    sm = SequenceMatcher(None, tokenize(viableTrg), tokenize(lastConfirmTrg))

    counts = {'equal': 0, 'replace': 0, 'insert': 0, 'delete': 0}
    for tag, i1, i2, j1, j2 in sm.get_opcodes():
        # An 'insert' opcode always has i1 == i2 (nothing is consumed from
        # the first sequence), so its size has to be read from the second
        # sequence; measuring i2 - i1 would always report zero inserts.
        counts[tag] += (j2 - j1) if tag == 'insert' else (i2 - i1)

    total = float(sum(counts.values()))
    if total == 0:
        # Both token sequences are empty: there are no edits to distribute.
        return (sm.ratio(), 0.0, 0.0, 0.0, 0.0)
    return (
        sm.ratio(),
        counts['equal'] / total,
        counts['replace'] / total,
        counts['insert'] / total,
        counts['delete'] / total,
    )
Example #6
0
    print(f'- - - insert: {avgDistribution[3]/len(sEdits)*100:.2f}%')
    print(f'- - - delete: {avgDistribution[4]/len(sEdits)*100:.2f}%')

# Only for technical issues: check how much the typed source overlaps with
# the question text the user was shown.
if args.questions_flat is not None:
    print('\nSpecific for tech issues:')
    with open(args.questions_flat, 'r') as f:
        questions = json.load(f)
    # Drop the '*' highlight markers before comparing
    questions = {k: v.replace('*', '') for k, v in questions.items()}
    techSegments = domainKeyMap(segments, 't[0-9]{2}')
    lineRatios = []
    confirmRatios = []
    for seg in techSegments:
        question = questions[seg['sid']]
        if not isSkipped(seg):
            translates = prefixMap(seg, 'TRANSLATE1', lambda x: x['text1'])
            # Only the first TRANSLATE1 source is compared.  Taking the
            # maximum ratio over all of them gave the same numbers on the
            # collected data, so that (previously unreachable) variant was
            # dropped.
            sm = SequenceMatcher(None, tokenize(question), tokenize(translates[0]))
            lineRatios.append(sm.ratio())

            # Similarity between the question and the finally confirmed source
            lastConfirmSrc = prefixMap(seg, 'CONFIRM', lambda x: x['text1'])[-1]
            sm = SequenceMatcher(None, tokenize(question), tokenize(lastConfirmSrc))
            confirmRatios.append(sm.ratio())
Example #7
0
    return sm.ratio() > 0.8


# Collect the rows to export: one row per segment whose first viable source
# text ends like a complete sentence.
out = []
for segment in segments:
    firstViable = segment['first_viable_src']
    if firstViable:
        text1 = firstViable['text1']
        # endswith() is simply False for an empty string, whereas indexing
        # text1[-1] would raise IndexError.
        if text1.endswith(('.', '?')):
            out.append([text1])

    # NOTE: deliberately disabled.  When enabled, this exports rated final
    # translations whose backtranslation is similar to the source, as
    # (rating, text1, text2, text3) rows.
    if False and ('rating' in segment) and ('final' in segment['rating']):
        text1 = segment['final']['text1']
        text2 = segment['final']['text2']
        bts = prefixMap(segment, 'TRANSLATE2')
        for backObj in bts:
            if backObj['text2'] == text2:
                text3 = backObj['text3']
                if isSimilar(text1, text3):
                    out.append(
                        (segment['rating']['final'], text1, text2, text3))
                    break

# Sort descending by the first column.  That is the rating for rows from the
# disabled branch above; for the rows currently produced it is the text.
out = sorted(out, key=lambda x: x[0], reverse=True)

# Write the rows as tab-separated lines
with open(args.csvout, 'w') as f:
    for outObj in out:
        f.write('\t'.join([str(x) for x in outObj]) + '\n')
Example #8
0
def getSID(segment):
    """Return the first ``sid`` found for the 'NEXT' prefix of ``segment``.

    Raises:
        Exception: with the message 'Domain could not be found' when
            ``prefixMap`` yields no 'NEXT' entry.
    """
    sids = prefixMap(segment, 'NEXT', lambda x: x['sid'])
    if len(sids) > 0:
        return sids[0]
    raise Exception('Domain could not be found')