예제 #1
0
파일: common.py 프로젝트: proycon/colibrita
def makesentencepair(id, sourcepattern, targetpattern, sourceoffset, targetoffset, sourcesentence, targetsentence):
    """Build a SentencePair from a target sentence and an aligned pattern pair.

    Two token tuples are derived from ``targetsentence``:

    * the reference, in which the tokens covered by ``targetpattern``
      (starting at ``targetoffset``) are wrapped in a single ``Fragment``;
    * the input, which is the same sentence but with a ``Fragment`` built
      from ``sourcepattern`` in that position.

    The reference is then checked against the original target sentence via
    ``SentencePair._str`` (presumably this flattens Fragments back into plain
    tokens -- confirm against its definition). On disagreement, diagnostics
    are written to stderr.

    Parameters:
        id: identifier passed through to ``SentencePair``.
        sourcepattern: whitespace-separated source-side pattern.
        targetpattern: whitespace-separated target-side pattern.
        sourceoffset: only reported in the diagnostics.
        targetoffset: token index in ``targetsentence`` where the target
            pattern starts.
        sourcesentence: not used by this function.
        targetsentence: iterable of target tokens.

    Returns:
        ``(True, SentencePair)`` on success, ``(False, None)`` when the
        reconstructed target does not match ``targetsentence``.
    """
    targetsentence = tuple(targetsentence)

    # Tokenise each pattern exactly once and derive the span length from the
    # very tokens that go into the Fragment. Counting spaces instead would
    # disagree with split() on leading/trailing/repeated whitespace and make
    # the wrong number of tokens be skipped.
    targettokens = tuple(targetpattern.split())
    sourcetokens = tuple(sourcepattern.split())
    targetpattern_n = len(targettokens)

    prefix = targetsentence[:targetoffset]
    suffix = targetsentence[targetoffset + targetpattern_n:]
    newtargetsentence = prefix + (Fragment(targettokens),) + suffix
    inputsentence = prefix + (Fragment(sourcetokens),) + suffix

    reconstructed = tuple(SentencePair._str(newtargetsentence))
    # A pattern without any tokens can never be a valid fragment; keep
    # rejecting it rather than producing a pair with an empty Fragment.
    if not targettokens or reconstructed != targetsentence:
        print("Target sentence mismatch:\n", reconstructed, "\n****VS****\n", targetsentence, file=sys.stderr)
        print("Sentence: ", id, file=sys.stderr)
        print("Source pattern: ", sourcepattern, file=sys.stderr)
        print("Target pattern: ", targetpattern, file=sys.stderr)
        print("Target offset: ", targetoffset, file=sys.stderr)
        print("Source offset: ", sourceoffset, file=sys.stderr)
        print("Target n: ", targetpattern_n, file=sys.stderr)
        print("Input: ", inputsentence, file=sys.stderr)
        return False, None

    return True, SentencePair(id, inputsentence, None, newtargetsentence)
예제 #2
0
def _pickchoice(answer, choices, label):
    """Resolve a numeric *answer* against *choices*; pass other answers through.

    A purely numeric answer is looked up in ``choices``: on a hit the selected
    value is echoed to stderr and returned, on a miss a warning naming
    ``label`` is written to stderr and ``None`` is returned. Any non-numeric
    answer (including the empty string) is returned unchanged.
    """
    if not answer.isdigit():
        return answer
    index = int(answer)
    if index in choices:
        selected = choices[index]
        print("Selected " + selected, file=sys.stderr)
        return selected
    print("Invalid " + label + ", leaving empty", file=sys.stderr)
    return None


def newsentencepair(sentencepairs):
    """Interactively prompt for a new sentence pair and append it to *sentencepairs*.

    The user is asked for a reference sentence (with the fragment marked in
    asterisks), the L1 fragment that replaces it in the input sentence, and a
    source and a category. Source and category may be given as a number, which
    is looked up in the choices returned by ``listsources()`` / ``listcats()``,
    or as free text.

    Side effects: prints prompts to stdout and messages to stderr, increments
    the module-level ``sources`` tally for a non-empty source, and appends the
    new ``SentencePair`` to ``sentencepairs``.

    Returns:
        The index at which the pair was appended (also passed as the first
        argument to ``SentencePair``), or ``False`` when no reference sentence
        was given or it contains no marked fragment. Index 0 is falsy, so
        callers should test the result with ``is False``.
    """
    cursor = len(sentencepairs)
    print("------------------ #" + str(cursor+1) + ": New sentence pair ----------------")
    print("Enter untokenised text (L2), mark fragment in *asterisks*")

    # makesentence is defined elsewhere; presumably it tokenises the text and
    # turns the asterisk-marked span into a Fragment -- confirm there.
    ref = makesentence(input("Reference sentence: ").strip())
    if not ref:
        print("No sentence provided", file=sys.stderr)
        return False

    # split() rather than split(" "): repeated spaces typed by the user must
    # not end up as empty-string tokens inside the fragment.
    fragment = Fragment(tuple(input("L1 fragment: ").strip().split()))

    # If several Fragments are present, the last one wins.
    marked = None
    for token in ref:
        if isinstance(token, Fragment):
            marked = token
    # This validates user input, so it must not be an assert (asserts are
    # stripped under -O); fail the same way as a missing sentence does.
    if not marked:
        print("No fragment marked in reference sentence", file=sys.stderr)
        return False

    inputsentence = SentencePair.replacefragment(marked, fragment, ref)

    # listsources()/listcats() are called before their prompt, as the choices
    # they return are needed to resolve a numeric answer.
    choices = listsources()
    src = _pickchoice(input("Source: "), choices, "source")
    if src:
        # sources is a module-level mapping mutated in place (never rebound),
        # so no global declaration is required.
        sources[src] += 1

    choices = listcats()
    cat = _pickchoice(input("Category: "), choices, "category")

    sentencepairs.append(SentencePair(cursor, inputsentence, None, ref, src, cat))
    return cursor