def makesentencepair(id, sourcepattern, targetpattern, sourceoffset, targetoffset, sourcesentence, targetsentence):
    """Build a SentencePair with one fragment marked in the target sentence.

    The tokens of ``targetpattern`` are expected to occur in
    ``targetsentence`` starting at ``targetoffset``. Two token sequences are
    derived from the target sentence:

    * the new target sentence, where that span is wrapped in a ``Fragment``
      holding the target pattern tokens;
    * the input sentence, identical except that the ``Fragment`` holds the
      tokens of ``sourcepattern`` instead.

    Args:
        id: Identifier passed through to ``SentencePair`` and printed in
            diagnostics.
        sourcepattern: Whitespace-separated source-language fragment.
        targetpattern: Whitespace-separated target-language fragment.
        sourceoffset: Only printed in diagnostics; not used for slicing.
        targetoffset: Index in ``targetsentence`` where the fragment starts.
        sourcesentence: Accepted for interface compatibility; unused here.
        targetsentence: Iterable of target-language tokens.

    Returns:
        ``(True, SentencePair)`` when the rebuilt target sentence renders
        back to ``targetsentence``; otherwise ``(False, None)`` after
        printing diagnostics to stderr.
    """
    targetsentence = tuple(targetsentence)
    sourcetokens = tuple(sourcepattern.split())
    targettokens = tuple(targetpattern.split())

    # The span to replace must be as long as the tokens split() produced.
    # Counting spaces instead over-counts on repeated, leading or trailing
    # whitespace and then drops too many tokens from the sentence. An empty
    # pattern still consumes one token, exactly as the count-based logic did.
    targetpattern_n = len(targettokens) or 1

    before = targetsentence[:targetoffset]
    after = targetsentence[targetoffset + targetpattern_n:]
    newtargetsentence = before + (Fragment(targettokens),) + after
    inputsentence = before + (Fragment(sourcetokens),) + after

    # Sanity check: the rebuilt sentence must render back to the original.
    # NOTE(review): SentencePair._str presumably flattens fragments into
    # plain tokens -- confirm against its definition.
    rendered = tuple(SentencePair._str(newtargetsentence))
    if rendered != targetsentence:
        print("Target sentence mismatch:\n", rendered, "\n****VS****\n", targetsentence, file=sys.stderr)
        print("Sentence: ", id, file=sys.stderr)
        print("Source pattern: ", sourcepattern, file=sys.stderr)
        print("Target pattern: ", targetpattern, file=sys.stderr)
        print("Target offset: ", targetoffset, file=sys.stderr)
        print("Source offset: ", sourceoffset, file=sys.stderr)
        print("Target n: ", targetpattern_n, file=sys.stderr)
        print("Input: ", inputsentence, file=sys.stderr)
        return False, None

    # The third constructor argument is left as None, as before; its meaning
    # is not visible from this function.
    return True, SentencePair(id, inputsentence, None, newtargetsentence)
def _resolvechoice(answer, choices, kind):
    """Turn a typed answer into a value, resolving numeric menu picks.

    A non-numeric answer is returned unchanged (free-text entry). A numeric
    answer is looked up in ``choices``; the selected value is echoed to
    stderr and returned. A numeric answer that is not a key of ``choices``
    is reported as invalid and yields ``None``.

    ``kind`` is the word used in the "Invalid ..." message.
    """
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() rejects with ValueError.
    if not answer.isdecimal():
        return answer
    index = int(answer)
    if index in choices:
        selected = choices[index]
        print("Selected " + selected, file=sys.stderr)
        return selected
    print("Invalid " + kind + ", leaving empty", file=sys.stderr)
    return None


def newsentencepair(sentencepairs):
    """Interactively read a new sentence pair and append it to the list.

    Prompts for a reference sentence (with the fragment marked in
    asterisks), the L1 fragment that replaces it, a source and a category.
    The new ``SentencePair`` is appended to ``sentencepairs`` with its
    position in the list as identifier.

    Args:
        sentencepairs: List that receives the new pair; mutated in place.

    Returns:
        The index of the appended pair, or ``False`` when no sentence was
        entered or no fragment was marked. Index 0 is falsy as well, so
        callers should test the result with ``is False``.

    Side effects:
        Reads from stdin, writes prompts and messages to stdout/stderr, and
        increments ``sources[src]`` in the module-level ``sources`` mapping
        when a source was given.
    """
    cursor = len(sentencepairs)
    print("------------------ #" + str(cursor+1) + ": New sentence pair ----------------")
    print("Enter untokenised text (L2), mark fragment in *asterisks*")

    ref = makesentence(input("Reference sentence: ").strip())
    if not ref:
        print("No sentence provided", file=sys.stderr)
        return False

    typedfragment = input("L1 fragment: ")
    fragment = Fragment(tuple(typedfragment.strip().split(" ")))

    # Use the last Fragment in the reference sentence, as the original loop did.
    marked = None
    for token in ref:
        if isinstance(token, Fragment):
            marked = token
    if not marked:
        # Report bad user input instead of asserting: assert is stripped
        # under -O and would otherwise abort the interactive session.
        print("No fragment marked in reference sentence", file=sys.stderr)
        return False

    inputsentence = SentencePair.replacefragment(marked, fragment, ref)

    choices = listsources()
    src = _resolvechoice(input("Source: "), choices, "source")
    if src:
        # NOTE(review): assumes `sources` tolerates unseen keys (e.g. a
        # Counter or defaultdict) -- confirm at its definition.
        sources[src] += 1

    choices = listcats()
    cat = _resolvechoice(input("Category: "), choices, "category")
    # NOTE(review): categories are not counted the way sources are; confirm
    # whether that asymmetry is intentional.

    sentencepairs.append(SentencePair(cursor, inputsentence, None, ref, src, cat))
    return cursor