def adapt_translation(source, matches=None):
    """
    Translate a new message by adapting the translation of a similar one.

    Each match is tried in turn: the matched source text is diffed against
    `source`, the accepted differences are collected as operations, and
    those operations are then replayed on the matched translation.

    source  -- the text to translate
    matches -- iterable of (match_source, match_translation) pairs;
               defaults to no matches

    Returns a (safe, translation) tuple where `safe` is:
        True:  translated
        False: fuzzy
        None:  untranslated (`translation` is then None as well)
    """
    # A None default avoids sharing one mutable list between calls
    if matches is None:
        matches = ()

    # Start with a null result
    result = None, None

    # The split of the source does not depend on the match, so it is
    # computed only once, and only if there is at least one match
    source_list = None

    for match_source, match_translation in matches:
        if source_list is None:
            source_list = patterns.split(source)
        match_source_list = patterns.split(match_source)

        # Find out how the matched source has to change to become the source
        safe, operations = _collect_operations(source_list, match_source_list)
        if safe is None:
            # The differences are not acceptable, try another match
            continue

        # Replay the same changes on the matched translation
        safe, translation_list = _apply_operations(
            operations, patterns.split(match_translation),
            match_source_list, safe)

        # Keep this match if there is no previous result, or if it is safe
        # while the previous one was not
        if (result[0] is None and safe is not None) or (not result[0] and safe):
            result = (safe, patterns.join(translation_list))
            if safe:
                # A safe translation can not be improved by other matches
                break

    return result


def _collect_operations(source_list, match_source_list):
    """
    Diff the matched source against the new source and collect the edits.

    Returns a (safe, operations) tuple.  `safe` is True when all the
    differences are acceptable, False when they are acceptable but the
    letter case of some unchanged text differs (fuzzy), and None when an
    unacceptable difference was found.  `operations` is the dictionary
    filled in by check_sequence_operations().
    """
    # Groups of genera that may replace each other (only within a group)
    replaceable_groups = (
        ['placeholder', 'branding', 'syntax'],
        ['number'],
        ['fixed'],
        ['punctuation'],
    )
    # Genera that may be deleted, or inserted at either end of the text
    removable = ['placeholder', 'branding', 'syntax', 'number',
                 'punctuation', 'control']

    safe = True
    operations = {}

    seq = SequenceMatcher(None, match_source_list, source_list)
    for tag, i1, i2, j1, j2 in seq.get_opcodes():
        from_seq = match_source_list[i1:i2]
        to_seq = source_list[j1:j2]

        if tag == 'equal':
            # Same items, but a different letter case in text makes the
            # result fuzzy at best
            if any(from_word.genus == 'text' and
                   from_word.key(ignorecase=False) != to_word.key(ignorecase=False)
                   for from_word, to_word in zip(from_seq, to_seq)):
                safe = False
            continue

        if tag == 'replace':
            # Both sides have to belong to the same group of genera
            accepted = any(
                check_sequence(from_seq, genera=list(group)) and
                check_sequence(to_seq, genera=list(group))
                for group in replaceable_groups)
        elif tag == 'delete':
            accepted = check_sequence(from_seq, genera=list(removable))
            to_seq = []
        elif tag == 'insert':
            accepted = check_sequence(to_seq, genera=list(removable))
            if accepted:
                # Insertion is allowed only at the very beginning or at
                # the very end; a pseudo-node marks which one it is
                if i1 == 0:
                    anchor = '^'
                elif i1 == len(match_source_list):
                    anchor = '$'
                else:
                    return None, operations
                from_seq = [patterns.SplitNode(text='', genus='~',
                                               species=anchor)]
        else:
            # Unknown operation
            accepted = False

        # The operation also has to be consistent with the ones already
        # collected (as judged by check_sequence_operations)
        if not accepted or \
           not check_sequence_operations(operations, from_seq, to_seq):
            return None, operations

    return safe, operations


def _apply_operations(operations, match_translation_list, match_source_list,
                      safe):
    """
    Replay the collected operations on a copy of the matched translation.

    `operations` maps (from_items, to_items) pairs of tuples to the number
    of occurrences expected in the text.  `safe` is the safety level
    reached so far (True or False).

    Returns a (safe, translation_list) tuple; `safe` is None when no
    operation could be applied, even after extrapolating.
    """
    # Work on a copy so the caller's list is left untouched
    translation_list = list(match_translation_list)

    # A real list is required: on Python 3 items() is a view, which never
    # compares equal to the `failed` list below
    ops = list(operations.items())
    failed = []
    # Do not extrapolate from the start
    extrapolate = False

    while ops:
        for op, count in ops:
            from_item, to_item = op
            # Convert the tuples back into lists
            from_seq = list(from_item)
            to_seq = list(to_item)

            # A pseudo-node as 'from' means insertion at one of the ends
            if len(from_seq) == 1 and from_seq[0].genus == '~':
                if from_seq[0].species == '$':
                    translation_list.extend(to_seq)
                elif from_seq[0].species == '^':
                    translation_list = to_seq + translation_list
                continue

            # The operation applies cleanly only if 'from' occurs in the
            # translation as many times as it was changed in the source
            found = sublist_count(from_seq, translation_list,
                                  ignore=['space'])
            if found != count:
                if extrapolate and sublist_count(
                        from_seq, match_source_list,
                        ignore=['space']) == count:
                    # The count means 'all of them' in the source, so all
                    # the occurrences in the translation are changed, but
                    # the result is fuzzy at best
                    safe = False
                else:
                    failed.append((op, count))
                    continue

            # Replace from the end so the earlier positions stay valid
            positions = sublist_positions(from_seq, translation_list,
                                          ignore=['space'])
            for start, end in reversed(positions):
                translation_list[start:end] = to_seq

        if not failed:
            # Everything was applied, keep the current safety level
            break

        if ops == failed:
            if extrapolate:
                # Nothing could be applied even when extrapolating.  If
                # nothing was applied before either (safe is still True),
                # the match is useless; otherwise keep the fuzzy result.
                if safe:
                    safe = None
                break
            # Retry the same operations, this time extrapolating.  The
            # failed list has to start empty again, otherwise it collects
            # duplicates and can never equal `ops` on the next pass.
            extrapolate = True
            failed = []
        else:
            # Only some operations have failed; the ones already applied
            # may have unblocked them, so try them again
            ops = failed[:]
            failed = []
            safe = False

    return safe, translation_list