Example #1
def generate_translation_fsts(self, translation_fst_base=None, draw=False):
    """
    Generates translation FSTs from the input and phrase-table FSTs.
    The translation is then simply the composition of the two.
    """
    if translation_fst_base is None:
        translation_fst_base = self.translation_fst_base

    for i in range(self.num_sentences):

        # The phrase table
        phrase_table_fst = FST("%s-%s" % (self.phrase_table_fst_base, i))

        # Updating osymbols and recompiling is essential: all isymbols
        # of the phrase-table FST should be osymbols of the input FST.
        # This is easily fixed by updating input_fst.osymbols accordingly.
        # More info in the Notes section of the README.
        input_fst = FST("%s-%s" % (self.input_fst_base, i))
        input_fst.osymbols_fn = phrase_table_fst.isymbols_fn
        input_fst.compile()

        # Generate the translation FST and copy the in- and out-symbol files
        phrase_table_fst.sort(how="ilabel").decompile()
        translation = input_fst.compose(phrase_table_fst,
                                        "%s-%s" % (translation_fst_base, i))
        translation.copy_symbols()

        if draw: translation.draw()
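
The composition itself is delegated to the FST wrapper class (which presumably shells out to an FST toolkit such as OpenFst). As a hedged illustration of what composition computes, here is a minimal pure-Python product construction for epsilon-free transducers; every name in this sketch is invented:

def compose(t1, start1, finals1, t2, start2, finals2):
    """Pair up states, matching t1's output symbols to t2's inputs.
    Transducers are dicts: state -> [(isym, osym, next_state)]."""
    start = (start1, start2)
    result, finals, stack, seen = {}, set(), [start], set([start])
    while stack:
        pair = stack.pop()
        s1, s2 = pair
        arcs = result.setdefault(pair, [])
        for isym, mid, n1 in t1.get(s1, []):
            for mid2, osym, n2 in t2.get(s2, []):
                if mid == mid2:
                    nxt = (n1, n2)
                    arcs.append((isym, osym, nxt))
                    if nxt not in seen:
                        seen.add(nxt)
                        stack.append(nxt)
        if s1 in finals1 and s2 in finals2:
            finals.add(pair)
    return result, start, finals

# Input chain for "a b" composed with a toy phrase table a->x, b->y:
t_in = {0: [("a", "a", 1)], 1: [("b", "b", 2)]}
t_pt = {0: [("a", "x", 0), ("b", "y", 0)]}
print(compose(t_in, 0, set([2]), t_pt, 0, set([0])))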
Example #2
def generate_best_derivations_fsts(self, n=100, draw=False):
    """
    Get the best n derivations from a given FST.
    """
    for i in range(self.num_sentences):
        # Note: the output filenames hardcode "100best" regardless of n.
        best_derivations_fn = self.best_derivation_fst_base + ("-%s.100best" %
                                                               i)
        fst = FST("%s-%s" % (self.translation_fst_base, i))
        best_derivations_fst = fst.find_n_best(n, best_derivations_fn)
        best_derivations_fst.decompile()
        best_derivations_fst.copy_symbols()
        if draw: best_derivations_fst.draw()

        with open(self.raw_sentences_fn, 'r') as f:
            orig_sentence = f.read().split("\n")[i].split(" ")

        # Save to file
        out_fn = "%s.100best.%s" % (self.best_derivations_base, i)
        full_out_fn = "%s.100best.%s.full" % (self.best_derivations_base, i)
        translations = get_path_translations(best_derivations_fst.txtfst_fn,
                                             orig_sentence)
        with open(out_fn, "w") as out_f:
            with open(full_out_fn, "w") as full_out_f:
                # Use j here: reusing i would shadow the sentence index.
                for j, (trans, deriv, w) in enumerate(translations):
                    out_f.write(deriv + "\n")
                    full_out_f.write("%s ||| %s ||| %s ||| %s\n" %
                                     (j, trans, deriv, w))
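
The .full file uses a Moses-style " ||| "-separated format. A small reader for it might look like the following sketch; read_nbest is a hypothetical helper, not part of the code above:

def read_nbest(fn):
    """Yield (index, translation, derivation, weight) tuples from a
    '.full' file as written by generate_best_derivations_fsts."""
    with open(fn) as f:
        for line in f:
            idx, trans, deriv, w = [p.strip() for p in line.split("|||")]
            yield int(idx), trans, deriv, float(w)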
Example #3
def generate_self_map(component, options):
    if options.target == 'single-state':
        return FST.EmptySingleStateTranslator()
    elif options.target == 'symbol-only-reconfiguration':
        lookup = {}
        for edge in component.algebra.all_edges():
            _, end = edge
            symbols = component.automata.symbol_lookup[edge]
            lookup[end] = symbols
        return FST.SymbolReconfiguration(lookup, Modifications([], []))
    elif options.target == 'perfect-unification':
        # The original assigned to `result` without returning it.
        return FST.AllPowerfulUnifier(Modifications([], []))
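
The same target dispatch recurs in generate below; if the set of targets grows, a lookup table keeps the branches in one place. A hedged, self-contained sketch with stub builders standing in for the real FST translators:

def make_single_state():
    return "EmptySingleStateTranslator"  # stub for illustration

def make_perfect():
    return "AllPowerfulUnifier"          # stub for illustration

BUILDERS = {
    'single-state': make_single_state,
    'perfect-unification': make_perfect,
}

def build(target):
    if target not in BUILDERS:
        raise ValueError("Unknown target " + target)
    return BUILDERS[target]()

print(build('single-state'))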
Example #4
def generate(unification, to_atma, from_atma, options):
    assert unification is not None

    # Get the lookup tables.
    to_edge_lookup = to_atma.symbol_lookup
    from_edge_lookup = from_atma.symbol_lookup

    best_result = None
    best_structural_modification_count = 1000000
    best_overapproximation_factor = 1000000
    unifiers_attempted = 0

    unification = sorted(unification,
                         key=lambda u: u.structural_modification_count())

    for unifier in unification:
        if not unifier:
            continue
        unifiers_attempted += 1

        if options.target == 'single-state':
            result = unifier.unify_single_state(from_edge_lookup,
                                                to_edge_lookup, options)
        elif options.target == 'symbol-only-reconfiguration':
            result = unifier.unify_symbol_only_reconfigutaion(
                to_edge_lookup, from_edge_lookup, options)
        elif options.target == 'perfect-unification':
            result = FST.AllPowerfulUnifier(
                Modifications(unifier.additions_between_nodes,
                              unifier.additions_from_node))
        else:
            print "Unknown target " + options.target
            sys.exit(1)

        if not result:
            continue

        # We want to return results with 0 structural modification where
        # possible, so if we find one with structural modification, keep
        # going.  We also track the overapproximation factor, i.e. how
        # many edges are overapproximated.
        structural_modification_count = unifier.structural_modification_count()
        overapproximation_factor = result.overapproximation_factor()

        if structural_modification_count == 0 and \
                overapproximation_factor <= 0.000005:
            # Auto-return if we get an answer with no modification and
            # (effectively) no overapproximation.
            return result, None
        elif overapproximation_factor < best_overapproximation_factor or \
                (structural_modification_count < best_structural_modification_count and
                 overapproximation_factor <= best_overapproximation_factor + 0.0005):
            best_result = result
            best_structural_modification_count = structural_modification_count
            best_overapproximation_factor = overapproximation_factor

    if best_result is not None:
        return best_result, None
    elif unifiers_attempted > 0:
        return None, GenerationFailureReason("Unification Failure")
    else:
        return None, GenerationFailureReason("Structural Failure")
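The selection policy above is roughly "take a perfect result immediately, otherwise prefer lower overapproximation, with a small slack when trading it against structural modifications". Ignoring the 0.0005 slack term, a simplified sketch of the same idea (pick_best and candidates are illustrative names):

def pick_best(candidates, eps=0.000005):
    """candidates: list of (result, structural_mods, overapprox) tuples
    with result not None.  Simplified to strict lexicographic order."""
    for result, mods, over in candidates:
        if mods == 0 and over <= eps:
            return result  # perfect answer: return immediately
    if not candidates:
        return None
    return min(candidates, key=lambda c: (c[2], c[1]))[0]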
Example #5
def generate_phrase_table_fsts(self,
                               sentence_ids=None,
                               grammar_base_fn=None,
                               out_base=None,
                               draw=False):
    """
    Generates all phrase-table FSTs.

    Args:
        sentence_ids: a list of ids (numbers) of the sentences (to find the right grammars)
        grammar_base_fn: the base of the grammar files; defaults to Helper value.
        out_base: base of the resulting FSTs; defaults to Helper value for phrase tables.
        draw: draw the FSTs?
    """
    if out_base is None: out_base = self.phrase_table_fst_base
    if sentence_ids is None: sentence_ids = range(self.num_sentences)

    # The feature weights are constant, so look them up once.
    feature_weights = self.get_feature_weights()

    for line_num in sentence_ids:
        fst = FST(out_base + "-" + str(line_num))
        grammar = self.get_grammar(line_num, grammar_base_fn=grammar_base_fn)

        node, fst_txt, isymbols, osymbols = 0, "", [], []
        for rule in grammar:

            parts = rule.split(" ||| ")
            english = parts[1].split(" ")
            japanese = parts[2].split(" ")
            isymbols += english
            osymbols += japanese

            # Calculate additional features
            OOV_count = english.count(self.OOV)
            glue = 1
            word_penalty = float(len(english)) * (-1.0 / math.log(10))

            # Determine the weight (this is the log-linear model)
            weight = 0
            for feature_value in parts[3].split(" "):
                feature, value = feature_value.split("=")
                if feature in feature_weights:
                    weight += feature_weights[feature] * float(value)
            weight += feature_weights['Glue'] * glue
            weight += feature_weights['WordPenalty'] * word_penalty
            weight += feature_weights['PassThrough'] * OOV_count

            # Build the FST
            if len(english) == 1 and len(japanese) == 1:

                # Arc 0 --> 0 labeled english[0]:japanese[0] with the weight of the rule
                fst_txt += "0 0 %s %s %s\n" % (english[0], japanese[0], weight)

            else:
                # Arc 0 --> [new node] labeled english[0]:<eps>, zero cost
                node += 1
                fst_txt += "0 %s %s <eps> 0\n" % (node, english[0])

                for en in english[1:]:
                    # Arc [node] --> [new node] labeled english[i]:<eps>, zero cost
                    node += 1
                    fst_txt += "%s %s %s <eps> 0\n" % (node - 1, node, en)

                for ja in japanese[:-1]:
                    node += 1
                    # Arc [node] --> [new node] labeled <eps>:japanese[i], zero cost
                    fst_txt += "%s %s <eps> %s 0\n" % (node - 1, node, ja)

                # Arc [node] --> 0 labeled <eps>:japanese[-1] with the weight of the rule
                fst_txt += "%s 0 <eps> %s %s\n" % (node, japanese[-1], weight)

        # Add final node
        fst_txt += "0"

        # The in- and out-symbol dictionaries
        isymbols_txt = "<eps> 0\n"
        for i, en in enumerate(set(isymbols)):
            isymbols_txt += "%s %s\n" % (en, i + 1)

        osymbols_txt = "<eps> 0\n"
        for i, ja in enumerate(set(osymbols)):
            osymbols_txt += "%s %s\n" % (ja, i + 1)

        # Update & compile the FST
        fst.update_fst(fst_txt)
        fst.update_isymbols(isymbols_txt)
        fst.update_osymbols(osymbols_txt)
        fst.compile()

        # Drawing large FSTs can take a very long time!
        if draw: fst.draw()
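
For a concrete picture of the arc layout, here is what the multi-word branch above emits for a single rule with english = ["a", "b"], japanese = ["x", "y"] and a made-up weight of 1.5 (a standalone re-run of just that branch):

english, japanese, weight = ["a", "b"], ["x", "y"], 1.5
node, fst_txt = 0, ""
node += 1
fst_txt += "0 %s %s <eps> 0\n" % (node, english[0])
for en in english[1:]:
    node += 1
    fst_txt += "%s %s %s <eps> 0\n" % (node - 1, node, en)
for ja in japanese[:-1]:
    node += 1
    fst_txt += "%s %s <eps> %s 0\n" % (node - 1, node, ja)
fst_txt += "%s 0 <eps> %s %s\n" % (node, japanese[-1], weight)
print(fst_txt)
# 0 1 a <eps> 0
# 1 2 b <eps> 0
# 2 3 <eps> x 0
# 3 0 <eps> y 1.5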
Example #6
def generate_input_fsts(self, sentences=None, out_base=None, draw=False):
    """
    Turns a list of sentences into input transducers. These are
    all stored as .txtfst, .osyms, .isyms, .fst files.
    """
    if sentences is None: sentences = self.get_sentences()
    if out_base is None: out_base = self.input_fst_base

    for line_num, sentence in enumerate(sentences):

        # FST object
        fst = FST("%s-%s" % (out_base, line_num))

        # Create the FST: a linear chain with one arc per word, whose
        # input symbol is the word position and whose output symbol is
        # the word itself.
        words = sentence.split(" ")
        voc = set()
        fst_txt = ""
        isymbols_txt = "<eps> 0\n"
        for i, word in enumerate(words, start=1):
            voc.add(word)
            fst_txt += "%s %s %s %s 0\n" % (i, i + 1, i, word)
            isymbols_txt += "%s %s\n" % (i, i)
        fst_txt += str(i + 1)  # final state

        # Create the out-symbols
        osymbols_txt = "<eps> 0\n"
        for i, word in enumerate(voc):
            osymbols_txt += "%s %s\n" % (word, i + 1)

        # Update fst and compile
        fst.update_fst(fst_txt)
        fst.update_osymbols(osymbols_txt)
        fst.update_isymbols(isymbols_txt)
        fst.compile()

        if draw: fst.draw()
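
To see the text format this produces, running just the chain-building loop on a two-word sentence gives:

sentence = "hello world"
fst_txt = ""
for i, word in enumerate(sentence.split(" "), start=1):
    fst_txt += "%s %s %s %s 0\n" % (i, i + 1, i, word)
fst_txt += str(i + 1)
print(fst_txt)
# 1 2 1 hello 0
# 2 3 2 world 0
# 3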
Example #7
    if BACKWARD:
        reverseText = generate_text(langModel, char2idx,
                                    ['<endCouplet>', '<endLine>'])
        reverseText.reverse()
        print(''.join(reverseText))
    else:
        print(''.join(generate_text(langModel, char2idx, ['<beginCouplet>'])))

    if DATA == "real":
        outp = "./data/OTAP clean data/wordList.txt"
    else:
        outp = "./data/tempWordList.txt"
    #FST.makeWordList("./data/OTAP clean data/total", outp)
    vezn = FST.mefailunmefailun
    fst = FST.FST(vezn, outp)
    constraint1 = ""
    constraint2 = ""
    #fst.constrain(0,constraint1)
    #fst.constrain(1,constraint2)
    if BACKWARD:
        fst.reverse()
    beyts = generateCouplet(fst,
                            langModel,
                            char2idx,
                            BEAMSIZE=BEAMSIZE,
                            BACKWARD=BACKWARD)
    end = time.time()
    f = open(resultPath, "w")
    f.write("Beam size: " + str(BEAMSIZE) + "\n")
    f.write("Constraints:\n")
Example #8
def generate_input_lattices(self, fst_base=None, draw=False, num_sentences=None):
    """
    Generate input lattices (used as input for task 6 later on).
    """
    if num_sentences is None: num_sentences = self.num_sentences
    if fst_base is None: fst_base = self.input_fst_base

    # Get permutations
    perm_dict = self.parse_permutation_file()

    lattice_cost = self.get_feature_weights()['LatticeCost']
    for sentence, perm_vals in perm_dict.iteritems():
        if int(sentence) > num_sentences: continue

        # Build one FST per permuted sentence
        fst = FST("%s-%s" % (fst_base, sentence))

        fst_txt = ""
        isyms, osyms = set(), set()
        isymbols_txt = "<eps> 0\n"
        osymbols_txt = "<eps> 0\n"
        state = 0
        # Loop over all permutations of the sentence
        for prob, perm_positions, perm_words in perm_vals:

            cost = -math.log(prob) * lattice_cost

            for i, (pos, word) in enumerate(zip(perm_positions, perm_words)):
                if i == 0:
                    fst_txt += "%s %s %s %s\n" % (0, state + 1, pos, word)
                elif i == len(perm_positions) - 1:
                    # Add the weight only to the last arc
                    fst_txt += "%s %s %s %s %s\n" % (state, state + 1, pos, word, cost)
                else:
                    fst_txt += "%s %s %s %s\n" % (state, state + 1, pos, word)

                isyms.add(pos)
                osyms.add(word)
                state += 1

            # Mark the end of this permutation's path as a final state
            fst_txt += "%s\n" % (state)

        for i, word in enumerate(osyms):
            osymbols_txt += "%s %s\n" % (word, i + 1)

        for i, word in enumerate(isyms):
            isymbols_txt += "%s %s\n" % (word, i + 1)

        # Update FST
        fst.update_fst(fst_txt)
        fst.update_osymbols(osymbols_txt)
        fst.update_isymbols(isymbols_txt)

        # GO, GO, GO!
        fst.compile().determinize().push().minimize().decompile()

        if draw: fst.draw()
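
Each permutation contributes its cost only on its final arc, as -log(prob) scaled by the LatticeCost feature weight. A quick illustration of that scaling (the 0.5 weight is made up):

import math

lattice_cost = 0.5  # hypothetical LatticeCost feature weight
for prob in (1.0, 0.5, 0.1):
    print("p=%s -> cost=%s" % (prob, -math.log(prob) * lattice_cost))
# p=1.0 -> cost -0.0 (free), p=0.5 -> ~0.347, p=0.1 -> ~1.151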
Example #9
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm
    """
    bigList = [['a', 'e', 'i', 'o', 'u', 'w', 'h', 'y'],
               ['b', 'f', 'p', 'v'],
               ['c', 'g', 'j', 'k', 'q', 's', 'x', 'z'],
               ['d', 't'], ['l'], ['m', 'n'], ['r']]

    # Let's define our first FST
    f1 = FST('soundex-generate')

    # 'start' is the initial state; state next_k means "the last
    # consumed letter was in class k".
    f1.add_state('start')
    f1.initial_state = 'start'

    # All the next_k states are final
    for k in range(7):
        f1.add_state('next_%d' % k)
        f1.set_final('next_%d' % k)

    # Add the arcs.  The seven lists in bigList partition the alphabet,
    # so each letter belongs to exactly one class.
    for l in string.ascii_lowercase:
        cls = next(j for j, group in enumerate(bigList) if l in group)

        # Soundex keeps the first letter verbatim.
        f1.add_arc('start', 'next_%d' % cls, (l), (l))

        for k in range(7):
            state = 'next_%d' % k
            if cls == k:
                # Same class as the previous letter: consume, emit nothing.
                f1.add_arc(state, state, (l), ())
            elif cls == 0:
                # Vowels/h/w/y are dropped, but still reset the class.
                f1.add_arc(state, 'next_0', (l), ())
            else:
                # New class: emit its digit.
                f1.add_arc(state, 'next_%d' % cls, (l), (str(cls)))

    return f1
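
For intuition, the same letter-class table can be applied with plain string code. This sketch mirrors the transducer's behaviour (repeats are only collapsed when adjacent, since class-0 letters reset the state):

bigList = [['a', 'e', 'i', 'o', 'u', 'w', 'h', 'y'],
           ['b', 'f', 'p', 'v'],
           ['c', 'g', 'j', 'k', 'q', 's', 'x', 'z'],
           ['d', 't'], ['l'], ['m', 'n'], ['r']]
digit = {}
for cls, group in enumerate(bigList):
    for l in group:
        digit[l] = str(cls)

def soundex_digits(word):
    out, prev = word[0], digit[word[0]]
    for l in word[1:]:
        d = digit[l]
        if d != prev and d != '0':
            out += d
        prev = d
    return out

print(soundex_digits("jurafsky"))  # j612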
Example #10
def add_zero_padding():
    # Now, the third FST - the zero-padding FST
    f3 = FST('soundex-padzero')

    f3.add_state('1')
    f3.add_state('1a')
    f3.add_state('1b')
    f3.add_state('2')

    f3.initial_state = '1'
    f3.set_final('2')

    # Pass the letter prefix through unchanged
    for letter in string.letters:
        f3.add_arc('1', '1', (letter), (letter))

    # Consume up to three digits
    for number in xrange(10):
        f3.add_arc('1', '1a', (str(number)), (str(number)))
        f3.add_arc('1a', '1b', (str(number)), (str(number)))
        f3.add_arc('1b', '2', (str(number)), (str(number)))

    # Pad with zeros depending on how many digits were seen:
    # two digits -> add one zero, one digit -> two zeros, none -> three.
    f3.add_arc('1b', '2', (), ('0'))
    f3.add_arc('1a', '2', (), ('00'))
    f3.add_arc('1', '2', (), ('000'))

    return f3
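
A plain-string equivalent of what f3 computes, as a sanity check (pad_to_three is a name invented for this sketch):

def pad_to_three(code):
    """Pad the digit tail of a soundex code to exactly three digits."""
    head = code.rstrip('0123456789')
    digits = code[len(head):]
    return head + (digits + '000')[:3]

print(pad_to_three('j61'))  # j610
print(pad_to_three('j'))    # j000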
Example #11
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits
    """

    # Ok so now let's do the second FST, the one that will truncate
    # the number of digits to 3
    f2 = FST('soundex-truncate')

    # Indicate initial and final states
    f2.add_state('1')
    f2.add_state('1b')
    f2.add_state('1c')
    f2.add_state('1d')
    f2.add_state('1e')
    f2.add_state('2')
    f2.add_state('3')
    f2.add_state('4')

    f2.initial_state = '1'
    f2.set_final('2')
    f2.set_final('3')
    f2.set_final('4')

    f2.set_final('1b')
    f2.set_final('1c')
    f2.set_final('1d')
    f2.set_final('1e')

    # Add the arcs.  The 1b-1e chain handles codes with a leading
    # letter; the 2-4 chain handles all-digit input.  In both cases
    # the first three digits are copied through and any further
    # digits are consumed without output.
    for letter in string.letters:
        f2.add_arc('1', '1b', (letter), (letter))

    for n in range(10):
        f2.add_arc('1b', '1c', (str(n)), (str(n)))
        f2.add_arc('1c', '1d', (str(n)), (str(n)))
        f2.add_arc('1d', '1e', (str(n)), (str(n)))
        f2.add_arc('1e', '1e', (str(n)), ())
        f2.add_arc('1', '2', (str(n)), (str(n)))
        f2.add_arc('2', '3', (str(n)), (str(n)))
        f2.add_arc('3', '4', (str(n)), (str(n)))
        f2.add_arc('4', '4', (str(n)), ())

    return f2
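
The plain-string counterpart, for comparison (truncate_digits is invented for this sketch):

def truncate_digits(code):
    """Keep an optional single leading letter and at most three digits
    (mirrors the f2 transducer above)."""
    if code and code[0].isalpha():
        return code[0] + code[1:4]
    return code[:3]

print(truncate_digits('j61234'))  # j612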
Example #12
    def unify_single_state(self, symbol_lookup_1, symbol_lookup_2, options):
        if self.unifier_failed:
            compilation_statistics.ssu_pre_fail += 1
            if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
                print "Failed due to early unification failure"
            return None
        # This unifies into a single state.
        state_lookup = {}
        matching_symbol = {}

        if options.use_unification_heuristics and mapping_heuristic_fail(
                self.from_edges, self.to_edges, symbol_lookup_1,
                symbol_lookup_2, options):
            compilation_statistics.ssu_heuristic_fail += 1
            if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
                print "Failed matching heursitic..."
            return None

        if DEBUG_UNIFICATION:
            print "Starting new unification between "
            if self.algebra_from and self.algebra_to:
                print self.algebra_from.str_with_lookup(symbol_lookup_2)
                print self.algebra_to.str_with_lookup(symbol_lookup_1)

        if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
            if self.algebra_from and self.algebra_from.equals(
                    self.algebra_to, symbol_lookup_1, symbol_lookup_2):
                print "Algebras are actually exactly the same..."
                for i in range(len(self.from_edges)):
                    if symbol_lookup_1[self.from_edges[i]] != symbol_lookup_2[
                            self.to_edges[i]]:
                        print "But edges are not the same..."

            compilation_statistics.exact_same_compilations += 1

        # Generate a mapping that is complete (i.e. misses nothing),
        # but not necessarily correct.
        state_lookup, matching_symbol = generate_complete_mapping(
            self.from_edges, self.to_edges, symbol_lookup_1, symbol_lookup_2,
            options)
        if state_lookup is None:
            compilation_statistics.ssu_complete_mapping_failed += 1
            if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
                print "Failing due to completeness fail"
            return None

        # Make that mapping correct. (i.e. not an overapproximation)
        # Even if we aren't required to do this, it is good
        # to reduce the overapproximation error rate.
        state_lookup, overapproximated_edge_count = generate_correct_mapping(
            state_lookup, self.from_edges, self.to_edges, symbol_lookup_1,
            symbol_lookup_2, options)
        if state_lookup is None:
            compilation_statistics.ssu_correct_mapping_failed += 1
            if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
                print "Failing due to generate correct fail"
            return None

        # Check that we would be able to unify with the structural
        # modifications:  this just has to be an approximation, because
        # we don't really use this to unify, just to guide decisions
        # for a later reconstruction pass.
        all_to_edges = symbol_lookup_2.keys()
        state_lookup, matching_symbol = generate_additions_mapping(
            state_lookup, matching_symbol, self.from_edges, self.to_edges,
            all_to_edges, symbol_lookup_1, symbol_lookup_2,
            self.additions_between_nodes, self.additions_from_node, options)

        if state_lookup is None:
            compilation_statistics.ssu_additions_failed += 1
            if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
                print "Failing due to generate additions failed"
            return None

        # This is correctness, not completeness related:
        non_matching = compute_non_matching_symbol(matching_symbol)
        if options.correct_mapping:
            # Get the non-matching symbols:

            # We can't have a symbol activating an edge that it is not
            # supposed to activate.
            # Go through and get all the valid activating symbols
            # together
            state_lookup = disable_edges(state_lookup, non_matching,
                                         self.get_disabled_edges(),
                                         symbol_lookup_2, options)

            if state_lookup is None:
                compilation_statistics.ssu_disable_edges_failed += 1
                if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
                    print "Failing due to disable edge fail"
                return None

        # Collapse any symbol sets that are still more than one element,
        # and also complete the conversion table to include
        # all characters.
        state_lookup = collapse_and_complete_state_lookup(
            state_lookup, non_matching, options)

        if state_lookup is None:
            if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
                print "Failing due to disabled symbols fail"
            compilation_statistics.ssu_disable_symbols_failed += 1
            return None

        if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
            print "Returning a real result"
        compilation_statistics.ssu_success += 1

        # Assign each modification appropriately translated symbols.
        # There is no return value --- the results are set in each addition.
        modification_state_assigment(state_lookup, symbol_lookup_1,
                                     symbol_lookup_2, self.additions_from_node,
                                     self.additions_between_nodes, options)

        # Compute the overapproximation factor as a number between 0
        # and 1; we can think of this as the fraction of edges that
        # are spuriously activated.
        overapproximation_factor = float(overapproximated_edge_count) / float(
            256 * len(self.from_edges))

        modifications = Modifications(self.additions_from_node,
                                      self.additions_between_nodes)
        return FST.SingleStateTranslator(
            state_lookup,
            modifications,
            unifier=self,
            overapproximation_factor=overapproximation_factor)
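
The overapproximation factor normalises the count of spuriously-enabled (symbol, edge) pairs by the 256 possible symbols per from-edge. With made-up numbers:

overapproximated_edge_count = 13  # hypothetical count
from_edges = range(10)            # hypothetical edge list
factor = float(overapproximated_edge_count) / float(256 * len(from_edges))
print(factor)  # ~0.0051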
Example #13
    def unify_symbol_only_reconfigutaion(self, symbol_lookup_1,
                                         symbol_lookup_2, options):
        if self.unifier_failed:
            if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
                print "Failed due to early-unification failure"
            return None
        # In this unification method, we can unify each state individually ---
        # giving us much better compression.
        state_lookup = {}
        if DEBUG_UNIFICATION or PRINT_UNIFICATION_FAILURE_REASONS:
            print "Starting new unification between (symbol-only-reconfiguration mode)"
            print self.algebra_from.str_with_lookup(symbol_lookup_1)
            print self.algebra_to.str_with_lookup(symbol_lookup_2)

        # This generates lookups for symbol-only reconfiguration ---
        # to support something like the symbol reconfigurability in
        # the AP, it could be made a bit more general (IIUC).  In that
        # case it would also need different outputs anyway, so the
        # unification phase would need to be rejigged.

        for i in range(len(self.from_edges)):
            # Try and unify the individual edges  --- This should almost always
            # work.
            from_edge = self.from_edges[i]
            to_edge = self.to_edges[i]

            # since each state is homogeneous, the question is "does this
            # state get enabled on this particular input character?"
            # and we aim to change the answer from the one in 'from_edge'
            # to the one in to_edge.
            _, dest_state = from_edge
            if dest_state in state_lookup:
                lookup = state_lookup[dest_state]
                # We want to enable every character coming into this
                # edge, but we cannot double-map.  First check that
                # every character that needs to be mapped is already
                # in the lookup.
                for character in symbol_lookup_2[from_edge]:
                    # If the table is already set, then
                    # we need to make sure we are not changing
                    # the table:
                    if character not in lookup:
                        if PRINT_UNIFICATION_FAILURE_REASONS or DEBUG_UNIFICATION:
                            print "Unification failed due to double-mapped state"
                        return None
                # Also check that no already-mapped symbol is one
                # that should not be enabled.
                compilation_character_set = FastSet(symbol_lookup_2[from_edge])
                for character in lookup:
                    if character not in compilation_character_set:
                        if PRINT_UNIFICATION_FAILURE_REASONS or DEBUG_UNIFICATION:
                            print "Unification failed due to double-mapped state"
                        return None

            else:
                lookup = {}
                for character in symbol_lookup_2[from_edge]:
                    lookup[character] = True

                state_lookup[dest_state] = lookup

        modifications = Modifications(self.additions_from_node,
                                      self.additions_between_nodes)

        # Set the initial symbols for the additions.
        symbol_only_lookup_modifications_setup_lookup(modifications,
                                                      symbol_lookup_1)
        # We also need to set the symbol reconfigurations
        # for the added edge.  These will be added
        # to the SymbolReconfiguration when the accelerator
        # is updated.
        symbol_only_reconfig_setup_modification_configurations(
            modifications, symbol_lookup_2)

        # Sanity check the added modifications to make sure they're
        # valid.
        for mod in modifications.all_modifications():
            symbol_only_reconfgiruation_modification_sanity_check(mod)

        return FST.SymbolReconfiguration(state_lookup, modifications)
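
The per-state check above amounts to requiring that each destination state is driven by exactly one character set (the two loops check inclusion in both directions). A toy distillation of that rule, with invented names:

def merge_state_symbols(assignments):
    """assignments: iterable of (dest_state, characters).  Each state
    must always see the same character set; a conflict fails, like the
    'double-mapped state' case above."""
    state_lookup = {}
    for dest_state, chars in assignments:
        chars = frozenset(chars)
        if dest_state in state_lookup and state_lookup[dest_state] != chars:
            return None  # double-mapped state
        state_lookup[dest_state] = chars
    return state_lookup

print(merge_state_symbols([(1, "ab"), (2, "c"), (1, "ab")]))  # succeeds
print(merge_state_symbols([(1, "ab"), (1, "cd")]))            # None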