예제 #1
0
def _compute_fst_results_for_comparison(comparison_key, cohort_a, cohort_b,
                                        otu_defs, geno_samples, data_dir,
                                        file_owner):
    """Dump the FST inputs for one cohort comparison and run FST on them.

    The comparison classification and the feature metadata are written via
    ``fst_input_etl``, an FST config is built (the debugging variant when
    ``DEBUGGING_CONFIG`` is truthy), ``JOBLIB_TEMP_FOLDER`` is pointed at the
    scratch directory derived from ``data_dir`` and ``FST.refuse()`` is
    called. Progress is printed before and after the run.

    Returns None.
    """
    fst_input_etl.dump_comparison_classification(
        comparison_key, cohort_a, cohort_b, geno_samples, otu_defs, data_dir,
        file_owner=file_owner)
    fst_input_etl.dump_feature_metadata(otu_defs, data_dir,
                                        file_owner=file_owner)

    if DEBUGGING_CONFIG:
        build_config = fst_input_etl._make_debugging_config
    else:
        build_config = fst_input_etl._make_fst_config
    config = build_config(comparison_key, data_dir, otu_defs)

    scratch = dirname_of_fst_scratch(data_dir)
    file_utils.ensure_directory(scratch, file_owner=file_owner)
    # Side effect: this changes the environment of the running process.
    os.environ['JOBLIB_TEMP_FOLDER'] = scratch

    runner = FST(config, True)  # second positional argument is "noprompt"
    print('begin FST.refuse(): ' + comparison_key)
    runner.refuse()
    print('end FST.refuse(): ' + comparison_key)
예제 #2
0
    def fstDistance(self, data):
        """Return the matrix of pairwise FST distances between columns of data.

        Entry [i, j] holds ``FST.computeDistance(column i, column j)``. Every
        ordered pair is evaluated, including i == j.
        """
        calculator = FST()
        n_samples = data.shape[1]
        distances = np.zeros((n_samples, n_samples))

        for row in range(n_samples):
            for col in range(n_samples):
                # A sample's copy number profile is one whole column.
                distances[row, col] = calculator.computeDistance(
                    data[:, row], data[:, col])
        return distances
예제 #3
0
def french_count():
    """Return a one-state FST named 'french' that translates single digits.

    The state 'start' is both initial and final. For each digit 0-9 a
    self-loop is added whose input is ``[str(digit)]`` and whose output is
    ``[kFRENCH_TRANS[digit]]``.
    """
    only_state = 'start'

    translator = FST('french')
    translator.add_state(only_state)
    translator.initial_state = only_state

    for digit in xrange(10):
        translator.add_arc(only_state, only_state,
                           [str(digit)], [kFRENCH_TRANS[digit]])

    translator.set_final(only_state)
    return translator
def computeCTreeError(cMatrix, realTree):
    """Return the averaged ancestry swap error of the tree inferred from cMatrix.

    A pairwise FST distance matrix is computed between the columns of
    ``cMatrix``, a tree is inferred from it with ``generateInitialTree`` and
    ``computeMST``, and that tree is compared with ``realTree`` through
    ``computeAncestrySwapError``. The two error counts are summed and divided
    by the number of sample pairs.
    """
    n_samples = cMatrix.shape[1]

    # Pairwise distances between samples; every cell is filled in below.
    distances = np.empty([n_samples, n_samples], dtype=float)
    for first in range(n_samples):
        for second in range(n_samples):
            # One whole column per sample; a fresh FST is used for each pair.
            distances[first, second] = FST().computeDistance(
                cMatrix[:, first], cMatrix[:, second])

    # Infer the tree from the complete distance graph.
    complete_graph = generateInitialTree(distances, realTree.vertices)
    inferred = computeMST(complete_graph, realTree.vertices)

    absent_error, present_error, pair_count = computeAncestrySwapError(
        realTree, inferred)
    return (absent_error + present_error) / float(pair_count)
예제 #5
0
def run_experiment():
    """Train a binary activator on FSTs of growing size and log the curves.

    For k = 10, 20, ..., 200 an FST is loaded with
    ``load_fst_by_nodes_to_add(k)``, a ``BinaryModule`` is trained on a
    dataset built from it (``EPOCHS`` set to 100, ``validate_rate=10``) and
    six comma-separated rows are written to ``score.csv``: train/dev loss,
    accuracy and AUC, each row prefixed with ``k``.
    """
    # The context manager guarantees the score file is flushed and closed,
    # even when training raises part-way through the sweep.
    with open("score.csv", "wt") as score:
        for k in range(10, 201, 10):
            if k == 0:
                # Baseline automaton. Never taken with the range above; kept
                # from the original code, presumably so the baseline can be
                # re-enabled by starting the sweep at 0.
                alphabet, states, init_state, accept_states, transitions = (
                    SIMPLIFIED_JSON_ALPHABET, SIMPLIFIED_JSON_STATES,
                    SIMPLIFIED_JSON_INIT_STATE, SIMPLIFIED_JSON_ACCEPT_STATES,
                    SIMPLIFIED_JSON_TRANSITIONS)
            else:
                alphabet, states, init_state, accept_states, transitions = \
                    load_fst_by_nodes_to_add(k)
                # Only the first element of the loaded init_state is used.
                init_state = init_state[0]

            fst = FST(alphabet, states, init_state, accept_states, transitions)
            fst_dataset = FstDataset(BinaryFSTParams(), fst=fst)

            activator_params = BinaryActivatorParams()
            activator_params.EPOCHS = 100

            model = BinaryModule(
                BinaryModuleParams(alphabet_size=len(fst_dataset.chr_embed)))
            activator = binaryActivator(model, activator_params, fst_dataset,
                                        split_fst_dataset)
            activator.train(validate_rate=10)

            # Row label (including its trailing comma) -> recorded values.
            curves = (
                ("train_loss,", activator.loss_train_vec),
                ("train_acc,", activator.accuracy_train_vec),
                ("train_auc,", activator.auc_train_vec),
                ("dev_loss,", activator.loss_dev_vec),
                ("dev_acc,", activator.accuracy_dev_vec),
                ("dev_auc,", activator.auc_dev_vec),
            )
            for label, values in curves:
                score.write(
                    str(k) + label + ",".join(str(v) for v in values) + "\n")
예제 #6
0
def fst_test2(test_string):
    """Print test_string next to its transduction by a b-removing FST.

    The machine has a single state ``q0`` (both start and final). On ``a``
    it outputs ``a``; on ``b`` it outputs the empty string, so every ``b``
    is dropped.

    Prints ``"<input> : <output>"`` and returns None.
    """
    fst_states = ["q0"]
    fst_in_alph = ["a", "b"]
    fst_out_alph = ["a", "b"]
    fst_start = "q0"
    fst_final = ["q0"]
    # state -> input symbol -> [next state, output string]
    fst_trans = {
        "q0": {
            "a": ["q0", "a"],
            "b": ["q0", ""]
        },
    }

    test_fst = FST(fst_states, fst_in_alph, fst_out_alph, fst_start, fst_final,
                   fst_trans)

    # The parenthesised single-argument form prints the same text on
    # Python 2 and is valid syntax on Python 3.
    print(test_string + " : " + test_fst.transduce_string(test_string))
예제 #7
0
def french_count():
    """Build the single-state 'french' FST for one-digit numbers.

    'start' is the initial and the final state. Each digit 0-9 gets a
    self-loop with input ``str(digit)`` and output ``[kFRENCH_TRANS[digit]]``.
    """
    fst = FST('french')
    state = 'start'

    fst.add_state(state)
    fst.initial_state = state

    for number in xrange(10):
        translation = [kFRENCH_TRANS[number]]
        fst.add_arc(state, state, str(number), translation)

    fst.set_final(state)
    return fst
예제 #8
0
def french_count():
    """Return an FST called "french" with one looping state per digit arc.

    The state "start" serves as initial and final state. For every digit
    0-9 an arc "start" -> "start" is added that reads ``str(digit)`` and
    emits ``[kFRENCH_TRANS[digit]]``.
    """
    loop_state = "start"

    machine = FST("french")
    machine.add_state(loop_state)
    machine.initial_state = loop_state

    for value in xrange(10):
        symbol = str(value)
        machine.add_arc(loop_state, loop_state, symbol, [kFRENCH_TRANS[value]])

    machine.set_final(loop_state)
    return machine
예제 #9
0
    def rand_fst(self, size_states, size_alphabet, num_accept_states):
        """Build a random FST with the requested numbers of states and symbols.

        Symbols are named "sym0", "sym1", ... and states "q0", "q1", ...;
        the start state is always "q0". ``num_accept_states`` states are
        picked with ``sample`` and each is paired with the value 1. Every
        (state, symbol) pair gets exactly one transition whose target is
        picked with ``choice``.
        """
        symbols = ["sym" + str(idx) for idx in range(size_alphabet)]
        state_names = ["q" + str(idx) for idx in range(size_states)]
        accepting = [(name, 1)
                     for name in sample(state_names, num_accept_states)]
        arcs = [(source, symbol, choice(state_names))
                for source in state_names
                for symbol in symbols]
        return FST(symbols, set(state_names), "q0", accepting, arcs)
예제 #10
0
    def fstAlleleDistance(self, data, samples):
        """Compute pairwise allele distances between the columns of data.

        For every ordered pair (i, j) of columns,
        ``FST.computeAlleleDistance`` is called with the two column profiles
        and the matching entries of ``samples``. Element 0 of its result is
        stored as the message for (i, j), element 1 as the distance.

        The distance matrix is also printed.

        Returns:
            ``[messages, dm]`` where ``messages`` maps ``(i, j)`` to the
            message and ``dm`` is the square distance matrix.
        """
        # Original author's note: this could have been more efficient when
        # we pass objects.
        fst = FST()
        sampleCount = data.shape[1]

        dm = np.zeros((sampleCount, sampleCount))
        messages = dict()
        for i in range(sampleCount):
            for j in range(sampleCount):
                # A copy number profile is one whole column of the input.
                returnValue = fst.computeAlleleDistance(
                    data[:, i], data[:, j], samples[i], samples[j])
                messages[(i, j)] = returnValue[0]
                dm[i, j] = returnValue[1]

        # Parenthesised single-argument prints behave the same on Python 2
        # and are valid syntax on Python 3.
        print("distances: ")
        print(dm)
        return [messages, dm]
예제 #11
0
def letters_to_numbers():
    """
    Return the FST meant to convert letters to numbers as specified by the
    soundex algorithm.

    Only the skeleton is built here: states 'start' (initial) and 'next'
    (final) are declared and no arcs are added.
    """
    generator = FST('soundex-generate')

    # Declare the states; 'start' is the initial one.
    for name in ('start', 'next'):
        generator.add_state(name)
    generator.initial_state = 'start'

    # 'next' is the only final state.
    generator.set_final('next')

    return generator
예제 #12
0
def fst_test(test_string):
    """Print test_string next to its transduction by a two-state FST.

    The FST replaces the first 'a' of the input with 'ba'. In ``q0`` a 'b'
    is copied and an 'a' emits "ba" and moves to ``q1``; in ``q1`` both
    symbols are copied unchanged. Both states are final.

    Prints ``"<input> : <output>"`` and returns None.
    """
    fst_states = ["q0", "q1"]
    fst_in_alph = ["a", "b"]
    fst_out_alph = ["a", "b"]
    fst_start = "q0"
    fst_final = ["q0", "q1"]
    # state -> input symbol -> [next state, output string]
    fst_trans = {
        "q0": {
            "a": ["q1", "ba"],
            "b": ["q0", "b"]
        },
        "q1": {
            "a": ["q1", "a"],
            "b": ["q1", "b"]
        },
    }

    test_fst = FST(fst_states, fst_in_alph, fst_out_alph, fst_start, fst_final,
                   fst_trans)

    # The parenthesised single-argument form prints the same text on
    # Python 2 and is valid syntax on Python 3.
    print(test_string + " : " + test_fst.transduce_string(test_string))
예제 #13
0
def add_zero_padding():
    """Return the skeleton of the zero-padding FST ('soundex-padzero').

    Only the two states are declared here -- '1' (initial) and '2' (final);
    no arcs are added.
    """
    padder = FST('soundex-padzero')

    for name in ('1', '2'):
        padder.add_state(name)

    padder.initial_state = '1'
    padder.set_final('2')

    return padder
예제 #14
0
파일: soundex.py 프로젝트: Pinafore/cl1-hw
def truncate_to_three_digits():
    """
    Create the FST intended to truncate a soundex string to three digits.

    As written, the machine has a single state '1' (initial and final) with
    one self-loop per letter of ``string.letters`` and per digit 0-9, each
    copying its input symbol to the output.
    """
    state = '1'

    truncator = FST('soundex-truncate')
    truncator.add_state(state)
    truncator.initial_state = state
    truncator.set_final(state)

    # Letters are copied through unchanged.
    for char in string.letters:
        truncator.add_arc(state, state, char, char)

    # Digits are copied through unchanged as well.
    for value in range(10):
        digit = str(value)
        truncator.add_arc(state, state, digit, digit)

    return truncator
예제 #15
0
파일: build_fst.py 프로젝트: jtwaugh/fsot
def fst_from_prohibited_string(input_alphabet, output_alphabet, banned_string,
                               violation_name):
    """Build an FST that assigns a violation to occurrences of banned_string.

    States are named after prefixes of ``banned_string``: the FST starts
    with the empty-string state and one state is added for every proper,
    non-empty prefix. A single penalty arc, labelled with
    ``Counter({violation_name: 1})``, leaves the longest-prefix state on the
    last character of ``banned_string``.

    Args:
        input_alphabet: characters for which an elision arc is added at
            every state.
        output_alphabet: characters considered when adding the remaining
            alternation arcs at every state.
        banned_string: the prohibited string. NOTE(review): an empty string
            makes ``banned_string[-1]`` raise IndexError.
        violation_name: key under which the penalty is counted.

    Returns:
        The constructed FST.
    """
    length = len(banned_string)
    # NOTE(review): the meaning of the positional arguments depends on the
    # project's FST constructor; the first one is the initial state set.
    fst = FST(set([""]), "", set(), set(), "")
    # Add arcs
    if length > 1:
        for i in range(1, length):
            # The state for the prefix of length i, reached from the prefix
            # of length i - 1 on character banned_string[i - 1].
            fst.states.add(banned_string[0:i])
            add_alternation_arcs(fst, banned_string[0:i - 1],
                                 banned_string[0:i], '_', banned_string[i - 1])
    # Send a penalty arc to the longest valid suffix
    fst.arcs.add(
        Arc(banned_string[0:-1], longest_suffix(banned_string, fst.states),
            Label('_', banned_string[-1], Counter({violation_name: 1}))))
    # Add loopback arcs and return
    for state in fst.states:
        for char in input_alphabet:
            add_elision_arc(fst, state, char)
        # For characters that fst.chars_not_leaving reports for this state,
        # go to the longest suffix of (state + char) that is itself a state.
        for char in fst.chars_not_leaving(state, output_alphabet):
            add_alternation_arcs(fst, state,
                                 longest_suffix(state + char, fst.states), '_',
                                 char)
    return fst
예제 #16
0
    def generate(self, analysis):
        """Generate the morphologically correct word.

        e.g.
        p = Parser()
        analysis = ['p','a','n','i','c','+past form']
        p.generate(analysis)
        ---> 'panicked'

        NOTE: placeholder implementation -- the FST is created but never
        used, and the returned word does not depend on ``analysis``.
        """
        # First FST (no states or arcs are added yet).
        generator_fst = FST('morphology-generate')

        pieces = ('p', 'a', 'n', 'i', 'c', 'k', 'e', 'd')
        return ''.join(pieces)
예제 #17
0
    def parse(self, word):
        """Parse a word morphologically.

        e.g.
        p = Parser()
        word = ['p','a','n','i','c','k','i','n','g']
        p.parse(word)
        ---> 'panic+present participle form'

        NOTE: placeholder implementation -- the FST is created but never
        used, and the returned analysis does not depend on ``word``.
        """
        # Second FST (no states or arcs are added yet).
        parser_fst = FST('morphology-parse')

        pieces = ('p', 'a', 'n', 'i', 'c', '+present participle form')
        return ''.join(pieces)
def computeATreeError(aMatrix, lafMatrix, afMatrix, realTree):
    """Score the tree inferred from allele data against realTree.

    Every entry of ``aMatrix`` is turned into an ``Alleles`` object holding
    the number of 'A' and 'B' characters it contains. Pairwise distances
    between samples (columns) are computed with
    ``FST.computeAlleleDistance``, a tree is inferred with
    ``generateInitialTree`` and ``computeMST``, and the result of
    ``SimulationErrorHandler.computeTreeError`` is returned.
    """
    n_samples = aMatrix.shape[1]

    # Convert the allele strings into Alleles objects (A count, B count).
    allele_objects = np.empty(aMatrix.shape, dtype=object)
    for r in range(aMatrix.shape[0]):
        for c in range(aMatrix.shape[1]):
            genotype = aMatrix[r][c]
            a_total = sum(1 for _ in re.finditer('A', genotype))
            b_total = sum(1 for _ in re.finditer('B', genotype))
            allele_objects[r][c] = Alleles(a_total, b_total)

    # Pairwise distances between samples; every cell is filled in below.
    distances = np.empty([n_samples, n_samples], dtype=float)
    chromosomes, positions, segmentation, _arms = parseReferenceFile()

    def dummy_sample(column):
        # Minimal Sample carrying only what the FST distance function reads.
        sample = Sample(None, None)
        sample.measurements = LAF(lafMatrix[:, column], chromosomes,
                                  positions, positions)
        sample.measurements.segmentation = segmentation
        sample.afMeasurements = afMatrix[:, column]
        return sample

    for first in range(n_samples):
        for second in range(n_samples):
            first_obj = dummy_sample(first)
            second_obj = dummy_sample(second)

            # The distance is computed for the entire column at once.
            _messages, dist = FST().computeAlleleDistance(
                allele_objects[:, first], allele_objects[:, second],
                first_obj, second_obj)
            distances[first, second] = dist

    # Infer the tree and score it against the real one.
    complete_graph = generateInitialTree(distances, realTree.vertices)
    spanning_tree = computeMST(complete_graph, realTree.vertices)
    handler = SimulationErrorHandler()
    return handler.computeTreeError([spanning_tree], realTree)
예제 #19
0
def letters_to_numbers():
    """
    Return the FST meant to convert letters to numbers as specified by the
    soundex algorithm.

    Only the state skeleton is created: 'start' is the initial state and
    'next' the single final state. No arcs are added here.
    """
    machine = FST('soundex-generate')

    initial, final = 'start', 'next'
    machine.add_state(initial)
    machine.add_state(final)
    machine.initial_state = initial

    machine.set_final(final)

    return machine
예제 #20
0
def add_zero_padding():
    """Return the state skeleton of the 'soundex-padzero' FST.

    State '1' is the initial state and state '2' the final state; no arcs
    are added here.
    """
    initial, final = '1', '2'

    machine = FST('soundex-padzero')
    machine.add_state(initial)
    machine.add_state(final)

    machine.initial_state = initial
    machine.set_final(final)

    return machine
예제 #21
0
def truncate_to_three_digits():
    """
    Create the FST intended to truncate a soundex string to three digits.

    Only the skeleton exists so far: a single state '1' that is both the
    initial and the final state, with no arcs.
    """
    only_state = '1'

    truncator = FST('soundex-truncate')
    truncator.add_state(only_state)
    truncator.initial_state = only_state
    truncator.set_final(only_state)

    return truncator
예제 #22
0
파일: soundex.py 프로젝트: dvilinsky/CS-114
def add_zero_padding():
    """Build the third soundex FST, 'soundex-padzero' (zero padding).

    Three states are declared: 'start' (initial), 'just_numbers' and
    'letter_first'. ``add_numbers`` wires 'start' to 'just_numbers', every
    ASCII letter is copied on an arc from 'start' to 'letter_first', and the
    rest of the machine is delegated to ``build_letter_first`` and
    ``build_number_first``, which both receive the same list of epsilon
    state names.
    """
    initial = 'start'
    digits_only = 'just_numbers'
    after_letter = 'letter_first'
    epsilon_names = ['e0', 'e1', 'e2', 'e3', 'e4', 'e5']

    padder = FST('soundex-padzero')
    for name in (initial, digits_only, after_letter):
        padder.add_state(name)
    padder.initial_state = initial

    add_numbers(padder, initial, digits_only)
    for char in string.ascii_letters:
        padder.add_arc(initial, after_letter, char, char)

    build_letter_first(padder, epsilon_names, after_letter)
    build_number_first(padder, epsilon_names, digits_only)

    return padder
예제 #23
0
파일: build_fst.py 프로젝트: jtwaugh/fsot
def fst_intersect(m, n):
    """Build the intersection (product) FST of the two FSTs m and n.

    Product states are created with ``product_state``. For every
    combination yielded by ``states_mega_product`` the label pairs returned
    by ``similar_labels_between`` are examined and at most one arc per input
    segment is added. Only states reachable from the product start state
    (per ``traverse_states``) are kept.

    Returns:
        The new FST. NOTE(review): the arcs are passed as the result of
        ``filter``; on Python 3 that is a one-shot iterator rather than a
        list -- confirm the FST constructor copes with that.
    """
    arcs = set()
    # Outgoing arcs of every product state, filled in below and used later
    # for the reachability traversal.
    state_lookup = dict(
        (product_state(p, q), set()) for p, q in states_set_product(m, n))
    start = product_state(m.start, n.start)
    # Compute arcs for each state pair
    for ((x, y), (z, w)) in states_mega_product(m, n):
        labels_lists = similar_labels_between(m, x, y, n, z, w)
        # Inputs that already produced an elision arc for this state pair.
        elision_arcs = set()
        for labels_list in labels_lists:
            # Inputs that already produced an arc within this labels_list.
            arcs_by_input = set()
            for (k, l) in labels_list:
                add_arc = False
                seg = ''
                if k.output == '':
                    # Faithfulness constraint; cares about input
                    if k.input == l.input:
                        if k.input not in elision_arcs:
                            add_arc = True
                            seg = k.input
                            elision_arcs.add(seg)
                elif ((k.input == '_') or
                      (k.input == l.input)) and (l.input not in arcs_by_input):
                    # Markedness constraint
                    # k has the '_' input or agrees with l: use l's input.
                    add_arc = True
                    seg = l.input
                    arcs_by_input.add(seg)
                elif (l.input == '_') and (k.input not in arcs_by_input):
                    # Markedness constraint
                    # l has the '_' input: use k's input.
                    add_arc = True
                    seg = k.input
                    arcs_by_input.add(seg)
                if add_arc:
                    # The new arc keeps k's output and combines both
                    # violations with otimes.
                    intersection_arc = Arc(
                        product_state(x, z), product_state(y, w),
                        Label(seg, k.output, otimes(k.violation, l.violation)))
                    arcs.add(intersection_arc)
                    state_lookup[intersection_arc.start].add(intersection_arc)
    # Figure out the states reachable from the start
    fst_states = traverse_states(state_lookup, start)
    # The reachable states are passed twice; only arcs starting in a
    # reachable state are kept.
    fst = FST(fst_states, start, fst_states,
              filter((lambda arc: arc.start in fst_states), arcs), 1)
    return fst
예제 #24
0
def truncate_to_three_digits():
    """
    Create the FST intended to truncate a soundex string to three digits.

    Only the skeleton is built: state '1' is declared and marked as both
    the initial and the final state; no arcs are added.
    """
    state_name = '1'

    machine = FST('soundex-truncate')
    machine.add_state(state_name)
    machine.initial_state = state_name
    machine.set_final(state_name)

    return machine
def computeCTreeError(cMatrix, realTree):
    """Score the tree inferred from cMatrix against realTree.

    A pairwise FST distance matrix is computed between the columns of
    ``cMatrix``, a tree is inferred from it with ``generateInitialTree`` and
    ``computeMST``, and the result of
    ``SimulationErrorHandler.computeTreeError`` for that tree is returned.
    """
    n_samples = cMatrix.shape[1]

    # Pairwise distances between samples; every cell is filled in below.
    distances = np.empty([n_samples, n_samples], dtype=float)
    for first in range(n_samples):
        for second in range(n_samples):
            # One whole column per sample; a fresh FST is used for each pair.
            distances[first, second] = FST().computeDistance(
                cMatrix[:, first], cMatrix[:, second])

    # Infer the tree and score it against the real one.
    complete_graph = generateInitialTree(distances, realTree.vertices)
    spanning_tree = computeMST(complete_graph, realTree.vertices)
    handler = SimulationErrorHandler()
    return handler.computeTreeError([spanning_tree], realTree)
예제 #26
0
    def generate(self, analysis):
        """Generate the morphologically correct word.

        e.g.
        p = Parser()
        analysis = ['p','a','n','i','c','+past form']
        p.generate(analysis)
        ---> 'panicked'

        A fresh 'generator' FST is built for every call by
        ``self._build_generator_fst``; the first result of transducing
        ``analysis`` is joined into a string and returned.
        """
        initial = 'start'

        generator = FST('generator')
        generator.add_state(initial)
        generator.initial_state = initial
        self._build_generator_fst(generator, analysis, initial)

        first_result = generator.transduce(analysis)[0]
        return ''.join(first_result)
예제 #27
0
def add_zero_padding():
    """Build the third soundex FST, 'soundex-padzero' (zero padding).

    States '1'..'4' are declared, '1' is initial and '4' final. Letters
    loop on state '1'. Each digit 1-9 (zero is not included) is copied while
    advancing 1 -> 2 -> 3 -> 4. Arcs whose input is ``()`` reach state '4'
    from state '2' emitting '00' and from state '3' emitting '0'.
    """
    padder = FST('soundex-padzero')

    for name in ('1', '2', '3', '4'):
        padder.add_state(name)

    padder.initial_state = '1'
    padder.set_final('4')

    for char in string.letters:
        padder.add_arc('1', '1', char, char)

    # Arcs are added digit by digit, in this step order for each digit.
    digit_steps = (('1', '2'), ('2', '3'), ('3', '4'))
    for value in range(1, 10):
        digit = str(value)
        for source, target in digit_steps:
            padder.add_arc(source, target, digit, digit)

    # Padding arcs: () as input, the missing zeros as output.
    padder.add_arc('2', '4', (), '00')
    padder.add_arc('3', '4', (), '0')
    return padder
예제 #28
0
def french_count():
    """Build the 'french' FST that spells out a three-digit number.

    States:
        'start'      initial state.
        'z'          final; reached from 'start' on the symbol 'z',
                     emitting kFRENCH_TRANS[0].
        '0'..'9'     first digit read (state name == digit).
        '10'..'19'   second digit read (state name == digit + 10).
        '20'..'29'   third digit read (state name == digit + 20); final.

    Outputs are lists of entries from kFRENCH_TRANS plus kFRENCH_AND. The
    output for the first digit is emitted on the arc that reads the second
    digit, and the output for the last two digits on the arc that reads the
    third digit.

    Integer comparisons use ``==``. The previous ``is`` comparisons against
    int literals only worked thanks to CPython's small-integer caching and
    raise SyntaxWarning on Python 3.8+.
    """
    f = FST('french')

    f.add_state('start')
    f.add_state('z')
    for i in range(30):
        f.add_state(str(i))

    f.initial_state = ('start')

    for i in range(20, 30):
        f.set_final(str(i))
    f.set_final('z')

    f.add_arc('start', 'z', ['z'], [kFRENCH_TRANS[0]])

    # First digit i, then second digit (j - 10): emit the hundreds part.
    for i in range(10):
        f.add_arc('start', str(i), [str(i)], [])
        for j in range(10, 20):
            if i == 0:
                # No hundreds: nothing to say.
                f.add_arc(str(i), str(j), [str(j - 10)], [])
            elif i == 1:
                # Exactly one hundred: only kFRENCH_TRANS[100].
                f.add_arc(str(i), str(j), [str(j - 10)], [kFRENCH_TRANS[100]])
            elif i in range(2, 10):
                f.add_arc(str(i), str(j), [str(j - 10)],
                          [kFRENCH_TRANS[i], kFRENCH_TRANS[100]])

    # Second digit (i - 10), then third digit (j - 20): emit tens and units.
    for i in range(10, 20):
        for j in range(20, 30):
            if i == 10:
                # Tens digit 0: just the unit, nothing for a trailing 0.
                if j != 20:
                    f.add_arc(str(i), str(j), [str(j - 20)],
                              [kFRENCH_TRANS[j - 20]])
                else:
                    f.add_arc(str(i), str(j), [str(j - 20)], [])
            elif i == 11 and j in range(20, 27):
                # 10-16 have their own entries in kFRENCH_TRANS.
                f.add_arc(str(i), str(j), [str(j - 20)],
                          [kFRENCH_TRANS[j - 10]])
            elif i == 11 and j in range(27, 30):
                # 17-19 are built as 10 followed by the unit.
                f.add_arc(str(i), str(j), [str(j - 20)],
                          [kFRENCH_TRANS[10], kFRENCH_TRANS[j - 20]])
            elif i in range(12, 17):
                # Tens digits 2-6.
                if j == 20:
                    f.add_arc(str(i), str(j), [str(j - 20)],
                              [kFRENCH_TRANS[int(i % 10) * 10]])
                elif j == 21:
                    f.add_arc(str(i), str(j), [str(j - 20)], [
                        kFRENCH_TRANS[int(i % 10) * 10], kFRENCH_AND,
                        kFRENCH_TRANS[1]
                    ])
                else:
                    f.add_arc(str(i), str(j), [str(j - 20)], [
                        kFRENCH_TRANS[int(i % 10) * 10], kFRENCH_TRANS[j - 20]
                    ])
            elif i == 17:
                # Tens digit 7: built on kFRENCH_TRANS[60] plus 10-19.
                if j == 20:
                    f.add_arc(str(i), str(j), [str(j - 20)],
                              [kFRENCH_TRANS[60], kFRENCH_TRANS[10]])
                elif j == 21:
                    f.add_arc(
                        str(i), str(j), [str(j - 20)],
                        [kFRENCH_TRANS[60], kFRENCH_AND, kFRENCH_TRANS[11]])
                elif j in range(22, 27):
                    f.add_arc(str(i), str(j), [str(j - 20)],
                              [kFRENCH_TRANS[60], kFRENCH_TRANS[j - 10]])
                elif j in range(27, 30):
                    f.add_arc(str(i), str(j), [str(j - 20)], [
                        kFRENCH_TRANS[60], kFRENCH_TRANS[10],
                        kFRENCH_TRANS[j - 20]
                    ])
            elif i == 18:
                # Tens digit 8: kFRENCH_TRANS[4] followed by kFRENCH_TRANS[20].
                if j == 20:
                    f.add_arc(str(i), str(j), [str(j - 20)],
                              [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
                elif j in range(21, 30):
                    f.add_arc(str(i), str(j), [str(j - 20)], [
                        kFRENCH_TRANS[4], kFRENCH_TRANS[20],
                        kFRENCH_TRANS[j - 20]
                    ])
            elif i == 19:
                # Tens digit 9: the tens-digit-8 prefix plus 10-19.
                if j in range(20, 27):
                    f.add_arc(str(i), str(j), [str(j - 20)], [
                        kFRENCH_TRANS[4], kFRENCH_TRANS[20],
                        kFRENCH_TRANS[j - 10]
                    ])
                elif j in range(27, 30):
                    f.add_arc(str(i), str(j), [str(j - 20)], [
                        kFRENCH_TRANS[4], kFRENCH_TRANS[20], kFRENCH_TRANS[10],
                        kFRENCH_TRANS[j - 20]
                    ])

    return f
예제 #29
0
 def union_fst(self, list_fst):
     """Return a new FST built from merging the FSTs in list_fst with op=UNION."""
     alphabet, states, start_state, accept_states, transitions = \
         self._merge_fst(list_fst, op=UNION)
     merged = FST(alphabet, states, start_state, accept_states, transitions)
     return merged
예제 #30
0
    _states_L1 = {"s0", "s1", "s2", "s3", "s4"}
    _init_state_L1 = "s0"
    _accept_states_L1 = [("s2", 1), ("s4", 1)]
    _transitions_L1 = [
        ("s0", "a", "s1"),
        ("s0", "b", "s3"),
        ("s1", "a", "s1"),
        ("s1", "b", "s2"),
        ("s2", "a", "s1"),
        ("s2", "b", "s2"),
        ("s3", "a", "s4"),
        ("s3", "b", "s3"),
        ("s4", "a", "s4"),
        ("s4", "b", "s3")
    ]
    _fst_L1 = FST(_alphabet_L1, _states_L1, _init_state_L1, _accept_states_L1, _transitions_L1)
    print("check FST - L1")
    print(_fst_L1)
    assert _fst_L1.go("aaabbababaabbab")[1]
    assert not _fst_L1.go("aaabbbbba")[1]
    rand = "".join(_fst_L1.go())
    print("sample:" + rand)
    assert _fst_L1.go(rand)

    #  L2
    _alphabet_L2 = ["a", "b"]
    _states_L2 = {"q0", "q1", "q2", "q3"}
    _init_state_L2 = "q0"
    _accept_states_L2 = [("q2", 3)]
    _transitions_L2 = [
        ("q0", "a", "q1"),
예제 #31
0
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits.

    States: 'ste' (initial), 'L1' (after a leading letter), 'N1'/'N2'/'N3'
    (after the first, second and third digit) and 'next1' (final). Letters
    and the first three digits are copied to the output; further digits
    loop on 'N3' with ``()`` as output. 'next1' is reached from 'L1', 'N1',
    'N2' and 'N3' through arcs whose input and output are both ``()``.
    """
    truncator = FST('soundex-truncate')

    for name in ('ste', 'L1', 'N1', 'N2', 'N3', 'next1'):
        truncator.add_state(name)
    truncator.initial_state = 'ste'

    truncator.set_final('next1')

    for char in string.letters:
        truncator.add_arc('ste', 'L1', char, char)

    # Arcs are added digit by digit, in this step order for each digit.
    copying_steps = (('ste', 'N1'), ('L1', 'N1'), ('N1', 'N2'), ('N2', 'N3'))
    for value in range(10):
        digit = str(value)
        for source, target in copying_steps:
            truncator.add_arc(source, target, digit, digit)
        # Digits after the third are read but produce the () output.
        truncator.add_arc('N3', 'N3', digit, ())

    # Arcs with () input and output into the final state.
    for source in ('L1', 'N1', 'N2', 'N3'):
        truncator.add_arc(source, 'next1', (), ())

    return truncator
예제 #32
0
파일: main.py 프로젝트: superhg2012/cs208
        fst_trans[char_list[i][0]] = {}
        fst_trans["start"][char_list[i][0]] = [
            char_list[i][:1], arp_list[i][0] + " "
        ]

# Extend the transition table with one state per additional prefix of every
# word in char_list (index 0 of each word is handled before this loop).
for i in range(0, len(char_list)):
    for j in range(1, len(char_list[i])):
        # adding new states to fst_states
        fst_states.append(char_list[i][:j + 1])
        # adding state transitions, where state names are string of chars in the color
        # so far so the 3rd state on the way to blue is named 'blu'
        # example in dictionary: 'ru': {'b': ['rub', 'B ']}
        fst_trans[char_list[i][:j + 1]] = {}
        fst_trans[char_list[i][:j]][char_list[i][j]] = [char_list[i][:j + 1]]
        # if/else accounting for silent letters at end of word
        # (when arp_list[i] has no entry at index j the output is empty)
        if j < len(arp_list[i]):
            fst_trans[char_list[i][:j]][char_list[i][j]].append(
                arp_list[i][j] + " ")
        else:
            fst_trans[char_list[i][:j]][char_list[i][j]].append("")

# Our final states are all of our input words
fst_final = char_list
# Declaring our fst
test_fst = FST(fst_states, fst_in_alph, fst_out_alph, fst_start, fst_final,
               fst_trans)

# Calling transduce on each color in our input
# NOTE(review): Python 2 print statement; this is a SyntaxError on Python 3.
for word in char_list:
    print test_fst.transduce_string(word)
예제 #33
0
def french_count():
    """Build the 'french' FST that spells out numbers given as digit symbols.

    State names describe how much of a three-position number has been read:
    'n' stands for a digit already consumed, '0' for a consumed zero and '*'
    for a position still to be read. The 'Vega7' / 'Vega8' / 'Vega9' states
    handle tens digits 7, 8 and 9, whose outputs are built from
    kFRENCH_TRANS[60], kFRENCH_TRANS[4] and kFRENCH_TRANS[20].

    Final states: '00n', '0nn', 'nnn', 'n00', '0nnVega7', '0nnVega8',
    '0nnVega9'. Outputs come from kFRENCH_TRANS and kFRENCH_AND; arcs with
    ``()`` as input or output are presumably epsilon arcs (confirm against
    the FST class).

    NOTE(review): the states 'nn*', 'nnn' and 'nnn*' are declared but no arc
    in this function refers to them.
    """
    f = FST('french')

    f.add_state('start')
    # one number and two trailing unknowns
    f.add_state('n**')
    # exception from state n**
    f.add_state('n**+')
    # two numbers and one trailing unknown
    f.add_state('nn*')
    # zero and two unknown digits trailing and so on
    f.add_state('0**')
    f.add_state('00*')
    f.add_state('00n')
    f.add_state('0n*')
    f.add_state('0n*+')
    f.add_state('0nn')
    f.add_state('n00')
    f.add_state('nnn')
    f.add_state('nnn*')
    f.add_state('*et*')
    # vigesimal counting for 7 in ((0/n)n*)
    f.add_state('0n*Vega7+')
    f.add_state('0n*Vega7')
    f.add_state('0nnVega7')
    # vigesimal counting for 8 in ((0/n)n*)
    f.add_state('0n*Vega8')
    f.add_state('0n*Vega8+')
    f.add_state('0nnVega8')
    # vigesimal counting for 9 in ((0/n)n*)
    f.add_state('0n*Vega9')
    f.add_state('0n*Vega9+')
    f.add_state('0n*Vega9++')
    f.add_state('0nnVega9')

    # set final states
    f.set_final('00n')
    f.set_final('0nn')
    f.set_final('nnn')
    f.set_final('n00')
    f.set_final('0nnVega7')
    f.set_final('0nnVega8')
    f.set_final('0nnVega9')

    # initial state
    f.initial_state = 'start'
    # remove initial zeroes
    f.add_arc('start', '0**', '0', ())
    f.add_arc('0**', '00*', '0', ())

    for ii in xrange(10):
        #from '0n*Vega8' to '0nnVega8
        if ii != 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), [kFRENCH_TRANS[ii]])
        elif ii == 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), ())
        #from '0n*Vega7' to '0nnVega7' 7-9
        # NOTE(review): the two arcs below with () as input do not depend on
        # ii, so they are added once for each of ii = 0, 7, 8, 9.
        if ii == 0 or ii == 7 or ii ==8 or ii == 9:
            f.add_arc('0n*Vega7', '0n*Vega7+', (), [kFRENCH_TRANS[10]])
            # NOTE(review): this arc is a self-loop on '0n*Vega7+'.
            f.add_arc('0n*Vega7+', '0n*Vega7+', str(ii), [kFRENCH_TRANS[ii]])
            #
            f.add_arc('0n*Vega9+', '0n*Vega9++', (), [kFRENCH_TRANS[10]])
            f.add_arc('0n*Vega9++', '0nnVega9', str(ii), [kFRENCH_TRANS[ii]])

            if ii == 0:
                f.add_arc('0n*Vega7+', '0nnVega7', '0', ())
                f.add_arc('0n*Vega9++', '0nnVega9', '0', ())

            elif ii == 7 or ii == 8 or ii == 9:
                f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii]])
        #from '0n*Vega' to '0nnVega' 2-6
        if ii == 2 or ii == 3 or ii ==4 or ii == 5 or ii == 6:
            f.add_arc('0n*Vega7', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])
        if ii == 1:
            f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[10]])
            f.add_arc('n**','0n*', str(ii), [kFRENCH_TRANS[10]])
            f.add_arc('0n*Vega7', '0n*Vega7+', str(ii), [kFRENCH_AND])
            f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])

        #from '00*' to '00n'
        f.add_arc('00*', '00n', str(ii), [kFRENCH_TRANS[ii]])
        #from '*n*' to '*nn' 2-9
        if ii != 0 and ii !=9:
            f.add_arc('0n*','0nn', str(ii+1), [kFRENCH_TRANS[ii+1]])
            f.add_arc('0n*+','0nn', str(ii), [kFRENCH_TRANS[ii]])
        #from 'start' to 'nnn' 200,300,...,900
        # NOTE(review): the 'n**+' -> 'n**' arc does not depend on ii, so it
        # is added once for each of ii = 2..9.
        if ii != 0 and ii !=1:
            f.add_arc('start','n**+', str(ii), [kFRENCH_TRANS[ii]])
            f.add_arc('n**+', 'n**', (), [kFRENCH_TRANS[100]])
        #from 'n**' to 'n0*' 0
        # NOTE(review): the input here is the two-character string '00'.
        if ii == 0:
            f.add_arc('n**', 'n00', '00', ())
        if ii == 1:
            f.add_arc('start', 'n**', '1', [kFRENCH_TRANS[100]])


    #from '*n*' to '*et*' 1
    f.add_arc('0n*','*et*', '1', [kFRENCH_AND])
    #from '*et*' to '*nn' 1
    f.add_arc('*et*','0nn', (), [kFRENCH_TRANS[1]])
    #from '0**' to '*nn' 10-16 
    # NOTE(review): the inputs in the next two loops are two-character
    # strings such as '10' or '20'.
    for ii in xrange(10,17):
        f.add_arc('0**','0nn', str(ii), [kFRENCH_TRANS[ii]])
        f.add_arc('n**','0nn', str(ii), [kFRENCH_TRANS[ii]])
    #from '0**' to '*nn' 20-60
    for ii in xrange(2,7):
        f.add_arc('0**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])
        f.add_arc('n**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])

        #from '0**', to *n*
        f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[ii*10]])
        #from 'n**' to '0n*'
        f.add_arc('n**', '0n*+', str(ii), [kFRENCH_TRANS[ii*10]])
    # tens digits 7, 8 and 9 enter the matching 'Vega' states
    for ii in xrange(7,10):
        if ii == 7:
            f.add_arc('0**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
            f.add_arc('n**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
        elif ii == 8:
            f.add_arc('0**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega8', '0n*Vega8+', (), [kFRENCH_TRANS[20]])
        elif ii == 9:
            f.add_arc('0**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega9', '0n*Vega9+', (), [kFRENCH_TRANS[20]])

    # a zero tens digit after a non-zero hundreds digit produces no output
    f.add_arc('n**', '0n*+', '0', ())

    return f
예제 #34
0
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    State '1' copies letters through unchanged.  The first three digits are
    copied while the machine advances '1' -> '2' -> '3' -> '4'; every digit
    read in state '4' is consumed with an empty output.  All four states are
    final.
    """
    f2 = FST('soundex-truncate')

    # Indicate initial and final states
    chain = ['1', '2', '3', '4']
    for state in chain:
        f2.add_state(state)
    f2.initial_state = '1'
    for state in chain:
        f2.set_final(state)

    # string.ascii_letters rather than string.letters: the latter is
    # locale-dependent on Python 2 and does not exist on Python 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    # Digits 0-9: copy the first three, swallow the rest
    for n in range(10):
        digit = str(n)
        f2.add_arc('1', '2', digit, digit)
        f2.add_arc('2', '3', digit, digit)
        f2.add_arc('3', '4', digit, digit)
        f2.add_arc('4', '4', digit, ())

    return f2
예제 #35
0
    def generate_control(self):
        """Build the control FST over the keys of ``self.matchers``.

        State "0" is the initial state and is left by a single "VERB"
        transition to state "1".  Each argument at position ``k`` in the key
        list contributes ``2 ** k`` to the state number, so filling every
        argument (in any order) ends in state ``2 ** len(arguments)``, the
        only final state.

        Returns the constructed FST.
        """
        # Materialise the keys: .index() below needs a real sequence, and
        # dict.keys() is only a view on Python 3.
        arguments = list(self.matchers.keys())
        corner_count = 2 ** len(arguments)

        # this will be a hypercube
        control = FST()

        # zero state is for verb
        control.add_state("0", is_init=True, is_final=False)

        # inside states for the cube, except the last, accepting state
        for i in range(1, corner_count):
            control.add_state(str(i), is_init=False, is_final=False)

        # last node of the hypercube
        control.add_state(str(corner_count), is_init=False, is_final=True)

        # first transition
        control.add_transition(KRPosMatcher("VERB"), [ExpandOperator(
            self.lexicon, self.working_area)], "0", "1")

        # count every transition as an increase in number of state
        for path in permutations(arguments):
            actual_state = 1
            for arg in path:
                new_state = actual_state + 2 ** arguments.index(arg)
                control.add_transition(
                    self.matchers[arg],
                    [FillArgumentOperator(arg, self.working_area)],
                    str(actual_state), str(new_state))
                actual_state = new_state
        return control
예제 #36
0
from fst import FST
import argparse

# Command-line interface: three optional boolean switches followed by two
# mandatory string options.
parser = argparse.ArgumentParser()
for _switch in ('--fst', '--isyms', '--osyms'):
    parser.add_argument(_switch, action='store_true')
for _required in ('--name', '--file'):
    parser.add_argument(_required, type=str, required=True)

if __name__ == '__main__':
    args = parser.parse_args()
    fst = FST(args.name)
    fst.initial_state = fst.new_state()
    fst.final_states.append(fst.new_state())
    with open(args.file, 'r') as f:
        for line in f:
            if fst.name == 'L':
                word, phones = line.strip().split('\t')
                tokens = phones.split()
            elif fst.name == 'S':
                word = line.strip().split('\t')[0]
                tokens = list(word)

            if len(tokens) == 1:
                fst.add_arc(fst.initial_state, fst.final_states[0], word,
                            tokens[0])
            else:
                state = fst.new_state()
                fst.add_arc(fst.initial_state, state, word, tokens[0])
                for phone in tokens[1:-1]:
예제 #37
0
from fst import FST

# This function returns fn o ... o f3 o f2 o f1 (input)
# where ALL transducers use characters as input symbols
def compose(input, *fsts):
	"""Feed `input` through each transducer in `fsts`, left to right.

	Every intermediate result is joined into a single string before being
	handed to the next transducer's `transduce`; whatever each call yields
	is collected, in order, as the inputs for the following stage.

	Returns the list of results of the last stage (or `[input]` when no
	transducers are given).
	"""
	results = [input]
	for transducer in fsts:
		results = [
			produced
			for item in results
			for produced in transducer.transduce(''.join(item))
		]
	return results

if __name__ == '__main__':
	# Demo transducer: two states, 'start' (initial) and 'next' (final)
	f1 = FST('test-generate')
	f1.add_state('start')
	f1.add_state('next')
	f1.initial_state = 'start'
	f1.set_final('next')

	# The first letter read emits '1'; every later letter emits '0'
	for letter in 'ABCD':
		f1.add_arc('start', 'next', letter, '1')
		f1.add_arc('next', 'next', letter, '0')

	f2 = FST('test-generate')
예제 #38
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    State layout:
      * 'start'      - initial state; the first letter is copied to the output.
      * 's11'..'s66' - entered from 'start' when the first letter belongs to
                       digit class 1..6; a following letter of the same class
                       moves to the matching 'sN' state with an empty output.
      * 's1'..'s6'   - the last coded letter belonged to class 1..6.
      * 'sv'         - the last letter was one of a, e, h, i, o, u, w, y.
      * 'next'       - the only final state, reached by an empty arc from
                       every state except 'start'.
    """
    # Letters that never emit a digit (both cases)
    silent = frozenset('aehiouwyAEHIOUWY')

    # One row per consonant class:
    #   (letters, digit, sources wired before the self-loop, sources wired after)
    # The source tuples spell out the arc insertion order explicitly.
    consonant_rows = (
        ('Ll', '4',
         ('s11', 's22', 's33', 's55', 's66'),
         ('s1', 's2', 's3', 's5', 's6', 'sv')),
        ('Rr', '6',
         ('s22', 's33', 's44', 's55', 's11'),
         ('s1', 's2', 's3', 's5', 's4', 'sv')),
        ('bfpvBFPV', '1',
         ('s22', 's33', 's44', 's55', 's66'),
         ('s5', 's2', 's3', 's4', 'sv', 's6')),
        ('cgjkqsxzCGJKQSXZ', '2',
         ('s11', 's33', 's44', 's55', 's66'),
         ('s5', 's1', 's3', 's4', 'sv', 's6')),
        ('mnMN', '5',
         ('s11', 's44', 's33', 's22', 's66'),
         ('s2', 's1', 's3', 's4', 'sv', 's6')),
        ('dtDT', '3',
         ('s11', 's44', 's55', 's22', 's66'),
         ('s2', 's1', 's5', 's4', 'sv', 's6')),
    )
    row_for = {}
    for row in consonant_rows:
        for member in row[0]:
            row_for[member] = row

    # Let's define our first FST
    f1 = FST('soundex-generate')

    for name in ('start',
                 's11', 's22', 's33', 's44', 's55', 's66',
                 's1', 's2', 's3', 'sv', 's4', 's5', 's6',
                 'next'):
        f1.add_state(name)
    f1.initial_state = 'start'

    # Set all the final states
    f1.set_final('next')

    # Add the rest of the arcs
    for letter in string.ascii_letters:
        if letter in silent:
            f1.add_arc('start', 'sv', letter, letter)
            for src in ('s11', 's33', 's22', 's44', 's55', 's66', 'sv',
                        's1', 's2', 's3', 's4', 's6', 's5'):
                f1.add_arc(src, 'sv', letter, ())
            continue

        _, digit, before_loop, after_loop = row_for[letter]
        first_state = 's' + digit * 2   # e.g. 's44' for class 4
        class_state = 's' + digit       # e.g. 's4'

        f1.add_arc('start', first_state, letter, letter)
        f1.add_arc(first_state, class_state, letter, ())
        for src in before_loop:
            f1.add_arc(src, class_state, letter, digit)
        f1.add_arc(class_state, class_state, letter, ())
        for src in after_loop:
            f1.add_arc(src, class_state, letter, digit)

    # Empty arcs into the final state
    for src in ('s11', 's22', 's33', 's44', 's55', 's66',
                's1', 's2', 's3', 'sv', 's4', 's5', 's6'):
        f1.add_arc(src, 'next', (), ())
    return f1
예제 #39
0
파일: soundex.py 프로젝트: mazdeh/NLP
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied to the output ('start' -> 'next').  After
    that the machine sits in 'next' or in one of 'one'..'six', the state of
    the digit emitted last.  A consonant of another group emits its digit
    and moves to that group's state; any other letter (a, e, h, i, o, u, w,
    y, or a consonant of the current group) loops with an empty output.

    Letters are classified case-insensitively, so an upper-case consonant
    after the first letter is coded like its lower-case form.
    """
    # Let's define our first FST
    f1 = FST('soundex-generate')

    digit_states = ['one', 'two', 'three', 'four', 'five', 'six']

    f1.add_state('start')
    f1.add_state('next')
    for state in digit_states:
        f1.add_state(state)
    f1.initial_state = 'start'

    # Set all the final states
    f1.set_final('next')
    for state in digit_states:
        f1.set_final(state)

    # lower-case consonant -> (state of its group, digit it emits)
    groups = (
        ('one', '1', 'bfpv'),
        ('two', '2', 'cgjkqsxz'),
        ('three', '3', 'dt'),
        ('four', '4', 'l'),
        ('five', '5', 'mn'),
        ('six', '6', 'r'),
    )
    target_of = {}
    for state, digit, members in groups:
        for member in members:
            target_of[member] = (state, digit)

    # string.ascii_letters rather than string.letters: the latter is
    # locale-dependent on Python 2 and does not exist on Python 3.
    alphabet = string.ascii_letters

    for letter in alphabet:
        f1.add_arc('start', 'next', letter, letter)

    for current in ['next'] + digit_states:
        for letter in alphabet:
            hit = target_of.get(letter.lower())
            if hit is None or hit[0] == current:
                # Uncoded letter, or same group as the previous digit:
                # consume it without emitting anything.
                f1.add_arc(current, current, letter, ())
            else:
                f1.add_arc(current, hit[0], letter, hit[1])
    return f1
예제 #40
0
def add_zero_padding():
    """
    Create the third soundex FST, which pads its input with '0' symbols.

    From the initial state 'st' an optional letter is copied ('L1'), then up
    to three digits are copied through 'N1', 'N2', 'N3'.  Empty-input arcs
    into 'P1', 'P2', 'P3' each emit one '0'.  The final states 'N3' and 'P3'
    are reached only after three digit positions have been emitted.
    """
    # Now, the third fst - the zero-padding fst
    f3 = FST('soundex-padzero')

    for state in ('st', 'L1', 'N1', 'N2', 'N3', 'P1', 'P2', 'P3'):
        f3.add_state(state)

    f3.initial_state = 'st'
    f3.set_final('N3')
    f3.set_final('P3')

    # string.ascii_letters rather than string.letters: the latter is
    # locale-dependent on Python 2 and does not exist on Python 3.
    for letter in string.ascii_letters:
        f3.add_arc('st', 'L1', letter, letter)

    # range() behaves identically to xrange() here and exists on Python 3.
    for number in range(10):
        digit = str(number)
        f3.add_arc('st', 'N1', digit, digit)
        f3.add_arc('L1', 'N1', digit, digit)
        f3.add_arc('N1', 'N2', digit, digit)
        f3.add_arc('N2', 'N3', digit, digit)

    # Padding: each of these arcs reads nothing and writes one '0'
    f3.add_arc('L1', 'P1', (), ('0'))
    f3.add_arc('N1', 'P2', (), ('0'))
    f3.add_arc('N2', 'P3', (), ('0'))
    f3.add_arc('P1', 'P2', (), ('0'))
    f3.add_arc('P2', 'P3', (), ('0'))
    return f3
예제 #41
0
파일: french_count.py 프로젝트: mazdeh/NLP
def french_count():
    """
    Build an FST that reads a number digit by digit and emits French number
    words taken from kFRENCH_TRANS (plus kFRENCH_AND).

    The first digit is consumed from 'start' and lands in '1stzero'
    (directly for 0 and 1, via '100s' for 2-9).  The second digit picks the
    tens branch ('ones', 'tens', '20-69', '70-ten', '80s', '90s') and the
    last digit is consumed inside that branch.

    Each distinct arc is registered exactly once.

    NOTE(review): the arcs look designed for three-digit, zero-padded input;
    confirm against the caller.
    NOTE(review): state 'zero' is final but no arc leads into it, so an
    all-zero input produces an empty output.
    """
    f = FST('french')

    f.add_state('start')
    f.initial_state = 'start'
    for name in ('1stzero', 'tens',
                 'seventeen', 'final_seventeen',
                 'eighteen', 'final_eighteen',
                 'nineteen', 'final_nineteen',
                 'zero', 'ones', '20-69', '70-ten', '80s', '90s', '100s',
                 'et', '10-et', 'et-un', 'et-onze'):
        f.add_state(name)

    for name in ('zero', 'ones', 'tens',
                 'final_seventeen', 'final_eighteen', 'final_nineteen',
                 '20-69', '70-ten', '80s', '90s', 'et-un', 'et-onze'):
        f.set_final(name)

    ten = kFRENCH_TRANS[10]
    # digit -> (state entered after emitting "ten", final state after the unit)
    teen_tail = (
        (7, 'seventeen', 'final_seventeen'),
        (8, 'eighteen', 'final_eighteen'),
        (9, 'nineteen', 'final_nineteen'),
    )

    # 100 - 999
    f.add_arc('start', '1stzero', '1', [kFRENCH_TRANS[100]])
    for i in range(2, 10):
        f.add_arc('start', '100s', str(i), [kFRENCH_TRANS[i]])
    f.add_arc('100s', '1stzero', (), [kFRENCH_TRANS[100]])

    # 0 - 9
    f.add_arc('start', '1stzero', '0', [])
    f.add_arc('1stzero', 'ones', '0', [])
    for ii in range(1, 10):
        f.add_arc('ones', 'ones', str(ii), [kFRENCH_TRANS[ii]])
    f.add_arc('ones', 'ones', '0', [])

    # 10 - 16
    f.add_arc('1stzero', 'tens', '1', [])
    for unit in range(0, 7):
        f.add_arc('tens', 'tens', str(unit), [kFRENCH_TRANS[10 + unit]])

    # 17 - 19: emit "ten" on the digit, then the unit on an empty arc
    for unit, middle, last in teen_tail:
        f.add_arc('tens', middle, str(unit), [ten])
        f.add_arc(middle, last, (), [kFRENCH_TRANS[unit]])

    # 20 - 69
    for decade in range(2, 7):
        f.add_arc('1stzero', '20-69', str(decade), [kFRENCH_TRANS[decade * 10]])

    for i in range(2, 10):
        f.add_arc('20-69', '20-69', str(i), [kFRENCH_TRANS[i]])

    # handles 20, 30 ... 60: one arc covers every decade
    f.add_arc('20-69', '20-69', '0', [])

    # handles 21, 31, ... 61
    f.add_arc('20-69', 'et', '1', [kFRENCH_AND])
    f.add_arc('et', 'et-un', (), [kFRENCH_TRANS[1]])

    # 70 - 79
    f.add_arc('1stzero', '70-ten', '7', [kFRENCH_TRANS[60]])
    f.add_arc('70-ten', '70-ten', '0', [ten])
    # handle 71 here
    f.add_arc('70-ten', '10-et', '1', [kFRENCH_AND])
    f.add_arc('10-et', 'et-onze', (), [kFRENCH_TRANS[11]])
    for unit in range(2, 7):
        f.add_arc('70-ten', '70-ten', str(unit), [kFRENCH_TRANS[10 + unit]])
    # 77 - 79 share the 17 - 19 tail states; their outgoing empty arcs
    # already exist, so only the entry arcs are added here.
    for unit, middle, _last in teen_tail:
        f.add_arc('70-ten', middle, str(unit), [ten])

    # 80 - 89
    f.add_arc('1stzero', '80s', '8', [kFRENCH_TRANS[4]])
    f.add_arc('80s', 'ones', (), [kFRENCH_TRANS[20]])
    f.add_arc('80s', '80s', '0', [kFRENCH_TRANS[20]])

    # 90 - 99
    f.add_arc('1stzero', '90s', '9', [kFRENCH_TRANS[4]])
    f.add_arc('90s', 'tens', (), [kFRENCH_TRANS[20]])

    return f
예제 #42
0
 def intersection_fst(self, list_fst):
     """Merge ``list_fst`` through ``self._merge_fst`` with ``op=INTERSECT``
     and wrap the five resulting components in a new ``FST``."""
     (alphabet, states, start_state,
      accept_states, transitions) = self._merge_fst(list_fst, op=INTERSECT)
     return FST(alphabet, states, start_state, accept_states, transitions)
예제 #43
0
파일: soundex.py 프로젝트: Pinafore/cl1-hw
def letters_to_numbers():
    """
    Returns the starter FST for the soundex letter-to-number step.

    The machine has two states: 'start' (initial) and 'next' (final).  The
    first lower-case letter is copied to the output; every following
    lower-case letter is replaced by '0'.
    """
    f1 = FST('soundex-generate')

    # 'start' is the entry point, 'next' the only accepting state
    for state in ('start', 'next'):
        f1.add_state(state)
    f1.initial_state = 'start'
    f1.set_final('next')

    for ch in string.ascii_lowercase:
        f1.add_arc('start', 'next', ch, ch)
        f1.add_arc('next', 'next', ch, '0')
    return f1
예제 #44
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied to the output ('initial' -> '0').  Afterwards
    the state names the digit emitted last ('1'..'6'), or '0' when the last
    letter was one of a, e, h, i, o, u, w, y.  A consonant emits its digit
    unless the machine is already in that digit's state, in which case it is
    consumed silently and the state is kept, so a run of any length from one
    group yields a single digit.

    Letters are classified case-insensitively.
    """
    # Let's define our first FST
    f1 = FST('soundex-generate')

    silent = frozenset('aehiouwy')
    digit_of = {}
    for digit, members in (('1', 'bfpv'), ('2', 'cgjkqsxz'), ('3', 'dt'),
                           ('4', 'l'), ('5', 'mn'), ('6', 'r')):
        for member in members:
            digit_of[member] = digit

    states = ['0', '1', '2', '3', '4', '5', '6']

    f1.add_state('initial')
    for state in states:
        f1.add_state(state)
    f1.initial_state = 'initial'

    # Set all the final states
    for state in states:
        f1.set_final(state)

    # Add the rest of the arcs
    for letter in string.ascii_letters:
        f1.add_arc('initial', '0', letter, letter)

        lowered = letter.lower()
        if lowered in silent:
            # Dropped letter: no output, and the "last digit" memory resets
            for state in states:
                f1.add_arc(state, '0', letter, ())
            continue

        digit = digit_of[lowered]
        for state in states:
            if state != digit:
                f1.add_arc(state, digit, letter, digit)
        # Same group as the previous digit: swallow the letter and stay put,
        # otherwise a third adjacent letter would emit the digit again.
        f1.add_arc(digit, digit, letter, ())

    return f1
예제 #45
0
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    State '1' copies letters through unchanged.  Each of the first three
    digits is copied while advancing '1' -> 'd1' -> 'd2' -> 'd3'; further
    digits loop on 'd3' with an empty output.  Only 'd1', 'd2' and 'd3' are
    final.
    """
    # Ok so now let's do the second FST, the one that will truncate
    # the number of digits to 3
    f2 = FST('soundex-truncate')

    # Indicate initial and final states
    states = ['1', 'd1', 'd2', 'd3']
    for state in states:
        f2.add_state(state)

    f2.initial_state = '1'

    for state in states[1:]:
        f2.set_final(state)

    # Add the arcs
    # string.ascii_letters rather than string.letters: the latter is
    # locale-dependent on Python 2 and does not exist on Python 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    # Copy a digit on every step along the chain of states
    for src, dst in zip(states, states[1:]):
        for n in range(10):
            f2.add_arc(src, dst, str(n), str(n))

    # Anything after the third digit is dropped
    for n in range(10):
        f2.add_arc('d3', 'd3', str(n), ())

    return f2
예제 #46
0
def french_count():
    """
    Build an FST that reads a number digit by digit and emits French number
    words taken from kFRENCH_TRANS (joined by kFRENCH_AND where required).

    States are the strings '0'..'25' with '0' as the initial state.  Leading
    zeros loop on '0', a hundreds digit leads to '17', and the tens digit
    then selects the branch: '2' / '5' for 1x, '4' for 2x-6x, '10' for 7x,
    '12' for 8x and '16' for 9x.

    kFRENCH_AND is emitted only for 21, 31, ..., 61 and for 71.  81 and 91
    are built without it (quatre-vingt-un, quatre-vingt-onze).
    """
    f = FST('french')

    for n in range(26):
        f.add_state(str(n))

    f.initial_state = '0'

    for n in (1, 3, 6, 7, 8, 9, 11, 13, 14, 18, 20):
        f.set_final(str(n))

    four = kFRENCH_TRANS[4]
    ten = kFRENCH_TRANS[10]
    twenty = kFRENCH_TRANS[20]
    hundred = kFRENCH_TRANS[100]

    # Arcs reading the digit 0
    f.add_arc('0', '0', '0', ())
    f.add_arc('4', '6', '0', ())
    f.add_arc('5', '8', '0', ())
    f.add_arc('0', '9', '0', [kFRENCH_TRANS[0]])
    f.add_arc('10', '11', '0', [ten])
    f.add_arc('12', '13', '0', [twenty])
    f.add_arc('16', '18', '0', [twenty, ten])
    f.add_arc('17', '19', '0', ())
    f.add_arc('19', '9', '0', ())

    # Arcs reading the digit 1
    f.add_arc('0', '2', '1', ())
    f.add_arc('17', '2', '1', ())
    f.add_arc('0', '17', '1', [hundred])
    f.add_arc('0', '5', '1', [ten])
    f.add_arc('17', '5', '1', [ten])
    f.add_arc('4', '7', '1', [kFRENCH_AND, kFRENCH_TRANS[1]])
    f.add_arc('10', '11', '1', [kFRENCH_AND, kFRENCH_TRANS[11]])
    # 81 and 91 take no "et", unlike 21-61 and 71
    f.add_arc('12', '14', '1', [twenty, kFRENCH_TRANS[1]])
    f.add_arc('16', '20', '1', [twenty, kFRENCH_TRANS[11]])

    # 11 - 16
    for i in range(1, 7):
        f.add_arc('2', '3', str(i), [kFRENCH_TRANS[i + 10]])

    # Tens digit 2 - 6, and units 2 - 6 after a 7x / 9x tens digit
    for i in range(2, 7):
        f.add_arc('0', '4', str(i), [kFRENCH_TRANS[i * 10]])
        f.add_arc('17', '4', str(i), [kFRENCH_TRANS[i * 10]])
        f.add_arc('10', '11', str(i), [kFRENCH_TRANS[i + 10]])
        f.add_arc('16', '20', str(i), [twenty, kFRENCH_TRANS[i + 10]])

    # Units 2 - 9 after 2x-6x or 8x, and hundreds digits 2 - 9
    for i in range(2, 10):
        f.add_arc('4', '7', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('0', '17', str(i), [kFRENCH_TRANS[i], hundred])
        f.add_arc('12', '14', str(i), [twenty, kFRENCH_TRANS[i]])

    # Plain units 1 - 9
    for i in range(1, 10):
        f.add_arc('0', '1', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('19', '1', str(i), [kFRENCH_TRANS[i]])

    # Units 7 - 9 after a 1x, 7x or 9x tens digit
    for i in (7, 8, 9):
        f.add_arc('5', '8', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('10', '11', str(i), [ten, kFRENCH_TRANS[i]])
        f.add_arc('16', '20', str(i), [twenty, ten, kFRENCH_TRANS[i]])

    # Tens digit 7
    f.add_arc('0', '10', '7', [kFRENCH_TRANS[60]])
    f.add_arc('17', '10', '7', [kFRENCH_TRANS[60]])

    # Tens digit 8
    f.add_arc('0', '12', '8', [four])
    f.add_arc('17', '12', '8', [four])

    # Tens digit 9
    f.add_arc('0', '16', '9', [four])
    f.add_arc('17', '16', '9', [four])

    return f
예제 #47
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    'q1' is the initial state; the first lower-case letter is copied to the
    output on the way to 'q2'.  States 'n1'..'n6' record the digit emitted
    last.  The letters a, e, h, i, o, u, w, y loop on the current state with
    an empty output; a consonant emits its digit and moves to that digit's
    state, unless the machine is already there, in which case it loops with
    an empty output.  Every state except 'q1' is final.
    """
    # Let's define our first FST
    f1 = FST('soundex-generate')

    coded_states = ['q2', 'n1', 'n2', 'n3', 'n4', 'n5', 'n6']

    f1.add_state('q1')
    for name in coded_states:
        f1.add_state(name)

    f1.initial_state = 'q1'

    # Set all the final states
    for name in coded_states:
        f1.set_final(name)

    dropped = frozenset('aehiouwy')

    # consonant -> digit string, numbered by position of its group
    digit_for = {}
    consonant_groups = ('bfpv', 'cgjkqsxz', 'dt', 'l', 'mn', 'r')
    for position, members in enumerate(consonant_groups):
        for member in members:
            digit_for[member] = str(position + 1)

    # Add the rest of the arcs
    for letter in string.ascii_lowercase:
        f1.add_arc('q1', 'q2', letter, letter)

        if letter in dropped:
            for name in coded_states:
                f1.add_arc(name, name, letter, ())
        elif letter in digit_for:
            digit = digit_for[letter]
            home = 'n' + digit
            for name in coded_states:
                if name != home:
                    f1.add_arc(name, home, letter, digit)
            f1.add_arc(home, home, letter, ())
    return f1
def french_count():
    """Build the FST that rewrites a three-digit string as French number words.

    Every arc consumes exactly one digit character, and every path from
    ``start`` to the final state ``last`` has three arcs (hundreds, tens,
    units), so the machine accepts exactly three-digit inputs such as
    ``'007'`` or ``'271'``.

    State names record what has been read so far:

    * ``0xx`` / ``nxx`` -- the hundreds digit was zero / non-zero.
    * ``00x`` -- hundreds and tens digits were both zero.
    * ``n0x`` -- tens digit zero after a non-zero hundreds digit.
    * ``n1x`` -- tens digit 1; the units arc emits the 10-19 form.
    * ``nnx`` -- tens digit 2-6; the tens word was already emitted.
    * ``n7x`` -- tens digit 7; the word for 60 was emitted and the units arc
      adds the 10-19 form.
    * ``n8x`` -- tens digit 8; the words for 4 and 20 were emitted.
    * ``n9x`` -- tens digit 9; the words for 4 and 20 were emitted and the
      units arc adds the 10-19 form.

    NOTE(review): ``kFRENCH_TRANS`` and ``kFRENCH_AND`` are defined outside
    this function. The code assumes ``kFRENCH_TRANS`` has entries for 0-16,
    20, 30, 40, 50, 60 and 100, and that ``kFRENCH_AND`` is the joining word
    used before a trailing "one"/"eleven" -- confirm against their
    definitions.

    Returns:
        The configured ``FST`` instance.
    """
    f = FST('french')

    for state in ('start', '0xx', 'nxx', '00x', 'n0x', 'n1x', 'nnx',
                  'n7x', 'n8x', 'n9x', 'last'):
        f.add_state(state)

    f.initial_state = 'start'
    f.set_final('last')

    def emit(src, dst, digit, *words):
        # Each arc gets its own fresh input/output lists; an arc without
        # output keeps passing the empty tuple, exactly as before.
        f.add_arc(src, dst, [digit], list(words) if words else ())

    hundred = kFRENCH_TRANS[100]
    ten = kFRENCH_TRANS[10]
    four_twenty = (kFRENCH_TRANS[4], kFRENCH_TRANS[20])

    # range() instead of the Python-2-only xrange(): identical iteration on
    # Python 2, and no NameError on Python 3.
    for ii in range(10):
        digit = str(ii)
        unit = kFRENCH_TRANS[ii]

        # With both leading digits zero the units digit is always spoken,
        # including a lone zero.
        emit('00x', 'last', digit, unit)

        # Hundreds and tens arcs for this digit.
        if ii == 0:
            emit('start', '0xx', digit)
            emit('0xx', '00x', digit)
            emit('nxx', 'n0x', digit)
        else:
            if ii == 1:
                # A leading 1 emits only the word for 100, no multiplier.
                emit('start', 'nxx', digit, hundred)
                tens_state, tens_words = 'n1x', ()
            else:
                emit('start', 'nxx', digit, unit, hundred)
                if ii < 7:
                    tens_state = 'nnx'
                    tens_words = (kFRENCH_TRANS[ii * 10],)
                elif ii == 7:
                    # 70-79 are built on the word for 60.
                    tens_state = 'n7x'
                    tens_words = (kFRENCH_TRANS[60],)
                else:
                    # 80-89 and 90-99 are both built on "4" + "20".
                    tens_state = 'n%dx' % ii
                    tens_words = four_twenty
            emit('0xx', tens_state, digit, *tens_words)
            emit('nxx', tens_state, digit, *tens_words)

        # Units arcs. ``plain`` is silent for zero; ``teen`` is the 10-19
        # form, taken from the table up to 16 and composed as ten + unit
        # for 17-19.
        plain = (unit,) if ii else ()
        if ii < 7:
            teen = (kFRENCH_TRANS[ii + 10],)
        else:
            teen = (ten, unit)
        # The joining word appears only before a units digit of 1, and only
        # after the 2-6 tens (nnx) and the 7 tens (n7x).
        joiner = (kFRENCH_AND,) if ii == 1 else ()

        emit('n0x', 'last', digit, *plain)
        emit('n1x', 'last', digit, *teen)
        emit('nnx', 'last', digit, *(joiner + plain))
        emit('n7x', 'last', digit, *(joiner + teen))
        emit('n8x', 'last', digit, *plain)
        emit('n9x', 'last', digit, *teen)

    return f
예제 #49
0
파일: f1.py 프로젝트: mazdeh/NLP
from fst import FST
import string, sys
from fsmutils import composechars, trace

# Transducer under construction; 'soundex-generate' is the label passed to FST.
f1 = FST('soundex-generate')

# Two states are registered here: 'start' is made the initial state and
# 'next' is marked final.
f1.add_state('start')
f1.add_state('next')
f1.initial_state = 'start'
f1.set_final('next')

# Letter classes used further down in the script.
# NOTE(review): list_one..list_six match the standard Soundex consonant
# groups for digits 1-6, and `vowels` matches the letters Soundex drops; how
# they are wired into arcs is not visible in this part of the file -- confirm
# against the code that follows.
list_one = ['b', 'f', 'p', 'v']
list_two = ['c', 'g', 'j', 'k', 'q', 's', 'x', 'z']
list_three = ['d', 't']
list_four = ['l']
list_five = ['m', 'n']
list_six = ['r']
# Despite the name, this list also contains 'h', 'w' and 'y'.
vowels = ['a', 'e', 'h', 'i', 'o', 'u', 'w', 'y']