Esempio n. 1
0
class Round():
    """A single game round.

    Holds the loaded category and alphabet data, the indices of the
    currently selected category/letter rule, and the responses collected
    so far for the current round.
    """

    def __init__(self):
        """Load the category and alphabet data, then start the first round."""
        self.categories = Categories()
        self.categories.load()
        self.alphabet = Alphabet()
        self.alphabet.load()
        self.responses = []
        self.nextRound()

    def allResponses(self):
        """Return the 'response' value of every stored response, in order."""
        return [entry['response'] for entry in self.responses]

    def getResponse(self, ptn):
        """Return the first stored response whose 'tn' equals ``ptn``.

        When no such response exists (or the lookup fails for any other
        reason), a placeholder dict marked invalid with the response
        text 'UNK' is returned instead.
        """
        log('getResponse for ' + ptn)
        try:
            matches = [entry for entry in self.responses if entry['tn'] == ptn]
            return matches[0]
        except Exception:
            # Deliberately broad: any failure falls back to the placeholder.
            return {'tn': ptn, 'valid': False, 'response': 'UNK'}

    def nextRound(self):
        """Pick a random category and letter rule and clear the responses."""
        last_category = len(self.categories.data) - 1
        self.cat_index = randint(0, last_category)
        log(self.cat_index)

        last_letter_rule = len(self.alphabet.data) - 1
        self.alpha_index = randint(0, last_letter_rule)
        log(self.alpha_index)

        self.responses = []

    def describe(self):
        """Return the round prompt: '<category> that <position> <letter>'."""
        letter_rule = self.alphabet.data[self.alpha_index]
        category = self.categories.data[self.cat_index]['category']
        position = letter_rule['position'].lower()
        return category + " that " + position + " " + letter_rule['letter']
Esempio n. 2
0
def load_config_pos(config_path, char_embedd_dim):
    """Load saved settings and alphabets and build a char embedding table.

    The four values returned by ``load_config(config_path)`` are passed
    through unchanged. The character and label alphabets are created with
    ``keep_growing=False`` and loaded from ``config_path`` under the names
    'alphabet_char' and 'alphabet_label'.

    The embedding table has one row per entry of the character alphabet
    and ``char_embedd_dim`` columns, drawn uniformly from
    [-sqrt(3 / char_embedd_dim), sqrt(3 / char_embedd_dim)) and cast to
    ``theano.config.floatX``.

    Returns a 7-tuple: (max_sent_length, max_char_length, num_labels,
    embedd_dim_concat, alphabet_char, alphabet_label, char_embedd_table).
    """
    sent_len, char_len, label_count, concat_dim = load_config(config_path)

    alphabet_char = Alphabet('char', keep_growing=False)
    alphabet_char.load(config_path, 'alphabet_char')

    alphabet_label = Alphabet('label', keep_growing=False)
    alphabet_label.load(config_path, 'alphabet_label')

    # Symmetric uniform initialisation bound derived from the embedding width.
    bound = np.sqrt(3.0 / char_embedd_dim)
    table_shape = [alphabet_char.size(), char_embedd_dim]
    random_table = np.random.uniform(-bound, bound, table_shape)
    char_embedd_table = random_table.astype(theano.config.floatX)

    return (sent_len, char_len, label_count, concat_dim,
            alphabet_char, alphabet_label, char_embedd_table)
Esempio n. 3
0
def create_alphabets(alphabet_directory,
                     data_paths,
                     max_vocabulary_size,
                     normalize_digits=True):
    """Build or load the word, POS and type alphabets.

    If ``alphabet_directory`` already exists, the three alphabets are
    loaded from it. Otherwise they are built from the files in
    ``data_paths`` and saved to ``alphabet_directory``.

    When building, every non-empty line is split on whitespace; columns
    1, 4 and 7 (0-based) are used as the word, the POS tag and the type.
    With ``normalize_digits`` set, ``DIGIT_RE`` matches in the word are
    replaced by "0". The word vocabulary is ``_START_VOCAB`` followed by
    the observed words ordered by descending frequency, truncated to
    ``max_vocabulary_size`` entries.

    All three alphabets are closed before being returned as the tuple
    ``(word_alphabet, pos_alphabet, type_alphabet)``.
    """
    logger = utils.get_logger("Create Alphabets")
    word_alphabet = Alphabet('word')
    pos_alphabet = Alphabet('pos')
    type_alphabet = Alphabet('type')
    # Fixed order used for the save / load / close passes below.
    alphabets = (word_alphabet, pos_alphabet, type_alphabet)

    if gfile.Exists(alphabet_directory):
        for alphabet in alphabets:
            alphabet.load(alphabet_directory)
    else:
        logger.info("Creating Alphabets: %s" % alphabet_directory)

        # Special symbols are registered before anything read from data.
        pos_alphabet.add(ROOT_POS)
        type_alphabet.add(ROOT_TYPE)

        pos_alphabet.add(PAD_POS)
        type_alphabet.add(PAD_TYPE)

        word_counts = dict()
        for data_path in data_paths:
            logger.info("Processing data: %s" % data_path)
            with gfile.GFile(data_path, mode="r") as handle:
                for raw_line in handle:
                    text = raw_line.decode('utf-8').strip()
                    if not text:
                        continue

                    fields = text.split()
                    word = fields[1]
                    if normalize_digits:
                        word = DIGIT_RE.sub(b"0", word)
                    pos_tag = fields[4]
                    dep_type = fields[7]

                    pos_alphabet.add(pos_tag)
                    type_alphabet.add(dep_type)

                    word_counts[word] = word_counts.get(word, 0) + 1

        # Most frequent words first, after the reserved start symbols.
        by_frequency = sorted(word_counts, key=word_counts.get, reverse=True)
        vocab_list = _START_VOCAB + by_frequency
        logger.info("Total Vocabulary Size: %d" % len(vocab_list))
        logger.info("POS Alphabet Size: %d" % pos_alphabet.size())
        logger.info("Type Alphabet Size: %d" % type_alphabet.size())

        if len(vocab_list) > max_vocabulary_size:
            vocab_list = vocab_list[:max_vocabulary_size]
        for entry in vocab_list:
            word_alphabet.add(entry)

        for alphabet in alphabets:
            alphabet.save(alphabet_directory)

    for alphabet in alphabets:
        alphabet.close()
    return word_alphabet, pos_alphabet, type_alphabet
Esempio n. 4
0
import sys

from alphabet import Alphabet
from regex import Regex

if __name__ == '__main__':
    # Command-line filter: echo each stdin line that the regex matches.
    # Expects an alphabet argument and a regex argument; extras are ignored.
    if len(sys.argv) < 3:
        print('Usage: python app-regex-simple.py <alphabet> <regex>')
        sys.exit(1)

    alphabet_arg, pattern = sys.argv[1:3]

    alpha = Alphabet.load(alphabet_arg)
    rx = Regex(pattern, alpha)
    rx.build()

    for raw_line in sys.stdin:
        # Surrounding whitespace (including the newline) is removed before matching.
        candidate = raw_line.strip()
        if rx.match(candidate):
            print(candidate)