def __addSymbol(self, symbol: Symbol):
    # Check whether the head and tail positions are already occupied
    head = symbol.getHead()
    tail = symbol.getTail()
    if not self.__canOccupy(head) or not self.__canOccupy(tail):
        print("Not possible to add symbol to the board")
        return False
    else:
        # head and tail are assumed to be (x, y) coordinate pairs
        head_x, head_y = head
        tail_x, tail_y = tail
        self.board[head_x][head_y] = symbol
        self.board[tail_x][tail_y] = symbol
        return True
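A minimal sketch of how this method behaves, assuming getHead() and getTail() return (x, y) tuples and the board is a 2-D grid. The Symbol and Board stand-ins below are hypothetical and simplified (public names, no name mangling); they only illustrate the coordinate handling:

# Hypothetical stand-ins, not the original classes.
class Symbol:
    def __init__(self, head, tail):
        self._head, self._tail = head, tail

    def getHead(self):
        return self._head

    def getTail(self):
        return self._tail


class Board:
    def __init__(self, size=3):
        # None marks an empty cell
        self.board = [[None] * size for _ in range(size)]

    def can_occupy(self, pos):
        x, y = pos
        return self.board[x][y] is None

    def add_symbol(self, symbol):
        head, tail = symbol.getHead(), symbol.getTail()
        if not self.can_occupy(head) or not self.can_occupy(tail):
            return False
        self.board[head[0]][head[1]] = symbol
        self.board[tail[0]][tail[1]] = symbol
        return True


board = Board()
print(board.add_symbol(Symbol((0, 0), (0, 1))))  # True: both cells are free
print(board.add_symbol(Symbol((0, 1), (0, 2))))  # False: (0, 1) is already taken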
Example #2
def init_symbols(symbols):
    return [Symbol.convert(s) for s in symbols]
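A quick usage note, assuming Symbol.convert is a class-level factory that turns each raw item into a Symbol (the input list below is only illustrative):

# Hypothetical call: builds one Symbol per raw token
symbols = init_symbols(["S", "NP", "VP"])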
Example #3
def __init__(self):
    Symbol.__init__(self)
    # model, location and SpanishTokenizer are assumed to be defined elsewhere in the module
    # Dictionary mapping each word to its token for later use
    tokenized = dict()
    notfound = set()

    tokenizer = SpanishTokenizer()
    found = 0.0
    error = 0
    voc = model.vocabulary()
    voc_size = model.V()

    for word in voc:
        token = tokenizer.tokenize(word)
        if token != word:
            # Report progress as a percentage of the vocabulary processed so far
            print(str((float(found + error) / voc_size) * 100) + "%")
            tokenized[word] = Symbol(token, True)
            found += 1
        else:
            notfound.add(word)
            tokenized[word] = Symbol(word, True)
            print(word)
            error += 1

    print("found: " + str(found))
    print("error: " + str(error))
    # I'm getting a 0.78 rate
    print("success rate = " + str(found / voc_size))

    # Save the dictionary
    filename = location + "tokenized"
    f = open(filename, 'wb')
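The excerpt stops right after opening the output file. Since Example #6 below reads the same location + "tokenized" file back with pickle.load, the save step presumably looks roughly like this (a sketch, not the original code):

import pickle

# Persist the word-to-Symbol dictionary so later runs can reload it
filename = location + "tokenized"
with open(filename, 'wb') as f:
    pickle.dump(tokenized, f)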
Example #5
def add_symbol(title, body, img, subcategory_id):
    # Upload the image first, then store the new Symbol record
    img_path = storage.upload_image_file(img, "symbol")

    symbol = Symbol(title, body, img_path, subcategory_id)
    insert(symbol)
Example #6
    # Excerpt from a script's main block; docopt, PlaintextCorpusReader, LazyLoader,
    # pickle, Symbol and Grammar are assumed to be imported at module level.
    opts = docopt(__doc__)
    print("TRAIN GRAMMAR: ")
    # Get the corpus
    corpus = opts['-c']
    location = opts['-d']

    print("getting corpus from: " + corpus)
    model = PlaintextCorpusReader(
        corpus,
        r'.*\.txt',
        sent_tokenizer=LazyLoader('tokenizers/punkt/spanish.pickle'),
        encoding="utf8")

    # Create grammar
    terminals = set()
    epsilon = Symbol("ε", True)
    terminals.add(epsilon)  # epsilon terminal
    non_terminals = set()
    s = Symbol("S", False)  # starting non-terminal
    non_terminals.add(s)
    grammar = Grammar(non_terminals, terminals, s)
    # This counter only tracks how far along the processing is
    count = 0.0
    len_fileids = len(model.fileids())

    # Get the tokenized corpus
    tokens_location = location + "tokenized"
    print("getting tokens from: " + tokens_location)
    f = open(tokens_location, 'rb')
    tokens = pickle.load(f)
    f.close()