def parse(cls, line, file_name, line_num):
    """
    Build a Word from one tab-separated line (id, raw form, morphemes).

    :param  line:  text of the line; three tab-separated columns are expected
    :param  file_name:  file name, used only in error messages
    :param  line_num:  line number, used only in error messages
    :return:  the populated Word, or None when the line does not have three
              columns and Sentence.is_tag_in_sent(line) is truthy
    :raises ParseError:  on a malformed line, a space inside the raw form,
                         a ValueError while building the morphemes, or a
                         raw/morpheme mismatch (see the final check)
    """
    fields = line.split('\t')
    if len(fields) != 3:
        if not Sentence.is_tag_in_sent(line):
            raise ParseError('%s(%d) Invalid line: %s' %
                             (file_name, line_num, line))
        return None

    word = Word()
    word.wid = fields[0]
    word.raw = fields[1]
    if ' ' in word.raw:
        raise ParseError('%s(%d) space in raw word: %s' %
                         (file_name, line_num, line))

    # The third column holds the morphemes, separated by ' + '.
    try:
        parsed = []
        for token in fields[2].split(' + '):
            parsed.append(Morph.parse(token, file_name, line_num))
        word.morphs = parsed
    except ValueError as val_err:
        raise ParseError('%s(%d) %s: %s' %
                         (file_name, line_num, val_err, line))

    # Reject the case where the raw form and the concatenated morpheme
    # lexical forms have the same length and differ as strings, yet compare
    # equal after norm('NFKD', ...) (presumably unicodedata.normalize).
    surface = ''.join(morph.lex for morph in word.morphs)
    if len(word.raw) == len(surface) and word.raw != surface:
        if norm('NFKD', word.raw) == norm('NFKD', surface):
            raise ParseError('%s(%d) raw-morph mismatch: %s' %
                             (file_name, line_num, line))
    return word
Ejemplo n.º 2
0
    def pre_process(self, text):
        """Clean *text*: lower-case it, drop stop words, accents and @mentions.

        The steps run in this order:

        1. lower-case the text and replace each run of newlines with a space;
        2. drop whitespace-separated tokens that are Portuguese stop words
           or that occur inside ``string.punctuation``;
        3. apply ``norm('NFKD', ...)`` (presumably ``unicodedata.normalize``)
           and discard every character that cannot be encoded as ASCII;
        4. remove every ``@`` together with the non-space characters that
           follow it.

        :param text: the text to clean
        :return: the cleaned text
        """
        text = text.lower()

        text = re.sub('(\\n)+', ' ', text)

        # Fetch the stop-word list once instead of once per token; a set
        # also makes each membership test constant-time.
        stop_words = set(stopwords.words('portuguese'))

        # ``word not in string.punctuation`` is a substring test, so a token
        # made of adjacent punctuation characters (e.g. "()") is dropped too.
        text = ' '.join(
            word for word in text.split()
            if word not in stop_words and word not in string.punctuation
        )

        text = norm('NFKD', text).encode('ascii', 'ignore').decode()

        # Raw string: '\@' in a normal literal is an invalid escape sequence.
        text = re.sub(r'\@\S*', '', text)

        return text
Ejemplo n.º 3
0
def normalize(s):
    """Normalize UTF-8 text to its closest ASCII equivalent.

    The text is decomposed with ``norm('NFD', ...)`` and every character
    that cannot be encoded as ASCII (such as the combining accents left by
    the decomposition) is dropped.

    :param s: UTF-8 encoded byte string, or an already-decoded text string
    :return: the ASCII-encoded result of the ``encode`` call
    """
    # Only byte strings need decoding; calling .decode() on a text string
    # raises AttributeError on Python 3.
    text = s.decode('utf8') if isinstance(s, (bytes, bytearray)) else s
    return norm('NFD', text).encode('ascii', 'ignore')
Ejemplo n.º 4
0
 def lineToarray(self, line):
     """Split *line* into lower-case ASCII alphanumeric tokens.

     The line is lower-cased, decomposed with ``norm('NFKD', ...)`` and
     stripped of every character that cannot be encoded as ASCII. Each
     maximal run of ``a-z`` / ``0-9`` characters then becomes one token.

     Punctuation and whitespace only separate tokens: they never produce
     empty-string entries, so an empty or punctuation-only line yields an
     empty list.

     :param line: the text to tokenize
     :return: list of tokens, in order of appearance
     """
     ascii_line = norm('NFKD', line.lower()).encode('ASCII',
                                                    'ignore').decode('ASCII')
     # Extract the tokens directly rather than substituting separators and
     # splitting on ' ', which left '' entries wherever two separators met
     # (e.g. "a, b") or at the edges of the line.
     return re.findall(r'[a-z0-9]+', ascii_line)
Ejemplo n.º 5
0
def normalize(s):
    """Normalize UTF-8 text to its closest ASCII equivalent.

    Applies ``norm('NFD', ...)`` to the text and then drops every character
    that has no ASCII encoding, which removes the combining marks produced
    by the decomposition.

    :param s: UTF-8 encoded byte string, or an already-decoded text string
    :return: the ASCII-encoded result of the ``encode`` call
    """
    if isinstance(s, (bytes, bytearray)):
        # Byte input is decoded first; text input has no usable .decode()
        # on Python 3 and is passed through unchanged.
        s = s.decode('utf8')
    return norm('NFD', s).encode('ascii', 'ignore')
Ejemplo n.º 6
0
 def _normalize(self, name):
     lower_name = name.lower()
     norm_name = norm('NFKD', lower_name).encode('ASCII',
                                                 'ignore').decode('ASCII')
     return norm_name
Ejemplo n.º 7
0
import logging
from unicodedata import normalize as norm
from phue import Bridge
from subprocess import run, PIPE
# Script: ask `whereami` which room we are in, then offer to switch on the
# Hue lights whose name contains that room and to switch off all the others.

# Log at INFO level: with the default WARNING level the light-discovery
# messages below would be silently dropped.
logging.basicConfig(level=logging.INFO)
b = Bridge('192.168.1.15')

# If the app is not registered and the button is not pressed, press the button and call connect() (this only needs to be run a single time)
b.connect()
lights = b.get_light_objects('name')
logging.info("Detected {} lights:".format(len(lights)))
for light_name in lights:
    logging.info("\t{}".format(light_name))

# Run the predictor directly from an argument list; no shell is needed.
room = run(["whereami", "predict"], stdout=PIPE)
# The room is taken from the last line of the command's output.
room = room.stdout.decode('UTF-8').strip().split('\n')[-1]
if not room:
    # An empty string is a substring of every light name, so continuing
    # would select every light.
    raise SystemExit("whereami did not report a room; leaving lights unchanged.")

# Case-insensitive substring match between the room and each light name.
room_key = norm("NFKD", room.casefold())
room_lights = [
    light_name for light_name in lights
    if room_key in norm("NFKD", light_name.casefold())
]
print("Detected room {}.\nTurn on {} ? (Y/n)".format(room, room_lights))
if input().lower() != 'n':
    print("Turning lights on")
    for light_name in room_lights:
        lights[light_name].on = True
print("Turn off all other lights ? (Y/n)")
if input().lower() != 'n':
    print("Turning lights off")
    for light_name, light in lights.items():
        if light_name not in room_lights:
            light.on = False