Python unichr 예제들, idna.unichr Python 예제들

예제 #1

0

파일 보기

파일: HowardChromium.py 프로젝트: xuyuanfang/PyQt5-HowardChrome

    def loadFnBtn(self):
        """Create the title-bar buttons and add them to ``self.layout``.

        Four ``QPushButton`` widgets are created and appended in this order:
        add-page, minimize, maximize and close.  Afterwards ``self.setHeight()``
        is called to apply the initial height.
        """
        # Use the Webdings font to render the window-control icons
        font = self.font() or QFont()
        font.setFamily('Webdings')

        # "Add a new tab" button (uses the fontawesome("far") font, not Webdings)
        self.buttonAddPage = QPushButton(unichr(0xf067), self, clicked=self.addPaged.emit, font=fontawesome("far"),
                                         objectName='buttonAddPage')
        # self.buttonAddPage.setIconSize(QSize(16,16))
        # self.buttonAddPage.setIcon(QIcon(':/icons/plus.png'))

        self.layout.addWidget(self.buttonAddPage)
        # Minimize button
        self.buttonMinimum = QPushButton(
            '0', self, clicked=self.windowMinimumed.emit, font=font, objectName='buttonMinimum')
        self.layout.addWidget(self.buttonMinimum)
        # Maximize / restore button
        self.buttonMaximum = QPushButton(
            '1', self, clicked=self.showMaximized, font=font, objectName='buttonMaximum')
        self.layout.addWidget(self.buttonMaximum)
        # Close button
        self.buttonClose = QPushButton(
            'r', self, clicked=self.windowClosed.emit, font=font, objectName='buttonClose')
        self.layout.addWidget(self.buttonClose)
        # Initial height
        self.setHeight()

예제 #2

0

파일 보기

def fullToHalfText(ustring):
    """Convert the full-width characters in *ustring* to half-width.

    The ideographic space (code point 12288) becomes an ASCII space, and the
    full-width forms 65281..65374 are shifted down by 65248 onto the ASCII
    range 33..126.  Every other character is copied unchanged.

    Returns the converted string.
    """
    try:
        to_char = unichr  # Python 2: chr() cannot produce code points >= 256
    except NameError:
        to_char = chr  # Python 3: unichr() no longer exists
    converted = []
    for uchar in ustring:
        inside_code = ord(uchar)
        if inside_code == 12288:
            inside_code = 32
        elif 65281 <= inside_code <= 65374:
            inside_code -= 65248
        converted.append(to_char(inside_code))
    # A single join avoids rebuilding the string for every character.
    return "".join(converted)

예제 #3

0

파일 보기

def quanToBan(quan_String):
    """Convert full-width characters in *quan_String* to half-width.

    The full-width space (code point 12288) maps to an ASCII space; the
    full-width forms 65281..65374 are shifted down by 65248.  All other
    characters are kept as they are.

    Returns the converted string.
    """
    try:
        to_char = unichr  # Python 2: chr() cannot produce code points >= 256
    except NameError:
        to_char = chr  # Python 3: unichr() no longer exists
    pieces = []
    for uchar in quan_String:
        inside_code = ord(uchar)
        if inside_code == 12288:  # full-width space converts directly
            inside_code = 32
        elif 65281 <= inside_code <= 65374:  # other full-width characters use a fixed offset
            inside_code -= 65248
        pieces.append(to_char(inside_code))
    # A single join avoids rebuilding the string for every character.
    return "".join(pieces)

예제 #4

0

파일 보기

def strQ2B(ustring):
    """Convert full-width characters to half-width.

    The full-width space (code point 12288) maps to an ASCII space; the
    full-width forms 65281..65374 are shifted down by 65248.  All other
    characters are kept as they are.

    Returns the converted string.
    """
    try:
        to_char = unichr  # Python 2: chr() cannot produce code points >= 256
    except NameError:
        to_char = chr  # Python 3: unichr() no longer exists
    pieces = []
    for uchar in ustring:
        inside_code = ord(uchar)
        if inside_code == 12288:  # full-width space converts directly
            inside_code = 32
        elif 65281 <= inside_code <= 65374:  # other full-width characters use a fixed offset
            inside_code -= 65248
        pieces.append(to_char(inside_code))
    # A single join avoids rebuilding the string for every character.
    return "".join(pieces)

예제 #5

0

파일 보기

파일: commons.py 프로젝트: knight6236/kyaru-bot

def strB2Q(ustring):
    """Convert half-width characters to full-width.

    An ASCII space (code point 32) maps to the full-width space 12288; the
    printable ASCII characters 33..126 are shifted up by 65248.  All other
    characters are kept as they are.

    Returns the converted string.
    """
    try:
        to_char = unichr  # Python 2: chr() cannot produce code points >= 256
    except NameError:
        to_char = chr  # Python 3: unichr() no longer exists
    pieces = []
    for uchar in ustring:
        inside_code = ord(uchar)
        if inside_code == 32:  # half-width space converts directly
            inside_code = 12288
        elif 33 <= inside_code <= 126:  # other half-width characters use a fixed offset
            inside_code += 65248
        pieces.append(to_char(inside_code))
    # A single join avoids rebuilding the string for every character.
    return "".join(pieces)

예제 #6

0

파일 보기

파일: html2text.py 프로젝트: SomaiaAmin/mbox-to-csv

 def entityref(self, c):
     """Return the text for the named entity *c*.

     When ``self.unicode_snob`` is false and *c* is a key of ``unifiable``,
     the mapped replacement is returned.  Otherwise ``name2cp(c)`` supplies
     a code point that is converted to a character.  If ``name2cp`` raises
     ``KeyError`` the reference is returned verbatim as ``"&" + c + ";"``.
     """
     # Test membership on the mapping itself: .keys() builds a list on Python 2.
     if not self.unicode_snob and c in unifiable:
         return unifiable[c]
     try:
         # Look the name up once and reuse the result below.
         codepoint = name2cp(c)
     except KeyError:
         return "&" + c + ';'
     try:
         return unichr(codepoint)
     except NameError:  #Python3
         return chr(codepoint)

예제 #7

0

파일 보기

파일: html2text.py 프로젝트: SomaiaAmin/mbox-to-csv

    def charref(self, name):
        """Return the text for the numeric character reference *name*.

        *name* is parsed as hexadecimal when it starts with ``x`` or ``X``
        and as decimal otherwise.  When ``self.unicode_snob`` is false and
        the code point is a key of ``unifiable_n``, the mapped replacement is
        returned; otherwise the character with that code point is returned.

        Raises ``ValueError`` if the digits cannot be parsed and
        ``IndexError`` if *name* is empty.
        """
        if name[0] in ('x', 'X'):
            c = int(name[1:], 16)
        else:
            c = int(name)

        # Test membership on the mapping itself: .keys() builds a list on Python 2.
        if not self.unicode_snob and c in unifiable_n:
            return unifiable_n[c]
        try:
            return unichr(c)
        except NameError:  #Python3
            return chr(c)

예제 #8

0

파일 보기

파일: html2text.py 프로젝트: SomaiaAmin/mbox-to-csv

    def close(self):
        """Finish parsing and return the accumulated output text.

        Closes the underlying ``HTMLParser``, calls ``self.pbr()`` and
        ``self.o('', 0, 'end')``, joins ``self.outtextlist`` using the current
        ``self.outtext`` as separator, and finally replaces every
        ``&nbsp_place_holder;`` marker: with the ``nbsp`` character when
        ``self.unicode_snob`` is true, otherwise with a plain space.
        """
        HTMLParser.HTMLParser.close(self)

        self.pbr()
        self.o('', 0, 'end')

        self.outtext = self.outtext.join(self.outtextlist)
        if self.unicode_snob:
            # Same Python 2/3 fallback that entityref() and charref() use.
            try:
                nbsp = unichr(name2cp('nbsp'))
            except NameError:  #Python3
                nbsp = chr(name2cp('nbsp'))
        else:
            nbsp = u' '
        self.outtext = self.outtext.replace(u'&nbsp_place_holder;', nbsp)

        return self.outtext

예제 #9

0

파일 보기

파일: HowardChromium.py 프로젝트: xuyuanfang/PyQt5-HowardChrome

    def rightMenuShow(self):
        """Build a context menu with a single "home" button and show it.

        The menu is stored on ``self.contextMenu`` and popped up at the
        current cursor position.  Any exception raised while building or
        showing the menu is caught and printed instead of propagating.
        """
        try:
            self.contextMenu = QMenu()
            self.contextMenu.setFont(fontawesome("far"))
            # The button is embedded in the menu through a QWidgetAction.
            index_action = QWidgetAction()
            index_button = QPushButton(unichr(0xf015),
                                       # clicked=self.zoom_out_func,
                                       font=fontawesome("far"), )
            index_button.setToolTip("主页")
            index_button.setCursor(Qt.ArrowCursor)
            index_action.setDefaultWidget(index_button)

            self.actionA = self.contextMenu.addAction(index_action)
            self.contextMenu.popup(QCursor.pos())  # 2. position where the menu is shown
            # self.actionA.triggered.connect(self.actionHandler)
            self.contextMenu.show()
        except Exception as e:
            # NOTE(review): every error is swallowed here and only printed.
            print(e)

예제 #10

0

파일 보기

    "b": 1,
    "c": 2,
    "d": 3,
    "e": 4,
    "f": 5,
    "g": 6,
    "h": 7
}
# Cell template: a pipe, a space, the cell content and a trailing space.
template = \
    """| {} """
# Append 8 blank cells (two spaces each) to each of the first 8 rows of
# `board`, which is defined outside this excerpt.
for i in range(8):
    for j in range(8):
        board[i].append(template.format("  "))

# Chess piece glyphs keyed by piece code.  They are written as unicode
# escapes rather than unichr(...) calls so the tables load on both Python 2
# and Python 3 (unichr() does not exist on Python 3).  The code points are the
# same ones the decimal values 9812..9823 denote.
whiteFigures = {
    "WQ": u"\u2655",   # 9813, white queen
    "WB": u"\u2657",   # 9815, white bishop
    "WKn": u"\u2658",  # 9816, white knight
    "WR": u"\u2656",   # 9814, white rook
    "WK": u"\u2654",   # 9812, white king
    "WP": u"\u2659",   # 9817, white pawn
}
blackFigures = {
    "BK": u"\u265a",   # 9818, black king
    "BQ": u"\u265b",   # 9819, black queen
    "BB": u"\u265d",   # 9821, black bishop
    "BKn": u"\u265e",  # 9822, black knight
    "BR": u"\u265c",   # 9820, black rook
    "BP": u"\u265f",   # 9823, black pawn
}

예제 #11

0

파일 보기

파일: Mirror.py 프로젝트: LPRacsi/Mirror

    def __init__(self):
        """Build the "Smart mirror" window.

        Sets a black background, creates the QLabel widgets and fonts used by
        the train, date/time, weather, forecast and calendar sections,
        registers three keyboard shortcuts through an untitled menu, switches
        to full screen and finally runs the ``init*``/``set*`` methods that
        populate the labels.
        """
        super(Window, self).__init__()
        self.setWindowTitle("Smart mirror")
        self.setAutoFillBackground(True)
        # Black window background; the palette is applied a few lines below.
        p = self.palette()
        p.setColor(self.backgroundRole(), Qt.black)
        # Code point 176 is the degree sign.
        self.degree = unichr(176)
        self.redrawCount = 0
        self.calendarRefreshCount = 0
        self.setPalette(p)

        # Train labels: four labels per train row.
        self.prevTrainLabel = QLabel(self)
        self.prevTrainLabel1 = QLabel(self)
        self.prevTrainLabel2 = QLabel(self)
        self.prevTrainLabel3 = QLabel(self)

        self.nextTrainLabel = QLabel(self)
        self.nextTrainLabel1 = QLabel(self)
        self.nextTrainLabel2 = QLabel(self)
        self.nextTrainLabel3 = QLabel(self)

        self.nextTrain1Label = QLabel(self)
        self.nextTrain1Label1 = QLabel(self)
        self.nextTrain1Label2 = QLabel(self)
        self.nextTrain1Label3 = QLabel(self)

        self.nextTrain2Label = QLabel(self)
        self.nextTrain2Label1 = QLabel(self)
        self.nextTrain2Label2 = QLabel(self)
        self.nextTrain2Label3 = QLabel(self)

        # Single labels for the train table and the clock/date.
        self.startTime = QLabel(self)
        self.lateTime = QLabel(self)
        self.arrivalTime = QLabel(self)
        self.arrivalStation = QLabel(self)
        self.line = QLabel(self)
        self.time = QLabel(self)
        self.date = QLabel(self)

        # Current-weather labels.
        self.temperature = QLabel(self)
        self.weather = [QLabel(self), QLabel(self), QLabel(self)]
        self.clouds = QLabel(self)
        self.wind = QLabel(self)

        # Forecast labels: 4 separator lines, 4 part-of-day separators,
        # 5 day identifiers and 5 * forecastRows forecast cells
        # (forecastRows is defined outside this method).
        self.weatherForecastWeek = []
        self.forecastSeparatorLine = []
        self.forecastSeparatorPartOfTheDay = []
        self.forecastDayIdentify = []
        for i in range(4):
            self.forecastSeparatorLine.append(QLabel(self))
            self.forecastSeparatorPartOfTheDay.append(QLabel(self))
        for i in range(5):
            self.forecastDayIdentify.append(QLabel(self))
        for i in range(5 * forecastRows):
            self.weatherForecastWeek.append(QLabel(self))

        # 15 calendar entry labels.
        self.calendarEntries = []

        for i in range(15):
            self.calendarEntries.append(QLabel(self))

        # Fonts, all "Times" at different point sizes.
        self.trainLabelFont = QtGui.QFont("Times", 35, QtGui.QFont.Normal)
        self.dayLabelFont = QtGui.QFont("Times", 30, QtGui.QFont.Normal)
        self.timeLabelFont = QtGui.QFont("Times", 150, QtGui.QFont.Normal)
        self.tempLabelFont = QtGui.QFont("Times", 100, QtGui.QFont.Normal)
        self.dateLabelFont = QtGui.QFont("Times", 45, QtGui.QFont.Normal)
        self.forecastLabelFont = QtGui.QFont("Times", 20, QtGui.QFont.Normal)
        self.forecastLabelFontBold = QtGui.QFont("Times", 20, QtGui.QFont.Bold)

        # Untitled actions that only exist for their shortcuts:
        # Ctrl+Q -> showNormalAndExit, Ctrl+X -> showNormal, Ctrl+A -> showFullScreen.
        exit_action = QAction("", self)
        exit_action.setShortcut("Ctrl+Q")
        exit_action.setStatusTip('')
        exit_action.triggered.connect(self.showNormalAndExit)
        exit_full_screen = QAction("", self)
        exit_full_screen.setShortcut("Ctrl+X")
        exit_full_screen.setStatusTip('')
        exit_full_screen.triggered.connect(self.showNormal)
        return_full_screen = QAction("", self)
        return_full_screen.setShortcut("Ctrl+A")
        return_full_screen.setStatusTip('')
        return_full_screen.triggered.connect(self.showFullScreen)

        # Style-sheet snippets stored for later use.
        self.colorWhite = 'color: white'
        self.colorGrey = 'color: grey'
        self.colorRed = 'color: red'
        self.colorDarkGrey = 'color: #1e1e1e'

        self.statusBar()
        # Menu bar and menus are styled black on black.
        self.setStyleSheet("""
            QMenuBar {
                background-color: rgb(0,0,0);
                color: rgb(255,255,255);
                border: 1px solid #000;
            }

            QMenuBar::item {
                background-color: rgb(0,0,0);
                color: rgb(255,255,255);
            }

            QMenuBar::item::selected {
                background-color: rgb(0,0,0);
            }

            QMenu {
                background-color: rgb(0,0,0);
                color: rgb(255,255,255);
                border: 1px solid #000;           
            }

            QMenu::item::selected {
                background-color: rgb(0,0,0);
            }
        """)

        # Attach the shortcut actions to an untitled menu and shrink the bar to 0x0.
        mainMenu = self.menuBar()
        fileMenu = mainMenu.addMenu('')
        fileMenu.addAction(exit_action)
        fileMenu.addAction(exit_full_screen)
        fileMenu.addAction(return_full_screen)
        mainMenu.resize(0, 0)

        # Go full screen, create the data sources, then initialise and fill every section.
        self.showFullScreen()
        self.trainData = TrainData()
        self.weatherData = WeatherData()
        self.setTrainTexts()
        self.initTrainsLabel()
        self.initDateTimeLabels()
        self.initWeatherLabels()
        self.initForecastWeatherLabels()
        self.initCalendarLabels()
        self.setTrains()
        self.setDateTime()
        self.setWeather()
        self.setForecastWeather()
        self.setCalendar()

예제 #12

0

파일 보기

파일: HowardChromium.py 프로젝트: xuyuanfang/PyQt5-HowardChrome

    def initToolbar(self, webview):
        """Create the navigation toolbar and wire it to *webview*.

        Builds the back, forward, reload, zoom, full-screen, "more" and home
        buttons plus the URL bar, adds a subset of them to the toolbar, and
        connects the signals of *webview* to the handlers on ``self``.

        Args:
            webview: the web view whose ``back``/``forward``/``reload`` slots
                and change signals are connected here.
        """
        pass
        ### Build the navigation bar with QToolBar and create the buttons with QAction
        # Create the navigation bar
        self.navigation_bar = QToolBar('Navigation')
        # Lock the navigation bar in place
        self.navigation_bar.setMovable(False)
        # Set the icon size
        self.navigation_bar.setIconSize(QSize(2, 2))
        # Add the navigation bar to the window
        self.addToolBar(self.navigation_bar)
        # Additional configuration
        self.navigation_bar.setObjectName("navigation_bar")
        self.navigation_bar.setCursor(Qt.ArrowCursor)
        # The QAction class provides abstract user-interface actions that can be placed in widgets
        # Add the forward, back, stop-loading and reload buttons
        self.reload_icon = unichr(0xf2f9)
        self.stop_icon = unichr(0xf00d)

        # Back button
        self.back_action = QWidgetAction(self)
        self.back_button = QPushButton(unichr(0xf060), self,
                                       clicked=webview.back,
                                       font=fontawesome("far"),
                                       objectName='back_button')
        self.back_button.setToolTip("后退")
        self.back_button.setCursor(Qt.ArrowCursor)
        self.back_action.setDefaultWidget(self.back_button)

        # Forward button
        self.next_action = QWidgetAction(self)
        self.next_button = QPushButton(unichr(0xf061), self,
                                       clicked=webview.forward,
                                       font=fontawesome("far"),
                                       objectName='next_button')
        self.next_button.setToolTip("前进")
        self.next_button.setCursor(Qt.ArrowCursor)
        self.next_action.setDefaultWidget(self.next_button)

        # Reload / stop button
        self.reload_action = QWidgetAction(self)

        self.reload_button = QPushButton(self.reload_icon, self,
                                         clicked=webview.reload,
                                         font=fontawesome("far"),
                                         objectName='reload_button')
        self.reload_button.setToolTip("刷新")
        self.reload_button.setCursor(Qt.ArrowCursor)
        self.reload_action.setDefaultWidget(self.reload_button)

        # Zoom-in button (created here but not added to the toolbar in this method)
        self.zoom_in_button = QPushButton(unichr(0xf067), self,
                                          clicked=self.zoom_in_func,
                                          font=fontawesome("far"),
                                          objectName='zoom_in_btn')
        self.zoom_in_button.setToolTip("放大")
        self.zoom_in_button.setCursor(Qt.ArrowCursor)

        # Zoom-out button (created here but not added to the toolbar in this method)
        self.zoom_out_button = QPushButton(unichr(0xf068), self,
                                           clicked=self.zoom_out_func,
                                           font=fontawesome("far"),
                                           objectName='zoom_out_btn')
        self.zoom_out_button.setToolTip("缩小")
        self.zoom_out_button.setCursor(Qt.ArrowCursor)
        # Label showing the current zoom factor as a percentage
        self.sf_label_rate = QLabel()
        self.sf_label_rate.setObjectName("sf_label_rate")
        self.sf_label_rate.setFixedWidth(30)
        self.sf_label_rate.setAlignment(QtCore.Qt.AlignRight|QtCore.Qt.AlignVCenter)
        self.sf_label_rate.setProperty("class","qlabel")
        # NOTE(review): reads self.webview rather than the webview argument - confirm they are the same object
        self.sf_label_rate.setText(str(int(self.webview.zoomFactor()*100))+"%")

        # Full-screen button
        self.full_screen_button = QPushButton(unichr(0xe140), self,
                                           clicked=self.full_screen_func,
                                           font=fontawesome("boot"),
                                           objectName='full_screen_button')
        self.full_screen_button.setToolTip("全屏")
        self.full_screen_button.setCursor(Qt.ArrowCursor)

        # "More" button
        self.more_action = QWidgetAction(self)
        self.more_button = QPushButton(unichr(0xe235), self,
                                       clicked=self.moreMenuShow,
                                       font=fontawesome("boot"),
                                       objectName='more_button')
        self.more_button.setToolTip("页面控制及浏览器核心")
        self.more_button.setCursor(Qt.ArrowCursor)
        self.more_action.setDefaultWidget(self.more_button)

        # Home button (no click handler is connected)
        self.index_action = QWidgetAction(self)
        self.index_button = QPushButton(unichr(0xf015), self,
                                        # clicked=self.zoom_out_func,
                                        font=fontawesome("far"),
                                        objectName='index_button')
        self.index_button.setToolTip("主页")
        self.index_button.setCursor(Qt.ArrowCursor)
        self.index_action.setDefaultWidget(self.index_button)

        # self.back_button.triggered.connect(webview.back)
        # self.next_button.triggered.connect(webview.forward)
        # self.reload_button.triggered.connect(webview.reload)
        # self.zoom_in_btn.triggered.connect(self.zoom_in_func)
        # self.zoom_out_btn.triggered.connect(self.zoom_out_func)
        # Add the buttons to the navigation bar
        self.navigation_bar.addAction(self.back_action)
        self.navigation_bar.addAction(self.next_action)
        self.navigation_bar.addAction(self.reload_action)
        self.navigation_bar.addAction(self.index_action)
        # Add the URL address bar
        self.urlbar = QLineEdit()
        # Let the address bar respond to the Return key
        self.urlbar.returnPressed.connect(self.navigate_to_url)
        # self.navigation_bar.addSeparator()
        self.navigation_bar.addWidget(self.urlbar)
        # self.navigation_bar.addSeparator()

        # self.navigation_bar.addAction(self.zoom_in_action)
        # self.navigation_bar.addAction(self.zoom_out_action)
        self.navigation_bar.addAction(self.more_action)
        # Let the browser react to URL changes and the other web view signals
        webview.urlChanged.connect(self.renew_urlbar)
        webview.loadProgress.connect(self.processLoad)
        webview.loadStarted.connect(self.loadPage)
        webview.loadFinished.connect(self.loadFinish)
        webview.titleChanged.connect(self.renew_title)
        webview.iconChanged.connect(self.renew_icon)
        self.webBind()
        webview.show()
        # Overrides the 2x2 icon size set when the toolbar was created
        self.navigation_bar.setIconSize(QSize(20, 20))
        self.urlbar.setFont(QFont('SansSerif', 13))

예제 #13

0

파일 보기

파일: spellTrainer.py 프로젝트: CyraxSector/HelaGrammarly

class Configuration(object):
    """Plain attribute container for the model and training settings."""


CONFIG = Configuration()
# pylint:disable=attribute-defined-outside-init
CONFIG.input_layers = 2
CONFIG.output_layers = 2
CONFIG.amount_of_dropout = 0.2
CONFIG.hidden_size = 500
CONFIG.initialization = "he_normal"
CONFIG.number_of_chars = 100
CONFIG.max_input_len = 60
CONFIG.inverted = True

# parameters for the training:
CONFIG.batch_size = 100 # As the model changes in size, play with the batch size to best fit the process in memory
CONFIG.epochs = 500 # due to mini-epochs.
CONFIG.steps_per_epoch = 1000 # This is a mini-epoch. Using News 2013 an epoch would need to be ~60K.
CONFIG.validation_steps = 10
CONFIG.number_of_iterations = 10
#pylint:enable=attribute-defined-outside-init

# Hex fingerprint of the settings above.  json.dumps() returns text while
# hashlib needs bytes, so the JSON is encoded first; for the ASCII output of
# json.dumps this yields the same digest on Python 2 and Python 3.
DIGEST = sha256(json.dumps(CONFIG.__dict__, sort_keys=True).encode("utf-8")).hexdigest()

# Parameters for the dataset
MIN_INPUT_LEN = 5
# Base noise rate divided by the maximum input length; add_noise_to_string()
# multiplies its amount_of_noise argument by the string length
# (presumably this constant is what callers pass - confirm at the call site).
AMOUNT_OF_NOISE = 0.2 / CONFIG.max_input_len
# Default alphabet: ASCII letters, space and full stop.
CHARS = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .")
# Padding character (its use is outside this excerpt).
PADDING = "☕"

# Locations of the downloaded corpus and of every intermediate file derived from it.
DATA_FILES_PATH = "~/Downloads/data"
DATA_FILES_FULL_PATH = os.path.expanduser(DATA_FILES_PATH)
DATA_FILES_URL = "http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2013.en.shuffled.gz"
NEWS_FILE_NAME_COMPRESSED = os.path.join(DATA_FILES_FULL_PATH, "news.2013.en.shuffled.gz") # 1.1 GB
NEWS_FILE_NAME_ENGLISH = "news.2013.en.shuffled"
NEWS_FILE_NAME = os.path.join(DATA_FILES_FULL_PATH, NEWS_FILE_NAME_ENGLISH)
NEWS_FILE_NAME_CLEAN = os.path.join(DATA_FILES_FULL_PATH, "news.2013.en.clean")
NEWS_FILE_NAME_FILTERED = os.path.join(DATA_FILES_FULL_PATH, "news.2013.en.filtered")
NEWS_FILE_NAME_SPLIT = os.path.join(DATA_FILES_FULL_PATH, "news.2013.en.split")
NEWS_FILE_NAME_TRAIN = os.path.join(DATA_FILES_FULL_PATH, "news.2013.en.train")
NEWS_FILE_NAME_VALIDATE = os.path.join(DATA_FILES_FULL_PATH, "news.2013.en.validate")
CHAR_FREQUENCY_FILE_NAME = os.path.join(DATA_FILES_FULL_PATH, "char_frequency.json")
# The "{}" placeholder is filled with the epoch number when the model is saved.
SAVED_MODEL_FILE_NAME = os.path.join(DATA_FILES_FULL_PATH, "keras_spell_e{}.h5") # an HDF5 file

# Some cleanup:
try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr  # Python 3: unichr() no longer exists, chr() covers all of Unicode

NORMALIZE_WHITESPACE_REGEX = re.compile(r'[^\S\n]+', re.UNICODE) # match all whitespace except newlines
RE_DASH_FILTER = re.compile(r'[\-\˗\֊\‐\‑\‒\–\—\⁻\₋\−\﹣\－]', re.UNICODE)
# Extra apostrophe look-alikes that are added to the character class by code point.
_APOSTROPHE_CODE_POINTS = (768, 769, 832, 833, 2387, 5151, 5152, 65344, 8242)
RE_APOSTROPHE_FILTER = re.compile(
    r'&#39;|[ʼ՚＇‘’‛❛❜ߴߵ`‵´ˊˋ{}{}{}{}{}{}{}{}{}]'.format(*map(_unichr, _APOSTROPHE_CODE_POINTS)),
    re.UNICODE)
RE_LEFT_PARENTH_FILTER = re.compile(r'[\(\[\{\⁽\₍\❨\❪\﹙\（]', re.UNICODE)
RE_RIGHT_PARENTH_FILTER = re.compile(r'[\)\]\}\⁾\₎\❩\❫\﹚\）]', re.UNICODE)
ALLOWED_CURRENCIES = """¥£₪$€฿₨"""
ALLOWED_PUNCTUATION = """-!?/;"'%&<>.()[]{}@#:,|=*"""
# Matches every character that is not a word character, whitespace, an allowed currency sign or allowed punctuation.
RE_BASIC_CLEANER = re.compile(r'[^\w\s{}{}]'.format(re.escape(ALLOWED_CURRENCIES), re.escape(ALLOWED_PUNCTUATION)), re.UNICODE)

# pylint:disable=invalid-name

def download_the_news_data():
    """Download the news data

    Creates the target directory if needed, streams ``DATA_FILES_URL`` into
    ``NEWS_FILE_NAME_COMPRESSED`` in 4096-byte chunks and prints a progress
    bar with one tick per downloaded percent.

    Raises ``OSError`` if the directory cannot be created for a reason other
    than already existing.
    """
    LOGGER.info("Downloading")
    try:
        os.makedirs(os.path.dirname(NEWS_FILE_NAME_COMPRESSED))
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise
    with open(NEWS_FILE_NAME_COMPRESSED, "wb") as output_file:
        response = requests.get(DATA_FILES_URL, stream=True)
        # The content-length header is optional; without it (or with 0) the
        # download still proceeds, only the progress ticks are skipped.
        total_length = int(response.headers.get('content-length') or 0)
        downloaded = percentage = 0
        print("»"*100)
        for data in response.iter_content(chunk_size=4096):
            downloaded += len(data)
            output_file.write(data)
            if not total_length:
                continue
            new_percentage = 100 * downloaded // total_length
            if new_percentage > percentage:
                # One tick per percent, even when a single chunk spans several percent.
                print("☑" * (new_percentage - percentage), end="")
                percentage = new_percentage
    print()

def uncompress_data():
    """Uncompress the data files

    Decompresses ``NEWS_FILE_NAME_COMPRESSED`` into a file of the same name
    without its last three characters (the ``.gz`` suffix).
    """
    import gzip
    import shutil
    with gzip.open(NEWS_FILE_NAME_COMPRESSED, 'rb') as compressed_file:
        with open(NEWS_FILE_NAME_COMPRESSED[:-3], 'wb') as outfile:
            # Copy in chunks instead of holding the whole decompressed file in memory.
            shutil.copyfileobj(compressed_file, outfile)

def add_noise_to_string(a_string, amount_of_noise):
    """Add some artificial spelling mistakes to the string

    Up to four edits are applied in order, each with probability
    ``amount_of_noise * len(a_string)``: replace a character, delete a
    character, insert a character (only while the string is shorter than
    ``CONFIG.max_input_len``) and transpose two adjacent characters.
    Replacement and inserted characters are drawn from ``CHARS[:-1]``.

    Returns the (possibly modified) string.
    """
    if rand() < amount_of_noise * len(a_string):
        # Replace a character with a random character
        random_char_position = random_randint(len(a_string))
        a_string = a_string[:random_char_position] + random_choice(CHARS[:-1]) + a_string[random_char_position + 1:]
    if rand() < amount_of_noise * len(a_string):
        # Delete a character
        random_char_position = random_randint(len(a_string))
        a_string = a_string[:random_char_position] + a_string[random_char_position + 1:]
    if len(a_string) < CONFIG.max_input_len and rand() < amount_of_noise * len(a_string):
        # Add a random character
        random_char_position = random_randint(len(a_string))
        a_string = a_string[:random_char_position] + random_choice(CHARS[:-1]) + a_string[random_char_position:]
    # The length check comes second so rand() is still drawn exactly as before;
    # it prevents random_randint(0), which raises, on a one-character string.
    if rand() < amount_of_noise * len(a_string) and len(a_string) > 1:
        # Transpose 2 characters
        random_char_position = random_randint(len(a_string) - 1)
        a_string = (a_string[:random_char_position] + a_string[random_char_position + 1] + a_string[random_char_position] +
                    a_string[random_char_position + 2:])
    return a_string

def _one_hot_sentences(sentences, rows, ctable):
    """Drain *sentences* from the end into a (rows, max_input_len, ctable.size) boolean array."""
    # The builtin bool replaces the np.bool alias, which newer NumPy releases removed.
    encoded = np_zeros((rows, CONFIG.max_input_len, ctable.size), dtype=bool)
    # range() replaces the Python-2-only xrange().
    for i in range(len(sentences)):
        sentence = sentences.pop()
        for j, c in enumerate(sentence):
            try:
                encoded[i, j, ctable.char_indices[c]] = 1
            except KeyError:
                pass # Padding
    return encoded


def _vectorize(questions, answers, ctable):
    """One-hot encode *questions* and *answers* with *ctable*.

    Both lists are consumed: sentences are popped from the end, so row ``i``
    of each array holds the ``i``-th sentence counted from the back of the
    list, and both lists are empty afterwards.  Characters missing from
    ``ctable.char_indices`` leave an all-zero row in the encoding.

    Returns the pair ``(X, y)``; both arrays have ``len(questions)`` rows.
    """
    len_of_questions = len(questions)
    X = _one_hot_sentences(questions, len_of_questions, ctable)
    y = _one_hot_sentences(answers, len_of_questions, ctable)
    return X, y

def slice_X(X, start=None, stop=None):
    """Slice an array-like, or every array-like in a list.

    Outputs:
        - X[start:stop] if X is an array-like
        - [x[start:stop] for x in X] if X is a list
    Index selection is also supported: `slice_X(x, indices)`
    # Arguments
        start: an integer start index, or a list/array of indices
        stop: integer stop index; should be None when `start` is a
            list/array of indices.
    """
    by_indices = hasattr(start, '__len__')
    if by_indices and hasattr(start, 'shape'):
        # hdf5 datasets only support list objects as indices
        start = start.tolist()

    if isinstance(X, list):
        if by_indices:
            return [item[start] for item in X]
        return [item[start:stop] for item in X]

    if by_indices:
        return X[start]
    return X[start:stop]

def vectorize(questions, answers, chars=None):
    """Encode the questions and expected answers and split off validation data.

    Builds a CharacterTable from *chars* (or CHARS when it is falsy), encodes
    both lists with _vectorize and keeps the last tenth of the rows as the
    validation set.  The shapes of the training arrays are printed.

    Returns (X_train, X_val, y_train, y_val, CONFIG.max_input_len, ctable).
    """
    print('Vectorization...')
    ctable = CharacterTable(chars or CHARS)
    X, y = _vectorize(questions, answers, ctable)

    # Explicitly set apart 10% for validation data that we never train over
    split_at = int(len(X) - len(X) / 10)
    X_train = slice_X(X, 0, split_at)
    X_val = slice_X(X, split_at)
    y_train = y[:split_at]
    y_val = y[split_at:]

    for training_array in (X_train, y_train):
        print(training_array.shape)

    return X_train, X_val, y_train, y_val, CONFIG.max_input_len, ctable


def generate_model(output_len, chars=None):
    """Build and compile the sequence-to-sequence model.

    The stack is: CONFIG.input_layers LSTM layers (each followed by Dropout),
    a RepeatVector of *output_len*, CONFIG.output_layers LSTM layers (each
    followed by Dropout), a time-distributed Dense layer sized to the
    alphabet and a softmax activation.

    *chars* is the alphabet; CHARS is used when it is falsy.
    Returns the compiled model.
    """
    print('Build model...')
    alphabet = chars or CHARS
    alphabet_size = len(alphabet)
    model = Sequential()

    # Encoder: only the last encoder layer collapses the sequence to one vector.
    # note: input_shape=(None, nb_feature) allows input sequences of variable length.
    for layer_number in range(CONFIG.input_layers):
        has_following_encoder = layer_number + 1 < CONFIG.input_layers
        encoder_layer = recurrent.LSTM(CONFIG.hidden_size, input_shape=(None, alphabet_size),
                                       kernel_initializer=CONFIG.initialization,
                                       return_sequences=has_following_encoder)
        model.add(encoder_layer)
        model.add(Dropout(CONFIG.amount_of_dropout))

    # The encoded input is repeated once per output time step for the decoder.
    model.add(RepeatVector(output_len))

    # Decoder: one or more stacked LSTM layers that keep the full sequence.
    for _ in range(CONFIG.output_layers):
        decoder_layer = recurrent.LSTM(CONFIG.hidden_size, return_sequences=True,
                                       kernel_initializer=CONFIG.initialization)
        model.add(decoder_layer)
        model.add(Dropout(CONFIG.amount_of_dropout))

    # Per output step, score every character of the alphabet.
    per_step_scores = TimeDistributed(Dense(alphabet_size, kernel_initializer=CONFIG.initialization))
    model.add(per_step_scores)
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


class Colors(object):
    """ANSI escape sequences used to colour terminal output."""
    # Escape code 92: bright green foreground.
    green = '\033[92m'
    # Escape code 91: bright red foreground.
    red = '\033[91m'
    # Escape code 0: reset all attributes.
    close = '\033[0m'


class CharacterTable(object):
    """
    Given a set of characters:
    + Encode them to a one hot integer representation
    + Decode the one hot integer representation to their character output
    + Decode a vector of probabilities to their character output
    """
    def __init__(self, chars):
        """Index the distinct characters of *chars* in sorted order."""
        self.chars = sorted(set(chars))
        self.char_indices = {c: i for i, c in enumerate(self.chars)}
        self.indices_char = {i: c for i, c in enumerate(self.chars)}

    @property
    def size(self):
        """The number of chars"""
        return len(self.chars)

    def encode(self, C, maxlen):
        """Encode as one-hot

        Returns a boolean array of shape ``(maxlen, size)`` with one row per
        character of *C*; rows past ``len(C)`` stay all-False.  Raises
        ``KeyError`` for a character that is not in the table.
        """
        # The builtin bool replaces the np.bool alias, which newer NumPy releases removed.
        X = np_zeros((maxlen, len(self.chars)), dtype=bool)
        for i, c in enumerate(C):
            X[i, self.char_indices[c]] = 1
        return X

    def decode(self, X, calc_argmax=True):
        """Decode from one-hot

        With ``calc_argmax`` true, *X* is reduced with ``argmax`` over its
        last axis first; otherwise it must already hold character indices.
        Index 0 is skipped, so the first character of the sorted table never
        appears in the decoded string.
        """
        if calc_argmax:
            X = X.argmax(axis=-1)
        return ''.join(self.indices_char[x] for x in X if x)

def generator(file_name):
    """Returns a tuple (inputs, targets)
    All arrays should contain the same number of samples.
    The generator is expected to loop over its data indefinitely.
    An epoch finishes when  samples_per_epoch samples have been seen by the model.

    Lines of *file_name* are collected into batches of ``CONFIG.batch_size``;
    each full batch is shuffled, turned into (question, answer) pairs with
    ``generate_question`` and yielded as one-hot arrays ``(X, y)``.  The file
    is re-read from the start whenever it is exhausted; a partial batch is
    carried over into the next pass.
    """
    ctable = CharacterTable(read_top_chars())
    batch_of_answers = []
    while True:
        # Binary mode yields bytes on both Python 2 and 3, so .decode() is always valid.
        with open(file_name, "rb") as answers:
            for raw_line in answers:
                batch_of_answers.append(raw_line.strip().decode('utf-8'))
                if len(batch_of_answers) != CONFIG.batch_size:
                    continue
                random_shuffle(batch_of_answers)
                batch_of_questions = []
                for answer_index, original_answer in enumerate(batch_of_answers):
                    question, answer = generate_question(original_answer)
                    batch_of_answers[answer_index] = answer
                    assert len(answer) == CONFIG.max_input_len
                    question = question[::-1] if CONFIG.inverted else question
                    batch_of_questions.append(question)
                X, y = _vectorize(batch_of_questions, batch_of_answers, ctable)
                yield X, y
                batch_of_answers = []

def print_random_predictions(model, ctable, X_val, y_val):
    """Print 10 randomly drawn validation samples with the model's guess for each.

    For every sample the question (un-inverted when CONFIG.inverted is set),
    the expected answer and the coloured guess are printed, followed by a
    '---' separator.
    """
    print()
    for _ in range(10):
        ind = random_randint(0, len(X_val))
        row_selector = np.array([ind]) # pylint:disable=no-member
        rowX = X_val[row_selector]
        rowy = y_val[row_selector]
        preds = model.predict_classes(rowX, verbose=0)

        q = ctable.decode(rowX[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)

        # Questions are fed reversed when CONFIG.inverted is set: show them inverted back.
        print('Q', q[::-1] if CONFIG.inverted else q)
        print('A', correct)
        if correct == guess:
            verdict = Colors.green + '☑' + Colors.close
        else:
            verdict = Colors.red + '☒' + Colors.close
        print(verdict, guess)
        print('---')
    print()


class OnEpochEndCallback(Callback):
    """Callback that reports sample predictions and saves the model after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print random predictions for one validation batch, then save the model.

        The model is written to SAVED_MODEL_FILE_NAME formatted with *epoch*.
        """
        ctable = CharacterTable(read_top_chars())
        validation_batches = generator(NEWS_FILE_NAME_VALIDATE)
        X_val, y_val = next(validation_batches)
        print_random_predictions(self.model, ctable, X_val, y_val)
        target_path = SAVED_MODEL_FILE_NAME.format(epoch)
        self.model.save(target_path)

ON_EPOCH_END_CALLBACK = OnEpochEndCallback()

def itarative_train(model):
    """
    Train the model from generators
     - keeps RAM usage finite
     - allows unlimited training data, since the noise is injected at runtime
    """
    training_batches = generator(NEWS_FILE_NAME_TRAIN)
    validation_batches = generator(NEWS_FILE_NAME_VALIDATE)
    model.fit_generator(
        training_batches,
        steps_per_epoch=CONFIG.steps_per_epoch,
        epochs=CONFIG.epochs,
        verbose=1,
        callbacks=[ON_EPOCH_END_CALLBACK],
        validation_data=validation_batches,
        validation_steps=CONFIG.validation_steps,
        class_weight=None,
        max_q_size=10,
        workers=1,
        pickle_safe=False,
        initial_epoch=0,
    )


def iterate_training(model, X_train, y_train, X_val, y_val, ctable):
    """Fit the model repeatedly and print sample predictions after every round.

    Rounds are numbered from 1 up to, but not including,
    CONFIG.number_of_iterations.
    """
    separator = '-' * 50
    iteration = 1
    while iteration < CONFIG.number_of_iterations:
        print()
        print(separator)
        print('Iteration', iteration)
        # Train for this round, then show predictions against the validation dataset
        model.fit(X_train, y_train,
                  batch_size=CONFIG.batch_size,
                  epochs=CONFIG.epochs,
                  validation_data=(X_val, y_val))
        print_random_predictions(model, ctable, X_val, y_val)
        iteration += 1

def clean_text(text):
    """Normalise *text*: collapse whitespace, fold punctuation variants, drop unwanted chars.

    The stripped text is passed through the module's substitution patterns
    in a fixed order; the cleaned string is returned.
    """
    substitutions = (
        (NORMALIZE_WHITESPACE_REGEX, ' '),
        (RE_DASH_FILTER, '-'),
        (RE_APOSTROPHE_FILTER, "'"),
        (RE_LEFT_PARENTH_FILTER, "("),
        (RE_RIGHT_PARENTH_FILTER, ")"),
        (RE_BASIC_CLEANER, ''),
    )
    cleaned = text.strip()
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned

def preprocesses_data_clean():
    """Pre-process the data - step 1 - cleanup

    Reads ``NEWS_FILE_NAME`` line by line, cleans each line with
    ``clean_text`` and writes it, UTF-8 encoded and newline terminated, to
    ``NEWS_FILE_NAME_CLEAN``.
    """
    # Both files are opened in binary mode and closed by the with-block;
    # reading bytes keeps .decode() valid on Python 2 and Python 3.
    with open(NEWS_FILE_NAME, "rb") as raw_data, open(NEWS_FILE_NAME_CLEAN, "wb") as clean_data:
        for line in raw_data:
            decoded_line = line.decode('utf-8')
            cleaned_line = clean_text(decoded_line)
            encoded_line = cleaned_line.encode("utf-8")
            clean_data.write(encoded_line + b"\n")

def preprocesses_data_analyze_chars():
    """Pre-process the data - step 2 - analyze the characters.

    Counts every character of NEWS_FILE_NAME_CLEAN, stores the counts as JSON
    in CHAR_FREQUENCY_FILE_NAME and logs the ``CONFIG.number_of_chars`` most
    common characters.
    """
    counter = Counter()
    LOGGER.info("Reading data:")
    # Binary mode + explicit decode works on every Python version; the ``with``
    # block makes sure the input file is closed.
    with open(NEWS_FILE_NAME_CLEAN, "rb") as clean_data:
        for line in clean_data:
            # Each line keeps its trailing newline, so "\n" is counted too.
            counter.update(line.decode('utf-8'))
    LOGGER.info("Done.\nWriting to file:")
    with open(CHAR_FREQUENCY_FILE_NAME, 'wb') as output_file:
        # json.dumps returns text; encode it because the file is binary.
        output_file.write(json.dumps(counter).encode('utf-8'))
    most_popular_chars = {key for key, _value in counter.most_common(CONFIG.number_of_chars)}
    LOGGER.info("The top %s chars are:", CONFIG.number_of_chars)
    LOGGER.info("".join(sorted(most_popular_chars)))

def read_top_chars():
    """Read the top chars we saved to file.

    Loads the character counts stored as JSON in CHAR_FREQUENCY_FILE_NAME and
    returns the set of the ``CONFIG.number_of_chars`` most common characters.
    """
    # ``with`` closes the file handle instead of leaving it to the garbage
    # collector.
    with open(CHAR_FREQUENCY_FILE_NAME) as input_file:
        chars = json.load(input_file)
    counter = Counter(chars)
    return {key for key, _value in counter.most_common(CONFIG.number_of_chars)}

def preprocesses_data_filter():
    """Pre-process the data - step 3 - filter only sentences with the right chars.

    Copies to NEWS_FILE_NAME_FILTERED every line of NEWS_FILE_NAME_CLEAN whose
    characters (the trailing newline included) all belong to the set returned
    by ``read_top_chars``. Lines are written back as the original bytes.
    """
    most_popular_chars = read_top_chars()
    LOGGER.info("Reading and filtering data:")
    # Binary mode keeps the raw bytes for writing and allows the explicit
    # decode on every Python version; ``with`` closes both files.
    with open(NEWS_FILE_NAME_CLEAN, "rb") as clean_data, \
            open(NEWS_FILE_NAME_FILTERED, "wb") as output_file:
        for line in clean_data:
            decoded_line = line.decode('utf-8')
            # Keep the line only if it uses no character outside the top set.
            if decoded_line and most_popular_chars.issuperset(decoded_line):
                output_file.write(line)
    LOGGER.info("Done.")

def read_filtered_data():
    """Read the filtered data corpus.

    Returns the UTF-8 decoded content of NEWS_FILE_NAME_FILTERED split on
    newline characters, as a list of strings.
    """
    LOGGER.info("Reading filtered data:")
    # Read bytes and decode explicitly (valid on every Python version); the
    # ``with`` block closes the file.
    with open(NEWS_FILE_NAME_FILTERED, "rb") as filtered_data:
        lines = filtered_data.read().decode('utf-8').split("\n")
    LOGGER.info("Read filtered data - %s lines", len(lines))
    return lines

def preprocesses_split_lines():
    """Preprocess the text by splitting the lines between min-length and max_length
    I don't like this step:
      I think the start-of-sentence is important.
      I think the end-of-sentence is important.
      Sometimes the stripped down sub-sentence is missing crucial context.
      Important NGRAMs are cut (though given enough data, that might be moot).
    I do this to enable batch-learning by padding to a fixed length.

    Reads NEWS_FILE_NAME_FILTERED and writes each resulting piece, UTF-8
    encoded and followed by a newline, to NEWS_FILE_NAME_SPLIT.
    """
    LOGGER.info("Reading filtered data:")
    # Binary mode + explicit decode works on every Python version; ``with``
    # closes both files.
    with open(NEWS_FILE_NAME_FILTERED, "rb") as filtered_data, \
            open(NEWS_FILE_NAME_SPLIT, "wb") as output_file:
        for _line in filtered_data:
            line = _line.decode('utf-8')
            while len(line) > MIN_INPUT_LEN:
                if len(line) <= CONFIG.max_input_len:
                    # The remainder fits: emit it whole and finish this line.
                    # NOTE(review): the remainder still carries the line's
                    # trailing newline, so a second newline is written after
                    # it - confirm the resulting blank line is intended.
                    answer = line
                    line = ""
                else:
                    # Cut at the last space between MIN_INPUT_LEN and the
                    # maximum length, if there is one.
                    space_location = line.rfind(" ", MIN_INPUT_LEN, CONFIG.max_input_len - 1)
                    if space_location > -1:
                        answer = line[:space_location]
                        line = line[len(answer) + 1:]
                    else:
                        space_location = line.rfind(" ") # no limits this time
                        if space_location == -1:
                            break # we are done with this line
                        # No usable cut point: drop everything up to the last
                        # space and retry with what is left.
                        line = line[space_location + 1:]
                        continue
                output_file.write(answer.encode('utf-8') + b"\n")

def preprocesses_split_lines2():
    """Preprocess the text by splitting the lines between min-length and max_length
    Alternative split.

    Keeps only the distinct lines of NEWS_FILE_NAME_FILTERED whose length is
    greater than MIN_INPUT_LEN and at most ``CONFIG.max_input_len``, logs up
    to ten of them and writes them all to NEWS_FILE_NAME_SPLIT.
    """
    LOGGER.info("Reading filtered data:")
    answers = set()
    # Binary mode + explicit decode works on every Python version; ``with``
    # closes the input file.
    with open(NEWS_FILE_NAME_FILTERED, "rb") as filtered_data:
        for encoded_line in filtered_data:
            line = encoded_line.decode('utf-8')
            if CONFIG.max_input_len >= len(line) > MIN_INPUT_LEN:
                answers.add(line)
    LOGGER.info("There are %s 'answers' (sub-sentences)", len(answers))
    LOGGER.info("Here are some examples:")
    for answer in itertools.islice(answers, 10):
        LOGGER.info(answer)
    with open(NEWS_FILE_NAME_SPLIT, "wb") as output_file:
        # The lines are joined without a separator: each one is written
        # exactly as it was read, including any trailing newline it had.
        output_file.write("".join(answers).encode('utf-8'))

def preprocesses_split_lines3():
    """Preprocess the text by selecting only max n-grams
    Alternative split.

    Keeps only the distinct lines of NEWS_FILE_NAME_FILTERED that contain
    fewer than 5 spaces, logs up to ten of them and writes them all to
    NEWS_FILE_NAME_SPLIT.
    """
    LOGGER.info("Reading filtered data:")
    answers = set()
    # Binary mode + explicit decode works on every Python version; ``with``
    # closes the input file.
    with open(NEWS_FILE_NAME_FILTERED, "rb") as filtered_data:
        for encoded_line in filtered_data:
            line = encoded_line.decode('utf-8')
            if line.count(" ") < 5:
                answers.add(line)
    LOGGER.info("There are %s 'answers' (sub-sentences)", len(answers))
    LOGGER.info("Here are some examples:")
    for answer in itertools.islice(answers, 10):
        LOGGER.info(answer)
    with open(NEWS_FILE_NAME_SPLIT, "wb") as output_file:
        # The lines are joined without a separator: each one is written
        # exactly as it was read, including any trailing newline it had.
        output_file.write("".join(answers).encode('utf-8'))


def preprocess_partition_data():
    """Set aside data for validation.

    Reads the newline-separated entries of NEWS_FILE_NAME_SPLIT, shuffles them
    with ``random_shuffle`` and writes the last tenth (integer division) to
    NEWS_FILE_NAME_VALIDATE and everything before it to NEWS_FILE_NAME_TRAIN.
    """
    # Read bytes and decode explicitly (valid on every Python version); the
    # ``with`` block closes the file.
    with open(NEWS_FILE_NAME_SPLIT, "rb") as split_data:
        answers = split_data.read().decode('utf-8').split("\n")
    print('shuffle', end=" ")
    random_shuffle(answers)
    print("Done")
    # Explicitly set apart 10% for validation data that we never train over
    split_at = len(answers) - len(answers) // 10
    with open(NEWS_FILE_NAME_TRAIN, "wb") as output_file:
        output_file.write("\n".join(answers[:split_at]).encode('utf-8'))
    with open(NEWS_FILE_NAME_VALIDATE, "wb") as output_file:
        output_file.write("\n".join(answers[split_at:]).encode('utf-8'))


def generate_question(answer):
    """Build a padded (question, answer) pair from *answer*.

    The question is ``add_noise_to_string(answer, AMOUNT_OF_NOISE)``. Both the
    question and the answer then get PADDING appended, repeated once for every
    character they fall short of ``CONFIG.max_input_len``.
    """
    noisy = add_noise_to_string(answer, AMOUNT_OF_NOISE)
    question_padding = PADDING * (CONFIG.max_input_len - len(noisy))
    padded_question = noisy + question_padding
    answer_padding = PADDING * (CONFIG.max_input_len - len(answer))
    padded_answer = answer + answer_padding
    return padded_question, padded_answer

def generate_news_data():
    """Generate some news data.

    Reads the newline-separated answers from NEWS_FILE_NAME_SPLIT, shuffles
    them and builds a noisy, padded question for each one with
    ``generate_question``. The answers are replaced in place by their padded
    form, and questions are reversed when ``CONFIG.inverted`` is set.

    Returns:
        A ``(questions, answers)`` pair of equally long lists.
    """
    print ("Generating Data")
    # Read bytes and decode explicitly (valid on every Python version); the
    # ``with`` block closes the file.
    with open(NEWS_FILE_NAME_SPLIT, "rb") as split_data:
        answers = split_data.read().decode('utf-8').split("\n")
    questions = []
    print('shuffle', end=" ")
    random_shuffle(answers)
    print("Done")
    for answer_index, answer in enumerate(answers):
        question, answer = generate_question(answer)
        answers[answer_index] = answer
        assert len(answer) == CONFIG.max_input_len
        if random_randint(100000) == 8: # Show some progress
            # NOTE(review): this prints the total number of answers, not
            # answer_index - confirm that is the intended progress output.
            print (len(answers))
            print ("answer:   '{}'".format(answer))
            print ("question: '{}'".format(question))
            print ()
        question = question[::-1] if CONFIG.inverted else question
        questions.append(question)

    return questions, answers

def train_speller_w_all_data():
    """Train the speller with the whole data set held in memory.

    Generates the questions and answers, collects every character that occurs
    in either, vectorizes the data, builds a model and runs
    ``iterate_training`` on it.
    """
    questions, answers = generate_news_data()
    # Alphabet of each side first, then their union as a list.
    answer_alphabet = set.union(*map(set, answers))
    question_alphabet = set.union(*map(set, questions))
    chars = list(answer_alphabet.union(question_alphabet))
    vectorized = vectorize(questions, answers, chars)
    X_train, X_val, y_train, y_val, y_maxlen, ctable = vectorized
    print ("y_maxlen, chars", y_maxlen, "".join(chars))
    model = generate_model(y_maxlen, chars)
    iterate_training(model, X_train, y_train, X_val, y_val, ctable)

def train_speller(from_file=None):
    """Train the speller, optionally resuming from a saved model.

    When *from_file* is truthy the model is loaded from it with
    ``load_model``; otherwise a new model is generated for
    ``CONFIG.max_input_len`` and the characters returned by
    ``read_top_chars``. The model is then trained with ``itarative_train``.
    """
    if not from_file:
        model = generate_model(CONFIG.max_input_len, chars=read_top_chars())
    else:
        model = load_model(from_file)
    itarative_train(model)

if __name__ == "__main__":
    # Earlier pipeline steps, kept commented out (presumably enabled one at a
    # time as needed - confirm with the author):
    #     download_the_news_data()
    #     uncompress_data()
    #     preprocesses_data_clean()
    #     preprocesses_data_analyze_chars()
    #     preprocesses_data_filter()
    #     preprocesses_split_lines() --- Choose this step or:
    #     preprocesses_split_lines2()
    #     preprocesses_split_lines4()
    #     preprocess_partition_data()
    # NOTE(review): no preprocesses_split_lines4 is visible in this part of
    # the file (only preprocesses_split_lines, ...2 and ...3) - verify the name.
    # To resume from a saved model instead of starting fresh:
    #     train_speller(os.path.join(DATA_FILES_FULL_PATH, "keras_spell_e15.h5"))
    train_speller()