Example #1
0
def _load_stations():
    """Load the station name -> telecode mapping from ``stations.dat``.

    The data file lives next to this module; each non-empty line holds a
    station name and its telecode separated by whitespace.

    Returns:
        dict: unicode station name mapped to its unicode telecode.
    """
    filepath = os.path.join(os.path.dirname(__file__), 'stations.dat')
    stations = {}
    with open(filepath, 'rb') as f:
        # Iterate the file lazily instead of materializing every line
        # with readlines().
        for line in f:
            # Robustness: a blank line would make the tuple unpack fail.
            if not line.strip():
                continue
            name, telecode = line.split()
            stations[to_unicode(name)] = to_unicode(telecode)
    return stations
Example #2
0
def _load_stations():
    """Build the station-name -> telecode dict from the bundled data file.

    ``stations.dat`` sits in the same directory as this module; each
    non-empty line is "<name> <telecode>" separated by whitespace.

    Returns:
        dict: unicode station name keyed to its unicode telecode.
    """
    filepath = os.path.join(os.path.dirname(__file__), 'stations.dat')
    stations = {}
    with open(filepath, 'rb') as f:
        # Stream the file line by line; readlines() would load it whole.
        for line in f:
            # Skip blank lines so the two-value unpack cannot fail on them.
            if not line.strip():
                continue
            name, telecode = line.split()
            stations[to_unicode(name)] = to_unicode(telecode)
    return stations
Example #3
0
    def test_should_create_sphinx_header(self):
        """ Should create Sphinx header. """
        os.chdir(self.var_path)
        with HookStdOut():
            self.app.create_project('testproject')

        view_path = join(self.var_path, 'testproject', 'views', 'frontend',
                         'index.py')

        with open(view_path, 'rb') as f:
            lines = f.readlines()
            # .strip() replaces the redundant .lstrip().rstrip() chain.
            self.assertEqual(to_unicode(lines[2].strip()),
                             'testproject.views.frontend.index')
            self.assertEqual(to_unicode(lines[3].strip()),
                             '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
Example #4
0
    def test_should_create_sphinx_header(self):
        """ Should create Sphinx header. """
        os.chdir(self.var_path)
        with HookStdOut():
            self.app.create_project('testproject')

        view_path = join(self.var_path, 'testproject',
                         'views', 'frontend', 'index.py')

        with open(view_path, 'rb') as f:
            lines = f.readlines()
            # Use the single .strip() call instead of .lstrip().rstrip().
            self.assertEqual(to_unicode(lines[2].strip()),
                             'testproject.views.frontend.index')
            self.assertEqual(to_unicode(lines[3].strip()),
                             '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
Example #5
0
 def to_sentences(self, paragraph):
     """Split *paragraph* into a tuple of whitespace-stripped sentences."""
     tokenizer = self._sentence_tokenizer
     if hasattr(tokenizer, '_params'):
         # Teach the tokenizer any language-specific abbreviations so it
         # does not end a sentence at them.
         extras = self.LANGUAGE_EXTRA_ABREVS.get(self._language, [])
         tokenizer._params.abbrev_types.update(extras)
     raw_sentences = tokenizer.tokenize(to_unicode(paragraph))
     return tuple(map(unicode.strip, raw_sentences))
Example #6
0
 def __init__(self, words, tokenizer=None):
     """Build a case-insensitive term-frequency model.

     ``words`` is either a string (in which case ``tokenizer`` is
     required to split it into words) or a sequence of words.
     """
     if isinstance(words, string_types):
         if tokenizer is None:
             raise ValueError(
                 "the tokenizer must be called if 'words' is not a sequence.")
         words = tokenizer.to_words(to_unicode(words))
     elif not isinstance(words, Sequence):
         raise ValueError(
             "Parameter 'words' has to be sequence or string with tokenizer given."
         )
     # Frequency table of lower-cased terms.
     self._terms = Counter(map(unicode.lower, words))
     # Fall back to 1 so empty input cannot cause division by zero later.
     self._max_frequency = max(self._terms.values()) if self._terms else 1
Example #7
0
def cli():
    """Command-line entry point: look up trains and print the result table."""
    # Parse the command-line arguments.
    arguments = docopt(__doc__)

    from_station_code = stations.get(to_unicode(arguments['<from>']))
    if not from_station_code:
        print('Seems that no this station where you from.')
        exit()

    to_station_code = stations.get(to_unicode(arguments['<to>']))
    if not to_station_code:
        print('Seems that no this station where you going to.')
        exit()

    valid_date = get_valid_date(to_unicode(arguments['<date>']))
    if not valid_date:
        print('Not a valid date.')
        exit()

    # Transform valid options to a string, e.g. '-d' -> 'd'.
    opts = ''.join(o[1] for o in arguments
                   if o in '-d-g-k-t-z' and arguments[o])

    params = build_params(from_station_code, to_station_code, valid_date)
    try:
        resp = requests.get(QUERY_URL, params=params, verify=False)
    except ConnectionError:
        print(colorit('red', 'Network connection fail.'))
        exit()

    try:
        rows = resp.json()['data']['datas']
    except KeyError:
        print(colorit('green', 'No train available.'))
        # Bug fix: without exiting here, `rows` stays unbound and the
        # TrainsCollection call below raises NameError.
        exit()

    trains = TrainsCollection(rows, opts)

    trains.export()
Example #8
0
def cli():
    """Command-line entry point: look up trains and print the result table."""
    # Parse the command-line arguments.
    arguments = docopt(__doc__)

    from_station_code = stations.get(to_unicode(arguments['<from>']))
    if not from_station_code:
        print('Seems that no this station where you from.')
        exit()

    to_station_code = stations.get(to_unicode(arguments['<to>']))
    if not to_station_code:
        print('Seems that no this station where you going to.')
        exit()

    valid_date = get_valid_date(to_unicode(arguments['<date>']))
    if not valid_date:
        print('Not a valid date.')
        exit()

    # Transform valid options to a string, e.g. '-d' -> 'd'.
    opts = ''.join(o[1] for o in arguments if o in '-d-g-k-t-z' and arguments[o])

    params = build_params(from_station_code, to_station_code, valid_date)
    try:
        resp = requests.get(QUERY_URL, params=params, verify=False)
    except ConnectionError:
        print(colorit('red', 'Network connection fail.'))
        exit()

    try:
        rows = resp.json()['data']['datas']
    except KeyError:
        print(colorit('green', 'No train available.'))
        # Bug fix: must stop here, otherwise `rows` is unbound below
        # and TrainsCollection(rows, opts) raises NameError.
        exit()

    trains = TrainsCollection(rows, opts)

    trains.export()
Example #9
0
def null_stemmer(word):
    """Identity stemmer: return *word* lower-cased, with no stemming.

    The parameter was renamed from ``object``, which shadowed the
    builtin of the same name; callers invoke stemmers positionally.
    """
    return to_unicode(word).lower()
Example #10
0
 def __init__(self, text, tokenizer, is_heading=False):
     """Store normalized text, the tokenizer for it, and a heading flag."""
     self._tokenizer = tokenizer
     self._is_heading = bool(is_heading)
     # Normalize to unicode and drop surrounding whitespace up front.
     self._text = to_unicode(text).strip()
Example #11
0
 def normalize_word(self, word):
     """Return *word* as lower-cased unicode."""
     normalized = to_unicode(word)
     return normalized.lower()
Example #12
0
def parse_stop_words(data):
    """Parse stop words from *data*, one word per line, into a frozenset.

    Each non-empty line is right-stripped; empty lines are skipped.
    """
    lines = to_unicode(data).splitlines()
    return frozenset(line.rstrip() for line in lines if line)
Example #13
0
 def to_words(self, sentence):
     """Tokenize *sentence* and keep only tokens that count as words."""
     tokens = self._word_tokenizer.tokenize(to_unicode(sentence))
     return tuple(token for token in tokens if self._is_word(token))