def __init__(self, locale="EN_US", comparison_level=0, case_sensitive=False):
        """Create the ICU collators used for comparisons.

        ``comparison_level`` is clamped into the range 0..3 and used as the
        strength of the main collator; ``case_sensitive`` switches that
        collator's CASE_LEVEL attribute on or off.

        A second "base" collator (strength 0, case level off) is stored as
        well.  When the main collator already has exactly those settings it
        is reused rather than creating another instance.
        """
        strength = max(0, min(3, comparison_level))

        self.__locale = icu_locale = Locale(locale)
        self.__collator = main_collator = Collator.createInstance(icu_locale)
        main_collator.setStrength(strength)
        if case_sensitive:
            case_level = UCollAttributeValue.ON
        else:
            case_level = UCollAttributeValue.OFF
        main_collator.setAttribute(UCollAttribute.CASE_LEVEL, case_level)

        # The comparison against False is deliberate: only a value equal to
        # False lets the main collator double as the base collator.
        if strength != 0 or case_sensitive != False:
            self.__base_coll = base = Collator.createInstance(icu_locale)
            base.setStrength(0)
            base.setAttribute(UCollAttribute.CASE_LEVEL,
                              UCollAttributeValue.OFF)
        else:
            self.__base_coll = main_collator
Ejemplo n.º 2
0
    def widget(cls, field, value, collation=None, **attributes):
        """
        Generates a SELECT tag, including OPTIONs (only 1 option allowed)

        The options are the (value, label) pairs returned by the
        ``options()`` method of the field's first validator.  When
        ``collation`` is given it is used as an ICU locale identifier and the
        options are ordered by label with that locale's collation rules.

        Raises SyntaxError when the field has no validator exposing
        ``options()``.

        see also: `FormWidget.widget`
        """
        default = dict(value=value)
        attr = cls._attributes(field, default, **attributes)
        requires = field.requires
        if not isinstance(requires, (list, tuple)):
            requires = [requires]
        # An empty validator list used to leave ``options`` unbound and fail
        # later with a NameError; report it like a validator without options.
        if not requires or not hasattr(requires[0], 'options'):
            raise SyntaxError('widget cannot determine options of %s' %
                              field)
        options = requires[0].options()

        if collation:
            coll = Collator.createInstance(Locale(collation))
            # sorted() has no ``cmp`` argument on Python 3.  ICU sort keys
            # order exactly like Collator.compare and work on 2 and 3.
            options = sorted(options,
                             key=lambda option: coll.getSortKey(option[1]))

        opts = [OPTION(v, _value=k) for (k, v) in options]
        return SELECT(*opts, **attr)
    def __init__(self, *args, **kwargs):
        '''Initialize a unicode dictionary.

        The signature differs from ``dict`` because the keyword arguments
        are used to set the comparison details:

        locale -- locale identifier handed to ICU (default 'en_US')
        comparison_level -- collator strength, clamped into 0..3 (default 0)
        case_sensitive -- turn the collator's CASE_LEVEL attribute on

        At most one positional argument is accepted: a mapping or an iterable
        of (key, value) pairs whose items are inserted.  When that argument
        is an instance of this class, its comparison settings are used as
        defaults for any setting not passed as a keyword.

        Raises TypeError when more than one positional argument is given.
        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))

        if len(args) == 1 and isinstance(args[0], self.__class__):
            source = args[0]
            # Explicit keywords win over the settings of the copied instance.
            locale = (kwargs.pop('locale') if 'locale' in kwargs
                      else source.locale)
            comparison_level = (kwargs.pop('comparison_level')
                                if 'comparison_level' in kwargs
                                else source.comparison_level)
            case_sensitive = (kwargs.pop('case_sensitive')
                              if 'case_sensitive' in kwargs
                              else source.case_sensitive)
        else:
            locale = kwargs.pop('locale', 'en_US')
            comparison_level = kwargs.pop('comparison_level', 0)
            case_sensitive = kwargs.pop('case_sensitive', False)

        # Clamp on both paths; previously an override passed together with
        # an instance to copy reached the collator unclamped.
        comparison_level = max(0, min(3, comparison_level))

        self.__locale = Locale(locale)
        self.__collator = Collator.createInstance(self.__locale)
        self.__collator.setStrength(comparison_level)
        self.__collator.setAttribute(
            UCollAttribute.CASE_LEVEL,
            UCollAttributeValue.ON if case_sensitive
            else UCollAttributeValue.OFF)

        if len(args) == 1:
            if isinstance(args[0], Mapping):
                vals = list(args[0].items())
            else:
                vals = args[0]
            for key, val in vals:
                self.__setitem__(key, val)
Ejemplo n.º 4
0
    def widget(cls, field, value, collation=None, **attributes):
        """
        Generates a SELECT tag, including OPTIONs (only 1 option allowed)

        The options are the (value, label) pairs returned by the
        ``options()`` method of the field's first validator.  When
        ``collation`` is given it is used as an ICU locale identifier and the
        options are ordered by label with that locale's collation rules.

        Raises SyntaxError when the field has no validator exposing
        ``options()``.

        see also: `FormWidget.widget`
        """
        default = dict(value=value)
        attr = cls._attributes(field, default,
                               **attributes)
        requires = field.requires
        if not isinstance(requires, (list, tuple)):
            requires = [requires]
        # An empty validator list used to leave ``options`` unbound and fail
        # later with a NameError; report it like a validator without options.
        if not requires or not hasattr(requires[0], 'options'):
            raise SyntaxError(
                'widget cannot determine options of %s' % field)
        options = requires[0].options()

        if collation:
            coll = Collator.createInstance(Locale(collation))
            # sorted() has no ``cmp`` argument on Python 3.  ICU sort keys
            # order exactly like Collator.compare and work on 2 and 3.
            options = sorted(options,
                             key=lambda option: coll.getSortKey(option[1]))

        opts = [OPTION(v, _value=k) for (k, v) in options]
        return SELECT(*opts, **attr)
    def _get_rowlist(self, tablename, orderby, restrictor, collation):
        """
        Return the db rows for the filtered list items plus a flash message.

        tablename -- name of the table whose rows are listed
        orderby -- name of the ordering field, or a list whose first element
                   is that name
        restrictor -- mapping of field name to required value; a falsy value
                      selects every row
        collation -- ICU locale identifier; when truthy the rows are
                     re-sorted by the ``orderby`` value using that locale

        Returns a tuple ``(rowlist, flash)``.  ``rowlist`` is the result of
        ``as_list()`` on the selected rows (an empty list when the table
        does not exist) and ``flash`` is an error message or ''.
        """
        db = current.db
        flash = ''
        orderby = orderby[0] if isinstance(orderby, list) else orderby

        if tablename not in db.tables():
            flash = '''Sorry, you are trying to list
                    entries from a table that does not exist in the database.'''
            # Return right away: falling through used to hit
            # ``myrows.as_list()`` with ``myrows`` unbound, so the caller got
            # a NameError instead of this message.
            return [], flash

        tb = db[tablename]
        if restrictor:
            # NOTE(review): every iteration replaces ``myrows``, so only the
            # last (field, value) pair of ``restrictor`` affects the result.
            for k, v in list(restrictor.items()):
                filter_select = db(tb[k] == v)._select(tb.id)
                myrows = db(tb.id.belongs(filter_select)
                            ).select(orderby=~tb[orderby])
        else:
            myrows = db().select(tb.ALL, orderby=~tb[orderby])
        rowlist = myrows.as_list()

        if collation:
            coll = Collator.createInstance(Locale(collation))
            # sorted() has no ``cmp`` argument on Python 3.  ICU sort keys
            # order exactly like Collator.compare and work on 2 and 3.
            rowlist = sorted(rowlist,
                             key=lambda row: coll.getSortKey(row[orderby]))

        return rowlist, flash
Ejemplo n.º 6
0
def sorted_(data, l, series=False, i=False):
    """Return a copy of dict ``data`` ordered with the collation of ``l``.

    ``l`` is a language tag passed to ``Locale.forLanguageTag``.  Each
    (key, value) item is ordered by the ICU sort key of ``item[i]``; with the
    default ``i=False`` that is element 0, i.e. the key.  ``series`` is not
    used here.  For non-dict input nothing is returned.
    """
    collator = Collator.createInstance(Locale.forLanguageTag(l))

    def item_sort_key(item):
        return collator.getSortKey(item[i])

    if isinstance(data, dict):
        return dict(sorted(data.items(), key=item_sort_key))
Ejemplo n.º 7
0
def index(request):
  """Render a paginated, label-sorted list of resources for one aspect.

  The aspect comes from the ``aspect`` query parameter and defaults to
  'Collection'; an aspect missing from ROUTABLES falls back to the
  'Collection' URI for the lookup only.  Resources typed as
  UNBIST.PlaceName are left out.
  """
  preferred_language = translation.get_language()
  collator = Collator.createInstance(Locale(preferred_language))

  aspect = request.GET.get('aspect') or 'Collection'

  try:
    aspect_uri = ROUTABLES[aspect]
  except KeyError:
    aspect_uri = ROUTABLES['Collection']

  this_results = []
  for res in graph.subjects(RDF.type, aspect_uri):
    resource_types = list(Resource(graph, res)[RDF.type])
    if Resource(graph, UNBIST.PlaceName) not in resource_types:
      this_results.append({
        'uri': res,
        'pref_label': get_preferred_label(res, preferred_language),
      })

  # Collator-based ordering is not applied here; labels are compared as
  # plain strings.
  sorted_results = sorted(this_results, key=lambda entry: entry['pref_label'])

  try:
    page = request.GET.get('page', 1)
  except PageNotAnInteger:
    page = 1

  paginator = Paginator(sorted_results, 20, request=request)
  context = {
    'results': paginator.page(page),
    'target': 'instances',
    'aspect': aspect,
  }
  return render(request, 'thesaurus/index.html', context)
Ejemplo n.º 8
0
def _localize_timezones(locale: babel.Locale) -> LocalizedTimezone:
    """Return one localized timezone entry per known timezone id.

    A single ICU collator, created from ``locale.language``, is shared by
    every ``_localize_timezone`` call.  The result is a list.
    """
    zones_and_aliases = _read_timezone_ids_and_aliases()
    # locale.language: 'en' or 'en_US'
    icu_locale = Locale.createFromName(locale.language)
    collator = Collator.createInstance(icu_locale)

    localized = []
    for zone, aliases in zones_and_aliases.items():
        localized.append(_localize_timezone(zone, aliases, locale, collator))
    return localized
Ejemplo n.º 9
0
def sortkey_length(strength, word):
    """Return the length of the ICU sort key of ``word``.

    The collator is created for ``Locale('')`` with the given ``strength``
    and ALTERNATE_HANDLING set to SHIFTED.
    """
    collator = Collator.createInstance(Locale(''))
    collator.setStrength(strength)
    collator.setAttribute(UCollAttribute.ALTERNATE_HANDLING,
                          UCollAttributeValue.SHIFTED)
    key_bytes = collator.getSortKey(word)
    # The key ends with a \x00 byte that is not counted.
    return len(key_bytes) - 1
Ejemplo n.º 10
0
 def primary_difference(prev_key, new_key, rlocale=glocale):
     """
     Tell whether the two keys differ at primary collation strength.

     The PyICU collator is built from ``rlocale.collation`` so that a report
     generated for another language uses that language's collation sequence.
     """
     collator = Collator.createInstance(Locale(rlocale.collation))
     collator.setStrength(Collator.PRIMARY)
     comparison = collator.compare(prev_key, new_key)
     return comparison != 0
Ejemplo n.º 11
0
def sortkey(strength, maxlength=None):
    """Return a function that maps a string to its ICU sort key.

    The collator is created for ``Locale('')`` with the given ``strength``
    and ALTERNATE_HANDLING set to SHIFTED.  When ``maxlength`` is not None
    the returned function truncates each key to ``maxlength`` bytes.
    """
    collator = Collator.createInstance(Locale(''))
    collator.setStrength(strength)
    collator.setAttribute(UCollAttribute.ALTERNATE_HANDLING,
                          UCollAttributeValue.SHIFTED)

    if maxlength is not None:
        def truncated_sort_key(text):
            return collator.getSortKey(text)[:maxlength]
        return truncated_sort_key
    return collator.getSortKey
Ejemplo n.º 12
0
def sort(iterable, loc, key=None, reverse=False):
    """
    Creates new sorted list from passed list (or any iterable data) according to the passed locale.

    arguments:
    iterable -- iterable object (typically a list or a tuple)
    loc -- locale identifier (e.g. cs_CZ.UTF-8, en_US,...)
    key -- access to sorted value
    reverse -- whether the result should be in reversed order (default is False)
    """
    collator = Collator.createInstance(Locale(loc))
    # sorted() lost its ``cmp`` argument in Python 3.  ICU sort keys order
    # exactly like Collator.compare and work on both Python 2 and 3.
    if key is None:
        sort_key = collator.getSortKey
    else:
        def sort_key(item):
            return collator.getSortKey(key(item))
    return sorted(iterable, key=sort_key, reverse=reverse)
Ejemplo n.º 13
0
def sort(iterable, loc, key=None, reverse=False):
    """
    Creates new sorted list from passed list (or any iterable data) according to the passed locale.

    arguments:
    iterable -- iterable object (typically a list or a tuple)
    loc -- locale identifier (e.g. cs_CZ.UTF-8, en_US,...)
    key -- access to sorted value
    reverse -- whether the result should be in reversed order (default is False)
    """
    collator = Collator.createInstance(Locale(loc))
    # sorted() lost its ``cmp`` argument in Python 3.  ICU sort keys order
    # exactly like Collator.compare and work on both Python 2 and 3.
    if key is None:
        sort_key = collator.getSortKey
    else:
        def sort_key(item):
            return collator.getSortKey(key(item))
    return sorted(iterable, key=sort_key, reverse=reverse)
def sort_for_script(cp_list, script):
    """Return ``cp_list`` sorted with the collation for ``script``.

    The language comes from ``lang_for_script(script)``.  When no language is
    known a message is printed and ``cp_list`` is returned unchanged.  With
    ICU available the ICU collator of ``<lang>_<script>`` is used, otherwise
    the C library's ``locale.strcoll``.
    """
    lang = lang_for_script(script)
    if not lang:
        # Parenthesised single argument: valid on both Python 2 and 3.
        print('cannot sort for script, no lang for %s' % script)
        return cp_list
    if _HAVE_ICU:
        from icu import Locale, Collator
        loc = Locale(lang + '_' + script)
        col = Collator.createInstance(loc)
        # sorted() has no ``cmp`` argument on Python 3; ICU sort keys order
        # exactly like Collator.compare.
        return sorted(cp_list, key=col.getSortKey)
    else:
        import locale
        from functools import cmp_to_key
        return sorted(cp_list, key=cmp_to_key(locale.strcoll))
Ejemplo n.º 15
0
def _localize_timezone(zone: str, aliases: List[str], locale: babel.Locale,
                       collator: Collator) -> LocalizedTimezone:
    """Build the LocalizedTimezone record for ``zone`` in ``locale``.

    The display name comes from ``get_timezone_location`` and its ICU sort
    key from ``collator``.
    """
    tz = pytz.timezone(zone)
    display_name = get_timezone_location(tz, locale=locale)
    return LocalizedTimezone(
        id=zone,
        name=display_name,
        aliases=aliases,
        name_sort_key=collator.getSortKey(display_name),
        tzinfo=tz,
        locale=locale,
    )
def sort_for_script(cp_list, script):
  """Return ``cp_list`` sorted with the collation for ``script``.

  The language comes from ``lang_for_script(script)``.  When no language is
  known a message is printed and ``cp_list`` is returned unchanged.  With
  ICU available the ICU collator of ``<lang>_<script>`` is used, otherwise
  the C library's ``locale.strcoll``.
  """
  lang = lang_for_script(script)
  if not lang:
    # Parenthesised single argument: valid on both Python 2 and 3.
    print('cannot sort for script, no lang for %s' % script)
    return cp_list
  if _HAVE_ICU:
    from icu import Locale, Collator
    loc = Locale(lang + '_' + script)
    col = Collator.createInstance(loc)
    # sorted() has no ``cmp`` argument on Python 3; ICU sort keys order
    # exactly like Collator.compare.
    return sorted(cp_list, key=col.getSortKey)
  else:
    import locale
    from functools import cmp_to_key
    return sorted(cp_list, key=cmp_to_key(locale.strcoll))
Ejemplo n.º 17
0
def init():
    """Rebuild the database from the karaoke directory tree.

    All tables are dropped and recreated.  Then every
    ``<KARAOKE_DIR>/<artist>/<title>`` directory is visited: alias entries
    become SongAlias rows, and every ``.mp3`` / ``.mpg`` file in a regular
    title directory becomes a SongFile row.  '.DS_Store' entries and
    symbolic links are skipped.  A progress character is printed per row.
    """
    db.reflect()
    db.drop_all()
    db.create_all()

    root_dir = app.config['KARAOKE_DIR']

    collator = Collator.createInstance(Locale('en_US'))

    # Directory entries are processed in os.listdir() order (unsorted).
    for artist in os.listdir(root_dir):
        artist_dir = '%s/%s' % (root_dir, artist)
        if artist == '.DS_Store' or os.path.islink(artist_dir):
            continue

        name = re.sub(r' : ', ' / ', artist)

        for title in os.listdir(artist_dir):
            title_dir = '%s/%s/%s' % (root_dir, artist, title)

            if title == '.DS_Store' or os.path.islink(title_dir):
                continue

            if is_alias(title_dir):
                print("a", end="", flush=True)
                target_name = os.path.basename(target_of_alias(title_dir))
                song = decolonize(target_name)
                db.session.add(
                    SongAlias(title=title, artist=artist, song=song))
            else:
                title = decolonize(title)
                for filename in os.listdir(title_dir):
                    if not filename.endswith((".mp3", ".mpg")):
                        continue
                    print(".", end="", flush=True)
                    db.session.add(SongFile(path=title_dir, filename=filename,
                                            title=title, artist=artist))

    db.session.commit()
Ejemplo n.º 18
0
def worker(path, outdir, with_sorting=True):
    """Extract the distinct words of one text file into ``outdir``.

    ``path`` names a ``.yml`` metadata file; the text is read from the file
    with the same name but a ``.txt`` suffix.  The lower-cased text is split
    on runs of non-word characters and digits, and the distinct words are
    written one per line to
    ``<outdir>/extracted-words-for-<label>.txt``, where ``label`` comes from
    the metadata.  With ``with_sorting`` the words are ordered with the
    pl_PL ICU collator; otherwise they are written in set order.

    Returns ``path``.
    """
    collator = Collator.createInstance(Locale("pl_PL.UTF-8"))
    # Raw string: "\W" and "\d" are invalid escapes in a normal literal and
    # trigger a warning on recent Python versions.  The pattern is unchanged.
    separator = re.compile(r"[\W\d]+")
    filepath = path.replace(".yml", ".txt")
    with open(filepath) as text_file:
        text = text_file.read().lower().rstrip()
        words = set(separator.split(text))
    with open(path) as meta_file:
        meta = yaml.safe_load(meta_file)
    with open(f"{outdir}/extracted-words-for-{meta['label']}.txt",
              "w") as out_file:
        if with_sorting:
            words = sorted(words, key=collator.getSortKey)
        out_file.write("\n".join(words))
    return path
Ejemplo n.º 19
0
def sort_choices(choices):
    '''
    Sorts choices alphabetically.

    ``choices`` is an iterable of tuples; element 1 of each tuple is the
    value compared.  With ICU available the collator for the current
    language (``get_language()``) decides the order, otherwise the values
    are compared directly.  Returns a new list.
    '''
    # The builtin ``cmp`` and the ``cmp`` argument of sorted() do not exist
    # on Python 3, so both branches sort with key functions instead.
    if not HAS_ICU:
        return sorted(choices, key=lambda tup: tup[1])

    collator = Collator.createInstance(Locale(get_language()))
    # ICU sort keys order exactly like Collator.compare.
    return sorted(choices, key=lambda tup: collator.getSortKey(tup[1]))
Ejemplo n.º 20
0
def sort_choices(choices):
    '''
    Sorts choices alphabetically.

    ``choices`` is an iterable of tuples; element 1 of each tuple is the
    value compared.  With ICU available the collator for the current
    language (``get_language()``) decides the order, otherwise the values
    are compared directly.  Returns a new list.
    '''
    # The builtin ``cmp`` and the ``cmp`` argument of sorted() do not exist
    # on Python 3, so both branches sort with key functions instead.
    if not HAS_ICU:
        return sorted(choices, key=lambda tup: tup[1])

    collator = Collator.createInstance(Locale(get_language()))
    # ICU sort keys order exactly like Collator.compare.
    return sorted(choices, key=lambda tup: collator.getSortKey(tup[1]))
Ejemplo n.º 21
0
def sort(iterable, loc, key=None, reverse=False):
    """
    Build a new list from the passed iterable, ordered by the collation rules of the passed locale.

    arguments:
    iterable -- iterable object (typically a list or a tuple)
    loc -- locale identifier (e.g. cs_CZ.UTF-8, en_US,...)
    key -- function extracting the compared value from an item
    reverse -- whether the result should be in reversed order (default is False)

    Raises LocalizationError when ``loc`` is empty.
    """
    if not loc:
        raise LocalizationError(
            'cannot sort string due to missing locale information (probably a configuration issue)')

    collator = Collator.createInstance(Locale(loc))

    def compare_extracted(left, right):
        return collator.compare(key(left), key(right))

    comparator = collator.compare if key is None else compare_extracted
    return sorted(iterable, key=cmp_to_key(comparator), reverse=reverse)
Ejemplo n.º 22
0
 def create_collator(strength):
     """Return a collator for ``Locale('')`` set to ``strength``."""
     collator = Collator.createInstance(Locale(''))
     collator.setStrength(strength)
     return collator
Ejemplo n.º 23
0
    def __init__(self, localedir=None, lang=None, domain=None, languages=None):
        """
        Init a GrampsLocale. Run __init_first_instance() to set up the
        environment if this is the first run. Return __first_instance
        otherwise if called without arguments.

        localedir -- directory holding the translations; used only when it
                     exists, otherwise the first instance's directory is
                     tried
        lang -- stored as ``self.lang`` and used to create the default ICU
                locale
        domain -- translation domain; defaults to 'gramps'
        languages -- colon-separated language list; each entry is passed
                     through ``check_available_translations`` and falsy
                     results are dropped
        """
        global _hdlr
        #initialized is special, used only for the "first instance",
        #and created by __new__(). It's used to prevent re-__init__ing
        #__first_instance when __new__() returns its pointer.
        if hasattr(self, 'initialized') and self.initialized:
            return
        _first = self._GrampsLocale__first_instance
        self.localedir = None
        # Everything breaks without localedir, so get that set up
        # first.  Warnings are logged in _init_first_instance or
        # _init_secondary_locale if this comes up empty.
        if localedir and os.path.exists(os.path.abspath(localedir)):
            self.localedir = localedir
        elif (_first and hasattr(_first, 'localedir') and _first.localedir
              and os.path.exists(os.path.abspath(_first.localedir))):
            self.localedir = _first.localedir
        else:
            LOG.warning(
                'Missing or invalid localedir %s; no translations'
                ' will be available.', repr(localedir))
        self.lang = lang
        self.localedomain = domain or 'gramps'
        if languages:
            self.language = [
                x for x in [
                    self.check_available_translations(l)
                    for l in languages.split(":")
                ] if x
            ]
        else:
            self.language = None

        # The first instance gets the full environment setup; any other
        # instance goes through the secondary-locale setup.
        if self == _first:
            self._GrampsLocale__init_first_instance()
        else:
            self._init_secondary_locale()

        # ICU objects are only created when ICU is available; otherwise
        # icu_locales stays empty and collator stays None.
        self.icu_locales = {}
        self.collator = None
        if HAVE_ICU:
            self.icu_locales["default"] = Locale.createFromName(self.lang)
            # NOTE(review): self.collation is not assigned in this method;
            # presumably one of the init helpers above sets it - confirm.
            if self.collation and self.collation != self.lang:
                self.icu_locales["collation"] = Locale.createFromName(
                    self.collation)
            else:
                self.icu_locales["collation"] = self.icu_locales["default"]
            try:
                self.collator = Collator.createInstance(
                    self.icu_locales["collation"])
            except ICUError as err:
                LOG.warning("Unable to create collator: %s", str(err))
                self.collator = None

        # Fall back to null translations tagged as "en" when no translation
        # can be found.
        try:
            self.translation = self._get_translation(self.localedomain,
                                                     self.localedir,
                                                     self.language)
        except ValueError:
            LOG.warning(
                "Unable to find translation for languages in %s, using US English",
                ':'.join(self.language))
            self.translation = GrampsNullTranslations()
            self.translation._language = "en"

        # Detach and forget the module-level log handler, if one is set.
        if _hdlr:
            LOG.removeHandler(_hdlr)
            _hdlr = None
        self._dd = self._dp = None
        #Guards against running twice on the first instance.
        self.initialized = True
 def collator(self):
     """Return a new collator created for ``self.locale``."""
     icu_collator = Collator.createInstance(self.locale)
     return icu_collator
Ejemplo n.º 25
0
You will be presented with a prompt. There are three options.\n\
  1. Type the list number of a suggested Facebook friend and press Enter to create a link with that friend.\n\
  2. Type a name and press Enter to perform another search of Facebook friends.\n\
     A link will be automatically created if the search name and a Facebook friend name are a close enough match.\n\
     This is helpful if a contact's name does not closely match their Facebook name.\n\
  3. Press Enter without typing anything to ignore the contact.\n\
     Sink will ignore this contact during updates.'''

# Default arguments
# NOTE(review): the units and exact meaning of these defaults are not
# visible in this part of the file - confirm against the code that reads them.
PORT = 7465
SCORE_THRESHOLD = 100
MATCH_LIMIT = 5
RETRIES = 3
DELAY = 0
EXPIRY = 30
# ICU collator created for the 'pt_BR.UTF-8' locale.
COLLATOR = Collator.createInstance(Locale('pt_BR.UTF-8'))
# Twice the CPU count reported by multiprocessing, plus one.
QTY_THREADS = multiprocessing.cpu_count() * 2 + 1

# Shelf keys
TOKEN = 'token'
USERNAME = '******'
PASSWORD = '******'
LINKS = 'links'
CHECKSUMS = 'checksums'
TIMESTAMPS = 'timestamps'


class Facebook:
    # Base URL of the mbasic Facebook site.
    base_url = 'https://mbasic.facebook.com'
    # Graph API picture URL template (720x720, redirect disabled); the %s
    # placeholder is filled in by the caller - presumably with a user id,
    # confirm.
    graph_api_picture = 'https://graph.facebook.com/%s/picture?height=720&width=720&redirect=false'
    # Regex capturing the run of digits that follows '/messages/thread/'.
    user_id_regex = r'/messages/thread/(\d+)'
Ejemplo n.º 26
0
 def createInstance(locale):
     """Return a new ``Collator`` constructed from ``locale``."""
     instance = Collator(locale)
     return instance
Ejemplo n.º 27
0
        meta = yaml.safe_load(file)
    with open(f"{outdir}/extracted-words-for-{meta['label']}.txt",
              "w") as file:
        if with_sorting:
            words = sorted(words, key=collator.getSortKey)
        file.write("\n".join(words))
    print(f"Saved: ", filepath)


if __name__ == "__main__":
    # Script entry point: recreate the output directory from scratch, run
    # worker() on every .yml file below ../data/pl, and report the elapsed
    # wall-clock time.
    t = time.time()

    outdir = "words"
    if os.path.exists(outdir):
        shutil.rmtree(outdir)
    os.makedirs(outdir)

    # One collator and one compiled pattern are shared by all worker() calls.
    collator = Collator.createInstance(Locale("pl_PL.UTF-8"))
    # Raw string: "\W" and "\d" are invalid escapes in a normal literal and
    # trigger a warning on recent Python versions.  The pattern is unchanged.
    separator = re.compile(r"[\W\d]+")
    print("Processing")
    for path in glob.glob("../data/pl/**/*.yml", recursive=True):
        worker(
            path=path,
            collator=collator,
            separator=separator,
            outdir=outdir,
            with_sorting=True,
        )

    print("Total timing: ", time.time() - t)
Ejemplo n.º 28
0
    def __init__(self, localedir=None, lang=None, domain=None, languages=None):
        """
        Init a WearNowLocale. Run __init_first_instance() to set up the
        environment if this is the first run. Return __first_instance
        otherwise if called without arguments.

        localedir -- directory holding the translations; used only when it
                     exists, otherwise the first instance's directory is
                     tried
        lang -- stored as ``self.lang`` and used to create the default ICU
                locale
        domain -- translation domain; defaults to 'wearnow'
        languages -- colon-separated language list; each entry is passed
                     through ``check_available_translations`` and falsy
                     results are dropped
        """
        global _hdlr
        #initialized is special, used only for the "first instance",
        #and created by __new__(). It's used to prevent re-__init__ing
        #__first_instance when __new__() returns its pointer.
        if hasattr(self, 'initialized') and self.initialized:
            return
        _first = self._WearNowLocale__first_instance
        self.localedir = None
        # Everything breaks without localedir, so get that set up
        # first.  Warnings are logged in _init_first_instance or
        # _init_secondary_locale if this comes up empty.
        if localedir and os.path.exists(os.path.abspath(localedir)):
            self.localedir = localedir
        elif (_first and hasattr(_first, 'localedir') and _first.localedir and
              os.path.exists(os.path.abspath(_first.localedir))):
            self.localedir = _first.localedir
        else:
            # Logger.warn is a deprecated alias that newer Python versions
            # no longer provide; use warning() like the calls below.
            LOG.warning('Missing or invalid localedir %s; no translations will be available.', repr(localedir))

        self.lang = lang
        self.localedomain = domain or 'wearnow'
        if languages:
            self.language = [x for x in [self.check_available_translations(l)
                                         for l in languages.split(":")]
                             if x]
        else:
            self.language = None

        # The first instance gets the full environment setup; any other
        # instance goes through the secondary-locale setup.
        if self == _first:
            self._WearNowLocale__init_first_instance()
        else:
            self._init_secondary_locale()

        # ICU objects are only created when ICU is available; otherwise
        # icu_locales stays empty and collator stays None.
        self.icu_locales = {}
        self.collator = None
        if HAVE_ICU:
            self.icu_locales["default"] = Locale.createFromName(self.lang)
            # NOTE(review): self.collation is not assigned in this method;
            # presumably one of the init helpers above sets it - confirm.
            if self.collation and self.collation != self.lang:
                self.icu_locales["collation"] = Locale.createFromName(self.collation)
            else:
                self.icu_locales["collation"] = self.icu_locales["default"]
            try:
                self.collator = Collator.createInstance(self.icu_locales["collation"])
            except ICUError as err:
                LOG.warning("Unable to create collator: %s", str(err))
                self.collator = None

        # Fall back to null translations tagged as "en" when no translation
        # can be found.
        try:
            self.translation = self._get_translation(self.localedomain,
                                                     self.localedir,
                                                     self.language)
        except ValueError:
            LOG.warning("Unable to find translation for languages in %s, using US English", ':'.join(self.language))
            self.translation = WearNowNullTranslations()
            self.translation._language = "en"

        # Detach and forget the module-level log handler, if one is set.
        if _hdlr:
            LOG.removeHandler(_hdlr)
            _hdlr = None
        self._dd = self._dp = None
        #Guards against running twice on the first instance.
        self.initialized = True
Ejemplo n.º 29
0
def term(request):
  """Render the detail page for the term named by the ``uri`` query parameter.

  Collects the preferred labels, breadcrumbs, scope notes, alternative
  labels, SKOS relationships, matches and RDF types of the term from
  ``graph`` and renders 'thesaurus/term.html'.  Raises Http404 when no
  ``uri`` parameter is given.
  """
  preferred_language = translation.get_language()
  # Created but only referenced by the commented-out sort further down.
  collator = Collator.createInstance(Locale(preferred_language))
  if request.GET.get('uri'):
    uri = request.GET['uri']
    # NOTE(review): ``uri`` comes straight from the query string and is
    # concatenated into the SPARQL text of both queries below - confirm it is
    # validated upstream, or bind it as a query parameter instead.

    pref_label = get_preferred_label(URIRef(uri), preferred_language)
    pref_labels = graph.preferredLabel(URIRef(uri))

    # Breadcrumbs: domain / microthesaurus pairs that contain this term.
    breadcrumbs = []
    breadcrumbs_q = "prefix skos: <http://www.w3.org/2004/02/skos/core#> prefix unbist: <http://unontologies.s3-website-us-east-1.amazonaws.com/unbist#> prefix eu: <http://eurovoc.europa.eu/schema#> select ?domain ?microthesaurus where { { ?domain skos:member ?microthesaurus . ?microthesaurus skos:member <" + uri + "> . } union { ?domain rdf:type eu:Domain . ?domain skos:member <" + uri + "> } . }"
    for res in graph.query(breadcrumbs_q):
      bc = {}
      bc.update({'domain':{'uri':res.domain, 'pref_label': get_preferred_label(res.domain, preferred_language)}})
      microthesaurus = None
      if res.microthesaurus:
        bc.update({'microthesaurus': {'uri':res.microthesaurus, 'pref_label': get_preferred_label(res.microthesaurus, preferred_language)}})
      breadcrumbs.append(bc)

    # Scope notes and alternative labels are kept only when their language
    # equals the preferred language.
    scope_notes = []
    sns = graph.objects(subject=URIRef(uri),predicate=SKOS.scopeNote)
    for s in sns:
      if s.language == preferred_language:
        scope_notes.append(s)

    alt_labels = []
    als = graph.objects(subject=URIRef(uri),predicate=SKOS.altLabel)
    for a in als:
      if a.language == preferred_language:
        alt_labels.append(a)

    # Relationships are grouped by predicate in the order listed here and
    # sorted by preferred label within each group.
    relationships = []
    for c in [SKOS.broader,SKOS.related,SKOS.narrower,SKOS.member]:
      this_results = []
      for rel in graph.objects(subject=URIRef(uri),predicate=c):
        rel_label = get_preferred_label(rel,preferred_language)
        this_results.append({'type':c.split('#')[1], 'uri': rel, 'pref_label':rel_label})
      #sorted_results = sorted(this_results, key=lambda tup: tup['pref_label'], cmp=collator.compare)
      sorted_results = sorted(this_results, key=lambda tup: tup['pref_label'])
      for sr in sorted_results:
        relationships.append(sr)

    # One query per SKOS match predicate; the part of the predicate URI
    # after '#' is used both as the query variable name and as the type.
    matches = []
    for t in [SKOS.relatedMatch, SKOS.broadMatch, SKOS.closeMatch, SKOS.exactMatch, SKOS.narrowMatch]:
      matches_q = "select ?" + t.split('#')[1] + " where { <" + uri +"> owl:sameAs ?osa . ?" + t.split('#')[1] + " <" + t + "> ?osa }"
      for m in graph.query(matches_q):
        matches.append({'type':t.split('#')[1], 'uri': m})

    # Assigned but not passed to the template.
    descriptions = []

    rdf_types = []
    for t in graph.objects(subject=URIRef(uri),predicate=RDF.type):
      rdf_types.append({'short_name':t.split('#')[1], 'uri':t})

    return render(request, 'thesaurus/term.html', {
      'rdf_types': rdf_types,
      'pref_label': pref_label, 
      'pref_labels': pref_labels,
      'alt_labels':alt_labels, 
      'breadcrumbs':breadcrumbs, 
      'scope_notes':scope_notes,
      'relationships':relationships,
      'matches':matches })
  else:
    raise Http404
Ejemplo n.º 30
0
def compress(text):
    """Return the shortest of ``text`` and its compressed forms.

    Each of ``_zlib`` and ``_bz2`` is applied to the original ``text``; a
    result replaces the current best only when it is strictly shorter.  The
    counter in ``compress_counts`` named after the winning function (or
    'none' when ``text`` itself was kept) is incremented.
    """
    best = text
    winner = None
    for candidate_func in (_zlib, _bz2):
        candidate = candidate_func(text)
        if len(candidate) < len(best):
            best, winner = candidate, candidate_func

    counter_name = winner.__name__ if winner else 'none'
    compress_counts[counter_name] += 1
    return best


# Module-level collator created for the empty locale name (presumably ICU's
# root locale - confirm) with quaternary strength.
collator = Collator.createInstance(Locale(''))
collator.setStrength(Collator.QUATERNARY)
# Shortcut to the collator's bound getCollationKey method.
collation_key = collator.getCollationKey


def make_output_file_name(input_file, options, session_dir):
    """
    Return output file name based on input file name.

    >>> from minimock import Mock
    >>> opts = Mock('options')
    >>> opts.output_file = 'abc'
    >>> make_output_file_name('123.tar.bz2', opts, 'a')
    'abc'

    >>> opts.output_file = None
Ejemplo n.º 31
0
import pandas as pd
from icu import Collator, Locale, RuleBasedCollator

# Load the tab-separated word list; file lines with 0-based indices 2, 3
# and 4 are skipped.
ddf = pd.read_csv("../word_frequency/unilex/din.txt", sep='\t', skiprows = range(2,5))

# ICU collator created for the 'en_AU.UTF-8' locale.
collator = Collator.createInstance(Locale('en_AU.UTF-8'))

# https://stackoverflow.com/questions/13838405/custom-sorting-in-pandas-dataframe/27009771#27009771
# https://gist.github.com/seanpue/e1cb846f676194ae77eb
def sort_pd(key=None, reverse=False):
    """Return a function that computes the sorting permutation of a series.

    The returned ``sorter(series)`` gives the list of positions that put
    ``series`` in order, suitable for ``DataFrame.iloc``.  ``key`` and
    ``reverse`` have the same meaning as for ``sorted()``.

    Every position appears exactly once, also when values repeat.  The
    earlier ``list.index`` lookup returned the first match for each
    duplicate, repeating one row and dropping the others, and needed a
    linear scan per element.
    """
    def sorter(series):
        values = list(series)
        if key is None:
            position_key = values.__getitem__
        else:
            def position_key(position):
                return key(values[position])
        return sorted(range(len(values)), key=position_key, reverse=reverse)
    return sorter

# Sorter that orders values by their ICU sort key.
sort_by_custom_dict = sort_pd(key=collator.getSortKey)


#ddf.iloc[sort_by_custom_dict(ddf.index)]
# ddf.iloc[sort_by_custom_dict(ddf['Form'])]
# NOTE(review): the reordered frame is not assigned to anything here, so this
# line has no lasting effect when run as a script - confirm it is meant for
# interactive display only.
ddf.iloc[sort_by_custom_dict(ddf['Form'])]


#https://python3.wannaphong.com/2015/03/sort-python.html

# https://pyerror.com/detail/1316/

# The 'Form' column of the frame, in its original order.
lexemes = ddf.Form

#lexemes2 = ddf['Form']
Ejemplo n.º 32
0
def publicschool_form(request, student_id=0, id=None):
    """Show and process the public school history form of a student.

    request -- the current HTTP request
    student_id -- primary key of the IntakeSurvey the form belongs to
    id -- primary key of the PublicSchoolHistory being edited; when no such
          record exists a new one is added

    Raises PermissionDenied when ``user_permissions`` rejects the user.
    Renders an error page when the IntakeSurvey does not exist.
    """
    #get current method name
    method_name = inspect.currentframe().f_code.co_name
    if user_permissions(method_name, request.user):
        # Distinct school names already on record, passed to the template
        # as JSON.
        data_public_schools = list(
            PublicSchoolHistory.objects.all().values_list(
                'school_name', flat=True).distinct())
        # sort khmer
        data_public_schools = [
            x.encode('utf-8').strip() for x in data_public_schools
        ]
        locale = Locale('km_KH')
        collator = Collator.createInstance(locale)
        data_public_schools = sorted(set(data_public_schools),
                                     key=collator.getSortKey)
        try:
            # Only looked up so that a missing student raises DoesNotExist;
            # the object itself is not used below.
            survey = IntakeSurvey.objects.get(pk=student_id)

            try:
                #edit form
                instance = PublicSchoolHistory.objects.get(pk=id)
                form = StudentPublicSchoolHistoryForm(instance=instance)
                action = 'Editing '
            except ObjectDoesNotExist:
                #adding form
                form = StudentPublicSchoolHistoryForm(
                    initial={'student_id': student_id})
                action = 'Adding'
                instance = None

            if request.method == 'POST':
                # cancel button
                if "cancel" in request.POST:
                    return HttpResponseRedirect('')

                if instance != None:
                    # edit
                    form = StudentPublicSchoolHistoryForm(request.POST,
                                                          instance=instance)
                else:
                    # add
                    form = StudentPublicSchoolHistoryForm(request.POST)

                if form.is_valid():
                    #process
                    instance = form.save()

                    # update cache table
                    student = IntakeSurvey.objects.get(
                        student_id=instance.student_id.student_id)

                    update_student = CurrentStudentInfo.objects.get(
                        student_id=student.student_id)
                    update_student.in_public_school = True if student.get_pschool(
                    ).status == 'Y' else False
                    update_student.refresh = date.today().isoformat()
                    update_student.save()

                    url = '/students/' + str(student_id) + '/#enrollment'
                    return HttpResponseRedirect(url)
                else:
                    # Invalid form: print the errors and fall through to
                    # re-render the bound form.
                    print form.errors

            context = {
                'form': form,
                'student_id': student_id,
                'action': action,
                'data_public_schools': json.dumps(data_public_schools)
            }
            return render(request, 'mande/publicschoolhistoryform.html',
                          context)
        except IntakeSurvey.DoesNotExist as e:
            context = {'error_sms': e}
            return render(request, 'mande/errors/intakesurveynotexist.html',
                          context)
    else:
        raise PermissionDenied
Ejemplo n.º 33
0
def sorted_(data, l, series=False):
    """Return ``data`` sorted with locale-aware collation for language ``l``.

    Collator selection:
      * if ``language_tailorings`` maps ``l`` to a tailoring name, a
        ``RuleBasedCollator`` is built from the matching entry in
        ``collation_tailorings``;
      * otherwise a collator is created for ``Locale.forLanguageTag(l)``;
      * if that yields nothing usable, plain (non-collated) ordering is used.

    Args:
        data: list, tuple, str, pandas DataFrame or pandas Series.
        l: language tag used to choose the collator.
        series: passed through to ``df_sort``; when a DataFrame is sorted
            without a collator it is used as the ``by`` argument.

    Returns:
        list -> list, tuple -> tuple, str -> str (sorted characters joined
        back together); for a DataFrame/Series, whatever ``df_sort`` or
        ``sort_values`` returns.

    Raises:
        TypeError: if ``data`` is none of the supported types.
    """
    # Pick the collation key once instead of repeating the type dispatch in
    # every branch. ``sort_key`` stays None when no collator is available.
    tailored_lang = language_tailorings.get(l)
    if tailored_lang:
        tailored_rules = collation_tailorings.get(tailored_lang)
        sort_key = RuleBasedCollator(tailored_rules).getSortKey
    else:
        loc = Locale.forLanguageTag(l)
        sort_key = Collator.createInstance(loc).getSortKey if loc else None

    if isinstance(data, (list, tuple, str)):
        # sorted(..., key=None) is the plain ordering used by the fallback.
        ordered = sorted(data, key=sort_key)
        if isinstance(data, tuple):
            return tuple(ordered)
        if isinstance(data, str):
            return ''.join(ordered)
        return ordered

    if isinstance(data, (pd.DataFrame, pd.Series)):
        if sort_key is not None:
            return df_sort(data, series, sort_key)
        if isinstance(data, pd.Series):
            # Series.sort_values() has no ``by`` parameter; passing one
            # raises TypeError, so sort the values directly.
            return data.sort_values()
        return data.sort_values(by=series)

    # Previously this fell through with ``sorted_data`` unbound and failed
    # with an opaque UnboundLocalError.
    raise TypeError(
        'sorted_() supports list, tuple, str, pandas DataFrame and Series; '
        'got %s' % type(data).__name__)


# def sorted_(data,l, series=False, i=False):
#     if (language_tailorings.get(l)):
#         tailored_lang = language_tailorings.get(l)
#         tailored_rules = collation_tailorings.get(tailored_lang)
#         collator = RuleBasedCollator(tailored_rules)
#         if isinstance(data, list) or isinstance(data, tuple) or isinstance(data, str):
#             sorted_data = sorted(data, key=collator.getSortKey)
#         elif isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
#             sorted_data = df_sort(data, series, collator.getSortKey)
#         elif isinstance(data, dict):
#             sorted_data = sorted(data.items(), key = lambda x : collator.getSortKey(x[i]))
#     elif Locale.forLanguageTag(l):
#         loc = Locale.forLanguageTag(l)
#         collator = Collator.createInstance(loc)
#         if isinstance(data, list) or isinstance(data, tuple) or isinstance(data, str):
#             sorted_data = sorted(data, key=collator.getSortKey)
#         elif isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
#             sorted_data = df_sort(data, series, collator.getSortKey)
#         elif isinstance(data, dict):
#             sorted_data = sorted(data.items(), key = lambda x : collator.getSortKey(x[i]))
#     else:
#         if isinstance(data, list) or isinstance(data, tuple) or isinstance(data, str):
#             sorted_data = sorted(data)
#         elif isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
#             sorted_data = data.sort_values(by=series)
#         elif isinstance(data, dict):
#             sorted_data = sorted(data.items(), key = lambda x : x[i])
#     if isinstance(data, list):
#         return sorted_data
#     elif isinstance(data, tuple):
#         return tuple(sorted_data)
#     elif isinstance(data, str):
#         return ''.join(sorted_data)
#     elif isinstance(data, dict):
#         return dict(sorted_data)
#     else:
#         return sorted_data

# How to handle numpy arrays?

# Would it be better to use df.sort_values() for Pandas dataframes and series?

# https://stackoverflow.com/questions/62147537/how-do-i-do-a-natural-sort-in-pyicu
# https://gist.github.com/dpk/8325992
# https://ianwscott.blog/2015/04/30/python-programming-proper-alphabetical-sorting-for-polytonic-greek/
Ejemplo n.º 34
0
from mande.utils import getEnrolledStudents
from mande.utils import getStudentGradebyID
from mande.utils import studentAtSchoolGradeLevel
from mande.utils import studentAtAgeAppropriateGradeLevel
from mande.utils import user_permissions

import inspect

from django.contrib.auth.models import User
from mande.utils import user_permissions

from icu import Locale, Collator
from django.contrib import messages

# Module-level ICU locale and collator for 'km_KH' (Khmer, Cambodia),
# created once when this module is imported.
locale = Locale('km_KH')
collator = Collator.createInstance(locale)
'''
*****************************************************************************
Intake Survey
 - process an IntakeSurveyForm and log the action
*****************************************************************************
'''


def intake_survey(request, student_id=None):
    """Start handling an intake-survey request.

    Checks ``user_permissions`` for this view's own name and the requesting
    user; when that passes, reads the ``next`` and ``limit`` query
    parameters and fetches all ``IntakeSurvey`` objects.

    NOTE(review): as written the function ends after building the queryset
    and returns nothing on either path; the rest of the view appears to be
    missing -- confirm against the original module.
    """
    # Name of the currently executing function ('intake_survey'), used as
    # the permission key passed to user_permissions().
    method_name = inspect.currentframe().f_code.co_name
    if user_permissions(method_name, request.user):
        next_url = request.GET.get('next')  # where we're going next
        limit = request.GET.get('limit')  # limit to a single field
        intake_surveys = IntakeSurvey.objects.all()
Ejemplo n.º 35
0
 def create_collator(strength):
     """Build a collator for the empty-named locale at the given strength.

     ``strength`` is handed unchanged to ``Collator.setStrength``.
     """
     configured = Collator.createInstance(Locale(''))
     configured.setStrength(strength)
     return configured
Ejemplo n.º 36
0
def make_collator(request):
    """Return a collator for the locale named by ``request.locale_name``."""
    return Collator.createInstance(
        Locale.createFromName(request.locale_name))
Ejemplo n.º 37
0
def compress(text):
    """Return the shortest of ``text``, ``_zlib(text)`` and ``_bz2(text)``.

    The uncompressed input wins unless a compressor produces something
    strictly shorter. ``compress_counts`` is incremented under the winning
    function's ``__name__``, or under 'none' when nothing beat the input.
    """
    best, winner = text, None
    for candidate_func in (_zlib, _bz2):
        candidate = candidate_func(text)
        if len(candidate) < len(best):
            best, winner = candidate, candidate_func
    # Record which strategy was chosen for this input.
    label = winner.__name__ if winner else 'none'
    compress_counts[label] += 1
    return best


# Shared module-level collator for the empty-named locale, with its strength
# set to Collator.QUATERNARY.
collator = Collator.createInstance(Locale(''))
collator.setStrength(Collator.QUATERNARY)
# Module-level alias for the collator's bound getCollationKey method
# (presumably used as a sort key elsewhere in this module -- confirm).
collation_key = collator.getCollationKey


def make_output_file_name(input_file, options, session_dir):
    """
    Return output file name based on input file name.

    >>> from minimock import Mock
    >>> opts = Mock('options')
    >>> opts.output_file = 'abc'
    >>> make_output_file_name('123.tar.bz2', opts, 'a')
    'abc'

    >>> opts.output_file = None