def serializeFields(data):
    """Turns every attribute of the Mambu object into a string representation.

    If the object is an iterable one, it goes down to each of its
    elements and turns its attributes too, recursively.

    The base case is when it's a MambuStruct class (this one), so it
    just 'serializes' the attr attribute. Refer to
    MambuStruct.serializeStruct pydoc.

    This is perhaps the worst way to do it, still looking for a better
    way.
    """
    if isinstance(data, MambuStruct):
        return data.serializeStruct()
    try:
        it = iter(data)
    except TypeError:
        return unicode(data)
    if type(it) == type(iter([])):
        l = []
        for e in it:
            l.append(MambuStruct.serializeFields(e))
        return l
    elif type(it) == type(iter({})):
        d = {}
        for k in it:
            d[k] = MambuStruct.serializeFields(data[k])
        return d
    # elif ... tuples? sets?
    return unicode(data)
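# A hypothetical usage sketch (the exact shape of the Mambu data is an
# assumption): nested containers come back with every leaf stringified.
#
#     MambuStruct.serializeFields({'id': 123, 'amounts': [1.5, 2]})
#     # -> {'id': u'123', 'amounts': [u'1.5', u'2']}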
def define_viable(self, value):
    try:
        if unicode(value)[-1] == u'%':
            viable = float(unicode(value)[:-1]) / 100
        else:
            viable = float(value)
    except ValueError:
        print("Invalid viable value '%s'" % value)
    else:
        self._viable = viable
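# Hypothetical calls illustrating the percent handling above: a trailing
# '%' is parsed as a fraction, anything else goes straight through float().
#
#     obj.define_viable(u'75%')   # stores self._viable = 0.75
#     obj.define_viable('0.9')    # stores self._viable = 0.9
#     obj.define_viable('n/a')    # prints "Invalid viable value 'n/a'"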
def __new__(cls, s, *args, **kwargs):
    if isinstance(s, _):
        s = unicode(s.untranslated)
    if translator:
        trans = translator(s, *args, **kwargs)
        obj = super(_, cls).__new__(cls, trans, *args, **kwargs)
    else:
        obj = super(_, cls).__new__(cls, s, *args, **kwargs)
    obj.untranslated = unicode(s)
    obj._additionals = []
    return obj
def define_variance(self, seq, value):
    seq = unicode(seq)  # ensure sequence is a string
    try:
        if unicode(value)[-1] == u'%':
            cv = float(unicode(value)[:-1]) / 100
        else:
            cv = float(value)
    except ValueError:
        print("Invalid cv value '%s'" % value)
    else:
        self._measure_data[seq]['variance'] = cv
def create_categories(connection):
    for linea in category_table_data:
        if category_table_data[linea][1] is not None:
            ac = Category(id=unicode(category_table_data[linea][0]),
                          name=linea,
                          parent=unicode(category_table_data[linea][1]),
                          balance=0)
        else:
            ac = Category(id=unicode(category_table_data[linea][0]),
                          name=linea,
                          balance=0)
        connection.add(ac)
def feed_ctags(self, tagsfile_obj):
    for l in tagsfile_obj:
        if not isPython3:
            l = builtins.unicode(l, 'utf8', 'replace')
        if l.startswith('!'):  # skip ctags metadata headers
            continue
        fields = l.split('\t')
        m = fields[0]
        fil = fields[1]
        pat = fields[2]
        # typ = fields[3]
        klass = None
        try:
            ext = fields[4]
            if ext and ext.startswith('class:'):
                klass = ext.split(':', 1)[1].strip()
            idd = self.class_id(klass)
        except IndexError:
            ext = None
            # class id 0 = function
            idd = 0
        c = self.cursor()
        fid = self.file_id(fil)
        c.execute('insert into function(class, name, searchpattern, file) '
                  'values (?, ?, ?, ?)', [idd, m, pat, fid])
    self.dbconn.commit()
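# For reference, a hypothetical ctags line in the tab-separated format
# feed_ctags expects (name, file, search pattern, kind, extension fields):
#
#     render\tviews.py\t/^    def render(self):$/;"\tm\tclass:Page
#
# fields[0] is the symbol name, fields[1] the file, fields[2] the search
# pattern, and fields[4] the optional 'class:' extension field used to
# resolve the owning class.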
def __call__(self, text):
    # preprocessing
    text = unicode(text)
    text = normalize_numbers(text)
    normalizer = hazm.Normalizer()
    text = normalizer.normalize(text)

    # tokenization
    words = hazm.word_tokenize(text)

    # steps
    prons = []
    for word in words:
        if not any(letter in word for letter in self.graphemes):
            pron = [word]
        elif word in self.tihu:  # lookup tihu dict
            pron = self.tihu[word]
        else:  # predict for oov
            pron = self.predict(word)
        prons.extend(pron)
        prons.extend([" "])
    return prons[:-1]
def __valid_ip(self, value):
    try:
        # is_global is False for private, loopback and reserved ranges
        if not ipaddress.ip_address(unicode(value)).is_global:
            return None
    except ValueError:  # not a parseable IP address
        return None
    return value
def predict(words, sess):
    '''
    Returns predicted pronunciation of `words` which do NOT exist in the dictionary.
    :param words: A list of words.
    :return: pron: A list of phonemes.
    '''
    if len(words) > hp.batch_size:
        # process one batch now, recurse on the remainder
        after = predict(words[hp.batch_size:], sess)
        words = words[:hp.batch_size]
    else:
        after = []

    x = np.zeros((len(words), hp.maxlen), np.int32)  # 0: <PAD>
    for i, w in enumerate(words):
        for j, g in enumerate((w + "E")[:hp.maxlen]):
            x[i][j] = g2idx.get(g, 2)  # 2: <UNK>

    # Autoregressive inference
    preds = np.zeros((len(x), hp.maxlen), np.int32)
    for j in range(hp.maxlen):
        _preds = sess.run(graph.preds, {graph.x: x, graph.y: preds})
        preds[:, j] = _preds[:, j]

    # convert to string
    pron = []
    for i in range(len(preds)):
        p = [u"%s" % unicode(idx2p[idx]) for idx in preds[i]]  # Make p into unicode.
        if "<EOS>" in p:
            eos = p.index("<EOS>")
            p = p[:eos]
        pron.append(p)
    return pron + after
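# Hypothetical call (hp, graph, g2idx and idx2p are module-level objects
# assumed by predict); the recursion on words[hp.batch_size:] keeps the
# output aligned with the input order:
#
#     predict(['zelda', 'xylograph'], sess)
#     # -> one phoneme list per word, e.g. [['Z', 'EH1', ...], [...]]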
def _text_preprocessing(text):
    text = unicode(text)
    # strip combining accents via NFD decomposition
    text = ''.join(char for char in unicodedata.normalize('NFD', text)
                   if unicodedata.category(char) != 'Mn')
    text = text.lower()
    text = re.sub("[^ a-z'\".,?!()\[\]:;\-]", "", text)
    return text
def define_deviation(self, seq, value):
    seq = unicode(seq)  # ensure sequence is a string
    try:
        dev = float(value)
    except ValueError:
        print("Invalid deviation value")
    else:
        self._measure_data[seq]['deviation'] = dev
def english_text_preprocessing(text, lower=True):
    text = unicode(text)
    text = ''.join(char for char in unicodedata.normalize('NFD', text)
                   if unicodedata.category(char) != 'Mn')
    text = ''.join(char if char not in SYNOGLYPH2ASCII else SYNOGLYPH2ASCII[char]
                   for char in text)
    if lower:
        text = text.lower()
    return text
def JSONResponse(obj, start_response):
    """
    JSONResponse
    """
    if isstring(obj):
        res = obj
    elif isinstance(obj, (dict, list)):
        res = unicode(json.dumps(obj))
    else:
        res = obj
    return httpResponse(res, "200 OK", start_response)
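# A sketch of how this might be wired into a WSGI handler (httpResponse
# and isstring are assumed helpers from the surrounding framework):
#
#     def application(environ, start_response):
#         return JSONResponse({"status": "ok"}, start_response)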
def define_measurement(self, seq, ptype, value):
    seq = unicode(seq)  # ensure sequence is a string
    try:
        avg = float(value)
    except ValueError:
        print("Invalid average value")
    else:
        self._measure_data[seq].update({
            'ptype': ptype,
            'value': avg,
        })
def create_accounts(connection):
    for linea in acount_table_data:
        id_acc_type = unicode(Acounttype().get_one(
            acount_table_data[linea][2]).id)
        id_currency = unicode(Currency().get_one(
            acount_table_data[linea][3]).id)
        if acount_table_data[linea][1] is not None:
            ac = Account(id=unicode(acount_table_data[linea][0]),
                         name=linea,
                         parent=unicode(acount_table_data[linea][1]),
                         id_account_type=id_acc_type,
                         id_currency=id_currency,
                         balance=0)
        else:
            ac = Account(id=unicode(acount_table_data[linea][0]),
                         name=linea,
                         id_account_type=id_acc_type,
                         id_currency=id_currency,
                         balance=0)
        connection.add(ac)
def process_django_model(app, what, name, obj, options, lines):
    # This causes import errors if left outside the function
    from django.db import models

    # Only look at objects that inherit from Django's base model class
    if inspect.isclass(obj) and issubclass(obj, models.Model):
        # Grab the field list from the meta class
        fields = obj._meta.fields
        for field in fields:
            # Decode and strip any html out of the field's help text
            help_text = strip_tags(unicode(field.help_text))
            # Decode and capitalize the verbose name, for use if there
            # isn't any help text
            verbose_name = unicode(field.verbose_name).capitalize()
            if help_text:
                # Add the model field to the end of the docstring as a
                # param, using the help text as the description
                lines.append(":param {}: {}".format(field.attname, help_text))
            else:
                # Add the model field to the end of the docstring as a
                # param, using the verbose name as the description
                lines.append(":param {}: {}".format(field.attname, verbose_name))
            # Add the field's type to the docstring
            if isinstance(field, (models.ForeignKey, models.OneToOneField,
                                  models.ManyToManyField)):
                lines.append(":type %s: %s to :class:`%s.%s`" % (
                    field.attname,
                    type(field).__name__,
                    field.related_model.__module__,
                    field.related_model.__name__,
                ))
            else:
                lines.append(":type {}: {}".format(field.attname,
                                                   type(field).__name__))
    # Return the extended docstring
    return lines
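# This signature matches Sphinx's `autodoc-process-docstring` event; a
# typical conf.py hookup would look like this (a sketch, not necessarily
# this project's code):
#
#     def setup(app):
#         app.connect('autodoc-process-docstring', process_django_model)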
def SQL_EXEC(sql, args):
    """
    SQL_EXEC - run a query on a file.sql
    """
    try:
        env = mapify(args, sep=' ', kvsep='=', strip_char=' ', glue='"')
        res = SqliteDB.ExecuteP(sql, env, outputmode='response', verbose=False)
        return unicode(json.dumps(res))
    except Exception as ex:
        manage(ex)
        return 0
def loadCss(self):
    try:
        f = open(self.cssFileInPlugin, 'r')
        css = unicode(f.read())
        f.close()
    except Exception as e:
        log(e)
        css = u''
    return css
def normalize(sentence):
    """
    Normalize English text.
    """
    # preprocessing
    sentence = unicode(sentence)
    sentence = normalize_numbers(sentence)
    sentence = ''.join(char for char in unicodedata.normalize('NFD', sentence)
                       if unicodedata.category(char) != 'Mn')  # Strip accents
    sentence = sentence.lower()
    sentence = re.sub(r"[^ a-z'.,?!\-]", "", sentence)
    sentence = sentence.replace("i.e.", "that is")
    sentence = sentence.replace("e.g.", "for example")
    return sentence
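# An example of the normalization above (assuming normalize_numbers
# spells out digits as words):
#
#     normalize(u"Dr. Smith bought 2 apples, e.g. Fujis!")
#     # -> "dr. smith bought two apples, for example fujis!"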
def postprocess(self):
    """Postprocessing.

    If any element of the addresses was converted to anything but a
    string, it gets converted back to a string (unicode). Address
    elements matter for what they say, not for what type they hold.

    .. todo:: do the same thing to the 'address' field created on
              preprocessing.
    """
    try:
        for name, item in self['addresses'][0].items():
            try:
                if name == "indexInList":
                    continue
                self['addresses'][0][name] = unicode(self['addresses'][0][name])
                self['address'][name] = unicode(self['address'][name])
            except AttributeError:
                pass
    except (KeyError, IndexError):
        pass
    super(MambuClient, self).postprocess()
def stringify(blob):
    retstr = ''
    if not blob:
        return ''  # we were passed nothing, so return nothing
    elif isinstance(blob, list):
        for e in blob:
            retstr += stringify(e)
    elif isinstance(blob, dict):
        for k, v in blob.items():
            retstr += stringify(unicode(k))
            # note: unicode(v) means nested containers under dict values
            # are flattened via their repr rather than recursively
            retstr += stringify(unicode(v))
    elif isinstance(blob, str):
        retstr += unicode(blob)
    elif isinstance(blob, bytes):
        retstr += unicode(blob)
    elif isinstance(blob, unicode):
        retstr += blob
    else:
        raise Exception("unknown type: %s" % str(type(blob)))
    return retstr
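# Hypothetical call: containers flatten to one unicode string (dict
# iteration order applies):
#
#     stringify([u'a', u'b', {u'k': u'v'}])   # -> u'abkv'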
def process_django_model(app, what, name, obj, options, lines):
    # This causes import errors if left outside the function
    from django.db import models

    # Only look at objects that inherit from Django's base model class
    if inspect.isclass(obj) and issubclass(obj, models.Model):
        # Grab the field list from the meta class
        fields = obj._meta.fields
        for field in fields:
            # Decode and strip any html out of the field's help text
            help_text = strip_tags(unicode(field.help_text))
            # Decode and capitalize the verbose name, for use if there
            # isn't any help text
            verbose_name = unicode(field.verbose_name).capitalize()
            if help_text:
                # Add the model field to the end of the docstring as a
                # param, using the help text as the description
                lines.append(u':param %s: %s' % (field.attname, help_text))
            else:
                # Add the model field to the end of the docstring as a
                # param, using the verbose name as the description
                lines.append(u':param %s: %s' % (field.attname, verbose_name))
            # Add the field's type to the docstring
            if isinstance(field, (models.ForeignKey, models.OneToOneField,
                                  models.ManyToManyField)):
                lines.append(u':type %s: %s to :class:`%s.%s`'
                             % (field.attname, type(field).__name__,
                                field.rel.to.__module__, field.rel.to.__name__))
            else:
                lines.append(u':type %s: %s' % (field.attname,
                                                type(field).__name__))
    # Return the extended docstring
    return lines
def __call__(self, text):
    # preprocessing
    text = unicode(text)
    text = normalize_numbers(text)
    text = ''.join(char for char in unicodedata.normalize('NFD', text)
                   if unicodedata.category(char) != 'Mn')  # Strip accents
    text = text.lower()
    text = re.sub("[^ a-z'.,?!\-]", "", text)
    text = text.replace("i.e.", "that is")
    text = text.replace("e.g.", "for example")

    # tokenization
    words = word_tokenize(text)
    tokens = pos_tag(words)  # tuples of (word, tag)

    # steps: build a space-separated string of per-word pronunciations
    prons2 = ""
    for word, pos in tokens:
        if re.search("[a-z]", word) is None:
            pron = [word]
        elif word in self.homograph2features:  # Check homograph
            pron1, pron2_, pos1 = self.homograph2features[word]
            if pos.startswith(pos1):
                pron = pron1
            else:
                pron = pron2_
        elif word in self.cmu:  # lookup CMU dict
            pron = self.cmu[word][0]
        else:  # predict for oov
            pron = self.predict(word)
        # concatenate this word's phonemes into one token
        new_word = ''
        for i in np.arange(np.size(pron)):
            new_word += pron[i]
        prons2 += new_word
        prons2 += ' '
    return prons2
def g2p(text):
    '''
    Returns the pronunciation of text.
    :param text: A string. A sequence of words.
    :return: A list of phonemes.
    '''
    # normalization
    text = unicode(text)
    text = ''.join(char for char in unicodedata.normalize('NFD', text)
                   if unicodedata.category(char) != 'Mn')  # Strip accents
    text = text.lower()

    # tokenization
    tokens = tokenize(text)

    # g2p: look tokens up first, remember out-of-vocabulary ones
    oovs, u_loc = [], []
    ret = []
    for token in tokens:
        pron = token2pron(token)  # list of phonemes
        if pron == []:  # oov
            oovs.append(token[0])
            u_loc.append(len(ret))
        ret.extend(pron)
        ret.extend([" "])

    if len(oovs) > 0:
        global g_sess
        if g_sess is not None:  # check global session
            prons = predict(oovs, g_sess)
            for i in range(len(oovs) - 1, -1, -1):
                ret = ret[:u_loc[i]] + prons[i] + ret[u_loc[i]:]
        else:
            # If the global session is not defined, make a new local one.
            with tf.Session(graph=g, config=config) as sess:
                saver.restore(
                    sess,
                    tf.train.latest_checkpoint(os.path.join(dirname, hp.logdir)))
                prons = predict(oovs, sess)
                for i in range(len(oovs) - 1, -1, -1):
                    ret = ret[:u_loc[i]] + prons[i] + ret[u_loc[i]:]
    return ret[:-1]
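# Hypothetical call (tokenize, token2pron and the TensorFlow session
# objects come from the surrounding module):
#
#     g2p(u"hello zxqwv")
#     # dictionary words resolve through token2pron; 'zxqwv' falls back
#     # to the seq2seq predict() path.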
def JSONResponse(obj, start_response):
    """
    JSONResponse
    """
    if isstring(obj):
        text = obj
    elif isinstance(obj, (dict, list)):
        text = unicode(json.dumps(obj))
    else:
        text = obj
    body = text.encode('utf-8')
    # Content-Length must be the byte length, not the character count.
    response_headers = [('Content-type', 'application/json'),
                        ('Content-Length', str(len(body)))]
    if start_response:
        start_response("200 OK", response_headers)
    return [body]
def __call__(self, text):
    # preprocessing
    text = unicode(text)
    text = normalize_numbers(text)
    text = ''.join(char for char in unicodedata.normalize('NFD', text)
                   if unicodedata.category(char) != 'Mn')  # Strip accents
    text = text.lower()
    text = re.sub("[^ a-z'.,?!\-;:\"]", "", text)  # mdda: also keep ; : "
    text = re.sub("([a-z])\-([a-z])", r"\1 \2", text)  # mdda: 'hot-shot' -> 'hot shot'
    text = text.replace("i.e.", "that is")
    text = text.replace("e.g.", "for example")

    # tokenization
    words2 = kaldi_tokenize(text)
    tokens = pos_tag(words2)  # tuples of (word, tag)

    # steps
    prons = []
    for word, pos in tokens:
        if re.search("[a-z]", word) is None:
            pron = [word]
        elif word in self.homograph2features:  # Check homograph
            pron1, pron2, pos1 = self.homograph2features[word]
            if pos.startswith(pos1):
                pron = pron1
            else:
                pron = pron2
        elif word in self.cmu:  # lookup CMU dict
            pron = self.cmu[word][0]
        else:  # predict for oov
            pron = self.predict(word)
        prons.append((word, pron))  # mdda: keep (word, phonemes) pairs
    return prons  # mdda
def _get(self, config, section, field, default):
    try:
        if isinstance(default, bool):
            self[field] = config.getboolean(section, field)
        elif isinstance(default, int):
            self[field] = config.getint(section, field)
        else:
            self[field] = config.get(section, field)
        if field == 'password' and self[field] != '' and len(self[field]) != 64:
            # likely not a hashed password; hash the original password
            self[field] = hashlib.sha256(self[field]).hexdigest()
    except ConfigParser.Error as e:
        logging.debug(
            "Could not parse setting '%s.%s': %s. Using default value: '%s'."
            % (section, field, unicode(e), default))
        self[field] = default
    if field in ['database', 'assetdir']:
        self[field] = str(path.join(self.home, self[field]))
def create_index_page(self):
    '''
    If there is no local html containing links to files, create one.
    '''
    if os.path.isfile(self.index_page):
        print('>>> Reading cached index page')
        index_file = open(self.index_page, 'r')
        index_contents = index_file.read()
        index_file.close()
    else:
        print('>>> Downloading index page')
        # Python 2 API; on Python 3 this would be urllib.request.urlopen
        fp = urllib.urlopen(self.kgs_url)
        data = unicode(fp.read())
        fp.close()
        index_contents = data
        index_file = open(self.index_page, 'w')
        index_file.write(index_contents)
        index_file.close()
    return index_contents
def __call__(self, text, tidy=False, secret=False):
    # preprocessing
    text = unicode(text)
    text = normalize_numbers(text)
    normalizer = hazm.Normalizer()
    text = normalizer.normalize(text)

    # tokenization
    words = hazm.word_tokenize(text)

    # steps
    prons = []
    for word in words:
        if not any(letter in word for letter in self.graphemes):
            pron = [word]
        elif word in self.tihu:  # lookup tihu dict
            pron = ([self.tihu[word].replace(' ', '')]
                    if secret else [' ', self.tihu[word], ' '])
        else:  # predict for oov
            pron = self.predict(word)
        prons.extend(pron)
        prons.extend([" "])
    result = ''.join(prons[:-1])
    if tidy:
        return Persian_g2p_converter.convert_from_native_to_good(result)
    return result
def sanitize(text, kana=True, wildcards=False):
    if kana:
        checker = isJapanese
    else:
        checker = isKanji

    if wildcards:
        # map glob-style wildcards (ASCII and full-width) to SQL LIKE ones
        text = re.sub(u'[\*＊]', u'%', text)
        text = re.sub(u'[\?？]', u'_', text)
        overrides = [u'%', u'_']
    else:
        overrides = list()

    result = unicode()
    for c in text:
        if checker(c) or c in overrides:
            result += c
    return result
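# Hypothetical calls (isJapanese/isKanji are assumed helpers):
#
#     sanitize(u'漢字です*', kana=False, wildcards=True)   # -> u'漢字%'
#     sanitize(u'かな123', kana=True)                      # -> u'かな'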
def feed_scintilla(self, apifile_obj):
    """Handle scintilla api files.

    Syntax is like:

        qt.QApplication.style?4() -> QStyle
    """
    for l in apifile_obj:
        if not isPython3:
            l = builtins.unicode(l, 'utf8', 'replace')
        parts = l.split('?')
        fullsym = parts[0].rsplit('.', 1)
        klass, func = fullsym
        if len(parts) == 2:
            desc = parts[1]
        else:
            desc = ''
        # The class is qualified like qt.QApplication; keep only the
        # actual class name.
        shortclass = klass.rsplit('.', 1)[-1]
        self.feed_function(func.strip(), shortclass.strip(), '', desc.strip())
    self.dbconn.commit()
def __call__(self, text):
    # preprocessing
    text = unicode(text)
    text = normalize_numbers(text)
    text = ''.join(char for char in unicodedata.normalize('NFD', text)
                   if unicodedata.category(char) != 'Mn')  # Strip accents
    text = text.lower()
    text = re.sub("[^ a-z'.,?!\-#~\r\t_\"\']", "", text)
    text = text.replace("i.e.", "that is")
    text = text.replace("e.g.", "for example")

    # tokenization
    words = word_tokenize(text)
    tokens = pos_tag(words)  # tuples of (word, tag)

    # steps
    prons = []
    for word, pos in tokens:
        if re.search("[a-z]", word) is None:
            pron = [word]
        elif word in self.homograph2features:  # Check homograph
            pron1, pron2, pos1 = self.homograph2features[word]
            if pos.startswith(pos1):
                pron = pron1
            else:
                pron = pron2
        elif word in self.cmu:  # lookup CMU dict
            pron = self.cmu[word][0]
        else:  # predict for oov
            pron = self.predict(word)
        prons.extend(pron)
        prons.extend([" "])
        self.word_map["".join(pron)] = word
    return prons[:-1]
def u(s):
    return builtins.unicode(s)
def _str(s, encoding="UTF-8"):
    return unicode(s, encoding=encoding)
def ue(s, encoding):
    return builtins.unicode(s, encoding)
def _str(s, encoding="UTF-8"):
    s = unicode(s, encoding=encoding)
    return unichr_escape.sub(lambda x: x.group(0).decode('unicode-escape'), s)
def toUnicode(self, s):
    # pylint: disable=no-member
    return builtins.unicode(s)
def toUnicode(self, s):
    # pylint: disable=no-member
    if g.isPython3:
        return str(s)
    else:
        return builtins.unicode(s)
def toUnicode(self, s):
    if g.isPython3:
        return str(s)
    else:
        return builtins.unicode(s)
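# The wrappers above are typical Python 2/3 compat shims. A minimal,
# self-contained sketch of the usual definition pattern (not the
# surrounding projects' actual code):

import sys

isPython3 = sys.version_info >= (3,)

if isPython3:
    def to_unicode(s, encoding='utf-8'):
        # On Python 3, str is already unicode; only bytes need decoding.
        return s if isinstance(s, str) else s.decode(encoding, 'replace')
else:
    def to_unicode(s, encoding='utf-8'):
        # On Python 2, decode byte strings and pass unicode through.
        return s if isinstance(s, unicode) else unicode(s, encoding, 'replace')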