Example #1
def single_picky(slug='test'):
    # Serve a single "picky" page: load the markdown source, split off the
    # metadata block, and render the rest as the page body.
    try:
        f = open(PICKY_DIR + slug + '.md')
    except IOError:
        abort(404)
    picky = f.read()
    f.close()
    # The metadata block is fenced by lines of three or more '-' or '=' chars.
    meta_regex = re.compile(
            r"^\s*(?:-|=){3,}\s*\n((?:.|\n)+?)\n\s*(?:-|=){3,}\s*\n*",
            re.MULTILINE
        )
    match = meta_regex.match(picky)
    if not match:
        abort(404)
    metas = match.group(1)
    title = None
    date = None
    meta = metas.split("\n")
    # Metadata lines look like "key => value"; fall back to the raw line
    # when the "=>" separator is missing.
    try:
        title = meta[0].split("=>")[1]
    except IndexError:
        title = meta[0].split("=>")[0]
    try:
        date = meta[1].split("=>")[1]
    except IndexError:
        date = meta[1].split("=>")[0]
    # Everything after the metadata block is the markdown body.
    cont = to_unicode(picky[match.end():])
    content = to_markdown(cont)
    return template('picky.html', content=content, title=to_unicode(title),
                    date=to_unicode(date), slug=slug)
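All of the examples in this collection lean on a project-local to_unicode helper whose exact signature varies from project to project. As a reference point only, a minimal sketch of the common shape (an assumption, not any particular project's implementation):

# Hypothetical reference implementation; each project above ships its own variant.
def to_unicode(text, encoding='utf-8', errors='strict'):
    """Decode bytes to unicode text; return already-decoded input unchanged."""
    if isinstance(text, bytes):
        return text.decode(encoding or 'utf-8', errors)
    return text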
Example #2
    def __init__(self, input, transposed=True):
        """
        Initialize the matrix reader.

        The `input` refers to a file on local filesystem, which is expected to
        be in the sparse (coordinate) Matrix Market format. Documents are assumed
        to be rows of the matrix (and document features are columns).

        `input` is either a string (file path) or a file-like object that supports
        `seek()` (e.g. gzip.GzipFile, bz2.BZ2File).
        """
        logger.info("initializing corpus reader from %s" % input)
        self.input, self.transposed = input, transposed
        with utils.file_or_filename(self.input) as lines:
            try:
                header = utils.to_unicode(next(lines)).strip()
                if not header.lower().startswith('%%matrixmarket matrix coordinate real general'):
                    raise ValueError("File %s not in Matrix Market format with coordinate real general; instead found: \n%s" %
                                    (self.input, header))
            except StopIteration:
                pass

            self.num_docs = self.num_terms = self.num_nnz = 0
            for lineno, line in enumerate(lines):
                line = utils.to_unicode(line)
                if not line.startswith('%'):
                    self.num_docs, self.num_terms, self.num_nnz = map(int, line.split())
                    if not self.transposed:
                        self.num_docs, self.num_terms = self.num_terms, self.num_docs
                    break

        logger.info("accepted corpus with %i documents, %i features, %i non-zero entries" %
                     (self.num_docs, self.num_terms, self.num_nnz))
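For reference, the header check and size parsing above accept a file of this shape (entries invented): the banner line comes first, '%' comment lines are skipped, and the first non-comment line carries num_docs, num_terms and num_nnz (swapped when transposed is False).

%%MatrixMarket matrix coordinate real general
% comment lines start with '%'
3 4 2
1 2 1.0
3 4 0.5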
Example #3
 def load_cat2vec_format(cls, cat_model=None, sent_model=None, word_model=None):
     """
     Load sentence vectors
     """
     model = Category2Vec(None)
     count = 0
     if cat_model:
         logger.info("loading %s object(cat) from %s" % (cls.__name__, cat_model))
         for line in open(cat_model,"r"):
             line = line.rstrip()
             if count == 0:
                 info = line.split()
                 model.cat_len = int(info[0])
                 model.layer1_size = int(info[1])
                 model.sg = int(info[2])
                 model.hs = int(info[3])
                 model.negative = int(info[4])
                 model.cbow_mean = int(info[5])
                 model.cats = empty((model.cat_len, model.layer1_size), dtype=REAL)
                 model.cat_no_hash = {}
                 model.cat_id_list = []
             else:
                 idx = count - 1
                 row = line.split("\t")
                 cat_id = utils.to_unicode(row[0])
                 model.cat_no_hash[cat_id] = idx
                 model.cat_id_list.append(cat_id)
                 vals = row[1].split()
                 for j in xrange(model.layer1_size):
                     model.cats[idx][j] = float(vals[j])
             count += 1
     count = 0
     if sent_model:
         logger.info("loading %s object(sentence) from %s" % (cls.__name__, sent_model))
         for line in open(sent_model,"r"):
             line = line.rstrip()
             if count == 0:
                 info = line.split()
                 model.sents_len = int(info[0])
                 model.sents = empty((model.sents_len, model.layer1_size), dtype=REAL)
                 model.sent_no_hash = {}
                 model.sent_id_list = []
             else:
                 idx = count - 1
                 row = line.split("\t")
                 sent_id = utils.to_unicode(row[0])
                 model.sent_no_hash[sent_id] = idx
                 model.sent_id_list.append(sent_id)
                 vals = row[1].split()
                 for j in xrange(model.layer1_size):
                     model.sents[idx][j] = float(vals[j])
             count += 1
     if word_model:
         logger.info("loading word2vec from %s" % word_model)
         model.w2v = Word2Vec.load(word_model)
         model.vocab = model.w2v.vocab
     return model
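The parsing above implies an on-disk layout like the following (values invented): a header line with the six integers read into cat_len, layer1_size, sg, hs, negative and cbow_mean, then one tab-separated row per category vector.

3 4 1 0 5 0
cat_a	0.1 0.2 0.3 0.4
cat_b	0.5 0.6 0.7 0.8
cat_c	0.9 1.0 1.1 1.2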
Example #4
    def __init__ (self, id, uri, name, type):
        if id is None:
            self.id = DBRepository.id_counter
            DBRepository.id_counter += 1
        else:
            self.id = id

        self.uri = to_unicode (uri)
        self.name = to_unicode (name)
        self.type = to_unicode (type)
Example #5
 def __init__ (self, id, commit):
     if id is None:
         self.id = DBLog.id_counter
         DBLog.id_counter += 1
     else:
         self.id = id
         
     self.rev = to_unicode (commit.revision)
     self.committer = None
     self.author = None
     self.date = commit.date
     self.message = to_unicode (commit.message)
     self.composed_rev = commit.composed_rev
Example #6
 def __iter__(self):
     """Iterate through the lines in the source."""
     try:
         # Assume it is a file-like object and try treating it as such
         # Things that don't have seek will trigger an exception
         self.source.seek(0)
         for line in self.source:
             yield utils.to_unicode(line).split()
     except AttributeError:
         # If it didn't work like a file, use it as a string filename
         with utils.smart_open(self.source) as fin:
             for line in fin:
                 yield utils.to_unicode(line).split()
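A hedged usage sketch of the two branches (the class name LineSentence and the file name are placeholders; the only assumption is that the constructor stores its argument as self.source):

# Path: handled by the utils.smart_open branch.
for tokens in LineSentence('corpus.txt'):
    print(tokens)  # one list of unicode tokens per line

# Seekable file-like object: handled by the first branch.
with open('corpus.txt', 'rb') as fh:
    for tokens in LineSentence(fh):
        print(tokens)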
Example #7
    def response(self, msg, **kwargs):
        ## msg is the parsed, already-handled data; in practice it is a dict.
        ## You can force a type via the keyword argument, e.g. response(msg, type='music').
        ## All legal types are listed below.
        '''
        ex: response(message, type='yourType')
        optional kwargs:
        type='legal_type', content='yourContent', handler=foo, count=1
        note: when type is 'news', the count kwarg is required
        supported types:
        text, image, voice, video, music, news
        '''
        # Swap receiver and sender for the reply.
        msg['receiver'], msg['sender'] = msg['sender'], msg['receiver']
        legal_types = ['text', 'music', 'image', 'voice', 'video', 'news']

        ## get some kwargs ##
        # keyword 'type' ---- overrides the type parsed from the incoming message
        if kwargs.get('type'):
            type = kwargs.get('type')
        else:
            type = msg['type']
        if type == 'music':
            if not msg['hq_musurl']:
                msg['hq_musurl'] = msg['musurl']
        # keyword 'content' ---- forces the type to text and returns a static string
        if kwargs.get('content'):
            msg['type'] = type = 'text'
            msg['content'] = to_unicode(kwargs.get('content'))
        if type not in legal_types:
            raise Exception("Illegal type! You may only choose a type from legal_types!")
        else:
            msg['type'] = type
        # keyword 'handler' ---- a function that accepts the msg dict and returns a modified dict
        if kwargs.get('handler'):
            msg = kwargs.get('handler')(msg)
        ## more kwargs ##

        if not type == 'news':
            template = to_unicode(getattr(Template(), type))
        else:
            count = kwargs.get('count')
            if count:
                temp = Template()
                template = to_unicode(temp.news(count))
            else:
                raise Exception("When type is set to 'news', the count kwarg is required!")

        try:
            retdata = template.format(**msg)
        except (KeyError, IndexError):
            raise Exception("Not enough or wrong arguments; check which arguments the "
                            "template needs. Reading template.py may help.")
        logging.info(retdata)
        return retdata
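A hypothetical call, assuming msg is the dict parsed from an incoming message and the method lives on some bot object:

reply = bot.response(msg, content='hello')  # 'content' forces a text reply
# bot.response(msg, type='news', count=3)   # 'count' is required when type is 'news'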
Example #8
    def __init__(self, unique_name, base_filepath, parameters):
        """
        Arguments
        ---------
            keyword_name : feature unique name
            base_filepath : filepath of feature config
            parameters : lexicon parameters, presented by dictionary
        """
        self.unique_name = unique_name
        self.parameters = parameters

        filepath = os.path.join(
            base_filepath,
            parameters[BagOfClustersFeature.PARAM_CLUSTERED_WORDS_FILEPATH])

        if parameters[BagOfClustersFeature.PARAM_ENABLED] == 'false':
            return

        print "Loading file with clusters of words: {}".format(filepath)
        with io.open(filepath, 'r', encoding='utf-8') as f:
            self.clustered_words = json.load(f, encoding='utf-8')

        print "Create dictionary with all clusters, accessed by cluster_id ..."
        self.clusters = {}
        for word in self.clustered_words.iterkeys():
            cluster_id = self.clustered_words[word]
            if cluster_id not in self.clusters:
                self.clusters[cluster_id] = []
            self.clusters[cluster_id].append(utils.to_unicode(word))
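The inversion loop implies a JSON file mapping word -> cluster_id; a made-up illustration of both shapes:

# Hypothetical input (the loaded JSON) and the resulting index:
clustered_words = {u"good": 7, u"nice": 7, u"bad": 3}
# after the loop above:
#   clusters == {7: [u"good", u"nice"], 3: [u"bad"]}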
Example #9
 def add_header(self):
     if self.file is not None:
         dis = ""
         dis += "Script file    : %s\n" % sys.argv[0]
         dis += "Date           : %s\n" % time.strftime("%d/%m/%Y %H:%M:%S", self.gtime.start_date)
         dis += "\n%s\n" % self.format("Time(s)", "Scope", "Info")
         self.file.write(utils.to_unicode(dis))
Example #10
def get_search_string(search_string=None):
    """Ask the user for a search string"""
    keyboard = xbmc.Keyboard(search_string, localize(30134))
    keyboard.doModal()
    if keyboard.isConfirmed():
        search_string = to_unicode(keyboard.getText())
    return search_string
Example #11
def get_property(key, default=None, window_id=10000):
    """Get a Window property"""
    from xbmcgui import Window
    value = to_unicode(Window(window_id).getProperty(key))
    if value == '' and default is not None:
        return default
    return value
Example #12
 def get_project(self):
     """ get the project """
     ret = ""
     mid = self.fields['project']
     if mid is not None:
         ret = mid['key']
     return to_unicode(ret)
Example #13
 def get_reporter(self):
     """ Get the issue reporter """
     ret = ""
     mid = self.fields['reporter']
     if mid is not None:
         ret = mid['displayName']
     return to_unicode(ret)
Example #14
 def get_type(self):
     """ Get the Issue type """
     ret = ""
     mid = self.fields['issuetype']
     if mid is not None:
         ret = mid['name']
     return to_unicode(ret)
Example #15
 def get_assignee(self):
     """ Get the assignee """
     ret = ""
     mid = self.fields['assignee']
     if mid is not None:
         ret = mid['displayName']
     return to_unicode(ret)
Example #16
 def get_cluster_id(self, word):
     """
     Returns
     -------
        the id of the cluster that contains the given 'word'
     """
     return self.clustered_words[utils.to_unicode(word)]
Example #17
 def __iter__(self):
     # The entire corpus is one gigantic line -- there are no sentence marks at all,
     # so just split the sequence of tokens arbitrarily: 1 sentence = 1000 tokens.
     sentence, rest, max_sentence_length = [], b'', 1000
     with utils.smart_open(self.fname) as fin:
         while True:
             # Read in chunks, to avoid loading the entire file (= 1 line) into RAM.
             text = rest + fin.read(8192)
             if text == rest:  # EOF
                 # Flush the last chunk of words, too (may be shorter/longer).
                 sentence.extend(utils.to_unicode(rest).split())
                 if sentence:
                     yield sentence
                 break
             # The last token may have been split in two... keep it for the next iteration.
             last_token = text.rfind(b' ')
             if last_token >= 0:
                 words, rest = utils.to_unicode(text[:last_token]).split(), text[last_token:].strip()
             else:
                 words, rest = [], text
             sentence.extend(words)
             while len(sentence) >= max_sentence_length:
                 yield sentence[:max_sentence_length]
                 sentence = sentence[max_sentence_length:]
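To make the splitting policy concrete: a 2500-token stream comes out as two full 1000-token sentences plus a 500-token remainder. A toy check of the arithmetic, independent of the I/O above:

tokens, cap = list(range(2500)), 1000
chunks = [tokens[i:i + cap] for i in range(0, len(tokens), cap)]
assert [len(c) for c in chunks] == [1000, 1000, 500]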
Example #18
def _create_des_(msg):
    if msg is None:
        return {}
    elif not isinstance(msg, types.StringTypes):
        raise pexception.PytestembError("Msg must be a string")
    else:
        return {"msg": "%s" % utils.to_unicode(msg)}
Example #19
 def get_priority(self):
     """ Get the priority """
     ret = ""
     pri = self.fields['priority']
     if pri is not None:
         ret = pri['name']
     return to_unicode(ret)
Example #20
def get_local_features(token, word_freq=None):

    assert len(token) >= 1

    features = []
    
    ntoken = normalize(token, lowercase=False)

    if token.isalpha():

        if 'UpperCase' in features_on:
            if first_upper_case(ntoken):
                features += ['IsUpperCase']

        if 'AllUpperCase' in features_on:
            if all_upper_case(ntoken):
                features += ['IsAllUpperCase']

        if 'AllLowerCase' in features_on:
            if all_lower_case(ntoken):
                features += ['IsAllLowerCase']

        if 'Freq' in features_on:
            features += ['Freq:%s' % str(word_freq[ntoken])]
        
        if 'Rare' in features_on:
            if word_freq[ntoken] <= rare_thr:
                features += ['IsRare']

        if 'IsWord' in features_on:
            features += ['IsWord']

    elif token.isdigit():

        if 'Number' in features_on:
            features += ['IsNumber']

    elif token.isalnum():

        if 'AlphaNum' in features_on:
            features += ['IsAlphaNum']

    elif len(to_unicode(token)) == 1:

        if is_punct(token):
            if 'Separator' in features_on:
                features += ['IsSeparator']
        else:
            if 'NonAlphanum' in features_on:
                features += ['IsNonAlphanum']
    
    if 'Word' in features_on:
        if not any(x in features for x in ['IsNumber', 'IsAlphaNum']):
            features += ['W=%s' % ntoken]

    if 'Length' in features_on:
        features += ['Length:%s' % str(len(ntoken))]

    return features
Example #21
def unfollow(program_name, title, program_id=None):
    """The API interface to unfollow a program used by the context menu"""
    move_down = bool(plugin.args.get('move_down'))
    from favorites import Favorites
    Favorites().unfollow(program_name=program_name,
                         title=to_unicode(unquote_plus(from_unicode(title))),
                         program_id=program_id,
                         move_down=move_down)
Example #22
 def __iter__(self):
     try:
         self.source.seek(0)
         for line in self.source:
             k = utils.to_unicode(line.rstrip()).split("\t")
             categories = k[3].split(" ")
             for cat in categories:
                 if "/" in cat: continue
                 yield k[4:], k[1], cat
     except AttributeError:
         with utils.smart_open(self.source) as fin:
             for line in fin:
                 k = utils.to_unicode(line.rstrip()).split("\t")
                 categories = k[3].split(" ")
                 for cat in categories:
                     if "/" in cat: continue
                     yield k[4:], k[1], cat
Example #23
    def __init__(self, id, name):
        if id is None:
            self.id = DBTag.id_counter
            DBTag.id_counter += 1
        else:
            self.id = id

        self.name = to_unicode(name)
Example #25
 def _check_same_origin(self, current_url):
     """
     检查两个URL是否同源
     """
     current_url = to_unicode(current_url)
     url_part = urlparse.urlparse(current_url)
     url_origin = (url_part.scheme, url_part.netloc)
     return url_origin == self.origin
Example #26
def replace(infile,
            outfile,
            find_what,
            replace_with,
            match_case=False,
            output_paragraphs=False):
    u"""Replace find_what with replace_with in docx or docm.
    :param infile: file in which replacement will be performed
    :type infile: str | unicode
    :param outfile: file in which the new content will be saved
    :type outfile: str | unicode
    :param find_what: text to search for
    :type find_what: str | unicode
    :param replace_with: text to replace find_what with
    :type replace_with: str | unicode
    :param match_case: True to make search case-sensitive
    :type match_case: bool
    :param output_paragraphs: True to output the paragraphs replaced
    :type output_paragraphs: bool
    """
    if not os.path.isfile(infile):
        raise ValueError('infile not found.')
    if not outfile:
        raise ValueError('outfile must be specified.')
    if not find_what:
        raise ValueError('find_what must be specified')

    if replace_with is None:
        replace_with = u''

    infile = to_unicode(infile)
    outfile = to_unicode(outfile)
    find_what = to_unicode(find_what)
    replace_with = to_unicode(replace_with)

    global count
    count = 0

    parts = extract_parts(infile)
    for part in parts:
        part['content'] = __replace_part(etree.fromstring(part['content']),
                                         find_what, replace_with, match_case,
                                         output_paragraphs)

    save_parts(parts, infile, outfile)
    print('Paragraphs replaced: {0}'.format(count))
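A hypothetical invocation (file names invented):

replace('report.docx', 'report_out.docx',
        find_what='2023', replace_with='2024', match_case=True)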
Example #27
 def get_terms_info(self, term):
     """
     returns: dict
         amount of documents which includes 'term' for different sentiment
         classes and at all (DocVocabulary.ALL)
     """
     uterm = to_unicode(term)
     return self.terms_info[uterm]
Example #28
def createI18NFile(data):
    columns = json.loads(data.get("columns"))
    strprefix = data.get("strprefix")
    cns, ens = [], []
    for column in columns:
        if column.get("creat_i18n") == 'on':
            i18nStr = strprefix + column.get("column_name")
            if column.get("data_type") in ("date", "datetime"):
                cns.append({
                    "key": i18nStr + "_search",
                    "value": utils.to_unicode("查询" + column.get("cn_name"))
                })
                ens.append({
                    "key": i18nStr + "_search",
                    "value": "Search" + column.get("en_name")
                })
            cns.append({
                "key": i18nStr,
                "value": utils.to_unicode(column.get("cn_name"))
            })
            ens.append({"key": i18nStr, "value": column.get("en_name")})
    cns.append({
        "key": "com.zhiyin.mes.app.to",
        "value": utils.to_unicode("到")
    })
    ens.append({"key": "com.zhiyin.mes.app.to", "value": "To"})
    if data.get("checkFactory") == 'on':
        cns.append({
            "key": "com.zhiyin.mes.app.factory_name",
            "value": utils.to_unicode("所属工厂")
        })
        ens.append({
            "key": "com.zhiyin.mes.app.factory_name",
            "value": "FactoryName"
        })
    utils.create_file(
        data.get("packageName") + "/i18n",
        render_template('/generate/i18n.txt', data=cns),
        data.get("packageName") + ".datagrid_zh_CN.properties")
    utils.create_file(
        data.get("packageName") + "/i18n",
        render_template('/generate/i18n.txt', data=ens),
        data.get("packageName") + ".datagrid_en_US.properties")
Example #29
 def __init__ (self, id, person):
     if id is None:
         self.id = DBPerson.id_counter
         DBPerson.id_counter += 1
     else:
         self.id = id
         
     self.name = to_unicode (person.name)
     self.email = person.email or None
Example #30
def ReplyText(ToUserName, FromUserName, Content):
    Temp="""<xml>
 <ToUserName><![CDATA[%s]]></ToUserName>
 <FromUserName><![CDATA[%s]]></FromUserName>
 <CreateTime>%s</CreateTime>
 <MsgType><![CDATA[text]]></MsgType>
 <Content><![CDATA[%s]]></Content>
 </xml>"""
    return Temp % (ToUserName, FromUserName, str(int(time.time())), to_unicode(Content))
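A quick sanity check with invented identifiers; the Content ends up in the CDATA body of the text reply:

xml = ReplyText('user_openid', 'gh_account', 'hello')
# -> '<xml>\n <ToUserName><![CDATA[user_openid]]></ToUserName> ...'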
Example #31
    def __getitem__(self, name):
        '''Get a header value, from the message, decoded and as a
        unicode string.

        If the header does not exist, None is returned'''
        value = self._msg[name]
        if value is None:
            return None
        return u''.join(to_unicode(*tupl) for tupl in decode_header(value))
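For context on the *tupl unpacking: email.header.decode_header returns a list of (decoded_bytes, charset) pairs, so to_unicode is called as to_unicode(value, charset). A small illustration:

from email.header import decode_header
decode_header('=?utf-8?b?Y2Fmw6k=?=')  # -> [(b'caf\xc3\xa9', 'utf-8')]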
Example #33
 def __init__ (self, id, file_name):
     if id is None:
         self.id = DBFile.id_counter
         DBFile.id_counter += 1
     else:
         self.id = id
         
     self.file_name = to_unicode (file_name)
     self.repository_id = None
Example #34
def get_setting(key, default=None):
    """Get an add-on setting as string"""
    try:
        value = to_unicode(ADDON.getSetting(key))
    except RuntimeError:  # Occurs when the add-on is disabled
        return default
    if value == '' and default is not None:
        return default
    return value
Example #37
 def push_upnext(self):
     """Push episode info to Up Next service add-on"""
     if has_addon('service.upnext') and get_setting_bool(
             'useupnext', default=True) and self.isPlaying():
         info_tag = self.getVideoInfoTag()
         next_info = self.apihelper.get_upnext(
             dict(
                 program=to_unicode(info_tag.getTVShowTitle()),
                 playcount=info_tag.getPlayCount(),
                 rating=info_tag.getRating(),
                 path=self.path,
                 runtime=self.total,
             ))
         if next_info:
             from base64 import b64encode
             from json import dumps
             data = [to_unicode(b64encode(dumps(next_info).encode()))]
             sender = '{addon_id}.SIGNAL'.format(addon_id=addon_id())
             notify(sender=sender, message='upnext_data', data=data)
Example #38
 def get_term_in_voc_count(self, term, sentiment=None):
     """
     term : string
     sentiment: '-1', '0', '1'
         Describes the sentiment class
     """
     uterm = to_unicode(term)
     term_counters = self.term_in_voc_count[self.get_term_index(uterm)]
     return term_counters[sentiment] if sentiment is not None \
         else term_counters[self.ALL]
Example #40
 def get_release_note(self):
     if self.notes is None:
         field = self.parent.field_id_map['Release Note']
         if field in self.fields:
             self.notes = to_unicode(self.fields[field])
         elif self.get_incompatible_change() or self.get_important():
             self.notes = self.get_description()
         else:
             self.notes = ""
     return self.notes
Example #41
def filter_wiki(raw):
    """
    Filter out wiki mark-up from `raw`, leaving only text. `raw` is either unicode
    or utf-8 encoded string.
    """
    # parsing of the wiki markup is not perfect, but sufficient for our purposes
    # contributions to improving this code are welcome :)
    text = utils.to_unicode(raw, 'utf8', errors='ignore')
    text = utils.decode_htmlentities(text) # '&amp;nbsp;' --> '\xa0'
    return remove_markup(text)
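An illustrative round trip (input invented; the exact output depends on remove_markup):

filter_wiki(u"'''Bold''' [[target|link text]] &amp;nbsp;etc")
# roughly -> u'Bold link text \xa0etc': entities decoded, wiki markup stripped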
Example #43
    def get_all_tones_from_table(self):
        logging.info("Loading lexicon '%s': %s ..." % (self.unique_name,
                                                       self.table_filepath))

        df = pd.read_csv(self.table_filepath, sep=',')

        for row in df.index:
            name = df[self.term_column_name][row]
            value = df[self.value_column_name][row]
            self.cache[utils.to_unicode(name)] = float(value)
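The column accesses imply a flat CSV; a made-up example, assuming term_column_name is 'term' and value_column_name is 'tone':

term,tone
good,0.72
awful,-0.89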
Example #45
    def load_model(self, model: str, model_path: str):

        try:
            encoding = 'utf-8'
            unicode_errors = 'strict'

            model_file = [
                f for f in os.listdir(model_path)
                if os.path.isfile(os.path.join(model_path, f))
            ]
            # Use a context manager so the handle is closed on all paths.
            with open(os.path.join(model_path, model_file[0]), 'rb') as f:
                # Header line of the word2vec binary format: "<vocab_size> <vector_size>".
                header = to_unicode(f.readline(), encoding=encoding)
                vocab_size, vector_size = (int(x) for x in header.split())  # throws for invalid file format

                binary_len = dtype(real).itemsize * vector_size
                for _ in tqdm(range(vocab_size)):
                    # Each entry starts with the word's bytes, terminated by a space.
                    word = []
                    while True:
                        ch = f.read(1)
                        if ch == b' ':
                            break
                        if ch == b'':
                            raise EOFError(
                                "unexpected end of input; is count incorrect or file otherwise damaged?"
                            )
                        if ch != b'\n':  # ignore newlines in front of words (some binary files have them)
                            word.append(ch)
                    word = to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors)

                    # The vector follows immediately: vector_size packed floats.
                    weights = fromstring(f.read(binary_len), dtype=real).astype(real)

                    self.word_vectors[word] = weights
            self.model = model
            print("Model loaded successfully!")
            return self
        except Exception as e:
            print('Error loading model:', str(e))
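A minimal sketch of a file this loader accepts, assuming real is numpy's float32: a text header '<vocab_size> <vector_size>', then for each word its raw bytes, a space, and vector_size packed floats.

import numpy as np

# Write a hypothetical two-word, 3-dimensional file in the word2vec binary layout.
with open('tiny.bin', 'wb') as out:
    out.write(b'2 3\n')
    for w in (b'hello', b'world'):
        out.write(w + b' ')
        out.write(np.random.rand(3).astype(np.float32).tobytes())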
Example #46
 def get_terms_info_safe(self, term):
     """
     returns : dict or 0
         amount of documents which includes 'term' for different sentiment
         classes and at all (DocVocabulary.ALL)
     """
     uterm = to_unicode(term)
     if (uterm in self.terms_info):
         return self.get_terms_info(uterm)
     else:
         return None
Example #47
 def _download_url(self, url):
     """
     Return a tuple (status, content), with content being the
     content of the url as a string, and status a boolean marking
     whether the download was succesful or not."""
     try:
         web = urllib2.urlopen(url)
     except Exception as e:
         result = (False, to_unicode(str(e)))
     else:
         result = (True, web.read())
     return result
Example #48
def main():
    args = parse_args()
    cfg = read_cfg(args.config)
    if args.reloads is not None:
        if 'all' in args.reloads:
            tags = cfg.keys()
        else:
            tags = args.reloads
        tags = tuple(to_unicode(tag) for tag in tags if to_unicode(tag) in cfg)
        reload_cfg(cfg, tags)
    elif args.failovers is not None:
        if 'all' in args.failovers:
            tags = cfg.keys()
        else:
            tags = args.failovers
        tags = tuple(to_unicode(tag) for tag in tags if to_unicode(tag) in cfg)
        do_failover(cfg, tags)
    else:
        start_scheduler(cfg, args.seconds)
Example #49
def replace(infile, outfile, find_what, replace_with, match_case=False, output_paragraphs=False):
    u"""Replace find_what with replace_with in pptx or pptm.
    :param infile: file in which replacement will be performed
    :type infile: str | unicode
    :param outfile: file in which the new content will be saved
    :type outfile: str | unicode
    :param find_what: text to search for
    :type find_what: str | unicode
    :param replace_with: text to replace find_what with
    :type replace_with: str | unicode
    :param match_case: True to make search case-sensitive
    :type match_case: bool
    :param output_paragraphs: True to output the paragraphs replaced
    :type output_paragraphs: bool
    """
    if not os.path.isfile(infile):
        raise ValueError('infile not found.')
    if not outfile:
        raise ValueError('outfile must be specified.')
    if not find_what:
        raise ValueError('find_what must be specified')

    if replace_with is None:
        replace_with = u''

    infile = to_unicode(infile)
    outfile = to_unicode(outfile)
    find_what = to_unicode(find_what)
    replace_with = to_unicode(replace_with)

    global count
    count = 0

    parts = extract_parts(infile)
    for part in parts:
        part['content'] = __replace_part(etree.fromstring(part['content']), find_what, replace_with, match_case,
                                         output_paragraphs)

    save_parts(parts, infile, outfile)
    print('Paragraphs replaced: {0}'.format(count))
Example #50
def parse_user_msg(signature, xml):
    """
    Parse xml from wechat server and return an Message
    :param xml: raw xml from wechat server.
    :return: an Message object
    """
    if not xml:
        return

    _msg = dict((child.tag, to_unicode(child.text))
                for child in ElementTree.fromstring(xml))

    return pkgRequest(signature, **_msg)
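A hypothetical payload, showing how each child tag becomes a pkgRequest keyword argument:

xml = """<xml>
  <ToUserName><![CDATA[gh_account]]></ToUserName>
  <FromUserName><![CDATA[user_openid]]></FromUserName>
  <MsgType><![CDATA[text]]></MsgType>
  <Content><![CDATA[hi]]></Content>
</xml>"""
parse_user_msg('signature', xml)
# -> pkgRequest('signature', ToUserName=u'gh_account', FromUserName=u'user_openid', ...)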
Example #51
    def onNotification(self, sender, method, data):  # pylint: disable=invalid-name
        """Handler for notifications"""
        # log(2, '[Notification] sender={sender}, method={method}, data={data}', sender=sender, method=method, data=to_unicode(data))

        # Handle play_action events from upnextprovider
        if sender.startswith('upnextprovider') and method.endswith(
                'plugin.video.vrt.nu_play_action'):
            from json import loads
            hexdata = loads(data)

            if not hexdata:
                return

            # NOTE: With Python 3.5 and older json.loads() does not support bytes or bytearray, so we convert to unicode
            from base64 import b64decode
            data = loads(to_unicode(b64decode(hexdata[0])))
            log(2,
                '[Up Next notification] sender={sender}, method={method}, data={data}',
                sender=sender,
                method=method,
                data=to_unicode(data))
            self._playerinfo.add_upnext(data.get('video_id'))
Example #52
    def feed_url(self, url):
        """
        设置初始爬取URL
        """
        if isinstance(url, basestring):
            url = to_unicode(url)
            url = UrlData(url)

        if self.same_origin:
            url_part = urlparse.urlparse(unicode(url))
            self.origin = (url_part.scheme, url_part.netloc)

        self.fetcher_queue.put(url, block=True)
Example #54
 def get_term_in_docs_count(self, term, sentiment=None):
     """
     term : str
     sentiment : None or '-1', '0', '1'
         sentiment class name
     returns : int
         amount of 'term' appeared in all documents or in a documents of
         the certain 'sentiment' class
     """
     uterm = to_unicode(term)
     term_info = self.terms_info[uterm]
     return term_info[self.ALL] if sentiment is None \
         else self.__get_term_in_sentiment_docs_count(term_info, sentiment)
Example #55
def get_json_data(response, fail=None):
    """Return json object from HTTP response"""
    from json import load, loads
    try:
        if (3, 0, 0) <= version_info < (3, 6, 0):  # before 3.6, json.loads() requires str, not bytes
            return loads(to_unicode(response.read()))
        return load(response)
    except TypeError as exc:  # 'NoneType' object is not callable
        log_error('JSON TypeError: {exc}', exc=exc)
        return fail
    except ValueError as exc:  # No JSON object could be decoded
        log_error('JSON ValueError: {exc}', exc=exc)
        return fail