Example #1
0
        def ensure_person(person):
            """Return the `people` row id for `person`, inserting a row if needed.

            The lookup is done by name only.  When no row matches, a new
            `DBPerson` is built from `person`, inserted, and its id returned.
            The whole operation is bracketed by profiler_start/profiler_stop.
            """
            profile_label = "Ensuring person %s for repository %d"
            profiler_start(profile_label, (person.name, self.repo_id))
            printdbg("DBContentHandler: ensure_person %s <%s>",
                     (person.name, person.email))
            db_cursor = self.cursor

            utf8_name = to_utf8(person.name)
            address = person.email
            if address is not None:
                address = to_utf8(address).decode("utf-8")

            lookup_sql = statement("SELECT id from people where name = ?",
                                   self.db.place_holder)
            db_cursor.execute(lookup_sql,
                              (to_utf8(utf8_name).decode("utf-8"),))
            row = db_cursor.fetchone()

            if row:
                # an existing person with this name: reuse its id
                person_id = row[0]
            else:
                new_person = DBPerson(None, person)
                insert_sql = statement(DBPerson.__insert__,
                                       self.db.place_holder)
                insert_params = (new_person.id,
                                 to_utf8(new_person.name).decode("utf-8"),
                                 address)
                db_cursor.execute(insert_sql, insert_params)
                person_id = new_person.id

            profiler_stop(profile_label, (person.name, self.repo_id), True)

            return person_id
Example #2
0
    def save_word2vec_format(self, fname, fvocab=None, binary=False):
        """
        Store the input-hidden weight matrix in the same format used by the original
        C word2vec-tool, for compatibility.

        `fname` is the file the vectors are written to.
        `fvocab` is an optional file that receives one "word count" line per
        vocabulary entry.
        `binary` selects the binary layout (word, a space, then the raw bytes
        of the row) instead of the text layout (word followed by "%f" values).

        Entries are written in order of decreasing count in both files.

        """
        # Both outputs use the same ordering, so sort only once.
        by_count = sorted(iteritems(self.vocab),
                          key=lambda item: -item[1].count)
        if fvocab is not None:
            logger.info("Storing vocabulary in %s", fvocab)
            with utils.smart_open(fvocab, 'wb') as vout:
                for word, vocab in by_count:
                    vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count)))
        logger.info("storing %sx%s projection weights into %s",
                    len(self.vocab), self.layer1_size, fname)
        assert (len(self.vocab), self.layer1_size) == self.syn0.shape
        with utils.smart_open(fname, 'wb') as fout:
            fout.write(utils.to_utf8("%s %s\n" % self.syn0.shape))
            # store in sorted order: most frequent words at the top
            for word, vocab in by_count:
                row = self.syn0[vocab.index]
                if binary:
                    # tobytes() replaces the deprecated tostring() alias,
                    # which recent NumPy releases no longer provide; the
                    # bytes produced are the same.
                    fout.write(utils.to_utf8(word) + b" " + row.tobytes())
                else:
                    values = ' '.join("%f" % val for val in row)
                    fout.write(utils.to_utf8("%s %s\n" % (word, values)))
Example #3
0
    def login_bind(self, account, platform_id, remark='0'):
        '''Send a player account to the zqb account center for binding.

        `account` and `remark` are passed through `utils.to_utf8` before the
        bind URL is built.  The request is issued with `self.request_get`;
        the outcome is only logged by the `on_response` callback, nothing is
        returned to the caller.
        '''
        def on_response(user_data, res):
            # `ret`/`status` stay None when the request failed or the body
            # could not be interpreted; both cases end in the error log.
            ret = None
            status = None
            if not res.error:
                try:
                    ret = json.loads(res.body)
                    status = str(ret['status'])
                except (ValueError, KeyError, TypeError):
                    # Body is not JSON, or has no usable 'status' field:
                    # report it as a bind error instead of raising from
                    # inside the callback.
                    status = None

            if status == '200':
                self.log_info("bind account success:(%s)(%s)" %
                              (user_data, ret))
            elif status == '501':
                self.log_warning("repeat bind:(%s)(%s)" % (user_data, ret))
            else:
                self.log_error("bind account error:(%s), %s, %s" %
                               (user_data, res.error, res.body))

        account = utils.to_utf8(account)
        remark = utils.to_utf8(remark)
        url = GameCenterMixin.get_bind_url(account, platform_id, remark)
        user_data = {
            'account': account,
            'platform_id': platform_id,
            'remark': remark
        }
        self.request_get(url, {}, on_response, user_data)
Example #4
0
 def parse(cls, selector):
     """Render a nested selector structure as a string.

     * a dict (or dict subclass) becomes `key:(parsed value)` for each item,
       written one after another in the mapping's iteration order;
     * a list or tuple (or subclass) becomes its parsed elements joined
       with commas;
     * anything else is written as `to_utf8(selector)`.

     `isinstance` is used rather than exact type comparison so that
     subclasses such as an ordered dict take the container branches.
     """
     with contextlib.closing(StringIO()) as result:
         if isinstance(selector, dict):
             for k, v in selector.items():
                 result.write('%s:(%s)' % (to_utf8(k), cls.parse(v)))
         elif isinstance(selector, (list, tuple)):
             result.write(','.join(map(cls.parse, selector)))
         else:
             result.write(to_utf8(selector))
         return result.getvalue()
Example #5
0
    def __init__(self, id, uri, name, type):
        """Create a repository record.

        When `id` is None the next value of `DBRepository.id_counter` is
        used and the counter is advanced; otherwise `id` is stored as given.
        `uri`, `name` and `type` are stored after passing through `to_utf8`.
        """
        if id is not None:
            self.id = id
        else:
            # no explicit id: take the next one from the class-wide counter
            self.id = DBRepository.id_counter
            DBRepository.id_counter += 1

        self.uri = to_utf8(uri)
        self.name = to_utf8(name)
        self.type = to_utf8(type)
Example #6
0
    def __init__(self, id, uri, name, type):
        """Create a repository record.

        When `id` is None the next value of `DBRepository.id_counter` is
        used and the counter is advanced; otherwise `id` is stored as given.
        `uri`, `name` and `type` are stored after passing through `to_utf8`.
        """
        if id is not None:
            self.id = id
        else:
            # no explicit id: take the next one from the class-wide counter
            self.id = DBRepository.id_counter
            DBRepository.id_counter += 1

        self.uri = to_utf8(uri)
        self.name = to_utf8(name)
        self.type = to_utf8(type)
Example #7
0
    def __init__(self, id, commit):
        """Create a log record from `commit`.

        When `id` is None the next value of `DBLog.id_counter` is used and
        the counter is advanced; otherwise `id` is stored as given.  The
        revision and message go through `to_utf8`; `committer` and `author`
        start out as None.
        """
        if id is not None:
            self.id = id
        else:
            # no explicit id: take the next one from the class-wide counter
            self.id = DBLog.id_counter
            DBLog.id_counter += 1

        self.rev = to_utf8(commit.revision)
        self.committer = None
        self.author = None
        self.date = commit.date
        self.message = to_utf8(commit.message)
        self.composed_rev = commit.composed_rev
Example #8
0
 def get_verify_code_url(account, platformid, phone, code, remark='0'):
     """Build the signed verify-code URL for `account`.

     The URL is `GameCenterMixin.VERIFY_CODE_URL` followed by the request
     fields joined with '/', then '/' and the signature.  The signature is
     the lower-cased MD5 hex digest of the same fields concatenated
     without a separator, followed by `GameCenterMixin.SECRET_KEY`.
     """
     utf8_account = utils.to_utf8(account)
     third_code = "zqb" + str(platformid)
     sys_param = GameCenterMixin.get_sys_param()
     quoted_remark = urllib.quote(utils.to_utf8(remark))
     segments = [
         utf8_account, third_code, phone, code, GameCenterMixin.APPID,
         quoted_remark, sys_param
     ]
     path = '/'.join(segments)
     digest = hashlib.md5(''.join(segments) + GameCenterMixin.SECRET_KEY)
     signature = digest.hexdigest().lower()
     return GameCenterMixin.VERIFY_CODE_URL + path + '/' + signature
Example #9
0
    def save_cat2vec_format(self, fname):
        """
        Write the category vectors to `fname`.

        The file starts with '#'-prefixed header lines (the shape of
        `self.cats`, then the sg/hs/negative/cbow_mean settings) followed by
        one `cat_id[TAB]values` line per entry of `self.cat_no_hash`.

        """
        logger.info("storing %sx%s projection weights into %s" % (self.cat_len, self.layer1_size, fname))
        assert (self.cat_len, self.layer1_size) == self.cats.shape
        with utils.smart_open(fname, 'wb') as fout:
            shape_header = "#cats_len: %d\n#size:%d\n" % self.cats.shape
            fout.write(utils.to_utf8(shape_header))
            settings_header = "#sg:%d\n#hs:%d\n#negative:%d\n#cbow_mean:%d\n" % (
                self.sg, self.hs, self.negative, self.cbow_mean)
            fout.write(utils.to_utf8(settings_header))
            # cat_no_hash maps a category id to its row number in self.cats
            for cat_id, row_no in self.cat_no_hash.items():
                values = ' '.join("%f" % val for val in self.cats[row_no])
                fout.write(utils.to_utf8("%s\t%s\n" % (cat_id, values)))
Example #10
0
    def write_headers(self, num_docs, num_terms, num_nnz):
        """Write the header line followed by the matrix-dimensions line.

        A negative `num_nnz` means the dimensions are not known yet; a line
        of 50 spaces is written in place of the numbers.  Afterwards
        `last_docno` is reset to -1 and `headers_written` is set.
        """
        out = self.fout
        out.write(MmWriter.HEADER_LINE)

        if num_nnz < 0:
            # we don't know the matrix shape/density yet, so only log a general line
            logger.info("saving sparse matrix to %s" % self.fname)
            placeholder = ' ' * 50 + '\n'  # 48 digits must be enough for everybody
            out.write(utils.to_utf8(placeholder))
        else:
            logger.info("saving sparse %sx%s matrix with %i non-zero entries to %s" %
                         (num_docs, num_terms, num_nnz, self.fname))
            stats_line = '%s %s %s\n' % (num_docs, num_terms, num_nnz)
            out.write(utils.to_utf8(stats_line))
        self.last_docno = -1
        self.headers_written = True
Example #11
0
 def __init__(self, id, commit):
     """Create a log record from `commit`.

     When `id` is None the next value of `DBLog.id_counter` is used and
     the counter is advanced; otherwise `id` is stored as given.  The
     revision and message go through `to_utf8`; `committer` and `author`
     start out as None.
     """
     if id is not None:
         self.id = id
     else:
         # no explicit id: take the next one from the class-wide counter
         self.id = DBLog.id_counter
         DBLog.id_counter += 1

     self.rev = to_utf8(commit.revision)
     self.committer = None
     self.author = None
     self.date = commit.date
     self.message = to_utf8(commit.message)
     self.composed_rev = commit.composed_rev
Example #12
0
    def __iter__(self):
        """Yield one token list per sentence, replaying a saved copy when available.

        Behaviour depends on `self.cache` and `self._save`:

        * `_save` set and `cache == 0`: yield the items of `self.cache_list`.
        * `_save` set and `cache == 1`: rewind `self.strf`, read it through
          gzip and yield each line split on whitespace.
        * `_save` set and `cache == 2`: open `self.path` with `gen_open` and
          yield each line split on whitespace.
        * otherwise: set `_save`, tokenize every sentence of every document
          in `self.corpus`, store each token list (in `cache_list` or via
          `file_writer`), and yield it.

        Any `Exception` raised while iterating is printed and not re-raised,
        so iteration simply ends in that case.
        """
        try:
            if self.cache == 0 and self._save:
                # replay from the in-memory list
                for sts in self.cache_list:
                    yield sts
            elif self.cache == 1 and self._save:
                # replay from the gzip data held in self.strf, from its start
                self.strf.seek(0, 0)
                greader = gzip.GzipFile(filename='test', mode='rb', fileobj=self.strf)
                with closing(greader):
                    for line in greader:
                        yield line.split()
            elif self.cache == 2 and self._save:
                # replay from the file at self.path
                greader = gen_open(self.path, mode='rb')
                with closing(greader):
                    for line in greader:
                        yield line.split()
            else:
                # first pass: tokenize the corpus and record the result
                self._save = True
                for doc in self.corpus:
                    for pos, sentence in enumerate(doc.to_sentences()):
                        # `if True` makes the analyzer branch the only live
                        # one; the else branch below is never executed.
                        if True:
                            ts = analyzer.tokenStream("dummy", StringReader(str(sentence)))
                            #offsetAtt = ts.addAttribute(OffsetAttribute.class_)
                            termAtt = ts.addAttribute(CharTermAttribute.class_)
                            ts.reset() # reset the stream to its beginning before reading tokens
                            buf = []
                            while ts.incrementToken():
                                buf.append(to_utf8(termAtt.toString()))
                        else:
                            buf = [to_utf8(word.lower().strip()) for word in sentence.split() if word.isalpha()]
                        if self.cache == 0:
                            self.cache_list.append(buf)
                        elif self.cache == 1 or self.cache == 2:
                            # one sentence per line, tokens separated by spaces
                            self.file_writer.write(' '.join(buf) + '\n')
                        yield buf
                # NOTE(review): close() runs for every cache value, including
                # cache == 0 -- confirm file_writer always exists here.
                self.file_writer.close()
        except Exception as inst:
            # best-effort diagnostics; the exception is swallowed afterwards
            print 'error in Disk125', type(inst)

            print inst.args
            print self.fname
            print inst
            #print sentence
            #print [to_utf8(word.lower().strip()) for word in sentence.split() if word.isalpha()]
            #print doc
        finally:
            # placeholder: nothing is done here at present
            if self._save:
                pass
Example #13
0
    def modify(self, dn, mod_type=None, attrs=None):
        """ Modify a record

        `dn` identifies the record, `attrs` maps attribute names to lists
        of values, and `mod_type` optionally forces one modification type
        for every attribute.  When `mod_type` is None the type is chosen
        per attribute by comparing against the current record.

        In read-only mode, when the lookup of `dn` fails, or when no record
        is found, a message string is returned and nothing is modified.
        """
        if self.read_only:
            msg = 'Running in read-only mode, modification is disabled'
            logger.info(msg)
            return msg

        utf8_dn = to_utf8(dn)
        # fetch the current state of the record
        res = self.search(base=utf8_dn, scope=self.BASE)
        attrs = attrs and attrs or {}

        if res['exception']:
            return res['exception']

        if res['size'] == 0:
            return 'LDAPDelegate.modify: Cannot find dn "%s"' % dn

        cur_rec = res['results'][0]
        mod_list = []
        msg = ''

        for key, values in attrs.items():
            values = map(to_utf8, values)

            if mod_type is None:
                # REPLACE when the value changed and is not the empty
                # marker ['']; DELETE when an existing attribute is set to ['']
                if cur_rec.get(key, ['']) != values and values != ['']:
                    mod_list.append((self.REPLACE, key, values))
                elif cur_rec.has_key(key) and values == ['']:
                    mod_list.append((self.DELETE, key, None))
            else:
                mod_list.append((mod_type, key, values))

        try:
            connection = self.connect()

            # A changed RDN attribute means the record must be renamed
            # first; the DN used for the modification is then rebuilt.
            new_rdn = attrs.get(self.rdn_attr, [''])[0]
            if new_rdn and new_rdn != cur_rec.get(self.rdn_attr)[0]:
                new_utf8_rdn = to_utf8('%s=%s' % (self.rdn_attr, new_rdn))
                connection.modrdn_s(utf8_dn, new_utf8_rdn)
                old_dn_exploded = self.explode_dn(utf8_dn)
                old_dn_exploded[0] = new_utf8_rdn
                utf8_dn = ','.join(old_dn_exploded)

            connection.modify_s(utf8_dn, mod_list)

        except ldap.INVALID_CREDENTIALS, e:
            # record the failure as a message instead of raising
            e_name = e.__class__.__name__
            msg = '%s No permission to modify "%s"' % (e_name, dn)
Example #14
0
    def insert(self, base, rdn, attrs=None):
        """ Insert a new record

        The new record's DN is `rdn` and `base` joined by a comma.  `attrs`
        maps attribute names to either a list of values or a single string
        in which multiple values are separated by ';'.  Attributes whose
        value is the empty marker [''] are left out.

        In read-only mode a message string is returned and nothing is added.
        """
        if self.read_only:
            msg = 'Running in read-only mode, insertion is disabled'
            logger.info(msg)
            return msg

        msg = ''
        dn = to_utf8('%s,%s' % (rdn, base))
        attribute_list = []
        attrs = attrs and attrs or {}

        for attr_key, attr_val in attrs.items():
            # a plain string holds ';'-separated values: split and strip them
            if isinstance(attr_val, str) or isinstance(attr_val, unicode):
                attr_val = [x.strip() for x in attr_val.split(';')]

            if attr_val != ['']:
                attr_val = map(to_utf8, attr_val)
                attribute_list.append((attr_key, attr_val))

        try:
            connection = self.connect()
            connection.add_s(dn, attribute_list)
        except ldap.INVALID_CREDENTIALS, e:
            # record the failure as a message instead of raising
            e_name = e.__class__.__name__
            msg = '%s No permission to insert "%s"' % (e_name, dn)
Example #15
0
def create_dealer_index_xychart(title,
                                labels,
                                score,
                                mark_value=None,
                                format='{value|1}',
                                fontAngle=0,
                                Scale=100):
    """Render a 400px-wide bar chart of `score` and return it as PNG data.

    Labels are shortened with `truncate_hanzi(label, 25)` and the chart
    height grows with the number of labels.  `format` is the aggregate
    label format, `fontAngle` the x-axis label angle and `Scale` the upper
    bound of the y axis.  `mark_value` is accepted but not used here.
    """
    short_labels = [truncate_hanzi(label, 25) for label in labels]
    bar_colors = BASE_COLOR
    chart_height = 60 + 20 * len(short_labels)
    chart = XYChart(400, chart_height)
    title_box = chart.addTitle(utils.to_utf8(title), "simsun.ttc", 12)
    title_box.setMargin2(20, 0, 10, 30)
    background = chart.linearGradientColor(0, 0, 0, chart.getHeight(),
                                           '0xFEFEFE', '0xFFFFFF')
    chart.setBackground(background, '0X666666')
    title_height = 0
    chart.addLine(20, title_height, chart.getWidth() - 21, title_height,
                  '0xffffff')
    plot_height = chart_height - 30
    chart.setPlotArea(70, 50, 270, plot_height, -1, -1, Transparent,
                      '0xffffff')
    bars = chart.addBarLayer3(score, bar_colors)
#    bars.setBorderColor(Transparent, softLighting(Right))
    bars.setAggregateLabelFormat(format)
    bars.setAggregateLabelStyle("simsun.ttc", 8)

    x_axis = chart.xAxis()
    x_axis.setLabels(short_labels)
    chart.yAxis().setColors(Transparent)
    chart.yAxis2().setColors(Transparent)
    chart.xAxis().setTickColor(Transparent)
    chart.xAxis().setLabelStyle("simsun.ttc", 9, 0x0, fontAngle)
    chart.yAxis().setLabelStyle("simsun.ttc", 9)
    chart.yAxis2().setLabelStyle("simsun.ttc", 9)
    chart.yAxis().setLinearScale(0, Scale)

    chart.packPlotArea(20, title_height + 15, chart.getWidth() - 30,
                       chart.getHeight() - 15)

    return chart.makeChart2(PNG)
Example #16
0
    def __insert_many(self):
        """Flush the queued actions and commits to the database.

        Does nothing when both queues are empty.  Otherwise each non-empty
        queue is inserted with `executemany` and then cleared, and the
        connection is committed.  Every step is bracketed by
        profiler_start/profiler_stop.
        """
        if not (self.actions or self.commits):
            return

        db_cursor = self.cursor

        if self.actions:
            action_rows = [(act.id, act.type, act.file_id, act.commit_id,
                            act.branch_id) for act in self.actions]
            actions_label = "Inserting actions for repository %d"
            profiler_start(actions_label, (self.repo_id,))
            db_cursor.executemany(
                statement(DBAction.__insert__, self.db.place_holder),
                action_rows)
            self.actions = []
            profiler_stop(actions_label, (self.repo_id,))

        if self.commits:
            commit_rows = [(log.id, log.rev, log.committer, log.author,
                            log.date, to_utf8(log.message).decode("utf-8"),
                            log.composed_rev, log.repository_id)
                           for log in self.commits]
            commits_label = "Inserting commits for repository %d"
            profiler_start(commits_label, (self.repo_id,))
            db_cursor.executemany(
                statement(DBLog.__insert__, self.db.place_holder),
                commit_rows)
            self.commits = []
            profiler_stop(commits_label, (self.repo_id,))

        commit_label = "Committing inserts for repository %d"
        profiler_start(commit_label, (self.repo_id,))
        self.cnn.commit()
        profiler_stop(commit_label, (self.repo_id,))
Example #17
0
    def search( self
              , base
              , scope
              , filter='(objectClass=*)'
              , attrs=[]
              , bind_dn=''
              , bind_pwd=''
              ):
        """ The main search engine

        Runs a search under `base` with the given `scope`, `filter` and
        attribute list, connecting with `bind_dn`/`bind_pwd`.

        The result is a dict with three keys: 'exception' (a message,
        empty when nothing went wrong), 'size' (number of records) and
        'results' (a list of record dicts, each with an added 'dn' key).
        """
        result = { 'exception' : ''
                 , 'size' : 0
                 , 'results' : []
                 }
        filter = to_utf8(filter)

        try:
            connection = self.connect(bind_dn=bind_dn, bind_pwd=bind_pwd)
            if connection is None:
                result['exception'] = 'Cannot connect to LDAP server'
                return result

            try:
                res = connection.search_s(base, scope, filter, attrs)
            except ldap.PARTIAL_RESULTS:
                # take whatever partial result the server has for us
                res_type, res = connection.result(all=0)
            except ldap.REFERRAL, e:
                # follow the referral and repeat the search on the new connection
                connection = self.handle_referral(e)

                try:
                    res = connection.search_s(base, scope, filter, attrs)
                except ldap.PARTIAL_RESULTS:
                    res_type, res = connection.result(all=0)

            for rec_dn, rec_dict in res:
                # When used against Active Directory, "rec_dict" may not
                # be a dictionary in some cases (instead, it can be a list).
                # An example of a useless "res" entry that can be ignored
                # from AD is
                # (None, ['ldap://ForestDnsZones.PORTAL.LOCAL/DC=ForestDnsZones,DC=PORTAL,DC=LOCAL'])
                # This appears to be some sort of internal referral, but
                # we can't handle it, so we need to skip over it.
                try:
                    items =  rec_dict.items()
                except AttributeError:
                    # 'items' not found on rec_dict
                    continue

                # decode every value of every multi-valued attribute in
                # place; values that cannot be converted are left as they are
                for key, value in items:
                    if not isinstance(value, str):
                        try:
                            for i in range(len(value)):
                                value[i] = from_utf8(value[i])
                        except:
                            pass

                rec_dict['dn'] = from_utf8(rec_dn)

                result['results'].append(rec_dict)
                result['size'] += 1
Example #18
0
    def __init__(self, id, name):
        """Create a tag record.

        When `id` is None the next value of `DBTag.id_counter` is used and
        the counter is advanced; otherwise `id` is stored as given.  `name`
        is stored after passing through `to_utf8`.
        """
        if id is not None:
            self.id = id
        else:
            # no explicit id: take the next one from the class-wide counter
            self.id = DBTag.id_counter
            DBTag.id_counter += 1

        self.name = to_utf8(name)
Example #19
0
    def write_headers(self, num_docs, num_terms, num_nnz):
        """Write the header line followed by the matrix-dimensions line.

        A negative `num_nnz` means the dimensions are not known yet; a line
        of 50 spaces is written in place of the numbers.  Afterwards
        `last_docno` is reset to -1 and `headers_written` is set.
        """
        out = self.fout
        out.write(MmWriter.HEADER_LINE)

        if num_nnz < 0:
            # we don't know the matrix shape/density yet, so only log a general line
            logger.info("saving sparse matrix to %s" % self.fname)
            # 48 digits must be enough for everybody
            placeholder = ' ' * 50 + '\n'
            out.write(utils.to_utf8(placeholder))
        else:
            logger.info(
                "saving sparse %sx%s matrix with %i non-zero entries to %s" %
                (num_docs, num_terms, num_nnz, self.fname))
            stats_line = '%s %s %s\n' % (num_docs, num_terms, num_nnz)
            out.write(utils.to_utf8(stats_line))
        self.last_docno = -1
        self.headers_written = True
Example #20
0
    def __init__(self, id, name):
        """Create a tag record.

        When `id` is None the next value of `DBTag.id_counter` is used and
        the counter is advanced; otherwise `id` is stored as given.  `name`
        is stored after passing through `to_utf8`.
        """
        if id is not None:
            self.id = id
        else:
            # no explicit id: take the next one from the class-wide counter
            self.id = DBTag.id_counter
            DBTag.id_counter += 1

        self.name = to_utf8(name)
Example #21
0
    def save_doc2vec_format(self, fname):
        """
        Write the sentence vectors to `fname` in a word2vec-style text layout.

        The first line holds the shape of `self.sents`; every following
        line is `sent_<index>` and the "%f"-formatted values of that row.

        """
        logger.info("storing %sx%s projection weights into %s" %
                    (self.sents_len, self.layer1_size, fname))
        assert (self.sents_len, self.layer1_size) == self.sents.shape
        with utils.smart_open(fname, 'wb') as fout:
            shape_line = "%s %s\n" % self.sents.shape
            fout.write(utils.to_utf8(shape_line))
            # rows are written in index order, labelled with their index
            for sent_no in xrange(self.sents_len):
                values = ' '.join("%f" % val for val in self.sents[sent_no])
                fout.write(utils.to_utf8("sent_%d %s\n" % (sent_no, values)))
Example #22
0
    def __getitem__(self, key):
        """Return the converted value stored under `key`, using the cache.

        The key is passed through `to_utf8` first.  On a cache miss the raw
        value is read from `self.db`, run through `self.converter`, stored
        in the cache and returned.
        """
        utf8_key = to_utf8(key)
        cache = self.cache
        if utf8_key not in cache:
            converted = self.converter(self.db[utf8_key])
            cache[utf8_key] = converted
            return converted

        return cache[utf8_key]
Example #23
0
    def __init__(self, id, file_name):
        """Create a file record.

        When `id` is None the next value of `DBFile.id_counter` is used and
        the counter is advanced; otherwise `id` is stored as given.
        `file_name` goes through `to_utf8`; `repository_id` starts as None.
        """
        if id is not None:
            self.id = id
        else:
            # no explicit id: take the next one from the class-wide counter
            self.id = DBFile.id_counter
            DBFile.id_counter += 1

        self.file_name = to_utf8(file_name)
        self.repository_id = None
Example #24
0
 def __init__(self, id, person):
     """Create a person record from `person`.

     When `id` is None the next value of `DBPerson.id_counter` is used and
     the counter is advanced; otherwise `id` is stored as given.  The name
     goes through `to_utf8`; a falsy email is stored as None.
     """
     if id is not None:
         self.id = id
     else:
         # no explicit id: take the next one from the class-wide counter
         self.id = DBPerson.id_counter
         DBPerson.id_counter += 1

     self.name = to_utf8(person.name)
     self.email = person.email or None
Example #25
0
 def _on_notify_url_found(result, ex):
     """Callback: save the order notification for the URL found in `result`.

     Nothing is done when `ex` is truthy.  Otherwise the notification is
     saved for `result['url']`, and `on_created` is registered as the
     follow-up callback with `result['ext']` (passed through
     `utils.to_utf8`, or '' when that is falsy) bound as its first argument.
     """
     if ex:
         return

     pay_notice_url = result['url']
     ext = utils.to_utf8(result['ext']) or ''
     on_saved = functools.partial(on_created, ext)
     self.save_order_notification(app, pay_notice_url, params, on_saved)
Example #26
0
    def __init__(self, id, person):
        """Create a person record from `person`.

        When `id` is None the next value of `DBPerson.id_counter` is used
        and the counter is advanced; otherwise `id` is stored as given.  The
        name goes through `to_utf8`; a falsy email is stored as None.
        """
        if id is not None:
            self.id = id
        else:
            # no explicit id: take the next one from the class-wide counter
            self.id = DBPerson.id_counter
            DBPerson.id_counter += 1

        self.name = to_utf8(person.name)
        self.email = person.email or None
Example #27
0
 def __init__(self, id, file_name):
     """Create a file record.

     When `id` is None the next value of `DBFile.id_counter` is used and
     the counter is advanced; otherwise `id` is stored as given.
     `file_name` goes through `to_utf8`; `repository_id` starts as None.
     """
     if id is not None:
         self.id = id
     else:
         # no explicit id: take the next one from the class-wide counter
         self.id = DBFile.id_counter
         DBFile.id_counter += 1

     self.file_name = to_utf8(file_name)
     self.repository_id = None
Example #28
0
 def calc_sign(self, params):
     """Return the MD5 hex signature for `params`.

     Every key except "sign" whose value is truthy contributes a
     `key=value` pair; pairs are ordered by key and joined with '&'.  The
     app key from `self._app['key']` is appended before hashing.  The
     joined string (without the key) is printed.
     """
     names = sorted(name for name in params.keys() if name != "sign")
     pairs = []
     for name in names:
         if params.get(name):
             pairs.append("%s=%s" % (name, to_utf8(params.get(name, ''))))
     sign_str = '&'.join(pairs)
     print("sign_str:%s" % sign_str)
     digest = hashlib.md5(sign_str + self._app['key'])
     return digest.hexdigest()
Example #29
0
    def __init__(self, id, commit_id, file_id, file_path):
        """Create a file-path record.

        When `id` is None the next value of `DBFilePath.id_counter` is used
        and the counter is advanced; otherwise `id` is stored as given.
        `commit_id` and `file_id` are stored unchanged; `file_path` goes
        through `to_utf8`.
        """
        if id is not None:
            self.id = id
        else:
            # no explicit id: take the next one from the class-wide counter
            self.id = DBFilePath.id_counter
            DBFilePath.id_counter += 1

        self.commit_id = commit_id
        self.file_id = file_id
        self.file_path = to_utf8(file_path)
Example #30
0
    def __init__(self, id, commit_id, file_id, file_path):
        """Create a file-path record.

        When `id` is None the next value of `DBFilePath.id_counter` is used
        and the counter is advanced; otherwise `id` is stored as given.
        `commit_id` and `file_id` are stored unchanged; `file_path` goes
        through `to_utf8`.
        """
        if id is not None:
            self.id = id
        else:
            # no explicit id: take the next one from the class-wide counter
            self.id = DBFilePath.id_counter
            DBFilePath.id_counter += 1

        self.commit_id = commit_id
        self.file_id = file_id
        self.file_path = to_utf8(file_path)
Example #31
0
    def save_word2vec_format(self,
                             fname,
                             fvocab=None,
                             binary=False,
                             total_vec=None):
        """
        Store the input-hidden weight matrix in the same format used by the original
        C word2vec-tool, for compatibility.

         `fname` is the file used to save the vectors in
         `fvocab` is an optional file used to save the vocabulary
         `binary` is an optional boolean indicating whether the data is to be saved
         in binary word2vec format (default: False)
         `total_vec` is an optional parameter to explicitly specify total no. of vectors
         (in case word vectors are appended with document vectors afterwards)

        Entries are written in order of decreasing count in both files.

        """
        if total_vec is None:
            total_vec = len(self.vocab)
        vector_size = self.syn0.shape[1]
        # Both outputs use the same ordering, so sort only once.
        by_count = sorted(iteritems(self.vocab),
                          key=lambda item: -item[1].count)
        if fvocab is not None:
            logger.info("storing vocabulary in %s", fvocab)
            with utils.smart_open(fvocab, 'wb') as vout:
                for word, vocab in by_count:
                    vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count)))
        logger.info("storing %sx%s projection weights into %s",
                    total_vec, vector_size, fname)
        assert (len(self.vocab), vector_size) == self.syn0.shape
        with utils.smart_open(fname, 'wb') as fout:
            # the header carries total_vec, which may exceed len(self.vocab)
            fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size)))
            # store in sorted order: most frequent words at the top
            for word, vocab in by_count:
                row = self.syn0[vocab.index]
                if binary:
                    # tobytes() replaces the deprecated tostring() alias,
                    # which recent NumPy releases no longer provide; the
                    # bytes produced are the same.
                    fout.write(utils.to_utf8(word) + b" " + row.tobytes())
                else:
                    values = ' '.join("%f" % val for val in row)
                    fout.write(utils.to_utf8("%s %s\n" % (word, values)))
Example #32
0
def create_simple_xychart(title,
                          labels,
                          data,
                          mark_value=None,
                          format='{value|1}',
                          fontAngle=0,
                          x=560,
                          y=220,
                          swapxy=False,
                          Scale=100):
    """Render a bar chart of `data` on an `x` by `y` canvas and return PNG data.

    The plot area widens with the number of labels.  `swapxy` swaps the
    axes, `format` is the aggregate label format, `fontAngle` the x-axis
    label angle (a non-zero angle also selects a smaller label font) and
    `Scale` the upper bound of the y axis.  `mark_value` is accepted but
    only referenced by the disabled code below.
    """
    bar_colors = BASE_COLOR
    chart = XYChart(x, y)
    background = chart.linearGradientColor(0, 0, 0, chart.getHeight(),
                                           '0xFEFEFE', '0xFFFFFF')
    chart.setBackground(background, '0X666666')

    title_height = 0

    chart.addLine(20, title_height, chart.getWidth() - 21, title_height,
                  '0xffffff')

    plot_width = 30 + 50 * len(labels)
    chart.setPlotArea(70, 50, plot_width, 170, -1, -1, Transparent,
                      '0xffffff')
    if swapxy:
        chart.swapXY()
    title_box = chart.addTitle(utils.to_utf8(title), "simsun.ttc", 12)
    title_box.setMargin2(20, 0, 10, 30)

    bars = chart.addBarLayer3(data, bar_colors)
    bars.setBorderColor(Transparent, softLighting(Right))
    bars.setAggregateLabelFormat(format)
    if fontAngle == 0:
        font_size = 8
    else:
        font_size = 7
    bars.setAggregateLabelStyle("simsun.ttc", font_size)
    bars.setBarWidth(x, 15)

    x_axis = chart.xAxis()
    x_axis.setLabels(labels)

    chart.yAxis().setLinearScale(0, Scale)
    chart.yAxis().setColors(Transparent)
    chart.yAxis2().setColors(Transparent)
    chart.xAxis().setTickColor(Transparent)
    chart.xAxis().setLabelStyle("simsun.ttc", 9, 0x0, fontAngle)
    chart.yAxis().setLabelStyle("simsun.ttc", 9)
    chart.yAxis2().setLabelStyle("simsun.ttc", 9)

    # Disabled: marker layer for mark_value.
    #    if mark_value:
    #        markData = [mark_value for i in range(len(data))]
    #        markLayer = c.addBoxWhiskerLayer(None, None, None, None, markData, -1, '0xff0000')

    chart.packPlotArea(20, title_height + 40,
                       chart.getWidth() - 30,
                       chart.getHeight() - 15)

    return chart.makeChart2(PNG)
Example #33
0
 def calc_sign(self, params):
     """Return the MD5 hex signature for `params`.

     Every key except "sign" whose value is not the empty string
     contributes a `key=value` pair; pairs are ordered by key and joined
     with '&'.  The app key from `self._app['key']` is appended, the
     result is UTF-8 encoded and hashed.  The signed string and the
     signature are logged through `self.log_info`.
     """
     names = sorted(name for name in params.keys() if name != "sign")
     pairs = []
     for name in names:
         if params.get(name, "") != "":
             pairs.append("%s=%s" % (name, to_utf8(params.get(name, ''))))
     sign_str = '&'.join(pairs)
     payload = sign_str + self._app['key']
     sign = hashlib.md5(payload.encode('utf-8')).hexdigest()
     self.log_info("sign_str:%s, sign:%s" % (payload, sign))
     return sign
Example #34
0
    def save_as_text(self, fname, sort_by_word=True):
        """
        Write this Dictionary to the text file `fname`, one entry per line as
        `id[TAB]word_utf8[TAB]document frequency[NEWLINE]`.

        With `sort_by_word` the lines are ordered by word; otherwise they
        are ordered by decreasing document frequency.

        Note: the text format is meant for inspecting a corpus. Use
        `save`/`load` to store in binary format (pickle) for improved
        performance.
        """
        logger.info("saving dictionary mapping to %s", fname)
        with utils.smart_open(fname, 'wb') as fout:
            if not sort_by_word:
                by_frequency = sorted(iteritems(self.dfs),
                                      key=lambda item: -item[1])
                for tokenid, freq in by_frequency:
                    entry = "%i\t%s\t%i\n" % (tokenid, self[tokenid], freq)
                    fout.write(utils.to_utf8(entry))
            else:
                for token, tokenid in sorted(iteritems(self.token2id)):
                    # tokens without a recorded frequency are written as 0
                    entry = "%i\t%s\t%i\n" % (tokenid, token,
                                              self.dfs.get(tokenid, 0))
                    fout.write(utils.to_utf8(entry))
Example #35
0
    def save_cat2vec_format(self, fname):
        """
        Write the category vectors to `fname`.

        The file starts with '#'-prefixed header lines (the shape of
        `self.cats`, then the sg/hs/negative/cbow_mean settings) followed by
        one `cat_id[TAB]values` line per entry of `self.cat_no_hash`.

        """
        logger.info("storing %sx%s projection weights into %s" %
                    (self.cat_len, self.layer1_size, fname))
        assert (self.cat_len, self.layer1_size) == self.cats.shape
        with utils.smart_open(fname, 'wb') as fout:
            shape_header = "#cats_len: %d\n#size:%d\n" % self.cats.shape
            fout.write(utils.to_utf8(shape_header))
            settings_header = (
                "#sg:%d\n#hs:%d\n#negative:%d\n#cbow_mean:%d\n" %
                (self.sg, self.hs, self.negative, self.cbow_mean))
            fout.write(utils.to_utf8(settings_header))
            # cat_no_hash maps a category id to its row number in self.cats
            for cat_id, row_no in self.cat_no_hash.items():
                values = ' '.join("%f" % val for val in self.cats[row_no])
                fout.write(utils.to_utf8("%s\t%s\n" % (cat_id, values)))
Example #36
0
    def write_vector(self, docno, vector):
        """
        Write one sparse vector to the file as `docno termid weight` lines.

        `vector` is any iterable of (field id, field value) pairs.  Entries
        whose absolute value is at most 1e-12 are skipped and the rest are
        written in increasing field id order.  Returns a pair of
        (largest field id written, number of entries written), or (-1, 0)
        when nothing was written.
        """
        assert self.headers_written, "must write Matrix Market file headers before writing data!"
        assert self.last_docno < docno, "documents %i and %i not in sequential order!" % (self.last_docno, docno)
        # ignore near-zero entries; sorting orders the pairs by term id
        entries = sorted((termid, weight) for termid, weight in vector
                         if abs(weight) > 1e-12)
        for termid, weight in entries:
            # +1 because MM format starts counting from 1
            line = "%i %i %s\n" % (docno + 1, termid + 1, weight)
            self.fout.write(utils.to_utf8(line))
        self.last_docno = docno
        if not entries:
            return (-1, 0)
        return (entries[-1][0], len(entries))
Example #37
0
    def save_word2vec_format(self, fname, fvocab=None, binary=False):
        """
        Store the input-hidden weight matrix in the same format used by the original
        C word2vec-tool, for compatibility.

        `fname` is the file the vectors are written to.
        `fvocab` is an optional file that receives one "word count" line per
        vocabulary entry.
        `binary` selects the binary layout (word, a space, then the raw bytes
        of the row) instead of the text layout (word followed by "%f" values).

        Entries are written in order of decreasing count in both files.

        """
        # Both outputs use the same ordering, so sort only once.
        by_count = sorted(iteritems(self.vocab), key=lambda item: -item[1].count)
        if fvocab is not None:
            logger.info("Storing vocabulary in %s", fvocab)
            with utils.smart_open(fvocab, 'wb') as vout:
                for word, vocab in by_count:
                    vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count)))
        logger.info("storing %sx%s projection weights into %s", len(self.vocab), self.layer1_size, fname)
        assert (len(self.vocab), self.layer1_size) == self.syn0.shape
        with utils.smart_open(fname, 'wb') as fout:
            fout.write(utils.to_utf8("%s %s\n" % self.syn0.shape))
            # store in sorted order: most frequent words at the top
            for word, vocab in by_count:
                row = self.syn0[vocab.index]
                if binary:
                    # tobytes() replaces the deprecated tostring() alias,
                    # which recent NumPy releases no longer provide; the
                    # bytes produced are the same.
                    fout.write(utils.to_utf8(word) + b" " + row.tobytes())
                else:
                    values = ' '.join("%f" % val for val in row)
                    fout.write(utils.to_utf8("%s %s\n" % (word, values)))
Example #38
0
    def __get_person(self, person):
        """Return the person_id for a person struct, or None for a None person.

           The id is looked up in `self.people_cache` (keyed by the
           UTF-8 name) first and in the database otherwise; an id obtained
           from the database is stored in the cache before it is returned.
        """
        def ensure_person(person):
            # Look the person up by name; insert a new row when absent.
            profile_label = "Ensuring person %s for repository %d"
            profiler_start(profile_label, (person.name, self.repo_id))
            printdbg("DBContentHandler: ensure_person %s <%s>",
                     (person.name, person.email))
            db_cursor = self.cursor

            utf8_name = to_utf8(person.name)
            address = person.email
            if address is not None:
                address = to_utf8(address).decode("utf-8")

            lookup_sql = statement("SELECT id from people where name = ?",
                                   self.db.place_holder)
            db_cursor.execute(lookup_sql,
                              (to_utf8(utf8_name).decode("utf-8"), ))
            row = db_cursor.fetchone()

            if row:
                # an existing person with this name: reuse its id
                found_id = row[0]
            else:
                new_person = DBPerson(None, person)
                insert_sql = statement(DBPerson.__insert__,
                                       self.db.place_holder)
                insert_params = (new_person.id,
                                 to_utf8(new_person.name).decode("utf-8"),
                                 address)
                db_cursor.execute(insert_sql, insert_params)
                found_id = new_person.id

            profiler_stop(profile_label, (person.name, self.repo_id), True)

            return found_id

        if person is None:
            return None

        cache_key = to_utf8(person.name)
        cache = self.people_cache

        if cache_key in cache:
            return cache[cache_key]

        person_id = ensure_person(person)
        cache[cache_key] = person_id
        return person_id
    def __get_person(self, person):
        """Map a person struct to its person_id (None for a None person).

           Lookup order: `self.people_cache` first (keyed by the UTF-8 name),
           then the `people` table.  A person missing from the table is
           inserted, and every id obtained from the database is written back
           to the cache.
        """
        if person is None:
            return None

        def fetch_or_create(candidate):
            # Database side of the lookup: SELECT by name, INSERT when absent.
            profile_msg = "Ensuring person %s for repository %d"
            profiler_start(profile_msg, (candidate.name, self.repo_id))
            printdbg("DBContentHandler: ensure_person %s <%s>",
                     (candidate.name, candidate.email))
            cur = self.cursor

            encoded_name = to_utf8(candidate.name)
            email = candidate.email
            if email is not None:
                email = to_utf8(email).decode("utf-8")

            find_by_name = statement("SELECT id from people where name = ?",
                                     self.db.place_holder)
            cur.execute(find_by_name,
                        (to_utf8(encoded_name).decode("utf-8"),))
            match = cur.fetchone()
            if match:
                resolved_id = match[0]
            else:
                new_person = DBPerson(None, candidate)
                add_person = statement(DBPerson.__insert__,
                                       self.db.place_holder)
                cur.execute(add_person,
                            (new_person.id,
                             to_utf8(new_person.name).decode("utf-8"),
                             email))
                resolved_id = new_person.id

            profiler_stop(profile_msg, (candidate.name, self.repo_id), True)
            return resolved_id

        key = to_utf8(person.name)
        if key not in self.people_cache:
            # first sighting of this name: ask the database, then cache the id
            person_id = fetch_or_create(person)
            self.people_cache[key] = person_id
            return person_id

        return self.people_cache[key]
Example #40
0
 def save_word2vec_format(self, fname, binary=False):
     """
     Store the input-hidden weight matrix in the same format used by the original
     C word2vec-tool, for compatibility.

     The file starts with a "<rows> <columns>" line taken from `self.syn0.shape`,
     followed by one record per word, most frequent words first.  Each record is
     the UTF-8 encoded word, a space, the vector and a trailing newline.  With
     `binary` set the vector is the output of the row's `tostring()`; otherwise
     it is the row's components formatted with "%f" and joined by spaces.

     `fname` is opened in binary mode, so every piece is converted to a byte
     string before it is written; this keeps the output identical on Python 2
     and makes the method usable on Python 3 as well.
     """
     logger.info("storing %sx%s projection weights into %s" % (len(self.vocab), self.layer1_size, fname))
     assert (len(self.vocab), self.layer1_size) == self.syn0.shape
     with open(fname, 'wb') as fout:
         fout.write(utils.to_utf8("%s %s\n" % self.syn0.shape))
         # store in sorted order: most frequent words at the top
         # (`items()` instead of `iteritems()`: available on Python 2 and 3)
         for word, vocab in sorted(self.vocab.items(), key=lambda item: -item[1].count):
             word = utils.to_utf8(word)  # always store in utf8
             row = self.syn0[vocab.index]
             if binary:
                 vector = row.tostring()
             else:
                 vector = utils.to_utf8(' '.join("%f" % val for val in row))
             # join byte strings directly; "%s" formatting of bytes would embed
             # a b'...' repr on Python 3
             fout.write(word + b" " + vector + b"\n")
Example #41
0
    def save_word2vec_format(self, fname, binary=False):
        """
        Store the input-hidden weight matrix in the same format used by the original
        C word2vec-tool, for compatibility.

        Layout: a "<rows> <columns>" header line from `self.syn0.shape`, then one
        record per word ordered by descending count.  A record consists of the
        UTF-8 encoded word, a space, the vector and a newline.  The vector is the
        output of the row's `tostring()` when `binary` is true, otherwise the
        row's components formatted with "%f" and separated by spaces.

        The output file is opened in binary mode, therefore only byte strings
        are written to it.  The bytes produced on Python 2 are unchanged, and the
        method no longer fails on Python 3.

        """
        logger.info("storing %sx%s projection weights into %s" % (len(self.vocab), self.layer1_size, fname))
        assert (len(self.vocab), self.layer1_size) == self.syn0.shape
        with open(fname, 'wb') as fout:
            fout.write(utils.to_utf8("%s %s\n" % self.syn0.shape))
            # store in sorted order: most frequent words at the top
            # (`items()` rather than `iteritems()` so this runs on Python 2 and 3)
            for word, vocab in sorted(self.vocab.items(), key=lambda item: -item[1].count):
                word = utils.to_utf8(word)  # always store in utf8
                row = self.syn0[vocab.index]
                if binary:
                    payload = row.tostring()
                else:
                    payload = utils.to_utf8(' '.join("%f" % val for val in row))
                # concatenate bytes instead of "%s"-formatting them, which would
                # write a b'...' repr on Python 3
                fout.write(word + b" " + payload + b"\n")
Example #42
0
    def write_vector(self, docno, vector):
        """
        Append one sparse document vector to the output file.

        `vector` is any iterable of (term id, weight) pairs.  Pairs whose
        absolute weight does not exceed 1e-12 are dropped; the remaining ones
        are sorted and written as "docno termid weight" lines with 1-based ids.
        `docno` must be greater than the previously written document number.

        Returns `(last term id written, number of entries written)`, or
        `(-1, 0)` when no entry survived the filtering.
        """
        assert self.headers_written, "must write Matrix Market file headers before writing data!"
        assert self.last_docno < docno, "documents %i and %i not in sequential order!" % (self.last_docno, docno)

        # drop near-zero entries, then order the rest by term id
        entries = [(termid, weight) for termid, weight in vector if abs(weight) > 1e-12]
        entries.sort()

        for termid, weight in entries:
            # +1 on both indices because MM format starts counting from 1
            line = "%i %i %s\n" % (docno + 1, termid + 1, weight)
            self.fout.write(utils.to_utf8(line))

        self.last_docno = docno
        if not entries:
            return (-1, 0)
        return (entries[-1][0], len(entries))
Example #43
0
def create_simple_xychart(title, labels, data, mark_value=None,
                          format='{value|1}', fontAngle=0, x=560, y=220,
                          swapxy=False, Scale=100):
    """Draw a single-series bar chart and return the result of ``makeChart2(PNG)``.

    title      -- chart title (converted with ``utils.to_utf8``)
    labels     -- x-axis labels; the plot area width grows with their number
    data       -- bar values
    mark_value -- accepted but not used by the current implementation
    format     -- aggregate label format applied to the bars
    fontAngle  -- rotation of the x-axis labels; any non-zero angle also
                  reduces the bar label font size from 8 to 7
    x, y       -- chart size handed to ``XYChart``
    swapxy     -- when true, ``swapXY()`` is applied to the chart
    Scale      -- upper end of the y-axis linear scale
    """
    font = "simsun.ttc"
    bar_colors = BASE_COLOR

    chart = XYChart(x, y)
    gradient = chart.linearGradientColor(0, 0, 0, chart.getHeight(), '0xFEFEFE', '0xFFFFFF')
    chart.setBackground(gradient, '0X666666')

    top_offset = 0
    chart.addLine(20, top_offset, chart.getWidth() - 21, top_offset, '0xffffff')

    # 50 units of plot width per label on top of a fixed 30
    chart.setPlotArea(70, 50, 30 + 50 * len(labels), 170, -1, -1, Transparent, '0xffffff')
    if swapxy:
        chart.swapXY()

    title_box = chart.addTitle(utils.to_utf8(title), font, 12)
    title_box.setMargin2(20, 0, 10, 30)

    bars = chart.addBarLayer3(data, bar_colors)
    bars.setBorderColor(Transparent, softLighting(Right))
    bars.setAggregateLabelFormat(format)
    if fontAngle == 0:
        bar_label_size = 8
    else:
        bar_label_size = 7
    bars.setAggregateLabelStyle(font, bar_label_size)
    bars.setBarWidth(x, 15)

    chart.xAxis().setLabels(labels)

    chart.yAxis().setLinearScale(0, Scale)
    chart.yAxis().setColors(Transparent)
    chart.yAxis2().setColors(Transparent)
    chart.xAxis().setTickColor(Transparent)
    chart.xAxis().setLabelStyle(font, 9, 0x0, fontAngle)
    chart.yAxis().setLabelStyle(font, 9)
    chart.yAxis2().setLabelStyle(font, 9)

    chart.packPlotArea(20, top_offset + 40, chart.getWidth() - 30, chart.getHeight() - 15)

    return chart.makeChart2(PNG)
Example #44
0
def create_dealer_index_xychart(title, labels, score, mark_value=None,
                                format='{value|1}', fontAngle=0, Scale=100):
    """Draw a bar chart of `score` and return the result of ``makeChart2(PNG)``.

    title      -- chart title (converted with ``utils.to_utf8``)
    labels     -- axis labels; each one is shortened with
                  ``truncate_hanzi(label, 25)`` before use
    score      -- bar values
    mark_value -- accepted but not used by this function
    format     -- aggregate label format applied to the bars
    fontAngle  -- rotation of the x-axis labels
    Scale      -- upper end of the y-axis linear scale

    The chart is 400 wide; its height is 60 plus 20 per label.
    """
    font = "simsun.ttc"

    short_labels = []
    for label in labels:
        short_labels.append(truncate_hanzi(label, 25))
    bar_colors = BASE_COLOR
    total_height = 60 + 20 * len(short_labels)

    chart = XYChart(400, total_height)
    title_box = chart.addTitle(utils.to_utf8(title), font, 12)
    title_box.setMargin2(20, 0, 10, 30)

    gradient = chart.linearGradientColor(0, 0, 0, chart.getHeight(), '0xFEFEFE', '0xFFFFFF')
    chart.setBackground(gradient, '0X666666')

    top_offset = 0
    chart.addLine(20, top_offset, chart.getWidth() - 21, top_offset, '0xffffff')
    chart.setPlotArea(70, 50, 270, total_height - 30, -1, -1, Transparent, '0xffffff')

    bars = chart.addBarLayer3(score, bar_colors)
    bars.setAggregateLabelFormat(format)
    bars.setAggregateLabelStyle(font, 8)

    chart.xAxis().setLabels(short_labels)
    chart.yAxis().setColors(Transparent)
    chart.yAxis2().setColors(Transparent)
    chart.xAxis().setTickColor(Transparent)
    chart.xAxis().setLabelStyle(font, 9, 0x0, fontAngle)
    chart.yAxis().setLabelStyle(font, 9)
    chart.yAxis2().setLabelStyle(font, 9)
    chart.yAxis().setLinearScale(0, Scale)

    chart.packPlotArea(20, top_offset + 15, chart.getWidth() - 30, chart.getHeight() - 15)

    return chart.makeChart2(PNG)
Example #45
0
 def bind_account(self, params, callback):
     """Start binding a phone number to an account.

     `params` must contain 'phone'; 'account', 'code', 'server_id',
     'user_id' and 'player_id' are optional.  If the phone value is empty,
     not a string or not all digits, an error is logged and nothing else
     happens.  Otherwise a URL is built -- via `apply_verify_code` when no
     non-blank code was supplied, via `get_verify_code_url` when one was --
     and requested with `request_get`, with `on_bind_account` as handler.
     `callback` is passed along inside the request's user data.
     """
     account = utils.to_utf8(params.get('account', ''))
     phone = params['phone']
     code = params.get('code', '')
     server_id = params.get('server_id', 0)
     user_id = params.get('user_id', 0)  # fetched but not used below
     player_id = params.get('player_id', '0')

     if not (phone and isinstance(phone, (str, unicode)) and phone.isdigit()):
         self.log_error("reject deal with bad phone number:%s" % phone)
         return

     platform_id = int(self._platform_info['distributor_id'])
     self.log_info("bind_account params:(%s)" % params)

     if code and code.strip():
         url = self.get_verify_code_url(account or player_id, platform_id,
                                        phone, code)
     else:
         url = self.apply_verify_code(account or player_id, platform_id,
                                      phone)

     user_data = {
         'server_id': server_id,
         'player_id': player_id,
         'account': account,
         'phone': phone,
         'code': code,
         'callback': callback,
     }
     self.request_get(url, {}, self.on_bind_account, user_data)
Example #46
0
 def to_utf8(self, str):
     """Return `str` as converted by `utils.to_utf8`."""
     return utils.to_utf8(str)
Example #47
0
 def fake_headers(self, num_docs, num_terms, num_nnz):
     """Write the "num_docs num_terms num_nnz" statistics into the output file.

     The text is written at the offset `len(MmWriter.HEADER_LINE)`.
     Raises ValueError if the formatted statistics exceed 50 characters.
     """
     header_stats = '%i %i %i' % (num_docs, num_terms, num_nnz)
     if len(header_stats) <= 50:
         self.fout.seek(len(MmWriter.HEADER_LINE))
         self.fout.write(utils.to_utf8(header_stats))
     else:
         raise ValueError('Invalid stats: matrix too large!')
 def test_to_utf8(self):
     """Check `utils.to_utf8` on plain, unicode and already-encoded input."""
     expectations = (
         ('abc', 'abc'),
         (u'abc', 'abc'),
         (u'\u4f60\u597d', '\xe4\xbd\xa0\xe5\xa5\xbd'),
         ('\xe4\xbd\xa0\xe5\xa5\xbd', '\xe4\xbd\xa0\xe5\xa5\xbd'),
     )
     for given, expected in expectations:
         assert utils.to_utf8(given) == expected
Example #49
0
 def test_to_utf8(self):
   # Each pair is (expected result, input handed to utils.to_utf8).
   hello_utf8 = '\xe4\xbd\xa0\xe5\xa5\xbd'
   pairs = [
     ('abc', 'abc'),
     ('abc', u'abc'),
     (hello_utf8, u'\u4f60\u597d'),
     (hello_utf8, '\xe4\xbd\xa0\xe5\xa5\xbd'),
   ]
   for expected, given in pairs:
     self.assertEqual(expected, utils.to_utf8(given))
Example #50
0
 def __contains__(self, key):
     """Membership test: look in `self.cache` first, then ask `db_contains`.

     The key is converted with `to_utf8` before either lookup.
     """
     utf8_key = to_utf8(key)
     if utf8_key in self.cache:
         return True
     return self.db_contains(utf8_key)
Example #51
0
 def test_to_utf8(self):
     """Exercise `utils.to_utf8` with ASCII, unicode and pre-encoded strings."""
     encoded_hello = '\xe4\xbd\xa0\xe5\xa5\xbd'
     samples = [
         ('abc', 'abc'),
         (u'abc', 'abc'),
         (u'\u4f60\u597d', encoded_hello),
         ('\xe4\xbd\xa0\xe5\xa5\xbd', encoded_hello),
     ]
     for raw, wanted in samples:
         assert utils.to_utf8(raw) == wanted
Example #52
0
def create_history_now_future_xychart(title, labels, series_list, series_top, maxv=100):
    """Build a 900x320 bar chart of the given series plus three summary series.

    Parameters:
        title: chart title; passed through ``utils.to_utf8`` before drawing.
        labels: x-axis labels; each is passed through ``utils.to_utf8``.
        series_list: list of dicts with ``'name'`` and ``'value'`` keys, where
            ``'value'`` is a list of numbers.  NOTE: the list is modified in
            place -- three summary series are appended to it, and
            ``future_score`` is appended to the ``'value'`` list of every
            series holding at most one value.
        series_top: not used by this function.
        maxv: upper end of the y-axis linear scale (default 100).

    Values equal to -1 or greater than 100 are plotted as 0 and get a custom
    label instead of a number.

    Returns:
        The result of ``c.makeChart2(PNG)``.
    """
    
    # get_ave_score is defined elsewhere.  The first three results are used as
    # 'value' lists of the summary series below; `future_score` and `point`
    # are presumably a projected score and a target threshold -- TODO confirm.
    top3, ytd, ave, future_score, point = get_ave_score(series_list)
    series_list.append(dict(name=u'2012 Top3 Ave', value=top3))
    series_list.append(dict(name=u'2012 YTD', value=ytd))
    series_list.append(dict(name=u'2011 Ave', value=ave))
    
    # Create the chart object: XYChart(900, 320)
    c = XYChart(900, 320)
    # Add the title in 12 pt simsun and set its margins
    title = c.addTitle(utils.to_utf8(title), "simsun.ttc", 12)
    title.setMargin2(20, 0, 10, 30)
    
    # NOTE: color_list is assigned but not referenced again in this function
    color_list = BASE_COLOR
    # Bar colors: COLOR_BLUE is the default; COLOR_93 / COLOR_87 are used for
    # short series when `point` is 93 / 87; the last three belong to the
    # summary series of the same name.
    COLOR_BLUE = 0x0070C0
    COLOR_93 = 0x00B050
    COLOR_87 = 0xFFD600
    COLOR_TOP3_AVE = 0x595443
    COLOR_YTD = 0xFF0000
    COLOR_AVE = 0x5678A9
    
    # Plot area at (50, 90), `chart_width` wide (190 per label plus 30) and 200
    # high, with a linear gradient from 0xffffff to 0xd8e2ec as its first color
    # argument; the remaining arguments are further ChartDirector color
    # settings (see the setPlotArea documentation).
    chart_width = 30 + 190 * len(labels) 
    c.setPlotArea(50, 90, chart_width, 200, c.linearGradientColor(60, 40, 60, 280, 0xffffff,
    0xd8e2ec), -1, 0xffffff, 0xffffff)
    
    # Legend at (50, 30) in 10 pt simsun, transparent background, height 30
    legendBox = c.addLegend(50, 30, 0, "simsun.ttc", 10)
    legendBox.setBackground(Transparent)
    legendBox.setHeight(30)
    
    # Set the x axis labels
    c.xAxis().setLabels([utils.to_utf8(label) for label in labels])
    
    # Draw the ticks between label positions (instead of at label positions)
    c.xAxis().setTickOffset(0.5)
    
    # Set axis label style to 9 pt simsun
    c.xAxis().setLabelStyle("simsun.ttc", 9)
    c.yAxis().setLabelStyle("simsun.ttc", 9)
    
    # Set axis line widths: 2 for the x and y axes, 1 for the second y axis
    c.xAxis().setWidth(2)
    c.yAxis().setWidth(2)
    c.yAxis2().setWidth(1)
    
    # Add the y axis title
    c.yAxis().setTitle("得分/Score", "simsun.ttc", 9)
    
    c.yAxis().setLinearScale(0, maxv)
    
    # Bar layer that receives one data set per series
    layer = c.addBarLayer()
    layer.setBarGap(0.1)
    layer.setBarWidth(170, 18)

    for index, series in enumerate(series_list):
        values = series['value']
        if len(values) > 1:
            color = COLOR_BLUE
        else:
            # Short series: extend it (in place) with future_score and pick the
            # color from the value of `point`.
            values.append(future_score)
            if point == 93:
                color = COLOR_93
            elif point == 87:
                color = COLOR_87
            else:
                color = COLOR_BLUE
        name = utils.to_utf8(series['name'])
        # The three summary series always use their dedicated colors.
        # NOTE(review): `name` is the utils.to_utf8 result compared against
        # unicode literals -- confirm this comparison behaves as intended on
        # the interpreter in use.
        if name == u'2012 Top3 Ave':
            color = COLOR_TOP3_AVE
        if name == u'2012 YTD':
            color = COLOR_YTD
        if name == u'2011 Ave':
            color = COLOR_AVE
        # Values of -1 or above 100 are drawn as zero-height bars.
        write_list = []
        for value in values:
            if value == -1 or value > 100:
                write_list.append(0)
            else:
                write_list.append(value)
        layer.addDataSet(write_list, color, name)
        for i, v in enumerate(values):
            if v == -1 or v > 100:
                # Replace the numeric label: a blank for the summary series,
                # "N/A" for every other series.
                if name in (u'2012 Top3 Ave',u'2012 YTD', u'2011 Ave'):
                    layer.addCustomGroupLabel(index, i, " ")
                else:
                    layer.addCustomGroupLabel(index, i, "N/A")
            else:
                # Layer-wide label settings; re-applied for every regular value.
                layer.setAggregateLabelFormat("{value|1}")
                layer.setAggregateLabelStyle ('', 10, '0x0000', 0)
    
    # Mark the `point` value on the y axis, labelled with the value itself
    yMark = c.yAxis().addMark(point, '0x800080', '%s' % point)
    yMark.setLineWidth(1)
    yMark.setAlignment(TopCenter)

    # Set bar border to transparent. Use soft lighting effect with light direction from
    # top.
    layer.setBorderColor(Transparent, softLighting(Top))
    
    # output the chart
    return c.makeChart2(PNG)
Example #53
0
def create_multi_xychart(title, labels, series_list, series_top, maxv=100):
    """Draw a 900x320 bar chart and return the result of ``makeChart2(PNG)``.

    title       -- chart title (converted with ``utils.to_utf8``)
    labels      -- x-axis labels (each converted with ``utils.to_utf8``);
                   the plot area is 190 wide per label plus 30
    series_list -- sequence of dicts with 'name' and 'value' keys; only the
                   first entry is plotted
    series_top  -- optional dict with 'name' and 'value' keys; when truthy it
                   is drawn as a box-whisker mark layer with its own legend key
    maxv        -- upper end of the y-axis linear scale
    """
    font = "simsun.ttc"
    mark_color = 0xFF6900

    # only the first series is charted
    plotted_series = [series_list[0]]

    chart = XYChart(900, 320)
    title_box = chart.addTitle(utils.to_utf8(title), font, 12)
    title_box.setMargin2(20, 0, 5, 30)

    palette = BASE_COLOR

    plot_width = 30 + 190 * len(labels)
    chart.setPlotArea(
        50, 90, plot_width, 200,
        chart.linearGradientColor(60, 40, 60, 280, 0xffffff, 0xd8e2ec),
        -1, 0xffffff, 0xffffff)

    legend = chart.addLegend(50, 16, 0, font, 10)
    legend.setBackground(Transparent)
    legend.setHeight(30)

    # x axis: labels, with ticks placed between the label positions
    encoded_labels = [utils.to_utf8(label) for label in labels]
    chart.xAxis().setLabels(encoded_labels)
    chart.xAxis().setTickOffset(0.5)

    chart.xAxis().setLabelStyle(font, 9)
    chart.yAxis().setLabelStyle(font, 9)

    chart.xAxis().setWidth(2)
    chart.yAxis().setWidth(2)
    chart.yAxis2().setWidth(1)

    chart.yAxis().setTitle("得分/Score", font, 9)
    chart.yAxis().setLinearScale(0, maxv)

    bars = chart.addBarLayer()
    bars.setBarGap(0.2)
    bars.setBarWidth(150, 48)

    for position, entry in enumerate(plotted_series):
        bars.addDataSet(entry['value'],
                        palette[position % len(palette)],
                        utils.to_utf8(entry['name']))

    if series_top:
        legend.addKey2(2, utils.to_utf8(series_top['name']), mark_color, 2)
        marks = chart.addBoxWhiskerLayer(None, None, None, None,
                                         series_top['value'], -1, mark_color)
        marks.setLineWidth(2)
        marks.setDataGap(0.1)
        marks.setDataLabelStyle(font, 9)
        marks.setDataLabelFormat("{value|1}")

    # transparent bar borders with soft lighting from the top
    bars.setBorderColor(Transparent, softLighting(Top))
    bars.setAggregateLabelFormat("{value|1}")

    return chart.makeChart2(PNG)
Example #54
0
        model.save(s2v_model_name)
    else:
        model = Sentence2Vec.load(s2v_model_name)
    
    print "Input an article title (type EXIT to exit)"
    sys.stdout.write("Name: ")
    line = sys.stdin.readline()
    while line:
        line = utils.to_unicode(line.rstrip())
        if line == "EXIT":
            break
        try:
            if model.sent_no_hash.has_key(line):
                sent_no = model.sent_no_hash[line]
                sent_vec = model.sents[sent_no]
                nsents = model.most_similar_sentence(sent_vec, 11)
                print "Similar articles              similarity"
                print "-"*45
                for nsent in nsents[1:]:
                    print nsent[0], " "*(max(30 - len(utils.to_utf8(nsent[0])), 0)), nsent[1]
                print
            else:
                print "we couldn't find the specified category/article"
                print
        except Exception:
            print "something wrong is happened"

        print "Input a category name or an article title (type EXIT to exit)"
        sys.stdout.write("Name: ")
        line = sys.stdin.readline()