예제 #1
0
def run():
    """Create every pipeline artifact that does not exist yet.

    Ensures the ``res`` directory exists, loads the configuration with
    ``get_config()``, then runs each build step only when the file that its
    config key points to is missing on disk.

    :return: None
    """
    if not os.path.exists('./res'):
        os.makedirs('res')
    config = get_config()

    # Reload the raw data when ANY of the three raw files is missing.
    # The previous condition had a misplaced closing parenthesis, which made
    # it evaluate os.path.exists(config['title'] or ...) and therefore never
    # look at config['content'] at all.
    raw_keys = ('url', 'title', 'content')
    if not all(os.path.exists(config[key]) for key in raw_keys):
        data_load(config)

    # Each step is skipped when the file under its config key already exists.
    steps = (
        ('content_clean', data_clean_content),
        ('content_filter', filter_stop_word),
        ('content_stemming', stemming),
        ('term_list', create_term_list),
    )
    for key, build in steps:
        if not os.path.exists(config[key]):
            build(config)

    documents = get_content(config)
    tf_documents = get_tf(documents)
    if not os.path.exists(config['idf']):
        create_idf(config, documents)
    idf_documents = get_idf(config)
    if not os.path.exists(config['tf_idf']):
        create_tf_idf(config, tf_documents, idf_documents, documents)
예제 #2
0
 def get(self):
   """Render the browse page for a namespace/digest pair.

   Reads ``namespace`` (default ``'default-gzip'``) and ``digest`` from the
   request, falling back to the legacy ``hash`` parameter when ``digest`` is
   empty. When both values are non-empty, ``model.get_content`` is called
   only to probe the entry: ValueError leads to ``abort(400)`` and
   LookupError to ``abort(404)``. The response body is the rendered
   ``isolate/browse.html`` template.
   """
   request = self.request
   namespace = request.get('namespace', 'default-gzip')
   digest = request.get('digest', '')
   if not digest:
     # Old links used 'hash' instead of 'digest'; kept for compatibility and
     # meant to be removed eventually.
     digest = request.get('hash', '')

   template_args = {
     u'digest': unicode(digest),
     u'namespace': unicode(namespace),
   }

   # Probe the entry up front so a bad or unknown key becomes a 400/404.
   if digest and namespace:
     try:
       model.get_content(namespace, digest)
     except ValueError:
       self.abort(400, 'Invalid key')
     except LookupError:
       self.abort(404, 'Unable to retrieve the entry')

   page = template.render('isolate/browse.html', template_args)
   self.response.write(page)
예제 #3
0
 def GET(self, page, content = None):
     """Render the edit form for a page's content entry.

     Looks up ``content`` with ``model.get_content``; when the lookup result
     is truthy, its ``content``, ``title`` and ``draft`` values are copied
     into the form. The ``page`` and ``c_id`` form fields are always set
     from the arguments.
     """
     existing = model.get_content(content)
     form = self.content_form()
     if existing:
         # Copy each stored value into the form field of the same name.
         for field_name in ('content', 'title', 'draft'):
             getattr(form, field_name).set_value(getattr(existing, field_name))
     form.page.set_value(page)
     form.c_id.set_value(content)
     return render.edit_page_content(page, content, form)
예제 #4
0
 def POST(self, page, c = None):
     """Save the submitted content form, then redirect.

     An invalid form redirects to ``/page``. A valid form updates the entry
     when ``model.get_content`` returns a truthy result for the submitted
     ``c_id``, otherwise adds a new page content entry; either way the
     handler then redirects to ``/page/<page>/content`` using the submitted
     page value.
     """
     form = self.content_form()
     if not form.validates():
         raise web.seeother('/page')

     fields = form.d
     if model.get_content(fields.c_id):
         model.update_content(
             fields.c_id, fields.page, fields.title, fields.content,
             int(fields.draft))
     else:
         model.add_page_content(
             fields.page, fields.title, fields.content, int(fields.draft))
     raise web.seeother('/page/' + fields.page + "/content")
예제 #5
0
  def get(self):
    """Write the expanded content stored under a namespace/digest pair.

    Reads ``namespace`` (default ``'default-gzip'``) and ``digest`` from the
    request. If either is empty, ``None`` is written to the response and no
    lookup happens. Otherwise the entry is fetched with ``model.get_content``
    (ValueError -> ``abort(400)``, LookupError -> ``abort(404)``), expanded
    and written as ``text/plain``. Content that starts with ``{`` and parses
    as JSON is pretty-printed, wrapped in a ``<div>``, has every run of 40
    lowercase hex characters turned into a ``/browse`` link, and is written
    as ``text/html`` instead.
    """
    namespace = self.request.get('namespace', 'default-gzip')
    digest = self.request.get('digest', '')
    response = self.response

    if not (digest and namespace):
      # Nothing to look up: write None and stop.
      response.write(None)
      return

    try:
      inline_data, entity = model.get_content(namespace, digest)
    except ValueError:
      self.abort(400, 'Invalid key')
    except LookupError:
      self.abort(404, 'Unable to retrieve the entry')

    # Entries without inline data are read from GCS using the entity key id.
    chunks = (
        [inline_data] if inline_data
        else gcs.read_file(config.settings().gs_bucket, entity.key.id()))
    body = ''.join(model.expand_content(namespace, chunks))

    response.headers['X-Frame-Options'] = 'SAMEORIGIN'
    # Drop the existing Content-Type before assigning, otherwise the response
    # ends up carrying two Content-Type headers.
    del response.headers['Content-Type']
    # text/plain apparently encourages the browser (Chrome, at least) to
    # sniff the mime type: images get autowrapped in <img>, text in <pre>.
    response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    response.headers['Content-Disposition'] = str('filename=%s' % digest)

    if body.startswith('{'):
      # Looks like JSON: try to pretty-print and linkify it. A ValueError
      # anywhere in this sequence leaves whatever was produced so far.
      # NOTE(review): the JSON text and namespace are interpolated into HTML
      # without escaping -- confirm both are trusted/validated upstream.
      try:
        body = json.dumps(
            json.loads(body),
            sort_keys=True,
            indent=2,
            separators=(',', ': '))
        # Browsers would put bare text in a <pre>, which cannot hold anchor
        # tags; a <div> with the same monospace/pre-wrap styling can.
        body = (
          '<div style="font-family:monospace;white-space:pre-wrap;">%s</div>'
           % body)
        # Turn anything that looks like a hash into a browse link.
        link = (
            r'<a target="_blank" href="/browse?namespace=%s' % namespace +
            r'&digest=\1">\1</a>')
        body = re.sub(r'([0-9a-f]{40})', link, body)
        response.headers['Content-Type'] = 'text/html; charset=utf-8'
      except ValueError:
        pass

    response.write(body)
예제 #6
0
    def get(self):
        """Serve the expanded content stored under a namespace/digest pair.

        Reads ``namespace`` (default ``'default-gzip'``), ``digest`` and the
        optional ``as`` download name from the request.

        Aborts with 400 when ``digest`` or ``namespace`` is empty or when
        ``model.get_content`` raises ValueError, and with 404 when it raises
        LookupError or when the GCS read raises
        ``cloudstorage.NotFoundError`` (in which case the entity is also
        deleted).

        Content longer than 33554000 is replaced by a message explaining how
        to download the file with ``isolateserver.py``. Otherwise the content
        is written as ``text/plain``, unless it parses as JSON accepted by
        ``self._is_isolated_format``; then it is rendered through
        ``isolate/isolated.html`` as ``text/html`` with its ``files`` sorted
        by path.
        """
        namespace = self.request.get('namespace', 'default-gzip')
        digest = self.request.get('digest', '')
        content = None
        if not digest:
            self.abort(400, 'Missing digest')
        if not namespace:
            self.abort(400, 'Missing namespace')

        try:
            raw_data, entity = model.get_content(namespace, digest)
        except ValueError:
            self.abort(400, 'Invalid key')
        except LookupError:
            self.abort(404, 'Unable to retrieve the entry')

        logging.info('%s', entity)
        if not raw_data:
            # No inline data: the payload is read from GCS by entity key id.
            try:
                stream = gcs.read_file(config.settings().gs_bucket,
                                       entity.key.id())
                content = ''.join(model.expand_content(namespace, stream))
            except cloudstorage.NotFoundError:
                logging.error(
                    'Entity in DB but not in GCS: deleting entity in DB')
                entity.key.delete()
                self.abort(404, 'Unable to retrieve the file from GCS')
        else:
            content = ''.join(model.expand_content(namespace, [raw_data]))

        self.response.headers['X-Frame-Options'] = 'SAMEORIGIN'
        # We delete Content-Type before storing to it to avoid having two (yes,
        # two) Content-Type headers.
        del self.response.headers['Content-Type']

        # Apparently, setting the content type to text/plain encourages the
        # browser (Chrome, at least) to sniff the mime type and display
        # things like images.  Images are autowrapped in <img> and text is
        # wrapped in <pre>.
        self.response.headers['Content-Type'] = 'text/plain; charset=utf-8'

        # App Engine puts a limit of 33554432 bytes on a request, which includes
        # headers. Headers are ~150 bytes.  If the content + headers might
        # exceed that limit, we give the user an option to work around getting
        # their file.
        if len(content) > 33554000:
            host = modules.get_hostname(module='default', version='default')
            # host is something like default.default.myisolateserver.appspot.com
            host = host.replace('default.default.', '')
            sizeInMib = len(content) / (1024.0 * 1024.0)
            content = (
                'Sorry, your file is %1.1f MiB big, which exceeds the 32 MiB'
                ' App Engine limit.\nTo work around this, run the following command:\n'
                '    python isolateserver.py download -I %s --namespace %s -f %s %s'
                % (sizeInMib, host, namespace, digest, digest))
        else:
            # Fix: '%' binds tighter than 'or', so the unparenthesized form
            # formatted the 'as' value first and could never fall back to
            # the digest (an empty 'as' produced a bare 'filename=').
            filename = self.request.get('as') or digest
            self.response.headers['Content-Disposition'] = str(
                'filename=%s' % filename)
            try:
                json_data = json.loads(content)
                if self._is_isolated_format(json_data):
                    self.response.headers[
                        'Content-Type'] = 'text/html; charset=utf-8'
                    # Sort by file path. Indexing the (path, data) pair
                    # replaces a tuple-parameter lambda, which is a syntax
                    # error on Python 3.
                    json_data['files'] = collections.OrderedDict(
                        sorted(json_data['files'].items(),
                               key=lambda item: item[0]))
                    params = {
                        'namespace': namespace,
                        'isolated': json_data,
                    }
                    content = template.render('isolate/isolated.html', params)
            except ValueError:
                # Not JSON: serve the content unchanged as text/plain.
                pass

        self.response.write(content)
예제 #7
0
    def get(self):
        """Write the expanded content stored under a namespace/digest pair.

        Reads ``namespace`` (default ``'default-gzip'``) and ``digest`` from
        the request. If either is empty, ``None`` is written to the response
        and no lookup happens. Otherwise the entry is fetched with
        ``model.get_content`` (ValueError -> ``abort(400)``, LookupError ->
        ``abort(404)``) and expanded.

        Content longer than 33554000 is replaced by a message explaining how
        to download the file with ``isolateserver.py``. Shorter content is
        written as ``text/plain``; when it starts with ``{`` and parses as
        JSON it is pretty-printed, wrapped in a ``<div>``, has every run of
        40 lowercase hex characters turned into a ``/browse`` link, and is
        written as ``text/html`` instead.
        """
        namespace = self.request.get('namespace', 'default-gzip')
        digest = self.request.get('digest', '')
        response = self.response

        if not (digest and namespace):
            # Nothing to look up: write None and stop.
            response.write(None)
            return

        try:
            inline_data, entity = model.get_content(namespace, digest)
        except ValueError:
            self.abort(400, 'Invalid key')
        except LookupError:
            self.abort(404, 'Unable to retrieve the entry')

        # Entries without inline data are read from GCS by entity key id.
        chunks = (
            [inline_data] if inline_data
            else gcs.read_file(config.settings().gs_bucket, entity.key.id()))
        body = ''.join(model.expand_content(namespace, chunks))

        response.headers['X-Frame-Options'] = 'SAMEORIGIN'
        # Drop the existing Content-Type before assigning, otherwise the
        # response ends up carrying two Content-Type headers.
        del response.headers['Content-Type']
        # text/plain apparently encourages the browser (Chrome, at least) to
        # sniff the mime type: images get autowrapped in <img>, text in <pre>.
        response.headers['Content-Type'] = 'text/plain; charset=utf-8'

        # App Engine limits a request to 33554432 bytes including headers
        # (~150 bytes). When content plus headers might exceed that, offer
        # the user a command-line workaround instead of the file itself.
        if len(body) > 33554000:
            # The hostname looks like
            # default.default.myisolateserver.appspot.com; strip the prefix.
            host = modules.get_hostname(
                module='default', version='default',
            ).replace('default.default.', '')
            size_mib = len(body) / 1048576.0
            response.write(
                'Sorry, your file is %1.1f MiB big, which exceeds the 32 MiB'
                ' App Engine limit.\nTo work around this, run the following command:\n'
                '    python isolateserver.py download -I %s --namespace %s -f %s %s'
                % (size_mib, host, namespace, digest, digest))
            return

        response.headers['Content-Disposition'] = str('filename=%s' % digest)
        if body.startswith('{'):
            # Looks like JSON: try to pretty-print and linkify it. A
            # ValueError anywhere in this sequence leaves whatever was
            # produced so far.
            # NOTE(review): the JSON text and namespace are interpolated into
            # HTML without escaping -- confirm both are trusted upstream.
            try:
                body = json.dumps(
                    json.loads(body),
                    sort_keys=True,
                    indent=2,
                    separators=(',', ': '))
                # Browsers would put bare text in a <pre>, which cannot hold
                # anchor tags; a <div> with the same monospace/pre-wrap
                # styling can.
                body = (
                    '<div style="font-family:monospace;white-space:pre-wrap;">%s'
                    '</div>' % body)
                # Turn anything that looks like a hash into a browse link.
                link = (
                    r'<a target="_blank" href="/browse?namespace=%s' %
                    namespace + r'&digest=\1">\1</a>')
                body = re.sub(r'([0-9a-f]{40})', link, body)
                response.headers['Content-Type'] = 'text/html; charset=utf-8'
            except ValueError:
                pass

        response.write(body)
예제 #8
0
파일: blog.py 프로젝트: qq40660/weixin-1
 def GET(self, id):
     """Render the view template with the content and comments for ``id``.

     ``id`` is converted with ``int()`` before it is passed to
     ``model.get_content`` and ``model.get_comment``.
     """
     post_id = int(id)
     content = model.get_content(post_id)
     comments = model.get_comment(post_id)
     return render.view(content, comments)