def run():
    """Run the text-processing pipeline, creating each artifact if missing.

    Creates the ./res output directory on first run, then executes each
    pipeline stage (load, clean, stop-word filter, stemming, term list,
    tf, idf, tf-idf) only when its output file does not already exist.

    :return: None
    """
    # Create the output directory if it does not exist.
    if not os.path.exists('./res'):
        os.makedirs('res')
    config = get_config()
    # BUG FIX: the original condition had a misplaced parenthesis --
    # `os.path.exists(config['title'] or not os.path.exists(config['content']))`
    # passed the truthy title string (or a boolean) to a single exists()
    # call, so missing title/content files did not trigger a reload.
    if (not os.path.exists(config['url'])
            or not os.path.exists(config['title'])
            or not os.path.exists(config['content'])):
        data_load(config)
    if not os.path.exists(config['content_clean']):
        data_clean_content(config)
    if not os.path.exists(config['content_filter']):
        filter_stop_word(config)
    if not os.path.exists(config['content_stemming']):
        stemming(config)
    if not os.path.exists(config['term_list']):
        create_term_list(config)
    documents = get_content(config)
    tf_documents = get_tf(documents)
    if not os.path.exists(config['idf']):
        create_idf(config, documents)
    idf_documents = get_idf(config)
    if not os.path.exists(config['tf_idf']):
        create_tf_idf(config, tf_documents, idf_documents, documents)
def get(self):
    """Render the browse page for a (namespace, digest) pair.

    Probes the entry up front so that invalid or missing keys become
    proper 400/404 responses instead of a broken page.
    """
    ns = self.request.get('namespace', 'default-gzip')
    # Support 'hash' for compatibility with old links. To remove eventually.
    item_digest = self.request.get('digest', '') or self.request.get('hash', '')
    # Check for existence of element, so we can 400/404.
    if item_digest and ns:
        try:
            model.get_content(ns, item_digest)
        except ValueError:
            self.abort(400, 'Invalid key')
        except LookupError:
            self.abort(404, 'Unable to retrieve the entry')
    params = {
        u'digest': unicode(item_digest),
        u'namespace': unicode(ns),
    }
    self.response.write(template.render('isolate/browse.html', params))
def GET(self, page, content=None):
    """Render the edit form for one page-content record.

    Pre-fills the form from the stored record when it exists; always
    carries the page name and content id through hidden form fields.
    """
    existing = model.get_content(content)
    form = self.content_form()
    if existing:
        # Populate the editable fields from the stored record.
        form.content.set_value(existing.content)
        form.title.set_value(existing.title)
        form.draft.set_value(existing.draft)
    form.page.set_value(page)
    form.c_id.set_value(content)
    return render.edit_page_content(page, content, form)
def POST(self, page, c=None):
    """Create or update a page-content record from the submitted form.

    Redirects back to the page's content list on success, or to the
    page index when validation fails.
    """
    form = self.content_form()
    if not form.validates():
        raise web.seeother('/page')
    d = form.d
    # Update in place when the record already exists, otherwise insert.
    if model.get_content(d.c_id):
        model.update_content(d.c_id, d.page, d.title, d.content, int(d.draft))
    else:
        model.add_page_content(d.page, d.title, d.content, int(d.draft))
    raise web.seeother('/page/' + d.page + "/content")
def get(self):
    """Serve the expanded content of an entry as browsable text/HTML.

    Looks the entry up by (namespace, digest), inflates its content
    (from the inline datastore blob or from GCS), and writes it out as
    plain text.  JSON-looking content is pretty-printed, wrapped in an
    HTML <div>, and 40-hex-digit strings are linkified back to /browse.
    """
    namespace = self.request.get('namespace', 'default-gzip')
    digest = self.request.get('digest', '')
    # BUG FIX: the original only fetched content when both parameters were
    # present, but then unconditionally called content.startswith() on
    # None, crashing with AttributeError (HTTP 500).  Reject incomplete
    # requests early instead, matching the sibling handler's behavior.
    if not digest:
        self.abort(400, 'Missing digest')
    if not namespace:
        self.abort(400, 'Missing namespace')
    try:
        raw_data, entity = model.get_content(namespace, digest)
    except ValueError:
        self.abort(400, 'Invalid key')
    except LookupError:
        self.abort(404, 'Unable to retrieve the entry')
    # Small payloads are inlined in the datastore; larger ones live in GCS.
    if not raw_data:
        stream = gcs.read_file(config.settings().gs_bucket, entity.key.id())
    else:
        stream = [raw_data]
    content = ''.join(model.expand_content(namespace, stream))
    self.response.headers['X-Frame-Options'] = 'SAMEORIGIN'
    # We delete Content-Type before storing to it to avoid having two (yes,
    # two) Content-Type headers.
    del self.response.headers['Content-Type']
    # Apparently, setting the content type to text/plain encourages the
    # browser (Chrome, at least) to sniff the mime type and display
    # things like images. Images are autowrapped in <img> and text is
    # wrapped in <pre>.
    self.response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    self.response.headers['Content-Disposition'] = str('filename=%s' % digest)
    if content.startswith('{'):
        # Try to format as JSON.
        try:
            content = json.dumps(
                json.loads(content), sort_keys=True, indent=2,
                separators=(',', ': '))
            # If we don't wrap this in html, browsers will put content in a pre
            # tag which is also styled with monospace/pre-wrap. We can't use
            # anchor tags in <pre>, so we force it to be a <div>, which happily
            # accepts links.
            content = (
                '<div style="font-family:monospace;white-space:pre-wrap;">%s'
                '</div>' % content)
            # Linkify things that look like hashes.
            content = re.sub(
                r'([0-9a-f]{40})',
                r'<a target="_blank" href="/browse?namespace=%s' % namespace +
                    r'&digest=\1">\1</a>',
                content)
            self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
        except ValueError:
            # Not valid JSON after all; serve the raw text.
            pass
    self.response.write(content)
def get(self):
    """Serve an entry's content, rendering .isolated files as HTML.

    Looks the entry up by (namespace, digest), inflates its content from
    the inline datastore blob or from GCS, and serves it as plain text.
    Entries in the .isolated JSON format are rendered through a template
    with their file list sorted; oversized content is replaced by a
    message explaining how to download it with the command-line client.
    """
    namespace = self.request.get('namespace', 'default-gzip')
    digest = self.request.get('digest', '')
    content = None
    if not digest:
        self.abort(400, 'Missing digest')
    if not namespace:
        self.abort(400, 'Missing namespace')
    try:
        raw_data, entity = model.get_content(namespace, digest)
    except ValueError:
        self.abort(400, 'Invalid key')
    except LookupError:
        self.abort(404, 'Unable to retrieve the entry')
    logging.info('%s', entity)
    # Small payloads are inlined in the datastore; larger ones live in GCS.
    if not raw_data:
        try:
            stream = gcs.read_file(config.settings().gs_bucket, entity.key.id())
            content = ''.join(model.expand_content(namespace, stream))
        except cloudstorage.NotFoundError:
            # The datastore row outlived the GCS object; drop the stale row.
            logging.error('Entity in DB but not in GCS: deleting entity in DB')
            entity.key.delete()
            self.abort(404, 'Unable to retrieve the file from GCS')
    else:
        content = ''.join(model.expand_content(namespace, [raw_data]))
    self.response.headers['X-Frame-Options'] = 'SAMEORIGIN'
    # We delete Content-Type before storing to it to avoid having two (yes,
    # two) Content-Type headers.
    del self.response.headers['Content-Type']
    # Apparently, setting the content type to text/plain encourages the
    # browser (Chrome, at least) to sniff the mime type and display
    # things like images. Images are autowrapped in <img> and text is
    # wrapped in <pre>.
    self.response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    # App Engine puts a limit of 33554432 bytes on a request, which includes
    # headers. Headers are ~150 bytes. If the content + headers might
    # exceed that limit, we give the user an option to workround getting
    # their file.
    if len(content) > 33554000:
        host = modules.get_hostname(module='default', version='default')
        # host is something like default.default.myisolateserver.appspot.com
        host = host.replace('default.default.', '')
        sizeInMib = len(content) / (1024.0 * 1024.0)
        content = (
            'Sorry, your file is %1.1f MiB big, which exceeds the 32 MiB'
            ' App Engine limit.\nTo work around this, run the following command:\n'
            '    python isolateserver.py download -I %s --namespace %s -f %s %s'
            % (sizeInMib, host, namespace, digest, digest))
    else:
        # BUG FIX: '%' binds tighter than 'or', so the original evaluated
        # ('filename=%s' % self.request.get('as')) or digest; 'filename='
        # is always truthy, making the digest fallback dead code and
        # producing an empty filename when 'as' was absent.  Parenthesize
        # the fallback so the digest is used when 'as' is empty.
        self.response.headers['Content-Disposition'] = str(
            'filename=%s' % (self.request.get('as') or digest))
    try:
        json_data = json.loads(content)
        if self._is_isolated_format(json_data):
            self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
            # Sort the file list by path for a stable, readable rendering.
            # (Plain lambda instead of Py2-only tuple unpacking; identical
            # ordering since dict keys are unique.)
            json_data['files'] = collections.OrderedDict(
                sorted(json_data['files'].items(), key=lambda item: item[0]))
            params = {
                'namespace': namespace,
                'isolated': json_data,
            }
            content = template.render('isolate/isolated.html', params)
    except ValueError:
        # Not JSON; serve the raw content as-is.
        pass
    self.response.write(content)
def get(self):
    """Serve the expanded content of an entry as browsable text/HTML.

    Looks the entry up by (namespace, digest), inflates its content
    (from the inline datastore blob or from GCS), and serves it as plain
    text.  JSON-looking content is pretty-printed and linkified;
    oversized content is replaced by a message explaining how to
    download it with the command-line client.
    """
    namespace = self.request.get('namespace', 'default-gzip')
    digest = self.request.get('digest', '')
    # BUG FIX: the original only fetched content when both parameters were
    # present, but then unconditionally called len(content) on None,
    # crashing with TypeError (HTTP 500).  Reject incomplete requests
    # early instead, matching the sibling handler's behavior.
    if not digest:
        self.abort(400, 'Missing digest')
    if not namespace:
        self.abort(400, 'Missing namespace')
    try:
        raw_data, entity = model.get_content(namespace, digest)
    except ValueError:
        self.abort(400, 'Invalid key')
    except LookupError:
        self.abort(404, 'Unable to retrieve the entry')
    # Small payloads are inlined in the datastore; larger ones live in GCS.
    if not raw_data:
        stream = gcs.read_file(config.settings().gs_bucket, entity.key.id())
    else:
        stream = [raw_data]
    content = ''.join(model.expand_content(namespace, stream))
    self.response.headers['X-Frame-Options'] = 'SAMEORIGIN'
    # We delete Content-Type before storing to it to avoid having two (yes,
    # two) Content-Type headers.
    del self.response.headers['Content-Type']
    # Apparently, setting the content type to text/plain encourages the
    # browser (Chrome, at least) to sniff the mime type and display
    # things like images. Images are autowrapped in <img> and text is
    # wrapped in <pre>.
    self.response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    # App Engine puts a limit of 33554432 bytes on a request, which includes
    # headers. Headers are ~150 bytes. If the content + headers might
    # exceed that limit, we give the user an option to workround getting
    # their file.
    if len(content) > 33554000:
        host = modules.get_hostname(module='default', version='default')
        # host is something like default.default.myisolateserver.appspot.com
        host = host.replace('default.default.', '')
        sizeInMib = len(content) / (1024.0 * 1024.0)
        content = (
            'Sorry, your file is %1.1f MiB big, which exceeds the 32 MiB'
            ' App Engine limit.\nTo work around this, run the following command:\n'
            '    python isolateserver.py download -I %s --namespace %s -f %s %s'
            % (sizeInMib, host, namespace, digest, digest))
    else:
        self.response.headers['Content-Disposition'] = str(
            'filename=%s' % digest)
        if content.startswith('{'):
            # Try to format as JSON.
            try:
                content = json.dumps(
                    json.loads(content), sort_keys=True, indent=2,
                    separators=(',', ': '))
                # If we don't wrap this in html, browsers will put content in a pre
                # tag which is also styled with monospace/pre-wrap. We can't use
                # anchor tags in <pre>, so we force it to be a <div>, which happily
                # accepts links.
                content = (
                    '<div style="font-family:monospace;white-space:pre-wrap;">%s'
                    '</div>' % content)
                # Linkify things that look like hashes.
                content = re.sub(
                    r'([0-9a-f]{40})',
                    r'<a target="_blank" href="/browse?namespace=%s' % namespace +
                        r'&digest=\1">\1</a>',
                    content)
                self.response.headers[
                    'Content-Type'] = 'text/html; charset=utf-8'
            except ValueError:
                # Not valid JSON after all; serve the raw text.
                pass
    self.response.write(content)
def GET(self, id):
    """Render a single post together with its comments."""
    # Convert the URL-captured id once and reuse it for both lookups.
    post_id = int(id)
    return render.view(model.get_content(post_id), model.get_comment(post_id))