def fetch_page_source(self, url, item, params=None, **kwargs):
    """Fetch the HTML of ``url``, pass it to ``set_storage`` and return it.

    When the item's ``Meta.web`` enables ``with_ajax`` and a browser is
    attached to this instance, the page is loaded through ``self.browser``.
    Otherwise a plain ``requests.get`` is issued and the body is decoded
    with the charset detected by ``cchardet``.

    Args:
        url: Address of the page to fetch.
        item: Item class; its optional ``Meta.web`` dict is read for the
            ``with_ajax`` and ``request_config`` keys.
        params: Query parameters forwarded to ``requests.get``. Not used on
            the browser path.
        **kwargs: Accepted but not used by this method.

    Returns:
        The page text.
    """
    self.update_status('_status_sent')
    item_web = getattr(item.Meta, 'web', {})
    if item_web.get('with_ajax', False) and self.browser is not None:
        self.browser.get(url)
        text = self.browser.page_source
        if text != '':
            logger.info(Fore.GREEN, 'Sent', '%s %s 200' % (url, len(text)))
        else:
            logger.error('Sent', '%s %s' % (url, len(text)))
    else:
        # Item-level request settings win; fall back to the instance-level ones.
        request_config = (item_web.get('request_config', {})
                          or self.web.get('request_config', {}))
        # Merge instead of passing ``timeout=15`` next to ``**request_config``:
        # a configured 'timeout' now overrides the default rather than raising
        # "got multiple values for keyword argument".
        request_options = {'timeout': 15}
        request_options.update(request_config)
        response = requests.get(url, params=params, **request_options)
        content = response.content
        # detect() can report no encoding (None), e.g. for an empty body;
        # decode(None) would raise TypeError, so fall back to UTF-8.
        encoding = cchardet.detect(content).get('encoding') or 'utf-8'
        text = content.decode(encoding)
        if response.status_code != 200:
            logger.error(
                'Sent',
                '%s %s %s' % (url, len(text), response.status_code))
        else:
            logger.info(
                Fore.GREEN, 'Sent',
                '%s %s %s' % (url, len(text), response.status_code))
    self.set_storage(url, text)
    return text
def serve(self, ip='127.0.0.1', port=5000, **options):
    """Run ``self.server`` on ``ip``:``port``, exiting the process on failure.

    Args:
        ip: Address handed to ``self.server.run``.
        port: Port handed to ``self.server.run``.
        **options: Extra keyword arguments forwarded to ``self.server.run``.

    Raises:
        SystemExit: With status 1 when the server raises an exception.
    """
    try:
        logger.info(Fore.WHITE, 'Serving', 'http://%s:%s' % (ip, port))
        self.server.run(ip, port, **options)
    except Exception as e:
        logger.error('Serving', '%s' % str(e))
        # ``exit()`` is the interactive helper installed by ``site`` and it
        # reported success (status 0); raise SystemExit directly with a
        # failing status instead.
        raise SystemExit(1)
def register(self, item):
    """Register an item class and compile a regex for each of its routes.

    For every ``alias -> route`` pair in ``item.Meta.route`` the alias is
    turned into an anchored regular expression in which each ``:name``
    placeholder becomes a named capture group. The compiled pattern is
    recorded in ``self.alias_re`` and ``self.items``. If the item asks for
    ``with_ajax`` and no browser exists yet, one is created via
    ``self.get_browser``.

    Args:
        item: Item class exposing ``__name__``, ``__base_url__`` and
            ``Meta.route``.

    Raises:
        SystemExit: With status 1 when ``item`` was already registered.
    """
    if item in self.item_classes:
        logger.error('Register',
                     'Repeat register item <%s>' % (item.__name__))
        # Replaces the interactive-only ``exit()`` helper, which also
        # reported success (status 0) for this error.
        raise SystemExit(1)
    self.item_classes.append(item)
    item.__base_url__ = item.__base_url__ or self.base_url

    # Raw strings keep the regex escapes intact without relying on Python
    # passing unknown escapes such as '\s' or '\?' through (which newer
    # interpreters warn about). The CJK range stays a normal literal so the
    # resulting pattern text is unchanged.
    group_template = (r'(?P<{}>[A-Za-z0-9_?&/=\s\-'
                      '\u4e00-\u9fa5'
                      r']+)')
    # Loop-invariant, so compile it once.
    placeholder_re = re.compile(':(?P<params>[a-z_]+)')

    def _param_group(match):
        # Turn one ":name" placeholder into a named capture group.
        return group_template.format(match.group('params'))

    for define_alias, define_route in OrderedDict(item.Meta.route).items():
        # Only '?' is escaped; any other regex metacharacter in the alias
        # is still interpreted as regex syntax.
        alias = '^' + define_alias.replace('?', r'\?') + '$'
        _alias_re = re.compile(placeholder_re.sub(_param_group, alias))
        self.alias_re.append((define_alias, _alias_re))
        self.items[define_alias].append({
            'item': item,
            'alias_re': _alias_re,
            'alias': define_alias,
            'route': item.__base_url__ + define_route
        })
    logger.info(Fore.GREEN, 'Register', '<%s>' % (item.__name__))
    item_with_ajax = getattr(item.Meta, 'web', {}).get('with_ajax', False)
    if self.browser is None and item_with_ajax:
        self.browser = self.get_browser(settings=self.settings,
                                        item_with_ajax=item_with_ajax)
def page_not_found(error):
    """Serve the requested path from the API cache, parsing it on a miss.

    Args:
        error: The error object passed to this handler; not used.

    Returns:
        A JSON response with status 200 when a result is available,
        ``('Not Found', 404)`` when parsing yields ``None``, or the error
        text with status 500 when an exception is raised.
    """
    start_time = time()
    path = request.full_path
    # Drop the trailing '?' so the path matches the cache/route keys.
    if path.endswith('?'):
        path = path[:-1]
    try:
        res = api.get_cache(path)
        if res is None:
            res = api.parse(path)
            if res is None:
                logger.error('Received', '%s 404' % request.url)
                return 'Not Found', 404
            # Cache only real results; previously a ``None`` parse result
            # was handed to ``set_cache`` before the 404 check.
            api.set_cache(path, res)
        api.update_status('_status_received')
        end_time = time()
        time_usage = end_time - start_time
        logger.info(
            Fore.GREEN, 'Received',
            '%s %s 200 %.2fms' % (request.url, len(res), time_usage * 1000))
        return app.response_class(response=res,
                                  status=200,
                                  mimetype='application/json')
    except Exception as e:
        # Record the failure; it used to be returned without any log entry.
        logger.error('Received', '%s 500 %s' % (request.url, e))
        return str(e), 500
def run(self, host='127.0.0.1', port=5000, **options):
    """Run ``self.app`` on ``host``:``port``, exiting the process on failure.

    Args:
        host: Address handed to ``self.app.run``.
        port: Port handed to ``self.app.run``.
        **options: Extra keyword arguments forwarded to ``self.app.run``.

    Raises:
        SystemExit: With status 1 when the application raises an exception.
    """
    try:
        logger.info(Fore.GREEN, 'Serving', f'http://{host}:{port}')
        self.app.run(host, port, **options)
    except Exception as e:
        logger.error('Serving', '%s' % str(e))
        logger.error('Serving', '%s' % str(traceback.format_exc()))
        # ``exit()`` is the interactive helper installed by ``site`` and it
        # reported success (status 0); raise SystemExit directly with a
        # failing status instead.
        raise SystemExit(1)
def run(self, host="127.0.0.1", port=5000, **options):
    """Run ``self.app`` on ``host``:``port``, exiting the process on failure.

    Args:
        host: Address handed to ``self.app.run``.
        port: Port handed to ``self.app.run``.
        **options: Extra keyword arguments forwarded to ``self.app.run``.

    Raises:
        SystemExit: With status 1 when the application raises an exception.
    """
    try:
        logger.info(Fore.GREEN, "Serving", f"http://{host}:{port}")
        self.app.run(host, port, **options)
    except Exception as e:
        logger.error("Serving", "%s" % str(e))
        logger.error("Serving", "%s" % str(traceback.format_exc()))
        # ``exit()`` is the interactive helper installed by ``site`` and it
        # reported success (status 0); raise SystemExit directly with a
        # failing status instead.
        raise SystemExit(1)
def parse_item(self, html, item):
    """Run ``item.parse`` on ``html`` and return ``{item.__name__: parsed}``.

    The number of parsed entries is logged: as an error when nothing was
    parsed, as an info message otherwise.
    """
    name = item.__name__
    parsed = item.parse(html)
    count = len(parsed)
    summary = 'Item<%s[%s]>' % (name.title(), count)
    if count == 0:
        logger.error('Parsed', summary)
    else:
        logger.info(Fore.CYAN, 'Parsed', summary)
    return {name: parsed}
def set_storage(self, key, value):
    """Store ``value`` under ``key`` unless the key already has a value.

    Returns:
        ``True`` when ``self.storage.save`` reported success for a key that
        was not stored yet; ``False`` when the key already exists, the save
        reported failure, or any exception was raised along the way.
    """
    try:
        # Existing entries are never overwritten.
        if self.storage.get(key) is not None:
            return False
        if not self.storage.save(key, value):
            return False
        logger.info(Fore.BLUE, 'Storage', 'Set<%s>' % key)
        self.update_status('_status_storage_set')
        return True
    except Exception as exc:
        logger.error('Storage', 'Set<{}>'.format(str(exc)))
        return False
def handler(path):
    """Resolve the requested URL through ``self.parse_url`` and return JSON.

    Args:
        path: Path argument supplied to the handler; the lookup itself uses
            ``request.full_path``.

    Returns:
        The JSON response for the parsed results, or a
        ``{'msg': 'System Error', 'code': -1}`` JSON body with status 500
        when an exception is raised.
    """
    try:
        start_time = time()
        full_path = request.full_path.strip('?')
        results = self.parse_url(full_path)
        end_time = time()
        time_usage = end_time - start_time
        res = jsonify(results)
        # ``res.response`` is the iterable of body chunks, so its length is
        # the chunk count (normally 1); log the body size in bytes instead.
        body_size = len(res.get_data())
        logger.info(
            Fore.GREEN, 'Received',
            '%s %s 200 %.2fms' % (request.url, body_size, time_usage * 1000))
        return res
    except Exception as e:
        logger.error('Serving', f'{e}')
        logger.error('Serving', '%s' % str(traceback.format_exc()))
        return jsonify({'msg': 'System Error', 'code': -1}), 500
def new(output_dir):
    """Create a new Toapi project."""
    # Local imports keep this block self-contained.
    import shutil
    import subprocess

    if os.path.exists(output_dir):
        logger.error('New project', 'Directory already exists.')
        return

    logger.info(Fore.GREEN, 'New project',
                'Creating project directory "%s"' % output_dir)
    try:
        # Argument list instead of a shell string: the user-supplied name is
        # never interpreted by a shell, and '--' stops it being read as a
        # git option.
        subprocess.check_call([
            'git', 'clone', '--',
            'https://github.com/toapi/toapi-template', output_dir,
        ])
    except (OSError, subprocess.CalledProcessError) as exc:
        # Previously the clone result was ignored and 'Success!' was printed
        # even when git was missing or the clone failed.
        logger.error('New project', 'Failed to clone template: %s' % exc)
        return

    # Detach the new project from the template's history (portable
    # replacement for ``rm -rf``).
    shutil.rmtree(os.path.join(output_dir, '.git'), ignore_errors=True)

    logger.info(Fore.GREEN, 'New project', 'Success!')
    click.echo('')
    click.echo(' cd %s' % output_dir)
    click.echo(' toapi run')
    click.echo('')
def run(addr):
    """Run app server."""
    # The project is expected to expose ``app.py`` in the current directory.
    base_path = os.getcwd()
    app_path = os.path.join(base_path, 'app.py')
    if not os.path.exists(app_path):
        logger.error('Run', 'Cannot find file "app.py"!')
        return

    try:
        ip, port = addr.split(':')
        # Validate the port here so a typo is reported with the usage hint
        # rather than failing later inside the server.
        port = int(port)
    except (AttributeError, ValueError):
        # AttributeError: addr is not a string (e.g. None).
        # ValueError: wrong number of ':' separated parts or non-numeric port.
        # A bare ``except:`` would also have swallowed KeyboardInterrupt and
        # SystemExit.
        logger.error('Run', 'The "addr" parameter should be like "IP:PORT"')
        return

    # Make the project directory importable, then start its ``api`` object.
    sys.path.append(base_path)
    app = importlib.import_module('app', base_path)
    app.api.serve(ip=ip, port=port)
def handler(path):
    """Resolve the requested URL through ``self.parse_url`` and return JSON.

    Args:
        path: Path argument supplied to the handler; the lookup itself uses
            ``request.full_path``.

    Returns:
        The JSON response for the parsed results, or a
        ``{"msg": "System Error", "code": -1}`` JSON body with status 500
        when an exception is raised.
    """
    try:
        start_time = time()
        full_path = request.full_path.strip("?")
        results = self.parse_url(full_path)
        end_time = time()
        time_usage = end_time - start_time
        res = jsonify(results)
        # ``res.response`` is the iterable of body chunks, so its length is
        # the chunk count (normally 1); log the body size in bytes instead.
        body_size = len(res.get_data())
        logger.info(
            Fore.GREEN,
            "Received",
            "%s %s 200 %.2fms" % (request.url, body_size, time_usage * 1000),
        )
        return res
    except Exception as e:
        logger.error("Serving", f"{e}")
        logger.error("Serving", "%s" % str(traceback.format_exc()))
        return jsonify({"msg": "System Error", "code": -1}), 500
def new(dir_or_project):
    """Create a new Toapi project.

    Giving a dir means start a default template,
    Example: toapi new api

    Giving a github project means start a github template.
    Example: toapi new toapi/toapi-one
    """
    # Local imports keep this block self-contained.
    import shutil
    import subprocess

    # Ignore trailing slashes so "api/" still means the directory "api" and
    # "owner/repo/" does not produce an empty directory name.
    target = dir_or_project.rstrip('/')
    if '/' in target:
        repository = target
        dir_name = target.split('/')[-1]
    else:
        repository = 'toapi/toapi-template'
        dir_name = target

    # Checked for both variants: without it, a failed clone into an existing
    # directory was followed by deleting that directory's own ``.git``.
    if os.path.exists(dir_name):
        logger.error('New project', 'Directory already exists.')
        return

    logger.info(Fore.GREEN, 'New project',
                'Creating project directory "%s"' % dir_name)
    try:
        # Argument list instead of a shell string: user input is never
        # interpreted by a shell, and '--' stops it being read as a git
        # option.
        subprocess.check_call([
            'git', 'clone', '--',
            'https://github.com/%s' % repository, dir_name,
        ])
    except (OSError, subprocess.CalledProcessError) as exc:
        # Previously the clone result was ignored and 'Success!' was printed
        # even when git was missing or the clone failed.
        logger.error('New project', 'Failed to clone template: %s' % exc)
        return

    # Detach the new project from the template's history (portable
    # replacement for ``rm -rf``).
    shutil.rmtree(os.path.join(dir_name, '.git'), ignore_errors=True)

    logger.info(Fore.GREEN, 'New project', 'Success!')
    click.echo('')
    click.echo(' cd %s' % dir_name)
    click.echo(' toapi run')
    click.echo('')
def page_not_found(error):
    """Serve the requested path from the API cache, parsing it on a miss.

    Args:
        error: The error object passed to this handler; not used.

    Returns:
        A JSON response when a result is available, ``('Not Found', 404)``
        when parsing yields ``None``, or the error text with status 500
        when an exception is raised.
    """
    start_time = time()
    path = request.full_path
    # Drop the trailing '?' so the path matches the cache/route keys.
    if path.endswith('?'):
        path = path[:-1]
    try:
        result = api.get_cache(path)
        # Explicit ``is None`` test: with ``or``, a cached but falsy payload
        # (e.g. an empty dict) counted as a miss and was parsed again on
        # every request.
        if result is None:
            result = api.parse(path)
            if result is None:
                logger.error('Received', '%s 404' % request.url)
                return 'Not Found', 404
            # Only freshly parsed results are written; cache hits are no
            # longer stored a second time.
            api.set_cache(path, result)
        res = jsonify(result)
        api.update_status('_status_received')
        end_time = time()
        time_usage = end_time - start_time
        # ``res.response`` is the iterable of body chunks, so its length is
        # the chunk count (normally 1); log the body size in bytes instead.
        body_size = len(res.get_data())
        logger.info(
            Fore.GREEN, 'Received',
            '%s %s 200 %.2fms' % (request.url, body_size, time_usage * 1000))
        return res
    except Exception as e:
        logger.error('Received', '%s 500 %s' % (request.url, e))
        # The bare ``return str(e)`` sent the error text with an implicit
        # 200 status; report it as a server error.
        return str(e), 500