def initialize(self, config, include_version=False):
    """Attach the shared application objects from *config* to this handler.

    Sets ``config``, ``datastore``, ``data`` (a fresh empty dict), ``paths``
    (from ``url_handler.gather_paths()``), ``datastore_handler``,
    ``drivers_handler`` and ``proxy_handler`` on ``self``, in that order.

    Initialisation is best-effort: if any step raises, the traceback is
    printed and the attributes assigned so far are kept, while the remaining
    ones stay unset.

    Args:
        config: Object exposing ``datastore``, ``datastore_handler`` and
            ``drivers_handler`` attributes; also passed to ``ProxyHandler``.
        include_version: Accepted for interface compatibility; not used here.
    """
    try:
        self.config = config
        self.datastore = config.datastore
        self.data = {}
        self.paths = url_handler.gather_paths()
        self.datastore_handler = config.datastore_handler
        self.drivers_handler = config.drivers_handler
        self.proxy_handler = ProxyHandler(config=config)
    except Exception:
        # Only ordinary errors are swallowed; SystemExit and
        # KeyboardInterrupt must still propagate.
        import traceback
        traceback.print_exc()
def get_page_content(start_date, end_date, page_no, proxy=False):
    """Fetch one page of the report listing and return its HTML text.

    POSTs the query form to the report endpoint and validates the response:
    it must not contain any marker from ``error_content`` and must contain
    ``class="report-table"``. On any failure the request is retried; the
    loop only ends when a valid page is returned, so a permanently failing
    endpoint makes this function retry forever.

    Args:
        start_date: Value sent as the ``V_DATE`` form field.
        end_date: Value sent as the ``E_DATE`` form field.
        page_no: Value sent as the ``page.pageNo`` form field.
        proxy: When true, each attempt goes through a host obtained from
            ``ProxyHandler.instance()``; a host that fails is removed from
            the handler before the next attempt.

    Returns:
        The response body text of the first valid page.
    """
    url = "http://datacenter.mep.gov.cn:8099/ths-report/report%21list.action"
    payload = (
        "page.pageNo={}&xmlname=1462259560614&queryflag=open&isdesignpatterns=false&V_DATE={}&E_DATE={}"
        .format(page_no, start_date, end_date)
    )
    headers = {
        'content-type': "application/x-www-form-urlencoded",
        'cache-control': "no-cache"
    }
    while True:
        proxies = None
        if proxy:
            proxy_handler = ProxyHandler.instance()
            proxy_host = proxy_handler.get_proxy_host()
            proxies = {'http': 'http://' + proxy_host}
            logger.debug("fetch page {} with proxy host {}".format(
                page_no, proxy_host))
        try:
            response = requests.request("POST", url, data=payload, headers=headers,
                                        proxies=proxies, timeout=20)
            response_content = response.text
            # A known error marker in the body counts as a failed attempt.
            for ec in error_content:
                if ec in response_content:
                    raise Exception(ec)
            if 'class="report-table"' not in response_content:
                logger.warning(u"未找到数据, page {}".format(response_content))
                raise Exception(u"未找到数据")
            return response_content
        except Exception as e:
            if proxy:
                # Drop the failing proxy so the next attempt picks another.
                proxy_handler.remove_host(proxy_host)
                logger.debug(u"ERR: {} , change proxy host, rm host {}".format(
                    e, proxy_host))
            else:
                logger.debug(u"ERR: {}".format(e))
class ApiHandler(tornado.web.RequestHandler):
    """Tornado request handler that dispatches API paths to functions.

    The function table comes from ``url_handler.gather_paths()`` and is
    indexed first by HTTP method and then by path. Each entry is a dict
    with a ``function`` and the list of argument names (``args``) it takes.
    Results are normalised to ``{'success', 'message', 'data'}`` and written
    back as JSON.
    """

    executor = ThreadPoolExecutor(max_workers=4)
    # HTTP status override; handle_func sets it to 400 on a failed call.
    status = None

    def initialize(self, config, include_version=False):
        """Attach the shared application objects from *config* to the handler.

        Best-effort: on failure the traceback is printed and any attribute
        not yet assigned stays unset. ``include_version`` is not used.
        """
        try:
            self.config = config
            self.datastore = config.datastore
            self.data = {}
            self.paths = url_handler.gather_paths()
            self.datastore_handler = config.datastore_handler
            self.drivers_handler = config.drivers_handler
            self.proxy_handler = ProxyHandler(config=config)
        except Exception:
            import traceback
            traceback.print_exc()

    # Temporary for testing
    def set_default_headers(self):
        """Send permissive CORS headers with every response."""
        self.set_header("Access-Control-Allow-Origin", "*")
        self.set_header("Access-Control-Allow-Headers", "x-requested-with, Authorization, Content-Type")
        self.set_header('Access-Control-Allow-Methods', 'POST, GET, DELETE, OPTIONS, PUT')

    def json(self, obj, status=200):
        """Write *obj* as a JSON response with the given *status*.

        Nothing is written when *obj* is falsy. The response is flushed but
        not finished here. Errors are printed and swallowed.
        """
        try:
            if not obj:
                return
            self.set_header('Content-Type', 'application/json')
            self.set_status(status)
            self.write(json.dumps(obj))
            self.flush()
        except Exception:
            print('Error with ', obj)
            import traceback
            traceback.print_exc()

    def has_error(self, result):
        """Return True if *result* is a string containing a known salt error.

        Matching strings are also logged as errors. Non-string results are
        never treated as errors. May need more work, but is fine for now.
        """
        exceptions = [
            "The minion function caused an exception",
            "is not available",
            "Passed invalid arguments to",
            "ERROR",
        ]
        if not isinstance(result, str):
            return False
        has_error = any(marker in result for marker in exceptions)
        if has_error:
            self.config.logger.error('Salt error: ' + result)
        return has_error

    def formatted_result(self, result):
        """Return True if *result* already has the standard response shape.

        The shape is a mapping whose keys are exactly ``data``, ``success``
        and ``message``. Anything without a ``keys()`` method yields False.
        """
        try:
            result_fields = ['data', 'success', 'message']
            return set(result.keys()) == set(result_fields)
        except Exception:
            return False

    @tornado.gen.coroutine
    def get_proxy_server(self):
        """Return the request's host name if it is a registered server.

        The port is stripped from the ``host`` header and the name is looked
        up in the datastore as a ``server`` object. Resolves to ``None`` when
        no such object exists.
        """
        host = self.request.headers['host'].split(':')[0]
        server = yield self.datastore_handler.get_object(object_type='server', server_name=host)
        if server:
            raise tornado.gen.Return(host)

    def fetch_func(self, method, path, data):
        """Look up the API function entry registered for *method* and *path*.

        Falls back to an ``invalid_url`` entry when the path is unknown.
        Lookup errors are printed and re-raised.
        """
        try:
            api_func = self.paths[method].get(path)
            self.config.logger.info('Getting a call at ' + str(path) + ' with data ' + str(data) + ' and will call function: ' + str(api_func))
            if not api_func:
                api_func = {'function': invalid_url, 'args': ['path', 'method']}
        except Exception:
            import traceback
            traceback.print_exc()
            raise
        return api_func

    @tornado.gen.coroutine
    def handle_user_auth(self, path):
        """Check that the current user may call *path*.

        On failure a JSON error is written to the response. Accounts of type
        ``user`` are limited to their stored function paths plus the
        ``user_allowed`` paths.

        Returns:
            True when the call is allowed, False otherwise.
        """
        auth_successful = True
        try:
            user = yield get_current_user(self)

            if not user:
                self.json({'success': False, 'message': 'User not authenticated properly. ', 'data': {}})
                auth_successful = False
            elif user['type'] == 'user':
                user_functions = yield self.datastore_handler.get_user_functions(user.get('username'))
                user_functions = [x.get('func_path', '') for x in user_functions]
                user_functions += self.paths.get('user_allowed', [])
                if path not in user_functions:
                    self.json({'success': False, 'message': 'User ' + user['username'] + ' tried to access ' + path + ' but it is not in their allowed functions : ' + str(user_functions)})
                    auth_successful = False
        except Exception as e:
            import traceback
            traceback.print_exc()
            # ``e.message`` only exists on Python 2; fall back to str(e).
            detail = getattr(e, 'message', None)
            if detail is None:
                detail = str(e)
            self.json({'success': False, 'message': 'There was an error retrieving user data. ' + detail, 'data': {}})
            auth_successful = False

        raise tornado.gen.Return(auth_successful)

    @tornado.gen.coroutine
    def check_arguments(self, api_func, api_args, call_args):
        """Compare a function's declared arguments with the supplied ones.

        Names provided through ``self.utils`` are ignored on both sides.

        Returns:
            An error message describing missing and unrecognized arguments,
            or an empty string when the two lists agree.
        """
        injected = self.utils.keys()
        api_args = [x for x in api_args if x not in injected]
        call_args = [x for x in call_args if x not in injected]
        # ``__name__`` is the portable spelling of Python 2's ``func_name``.
        func_name = api_func.__name__

        missing_arguments = [x for x in api_args if x not in call_args]
        unrecognized_arguments = [x for x in call_args if x not in api_args]

        error_msg = ''
        if missing_arguments:
            error_msg += 'Missing arguments: {arg_list}. '.format(arg_list=str(missing_arguments))
        if unrecognized_arguments:
            error_msg += 'Unrecognized arguments: {arg_list}. '.format(arg_list=str(unrecognized_arguments))

        if error_msg:
            error_msg = 'Attempted to call {func_name} with arguments {func_args} but called with invalid arguments. {error_msg}'.format(
                func_args=str(api_args),
                func_name=func_name,
                error_msg=error_msg,
            )
        return error_msg

    @tornado.gen.coroutine
    def handle_func(self, api_func, data):
        """Call an API function entry with arguments taken from *data*.

        Keyword arguments are built from the declared argument names found
        in *data*, plus any found in ``self.utils``. The outcome is wrapped
        into the standard response dict unless it is already in that shape
        or ``plain_result`` is set in ``self.data``. When the call fails and
        no status was set, ``self.status`` becomes 400.

        Returns:
            The response value, or ``None`` when the function reports
            ``data_type == 'file'``.
        """
        try:
            api_func, api_args = api_func.get('function'), api_func.get('args')
            api_kwargs = {x: data.get(x) for x in api_args if x in data.keys()} or {}
            api_kwargs.update({x: self.utils[x] for x in api_args if x in self.utils})

            try:
                result = yield api_func(**api_kwargs)
            except TypeError:
                import traceback
                traceback.print_exc()
                # A TypeError is often a signature mismatch; name the bad
                # arguments if that is what happened.
                error_msg = yield self.check_arguments(api_func, api_args, api_kwargs.keys())
                if error_msg:
                    raise TypeError("Function raised a TypeError exception - maybe caused by bad arguments. " + error_msg)
                raise

            if isinstance(result, dict) and result.get('data_type', 'json') == 'file':
                raise tornado.gen.Return(None)
            if self.formatted_result(result) or self.data.get('plain_result'):
                pass
            elif self.has_error(result):
                result = {'success': False, 'message': result, 'data': {}}
            else:
                result = {'success': True, 'message': '', 'data': result}
        except tornado.gen.Return:
            raise
        except Exception as e:
            logging_data = {x: str(data[x])[:50] for x in data}
            self.config.logger.error('An error occured performing request. Function was %s and data was %s. ' % (str(api_func), str(logging_data)))
            import traceback
            traceback.print_exc()
            result = {'success': False, 'message': 'There was an error performing a request : ' + str(e) + ':' + str(getattr(e, 'message', '')), 'data': {}}

        # Plain results may not be dicts or may lack 'success'; only a dict
        # that reports failure triggers the 400 status.
        failed = isinstance(result, dict) and not result.get('success', True)
        if failed and not self.status:
            self.status = 400
        raise tornado.gen.Return(result)

    # NOTE: This is kind of a temporary thing. We're doing triggers now which we're still in the middle of defining.
    # The way triggers work would be fine if they were actually triggered where they're supposed to be. But they're not.
    # So I'm working around it. If the call comes from an app, I pass it to the triggers/triggered call.
    # It probably shouldn't work like that, but from past experience, I feel it's gonna stay this way.
    @tornado.gen.coroutine
    def check_and_resolve_trigger(self, api_func, dash_user):
        """Run the app trigger handler when the called function is ``panel_action``."""
        if api_func['function'] == panel_action:
            yield handle_app_trigger(self, dash_user)

    @tornado.gen.coroutine
    def exec_method(self, method, path, data):
        """Run the full request pipeline for *method* and *path*.

        Steps: hand the request to the proxy handler if the host is a
        registered server; otherwise authenticate (skipped for
        ``user_login``), add predefined arguments for ``user`` accounts,
        call the function, resolve triggers, log the call and write the
        JSON result. Unexpected errors are printed and swallowed.
        """
        try:
            data = data or {}
            proxy_server = yield self.get_proxy_server()
            if proxy_server:
                yield self.proxy_handler.handle_request(self, proxy_server, method, path, data)
                raise tornado.gen.Return()

            self.data = data
            self.data.update({
                'method': method,
                'path': path
            })

            # Objects injected into API functions that declare them as args.
            self.utils = {
                'handler': self,
                'datastore_handler': self.datastore_handler,
                'drivers_handler': self.drivers_handler,
                'datastore': self.datastore,
            }

            user = yield get_current_user(self)
            data['dash_user'] = user

            api_func = self.fetch_func(method, path, data)
            if api_func['function'] not in [user_login]:
                auth_successful = yield self.handle_user_auth(path)
                if not auth_successful:
                    raise tornado.gen.Return({"success": False, "message": "Authentication not successful for " + api_func['function'].__name__, "data": {}})

                if user['type'] == 'user':
                    predef_args = yield get_predefined_arguments(self.datastore_handler, user, data.get('action', path))
                    data.update(predef_args)

            print('Calling ', api_func, ' with data ', data, ' where keys are : ', data.keys())

            result = yield self.handle_func(api_func, data)

            yield self.check_and_resolve_trigger(api_func, data['dash_user'])
            status = self.status or 200
            # The result itself is not logged; an empty dict is recorded.
            yield self.log_message(path=path, data=data, func=api_func['function'], result={})

            self.json(result, status)
        except tornado.gen.Return:
            raise
        except Exception:
            import traceback
            traceback.print_exc()

    @tornado.gen.coroutine
    def get(self, path):
        """Handle GET: single-valued query arguments are unwrapped from their lists."""
        try:
            args = self.request.query_arguments
            # Values are replaced in place; the key set does not change.
            for key in args:
                if len(args[key]) == 1:
                    args[key] = args[key][0]
            yield self.exec_method('get', path, args)
        except Exception:
            import traceback
            traceback.print_exc()

    @tornado.gen.coroutine
    def delete(self, path):
        """Handle DELETE: the request body is parsed as JSON."""
        try:
            data = json.loads(self.request.body)
            yield self.exec_method('delete', path, data)
        except Exception:
            import traceback
            traceback.print_exc()

    @tornado.gen.coroutine
    def post(self, path):
        """Handle POST/PUT: JSON body, or form arguments plus uploaded files."""
        try:
            # A missing Content-Type header is treated as a form submission.
            if 'json' in self.request.headers.get('Content-Type', ''):
                try:
                    data = json.loads(self.request.body)
                except Exception:
                    raise Exception('Bad json in request body : ', self.request.body)
            else:
                data = {x: self.request.arguments[x][0] for x in self.request.arguments}
                data.update(self.request.files)
            yield self.exec_method('post', path, data)
        except Exception:
            import traceback
            traceback.print_exc()

    put = post

    @tornado.gen.coroutine
    def options(self, path):
        """Answer OPTIONS requests with an empty 204 response."""
        self.set_status(204)
        self.finish()

    @tornado.gen.coroutine
    def log_message(self, path, data, func, result):
        """Send a JSON record of the call to syslog (LOG_DEBUG | LOG_LOCAL0).

        All values in *data* are stringified first. *data* must contain a
        ``method`` key. Syslog failures are printed and swallowed.
        """
        data = {x: str(data[x]) for x in data}
        user = yield url_handler.login.get_current_user(self)
        if not user:
            user = {'username': 'unknown', 'type': 'unknown'}
        message = json.dumps({
            'type': data['method'],
            'function': func.__name__,
            'user': user.get('username', 'unknown'),
            'user_type': user['type'],
            'path': path,
            'data': data,
            'time': str(datetime.datetime.now()),
            'result': result,
        })
        try:
            syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_LOCAL0, message)
        except Exception:
            import traceback
            traceback.print_exc()

    @tornado.gen.coroutine
    def send_data(self, source, kwargs, chunk_size):
        """Call *source* repeatedly and stream what it returns to the client.

        String chunks are written and flushed until *source* returns
        something falsy. If *source* returns a ``Future``, it is awaited and
        its result returned instead.

        Args:
            source: Callable that produces the next chunk.
            kwargs: Keyword arguments for *source*. A ``source_args`` entry,
                if present, is removed and passed positionally (for example
                the size argument of ``file.read``). If it contains
                ``kwarg['range_from']``, that value is advanced by
                *chunk_size* after every call.
            chunk_size: Offset increment applied per call.
        """
        args = []
        if kwargs.get('source_args'):
            args = kwargs.pop('source_args')

        offset = 0
        while True:
            print('Calling ', source, ' with ', kwargs)
            data = source(*args, **kwargs)
            offset += chunk_size
            if 'kwarg' in kwargs and 'range_from' in kwargs['kwarg']:
                kwargs['kwarg']['range_from'] = offset

            # If using salt, the result is typically formatted as {"minion": "data"}.
            if isinstance(data, dict) and kwargs.get('tgt') in data:
                data = data[kwargs.get('tgt')]

            if not data:
                break
            if isinstance(data, str):
                print('Writing data')
                self.write(data)
                self.flush()
            elif isinstance(data, Future):
                self.flush()
                data = yield data
                raise tornado.gen.Return(data)

    @tornado.gen.coroutine
    def serve_file(self, source, chunk_size=10**6, salt_source={}, url_source=''):
        """Stream a file to the client as an attachment and finish the response.

        The data source is chosen in this order: a salt command when
        *salt_source* is given, an HTTP fetch when *url_source* is given,
        otherwise the local file at path *source*. Errors are printed and
        swallowed.

        Args:
            source: Path of a local file; used only when neither of the
                other sources is given.
            chunk_size: Read size per chunk and offset step.
            salt_source: Keyword arguments for ``LocalClient().cmd``. The
                dict is updated in place with ``kwarg['range_from']``.
            url_source: URL fetched with ``AsyncHTTPClient``; chunks are
                written as they arrive.
        """
        self.set_header('Content-Type', 'application/octet-stream')
        self.set_header('Content-Disposition', 'attachment; filename=test.zip')

        local_file = None
        try:
            if salt_source:
                client = LocalClient()
                source = client.cmd
                kwargs = salt_source
                kwargs['kwarg'] = kwargs.get('kwarg', {})
                kwargs['kwarg']['range_from'] = 0
            elif url_source:
                def streaming_callback(chunk):
                    self.write(chunk)
                    self.flush()
                source = AsyncHTTPClient().fetch
                # fetch() receives the URL itself plus the streaming callback.
                kwargs = {"request": url_source, 'streaming_callback': streaming_callback}
            else:
                local_file = open(source, 'r')
                source = local_file.read
                kwargs = {"source_args": [chunk_size]}

            print('Serving file with : ', source, kwargs, chunk_size)
            result = yield self.send_data(source, kwargs, chunk_size)
            print('Sent data and result is : ', result)
            self.finish()
        except Exception:
            import traceback
            traceback.print_exc()
        finally:
            # Release the local file handle however streaming ended.
            if local_file is not None:
                local_file.close()
from proxy_handler import ProxyHandler

# Fetch the list of businesses from the project endpoint at import time.
businessses_response = requests.get(
    constants.ENDPOINT,
    params={
        'fetch': 'businesses',
    }
).text
categories = json.loads(businessses_response)
proxy_handler = ProxyHandler()


def begin(business):
    """Walk the review pages of one business.

    Args:
        business: Mapping with ``displayName``, ``identifyingName`` and
            ``numberOfReviews`` entries.
    """
    proxy = proxy_handler.getProxy()
    print(f"Fetching reviews for {business['displayName']} with proxy {proxy.ip}")
    # There are 20 reviews per page, so the page count is the review total
    # divided by 20. Integer division is required: range() rejects floats.
    total_pages = (int(business['numberOfReviews']) // 20) + 1  # Always +1
    for current_page in range(1, total_pages + 1):
        # Retrieve the page
        url = f"https://www.trustpilot.com/review/{business['identifyingName']}?languages=en"
from telegram import ReplyKeyboardMarkup, ReplyKeyboardRemove from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, ConversationHandler from bot_config import token, adminlist from proxy_config import proxy as main_proxy from edu_parser.profile import Profile from edu_parser.exceptions import * from datetime import datetime from proxy_handler import ProxyHandler from requests.exceptions import ProxyError, ConnectionError import os bot = telegram.Bot(token=token) updater = Updater(token=token) dispatcher = updater.dispatcher if not main_proxy: proxies = ProxyHandler(proxy_path='good_proxies.txt') users = {} START_CREDENTIALS = 0 GET_TERM = 1 GET_DAY = 2 PREDICT_SUBJECT = 3 def check_creds(f): def wrap(bot, update): global users chat = update.message.chat_id try: return f(bot, update)