コード例 #1
0
    def initialize(self, config, include_version=False):
        """Attach the shared services carried by ``config`` to this handler.

        Args:
            config: Object exposing ``datastore``, ``datastore_handler`` and
                ``drivers_handler`` attributes; it is also passed on to
                ``ProxyHandler``.
            include_version: Not used by this method.

        Any error raised while wiring things up is printed as a traceback and
        otherwise ignored, so the handler may be left partially initialised
        (attributes assigned before the failure stay set).
        """
        try:
            self.config = config
            self.datastore = config.datastore
            self.data = {}
            self.paths = url_handler.gather_paths()
            self.datastore_handler = config.datastore_handler
            self.drivers_handler = config.drivers_handler

            self.proxy_handler = ProxyHandler(config=config)
        except Exception:
            # Best-effort setup: report the failure and carry on.  Only
            # ``Exception`` is caught so that KeyboardInterrupt / SystemExit
            # still propagate instead of being swallowed.
            import traceback
            traceback.print_exc()
コード例 #2
0
def get_page_content(start_date, end_date, page_no, proxy=False):
    """Fetch one page of the report list, retrying until a valid page arrives.

    The request is a form-encoded POST.  A response counts as valid when it
    contains none of the markers listed in the module-level ``error_content``
    and does contain the ``class="report-table"`` marker.  Any failure
    (request error, error marker found, table marker missing) leads to another
    attempt; there is no retry limit and no delay between attempts.

    Args:
        start_date: Value substituted into the ``V_DATE`` form field.
        end_date: Value substituted into the ``E_DATE`` form field.
        page_no: Value substituted into the ``page.pageNo`` form field.
        proxy: When true, every attempt goes through a host obtained from
            ``ProxyHandler.instance()``; a host whose attempt fails is removed
            from the handler before the next attempt.

    Returns:
        The body text of the first valid response.
    """
    url = "http://datacenter.mep.gov.cn:8099/ths-report/report%21list.action"

    payload = (
        "page.pageNo={}&xmlname=1462259560614&queryflag=open&isdesignpatterns=false&V_DATE={}&E_DATE={}"
    ).format(page_no, start_date, end_date)
    headers = {
        'content-type': "application/x-www-form-urlencoded",
        'cache-control': "no-cache"
    }

    while True:
        proxies = None
        if proxy:
            # A fresh proxy host is picked for every attempt.
            proxy_handler = ProxyHandler.instance()
            proxy_host = proxy_handler.get_proxy_host()
            proxies = {'http': 'http://' + proxy_host}
            logger.debug("fetch page {} with proxy host {}".format(
                page_no, proxy_host))
        try:
            response = requests.request("POST",
                                        url,
                                        data=payload,
                                        headers=headers,
                                        proxies=proxies,
                                        timeout=20)
            response_content = response.text
            for ec in error_content:
                if ec in response_content:
                    raise Exception(ec)

            if 'class="report-table"' not in response_content:
                logger.warn(u"未找到数据, page {}".format(response_content))
                raise Exception(u"未找到数据")

            return response_content
        except Exception as e:
            # ``as`` is valid on Python 2.6+ and Python 3; the old comma form
            # is a syntax error on Python 3.
            if proxy:
                proxy_handler.remove_host(proxy_host)
                logger.debug(u"ERR: {} , change proxy host, rm host {}".format(
                    e, proxy_host))
            else:
                logger.debug(u"ERR: {}".format(e))
        # No ``else`` clause: the ``try`` body either returns or raises, so
        # falling through here always means "retry".
コード例 #3
0
class ApiHandler(tornado.web.RequestHandler):
    """Tornado request handler that dispatches API calls.

    For each request it looks up the API function registered for the HTTP
    method and path, checks that the current user may call it, invokes it and
    writes the result back as JSON.
    """
    # Thread pool with four workers; defined on the class, so a single pool is
    # shared by all handler instances.
    executor = ThreadPoolExecutor(max_workers= 4)
    # HTTP status override. ``handle_func`` sets it to 400 when a call fails
    # and no status is set yet; ``exec_method`` falls back to 200 when it is
    # still None.
    status = None

    def initialize(self, config, include_version=False):
        """Attach the shared services carried by ``config`` to this handler.

        Args:
            config: Object exposing ``datastore``, ``datastore_handler`` and
                ``drivers_handler`` attributes; it is also passed on to
                ``ProxyHandler``.
            include_version: Not used by this method.

        Any error raised while wiring things up is printed as a traceback and
        otherwise ignored, so the handler may be left partially initialised
        (attributes assigned before the failure stay set).
        """
        try:
            self.config = config
            self.datastore = config.datastore
            self.data = {}
            self.paths = url_handler.gather_paths()
            self.datastore_handler = config.datastore_handler
            self.drivers_handler = config.drivers_handler

            self.proxy_handler = ProxyHandler(config=config)
        except Exception:
            # Best-effort setup: report the failure and carry on.  Only
            # ``Exception`` is caught so that KeyboardInterrupt / SystemExit
            # still propagate instead of being swallowed.
            import traceback
            traceback.print_exc()

    #Temporary for testing
    def set_default_headers(self):
        """Set the CORS response headers: any origin, a fixed list of request
        headers and the five HTTP methods this handler implements."""
        cors_headers = (
            ("Access-Control-Allow-Origin", "*"),
            ("Access-Control-Allow-Headers", "x-requested-with, Authorization, Content-Type"),
            ('Access-Control-Allow-Methods', 'POST, GET, DELETE, OPTIONS, PUT'),
        )
        for header_name, header_value in cors_headers:
            self.set_header(header_name, header_value)


    def json(self, obj, status=200):
        """Write ``obj`` to the response as JSON and flush it.

        Args:
            obj: JSON-serialisable value.  Falsy values (``None``, ``{}``,
                ``[]``, ...) are skipped: nothing is written for them.
            status: HTTP status code to set on the response.

        Errors raised while serialising or writing are printed and swallowed.
        The request is not finished here.
        """
        try:
            if not obj:
                return
            self.set_header('Content-Type', 'application/json')
            self.set_status(status)
            self.write(json.dumps(obj))
            self.flush()
        except Exception:
            # Narrowed from a bare ``except`` so interpreter-exit signals
            # (KeyboardInterrupt / SystemExit) are not swallowed.
            print ('Error with ', obj)
            import traceback
            traceback.print_exc()
#        self.finish()


    def has_error(self, result):
        """Return True if ``result`` is a string containing a known salt error marker.

        A matching string is also logged through ``self.config.logger``.
        Non-string results are never reported as errors.  The marker list is a
        heuristic and may need extending.
        """
        error_markers = (
            "The minion function caused an exception",
            "is not available",
            "Passed invalid arguments to",
            "ERROR",
        )
        if not isinstance(result, str):
            return False

        found = any(marker in result for marker in error_markers)
        if found:
            self.config.logger.error('Salt error: ' + result)
        return found


    def formatted_result(self, result):
        """Return True if ``result`` has exactly the keys data, success and message.

        The expected shape is
        ``{'data': {'field': []}, 'success': True/False, 'message': '...'}``
        where the message is usually empty on success.  Only the key set is
        checked, not the values.  Anything without a usable ``keys()`` method
        yields False.
        """
        expected_keys = {'data', 'success', 'message'}
        try:
            return set(result.keys()) == expected_keys
        except Exception:
            # Not a mapping (e.g. a string or None): treat as unformatted.
            return False

    @tornado.gen.coroutine
    def get_proxy_server(self):
        """Resolve to the request's host name when the datastore has a 'server'
        object registered under that name; resolve to None otherwise.

        The host name is the ``host`` request header with any ``:port`` suffix
        removed.
        """
        host_header = self.request.headers['host']
        hostname = host_header.split(':')[0]
        known_server = yield self.datastore_handler.get_object(object_type = 'server', server_name = hostname)
        if not known_server:
            return
        raise tornado.gen.Return(hostname)

    def fetch_func(self, method, path, data):
        """Look up the API function entry registered for ``method`` and ``path``.

        The lookup result is logged (together with ``data``) before any
        fallback is applied.  When nothing is registered for the path, an
        entry pointing at ``invalid_url`` is returned instead.  Lookup errors
        are printed and re-raised.
        """
        try:
            registered = self.paths[method].get(path)
            self.config.logger.info(
                'Getting a call at ' + str(path)
                + ' with data ' + str(data)
                + ' and will call function: ' + str(registered)
            )
            if registered:
                return registered
            return {'function' : invalid_url, 'args' : ['path', 'method']}
        except:
            import traceback
            traceback.print_exc()
            raise


    @tornado.gen.coroutine
    def handle_user_auth(self, path):
        """Check that the current user is allowed to call ``path``.

        A request is refused when no user can be resolved, or when the user's
        type is 'user' and ``path`` is neither among the functions returned by
        the datastore for that user nor in ``self.paths['user_allowed']``.
        Users of any other type are not restricted here.

        On refusal, and on any error while retrieving the user data, a JSON
        error body is written to the response.

        Resolves to:
            True when access is allowed, False otherwise.
        """
        auth_successful = True
        try:
            user = yield get_current_user(self)
            if not user:
                self.json({'success' : False, 'message' : 'User not authenticated properly. ', 'data' : {}})
                auth_successful = False
            elif user['type'] == 'user' :
                user_functions = yield self.datastore_handler.get_user_functions(user.get('username'))
                user_functions = [x.get('func_path', '') for x in user_functions]
                user_functions += self.paths.get('user_allowed', [])
                if path not in user_functions:
                    self.json({'success' : False, 'message' : 'User ' + user['username'] + ' tried to access ' + path + ' but it is not in their allowed functions : ' + str(user_functions)})
                    auth_successful = False
        except Exception as e:
            import traceback
            traceback.print_exc()

            # ``e.message`` exists only on Python 2; reading it on Python 3
            # would raise AttributeError from inside this handler.  Fall back
            # to ``str(e)`` when the attribute is missing.
            error_text = getattr(e, 'message', None)
            if error_text is None:
                error_text = str(e)
            self.json({'success' : False, 'message' : 'There was an error retrieving user data. ' + error_text, 'data' : {}})
            auth_successful = False

        raise tornado.gen.Return(auth_successful)


    @tornado.gen.coroutine
    def check_arguments(self, api_func, api_args, call_args):
        """Compare the arguments a function declares with the ones supplied.

        Names that are keys of ``self.utils`` are ignored on both sides.

        Args:
            api_func: The callable about to be invoked (used for its name).
            api_args: Argument names the function is registered with.
            call_args: Argument names actually supplied.

        Resolves to:
            An empty string when both lists agree, otherwise a message
            listing the missing and/or unrecognized argument names.
        """
        injected = self.utils
        api_args = [x for x in api_args if x not in injected]
        call_args = [x for x in call_args if x not in injected]
        # ``__name__`` works on Python 2 and 3 (``func_name`` is Python 2
        # only); fall back to the repr for callables without a name.
        func_name = getattr(api_func, '__name__', str(api_func))

        missing_arguments = [x for x in api_args if x not in call_args]
        unrecognized_arguments = [x for x in call_args if x not in api_args]

        error_msg = ''
        if missing_arguments:
            error_msg += 'Missing arguments: {arg_list}. '.format(arg_list=str(missing_arguments))
        if unrecognized_arguments:
            error_msg += 'Unrecognized arguments: {arg_list}. '.format(arg_list=str(unrecognized_arguments))

        if error_msg:
            error_msg = 'Attempted to call {func_name} with arguments {func_args} but called with invalid arguments. {error_msg}'.format(
                func_args=str(api_args), func_name=func_name, error_msg=error_msg
            )

        # Plain ``return`` is fine here: the body has no ``yield``, so the
        # coroutine decorator simply wraps this value in a Future.
        return error_msg


    @tornado.gen.coroutine
    def handle_func(self, api_func, data):
        """Call an API function and normalise what it returns.

        Args:
            api_func: Entry with a 'function' callable and an 'args' list of
                the argument names it takes.
            data: Request data; arguments are taken from it by name, then
                topped up from ``self.utils``.

        Resolves to:
            None when the function returned a dict whose 'data_type' is
            'file'.  Otherwise the result: returned untouched when it is
            already in the data/success/message format or when
            ``self.data['plain_result']`` is set, wrapped as a failure when
            it is a string with an error marker, wrapped as a success
            otherwise.  Any exception from the call is turned into a failure
            result.

        Side effect: sets ``self.status`` to 400 when the result is a failure
        and no status has been set yet.
        """
        try:
            api_func, api_args = api_func.get('function'), api_func.get('args')
            api_kwargs = {x : data[x] for x in api_args if x in data}
            api_kwargs.update({x : self.utils[x] for x in api_args if x in self.utils})

            # The outcome of this pre-check is not used; the arguments are
            # checked again below if the call raises TypeError.
            yield self.check_arguments(api_func, api_args, api_kwargs.keys())

            try:
                result = yield api_func(**api_kwargs)
            except TypeError:
                import traceback
                traceback.print_exc()
                error_msg = yield self.check_arguments(api_func, api_args, api_kwargs.keys())
                if error_msg:
                    raise TypeError("Function raised a TypeError exception - maybe caused by bad arguments. " + error_msg)
                raise

            if isinstance(result, dict):
                if result.get('data_type', 'json') == 'file' :
                    raise tornado.gen.Return(None)
            if self.formatted_result(result) or self.data.get('plain_result'):
                pass
            elif self.has_error(result):
                result = {'success' : False, 'message' : result, 'data' : {}}
            else:
                result = {'success' : True, 'message' : '', 'data' : result}
        except tornado.gen.Return:
            raise
        except Exception as e:
            logging_data = {x : str(data[x])[:50] for x in data}
            self.config.logger.error('An error occured performing request. Function was %s and data was %s. ' % (str(api_func), str(logging_data)))
            import traceback
            traceback.print_exc()
            # ``e.message`` exists only on Python 2; use an empty string when
            # it is absent so this handler cannot itself raise on Python 3.
            legacy_message = getattr(e, 'message', '')
            result = {'success' : False, 'message' : 'There was an error performing a request : ' + str(e) + ':' + str(legacy_message), 'data' : {}}

        # NOTE(review): with 'plain_result' set, ``result`` may not be a dict
        # with a 'success' key, in which case this lookup raises - confirm
        # what plain results look like.
        if not result['success'] and not self.status:
            self.status = 400
        raise tornado.gen.Return(result)


    # NOTE: This is kind of a temporary thing. We're doing triggers now which we're still in the middle of defining
    # The way triggers work would be fine if they were actually triggered where they're supposed to be. But they're not. 
    # So I'm working around it. If the call comes from an app, I pass it to the triggers/triggered call. 
    # It probably shouldn't work like that, but from past experience, I feel it's gonna stay this way. 
    @tornado.gen.coroutine
    def check_and_resolve_trigger(self, api_func, dash_user):
        """Run ``handle_app_trigger`` when the resolved function is ``panel_action``;
        do nothing for any other function."""
        is_panel_call = api_func['function'] == panel_action
        if not is_panel_call:
            return
        yield handle_app_trigger(self, dash_user)

    @tornado.gen.coroutine
    def exec_method(self, method, path, data):
        """Run the API function registered for ``method``/``path`` and write its result.

        Steps, in order:
          1. If ``get_proxy_server`` resolves to a host, pass the request to
             ``self.proxy_handler.handle_request`` and stop.
          2. Store ``data`` (plus 'method' and 'path') on ``self.data`` and
             build ``self.utils``, the values that can be injected into API
             functions by argument name.
          3. Look the function up with ``fetch_func``.  For everything except
             ``user_login``, check access with ``handle_user_auth`` and, for
             accounts of type 'user', merge predefined arguments into ``data``.
          4. Call the function through ``handle_func``, resolve triggers, log
             the call and write the result as JSON.

        Args:
            method: Lower-case HTTP method name used as key into ``self.paths``.
            path: Request path captured by the route.
            data: Request arguments; ``None`` is treated as an empty dict.

        Resolves to:
            None normally.  When the access check fails it resolves to a
            failure dict (the error response itself has already been written
            by ``handle_user_auth``).

        Unexpected errors are printed and swallowed.
        """
        try:
            data = data or {}
            proxy_server = yield self.get_proxy_server()
            if proxy_server:
                yield self.proxy_handler.handle_request(self, proxy_server, method, path, data)
                raise tornado.gen.Return()
            self.data = data
            self.data.update({
                'method' :  method,
                'path' : path
            })
            self.utils = {
                'handler' : self,
                'datastore_handler' : self.datastore_handler,
                'drivers_handler' : self.drivers_handler,
                'datastore' : self.datastore,
            }

            user = yield get_current_user(self)
            data['dash_user'] = user
            api_func = self.fetch_func(method, path, data)

            if api_func['function'] not in [user_login]:
                auth_successful = yield self.handle_user_auth(path)
                if not auth_successful:
                    # ``__name__`` works on Python 2 and 3 (``func_name`` is
                    # Python 2 only).
                    target = api_func['function']
                    target_name = getattr(target, '__name__', str(target))
                    raise tornado.gen.Return({"success" : False, "message" : "Authentication not successful for " + target_name, "data" : {}})

                if user['type'] == 'user' :
                    predef_args = yield get_predefined_arguments(self.datastore_handler, user, data.get('action', path))
                    data.update(predef_args)

            # NOTE(review): this prints the full request data to stdout.
            print ('Calling ', api_func, ' with data ', data, ' where keys are : ', data.keys())
            result = yield self.handle_func(api_func, data)
            yield self.check_and_resolve_trigger(api_func, data['dash_user'])

            status = self.status or 200
            yield self.log_message(path = path, data = data, func = api_func['function'], result = {})
            self.json(result, status)
        except tornado.gen.Return:
            # Return is how a coroutine delivers its value; let it through.
            raise
        except Exception:
            import traceback
            traceback.print_exc()

    @tornado.gen.coroutine
    def get(self, path):
        """Handle GET: pass the query arguments to ``exec_method``.

        Query arguments arrive as lists of values; an argument with exactly
        one value is unwrapped to that value, others are left as lists.  The
        unwrapping is done in place on ``self.request.query_arguments``.
        Errors are printed and swallowed.
        """
        try:
            args = self.request.query_arguments
            # Only existing keys are reassigned, so the dict does not change
            # size while it is being iterated.
            for name in args:
                if len(args[name]) == 1:
                    args[name] = args[name][0]
            yield self.exec_method('get', path, args)
        except Exception:
            import traceback
            traceback.print_exc()


    @tornado.gen.coroutine
    def delete(self, path):
        """Handle DELETE: decode the JSON body and pass it to ``exec_method``.

        An empty body is treated as an empty argument dict; previously it
        made the JSON decoder raise and the request was dropped without any
        call being made.  Errors are printed and swallowed.
        """
        try:
            body = self.request.body
            data = json.loads(body) if body else {}
            yield self.exec_method('delete', path, data)
        except Exception:
            import traceback
            traceback.print_exc()

    @tornado.gen.coroutine
    def post(self, path):
        """Handle POST (and PUT, via the class-level alias): build the
        argument dict and pass it to ``exec_method``.

        When the Content-Type header mentions json, the body is decoded as
        JSON.  Otherwise the first value of every request argument is used
        and uploaded files are merged in.  A request without a Content-Type
        header takes the second route instead of failing on the header lookup.
        Errors are printed and swallowed.
        """
        try:
            content_type = self.request.headers.get('Content-Type', '')
            if 'json' in content_type:
                try:
                    data = json.loads(self.request.body)
                except Exception:
                    raise Exception('Bad json in request body : ', self.request.body)
            else:
                data = {x : self.request.arguments[x][0] for x in self.request.arguments}
                data.update(self.request.files)
            yield self.exec_method('post', path, data)

        except Exception:
            import traceback
            traceback.print_exc()

    put = post


    @tornado.gen.coroutine
    def options(self, path):
        """Answer an OPTIONS request with status 204 and no body."""
        no_content = 204
        self.set_status(no_content)
        self.finish()



    @tornado.gen.coroutine
    def log_message(self, path, data, func, result):
        """Send a JSON record describing an API call to syslog.

        Args:
            path: Request path.
            data: Request data; every value is stringified for the record and
                the 'method' entry is reported as the record's 'type'.
            func: The API function that was called (reported by name).
            result: Value stored under 'result' in the record.

        The record is written with priority LOG_DEBUG on facility LOG_LOCAL0.
        A failure of the syslog call itself is printed and swallowed.
        """

        data = {x : str(data[x]) for x in data}
        user = yield url_handler.login.get_current_user(self)
        if not user:
            user = {'username' : 'unknown', 'type' : 'unknown'}
        message = json.dumps({
            'type' : data['method'],
            # ``__name__`` works on Python 2 and 3 (``func_name`` is Python 2
            # only); fall back to the repr for callables without a name.
            'function' : getattr(func, '__name__', str(func)),
            'user' : user.get('username', 'unknown'),
            'user_type' : user['type'],
            'path' : path,
            'data' : data,
            'time' : str(datetime.datetime.now()),
            'result' : result,
        })
        try:
            syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_LOCAL0, message)
        except Exception:
            import traceback
            traceback.print_exc()


    @tornado.gen.coroutine
    def send_data(self, source, kwargs, chunk_size):
        """Call ``source`` repeatedly and stream what it returns to the response.

        Args:
            source: Callable producing the data; invoked as
                ``source(*args, **kwargs)`` on every iteration.
            kwargs: Keyword arguments for ``source``.  A truthy 'source_args'
                entry is removed and used as the positional arguments.  If it
                has a 'kwarg' dict containing 'range_from', that value is
                overwritten with the running offset after every call.
            chunk_size: Amount added to the offset after every call.

        The loop ends when ``source`` returns something falsy.  A ``str``
        result is written and flushed.  A ``Future`` result is awaited and its
        value becomes the value of this coroutine, which ends the loop.

        NOTE(review): a truthy result that is neither ``str`` nor ``Future``
        is not written and the loop simply calls ``source`` again - confirm
        that the sources used here cannot produce such values.
        """
        args = []

        # kwargs may carry a 'source_args' field holding positional arguments for the source.
        # For instance, file.read() takes its "size" argument positionally.
        if kwargs.get('source_args'):
            args = kwargs.pop('source_args')

        offset = 0
        while True:
            print ('Calling ', source, ' with ', kwargs)
            data = source(*args, **kwargs)

            # Advance the offset and, when the kwargs carry a 'range_from'
            # entry, store it there for the next call.
            offset += chunk_size
            if 'kwarg' in kwargs:
                if 'range_from' in kwargs['kwarg'].keys():
                    kwargs['kwarg']['range_from'] = offset

            if type(data) == dict: # When using salt, the result is typically formatted as {"minion" : "data"}
                if kwargs.get('tgt') in data:
                    data = data[kwargs.get('tgt')]
            # A falsy result (e.g. an empty string) ends the stream.
            if not data:
                break


            if type(data) == str:
                print ('Writing data')
                self.write(data)
                self.flush()

            elif type(data) == Future:
                # Wait for the Future and hand its value back to the caller.
                self.flush()
                data = yield data
                raise tornado.gen.Return(data)




    @tornado.gen.coroutine
    def serve_file(self, source, chunk_size = 10**6, salt_source = None, url_source = ''):
        """Stream a file to the client as an attachment and finish the request.

        Exactly one data source is used, chosen in this order:
          * ``salt_source`` (truthy): keyword arguments for a salt
            ``LocalClient().cmd`` call; 'kwarg'/'range_from' is initialised
            to 0 and advanced by ``send_data``.
          * ``url_source`` (truthy): URL fetched with ``AsyncHTTPClient``;
            chunks are written to the response as they arrive.
          * otherwise ``source`` is a local path opened in text mode and read
            ``chunk_size`` characters at a time.

        Args:
            source: Path of a local file (used only for the last case).
            chunk_size: Read size / offset step passed to ``send_data``.
            salt_source: Optional dict of salt call arguments.  It is copied,
                so the caller's dict is not modified.
            url_source: Optional URL to stream from.

        The response is sent as application/octet-stream with the fixed
        download name test.zip.  Errors are printed and swallowed; a local
        file opened here is always closed.
        """

        self.set_header('Content-Type', 'application/octet-stream')
        self.set_header('Content-Disposition', 'attachment; filename=test.zip')

        local_file = None
        try:
            if salt_source:
                client = LocalClient()
                source = client.cmd
                # Work on copies: send_data rewrites 'range_from' on every
                # iteration and must not touch the caller's dicts.
                kwargs = dict(salt_source)
                kwargs['kwarg'] = dict(kwargs.get('kwarg') or {})
                kwargs['kwarg']['range_from'] = 0
            elif url_source:
                def streaming_callback(chunk):
                    self.write(chunk)
                    self.flush()
                source = AsyncHTTPClient().fetch
                # The URL string is passed as ``request`` so the callback can
                # be supplied as a keyword argument to fetch().
                kwargs = {"request" : url_source, 'streaming_callback' : streaming_callback}
            else:
                # Text mode is kept on purpose: send_data only writes ``str``
                # chunks.
                local_file = open(source, 'r')
                source = local_file.read
                kwargs = {"source_args" : [chunk_size]}

            print ('Serving file with : ', source, kwargs, chunk_size)
            result = yield self.send_data(source, kwargs, chunk_size)
            print ('Sent data and result is : ', result)
            self.finish()
        except Exception:
            import traceback
            traceback.print_exc()
        finally:
            if local_file is not None:
                local_file.close()
コード例 #4
0
from proxy_handler import ProxyHandler


# Raw text of the endpoint's reply to a 'businesses' fetch.  The request is
# issued once, when this module is loaded.
businessses_response = requests.get(constants.ENDPOINT, params={'fetch': 'businesses'}).text

# The reply decoded from JSON.
categories = json.loads(businessses_response)

# Module-wide proxy source used by begin().
proxy_handler = ProxyHandler()


def begin(business):
    """Walk the review pages of one business.

    Args:
        business: Mapping with at least 'displayName', 'numberOfReviews' and
            'identifyingName'.

    A proxy is taken from the module-level ``proxy_handler`` and the review
    page URL is built for every page number.
    """
    proxy = proxy_handler.getProxy()
    print (f"Fetching reviews for {business['displayName']} with proxy {proxy.ip}")

    # There are 20 reviews per page, so the page count is the number of
    # reviews divided by 20, plus one.  Floor division keeps the count an
    # int; true division yields a float, which range() rejects.
    total_pages = (business['numberOfReviews'] // 20) + 1 # Always +1

    for current_page in range(1, total_pages + 1):
        # Retrieve the page
        # NOTE(review): the URL does not include current_page - confirm how
        # the page number is meant to be passed.
        url = f"https://www.trustpilot.com/review/{business['identifyingName']}?languages=en"
コード例 #5
0
from telegram import ReplyKeyboardMarkup, ReplyKeyboardRemove
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, ConversationHandler
from bot_config import token, adminlist
from proxy_config import proxy as main_proxy
from edu_parser.profile import Profile
from edu_parser.exceptions import *
from datetime import datetime
from proxy_handler import ProxyHandler
from requests.exceptions import ProxyError, ConnectionError
import os

# Telegram client objects, all built from the configured token.
bot = telegram.Bot(token=token)
updater = Updater(token=token)
dispatcher = updater.dispatcher

# With no main proxy configured, load proxies from the local list file.
# NOTE(review): ``proxies`` stays undefined when main_proxy is set - confirm
# that later code only reads it in the no-main-proxy case.
if not main_proxy:
    proxies = ProxyHandler(proxy_path='good_proxies.txt')

users = {}

# Numeric step identifiers 0-3 (presumably ConversationHandler states - confirm).
START_CREDENTIALS, GET_TERM, GET_DAY, PREDICT_SUBJECT = range(4)


def check_creds(f):
    def wrap(bot, update):
        global users
        chat = update.message.chat_id
        try:
            return f(bot, update)