def post_jobs_for_site(self, site_id):
    """
    Schedule background jobs for the site identified by `site_id`.

    Note: the response maps the site ID to a single tracker ID, so when
    several jobs are requested only the tracker of the last job is
    reported.

    **Example Request**

    .. sourcecode:: json

        {
            "jobs": [
                {
                    "name": "test",
                },
                ...
            ]
        }

    **Example Response**

    .. sourcecode:: json

        {
            "tracker_ids": {
                "1": "tracker.12344565",
            }
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :<json list jobs: a list of jobs to schedule
    :<json string jobs[n].name: name of job

    :>header Content-Type: application/json
    :>json object tracker_ids: worker tracking ids
        {site ID: tracker ID}

    :status 202: scheduled
    :status 400: invalid request body
    :status 401: authentication required
    :status 404: site does not exist
    """

    allowed_job_names = ['test']
    body_schema = {
        'jobs': {'type': list, 'required': True},
    }
    job_schema = {
        'name': {'type': str, 'required': True},
    }

    # Resolve the target site before looking at the request body.
    site_id = get_int_arg('site_id', site_id)
    site = g.db.query(Site).filter(Site.id == site_id).first()

    if site is None:
        raise NotFound("Site '%s' does not exist." % site_id)

    # Validate the whole payload up front so nothing is enqueued for a
    # request that is only partially valid.
    payload = request.get_json()
    validate_request_json(payload, body_schema)
    requested_jobs = payload['jobs']

    for entry in requested_jobs:
        validate_json_attr('name', job_schema, entry)

        if entry['name'] in allowed_job_names:
            continue

        raise BadRequest('`{}` does not exist in available'
                         ' jobs: {}'.format(entry['name'],
                                            ','.join(allowed_job_names)))

    # Enqueue one worker task per requested job.
    trackers = {}

    for entry in requested_jobs:
        tracker = 'tracker.{}'.format(random_string(10))
        trackers[site.id] = tracker

        if entry['name'] != 'test':
            continue

        job_description = 'Testing site "{}"'.format(site.name)
        worker.scrape.test_site.enqueue(
            site_id=site.id,
            tracker_id=tracker,
            jobdesc=job_description,
            user_id=g.user.id,
        )

    response = jsonify(tracker_ids=trackers)
    response.status_code = 202

    return response
def post_jobs_for_sites(self):
    """
    Schedule background jobs for every site.

    Note: the response maps each site ID to a single tracker ID, so when
    several jobs are requested only the tracker of the last job is
    reported for each site.

    **Example Request**

    .. sourcecode:: json

        {
            "jobs": [
                {
                    "name": "test",
                },
                ...
            ]
        }

    **Example Response**

    .. sourcecode:: json

        {
            "tracker_ids": {
                "1": "tracker.12344565",
            }
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :<json list jobs: a list of jobs to schedule
    :<json string jobs[n].name: name of job

    :>header Content-Type: application/json
    :>json object tracker_ids: worker tracking ids
        {site ID: tracker ID}

    :status 202: scheduled
    :status 400: invalid request body
    :status 401: authentication required
    """

    allowed_job_names = ['test']
    body_schema = {
        'jobs': {'type': list, 'required': True},
    }
    job_schema = {
        'name': {'type': str, 'required': True},
    }

    # Validate the whole payload before touching the database or queue.
    payload = request.get_json()
    validate_request_json(payload, body_schema)
    requested_jobs = payload['jobs']

    for entry in requested_jobs:
        validate_json_attr('name', job_schema, entry)

        if entry['name'] in allowed_job_names:
            continue

        raise BadRequest('`{}` does not exist in available'
                         ' jobs: {}'.format(entry['name'],
                                            ','.join(allowed_job_names)))

    # Load every site once; each requested job is fanned out over them.
    all_sites = g.db.query(Site).all()

    trackers = {}

    for entry in requested_jobs:
        for target in all_sites:
            tracker = 'tracker.{}'.format(random_string(10))
            trackers[target.id] = tracker

            if entry['name'] != 'test':
                continue

            app.queue.schedule_site_test(
                site=target,
                tracker_id=tracker,
            )

    response = jsonify(tracker_ids=trackers)
    response.status_code = 202

    return response
def put(self, id_):
    '''
    Update the site identified by `id`.

    Every request field is optional; only the fields present in the
    request body are validated and updated.

    **Example Request**

    .. sourcecode:: json

        {
            "name": "bebo",
            "url": "http://bebo.com/usernames/search=%s",
            "status_code": 200,
            "match_type": "text",
            "match_expr": "Foo Bar Baz",
            "test_username_pos": "bob",
            "test_username_neg": "adfjf393rfjffkjd",
            "headers": {"referer": "http://www.google.com"},
            "censor_images": false,
            "wait_time": 5,
            "use_proxy": false,
        }

    **Example Response**

    .. sourcecode:: json

        {
            "id": 2,
            "name": "bebo",
            "search_text": "Bebo User Page.</title>",
            "status_code": 200,
            "match_type": "text",
            "match_expr": "Foo Bar Baz",
            "url": "https://bebo.com/usernames/search=%s",
            "test_username_pos": "bob",
            "test_username_neg": "adfjf393rfjffkjd",
            "test_status": "f",
            "tested_at": "2016-01-01T00:00:00.000000+00:00",
            "headers": {"referer": "http://www.google.com"},
            "censor_images": false,
            "wait_time": 5,
            "use_proxy": false,
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :<json string name: name of site
    :<json string url: username search url for the site
    :<json int status_code: the status code to check for
        determining a match (nullable)
    :<json string match_type: type of match (nullable)
    :<json string match_expr: expression to use for determining
        a page match (nullable)
    :<json string test_username_pos: username that exists on site
        (used for testing)
    :<json string test_username_neg: username that does not
        exist on site (used for testing)
    :<json array headers: custom headers
    :<json bool censor_images: whether to censor images
        from this profile
    :<json int wait_time: time (in seconds) to wait for updates
        after page is loaded
    :<json bool use_proxy: whether to proxy requests
        for this profile URL

    :>header Content-Type: application/json
    :>json int id: unique identifier for site
    :>json str name: name of site
    :>json str url: username search url for the site
    :>json int status_code: the status code to check for
        determining a match (nullable)
    :>json string match_type: type of match
        (see get_match_types() for valid match types) (nullable)
    :>json string match_expr: expression to use for determining
        a page match (nullable)
    :>json str test_status: results of username test
    :>json str tested_at: timestamp of last test
    :>json str test_username_pos: username that exists on site
        (used for testing)
    :>json str test_username_neg: username that does not
        exist on site (used for testing)
    :>json array headers: custom headers
    :>json bool censor_images: whether to censor images from
        this profile
    :>json int wait_time: time (in seconds) to wait for updates
        after page is loaded
    :>json bool use_proxy: whether to proxy requests
        for this profile URL

    :status 200: updated
    :status 400: invalid request body
    :status 401: authentication required
    :status 404: site does not exist
    '''

    # Get site.
    id_ = get_int_arg('id_', id_)
    site = g.db.query(Site).filter(Site.id == id_).first()

    if site is None:
        raise NotFound("Site '%s' does not exist." % id_)

    request_json = request.get_json()

    # A missing or non-object body would otherwise fail with a
    # TypeError on the membership tests below.
    if not isinstance(request_json, dict):
        raise BadRequest('Request body must be a JSON object.')

    # Validate data and set attributes
    if 'name' in request_json:
        validate_json_attr('name', _site_attrs, request_json)
        site.name = request_json['name'].strip()

    if 'url' in request_json:
        validate_json_attr('url', _site_attrs, request_json)
        site.url = request_json['url'].lower().strip()

    if 'match_expr' in request_json:
        validate_json_attr('match_expr', _site_attrs, request_json)
        site.match_expr = request_json['match_expr']

    if 'match_type' in request_json:
        validate_json_attr('match_type', _site_attrs, request_json)
        match_type = request_json['match_type']
        # match_type is nullable, so only strip real strings.
        site.match_type = None if match_type is None else match_type.strip()

    if 'status_code' in request_json:
        validate_json_attr('status_code', _site_attrs, request_json)
        status = request_json['status_code']
        site.status_code = None if status is None else int(status)

    # Only re-check the match criteria when the request touches them, and
    # check the site's resulting values rather than indexing the request:
    # a partial update (e.g. only `name`) must not fail on absent keys.
    match_criteria = ('match_type', 'match_expr', 'status_code')

    if any(key in request_json for key in match_criteria):
        has_page_match = (site.match_type is not None and
                          site.match_expr is not None)

        if not has_page_match and site.status_code is None:
            raise BadRequest('At least one of the '
                             'following is required: '
                             'status code or page match.')

    if 'test_username_pos' in request_json:
        validate_json_attr('test_username_pos', _site_attrs, request_json)
        site.test_username_pos = (
            request_json['test_username_pos'].lower().strip())

    if 'test_username_neg' in request_json:
        validate_json_attr('test_username_neg', _site_attrs, request_json)
        site.test_username_neg = (
            request_json['test_username_neg'].lower().strip())

    if 'headers' in request_json:
        validate_json_attr('headers', _site_attrs, request_json)
        site.headers = request_json['headers']

    if 'censor_images' in request_json:
        validate_json_attr('censor_images', _site_attrs, request_json)
        site.censor_images = request_json['censor_images']

    if 'use_proxy' in request_json:
        validate_json_attr('use_proxy', _site_attrs, request_json)
        site.use_proxy = request_json['use_proxy']

    if 'wait_time' in request_json:
        validate_json_attr('wait_time', _site_attrs, request_json)
        site.wait_time = request_json['wait_time']

    # Save the updated site
    try:
        g.db.commit()
    except DBAPIError as e:
        g.db.rollback()
        raise BadRequest('Database error: {}'.format(e))

    # Send redis notifications
    notify_mask_client(
        channel='site',
        message={
            'site': site.as_dict(),
            'status': 'updated',
            'resource': None
        }
    )

    response = jsonify(site.as_dict())
    response.status_code = 200

    # Send response.
    return response
def put(self, id_):
    '''
    Update the category identified by `id`.

    Only `name` and `sites` may be changed; any other field in the
    request body is rejected.

    **Example Request**

    .. sourcecode:: json

        {
            "name": "priority sites",
            "sites": [1, 5]
        }

    **Example Response**

    .. sourcecode:: json

        {
            "id": 1,
            "name": "priority sites",
            "sites": [
                {
                    "category": "books",
                    "id": 1,
                    "name": "aNobil",
                    "search_text": "- aNobii</title>",
                    "status_code": 200,
                    "url": "http://www.anobii.com/%s/books"
                },
                {
                    "category": "coding",
                    "id": 5,
                    "name": "bitbucket",
                    "search_text": "\"username\":",
                    "status_code": 200,
                    "url": "https://bitbucket.org/api/2.0/users/%s"
                },
                ...
            ]
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :<json str name: the value of the name attribute
    :<json list sites: ids of the sites to associate with this category
        (must not be empty)

    :>header Content-Type: application/json
    :>json int id: unique identifier for category
    :>json str name: the category name
    :>json list sites: list of sites associated with this category
    :>json str sites[n].category: the site category
    :>json str sites[n].id: the unique id for site
    :>json str sites[n].name: the site name
    :>json str sites[n].search_text: string search pattern
    :>json str sites[n].status_code: server response code for site
    :>json str sites[n].url: the site url

    :status 200: updated
    :status 400: invalid request body
    :status 401: authentication required
    :status 404: category does not exist
    '''

    editable_fields = ['name', 'sites']

    # Get category.
    id_ = get_int_arg('id_', id_)
    category = g.db.query(Category).filter(Category.id == id_).first()

    if category is None:
        raise NotFound("Category '%s' does not exist." % id_)

    request_json = request.get_json()

    # Validate data and set attributes
    if request_json is None:
        raise BadRequest("Specify at least one editable field: {}".format(
            editable_fields))

    for field in request_json:
        if field not in editable_fields:
            raise BadRequest(
                "'{}' is not one of the editable fields: {}".format(
                    field, editable_fields))

    if 'name' in request_json:
        validate_json_attr('name', GROUP_ATTRS, request_json)
        category.name = request_json['name'].strip()

    if 'sites' in request_json:
        try:
            request_site_ids = [int(s) for s in request_json['sites']]
        except (TypeError, ValueError):
            # TypeError covers null entries and a non-iterable `sites`
            # value; ValueError covers non-numeric strings.
            raise BadRequest('Sites must be a list of integer site ids')

        if len(request_site_ids) == 0:
            raise BadRequest('Categorys must have at least one site')

        sites = g.db.query(Site) \
                    .filter(Site.id.in_(request_site_ids)) \
                    .all()
        site_ids = [site.id for site in sites]
        # Sorted so the error message is deterministic.
        missing_sites = sorted(set(request_site_ids) - set(site_ids))

        if missing_sites:
            # The ids are ints; str.join() only accepts strings.
            raise BadRequest('Site ids "{}" do not exist'.format(
                ','.join(str(site_id) for site_id in missing_sites)))

        category.sites = sites

    # Save the updated category
    g.db.add(category)

    try:
        g.db.commit()
    except DBAPIError as e:
        g.db.rollback()
        raise BadRequest('Database error: {}'.format(e))

    # Send redis notifications
    notify_mask_client(
        channel='category',
        message={
            'id': category.id,
            'name': category.name,
            'status': 'updated',
            'resource': url_for('CategoryView:get', id_=category.id)
        }
    )

    response = category.as_dict()
    response['url-for'] = url_for('CategoryView:get', id_=category.id)

    # Send response.
    return jsonify(**response)