Example #1
0
    def post_jobs_for_site(self, site_id):
        """
        Request background jobs for the site identified by `site_id`.

        **Example Request**

        .. sourcecode:: json

            {
                "jobs": [
                    {
                        "name": "test",
                    },
                    ...
                ]
            }

        **Example Response**

        .. sourcecode:: json

            {
                "tracker_ids": {
                        "1": "tracker.12344565",
                }
            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :<json list jobs: a list of jobs to schedule
        :<json string jobs[n].name: name of job

        :>header Content-Type: application/json
        :>json object tracker_ids: worker tracking ids
            {site ID: tracker ID}

        :status 202: scheduled
        :status 400: invalid request body
        :status 401: authentication required
        :status 404: site does not exist
        """
        known_jobs = ['test']
        body_schema = {'jobs': {'type': list, 'required': True}}
        job_schema = {'name': {'type': str, 'required': True}}

        # Resolve the target site before looking at the request body.
        site_id = get_int_arg('site_id', site_id)
        site = g.db.query(Site).filter(Site.id == site_id).first()
        if site is None:
            raise NotFound("Site '%s' does not exist." % site_id)

        payload = request.get_json()
        validate_request_json(payload, body_schema)
        requested_jobs = payload['jobs']

        # Validate every job up front so nothing is enqueued when any
        # entry in the request is invalid.
        for entry in requested_jobs:
            validate_json_attr('name', job_schema, entry)
            job_name = entry['name']
            if job_name in known_jobs:
                continue
            raise BadRequest(
                '`{}` does not exist in available jobs: {}'.format(
                    job_name, ','.join(known_jobs)))

        # Schedule jobs. The result is keyed by site id, so only the tracker
        # of the last job scheduled for this site is reported.
        tracker_ids = {}
        for entry in requested_jobs:
            tracker_id = 'tracker.{}'.format(random_string(10))
            tracker_ids[site.id] = tracker_id

            if entry['name'] != 'test':
                continue

            job_description = 'Testing site "{}"'.format(site.name)
            worker.scrape.test_site.enqueue(
                site_id=site.id,
                tracker_id=tracker_id,
                jobdesc=job_description,
                user_id=g.user.id,
            )

        accepted = jsonify(tracker_ids=tracker_ids)
        accepted.status_code = 202
        return accepted
Example #2
0
    def post_jobs_for_sites(self):
        """
        Request background jobs for all sites.

        **Example Request**

        .. sourcecode:: json

            {
                "jobs": [
                    {
                        "name": "test",
                    },
                    ...
                ]
            }

        **Example Response**

        .. sourcecode:: json

            {
                "tracker_ids": {
                        "1": "tracker.12344565",
                }
            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :<json list jobs: a list of jobs to schedule
        :<json string jobs[n].name: name of job

        :>header Content-Type: application/json
        :>json object tracker_ids: worker tracking ids keyed by site ID

        :status 202: scheduled
        :status 400: invalid request body
        :status 401: authentication required
        """
        known_jobs = ['test']
        body_schema = {'jobs': {'type': list, 'required': True}}
        job_schema = {'name': {'type': str, 'required': True}}

        payload = request.get_json()
        validate_request_json(payload, body_schema)
        requested_jobs = payload['jobs']

        # Validate every job up front so nothing is scheduled when any
        # entry in the request is invalid.
        for entry in requested_jobs:
            validate_json_attr('name', job_schema, entry)
            job_name = entry['name']
            if job_name in known_jobs:
                continue
            raise BadRequest(
                '`{}` does not exist in available jobs: {}'.format(
                    job_name, ','.join(known_jobs)))

        # Get sites.
        all_sites = g.db.query(Site).all()

        # Schedule each requested job against each site. The result is keyed
        # by site id, so a site's entry holds the tracker of the last job
        # processed for it.
        tracker_ids = {}
        for entry in requested_jobs:
            run_site_test = entry['name'] == 'test'
            for site in all_sites:
                tracker_id = 'tracker.{}'.format(random_string(10))
                tracker_ids[site.id] = tracker_id

                if run_site_test:
                    app.queue.schedule_site_test(
                        site=site,
                        tracker_id=tracker_id,
                    )

        accepted = jsonify(tracker_ids=tracker_ids)
        accepted.status_code = 202
        return accepted
Example #3
0
    def put(self, id_):
        '''
        Update the site identified by `id`.

        Only the fields present in the request body are changed. After the
        update the site must still have either a status code or a complete
        page match (both `match_type` and `match_expr`).

        **Example Request**

        .. sourcecode:: json

            {
                "name": "bebo",
                "url": "http://bebo.com/usernames/search=%s",
                "status_code": 200,
                "match_type": "text",
                "match_expr": "Foo Bar Baz",
                "test_username_pos": "bob",
                "test_username_neg": "adfjf393rfjffkjd",
                "headers": {"referer": "http://www.google.com"},
                "censor_images": false,
                "wait_time": 5,
                "use_proxy": false
            }

        **Example Response**

        .. sourcecode:: json

            {
                "id": 2,
                "name": "bebo",
                "search_text": "Bebo User Page.</title>",
                "status_code": 200,
                "match_type": "text",
                "match_expr": "Foo Bar Baz",
                "url": "https://bebo.com/usernames/search=%s",
                "test_username_pos": "bob",
                "test_username_neg": "adfjf393rfjffkjd",
                "test_status": "f",
                "tested_at": "2016-01-01T00:00:00.000000+00:00",
                "headers": {"referer": "http://www.google.com"},
                "censor_images": false,
                "wait_time": 5,
                "use_proxy": false
            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :<json string name: name of site
        :<json string url: username search url for the site
        :<json int status_code: the status code to check for
            determining a match (nullable)
        :<json string match_type: type of match (see get_match_types()
            for valid match types) (nullable)
        :<json string match_expr: expression to use for determining
            a page match (nullable)
        :<json string test_username_pos: username that exists on site
            (used for testing)
        :<json string test_username_neg: username that does not
            exist on site (used for testing)
        :<json array headers: custom headers
        :<json bool censor_images: whether to censor images from this profile
        :<json int wait_time: time (in seconds) to wait for updates
            after page is loaded
        :<json bool use_proxy: whether to proxy requests for this profile URL

        :>header Content-Type: application/json
        :>json int id: unique identifier for site
        :>json str name: name of site
        :>json str url: username search url for the site
        :>json int status_code: the status code to check for
            determining a match (nullable)
        :>json string match_type: type of match (see get_match_types()
            for valid match types) (nullable)
        :>json string match_expr: expression to use for determining
            a page match (nullable)
        :>json str test_status: results of username test
        :>json str tested_at: timestamp of last test
        :>json str test_username_pos: username that exists on site
            (used for testing)
        :>json str test_username_neg: username that does not
            exist on site (used for testing)
        :>json array headers: custom headers
        :>json bool censor_images: whether to censor images from this profile
        :>json int wait_time: time (in seconds) to wait for updates after
            page is loaded
        :>json bool use_proxy: whether to proxy requests for this profile URL

        :status 200: updated
        :status 400: invalid request body
        :status 401: authentication required
        :status 404: site does not exist
        '''

        # Get site.
        id_ = get_int_arg('id_', id_)
        site = g.db.query(Site).filter(Site.id == id_).first()

        if site is None:
            raise NotFound("Site '%s' does not exist." % id_)

        request_json = request.get_json()

        # Every check below uses `in` / `[...]` on the body, so anything
        # other than a JSON object must be rejected here rather than
        # surfacing as a TypeError.
        if not isinstance(request_json, dict):
            raise BadRequest('Request body must be a JSON object.')

        # Validate data and set attributes
        if 'name' in request_json:
            validate_json_attr('name', _site_attrs, request_json)
            site.name = request_json['name'].strip()

        if 'url' in request_json:
            validate_json_attr('url', _site_attrs, request_json)
            site.url = request_json['url'].lower().strip()

        if 'match_expr' in request_json:
            validate_json_attr('match_expr', _site_attrs, request_json)
            site.match_expr = request_json['match_expr']

        if 'match_type' in request_json:
            validate_json_attr('match_type', _site_attrs, request_json)
            match_type = request_json['match_type']
            # match_type is documented as nullable; do not call strip() on
            # None.
            site.match_type = (
                None if match_type is None else match_type.strip())

        if 'status_code' in request_json:
            validate_json_attr('status_code', _site_attrs, request_json)
            status = request_json['status_code']
            site.status_code = None if status is None else int(status)

        # Check the resulting site state rather than the raw request: the
        # three fields are optional in the body, so indexing the request
        # directly would raise KeyError on a partial update.
        if (site.match_type is None or site.match_expr is None) and \
                site.status_code is None:
            raise BadRequest('At least one of the '
                             'following is required: '
                             'status code or page match.')

        if 'test_username_pos' in request_json:
            validate_json_attr('test_username_pos', _site_attrs, request_json)
            site.test_username_pos = (
                request_json['test_username_pos'].lower().strip())

        if 'test_username_neg' in request_json:
            validate_json_attr('test_username_neg', _site_attrs, request_json)
            site.test_username_neg = (
                request_json['test_username_neg'].lower().strip())

        if 'headers' in request_json:
            validate_json_attr('headers', _site_attrs, request_json)
            site.headers = request_json['headers']

        if 'censor_images' in request_json:
            validate_json_attr('censor_images', _site_attrs, request_json)
            site.censor_images = request_json['censor_images']

        if 'use_proxy' in request_json:
            validate_json_attr('use_proxy', _site_attrs, request_json)
            site.use_proxy = request_json['use_proxy']

        if 'wait_time' in request_json:
            validate_json_attr('wait_time', _site_attrs, request_json)
            site.wait_time = request_json['wait_time']

        # Save the updated site
        try:
            g.db.commit()
        except DBAPIError as e:
            g.db.rollback()
            raise BadRequest('Database error: {}'.format(e))

        # Send redis notifications
        notify_mask_client(channel='site',
                           message={
                               'site': site.as_dict(),
                               'status': 'updated',
                               'resource': None
                           })

        response = jsonify(site.as_dict())
        response.status_code = 200

        # Send response.
        return response
Example #4
0
    def put(self, id_):
        '''
        Update the category identified by `id`.

        **Example Request**

        .. sourcecode:: json

            {
                "name": "priority sites",
                "sites": [1, 5]
            }

        **Example Response**

        .. sourcecode:: json

            {
                "id": 1,
                "name": "priority sites",
                "sites": [
                    {
                        "category": "books",
                        "id": 1,
                        "name": "aNobil",
                        "search_text": "- aNobii</title>",
                        "status_code": 200,
                        "url": "http://www.anobii.com/%s/books"
                    },
                    {
                        "category": "coding",
                        "id": 5,
                        "name": "bitbucket",
                        "search_text": "\"username\":",
                        "status_code": 200,
                        "url": "https://bitbucket.org/api/2.0/users/%s"
                    },
                    ...
                ]
            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :<json str name: the new category name
        :<json list sites: integer ids of the sites to associate with
            this category (must not be empty)

        :>header Content-Type: application/json
        :>json int id: unique identifier for category
        :>json str name: the category name
        :>json list sites: list of sites associated with this category
        :>json str sites[n].category: the site category
        :>json str sites[n].id: the unique id for site
        :>json str sites[n].name: the site name
        :>json str sites[n].search_text: string search pattern
        :>json str sites[n].status_code: server response code for site
        :>json str sites[n].url: the site url
        :>json str url-for: URL of this category resource

        :status 200: updated
        :status 400: invalid request body
        :status 401: authentication required
        :status 404: category does not exist
        '''
        editable_fields = ['name', 'sites']
        # Get category.
        id_ = get_int_arg('id_', id_)
        category = g.db.query(Category).filter(Category.id == id_).first()

        if category is None:
            raise NotFound("Category '%s' does not exist." % id_)

        request_json = request.get_json()

        # Validate data and set attributes. A missing body or a body that is
        # not a JSON object cannot name any editable field.
        if not isinstance(request_json, dict):
            raise BadRequest("Specify at least one editable field: {}".format(
                editable_fields))

        for field in request_json:
            if field not in editable_fields:
                raise BadRequest(
                    "'{}' is not one of the editable fields: {}".format(
                        field, editable_fields))

        if 'name' in request_json:
            validate_json_attr('name', GROUP_ATTRS, request_json)
            category.name = request_json['name'].strip()

        if 'sites' in request_json:
            raw_site_ids = request_json['sites']

            if not isinstance(raw_site_ids, list):
                raise BadRequest('Sites must be a list of integer site ids')

            try:
                request_site_ids = [int(s) for s in raw_site_ids]
            except (TypeError, ValueError):
                # int() raises TypeError for null/nested values and
                # ValueError for non-numeric strings; both are client errors.
                raise BadRequest('Sites must be a list of integer site ids')

            if not request_site_ids:
                raise BadRequest('Categorys must have at least one site')

            sites = g.db.query(Site) \
                .filter(Site.id.in_(request_site_ids)) \
                .all()
            found_site_ids = {site.id for site in sites}
            missing_sites = sorted(set(request_site_ids) - found_site_ids)

            if missing_sites:
                # The ids are ints; str.join() only accepts strings.
                raise BadRequest('Site ids "{}" do not exist'.format(
                    ','.join(str(site_id) for site_id in missing_sites)))

            category.sites = sites

        # Save the updated category
        g.db.add(category)
        try:
            g.db.commit()
        except DBAPIError as e:
            g.db.rollback()
            raise BadRequest('Database error: {}'.format(e))

        # Send redis notifications
        notify_mask_client(channel='category',
                           message={
                               'id': category.id,
                               'name': category.name,
                               'status': 'updated',
                               'resource': url_for('CategoryView:get',
                                                   id_=category.id)
                           })

        response = category.as_dict()
        response['url-for'] = url_for('CategoryView:get', id_=category.id)

        # Send response.
        return jsonify(**response)