Example #1
0
    def test_commit(self):
        """Data added inside a session scope is readable after the scope exits."""
        with session_scope() as write_session:
            new_project = Project()
            new_project.name = 'test project'
            write_session.add(new_project)

        # Once the scope above has closed, the insert should already be
        # committed, so a fresh scope must be able to read the row back.
        with session_scope() as read_session:
            stored = read_session.query(Project).first()

        self.assertEqual('test project', stored.name)
Example #2
0
    def test_register(self):
        """POST /nodes on an empty node table registers a node."""
        # Empty the table first so ``first()`` below returns the new node.
        with session_scope() as session:
            session.query(Node).delete()

        resp = self.fetch('/nodes', method="POST", body="")

        with session_scope() as session:
            registered = session.query(Node).first()

        self.assertEqual(200, resp.code)
        self.assertEqual('127.0.0.1', registered.client_ip)
        self.assertEqual(datetime.date.today(),
                         registered.create_time.date())
        self.assertEqual(datetime.date.today(),
                         registered.last_heartbeat.date())
        self.assertEqual(True, registered.isalive)
        self.assertEqual(None, registered.tags)
Example #3
0
    def test_post_create(self):
        """Uploading an egg for a non-existent project creates the project.

        The project is deleted first (404 is accepted in case it did not
        exist), then the test egg is posted to ``/addversion.json`` and the
        project row is expected to be present afterwards.
        """
        project_name = 'test_project'
        postdata = {'project': project_name}
        response = self.fetch('/delproject.json',
                              method='POST',
                              body=urlencode(postdata))
        self.assertIn(response.code, [404, 200])

        # Open the egg in a ``with`` block so the handle is always closed.
        # The encoder output is fully consumed inside the block because the
        # generator may read from the file lazily.
        with open(TEST_EGG_FILE, 'rb') as egg_file:
            post_data = {
                'egg': egg_file,
                'project': project_name,
                'version': '1.0',
                '_xsrf': 'dummy',
            }
            datagen, headers = multipart_encode(post_data)
            databuffer = b''.join([ensure_binary(x) for x in datagen])

        headers['Cookie'] = "_xsrf=dummy"
        response = self.fetch('/addversion.json',
                              method='POST',
                              headers=headers,
                              body=databuffer)

        self.assertEqual(200, response.code)

        with session_scope() as session:
            project = session.query(Project)\
                .filter_by(name=project_name).first()
            self.assertIsNotNone(project)
            self.assertEqual(project.name, project_name)
Example #4
0
    def test_post(self):
        """Deleting a project removes its eggs, its row and its spiders.

        The whole test runs inside a single ``session_scope``; the
        ``project`` instance loaded before the delete request is reused for
        the storage and spider checks afterwards.
        """
        project_name = 'test_project'
        self._upload_test_project()
        with session_scope() as session:
            project = session.query(Project).filter_by(
                name=project_name).first()

            project_storage = ProjectStorage(
                self._app.settings.get('project_storage_dir'), project)

            # Precondition: the upload created the project's egg directory.
            self.assertTrue(
                path.exists(
                    project_storage.storage_provider.get_project_eggs_dir(
                        project)))

            # The same dummy XSRF token is sent as cookie and form field.
            headers = {'Cookie': "_xsrf=dummy"}
            post_data = {'_xsrf': 'dummy'}
            res = self.fetch('/projects/%s/delete' % project_name,
                             method="POST",
                             headers=headers,
                             body=urlencode(post_data))
            self.assertEqual(200, res.code)

            # do not delete folder
            # self.assertFalse(path.exists(project_storage.storage_provider.get_project_eggs_dir(project)))
            # Only the egg versions are expected to be gone, not the folder.
            self.assertEqual(len(project_storage.list_egg_versions()), 0)

            # The project row must no longer be found by name.
            self.assertIsNone(
                session.query(Project).filter_by(name=project_name).first())

            # No spider rows may still reference the deleted project's id.
            self.assertEqual(
                0,
                len(
                    session.query(Spider).filter_by(
                        project_id=project.id).all()))
Example #5
0
    def get_app(self):
        """Build the application under test with authentication enabled.

        Before the app is created, the passwords of the test accounts are
        (re)set using the same secret key that is handed to ``make_app``.
        The second account is created if it does not exist yet.

        Returns:
            The application object returned by ``make_app``.
        """
        config = Config()
        scheduler_manager = SchedulerManager(config=config)
        scheduler_manager.init()
        node_manager = NodeManager(scheduler_manager)
        node_manager.init()
        # Single source of truth: the key used to encrypt the stored
        # passwords is also the one passed to ``make_app`` below.
        secret_key = '123'
        with session_scope() as session:
            user = session.query(User).filter_by(username='******').first()
            user.password = encrypt_password('password', secret_key)
            session.add(user)
            session.commit()

            normal_user = session.query(User).filter_by(
                username='******').first()
            if not normal_user:
                normal_user = User()
                normal_user.username = '******'
            normal_user.is_admin = False
            normal_user.password = encrypt_password('passw0rd', secret_key)
            session.add(normal_user)
            session.commit()

        return make_app(scheduler_manager,
                        node_manager,
                        None,
                        secret_key=secret_key,
                        enable_authentication=True)
Example #6
0
    def test_post(self):
        """Posting the test egg to ``/uploadproject`` creates the project."""
        project_name = 'test_project'
        egg_path = path.join(path.dirname(__file__), '..',
                             'test_project-1.0-py2.7.egg')

        # Open the egg in a ``with`` block so the handle is always closed.
        # The encoder output is fully consumed inside the block because the
        # generator may read from the file lazily.
        with open(egg_path, 'rb') as egg_file:
            post_data = {
                'egg': egg_file,
                'project': project_name,
                'version': '1.0',
                '_xsrf': 'dummy',
            }
            datagen, headers = multipart_encode(post_data)
            databuffer = b''.join(datagen)

        headers['Cookie'] = "_xsrf=dummy"
        response = self.fetch('/uploadproject',
                              method='POST',
                              headers=headers,
                              body=databuffer)

        self.assertEqual(200, response.code)

        with session_scope() as session:
            project = session.query(Project).filter_by(
                name=project_name).first()
            self.assertIsNotNone(project)
            self.assertEqual(project.name, project_name)
Example #7
0
    def test_post_with_triggers(self):
        """A project whose spider got a cron trigger can be deleted."""
        project_name = 'test_project'
        spider_name = 'error_spider'
        self._upload_test_project()

        xsrf_headers = {'Cookie': "_xsrf=dummy"}
        trigger_url = '/projects/%s/spiders/%s/triggers' % (project_name,
                                                            spider_name)
        delete_url = '/projects/%s/delete' % project_name

        with session_scope() as session:
            # The row is looked up but not used by the checks below.
            _project = session.query(Project).filter_by(
                name=project_name).first()

            # Step 1: attach a cron trigger to the spider.
            trigger_form = {'_xsrf': 'dummy', 'cron': '0 0 0 0 0'}
            res = self.fetch(trigger_url,
                             method='POST',
                             headers=xsrf_headers,
                             body=urlencode(trigger_form))
            self.assertEqual(200, res.code)

            # Step 2: delete the project that now owns a trigger.
            delete_form = {'_xsrf': 'dummy'}
            res = self.fetch(delete_url,
                             method="POST",
                             headers=xsrf_headers,
                             body=urlencode(delete_form))
            self.assertEqual(200, res.code)
def upgrade():
    """Create a ``ProjectPackage`` for every project that lacks one.

    For each project without a package, the stored egg is inspected with a
    runner to obtain its settings module and spider list, which are saved as
    a new package of type ``'scrapy'``. Projects whose egg cannot be
    inspected (``ProcessFailed`` / ``InvalidProjectEgg``) are logged and
    skipped.
    """
    ioloop = IOLoop.current()
    config = Config()
    runner_factory = RunnerFactory(config)
    with session_scope() as session:
        for project in session.query(Project):
            if project.package is not None:
                continue
            storage = ProjectStorage(config.get('project_storage_dir'),
                                     project)
            # Only the egg file is needed; the version is not used here.
            _version, eggf = storage.get_egg()
            runner = runner_factory.build(eggf)
            try:
                project_settings_module = ioloop.run_sync(
                    runner.settings_module)
                spider_list = ioloop.run_sync(runner.list)
                package = ProjectPackage()
                package.project = project
                package.type = 'scrapy'
                package.settings_module = project_settings_module
                package.spider_list = ','.join(spider_list)
                session.add(package)
                session.commit()
            except (ProcessFailed, InvalidProjectEgg) as ex:
                logger.error(ex)
            finally:
                # Always release the runner, whether or not the inspection
                # succeeded, so one is not left behind per project.
                runner.clear()
Example #9
0
 def init_project(self, project_name):
     """Delete any project named ``project_name``, then run the base init."""
     with session_scope() as session:
         existing = session.query(Project).filter_by(name=project_name).first()
         if existing:
             # An empty string is passed as the user id.
             self.project_manager.delete_project('', existing.id)
     AppTest.init_project()
Example #10
0
 def delete_project(self, user_id, project_id):
     """Delete a project with its spiders, queued jobs, history and egg.

     For every spider of the project the queued executions and parameters
     are deleted, its trigger schedules are removed from the scheduler, and
     its historical jobs (including their stored job data) are deleted.
     Finally the egg, the package (if any) and the project row are removed.

     Args:
         user_id: Not used by this method.
         project_id: Primary key of the project to delete.
     """
     with session_scope() as session:
         project = session.query(Project).get(project_id)
         project_storage = ProjectStorage(
             self.project_storage_dir, project,
             self.default_project_storage_version)
         for spider in project.spiders:
             triggers = session.query(Trigger).filter_by(
                 spider_id=spider.id)
             session.query(SpiderExecutionQueue).filter_by(
                 spider_id=spider.id).delete()
             session.query(SpiderParameter).filter_by(
                 spider_id=spider.id).delete()
             session.commit()
             for trigger in triggers:
                 self.scheduler_manager.remove_schedule(
                     project.name, spider.name, trigger_id=trigger.id)
             # Second queue purge, now that the schedules are gone
             # (presumably to drop jobs queued in the meantime).
             session.query(SpiderExecutionQueue).filter_by(
                 spider_id=spider.id).delete()
             for historical_job in spider.historical_jobs:
                 project_storage.delete_job_data(historical_job)
                 session.delete(historical_job)
             session.delete(spider)
         project_storage.delete_egg()
         # A project may have no package; ``session.delete(None)`` would
         # raise, so only delete the package when it exists.
         if project.package is not None:
             session.delete(project.package)
         session.delete(project)
Example #11
0
    def register_node(self):
        """Register a fresh node via POST /nodes and return its id.

        All existing ``Node`` rows are deleted beforehand.
        """
        with session_scope() as session:
            session.query(Node).delete()

        resp = self.fetch('/nodes', method="POST", body="")
        self.assertEqual(200, resp.code)
        payload = json.loads(resp.body)
        return payload['id']
Example #12
0
    def test_get(self):
        """The egg of an uploaded spider can be fetched by spider id."""
        self._upload_test_project()
        with session_scope() as session:
            first_spider = session.query(Spider).first()

        self.assertIsNotNone(first_spider)
        egg_url = '/spiders/%d/egg' % (first_spider.id, )
        resp = self.fetch(egg_url)
        self.assertEqual(200, resp.code)
Example #13
0
    def test_register_realip(self):
        """POST /nodes stores the X-Real-IP header as the node's client IP."""
        # Empty the table first so ``first()`` below returns the new node.
        with session_scope() as session:
            session.query(Node).delete()

        real_ip_headers = {'X-Real-IP': '1.2.3.4'}
        resp = self.fetch('/nodes',
                          method="POST",
                          headers=real_ip_headers,
                          body="")

        with session_scope() as session:
            registered = session.query(Node).first()

        self.assertEqual(200, resp.code)
        self.assertEqual('1.2.3.4', registered.client_ip)
        self.assertEqual(datetime.date.today(),
                         registered.create_time.date())
        self.assertEqual(datetime.date.today(),
                         registered.last_heartbeat.date())
        self.assertEqual(True, registered.isalive)
        self.assertEqual(None, registered.tags)
Example #14
0
    def test_rollback(self):
        """An exception raised inside a session scope discards pending data."""
        class CommitFailedError(Exception):
            pass

        def add_project_then_fail():
            # Add a row, then abort the scope with an exception.
            with session_scope() as session:
                doomed = Project()
                doomed.name = 'test project'
                session.add(doomed)
                raise CommitFailedError()

        try:
            add_project_then_fail()
        except CommitFailedError:
            pass

        # After an exception was raised in the scope, the data should have
        # been rolled back, so no project may be found.
        with session_scope() as session:
            found = session.query(Project).first()

        self.assertIsNone(found)
Example #15
0
    def test_job_complete(self):
        """Drive one job through schedule, fetch, start and complete.

        After the node reports completion, a ``HistoricalJob`` with the task
        id must exist and its status must equal ``2``.
        """
        project_name = 'test_project'
        spider_name = 'success_spider'

        node_id = self.register_node()

        # Schedule a job on an emptied execution queue.
        with session_scope() as session:
            session.query(SpiderExecutionQueue).delete()
            session.commit()
        schedule_form = {'project': project_name, 'spider': spider_name}
        res = self.fetch('/schedule.json',
                         method='POST',
                         body=urlencode(schedule_form))
        self.assertEqual(200, res.code)

        # Fetch the job as the registered node.
        next_task_form = {'node_id': node_id}
        res = self.fetch('/executing/next_task',
                         method='POST',
                         body=urlencode(next_task_form),
                         headers={'X-Dd-Nodeid': str(node_id)})
        self.assertEqual(200, res.code)
        task_id = json.loads(res.body)['data']['task']['task_id']

        # Report the job as started.
        start_form = {'pid': '1'}
        res = self.fetch('/jobs/%s/start' % task_id,
                         method='POST',
                         headers={'X-Dd-Nodeid': str(node_id)},
                         body=urlencode(start_form))
        self.assertEqual(200, res.code)

        # Report the job as complete, sending log and items as file parts.
        complete_form = {
            'task_id': task_id,
            'status': 'success',
            'log': BytesIO(b'some logs'),
            'items': BytesIO(b'{"a" : "some items"}'),
        }
        datagen, upload_headers = multipart_encode(complete_form)
        upload_headers['X-Dd-Nodeid'] = str(node_id)
        res = self.fetch('/executing/complete',
                         method='POST',
                         headers=upload_headers,
                         body_producer=MultipartRequestBodyProducer(datagen))
        self.assertEqual(200, res.code)

        with session_scope() as session:
            finished = session.query(HistoricalJob).filter_by(
                id=task_id).first()
            self.assertIsNotNone(finished)
            self.assertEqual(2, finished.status)
Example #16
0
 def test_post(self):
     """Scheduling a spider on an emptied queue returns HTTP 200."""
     with session_scope() as session:
         session.query(SpiderExecutionQueue).delete()
         session.commit()
     # schedule once
     form = {'project': 'test_project', 'spider': 'success_spider'}
     resp = self.fetch('/schedule.json', method='POST', body=urlencode(form))
     self.assertEqual(200, resp.code)
Example #17
0
    def test_get(self):
        """The spider page of an uploaded project responds with HTTP 200."""
        self._upload_test_project()
        with session_scope() as session:
            spider = session.query(Spider).first()
            # Assert before dereferencing: with no spider present,
            # ``spider.project`` would raise AttributeError instead of
            # producing a clear assertion failure.
            self.assertIsNotNone(spider)
            project = spider.project

        self.assertIsNotNone(project)
        response = self.fetch('/projects/%s/spiders/%s' %
                              (project.name, spider.name))
        self.assertEqual(200, response.code)
Example #18
0
    def test_get_egg_by_project_spider_name(self):
        """A spider's egg can be fetched by project name and spider name."""
        self._upload_test_project()
        with session_scope() as session:
            spider = session.query(Spider).first()
            # Assert before dereferencing: with no spider present,
            # ``spider.project`` would raise AttributeError instead of
            # producing a clear assertion failure.
            self.assertIsNotNone(spider)
            project = spider.project

        self.assertIsNotNone(project)
        response = self.fetch('/projects/%s/spiders/%s/egg' %
                              ('test_project', 'log_spider'))
        self.assertEqual(200, response.code)
Example #19
0
    def test_post(self):
        """After ``/delproject.json`` the project is absent from the database."""
        project_name = 'test_project'
        form = urlencode({'project': project_name})
        resp = self.fetch('/delproject.json', method='POST', body=form)
        # Both 200 and 404 are accepted as response codes.
        self.assertIn(resp.code, [404, 200])

        with session_scope() as session:
            remaining = session.query(Project).filter_by(
                name=project_name).first()
            self.assertIsNone(remaining)
Example #20
0
    def setUp(self):
        """Create one node and one node key and keep them on the test case."""
        super(NodeSecureTest, self).setUp()
        with session_scope() as session:
            fresh_node = Node()
            session.add(fresh_node)

            key_record = NodeKey()
            key_record.key = str(uuid.uuid4())
            key_record.create_at = datetime.datetime.now()
            key_record.secret_key = generate_random_string(32)
            session.add(key_record)

            # Commit before reading ``fresh_node.id`` below.
            session.commit()
            self.node_key = key_record
            self.node_id = fresh_node.id
Example #21
0
    def test_post(self):
        """Registering with an HMAC-signed request links node and node key."""
        node_key = self.node_manager.create_node_key()
        digest = generate_digest(node_key.secret_key, 'POST',
                                 '/nodes/register', '', '')
        auth_headers = {
            'Authorization': '%s %s %s' % ('HMAC', node_key.key, digest),
        }
        resp = self.fetch('/nodes/register',
                          method="POST",
                          body="",
                          headers=auth_headers)
        self.assertEqual(200, resp.code)
        new_node_id = json.loads(resp.body)['id']
        self.assertTrue(new_node_id > 0)

        with session_scope() as session:
            # The node must point at the key that signed the request ...
            registered = session.query(Node).get(new_node_id)
            self.assertEqual(registered.node_key_id, node_key.id)

            # ... and the key must point back at the node.
            stored_key = session.query(NodeKey).get(node_key.id)
            self.assertEqual(stored_key.used_node_id, new_node_id)
    def test_node_creation(self):
        """GET /admin/nodes with no keys present leaves one unused key."""
        with session_scope() as session:
            session.query(NodeKey).delete()
            session.commit()

            # Precondition: no node keys exist.
            self.assertEqual(0, len(session.query(NodeKey).all()))

            resp = self.fetch('/admin/nodes')
            self.assertEqual(200, resp.code)

            # Exactly one key must exist after the request.
            self.assertEqual(1, len(session.query(NodeKey).all()))

            # ``== False`` is a SQL expression here, not a Python comparison.
            unused_and_live = (NodeKey.used_node_id.is_(None),
                               NodeKey.is_deleted == False)
            usable_key = session.query(NodeKey).filter(
                *unused_and_live).first()

            self.assertEqual(False, usable_key.is_deleted)
            self.assertIsNone(usable_key.used_node_id)
Example #23
0
    def upload_project(self, user_id, project_name, version, eggf):
        """Register an uploaded egg as a project, its package and its spiders.

        This is a generator-based coroutine: it yields on the runner calls and
        delivers its result with ``raise Return(project)`` (presumably it is
        wrapped by a coroutine decorator outside this view - TODO confirm).

        Args:
            user_id: Not used in this method body.
            project_name: Name used to look up or create the ``Project``.
            version: Version stored on the project and passed to the storage.
            eggf: Egg file handed to the runner and to ``put_egg``.

        Returns:
            The created or updated ``Project`` instance (via ``Return``).
        """
        runner = self.runner_factory.build(eggf)
        try:
            spiders = yield runner.list()
            logger.debug('spiders: %s' % spiders)
            project_settings_module = yield runner.settings_module()
        finally:
            # The runner is cleared whether or not the inspection succeeded.
            runner.clear()

        with session_scope() as session:
            project = session.query(Project).filter_by(
                name=project_name).first()

            # Create the project on first upload; otherwise update in place.
            if project is None:
                project = Project()
                project.name = project_name
                project.storage_version = self.default_project_storage_version
            project.version = version
            session.add(project)
            # Same create-or-update pattern for the project's package.
            package = project.package
            if not package:
                package = ProjectPackage()
                package.project = project
            package.type = 'scrapy'
            package.settings_module = project_settings_module
            package.spider_list = ','.join(spiders)
            session.add(package)
            # Flush before storing the egg; ``project.id`` is needed below.
            session.flush()
            project_storage = ProjectStorage(self.project_storage_dir, project)
            project_storage.put_egg(eggf, version)
            session.refresh(project)

            # Add a Spider row for every reported spider that is not yet
            # stored for this project; existing rows are left untouched.
            for spider_name in spiders:
                spider = session.query(Spider).filter_by(
                    project_id=project.id, name=spider_name).first()
                if spider is None:
                    spider = Spider()
                    spider.name = spider_name
                    spider.project_id = project.id
                    session.add(spider)
                    session.commit()
                    session.refresh(spider)

            session.commit()
        raise Return(project)
Example #24
0
    def test_post_real_ip(self):
        """An HMAC-signed POST /nodes stores X-Real-IP as the client IP."""
        self.node_key = self.node_manager.create_node_key()
        res = self.fetch_secure('/nodes/register', method='POST', body='')
        self.assertEqual(200, res.code)
        # The id is read from the response but not used further.
        _registered_id = json.loads(res.body)['id']

        digest = generate_digest(self.node_key.secret_key, 'POST', '/nodes',
                                 '', '')
        signed_headers = {
            'Authorization': '%s %s %s' % ('HMAC', self.node_key.key, digest),
        }
        signed_headers['X-Real-IP'] = '1.2.3.4'
        resp = self.fetch('/nodes',
                          method="POST",
                          body="",
                          headers=signed_headers)
        self.assertEqual(200, resp.code)
        new_node_id = json.loads(resp.body)['id']
        self.assertTrue(new_node_id > 0)

        with session_scope() as session:
            active_node = session.query(Node).get(new_node_id)
            self.assertEqual(active_node.client_ip, '1.2.3.4')
    def test_get_key_expire(self):
        """GET /admin/nodes marks an old node key deleted and leaves a live one.

        A key created two hours in the past is inserted as the only key.
        After the request it is expected to be flagged ``is_deleted`` and a
        non-deleted key is expected to exist. The actual expiry threshold is
        not visible here - presumably less than two hours; TODO confirm.
        """
        with session_scope() as session:
            session.query(NodeKey).delete()

            expire_key = NodeKey()
            expire_key.key = 'abc'
            expire_key.secret_key = 'cba'
            expire_key.is_deleted = False
            # Backdate the key by two hours.
            expire_key.create_at = datetime.datetime.now(
            ) - datetime.timedelta(hours=2)
            session.add(expire_key)
            session.commit()
            # Expire loaded state so the queries below reload from the
            # database.
            session.expire_all()

            headers = self.populate_cookie_header(headers={})
            response = self.fetch('/admin/nodes', headers=headers)
            self.assertEqual(200, response.code)

            # The backdated key must now be flagged as deleted.
            expire_key = session.query(NodeKey).filter_by(key='abc').first()
            self.assertTrue(expire_key.is_deleted)

            # At least one non-deleted key must exist after the request.
            new_key = session.query(NodeKey).filter_by(
                is_deleted=False).first()
            self.assertIsNotNone(new_key)
Example #26
0
 def test_open(self):
     """A session scope can be entered and left without doing any work."""
     with session_scope():
         pass