Exemple #1
0
def download_clawer_task(task):
    """Run the download for a single crawler task under a watchdog timer.

    Looks up the ``CrawlerDownload`` and ``CrawlerDownloadSetting`` documents
    belonging to ``task.job``, builds a ``Download`` from them and calls its
    ``download()`` method. A ``threading.Timer`` is armed for
    ``download_timeout`` seconds and invokes ``force_exit(timeout, task)`` if
    the download has not finished by then.

    If loading the job configuration raises, the task is saved with
    ``CrawlerTask.STATUS_FAIL`` and the function returns without downloading.

    Args:
        task: the crawler task to download; must expose ``job``, ``status``
            and ``save()``.
    """
    # Load the download configuration that belongs to this task's job.
    try:
        crawler_download = CrawlerDownload.objects(job=task.job).first()
        crawler_download_setting = CrawlerDownloadSetting.objects(
            job=task.job).first()
    except Exception:
        task.status = CrawlerTask.STATUS_FAIL
        task.save()
        # Nothing to download with: stop here instead of falling through to
        # code that would reference the unbound configuration variables.
        return

    # NOTE(review): `.first()` presumably yields None when no document
    # matches; a missing setting would then fail on `.download_timeout`
    # below -- confirm whether that case needs explicit handling.
    down = Download(task, crawler_download, crawler_download_setting)

    timeout = crawler_download_setting.download_timeout
    timer = threading.Timer(timeout, force_exit, [timeout, task])
    timer.start()
    try:
        down.download()
    finally:
        # Always disarm the watchdog, even when download() raises, so that
        # force_exit is not fired for a task that has already ended.
        timer.cancel()
Exemple #2
0
    def test_download(self):
        """Create a job with its download config and run the stored code.

        Saves a download type, job, task generator, task, download code and
        download setting, then reads them back, writes the stored code to
        ``jobcode1.py`` and runs it through ``self.exec_command``.

        NOTE(review): depends on files under a developer-specific absolute
        path, so it only works on a machine that has that directory.
        """
        code_dir = '/Users/princetechs3/my_code'
        # Make the directory importable so `import jobcode1` can be resolved
        # (assumes exec_command runs with this sys.path -- TODO confirm).
        sys.path.append(code_dir)

        onetype = CrawlerDownloadType(language='python')
        onetype.save()
        job1 = Job(name='1', info='2', customer='ddd', priority=-1)
        job1.save()
        ctg1 = CrawlerTaskGenerator(job=job1,
                                    code='echo hello1',
                                    cron='* * * * *')
        ctg1.save()
        ct1 = CrawlerTask(job=job1,
                          task_generator=ctg1,
                          uri='http://www.baidu.com',
                          args='i',
                          from_host='1')
        ct1.save()
        # Read the fixture code inside a context manager so the file handle
        # is closed deterministically.
        with open(code_dir + '/code1.py', 'r') as source_file:
            codestr1 = source_file.read()
        cd1 = CrawlerDownload(job=job1, code=codestr1, types=onetype)
        cd1.save()
        cds1 = CrawlerDownloadSetting(job=job1,
                                      proxy='122',
                                      cookie='22',
                                      dispatch_num=50)
        cds1.save()

        # Read the documents back through queries rather than reusing the
        # local objects (presumably job1 defaults to STATUS_ON -- verify).
        job = Job.objects(status=Job.STATUS_ON)[0]
        self.assertTrue(job)
        task = CrawlerTask.objects(job=job)[0]
        self.assertTrue(task)

        cd = CrawlerDownload.objects(job=task.job)[0]
        self.assertTrue(cd)

        self.assertTrue(cd.code)
        # Write the stored code out as a module, then run its entry point.
        with open(code_dir + '/jobcode1.py', 'w') as f:
            f.write(cd.code)
        self.exec_command('import jobcode1;jobcode1.run(%s)' %
                          "'http://www.baidu.com'")
        self.assertEqual(cd.types.language, 'python')
        # Parenthesised form prints the same value on Python 2 and 3.
        print(cd.types.language)