Esempio n. 1
0
    def __init__(self, port, website_folder, regional_main_pages=None):
        """Set up a local test web site and write a dlrobot project file for it.

        The constructor has process-wide side effects: it deletes and recreates
        the ``data.<name>`` folder next to this module, changes the current
        working directory to that folder and creates an HTTP server object
        bound to 127.0.0.1 (serving is not started here).

        :param port: preferred local port; if it is busy, the first free port
            from ``TTestEnv.additional_ports`` is used instead.
        :param website_folder: folder with the static site, resolved relative
            to the directory of this module.
        :param regional_main_pages: optional iterable of paths relative to the
            site root; each one is turned into an absolute
            ``http://127.0.0.1:<port>/<path>`` url. ``None`` means no pages.
        :raises AssertionError: if neither ``port`` nor any additional port is
            free.
        """
        self.dlrobot = None
        self.dlrobot_project = None

        module_folder = os.path.dirname(__file__)
        self.web_site_folder = os.path.join(module_folder, website_folder)
        name = os.path.basename(website_folder)
        self.data_folder = os.path.join(module_folder, "data.{}".format(name))
        self.dlrobot_result_folder = os.path.join(self.data_folder, "result")

        # Start every run from a clean data folder (removal is best-effort).
        if os.path.exists(self.data_folder):
            shutil.rmtree(self.data_folder, ignore_errors=True)

        handler = partial(http.server.SimpleHTTPRequestHandler,
                          directory=self.web_site_folder)

        if not is_local_http_port_free(port):
            # Fall back to the first free additional port; keep the requested
            # port when none is free so that the check below reports it.
            port = next(
                (p for p in TTestEnv.additional_ports if is_local_http_port_free(p)),
                port)
        assert is_local_http_port_free(port), "no free local http port (last tried: {})".format(port)

        self.web_site = http.server.HTTPServer(server_address=("127.0.0.1", port), RequestHandlerClass=handler)

        os.mkdir(self.data_folder)
        # NOTE: changes the working directory of the whole process.
        os.chdir(self.data_folder)
        self.project_path = os.path.join(self.data_folder, "project.txt")

        # A None default avoids sharing one mutable list between all calls.
        regional = ["http://127.0.0.1:{}/{}".format(port, url)
                    for url in (regional_main_pages or ())]

        project = TRobotProject.create_project_str("http://127.0.0.1:{}".format(port),
                                                   regional_main_pages=regional,
                                                   disable_search_engine=True)
        with open(self.project_path, "w") as outp:
            outp.write(project)
Esempio n. 2
0
 def get_new_project_to_process(self, worker_host_name, worker_ip):
     """Take the next web site from the queue and build a job for a worker.

     Removes the first entry of ``self.web_sites_to_process``, creates a
     ``TRemoteDlrobotCall`` for it, registers the call in
     ``self.worker_2_running_tasks[worker_ip]`` and builds the project
     file content for the site.

     :param worker_host_name: host name of the worker, stored on the call.
     :param worker_ip: ip of the worker; key into the running-tasks map.
     :return: tuple ``(remote_call, project_content)`` where
         ``project_content`` is the project string encoded as utf8 bytes.
     :raises IndexError: if ``self.web_sites_to_process`` is empty
         (presumably a list; ``pop(0)`` is called unconditionally).
     """
     site_url = self.web_sites_to_process.pop(0)
     project_file = TRemoteDlrobotCall.web_site_to_project_file(site_url)
     self.logger.info(
         "start job: {} on {} (host name={}), left jobs: {}, running jobs: {}"
         .format(project_file, worker_ip, worker_host_name,
                 len(self.web_sites_to_process),
                 self.get_running_jobs_count()))
     remote_call = TRemoteDlrobotCall(worker_ip=worker_ip,
                                      project_file=project_file,
                                      web_site=site_url)
     remote_call.worker_host_name = worker_host_name
     web_site_passport = self.web_sites_db.get_web_site(site_url)
     # NOTE(review): the passport is only checked for existence here, so
     # regional_main_pages is always passed empty - confirm this is intended.
     regional_main_pages = list()
     if web_site_passport is None:
         # The placeholder was never filled in before, so the log showed a
         # literal "{}" instead of the site url.
         self.logger.error(
             "{} is not registered in the web site db, no office information is available for the site"
             .format(site_url))
     project_content_str = TRobotProject.create_project_str(
         site_url,
         regional_main_pages,
         disable_search_engine=not self.args.enable_search_engines)
     self.worker_2_running_tasks[worker_ip].append(remote_call)
     return remote_call, project_content_str.encode("utf8")