Example #1
0
    def generateCrawler(self):
        """Spawn one SubCrawler worker thread per configured thread slot.

        Before starting each worker, make sure its per-user Elasticsearch
        index ``u<userId>_<crawlerName>_extract`` exists: probe the mapping
        endpoint with a GET, and on a non-200 answer create the index with
        a fixed settings/mappings payload.
        """
        worker_count = self.getThreadnum()
        run_duration = self.getDuration()
        shared_lock = threading.Lock()  # shared by all SubCrawler workers
        es_base = "http://test-mhis-service.pingan.com.cn/elasticsearch/u"
        user_id = str(self.getUser()["userId"])

        for slot in range(worker_count):
            crawler_name = f"corpuscollect-crawler{slot + 1}"
            index_url = f"{es_base}{user_id}_{crawler_name}_extract"

            # Probe the mapping endpoint; a non-200 status means the
            # per-user extract index does not exist yet.
            probe = Configure.get_es_client().request(
                "GET",
                f"{index_url}/process_task/_mapping",
                body=None,
                headers={"Content-Type": "application/json"})

            if probe.status != 200:
                index_spec = {
                    "settings": {
                        "index": {
                            "number_of_shards": 3,
                            "number_of_replicas": 1
                        }
                    },
                    "mappings": {
                        "process_task": {
                            "properties": {
                                "scenarioId": {"type": "keyword"},
                                "href": {"type": "text"},
                                "sceneno": {"type": "integer"},
                                "gageno": {"type": "integer"},
                                "configuration": {"type": "object"}
                            }
                        }
                    }
                }
                create_resp = Configure.get_es_client().request(
                    "PUT",
                    index_url,
                    body=json.dumps(index_spec).encode('utf-8'),
                    headers={"Content-Type": "application/json"})
                if create_resp.status != 200:
                    print("User extract task indice did not create.")
                print(create_resp.data.decode('utf-8'))

            worker = SubCrawler(crawler_name,
                                p_scenario=self,
                                p_lock=shared_lock,
                                p_duration=run_duration)
            worker.start()
Example #2
0
    def writeextracttask(p_indice, p_type, p_scenarioid, p_sceneno, p_pageno,
                         p_uri):
        """Index (PUT) one extract-task document into Elasticsearch.

        The document id is a hash of the URI, so re-submitting the same URI
        overwrites the existing task instead of creating a duplicate.

        :param p_indice: target Elasticsearch index name.
        :param p_type: target document type within the index.
        :param p_scenarioid: scenario id stored as ``scenarioId``.
        :param p_sceneno: scene number stored as ``sceneno``.
        :param p_pageno: page number stored as ``pageno``.
        :param p_uri: page URI stored as ``href`` and hashed into the doc id.
        """
        # Renamed from `id` to avoid shadowing the builtin.
        doc_id = Util.hash(p_uri)

        task_doc = {
            "scenarioId": p_scenarioid,
            "sceneno": p_sceneno,
            "pageno": p_pageno,
            "href": p_uri
        }
        encoded_data = json.dumps(task_doc).encode('utf-8')
        response = Configure.get_es_client().request(
            "PUT",
            "http://test-mhis-service.pingan.com.cn/elasticsearch/" +
            p_indice + "/" + p_type + "/" + doc_id,
            body=encoded_data,
            headers={"Content-Type": "application/json"})
        print("write extract: " + str(response.status))
Example #3
0
 def getoneextracttask(p_indice, p_type):
     """Fetch one pending extract-task document from Elasticsearch.

     Runs a ``match_all`` search limited to a single hit and returns a
     ``(doc_id, source_dict)`` pair, or ``(None, None)`` when the request
     fails or the index holds no documents.
     """
     search_url = ("http://test-mhis-service.pingan.com.cn/elasticsearch/" +
                   p_indice + "/" + p_type + "/_search")
     query = {"from": 0, "size": 1, "query": {"match_all": {}}}
     payload = json.dumps(query).encode('utf-8')

     print(search_url)
     response = Configure.get_es_client().request(
         "GET",
         search_url,
         body=payload,
         headers={"Content-Type": "application/json"})

     # Guard clauses: bail out to (None, None) on failure or empty result.
     if response.status != 200:
         return (None, None)
     hits = json.loads(response.data.decode('utf-8'))["hits"]
     if hits["total"] > 0:
         first = hits["hits"][0]
         return (first["_id"], first["_source"])
     return (None, None)
Example #4
0
 def submit_user_specified_task_immediately(p_userid,
                                            p_scenarioid,
                                            p_href=None):
     """Run every stored custom task for a scenario right now.

     Searches the user's ``u<p_userid>_indice_base`` index for
     ``custom_task`` documents whose ``scenarioId`` matches and executes an
     ``Executor`` action for each hit. Failed searches are silently ignored.

     :param p_userid: user id; selects the per-user index name.
     :param p_scenarioid: scenario id matched against ``scenarioId``.
     :param p_href: currently unused; kept for interface compatibility.
     """
     query = {"query": {"match": {"scenarioId": p_scenarioid}}}
     encoded_data = json.dumps(query).encode('utf-8')
     response = Configure.get_es_client().request(
         "GET",
         "http://test-mhis-service.pingan.com.cn/elasticsearch/u" +
         str(p_userid) + "_indice_base/custom_task/_search?pretty",
         body=encoded_data,
         headers={"Content-Type": "application/json"})
     # Early return keeps the success path flat.
     if response.status != 200:
         return
     hits = json.loads(response.data.decode('utf-8'))["hits"]
     if hits["total"] > 0:
         # Original used enumerate() but never used the index.
         for doc in hits["hits"]:
             scenario = doc["_source"]
             print(scenario)
             executor = Executor(p_scenario=scenario)
             executor.action()
Example #5
0
 def deletedoc(p_indice, p_type, p_id):
     """Delete a single document from Elasticsearch; return the HTTP status."""
     target = ("http://test-mhis-service.pingan.com.cn/elasticsearch/" +
               p_indice + "/" + p_type + "/" + p_id)
     result = Configure.get_es_client().request("DELETE", target)
     print(result.status)
     return result.status