Example #1
0
 def pushToAzure(self, object_name):
     """Upload the object's CSV from the local output folder to Azure Blob Storage.

     Silently does nothing when the local CSV is missing; upload failures
     are printed and appended to azure_error.log (best-effort, no re-raise).
     """
     local_path = "output/{}.csv".format(object_name)
     if not os.path.isfile(local_path):
         return
     blob_name = Config.AZURE_FOLDER_NAME + "/{}.csv".format(object_name)
     try:
         self.__block_blob_service.create_blob_from_path(
             Config.AZURE_CONTAINER_NAME, blob_name, local_path)
         print("^^^^^^ {} object pushed to Azure Storage Blob".format(
             object_name))
     except Exception as inst:
         # Best-effort upload: record the failure and carry on.
         print(inst)
         IOHelper.appendToLog("azure_error.log", "\n\n{}".format(inst))
 def createJob(self, objectName, fieldList):
     """Submit a normal (non-chunked) bulk retrieve job for one object.

     On success the job/batch ids are recorded for the status-poll and
     download loops; any failure is printed and logged to api_error.log.
     """
     print("###### Sending retrieve request: {} ".format(objectName))
     try:
         jid, bid = self.__bulkHelper.createNormalBatch(objectName, fieldList)
         # Bookkeeping so the polling and download loops can find this job.
         self.norm_job_batch_list.append({"job": jid, "batch": bid})
         self.norm_job_download_list.append(jid)
         self.norm_job_object_dict[jid] = objectName
         log_line = "\nobject: {}, job_id: {}, batch_id, {}".format(
             objectName, jid, bid)
         IOHelper.appendToLog("norm_jobs.log", log_line)
     except Exception as err:
         print(err)
         IOHelper.appendToLog("api_error.log", "\n\n{}".format(err))
Example #3
0
 def createJob(self, objectName, fieldList):
     """Submit a PK-chunked bulk retrieve job for one object.

     On success the job and its main batch id are recorded for the chunk
     status-poll loop; failures are printed and logged to api_error.log.
     """
     print("###### Sending retrieve request: {} ".format(objectName))
     try:
         jid, main_bid = self.__bulkHelper.createChunkBatch(
             objectName, fieldList)
         # Remember the job and its main batch for later status polling.
         self.chunk_job_mainbatch_list.append({"job": jid, "batch": main_bid})
         self.chunk_job_object_dict[jid] = objectName
         log_line = "\nobject: {}, job_id: {}, main_batch_id, {}".format(
             objectName, jid, main_bid)
         IOHelper.appendToLog("chunk_job.log", log_line)
     except Exception as err:
         print(err)
         IOHelper.appendToLog("api_error.log", "\n\n{}".format(err))
 def _check_norm_batch_status(self):
     """Poll every outstanding normal-job batch until none remain.

     Completed batches have their first result id queued on
     norm_result_list for the download loop; failed batches are logged
     to extract_error.log and dropped. Sleeps between status calls and
     between full passes to throttle API usage.
     """
     while len(self.__normalHelper.norm_job_batch_list) > 0:
         print("\n\n### Checking norm batch status...")
         print("norm_job_batch_list: {}".format(
             self.__normalHelper.norm_job_batch_list))
         # Iterate over a shallow copy so entries can be removed safely.
         for job_batch_dict in self.__normalHelper.norm_job_batch_list[:]:
             j_id = job_batch_dict['job']
             b_id = job_batch_dict['batch']
             try:
                 b_status = self.__bulkHelper.getBatchStatus(
                     b_id, j_id, True)
                 b_state = b_status['state']
                 object_name = self.__normalHelper.norm_job_object_dict[
                     j_id]
                 print(
                     "\n# norm job: {} (object: {}), batch: {} status: {}".
                     format(j_id, object_name, b_id, b_state))
                 print("# full status result:")
                 print(b_status)
                 if b_state == "Completed":
                     # Only the first result id is taken — assumes a normal
                     # batch yields a single result set; TODO confirm.
                     result_id = self.__bulkHelper.getQueryBatchResultIds(
                         b_id, j_id)[0]
                     self.__normalHelper.norm_result_list.append({
                         "job":
                         j_id,
                         "batch":
                         b_id,
                         "result":
                         result_id
                     })
                     self.__normalHelper.norm_job_batch_list.remove(
                         job_batch_dict)
                 elif b_state == "Failed":
                     # Record the failure and stop polling this batch.
                     IOHelper.appendToLog(
                         "extract_error.log",
                         "\n\n# norm job: {} (object: {}), batch: {} status: {}"
                         .format(j_id, object_name, b_id, b_state))
                     self.__normalHelper.norm_job_batch_list.remove(
                         job_batch_dict)
             except Exception as inst:
                 # API hiccups are logged; the batch is retried next pass.
                 print(inst)
                 IOHelper.appendToLog("api_error.log",
                                      "\n\n{}".format(inst))
             # Throttle per-batch status calls.
             time.sleep(0.5)
         # Wait before the next full polling pass.
         time.sleep(15)
 def _download_norm_batch_result(self):
     """Download results for normal jobs as their batches become ready.

     Consumes entries from norm_result_list (filled by the status-check
     loop), writes each result's CSV to the output folder, closes the
     job, and optionally pushes the file to Azure. Runs until every job
     in norm_job_download_list has been handled.
     """
     while len(self.__normalHelper.norm_job_download_list) > 0:
         print("\n\n### Checking download tasks...")
         print("## norm_job_download_list (To Do): {}".format(
             self.__normalHelper.norm_job_download_list))
         print("## norm_result_list (Ready To Do): {}".format(
             self.__normalHelper.norm_result_list))
         if len(self.__normalHelper.norm_result_list) > 0:
             # Iterate a shallow copy so entries can be removed safely.
             for result_dict in self.__normalHelper.norm_result_list[:]:
                 j_id = result_dict['job']
                 b_id = result_dict['batch']
                 r_id = result_dict['result']
                 object_name = self.__normalHelper.norm_job_object_dict[
                     j_id]
                 if j_id in self.__normalHelper.norm_job_download_list:
                     print("# downloading job: {} - batch: {} - result: {}".
                           format(j_id, b_id, r_id))
                     try:
                         # Stream the raw result body and decode it as UTF-8,
                         # replacing undecodable bytes rather than failing.
                         raw = self.__bulkHelper.getQueryBatchResults(
                             b_id, r_id, j_id, raw=True)
                         str_output = raw.read(decode_content=True).decode(
                             "utf-8", "replace")
                         # save to a file
                         # with open("output/{}.csv".format(object_name), "w") as text_file:
                         # 	text_file.write(str_output)
                         IOHelper.outputObjectToFile(
                             object_name, str_output)
                         self.__normalHelper.norm_job_download_list.remove(
                             j_id)
                         self.__normalHelper.norm_result_list.remove(
                             result_dict)
                         self.__bulkHelper.closeJob(j_id)
                         print("$$$$$$ {} objected downloaded!".format(
                             object_name))
                     except Exception as inst:
                         # Failure leaves the entry in place for a retry
                         # on the next pass.
                         print(inst)
                         IOHelper.appendToLog("api_error.log",
                                              "\n\n{}".format(inst))
                     # push to azure
                     # NOTE(review): runs even if the download above failed —
                     # pushToAzure checks file existence, so this is likely
                     # harmless, but confirm it is intentional.
                     if Config.PUSH_TO_AZURE:
                         self.__azureHelper.pushToAzure(object_name)
                 time.sleep(0.5)
         time.sleep(15)
Example #6
0
 def _check_chunk_main_batch_status(self):
     """Poll the main batch of every outstanding PK-chunked job.

     When a main batch reaches "NotProcessed" (the chunking batch has
     spawned its sub-batches), the job is handed to a download thread.
     A "Failed" main batch is logged to extract_error.log and dropped.
     Runs until the tracked (job, main batch) list is empty, sleeping
     between status calls and between passes.
     """
     while len(self.__chunkHelper.chunk_job_mainbatch_list) > 0:
         print("\n\n### Checking chunk main batch status...")
         print("# chunk_job_mainbatch_list: {}".format(
             self.__chunkHelper.chunk_job_mainbatch_list))
         # Iterate a shallow copy so finished entries can be removed.
         for job_batch_dict in self.__chunkHelper.chunk_job_mainbatch_list[:]:
             j_id, b_id = job_batch_dict['job'], job_batch_dict['batch']
             try:
                 b_state = self.__bulkHelper.getBatchStatus(
                     b_id, j_id, True)['state']
                 object_name = self.__chunkHelper.chunk_job_object_dict[
                     j_id]
                 print("# chunk job: {} (object: {}), batch: {} status: {}".
                       format(j_id, object_name, b_id, b_state))
                 if b_state == "NotProcessed":
                     self.chunk_notprocessed_job_list.append(j_id)
                     self.__chunkHelper.chunk_job_mainbatch_list.remove(
                         job_batch_dict)
                     print("# chunk_notprocessed_job_list: {}".format(
                         ', '.join(self.chunk_notprocessed_job_list)))
                     # Downloading sub-batch results happens on its own
                     # thread so polling of the other jobs continues.
                     download_chunk_batch_result_thread = DownloadChunkBatchResultThread(
                         "Download Chunk Batch Result Thread", object_name,
                         j_id, b_id)
                     download_chunk_batch_result_thread.start()
                 elif b_state == "Failed":
                     IOHelper.appendToLog(
                         "extract_error.log",
                         "\n\n# chunk job: {} (object: {}), batch: {} status: {}"
                         .format(j_id, object_name, b_id, b_state))
                     # BUG FIX: original used a bare `chunk_job_mainbatch_list`
                     # here (NameError at runtime); remove from the helper's
                     # list, consistent with the NotProcessed branch above.
                     self.__chunkHelper.chunk_job_mainbatch_list.remove(
                         job_batch_dict)
             except Exception as inst:
                 print(inst)
                 IOHelper.appendToLog("api_error.log",
                                      "\n\n{}".format(inst))
             # Throttle per-job status calls.
             time.sleep(1)
         # Pause before the next full polling pass.
         time.sleep(40)
Example #7
0
    def _download_chunk_batch_result(self, object_name, job, main_batch):
        """Wait for a PK-chunked job to finish, then download every sub-batch.

        Three phases:
          1. Poll until the main batch reports "NotProcessed" (i.e. the
             chunking batch has spawned its sub-batches).
          2. Poll until every sub-batch (all batches except the main one)
             reports "Completed".
          3. Fetch each sub-batch's result sets and append them to the
             object's output file; then close the job and optionally push
             the file to Azure.
        API errors in any phase are printed and logged; the polling
        phases retry on the next pass.
        """
        # wait until main_batch become "NotProcessed"
        while True:
            try:
                main_batch_state = self.__bulkHelper.getBatchStatus(
                    main_batch, job, True)['state']
                print(
                    "\n# Checking chunk job: {} (object: {}) - main_batch status: {}"
                    .format(job, object_name, main_batch_state))
                if main_batch_state == "NotProcessed":
                    break
                else:
                    time.sleep(40)
            except Exception as inst:
                # NOTE(review): an API error here retries immediately with
                # no sleep — confirm that tight retry is intended.
                print(inst)
                IOHelper.appendToLog("api_error.log", "\n\n{}".format(inst))
        # wait until all the sub-batches become "Completed"
        while True:
            try:
                sub_batches = self.__bulkHelper.getBatchList(job)
                can_break = True
                print(
                    "\n# Checking chunk job: {} (object: {}) - sub_batch status:"
                    .format(job, object_name))
                for b in sub_batches:
                    # The main (chunking) batch never completes; skip it.
                    if b['id'] != main_batch:
                        print("sub_batch: " + b['id'] + " - state: " +
                              b['state'])
                        if b['state'] != "Completed":
                            can_break = False
                if can_break:
                    print("All sub_batches completed!")
                    break
                else:
                    time.sleep(40)
            except Exception as inst:
                print(inst)
                IOHelper.appendToLog("api_error.log", "\n\n{}".format(inst))
        try:
            # Collect sub-batch ids (everything but the main batch), sorted
            # for a deterministic download/write order.
            sub_batch_ids = [
                b['id'] for b in self.__bulkHelper.getBatchList(job)
                if b['id'] != main_batch
            ]
            sub_batch_ids.sort()
            # retrieve data from batch
            for b in sub_batch_ids:
                r_ids = self.__bulkHelper.getQueryBatchResultIds(b, job)
                for r in r_ids:
                    raw = self.__bulkHelper.getQueryBatchResults(b,
                                                                 r,
                                                                 job,
                                                                 raw=True)
                    # Decode as UTF-8, replacing undecodable bytes rather
                    # than failing on bad data.
                    str_output = raw.read(decode_content=True).decode(
                        "utf-8", "replace")
                    print("Writing {} to file.".format(r))
                    # save to a file
                    IOHelper.outputObjectToFile(object_name, str_output)
        except Exception as inst:
            # NOTE(review): a failure mid-download still falls through to
            # closeJob/push below — confirm partial output is acceptable.
            print(inst)
            IOHelper.appendToLog("api_error.log", "\n\n{}".format(inst))
        self.__bulkHelper.closeJob(job)
        print("$$$$$$ {} object downloaded!".format(object_name))

        # push to azure
        if Config.PUSH_TO_AZURE:
            self.__azureHelper.pushToAzure(object_name)