Ejemplo n.º 1
0
 def copy_table(self, src_table: StorageTableABC, dest_table: StorageTableABC):
     """Copy every record of ``src_table`` into ``dest_table``.

     Two source layouts are handled, selected by the source metadata's
     ``get_in_serialized()`` flag:

     * not serialized: ``src_table.read()`` yields text lines. When the
       metadata reports a header (``get_have_head()``), the first line is
       turned into the schema and is not copied. Every other line is split
       on the id delimiter; the first field becomes the key and the
       remaining fields are re-joined into the value.
     * serialized: ``src_table.collect()`` yields ``(key, value)`` pairs that
       are copied as-is, and the schema is taken from the source metadata.

     Records are handed to ``self.put_in_table`` together with the shared
     ``data_temp`` / ``part_of_data`` buffers (presumably it batches writes
     and keeps a data sample -- confirm against its definition). Whatever is
     left in ``data_temp`` afterwards is written with ``put_all``, and the
     destination metadata is updated with the schema and ``part_of_data``.

     :param src_table: table to read from.
     :param dest_table: table to write to.
     """
     src_table_meta = src_table.get_meta()
     LOGGER.info("start copying table")
     LOGGER.info(
         f"source table name: {src_table.get_name()} namespace: {src_table.get_namespace()} engine: {src_table.get_engine()}")
     LOGGER.info(
         f"destination table name: {dest_table.get_name()} namespace: {dest_table.get_namespace()} engine: {dest_table.get_engine()}")
     count = 0
     data_temp = []
     part_of_data = []
     schema = {}
     if not src_table_meta.get_in_serialized():
         # The delimiter is constant for the whole copy: look it up once
         # instead of going through get_meta() twice for every line.
         id_delimiter = src_table_meta.get_id_delimiter()
         # True while the header line has still to be consumed.
         header_pending = bool(src_table_meta.get_have_head())
         for line in src_table.read():
             if header_pending:
                 schema = data_utils.get_header_schema(header_line=line, id_delimiter=id_delimiter)
                 header_pending = False
                 continue
             values = line.rstrip().split(id_delimiter)
             k = values[0]
             v = data_utils.list_to_str(values[1:], id_delimiter=id_delimiter)
             count = self.put_in_table(table=dest_table, k=k, v=v, temp=data_temp, count=count,
                                       part_of_data=part_of_data)
     else:
         for k, v in src_table.collect():
             count = self.put_in_table(table=dest_table, k=k, v=v, temp=data_temp, count=count,
                                       part_of_data=part_of_data)
         schema = src_table.get_meta().get_schema()
     # Write the records still sitting in the temporary buffer.
     if data_temp:
         dest_table.put_all(data_temp)
     LOGGER.info("copy successfully")
     dest_table.get_meta().update_metas(schema=schema, part_of_data=part_of_data)
Ejemplo n.º 2
0
 def save_data_table(self,
                     job_id,
                     dst_table_name,
                     dst_table_namespace,
                     head=True):
     """Load the file at ``self.parameters["file"]`` into ``self.table``.

     The file is read in batches of roughly ``self.MAX_BYTES`` bytes. Each
     line is split on ``self.parameters["id_delimiter"]``: the first field is
     the key and the remaining fields are re-joined into the value. After
     each batch is parsed, a progress percentage is reported through
     ``ControllerClient.update_job`` and the batch is stored with
     ``put_all``. The first batch is also recorded in the table metadata as
     ``part_of_data``.

     Once the file is exhausted, the table metadata receives the final count
     and partition number, ``self.save_meta`` is called, and the count is
     returned.

     :param job_id: job identifier included in every progress report.
     :param dst_table_name: destination table name, forwarded to ``save_meta``.
     :param dst_table_namespace: destination namespace, forwarded to
         ``save_meta``.
     :param head: only when this is exactly ``True`` is the first line
         consumed as a header and converted into the table schema.
     :return: the value of ``self.table.count()`` after the last batch.
     """

     def to_record(raw_line):
         # Split one text line into the (key, value) pair stored in the table.
         fields = raw_line.rstrip().split(self.parameters["id_delimiter"])
         return (
             fields[0],
             data_utils.list_to_str(
                 fields[1:],
                 id_delimiter=self.parameters["id_delimiter"]),
         )

     source_path = self.parameters["file"]
     expected_rows = self.get_count(source_path)
     with open(source_path, 'r') as reader:
         stored_rows = 0
         if head is True:
             header_line = reader.readline()
             # The header is not a data row, so leave it out of the total
             # used for the progress percentage.
             expected_rows -= 1
             meta_handle = self.table.get_meta()
             header_schema = data_utils.get_header_schema(
                 header_line=header_line,
                 id_delimiter=self.parameters["id_delimiter"])
             _, refreshed_meta = meta_handle.update_metas(schema=header_schema)
             self.table.set_meta(refreshed_meta)
         first_batch = True
         while True:
             batch_lines = reader.readlines(self.MAX_BYTES)
             if not batch_lines:
                 # End of file: record the final metadata and finish.
                 table_count = self.table.count()
                 self.table.get_meta().update_metas(
                     count=table_count,
                     partitions=self.parameters["partition"])
                 self.save_meta(dst_table_namespace=dst_table_namespace,
                                dst_table_name=dst_table_name,
                                table_count=table_count)
                 return table_count
             records = [to_record(raw_line) for raw_line in batch_lines]
             stored_rows += len(records)
             # Percentage of rows handled so far, floored to a whole number.
             progress = stored_rows / expected_rows * 100 // 1
             ControllerClient.update_job(job_info={
                 'progress': progress,
                 "job_id": job_id,
                 "role": self.parameters["local"]['role'],
                 "party_id": self.parameters["local"]['party_id']
             })
             self.table.put_all(records)
             if first_batch:
                 # Only the first batch is recorded as the data sample.
                 self.table.get_meta().update_metas(part_of_data=records)
             first_batch = False