Beispiel #1
0
 def _sync_raw_data_meta(self, process_index):
     """Read the raw data meta stored for `process_index` from the kvstore.

     The key is built from the data source name, this object's partition
     id and `process_index`.

     Returns the stored value parsed into a `dj_pb.RawDataMeta`, or None
     when the kvstore has no data under that key.
     """
     meta_key = common.raw_data_meta_kvstore_key(
         self._data_source.data_source_meta.name,
         self._partition_id,
         process_index,
     )
     serialized = self._kvstore.get_data(meta_key)
     if serialized is None:
         return None
     return text_format.Parse(serialized, dj_pb.RawDataMeta())
Beispiel #2
0
 def _new_index_meta(self, process_index, start_index):
     if self._manifest.next_process_index <= process_index:
         return None
     raw_data_meta = None
     if process_index < len(self._all_metas):
         assert process_index == self._all_metas[process_index][0], \
             "process index should equal {} != {}".format(
                 process_index, self._all_metas[process_index][0]
             )
         raw_data_meta = self._all_metas[process_index][1]
     else:
         assert process_index == len(self._all_metas), \
             "the process index should be the next all metas "\
             "{}(process_index) != {}(size of all_metas)".format(
                     process_index, len(self._all_metas)
                 )
         raw_data_meta = self._sync_raw_data_meta(process_index)
         if raw_data_meta is None:
             logging.fatal("the raw data of partition %d index with "\
                           "%d must in kvstore",
                           self._partition_id, process_index)
             traceback.print_stack()
             os._exit(-1)  # pylint: disable=protected-access
         self._all_metas.append((process_index, raw_data_meta))
     if raw_data_meta.start_index == -1:
         new_meta = dj_pb.RawDataMeta()
         new_meta.MergeFrom(raw_data_meta)
         new_meta.start_index = start_index
         odata = text_format.MessageToString(raw_data_meta)
         ndata = text_format.MessageToString(new_meta)
         kvstore_key = common.raw_data_meta_kvstore_key(
             self._data_source.data_source_meta.name, self._partition_id,
             process_index)
         if not self._kvstore.cas(kvstore_key, odata, ndata):
             raw_data_meta = self._sync_raw_data_meta(process_index)
             assert raw_data_meta is not None, \
                 "the raw data meta of process index {} "\
                 "must not None".format(process_index)
             if raw_data_meta.start_index != start_index:
                 logging.fatal("raw data of partition %d index with "\
                               "%d must start with %d",
                               self._partition_id, process_index,
                               start_index)
                 traceback.print_stack()
                 os._exit(-1)  # pylint: disable=protected-access
     return visitor.IndexMeta(process_index, start_index,
                              raw_data_meta.file_path)
Beispiel #3
0
 def _store_raw_data_metas(self, partition_id, new_raw_data_metas):
     if len(new_raw_data_metas) > 0:
         manifest = self._sync_manifest(partition_id)
         self._local_manifest[partition_id] = None
         process_index = manifest.next_process_index
         for raw_date_meta in new_raw_data_metas:
             kvstore_key = common.raw_data_meta_kvstore_key(
                 self._data_source.data_source_meta.name, partition_id,
                 process_index)
             raw_date_meta.start_index = -1
             data = text_format.MessageToString(raw_date_meta)
             self._kvstore.set_data(kvstore_key, data)
             self._existed_fpath[raw_date_meta.file_path] = \
                     (partition_id, process_index)
             self._update_raw_data_latest_timestamp(partition_id,
                                                    raw_date_meta.timestamp)
             process_index += 1
         if manifest.next_process_index != process_index:
             manifest.next_process_index = process_index
             self._update_manifest(manifest)
         else:
             self._local_manifest[partition_id] = manifest
Beispiel #4
0
 def _process_next_process_index(self, partition_id, manifest):
     assert manifest is not None and manifest.partition_id == partition_id
     next_process_index = manifest.next_process_index
     while True:
         meta_kvstore_key = \
                 common.raw_data_meta_kvstore_key(
                         self._data_source.data_source_meta.name,
                         partition_id, next_process_index
                     )
         data = self._kvstore.get_data(meta_kvstore_key)
         if data is None:
             break
         meta = text_format.Parse(data, dj_pb.RawDataMeta())
         self._existed_fpath[meta.file_path] = \
                 (partition_id, next_process_index)
         self._update_raw_data_latest_timestamp(partition_id,
                                                meta.timestamp)
         next_process_index += 1
     if next_process_index != manifest.next_process_index:
         manifest.next_process_index = next_process_index
         self._update_manifest(manifest)
     else:
         self._local_manifest[partition_id] = manifest