Example #1
0
def retrieve_portal_manifest(etcd, portal_name):
    """Load the manifest stored in etcd for `portal_name`.

    The data is read from the key formed by joining `portal_name` and
    'manifest', then parsed as a text-format DataJoinPortalManifest.

    Args:
        etcd: client object exposing `get_data(key)`.
        portal_name: name of the portal; used as the leading key component.

    Returns:
        The value returned by `text_format.Parse` for the stored data.

    Raises:
        ValueError: if `get_data` returns None for the manifest key.
    """
    manifest_text = etcd.get_data(os.path.join(portal_name, 'manifest'))
    if manifest_text is not None:
        manifest = common_pb.DataJoinPortalManifest()
        return text_format.Parse(manifest_text, manifest)
    # Nothing stored under the key: the manifest was never committed.
    raise ValueError(
        "the manifest of {} should be stored in etcd".format(portal_name)
    )
Example #2
0
 def _setUpPortalManifest(self):
     """Clear both etcd stores and commit a fresh portal manifest to each.

     Everything under the 'test_portal' prefix is deleted from `_etcd_l`
     and `_etcd_f`. One manifest per store is then built, kept on `self`
     and committed. The manifests share the portal name and an output
     partition count of 2; they use different input partition counts
     (4 and 2) and different base directories. `begin_timestamp` is
     derived from `datetime.now()` separately for each manifest.

     NOTE(review): the `_l` / `_f` suffixes presumably stand for leader
     and follower -- confirm against the enclosing test class.
     """
     portal_name = 'test_portal'
     self._portal_name = portal_name
     self._etcd_l.delete_prefix(portal_name)
     self._etcd_f.delete_prefix(portal_name)

     def hourly_now():
         # Current time converted to a timestamp and passed through the
         # hourly trim helper.
         current = common.convert_datetime_to_timestamp(datetime.now())
         return common.trim_timestamp_by_hourly(current)

     manifest_l = common_pb.DataJoinPortalManifest(
         name=portal_name,
         input_partition_num=4,
         output_partition_num=2,
         input_data_base_dir='./portal_input_l',
         output_data_base_dir='./portal_output_l',
         begin_timestamp=hourly_now())
     self._portal_manifest_l = manifest_l

     manifest_f = common_pb.DataJoinPortalManifest(
         name=portal_name,
         input_partition_num=2,
         output_partition_num=2,
         input_data_base_dir='./portal_input_f',
         output_data_base_dir='./portal_output_f',
         begin_timestamp=hourly_now())
     self._portal_manifest_f = manifest_f

     common.commit_portal_manifest(self._etcd_l, manifest_l)
     common.commit_portal_manifest(self._etcd_f, manifest_f)
Example #3
0
 def _update_portal_commited_timestamp(self, new_committed_datetime):
     """Commit a copy of the portal manifest with a newer committed time.

     While holding `self._lock`, the current manifest's committed
     timestamp is checked against `new_committed_datetime` and the
     manifest is copied. The copy's `committed_timestamp` is then merged
     with the hourly-trimmed new time and the copy is committed to etcd.
     This method does not reassign `self._portal_manifest`.

     Args:
         new_committed_datetime: datetime that must be strictly greater
             than the currently committed one.

     Returns:
         The new manifest that was committed.

     Raises:
         AssertionError: if `new_committed_datetime` is not greater than
             the currently committed datetime.
     """
     updated_manifest = None
     with self._lock:
         committed_hour = common.trim_timestamp_by_hourly(
             self._portal_manifest.committed_timestamp)
         previous_datetime = common.convert_timestamp_to_datetime(
             committed_hour)
         assert new_committed_datetime > previous_datetime
         # Work on a copy so the manifest held on self is left untouched.
         updated_manifest = common_pb.DataJoinPortalManifest()
         updated_manifest.MergeFrom(self._portal_manifest)
     assert updated_manifest is not None
     # The etcd commit happens after the lock has been released.
     new_timestamp = common.convert_datetime_to_timestamp(
         new_committed_datetime)
     updated_manifest.committed_timestamp.MergeFrom(
         common.trim_timestamp_by_hourly(new_timestamp))
     common.commit_portal_manifest(self._etcd, updated_manifest)
     return updated_manifest
Example #4
0
 def _prepare_test(self):
     """Set up the manifest, portal options and reference time for a test.

     Stores on `self` a 'test_portal' manifest (4 input and 8 output
     partitions), portal options that enable the mock etcd, and the
     current time after an hourly trim. Finishes by calling
     `self._generate_input_data()`.
     """
     manifest = common_pb.DataJoinPortalManifest(
         name='test_portal',
         input_partition_num=4,
         output_partition_num=8,
         input_data_base_dir='./portal_input',
         output_data_base_dir='./portal_output')
     self._portal_manifest = manifest

     validator_options = dj_pb.ExampleValidatorOptions(
         example_validator='EXAMPLE_VALIDATOR',
         validate_event_time=True)
     raw_data_options = dj_pb.RawDataOptions(raw_data_iter='TF_RECORD')
     self._portal_options = dj_pb.DataJoinPotralOptions(
         example_validator=validator_options,
         reducer_buffer_size=128,
         raw_data_options=raw_data_options,
         use_mock_etcd=True)

     # Round-trip through the timestamp helpers so the stored datetime
     # has been through the hourly trim.
     now_timestamp = common.convert_datetime_to_timestamp(datetime.now())
     self._date_time = common.convert_timestamp_to_datetime(
         common.trim_timestamp_by_hourly(now_timestamp))
     self._generate_input_data()