def data_acquisition_download(config):
    """Download data for a specific query configuration and write it to files.

    Args:
        config: argparse arguments passed by the user.

    Returns:
        None.
    """
    bosdyn.client.util.setup_logging(config.verbose)
    sdk = bosdyn.client.create_standard_sdk('DataAcquisitionDownloadExample')
    robot = sdk.create_robot(config.hostname)
    robot.authenticate(config.username, config.password)
    query_params = None
    try:
        from_timestamp = Timestamp()
        from_timestamp.FromJsonString(config.query_from_timestamp)
        to_timestamp = Timestamp()
        to_timestamp.FromJsonString(config.query_to_timestamp)
        query_params = data_acquisition_store_pb2.DataQueryParams(
            time_range=data_acquisition_store_pb2.TimeRangeQuery(
                from_timestamp=from_timestamp, to_timestamp=to_timestamp))
    except ValueError as val_err:
        print("Value Exception:\n" + str(val_err))
    download_data_REST(query_params, config.hostname, robot.user_token,
                       config.destination_folder, config.additional_REST_params)
def create_cluster(self):
    """Create the cluster."""
    print('Creating cluster...')
    # idle_delete_ttl only accepts a google.protobuf.Duration value.
    start = Timestamp()
    end = Timestamp()
    duration = Duration()
    start.FromJsonString('2019-06-01T10:00:20.021-05:00')
    end.FromJsonString('2019-06-01T10:10:20.021-05:00')
    duration.seconds = end.seconds - start.seconds  # duration will be 10 minutes
    zone_uri = \
        'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format(
            self.project_id, self.zone)
    cluster_data = {
        'project_id': self.project_id,
        'cluster_name': self.cluster_name,
        'config': {
            'gce_cluster_config': {
                'zone_uri': zone_uri,
                'metadata': {
                    'PIP_PACKAGES': 'pandas requests beautifulsoup4 PyMySQL'
                }
            },
            'master_config': {
                'num_instances': 1,
                'machine_type_uri': 'n1-standard-8'
            },
            'worker_config': {
                'num_instances': 2,
                'machine_type_uri': 'n1-standard-8',
            },
            'software_config': {
                'image_version': '1.4-ubuntu18',
                'properties': {
                    'dataproc:alpha.state.shuffle.hcfs.enabled': 'false'
                }
            },
            'lifecycle_config': {
                'idle_delete_ttl': duration
            },
            'initialization_actions': [{
                'executable_file': 'gs://sparkrecommendationengine/packages.sh'
            }]
        }
    }
    cluster = self.dataproc_cluster_client.create_cluster(
        self.project_id, self.region, cluster_data)
    cluster.add_done_callback(self._callback)
    global waiting_callback
    waiting_callback = True
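# A sketch of an equivalent way to derive the 10-minute Duration above using
# only well-known-type APIs (illustrative; not part of the original class):
from google.protobuf.duration_pb2 import Duration
from google.protobuf.timestamp_pb2 import Timestamp

start, end, duration = Timestamp(), Timestamp(), Duration()
start.FromJsonString('2019-06-01T10:00:20.021-05:00')
end.FromJsonString('2019-06-01T10:10:20.021-05:00')
# FromTimedelta fills both seconds and nanos, so sub-second parts are kept too.
duration.FromTimedelta(end.ToDatetime() - start.ToDatetime())
assert duration.seconds == 600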
def setUp(self):
    self.config = test_utils.ConsumerMockConfig()
    scan_start_time = Timestamp()
    scan_start_time.FromJsonString("1991-01-01T00:00:00Z")
    scan_info = engine_pb2.ScanInfo(
        scan_start_time=scan_start_time,
        scan_uuid='dd1794f2-544d-456b-a45a-a2bec53633b1')
    scan_results = engine_pb2.LaunchToolResponse(scan_info=scan_info)
    scan_results.tool_name = 'unit_tests'

    issue = issue_pb2.Issue()
    issue.target = 'target.py:0'
    issue.type = "test"
    issue.title = "test title"
    issue.cvss = 2.0
    issue.description = "test.description"
    issue.severity = issue_pb2.Severity.SEVERITY_LOW
    issue.confidence = issue_pb2.Confidence.CONFIDENCE_LOW
    scan_results.issues.extend([issue])

    first_seen = Timestamp()
    first_seen.FromJsonString("1992-02-02T00:00:00Z")
    enriched_issue = issue_pb2.EnrichedIssue(first_seen=first_seen)
    enriched_issue.raw_issue.CopyFrom(issue)
    enriched_issue.count = 2
    enriched_issue.false_positive = True
    enriched_scan_results = engine_pb2.EnrichedLaunchToolResponse(
        original_results=scan_results,
    )
    enriched_scan_results.issues.extend([enriched_issue])

    self.enriched_dtemp = tempfile.mkdtemp(prefix="enriched_",
                                           dir=self.config.pvc_location)
    self.enriched, _ = tempfile.mkstemp(prefix="enriched_",
                                        dir=self.enriched_dtemp,
                                        suffix=".pb")
    self.raw_dtemp = tempfile.mkdtemp(prefix="raw_",
                                      dir=self.config.pvc_location)
    self.raw, _ = tempfile.mkstemp(prefix="raw_", dir=self.raw_dtemp,
                                   suffix=".pb")

    with open(self.enriched, "wb") as f:
        f.write(enriched_scan_results.SerializeToString())
    with open(self.raw, "wb") as f:
        f.write(scan_results.SerializeToString())
def _generate_pipeline_stages(self, time_series, pipeline_id):
    from ngcd_common import events_pb2
    from google.protobuf.timestamp_pb2 import Timestamp
    from google.protobuf.json_format import MessageToDict
    import random

    pipeline_stage_id = 0
    for i in range(0, 4):
        time = next(time_series)[0].isoformat()
        if i % 2 == 0:
            # Generate started event
            ts = Timestamp()
            ts.FromJsonString(time)
            event_pb = events_pb2.PipelineStageStarted(
                uuid=str(pipeline_stage_id),
                pipeline_uuid=str(pipeline_id),
                timestamp=ts)
            event_model = Event(id=self._next_global_id(),
                                type='PipelineStageStarted',
                                body=MessageToDict(
                                    event_pb,
                                    including_default_value_fields=True),
                                event_origin_time=time)
            yield event_model
        else:
            # Generate finished event
            ts = Timestamp()
            ts.FromJsonString(time)
            event_pb = events_pb2.PipelineStageFinished(
                uuid=str(pipeline_stage_id),
                pipeline_uuid=str(pipeline_id),
                timestamp=ts,
                result=random.choice([
                    events_pb2.SUCCESS, events_pb2.FAILURE,
                    events_pb2.ABORTED
                ]),
                duration_ms=random.randint(100, 10000))
            event_model = Event(id=self._next_global_id(),
                                type='PipelineStageFinished',
                                body=MessageToDict(
                                    event_pb,
                                    including_default_value_fields=True),
                                event_origin_time=time)
            pipeline_stage_id = pipeline_stage_id + 1
            yield event_model
def _get_dict_to_fill(message):
    """Populate an empty dictionary from the message descriptor fields.

    This solves the problem of proto not sending zeroed values, e.g. an empty
    string (""), a zero value in an int, the 0 constant of an enum, etc. We
    populate an empty dictionary with the default values and then let the loop
    over the proto object override those defaults.
    """
    default_val_dct = {}
    for field in message.DESCRIPTOR.fields:
        if field.label == FieldDescriptor.LABEL_REPEATED:
            if _is_field_a_map(field, message):
                val = {}
            else:
                val = []
        elif field.type == FieldDescriptor.TYPE_MESSAGE:
            if field.message_type.full_name == _TIMESTAMP_MESSAGE_TYPE_NAME:
                val = Timestamp()
                val.FromJsonString(_DEFAULT_TIMESTAMP)
            else:
                val = {}
        elif field.type == FieldDescriptor.TYPE_ENUM:
            # The first enum value must be zero in proto3, so a missing enum
            # value can be implicitly assumed to mean the value 0.
            val = _enum_label_from_constant(field, 0)
        elif field.type in FIELD_DEFAULT_VALS:
            val = FIELD_DEFAULT_VALS.get(field.type, "")
        default_val_dct[field.name] = val
    return default_val_dct
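# A tiny self-contained illustration of iterating DESCRIPTOR.fields to read
# field metadata and defaults, here against the well-known Duration message.
# (Sketch only; _is_field_a_map, _enum_label_from_constant, FIELD_DEFAULT_VALS,
# etc. above are helpers private to the original module.)
from google.protobuf.duration_pb2 import Duration
from google.protobuf.descriptor import FieldDescriptor

for field in Duration.DESCRIPTOR.fields:
    print(field.name, field.type, field.default_value)
# -> seconds 3 0   (3 == FieldDescriptor.TYPE_INT64)
#    nanos 5 0     (5 == FieldDescriptor.TYPE_INT32)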
def decode_attribute_event_time(attr: str) -> datetime.datetime:
    try:
        ts = Timestamp()
        ts.FromJsonString(attr)
        return ts.ToDatetime()
    except ValueError:
        raise InvalidArgument("Invalid value for event time attribute.")
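# Round-trip sketch of the conversion used above (standalone illustration):
# FromJsonString parses RFC 3339, ToDatetime returns a naive UTC datetime.
from google.protobuf.timestamp_pb2 import Timestamp

ts = Timestamp()
ts.FromJsonString("2021-03-04T05:06:07.123456Z")
dt = ts.ToDatetime()  # datetime.datetime(2021, 3, 4, 5, 6, 7, 123456), no tzinfo
assert ts.ToJsonString() == "2021-03-04T05:06:07.123456Z"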
def setUp(self):
    self.config = {
        'dry_run': True,
        'es_index': 'dracon',
        'es_url': 'https://some_test.url.somewhere.io:443',
        'pvc_location': './'
    }
    # Create a scan results object and serialize it to a file
    ts = Timestamp()
    ts.FromJsonString("1991-01-01T00:00:00Z")
    scan_results = engine_pb2.LaunchToolResponse(
        scan_info=engine_pb2.ScanInfo(
            scan_uuid='dd1794f2-544d-456b-a45a-a2bec53633b1',
            scan_start_time=ts,
        ),
        tool_name='bandit',
    )
    issue = issue_pb2.Issue()
    issue.target = 'target.py:0'
    scan_results.issues.extend([issue])
    enriched_scan_results = engine_pb2.EnrichedLaunchToolResponse(
        original_results=scan_results,
    )
    with open(self.config['pvc_location'] + "example_response.pb", "wb") as f:
        f.write(enriched_scan_results.SerializeToString())
def setUp(self):
    dct = {}
    dct["a_str"] = "Neeraj Koul"
    dct["an_enum"] = "second"
    dct["an_int"] = 2
    dct["lst_ints"] = [0, 1, 2]
    # Note: long() is Python 2's integer type; under Python 3 these would be
    # plain ints.
    dct["lst_messages"] = [{
        "a_str": "first_five_non_zero_odd_nos",
        "a_long": long(5),
        "lst_longs": [long(1), long(3), long(5), long(7), long(9)]
    }, {
        "a_str": "first_three_prime_nos",
        "a_long": long(3),
        "lst_longs": [long(2), long(3), long(5)]
    }]
    dct["lst_enums"] = ["first", "second", "first"]
    dct["int_to_lst_ints_map"] = {
        1: {"lst_ints": [0, 1]},
        2: {"lst_ints": [2, 3]},
        3: {"lst_ints": [4, 5]}
    }
    dct["str_to_message_map"] = {
        "where_from": {
            "a_str": "Kashmir",
            "a_long": long(1),
            "lst_longs": [long(1), long(2), long(3)]
        }
    }
    dct["str_to_int_map"] = {"some_str": 1}
    dct["str_to_enum_map"] = {"first_key": "first", "second_key": "second"}
    dct["sub_message"] = {
        "a_str": "bangalore",
        "a_long": long(560048),
        "lst_longs": [long(1), long(2), long(3)]
    }
    timestamp_1, timestamp_2, timestamp_3, timestamp_4 = \
        Timestamp(), Timestamp(), Timestamp(), Timestamp()
    timestamp_1.FromJsonString("2018-06-01T05:30:00+5:30")
    timestamp_2.FromJsonString("2018-06-02T00:00:00Z")
    timestamp_3.FromJsonString("2018-06-03T00:00:00Z")
    timestamp_4.FromJsonString("2018-06-04T00:00:00Z")
    dct["a_timestamp"] = timestamp_1
    dct["lst_timestamps"] = [timestamp_2, timestamp_3]
    dct["str_to_timestamp_map"] = {"some_timestamp": timestamp_4}
    self.data_dct = dct
    self.main_msg_fields = self.data_dct.keys()
def test_convert_to_utc(self):
    ts = Timestamp()
    ts.FromJsonString("2018-06-01T05:30:00+5:30")
    ts_old_seconds = ts.seconds
    convert_to_utc(ts)
    ts_new_seconds = ts.seconds
    # A +05:30 offset is 5 * 3600 + 30 * 60 = 19800 seconds ahead of UTC.
    self.assertEqual(ts_old_seconds - ts_new_seconds,
                     (5 * 60 * 60 + 30 * 60))
def to_proto_tweet(db_tweet):
    """Convert db tweet to proto tweet."""
    created_at = Timestamp()
    # Assumes created_at is timezone-aware, so isoformat() includes a UTC
    # offset; FromJsonString requires an RFC 3339 string with 'Z' or an offset.
    created_at.FromJsonString(db_tweet.created_at.isoformat())
    return ProtoTweet(id=db_tweet.id,
                      user_id=db_tweet.user_id,
                      content=db_tweet.content,
                      latitude=db_tweet.latitude,
                      longtitude=db_tweet.longtitude,
                      num_favorites=db_tweet.num_favorites,
                      created_at=created_at)
def test_convert_to_local_timezone(self):
    ts = Timestamp()
    ts.FromJsonString("2018-06-01T00:00:00Z")
    ts_old_seconds = ts.seconds
    # time.timezone is the local non-DST offset in seconds west of UTC;
    # time.altzone applies when DST is in effect.
    offset = time.timezone if (time.localtime().tm_isdst == 0) else time.altzone
    convert_to_local_timezone(ts)
    ts_new_seconds = ts.seconds
    self.assertEqual(ts_old_seconds - ts_new_seconds, offset)
def change_timestamp_type(value):
    if isinstance(value, datetime):
        ts = Timestamp()
        ts.FromDatetime(value)
        return ts
    elif isinstance(value, str):
        ts = Timestamp()
        ts.FromJsonString(value)
        return ts
    else:
        return value
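# Hedged usage sketch for change_timestamp_type (illustrative values only;
# note that FromDatetime treats a naive datetime as UTC):
from datetime import datetime

assert change_timestamp_type(datetime(2020, 1, 1)).seconds == 1577836800
assert change_timestamp_type("2020-01-01T00:00:00Z").seconds == 1577836800
assert change_timestamp_type(42) == 42  # anything else passes through unchanged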
def test_log_message(mmp_servicer, caplog):
    timestamp = Timestamp()
    timestamp.FromJsonString("1970-01-01T00:00:00.000Z")
    message = mmp.LogMessage(instance_id='test_instance_id',
                             timestamp=timestamp,
                             level=mmp.LOG_LEVEL_WARNING,
                             text='Testing log message')
    result = mmp_servicer.SubmitLogMessage(message, None)
    assert isinstance(result, mmp.LogResult)
    assert caplog.records[0].name == 'test_instance_id'
    assert caplog.records[0].time_stamp == '1970-01-01T00:00:00Z'
    assert caplog.records[0].levelname == 'WARNING'
    assert caplog.records[0].message == 'Testing log message'
def get_pages(config):
    """Get data pages from robot"""
    bosdyn.client.util.setup_logging(config.verbose)
    sdk = bosdyn.client.create_standard_sdk('GetPagesClient')
    robot = sdk.create_robot(config.hostname)
    robot.authenticate(config.username, config.password)
    service_client = robot.ensure_client(DataServiceClient.default_service_name)
    start_timestamp = None
    end_timestamp = None
    if config.start:
        start_timestamp = Timestamp()
        start_timestamp.FromJsonString(config.start)
    if config.end:
        end_timestamp = Timestamp()
        end_timestamp.FromJsonString(config.end)
    print(service_client.get_data_pages(
        TimeRange(start=start_timestamp, end=end_timestamp)))
def proto_ts_from_datetime_str(dt):
    """Converts a string datetime in ISO format to a protobuf timestamp.

    :type dt: str
    :param dt: string with datetime in ISO format
    :rtype: :class:`~google.protobuf.timestamp_pb2.Timestamp`
    :returns: protobuf timestamp
    """
    ts = Timestamp()
    if dt is not None:
        try:
            ts.FromJsonString(dt)
        except ParseError:
            # Fall back to the default epoch timestamp on unparseable input.
            pass
    return ts
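# Illustrative behaviour sketch (not part of the original module); whether a
# malformed string is swallowed depends on the installed protobuf version
# raising the ParseError caught above.
assert proto_ts_from_datetime_str("2019-05-06T07:08:09Z").seconds == 1557126489
assert proto_ts_from_datetime_str(None).seconds == 0  # default epoch Timestamp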
def mock_CreateXYZ(msg: xyz_pb2.CreateXYZRequest, **kwargs) -> xyz_pb2.XYZ:
    assert isinstance(msg, xyz_pb2.CreateXYZRequest)
    expires_timestamp = Timestamp()
    expires_timestamp.FromJsonString("2003-01-02T04:05:06.789+00:00")
    return xyz_pb2.XYZ(
        id="mclovin",
        name=msg.name,
        description=msg.description,
        serialized_graft=msg.serialized_graft,
        typespec=msg.typespec,
        parameters=msg.parameters,
        public=msg.public,
        viz_options=msg.viz_options,
        expires_timestamp=expires_timestamp,
        channel=msg.channel,
        client_version=msg.client_version,
    )
def produce_feature_rows(entity_name, feature_infos, feature_values_filepath,
                         bootstrap_servers, topic):
    producer = KafkaProducer(bootstrap_servers=bootstrap_servers)
    feature_values = pd.read_csv(
        feature_values_filepath,
        names=["id", "event_timestamp"] + [f["name"] for f in feature_infos],
        dtype=dict([("id", np.string_)] +
                   [(f["name"], f["dtype"]) for f in feature_infos]),
        parse_dates=["event_timestamp"],
    )

    for i, row in feature_values.iterrows():
        feature_row = FeatureRow()
        feature_row.entityKey = row["id"]
        feature_row.entityName = entity_name

        timestamp = Timestamp()
        timestamp.FromJsonString(
            row["event_timestamp"].strftime("%Y-%m-%dT%H:%M:%SZ"))
        feature_row.eventTimestamp.CopyFrom(timestamp)

        for info in feature_infos:
            feature = Feature()
            feature.id = info["id"]
            feature_value = Value()
            feature_name = info["name"]
            # Compare with == rather than `is`: identity checks against string
            # literals are unreliable.
            if info["dtype"] == "Int64":
                feature_value.int64Val = row[feature_name]
            elif info["dtype"] == "Int32":
                feature_value.int32Val = row[feature_name]
            elif info["dtype"] == np.float64:
                feature_value.doubleVal = row[feature_name]
            else:
                raise RuntimeError(
                    f"Unsupported dtype: {info['dtype']}\n"
                    "Supported valueType: INT32, INT64, FLOAT, DOUBLE\n"
                    "Please update your feature specs in testdata/feature_specs folder"
                )
            feature.value.CopyFrom(feature_value)
            feature_row.features.extend([feature])

        producer.send(topic, feature_row.SerializeToString())
        producer.flush()
def task_data_get_last_updated(
    request: TaskDataGetLastUpdatedRequest,
) -> TaskDataGetLastUpdatedResponse:
    (error, result) = check_crawler_request(request.index, request.crawler, None)
    if error:
        # Note: here we are abusing the fact that TaskDataGetLastUpdatedError
        # is a strict subset of TaskDataCommitRequest
        return TaskDataGetLastUpdatedResponse(error=result)
    db = create_db_connection(request.index)
    metadata = db.get_task_crawler_metadata(result.name)
    # TODO(add details to the protobuf description)
    # if "details" in request.args and request.args.get("details") == "true":
    #     return jsonify(metadata)
    timestamp = Timestamp()
    if not metadata.get("last_commit_at"):
        timestamp.FromDatetime(result.updated_since)
    else:
        # The stored value lacks a timezone designator; append "Z" so it
        # parses as RFC 3339 UTC.
        timestamp.FromJsonString(metadata["last_commit_at"] + "Z")
    return TaskDataGetLastUpdatedResponse(timestamp=timestamp)
def test_properties(self, stub):
    id_ = "foo"
    obj = types.Int(1)
    format = "geotiff"
    destination = {"type": "email"}
    expires_timestamp = Timestamp()
    expires_timestamp.FromJsonString("2003-01-02T04:05:06.789+00:00")
    job_state = job_pb2.Job.State(stage=job_pb2.Job.Stage.QUEUED)

    def create_side_effect(req, **kwargs):
        return job_pb2.Job(
            id=id_,
            serialized_graft=req.serialized_graft,
            typespec=req.typespec,
            arguments=req.arguments,
            geoctx_graft=req.geoctx_graft,
            no_ruster=req.no_ruster,
            channel=req.channel,
            client_version=__version__,
            expires_timestamp=expires_timestamp,
            no_cache=req.no_cache,
            trace=req.trace,
            state=job_state,
            type=req.type,
            format=user_format_to_proto(format),
            destination=user_destination_to_proto(destination),
        )

    stub.return_value.CreateJob.side_effect = create_side_effect

    job = Job(obj, format=format, destination=destination)
    job_from_msg = Job._from_proto(job._message, client=job._client)

    assert job.object is obj
    utils.assert_graft_is_scope_isolated_equvalent(
        job_from_msg.object.graft, obj.graft)
    assert job_from_msg.type is type(job_from_msg.object) is type(obj)  # noqa: E721
    assert job.result_type == "Int"
    assert job.id == id_
    assert job.arguments == {}
    assert job.geoctx is None
    assert job.channel == _channel.__channel__
    assert job.stage == "QUEUED"
    assert job.created_datetime is None
    assert job.updated_datetime is None
    assert job.expires_datetime == pb_timestamp_to_datetime(expires_timestamp)
    assert job.runtime is None
    assert job.error is None
    assert job.done is False
    assert job.cache_enabled is True
    assert job.version == __version__
    assert job.format == has_proto_to_user_dict(job._message.format)
    assert job.destination == has_proto_to_user_dict(job._message.destination)

    job._message.state.stage = job_pb2.Job.Stage.SUCCEEDED
    job._message.timestamp = 1
    job._message.state.timestamp = 2

    assert job.stage == "SUCCEEDED"
    assert job.created_datetime == pb_milliseconds_to_datetime(1)
    assert job.updated_datetime == pb_milliseconds_to_datetime(2)
    assert job.runtime == job.updated_datetime - job.created_datetime
    assert job.error is None
    assert job.done is True

    job._message.state.stage = job_pb2.Job.Stage.FAILED
    job._message.state.error.code = errors_pb2.ERROR_INVALID
    job._message.state.error.message = "test"

    assert job.stage == "FAILED"
    assert isinstance(job.error, JobInvalid)
    assert job.done is True
def timestamp_from_json_string(json_string):
    ts = Timestamp()
    ts.FromJsonString(json_string)
    return ts
def _to_timestamp(json_string):
    timestamp_proto = Timestamp()
    timestamp_proto.FromJsonString(json_string)
    return timestamp_proto
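# Examples of inputs FromJsonString accepts (RFC 3339 with 'Z' or a numeric
# UTC offset); these assertions hold for either helper above:
assert _to_timestamp("1970-01-01T00:00:01Z").seconds == 1
assert _to_timestamp("1970-01-01T05:30:00+05:30").seconds == 0
assert _to_timestamp("1970-01-01T00:00:00.5Z").nanos == 500000000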
def _generate_pipeline_events(self):
    from ngcd_common import events_pb2
    from google.protobuf.timestamp_pb2 import Timestamp
    from google.protobuf.json_format import MessageToDict
    import random
    from datetime import timedelta
    import pytz

    pipeline_id = 0
    times = self.fake.time_series(start_date="-5d",
                                  end_date="now",
                                  precision=timedelta(minutes=1),
                                  distrib=None,
                                  tzinfo=pytz.utc)
    for i in range(0, 10):
        time = next(times)[0].isoformat()
        if i % 2 == 0:
            # Generate started event
            ts = Timestamp()
            ts.FromJsonString(time)
            event_pb = events_pb2.PipelineStarted(uuid=str(pipeline_id),
                                                  timestamp=ts)
            event_model = Event(id=self._next_global_id(),
                                type='PipelineStarted',
                                body=MessageToDict(
                                    event_pb,
                                    including_default_value_fields=True),
                                event_origin_time=time)
            yield event_model
            yield from self._generate_pipeline_stages(times, pipeline_id)
        else:
            # Generate finished event
            ts = Timestamp()
            ts.FromJsonString(time)
            event_pb = events_pb2.PipelineFinished(
                uuid=str(pipeline_id),
                timestamp=ts,
                result=random.choice([
                    events_pb2.SUCCESS, events_pb2.FAILURE,
                    events_pb2.ABORTED
                ]),
                duration_ms=random.randint(100, 10000))
            event_model = Event(id=self._next_global_id(),
                                type='PipelineFinished',
                                body=MessageToDict(
                                    event_pb,
                                    including_default_value_fields=True),
                                event_origin_time=time)
            pipeline_id = pipeline_id + 1
            yield event_model

    # Create a started one, to be able to check in-progress
    time = next(times)[0].isoformat()
    ts = Timestamp()
    ts.FromJsonString(time)
    event_pb = events_pb2.PipelineStarted(uuid=str(pipeline_id), timestamp=ts)
    event_model = Event(id=self._next_global_id(),
                        type='PipelineStarted',
                        body=MessageToDict(
                            event_pb,
                            including_default_value_fields=True),
                        event_origin_time=time)
    yield event_model
    yield from self._generate_pipeline_stages(times, pipeline_id)