def test_validation_with_transformation(self):
    """Check that a validator's return value replaces the metadata value.

    The metaclass is configured with ``validate_transforms=True`` and a
    validator that returns ``list(value)``; the test asserts that the class
    attribute ends up as that list, and that invalid (non-dict) values still
    raise the validator's exception.
    """
    stored_default = {}
    transforming = pyneric.MetadataBehaviour(validate_transforms=True)

    class CustomException(Exception):
        """Exception for validation test"""

    class M(pyneric.Metaclass):
        __metadata_behaviour__ = transforming
        __metadata__ = {'a': stored_default}

        @staticmethod
        def validate_a(value):
            if isinstance(value, dict):
                return list(value)
            raise CustomException("must be a dict")

    def invalid_namespace():
        # A fresh class namespace whose ``a`` is a set rather than a dict.
        return {'a': {'invalid', 'set', 'not', 'a', 'dict'}}

    class C(with_metaclass(M, object)):
        a = {'valid': 'dict'}

    self.assertEqual(['valid'], C.a)

    # still also raises exception when appropriate
    fresh_base = with_metaclass(M, object)
    rejected = invalid_namespace()
    with self.assertRaises(CustomException):
        type('C', (fresh_base,), rejected)

    rejected = invalid_namespace()
    with self.assertRaises(CustomException):
        type('C1', (C,), rejected)

    class C1(C):
        a = {'another': 4, 'one': 1, 'transformed': 0}

    self.assertIsInstance(C1.a, list)
    self.assertEqual({'another', 'one', 'transformed'}, set(C1.a))
def __new__(generator_cls, ABC):
    """Build and return the magic type bound to ``generator_cls`` and ``ABC``.

    A metaclass is derived from ``BasicMetaMagicType`` and a class from
    ``BasicMagicType`` (both via the ``create_*`` helpers); the resulting
    class is then tagged with the generator class, the main class ``ABC``
    and an empty ``partial_cls`` slot.
    """
    meta = create_metaclass(BasicMetaMagicType, generator_cls)
    base = with_metaclass(meta, BasicMagicType)
    magic = create_class(base, generator_cls)

    # Assign in the same order as before: generator, main class, partial.
    for attr_name, attr_value in (
        ('generator_cls', generator_cls),
        ('main_cls', ABC),
        ('partial_cls', None),
    ):
        setattr(magic, attr_name, attr_value)

    return magic
def test_validation(self):
    """Check that a ``validate_<name>`` hook rejects invalid metadata.

    A class whose ``a`` is a dict is created without error; creating a class
    (directly from the metaclass or as a subclass of ``C``) whose ``a`` is a
    set must raise the validator's exception.
    """
    stored_default = {}

    class CustomException(Exception):
        """Exception for validation test"""

    class M(pyneric.Metaclass):
        __metadata__ = {'a': stored_default}

        @staticmethod
        def validate_a(value):
            if isinstance(value, dict):
                return
            raise CustomException("must be a dict")

    def invalid_namespace():
        # A fresh class namespace whose ``a`` is a set rather than a dict.
        return {'a': {'invalid', 'set', 'not', 'a', 'dict'}}

    class C(with_metaclass(M, object)):
        a = {'valid': 'dict'}

    fresh_base = with_metaclass(M, object)
    rejected = invalid_namespace()
    with self.assertRaises(CustomException):
        type('C', (fresh_base,), rejected)

    rejected = invalid_namespace()
    with self.assertRaises(CustomException):
        type('C1', (C,), rejected)
class DishMaster(with_metaclass(DeviceMeta, SKAMaster)):
    """
    SKA Dish Master TANGO device server.

    Simulates a dish: it keeps the dish mode, pointing state, sampler
    frequencies and pointing coordinates in instance attributes, and moves
    the achieved pointing towards the desired pointing one unit at a time
    from background threads.

    Integer codes used below follow the ``enum_labels`` declared on the
    attributes, e.g. ``pointingState`` 0/1/3 = READY/SLEW/SCAN and
    ``dishMode`` 3/4/5/6/8 = STANDBY-LP/STANDBY-FP/MAINTENANCE/STOW/OPERATE.
    """
    # PROTECTED REGION ID(DishMaster.class_variable) ENABLED START #

    # Function to set achieved pointing attribute to the desired pointing attribute
    def point(self):
        """
        Points the dish towards the desired pointing coordinates.

        Starts one thread for the azimuth axis (index 1) and one for the
        elevation axis (index 2) and sets the pointing state to SLEW. When
        both axes already match the desired pointing, only the status is
        updated.

        :return: None
        """
        azimuth_reached = (
            self._achieved_pointing[1] == self._desired_pointing[1])
        elevation_reached = (
            self._achieved_pointing[2] == self._desired_pointing[2])

        if azimuth_reached and elevation_reached:
            self.set_status(CONST.STR_DISH_POINT_ALREADY)
            self.dev_logging(CONST.STR_DISH_POINT_ALREADY,
                             int(tango.LogLevel.LOG_INFO))
            return

        try:
            self.change_azimuth_thread = threading.Thread(
                target=self.azimuth, name='DishMaster')
            self.change_elevation_thread = threading.Thread(
                target=self.elevation, name='DishMaster')
            self.change_azimuth_thread.start()
            self.change_elevation_thread.start()
            self._pointing_state = 1  # Set pointingState to SLEW
        except Exception as except_occured:
            print(CONST.ERR_EXE_POINT_FN, self.ReceptorNumber)
            print(CONST.STR_ERR_MSG, except_occured)

    def azimuth(self):
        """
        Calculates the azimuth angle difference and moves the azimuth axis
        up or down accordingly.

        :return: None
        """
        self._pointing_state = 1
        azimuth_index = 1
        self._azimuth_difference = (
            self._desired_pointing[1] - self._achieved_pointing[1])
        if self._azimuth_difference > 0.00:
            self.increment_position([azimuth_index, self._azimuth_difference])
        elif self._azimuth_difference < 0.00:
            self.decrement_position(
                [azimuth_index, abs(self._azimuth_difference)])

    def elevation(self):
        """
        Calculates the elevation angle difference and moves the elevation
        axis up or down accordingly.

        :return: None
        """
        self._pointing_state = 1
        elevation_index = 2
        self._elevation_difference = (
            self._desired_pointing[2] - self._achieved_pointing[2])
        if self._elevation_difference > 0.00:
            self.increment_position(
                [elevation_index, self._elevation_difference])
        elif self._elevation_difference < 0.00:
            self.decrement_position(
                [elevation_index, abs(self._elevation_difference)])

    def increment_position(self, argin):
        """
        Increments the current pointing coordinates gradually to match the
        desired pointing coordinates.

        :param argin: Two-element sequence ``[axis_index, difference]``:
            the index into the pointing arrays and the difference between
            current and desired Azimuth/Elevation angle.

        :return: None
        """
        input_increment = int(argin[1])
        time.sleep(2)

        azimuth_span = abs(self._azimuth_difference)
        elevation_span = abs(self._elevation_difference)
        if azimuth_span > elevation_span:
            max_increment = azimuth_span
        elif azimuth_span < elevation_span:
            max_increment = elevation_span
        else:
            max_increment = input_increment

        # The axis with the largest travel runs one extra iteration so that
        # it can observe the "both axes reached" condition and report it.
        if input_increment == max_increment:
            input_increment = input_increment + 1

        for _ in range(input_increment):
            self.set_status(CONST.STR_DISH_POINT_INPROG)
            self.dev_logging(CONST.STR_DISH_POINT_INPROG,
                             int(tango.LogLevel.LOG_INFO))
            self._pointing_state = 1
            time.sleep(2)
            if (self._achieved_pointing[1] == self._desired_pointing[1]) and (
                    self._achieved_pointing[2] == self._desired_pointing[2]):
                self._pointing_state = 0
                self.set_status(CONST.STR_DISH_POINT_SUCCESS)
                self.dev_logging(CONST.STR_DISH_POINT_SUCCESS,
                                 int(tango.LogLevel.LOG_INFO))
            else:
                self._achieved_pointing[argin[0]] = (
                    self._achieved_pointing[argin[0]] + 1)

    def decrement_position(self, argin):
        """
        Decrements the current pointing coordinates gradually to match the
        desired pointing coordinates.

        :param argin: Two-element sequence ``[axis_index, difference]``:
            the index into the pointing arrays and the (absolute) difference
            between current and desired Azimuth/Elevation angle.

        :return: None
        """
        input_decrement = int(argin[1])
        time.sleep(2)

        azimuth_span = abs(self._azimuth_difference)
        elevation_span = abs(self._elevation_difference)
        if azimuth_span > elevation_span:
            max_decrement = azimuth_span
        elif azimuth_span < elevation_span:
            max_decrement = elevation_span
        else:
            max_decrement = input_decrement

        # See increment_position: the longest-travelling axis gets one extra
        # iteration to detect and report completion.
        if input_decrement == max_decrement:
            input_decrement = input_decrement + 1

        for _ in range(input_decrement):
            self.set_status(CONST.STR_DISH_POINT_INPROG)
            self._pointing_state = 1
            time.sleep(2)
            if (self._achieved_pointing[1] == self._desired_pointing[1]) and (
                    self._achieved_pointing[2] == self._desired_pointing[2]):
                self._pointing_state = 0
                self.set_status(CONST.STR_DISH_POINT_SUCCESS)
                self.dev_logging(CONST.STR_DISH_POINT_SUCCESS,
                                 int(tango.LogLevel.LOG_INFO))
            else:
                self._achieved_pointing[argin[0]] = (
                    self._achieved_pointing[argin[0]] - 1)

    def check_slew(self):
        """
        Waits while the Dish is slewing and stows it afterwards.

        Polls the pointing state with a short sleep between checks (rather
        than spinning) so the waiting thread does not monopolise the CPU
        while the slewing threads are running.

        :return: None
        """
        while self._pointing_state == 1:
            time.sleep(0.1)

        self._admin_mode = 1  # Set adminMode to OFFLINE
        self.set_state(DevState.DISABLE)  # Set STATE to DISABLE
        self._dish_mode = 6  # Set dishMode to STOW
        self._health_state = 0  # Set healthState to OK
        self.set_status(CONST.STR_DISH_STOW_SUCCESS)
        self.dev_logging(CONST.STR_DISH_STOW_SUCCESS,
                         int(tango.LogLevel.LOG_INFO))

    # PROTECTED REGION END #    //DishMaster.class_variable

    # -----------------
    # Device Properties
    # -----------------

    ReceptorNumber = device_property(
        dtype='uint',
        doc="Number of Receptor ",
    )

    # ----------
    # Attributes
    # ----------

    dishMode = attribute(
        dtype='DevEnum',
        enum_labels=[
            "OFF",
            "STARTUP",
            "SHUTDOWN",
            "STANDBY-LP",
            "STANDBY-FP",
            "MAINTENANCE",
            "STOW",
            "CONFIG",
            "OPERATE",
        ],
        doc="Mode of the dish",
    )

    pointingState = attribute(
        dtype='DevEnum',
        enum_labels=[
            "READY",
            "SLEW",
            "TRACK",
            "SCAN",
        ],
        doc="Pointing state of the dish",
    )

    band1SamplerFrequency = attribute(
        dtype='double',
        access=AttrWriteType.WRITE,
        doc="Band1 Sampler Frequency of the dish",
    )

    band2SamplerFrequency = attribute(
        dtype='double',
        access=AttrWriteType.WRITE,
        doc="Band2 Sampler Frequency of the dish",
    )

    band3SamplerFrequency = attribute(
        dtype='double',
        access=AttrWriteType.WRITE,
        doc="Band3 Sampler Frequency of the dish",
    )

    band4SamplerFrequency = attribute(
        dtype='double',
        access=AttrWriteType.WRITE,
        doc="Band4 Sampler Frequency of the dish",
    )

    band5aSamplerFrequency = attribute(
        dtype='double',
        access=AttrWriteType.WRITE,
        doc="Band5a Sampler Frequency of the dish",
    )

    band5bSamplerFrequency = attribute(
        dtype='double',
        access=AttrWriteType.WRITE,
        doc="Band5b Sampler Frequency of the dish",
    )

    capturing = attribute(
        dtype='bool',
        doc="Data Capturing of the dish",
    )

    ConfiguredBand = attribute(
        dtype='DevEnum',
        enum_labels=[
            "BAND1",
            "BAND2",
            "BAND3",
            "BAND4",
            "BAND5a",
            "BAND5b",
            "NONE",
        ],
        doc="Configured band of the dish",
    )

    WindSpeed = attribute(
        dtype='double',
        access=AttrWriteType.READ_WRITE,
        unit="km/h",
        doc="Wind speed of the dish",
    )

    desiredPointing = attribute(
        dtype=('double', ),
        access=AttrWriteType.READ_WRITE,
        max_dim_x=7,
        doc="Desired pointing coordinates of the dish",
    )

    achievedPointing = attribute(
        dtype=('double', ),
        max_dim_x=7,
        doc="Achieved pointing coordinates of the dish",
    )

    # ---------------
    # General methods
    # ---------------

    def init_device(self):
        """
        Initializes the properties and attributes of DishMaster.

        :return: None
        """
        SKAMaster.init_device(self)
        # PROTECTED REGION ID(DishMaster.init_device) ENABLED START #
        try:
            # Initialise Properties
            self.SkaLevel = 1  # Set SkaLevel to 1
            # Initialise Attributes
            self._health_state = 0  # Set healthState to OK
            self._admin_mode = 0  # Set adminMode to ONLINE
            self._dish_mode = 3  # Set dishMode to STANDBY-LP Mode
            self._pointing_state = 0  # Set pointingState to READY Mode
            self._band1_sampler_frequency = 0  # Set Band 1 Sampler Frequency to 0
            self._band2_sampler_frequency = 0  # Set Band 2 Sampler Frequency to 0
            self._band3_sampler_frequency = 0  # Set Band 3 Sampler Frequency to 0
            self._band4_sampler_frequency = 0  # Set Band 4 Sampler Frequency to 0
            self._band5a_sampler_frequency = 0  # Set Band 5a Sampler Frequency to 0
            self._band5b_sampler_frequency = 0  # Set Band 5b Sampler Frequency to 0
            self._capturing = False
            self._desired_pointing = [0, 2, 4]
            self._achieved_pointing = [0, 0, 0]
            self._elevation_difference = 0
            self._azimuth_difference = 0
            self._configured_band = 1
            self._wind_speed = 5
            self.set_state(DevState.STANDBY)  # Set STATE to STANDBY

            # Initialise Point command variables
            self._current_time = 0
            self._point_execution_time = 0
            self._point_delta_t = 0

            # Initialise Scan command variables
            self._scan_execution_time = 0
            self._scan_delta_t = 0

            self.set_status(CONST.STR_DISH_INIT_SUCCESS)
            self.dev_logging(CONST.STR_DISH_INIT_SUCCESS,
                             int(tango.LogLevel.LOG_INFO))
        except Exception as except_occured:
            print(CONST.ERR_INIT_PROP_ATTR_DISH, self.ReceptorNumber)
            self.dev_logging(CONST.ERR_INIT_PROP_ATTR_DISH,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
        # PROTECTED REGION END #    //  DishMaster.init_device

    def always_executed_hook(self):
        # PROTECTED REGION ID(DishMaster.always_executed_hook) ENABLED START #
        """ Internal construct of TANGO. """
        # PROTECTED REGION END #    //  DishMaster.always_executed_hook

    def delete_device(self):
        # PROTECTED REGION ID(DishMaster.delete_device) ENABLED START #
        """ Internal construct of TANGO. """
        # PROTECTED REGION END #    //  DishMaster.delete_device

    # ------------------
    # Attributes methods
    # ------------------

    def read_dishMode(self):
        # PROTECTED REGION ID(DishMaster.dishMode_read) ENABLED START #
        """ Returns the dishMode. """
        return self._dish_mode
        # PROTECTED REGION END #    //  DishMaster.dishMode_read

    def read_pointingState(self):
        # PROTECTED REGION ID(DishMaster.pointingState_read) ENABLED START #
        """ Returns the pointingState. """
        return self._pointing_state
        # PROTECTED REGION END #    //  DishMaster.pointingState_read

    def write_band1SamplerFrequency(self, value):
        # PROTECTED REGION ID(DishMaster.band1SamplerFrequency_write) ENABLED START #
        """
        Sets the band1 sampler frequency.

        :param value: band1SamplerFrequency

        :return: None
        """
        self._band1_sampler_frequency = value
        # PROTECTED REGION END #    //  DishMaster.band1SamplerFrequency_write

    def write_band2SamplerFrequency(self, value):
        # PROTECTED REGION ID(DishMaster.band2SamplerFrequency_write) ENABLED START #
        """
        Sets the band2 sampler frequency.

        :param value: band2SamplerFrequency

        :return: None
        """
        self._band2_sampler_frequency = value
        # PROTECTED REGION END #    //  DishMaster.band2SamplerFrequency_write

    def write_band3SamplerFrequency(self, value):
        # PROTECTED REGION ID(DishMaster.band3SamplerFrequency_write) ENABLED START #
        """
        Sets the band3 sampler frequency.

        :param value: band3SamplerFrequency

        :return: None
        """
        self._band3_sampler_frequency = value
        # PROTECTED REGION END #    //  DishMaster.band3SamplerFrequency_write

    def write_band4SamplerFrequency(self, value):
        # PROTECTED REGION ID(DishMaster.band4SamplerFrequency_write) ENABLED START #
        """
        Sets the band4 sampler frequency.

        :param value: band4SamplerFrequency

        :return: None
        """
        self._band4_sampler_frequency = value
        # PROTECTED REGION END #    //  DishMaster.band4SamplerFrequency_write

    def write_band5aSamplerFrequency(self, value):
        # PROTECTED REGION ID(DishMaster.band5aSamplerFrequency_write) ENABLED START #
        """
        Sets the band5a sampler frequency.

        :param value: band5aSamplerFrequency

        :return: None
        """
        self._band5a_sampler_frequency = value
        # PROTECTED REGION END #    //  DishMaster.band5aSamplerFrequency_write

    def write_band5bSamplerFrequency(self, value):
        # PROTECTED REGION ID(DishMaster.band5bSamplerFrequency_write) ENABLED START #
        """
        Sets the band5b sampler frequency.

        :param value: band5bSamplerFrequency

        :return: None
        """
        self._band5b_sampler_frequency = value
        # PROTECTED REGION END #    //  DishMaster.band5bSamplerFrequency_write

    def read_capturing(self):
        # PROTECTED REGION ID(DishMaster.capturing_read) ENABLED START #
        """ Returns true if the dish is capturing the data, else false. """
        return self._capturing
        # PROTECTED REGION END #    //  DishMaster.capturing_read

    def read_ConfiguredBand(self):
        # PROTECTED REGION ID(DishMaster.ConfiguredBand_read) ENABLED START #
        """ Returns the band configured for the Dish. """
        return self._configured_band
        # PROTECTED REGION END #    //  DishMaster.ConfiguredBand_read

    def read_WindSpeed(self):
        # PROTECTED REGION ID(DishMaster.WindSpeed_read) ENABLED START #
        """ Returns the Wind speed. """
        return self._wind_speed
        # PROTECTED REGION END #    //  DishMaster.WindSpeed_read

    def write_WindSpeed(self, value):
        # PROTECTED REGION ID(DishMaster.WindSpeed_write) ENABLED START #
        """
        Sets the wind speed.

        :param value: WindSpeed

        :return: None
        """
        self._wind_speed = value
        # PROTECTED REGION END #    //  DishMaster.WindSpeed_write

    def read_desiredPointing(self):
        # PROTECTED REGION ID(DishMaster.desiredPointing_read) ENABLED START #
        """ Returns the desired pointing coordinates of Dish. """
        return self._desired_pointing
        # PROTECTED REGION END #    //  DishMaster.desiredPointing_read

    def write_desiredPointing(self, value):
        # PROTECTED REGION ID(DishMaster.desiredPointing_write) ENABLED START #
        """
        Sets the desired pointing coordinates of Dish.

        :param value: desiredPointing

        :return: None
        """
        self._desired_pointing = value
        # PROTECTED REGION END #    //  DishMaster.desiredPointing_write

    def read_achievedPointing(self):
        # PROTECTED REGION ID(DishMaster.achievedPointing_read) ENABLED START #
        """ Returns the achieved pointing coordinates of Dish. """
        return self._achieved_pointing
        # PROTECTED REGION END #    //  DishMaster.achievedPointing_read

    # --------
    # Commands
    # --------

    @command()
    @DebugIt()
    def SetStowMode(self):
        # PROTECTED REGION ID(DishMaster.SetStowMode) ENABLED START #
        """
        Triggers the Dish to transition into the STOW Dish Element Mode. Used to point the dish
        in a direction that minimises the wind loads on the structure,for survival in strong wind
        conditions. The Dish is able to observe in the stove position, for the purpose of transient
        detection.

        Slews to ``[0, 0, 0]`` and starts a background thread
        (:meth:`check_slew`) that switches to STOW once slewing has ended.

        :return: None
        """
        try:
            # Command to set Dish to STOW Mode
            self._desired_pointing = [0, 0, 0]
            self.Slew("0")
            # Give the slew a moment to start before polling for its end.
            time.sleep(1)
            self.stow_thread = threading.Thread(
                target=self.check_slew, name='DishMaster')
            self.stow_thread.start()
        except Exception as except_occured:
            print(CONST.ERR_EXE_SET_STOW_MODE_CMD, self.ReceptorNumber)
            self.dev_logging(CONST.ERR_EXE_SET_STOW_MODE_CMD,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
        # PROTECTED REGION END #    //  DishMaster.SetStowMode

    def is_SetStowMode_allowed(self):
        # PROTECTED REGION ID(DishMaster.is_SetStowMode_allowed) ENABLED START #
        """ Checks if the SetStowMode is allowed in the current state of DishMaster. """
        return self.get_state() not in [DevState.ON, DevState.ALARM]
        # PROTECTED REGION END #    //  DishMaster.is_SetStowMode_allowed

    @command()
    @DebugIt()
    def SetStandbyLPMode(self):
        # PROTECTED REGION ID(DishMaster.SetStandbyLPMode) ENABLED START #
        """
        Triggers the Dish to transition into the STANDBY-LP (Standby-Low power) Dish Element Mode.
        Standby-LP is the default mode when the Dish is configured for low power consumption.
        It is the mode wherein Dish ends after a start up procedure.

        :return: None
        """
        try:
            # Command to set Dish to STANDBY-LP Mode
            self.set_state(DevState.STANDBY)  # Set STATE to STANDBY
            self._dish_mode = 3  # set dishMode to STANDBYLP
            self.set_status(CONST.STR_DISH_STANDBYLP_MODE)
            self.dev_logging(CONST.STR_DISH_STANDBYLP_MODE,
                             int(tango.LogLevel.LOG_INFO))
        except Exception as except_occured:
            print(CONST.ERR_EXE_SET_STNBYLP_MODE_CMD, self.ReceptorNumber)
            self.set_status(str(except_occured))
            self.dev_logging(CONST.ERR_EXE_SET_STNBYLP_MODE_CMD,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
        # PROTECTED REGION END #    //  DishMaster.SetStandbyLPMode

    def is_SetStandbyLPMode_allowed(self):
        # PROTECTED REGION ID(DishMaster.is_SetStandbyLPMode_allowed) ENABLED START #
        """
        Checks if SetStandbyLPMode is allowed: it is refused while the
        pointing state is SLEW (1), TRACK (2) or SCAN (3).
        """
        return self._pointing_state not in [1, 2, 3]
        # PROTECTED REGION END #    //  DishMaster.is_SetStandbyLPMode_allowed

    @command()
    @DebugIt()
    def SetMaintenanceMode(self):
        # PROTECTED REGION ID(DishMaster.SetMaintenanceMode) ENABLED START #
        """
        Triggers the Dish to transition into the MAINTENANCE Dish Element Mode.
        This mode will also enable engineers and maintainers to upgrade SW and FW.
        Dish also enters this mode when an emergency stop button is pressed.

        :return: None
        """
        try:
            # Command to set Dish to MAINTENANCE Mode
            self._admin_mode = 2  # Set adminMode to MAINTENANCE
            self.set_state(DevState.DISABLE)  # Set STATE to DISABLE
            self._dish_mode = 5  # set dishMode to MAINTENANCE
            self.set_status(CONST.STR_DISH_MAINT_MODE)
            self.dev_logging(CONST.STR_DISH_MAINT_MODE,
                             int(tango.LogLevel.LOG_INFO))
        except Exception as except_occured:
            print(CONST.ERR_EXE_SET_MAINT_MODE_CMD, self.ReceptorNumber)
            self.dev_logging(CONST.ERR_EXE_SET_MAINT_MODE_CMD,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
        # PROTECTED REGION END #    //  DishMaster.SetMaintenanceMode

    def is_SetMaintenanceMode_allowed(self):
        # PROTECTED REGION ID(DishMaster.is_SetMaintenanceMode_allowed) ENABLED START #
        """ Checks if SetMaintenanceMode is allowed in the current state of DishMaster."""
        return self.get_state() not in [
            DevState.ON, DevState.ALARM, DevState.DISABLE
        ]
        # PROTECTED REGION END #    //  DishMaster.is_SetMaintenanceMode_allowed

    @command()
    @DebugIt()
    def SetOperateMode(self):
        """
        Triggers the Dish to transition into the OPERATE Dish Element Mode.

        :return: None
        """
        # PROTECTED REGION ID(DishMaster.SetOperateMode) ENABLED START #
        try:
            # Command to set Dish to OPERATE Mode
            self._admin_mode = 0  # Set adminMode to ONLINE
            self.set_state(DevState.ON)  # Set STATE to ON
            self._dish_mode = 8  # set dishMode to OPERATE
            self.set_status(CONST.STR_DISH_OPERATE_MODE)
            self.dev_logging(CONST.STR_DISH_OPERATE_MODE,
                             int(tango.LogLevel.LOG_INFO))
        except Exception as except_occured:
            print(CONST.ERR_EXE_SET_OPERATE_MODE_CMD, self.ReceptorNumber)
            self.dev_logging(CONST.ERR_EXE_SET_OPERATE_MODE_CMD,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
        # PROTECTED REGION END #    //  DishMaster.SetOperateMode

    def is_SetOperateMode_allowed(self):
        # PROTECTED REGION ID(DishMaster.is_SetOperateMode_allowed) ENABLED START #
        """ Checks if SetOperateMode is allowed in the current state of DishMaster."""
        return self.get_state() not in [
            DevState.ON, DevState.OFF, DevState.FAULT, DevState.ALARM,
            DevState.UNKNOWN, DevState.DISABLE
        ]
        # PROTECTED REGION END #    //  DishMaster.is_SetOperateMode_allowed

    @command(
        dtype_in='str',
        doc_in=
        "The timestamp indicates the time, in UTC, at which command execution"
        " should start.",
    )
    @DebugIt()
    def Scan(self, argin):
        # PROTECTED REGION ID(DishMaster.Scan) ENABLED START #
        """
        Triggers the dish to start scanning at the set pointing coordinates and capture the data
        at the input timestamp.

        The capture is scheduled with a timer whose delay is the input
        timestamp minus the current ``time.time()``. The command is only
        acted upon while the pointing state is READY.

        :param argin: timestamp (string convertible to float)

        :return: None
        """
        try:
            # Command to start SCAN
            if self._pointing_state == 0:
                self._current_time = time.time()
                self._scan_execution_time = float(argin)
                self._scan_delta_t = (
                    self._scan_execution_time - self._current_time)
                schedule_scan_thread = Timer(self._scan_delta_t,
                                             self.StartCapture, [argin])
                schedule_scan_thread.start()
                self.dev_logging(CONST.STR_SCAN_INPROG,
                                 int(tango.LogLevel.LOG_INFO))
            else:
                self.set_status(CONST.STR_DISH_NOT_READY)
                self.dev_logging(CONST.STR_DISH_NOT_READY,
                                 int(tango.LogLevel.LOG_INFO))
        except Exception as except_occured:
            print(CONST.ERR_EXE_SCAN_CMD, self.ReceptorNumber)
            self.dev_logging(CONST.ERR_EXE_SCAN_CMD,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
        # PROTECTED REGION END #    //  DishMaster.Scan

    def is_Scan_allowed(self):
        # PROTECTED REGION ID(DishMaster.is_Scan_allowed) ENABLED START #
        """ Checks if the Scan is allowed in the current state of DishMaster. """
        return self.get_state() not in [
            DevState.OFF, DevState.FAULT, DevState.INIT, DevState.UNKNOWN,
            DevState.STANDBY, DevState.DISABLE
        ]
        # PROTECTED REGION END #    //  DishMaster.is_Scan_allowed

    @command(
        dtype_in='str',
        doc_in=
        "The timestamp indicates the time, in UTC, at which command execution"
        " should start.",
    )
    @DebugIt()
    def StartCapture(self, argin):
        # PROTECTED REGION ID(DishMaster.StartCapture) ENABLED START #
        """
        Triggers the dish to start capturing the data on the configured band.

        :param argin: timestamp (must be convertible to float; otherwise the
            error is reported and nothing changes)

        :return: None
        """
        try:
            # Only validates that the timestamp is numeric; a bad value
            # raises and is reported by the handler below.
            float(argin)
            if not self._capturing:
                # Command to start Data Capturing
                self._capturing = True  # set Capturing to True
                self._pointing_state = 3  # set pointingState to SCAN
                self.set_status(CONST.STR_DATA_CAPTURE_STRT)
                self.dev_logging(CONST.STR_DATA_CAPTURE_STRT,
                                 int(tango.LogLevel.LOG_INFO))
            else:
                self.set_status(CONST.STR_DATA_CAPTURE_ALREADY_STARTED)
                self.dev_logging(CONST.STR_DATA_CAPTURE_ALREADY_STARTED,
                                 int(tango.LogLevel.LOG_INFO))
        except Exception as except_occured:
            print(CONST.ERR_EXE_STRT_CAPTURE_CMD, self.ReceptorNumber)
            self.dev_logging(CONST.ERR_EXE_STRT_CAPTURE_CMD,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
        # PROTECTED REGION END #    //  DishMaster.StartCapture

    def is_StartCapture_allowed(self):
        """ Checks if the StartCapture is allowed in the current state of DishMaster. """
        # PROTECTED REGION ID(DishMaster.is_StartCapture_allowed) ENABLED START #
        return self.get_state() not in [
            DevState.OFF, DevState.FAULT, DevState.INIT, DevState.UNKNOWN,
            DevState.STANDBY, DevState.DISABLE
        ]
        # PROTECTED REGION END #    //  DishMaster.is_StartCapture_allowed

    @command(
        dtype_in='str',
        doc_in=
        "The timestamp indicates the time, in UTC, at which command execution should start",
    )
    @DebugIt()
    def StopCapture(self, argin):
        # PROTECTED REGION ID(DishMaster.StopCapture) ENABLED START #
        """
        Triggers the dish to stop capturing the data on the configured band.

        :param argin: timestamp (must be convertible to float; otherwise the
            error is reported and nothing changes)

        :return: None
        """
        try:
            # Only validates that the timestamp is numeric; a bad value
            # raises and is reported by the handler below.
            float(argin)
            if self._capturing:
                # Command to stop Data Capturing
                self._capturing = False  # set Capturing to FALSE
                self._pointing_state = 0  # set pointingState to READY
                self.set_status(CONST.STR_DATA_CAPTURE_STOP)
                self.dev_logging(CONST.STR_DATA_CAPTURE_STOP,
                                 int(tango.LogLevel.LOG_INFO))
            else:
                self.set_status(CONST.STR_DATA_CAPTURE_ALREADY_STOPPED)
                self.dev_logging(CONST.STR_DATA_CAPTURE_ALREADY_STOPPED,
                                 int(tango.LogLevel.LOG_INFO))
        except Exception as except_occured:
            print(CONST.ERR_EXE_STOP_CAPTURE_CMD, self.ReceptorNumber)
            self.dev_logging(CONST.ERR_EXE_STOP_CAPTURE_CMD,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
        # PROTECTED REGION END #    //  DishMaster.StopCapture

    def is_StopCapture_allowed(self):
        # PROTECTED REGION ID(DishMaster.is_StopCapture_allowed) ENABLED START #
        """ Checks if the StopCapture is allowed in the current state of DishMaster. """
        return self.get_state() not in [
            DevState.OFF, DevState.FAULT, DevState.INIT, DevState.UNKNOWN,
            DevState.STANDBY, DevState.DISABLE
        ]
        # PROTECTED REGION END #    //  DishMaster.is_StopCapture_allowed

    @command()
    @DebugIt()
    def SetStandbyFPMode(self):
        # PROTECTED REGION ID(DishMaster.SetStandbyFPMode) ENABLED START #
        """
        Triggers the Dish to transition into the STANDBY-FP (Standby-Full power) Dish Element Mode.

        :return: None
        """
        try:
            # Command to set Dish to STANDBY-FP Mode
            self.set_state(DevState.STANDBY)  # set STATE to STANDBY
            self._dish_mode = 4  # set dishMode to STANDBY-FP
            self.set_status(CONST.STR_DISH_STANDBYFP_MODE)
            self.dev_logging(CONST.STR_DISH_STANDBYFP_MODE,
                             int(tango.LogLevel.LOG_INFO))
        except Exception as except_occured:
            print(CONST.ERR_EXE_SET_STNBYFP_MODE_CMD, self.ReceptorNumber)
            self.dev_logging(CONST.ERR_EXE_SET_STNBYFP_MODE_CMD,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
        # PROTECTED REGION END #    //  DishMaster.SetStandbyFPMode

    def is_SetStandbyFPMode_allowed(self):
        # PROTECTED REGION ID(DishMaster.is_SetStandbyFPMode_allowed) ENABLED START #
        """ Checks if the SetStandbyFPMode is allowed in the current state of DishMaster. """
        return self.get_state() not in [DevState.UNKNOWN, DevState.DISABLE]
        # PROTECTED REGION END #    //  DishMaster.is_SetStandbyFPMode_allowed

    @command(
        dtype_in='str',
        doc_in="Timestamp at which command should be executed.",
    )
    @DebugIt()
    def Slew(self, argin=0):
        # PROTECTED REGION ID(DishMaster.Slew) ENABLED START #
        """
        Triggers the Dish to move (or slew) at the commanded pointing coordinates.

        ``argin`` is only checked for being numeric. The delay before
        :meth:`point` runs is computed from element 0 of the desired
        pointing minus the current ``time.time()``.

        :param argin: timestamp

        :return: None
        """
        try:
            # Only validates that the timestamp is numeric; a bad value
            # raises and is reported by the handler below.
            float(argin)
            # Execute POINT command at given timestamp
            self._current_time = time.time()
            self._point_execution_time = self._desired_pointing[0]
            self._point_delta_t = (
                self._point_execution_time - self._current_time)
            schedule_slew_thread = Timer(self._point_delta_t, self.point)
            schedule_slew_thread.start()
            self.dev_logging(CONST.STR_DISH_SLEW,
                             int(tango.LogLevel.LOG_INFO))
        except Exception as except_occured:
            print(CONST.ERR_EXE_SLEW_CMD, self.ReceptorNumber)
            self.dev_logging(CONST.ERR_EXE_SLEW_CMD,
                             int(tango.LogLevel.LOG_ERROR))
            print(CONST.STR_ERR_MSG, except_occured)
class Instrument(with_metaclass(InstrumentMeta, object)):
    """
    Base class for all instruments.

    Instances are normally produced through ``instrument()`` (which
    ``__new__`` delegates to) and ultimately by the ``_create`` factory,
    which builds the instance's ``ParamSet`` and runs the
    ``_before_init`` / ``_initialize`` / ``_after_init`` sequence.
    """
    # Maps driver name -> {class -> WeakSet of open instances}; filled in by
    # _after_init().
    _all_instances = {}

    @classmethod
    def _create(cls, paramset, **other_attrs):
        """Factory method meant to be used by `instrument()`

        Parameters
        ----------
        paramset : mapping
            Parameters used to build this instrument's ``ParamSet``; its
            optional ``'settings'`` entry is passed on to ``_initialize``.
        **other_attrs
            Extra attributes set verbatim on the new object.

        Returns
        -------
        The new instrument, or an already-open matching instance when the
        reopen policy is ``'reuse'``.

        Raises
        ------
        InstrumentExistsError
            If a matching instance is open and the reopen policy is
            ``'strict'``.
        """
        obj = object.__new__(cls)  # Avoid our version of __new__
        for name, value in other_attrs.items():
            setattr(obj, name, value)
        obj._paramset = ParamSet(cls, **paramset)

        matching_insts = [
            open_inst for open_inst in obj._instances
            if obj._paramset.matches(open_inst._paramset)
        ]
        if matching_insts:
            if _REOPEN_POLICY == 'strict':
                raise InstrumentExistsError(
                    "Device instance already exists, cannot open in strict "
                    "mode")
            elif _REOPEN_POLICY == 'reuse':
                # TODO: Should we return something other than the first element?
                return matching_insts[0]
            elif _REOPEN_POLICY == 'new':
                pass  # Cross our fingers and try to open a new instance

        obj._before_init()
        obj._fill_out_paramset()
        obj._initialize(**paramset.get('settings', {}))
        obj._after_init()
        return obj

    def __new__(cls, inst=None, **kwds):
        # TODO: Is there a more efficient way to implement this behavior?
        kwds['module'] = driver_submodule_name(cls.__module__)
        kwds['classname'] = cls.__name__
        return instrument(inst, **kwds)

    def _initialize(self, **settings):
        """Hook for subclasses to open/configure the device; no-op here."""
        pass

    def _before_init(self):
        """Called just before _initialize"""
        self._driver_name = driver_submodule_name(self.__class__.__module__)
        # TODO: consider setting the _module at the class level
        if not hasattr(self.__class__, '_module'):
            self.__class__._module = import_driver(self._driver_name)

        facet_data = [facet.instance(self) for facet in self._props]
        self.facets = FacetGroup(facet_data)

    def _after_init(self):
        """Called just after _initialize"""
        cls = self.__class__

        # Only add the instrument after init, to ensure it hasn't failed to open
        Instrument._all_instances.setdefault(self._driver_name, {}).setdefault(
            cls, WeakSet()).add(self)
        self._instances.add(self)

    def _fill_out_paramset(self):
        """Complete ``self._paramset`` using the driver's ``list_instruments()``.

        Does nothing if the paramset already contains every parameter the
        driver declares, or if the driver module has no
        ``list_instruments()`` function.
        """
        # TODO: Fix the _INST_ system more fundamentally and remove this hack
        if hasattr(self, '_INST_PARAMS_'):
            mod_params = self._INST_PARAMS_
        else:
            try:
                mod_params = driver_info[self._driver_name]['params']
            except KeyError:
                log.info(
                    'Instrument class is lacking static info, checking module directly...'
                )
                mod = import_module(self.__module__)
                if hasattr(mod, '_INST_PARAMS'):
                    mod_params = mod._INST_PARAMS
                elif isinstance(self, VisaMixin):
                    # Visa mixins *should* just need a visa resource
                    mod_params = ['visa_address']
                else:
                    raise

        for mod_param_name in mod_params:
            if mod_param_name not in self._paramset.keys():
                break
        else:
            log.info(
                "Paramset has all params listed in its driver module, not filling it out"
            )
            return

        if hasattr(self._module, 'list_instruments'):
            log.info("Filling out paramset using `list_instruments()`")
            for paramset in self._module.list_instruments():
                log.debug("Checking against %r", paramset)
                if self._paramset.matches(paramset):
                    self._paramset.lazyupdate(paramset)
                    log.info("Found match; new params: %r", self._paramset)
                    break
        else:
            log.info(
                "Driver module missing `list_instruments()`, not filling out paramset"
            )

    def get(self, facet_name, use_cache=False):
        """Return the value of the facet named `facet_name`.

        Raises
        ------
        ValueError
            If the class attribute of that name is not a ``Facet``.
        """
        facet = getattr(self.__class__, facet_name)
        if not isinstance(facet, Facet):
            raise ValueError("'{}' is not a Facet".format(facet_name))
        return facet.get_value(self, use_cache=use_cache)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def close(self):
        """Hook for subclasses to release the device; no-op here."""
        pass

    def save_instrument(self, name, force=False):
        """ Save an entry for this instrument in the config file.

        Parameters
        ----------
        name : str
            The name to give the instrument, e.g. 'myCam'
        force : bool, optional
            Force overwrite of the old entry for instrument `name`. By default,
            Instrumental will raise an exception if you try to write to a name
            that's already taken. If `force` is True, the old entry will be
            commented out (with a warning given) and a new entry will be written.

        Notes
        -----
        The config file is rewritten to a temporary file which then replaces
        the original; the previous version is kept as
        ``instrumental.conf.bak``. If the file has no ``[instruments]``
        section, one is appended at the end so the entry is never silently
        dropped.
        """
        from datetime import datetime
        import os
        import os.path
        conf.load_config_file()  # Reload latest version

        if name in conf.instruments.keys():
            if not force:
                raise Exception(
                    "An entry already exists for '{}'!".format(name))
            else:
                import warnings
                warnings.warn(
                    "Commenting out existing entry for '{}'".format(name))

        try:
            paramset = self._paramset
        except AttributeError:
            raise NotImplementedError(
                "Class '{}' does not yet support saving".format(type(self)))

        date_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        new_entry = ('\n# Entry auto-created ' + date_str + '\n' +
                     paramset.to_ini(name) + '\n')
        old_fname = os.path.join(conf.user_conf_dir, 'instrumental.conf')
        new_fname = os.path.join(conf.user_conf_dir, 'instrumental_new.conf')
        bak_fname = os.path.join(conf.user_conf_dir, 'instrumental.conf.bak')

        with open(old_fname, 'r') as old, open(new_fname, 'w') as new:

            def write_section(section_lines, trailing_blank_count):
                # Emit the buffered section body, then the new entry, then
                # the section's original trailing blank lines, so the entry
                # lands right after the last non-blank line of the section.
                split_at = len(section_lines) - trailing_blank_count
                new.writelines(section_lines[:split_at])
                new.write(new_entry)
                new.writelines(section_lines[split_at:])

            in_section = False
            entry_written = False
            num_trailing = 0
            lines = None
            for line in old:
                if in_section:
                    if re.split(':|=', line)[0].strip() == name:
                        # Comment out existing version of this entry
                        line = '# [{} Auto-commented duplicate] '.format(
                            date_str) + line

                    if line.startswith('['):
                        # We found the start of the *next* section
                        in_section = False
                        write_section(lines, num_trailing)
                        entry_written = True
                        new.write(line)
                    else:
                        lines.append(line)
                        if not line.strip():
                            num_trailing += 1
                        else:
                            num_trailing = 0
                else:
                    new.write(line)
                    if line.startswith('[instruments]'):
                        in_section = True
                        lines = []

            if in_section:
                # File ended before we saw a new section
                write_section(lines, num_trailing)
                entry_written = True

            if not entry_written:
                # No [instruments] section in the file at all: create one so
                # the entry is not lost.
                new.write('\n[instruments]')
                new.write(new_entry)

        if os.path.exists(bak_fname):
            os.remove(bak_fname)
        os.rename(old_fname, bak_fname)
        os.rename(new_fname, old_fname)

        # Reload newly modified file
        conf.load_config_file()

    @cached_property
    def _state_path(self):
        """Default pickle path for this instrument's saved state.

        The file name combines the instrument's alias, its module name and
        its class name; the save directory is created if it is missing.

        Raises
        ------
        RuntimeError
            If the instrument has no (non-empty) ``_alias``.
        """
        if not getattr(self, '_alias', None):
            raise RuntimeError(
                'Instrument must have an alias to provide a default path for saving '
                'or loading its state. An alias will be set by using '
                'save_instrument() or loading an instrument by alias')
        inst_module = inspect.getmodule(self.__class__)
        filename = '{}-{}.{}.pkl'.format(self._alias, inst_module.__name__,
                                         self.__class__.__name__)
        # Create-then-check instead of check-then-create, so a directory
        # made concurrently by someone else is not treated as an error.
        try:
            os.makedirs(conf.save_dir)
        except OSError:
            if not os.path.isdir(conf.save_dir):
                raise
        return os.path.join(conf.save_dir, filename)

    def _save_state(self, state_path=None):
        """Save instrument state to a pickle file"""
        state_path = state_path or self._state_path
        with open(state_path, 'wb') as f:
            pickle.dump(self.__dict__, f)

    def _load_state(self, state_path=None):
        """Load instrument state from a pickle file

        NOTE: unpickling can execute arbitrary code, so only load state
        files that come from a trusted source.
        """
        state_path = state_path or self._state_path
        with open(state_path, 'rb') as f:
            state = pickle.load(f)
            self.__dict__.update(state)
            log.debug("Loaded state: %r", state)

    def observe(self, name, callback):
        """Add a callback to observe changes in a facet's value

        The callback should be a callable accepting a ``ChangeEvent`` as its only argument. This
        ``ChangeEvent`` is a namedtuple with ``name``, ``old``, and ``new`` fields. ``name`` is the
        facet's name, ``old`` is the old value, and ``new`` is the new value.
        """
        facet = getattr(self.__class__, name)
        facet_instance = facet.instance(self)
        facet_instance.observe(callback)
class ContentPlugin(with_metaclass(PluginMediaDefiningClass, object)):
    """
    The base class for all content plugins.

    A plugin defines the rendering for a :class:`~fluent_contents.models.ContentItem`, settings and presentation in the admin interface.
    To create a new plugin, derive from this class and call :func:`plugin_pool.register <PluginPool.register>` to enable it.
    For example:

    .. code-block:: python

        from fluent_contents.extensions import plugin_pool, ContentPlugin

        @plugin_pool.register
        class AnnouncementBlockPlugin(ContentPlugin):
            model = AnnouncementBlockItem
            render_template = "plugins/announcementblock.html"
            category = _("Simple blocks")

    As minimal configuration, specify the :attr:`model` and :attr:`render_template` fields.
    The :attr:`model` should be a subclass of the :class:`~fluent_contents.models.ContentItem` model class.

    .. note::
        When the plugin is registered in the :attr:`plugin_pool`, it will be instantiated only once.
        It is therefore not possible to store per-request state at the plugin object.
        This is similar to the behavior of the :class:`~django.contrib.admin.ModelAdmin` classes in Django.

    To customize the admin, the :attr:`admin_form_template` and :attr:`form` can be defined.
    Some well known properties of the :class:`~django.contrib.admin.ModelAdmin` class can also be specified on plugins;
    such as:

    * :attr:`~django.contrib.admin.ModelAdmin.fieldsets`
    * :attr:`~django.contrib.admin.ModelAdmin.filter_horizontal`
    * :attr:`~django.contrib.admin.ModelAdmin.filter_vertical`
    * :attr:`~django.contrib.admin.ModelAdmin.prepopulated_fields`
    * :attr:`~django.contrib.admin.ModelAdmin.radio_fields`
    * :attr:`~django.contrib.admin.ModelAdmin.raw_id_fields`
    * :attr:`~django.contrib.admin.ModelAdmin.readonly_fields`
    * A ``class Media`` to provide extra CSS and JavaScript files for the admin interface.

    The rendered output of a plugin is cached by default, assuming that most content is static.
    This also avoids extra database queries to retrieve the model objects.
    In case the plugin needs to output content dynamically, include ``cache_output = False`` in the plugin definition.
    """

    # -- Settings to override:

    #: The model to use, must derive from :class:`fluent_contents.models.ContentItem`.
    model = None

    #: The form to use in the admin interface. By default it uses a :class:`fluent_contents.models.ContentItemForm`.
    form = ContentItemForm

    #: The template to render the admin interface with
    admin_form_template = "admin/fluent_contents/contentitem/admin_form.html"

    #: An optional template which is included in the admin interface, to initialize components (e.g. JavaScript)
    admin_init_template = None

    #: The fieldsets for the admin view.
    fieldsets = None

    #: The template to render the frontend HTML output.
    render_template = None

    #: By default, rendered output is cached, and updated on admin changes.
    cache_output = True

    #: .. versionadded:: 0.9
    #: Cache the plugin output per :django:setting:`SITE_ID`.
    cache_output_per_site = False

    #: .. versionadded:: 1.0
    #: Cache the plugin output per language.
    #: This can be useful for sites which either:
    #:
    #: * Display fallback content on pages, but still use ``{% trans %}`` inside templates.
    #: * Dynamically switch the language per request, and *share* content between multiple languages.
    #:
    #: This option does not have to be used for translated CMS pages,
    #: as each page can have its own set of :class:`~fluent_contents.models.ContentItem` objects.
    #: It's only needed for rendering the *same* item in different languages.
    cache_output_per_language = False

    #: .. versionadded:: 1.0
    #: Set a custom cache timeout value
    cache_timeout = DEFAULT_TIMEOUT

    #: .. versionadded:: 1.0
    #: Tell which languages the plugin will cache.
    #: It defaults to the language codes from the :django:setting:`LANGUAGES` setting.
    cache_supported_language_codes = [code for code, _ in settings.LANGUAGES]

    #: The category title to place the plugin into.
    #: This is only used for the "Add Plugin" menu.
    category = None

    #: .. versionadded:: 1.0
    #: By default, the plugin is rendered in the :attr:`language_code` it's written in.
    #: It can be disabled explicitly in case the content should be rendered language agnostic.
    #: For plugins that cache output per language, this will be done already.
    #:
    #: See also: :attr:`cache_output_per_language`
    render_ignore_item_language = False

    #: Alternative template for the view.
    ADMIN_TEMPLATE_WITHOUT_LABELS = "admin/fluent_contents/contentitem/admin_form_without_labels.html"

    #: .. versionadded:: 0.8.5
    #: The ``HORIZONTAL`` constant for the :attr:`radio_fields`.
    HORIZONTAL = admin.HORIZONTAL

    #: .. versionadded:: 0.8.5
    #: The ``VERTICAL`` constant for the :attr:`radio_fields`.
    VERTICAL = admin.VERTICAL

    #: The fields to display as raw ID
    raw_id_fields = ()

    #: The fields to display in a vertical filter
    filter_vertical = ()

    #: The fields to display in a horizontal filter
    filter_horizontal = ()

    #: The fields to display as radio choice. For example::
    #:
    #:    radio_fields = {
    #:        'align': ContentPlugin.VERTICAL,
    #:    }
    #:
    #: The value can be :attr:`ContentPlugin.HORIZONTAL` or :attr:`ContentPlugin.VERTICAL`.
    radio_fields = {}

    #: Fields to automatically populate with values
    prepopulated_fields = {}

    #: Overwritten formfield attributes, e.g. the 'widget'. Allows both the class and fieldname as key.
    formfield_overrides = {}

    #: The fields to display as readonly.
    readonly_fields = ()

    def __init__(self):
        # Lazily filled cache for the type_id property.
        self._type_id = None

    def __repr__(self):
        return '<{0} for {1} model>'.format(self.__class__.__name__, self.model.__name__)

    @property
    def verbose_name(self):
        """
        The title for the plugin, by default it reads the ``verbose_name`` of the model.
        """
        return self.model._meta.verbose_name

    @property
    def name(self):
        """
        Return the classname of the plugin, this is mainly provided for templates.
        This value can also be used in :func:`PluginPool`.
        """
        return self.__class__.__name__

    @property
    def type_name(self):
        """
        Return the classname of the model, this is mainly provided for templates.
        """
        return self.model.__name__

    @property
    def type_id(self):
        """
        Shortcut to retrieving the ContentType id of the model.

        The id is fetched once and cached on the plugin instance.

        :raises DatabaseError: when the ContentType lookup fails; the message includes the original error text.
        """
        if self._type_id is None:
            try:
                self._type_id = ContentType.objects.get_for_model(self.model).id
            except DatabaseError as e:
                raise DatabaseError("Unable to fetch ContentType object, is a plugin being registered before the initial syncdb? (original error: {0})".format(str(e)))
        return self._type_id

    def get_model_instances(self):
        """
        Return the model instances the plugin has created.
        """
        return self.model.objects.all()

    def _render_contentitem(self, request, instance):
        # Internal wrapper for render(), to allow updating the method signature easily.
        # It also happens to really simplify code navigation.
        result = self.render(request=request, instance=instance)

        if isinstance(result, ContentItemOutput):
            # Return in new 1.0 format

            # Also include the statically declared FrontendMedia, inserted before any extra added files.
            # These could be included already in the ContentItemOutput object, but duplicates are removed.
            media = self.get_frontend_media(instance)
            if media is not ImmutableMedia.empty_instance:
                result._insert_media(media)

            return result
        elif isinstance(result, (HttpResponseRedirect, HttpResponsePermanentRedirect)):
            # Can't return a HTTP response from a plugin that is rendered as a string in a template.
            # However, this response can be translated into our custom exception-based redirect mechanism.
            return self.redirect(result['Location'], result.status_code)
        else:
            # Old 0.9 syntax, wrap it.
            # The 'cacheable' is implied in the rendering already, but this is just for completeness.
            media = self.get_frontend_media(instance)
            return ContentItemOutput(result, media, cacheable=self.cache_output, cache_timeout=self.cache_timeout)

    def get_output_cache_base_key(self, placeholder_name, instance):
        """
        .. versionadded:: 1.0
           Return the default cache key, both :func:`get_output_cache_key` and :func:`get_output_cache_keys` rely on this.
           By default, this function generates the cache key using :func:`~fluent_contents.cache.get_rendering_cache_key`.
        """
        return get_rendering_cache_key(placeholder_name, instance)

    def get_output_cache_key(self, placeholder_name, instance):
        """
        .. versionadded:: 0.9
           Return the default cache key which is used to store a rendered item.
           By default, this function generates the cache key using :func:`get_output_cache_base_key`.
        """
        cachekey = self.get_output_cache_base_key(placeholder_name, instance)

        if self.cache_output_per_site:
            cachekey = "{0}-s{1}".format(cachekey, settings.SITE_ID)

        # Append language code
        if self.cache_output_per_language:
            # NOTE: Not using self.language_code, but using the current language instead.
            #       That is what the {% trans %} tags are rendered as after all.
            #       The render_placeholder() code can switch the language if needed.
            user_language = get_language()
            if user_language not in self.cache_supported_language_codes:
                user_language = 'unsupported'

            cachekey = "{0}.{1}".format(cachekey, user_language)

        return cachekey

    def get_output_cache_keys(self, placeholder_name, instance):
        """
        .. versionadded:: 0.9
           Return the possible cache keys for a rendered item.

           This method should be overwritten when implementing a custom :func:`set_cached_output` method
           or when implementing a custom :func:`get_output_cache_key` function.
           By default, this function generates the cache key using :func:`get_output_cache_base_key`.
        """
        base_key = self.get_output_cache_base_key(placeholder_name, instance)
        cachekeys = [
            base_key,
        ]

        if self.cache_output_per_site:
            site_ids = list(Site.objects.values_list('pk', flat=True))
            if settings.SITE_ID not in site_ids:
                site_ids.append(settings.SITE_ID)

            # Derive the per-site keys from the same base key that get_output_cache_key() uses,
            # so an overridden get_output_cache_base_key() is invalidated under the names it was stored.
            cachekeys = ["{0}-s{1}".format(base_key, site_id) for site_id in site_ids]

        if self.cache_output_per_language or self.render_ignore_item_language:
            # Append language code to all keys,
            # have to invalidate a lot more items in memcache.
            # Also added "None" suffix, since get_parent_language_code() may return that.
            # TODO: ideally for render_ignore_item_language, only invalidate all when the fallback language changed.
            total_list = []
            cache_languages = list(self.cache_supported_language_codes) + ['unsupported', 'None']

            # All variants of the Placeholder (for full page caching)
            placeholder = instance.placeholder
            total_list.extend(get_placeholder_cache_key(placeholder, lc) for lc in cache_languages)

            # All variants of the ContentItem in different languages
            for user_language in cache_languages:
                total_list.extend("{0}.{1}".format(base, user_language) for base in cachekeys)
            cachekeys = total_list

        return cachekeys

    def get_cached_output(self, placeholder_name, instance):
        """
        .. versionadded:: 0.9
           Return the cached output for a rendered item, or ``None`` if no output is cached.

           This method can be overwritten to implement custom caching mechanisms.
           By default, this function generates the cache key using :func:`get_output_cache_key`
           and retrieves the results from the configured Django cache backend (e.g. memcached).
        """
        cachekey = self.get_output_cache_key(placeholder_name, instance)
        return cache.get(cachekey)

    def set_cached_output(self, placeholder_name, instance, output):
        """
        .. versionadded:: 0.9
           Store the cached output for a rendered item.

           This method can be overwritten to implement custom caching mechanisms.
           By default, this function generates the cache key using :func:`get_output_cache_key`
           and stores the results in the configured Django cache backend (e.g. memcached).

           When custom cache keys are used, also include those in :func:`get_output_cache_keys`
           so the cache will be cleared when needed.

        .. versionchanged:: 1.0
           The received data is no longer a HTML string, but :class:`~fluent_contents.models.ContentItemOutput` object.
        """
        cachekey = self.get_output_cache_key(placeholder_name, instance)
        if self.cache_timeout is not DEFAULT_TIMEOUT:
            cache.set(cachekey, output, self.cache_timeout)
        else:
            # Don't want to mix into the default 0/None issue.
            cache.set(cachekey, output)

    def render(self, request, instance, **kwargs):
        """
        The rendering/view function that displays a plugin model instance.

        :param instance: An instance of the ``model`` the plugin uses.
        :param request: The Django :class:`~django.http.HttpRequest` class containing the request parameters.
        :param kwargs: An optional slot for any new parameters.

        To render a plugin, either override this function, or specify the :attr:`render_template` variable,
        and optionally override :func:`get_context`.
        It is recommended to wrap the output in a ``<div>`` tag,
        to prevent the item from being displayed right next to the previous plugin.

        .. versionadded:: 1.0
           The function may either return a string of HTML code,
           or return a :class:`~fluent_contents.models.ContentItemOutput` object
           which holds both the CSS/JS includes and HTML string.
           For the sake of convenience and simplicity, most examples
           only return a HTML string directly.

           When the user needs to be redirected, simply return a :class:`~django.http.HttpResponseRedirect`
           or call the :func:`redirect` method.

        To render raw HTML code, use :func:`~django.utils.safestring.mark_safe` on the returned HTML.
        """
        render_template = self.get_render_template(request, instance, **kwargs)
        if not render_template:
            # Translate the message template first and interpolate afterwards;
            # interpolating before the lookup yields a msgid no catalog contains.
            return str(_(u"{No rendering defined for class '%s'}") % self.__class__.__name__)

        context = self.get_context(request, instance, **kwargs)
        return self.render_to_string(request, render_template, context)

    def render_to_string(self, request, template, context, content_instance=None):
        """
        Render a custom template with the :class:`~PluginContext` as context instance.

        When no ``content_instance`` is given, a new :class:`~PluginContext` is created for the request.
        """
        if not content_instance:
            content_instance = PluginContext(request)
        return render_to_string(template, context, context_instance=content_instance)

    def render_error(self, error):
        """
        A default implementation to render an exception.

        The error text is HTML-escaped and line breaks are converted before it is embedded in the markup.
        """
        return '<div style="color: red; border: 1px solid red; padding: 5px;">' \
               '<p><strong>%s</strong></p>%s</div>' % (_('Error:'), linebreaks(escape(str(error))))

    def redirect(self, url, status=302):
        """
        .. versionadded:: 1.0

        Request a redirect to be performed for the user.
        This always raises :class:`HttpRedirectRequest`; it never returns.
        Usage example:

        .. code-block:: python

            def get_context(self, request, instance, **kwargs):
                context = super(IdSearchPlugin, self).get_context(request, instance, **kwargs)

                if request.method == "POST":
                    form = MyForm(request.POST)
                    if form.is_valid():
                        self.redirect("/foo/")
                else:
                    form = MyForm()

                context['form'] = form
                return context

        To handle redirects, :class:`fluent_contents.middleware.HttpRedirectRequestMiddleware`
        should be added to the :django:setting:`MIDDLEWARE_CLASSES`.
        """
        raise HttpRedirectRequest(url, status=status)

    def get_render_template(self, request, instance, **kwargs):
        """
        Return the template to render for the specific model `instance` or `request`,
        By default it uses the ``render_template`` attribute.
        """
        return self.render_template

    def get_context(self, request, instance, **kwargs):
        """
        Return the context to use in the template defined by ``render_template`` (or :func:`get_render_template`).
        By default, it returns the model instance as ``instance`` field in the template.
        """
        return {
            'instance': instance,
        }

    @property
    def frontend_media(self):
        """
        .. versionadded:: 1.0
           The frontend media, typically declared using a ``class FrontendMedia`` definition.
        """
        # By adding this property, frontend_media_property() is further optimized.
        return ImmutableMedia.empty_instance

    def get_frontend_media(self, instance):
        """
        Return the frontend media for a specific instance.
        By default, it returns ``self.frontend_media``, which derives
        from the ``class FrontendMedia`` of the plugin.
        """
        return self.frontend_media
class RDFValue(with_metaclass(RDFValueMetaclass, object)):
    """Abstract base class for values.

    RDFValues are serialized to and from the data store. Subclasses supply
    the byte-level and datastore-level parsing plus the serialization.
    """

    # Name of the serialized representation: it has to describe both what
    # SerializeToDataStore() emits and what FromDatastoreValue() accepts.
    data_store_type = "bytes"

    # URL of a help page about this value type.
    context_help_url = None

    _value = None
    _prev_hash = None

    # Marked as dirty each time this object is modified.
    dirty = False

    # When the value was created as part of an AFF4 attribute, that attribute
    # is assigned here.
    attribute_instance = None

    def __init__(self, initializer=None):
        """Constructor must be able to take no args.

        Args:
          initializer: Optional parameter to construct from.

        Raises:
          InitializeError: if we can not be initialized from this parameter.
        """
        # An RDFValue may be initialized from another RDFValue of exactly the
        # same class, by round-tripping it through its serialized form.
        # TODO(user):pytype: type checker can't infer that the initializer
        # is not None after the check below.
        same_class = initializer.__class__ == self.__class__
        if same_class:
            serialized = cast(self.__class__, initializer).SerializeToBytes()
            self.ParseFromBytes(serialized)

        self._prev_hash = None

    def Copy(self):
        """Return a new value of the same class, rebuilt from our serialization."""
        clone = self.__class__()  # pytype: disable=not-instantiable
        clone.ParseFromBytes(self.SerializeToBytes())
        return clone

    def SetRaw(self, value):
        """Store `value` directly as the underlying value."""
        self._value = value

    def __copy__(self):
        return self.Copy()

    @abc.abstractmethod
    def ParseFromBytes(self, string):
        """Given a string, parse ourselves from it."""

    @abc.abstractmethod
    def ParseFromDatastore(self, value):
        """Initialize the RDF object from the datastore value."""

    @classmethod
    def FromDatastoreValue(cls, value):
        """Build an instance of `cls` and parse the datastore value into it."""
        instance = cls()
        instance.ParseFromDatastore(value)
        return instance

    @classmethod
    def FromSerializedBytes(cls, value):
        """Build an instance of `cls` and parse the serialized bytes into it."""
        instance = cls()
        instance.ParseFromBytes(value)
        return instance

    # TODO: Remove legacy SerializeToDataStore.
    def SerializeToDataStore(self):
        """Serialize to a datastore compatible form."""
        return self.SerializeToBytes()

    @abc.abstractmethod
    def SerializeToBytes(self):
        """Serialize into a string which can be parsed using ParseFromBytes."""

    @classmethod
    def Fields(cls):
        """Return a list of fields which can be queried from this value."""
        return []

    def __eq__(self, other):
        return self._value == other

    def __ne__(self, other):
        is_equal = self.__eq__(other)
        return not is_equal

    def __hash__(self):
        current_hash = hash(self.SerializeToBytes())

        # The first call, or an unchanged hash, is recorded and returned.
        if self._prev_hash is None or current_hash == self._prev_hash:
            self._prev_hash = current_hash
            return current_hash

        # The serialized form changed since the previous hash() call.
        raise AssertionError(
            "Usage of {} violates Python data model: hash() has changed! Usage "
            "of RDFStructs as members of sets or keys of dicts is discouraged. "
            "If used anyway, mutating is prohibited, because it causes the hash "
            "to change. Be aware that accessing unset fields can trigger a "
            "mutation.".format(compatibility.GetName(type(self))))

    def __bool__(self):
        return bool(self._value)

    # TODO: Remove after support for Python 2 is dropped.
    __nonzero__ = __bool__

    def __str__(self):  # pylint: disable=super-on-old-class
        """Ignores the __repr__ override below to avoid indefinite recursion."""
        return super(RDFValue, self).__repr__()

    def __repr__(self):
        content = str(self)
        type_name = compatibility.GetName(self.__class__)

        # Note %r, which prevents nasty nonascii characters from being printed,
        # including dangerous terminal escape sequences.
        return "<%s(%r)>" % (type_name, content)
class Updater(with_metaclass(abc.ABCMeta, Parameterized)):
    """Abstract base for updaters driven through ``update`` and ``evaluate``.

    Subclasses implement ``_build_graph``, ``_update`` and ``_evaluate``
    (and ``trainable_variables``, which raises here).
    """

    # When true, build_graph() creates a saver over the trainable variables.
    build_saver = True

    def __init__(self, env, scope=None, mpi_context=None, **kwargs):
        self.scope = scope
        self.env = env
        self.mpi_context = mpi_context

        self._n_experiences = 0
        self.step = 0
        self._saver = None

    @property
    def n_experiences(self):
        """Running total of the batch sizes passed to ``update``."""
        return self._n_experiences

    def build_graph(self):
        """Build the subclass graph, set the global step to 0, and optionally create a saver."""
        # with tf.name_scope(self.scope or self.__class__.__name__) as scope:
        #     self._scope = scope

        self._build_graph()

        step_var = tf.train.get_or_create_global_step()
        self.inc_global_step_op = tf.assign_add(step_var, 1)

        # Explicitly assign 0 to the global step through a placeholder.
        step_feed = tf.placeholder(tf.int64, ())
        reset_step_op = tf.assign(step_var, step_feed)
        session = tf.get_default_session()
        session.run(reset_step_op, feed_dict={step_feed: 0})

        if not self.build_saver:
            return

        variables_by_name = {}
        for variable in self.trainable_variables(for_opt=False):
            variables_by_name[variable.name] = variable
        self.saver = tf.train.Saver(variables_by_name)

    @abc.abstractmethod
    def _build_graph(self):
        raise Exception("NotImplemented")

    def update(self, batch_size, step):
        """Run one subclass update, bump the global step, and count the experiences."""
        outcome = self._update(batch_size)

        tf.get_default_session().run(self.inc_global_step_op)
        self._n_experiences += batch_size

        return outcome

    @abc.abstractmethod
    def _update(self, batch_size):
        raise Exception("NotImplemented")

    def evaluate(self, batch_size, step, mode="val"):
        """Delegate to ``_evaluate``; `mode` must be ``"val"`` or ``"test"``."""
        allowed_modes = "val test".split()
        assert mode in allowed_modes
        return self._evaluate(batch_size, mode)

    @abc.abstractmethod
    def _evaluate(self, batch_size, mode):
        raise Exception("NotImplemented")

    def trainable_variables(self, for_opt):
        raise Exception("AbstractMethod")

    def save(self, filename):
        """Save the default session with the saver and return the resulting path."""
        session = tf.get_default_session()
        return self.saver.save(session, filename)

    def restore(self, path):
        """Restore the default session from `path` with the saver."""
        session = tf.get_default_session()
        self.saver.restore(session, path)
class IWebDriverSource(with_metaclass(ABCMeta, object)):
    """Interface for objects that expose a ``driver`` property."""

    @abstractproperty
    def driver(self):
        # type: () -> WebDriver
        """Abstract property; concrete implementations must define it."""
class OpDelegate(with_metaclass(abc.ABCMeta, object)):
    """Accessors for an op-graph Op's arguments and its device op."""

    def op_arg(self, op, n):
        """
        Returns the nth argument of an op-graph Op op as an op-graph Op.

        Overridden by the exec graph to reflect modifications made to the graph.

        Args:
            op: The op-graph op we want an args for.
            n: The arg number.

        Returns:
            The arg's op.

        """
        all_args = self.op_args(op)
        return all_args[n]

    def op_args(self, op):
        """
        Returns all the arguments of an op-graph Op.

        Overridden by the exec graph to reflect modification made to the graph.

        Args:
            op: An op-graph Op.

        Returns:
            The args for op.

        """
        return op.args

    def get_device_op(self, op):
        """
        Helper function that traverses through any reshape ops or value ops
        to return the tensor op.

        Overridden by the exec graph to reflect modification made to the graph.

        Args:
            op: An op-graph Op.

        Returns:
            The op providing actual storage for op's value.

        """
        # Unwrap sequential ops down to their value tensor.
        current = op
        while isinstance(current, SequentialOp):
            current = current.value_tensor

        if current.is_device_op:
            return current

        if isinstance(current, TensorValueOp):
            return current.tensor

        # Otherwise take the first truthy result found among the arguments.
        for argument in current.args:
            found = self.get_device_op(argument)
            if found:
                return found

        return None
class AbstractJobStore(with_metaclass(ABCMeta, object)): """ Represents the physical storage for the jobs and files in a Toil workflow. """ def __init__(self): """ Create an instance of the job store. The instance will not be fully functional until either :meth:`.initialize` or :meth:`.resume` is invoked. Note that the :meth:`.destroy` method may be invoked on the object with or without prior invocation of either of these two methods. """ self.__config = None def initialize(self, config): """ Create the physical storage for this job store, allocate a workflow ID and persist the given Toil configuration to the store. :param toil.common.Config config: the Toil configuration to initialize this job store with. The given configuration will be updated with the newly allocated workflow ID. :raises JobStoreExistsException: if the physical storage for this job store already exists """ assert config.workflowID is None config.workflowID = str(uuid4()) logger.debug("The workflow ID is: '%s'" % config.workflowID) self.__config = config self.writeConfig() def writeConfig(self): """ Persists the value of the :attr:`AbstractJobStore.config` attribute to the job store, so that it can be retrieved later by other instances of this class. """ with self.writeSharedFileStream('config.pickle', isProtected=False) as fileHandle: pickle.dump(self.__config, fileHandle, pickle.HIGHEST_PROTOCOL) def resume(self): """ Connect this instance to the physical storage it represents and load the Toil configuration into the :attr:`AbstractJobStore.config` attribute. :raises NoSuchJobStoreException: if the physical storage for this job store doesn't exist """ with self.readSharedFileStream('config.pickle') as fileHandle: config = safeUnpickleFromStream(fileHandle) assert config.workflowID is not None self.__config = config @property def config(self): """ The Toil configuration associated with this job store. 
:rtype: toil.common.Config """ return self.__config rootJobStoreIDFileName = 'rootJobStoreID' def setRootJob(self, rootJobStoreID): """ Set the root job of the workflow backed by this job store :param str rootJobStoreID: The ID of the job to set as root """ with self.writeSharedFileStream(self.rootJobStoreIDFileName) as f: f.write(rootJobStoreID.encode('utf-8')) def loadRootJob(self): """ Loads the root job in the current job store. :raises toil.job.JobException: If no root job is set or if the root job doesn't exist in this job store :return: The root job. :rtype: toil.jobGraph.JobGraph """ try: with self.readSharedFileStream(self.rootJobStoreIDFileName) as f: rootJobStoreID = f.read().decode('utf-8') except NoSuchFileException: raise JobException( 'No job has been set as the root in this job store') if not self.exists(rootJobStoreID): raise JobException( "The root job '%s' doesn't exist. Either the Toil workflow " "is finished or has never been started" % rootJobStoreID) return self.load(rootJobStoreID) # FIXME: This is only used in tests, why do we have it? def createRootJob(self, *args, **kwargs): """ Create a new job and set it as the root job in this job store :rtype: toil.jobGraph.JobGraph """ rootJob = self.create(*args, **kwargs) self.setRootJob(rootJob.jobStoreID) return rootJob def getRootJobReturnValue(self): """ Parse the return value from the root job. Raises an exception if the root job hasn't fulfilled its promise yet. """ # Parse out the return value from the root job with self.readSharedFileStream('rootJobReturnValue') as fH: return safeUnpickleFromStream(fH) @property @memoize def _jobStoreClasses(self): """ A list of concrete AbstractJobStore implementations whose dependencies are installed. 
:rtype: list[AbstractJobStore] """ jobStoreClassNames = ( "toil.jobStores.fileJobStore.FileJobStore", "toil.jobStores.googleJobStore.GoogleJobStore", "toil.jobStores.aws.jobStore.AWSJobStore", "toil.jobStores.abstractJobStore.JobStoreSupport") jobStoreClasses = [] for className in jobStoreClassNames: moduleName, className = className.rsplit('.', 1) from importlib import import_module try: module = import_module(moduleName) except ImportError: logger.debug( "Unable to import '%s' as is expected if the corresponding extra was " "omitted at installation time.", moduleName) else: jobStoreClass = getattr(module, className) jobStoreClasses.append(jobStoreClass) return jobStoreClasses def _findJobStoreForUrl(self, url, export=False): """ Returns the AbstractJobStore subclass that supports the given URL. :param urlparse.ParseResult url: The given URL :param bool export: The URL for :rtype: toil.jobStore.AbstractJobStore """ for jobStoreCls in self._jobStoreClasses: if jobStoreCls._supportsUrl(url, export): return jobStoreCls raise RuntimeError( "No job store implementation supports %sporting for URL '%s'" % ('ex' if export else 'im', url.geturl())) def importFile(self, srcUrl, sharedFileName=None, hardlink=False): """ Imports the file at the given URL into job store. The ID of the newly imported file is returned. If the name of a shared file name is provided, the file will be imported as such and None is returned. Currently supported schemes are: - 's3' for objects in Amazon S3 e.g. s3://bucket/key - 'file' for local files e.g. file:///local/file/path - 'http' e.g. http://someurl.com/path - 'gs' e.g. gs://bucket/file :param str srcUrl: URL that points to a file or object in the storage mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket. 
:param str sharedFileName: Optional name to assign to the imported file within the job store :return: The jobStoreFileId of the imported file or None if sharedFileName was given :rtype: toil.fileStores.FileID or None """ # Note that the helper method _importFile is used to read from the source and write to # destination (which is the current job store in this case). To implement any # optimizations that circumvent this, the _importFile method should be overridden by # subclasses of AbstractJobStore. srcUrl = urlparse.urlparse(srcUrl) otherCls = self._findJobStoreForUrl(srcUrl) return self._importFile(otherCls, srcUrl, sharedFileName=sharedFileName, hardlink=hardlink) def _importFile(self, otherCls, url, sharedFileName=None, hardlink=False): """ Import the file at the given URL using the given job store class to retrieve that file. See also :meth:`.importFile`. This method applies a generic approach to importing: it asks the other job store class for a stream and writes that stream as either a regular or a shared file. :param AbstractJobStore otherCls: The concrete subclass of AbstractJobStore that supports reading from the given URL and getting the file size from the URL. :param urlparse.ParseResult url: The location of the file to import. :param str sharedFileName: Optional name to assign to the imported file within the job store :return The jobStoreFileId of imported file or None if sharedFileName was given :rtype: toil.fileStores.FileID or None """ if sharedFileName is None: with self.writeFileStream() as (writable, jobStoreFileID): size = otherCls._readFromUrl(url, writable) return FileID(jobStoreFileID, size) else: self._requireValidSharedFileName(sharedFileName) with self.writeSharedFileStream(sharedFileName) as writable: otherCls._readFromUrl(url, writable) return None def exportFile(self, jobStoreFileID, dstUrl): """ Exports file to destination pointed at by the destination URL. 
Refer to :meth:`.AbstractJobStore.importFile` documentation for currently supported URL schemes. Note that the helper method _exportFile is used to read from the source and write to destination. To implement any optimizations that circumvent this, the _exportFile method should be overridden by subclasses of AbstractJobStore. :param str jobStoreFileID: The id of the file in the job store that should be exported. :param str dstUrl: URL that points to a file or object in the storage mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket. """ dstUrl = urlparse.urlparse(dstUrl) otherCls = self._findJobStoreForUrl(dstUrl, export=True) self._exportFile(otherCls, jobStoreFileID, dstUrl) def _exportFile(self, otherCls, jobStoreFileID, url): """ Refer to exportFile docstring for information about this method. :param AbstractJobStore otherCls: The concrete subclass of AbstractJobStore that supports exporting to the given URL. Note that the type annotation here is not completely accurate. This is not an instance, it's a class, but there is no way to reflect that in :pep:`484` type hints. :param str jobStoreFileID: The id of the file that will be exported. :param urlparse.ParseResult url: The parsed URL of the file to export to. """ self._defaultExportFile(otherCls, jobStoreFileID, url) def _defaultExportFile(self, otherCls, jobStoreFileID, url): """ Refer to exportFile docstring for information about this method. :param AbstractJobStore otherCls: The concrete subclass of AbstractJobStore that supports exporting to the given URL. Note that the type annotation here is not completely accurate. This is not an instance, it's a class, but there is no way to reflect that in :pep:`484` type hints. :param str jobStoreFileID: The id of the file that will be exported. :param urlparse.ParseResult url: The parsed URL of the file to export to. 
""" with self.readFileStream(jobStoreFileID) as readable: otherCls._writeToUrl(readable, url) @abstractclassmethod def getSize(cls, url): """ Get the size in bytes of the file at the given URL, or None if it cannot be obtained. :param urlparse.ParseResult url: URL that points to a file or object in the storage mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket. """ raise NotImplementedError @abstractclassmethod def _readFromUrl(cls, url, writable): """ Reads the contents of the object at the specified location and writes it to the given writable stream. Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes. :param urlparse.ParseResult url: URL that points to a file or object in the storage mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket. :param writable: a writable stream :return int: returns the size of the file in bytes """ raise NotImplementedError() @abstractclassmethod def _writeToUrl(cls, readable, url): """ Reads the contents of the given readable stream and writes it to the object at the specified location. Refer to AbstractJobStore.importFile documentation for currently supported URL schemes. :param urlparse.ParseResult url: URL that points to a file or object in the storage mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket. :param readable: a readable stream """ raise NotImplementedError() @abstractclassmethod def _supportsUrl(cls, url, export=False): """ Returns True if the job store supports the URL's scheme. Refer to AbstractJobStore.importFile documentation for currently supported URL schemes. :param bool export: Determines if the url is supported for exported :param urlparse.ParseResult url: a parsed URL that may be supported :return bool: returns true if the cls supports the URL """ raise NotImplementedError() @abstractmethod def destroy(self): """ The inverse of :meth:`.initialize`, this method deletes the physical storage represented by this instance. 
While not being atomic, this method *is* at least idempotent, as a means to counteract potential issues with eventual consistency exhibited by the underlying storage mechanisms. This means that if the method fails (raises an exception), it may (and should be) invoked again. If the underlying storage mechanism is eventually consistent, even a successful invocation is not an ironclad guarantee that the physical storage vanished completely and immediately. A successful invocation only guarantees that the deletion will eventually happen. It is therefore recommended to not immediately reuse the same job store location for a new Toil workflow. """ raise NotImplementedError() def getEnv(self): """ Returns a dictionary of environment variables that this job store requires to be set in order to function properly on a worker. :rtype: dict[str,str] """ return {} # Cleanup functions def clean(self, jobCache=None): """ Function to cleanup the state of a job store after a restart. Fixes jobs that might have been partially updated. Resets the try counts and removes jobs that are not successors of the current root job. :param dict[str,toil.jobGraph.JobGraph] jobCache: if a value it must be a dict from job ID keys to JobGraph object values. Jobs will be loaded from the cache (which can be downloaded from the job store in a batch) instead of piecemeal when recursed into. """ if jobCache is None: logger.warning("Cleaning jobStore recursively. 
This may be slow.") # Functions to get and check the existence of jobs, using the jobCache # if present def getJob(jobId): if jobCache is not None: try: return jobCache[jobId] except KeyError: return self.load(jobId) else: return self.load(jobId) def haveJob(jobId): if jobCache is not None: if jobId in jobCache: return True else: return self.exists(jobId) else: return self.exists(jobId) def getJobs(): if jobCache is not None: return itervalues(jobCache) else: return self.jobs() # Iterate from the root jobGraph and collate all jobs that are reachable from it # All other jobs returned by self.jobs() are orphaned and can be removed reachableFromRoot = set() def getConnectedJobs(jobGraph): if jobGraph.jobStoreID in reachableFromRoot: return reachableFromRoot.add(jobGraph.jobStoreID) # Traverse jobs in stack for jobs in jobGraph.stack: for successorJobStoreID in [x.jobStoreID for x in jobs]: if (successorJobStoreID not in reachableFromRoot and haveJob(successorJobStoreID)): getConnectedJobs(getJob(successorJobStoreID)) # Traverse service jobs for jobs in jobGraph.services: for serviceJobStoreID in [x.jobStoreID for x in jobs]: if haveJob(serviceJobStoreID): assert serviceJobStoreID not in reachableFromRoot reachableFromRoot.add(serviceJobStoreID) logger.debug("Checking job graph connectivity...") getConnectedJobs(self.loadRootJob()) logger.debug("%d jobs reachable from root." % len(reachableFromRoot)) # Cleanup jobs that are not reachable from the root, and therefore orphaned jobsToDelete = [ x for x in getJobs() if x.jobStoreID not in reachableFromRoot ] for jobGraph in jobsToDelete: # clean up any associated files before deletion for fileID in jobGraph.filesToDelete: # Delete any files that should already be deleted logger.warning( "Deleting file '%s'. 
It is marked for deletion but has not yet been " "removed.", fileID) self.deleteFile(fileID) # Delete the job self.delete(jobGraph.jobStoreID) jobGraphsReachableFromRoot = { id: getJob(id) for id in reachableFromRoot } # Clean up any checkpoint jobs -- delete any successors it # may have launched, and restore the job to a pristine # state jobsDeletedByCheckpoints = set() for jobGraph in [ jG for jG in jobGraphsReachableFromRoot.values() if jG.checkpoint is not None ]: if jobGraph.jobStoreID in jobsDeletedByCheckpoints: # This is a checkpoint that was nested within an # earlier checkpoint, so it and all its successors are # already gone. continue logger.debug("Restarting checkpointed job %s" % jobGraph) deletedThisRound = jobGraph.restartCheckpoint(self) jobsDeletedByCheckpoints |= set(deletedThisRound) for jobID in jobsDeletedByCheckpoints: del jobGraphsReachableFromRoot[jobID] # Clean up jobs that are in reachable from the root for jobGraph in jobGraphsReachableFromRoot.values(): # jobGraphs here are necessarily in reachable from root. changed = [False ] # This is a flag to indicate the jobGraph state has # changed # If the job has files to delete delete them. if len(jobGraph.filesToDelete) != 0: # Delete any files that should already be deleted for fileID in jobGraph.filesToDelete: logger.critical( "Removing file in job store: %s that was " "marked for deletion but not previously removed" % fileID) self.deleteFile(fileID) jobGraph.filesToDelete = [] changed[0] = True # For a job whose command is already executed, remove jobs from the stack that are # already deleted. This cleans up the case that the jobGraph had successors to run, # but had not been updated to reflect this. 
if jobGraph.command is None: stackSizeFn = lambda: sum(map(len, jobGraph.stack)) startStackSize = stackSizeFn() # Remove deleted jobs jobGraph.stack = [[y for y in x if self.exists(y.jobStoreID)] for x in jobGraph.stack] # Remove empty stuff from the stack jobGraph.stack = [x for x in jobGraph.stack if len(x) > 0] # Check if anything got removed if stackSizeFn() != startStackSize: changed[0] = True # Cleanup any services that have already been finished. # Filter out deleted services and update the flags for services that exist # If there are services then renew # the start and terminate flags if they have been removed def subFlagFile(jobStoreID, jobStoreFileID, flag): if self.fileExists(jobStoreFileID): return jobStoreFileID # Make a new flag newFlag = self.getEmptyFileStoreID(jobStoreID, cleanup=False) # Load the jobGraph for the service and initialise the link serviceJobGraph = getJob(jobStoreID) if flag == 1: logger.debug( "Recreating a start service flag for job: %s, flag: %s", jobStoreID, newFlag) serviceJobGraph.startJobStoreID = newFlag elif flag == 2: logger.debug( "Recreating a terminate service flag for job: %s, flag: %s", jobStoreID, newFlag) serviceJobGraph.terminateJobStoreID = newFlag else: logger.debug( "Recreating a error service flag for job: %s, flag: %s", jobStoreID, newFlag) assert flag == 3 serviceJobGraph.errorJobStoreID = newFlag # Update the service job on disk self.update(serviceJobGraph) changed[0] = True return newFlag servicesSizeFn = lambda: sum(map(len, jobGraph.services)) startServicesSize = servicesSizeFn() def replaceFlagsIfNeeded(serviceJobNode): serviceJobNode.startJobStoreID = subFlagFile( serviceJobNode.jobStoreID, serviceJobNode.startJobStoreID, 1) serviceJobNode.terminateJobStoreID = subFlagFile( serviceJobNode.jobStoreID, serviceJobNode.terminateJobStoreID, 2) serviceJobNode.errorJobStoreID = subFlagFile( serviceJobNode.jobStoreID, serviceJobNode.errorJobStoreID, 3) # jobGraph.services is a list of lists containing 
serviceNodes # remove all services that no longer exist services = jobGraph.services jobGraph.services = [] for serviceList in services: existingServices = [ service for service in serviceList if self.exists(service.jobStoreID) ] if existingServices: jobGraph.services.append(existingServices) list( map( lambda serviceList: list( map(replaceFlagsIfNeeded, serviceList)), jobGraph.services)) if servicesSizeFn() != startServicesSize: changed[0] = True # Reset the retry count of the jobGraph if jobGraph.remainingRetryCount != self._defaultTryCount(): jobGraph.remainingRetryCount = self._defaultTryCount() changed[0] = True # This cleans the old log file which may # have been left if the jobGraph is being retried after a jobGraph failure. if jobGraph.logJobStoreFileID != None: self.deleteFile(jobGraph.logJobStoreFileID) jobGraph.logJobStoreFileID = None changed[0] = True if changed[0]: # Update, but only if a change has occurred logger.critical("Repairing job: %s" % jobGraph.jobStoreID) self.update(jobGraph) # Remove any crufty stats/logging files from the previous run logger.debug("Discarding old statistics and logs...") # We have to manually discard the stream to avoid getting # stuck on a blocking write from the job store. def discardStream(stream): """Read the stream 4K at a time until EOF, discarding all input.""" while len(stream.read(4096)) != 0: pass self.readStatsAndLogging(discardStream) logger.debug("Job store is clean") # TODO: reloading of the rootJob may be redundant here return self.loadRootJob() ########################################## # The following methods deal with creating/loading/updating/writing/checking for the # existence of jobs ########################################## @contextmanager def batch(self): """ All calls to create() with this context manager active will be performed in a batch after the context manager is released. 
:rtype: None """ yield @abstractmethod def create(self, jobNode): """ Creates a job graph from the given job node & writes it to the job store. :rtype: toil.jobGraph.JobGraph """ raise NotImplementedError() @abstractmethod def exists(self, jobStoreID): """ Indicates whether the job with the specified jobStoreID exists in the job store :rtype: bool """ raise NotImplementedError() # One year should be sufficient to finish any pipeline ;-) publicUrlExpiration = timedelta(days=365) @abstractmethod def getPublicUrl(self, fileName): """ Returns a publicly accessible URL to the given file in the job store. The returned URL may expire as early as 1h after its been returned. Throw an exception if the file does not exist. :param str fileName: the jobStoreFileID of the file to generate a URL for :raise NoSuchFileException: if the specified file does not exist in this job store :rtype: str """ raise NotImplementedError() @abstractmethod def getSharedPublicUrl(self, sharedFileName): """ Differs from :meth:`getPublicUrl` in that this method is for generating URLs for shared files written by :meth:`writeSharedFileStream`. Returns a publicly accessible URL to the given file in the job store. The returned URL starts with 'http:', 'https:' or 'file:'. The returned URL may expire as early as 1h after its been returned. Throw an exception if the file does not exist. :param str sharedFileName: The name of the shared file to generate a publically accessible url for. :raise NoSuchFileException: raised if the specified file does not exist in the store :rtype: str """ raise NotImplementedError() @abstractmethod def load(self, jobStoreID): """ Loads the job referenced by the given ID and returns it. :param str jobStoreID: the ID of the job to load :raise NoSuchJobException: if there is no job with the given ID :rtype: toil.jobGraph.JobGraph """ raise NotImplementedError() @abstractmethod def update(self, job): """ Persists the job in this store atomically. 
:param toil.jobGraph.JobGraph job: the job to write to this job store """ raise NotImplementedError() @abstractmethod def delete(self, jobStoreID): """ Removes from store atomically, can not then subsequently call load(), write(), update(), etc. with the job. This operation is idempotent, i.e. deleting a job twice or deleting a non-existent job will succeed silently. :param str jobStoreID: the ID of the job to delete from this job store """ raise NotImplementedError() def jobs(self): """ Best effort attempt to return iterator on all jobs in the store. The iterator may not return all jobs and may also contain orphaned jobs that have already finished successfully and should not be rerun. To guarantee you get any and all jobs that can be run instead construct a more expensive ToilState object :return: Returns iterator on jobs in the store. The iterator may or may not contain all jobs and may contain invalid jobs :rtype: Iterator[toil.jobGraph.JobGraph] """ raise NotImplementedError() ########################################## # The following provide an way of creating/reading/writing/updating files # associated with a given job. ########################################## @abstractmethod def writeFile(self, localFilePath, jobStoreID=None, cleanup=False): """ Takes a file (as a path) and places it in this job store. Returns an ID that can be used to retrieve the file at a later time. The file is written in a atomic manner. It will not appear in the jobStore until the write has successfully completed. :param str localFilePath: the path to the local file that will be uploaded to the job store. :param str jobStoreID: the id of a job, or None. If specified, the may be associated with that job in a job-store-specific way. This may influence the returned ID. :param bool cleanup: Whether to attempt to delete the file when the job whose jobStoreID was given as jobStoreID is deleted with jobStore.delete(job). If jobStoreID was not given, does nothing. 
:raise ConcurrentFileModificationException: if the file was modified concurrently during an invocation of this method :raise NoSuchJobException: if the job specified via jobStoreID does not exist FIXME: some implementations may not raise this :return: an ID referencing the newly created file and can be used to read the file in the future. :rtype: str """ raise NotImplementedError() @abstractmethod @contextmanager def writeFileStream(self, jobStoreID=None, cleanup=False): """ Similar to writeFile, but returns a context manager yielding a tuple of 1) a file handle which can be written to and 2) the ID of the resulting file in the job store. The yielded file handle does not need to and should not be closed explicitly. The file is written in a atomic manner. It will not appear in the jobStore until the write has successfully completed. :param str jobStoreID: the id of a job, or None. If specified, the may be associated with that job in a job-store-specific way. This may influence the returned ID. :param bool cleanup: Whether to attempt to delete the file when the job whose jobStoreID was given as jobStoreID is deleted with jobStore.delete(job). If jobStoreID was not given, does nothing. :raise ConcurrentFileModificationException: if the file was modified concurrently during an invocation of this method :raise NoSuchJobException: if the job specified via jobStoreID does not exist FIXME: some implementations may not raise this :return: an ID that references the newly created file and can be used to read the file in the future. :rtype: str """ raise NotImplementedError() @abstractmethod def getEmptyFileStoreID(self, jobStoreID=None, cleanup=False): """ Creates an empty file in the job store and returns its ID. Call to fileExists(getEmptyFileStoreID(jobStoreID)) will return True. :param str jobStoreID: the id of a job, or None. If specified, the may be associated with that job in a job-store-specific way. This may influence the returned ID. 
:param bool cleanup: Whether to attempt to delete the file when the job whose jobStoreID was given as jobStoreID is deleted with jobStore.delete(job). If jobStoreID was not given, does nothing. :return: a jobStoreFileID that references the newly created file and can be used to reference the file in the future. :rtype: str """ raise NotImplementedError() @abstractmethod def readFile(self, jobStoreFileID, localFilePath, symlink=False): """ Copies or hard links the file referenced by jobStoreFileID to the given local file path. The version will be consistent with the last copy of the file written/updated. If the file in the job store is later modified via updateFile or updateFileStream, it is implementation-defined whether those writes will be visible at localFilePath. The file is copied in an atomic manner. It will not appear in the local file system until the copy has completed. The file at the given local path may not be modified after this method returns! :param str jobStoreFileID: ID of the file to be copied :param str localFilePath: the local path indicating where to place the contents of the given file in the job store :param bool symlink: whether the reader can tolerate a symlink. If set to true, the job store may create a symlink instead of a full copy of the file or a hard link. """ raise NotImplementedError() @abstractmethod @contextmanager def readFileStream(self, jobStoreFileID): """ Similar to readFile, but returns a context manager yielding a file handle which can be read from. The yielded file handle does not need to and should not be closed explicitly. :param str jobStoreFileID: ID of the file to get a readable file handle for """ raise NotImplementedError() @abstractmethod def deleteFile(self, jobStoreFileID): """ Deletes the file with the given ID from this job store. This operation is idempotent, i.e. deleting a file twice or deleting a non-existent file will succeed silently. 
:param str jobStoreFileID: ID of the file to delete """ raise NotImplementedError() @abstractmethod def fileExists(self, jobStoreFileID): """ Determine whether a file exists in this job store. :param str jobStoreFileID: an ID referencing the file to be checked :rtype: bool """ raise NotImplementedError() @abstractmethod def getFileSize(self, jobStoreFileID): """ Get the size of the given file in bytes, or 0 if it does not exist when queried. Note that job stores which encrypt files might return overestimates of file sizes, since the encrypted file may have been padded to the nearest block, augmented with an initialization vector, etc. :param str jobStoreFileID: an ID referencing the file to be checked :rtype: int """ raise NotImplementedError() @abstractmethod def updateFile(self, jobStoreFileID, localFilePath): """ Replaces the existing version of a file in the job store. Throws an exception if the file does not exist. :param str jobStoreFileID: the ID of the file in the job store to be updated :param str localFilePath: the local path to a file that will overwrite the current version in the job store :raise ConcurrentFileModificationException: if the file was modified concurrently during an invocation of this method :raise NoSuchFileException: if the specified file does not exist """ raise NotImplementedError() @abstractmethod def updateFileStream(self, jobStoreFileID): """ Replaces the existing version of a file in the job store. Similar to writeFile, but returns a context manager yielding a file handle which can be written to. The yielded file handle does not need to and should not be closed explicitly. 
:param str jobStoreFileID: the ID of the file in the job store to be updated :raise ConcurrentFileModificationException: if the file was modified concurrently during an invocation of this method :raise NoSuchFileException: if the specified file does not exist """ raise NotImplementedError() ########################################## # The following methods deal with shared files, i.e. files not associated # with specific jobs. ########################################## sharedFileNameRegex = re.compile(r'^[a-zA-Z0-9._-]+$') # FIXME: Rename to updateSharedFileStream @abstractmethod @contextmanager def writeSharedFileStream(self, sharedFileName, isProtected=None): """ Returns a context manager yielding a writable file handle to the global file referenced by the given name. File will be created in an atomic manner. :param str sharedFileName: A file name matching AbstractJobStore.fileNameRegex, unique within this job store :param bool isProtected: True if the file must be encrypted, None if it may be encrypted or False if it must be stored in the clear. :raise ConcurrentFileModificationException: if the file was modified concurrently during an invocation of this method """ raise NotImplementedError() @abstractmethod @contextmanager def readSharedFileStream(self, sharedFileName): """ Returns a context manager yielding a readable file handle to the global file referenced by the given name. :param str sharedFileName: A file name matching AbstractJobStore.fileNameRegex, unique within this job store """ raise NotImplementedError() @abstractmethod def writeStatsAndLogging(self, statsAndLoggingString): """ Adds the given statistics/logging string to the store of statistics info. 
:param str statsAndLoggingString: the string to be written to the stats file :raise ConcurrentFileModificationException: if the file was modified concurrently during an invocation of this method """ raise NotImplementedError() @abstractmethod def readStatsAndLogging(self, callback, readAll=False): """ Reads stats/logging strings accumulated by the writeStatsAndLogging() method. For each stats/logging string this method calls the given callback function with an open, readable file handle from which the stats string can be read. Returns the number of stats/logging strings processed. Each stats/logging string is only processed once unless the readAll parameter is set, in which case the given callback will be invoked for all existing stats/logging strings, including the ones from a previous invocation of this method. :param Callable callback: a function to be applied to each of the stats file handles found :param bool readAll: a boolean indicating whether to read the already processed stats files in addition to the unread stats files :raise ConcurrentFileModificationException: if the file was modified concurrently during an invocation of this method :return: the number of stats files processed :rtype: int """ raise NotImplementedError() ## Helper methods for subclasses def _defaultTryCount(self): return int(self.config.retryCount + 1) @classmethod def _validateSharedFileName(cls, sharedFileName): return bool(cls.sharedFileNameRegex.match(sharedFileName)) @classmethod def _requireValidSharedFileName(cls, sharedFileName): if not cls._validateSharedFileName(sharedFileName): raise ValueError("Not a valid shared file name: '%s'." % sharedFileName)
class DynamicRenderer(with_metaclass(ABCMeta, Renderer)):
    """
    A DynamicRenderer is a Renderer that creates each image as requested.  It
    has a defined maximum size on construction.

    Subclasses must implement :py:meth:`._render_now`, typically drawing into
    the internal buffers with :py:meth:`._write`.
    """

    def __init__(self, height, width):
        """
        :param height: The max height of the rendered image.
        :param width: The max width of the rendered image.
        """
        super(DynamicRenderer, self).__init__()
        self._height = height
        self._width = width
        # Both buffers stay empty until _clear() is called; the public
        # properties below always call _clear() before rendering.
        self._plain_image = []
        self._colour_map = []

    def _clear(self):
        """
        Clear the current image.

        Resets the plain image to ``height`` rows of ``width`` spaces and the
        colour map to a matching grid of ``(None, 0, 0)`` tuples.
        """
        self._plain_image = [" " * self._width for _ in range(self._height)]
        self._colour_map = [[(None, 0, 0) for _ in range(self._width)]
                            for _ in range(self._height)]

    def _write(self, text, x, y, colour=Screen.COLOUR_WHITE,
               attr=Screen.A_NORMAL, bg=Screen.COLOUR_BLACK):
        """
        Write some text to the specified location in the current image.

        Text that falls outside the image is clipped: anything left of column
        0 or beyond the maximum width is dropped, and rows outside the image
        are ignored entirely.

        :param text: The text to be added.
        :param x: The X coordinate in the image.
        :param y: The Y coordinate in the image.
        :param colour: The colour of the text to add.
        :param attr: The attribute of the image.
        :param bg: The background colour of the text to add.
        """
        # Limit checks to ensure that we don't try to draw off the end of the
        # arrays.  A negative row must be rejected too: Python would
        # otherwise treat it as an index counted from the bottom of the image.
        if y < 0 or y >= self._height or x >= self._width:
            return

        # Text starting left of the image: keep only the part that is
        # on-screen.  Without this, the negative x would be used as a slice
        # index below and make the row longer than the image width.
        if x < 0:
            text = text[-x:]
            x = 0

        # Limit text to draw to visible line
        if len(text) + x > self._width:
            text = text[:self._width - x]

        # Nothing visible is left after clipping (or the text was empty).
        if not text:
            return

        # Now draw it!
        line = self._plain_image[y]
        self._plain_image[y] = line[:x] + text + line[x + len(text):]
        colour_row = self._colour_map[y]
        for i in range(len(text)):
            colour_row[x + i] = (colour, attr, bg)

    @abstractmethod
    def _render_now(self):
        """
        Common method to render the latest image.

        :returns: A tuple of the plain image and the colour map as per
            :py:meth:`.rendered_text`.
        """

    @property
    def images(self):
        # We can't return all, so just return the latest rendered image.
        self._clear()
        return [self._render_now()[0]]

    @property
    def rendered_text(self):
        # Start from a blank image so that each access renders afresh.
        self._clear()
        return self._render_now()

    @property
    def max_height(self):
        # The maximum height given at construction.
        return self._height

    @property
    def max_width(self):
        # The maximum width given at construction.
        return self._width
class BaseMetadata(with_metaclass(abc.ABCMeta, object)): """ Abstract Metadata class, this has to be subclassed. if you need to add a standard XML property add it to _standard_properties @property and @propname.setter will be generated automatically Standard properties are the ones that we try to read from an xml file when instantiating a new metadata object. Reading from json metadata files is easier because we have an ordered structure. The class will try to read all she can without throwing errors because the more we can read from malformed input the better. .. versionadded:: 3.2 """ # paths in xml files for standard properties these are the ones we try # to read from an xml file _standard_properties = { 'organisation': ('gmd:contact/' 'gmd:CI_ResponsibleParty/' 'gmd:organisationName/' 'gco:CharacterString'), 'email': ('gmd:contact/' 'gmd:CI_ResponsibleParty/' 'gmd:contactInfo/' 'gmd:CI_Contact/' 'gmd:address/' 'gmd:CI_Address/' 'gmd:electronicMailAddress/' 'gco:CharacterString'), 'date': ('gmd:dateStamp/' 'gco:Date'), 'abstract': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:abstract/' 'gco:CharacterString'), 'title': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:citation/' 'gmd:CI_Citation/' 'gmd:title/' 'gco:CharacterString'), 'license': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:resourceConstraints/' 'gmd:MD_Constraints/' 'gmd:useLimitation/' 'gco:CharacterString'), 'url': ('gmd:distributionInfo/' 'gmd:MD_Distribution/' 'gmd:transferOptions/' 'gmd:MD_DigitalTransferOptions/' 'gmd:onLine/' 'gmd:CI_OnlineResource/' 'gmd:linkage/' 'gmd:URL'), 'report': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:supplementalInformation/' 'inasafe/' 'report/' 'gco:CharacterString'), 'layer_purpose': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:supplementalInformation/' 'inasafe/' 'layer_purpose/' 'gco:CharacterString'), 'layer_mode': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 
'gmd:supplementalInformation/' 'inasafe/' 'layer_mode/' 'gco:CharacterString'), 'layer_geometry': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:supplementalInformation/' 'inasafe/' 'layer_geometry/' 'gco:CharacterString'), 'keyword_version': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:supplementalInformation/' 'inasafe/' 'keyword_version/' 'gco:CharacterString'), 'scale': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:supplementalInformation/' 'inasafe/' 'scale/' 'gco:CharacterString'), 'source': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:supplementalInformation/' 'inasafe/' 'source/' 'gco:CharacterString'), 'datatype': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:supplementalInformation/' 'inasafe/' 'datatype/' 'gco:CharacterString'), 'multipart_polygon': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:supplementalInformation/' 'inasafe/' '%s/' 'gco:Boolean' % multipart_polygon_key), 'resolution': ('gmd:identificationInfo/' 'gmd:MD_DataIdentification/' 'gmd:supplementalInformation/' 'inasafe/' 'resolution/' 'gco:FloatTuple') } def __getattr__(self, name): """ Dynamically generate getter for each _standard_properties. """ if name in self._standard_properties: value = self.get_value(name) else: value = super(BaseMetadata, self).__getattr__(name) return value def __setattr__(self, name, value): """ Dynamically generate setter for each _standard_properties. """ if name in self._standard_properties: path = self._standard_properties[name] self.set(name, value, path) else: super(BaseMetadata, self).__setattr__(name, value) def __eq__(self, other): return self.dict == other.dict @abc.abstractmethod def __init__(self, layer_uri, xml_uri=None, json_uri=None): """ Constructor. 
:param layer_uri: uri of the layer for which the metadata ae :type layer_uri: str :param xml_uri: uri of an xml file to use :type xml_uri: str :param json_uri: uri of a json file to use :type json_uri: str """ # private members self._layer_uri = layer_uri # TODO (MB): maybe use MetadataDbIO.are_metadata_file_based instead self._layer_is_file_based = os.path.isfile(layer_uri) instantiate_metadata_db = False path = os.path.splitext(layer_uri)[0] if xml_uri is None: if self.layer_is_file_based: self._xml_uri = '%s.xml' % path else: # xml should be stored in cacheDB self._xml_uri = None instantiate_metadata_db = True else: self._xml_uri = xml_uri if json_uri is None: if self.layer_is_file_based: self._json_uri = '%s.json' % path else: # json should be stored in cacheDB self._json_uri = None instantiate_metadata_db = True else: self._json_uri = json_uri if instantiate_metadata_db: self.db_io = MetadataDbIO() self.reading_ancillary_files = False self._properties = {} # initialise the properties for name, path in list(self._standard_properties.items()): self.set(name, None, path) self._last_update = datetime.now() try: self.read_from_ancillary_file(xml_uri) except IOError: pass @abc.abstractproperty def dict(self): """ dictionary representation of the metadata. :return: dictionary representation of the metadata :rtype: dict """ metadata = {} properties = {} for name, prop in list(self.properties.items()): properties[name] = prop.dict metadata['properties'] = properties return metadata @abc.abstractproperty def xml(self): """ xml representation of the metadata. 
:return: xml representation of the metadata :rtype: ElementTree.Element """ tree = ElementTree.parse(METADATA_XML_TEMPLATE) root = tree.getroot() for name, prop in list(self.properties.items()): path = prop.xml_path elem = root.find(path, XML_NS) if elem is None: # create elem elem = insert_xml_element(root, path) elem.text = self.get_xml_value(name) return root @abc.abstractproperty def json(self): """ json representation of the metadata. :return: json representation of the metadata :rtype: str """ json_dumps = json.dumps(self.dict, indent=2, sort_keys=True, separators=(',', ': '), cls=MetadataEncoder) if not json_dumps.endswith('\n'): json_dumps += '\n' return json_dumps @abc.abstractmethod def read_json(self): """ read metadata from json and set all the found properties. when overriding remember to wrap your calls in reading_ancillary_files :return: the read metadata :rtype: dict """ with reading_ancillary_files(self): if self.json_uri is None: metadata = self._read_json_db() else: metadata = self._read_json_file() if 'properties' in metadata: for name, prop in list(metadata['properties'].items()): try: self.set(prop['name'], prop['value'], prop['xml_path']) except KeyError: # we just skip if we don't have something, we want # to have as much as possible read from the JSON pass return metadata def _read_json_file(self): """ read metadata from a json file. :return: the parsed json dict :rtype: dict """ with open(self.json_uri) as metadata_file: try: metadata = json.load(metadata_file) return metadata except ValueError: message = tr('the file %s does not appear to be valid JSON') message = message % self.json_uri raise MetadataReadError(message) def _read_json_db(self): """ read metadata from a json string stored in a DB. 
:return: the parsed json dict :rtype: dict """ try: metadata_str = self.db_io.read_metadata_from_uri( self.layer_uri, 'json') except HashNotFoundError: return {} try: metadata = json.loads(metadata_str) return metadata except ValueError: message = tr('the file DB entry for %s does not appear to be ' 'valid JSON') message %= self.layer_uri raise MetadataReadError(message) @abc.abstractmethod def read_xml(self): """ read metadata from xml and set all the found properties. :return: the root element of the xml :rtype: ElementTree.Element """ if self.xml_uri is None: root = self._read_xml_db() else: root = self._read_xml_file() if root is not None: for name, path in list(self._standard_properties.items()): value = read_property_from_xml(root, path) if value is not None: # this calls the default setters setattr(self, name, value) return root def _read_xml_file(self): """ read metadata from an xml file. :return: the root element of the xml :rtype: ElementTree.Element """ # this raises a IOError if the file doesn't exist root = ElementTree.parse(self.xml_uri) root.getroot() return root def _read_xml_db(self): """ read metadata from an xml string stored in a DB. :return: the root element of the xml :rtype: ElementTree.Element """ try: metadata_str = self.db_io.read_metadata_from_uri( self.layer_uri, 'xml') root = ElementTree.fromstring(metadata_str) return root except HashNotFoundError: return None @property # there is no setter because the layer should not change overtime def layer_uri(self): """ the layer URI. :return: the layer URI :rtype: str """ return self._layer_uri @property # there is no setter because the json should not change overtime def json_uri(self): """ the json file URI if it is None than the json is coming from a DB. :return: the json URI :rtype: str, None """ return self._json_uri @property # there is no setter because the xml should not change overtime def xml_uri(self): """ the xml file URI if it is None than the xml is coming from a DB. 
:return: the xml URI :rtype: str, None """ return self._xml_uri @property def last_update(self): """ time of the last update of the metadata in memory. :return: time of the last update :rtype: datetime """ return self._last_update @last_update.setter def last_update(self, time): """ set time of the last update of the metadata in memory. :param time: the update time :type time: datetime """ self._last_update = time def set_last_update_to_now(self): """ set time of the last update of the metadata in memory to now. """ self._last_update = datetime.now() def get_value(self, name): """ get the typed value of a property. The type is the original python type used when the value was set :param name: the name of the property :type name: str :return: the value of the property """ return self.get_property(name).value def get_xml_value(self, name): """ get the xml value of a property. :param name: the name of the property :type name: str :return: the value of the property :rtype: str """ return self.get_property(name).xml_value def get_property(self, name): """ get a property. :param name: the name of the property :type name: str :return: the property :rtype: BaseProperty """ return self.properties[name] @property def properties(self): """ get all properties. :return: the properties :rtype: dict """ return self._properties def update(self, name, value): """ update a property value. The accepted type depends on the property type :param name: the name of the property :type name: str :param value: the new value """ self.get_property(name).value = value def set(self, name, value, xml_path): """ Create a new metadata property. The accepted type depends on the property type which is determined by the xml_path :param name: the name of the property :type name: str :param value: the value of the property :type value: :param xml_path: the xml path where the property should be stored. 
This is split on / and the last element is used to determine the property type :type xml_path: str """ xml_type = xml_path.split('/')[-1] # check if the desired type is supported try: property_class = TYPE_CONVERSIONS[xml_type] except KeyError: raise KeyError('The xml type %s is not supported yet' % xml_type) try: metadata_property = property_class(name, value, xml_path) self._properties[name] = metadata_property self.set_last_update_to_now() except TypeError: if self.reading_ancillary_files: # we are parsing files so we want to accept as much as # possible without raising exceptions pass else: raise def save(self, save_json=True, save_xml=True): """ Saves the metadata json and/or xml to a file or DB. :param save_json: flag to save json :type save_json: bool :param save_xml: flag to save xml :type save_xml: bool """ if self.layer_is_file_based: if save_json: self.write_to_file(self.json_uri) if save_xml: self.write_to_file(self.xml_uri) else: self.write_to_db(save_json, save_xml) def write_to_file(self, destination_path): """ Writes the metadata json or xml to a file. :param destination_path: the file path the file format is inferred from the destination_path extension. :type destination_path: str :return: the written metadata :rtype: str """ file_format = os.path.splitext(destination_path)[1][1:] metadata = self.get_writable_metadata(file_format) with open(destination_path, 'w') as f: f.write(metadata) return metadata def write_to_db(self, save_json=True, save_xml=True): """ Stores the metadata json and/or xml in a DB. The returned tuple can contain None. 
:param save_json: flag to save json :type save_json: bool :param save_xml: flag to save xml :type save_xml: bool :return: the stored metadata :rtype: (str, str) """ metadata_json = None metadata_xml = None if save_json: metadata_json = self.get_writable_metadata('json') if save_xml: metadata_xml = self.get_writable_metadata('xml') self.db_io.write_metadata_for_uri(self.layer_uri, metadata_json, metadata_xml) return metadata_json, metadata_xml def get_writable_metadata(self, file_format): """ Convert the metadata to a writable form. :param file_format: the needed format can be json or xml :type file_format: str :return: the dupled metadata :rtype: str """ if file_format == 'json': metadata = self.json elif file_format == 'xml': metadata = self.xml else: raise TypeError( 'The requested file type (%s) is not yet supported' % file_format) return metadata def read_from_ancillary_file(self, custom_xml=None): """ try to read xml and json from existing files or db. This is used when instantiating a new metadata object. We explicitly check if a custom XML was passed so we give it priority on the JSON. If no custom XML is passed, JSON has priority :param custom_xml: the path to a custom xml file :type custom_xml: str """ if custom_xml and os.path.isfile(self.xml_uri): self.read_xml() else: if not self.read_json(): self.read_xml() @property def layer_is_file_based(self): """ flag if the layer is file based. :return: flag if the layer is file based :rtype: bool """ return self._layer_is_file_based def update_from_dict(self, keywords): """Set properties of metadata using key and value from keywords :param keywords: A dictionary of keywords (key, value). :type keywords: dict """ for key, value in list(keywords.items()): setattr(self, key, value)
class Object(with_metaclass(ObjectMetaclass, ParseResource)):
    """A stored object addressed under the ``classes`` REST endpoint.

    ``parse_table`` optionally overrides the class name as the remote table
    name; ``ENDPOINT_ROOT`` is the URL prefix used to address instances.
    """

    parse_table = None
    ENDPOINT_ROOT = '/'.join([API_ROOT, 'classes'])

    @classmethod
    def factory(cls, class_name):
        """Build and return a subclass of ``cls`` named ``class_name``.

        The new class has its endpoint root recomputed before it is returned.
        """
        class DerivedClass(cls):
            pass

        DerivedClass.__name__ = str(class_name)
        DerivedClass.set_endpoint_root()
        return DerivedClass

    @classmethod
    def set_endpoint_root(cls):
        """Point ``ENDPOINT_ROOT`` at this class's table and return it.

        The table name is ``parse_table`` when set, else the class name. The
        attribute is only assigned when the current value differs.
        """
        table_name = cls.parse_table or cls.__name__
        expected = '/'.join([API_ROOT, 'classes', table_name])
        if cls.ENDPOINT_ROOT != expected:
            cls.ENDPOINT_ROOT = expected
        return cls.ENDPOINT_ROOT

    @property
    def _absolute_url(self):
        """URL of this instance, or ``None`` while ``objectId`` is falsy."""
        if self.objectId:
            return '/'.join([self.__class__.ENDPOINT_ROOT, self.objectId])
        return None

    @property
    def as_pointer(self):
        """A ``Pointer`` built from this object's class name and id."""
        return Pointer(className=self.__class__.__name__,
                       objectId=self.objectId)

    def serialize(self):
        """Return a dict representation of this object.

        Always contains ``pk``, ``__type``, ``objectId``, ``createdAt`` and
        ``updatedAt``. Every instance attribute whose name does not start
        with ``_`` is added: ``ParseResource`` values become a small
        reference dict, values exposing ``serialize`` are serialized
        recursively, and anything else is stored as-is.
        """
        type_name = self.parse_table or self.__class__.__name__
        vals = {
            'pk': getattr(self, 'objectId', None),
            '__type': type_name,
            'objectId': self.objectId,
            'createdAt': self.createdAt,
            'updatedAt': self.updatedAt,
        }
        # Iterate over a snapshot of the instance dict.
        for attr_name, attr_value in list(self.__dict__.items()):
            if attr_name.startswith('_'):
                continue
            if isinstance(attr_value, ParseResource):
                # The related id is looked up on *self* as "<name>_id".
                related_id = getattr(self, attr_name + '_id', None)
                # NOTE(review): relies on the value exposing a ``cls``
                # attribute when ``parse_table`` is falsy -- confirm.
                vals[attr_name] = {
                    'pk': related_id,
                    '__type': (attr_value.parse_table
                               or attr_value.cls.__name__),
                    'objectId': related_id,
                }
            elif isinstance(attr_value, Object) or hasattr(attr_value,
                                                           'serialize'):
                # NOTE(review): Object derives from ParseResource, so the
                # isinstance half looks unreachable here -- confirm.
                vals[attr_name] = attr_value.serialize()
            else:
                vals[attr_name] = attr_value
        return vals

    def increment(self, key, amount=1, _using=None, _as_user=None,
                  _throttle=None):
        """Increment the field ``key`` by ``amount``.

        The request is issued immediately (it does not wait for ``save()``)
        and the local attribute is then bumped by the same amount.
        """
        operation = {'__op': 'Increment', 'amount': amount}
        payload = {key: operation}
        self.__class__.PUT(self._absolute_url, _app_id=_using,
                           _user=_as_user, _throttle=_throttle, **payload)
        self.__dict__[key] += amount

    def removeRelation(self, key, objs, _using=None, _as_user=None,
                       _throttle=None):
        """Issue a ``RemoveRelation`` operation for ``objs`` on ``key``."""
        self.manageRelation('RemoveRelation', key, objs, _using=_using,
                            _as_user=_as_user, _throttle=_throttle)

    def addRelation(self, key, objs, _using=None, _as_user=None,
                    _throttle=None):
        """Issue an ``AddRelation`` operation for ``objs`` on ``key``."""
        self.manageRelation('AddRelation', key, objs, _using=_using,
                            _as_user=_as_user, _throttle=_throttle)

    def manageRelation(self, action, key, objs, _using=None, _as_user=None,
                       _throttle=None):
        """Send the relation operation ``action`` for ``objs`` on ``key``.

        ``objs`` may be one object or a list/tuple of them. After the
        request, the local attribute ``key`` is reset to an empty string.
        """
        if not isinstance(objs, (list, tuple)):
            objs = [objs]

        pointers = []
        for obj in objs:
            pointers.append({
                "__type": "Pointer",
                "className": obj.parse_table or obj.__class__.__name__,
                "objectId": obj.objectId,
            })

        payload = {key: {"__op": action, "objects": pointers}}
        self.__class__.PUT(self._absolute_url, _app_id=_using,
                           _user=_as_user, _throttle=_throttle, **payload)
        self.__dict__[key] = ''
class ValidatorBase(with_metaclass(abc.ABCMeta, object)):
    # Abstract base for callable validators: subclasses must implement
    # __call__ before they can be instantiated.

    @abc.abstractmethod
    def __call__(self, value):
        """Validate ``value`` and return a boolean result.

        Declared abstract; the base implementation has no body beyond this
        docstring.

        :param value: the value to validate
        :return: a boolean validation result, as implemented by subclasses
        """
class PostVisitor(with_metaclass(ABCMeta, object)):
    """Abstract visitor interface for posts.

    Subclasses must implement :meth:`visit_post` to be instantiable.
    """

    # Unique sentinel object, distinguishable only by identity.
    # NOTE(review): presumably returned by visitors to stop a traversal --
    # confirm against the code that drives the visitor.
    CUT_VISIT = object()

    @abstractmethod
    def visit_post(self, post):
        """Visit a single ``post``; the base implementation does nothing."""
class IUPACSequence(with_metaclass(ABCMeta, Sequence)):
    """Store biological sequence data conforming to the IUPAC character set.

    This is an abstract base class (ABC) that cannot be instantiated.

    Attributes
    ----------
    values
    metadata
    positional_metadata
    alphabet
    gap_chars
    nondegenerate_chars
    degenerate_chars
    degenerate_map

    Raises
    ------
    ValueError
        If sequence characters are not in the IUPAC character set [1]_.

    See Also
    --------
    DNA
    RNA
    Protein

    References
    ----------
    .. [1] Nomenclature for incompletely specified bases in nucleic acid
       sequences: recommendations 1984.
       Nucleic Acids Res. May 10, 1985; 13(9): 3021-3030.
       A Cornish-Bowden

    """
    # ASCII is built such that the difference between uppercase and lowercase
    # is the 6th bit.
    _ascii_invert_case_bit_offset = 32
    _number_of_extended_ascii_codes = 256
    # Byte values strictly greater than this (ord('Z')) are treated as
    # lowercase by the constructor.
    _ascii_lowercase_boundary = 90
    # Per-class memoization slots for the lookup arrays below.
    __validation_mask = None
    __degenerate_codes = None
    __nondegenerate_codes = None
    __gap_codes = None

    @classproperty
    def _validation_mask(cls):
        """Boolean mask over byte codes: True where a code is NOT valid."""
        # TODO These masks could be defined (as literals) on each concrete
        # object. For now, memoize!
        if cls.__validation_mask is None:
            # ``np.frombuffer`` over ASCII bytes replaces the deprecated
            # binary-mode ``np.fromstring``; the resulting codes are the same.
            as_bytes = ''.join(cls.alphabet).encode('ascii')
            cls.__validation_mask = np.invert(
                np.bincount(
                    np.frombuffer(as_bytes, dtype=np.uint8),
                    minlength=cls._number_of_extended_ascii_codes).astype(
                        bool))
        return cls.__validation_mask

    @classproperty
    def _degenerate_codes(cls):
        """Array of ordinals of the degenerate characters (memoized)."""
        if cls.__degenerate_codes is None:
            degens = cls.degenerate_chars
            cls.__degenerate_codes = np.asarray([ord(d) for d in degens])
        return cls.__degenerate_codes

    @classproperty
    def _nondegenerate_codes(cls):
        """Array of ordinals of the non-degenerate characters (memoized)."""
        if cls.__nondegenerate_codes is None:
            nondegens = cls.nondegenerate_chars
            cls.__nondegenerate_codes = np.asarray([ord(d) for d in nondegens])
        return cls.__nondegenerate_codes

    @classproperty
    def _gap_codes(cls):
        """Array of ordinals of the gap characters (memoized)."""
        if cls.__gap_codes is None:
            gaps = cls.gap_chars
            cls.__gap_codes = np.asarray([ord(g) for g in gaps])
        return cls.__gap_codes

    @classproperty
    @stable(as_of='0.4.0')
    def alphabet(cls):
        """Return valid IUPAC characters.

        This includes gap, non-degenerate, and degenerate characters.

        Returns
        -------
        set
            Valid IUPAC characters.

        """
        return cls.degenerate_chars | cls.nondegenerate_chars | cls.gap_chars

    @classproperty
    @stable(as_of='0.4.0')
    def gap_chars(cls):
        """Return characters defined as gaps.

        Returns
        -------
        set
            Characters defined as gaps.

        """
        return set('-.')

    @classproperty
    @stable(as_of='0.4.0')
    def degenerate_chars(cls):
        """Return degenerate IUPAC characters.

        Returns
        -------
        set
            Degenerate IUPAC characters.

        """
        return set(cls.degenerate_map)

    @abstractproperty
    @classproperty
    @stable(as_of='0.4.0')
    def nondegenerate_chars(cls):
        """Return non-degenerate IUPAC characters.

        Returns
        -------
        set
            Non-degenerate IUPAC characters.

        """
        return set()  # pragma: no cover

    @abstractproperty
    @classproperty
    @stable(as_of='0.4.0')
    def degenerate_map(cls):
        """Return mapping of degenerate to non-degenerate characters.

        Returns
        -------
        dict (set)
            Mapping of each degenerate IUPAC character to the set of
            non-degenerate IUPAC characters it represents.

        """
        return set()  # pragma: no cover

    @property
    def _motifs(self):
        # Exposes the module-level ``_motifs`` registry on the instance.
        return _motifs

    @overrides(Sequence)
    def __init__(self, sequence, metadata=None, positional_metadata=None,
                 validate=True, lowercase=False):
        super(IUPACSequence, self).__init__(
            sequence, metadata, positional_metadata)

        if lowercase is False:
            pass
        elif lowercase is True or isinstance(lowercase, string_types):
            lowercase_mask = self._bytes > self._ascii_lowercase_boundary
            self._convert_to_uppercase(lowercase_mask)

            # If it isn't True, it must be a string_type
            if not (lowercase is True):
                self.positional_metadata[lowercase] = lowercase_mask
        else:
            raise TypeError("lowercase keyword argument expected a bool or "
                            "string, but got %s" % type(lowercase))

        if validate:
            self._validate()

    def _convert_to_uppercase(self, lowercase):
        """Flip the case bit in place at the positions flagged in the mask."""
        if np.any(lowercase):
            with self._byte_ownership():
                self._bytes[lowercase] ^= self._ascii_invert_case_bit_offset

    def _validate(self):
        """Raise ``ValueError`` if the sequence has non-alphabet characters."""
        # This is the fastest way that we have found to identify the
        # presence or absence of certain characters (numbers).
        # It works by multiplying a mask where the numbers which are
        # permitted have a zero at their index, and all others have a one.
        # The result is a vector which will propagate counts of invalid
        # numbers and remove counts of valid numbers, so that we need only
        # see if the array is empty to determine validity.
        invalid_characters = np.bincount(
            self._bytes, minlength=self._number_of_extended_ascii_codes
        ) * self._validation_mask
        if np.any(invalid_characters):
            bad = list(
                np.where(invalid_characters > 0)[0].astype(
                    np.uint8).view('|S1'))
            # ``tobytes`` replaces the deprecated ``tostring`` alias.
            raise ValueError(
                "Invalid character%s in sequence: %r. Valid IUPAC characters: "
                "%r" % ('s' if len(bad) > 1 else '',
                        [str(b.tobytes().decode("ascii")) for b in bad]
                        if len(bad) > 1 else bad[0],
                        list(self.alphabet)))

    @stable(as_of='0.4.0')
    def lowercase(self, lowercase):
        """Return a case-sensitive string representation of the sequence.

        Parameters
        ----------
        lowercase: str or boolean vector
            If lowercase is a boolean vector, it is used to set sequence
            characters to lowercase in the output string. True values in the
            boolean vector correspond to lowercase characters. If lowercase
            is a str, it is treated like a key into the positional metadata,
            pointing to a column which must be a boolean vector.
            That boolean vector is then used as described previously.

        Returns
        -------
        str
            String representation of sequence with specified characters set
            to lowercase.

        Examples
        --------
        >>> from skbio import DNA
        >>> s = DNA('ACGT')
        >>> s.lowercase([True, True, False, False])
        'acGT'
        >>> s = DNA('ACGT',
        ...         positional_metadata={'exons': [True, False, False, True]})
        >>> s.lowercase('exons')
        'aCGt'

        Constructor automatically populates a column in positional metadata
        when the ``lowercase`` keyword argument is provided with a column
        name:

        >>> s = DNA('ACgt', lowercase='introns')
        >>> s.lowercase('introns')
        'ACgt'
        >>> s = DNA('ACGT', lowercase='introns')
        >>> s.lowercase('introns')
        'ACGT'

        """
        index = self._munge_to_index_array(lowercase)
        # Work on a copy so the stored bytes are left untouched.
        outbytes = self._bytes.copy()
        outbytes[index] ^= self._ascii_invert_case_bit_offset
        # ``tobytes`` replaces the deprecated ``tostring`` alias.
        return str(outbytes.tobytes().decode('ascii'))

    @stable(as_of='0.4.0')
    def gaps(self):
        """Find positions containing gaps in the biological sequence.

        Returns
        -------
        1D np.ndarray (bool)
            Boolean vector where ``True`` indicates a gap character is
            present at that position in the biological sequence.

        See Also
        --------
        has_gaps

        Examples
        --------
        >>> from skbio import DNA
        >>> s = DNA('AC-G-')
        >>> s.gaps()
        array([False, False,  True, False,  True], dtype=bool)

        """
        return np.in1d(self._bytes, self._gap_codes)

    @stable(as_of='0.4.0')
    def has_gaps(self):
        """Determine if the sequence contains one or more gap characters.

        Returns
        -------
        bool
            Indicates whether there are one or more occurrences of gap
            characters in the biological sequence.

        Examples
        --------
        >>> from skbio import DNA
        >>> s = DNA('ACACGACGTT')
        >>> s.has_gaps()
        False
        >>> t = DNA('A.CAC--GACGTT')
        >>> t.has_gaps()
        True

        """
        # TODO use count, there aren't that many gap chars
        # TODO: cache results
        return bool(self.gaps().any())

    @stable(as_of='0.4.0')
    def degenerates(self):
        """Find positions containing degenerate characters in the sequence.

        Returns
        -------
        1D np.ndarray (bool)
            Boolean vector where ``True`` indicates a degenerate character is
            present at that position in the biological sequence.

        See Also
        --------
        has_degenerates
        nondegenerates
        has_nondegenerates

        Examples
        --------
        >>> from skbio import DNA
        >>> s = DNA('ACWGN')
        >>> s.degenerates()
        array([False, False,  True, False,  True], dtype=bool)

        """
        return np.in1d(self._bytes, self._degenerate_codes)

    @stable(as_of='0.4.0')
    def has_degenerates(self):
        """Determine if sequence contains one or more degenerate characters.

        Returns
        -------
        bool
            Indicates whether there are one or more occurrences of degenerate
            characters in the biological sequence.

        See Also
        --------
        degenerates
        nondegenerates
        has_nondegenerates

        Examples
        --------
        >>> from skbio import DNA
        >>> s = DNA('ACAC-GACGTT')
        >>> s.has_degenerates()
        False
        >>> t = DNA('ANCACWWGACGTT')
        >>> t.has_degenerates()
        True

        """
        # TODO use bincount!
        # TODO: cache results
        return bool(self.degenerates().any())

    @stable(as_of='0.4.0')
    def nondegenerates(self):
        """Find positions containing non-degenerate characters in the sequence.

        Returns
        -------
        1D np.ndarray (bool)
            Boolean vector where ``True`` indicates a non-degenerate character
            is present at that position in the biological sequence.

        See Also
        --------
        has_nondegenerates
        degenerates
        has_degenerates

        Examples
        --------
        >>> from skbio import DNA
        >>> s = DNA('ACWGN')
        >>> s.nondegenerates()
        array([ True,  True, False,  True, False], dtype=bool)

        """
        return np.in1d(self._bytes, self._nondegenerate_codes)

    @stable(as_of='0.4.0')
    def has_nondegenerates(self):
        """Determine if sequence contains one or more non-degenerate characters

        Returns
        -------
        bool
            Indicates whether there are one or more occurrences of
            non-degenerate characters in the biological sequence.

        See Also
        --------
        nondegenerates
        degenerates
        has_degenerates

        Examples
        --------
        >>> from skbio import DNA
        >>> s = DNA('NWNNNNNN')
        >>> s.has_nondegenerates()
        False
        >>> t = DNA('ANCACWWGACGTT')
        >>> t.has_nondegenerates()
        True

        """
        # TODO: cache results
        return bool(self.nondegenerates().any())

    @stable(as_of='0.4.0')
    def degap(self):
        """Return a new sequence with gap characters removed.

        Returns
        -------
        IUPACSequence
            A new sequence with all gap characters removed.

        See Also
        --------
        gap_chars

        Notes
        -----
        The type and metadata of the result will be the same as the
        biological sequence. If positional metadata is present, it will be
        filtered in the same manner as the sequence characters and included in
        the resulting degapped sequence.

        Examples
        --------
        >>> from skbio import DNA
        >>> s = DNA('GGTC-C--ATT-C.',
        ...         positional_metadata={'quality':range(14)})
        >>> s.degap()
        DNA
        -----------------------------
        Positional metadata:
            'quality': <dtype: int64>
        Stats:
            length: 9
            has gaps: False
            has degenerates: False
            has non-degenerates: True
            GC-content: 55.56%
        -----------------------------
        0 GGTCCATTC

        """
        return self[np.invert(self.gaps())]

    @stable(as_of='0.4.0')
    def expand_degenerates(self):
        """Yield all possible non-degenerate versions of the sequence.

        Yields
        ------
        IUPACSequence
            Non-degenerate version of the sequence.

        See Also
        --------
        degenerate_map

        Notes
        -----
        There is no guaranteed ordering to the non-degenerate sequences that
        are yielded.

        Each non-degenerate sequence will have the same type, metadata,
        and positional metadata as the biological sequence.

        Examples
        --------
        >>> from skbio import DNA
        >>> seq = DNA('TRG')
        >>> seq_generator = seq.expand_degenerates()
        >>> for s in sorted(seq_generator, key=str):
        ...     s
        ...     print('')
        DNA
        -----------------------------
        Stats:
            length: 3
            has gaps: False
            has degenerates: False
            has non-degenerates: True
            GC-content: 33.33%
        -----------------------------
        0 TAG
        <BLANKLINE>
        DNA
        -----------------------------
        Stats:
            length: 3
            has gaps: False
            has degenerates: False
            has non-degenerates: True
            GC-content: 66.67%
        -----------------------------
        0 TGG
        <BLANKLINE>

        """
        degen_chars = self.degenerate_map
        nonexpansion_chars = self.nondegenerate_chars.union(self.gap_chars)

        # One entry per position: the character itself, or the collection of
        # characters a degenerate code stands for. ``product`` then
        # enumerates every combination.
        expansions = []
        for char in self:
            char = str(char)
            if char in nonexpansion_chars:
                expansions.append(char)
            else:
                expansions.append(degen_chars[char])

        result = product(*expansions)
        return (self._to(sequence=''.join(nondegen_seq)) for nondegen_seq in
                result)

    @stable(as_of='0.4.0-dev')
    def to_regex(self):
        """Return a regular expression object that accounts for degenerate chars.

        Returns
        -------
        regex
            Pre-compiled regular expression object (as from ``re.compile``)
            that matches all non-degenerate versions of this sequence, and
            nothing else.

        Examples
        --------
        >>> from skbio import DNA
        >>> seq = DNA('TRG')
        >>> regex = seq.to_regex()
        >>> regex.pattern
        'T[AG]G'
        >>> regex.match('TAG').string
        'TAG'
        >>> regex.match('TCG') is None
        True

        """
        regex_string = []
        for base in str(self):
            if base in self.degenerate_chars:
                # A degenerate code becomes a character class of its
                # expansions.
                regex_string.append('[{0}]'.format(''.join(
                    self.degenerate_map[base])))
            else:
                regex_string.append(base)
        return re.compile(''.join(regex_string))

    @stable(as_of='0.4.0')
    def find_motifs(self, motif_type, min_length=1, ignore=None):
        """Search the biological sequence for motifs.

        Options for `motif_type`:

        Parameters
        ----------
        motif_type : str
            Type of motif to find.
        min_length : int, optional
            Only motifs at least as long as `min_length` will be returned.
        ignore : 1D array_like (bool), optional
            Boolean vector indicating positions to ignore when matching.

        Yields
        ------
        slice
            Location of the motif in the biological sequence.

        Raises
        ------
        ValueError
            If an unknown `motif_type` is specified.

        Examples
        --------
        >>> from skbio import DNA
        >>> s = DNA('ACGGGGAGGCGGAG')
        >>> for motif_slice in s.find_motifs('purine-run', min_length=2):
        ...     motif_slice
        ...     str(s[motif_slice])
        slice(2, 9, None)
        'GGGGAGG'
        slice(10, 14, None)
        'GGAG'

        Gap characters can disrupt motifs:

        >>> s = DNA('GG-GG')
        >>> for motif_slice in s.find_motifs('purine-run'):
        ...     motif_slice
        slice(0, 2, None)
        slice(3, 5, None)

        Gaps can be ignored by passing the gap boolean vector to `ignore`:

        >>> s = DNA('GG-GG')
        >>> for motif_slice in s.find_motifs('purine-run', ignore=s.gaps()):
        ...     motif_slice
        slice(0, 5, None)

        """
        if motif_type not in self._motifs:
            raise ValueError("Not a known motif (%r) for this sequence (%s)."
                             % (motif_type, self.__class__.__name__))

        return self._motifs[motif_type](self, min_length, ignore)

    @overrides(Sequence)
    def _constructor(self, **kwargs):
        # Internal construction skips validation and lowercase handling.
        return self.__class__(validate=False, lowercase=False, **kwargs)

    @overrides(Sequence)
    def _repr_stats(self):
        """Define custom statistics to display in the sequence's repr."""
        stats = super(IUPACSequence, self)._repr_stats()
        stats.append(('has gaps', '%r' % self.has_gaps()))
        stats.append(('has degenerates', '%r' % self.has_degenerates()))
        stats.append(('has non-degenerates', '%r' % self.has_nondegenerates()))
        return stats
class FileSystem(with_metaclass(abc.ABCMeta, BeamPlugin)):
    """A class that defines the functions that can be performed on a filesystem.

    All methods are abstract and they are for file system providers to
    implement. Clients should use the FileSystems class to interact with
    the correct file system based on the provided file pattern scheme.
    """
    CHUNK_SIZE = 1  # Chunk size in the batch operations

    def __init__(self, pipeline_options):
        """
        Args:
          pipeline_options: Instance of ``PipelineOptions`` or dict of options
            and values (like ``RuntimeValueProvider.runtime_options``).
        """

    @staticmethod
    def _get_compression_type(path, compression_type):
        """Resolve ``AUTO`` from ``path``; reject invalid compression types.

        Raises:
          TypeError: if ``compression_type`` is neither ``AUTO`` nor valid.
        """
        if compression_type == CompressionTypes.AUTO:
            compression_type = CompressionTypes.detect_compression_type(path)
        elif not CompressionTypes.is_valid_compression_type(compression_type):
            raise TypeError(
                'compression_type must be CompressionType object but '
                'was %s' % type(compression_type))
        return compression_type

    @classmethod
    def scheme(cls):
        """URI scheme for the FileSystem
        """
        raise NotImplementedError

    @abc.abstractmethod
    def join(self, basepath, *paths):
        """Join two or more pathname components for the filesystem

        Args:
          basepath: string path of the first component of the path
          paths: path components to be added

        Returns: full path after combining all the passed components
        """
        raise NotImplementedError

    @abc.abstractmethod
    def split(self, path):
        """Splits the given path into two parts.

        Splits the path into a pair (head, tail) such that tail contains the
        last component of the path and head contains everything up to that.

        For file-systems other than the local file-system, head should include
        the prefix.

        Args:
          path: path as a string

        Returns:
          a pair of path components as strings.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def mkdirs(self, path):
        """Recursively create directories for the provided path.

        Args:
          path: string path of the directory structure that should be created

        Raises:
          IOError if leaf directory already exists.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def has_dirs(self):
        """Whether this FileSystem supports directories."""
        raise NotImplementedError

    @abc.abstractmethod
    def _list(self, dir_or_prefix):
        """List files in a location.

        Listing is non-recursive (for filesystems that support directories).

        Args:
          dir_or_prefix: (string) A directory or location prefix (for
            filesystems that don't have directories).

        Returns:
          Generator of ``FileMetadata`` objects.

        Raises:
          ``BeamIOError`` if listing fails, but not if no files were found.
        """
        raise NotImplementedError

    @staticmethod
    def _split_scheme(url_or_path):
        """Return ``(scheme, rest)``; scheme is None without a ``://``."""
        match = re.match(r'(^[a-z]+)://(.*)', url_or_path)
        if match is not None:
            return match.groups()
        return None, url_or_path

    @staticmethod
    def _combine_scheme(scheme, path):
        """Inverse of ``_split_scheme``: re-attach ``scheme://`` if present."""
        if scheme is None:
            return path
        return '{}://{}'.format(scheme, path)

    def _url_dirname(self, url_or_path):
        """Like posixpath.dirname, but preserves scheme:// prefix.

        Args:
          url_or_path: A string in the form of scheme://some/path OR
            /some/path.
        """
        scheme, path = self._split_scheme(url_or_path)
        return self._combine_scheme(scheme, posixpath.dirname(path))

    def match_files(self, file_metas, pattern):
        """Filter :class:`FileMetadata` objects by *pattern*

        Args:
          file_metas (list of :class:`FileMetadata`):
            Files to consider when matching
          pattern (str): File pattern

        See Also:
          :meth:`translate_pattern`

        Returns:
          Generator of matching :class:`FileMetadata`
        """
        re_pattern = re.compile(self.translate_pattern(pattern))
        match = re_pattern.match
        for file_metadata in file_metas:
            if match(file_metadata.path):
                yield file_metadata

    @staticmethod
    def translate_pattern(pattern):
        """
        Translate a *pattern* to a regular expression.
        There is no way to quote meta-characters.

        Pattern syntax:
          The pattern syntax is based on the fnmatch_ syntax, with the
          following differences:

          -   ``*`` Is equivalent to ``[^/\\]*`` rather than ``.*``.
          -   ``**`` Is equivalent to ``.*``.

        See also:
          :meth:`match` uses this method

        This method is based on `Python 2.7's fnmatch.translate`_.
        The code in this method is licensed under
        PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2.

        .. _`fnmatch`: https://docs.python.org/2/library/fnmatch.html

        .. _`Python 2.7's fnmatch.translate`: https://github.com/python/cpython\
/blob/170ea8ccd4235d28538ab713041502d07ad1cacd/Lib/fnmatch.py#L85-L120
        """
        i, n = 0, len(pattern)
        res = ''
        while i < n:
            c = pattern[i]
            i = i + 1
            if c == '*':
                # One char lookahead for "**"
                if i < n and pattern[i] == "*":
                    res = res + '.*'
                    i = i + 1
                else:
                    res = res + r'[^/\\]*'
            elif c == '?':
                res = res + '.'
            elif c == '[':
                # Scan ahead for the closing bracket of the character class.
                j = i
                if j < n and pattern[j] == '!':
                    j = j + 1
                if j < n and pattern[j] == ']':
                    j = j + 1
                while j < n and pattern[j] != ']':
                    j = j + 1
                if j >= n:
                    # No closing bracket: treat '[' as a literal.
                    res = res + r'\['
                else:
                    stuff = pattern[i:j].replace('\\', '\\\\')
                    i = j + 1
                    if stuff[0] == '!':
                        stuff = '^' + stuff[1:]
                    elif stuff[0] == '^':
                        stuff = '\\' + stuff
                    res = '%s[%s]' % (res, stuff)
            else:
                res = res + re.escape(c)

        logger.debug('translate_pattern: %r -> %r', pattern, res)
        # Global inline flags must lead the expression: trailing "(?ms)" has
        # been deprecated since Python 3.6 and is an error from Python 3.11.
        return r'(?ms)' + res + r'\Z'

    def match(self, patterns, limits=None):
        """Find all matching paths to the patterns provided.

        See Also:
          :meth:`translate_pattern`

        Patterns ending with '/' or '\\' will be appended with '*'.

        Args:
          patterns: list of string for the file path pattern to match against
          limits: list of maximum number of responses that need to be fetched

        Returns: list of ``MatchResult`` objects.

        Raises:
          ``BeamIOError`` if any of the pattern match operations fail
        """
        if limits is None:
            limits = [None] * len(patterns)
        else:
            err_msg = "Patterns and limits should be equal in length"
            assert len(patterns) == len(limits), err_msg

        def _match(pattern, limit):
            """Find all matching paths to the pattern provided."""
            if pattern.endswith('/') or pattern.endswith('\\'):
                pattern += '*'
            # Get the part of the pattern before the first globbing character.
            # For example scheme://path/foo* will become scheme://path/foo for
            # filesystems like GCS, or converted to scheme://path for
            # filesystems with directories.
            prefix_or_dir = re.match('^[^[*?]*', pattern).group(0)

            file_metadatas = []
            if prefix_or_dir == pattern:
                # Short-circuit calling self.list() if there's no glob pattern
                # to match.
                if self.exists(pattern):
                    file_metadatas = [
                        FileMetadata(pattern, self.size(pattern))
                    ]
            else:
                if self.has_dirs():
                    prefix_dirname = self._url_dirname(prefix_or_dir)
                    if not prefix_dirname == prefix_or_dir:
                        logger.debug("Changed prefix_or_dir %r -> %r",
                                     prefix_or_dir, prefix_dirname)
                        prefix_or_dir = prefix_dirname

                logger.debug("Listing files in %r", prefix_or_dir)
                file_metadatas = self._list(prefix_or_dir)

            metadata_list = []
            for file_metadata in self.match_files(file_metadatas, pattern):
                if limit is not None and len(metadata_list) >= limit:
                    break
                metadata_list.append(file_metadata)

            return MatchResult(pattern, metadata_list)

        # Collect per-pattern failures so every pattern is attempted before a
        # single aggregated error is raised.
        exceptions = {}
        result = []
        for pattern, limit in zip(patterns, limits):
            try:
                result.append(_match(pattern, limit))
            except Exception as e:  # pylint: disable=broad-except
                exceptions[pattern] = e

        if exceptions:
            raise BeamIOError("Match operation failed", exceptions)
        return result

    @abc.abstractmethod
    def create(self, path, mime_type='application/octet-stream',
               compression_type=CompressionTypes.AUTO):
        """Returns a write channel for the given file path.

        Args:
          path: string path of the file object to be written to the system
          mime_type: MIME type to specify the type of content in the file
            object
          compression_type: Type of compression to be used for this object

        Returns: file handle with a close function for the user to use
        """
        raise NotImplementedError

    @abc.abstractmethod
    def open(self, path, mime_type='application/octet-stream',
             compression_type=CompressionTypes.AUTO):
        """Returns a read channel for the given file path.

        Args:
          path: string path of the file object to be read
          mime_type: MIME type to specify the type of content in the file
            object
          compression_type: Type of compression to be used for this object

        Returns: file handle with a close function for the user to use
        """
        raise NotImplementedError

    @abc.abstractmethod
    def copy(self, source_file_names, destination_file_names):
        """Recursively copy the file tree from the source to the destination

        Args:
          source_file_names: list of source file objects that needs to be
            copied
          destination_file_names: list of destination of the new object

        Raises:
          ``BeamIOError`` if any of the copy operations fail
        """
        raise NotImplementedError

    @abc.abstractmethod
    def rename(self, source_file_names, destination_file_names):
        """Rename the files at the source list to the destination list.
        Source and destination lists should be of the same size.

        Args:
          source_file_names: List of file paths that need to be moved
          destination_file_names: List of destination_file_names for the files

        Raises:
          ``BeamIOError`` if any of the rename operations fail
        """
        raise NotImplementedError

    @abc.abstractmethod
    def exists(self, path):
        """Check if the provided path exists on the FileSystem.

        Args:
          path: string path that needs to be checked.

        Returns: boolean flag indicating if path exists
        """
        raise NotImplementedError

    @abc.abstractmethod
    def size(self, path):
        """Get size in bytes of a file on the FileSystem.

        Args:
          path: string filepath of file.

        Returns: int size of file according to the FileSystem.

        Raises:
          ``BeamIOError`` if path doesn't exist.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def last_updated(self, path):
        """Get UNIX Epoch time in seconds on the FileSystem.

        Args:
          path: string path of file.

        Returns: float UNIX Epoch time

        Raises:
          ``BeamIOError`` if path doesn't exist.
        """
        raise NotImplementedError

    def checksum(self, path):
        """Fetch checksum metadata of a file on the
        :class:`~apache_beam.io.filesystem.FileSystem`.

        This operation returns checksum metadata as stored in the underlying
        FileSystem. It should not need to read file data to obtain this value.
        Checksum type and format are FileSystem dependent and are not
        compatible between FileSystems.
        FileSystem implementations may return file size if a checksum isn't
        available.

        Args:
          path: string path of a file.

        Returns: string containing checksum

        Raises:
          ``BeamIOError`` if path isn't a file or doesn't exist.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def delete(self, paths):
        """Deletes files or directories at the provided paths.
        Directories will be deleted recursively.

        Args:
          paths: list of paths that give the file objects to be deleted

        Raises:
          ``BeamIOError`` if any of the delete operations fail
        """
        raise NotImplementedError
class StatsCollectorTest(with_metaclass(abc.ABCMeta, absltest.TestCase)):
    """Stats collection tests.

    Each test method has uniquely-named metrics to accommodate implementations
    that do not support re-definition of metrics.

    For Events, the exact boundaries of Distribution bins are not tested. For
    these histogram metrics, it is acceptable that different implementations
    have slightly different behavior, e.g. one uses lower or equal while
    another uses strictly lower for bounds of bins. This allows integration
    with third-party metric libraries.

    Concrete subclasses must implement `_CreateStatsCollector`.
    """

    def setUp(self):
        """Replaces `time.time` with a fake clock that starts at 100.0."""
        super(StatsCollectorTest, self).setUp()

        self._mock_time = 100.0
        # The lambda reads self._mock_time lazily, so _Sleep() can advance it.
        time_patcher = mock.patch.object(time, "time", lambda: self._mock_time)
        time_patcher.start()
        self.addCleanup(time_patcher.stop)

    @abc.abstractmethod
    def _CreateStatsCollector(self, metadata_list):
        """Creates a new stats collector with the given metadata."""

    def _Sleep(self, n):
        """Simulates sleeping for a given number of seconds."""
        self._mock_time += n

    def testSimpleCounter(self):
        """Counters start at 0 and grow by 1 or by an explicit delta."""
        counter_name = "testSimpleCounter_counter"

        collector = self._CreateStatsCollector(
            [stats_utils.CreateCounterMetadata(counter_name)])

        self.assertEqual(0, collector.GetMetricValue(counter_name))

        for _ in range(5):
            collector.IncrementCounter(counter_name)
        self.assertEqual(5, collector.GetMetricValue(counter_name))

        collector.IncrementCounter(counter_name, 2)
        self.assertEqual(7, collector.GetMetricValue(counter_name))

    def testDecrementingCounterRaises(self):
        """Incrementing a counter by a negative delta raises ValueError."""
        counter_name = "testDecrementingCounterRaises_counter"

        collector = self._CreateStatsCollector(
            [stats_utils.CreateCounterMetadata(counter_name)])

        with self.assertRaises(ValueError):
            collector.IncrementCounter(counter_name, -1)

    def testCounterWithFields(self):
        """Counter values are tracked independently per field value."""
        counter_name = "testCounterWithFields_counter"

        collector = self._CreateStatsCollector([
            stats_utils.CreateCounterMetadata(
                counter_name, fields=[("dimension", str)])
        ])

        # Test that default values for any fields values are 0.
        self.assertEqual(
            0, collector.GetMetricValue(counter_name, fields=["a"]))
        self.assertEqual(
            0, collector.GetMetricValue(counter_name, fields=["b"]))

        for _ in range(5):
            collector.IncrementCounter(
                counter_name, fields=["dimension_value_1"])
        self.assertEqual(
            5,
            collector.GetMetricValue(
                counter_name, fields=["dimension_value_1"]))

        collector.IncrementCounter(
            counter_name, 2, fields=["dimension_value_1"])
        self.assertEqual(
            7,
            collector.GetMetricValue(
                counter_name, fields=["dimension_value_1"]))

        collector.IncrementCounter(
            counter_name, 2, fields=["dimension_value_2"])
        self.assertEqual(
            2,
            collector.GetMetricValue(
                counter_name, fields=["dimension_value_2"]))
        # Check that previously set values with other fields are not affected.
        self.assertEqual(
            7,
            collector.GetMetricValue(
                counter_name, fields=["dimension_value_1"]))

    def testSimpleGauge(self):
        """Gauges default to zero, store set values and reject bad types."""
        int_gauge_name = "testSimpleGauge_int_gauge"
        float_gauge_name = "testSimpleGauge_float_gauge"

        collector = self._CreateStatsCollector([
            stats_utils.CreateGaugeMetadata(int_gauge_name, int),
            stats_utils.CreateGaugeMetadata(float_gauge_name, float)
        ])

        self.assertEqual(0, collector.GetMetricValue(int_gauge_name))
        self.assertEqual(0.0, collector.GetMetricValue(float_gauge_name))

        collector.SetGaugeValue(int_gauge_name, 42)
        collector.SetGaugeValue(float_gauge_name, 42.3)

        self.assertEqual(42, collector.GetMetricValue(int_gauge_name))
        self.assertAlmostEqual(
            42.3, collector.GetMetricValue(float_gauge_name))

        # At least default Python type checking is enforced in gauges:
        # we can't assign string to int
        with self.assertRaises(ValueError):
            collector.SetGaugeValue(int_gauge_name, "some")

    def testGaugeWithFields(self):
        """Gauge values are tracked independently per field value."""
        int_gauge_name = "testGaugeWithFields_int_gauge"

        collector = self._CreateStatsCollector([
            stats_utils.CreateGaugeMetadata(
                int_gauge_name, int, fields=[("dimension", str)])
        ])

        self.assertEqual(
            0,
            collector.GetMetricValue(
                int_gauge_name, fields=["dimension_value_1"]))
        self.assertEqual(
            0,
            collector.GetMetricValue(
                int_gauge_name, fields=["dimension_value_2"]))

        collector.SetGaugeValue(
            int_gauge_name, 1, fields=["dimension_value_1"])
        collector.SetGaugeValue(
            int_gauge_name, 2, fields=["dimension_value_2"])

        self.assertEqual(
            1,
            collector.GetMetricValue(
                int_gauge_name, fields=["dimension_value_1"]))
        self.assertEqual(
            2,
            collector.GetMetricValue(
                int_gauge_name, fields=["dimension_value_2"]))

    def testGaugeWithCallback(self):
        """A gauge with a callback reports the callback's return value."""
        int_gauge_name = "testGaugeWithCallback_int_gauge"
        float_gauge_name = "testGaugeWithCallback_float_gauge"

        collector = self._CreateStatsCollector([
            stats_utils.CreateGaugeMetadata(int_gauge_name, int),
            stats_utils.CreateGaugeMetadata(float_gauge_name, float)
        ])

        self.assertEqual(0, collector.GetMetricValue(int_gauge_name))
        self.assertEqual(0.0, collector.GetMetricValue(float_gauge_name))

        collector.SetGaugeCallback(int_gauge_name, lambda: 42)
        collector.SetGaugeCallback(float_gauge_name, lambda: 42.3)

        self.assertEqual(42, collector.GetMetricValue(int_gauge_name))
        self.assertAlmostEqual(
            42.3, collector.GetMetricValue(float_gauge_name))

    def testSimpleEventMetric(self):
        """Recorded events update sum, count and the matching bin."""
        event_metric_name = "testSimpleEventMetric_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateEventMetadata(
                event_metric_name, bins=[0.0, 0.1, 0.2]),
        ])

        data = collector.GetMetricValue(event_metric_name)
        self.assertAlmostEqual(0, data.sum)
        self.assertEqual(0, data.count)
        self.assertEqual([-_INF, 0.0, 0.1, 0.2], list(data.bins))
        self.assertEqual(
            {-_INF: 0, 0.0: 0, 0.1: 0, 0.2: 0}, data.bins_heights)

        collector.RecordEvent(event_metric_name, 0.15)
        data = collector.GetMetricValue(event_metric_name)
        self.assertAlmostEqual(0.15, data.sum)
        self.assertEqual(1, data.count)
        self.assertEqual([-_INF, 0.0, 0.1, 0.2], list(data.bins))
        self.assertEqual(
            {-_INF: 0, 0.0: 0, 0.1: 1, 0.2: 0}, data.bins_heights)

        collector.RecordEvent(event_metric_name, 0.5)
        data = collector.GetMetricValue(event_metric_name)
        self.assertAlmostEqual(0.65, data.sum)
        self.assertEqual(2, data.count)
        self.assertEqual([-_INF, 0.0, 0.1, 0.2], list(data.bins))
        self.assertEqual(
            {-_INF: 0, 0.0: 0, 0.1: 1, 0.2: 1}, data.bins_heights)

        collector.RecordEvent(event_metric_name, -0.1)
        data = collector.GetMetricValue(event_metric_name)
        self.assertAlmostEqual(0.55, data.sum)
        self.assertEqual(3, data.count)
        self.assertEqual([-_INF, 0.0, 0.1, 0.2], list(data.bins))
        self.assertEqual(
            {-_INF: 1, 0.0: 0, 0.1: 1, 0.2: 1}, data.bins_heights)

    def testEventMetricWithFields(self):
        """Event distributions are tracked independently per field value."""
        event_metric_name = "testEventMetricWithFields_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateEventMetadata(
                event_metric_name,
                bins=[0.0, 0.1, 0.2],
                fields=[("dimension", str)])
        ])

        data = collector.GetMetricValue(
            event_metric_name, fields=["dimension_value_1"])
        self.assertAlmostEqual(0, data.sum)
        self.assertEqual(0, data.count)
        self.assertEqual([-_INF, 0.0, 0.1, 0.2], list(data.bins))
        self.assertEqual(
            {-_INF: 0, 0.0: 0, 0.1: 0, 0.2: 0}, data.bins_heights)

        collector.RecordEvent(
            event_metric_name, 0.15, fields=["dimension_value_1"])
        collector.RecordEvent(
            event_metric_name, 0.25, fields=["dimension_value_2"])

        data = collector.GetMetricValue(
            event_metric_name, fields=["dimension_value_1"])
        self.assertAlmostEqual(0.15, data.sum)
        self.assertEqual(1, data.count)
        self.assertEqual([-_INF, 0.0, 0.1, 0.2], list(data.bins))
        self.assertEqual(
            {-_INF: 0, 0.0: 0, 0.1: 1, 0.2: 0}, data.bins_heights)

        data = collector.GetMetricValue(
            event_metric_name, fields=["dimension_value_2"])
        self.assertAlmostEqual(0.25, data.sum)
        self.assertEqual(1, data.count)
        self.assertEqual([-_INF, 0.0, 0.1, 0.2], list(data.bins))
        self.assertEqual(
            {-_INF: 0, 0.0: 0, 0.1: 0, 0.2: 1}, data.bins_heights)

    def testRaisesOnImproperFieldsUsage1(self):
        """Passing fields to metrics declared without fields raises."""
        counter_name = "testRaisesOnImproperFieldsUsage1_counter"
        int_gauge_name = "testRaisesOnImproperFieldsUsage1_int_gauge"
        event_metric_name = "testRaisesOnImproperFieldsUsage1_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateCounterMetadata(counter_name),
            stats_utils.CreateGaugeMetadata(int_gauge_name, int),
            stats_utils.CreateEventMetadata(event_metric_name)
        ])

        # Check for counters
        with self.assertRaises(ValueError):
            collector.GetMetricValue(counter_name, fields=["a"])

        # Check for gauges
        with self.assertRaises(ValueError):
            collector.GetMetricValue(int_gauge_name, fields=["a"])

        # Check for event metrics
        self.assertRaises(
            ValueError,
            collector.GetMetricValue,
            event_metric_name,
            fields=["a", "b"])

    def testRaisesOnImproperFieldsUsage2(self):
        """Omitting fields, or passing too many, raises for field metrics."""
        counter_name = "testRaisesOnImproperFieldsUsage2_counter"
        int_gauge_name = "testRaisesOnImproperFieldsUsage2_int_gauge"
        event_metric_name = "testRaisesOnImproperFieldsUsage2_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateCounterMetadata(
                counter_name, fields=[("dimension", str)]),
            stats_utils.CreateGaugeMetadata(
                int_gauge_name, int, fields=[("dimension", str)]),
            stats_utils.CreateEventMetadata(
                event_metric_name, fields=[("dimension", str)])
        ])

        # Check for counters
        self.assertRaises(ValueError, collector.GetMetricValue, counter_name)
        self.assertRaises(
            ValueError,
            collector.GetMetricValue,
            counter_name,
            fields=["a", "b"])

        # Check for gauges
        self.assertRaises(
            ValueError, collector.GetMetricValue, int_gauge_name)
        self.assertRaises(
            ValueError,
            collector.GetMetricValue,
            int_gauge_name,
            fields=["a", "b"])

        # Check for event metrics
        self.assertRaises(
            ValueError, collector.GetMetricValue, event_metric_name)
        self.assertRaises(
            ValueError,
            collector.GetMetricValue,
            event_metric_name,
            fields=["a", "b"])

    def testGetAllMetricsMetadataWorksCorrectlyOnSimpleMetrics(self):
        """GetAllMetricsMetadata reports each metric's type and fields."""
        counter_name = "testGAMM_SimpleMetrics_counter"
        int_gauge_name = "testGAMM_SimpleMetrics_int_gauge"
        event_metric_name = "testGAMM_SimpleMetrics_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateCounterMetadata(counter_name),
            stats_utils.CreateGaugeMetadata(
                int_gauge_name, int, fields=[("dimension", str)]),
            stats_utils.CreateEventMetadata(event_metric_name)
        ])

        metrics = collector.GetAllMetricsMetadata()
        self.assertEqual(metrics[counter_name].metric_type,
                         rdf_stats.MetricMetadata.MetricType.COUNTER)
        self.assertFalse(metrics[counter_name].fields_defs)

        self.assertEqual(metrics[int_gauge_name].metric_type,
                         rdf_stats.MetricMetadata.MetricType.GAUGE)
        self.assertEqual(metrics[int_gauge_name].fields_defs, [
            rdf_stats.MetricFieldDefinition(
                field_name="dimension",
                field_type=rdf_stats.MetricFieldDefinition.FieldType.STR)
        ])

        self.assertEqual(metrics[event_metric_name].metric_type,
                         rdf_stats.MetricMetadata.MetricType.EVENT)
        self.assertFalse(metrics[event_metric_name].fields_defs)

    def testGetMetricFieldsWorksCorrectly(self):
        """GetMetricFields returns every field tuple that was written."""
        counter_name = "testGetMetricFieldsWorksCorrectly_counter"
        int_gauge_name = "testGetMetricFieldsWorksCorrectly_int_gauge"
        event_metric_name = "testGetMetricFieldsWorksCorrectly_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateCounterMetadata(
                counter_name,
                fields=[("dimension1", str), ("dimension2", str)]),
            stats_utils.CreateGaugeMetadata(
                int_gauge_name, int, fields=[("dimension", str)]),
            stats_utils.CreateEventMetadata(
                event_metric_name, fields=[("dimension", str)]),
        ])

        collector.IncrementCounter(counter_name, fields=["b", "b"])
        collector.IncrementCounter(counter_name, fields=["a", "c"])

        collector.SetGaugeValue(int_gauge_name, 20, fields=["a"])
        collector.SetGaugeValue(int_gauge_name, 30, fields=["b"])

        collector.RecordEvent(event_metric_name, 0.1, fields=["a"])
        collector.RecordEvent(event_metric_name, 0.1, fields=["b"])

        # Sort by the first field so the comparison is order-independent.
        fields = sorted(
            collector.GetMetricFields(counter_name), key=lambda t: t[0])
        self.assertEqual([("a", "c"), ("b", "b")], fields)

        fields = sorted(
            collector.GetMetricFields(int_gauge_name), key=lambda t: t[0])
        self.assertEqual([("a", ), ("b", )], fields)

        fields = sorted(
            collector.GetMetricFields(event_metric_name), key=lambda t: t[0])
        self.assertEqual([("a", ), ("b", )], fields)

    def testCountingDecorator(self):
        """Test _Function call counting."""
        counter_name = "testCountingDecorator_counter"

        collector = self._CreateStatsCollector(
            [stats_utils.CreateCounterMetadata(counter_name)])

        @stats_utils.Counted(counter_name)
        def CountedFunc():
            pass

        with FakeStatsContext(collector):
            for _ in range(10):
                CountedFunc()

            self.assertEqual(collector.GetMetricValue(counter_name), 10)

    def testSuccessesCountingDecorator(self):
        """SuccessesCounted only counts calls that do not raise."""
        counter_name = "testCountingDecorator_successes_counter"

        collector = self._CreateStatsCollector(
            [stats_utils.CreateCounterMetadata(counter_name)])

        @stats_utils.SuccessesCounted(counter_name)
        def CountedFunc(should_raise):
            if should_raise:
                raise RuntimeError("foo")

        with FakeStatsContext(collector):
            for i in range(10):
                if i % 2 == 0:
                    with self.assertRaises(RuntimeError):
                        CountedFunc(True)
                else:
                    CountedFunc(False)

            # Failing calls shouldn't increment the counter.
            self.assertEqual(collector.GetMetricValue(counter_name), 5)

    def testErrorsCountingDecorator(self):
        """ErrorsCounted only counts calls that raise."""
        counter_name = "testCountingDecorator_errors_counter"

        collector = self._CreateStatsCollector(
            [stats_utils.CreateCounterMetadata(counter_name)])

        # This previously used SuccessesCounted (copy-paste from the test
        # above), so the errors decorator was never exercised. Half of the
        # calls raise, so the expected count stays 5.
        # NOTE(review): assumes stats_utils exposes ErrorsCounted next to
        # SuccessesCounted -- confirm.
        @stats_utils.ErrorsCounted(counter_name)
        def CountedFunc(should_raise):
            if should_raise:
                raise RuntimeError("foo")

        with FakeStatsContext(collector):
            for i in range(10):
                if i % 2 == 0:
                    with self.assertRaises(RuntimeError):
                        CountedFunc(True)
                else:
                    CountedFunc(False)

            # Non-failing calls shouldn't increment the counter.
            self.assertEqual(collector.GetMetricValue(counter_name), 5)

    def testMaps(self):
        """Test binned timings."""
        event_metric_name = "testMaps_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateEventMetadata(
                event_metric_name, bins=[0, 0.1, 0.2])
        ])

        @stats_utils.Timed(event_metric_name)
        def TimedFunc(n):
            self._Sleep(n)

        with FakeStatsContext(collector):
            m = collector.GetMetricValue(event_metric_name)
            self.assertEqual(
                m.bins_heights, {-_INF: 0, 0: 0, 0.1: 0, 0.2: 0})

            for _ in range(3):
                TimedFunc(0.01)

            m = collector.GetMetricValue(event_metric_name)
            self.assertEqual(
                m.bins_heights, {-_INF: 0, 0: 3, 0.1: 0, 0.2: 0})

            TimedFunc(0.11)
            m = collector.GetMetricValue(event_metric_name)
            self.assertEqual(
                m.bins_heights, {-_INF: 0, 0: 3, 0.1: 1, 0.2: 0})

    def testCombiningDecorators(self):
        """Test combining decorators."""
        counter_name = "testCombiningDecorators_counter"
        event_metric_name = "testCombiningDecorators_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateCounterMetadata(counter_name),
            stats_utils.CreateEventMetadata(
                event_metric_name, bins=[0.0, 0.1, 0.2])
        ])

        @stats_utils.Timed(event_metric_name)
        @stats_utils.Counted(counter_name)
        def OverdecoratedFunc(n):
            self._Sleep(n)

        with FakeStatsContext(collector):
            OverdecoratedFunc(0.02)

            # Check if all vars get updated
            m = collector.GetMetricValue(event_metric_name)
            self.assertEqual(
                m.bins_heights, {-_INF: 0, 0: 1, 0.1: 0, 0.2: 0})

            self.assertEqual(collector.GetMetricValue(counter_name), 1)

    def testExceptionHandling(self):
        """Test decorators when exceptions are thrown."""
        counter_name = "testExceptionHandling_counter"
        event_metric_name = "testExceptionHandling_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateCounterMetadata(counter_name),
            stats_utils.CreateEventMetadata(
                event_metric_name, bins=[0, 0.1, 0.2])
        ])

        @stats_utils.Timed(event_metric_name)
        @stats_utils.Counted(counter_name)
        def RaiseFunc(n):
            self._Sleep(n)
            raise Exception()

        with FakeStatsContext(collector):
            self.assertRaises(Exception, RaiseFunc, 0.11)

            # Check if all vars get updated
            m = collector.GetMetricValue(event_metric_name)
            self.assertEqual(
                m.bins_heights, {-_INF: 0, 0: 0, 0.1: 1, 0.2: 0})

            self.assertEqual(collector.GetMetricValue(counter_name), 1)

    def testMultipleFuncs(self):
        """Tests if multiple decorators produce aggregate stats."""
        counter_name = "testMultipleFuncs_counter"
        event_metric_name = "testMultipleFuncs_event_metric"

        collector = self._CreateStatsCollector([
            stats_utils.CreateCounterMetadata(counter_name),
            stats_utils.CreateEventMetadata(event_metric_name, bins=[0, 1, 2])
        ])

        @stats_utils.Counted(counter_name)
        def Func1(n):
            self._Sleep(n)

        @stats_utils.Counted(counter_name)
        def Func2(n):
            self._Sleep(n)

        @stats_utils.Timed(event_metric_name)
        def Func3(n):
            self._Sleep(n)

        @stats_utils.Timed(event_metric_name)
        def Func4(n):
            self._Sleep(n)

        with FakeStatsContext(collector):
            Func1(0.1)
            Func2(0.1)
            self.assertEqual(collector.GetMetricValue(counter_name), 2)

            Func3(0.1)
            Func4(1.1)
            m = collector.GetMetricValue(event_metric_name)
            self.assertEqual(m.bins_heights, {-_INF: 0, 0: 1, 1: 1, 2: 0})
# Empty class whose bases come from with_metaclass(MetaClass, bytes); it adds
# no attributes or methods of its own.
# NOTE(review): with_metaclass is presumably the six-style helper that makes
# MetaClass the metaclass and bytes the base class -- confirm against imports.
class TestClass(with_metaclass(MetaClass, bytes)): pass
class Expression(with_metaclass(Meta)):
    """Expression class

    Pairs an expression string (``self.expression``) with the object it is
    evaluated against (``self.ds``). Most methods forward to the
    corresponding method on ``self.ds``.
    """
    def __init__(self, ds, expression):
        self.ds = ds
        # Unwrap nested Expression objects so self.expression is always the
        # underlying expression value.
        if isinstance(expression, Expression):
            expression = expression.expression
        self.expression = expression

    @property
    def dt(self):
        """Return a DateTime accessor wrapping this expression"""
        return DateTime(self)

    @property
    def str(self):
        """Gives access to string operations"""
        return StringOperations(self)

    @property
    def str_pandas(self):
        """Gives access to string operations (using Pandas Series)"""
        return StringOperationsPandas(self)

    @property
    def values(self):
        """Shortcut for evaluate() without arguments"""
        return self.evaluate()

    @property
    def dtype(self):
        """The dtype of this expression as reported by ds.dtype"""
        return self.ds.dtype(self.expression)

    def derivative(self, var, simplify=True):
        """Return a new expression: the derivative with respect to var.

        :param var: variable (string or expression) to differentiate against
        :param simplify: passed on to expresso.derivative
        """
        var = _ensure_string_from_expression(var)
        # The new expression must be bound to the same ds as this one;
        # passing `self` here would make the result's `ds` an Expression.
        return self.__class__(self.ds, expresso.derivative(self.expression, var, simplify=simplify))

    def expand(self, stop=[]):
        """Expand the expression such that no virtual columns occurs, only normal columns.

        Example:

        >>> df = vaex.example()
        >>> r = np.sqrt(df.data.x**2 + df.data.y**2)
        >>> r.expand().expression
        'sqrt(((x ** 2) + (y ** 2)))'

        :param stop: names (strings or expressions) that are not expanded
        """
        # `stop` is rebound, never mutated, so the shared default is harmless.
        stop = _ensure_strings_from_expressions(stop)

        def translate(id):
            if id in self.ds.virtual_columns and id not in stop:
                return self.ds.virtual_columns[id]
        expr = expresso.translate(self.expression, translate)
        return Expression(self.ds, expr)

    def variables(self):
        """Return a set of variables this expression depends on.

        Example:

        >>> df = vaex.example()
        >>> r = np.sqrt(df.data.x**2 + df.data.y**2)
        >>> r.variables()
        {'x', 'y'}
        """
        variables = set()

        def record(varname):
            # do this recursively for virtual columns
            if varname in self.ds.virtual_columns and varname not in variables:
                expresso.translate(self.ds.virtual_columns[varname], record)
            # we don't want to record ourself
            if varname != self.expression:
                variables.add(varname)

        expresso.translate(self.expression, record)
        return variables

    def _graph(self):
        """Return a graph containing the dependencies of this expression

        Structure is:
            [<string expression>, <function name if callable>, <function object if callable>, [subgraph/dependencies, ....]]
        """
        expression = self.expression

        def walk(node):
            if isinstance(node, six.string_types):
                if node in self.ds.virtual_columns:
                    ex = Expression(self.ds, self.ds.virtual_columns[node])
                    return [node, None, None, [ex._graph()]]
                else:
                    return node
            else:
                fname, node_repr, deps = node
                if len(node_repr) > 30:  # clip too long expressions
                    node_repr = node_repr[:26] + ' ....'
                deps = [walk(dep) for dep in deps]
                obj = self.ds.functions.get(fname)
                # we don't want the wrapper, we want the underlying object
                if isinstance(obj, Function):
                    obj = obj.f
                if isinstance(obj, FunctionSerializablePickle):
                    obj = obj.f
                return [node_repr, fname, obj, deps]
        return walk(expresso._graph(expression))

    def _graphviz(self, dot=None):
        """Return a graphviz.Digraph object with a graph of the expression"""
        from graphviz import Digraph
        node = self._graph()
        dot = dot or Digraph(comment=self.expression)

        def walk(node):
            if isinstance(node, six.string_types):
                dot.node(node, node)
                return node, node
            else:
                node_repr, fname, fobj, deps = node
                node_id = node_repr
                dot.node(node_id, node_repr)
                for dep in deps:
                    dep_id, dep = walk(dep)
                    dot.edge(node_id, dep_id)
                return node_id, node
        walk(node)
        return dot

    def __str__(self):
        return self.expression

    # def __array__(self, dtype=None):
    #     '''For casting to a numpy array
    #     Example:
    #         >>> np.array(ds.x**2)
    #     '''
    #     return self.ds.evaluate(self)

    def tolist(self):
        '''Short for expr.evaluate().tolist()'''
        return self.evaluate().tolist()

    def __repr__(self):
        return self._repr_plain_()

    def _repr_plain_(self):
        """Return a plain-text summary: expression, length, dtype and values.

        At most PRINT_MAX_COUNT values are shown; for longer data the head
        and tail are shown with a '...' separator between them.
        """
        from .formatting import _format_value

        def format(values):
            for i in range(len(values)):
                value = values[i]
                yield _format_value(value)
        colalign = ("right", ) * 2
        try:
            N = len(self.ds)
            if N <= PRINT_MAX_COUNT:
                values = format(self.evaluate(0, N))
                values = tabulate.tabulate([[i, k] for i, k in enumerate(values)], tablefmt='plain', colalign=colalign)
            else:
                values_head = format(self.evaluate(0, PRINT_MAX_COUNT // 2))
                values_tail = format(self.evaluate(N - PRINT_MAX_COUNT // 2, N))
                values_head = (list(zip(range(PRINT_MAX_COUNT//2), values_head)) +
                               list(zip(range(N - PRINT_MAX_COUNT//2, N), values_tail)))
                values = tabulate.tabulate([k for k in values_head], tablefmt='plain', colalign=colalign)
                values = values.split('\n')
                width = max(map(len, values))
                separator = '\n' + '...'.center(width, ' ') + '\n'
                values = "\n".join(values[:PRINT_MAX_COUNT // 2]) + separator + "\n".join(values[PRINT_MAX_COUNT // 2:]) + '\n'
        except Exception as e:
            # Deliberately best-effort: a repr should not raise, so the error
            # is shown in place of the values.
            values = 'Error evaluating: %r' % e
        expression = self.expression
        if len(expression) > 60:
            expression = expression[:57] + '...'
        info = 'Expression = ' + expression + '\n'
        str_type = str
        dtype = self.dtype
        dtype = (str(dtype) if dtype != str_type else 'str')
        if self.expression in self.ds.columns:
            state = "column"
        elif self.expression in self.ds.get_column_names(hidden=True):
            state = "virtual column"
        else:
            state = "expression"
        line = 'Length: {:,} dtype: {} ({})\n'.format(len(self.ds), dtype, state)
        info += line
        info += '-' * (len(line) - 1) + '\n'
        info += values
        return info

    # NOTE: the shortcuts below forward *all* of their parameters through
    # dict(locals()); do not introduce local variables before that call, or
    # they would be forwarded as keyword arguments too.

    def count(self, binby=[], limits=None, shape=default_shape, selection=False, delay=False, edges=False, progress=None):
        '''Shortcut for ds.count(expression, ...), see `Dataset.count`'''
        kwargs = dict(locals())
        del kwargs['self']
        kwargs['expression'] = self.expression
        return self.ds.count(**kwargs)

    def sum(self, binby=[], limits=None, shape=default_shape, selection=False, delay=False, progress=None):
        '''Shortcut for ds.sum(expression, ...), see `Dataset.sum`'''
        kwargs = dict(locals())
        del kwargs['self']
        kwargs['expression'] = self.expression
        return self.ds.sum(**kwargs)

    def mean(self, binby=[], limits=None, shape=default_shape, selection=False, delay=False, progress=None):
        '''Shortcut for ds.mean(expression, ...), see `Dataset.mean`'''
        kwargs = dict(locals())
        del kwargs['self']
        kwargs['expression'] = self.expression
        return self.ds.mean(**kwargs)

    def std(self, binby=[], limits=None, shape=default_shape, selection=False, delay=False, progress=None):
        '''Shortcut for ds.std(expression, ...), see `Dataset.std`'''
        kwargs = dict(locals())
        del kwargs['self']
        kwargs['expression'] = self.expression
        return self.ds.std(**kwargs)

    def var(self, binby=[], limits=None, shape=default_shape, selection=False, delay=False, progress=None):
        '''Shortcut for ds.var(expression, ...), see `Dataset.var`'''
        kwargs = dict(locals())
        del kwargs['self']
        kwargs['expression'] = self.expression
        return self.ds.var(**kwargs)

    def minmax(self, binby=[], limits=None, shape=default_shape, selection=False, delay=False, progress=None):
        '''Shortcut for ds.minmax(expression, ...), see `Dataset.minmax`'''
        kwargs = dict(locals())
        del kwargs['self']
        kwargs['expression'] = self.expression
        return self.ds.minmax(**kwargs)

    def min(self, binby=[], limits=None, shape=default_shape, selection=False, delay=False, progress=None):
        '''Shortcut for ds.min(expression, ...), see `Dataset.min`'''
        kwargs = dict(locals())
        del kwargs['self']
        kwargs['expression'] = self.expression
        return self.ds.min(**kwargs)

    def max(self, binby=[], limits=None, shape=default_shape, selection=False, delay=False, progress=None):
        '''Shortcut for ds.max(expression, ...), see `Dataset.max`'''
        kwargs = dict(locals())
        del kwargs['self']
        kwargs['expression'] = self.expression
        return self.ds.max(**kwargs)

    def nop(self):
        """Evaluates expression, and drop the result, useful for benchmarking, since vaex is usually lazy"""
        return self.ds.nop(self.expression)

    @property
    def transient(self):
        """If this expression is not transient (e.g. on disk) optimizations can be made"""
        return self.expand().expression not in self.ds.columns

    @property
    def masked(self):
        """Alias to df.is_masked(expression)"""
        return self.ds.is_masked(self.expression)

    def value_counts(self, dropna=False, dropnull=True, ascending=False, progress=False):
        """Computes counts of unique values.

         WARNING:
          * If the expression/column is not categorical, it will be converted on the fly
          * dropna is False by default, it is True by default in pandas

        :param dropna: when True, it will not report the missing values
        :param dropnull: when False, the null count (if any) is reported under the index 'null'
        :param ascending: when False (default) it will report the most frequent occurring item first
        :param progress: passed on to ds.map_reduce
        :returns: Pandas series containing the counts
        """
        from pandas import Series
        dtype = self.dtype

        transient = self.transient or self.ds.filtered or self.ds.is_masked(self.expression)
        if self.dtype == str_type and not transient:
            # string is a special case, only ColumnString are not transient
            ar = self.ds.columns[self.expression]
            if not isinstance(ar, ColumnString):
                transient = True

        counter_type = counter_type_from_dtype(self.dtype, transient)
        # One counter slot per thread, created on first use and merged below.
        counters = [None] * self.ds.executor.thread_pool.nthreads

        def map(thread_index, i1, i2, ar):
            if counters[thread_index] is None:
                counters[thread_index] = counter_type()
            if dtype == str_type:
                previous_ar = ar
                ar = _to_string_sequence(ar)
                if not transient:
                    assert ar is previous_ar.string_sequence
            if np.ma.isMaskedArray(ar):
                mask = np.ma.getmaskarray(ar)
                counters[thread_index].update(ar, mask)
            else:
                counters[thread_index].update(ar)
            return 0

        def reduce(a, b):
            return a + b
        self.ds.map_reduce(map, reduce, [self.expression], delay=False, progress=progress, name='value_counts', info=True, to_numpy=False)
        counters = [k for k in counters if k is not None]
        counter0 = counters[0]
        for other in counters[1:]:
            counter0.merge(other)
        value_counts = counter0.extract()
        index = np.array(list(value_counts.keys()))
        counts = np.array(list(value_counts.values()))

        order = np.argsort(counts)
        if not ascending:
            order = order[::-1]
        counts = counts[order]
        index = index[order]
        if not dropna or not dropnull:
            # Switch to lists so the nan/null entries can be prepended.
            index = index.tolist()
            counts = counts.tolist()
            if not dropna and counter0.nan_count:
                index = [np.nan] + index
                counts = [counter0.nan_count] + counts
            if not dropnull and counter0.null_count:
                index = ['null'] + index
                counts = [counter0.null_count] + counts

        return Series(counts, index=index)

    def unique(self):
        """Shortcut for ds.unique(expression)"""
        return self.ds.unique(self.expression)

    def evaluate(self, i1=None, i2=None, out=None, selection=None):
        """Shortcut for ds.evaluate(expression, i1, i2, out=..., selection=...)"""
        return self.ds.evaluate(self, i1, i2, out=out, selection=selection)

    # TODO: it is not so elegant we need to have a custom version of this
    # it now also misses the docstring, reconsider how the the meta class auto
    # adds this method
    def fillna(self, value, fill_nan=True, fill_masked=True):
        return self.ds.func.fillna(self, value=value, fill_nan=fill_nan, fill_masked=fill_masked)

    def clip(self, lower=None, upper=None):
        """Shortcut for ds.func.clip(expression, lower, upper)"""
        return self.ds.func.clip(self, lower, upper)

    def jit_numba(self, verbose=False):
        """Register a FunctionSerializableNumba for this expression and return the call to it.

        :param verbose: currently unused by this method
        """
        names = []
        funcs = set(expression_namespace.keys())
        # if it's a virtual column, we probably want to optimize that
        # TODO: fully extract the virtual columns, i.e. depending ones?
        expression = self.expression
        if expression in self.ds.virtual_columns:
            expression = self.ds.virtual_columns[self.expression]
        all_vars = self.ds.get_column_names(virtual=True, strings=True, hidden=True) + list(self.ds.variables.keys())
        vaex.expresso.validate_expression(expression, all_vars, funcs, names)
        arguments = list(set(names))
        argument_dtypes = [self.ds.dtype(argument) for argument in arguments]
        # argument_dtypes = [getattr(np, dtype_name) for dtype_name in dtype_names]

        # TODO: for now only float64 output supported
        f = FunctionSerializableNumba(expression, arguments, argument_dtypes, return_dtype=np.dtype(np.float64))
        function = self.ds.add_function('_jit', f, unique=True)
        return function(*arguments)

    def jit_pythran(self, verbose=False):
        """Compile this expression with pythran and return an expression calling the result.

        :param verbose: when True, print the generated code and keep pythran's
            logging and compiler warnings enabled
        """
        import logging
        logger = logging.getLogger('pythran')
        log_level = logger.getEffectiveLevel()
        try:
            if not verbose:
                logger.setLevel(logging.ERROR)
            import pythran
            # NOTE(review): `imp` was removed in Python 3.12; loading the
            # compiled module needs an importlib-based replacement there.
            import imp
            import hashlib
            # self._import_all(module)
            names = []
            funcs = set(expression_namespace.keys())
            expression = self.expression
            if expression in self.ds.virtual_columns:
                expression = self.ds.virtual_columns[self.expression]
            all_vars = self.ds.get_column_names(virtual=True, strings=True, hidden=True) + list(self.ds.variables.keys())
            vaex.expresso.validate_expression(expression, all_vars, funcs, names)
            names = list(set(names))
            types = ", ".join(str(self.ds.dtype(name)) + "[]" for name in names)
            argstring = ", ".join(names)
            code = '''
from numpy import *
#pythran export f({2})
def f({0}):
    return {1}'''.format(argstring, expression, types)
            if verbose:
                print("generated code")
                print(code)
            # The md5 of the generated code gives a stable, unique module name.
            m = hashlib.md5()
            m.update(code.encode('utf-8'))
            module_name = "pythranized_" + m.hexdigest()
            # print(m.hexdigest())
            # The conditional must be parenthesised: without it the whole
            # `[...] + []` is the "verbose" branch and the base flags are
            # dropped when not verbose.
            extra_compile_args = ["-DBOOST_SIMD", "-march=native"] + ([] if verbose else ["-w"])
            module_path = pythran.compile_pythrancode(module_name, code, extra_compile_args=extra_compile_args)
            module = imp.load_dynamic(module_name, module_path)
            function_name = "f_" + m.hexdigest()
            expression_namespace[function_name] = module.f
            return Expression(self.ds, "{0}({1})".format(function_name, argstring))
        finally:
            logger.setLevel(log_level)

    def _rename(self, old, new):
        """Return a new expression in which the name `old` is replaced by `new`"""
        def translate(id):
            if id == old:
                return new
        expr = expresso.translate(self.expression, translate)
        return Expression(self.ds, expr)

    def astype(self, dtype):
        """Shortcut for ds.func.astype(expression, str(dtype))"""
        return self.ds.func.astype(self, str(dtype))

    def apply(self, f):
        """Shortcut for ds.apply(f, [expression])"""
        return self.ds.apply(f, [self.expression])

    def map(self, mapper, nan_mapping=None, null_mapping=None):
        """Map values of an expression or in memory column according to an input
        dictionary or a custom callable function.

        Example:

        >>> import vaex
        >>> df = vaex.from_arrays(color=['red', 'red', 'blue', 'red', 'green'])
        >>> mapper = {'red': 1, 'blue': 2, 'green': 3}
        >>> df['color_mapped'] = df.color.map(mapper)
        >>> df
        #  color      color_mapped
        0  red                   1
        1  red                   1
        2  blue                  2
        3  red                   1
        4  green                 3
        >>> import numpy as np
        >>> df = vaex.from_arrays(type=[0, 1, 2, 2, 2, np.nan])
        >>> df['role'] = df['type'].map({0: 'admin', 1: 'maintainer', 2: 'user', np.nan: 'unknown'})
        >>> df
        #    type  role
        0       0  admin
        1       1  maintainer
        2       2  user
        3       2  user
        4       2  user
        5     nan  unknown

        :param mapper: dict like object used to map the values from keys to values
        :param nan_mapping: value to be used when a nan is present (and not in the mapper)
        :param null_mapping: value to be used when there is a missing value
        :return: A vaex expression
        :rtype: vaex.expression.Expression
        :raises ValueError: when a (non-nan) value found in the data has no key in the mapper
        """
        assert isinstance(mapper, collectionsAbc.Mapping), "mapper should be a dict like object"

        df = self.ds
        mapper_keys = np.array(list(mapper.keys()))

        # we map the keys to a ordinal values [0, N-1] using the set
        key_set = df._set(self.expression)
        found_keys = key_set.keys()
        # nan is the only value that is not equal to itself
        mapper_has_nan = any([key != key for key in mapper_keys])

        # we want all possible values to be converted
        # so mapper's key should be a superset of the keys found
        if not set(mapper_keys).issuperset(found_keys):
            missing = set(found_keys).difference(mapper_keys)
            missing0 = list(missing)[0]
            if missing0 == missing0:  # safe nan check
                raise ValueError('Missing values in mapper: %s' % missing)

        # and these are the corresponding choices
        choices = [mapper[key] for key in found_keys]
        if key_set.has_nan:
            if mapper_has_nan:
                choices = [mapper[np.nan]] + choices
            else:
                choices = [nan_mapping] + choices
        if key_set.has_null:
            choices = [null_mapping] + choices
        choices = np.array(choices)

        key_set_name = df.add_variable('map_key_set', key_set, unique=True)
        choices_name = df.add_variable('map_choices', choices, unique=True)
        expr = '_choose(_ordinal_values({}, {}), {})'.format(self, key_set_name, choices_name)
        return Expression(df, expr)
class OpAccessor(with_metaclass(abc.ABCMeta, object)):
    """
    Abstract accessor for op properties that passes may have modified.

    Passes that reach the components of an Op only through these methods
    can run unchanged against both the op-graph and the exec-graph.

    Replacements requested during a batch are queued in
    ``replacement_list`` and applied by ``end_batch``; every applied
    replacement is also remembered in the ``replacements`` mapping.
    """

    def __init__(self, **kwargs):
        # Keyword arguments are accepted but not used by this base class.
        self.replacement_list = []
        self.replacements = {}

    @abc.abstractmethod
    def op_arg(self, op, n):
        """
        Returns the nth argument of an op-graph Op op as an op-graph Op.

        Overridden by the exec graph to reflect modifications made to the graph.

        Args:
            op: The op-graph op we want an arg for.
            n: The arg number.

        Returns:
            The arg's op.

        """

    @abc.abstractmethod
    def op_args(self, op):
        """
        Returns all the arguments of an op-graph Op.

        Overridden by the exec graph to reflect modifications made to the graph.

        Args:
            op: An op-graph Op.

        Returns:
            The args for op.

        """

    @abc.abstractmethod
    def get_device_op(self, op):
        """
        Helper function that traverses through any reshape ops or value ops
        to return the tensor op.

        Overridden by the exec graph to reflect modifications made to the graph.

        Args:
            op: An op-graph Op.

        Returns:
            The op providing actual storage for op's value.

        """

    @abc.abstractmethod
    def run_pass(self, process_op, **kwargs):
        """
        Runs a pass to completion, calling process_op on each relevant op.

        Args:
            process_op: Callable invoked on each relevant op.
            **kwargs: Implementation-specific options.

        """

    def begin_batch(self):
        """
        Called before beginning processing on a pass.

        Discards any replacements still queued from an earlier batch.
        """
        self.replacement_list = list()

    def replace_op(self, op, replacement):
        """
        Queue op-graph Op op to be replaced by replacement at the end of
        the batch.

        Args:
            op: The op-graph Op being replaced.
            replacement: The replacement op-graph Op for op.

        """
        queued_pair = (op, replacement)
        self.replacement_list.append(queued_pair)

    @abc.abstractmethod
    def perform_replace_op(self, op, replacement):
        """
        Actually perform the op replacement.

        Args:
            op: An Op to be replaced.
            replacement: An Op to replace op with.

        """

    def end_batch(self):
        """
        Called after a pass has been processed.

        Applies every queued replacement in the order it was queued and
        records it in ``replacements``.

        Returns:
            True if the graph was changed.

        """
        for old_op, new_op in self.replacement_list:
            self.perform_replace_op(old_op, new_op)
            self.replacements[old_op] = new_op
        # A non-empty queue means at least one replacement was requested.
        return bool(self.replacement_list)

    def get_replacement(self, op):
        """
        Look up the replacement recorded for op.

        Args:
            op: The op-graph Op to look up.

        Returns:
            The replacement recorded by ``end_batch``, or None if there is none.

        """
        return self.replacements.get(op)
import collections import functools import ftrack_api.entity.base import ftrack_api.exception import ftrack_api.event.base import ftrack_api.symbol import ftrack_api.inspection from ftrack_api.logging import LazyLogMessage as L from future.utils import with_metaclass MixinBaseClass = with_metaclass( ftrack_api.entity.base.DynamicEntityTypeMetaclass, ftrack_api.entity.base._EntityBase, collections.MutableMapping ) class Location(ftrack_api.entity.base.Entity): '''Represent storage for components.''' def __init__(self, session, data=None, reconstructing=False): '''Initialise entity. *session* is an instance of :class:`ftrack_api.session.Session` that this entity instance is bound to. *data* is a mapping of key, value pairs to apply as initial attribute values.
class _BulkObject(with_metaclass(ABCMeta)):
    """ Abstract base class of every bulk object that can be read from and written to a file conforming to the Bing Ad Bulk File Schema.

    For more information about the Bulk File Schema, see http://go.microsoft.com/fwlink/?LinkID=511639.
    """

    @abstractmethod
    def read_from_row_values(self, row_values):
        """ Read object data from a single row.

        *Example:*

        * SingleLineBulkEntity: reads entity fields.
        * BulkError: reads error fields.
        * BulkEntityIdentifier: reads identifier fields (Id, status etc.).

        :param row_values: the row to read from
        :type row_values: _RowValues
        """

        raise NotImplementedError()

    @abstractmethod
    def write_to_row_values(self, row_values, exclude_readonly_data):
        """ Write object data to a single row.

        *Example:*

        * SingleLineBulkEntity: writes entity fields.
        * BulkEntityIdentifier: writes identifier fields (Id, status etc.)

        :param row_values: the row to write to
        :type row_values: _RowValues
        :param exclude_readonly_data: presumably whether read-only fields are left out of the output (meaning is defined by implementers)
        """

        raise NotImplementedError()

    @abstractmethod
    def read_related_data_from_stream(self, stream_reader):
        """ Read object data from consecutive rows.

        *Example:*

        * SingleLineBulkEntity: reads entity errors.
        * MultilineBulkEntity: reads child entities.

        The base implementation does nothing.

        :param stream_reader: the reader to pull the following rows from
        :type stream_reader: _BulkStreamReader
        """

        pass

    @abstractmethod
    def write_to_stream(self, row_writer, exclude_readonly_data):
        """ Write object data to consecutive rows.

        *Example:*

        * SingleLineBulkEntity: writes entity.
        * MultilineBulkEntity: writes child entities.
        * BulkEntityIdentifier: writes identifier information (Id, status etc.)

        :param row_writer: the writer that receives the rows
        :type row_writer: :class:`._BulkObjectWriter`
        :param exclude_readonly_data: presumably whether read-only fields are left out of the output (meaning is defined by implementers)
        """

        raise NotImplementedError()

    @abstractproperty
    def can_enclose_in_multiline_entity(self):
        """ Return true if the entity is part of a multiline entity, false otherwise.

        *Example:*

        * BulkSiteLinkAdExtension: returns true
        * BulkCampaignTarget: returns true
        * BulkAdGroup: returns false
        * BulkKeyword: returns false

        The base implementation returns false.

        :rtype: bool
        """

        return False

    @abstractmethod
    def enclose_in_multiline_entity(self):
        """ Create a multiline entity containing this entity.

        *Example:*

        * BulkSiteLink: returns BulkSiteLinkAdExtension containing this BulkSiteLink
        * BulkCampaignAgeTargetBid: returns BulkCampaignTarget containing this BulkCampaignAgeTargetBid

        :return: the wrapping multi-line entity
        :rtype: :class:`._MultiRecordBulkEntity`
        """

        raise NotImplementedError()

    def convert_to_values(self, row_values, mappings):
        """ Apply each mapping's ``convert_to_csv`` to this object and *row_values*.

        Any exception raised by a mapping is re-raised as the write error built
        by :meth:`_create_entity_write_error`.

        :param row_values: the row being filled in
        :param mappings: iterable of mapping objects exposing ``convert_to_csv(entity, row_values)``
        """

        for current_mapping in mappings:
            try:
                current_mapping.convert_to_csv(self, row_values)
            except Exception as ex:
                raise self._create_entity_write_error(current_mapping, ex)

    def _create_entity_write_error(self, mapping, ex):
        """ Build (but do not raise) the exception describing a failed write.

        The message names the column header when *mapping* is a
        ``_SimpleBulkMapping``; otherwise it only names the entity type.

        :param mapping: the mapping whose conversion failed
        :param ex: the original exception
        :return: an ``EntityWriteException`` wrapping *ex*
        """

        type_name = str(type(self))
        if isinstance(mapping, _SimpleBulkMapping):
            text = "Couldn't write column {0} of {1} entity: {2}".format(mapping.header, type_name, ex)
        else:
            text = "Couldn't write {0} entity: {1}".format(type_name, ex)
        return EntityWriteException(
            message=text + " See InnerException for error details.",
            inner_exception=ex,
        )
class GaussianProcessInterface(with_metaclass(ABCMeta, GaussianProcessDataInterface)):

    r"""Interface for a GaussianProcess: mean, variance, gradients thereof, and data I/O.

    .. Note:: comments in this class are copied from GaussianProcess in gpp_math.hpp and duplicated in cpp_wrappers.gaussian_process
      and duplicated in :class:`moe.optimal_learning.python.cpp_wrappers.gaussian_process.GaussianProcess` and
      :class:`moe.optimal_learning.python.python_version.gaussian_process.GaussianProcess`

    Object that encapsulates Gaussian Process Priors (GPPs).  A GPP is defined by a set of
    (sample point, function value, noise variance) triples along with a covariance function that relates the points.
    Each point has dimension dim.  These are the training data; for example, each sample point might specify an experimental
    cohort and the corresponding function value is the objective measured for that experiment.  There is one noise variance
    value per function value; this is the measurement error and is treated as N(0, noise_variance) Gaussian noise.

    GPPs estimate a real process \ms f(x) = GP(m(x), k(x,x'))\me (see file docs).  This class deals with building an estimator
    to the actual process using measurements taken from the actual process--the (sample point, function val, noise) triple.
    Then predictions about unknown points can be made by sampling from the GPP--in particular, finding the (predicted)
    mean and variance.  These functions (and their gradients) are provided in ComputeMeanOfPoints, ComputeVarianceOfPoints,
    etc.

    Further mathematical details are given in the implementation comments, but we are essentially computing:

    | ComputeMeanOfPoints    : ``K(Xs, X) * [K(X,X) + \sigma_n^2 I]^{-1} * y = Ks^T * K^{-1} * y``
    | ComputeVarianceOfPoints: ``K(Xs, Xs) - K(Xs,X) * [K(X,X) + \sigma_n^2 I]^{-1} * K(X,Xs) = Kss - Ks^T * K^{-1} * Ks``

    This (estimated) mean and variance characterize the predicted distributions of the actual \ms m(x), k(x,x')\me
    functions that underlie our GP.

    The "independent variables" for this object are ``points_to_sample``. These points are both the "p" and the "q" in q,p-EI;
    i.e., they are the parameters of both ongoing experiments and new predictions. Recall that in q,p-EI, the q points are
    called ``points_to_sample`` and the p points are called ``points_being_sampled.`` Here, we need to make predictions about
    both point sets with the GP, so we simply call the union of point sets ``points_to_sample.``

    In GP computations, there is really no distinction between the "q" and "p" points from EI, ``points_to_sample`` and
    ``points_being_sampled``, respectively. However, in EI optimization, we only need gradients of GP quantities wrt
    ``points_to_sample``, so users should call member functions with ``num_derivatives = num_to_sample`` in that context.

    """

    @staticmethod
    def _clamp_num_derivatives(num_points, num_derivatives):
        """Clamp num_derivatives so that the result is 0 <= result <= num_points; out-of-range values yield num_points.

        A negative ``num_derivatives`` or one larger than ``num_points`` is replaced by ``num_points``;
        any other value is returned unchanged.

        :param num_points: number of total points
        :type num_points: int > 0
        :param num_derivatives: number of points to differentiate against
        :type num_derivatives: int
        :return: the clamped number of derivatives
        :rtype: int

        """
        out_of_range = num_derivatives < 0 or num_derivatives > num_points
        return num_points if out_of_range else num_derivatives

    @abstractproperty
    def dim(self):
        """Return the number of spatial dimensions."""

    @abstractproperty
    def num_sampled(self):
        """Return the number of sampled points."""

    @abstractmethod
    def compute_mean_of_points(self, points_to_sample):
        r"""Compute the mean of this GP at each point of ``Xs`` (``points_to_sample``).

        ``points_to_sample`` may not contain duplicate points. Violating this results in singular covariance matrices.

        .. Note:: Comments are copied from GaussianProcess in gpp_math.hpp
          and duplicated in :meth:`moe.optimal_learning.python.cpp_wrappers.gaussian_process.GaussianProcess.compute_mean_of_points`.

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :return: mean: where mean[i] is the mean at points_to_sample[i]
        :rtype: array of float64 with shape (num_to_sample)

        """

    @abstractmethod
    def compute_grad_mean_of_points(self, points_to_sample, num_derivatives):
        r"""Compute the gradient of the mean of this GP at each point of ``Xs`` (``points_to_sample``) wrt ``Xs``.

        ``points_to_sample`` may not contain duplicate points. Violating this results in singular covariance matrices.

        Note that ``grad_mu`` is nominally sized: ``grad_mu[num_to_sample][num_to_sample][dim]``. This is
        the d-th component of the derivative evaluated at the i-th input wrt the j-th input.
        However, for ``0 <= i,j < num_to_sample``, ``i != j``, ``grad_mu[j][i][d] = 0``.
        (See references or implementation for further details.)
        Thus, ``grad_mu`` is stored in a reduced form which only tracks the nonzero entries.

        .. Note:: Comments are copied from GaussianProcess in gpp_math.hpp
          and duplicated in :meth:`moe.optimal_learning.python.cpp_wrappers.gaussian_process.GaussianProcess.compute_grad_mean_of_points`.

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param num_derivatives: return derivatives wrt points_to_sample[0:num_derivatives]; large or negative values are clamped
        :type num_derivatives: int
        :return: grad_mu: gradient of the mean of the GP. ``grad_mu[i][d]`` is actually the gradient
          of ``\mu_i`` wrt ``x_{i,d}``, the d-th dim of the i-th entry of ``points_to_sample``.
        :rtype: array of float64 with shape (num_derivatives, dim)

        """

    @abstractmethod
    def compute_variance_of_points(self, points_to_sample):
        r"""Compute the variance (matrix) of this GP at each point of ``Xs`` (``points_to_sample``).

        ``points_to_sample`` may not contain duplicate points. Violating this results in singular covariance matrices.

        The variance matrix is symmetric although we currently return the full representation.

        .. Note:: Comments are copied from GaussianProcess in gpp_math.hpp
          and duplicated in :meth:`moe.optimal_learning.python.cpp_wrappers.gaussian_process.GaussianProcess.compute_variance_of_points`.

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :return: var_star: variance matrix of this GP
        :rtype: array of float64 with shape (num_to_sample, num_to_sample)

        """

    @abstractmethod
    def compute_cholesky_variance_of_points(self, points_to_sample):
        r"""Compute the cholesky factorization of the variance (matrix) of this GP at each point of ``Xs`` (``points_to_sample``).

        ``points_to_sample`` may not contain duplicate points. Violating this results in singular covariance matrices.

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :return: cholesky factorization of the variance matrix of this GP, lower triangular
        :rtype: array of float64 with shape (num_to_sample, num_to_sample), lower triangle filled in

        """

    @abstractmethod
    def compute_grad_variance_of_points(self, points_to_sample, num_derivatives):
        r"""Compute the gradient of the variance (matrix) of this GP at each point of ``Xs`` (``points_to_sample``) wrt ``Xs``.

        ``points_to_sample`` may not contain duplicate points. Violating this results in singular covariance matrices.

        This function is similar to compute_grad_cholesky_variance_of_points() (below), except this does not include
        gradient terms from the cholesky factorization. Description will not be duplicated here.

        .. Note:: Comments are copied from GaussianProcess in gpp_math.hpp
          and duplicated in :meth:`moe.optimal_learning.python.cpp_wrappers.gaussian_process.GaussianProcess.compute_grad_variance_of_points`.

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param num_derivatives: return derivatives wrt points_to_sample[0:num_derivatives]; large or negative values are clamped
        :type num_derivatives: int
        :return: grad_var: gradient of the variance matrix of this GP
        :rtype: array of float64 with shape (num_derivatives, num_to_sample, num_to_sample, dim)

        """

    @abstractmethod
    def compute_grad_cholesky_variance_of_points(self, points_to_sample, num_derivatives):
        r"""Compute the gradient of the cholesky factorization of the variance (matrix) of this GP at each point of ``Xs`` (``points_to_sample``) wrt ``Xs``.

        ``points_to_sample`` may not contain duplicate points. Violating this results in singular covariance matrices.

        This function accounts for the effect on the gradient resulting from
        cholesky-factoring the variance matrix.  See Smith 1995 for algorithm details.

        Note that ``grad_chol`` is nominally sized:
        ``grad_chol[num_to_sample][num_to_sample][num_to_sample][dim]``.
        Let this be indexed ``grad_chol[k][j][i][d]``, which is read the derivative of ``var[j][i]``
        with respect to ``x_{k,d}`` (x = ``points_to_sample``)

        .. Note:: Comments are copied from GaussianProcess in gpp_math.hpp
          and duplicated in :meth:`moe.optimal_learning.python.cpp_wrappers.gaussian_process.GaussianProcess.compute_grad_cholesky_variance_of_points`.

        :param points_to_sample: num_to_sample points (in dim dimensions) being sampled from the GP
        :type points_to_sample: array of float64 with shape (num_to_sample, dim)
        :param num_derivatives: return derivatives wrt points_to_sample[0:num_derivatives]; large or negative values are clamped
        :type num_derivatives: int
        :return: grad_chol: gradient of the cholesky factorization of the variance matrix of this GP.
          ``grad_chol[k][j][i][d]`` is actually the gradients of ``var_{j,i}`` with
          respect to ``x_{k,d}``, the d-th dimension of the k-th entry of ``points_to_sample``
        :rtype: array of float64 with shape (num_derivatives, num_to_sample, num_to_sample, dim)

        """

    @abstractmethod
    def add_sampled_points(self, sampled_points):
        r"""Add sampled point(s) (point, value, noise) to the GP's prior data.

        Also forces recomputation of all derived quantities for GP to remain consistent.

        :param sampled_points: SamplePoint objects to load into the GP (containing point, function value, and noise variance)
        :type sampled_points: single :class:`moe.optimal_learning.python.SamplePoint` or list of SamplePoint objects

        """

    @abstractmethod
    def sample_point_from_gp(self, point_to_sample, noise_variance=0.0):
        r"""Sample a function value from a Gaussian Process prior, provided a point at which to sample.

        Uses the formula ``function_value = gpp_mean + sqrt(gpp_variance) * w1 + sqrt(noise_variance) * w2``, where ``w1, w2``
        are draws from N(0,1).

        Implementers are responsible for providing a N(0,1) source.

        .. NOTE::
             Set noise_variance to 0 if you want "accurate" draws from the GP.
             BUT if the drawn (point, value) pair is meant to be added back into the GP (e.g., for testing), then this point
             MUST be drawn with noise_variance equal to the noise associated with "point" as a member of "points_sampled"

        .. Note:: Comments are copied from GaussianProcess in gpp_math.hpp
          and duplicated in :meth:`moe.optimal_learning.python.cpp_wrappers.gaussian_process.GaussianProcess.sample_point_from_gp`.

        :param point_to_sample: point (in dim dimensions) at which to sample from this GP
        :type point_to_sample: array of float64 with shape (dim)
        :param noise_variance: amount of noise to associate with the sample
        :type noise_variance: float64 >= 0.0
        :return: sample_value: function value drawn from this GP
        :rtype: float64

        """
class ModelMixin(with_metaclass(ModelMixinBase, object)): """
class CmdSet(with_metaclass(_CmdSetMeta, object)):
    """
    This class describes a unique cmdset that understands priorities.
    CmdSets can be merged and made to perform various set operations
    on each other. CmdSets have priorities that affect which of their
    ingoing commands gets used.

    In the examples, cmdset A always has higher priority than cmdset B.

    key - the name of the cmdset. This can be used on its own for game
        operations

    mergetype (partly from Set theory):

        Union -     The two command sets are merged so that as many
                    commands as possible of each cmdset ends up in the
                    merged cmdset. Same-name commands are merged by
                    priority. This is the most common default.
                    Ex: A1,A3 + B1,B2,B4,B5 = A1,B2,A3,B4,B5
        Intersect - Only commands found in *both* cmdsets
                    (i.e. which have same names) end up in the merged
                    cmdset, with the higher-priority cmdset replacing the
                    lower one.  Ex: A1,A3 + B1,B2,B4,B5 = A1
        Replace -   The commands of this cmdset completely replaces
                    the lower-priority cmdset's commands, regardless
                    of if same-name commands exist.
                    Ex: A1,A3 + B1,B2,B4,B5 = A1,A3
        Remove -    This removes the relevant commands from the
                    lower-priority cmdset completely. They are not
                    replaced with anything, so this in effect uses the
                    high-priority cmdset as a filter to affect the
                    low-priority cmdset.
                    Ex: A1,A3 + B1,B2,B4,B5 = B2,B4,B5

        Note: Commands longer than 2 characters and starting
              with double underscores, like '__noinput_command'
              are considered 'system commands' and are
              exempt from all merge operations - they are
              ALWAYS included across mergers and only affected
              if same-named system commands replace them.

    priority - All cmdsets are always merged in pairs of two so that
               the higher set's mergetype is applied to the
               lower-priority cmdset. Default commands have priority 0,
               high-priority ones like Exits and Channels have 10 and 9.
               Priorities can be negative as well to give default
               commands preference.

    duplicates - determines what happens when two sets of equal
                 priority merge. Default has the first of them in the
                 merger (i.e. A above) automatically taking
                 precedence. But if allow_duplicates is true, the
                 result will be a merger with more than one of each
                 name match.  This will usually lead to the player
                 receiving a multiple-match error higher up the road,
                 but can be good for things like cmdsets on non-player
                 objects in a room, to allow the system to warn that
                 more than one 'ball' in the room has the same 'kick'
                 command defined on it, so it may offer a chance to
                 select which ball to kick ...  Allowing duplicates
                 only makes sense for Union and Intersect, the setting
                 is ignored for the other mergetypes.

    key_mergetype (dict) - allows the cmdset to define a unique
             mergetype for particular cmdsets.  Format is
             {CmdSetkeystring:mergetype}. Priorities still apply.
             Example: {'Myevilcmdset': 'Replace'} which would make
             sure for this set to always use 'Replace' on
             Myevilcmdset no matter what overall mergetype this set
             has.

    no_objs  - don't include any commands from nearby objects
                      when searching for suitable commands
    no_exits  - ignore the names of exits when matching against
                        commands
    no_channels   - ignore the name of channels when matching against
                        commands (WARNING- this is dangerous since the
                        player can then not even ask staff for help if
                        something goes wrong)

    """

    key = "Unnamed CmdSet"
    mergetype = "Union"
    priority = 0

    # These flags, if set to None, will allow "pass-through" of lower-prio settings
    # of True/False. If set to True/False, will override lower-prio settings.
    no_exits = None
    no_objs = None
    no_channels = None
    # same as above, but if left at None in the final merged set, the
    # cmdhandler will auto-assume True for Objects and stay False for all
    # other entities.
    duplicates = None

    permanent = False
    # NOTE: class-level mutable default; _duplicate() gives every copy its
    # own dict, but subclasses should assign a fresh dict rather than mutate
    # this shared one in place.
    key_mergetypes = {}
    errmessage = ""
    # pre-store properties to duplicate straight off
    to_duplicate = ("key", "cmdsetobj", "no_exits", "no_objs",
                    "no_channels", "permanent", "mergetype",
                    "priority", "duplicates", "errmessage")

    def __init__(self, cmdsetobj=None, key=None):
        """
        Creates a new CmdSet instance.

        Args:
            cmdsetobj (Session, Player, Object, optional): This is the database object
                to which this particular instance of cmdset is related. It
                is often a character but may also be a regular object, Player
                or Session.
            key (str, optional): The identifier for this cmdset. This
                helps if wanting to selectively remove cmdsets.

        """
        if key:
            self.key = key
        self.commands = []
        self.system_commands = []
        self.actual_mergetype = self.mergetype
        self.cmdsetobj = cmdsetobj
        # this is set only on merged sets, in cmdhandler.py, in order to
        # track, list and debug mergers correctly.
        self.merged_from = []

        # The membership cache has to exist before the creation hook runs,
        # otherwise an `in` test inside at_cmdset_creation() would fail.
        self._contains_cache = WeakKeyDictionary()

        # initialize system
        self.at_cmdset_creation()
        # start out with a clean cache whatever the hook did
        self._reset_contains_cache()

    def _reset_contains_cache(self):
        """
        Forget all memoized `in` results.

        Must be called whenever `self.commands` is changed in place or
        replaced, since `__contains__` would otherwise keep answering
        from the old command list.

        """
        # Only swap in a new cache if there is something to forget; this
        # also makes the call safe if the cache was never created.
        if getattr(self, "_contains_cache", None):
            self._contains_cache = WeakKeyDictionary()

    # Priority-sensitive merge operations for cmdsets

    def _union(self, cmdset_a, cmdset_b):
        """
        Merge two sets using union merger

        Args:
            cmdset_a (Cmdset): Cmdset given higher priority in the case of a tie.
            cmdset_b (Cmdset): Cmdset given lower priority in the case of a tie.

        Returns:
            cmdset_c (Cmdset): The result of A U B operation.

        Notes:
            Union, C = A U B, means that C gets all elements from both A and B.

        """
        cmdset_c = cmdset_a._duplicate()
        # we make copies, not refs by use of [:]
        cmdset_c.commands = cmdset_a.commands[:]
        if cmdset_a.duplicates and cmdset_a.priority == cmdset_b.priority:
            cmdset_c.commands.extend(cmdset_b.commands)
        else:
            cmdset_c.commands.extend(
                [cmd for cmd in cmdset_b if cmd not in cmdset_a])
        return cmdset_c

    def _intersect(self, cmdset_a, cmdset_b):
        """
        Merge two sets using intersection merger

        Args:
            cmdset_a (Cmdset): Cmdset given higher priority in the case of a tie.
            cmdset_b (Cmdset): Cmdset given lower priority in the case of a tie.

        Returns:
            cmdset_c (Cmdset): The result of A (intersect) B operation.

        Notes:
            Intersection, C = A (intersect) B, means that C only gets the
                parts of A and B that are the same (that is, the commands
                of each set having the same name. Only the one of these
                having the higher prio ends up in C).

        """
        cmdset_c = cmdset_a._duplicate()
        if cmdset_a.duplicates and cmdset_a.priority == cmdset_b.priority:
            for cmd in [cmd for cmd in cmdset_a if cmd in cmdset_b]:
                cmdset_c.add(cmd)
                cmdset_c.add(cmdset_b.get(cmd))
        else:
            cmdset_c.commands = [cmd for cmd in cmdset_a if cmd in cmdset_b]
        return cmdset_c

    def _replace(self, cmdset_a, cmdset_b):
        """
        Replace the contents of one set with another

        Args:
            cmdset_a (Cmdset): Cmdset replacing
            cmdset_b (Cmdset): Cmdset to replace

        Returns:
            cmdset_c (Cmdset): This is identical to cmdset_a.

        Notes:
            C = A, where B is ignored.

        """
        cmdset_c = cmdset_a._duplicate()
        cmdset_c.commands = cmdset_a.commands[:]
        return cmdset_c

    def _remove(self, cmdset_a, cmdset_b):
        """
        Filter a set by another.

        Args:
            cmdset_a (Cmdset): Cmdset acting as a removal filter.
            cmdset_b (Cmdset): Cmdset to filter

        Returns:
            cmdset_c (Cmdset): B, with all matching commands from A removed.

        Notes:
            C = B - A, where A is used to remove the commands of B.
            The settings of C are still duplicated from A.

        """
        cmdset_c = cmdset_a._duplicate()
        cmdset_c.commands = [cmd for cmd in cmdset_b if cmd not in cmdset_a]
        return cmdset_c

    def _instantiate(self, cmd):
        """
        checks so that object is an instantiated command and not, say
        a cmdclass. If it is, instantiate it.  Other types, like
        strings, are passed through.

        Args:
            cmd (any): Entity to analyze.

        Returns:
            result (any): An instantiated Command or the input unmodified.

        """
        # Calling a non-callable (an instance, a string) raises TypeError,
        # which is the signal to hand the input back untouched.
        try:
            return cmd()
        except TypeError:
            return cmd

    def _duplicate(self):
        """
        Returns a new cmdset with the same settings as this one
        (no actual commands are copied over)

        Returns:
            cmdset (Cmdset): A copy of the current cmdset.

        """
        cmdset = CmdSet()
        for key, val in ((key, getattr(self, key)) for key in self.to_duplicate):
            if val != getattr(cmdset, key):
                # only copy if different from default; avoid turning
                # class-vars into instance vars
                setattr(cmdset, key, val)
        cmdset.key_mergetypes = self.key_mergetypes.copy()
        return cmdset

    def __str__(self):
        """
        Show all commands in cmdset when printing it.

        Returns:
            commands (str): Representation of commands in Cmdset.

        """
        return ", ".join(
            [str(cmd) for cmd in sorted(self.commands, key=lambda o: o.key)])

    def __iter__(self):
        """
        Allows for things like 'for cmd in cmdset':

        Returns:
            iterable (iter): Commands in Cmdset.

        """
        return iter(self.commands)

    def __contains__(self, othercmd):
        """
        Returns True if this cmdset contains the given command (as
        defined by command name and aliases). This allows for things
        like 'if cmd in cmdset'

        The answer is memoized per command object (weakly referenced);
        the memo is dropped whenever add(), remove() or make_unique()
        changes the command list.

        """
        ret = self._contains_cache.get(othercmd)
        if ret is None:
            ret = othercmd in self.commands
            self._contains_cache[othercmd] = ret
        return ret

    def __add__(self, cmdset_a):
        """
        Merge this cmdset (B) with another cmdset (A) using the + operator,

        C = B + A

        Here, we (by convention) say that 'A is merged onto B to form
        C'.  The actual merge operation used in the 'addition' depends
        on which priorities A and B have. The one of the two with the
        highest priority will apply and give its properties to C. In
        the case of a tie, A takes priority and replaces the
        same-named commands in B unless A has the 'duplicate' variable
        set (which means both sets' commands are kept).

        Args:
            cmdset_a (Cmdset or None): The set merged onto this one.

        Returns:
            cmdset_c (Cmdset): The merged set, or this very set if
                `cmdset_a` is falsy.

        """
        # It's okay to merge with None
        if not cmdset_a:
            return self

        sys_commands_a = cmdset_a.get_system_cmds()
        sys_commands_b = self.get_system_cmds()

        if self.priority <= cmdset_a.priority:
            # A higher or equal priority to B

            # preserve system __commands
            sys_commands = sys_commands_a + [
                cmd for cmd in sys_commands_b if cmd not in sys_commands_a
            ]

            mergetype = cmdset_a.key_mergetypes.get(self.key, cmdset_a.mergetype)
            if mergetype == "Intersect":
                cmdset_c = self._intersect(cmdset_a, self)
            elif mergetype == "Replace":
                cmdset_c = self._replace(cmdset_a, self)
            elif mergetype == "Remove":
                cmdset_c = self._remove(cmdset_a, self)
            else:  # Union
                cmdset_c = self._union(cmdset_a, self)

            # pass through options whenever they are set, unless the merging or higher-prio
            # set changes the setting (i.e. has a non-None value). We don't pass through
            # the duplicates setting; that is per-merge
            cmdset_c.no_channels = self.no_channels if cmdset_a.no_channels is None else cmdset_a.no_channels
            cmdset_c.no_exits = self.no_exits if cmdset_a.no_exits is None else cmdset_a.no_exits
            cmdset_c.no_objs = self.no_objs if cmdset_a.no_objs is None else cmdset_a.no_objs

        else:
            # B higher priority than A

            # preserve system __commands
            sys_commands = sys_commands_b + [
                cmd for cmd in sys_commands_a if cmd not in sys_commands_b
            ]

            mergetype = self.key_mergetypes.get(cmdset_a.key, self.mergetype)
            if mergetype == "Intersect":
                cmdset_c = self._intersect(self, cmdset_a)
            elif mergetype == "Replace":
                cmdset_c = self._replace(self, cmdset_a)
            elif mergetype == "Remove":
                cmdset_c = self._remove(self, cmdset_a)
            else:  # Union
                cmdset_c = self._union(self, cmdset_a)

            # pass through options whenever they are set, unless the higher-prio
            # set changes the setting (i.e. has a non-None value). We don't pass through
            # the duplicates setting; that is per-merge
            cmdset_c.no_channels = cmdset_a.no_channels if self.no_channels is None else self.no_channels
            cmdset_c.no_exits = cmdset_a.no_exits if self.no_exits is None else self.no_exits
            cmdset_c.no_objs = cmdset_a.no_objs if self.no_objs is None else self.no_objs

        # we store actual_mergetype since key_mergetypes
        # might be different from the main mergetype.
        # This is used for diagnosis.
        cmdset_c.actual_mergetype = mergetype

        # return the system commands to the cmdset
        cmdset_c.add(sys_commands)
        return cmdset_c

    def add(self, cmd):
        """
        Add a new command, a list of commands or a cmdset to this
        cmdset. Note that this is *not* a merge operation (that is
        handled by the + operator).

        Args:
            cmd (Command, list, Cmdset): This allows for adding one or
                more commands to this Cmdset in one go. If another Cmdset
                is given, all its commands will be added.

        Raises:
            RuntimeError: If instantiating a given cmdset ended in
                infinite recursion (a cmdset cyclically adding itself).

        Notes:
            If cmd already exists in set, it will replace the old one
            (no priority checking etc happens here). This is very useful
            when overloading default commands).

            If cmd is another cmdset class or -instance, the commands of
            that command set is added to this one, as if they were part of
            the original cmdset definition. No merging or priority checks
            are made, rather later added commands will simply replace
            existing ones to make a unique set.

            The de-duplication goes through a `set`, so the order of
            `self.commands` is not preserved.

        """

        if inherits_from(cmd, "evennia.commands.cmdset.CmdSet"):
            # cmd is a command set so merge all commands in that set
            # to this one. We raise a visible error if we created
            # an infinite loop (adding cmdset to itself somehow)
            try:
                cmd = self._instantiate(cmd)
            except RuntimeError:
                string = "Adding cmdset %(cmd)s to %(class)s lead to an "
                string += "infinite loop. When adding a cmdset to another, "
                string += "make sure they are not themself cyclically added to "
                string += "the new cmdset somewhere in the chain."
                raise RuntimeError(
                    _(string) % {
                        "cmd": cmd,
                        "class": self.__class__
                    })
            cmds = cmd.commands
        elif is_iter(cmd):
            cmds = [self._instantiate(c) for c in cmd]
        else:
            cmds = [self._instantiate(cmd)]

        commands = self.commands
        system_commands = self.system_commands
        for cmd in cmds:
            # add all commands
            if not hasattr(cmd, 'obj'):
                cmd.obj = self.cmdsetobj
            try:
                ic = commands.index(cmd)
                commands[ic] = cmd  # replace
            except ValueError:
                commands.append(cmd)
            # add system_command to separate list as well,
            # for quick look-up
            if cmd.key.startswith("__"):
                try:
                    ic = system_commands.index(cmd)
                    system_commands[ic] = cmd  # replace
                except ValueError:
                    system_commands.append(cmd)

        if cmds:
            # extra run to make sure to avoid doublets; done once after the
            # loop instead of once per added command (same end result)
            self.commands = list(set(commands))
            # earlier `in` answers may no longer hold
            self._reset_contains_cache()

    def remove(self, cmd):
        """
        Remove a command instance from the cmdset.

        Args:
            cmd (Command or str): Either the Command object to remove
                or the key of such a command.

        Notes:
            A system command (key starting with `__`) is only taken out
            of the system-command list. Matching a string relies on the
            stored commands comparing equal to their key, as `get`
            already assumes.

        """
        cmd = self._instantiate(cmd)
        # a plain string has no .key - the string itself is the key
        cmd_key = getattr(cmd, "key", cmd)
        if cmd_key.startswith("__"):
            try:
                ic = self.system_commands.index(cmd)
                del self.system_commands[ic]
            except ValueError:
                # not a registered system command - nothing to remove
                pass
        else:
            self.commands = [
                oldcmd for oldcmd in self.commands if oldcmd != cmd
            ]
            # earlier `in` answers may no longer hold
            self._reset_contains_cache()

    def get(self, cmd):
        """
        Get a command from the cmdset. This is mostly useful to
        check if the command is part of this cmdset or not.

        Args:
            cmd (Command or str): Either the Command object or its key.

        Returns:
            cmd (Command): The first matching Command in the set, or
                None if there is no match.

        """
        cmd = self._instantiate(cmd)
        for thiscmd in self.commands:
            if thiscmd == cmd:
                return thiscmd
        return None

    def count(self):
        """
        Number of commands in set.

        Returns:
            N (int): Number of commands in this Cmdset.

        """
        return len(self.commands)

    def get_system_cmds(self):
        """
        Get system commands in cmdset

        Returns:
            sys_cmds (list): The system commands in the set.

        Notes:
            As far as the Cmdset is concerned, system commands are any
            commands with a key starting with double underscore __.
            These are exempt from merge operations.

        """
        return self.system_commands

    def make_unique(self, caller):
        """
        Remove duplicate command-keys (unsafe)

        Args:
            caller (object): Commands on this object will
                get preference in the duplicate removal.

        Notes:
            This is an unsafe command meant to clean out a cmdset of
            doublet commands after it has been created. It is useful
            for commands inheriting cmdsets from the cmdhandler where
            obj-based cmdsets always are added double. Doublets will
            be weeded out with preference to commands defined on
            caller, otherwise just by first-come-first-served.

        """
        unique = {}
        for cmd in self.commands:
            if cmd.key in unique:
                ocmd = unique[cmd.key]
                # only displace the stored command if the new one belongs
                # to caller and the stored one does not
                if (hasattr(cmd, 'obj') and cmd.obj == caller) and not \
                        (hasattr(ocmd, 'obj') and ocmd.obj == caller):
                    unique[cmd.key] = cmd
            else:
                unique[cmd.key] = cmd
        self.commands = listvalues(unique)
        # earlier `in` answers may no longer hold
        self._reset_contains_cache()

    def get_all_cmd_keys_and_aliases(self, caller=None):
        """
        Collects keys/aliases from commands

        Args:
            caller (Object, optional): If set, this is used to check access permissions
                on each command. Only commands that pass are returned.

        Returns:
            names (list): A list of all command keys and aliases in this cmdset. If `caller`
                was given, this list will only contain commands for which
                `cmd.access(caller)` passed.

        """
        names = []
        for cmd in self.commands:
            # without a caller every command is included unchecked
            if not caller or cmd.access(caller):
                names.extend(cmd._keyaliases)
        return names

    def at_cmdset_creation(self):
        """
        Hook method - this should be overloaded in the inheriting
        class, and should take care of populating the cmdset by use of
        self.add().

        """
        pass
class DBAPITestCase(with_metaclass(abc.ABCMeta, object)):
    """Abstract collection of DB-API cursor/connection checks.

    Subclasses supply ``connect()``.  The assertion helpers used below
    (``assertEqual``, ``assertRaises``, ...) are not defined here --
    presumably concrete subclasses also inherit ``unittest.TestCase``;
    verify against the subclasses.  ``with_cursor`` is defined elsewhere in
    the file; it appears to hand each test an open cursor -- TODO confirm.
    """

    @abc.abstractmethod
    def connect(self):
        raise NotImplementedError  # pragma: no cover

    @with_cursor
    def test_fetchone(self, cursor):
        # rownumber is checked before and after consuming the single row.
        cursor.execute('SELECT * FROM one_row')
        self.assertEqual(cursor.rownumber, 0)
        first_row = cursor.fetchone()
        self.assertEqual(first_row, (1, ))
        self.assertEqual(cursor.rownumber, 1)
        exhausted = cursor.fetchone()
        self.assertIsNone(exhausted)

    @with_cursor
    def test_fetchall(self, cursor):
        cursor.execute('SELECT * FROM one_row')
        single = cursor.fetchall()
        self.assertEqual(single, [(1, )])

        cursor.execute('SELECT a FROM many_rows ORDER BY a')
        many = cursor.fetchall()
        self.assertEqual(many, [(i, ) for i in range(10000)])

    @with_cursor
    def test_null_param(self, cursor):
        params = (None, )
        cursor.execute('SELECT %s FROM one_row', params)
        self.assertEqual(cursor.fetchall(), [(None, )])

    @with_cursor
    def test_iterator(self, cursor):
        cursor.execute('SELECT * FROM one_row')
        rows = list(cursor)
        self.assertEqual(rows, [(1, )])
        # Once drained, stepping the cursor again must signal exhaustion.
        with self.assertRaises(StopIteration):
            cursor.__next__()

    @with_cursor
    def test_description_initial(self, cursor):
        description = cursor.description
        self.assertIsNone(description)

    @with_cursor
    def test_description_failed(self, cursor):
        # The failure itself is expected; only the description afterwards
        # is under test.
        try:
            cursor.execute('blah_blah')
        except exc.DatabaseError:
            pass
        self.assertIsNone(cursor.description)

    @with_cursor
    def test_bad_query(self, cursor):
        with self.assertRaises(exc.DatabaseError):
            cursor.execute(
                'SELECT does_not_exist FROM this_really_does_not_exist')
            cursor.fetchone()

    @with_cursor
    def test_concurrent_execution(self, cursor):
        # Execute twice without fetching in between.
        query = 'SELECT * FROM one_row'
        cursor.execute(query)
        cursor.execute(query)
        self.assertEqual(cursor.fetchall(), [(1, )])

    @with_cursor
    def test_executemany(self, cursor):
        for length in (1, 2):
            param_sets = [{'x': i} for i in range(1, length + 1)]
            cursor.executemany('SELECT %(x)d FROM one_row', param_sets)
            # Only the result of the last parameter set is expected.
            self.assertEqual(cursor.fetchall(), [(length, )])

    @with_cursor
    def test_executemany_none(self, cursor):
        cursor.executemany('should_never_get_used', [])
        self.assertIsNone(cursor.description)
        with self.assertRaises(exc.ProgrammingError):
            cursor.fetchone()

    @with_cursor
    def test_fetchone_no_data(self, cursor):
        with self.assertRaises(exc.ProgrammingError):
            cursor.fetchone()

    @with_cursor
    def test_fetchmany(self, cursor):
        cursor.execute('SELECT * FROM many_rows LIMIT 15')
        self.assertEqual(cursor.fetchmany(0), [])
        first_batch = cursor.fetchmany(10)
        self.assertEqual(len(first_batch), 10)
        second_batch = cursor.fetchmany(10)
        self.assertEqual(len(second_batch), 5)

    @with_cursor
    def test_arraysize(self, cursor):
        cursor.arraysize = 5
        cursor.execute('SELECT * FROM many_rows LIMIT 20')
        batch = cursor.fetchmany()
        self.assertEqual(len(batch), 5)

    @with_cursor
    def test_polling_loop(self, cursor):
        """Try to trigger the polling logic in fetchone()"""
        cursor._poll_interval = 0
        cursor.execute('SELECT COUNT(*) FROM many_rows')
        count_row = cursor.fetchone()
        self.assertEqual(count_row, (10000, ))

    @with_cursor
    def test_no_params(self, cursor):
        # Without a params argument the format markers come back verbatim.
        cursor.execute("SELECT '%(x)s' FROM one_row")
        self.assertEqual(cursor.fetchall(), [('%(x)s', )])

    def test_escape(self):
        """Verify that funny characters can be escaped as strings and SELECTed back"""
        bad_str = '''`~!@#$%^&*()_+-={}[]|\\;:'",./<>?\n\r\t '''
        self.run_escape_case(bad_str)

    @with_cursor
    def run_escape_case(self, cursor, bad_str):
        expected = [(1, bad_str)]

        # Positional parameters.
        cursor.execute('SELECT %d, %s FROM one_row', (1, bad_str))
        self.assertEqual(cursor.fetchall(), expected)

        # Named parameters.
        named = {'a': 1, 'b': bad_str}
        cursor.execute('SELECT %(a)d, %(b)s FROM one_row', named)
        self.assertEqual(cursor.fetchall(), expected)

    @with_cursor
    def test_invalid_params(self, cursor):
        with self.assertRaises(exc.ProgrammingError):
            cursor.execute('', 'hi')
        with self.assertRaises(exc.ProgrammingError):
            cursor.execute('', [object])

    def test_open_close(self):
        # Open and close a bare connection, then one with a cursor.
        with contextlib.closing(self.connect()):
            pass
        with contextlib.closing(self.connect()) as connection:
            with contextlib.closing(connection.cursor()):
                pass

    @with_cursor
    def test_unicode(self, cursor):
        unicode_str = "王兢"
        cursor.execute('SELECT %s FROM one_row', (unicode_str, ))
        self.assertEqual(cursor.fetchall(), [(unicode_str, )])

    @with_cursor
    def test_null(self, cursor):
        cursor.execute('SELECT null FROM many_rows')
        all_null = cursor.fetchall()
        self.assertEqual(all_null, [(None, )] * 10000)

        cursor.execute('SELECT IF(a % 11 = 0, null, a) FROM many_rows')
        some_null = cursor.fetchall()
        self.assertEqual(
            some_null,
            [(None if a % 11 == 0 else a, ) for a in range(10000)])

    @with_cursor
    def test_sql_where_in(self, cursor):
        cursor.execute('SELECT * FROM many_rows where a in %s',
                       ([1, 2, 3], ))
        by_a = cursor.fetchall()
        self.assertEqual(len(by_a), 3)

        cursor.execute('SELECT * FROM many_rows where b in %s limit 10',
                       (['blah'], ))
        by_b = cursor.fetchall()
        self.assertEqual(len(by_b), 10)
class KnowledgeRepository(with_metaclass(SubclassRegisteringABCMeta, object)):
    """Abstract base for knowledge-post repositories.

    Public methods normalise post paths with ``_kp_path`` and delegate to
    abstract ``_``-prefixed hooks that concrete backends implement.
    ``_get_subclass_for`` is not defined in this class; presumably it is
    supplied by ``SubclassRegisteringABCMeta`` -- verify there.
    """

    _registry_keys = None

    class PostStatus(Enum):
        '''
        Do not store these values in a datastore, as they may change from release
        to release. These keys should only be used to compare with the output of a
        KnowledgeRepository.
        '''
        DRAFT = 0  # Post is still being written and not yet submitted
        SUBMITTED = 1  # Post is submitted and waiting for review
        UNPUBLISHED = 2  # Post is approved to publish, but not published
        PUBLISHED = 3  # Post is published and visible on /feed

    @classmethod
    def for_uri(cls, uri, *args, **kwargs):
        """Instantiate the repository subclass matching the scheme of `uri`.

        A dict `uri` is handed to `for_uris`; in that case `args`/`kwargs`
        are not forwarded.
        """
        if isinstance(uri, dict):
            return cls.for_uris(uri)
        scheme = urlparse(uri).scheme
        return cls._get_subclass_for(scheme)(uri, *args, **kwargs)

    @classmethod
    def for_uris(cls, uri):
        """Build a MetaKnowledgeRepository from a URI string or a
        ``{name: uri}`` mapping (a bare string is mounted under ``''``)."""
        # Import this within this method so as not to cause import resolution problems
        from .repositories.meta import MetaKnowledgeRepository

        if isinstance(uri, str):
            uris = {'': uri}
        else:
            uris = uri

        krs = {name: cls.for_uri(uri) for name, uri in uris.items()}
        return MetaKnowledgeRepository(krs)

    @classmethod
    def create_for_uri(cls, uri, **kwargs):
        """Call ``create`` on the subclass matching the scheme of `uri`.

        A dict `uri` is passed to `for_uris` instead (`kwargs` unused).
        """
        if isinstance(uri, dict):
            return cls.for_uris(uri)
        scheme = urlparse(uri).scheme
        return cls._get_subclass_for(scheme).create(uri, **kwargs)

    @classmethod
    def create(cls, uri, **kwargs):
        """Backend hook for creating a repository; not implemented here."""
        raise NotImplementedError

    def __init__(self, uri, debug=False, **kwargs):
        """Store `uri`, set up a default configuration and call ``init``.

        Remaining keyword arguments are forwarded to ``init``; the base
        ``init`` accepts none, so subclasses must override it to take any.
        """
        self.uri = uri
        self.config = KnowledgeRepositoryConfig()
        self.config.debug = debug
        self.config.update_defaults(config_defaults)
        self.init(**kwargs)

    def init(self):
        """Subclass initialisation hook; does nothing by default."""
        pass

    @property
    def config(self):
        return self._config

    @config.setter
    def config(self, config):
        assert isinstance(
            config, KnowledgeRepositoryConfig
        ), "`config` should be a `KnowledgeRepositoryConfig` instance."
        self._config = config

    @property
    def uris(self):
        # This translates KnowledgeRepository.uri to a consistent format
        # across all KnowledgeRepository instances: a dictionary of form
        # {<mountpoint>: <uri>}
        # It assumes that self.uri is either a string or a dictionary mapping
        # of form:
        # {<mountpoint>: <KnowledgeRepositoryInstance>}
        if isinstance(self.uri, str):
            return {'': self.uri}
        elif isinstance(self.uri, dict):
            uri_dict = {}

            def add_uris(uri_dict, uris, parent=''):
                # Flatten nested mappings, joining mountpoints posix-style.
                assert isinstance(uris, dict)
                for mountpoint, uri in uris.items():
                    if isinstance(uri, (str, KnowledgeRepository)):
                        uri_dict[posixpath.join(
                            parent, mountpoint
                        )] = uri if isinstance(uri, str) else uri.uri
                    elif isinstance(uri, dict):
                        add_uris(uri_dict, uri,
                                 parent=posixpath.join(parent, mountpoint))
                    else:
                        raise ValueError("Unrecognised uri: {}".format(uri))

            add_uris(uri_dict, self.uri)
            return uri_dict
        raise ValueError("Unrecognised KnowledgeRepository.uri: {}".format(
            self.uri))

    @property
    def revisions(self):
        # This method provides a mapping from uri to revision for this repository
        # and/or any nested repositories. This is most useful when checking if an
        # update is required server side.
        if isinstance(self.uri, str):
            return {self.uri: self.revision}
        elif isinstance(self.uri, dict):
            revision_dict = {}

            def add_revisions(revision_dict, uris):
                assert isinstance(uris, dict)
                for mountpoint, uri in uris.items():
                    if isinstance(uri, str):
                        revision_dict[uri] = KnowledgeRepository.for_uri(
                            uri).revision
                    elif isinstance(uri, KnowledgeRepository):
                        # Note: keyed by the repository instance itself.
                        revision_dict[uri] = uri.revision
                    elif isinstance(uri, dict):
                        add_revisions(revision_dict, uri)
                    else:
                        raise ValueError("Unrecognised uri: {}".format(uri))

            add_revisions(revision_dict, self.uri)
            return revision_dict
        raise ValueError("Unrecognised KnowledgeRepository.uri: {}".format(
            self.uri))

    # ------------- Repository actions / state ------------------------------------

    def session_begin(self):
        pass

    def session_end(self):
        pass

    @abstractproperty
    def revision(self):
        raise NotImplementedError

    def update(self):
        pass

    @abstractproperty
    def status(self):
        raise NotImplementedError

    @abstractproperty
    def status_message(self):
        raise NotImplementedError

    def set_active_draft(self, path):
        pass

    # -------------- Post retrieval methods --------------------------------------

    def post(self, path, revision=None):
        """Return the KnowledgePost stored at `path`.

        If no post exists at the normalised path but the path is a
        configured alias, the alias target is used; an alias that points
        at another alias is rejected.

        Raises:
            ValueError: if `path` is None, or an alias chain is detected.
            AssertionError: if no post exists at the resolved path.
        """
        if path is None:
            raise ValueError("path is None")
        path = self._kp_path(path)
        if not self.has_post(
                path, revision=revision) and path in self.config.aliases:
            path = self.config.aliases[path]
            # Fixed: this previously consulted `self.config.alias`, which
            # does not match the `aliases` mapping used on the lines above.
            if path in self.config.aliases:
                raise ValueError("Alias cycle detected.")
        assert self.has_post(
            path, revision=revision
        ), "{} does not have a post for path '{}'.".format(
            self.__class__.__name__, path)
        return KnowledgePost(path=path,
                             repository=self,
                             revision=revision
                             or self._kp_get_revision(path))

    def dir(self, prefix=None, status=None):
        """Yield post paths under `prefix` having one of the given statuses.

        `prefix` may be None, a string, or an iterable of those.  `status`
        may be None (published only), the alias ``'all'``, a single status
        or a list of statuses.

        This is a generator function, so argument validation only runs
        once iteration starts.
        """
        if prefix is None or isinstance(prefix, str):
            prefixes = [prefix]
        else:
            prefixes = prefix
        assert all([
            prefix is None or isinstance(prefix, str) for prefix in prefixes
        ]), "All path prefixes must be strings."
        prefixes = [
            prefix if prefix is None else posixpath.relpath(prefix)
            for prefix in prefixes
        ]
        if isinstance(status, str):
            if status == 'all':
                status = [
                    self.PostStatus.DRAFT, self.PostStatus.SUBMITTED,
                    self.PostStatus.PUBLISHED, self.PostStatus.UNPUBLISHED
                ]
            else:
                raise ValueError(
                    'Status alias `{}` not recognised.'.format(status))
        if status is not None and not isinstance(status, list):
            status = [status]
        elif status is None:
            status = [self.PostStatus.PUBLISHED]

        # Use old syntax for "yielding from" to maintain support for python 2
        for prefix in prefixes:
            for path in self._dir(prefix=prefix, statuses=status):
                yield path

    @abstractmethod
    def _dir(self, prefix, statuses):
        raise NotImplementedError

    def has_post(self, path, revision=None):
        return self._kp_exists(self._kp_path(path), revision=revision)

    def post_status(self, path, revision=None, detailed=False):
        return self._kp_status(self._kp_path(path),
                               revision=revision,
                               detailed=detailed)

    def post_statuses(self, paths, detailed=False):
        return OrderedDict([(path, self.post_status(path, detailed=detailed))
                            for path in paths])

    def posts(self, status=None, only_valid=False):
        """Yield posts from ``dir(status=status)``, optionally skipping
        those whose ``is_valid()`` is false."""
        for path in self.dir(status=status):
            post = self.post(path)
            if only_valid and not post.is_valid():
                continue
            yield post

    def __getitem__(self, path):
        return self.post(path)

    def __len__(self):
        # Fixed: `dir()` is a generator function, so `len(self.dir())`
        # always raised TypeError; count the yielded paths instead.
        return sum(1 for _ in self.dir())

    def __iter__(self):
        return self.posts()

    def __contains__(self, path):
        return self.has_post(path)

    # -------------- Post submission / addition user flow --------------------

    def add(self, kp, path=None, update=False, **kwargs):
        """Save `kp` into this repository at `path` (default: ``kp.path``).

        Normalises the path and author names, refreshes ``updated_at`` when
        needed, runs the configured postprocessors, then brackets the save
        with the ``_add_prepare`` / ``_add_cleanup`` hooks.

        Returns:
            The same `kp` object.

        Raises:
            AssertionError: if `kp` is not a KnowledgePost.
            ValueError: if no path is given and `kp` carries none.
        """
        # Create a new knowledge post draft
        assert isinstance(
            kp, KnowledgePost
        ), "One can only add KnowledgePost objects to a KnowledgeRepository."
        path = path or kp.path
        if not path:
            raise ValueError(
                "Post path not provided for Knowledge Post, and one is not "
                "specified within the knowledge post. "
                "Either add the path to post headers using `path: <path>` "
                "or specify the project path on the command line adding "
                "`-p <path>` to the current command."
            )
        path = self._kp_path(path)
        path = self.config.path_parse(path)

        current_datetime = datetime.datetime.now()
        authors = kp.headers['authors']
        new_authors = [
            self.config.username_parse(author) for author in authors
        ]
        if new_authors != authors or kp.headers[
                'updated_at'] < current_datetime:
            kp.update_headers(authors=new_authors,
                              updated_at=current_datetime)

        for postprocessor in self.config.postprocessors:
            KnowledgePostProcessor._get_subclass_for(postprocessor).process(kp)

        cleanup_kwargs = self._add_prepare(kp, path, update, **kwargs)

        self._kp_save(kp, path, update=update)

        # Anything returned by the prepare hook is passed on to cleanup.
        if cleanup_kwargs:
            kwargs.update(cleanup_kwargs)
        self._add_cleanup(kp, path, update, **kwargs)

        return kp

    @abstractmethod
    def _add_prepare(self, kp, path, update=False):
        raise NotImplementedError

    @abstractmethod
    def _add_cleanup(self, kp, path, update=False):
        raise NotImplementedError

    def revise(self, kp, path, **kwargs):
        """Shorthand for ``add(kp, path, update=True, **kwargs)``."""
        return self.add(kp, path, update=True, **kwargs)

    def submit(self, path):
        # Submit a post for review
        return self._submit(self._kp_path(path))

    @abstractmethod
    def _submit(self, path):
        # Submit a post for review
        raise NotImplementedError

    def accept(self, path):
        # Accept a post (delegates to the backend's `_accept`)
        return self._accept(self._kp_path(path))

    @abstractmethod
    def _accept(self, path):
        # Accept a post; backend-specific
        raise NotImplementedError

    def publish(self, path):
        # Publish a post for general perusal
        return self._publish(self._kp_path(path))

    @abstractmethod
    def _publish(self, path):
        # Publish a post for general perusal
        raise NotImplementedError

    def unpublish(self, path):
        # Unpublish a post for general perusal
        return self._unpublish(self._kp_path(path))

    @abstractmethod
    def _unpublish(self, path):
        # Unpublish a post for general perusal
        raise NotImplementedError

    def remove(self, path, all=False):
        return self._remove(self._kp_path(path), all=all)

    @abstractmethod
    def _remove(self, path, all=False):
        raise NotImplementedError

    # ----------- Knowledge Post Data Retrieval/Pushing Methods --------------------

    def _kp_repository_uri(self, path):
        return self.uri

    @abstractmethod
    def _kp_uuid(self, path):
        raise NotImplementedError

    def _kp_path(self, path, rel='/'):
        """Normalise `path` to a repository-relative path ending in '.kp'.

        Returns None for a None `path`.

        Raises:
            AssertionError: if a non-final segment ends in '.kp'.
            ValueError: if the path resolves to `rel` itself or outside it.
        """
        if path is None:
            return None
        path = os.path.relpath(os.path.abspath(os.path.join(rel, path)), rel)
        if os.name == 'nt':
            # Use forward slashes so the segment checks below work.
            path = path.replace(os.path.sep, os.path.altsep)
        assert all([
            not segment.endswith('.kp') for segment in path.split('/')[:-1]
        ]), "The post path may not contain a directory named '*.kp'."
        if path == '.' or path.startswith('..'):
            raise ValueError(
                "Provided path '{}' is outside of the knowledge repository.".
                format(path))
        if not path.endswith('.kp'):
            path += '.kp'
        return path

    @abstractmethod
    def _kp_exists(self, path, revision=None):
        raise NotImplementedError

    @abstractmethod
    def _kp_status(self, path, revision=None, detailed=False):
        raise NotImplementedError

    @abstractmethod
    def _kp_get_revision(self, path, status=None):
        raise NotImplementedError

    @abstractmethod
    def _kp_get_revisions(self, path):
        raise NotImplementedError

    @abstractmethod
    def _kp_read_ref(self, path, reference, revision=None):
        raise NotImplementedError

    @abstractmethod
    def _kp_dir(self, path, parent=None, revision=None):
        raise NotImplementedError

    @abstractmethod
    def _kp_has_ref(self, path, reference, revision=None):
        raise NotImplementedError

    @abstractmethod
    def _kp_diff(self, path, head, base):
        raise NotImplementedError

    @abstractmethod
    def _kp_write_ref(self, path, reference, data, uuid=None, revision=None):
        raise NotImplementedError

    @abstractmethod
    def _kp_new_revision(self, path, uuid=None):
        raise NotImplementedError

    def _kp_web_uri(self, path):
        return self.config.web_uri(path)

    def _kp_save(self, kp, path, update=False):
        """Write every reference of `kp` into this repository at `path`.

        Rebinds `kp` (uuid, path, revision, repository) to this repository
        before copying its references.

        Raises:
            ValueError: if a post already exists at `path` and `update` is
                false.
        """
        if not update and self.has_post(path):
            raise ValueError(
                "A knowledge post with the same path already exists. To update it, set the update flag."
            )
        # Prefer the uuid already recorded for this path, if any.
        kp.uuid = self._kp_uuid(path) or kp.uuid
        kp.path = path
        kp.revision = self._kp_new_revision(path, uuid=kp.uuid)
        kp.repository = self

        for ref in kp._dir():
            self._kp_write_ref(path,
                               ref,
                               kp._read_ref(ref),
                               uuid=kp.uuid,
                               revision=kp.revision)

    @property
    def web_uri(self):
        return self.config.web_uri()

    # ----------- Interface with web app ----------------------------------

    def get_app(self, *args, **kwargs):
        """Wrap this repository in a ``KnowledgeFlask`` app and pass it
        through ``config.prepare_app``."""
        # Imported lazily, mirroring the local import in `for_uris`.
        from . import app
        return self.config.prepare_app(
            app.KnowledgeFlask(self, *args, **kwargs))
class Annotations(with_metaclass(Singleton, object)):
    """Registry of JSON schemas keyed by annotation name.

    Every class attribute whose name starts with ``ax_ea_`` is registered
    on construction; ``parse`` validates data against the schema stored
    for an annotation.
    """

    ax_ea_docker_enable = {
        "$schema": "http://json-schema.org/schema#",
        "title": "Validation schema for enabling docker",
        "type": "object",
        "properties": {
            "graph-storage-size": {"type": "string", "pattern": "^[0-9]+Gi$"},
            "cpu_cores": {
                "type": "number",
                "minimum": 0,
                "exclusiveMinimum": True,
            },
            "mem_mib": {"type": "integer", "minimum": 32},
        },
        "required": ["graph-storage-size", "mem_mib", "cpu_cores"],
    }

    ax_ea_executor = {
        "$schema": "http://json-schema.org/schema#",
        "title": "Validation schema for enabling docker",
        "type": "object",
        "properties": {
            "disable": {"type": "boolean"},
        },
        "required": ["disable"],
    }

    ax_ea_privileged = {
        "$schema": "http://json-schema.org/schema#",
        "title": "Validation schema for enabling docker",
        "type": "boolean",
    }

    ax_ea_graph_storage_volume = {
        "$schema": "http://json-schema.org/schema#",
        "title": "Validation schema for enabling docker",
        "type": "object",
        "properties": {
            "graph-storage-size": {"type": "string", "pattern": "^[0-9]+Gi$"},
        },
        "required": ["graph-storage-size"],
    }

    ax_ea_hostname = {
        "$schema": "http://json-schema.org/schema#",
        "title": "Validation schema for enabling docker",
        "type": "string",
    }

    def __init__(self):
        """Register every ``ax_ea_*`` attribute found in the class dict."""
        self._annotations = {}
        class_attrs = self.__class__.__dict__
        for attr_name in class_attrs:
            if not attr_name.startswith("ax_ea_"):
                continue
            self.register(attr_name, class_attrs[attr_name])

    def register(self, annotation, schema):
        """Store `schema` under `annotation`, replacing any earlier entry."""
        self._annotations[annotation] = schema

    def parse(self, annotation, data):
        """Validate `data` against the schema registered for `annotation`.

        Returns nothing on success.

        Raises:
            AXIllegalArgumentException: if no (truthy) schema is registered
                for `annotation`, or if validation fails.
        """
        try:
            schema = self._annotations.get(annotation)
            if not schema:
                message = "annotation {} is not supported".format(annotation)
                raise AXIllegalArgumentException(message)
            validate(data, schema)
        except ValidationError as err:
            raise AXIllegalArgumentException(err.message, detail=err)
class Appliance(with_metaclass(ABCMeta, ExceptionalThread)):
    """Context manager that runs a docker container alongside a thread.

    Entering starts the container via ``docker run`` and then the thread
    (whose ``tryRun`` waits on the docker process); exiting stops and
    removes the container.  Subclasses provide the role name, entry point
    and container command.
    """

    @abstractmethod
    def _getRole(self):
        return 'leader'

    @abstractmethod
    def _containerCommand(self):
        raise NotImplementedError()

    @abstractmethod
    def _entryPoint(self):
        raise NotImplementedError()

    # Lock is used because subprocess is NOT thread safe: http://tinyurl.com/pkp5pgq
    lock = threading.Lock()

    def __init__(self, outer, mounts, cleanMounts=False):
        """
        :param ApplianceTestSupport outer: object whose ``_run`` is used to
               invoke docker commands
        :param mounts: mapping whose items are rendered as
               ``--volume=<key>:<value>``; values must not contain spaces
        :param cleanMounts: if true, mounted directories are emptied on exit
        """
        assert all(
            ' ' not in v
            for v in itervalues(mounts)), 'No spaces allowed in mounts'
        super(ApplianceTestSupport.Appliance, self).__init__()
        self.outer = outer
        self.mounts = mounts
        self.cleanMounts = cleanMounts
        self.containerName = str(uuid.uuid4())
        self.popen = None

    def __enter__(self):
        with self.lock:
            image = applianceSelf()
            # Omitting --rm, it's unreliable, see https://github.com/docker/docker/issues/16575
            args = list(
                concat('docker', 'run',
                       '--entrypoint=' + self._entryPoint(),
                       '--net=host',
                       '-i',
                       '--name=' + self.containerName,
                       ['--volume=%s:%s' % mount
                        for mount in iteritems(self.mounts)],
                       image,
                       self._containerCommand()))
            log.info('Running %r', args)
            self.popen = subprocess.Popen(args)
        self.start()
        self.__wait_running()
        return self

    # noinspection PyUnusedLocal
    def __exit__(self, exc_type, exc_val, exc_tb):
        # The nested try/finally blocks ensure mounts are cleaned and the
        # container is removed even if stopping or joining fails.
        try:
            try:
                self.outer._run('docker', 'stop', self.containerName)
                self.join()
            finally:
                if self.cleanMounts:
                    self.__cleanMounts()
        finally:
            self.outer._run('docker', 'rm', '-f', self.containerName)
        return False  # don't swallow exception

    def __wait_running(self):
        """Poll ``docker inspect`` once a second until the container reports
        that it is running, or until this thread is no longer alive."""
        log.info(
            "Waiting for %s container process to appear. "
            "Expect to see 'Error: No such image or container'.",
            self._getRole())
        # Fixed: `isAlive()` was removed from threading.Thread in Python
        # 3.9; `is_alive()` is available on both old and new versions.
        # NOTE(review): assumes ExceptionalThread derives from
        # threading.Thread -- confirm.
        while self.is_alive():
            try:
                running = self.outer._run('docker', 'inspect',
                                          '--format={{ .State.Running }}',
                                          self.containerName,
                                          capture=True).strip()
            except subprocess.CalledProcessError:
                # Container not visible yet; retry after the sleep below.
                pass
            else:
                if 'true' == running:
                    break
            time.sleep(1)

    def __cleanMounts(self):
        """
        Deletes all files in every mounted directory. Without this step, we risk leaking
        files owned by root on the host. To avoid races, this method should be called after
        the appliance container was stopped, otherwise the running container might still be
        writing files.
        """
        # Delete all files within each mounted directory, but not the directory itself.
        cmd = 'shopt -s dotglob && rm -rf ' + ' '.join(
            v + '/*' for k, v in iteritems(self.mounts) if os.path.isdir(k))
        self.outer._run('docker', 'run', '--rm', '--entrypoint=/bin/bash',
                        applianceSelf(), '-c', cmd)

    def tryRun(self):
        """Block until the ``docker run`` process started in ``__enter__``
        exits."""
        self.popen.wait()
        log.info('Exiting %s', self.__class__.__name__)

    def runOnAppliance(self, *args, **kwargs):
        """Run a command in the container via ``docker exec -i``; extra
        keyword arguments are passed through to ``outer._run``."""
        # Check if thread is still alive. Note that ExceptionalThread.join raises the
        # exception that occurred in the thread.
        self.join(timeout=0)
        # noinspection PyProtectedMember
        self.outer._run('docker', 'exec', '-i', self.containerName, *args,
                        **kwargs)

    def writeToAppliance(self, path, contents):
        """Write `contents` to `path` in the container by feeding it to
        ``tee`` as input."""
        self.runOnAppliance('tee', path, input=contents)

    def deployScript(self, path, packagePath, script):
        """
        Deploy a Python module on the appliance.

        :param path: the path (absolute or relative to the WORKDIR of the appliance container)
               to the root of the package hierarchy where the given module should be placed.
               The given directory should be on the Python path.

        :param packagePath: the desired fully qualified module name (dotted form) of the module

        :param str|callable script: the contents of the Python module. If a callable is given,
               its source code will be extracted. This is a convenience that lets you embed
               user scripts into test code as nested function.
        """
        if callable(script):
            script = self.outer._getScriptSource(script)
        packagePath = packagePath.split('.')
        packages, module = packagePath[:-1], packagePath[-1]
        # Create each intermediate package directory with an empty
        # __init__.py, then write the module itself.
        for package in packages:
            path += '/' + package
            self.runOnAppliance('mkdir', '-p', path)
            self.writeToAppliance(path + '/__init__.py', '')
        self.writeToAppliance(path + '/' + module + '.py', script)
class Entry(with_metaclass(_EntryMeta, object)):
    """This class wraps file or directory. It is an abstract class, but it
    returns a derived instance. You can make an instance such as::

        directory = Entry('/home/someone/public_html')
        assert isinstance(directory, Directory)
        file = Entry('/home/someone/public_html/favicon.ico')
        assert isinstance(file, File)
    """

    # Matches names beginning with a dot.  Raw string: '\.' is an invalid
    # escape sequence in an ordinary string literal.
    HIDDEN = re.compile(r'^\.')

    def __new__(cls, *args, **kwargs):
        """Returns a file or directory instance.

        Raises ``IOError`` if the resolved path is neither an existing
        directory nor an existing file.
        """
        path, rootdir, autoindex = _make_args_for_entry(args, kwargs)
        if rootdir:
            abspath = os.path.join(rootdir.abspath, path)
        else:
            abspath = os.path.abspath(path)
        if os.path.isdir(abspath):
            return Directory.__new__(Directory, path, rootdir, autoindex)
        elif os.path.isfile(abspath):
            return File.__new__(File, path, rootdir, autoindex)
        else:
            raise IOError('{0} does not exists.'.format(abspath))

    def __init__(self, path, rootdir=None, autoindex=None):
        """Initializes an entry instance.

        When `rootdir` is given, `abspath` is resolved relative to it, the
        root's autoindex is used if none was passed, and the entry is
        registered with the root as a descendant.
        """
        self.rootdir = rootdir
        self.autoindex = autoindex
        try:
            rootpath = self.rootdir.abspath
            if not autoindex and self.rootdir:
                self.autoindex = self.rootdir.autoindex
        except AttributeError:
            # No root directory (or it lacks these attributes).
            rootpath = ''
        self.path = path
        self.abspath = os.path.join(rootpath, self.path)
        self.name = os.path.basename(self.abspath)
        self.hidden = bool(self.HIDDEN.match(self.name))
        if self.rootdir:
            self.rootdir._register_descendant(self)

    def is_root(self):
        """Returns ``True`` if it is a root directory."""
        return isinstance(self, RootDirectory)

    @property
    def parent(self):
        """Returns the parent entry: ``None`` for a root directory, the
        root directory itself when it is the direct parent, otherwise a
        new entry for the containing directory."""
        if self.is_root():
            return None
        elif is_same_path(os.path.dirname(self.abspath),
                          self.rootdir.abspath):
            return self.rootdir
        return Entry(os.path.dirname(self.path), self.rootdir)

    @property
    def modified(self):
        """Returns modified time of this."""
        return datetime.fromtimestamp(os.path.getmtime(
            self.abspath)).replace(microsecond=0)

    @classmethod
    def add_icon_rule(cls, icon, rule=None):
        """Adds a new icon rule globally."""
        cls.icon_map.append((icon, rule))

    @classmethod
    def add_icon_rule_by_name(cls, icon, name):
        """Adds a new icon rule by the name globally."""
        cls.add_icon_rule(icon, lambda ent: ent.name == name)

    @classmethod
    def add_icon_rule_by_class(cls, icon, _class):
        """Adds a new icon rule by the class globally."""
        cls.add_icon_rule(icon, lambda ent: isinstance(ent, _class))

    def guess_icon(self):
        """Guesses an icon from itself.

        Returns the icon URL joined onto the ``.silkicon`` endpoint, or the
        string ``'ERROR'`` if building it raises ``AttributeError`` or
        ``RuntimeError``.
        """
        def get_icon_url():
            try:
                # Rules on the autoindex, if any, are checked before the
                # class-level ones.
                if self.autoindex:
                    icon_map = self.autoindex.icon_map + self.icon_map
                else:
                    icon_map = self.icon_map
                for icon, rule in icon_map:
                    if not rule and callable(icon):
                        # A rule-less callable is asked for the icon itself.
                        matched = icon = icon(self)
                    else:
                        matched = rule(self)
                    if matched:
                        return icon
            except AttributeError:
                pass
            try:
                return self.default_icon
            except AttributeError:
                raise GuessError('There is no matched icon.')

        # The original had a further `return get_icon_url()` after this
        # try/except; both branches return, so it was unreachable.
        try:
            return urljoin(url_for('.silkicon', filename=''), get_icon_url())
        except (AttributeError, RuntimeError):
            return 'ERROR'
class ConfigManager(with_metaclass(mixin.Singleton, object)):
    """Unified command-line & file config option manager

    The main user methods are :

    * :py:meth:`register_section`
    * :py:meth:`get`
    * :py:meth:`set_usage`

    Main documentation for option definition : :py:class:`Option`

    Attributes:
      __metaclass__ (:py:class:`xtd.core.mixin.Singleton`) : makes this object a singleton
    """

    def __init__(self):
        self.m_data = {}
        self.m_options = []
        self.m_sections = {}
        self.m_usage = "usage: %prog [options]"
        self.m_cmdParser = None
        self.m_cmdOpts = None
        self.m_cmdArgs = []

    def register_section(self, p_section, p_title, p_options):
        """ Register a set of options to a given section

        See :py:class:`Option` for full documentation of option properties

        Args:
          p_section (str): section tag
          p_title (str): the section title in the command-line usage
          p_options (list of dict): options definition

        Returns:
          ConfigManager: self

        Raises:
          xtd.core.error.ConfigError: invalid option definition
        """
        self.m_sections[p_section] = p_title
        for c_opt in p_options:
            if "name" not in c_opt:
                raise ConfigError("missing mandatory option property 'name'")
            self.register(p_section, c_opt["name"], c_opt)
        return self

    def register(self, p_section, p_name, p_props):
        """ Register an option in a specific section

        See :py:class:`Option` for full documentation of option properties

        Args:
          p_section (str): section name
          p_name (str): option name
          p_props (dict): option properties

        Returns:
          ConfigManager: self
        """
        l_option = Option(p_section, p_name, p_props)
        self.m_options.append(l_option)
        return self

    def sections(self):
        """ Get sections tags

        Returns:
          (list): array of str of all section names
        """
        return list(self.m_data.keys())

    def section_exists(self, p_section):
        """ Indicates if specified section has been registered

        Args:
          p_section (str): section name

        Returns:
          bool : true is ``p_section`` is registered
        """
        return p_section in self.m_data

    def options(self, p_section):
        """ Get the list of all registered option names for a specific section

        Args:
          p_section (str): section name

        Raises:
          xtd.core.error.ConfigError: ``p_section`` not registered

        Returns:
          list: array of str of option names
        """
        if p_section not in self.m_data:
            raise ConfigError("section '%s' doesn't exist" % p_section)
        return list(self.m_data[p_section].keys())

    def option_exists(self, p_section, p_name):
        """ Indicates if specified option has been registered in section

        Args:
          p_section (str): section name
          p_name (str): option name

        Returns:
          bool : true is ``p_section`` is registered and contains ``p_name``
        """
        if p_section not in self.m_data:
            return False
        return p_name in self.m_data[p_section]

    def get(self, p_section, p_name):
        """ Get option value

        Args:
          p_section (str): section name
          p_name (str): option name

        Raises:
          xtd.core.error.ConfigValueError: section/option not found

        Returns:
          (undefined): current option value
        """
        if p_section not in self.m_data or p_name not in self.m_data[p_section]:
            raise ConfigValueError(p_section, p_name, "unknown configuration entry")
        return self.m_data[p_section][p_name]

    def set(self, p_section, p_name, p_value):
        """set option value

        Warning:
          This method stores the input value immediately without validating
          it against option's checks.

        Args:
          p_section (str): section name
          p_name (str): option name
          p_value: the value to store

        Raises:
          xtd.core.error.ConfigValueError: section/option not found
        """
        if p_section not in self.m_data or p_name not in self.m_data[p_section]:
            raise ConfigValueError(p_section, p_name, "unknown configuration entry")
        self.m_data[p_section][p_name] = p_value

    def help(self, p_file=None):
        """ Display command line help message

        Args:
          p_file (file): output stream, defaults to sys.stdout
        """
        self.m_cmdParser.print_help(p_file)

    def initialize(self):
        """ Initializes object

        Resets any previously parsed state, reloads every registered option
        with its default value and rebuilds the command-line parser.

        Usually called by :py:class:`~xtd.core.application.Application` object.
        """
        self.m_cmdParser = None
        self.m_cmdOpts = None
        self.m_cmdArgs = []
        self.m_data = {}
        self._load_data()
        self._cmd_parser_create()

    def parse(self, p_argv=None):
        """ Parses command line and file options

        Usually called by :py:class:`~xtd.core.application.Application` object.

        Args:
          p_argv (list of str) : list of command line arguments, defaults to ``sys.argv``
        """
        if p_argv is None:
            p_argv = sys.argv
        self._cmd_parser_load(p_argv)
        self._file_parser_load()

    def get_name(self):
        """Get parsed application name ``sys.argv[0]``

        Returns:
          str: program's ``sys.argv[0]``
        """
        return self.m_cmdArgs[0]

    def get_args(self):
        """Get command line post-parse remaining options

        Returns:
          list: unparsed command line options
        """
        return self.m_cmdArgs[1:]

    def set_usage(self, p_usage):
        """Set command line usage message

        See :py:class:`optparse.OptionParser`

        Args:
          p_usage (str): usage string
        """
        self.m_usage = p_usage

    def _get_option(self, p_section, p_name):
        """Return the first registered option matching section and name.

        Raises:
          xtd.core.error.ConfigValueError: no such option registered
        """
        l_values = [
            x for x in self.m_options
            if x.m_section == p_section and x.m_name == p_name
        ]
        if not l_values:
            raise ConfigValueError(p_section, p_name, "unknown configuration entry")
        return l_values[0]

    def _load_data(self):
        """Fill ``m_data`` with the default value of every registered option."""
        for c_option in self.m_options:
            if c_option.m_section not in self.m_data:
                self.m_data[c_option.m_section] = {}
            self.m_data[c_option.m_section][c_option.m_name] = c_option.m_default

    @staticmethod
    def _cmd_attribute_name(p_section, p_option):
        """Build the optparse ``dest`` attribute name of an option."""
        return "parse_%(section)s_%(key)s" % {
            "section": p_section,
            "key": p_option.replace("-", "_")
        }

    def _cmd_parser_create(self):
        """Create the optparse parser, one option group per section.

        Only options whose ``m_cmdline`` is true are added to the parser.
        """
        self.m_cmdParser = optparse.OptionParser(
            usage=self.m_usage,
            formatter=IndentedHelpFormatterWithNL())
        l_sections = {x.m_section for x in self.m_options}
        for c_section in sorted(l_sections):
            l_sectionName = self.m_sections.get(c_section, "")
            l_group = optparse.OptionGroup(self.m_cmdParser, l_sectionName)
            l_options = [
                x for x in self.m_options
                if x.m_section == c_section and x.m_cmdline
            ]
            for c_opt in l_options:
                l_args = []
                # The parser default stays None so that an option missing
                # from the command line can be told apart from a given one.
                l_kwds = {
                    "help": c_opt.m_description,
                    "default": None,
                    "action": "store",
                    "dest": self._cmd_attribute_name(c_section, c_opt.m_name)
                }
                if not c_opt.m_valued:
                    l_kwds["action"] = "store_true"
                else:
                    l_kwds["metavar"] = "ARG"
                if c_opt.m_default is not None:
                    l_kwds["help"] += " [default:%s]" % str(c_opt.m_default)
                l_args.append(c_opt.m_longopt)
                l_group.add_option(*l_args, **l_kwds)
            self.m_cmdParser.add_option_group(l_group)

    def _cmd_parser_load(self, p_argv):
        """Parse ``p_argv`` and store every validated command-line value.

        Raises:
          xtd.core.error.ConfigValueError: a mandatory option is missing
        """
        self.m_cmdOpts, self.m_cmdArgs = self.m_cmdParser.parse_args(p_argv)
        for c_option in [x for x in self.m_options if x.m_cmdline]:
            l_attribute = self._cmd_attribute_name(c_option.m_section, c_option.m_name)
            l_value = getattr(self.m_cmdOpts, l_attribute)
            if l_value is not None:
                l_value = self._validate(c_option.m_section, c_option.m_name, l_value)
                self.set(c_option.m_section, c_option.m_name, l_value)
            elif c_option.m_mandatory:
                raise ConfigValueError(c_option.m_section, c_option.m_name, "option is mandatory")

    def option_cmdline_given(self, p_section, p_option):
        """ Indicates if the option was given on the command line

        Args:
          p_section (str): section name
          p_option (str): option name

        Returns:
          bool : true if the option is registered and the parsed command
          line holds a value for it
        """
        if self.option_exists(p_section, p_option):
            l_name = self._cmd_attribute_name(p_section, p_option)
            # Options that are not exposed on the command line have no
            # attribute on the parsed values (and nothing is parsed yet when
            # m_cmdOpts is None): treat both as "not given" rather than
            # letting AttributeError escape.
            l_value = getattr(self.m_cmdOpts, l_name, None)
            return l_value is not None
        return False

    def _file_parser_load(self):
        """Load option values from the JSON file named by general/config-file.

        Lines starting with ``//`` (after optional whitespace) are dropped
        before parsing. A value from the file is applied only when the
        option's ``m_config`` is true and the option was not given on the
        command line.

        Raises:
          xtd.core.error.ConfigValueError: unreadable or invalid file, or
            the file holds an unregistered section/option
        """
        if not self.section_exists("general") or not self.option_exists("general", "config-file"):
            return
        l_fileName = self._validate("general", "config-file")
        try:
            with future_open(l_fileName, mode="r", encoding="utf-8") as l_file:
                l_lines = [x for x in l_file.readlines() if not re.match(r"^\s*//.*", x)]
            l_content = "\n".join(l_lines)
            l_data = json.loads(l_content)
        except Exception as l_error:
            l_message = "invalid json configuration : %s" % str(l_error)
            raise ConfigValueError("general", "config-file", l_message)
        for c_section, c_data in l_data.items():
            for c_option, c_value in c_data.items():
                l_option = self._get_option(c_section, c_option)
                if l_option.m_config and not self.option_cmdline_given(c_section, c_option):
                    l_value = self._validate(c_section, c_option, c_value)
                    self.set(c_section, c_option, l_value)

    def _validate(self, p_section, p_name, p_value=None):
        """Validate a value against the option's checks and return the result.

        When ``p_value`` is None the currently stored value is validated.
        """
        if p_value is None:
            p_value = self.get(p_section, p_name)
        l_option = self._get_option(p_section, p_name)
        return l_option.validate(p_value)