Example #1
class EntityQueue:
    def __init__(self, maxsize = 1000):
        self.queue = Queue(maxsize)
        self.enqueuing_flags = {}

    def put(self, item, block = True, timeout = None):
        self.queue.put(item, block, timeout=timeout)

    def get(self, block = True, timeout = None):
        return self.queue.get(block, timeout)

    def qsize(self):
        return self.queue.qsize()

    def empty(self):
        return self.queue.empty() and not self.is_enqueuing()

    def full(self):
        return self.queue.full()

    def add_enqueuing_flag(self, id):
        self.enqueuing_flags[id] = True

    def update_enqueuing_flag(self, id, state):
        self.enqueuing_flags[id] = state

    def is_enqueuing(self):
        # True only while every registered producer still has its flag set
        # (vacuously True when no flags have been registered yet).
        return all(self.enqueuing_flags.values())
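
A brief usage sketch for the class above (assuming from queue import Queue is already in scope, as the class requires): a producer registers a flag before it starts and clears it when finished, so empty() reports True only after every registered producer is done and the queue has drained.

from queue import Empty
from threading import Thread

eq = EntityQueue(maxsize=10)
eq.add_enqueuing_flag("producer-1")                # register the producer before it starts

def produce():
    for i in range(5):
        eq.put(i)
    eq.update_enqueuing_flag("producer-1", False)  # signal that production is finished

Thread(target=produce).start()

while not eq.empty():                              # loops until the flag is cleared and the queue drains
    try:
        print(eq.get(timeout=0.5))
    except Empty:
        continue                                   # flag still set but nothing queued yet; poll again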
Example #2
class FileVideoStream:
	def __init__(self, path, queueSize=128):
		# initialize the file video stream along with the boolean
		# used to indicate if the thread should be stopped or not
		self.stream = cv2.VideoCapture(path)
		self.stopped = False

		# initialize the queue used to store frames read from
		# the video file
		self.Q = Queue(maxsize=queueSize)

	def start(self):
		# start a thread to read frames from the file video stream
		t = Thread(target=self.update, args=())
		t.daemon = True
		t.start()
		return self

	def update(self):
		# keep looping infinitely
		while True:
			# if the thread indicator variable is set, stop the
			# thread
			if self.stopped:
				return

			# otherwise, ensure the queue has room in it
			if not self.Q.full():
				# read the next frame from the file
				(grabbed, frame) = self.stream.read()

				# if the `grabbed` boolean is `False`, then we have
				# reached the end of the video file
				if not grabbed:
					self.stop()
					return

				# add the frame to the queue
				self.Q.put(frame)

	def read(self):
		# return next frame in the queue
		return self.Q.get()

	def more(self):
		# return True if there are still frames in the queue
		return self.Q.qsize() > 0

	def stop(self):
		# indicate that the thread should be stopped
		self.stopped = True
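
A typical consumption loop for the stream class above might look like the following sketch; the video path is hypothetical, and the short sleep simply gives the reader thread time to buffer frames before the main loop starts draining them.

import time

fvs = FileVideoStream("video.mp4").start()
time.sleep(1.0)                 # let the reader thread fill the queue

while fvs.more():               # frames remain buffered in the queue
    frame = fvs.read()
    # ... process the frame here ...

fvs.stop()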
Example #3
class ScrambleGenerator():
    def __init__(self, size = 3, capacity = 10, random_state = True, moves = -1):
        self.cube = Cube(size)
        self.queue = Queue(max((capacity, 0)))
        self.random_state = random_state
        self.moves = moves
        self.thread = Thread(target=self.enqueue_scramble)
        self.stopped = False
        self.thread.start()

    def enqueue_scramble(self):
        """Fill a given Queue with scramble until it is either full or a given capacity has been reached"""
        while not self.stopped:
            if not self.queue.full():
                self.queue.put(self.cube.get_scramble(self.random_state, self.moves))

    def __next__(self):
        """Remove and return the next scramble in the queue"""
        return self.queue.get()

    def __enter__(self):
        """Start the scramble generating thread"""
        if self.stopped:
            self.stopped = False
            self.thread.start()
        return self

    def __exit__(self, type = None, value = None, traceback = None):
        """Stop the scramble generating thread"""
        if not self.stopped:
            self.stopped = True
            self.thread.join()

    def __iter__(self):
        """Make this generator iterable by return itself"""
        return self

    start, stop = __enter__, __exit__
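
Because the class is both a context manager and an iterator, a minimal usage sketch (assuming the Cube class it depends on is importable) could look like:

with ScrambleGenerator(size=3, capacity=5) as generator:
    for _ in range(3):
        print(next(generator))  # blocks until the background thread has produced a scramble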
Example #4
class ScrambleGenerator:
    def __init__(self, puzzle=None, random=True, length=None, capacity=10):
        self.puzzle = puzzle if puzzle else Cube(3)
        self.queue = Queue(max((capacity, 1)))
        self.random = random
        self.length = length
        self.thread = Thread(target=self.enqueue_scramble)
        self.stopped = False
        self.thread.start()

    def enqueue_scramble(self):
        """Fill a given Queue with scramble until it is either full or a given capacity has been reached"""
        while not self.stopped:
            if not self.queue.full():
                self.queue.put(self.puzzle.get_scramble(self.random, self.length))

    def __next__(self):
        """Remove and return the next scramble in the queue"""
        return self.queue.get()

    def __enter__(self):
        """Start the scramble generating thread"""
        if self.stopped:
            self.stopped = False
            self.thread.start()
        return self

    def __exit__(self, type=None, value=None, traceback=None):
        """Stop the scramble generating thread"""
        if not self.stopped:
            self.stopped = True
            self.thread.join()

    def __iter__(self):
        """Make this generator iterable by return itself"""
        return self

    start, stop = __enter__, __exit__
Example #5
class FgVinsEnv(AbstractEnv):
  # State space representation: [x, y, z, phi, theta, psi, x_dot, y_dot, z_dot, p, q, r]
  #   - where phi, theta and psi are Euler angles
  # Control input (Action space): 4 propellers' speed
  # Use notation in Vijar Kumar's paper "Minimum Snap Trajectory Generation and Control for Quadrotors"

  def __init__(self, start_time):
    super(FgVinsEnv, self).__init__(start_time)
    self.vins_queue = Queue(maxsize=1)
    self.is_vins_inited = False

  def _vins_callback(self, data):
    """State estimated from VINS Mono

    :param data:
    :return:
    """
    if self.vins_queue.full():
      self.vins_queue.get(False)

    if not self.is_vins_inited:
      print('Successfully initialized VINS!')

    self.is_vins_inited = True

    # get estimated position
    position = np.array(
      [data.pose.pose.position.x, data.pose.pose.position.y, data.pose.pose.position.z + self.xyz_offset[2]])

    # get estimated pose
    (roll, pitch, yaw) = tf.transformations.euler_from_quaternion(
      [data.pose.pose.orientation.x, data.pose.pose.orientation.y, data.pose.pose.orientation.z,
       data.pose.pose.orientation.w])
    pose = np.array([roll, pitch, yaw])

    # get estimated velocity
    velocity = np.array([data.twist.twist.linear.x, data.twist.twist.linear.y, data.twist.twist.linear.z])
    self.vins_queue.put((position, pose, velocity))

  def estimate_states(self):
    if not self.imu_queue.empty():
      try:
        self.states[9:12] = self.imu_queue.get(False)
      except Exception as e:
        pass

    try:
      if not self.is_vins_inited:
        if not self.range_finder_queue.empty():
          height = self.range_finder_queue.get(False)
          self.states[2] = height + self.height_offset
      else:
        if not self.vins_queue.empty():
          position, pose, velocity = self.vins_queue.get(False)
          self.states[0:3] = position
          self.states[3:6] = pose
          self.states[6:9] = velocity
    except Exception as e:
      pass

  def attach_listeners(self):
    rospy.Subscriber('/tf', tf2_msgs.msg.TFMessage, self._ground_truth_callback, queue_size=1000, buff_size=2 ** 20)
    rospy.Subscriber('/uav/sensors/imu', s_msgs.Imu, self._imu_callback, queue_size=1000, buff_size=2 ** 20)
    rospy.Subscriber('/uav/camera/left/ir_beacons', fg_msg.IRMarkerArray, self._ir_marker_callback, queue_size=1,
                     buff_size=2 ** 20)
    rospy.Subscriber('/uav/sensors/downward_laser_rangefinder', s_msgs.Range, self._range_finder_callback, queue_size=1,
                     buff_size=2 ** 20)
    rospy.Subscriber('/vins_estimator/odometry', nav_msgs.Odometry, self._vins_callback, queue_size=1,
                     buff_size=2 ** 20)
    rospy.spin()
Example #6
class FileVideoStream(VideoStream):
    def __init__(self, path, resolution, queueSize=128):
        VideoStream.__init__(self, resolution = resolution)
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.stream = cv2.VideoCapture(path)
        self.stopped = False

        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)


    def start(self):
        # start a thread to read frames from the file video stream
        self.__thread = Thread(target=self.update, args=())
        self.__thread.daemon = True
        self.__thread.start()
        
        return self

    def update(self):
        # keep looping infinitely
        while True:
            # if the thread indicator variable is set, stop the
            # thread
            if self.stopped:
                break
     
            # otherwise, ensure the queue has room in it
            if not self.Q.full():
                # read the next frame from the file
                (grabbed, frame) = self.stream.read()
     
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.stop()
                    break
     
                # add the frame to the queue
                self.Q.put(frame)
            else:
                time.sleep(0.01)

        self.stream.release()
                
    def running(self):
        return self.more() or not self.stopped

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue. If stream is not stopped, try to wait a moment
        tries = 0
        while self.Q.qsize() == 0 and not self.stopped and tries < 5:
            time.sleep(0.1)
            tries += 1
            
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
        # wait until stream resources are released (producer thread might be still grabbing frame)
        self.__thread.join()
Example #7
class Build(object):
    """
    A build is a single execution of any configured job. This class:
        - exposes the overall status of the build
        - keeps track of the build's subjobs and their completion state
        - manages slaves that have been assigned to accept this build's subjobs

    :type _build_id: int
    :type _build_request: BuildRequest
    :type _build_artifact: None | BuildArtifact
    :type _error_message: None | str
    :type _project_type: None | ProjectType
    :type _timing_file_path: None | str
    """
    _build_id_counter = Counter()  # class-level counter for assigning build ids

    def __init__(self, build_request):
        """
        :type build_request: BuildRequest
        """
        self._logger = get_logger(__name__)
        self._build_id = self._build_id_counter.increment()
        self._build_request = build_request
        self._artifacts_archive_file = None
        self._build_artifact = None

        self._error_message = None
        self._preparation_coin = SingleUseCoin()  # protects against separate threads calling prepare() more than once

        self._project_type = None
        self._build_completion_lock = Lock()  # protects against more than one thread detecting the build's finish

        self._all_subjobs_by_id = {}
        self._unstarted_subjobs = None  # WIP(joey): Move subjob queues to BuildScheduler class.
        self._finished_subjobs = None
        self._failed_atoms = None
        self._postbuild_tasks_are_finished = False  # WIP(joey): Remove and use build state.
        self._timing_file_path = None

        self._state_machine = BuildFsm(
            build_id=self._build_id,
            enter_state_callbacks={
                BuildState.ERROR: self._on_enter_error_state,
                BuildState.CANCELED: self._on_enter_canceled_state,
            }
        )

    def api_representation(self):
        failed_atoms_api_representation = None
        if self._get_failed_atoms() is not None:
            failed_atoms_api_representation = [failed_atom.api_representation()
                                               for failed_atom in self._get_failed_atoms()]
        build_state = self._status()
        # todo: PREPARING/PREPARED are new states -- make sure clients can handle them before exposing.
        if build_state in (BuildState.PREPARING, BuildState.PREPARED):
            build_state = BuildState.QUEUED

        return {
            'id': self._build_id,
            'status': build_state,
            'artifacts': self._artifacts_archive_file,  # todo: this should probably be a url, not a file path
            'details': self._detail_message,
            'error_message': self._error_message,
            'num_atoms': self._num_atoms,
            'num_subjobs': len(self._all_subjobs_by_id),
            'failed_atoms': failed_atoms_api_representation,
            'result': self._result(),
            'request_params': self.build_request.build_parameters(),
            # Convert self._state_timestamps to OrderedDict to make raw API response more readable. Sort the entries
            # by numerically increasing dict value, with None values sorting highest.
            'state_timestamps': OrderedDict(sorted(
                [(state.lower(), timestamp) for state, timestamp in self._state_machine.transition_timestamps.items()],
                key=lambda item: item[1] or float('inf'))),
        }

    def generate_project_type(self):
        """
        Instantiate the project type for this build, populating the self._project_type instance variable.

        As a side effect, this method also updates the build request's build_parameters dictionary
        with the unique workspace directory path for this build.

        :raises BuildProjectError when failed to instantiate project type
        """
        # Generate a unique project build directory name that will be symlinked to the actual project directory
        # later on when the project gets fetched.
        build_specific_project_directory = self._generate_unique_symlink_path_for_build_repo()

        # Because build_specific_project_directory is entirely internal and generated by ClusterRunner (it is a
        # build-unique generated symlink), we must manually add it to the project_type_params
        project_type_params = self.build_request.build_parameters()
        project_type_params.update({'build_project_directory': build_specific_project_directory})
        self._project_type = util.create_project_type(project_type_params)
        if self._project_type is None:
            raise BuildProjectError('Build failed due to an invalid project type.')

    def prepare(self, subjob_calculator):
        """
        :param subjob_calculator: Used after project fetch to atomize and group subjobs for this build
        :type subjob_calculator: SubjobCalculator
        """
        if not isinstance(self.build_request, BuildRequest):
            raise RuntimeError('Build {} has no associated request object.'.format(self._build_id))

        if not isinstance(self.project_type, ProjectType):
            raise RuntimeError('Build {} has no project set.'.format(self._build_id))

        if not self._preparation_coin.spend():
            raise RuntimeError('prepare() was called more than once on build {}.'.format(self._build_id))

        self._state_machine.trigger(BuildEvent.START_PREPARE)
        # WIP(joey): Move the following code into a PREPARING state callback
        #  (so that it won't execute if the build has already been canceled.)

        self._logger.info('Fetching project for build {}.', self._build_id)
        self.project_type.fetch_project()
        self._logger.info('Successfully fetched project for build {}.', self._build_id)

        job_config = self.project_type.job_config()
        if job_config is None:
            raise RuntimeError('Build failed while trying to parse clusterrunner.yaml.')

        subjobs = subjob_calculator.compute_subjobs_for_build(self._build_id, job_config, self.project_type)

        self._unstarted_subjobs = Queue(maxsize=len(subjobs))  # WIP(joey): Move this into BuildScheduler?
        self._finished_subjobs = Queue(maxsize=len(subjobs))  # WIP(joey): Remove this and just record finished count.

        for subjob in subjobs:
            self._all_subjobs_by_id[subjob.subjob_id()] = subjob
            self._unstarted_subjobs.put(subjob)

        self._timing_file_path = self._project_type.timing_file_path(job_config.name)
        app.util.fs.create_dir(self._build_results_dir())
        self._state_machine.trigger(BuildEvent.FINISH_PREPARE)

    def build_id(self):
        """
        :rtype: int
        """
        return self._build_id

    @property
    def build_request(self):
        """
        :rtype: BuildRequest
        """
        return self._build_request

    def all_subjobs(self):
        """
        Returns a list of subjobs for this build
        :rtype: list[Subjob]
        """
        return [subjob for subjob in self._all_subjobs_by_id.values()]

    def subjob(self, subjob_id):
        """
        Returns a single subjob
        :type subjob_id: int
        :rtype: Subjob
        """
        subjob = self._all_subjobs_by_id.get(subjob_id)
        if subjob is None:
            raise ItemNotFoundError('Invalid subjob id.')
        return subjob

    def complete_subjob(self, subjob_id, payload=None):
        """
        Handle the subjob payload and mark the given subjob id for this build as complete.
        :type subjob_id: int
        :type payload: dict
        """
        try:
            self._handle_subjob_payload(subjob_id, payload)
            self._mark_subjob_complete(subjob_id)

        except Exception:
            self._logger.exception('Error while completing subjob; marking build as failed.')
            self.mark_failed('Error occurred while completing subjob {}.'.format(subjob_id))
            raise

    def _parse_payload_for_atom_exit_code(self, subjob_id):
        subjob = self.subjob(subjob_id)
        for atom_id in range(len(subjob.atoms)):
            artifact_dir = BuildArtifact.atom_artifact_directory(
                self.build_id(),
                subjob.subjob_id(),
                atom_id,
                result_root=Configuration['results_directory']
            )
            atom_exit_code_file_sys_path = os.path.join(artifact_dir, BuildArtifact.EXIT_CODE_FILE)
            with open(atom_exit_code_file_sys_path, 'r') as atom_exit_code_file:
                subjob.atoms[atom_id].exit_code = int(atom_exit_code_file.read())

    def _handle_subjob_payload(self, subjob_id, payload):
        if not payload:
            self._logger.warning('No payload for subjob {} of build {}.', subjob_id, self._build_id)
            return

        # Assertion: all payloads received from subjobs are uniquely named.
        result_file_path = os.path.join(self._build_results_dir(), payload['filename'])

        try:
            app.util.fs.write_file(payload['body'], result_file_path)
            app.util.fs.extract_tar(result_file_path, delete=True)
            self._parse_payload_for_atom_exit_code(subjob_id)
        except:
            self._logger.warning('Writing payload for subjob {} of build {} FAILED.', subjob_id, self._build_id)
            raise

    def _read_subjob_timings_from_results(self):
        """
        Collect timing data from all subjobs
        :rtype: dict [str, float]
        """
        timings = {}
        for _, subjob in self._all_subjobs_by_id.items():
            timings.update(subjob.read_timings())

        return timings

    def _mark_subjob_complete(self, subjob_id):
        """
        :type subjob_id: int
        """
        subjob = self.subjob(subjob_id)
        subjob.mark_completed()
        with self._build_completion_lock:
            self._finished_subjobs.put(subjob, block=False)
            should_trigger_postbuild_tasks = self._all_subjobs_are_finished() and not self._is_stopped()

        # We use a local variable here which was set inside the _build_completion_lock to prevent a race condition
        if should_trigger_postbuild_tasks:
            self._logger.info("All results received for build {}!", self._build_id)
            SafeThread(target=self._perform_async_postbuild_tasks, name='PostBuild{}'.format(self._build_id)).start()

    def mark_started(self):
        """
        Mark the build as started.
        """
        self._state_machine.trigger(BuildEvent.START_BUILDING)

    def finish(self):
        """
        Perform postbuild task and mark this build as finished.
        """
        # This method also transitions the FSM to finished after the postbuild tasks are complete.
        self._perform_async_postbuild_tasks()

    def mark_failed(self, failure_reason):
        """
        Mark a build as failed and set a failure reason. The failure reason should be something we can present to the
        end user of ClusterRunner, so try not to include detailed references to internal implementation.
        :type failure_reason: str
        """
        self._state_machine.trigger(BuildEvent.FAIL, error_msg=failure_reason)

    def mark_setup_failed(self, failure_reason):
        """
        Mark a build as failed and set a failure reason. Because setup failures don't have any logs, we put the build_id
        in the setup_failed file for easier querying of worker logs.
        :type failure_reason: str
        """
        self._state_machine.trigger(BuildEvent.FAIL, error_msg='{} Build Id: {}.'.format(failure_reason, self._build_id))
        setup_failure_file = os.path.join(self._build_results_dir(), BuildArtifact.SETUP_FAILED_FILE)
        app.util.fs.write_file(str(self._build_id), setup_failure_file)
        self._create_build_artifact()

    def _on_enter_error_state(self, event):
        """
        Store an error message for the build and log the failure. This method is triggered by
        a state machine transition to the ERROR state.
        :param event: The Fysom event object
        """
        # WIP(joey): Should this be a reenter_state callback also? Should it check for previous error message?
        default_error_msg = 'An unspecified error occurred.'
        self._error_message = getattr(event, 'error_msg', default_error_msg)
        self._logger.warning('Build {} failed: {}', self.build_id(), self._error_message)

    def cancel(self):
        """
        Cancel a running build.
        """
        self._logger.notice('Request received to cancel build {}.', self._build_id)
        self._state_machine.trigger(BuildEvent.CANCEL)

    def _on_enter_canceled_state(self, event):
        # Deplete the unstarted subjob queue.
        # WIP(joey): Just remove this completely and adjust behavior of other methods based on self._is_canceled().
        # TODO: Handle situation where cancel() is called while subjobs are being added to _unstarted_subjobs
        while self._unstarted_subjobs is not None and not self._unstarted_subjobs.empty():
            try:
                # A subjob may be asynchronously pulled from this queue, so we need to avoid blocking when empty.
                self._unstarted_subjobs.get(block=False)
            except Empty:
                break

    def validate_update_params(self, update_params):
        """
        Determine if a dict of update params are valid, and generate an error if not
        :param update_params: Params passed into a PUT for this build
        :type update_params: dict [str, str]
        :return: Whether the params are valid and a response containing an error message if not
        :rtype: tuple [bool, dict [str, str]]
        """
        keys_and_values_allowed = {'status': ['canceled']}
        message = None
        for key, value in update_params.items():
            if key not in keys_and_values_allowed.keys():
                message = 'Key ({}) is not in list of allowed keys ({})'.\
                    format(key, ",".join(keys_and_values_allowed.keys()))
            elif value not in keys_and_values_allowed[key]:
                message = 'Value ({}) is not in list of allowed values ({}) for {}'.\
                    format(value, keys_and_values_allowed[key], key)

        if message is not None:
            return False, {'error': message}
        return True, {}

    def update_state(self, update_params):
        """
        Make updates to the state of this build given a set of update params
        :param update_params: The keys and values to update on this build
        :type update_params: dict [str, str]
        """
        success = False
        for key, value in update_params.items():
            if key == 'status':
                if value == 'canceled':
                    self.cancel()
                    success = True
        return success

    @property
    def project_type(self):
        """
        :rtype: ProjectType
        """
        return self._project_type

    @property
    def artifacts_archive_file(self):
        return self._artifacts_archive_file

    # WIP(joey): Change some of these private @properties to methods.
    @property
    def _num_subjobs_total(self):
        return len(self._all_subjobs_by_id)

    @property
    def _num_subjobs_finished(self):
        return 0 if not self._finished_subjobs else self._finished_subjobs.qsize()

    @property
    def _num_atoms(self):
        # todo: blacklist states instead of whitelist, or just check _all_subjobs_by_id directly
        if self._status() not in [BuildState.BUILDING, BuildState.FINISHED]:
            return None
        return sum([len(subjob.atomic_commands()) for subjob in self._all_subjobs_by_id.values()])

    def _all_subjobs_are_finished(self):
        return self._finished_subjobs and self._finished_subjobs.full()

    @property
    def is_finished(self):
        # WIP(joey): Calling logic should check _is_canceled if it needs to instead of including the check here.
        return self._is_canceled() or self._postbuild_tasks_are_finished

    @property
    def _detail_message(self):
        if self._num_subjobs_total > 0:
            return '{} of {} subjobs are complete ({:.1f}%).'.format(
                self._num_subjobs_finished,
                self._num_subjobs_total,
                100 * self._num_subjobs_finished / self._num_subjobs_total
            )
        return None

    def _status(self):  # WIP(joey): Rename to _state.
        """
        :rtype: BuildState
        """
        return self._state_machine.state

    @property
    def has_error(self):
        return self._status() is BuildState.ERROR

    def _is_canceled(self):
        return self._status() is BuildState.CANCELED

    def _is_stopped(self):
        return self._status() in (BuildState.ERROR, BuildState.CANCELED)

    def _get_failed_atoms(self):
        """
        The atoms that failed. Returns None if the build hasn't completed yet. Returns empty set if
        build has completed and no atoms have failed.
        :rtype: list[Atom] | None
        """
        if self._failed_atoms is None and self.is_finished:
            if self._is_canceled():
                return []

            self._failed_atoms = []
            for subjob_id, atom_id in self._build_artifact.get_failed_subjob_and_atom_ids():
                subjob = self.subjob(subjob_id)
                atom = subjob.atoms[atom_id]
                self._failed_atoms.append(atom)

        return self._failed_atoms

    def _result(self):
        """
        Can return one of three values:
            None: the build has not finished yet
            FAILURE: the build was canceled or at least one atom failed
            NO_FAILURES: the build finished with no failed atoms
        :rtype: BuildResult | None
        """
        if self._is_canceled():
            return BuildResult.FAILURE

        if self.is_finished:
            if len(self._build_artifact.get_failed_subjob_and_atom_ids()) == 0:
                return BuildResult.NO_FAILURES
            return BuildResult.FAILURE
        return None

    def _perform_async_postbuild_tasks(self):
        """
        Once a build is complete, certain tasks can be performed asynchronously.
        """
        self._create_build_artifact()
        self._delete_temporary_build_artifact_files()
        self._postbuild_tasks_are_finished = True
        self._state_machine.trigger(BuildEvent.POSTBUILD_TASKS_COMPLETE)

    def _create_build_artifact(self):
        self._build_artifact = BuildArtifact(self._build_results_dir())
        self._build_artifact.generate_failures_file()
        self._build_artifact.write_timing_data(self._timing_file_path, self._read_subjob_timings_from_results())
        self._artifacts_archive_file = app.util.fs.compress_directory(self._build_results_dir(),
                                                                      BuildArtifact.ARTIFACT_FILE_NAME)

    def _delete_temporary_build_artifact_files(self):
        """
        Delete the temporary build result files that are no longer needed, due to the creation of the
        build artifact tarball.

        ONLY call this method after _create_build_artifact() has completed. Otherwise we have lost the build results.
        """
        build_result_dir = self._build_results_dir()
        start_time = time.time()
        for path in os.listdir(build_result_dir):
            # The build result tar-ball is also stored in this same directory, so we must not delete it.
            if path == BuildArtifact.ARTIFACT_FILE_NAME:
                continue
            full_path = os.path.join(build_result_dir, path)
            # Do NOT use app.util.fs.async_delete() here. That call will generate a temp directory for every
            # atom, which can be in the thousands per build, and can lead to running up against the ulimit -Hn.
            if os.path.isdir(full_path):
                shutil.rmtree(full_path, ignore_errors=True)
            else:
                os.remove(full_path)
        end_time = time.time() - start_time
        self._logger.info('Completed deleting artifact files for {}, took {:.1f} seconds.', self._build_id, end_time)

    def _build_results_dir(self):
        return BuildArtifact.build_artifact_directory(self.build_id(), result_root=Configuration['results_directory'])

    def _generate_unique_symlink_path_for_build_repo(self):
        """
        Generate a unique symlink path for a build-specific repo. This method does NOT generate the symlink itself.
        :rtype: str
        """
        return os.path.join(Configuration['build_symlink_directory'], str(uuid.uuid4()))
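
One detail worth calling out in the class above: _finished_subjobs is a Queue sized to the number of subjobs, so full() becomes True exactly when every subjob has reported in, letting the queue double as a thread-safe completion counter. A stripped-down sketch of that idea (the names below are illustrative, not part of ClusterRunner):

from queue import Queue
from threading import Lock, Thread

NUM_SUBJOBS = 4
finished = Queue(maxsize=NUM_SUBJOBS)
completion_lock = Lock()

def report_finished(subjob_id):
    with completion_lock:               # only one thread can observe the transition to "all done"
        finished.put(subjob_id, block=False)
        all_done = finished.full()
    if all_done:
        print("all subjobs finished")

threads = [Thread(target=report_finished, args=(i,)) for i in range(NUM_SUBJOBS)]
for t in threads:
    t.start()
for t in threads:
    t.join()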
Example #8
class Scraper(object):
	"""Scraper for LifeMiles airmiles data.

		For each user account, it runs a ScraperSession (one per thread).
		Note: It is not possible to make parallel requests from a single account as search results
		      are not returned immediately but stored on the server and only presented after two
		      redirects.
		A target search date is generated that falls on a weekday about one month in the future;
		this is to avoid failed searches due to sold out flights for dates in the near future.
	"""

	DATE_FORMAT = "%m-%d-%Y"
	DATE_OUTPUT_FORMAT = "%d %B %Y (%A)"

	def __init__(self, credentials, routes, raw_results_fname, date_offset=28, want_weekday=True):
		self.date = self._generate_future_date(date_offset, want_weekday)
		self.sessions = [ScraperSession(cred, self.date) for cred in credentials]
		self.nsessions = len(self.sessions)
		self.logged_in, self.scraped_routes = False, []
		self.routes, self.pending_routes, self.scraped_routes = routes, None, []
		self.results = Results(raw_results_fname)
		self.lock = Lock()
		self.rq = self.sq = None

	def import_scraped_routes(self):
		"""Imports previously scraped results and removes them from the target routes to avoid repeat searches.

			Should be called before prioritisation or order will be reset.
		"""
		if not self.routes:
			raise Exception("Add all target routes before importing scraped routes")

		self.results.import_into(self.routes)

		self.scraped_routes = [r for r in self.routes if r.scraped]
		self.pending_routes = [r for r in self.routes if not r.scraped]
		logging.info("Imported {} scraped routes".format(len(self.scraped_routes)))

		duplicates = len(self.scraped_routes) - len(set(self.scraped_routes))
		if duplicates:
			logging.info("Found {} duplicate scraped routes".format(duplicates))

	def prioritise_routes_by_success(self):
		"""Reorders the list of routes according to the frequency of successful scrapes for each airport.

			Must be run manually, and is only of benefit if there are already many results from a previous scrape.
			The first ~20% of routes searched will yield the most data.
		"""
		if not (self.scraped_routes and self.pending_routes):
			return

		airport_counts = {}
		routes_with_airmiles = [r for r in self.scraped_routes if r.airmiles != -1]
		for r in routes_with_airmiles:
			airport_counts[r.origin.code] = airport_counts.get(r.origin.code, 0) + 1
			airport_counts[r.dest.code] = airport_counts.get(r.dest.code, 0) + 1

		def priority(route):
			origin_priority = airport_counts.get(route.origin.code, 0)
			dest_priority = airport_counts.get(route.dest.code, 0)

			bonus = 2**20 if (origin_priority > 0 and dest_priority > 0) else 0
			if origin_priority == 0 and dest_priority == 0:
				bonus = -2**20
			p = (origin_priority + 1) * (dest_priority + 1) + bonus
			return p

		self.pending_routes = sorted(self.pending_routes, key=priority, reverse=True)
		logging.info("Prioritised routes")

	def randomise_routes(self):
		if not self.pending_routes:
			self.pending_routes = list(self.routes)
		self.pending_routes = sorted(self.pending_routes, key=lambda _: randint(-10,10))
		logging.info("Randomised routes")

	def rescan(self, date):
		try:
			self.date = datetime.strptime(date, Scraper.DATE_FORMAT)
		except ValueError:
			logging.error("Invalid date specified; should be in the format MM-DD-YYYY")
			return

		print("\n")
		msg = datetime.strftime(self.date, "Rescanning failed routes (SoldOutOrInvalid) for date "+Scraper.DATE_OUTPUT_FORMAT)
		logging.info(msg)
		logging.info("If the scan fails again, the repeat result will still be output (with the new search date)")

		self.pending_routes = [r for r in self.routes if r.scraped and r.no_search_result]
		for r in self.pending_routes:
			r.reset()

		self.randomise_routes()

		logging.info("Set target routes to search to scraped routes with no search result ('ERROR:SoldOutOrInvalid')")
		logging.info("{} routes in total".format(len(self.pending_routes)))
		print("\n")

		self.start()


	def start(self):
		"""Initiate the scrape.

			If the route list is very long, it may take several days or even weeks to complete,
			but can be gracefully interrupted by calling stop() and resumed later.

			The (overall average) scrape rate is estimated at 1000 per hour per account.
			With 5 user accounts, that is about 100,000 per day (running uninterrupted).
		"""
		if not self._init_scrape():
			return

		self.sq = Queue(self.nsessions)
		self.rq = Queue(len(self.pending_routes))
		for route in self.pending_routes:
			self.rq.put(route)

		for i in range(self.nsessions):
			t = Thread(target=self._do_scrape)
			t.daemon = True
			t.start()

		self.results.begin_write()

		for session in self.sessions:
			self.sq.put(session)

		while(True):
			if(self.rq.empty()):
				break
			# necessary because rq.join() blocks and prevents interrupt; signal.pause() not an option on Windows
			time.sleep(1)

		self.results.end_write()

	def stop(self):
		"""Stop the scrape (awaiting completion of active searches."""
		logging.info("Stopping scrape")
		if(self.rq):
			logging.info("Awaiting completion of active searches...\n")
			self.rq.queue.clear()
			while(not self.sq.full()):
				time.sleep(1)
			self.rq = self.sq = None
		self.results.end_write()
		logging.info("Total scraped routes: {}".format(len(self.scraped_routes)))

	def _init_scrape(self):
		logging.info("Scrape started - {} total routes with {} accounts".format(len(self.routes), self.nsessions))
		logging.info(self.date.strftime(("Search date is "+Scraper.DATE_OUTPUT_FORMAT).format(self.date)))
		logging.info("{} of {} target routes scraped".format(len(self.scraped_routes), len(self.routes)))
		logging.info("Appending results to {}".format(self.results.filename))

		if not self.logged_in:
			logging.info("Logging in...\n")
			for session in self.sessions:
				session.login()
			if all(s.logged_in for s in self.sessions):
				self.logged_in = True
			else:
				logging.error("Scrape aborted")
				logging.error("Failed to log in to one or more user accounts. Scrape aborted.")
				return False
		print("\n")
		return True

	def _generate_future_date(self, days_to_add, want_weekday):
		today = datetime.today()
		is_weekend = today.weekday() in (5, 6)
		if is_weekend and want_weekday:
			days_to_add += 3
		future_date = today + timedelta(days=days_to_add)
		return future_date

	def _do_scrape(self, fast_filter_mode=False):
		while(not self.rq.empty()):
			session = self.sq.get()
			route = self.rq.get()

			if fast_filter_mode:
				exists = session._check_route_exists(route)
				if not exists:
					self._save_result(route)
				else:
					pass
			else:
				session.scrape_airmiles(route)
				with self.lock:
					self._save_result(route)

			self.sq.put(session)
			self.sq.task_done()
			self.rq.task_done()

	def _save_result(self, route, print_result=True):
		self.results.write(route)

		self.scraped_routes += [route]
		if print_result:
			nscraped = len(self.scraped_routes)
			if(nscraped % 250 == 0):
				logging.info("Scrape Count = {}".format(nscraped))
			print(route.description(with_error=True))
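
The _do_scrape loop above checks a session out of the bounded sq queue, scrapes one route taken from rq, and returns the session, which amounts to a simple thread-safe object pool. A self-contained sketch of that pattern (session names and routes below are placeholders):

from queue import Queue, Empty
from threading import Thread

session_pool = Queue(maxsize=2)
for name in ("session-a", "session-b"):     # stand-ins for logged-in ScraperSession objects
    session_pool.put(name)

routes = Queue()
for route in ("AAA-BBB", "CCC-DDD", "EEE-FFF"):
    routes.put(route)

def worker():
    while True:
        try:
            route = routes.get_nowait()
        except Empty:
            return                          # no more work
        session = session_pool.get()        # check a session out of the pool
        try:
            print(session, "scraping", route)
        finally:
            session_pool.put(session)       # always return the session to the pool

threads = [Thread(target=worker) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()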
Example #9
class ReplayActor(object):
    def __init__(self,
                 learner_addr,
                 replay_dir,
                 replay_converter_type,
                 policy=None,
                 policy_config=None,
                 model_pool_addrs=None,
                 n_v=1,
                 log_interval=50,
                 step_mul=8,
                 SC2_bin_root='/root/',
                 game_version='3.16.1',
                 unroll_length=32,
                 update_model_freq=32,
                 converter_config=None,
                 agent_cls=None,
                 infserver_addr=None,
                 compress=True,
                 da_rate=-1.,
                 unk_mmr_dft_to=4000,
                 post_process_data=None):
        self._data_pool_apis = ImLearnerAPIs(learner_addr)
        self._SC2_bin_root = SC2_bin_root
        self._log_interval = log_interval
        self._replay_dir = replay_dir
        self._step_mul = step_mul
        self._game_version = game_version
        self._unroll_length = unroll_length
        self._data_queue = Queue(unroll_length)
        self._push_thread = Thread(target=self._push_data,
                                   args=(self._data_queue, ))
        self._push_thread.daemon = True
        self._push_thread.start()
        self.converter_config = {} if converter_config is None else converter_config
        self.converter_config['game_version'] = game_version
        self.replay_converter_type = replay_converter_type
        self._replay_converter = replay_converter_type(**self.converter_config)
        self._use_policy = policy is not None
        self._update_model_freq = update_model_freq
        self.model_key = 'IL-model'
        self._da_rate = da_rate
        self._unk_mmr_dft_to = unk_mmr_dft_to
        self._system = platform.system()
        self._post_process_data = post_process_data
        ob_space, ac_space = self._replay_converter.space
        if self._post_process_data:
            ob_space, ac_space = self._post_process_data(ob_space, ac_space)
        if self._use_policy:
            self.model = None
            policy_config = {} if policy_config is None else policy_config
            agent_cls = agent_cls or PGAgent
            policy_config['batch_size'] = 1
            policy_config['rollout_len'] = 1
            policy_config['use_loss_type'] = 'none'
            self.infserver_addr = infserver_addr
            self.agent = agent_cls(policy,
                                   ob_space,
                                   ac_space,
                                   n_v=n_v,
                                   scope_name="self",
                                   policy_config=policy_config,
                                   use_gpu_id=-1,
                                   infserver_addr=infserver_addr,
                                   compress=compress)
            if infserver_addr is None:
                self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
        self.ds = ILData(ob_space, ac_space, self._use_policy,
                         1)  # hs_len does not matter

    def run(self):
        self.replay_task = self._data_pool_apis.request_replay_task()
        while self.replay_task != "":
            game_version = self.replay_task.game_version or self._game_version
            self._adapt_system(game_version)
            if game_version != self._game_version:
                # need re-init replay converter
                self._game_version = game_version
                self.converter_config['game_version'] = game_version
                self._replay_converter = self.replay_converter_type(
                    **self.converter_config)
            game_core_config = ({} if 'game_core_config'
                                not in self.converter_config else
                                self.converter_config['game_core_config'])
            extractor = ReplayExtractor(
                replay_dir=self._replay_dir,
                replay_filename=self.replay_task.replay_name,
                player_id=self.replay_task.player_id,
                replay_converter=self._replay_converter,
                step_mul=self._step_mul,
                version=game_version,
                game_core_config=game_core_config,
                da_rate=self._da_rate,
                unk_mmr_dft_to=self._unk_mmr_dft_to)
            self._steps = 0
            first_frame = True
            if self._use_policy:
                self.agent.reset()
                self._update_agent_model()
            for frame in extractor.extract():
                if self._post_process_data:
                    obs, act = self._post_process_data(*frame[0])
                else:
                    obs, act = frame[0]
                if self._use_policy:
                    data = (obs, act, self.agent.state,
                            np.array(first_frame, np.bool))
                    self.agent.update_state(obs)
                    first_frame = False
                else:
                    data = (obs, act)
                data = self.ds.flatten(self.ds.structure(data))
                if self._data_queue.full():
                    logger.log("Actor's queue is full.", level=logger.WARN)
                self._data_queue.put((TensorZipper.compress(data), frame[1]))
                logger.log('successfully put one tuple.', level=logger.DEBUG)
                self._steps += 1
                if self._steps % self._log_interval == 0:
                    logger.log(
                        "%d frames of replay task [%s] sent to learner." %
                        (self._steps, self.replay_task))
                if self._use_policy and self._steps % self._update_model_freq == 0:
                    self._update_agent_model()
            logger.log("Replay task [%s] done. %d frames sent to learner." %
                       (self.replay_task, self._steps))
            self.replay_task = self._data_pool_apis.request_replay_task()
        logger.log("All tasks done.")

    def _adapt_system(self, game_version):
        # TODO(pengsun): any stuff for Darwin, Window?
        if self._system == 'Linux':
            # set the SC2PATH for sc2 binary. See deepmind/pysc2 doc.
            if game_version != '4.7.1' or 'SC2PATH' in os.environ:
                os.environ['SC2PATH'] = os.path.join(self._SC2_bin_root,
                                                     game_version)
        return

    def _update_agent_model(self):
        if self.infserver_addr is not None:
            return
        logger.log('entering _update_agents_model',
                   'steps: {}'.format(self._steps),
                   level=logger.DEBUG + 5)
        if self._should_update_model(self.model, self.model_key):
            model = self._model_pool_apis.pull_model(self.model_key)
            self.agent.load_model(model.model)
            self.model = model

    def _should_update_model(self, model, model_key):
        if model is None:
            return True
        else:
            return self._model_pool_apis.pull_attr(
                'updatetime', model_key) > model.updatetime

    def _push_data(self, data_queue):
        """ push trajectory for the learning agent (id 0). Invoked in a thread """
        while data_queue.empty():
            time.sleep(5)
        logger.log('entering _push_data_to_learner',
                   'steps: {}'.format(self._steps),
                   level=logger.DEBUG + 5)
        while True:
            task = self.replay_task
            frames = []
            weights = []
            for _ in range(self._unroll_length):
                frame, weight = data_queue.get()
                frames.append(frame)
                weights.append(weight)
            self._data_pool_apis.push_data((task, frames, weights))
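
The _data_queue/_push_thread pair above is a producer-consumer pipeline in which the consumer drains the queue in fixed-size unrolls before each push to the learner. A minimal, self-contained sketch of that batching pattern (the batch size and payloads here are illustrative):

from queue import Queue
from threading import Thread

UNROLL_LENGTH = 4
data_queue = Queue(maxsize=UNROLL_LENGTH)

def push_data(q):
    # Consumer: collect UNROLL_LENGTH frames at a time, then "push" the batch.
    for _ in range(2):                      # two batches for the demo
        frames = [q.get() for _ in range(UNROLL_LENGTH)]
        print("pushing batch:", frames)

consumer = Thread(target=push_data, args=(data_queue,), daemon=True)
consumer.start()

for step in range(2 * UNROLL_LENGTH):       # producer: stands in for the replay extraction loop
    data_queue.put(("frame", step))

consumer.join()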
Example #10
from queue import Queue
from random import randint
from threading import Thread

q = Queue()  # queue creation is not shown in the original excerpt; assumed unbounded here


def f(q):
    for x in range(10):
        num = randint(0, 1000)
        q.put(num)


def f1(q):
    for y in range(5):
        num = q.get()
        print(num)


# t1 = Thread(target=f, args=(q,))
# t2 = Thread(target=f1, args=(q,))
# t1.start()
# t2.start()
# t1.join()
# t2.join()
q.put(1)
print(q.qsize())
q.get(1)
print(q.empty())
q.put(1)
q.put(1)
q.put(1)
q.put(1)
print(q.full())
q.task_done()
q.task_done()
q.task_done()
q.task_done()
q.task_done()
Example #11
class LockingDeque():
  '''Merge of selected deque and Queue features.

  This provides the locking interface of the Queue and pop, popleft, append,
  appendleft and clear features of the deque.

  Example:
    import time

    ld = LockingDeque()
    def example_task(ld):
      ld.wait() # => task will block until an item is appended
      print(ld.pop())

    thread = Thread(target=example_task, args=(ld,))
    thread.start()   # thread started and locked
    time.sleep(10)
    ld.append("bob") # thread prints "bob"
                     # thread finished
    time.sleep(0.1)
    assert(thread.is_alive() == False)

  '''

  def __init__(self, *args, **kwargs):
    self.deque         = deque(maxlen=HsmWithQueues.QUEUE_SIZE)
    self.locking_queue = Queue(maxsize=HsmWithQueues.QUEUE_SIZE)

  def get(self, block=True, timeout=None):
    '''block on the locking queue, popleft from deque'''
    return self.locking_queue.get(block, timeout)

  def wait(self, block=True, timeout=None):
    '''wait for an append/appendleft event'''
    return self.get(block, timeout)

  def popleft(self):
    return self.deque.popleft()

  def pop(self):
    return self.deque.pop()

  def append(self, item):
    if not self.locking_queue.full():
      # we don't care about storing items in the locking_queue, our information
      # is in the deque, the locking_queue provides the 'get' unlocking feature
      self.locking_queue.put("ready")
      self.deque.append(item)
    else:
      self.deque.rotate(1)
      self.deque.append(item)

    if self.locking_queue.qsize() < len(self.deque):
      while self.locking_queue.qsize() != len(self.deque):
        self.locking_queue.put("ready")

  def appendleft(self, item):
    if not self.locking_queue.full():
      # we don't care about storing items in the locking_queue, our information
      # is in the deque, the locking_queue provides the 'get' locking feature
      self.locking_queue.put("ready")
      self.deque.appendleft(item)

    if self.locking_queue.qsize() < len(self.deque):
      while self.locking_queue.qsize() != len(self.deque):
        self.locking_queue.put("ready")

  def clear(self):
    self.deque.clear()
    try:
      while(True):
        self.locking_queue.get_nowait()
    except:
      self.locking_queue.task_done()

  def task_done(self):
    self.locking_queue.task_done()  # so that join can work

  def qsize(self):
    return self.locking_queue.qsize()

  def __len__(self):
    return len(self.deque)

  def len(self):
    return len(self.deque)
Example #12
from queue import Queue

try:
    while True:
        max_num = int(input(''))
        num = int(input(''))
        q = Queue(maxsize=max_num)
        l = []
        counter = 0
        for i in range(num):
            x = int(input(''))
            if q.full():
                if x not in l:
                    t = q.get()
                    q.put(x)
                    l.remove(t)
                    l.append(x)
                    counter += 1
            else:
                if x not in l:
                    q.put(x)
                    l.append(x)
                    counter += 1
        print(counter)
except EOFError as e:
    pass
Example #13
class Build(object):
    """
    A build is a single execution of any configured job. This class:
        - exposes the overall status of the build
        - keeps track of the build's subjobs and their completion state
        - manages slaves that have been assigned to accept this build's subjobs
    """
    _build_id_counter = Counter()  # class-level counter for assigning build ids

    def __init__(self, build_request):
        """
        :type build_request: BuildRequest
        """
        self._logger = get_logger(__name__)
        self._build_id = self._build_id_counter.increment()
        self.build_request = build_request
        self._artifacts_archive_file = None
        self._build_artifact = None
        """ :type : BuildArtifact"""

        self._error_message = None
        self.is_prepared = False
        self._setup_is_started = False
        self._preparation_coin = SingleUseCoin()  # protects against separate threads calling prepare() more than once
        self._is_canceled = False

        self._project_type = None
        self._build_completion_lock = Lock()  # protects against more than one thread detecting the build's finish

        self._all_subjobs_by_id = {}
        self._unstarted_subjobs = None  # WIP: Move subjob queues to BuildScheduler class.
        self._finished_subjobs = None
        self._failed_atoms = None
        self._postbuild_tasks_are_finished = False
        self._timing_file_path = None

        self._state_timestamps = {status: None
                                  for status in BuildStatus}   # initialize all timestamps to None
        self._record_state_timestamp(BuildStatus.QUEUED)

    def api_representation(self):
        failed_atoms_api_representation = None
        if self._get_failed_atoms() is not None:
            failed_atoms_api_representation = [failed_atom.api_representation()
                                               for failed_atom in self._get_failed_atoms()]

        return {
            'id': self._build_id,
            'status': self._status(),
            'artifacts': self._artifacts_archive_file,  # todo: this should probably be a url, not a file path
            'details': self._detail_message,
            'error_message': self._error_message,
            'num_atoms': self._num_atoms,
            'num_subjobs': len(self._all_subjobs_by_id),
            'failed_atoms': failed_atoms_api_representation,
            'result': self._result(),
            'request_params': self.build_request.build_parameters(),
            # Convert self._state_timestamps to OrderedDict to make raw API response more readable. Sort the entries
            # by numerically increasing dict value, with None values sorting highest.
            'state_timestamps': OrderedDict(sorted(
                [(state.lower(), timestamp) for state, timestamp in self._state_timestamps.items()],
                key=lambda item: item[1] or float('inf'))),
        }

    def generate_project_type(self):
        """
        Instantiate the project type for this build, populating the self._project_type instance variable.

        As a side effect, this method also updates the build request's build_parameters dictionary
        with the unique workspace directory path for this build.

        :raises BuildProjectError when failed to instantiate project type
        """
        # Generate a unique project build directory name that will be symlinked to the actual project directory
        # later on when the project gets fetched.
        build_specific_project_directory = self._generate_unique_symlink_path_for_build_repo()

        # Because build_specific_project_directory is entirely internal and generated by ClusterRunner (it is a
        # build-unique generated symlink), we must manually add it to the project_type_params
        project_type_params = self.build_request.build_parameters()
        project_type_params.update({'build_project_directory': build_specific_project_directory})
        self._project_type = util.create_project_type(project_type_params)

        if self._project_type is None:
            raise BuildProjectError('Build failed due to an invalid project type.')

    def prepare(self, subjob_calculator):
        """
        :param subjob_calculator: Used after project fetch to atomize and group subjobs for this build
        :type subjob_calculator: SubjobCalculator
        """
        if not isinstance(self.build_request, BuildRequest):
            raise RuntimeError('Build {} has no associated request object.'.format(self._build_id))

        if not isinstance(self.project_type, ProjectType):
            raise RuntimeError('Build {} has no project set.'.format(self._build_id))

        if not self._preparation_coin.spend():
            raise RuntimeError('prepare() was called more than once on build {}.'.format(self._build_id))

        self._logger.info('Fetching project for build {}.', self._build_id)
        self.project_type.fetch_project()
        self._logger.info('Successfully fetched project for build {}.', self._build_id)

        job_config = self.project_type.job_config()
        if job_config is None:
            raise RuntimeError('Build failed while trying to parse clusterrunner.yaml.')

        subjobs = subjob_calculator.compute_subjobs_for_build(self._build_id, job_config, self.project_type)

        self._unstarted_subjobs = Queue(maxsize=len(subjobs))
        self._finished_subjobs = Queue(maxsize=len(subjobs))

        for subjob in subjobs:
            self._all_subjobs_by_id[subjob.subjob_id()] = subjob
            self._unstarted_subjobs.put(subjob)

        self._timing_file_path = self._project_type.timing_file_path(job_config.name)
        self.is_prepared = True
        self._record_state_timestamp(BuildStatus.PREPARED)

    def build_id(self):
        """
        :rtype: int
        """
        return self._build_id

    def all_subjobs(self):
        """
        Returns a list of subjobs for this build
        :rtype: list[Subjob]
        """
        return [subjob for subjob in self._all_subjobs_by_id.values()]

    def subjob(self, subjob_id):
        """
        Returns a single subjob
        :type subjob_id: int
        :rtype: Subjob
        """
        subjob = self._all_subjobs_by_id.get(subjob_id)
        if subjob is None:
            raise ItemNotFoundError('Invalid subjob id.')
        return subjob

    def complete_subjob(self, subjob_id, payload=None):
        """
        Handle the subjob payload and mark the given subjob id for this build as complete.
        :type subjob_id: int
        :type payload: dict
        """
        try:
            self._handle_subjob_payload(subjob_id, payload)
            self._mark_subjob_complete(subjob_id)

        except Exception:
            self._logger.exception('Error while completing subjob; marking build as failed.')
            self.mark_failed('Error occurred while completing subjob {}.'.format(subjob_id))
            raise

    def _parse_payload_for_atom_exit_code(self, subjob_id):
        subjob = self.subjob(subjob_id)
        for atom_id in range(len(subjob.atoms)):
            artifact_dir = BuildArtifact.atom_artifact_directory(
                self.build_id(),
                subjob.subjob_id(),
                atom_id,
                result_root=Configuration['results_directory']
            )
            atom_exit_code_file_sys_path = os.path.join(artifact_dir, BuildArtifact.EXIT_CODE_FILE)
            with open(atom_exit_code_file_sys_path, 'r') as atom_exit_code_file:
                subjob.atoms[atom_id].exit_code = int(atom_exit_code_file.read())

    def _handle_subjob_payload(self, subjob_id, payload):
        if not payload:
            self._logger.warning('No payload for subjob {} of build {}.', subjob_id, self._build_id)
            return

        # Assertion: all payloads received from subjobs are uniquely named.
        result_file_path = os.path.join(self._build_results_dir(), payload['filename'])

        try:
            app.util.fs.write_file(payload['body'], result_file_path)
            app.util.fs.extract_tar(result_file_path, delete=True)
            self._parse_payload_for_atom_exit_code(subjob_id)
        except:
            self._logger.warning('Writing payload for subjob {} of build {} FAILED.', subjob_id, self._build_id)
            raise

    def _read_subjob_timings_from_results(self):
        """
        Collect timing data from all subjobs
        :rtype: dict [str, float]
        """
        timings = {}
        for _, subjob in self._all_subjobs_by_id.items():
            timings.update(subjob.read_timings())

        return timings

    def _mark_subjob_complete(self, subjob_id):
        """
        :type subjob_id: int
        """
        subjob = self.subjob(subjob_id)
        subjob.mark_completed()
        with self._build_completion_lock:
            self._finished_subjobs.put(subjob, block=False)
            subjobs_are_finished = self._subjobs_are_finished

        # We use a local variable here which was set inside the _build_completion_lock to prevent a race condition
        if subjobs_are_finished:
            self._logger.info("All results received for build {}!", self._build_id)
            SafeThread(target=self._perform_async_postbuild_tasks, name='PostBuild{}'.format(self._build_id)).start()

    def mark_started(self):
        self._setup_is_started = True
        self._record_state_timestamp(BuildStatus.BUILDING)

    def mark_failed(self, failure_reason):
        """
        Mark a build as failed and set a failure reason. The failure reason should be something we can present to the
        end user of ClusterRunner, so try not to include detailed references to internal implementation.

        :type failure_reason: str
        """
        self._logger.error('Build {} failed: {}', self.build_id(), failure_reason)
        self._error_message = failure_reason
        self._record_state_timestamp(BuildStatus.ERROR)

    def cancel(self):
        """
        Cancel a running build
        """
        # Early exit if build is not running
        if self._status() in [BuildStatus.FINISHED, BuildStatus.ERROR, BuildStatus.CANCELED]:
            self._logger.notice('Ignoring cancel request for build {}. Build is already in state {}.',
                                self._build_id, self._status())
            return

        self._logger.notice('Canceling build {}.', self._build_id)
        self._is_canceled = True
        self._record_state_timestamp(BuildStatus.CANCELED)

        # Deplete the unstarted subjob queue.
        # TODO: Handle situation where cancel() is called while subjobs are being added to _unstarted_subjobs
        while self._unstarted_subjobs is not None and not self._unstarted_subjobs.empty():
            try:
                # A subjob may be asynchronously pulled from this queue, so we need to avoid blocking when empty.
                self._unstarted_subjobs.get(block=False)
            except Empty:
                break

    def validate_update_params(self, update_params):
        """
        Determine whether a dict of update params is valid, and generate an error if not
        :param update_params: Params passed into a PUT for this build
        :type update_params: dict [str, str]
        :return: Whether the params are valid and a response containing an error message if not
        :rtype: tuple [bool, dict [str, str]]
        """
        keys_and_values_allowed = {'status': ['canceled']}
        message = None
        for key, value in update_params.items():
            if key not in keys_and_values_allowed.keys():
                message = 'Key ({}) is not in list of allowed keys ({})'.\
                    format(key, ",".join(keys_and_values_allowed.keys()))
            elif value not in keys_and_values_allowed[key]:
                message = 'Value ({}) is not in list of allowed values ({}) for {}'.\
                    format(value, keys_and_values_allowed[key], key)

        if message is not None:
            return False, {'error': message}
        return True, {}

    def update_state(self, update_params):
        """
        Make updates to the state of this build given a set of update params
        :param update_params: The keys and values to update on this build
        :type update_params: dict [str, str]
        """
        success = False
        for key, value in update_params.items():
            if key == 'status':
                if value == 'canceled':
                    self.cancel()
                    success = True
        return success

    @property
    def project_type(self):
        """
        :rtype: ProjectType
        """
        return self._project_type

    @property
    def artifacts_archive_file(self):
        return self._artifacts_archive_file

    @property
    def _num_subjobs_total(self):
        return len(self._all_subjobs_by_id)

    @property
    def _num_subjobs_finished(self):
        return 0 if not self._finished_subjobs else self._finished_subjobs.qsize()

    @property
    def _num_atoms(self):
        if self._status() not in [BuildStatus.BUILDING, BuildStatus.FINISHED]:
            return None
        return sum([len(subjob.atomic_commands()) for subjob in self._all_subjobs_by_id.values()])

    @property
    def _subjobs_are_finished(self):
        return self._is_canceled or (self.is_prepared and self._finished_subjobs.full())

    @property
    def is_finished(self):
        # TODO: Clean up this logic or move everything into a state machine
        return self._is_canceled or self._postbuild_tasks_are_finished

    @property
    def is_unstarted(self):
        return self.is_prepared and not self._setup_is_started and self._unstarted_subjobs.full()

    @property
    def has_error(self):
        return self._error_message is not None

    @property
    def _detail_message(self):
        if self._num_subjobs_total > 0:
            return '{} of {} subjobs are complete ({:.1f}%).'.format(
                self._num_subjobs_finished,
                self._num_subjobs_total,
                100 * self._num_subjobs_finished / self._num_subjobs_total
            )
        return None

    def _status(self):
        """
        :rtype: BuildStatus
        """
        if self.has_error:
            return BuildStatus.ERROR
        elif self._is_canceled:
            return BuildStatus.CANCELED
        elif not self.is_prepared or self.is_unstarted:
            return BuildStatus.QUEUED
        elif self.is_finished:
            return BuildStatus.FINISHED
        else:
            return BuildStatus.BUILDING

    def _get_failed_atoms(self):
        """
        The atoms that failed. Returns None if the build hasn't completed yet. Returns an empty list if the
        build has completed and no atoms have failed.
        :rtype: list[Atom] | None
        """
        if self._failed_atoms is None and self.is_finished:
            if self._is_canceled:
                return []

            self._failed_atoms = []
            for subjob_id, atom_id in self._build_artifact.get_failed_subjob_and_atom_ids():
                subjob = self.subjob(subjob_id)
                atom = subjob.atoms[atom_id]
                self._failed_atoms.append(atom)

        return self._failed_atoms

    def _result(self):
        """
        :rtype: str | None
        """
        if self._is_canceled:
            return BuildResult.FAILURE

        if self.is_finished:
            if len(self._build_artifact.get_failed_subjob_and_atom_ids()) == 0:
                return BuildResult.NO_FAILURES
            return BuildResult.FAILURE
        return None

    def _perform_async_postbuild_tasks(self):
        """
        Once a build is complete, certain tasks can be performed asynchronously.
        """
        self._create_build_artifact()
        self._logger.debug('Postbuild tasks completed for build {}', self.build_id())
        self._postbuild_tasks_are_finished = True
        self._record_state_timestamp(BuildStatus.FINISHED)

    def _create_build_artifact(self):
        self._build_artifact = BuildArtifact(self._build_results_dir())
        self._build_artifact.generate_failures_file()
        self._build_artifact.write_timing_data(self._timing_file_path, self._read_subjob_timings_from_results())
        self._artifacts_archive_file = app.util.fs.compress_directory(self._build_results_dir(), 'results.tar.gz')

    def _build_results_dir(self):
        return BuildArtifact.build_artifact_directory(self.build_id(), result_root=Configuration['results_directory'])

    def _generate_unique_symlink_path_for_build_repo(self):
        """
        Generate a unique symlink path for a build-specific repo. This method does NOT generate the symlink itself.
        :rtype: str
        """
        return os.path.join(Configuration['build_symlink_directory'], str(uuid.uuid4()))

    def get_state_timestamp(self, build_status):
        """
        Get the recorded timestamp for a given build status. This may be None if the build has not yet reached
        the specified state.
        :param build_status: The build status for which to retrieve the corresponding timestamp
        :type build_status: BuildStatus
        :return: The timestamp for the specified status
        :rtype: float | None
        """
        return self._state_timestamps.get(build_status)

    def _record_state_timestamp(self, build_status):
        """
        Record a timestamp for a given build status. This is used to record the timing of the various build phases and
        is exposed via the Build object's API representation.
        :param build_status: The build status for which to record a timestamp
        :type build_status: BuildStatus
        """
        if self._state_timestamps.get(build_status) is not None:
            self._logger.warning(
                'Overwriting timestamp for build {}, status {}'.format(self.build_id(), build_status))
        self._state_timestamps[build_status] = time.time()
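
The prepare() method above sizes both queues to the number of subjobs, so _finished_subjobs.full() doubles as the "all results received" signal while _unstarted_subjobs simply drains as work is handed out. A minimal, self-contained sketch of that bounded-queue completion pattern (hypothetical names, not ClusterRunner code):

from queue import Queue, Empty

def run_jobs(jobs, worker):
    # Size both queues to the job count so that full() means "everything finished".
    unstarted = Queue(maxsize=len(jobs))
    finished = Queue(maxsize=len(jobs))
    for job in jobs:
        unstarted.put(job)

    while True:
        try:
            job = unstarted.get(block=False)
        except Empty:
            break
        finished.put(worker(job), block=False)

    assert finished.full()  # every job produced a result
    return [finished.get() for _ in range(finished.qsize())]

print(run_jobs([1, 2, 3], lambda j: j * 10))  # [10, 20, 30]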
Example #14
0
class SerialPort:

    __QUEUE_SIZE = 256
    
    def __init__(self, portNumber, baudRate):

        # Decrement the port number as the serial module starts
        # counting at 0
        self._portNumber = portNumber - 1
        self._baudRate = baudRate
        
        self._receiveQueue = Queue(SerialPort.__QUEUE_SIZE)

        # Thread, Comm, and thread flag initialisation
        self.reset()

    def openPort(self):
        # Check to make sure the port isn't already created or open
        if self._serialPort is not None and self._serialPort.isOpen():
            raise SerialPortException("Serial Port is already open.")

        # Create and open the serial port
        self._serialPort = serial.Serial(self._portNumber, self._baudRate)

    def beginReceiving(self):
        # Check if the serial port is open
        if self._serialPort is None:
            raise SerialPortException("Serial Port hasn't been initialised.")

        # Check if a comm thread already exists
        if self._communicationThread is not None:
            raise ThreadException("A communication thread is already running.")

        # Create the thread and start it reading the port    
        self._communicationThread = Thread(target=self.read)
        self._communicationThread.daemon = True
        self._communicationThread.start()
        
    def read(self):
        while not self._killThread:
            # If the queue becomes full (hasn't been read from in ages)
            # discard the oldest item
            if self._receiveQueue.full():
                self._receiveQueue.get()
    
            self._receiveQueue.put(self._serialPort.readline())
            
    def readBuffer(self):
        # Should think of what is the best output format for this
        # Just going to use an array at this stage

        output = list()
        while not self._receiveQueue.empty():
            output.append(self._receiveQueue.get())

        return output

    def reset(self):
        # Initialise the serial port and comm thread to null
        self._serialPort = None
        self._communicationThread = None

        # Initialise the thread termination flag
        self._killThread = False
        
    def closePort(self):
        # Make sure the port isn't already closed
        if self._serialPort is None or not self._serialPort.isOpen():
            raise SerialPortException("Serial Port is either already closed or not initialised.")

        # Set the termination flag and wait for the thread to terminate execution
        self._killThread = True
        if self._communicationThread is not None:
            self._communicationThread.join()

        self._serialPort.close()

        self.reset()
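
SerialPort.read() above keeps only the most recent lines: when the receive queue fills, it discards the oldest entry before putting the new one, so a slow consumer never blocks the reader thread. The same bounded-buffer policy in isolation (single-threaded demo only; full()/get()/put() are not atomic as a group, so a real reader thread would need a lock around this):

from queue import Queue

def put_latest(q, item):
    # Drop the oldest entry when the buffer is full, then enqueue the new one.
    if q.full():
        q.get()
    q.put(item)

q = Queue(maxsize=3)
for line in ['a', 'b', 'c', 'd', 'e']:
    put_latest(q, line)

print(list(q.queue))  # ['c', 'd', 'e'] -- only the newest items survive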
Example #15
0
# importing only the Queue from the queue module
from queue import Queue

# taking an object of Queue()
q = Queue()
print("Initially the size of queue is %s" % q.qsize())
print("Checking whether queue is empty or not. Empty?? = %s" % q.empty())

# enqueueing some value in the object of Queue
q.put('A')
q.put('B')
q.put('C')
q.put('D')
q.put('E')

print("After adding some value, size of queue is %s" % q.qsize())
print("Checking whether queue is full or not. Full?? = %s" % q.full())

# retrieving the values of the Queue
for i in range(q.qsize()):
    print("Retrieved = ", end=' ')
    print(q.get())

# after retrieving, check the size of the object
print("Size of queue is = %s " % q.qsize())
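
The snippet above never blocks because it only calls get() as many times as qsize() reports. With a bounded queue, put() and get() block by default once the queue is full or empty; passing block=False (or a timeout) makes them raise queue.Full or queue.Empty instead, which is the pattern several of the surrounding examples rely on to avoid deadlock:

from queue import Queue, Full, Empty

q = Queue(maxsize=1)
q.put('only item')

try:
    q.put('second item', block=False)   # queue is full, so this raises
except Full:
    print('put would have blocked')

print(q.get())                          # 'only item'

try:
    q.get(block=False)                  # queue is now empty, so this raises
except Empty:
    print('get would have blocked')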
Example #16
0
class FileVideoStream:
    def __init__(self, path, transform=None, queue_size=200):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.frame_number = 0
        self.skip_value = 10
        self.next = self.frame_number + self.skip_value
        self.reset = False
        self.stream = cv2.VideoCapture(path)
        self.stopped = False
        self.grabbed = False
        self.transform = transform

        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queue_size)
        # initialize the thread
        self.thread = Thread(target=self.update, args=())
        self.thread.daemon = True

    def set_next_frame(self, frame_num):
        self.stream.set(1, frame_num)

    def start(self):
        # start a thread to read frames from the file video stream
        self.thread = Thread(target=self.update, args=())
        self.thread.daemon = True
        self.thread.start()
        return self

    def update(self):
        # keep looping infinitely
        while True:
            # if the thread indicator variable is set, stop the
            # thread
            if self.stopped:
                break
            # print(self.next, self.frame_number + 1)
            if self.reset is True:

                # self.next += 1
                self.stream.set(1, self.frame_number)
                self.reset = False
                self.Q.queue.clear()
                #

            # otherwise, ensure the queue has room in it
            if not self.Q.full() or self.grabbed:
                # read the next frame from the file
                # if self.frame_number == 0:
                #     (self.grabbed, frame) = self.stream.read()

                # time.sleep(0.025)
                # self.frame_number += 1
                (self.grabbed, frame) = self.stream.read()
                if self.grabbed:
                    # self.frame_number += 1
                    self.frame_number += self.skip_value

                for i in range(self.skip_value - 1):
                    # self.frame_number += 1
                    self.grabbed = self.stream.grab()

                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not self.grabbed:
                    self.stream.set(
                        1,
                        self.stream.get(cv2.CAP_PROP_FRAME_COUNT) - 1)

                # Sets frame to beginning if frame is past end. This buffers the beginning after it buffers the end
                elif self.frame_number >= self.stream.get(
                        cv2.CAP_PROP_FRAME_COUNT):
                    self.stream.set(1, 0)
                    self.frame_number = 0

                # if there are transforms to be done, might as well
                # do them on producer thread before handing back to
                # consumer thread. ie. Usually the producer is so far
                # ahead of consumer that we have time to spare.
                #
                # Python is not parallel but the transform operations
                # are usually OpenCV native so release the GIL.
                #
                # Really just trying to avoid spinning up additional
                # native threads and overheads of additional
                # producer/consumer queues since this one was generally
                # idle grabbing frames.
                if self.transform and self.grabbed:
                    frame = self.transform(frame)

                # add the frame to the queue
                if self.grabbed:
                    # self.thread.join()
                    self.Q.put((frame, self.frame_number))

            else:
                time.sleep(0.1)  # Rest for 10ms, we have a full queue

        self.stream.release()

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    # Insufficient to have consumer use while(more()) which does
    # not take into account if the producer has reached end of
    # file stream.
    def running(self):
        return self.more() or not self.stopped

    def more(self):
        # return True if there are still frames in the queue. If stream is not stopped, try to wait a moment
        tries = 0
        while self.Q.qsize() == 0 and not self.stopped and tries < 5:
            time.sleep(0.1)
            tries += 1

        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
        # wait until stream resources are released (producer thread might be still grabbing frame)
        try:
            self.thread.join()
            del self.thread
            cv2.destroyAllWindows()
            # os._exit(1)
        except:
            print("Cannot Join Thread (Should only happen while Quitting)")
            cv2.destroyAllWindows()
            exit(0)
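
Both FileVideoStream variants follow the same shape: a daemon thread reads ahead into a bounded Queue while the consumer drains it, and more()/running() poll qsize() to decide when to stop. Stripped of OpenCV, the read-ahead core reduces to a short sketch like the one below (illustrative only; it uses a sentinel item to mark end-of-stream instead of polling):

from queue import Queue
from threading import Thread

_SENTINEL = object()

def buffered(source, queue_size=8):
    """Yield items from `source`, reading ahead on a background thread."""
    q = Queue(maxsize=queue_size)

    def producer():
        for item in source:
            q.put(item)          # blocks when the read-ahead buffer is full
        q.put(_SENTINEL)         # signal end-of-stream to the consumer

    Thread(target=producer, daemon=True).start()

    while True:
        item = q.get()
        if item is _SENTINEL:
            return
        yield item

for frame in buffered(range(5)):
    print(frame)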
Example #17
0
class Build(object):
    """
    A build is a single execution of any configured job. This class:
        - exposes the overall status of the build
        - keeps track of the build's subjobs and their completion state
        - manages slaves that have been assigned to accept this build's subjobs
    """
    _build_id_counter = Counter()  # class-level counter for assigning build ids

    def __init__(self, build_request):
        """
        :type build_request: BuildRequest
        """
        self._logger = get_logger(__name__)
        self._build_id = self._build_id_counter.increment()
        self.build_request = build_request
        self._artifacts_archive_file = None
        self._build_artifact = None
        """ :type : BuildArtifact"""

        self._error_message = None
        self.is_prepared = False
        self._preparation_coin = SingleUseCoin()  # protects against separate threads calling prepare() more than once
        self._is_canceled = False

        self._project_type = None
        self._build_completion_lock = Lock()  # protects against more than one thread detecting the build's finish
        self._subjob_assignment_lock = Lock()  # prevents subjobs from being skipped
        self._slaves_allocated = []
        self._num_executors_allocated = 0
        self._num_executors_in_use = 0

        self._max_executors = float('inf')
        self._max_executors_per_slave = float('inf')

        self._all_subjobs_by_id = {}
        self._unstarted_subjobs = None
        self._finished_subjobs = None
        self._postbuild_tasks_are_finished = False
        self._timing_file_path = None

    def api_representation(self):
        return {
            'id': self._build_id,
            'status': self._status(),
            'artifacts': self._artifacts_archive_file,  # todo: this should probably be a url, not a file path
            'details': self._detail_message,
            'error_message': self._error_message,
            'num_atoms': self._num_atoms,
            'num_subjobs': len(self._all_subjobs_by_id),
            'failed_atoms': self._failed_atoms(),  # todo: print the file contents instead of paths
            'result': self._result(),
            'request_params': self.build_request.build_parameters(),
        }

    def generate_project_type(self):
        """
        Instantiate the project type for this build, populating the self._project_type instance variable.

        As a side effect, this method also updates the build request's build_parameters dictionary
        with the unique workspace directory path for this build.

        :raises BuildProjectError when failed to instantiate project type
        """
        # Generate a unique project build directory name that will be symlinked to the actual project directory
        # later on when the project gets fetched.
        build_specific_project_directory = self._generate_unique_symlink_path_for_build_repo()

        # Because build_specific_project_directory is entirely internal and generated by ClusterRunner (it is a
        # build-unique generated symlink), we must manually add it to the project_type_params
        project_type_params = self.build_request.build_parameters()
        project_type_params.update({'build_project_directory': build_specific_project_directory})
        self._project_type = util.create_project_type(project_type_params)

        if self._project_type is None:
            raise BuildProjectError('Build failed due to an invalid project type.')

    def prepare(self, subjobs, job_config):
        """
        :type subjobs: list[Subjob]
        :type job_config: JobConfig
        """
        if self.project_type is None:
            raise RuntimeError('prepare() was called before generate_project_type() on build {}.'
                               .format(self._build_id))

        if not self._preparation_coin.spend():
            raise RuntimeError('prepare() was called more than once on build {}.'.format(self._build_id))

        self._unstarted_subjobs = Queue(maxsize=len(subjobs))
        self._finished_subjobs = Queue(maxsize=len(subjobs))

        for subjob in subjobs:
            self._all_subjobs_by_id[subjob.subjob_id()] = subjob
            self._unstarted_subjobs.put(subjob)

        self._max_executors = job_config.max_executors
        self._max_executors_per_slave = job_config.max_executors_per_slave
        self._timing_file_path = self.project_type.timing_file_path(job_config.name)
        self.is_prepared = True

    def build_id(self):
        """
        :rtype: int
        """
        return self._build_id

    def needs_more_slaves(self):
        """
        Determine whether or not this build should have more slaves allocated to it.

        :rtype: bool
        """
        return self._num_executors_allocated < self._max_executors and not self._unstarted_subjobs.empty()

    def allocate_slave(self, slave):
        """
        Allocate a slave to this build. This tells the slave to execute setup commands for this build.

        :type slave: Slave
        """
        self._slaves_allocated.append(slave)
        slave.setup(self)
        self._num_executors_allocated += min(slave.num_executors, self._max_executors_per_slave)
        analytics.record_event(analytics.BUILD_SETUP_START, build_id=self.build_id(), slave_id=slave.id)

    def all_subjobs(self):
        """
        Returns a list of subjobs for this build
        :rtype: list[Subjob]
        """
        return [subjob for subjob in self._all_subjobs_by_id.values()]

    def subjob(self, subjob_id):
        """
        Returns a single subjob
        :type subjob_id: int
        :rtype: Subjob
        """
        subjob = self._all_subjobs_by_id.get(subjob_id)
        if subjob is None:
            raise ItemNotFoundError('Invalid subjob id.')
        return subjob

    def begin_subjob_executions_on_slave(self, slave):
        """
        Begin subjob executions on a slave. This should be called once after the specified slave has already run
        build_setup commands for this build.

        :type slave: Slave
        """
        analytics.record_event(analytics.BUILD_SETUP_FINISH, build_id=self.build_id(), slave_id=slave.id)
        for slave_executor_count in range(slave.num_executors):
            if (self._num_executors_in_use >= self._max_executors
                    or slave_executor_count >= self._max_executors_per_slave):
                break
            slave.claim_executor()
            self._num_executors_in_use += 1
            self.execute_next_subjob_or_teardown_slave(slave)

    def execute_next_subjob_or_teardown_slave(self, slave):
        """
        Grabs an unstarted subjob off the queue and sends it to the specified slave to be executed. If the unstarted
        subjob queue is empty, we teardown the slave to free it up for other builds.

        :type slave: Slave
        """
        try:
            # This lock prevents the scenario where a subjob is pulled from the queue but cannot be assigned to this
            # slave because it is shutdown, so we put it back on the queue but in the meantime another slave enters
            # this method, finds the subjob queue empty, and is torn down.  If that was the last 'living' slave, the
            # build would be stuck.
            with self._subjob_assignment_lock:
                subjob = self._unstarted_subjobs.get(block=False)
                self._logger.debug('Sending subjob {} (build {}) to slave {}.',
                                   subjob.subjob_id(), subjob.build_id(), slave.url)
                try:
                    slave.start_subjob(subjob)
                except SlaveMarkedForShutdownError:
                    self._unstarted_subjobs.put(subjob)
                    # An executor is currently allocated for this subjob in begin_subjob_executions_on_slave.
                    # Since the slave has been marked for shutdown, we need to free the executor.
                    self._free_slave_executor(slave)

        except Empty:
            self._free_slave_executor(slave)

    def _free_slave_executor(self, slave):
        num_executors_in_use = slave.free_executor()
        if num_executors_in_use == 0:
            try:
                self._slaves_allocated.remove(slave)
            except ValueError:
                pass  # We have already deallocated this slave, no need to teardown
            else:
                slave.teardown()

    def complete_subjob(self, subjob_id, payload=None):
        """
        Handle the subjob payload and mark the given subjob id for this build as complete.
        :type subjob_id: int
        :type payload: dict
        """
        try:
            self._handle_subjob_payload(subjob_id, payload)
            self._mark_subjob_complete(subjob_id)

        except Exception:
            self._logger.exception('Error while completing subjob; marking build as failed.')
            self.mark_failed('Error occurred while completing subjob {}.'.format(subjob_id))
            raise

    def _handle_subjob_payload(self, subjob_id, payload):
        if not payload:
            self._logger.warning('No payload for subjob {} of build {}.', subjob_id, self._build_id)
            return

        # Assertion: all payloads received from subjobs are uniquely named.
        result_file_path = os.path.join(
            self._build_results_dir(),
            payload['filename'])

        try:
            app.util.fs.write_file(payload['body'], result_file_path)
            app.util.fs.extract_tar(result_file_path, delete=True)
        except:
            self._logger.warning('Writing payload for subjob {} of build {} FAILED.', subjob_id, self._build_id)
            raise

    def _read_subjob_timings_from_results(self):
        """
        Collect timing data from all subjobs
        :rtype: dict [str, float]
        """
        timings = {}
        for _, subjob in self._all_subjobs_by_id.items():
            timings.update(subjob.read_timings())

        return timings

    def _mark_subjob_complete(self, subjob_id):
        """
        :type subjob_id: int
        """
        subjob = self._all_subjobs_by_id[int(subjob_id)]
        with self._build_completion_lock:
            self._finished_subjobs.put(subjob, block=False)
            subjobs_are_finished = self._subjobs_are_finished

        # We use a local variable here which was set inside the _build_completion_lock to prevent a race condition
        if subjobs_are_finished:
            self._logger.info("All results received for build {}!", self._build_id)
            SafeThread(target=self._perform_async_postbuild_tasks, name='PostBuild{}'.format(self._build_id)).start()

    def mark_failed(self, failure_reason):
        """
        Mark a build as failed and set a failure reason. The failure reason should be something we can present to the
        end user of ClusterRunner, so try not to include detailed references to internal implementation.

        :type failure_reason: str
        """
        self._logger.error('Build {} failed: {}', self.build_id(), failure_reason)
        self._error_message = failure_reason

    def cancel(self):
        """
        Cancel a running build
        """
        # Early exit if build is not running
        if self._status() in [BuildStatus.FINISHED, BuildStatus.ERROR, BuildStatus.CANCELED]:
            return

        self._is_canceled = True

        # Deplete the unstarted subjob queue.
        # TODO: Handle situation where cancel() is called while subjobs are being added to _unstarted_subjobs
        while self._unstarted_subjobs is not None and not self._unstarted_subjobs.empty():
            try:
                # A subjob may be asynchronously pulled from this queue, so we need to avoid blocking when empty.
                self._unstarted_subjobs.get(block=False)
            except Empty:
                break

    def validate_update_params(self, update_params):
        """
        Determine whether a dict of update params is valid, and generate an error if not
        :param update_params: Params passed into a PUT for this build
        :type update_params: dict [str, str]
        :return: Whether the params are valid and a response containing an error message if not
        :rtype: tuple [bool, dict [str, str]]
        """
        keys_and_values_allowed = {'status': ['canceled']}
        message = None
        for key, value in update_params.items():
            if key not in keys_and_values_allowed.keys():
                message = 'Key ({}) is not in list of allowed keys ({})'.\
                    format(key, ",".join(keys_and_values_allowed.keys()))
            elif value not in keys_and_values_allowed[key]:
                message = 'Value ({}) is not in list of allowed values ({}) for {}'.\
                    format(value, keys_and_values_allowed[key], key)

        if message is not None:
            return False, {'error': message}
        return True, {}

    def update_state(self, update_params):
        """
        Make updates to the state of this build given a set of update params
        :param update_params: The keys and values to update on this build
        :type update_params: dict [str, str]
        """
        success = False
        for key, value in update_params.items():
            if key == 'status':
                if value == 'canceled':
                    self.cancel()
                    success = True
        return success

    @property
    def project_type(self):
        """
        :rtype: ProjectType
        """
        return self._project_type

    @property
    def num_executors_allocated(self):
        """
        :rtype: int
        """
        return self._num_executors_allocated

    @property
    def artifacts_archive_file(self):
        return self._artifacts_archive_file

    @property
    def _num_subjobs_total(self):
        return len(self._all_subjobs_by_id)

    @property
    def _num_subjobs_finished(self):
        return 0 if not self._finished_subjobs else self._finished_subjobs.qsize()

    @property
    def _num_atoms(self):
        if self._status() not in [BuildStatus.BUILDING, BuildStatus.FINISHED]:
            return None
        return sum([len(subjob.atomic_commands()) for subjob in self._all_subjobs_by_id.values()])

    @property
    def _subjobs_are_finished(self):
        return self._is_canceled or (self.is_prepared and self._finished_subjobs.full())

    @property
    def is_finished(self):
        # TODO: Clean up this logic or move everything into a state machine
        return self._is_canceled or self._postbuild_tasks_are_finished

    @property
    def is_unstarted(self):
        return self.is_prepared and self._num_executors_allocated == 0 and self._unstarted_subjobs.full()

    @property
    def has_error(self):
        return self._error_message is not None

    @property
    def _detail_message(self):
        if self._num_subjobs_total > 0:
            return '{} of {} subjobs are complete ({:.1f}%).'.format(
                self._num_subjobs_finished,
                self._num_subjobs_total,
                100 * self._num_subjobs_finished / self._num_subjobs_total
            )
        return None

    def _status(self):
        """
        :rtype: BuildStatus
        """
        if self.has_error:
            return BuildStatus.ERROR
        elif self._is_canceled:
            return BuildStatus.CANCELED
        elif not self.is_prepared or self.is_unstarted:
            return BuildStatus.QUEUED
        elif self.is_finished:
            return BuildStatus.FINISHED
        else:
            return BuildStatus.BUILDING

    def _failed_atoms(self):
        """
        The commands which failed
        :rtype: list [str] | None
        """
        if self._is_canceled:
            return []

        if self.is_finished:
            # dict.values() returns a view object in python 3, so wrapping values() in a list
            return list(self._build_artifact.get_failed_commands().values())
        return None

    def _result(self):
        """
        :rtype: str | None
        """
        if self._is_canceled:
            return BuildResult.FAILURE

        if self.is_finished:
            if len(self._build_artifact.get_failed_commands()) == 0:
                return BuildResult.NO_FAILURES
            return BuildResult.FAILURE
        return None

    def _perform_async_postbuild_tasks(self):
        """
        Once a build is complete, certain tasks can be performed asynchronously.
        """
        self._create_build_artifact()
        self._logger.debug('Postbuild tasks completed for build {}', self.build_id())
        self._postbuild_tasks_are_finished = True

    def _create_build_artifact(self):
        self._build_artifact = BuildArtifact(self._build_results_dir())
        self._build_artifact.generate_failures_file()
        self._build_artifact.write_timing_data(self._timing_file_path, self._read_subjob_timings_from_results())
        self._artifacts_archive_file = app.util.fs.compress_directory(self._build_results_dir(), 'results.tar.gz')

    def _build_results_dir(self):
        return os.path.join(
            Configuration['results_directory'],
            str(self.build_id()),
        )

    def _generate_unique_symlink_path_for_build_repo(self):
        """
        Generate a unique symlink path for a build-specific repo. This method does NOT generate the symlink itself.
        :rtype: str
        """
        return os.path.join(Configuration['build_symlink_directory'], str(uuid.uuid4()))
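
In the listing above, execute_next_subjob_or_teardown_slave() pulls work with get(block=False) inside _subjob_assignment_lock so that a subjob put back after SlaveMarkedForShutdownError cannot be missed by another slave that briefly sees an empty queue and tears itself down. The non-blocking get-or-requeue pattern on its own (hypothetical names, not ClusterRunner code):

from queue import Queue, Empty
from threading import Lock

work = Queue()
assignment_lock = Lock()

def assign_next(worker_ok):
    """Hand the next item to a worker, or report that nothing is left.

    worker_ok: callable returning False if the worker must refuse the item,
    in which case the item is put back under the same lock.
    """
    with assignment_lock:
        try:
            item = work.get(block=False)
        except Empty:
            return 'no work left'
        if not worker_ok(item):
            work.put(item)   # requeue; the lock keeps this atomic w.r.t. other callers
            return 'worker unavailable'
        return 'assigned {}'.format(item)

work.put('subjob-0')
print(assign_next(lambda item: False))   # worker refuses -> item goes back
print(assign_next(lambda item: True))    # assigned subjob-0
print(assign_next(lambda item: True))    # no work left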
Example #18
0
class UnifiClient(object):
    def __init__(self,
                 username,
                 password,
                 host='localhost',
                 port=8443,
                 ssl_verify=False,
                 timeout=10.0):
        self.username = username
        self.password = password
        self.host = host
        self.port = port
        self.ssl_verify = ssl_verify
        self.timeout = timeout
        self.last_received_event = time.time()

        self.url = 'https://' + self.host + ':' + str(port) + '/'
        self.login_url = self.url + 'api/login'
        self.initial_info_url = self.url + 'api/s/default/stat/device'
        self.params = {'_depth': 4, 'test': 0}
        self.ws_url = 'wss://{}:{}/wss/s/default/events'.format(
            self.host, self.port)
        self.thread = None
        self.running = False

        # dictionary for storing unifi data
        self.unifi_data = collections.OrderedDict()

        self.event_q = Queue(100)

        logger.debug('Python: %s' % repr(sys.version_info))

        self.connect_websocket()

    def terminate(self):
        self.running = False

    def connect_websocket(self):
        self.thread = threading.Thread(target=self.start_websocket)
        self.thread.daemon = True
        self.thread.start()

    def start_websocket(self):
        logger.info('Python 3 websocket')
        self.running = True
        loop = asyncio.new_event_loop()
        while self.running:
            loop.run_until_complete(self.async_websocket())
            time.sleep(30)
            logger.warning('Reconnecting websocket')

    async def async_websocket(self):
        """
        By default ClientSession uses strict version of aiohttp.CookieJar. RFC 2109 explicitly forbids cookie
        accepting from URLs with IP address instead of DNS name (e.g. http://127.0.0.1:80/cookie).
        It’s good but sometimes for testing we need to enable support for such cookies. It should be done by
        passing unsafe=True to aiohttp.CookieJar constructor:
        """

        # enable support for unsafe cookies
        jar = aiohttp.CookieJar(unsafe=True)

        logger.info('login() %s as %s' % (self.url, self.username))

        json_request = {
            'username': self.username,
            'password': self.password,
            'strict': True
        }

        try:
            async with aiohttp.ClientSession(cookie_jar=jar) as session:
                async with session.post(self.login_url,
                                        json=json_request,
                                        ssl=self.ssl_verify) as response:
                    assert response.status == 200
                    json_response = await response.json()
                    logger.debug('Received json response to login:')
                    # logger.debug(json.dumps(json_response, indent=2))

                # The elided portion of the example presumably fetched the initial
                # device info here before switching to the websocket.
                async with session.get(self.initial_info_url,
                                       params=self.params,
                                       ssl=self.ssl_verify) as response:
                    assert response.status == 200
                    json_response = await response.json()
                    logger.debug('Received json response to initial data:')
                    # logger.debug(json.dumps(json_response, indent=2))
                    self.process_unifi_message(json_response)

                async with session.ws_connect(self.ws_url,
                                              ssl=self.ssl_verify) as ws:
                    async for msg in ws:
                        if msg.type == aiohttp.WSMsgType.TEXT:
                            self.last_received_event = time.time()
                            # logger.debug('received: %s' % json.dumps(json.loads(msg.data), indent=2))
                            self.process_unifi_message(
                                msg.json(loads=json.loads))
                        elif msg.type == aiohttp.WSMsgType.CLOSED:
                            logger.info('WS closed')
                            self.running = False
                            break
                        elif msg.type == aiohttp.WSMsgType.ERROR:
                            logger.error('WS closed with Error')
                            self.running = False
                            break

        except AssertionError as e:
            logger.error('failed to connect: %s' % e)
            self.running = False

        logger.info('async_websocket: Exited')

    def process_unifi_message(self, message):
        """
        takes data from the websocket and puts events in the queue
        Uses OrderDict to preserve the order for repeatable output.
        """
        unifi_data = collections.OrderedDict()

        meta = message['meta']
        update_type = meta.get("message", "_unknown_")
        # "events", "device:sync", "device:update", "speed-test:update", "user:sync", "sta:sync", possibly others

        if update_type == "events":
            # logger.info('\n: %s' % json.dumps(message, indent=2))
            data = message['data']
            data_len = len(data)
            if data_len > 1:
                logger.info('nr of items in data: %d' % data_len)
            if self.event_q.full():
                # discard oldest event
                self.event_q.get()
                self.event_q.task_done()
            self.event_q.put(data)
        # else:
        # logger.debug('received %s message' % update_type)
        # logger.debug('\n: %s' % json.dumps(data, indent=2))

        if logger.getEffectiveLevel() == logging.DEBUG:
            with open('raw_data.json', 'w') as f:
                f.write(json.dumps(unifi_data, indent=2))

    def events(self, blocking=True):
        """
        returns a list of event updates
        if blocking, waits for a new update, then returns it as a list
        if not blocking, returns any updates in the queue, or an empty list if there are none
        """
        if blocking:
            unifi_events = self.event_q.get()
            self.event_q.task_done()
        else:
            unifi_events = []
            while not self.event_q.empty():
                unifi_events += self.event_q.get()
                self.event_q.task_done()
        return unifi_events
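
events(blocking=False) above drains the queue by looping on empty() and calling task_done() after every get(), while the websocket side discards the oldest entry whenever the 100-slot queue fills. A compact sketch of that non-blocking drain (illustrative only, not tied to UniFi data):

from queue import Queue

event_q = Queue(maxsize=100)

def drain(q):
    """Return everything currently queued without blocking."""
    items = []
    while not q.empty():
        items.append(q.get())
        q.task_done()           # keep unfinished-task accounting balanced for join()
    return items

event_q.put({'event': 'sta:connect'})
event_q.put({'event': 'sta:disconnect'})
print(drain(event_q))           # both events
print(drain(event_q))           # [] -- nothing new arrived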
Example #19
0
class rpiBaseClass:
    """
	Implements the base class for common functionalities.
	"""

    def __init__(self, name, rpi_apscheduler, rpi_events):

        # Public

        # Custom name
        self.name = name or "Job"

        # EoD and End OAM events
        self.eventDayEnd = Event()
        self.eventDayEnd.clear()
        self.eventEnd = Event()
        self.eventEnd.clear()

        # Private

        # Reference to the APScheduler
        self._sched = rpi_apscheduler or None
        self._sched_lock = self._create_lock()

        # Reference to eventErr (can be accessed via the rpi_events)
        self._eventErr = rpi_events.eventErrList[self.name]

        # Error event related
        self._eventErrdelay = 0
        self._eventErrcount = 0
        self._eventErrtime = 0

        # Job start/stop and interval times
        self._dtstart = None
        self._dtstop = None
        self._interval_sec = 13

        # The commands queue, check/process interval, job name
        self._cmds = Queue(10)
        self._proccmd_interval_sec = 11
        self._cmdname = "%s_Cmd%d" % (self.name, self._proccmd_interval_sec)

        # The state flags and state/cmd value codes
        self._state = {}
        self._state['run'] = False
        self._state['stop'] = False
        self._state['pause'] = False
        self._state['init'] = False
        self._state['resch'] = False
        self._state['cmdval'] = -1
        self._state['errval'] = 0
        self._stateVal = 0
        self._state_lock = self._create_lock()

        # The last 10 status messages
        self._statusmsg = deque([], 10)

        # ATExit handler
        atexit.register(self._clean_exit)

        # Init class
        self._initclass()

    def __repr__(self):
        return "<%s (name=%s, rpi_apscheduler=%s, rpi_events=dict())>" % (
            self.__class__.__name__, self.name, self._sched)

    def __str__(self):
        return "%s::: Cmd(tstart_per:%s, tstop_per:%s, interval_sec=%d), eventErr(count: %d, time: %s, delay: %s), state: %s, stateVal: %d, cmds: %s, statusmsg: %s" % \
         (self.name, self._dtstart, self._dtstop, self._interval_sec, self._eventErrcount, time.ctime(self._eventErrtime), self._eventErrdelay, self._state, self._stateVal, self._cmds, self._statusmsg)

    def __del__(self):
        with self._sched_lock:
            if self._sched is not None:
                if self._sched.get_job(self._cmdname) is not None:
                    self._sched.remove_job(self._cmdname)
                if self._sched.get_job(self.name) is not None:
                    self._sched.remove_job(self.name)

        logging.debug("%s::: Deleted!" % self.name)
        self._statusmsg.append(("%s Deleted" % self.name, ERRNONE))

    #
    # Subclass interface methods to be overriden by user defined methods.
    #
    def jobRun(self):
        """
		Main function. To be overridden by user defined method.
		"""
        pass

    def initClass(self):
        """
		(re)Initialize the class. To be overridden by user defined method.
		"""
        pass

    def endDayOAM(self):
        """
		End-Of-Day OAM. To be overridden by user defined method.
		"""
        pass

    def endOAM(self):
        """
		End OAM. To be overridden by user defined method.
		"""
        pass

    #
    # Subclass interface methods to be used externally.
    #

    def queueCmd(self, cmdrx_tuple):
        """
		Puts a remote command (tuple) in the cmd queue.
		Returns boolean to indicate success status.
		"""

        if self._cmds.full():
            self._seteventerr('queueCmd()', ERRLEV0)
            logging.warning(
                "%s::: Cmd queue is full: %s" % (self.name, self._cmds))
            return False

        self._cmds.put(cmdrx_tuple, True, 5)
        return True

    def setInit(self):
        """
		Run Init mode and set flags.
		Return boolean to indicate state change.
		"""
        if self._state['init'] or self.eventDayEnd.is_set(
        ) or self.eventEnd.is_set():
            logging.debug("%s::: %s: _initclass not run" %
                          (self.name, sys._getframe().f_code.co_filename))
            return False
        else:
            self._initclass()
            return True

    def setRun(self, tstartstopintv=None):
        """
		Run Run mode and set flags.
		When the tstartstopintv=(start, stop, interval) tuple is specified (re)configure and add self._run() job to the scheduler.
		Return boolean to indicate state change.
		"""
        if self._state['run'] or self.eventDayEnd.is_set(
        ) or self.eventEnd.is_set():
            logging.debug("%s::: %s: add/resume_run not run" %
                          (self.name, sys._getframe().f_code.co_filename))
            return False
        else:
            if tstartstopintv is not None:
                self.timePeriodIntv = tstartstopintv
                self._remove_run()
                self._add_run()
            else:
                self._resume_run()
            return True

    def setStop(self):
        """
		Run Stop mode and set flags.
		Return boolean to indicate state change.
		"""
        if self._state['stop'] or self.eventDayEnd.is_set(
        ) or self.eventEnd.is_set():
            logging.debug("%s::: %s: _remove_run not run" %
                          (self.name, sys._getframe().f_code.co_filename))
            return False
        else:
            self._remove_run()
            return True

    def setPause(self):
        """
		Run Pause mode and set flags.
		Return boolean to indicate state change.
		"""
        if self._state['pause'] or self.eventDayEnd.is_set(
        ) or self.eventEnd.is_set():
            logging.debug("%s::: %s: _pause_run not run" %
                          (self.name, sys._getframe().f_code.co_filename))
            return False
        else:
            self._pause_run()
            return True

    def setResch(self):
        """
		Run Re-schedule mode and set flags.
		Return boolean to indicate state change.
		"""
        if self._state['resch'] or self.eventDayEnd.is_set(
        ) or self.eventEnd.is_set():
            return False
        else:
            self._reschedule_run()
            return True

    def setEndDayOAM(self):
        """
		Run End-of-Day OAM mode and set flags.
		Return boolean to indicate state change.
		"""
        if self._state['run'] or self._state['pause'] or self._state['resch'] or \
         self.eventDayEnd.is_set() or self.eventEnd.is_set():
            logging.debug("%s::: %s: _enddayoam_run not run" %
                          (self.name, sys._getframe().f_code.co_filename))
            return False
        else:
            self._enddayoam_run()
            return True

    def setEndOAM(self):
        """
		Run End OAM mode and set flags.
		Return boolean to indicate state change.
		"""
        if self._state['run'] or self._state['pause'] or self._state['resch'] or \
         self.eventDayEnd.is_set() or self.eventEnd.is_set():
            logging.debug("%s::: %s: _endoam_run not run" %
                          (self.name, sys._getframe().f_code.co_filename))
            return False
        else:
            self._endoam_run()
            return True

    @property
    def statusUpdate(self):
        """
		Get the last status message (tuple) in the deque.
		"""
        try:
            str, val = self._statusmsg.pop()
            return str, val
        except IndexError as e:
            return None, ERRNONE

    @statusUpdate.setter
    def statusUpdate(self, status_tuple):
        """
		Update status message (tuple) in the deque.
		"""
        #message_str=None, message_value=ERRNONE
        #(message_str, message_value)
        self._statusmsg.append(status_tuple)

    @property
    def timePeriodIntv(self):
        """
		Get the start and stop datetime and interval seconds values as a tuple.
		"""
        return (self._dtstart, self._dtstop, self._interval_sec)

    @timePeriodIntv.setter
    def timePeriodIntv(self, tstartstopintv):
        """
		Set the start and stop datetime and interval seconds values.
		"""
        self._dtstart = tstartstopintv[0]
        self._dtstop = tstartstopintv[1]
        self._interval_sec = tstartstopintv[2]

    @property
    def errorDelay(self):
        """
		Return the allowed time delay (grace period) before re-initializing the class after a fatal error.
		"""
        return self._eventErrdelay

    @errorDelay.setter
    def errorDelay(self, delay_sec):
        """
		Set the allowed time delay (grace period) before re-initializing the class after a fatal error.
		"""
        self._eventErrdelay = delay_sec

    @property
    def errorTime(self):
        """
		Return the time (time.time()) when the last error was set.
		"""
        return self._eventErrtime

    @property
    def errorCount(self):
        """
		Return the number of times the job has run while in the delay time period (self._eventErrdelay).
		"""
        return self._eventErrcount

    @property
    def stateValue(self):
        """
		Return the combined/encoded state value corresponding to the cmd and err states.
		"""
        self._setstateval()
        return self._stateVal

    #
    # Private
    #

    def _run(self):
        """
		Run first the internal functionalities, and then call the user defined runJob method.
		Catch all rpiBaseClassError exceptions.
		"""

        ###	Run the internal functionalities first then the user defined method	(self.jobRun)
        try:
            # Apply re-initialization grace period after a fatal error (ERRCRIT level)
            # Re-initialize the self._run() method
            # after self._eventErrdelay seconds from the last failed access/run attempt
            if self._eventErr.is_set():
                self._eventErrcount += 1
                logging.info("%s::: eventErr is set (run %d)!" %
                             (self.name, self._eventErrcount))
                if (time.time() - self._eventErrtime) < self._eventErrdelay:
                    logging.debug("%s::: eventErr was set at %s (run %d)!" %
                                  (self.name, time.ctime(self._eventErrtime),
                                   self._eventErrcount))
                    return

                self._initclass()
                self._add_run()
                #return

            # Set Run state
            self._run_state()

            # Run the user defined method
            self.jobRun()

        except rpiBaseClassError as e:
            if e.errval > ERRNONE:
                if e.errval < ERRCRIT:
                    self._seteventerr('_run()', e.errval)
                    logging.warning("%s" % e.errmsg)
                    pass
                else:
                    self._seteventerr('_run()', ERRCRIT)
                    logging.error("%s\nExiting job!" % e.errmsg, exc_info=True)
                    raise
            else:
                logging.warning("A non-error was raised: %s" % e.errmsg)
                pass

        except RuntimeError as e:
            self._seteventerr('_run()', ERRCRIT)
            logging.error("RuntimeError: %s\nExiting!" % str(e), exc_info=True)
            raise

        except:
            self._seteventerr('_run()', ERRCRIT)
            logging.error(
                "Unhandled Exception: %s\nExiting!" % str(sys.exc_info()),
                exc_info=True)
            raise

        finally:
            self._setstateval()

    def _initclass(self):
        """
		(re)Initialize the class.
		"""

        logging.info("%s::: Intialize class" % self.name)

        ### Stop and remove the self._run()  and self._proccmd() jobs from the scheduler
        self._remove_run()
        with self._sched_lock:
            if self._sched is not None:
                if self._sched.get_job(self._cmdname) is not None:
                    self._sched.remove_job(self._cmdname)

        ### Empty the cmd queue
        while not self._cmds.empty():
            (cmdstr, cmdval) = self._cmds.get()
            self._cmds.task_done()

        ### Empty the status message queue
        self._statusmsg.clear()

        ### Init error event and state
        self._cleareventerr('_initclass()')
        self._state['errval'] = ERRNONE

        ### Clear other events
        self.eventDayEnd.clear()
        self.eventEnd.clear()

        ### User defined init method
        self.initClass()

        ### Add the self._proccmd() job to the scheduler
        with self._sched_lock:
            if self._sched is not None:
                self._sched.add_job(
                    self._proccmd,
                    trigger='interval',
                    id=self._cmdname,
                    seconds=self._proccmd_interval_sec,
                    misfire_grace_time=5,
                    name=self._cmdname)

        ### Set Init state
        self._init_state()

    def _proccmd(self):
        """
		Set the Stop state if the Job is not scheduled.
		Process and act upon received commands.
		"""

        # Set the Stop state if the Job is not scheduled
        if self._sched is not None:
            with self._sched_lock:
                if not self._state['stop'] and self._sched.get_job(
                        self.name) is None:
                    self._stop_state()

        # Process and act upon received commands.
        if self._cmds.empty():
            logging.debug("%s::: _proccmd: Cmd queue is empty!" % self.name)

        elif (not self.eventDayEnd.is_set()) and (not self.eventEnd.is_set()):

            # Process the command
            (cmdstr, cmdval) = self._cmds.get()

            logging.debug("%s::: _proccmd: Get cmdstr:%s, cmdval:%d" %
                          (self.name, cmdstr, cmdval))

            if cmdval == CMDRUN and self.setRun():
                self._statusmsg.append(("%s run" % self.name, ERRNONE))

            elif cmdval == CMDSTOP and self.setStop():
                self._statusmsg.append(("%s stop" % self.name, ERRNONE))

            elif cmdval == CMDPAUSE and self.setPause():
                self._statusmsg.append(("%s pause" % self.name, ERRNONE))

            elif cmdval == CMDINIT and self.setInit():
                self._statusmsg.append(("%s init" % self.name, ERRNONE))

            elif cmdval == CMDRESCH and self.setResch():
                self._statusmsg.append(("%s resch" % self.name, ERRNONE))

            elif cmdval == CMDEOD and self.setEndDayOAM():
                self._statusmsg.append(("%s eod" % self.name, ERRNONE))

            elif cmdval == CMDEND and self.setEndOAM():
                self._statusmsg.append(("%s end" % self.name, ERRNONE))

            self._cmds.task_done()

    def _setstateval(self):
        """
		Set the combined/encoded state value corresponding to the cmd and err states (4 bits each).
		"""
        with self._state_lock:
            self._stateVal = self._state['errval'] + 16 * self._state['cmdval']
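            # Illustrative decode (comment only, not part of the original class):
            # with the error value in the low 4 bits and the command value in the
            # high 4 bits, the two fields can be recovered from stateVal with
            #   errval = stateVal & 0x0F   (i.e. stateVal % 16)
            #   cmdval = stateVal >> 4     (i.e. stateVal // 16)
            # assuming every error and command code fits in the range 0..15.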

    def _seteventerr(self, str_func, err_val=ERRLEV0):
        """
		Set eventErr, set the error value (ERRLEV0, ERRLEV1, ERRLEV2 or ERRCRIT) and store timestamp.
		"""
        if err_val > ERRNONE:
            str = "%s: %s SetError %d" % (self.name, str_func, err_val)
            self._statusmsg.append((str, -1 * err_val))
            self._eventErr.set()
            self._eventErrtime = time.time()
            logging.debug(
                "%s::: Set eventErr %d in %s at %s!" %
                (self.name, err_val, str_func, time.ctime(self._eventErrtime)))
            self._state['errval'] = err_val
            self._setstateval()

    def _cleareventerr(self, str_func):
        """
		Clear eventErr and reset error value and reset timestamp.
		"""
        str = "%s: %s ClrError %d" % (self.name, str_func,
                                      self._state['errval'])
        self._statusmsg.append((str, ERRNONE))
        logging.debug("%s::: Clear eventErr %d in %s!" %
                      (self.name, self._state['errval'], str_func))
        self._eventErr.clear()
        self._eventErrtime = 0
        self._state['errval'] = ERRNONE
        self._setstateval()

    def _add_run(self):
        """
		Add the self._run() method as a job in the APScheduler.
		"""
        with self._sched_lock:
            if self._sched is not None:
                if self._sched.get_job(self.name) is None:
                    self._sched.add_job(
                        self._run,
                        trigger='interval',
                        id=self.name,
                        seconds=self._interval_sec,
                        start_date=self._dtstart,
                        end_date=self._dtstop,
                        misfire_grace_time=10,
                        name=self.name)
                else:
                    self._reschedule_run()

        # Why is it needed?
        self._run_state()

    def _init_state(self):
        """
		Set Init state for the scheduled self._run() job.
		"""
        self._state['run'] = False
        self._state['stop'] = False
        self._state['pause'] = False
        self._state['init'] = True
        self._state['resch'] = False
        self._state['cmdval'] = CMDINIT

        self._setstateval()

        logging.debug("%s::: Init state." % self.name)

    def _run_state(self):
        """
		Set Run state for the scheduled self._run() job.
		"""
        self._state['run'] = True
        self._state['stop'] = False
        self._state['pause'] = False
        self._state['init'] = False
        self._state['resch'] = False
        self._state['cmdval'] = CMDRUN

        self._setstateval()

        logging.debug("%s::: Run state." % self.name)

    def _pause_state(self):
        """
		Set Pause state for the scheduled self._run() job.
		"""
        self._state['run'] = False
        self._state['stop'] = False
        self._state['pause'] = True
        self._state['init'] = False
        self._state['resch'] = False
        self._state['cmdval'] = CMDPAUSE

        self._setstateval()

        logging.debug("%s::: Pause state." % self.name)

    def _stop_state(self):
        """
		Set Stop state for the scheduled self._run() job.
		"""
        self._state['run'] = False
        self._state['stop'] = True
        self._state['pause'] = False
        self._state['init'] = False
        self._state['resch'] = False
        self._state['cmdval'] = CMDSTOP

        self._setstateval()

        logging.debug("%s::: Stop state." % self.name)

    def _resume_run(self):
        """
		Resume/add the paused or stopped self._run() job.
		Set the Run state.
		"""
        if self._state['stop'] or self._state['pause']:
            with self._sched_lock:
                if self._sched is not None:
                    if self._sched.get_job(self.name) is not None:
                        self._sched.resume_job(self.name)
                    else:
                        self._add_run()

        self._run_state()

    def _pause_run(self):
        """
		Pause the scheduled self._run() job.
		Set the Pause state.
		"""
        if not self._state['pause']:
            with self._sched_lock:
                if self._sched is not None:
                    if self._sched.get_job(self.name) is not None:
                        self._sched.pause_job(self.name)

            self._pause_state()

    def _remove_run(self):
        """
		Remove the scheduled self._run() job.
		Set the Stop state.
		"""
        if not self._state['stop']:
            with self._sched_lock:
                if self._sched is not None:
                    if self._sched.get_job(self.name) is not None:
                        self._sched.remove_job(self.name)

            self._stop_state()

    def _reschedule_run(self):
        """
		Re-schedule the self._run() job.
		Set the ReScheduled state.
		"""
        if not self._state['resch']:
            with self._sched_lock:
                if self._sched is not None:
                    if self._sched.get_job(self.name) is not None:
                        self._sched.reschedule_job(
                            job_id=self.name,
                            trigger='interval',
                            seconds=self._interval_sec,
                            start_date=self._dtstart,
                            end_date=self._dtstop,
                            name=self.name)

        self._state['run'] = False
        self._state['stop'] = False
        self._state['pause'] = False
        self._state['init'] = False
        self._state['resch'] = True
        self._state['cmdval'] = CMDRESCH

        self._setstateval()

        logging.debug("%s::: Rescheduled state." % self.name)

    def _enddayoam_run(self):
        """
		End-of-Day OAM procedure.
		"""

        ### Execute only if eventErr is not set
        if not self._eventErr.is_set():

            ### Set the event
            self.eventDayEnd.set()

            ### User defined EoD
            self.endDayOAM()

            logging.info(
                "%s::: endDayOAM(): Maintenance sequence run" % self.name)

            ### Clear the event
            self.eventDayEnd.clear()

        else:
            logging.debug("%s::: _enddayoam(): eventErr is set" % self.name)

    def _endoam_run(self):
        """
		End OAM procedure.
		"""

        ### Execute only if eventErr is not set
        if not self._eventErr.is_set():

            ### Set the event
            self.eventEnd.set()

            ### User defined EoD
            self.endOAM()

            logging.info(
                "%s::: endOAM(): Maintenance sequence run" % self.name)

            ### Clear the event
            self.eventEnd.clear()

        else:
            logging.debug("%s::: _endoam(): eventErr is set" % self.name)

    def _create_lock(self):
        """
		Creates a reentrant lock object.
		"""
        return RLock()

    def _clean_exit(self):
        """
		An atexit handler for the current job.
		Stop and remove the self._run()  and self._proccmd() jobs from the scheduler.
		"""
        logging.warning("The %s job is exiting!" % self.name)

        self._remove_run()
        with self._sched_lock:
            if self._sched is not None:
                if self._sched.get_job(self._cmdname) is not None:
                    self._sched.remove_job(self._cmdname)

        logging.debug("%s::: Exit!" % self.name)
        self._statusmsg.append(("%s Exit" % self.name, ERRNONE))
Example #20
0
class FillPatches(threading.Thread):
    def __init__(self,
                 setting,
                 batch_size=None,
                 max_queue_size=None,
                 stage_sequence=None,
                 full_image=None,
                 chunk_length_force_to_multiple_of=None):
        if batch_size is None:
            batch_size = setting['NetworkValidation']['BatchSize']
        if max_queue_size is None:
            max_queue_size = setting['NetworkValidation']['MaxQueueSize']
        if stage_sequence is None:
            stage_sequence = setting['stage_sequence']
        if full_image is None:
            full_image = setting['FullImage']
        if chunk_length_force_to_multiple_of is None:
            chunk_length_force_to_multiple_of = setting['NetworkValidation'][
                'ChunkLengthForceToMultipleOf']
        threading.Thread.__init__(self)
        self.paused = False
        self.pause_cond = threading.Condition(threading.Lock())
        self.daemon = True
        self._batch_size = batch_size
        self._chunks_completed = False
        self._PatchQueue = Queue(maxsize=max_queue_size)
        im_list_info = su.get_im_info_list_from_train_mode(
            setting, train_mode='Validation')
        self._reading = chunk_image_seq.Images(
            setting=setting,
            class_mode='Direct',
            number_of_images_per_chunk=setting['NetworkValidation']
            ['NumberOfImagesPerChunk'],
            samples_per_image=setting['NetworkValidation']['SamplesPerImage'],
            im_info_list_full=im_list_info,
            stage_sequence=stage_sequence,
            train_mode='Validation',
            full_image=full_image,
            chunk_length_force_to_multiple_of=chunk_length_force_to_multiple_of
        )
        self._reading.fill()

    def run(self):
        while True:
            with self.pause_cond:
                while self.paused:
                    self.pause_cond.wait()
                try:
                    time_before_put = time.time()
                    item_queue = self._reading.next_batch(self._batch_size) + (
                        copy.copy(self._reading._chunks_completed), )
                    self._PatchQueue.put(item_queue)
                    time_after_put = time.time()
                    logging.debug(
                        'ValQueue: put {:.2f} s'.format(time_after_put -
                                                        time_before_put))
                    if self._reading._chunks_completed:
                        logging.debug(
                            'ValQueue: chunk is completed: resetValidation() ')
                        self._chunks_completed = True
                        self._reading.reset_validation()
                    if self._PatchQueue.full():
                        self.pause()
                finally:
                    time.sleep(0.1)

    def pause(self):
        if not self.paused:
            self.paused = True
            # If the worker is in its sleep (outside the locked section) we
            # acquire the lock immediately; otherwise we wait until the worker
            # releases it at the end of its current iteration. In a race the
            # worker will still see self.paused on its next pass and wait()
            # until it is set back to False.
            self.pause_cond.acquire()

    def resume(self):
        if self.paused:
            self.paused = False
            # Notify so thread will wake after lock released
            self.pause_cond.notify()
            # Now release the lock
            self.pause_cond.release()
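# A minimal sketch of the Condition-based pause/resume pattern used above:
# the worker checks a `paused` flag under the condition's lock and wait()s
# until resume() flips the flag and notify()s it. Names are illustrative, and
# pause()/resume() here use `with` blocks instead of the acquire()/release()
# split in FillPatches; both are valid variants of the same idea.
import threading
import time


class PausableWorker(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)
        self.paused = False
        self.pause_cond = threading.Condition(threading.Lock())

    def run(self):
        while True:
            with self.pause_cond:
                while self.paused:
                    self.pause_cond.wait()
            time.sleep(0.1)      # stand-in for one unit of work

    def pause(self):
        with self.pause_cond:
            self.paused = True

    def resume(self):
        with self.pause_cond:
            self.paused = False
            self.pause_cond.notify()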
Example #21
0
class TaskManager(object):
    """
    Task manager class based on the thread module which
    executes assigned tasks concurrently. It uses a
    pool of thread workers, a queue of tasks and a pid
    set to monitor job execution.

    .. doctest::

        Use case:
        mgr  = TaskManager()
        jobs = []
        jobs.append(mgr.spawn(func, args))
        mgr.joinall(jobs)

    """
    def __init__(self, nworkers=10, name='TaskManager', logger=None):
        self.logger = getMSLogger(verbose=True, logger=logger)
        self.name = name
        self.pids = set()
        self.uids = UidSet()
        self.tasks = Queue()
        self.workers = [Worker(name, self.tasks, self.pids, self.uids, logger) \
                        for _ in range(0, nworkers)]

    def status(self):
        "Return status of task manager queue"
        info = {
            'qsize': self.tasks.qsize(),
            'full': self.tasks.full(),
            'unfinished': self.tasks.unfinished_tasks,
            'nworkers': len(self.workers)
        }
        return {self.name: info}

    def nworkers(self):
        """Return number of workers associated with this manager"""
        return len(self.workers)

    def spawn(self, func, *args, **kwargs):
        """Spawn new process for given function"""
        pid = kwargs.get('pid', genkey(str(args) + str(kwargs)))
        evt = threading.Event()
        if pid not in self.pids:
            self.pids.add(pid)
            task = (evt, pid, func, args, kwargs)
            self.tasks.put(task)
        else:
            # the event was not added to task list, invoke set()
            # to pass it in wait() call, see joinall
            evt.set()
        return evt, pid

    def remove(self, pid):
        """Remove pid and associative process from the queue"""
        self.pids.discard(pid)

    def is_alive(self, pid):
        """Check worker queue if given pid of the process is still running"""
        return pid in self.pids

    def clear(self, tasks):
        """
        Clear all tasks in a queue. It allows current jobs to run, but will
        block all new requests till workers event flag is set again
        """
        # each task is returned from spawn(), i.e. a pair (evt, pid)
        _ = [t[0].clear() for t in tasks]

    def joinall(self, tasks):
        """Join all tasks in a queue and quit"""
        # each task is returned from spawn(), i.e. a pair (evt, pid)
        _ = [t[0].wait() for t in tasks]

    def quit(self):
        """Put None task to all workers and let them quit"""
        _ = [self.tasks.put(None) for _ in self.workers]
        time.sleep(1)  # let worker threads cool off and quit
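# A minimal usage sketch for TaskManager, following the "Use case" in its
# docstring. Assumption: the module defining TaskManager (together with its
# Worker, UidSet, genkey and getMSLogger helpers) is importable here as
# `taskmanager`; the module name is illustrative only.
import time

from taskmanager import TaskManager  # hypothetical import path


def fetch(url):
    """Stand-in task; real workloads would do I/O or computation here."""
    time.sleep(0.1)
    print("fetched", url)


mgr = TaskManager(nworkers=4)
jobs = [mgr.spawn(fetch, "https://example.org/%d" % i) for i in range(8)]
mgr.joinall(jobs)   # block until every spawned task has signalled its Event
mgr.quit()          # send None to each worker so the threads exit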
Example #22
0
def test_mutate_input_with_threads():
    """Input is mutable when using the threading backend"""
    q = Queue(maxsize=5)
    Parallel(n_jobs=2, backend="threading")(delayed(q.put, check_pickle=False)(1) for _ in range(5))
    nose.tools.assert_true(q.full())
Example #23
0
# import queue class
from queue import Queue
from time import sleep

print("\n-new queue-\n")
# set the queue
queue = Queue(maxsize=10)

for i in range(1, 11):
    sleep(0.1)
    queue.put(i)
    print("Person " + str(i) + " in line")

print('\nInitial queue')
print(queue)
print("\nQueue is full: ", queue.full())

# Removing element from queue
print("\nElements dequeued from the queue")
for i in range(1, 11):
    sleep(0.1)
    print("Person " + str(queue.get()) + " is no longer in queue")

print("\nQueue is Empty: ", queue.empty())
print("Queue is Full: ", queue.full())

# --------------------------------

from collections import deque

queueOrStack = deque()
Example #24
0
class FluentSender(sender.FluentSender):
    def __init__(self,
                 tag,
                 host='localhost',
                 port=24224,
                 bufmax=1 * 1024 * 1024,
                 timeout=3.0,
                 verbose=False,
                 buffer_overflow_handler=None,
                 nanosecond_precision=False,
                 msgpack_kwargs=None,
                 queue_maxsize=DEFAULT_QUEUE_MAXSIZE,
                 queue_circular=DEFAULT_QUEUE_CIRCULAR,
                 **kwargs):
        """
        :param kwargs: This kwargs argument is not used in __init__. This will be removed in the next major version.
        """
        super(FluentSender, self).__init__(tag=tag, host=host, port=port, bufmax=bufmax, timeout=timeout,
                                           verbose=verbose, buffer_overflow_handler=buffer_overflow_handler,
                                           nanosecond_precision=nanosecond_precision,
                                           msgpack_kwargs=msgpack_kwargs,
                                           **kwargs)
        self._queue_maxsize = queue_maxsize
        self._queue_circular = queue_circular

        self._thread_guard = threading.Event()  # This ensures visibility across all variables
        self._closed = False

        self._queue = Queue(maxsize=queue_maxsize)
        self._send_thread = threading.Thread(target=self._send_loop,
                                             name="AsyncFluentSender %d" % id(self))
        self._send_thread.daemon = True
        self._send_thread.start()

    def close(self, flush=True):
        with self.lock:
            if self._closed:
                return
            self._closed = True
            if not flush:
                while True:
                    try:
                        self._queue.get(block=False)
                    except Empty:
                        break
            self._queue.put(_TOMBSTONE)
            self._send_thread.join()

    @property
    def queue_maxsize(self):
        return self._queue_maxsize

    @property
    def queue_blocking(self):
        return not self._queue_circular

    @property
    def queue_circular(self):
        return self._queue_circular

    def _send(self, bytes_):
        with self.lock:
            if self._closed:
                return False
            if self._queue_circular and self._queue.full():
                # discard oldest
                try:
                    self._queue.get(block=False)
                except Empty:  # pragma: no cover
                    pass
            try:
                self._queue.put(bytes_, block=(not self._queue_circular))
            except Full:    # pragma: no cover
                return False    # this actually can't happen

            return True

    def _send_loop(self):
        send_internal = super(FluentSender, self)._send_internal

        try:
            while True:
                bytes_ = self._queue.get(block=True)
                if bytes_ is _TOMBSTONE:
                    break

                send_internal(bytes_)
        finally:
            self._close()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
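# A standalone sketch of the drop-oldest ("circular") pattern used by
# FluentSender._send() above: when the bounded queue is full, the oldest
# item is discarded so a new one can always be enqueued without blocking.
# The names below are illustrative, not part of the fluent-logger API.
from queue import Empty, Full, Queue


def put_circular(q, item):
    """Enqueue item, dropping the oldest entry if the queue is full."""
    if q.full():
        try:
            q.get(block=False)   # discard oldest
        except Empty:
            pass                 # another consumer emptied it first
    try:
        q.put(item, block=False)
        return True
    except Full:
        return False             # only possible under heavy contention


ring = Queue(maxsize=3)
for n in range(5):
    put_circular(ring, n)
print(list(ring.queue))          # -> [2, 3, 4]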
Example #25
0
class VideoCap:
    def __init__(self, video_source=0, qsize=128):
        # Opening the video
        self.vid = cv2.VideoCapture(video_source)

        if not self.vid.isOpened():
            raise ValueError("Unable to open the video", video_source)

        self.stopped = False

        # Getting video's width and height
        self.width = self.vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

        # Bufferization
        self.Q = Queue(maxsize=qsize)
        self.thread = Thread(target=self.get_frame, args=())
        self.thread.daemon = True
        self.timestamps = np.empty(qsize)
        self.position_in_Q = 0

    def __del__(self):
        if self.vid.isOpened():
            self.vid.release()

    def get_frame(self):
        while self.vid.isOpened():
            if self.stopped:
                break

            if not self.Q.full():
                grabbed, frame = self.vid.read()
                if self.position_in_Q < len(self.timestamps):
                    self.timestamps[self.position_in_Q] = self.vid.get(
                        cv2.CAP_PROP_POS_MSEC)
                    self.position_in_Q += 1
                else:
                    self.timestamps[:-1] = self.timestamps[1:]
                    self.timestamps[-1] = self.vid.get(cv2.CAP_PROP_POS_MSEC)

                if not grabbed:
                    self.stop()
                    break

                # cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                self.Q.put(frame)

            else:
                time.sleep(0.05)

        # Releasing the video source when the object is destroyed
        self.vid.release()

    def start(self):
        # start a thread to read frames from the file video stream
        self.thread.start()
        return self

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue. If stream is not stopped, try to wait a moment
        tries = 0
        while self.Q.qsize() == 0 and not self.stopped and tries < 5:
            time.sleep(0.05)
            tries += 1

        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
Example #26
0
class LevelCache:

    def __init__(self, level, cache_arr, no_of_ops_to_track=10):
        self.level = level
        self.caches = cache_arr #.extend([None] * (self.level - len(cache_arr)))
        self.last_read = Queue(no_of_ops_to_track)
        self.last_write = Queue(no_of_ops_to_track)

    def write(self, key, value):
        # return write time
        # print(key, value, cache.lookup(key, value))
        write_time = 0
        level_found_on = None
        for level, cache in enumerate(self.caches):
            if not level_found_on:
                myvalue = cache.read(key)
                write_time += cache.read_time
                if myvalue:
                    level_found_on = level + 1
            if not cache.lookup(key, value):
                cache.write(key, value)
                write_time += cache.write_time
            else:
                #write_time += cache.read_time
                break
        #print("level {}".format(level_found_on))

        self.add_write_time(write_time)
        return write_time

    def read(self, key):
        # return read time
        read_time = 0
        level, value = 0, 0
        for level, cache in enumerate(self.caches):
            value = cache.read(key)
            read_time += cache.read_time
            if value:
                for cache in self.caches[:level]:
                    cache.write(key, value)
                    read_time += cache.write_time
                self.add_read_time(read_time)
                return read_time
        return 0

    def stats(self):
        for cache in self.caches:
            print("Usage: {}/{}".format(cache.filled(), cache.capacity))
        #print("Avg Read Time: {}".format(avg(self.last_read)))

    def add_write_time(self, time):
        if not self.last_write.full():
            self.last_write.put(time)
        else:
            self.last_write.get()
            self.last_write.put(time)

    def add_read_time(self, time):
        if not self.last_read.full():
            self.last_read.put(time)
        else:
            self.last_read.get()
            self.last_read.put(time)
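# A small companion sketch: the last_read / last_write queues above keep only
# the most recent `no_of_ops_to_track` timings, so an average over them (as
# hinted at by the commented-out "Avg Read Time" line in stats()) could be
# computed like this. It peeks at the internal `queue` deque of queue.Queue,
# which is a CPython implementation detail, not a public API.
def avg_tracked(times_q):
    """Return the mean of the timings currently held in a bounded Queue."""
    items = list(times_q.queue)
    return sum(items) / len(items) if items else 0.0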
Example #27
0
class VideoLoader:
    def __init__(self, path, queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.stream = cv2.VideoCapture(path)
        assert self.stream.isOpened(), 'Cannot capture source'
        self.stopped = False
        self.len = int(self.stream.get(cv2.CAP_PROP_FRAME_COUNT))
        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)

    def length(self):
        return self.len

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping infinitely
        while True:
            time.sleep(0.02)
            # if the thread indicator variable is set, stop the
            # thread
            if self.stopped:
                return
            # otherwise, ensure the queue has room in it
            if not self.Q.full():
                # read the next frame from the file
                (grabbed, frame) = self.stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.stop()
                    return
                # process and add the frame to the queue
                inp_dim = int(opt.inp_dim)
                img, orig_img, dim = prep_frame(frame, inp_dim)
                inp = im_to_torch(orig_img)
                im_dim_list = torch.FloatTensor([dim]).repeat(1, 2)

                self.Q.put((img, orig_img, inp, im_dim_list))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
Example #28
0
from queue import Queue

# Initializing a queue
queue = Queue(maxsize=3)

# qsize() gives the number of items currently in the queue (0 here)
print(queue.qsize())

# Adding of element to queue
queue.put('a')
queue.put('b')
queue.put('c')

# Return Boolean for Full Queue
print("\nFull: ", queue.full())

# Removing element from queue
print("\nElements dequeued from the queue")
print(queue.get())
print(queue.get())
print(queue.get())

# Return Boolean for Empty
# Queue
print("\nEmpty: ", queue.empty())

queue.put(1)
print("\nEmpty: ", queue.empty())
print("Full: ", queue.full())
Example #29
0
class VideoCap:

    def __init__(self, video_source=0, qsize=512):
        # Opening the video
        self.vid = cv2.VideoCapture(video_source)

        if not self.vid.isOpened():
            raise ValueError("Unable to open the video", video_source)

        self.stopped = False
        self.OnHold = False
        self.Qsize = qsize

        # Getting video's width and height
        self.width = self.vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

        # Bufferization
        self.Q = Queue(maxsize=qsize)
        self.thread = threading.Thread(target=self.get_frame, args=())
        self.thread.daemon = True
        self.timestamps = np.empty(qsize)
        self.position_in_Q = 0
        self.state = threading.Condition()

        # video length
        self.length = int(self.vid.get(cv2.CAP_PROP_FRAME_COUNT))


    def __del__(self):
        if self.vid.isOpened():
            self.vid.release()

    def get_frame(self):
        while self.vid.isOpened():
            if self.stopped:
                break

            with self.state:
                if self.OnHold:
                    self.position_in_Q = 0
                    self.state.wait()

            if not self.Q.full():
                grabbed, frame = self.vid.read()
                if self.position_in_Q < len(self.timestamps):
                    self.timestamps[self.position_in_Q] = self.vid.get(cv2.CAP_PROP_POS_MSEC)
                    self.position_in_Q += 1
                else:
                    self.timestamps[:-1] = self.timestamps[1:]
                    self.timestamps[-1] = self.vid.get(cv2.CAP_PROP_POS_MSEC)

                if not grabbed:
                    self.stop()
                    break

                self.Q.put(frame)

            else:
                time.sleep(0.1)

    def start(self):
        # start a thread to read frames from the file video stream
        self.thread.start()
        return self

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue. If stream is not stopped, try to wait a moment
        tries = 0
        while self.Q.qsize() == 0 and not self.stopped and tries < 5:
            time.sleep(0.05)
            tries += 1

        return self.Q.qsize() > 0

    def resume(self):
        # resuming a thread
        with self.state:
            self.OnHold = False
            self.state.notify()

    def pause(self):
        # blocking a thread
        with self.state:
            self.OnHold = True

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
Example #30
0
class Lane:
    def __init__(self, idx):
        self.number = idx + 1
        self.index = idx
        self.reporting = None
        self.connection = None
        self.address = None
        self.queue = Queue(maxsize=2)
        self.host = ''
        self.port = ''
        self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.check_button = None
        self.drop_button = None

    def add_lane_to_window(self, parent: tk.Widget):
        self.reporting = tk.BooleanVar()
        self.reporting.set(True)
        frame = tk.Frame(parent)
        frame.pack()
        self.check_button = tk.Checkbutton(frame,
                                           text="Lane {}".format(self.number),
                                           variable=self.reporting)
        self.check_button.pack(side=tk.LEFT)
        self.drop_button = tk.Button(frame,
                                     text="Drop",
                                     command=self.drop_connection)
        self.drop_button.pack(side=tk.RIGHT)

    def drop_connection(self):
        if self.drop_button['text'] == 'Drop':
            try:
                self._socket.shutdown(socket.SHUT_RDWR)
            except OSError:
                pass
            self._socket.close()
            self.drop_button['text'] = 'Connect'
        else:
            self.start_socket()

    def start_socket(self):
        Thread(target=self._await_connection, daemon=True).start()

    def _await_connection(self):
        print("Setting up connection to {}:{}".format(self.host, self.port))
        while True:
            try:
                self._socket.bind((self.host, self.port))
            except OSError:
                print(
                    f"Unable to bind to {self.host}:{self.port}. The address is probably already in use. Retrying in 5 seconds."
                )
                time.sleep(5.0)
            else:
                break
        self._socket.listen(2)
        print("Awaiting connection on {}:{}".format(self.host, self.port))
        new_conn, new_addr = self._socket.accept()
        mutex.acquire()
        self.queue.put(new_conn)
        self.queue.put(new_addr)
        print("Connection from {} established.".format(self.host))
        self.queue.task_done()
        mutex.release()

    def close_socket(self):
        try:
            self.connection.close()
        except AttributeError:
            pass

    def get_connections(self):
        if self.queue.full():
            self.connection = self.queue.get()
            self.address = self.queue.get()
        return self.connection, self.address

    def shutdown_connection(self):
        try:
            self.connection.shutdown(socket.SHUT_RDWR)
        except AttributeError:
            pass

    def close_connection(self):
        try:
            self.connection.close()
        except AttributeError:
            pass
Example #31
0
# Note: this snippet exercises a Queue implementation exposing head, tail,
# count(), capacity, put_in() and iteration; the standard library queue.Queue
# has none of these, so the import below presumably resolves to a local
# `queue` module shadowing the stdlib one.
from queue import Queue

q = Queue(3)

print("==========================")
print(q.head)
print(q.tail)
print(q.count())
print(q.capacity)
print(q.full())
print(q.empty())
for i in q:
    print(i)

q.put_in(3)

print("==========================")
print(q.head)
print(q.tail)
print(q.count())
print(q.capacity)
print(q.full())
print(q.empty())
for i in q:
    print(i)
    
q.put_in('aaa')

print("==========================")
print(q.head)
print(q.tail)
Example #32
0
class FifoReadout(object):
    def __init__(self, dut):
        self.dut = dut
        self.callback = None
        self.errback = None
        self.readout_thread = None
        self.worker_thread = None
        self.watchdog_thread = None
        self.fill_buffer = False
        self.readout_interval = 0.05
        self._moving_average_time_period = 10.0
        self._data_deque = deque()
        self._data_buffer = deque()
        self._words_per_read = deque(
            maxlen=int(self._moving_average_time_period /
                       self.readout_interval))
        self._result = Queue(maxsize=1)
        self._calculate = Event()
        self.stop_readout = Event()
        self.force_stop = Event()
        self.timestamp = None
        self.update_timestamp()
        self._is_running = False
        self.reset_sram_fifo()
        self._record_count_lock = Lock()
        self.set_record_count(0, reset=True)

    @property
    def is_running(self):
        return self._is_running

    @property
    def is_alive(self):
        if self.worker_thread:
            return self.worker_thread.is_alive()
        else:
            return False

    @property
    def data(self):
        if self.fill_buffer:
            return self._data_buffer
        else:
            logging.warning(
                'Data requested but software data buffer not active')

    def data_words_per_second(self):
        if self._result.full():
            self._result.get()
        self._calculate.set()
        try:
            result = self._result.get(timeout=self.readout_interval)
        except Empty:
            self._calculate.clear()
            return None
        return result / float(self._moving_average_time_period)

    def start(self,
              callback=None,
              errback=None,
              reset_sram_fifo=False,
              clear_buffer=False,
              fill_buffer=False,
              no_data_timeout=None):
        if self._is_running:
            raise RuntimeError(
                'Readout already running: use stop() before start()')
        self._is_running = True
        logging.info('Starting FIFO readout...')
        self.callback = callback
        self.errback = errback
        self.fill_buffer = fill_buffer
        self._record_count = 0
        if reset_sram_fifo:
            self.reset_sram_fifo()
        else:
            fifo_size = self.dut['stream_fifo']['SIZE']
            if fifo_size != 0:
                logging.warning(
                    'SRAM FIFO not empty when starting FIFO readout: size = %i',
                    fifo_size)
        self._words_per_read.clear()
        if clear_buffer:
            self._data_deque.clear()
            self._data_buffer.clear()
        self.stop_readout.clear()
        self.force_stop.clear()
        if self.errback:
            self.watchdog_thread = Thread(target=self.watchdog,
                                          name='WatchdogThread')
            self.watchdog_thread.daemon = True
            self.watchdog_thread.start()
        if self.callback:
            self.worker_thread = Thread(target=self.worker,
                                        name='WorkerThread')
            self.worker_thread.daemon = True
            self.worker_thread.start()
        self.readout_thread = Thread(
            target=self.readout,
            name='ReadoutThread',
            kwargs={'no_data_timeout': no_data_timeout})
        self.readout_thread.daemon = True
        self.readout_thread.start()

    def stop(self, timeout=10.0):
        if not self._is_running:
            raise RuntimeError(
                'Readout not running: use start() before stop()')
        self._is_running = False
        self.stop_readout.set()
        try:
            self.readout_thread.join(timeout=timeout)
            if self.readout_thread.is_alive():
                self.force_stop.set()
                if timeout:
                    raise StopTimeout(
                        'FIFO stop timeout after %0.1f second(s)' % timeout)
                else:
                    logging.warning('FIFO stop timeout')
        except StopTimeout as e:
            if self.errback:
                self.errback(sys.exc_info())
            else:
                logging.error(e)
        if self.readout_thread.is_alive():
            self.readout_thread.join()
        if self.errback:
            self.watchdog_thread.join()
        if self.callback:
            self.worker_thread.join()
        self.callback = None
        self.errback = None
        logging.info('Stopped FIFO readout')

    def print_readout_status(self):
        tlu_lost_count = self.get_data_tlu_fifo_lost_count()
        logging.info('Received words: %d', self._record_count)
        logging.info('Data queue size: %d', len(self._data_deque))
        logging.info('SRAM FIFO size: %d', self.dut['stream_fifo']['SIZE'])
        logging.info('Channel:                     %s', " | ".join(['TLU']))
        logging.info('Lost data counter:           %s',
                     " | ".join([str(tlu_lost_count).rjust(3)]))

        if tlu_lost_count:
            logging.warning('Errors detected')

    def readout(self, no_data_timeout=None):
        '''Readout thread continuously reading SRAM.

        Readout thread, which uses read_data() and appends data to self._data_deque (collection.deque).
        '''
        logging.debug('Starting %s', self.readout_thread.name)
        curr_time = self.get_float_time()
        time_wait = 0.0
        while not self.force_stop.wait(time_wait if time_wait >= 0.0 else 0.0):
            try:
                time_read = time()
                if no_data_timeout and curr_time + no_data_timeout < self.get_float_time(
                ):
                    raise NoDataTimeout(
                        'Received no data for %0.1f second(s)' %
                        no_data_timeout)
                data = self.read_data()
                self._record_count += len(data)
                # print self._record_count
            except Exception:
                logging.warning('Exception occurred %s', sys.exc_info()[2])
                no_data_timeout = None  # raise exception only once
                if self.errback:
                    self.errback(sys.exc_info())
                else:
                    raise
                if self.stop_readout.is_set():
                    break
            else:
                data_words = data.shape[0]
                if data_words > 0:
                    last_time, curr_time = self.update_timestamp()
                    status = 0
                    skip_triggers = self.get_data_tlu_skipped_trigger_count()
                    if self.callback:
                        self._data_deque.append((data, last_time, curr_time,
                                                 status, skip_triggers))
                    if self.fill_buffer:
                        self._data_buffer.append((data, last_time, curr_time,
                                                  status, skip_triggers))
                    self._words_per_read.append(data_words)
                elif self.stop_readout.is_set():
                    break
                else:
                    self._words_per_read.append(0)
            finally:
                time_wait = self.readout_interval - (time() - time_read)
            if self._calculate.is_set():
                self._calculate.clear()
                self._result.put(sum(self._words_per_read))
        if self.callback:
            self._data_deque.append(None)  # last item, will stop worker
        logging.debug('Stopped %s', self.readout_thread.name)

    def worker(self):
        '''Worker thread continuously calling callback function when data is available.
        '''
        logging.debug('Starting %s', self.worker_thread.name)
        while True:
            try:
                data = self._data_deque.popleft()
            except IndexError:
                self.stop_readout.wait(
                    self.readout_interval /
                    2.0)  # sleep a little bit, reducing CPU usage
            else:
                if data is None:  # if None then exit
                    break
                else:
                    try:
                        self.callback(data)
                    except Exception:
                        self.errback(sys.exc_info())

        logging.debug('Stopped %s', self.worker_thread.name)

    def watchdog(self):
        logging.debug('Starting %s', self.watchdog_thread.name)
        while True:
            try:
                if self.get_data_tlu_fifo_lost_count():
                    raise FifoError('TLU FIFO lost data error(s) detected')
            except Exception:
                self.errback(sys.exc_info())
            if self.stop_readout.wait(self.readout_interval * 10):
                break
        logging.debug('Stopped %s', self.watchdog_thread.name)

    def read_data(self):
        return self.dut.get_fifo_data()

    def update_timestamp(self):
        curr_time = self.get_float_time()
        last_time = self.timestamp
        self.timestamp = curr_time
        return last_time, curr_time

    def read_status(self):
        raise NotImplementedError()

    def get_record_count(self):
        self._record_count_lock.acquire()
        cnt = self._record_count
        self._record_count_lock.release()
        return cnt

    def set_record_count(self, cnt, reset=False):
        self._record_count_lock.acquire()
        if reset:
            self._record_count = cnt
        else:
            self._record_count = self._record_count + cnt
        self._record_count_lock.release()

    def reset_sram_fifo(self):
        fifo_size = self.dut['stream_fifo']['SIZE']
        logging.info('Resetting SRAM FIFO: size = %i', fifo_size)
        self.update_timestamp()
        self.dut['stream_fifo']['RESET']
        sleep(0.2)  # sleep here for a while
        fifo_size = self.dut['stream_fifo']['SIZE']
        if fifo_size != 0:
            logging.warning('SRAM FIFO not empty after reset: size = %i',
                            fifo_size)

    def get_data_tlu_fifo_lost_count(self, channels=None):
        return self.dut['tlu_master'].LOST_DATA_CNT

    def get_data_tlu_skipped_trigger_count(self):
        return self.dut['tlu_master'].SKIP_TRIG_COUNTER

    def get_float_time(self):
        '''Return the time as a double precision float (Time64 in pytables), for mapping to and from Python datetime objects.
        '''
        t1 = time()
        t2 = datetime.datetime.fromtimestamp(t1)
        return mktime(t2.timetuple()) + 1e-6 * t2.microsecond
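# A compact sketch of the single-slot handoff used by FifoReadout above: the
# readout thread publishes a result into a Queue(maxsize=1) only when a
# consumer has raised the `calculate` Event, and data_words_per_second()
# drains any stale value before asking for a fresh one. Names below are
# illustrative, not part of the class.
from queue import Empty, Queue
from threading import Event, Thread
from time import sleep

result_box = Queue(maxsize=1)
calculate = Event()
stop = Event()


def producer():
    count = 0
    while not stop.is_set():
        count += 1                      # stand-in for reading hardware words
        if calculate.is_set():
            calculate.clear()
            result_box.put(count)
        sleep(0.01)


Thread(target=producer, daemon=True).start()

if result_box.full():                   # discard a stale result, if any
    result_box.get()
calculate.set()
try:
    print("latest count:", result_box.get(timeout=1.0))
except Empty:
    calculate.clear()
    print("no result in time")
stop.set()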
Example #33
0
class VideoLoader:
    def __init__(self, path, batchSize=1, queueSize=50):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.path = path
        self.stream = cv2.VideoCapture(path)
        assert self.stream.isOpened(), 'Cannot capture source'
        self.stopped = False

        self.batchSize = batchSize
        self.datalen = int(self.stream.get(cv2.CAP_PROP_FRAME_COUNT))
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def length(self):
        return self.datalen

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        stream = cv2.VideoCapture(self.path)
        assert stream.isOpened(), 'Cannot capture source'

        for i in range(self.num_batches):
            img = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize, self.datalen)):
                inp_dim = int(opt.inp_dim)
                (grabbed, frame) = stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.Q.put((None, None, None, None))
                    print('===========================> This video get ' +
                          str(k) + ' frames in total.')
                    sys.stdout.flush()
                    return
                # process and add the frame to the queue
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)

                img.append(img_k)
                orig_img.append(orig_img_k)
                im_name.append(str(k) + '.jpg')
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                # Human Detection
                img = torch.cat(img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list_ = im_dim_list

            while self.Q.full():
                time.sleep(2)

            self.Q.put((img, orig_img, im_name, im_dim_list))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def getitem(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        return self.Q.qsize()
Example #34
0
from collections import deque
from queue import LifoQueue, Queue

# Stack implementation using deque
stack = deque()
stack.append('a1')
stack.append('b1')
stack.append('c1')

print(stack.pop())
print(stack)

stacker=LifoQueue(maxsize=5)
print(stacker.qsize())

stacker.put('Hello')
stacker.put('World')
print('Size is ',stacker.qsize())
print(stacker.get())



#Queue

q=Queue(maxsize=4)

q.put('asd')
q.put('sad')
print('Queue size is', q.qsize())
print('Queue is full?', q.full())
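
# A quick sketch contrasting retrieval order: LifoQueue pops the most
# recently added item first, while Queue returns items in insertion order.
fifo, lifo = Queue(), LifoQueue()
for letter in 'abc':
    fifo.put(letter)
    lifo.put(letter)

print([fifo.get() for _ in range(3)])   # ['a', 'b', 'c']
print([lifo.get() for _ in range(3)])   # ['c', 'b', 'a']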
Example #35
0
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet(
            "/home/a/roborts_project/src/alpha_pose/src/yolo/cfg/yolov3-spp.cfg"
        )
        self.det_model.load_weights(
            '/home/a/roborts_project/src/alpha_pose/src/models/yolo/yolov3-spp.weights'
        )
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cpu()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # Human Detection
                img = img.cpu()
                prediction = self.det_model(img, CUDA=False)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #36
0
class GridEYEKit():
    def __init__(self):
        self._connected = False
        self.ser = serial.Serial()  #serial port object
        self.tarr_queue = Queue(1)
        self.thermistor_queue = Queue(1)
        self.multiplier_tarr = 0.25
        self.multiplier_th = 0.0125
        self._error = 0
        # if not self.connect():
        #     print "please connect Eval Kit"
        t = threading.Thread(target=self._connected_thread)
        t.start()

    def connect(self):
        """trys to open ports and look for valid data
        returns: true - connection good
        returns: False - not found / unsupported plattform
        """
        if self.ser.isOpen():
            self.ser.close()
        else:
            try:
                ports_available = self._list_serial_ports()
            except EnvironmentError:
                self._connected = False
                return False
            """try if kit is connected to com port"""
            for port in ports_available:
                self.ser = serial.Serial(
                    port=port, baudrate=57600, timeout=0.5
                )  #COM Port error is handled in list serial ports
                for i in range(5):
                    if self.serial_readline(
                            bytes_timeout=300):  # if 3-byte identifier found
                        self._connected = True
                        return True  # GridEye found
                self.ser.close()
            self._connected = False
            return False

    def _list_serial_ports(self):
        """ This function is taken from Stackoverflow and will list all serial ports"""
        """Lists serial ports

        :raises EnvironmentError:
            On unsupported or unknown platforms
        :returns:
            A list of available serial ports
        """
        if sys.platform.startswith('win'):
            ports = ['COM' + str(i + 1) for i in range(256)]

        elif sys.platform.startswith('linux') or sys.platform.startswith(
                'cygwin'):
            # this is to exclude your current terminal "/dev/tty"
            ports = glob.glob('/dev/tty[A-Za-z]*')

        elif sys.platform.startswith('darwin'):
            ports = glob.glob('/dev/tty.*')

        else:
            raise EnvironmentError('Unsupported platform')

        result = []
        for port in ports:
            try:
                s = serial.Serial(port)
                s.close()
                result.append(port)
            except (OSError, serial.SerialException):
                pass
        return result

    def _get_GridEye_data(self):
        """ get grid Eye data fron serial port and convert it to numpy array - also for further calculations"""
        tarr = np.zeros((8, 8))
        thermistor = 0
        data = self.serial_readline()  # read grideye value
        if len(data) >= 135:
            self._error = 0
            if not data[
                    1] & 0b00001000 == 0:  # Grid-Eye uses 12 bit signed data for calculating thermistor
                data[1] &= 0b00000111
                thermistor = -struct.unpack('<h',
                                            data[0:2])[0] * self.multiplier_th
            else:
                thermistor = struct.unpack('<h',
                                           data[0:2])[0] * self.multiplier_th
            r = 0
            c = 0
            for i in range(2, 130, 2):  # 2,130
                # convert data to array
                if not data[
                        i +
                        1] & 0b00001000 == 0:  # Grid-Eye uses 12 bit two's complement for calculating data
                    data[
                        i +
                        1] |= 0b11111000  # if 12 bit complement, set bits 12 to 16 to convert to 16 bit two's complement
                tarr[r][c] = struct.unpack(
                    '<h', data[i:i + 2]
                )[0] * self.multiplier_tarr  # combine high and low byte into a short int and calculate temperature
                c = c + 1
                if c == 8:
                    r = r + 1
                    c = 0
        else:
            self._error = self._error + 1
            print("Serial Fehler")
        """ Flip Image L-R or U-D""" ""
        # tarr = np.fliplr(tarr)
        # tarr = np.flipud(tarr)
        return thermistor, tarr

    def _connected_thread(self):
        """" Background task reads Serial port and puts one value to queue"""
        while True:
            if self._connected == True:
                data = self._get_GridEye_data()
                if self.tarr_queue.full():
                    self.tarr_queue.get()
                    self.tarr_queue.put(data[1])
                else:
                    self.tarr_queue.put(data[1])

                if self.thermistor_queue.full():
                    self.thermistor_queue.get()
                    self.thermistor_queue.put(data[0])
                else:
                    self.thermistor_queue.put(data[0])
                # was arbitrarily 5?
                if self._error > 10:
                    try:
                        self.ser.close()
                    except:
                        pass
                    self._connected = False
                    self._error = 0

    def get_thermistor(self):
        try:
            return self.thermistor_queue.get(True, 1)
        except:
            sleep(0.1)
            return 0

    def get_temperatures(self):
        try:
            return self.tarr_queue.get(True, 1)
        except:
            sleep(0.1)
            return np.zeros((8, 8))

    def get_raw(self):
        try:
            return self.ser.readline()
        except:
            sleep(0.1)
            return np.zeros((8, 8))

    def close(self):
        self._connected = False
        try:
            self.ser.close()
        except:
            pass

    def serial_readline(self, eol=b'***', bytes_timeout=300):
        """In Python 2.7 serial.readline() cannot handle custom EOL strings, hence this own implementation.
        Returns a byte array if the EOL marker is found within bytes_timeout bytes.
        Returns an empty array (len 0) if not."""
        length = len(eol)
        line = bytearray()
        while True:
            c = self.ser.read(1)
            if c:
                line += c
                if line[-length:] == eol:
                    break
                if len(line) > bytes_timeout:  #timeout
                    return []
            else:
                break
        return line
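# Usage sketch (an addition for illustration, not part of the original snippet). It assumes
# the class above is importable as GridEye and that its constructor takes the serial port
# name; both the class name and the port are assumptions.
if __name__ == '__main__':
    sensor = GridEye('/dev/ttyUSB0')        # hypothetical constructor signature
    try:
        ambient = sensor.get_thermistor()   # blocks up to 1 s, returns 0 on timeout
        pixels = sensor.get_temperatures()  # 8x8 numpy array, zeros on timeout
        print('thermistor: %.2f C  mean pixel: %.2f C' % (ambient, pixels.mean()))
    finally:
        sensor.close()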
Example #37
0
class VideoDetectionLoader:
    def __init__(self, path, batchSize=4, queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cpu()
        self.det_model.eval()

        self.stream = cv2.VideoCapture(path)
        assert self.stream.isOpened(), 'Cannot capture source'
        self.stopped = False
        self.batchSize = batchSize
        self.datalen = int(self.stream.get(cv2.CAP_PROP_FRAME_COUNT))
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)

    def length(self):
        return self.datalen

    def len(self):
        return self.Q.qsize()

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping the whole video
        for i in range(self.num_batches):
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize, self.datalen)):
                (grabbed, frame) = self.stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.stop()
                    return
                # process and add the frame to the queue
                inp_dim = int(opt.inp_dim)
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
                inp_k = im_to_torch(orig_img_k)

                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                img = Variable(torch.cat(img)).cpu()
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list = im_dim_list.cpu()

                prediction = self.det_model(img, CUDA=False)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        while self.Q.full():
                            time.sleep(0.2)
                        self.Q.put((inp[k], orig_img[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k],
                            scores[dets[:, 0] == k]))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
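# Usage sketch (added for illustration): a minimal consumer loop draining the detection
# queue. The video path is a placeholder assumption; opt, prep_frame and the YOLO model
# files come from the surrounding AlphaPose-style setup shown above.
det_loader = VideoDetectionLoader('examples/demo.mp4', batchSize=4).start()
for _ in range(det_loader.length()):
    inp, orig_img, boxes, scores = det_loader.read()   # blocks until a frame is queued
    if boxes is None:
        continue                                       # no detections for this frame
    # ... hand (inp, boxes, scores) to the pose-estimation stage here ...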
Example #38
0
class Socket(Service):
    __slots__ = ("connection_pool", "timeout", "connection",
                 "send_and_receive")

    on_unix = getattr(socket, "AF_UNIX", False)
    Connection = namedtuple("Connection", ("connect_to", "proto", "sockopts"))
    protocols = {
        "tcp": (socket.AF_INET, socket.SOCK_STREAM),
        "udp": (socket.AF_INET, socket.SOCK_DGRAM),
    }
    streams = set(("tcp", ))
    datagrams = set(("udp", ))
    inet = set(("tcp", "udp"))
    unix = set()

    if on_unix:
        protocols.update({
            "unix_dgram": (socket.AF_UNIX, socket.SOCK_DGRAM),
            "unix_stream": (socket.AF_UNIX, socket.SOCK_STREAM),
        })
        streams.add("unix_stream")
        datagrams.add("unix_dgram")
        unix.update(("unix_stream", "unix_dgram"))

    def __init__(self,
                 connect_to,
                 proto,
                 version=None,
                 headers=empty.dict,
                 timeout=None,
                 pool=0,
                 raise_on=(500, ),
                 **kwargs):
        super().__init__(timeout=timeout,
                         raise_on=raise_on,
                         version=version,
                         **kwargs)
        connect_to = tuple(connect_to) if proto in Socket.inet else connect_to
        self.timeout = timeout
        self.connection = Socket.Connection(connect_to, proto, set())
        self.connection_pool = Queue(maxsize=pool if pool else 1)

        if proto in Socket.streams:
            self.send_and_receive = self._stream_send_and_receive
        else:
            self.send_and_receive = self._dgram_send_and_receive

    def settimeout(self, timeout):
        """Set the default timeout"""
        self.timeout = timeout

    def setsockopt(self, *sockopts):
        """Add socket options to set"""
        if type(sockopts[0]) in (list, tuple):
            for sock_opt in sockopts[0]:
                level, option, value = sock_opt
                self.connection.sockopts.add((level, option, value))
        else:
            level, option, value = sockopts
            self.connection.sockopts.add((level, option, value))

    def _register_socket(self):
        """Create/Connect socket, apply options"""
        _socket = socket.socket(*Socket.protocols[self.connection.proto])
        _socket.settimeout(self.timeout)

        # Reconfigure original socket options.
        if self.connection.sockopts:
            for sock_opt in self.connection.sockopts:
                level, option, value = sock_opt
                _socket.setsockopt(level, option, value)

        _socket.connect(self.connection.connect_to)
        return _socket

    def _stream_send_and_receive(self, _socket, message, *args, **kwargs):
        """TCP/Stream sender and receiver"""
        data = BytesIO()

        _socket_fd = _socket.makefile(mode="rwb", encoding="utf-8")
        _socket_fd.write(message.encode("utf-8"))
        _socket_fd.flush()

        for received in _socket_fd:
            data.write(received)
        data.seek(0)

        _socket_fd.close()
        return data

    def _dgram_send_and_receive(self,
                                _socket,
                                message,
                                buffer_size=4096,
                                *args):
        """User Datagram Protocol sender and receiver"""
        _socket.send(message.encode("utf-8"))
        data, address = _socket.recvfrom(buffer_size)
        return BytesIO(data)

    def request(self, message, timeout=False, *args, **kwargs):
        """Populate connection pool, send message, return BytesIO, and cleanup"""
        if not self.connection_pool.full():
            self.connection_pool.put(self._register_socket())

        _socket = self.connection_pool.get()

        # setting timeout to None enables the socket to block.
        if timeout or timeout is None:
            _socket.settimeout(timeout)

        data = self.send_and_receive(_socket, message, *args, **kwargs)

        if self.connection.proto in Socket.streams:
            _socket.shutdown(socket.SHUT_RDWR)

        return Response(data, None, None)
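# Usage sketch (added; the host, port and message are placeholder assumptions). The three
# positional arguments to setsockopt match the (level, option, value) form handled above.
echo = Socket(connect_to=('127.0.0.1', 7007), proto='tcp', timeout=2, pool=4)
echo.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
reply = echo.request('ping\n')   # returns a Response wrapping a BytesIO of the reply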
Example #39
0
class ImageLoader:
    def __init__(self, im_names, batchSize=1, format='yolo', queueSize=50):
        self.img_name_num = 0
        self.img_dir = opt.inputpath
        self.imglist = im_names
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])
        self.format = format

        self.batchSize = batchSize
        self.datalen = len(self.imglist)  # 1
        leftover = 0
        if (self.datalen) % batchSize:  # 1 % 1 = 0
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover  # 1 // 1 + 0 = 1

        # initialize the queue used to store data
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if self.format == 'ssd':
            if opt.sp:
                p = Thread(target=self.getitem_ssd, args=())
            else:
                p = mp.Process(target=self.getitem_ssd, args=())
        elif self.format == 'yolo':
            if opt.sp:
                p = Thread(target=self.getitem_yolo, args=())
            else:
                p = mp.Process(target=self.getitem_yolo, args=())
        else:
            raise NotImplementedError
        p.daemon = True
        p.start()
        return self

    def getitem_ssd(self):
        length = len(self.imglist)
        for index in range(length):
            im_name = self.imglist[index].rstrip('\n').rstrip('\r')
            im_name = os.path.join(self.img_dir, im_name)
            im = Image.open(im_name)
            inp = load_image(im_name)
            if im.mode == 'L':
                im = im.convert('RGB')

            ow = oh = 512
            im = im.resize((ow, oh))
            im = self.transform(im)
            while self.Q.full():
                time.sleep(2)
            self.Q.put((im, inp, im_name))

    def getitem_yolo(self):
        for i in range(self.num_batches):  # for i in range(1):
            img = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize,
                               self.datalen)):  # for k in range(0, 1):
                inp_dim = int(opt.inp_dim)
                # im_name_k = self.imglist[k].rstrip('\n').rstrip('\r')

                # im_name_k = os.path.join(self.img_dir, im_name_k)
                img_k, orig_img_k, im_dim_list_k = prep_frame(
                    self.imglist[k], inp_dim)

                img.append(img_k)
                orig_img.append(orig_img_k)
                # im_name.append(im_name_k)
                im_name.append(str(self.img_name_num) + '.jpg')
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                # Human Detection
                img = torch.cat(img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list_ = im_dim_list

            while self.Q.full():
                time.sleep(2)

            self.Q.put((img, orig_img, im_name, im_dim_list))
            self.img_name_num += 1

    def getitem(self):
        return self.Q.get()

    def length(self):
        return len(self.imglist)

    def len(self):
        return self.Q.qsize()
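# Usage sketch (added): im_names is assumed to be a list of frames already loaded as numpy
# arrays (the yolo branch above passes them straight to prep_frame); opt comes from the
# surrounding AlphaPose-style configuration.
loader = ImageLoader(im_names, batchSize=1, format='yolo').start()
img, orig_img, im_name, im_dim_list = loader.getitem()   # one batch, blocks on the queue
# ... run the person detector on `img`, then map boxes back with `im_dim_list` ...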
Example #40
0
class VideoStreamer:
    def __init__(self,
                 twitch_url,
                 queueSize=1,
                 resolution='1080p60',
                 n_frame=60):
        self.stopped = False
        self.twitch_url = twitch_url
        self.res = resolution
        self.n_frame = n_frame

        # initialize the queue used to store frames read from
        # the video stream
        self.Q = Queue(maxsize=queueSize)
        checkIfStreamsWorks = self.create_pipe()

        if checkIfStreamsWorks:
            self.start_buffer()

    def create_pipe(self):
        streamer_name = self.twitch_url.split("/")[3]
        try:
            streams = streamlink.streams(self.twitch_url)
        except streamlink.exceptions.NoPluginError:
            print("NO STREAM AVAILABLE for " + streamer_name)
            return False
        except Exception:
            print("NO STREAM AVAILABLE (unexpected error) " + streamer_name)
            return False

        #print("available streams: "+ str(streams))

        resolutions = {
            '360p': {
                "byte_lenght": 640,
                "byte_width": 360
            },
            '480p': {
                "byte_lenght": 854,
                "byte_width": 480
            },
            '720p': {
                "byte_lenght": 1280,
                "byte_width": 720
            },
            '1080p60': {
                "byte_lenght": 1920,
                "byte_width": 1080
            }
        }
        if self.res in streams:
            finalRes = self.res
        else:
            for key in resolutions:
                if key != self.res and key in streams:
                    print("USED FALL BACK " + key)
                    finalRes = key
                    break
            else:  # this else belongs to the for loop: it only runs when the loop did not break, see https://docs.python.org/2/tutorial/controlflow.html#break-and-continue-statements-and-else-clauses-on-loops
                print("COULD NOT FIND STREAM " + streamer_name)
                return False

        self.byte_lenght = resolutions[finalRes]["byte_lenght"]
        self.byte_width = resolutions[finalRes]["byte_width"]

        stream = streams[finalRes]
        self.stream_url = stream.url

        self.pipe = sp.Popen(
            [
                'ffmpeg',
                "-i",
                self.stream_url,
                "-loglevel",
                "quiet",  # no text output
                "-an",  # disable audio
                "-f",
                "image2pipe",
                "-pix_fmt",
                "bgr24",
                "-vcodec",
                "rawvideo",
                "-"
            ],
            stdin=sp.PIPE,
            stdout=sp.PIPE)
        return True

    def start_buffer(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update_buffer, args=())
        t.daemon = True
        t.start()
        return self

    def update_buffer(self):

        count_frame = 0

        while True:

            if count_frame % self.n_frame == 0:

                raw_image = self.pipe.stdout.read(
                    self.byte_lenght * self.byte_width *
                    3)  # read length*width*3 bytes (= 1 frame)

                frame = numpy.frombuffer(raw_image, dtype='uint8').reshape(
                    (self.byte_width, self.byte_lenght, 3))

                if not self.Q.full():
                    self.Q.put(frame)
                    count_frame += 1
                else:
                    count_frame += 1
                    continue
            else:
                count_frame += 1
                continue

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
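# Usage sketch (added; the channel URL is a placeholder assumption). read() blocks until
# the buffer thread has decoded a frame from the ffmpeg pipe.
streamer = VideoStreamer('https://www.twitch.tv/some_channel', resolution='720p', n_frame=60)
frame = streamer.read()   # numpy uint8 array of shape (height, width, 3), BGR order
# ... run detection on `frame` or display it with cv2.imshow ...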
Example #41
0
class BaseCrawl(object):
    """基本类"""
    proxy_dict = {1: get_remote_proxies, 2: get_data5u_proxies, 3: ''}

    def __init__(self, ):
        self._start_time = time.time()
        self.header = self.headers_list  # User-Agent list
        self.proxies = None  # proxies
        self._temp_container = set()  # temporary container
        self._formal_container = set()  # formal (final) container
        self._error_container = set()  # error container
        self._data_conn = None  # data handle
        self._mysql_conn = None  # MySQL connection handle
        self._data_conn_flag = None  # type of the data handle; later operations branch on it
        self.queue = Queue(maxsize=120)  # queue

    @property
    def headers_list(self):
        """
        Return the User-Agent template list
        :return:
        """
        return USER_AGENT

    @property
    def get_headers(self):
        ua = random.choice(self.header)
        header = {'User-Agent': ua}
        return header

    def change_container(self, flag, types):
        """
        Change the type of a container
        :param flag: which container - temp, formal or error
        :param types: target type
        :return:
        """
        _types_dict = {'list': list, 'tuple': tuple, 'dict': dict, 'set': set}
        if flag not in ('temp', 'formal', 'error'):
            raise ValueError('a target container must be specified')
        if types not in _types_dict:
            raise ValueError('unsupported container type')
        if flag == 'temp':
            self._temp_container = _types_dict[types](self._temp_container)
        if flag == 'formal':
            self._formal_container = _types_dict[types](self._formal_container)
        if flag == 'error':
            self._error_container = _types_dict[types](self._error_container)

    def get_proxy(self):
        """
        Get one proxy at random
        :return:
        """
        if self.proxies:
            proxies = random.choice(self.proxies)
            return proxies

    def get_proxies(self, is_online=False):
        """
        Randomly pick one of the available proxy pools
        :param is_online: whether running in production
        :return:
        """
        if not is_online:
            flag = random.randint(1, 3)
        else:
            flag = random.randint(1, 2)
        func = self.proxy_dict[flag]
        if func:
            self.proxies = func()
        else:
            self.proxies = None

    def time_to_timestemp(self, date):
        """
        Convert a date string to a timestamp
        :return:
        """
        if not date:
            return

        if ':' in date:
            try:
                timestemp = int(
                    time.mktime(time.strptime(date, "%Y-%m-%d %H:%M")))
            except Exception:
                timestemp = int(
                    time.mktime(time.strptime(date, "%Y-%m-%d %H:%M:%S")))
        else:
            timestemp = int(time.mktime(time.strptime(date, "%Y-%m-%d")))
        return timestemp

    def timestemp_to_time(self, times):
        """
        Convert a timestamp to a date string
        :param times: timestamp
        :return:
        """
        time_array = time.localtime(times)
        style_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
        return style_time

    def get_sleep_time(self):
        """
        Get a random sleep interval
        :return:
        """
        # number = random.uniform(3, 5)
        number = random.uniform(0.8, 3)
        number = round(number, 2)
        return number

    def get_sleep_time_v2(self):
        """
        Get a random (longer) sleep interval
        :return:
        """
        # number = random.uniform(3, 5)
        number = random.uniform(4, 8)
        number = round(number, 2)
        return number

    def filter_data(self, data):
        """
        Filter data
        :param data: data to check
        :return:
        """
        # if not re.search(r'[\u4e00-\u9fa5]', data):  # only digits and letters, drop it
        #     return True
        for key in FILTER_KEYWORDS:  # contains a blocked keyword, drop it
            if key in data:
                return True

    def request_normal(self,
                       url,
                       method='get',
                       data=None,
                       is_solve=True,
                       headers=None,
                       data_is_json=False):
        """
        Generic request helper
        :param url: request URL
        :param method: HTTP method
        :param data: request payload
        :param is_solve: whether the response body should be decoded
        :param headers: request headers; a random User-Agent is used if omitted
        :param data_is_json: whether the payload should be sent as JSON
        :return:
        """
        req = None
        proxy = self.get_proxy()
        try:
            headers = headers if headers else self.get_headers
            if method.lower() == 'post' or data:
                if data_is_json:
                    req = requests.post(url,
                                        headers=headers,
                                        json=data,
                                        verify=False,
                                        proxies=proxy,
                                        timeout=(3, 7))
                else:
                    req = requests.post(url,
                                        headers=headers,
                                        data=data,
                                        verify=False,
                                        proxies=proxy,
                                        timeout=(3, 7))
            else:
                req = requests.get(url,
                                   headers=headers,
                                   verify=False,
                                   proxies=proxy,
                                   timeout=(3, 7))
        except Exception as e:
            print(e)
            headers = headers if headers else self.get_headers  # switch headers after a failed request, unless fixed headers were passed in
            time.sleep(self.get_sleep_time())
            # drop the proxy that just failed, if it is still in the pool
            if proxy and self.proxies and proxy in self.proxies:
                self.proxies.remove(proxy)
                # if no proxies are left, fetch a fresh pool
                if not self.proxies:
                    self.get_proxies()
            try:
                # retry the request with a new proxy
                proxy = self.get_proxy()
                if method.lower() == 'post' or data:
                    if data_is_json:
                        req = requests.post(url,
                                            headers=headers,
                                            json=data,
                                            verify=False,
                                            proxies=proxy,
                                            timeout=(3, 7))
                    else:
                        req = requests.post(url,
                                            headers=headers,
                                            data=data,
                                            verify=False,
                                            proxies=proxy,
                                            timeout=(3, 7))
                else:
                    req = requests.get(url,
                                       headers=headers,
                                       verify=False,
                                       proxies=proxy,
                                       timeout=(3, 7))
            except Exception as s:
                print(s)
        if req and req.status_code == 200:
            if is_solve:  # decoding requested: return the decoded body
                res = self.solve_response(req)
                if res:
                    return res
            else:  # no decoding needed: return the raw response object
                return req
        time.sleep(self.get_sleep_time_v2())

    def solve_response(self, response, url=None):
        """
        Decode the response body
        :param url: URL being parsed (for logging)
        :param response: response object
        :return:
        """
        # take the response body and try successively more lenient decodings
        result = ''
        try:
            result = response.content.decode('utf-8')
        except Exception:
            try:
                result = response.content.decode('gbk')
            except Exception:
                try:
                    result = response.content.decode('gb18030')
                except Exception:  # undecodable data
                    try:
                        result = response.text
                    except Exception:
                        try:
                            result = response.content.decode('utf-8', 'ignore')
                        except Exception:
                            try:
                                result = response.content.decode(
                                    'gbk', 'ignore')
                            except Exception:
                                try:
                                    result = response.content.decode(
                                        'gb18030', 'ignore')
                                except Exception as e:  # undecodable data
                                    LOG.error(
                                        'decode_url__sp2__%s__sp1__error__sp2__%s'
                                        % (url, e))
        if result:
            return result

    def get_rsa_key(self, res, js=JS):
        """
        Work around the RSA encryption used by the Fujian province site
        :param res: page source returned by the site
        :param js: the JS RSA implementation
        :return:
        """
        start_index = res.index('function RsaFunc')
        end_index = res.index('var isReflash = false;')
        rsafunc = res[start_index:end_index]
        rsafunc += 'return RsaEncrypted;\n}'
        js += rsafunc
        cx = execjs.compile(js)
        cookie = cx.call('RsaFunc')
        return cookie

    def add_to_16(self, s):
        """
        Pad the string to a multiple of 16 bytes
        :param s:
        :return:
        """
        while len(s) % 16 != 0:
            s += (16 - len(s) % 16) * chr(16 - len(s) % 16)
        return str.encode(s)  # return bytes

    def get_secret_url(self, text, key):
        """
        Encrypt part of a URL
        :param text: string to encrypt
        :param key: AES key
        :return:
        """
        aes = AES.new(str.encode(key), AES.MODE_ECB)  # initialise the cipher; ECB mode is used here
        encrypted_text = str(base64.encodebytes(
            aes.encrypt(self.add_to_16(text))),
                             encoding='utf8').replace('\n', '')  # encrypt
        encrypted_text = encrypted_text.replace('/',
                                                "^")  # ddd.replace(/\//g, "^")
        return encrypted_text[:-2]

    def get_real_url(self, first_url, key=AES_KEY):
        """
        aes加密
        :param first_url: 未加密的url
        :param key: key值
        :return:
        """
        aa = first_url.split('/')
        aaa = len(aa)
        bbb = aa[aaa - 1].split('.')
        ccc = bbb[0]
        secret_text = self.get_secret_url(ccc, key=key)
        return first_url.replace(ccc, secret_text)

    def get_file_name(self):
        """
        Create a timestamped output text file
        :return:
        """
        now = time.time()
        file_name = time.strftime("%Y%m%d%H%M%S", time.localtime(now))
        self.file = open('%s.txt' % str(file_name), 'w', encoding='utf-8')
        return file_name + '.txt'

    def get_redis_conn(self, pool=POOL):
        """
        Create a Redis connection handle
        :return:
        """
        conn = redis.Redis(connection_pool=pool)
        return conn

    def get_mysql_conn(self):
        """
        Get a MySQL cursor
        :return:
        """
        conn = pymysql.connect(host=REMOTE_MYSQL_HOST,
                               port=REMOTE_MYSQL_PORT,
                               user=REMOTE_MYSQL_USER,
                               passwd=REMOTE_MYSQL_PASSWORD,
                               db=REMOTE_MYSQL_DB)
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        self._mysql_conn = conn
        return cursor

    def get_mongodb_conn(self, db=''):
        """
        Get a MongoDB client
        :param db:
        :return:
        """
        client = pymongo.MongoClient('mongodb://localhost:27017/')
        return client

    def get_sql_cursor(self, sql_type, *args, **kwargs):
        """
        Get the data handle
        :param sql_type: one of file, redis, mysql, mongodb
        :param args:
        :param kwargs:
        :return:
        """
        if sql_type == 'file':
            self._data_conn = self.get_file_name()
            self._data_conn_flag = 'file'
        elif sql_type == 'redis':
            self._data_conn = self.get_redis_conn()
            self._data_conn_flag = 'redis'
        elif sql_type == 'mysql':
            self._data_conn = self.get_mysql_conn()
            self._data_conn_flag = 'mysql'
        elif sql_type == 'mongodb':
            self._data_conn = self.get_mongodb_conn()
            self._data_conn_flag = 'mongodb'
        else:
            raise ValueError('unknown database handle type')

    def save_file(self):
        """
        Write data to the output file
        :return:
        """
        data = ''  # read data from the formal container
        if not data:
            print('no data')
            return
        file_name = self.get_file_name()  # initialise the output file
        print('saving data to %s in the project root, please wait....' % file_name)
        for item in data:
            'business logic goes here'
            # self.file.write()
        print('wrote %s records' % len(data))
        # back up the data collected in this run
        print('done saving!')

    def save_redis(self):
        """
        Save to Redis
        :return:
        """
        conn = self.get_redis_conn(pool=FIRST_POOL)
        for item in self._formal_container:
            'business logic goes here'
            pass

        print('saved')
        conn.close()

    def save_mongodb(self):
        """
        Save to MongoDB
        :return:
        """

    def save_mysql(self):
        """
        Save to MySQL
        :return:
        """
        sql = 'replace into ....'
        for item in self._formal_container:
            "业务逻辑"
            self._data_conn.execute(sql)
            pass
        self._mysql_conn.close()

    def save_data(self):
        """
        Save data via the configured handle
        :return:
        """
        if not self._data_conn_flag:
            raise ValueError('unknown output database type')
        if self._data_conn_flag == 'file':
            self.save_file()
        elif self._data_conn_flag == 'redis':
            self.save_redis()
        elif self._data_conn_flag == 'mongodb':
            self.save_mongodb()
        elif self._data_conn_flag == 'mysql':
            self.save_mysql()
        self._data_conn.close()

    def login_user(self):
        """
        Login helper
        :return:
        """
        login_url = ''
        data = "用户名,密码"
        self.normal_request(login_url, data=data)

    def get_apscheduler(self):
        """获取定时任务类"""
        cls = BlockingScheduler
        return cls()

    def move_data_queue(self):
        """
        Move data into the queue
        :return:
        """
        if self.queue.full() or len(self.item_data) == 0:  # queue is full or there is no data, bail out
            return
        print('moving data into the queue')
        try:
            for i in range(120):
                if len(self._temp_container) == 0:  # the source ran dry midway, stop
                    break
                item = self.item_data.pop()
                if self.queue.full():  # queue filled up, put the item back and stop
                    if item not in self.item_data:
                        self.item_data.append(item)
                    break
                else:  # still room, enqueue the item
                    self.queue.put(item)
        except Exception as e:
            LOG.error('data to queue error__sp2__%s' % e)
        gc.collect()  # occasional garbage collection

    def save_data_redis(self):
        """
        Save queued items to Redis
        :return:
        """
        if self.queue.empty():  # nothing queued, bail out
            return
        conn = self.get_redis_conn()
        print('%s records collected, saving......' % self.queue.qsize())
        for i in range(120):
            if self.queue.empty():  # queue drained, stop
                break
            item = self.queue.get()
            conn.lpush('data', json.dumps(item))
        print('saved')
        conn.close()
        gc.collect()  # occasional garbage collection

    def deduplicate(self, data):
        """
        Deduplicate
        :return:
        """
        # deduplicate according to the actual data

    def get_current_time(self):
        """
        Get the current timestamp
        :return:
        """
        return time.time()

    def get_execute_time(self):
        """
        Timing helper
        :return:
        """
        print('total time %s' % (self.get_current_time() - self._start_time))

    def crawl(self):
        'business logic goes here'
        pass

    def ocr_get_data(self, data):
        """
        OCR extraction
        :param data: image file name
        :return:
        """
        img_path = os.path.join(BASE_DIR, data)
        if not os.path.exists(img_path):
            raise ValueError('invalid image path')
        if not os.path.isfile(img_path):
            raise ValueError('not a file')
        pytesseract.pytesseract.tesseract_cmd = OCR_PATH

        data = pytesseract.image_to_string(
            Image.open(img_path),
            config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789')
        if data:
            return data

    def regex_data(self, part, data, flag):
        """
        Regex matching helper
        :param part: regular expression
        :param data: data to match against
        :param flag: name of the re function to use, e.g. 'findall' or 'search'
        :return:
        """
        function = getattr(re, flag)
        if function:
            data = function(part, data, re.S)
            return data

    def error_level(self):
        """异常等级"""
        error_dict = {0: 'warning', 1: 'error', 2: ''}

    def freed_memory(self):
        """
        Release memory
        :return:
        """
        gc.collect()

    def auth_area(self, area, source):
        """
        Check that the matched area belongs to the province of the source site
        :param area: area name
        :param source: source site name
        :return:
        """
        if not area or not source:
            return
        # look up the province corresponding to the source
        current_province = None
        for province in CITY_PROVINCE:
            if province in source:
                current_province = province  # a shared city variable could be used here, but that is unsafe with multiple threads
                break
        # get the list of area names belonging to that province
        citys = CITYLIST.get(current_province)
        if not citys:
            area = '全国'
        else:
            # check that the matched area name is in that list
            if area not in citys:
                area = current_province
        return area

    def deduplication_str(self, data):
        """
        Remove duplicate characters without changing their order
        :param data: data to process
        :return:
        """
        temp = list(data)
        end_data = []
        for item in temp:
            if item not in end_data:
                end_data.append(item)
        end_str = ''.join(end_data)
        del temp, end_data
        return end_str

    def deduplication_tuple(self, data):
        """
        Remove duplicate items without changing their order
        :param data: data to process
        :return:
        """
        temp = list(data)
        end_data = []
        for item in temp:
            if item not in end_data:
                end_data.append(item)
        end_tuple = tuple(end_data)
        del temp, end_data
        return end_tuple

    def deduplication_list(self, data):
        """
        Remove duplicate items without changing their order
        :param data: data to process
        :return:
        """
        end_data = []
        for item in data:
            if item not in end_data:
                end_data.append(item)
        return end_data

    def deduplication_normal(self, data):
        """
        Deduplicate a list of dicts without changing their order
        :param data: data to process
        :return:
        """
        func = lambda x, y: x if y in x else x + [y]
        li = reduce(func, [
            [],
        ] + data)
        return li

    def deduplication_normal_v2(self, data):
        """
        Deduplicate a list of dicts without changing their order
        :param data: data to process
        :return:
        """
        temp_list = list(set([str(i) for i in data]))
        li = [eval(i) for i in temp_list]
        return li

    def deduplication_normal_v3(self, data):
        """
        Deduplicate a list of dicts without changing their order
        :param data: data to process
        :return:
        """
        return [dict(t) for t in set([tuple(d.items()) for d in data])]

    def compare_file(self, file1, file2):
        """
        Compare how much two files overlap
        :param file1:
        :param file2:
        :return:
        """
        f1 = open(file1, encoding='utf-8')
        f1_cont = f1.read()
        f1.close()
        f2 = open(file2, encoding='utf-8')
        f2_cont = f2.read()
        f2.close()
        temp_f1_cont = f1_cont.strip().replace('\n', '').replace('\r',
                                                                 '').replace(
                                                                     ' ', '')
        temp_f2_cont = f2_cont.strip().replace('\n', '').replace('\r',
                                                                 '').replace(
                                                                     ' ', '')
        temp_f1_cont = re.sub(
            r''',|\.|。|;|,|\?|?|\*|!|!|\.\.\.|【|】|"|:|“|”|、|\-|=|\(|\)|\\''',
            '', temp_f1_cont)
        temp_f2_cont = re.sub(
            r''',|\.|。|;|,|\?|?|\*|!|!|\.\.\.|【|】|"|:|“|”|、|\-|=|\(|\)|\\''',
            '', temp_f2_cont)
        if temp_f1_cont == temp_f2_cont:
            return '100%'
        else:
            f1_he = md5()
            f1_he.update(f1_cont.encode('utf-8'))
            f2_he = md5()
            f2_he.update(f2_cont.encode('utf-8'))
            if f1_he.hexdigest() == f2_he.hexdigest():
                return '100%'

    def compare_data_fuzzy(self, data1, data2):
        """
        Fuzzy-match two text blocks with the fuzzywuzzy library
        :param data1:
        :param data2:
        :return:
        """
        flag = fuzz.ratio(data1, data2)
        flag2 = fuzz.partial_ratio(data1, data2)
        flag3 = fuzz.token_set_ratio(data1, data2)
        end = (flag + flag2 + flag3) / 3
        if end >= 80:
            return int(end)

    def compare_data_difflib(self, data1, data2):
        """
        Fuzzy-compare two text blocks with the difflib library
        :param data1:
        :param data2:
        :return:
        """
        hd = difflib.HtmlDiff()
        with open('htmlout.html', 'a+') as fo:
            fo.write(hd.make_file(data1, data2))
            fo.close()

    def compare_data_simhash(self, data1, data2):
        """
        Near-duplicate check on text using simhash
        :param data1:
        :param data2:
        :return:
        """
        data1_sim = Simhash(data1)
        data2_sim = Simhash(data2)
        # Hamming distance
        dis = data1_sim.distance(data2_sim)
        if dis < 2:
            return True

    def bloom_data(self, data):
        """
        Deduplicate data with a Bloom filter
        :param data:
        :return:
        """
        bf = BloomFilter(capacity=100)
        end_data = []
        for item in data:
            if item not in bf:
                flag = bf.add(item)
                if not flag:
                    end_data.append(item)
        return end_data

    def generate_base64(self, *args, **kwargs):
        """
        Base64 digest
        :param args:
        :param kwargs:
        :return:
        """
        mds = kwargs.get('md5_str')
        flag_str = kwargs.get('flag_str')
        base64_str = mds + flag_str
        end_str = base64.b64encode(base64_str.encode('utf-8'))
        return end_str

    def solve_base64(self, base_str):
        """
        Base64 decode
        :param base_str:
        :return:
        """
        solve_str = base64.b64decode(base_str)
        return solve_str

    def clear_data(self, flag, data, *args, **kwargs):
        """
        Clean data
        :param flag: data type
        :param data: data to clean
        :return:
        """
        if flag == 'str':  # remove duplicate characters
            self.deduplication_str(data)
        elif flag == 'tuple':
            self.deduplication_tuple(data)
        elif flag == 'list':
            self.deduplication_list(data)

    def run(self):
        """
        Entry point
        :return:
        """
        # get the scheduler
        sched = self.get_apscheduler()
        # periodically move data into the queue
        sched.add_job(self.move_data_queue,
                      'interval',
                      seconds=APS_QUEUE_INTERVAL,
                      max_instances=3)
        # periodically flush the queue to the database
        sched.add_job(self.save_data_redis,
                      'interval',
                      seconds=APS_REDIS_INTERVAL,
                      max_instances=3)
        # start the scheduled jobs
        sched.start()
        # run once immediately at startup
        self.crawl()
        # try:
        #     # get the scheduler
        #     sched = self.get_apscheduler()
        #     # periodically move data into the queue
        #     sched.add_job(self.move_data_queue, 'interval', seconds=APS_QUEUE_INTERVAL, max_instances=3)
        #     # periodically flush the queue to the database
        #     sched.add_job(self.save_data_redis, 'interval', seconds=APS_REDIS_INTERVAL, max_instances=3)
        #     # start the scheduled jobs
        #     sched.start()
        #     # run once immediately at startup
        #     self.request_sites()
        #     # read URLs back from the error log; not used for now, disabled
        #     # self.get_log_urls()
        # except Exception as e:
        #     LOG.error('run_function__sp2__%s' % e)

        # flush any data that has not been saved yet
        if self.item_data:
            self.item_data = self.deduplicate(self._formal_container)
            self.move_data_queue()
            # save to the database
            self.save_data_redis()
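# Usage sketch (added): BaseCrawl is meant to be subclassed; crawl() holds the site-specific
# logic and run() wires up the APScheduler jobs. Note that run() and move_data_queue()
# expect an item_data list on the instance, which the base __init__ does not create.
# The URL below is a placeholder assumption.
class DemoCrawl(BaseCrawl):
    def __init__(self):
        super().__init__()
        self.item_data = []                                       # consumed by move_data_queue()

    def crawl(self):
        html = self.request_normal('https://example.com/list')   # proxied GET with one retry
        if html and not self.filter_data(html):
            self.item_data.append({'source': 'example', 'raw': html[:200]})

if __name__ == '__main__':
    DemoCrawl().run()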
Example #42
0
def test_mutate_input_with_threads():
    """Input is mutable when using the threading backend"""
    q = Queue(maxsize=5)
    Parallel(n_jobs=2,
             backend="threading")(delayed(q.put)(1) for _ in range(5))
    assert q.full()
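# Equivalent sketch (added for illustration): the threading backend runs the delayed calls
# in worker threads of the same process, so they all see and mutate the same Queue object,
# which is why the assertion above holds.
from queue import Queue
from threading import Thread

q = Queue(maxsize=5)
workers = [Thread(target=q.put, args=(1,)) for _ in range(5)]
for w in workers:
    w.start()
for w in workers:
    w.join()
assert q.full()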
Example #43
0
class FileVideoStream:
    def __init__(self, path, transform=None, queue_size=128, play_audio=False):
        """
        Read video file
        :param path: (string) Path to the video
        :param transform: (function with one parameter) If you want to apply a transformation
        :param queue_size: (int) Size of the frame queue
        :param play_audio: (bool) If you want to play the audio of the video
        """
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.stream = cv2.VideoCapture(path)
        self.stopped = False
        self.transform = transform

        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queue_size)
        # initialize thread
        self.thread = Thread(target=self.__update, args=())
        self.thread.daemon = True
        self.play_audio = play_audio
        self.video_path = path

    def start(self):
        """
        Start the video file reading
        :return: (FileVideoStream) the class
        """
        # start a thread to read frames from the file video stream
        self.thread.start()
        return self

    def __update(self):
        """
        Read and add frame into the queue
        """
        player = MediaPlayer(self.video_path) if self.play_audio else None
        # keep looping infinitely
        while True:
            # if the thread indicator variable is set, stop the
            # thread
            if self.stopped:
                break

            # otherwise, ensure the queue has room in it
            if not self.Q.full():
                # read the next frame from the file
                (grabbed, frame) = self.stream.read()

                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.stopped = True
                    continue

                # if there are transforms to be done, might as well
                # do them on producer thread before handing back to
                # consumer thread. ie. Usually the producer is so far
                # ahead of consumer that we have time to spare.
                #
                # Python is not parallel but the transform operations
                # are usually OpenCV native so release the GIL.
                #
                # Really just trying to avoid spinning up additional
                # native threads and overheads of additional
                # producer/consumer queues since this one was generally
                # idle grabbing frames.
                if self.transform:
                    frame = self.transform(frame)

                # add the frame to the queue
                self.Q.put(frame)
            else:
                time.sleep(0.1)  # Rest for 100 ms, we have a full queue
        if player is not None:
            player.close_player()
        self.stream.release()

    def read(self):
        """
        Get frame. Block if necessary until an item is available
        :return: (ndarray) Frame
        """
        # return next frame in the queue
        return self.Q.get()

    def running(self):
        """
        Check whether the file reader is still running
        :return: (bool)
        """
        return self.more() or not self.stopped

    def more(self):
        """
        Look if the his frame into the queue
        :return: (bool)
        """
        # return True if there are still frames in the queue. If stream is not stopped, try to wait a moment
        tries = 0
        while self.Q.qsize() == 0 and not self.stopped and tries < 5:
            time.sleep(0.1)
            tries += 1

        return self.Q.qsize() > 0

    def stop(self):
        """
        Stop the video file reader
        """
        # indicate that the thread should be stopped
        self.stopped = True
        # wait until stream resources are released (producer thread might be still grabbing frame)
        self.thread.join()
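# Usage sketch (added; the video path is a placeholder assumption): grayscale conversion is
# done on the producer thread via the transform hook shown above.
import cv2

fvs = FileVideoStream('videos/example.mp4',
                      transform=lambda f: cv2.cvtColor(f, cv2.COLOR_BGR2GRAY)).start()
while fvs.running():
    gray = fvs.read()   # next transformed frame; blocks if the queue is momentarily empty
    # ... consume `gray` here, e.g. feed it to a detector or cv2.imshow ...
fvs.stop()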
Example #44
0
class Socket(Service):
    __slots__ = ('connection_pool', 'timeout', 'connection', 'send_and_receive')

    on_unix = getattr(socket, 'AF_UNIX', False)
    Connection = namedtuple('Connection', ('connect_to', 'proto', 'sockopts'))
    protocols = {
        'tcp': (socket.AF_INET, socket.SOCK_STREAM),
        'udp': (socket.AF_INET, socket.SOCK_DGRAM),
    }
    streams = set(('tcp',))
    datagrams = set(('udp',))
    inet = set(('tcp', 'udp',))
    unix = set()

    if on_unix:
        protocols.update({
            'unix_dgram': (socket.AF_UNIX, socket.SOCK_DGRAM),
            'unix_stream': (socket.AF_UNIX, socket.SOCK_STREAM)
        })
        streams.add('unix_stream')
        datagrams.add('unix_dgram')
        unix.update(('unix_stream', 'unix_dgram'))

    def __init__(self, connect_to, proto, version=None,
                headers=empty.dict, timeout=None, pool=0, raise_on=(500, ), **kwargs):
        super().__init__(timeout=timeout, raise_on=raise_on, version=version, **kwargs)
        connect_to = tuple(connect_to) if proto in Socket.inet else connect_to
        self.timeout = timeout
        self.connection = Socket.Connection(connect_to, proto, set())
        self.connection_pool = Queue(maxsize=pool if pool else 1)

        if proto in Socket.streams:
            self.send_and_receive = self._stream_send_and_receive
        else:
            self.send_and_receive = self._dgram_send_and_receive

    def settimeout(self, timeout):
        """Set the default timeout"""
        self.timeout = timeout

    def setsockopt(self, *sockopts):
        """Add socket options to set"""
        if type(sockopts[0]) in (list, tuple):
            for sock_opt in sockopts[0]:
                level, option, value = sock_opt
                self.connection.sockopts.add((level, option, value))
        else:
            level, option, value = sockopts
            self.connection.sockopts.add((level, option, value))

    def _register_socket(self):
        """Create/Connect socket, apply options"""
        _socket = socket.socket(*Socket.protocols[self.connection.proto])
        _socket.settimeout(self.timeout)

        # Reconfigure original socket options.
        if self.connection.sockopts:
            for sock_opt in self.connection.sockopts:
                level, option, value = sock_opt
                _socket.setsockopt(level, option, value)

        _socket.connect(self.connection.connect_to)
        return _socket

    def _stream_send_and_receive(self, _socket, message, *args, **kwargs):
        """TCP/Stream sender and receiver"""
        data = BytesIO()

        _socket_fd = _socket.makefile(mode='rwb', encoding='utf-8')
        _socket_fd.write(message.encode('utf-8'))
        _socket_fd.flush()

        for received in _socket_fd:
            data.write(received)
        data.seek(0)

        _socket_fd.close()
        return data

    def _dgram_send_and_receive(self, _socket, message, buffer_size=4096, *args):
        """User Datagram Protocol sender and receiver"""
        _socket.sendto(message.encode('utf-8'), self.connection.connect_to)
        data, address = _socket.recvfrom(buffer_size)
        return BytesIO(data)

    def request(self, message, timeout=False, *args, **kwargs):
        """Populate connection pool, send message, return BytesIO, and cleanup"""
        if not self.connection_pool.full():
            self.connection_pool.put(self._register_socket())

        _socket = self.connection_pool.get()

        # setting timeout to None enables the socket to block.
        if timeout or timeout is None:
            _socket.settimeout(timeout)

        data = self.send_and_receive(_socket, message, *args, **kwargs)

        if self.connection.proto in Socket.streams:
            _socket.shutdown(socket.SHUT_RDWR)

        return Response(data, None, None)
Example #45
0
class FileVideoStream:
	def __init__(self, path, transform=None, queueSize=128):
		# initialize the file video stream along with the boolean
		# used to indicate if the thread should be stopped or not
		self.stream = cv2.VideoCapture(path)
		self.stopped = False
		self.transform = transform

		# initialize the queue used to store frames read from
		# the video file
		self.Q = Queue(maxsize=queueSize)

	def start(self):
		# start a thread to read frames from the file video stream
		t = Thread(target=self.update, args=())
		t.daemon = True
		t.start()
		return self

	def update(self):
		# keep looping infinitely
		while True:
			# if the thread indicator variable is set, stop the
			# thread
			if self.stopped:
				return

			# otherwise, ensure the queue has room in it
			if not self.Q.full():
				# read the next frame from the file
				(grabbed, frame) = self.stream.read()

				# if the `grabbed` boolean is `False`, then we have
				# reached the end of the video file
				if not grabbed:
					self.stop()
					return

				# if there are transforms to be done, might as well
				# do them on producer thread before handing back to
				# consumer thread. ie. Usually the producer is so far
				# ahead of consumer that we have time to spare.
				#
				# Python is not parallel but the transform operations
				# are usually OpenCV native so release the GIL.
				#
				# Really just trying to avoid spinning up additional
				# native threads and overheads of additional 
				# producer/consumer queues since this one was generally
				# idle grabbing frames.
				if self.transform:
					frame = self.transform(frame)

				# add the frame to the queue
				self.Q.put(frame)
			else:
				time.sleep(0.1)  # Rest for 100 ms, we have a full queue

	def read(self):
		# return next frame in the queue
		return self.Q.get()

	# Insufficient to have consumer use while(more()) which does
	# not take into account if the producer has reached end of
	# file stream. 
	def running(self):
		return self.more() or not self.stopped

	def more(self):
		# return True if there are still frames in the queue
		return self.Q.qsize() > 0

	def stop(self):
		# indicate that the thread should be stopped
		self.stopped = True
Example #46
0
class Build(object):
    """
    A build is a single execution of any configured job. This class:
        - exposes the overall status of the build
        - keeps track of the build's subjobs and their completion state
        - manages slaves that have been assigned to accept this build's subjobs
    """
    _build_id_counter = Counter()  # class-level counter for assigning build ids

    def __init__(self, build_request):
        """
        :type build_request: BuildRequest
        """
        self._logger = get_logger(__name__)
        self._build_id = self._build_id_counter.increment()
        self.build_request = build_request
        self._artifacts_archive_file = None
        self._build_artifact = None
        """ :type : BuildArtifact"""

        self._error_message = None
        self.is_prepared = False
        self._preparation_coin = SingleUseCoin()  # protects against separate threads calling prepare() more than once

        self._project_type = None
        self._num_slaves_in_use = 0
        self._build_completion_lock = Lock()  # protects against more than one thread detecting the build's finish
        self._num_allocated_executors = 0
        self._max_executors = float('inf')

        self._all_subjobs_by_id = {}
        self._unstarted_subjobs = None
        self._finished_subjobs = None
        self._postbuild_tasks_are_finished = False
        self._teardowns_finished = False

    def api_representation(self):
        return {
            'id': self._build_id,
            'status': self._status(),
            'artifacts': self._artifacts_archive_file,  # todo: this should probably be a url, not a file path
            'details': self._detail_message,
            'error_message': self._error_message,
            'num_atoms': self._num_atoms,
            'num_subjobs': len(self._all_subjobs_by_id),
            'failed_atoms': self._failed_atoms(),  # todo: print the file contents instead of paths
            'result': self._result(),
        }

    def prepare(self, subjobs, project_type, job_config):
        """
        :type subjobs: list[Subjob]
        :type project_type: project_type.project_type.ProjectType
        :type job_config: master.job_config.JobConfig
        """
        if not self._preparation_coin.spend():
            raise RuntimeError('prepare() was called more than once on build {}.'.format(self._build_id))

        self._project_type = project_type
        self._unstarted_subjobs = Queue(maxsize=len(subjobs))
        self._finished_subjobs = Queue(maxsize=len(subjobs))

        for subjob in subjobs:
            self._all_subjobs_by_id[subjob.subjob_id()] = subjob
            self._unstarted_subjobs.put(subjob)

        self._max_executors = job_config.max_executors
        self._timing_file_path = project_type.timing_file_path(job_config.name)
        self.is_prepared = True

    def finish(self):
        """
        Called when all slaves are done with this build (and any teardown is complete)
        """
        if self._subjobs_are_finished:
            self._teardowns_finished = True
        else:
            raise RuntimeError('Tried to finish build {} but not all subjobs are complete'.format(self._build_id))

    def build_id(self):
        """
        :return:
        :rtype: int
        """
        return self._build_id

    def needs_more_slaves(self):
        return self._num_allocated_executors < self._max_executors and not self._unstarted_subjobs.empty()

    def allocate_slave(self, slave):
        """
        Allocate a slave to this build.
        :type slave: master.Slave
        """
        self._num_slaves_in_use += 1
        slave.setup(self.build_id(), project_type_params=self.build_request.build_parameters())

        for _ in range(slave.num_executors):
            if self._num_allocated_executors >= self._max_executors:
                break
            slave.claim_executor()
            self._num_allocated_executors += 1
            self.execute_next_subjob_on_slave(slave)

    def all_subjobs(self):
        """
        Returns a list of subjobs for this build
        :rtype: list[Subjob]
        """
        return [subjob for subjob in self._all_subjobs_by_id.values()]

    def subjob(self, subjob_id):
        """
        Returns a single subjob
        :type subjob_id: int
        :rtype: Subjob
        """
        subjob = self._all_subjobs_by_id.get(subjob_id)
        if subjob is None:
            raise ItemNotFoundError('Invalid subjob id.')
        return subjob

    def execute_next_subjob_on_slave(self, slave):
        """
        Grabs an unstarted subjob off the queue and sends it to the specified slave to be executed. If the unstarted
        subjob queue is empty, we mark the slave as idle.

        :type slave: master.Slave
        """
        try:
            subjob = self._unstarted_subjobs.get(block=False)
            self._logger.debug('Sending subjob {} (build {}) to slave {}.',
                               subjob.subjob_id(), subjob.build_id(), slave.url)
            slave.start_subjob(subjob)

        except Empty:
            num_executors_in_use = slave.free_executor()
            if num_executors_in_use == 0:
                slave.teardown()

    def handle_subjob_payload(self, subjob_id, payload=None):
        if not payload:
            self._logger.warning('No payload for subjob {}.', subjob_id)
            return

        # Assertion: all payloads received from subjobs are uniquely named.
        result_file_path = os.path.join(
            self._build_results_dir(),
            payload['filename'])

        try:
            app.util.fs.write_file(payload['body'], result_file_path)
            app.util.fs.extract_tar(result_file_path, delete=True)
            self._logger.debug('Payload for subjob {} written.', subjob_id)
        except:
            self._logger.warning('Writing payload for subjob {} FAILED.', subjob_id)
            raise

    def _read_subjob_timings_from_results(self):
        """
        Collect timing data from all subjobs
        :rtype: dict [str, float]
        """
        timings = {}
        for _, subjob in self._all_subjobs_by_id.items():
            timings.update(subjob.read_timings())

        return timings

    def mark_subjob_complete(self, subjob_id):
        """
        :type subjob_id: int
        """
        subjob = self._all_subjobs_by_id[int(subjob_id)]
        with self._build_completion_lock:
            self._finished_subjobs.put(subjob, block=False)
            subjobs_are_finished = self._subjobs_are_finished

        # We use a local variable here which was set inside the _build_completion_lock to prevent a race condition
        if subjobs_are_finished:
            self._logger.info("All results received for build {}!", self._build_id)
            SafeThread(target=self._perform_async_postbuild_tasks, name='PostBuild{}'.format(self._build_id)).start()

    def mark_failed(self, failure_reason):
        """
        Mark a build as failed and set a failure reason. The failure reason should be something we can present to the
        end user of ClusterRunner, so try not to include detailed references to internal implementation.

        :type failure_reason: str
        """
        self._logger.error('Build {} failed: {}', self.build_id(), failure_reason)
        self._error_message = failure_reason

    @property
    def artifacts_archive_file(self):
        return self._artifacts_archive_file

    @property
    def _num_subjobs_total(self):
        return len(self._all_subjobs_by_id)

    @property
    def _num_subjobs_finished(self):
        return 0 if not self._finished_subjobs else self._finished_subjobs.qsize()

    @property
    def _num_atoms(self):
        if self._status() not in [BuildStatus.BUILDING, BuildStatus.FINISHED]:
            return None
        return sum([len(subjob.atomic_commands()) for subjob in self._all_subjobs_by_id.values()])

    @property
    def _subjobs_are_finished(self):
        return self.is_prepared and self._finished_subjobs.full()

    @property
    def is_finished(self):
        return self._subjobs_are_finished and self._postbuild_tasks_are_finished and self._teardowns_finished

    @property
    def is_unstarted(self):
        return self.is_prepared and self._unstarted_subjobs.full()

    @property
    def has_error(self):
        return self._error_message is not None

    @property
    def _detail_message(self):
        if self._num_subjobs_total > 0:
            return '{} of {} subjobs are complete ({:.1f}%).'.format(
                self._num_subjobs_finished,
                self._num_subjobs_total,
                100 * self._num_subjobs_finished / self._num_subjobs_total
            )
        return None

    def _status(self):
        """
        :rtype: str
        """
        if self.has_error:
            return BuildStatus.ERROR
        elif not self.is_prepared or self.is_unstarted:
            return BuildStatus.QUEUED
        elif self.is_finished:
            return BuildStatus.FINISHED
        else:
            return BuildStatus.BUILDING

    def _failed_atoms(self):
        """
        The commands which failed
        :rtype: list [str] | None
        """
        if self.is_finished:
            # dict.values() returns a view object in python 3, so wrapping values() in a list
            return list(self._build_artifact.get_failed_commands().values())
        return None

    def _result(self):
        """
        :rtype: str | None
        """
        if self.is_finished:
            if len(self._build_artifact.get_failed_commands()) == 0:
                return BuildResult.NO_FAILURES
            return BuildResult.FAILURE
        return None

    def _perform_async_postbuild_tasks(self):
        """
        Once a build is complete, certain tasks can be performed asynchronously.
        """
        # @TODO There is a race condition here where the build is marked finished before the results archive
        # is prepared.  If the user requests the build status before archival finishes, the 'artifacts'
        # value in the post body will be None.  self.is_finished should be conditional on whether archival
        # is finished.
        self._create_build_artifact()
        self._logger.debug('Postbuild tasks completed for build {}', self.build_id())
        self._postbuild_tasks_are_finished = True

    def _create_build_artifact(self):
        self._build_artifact = BuildArtifact(self._build_results_dir())
        self._build_artifact.generate_failures_file()
        self._build_artifact.write_timing_data(self._timing_file_path, self._read_subjob_timings_from_results())
        self._artifacts_archive_file = app.util.fs.compress_directory(self._build_results_dir(), 'results.tar.gz')

    def _build_results_dir(self):
        return os.path.join(
            Configuration['results_directory'],
            str(self.build_id()),
        )
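The Build class above tracks subjob state with two bounded queues: prepare() fills _unstarted_subjobs, slaves drain it, and _finished_subjobs.full() is what _subjobs_are_finished checks to detect completion. A minimal, self-contained sketch of that pattern follows; the names below are illustrative, not ClusterRunner's.

from queue import Queue, Empty

# Illustrative stand-in for Build's completion tracking: a bounded "finished"
# queue whose full() state means every work item has reported back.
subjob_ids = [0, 1, 2]
unstarted = Queue(maxsize=len(subjob_ids))
finished = Queue(maxsize=len(subjob_ids))

for subjob_id in subjob_ids:
    unstarted.put(subjob_id)

while True:
    try:
        subjob_id = unstarted.get(block=False)  # hand the next subjob to a slave
    except Empty:
        break                                   # nothing left to start
    finished.put(subjob_id, block=False)        # the slave reports completion

print(finished.full())  # True -> analogous to Build._subjobs_are_finished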
class ImageReader:
    def __init__(self, image_file_list, queue_size=100):
        # initialize the list of image files along with the boolean
        # used to indicate if the thread should be stopped or not
        self.image_files = image_file_list
        self.total_frames = len(image_file_list)
        self.stopped = False
        # initialize the queue used to store frames read from
        # the image files
        self.Q = Queue(maxsize=queue_size)

    def start(self):
        # start a thread to read frames from the image files
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        time.sleep(1)
        return self

    def update(self):
        # keep looping infinitely
        frame_num = 0
        while True:
            # if the thread indicator variable is set, stop the
            # thread
            if self.stopped:
                return

            # otherwise, ensure the queue has room in it
            if not self.Q.full():
                # read the next frame from the file
                image_np = cv2.imread(self.image_files[frame_num])
                image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                # Expand dimensions since the model expects images
                # to have shape: [1, None, None, 3]
                image_np = np.expand_dims(image_np, axis=0)
                # add the frame to the queue
                self.Q.put(image_np)

                frame_num += 1

                if frame_num >= self.total_frames:
                    self.stop()
                    return

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def read_batch(self, n_frames, asarray=False):
        # read the next n_frames frames; optionally return them as a single array
        frames = []
        for _ in range(n_frames):
            frames.append(self.read())
        return np.asarray(frames) if asarray else frames

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        print('stopping')
        self.stopped = True

    def close(self):
        self.stop()
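A rough usage sketch for the ImageReader above, assuming OpenCV and NumPy are available; the file names below are placeholders.

# Hypothetical usage of ImageReader; the paths below are placeholders.
files = ['frame_000.jpg', 'frame_001.jpg', 'frame_002.jpg']
reader = ImageReader(files, queue_size=32).start()

# read() blocks until the producer thread enqueues the next frame,
# so we can simply pull exactly total_frames frames.
for _ in range(reader.total_frames):
    frame = reader.read()  # RGB array of shape (1, H, W, 3)
    # ... run inference on `frame` here ...

reader.close()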
Example #48
0
class ClusterSlave(object):

    API_VERSION = 'v1'

    def __init__(self, port, host, num_executors=10):
        """
        :param port: The port number the slave service is running on
        :type port: int
        :param host: The hostname at which the slave is reachable
        :type host: str
        :param num_executors: The number of executors this slave should operate with -- this determines how many
            concurrent subjobs the slave can execute.
        :type num_executors: int
        """
        self.port = port
        self.host = host
        self.is_alive = True
        self._slave_id = None
        self._num_executors = num_executors
        self._logger = log.get_logger(__name__)

        self._idle_executors = Queue(maxsize=num_executors)
        self.executors_by_id = {}
        for executor_id in range(num_executors):
            executor = SubjobExecutor(executor_id)
            self._idle_executors.put(executor)
            self.executors_by_id[executor_id] = executor

        self._master_url = None
        self._network = Network(min_connection_poolsize=num_executors)
        self._master_api = None  # wait until we connect to a master first

        self._project_type = None  # this will be instantiated during build setup
        self._current_build_id = None
        self._build_teardown_coin = None

    def api_representation(self):
        """
        Gets a dict representing this resource which can be returned in an API response.
        :rtype: dict [str, mixed]
        """
        executors_representation = [executor.api_representation() for executor in self.executors_by_id.values()]
        return {
            'is_alive': self.is_alive,
            'master_url': self._master_url,
            'current_build_id': self._current_build_id,
            'slave_id': self._slave_id,
            'executors': executors_representation,
        }

    def get_status(self):
        """
        Just returns a dumb message and prints it to the console.
        """
        return 'Slave service is up. <Port: {}>'.format(self.port)

    def setup_build(self, build_id, project_type_params, build_executor_start_index):
        """
        Usually called once per build to do build-specific setup. Will block any subjobs from executing until setup
        completes. The actual setup is performed on another thread and will unblock subjobs (via an Event) once it
        finishes.

        :param build_id: The id of the build to run setup on
        :type build_id: int
        :param project_type_params: The parameters that define the project_type this build will execute in
        :type project_type_params: dict
        :param build_executor_start_index: How many executors have already been allocated on other slaves for
        this build
        :type build_executor_start_index: int
        """
        self._logger.info('Executing setup for build {} (type: {}).', build_id, project_type_params.get('type'))
        self._current_build_id = build_id
        self._build_teardown_coin = SingleUseCoin()  # protects against build_teardown being executed multiple times

        # create a project_type instance for build-level operations
        self._project_type = util.create_project_type(project_type_params)

        # verify all executors are idle
        if not self._idle_executors.full():
            raise RuntimeError('Slave tried to setup build but not all executors are idle. ({}/{} executors idle.)'
                               .format(self._idle_executors.qsize(), self._num_executors))

        # Collect all the executors to pass to project_type.fetch_project(). This will create a new project_type for
        # each executor (for subjob-level operations).
        executors = list(self._idle_executors.queue)
        SafeThread(
            target=self._async_setup_build,
            name='Bld{}-Setup'.format(build_id),
            args=(executors, project_type_params, build_executor_start_index)
        ).start()

    def _async_setup_build(self, executors, project_type_params, build_executor_start_index):
        """
        Called from setup_build(). Do asynchronous setup for the build so that we can make the call to setup_build()
        non-blocking.

        :type executors: list[SubjobExecutor]
        :type project_type_params: dict
        :type build_executor_start_index: int
        """
        self._base_executor_index = build_executor_start_index
        try:
            self._project_type.fetch_project()
            for executor in executors:
                executor.configure_project_type(project_type_params)
            self._project_type.run_job_config_setup()

        except SetupFailureError as ex:
            self._logger.error(ex)
            self._logger.info('Notifying master that build setup has failed for build {}.', self._current_build_id)
            self._notify_master_of_state_change(SlaveState.SETUP_FAILED)

        else:
            self._logger.info('Notifying master that build setup is complete for build {}.', self._current_build_id)
            self._notify_master_of_state_change(SlaveState.SETUP_COMPLETED)

    def teardown_build(self, build_id=None):
        """
        Called at the end of each build on each slave before it reports back to the master that it is idle again.

        :param build_id: The build id to teardown -- this parameter is used solely for correctness checking of the
            master, to make sure that the master is not erroneously sending teardown commands for other builds.
        :type build_id: int | None
        """
        if self._current_build_id is None:
            raise BadRequestError('Tried to teardown a build but no build is active on this slave.')

        if build_id is not None and build_id != self._current_build_id:
            raise BadRequestError('Tried to teardown build {}, '
                                  'but slave is running build {}!'.format(build_id, self._current_build_id))
        SafeThread(
            target=self._async_teardown_build,
            name='Bld{}-Teardwn'.format(build_id)
        ).start()

    def _async_teardown_build(self):
        """
        Called from teardown_build(). Do asynchronous teardown for the build so that we can make the call to
        teardown_build() non-blocking. Also take care of posting back to the master when teardown is complete.
        """
        self._do_build_teardown_and_reset()
        while not self._idle_executors.full():
            time.sleep(1)
        self._send_master_idle_notification()

    def _do_build_teardown_and_reset(self, timeout=None):
        """
        Kill any currently running subjobs. Run the teardown_build commands for the current build (with an optional
        timeout). Clear attributes related to the currently running build.

        :param timeout: A maximum time in seconds to allow the teardown process to run before killing
        :type timeout: int | None
        """
        # Kill all subjob executors' processes. This only has an effect if we are tearing down before a build completes.
        for executor in self.executors_by_id.values():
            executor.kill()

        # Order matters! Spend the coin if it has been initialized.
        if not self._build_teardown_coin or not self._build_teardown_coin.spend() or not self._project_type:
            return  # There is no build to tear down or teardown is already in progress.

        self._logger.info('Executing teardown for build {}.', self._current_build_id)
        # todo: Catch exceptions raised during teardown_build so we don't skip notifying master of idle/disconnect.
        self._project_type.teardown_build(timeout=timeout)
        self._logger.info('Build teardown complete for build {}.', self._current_build_id)
        self._current_build_id = None
        self._project_type = None

    def _send_master_idle_notification(self):
        if not self._is_master_responsive():
            self._logger.notice('Could not post idle notification to master because master is unresponsive.')
            return

        # Notify master that this slave is finished with teardown and ready for a new build.
        self._logger.info('Notifying master that this slave is ready for new builds.')
        self._notify_master_of_state_change(SlaveState.IDLE)

    def _disconnect_from_master(self):
        """
        Perform internal bookkeeping, as well as notify the master, that this slave is disconnecting itself
        from the slave pool.
        """
        self.is_alive = False

        if not self._is_master_responsive():
            self._logger.notice('Could not post disconnect notification to master because master is unresponsive.')
            return

        # Notify master that this slave is shutting down and should not receive new builds.
        self._logger.info('Notifying master that this slave is disconnecting.')
        self._notify_master_of_state_change(SlaveState.DISCONNECTED)

    def connect_to_master(self, master_url=None):
        """
        Notify the master that this slave exists.

        :param master_url: The URL of the master service. If none specified, defaults to localhost:43000.
        :type master_url: str | None
        """
        self.is_alive = True
        self._master_url = master_url or 'localhost:43000'
        self._master_api = UrlBuilder(self._master_url)
        connect_url = self._master_api.url('slave')
        data = {
            'slave': '{}:{}'.format(self.host, self.port),
            'num_executors': self._num_executors,
        }
        response = self._network.post(connect_url, data=data)
        self._slave_id = int(response.json().get('slave_id'))
        self._logger.info('Slave {}:{} connected to master on {}.', self.host, self.port, self._master_url)

        # We disconnect from the master before build_teardown so that the master stops sending subjobs. (Teardown
        # callbacks are executed in the reverse order that they're added, so we add the build_teardown callback first.)
        UnhandledExceptionHandler.singleton().add_teardown_callback(self._do_build_teardown_and_reset, timeout=30)
        UnhandledExceptionHandler.singleton().add_teardown_callback(self._disconnect_from_master)

    def _is_master_responsive(self):
        """
        Ping the master to check if it is still alive. Code using this method should treat the return value as a
        *probable* truth since the state of the master can change at any time. This method is not a replacement for
        error handling.

        :return: Whether the master is responsive or not
        :rtype: bool
        """
        # todo: This method repeats some logic we have in the deployment code (checking a service). We should DRY it up.
        is_responsive = True
        try:
            self._network.get(self._master_api.url())
        except requests.ConnectionError:
            is_responsive = False

        return is_responsive

    def start_working_on_subjob(self, build_id, subjob_id, subjob_artifact_dir, atomic_commands):
        """
        Begin working on a subjob with the given build id and subjob id. This just starts the subjob execution
        asynchronously on a separate thread.

        :type build_id: int
        :type subjob_id: int
        :type subjob_artifact_dir: str
        :type atomic_commands: list[str]
        :return: The text to return in the API response.
        :rtype: dict[str, int]
        """
        if build_id != self._current_build_id:
            raise BadRequestError('Attempted to start subjob {} for build {}, '
                                  'but current build id is {}.'.format(subjob_id, build_id, self._current_build_id))

        # get idle executor from queue to claim it as in-use (or block until one is available)
        executor = self._idle_executors.get()

        # Start a thread to execute the job (after waiting for setup to complete)
        SafeThread(
            target=self._execute_subjob,
            args=(build_id, subjob_id, executor, subjob_artifact_dir, atomic_commands),
            name='Bld{}-Sub{}'.format(build_id, subjob_id),
        ).start()

        self._logger.info('Slave ({}:{}) has received subjob. (Build {}, Subjob {})', self.host, self.port, build_id,
                          subjob_id)
        return {'executor_id': executor.id}

    def _execute_subjob(self, build_id, subjob_id, executor, subjob_artifact_dir, atomic_commands):
        """
        This is the method for executing a subjob asynchronously. This performs the work required by executing the
        specified command, then does a post back to the master results endpoint to signal that the work is done.

        :type build_id: int
        :type subjob_id: int
        :type executor: SubjobExecutor
        :type subjob_artifact_dir: str
        :type atomic_commands: list[str]
        """
        subjob_event_data = {'build_id': build_id, 'subjob_id': subjob_id, 'executor_id': executor.id}

        analytics.record_event(analytics.SUBJOB_EXECUTION_START, **subjob_event_data)
        results_file = executor.execute_subjob(build_id, subjob_id, subjob_artifact_dir, atomic_commands,
                                               self._base_executor_index)
        analytics.record_event(analytics.SUBJOB_EXECUTION_FINISH, **subjob_event_data)

        results_url = self._master_api.url('build', build_id, 'subjob', subjob_id, 'result')
        data = {
            'slave': '{}:{}'.format(self.host, self.port),
            'metric_data': {'executor_id': executor.id},
        }
        files = {'file': ('payload', open(results_file, 'rb'), 'application/x-compressed')}

        self._idle_executors.put(executor)  # work is done; mark executor as idle
        self._network.post(results_url, data=data, files=files)  # todo: check return code

        self._logger.info('Build {}, Subjob {} completed and sent results to master.', build_id, subjob_id)

    def _notify_master_of_state_change(self, new_state):
        """
        Send a state notification to the master. This is used to notify the master of events occurring on the slave
        related to build execution progress.

        :type new_state: SlaveState
        """
        state_url = self._master_api.url('slave', self._slave_id)
        self._network.put_with_digest(state_url, request_params={'slave': {'state': new_state}},
                                      secret=Secret.get(), error_on_failure=True)

    def kill(self):
        """
        Exits without error.
        """
        sys.exit(0)
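ClusterSlave uses _idle_executors as a blocking resource pool: get() claims an executor (or blocks until one is free), put() returns it, and full() means every executor is idle. The same pattern in isolation, with a made-up Resource class standing in for SubjobExecutor:

from queue import Queue

class Resource:
    """Hypothetical stand-in for SubjobExecutor."""
    def __init__(self, resource_id):
        self.id = resource_id

pool = Queue(maxsize=3)
for i in range(3):
    pool.put(Resource(i))

resource = pool.get()   # claim a resource; blocks if none are idle
print(pool.full())      # False -> at least one resource is in use
pool.put(resource)      # release it once the work is done
print(pool.full())      # True -> all resources are idle again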
class FileStream:
    def __init__(self, path, transform=None, queue_size=128):
        self.stream = cv2.VideoCapture(path)
        self.stopped = False
        self.transform = transform

        self.Q = Queue(maxsize=queue_size)
        self.thread = Thread(target=self.update, args=())
        self.thread.daemon = True

    def start(self):
        self.thread.start()
        return self

    def update(self):
        # Loop Infinitely
        while True:

            # if thread indicator variable is set stop thread
            if self.stopped:
                break

            # read the next frame from file
            if not self.Q.full():
                (grabbed, frame) = self.stream.read()
                """If gtrabbed = False ==> End of video"""
                if not grabbed:
                    self.stop = True
                """
                    # if there are transforms to be done, might as well
                    # do them on producer thread before handing back to
                    # consumer thread. ie. Usually the producer is so far
                    # ahead of consumer that we have time to spare.
                    #
                    # Python is not parallel but the transform operations
                    # are usually OpenCV native so release the GIL.
                    #
                    # Really just trying to avoid spinning up additional
                    # native threads and overheads of additional
                    # producer/consumer queues since this one was generally
                    # idle grabbing frames.
                """
                if self.transform:
                    frame = self.transform(frame)

                # Add Frame to Queue
                self.Q.put(frame)
            else:
                # The queue is full; wait 100 ms before trying again
                time.sleep(0.1)
        self.stream.release()

    def read(self):
        # Return next frame in the Queue
        return self.Q.get()

    """
        # Insufficient to have consumer use while(more()) which does
        # not take into account if the producer has reached end of
        # file stream.
    """

    def more(self):
        # return True if there are still frames in the queue.
        # If stream is not stopped, try to wait a moment
        tries = 0
        while self.Q.qsize() == 0 and not self.stopped and tries < 5:
            time.sleep(0.1)
            tries += 1
        return self.Q.qsize() > 0

    def stop(self):
        # Indicate the thread should be stopped
        self.stopped = True
        # wait until stream resources are released (producer thread might be still grabbing frame)
        self.thread.join()
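A usage sketch for FileStream, assuming OpenCV is installed; 'video.mp4' is a placeholder path, and the transform runs on the producer thread as the comments above describe.

import cv2

# Hypothetical usage of FileStream; 'video.mp4' is a placeholder path.
def to_gray(frame):
    return cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

stream = FileStream('video.mp4', transform=to_gray).start()
while stream.more():
    gray = stream.read()  # frame already converted on the producer thread
    # ... process `gray` here ...
stream.stop()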
class ClusterSlave(object):

    API_VERSION = 'v1'

    def __init__(self, port, host, num_executors=10):
        """
        :param port: The port number the slave service is running on
        :type port: int
        :param host: The hostname at which the slave is reachable
        :type host: str
        :param num_executors: The number of executors this slave should operate with -- this determines how many
            concurrent subjobs the slave can execute.
        :type num_executors: int
        """
        self.port = port
        self.host = host
        self._slave_id = None
        self._num_executors = num_executors
        self._logger = log.get_logger(__name__)

        self._idle_executors = Queue(maxsize=num_executors)
        self.executors = {}
        for executor_id in range(num_executors):
            executor = SubjobExecutor(executor_id)
            self._idle_executors.put(executor)
            self.executors[executor_id] = executor

        self._setup_complete_event = Event()
        self._master_url = None
        self._network = Network(min_connection_poolsize=num_executors)
        self._master_api = None  # wait until we connect to a master first

        self._project_type = None  # this will be instantiated during build setup
        self._current_build_id = None

        UnhandledExceptionHandler.singleton().add_teardown_callback(self._async_teardown_build,
                                                                    should_disconnect_from_master=True)

    def api_representation(self):
        """
        Gets a dict representing this resource which can be returned in an API response.
        :rtype: dict [str, mixed]
        """
        executors_representation = [executor.api_representation() for executor in self.executors.values()]
        return {
            'connected': str(self._is_connected()),
            'master_url': self._master_url,
            'setup_complete': str(self._setup_complete_event.is_set()),
            'slave_id': self._slave_id,
            'executors': executors_representation,
        }

    def _is_connected(self):
        return self._master_url is not None

    def get_status(self):
        """
        Just returns a dumb message and prints it to the console.
        """
        return 'Slave service is up. <Port: {}>'.format(self.port)

    def setup_build(self, build_id, project_type_params):
        """
        Usually called once per build to do build-specific setup. Will block any subjobs from executing until setup
        completes. The actual setup is performed on another thread and will unblock subjobs (via an Event) once it
        finishes.

        :param build_id: The id of the build to run setup on
        :type build_id: int
        :param project_type_params: The parameters that define the project_type this build will execute in
        :type project_type_params: dict
        """
        self._logger.info('Executing setup for build {} (type: {}).', build_id, project_type_params.get('type'))
        self._setup_complete_event.clear()
        self._current_build_id = build_id

        # create a project_type instance for build-level operations
        self._project_type = util.create_project_type(project_type_params)

        # verify all executors are idle
        if not self._idle_executors.full():
            raise RuntimeError('Slave tried to setup build but not all executors are idle. ({}/{} executors idle.)'
                               .format(self._idle_executors.qsize(), self._num_executors))

        # Collect all the executors to pass to project_type.setup_build(). This will create a new project_type for
        # each executor (for subjob-level operations).
        executors = list(self._idle_executors.queue)
        SafeThread(target=self._async_setup_build, args=(executors, project_type_params)).start()

    def _async_setup_build(self, executors, project_type_params):
        """
        Called from setup_build(). Do asynchronous setup for the build so that we can make the call to setup_build()
        non-blocking.
        """
        # todo(joey): It's strange that the project_type is setting up the executors, which in turn set up projects.
        # todo(joey): I think this can be untangled a bit -- we should call executor.configure_project_type() here.
        self._project_type.setup_build(executors, project_type_params)

        self._logger.info('Build setup complete for build {}.', self._current_build_id)
        self._setup_complete_event.set()  # free any subjob threads that are waiting for setup to complete

    def teardown_build(self, build_id=None):
        """
        Called at the end of each build on each slave before it reports back to the master that it is idle again.

        :param build_id: The build id to teardown -- this parameter is used solely for correctness checking of the
            master, to make sure that the master is not erroneously sending teardown commands for other builds.
        :type build_id: int | None
        """
        if self._current_build_id is None:
            raise BadRequestError('Tried to teardown a build but no build is active on this slave.')

        if build_id is not None and build_id != self._current_build_id:
            raise BadRequestError('Tried to teardown build {}, '
                                  'but slave is running build {}!'.format(build_id, self._current_build_id))

        self._logger.info('Executing teardown for build {}.', self._current_build_id)

        SafeThread(target=self._async_teardown_build).start()

    def _async_teardown_build(self, should_disconnect_from_master=False):
        """
        Called from teardown_build(). Do asynchronous teardown for the build so that we can make the call to
        teardown_build() non-blocking. Also take care of posting back to the master when teardown is complete.
        """
        if self._project_type:
            self._project_type.teardown_build()
            self._logger.info('Build teardown complete for build {}.', self._current_build_id)
            self._current_build_id = None
            self._project_type = None

        if not should_disconnect_from_master:
            # report back to master that this slave is finished with teardown and ready for a new build
            self._logger.info('Notifying master that this slave is ready for new builds.')
            idle_url = self._master_api.url('slave', self._slave_id, 'idle')
            response = self._network.post(idle_url)
            if response.status_code != http.client.OK:
                raise RuntimeError("Could not post teardown completion to master at {}".format(idle_url))

        elif self._is_master_responsive():
            # report back to master that this slave is shutting down and should not receive new builds
            self._logger.info('Notifying master to disconnect this slave.')
            disconnect_url = self._master_api.url('slave', self._slave_id, 'disconnect')
            response = self._network.post(disconnect_url)
            if response.status_code != http.client.OK:
                self._logger.error('Could not post disconnect notification to master at {}'.format(disconnect_url))

    def connect_to_master(self, master_url=None):
        """
        Notify the master that this slave exists.

        :param master_url: The URL of the master service. If none specified, defaults to localhost:43000.
        :type master_url: str
        """
        self._master_url = master_url or 'localhost:43000'
        self._master_api = UrlBuilder(self._master_url)
        connect_url = self._master_api.url('slave')
        data = {
            'slave': '{}:{}'.format(self.host, self.port),
            'num_executors': self._num_executors,
        }
        response = self._network.post(connect_url, data)
        self._slave_id = int(response.json().get('slave_id'))
        self._logger.info('Slave {}:{} connected to master on {}.', self.host, self.port, self._master_url)

    def _is_master_responsive(self):
        """
        Ping the master to check if it is still alive. Code using this method should treat the return value as a
        *probable* truth since the state of the master can change at any time. This method is not a replacement for
        error handling.

        :return: Whether the master is responsive or not
        :rtype: bool
        """
        # todo: This method repeats some logic we have in the deployment code (checking a service). We should DRY it up.
        is_responsive = True
        try:
            self._network.get(self._master_api.url())
        except requests.ConnectionError:
            is_responsive = False

        return is_responsive

    def start_working_on_subjob(self, build_id, subjob_id, subjob_artifact_dir, atomic_commands):
        """
        Begin working on a subjob with the given build id and subjob id. This just starts the subjob execution
        asynchronously on a separate thread.

        :type build_id: int
        :type subjob_id: int
        :type subjob_artifact_dir: str
        :type atomic_commands: list[str]
        :return: The text to return in the API response.
        :rtype: dict[str, int]
        """
        if build_id != self._current_build_id:
            raise BadRequestError('Attempted to start subjob {} for build {}, '
                                  'but current build id is {}.'.format(subjob_id, build_id, self._current_build_id))

        # get idle executor from queue to claim it as in-use (or block until one is available)
        executor = self._idle_executors.get()

        # Start a thread to execute the job (after waiting for setup to complete)
        SafeThread(
            target=self._execute_subjob,
            args=(build_id, subjob_id, executor, subjob_artifact_dir, atomic_commands),
            name='Build{}-Sub{}'.format(build_id, subjob_id),
        ).start()

        self._logger.info('Slave ({}:{}) has received subjob. (Build {}, Subjob {})', self.host, self.port, build_id,
                          subjob_id)
        return {'executor_id': executor.id}

    def _execute_subjob(self, build_id, subjob_id, executor, subjob_artifact_dir, atomic_commands):
        """
        This is the method for executing a subjob asynchronously. This performs the work required by executing the
        specified command, then does a post back to the master results endpoint to signal that the work is done.

        :type build_id: int
        :type subjob_id: int
        :type executor: SubjobExecutor
        :type subjob_artifact_dir: str
        :type atomic_commands: list[str]
        """
        self._logger.debug('Waiting for setup to complete (Build {}, Subjob {})...', build_id, subjob_id)
        self._setup_complete_event.wait()  # block until setup completes
        subjob_event_data = {'build_id': build_id, 'subjob_id': subjob_id, 'executor_id': executor.id}

        analytics.record_event(analytics.SUBJOB_EXECUTION_START, **subjob_event_data)
        results_file = executor.execute_subjob(build_id, subjob_id, subjob_artifact_dir, atomic_commands)
        analytics.record_event(analytics.SUBJOB_EXECUTION_FINISH, **subjob_event_data)

        results_url = self._master_api.url('build', build_id, 'subjob', subjob_id, 'result')
        data = {
            'slave': '{}:{}'.format(self.host, self.port),
            'metric_data': {'executor_id': executor.id},
        }
        files = {'file': ('payload', open(results_file, 'rb'), 'application/x-compressed')}

        self._idle_executors.put(executor)  # work is done; mark executor as idle
        self._network.post(results_url, data=data, files=files)  # todo: check return code

        self._logger.info('Build {}, Subjob {} completed and sent results to master.', build_id, subjob_id)

    def kill(self):
        # TODO(dtran): Kill the threads and this server more gracefully
        sys.exit(0)
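This ClusterSlave variant gates subjob execution on a threading.Event: _execute_subjob blocks on wait() until the setup thread calls set(). A minimal sketch of that gate (all names here are illustrative):

import time
from threading import Event, Thread

setup_done = Event()

def run_subjob(subjob_id):
    setup_done.wait()  # block until build setup has completed
    print('subjob', subjob_id, 'running')

for i in range(3):
    Thread(target=run_subjob, args=(i,)).start()

time.sleep(0.5)   # simulate build setup work
setup_done.set()  # release every thread waiting on the event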
class DataLoader(object):
    def __init__(self,
                 dataset,
                 batch_size,
                 max_spl_per_cls,
                 nDataLoaderThread,
                 gSize,
                 maxQueueSize=10,
                 **kwargs):
        self.dataset = dataset
        self.nWorkers = nDataLoaderThread
        self.max_spl_per_cls = max_spl_per_cls
        self.batch_size = batch_size
        self.maxQueueSize = maxQueueSize

        self.data_dict = {}
        self.data_list = []
        self.nFiles = 0
        self.gSize = gSize
        ## number of clips per sample (e.g. 1 for softmax, 2 for triplet or pm)

        self.dataLoaders = []

        for data, label in dataset:
            if not (label in self.data_dict):
                self.data_dict[label] = []

            self.data_dict[label].append(data)

        # print("Total # classes: ", len(self.data_dict))
        # print("Total # samples: ", sum(map(len, self.data_dict.values())))

        self.datasetQueue = Queue(self.maxQueueSize)

        self.batch_size = min(len(self.data_dict), self.batch_size)

    def dataLoaderThread(self, nThreadIndex):

        index = nThreadIndex * self.batch_size

        if (index >= self.nFiles):
            return

        while (True):
            if (self.datasetQueue.full() == True):
                time.sleep(1.0)
                continue

            if (index + self.batch_size > self.nFiles):  # drop last
                break

            in_data = []
            for ii in range(0, self.gSize):
                feat = []
                for ij in range(index, index + self.batch_size):
                    feat.append(loadIMG(self.data_list[ij][ii]))
                in_data.append(torch.stack(feat, dim=0))

            in_data = torch.stack(in_data, axis=1)  #(batch,seg,c,h,w)
            in_label = numpy.asarray(self.data_label[index:index +
                                                     self.batch_size])

            self.datasetQueue.put([in_data, in_label])

            index += self.batch_size * self.nWorkers

    def __iter__(self):

        dictkeys = list(self.data_dict.keys())
        dictkeys.sort()

        lol = lambda lst, sz: [lst[i:i + sz] for i in range(0, len(lst), sz)]

        flattened_list = []
        flattened_label = []

        ## Data for each class
        for findex, key in enumerate(dictkeys):
            data = self.data_dict[key]
            numSeg = round_down(min(len(data), self.max_spl_per_cls),
                                self.gSize)

            rp = lol(numpy.random.permutation(len(data))[:numSeg], self.gSize)
            flattened_label.extend([findex] * (len(rp)))
            for indices in rp:
                flattened_list.append([data[i] for i in indices])

        ## Data in random order
        mixid = numpy.random.permutation(len(flattened_label))
        mixlabel = []
        mixmap = []

        ## Prevent two pairs of the same classes in the same batch
        for ii in mixid:
            startbatch = len(mixlabel) - len(mixlabel) % self.batch_size
            if flattened_label[ii] not in mixlabel[startbatch:]:
                mixlabel.append(flattened_label[ii])
                mixmap.append(ii)

        self.data_list = [flattened_list[i] for i in mixmap]
        self.data_label = [flattened_label[i] for i in mixmap]

        ## Iteration size
        self.nFiles = len(self.data_label)
        # print("Total # batches: ", self.nFiles)

        ### Make and Execute Threads...
        for index in range(0, self.nWorkers):
            self.dataLoaders.append(
                threading.Thread(target=self.dataLoaderThread, args=[index]))
            self.dataLoaders[-1].start()

        return self

    def __next__(self):

        while (True):
            isFinished = True

            if (self.datasetQueue.empty() == False):
                return self.datasetQueue.get()
            for index in range(0, self.nWorkers):
                if (self.dataLoaders[index].is_alive() == True):
                    isFinished = False
                    break

            if (isFinished == False):
                time.sleep(1.0)
                continue

            for index in range(0, self.nWorkers):
                self.dataLoaders[index].join()

            self.dataLoaders = []
            raise StopIteration

    def __call__(self):
        pass

    def qsize(self):
        return self.datasetQueue.qsize()
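DataLoader combines worker threads, a bounded Queue, and the iterator protocol: workers fill datasetQueue while __next__ drains it and raises StopIteration once every worker has exited. A stripped-down, self-contained sketch of that shape, without the torch-specific batching:

import threading
from queue import Queue, Empty

class ThreadedIterator:
    """Minimal sketch of the queue-backed iterator pattern used above."""
    def __init__(self, items, n_workers=2, max_queue_size=4):
        self.items = list(items)
        self.n_workers = n_workers
        self.queue = Queue(maxsize=max_queue_size)
        self.workers = []

    def _worker(self, start_index):
        # each worker handles a strided slice of the items, like dataLoaderThread
        for index in range(start_index, len(self.items), self.n_workers):
            self.queue.put(self.items[index])  # blocks while the queue is full

    def __iter__(self):
        self.workers = [threading.Thread(target=self._worker, args=(i,))
                        for i in range(self.n_workers)]
        for worker in self.workers:
            worker.start()
        return self

    def __next__(self):
        while True:
            try:
                return self.queue.get(timeout=0.01)
            except Empty:
                if any(worker.is_alive() for worker in self.workers):
                    continue  # producers are still running; keep polling
                try:
                    return self.queue.get_nowait()  # drain any final item
                except Empty:
                    raise StopIteration

print(sorted(ThreadedIterator(range(10))))  # -> [0, 1, ..., 9]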
from queue import Queue, Full

# The queue's max size is 4, so put() succeeds for the first four iterations.
# On the fifth iteration the queue already holds four items, so put() waits for
# the timeout and then raises queue.Full; the try block handles that case.
q = Queue(maxsize=4)

for i in range(5):

    print("---Loop---- :", i)
    print("queue empty : ", q.empty())

    try:
        q.put(i, timeout=10)
    except Full:
        print("queue is full, max size is ", q.maxsize)
        break

    print("queue size : ", q.qsize())

    print("queue full : ", q.full())

print("------Queue info when getting -----")

for i in range(q.maxsize):

    print("---Loop---- :", i)
    print("get : ", q.get(timeout=10))
    print("task done : ", q.task_done())
    print("queue empty : ", q.empty())
    

from queue import Queue
from queue import Empty
from queue import Full

# Demonstration of the use of FIFO Queue

lineup = Queue(maxsize=3)
try:
	lineup.get(block=False)
except Empty as e:
	print("Queue is empty")

lineup.put("one")
lineup.put("two")
lineup.put("three")
try:
	lineup.put("four", timeout=1)
except Full as e:
	print("Queue is full")

print("lineup.full() = {}".format(lineup.full()))
print("lineup.get() = {}".format(lineup.get()))
print("lineup.get() = {}".format(lineup.get()))
print("lineup.get() = {}".format(lineup.get()))
print("lineup.empty() = {}".format(lineup.empty()))