Example #1
class Counter(object):
    """
    A process-safe counter providing atomic incrementAndGet() and value() functions
    """
    def __init__(self, initval=0):
        """
        Initialize this counter
        Args:
            initval (int): the initial value of the counter
        """
        self.val = Value('i', initval)

    def incrementAndGet(self):
        """
        Atomically increment this counter, and return the new value stored.

        Returns:
            int: The updated value of this counter.
        """
        with self.val.get_lock():
            self.val.value += 1
            return self.val.value

    def value(self):
        """
        Atomically get the current value of this counter.

        Returns:
            int: The current value of this counter.
        """
        with self.val.get_lock():
            return self.val.value
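
A minimal usage sketch (not part of the original example; the bump function and the counts below are illustrative) shows the counter being shared by several processes:

from multiprocessing import Process

def bump(counter, n):
    # Each call is atomic thanks to the Value's lock inside incrementAndGet().
    for _ in range(n):
        counter.incrementAndGet()

if __name__ == '__main__':
    counter = Counter(0)
    workers = [Process(target=bump, args=(counter, 1000)) for _ in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(counter.value())  # 4000 -- no increments are lost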
Example #2
def run_check(ok: Value, path: Path, args):
    """Run check without updating progress."""
    path_md5 = Path("%s.md5" % path)
    if not _check_gz_integrity(path):
        logger.error("GZip file integrity check failed: %s", path)
        with ok.get_lock():
            ok.value = False
    elif not path.exists():  # does not exist => error
        logger.error("Does not exist: %s", path)
        with ok.get_lock():
            ok.value = False
    elif path_md5.exists():
        logger.debug("MD5 file exists: %s", path_md5)
        if _check_md5(path):
            logger.debug("  => MD5 OK")
        else:
            logger.error("  => MD5 mismatch for %s", path)
            with ok.get_lock():
                ok.value = False
    elif args.missing_md5_error:  # => .md5 missing and is error => error
        logger.error("MD5 file does not exist for: %s", path)
        with ok.get_lock():
            ok.value = False
    elif args.compute_md5:  # .md5 missing and is not error => recreate
        recreated = _recreate_md5_for(path)
        if recreated:
            logger.info("Created MD5 file: %s", path_md5)
        elif args.create_md5_fail_error:
            logger.error("Could not create MD5 file for: %s", path)
            with ok.get_lock():
                ok.value = False
    else:
        logger.info("Not attempting to recreate %s", path_md5)
Example #3
def _test_all_options(position: int, death_count: Value, kill_count: Value, play_map: ManuelCalculatedGame,
                      is_not_6th_step: bool, move_list: list):
    processes = []

    # Iterate over every possible action for the active player (the player at this position).
    for move in move_list:
        tmp_map = deepcopy(play_map)

        # Call _calculate_move to apply the new action and check whether the player survives.
        if play_map.players[position].surviving:
            tmp_map = _calculate_move(position, move, tmp_map, is_not_6th_step)

        # If the last player is reached, the result will be calculated here.
        if position == len(play_map.players) - 1:
            for index, player in enumerate(tmp_map.players):
                if not player.surviving:
                    if index == 0:
                        with death_count.get_lock():
                            death_count.value += 1
                    else:
                        with kill_count.get_lock():
                            kill_count.value += 1
        else:
            # Function calls itself (recursion)
            p = Process(target=_test_all_options, args=(position + 1, death_count, kill_count, deepcopy(play_map),
                                                        is_not_6th_step, move_list))
            processes.append(p)
            p.start()

    for process in processes:
        process.join()
Example #4
def work(simulate_one, queue, n_eval: Value, n_acc: Value, n: int,
         check_max_eval: bool, max_eval: int, all_accepted: bool,
         sample_factory):
    # unwrap arguments
    if isinstance(simulate_one, bytes):
        simulate_one = pickle.loads(simulate_one)

    random.seed()
    np.random.seed()

    sample = sample_factory()
    while (n_acc.value < n and (not all_accepted or n_eval.value < n)
           and (not check_max_eval or n_eval.value < max_eval)):
        with n_eval.get_lock():
            particle_id = n_eval.value
            n_eval.value += 1

        new_sim = simulate_one()
        sample.append(new_sim)

        if new_sim.accepted:

            # increase number of accepted particles
            with n_acc.get_lock():
                n_acc.value += 1

            # put into queue
            queue.put((particle_id, sample))

            # create empty sample and record until next accepted
            sample = sample_factory()

    # indicate worker finished
    queue.put(DONE)
Example #5
def work(simulate_one, queue, n_eval: Value, n_acc: Value, n: int,
         all_accepted: bool, sample_factory):
    random.seed()
    np.random.seed()

    sample = sample_factory()

    while n_acc.value < n and \
            (not all_accepted or n_eval.value < n):
        with n_eval.get_lock():
            particle_id = n_eval.value
            n_eval.value += 1

        new_sim = simulate_one()
        sample.append(new_sim)

        if new_sim.accepted:

            # increase number of accepted particles
            with n_acc.get_lock():
                n_acc.value += 1

            # put into queue
            queue.put((particle_id, sample))

            # create empty sample and record until next accepted
            sample = sample_factory()

    # indicate worker finished
    queue.put(DONE)
Example #6
class TTS:
  def __init__(self):
    self.text_queue = Queue(maxsize=3)
    self.run = Value('i', 1)
    self.done_loading = Value('i', 0)
    self._is_speaking = Value('i', 0)
    self.process = Process(target=tts_worker,
                           args=(self.text_queue, self.run, self.done_loading, self._is_speaking))

  @property
  def is_speaking(self):
    return self._is_speaking.value

  def say(self, text, block=False):
    with self._is_speaking.get_lock():
      self._is_speaking.value = 1
    self.text_queue.put(text)
    while block and self.is_speaking:
      time.sleep(0.1)

  def start(self):
    logging.info('Starting TTS process...')
    self.process.start()
    while not self.done_loading.value:
      time.sleep(0.1)
    logging.info('TTS process finished starting.')

  def stop(self):
    with self.run.get_lock():
      self.run.value = 0
    self.process.join()
Example #7
class ProgressTracker(Thread):
    def __init__(self):
        super().__init__()
        self.lock = Condition()
        self.done = Value("H", 0)
        self.file = Value(c_wchar_p, "")
        self.progress = Value("d", 0.0)
        self.callbacks = []

    def update(self, file, progress):
        with self.lock:
            with self.file.get_lock(), self.progress.get_lock():
                self.file.value = file
                self.progress.value = progress
            self.lock.notify_all()

    def complete(self):
        with self.done.get_lock():
            self.done.value = 1
        with self.lock:
            self.lock.notify_all()

    def registerUpdateCallback(self, callback):
        self.callbacks.append(callback)

    def run(self):
        while True:
            with self.done.get_lock():
                if self.done.value:
                    break
            # Wait until update() or complete() notifies the condition.
            with self.lock:
                self.lock.wait()
            with self.file.get_lock(), self.progress.get_lock():
                for callback in self.callbacks:
                    callback(self.file.value, self.progress.value)
Example #8
def work(simulate_one, queue, n_eval: Value, n_particles: Value,
         sample_factory):
    random.seed()
    np.random.seed()

    sample = sample_factory()

    while n_particles.value > 0:
        with n_eval.get_lock():
            particle_id = n_eval.value
            n_eval.value += 1

        new_sim = simulate_one()
        sample.append(new_sim)

        if new_sim.accepted:

            # reduce number of required particles
            with n_particles.get_lock():
                n_particles.value -= 1

            # put into queue
            queue.put((particle_id, sample))

            # create empty sample and record until next accepted
            sample = sample_factory()

    # indicate worker finished
    queue.put(DONE)
Example #9
class Counter(object):
    def __init__(self, initval=0):
        self.val = Value('i', initval)

    def __repr__(self):
        return str(self.val.value)

    def __add__(self, other):
        with self.val.get_lock():
            self.val.value += other
            return self

    def __sub__(self, other):
        with self.val.get_lock():
            self.val.value -= other
            return self

    def increase(self, other=1):
        with self.val.get_lock():
            self.val.value += other

    def decrease(self, other=1):
        with self.val.get_lock():
            self.val.value -= other

    def set(self, value):
        with self.val.get_lock():
            self.val.value = value

    @property
    def value(self):
        return self.val.value
Example #10
def worker(inq: Queue, outq: Queue, sharedAlpha: Value):

    while True:

        args = inq.get()
        if args is None: break  # Stop worker

        board, turn, depth, alpha, beta, future, options = args
        move = board.peek()

        maxDepth = options.get("maxDepth", depth)
        # If there are at least three moves in the tree,
        # we know the best next move from the previous iteration
        # and try to search it with more depth
        if len(future) >= 12:  # each moves tree coded like: f8b4c2c3f6e4
            if move.uci() == future[8:12]:
                depth = maxDepth

        with sharedAlpha.get_lock():
            alpha = max(alpha, sharedAlpha.value)

        score, tree = negamax(board, turn, depth - 1, -beta, -alpha,
                              move.uci())
        score = -score

        with sharedAlpha.get_lock():
            if score > sharedAlpha.value:
                sharedAlpha.value = score
                ##if score >= beta: # TODO Is it ever possible?
                ##    outq.put( (board.peek(), beta) )

        # Return bestMove and bestScore
        outq.put((move, score, tree))
Example #11
class Query_To_Database(object):
	def __init__(self):
		self.params = pika.ConnectionParameters(host='rmq')
		self.params.heartbeat = 0
		self.params.socket_timeout = 2
		self.connection = pika.BlockingConnection(self.params)
		self.channel = self.connection.channel()
	
		self.write_response_data = None
		self.write_correl_id = None
		self.read_correl_id = None
		self.read_response_data = None
		self.write_lock = Value('i', 0)

		self.write_result = self.channel.queue_declare(queue='', exclusive=True)
		self.read_result = self.channel.queue_declare(queue='Response_Queue', exclusive=True)
		self.write_callback_queue = self.write_result.method.queue

		self.channel.basic_consume(queue=self.write_callback_queue,on_message_callback=self.write_response,auto_ack=True)
		self.channel.basic_consume(queue='Response_Queue',on_message_callback=self.read_response,auto_ack=True)

	def write_response(self,channel,method,properties,message_body):
		if self.write_correl_id == properties.correlation_id:
			self.write_response_data = json.loads(message_body)

	def read_response(self,channel,method,properties,message_body):
		if self.read_correl_id == properties.correlation_id:
			self.read_response_data = json.loads(message_body)
	
	def write_query_to_database(self,request_data):
		try:
			with self.write_lock.get_lock():
				self.write_response_data = None
				self.write_correl_id = str(uuid.uuid4())
				data_to_send = request_data.get_json()
				self.channel.basic_publish(exchange='',routing_key='Write_Queue',body=json.dumps(data_to_send),
				properties=pika.BasicProperties(reply_to=self.write_callback_queue,correlation_id=self.write_correl_id,))
				while(self.write_response_data is None):
					self.connection.process_data_events()
			return Response("{}".format(self.write_response_data['data']), status = self.write_response_data['status_code'],mimetype = 'application/json')
		except Exception as e:
			return Response("{}".format(e), status = 500, mimetype = 'application/json')
	
	def read_query_from_database(self,request_data):
		try:
			with self.write_lock.get_lock():
				self.read_response_data = None
				self.read_correl_id = str(uuid.uuid4())
				data_to_send = request_data.get_json()
				self.channel.basic_publish(exchange='',routing_key='Read_Queue',body=json.dumps(data_to_send),
				properties=pika.BasicProperties(reply_to='Response_Queue',correlation_id=self.read_correl_id,))
				while(self.read_response_data is None):
					self.connection.process_data_events()
			return Response("{}".format(self.read_response_data['data']), status = self.read_response_data['status_code'],mimetype = 'application/json')
		except Exception as e:
			return Response("{}".format(e), status = 500, mimetype = 'application/json')
Example #12
class SyncedCrawlingProgress:
    def __init__(self, total_count=1000, update_every=100000):
        # Variables that need to be synced across Threads
        self.count = Value('i', 0)
        self.last_time = Value('d', time.time())
        self.last_count = Value('i', 0)

        self.start_time = time.time()
        self.update_every = update_every
        self.total_count = total_count
        print(self.row_string(COLUMNS))
        print(ROW_SEPARATOR * (len(COLUMNS) * COL_WIDTH + len(COLUMNS) - 1))

    def row_string(self, values):
        string = ""
        for value in values[0:-1]:
            string += str(value).center(COL_WIDTH) + COL_SEPARATOR
        string += str(values[-1]).center(COL_WIDTH)
        return string

    def inc(self, by=1):
        with self.count.get_lock():
            self.count.value += by
            if self.count.value - self.last_count.value >= self.update_every:
                # Print update
                self.print_update()
                # Then update relevant variables
                with self.last_time.get_lock(), self.last_count.get_lock():
                    self.last_count.value = self.count.value
                    self.last_time.value = time.time()

    def print_update(self):
        # Prints current number, total number, percentage, runtime, increase per second, expected remaining runtime
        percentage = self.count.value / self.total_count * 100
        runtime = time.time() - self.start_time
        increases_per_second = (self.count.value - self.last_count.value) / (
            time.time() - self.last_time.value)
        expected_remaining_runtime = (self.total_count -
                                      self.count.value) / increases_per_second

        print(
            self.row_string([
                self.count.value, self.total_count,
                "%02.0d%%" % percentage,
                self.time_str(runtime),
                "%.02f" % increases_per_second,
                self.time_str(expected_remaining_runtime)
            ]))

    def time_str(self, seconds):
        return '%02d:%02d:%02d' % (seconds / 3600, seconds / 60 % 60,
                                   seconds % 60)

    def set_total_count(self, total_count):
        self.total_count = total_count
Example #13
class StopWatch(Process):
    def __init__(self, hotkey=None):
        super().__init__(name='Stop Watch')
        self._stop_event = Event()
        self._stop_event.clear()

        self._waiter = Event()
        self._waiter.clear()

        self._elapsed_time = Value('d', 0.0)  # timer() returns floats, so store a double
        self._hotkey = hotkey or 'space'

        super().start()
        # Waits till the process is started.
        self._stop_event.wait()

    def start(self):
        if self.is_alive():
            self._stop_event.set()
        else:
            raise RuntimeError()

    def join(self, *args, forceStop=False, **kwargs):
        zeroTimer = not self._waiter.is_set()
        if forceStop and self.is_alive():
            self._waiter.set()
        super().join(*args, **kwargs)
        if zeroTimer:
            with self._elapsed_time.get_lock():
                self._elapsed_time.value = 0

    def getValue(self):
        return self._elapsed_time.value or None

    def run(self):
        s, e = None, None

        def hotkey_action():
            if s is not None:
                self._waiter.set()

        remove = add_hotkey(self._hotkey, hotkey_action)
        # Inform the main process that this process is started
        self._stop_event.set()
        # Clears the stop event so that process halts till it is started again with start function.
        self._stop_event.clear()
        self._stop_event.wait()

        s = timer()
        self._waiter.wait()
        e = timer()

        with self._elapsed_time.get_lock():
            self._elapsed_time.value = e - s
        remove_hotkey(remove)
Example #14
class Counter:
    def __init__(self):
        self.counter = Value('i', 0)

    def increment(self):
        with self.counter.get_lock():
            self.counter.value += 1

    def get_value(self):
        with self.counter.get_lock():
            return self.counter.value
Example #15
class PoolManager:
    def __init__(self, n_workers=None):
        self.n_workers = ensure_n_workers(n_workers)

        self.ready = False
        self.workers = None  # type: Pool

        self._remain_tasks = None
        self._work_done_event = None

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, tb):
        self.close()

    def open(self):
        assert not self.ready

        self._remain_tasks = Value('i', 0)
        self._work_done_event = Event()
        self.workers = Pool(self.n_workers, initializer=pool_init)
        self.ready = True

        return self

    def close(self, force=False):
        assert self.ready

        if not force:
            self._work_done_event.clear()
            while self.count_remaining_tasks() > 0:
                self._work_done_event.wait()
                self._work_done_event.clear()

        self.workers.close()
        self.workers.join()

    def count_remaining_tasks(self):
        with self._remain_tasks.get_lock():
            return self._remain_tasks.value

    def increase_task_counter(self):
        with self._remain_tasks.get_lock():
            self._remain_tasks.value += 1

    def decrease_task_counter(self):
        with self._remain_tasks.get_lock():
            self._remain_tasks.value -= 1

        self._work_done_event.set()
Example #16
class Control(object):
    """Shared (long) value for passing control information between main and
    worker threads.
    
    Args:
        initial_value: Initial value of the shared control variable.
    """
    def __init__(self, initial_value=CONTROL_ACTIVE):
        self.control = Value('l', initial_value)
    
    def check_value(self, value, lock=False):
        """Check that the current control value == `value`.
        
        Args:
            value: The value to check.
            lock: Whether to lock the shared variable before checking.
        
        Returns:
            True if the values are equal.
        """
        return self.get_value(lock=lock) == value
    
    def check_value_positive(self, lock=False):
        """Check that the current control value is positive.
        
        Args:
            lock: Whether to lock the shared variable before checking.
        """
        return self.get_value(lock=lock) > 0
    
    def get_value(self, lock=True):
        """Returns the current control value.
        
        Args:
            lock: Whether to lock the shared variable before checking.
        """
        if lock:
            with self.control.get_lock():
                return self.control.value
        else:
            return self.control.value
    
    def set_value(self, value):
        """Set the control value. The shared variable is always locked.
        
        Args:
            value: The value to set.
        """
        with self.control.get_lock():
            self.control.value = value
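
A short usage sketch (the control constants and the worker loop are assumptions; CONTROL_ACTIVE is referenced but not defined in the excerpt):

import time
from multiprocessing import Process

CONTROL_ACTIVE = 1    # assumed values for the control constants
CONTROL_SHUTDOWN = 0

def worker(control):
    # Poll the shared flag and exit once the main process clears it.
    while control.check_value(CONTROL_ACTIVE):
        time.sleep(0.1)

if __name__ == '__main__':
    control = Control(CONTROL_ACTIVE)
    p = Process(target=worker, args=(control,))
    p.start()
    time.sleep(1)
    control.set_value(CONTROL_SHUTDOWN)  # writes always take the lock
    p.join()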
Example #17
class Counter:
	def __init__(self):
		self.value = Value(ctypes.c_int)

	def __enter__(self):
		with self.value.get_lock():
			self.value.value += 1

	def __exit__(self, exc_type, exc_val, exc_tb):
		with self.value.get_lock():
			self.value.value -= 1

	def __repr__(self):
		return str(self.value.value)
Example #18
class Control(object):
    """Shared (long) value for passing control information between main and
    worker threads.

    Args:
        initial_value: Initial value of the shared control variable.
    """
    def __init__(self, initial_value=CONTROL_ACTIVE):
        self.control = Value('l', initial_value)

    def check_value(self, value, lock=False):
        """Check that the current control value == `value`.

        Args:
            value: The value to check.
            lock: Whether to lock the shared variable before checking.

        Returns:
            True if the values are equal.
        """
        return self.get_value(lock=lock) == value

    def check_value_positive(self, lock=False):
        """Check that the current control value is positive.

        Args:
            lock: Whether to lock the shared variable before checking.
        """
        return self.get_value(lock=lock) > 0

    def get_value(self, lock=True):
        """Returns the current control value.

        Args:
            lock: Whether to lock the shared variable before checking.
        """
        if lock:
            with self.control.get_lock():
                return self.control.value
        else:
            return self.control.value

    def set_value(self, value):
        """Set the control value. The shared variable is always locked.

        Args:
            value: The value to set.
        """
        with self.control.get_lock():
            self.control.value = value
Example #19
class TemporaryChatAvatarsManager:
    """The /static/img/temporary_chat_avatars directory stores chat avatars
    that users pick while creating a chat, before the chat itself has been created.
    These images are saved and then deleted after 10 seconds.
    This manager drives that process."""

    def __init__(self):
        self.dir = os.path.join(PATH_TO_ROOT, "static", "img", "temporary_chat_avatars")
        # Directory holding the temporary avatars
        self.files_counter = Value('i', 0)
        # A temporary file is named after its sequence number, hence the counter
        self.released_values = []
        # This list holds counter values that have already been released
        self.clear_temporary_chat_avatars_dir()
    
    def clear_temporary_chat_avatars_dir(self):
        for file in os.listdir(self.dir):
            if not file.endswith(".md"):
                os.remove(os.path.join(self.dir, file))

    def load_avatar(self, data: bytes) -> str:
        """Saves the file uploaded by the user, shrinking it to 200x200,
        and returns the relative path to it"""
        with self.files_counter.get_lock():
            self.files_counter.value += 1
            current_value = self.files_counter.value
        filename = f"{current_value}.png"
        path_to_avatar = os.path.join(self.dir, filename)
        make_icon(data, path_to_avatar)
        self.delete_avatar(path_to_avatar, current_value)
        return os.path.relpath(path_to_avatar, "app")
    
    @delayed_procedure(10)
    def delete_avatar(self, path_to_avatar, value):
        """Удаляет аватар и освобождает имя, отданное этому аватару"""
        os.remove(path_to_avatar)    
        self.release_value(value)
    
    def release_value(self, value):
        """Помещает значение счётчика в список отработанных и откатывает счётчик файлов насколько это возможно"""
        self.released_values.append(value)
        for released_value in reversed(self.released_values):
            if self.files_counter.value == released_value + 1:
                with self.files_counter.get_lock():
                    self.files_counter.value -= 1
                try:
                    self.released_values.pop()
                except IndexError:
                    continue
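
The delayed_procedure(seconds) decorator used above is not shown in the excerpt; a plausible stand-in (an assumption about its behaviour, not the project's actual helper) defers the call on a timer thread:

import threading

def delayed_procedure(seconds):
    """Assumed helper: run the decorated function `seconds` later in a timer thread."""
    def decorator(func):
        def wrapper(*args, **kwargs):
            timer = threading.Timer(seconds, func, args=args, kwargs=kwargs)
            timer.daemon = True  # pending deletions should not block interpreter exit
            timer.start()
            return timer
        return wrapper
    return decorator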
Example #20
class WaitGroup(object):
    def __init__(self):
        self.counter = Value('i', 0)

    def wait(self, interval=0.001):
        while self.counter.value != 0:
            time.sleep(interval)

    def add(self, count):
        with self.counter.get_lock():
            self.counter.value += count

    def done(self):
        with self.counter.get_lock():
            self.counter.value -= 1
Example #21
class Counter(object):
    def __init__(self):
        self.e = Value('i', 0)
        self.t = Value('i', 0)

    def inc_error(self):
        with self.e.get_lock():
            self.e.value += 1

    def inc_total(self):
        with self.t.get_lock():
            self.t.value += 1

    def print_error(self):
        print(self.e.value / self.t.value * 100, "% error.")
Example #22
class Transformator(Device):
    def __init__(self, nomenclature="", width=0., height=0.):

        Device.__init__(self, nomenclature, width, height)
        self.count = Value('i', 0)

    def __repr__(self):

        r = str(self) + "("
        r += "width=" + str(self.width) + "m, "
        r += "height=" + str(self.height) + "m, "
        r += "length=" + str(self.length) + "m, "
        r += "count=" + str(self.count.value) + ")"
        return r

    def transport(self, particle):
        if not self.is_particle_lost(particle):
            with self.count.get_lock():
                self.count.value += 1
            if self.next:
                return self.next.transport(particle)

    def reset(self):

        self.count.value = 0
        if self.next:
            self.next.reset()
Example #23
    def start(self):
        global n_finished
        n_finished = Value('i', 0)

        print('generating gridpacks for %s:' % self.card_dir +
              (', '.join(' %s' % i for i in self.processes)))
        print('starting pool with %i workers' % self.worker)
        pool = Pool(processes=self.worker,
                    initializer=init,
                    initargs=(n_finished, ))

        result = pool.map_async(submit_job_unpack,
                                [(self, i) for i in range(self.worker)],
                                chunksize=1)

        while not result.ready():
            with n_finished.get_lock():
                done = n_finished.value
            sys.stdout.write("\r(" + str(done) + "/" + str(self.worker) +
                             ") done.")
            sys.stdout.flush()
            time.sleep(5)

        print(result.get())
        pool.close()
        pool.join()
Example #24
class PoseEstimator:
  def __init__(self, camera_class, draw=True):
    self.camera_class = camera_class
    self.draw = draw
    self.keypoint_queue = Queue(maxsize=10)
    self.run = Value('i', 1)
    self.done_loading = Value('i', 0)
    self.process = Process(target=pose_estimation_worker,
                           args=(self.keypoint_queue, self.run, self.done_loading,
                                 self.camera_class, self.draw))

  @property
  def keypoints_available(self):
    return not self.keypoint_queue.empty()

  def get_keypoints(self):
    return self.keypoint_queue.get()

  def start(self):
    logging.info('Starting PoseEstimator process...')
    self.process.start()
    while not self.done_loading.value:
      time.sleep(0.1)
    logging.info('PoseEstimator process finished starting.')

  def stop(self):
    with self.run.get_lock():
      self.run.value = 0
    self.process.join()
Example #25
class counter_obj(object):
    def __init__(self):
        # read saved count value
        self.val = Value('i', get_val_from_file())

    def increment(self):
        with self.val.get_lock():
            tmp_value = int(get_val_from_file() + 1)
            # save result
            with open(r'./static/count.txt', 'w') as f:
                f.write(str(tmp_value))

    def get_value(self):
        with self.val.get_lock():
            # return the value in counter file
            return get_val_from_file()
Example #26
    def _calibrate_axis(self, axis_cur: multiprocessing.Value, axis_label,
                        axis_min, axis_max, axis_calibration_to_max):
        with axis_cur.get_lock():
            if axis_calibration_to_max:
                self._smc.write("G28 {0}{1}".format(
                    axis_label, config.CALIBRATION_DISTANCE))
                # "ok\r\n"
                response = self._smc.read_some()
                if response == self.RESPONSE_OK:
                    axis_cur.value = axis_max
                else:
                    return response
            else:
                self._smc.write("G28 {0}{1}".format(
                    axis_label, -config.CALIBRATION_DISTANCE))
                # "ok\r\n"
                response = self._smc.read_some()
                if response == self.RESPONSE_OK:
                    axis_cur.value = axis_min
                else:
                    return response

            # set fresh current coordinates on smoothie too
            self._smc.write("G92 {0}{1}".format(axis_label, axis_cur.value))
            # "ok\r\n"
            return self._smc.read_some()
Example #27
    def run_with_exception_except_test(self):
        """ Subclass StoppableExceptionThread and raise exception in method `run_with_exception` """
        class IncrementThread(StoppableExceptionThread):
            """ Used to test _stop in `run` """
            def __init__(self, *args, **kwargs):
                self.x = args[0]
                StoppableExceptionThread.__init__(self, *args[1:], **kwargs)

            def run_with_exception(self):
                while not self._stop.is_set():
                    with self.x.get_lock():
                        self.x.value += 1
                        if self.x.value > 5:
                            raise ValueError('x > 5')

        x = Value('i', 0)
        st = IncrementThread(x)
        st.start()
        sleep(1)
        assert_equals(st.stopped, False)
        with self.assertRaises(ValueError):
            st.join()
        assert_equals(st.is_alive(), False)
        with x.get_lock():
            assert_equals(x.value, 6)
Example #28
class TPSBucket:
    def __init__(self, expected_tps):
        self.number_of_tokens = Value('i', 0)
        self.expected_tps = expected_tps
        self.bucket_refresh_thread = threading.Thread(
            target=self.refill_bucket_per_second)
        self.bucket_refresh_thread.setDaemon(True)

    def refill_bucket_per_second(self):
        while True:
            self.refill_bucket()
            time.sleep(1)

    def refill_bucket(self):
        self.number_of_tokens.value = self.expected_tps

    def start(self):
        self.bucket_refresh_thread.start()

    def stop(self):
        # threading.Thread objects cannot be killed; the daemon refresh thread
        # simply exits together with the interpreter.
        pass

    def get_token(self):
        response = False
        if self.number_of_tokens.value > 0:
            with self.number_of_tokens.get_lock():
                if self.number_of_tokens.value > 0:
                    self.number_of_tokens.value -= 1
                    response = True

        return response
Example #29
class ScriptRunnerCallbacks(DefaultRunnerCallbacks):
    def __init__(self, pbar):
        self.pbar = pbar
        self.counter = Value('i', 0)
        super(ScriptRunnerCallbacks, self).__init__()

    def on_failed(self, host, res, ignore_errors=False):
        self.update_pbar()

    def on_ok(self, host, res):
        self.update_pbar()

    def on_skipped(self, host, item=None):
        logger.warning('{host} skipped'.format(host=host))

    def on_unreachable(self, host, res):
        self.update_pbar()

    def on_no_hosts(self):
        print('no hosts matched\n', file=sys.stderr)

    def update_pbar(self):
        with self.counter.get_lock():
            self.counter.value += 1
            self.pbar.update(self.counter.value)
Example #30
    def run_stop_test(self):
        """ Subclass StoppableThread and stop method `run` """
        class IncrementThread(StoppableThread):
            """ Used to test _stop in `run` """
            def __init__(self, *args, **kwargs):
                self.x = args[0]
                super(IncrementThread, self).__init__(*args[1:], **kwargs)

            def run(self):
                while not self._stop.is_set():
                    with self.x.get_lock():
                        self.x.value += 1

        x = Value('i', 0)
        st = IncrementThread(x)
        st.start()
        assert_equals(st.stopped, False)
        assert_equals(st.is_alive(), True)
        sleep(0.5)
        st.stop()
        assert_equals(st.stopped, True)
        st.join()
        assert_equals(st.is_alive(), False)
        with x.get_lock():
            assert_greater(x.value, 0)
Example #31
    def process(page: int, page_leap: int, parsing: mp.Value):
        logger = init_logging(f'mtgtop8_scrapper_{page}.log')
        try:
            with psycopg2.connect(user=user, dbname=database) as con:
                con.autocommit = True
                with con.cursor() as cursor:
                    while parsing.value > 0:
                        base_url = re.match('.*.com/', search_url).group()
                        logger.info(f'Fetching for page {page} in format {url_format} from url {search_url}')
                        child_page_value = {'cp': page}  # set page value
                        url_soup = get_and_wait(search_url, child_page_value)
                        events = prs.get_events_from_page(url_soup, base_url, logger)

                        if events:
                            for event_name, event_date, event_url in events:
                                dbc.insert_into_tournament_info(event_name, event_date, url_format, event_url, cursor,
                                                                logger, prod_mode)
                                tourny_id = dbc.get_tournament_info_id(event_name, event_date, url_format, event_url,
                                                                       cursor)
                                parse_event(tourny_id, event_url, base_url, cursor, logger, prod_mode)

                            page += page_leap
                        else:
                            with parsing.get_lock():
                                parsing.value = 0
        except Exception as e:
            if prod_mode:
                logger.warning(str(e))
            else:
                raise e
Example #32
def start_log_server(
    host: str,
    logname: str,
    event: threading.Event,
    port: multiprocessing.Value,
    filename: str,
    logging_config: Dict,
    output_dir: str,
) -> None:
    setup_logger(filename=filename,
                 logging_config=logging_config,
                 output_dir=output_dir)

    while True:
        # Loop until we find a valid port
        _port = random.randint(10000, 65535)
        try:
            receiver = LogRecordSocketReceiver(
                host=host,
                port=_port,
                logname=logname,
                event=event,
            )
            with port.get_lock():
                port.value = _port
            receiver.serve_until_stopped()
            break
        except OSError:
            continue
Example #33
def create_image(queue: mp.Queue, magnification: int, n_generations: int, n_calculated: mp.Value):
    while True:
        data = queue.get()
        matrix: np.ndarray = data["board"]
        ants = data["ants"]
        n_gen = data["i"]

        print(f"creating {n_gen} image")

        image = np.zeros((matrix.shape[1] * magnification, matrix.shape[0] * magnification, 3))

        for y, x in np.ndindex(matrix.shape):
            color = [255, 255, 255]
            x_lower, x_upper, y_lower, y_upper = scale_indexes_to_range(x, y, magnification)
            if matrix[y, x]:
                color = [0, 0, 0]

            image[y_lower:y_upper, x_lower:x_upper, :] = color

        for ant in ants:
            color = [255, 0, 0]
            x_lower, x_upper, y_lower, y_upper = scale_indexes_to_range(ant[0], ant[1], magnification)
            image[y_lower:y_upper, x_lower:x_upper, :] = color

        im = Image.fromarray(image.astype('uint8'))
        im.save(f'results/gen{n_gen}.png', 'PNG')

        with n_calculated.get_lock():
            n_calculated.value += 1
Example #34
class Stat:
    def __init__(self):
        self.total = Value('L')
        self.cerr = Counter()

    def inc(self):
        with self.total.get_lock():
            self.total.value += 1

    def err(self, id):
        self.cerr[id] += 1

    def print(self):
        tqdm.write('-------------------------------')
        self.print_errors()

    def print_errors(self):
        tqdm.write('%-20s | %s' % ('function', 'errors'))
        tqdm.write('---------------------+---------')
        for id, cnt in self.cerr.most_common():
            tqdm.write('%-20s | %d' % (id, cnt))

        tqdm.write('Processed lines: %d (%0.2f%% errors)' %
                   (self.total.value,
                    sum(self.cerr.values()) / self.total.value * 100))
        tqdm.write('')
Example #35
class Control(object):
    def __init__(self, initial_value):
        self.control = Value('l', initial_value)
    
    def check_value(self, value, lock=False):
        return self.get_value(lock=lock) == value
    
    def check_value_positive(self, lock=False):
        return self.get_value(lock=lock) > 0
    
    def get_value(self, lock=True):
        if lock:
            with self.control.get_lock():
                return self.control.value
        else:
            return self.control.value
    
    def set_value(self, value):
        with self.control.get_lock():
            self.control.value = value
Example #36
class Counter(object):
    def __init__(self):
        self.val = Value('i', 0)

    def increment(self, n=1):
        with self.val.get_lock():
            self.val.value += n

    @property
    def value(self):
        return self.val.value
Example #37
class AtomicCounter(object):
    def __init__(self, init_value=0):
        self._val = Value('i', init_value)

    def increase(self, incr=1):
        with self._val.get_lock():
            self._val.value += incr
            return self._val.value

    def decrease(self, decr=1):
        with self._val.get_lock():
            self._val.value -= decr
            return self._val.value

    @property
    def value(self):
        with self._val.get_lock():
            return self._val.value

    @property
    def lock(self):
        return self._val.get_lock()
Example #38
def run_in_parallel(cmds_queue, NPROC):
    running_ps = Value("i")
    while len(cmds_queue) > 0:
        # if we already have the maximum number of processes running,
        # then sleep and wait for a process to become free
        if running_ps.value >= NPROC:
            time.sleep(1)
            continue
        
        # get commands to run, and increment the number of processes
        # if we can't then we are out of commands, so break
        try: cmds = cmds_queue.pop()
        except KeyError: break
        
        with running_ps.get_lock(): 
            running_ps.value += 1
        # fork a process. If we are still in main, then do nothing. Otherwise,
        # run the grabbed commands, decrement the running processes value
        # and exit
        pid = os.fork()
        if pid != 0:
            print "FORKED"
            continue
        else:
            print(cmds)
            os.system(cmds)
            with running_ps.get_lock(): 
                running_ps.value -= 1
            os._exit(0)
    
    # wait for outstanding processes to finish
    while True:
        with running_ps.get_lock():
            if running_ps.value == 0: break
        time.sleep(1)
        continue

    return
Example #39
class Counter(object):
    def __init__(self, maximum):
        self.max = Value('i', maximum)
        self.val = Value('i', 0)

    def increment_both(self):
        with self.max.get_lock():
            self.max.value += 1
        return self.increment()

    def increment(self, n=1):
        with self.val.get_lock():
            self.val.value += n
            result = self.value
        return result

    @property
    def value(self):
        return self.val.value

    @property
    def maximum(self):
        return self.max.value
Example #40
File: mp.py  Project: roolebo/rpyc
class count(object):
    def __init__(self, c=0):
        self.c = Value('L', c)

    def __iter__(self):
        return self

    def __next__(self):
        with self.c.get_lock():
            rv = self.c.value
            self.c.value += 1
        return rv

    def next(self):
        return self.__next__()
Example #41
class State(object):
    def __init__(self):
        self.counter = Value('i', 0)
        self.start_ticks = Value('d', time.process_time())

    def increment(self, n=1):
        with self.counter.get_lock():
            self.counter.value += n

    @property
    def value(self):
        return self.counter.value

    @property
    def start(self):
        return self.start_ticks.value
Example #42
    def target_except_test(self):
        """ propagate exception from target function """
        def target_with_exception(x, stop_event):
            stop_event.wait(0.5)
            while not stop_event.is_set():
                with x.get_lock():
                    x.value += 1
                    if x.value > 5:
                        raise ValueError('x > 5')

        x = Value('i', 0)
        st = StoppableExceptionThread(target=target_with_exception, args=(x,))
        st.start()
        assert_equals(st.stopped, False)
        assert_equals(st.is_alive(), True)
        with self.assertRaises(ValueError):
            st.join()
        assert_equals(st.stopped, False)
        assert_equals(st.is_alive(), False)
        with x.get_lock():
            assert_equals(x.value, 6)
Example #43
    def target_with_args_finishes_test(self):
        """ run target function with arguments """

        def target_finite(x, stop_event):
            stop_event.wait(0.5)
            while not stop_event.is_set():
                with x.get_lock():
                    x.value += 1
                    if x.value > 5:
                        break

        x = Value('i', 0)
        st = StoppableExceptionThread(target=target_finite, args=(x,))
        st.start()
        assert_equals(st.stopped, False)
        assert_equals(st.is_alive(), True)
        st.join()
        assert_equals(st.stopped, False)
        assert_equals(st.is_alive(), False)
        with x.get_lock():
            assert_equals(x.value, 6)
Example #44
class ltaSlave():
  def __init__(self, config):
    configFile   = config
    try:
      self.readConfig(configFile)
    except Exception as e:
      print ('\n%s' % e)
      print('The Configuration is incomplete, exiting')
      exit(2)

    self.jobs   = Value('i', 0)
    self.logger.info('Slave %s initialized' % self.host)

  def readConfig(self, configFile):
    exec(eval("'from %s import *' % configFile"))
    self.host          = host
    self.ltacpport     = ltacpport
    self.mailSlCommand = mailSlCommand
    self.jobsdir       = jobsdir
    self.logger        = logger
    self.logdir        = logdir
    self.ltaClient     = ltaClient
    self.exportClient  = exportClient
    self.momClient     = momClient
    self.pipelineRetry = pipelineRetry
    self.momRetry      = momRetry
    self.ltaRetry      = ltaRetry
    self.srmRetry      = srmRetry
    self.srmInit       = srmInit
    self.momServer     = momServer
    self.masterAddress = masterAddress
    self.masterPort    = masterPort
    self.masterAuth    = masterAuth
    self.maxTalkQueue  = maxSlaveTalkerQueue
    self.parallelJobs  = parallelJobs

  def serve(self):
    class Manager(SyncManager): pass
    Manager.register('add_slave')
    Manager.register('remove_slave')
    Manager.register('slave_done')
    self.manager = Manager(address=(self.masterAddress, self.masterPort), authkey=self.masterAuth)
    self.manager.connect()
    self.logger.debug('Master found')
    self.queue = self.manager.add_slave(self.host)

    self.momTalker = momTalker(self.logger, self.exportClient, self.momRetry, self.maxTalkQueue)
    self.momTalker.start()
    talker = self.momTalker.getQueue()

    self.logger.info('Slave %s started' % self.host)
    while True:
      if self.jobs.value < self.parallelJobs:
        try:
          job = self.queue.get(True, 10)
        except QueueEmpty:
          job = None
        if job:
          with self.jobs.get_lock():
            self.jobs.value += 1
          runner = executer(self.logger, self.logdir, job, talker, self.jobs, self.momClient, self.ltaClient, self.host, self.ltacpport, self.mailSlCommand, self.manager, self.pipelineRetry, self.momRetry, self.ltaRetry, self.srmRetry, self.srmInit)
          runner.start()
      else:
        time.sleep(10)
Example #45
class Task(object):
    """ Container of Jobs"""

    # TODO: Implement timeout support in add/delJob
    def __init__(self, name, timeout=0, onstart=None, ondone=None, params=None, stdout=sys.stdout, stderr=sys.stderr):
        """Initialize task, which is a group of jobs to be executed

		name  - task name
		timeout  - execution timeout. Default: 0, means infinity
		onstart  - callback which is executed on the task starting (before the execution
			started) in the CONTEXT OF THE CALLER (main process) with the single argument,
			the task. Default: None
			ATTENTION: must be lightweight
		ondone  - callback which is executed on successful completion of the task in the
			CONTEXT OF THE CALLER (main process) with the single argument, the task. Default: None
			ATTENTION: must be lightweight
		params  - additional parameters to be used in callbacks
		stdout  - None or file name or PIPE for the buffered output to be APPENDED
		stderr  - None or file name or PIPE or STDOUT for the unbuffered error output to be APPENDED
			ATTENTION: PIPE is a buffer in RAM, so do not use it if the output data is huge or unlimited

		tstart  - start time is filled automatically on the execution start (before onstart). Default: None
		tstop  - termination / completion time after ondone
		"""
        assert isinstance(name, str) and timeout >= 0, "Parameters validation failed"
        self.name = name
        self.timeout = timeout
        self.params = params
        self.onstart = types.MethodType(onstart, self) if onstart else None
        self.ondone = types.MethodType(ondone, self) if ondone else None
        self.stdout = stdout
        self.stderr = stderr
        self.tstart = None
        self.tstop = None  # SyncValue()  # Termination / completion time after ondone
        # Private attributes
        self._jobsnum = Value(ctypes.c_uint)
        # Graceful completion of all tasks or at least one of the tasks was terminated
        self._graceful = Value(ctypes.c_bool)
        self._graceful.value = True

    def addJob(self):
        """Add one more job to the task

		return  - updated task
		"""
        initial = False
        with self._jobsnum.get_lock():
            if self._jobsnum.value == 0:
                initial = True
            self._jobsnum.value += 1
            # Run onstart if required
        if initial:
            self.tstart = time.time()
            if self.onstart:
                self.onstart()
        return self

    def delJob(self, graceful):
        """Delete one job from the task

		graceful  - the job is successfully completed or it was terminated
		return  - None
		"""
        final = False
        with self._jobsnum.get_lock():
            self._jobsnum.value -= 1
            if self._jobsnum.value == 0:
                final = True
                # Finalize if required
        if not graceful:
            self._graceful.value = False
        elif final:
            if self.ondone and self._graceful.value:
                self.ondone()
            self.tstop = time.time()
        return None
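
A brief usage sketch (the callback bodies and the number of jobs are illustrative, not part of the original):

import time

def on_start(task):
    print('%s started at %s' % (task.name, time.ctime(task.tstart)))

def on_done(task):
    print('%s completed all jobs' % task.name)

task = Task('demo', onstart=on_start, ondone=on_done)
task.addJob()               # the first job sets tstart and fires onstart
task.addJob()
task.delJob(graceful=True)
task.delJob(graceful=True)  # the last graceful delJob fires ondone and sets tstop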
Example #46
class imagequeue:
    """
    This class keeps a queue of images which may be worked on in threads.
    
    :param SAXS.calibration Cal: The SAXS Calibration to use for the processing
    :param optparser options: The object with the command-line options of the saxsdog
    :param list args: List of command line options
    
    """
    def __init__(self,Cals,options,args,conf):
         
         self.pool=[]
         self.cals=Cals
         self.conf=conf
         self.options=options
         self.picturequeue=Queue()
         self.histqueue=Queue()
         self.args=args
         self.allp=Value('i',0)
         self.stopflag=Value('i',0)
         self.dirwalker=False
         if not options.plotwindow: 
              plt.switch_backend("Agg")
         self.fig=plt.figure()
         if  options.plotwindow: 
              plt.ion()
       
    def getlastdata(self):
          print "getdatata" + str(self.lastfile)
          return self.lastfile,self.lastdata
    
    def fillqueuewithexistingfiles(self):
        """
        Fill the queue with the list of images that is already there.
        """
       
                
        if self.options.walkdirinthreads:
            self.dirwalker=Process(target=filler,args=(self.picturequeue,self.args[0]))
            self.dirwalker.start()
        else:
            self.dirwalker=Process()
            self.dirwalker.start()
            filler(self.picturequeue,self.args[0])
        
    def procimage(self,picture,threadid):
       
             
            #im=Image.open(picture,"r")
            #im.tag.tags
            max=60
            if not self.options.silent: print "[",threadid,"] open: ",picture 
            for i in range(max):
                try:
                    image=misc.imread(picture)
                    #tif = TiffFile(picture)
                    #image = tif.asarray()
                    
                except KeyboardInterrupt:
                    return
                except IOError as e:
                    try:
                        print "cannot open ", picture, ", lets wait.", max-i ," s"
                        print e.message,  sys.exc_info()[0]
                        time.sleep(1)
                        continue
                    except KeyboardInterrupt:
                        return
                except:
                    print "############"
                    print   sys.exc_info()
                    continue
                if image.shape==tuple(self.cals[0].config["Geometry"]["Imagesize"]):
                    break
                print "cannot open ", picture, ", lets wait.", max-i ," s"
                time.sleep(1)
                    
            else:
                print "image ", picture, " has wrong format"
                return
            
            if self.options.outdir!="":
                basename=self.options.outdir+os.sep+('_'.join(picture.replace('./','').split(os.sep))[:-3]).replace('/',"_")
                basename=basename.replace(':', '').replace('.','')
            else:
                reldir=os.path.join( 
                                      os.path.dirname(picture),
                                      self.options.relpath)
                if not os.path.isdir(reldir):
                    os.mkdir(reldir)
                basename=os.path.join( reldir,
                                      os.path.basename(picture)[:-3])
            data=[]
            for calnum,cal in enumerate(self.cals):   
                basename+="_"+str(calnum) 
                if not self.options.resume or not os.path.isfile(basename+'.chi'):
                    data.append((cal.integratechi(image,basename+".chi").tolist()))
                    if threadid==0 and self.options.plotwindow:
                        # this is a hack it really schould be a proper GUI
                       
                        cal.plot(image,fig=self.fig)
                        plt.draw()
                       
                             
                if self.options.writesvg: 
                    
                    if not self.options.resume or not os.path.isfile(basename+'.svg'):
                         cal.plot(image,basename+".svg",fig=self.fig)
                if self.options.writepng:
                     if not self.options.resume or not os.path.isfile(basename+'.svg'):
                          misc.imsave(basename+".png",image)
                   
                
                #self.picturequeue.task_done()
                with self.allp.get_lock():
                    self.allp.value+=1
                if self.options.silent:
                    
                    if np.mod(self.allp.value,100)==0:
                        print "[",threadid,"] ",self.allp.value
                else:
                    print "[",threadid,"] write: ",basename+".chi" 
            return basename ,data
    def start(self):  
        """
        Start threads and directory observer.
        """
        #start threads
        for threadid in range(1,self.options.threads):
            print "start proc [",threadid,"]"
           
            worker=Process(target=funcworker, args=(self,threadid))
            worker.daemon=True
            self.pool.append(worker)
            worker.start() 
            #self.processimage(picture,options)
        self.starttime=time.time() 
        if self.options.watch:
            eventhandler=addtoqueue(self.picturequeue)
            observer = Observer()
            observer.schedule(eventhandler, self.args[0], recursive=True)
            observer.start()
        # We let the master process do some work because it's useful for matplotlib.
        if not self.dirwalker:
            self.dirwalker=Process()
            self.dirwalker.start()
        if self.options.servermode:
             
             context = zmq.Context()
             socket = context.socket(zmq.REQ)
             tokenlist=  self.conf['Server'].split(":")
             
             server=":".join([tokenlist[0],tokenlist[1],self.options.serverport])
             print server
             socket.connect (server)
             from Leash import addauthentication
        try:
            while ( self.options.servermode or 
                    (not self.picturequeue.empty()) 
                    or self.dirwalker.is_alive() 
                    or self.options.watch): 
                    try:
                        picture = self.picturequeue.get(timeout=1)
                    except KeyboardInterrupt :
                        break
                    except Empty:
                        continue
                    lastfile, data =self.procimage(picture,0)
                    self.histqueue.put(time.time())
                    if self.options.servermode:
                        request={"command":"putplotdata","argument":{"data":{
                                "result":"plot","data":{"filename":lastfile,"array":data,
                                                        "stat":{}}
                                  }}}
                        socket.send_multipart([json.dumps(addauthentication( request,self.conf))])
                        socket.recv()
                    if np.mod(self.allp.value,500)==0:
                        self.timreport()
        except  KeyboardInterrupt:            
            if self.options.watch:
                        observer.stop()
                        observer.join()   
            if self.options.servermode:
                 context.destroy()
        self.stop()
        self.timreport()
        return self.allp.value, time.time()-self.starttime
    def stop(self):
        print "\n\nWaiting for the processes to terminate."
        self.stopflag.value=1
        for worker in self.pool:
            worker.join(3)
    def timreport(self):
        tottime=time.time()-self.starttime
        if self.allp.value==0:
            print "We didn't do any pictures "
        else:
            print "\n\nelapsed time: ",tottime
            print "\nProcessed: ",self.allp.value," pic"
            print " time per pic: ", tottime/self.allp.value,"[s]"
            print " pic per second: ",self.allp.value/tottime,"[/s]"
Example #47
class imagequeue:
    """
    This class keeps a queue of images which may be worked on in threads.
    
    :param SAXS.calibration Cals: The SAXS calibrations to use for the processing
    :param optparser options: The object with the command-line options of saxsdog
    :param list args: List of command-line arguments
    
    """
    def __init__(self,Cals,options,directory,conf):
         
         self.pool=[]
         self.cals=Cals
         self.conf=conf
         self.options=options
         self.picturequeue=Queue()
         self.histqueue=Queue(maxsize=10000)
         self.plotdataqueue=Queue(maxsize=1)
         self.directory=directory
         self.allp=Value('i',0)
         self.stopflag=Value('i',0)
         self.dirwalker=None
         self.observer=None
         if not options.plotwindow: 
              plt.switch_backend("Agg")
         self.fig=plt.figure()
         if  options.plotwindow: 
              plt.ion()
       
    def getlastdata(self):
          print "getlastdata " + str(self.lastfile)
          return self.lastfile,self.lastdata
    
    def fillqueuewithexistingfiles(self):
        """
        Fill the queue with the list of images that is already there.
        """
        if self.options.walkdirinthreads:
            self.dirwalker=Thread(target=filler,args=(self.picturequeue,self.directory))
            self.dirwalker.start()
        else:
            filler(self.picturequeue,self.directory)

    def procimage(self,picture,threadid):
            filelist={}
            max=60
            if not self.options.silent: print "[",threadid,"] open: ",picture 
            for i in range(max):
                try:
                    image=misc.imread(picture)
                except KeyboardInterrupt:
                    return
                except IOError as e:
                    try:
                        print "cannot open ", picture, ", let's wait.", max-i, " s"
                        print e.message,  sys.exc_info()[0]
                        time.sleep(1)
                        continue
                    except KeyboardInterrupt:
                        return
                except:
                    print "############"
                    print   sys.exc_info()
                    continue
                if image.shape==tuple(self.cals[0].config["Geometry"]["Imagesize"]):
                    break
                print "cannot open ", picture, ", let's wait.", max-i, " s"
                time.sleep(1)
                    
            else:
                print "image ", picture, " has wrong format"
                return
            
            if self.options.outdir!="":
                basename=self.options.outdir+os.sep+('_'.join(picture.replace('./','').split(os.sep))[:-3]).replace('/',"_")
                basename=basename.replace(':', '').replace('.','')
            else:
                reldir=os.path.join( 
                                      os.path.dirname(picture),
                                      self.options.relpath)
                if not os.path.isdir(reldir):
                    os.mkdir(reldir)
                basename=os.path.join( reldir,
                                      os.path.basename(picture)[:-4])
            data=[]
            integparams={}
            imgMetaData=datamerge.readtiff(picture)
            if "date" in imgMetaData:
                imgTime=imgMetaData["date"]
            else:
                imgTime="" 
            for calnum,cal in enumerate(self.cals):
                if len(list(enumerate(self.cals)))==1:
                    filename=basename
                else:
                    filename=basename+"_c"+cal.kind[0]+str(calnum)
                chifilename=filename+".chi"
                if self.options.GISAXSmode and calnum==0: #pass on GISAXSmode information to calibration.integratechi
                    chifilename="xxx"
                filelist[cal.kind+str(calnum)]=chifilename
                if not self.options.resume or not os.path.isfile(chifilename):
                    result=cal.integratechi(image,chifilename,picture)
                    result["Image"]=picture
                    if "Integparam" in result:
                        integparams[cal.kind[0]+str(calnum)]=result["Integparam"]                                        
                    data.append(result)
                    if threadid==0 and self.options.plotwindow:
                        # this is a hack; it really should be a proper GUI
                       
                        cal.plot(image,fig=self.fig)
                        plt.draw()
                       
                             
                if self.options.writesvg:     
                    if not self.options.resume or not os.path.isfile(filename+'.svg'):
                         cal.plot(image,filename+".svg",fig=self.fig)
                if self.options.writepng:
                     if not self.options.resume or not os.path.isfile(filename+'.png'):
                          misc.imsave(filename+".png",image)
                if self.options.silent:
                    if np.mod(self.allp.value,100)==0:
                        print "[",threadid,"] ",self.allp.value
                else:
                    print "[",threadid,"] write: ",filename+".chi" 
            with self.allp.get_lock():
                self.allp.value+=1
                
            filelist["JSON"]=basename+".json"
            
            try:
                self.histqueue.put({"Time":float(time.time()),
                                "ImgTime":imgTime, 
                                "FileList":filelist,
                                "BaseName":basename,
                                "IntegralParameters":integparams},block=False)
            except Full:
                print "History queue full, entry dropped"
            return basename ,data
        
    def clearqueue(self):
        while not self.histqueue.empty():
                self.histqueue.get()
        print "History Queue cleared"

        
    def start(self):  
        """
        Start threads and directory observer.
        """
        #start threads
        
        for threadid in range(1,self.options.threads):
            print "start proc [",threadid,"]"
           
            worker=Process(target=funcworker, args=(self,threadid))
            worker.daemon=True
            self.pool.append(worker)
            worker.start() 
            #self.processimage(picture,options)
        self.starttime=time.time() 
        if self.options.watch:
            eventhandler=addtoqueue(self.picturequeue)
            self.observer = Observer()
            self.observer.schedule(eventhandler, self.args[0], recursive=True)
            self.observer.start()
        #We let the master process do some work because it's useful for matplotlib.
        if not self.options.nowalk:
            self.fillqueuewithexistingfiles()
        if self.options.servermode:
            from Leash import addauthentication
        try:
            while ( self.options.servermode or 
                    (not self.picturequeue.empty()) 
                    or (self.dirwalker and self.dirwalker.is_alive() )
                    or self.options.watch): 
                    try:
                        picture = self.picturequeue.get(timeout=1)
                    except Empty:
                        continue
                    lastfile, data =self.procimage(picture,0)
                    
                    if self.options.servermode:
                        request={"command":"putplotdata","argument":{"data":{
                                "result":"plot","data":{"filename":lastfile,"graphs":data,
                                                        "stat":{}}
                                  }}}
                     
                        self.plotdataqueue.put(request)
                    if np.mod(self.allp.value,500)==0:
                        self.timreport()
        except KeyboardInterrupt:
            pass
        
        self.stop()
        self.timreport()
        return self.allp.value, time.time()-self.starttime
    def stop(self):
        print "\n\nWaiting for the processes to terminate."
        if self.observer:
            self.observer.stop()
            self.observer.join(1)
        self.stopflag.value=1
        for worker in self.pool:
            print "join worker"
            worker.join(1)
        if self.dirwalker:
            self.dirwalker.join(1)
        print "empty pic queue"
        while True:
            try:
                self.picturequeue.get(False)
            except Empty:
                break
        print "empty hist queue"
        while True:
            try:
                self.histqueue.get(False)
            except Empty:
                break
        print "empty plot queue"
        while True:
            try:
                self.plotdataqueue.get(False)
            except Empty:
                break
        if os.sys.platform!="win32":
            try:
                self.histqueue.close()
                self.plotdataqueue.close()
            except Exception as e:
                print e
    def timreport(self):
        tottime=time.time()-self.starttime
        count=self.allp.value
        #print count
        if count==0:
            print "We didn't process any pictures"
        else:
            print "\n\nelapsed time: ",tottime
            print "\nProcessed: ",count," pic"
            print " time per pic: ", tottime/count,"[s]"
            print " pic per second: ",count/tottime,"[/s]"
        time.sleep(1)
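
A self-contained sketch (hypothetical file names, no actual SAXS processing, written for Python 3 where the Full exception lives in the queue module) of the bookkeeping pattern procimage() relies on: each worker increments the shared counter under its lock once a picture is done, and pushes a result dict into a bounded history queue with a non-blocking put so that a full queue never stalls the workers.

import time
from multiprocessing import Process, Queue, Value
from queue import Full

def worker(inq, histq, allp):
    while True:
        picture = inq.get()
        if picture is None:                     # sentinel: no more work
            break
        # ... process the picture here ...
        with allp.get_lock():
            allp.value += 1
        try:
            histq.put({"Time": time.time(), "BaseName": picture}, block=False)
        except Full:
            pass                                # drop the history entry rather than block

if __name__ == '__main__':
    inq, histq, allp = Queue(), Queue(maxsize=100), Value('i', 0)
    workers = [Process(target=worker, args=(inq, histq, allp)) for _ in range(2)]
    for w in workers:
        w.start()
    for name in ["a.tif", "b.tif", "c.tif"]:
        inq.put(name)
    for _ in workers:
        inq.put(None)
    for w in workers:
        w.join()
    print(allp.value)                           # 3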
        
Ejemplo n.º 48
0
class HogwildWorld(World):
    """Creates a separate world for each thread (process).

    Maintains a few shared objects to keep track of state:

    - A Semaphore which represents queued examples to be processed. Every call
      of parley increments this counter; every time a Process claims an
      example, it decrements this counter.

    - A Condition variable which notifies when there are no more queued
      examples.

    - A boolean Value which represents whether the inner worlds should shut down.

    - An integer Value which contains the number of unprocessed examples queued
      (acquiring the semaphore only claims them--this counter is decremented
      once the processing is complete).
    """

    def __init__(self, world_class, opt, agents):
        self.inner_world = world_class(opt, agents)

        self.queued_items = Semaphore(0)  # counts num exs to be processed
        self.epochDone = Condition()  # notifies when exs are finished
        self.terminate = Value('b', False)  # tells threads when to shut down
        self.cnt = Value('i', 0)  # number of exs that remain to be processed

        self.threads = []
        for i in range(opt['numthreads']):
            self.threads.append(HogwildProcess(i, world_class, opt,
                                               agents, self.queued_items,
                                               self.epochDone, self.terminate,
                                               self.cnt))
        for t in self.threads:
            t.start()

    def __iter__(self):
        raise NotImplementedError('Iteration not available in hogwild.')

    def display(self):
        self.shutdown()
        raise NotImplementedError('Hogwild does not support displaying in-run' +
                                  ' task data. Use `--numthreads 1`.')

    def episode_done(self):
        return False

    def parley(self):
        """Queue one item to be processed."""
        with self.cnt.get_lock():
            self.cnt.value += 1
        self.queued_items.release()

    def getID(self):
        return self.inner_world.getID()

    def report(self):
        return self.inner_world.report()

    def save_agents(self):
        self.inner_world.save_agents()

    def synchronize(self):
        """Sync barrier: will wait until all queued examples are processed."""
        with self.epochDone:
            self.epochDone.wait_for(lambda: self.cnt.value == 0)

    def shutdown(self):
        """Set shutdown flag and wake threads up to close themselves"""
        # set shutdown flag
        with self.terminate.get_lock():
            self.terminate.value = True
        # wake up each thread by queueing fake examples
        for _ in self.threads:
            self.queued_items.release()
        # wait for threads to close
        for t in self.threads:
            t.join()
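
A minimal standalone sketch (not ParlAI code) of the coordination primitives the docstring describes: a Semaphore counts queued examples, a shared integer Value counts unfinished ones, a Condition lets a synchronize()-style call block until that counter reaches zero, and a boolean Value tells the workers to shut down.

import time
from multiprocessing import Process, Semaphore, Condition, Value

def worker(queued, done_cond, pending, terminate):
    while True:
        queued.acquire()                      # claim one queued example (blocks if none)
        if terminate.value:                   # woken only so that we can shut down
            break
        time.sleep(0.01)                      # stand-in for real work
        with pending.get_lock():
            pending.value -= 1
        with done_cond:
            done_cond.notify_all()            # let the synchronize step re-check the counter

if __name__ == '__main__':
    queued, done_cond = Semaphore(0), Condition()
    pending, terminate = Value('i', 0), Value('b', False)
    procs = [Process(target=worker, args=(queued, done_cond, pending, terminate))
             for _ in range(2)]
    for p in procs:
        p.start()
    for _ in range(10):                       # "parley": queue ten examples
        with pending.get_lock():
            pending.value += 1
        queued.release()
    with done_cond:                           # "synchronize": wait until everything is processed
        done_cond.wait_for(lambda: pending.value == 0)
    with terminate.get_lock():                # "shutdown": set the flag and wake every worker
        terminate.value = True
    for _ in procs:
        queued.release()
    for p in procs:
        p.join()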
Ejemplo n.º 49
0
class StressRunner(object):
  """This class contains functionality related to producing/consuming queries for the
     purpose of stress testing Impala.

     Queries will be executed in separate processes since python threading is limited
     to the use of a single CPU.
  """

  # This is the point at which the work queue will block because it is full.
  WORK_QUEUE_CAPACITY = 10

  def __init__(self):
    self._mem_broker = None

    # Synchronized blocking work queue for producer/consumers.
    self._query_queue = Queue(self.WORK_QUEUE_CAPACITY)

    # The Value class provides cross-process shared memory.
    self._mem_mb_needed_for_next_query = Value("i", 0)

    # All values below are cumulative.
    self._num_queries_dequeued = Value("i", 0)
    self._num_queries_started = Value("i", 0)
    self._num_queries_finished = Value("i", 0)
    self._num_queries_exceeded_mem_limit = Value("i", 0)
    self._num_queries_cancelled = Value("i", 0)
    self._num_queries_timedout = Value("i", 0)

    self.cancel_probability = 0
    self.spill_probability = 0

  def run_queries(self, queries, impala, num_queries_to_run, mem_overcommit_pct,
      should_print_status):
    """Runs queries randomly chosen from 'queries' and stops after 'num_queries_to_run'
       queries have completed.

       Before a query is run, a mem limit will be chosen. 'spill_probability' determines
       the likelihood of choosing a mem limit that will cause spilling. To induce
       spilling, a value is randomly chosen below the min memory needed to avoid spilling
       but above the min memory needed with spilling. So the min/max query memory
       requirements must be determined before calling this method.

       If 'mem_overcommit_pct' is zero, an exception will be raised if any queries
       fail for any reason other than cancellation (controlled by the 'cancel_probability'
       property), since each query should have enough memory to run successfully. If
       non-zero, failures due to insufficient memory will be ignored if memory was
       overcommitted at any time during execution.

       If a query completes without error, the result will be verified. An error
       will be raised upon a result mismatch.
    """
    self._mem_broker = MemBroker(impala.min_impalad_mem_mb,
        int(impala.min_impalad_mem_mb * mem_overcommit_pct / 100))

    # Print the status to show the state before starting.
    if should_print_status:
      self._print_status_header()
      self._print_status()
      lines_printed = 1
      last_report_secs = 0

    # Start producing queries.
    def enque_queries():
      try:
        for _ in xrange(num_queries_to_run):
          self._query_queue.put(choice(queries))
      except Exception as e:
        current_thread().error = e
        raise e
    enqueue_thread = create_and_start_daemon_thread(enque_queries)

    # Start a thread to check if more producers are needed. More producers are needed
    # when no queries are currently dequeued and waiting to be started.
    runners = list()
    def start_additional_runners_if_needed():
      try:
        while self._num_queries_started.value < num_queries_to_run:
          # Remember num dequeued/started are cumulative.
          if self._num_queries_dequeued.value == self._num_queries_started.value:
            impalad = impala.impalads[len(runners) % len(impala.impalads)]
            runner = Process(target=self._start_single_runner, args=(impalad, ))
            runner.daemon = True
            runners.append(runner)
            runner.start()
          sleep(1)
      except Exception as e:
        current_thread().error = e
        raise e
    runners_thread = create_and_start_daemon_thread(start_additional_runners_if_needed)

    # Wait for everything to finish but exit early if anything failed.
    sleep_secs = 0.1
    while enqueue_thread.is_alive() or runners_thread.is_alive() or runners:
      if enqueue_thread.error or runners_thread.error:
        sys.exit(1)
      for runner in runners[:]:  # iterate over a copy because finished runners are removed
        if runner.exitcode is not None:
          if runner.exitcode == 0:
            runners.remove(runner)
          else:
            sys.exit(runner.exitcode)
      sleep(sleep_secs)
      if should_print_status:
        last_report_secs += sleep_secs
        if last_report_secs > 5:
          last_report_secs = 0
          lines_printed %= 50
          if lines_printed == 0:
            self._print_status_header()
          self._print_status()
          lines_printed += 1

    # And print the final state.
    if should_print_status:
      self._print_status()

  def _start_single_runner(self, impalad):
    """Consumer function to take a query of the queue and run it. This is intended to
       run in a separate process so validating the result set can use a full CPU.
    """
    runner = QueryRunner()
    runner.impalad = impalad
    runner.connect()

    while not self._query_queue.empty():
      try:
        query = self._query_queue.get(True, 1)
      except Empty:
        continue
      with self._num_queries_dequeued.get_lock():
        query_idx = self._num_queries_dequeued.value
        self._num_queries_dequeued.value += 1

      if not query.required_mem_mb_without_spilling:
        mem_limit = query.required_mem_mb_with_spilling
        solo_runtime = query.solo_runtime_secs_with_spilling
      elif self.spill_probability < random():
        mem_limit = query.required_mem_mb_without_spilling
        solo_runtime = query.solo_runtime_secs_without_spilling
      else:
        mem_limit = randrange(query.required_mem_mb_with_spilling,
            query.required_mem_mb_without_spilling + 1)
        solo_runtime = query.solo_runtime_secs_with_spilling

      while query_idx > self._num_queries_started.value:
        sleep(0.1)

      self._mem_mb_needed_for_next_query.value = mem_limit

      with self._mem_broker.reserve_mem_mb(mem_limit) as reservation_id:
        self._num_queries_started.value += 1
        should_cancel = self.cancel_probability > random()
        if should_cancel:
          timeout = randrange(1, max(int(solo_runtime), 2))
        else:
          timeout = solo_runtime * max(10, self._num_queries_started.value
              - self._num_queries_finished.value)
        report = runner.run_query(query, timeout, mem_limit)
        if report.timed_out and should_cancel:
          report.was_cancelled = True
        self._update_from_query_report(report)
        if report.non_mem_limit_error:
          error_msg = str(report.non_mem_limit_error)
          # There is a possible race during cancellation. If a fetch request fails (for
          # example due to hitting a mem limit), just before the cancellation request, the
          # server may have already unregistered the query as part of the fetch failure.
          # In that case the server gives an error response saying the handle is invalid.
          if "Invalid query handle" in error_msg and report.timed_out:
            continue
          # Occasionally the network connection will fail, and depending on when the
          # failure occurred during run_query(), an attempt to get the profile may be
          # made which results in "Invalid session id" since the server destroyed the
          # session upon disconnect.
          if "Invalid session id" in error_msg:
            continue
          raise Exception("Query failed: %s" % str(report.non_mem_limit_error))
        if report.mem_limit_exceeded \
            and not self._mem_broker.was_overcommitted(reservation_id):
          raise Exception("Unexpected mem limit exceeded; mem was not overcommitted\n"
              "Profile: %s" % report.profile)
        if not report.mem_limit_exceeded \
            and not report.timed_out \
            and report.result_hash != query.result_hash:
          raise Exception("Result hash mismatch; expected %s, got %s"
              % (query.result_hash, report.result_hash))

  def _print_status_header(self):
    print(" Done | Running | Mem Exceeded | Timed Out | Canceled | Mem Avail | Mem Over "
        "| Next Qry Mem")

  def _print_status(self):
    print("%5d | %7d | %12d | %9d | %8d | %9d | %8d | %12d" % (
        self._num_queries_finished.value,
        self._num_queries_started.value - self._num_queries_finished.value,
        self._num_queries_exceeded_mem_limit.value,
        self._num_queries_timedout.value - self._num_queries_cancelled.value,
        self._num_queries_cancelled.value,
        self._mem_broker.available_mem_mb,
        self._mem_broker.overcommitted_mem_mb,
        self._mem_mb_needed_for_next_query.value))

  def _update_from_query_report(self, report):
    self._num_queries_finished.value += 1
    if report.mem_limit_exceeded:
      self._num_queries_exceeded_mem_limit.value += 1
    if report.was_cancelled:
      self._num_queries_cancelled.value += 1
    if report.timed_out:
      self._num_queries_timedout.value += 1
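
A small hypothetical sketch (no Impala involved) of the counter bookkeeping used above: each consumer claims a unique, monotonically increasing index by incrementing a shared Value under its lock, and then waits for its turn so work items are admitted strictly in dequeue order.

from multiprocessing import Process, Value
from time import sleep

def consumer(dequeued, started, total):
    while True:
        with dequeued.get_lock():
            if dequeued.value >= total:
                return
            my_idx = dequeued.value           # unique index, protected by the lock
            dequeued.value += 1
        while my_idx > started.value:         # wait until it is this item's turn
            sleep(0.01)
        # ... run the work item here ...
        started.value += 1                    # only the holder of the current index gets here

if __name__ == '__main__':
    dequeued, started = Value('i', 0), Value('i', 0)
    procs = [Process(target=consumer, args=(dequeued, started, 20)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(started.value)                      # 20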
Ejemplo n.º 50
0
class MemBroker(object):
  """Provides memory usage coordination for clients running in different processes.
     The broker fulfills reservation requests by blocking as needed so total memory
     used by clients never exceeds the total available memory (including an
     'overcommitable' amount).

     The lock built in to _available is also used to protect access to other members.

     The state stored in this class is actually an encapsulation of part of the state
     of the StressRunner class below. The state here is separated for clarity.
  """

  def __init__(self, real_mem_mb, overcommitable_mem_mb):
    """'real_mem_mb' memory should be the amount of memory that each impalad is able
       to use. 'overcommitable_mem_mb' is the amount of memory that will be dispensed
       over the 'real' amount.
    """
    self._available = Value("i", real_mem_mb + overcommitable_mem_mb)
    self._max_overcommitment = overcommitable_mem_mb

    # Each reservation will be assigned an id. Ids are monotonically increasing. When
    # a reservation crosses the overcommitment threshold, the corresponding reservation
    # id will be stored in '_last_overcommitted_reservation_id' so clients can check
    # to see if memory was overcommitted since their reservation was made (this is a race
    # but an incorrect result will be on the conservative side).
    self._next_reservation_id = Value("L", 0)
    self._last_overcommitted_reservation_id = Value("L", 0)

  @property
  def overcommitted_mem_mb(self):
    return max(self._max_overcommitment - self._available.value, 0)

  @property
  def available_mem_mb(self):
    return self._available.value

  @property
  def last_overcommitted_reservation_id(self):
    return self._last_overcommitted_reservation_id.value

  @contextmanager
  def reserve_mem_mb(self, mem_mb):
    """Blocks until the requested amount of memory is available and taken for the caller.
       This function should be used in a 'with' block. The taken memory will
       automatically be released when the 'with' context exits. A numeric id is returned
       so clients can compare against 'last_overcommitted_reservation_id' to see if
       memory was overcommitted since the reservation was obtained.

       with broker.reserve_mem_mb(100) as reservation_id:
         # Run query using 100 MB of memory
         if <query failed>:
           # Immediately check broker.was_overcommitted(reservation_id) to see if
           # memory was overcommitted.
    """
    reservation_id = self._wait_until_reserved(mem_mb)
    try:
      yield reservation_id
    finally:
      self._release(mem_mb)

  def _wait_until_reserved(self, req):
    while True:
      with self._available.get_lock():
        if req <= self._available.value:
          self._available.value -= req
          LOG.debug("Reserved %s MB; %s MB available; %s MB overcommitted", req,
              self._available.value, self.overcommitted_mem_mb)
          reservation_id = self._next_reservation_id.value
          self._next_reservation_id.value += 1
          if self.overcommitted_mem_mb > 0:
            self._last_overcommitted_reservation_id.value = reservation_id
          return reservation_id
      sleep(0.1)

  def _release(self, req):
    with self._available.get_lock():
      self._available.value += req
      LOG.debug("Released %s MB; %s MB available; %s MB overcommitted", req,
          self._available.value, self.overcommitted_mem_mb)

  def was_overcommitted(self, reservation_id):
    """Returns True if memory was overcommitted since the given reservation was made.
       For an accurate return value, this should be called just after the query ends
       or while the query is still running.
    """
    return reservation_id <= self._last_overcommitted_reservation_id.value
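
A brief usage sketch of the reservation API above, assuming MemBroker (and the module-level LOG it writes to) can be imported; the failure check and the import path are placeholders.

from multiprocessing import Process
# from mem_broker_module import MemBroker    # hypothetical import path for the class above

def run_one(broker, mem_mb):
    with broker.reserve_mem_mb(mem_mb) as reservation_id:
        # ... run a query with mem_mb as its limit ...
        failed = False                        # stand-in for a real failure check
        if failed and not broker.was_overcommitted(reservation_id):
            raise Exception("Query failed although memory was never overcommitted")

if __name__ == '__main__':
    broker = MemBroker(real_mem_mb=1000, overcommitable_mem_mb=200)
    procs = [Process(target=run_one, args=(broker, 400)) for _ in range(5)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()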
Ejemplo n.º 51
0
class FixedDialogTeacher(Teacher):
    """A teacher agent for all teachers involved in tasks with fixed data.

    This class provides the following functionality for its subclasses:

    - Resets a teacher
    - Provides an observe method
    - Computes and retrieves the next episode index for a teacher
    - Provides a threadpool option for loading data (especially useful for
      large data, e.g. images)

    To utilize the DataLoader for threadpool loading, a teacher should
    implement the ``submit_load_request`` function to send a load request
    to the DataLoader by calling ``self.data_loader.request_load`` with the
    appropriate arguments (``receive_fn, load_fn, args``). The DataLoader then
    returns the data to the teacher's ``data_queue``, which the teacher can
    poll in its ``act`` method.

    The following is an example of the DataLoader usage in the VQA-V1 teacher.

        1. In the teacher's ``init`` function, the teacher calls its
           ``submit_load_request`` function to preload an image.
        2. The ``submit_load_request`` function gets the next ``episode_idx``,
           and computes the image path for the load request.
        3. At the end of ``submit_load_request``, the teacher calls
           ``self.data_loader.request_load`` with three args:
           - ``self.receive_data`` - the function that the DataLoader calls to
               return the loaded object
           - ``self.image_loader.load`` - the function used to load the image
               from the image path
           - ``[img_path]`` - a list of arguments for the load function, which
               in this case is the path of the image.
        4. In the teacher's ``act`` function, the teacher loads the data from
           its data queue.
        5. At the end of the ``act`` function, the teacher calls
           ``submit_load_request`` to preload an image for the next example.


    """
    def __init__(self, opt, shared=None):
        super().__init__(opt, shared)

        if not hasattr(self, 'datatype'):
            self.datatype = opt['datatype']
        if not hasattr(self, 'random'):
            self.random = self.datatype == 'train'
        if not hasattr(self, 'training'):
            self.training = self.datatype.startswith('train')
        if not hasattr(self, 'datafile'):
            self.datafile = opt.get('datafile')
        # set up support for multithreaded data loading
        self.data_queue = queue.Queue()
        if shared:
            self.index = shared['index']
            if 'data_loader' in shared:
                self.data_loader = shared['data_loader']
        else:
            self.index = AttrDict(value=-1)

        if not hasattr(self, 'data_loader'):
            self.data_loader = DataLoader(opt)
            self.data_loader.start()

        # set up batching
        self.bsz = opt.get('batchsize', 1)
        self.batchindex = opt.get('batchindex', 0)

        dt = opt.get('datatype', '').split(':')
        self.use_batch_act = (opt.get('batch_sort', False) and self.bsz > 1
                              and 'stream' not in dt)

        if self.use_batch_act:
            if shared:
                self.lastYs = shared['lastYs']
                if 'sorted_data' in shared:
                    self.sorted_data = shared['sorted_data']
                    self.batches = shared['batches']
            else:
                self.lastYs = [None] * self.bsz
                ordered_opt = opt.copy()
                ordered_opt['datatype'] = ':'.join((dt[0], 'ordered'))
                ordered_opt['batchsize'] = 1
                ordered_opt['numthreads'] = 1
                ordered_teacher = create_task_agent_from_taskname(ordered_opt)[0]

                clen = opt.get('context_length', -1)
                incl = opt.get('include_labels', True)

                if ordered_teacher.num_examples() > 1000000:  # one million
                    print('WARNING: this dataset is large, and batch sorting '
                          'may use too much RAM or take too long to set up. '
                          'Consider disabling batch sorting, setting '
                          'context-length to a small integer (if this dataset '
                          'has episodes of multiple examples), or streaming '
                          'the data using a streamed data mode if supported.')

                flatdata = flatten(ordered_teacher,
                                   context_length=clen, include_labels=incl)
                self.sorted_data = sort_data(flatdata)
                self.batches = make_batches(self.sorted_data, self.bsz)

    def _lock(self):
        if hasattr(self.index, 'get_lock'):
            return self.index.get_lock()
        else:
            return no_lock()

    def reset(self):
        """Reset the dialog so that it is at the start of the epoch,
        and all metrics are reset.
        """
        super().reset()
        self.metrics.clear()
        self.lastY = None
        self.episode_done = True
        self.epochDone = False
        self.data_queue = queue.Queue()

        self.episode_idx = -1
        with self._lock():
            self.index.value = -1
        if self.use_batch_act and self.random and hasattr(self, 'batches'):
            random.shuffle(self.batches)

    def submit_load_request(self):
        """An agent should implement this method to submit requests to the
        data loader. At the end of this method, the agent should call
        ``self.data_loader.request_load()`` with the appropriate args.
        """
        pass

    def receive_data(self, future):
        """Function for receiving data from the data loader."""
        data = future.result()
        self.data_queue.put(data)

    def share(self):
        shared = super().share()

        if hasattr(self, 'lastYs'):
            # share lastYs to communicate between batch_act and observe
            shared['lastYs'] = self.lastYs

        if self.opt.get('numthreads', 1) > 1:
            if type(self.index) is not multiprocessing.sharedctypes.Synchronized:
                # for multithreading need to move index into threadsafe memory
                self.index = Value('l', -1)
            if hasattr(self, 'sorted_data'):
                shared['sorted_data'] = self.sorted_data
                shared['batches'] = self.batches
        else:
            shared['data_loader'] = self.data_loader
        shared['index'] = self.index

        return shared

    def next_episode_idx(self, num_eps=None, loop=None):
        if num_eps is None:
            num_eps = self.num_episodes()
        if loop is None:
            loop = self.training
        if self.random:
            new_idx = random.randrange(num_eps)
        else:
            with self._lock():
                self.index.value += 1
                if loop:
                    self.index.value %= num_eps
                new_idx = self.index.value
        return new_idx

    def next_example(self):
        if self.episode_done:
            self.episode_idx = self.next_episode_idx()
            self.entry_idx = 0
        else:
            self.entry_idx += 1

        if self.episode_idx >= self.num_episodes():
            return {'episode_done': True}, True

        ex = self.get(self.episode_idx, self.entry_idx)
        self.episode_done = ex['episode_done']

        if (not self.random and self.episode_done
                and self.episode_idx + 1 >= self.num_episodes()):
            epoch_done = True
        else:
            epoch_done = False

        return ex, epoch_done

    def next_batch(self):
        # get next batch
        with self._lock():
            self.index.value += 1
            if self.training:
                self.index.value %= len(self.batches)
            batch_idx = self.index.value

            if batch_idx + 1 >= len(self.batches):
                if self.random:
                    random.shuffle(self.batches)
                self.epochDone = True
            else:
                self.epochDone = False

        if batch_idx >= len(self.batches):
            return [{'episode_done': True, 'id': self.getID()}] * self.bsz

        return self.batches[batch_idx]

    def num_episodes(self):
        """Get the number of episodes in this dataset."""
        if self.use_batch_act:
            # when using batch_act, this is length of sorted data
            return len(self.sorted_data)
        raise RuntimeError('"num_episodes" must be overridden by children.')

    def num_examples(self):
        """Get the total number of examples in this dataset."""
        if self.use_batch_act:
            # when using batch_act, this is length of sorted data
            return len(self.sorted_data)
        raise RuntimeError('"num_examples" must be overridden by children.')

    def get(self, episode_idx, entry_idx=0):
        """Get the specified episode and the specified entry in that episode.

        Many datasets have only single-entry episodes, so entry_idx defaults to
        zero. Children must override this method in order to inherit the
        `next_example` method.
        """
        raise RuntimeError('"Get" method must be overridden by children.')

    def observe(self, observation):
        """Process observation for metrics."""
        if self.use_batch_act:
            self.lastY = self.lastYs[self.batchindex]
            self.lastYs[self.batchindex] = None

        if hasattr(self, 'lastY') and self.lastY is not None:
            self.metrics.update(observation, self.lastY)
            self.lastY = None
        return observation

    def batch_act(self, observations):
        # we ignore observations
        if not hasattr(self, 'epochDone'):
            # reset if haven't yet
            self.reset()

        batch = self.next_batch()
        # pad batch
        if len(batch) < self.bsz:
            batch += [{'episode_done': True, 'id': self.getID()}] * (self.bsz - len(batch))

        # remember correct answer if available (for padding, None)
        for i, ex in enumerate(batch):
            self.lastYs[i] = ex.get('labels', ex.get('eval_labels'))

        return batch

    def act(self):
        """Send new dialog message."""
        if not hasattr(self, 'epochDone'):
            # reset if haven't yet
            self.reset()

        # get next example, action is episode_done dict if already out of exs
        action, self.epochDone = self.next_example()
        action['id'] = self.getID()

        # remember correct answer if available
        self.lastY = action.get('labels', None)
        if not self.datatype.startswith('train') and 'labels' in action:
            # move labels to eval field so not used for training
            # but this way the model can use the labels for perplexity or loss
            action['eval_labels'] = action.pop('labels')

        return action
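
A standalone sketch of the locking fallback used by _lock() and share() above; AttrDict and no_lock are re-implemented here just for the example. The shared index starts as a plain attribute holder and is promoted to a multiprocessing Value when several workers are involved, and callers always go through one helper that returns whichever lock is available.

from contextlib import contextmanager
from multiprocessing import Value

@contextmanager
def no_lock():
    yield                                     # dummy lock for the single-worker case

class AttrDict(dict):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.__dict__ = self                  # allows index.value style access

class IndexHolder(object):
    def __init__(self, shared=False):
        self.index = Value('l', -1) if shared else AttrDict(value=-1)

    def _lock(self):
        if hasattr(self.index, 'get_lock'):
            return self.index.get_lock()      # real lock of the shared Value
        return no_lock()

    def next_idx(self, num_items):
        with self._lock():
            self.index.value = (self.index.value + 1) % num_items
            return self.index.value

holder = IndexHolder(shared=True)
print(holder.next_idx(5))                     # 0
print(holder.next_idx(5))                     # 1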
Ejemplo n.º 52
0
class Brute_Force():
    minKeyLength = 6
    maxKeyLength = 16
    alphabet = string.ascii_lowercase + string.ascii_uppercase + string.digits #+ string.punctuation
    algorithm = None
    origHash = None
    key = ''
    rec = None
    charactersToCheck = 3
    queue = Queue(cpu_count()*5)
    chunk_size = 0
    countey = Value('I', 0)
    done = Value('b', False)
    total_work_units = 1
    possibilities_exhausted = False
    first_unit = True
    children = []
    result_queue = None
    processes_running = False
    hashlib_mode = False
    hash_list = []
    list_mode = False
    schemes = ["sha1_crypt", "sha256_crypt", "sha512_crypt", "md5_crypt",
               "des_crypt", 'ldap_salted_sha1', 'ldap_salted_md5',
               'ldap_sha1', 'ldap_md5', 'ldap_plaintext', "mysql323"]
    myctx = CryptContext(schemes)


    def __init__(self):
        if not __name__ == '__main__':
            return
        current_process().authkey = "Popcorn is awesome!!!"

    def set_params(self, alphabet, algorithm, origHash, min_key_length, max_key_length):
        self.alphabet = alphabet
        self.algorithm = algorithm
        self.origHash = origHash
        self.minKeyLength = min_key_length
        self.maxKeyLength = max_key_length
        self.set_chars_to_check()

    def resetVariables(self):
        self.minKeyLength = 1
        self.maxKeyLength = 16
        self.alphabet = string.ascii_lowercase + string.ascii_uppercase + string.digits + string.punctuation
        self.algorithm = ""
        self.origHash = ''
        self.key = ''
        self.rec = None
        self.charactersToCheck = 3
        self.queue = Queue(cpu_count()*5)
        self.chunk_size = 0
        self.countey = Value('I', 0)
        self.done = Value('b', False)
        self.total_work_units = 0
        self.possibilities_exhausted = False
        self.first_unit = True

    def isFound(self):
        return self.done.value

    def returnKey(self):
        return self.key

    def possibilitiesEhausted(self):
        return self.possibilities_exhausted

    def set_result_queue(self, result_queue):
        self.result_queue = result_queue

    def get_total_chunks(self):
        return self.total_work_units

    # start pool of check_keys workers
    def start_processes(self):
        if not self.processes_running:
            for j in range(0, cpu_count()):
                self.children.append(Process(target=self.check_keys, args=(self.queue,)))
                self.children[j].start()
                #print "bf internal process %i started." % self.children[j].pid
            self.processes_running = True

    # shutdown process pool
    def terminate_processes(self):
        for process in self.children:
            #print "killing process: %i" % process.pid
            process.terminate()
            process.join(timeout=.1)
            #if process.is_alive():
             #   print "process %i did not die." % process.pid

    # checks keys of length minKeyLength to charsToCheck, these keys will not use a prefix
    def check_short_keys(self):
        if self.done.value:
            return
        #print "check_short_keys called for lengths %d-%d and no prefix." % (self.minKeyLength, self.charactersToCheck)
        # compound iterable creates strings with a range of lengths
        keylist = itertools.chain.from_iterable(itertools.product(self.alphabet, repeat=j)
                                                for j in range(self.minKeyLength, self.charactersToCheck+1))
        for key in keylist:
            tempkey = ''.join(key)
            if self.isSolution(tempkey):
                self.result_queue.put(('w', tempkey))
                if not self.list_mode:
                    while not self.queue.empty():
                        self.queue.get()
                    self.countey.value += 1
                    #print "We win!"
                    return True
            self.countey.value += 1
        params = "bruteforce\n" + self.algorithm + "\n" + self.origHash + "\n" + self.alphabet + "\n" \
                 + str(self.minKeyLength) + "\n" + str(self.maxKeyLength) + "\n" \
                 + "-99999999999999999999999999999999999" + "\n0\n0\n0"
        self.result_queue.put(('f', params))
        return False

    # take prefixes from the job queue and iterate through the possibilities for keys starting with that prefix
    def check_keys(self, queue):
        while queue:
            if self.done.value:
                return
            # get a workunit off the queue
            workunit = queue.get()
            if workunit.prefix == "******possibilities exhausted******":
                time.sleep(10)
                self.result_queue.put(('e', "sadness"))
            self.algorithm = workunit.algorithm
            self.origHash = workunit.hash
            prefix = ''.join(workunit.prefix)
            #create an iterable to produce suffixes to append to the prefix
            keylist = itertools.product(self.alphabet, repeat=self.charactersToCheck)
            # check possibilities until iterable is consumed
            for key in keylist:
                tempkey = prefix + ''.join(key)
                #print tempkey
                if self.isSolution(tempkey):
                    try:
                        # send key with success message
                        if self.list_mode:
                            self.result_queue.put(('w', (tempkey + '\n' + hashlib.new(self.algorithm, tempkey).hexdigest(),)))
                        else:
                            self.result_queue.put(('w', tempkey), timeout=1)
                    except Exception:
                        return
                    if not self.list_mode:
                        while not self.queue.empty():
                            queue.get()
                        self.countey.value += 1
                        queue.close()
                        #print "We win!"
                        return True
            self.countey.value += 1
            # send back parameters with a fail result
            params = "bruteforce\n" + self.algorithm + "\n" + self.origHash + "\n" + self.alphabet + "\n" \
                     + str(self.minKeyLength) + "\n" + str(self.maxKeyLength) + "\n" + prefix + "\n0\n0\n0"
            self.result_queue.put(('f', params))

        return False

#   get_prefix() is an iterator which produces all possible prefixes of appropriate length
#   as defined by min/max key lengths and charactersToCheck
    def get_prefix(self):
        if self.minKeyLength < self.charactersToCheck:
            yield ''
            # all keys up to charactersToCheck are handled by check_short_keys, so start with 1-char prefixes
            min_length = 1
        else:
            min_length = self.minKeyLength-self.charactersToCheck
        for i in range(min_length, (self.maxKeyLength - self.charactersToCheck + 1)):
            prefixes = itertools.product(self.alphabet, repeat=i)
            for prefix in prefixes:
                if self.done.value:
                    return
                yield ''.join(prefix)
        yield "******possibilities exhausted******"

#   Hash a possible key and check if it is equal to the hashed input.
    def isSolution(self, key):
        if self.hashlib_mode:
            temp_key = hashlib.new(self.algorithm, key).hexdigest()
            if self.list_mode:
                for hash in self.hash_list:
                    if hash == temp_key:
                        return True
                return False
            else:
                if temp_key == self.origHash:
                    self.rec = "found"
                    #print "Solution found!\nKey is : %s\nWith a hash of %s" % (key, temp_key)
                    if not self.list_mode:
                        with self.done.get_lock():
                            self.done.value = True
                    self.key = key
                    return True
                else:
                    return False
        else:
            if self.myctx.verify(key, self.origHash):
                print "Solution found!\nKey is : %s\nWith a hash of %s" % (key, self.origHash)
                self.done.value = True
                self.key = key
                return True
            else:
                return False



#   setup here is to make chunks large enough that constant network communications are avoided but that won't last
#   forever on slower machines. A maximum chunk size of 10M hashes seemed a reasonable compromise.
    def set_chars_to_check(self):
        self.charactersToCheck = 1
        iterations = len(self.alphabet)
        while True:
            iterations *= len(self.alphabet)
            self.charactersToCheck += 1
            if iterations > 10000000:
                self.charactersToCheck -= 1
                iterations /= len(self.alphabet)
                break
        # calculate chunk size and total number of chunks
        self.chunk_size = len(self.alphabet) ** self.charactersToCheck
        for i in range(self.minKeyLength, self.maxKeyLength+1):
            self.total_work_units += ((len(self.alphabet) ** i) / self.chunk_size)

#   get_chunk() is an iterator which yields a new chunk of data each time get_chunk.next() is called.
        # BROKEN, DO NOT USE!
    # def get_chunk(self):
    #
    #     for prefix in self.get_prefix():
    #
    #         print "get chunk prefix: %s" % prefix
    #         if prefix == '':
    #             prefix = "-99999999999999999999999999999999999"
    #
    #         chunk = Chunk.Chunk()
    #         chunk.params = "bruteforce\n" + self.algorithm + "\n" + self.origHash + "\n" + self.alphabet + "\n" + str(self.minKeyLength) + "\n" + str(self.maxKeyLength) + "\n" + prefix + "\n0\n0\n0"
    #
    #         yield chunk
    #     self.possibilities_exhausted = True

#   run_chunk takes an object of type Chunk.Chunk(), checks all possibilities within the parameters of the chunk,
#   sets global variables according to the chunk data and returns True or False to indicate if the cracking succeeded.
    def run_chunk(self, chunk):
        settings = chunk.params
        settings_list = settings.split('\n')
        #print settings
        prefix = settings_list[6]
        if self.first_unit:
            self.algorithm = settings_list[1]
            self.hashlib_mode = True
            for algorithm in  self.schemes:
                if self.algorithm == algorithm:
                    self.hashlib_mode = False

            self.origHash = settings_list[2]
            self.alphabet = settings_list[3]
            self.minKeyLength = int(settings_list[4])
            self.maxKeyLength = int(settings_list[5])
            self.set_chars_to_check()

        if prefix == "-99999999999999999999999999999999999":
            prefix = ''
        if prefix == '' and self.first_unit:

            shorts = Process(target=self.check_short_keys)
            shorts.start()
            #shorts.join()
            #shorts.terminate()
            #print "short keys started"
        else:
            if self.done.value:
                return True
            else:
                #print "run chunk prefix: %s" % prefix
                self.queue.put(WorkUnit(prefix, self.charactersToCheck, self.alphabet, self.algorithm, self.origHash))

        self.first_unit = False
        if self.done.value:
            return True
        else:
            return False
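
A compact standalone sketch (hashlib only, Python 3, made-up target hash) of the prefix-chunking idea behind get_prefix() and check_keys(): the keyspace is split into work units identified by a prefix, and each unit enumerates only the fixed-length suffixes for that prefix.

import hashlib
import itertools
import string

alphabet = string.ascii_lowercase
chars_to_check = 2                            # suffix length enumerated inside one work unit
target = hashlib.md5(b"fox").hexdigest()      # hypothetical hash we want to crack

def check_unit(prefix):
    """Try every key made of `prefix` plus chars_to_check trailing characters."""
    for suffix in itertools.product(alphabet, repeat=chars_to_check):
        key = prefix + ''.join(suffix)
        if hashlib.md5(key.encode()).hexdigest() == target:
            return key
    return None

# one-character prefixes cover all three-character keys: 26 work units of 26**2 candidates each
for prefix in alphabet:
    hit = check_unit(prefix)
    if hit:
        print(hit)                            # fox
        break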
Ejemplo n.º 53
0
class Downloader:
    def __init__(self):
        self.manager = Manager()
        self.queue = self.manager.dict()
        self.downloading = self.manager.list()
        self.index = Value('i', 0)
        self.print_lock = Lock()

        self.multi = MultitaskQueue(self.start_download)
        self.status = 0
        Timer(0.5, self.print_daemon).start()
        
        self.FNULL = open(os.devnull, 'w')

    options = {
        'format': 'flv',
        'logger': DownloadLogger(),
    }

    def start_download(self, dic):
        writeln('[' + color('DOWN', 'cyan') + '] Starting download of %s from %s, saving as ID %d'
                % (dic['name'], dic['url'], dic['id']))
        # cur_option = self.options
        # cur_option['progress_hooks'] = [partial(self.download_progress, dic['id'])]
        # cur_option['outtmpl'] = 'video/' + str(dic['id']) + '/' + str(dic['id']) + r'.%(title)s-%(id)s.%(ext)s'
        # downloader = youtube_dl.YoutubeDL(cur_option)
        # try:
        #     downloader.download([dic['url']])
        #     self.download_progress(dic['id'], {'status': 'complete'})
        # except youtube_dl.DownloadError as e:
        #     writeln('[' + color('ERROR', 'red') + '] youtube_dl error for %s: ' % dic['name'] + e.message)
        #     self.download_progress(dic['id'], {'status': 'error'})
        self.download_progress(dic['id'], {'status': 'downloading'})
        outpath = 'video/' + str(dic['id']) + '/'
        try:
            os.makedirs(outpath)
        except OSError:
            pass  # output directory already exists
        log = open(outpath + 'log.txt', 'w')
        process = subprocess.Popen(["you-get", dic['url']], stdout=log, stderr=subprocess.STDOUT, cwd=outpath)
        retcode = process.wait()
        log.close()
        log = open(outpath + 'log.txt', 'r')
        if retcode != 0 or ' '.join(log.readlines()).find('error') != -1:
            self.download_progress(dic['id'], {'status': 'error'})
        else:
            self.download_progress(dic['id'], {'status': 'complete'})

    def print_progress(self):
        self.print_lock.acquire()

        down_cnt, all_cnt = len(self.downloading), len(self.queue)
        if down_cnt == 0:
            self.print_lock.release()  # release before returning so later acquires do not deadlock
            return

        with self.index.get_lock():
            index = self.index.value
            if index >= down_cnt:
                self.index.value = 0
                index = 0

        rows, columns = map(int, os.popen('stty size', 'r').read().split())

        dic = self.queue[self.downloading[index]]
        message = ''
        message += color('Job %d/%d' % (index + 1, down_cnt), 'green')
        if all_cnt > down_cnt:
            message += color(' (%d pending)' % (all_cnt - down_cnt), 'green')
        message += color(': ' + dic['name'] + '  Part %d' % (dic['done_part'] + 1), 'green')

        total, down = dic.get('total_bytes', None), dic.get('downloaded_bytes', None)
        if total is not None and down is not None:
            submessage = ' %6.2lf%%' % (float(down) * 100 / float(total))
            if length(message) + length(submessage) <= columns:
                message += submessage

        eta = dic.get('eta', None)
        if eta is not None:
            submessage = color('   ETA:', 'cyan') + ' %ds' % eta
            if length(message) + length(submessage) <= columns:
                message += submessage

        speed = dic.get('speed', None)
        if speed is not None:
            units = [('MB/s', 10 ** 6), ('KB/s', 10 ** 3)]
            unit = ('B/s', 1)
            for x in units:
                if speed > x[1]:
                    unit = x
                    break
            submessage = color('   Speed:', 'cyan') + ' %.1lf%s' % (speed / unit[1], unit[0])
            if length(message) + length(submessage) <= columns:
                message += submessage

        remain = columns - length(message)

        refresh(message)

        self.print_lock.release()

    REFRESH_TIME = 1

    def print_daemon(self):
        if len(self.downloading) > 0:
            with self.index.get_lock():
                self.index.value += 1
            self.print_progress()

        Timer(self.REFRESH_TIME, self.print_daemon).start()

    def download_progress(self, id, dic):
        self.print_lock.acquire()

        try:
            name = self.queue[id]['name']
            done_part = self.queue[id]['done_part']
        except KeyError:
            print 'KeyError'
            self.print_lock.release()
            return

        if self.queue[id]['status'] == 'none':
            self.downloading.append(id)

        dic['name'] = name
        dic['time'] = time.time()
        if dic['status'] in ['complete', 'error']:
            if dic['status'] == 'complete':
                message = 'All %d parts of %s downloaded' % (done_part, name)
                writeln(color('[DONE] ' + message, 'green'))
            else:
                message = 'Download of %s aborted due to error' % name
                writeln(color('[ABORT] ' + message, 'red'))
            self.downloading.remove(id)
            self.queue.pop(id)

        elif dic['status'] == 'finished':
            dic['done_part'] = done_part + 1
            message = 'Finished downloading part %d of %s. File saved to %s' \
                      % (dic['done_part'], name, dic['filename'])
            # message = 'Finished downloading part %d/%d of %s. File saved to %s' \
            #           % (dic['fragment_index'], dic['fragment_count'], name, dic['filename'])

            down = dic.get('downloaded_bytes', None)
            if down is not None:
                message += ', size is %.1lfMB' % (float(down) / (10 ** 6))
            writeln('[' + color('DOWN', 'green') + '] ' + message)
            self.queue[id] = dic

        elif dic['status'] == 'downloading':
            dic['done_part'] = done_part
            total = 0
            for x in ['total_bytes', 'total_bytes_estimate']:
                if x in dic and dic[x] is not None:
                    total = dic[x]
                    break
            dic['total_bytes'] = total
            self.queue[id] = dic

        self.print_lock.release()

        if dic['status'] == 'finished':
            self.print_progress()

    def download(self, name, url, id):
        self.queue[id] = {
            'name': name,
            'status': 'none',
            'time': time.time(),
            'done_part': 0,
        }
        self.multi.add({'name': name, 'url': url, 'id': id})
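
A small standalone sketch of the shared-state layout used above: a Manager-backed dict and list can be read and mutated from worker processes, while a Value guarded by get_lock() cycles through the active jobs for a status display. Job ids and timings are placeholders.

import time
from multiprocessing import Manager, Process, Value

def download_job(job_id, jobs, active):
    jobs[job_id] = {'status': 'downloading', 'done_part': 0}
    active.append(job_id)
    time.sleep(0.2)                           # stand-in for the real download
    jobs[job_id] = {'status': 'complete', 'done_part': 1}
    active.remove(job_id)

if __name__ == '__main__':
    manager = Manager()
    jobs = manager.dict()                     # job id -> progress dict, shared through the manager
    active = manager.list()                   # ids currently downloading
    index = Value('i', 0)                     # which active job the status line shows

    workers = [Process(target=download_job, args=(i, jobs, active)) for i in range(3)]
    for w in workers:
        w.start()
    while any(w.is_alive() for w in workers):
        snapshot = list(active)               # copy so the list cannot shrink under us
        if snapshot:
            with index.get_lock():
                index.value = (index.value + 1) % len(snapshot)
                shown = snapshot[index.value]
            print('progress of job', shown, jobs.get(shown))
        time.sleep(0.1)
    for w in workers:
        w.join()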
Ejemplo n.º 54
0
class FastqFilter(object):
    """
    @class  FastqFilter
    @brief Main class of the package
    Requires the third-party package Biopython
    """
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    #~~~~~~~FONDAMENTAL METHODS~~~~~~~#

    def __init__(self, R1, R2,
        quality_filter=None,
        adapter_trimmer=None,
        outdir="./fastq/",
        input_qual="fastq-sanger",
        numprocs=None,
        compress_output=True):
        """
        Instantiate the object by storing the call parameters and initializing shared memory
        counters for interprocess communication. A reader process iterates over the input paired
        fastq files and adds coupled R1 and R2 sequences as Biopython SeqRecords to a first shared
        queue. Then, according to the initial parameters, a pool of filter processes pulls SeqRecord
        couples from the queue and applies quality filtering and/or adapter trimming. Couples
        passing through the filters are added to a second shared queue. Finally, couples in the
        second queue are written to an output fastq file.
        @param R1 Path to the forward read fastq file (can be gzipped)
        @param R2 Path to the reverse read fastq file (can be gzipped)
        @param quality_filter A QualityFilter object, if quality filtering is required.
        @param adapter_trimmer An AdapterTrimmer object, if adapter trimming is required.
        @param outdir Directory where to write the filtered fastq sequences.
        @param input_qual Quality scale of the fastq (fastq-sanger for Illumina 1.8+)
        @param numprocs Number of parallel processes for the filtering steps. If not provided,
        the maximum number of threads available will be used.
        @param compress_output If True the output fastq will be written directly to a gzipped file.
        False will generate an uncompressed and much bigger file.
        """
        # Start a timer
        start_time = time()

        # Create object variables
        self.numprocs = numprocs if numprocs else cpu_count()
        self.qual = quality_filter
        self.adapt = adapter_trimmer
        self.input_qual = input_qual
        self.R1_in = R1
        self.R2_in = R2
        self.outdir = outdir
        self.compress_output = compress_output
        if compress_output:
            self.R1_out = path.join(self.outdir, file_basename(self.R1_in)+"_1_filtered.fastq.gz")
            self.R2_out = path.join(self.outdir, file_basename(self.R2_in)+"_2_filtered.fastq.gz")
        else:
            self.R1_out = path.join(self.outdir, file_basename(self.R1_in)+"_1_filtered.fastq")
            self.R2_out = path.join(self.outdir, file_basename(self.R2_in)+"_2_filtered.fastq")

        # Init shared memory counters
        self.total = Value('i', 0)
        self.pass_qual = Value('i', 0)
        self.pass_trim = Value('i', 0)
        self.total_pass = Value('i', 0)
        if self.qual:
            self.min_qual_found = Value('i', 100)
            self.max_qual_found = Value('i', 0)
            self.weighted_mean = Value('d', 0.0)
        if self.adapt:
            self.seq_untrimmed = Value('i', 0)
            self.seq_trimmed = Value('i', 0)
            self.base_trimmed = Value('i', 0)
            self.len_pass = Value('i', 0)
            self.len_fail = Value('i', 0)

        # Count sequences in the fastq files to prepare a progress counter
        print("Counting the number of fastq sequences")
        self.nseq = count_seq(R1, "fastq")
        print("fastq files contain {} sequences to align".format(self.nseq))
        self.nseq_list = [int(self.nseq*i/100.0) for i in range(5,101,5)] # 5 percent steps

        # Init queues for input file reading and output file writing (limited to 10000 objects)
        self.inq = Queue(maxsize=10000)
        self.outq = Queue(maxsize=10000)

        # Init processes for file reading, distributed filtering and file writing
        self.pin = Process(target=self.reader, args=())
        self.ps = [Process(target=self.filter, args=()) for i in range(self.numprocs)]
        self.pout = Process(target=self.writer, args=())

        # Start processes
        self.pin.start()
        self.pout.start()
        for p in self.ps:
            p.start()

        # Block until all the processes are finished
        self.pin.join()
        print ("\tReading done")
        for i in range(len(self.ps)):
            self.ps[i].join()
        print ("\tFiltering done")
        self.pout.join()
        print ("\tWriting done\n")

        # Stop timer and store the value
        self.exec_time = round(time()-start_time, 3)

    def __repr__(self):
        msg = "FASTQ FILTER Parallel Processing\n"
        msg += "\tExecution time : {} s\n".format(self.exec_time)
        msg += "\tInput fastq files\n\t\t{}\n\t\t{}\n".format (self.R1_in, self.R2_in)
        msg += "\tOutput fastq files\n\t\t{}\n\t\t{}\n".format (self.R1_out, self.R2_out)
        msg += "\tInput quality score : {}\n".format (self.input_qual)
        msg += "\tNumber of parallel processes : {}\n".format (self.numprocs)
        msg += "\tTotal pair processed : {}\n".format(self.total.value)
        msg += "\tTotal pair passed : {}\n".format(self.total_pass.value)
        if self.qual:
            msg += "QUALITY FILTER\n"
            msg += "\tPair pass quality filter : {}\n".format(self.pass_qual.value)
            msg += "\tMean quality value : {}\n".format(self.weighted_mean.value/self.total.value/2)
            msg += "\tMin quality value : {}\n".format(self.min_qual_found.value)
            msg += "\tMax quality value : {}\n".format(self.max_qual_found.value)
        if self.adapt:
            msg += "ADAPTER TRIMMER\n"
            msg += "\tPair pass adapter Trimming : {}\n".format(self.pass_trim.value)
            msg += "\tSequences untrimmed : {}\n".format(self.seq_untrimmed.value)
            msg += "\tSequences trimmed : {}\n".format(self.seq_trimmed.value)
            msg += "\tDNA base trimmed : {}\n".format(self.base_trimmed.value)
            msg += "\tFail len filtering: {}\n".format(self.len_fail.value)
            msg += "\tPass len filtering : {}\n".format(self.len_pass.value)
        return msg

    def __str__(self):
        return "<Instance of {} from {} >\n".format(self.__class__.__name__, self.__module__)

    def get(self, key):
        return self.__dict__[key]

    def getCTypeVal(self, key):
        return self.__dict__[key].value
    
    def getTrimmed (self):
        return (self.R1_out, self.R2_out)

    def set(self, key, value):
        self.__dict__[key] = value

    #~~~~~~~PRIVATE METHODS~~~~~~~#

    def reader(self):
        """
        Initialize SeqIO.parse generators to iterate over the paired fastq files. Data are sent
        over the input queue for the workers to process, and n = numprocs STOP pills are added at
        the end of the queue, one for each worker.
        """
        try:
            # Open input fastq streams for reading
            if self.R1_in[-2:].lower() == "gz":
                in_R1 = gzip.open(self.R1_in, "rb")
            else:
                in_R1 = open(self.R1_in, "rb")

            if self.R2_in[-2:].lower() == "gz":
                in_R2 = gzip.open(self.R2_in, "rb")
            else:
                in_R2 = open(self.R2_in, "rb")

        except (IOError, TypeError, ValueError) as E:
            print(E)
            exit(1)

        # Init generators to iterate over files
        genR1 = SeqIO.parse(in_R1, self.input_qual)
        genR2 = SeqIO.parse(in_R2, self.input_qual)

        i = 0
        while True:
            # Parse sequences from the generators until one of them is exhausted
            seqR1 = next(genR1, None)
            seqR2 = next(genR2, None)
            if not seqR1 or not seqR2:
                break
            # Add the (seqR1, seqR2) couple to the end of the queue
            self.inq.put( (seqR1, seqR2) )

            i+=1
            if i in self.nseq_list:
                print ("\t{} sequences: {}%".format(i, int(i*100.0/self.nseq)))

        # Close files
        in_R1.close()
        in_R2.close()

        # Add one STOP pill per worker process to the queue
        for i in range(self.numprocs):
            self.inq.put("STOP")

    def filter(self):
        """
        Parallelized filter that takes sequence couples from the input queue until a STOP pill is
        found. Sequences go through a QualityFilter and an AdapterTrimmer object, and if the couple
        passes the filters it is put at the end of the output queue. At the end of the process
        a STOP pill is added to the output queue.
        """
        # Consume inq and produce answers on outq
        for seqR1, seqR2 in iter(self.inq.get, "STOP"):

            with self.total.get_lock():
                self.total.value+=1

            # Quality filtering
            if self.qual:
                seqR1 = self.qual.filter(seqR1)
                seqR2 = self.qual.filter(seqR2)
                if not seqR1 or not seqR2:
                    continue

            with self.pass_qual.get_lock():
                self.pass_qual.value+=1

            # Adapter trimming and size filtering
            if self.adapt:
                seqR1 = self.adapt.trimmer(seqR1)
                seqR2 = self.adapt.trimmer(seqR2)
                if not seqR1 or not seqR2:
                    continue

            with self.pass_trim.get_lock():
                self.pass_trim.value+=1

            # If both filters passed = add to the output queue
            self.outq.put( (seqR1, seqR2) )

        # Add a STOP pill to the queue
        self.outq.put("STOP")

        # Fill shared memory counters from process-specific object instances.
        if self.qual:
            with self.weighted_mean.get_lock():
                self.weighted_mean.value += (self.qual.get_mean_qual()*self.qual.get('total'))
            with self.min_qual_found.get_lock():
                if self.qual.get_min_qual() < self.min_qual_found.value:
                    self.min_qual_found.value = self.qual.get_min_qual()
            with self.max_qual_found.get_lock():
                if self.qual.get_max_qual() > self.max_qual_found.value:
                    self.max_qual_found.value = self.qual.get_max_qual()

        if self.adapt:
            with self.seq_untrimmed.get_lock():
                self.seq_untrimmed.value += self.adapt.get('seq_untrimmed')
            with self.seq_trimmed.get_lock():
                self.seq_trimmed.value += self.adapt.get('seq_trimmed')
            with self.base_trimmed.get_lock():
                self.base_trimmed.value += self.adapt.get('base_trimmed')
            with self.len_pass.get_lock():
                self.len_pass.value += self.adapt.get('len_pass')
            with self.len_fail.get_lock():
                self.len_fail.value += self.adapt.get('len_fail')

    def writer(self):
        """
        Write sequence couples from the output queue to a pair of fastq files (gzipped if
        requested). Sequences will remain paired (ie at the same index in the 2 files) but they may
        not be in the same order as in the input fastq files. The process will continue until
        n = numprocs STOP pills have been found in the output queue (ie. all filter workers are done)
        """
        # Open output fastq streams for writing
        if self.compress_output:
            out_R1 = gzip.open(self.R1_out, "wb")
            out_R2 = gzip.open(self.R2_out, "wb")
        else:
            out_R1 = open(self.R1_out, "wb")
            out_R2 = open(self.R2_out, "wb")

        # Keep running until all numprocs STOP pills have been received
        for works in range(self.numprocs):
            # The inner loop exits as soon as a STOP pill is found
            for seqR1, seqR2 in iter(self.outq.get, "STOP"):
                out_R1.write(seqR1.format("fastq-sanger"))
                out_R2.write(seqR2.format("fastq-sanger"))
                with self.total_pass.get_lock():
                    self.total_pass.value+=1

        out_R1.close()
        out_R2.close()
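Because all reader, filter, and writer processes are started and joined inside __init__, using FastqFilter amounts to instantiating it and then reading the shared counters. A minimal usage sketch, assuming hypothetical QualityFilter and AdapterTrimmer constructors and example input file names (none of which appear in the code above):

if __name__ == "__main__":
    # QualityFilter/AdapterTrimmer arguments and the input paths are illustrative assumptions.
    filt = FastqFilter(
        R1="sample_R1.fastq.gz",
        R2="sample_R2.fastq.gz",
        quality_filter=QualityFilter(min_qual=25),
        adapter_trimmer=AdapterTrimmer(adapter="AGATCGGAAGAGC"),
        outdir="./fastq/",
        numprocs=4)

    print(filt)                              # summary report built by __repr__
    print(filt.getTrimmed())                 # paths of the two filtered output files
    print(filt.getCTypeVal("total_pass"))    # direct read of a shared Value counter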
Ejemplo n.º 55
0
class Pipeline:
    END_OF_STREAM_SIGNAL = "!end_of_stream!"

    RUNNING_STATUS_STANDBY = 0
    RUNNING_STATUS_RUNNING = 1
    RUNNING_STATUS_FINISH = 2
    RUNNING_STATUS_INTERRUPTED = -999

    @staticmethod
    def is_end_of_stream(data):
        return data == Pipeline.END_OF_STREAM_SIGNAL

    def __init__(self, alias=None):
        self.logger = _get_logger(__name__)
        self._alias = alias
        self._pipe_builders = []

        self._pipes = {}
        self._pipe_processes = []
        self._first_pipe = None
        self._last_pipe = None
        self._func_read_stream = (lambda: range(0))

        self._cleanups = []
        self._already_cleanup = Value(ctypes.c_bool, False)

        self._running_status = Value(ctypes.c_int, Pipeline.RUNNING_STATUS_STANDBY)
        self._interrupted_by_exception = False

        self._thread_watching_running_status = None
        self._thread_watching_remaining_processes = None
        self._stream_reader_process = None

    def reset(self):
        self._pipes = {}
        self._pipe_processes = []
        self._first_pipe = None
        self._last_pipe = None
        self._func_read_stream = (lambda: range(0))

        self._cleanups = []
        self._already_cleanup = Value(ctypes.c_bool, False)

        self._running_status = Value(ctypes.c_int, Pipeline.RUNNING_STATUS_STANDBY)
        self._interrupted_by_exception = False

        self._thread_watching_running_status = None
        self._thread_watching_remaining_processes = None
        self._stream_reader_process = None

    def add(self, builder):
        """
        :param builder: an object whose build() method returns a pipe to append to this pipeline
        :return: Pipeline (self, so calls can be chained)
        """
        self._pipe_builders.append(builder)
        return self

    def stream(self, generator=None):
        """
        Start streaming data from the generator into the pipeline, yielding data passed through the pipeline.

        :param generator: a DataGenerator, an Iterable, or a generator function
        :return: generator yielding messages that have passed through the pipeline
        """

        self._check_if_runnable()

        try:
            # change running status
            self._mark_started()

            # determine stream generator
            self._configure_stream_reader(generator)

            # configure pipes and create processes for them
            self._configure_pipes()

            # open each pipe in its own process
            self._open_pipes()

            # start process reading stream from generator
            self._start_streaming_data()

            # yield data passed through this pipeline
            self.logger.info("start to yield streams passed through pipeline...")
            while True:
                message = self._last_pipe.outbound.get()
                if Pipeline.is_end_of_stream(message):
                    break
                yield message
            self.logger.info("finished yielding streams passed through pipeline")

            # if interrupted
            if self._interrupted_by_exception:
                raise Exception("processing was interrupted by unexpected exception")

            self.logger.info("finished successfully")
        finally:
            self._cleanup()

    def _mark_started(self):
        self.set_running_status_to_running()
        self._add_running_status_reset_func_to_cleanup()
        self._configure_running_status_watcher()

    def _add_running_status_reset_func_to_cleanup(self):
        def cleanup_func_reset_running_status():
            with self._running_status.get_lock():
                if self.running_status != Pipeline.RUNNING_STATUS_INTERRUPTED:
                    self.set_running_status_to_finish()

        self._add_cleanup_func("reset running status of pipeline",
                               cleanup_func_reset_running_status)

    def _configure_running_status_watcher(self):
        def watch_running_status(pipeline=None):
            pipeline.logger.info("start thread watching running status...")
            while True:
                if pipeline.running_status == Pipeline.RUNNING_STATUS_INTERRUPTED:
                    pipeline.logger.error("got an interruption, stops pipeline, see logs")
                    pipeline._interrupted_by_exception = True
                    pipeline.stop_force()
                    pipeline.set_running_status_to_finish()
                    break
                elif pipeline.running_status == Pipeline.RUNNING_STATUS_FINISH:
                    break
                time.sleep(0.001)
            pipeline.logger.info("stop thread watching running status")

        self._thread_watching_running_status = Thread(
            name="running_status_watcher",
            target=watch_running_status,
            kwargs={"pipeline": self})
        self._thread_watching_running_status.daemon = True
        self._thread_watching_running_status.start()

    def _start_streaming_data(self):
        self.logger.info("start process for streaming data into pipeline...")
        self._add_cleanup_func("terminate the stream reader process",
                               lambda: self._stream_reader_process.terminate())
        self._stream_reader_process.start()

    def _open_pipes(self):
        self.logger.info("start Processes for pipes(%s)...", len(self._pipe_processes))
        map(lambda process: process.start(),
            reduce(lambda p_group1, p_group2: p_group1 + p_group2, self._pipe_processes, []))
        self._add_cleanup_func("terminate all the pipe processes",
                               lambda: map(lambda each_p: each_p.terminate(),
                                           reduce(lambda p1, p2: p1 + p2, self._pipe_processes, [])))

    def _configure_stream_reader(self, generator):
        if isinstance(generator, DataGenerator):
            self._func_read_stream = generator.produce
        elif isinstance(generator, collections.Iterable):
            self._func_read_stream = (lambda: generator)
        elif inspect.isgeneratorfunction(generator):
            self._func_read_stream = generator
        else:
            raise Exception("generator should be either Producer or Iterable")

        self._stream_reader_process = create_process_with(
            process_alias="stream_reader",
            target_func=lambda: self._read_and_stream_from_generator())

    def _check_if_runnable(self):
        # check running status
        if self.running_status != Pipeline.RUNNING_STATUS_STANDBY:
            raise Exception("invalid running status. Call reset() before call this")

    def _configure_pipes(self):
        if self._pipe_builders is None or len(self._pipe_builders) <= 0:
            raise Exception("There are no pipes to stream data")

        # chaining pipes
        pipes = []
        pipe_outbound = Queue()
        self._pipe_builders.reverse()
        for builder in self._pipe_builders:
            pipe = builder.build()
            pipe.outbound = pipe_outbound
            pipes.append(pipe)
            pipe_outbound = pipe.inbound

        self._pipe_builders.reverse()
        pipes.reverse()
        self._pipes = pipes

        # capture entry and terminal
        self._first_pipe = self._pipes[0]
        self._last_pipe = self._pipes[-1]

        processes = []
        for pipe in self._pipes:
            processes_for_pipe = map(lambda i: create_process_with(process_alias="process-%s-%s" % (pipe.alias, i),
                                                                   target_func=func_to_be_invoked_with_new_process,
                                                                   target_pipe=pipe,
                                                                   pipeline_running_status=self._running_status),
                                     range(pipe.number_of_consumer))
            processes.append(processes_for_pipe)
        self._pipe_processes = processes

    def _read_and_stream_from_generator(self):
        try:
            map(lambda m: self.__stream_data(m), self._func_read_stream())
            self.__stream_data(Pipeline.END_OF_STREAM_SIGNAL)
        except Exception as e:
            self.logger.error("while reading stream from generator, an unexpected exception occurred, stopping pipeline. "
                              "see cause -> %s\n%s", e, traceback.format_exc())

            self.set_running_status_to_interrupted()

    def __stream_data(self, data):
        self._first_pipe.inbound.put(data)

    def _join_pipes(self):
        def watch_remaining_processes(pipeline=None, processes=None):
            pipeline.logger.info("start thread watching pipe processes remaining...")
            while True:
                processes_alive = filter(lambda p: p.is_alive(), reduce(lambda plist1, plist2: plist1 + plist2, processes, []))
                if len(processes_alive) <= 0:
                    pipeline.logger.info("no remaining processes")
                    break
                else:
                    pipeline.logger.info("%s remaining processes : %s", len(processes_alive),
                                         map(lambda p: (p.pid, p.name), processes_alive))
                time.sleep(5)
            pipeline.logger.info("stop thread watching pipe processes remaining")

        self._thread_watching_remaining_processes = Thread(
            name="remaining_processes_watcher",
            target=watch_remaining_processes,
            kwargs={"pipeline": self,
                    "processes": self._pipe_processes}
        )
        self._thread_watching_remaining_processes.daemon = True
        self._thread_watching_remaining_processes.start()

        map(lambda p:
            self.logger.info("joining(waiting) the process(name:%s, id:%s, alive:%s)...", p.name, p.pid, p.is_alive())
            or p.join()
            or self.logger.info("released joining the process(name:%s, id:%s, alive:%s)", p.name, p.pid, p.is_alive()),
            reduce(lambda plist1, plist2: plist1 + plist2, self._pipe_processes, []))

        self._thread_watching_remaining_processes.join()

    def _add_cleanup_func(self, desc="", func=(lambda: None)):
        """
        :param desc: short description logged when the cleanup function is invoked
        :param func: callable taking no arguments, invoked during _cleanup()
        """
        self._cleanups.append((desc, func))

    def _cleanup(self):
        with self._already_cleanup.get_lock():
            if self._already_cleanup.value:
                return

            self.logger.info("start cleaning up...")
            map(lambda cleanup_tuple:
                self.logger.info("call cleanup func -> %s", cleanup_tuple[0])
                or cleanup_tuple[1](),
                self._cleanups)
            self.logger.info("finished cleaning up")
            self._already_cleanup.value = True

    def stop_force(self):
        """
        terminate all spawned processes
        :return: void
        """
        # call registered cleanups
        self._cleanup()

        # send end signal to terminal queue for pipeline
        self._last_pipe.outbound.put(Pipeline.END_OF_STREAM_SIGNAL)

    @property
    def running_status(self):
        return self._running_status.value

    def set_running_status_to_standby(self):
        self._set_running_status(Pipeline.RUNNING_STATUS_STANDBY)

    def set_running_status_to_running(self):
        self._set_running_status(Pipeline.RUNNING_STATUS_RUNNING)

    def set_running_status_to_finish(self):
        self._set_running_status(Pipeline.RUNNING_STATUS_FINISH)

    def set_running_status_to_interrupted(self):
        self._set_running_status(Pipeline.RUNNING_STATUS_INTERRUPTED)

    def _set_running_status(self, value):
        with self._running_status.get_lock():
            self._running_status.value = value
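Stripped of the pipe and builder machinery, the status handling above reduces to a ctypes-backed Value shared between the parent and its worker processes, always updated under get_lock(). A minimal standalone sketch of that pattern, reusing the RUNNING/INTERRUPTED constants by value (everything else here is illustrative):

import ctypes
import time
from multiprocessing import Process, Value

STATUS_RUNNING = 1         # mirrors Pipeline.RUNNING_STATUS_RUNNING
STATUS_INTERRUPTED = -999  # mirrors Pipeline.RUNNING_STATUS_INTERRUPTED

def worker(status):
    """Stand-in for a pipe process that flags an interruption when it fails."""
    try:
        raise RuntimeError("boom")
    except RuntimeError:
        with status.get_lock():   # same discipline as Pipeline._set_running_status
            status.value = STATUS_INTERRUPTED

if __name__ == "__main__":
    status = Value(ctypes.c_int, STATUS_RUNNING)
    p = Process(target=worker, args=(status,))
    p.start()
    # The parent polls the shared status, like the watcher thread in Pipeline.
    while status.value == STATUS_RUNNING:
        time.sleep(0.001)
    p.join()
    print("final status:", status.value)   # -999 once the worker flags the interruption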
Ejemplo n.º 56
0
    print "[+] Starting requests..."

    if (arg_verbosity > 0):
            print "[+]\t\tURL: ", arg_url
            print "[+]\t\tThreads: ", arg_nthreads
            print

    threads = []
    stdout_lock = Lock()
    if (arg_nthreads):
        for i in range(arg_nthreads):
            t = Process(target=wracost.run, args=(stdout_lock, arg_getreq))
            threads.append(t)
            t.start()
    else:
        paramsdictionary = parser.get_params_dict()
        for singleparam in paramsdictionary:
            # Need a .copy() because each process needs its own object
            t = Process(target=wracost.run, args=(stdout_lock, singleparam.copy()))
            threads.append(t)
            t.start()

    if arg_auto or raw_input("[+] All threads synchronised! Launch attack?(Y/n): ") != 'n':
        with shared_lock_launch.get_lock():
            shared_lock_launch.value = False
            print "[+] Requests launched!"
    else:
        for thread in threads:
            thread.terminate()
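The worker function wracost.run is not shown, but the launch gate is: every process is started early and presumably spins until shared_lock_launch is flipped to False under its lock. A minimal sketch of that start-gate pattern with a stand-in worker (fire_request and the worker count are assumptions):

import time
from multiprocessing import Process, Value

def fire_request(gate, worker_id):
    # Spin until the main process releases the gate, then do the real work.
    while gate.value:
        time.sleep(0.0001)
    print("worker %d fired" % worker_id)

if __name__ == "__main__":
    launch_gate = Value('b', True)   # True = hold, False = go (like shared_lock_launch above)
    procs = [Process(target=fire_request, args=(launch_gate, i)) for i in range(4)]
    for p in procs:
        p.start()
    # Flip the gate under its lock so all workers are released at (nearly) the same instant.
    with launch_gate.get_lock():
        launch_gate.value = False
    for p in procs:
        p.join()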
Ejemplo n.º 57
0
class Child:
    """An abstraction upon a process for our :class:`~sw.pool.Pool`. Serves to more easily house a separate process and
    communicate with it cross-process. A Child is not entirely a separate container that is spawned from Pool and given
    free rein. The bulk of a Child is stored on the primary thread with the Pool, UI, and Reporting. However, 
    :py:func:`~sw.child.Child.think` is on a separate `multiprocessing.Process` along with the provided *func* and
    GhostDriver / PhantomJS. All communication between Pool and Child is conducted over Child.statusVar (:py:func:`~multiprocessing.Value`) and
    Child.cq / Child.wq (:py:class:`~multiprocessing.Queue`) to avoid locks (they are multiprocess-safe).
    
    The off-thread child handles its own log, status reporting, error reporting, and getting new jobs. Once the process
    is started control is handed back over to the Pool which then manages the processes. 

    
    :param cq: ChildQueue reference from :class:`~sw.pool.Pool`. Used to transmit the status of this Child
        to our Pool.
    :param wq: WorkQueue reference from :class:`~sw.pool.Pool`. This Child pops a function off this Queue 
        then executes it, then repeats.
    :param num: Number of the Child relevant to :class:`~sw.pool.Pool`'s self.data array. This index is used to 
        easily communicate results and relate them to the child in that array. This number is actually one less
        than the index displayed on the console (which starts at 1 for the end user's sake).
    :param log: Base log directory which we spit logs and screenshots into. Just a string which should never change.
    :param options: Dict of kwargs which contain specific options passed to our wrapper.

    :return: Child (self)
    """
    def __init__( self, cq, wq, num, log, options ):
        self.cq = cq # Our shared output queue (childqueue) (multiprocessing)
        self.wq = wq  # Our shared input queue (workqueue) (multiprocessing)

        self.num = num
        self.driver = None
        self.log = log
        self.lh = "" 
        self.options = options
        self.level = self.options.get( 'level', NOTICE )
        self.func  = None
        self.sleepTime = self.options.get( 'childsleeptime', 1 )
        self.cache = ElementCache( )
        self.statusVar = Value( 'i', STARTING )
        
        self.start( )



    def think( self ):
        """This method is spawned on a separate process from our main thread. It takes no arguments, just reads from 
           self variables set in :py:class:`~sw.child.Child` that are multiprocess-safe: wq, cq, and statusVar (and various 
           static variables). It also uses various class variables for storage which are not touched by the Pool.

           The purpose of this method is to cleanly start a loop of running PhantomJS with a fresh function pulled from
           wq every time. When think ends, our Child process ends as well.

           :return: None
        """

        # Change our status ASAP so users actually see it change to black (and know the Child started).
        self.display( DISP_START )

        wq = self.wq
        cq = self.cq

        # This allows custom service arguments to be forced into PhantomJS, as it is not supported with the Python
        # bindings by default.
        webdriver.phantomjs.webdriver.Service = PhantomJSNoImages

        sargs = [ ''.join( [ '--load-images=', str( self.options['images'] ).lower( ) ] ),
                  ''.join( [ '--disk-cache=', str( self.options.get( 'browsercache', "true" ) ).lower( ) ] ),
                  ''.join( [ '--ignore-ssl-errors=', str( self.options.get( 'ignoresslerrors', "yes" ) ).lower( ) ] ) ]

        if 'proxy' in self.options:
            sargs.append( ''.join( [ '--proxy=', self.options['proxy'] ] ) )
        if 'proxytype' in self.options:
            sargs.append( ''.join( [ '--proxy-type=', self.options['proxytype'] ] ) )

        try: 
            self.driver = webdriver.PhantomJS( service_log_path=os.path.join( self.log, self.options.get( 'ghostdriverlog', "ghostdriver.log" ) ), service_args=sargs )
        except Exception as e:
            self.logMsg( ''.join( [ "Webdriver failed to load: ", str( e ), "\n", traceback.format_exc( ) ] ), CRITICAL )
            try: 
                self.driver.quit( )
            except:
                return
            return

        # This enables custom callbacks from WebDriver to this Child. Primarily used to read options and throw errors. Usage
        # can be seen in `sw.utils`.
        self.driver.child = self

        # WebDriver, by default, waits 15 seconds while intensively scanning the DOM for an element. This forces it to
        # throw an error instantly if the element does not exist.
        self.driver.implicitly_wait( 0 )

        cq.put( [ self.num, READY, "" ] )

        self.logMsg( "Child process started and loaded" )

        while not wq.empty( ):
            # Block and wait otherwise exceptions are thrown. I've never seen it fail to get something here as
            # there's a check below.
            self.func = wq.get( True, 5 )
            res = []
            start = 0

            # Below we set to an error / done and wait.

            # FIXME: Waiting a second to show a status isn't appropriate. The Pool should change the status
            # for the child after enough time has elapsed.
            self.status( RUNNING )
            
            try:
                self.cache.clear( )
                start = time.time( )
                self.display( DISP_GOOD )
                cq.put( [ self.num, MESSAGE, time.time( ), R_JOB_START ] )
                self.func( self.driver )
            except TimeoutException as e:
                self.display( DISP_ERROR )

                screen = self.logError( str( e ) )
                self.logMsg( ''.join( [ "Stack trace: ", traceback.format_exc( ) ] ), CRITICAL )
                
                cq.put( [ self.num, FAILED, ( time.time( ) - start ), str( e ), screen ] )
                self.logMsg( "Timeout when finding element." )
                time.sleep( 1 )
            except Exception as e:
                self.display( DISP_ERROR )

                screen = self.logError( str( e ) ) # Capture the exception and log it
                self.logMsg( ''.join( [ "Stack trace: ", traceback.format_exc( ) ] ), CRITICAL )

                cq.put( [ self.num, FAILED, ( time.time( ) - start ), str( e ), screen ] )
                time.sleep( 1 )
                break
            else:
                self.display( DISP_FINISH )

                t = time.time( ) - start
                cq.put( [ self.num, DONE, ( time.time( ) - start ), "" ] )
                self.logMsg( ''.join( [ "Successfully finished job (", format( t ), "s)" ] ) )
                time.sleep( 0.5 )

        # This line will cleanly kill PhantomJs for us.
        self.driver.quit( )
        self.display( DISP_DONE )
        self.status( FINISHED )



    def logError( self, e, screenshot=True ):
        """Takes a JSON-encoded Selenium exception's text and spits it into the log in a more meaningful format.
            Can optionally take a screenshot too.

           :param e: Unicode JSON-encoded string from a WebDriver-thrown exception. *Must be a String*.
           :param True screenshot: Take a screenshot of the error automatically.

           :return: String for screenshot location, if any.
        """

        o = pformat( formatError( e, "log" ) )
        self.logMsg( o, CRITICAL )

        if screenshot:
            return self.screenshot( CRITICAL )



    def screenshot( self, level=NOTICE ):
        """Saves a screenshot to error_#.png and prints a message into the log specifying the file logged to.
           
           :param NOTICE level: This determines whether or not the error message will be logged according to the
               level set in self.level. The screenshot will print anyway. If this error is not greater or equal to the level specified in self.level,
               it is not printed. If it is, the message is printed into log.txt with the level specified by the timestamp.
           :return: String for screenshot location
        """
        fn = ""
        i = 0
        # Refuse to take a screenshot if the log directory does not exist
        if not os.path.exists( self.log ):
            raise ValueError( ''.join( [ "Cannot write to a log directory that doesn't exist. ", self.log ] ) )

        # If we are writing several errors, number them appropriately
        while True:
            fn = os.path.join( self.log, ''.join( [ 'error_', str( i ), '.png' ] ) )
            i += 1
            if not os.path.isfile( fn ):
                break

        self.driver.save_screenshot( fn ) 
        self.logMsg( ''.join( [ "Wrote screenshot to: ", fn ] ), level )

        return fn



    def logMsg( self, e, level=NOTICE, **kwargs ):
        """Writes to our message log if level is greater than or equal to our level (in self.log).
        
           :param e: The message to be written to the log.
            
           :param NOTICE level: This determines whether or not the error message will be logged according to the
               level set in self.level. If this error is not greater or equal to the level specified in self.level,
               it is not printed. If it is, the message is printed into log.txt with the level specified by the timestamp.

           :Kwargs:
              * **locals** (*None*): Optional locals dict to print out cleanly.
           :return: None
        """
        locals = kwargs.get( 'locals', None )

        # Send error if appropriate
        if level >= ERROR:
            self.display( DISP_ERROR )

        # Determine if we're logging this low
        if level < self.level:
            return

        # Get our timestamp
        timestamp = datetime.now( ).strftime( "%H:%M:%S" )
        
        # String
        w = ''.join( [ "[", timestamp, "] ", errorLevelToStr( level ), "\t", e, "\n" ] )

        # Locals if specified
        if locals != None:
            self.logMsg( ''.join( [ "Local variables: ", pformat( locals ) ] ), level )

        # This typically errors out the first time through
        try: 
            self.lh.write( w ) 
        except:
            self.lh = open( os.path.join( self.log, ''.join( [ 'log-', str( self.num + 1 ), '.txt' ] ) ), 'a+', 0 )
            self.lh.write( w ) 



    def display( self, t ):
        """Sends a display message to the main loop, which is then translated to the UI.
           
           :param t: The status this child will now show, a constant starting with DISP in const.py.

           :returns: None
        """
        self.cq.put( [ self.num, DISPLAY, t ] )



    def is_alive( self ):
        """Checks if the child's process is still running, if it is then it returns True, otherwise False.
           There's a check for if the process is None, which is set when a child terminates.

           :return: Boolean for if Child process is still active (different from if a child is processing data).
        """
        if self.proc != None:
            return self.proc.is_alive( )
        else:  
            return False



    def status( self, type=None ):
        """Uses a multiprocess-safe variable to transmit our status upstream. These values are listed under
           universal status types in const.py. The status types allow better logging and, for example, prevent
           children that were already terminated from being terminated again (and throwing an exception).

           When called with a type it will set this child's status on both the main process and the child's 
           process. When called without it, it reads from the status variable.

           :param None type: The new value of our status.

           :returns: If type isn't specified, our status. If it is, it sets our type and returns None.
        """
        if type is None:
            return self.statusVar.value
        else:
            with self.statusVar.get_lock( ):
                self.statusVar.value = type



    def start( self, flag=DISP_LOAD ):
        """Starts our child process off properly, used after a restart typically.
           
           :param DISP_LOAD flag: A custom flag to change the display color of the child, if desired.
           :return: None
        """
        # Not stopped anymore
        self.status( STARTING )

        # Create our path
        if not os.path.isdir( self.log ):
            os.makedirs( self.log )

        # Open our handle
        self.lh = open( os.path.join( self.log, ''.join( [ 'log-', str( self.num + 1 ), '.txt' ] ) ), 'a+' )

        # Show loading
        self.display( flag )

        # Our process 
        self.proc = Process( target=self.think, args=( ) )
        self.proc.start( )



    def restart( self, msg="restarting", flag=None ):
        """Restarts the child process and gets webdriver running again.

           :param "RESTARTING" msg: A message to print out in parenenthesis.
           :param None flag: A custom flag to change the display color of the child, if desired.

           :return: None
        """
        if flag is not None:
            self.stop( msg, flag )
            self.start( flag )
        else:
            self.stop( msg )
            self.start( )



    def stop( self, msg="", flag=FINISHED, disp_flag=DISP_DONE ):
        """Stops a child process properly and sets its self.proc to None. Optionally takes a message
           to print out.
        
           :param "" msg: A message to show in parenthesis on the console next to ``Child #: STOPPING (msg)``.
           :param FINISHED flag: A custom status flag for if the child is finished, paused, stopped, or whatever is desired.
           :param DISP_DONE disp_flag: A custom display flag for the status of the child after stopping.

           :return: None
        """
        if self.proc == None:
            return

        # Prevent the pool from trying to restart us
        self.status( flag )

        if msg != "":
            self.logMsg( ''.join( [ "Stopping child process: \"", msg, "\"" ] ) )
        else:
            self.logMsg( "Stopping child process" )

        # Kill our process
        if self.proc != None:
            if os.name != "posix":
                subprocess.call( [ 'taskkill', '/F', '/T', '/PID', str( self.proc.pid ) ], stdout=open( os.devnull, 'wb' ), stderr=open( os.devnull, 'wb' ) )
            else:
                subprocess.call( [ 'pkill', '-TERM', '-P', str( self.proc.pid ) ], stdout=open( os.devnull, 'wb' ), stderr=open( os.devnull, 'wb' ) )
            self.proc.join( )
            self.proc = None

        # Inform the TUI that we're done.
        self.display( disp_flag )

        # Close our log
        self.lh.close( )
        

    def flush( self ):
        """Flushes our log so that messages are retained on an internal error.

           :return: None
        """

        self.lh.flush( )
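The Pool side of this protocol is not shown, but every message the Child puts on cq has the same shape: the child's index first, a status constant second, then status-specific payload. A minimal sketch of how the consuming side might drain that queue; the constants and the data layout are assumptions standing in for the real const.py values and sw.pool.Pool internals:

# Hypothetical stand-ins for the constants defined in const.py.
READY, RUNNING, DONE, FAILED, DISPLAY, MESSAGE = range(6)

def drain_child_queue(cq, data):
    """Fold pending child messages into the pool's per-child bookkeeping dicts."""
    while not cq.empty():
        msg = cq.get()
        num, kind = msg[0], msg[1]
        entry = data.setdefault(num, {"jobs_done": 0, "errors": 0})
        if kind == DONE:
            entry["jobs_done"] += 1
            entry["last_duration"] = msg[2]
        elif kind == FAILED:
            entry["errors"] += 1
            entry["last_error"] = msg[3]
        elif kind == DISPLAY:
            entry["display"] = msg[2]
        # READY / MESSAGE entries would be forwarded to the UI and reporting layers here.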
Ejemplo n.º 58
0
class Infinity:
    ''' Main class, does everything '''

    def __init__(self):

        parser = argparse.ArgumentParser()
        parser.add_argument("thread", help="URL of thread to scrape")
        parser.add_argument("--directory", "-d", help="Specify dir to save to (Default: ~/4chan)")
        parser.add_argument("--name", "-n", help="Specify name of dir to download to (Default: Topic/OP Post number)")
        parser.add_argument("--workers", type=int, help="Number of threads to run (Default: 10)")
        parser.add_argument("--version", "-v", action="version", version=VERSION)
        self.args = parser.parse_args()

        save_path = self.args.directory or os.path.join(
            os.path.expanduser('~'), "4chan")

        self.header = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 '
                          '(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
            'Accept': 'text/html,application/xhtml+xml,'
                      'application/xml;q=0.9,*/*;q=0.8'}
        self.thread_url = self.args.thread
        self.board = self.thread_url.split('/')[3]
        self.thread_name = ""
        self.downloads = []
        self.filename = []
        self.save_path = save_path
        self.counter = Value('i', 0)
        self.total_count = Value('i', 0)
        self.workers = self.args.workers
        self.down_dir = ""


    def url_open(self):
        """ Returns raw data from thread """
        url = "{0}.json".format(os.path.splitext(self.thread_url)[0])
        return requests.get(url, headers=self.header)

    def jsonify(self):
        """ Converts raw data to text to json object """
        jsonable = self.url_open().text
        return json.loads(jsonable)

    def download_image(self, url, filename):
        """ Creates new dir if doesn't exist, downloads image """
        down_dir = os.path.join(self.save_path, self.thread_name.title())
        img_dir = os.path.join(down_dir, filename)

        if not os.path.exists(down_dir):
            os.makedirs(down_dir)

        image = requests.get(url, headers=self.header, stream=True)

        with open(img_dir, 'wb') as location:
            image.raw.decode_content = True
            shutil.copyfileobj(image.raw, location)

        self.down_dir = down_dir

    def image_urls(self):
        """ Iterates over json obj, gets image links
            Creates pool of workers, creates new workers """
        json_obj = self.jsonify()

        for post in json_obj['posts']:
            if 'ext' in post:
                self.total_count.value += 1

        self.thread_name = json_obj['posts'][0]['semantic_url']

        for post in json_obj['posts']:
            if 'ext' in post:
                filename = str(post['tim']) + post['ext']
                image_url = 'https://i.4cdn.org/{board}/{file}'.format(
                    board=self.board, file=filename)
                self.filename.append(filename)
                self.downloads.append(image_url)
                self.download_image(image_url, filename)

                with self.counter.get_lock():
                    self.counter.value += 1
                    update_progress(self.counter.value, self.total_count.value)

        manager = Manager()
        pool_data = manager.list(self.downloads)
        partial_data = partial(self.download_image, pool_data)
        pool = Pool(self.workers)
        pool_map = pool.map_async(partial_data, self.filename)

        try:
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            print("Aborting")
            pool.terminate()
            pool.join()
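One caveat worth illustrating: the shared counter and total_count Values above can only be bumped from the main process as written, because synchronized Value objects cannot be passed through Pool.map arguments; they would have to reach the workers through the pool initializer instead. A minimal sketch of that pattern (function names and URLs are illustrative, not taken from the class above):

from multiprocessing import Pool, Value

_counter = None  # per-worker global, set once by the initializer

def _init_worker(shared_counter):
    global _counter
    _counter = shared_counter

def _download_one(url):
    # ... fetch and save `url` here ...
    with _counter.get_lock():
        _counter.value += 1
        return _counter.value

if __name__ == "__main__":
    counter = Value('i', 0)
    urls = ["https://i.4cdn.org/g/example1.png", "https://i.4cdn.org/g/example2.png"]
    with Pool(processes=2, initializer=_init_worker, initargs=(counter,)) as pool:
        for done in pool.imap_unordered(_download_one, urls):
            print("downloaded %d of %d" % (done, len(urls)))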