Example #1
    def evaluate_parallel(self, ast):
        import random
        from multiprocessing import Pool, cpu_count, Manager
        workers = self.num_process
        if workers == -1:
            workers = cpu_count()
        times = int(ast.times.val)
        each_time = int(times / workers)
        # divide the work evenly among the first workers - 1 processes
        work_times = [each_time] * (workers - 1)
        # give any remaining iterations to the last process
        work_times.append(times - (each_time * (workers - 1)))
        ast_list = [ast.val] * len(work_times)
        randomer = [random.Random() for _ in work_times]
        optional_arg = [(w, r) for w, r in zip(work_times, randomer)]
        manager = Manager()
        l = manager.RLock()
        u = manager.dict()
        pool = Pool(processes=workers, initializer=init_child, initargs=(u, l))

        ret = pool.starmap(self.visit_optional, zip(ast_list, optional_arg))
        pool.close()
        if not self.generate_only:
            result = []
            for y in ret:
                result.extend(y[0])
            return result
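init_child is referenced above but not defined in the snippet. A minimal sketch of what it presumably does, assuming it simply stows the manager proxies in module globals so that visit_optional can coordinate through them:

def init_child(shared_dict, shared_lock):
    # Runs once in every worker process: keep the Manager proxies in
    # module globals so the mapped function can reach them later.
    global child_dict, child_lock
    child_dict = shared_dict
    child_lock = shared_lock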
Example #2
def main_filter(args):
    """The main function for filtering the documents."""
    install_mp_handler()

    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    logging.info('Filtering frequent paragraphs from index {}...'.format(
        args.index))

    with Pool(args.processes,
              initializer=init_filter,
              initargs=[args.frequents, args.old_frequents]) as pool:
        m = Manager()
        frequents_seen = m.dict()
        lock = m.RLock()
        group_it = enumerate(grouper2(read_index(args.index), args.documents),
                             start=1)
        f = partial(filter_file,
                    args=args,
                    frequents_seen=frequents_seen,
                    lock=lock)

        sum_stats = FilterStats()
        for stats in pool.starmap(f, group_it):
            sum_stats += stats

        pool.close()
        pool.join()

        logging.info(
            'Done filtering: documents {} -> {}, paragraphs {} -> {}.'.format(
                sum_stats.old_docs, sum_stats.new_docs, sum_stats.old_ps,
                sum_stats.new_ps))
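filter_file and init_filter live elsewhere in the project. A hypothetical sketch of the locking pattern the setup above implies, where each worker records frequent paragraphs in the shared dict under the manager RLock:

def filter_file(batch_no, lines, args, frequents_seen, lock):
    """Hypothetical sketch; the real function does much more."""
    stats = FilterStats()
    for para in lines:
        with lock:
            # Reassign instead of mutating in place: manager dict
            # proxies do not observe in-place edits to nested values.
            frequents_seen[para] = frequents_seen.get(para, 0) + 1
    return stats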
Example #3
    def __init__(self):
        manager = Manager()
        self.detail_dict = manager.dict()
        self.waiting_urls = manager.list()
        self.running_urls = manager.list()
        self.finished_urls = manager.list()
        self.failed_urls = manager.list()
        self.lock = manager.RLock()
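A worker process holding this object might then move URLs between the shared lists atomically, e.g. with a hypothetical method on the same class:

    def start_url(self, url):
        # Hold the shared RLock so the remove/append pair is atomic
        # with respect to other worker processes.
        with self.lock:
            self.waiting_urls.remove(url)
            self.running_urls.append(url)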
Example #4
def train_ac(max_workers, global_var_kv):
    _max_workers = min(cpu_count(), max_workers) - 1

    manager = Manager()
    lock = manager.RLock()
    result_queue = manager.Queue()

    with ProcessPoolExecutor(max_workers=_max_workers, ) as executor:
        # executor.map(
        #     train_worker,
        #     #cpu_count
        #     [(lock, master_agent, worker_idx) for worker_idx in range(_max_workers)])
        future = executor.submit(train_worker, lock, result_queue,
                                 master_agent, 0)
        print(future.result())
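train_worker and master_agent come from the surrounding training code. A minimal sketch of the worker's contract under the setup above, assuming it serializes access to shared state with the manager lock and reports through the queue:

def train_worker(lock, result_queue, agent, worker_idx):
    # Hypothetical sketch: guard anything shared between workers
    # (e.g. applying gradients to a global model) with the lock,
    # then publish the outcome on the shared queue.
    with lock:
        outcome = f"worker {worker_idx} finished"
    result_queue.put(outcome)
    return outcome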
Example #5
def generate_chart_data(rts, func, **kwargs):
    '''
    This is the entry function to be called to generate data for creating 
    charts.
    '''

    stopwatch = timer.Timer()
    plugin = retrieve_plugin(func)

    if not plugin:
        available_plugins = inventory.available_analyses()
        raise exceptions.UnknownPluginError(func, available_plugins)
    plugin = getattr(plugin, func)

    feedback(func, rts)

    tasks = JoinableQueue()
    result = JoinableQueue()

    mgr = Manager()
    lock = mgr.RLock()
    obs = dict()
    obs_proxy = mgr.dict(obs)

    db = storage.init_database(rts.storage, rts.dbname, rts.collection)
    editors = db.retrieve_distinct_keys('editor')
    #editors = editors[:500]
    if rts.collection.find('editors_dataset') > -1:
        min_year, max_year = determine_project_year_range(db, 'new_wikipedian')
        kwargs['min_year'] = min_year
        kwargs['max_year'] = max_year

    fmt = kwargs.pop('format', 'long')
    time_unit = kwargs.pop('time_unit', 'year')

    var = dataset.Variable('count', time_unit, lock, obs_proxy, **kwargs)

    try:
        print('Determining whether plugin requires preloaded data...')
        preloader = getattr(plugin, 'preload')
        print('Preloading data...')
        data = preloader(rts)
    except Exception as error:
        data = None
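The common thread in these examples: a lock made with Manager().RLock() is a proxy object, so it can be passed into pool workers as an ordinary argument, whereas a raw multiprocessing.RLock() sent the same way is rejected (plain locks may only be shared through inheritance). A minimal demonstration of the difference:

from multiprocessing import Manager, Pool, RLock

def touch(lock):
    with lock:
        return 'ok'

if __name__ == '__main__':
    manager = Manager()
    with Pool(2) as pool:
        print(pool.apply(touch, (manager.RLock(),)))  # works: proxies pickle
        try:
            pool.apply(touch, (RLock(),))  # raw lock: pickling is refused
        except RuntimeError as err:
            print(err)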
Example #6
    def evaluate_population(self, population: Population) -> np.ndarray:
        """
        Evaluates population of solutions.

        Parameters
        ----------
        population : Population
            Collection of solutions wrapped as `Population`.

        Returns
        -------
        np.ndarray
            An array of fitness values.
            Order is the same as input population.
        """

        solutions = population.get_not_evaluated_solutions()

        if self.VERBOSE:
            print(f'\nEvaluating population of {population.size} solutions\n')
            solutions = tqdm(solutions)

        if self.MULTIPROCESSING:
            pool = Pool()
            manager = Manager()
            lock = manager.RLock()

            fitness_map = pool.map(
                partial(self.evaluate_solution,
                        gene_order=self.gene_order,
                        lock=lock), solutions)
            pool.close()
            pool.join()

            for solution, fitness in zip(solutions, fitness_map):
                solution.fitness = fitness

        else:
            for solution in solutions:
                solution.fitness = self.evaluate_solution(solution)

        return population.fitness
Example #7
def evaluate(num_sub_iterations, evaluator):
    overall_results = collections.OrderedDict()
    for iteration, (train, test) in enumerate(evaluator._cv):
        start = time.time()
        manager = Manager()
        results = manager.dict()
        lock = manager.RLock()
        pool = Pool(processes=multiprocessing.cpu_count())
        for _ in range(num_sub_iterations):
            pool.apply_async(_run_subiteration,
                             args=(lock, results, evaluator, train, test))
        pool.close()
        pool.join()
        for key, value in results.items():
            if overall_results.get(key) is None:
                overall_results[key] = numpy.array(value)
            else:
                overall_results[key] = numpy.vstack(
                    [overall_results[key],
                     numpy.array(value)])
        print "Iteration:", iteration + 1, " Time:", (time.time() - start)
    return overall_results
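_run_subiteration is not shown. A sketch of the accumulation it presumably performs, with one gotcha worth noting: manager dict proxies do not see in-place mutation of nested lists, so values must be rebuilt and reassigned under the lock:

def _run_subiteration(lock, results, evaluator, train, test):
    score = 0.5  # placeholder for the real fit/evaluate work
    with lock:
        # results['score'].append(score) would be lost; reassign instead.
        results['score'] = results.get('score', []) + [score]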
Example #8
def run_instances(database, instances, filter_string, ubxlib_dir, working_dir,
                  clean, summary_report_file, test_report_file, debug_file):
    '''Run the given instances'''
    return_value = 0
    processes = []
    platform_locks = []
    misc_locks = {}
    alive_count = 0
    report_thread = None
    report_queue = None
    reporter = None
    summary_report_file_path = None
    test_report_file_path = None
    debug_file_path = None
    summary_report_handle = None

    manager = Manager()

    # Create a lock to cover things that cross
    # platforms or that any process of u_run.main()
    # may need to perform outside of its working
    # directory
    misc_locks["system_lock"] = manager.RLock()

    # Create a lock which can be used on Nordic
    # platforms (nRF5 and Zephyr): performing a
    # JLink download to a board while JLink RTT logging
    # is active on any other board will often stop
    # the RTT logging even though the sessions are
    # aimed at debuggers with entirely different
    # serial numbers.
    misc_locks["jlink_lock"] = manager.RLock()

    # Create a "lock" that can be used on STM32F4
    # platforms to ensure that all downloads are
    # completed before logging commences.  We
    # can do this, rather than locking a tool for the
    # whole time as we have to do with Nordic, because
    # each STM32F4 board only runs a single instance
    misc_locks["stm32f4_downloads_list"] = manager.list()

    # It is possible for some platforms to be a bit
    # pants at running in multiple instances
    # hence here we create a lock per platform and pass it
    # into the instance for it to be able to manage
    # multiplicity if required
    create_platform_locks(database, instances, manager, platform_locks)

    # Launch a thread that prints stuff out
    # nicely from multiple sources
    print_queue = manager.Queue()
    print_thread = u_utils.PrintThread(print_queue)
    print_thread.start()

    # Set up a printer for this thread to print to the queue
    printer = u_utils.PrintToQueue(print_queue, None, True)

    if summary_report_file:
        # Launch a thread that manages reporting
        # from multiple sources
        summary_report_file_path = working_dir + os.sep + summary_report_file
        summary_report_handle = open(summary_report_file_path, "w")
        if summary_report_handle:
            printer.string("{}writing overall summary report to \"{}\".".  \
                           format(PROMPT, summary_report_file_path))
        else:
            printer.string("{}unable to open file \"{}\" for overall summary report.".   \
                           format(PROMPT, summary_report_file_path))
        report_queue = manager.Queue()
        report_thread = u_report.ReportThread(report_queue,
                                              summary_report_handle)
        report_thread.start()
        reporter = u_report.ReportToQueue(report_queue, None, None, printer)
        reporter.open()

    # From this post:
    # https://stackoverflow.com/questions/11312525/catch-ctrlc-sigint-and-exit-multiprocesses-gracefully-in-python
    # ...create a pool of worker processes to run our
    # instances, then they will handle sigint correctly
    # and tidy up after themselves.

    # SIGINT is ignored while the pool is created
    original_sigint_handler = signal(SIGINT, SIG_IGN)
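    # NoDaemonPool is assumed to be a multiprocessing.Pool subclass whose
    # workers are non-daemonic, so that each instance is allowed to spawn
    # child processes of its own (daemonic processes may not have children).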
    pool = NoDaemonPool(len(instances))
    signal(SIGINT, original_sigint_handler)

    # Create locks for connections
    u_connection.init_locks(manager)

    try:
        # Set up all the instances
        for instance in instances:
            # Provide a working directory that is unique
            # for each instance and make sure it exists
            if working_dir:
                this_working_dir = working_dir + os.sep +       \
                                   INSTANCE_DIR_PREFIX + \
                                   u_utils.get_instance_text(instance).replace(".", "_")
            else:
                this_working_dir = os.getcwd() + os.sep +       \
                                   INSTANCE_DIR_PREFIX + \
                                   u_utils.get_instance_text(instance).replace(".", "_")
            if not os.path.isdir(this_working_dir):
                os.makedirs(this_working_dir)
            # Only clean the working directory if requested
            if clean:
                u_utils.deltree(this_working_dir, printer, PROMPT)
                os.makedirs(this_working_dir)

            # Create the file paths for this instance
            if summary_report_file:
                summary_report_file_path = this_working_dir + os.sep + summary_report_file
            if test_report_file:
                test_report_file_path = this_working_dir + os.sep + test_report_file
            if debug_file:
                debug_file_path = this_working_dir + os.sep + debug_file

            # Start u_run.main in each worker thread
            process = {}
            process["platform"] = u_data.get_platform_for_instance(
                database, instance)
            process["instance"] = instance
            process["platform_lock"] = None
            process["connection_lock"] = u_connection.get_lock(instance)
            for platform_lock in platform_locks:
                if process["platform"] == platform_lock["platform"]:
                    process["platform_lock"] = platform_lock["lock"]
                    break
            process["handle"] = pool.apply_async(
                u_run.main,
                (database, instance, filter_string, True, ubxlib_dir,
                 this_working_dir, process["connection_lock"],
                 process["platform_lock"], misc_locks, print_queue,
                 report_queue, summary_report_file_path, test_report_file_path,
                 debug_file_path))
            alive_count += 1
            processes.append(process.copy())

        # Wait for all the launched processes to complete
        printer.string("{}all instances now launched.".format(PROMPT))
        loop_count = 0
        while alive_count > 0:
            for process in processes:
                instance_text = u_utils.get_instance_text(process["instance"])
                if not "dealt_with" in process and process["handle"].ready():
                    try:
                        # If the return value has gone negative, i.e.
                        # an infrastructure failure, leave it there,
                        # else add the number of test failures to it
                        if (return_value >= 0 and process["handle"].get() > 0) or \
                            (return_value <= 0 and process["handle"].get() < 0):
                            return_value += process["handle"].get()
                    except KeyboardInterrupt as ex:
                        raise KeyboardInterrupt from ex
                    except Exception as ex:
                        # If an instance threw an exception then flag an
                        # infrastructure error
                        return_value = -1
                        printer.string("{}instance {} threw exception \"{}:"    \
                                       " {}\" but I can't tell you where"       \
                                       " I'm afraid.".                          \
                                       format(PROMPT, instance_text,
                                              type(ex).__name__, str(ex)))
                        if reporter:
                            reporter.event(u_report.EVENT_TYPE_INFRASTRUCTURE,
                                           u_report.EVENT_FAILED,
                                           "instance {} threw exception \"{}: {}\"". \
                                           format(instance_text, type(ex).__name__,
                                                  str(ex)))
                    alive_count -= 1
                    process["dealt_with"] = True
                if not process["handle"].ready() and                         \
                   (loop_count == STILL_RUNNING_REPORT_SECONDS):
                    printer.string("{}instance {} still running.".           \
                                        format(PROMPT, instance_text))
            loop_count += 1
            if loop_count > STILL_RUNNING_REPORT_SECONDS:
                loop_count = 0
            sleep(1)

    except KeyboardInterrupt:
        # Pools can tidy themselves up on SIGINT
        printer.string(
            "{}caught CTRL-C, terminating instances...".format(PROMPT))
        if reporter:
            reporter.event(u_report.EVENT_TYPE_INFRASTRUCTURE,
                           u_report.EVENT_FAILED,
                           "CTRL-C received, terminating")
        pool.terminate()
        return_value = -1

    # Tidy up
    pool.close()
    pool.join()
    if reporter:
        reporter.event_extra_information("return value overall {} (0 = success, negative ="   \
                                         " probable infrastructure failure, positive ="       \
                                         " failure(s) (may still be due to infrastructure))". \
                                         format(return_value))
        reporter.close()

    # Wait for the print and report queues to empty
    # and stop the print process
    printer.string("{}all runs complete, return value {}.".format(
        PROMPT, return_value))
    sleep(1)
    print_thread.stop_thread()
    print_thread.join()

    # Stop the reporting process
    if report_thread:
        report_thread.stop_thread()
        report_thread.join()

    if summary_report_handle:
        summary_report_handle.close()

    return return_value
Example #9
import re
from datetime import datetime
from app import app, db, u
from sqlalchemy.orm.exc import NoResultFound, StaleDataError, MultipleResultsFound
import os, glob
from werkzeug.security import generate_password_hash, check_password_hash
from search import add_to_index, remove_from_index, query_index
from flask_login import UserMixin
from app import login
from flask_login import current_user
from diagnostic_text import *
from multiprocessing import Manager
from pathlib import Path

commit_manager = Manager()
commit_lock = commit_manager.RLock()  # recursive lock: db.session.commit() is not thread-safe


class User(UserMixin, db.Model):
    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(64), index=True, unique=True)
    email = db.Column(db.String(120), index=True, unique=True)
    password_hash = db.Column(db.String(128))
    last_seen = db.Column(db.DateTime, default=datetime.utcnow)

    def set_password(self, password):
        self.password_hash = generate_password_hash(password)

    def check_password(self, password):
        return check_password_hash(self.password_hash, password)
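Elsewhere in the application, commits would then be wrapped in the shared lock, e.g. with a hypothetical helper like:

def safe_commit():
    # Serialize commits across processes: db.session.commit() is not
    # safe to run concurrently.
    with commit_lock:
        db.session.commit()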
Example #10
def main():
    time0 = time.time()
    parser = OptionParser()
    parser.add_option(
        '--years',
        dest='s_years',
        action='store',
        type=str,
        help='Give a list of years as a string, such as "1980,1981". Optional.'
    )
    parser.add_option('--noverify',
                      dest='do_noverify',
                      action='store_true',
                      default=False,
                      help='If chosen, do not verify the SSL connection.')
    parser.add_option('--local',
                      dest='do_local',
                      action='store_true',
                      default=False,
                      help='Check for locally running plex server.')
    parser.add_option(
        '--dirname',
        dest='dirname',
        action='store',
        type=str,
        default=os.getcwd(),
        help='Directory into which to store those plots. Default is %s.' %
        os.getcwd())
    opts, args = parser.parse_args()

    #
    ## function to do the processing

    step = 0
    print('%d, started on %s' %
          (step, datetime.datetime.now().strftime('%B %d, %Y @ %I:%M:%S %p')))
    if opts.s_years is not None:
        try:
            years = sorted(
                set(map(lambda tok: int(tok), opts.s_years.split(','))))
        except ValueError:
            step += 1
            print('%d, did not give a valid set of years.' % step)
            years = []
    else:
        years = []

    #
    ## get plex server token
    dat = plexcore.checkServerCredentials(doLocal=True)
    if dat is None:
        step += 1
        print('\n'.join([
            '%d, error, could not access local Plex server in %0.3f seconds. Exiting...'
            % (step, time.time() - time0),
            '%d, finished on %s.' %
            (step + 1,
             datetime.datetime.now().strftime('%B %d, %Y @ %I:%M:%S %p'))
        ]))
        return
    fullURL, token = dat
    #
    ## first find out which libraries are the TV show ones
    library_dict = plexcore.get_libraries(token, fullURL=fullURL, do_full=True)
    if library_dict is None:
        step += 1
        print('\n'.join([
            '%d, error, could not access libraries in plex server in %0.3f seconds. Exiting...'
            % (step, time.time() - time0),
            '%d, finished on %s.' %
            (step + 1,
             datetime.datetime.now().strftime('%B %d, %Y @ %I:%M:%S %p'))
        ]))
        return
    #
    valid_keys = list(
        filter(lambda key: library_dict[key][-1] == 'show', library_dict))
    if len(valid_keys) == 0:
        step += 1
        print('\n'.join([
            '%d, error, could not find a TV show library in %0.3f seconds. Exiting...'
            % (step, time.time() - time0),
            '%d, finished on %s.' %
            (step + 1,
             datetime.datetime.now().strftime('%B %d, %Y @ %I:%M:%S %p'))
        ]))
        return
    tvlib_title = library_dict[max(valid_keys)][0]
    step += 1
    print('%d, found TV library: %s.' % (step, tvlib_title))
    #
    ## now get the TV shows
    tvdata = plexcore.get_library_data(tvlib_title,
                                       token=token,
                                       fullURL=fullURL,
                                       num_threads=16)
    showsToExclude = plextvdb.get_shows_to_exclude(tvdata)
    if len(showsToExclude) != 0:
        step += 1
        print('%d, excluding these TV shows: %s.' %
              (step, '; '.join(showsToExclude)))

    #
    ## now actual meat of the computation
    tvdata_date_dict = plextvdb.get_tvdata_ordered_by_date(tvdata)
    min_year = min(tvdata_date_dict.keys()).year
    max_year = max(tvdata_date_dict.keys()).year
    possible_years_set = set(map(lambda date: date.year, tvdata_date_dict))
    step += 1
    if len(years) == 0:
        years = sorted(possible_years_set)
        print('%d, no years specified. We will use %s total: %s.' %
              (step, _print_years(len(years)), ', '.join(
                  map(lambda year: '%d' % year, years))))
    else:
        cand_years = sorted(set(years) & possible_years_set)
        if len(cand_years) == 0:
            print('\n'.join([
                '%d, no intersection between the %s chosen (%s) and the %d years in the library.'
                % (step, _print_years(len(years)), ', '.join(
                    map(lambda year: '%d' % year, years)), len(possible_years_set)),
                'Instead, we will use %s total: %s.' %
                (_print_years(len(possible_years_set)), ', '.join(
                    map(lambda year: '%d' % year, sorted(possible_years_set))))
            ]))
            years = sorted(possible_years_set)
        else:
            print('%d, we found %s to use: %s.' %
                  (step, _print_years(len(cand_years)), ', '.join(
                      map(lambda year: '%d' % year, cand_years))))
            years = cand_years

    step += 1
    print('%d, started processing %s of TV shows after %0.3f seconds.' %
          (step, _print_years(len(years)), time.time() - time0))
    manager = Manager()
    shared_step = manager.Value('step', step)
    num_procced = manager.Value('nump', 0)
    lock = manager.RLock()
    pool = Pool(processes=cpu_count())

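    # NB: _process_year is a local function; the stdlib pickler used by
    # multiprocessing.Pool cannot serialize closures, so this presumably
    # relied on a thread pool or a dill-backed pool in the original
    # project. With a plain multiprocessing.Pool, hoist it to module level.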
    def _process_year(year):
        plextvdb.create_plot_year_tvdata(tvdata_date_dict,
                                         year,
                                         shouldPlot=True,
                                         dirname=opts.dirname)
        lock.acquire()
        shared_step.value += 1
        num_procced.value += 1
        print(
            '%d, finished processing year = %d (%02d / %02d) in %0.3f seconds.'
            % (shared_step.value, year, num_procced.value, len(years),
               time.time() - time0))
        lock.release()

    _ = list(pool.map(_process_year, years))
    step = shared_step.value + 1
    print('\n'.join([
        '%d, processed all %s in %0.3f seconds.' %
        (step, _print_years(len(years)), time.time() - time0),
        '%d, finished everything on %s.' %
        (step + 1, datetime.datetime.now().strftime('%B %d, %Y @ %I:%M:%S %p'))
    ]))
Example #11
from copy import copy
from multiprocessing import Manager
from typing import Any, Callable

# Assumed module-level constants: each managed list holds [value, lock].
_DATA = 0  # index of the value
_LOCK = 1  # index of the per-name RLock
# MocaMultiProcessLock is a context-manager wrapper over a lock proxy,
# defined elsewhere in the same package.


class MocaSharedMemory:
    """
    This class can manage the shared data between processes.

    Attributes
    ----------
    self._manager: Manager
        the instance of multiprocessing.Manager.
    self._data_dict: dict
        {"some_name": self._manager.list([data_value, self._manager.RLock()])}

    """
    def __init__(
        self,
        other_shared_data_manager=None,
    ):
        self._data_dict: dict  # {"some_name": self._manager.list([data_value, self._manager.RLock()])}

        if other_shared_data_manager is None:
            self._manager = Manager()
            self._data_dict = self._manager.dict(
            )  # Create a shared dict for data
        else:
            self._manager = other_shared_data_manager._manager
            self._data_dict = other_shared_data_manager._data_dict

    def get(self, name: str, default: Any = None) -> Any:
        """Get value (copy) by name."""
        data = self._data_dict.get(name, None)
        if data is not None:
            return data[_DATA]
        return copy(default)

    def set(self, name: str, value: Any) -> None:
        """Set value by name."""
        if name in self._data_dict:
            self._data_dict[name][_DATA] = value
        else:
            self._data_dict[name] = self._manager.list(
                (value, self._manager.RLock()))

    def increment(self, name: str, value: int = 1) -> int:
        """Increment the value."""
        with self.lock(name):
            data = self.get(name, 0)
            data = data + value if isinstance(data, int) else 1
            self.set(name, data)
            return data

    def decrement(self, name: str, value: int) -> int:
        """Decrement the value."""
        with self.lock(name):
            data = self.get(name, 0)
            data = data - value if isinstance(data, int) else -1
            self.set(name, data)
            return data

    def change(self, name: str, func: Callable, *args, **kwargs) -> Any:
        """
        Call `func` with the current value, and set its return value as the new value.
        If no value can be found for `name`, the current value passed to `func` will be None.
        :param name: the name of target data.
        :param func: the function to call.
        :param args: the arguments of the function.
        :param kwargs: the keyword arguments of the function.
        :return: the return value of the function.
        """
        with self.lock(name):
            data = self.get(name, None)
            new_value = func(data, *args, **kwargs)
            self.set(name, new_value)
            return new_value

    def lock(self, name):
        """
        Get a lock on the resource by name (with a wait if the lock is already captured).
        Returns ContextManager-object.
       """
        if name not in self._data_dict:
            self._data_dict[name] = self._manager.list(
                (None, self._manager.RLock()))

        return MocaMultiProcessLock(self._data_dict[name][_LOCK], True)

    def try_lock(self, name):
        """
        Get a lock on the resource by name, without waiting if the lock is already held.
        Returns a ContextManager object.
        """
        if name not in self._data_dict:
            self._data_dict[name] = self._manager.list(
                (None, self._manager.RLock()))

        return MocaMultiProcessLock(self._data_dict[name][_LOCK], False)
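A typical use, with two processes bumping the same counter through the manager proxies (this sketch assumes the fork start method, so the object is inherited rather than pickled):

from multiprocessing import Process

def bump(mem):
    for _ in range(100):
        mem.increment('hits')  # lock(name) makes the read-modify-write atomic

if __name__ == '__main__':
    memory = MocaSharedMemory()
    workers = [Process(target=bump, args=(memory,)) for _ in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(memory.get('hits'))  # 200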
Example #12
        bin_locks[bin].release()
        #seq_dict_lock.release()
        #bin_list_lock.release()


if __name__ == '__main__':
    mgr = Manager()

    bins = ['a', 'b', 'c', 'd', 'e']
    num_bins = len(bins)
    seq_dict = mgr.dict()  #seq, bin tuple to idx number
    bin_list = mgr.list()  #idx to counts

    bin_locks = mgr.dict()  #lock for each bin
    seq_dict_lock = mgr.RLock()  #lock for seq_dict
    bin_list_lock = mgr.RLock()  #lock for whole bin list

    for bin in bins:
        bin_locks[bin] = mgr.RLock()

    seqs = mgr.dict()

    seqs['a'] = ['GATC', 'GATC', 'ATC', 'AAG']
    seqs['b'] = ['GATC', 'CCTC', 'TTC', 'GGT']
    seqs['c'] = ['GATC', 'CCTC', 'GCG', 'TCT']
    seqs['d'] = ['GATC', 'AAGT', 'CTC', 'CTC']
    seqs['e'] = ['GATC', 'AAGT', 'CCT', 'CCT']

    ppool = list()
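The function whose tail appears at the top of this example is not included. A hypothetical reconstruction of the binning worker, using the locks created above:

def count_seqs(bin, seqs, seq_dict, seq_dict_lock, bin_list, bin_list_lock, bin_locks):
    # Sketch only: assign each (seq, bin) pair an index under the
    # seq_dict lock, then bump its count under that bin's lock.
    for seq in seqs[bin]:
        with seq_dict_lock:
            if (seq, bin) not in seq_dict:
                with bin_list_lock:
                    seq_dict[(seq, bin)] = len(bin_list)
                    bin_list.append(0)
            idx = seq_dict[(seq, bin)]
        bin_locks[bin].acquire()
        bin_list[idx] += 1
        bin_locks[bin].release()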
Example #13
class AsynchronousDataStore(AbstractDataStore):
    def __init__(self):
        super(AsynchronousDataStore, self).__init__()
        self._manager = Manager()
        self._managed = self._manager.Namespace()
        self._managed.name = "data-logger"
        self._managed.path = "."
        self._managed.entries = self._manager.list()
        self._managed.data = self._manager.dict()
        self._managed.lockers = self._manager.dict()
        self._managed.counters = self._manager.dict()
        self._managed.on_push_callables = self._manager.dict()
        self._managed.on_reset_callables = self._manager.dict()
        self._managed.on_dump_callables = self._manager.dict()

    def get_name(self):
        return self._managed.name

    def set_name(self, name):
        self._managed.name = name

    def get_path(self):
        return self._managed.path

    def is_empty(self):
        return len(self._managed.entries) == 0

    def set_path(self, path):
        self._managed.path = path

    def declare_entry(self, entry, on_push_callables, on_dump_callables,
                      on_reset_callables):
        self._managed.entries.append(entry)
        self._managed.lockers[entry] = self._manager.RLock()
        self._managed.data[entry] = self._manager.dict()
        self._managed.counters[entry] = 0
        self._managed.on_push_callables[entry] = self._manager.list(
            on_push_callables)
        self._managed.on_reset_callables[entry] = self._manager.list(
            on_reset_callables)
        self._managed.on_dump_callables[entry] = self._manager.list(
            on_dump_callables)

    def has_entry(self, entry):
        return entry in self._managed.entries

    def get_entries(self):
        return self._managed.entries

    def get_locker(self, entry):
        return self._managed.lockers[entry]

    def get_push_callables(self, entry):
        return self._managed.on_push_callables[entry]

    def get_reset_callables(self, entry):
        return self._managed.on_reset_callables[entry]

    def get_dump_callables(self, entry):
        return self._managed.on_dump_callables[entry]

    def get_data(self, entry):
        return self._managed.data[entry]

    def append_data(self, entry, time, data):
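        # NB: callers are expected to hold get_locker(entry): the increment
        # below is a read-modify-write on a manager dict and is not atomic.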
        self._managed.data[entry][time] = data
        self._managed.counters[entry] += 1

    def clear_data(self, entry):
        self._managed.data[entry] = dict()
        self._managed.counters[entry] = 0

    def get_counter(self, entry):
        return self._managed.counters[entry]
Example #14
class Browser():
    """
    A browser site manager, with some data collection capabilities

    In case of using more than one browser, it's highly recommended to create
    them all before opening any sites
    """
    def __init__(self, sites:Iterable[Union[str, Site]]=[], load_timeout:float=20,
                max_tabs:int=25, headless:bool=False, load_images:bool=True, autoload_videos:bool=False,
                load_wait:float=3, disable_downloads:bool=False, 
                proxy_dict:Dict[str,List[str]]={'socksProxy':[],'httpProxy':[],'ftpProxy':[],'sslProxy':[]}, 
                *, jq_filename:str=jq_file):
        
        ### Start loading
        jq = AGenerator()
        jq.append(load(jq_filename))

        ## Shared memory

        # Create a manager to create shared objects
        self._manager = Manager()

        # Check sites to be an OrderedList of {Site:depth}
        # Workaround. Improvement needed
        self._links = parse_sites_arg(sites, _manager=self._manager)

        # List to store timed out links
        self._timed_out_links = self._manager.list()

        # Some semaphores to aid in some functionalities
        self.__lock_loaded = self._manager.RLock()

        self.__lock_sited = self._manager.RLock()

        self.__lock_timed = self._manager.RLock()

        self.__lock_proxy = self._manager.RLock()

        # Store all visited sites and how many times it was visited
        self._visited_sites_counter = self._manager.dict()
        self._visited_domains_counter = self._manager.dict()

        # All proxys to be accessed
        self.proxys = self._manager.dict()
        for key, proxy_list in proxy_dict.items(): 
            shuffle(proxy_list)
            self.proxys[key] = Browser.check_proxys(proxy_list)

        ### PERFORMANCE settings

        # How many tabs per process will be able to open the browser
        # This setting affects severely the ram usage and speed of the crawling
        self.max_tabs = max_tabs

        # Should it load the images in the sites it visits?
        # This setting is meant to be used in low-speed connections
        self.load_images = load_images

        # If videos will be automatically loaded without the user's interaction.
        # May be necessary when scraping video
        self.autoload_videos = autoload_videos

        # How much time to wait before raising a timeout exception on a site.
        # This setting can give false positives on slow networks, but
        # it is meant to close blank urls
        self.load_timeout = load_timeout

        self.load_wait = load_wait

        # Store the jQuery script as plain text. This should've taken less
        # time, since it has been loading asynchronously
        self.__jQuery_script = next(jq)

                
        ###  OTHER configurations

        # If the browser will be shown
        self.headless = headless

        # Whether to block download/open file requests
        self.disable_downloads = disable_downloads
        
    def __enter__(self):
        self.open()

        return self

    def __exit__(self,_,__,___):
        self.close()

    def open(self, *, options=None, profile=None, capabilities=None, overwrite_config:bool=True,
                        proxys:Dict[str,List[str]]=None):
        from selenium import webdriver
        from os import getcwd
        try:
            from Settings.browser_configuration import get_configuration
            from Structures.Async_generator import AGenerator
        except ModuleNotFoundError:
            from .Settings.browser_configuration import get_configuration
            from .Structures.Async_generator import AGenerator

        print(f"Starting the opening of a Browser{' (headless)' if self.headless else ''}")

        #breakpoint()

        if(not proxys is None): self.proxys = proxys

        if(options is None or profile is None or capabilities is None or not overwrite_config):
            options2, profile2, capabilities2 = get_configuration(tabs_per_window=1000,
                                            headless=self.headless,
                                            load_images=self.load_images,
                                            autoload_videos=self.autoload_videos,
                                            disable_downloads=self.disable_downloads,
                                            proxys=self.get_proxy(http=True, ssl=True, ftp=True, socks=True),
                                            options=options if not overwrite_config else None, 
                                            profile=profile if not overwrite_config else None, 
                                            capabilities=capabilities if not overwrite_config else None)

            if(not overwrite_config or options is None):
                options = options2
            if(not overwrite_config or profile is None):
                profile = profile2
            if(not overwrite_config or capabilities is None):
                capabilities = capabilities2

            del options2, profile2, capabilities2

        print("Configuration complete. Trying to run the drivers. This could take some time...")
        try:
            self.driver = webdriver.Firefox(executable_path=f"{getcwd()}//geckodriver",
                    options=options, firefox_profile=profile, capabilities=capabilities) 

            #self._profile = profile
                    
            print("Drivers ran succesfully!")

        except Exception as ex:
            print(f"Looks like something failed. Error message: {ex}")
            self.driver = webdriver.Firefox()

        self.driver.get("about:config")
        self.driver.execute_script("document.querySelector('button').click();")

        #breakpoint()

        ### After driver configs
        #self.driver.set_page_load_timeout(0)
        self.driver.set_page_load_timeout(self.load_timeout)

        ### Non-shared memory
        self._site_from_tab = {}

        ## Async site_sleep
        self._async_site_sleep = AGenerator()
        

        print("New Browser opened")

    def close(self):
        self.driver.quit()

    def _get_sites(self, *, _number:int=1):
        # Ask for access
        self.__lock_sited.acquire()

        # TODO: getting a depth-0 site is not the same as getting a depth-N
        # one, and this should be able to get more than one site per request
        if(not len(self._links)): 
            self.__lock_sited.release()
            return None

        result = self._links[0]
        self._links.pop(0)

        self.__lock_sited.release()

        return result

    def _get_loaded_sites(self, *, wait:bool=True) ->  Iterable:
        sites = []
        while(not len(sites)):
            sites = list(filter(None, next(self._async_site_sleep)))

            for site in sites:
                self._site_from_tab[site.tab] = site

            if(len(self._site_from_tab) or not wait):
                return self._site_from_tab.values()

    def open_tab(self, *, link:str=None, site:Site=None) -> int:
        self.__lock_timed.acquire()

        self.driver.switch_to.window(self.driver.window_handles[0])

        self.driver.execute_script("window.open();")
        
        #Switch to new tab instead of next
        self.driver.switch_to.window(self.driver.window_handles[self.driver.window_handles.index(
            self.driver.current_window_handle)+1])

        tab = self.driver.current_window_handle

        self.__lock_timed.release()

        if(not link is None):
            self.open_link(link, new_tab=False, site=site)

        return tab

    def open_link(self, link:str, *, new_tab:bool=False, site:Site=None, wait_load:bool=True):
        tab = self.driver.current_window_handle
        bef_tab = self.driver.window_handles.index(tab)

        if(new_tab):
            tab = self.open_tab(link=None)
        
        self.__lock_timed.acquire()

        try:
            self.driver.switch_to.window(tab)
            self.driver.get(link)
        except TimeoutException:
            pass

        self.__lock_timed.release()

        # window.array = [performance.getEntries()]; window.interval = setInterval(function foo(){var a = performance.getEntries(); if(window.array[window.array.length-1].length != a.length){window.array.push(a);}},200)

        domain = Browser.domain_from_link(link)

        self._visited_sites_counter[link] = self._visited_sites_counter.get(link,0) + 1
        self._visited_domains_counter[domain] = self._visited_domains_counter.get(domain,0) + 1

        if(site is None):
            site = Site(link, tab=tab)
        else:
            site.tab = tab
            site.link = link

        self._site_from_tab[tab] = site

        if(wait_load):
            self._async_site_sleep.append(self.async_sleep(tab, self.load_wait,
                                        _return=site))

        return site

    async def async_sleep(self, tab, load_wait:float, *, _return=None, period:float=0.1):
        load_start = datetime.now()
        timeout = load_start + timedelta(seconds=int(load_wait))

        while(not ready_state(self.driver, tab, self.__lock_timed) and (datetime.now() < timeout)):
            self.__lock_timed.release()

            await asleep(period)

        self.__lock_timed.release()

        if(timeout > datetime.now()):
            return

        await asleep(load_wait)

        return _return

    # open_link seems to be faster than async_open_link
    def async_open_link(self, link:str, *, new_tab:bool=False, site:Site=None):
        #raise DeprecationWarning()
        if(not link is None):
            self._async_site_sleep.append(self.__a_open_link(link, new_tab=new_tab, site=site))

    async def __a_open_link(self, link:str, *, new_tab:bool=False, site:Site=None):
        bef_tab = self.driver.window_handles.index(self.driver.current_window_handle)

        if(new_tab):
            tab = self.open_tab(link=None)
        else:
            tab = self.driver.current_window_handle

        try:
            self.driver.get(link)
        except TimeoutException:
            print(f"The site timed out. Skipping {link}")
            self._timed_out_links.append(link)
            self.driver.execute_script("window.close();")
            self.switch_to_window(self.driver.window_handles[bef_tab%len(self.driver.window_handles)])
            return 

        domain = Browser.domain_from_link(link)

        self._visited_sites_counter[link] = self._visited_sites_counter.get(link,0) + 1
        self._visited_domains_counter[domain] = self._visited_domains_counter.get(domain,0) + 1

        if(site is None):
            site = Site(link, tab=tab)
        else:
            site.tab = tab
            site.link = link

        self._site_from_tab[tab] = site

        await asleep(self.load_wait)

        return site

    def old_open_link(self, link:str, *, new_tab:bool=False, site:Site=None):
        if(new_tab):
            tab = self.open_tab(link=None)
        else:
            tab = self.driver.current_window_handle

        try:
            self.driver.get(link)
        except TimeoutException:
            print(f"The site timed out. Skipping {link}")
            self._timed_out_links.append(link)
            self.driver.execute_script("window.close();")
            self.switch_to_window(self.driver.window_handles[0])
            return 

        domain = Browser.domain_from_link(link)

        self._visited_sites_counter[link] = self._visited_sites_counter.get(link,0) + 1
        self._visited_domains_counter[domain] = self._visited_domains_counter.get(domain,0) + 1

        if(site is None):
            site = Site(link, tab=tab)
        else:
            site.tab = tab
            site.link = link

        self._site_from_tab[tab] = site

        sleep(self.load_wait)

        return site

    def switch_to_window(self, tab):
        if(tab in self.driver.window_handles):
            self.driver.switch_to.window(tab)
            return
        self.driver.switch_to.window(self.driver.window_handles[0])

    def restore_timed_out(self):
        self.__lock_timed.acquire()

        for zombie_tab in [tab for tab in self.driver.window_handles[1:] if not tab in self._site_from_tab.keys()]:
            print("Found a non-properly closed tab. Closing it...")
            self.close_tab(zombie_tab)


        self._links.extend(self._timed_out_links)
        
        while(len(self._timed_out_links)): self._timed_out_links.pop(0)

        self.__lock_timed.release()

    def extract_text(self, element:str="document") -> str:
        return self.driver.execute_script(
                        f"var query={element}.evaluate('//*[not(self::script)][not(self::style)]/text()',{element},null,XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,null);"
                        "return Array(query.snapshotLength).fill(0).map((element,index) => query.snapshotItem(index)"
                        ").map(x => function(e){if(e.data.replace('\\n','').trim()){return e.data;} return '';}(x)).join('\\n');")

    def extract_hrefs(self, element:str="document", *, site:Site=None):
        if(site is None):
            return list(filter(validate_url,
                        self.driver.execute_script(
                        f"var query={element}.evaluate('//body[1]//@href[not(self::script)][not(self::link)][not(self::style)]',{element},null,XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,null);"
                        "return Array.from(new Set(Array(query.snapshotLength).fill(0).map((element,index) => query.snapshotItem(index).value)"
                        ".map(x => function(e){if(e[0] == '/'){return location.origin+e;} return e}(x))))"
                        )))

        site.hrefs = list(filter(validate_url,
                        self.driver.execute_script(
                        f"var query={element}.evaluate('//@href',{element},null,XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,null);"
                        "return Array.from(new Set(Array(query.snapshotLength).fill(0).map((element,index) => query.snapshotItem(index).value)"
                        ".map(x => function(e){if(e[0] == '/'){return location.origin+e;} return e}(x))))"
                        )))
        return site.hrefs

    def extract_buttons(self):
        return self.driver.execute_script("""
                var _ = document.evaluate("//*", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
                return (jQuery(Array.from(new Set(Array(_.snapshotLength).fill(0).map((element, index) => _.snapshotItem(index).nodeName))).filter(function(value,_,_){return value!="IFRAME"}).join(", ")).contents().filter(function b(e){return jQuery._data(e, 'events') != undefined;})).toArray()
                """)
        """
        var c = (jQuery(window.some||Array.from(new Set(Array(_.snapshotLength).fill(0).map((element, index) => _.snapshotItem(index).nodeName))).filter(function(value,_,_){return value!="IFRAME"}).join(", ")).contents().toArray().filter(function b(e){return e.click != undefined;}));
        window.some = Array.from(new Set(c.map(function d(e){return e.nodeName;}))).filter(function f(e){return e != "IFRAME"}).join(', ')
        """

    def extract_video(self):
        return self.driver.execute_script("return performance.getEntries().map(e => (e.initiatorType == 'xmlhttprequest' && e.name.split('?')[0])).filter(e => (e && (e.endsWith('.mp4') || e.endsWith('.m3u8'))));")

    def store_cookies(self, filename:str="cookies.dat"):
        import pickle

        pickle.dump(self.driver.get_cookies(), open(filename, 'ab'))

    def load_cookies(self, filename:str="cookies.dat"):
        import pickle

        link_bef = self.driver.current_url

        for cookie in pickle.load(open(filename,'rb')):
            self.inject_cookie(cookie)
            print(f"Loaded cookie: {cookie}")

        self.driver.get(link_bef)

    def inject_cookie(self, cookie:dict):
        domain = cookie.get("domain",None)
        if(not domain is None):
            if(domain[0] == '.'): domain = "www"+domain
            if("about" in self.driver.current_url or 
                        Browser.domain_from_link(self.driver.current_url) != domain):
                self.driver.get(f"http://{domain}")
        else:
            if("about" in self.driver.current_url):
                self.driver.get("https://duckduckgo.com")

        if(isinstance(cookie, str)):
            raise NotImplementedError
            #cookie = cookie.split(":")
            #self.driver.add_cookie(dict(zip([][:len(cookie)],cookie)))
        self.driver.add_cookie(cookie)

    def inject_jQuery(self):
        self.driver.execute_script("window.el = document.createElement('script'); n = function(e){"
                    "e.type='text/javascript';"
                    f"e.innerHTML={self.__jQuery_script};e.onload=function()"
                    "{console.log('Checking jQuery');jQuery.noConflict();console.log('jQuery injected')};"
                    "console.log('Injecting');document.head.appendChild(e);console.log(e);}(el);")

    def get_proxy(self, *, http:bool=False, ssl:bool=False, ftp:bool=False, socks:bool=False) -> dict:
        result = {}
        self.__lock_proxy.acquire()

        if(http and len(self.proxys.get('httpProxy', []))):
            aux = self.proxys['httpProxy'][0]
            result['httpProxy'] = aux
            self.proxys['httpProxy'].pop(0)
            self.proxys['httpProxy'].append(aux)
        if(ssl and len(self.proxys.get('sslProxy', []))):
            aux = self.proxys['sslProxy'][0]
            result['sslProxy'] = aux
            self.proxys['sslProxy'].pop(0)
            self.proxys['sslProxy'].append(aux)
        if(ftp and len(self.proxys.get('ftpProxy', []))):
            aux = self.proxys['ftpProxy'][0]
            result['ftpProxy'] = aux
            self.proxys['ftpProxy'].pop(0)
            self.proxys['ftpProxy'].append(aux)
        if(socks and len(self.proxys.get('socksProxy', []))):
            aux = self.proxys['socksProxy'][0]
            result['socksProxy'] = aux
            self.proxys['socksProxy'].pop(0)
            self.proxys['socksProxy'].append(aux)

        self.__lock_proxy.release()

        return result

    def set_proxy(self, httpProxy:str=None, sslProxy:str=None, ftpProxy:str=None, 
                        socksProxy:str=None) -> None:
        self.driver.execute("SET_CONTEXT", {"context": "chrome"})

        if(not httpProxy is None):
            try:
                self.driver.execute_script("""
                        Services.prefs.setCharPref("network.proxy.http", arguments[0]);
                        Services.prefs.setIntPref("network.proxy.http_port", Number(arguments[1]));
                        """, *httpProxy.split(':'))
            except: pass
        if(not sslProxy is None):
            try:
                self.driver.execute_script("""
                Services.prefs.setCharPref("network.proxy.ssl", arguments[0]);
                Services.prefs.setIntPref("network.proxy.ssl_port", Number(arguments[1]));
                """, *sslProxy.split(':'))
            except: pass
        if(not ftpProxy is None):
            try:
                self.driver.execute_script("""
                Services.prefs.setCharPref('network.proxy.ftp', arguments[0]);
                Services.prefs.setIntPref('network.proxy.ftp_port', Number(arguments[1]));
                """, *ftpProxy.split(':'))
            except: pass
        if(not socksProxy is None):
            try:
                self.driver.execute_script("""
                Services.prefs.setCharPref('network.proxy.socks', arguments[0]);
                Services.prefs.setIntPref('network.proxy.socks_port', Number(arguments[1]));
                """, *socksProxy.split(':'))
            except: pass

        self.driver.execute("SET_CONTEXT", {"context": "content"})

    def close_tab(self, tab:int=None):
        if(not tab is None):
            if(not tab in self.driver.window_handles):
                #breakpoint()
                self._site_from_tab.pop(tab)
                for zombie_tab in [tab for tab in self.driver.window_handles[1:] if not tab in self._site_from_tab.keys()]:
                    print("Found a non-properly closed tab. Closing it...")
                    self.close_tab(zombie_tab)
                return

            self.__lock_timed.acquire()
            self.driver.switch_to.window(tab)
        else:
            self.__lock_timed.acquire()
        

        actual_tab = self.driver.current_window_handle
        self.driver.execute_script("window.close();")
        self.__lock_timed.release()

        # Suddenly this stopped working. Whatev
        try:
            self._site_from_tab.pop(actual_tab)
        except KeyError: pass

    # Aesthetics function    
    def set_size_pos(self, size:Tuple[int,int], position:Tuple[int,int]=(0,0)) -> None:
        self.driver.set_window_rect(*position,*size)

    @staticmethod
    def domain_from_link(link:str) -> Optional[str]:
        return re_search(r"(?<=:\/\/)?(([A-Z]|[a-z]|[0-9])+\.)+([A-Z]|[a-z]|[0-9])+", link).group()

    @staticmethod
    def check_proxys(proxys:list) -> list:
        return proxys
Example #15
    def run(self):
        time0 = time.time()
        final_data_out = {}
        mytxt = '0, started loading in data on %s.' % (
            datetime.datetime.now().strftime('%B %d, %Y @ %I:%M:%S %p'))
        logging.info(mytxt)
        self.emitString.emit(mytxt)
        #
        libraries_dict = plexcore.get_libraries(self.token,
                                                fullURL=self.fullURL,
                                                do_full=True)
        if not any(
                map(lambda value: 'show' in value[-1],
                    libraries_dict.values())):
            raise ValueError('Error, could not find TV shows.')
        library_name = max(
            map(
                lambda key: libraries_dict[key][0],
                filter(lambda key: libraries_dict[key][1] == 'show',
                       libraries_dict)))
        final_data_out['library_name'] = library_name
        mytxt = '1, found TV library in %0.3f seconds.' % (time.time() - time0)
        logging.info(mytxt)
        self.emitString.emit(mytxt)
        #
        if self.tvdata_on_plex is None:
            self.tvdata_on_plex = plexcore.get_library_data(
                library_name,
                fullURL=self.fullURL,
                token=self.token,
                num_threads=self.num_threads)
        if self.tvdata_on_plex is None:
            raise ValueError('Error, could not find TV shows on the server.')
        mytxt = '2, loaded TV data from Plex server in %0.3f seconds.' % (
            time.time() - time0)
        logging.info(mytxt)
        self.emitString.emit(mytxt)
        #
        ## using a stupid-ass pattern to shave some seconds off...
        manager = Manager()
        shared_list = manager.list()
        myLock = manager.RLock()
        myStage = manager.Value('stage', 2)

        #
        def _process_didend():
            if self.didend is not None:
                shared_list.append(('didend', self.didend))
                return
            didEnd = plextvdb.get_all_series_didend(self.tvdata_on_plex,
                                                    verify=self.verify,
                                                    tvdb_token=self.tvdb_token)
            myLock.acquire()
            myStage.value += 1
            mytxt = '%d, added information on whether shows ended in %0.3f seconds.' % (
                myStage.value, time.time() - time0)
            logging.info(mytxt)
            self.emitString.emit(mytxt)
            myLock.release()
            shared_list.append(('didend', didEnd))

        def _process_missing():
            if self.toGet is not None:
                shared_list.append(('toGet', self.toGet))
                return
            toGet = plextvdb.get_remaining_episodes(
                self.tvdata_on_plex,
                showSpecials=False,
                showsToExclude=self.showsToExclude,
                verify=self.verify,
                token=self.tvdb_token)
            myLock.acquire()
            myStage.value += 1
            mytxt = '%d, found missing episodes in %0.3f seconds.' % (
                myStage.value, time.time() - time0)
            logging.info(mytxt)
            self.emitString.emit(mytxt)
            myLock.release()
            shared_list.append(('toGet', toGet))

        def _process_plot_tvshowstats():
            tvdata_date_dict = plextvdb.get_tvdata_ordered_by_date(
                self.tvdata_on_plex)
            years_have = set(map(lambda date: date.year, tvdata_date_dict))
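            # NB: multiprocessing.Pool pickles the mapped callable, and the
            # lambda below is not picklable with the stdlib pickler; the
            # original project presumably used a dill-backed pool here.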
            with multiprocessing.Pool(
                    processes=multiprocessing.cpu_count()) as pool:
                figdictdata = dict(
                    pool.map(
                        lambda year:
                        (year,
                         plextvdb.create_plot_year_tvdata(
                             tvdata_date_dict, year, shouldPlot=False)),
                        years_have))
            myLock.acquire()
            myStage.value += 1
            mytxt = '%d, made plots of tv shows added in %d years in %0.3f seconds.' % (
                myStage.value, len(years_have), time.time() - time0)
            logging.info(mytxt)
            self.emitString.emit(mytxt)
            myLock.release()
            shared_list.append(('plotYears', figdictdata))

        jobs = [
            Process(target=_process_didend),
            Process(target=_process_missing)
        ]
        #         Process( target = _process_plot_tvshowstats ) ]
        for process in jobs:
            process.start()
        for process in jobs:
            process.join()
        #
        final_data = dict(shared_list)
        assert (set(final_data) == set(['didend', 'toGet']))
        didend = final_data['didend']
        toGet = final_data['toGet']
        for seriesName in self.tvdata_on_plex:
            self.tvdata_on_plex[seriesName]['didEnd'] = didend[seriesName]
        final_data_out['tvdata_on_plex'] = self.tvdata_on_plex
        mytxt = '%d, finished loading in all data on %s.' % (
            myStage.value + 1,
            datetime.datetime.now().strftime('%B %d, %Y @ %I:%M:%S %p'))
        logging.info(mytxt)
        self.emitString.emit(mytxt)
        missing_eps = {
            seriesName: toGet[seriesName]['episodes']
            for seriesName in
            (set(self.tvdata_on_plex) & set(toGet)) - set(self.showsToExclude)}
        final_data_out['missing_eps'] = missing_eps
        self.finalData.emit(final_data_out)
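
Distilled from the example above, here is a minimal self-contained sketch of the same pattern; the names `run_parallel_lookups` and `_lookup_worker` are illustrative, not from the original code. Each worker process appends a (name, result) pair to a `manager.list()`, while a `manager.RLock()` and a shared `manager.Value` keep the progress reporting consistent across processes.

from multiprocessing import Manager, Process
import time


def _lookup_worker(name, func, shared_list, lock, stage, time0):
    """Run one lookup and report progress under the shared RLock."""
    result = func()
    with lock:  # serialize the shared counter and the progress message
        stage.value += 1
        print('%d, finished %s in %0.3f seconds.' % (
            stage.value, name, time.time() - time0))
    shared_list.append((name, result))


def run_parallel_lookups(lookups):
    """Run independent, zero-argument lookup callables in parallel
    processes and collect their results into a plain dict."""
    manager = Manager()
    shared_list = manager.list()   # gathers (name, result) pairs
    lock = manager.RLock()         # guards the shared progress state
    stage = manager.Value('i', 0)  # shared progress counter
    time0 = time.time()
    jobs = [Process(target=_lookup_worker,
                    args=(name, func, shared_list, lock, stage, time0))
            for name, func in lookups.items()]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()
    return dict(shared_list)

Because the worker is a module-level function and only picklable manager proxies cross the process boundary, this sketch also works on spawn-based platforms, which the nested-function `Process` targets above do not.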
Example #16
0
import datetime
import logging
import os
import time
from concurrent.futures import Future, ProcessPoolExecutor, ThreadPoolExecutor
from multiprocessing import Manager


# The `Singleton` metaclass is assumed to be defined elsewhere in this package.
class DataLogger(metaclass=Singleton):
    """Stores and saves various types of data in various forms."""
    @staticmethod
    def _futures_callback(future: Future):
        """Called at future completion."""
        if future.exception():
            print(
                f"Future {future} raised the exception {repr(future.exception())}"
            )

    @staticmethod
    def _push(managed, entry, value, time):
        """Push method called by the pool executors"""
        with managed.lockers[entry]:
            managed.data[entry][time] = value
            managed.counters[entry] += 1
            for f in managed.on_push_callables[entry]:
                try:
                    f(entry, managed.data[entry], path=managed.path)
                except Exception as e:
                    logging.getLogger("datalogger").warning(
                        f"{managed.name} DataLogger: function {f} of {entry} failed: {e}"
                    )

    @staticmethod
    def _dump(managed, entry):
        """Dump method called by the pool executors"""
        with managed.lockers[entry]:
            for f in managed.on_dump_callables[entry]:
                try:
                    f(entry, managed.data[entry], path=managed.path)
                except Exception as e:
                    logging.getLogger("datalogger").warning(
                        f"{managed.name} DataLogger: function {f} of {entry} failed: {e}"
                    )

    @staticmethod
    def _reset(managed, entry):
        """Inner reset method called by the pool executor"""
        with managed.lockers[entry]:
            for f in managed.on_reset_callables[entry]:
                try:
                    f(entry, managed.data[entry], path=managed.path)
                except Exception as e:
                    logging.getLogger("datalogger").warning(
                        f"{managed.name} DataLogger: function {f} of {entry} failed: {e}"
                    )
            managed.data[entry].clear()
            managed.counters[entry] = 0

    def __init__(self):
        # Init and set attributes
        super(DataLogger, self).__init__()
        # Managed resources (accessible by remote threads or remote processes)
        self._manager = Manager()
        self._managed = self._manager.Namespace()
        self._managed.name = "data-logger"
        self._managed.path = "."
        self._managed.entries = self._manager.list()
        self._managed.data = self._manager.dict()
        self._managed.lockers = self._manager.dict()
        self._managed.counters = self._manager.dict()
        self._managed.on_push_callables = self._manager.dict()
        self._managed.on_reset_callables = self._manager.dict()
        self._managed.on_dump_callables = self._manager.dict()

        self.tick = datetime.datetime.now()
        self.futures = list()
        self.pool = ThreadPoolExecutor(max_workers=1)
        # Log
        logging.getLogger("datalogger").info(
            "{} DataLogger initialized!".format(self._managed.name))

    def set_path(self, path):
        """Sets the root path of the logger. Used by all the handlers that write on disk.

        :param string path: A valid path to write the data in.
        """
        if len(self._managed.lockers) != 0:
            raise Exception(
                "You tried to change logger path after having registered some entries."
            )
        os.makedirs(path, exist_ok=True)
        self._managed.path = path

    def set_pool(self, pool, n_par=5):
        """Sets the executor to be used to call handlers.

        :param string pool: The type of executor to use to call handlers. Either "thread" or "process".
        :param int n_par: The number of workers in the executor pool.
        """
        if len(self._managed.lockers) != 0:
            raise Exception(
                "You tried to change the pool after having registered some entries.")
        if pool == "thread":
            self.pool = ThreadPoolExecutor(max_workers=n_par)
        elif pool == "process":
            self.pool = ProcessPoolExecutor(max_workers=n_par)
        else:
            raise Exception(f"Unknown pool type `{pool}`")

    def set_name(self, name):
        """Sets the name of the logger.

        :param string name: Name of the logger
        """
        self._managed.name = name

    def declare(self, entry, on_push_callables, on_dump_callables,
                on_reset_callables):
        """Register a recurring log entry.

        Registering an entry gives access to the `push`, `reset` and `dump` methods. Note that all the handlers must be
        able to handle the data that will be pushed.

        :param string entry: Name of the log entry.
        :param List[handlers] on_push_callables: Handlers called on the data when `push` is called.
        :param List[handlers] on_dump_callables: Handlers called on the data when `dump` is called.
        :param List[handlers] on_reset_callables: Handlers called on the data when `reset` is called.
        """
        if entry in self._managed.entries:
            raise Exception("You tried to declare an existing log entry")
        self._managed.entries.append(entry)
        self._managed.lockers[entry] = self._manager.RLock()
        self._managed.data[entry] = self._manager.dict()
        self._managed.counters[entry] = 0
        self._managed.on_push_callables[entry] = self._manager.list(
            on_push_callables)
        self._managed.on_reset_callables[entry] = self._manager.list(
            on_reset_callables)
        self._managed.on_dump_callables[entry] = self._manager.list(
            on_dump_callables)
        if os.path.dirname(entry) != "":
            os.makedirs(os.path.join(self._managed.path,
                                     os.path.dirname(entry)),
                        exist_ok=True)

    def push(self, entry, value, time=None):
        """Append data to a recurring log.

        All handlers registered for the `on_push` event will be called.

        :param string entry: Name of the log entry
        :param Any value: Object containing the data to log. Should be of the same type from call to call.
        :param int or None time: Date of the logging (epoch, iteration, tick, ...). Used as the key in the data
        dictionary. If `None`, the entry's push counter is used instead.
        """
        future = self.pool.submit(
            DataLogger._push, self._managed, entry, value,
            time if time is not None else self._managed.counters[entry])
        future.add_done_callback(DataLogger._futures_callback)
        self.futures.append(future)

    def dump(self):
        """Calls handlers declared for `on_dump` event, for all registered log entries.
        """
        for entry in self._managed.entries:
            future = self.pool.submit(DataLogger._dump, self._managed, entry)
            future.add_done_callback(DataLogger._futures_callback)
            self.futures.append(future)

    def reset(self, entry):
        """Resets the data of a recurring log entry.

        All handlers registered for the `on_reset` event will be called before the storage is emptied.

        :param string entry: name of the log entry.
        """

        future = self.pool.submit(DataLogger._reset, self._managed, entry)
        future.add_done_callback(DataLogger._futures_callback)
        self.futures.append(future)

    def get_entry_length(self, entry):
        """Retrieves the number of data saved for a log entry.

        :param string entry: Name of the log entry
        :return: Number of data pieces in the entry storage
        :rtype: int
        """
        return self._managed.counters[entry]

    def get_serie(self, entry):
        """Returns the data in a list ordered by keys.

        :param string entry: Name of the log entry
        :return: Series of data values ordered by key
        :rtype: List[any]
        """
        return [i[1] for i in sorted(self._managed.data[entry].items())]

    def wait(self, log_durations=True):
        """Wait for the handling queue to be emptied.

        :param bool log_durations: Whether to log the wait duration.
        """
        b = datetime.datetime.now()
        while True:
            self.futures = list(filter(lambda x: not x.done(), self.futures))
            if self.futures:
                time.sleep(.1)
            else:
                break
        if log_durations:
            logging.getLogger("datalogger").info(
                f"{self._managed.name} DataLogger: Last wait occured {self.tick - b} ago."
            )
            logging.getLogger("datalogger").info(
                f"{self._managed.name} DataLogger: Waited {datetime.datetime.now() - b} for completion."
            )
        self.tick = datetime.datetime.now()
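
A short usage sketch for the `DataLogger` above, under stated assumptions: `csv_dump_handler` is a hypothetical `on_dump` handler (any module-level callable with the `(entry, data, path=...)` signature invoked by `_push`/`_dump`/`_reset` should work), and the class plus its `Singleton` metaclass are importable. Handlers must live at module level because they are pickled into the manager process.

import csv
import os


def csv_dump_handler(entry, data, path="."):
    """Hypothetical handler: dump the entry data as (time, value) CSV rows."""
    with open(os.path.join(path, entry + ".csv"), "w", newline="") as fh:
        writer = csv.writer(fh)
        for t, v in sorted(data.items()):
            writer.writerow([t, v])


if __name__ == "__main__":
    logger = DataLogger()
    logger.set_path("./logs")
    logger.declare("train/loss",
                   on_push_callables=[],
                   on_dump_callables=[csv_dump_handler],
                   on_reset_callables=[])
    for step, loss in enumerate([0.9, 0.7, 0.5]):
        logger.push("train/loss", loss, time=step)
    logger.wait(log_durations=False)  # block until every push is handled
    logger.dump()   # fires csv_dump_handler -> ./logs/train/loss.csv
    logger.wait(log_durations=False)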