Code example #1
def get_intersection_buffers(roads, road_bounds, intersection_buffer_units,
                             tile_max_units):
    """Buffers all intersections
    :param roads: List of shapely geometries representing road segments
    :param road_bounds: Bounding box of the roads shapefile
    :param intersection_buffer_units: Number of units to use for buffer radius
    :param tile_max_units: Maximum number of units for each side of a tile
    """

    # As an optimization, the road network is divided up into a grid of tiles,
    # and intersections are calculated within each tile.
    def roads_per_tile_iter():
        """Generator which yields a set of roads for each tile"""
        min_x, min_y, max_x, max_y = road_bounds
        bounds_width = max_x - min_x
        bounds_height = max_y - min_y
        x_divisions = ceil(bounds_width / tile_max_units)
        y_divisions = ceil(bounds_height / tile_max_units)
        tile_width = bounds_width / x_divisions
        tile_height = bounds_height / y_divisions

        # Create a spatial index for roads to efficiently match up roads to tiles
        logger.info('Generating spatial index for intersections')
        roads_index = rtree.index.Index()
        for idx, road in enumerate(roads):
            roads_index.insert(idx, road.bounds)

        logger.info('Number of tiles: {}'.format(int(x_divisions *
                                                     y_divisions)))
        for x_offset in range(0, int(x_divisions)):
            for y_offset in range(0, int(y_divisions)):
                road_ids_in_tile = roads_index.intersection([
                    min_x + x_offset * tile_width,
                    min_y + y_offset * tile_height,
                    min_x + (1 + x_offset) * tile_width,
                    min_y + (1 + y_offset) * tile_height
                ])
                roads_in_tile = [
                    roads[road_id] for road_id in road_ids_in_tile
                ]
                if len(roads_in_tile) > 1:
                    yield roads_in_tile

    # Allocate one worker per core, and parallelize the discovery of intersections
    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    tile_intersections = pool.imap(get_intersections, roads_per_tile_iter())
    pool.close()
    pool.join()

    logger.info('Buffering intersections')
    # Note: tile_intersections is a list of multipoints (which is a list of points).
    # itertools.chain.from_iterable flattens the list into a list of single points.
    buffered_intersections = [
        intersection.buffer(intersection_buffer_units)
        for intersection in itertools.chain.from_iterable(tile_intersections)
    ]

    # If intersection buffers overlap, union them to treat them as one
    logger.info('Performing unary union on buffered intersections')
    return unary_union(buffered_intersections)
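A minimal usage sketch for the function above. The road geometries, bounds, and unit values here are made up for illustration, and the module's get_intersections helper, logger, and imports (shapely, rtree, multiprocessing, itertools, unary_union) are assumed to be available alongside it:

from shapely.geometry import LineString

if __name__ == "__main__":
    # Two road segments that cross at (5, 0); bounds are (min_x, min_y, max_x, max_y).
    roads = [
        LineString([(0, 0), (10, 0)]),
        LineString([(5, -5), (5, 5)]),
    ]
    road_bounds = (0, -5, 10, 5)
    buffered = get_intersection_buffers(roads, road_bounds,
                                        intersection_buffer_units=1,
                                        tile_max_units=100)
    print(buffered.area)  # area of the buffered intersection(s)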
Code example #2
def main(argv, _raven_client=None, _stats_client=None):
    # run for example via:
    # bin/location_map --create --upload --datamaps=/path/to/datamaps/ \
    #   --output=ichnaea/content/static/tiles/

    parser = argparse.ArgumentParser(
        prog=argv[0], description='Generate and upload datamap tiles.')

    parser.add_argument('--create', action='store_true',
                        help='Create tiles?')
    parser.add_argument('--upload', action='store_true',
                        help='Upload tiles to S3?')
    parser.add_argument('--concurrency', default=2,
                        help='How many concurrent processes to use?')
    parser.add_argument('--datamaps',
                        help='Directory of the datamaps tools.')
    parser.add_argument('--output',
                        help='Optional directory for output files.')

    args = parser.parse_args(argv[1:])
    if args.create:
        conf = read_config()
        db_url = conf.get('database', 'rw_url')

        raven_client = configure_raven(
            conf.get('sentry', 'dsn'),
            transport='sync', _client=_raven_client)

        stats_client = configure_stats(conf, _client=_stats_client)

        bucketname = conf.get('assets', 'bucket').strip('/')

        upload = False
        if args.upload:
            upload = bool(args.upload)

        concurrency = billiard.cpu_count()
        if args.concurrency:
            concurrency = int(args.concurrency)

        datamaps = ''
        if args.datamaps:
            datamaps = os.path.abspath(args.datamaps)

        output = None
        if args.output:
            output = os.path.abspath(args.output)

        try:
            with stats_client.timed('datamaps', tags=['func:main']):
                generate(db_url, bucketname, raven_client, stats_client,
                         upload=upload,
                         concurrency=concurrency,
                         datamaps=datamaps,
                         output=output)
        except Exception:  # pragma: no cover
            raven_client.captureException()
            raise
    else:  # pragma: no cover
        parser.print_help()
Code example #3
    def setup_instance(self,
                       queues=None,
                       ready_callback=None,
                       pidfile=None,
                       include=None,
                       **kwargs):
        self.pidfile = pidfile
        self.setup_defaults(kwargs, namespace='celeryd')
        self.setup_queues(queues)
        self.setup_includes(include)

        # Set default concurrency
        if not self.concurrency:
            try:
                self.concurrency = cpu_count()
            except NotImplementedError:
                self.concurrency = 2

        # Options
        self.loglevel = mlevel(self.loglevel)
        self.ready_callback = ready_callback or self.on_consumer_ready
        self.use_eventloop = self.should_use_eventloop()

        signals.worker_init.send(sender=self)

        # Initialize boot steps
        self.pool_cls = _concurrency.get_implementation(self.pool_cls)
        self.components = []
        self.namespace = Namespace(app=self.app,
                                   on_start=self.on_start,
                                   on_close=self.on_close,
                                   on_stopped=self.on_stopped)
        self.namespace.apply(self, **kwargs)
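The try/except around cpu_count() is a recurring pattern in these examples: multiprocessing.cpu_count() raises NotImplementedError on platforms where the number of CPUs cannot be determined. A standalone sketch of the same fallback (the helper name and default are illustrative, not Celery API):

from multiprocessing import cpu_count

def detect_concurrency(fallback=2):
    """Return the CPU count, or a small fallback when it cannot be determined."""
    try:
        return cpu_count()
    except NotImplementedError:
        return fallback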
Code example #4
File: __init__.py Project: ccobbster/celery
    def setup_instance(self, queues=None, ready_callback=None,
            pidfile=None, include=None, **kwargs):
        self.pidfile = pidfile
        self.app.loader.init_worker()
        self.setup_defaults(kwargs, namespace='celeryd')
        self.setup_queues(queues)
        self.setup_includes(include)

        # Set default concurrency
        if not self.concurrency:
            try:
                self.concurrency = cpu_count()
            except NotImplementedError:
                self.concurrency = 2

        # Options
        self.loglevel = mlevel(self.loglevel)
        self.ready_callback = ready_callback or self.on_consumer_ready
        self.use_eventloop = self.should_use_eventloop()

        signals.worker_init.send(sender=self)

        # Initialize boot steps
        self.pool_cls = _concurrency.get_implementation(self.pool_cls)
        self.components = []
        self.namespace = Namespace(app=self.app).apply(self, **kwargs)
Code example #5
File: batch_exporter.py Project: shafcodes/pupil
    def __init__(self, g_pool):
        super(Batch_Exporter, self).__init__()
        self.g_pool = g_pool

        self.exports = []
        self.new_exports = []
        self.active_exports = []
        default_path = os.path.expanduser('~/Desktop')
        self.destination_dir = create_string_buffer(default_path, 512)
        self.source_dir = create_string_buffer(default_path, 512)

        self.run = False
        self.workers = [None for x in range(cpu_count())]
        logger.info(
            "Using a maximum of %s CPUs to process visualizations in parallel..."
            % cpu_count())
Code example #6
File: mans_to_es.py Project: TonyCrespoMe/mans_to_es
    def __init__(
        self,
        filename: str,
        index: str,
        name: str,
        es_host: str,
        es_port: int,
        bulk_size: int = 1000,
        cpu_count: int = cpu_count() - 1,
    ):
        self.filename = filename
        self.index = index
        self.name = name
        self.bulk_size = bulk_size
        self.cpu_count = cpu_count
        self.folder_path = self.filename + "__tmp"
        self.offset_stateagentinspector = None
        self.es_info = {"host": es_host, "port": es_port}
        self.filelist: Dict[str, Tuple[str, int]] = {}
        self.ioc_alerts: Dict[str, Any] = {}
        self.exd_alerts: List[Mapping[str, str]] = []
        self.generic_items: Dict[str, Any] = {}

        es = Elasticsearch([self.es_info])
        if not es.ping():
            raise ValueError("Connection failed")

        logging.debug(f"[MAIN] Start parsing {self.filename}.")
        logging.debug(
            f"[MAIN] Pushing on {self.name} index and {self.index} timeline")
Code example #7
File: __init__.py Project: shahjahanw/celery
    def setup_instance(
        self, queues=None, ready_callback=None, pidfile=None, include=None, use_eventloop=None, **kwargs
    ):
        self.pidfile = pidfile
        self.setup_queues(queues)
        self.setup_includes(include)

        # Set default concurrency
        if not self.concurrency:
            try:
                self.concurrency = cpu_count()
            except NotImplementedError:
                self.concurrency = 2

        # Options
        self.loglevel = mlevel(self.loglevel)
        self.ready_callback = ready_callback or self.on_consumer_ready

        # this connection is not established, only used for params
        self._conninfo = self.app.connection()
        self.use_eventloop = self.should_use_eventloop() if use_eventloop is None else use_eventloop
        self.options = kwargs

        signals.worker_init.send(sender=self)

        # Initialize bootsteps
        self.pool_cls = _concurrency.get_implementation(self.pool_cls)
        self.steps = []
        self.on_init_namespace()
        self.namespace = self.Namespace(
            app=self.app, on_start=self.on_start, on_close=self.on_close, on_stopped=self.on_stopped
        )
        self.namespace.apply(self, **kwargs)
Code example #8
def main(argv, _raven_client=None, _bucketname=None):
    # run for example via:
    # bin/location_map --create --upload \
    #   --output=ichnaea/content/static/tiles/

    parser = argparse.ArgumentParser(
        prog=argv[0], description="Generate and upload datamap tiles.")

    parser.add_argument("--create", action="store_true", help="Create tiles?")
    parser.add_argument("--upload",
                        action="store_true",
                        help="Upload tiles to S3?")
    parser.add_argument("--concurrency",
                        default=2,
                        help="How many concurrent processes to use?")
    parser.add_argument("--output",
                        help="Optional directory for output files.")

    args = parser.parse_args(argv[1:])
    if args.create:
        raven_client = configure_raven(transport="sync",
                                       tags={"app": "datamap"},
                                       _client=_raven_client)

        configure_stats()

        bucketname = _bucketname
        if not _bucketname:
            bucketname = settings("asset_bucket")
            if bucketname:
                bucketname = bucketname.strip("/")

        upload = False
        if args.upload:
            upload = bool(args.upload)

        concurrency = billiard.cpu_count()
        if args.concurrency:
            concurrency = int(args.concurrency)

        output = None
        if args.output:
            output = os.path.abspath(args.output)

        try:
            with METRICS.timer("datamaps", tags=["func:main"]):
                generate(
                    bucketname,
                    raven_client,
                    upload=upload,
                    concurrency=concurrency,
                    output=output,
                )
        except Exception:
            raven_client.captureException()
            raise
    else:
        parser.print_help()
Code example #9
File: controller.py Project: DT021/MemeMarketCap
 def stream(
     self,
     subreddit: str,
     start_time: int,
     end_time: int
 ) -> Iterator[List[Dict[str, Any]]]:
     for id_iter in query_pushshift(subreddit, start_time, end_time):
         with Pool(cpu_count(), initializer) as workers:
             yield list(workers.imap_unordered(praw_by_id, id_iter))
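A self-contained sketch of the Pool pattern used above, with stand-in functions (initializer and praw_by_id are placeholders here; the real ones live in the project and talk to the Reddit API):

from multiprocessing import Pool, cpu_count

def initializer():
    pass  # e.g. create a per-process API client

def praw_by_id(submission_id):
    return {"id": submission_id}  # placeholder for the real lookup

if __name__ == "__main__":
    with Pool(cpu_count(), initializer) as workers:
        batch = list(workers.imap_unordered(praw_by_id, ["a1", "b2", "c3"]))
    print(batch)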
Code example #10
File: datamap.py Project: thelightnet/ichnaea
def main(argv, _raven_client=None, _stats_client=None, _bucketname=None):
    # run for example via:
    # bin/location_map --create --upload \
    #   --output=ichnaea/content/static/tiles/

    parser = argparse.ArgumentParser(
        prog=argv[0], description='Generate and upload datamap tiles.')

    parser.add_argument('--create', action='store_true', help='Create tiles?')
    parser.add_argument('--upload',
                        action='store_true',
                        help='Upload tiles to S3?')
    parser.add_argument('--concurrency',
                        default=2,
                        help='How many concurrent processes to use?')
    parser.add_argument('--output',
                        help='Optional directory for output files.')

    args = parser.parse_args(argv[1:])
    if args.create:
        raven_client = configure_raven(transport='sync', _client=_raven_client)

        stats_client = configure_stats(_client=_stats_client)

        bucketname = _bucketname
        if not _bucketname:  # pragma: no cover
            bucketname = ASSET_BUCKET
            if bucketname:
                bucketname = bucketname.strip('/')

        upload = False
        if args.upload:
            upload = bool(args.upload)

        concurrency = billiard.cpu_count()
        if args.concurrency:
            concurrency = int(args.concurrency)

        output = None
        if args.output:
            output = os.path.abspath(args.output)

        try:
            with stats_client.timed('datamaps', tags=['func:main']):
                generate(bucketname,
                         raven_client,
                         stats_client,
                         upload=upload,
                         concurrency=concurrency,
                         output=output)
        except Exception:  # pragma: no cover
            raven_client.captureException()
            raise
    else:  # pragma: no cover
        parser.print_help()
Code example #11
File: worker.py Project: rhelmer/eddy-lib
    def __init__(self,
                 hostname=None,
                 purge=False,
                 beat=False,
                 queues=None,
                 include=None,
                 app=None,
                 pidfile=None,
                 autoscale=None,
                 autoreload=False,
                 no_execv=False,
                 no_color=None,
                 **kwargs):
        self.app = app = app_or_default(app or self.app)
        self.hostname = hostname or socket.gethostname()

        # this signal can be used to set up configuration for
        # workers by name.
        signals.celeryd_init.send(sender=self.hostname,
                                  instance=self,
                                  conf=self.app.conf)

        self.setup_defaults(kwargs, namespace='celeryd')
        if not self.concurrency:
            try:
                self.concurrency = cpu_count()
            except NotImplementedError:
                self.concurrency = 2
        self.purge = purge
        self.beat = beat
        self.use_queues = [] if queues is None else queues
        self.queues = None
        self.include = include
        self.pidfile = pidfile
        self.autoscale = None
        self.autoreload = autoreload
        self.no_color = no_color
        self.no_execv = no_execv
        if autoscale:
            max_c, _, min_c = autoscale.partition(',')
            self.autoscale = [int(max_c), min_c and int(min_c) or 0]
        self._isatty = isatty(sys.stdout)

        self.colored = app.log.colored(
            self.logfile,
            enabled=not no_color if no_color is not None else no_color)

        if isinstance(self.use_queues, basestring):
            self.use_queues = self.use_queues.split(',')
        if self.include:
            if isinstance(self.include, basestring):
                self.include = self.include.split(',')
            app.conf.CELERY_INCLUDE = (tuple(app.conf.CELERY_INCLUDE) +
                                       tuple(self.include))
        self.loglevel = mlevel(self.loglevel)
Code example #12
    def __init__(self,
                 hostname=None,
                 discard=False,
                 embed_clockservice=False,
                 queues=None,
                 include=None,
                 app=None,
                 pidfile=None,
                 autoscale=None,
                 autoreload=False,
                 **kwargs):
        self.app = app = app_or_default(app or self.app)
        self.hostname = hostname or socket.gethostname()

        # this signal can be used to set up configuration for
        # workers by name.
        signals.celeryd_init.send(sender=self.hostname,
                                  instance=self,
                                  conf=self.app.conf)

        self.setup_defaults(kwargs, namespace="celeryd")
        if not self.concurrency:
            try:
                self.concurrency = cpu_count()
            except NotImplementedError:
                self.concurrency = 2
        self.discard = discard
        self.embed_clockservice = embed_clockservice
        if self.app.IS_WINDOWS and self.embed_clockservice:
            self.die("-B option does not work on Windows.  "
                     "Please run celerybeat as a separate service.")
        self.use_queues = [] if queues is None else queues
        self.queues = None
        self.include = [] if include is None else include
        self.pidfile = pidfile
        self.autoscale = None
        self.autoreload = autoreload
        if autoscale:
            max_c, _, min_c = autoscale.partition(",")
            self.autoscale = [int(max_c), min_c and int(min_c) or 0]
        self._isatty = isatty(sys.stdout)

        self.colored = app.log.colored(self.logfile)

        if isinstance(self.use_queues, basestring):
            self.use_queues = self.use_queues.split(",")
        if isinstance(self.include, basestring):
            self.include = self.include.split(",")

        try:
            self.loglevel = mlevel(self.loglevel)
        except KeyError:
            self.die("Unknown level %r. Please use one of %s." %
                     (self.loglevel, "|".join(l for l in LOG_LEVELS.keys()
                                              if isinstance(l, basestring))))
Code example #13
File: mans_to_es.py Project: TonyCrespoMe/mans_to_es
def main():
    parser = argparse.ArgumentParser(
        description="Push .mans information in Elasticsearch index",
        prog="MANS to ES")
    # Required parameters
    parser.add_argument("--filename",
                        dest="filename",
                        help="Path of the .mans file")
    parser.add_argument("--name", dest="name", help="Timeline name")
    parser.add_argument("--index", dest="index", help="ES index name")
    parser.add_argument("--es_host", dest="es_host", help="ES host")
    parser.add_argument("--es_port", dest="es_port", help="ES port")

    # Optional parameters to increase performances
    parser.add_argument(
        "--cpu_count",
        dest="cpu_count",
        default=cpu_count() - 1,
        help="cpu count",
        type=int,
    )
    parser.add_argument(
        "--bulk_size",
        dest="bulk_size",
        default=1000,
        help="Bulk size for multiprocessing parsing and upload",
        type=int,
    )

    parser.add_argument("--version",
                        dest="version",
                        action="version",
                        version="%(prog)s 1.5")
    args = parser.parse_args()

    if not all([args.name, args.index, args.es_port, args.es_host]):
        parser.print_usage()
    else:
        try:
            mte = MansToEs(
                args.filename,
                args.index,
                args.name,
                args.es_host,
                args.es_port,
                args.bulk_size,
                args.cpu_count,
            )
            mte.run()
            logging.debug("[MAIN] Operation Completed [✔✔✔]")
        except:
            logging.exception("Error parsing .mans")
            return False
    return True
Code example #14
 def praw_memes(self, verbose: bool) -> Iterator[List[redditData]]:
     for ids in self.query_pushshift():
         with cast(mpPool, Pool(cpu_count(), initializer)) as workers:
             if verbose:
                 memes: list[Union[redditData, None]] = list(
                     tqdm(workers.imap_unordered(praw_by_id, ids)))
             else:
                 memes = list(workers.imap_unordered(praw_by_id, ids))
         yield [
             meme for meme in memes if meme and meme["username"] != "None"
         ]
Code example #15
File: datamap.py Project: crankycoder/ichnaea
def main(argv, _raven_client=None, _stats_client=None, _bucketname=None):
    # run for example via:
    # bin/location_map --create --upload \
    #   --output=ichnaea/content/static/tiles/

    parser = argparse.ArgumentParser(
        prog=argv[0], description='Generate and upload datamap tiles.')

    parser.add_argument('--create', action='store_true',
                        help='Create tiles?')
    parser.add_argument('--upload', action='store_true',
                        help='Upload tiles to S3?')
    parser.add_argument('--concurrency', default=2,
                        help='How many concurrent processes to use?')
    parser.add_argument('--output',
                        help='Optional directory for output files.')

    args = parser.parse_args(argv[1:])
    if args.create:
        raven_client = configure_raven(
            transport='sync', _client=_raven_client)

        stats_client = configure_stats(_client=_stats_client)

        bucketname = _bucketname
        if not _bucketname:  # pragma: no cover
            bucketname = ASSET_BUCKET
            if bucketname:
                bucketname = bucketname.strip('/')

        upload = False
        if args.upload:
            upload = bool(args.upload)

        concurrency = billiard.cpu_count()
        if args.concurrency:
            concurrency = int(args.concurrency)

        output = None
        if args.output:
            output = os.path.abspath(args.output)

        try:
            with stats_client.timed('datamaps', tags=['func:main']):
                generate(bucketname, raven_client, stats_client,
                         upload=upload,
                         concurrency=concurrency,
                         output=output)
        except Exception:  # pragma: no cover
            raven_client.captureException()
            raise
    else:  # pragma: no cover
        parser.print_help()
Code example #16
def get_intersection_buffers(roads, road_bounds, intersection_buffer_units, tile_max_units):
    """Buffers all intersections
    :param roads: List of shapely geometries representing road segments
    :param road_bounds: Bounding box of the roads shapefile
    :param intersection_buffer_units: Number of units to use for buffer radius
    :param tile_max_units: Maximum number of units for each side of a tile
    """
    # As an optimization, the road network is divided up into a grid of tiles,
    # and intersections are calculated within each tile.
    def roads_per_tile_iter():
        """Generator which yields a set of roads for each tile"""
        min_x, min_y, max_x, max_y = road_bounds
        bounds_width = max_x - min_x
        bounds_height = max_y - min_y
        x_divisions = ceil(bounds_width / tile_max_units)
        y_divisions = ceil(bounds_height / tile_max_units)
        tile_width = bounds_width / x_divisions
        tile_height = bounds_height / y_divisions

        # Create a spatial index for roads to efficiently match up roads to tiles
        logger.info('Generating spatial index for intersections')
        roads_index = rtree.index.Index()
        for idx, road in enumerate(roads):
            roads_index.insert(idx, road.bounds)

        logger.info('Number of tiles: {}'.format(int(x_divisions * y_divisions)))
        for x_offset in range(0, int(x_divisions)):
            for y_offset in range(0, int(y_divisions)):
                road_ids_in_tile = roads_index.intersection([
                    min_x + x_offset * tile_width,
                    min_y + y_offset * tile_height,
                    min_x + (1 + x_offset) * tile_width,
                    min_y + (1 + y_offset) * tile_height
                ])
                roads_in_tile = [roads[road_id] for road_id in road_ids_in_tile]
                if len(roads_in_tile) > 1:
                    yield roads_in_tile

    # Allocate one worker per core, and parallelize the discovery of intersections
    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    tile_intersections = pool.imap(get_intersections, roads_per_tile_iter())
    pool.close()
    pool.join()

    logger.info('Buffering intersections')
    # Note: tile_intersections is a list of multipoints (which is a list of points).
    # itertools.chain.from_iterable flattens the list into a list of single points.
    buffered_intersections = [intersection.buffer(intersection_buffer_units)
                              for intersection in itertools.chain.from_iterable(tile_intersections)]

    # If intersection buffers overlap, union them to treat them as one
    logger.info('Performing unary union on buffered intersections')
    return unary_union(buffered_intersections)
Code example #17
    def __init__(
        self,
        hostname=None,
        purge=False,
        beat=False,
        queues=None,
        include=None,
        app=None,
        pidfile=None,
        autoscale=None,
        autoreload=False,
        no_execv=False,
        no_color=None,
        **kwargs
    ):
        self.app = app = app_or_default(app or self.app)
        self.hostname = hostname or socket.gethostname()

        # this signal can be used to set up configuration for
        # workers by name.
        signals.celeryd_init.send(sender=self.hostname, instance=self, conf=self.app.conf)

        self.setup_defaults(kwargs, namespace="celeryd")
        if not self.concurrency:
            try:
                self.concurrency = cpu_count()
            except NotImplementedError:
                self.concurrency = 2
        self.purge = purge
        self.beat = beat
        self.use_queues = [] if queues is None else queues
        self.queues = None
        self.include = include
        self.pidfile = pidfile
        self.autoscale = None
        self.autoreload = autoreload
        self.no_color = no_color
        self.no_execv = no_execv
        if autoscale:
            max_c, _, min_c = autoscale.partition(",")
            self.autoscale = [int(max_c), min_c and int(min_c) or 0]
        self._isatty = isatty(sys.stdout)

        self.colored = app.log.colored(self.logfile, enabled=not no_color if no_color is not None else no_color)

        if isinstance(self.use_queues, basestring):
            self.use_queues = self.use_queues.split(",")
        if self.include:
            if isinstance(self.include, basestring):
                self.include = self.include.split(",")
            app.conf.CELERY_INCLUDE = tuple(app.conf.CELERY_INCLUDE) + tuple(self.include)
        self.loglevel = mlevel(self.loglevel)
Code example #18
File: holder.py Project: mattalexpugh/mphil-neptune
    def run(self):
        tasks = mp.Queue()
        results = mp.JoinableQueue()
        interim = []
        args = (tasks, results)
        # n_procs = min(mp.cpu_count(), len(self._videos))
        n_procs = mp.cpu_count()
        all_jobs = []

        for video_gt_pair in self._videos:
            gt = video_gt_pair[0]
            fp = video_gt_pair[1]

            for func in self._funcs:
                func_name = func[0]
                func_ptr = func[1]

                base_params = {
                    'gt_path': gt,
                    'video_path': fp,
                    'metric_func': func_ptr,
                    'init': False
                }

                for classifier in self._classifiers:
                    params = base_params.copy()
                    params.update(self._experiment_args)
                    params['classifier'] = classifier(metric=func_name)
                    log.info("Params ({}): {}".format(id(params), params))

                    all_jobs.append((params, self._experiment))

        for job in all_jobs:
            tasks.put(job)

        for _ in range(n_procs):
            p = mp.Process(target=train_classifier, args=args).start()

        for _ in range(len(all_jobs)):
            interim.append(results.get())
            results.task_done()

        for _ in range(n_procs):
            tasks.put(None)

        results.join()
        tasks.close()
        results.close()

        return interim
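The run() method above relies on a train_classifier function that pulls (params, experiment) tuples from the tasks queue, reports to the results queue, and exits on the None sentinel. That function is not shown in the snippet; a minimal worker compatible with this queue protocol might look like this (the call convention for the experiment is an assumption):

def train_classifier(tasks, results):
    while True:
        job = tasks.get()
        if job is None:  # sentinel pushed by run() once the results are collected
            break
        params, experiment = job
        results.put(experiment(params))  # assumed call convention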
Code example #19
File: batch_exporter.py Project: cmcmurrough/pupil
    def __init__(self, g_pool):
        super(Batch_Exporter, self).__init__()
        self.g_pool = g_pool

        self.exports = []
        self.new_exports = []
        self.active_exports = []
        default_path = os.path.expanduser('~/Desktop')
        self.destination_dir = create_string_buffer(default_path,512)
        self.source_dir = create_string_buffer(default_path,512)

        self.run = False
        self.workers = [None for x in range(cpu_count())]
        logger.info("Using a maximum of %s CPUs to process visualizations in parallel..." %cpu_count())
Code example #20
File: holder.py Project: momor666/neptune
    def run(self):
        tasks = mp.Queue()
        results = mp.JoinableQueue()
        interim = []
        args = (tasks, results)
        # n_procs = min(mp.cpu_count(), len(self._videos))
        n_procs = mp.cpu_count()
        all_jobs = []

        for video_gt_pair in self._videos:
            gt = video_gt_pair[0]
            fp = video_gt_pair[1]

            for func in self._funcs:
                func_name = func[0]
                func_ptr = func[1]

                base_params = {
                    'gt_path': gt,
                    'video_path': fp,
                    'metric_func': func_ptr,
                    'init': False
                }

                for classifier in self._classifiers:
                    params = base_params.copy()
                    params.update(self._experiment_args)
                    params['classifier'] = classifier(metric=func_name)
                    log.info("Params ({}): {}".format(id(params), params))

                    all_jobs.append((params, self._experiment))

        for job in all_jobs:
            tasks.put(job)

        for _ in range(n_procs):
            p = mp.Process(target=train_classifier, args=args).start()

        for _ in range(len(all_jobs)):
            interim.append(results.get())
            results.task_done()

        for _ in range(n_procs):
            tasks.put(None)

        results.join()
        tasks.close()
        results.close()

        return interim
Code example #21
 def __init__(self, *args, **options):
     concurrency = options.get('concurrency', 0)
     if concurrency is None or int(concurrency) < 0:
         concurrency = cpu_count()
     self.concurrency = int(concurrency)
     self.stream = sys.stderr
     self.original_stream = self.stream
     try:
         self.ramdb = settings.TEST_RUNNER_RAMDB
     except AttributeError:
         self.ramdb = options.get('ramdb', '')
     try:
         save = settings.LOCAL_CACHE
     except AttributeError:
         save = 'local_cache'
     self.ramdb_saves = os.path.join(os.getcwd(), save)
     super(DiscoverRoadRunner, self).__init__(*args, **options)
Code example #22
    def setup_instance(self,
                       queues=None,
                       ready_callback=None,
                       pidfile=None,
                       include=None,
                       use_eventloop=None,
                       exclude_queues=None,
                       **kwargs):
        self.pidfile = pidfile
        self.setup_queues(queues, exclude_queues)
        self.setup_includes(str_to_list(include))

        # Set default concurrency
        if not self.concurrency:
            try:
                self.concurrency = cpu_count()
            except NotImplementedError:
                self.concurrency = 2

        # Options
        self.loglevel = mlevel(self.loglevel)
        self.ready_callback = ready_callback or self.on_consumer_ready

        # this connection won't establish, only used for params
        self._conninfo = self.app.connection_for_read()
        self.use_eventloop = (self.should_use_eventloop()
                              if use_eventloop is None else use_eventloop)
        self.options = kwargs

        signals.worker_init.send(sender=self)

        # Initialize bootsteps
        self.pool_cls = _concurrency.get_implementation(self.pool_cls)
        self.steps = []
        self.on_init_blueprint()
        # Create the startup blueprint
        self.blueprint = self.Blueprint(
            steps=self.app.steps['worker'],
            on_start=self.on_start,
            on_close=self.on_close,
            on_stopped=self.on_stopped,
        )
        # Create the blueprint's steps
        self.blueprint.apply(self, **kwargs)
Code example #23
File: mans_to_es.py Project: LDO-CERT/mans_to_es
    def __init__(
        self,
        mode: str,
        filename: str,
        index: str = None,
        sketch_id: int = None,
        sketch_name: str = None,
        sketch_description: str = None,
        timeline_name: str = None,
        es_host: str = None,
        es_port: int = None,
        bulk_size: int = 1000,
        cpu_count: int = cpu_count() - 1,
    ):
        self.mode = mode
        self.filename = filename
        self.index = index
        self.sketch = None
        self.timeline_name = timeline_name
        self.bulk_size = bulk_size
        self.cpu_count = cpu_count
        self.folder_path = self.filename + "__tmp"
        self.offset_stateagentinspector = None
        self.es_info = {"host": es_host, "port": es_port}
        self.filelist: Dict[str, Tuple[str, int]] = {}
        self.ioc_alerts: Dict[str, Any] = {}
        self.exd_alerts: List[Mapping[str, str]] = []
        self.generic_items: Dict[str, Any] = {}

        # initial check, es is up or timesketch conf are ok
        if self.mode == "elastic":
            es = Elasticsearch([self.es_info])
            if not es.ping():
                raise ValueError("Connection failed")
        elif self.mode == "timesketch":
            ts = config.get_client()

            if sketch_id:
                self.sketch = ts.get_sketch(int(sketch_id))
            elif sketch_name:
                self.sketch = ts.create_sketch(sketch_name, sketch_description)

        logging.debug(f"[MAIN] Start parsing {self.filename} [{self.mode}].")
Code example #24
    def __init__(self,
                 targets,
                 ports=range(65536),
                 threads=100,
                 timeout=3,
                 proxy_ip=["127.0.0.1", "127.0.0.1"],
                 proxy_port=[80, 80]):

        self._targets_ = targets
        self._ports_ = ports
        self._threads_ = threads
        self._timeout_ = timeout
        self._proxy_ip_ = proxy_ip
        self._proxy_port_ = proxy_port

        self._worker_pool_ = []
        self._worker_count_ = cpu_count()
        self._job_len_ = len(targets)

        self._scanners_ = [
            Scan(self._targets_[i], self._ports_, self._threads_,
                 self._timeout_, self._proxy_ip_, self._proxy_port_)
            for i in range(self._job_len_)
        ]

        self._scan_secure_ = [
            Scan(self._targets_[i], self._ports_, self._threads_,
                 self._timeout_, self._proxy_ip_[0], self._proxy_port_[0])
            for i in range(self._job_len_)
        ]

        self._scan_unsecure_ = [
            Scan(self._targets_[i], self._ports_, self._threads_,
                 self._timeout_, self._proxy_ip_[1], self._proxy_port_[1])
            for i in range(self._job_len_)
        ]

        self._manager_ = billiard.Manager()
        self._log_ = self._manager_.dict()
        self._proxy_log_ = self._manager_.dict()
        self._total_runtime_ = 0
Code example #25
File: __init__.py Project: michaelhenry/celery
    def setup_instance(self,
                       queues=None,
                       ready_callback=None,
                       pidfile=None,
                       include=None,
                       use_eventloop=None,
                       **kwargs):
        self.pidfile = pidfile
        self.setup_defaults(kwargs, namespace='celeryd')
        self.setup_queues(queues)
        self.setup_includes(include)

        # Set default concurrency
        if not self.concurrency:
            try:
                self.concurrency = cpu_count()
            except NotImplementedError:
                self.concurrency = 2

        # Options
        self.loglevel = mlevel(self.loglevel)
        self.ready_callback = ready_callback or self.on_consumer_ready

        # this connection is not established, only used for params
        self._conninfo = self.app.connection()
        self.use_eventloop = (self.should_use_eventloop()
                              if use_eventloop is None else use_eventloop)
        self.options = kwargs

        signals.worker_init.send(sender=self)

        # Initialize bootsteps
        self.pool_cls = _concurrency.get_implementation(self.pool_cls)
        self.steps = []
        self.on_init_namespace()
        self.namespace = self.Namespace(app=self.app,
                                        on_start=self.on_start,
                                        on_close=self.on_close,
                                        on_stopped=self.on_stopped)
        self.namespace.apply(self, **kwargs)
Code example #26
File: worker.py Project: Scalr/celery
    def setup_instance(self, queues=None, ready_callback=None, pidfile=None,
                       include=None, use_eventloop=None, exclude_queues=None,
                       **kwargs):
        self.pidfile = pidfile
        self.setup_queues(queues, exclude_queues)
        self.setup_includes(str_to_list(include))

        # Set default concurrency
        if not self.concurrency:
            try:
                self.concurrency = cpu_count()
            except NotImplementedError:
                self.concurrency = 2

        # Options
        self.loglevel = mlevel(self.loglevel)
        self.ready_callback = ready_callback or self.on_consumer_ready

        # this connection won't establish, only used for params
        self._conninfo = self.app.connection_for_read()
        self.use_eventloop = (
            self.should_use_eventloop() if use_eventloop is None
            else use_eventloop
        )
        self.options = kwargs

        signals.worker_init.send(sender=self)

        # Initialize bootsteps
        self.pool_cls = _concurrency.get_implementation(self.pool_cls)
        self.steps = []
        self.on_init_blueprint()
        self.blueprint = self.Blueprint(
            steps=self.app.steps['worker'],
            on_start=self.on_start,
            on_close=self.on_close,
            on_stopped=self.on_stopped,
        )
        self.blueprint.apply(self, **kwargs)
Code example #27
            'PORT': 5432 # in memory post
        }
    }

CELERY_TASK_ALWAYS_EAGER = True
CELERY_TASK_EAGER_PROPAGATES = True

SOUTH_TESTS_MIGRATE = False
#DATABASES = {
    #'default': {
        #'ENGINE': 'django.db.backends.sqlite3',
        #'NAME': ':memory:'
    #}
#}

ZEUS_MIXNET_NR_PARALLEL = billiard.cpu_count()
ZEUS_MIXNET_NR_ROUNDS = 16

ZEUS_ELECTION_STREAM_HANDLER = os.environ.get("ZEUS_TESTS_VERBOSE", False)

EMAIL_SUBJECT_PREFIX = 'Zeus System Message: '


def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise
Code example #28
def default_concurrency():
    return multiprocessing.cpu_count()
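Since Python 3.4, os.cpu_count() reports the same number but returns None instead of raising when the count cannot be determined, so a variant with a fallback can be written without a try/except (a sketch, not part of the original project):

import os

def default_concurrency(fallback=2):
    return os.cpu_count() or fallback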
Code example #29
File: mans_to_es.py Project: LDO-CERT/mans_to_es
def main():
    parser = argparse.ArgumentParser(
        description="Push .mans information in ElasticSearch index",
        prog="MANS to ES")
    # Required parameters
    parser.add_argument("--filename",
                        dest="filename",
                        required=True,
                        help="Path of the .mans file")

    # TimeSketch parameters
    timesketch = argparse.ArgumentParser(add_help=False)
    timesketch.add_argument("--sketch_id",
                            dest="sketch_id",
                            help="TimeSketch Sketch id")
    timesketch.add_argument("--sketch_name",
                            dest="sketch_name",
                            help="TimeSketch Sketch name")
    timesketch.add_argument(
        "--sketch_description",
        dest="sketch_description",
        help="TimeSketch Sketch description",
    )
    timesketch.add_argument("--timeline_name",
                            dest="timeline_name",
                            help="TimeSketch Timeline Name")

    # Elastic parameters
    elastic = argparse.ArgumentParser(add_help=False)
    elastic.add_argument("--index",
                         dest="index",
                         help="ElasticSearch Index name")
    elastic.add_argument("--es_host",
                         dest="es_host",
                         help="ElasticSearch host")
    elastic.add_argument("--es_port",
                         dest="es_port",
                         help="ElasticSearch port")

    sp = parser.add_subparsers(required=True, dest="mode")
    sp_elastic = sp.add_parser("elastic",
                               parents=[elastic],
                               help="Save data in elastic")
    sp_timesketch = sp.add_parser("timesketch",
                                  parents=[timesketch],
                                  help="Save data in TimeSketch")

    # Optional parameters to increase performances
    parser.add_argument(
        "--cpu_count",
        dest="cpu_count",
        default=cpu_count() - 1,
        help="cpu count",
        type=int,
    )
    parser.add_argument(
        "--bulk_size",
        dest="bulk_size",
        default=1000,
        help="Bulk size for multiprocessing parsing and upload",
        type=int,
    )

    parser.add_argument("--version",
                        dest="version",
                        action="version",
                        version="%(prog)s 1.7")
    args = parser.parse_args()

    if args.mode == "elastic":
        if not all([args.index, args.es_port, args.es_host]):
            sp_elastic.print_help()
            return False
        else:
            mte = MansToEs(
                mode=args.mode,
                filename=args.filename,
                index=args.index,
                es_host=args.es_host,
                es_port=args.es_port,
                bulk_size=args.bulk_size,
                cpu_count=args.cpu_count,
            )
            mte.run()
            return True
    elif args.mode == "timesketch":
        if (not any(
            [args.sketch_id, args.sketch_name, args.sketch_description])
                or all([
                    args.sketch_id, args.sketch_name, args.sketch_description
                ]) or not args.timeline_name):
            sp_timesketch.print_help()
            return False
        else:
            mte = MansToEs(
                mode=args.mode,
                filename=args.filename,
                sketch_id=args.sketch_id,
                sketch_name=args.sketch_name,
                sketch_description=args.sketch_description,
                timeline_name=args.timeline_name,
                bulk_size=args.bulk_size,
                cpu_count=args.cpu_count,
            )
            mte.run()
            return True
Code example #30
    def run_exp_for_all_classifiers(save_dir=DIR_CLASSIFIERS, parallel=True):
        """
        Runs all the saved classifiers that are located in save_dir.
        parallel, if True, will use the multiprocessing module to run
        multiple experiments at the same time.

        At present, however, this is broken due to the way in which Python
        processes match up to C-lib extensions. In this case, OpenCV just
        kinda dies when processing is attempted in this manner.

        Currently investigating a fix -- until then, just run linear or
        via threads.
        """
        classifiers = EXPClassifierHandler.get_all_saved_classifiers(DIR_CLASSIFIERS)
        classifiers = [x for x in classifiers if not x.endswith(".csv")]

        if len(classifiers) == 0:
            log.info("No more experiments to run, exiting.")
            return

        if parallel:
            videos_to_classifiers = {}

            for c in classifiers:
                clf = load_saved_classifier(save_dir + c)
                file_name = clf.video_path.split("/")[-1]

                if file_name not in videos_to_classifiers:
                    videos_to_classifiers[file_name] = []

                clfid = (clf.identifier, c)
                videos_to_classifiers[file_name].append(clfid)

            # So now we've mapped video_file: [classifiers], multiproc by k
            tasks = mp.Queue()
            results = mp.JoinableQueue()
            interim = []
            args = (tasks, results, save_dir)
            n_procs = min(mp.cpu_count(), len(videos_to_classifiers.keys()))

            for k in videos_to_classifiers.keys():
                these_classifiers = videos_to_classifiers[k]
                tasks.put(these_classifiers)

            delegator = EXPClassifierHandler.run_exp_from_mp_queue

            for _ in range(n_procs):
                p = mp.Process(target=delegator, args=args).start()

            for _ in range(len(videos_to_classifiers.keys())):
                interim.append(results.get())
                results.task_done()

            for _ in range(n_procs):
                tasks.put(None)

            results.join()
            tasks.close()
            results.close()
        else:
            for c in classifiers:
                EXPClassifierHandler.run_exp_for_classifier(c, save_dir)

        # Maybe by the time we get here more will be waiting... keep going
        EXPClassifierHandler.run_exp_for_all_classifiers(save_dir, parallel)
Code example #31
        event_ids, event_dicts = zip(*event_tuples)

        with Pool(parallel_jobs(n)) as pool:
            event_bytes = pool.map(event_from_dict, event_dicts)
            events = pool.map(bytes_to_event, event_bytes)
            args = zip(*(events, [list_name] * len(events)))
            dicts = pool.starmap(extract_attr, args)

            args = zip(*(dicts, [field] * len(dicts), [value] * len(dicts)))
            matches = pool.starmap(find_first_by, args)

            for i, detection in enumerate(matches):
                if detection:
                    return detection, event_ids[i], events[i].correlations
        next_id = decrement_id(event_ids[-1])
    return None, None, None


def find_first_by(dicts, field, value):
    for d in dicts:
        if d[field] == value:
            return d
    return None


def event_from_dict(x):
    return next(iter(x.values()))


parallel_jobs = lambda x: min(x, cpu_count())
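Capping the pool size at min(workload size, CPU count) avoids spawning workers that would sit idle on small inputs. A small usage sketch of the same helper with a stand-in workload:

from multiprocessing import Pool, cpu_count

parallel_jobs = lambda x: min(x, cpu_count())

if __name__ == "__main__":
    items = [(i, 2) for i in range(5)]
    with Pool(parallel_jobs(len(items))) as pool:
        squares = pool.starmap(pow, items)  # at most five workers, fewer on small machines
    print(squares)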
Code example #32
File: describe.py Project: pierr/pandas-profiling
def describe(df, bins=10, check_correlation=True, correlation_threshold=0.9, correlation_overrides=None, check_recoded=False, pool_size=multiprocessing.cpu_count(), **kwargs):
    """Generates a dict containing summary statistics for a given dataset stored as a pandas `DataFrame`.

    Used as-is, it will output its content as an HTML report in a Jupyter notebook.

    Parameters
    ----------
    df : DataFrame
        Data to be analyzed
    bins : int
        Number of bins in histogram.
        The default is 10.
    check_correlation : boolean
        Whether or not to check correlation.
        It's `True` by default.
    correlation_threshold: float
        Threshold to determine if the variable pair is correlated.
        The default is 0.9.
    correlation_overrides : list
        Variable names not to be rejected because they are correlated.
        There is no variable in the list (`None`) by default.
    check_recoded : boolean
        Whether or not to check recoded correlation (memory heavy feature).
        Since it's an expensive computation it can be activated for small datasets.
        `check_correlation` must be true to enable this check.
        It's `False` by default.
    pool_size : int
        Number of workers in thread pool
        The default is equal to the number of CPUs.

    Returns
    -------
    dict
        Containing the following keys:
            * table: general statistics on the dataset
            * variables: summary statistics for each variable
            * freq: frequency table

    Notes:
    ------
        * The section dedicated to check the correlation should be externalized
    """

    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be of type pandas.DataFrame")
    if df.empty:
        raise ValueError("df can not be empty")

    try:
        # reset matplotlib style before use
        # Fails in matplotlib 1.4.x so plot might look bad
        matplotlib.style.use("default")
    except:
        pass

    matplotlib.style.use(resource_filename(__name__, "pandas_profiling.mplstyle"))

    # Clearing the cache before computing stats
    base.clear_cache()

    if not pd.Index(np.arange(0, len(df))).equals(df.index):
        # Treat index as any other column
        df = df.reset_index()

    kwargs.update({'bins': bins})
    # Describe all variables in a univariate way
    if pool_size == 1:
        local_multiprocess_func = partial(multiprocess_func, **kwargs)
        ldesc = {col: s for col, s in map(local_multiprocess_func, df.iteritems())}
    else:
        pool = multiprocessing.Pool(pool_size)
        local_multiprocess_func = partial(multiprocess_func, **kwargs)
        ldesc = {col: s for col, s in pool.map(local_multiprocess_func, df.iteritems())}
        pool.close()

    # Get correlations
    dfcorrPear = df.corr(method="pearson")
    dfcorrSpear = df.corr(method="spearman")

    # Check correlations between variable
    if check_correlation is True:
        ''' TODO: corr(x,y) > 0.9 and corr(y,z) > 0.9 does not imply corr(x,z) > 0.9
        If x~y and y~z but not x~z, it would be better to delete only y
        Better way would be to find out which variable causes the highest increase in multicollinearity.
        '''
        corr = dfcorrPear.copy()
        for x, corr_x in corr.iterrows():
            if correlation_overrides and x in correlation_overrides:
                continue

            for y, corr in corr_x.iteritems():
                if x == y: break

                if corr > correlation_threshold:
                    ldesc[x] = pd.Series(['CORR', y, corr], index=['type', 'correlation_var', 'correlation'])

        if check_recoded:
            categorical_variables = [(name, data) for (name, data) in df.iteritems() if base.get_vartype(data)=='CAT']
            for (name1, data1), (name2, data2) in itertools.combinations(categorical_variables, 2):
                if correlation_overrides and name1 in correlation_overrides:
                    continue

                confusion_matrix=pd.crosstab(data1,data2)
                if confusion_matrix.values.diagonal().sum() == len(df):
                    ldesc[name1] = pd.Series(['RECODED', name2], index=['type', 'correlation_var'])

    # Convert ldesc to a DataFrame
    names = []
    ldesc_indexes = sorted([x.index for x in ldesc.values()], key=len)
    for idxnames in ldesc_indexes:
        for name in idxnames:
            if name not in names:
                names.append(name)
    variable_stats = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1)
    variable_stats.columns.names = df.columns.names

    # General statistics
    table_stats = {}

    table_stats['n'] = len(df)
    table_stats['nvar'] = len(df.columns)
    table_stats['total_missing'] = variable_stats.loc['n_missing'].sum() / (table_stats['n'] * table_stats['nvar'])
    unsupported_columns = variable_stats.transpose()[variable_stats.transpose().type != base.S_TYPE_UNSUPPORTED].index.tolist()
    table_stats['n_duplicates'] = sum(df.duplicated(subset=unsupported_columns)) if len(unsupported_columns) > 0 else 0

    memsize = df.memory_usage(index=True).sum()
    table_stats['memsize'] = formatters.fmt_bytesize(memsize)
    table_stats['recordsize'] = formatters.fmt_bytesize(memsize / table_stats['n'])

    table_stats.update({k: 0 for k in ("NUM", "DATE", "CONST", "CAT", "UNIQUE", "CORR", "RECODED", "BOOL", "UNSUPPORTED")})
    table_stats.update(dict(variable_stats.loc['type'].value_counts()))
    table_stats['REJECTED'] = table_stats['CONST'] + table_stats['CORR'] + table_stats['RECODED']

    return {
        'table': table_stats,
        'variables': variable_stats.T,
        'freq': {k: (base.get_groupby_statistic(df[k])[0] if variable_stats[k].type != base.S_TYPE_UNSUPPORTED else None) for k in df.columns},
        'correlations': {'pearson': dfcorrPear, 'spearman': dfcorrSpear}
    }
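A usage sketch for the describe function above, assuming the rest of the pandas-profiling module (multiprocess_func, base, formatters) is importable alongside it; the DataFrame is illustrative. pool_size=1 keeps all column summaries in-process, which is convenient when debugging, while the default fans them out across one worker per CPU:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [1.0, 2.5, 3.5, 4.0]})
report = describe(df, bins=5, pool_size=1)
print(report["table"]["n"], report["table"]["nvar"])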
Code example #33
    def run_exp_for_all_classifiers(save_dir=DIR_CLASSIFIERS, parallel=True):
        """
        Runs all the saved classifiers that are located in save_dir.
        parallel, if True, will use the multiprocessing module to run
        multiple experiments at the same time.

        At present, however, this is broken due to the way in which Python
        processes match up to C-lib extensions. In this case, OpenCV just
        kinda dies when processing is attempted in this manner.

        Currently investigating a fix -- until then, just run linear or
        via threads.
        """
        classifiers = EXPClassifierHandler.get_all_saved_classifiers(
            DIR_CLASSIFIERS)
        classifiers = [x for x in classifiers if not x.endswith(".csv")]

        if len(classifiers) == 0:
            log.info("No more experiments to run, exiting.")
            return

        if parallel:
            videos_to_classifiers = {}

            for c in classifiers:
                clf = load_saved_classifier(save_dir + c)
                file_name = clf.video_path.split("/")[-1]

                if file_name not in videos_to_classifiers:
                    videos_to_classifiers[file_name] = []

                clfid = (clf.identifier, c)
                videos_to_classifiers[file_name].append(clfid)

            # So now we've mapped video_file: [classifiers], multiproc by k
            tasks = mp.Queue()
            results = mp.JoinableQueue()
            interim = []
            args = (tasks, results, save_dir)
            n_procs = min(mp.cpu_count(), len(videos_to_classifiers.keys()))

            for k in videos_to_classifiers.keys():
                these_classifiers = videos_to_classifiers[k]
                tasks.put(these_classifiers)

            delegator = EXPClassifierHandler.run_exp_from_mp_queue

            for _ in range(n_procs):
                p = mp.Process(target=delegator, args=args).start()

            for _ in range(len(videos_to_classifiers.keys())):
                interim.append(results.get())
                results.task_done()

            for _ in range(n_procs):
                tasks.put(None)

            results.join()
            tasks.close()
            results.close()
        else:
            for c in classifiers:
                EXPClassifierHandler.run_exp_for_classifier(c, save_dir)

        # Maybe by the time we get here more will be waiting... keep going
        EXPClassifierHandler.run_exp_for_all_classifiers(save_dir, parallel)