Example #1
def get_stats():
    '''
    Configure a shared ThreadStats instance for datadog
    '''
    global __stats

    if __stats is not None:
        return __stats

    if secrets.DATADOG_API_KEY:
        datadog.initialize(
            api_key=secrets.DATADOG_API_KEY,
            host_name='coverage.{}.moz.tools'.format(secrets.APP_CHANNEL),
        )
    else:
        logger.info('No datadog credentials')

    # Must be instantiated after initialize
    # https://datadogpy.readthedocs.io/en/latest/#datadog-threadstats-module
    __stats = datadog.ThreadStats(
        constant_tags=[
            config.PROJECT_NAME,
            'channel:{}'.format(secrets.APP_CHANNEL),
        ],
    )
    __stats.start(flush_in_thread=True)
    return __stats
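
A minimal usage sketch of the helper above (the metric names are hypothetical, not taken from the original project):

stats = get_stats()
# values are buffered and flushed by the background thread started in get_stats()
stats.increment('coverage.tasks.processed')
stats.gauge('coverage.queue.size', 42)
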
Example #2
    def __init__(self,
                 regions,
                 aws_access_key,
                 aws_secret_key,
                 kubeconfig,
                 pod_namespace,
                 idle_threshold,
                 type_idle_threshold,
                 instance_init_time,
                 cluster_name,
                 notifier,
                 scale_up=True,
                 maintainance=True,
                 datadog_api_key=None,
                 over_provision=5,
                 dry_run=False):
        if kubeconfig:
            # for using locally
            logger.debug('Using kubeconfig %s', kubeconfig)
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_file(kubeconfig))
        else:
            # for using on kube
            logger.debug('Using kube service account')
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_service_account())
        if pod_namespace is None:
            self.pod_namespace = pykube.all
        else:
            self.pod_namespace = pod_namespace

        self._drained = {}
        self.session = boto3.session.Session(
            aws_access_key_id=aws_access_key,
            aws_secret_access_key=aws_secret_key,
            region_name=regions[0])  # provide a default region
        self.autoscaling_groups = autoscaling_groups.AutoScalingGroups(
            session=self.session, regions=regions, cluster_name=cluster_name)
        self.autoscaling_timeouts = autoscaling_groups.AutoScalingTimeouts(
            self.session)

        # config
        self.regions = regions
        self.idle_threshold = idle_threshold
        self.instance_init_time = instance_init_time
        self.type_idle_threshold = type_idle_threshold
        self.over_provision = over_provision

        self.scale_up = scale_up
        self.maintainance = maintainance

        self.notifier = notifier

        if datadog_api_key:
            datadog.initialize(api_key=datadog_api_key)
            logger.info('Datadog initialized')
        self.stats = datadog.ThreadStats()
        self.stats.start()

        self.dry_run = dry_run
Example #3
def get_stats():
    """
    Configure a shared ThreadStats instance for datadog
    """
    global __stats

    if __stats is not None:
        return __stats

    app_channel = taskcluster.secrets["APP_CHANNEL"]

    if taskcluster.secrets.get("DATADOG_API_KEY"):
        datadog.initialize(
            api_key=taskcluster.secrets["DATADOG_API_KEY"],
            host_name=f"coverage.{app_channel}.moz.tools",
        )
    else:
        logger.info("No datadog credentials")

    # Must be instantiated after initialize
    # https://datadogpy.readthedocs.io/en/latest/#datadog-threadstats-module
    __stats = datadog.ThreadStats(
        constant_tags=[config.PROJECT_NAME, f"channel:{app_channel}"])
    __stats.start(flush_in_thread=True)
    return __stats
Example #4
    def stats(cls):
        """
        Get the threaded datadog client (singleton): `datadog.ThreadStats`.

        This will return a `mock.Mock` instance if the `DATADOG_ENABLED` setting
        is `False`. This makes it possible to run this in development without
        having to make any additional changes or conditional checks.
        """
        if cls._stats_instance:
            return cls._stats_instance

        # If datadog is disabled by the Django setting DATADOG_ENABLED, we use
        # a mock object instead of the actual datadog client. This makes it
        # easier to switch it out without too much additional work and should
        # be good enough for development.
        api_key = getattr(cls.settings, cls.KEY_DATADOG_API_KEY, None)
        if cls.STATS_ENABLED is False or not api_key:
            cls._stats_instance = mock.Mock()

        else:
            datadog.initialize(api_key=api_key)

            cls._stats_instance = datadog.ThreadStats()
            cls._stats_instance.start(roll_up_interval=cls.ROLLUP_INTERVAL,
                                      flush_interval=cls.FLUSH_INTERVAL)

        return cls._stats_instance
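
A hedged usage sketch of the accessor above (the owning class name Metrics and the metric name are assumptions):

# In development (STATS_ENABLED is False or no API key is set) stats() returns a
# mock.Mock, so this call is a silent no-op; otherwise it reports to Datadog.
Metrics.stats().increment('app.files.indexed')
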
Example #5
def _get_datadog_stats():
    global _datadog_stats_val
    if _datadog_stats_val is None:
        datadog.initialize(api_key=auth_config.DATADOG_API_KEY,
                           app_key=auth_config.DATADOG_APP_KEY)
        _datadog_stats_val = datadog.ThreadStats()
        _datadog_stats_val.start()
    return _datadog_stats_val
Example #6
    def __init__(self,
                 service_principal_app_id,
                 service_principal_secret,
                 service_principal_tenant_id,
                 kubeconfig,
                 template_file,
                 parameters_file,
                 idle_threshold,
                 spare_agents,
                 instance_init_time,
                 container_service_name,
                 resource_group,
                 notifier,
                 scale_up=True,
                 maintainance=True,
                 datadog_api_key=None,
                 over_provision=5,
                 dry_run=False):
        if kubeconfig:
            # for using locally
            logger.debug('Using kubeconfig %s', kubeconfig)
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_file(kubeconfig))
        else:
            # for using on kube
            logger.debug('Using kube service account')
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_service_account())

        self._drained = {}
        self.container_service_name = container_service_name
        self.template_file = template_file
        self.parameters_file = parameters_file
        self.resource_group = resource_group

        self.agent_pools = {}
        self.pools_instance_type = {}

        azure_login.login(service_principal_app_id, service_principal_secret,
                          service_principal_tenant_id)

        # config
        self.idle_threshold = idle_threshold
        self.instance_init_time = instance_init_time
        self.spare_agents = spare_agents
        self.over_provision = over_provision

        self.scale_up = scale_up
        self.maintainance = maintainance
        self.notifier = notifier

        if datadog_api_key:
            datadog.initialize(api_key=datadog_api_key)
            logger.info('Datadog initialized')
        self.stats = datadog.ThreadStats()
        self.stats.start()

        self.dry_run = dry_run
Example #7
def _init_datadog():
    dd_options = {
        'api_key': config['dd_api_key'],
        'app_key': config['dd_app_key'],
    }

    datadog.initialize(**dd_options)
    stats = datadog.ThreadStats()
    stats.start()
    return stats
Example #8
    def __init__(self, service_principal_app_id, service_principal_secret, service_principal_tenant_id,
                 kubeconfig, idle_threshold, reserve_idle_threshold,
                 instance_init_time, container_service_name, resource_group, notifier,
                 scale_up=True, maintainance=True,
                 datadog_api_key=None,
                 over_provision=5, dry_run=False):
        if kubeconfig:
            # for using locally
            logger.debug('Using kubeconfig %s', kubeconfig)
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_file(kubeconfig))
        else:
            # for using on kube
            logger.debug('Using kube service account')
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_service_account())

        self._drained = {}

        azure_login.login(
            service_principal_app_id,
            service_principal_secret,
            service_principal_tenant_id)

        #  Create container service
        self.container_service = ContainerService(
            get_mgmt_service_client(ComputeManagementClient).container_services, 
            container_service_name, 
            resource_group)

        # self.autoscaling_timeouts = autoscaling_groups.AutoScalingTimeouts(
        #     self.session)

        # config
        self.idle_threshold = idle_threshold
        self.instance_init_time = instance_init_time
        self.reserve_idle_threshold = reserve_idle_threshold
        self.over_provision = over_provision

        self.scale_up = scale_up
        self.maintainance = maintainance

        self.notifier = notifier

        if datadog_api_key:
            datadog.initialize(api_key=datadog_api_key)
            logger.info('Datadog initialized')
        self.stats = datadog.ThreadStats()
        self.stats.start()

        self.dry_run = dry_run
Example #9
    def __init__(self, dd_api_key=None, dd_app_key=None, **kwargs): 
        self.dd_api_key = dd_api_key
        self.dd_app_key = dd_app_key
        self.env = kwargs.get('env')
        self.constant_tags = kwargs.get('constant_tags')
        self.agent = None

        if dd_api_key and dd_app_key:
            datadog.initialize(api_key=dd_api_key, app_key=dd_app_key)
            self.agent = datadog.ThreadStats(constant_tags=self.constant_tags)
            self.agent.start()
        if self.agent:
            LOG.info('Datadog agent found: Will report metrics successfully')
        else:
            LOG.info('Datadog agent not found: Will not report metrics')
Example #10
def acquire_datadog_client(
    config: dict[str, Any],
) -> Iterator[Optional[datadog.ThreadStats]]:
    if all(config.values()):
        datadog.initialize(**config)
        datadog_client = datadog.ThreadStats()
        try:
            datadog_client.start(flush_in_thread=True, flush_interval=15)
            # wipe any previous stats from the page.
            datadog_client.gauge("gulag.online_players", 0)
            yield datadog_client
        finally:
            datadog_client.stop()
            datadog_client.flush()
    else:
        yield None
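
A usage sketch for the generator above, assuming it is meant to be wrapped with contextlib.contextmanager (the wrapping, the config keys, and the metric name are assumptions):

import contextlib
import os

datadog_client_ctx = contextlib.contextmanager(acquire_datadog_client)

cfg = {
    # empty values make all(config.values()) false, so the block yields None
    "api_key": os.environ.get("DD_API_KEY", ""),
    "app_key": os.environ.get("DD_APP_KEY", ""),
}

with datadog_client_ctx(cfg) as client:
    if client is not None:
        client.increment("gulag.logins")
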
Example #11
    def __init__(self, apiKey=None, appKey=None, periodicChecks=None):
        """
		Initialize a toggleable Datadog Client

		:param apiKey: Datadog api key. Leave empty to create a dummy (disabled) Datadog client.
		:param appKey: Datadog app key. Leave empty to create a dummy (disabled) Datadog client.
		:param periodicChecks: List of periodicCheck objects. Optional. Leave empty to disable periodic checks.
		"""
        if apiKey is not None and appKey is not None:
            datadog.initialize(api_key=apiKey, app_key=appKey)
            self.client = datadog.ThreadStats()
            self.client.start()
            self.periodicChecks = periodicChecks
            if self.periodicChecks is not None:
                threading.Thread(target=self.__periodicCheckLoop).start()
        else:
            self.client = None
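
A hedged example of the toggle described in the docstring (the class name DatadogClient and the metric name are assumptions):

# With both keys set, a real ThreadStats client is created and started.
client = DatadogClient(apiKey='xxxxxxxx', appKey='yyyyyyyy')

# With either key missing, self.client stays None, so callers should guard:
dummy = DatadogClient()

if client.client is not None:
    client.client.increment('bancho.logins')
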
Example #12
def _dd_get_stats():
    global _dd_stats

    if not _dd_stats:
        dd_api_instance = datadog_model.DataDogApiAuth.GetInstance()
        if not dd_api_instance:
            return None

        datadog.initialize(dd_api_instance.api_key,
                           host_name='santaupvote.appspot.com')

        # we can't have background threads
        _dd_stats = datadog.ThreadStats()
        _dd_stats.start(flush_in_thread=False)

        # this requires an agent
        # _dd_stats = datadog.statsd

    return _dd_stats
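
Because the snippet above starts ThreadStats with flush_in_thread=False, buffered metrics are only sent when flush() is called explicitly. A minimal sketch (the metric name and the surrounding handler are hypothetical):

def record_upvote():
    stats = _dd_get_stats()
    if stats is not None:
        stats.increment('santaupvote.votes')
        stats.flush()  # no background flush thread, so flush by hand
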
Example #13
# Configuration
nthreads = args.nthreads
ntesseract_processes = 10
socks_host = 'localhost'
socks_port = 9050
start_index = args.start_index
code_range = args.code_range
requests_per_second = args.requests_per_second
s3_bucket_name = "iran-article-html"

# DataDog
datadog.initialize(
    api_key=DATADOG_API_KEY,
    app_key=DATADOG_APP_KEY,
)
stats = datadog.ThreadStats()
stats.start()
wrapped_stats = WrappedStats(stats)

mk_proxy_url = 'socks5h://u{{}}:p{{}}@{}:{}'.format(socks_host,
                                                    socks_port).format

global_ctx = GlobalContext(
    mk_proxy_url=mk_proxy_url,
    rate_limiter=RateLimiter(requests_per_second),
    # tesseract_guard = ConcurrencyLimiter(max_running=ntesseract_processes).guard,
)

code_tracker = CodeTracker(start_index, code_range)

Example #14
    def __init__(self, aws_regions, aws_access_key, aws_secret_key,
                 azure_client_id, azure_client_secret, azure_subscription_id, azure_tenant_id,
                 azure_resource_group_names, azure_slow_scale_classes, kubeconfig,
                 idle_threshold, type_idle_threshold,
                 instance_init_time, cluster_name, notifier,
                 max_scale_in_fraction=0.1,
                 scale_up=True, maintainance=True,
                 datadog_api_key=None,
                 over_provision=5, dry_run=False):
        if kubeconfig:
            # for using locally
            logger.debug('Using kubeconfig %s', kubeconfig)
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_file(kubeconfig))
        else:
            # for using on kube
            logger.debug('Using kube service account')
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_service_account())

        self.max_scale_in_fraction = max_scale_in_fraction
        self._drained = {}
        self.session = None
        if aws_access_key and aws_secret_key:
            self.session = boto3.session.Session(
                aws_access_key_id=aws_access_key,
                aws_secret_access_key=aws_secret_key,
                region_name=aws_regions[0])  # provide a default region
        self.autoscaling_groups = autoscaling_groups.AutoScalingGroups(
            session=self.session, regions=aws_regions,
            cluster_name=cluster_name)
        self.autoscaling_timeouts = autoscaling_groups.AutoScalingTimeouts(
            self.session)

        azure_regions = []
        resource_groups = []
        self.azure_client = None
        if azure_client_id:
            azure_credentials = ServicePrincipalCredentials(
                client_id=azure_client_id,
                secret=azure_client_secret,
                tenant=azure_tenant_id
            )

            # Setup the Azure client
            resource_client = ResourceManagementClient(azure_credentials, azure_subscription_id)
            resource_client.providers.register('Microsoft.Compute')
            resource_client.providers.register('Microsoft.Network')
            resource_client.providers.register('Microsoft.Insights')

            region_map = {}
            for resource_group_name in azure_resource_group_names:
                resource_group = resource_client.resource_groups.get(resource_group_name)
                location = resource_group.location
                if location in region_map:
                    logger.fatal("{} and {} are both in {}. May only have one resource group per region".format(
                        resource_group_name, region_map[location], location
                    ))
                region_map[location] = resource_group_name
                azure_regions.append(location)
                resource_groups.append(resource_group)

            compute_client = ComputeManagementClient(azure_credentials, azure_subscription_id)
            compute_client.config.retry_policy.policy = azure.AzureBoundedRetry.from_retry(compute_client.config.retry_policy.policy)

            monitor_client = MonitorClient(azure_credentials, azure_subscription_id)
            monitor_client.config.retry_policy.policy = azure.AzureBoundedRetry.from_retry(monitor_client.config.retry_policy.policy)
            self.azure_client = AzureWriteThroughCachedApi(AzureWrapper(compute_client, monitor_client))

        self.azure_groups = azure.AzureGroups(resource_groups, azure_slow_scale_classes, self.azure_client)

        # config
        self.azure_resource_group_names = azure_resource_group_names
        self.azure_regions = azure_regions
        self.aws_regions = aws_regions
        self.idle_threshold = idle_threshold
        self.instance_init_time = instance_init_time
        self.type_idle_threshold = type_idle_threshold
        self.over_provision = over_provision

        self.scale_up = scale_up
        self.maintainance = maintainance

        self.notifier = notifier

        if datadog_api_key:
            datadog.initialize(api_key=datadog_api_key)
            logger.info('Datadog initialized')
        self.stats = datadog.ThreadStats()
        self.stats.start()

        self.dry_run = dry_run
Example #15
async def before_serving() -> None:
    """Called before the server begins serving connections."""
    glob.loop = asyncio.get_event_loop()

    if glob.has_internet:
        # retrieve a client session to use for http connections.
        glob.http = aiohttp.ClientSession(
            json_serialize=orjson.dumps)  # type: ignore
    else:
        glob.http = None

    # retrieve a pool of connections to use for mysql interaction.
    glob.db = cmyui.AsyncSQLPool()
    await glob.db.connect(glob.config.mysql)

    # run the sql & submodule updater (uses http & db).
    # TODO: updating cmyui_pkg should run before it's import
    updater = Updater(glob.version)
    await updater.run()
    await updater.log_startup()

    # open a connection to our local geoloc database,
    # if the database file is present.
    if GEOLOC_DB_FILE.exists():
        glob.geoloc_db = geoip2.database.Reader(GEOLOC_DB_FILE)
    else:
        glob.geoloc_db = None

    # support for https://datadoghq.com
    if all(glob.config.datadog.values()):
        datadog.initialize(**glob.config.datadog)
        glob.datadog = datadog.ThreadStats()
        glob.datadog.start(flush_in_thread=True, flush_interval=15)

        # wipe any previous stats from the page.
        glob.datadog.gauge('gulag.online_players', 0)
    else:
        glob.datadog = None

    # cache many global collections/objects from sql,
    # such as channels, mappools, clans, bot, etc.
    async with glob.db.pool.acquire() as conn:
        async with conn.cursor(aiomysql.DictCursor) as db_cursor:
            await setup_collections(db_cursor)

    new_coros = []

    # create a task for each donor expiring in 30d.
    new_coros.extend(await bg_loops.donor_expiry())

    # setup a loop to kick inactive ghosted players.
    new_coros.append(bg_loops.disconnect_ghosts())
    '''
    # if the surveillance webhook has a value, run
    # automatic (still very primitive) detections on
    # replays deemed by the server's configurable values.
    if glob.config.webhooks['surveillance']:
        new_coros.append(bg_loops.replay_detections())
    '''

    # reroll the bot's random status every `interval` sec.
    new_coros.append(bg_loops.reroll_bot_status(interval=300))

    for coro in new_coros:
        glob.app.add_pending_task(coro)
Example #16
def main() -> None:
    """Attempt to start up gulag."""
    # make sure we're running on an appropriate
    # platform with all required software.
    ensure_platform()

    # make sure all required services
    # are being run in the background.
    ensure_services()

    # warn the user if gulag is running on root.
    if os.geteuid() == 0:
        log(
            'It is not recommended to run gulag as root, '
            'especially in production..', Ansi.LYELLOW)

        if glob.config.advanced:
            log(
                'The risk is even greater with features '
                'such as config.advanced enabled.', Ansi.LRED)

    # check whether we are connected to the internet.
    glob.has_internet = utils.misc.check_connection(timeout=1.5)
    if not glob.has_internet:
        log('Running in offline mode, some features '
            'will not be available.', Ansi.LRED)

    # create /.data and its subdirectories.
    data_path = Path.cwd() / '.data'
    data_path.mkdir(exist_ok=True)

    for sub_dir in ('avatars', 'logs', 'osu', 'osr', 'ss'):
        subdir = data_path / sub_dir
        subdir.mkdir(exist_ok=True)

    achievements_path = data_path / 'assets/medals/client'
    if not achievements_path.exists():
        # create directory & download achievement images
        achievements_path.mkdir(parents=True)
        utils.misc.download_achievement_images(achievements_path)

    # make sure oppai-ng binary is built and ready.
    if not OPPAI_PATH.exists():
        log('No oppai-ng submodule found, attempting to clone.', Ansi.LMAGENTA)
        p = subprocess.Popen(args=['git', 'submodule', 'init'],
                             stdout=subprocess.DEVNULL,
                             stderr=subprocess.DEVNULL)
        if p.wait() == 1:
            sys.exit('Failed to initialize git submodules.')

        p = subprocess.Popen(args=['git', 'submodule', 'update'],
                             stdout=subprocess.DEVNULL,
                             stderr=subprocess.DEVNULL)
        if p.wait() == 1:
            sys.exit('Failed to update git submodules.')

    if not (OPPAI_PATH / 'oppai').exists():
        log('No oppai-ng binary found, attempting to build.', Ansi.LMAGENTA)
        p = subprocess.Popen(args=['./build'],
                             cwd='oppai-ng',
                             stdout=subprocess.DEVNULL,
                             stderr=subprocess.DEVNULL)
        if p.wait() == 1:
            sys.exit('Failed to build oppai-ng automatically.')

    # create a server object, which serves as a map of domains.
    app = glob.app = cmyui.Server(name=f'gulag v{glob.version}',
                                  gzip=4,
                                  debug=glob.config.debug)

    # add our endpoint's domains to the server;
    # each may potentially hold many individual endpoints.
    from domains.cho import domain as cho_domain  # c[e4-6]?.ppy.sh
    from domains.osu import domain as osu_domain  # osu.ppy.sh
    from domains.ava import domain as ava_domain  # a.ppy.sh
    from domains.map import domain as map_domain  # b.ppy.sh
    app.add_domains({cho_domain, osu_domain, ava_domain, map_domain})

    # enqueue tasks to run once the server
    # begins, and stops serving connections.
    # these make sure we set everything up
    # and take it down nice and graceful.
    app.before_serving = before_serving
    app.after_serving = after_serving

    # support for https://datadoghq.com
    if all(glob.config.datadog.values()):
        datadog.initialize(**glob.config.datadog)
        glob.datadog = datadog.ThreadStats()
        glob.datadog.start(flush_in_thread=True, flush_interval=15)

        # wipe any previous stats from the page.
        glob.datadog.gauge('gulag.online_players', 0)
    else:
        glob.datadog = None

    # start up the server; this starts an event loop internally,
    # using uvloop if it's installed. it uses SIGUSR1 for restarts.
    # NOTE: eventually the event loop creation will likely be
    # moved into the gulag codebase for increased flexibility.
    app.run(glob.config.server_addr, handle_restart=True)
Example #17
    from domains.cho import domain as cho_domain  # c[e4-6]?.ppy.sh
    from domains.osu import domain as osu_domain  # osu.ppy.sh
    from domains.ava import domain as ava_domain  # a.ppy.sh
    app.add_domains({cho_domain, osu_domain, ava_domain})

    # enqueue tasks to run once the server
    # begins, and stops serving connections.
    # these make sure we set everything up
    # and take it down nice and graceful.
    app.before_serving = before_serving
    app.after_serving = after_serving

    # support for https://datadoghq.com
    if all(glob.config.datadog.values()):
        datadog.initialize(**glob.config.datadog)
        glob.datadog = datadog.ThreadStats()
        glob.datadog.start(flush_in_thread=True, flush_interval=15)

        # wipe any previous stats from the page.
        glob.datadog.gauge('gulag.online_players', 0)
    else:
        glob.datadog = None

    # start up the server; this starts
    # an event loop internally, using
    # uvloop if it's installed.
    app.run(
        glob.config.server_addr,
        handle_signals=True,  # SIGHUP, SIGTERM, SIGINT
        sigusr1_restart=True)  # use SIGUSR1 for restarts
Example #18
    def __init__(self):
        self.api = datadog.ThreadStats()
Example #19
def main():
    import tempfile
    import argparse
    import h5py
    import datadog

    import settings
    import dataprep2

    if os.name != 'nt':
        import manhole
        manhole.install()

    logging.getLogger().setLevel(logging.INFO)
    logging.basicConfig(
        format='%(asctime)s %(thread)d %(levelname)s %(message)s',
        level=logging.INFO)

    default_host = os.environ.get("SPV2_DB_HOST", "localhost")
    default_dbname = os.environ.get("SPV2_DB_DBNAME", "postgres")
    default_schema = os.environ.get("SPV2_DB_SCHEMA", "public")
    default_user = os.environ.get("SPV2_DB_USER", "s2dev")
    default_password = os.environ.get("SPV2_DB_PASSWORD")
    default_dataprep_host = os.environ.get("SPV2_DATAPREP_SERVICE_HOST",
                                           "localhost")
    default_dataprep_port = int(
        os.environ.get("SPV2_DATAPREP_SERVICE_PORT", "8080"))
    parser = argparse.ArgumentParser(
        description="Trains a classifier for PDF Tokens")
    parser.add_argument("--host",
                        type=str,
                        default=default_host,
                        help="database host")
    parser.add_argument("--port", type=int, default=5432, help="database port")
    parser.add_argument("--dbname",
                        type=str,
                        default=default_dbname,
                        help="database name")
    parser.add_argument("--schema",
                        type=str,
                        default=default_schema,
                        help="schema name")
    parser.add_argument("--user",
                        type=str,
                        default=default_user,
                        help="database user")
    parser.add_argument("--password",
                        type=str,
                        default=default_password,
                        help="database password")
    parser.add_argument("--dataprep-host",
                        type=str,
                        default=default_dataprep_host,
                        help="Host where the dataprep service is running")
    parser.add_argument("--dataprep-port",
                        type=str,
                        default=default_dataprep_port,
                        help="Port where the dataprep service is running")
    args = parser.parse_args()

    taskdb_kwargs = dict(
        host=args.host,
        port=args.port,
        dbname=args.dbname,
        schema=args.schema,
        user=args.user,
    )
    logging.info("Task db config: %s", taskdb_kwargs)
    todo_list = papertasks.TaskDB(password=args.password, **taskdb_kwargs)

    # start datadog
    datadog.initialize(api_key=os.environ.get("DATADOG_API_KEY"))
    stats = datadog.ThreadStats()
    stats.start()
    datadog_prefix = args.host.split(".")[0]
    if datadog_prefix.startswith("spv2-"):
        datadog_prefix = datadog_prefix[5:]
    datadog_prefix = "spv2.%s." % datadog_prefix

    logging.info("Loading model settings ...")
    model_settings = settings.default_model_settings

    logging.info("Loading token statistics ...")
    token_stats = dataprep2.TokenStatistics("model/all.tokenstats3.gz")

    logging.info("Loading embeddings ...")
    embeddings = dataprep2.CombinedEmbeddings(
        token_stats, dataprep2.GloveVectors(model_settings.glove_vectors),
        model_settings.embedded_tokens_fraction)

    import with_labels  # Heavy import, so we do it here
    model = with_labels.model_with_labels(model_settings, embeddings)
    model.load_weights("model/C49.h5")
    model_version = 2

    logging.info("Starting to process tasks")
    total_paper_ids_processed = 0
    start_time = time.time()
    last_time_with_paper_ids = start_time

    def featurized_tokens_filenames() -> typing.Generator[typing.Tuple[
        tempfile.TemporaryDirectory, str], None, None]:
        # async http stuff
        async_event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(async_event_loop)
        connector = aiohttp.TCPConnector(loop=async_event_loop,
                                         force_close=True)
        session = aiohttp.ClientSession(connector=connector,
                                        read_timeout=120,
                                        conn_timeout=120)
        write_lock = asyncio.Lock()

        async def write_json_tokens_to_file(paper_id: str, json_file):
            url = "http://%s:%d/v1/json/paperid/%s" % (
                args.dataprep_host, args.dataprep_port, paper_id)
            attempts_left = 5
            with tempfile.NamedTemporaryFile(prefix="SPv2DBWorker-%s-" %
                                             paper_id,
                                             suffix=".json") as f:
                f.seek(0)
                f.truncate()

                def write_json_to_output(json_object):
                    f.write(json.dumps(json_object).encode("utf-8"))

                while True:
                    attempts_left -= 1
                    try:
                        async with session.get(url) as response:
                            if response.status == 200:
                                # We write to a tempfile first, because we don't want to end up with
                                # half-written json if something goes wrong while reading from the
                                # socket.
                                while True:
                                    chunk = await response.content.read(1024 *
                                                                        1024)
                                    if not chunk:
                                        break
                                    f.write(chunk)
                                stats.increment(datadog_prefix +
                                                "dataprep.success")
                                break
                            else:
                                stats.increment(datadog_prefix +
                                                "dataprep.failure")
                                if attempts_left > 0:
                                    logging.error(
                                        "Error %d from dataprep server for paper id %s. %d attempts left.",
                                        response.status, paper_id,
                                        attempts_left)
                                else:
                                    stats.increment(datadog_prefix +
                                                    "dataprep.gave_up")
                                    logging.error(
                                        "Error %d from dataprep server for paper id %s. Giving up.",
                                        response.status, paper_id)
                                    error = {
                                        "error": {
                                            "message":
                                            "Status %s from dataprep server" %
                                            response.status,
                                            "stackTrace":
                                            None,
                                            "docName":
                                            "%s.pdf" % paper_id
                                        }
                                    }
                                    write_json_to_output(error)
                                    break
                    except Exception as e:
                        stats.increment(datadog_prefix + "dataprep.failure")
                        if attempts_left > 0:
                            logging.error(
                                "Error %r from dataprep server for paper id %s. %d attempts left.",
                                e, paper_id, attempts_left)
                        else:
                            stats.increment(datadog_prefix +
                                            "dataprep.gave_up")
                            logging.error(
                                "Error %r from dataprep server for paper id %s. Giving up.",
                                e, paper_id)
                            error = {
                                "error": {
                                    "message":
                                    "Error %r while contacting dataprep server"
                                    % e,
                                    "stackTrace":
                                    None,
                                    "docName":
                                    "%s.pdf" % paper_id
                                }
                            }
                            write_json_to_output(error)
                            break

                # append the tempfile to the json file
                f.flush()
                f.seek(0)
                with await write_lock:
                    _send_all(f, json_file)

        processing_timeout = 600
        while True:
            paper_ids = todo_list.get_batch_to_process(model_version,
                                                       max_batch_size=50)
            logging.info("Received %d paper ids", len(paper_ids))
            if len(paper_ids) <= 0:
                if time.time() - last_time_with_paper_ids > processing_timeout:
                    logging.info(
                        "Saw no paper ids for more than %.0f seconds. Shutting down.",
                        processing_timeout)
                    return
                time.sleep(20)
                continue
            stats.increment(datadog_prefix + "attempts", len(paper_ids))

            temp_dir = tempfile.TemporaryDirectory(prefix="SPv2DBWorker-")

            logging.info("Getting JSON ...")
            getting_json_time = time.time()
            json_file_name = os.path.join(temp_dir.name, "tokens.json")
            with open(json_file_name, "wb") as json_file:
                write_json_futures = [
                    write_json_tokens_to_file(p, json_file) for p in paper_ids
                ]
                async_event_loop.run_until_complete(
                    asyncio.wait(write_json_futures))
            getting_json_time = time.time() - getting_json_time
            logging.info("Got JSON in %.2f seconds", getting_json_time)
            stats.timing(datadog_prefix + "get_json", getting_json_time)

            # pick out errors and write them to the DB
            paper_id_to_error = {}
            for line in dataprep2.json_from_file(json_file_name):
                if not "error" in line:
                    continue
                error = line["error"]
                error["message"] = dataprep2.sanitize_for_json(
                    error["message"])
                error["stackTrace"] = dataprep2.sanitize_for_json(
                    error["stackTrace"])
                paper_id = error["docName"]
                if paper_id.endswith(".pdf"):
                    paper_id = paper_id[:-4]
                paper_id_to_error[paper_id] = error
                logging.info("Paper %s has error %s", paper_id,
                             error["message"])
            if len(paper_id_to_error) > len(paper_ids) / 2:
                raise ValueError(
                    "More than half of the batch failed to preprocess. Something is afoot. We're giving up."
                )
            todo_list.post_errors(model_version, paper_id_to_error)
            stats.increment(datadog_prefix + "errors", len(paper_id_to_error))
            logging.info("Wrote %d errors to database", len(paper_id_to_error))

            # make unlabeled tokens file
            logging.info("Making unlabeled tokens ...")
            making_unlabeled_tokens_time = time.time()
            unlabeled_tokens_file_name = os.path.join(temp_dir.name,
                                                      "unlabeled-tokens.h5")
            dataprep2.make_unlabeled_tokens_file(json_file_name,
                                                 unlabeled_tokens_file_name,
                                                 ignore_errors=True)
            os.remove(json_file_name)
            making_unlabeled_tokens_time = time.time(
            ) - making_unlabeled_tokens_time
            logging.info("Made unlabeled tokens in %.2f seconds",
                         making_unlabeled_tokens_time)
            stats.timing(datadog_prefix + "make_unlabeled",
                         making_unlabeled_tokens_time)

            # make featurized tokens file
            logging.info("Making featurized tokens ...")
            making_featurized_tokens_time = time.time()
            with h5py.File(unlabeled_tokens_file_name,
                           "r") as unlabeled_tokens_file:
                featurized_tokens_file_name = os.path.join(
                    temp_dir.name, "featurized-tokens.h5")
                dataprep2.make_featurized_tokens_file(
                    featurized_tokens_file_name,
                    unlabeled_tokens_file, token_stats, embeddings,
                    dataprep2.VisionOutput(None), model_settings)
                # We don't delete the unlabeled file here because the featurized one contains references
                # to it.
            making_featurized_tokens_time = time.time(
            ) - making_featurized_tokens_time
            logging.info("Made featurized tokens in %.2f seconds",
                         making_featurized_tokens_time)
            stats.timing(datadog_prefix + "make_featurized",
                         making_featurized_tokens_time)

            yield temp_dir, featurized_tokens_file_name

    for temp_dir, featurized_tokens_file_name in dataprep2.threaded_generator(
            featurized_tokens_filenames(), 1):
        try:
            logging.info("Making and sending results ...")
            make_and_send_results_time = time.time()
            with h5py.File(
                    featurized_tokens_file_name) as featurized_tokens_file:

                def get_docs():
                    return dataprep2.documents_for_featurized_tokens(
                        featurized_tokens_file,
                        include_labels=False,
                        max_tokens_per_page=model_settings.tokens_per_batch)

                results = with_labels.run_model(model,
                                                model_settings,
                                                embeddings.glove_vocab(),
                                                get_docs,
                                                enabled_modes={"predictions"})
                results = {
                    doc.doc_sha: {
                        "docName":
                        doc.doc_id,
                        "docSha":
                        doc.doc_sha,
                        "title":
                        dataprep2.sanitize_for_json(
                            docresults["predictions"][0]),
                        "authors":
                        docresults["predictions"][1],
                        "bibs": [{
                            "title": bibtitle,
                            "authors": bibauthors,
                            "venue": bibvenue,
                            "year": bibyear
                        } for bibtitle, bibauthors, bibvenue, bibyear in
                                 docresults["predictions"][2]]
                    }
                    for doc, docresults in results
                }

                todo_list.post_results(model_version, results)
                stats.increment(datadog_prefix + "successes", len(results))
                total_paper_ids_processed += len(results)
        finally:
            temp_dir.cleanup()

        make_and_send_results_time = time.time() - make_and_send_results_time
        logging.info("Made and sent results in %.2f seconds",
                     make_and_send_results_time)
        stats.timing(datadog_prefix + "make_results",
                     make_and_send_results_time)

        # report progress
        paper_ids_per_hour = 3600 * total_paper_ids_processed / (time.time() -
                                                                 start_time)
        logging.info("This worker is processing %.0f paper ids per hour." %
                     paper_ids_per_hour)

        last_time_with_paper_ids = time.time()
Example #20
    def init_datadog(self, options):
        """ Initialize datadog agent """
        datadog.initialize(**options)

        self.dd = datadog.ThreadStats()
        self.dd.start()
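
A hypothetical usage sketch (the class name Worker and the metric name are assumptions; the options dict mirrors datadog.initialize keyword arguments):

import os

worker = Worker()
worker.init_datadog({
    'api_key': os.environ.get('DATADOG_API_KEY'),
    'app_key': os.environ.get('DATADOG_APP_KEY'),
})
worker.dd.increment('worker.jobs.completed')
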
Example #21
    def __init__(self,
                 regions,
                 aws_access_key,
                 aws_secret_key,
                 kubeconfig,
                 pod_namespace,
                 idle_threshold,
                 type_idle_threshold,
                 instance_init_time,
                 cluster_name,
                 notifier,
                 scale_up=True,
                 maintainance=True,
                 datadog_api_key=None,
                 over_provision=5,
                 dry_run=False,
                 drainable_labels={},
                 scale_label=None,
                 instance_type_priorities={}):
        if kubeconfig:
            # for using locally
            logger.debug('Using kubeconfig %s', kubeconfig)
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_file(kubeconfig))
        else:
            # for using on kube
            logger.debug('Using kube service account')
            self.api = pykube.HTTPClient(
                pykube.KubeConfig.from_service_account())
        if pod_namespace is None:
            self.pod_namespace = pykube.all
        else:
            self.pod_namespace = pod_namespace

        self._drained = {}
        self.session = boto3.session.Session(
            aws_access_key_id=aws_access_key,
            aws_secret_access_key=aws_secret_key,
            region_name=regions[0])  # provide a default region
        self.autoscaling_groups = autoscaling_groups.AutoScalingGroups(
            session=self.session, regions=regions, cluster_name=cluster_name)
        self.autoscaling_timeouts = autoscaling_groups.AutoScalingTimeouts(
            self.session)

        # config
        self.regions = regions
        self.idle_threshold = idle_threshold
        self.instance_init_time = instance_init_time
        self.type_idle_threshold = type_idle_threshold
        self.over_provision = over_provision

        self.scale_up = scale_up
        self.maintainance = maintainance

        self.notifier = notifier

        if datadog_api_key:
            datadog.initialize(api_key=datadog_api_key)
            logger.info('Datadog initialized')
        self.stats = datadog.ThreadStats()
        self.stats.start()

        self.dry_run = dry_run
        self.drainable_labels = drainable_labels
        self.scale_label = scale_label
        if not instance_type_priorities:
            self.instance_type_priorities = self._GROUP_PRIORITIES
        else:
            # use a list comprehension so that len() also works on Python 3,
            # where filter() returns a lazy iterator
            multiple_priorities = len(
                [instance for instance, values in instance_type_priorities.items()
                 if len(values) > 1])
            if multiple_priorities > 0:
                raise ValueError(
                    'You have specified more than one priority for %d instance types. Please specify a single priority for each instance type that you care about.'
                    % multiple_priorities)
            self.instance_type_priorities = {
                instance: min([int(value) for value in values])
                for instance, values in instance_type_priorities.items()
            }