Beispiel #1
0
def argparse_wrapper():
    """Parse the command line, enable optional debug aids and call ``main``.

    ``--options-file`` and ``--options-preset`` are mutually exclusive; when a
    preset is given, the options path becomes ``options-<preset>.txt`` next to
    ``DEFAULT_OPTIONS_FILE``.
    """
    parser = ArgumentParser()

    # Positional arguments; the third directory may be omitted.
    parser.add_argument("img_dir", type=Path)
    parser.add_argument("mask_dir", type=Path)
    parser.add_argument("optional_img_dir", type=Path, nargs="?")

    parser.add_argument("-p", "--processes", type=int, default=1)
    parser.add_argument("--output-dir", type=Path, default=DEFAULT_OUTPUT_PATH)
    parser.add_argument("--enable-manhole", action="store_true")
    parser.add_argument("--enable-faulthandler", action="store_true")

    options_source = parser.add_mutually_exclusive_group()
    options_source.add_argument(
        "--options-file", type=Path, default=DEFAULT_OPTIONS_FILE
    )
    options_source.add_argument("--options-preset")

    cli = parser.parse_args()

    if cli.enable_manhole:
        # Imported lazily so manhole is only needed when requested.
        import manhole

        manhole.install(activate_on="USR1")

    if cli.enable_faulthandler:
        faulthandler.enable(all_threads=True)

    # A preset name takes precedence over the (possibly defaulted) file path.
    options_path = cli.options_file
    if cli.options_preset is not None:
        options_path = DEFAULT_OPTIONS_FILE.with_name(
            f"options-{cli.options_preset}.txt"
        )

    main(
        img_dir=cli.img_dir,
        mask_dir=cli.mask_dir,
        processes=cli.processes,
        output_dir=cli.output_dir,
        options_path=options_path,
        optional_img_dir=cli.optional_img_dir,
    )
Beispiel #2
0
def install_manhole(locals):
    """Install a manhole when ``devel/manhole_enable`` is set in the config.

    :param locals: mapping exposed as the local namespace of the manhole shell.
    """
    if not config.getboolean('devel', 'manhole_enable'):
        return

    import manhole

    manhole_options = dict(
        # Names available inside the manhole shell.
        locals=locals,
        # Use a dedicated socket that is secure and easy to find, instead of
        # /tmp/manhole-<vdsm-pid>.
        socket_path=os.path.join(constants.P_VDSM_RUN, 'vdsmd.manhole'),
        # Ensure the manhole connection thread will not block shutdown.
        daemon_connection=True,
        # Avoid creating a manhole thread in the child process after fork.
        patch_fork=False,
        # Avoid pointless modification of the process signal mask if the
        # signalfd module is available.
        sigmask=None,
        # Python prints ignored exceptions to stderr, so leave it alone.
        redirect_stderr=False,
    )
    manhole.install(**manhole_options)
Beispiel #3
0
def main(exit=True, install_tornado_bridge=True, use_signals=True):
    """Build the application from command-line arguments and run it.

    :param exit: when true, terminate the process via ``sys.exit`` with the
        application's exit code; otherwise return that code.
    :param install_tornado_bridge: install Tornado's asyncio main loop first.
    :param use_signals: let the application set up its signal handlers.
    """
    if install_tornado_bridge:
        tornado.platform.asyncio.AsyncIOMainLoop().install()

    parsed_args = AppArgumentParser().parse_args()
    app_builder = Builder(parsed_args)
    app = app_builder.build()

    if use_signals:
        app.setup_signal_handlers()

    if parsed_args.debug_manhole:
        import manhole
        import wpull

        # Publish the builder on the package before the manhole is installed.
        wpull.wpull_builder = app_builder
        manhole.install()

    status = app.run_sync()

    if not exit:
        return status
    sys.exit(status)
Beispiel #4
0
def main(exit=True, install_tornado_bridge=True, prefer_trollius=True):
    """Build the application from command-line arguments and run it.

    :param exit: when true, terminate the process via ``sys.exit`` with the
        application's exit code; otherwise return that code.
    :param install_tornado_bridge: install Tornado's asyncio main loop first.
    :param prefer_trollius: if ``asyncio`` is importable, make it use the
        trollius event loop policy.
    """
    if prefer_trollius:
        try:
            import asyncio
        except ImportError:
            # No asyncio available: nothing to redirect.
            pass
        else:
            trollius_policy = trollius.get_event_loop_policy()
            asyncio.set_event_loop_policy(trollius_policy)

    if install_tornado_bridge:
        tornado.platform.asyncio.AsyncIOMainLoop().install()

    parsed_args = AppArgumentParser().parse_args()

    app_builder = Builder(parsed_args)
    app_builder.build()

    app = app_builder.factory['Application']
    app.setup_signal_handlers()

    if parsed_args.debug_manhole:
        import manhole
        import wpull

        # Publish the builder on the package before the manhole is installed.
        wpull.wpull_builder = app_builder
        manhole.install()

    status = app.run_sync()

    if not exit:
        return status
    sys.exit(status)
Beispiel #5
0
def tb_worker(func, *args, **kwargs):
    """Create and start a worker via ``func`` and idle while it keeps running.

    A manhole is installed with this function's local names so they can be
    inspected from the manhole shell.
    """
    # Usage sketch: multiprocessing.Process(target=tb_worker, args=())
    new_process = func(*args, **kwargs)
    new_process.start()

    # Local names are left untouched because locals() is handed to manhole.
    manhole.install(locals=locals())

    while new_process.keep_running:
        time.sleep(1)
Beispiel #6
0
def main(exit=True, install_tornado_bridge=True, use_signals=True):
    """Parse arguments, build the application, run it and report its status.

    :param exit: if true, call ``sys.exit`` with the exit code instead of
        returning it.
    :param install_tornado_bridge: install Tornado's asyncio main loop first.
    :param use_signals: have the application register its signal handlers.
    """
    if install_tornado_bridge:
        bridge = tornado.platform.asyncio.AsyncIOMainLoop()
        bridge.install()

    options = AppArgumentParser().parse_args()

    factory_builder = Builder(options)
    app = factory_builder.build()

    if use_signals:
        app.setup_signal_handlers()

    if options.debug_manhole:
        import manhole
        import wpull

        # Publish the builder on the package before the manhole is installed.
        wpull.wpull_builder = factory_builder
        manhole.install()

    result_code = app.run_sync()

    if exit:
        sys.exit(result_code)
    return result_code
Beispiel #7
0
def main():
    """Start the tendrl Ceph Bridge manager and block until SIGTERM/SIGINT."""
    arg_parser = argparse.ArgumentParser(description='tendrl Ceph Bridge')
    arg_parser.add_argument(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print log to stdout')
    options = arg_parser.parse_args()

    if options.debug:
        # Mirror log records to stdout using the project's log format.
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setFormatter(logging.Formatter(tendrl.log.FORMAT))
        log.addHandler(stdout_handler)

    if manhole is not None:
        # Enable manhole for debugging.  Oneshot mode is used for gevent
        # compatibility; manhole's own messages are routed into our log.
        def _log_manhole_message(message):
            return log.info("MANHOLE: %s" % message)

        manhole.cry = _log_manhole_message
        manhole.install(oneshot_on=signal.SIGUSR1)

    manager = Manager()
    manager.start()

    stop_requested = gevent.event.Event()

    def shutdown():
        log.info("Signal handler: stopping")
        stop_requested.set()

    for signum in (signal.SIGTERM, signal.SIGINT):
        gevent.signal(signum, shutdown)

    # Wake up once a second until a shutdown signal has been seen.
    while not stop_requested.is_set():
        stop_requested.wait(timeout=1)
Beispiel #8
0
def application(env, start_response):
    """WSGI entry point for uwsgi or gunicorn.

    The app is created on the first request and cached in the module-level
    ``_app``. If ``PAASTA_MANHOLE_PATH`` is set, a manhole is installed at
    creation time on a per-process socket path.
    """
    global _app
    if _app:
        return _app(env, start_response)

    _app = make_app()
    socket_prefix = os.environ.get("PAASTA_MANHOLE_PATH")
    if socket_prefix:
        # Suffix the pid so each worker process gets its own socket.
        manhole.install(
            socket_path=f"{socket_prefix}-{os.getpid()}",
            locals={"_app": _app},
        )
    return _app(env, start_response)
Beispiel #9
0
 def setup_manhole(self):
     """Open a manhole once if ``[debug] enable_manhole`` is true in the config.

     Does nothing when the option is absent or false, or when a manhole has
     already been opened by this object (``self.manhole_status``).
     """
     cfg = self.config
     enabled = False
     if cfg.has_section("debug") and cfg.has_option("debug", "enable_manhole"):
         enabled = cfg.getboolean("debug", "enable_manhole")

     if not enabled or self.manhole_status:
         return

     import manhole

     # Give manhole a logger named after this daemon.
     manhole.logger = logging.getLogger(self.daemon_name + ".manhole")
     self.logger.info("Opening manhole")
     manhole.install()
     self.manhole_status = True
Beispiel #10
0
 def open_manhole(self):
     """Install a manhole and return the ``uds_name`` of the installed instance."""
     import manhole

     return manhole.install().uds_name
Beispiel #11
0
async def open_manhole():
    """
    Install a manhole.

    :return: the ``uds_name`` attribute of the installed manhole instance
    """
    import manhole

    installed = manhole.install()
    uds_name = installed.uds_name
    return uds_name
Beispiel #12
0
def main():
    """Start the Calamari management service and block until SIGTERM/SIGINT."""
    arg_parser = argparse.ArgumentParser(
        description='Calamari management service')
    arg_parser.add_argument('--debug', dest='debug', action='store_true',
                            default=False, help='print log to stdout')
    options = arg_parser.parse_args()

    if options.debug:
        # Mirror log records to stdout using the project's log format.
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setFormatter(logging.Formatter(rlyeh.log.FORMAT))
        log.addHandler(stdout_handler)

    # Instruct salt to use the gevent version of ZMQ
    import zmq.green
    import salt.utils.event
    salt.utils.event.zmq = zmq.green

    if sqlalchemy is not None:
        # Set up gevent compatibility in psycopg2
        import psycogreen.gevent
        psycogreen.gevent.patch_psycopg()

    if manhole is not None:
        # Enable manhole for debugging.  Oneshot mode is used for gevent
        # compatibility; manhole's own messages are routed into our log.
        def _log_manhole_message(message):
            return log.info("MANHOLE: %s" % message)

        manhole.cry = _log_manhole_message
        manhole.install(oneshot_on=signal.SIGUSR1)

    manager = Manager()
    manager.start()

    stop_requested = gevent.event.Event()

    def shutdown():
        log.info("Signal handler: stopping")
        stop_requested.set()

    for signum in (signal.SIGTERM, signal.SIGINT):
        gevent.signal(signum, shutdown)

    # Wake up once a second until a shutdown signal has been seen.
    while not stop_requested.is_set():
        stop_requested.wait(timeout=1)
Beispiel #13
0
def main():
    """Run the Calamari management service until a termination signal arrives."""
    cli = argparse.ArgumentParser(description='Calamari management service')
    cli.add_argument(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print log to stdout')

    parsed = cli.parse_args()
    if parsed.debug:
        # Also send log output to stdout, formatted like the regular log.
        console = logging.StreamHandler(sys.stdout)
        console.setFormatter(logging.Formatter(cthulhu.log.FORMAT))
        log.addHandler(console)

    # Instruct salt to use the gevent version of ZMQ
    import zmq.green
    import salt.utils.event
    salt.utils.event.zmq = zmq.green

    if sqlalchemy is not None:
        # Set up gevent compatibility in psycopg2
        import psycogreen.gevent
        psycogreen.gevent.patch_psycopg()

    if manhole is not None:
        # Enable manhole for debugging.  Use oneshot mode
        # for gevent compatibility
        def _relay_to_log(message):
            return log.info("MANHOLE: %s" % message)

        manhole.cry = _relay_to_log
        manhole.install(oneshot_on=signal.SIGUSR1)

    service = Manager()
    service.start()

    finished = gevent.event.Event()

    def shutdown():
        log.info("Signal handler: stopping")
        finished.set()

    gevent.signal(signal.SIGTERM, shutdown)
    gevent.signal(signal.SIGINT, shutdown)

    # Poll the event with a one second timeout until shutdown() has fired.
    while True:
        if finished.is_set():
            break
        finished.wait(timeout=1)
Beispiel #14
0
def install_manhole(locals):
    """Install a manhole when ``devel/manhole_enable`` is set in the config.

    :param locals: mapping exposed as the local namespace of the manhole shell.
    """
    enabled = config.getboolean('devel', 'manhole_enable')
    if not enabled:
        return

    import manhole  # pylint: disable=import-error

    socket_file = os.path.join(constants.P_VDSM_RUN, 'vdsmd.manhole')
    manhole.install(
        # Set the locals in the manhole shell.
        locals=locals,
        # Create a secure and easy to use manhole socket, instead of
        # /tmp/manhole-<vdsm-pid>.
        socket_path=socket_file,
        # Ensure that the manhole connection thread will not block shutdown.
        daemon_connection=True,
        # Avoid creation of a manhole thread in the child process after fork.
        patch_fork=False,
        # Avoid pointless modification of the process signal mask if the
        # signalfd module is available.
        sigmask=None,
        # Disabled since Python prints ignored exceptions to stderr.
        redirect_stderr=False)
Beispiel #15
0
def main():
    """Load the SPv2 model and its supporting data, then serve forever.

    Command-line options allow overriding the number of tokens per batch and
    the model weights file.
    """
    logging.getLogger().setLevel(logging.DEBUG)

    if os.name != 'nt':
        # manhole is skipped on Windows ('nt').
        import manhole
        manhole.install()

    base_settings = settings.default_model_settings

    import argparse
    cli_parser = argparse.ArgumentParser(description="Runs the SPv2 server")
    cli_parser.add_argument(
        "--tokens-per-batch",
        type=int,
        default=base_settings.tokens_per_batch,
        help="the number of tokens in a batch")
    cli_parser.add_argument(
        "--model",
        type=str,
        default="model/C49.h5",
        help="filename of existing model")
    cli = cli_parser.parse_args()

    model_settings = base_settings._replace(
        tokens_per_batch=cli.tokens_per_batch)
    logging.debug(model_settings)

    logging.info("Loading token statistics")
    token_stats = dataprep2.TokenStatistics("model/all.tokenstats3.gz")

    logging.info("Loading embeddings")
    glove_vectors = dataprep2.GloveVectors(model_settings.glove_vectors)
    embeddings = dataprep2.CombinedEmbeddings(
        token_stats, glove_vectors, model_settings.embedded_tokens_fraction)

    logging.info("Loading model")
    model = with_labels.model_with_labels(model_settings, embeddings)
    model.load_weights(cli.model)

    logging.info("Starting server")
    Server(model, token_stats, embeddings, model_settings).serve_forever()
    def __init__(self, context):
        """Apply server-wide setup on ``context`` before delegating to the base class.

        Optionally installs a manhole (when ``MANHOLE_DEBUGGING`` is set in
        the config), records the gifsicle path on the server and disables
        Thumbor's built-in metadata parsing.
        """
        if context.config.get('MANHOLE_DEBUGGING', None):
            logger.debug('Installing manhole')
            # One socket per server port, placed in the system temp directory.
            socket_name = 'manhole-%s' % context.server.port
            manhole.install(
                socket_path=os.path.join(tempfile.gettempdir(), socket_name)
            )

        # The gifsicle engine needs to work, regardless of
        # USE_GIFSICLE_ENGINE being on or not
        gifsicle_location = which('gifsicle')
        context.server.gifsicle_path = gifsicle_location

        # T178072 Disable Thumbor's built-in EXIF parsing, which
        # emits logger.error messages constantly because it's trying
        # to parse our truncated buffer. EXIF parsing is done in our
        # imagemagick engine instead.
        thumbor.engines.METADATA_AVAILABLE = False

        super(App, self).__init__(context)
Beispiel #17
0
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""
Command line interface
"""

import argparse, sys, signal, asyncio, os, json
from traceback import TracebackException
from enum import IntEnum
from yarl import URL
# Optional debugging aid: install manhole at import time if the package is
# available; a missing module is deliberately ignored. The manhole is set up
# in oneshot mode on USR1 and fork patching is turned off.
try:
    import manhole
    manhole.install (patch_fork=False, oneshot_on='USR1')
except ModuleNotFoundError:
    pass

from . import behavior, browser
from .controller import SinglePageController, \
        ControllerSettings, StatsHandler, LogHandler, \
        RecursiveController, DepthLimit, PrefixLimit
from .devtools import Passthrough, Process
from .warc import WarcHandler
from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, \
        WarcHandlerConsumer, Level
from .devtools import Crashed

class SingleExitStatus(IntEnum):
    """ Exit status for single-shot command line """
Beispiel #18
0
            level=logging.DEBUG,
            format='[pid=%(process)d - %(asctime)s]: %(name)s - %(levelname)s - %(message)s',
        )
        test_name = sys.argv[2]

        setup_coverage()

        if os.getenv('PATCH_THREAD', False):
            import manhole
            setup_greenthreads(True)
        else:
            setup_greenthreads(True)
            import manhole

        if test_name == 'test_activate_on_usr2':
            manhole.install(activate_on='USR2')
            for i in range(TIMEOUT * 100):
                time.sleep(0.1)
        elif test_name == 'test_activate_on_with_oneshot_on':
            manhole.install(activate_on='USR2', oneshot_on='USR2')
            for i in range(TIMEOUT * 100):
                time.sleep(0.1)
        elif test_name == 'test_interrupt_on_accept':
            def handle_usr2(_sig, _frame):
                print('Got USR2')
            signal.signal(signal.SIGUSR2, handle_usr2)

            import ctypes
            import ctypes.util
            libpthread_path = ctypes.util.find_library("pthread")
            if not libpthread_path:
Beispiel #19
0
def start(port, reinstall_on_fork=False):
    """Customize manhole's socket creation and credential check, then install it.

    :param port: passed to ``_make_get_socket`` to build the socket factory.
    :param reinstall_on_fork: forwarded to manhole as ``patch_fork``.
    """
    manhole.logger = get_logger()

    # Replace manhole's hooks with this module's implementations.
    socket_factory = _make_get_socket(port)
    manhole.Manhole.get_socket = staticmethod(socket_factory)
    credentials_hook = staticmethod(_check_credentials)
    manhole.ManholeConnectionThread.check_credentials = credentials_hook

    manhole.install(patch_fork=reinstall_on_fork)
Beispiel #20
0
from __future__ import print_function
import os

# NOTE: the statements below are order-sensitive module-level side effects;
# do not regroup them with the regular imports.

# Tell gevent not to patch os.waitpid() since it is susceptible to race
# conditions. See:
# http://www.gevent.org/gevent.monkey.html#gevent.monkey.patch_os
os.environ['GEVENT_NOWAITPID'] = 'true'

# Use manhole to give us a way to debug hung processes
# https://pypi.python.org/pypi/manhole
# manhole is installed here, before gevent's monkey patching runs below.
import manhole
manhole.install(
    verbose=False,
    # Listen for SIGUSR1
    oneshot_on="USR1")
from gevent import monkey
monkey.patch_all(
    dns=False,
    # Don't patch subprocess to avoid http://tracker.ceph.com/issues/14990
    subprocess=False,
)
import sys
from gevent.hub import Hub

# Don't write pyc files
sys.dont_write_bytecode = True

# This import rebinds the name ``monkey`` (previously gevent's module); the
# gevent patch_all() call above has already run by this point.
from teuthology.orchestra import monkey
monkey.patch_all()

import logging
Beispiel #21
0
import os
# NOTE: the statements below are order-sensitive module-level side effects;
# do not regroup them with the regular imports.

# Tell gevent not to patch os.waitpid() since it is susceptible to race
# conditions. See:
# http://www.gevent.org/gevent.monkey.html#gevent.monkey.patch_os
os.environ['GEVENT_NOWAITPID'] = 'true'

# Use manhole to give us a way to debug hung processes
# https://pypi.python.org/pypi/manhole
# manhole is installed here, before gevent's monkey patching runs below.
import manhole
manhole.install(
    verbose=False,
    # Listen for SIGUSR1
    oneshot_on="USR1"
)
from gevent import monkey
monkey.patch_all(
    dns=False,
    # Don't patch subprocess to avoid http://tracker.ceph.com/issues/14990
    subprocess=False,
)
import sys
from gevent.hub import Hub

# Don't write pyc files
sys.dont_write_bytecode = True

# This import rebinds the name ``monkey`` (previously gevent's module); the
# gevent patch_all() call above has already run by this point.
from .orchestra import monkey
monkey.patch_all()

import logging
import subprocess
Beispiel #22
0
def worker(func, *args, **kwargs):
    """Call ``func``, install a manhole exposing this frame's locals, then idle forever."""
    # Usage sketch: multiprocessing.Process(target=worker, args=())
    new_process = func(*args, **kwargs)

    # ``new_process`` is only reachable through the locals handed to manhole,
    # so the local names are left untouched.
    manhole.install(locals=locals())

    while True:
        time.sleep(1)
Beispiel #23
0
def main():
    """Run the SPv2 database worker.

    Repeatedly fetches a batch of paper ids from the task database, downloads
    their token JSON from the dataprep service, turns it into featurized
    tokens, runs the model on them and posts the results (or per-paper
    errors) back to the task database. The worker stops once no paper ids
    have been received for ``processing_timeout`` seconds.

    Raises:
        ValueError: if more than half of a batch fails to preprocess.
    """
    import tempfile
    import argparse
    import h5py
    import datadog

    import settings
    import dataprep2

    if os.name != 'nt':
        import manhole
        manhole.install()

    logging.getLogger().setLevel(logging.INFO)
    logging.basicConfig(
        format='%(asctime)s %(thread)d %(levelname)s %(message)s',
        level=logging.INFO)

    # Defaults come from the environment so deployments can configure the
    # worker without command-line flags.
    default_host = os.environ.get("SPV2_DB_HOST", "localhost")
    default_dbname = os.environ.get("SPV2_DB_DBNAME", "postgres")
    default_schema = os.environ.get("SPV2_DB_SCHEMA", "public")
    default_user = os.environ.get("SPV2_DB_USER", "s2dev")
    default_password = os.environ.get("SPV2_DB_PASSWORD")
    default_dataprep_host = os.environ.get("SPV2_DATAPREP_SERVICE_HOST",
                                           "localhost")
    default_dataprep_port = int(
        os.environ.get("SPV2_DATAPREP_SERVICE_PORT", "8080"))
    parser = argparse.ArgumentParser(
        description="Trains a classifier for PDF Tokens")
    parser.add_argument("--host",
                        type=str,
                        default=default_host,
                        help="database host")
    parser.add_argument("--port", type=int, default=5432, help="database port")
    parser.add_argument("--dbname",
                        type=str,
                        default=default_dbname,
                        help="database name")
    parser.add_argument("--schema",
                        type=str,
                        default=default_schema,
                        help="schema name")
    parser.add_argument("--user",
                        type=str,
                        default=default_user,
                        help="database user")
    parser.add_argument("--password",
                        type=str,
                        default=default_password,
                        help="database password")
    parser.add_argument("--dataprep-host",
                        type=str,
                        default=default_dataprep_host,
                        help="Host where the dataprep service is running")
    # The port is formatted with %d when the dataprep URL is built, so it has
    # to be parsed as an int (the default already is one).
    parser.add_argument("--dataprep-port",
                        type=int,
                        default=default_dataprep_port,
                        help="Port where the dataprep service is running")
    args = parser.parse_args()

    # The password is kept out of this dict so it never reaches the log.
    taskdb_kwargs = dict(
        host=args.host,
        port=args.port,
        dbname=args.dbname,
        schema=args.schema,
        user=args.user,
    )
    logging.info("Task db config: %s", taskdb_kwargs)
    todo_list = papertasks.TaskDB(password=args.password, **taskdb_kwargs)

    # start datadog
    datadog.initialize(api_key=os.environ.get("DATADOG_API_KEY"))
    stats = datadog.ThreadStats()
    stats.start()
    # Metric prefix is derived from the first label of the database host name,
    # with a leading "spv2-" stripped.
    datadog_prefix = args.host.split(".")[0]
    if datadog_prefix.startswith("spv2-"):
        datadog_prefix = datadog_prefix[5:]
    datadog_prefix = "spv2.%s." % datadog_prefix

    logging.info("Loading model settings ...")
    model_settings = settings.default_model_settings

    logging.info("Loading token statistics ...")
    token_stats = dataprep2.TokenStatistics("model/all.tokenstats3.gz")

    logging.info("Loading embeddings ...")
    embeddings = dataprep2.CombinedEmbeddings(
        token_stats, dataprep2.GloveVectors(model_settings.glove_vectors),
        model_settings.embedded_tokens_fraction)

    import with_labels  # Heavy import, so we do it here
    model = with_labels.model_with_labels(model_settings, embeddings)
    model.load_weights("model/C49.h5")
    model_version = 2

    logging.info("Starting to process tasks")
    total_paper_ids_processed = 0
    start_time = time.time()
    last_time_with_paper_ids = start_time

    def featurized_tokens_filenames() -> typing.Generator[typing.Tuple[
        tempfile.TemporaryDirectory, str], None, None]:
        """Yield ``(temp_dir, featurized_tokens_file_name)`` for each batch.

        The consumer is responsible for cleaning up ``temp_dir``.
        """
        # async http stuff
        async_event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(async_event_loop)
        connector = aiohttp.TCPConnector(loop=async_event_loop,
                                         force_close=True)
        session = aiohttp.ClientSession(connector=connector,
                                        read_timeout=120,
                                        conn_timeout=120)
        # Serializes appends to the shared per-batch JSON file.
        write_lock = asyncio.Lock()

        async def write_json_tokens_to_file(paper_id: str, json_file):
            """Download the token JSON for one paper and append it to ``json_file``.

            After the last failed attempt an error record is appended
            instead of the paper's tokens.
            """
            url = "http://%s:%d/v1/json/paperid/%s" % (
                args.dataprep_host, args.dataprep_port, paper_id)
            attempts_left = 5
            with tempfile.NamedTemporaryFile(prefix="SPv2DBWorker-%s-" %
                                             paper_id,
                                             suffix=".json") as f:

                def write_json_to_output(json_object):
                    f.write(json.dumps(json_object).encode("utf-8"))

                while True:
                    attempts_left -= 1
                    # Start every attempt from an empty file, so chunks
                    # written by a failed attempt never precede the data of
                    # a retry (or the final error record).
                    f.seek(0)
                    f.truncate()
                    try:
                        async with session.get(url) as response:
                            if response.status == 200:
                                # We write to a tempfile first, because we don't want to end up with
                                # half-written json if something goes wrong while reading from the
                                # socket.
                                while True:
                                    chunk = await response.content.read(1024 *
                                                                        1024)
                                    if not chunk:
                                        break
                                    f.write(chunk)
                                stats.increment(datadog_prefix +
                                                "dataprep.success")
                                break
                            else:
                                stats.increment(datadog_prefix +
                                                "dataprep.failure")
                                if attempts_left > 0:
                                    logging.error(
                                        "Error %d from dataprep server for paper id %s. %d attempts left.",
                                        response.status, paper_id,
                                        attempts_left)
                                else:
                                    stats.increment(datadog_prefix +
                                                    "dataprep.gave_up")
                                    logging.error(
                                        "Error %d from dataprep server for paper id %s. Giving up.",
                                        response.status, paper_id)
                                    error = {
                                        "error": {
                                            "message":
                                            "Status %s from dataprep server" %
                                            response.status,
                                            "stackTrace":
                                            None,
                                            "docName":
                                            "%s.pdf" % paper_id
                                        }
                                    }
                                    write_json_to_output(error)
                                    break
                    except Exception as e:
                        stats.increment(datadog_prefix + "dataprep.failure")
                        if attempts_left > 0:
                            logging.error(
                                "Error %r from dataprep server for paper id %s. %d attempts left.",
                                e, paper_id, attempts_left)
                        else:
                            stats.increment(datadog_prefix +
                                            "dataprep.gave_up")
                            logging.error(
                                "Error %r from dataprep server for paper id %s. Giving up.",
                                e, paper_id)
                            error = {
                                "error": {
                                    "message":
                                    "Error %r while contacting dataprep server"
                                    % e,
                                    "stackTrace":
                                    None,
                                    "docName":
                                    "%s.pdf" % paper_id
                                }
                            }
                            write_json_to_output(error)
                            break

                # append the tempfile to the json file
                f.flush()
                f.seek(0)
                # ``with await lock`` is no longer supported by asyncio;
                # ``async with`` is the equivalent form.
                async with write_lock:
                    _send_all(f, json_file)

        processing_timeout = 600
        while True:
            paper_ids = todo_list.get_batch_to_process(model_version,
                                                       max_batch_size=50)
            logging.info("Received %d paper ids", len(paper_ids))
            if len(paper_ids) <= 0:
                if time.time() - last_time_with_paper_ids > processing_timeout:
                    logging.info(
                        "Saw no paper ids for more than %.0f seconds. Shutting down.",
                        processing_timeout)
                    return
                time.sleep(20)
                continue
            stats.increment(datadog_prefix + "attempts", len(paper_ids))

            temp_dir = tempfile.TemporaryDirectory(prefix="SPv2DBWorker-")

            logging.info("Getting JSON ...")
            getting_json_time = time.time()
            json_file_name = os.path.join(temp_dir.name, "tokens.json")
            with open(json_file_name, "wb") as json_file:
                # Wrap the coroutines in tasks: newer asyncio versions no
                # longer accept bare coroutines in asyncio.wait().
                write_json_tasks = [
                    async_event_loop.create_task(
                        write_json_tokens_to_file(p, json_file))
                    for p in paper_ids
                ]
                async_event_loop.run_until_complete(
                    asyncio.wait(write_json_tasks))
            getting_json_time = time.time() - getting_json_time
            logging.info("Got JSON in %.2f seconds", getting_json_time)
            stats.timing(datadog_prefix + "get_json", getting_json_time)

            # pick out errors and write them to the DB
            paper_id_to_error = {}
            for line in dataprep2.json_from_file(json_file_name):
                if "error" not in line:
                    continue
                error = line["error"]
                error["message"] = dataprep2.sanitize_for_json(
                    error["message"])
                error["stackTrace"] = dataprep2.sanitize_for_json(
                    error["stackTrace"])
                paper_id = error["docName"]
                if paper_id.endswith(".pdf"):
                    paper_id = paper_id[:-4]
                paper_id_to_error[paper_id] = error
                logging.info("Paper %s has error %s", paper_id,
                             error["message"])
            if len(paper_id_to_error) > len(paper_ids) / 2:
                raise ValueError(
                    "More than half of the batch failed to preprocess. Something is afoot. We're giving up."
                )
            todo_list.post_errors(model_version, paper_id_to_error)
            stats.increment(datadog_prefix + "errors", len(paper_id_to_error))
            logging.info("Wrote %d errors to database", len(paper_id_to_error))

            # make unlabeled tokens file
            logging.info("Making unlabeled tokens ...")
            making_unlabeled_tokens_time = time.time()
            unlabeled_tokens_file_name = os.path.join(temp_dir.name,
                                                      "unlabeled-tokens.h5")
            dataprep2.make_unlabeled_tokens_file(json_file_name,
                                                 unlabeled_tokens_file_name,
                                                 ignore_errors=True)
            os.remove(json_file_name)
            making_unlabeled_tokens_time = (time.time() -
                                            making_unlabeled_tokens_time)
            logging.info("Made unlabeled tokens in %.2f seconds",
                         making_unlabeled_tokens_time)
            stats.timing(datadog_prefix + "make_unlabeled",
                         making_unlabeled_tokens_time)

            # make featurized tokens file
            logging.info("Making featurized tokens ...")
            making_featurized_tokens_time = time.time()
            with h5py.File(unlabeled_tokens_file_name,
                           "r") as unlabeled_tokens_file:
                featurized_tokens_file_name = os.path.join(
                    temp_dir.name, "featurized-tokens.h5")
                dataprep2.make_featurized_tokens_file(
                    featurized_tokens_file_name,
                    unlabeled_tokens_file, token_stats, embeddings,
                    dataprep2.VisionOutput(None), model_settings)
                # We don't delete the unlabeled file here because the featurized one contains references
                # to it.
            making_featurized_tokens_time = (time.time() -
                                             making_featurized_tokens_time)
            logging.info("Made featurized tokens in %.2f seconds",
                         making_featurized_tokens_time)
            stats.timing(datadog_prefix + "make_featurized",
                         making_featurized_tokens_time)

            yield temp_dir, featurized_tokens_file_name

    for temp_dir, featurized_tokens_file_name in dataprep2.threaded_generator(
            featurized_tokens_filenames(), 1):
        try:
            logging.info("Making and sending results ...")
            make_and_send_results_time = time.time()
            with h5py.File(
                    featurized_tokens_file_name) as featurized_tokens_file:

                def get_docs():
                    return dataprep2.documents_for_featurized_tokens(
                        featurized_tokens_file,
                        include_labels=False,
                        max_tokens_per_page=model_settings.tokens_per_batch)

                results = with_labels.run_model(model,
                                                model_settings,
                                                embeddings.glove_vocab(),
                                                get_docs,
                                                enabled_modes={"predictions"})
                # Reshape the model output into the per-document records that
                # are posted to the task database, keyed by document sha.
                results = {
                    doc.doc_sha: {
                        "docName":
                        doc.doc_id,
                        "docSha":
                        doc.doc_sha,
                        "title":
                        dataprep2.sanitize_for_json(
                            docresults["predictions"][0]),
                        "authors":
                        docresults["predictions"][1],
                        "bibs": [{
                            "title": bibtitle,
                            "authors": bibauthors,
                            "venue": bibvenue,
                            "year": bibyear
                        } for bibtitle, bibauthors, bibvenue, bibyear in
                                 docresults["predictions"][2]]
                    }
                    for doc, docresults in results
                }

                todo_list.post_results(model_version, results)
                stats.increment(datadog_prefix + "successes", len(results))
                total_paper_ids_processed += len(results)
        finally:
            # Always remove the batch's temporary files, even on failure.
            temp_dir.cleanup()

        make_and_send_results_time = time.time() - make_and_send_results_time
        logging.info("Made and sent results in %.2f seconds",
                     make_and_send_results_time)
        stats.timing(datadog_prefix + "make_results",
                     make_and_send_results_time)

        # report progress
        paper_ids_per_hour = 3600 * total_paper_ids_processed / (time.time() -
                                                                 start_time)
        logging.info("This worker is processing %.0f paper ids per hour.",
                     paper_ids_per_hour)

        # Read by the generator above to decide when the worker has been idle
        # for too long.
        last_time_with_paper_ids = time.time()
Beispiel #24
0
 def __call__(self, event):
     """Install a non-threaded, non-strict manhole with ``self.options`` and call its ``handle_oneshot``."""
     import manhole

     manhole.install(strict=False, thread=False, **self.options).handle_oneshot()
Beispiel #25
0
 def __call__(self, event):
     """Install manhole (``strict=False``, ``thread=False``) and invoke ``handle_oneshot`` on it.

     ``event`` is accepted but not used.
     """
     import manhole

     install_kwargs = dict(strict=False, thread=False, **self.options)
     oneshot_manhole = manhole.install(**install_kwargs)
     oneshot_manhole.handle_oneshot()
Beispiel #26
0
def myFunc(a):
    """Install a manhole exposing ``a``, then poll every 0.5 s until ``a < 5``.

    At least one 0.5 s sleep always happens before the first check.
    """
    manhole.install(strict=False, locals={'a': a})
    time.sleep(0.5)
    while not a < 5:
        time.sleep(0.5)
Beispiel #27
0
 def open_manhole(dummy_signum):
     """Open a one-shot manhole if ``stack_dump_file`` names this process.

     The file is read and stripped; nothing happens unless its content
     equals the current PID. ``dummy_signum`` is passed twice to
     ``handle_oneshot``.
     """
     with open(stack_dump_file, 'r') as pid_file:
         recorded_pid = pid_file.read().strip()
         if recorded_pid != str(os.getpid()):
             return
         shell = manhole.install(strict=False, thread=False)
         shell.handle_oneshot(dummy_signum, dummy_signum)
Beispiel #28
0
def start(port, listen):
    """Patch manhole's socket factory and credential check, then install it.

    ``port`` and ``listen`` are handed to ``_make_get_socket``; the callable it
    returns replaces ``Manhole.get_socket``.
    """
    socket_factory = _make_get_socket(port, listen)
    setattr(manhole.Manhole, 'get_socket', staticmethod(socket_factory))
    setattr(manhole.ManholeConnection, 'check_credentials',
            staticmethod(_check_credentials))
    manhole.install()
Beispiel #29
0
def start(port, reinstall_on_fork=False):
    """Configure manhole's logger, socket factory and credential check, then install.

    ``port`` is handed to ``_make_get_socket``; ``reinstall_on_fork`` is passed
    to ``manhole.install`` as ``patch_fork``.
    """
    manhole.logger = get_logger()
    socket_factory = _make_get_socket(port)
    setattr(manhole.Manhole, 'get_socket', staticmethod(socket_factory))
    setattr(manhole.ManholeConnectionThread, 'check_credentials',
            staticmethod(_check_credentials))
    manhole.install(patch_fork=reinstall_on_fork)
# Create Manager. This configures devices and creates the main manager window.
# Arguments parsed by argparse are passed to the Manager.
from .manager import Manager
watchdog = AppWatchdog()
man = Manager(args=args)
# Register the manager with the watchdog and let the manager's quit signal
# trigger the watchdog's application shutdown.
watchdog.setupParentPoller(man)
man.sigManagerQuit.connect(watchdog.quitApplication)

## for debugging with pdb
#QtCore.pyqtRemoveInputHook()

# Optional manhole for debugging the running app from outside; it is only
# imported and installed when the `manhole` argument is set.
if args.manhole:
    import manhole
    manhole.install()


# Start Qt event loop unless running in interactive mode and not using PySide.
import core.util.helpers as helpers
# True only when Python was started interactively (sys.flags.interactive)
# and qtpy reports that PySide is not in use.
interactive = (sys.flags.interactive == 1) and not qtpy.PYSIDE

if interactive:
    # Announce the mode both in the log and on stdout.
    logger.info('Interactive mode; not starting event loop.')
    print('Interactive mode; not starting event loop.')

    # import some modules which might be useful on the command line
    import numpy as np

    # Use CLI history and tab completion
    import atexit
Beispiel #31
0
    try:

        setup_coverage()

        if os.getenv('PATCH_THREAD', False):
            import manhole

            setup_greenthreads(True)
        else:
            setup_greenthreads(True)
            import manhole

        if test_name == 'test_environ_variable_activation':
            time.sleep(TIMEOUT)
        elif test_name == 'test_install_twice_not_strict':
            manhole.install(oneshot_on='USR2')
            manhole.install(strict=False)
            time.sleep(TIMEOUT)
        elif test_name == 'test_log_fd':
            manhole.install(verbose=True, verbose_destination=2)
            manhole._LOG("whatever-1")
            manhole._LOG("whatever-2")
        elif test_name == 'test_log_fh':

            class Output(object):
                data = []
                write = data.append

            manhole.install(verbose=True, verbose_destination=Output)
            manhole._LOG("whatever")
            if Output.data and "]: whatever" in Output.data[-1]:
import signal

import gevent

from baseplate import config, make_metrics_client
import manhole

from .dispatcher import MessageDispatcher
from .socketserver import SocketServer
from .source import MessageSource

manhole.install(oneshot_on='USR1')

CONFIG_SPEC = {
    "amqp": {
        "endpoint": config.Endpoint,
        "vhost": config.String,
        "username": config.String,
        "password": config.String,
        "exchange": {
            "broadcast": config.String,
            "status": config.String,
        },
        "send_status_messages": config.Boolean,
    },
    "web": {
        "mac_secret": config.Base64,
        "ping_interval": config.Integer,
        "admin_auth": config.String,
        "conn_shed_rate": config.Integer,
    },
Beispiel #33
0
        format='[pid=%(process)d - %(asctime)s]: %(name)s - %(levelname)s - %(message)s',
    )
    test_name = sys.argv[1]
    try:

        setup_coverage()

        if os.getenv('PATCH_THREAD', False):
            import manhole
            setup_greenthreads(True)
        else:
            setup_greenthreads(True)
            import manhole

        if test_name == 'test_activate_on_usr2':
            manhole.install(activate_on='USR2')
            for i in range(TIMEOUT * 100):
                time.sleep(0.1)
        elif test_name == 'test_install_once':
            manhole.install()
            try:
                manhole.install()
            except manhole.AlreadyInstalled:
                print('ALREADY_INSTALLED')
            else:
                raise AssertionError("Did not raise AlreadyInstalled")
        elif test_name == 'test_stderr_doesnt_deadlock':
            import subprocess
            manhole.install()

            for i in range(50):
Beispiel #34
0
        log.addHandler(handler)

    # Instruct salt to use the gevent version of ZMQ
    import zmq.green
    import salt.utils.event
    salt.utils.event.zmq = zmq.green

    # Set up gevent compatibility in psycopg2
    import psycogreen.gevent
    psycogreen.gevent.patch_psycopg()

    if manhole is not None:
        # Enable manhole for debugging.  Use oneshot mode
        # for gevent compatibility
        manhole.cry = lambda message: log.info("MANHOLE: %s" % message)
        manhole.install(oneshot_on=signal.SIGUSR1)

    from gthulhu.manager import Manager
    m = Manager()
    m.start()
    print "Started Manager"
    complete = gevent.event.Event()

    def shutdown():
        log.info("Signal handler: stopping")
        complete.set()

    gevent.signal(signal.SIGTERM, shutdown)
    gevent.signal(signal.SIGINT, shutdown)

    while not complete.is_set():
Beispiel #35
0
def install(**kwargs):
    """Install manhole with project defaults for any option not supplied.

    Defaults: ``oneshot_on='URG'`` and ``connection_handler='exec'``. Values
    given by the caller win. Nothing is returned.
    """
    defaults = (
        ('oneshot_on', 'URG'),
        ('connection_handler', 'exec'),
    )
    for option, fallback in defaults:
        if option not in kwargs:
            kwargs[option] = fallback
    manhole.install(**kwargs)
Beispiel #36
0
            format=
            '[pid=%(process)d - %(asctime)s]: %(name)s - %(levelname)s - %(message)s',
        )
        test_name = sys.argv[2]

        setup_coverage()

        if os.getenv('PATCH_THREAD', False):
            import manhole
            setup_greenthreads(True)
        else:
            setup_greenthreads(True)
            import manhole

        if test_name == 'test_activate_on_usr2':
            manhole.install(activate_on='USR2')
            for i in range(TIMEOUT * 100):
                time.sleep(0.1)
        elif test_name == 'test_activate_on_with_oneshot_on':
            manhole.install(activate_on='USR2', oneshot_on='USR2')
            for i in range(TIMEOUT * 100):
                time.sleep(0.1)
        elif test_name == 'test_interrupt_on_accept':

            def handle_usr2(_sig, _frame):
                print('Got USR2')

            signal.signal(signal.SIGUSR2, handle_usr2)

            import ctypes
            import ctypes.util
Beispiel #37
0
 def open_manhole(dummy_signum):
     """Open a one-shot manhole if ``stack_dump_file`` names this process.

     The file is read and stripped; nothing happens unless its content
     equals the current PID. ``dummy_signum`` is passed twice to
     ``handle_oneshot``.
     """
     with open(stack_dump_file, 'r') as pid_file:
         recorded_pid = pid_file.read().strip()
         if recorded_pid != str(os.getpid()):
             return
         shell = manhole.install(strict=False, thread=False)
         shell.handle_oneshot(dummy_signum, dummy_signum)
def main():
    """Run the world-of-bits environment process until the agent connection exits.

    Steps, in order: parse command-line options, configure the logger level,
    write the env id to ``tmp/demo/env_id.txt`` under the filesystem root
    (best effort), start the agent connection, the environment controller,
    the rewarder and the IO thread, install a manhole for live debugging, and
    finally block on the shared error buffer.

    Returns:
        0 once the error buffer check raises ``remote.Exit``. Any other
        exception raised by the check propagates to the caller.
    """

    # command line option handling
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e',
                        '--env_id',
                        default='wob.mini.ClickTest-v0',
                        help='env id')
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    parser.add_argument('-m',
                        '--mode',
                        default='DATA',
                        help='mode (DATA | ENV | DEMO)')
    parser.add_argument('-f',
                        '--fps',
                        default=5,
                        type=int,
                        help='Number of frames per second')
    parser.add_argument(
        '-i',
        '--idle-timeout',
        type=float,
        help=
        'How long to keep the environment around when it has no active connections'
    )
    parser.add_argument('--rewarder-port',
                        type=int,
                        default=15900,
                        help='Which port to start the agent_conn thread')
    args = parser.parse_args()
    print(args)

    # logging and setup
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)
        logger.info("Starting world of bits run.py with: %s", sys.argv)

    error_buffer = universe.utils.ErrorBuffer()

    # Jot down the env_id so the uploader can find it later
    env_id_file_dir = os.path.join(os.sep, 'tmp', 'demo')
    env_id_file_path = os.path.join(env_id_file_dir, 'env_id.txt')
    if not os.path.exists(env_id_file_dir):
        logger.info("[world-of-bits] Creating directory %s", env_id_file_dir)
        # exist_ok=True: another process may create the directory between the
        # existence check above and this call; that must not be an error.
        os.makedirs(env_id_file_dir, exist_ok=True)

    # Writing the env id is best effort: a permission error is logged and
    # the rest of the startup continues.
    try:
        with open(env_id_file_path, 'w') as env_id_file:
            logger.info("[world-of-bits] Writing env id to file %s",
                        env_id_file_path)
            env_id_file.write(args.env_id)
            env_id_file.write('\n')
    except PermissionError:
        logger.info(
            "[world-of-bits] could not write env id to %s"
            " due to a permission error. skipping.", env_id_file_path)

    # create connection to the agent
    env_status = universe.rewarder.EnvStatus()
    env_status.set_env_info(env_id=args.env_id, fps=args.fps)
    cv = threading.Condition()
    control_buffer = remote.ControlBuffer(cv)
    agent_conn = remote.AgentConn(env_status,
                                  cv,
                                  control_buffer,
                                  error_buffer=error_buffer,
                                  idle_timeout=args.idle_timeout)
    agent_conn.listen(port=args.rewarder_port)

    # start up the environment controller
    env_controller = EnvController(env_status,
                                   agent_conn,
                                   error_buffer,
                                   control_buffer,
                                   args.mode,
                                   fps=args.fps)
    env_controller.start()

    # start up the rewarder
    rewarder = RewarderThread(env_status,
                              agent_conn,
                              env_controller,
                              error_buffer,
                              fps=args.fps)
    rewarder.start()

    # run the iothread
    iothread = IOThread(env_controller, error_buffer)
    iothread.start()

    # Debugging tool: expose the main components in the manhole shell
    manhole.install(
        locals={
            'rewarder': rewarder,
            'env_controller': env_controller,
            'agent_conn': agent_conn
        })

    # Check the error buffer with a 60 second timeout, forever; the only
    # normal way out is remote.Exit.
    while True:
        try:
            error_buffer.blocking_check(timeout=60)
        except remote.Exit as e:
            logger.info('%s', e)
            return 0
Beispiel #39
0
def main():
    """Run the controlplane process until the agent connection exits.

    Steps, in order: parse command-line options, install a SIGINT handler
    that exits the process immediately, optionally launch a demonstration
    agent subprocess, start the agent connection, build the VNC environment
    (unless ``--no-env``), start the environment controller and (unless
    ``--no-rewarder``) the rewarder, install a manhole for live debugging,
    and finally block on the shared error buffer.

    Returns:
        0 once the error buffer check raises ``remote.Exit``. Any other
        exception raised by the check propagates to the caller.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    parser.add_argument('-r',
                        '--remotes',
                        default='vnc://127.0.0.1:5900',
                        help='Which VNC address to connect to.')
    parser.add_argument(
        '-e',
        '--env-id',
        default=None,
        help=
        'An env ID to optionally run upon startup (e.g. flashgames.DuskDrive-v0).'
    )
    parser.add_argument('-V',
                        '--no-vexpect',
                        action='store_true',
                        help='Whether to use vexpect.')
    parser.add_argument('-S',
                        '--no-scorer',
                        action='store_true',
                        help='Whether to use the scorer.')
    parser.add_argument('-E',
                        '--no-env',
                        action='store_true',
                        help='Whether to maintain an environment.')
    # Help text fixed: it used to repeat the --no-vexpect description.
    parser.add_argument('-I',
                        '--integrator-mode',
                        action='store_true',
                        help='Run in integrator mode (implies --no-vexpect).')
    parser.add_argument('-R',
                        '--no-rewarder',
                        action='store_true',
                        help='Whether to enable the rewarder thread at all.')
    parser.add_argument('--rewarder-port',
                        type=int,
                        default=15900,
                        help='Which port to start the agent_conn thread')
    parser.add_argument('--rewarder-fps',
                        default=60,
                        type=float,
                        help='The frame rate for the rewarder.')
    parser.add_argument(
        '-i',
        '--idle-timeout',
        type=float,
        help=
        'How long to keep the environment around when it has no active connections'
    )
    parser.add_argument('--demonstration',
                        action='store_true',
                        help='Run a demonstration agent.')
    parser.add_argument(
        '--bot-demonstration',
        action='store_true',
        help=
        'Run a demonstration agent that connects to the vnc_recorder port, to record complete demos with no human playing'
    )

    args = parser.parse_args()

    # TODO: only activate in dev
    # Ctrl-C terminates the process immediately with exit status 10.
    signal.signal(signal.SIGINT, lambda signal, frame: os._exit(10))

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    # Launch demonstration agent if requested. Bot mode takes precedence
    # when both flags are given; neither runs without an env id.
    # The env id is shell-quoted because the command runs with shell=True.

    if args.bot_demonstration and args.env_id is not None:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} -r vnc://localhost:5899+15899 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent in bot mode: %s', cmd)
        subprocess.Popen(cmd, shell=True)

    elif args.demonstration and args.env_id is not None:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent: %s', cmd)
        subprocess.Popen(cmd, shell=True)

    logger.info(
        "Starting play_controlplane.py with the following: command=%s args=%s env=%s",
        sys.argv, args, os.environ)

    error_buffer = universe.utils.ErrorBuffer()

    env_status = universe.rewarder.EnvStatus()
    env_status.set_env_info(env_id=args.env_id, fps=args.rewarder_fps)

    cv = threading.Condition()
    control_buffer = remote.ControlBuffer(cv)
    agent_conn = remote.AgentConn(env_status,
                                  cv,
                                  control_buffer,
                                  error_buffer=error_buffer,
                                  idle_timeout=args.idle_timeout)
    agent_conn.listen(port=args.rewarder_port)

    # Logger gives us the diagnostics printing
    if not args.no_env:
        env = wrappers.Unvectorize(
            wrappers.Vision(wrappers.Logger(vnc_env.VNCEnv())))
        # Assert when given self-referential rewarder connection
        # This shows up as a '+15900' or similar port number in the remotes string
        assert '+' not in args.remotes, "Remotes may not have rewarder ports"
        env.configure(
            remotes=args.remotes,
            ignore_clock_skew=True,
            disable_action_probes=True,
            vnc_driver='go',
            vnc_kwargs={
                'encoding': 'zrle',
                'compress_level': 9
            },
            observer=True,
        )
    else:
        logger.info(
            'Running without environment, meaning reward and gameover parsing will be disabled'
        )
        env = None

    # Integrator mode turns vexpect off as well.
    no_vexpect = args.no_vexpect or args.integrator_mode

    env_controller = EnvController(
        env,
        args.remotes,
        env_status,
        agent_conn,
        error_buffer=error_buffer,
        control_buffer=control_buffer,
        no_vexpect=no_vexpect,
        integrator_mode=args.integrator_mode,
    )
    env_controller.start()

    if not args.no_rewarder:
        rewarder = Rewarder(
            env,
            args.remotes,
            agent_conn,
            env_status=env_controller.env_status,
            trigger_reset=env_controller.trigger_reset,
            error_buffer=error_buffer,
            no_vexpect=no_vexpect,
            no_scorer=args.no_scorer,
        )
        rewarder.start()
    else:
        rewarder = None

    # Debugging tool: `rewarder` is None in the shell when --no-rewarder is set
    manhole.install(locals={
        'rewarder': rewarder,
        'env_controller': env_controller
    })

    # TODO: clean up this API, but good enough for now
    # Check the error buffer with a 60 second timeout, forever; the only
    # normal way out is remote.Exit. (The former trailing `return 1` was
    # unreachable because this loop never breaks, so it has been removed.)
    while True:
        try:
            error_buffer.blocking_check(timeout=60)
        except remote.Exit as e:
            logger.info('%s', e)
            return 0
Beispiel #40
0
        format='[pid=%(process)d - %(asctime)s]: %(name)s - %(levelname)s - %(message)s',
    )
    test_name = sys.argv[1]
    try:
        if os.getenv('PATCH_THREAD', False):
            import manhole

            setup_greenthreads(True)
        else:
            setup_greenthreads(True)
            import manhole

        if test_name == 'test_environ_variable_activation':
            time.sleep(TIMEOUT)
        elif test_name == 'test_install_twice_not_strict':
            manhole.install(oneshot_on='USR2')
            manhole.install(strict=False)
            time.sleep(TIMEOUT)
        elif test_name == 'test_log_fd':
            manhole.install(verbose=True, verbose_destination=2)
            manhole._LOG("whatever-1")
            manhole._LOG("whatever-2")
        elif test_name == 'test_log_fh':
            class Output(object):
                data = []
                write = data.append

            manhole.install(verbose=True, verbose_destination=Output)
            manhole._LOG("whatever")
            if Output.data and "]: whatever" in Output.data[-1]:
                print("SUCCESS")
Beispiel #41
0
# Create Manager. This configures devices and creates the main manager window.
# Arguments parsed by argparse are passed to the Manager.
from .manager import Manager
watchdog = AppWatchdog()
man = Manager(args=args)
# Register the manager with the watchdog and let the manager's quit signal
# trigger the watchdog's application shutdown.
watchdog.setupParentPoller(man)
man.sigManagerQuit.connect(watchdog.quitApplication)

## for debugging with pdb
#QtCore.pyqtRemoveInputHook()

# Optional manhole for debugging the running app from outside; it is only
# imported and installed when the `manhole` argument is set.
if args.manhole:
    import manhole
    manhole.install()


# Start Qt event loop unless running in interactive mode and not using PySide.
import core.util.helpers as helpers
# True only when Python was started interactively (sys.flags.interactive)
# and qtpy reports that PySide is not in use.
interactive = (sys.flags.interactive == 1) and not qtpy.PYSIDE

if interactive:
    # Announce the mode both in the log and on stdout.
    logger.info('Interactive mode; not starting event loop.')
    print('Interactive mode; not starting event loop.')

    # import some modules which might be useful on the command line
    import numpy as np

    # Use CLI history and tab completion
    import atexit
            ],
            'callbacks': {
                'oninitial': self.oninitial,
                'onsetup': self.onsetup,
                'onactivate_joint': self.onactivate_joint,
                'onmove_to_first_pose': self.onmove_to_first_pose,
                'oncalibrate_joint': self.oncalibrate_joint,
                'onjoint_calibrated': self.onjoint_calibrated,
                'onset_next_joint': self.onset_next_joint,
                'onmove_to_zero': self.onmove_to_zero
            }
        })
        self.fsm.init()


if __name__ == "__main__":
    # Build the leveller and initialise its attributes before exposing it.
    l = Leveller(name="Masterer")
    l.init_attributes()

    # Manhole is an optional debugging aid: the app keeps running without it.
    # A missing module and a failed installation are reported separately so
    # that the "not installed" hint is only shown when it is actually true.
    try:
        import manhole
    except ImportError:
        print("Manhole not installed - run 'sudo pip2 install manhole'")
    else:
        try:
            manhole.install(locals={
                'l': l,
            })
        except Exception as exc:
            # Still best effort: report the problem and carry on.
            print("Manhole could not be started: %s" % exc)

    # Keep the main thread alive until the user presses Ctrl-C.
    try:
        while True:
            time.sleep(0.01)
    except KeyboardInterrupt:
        print("Exiting app")