Example #1
def s3_list_downloads(raven_client):
    files = {"full": [], "diff1": [], "diff2": []}

    if not settings("asset_bucket"):
        return files

    asset_url = settings("asset_url")
    if not asset_url.endswith("/"):
        asset_url = asset_url + "/"

    diff = []
    full = []
    try:
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(settings("asset_bucket"))
        for obj in bucket.objects.filter(Prefix="export/"):
            name = obj.key.split("/")[-1]
            path = urlparse.urljoin(asset_url, obj.key)
            # round to kilobyte
            size = int(round(obj.size / 1024.0, 0))
            file = dict(name=name, path=path, size=size)
            if "diff-" in name:
                diff.append(file)
            elif "full-" in name:
                full.append(file)
    except (Boto3Error, BotoCoreError):
        raven_client.captureException()
        return files

    half = len(diff) // 2 + len(diff) % 2
    diff = sorted(diff, key=itemgetter("name"), reverse=True)
    files["diff1"] = diff[:half]
    files["diff2"] = diff[half:]
    files["full"] = list(sorted(full, key=itemgetter("name"), reverse=True))
    return files
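
The return shape is fixed by the function above; here is a minimal sketch of a consumer (the view name and page title are assumptions, not taken from the project):

def downloads_view(raven_client):
    files = s3_list_downloads(raven_client)
    # files == {"full": [...], "diff1": [...], "diff2": [...]} where each
    # entry is {"name": str, "path": str, "size": int}  (size in kilobytes)
    return {"page_title": "Downloads", "files": files}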
Example #2
 def map_view(self):
     map_tiles_url = get_map_tiles_url(settings("asset_url"))
     return {
         "page_title": "Map",
         "map_enabled": self.is_map_enabled(),
         "map_tiles_url": map_tiles_url,
         "map_token": settings("mapbox_token"),
     }
Example #3
 def homepage_view(self):
     map_tiles_url = get_map_tiles_url(settings("asset_url"))
     image_base_url = HOMEPAGE_MAP_IMAGE.format(token=settings("mapbox_token"))
     image_url = map_tiles_url.format(z=0, x=0, y="0@2x")
     return {
         "page_title": "Overview",
         "map_enabled": self.is_map_enabled(),
         "map_image_base_url": image_base_url,
         "map_image_url": image_url,
     }
Example #4
def configure_redis(cache_url=None, _client=None):
    """
    Configure and return a :class:`~ichnaea.cache.RedisClient` instance.

    :param _client: Test-only hook to provide a pre-configured client.
    """
    cache_url = settings("redis_uri") if cache_url is None else cache_url

    if _client is not None:
        return _client

    url = urlparse(cache_url)
    netloc = url.netloc.split(":")
    host = netloc[0]
    if len(netloc) > 1:
        port = int(netloc[1])
    else:
        port = 6379
    if len(url.path) > 1:
        db = int(url.path[1:])
    else:
        db = 0
    pool = redis.ConnectionPool(
        max_connections=20,
        host=host,
        port=port,
        db=db,
        socket_timeout=30.0,
        socket_connect_timeout=60.0,
        socket_keepalive=True,
    )
    return RedisClient(connection_pool=pool)
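
A minimal usage sketch (not from the project), assuming RedisClient exposes the standard redis-py interface:

# The URI is illustrative; omit the argument to fall back to REDIS_URI.
redis_client = configure_redis("redis://localhost:6379/0")
redis_client.ping()  # raises redis.exceptions.ConnectionError if unreachable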
Example #5
    def __call__(self, hourly=True, _bucket=None):
        if _bucket is None:
            bucket = settings("asset_bucket")
        else:
            bucket = _bucket

        if not bucket:
            return

        now = util.utcnow()
        today = now.date()
        start_time = None
        end_time = None

        if hourly:
            end_time = now.replace(minute=0, second=0)
            file_time = end_time
            file_type = "diff"
            start_time = end_time - timedelta(hours=1)
        else:
            file_time = now.replace(hour=0, minute=0, second=0)
            file_type = "full"

        filename = "MLS-%s-cell-export-" % file_type
        filename = filename + file_time.strftime("%Y-%m-%dT%H0000.csv.gz")

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, filename)
            with self.task.db_session(commit=False) as session:
                write_stations_to_csv(session,
                                      path,
                                      today,
                                      start_time=start_time,
                                      end_time=end_time)
            self.write_stations_to_s3(path, bucket)
Example #6
def main(argv, _db=None):
    parser = argparse.ArgumentParser(
        prog=argv[0],
        description=(
            "Import from public cell data into a local dev environment. "
            "See https://location.services.mozilla.com/downloads"),
    )
    parser.add_argument("filename", help="Path to the csv.gz import file.")

    args = parser.parse_args(argv[1:])

    if not settings("local_dev_env"):
        print("This script can only be run in a local dev environment.")
        print("Set LOCAL_DEV_ENV=True in your environment.")
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if not os.path.isfile(filename):
        print("File %s not found." % filename)
        return 1

    configure_logging()
    celery_app = get_eager_celery_app()
    init_worker(celery_app)
    cellarea_queue = celery_app.data_queues["update_cellarea"]

    with db_worker_session(celery_app.db, commit=False) as session:
        with gzip_open(filename, "r") as file_handle:
            read_stations_from_csv(session, file_handle,
                                   celery_app.redis_client, cellarea_queue)
    return 0
Example #7
    def is_map_enabled(self):
        """Return whether maps are enabled.

        Enable maps if and only if there's a mapbox token and a url for the
        tiles location. Otherwise it's disabled.

        """
        return bool(settings("mapbox_token"))
Example #8
def main(argv, _raven_client=None, _bucketname=None):
    # run for example via:
    # bin/location_map --create --upload \
    #   --output=ichnaea/content/static/tiles/

    parser = argparse.ArgumentParser(
        prog=argv[0], description="Generate and upload datamap tiles.")

    parser.add_argument("--create", action="store_true", help="Create tiles?")
    parser.add_argument("--upload",
                        action="store_true",
                        help="Upload tiles to S3?")
    parser.add_argument("--concurrency",
                        default=2,
                        help="How many concurrent processes to use?")
    parser.add_argument("--output",
                        help="Optional directory for output files.")

    args = parser.parse_args(argv[1:])
    if args.create:
        raven_client = configure_raven(transport="sync",
                                       tags={"app": "datamap"},
                                       _client=_raven_client)

        configure_stats()

        bucketname = _bucketname
        if not _bucketname:
            bucketname = settings("asset_bucket")
            if bucketname:
                bucketname = bucketname.strip("/")

        upload = False
        if args.upload:
            upload = bool(args.upload)

        concurrency = billiard.cpu_count()
        if args.concurrency:
            concurrency = int(args.concurrency)

        output = None
        if args.output:
            output = os.path.abspath(args.output)

        try:
            with METRICS.timer("datamaps", tags=["func:main"]):
                generate(
                    bucketname,
                    raven_client,
                    upload=upload,
                    concurrency=concurrency,
                    output=output,
                )
        except Exception:
            raven_client.captureException()
            raise
    else:
        parser.print_help()
Example #9
def security_headers(event):
    response = event.response
    # Headers for all responses.
    response.headers.add("Strict-Transport-Security",
                         "max-age=31536000; includeSubDomains")
    response.headers.add("X-Content-Type-Options", "nosniff")
    # Headers for HTML responses.
    if response.content_type == "text/html":
        response.headers.add("Content-Security-Policy",
                             get_csp_policy(settings("asset_url")))
        response.headers.add("X-Frame-Options", "DENY")
        response.headers.add("X-XSS-Protection", "1; mode=block")
Example #10
def configure_stats():
    """Configure Markus for metrics."""
    local_dev_env = settings("local_dev_env")
    if local_dev_env:
        markus.configure(
            backends=[{
                "class": "markus.backends.logging.LoggingMetrics"
            }])
        return

    if settings("statsd_host"):
        markus.configure(backends=[{
            "class": "markus.backends.datadog.DatadogMetrics",
            "options": {
                "statsd_host": settings("statsd_host"),
                "statsd_port": settings("statsd_port"),
                "statsd_namespace": "location",
            },
        }])
    else:
        logging.getLogger(__name__).warning(
            "STATSD_HOST not set; no statsd configured")
Example #11
def generate_signature(reason, *parts):
    """
    Generate a salted signature for a set of strings.

    :arg reason: A short "why" string used to salt the hash
    :arg parts: A list of strings to add to the signature
    """
    siggen = sha512()
    for part in parts:
        if part:
            siggen.update(part.encode())
    siggen.update(reason.encode())
    siggen.update(settings("secret_key").encode())
    return siggen.hexdigest()
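
An illustrative call (the reason and parts are made up): the digest is deterministic for a given SECRET_KEY, and falsy parts are skipped.

sig1 = generate_signature("email-unsubscribe", "user@example.com", None)
sig2 = generate_signature("email-unsubscribe", "user@example.com", "")
assert sig1 == sig2  # None and "" are both skipped by the `if part:` check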
Example #12
 def __call__(self, *args, **kw):
     """
     Execute the task, capture a statsd timer for the task duration and
     automatically report exceptions into Sentry.
     """
     with METRICS.timer("task", tags=["task:" + self.shortname()]):
         try:
             result = super(BaseTask, self).__call__(*args, **kw)
         except Exception as exc:
             self.raven_client.captureException()
             if self._auto_retry and not settings("testing"):
                 raise self.retry(exc=exc)
             raise
     return result
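
A sketch of how a task might opt into this base class, using Celery's standard base= argument (the task name and queue are assumptions, not taken from the project):

@celery_app.task(base=BaseTask, bind=True, queue="celery_export")
def export_full(self):
    # Runs inside the METRICS timer above; uncaught exceptions are
    # reported to Sentry and retried unless settings("testing") is set.
    ...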
Example #13
def configure_geoip(filename=None,
                    mode=MODE_AUTO,
                    raven_client=None,
                    _client=None):
    """
    Configure and return a :class:`~ichnaea.geoip.GeoIPWrapper` instance.

    If no geoip database file of the correct type can be found, return
    a :class:`~ichnaea.geoip.GeoIPNull` dummy implementation instead.

    :param raven_client: A configured raven/sentry client.
    :type raven_client: :class:`raven.base.Client`

    :param _client: Test-only hook to provide a pre-configured client.
    """
    filename = settings("geoip_path") if filename is None else filename

    if _client is not None:
        return _client

    if not filename:
        # No DB file specified in the config
        if raven_client is not None:
            try:
                raise OSError("No geoip filename specified.")
            except OSError:
                raven_client.captureException()
        LOGGER.info("Returning GeoIPNull.")
        return GeoIPNull()

    try:
        db = GeoIPWrapper(filename, mode=mode)
        if not db.check_extension() and raven_client is not None:
            try:
                raise RuntimeError("Maxmind C extension not installed.")
            except RuntimeError:
                raven_client.captureException()
        # Actually initialize the memory cache, by doing one fake look-up
        db.lookup("127.0.0.1")
    except (InvalidDatabaseError, IOError, OSError, ValueError):
        # Error opening the database file, maybe it doesn't exist
        if raven_client is not None:
            raven_client.captureException()
        LOGGER.info("Returning GeoIPNull.")
        return GeoIPNull()

    LOGGER.info("GeoIP configured.")
    return db
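
A minimal usage sketch: both GeoIPWrapper and GeoIPNull are expected to expose lookup(), so callers never need to branch on the fallback (the IP address is illustrative, and the "dict or None" return contract is an assumption):

geoip_db = configure_geoip()
result = geoip_db.lookup("8.8.8.8")  # a dict of geo data, or None on a miss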
Example #14
def cmd_clitest(ctx):
    """Run Sentry test through cli."""
    sentry_dsn = settings("sentry_dsn")
    if not sentry_dsn:
        click.echo(
            click.style(
                "SENTRY_DSN is not configured so this will use DebugRavenClient.",
                fg="green",
            ))

    msg = "Testing Sentry configuration via cli (%s)" % str(
        datetime.datetime.now())
    click.echo(click.style("Using message: %s" % msg, fg="green"))
    click.echo(click.style("Building Raven client...", fg="green"))
    client = configure_raven(transport="sync", tags={"app": "sentry_test"})
    click.echo(click.style("Sending message...", fg="green"))
    client.captureMessage(msg)
Example #15
    def apply(self, *args, **kw):
        """
        This method is only used when calling tasks directly and blocking
        on them. It's also used if always_eager is set, like in tests.

        If always_eager is set, we feed the task arguments through the
        de/serialization process to make sure the arguments can indeed
        be serialized into JSON.
        """
        if settings("testing"):
            # We do the extra check to make sure this was really used from
            # inside tests
            serializer = self.app.conf.task_serializer
            content_type, encoding, data = kombu_dumps(args, serializer)
            args = kombu_loads(data, content_type, encoding)

        return super(BaseTask, self).apply(*args, **kw)
Example #16
def configure_raven(transport=None, _client=None):
    """Configure and return a :class:`raven.Client` instance.

    :param transport: The transport to use, one of the
                      :data:`RAVEN_TRANSPORTS` keys.
    :param _client: Test-only hook to provide a pre-configured client.
    """
    if _client is not None:
        return _client

    transport = RAVEN_TRANSPORTS.get(transport)
    if not transport:
        raise ValueError("No valid raven transport was configured.")

    dsn = settings("sentry_dsn")
    klass = DebugRavenClient if not dsn else RavenClient
    info = version_info()
    release = info.get("version") or info.get("commit") or "unknown"
    client = klass(dsn=dsn, transport=transport, release=release)
    return client
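
An illustrative usage: with no SENTRY_DSN configured this returns the DebugRavenClient, so the call is safe in local development.

raven_client = configure_raven(transport="sync")
try:
    1 / 0
except ZeroDivisionError:
    raven_client.captureException()  # sent to Sentry, or echoed locally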
Example #17
 def map_json(self):
     map_tiles_url = get_map_tiles_url(settings("asset_url"))
     offset = map_tiles_url.find(TILES_PATTERN)
     base_url = map_tiles_url[:offset]
     return {"tiles_url": base_url}
Example #18
def main(_argv=None, _raven_client=None, _bucket_name=None):
    """
    Command-line entry point.

    :param _argv: Simulated sys.argv[1:] arguments for testing
    :param _raven_client: override Raven client for testing
    :param _bucket_name: override S3 bucket name for testing
    :return: A system exit code
    :rtype: int
    """

    # Parse the command line
    parser = get_parser()
    args = parser.parse_args(_argv)
    create = args.create
    upload = args.upload
    concurrency = args.concurrency
    verbose = args.verbose

    # Setup basic services
    if verbose:
        configure_logging(local_dev_env=True, logging_level="DEBUG")
    else:
        configure_logging()
    raven_client = configure_raven(
        transport="sync", tags={"app": "datamap"}, _client=_raven_client
    )

    # Check consistent output_dir, create, upload
    exit_early = 0
    output_dir = None
    if args.output:
        output_dir = os.path.abspath(args.output)
        tiles_dir = os.path.join(output_dir, "tiles")
        if not create and not os.path.isdir(tiles_dir):
            LOG.error(
                "The tiles subfolder of the --output directory should already"
                " exist when calling --upload without --create, to avoid"
                " deleting files from the S3 bucket.",
                tiles_dir=tiles_dir,
            )
            exit_early = 1
    else:
        if create and not upload:
            LOG.error(
                "The --output argument is required with --create but without"
                " --upload, since the temporary folder is removed at exit."
            )
            exit_early = 1

        if upload and not create:
            LOG.error(
                "The --output argument is required with --upload but without"
                " --create, to avoid deleting all tiles in the S3 bucket."
            )
            exit_early = 1

    # Exit early with help message if error or nothing to do
    if exit_early or not (create or upload):
        parser.print_help()
        return exit_early

    # Determine the S3 bucket name
    bucket_name = _bucket_name
    if not _bucket_name:
        bucket_name = settings("asset_bucket")
        if bucket_name:
            bucket_name = bucket_name.strip("/")

    # Check that the implied credentials are authorized to use the bucket
    if upload:
        if not bucket_name:
            LOG.error("Unable to determine upload bucket_name.")
            return 1
        else:
            works, fail_msg = check_bucket(bucket_name)
            if not works:
                LOG.error(
                    f"Bucket {bucket_name} can not be used for uploads: {fail_msg}"
                )
                return 1

    # Generate and upload the tiles
    success = True
    interrupted = False
    result = {}
    try:
        with Timer() as timer:
            if output_dir:
                result = generate(
                    output_dir,
                    bucket_name,
                    raven_client,
                    create=create,
                    upload=upload,
                    concurrency=concurrency,
                )
            else:
                with util.selfdestruct_tempdir() as temp_dir:
                    result = generate(
                        temp_dir,
                        bucket_name,
                        raven_client,
                        create=create,
                        upload=upload,
                        concurrency=concurrency,
                    )
    except KeyboardInterrupt:
        interrupted = True
        success = False
    except Exception:
        raven_client.captureException()
        success = False
        raise
    finally:
        if create and upload:
            task = "generation and upload"
        elif create:
            task = "generation"
        else:
            task = "upload"
        if interrupted:
            complete = "interrupted"
        elif success:
            complete = "complete"
        else:
            complete = "failed"
        final_log = structlog.get_logger("canonical-log-line")
        final_log.info(
            f"Datamap tile {task} {complete} in {timer.duration_s:0.1f} seconds.",
            success=success,
            duration_s=timer.duration_s,
            script_name="ichnaea.scripts.datamap",
            create=create,
            upload=upload,
            concurrency=concurrency,
            bucket_name=bucket_name,
            **result,
        )

    return 0
Example #19
def _map_content_enabled():
    return bool(settings("mapbox_token"))
Example #20
def configure_logging():
    """Configure Python logging."""
    local_dev_env = settings("local_dev_env")
    logging_level = settings("logging_level")

    if local_dev_env:
        handlers = ["dev"]
        # Prepare structlog logs for local dev ProcessorFormatter
        structlog_fmt_prep = structlog.stdlib.ProcessorFormatter.wrap_for_formatter
        structlog_dev_processors = [
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            structlog.processors.TimeStamper(fmt="iso"),
        ]
    else:
        handlers = ["mozlog"]
        # Prepare structlog logs for JsonLogFormatter
        structlog_fmt_prep = structlog.stdlib.render_to_log_kwargs
        structlog_dev_processors = []

    # Processors used for logs generated by structlog and stdlib's logging
    logging_config = {
        "version": 1,
        "disable_existing_loggers": True,
        "formatters": {
            "structlog_dev_console": {
                "()": structlog.stdlib.ProcessorFormatter,
                "processor": structlog.dev.ConsoleRenderer(colors=True),
                "foreign_pre_chain": structlog_dev_processors,
            },
            "mozlog_json": {
                "()": "dockerflow.logging.JsonLogFormatter",
                "logger_name": "ichnaea",
            },
        },
        "handlers": {
            "dev": {
                "class": "logging.StreamHandler",
                "formatter": "structlog_dev_console",
                "level": "DEBUG",
            },
            "mozlog": {
                "class": "logging.StreamHandler",
                "formatter": "mozlog_json",
                "level": "DEBUG",
            },
        },
        "loggers": {
            "alembic": {
                "propagate": False,
                "handlers": handlers,
                "level": logging_level,
            },
            "celery": {
                "propagate": False,
                "handlers": handlers,
                "level": logging_level,
            },
            "ichnaea": {
                "propagate": False,
                "handlers": handlers,
                "level": logging_level,
            },
            "markus": {
                "propagate": False,
                "handlers": handlers,
                "level": logging_level,
            },
            # https://stripe.com/blog/canonical-log-lines
            "canonical-log-line": {
                "propagate": False,
                "handlers": handlers,
                "level": logging_level,
            },
        },
        "root": {
            "handlers": handlers,
            "level": "WARNING"
        },
    }

    logging.config.dictConfig(logging_config)

    structlog_processors = ([
        structlog.threadlocal.merge_threadlocal,
        structlog.stdlib.filter_by_level
    ] + structlog_dev_processors + [
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog_fmt_prep,
    ])
    structlog.configure(
        context_class=structlog.threadlocal.wrap_dict(dict),
        processors=structlog_processors,
        logger_factory=structlog.stdlib.LoggerFactory(
            ignore_frame_names=["venusian", "pyramid.config"]),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )
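
After configuration, structlog loggers emit through the stdlib handlers defined above; a short sketch (the event name and keyword are illustrative):

configure_logging()
logger = structlog.get_logger("ichnaea")
logger.info("server_started", port=8000)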
Example #21
def log_tween_factory(handler, registry):
    """A logging tween, handling collection of stats, exceptions, and a request log."""

    local_dev_env = settings("local_dev_env")

    def log_tween(request):
        """Time a request, emit metrics and log results, with exception handling."""
        start = time.time()
        structlog.threadlocal.clear_threadlocal()
        structlog.threadlocal.bind_threadlocal(http_method=request.method,
                                               http_path=request.path)
        # Skip detailed logging and capturing for static assets, either in
        # /static or paths like /robots.txt
        is_static_content = (request.path in registry.skip_logging
                             or request.path.startswith("/static"))

        def record_response(status_code):
            """Time request, (maybe) emit metrics, and (maybe) log this request.

            For static assets, metrics are skipped, and logs are skipped unless
            we're in the development environment.
            """
            duration = time.time() - start

            if not is_static_content:
                # Emit a request.timing and a request metric
                duration_ms = round(duration * 1000)
                # Convert the URI path to a statsd-acceptable metric name
                stats_path = request.path.replace("/", ".").lstrip(".").replace("@", "-")
                # Use generate_tag to lowercase and truncate to 200 characters
                statsd_tags = [
                    # The homepage path maps to "", so fall back to ".homepage"
                    generate_tag("path", stats_path or ".homepage"),
                    generate_tag("method", request.method),  # GET -> get, POST -> post
                ]
                METRICS.timing("request.timing", duration_ms, tags=statsd_tags)
                METRICS.incr(
                    "request",
                    tags=statsd_tags + [generate_tag("status", str(status_code))],
                )

            if local_dev_env or not is_static_content:
                # Emit a canonical-log-line
                duration_s = round(duration, 3)
                logger = structlog.get_logger("canonical-log-line")
                logger.info(
                    f"{request.method} {request.path} - {status_code}",
                    http_status=status_code,
                    duration_s=duration_s,
                )

        try:
            response = handler(request)
            record_response(response.status_code)
            return response
        except (BaseClientError, HTTPRedirection) as exc:
            # BaseClientError: 4xx errors raised by the Ichnaea API and other Ichnaea code
            # HTTPRedirection: 3xx redirect from Pyramid
            # Log, but do not send these exceptions to Sentry
            record_response(exc.status_code)
            raise
        except HTTPClientError:
            # HTTPClientError: 4xx error from Pyramid
            # Do not log or send to Sentry
            raise
        except HTTPException as exc:
            # HTTPException: Remaining 5xx (or maybe 2xx) errors from Pyramid
            # Log and send to Sentry
            record_response(exc.status_code)
            registry.raven_client.captureException()
            raise
        except Exception:
            # Any other exception: treat as a 500 Internal Server Error,
            # log it, and send it to Sentry
            record_response(500)
            registry.raven_client.captureException()
            raise

    return log_tween
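
Pyramid tween factories like this are registered by dotted name on the Configurator; the module path below is an assumption, not taken from the project:

config.add_tween("ichnaea.webapp.monitor.log_tween_factory")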
Example #22
import backoff
import markus
from pymysql.constants.CLIENT import MULTI_STATEMENTS
from pymysql.constants.ER import LOCK_WAIT_TIMEOUT, LOCK_DEADLOCK
from pymysql.err import DatabaseError, MySQLError
from sqlalchemy import create_engine
from sqlalchemy.engine.url import make_url
from sqlalchemy.exc import OperationalError, StatementError
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.pool import NullPool, QueuePool
from sqlalchemy.sql import func, select

from ichnaea.conf import settings

DB_TYPE = {
    "ro": settings("db_readonly_uri"),
    "rw": settings("db_readwrite_uri")
}
METRICS = markus.get_metrics()


class SqlAlchemyUrlNotSpecified(Exception):
    """Raised when SQLALCHEMY_URL is not specified in environment."""
    def __init__(self, *args, **kwargs):
        super().__init__("SQLALCHEMY_URL is not specified in the environment")


def get_sqlalchemy_url():
    """Returns the ``SQLALCHEMY_URL`` environment value.

    :returns: the sqlalchemy url to be used for alembic migrations
    """
Example #23
def _cell_export_enabled():
    return bool(settings("asset_bucket"))
Example #24
def configure_logging():
    """Configure Python logging."""
    local_dev_env = settings("local_dev_env")
    logging_level = settings("logging_level")

    if local_dev_env:
        handlers = ["console"]
    else:
        handlers = ["mozlog"]

    logging_config = {
        "version": 1,
        "disable_existing_loggers": True,
        "formatters": {
            "app": {
                "format":
                "%(asctime)s %(levelname)-5s [%(name)s] - %(message)s"
            },
            "json": {
                "()": "dockerflow.logging.JsonLogFormatter",
                "logger_name": "ichnaea",
            },
        },
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                "formatter": "app",
                "level": "DEBUG",
            },
            "mozlog": {
                "class": "logging.StreamHandler",
                "formatter": "json",
                "level": "DEBUG",
            },
        },
        "loggers": {
            "alembic": {
                "propagate": False,
                "handlers": handlers,
                "level": logging_level,
            },
            "celery": {
                "propagate": False,
                "handlers": handlers,
                "level": logging_level,
            },
            "ichnaea": {
                "propagate": False,
                "handlers": handlers,
                "level": logging_level,
            },
            "markus": {
                "propagate": False,
                "handlers": handlers,
                "level": logging_level,
            },
        },
        "root": {
            "handlers": handlers,
            "level": "WARNING"
        },
    }

    logging.config.dictConfig(logging_config)