Example #1
0
    def handle(self, *args, **options):
        """
        Report on the health of Celery workers and Celery beat.

        Queues the ``ping`` task and waits up to 30 seconds for its result,
        then asks ``beat_time_okay()`` whether the periodic beat task has run
        recently. Problems are printed to stdout; nothing is printed when
        every check succeeds. ``args`` and ``options`` are not used.
        """
        pending = ping.apply_async()
        try:
            outcome = pending.get(timeout=30)
        except celery.exceptions.TimeoutError:
            # no worker answered in time, so the remaining checks are skipped
            print("Celery task didn't complete: Celery may be down.")
            return

        # the task result must be exactly True, not merely truthy
        if outcome is not True:
            print("Wrong result from celery task")

        # check that the coredata.tasks.beat_test periodic task has run recently
        beat_is_fresh = beat_time_okay()
        if not beat_is_fresh:
            print("Periodic task marker file is old: celery beat likely not processing tasks.")
Example #2
0
    def handle(self, *args, **options):
        """
        Report on the health of Celery workers and Celery beat.

        Queues the ``ping`` task and waits up to 30 seconds for its result,
        then compares the modification time of ``BEAT_TEST_FILE`` against
        ``BEAT_FILE_MAX_AGE``. Problems are printed to stdout; nothing is
        printed when every check succeeds. ``args`` and ``options`` are not
        used.
        """
        pending = ping.apply_async()
        try:
            outcome = pending.get(timeout=30)
        except celery.exceptions.TimeoutError:
            # no worker answered in time, so the remaining checks are skipped
            print("Celery task didn't complete: Celery may be down.")
            return

        # the task result must be exactly True, not merely truthy
        if outcome is not True:
            print("Wrong result from celery task")

        # check that the coredata.tasks.beat_test periodic task has run recently
        try:
            now = time.time()
            marker_mtime = os.stat(BEAT_TEST_FILE).st_mtime
            marker_is_stale = (now - marker_mtime) > BEAT_FILE_MAX_AGE
            if marker_is_stale:
                print("Periodic task marker file is old: celery beat likely not processing tasks.")
        except OSError:
            # os.stat raises OSError when the marker file does not exist
            print("Periodic task marker file missing: celery beat likely not processing tasks.")
Example #3
0
    def handle(self, *args, **options):
        """
        Report on the health of Celery workers and Celery beat.

        Queues the ``ping`` task and waits up to 30 seconds for its result,
        then compares the modification time of ``BEAT_TEST_FILE`` against
        ``BEAT_FILE_MAX_AGE``. Problems are printed to stdout; nothing is
        printed when every check succeeds. ``args`` and ``options`` are not
        used.

        Messages are printed with the parenthesised single-argument form of
        ``print``, which produces the same output on Python 2 and Python 3.
        """
        t = ping.apply_async()
        try:
            res = t.get(timeout=30)
        except celery.exceptions.TimeoutError:
            # no worker answered in time, so the remaining checks are skipped
            print("Celery task didn't complete: Celery may be down.")
            return

        # the task result must be exactly True, not merely truthy
        if res is not True:
            print("Wrong result from celery task")

        # check that the coredata.tasks.beat_test periodic task has run recently
        try:
            beatfile_age = time.time() - os.stat(BEAT_TEST_FILE).st_mtime
            if beatfile_age > BEAT_FILE_MAX_AGE:
                print("Periodic task marker file is old: celery beat likely not processing tasks.")
        except OSError:
            # os.stat raises OSError when the marker file does not exist
            print("Periodic task marker file missing: celery beat likely not processing tasks.")
Example #4
0
def deploy_checks(request=None):
    """
    Run the deployment sanity checks and collect their outcomes.

    Each check appends a ``(check name, message)`` 2-tuple to either the
    ``passed`` or the ``failed`` list. The checks cover the main database,
    Celery and Celery beat, the Django cache, the reporting database, asset
    compression, Haystack search, photo fetching, the emplid API, certificates,
    file creation, externally-listening ports, server time, library sanity and
    the markdown subprocess.

    :param request: not used by the active code; only the commented-out Piwik
        check refers to it.
    :return: ``(passed, failed)``, two lists of ``(check name, message)``
        tuples.
    """
    passed = []
    failed = []

    # cache something now to see if it's still there further down.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    try:
        n = Semester.objects.all().count()
        if n > 0:
            passed.append(('Main database connection', 'okay'))
        else:
            failed.append(('Main database connection',
                           "Can't find any coredata.Semester objects"))
    except django.db.utils.OperationalError:
        failed.append(
            ('Main database connection', "can't connect to database"))
    except django.db.utils.ProgrammingError:
        failed.append(('Main database connection', "database tables missing"))

    # non-BMP Unicode in database
    try:
        entry = LogEntry.objects.create(userid='ggbaker',
                                        description='Test Unicode \U0001F600',
                                        related_object=Semester.objects.first())
    except OperationalError:
        failed.append(('Unicode handling in database',
                       'non-BMP character not supported by connection'))
    else:
        # re-read the row to see what was actually stored
        entry = LogEntry.objects.get(id=entry.id)
        if '\U0001F600' in entry.description:
            passed.append(('Unicode handling in database', 'okay'))
        else:
            failed.append(('Unicode handling in database',
                           'non-BMP character not stored correctly'))

    # Celery tasks
    celery_okay = False
    try:
        if settings.USE_CELERY:
            try:
                from coredata.tasks import ping
            except ImportError:
                failed.append(
                    ('Celery task',
                     "Couldn't import task: probably missing MySQLdb module"))
            else:
                try:
                    t = ping.apply_async()
                except kombu.exceptions.OperationalError:
                    failed.append(
                        ('Celery task',
                         'Kombu error. Probably RabbitMQ not running.'))
                else:
                    res = t.get(timeout=5)
                    if res == True:
                        passed.append(('Celery task', 'okay'))
                        celery_okay = True
                    else:
                        failed.append(
                            ('Celery task', 'got incorrect result from task'))
        else:
            failed.append(('Celery task', 'celery disabled in settings'))
    except celery.exceptions.TimeoutError:
        failed.append(
            ('Celery task',
             "didn't get result before timeout: celeryd maybe not running"))
    except socket.error:
        failed.append(('Celery task', "can't communicate with broker"))
    except NotImplementedError:
        failed.append(('Celery task', 'celery disabled'))
    except django.db.utils.ProgrammingError:
        failed.append(('Celery task', 'celery DB tables missing'))
    except django.db.utils.OperationalError:
        failed.append(('Celery task', 'djkombu tables missing: try migrating'))

    # celery beat
    try:
        from coredata.tasks import BEAT_TEST_FILE, BEAT_FILE_MAX_AGE
        beatfile_age = time.time() - os.stat(BEAT_TEST_FILE).st_mtime
        if beatfile_age < BEAT_FILE_MAX_AGE:
            passed.append(('Celery beat', 'okay'))
        else:
            failed.append((
                'Celery beat',
                'marker file is old: celery beat likely not processing tasks'))
    except OSError:
        failed.append((
            'Celery beat',
            'marker file is missing: celery beat likely not processing tasks'))

    # Django cache
    # (has a subprocess do something to make sure we're in a persistent shared cache, not DummyCache)
    subprocess.call(
        ['python3', 'manage.py', 'check_things', '--cache_subcall'])
    cache_okay = False
    res = cache.get('check_things_cache_test')
    if res == randval:
        # value unchanged: whatever the subprocess did never reached our cache
        failed.append((
            'Django cache',
            'other processes not sharing cache: dummy/local probably being used instead of memcached'
        ))
    elif res is None:
        failed.append(
            ('Django cache', 'unable to retrieve anything from cache'))
    elif res != randval + 1:
        failed.append(('Django cache', 'unknown result'))
    else:
        # the only accepted value is randval + 1
        passed.append(('Django cache', 'okay'))
        cache_okay = True

    # Reporting DB connection
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=301355288", ())
        result = list(db)
        if not result:
            # must be tested before indexing into the rows below, otherwise an
            # empty result raises IndexError instead of being reported
            failed.append(('Reporting DB connection',
                           'query inexplicably returned nothing'))
        else:
            # whoever this is, they have non-ASCII in their name: let's hope they don't change it.
            lname = result[0][0]
            if not isinstance(lname, str):
                failed.append(
                    ('Reporting DB connection',
                     'string result not a string: check Unicode decoding'))
            elif lname[1] != u'\u00e4':
                failed.append(('Reporting DB connection',
                               'returned incorrectly-decoded Unicode'))
            else:
                passed.append(('Reporting DB connection', 'okay'))
    except SIMSProblem as e:
        failed.append(
            ('Reporting DB connection', 'SIMSProblem, %s' % (str(e))))
    except ImportError:
        failed.append(
            ('Reporting DB connection', "couldn't import DB2 module"))

    # compression enabled?
    if settings.COMPRESS_ENABLED:
        passed.append(('Asset compression enabled', 'okay'))
    else:
        failed.append(('Asset compression enabled', 'disabled in settings'))

    # Haystack searching
    from haystack.query import SearchQuerySet
    try:
        res = SearchQuerySet().filter(text='cmpt')
        if res:
            passed.append(('Haystack search', 'okay'))
        else:
            failed.append((
                'Haystack search',
                'nothing found: maybe update_index, or wait for search server to fully start'
            ))
    except IOError:
        failed.append(('Haystack search', "can't read/write index"))

    # photo fetching
    if cache_okay and celery_okay:
        try:
            res = do_photo_fetch(['301222726'])
            if '301222726' not in res:  # I don't know who 301222726 is, but he/she is real.
                failed.append(
                    ('Photo fetching', "didn't find photo we expect to exist"))
            else:
                passed.append(('Photo fetching', 'okay'))
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            failed.append(('Photo fetching', 'photo password not set'))
        except urllib.error.HTTPError as e:
            failed.append(
                ('Photo fetching',
                 'failed to fetch photo (%s). Maybe wrong password?' % (e)))
    else:
        failed.append(
            ('Photo fetching', 'not testing since memcached or celery failed'))

    # emplid/userid API
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        failed.append(('Emplid API', 'no emplid returned'))
    elif isinstance(emplid, str) and not emplid.startswith('2000'):
        failed.append(('Emplid API', 'incorrect emplid returned'))
    else:
        passed.append(('Emplid API', 'okay'))

    # Piwik API
    #if not request:
    #    failed.append(('Piwik API', "can only check in web frontend with valid request object"))
    #elif not settings.PIWIK_URL or not settings.PIWIK_TOKEN:
    #    failed.append(('Piwik API', "not configured in secrets.py"))
    #else:
    #    # try to re-log this request in piwik and see what happens
    #    from piwik_middleware.tracking import PiwikTrackerLogic, urllib_errors
    #    tracking_logic = PiwikTrackerLogic()
    #    kwargs = tracking_logic.get_track_kwargs(request)
    #    try:
    #        tracking_logic.do_track_page_view(fail_silently=False, **kwargs)
    #    except urllib_errors as e:
    #        failed.append(('Piwik API', "API call failed: %s" % (e)))
    #    else:
    #        passed.append(('Piwik API', 'okay'))

    # Backup server
    #if not settings.BACKUP_SERVER or not settings.BACKUP_USER or not settings.BACKUP_PATH or not settings.BACKUP_PASSPHRASE:
    #    failed.append(('Backup server', 'Backup server settings not all present'))
    #else:
    #    from coredata.management.commands.backup_remote import do_check
    #    try:
    #        do_check()
    #    except RuntimeError as e:
    #        failed.append(('Backup server', unicode(e)))
    #    passed.append(('Backup server', 'okay'))

    # certificates
    # (_check_cert's result is treated as a failure message when truthy)
    bad_cert = 0
    res = _check_cert('/etc/stunnel/stunnel.pem')
    if res:
        failed.append(('Stunnel cert', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.pem')
    if res:
        failed.append(('SSL PEM', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.key')
    if res:
        failed.append(('SSL KEY', res))
        bad_cert += 1

    if bad_cert == 0:
        passed.append((
            'Certificates',
            'All okay, but maybe check http://www.digicert.com/help/ or https://www.ssllabs.com/ssltest/'
        ))

    # file creation in the necessary places
    # (_check_file_create returning None is treated as success; anything else
    # is used as the failure message)
    dirs_to_check = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT,
                      'CACHE'), 'compressed media root'),
    ]
    for directory, label in dirs_to_check:
        res = _check_file_create(directory)
        if res is None:
            passed.append(('File creation in ' + label, 'okay'))
        else:
            failed.append(('File creation in ' + label, res))

    # are any services listening publicly that shouldn't?
    hostname = socket.gethostname()
    ports = [
        25,  # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130,  # beam? rabbitmq something
        4000,  # main DB stunnel
        50000,  # reporting DB
        8000,  # gunicorn
        11211,  # memcached
        9200,
        9300,  # elasticsearch
    ]
    connected = []
    for p in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((hostname, p))
        except socket.error:
            # couldn't connect: good
            pass
        else:
            connected.append(p)
        finally:
            s.close()

    if connected:
        failed.append(
            ('Ports listening externally',
             'got connections to port ' + ','.join(str(p) for p in connected)))
    else:
        passed.append(('Ports listening externally', 'okay'))

    # is the server time close to real-time?
    import ntplib
    try:
        c = ntplib.NTPClient()
        response = c.request('0.ca.pool.ntp.org')
        if abs(response.offset) > 0.1:
            failed.append(
                ('Server time',
                 'Time is %g seconds off NTP pool.' % (response.offset, )))
        else:
            passed.append(('Server time', 'okay'))
    except ntplib.NTPException as e:
        # report an unreachable pool as a failed check rather than letting the
        # exception abort the remaining checks
        failed.append(('Server time', 'Unable to query NTP pool: %s' % (e, )))

    # library sanity
    err = bitfield_check()
    if err:
        failed.append(('Library sanity', 'django-bitfield: ' + err))
    else:
        err = cache_check()
        if err:
            failed.append(('Library sanity', 'django cache: ' + err))
        else:
            passed.append(('Library sanity', 'okay'))

    # github-flavoured markdown subprocess
    from courselib.markup import markdown_to_html
    try:
        # checks that script runs; does github-flavour correctly; does Unicode correctly.
        html = markdown_to_html(
            'test *markup*\n\n```python\nprint(1)\n```\n\u2605\U0001F600')
        expected = '<p>test <em>markup</em></p>\n<pre lang="python"><code>print(1)\n</code></pre>\n<p>\u2605\U0001F600</p>'
        if html.strip() == expected:
            passed.append(('Markdown subprocess', 'okay'))
        else:
            failed.append(('Markdown subprocess',
                           'markdown script returned incorrect markup'))
    except OSError:
        failed.append((
            'Markdown subprocess',
            'failed to start ruby command: ruby package probably not installed'
        ))
    except RuntimeError:
        failed.append(('Markdown subprocess', 'markdown script failed'))

    return passed, failed
Example #5
0
import sys, os

# The settings module must be named before any project module is imported.
os.environ['DJANGO_SETTINGS_MODULE'] = 'courses.settings'
sys.path.append('.')

from coredata.tasks import ping
from celery.exceptions import TimeoutError

# queue the ping task, then wait up to ten minutes for its result
res = ping.apply_async()
try:
    res.get(timeout=600)
except TimeoutError:
    # parenthesised single-argument print gives the same output on Python 2 and 3
    print("Celery ping task failed: celeryd probably isn't running.")

Example #6
0
def deploy_checks(request=None):
    """
    Run the deployment sanity checks and collect their outcomes.

    Each check appends a ``(check name, message)`` 2-tuple to either the
    ``passed`` or the ``failed`` list. The checks cover the main database
    (including MySQL character sets), Celery and Celery beat, the Django cache,
    the reporting database, asset compression, Haystack search, photo fetching,
    the emplid API, file creation, externally-listening ports, production
    HTTP/HTTPS serving, server time, library sanity, markdown rendering, MOSS
    and the locale encoding.

    :param request: not used by this function.
    :return: ``(passed, failed)``, two lists of ``(check name, message)``
        tuples.
    """
    passed = []
    failed = []

    # cache something now to see if it's still there further down.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    try:
        n = Semester.objects.all().count()
        if n > 0:
            passed.append(('Main database connection', 'okay'))
        else:
            failed.append(('Main database connection',
                           "Can't find any coredata.Semester objects"))
    except django.db.utils.OperationalError:
        failed.append(
            ('Main database connection', "can't connect to database"))
    except django.db.utils.ProgrammingError:
        failed.append(('Main database connection', "database tables missing"))

    # non-BMP Unicode in database
    try:
        entry = LogEntry.objects.create(userid='ggbaker',
                                        description='Test Unicode \U0001F600',
                                        related_object=Semester.objects.first())
    except OperationalError:
        failed.append(('Unicode handling in database',
                       'non-BMP character not supported by connection'))
    else:
        # re-read the row to see what was actually stored
        entry = LogEntry.objects.get(id=entry.id)
        if '\U0001F600' in entry.description:
            passed.append(('Unicode handling in database', 'okay'))
        else:
            failed.append(('Unicode handling in database',
                           'non-BMP character not stored correctly'))

    # check that all database tables are utf8mb4, if mysql
    if settings.DATABASES['default']['ENGINE'].endswith('.mysql'):
        from django.apps import apps
        from django.db import connection

        CORRECT_CHARSET = 'utf8mb4'
        CORRECT_COLLATION = 'utf8mb4_unicode_ci'
        db_name = settings.DATABASES['default']['NAME']

        with connection.cursor() as cursor:
            # check database defaults
            cursor.execute(
                "SELECT @@character_set_database, @@collation_database;")
            row = cursor.fetchone()
            if row != (CORRECT_CHARSET, CORRECT_COLLATION):
                failed.append((
                    'MySQL database charset',
                    'database default CHARACTER SET and COLLATION incorrect (it is %s): consider "ALTER DATABASE %s CHARACTER SET %s COLLATE %s;"'
                    % (row, db_name, CORRECT_CHARSET, CORRECT_COLLATION)))

            # check each table
            table_names = [model._meta.db_table for model in apps.get_models()]
            # inspect table charset and collations, adapted from https://stackoverflow.com/a/1049958/6871666
            cursor.execute(
                '''SELECT T.table_name, CCSA.character_set_name, CCSA.collation_name
                FROM information_schema.`TABLES` T,
                    information_schema.`COLLATION_CHARACTER_SET_APPLICABILITY` CCSA
                WHERE CCSA.collation_name=T.table_collation
                    AND T.table_schema=%s
                    AND T.table_name IN %s
            ''', (db_name, table_names))
            for table, charset, collation in cursor.fetchall():
                if (charset, collation) != (CORRECT_CHARSET,
                                            CORRECT_COLLATION):
                    failed.append((
                        'MySQL database charset',
                        'table %s has incorrect CHARACTER SET and COLLATION: consider "ALTER TABLE %s CHARACTER SET=%s COLLATE=%s;"'
                        % (table, table, CORRECT_CHARSET, CORRECT_COLLATION)))

            # check each column that carries its own charset/collation
            cursor.execute(
                '''SELECT table_name, column_name, character_set_name, collation_name
                FROM information_schema.`COLUMNS`
                WHERE table_schema=%s
                    AND (character_set_name IS NOT NULL OR collation_name IS NOT NULL)
                    AND (character_set_name!=%s OR collation_name!=%s);
                ''', (db_name, CORRECT_CHARSET, CORRECT_COLLATION))
            for table, column, charset, collation in cursor.fetchall():
                failed.append((
                    'MySQL database charset',
                    'table %s has incorrect CHARACTER SET and COLLATION on a column (%s and %s): consider "ALTER TABLE %s CONVERT TO CHARACTER SET %s COLLATE %s;"'
                    % (table, charset, collation, table, CORRECT_CHARSET,
                       CORRECT_COLLATION)))

    # Celery tasks
    celery_okay = False
    sims_task = None
    try:
        if settings.USE_CELERY:
            try:
                from coredata.tasks import ping
            except ImportError:
                failed.append(
                    ('Celery task',
                     "Couldn't import task: probably missing MySQLdb module"))
            else:
                try:
                    task = ping.apply_async()
                except kombu.exceptions.OperationalError:
                    failed.append(
                        ('Celery task',
                         'Kombu error. Probably RabbitMQ not running.'))
                except amqp.exceptions.AccessRefused:
                    failed.append((
                        'Celery task',
                        'AccessRefused error. Probably bad RabbitMQ auth details.'
                    ))
                else:
                    from coredata.tasks import check_sims_task
                    sims_task = check_sims_task.apply_async(
                    )  # start here, in case it's slow
                    res = task.get(timeout=5)
                    if res == True:
                        passed.append(('Celery task', 'okay'))
                        celery_okay = True
                    else:
                        failed.append(
                            ('Celery task', 'got incorrect result from task'))
        else:
            failed.append(('Celery task', 'celery disabled in settings'))
    except celery.exceptions.TimeoutError:
        failed.append(
            ('Celery task',
             "didn't get result before timeout: celeryd maybe not running"))
    except socket.error:
        failed.append(('Celery task', "can't communicate with broker"))
    except NotImplementedError:
        failed.append(
            ('Celery task', 'celery failed to start with NotImplementedError'))
    except django.db.utils.ProgrammingError:
        failed.append(('Celery task', 'celery DB tables missing'))
    except django.db.utils.OperationalError:
        failed.append(('Celery task', 'djkombu tables missing: try migrating'))

    # celery beat
    if settings.USE_CELERY:
        try:
            from coredata.tasks import beat_time_okay
            if beat_time_okay():
                passed.append(('Celery beat', 'okay'))
            else:
                failed.append((
                    'Celery beat',
                    'marker file is old: celery beat likely not processing tasks'
                ))
        except OSError:
            failed.append((
                'Celery beat',
                'marker file is missing: celery beat likely not processing tasks'
            ))

    # Django cache
    # (has a subprocess do something to make sure we're in a persistent shared cache, not DummyCache)
    subprocess.call(
        ['python3', 'manage.py', 'check_things', '--cache_subcall'])
    cache_okay = False
    res = cache.get('check_things_cache_test')
    if res == randval:
        # value unchanged: whatever the subprocess did never reached our cache
        failed.append((
            'Django cache',
            'other processes not sharing cache: dummy/local probably being used instead of memcached'
        ))
    elif res is None:
        failed.append(
            ('Django cache', 'unable to retrieve anything from cache'))
    elif res != randval + 1:
        failed.append(('Django cache', 'unknown result'))
    else:
        # the only accepted value is randval + 1
        passed.append(('Django cache', 'okay'))
        cache_okay = True

    # Reporting DB connection
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=301355288", ())
        result = list(db)
        if not result:
            # must be tested before indexing into the rows below, otherwise an
            # empty result surfaces as a misleading "Generic exception"
            failed.append(('Reporting DB connection',
                           'query inexplicably returned nothing'))
        else:
            # whoever this is, they have non-ASCII in their name: let's hope they don't change it.
            lname = result[0][0]
            if not isinstance(lname, str):
                failed.append(
                    ('Reporting DB connection',
                     'string result not a string: check Unicode decoding'))
            elif lname[1] != u'\u00e4':
                failed.append(('Reporting DB connection',
                               'returned incorrectly-decoded Unicode'))
            else:
                passed.append(('Reporting DB connection', 'okay'))
    except SIMSProblem as e:
        failed.append(
            ('Reporting DB connection', 'SIMSProblem, %s' % (str(e))))
    except ImportError:
        failed.append(
            ('Reporting DB connection', "couldn't import DB2 module"))
    except Exception as e:
        # deliberate catch-all: any other failure is reported, not raised
        failed.append(
            ('Reporting DB connection', 'Generic exception, %s' % (str(e))))

    if settings.USE_CELERY and sims_task:
        # sims_task started above, so we can double-up on any wait
        try:
            res = sims_task.get(timeout=5)
            # a truthy task result is used as the failure message
            if res:
                failed.append(('Celery Reporting DB', res))
            else:
                passed.append(('Celery Reporting DB', 'okay'))
        except celery.exceptions.TimeoutError:
            failed.append((
                'Celery Reporting DB',
                "didn't get result before timeout: maybe reporting database is slow?"
            ))
    elif sims_task is None:
        failed.append(
            ('Celery Reporting DB', "didn't check because of Celery failure"))

    # compression enabled?
    if settings.COMPRESS_ENABLED:
        passed.append(('Asset compression enabled', 'okay'))
    else:
        failed.append(('Asset compression enabled', 'disabled in settings'))

    # Haystack searching
    from haystack.query import SearchQuerySet
    try:
        res = SearchQuerySet().filter(text='cmpt')
        if res:
            passed.append(('Haystack search', 'okay'))
        else:
            failed.append((
                'Haystack search',
                'nothing found: maybe update_index, or wait for search server to fully start'
            ))
    except IOError:
        failed.append(('Haystack search', "can't read/write index"))

    # photo fetching
    if cache_okay and celery_okay:
        try:
            res = do_photo_fetch(['301222726'])
            if '301222726' not in res:  # I don't know who 301222726 is, but he/she is real.
                failed.append(
                    ('Photo fetching', "didn't find photo we expect to exist"))
            else:
                passed.append(('Photo fetching', 'okay'))
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            failed.append(('Photo fetching', 'photo password not set'))
        except urllib.error.HTTPError as e:
            failed.append(
                ('Photo fetching',
                 'failed to fetch photo (%s). Maybe wrong password?' % (e)))
    else:
        failed.append(
            ('Photo fetching', 'not testing since memcached or celery failed'))

    # emplid/userid API
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        failed.append(('Emplid API', 'no emplid returned'))
    elif isinstance(emplid, str) and not emplid.startswith('2000'):
        failed.append(('Emplid API', 'incorrect emplid returned'))
    else:
        passed.append(('Emplid API', 'okay'))

    # file creation in the necessary places
    # (_check_file_create returning None is treated as success; anything else
    # is used as the failure message)
    dirs_to_check = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT,
                      'CACHE'), 'compressed media root'),
    ]
    for directory, label in dirs_to_check:
        res = _check_file_create(directory)
        if res is None:
            passed.append(('File creation in ' + label, 'okay'))
        else:
            failed.append(('File creation in ' + label, res))

    # are any services listening publicly that shouldn't?
    hostname = socket.gethostname()
    ports = [
        25,  # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130,  # beam? rabbitmq something
        4000,  # main DB stunnel
        50000,  # reporting DB
        8000,  # gunicorn
        11211,  # memcached
        9200,
        9300,  # elasticsearch
        8983,  # solr
    ]
    connected = []
    for p in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((hostname, p))
        except socket.error:
            # couldn't connect: good
            pass
        else:
            connected.append(p)
        finally:
            s.close()

    if connected:
        failed.append(
            ('Ports listening externally',
             'got connections to port ' + ','.join(str(p) for p in connected)))
    else:
        passed.append(('Ports listening externally', 'okay'))

    # correct serving/redirecting of production domains
    if settings.DEPLOY_MODE == 'production':
        production_host_fails = 0
        for host in settings.SERVE_HOSTS + settings.REDIRECT_HOSTS:
            # check HTTPS serving/redirect
            try:
                url = 'https://' + host + reverse(
                    'docs:list_docs'
                )  # must be a URL that doesn't require auth
                resp = requests.get(url, allow_redirects=False, timeout=5)
                if host in settings.SERVE_HOSTS and resp.status_code != 200:
                    failed.append(('HTTPS Serving',
                                   'expected 200 okay, but got %i at %s' %
                                   (resp.status_code, url)))
                    production_host_fails += 1
                elif host in settings.REDIRECT_HOSTS and resp.status_code != 301:
                    failed.append(('HTTPS Serving',
                                   'expected 301 redirect, but got %i at %s' %
                                   (resp.status_code, url)))
                    production_host_fails += 1
            except requests.exceptions.SSLError:
                failed.append(('HTTPS Serving',
                               'bad SSL/TLS certificate for %s' % (url, )))
                production_host_fails += 1
            except requests.exceptions.RequestException:
                failed.append(('HTTPS Serving',
                               'unable to connect to request %s' % (url, )))
                production_host_fails += 1

            # check HTTP redirect
            try:
                url = 'http://' + host + reverse(
                    'docs:list_docs'
                )  # must be a URL that doesn't require auth
                resp = requests.get(url, allow_redirects=False, timeout=5)
                if resp.status_code != 301:
                    failed.append((
                        'HTTP Serving',
                        'expected 301 redirect to https://, but got %i at %s' %
                        (resp.status_code, url)))
                    production_host_fails += 1
            except requests.exceptions.RequestException:
                failed.append(('HTTP Serving',
                               'unable to connect to request %s' % (url, )))
                production_host_fails += 1

        if production_host_fails == 0:
            passed.append((
                'HTTPS Serving',
                'okay: certs and redirects as expected, but maybe check http://www.digicert.com/help/ or https://www.ssllabs.com/ssltest/'
            ))

    # is the server time close to real-time?
    import ntplib
    try:
        c = ntplib.NTPClient()
        response = c.request('pool.ntp.org')
        if abs(response.offset) > 0.1:
            failed.append(
                ('Server time',
                 'Time is %g seconds off NTP pool.' % (response.offset, )))
        else:
            passed.append(('Server time', 'okay'))
    except ntplib.NTPException as e:
        failed.append(('Server time', 'Unable to query NTP pool: %s' % (e, )))

    # library sanity
    err = bitfield_check()
    if err:
        failed.append(('Library sanity', 'django-bitfield: ' + err))
    else:
        err = cache_check()
        if err:
            failed.append(('Library sanity', 'django cache: ' + err))
        else:
            passed.append(('Library sanity', 'okay'))

    # github-flavoured markdown
    from courselib.github_markdown import markdown_to_html_rpc, markdown_to_html_subprocess
    md = 'test *markup*\n\n```python\nprint(1)\n```\n\u2605\U0001F600'
    correct = '<p>test <em>markup</em></p>\n<pre lang="python"><code>print(1)\n</code></pre>\n<p>\u2605\U0001F600</p>'

    try:
        # checks that ruby subprocess runs; does github-flavour correctly; does Unicode correctly.
        html = markdown_to_html_subprocess(md, fallback=False)
        if html.strip() == correct:
            passed.append(('Markdown subprocess', 'okay'))
        else:
            failed.append(('Markdown subprocess',
                           'markdown script returned incorrect markup'))
    except OSError:
        failed.append((
            'Markdown subprocess',
            'failed to start ruby command: ruby package probably not installed'
        ))
    except RuntimeError:
        failed.append(('Markdown subprocess', 'markdown script failed'))

    try:
        # checks that docker RPC runs; does github-flavour correctly; does Unicode correctly.
        html = markdown_to_html_rpc(md, fallback=False)
        if html.strip() == correct:
            passed.append(('Markdown RPC', 'okay'))
        else:
            failed.append(
                ('Markdown RPC', 'markdown script returned incorrect markup'))
    except OSError:
        failed.append(
            ('Markdown RPC',
             'unable to connect for RPC: docker container may be down'))
    except AttributeError:
        failed.append(
            ('Markdown RPC',
             'unable to connect to RabbitMQ: not configured in settings.py'))

    # MOSS subprocess
    # (the helper is handed both lists; presumably it appends its own result — verify in submission.moss)
    from submission.moss import check_moss_executable
    check_moss_executable(passed, failed)

    # locale is UTF-8 (matters for markdown script calls, the SIMS database connection)
    import locale
    _, encoding = locale.getdefaultlocale()
    if encoding == 'UTF-8':
        passed.append(('Locale encoding', 'okay'))
    else:
        failed.append(
            ('Locale encoding', "is %r; should be 'UTF-8'" % (encoding, )))

    return passed, failed
Example #7
0
def deploy_checks():
    """
    Run the deployment sanity checks and collect their outcomes.

    Every check appends a ``(label, message)`` tuple to one of two lists
    instead of raising, so a single run reports on all of them.  The checks,
    in order: main Django database, Celery task round-trip, Django cache
    sharing, reporting DB connection, asset compression setting, Haystack
    search, photo fetching, emplid/userid API, certificates, SVN database,
    AMAINT database, file creation in required directories, and services
    accepting connections on this machine's hostname.

    Returns a ``(passed, failed)`` tuple of those two lists.
    """
    passed = []
    failed = []

    # cache something now to see if it's still there further down.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    try:
        n = Semester.objects.all().count()
        if n > 0:
            passed.append(('Main database connection', 'okay'))
        else:
            failed.append(('Main database connection',
                           "Can't find any coredata.Semester objects"))
    except django.db.utils.OperationalError:
        failed.append(
            ('Main database connection', "can't connect to database"))
    except django.db.utils.ProgrammingError:
        failed.append(('Main database connection', "database tables missing"))

    # Celery tasks
    # celery_okay is consulted by the photo-fetching check further down.
    celery_okay = False
    try:
        if settings.USE_CELERY:
            try:
                from coredata.tasks import ping
            except ImportError:
                failed.append(
                    ('Celery task',
                     "Couldn't import task: probably missing MySQLdb module"))
            else:
                # queue the task and wait at most 5 seconds for its result
                t = ping.apply_async()
                res = t.get(timeout=5)
                if res == True:
                    passed.append(('Celery task', 'okay'))
                    celery_okay = True
                else:
                    failed.append(
                        ('Celery task', 'got incorrect result from task'))
        else:
            failed.append(('Celery task', 'celery disabled in settings'))
    except celery.exceptions.TimeoutError:
        failed.append(
            ('Celery task',
             "didn't get result before timeout: celeryd maybe not running"))
    except socket.error:
        failed.append(('Celery task', "can't communicate with broker"))
    except NotImplementedError:
        failed.append(('Celery task', 'celery disabled'))
    except django.db.utils.ProgrammingError:
        failed.append(('Celery task', 'celery DB tables missing'))
    except django.db.utils.OperationalError:
        failed.append(('Celery task', 'djkombu tables missing: try migrating'))

    # Django cache
    # (has a subprocess do something to make sure we're in a persistent shared cache, not DummyCache)
    # NOTE(review): the randval + 1 comparison below implies the --cache_subcall
    # run increments the cached value; confirm in the check_things command.
    subprocess.call(['python', 'manage.py', 'check_things', '--cache_subcall'])
    # cache_okay is consulted by the photo-fetching check further down.
    cache_okay = False
    res = cache.get('check_things_cache_test')
    if res == randval:
        # value is exactly what this process stored: the subprocess didn't change it
        failed.append((
            'Django cache',
            'other processes not sharing cache: dummy/local probably being used instead of memcached'
        ))
    elif res is None:
        failed.append(
            ('Django cache', 'unable to retrieve anything from cache'))
    elif res != randval + 1:
        failed.append(('Django cache', 'unknown result'))
    else:
        passed.append(('Django cache', 'okay'))
        cache_okay = True

    # Reporting DB connection
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=200133427", ())
        # only the number of rows matters here, not their content
        n = len(list(db))
        if n > 0:
            passed.append(('Reporting DB connection', 'okay'))
        else:
            failed.append(('Reporting DB connection',
                           'query inexplicably returned nothing'))
    except SIMSProblem as e:
        failed.append(
            ('Reporting DB connection', 'SIMSProblem, %s' % (unicode(e))))
    except ImportError:
        failed.append(
            ('Reporting DB connection', "couldn't import DB2 module"))

    # compression enabled?
    if settings.COMPRESS_ENABLED:
        passed.append(('Asset compression enabled', 'okay'))
    else:
        failed.append(('Asset compression enabled', 'disabled in settings'))

    # Haystack searching
    from haystack.query import SearchQuerySet
    try:
        res = SearchQuerySet().filter(text='cmpt')
        if res:
            passed.append(('Haystack search', 'okay'))
        else:
            failed.append((
                'Haystack search',
                'nothing found: maybe update_index, or wait for search server to fully start'
            ))
    except IOError:
        failed.append(('Haystack search', "can't read/write index"))

    # photo fetching
    # (only attempted when both the cache check and the celery check passed)
    if cache_okay and celery_okay:
        try:
            res = do_photo_fetch(['301222726'])
            if '301222726' not in res:  # I don't know who 301222726 is, but he/she is real.
                failed.append(
                    ('Photo fetching', "didn't find photo we expect to exist"))
            else:
                passed.append(('Photo fetching', 'okay'))
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            failed.append(('Photo fetching', 'photo password not set'))
        except urllib2.HTTPError as e:
            failed.append(
                ('Photo fetching',
                 'failed to fetch photo (%s). Maybe wrong password?' % (e)))
    else:
        failed.append(
            ('Photo fetching', 'not testing since memcached or celery failed'))

    # emplid/userid API
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        failed.append(('Emplid API', 'no emplid returned'))
    elif isinstance(emplid, basestring) and not emplid.startswith('2000'):
        # the prefix is only checked for string results; other truthy values pass
        failed.append(('Emplid API', 'incorrect emplid returned'))
    else:
        passed.append(('Emplid API', 'okay'))

    # certificates
    # _check_cert's result is used as the failure message when truthy;
    # a falsy result counts as "no problem found".
    bad_cert = 0
    res = _check_cert('/etc/stunnel/stunnel.pem')
    if res:
        failed.append(('Stunnel cert', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.pem')
    if res:
        failed.append(('SSL PEM', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.key')
    if res:
        failed.append(('SSL KEY', res))
        bad_cert += 1

    # a single combined "passed" entry is recorded only if none of the three failed
    if bad_cert == 0:
        passed.append(
            ('Certificates',
             'All okay, but maybe check http://www.digicert.com/help/'))

    # SVN database
    if settings.SVN_DB_CONNECT:
        from courselib.svn import SVN_TABLE, _db_conn
        import MySQLdb
        try:
            db = _db_conn()
            db.execute('SELECT count(*) FROM ' + SVN_TABLE, ())
            # first column of the first row is the count(*) value
            n = list(db)[0][0]
            if n > 0:
                passed.append(('SVN database', 'okay'))
            else:
                failed.append(('SVN database', "couldn't access records"))
        except MySQLdb.OperationalError:
            failed.append(('SVN database', "can't connect to database"))
    else:
        failed.append(('SVN database', 'SVN_DB_CONNECT not set in secrets.py'))

    # AMAINT database
    if settings.AMAINT_DB_PASSWORD:
        from coredata.importer import AMAINTConn
        import MySQLdb
        try:
            db = AMAINTConn()
            db.execute("SELECT count(*) FROM idMap", ())
            # first column of the first row is the count(*) value
            n = list(db)[0][0]
            if n > 0:
                passed.append(('AMAINT database', 'okay'))
            else:
                failed.append(('AMAINT database', "couldn't access records"))
        except MySQLdb.OperationalError:
            failed.append(('AMAINT database', "can't connect to database"))
    else:
        failed.append(
            ('AMAINT database', 'AMAINT_DB_PASSWORD not set in secrets.py'))

    # file creation in the necessary places
    dirs_to_check = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT,
                      'CACHE'), 'compressed media root'),
    ]
    for directory, label in dirs_to_check:
        # _check_file_create returns None on success; anything else is used as the failure message
        res = _check_file_create(directory)
        if res is None:
            passed.append(('File creation in ' + label, 'okay'))
        else:
            failed.append(('File creation in ' + label, res))

    # are any services listening publicly that shouldn't?
    # Connections are attempted to this machine's own hostname.
    # NOTE(review): presumably that resolves to the external interface rather
    # than loopback -- confirm for the deployment's name resolution.
    hostname = socket.gethostname()
    ports = [
        25,  # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130,  # beam? rabbitmq something
        4000,  # main DB stunnel
        50000,  # reporting DB
        8000,  # gunicorn
        11211,  # memcached
        9200,
        9300,  # elasticsearch
    ]
    connected = []
    for p in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((hostname, p))
        except socket.error:
            # couldn't connect: good
            pass
        else:
            # connection accepted: record the port as exposed
            connected.append(p)
        finally:
            s.close()

    if connected:
        failed.append(
            ('Ports listening externally',
             'got connections to port ' + ','.join(str(p) for p in connected)))
    else:
        passed.append(('Ports listening externally', 'okay'))

    return passed, failed
Example #8
0
def deploy_checks(request=None):
    """
    Run the deployment sanity checks and collect their outcomes.

    Every check appends a ``(label, message)`` tuple to one of two lists
    instead of raising, so a single run reports on all of them.  The checks,
    in order: main Django database, non-BMP Unicode storage, Celery task
    round-trip, Celery beat marker file, Django cache sharing, reporting DB
    connection (including Unicode decoding), asset compression setting,
    Haystack search, photo fetching, emplid/userid API, certificates, file
    creation in required directories, services accepting connections on this
    machine's hostname, server clock offset against an NTP pool, library
    sanity, the markdown subprocess, and the locale encoding.

    ``request`` is only referenced by the currently commented-out Piwik check;
    it is accepted (and ignored) so existing callers keep working.

    Returns a ``(passed, failed)`` tuple of those two lists.
    """
    passed = []
    failed = []

    # cache something now to see if it's still there further down.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    try:
        n = Semester.objects.all().count()
        if n > 0:
            passed.append(('Main database connection', 'okay'))
        else:
            failed.append(('Main database connection', "Can't find any coredata.Semester objects"))
    except django.db.utils.OperationalError:
        failed.append(('Main database connection', "can't connect to database"))
    except django.db.utils.ProgrammingError:
        failed.append(('Main database connection', "database tables missing"))

    # non-BMP Unicode in database
    try:
        l = LogEntry.objects.create(userid='ggbaker', description='Test Unicode \U0001F600', related_object=Semester.objects.first())
    except django.db.utils.OperationalError:
        # fully-qualified like every other handler in this function, so the
        # check doesn't depend on a separate bare-name import
        failed.append(('Unicode handling in database', 'non-BMP character not supported by connection'))
    else:
        # re-read from the database: the in-memory object would always contain the character
        l = LogEntry.objects.get(id=l.id)
        if '\U0001F600' in l.description:
            passed.append(('Unicode handling in database', 'okay'))
        else:
            failed.append(('Unicode handling in database', 'non-BMP character not stored correctly'))

    # Celery tasks
    # celery_okay is consulted by the photo-fetching check further down.
    celery_okay = False
    try:
        if settings.USE_CELERY:
            try:
                from coredata.tasks import ping
            except ImportError:
                failed.append(('Celery task', "Couldn't import task: probably missing MySQLdb module"))
            else:
                try:
                    t = ping.apply_async()
                except kombu.exceptions.OperationalError:
                    failed.append(('Celery task', 'Kombu error. Probably RabbitMQ not running.'))
                else:
                    # wait at most 5 seconds for the task's result
                    res = t.get(timeout=5)
                    if res == True:
                        passed.append(('Celery task', 'okay'))
                        celery_okay = True
                    else:
                        failed.append(('Celery task', 'got incorrect result from task'))
        else:
            failed.append(('Celery task', 'celery disabled in settings'))
    except celery.exceptions.TimeoutError:
        failed.append(('Celery task', "didn't get result before timeout: celeryd maybe not running"))
    except socket.error:
        failed.append(('Celery task', "can't communicate with broker"))
    except NotImplementedError:
        failed.append(('Celery task', 'celery disabled'))
    except django.db.utils.ProgrammingError:
        failed.append(('Celery task', 'celery DB tables missing'))
    except django.db.utils.OperationalError:
        failed.append(('Celery task', 'djkombu tables missing: try migrating'))

    # celery beat
    # (the marker file's modification time must be newer than BEAT_FILE_MAX_AGE seconds)
    try:
        from coredata.tasks import BEAT_TEST_FILE, BEAT_FILE_MAX_AGE
        beatfile_age = time.time() - os.stat(BEAT_TEST_FILE).st_mtime
        if beatfile_age < BEAT_FILE_MAX_AGE:
            passed.append(('Celery beat', 'okay'))
        else:
            failed.append(('Celery beat', 'marker file is old: celery beat likely not processing tasks'))
    except OSError:
        failed.append(('Celery beat', 'marker file is missing: celery beat likely not processing tasks'))

    # Django cache
    # (has a subprocess do something to make sure we're in a persistent shared cache, not DummyCache)
    # NOTE(review): the randval + 1 comparison below implies the --cache_subcall
    # run increments the cached value; confirm in the check_things command.
    subprocess.call(['python3', 'manage.py', 'check_things', '--cache_subcall'])
    # cache_okay is consulted by the photo-fetching check further down.
    cache_okay = False
    res = cache.get('check_things_cache_test')
    if res == randval:
        # value is exactly what this process stored: the subprocess didn't change it
        failed.append(('Django cache', 'other processes not sharing cache: dummy/local probably being used instead of memcached'))
    elif res is None:
        failed.append(('Django cache', 'unable to retrieve anything from cache'))
    elif res != randval + 1:
        failed.append(('Django cache', 'unknown result'))
    else:
        passed.append(('Django cache', 'okay'))
        cache_okay = True

    # Reporting DB connection
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=301355288", ())
        result = list(db)
        if not result:
            # must be tested before indexing: result[0] on an empty list would
            # raise IndexError and abort every remaining check
            failed.append(('Reporting DB connection', 'query inexplicably returned nothing'))
        else:
            # whoever this is, they have non-ASCII in their name: let's hope they don't change it.
            lname = result[0][0]
            if not isinstance(lname, str):
                failed.append(('Reporting DB connection', 'string result not a string: check Unicode decoding'))
            elif lname[1:2] != '\u00e4':
                # slice rather than index so a too-short name is reported, not raised
                failed.append(('Reporting DB connection', 'returned incorrectly-decoded Unicode'))
            else:
                passed.append(('Reporting DB connection', 'okay'))
    except SIMSProblem as e:
        failed.append(('Reporting DB connection', 'SIMSProblem, %s' % (str(e))))
    except ImportError:
        failed.append(('Reporting DB connection', "couldn't import DB2 module"))

    # compression enabled?
    if settings.COMPRESS_ENABLED:
        passed.append(('Asset compression enabled', 'okay'))
    else:
        failed.append(('Asset compression enabled', 'disabled in settings'))

    # Haystack searching
    from haystack.query import SearchQuerySet
    try:
        res = SearchQuerySet().filter(text='cmpt')
        if res:
            passed.append(('Haystack search', 'okay'))
        else:
            failed.append(('Haystack search', 'nothing found: maybe update_index, or wait for search server to fully start'))
    except IOError:
        failed.append(('Haystack search', "can't read/write index"))

    # photo fetching
    # (only attempted when both the cache check and the celery check passed)
    if cache_okay and celery_okay:
        try:
            res = do_photo_fetch(['301222726'])
            if '301222726' not in res: # I don't know who 301222726 is, but he/she is real.
                failed.append(('Photo fetching', "didn't find photo we expect to exist"))
            else:
                passed.append(('Photo fetching', 'okay'))
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            failed.append(('Photo fetching', 'photo password not set'))
        except urllib.error.HTTPError as e:
            failed.append(('Photo fetching', 'failed to fetch photo (%s). Maybe wrong password?' % (e)))
    else:
        failed.append(('Photo fetching', 'not testing since memcached or celery failed'))

    # emplid/userid API
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        failed.append(('Emplid API', 'no emplid returned'))
    elif isinstance(emplid, str) and not emplid.startswith('2000'):
        # the prefix is only checked for string results; other truthy values pass
        failed.append(('Emplid API', 'incorrect emplid returned'))
    else:
        passed.append(('Emplid API', 'okay'))

    # Piwik API
    #if not request:
    #    failed.append(('Piwik API', "can only check in web frontend with valid request object"))
    #elif not settings.PIWIK_URL or not settings.PIWIK_TOKEN:
    #    failed.append(('Piwik API', "not configured in secrets.py"))
    #else:
    #    # try to re-log this request in piwik and see what happens
    #    from piwik_middleware.tracking import PiwikTrackerLogic, urllib_errors
    #    tracking_logic = PiwikTrackerLogic()
    #    kwargs = tracking_logic.get_track_kwargs(request)
    #    try:
    #        tracking_logic.do_track_page_view(fail_silently=False, **kwargs)
    #    except urllib_errors as e:
    #        failed.append(('Piwik API', "API call failed: %s" % (e)))
    #    else:
    #        passed.append(('Piwik API', 'okay'))

    # Backup server
    #if not settings.BACKUP_SERVER or not settings.BACKUP_USER or not settings.BACKUP_PATH or not settings.BACKUP_PASSPHRASE:
    #    failed.append(('Backup server', 'Backup server settings not all present'))
    #else:
    #    from coredata.management.commands.backup_remote import do_check
    #    try:
    #        do_check()
    #    except RuntimeError as e:
    #        failed.append(('Backup server', unicode(e)))
    #    passed.append(('Backup server', 'okay'))


    # certificates
    # _check_cert's result is used as the failure message when truthy;
    # a falsy result counts as "no problem found".
    bad_cert = 0
    res = _check_cert('/etc/stunnel/stunnel.pem')
    if res:
        failed.append(('Stunnel cert', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.pem')
    if res:
        failed.append(('SSL PEM', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.key')
    if res:
        failed.append(('SSL KEY', res))
        bad_cert += 1

    # a single combined "passed" entry is recorded only if none of the three failed
    if bad_cert == 0:
        passed.append(('Certificates', 'All okay, but maybe check http://www.digicert.com/help/ or https://www.ssllabs.com/ssltest/'))

    # file creation in the necessary places
    dirs_to_check = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT, 'CACHE'), 'compressed media root'),
    ]
    for directory, label in dirs_to_check:
        # _check_file_create returns None on success; anything else is used as the failure message
        res = _check_file_create(directory)
        if res is None:
            passed.append(('File creation in ' + label, 'okay'))
        else:
            failed.append(('File creation in ' + label, res))

    # are any services listening publicly that shouldn't?
    # Connections are attempted to this machine's own hostname.
    # NOTE(review): presumably that resolves to the external interface rather
    # than loopback -- confirm for the deployment's name resolution.
    hostname = socket.gethostname()
    ports = [
        25, # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130, # beam? rabbitmq something
        4000, # main DB stunnel
        50000, # reporting DB
        8000, # gunicorn
        11211, # memcached
        9200, 9300, # elasticsearch
    ]
    connected = []
    for p in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((hostname, p))
        except socket.error:
            # couldn't connect: good
            pass
        else:
            # connection accepted: record the port as exposed
            connected.append(p)
        finally:
            s.close()

    if connected:
        failed.append(('Ports listening externally', 'got connections to port ' + ','.join(str(p) for p in connected)))
    else:
        passed.append(('Ports listening externally', 'okay'))


    # is the server time close to real-time?
    import ntplib
    try:
        response = ntplib.NTPClient().request('0.ca.pool.ntp.org')
    except (ntplib.NTPException, socket.error) as e:
        # no reply from the pool, or DNS/network failure: report it like any
        # other failed check instead of losing all results gathered so far
        failed.append(('Server time', 'Unable to query NTP pool: %s' % (e,)))
    else:
        # tolerate up to 0.1 seconds of offset in either direction
        if abs(response.offset) > 0.1:
            failed.append(('Server time', 'Time is %g seconds off NTP pool.' % (response.offset,)))
        else:
            passed.append(('Server time', 'okay'))


    # library sanity
    # (each helper returns a truthy error description on failure; the cache
    # check only runs when the bitfield check found nothing)
    err = bitfield_check()
    if err:
        failed.append(('Library sanity', 'django-bitfield: ' + err))
    else:
        err = cache_check()
        if err:
            failed.append(('Library sanity', 'django cache: ' + err))
        else:
            passed.append(('Library sanity', 'okay'))


    # github-flavoured markdown subprocess
    from courselib.markup import markdown_to_html
    try:
        # checks that script runs; does github-flavour correctly; does Unicode correctly.
        html = markdown_to_html('test *markup*\n\n```python\nprint(1)\n```\n\u2605\U0001F600')
        if html.strip() == '<p>test <em>markup</em></p>\n<pre lang="python"><code>print(1)\n</code></pre>\n<p>\u2605\U0001F600</p>':
            passed.append(('Markdown subprocess', 'okay'))
        else:
            failed.append(('Markdown subprocess', 'markdown script returned incorrect markup'))
    except OSError:
        failed.append(('Markdown subprocess', 'failed to start ruby command: ruby package probably not installed'))
    except RuntimeError:
        failed.append(('Markdown subprocess', 'markdown script failed'))

    # locale is UTF-8 (matters for markdown script calls, the SIMS database connection)
    import locale
    _, encoding = locale.getdefaultlocale()
    if encoding == 'UTF-8':
        passed.append(('Locale encoding', 'okay'))
    else:
        failed.append(('Locale encoding', "is %r; should be 'UTF-8'" % (encoding,)))

    return passed, failed
Example #9
0
def deploy_checks():
    """
    Run the deployment sanity checks and collect their outcomes.

    Every check appends a ``(label, message)`` tuple to one of two lists
    instead of raising, so a single run reports on all of them.  The checks,
    in order: main Django database, Celery task round-trip, Celery beat
    marker file, Django cache sharing, reporting DB connection, asset
    compression setting, Haystack search, photo fetching, emplid/userid API,
    certificates, SVN database, AMAINT database, file creation in required
    directories, and services accepting connections on this machine's
    hostname.

    Returns a ``(passed, failed)`` tuple of those two lists.
    """
    passed = []
    failed = []

    # cache something now to see if it's still there further down.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    try:
        n = Semester.objects.all().count()
        if n > 0:
            passed.append(('Main database connection', 'okay'))
        else:
            failed.append(('Main database connection', "Can't find any coredata.Semester objects"))
    except django.db.utils.OperationalError:
        failed.append(('Main database connection', "can't connect to database"))
    except django.db.utils.ProgrammingError:
        failed.append(('Main database connection', "database tables missing"))

    # Celery tasks
    # celery_okay is consulted by the photo-fetching check further down.
    celery_okay = False
    try:
        if settings.USE_CELERY:
            try:
                from coredata.tasks import ping
            except ImportError:
                failed.append(('Celery task', "Couldn't import task: probably missing MySQLdb module"))
            else:
                # queue the task and wait at most 5 seconds for its result
                t = ping.apply_async()
                res = t.get(timeout=5)
                if res == True:
                    passed.append(('Celery task', 'okay'))
                    celery_okay = True
                else:
                    failed.append(('Celery task', 'got incorrect result from task'))
        else:
            failed.append(('Celery task', 'celery disabled in settings'))
    except celery.exceptions.TimeoutError:
        failed.append(('Celery task', "didn't get result before timeout: celeryd maybe not running"))
    except socket.error:
        failed.append(('Celery task', "can't communicate with broker"))
    except NotImplementedError:
        failed.append(('Celery task', 'celery disabled'))
    except django.db.utils.ProgrammingError:
        failed.append(('Celery task', 'celery DB tables missing'))
    except django.db.utils.OperationalError:
        failed.append(('Celery task', 'djkombu tables missing: try migrating'))

    # celery beat
    # (the marker file's modification time must be newer than BEAT_FILE_MAX_AGE seconds)
    try:
        from coredata.tasks import BEAT_TEST_FILE, BEAT_FILE_MAX_AGE
        beatfile_age = time.time() - os.stat(BEAT_TEST_FILE).st_mtime
        if beatfile_age < BEAT_FILE_MAX_AGE:
            passed.append(('Celery beat', 'okay'))
        else:
            failed.append(('Celery beat', 'marker file is old: celery beat likely not processing tasks'))
    except OSError:
        failed.append(('Celery beat', 'marker file is missing: celery beat likely not processing tasks'))

    # Django cache
    # (has a subprocess do something to make sure we're in a persistent shared cache, not DummyCache)
    # NOTE(review): the randval + 1 comparison below implies the --cache_subcall
    # run increments the cached value; confirm in the check_things command.
    subprocess.call(['python', 'manage.py', 'check_things', '--cache_subcall'])
    # cache_okay is consulted by the photo-fetching check further down.
    cache_okay = False
    res = cache.get('check_things_cache_test')
    if res == randval:
        # value is exactly what this process stored: the subprocess didn't change it
        failed.append(('Django cache', 'other processes not sharing cache: dummy/local probably being used instead of memcached'))
    elif res is None:
        failed.append(('Django cache', 'unable to retrieve anything from cache'))
    elif res != randval + 1:
        failed.append(('Django cache', 'unknown result'))
    else:
        passed.append(('Django cache', 'okay'))
        cache_okay = True

    # Reporting DB connection
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=200133427", ())
        # only the number of rows matters here, not their content
        n = len(list(db))
        if n > 0:
            passed.append(('Reporting DB connection', 'okay'))
        else:
            failed.append(('Reporting DB connection', 'query inexplicably returned nothing'))
    except SIMSProblem as e:
        failed.append(('Reporting DB connection', 'SIMSProblem, %s' % (unicode(e))))
    except ImportError:
        failed.append(('Reporting DB connection', "couldn't import DB2 module"))

    # compression enabled?
    if settings.COMPRESS_ENABLED:
        passed.append(('Asset compression enabled', 'okay'))
    else:
        failed.append(('Asset compression enabled', 'disabled in settings'))

    # Haystack searching
    from haystack.query import SearchQuerySet
    try:
        res = SearchQuerySet().filter(text='cmpt')
        if res:
            passed.append(('Haystack search', 'okay'))
        else:
            failed.append(('Haystack search', 'nothing found: maybe update_index, or wait for search server to fully start'))
    except IOError:
        failed.append(('Haystack search', "can't read/write index"))

    # photo fetching
    # (only attempted when both the cache check and the celery check passed)
    if cache_okay and celery_okay:
        try:
            res = do_photo_fetch(['301222726'])
            if '301222726' not in res: # I don't know who 301222726 is, but he/she is real.
                failed.append(('Photo fetching', "didn't find photo we expect to exist"))
            else:
                passed.append(('Photo fetching', 'okay'))
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            failed.append(('Photo fetching', 'photo password not set'))
        except urllib2.HTTPError as e:
            failed.append(('Photo fetching', 'failed to fetch photo (%s). Maybe wrong password?' % (e)))
    else:
        failed.append(('Photo fetching', 'not testing since memcached or celery failed'))

    # emplid/userid API
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        failed.append(('Emplid API', 'no emplid returned'))
    elif isinstance(emplid, basestring) and not emplid.startswith('2000'):
        # the prefix is only checked for string results; other truthy values pass
        failed.append(('Emplid API', 'incorrect emplid returned'))
    else:
        passed.append(('Emplid API', 'okay'))


    # certificates
    # _check_cert's result is used as the failure message when truthy;
    # a falsy result counts as "no problem found".
    bad_cert = 0
    res = _check_cert('/etc/stunnel/stunnel.pem')
    if res:
        failed.append(('Stunnel cert', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.pem')
    if res:
        failed.append(('SSL PEM', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.key')
    if res:
        failed.append(('SSL KEY', res))
        bad_cert += 1

    # a single combined "passed" entry is recorded only if none of the three failed
    if bad_cert == 0:
        passed.append(('Certificates', 'All okay, but maybe check http://www.digicert.com/help/'))

    # SVN database
    if settings.SVN_DB_CONNECT:
        from courselib.svn import SVN_TABLE, _db_conn
        import MySQLdb
        try:
            db = _db_conn()
            db.execute('SELECT count(*) FROM '+SVN_TABLE, ())
            # first column of the first row is the count(*) value
            n = list(db)[0][0]
            if n > 0:
                passed.append(('SVN database', 'okay'))
            else:
                failed.append(('SVN database', "couldn't access records"))
        except MySQLdb.OperationalError:
            failed.append(('SVN database', "can't connect to database"))
    else:
        failed.append(('SVN database', 'SVN_DB_CONNECT not set in secrets.py'))

    # AMAINT database
    if settings.AMAINT_DB_PASSWORD:
        from coredata.importer import AMAINTConn
        import MySQLdb
        try:
            db = AMAINTConn()
            db.execute("SELECT count(*) FROM idMap", ())
            # first column of the first row is the count(*) value
            n = list(db)[0][0]
            if n > 0:
                passed.append(('AMAINT database', 'okay'))
            else:
                failed.append(('AMAINT database', "couldn't access records"))
        except MySQLdb.OperationalError:
            failed.append(('AMAINT database', "can't connect to database"))
    else:
        failed.append(('AMAINT database', 'AMAINT_DB_PASSWORD not set in secrets.py'))


    # file creation in the necessary places
    dirs_to_check = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT, 'CACHE'), 'compressed media root'),
    ]
    for directory, label in dirs_to_check:
        # _check_file_create returns None on success; anything else is used as the failure message
        res = _check_file_create(directory)
        if res is None:
            passed.append(('File creation in ' + label, 'okay'))
        else:
            failed.append(('File creation in ' + label, res))

    # are any services listening publicly that shouldn't?
    # Connections are attempted to this machine's own hostname.
    # NOTE(review): presumably that resolves to the external interface rather
    # than loopback -- confirm for the deployment's name resolution.
    hostname = socket.gethostname()
    ports = [
        25, # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130, # beam? rabbitmq something
        4000, # main DB stunnel
        50000, # reporting DB
        8000, # gunicorn
        11211, # memcached
        9200, 9300, # elasticsearch
    ]
    connected = []
    for p in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((hostname, p))
        except socket.error:
            # couldn't connect: good
            pass
        else:
            # connection accepted: record the port as exposed
            connected.append(p)
        finally:
            s.close()

    if connected:
        failed.append(('Ports listening externally', 'got connections to port ' + ','.join(str(p) for p in connected)))
    else:
        passed.append(('Ports listening externally', 'okay'))


    return passed, failed