Example #1
0
def fetch_all_sources(api_client):
    for i, batch in enumerate(sources_iterator(api_client)):
        print(f'[fetch-all-sources] batch #{i}')
        batch = map(map_source, batch)
        # Engine.begin() yields a transaction-scoped connection; commit or
        # rollback happens automatically when the block exits.
        with get_engine().begin() as conn:
            conn.execute(Source.upsert_query(),
                         [source.__dict__ for source in batch])
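The snippet above leans on helpers that are not shown (sources_iterator, get_engine, Source.upsert_query). Purely as an illustration, an upsert statement with parameter names matching the fields built by map_source() (Example #11) might be sketched as follows; the table name and SQL dialect are assumptions, not taken from the original code.

from sqlalchemy import text

def upsert_query():
    # Hypothetical sketch only: a PostgreSQL/SQLite-style upsert keyed on id.
    # The real Source.upsert_query() may differ in table name, columns, and dialect.
    return text(
        "INSERT INTO sources (id, name, url, stype, veracity) "
        "VALUES (:id, :name, :url, :stype, :veracity) "
        "ON CONFLICT (id) DO UPDATE SET "
        "name = excluded.name, url = excluded.url, "
        "stype = excluded.stype, veracity = excluded.veracity"
    )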
def commandImp(msg):
	autoDestroy(msg)
	command, text = splitCommand(msg.text)
	if "s4_g" in command:
		return msg.reply_text(iterateMessage(msg.chat.id), quote=False)
	elif matchKey(command, ["s4_source_add", "s4_sa", "s4_a"]):
		autoDestroy(msg.reply_text(Source.add(text)))
	elif matchKey(command, ["s4_source_delete", "s4_sd", "s4_d"]):
		autoDestroy(msg.reply_text(Source.remove(text)))
	elif matchKey(command, ["s4_source_list", "s4_sl", "s4_l"]):
		pass # intentional
	elif matchKey(command, ["s4_sub", "s4_s"]):
		Subscription.add(msg.chat.id, text)
	else:
		return
	if matchKey(command, ["s4_source", "s4_sl", "s4_l", "s4_sa", "s4_a", "s4_sd", "s3_d"]):
		sources = ['[%s](t.me/%s)' % (chatname, chatname) for \
			chatname in Source.db.keys()]
		autoDestroy(msg.reply_text('Source list: \n' + '\n'.join(sources), 
			parse_mode='Markdown', 
			disable_web_page_preview=True))
	else:
		autoDestroy(msg.reply_text("success"))
Example #3
0
def create():
    sid = crypto_util.hash_codename(session['codename'])

    source = Source(sid, crypto_util.display_id())
    db_session.add(source)
    try:
        db_session.commit()
    except IntegrityError as e:
        app.logger.error("Attempt to create a source with duplicate codename: %s" % (e,))
    else:
        os.mkdir(store.path(sid))

    session['logged_in'] = True
    return redirect(url_for('lookup'))
Example #4
0
def add_data(name):
    """add data from html to database"""
    # use cached data if possible, otherwise go to wikiquote
    try:
        html = open("raw/" + name + ".txt", 'r').read()
        print("loaded", name, 'from disk')
    except IOError:
        print("retrieving", name)
        html = get_person_page(name)
        if not html:
            return None
        with open("raw/" + name + ".txt", 'w') as f:
            f.write(html)
        sleep(2.5)
        return None

    try:
        quotes = parse_html(html)
    except Exception:
        print("exception parsing", name)
        quotes = None

    sources = {}
    if quotes:
        for q in quotes:
            if q.source is None:
                q.source = "*None*"
            if q.source in sources:
                sources[q.source].append(q.quote)
            else:
                sources[q.source] = [q.quote]

        session = Session()
        person = Person(name=name)
        for s in sources:
            if s != '*None*':
                source = Source(source=s)
                person.sources.append(source)
            for q in sources[s]:
                quote = Quote(quote=q)
                person.quotes.append(quote)
                if s != '*None*':
                    source.quotes.append(quote)
        session.add(person)
        session.commit()
        session.close()
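add_data assumes SQLAlchemy models (Person, Source, Quote) and a Session factory defined elsewhere in the project. A minimal sketch of that factory, with an assumed SQLite URL, might be:

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Assumed database URL, for illustration only; the real configuration and the
# Person/Source/Quote model definitions live elsewhere in the original project.
engine = create_engine("sqlite:///quotes.db")
Session = sessionmaker(bind=engine)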
    def test_get_zip(self):
        sid = 'EQZGCJBRGISGOTC2NZVWG6LILJBHEV3CINNEWSCLLFTUWZJPKJFECLS2NZ4G4U3QOZCFKTTPNZMVIWDCJBBHMUDBGFHXCQ3R'
        source = Source(sid, crypto_util.display_id())
        db_session.add(source)
        db_session.commit()

        files = ['1-abc1-msg.gpg', '2-abc2-msg.gpg']
        filenames = common.setup_test_docs(sid, files)

        archive = zipfile.ZipFile(store.get_bulk_archive(filenames))

        archivefile_contents = archive.namelist()

        for archived_file, actual_file in zip(archivefile_contents, filenames):
            actual_file_content = open(actual_file).read()
            zipped_file_content = archive.read(archived_file)
            self.assertEqual(zipped_file_content, actual_file_content)
    def test_bulk_download(self):
        sid = 'EQZGCJBRGISGOTC2NZVWG6LILJBHEV3CINNEWSCLLFTUWZJPKJFECLS2NZ4G4U3QOZCFKTTPNZMVIWDCJBBHMUDBGFHXCQ3R'
        source = Source(sid, crypto_util.display_id())
        db_session.add(source)
        db_session.commit()
        files = ['1-abc1-msg.gpg', '2-abc2-msg.gpg']
        filenames = common.setup_test_docs(sid, files)

        self._login_user()
        rv = self.client.post('/bulk',
                              data=dict(action='download',
                                        sid=sid,
                                        doc_names_selected=files))

        self.assertEqual(rv.status_code, 200)
        self.assertEqual(rv.content_type, 'application/zip')
        self.assertTrue(zipfile.is_zipfile(StringIO(rv.data)))
        self.assertTrue(
            zipfile.ZipFile(StringIO(rv.data)).getinfo(
                os.path.join(source.journalist_filename, files[0])))
Example #7
0
    def create():
        filesystem_id = crypto_util.hash_codename(session['codename'])

        source = Source(filesystem_id, crypto_util.display_id())
        db_session.add(source)
        try:
            db_session.commit()
        except IntegrityError as e:
            db_session.rollback()
            current_app.logger.error(
                "Attempt to create a source with duplicate codename: %s" %
                (e, ))

            # Issue 2386: don't log in on duplicates
            del session['codename']
            abort(500)
        else:
            os.mkdir(store.path(filesystem_id))

        session['logged_in'] = True
        return redirect(url_for('.lookup'))
def loopImp():
    requests_count = 0
    for chatname in Source.db:
        soup = getSoup('https://telete.in/s/' + chatname)
        max_message_id = getMaxMessageId(soup)
        for message_id in Source.iterate(chatname, max_message_id):
            url = "https://telete.in/%s/%d?embed=1" % (chatname, message_id)
            soup = getSoup(url)
            text = soup.find('div', class_='tgme_widget_message_text')
            requests_count += 1
            if not text:
                continue
            for item in text.find_all():
                if not item or not item.text:
                    continue
                for url in item.text.split():
                    if '://' not in url:
                        url = "https://" + url
                    if '://telegra.ph' in url:
                        Pool.add(url)
            if requests_count > 30:
                break
    for chat_id in Subscription.db:
        tele.bot.send_message(chat_id=chat_id, text=iterateMessage(chat_id))
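loopImp depends on a getSoup helper that is not shown. Assuming it simply fetches a page and parses it, a sketch could look like this (the timeout and parser choice are assumptions):

import requests
from bs4 import BeautifulSoup

def getSoup(url):
    # Assumed implementation: fetch the page and hand it to BeautifulSoup.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')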
Example #9
0
def migrate_database(backup):
    print "* Migrating database..."

    # Get the sources table from the 0.2.1 instance's db
    old_db = backup.getmember(
        "var/chroot/document/var/www/securedrop/db.sqlite")
    old_db.name = "db.old.sqlite"
    backup.extract(old_db)
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from old_crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Journalist, Submission, Reply, db_session, init_db

    # We need to be able to link replies to the Journalist that sent
    # them. Since this information was not recorded in 0.2.1, we
    # arbitrarily say all replies were sent by an arbitrary journalist
    # that is present on this system. Since this information is not
    # currently exposed in the UI, this does not create a problem (for
    # now).
    if len(Journalist.query.all()) == 0:
        print "!!! FATAL: You must create a journalist account before running this migration."
        print "           Run ./manage.py add_admin and try again."
        sys.exit(1)
    else:
        arbitrary_journalist = Journalist.query.all()[0]

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        replies = []
        for fn in os.listdir(source_dir):
            append_to = submissions
            if fn.startswith('reply-'):
                append_to = replies
            append_to.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))

        # Sort by submission time
        submissions.sort(key=itemgetter(1))
        replies.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, it's tricky to
        # figure out how to set this value. We can't distinguish between sources
        # who created an account but never submitted anything and sources who
        # had been active, but didn't have any stored submissions or replies at
        # the time of migration.
        #
        # After having explored the options, I think the best thing to do here
        # is set pending to True if there are no submissions or replies. Sources
        # who created an account but never submitted anything won't create noise
        # in the list, and sources who are active can probably be expected to
        # log back in relatively soon and so will automatically reappear once
        # they submit something new.
        if len(submissions + replies) == 0:
            migrated_source.pending = True
        else:
            migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions) + len(replies)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # Combine everything into one list, sorted by date, so we can
        # correctly set the interaction counts for each file.
        everything = submissions + replies
        everything.sort(key=itemgetter(1))
        for count, item in enumerate(everything):
            # Rename the file to fit the new file naming scheme used by 0.3
            fn = item[0]

            if fn.startswith('reply-'):
                new_fn = "{0}-{1}-reply.gpg".format(
                    count + 1, journalist_filename(source[1]))
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a database entry for this item
            db_entry = None

            if fn.startswith('reply-'):
                migrated_reply = Reply(arbitrary_journalist, migrated_source,
                                       new_fn)
                db_entry = migrated_reply
            else:
                migrated_submission = Submission(migrated_source, new_fn)
                # Assume that all submissions that are being migrated
                # have already been downloaded
                migrated_submission.downloaded = True
                db_entry = migrated_submission

            db_session.add(db_entry)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])
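migrate_database() above expects an already-opened tar archive (it calls backup.getmember() and backup.extract()). A typical invocation, with an assumed backup filename, would be:

import tarfile

# Assumed archive name, for illustration; the real path comes from the
# surrounding migration tooling.
with tarfile.open("securedrop-backup.tar.gz", "r:*") as backup:
    migrate_database(backup)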
Example #10
0
def migrate_database(backup):
    print "* Migrating database..."

    # Get the sources table from the 0.2.1 instance's db
    old_db = backup.getmember("var/chroot/document/var/www/securedrop/db.sqlite")
    old_db.name = "db.old.sqlite"
    backup.extract(old_db)
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from old_crypto_util import displayid

    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Journalist, Submission, Reply, db_session, init_db

    # We need to be able to link replies to the Journalist that sent
    # them. Since this information was not recorded in 0.2.1, we
    # arbitrarily say all replies were sent by an arbitrary journalist
    # that is present on this system. Since this information is not
    # currently exposed in the UI, this does not create a problem (for
    # now).
    if len(Journalist.query.all()) == 0:
        print "!!! FATAL: You must create a journalist account before running this migration."
        print "           Run ./manage.py add_admin and try again."
        sys.exit(1)
    else:
        arbitrary_journalist = Journalist.query.all()[0]

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite", "/var/lib/securedrop/db.sqlite.bak")

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = "abcdefghijklmnopqrstuvwxyz1234567890-_"
        return "".join([c for c in s.lower().replace(" ", "_") if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        replies = []
        for fn in os.listdir(source_dir):
            append_to = submissions
            if fn.startswith("reply-"):
                append_to = replies
            append_to.append((fn, os.path.getmtime(os.path.join(source_dir, fn))))

        # Sort by submission time
        submissions.sort(key=itemgetter(1))
        replies.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, it's tricky to
        # figure out how to set this value. We can't distinguish between sources
        # who created an account but never submitted anything and sources who
        # had been active, but didn't have any stored submissions or replies at
        # the time of migration.
        #
        # After having explored the options, I think the best thing to do here
        # is set pending to True if there are no submissions or replies. Sources
        # who created an account but never submitted anything won't create noise
        # in the list, and sources who are active can probably be expected to
        # log back in relatively soon and so will automatically reappear once
        # they submit something new.
        if len(submissions + replies) == 0:
            migrated_source.pending = True
        else:
            migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions) + len(replies)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # Combine everything into one list, sorted by date, so we can
        # correctly set the interaction counts for each file.
        everything = submissions + replies
        everything.sort(key=itemgetter(1))
        for count, item in enumerate(everything):
            # Rename the file to fit the new file naming scheme used by 0.3
            fn = item[0]

            if fn.startswith("reply-"):
                new_fn = "{0}-{1}-reply.gpg".format(count + 1, journalist_filename(source[1]))
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]), "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg"
                )

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn), os.path.join(source_dir, new_fn))

            # Add a database entry for this item
            db_entry = None

            if fn.startswith("reply-"):
                migrated_reply = Reply(arbitrary_journalist, migrated_source, new_fn)
                db_entry = migrated_reply
            else:
                migrated_submission = Submission(migrated_source, new_fn)
                # Assume that all submissions that are being migrated
                # have already been downloaded
                migrated_submission.downloaded = True
                db_entry = migrated_submission

            db_session.add(db_entry)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(["chown", "www-data:www-data", "/var/lib/securedrop/db.sqlite"])
Example #11
0
def map_source(source):
    return Source(id=source['id'],
                  name=source['name'],
                  url=source['url'],
                  stype=source.get('source_type', {}).get('name', None),
                  veracity=source.get('veracity', None))
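For illustration, applying map_source() to a payload shaped like the API response it expects (all field values below are invented) produces a Source as follows:

payload = {
    'id': 42,
    'name': 'Example Feed',
    'url': 'https://example.org/feed',
    'source_type': {'name': 'rss'},
    'veracity': 'verified',
}
source = map_source(payload)  # -> Source(id=42, name='Example Feed', ...)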
Example #12
0
def migrate_database(zf):
    print "* Migrating database..."

    extract_to_path(zf, "var/chroot/document/var/www/securedrop/db.sqlite", "db.old.sqlite")
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Submission, db_session, init_db

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")
    # Make sure current database is in a pristine state
    os.remove("/var/lib/securedrop/db.sqlite")
    init_db()

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join([c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        for fn in os.listdir(source_dir):
            submissions.append((fn, os.path.getmtime(os.path.join(source_dir, fn))))
        # Sort by submission time
        submissions.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, set all migrated
        # sources from 0.2.1 to not be pending. Otherwise, we can't distinguish
        # between sources who created an account but never submitted anything
        # and sources who just didn't have any stored submissions/replies at
        # the time of migration. To avoid stopping journalists from replying to
        # previous known sources, we set all migrated sources to not be pending
        # so they will appear in the document interface.
        migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # submissions are now sorted by date, so we can just loop over them to
        # infer the interaction counts
        for count, submission in enumerate(submissions):
            # TODO Possible concern: submission filenames. Should we migrate
            # them to the current naming scheme? What about the extensions
            # ("msg.gpg" or "doc.zip.gpg", used in `documents_messages_count`
            # among other places)?

            fn = submission[0]

            if fn.startswith('reply-'):
                new_fn = "{0}-reply.gpg".format(count+1)
            else:
                new_fn = "{0}-{1}-{2}".format(count+1, journalist_filename(source[1]), "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a submission for this source
            migrated_submission = Submission(migrated_source, new_fn)
            # Assume that all submissions that are being migrated have already
            # been downloaded
            migrated_submission.downloaded = True
            db_session.add(migrated_submission)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])
Example #13
0
def migrate_database(zf):
    print "* Migrating database..."

    extract_to_path(zf, "var/chroot/document/var/www/securedrop/db.sqlite",
                    "db.old.sqlite")
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Submission, db_session, init_db

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")
    # Make sure current database is in a pristine state
    os.remove("/var/lib/securedrop/db.sqlite")
    init_db()

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        for fn in os.listdir(source_dir):
            submissions.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))
        # Sort by submission time
        submissions.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, set all migrated
        # sources from 0.2.1 to not be pending. Otherwise, we can't distinguish
        # between sources who created an account but never submitted anything
        # and sources who just didn't have any stored submissions/replies at
        # the time of migration. To avoid stopping journalists from replying to
        # previous known sources, we set all migrated sources to not be pending
        # so they will appear in the document interface.
        migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # submissions are now sorted by date, so we can just loop over them to
        # infer the interaction counts
        for count, submission in enumerate(submissions):
            # TODO Possible concern: submission filenames. Should we migrate
            # them to the current naming scheme? What about the extensions
            # ("msg.gpg" or "doc.zip.gpg", used in `documents_messages_count`
            # among other places)?

            fn = submission[0]

            if fn.startswith('reply-'):
                new_fn = "{0}-reply.gpg".format(count + 1)
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a submission for this source
            migrated_submission = Submission(migrated_source, new_fn)
            # Assume that all submissions that are being migrated have already
            # been downloaded
            migrated_submission.downloaded = True
            db_session.add(migrated_submission)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])