Example #1
def create():
    sid = crypto_util.hash_codename(session['codename'])

    source = Source(sid, crypto_util.display_id())
    db_session.add(source)
    try:
        db_session.commit()
    except IntegrityError as e:
        app.logger.error("Attempt to create a source with duplicate codename: %s" % (e,))
    else:
        os.mkdir(store.path(sid))

    session['logged_in'] = True
    return redirect(url_for('lookup'))
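Each SecureDrop snippet in this listing builds a `Source` from a filesystem id and a human-readable designation. A minimal sketch of a model matching that usage (inferred from these examples, not the real SecureDrop schema):

from datetime import datetime
from sqlalchemy import Column, Integer, String, Boolean, DateTime
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Source(Base):
    __tablename__ = 'sources'
    id = Column(Integer, primary_key=True)
    # Hash of the source's codename; also used as the on-disk directory name.
    filesystem_id = Column(String(96), unique=True)
    # Human-readable designation shown to journalists.
    journalist_designation = Column(String(255))
    flagged = Column(Boolean, default=False)
    pending = Column(Boolean, default=True)
    last_updated = Column(DateTime, default=datetime.utcnow)
    interaction_count = Column(Integer, default=0)

    def __init__(self, filesystem_id, journalist_designation):
        self.filesystem_id = filesystem_id
        self.journalist_designation = journalist_designation

    @property
    def journalist_filename(self):
        # Filesystem-safe form of the designation (see Examples #6 and #8).
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(c for c in
                       self.journalist_designation.lower().replace(' ', '_')
                       if c in valid_chars)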
Example #2
def add_data(name):
    """add data from html to database"""
    # use cached data if possible, otherwise go to wikiquote
    try:
        with open("raw/" + name + ".txt", 'r') as f:
            html = f.read()
        print("loaded", name, 'from disk')
    except OSError:
        print("retrieving", name)
        html = get_person_page(name)
        if not html:
            return None
        with open("raw/" + name + ".txt", 'w') as f:
            f.write(html)
        sleep(2.5)
        return None

    try:
        quotes = parse_html(html)
    except Exception:
        print("exception parsing", name)
        quotes = None

    sources = {}
    if quotes:
        for q in quotes:
            if q.source is None:
                q.source = "*None*"
            if q.source in sources:
                sources[q.source].append(q.quote)
            else:
                sources[q.source] = [q.quote]

        session = Session()
        person = Person(name=name)
        for s in sources:
            if s != '*None*':
                source = Source(source=s)
                person.sources.append(source)
            for q in sources[s]:
                quote = Quote(quote=q)
                person.quotes.append(quote)
                if s != '*None*':
                    source.quotes.append(quote)
        session.add(person)
        session.commit()
        session.close()
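Note that `add_data` returns early after fetching and caching a page, so each name may need two calls: one to populate raw/, one to parse the cached copy. A hypothetical driver (the names are illustrative only):

if __name__ == "__main__":
    people = ["Albert Einstein", "Mark Twain"]   # hypothetical inputs
    for name in people:
        add_data(name)   # first call caches the HTML and returns None
    for name in people:
        add_data(name)   # second call parses the cached copy into the db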
Example #3
    def test_get_zip(self):
        sid = 'EQZGCJBRGISGOTC2NZVWG6LILJBHEV3CINNEWSCLLFTUWZJPKJFECLS2NZ4G4U3QOZCFKTTPNZMVIWDCJBBHMUDBGFHXCQ3R'
        source = Source(sid, crypto_util.display_id())
        db_session.add(source)
        db_session.commit()

        files = ['1-abc1-msg.gpg', '2-abc2-msg.gpg']
        filenames = common.setup_test_docs(sid, files)

        archive = zipfile.ZipFile(store.get_bulk_archive(filenames))

        archivefile_contents = archive.namelist()

        for archived_file, actual_file in zip(archivefile_contents, filenames):
            with open(actual_file) as f:
                actual_file_content = f.read()
            zipped_file_content = archive.read(archived_file)
            self.assertEqual(zipped_file_content, actual_file_content)
Example #4
    def test_bulk_download(self):
        sid = 'EQZGCJBRGISGOTC2NZVWG6LILJBHEV3CINNEWSCLLFTUWZJPKJFECLS2NZ4G4U3QOZCFKTTPNZMVIWDCJBBHMUDBGFHXCQ3R'
        source = Source(sid, crypto_util.display_id())
        db_session.add(source)
        db_session.commit()
        files = ['1-abc1-msg.gpg', '2-abc2-msg.gpg']
        filenames = common.setup_test_docs(sid, files)

        self._login_user()
        rv = self.client.post('/bulk',
                              data=dict(action='download',
                                        sid=sid,
                                        doc_names_selected=files))

        self.assertEqual(rv.status_code, 200)
        self.assertEqual(rv.content_type, 'application/zip')
        self.assertTrue(zipfile.is_zipfile(StringIO(rv.data)))
        self.assertTrue(
            zipfile.ZipFile(StringIO(rv.data)).getinfo(
                os.path.join(source.journalist_filename, files[0])))
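Both tests rely on a `store.get_bulk_archive(filenames)` helper that bundles the given files into a zip. A minimal sketch of what such a helper could look like (an assumption; the real SecureDrop implementation may differ):

import tempfile
import zipfile

def get_bulk_archive(filenames):
    # Write the files into a temporary zip and hand back the open file
    # object, rewound so callers can pass it straight to zipfile.ZipFile.
    zip_file = tempfile.NamedTemporaryFile(suffix='.zip')
    with zipfile.ZipFile(zip_file, 'w') as archive:
        for filename in filenames:
            archive.write(filename)
    zip_file.seek(0)
    return zip_file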
Example #5
    def create():
        filesystem_id = crypto_util.hash_codename(session['codename'])

        source = Source(filesystem_id, crypto_util.display_id())
        db_session.add(source)
        try:
            db_session.commit()
        except IntegrityError as e:
            db_session.rollback()
            current_app.logger.error(
                "Attempt to create a source with duplicate codename: %s" %
                (e, ))

            # Issue 2386: don't log in on duplicates
            del session['codename']
            abort(500)
        else:
            os.mkdir(store.path(filesystem_id))

        session['logged_in'] = True
        return redirect(url_for('.lookup'))
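Unlike Example #1, this version rolls the session back and refuses to log the user in when the codename collides. The same commit-or-rollback pattern in isolation (a generic sketch, not SecureDrop code):

from sqlalchemy.exc import IntegrityError

def safe_commit(session):
    # Try to commit; on a constraint violation, roll back so the session
    # remains usable and let the caller decide how to handle the failure.
    try:
        session.commit()
        return True
    except IntegrityError:
        session.rollback()
        return False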
Example #6
def migrate_database(backup):
    print "* Migrating database..."

    # Get the sources table from the 0.2.1 instance's db
    old_db = backup.getmember(
        "var/chroot/document/var/www/securedrop/db.sqlite")
    old_db.name = "db.old.sqlite"
    backup.extract(old_db)
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    conn.close()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from old_crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Journalist, Submission, Reply, db_session, init_db

    # We need to be able to link replies to the Journalist that sent
    # them. Since this information was not recorded in 0.2.1, we
    # arbitrarily say all replies were sent by an arbitrary journalist
    # that is present on this system. Since this information is not
    # currently exposed in the UI, this does not create a problem (for
    # now).
    if len(Journalist.query.all()) == 0:
        print "!!! FATAL: You must create a journalist account before running this migration."
        print "           Run ./manage.py add_admin and try again."
        sys.exit(1)
    else:
        arbitrary_journalist = Journalist.query.all()[0]

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        replies = []
        for fn in os.listdir(source_dir):
            append_to = submissions
            if fn.startswith('reply-'):
                append_to = replies
            append_to.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))

        # Sort by submission time
        submissions.sort(key=itemgetter(1))
        replies.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, it's tricky to
        # figure out how to set this value. We can't distinguish between sources
        # who created an account but never submitted anything and sources who
        # had been active, but didn't have any stored submissions or replies at
        # the time of migration.
        #
        # After having explored the options, I think the best thing to do here
        # is set pending to True if there are no submissions or replies. Sources
        # who created an account but never submitted anything won't create noise
        # in the list, and sources who are active can probably be expected to
        # log back in relatively soon and so will automatically reappear once
        # they submit something new.
        if len(submissions + replies) == 0:
            migrated_source.pending = True
        else:
            migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions) + len(replies)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # Combine everything into one list, sorted by date, so we can
        # correctly set the interaction counts for each file.
        everything = submissions + replies
        everything.sort(key=itemgetter(1))
        for count, item in enumerate(everything):
            # Rename the file to fit the new file naming scheme used by 0.3
            fn = item[0]

            if fn.startswith('reply-'):
                new_fn = "{0}-{1}-reply.gpg".format(
                    count + 1, journalist_filename(source[1]))
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a database entry for this item
            db_entry = None

            if fn.startswith('reply-'):
                migrated_reply = Reply(arbitrary_journalist, migrated_source,
                                       new_fn)
                db_entry = migrated_reply
            else:
                migrated_submission = Submission(migrated_source, new_fn)
                # Assume that all submissions that are being migrated
                # have already been downloaded
                migrated_submission.downloaded = True
                db_entry = migrated_submission

            db_session.add(db_entry)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])
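The nested `journalist_filename` helper lowercases the designation, turns spaces into underscores, and drops everything else. The same logic, pulled out to module level with a couple of sanity checks:

def journalist_filename(s):
    valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
    return ''.join(
        [c for c in s.lower().replace(' ', '_') if c in valid_chars])

assert journalist_filename("John Doe!") == "john_doe"
assert journalist_filename("Agent 99") == "agent_99"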
Example #7
def map_source(source):
    return Source(id=source['id'],
                  name=source['name'],
                  url=source['url'],
                  stype=source.get('source_type', {}).get('name', None),
                  veracity=source.get('veracity', None))
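This `Source` takes keyword arguments and so comes from a different codebase than the SecureDrop examples. A quick illustration with a hypothetical input record (field names follow the `.get()` calls above):

record = {
    'id': 1,
    'name': 'Example Feed',
    'url': 'https://example.org/feed',
    'source_type': {'name': 'rss'},   # optional; stype falls back to None
    'veracity': 'verified',           # optional as well
}
source = map_source(record)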
Example #8
def migrate_database(zf):
    print "* Migrating database..."

    extract_to_path(zf, "var/chroot/document/var/www/securedrop/db.sqlite",
                    "db.old.sqlite")
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    conn.close()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Submission, db_session, init_db

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")
    # Make sure current database is in a pristine state
    os.remove("/var/lib/securedrop/db.sqlite")
    init_db()

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        for fn in os.listdir(source_dir):
            submissions.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))
        # Sort by submission time
        submissions.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, set all migrated
        # sources from 0.2.1 to not be pending. Otherwise, we can't distinguish
        # between sources who created an account but never submitted anything
        # and sources who just didn't have any stored submissions/replies at
        # the time of migration. To avoid stopping journalists from replying to
        # previous known sources, we set all migrated sources to not be pending
        # so they will appear in the document interface.
        migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # submissions are now sorted by date, so we can just loop over them to
        # infer the interaction counts
        for count, submission in enumerate(submissions):
            # TODO Possible concern: submission filenames. Should we migrate
            # them to the current naming scheme? What about the extensions
            # ("msg.gpg" or "doc.zip.gpg", used in `documents_messages_count`
            # among other places)?

            fn = submission[0]

            if fn.startswith('reply-'):
                new_fn = "{0}-reply.gpg".format(count + 1)
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a submission for this source
            migrated_submission = Submission(migrated_source, new_fn)
            # Assume that all submissions that are being migrated have already
            # been downloaded
            migrated_submission.downloaded = True
            db_session.add(migrated_submission)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])
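Example #8 depends on an `extract_to_path(zf, member, dest)` helper that is not shown. Presumably it copies a single member out of the backup archive to a local path; a plausible sketch, assuming `zf` is an open `zipfile.ZipFile` (a guess, not the actual helper):

import shutil

def extract_to_path(zf, member, dest):
    # Stream one archive member to dest without extracting anything else.
    with zf.open(member) as src, open(dest, 'wb') as out:
        shutil.copyfileobj(src, out)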