def create():
    sid = crypto_util.hash_codename(session['codename'])

    source = Source(sid, crypto_util.display_id())
    db_session.add(source)
    try:
        db_session.commit()
    except IntegrityError as e:
        app.logger.error(
            "Attempt to create a source with duplicate codename: %s" % (e,))
    else:
        os.mkdir(store.path(sid))

    session['logged_in'] = True
    return redirect(url_for('lookup'))
def add_data(name):
    """Add one person's quotes from HTML to the database."""
    # Use cached data if possible, otherwise go to Wikiquote. Note that on a
    # cache miss we save the page, sleep, and return without parsing; a
    # subsequent run will pick the page up from disk.
    try:
        html = open("raw/" + name + ".txt", 'r').read()
        print("loaded", name, "from disk")
    except IOError:
        print("retrieving", name)
        html = get_person_page(name)
        if not html:
            return None
        with open("raw/" + name + ".txt", 'w') as f:
            f.write(html)
        sleep(2.5)
        return None

    try:
        quotes = parse_html(html)
    except Exception:
        print("exception parsing", name)
        quotes = None

    # Group quotes by their source, using "*None*" as a sentinel for
    # unsourced quotes.
    sources = {}
    if quotes:
        for q in quotes:
            if q.source is None:
                q.source = "*None*"
            if q.source in sources:
                sources[q.source].append(q.quote)
            else:
                sources[q.source] = [q.quote]

    session = Session()
    person = Person(name=name)
    for s in sources:
        if s != '*None*':
            source = Source(source=s)
            person.sources.append(source)
        for q in sources[s]:
            quote = Quote(quote=q)
            person.quotes.append(quote)
            if s != '*None*':
                source.quotes.append(quote)
    session.add(person)
    session.commit()
    session.close()
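# A minimal driver sketch for add_data (the names below are hypothetical
# examples, not from the original). Because add_data caches a freshly
# fetched page under raw/ and returns None without parsing it, a re-run is
# needed before newly fetched people are actually ingested.
for person in ["Alan_Turing", "Grace_Hopper"]:  # hypothetical list
    add_data(person)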
def test_get_zip(self):
    sid = 'EQZGCJBRGISGOTC2NZVWG6LILJBHEV3CINNEWSCLLFTUWZJPKJFECLS2NZ4G4U3QOZCFKTTPNZMVIWDCJBBHMUDBGFHXCQ3R'
    source = Source(sid, crypto_util.display_id())
    db_session.add(source)
    db_session.commit()

    files = ['1-abc1-msg.gpg', '2-abc2-msg.gpg']
    filenames = common.setup_test_docs(sid, files)

    archive = zipfile.ZipFile(store.get_bulk_archive(filenames))
    archivefile_contents = archive.namelist()

    for archived_file, actual_file in zip(archivefile_contents, filenames):
        actual_file_content = open(actual_file).read()
        zipped_file_content = archive.read(archived_file)
        self.assertEqual(zipped_file_content, actual_file_content)
def test_bulk_download(self):
    sid = 'EQZGCJBRGISGOTC2NZVWG6LILJBHEV3CINNEWSCLLFTUWZJPKJFECLS2NZ4G4U3QOZCFKTTPNZMVIWDCJBBHMUDBGFHXCQ3R'
    source = Source(sid, crypto_util.display_id())
    db_session.add(source)
    db_session.commit()

    files = ['1-abc1-msg.gpg', '2-abc2-msg.gpg']
    filenames = common.setup_test_docs(sid, files)

    self._login_user()
    rv = self.client.post('/bulk', data=dict(
        action='download',
        sid=sid,
        doc_names_selected=files))

    self.assertEqual(rv.status_code, 200)
    self.assertEqual(rv.content_type, 'application/zip')
    self.assertTrue(zipfile.is_zipfile(StringIO(rv.data)))
    self.assertTrue(
        zipfile.ZipFile(StringIO(rv.data)).getinfo(
            os.path.join(source.journalist_filename, files[0])))
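# Standalone sketch of the in-memory zip check used in test_bulk_download
# (Python 2, matching the StringIO usage above; the archive contents here
# are hypothetical). is_zipfile accepts a file-like object as of Python 2.7.
from StringIO import StringIO
import zipfile

buf = StringIO()
zf = zipfile.ZipFile(buf, 'w')
zf.writestr('journalist_designation/1-abc1-msg.gpg', 'ciphertext')
zf.close()
data = buf.getvalue()
assert zipfile.is_zipfile(StringIO(data))
assert zipfile.ZipFile(StringIO(data)).getinfo(
    'journalist_designation/1-abc1-msg.gpg')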
def create():
    filesystem_id = crypto_util.hash_codename(session['codename'])

    source = Source(filesystem_id, crypto_util.display_id())
    db_session.add(source)
    try:
        db_session.commit()
    except IntegrityError as e:
        db_session.rollback()
        current_app.logger.error(
            "Attempt to create a source with duplicate codename: %s" % (e,))
        # Issue 2386: don't log in on duplicates
        del session['codename']
        abort(500)
    else:
        os.mkdir(store.path(filesystem_id))

    session['logged_in'] = True
    return redirect(url_for('.lookup'))
def migrate_database(backup):
    print "* Migrating database..."

    # Get the sources table from the 0.2.1 instance's db
    old_db = backup.getmember(
        "var/chroot/document/var/www/securedrop/db.sqlite")
    old_db.name = "db.old.sqlite"
    backup.extract(old_db)
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to
    # fill in the rest by examining all of the filesystem designations in the
    # source directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from old_crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Journalist, Submission, Reply, db_session, init_db

    # We need to be able to link replies to the Journalist that sent
    # them. Since this information was not recorded in 0.2.1, we
    # arbitrarily say all replies were sent by an arbitrary journalist
    # that is present on this system. Since this information is not
    # currently exposed in the UI, this does not create a problem (for
    # now).
    if len(Journalist.query.all()) == 0:
        print "!!! FATAL: You must create a journalist account before running this migration."
        print "    Run ./manage.py add_admin and try again."
        sys.exit(1)
    else:
        arbitrary_journalist = Journalist.query.all()[0]

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were
        # deleted and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we
        # can set source.last_updated to the time of the most recently
        # uploaded submission in the store now.
        submissions = []
        replies = []
        for fn in os.listdir(source_dir):
            append_to = submissions
            if fn.startswith('reply-'):
                append_to = replies
            append_to.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))
        # Sort by submission time
        submissions.sort(key=itemgetter(1))
        replies.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(),
            # which might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, it's tricky to
        # figure out how to set this value. We can't distinguish between
        # sources who created an account but never submitted anything and
        # sources who had been active, but didn't have any stored submissions
        # or replies at the time of migration.
        #
        # After having explored the options, I think the best thing to do
        # here is set pending to True if there are no submissions or replies.
        # Sources who created an account but never submitted anything won't
        # create noise in the list, and sources who are active can probably
        # be expected to log back in relatively soon and so will
        # automatically reappear once they submit something new.
        if len(submissions + replies) == 0:
            migrated_source.pending = True
        else:
            migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions
        # for each source. This is not technically correct, but since we
        # can't know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions) + len(replies)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # Combine everything into one list, sorted by date, so we can
        # correctly set the interaction counts for each file.
        everything = submissions + replies
        everything.sort(key=itemgetter(1))
        for count, item in enumerate(everything):
            # Rename the file to fit the new file naming scheme used by 0.3
            fn = item[0]
            if fn.startswith('reply-'):
                new_fn = "{0}-{1}-reply.gpg".format(
                    count + 1, journalist_filename(source[1]))
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a database entry for this item
            db_entry = None
            if fn.startswith('reply-'):
                migrated_reply = Reply(arbitrary_journalist, migrated_source,
                                       new_fn)
                db_entry = migrated_reply
            else:
                migrated_submission = Submission(migrated_source, new_fn)
                # Assume that all submissions that are being migrated
                # have already been downloaded
                migrated_submission.downloaded = True
                db_entry = migrated_submission
            db_session.add(db_entry)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])
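# Hedged illustration of the 0.3 filename scheme produced by the rename loop
# above. The journalist designation "Wistful Dandelion" is a hypothetical
# example, and the helper is copied from the migration so this sketch runs
# standalone.
def journalist_filename(s):
    valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
    return ''.join(
        [c for c in s.lower().replace(' ', '_') if c in valid_chars])

designation = journalist_filename("Wistful Dandelion")
print "3-%s-reply.gpg" % designation  # 3-wistful_dandelion-reply.gpg
print "1-%s-msg.gpg" % designation    # 1-wistful_dandelion-msg.gpg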
def map_source(source):
    return Source(id=source['id'],
                  name=source['name'],
                  url=source['url'],
                  stype=source.get('source_type', {}).get('name', None),
                  veracity=source.get('veracity', None))
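# Minimal usage sketch for map_source. The input dict mirrors the keys the
# function reads; the values are hypothetical, and Source is assumed to
# accept these keyword arguments.
raw = {
    'id': 7,
    'name': 'Example Feed',
    'url': 'https://example.com/feed',
    'source_type': {'name': 'rss'},
    # 'veracity' omitted to exercise the .get() default of None
}
mapped = map_source(raw)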
def migrate_database(zf):
    print "* Migrating database..."

    extract_to_path(zf, "var/chroot/document/var/www/securedrop/db.sqlite",
                    "db.old.sqlite")
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to
    # fill in the rest by examining all of the filesystem designations in the
    # source directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Submission, db_session, init_db

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")

    # Make sure current database is in a pristine state
    os.remove("/var/lib/securedrop/db.sqlite")
    init_db()

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were
        # deleted and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we
        # can set source.last_updated to the time of the most recently
        # uploaded submission in the store now.
        submissions = []
        for fn in os.listdir(source_dir):
            submissions.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))
        # Sort by submission time
        submissions.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(),
            # which might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, set all
        # migrated sources from 0.2.1 to not be pending. Otherwise, we can't
        # distinguish between sources who created an account but never
        # submitted anything and sources who just didn't have any stored
        # submissions/replies at the time of migration. To avoid stopping
        # journalists from replying to previously known sources, we set all
        # migrated sources to not be pending so they will appear in the
        # document interface.
        migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions
        # for each source. This is not technically correct, but since we
        # can't know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # Submissions are now sorted by date, so we can just loop over them
        # to infer the interaction counts
        for count, submission in enumerate(submissions):
            # TODO Possible concern: submission filenames. Should we migrate
            # them to the current naming scheme? What about the extensions
            # ("msg.gpg" or "doc.zip.gpg", used in `documents_messages_count`
            # among other places)?
            fn = submission[0]
            if fn.startswith('reply-'):
                new_fn = "{0}-reply.gpg".format(count + 1)
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a submission for this source
            migrated_submission = Submission(migrated_source, new_fn)
            # Assume that all submissions that are being migrated have
            # already been downloaded
            migrated_submission.downloaded = True
            db_session.add(migrated_submission)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])
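# Standalone sketch of the mtime-sort pattern used in both migrations above:
# pair each filename with its modification time, sort ascending, and take
# the last entry as the most recent. The directory path is hypothetical.
import os
from operator import itemgetter

store_dir = "/var/lib/securedrop/store/example_fs_id"  # hypothetical
entries = [(fn, os.path.getmtime(os.path.join(store_dir, fn)))
           for fn in os.listdir(store_dir)]
entries.sort(key=itemgetter(1))
if entries:
    newest_fn, newest_mtime = entries[-1]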