def submit_assignment(assignment_id):
    assignment = Assignment.query.filter_by(id=assignment_id).first()
    if not assignment:
        return json.dumps({'success': False,
                           'error': 'Assignment not found'}), 404
    user_id = request.form.get('user_id')
    user = User.query.filter_by(id=user_id).first()
    if not user:
        return json.dumps({'success': False, 'error': 'User not found'}), 404

    # Check enrollment before touching S3 so we never upload files for users
    # who do not have this assignment.
    course = Course.query.filter_by(id=assignment.course_id).first()
    if user not in course.students:
        return json.dumps({'success': False,
                           'error': 'User does not have this assignment'}), 400

    client = boto3.client('s3')
    file = request.files.get('content')
    bucket_name = 'cmsproject-bucket'
    file_name = user.name + '/' + file.filename
    client.put_object(Body=file, Bucket=bucket_name, Key=file_name)
    submission = Submission(
        content=client.generate_presigned_url(
            'get_object',
            Params={'Bucket': bucket_name, 'Key': file_name},
            ExpiresIn=3600))

    assignment.submissions.append(submission)
    db.session.add(submission)
    db.session.commit()
    return json.dumps({'success': True, 'data': submission.serialize()})
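# Side note (illustrative sketch, not part of the route above): boto3's
# generate_presigned_url() signs the URL locally and does not verify that the
# object actually exists. The bucket and key below are hypothetical.
import boto3

s3 = boto3.client('s3')
url = s3.generate_presigned_url(
    'get_object',
    Params={'Bucket': 'cmsproject-bucket', 'Key': 'alice/report.pdf'},
    ExpiresIn=3600)  # link expires after one hour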
def submit():
    msg = request.form['msg']
    fh = request.files['fh']

    # Don't bother submitting anything if it was an "empty" submission. #878.
    if not (msg or fh):
        flash("You must enter a message or choose a file to submit.", "error")
        return redirect(url_for('lookup'))

    fnames = []
    journalist_filename = g.source.journalist_filename
    first_submission = g.source.interaction_count == 0

    if msg:
        g.source.interaction_count += 1
        fnames.append(
            store.save_message_submission(g.sid, g.source.interaction_count,
                                          journalist_filename, msg))
    if fh:
        g.source.interaction_count += 1
        fnames.append(
            store.save_file_submission(g.sid, g.source.interaction_count,
                                       journalist_filename, fh.filename,
                                       fh.stream))

    if first_submission:
        msg = render_template('first_submission_flashed_message.html')
        flash(Markup(msg), "success")
    else:
        if msg and not fh:
            things = 'message'
        elif not msg and fh:
            things = 'document'
        else:
            things = 'message and document'
        msg = render_template('next_submission_flashed_message.html',
                              things=things)
        flash(Markup(msg), "success")

    for fname in fnames:
        submission = Submission(g.source, fname)
        db_session.add(submission)

    if g.source.pending:
        g.source.pending = False

        # Generate a keypair now, if there's enough entropy (issue #303)
        entropy_avail = int(
            open('/proc/sys/kernel/random/entropy_avail').read())
        if entropy_avail >= 2400:
            async_genkey(g.sid, g.codename)

    g.source.last_updated = datetime.utcnow()
    db_session.commit()
    normalize_timestamps(g.sid)

    return redirect(url_for('lookup'))
def submit():
    msg = request.form['msg']
    fh = request.files['fh']

    # Don't bother submitting anything if it was an "empty" submission. #878.
    if not (msg or fh):
        flash("You must enter a message or choose a file to submit.", "error")
        return redirect(url_for('lookup'))

    fnames = []
    journalist_filename = g.source.journalist_filename
    first_submission = g.source.interaction_count == 0

    if msg:
        g.source.interaction_count += 1
        fnames.append(
            store.save_message_submission(g.sid, g.source.interaction_count,
                                          journalist_filename, msg))
    if fh:
        g.source.interaction_count += 1
        fnames.append(
            store.save_file_submission(g.sid, g.source.interaction_count,
                                       journalist_filename, fh.filename,
                                       fh.stream))

    if first_submission:
        flash(
            "Thanks for submitting something to SecureDrop! Please check back later for replies.",
            "notification")
    else:
        if msg:
            flash("Thanks! We received your message.", "notification")
        if fh:
            flash('{} "{}".'.format("Thanks! We received your document",
                                    fh.filename or '[unnamed]'),
                  "notification")

    for fname in fnames:
        submission = Submission(g.source, fname)
        db_session.add(submission)

    if g.source.pending:
        g.source.pending = False

        # Generate a keypair now, if there's enough entropy (issue #303)
        entropy_avail = int(
            open('/proc/sys/kernel/random/entropy_avail').read())
        if entropy_avail >= 2400:
            async_genkey(g.sid, g.codename)

    g.source.last_updated = datetime.utcnow()
    db_session.commit()
    normalize_timestamps(g.sid)

    return redirect(url_for('lookup'))
def write_to_database(q):
    counter = 0
    while True:
        json_filepath = q.get()

        data = None
        with open(json_filepath, 'r') as f:
            data = json.load(f)

        for json_item in data['data']:
            if 'body' in json_item:
                # if 'body' is present then assume it's a comment
                db_record = db_Comment.get_or_none(
                    db_Comment.id == json_item['id'])
                if not db_record:
                    json_item['body'] = clean_text(json_item['body'])
                    db_record = db_Comment.create(**json_item)
                    print(f"comment {json_item['id']} written to database")
                    counter += 1
            elif 'selftext' in json_item:
                # if 'selftext' is present then assume it's a submission
                db_record = db_Submission.get_or_none(
                    db_Submission.id == json_item['id'])
                if not db_record:
                    json_item['selftext'] = clean_text(json_item['selftext'])
                    db_record = db_Submission.create(**json_item)
                    print(f"submission {json_item['id']} written to database")
                    counter += 1

        q.task_done()
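# Minimal usage sketch for write_to_database() above, assuming the peewee
# models and clean_text() are importable. The dump filenames and the thread
# wiring are hypothetical; the original producer code is not shown here.
import queue
import threading

json_queue = queue.Queue()
worker = threading.Thread(target=write_to_database, args=(json_queue,),
                          daemon=True)
worker.start()
for path in ['dump_0001.json', 'dump_0002.json']:  # hypothetical dump files
    json_queue.put(path)
json_queue.join()  # returns once the worker has task_done()'d every item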
def setup_test_docs(sid, files):
    filenames = [os.path.join(config.STORE_DIR, sid, file) for file in files]

    for filename in filenames:
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        with open(filename, 'w') as fp:
            fp.write(str(uuid.uuid4()))

        # Add Submission to the db
        source = Source.query.filter(Source.filesystem_id == sid).one()
        submission = Submission(source, os.path.basename(filename))
        db_session.add(submission)
        db_session.commit()

    return filenames
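# Hedged usage sketch: seed two fake documents for an existing source. The
# source id and filenames are hypothetical; the Source row must already exist.
filenames = setup_test_docs('some-filesystem-id',
                            ['1-test-msg.gpg', '2-test-doc.zip.gpg'])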
def submit(uid, problem, lang, source, fn):
    # Validate parameters. Infer the language from the file extension, then
    # fall back to an in-source "$$ language: ... $$" directive.
    if lang not in ['cc', 'python', 'java']:
        ext = os.path.splitext(fn)[1]
        if ext == '.py':
            lang = 'python'
        elif ext in ['.cc', '.c', '.cpp', '.cxx']:
            lang = 'cc'
        elif ext == '.java':
            lang = 'java'
        else:
            p = re.compile(r'\$\$[ ]*language:[ ]*(python|cc|java)[ ]*\$\$')
            m = p.search(source)
            if m:
                lang = m.group(1)
            else:
                # TODO: detect by magic regexps
                return "Unable to detect language"

    if problem == -1:
        p = re.compile(r'\$\$[ ]*problem:[ ]*([0-9]+)[ ]*\$\$')
        m = p.search(source)
        if m:
            problem = int(m.group(1))
        else:
            # Fall back to a problem number embedded in the filename.
            p = re.compile(r'([0-9]+)')
            m = p.search(fn)
            if m:
                problem = int(m.group(1))
            else:
                return "Unable to detect problem number"

    if len(source) < 5:
        return "No source code submitted"

    # Send job to overlord
    try:
        code, msg = client.judge(problem, lang, source)
        session = Session()
        s = Submission()
        s.submitter_id = uid
        s.source = source
        s.problem_id = problem
        s.submitTime = datetime.utcnow()
        if int(code) == 12:
            s.jobid = int(msg)
            s.code = -1
            s.msg = ''
        else:
            s.msg = msg
            s.code = code
            s.jobid = None
            s.judgeTime = datetime.utcnow()
        session.add(s)
        session.commit()
        return ""
    except Exception as e:
        return str(e)
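# Illustrative sketch of the "$$ ... $$" directive parsing used above; the
# sample source string is hypothetical.
import re

sample = "int main() {}  /* $$ language: cc $$  $$ problem: 42 $$ */"
lang_m = re.search(r'\$\$[ ]*language:[ ]*(python|cc|java)[ ]*\$\$', sample)
prob_m = re.search(r'\$\$[ ]*problem:[ ]*([0-9]+)[ ]*\$\$', sample)
print(lang_m.group(1), prob_m.group(1))  # -> cc 42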
def submit():
    msg = request.form['msg']
    fh = request.files['fh']

    # Don't submit anything if it was an "empty" submission. #878
    if not (msg or fh):
        flash(gettext("You must enter a message or choose a file to submit."),
              "error")
        return redirect(url_for('main.lookup'))

    fnames = []
    journalist_filename = g.source.journalist_filename
    first_submission = g.source.interaction_count == 0

    if msg:
        g.source.interaction_count += 1
        fnames.append(
            store.save_message_submission(g.filesystem_id,
                                          g.source.interaction_count,
                                          journalist_filename, msg))
    if fh:
        g.source.interaction_count += 1
        fnames.append(
            store.save_file_submission(g.filesystem_id,
                                       g.source.interaction_count,
                                       journalist_filename, fh.filename,
                                       fh.stream))

    if first_submission:
        msg = render_template('first_submission_flashed_message.html')
        flash(Markup(msg), "success")
    else:
        if msg and not fh:
            html_contents = gettext('Thanks! We received your message.')
        elif not msg and fh:
            html_contents = gettext('Thanks! We received your document.')
        else:
            html_contents = gettext('Thanks! We received your message and '
                                    'document.')
        msg = render_template('next_submission_flashed_message.html',
                              html_contents=html_contents)
        flash(Markup(msg), "success")

    for fname in fnames:
        submission = Submission(g.source, fname)
        db_session.add(submission)

    if g.source.pending:
        g.source.pending = False

        # Generate a keypair now, if there's enough entropy (issue #303)
        # (gpg reads 300 bytes from /dev/random)
        entropy_avail = get_entropy_estimate()
        if entropy_avail >= 2400:
            async_genkey(g.filesystem_id, g.codename)
            current_app.logger.info(
                "generating key, entropy: {}".format(entropy_avail))
        else:
            current_app.logger.warn(
                "skipping key generation. entropy: {}".format(entropy_avail))

    g.source.last_updated = datetime.utcnow()
    db_session.commit()
    normalize_timestamps(g.filesystem_id)

    return redirect(url_for('main.lookup'))
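# get_entropy_estimate() is not defined in this snippet. Earlier versions of
# submit() above read the kernel's entropy pool directly, so a minimal
# equivalent (an assumption, not necessarily the project's actual helper)
# might be:
def get_entropy_estimate():
    # /proc/sys/kernel/random/entropy_avail reports the pool size in bits
    with open('/proc/sys/kernel/random/entropy_avail') as f:
        return int(f.read())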
def migrate_database(backup):
    print "* Migrating database..."

    # Get the sources table from the 0.2.1 instance's db
    old_db = backup.getmember(
        "var/chroot/document/var/www/securedrop/db.sqlite")
    old_db.name = "db.old.sqlite"
    backup.extract(old_db)
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to
    # fill in the rest by examining all of the filesystem designations in the
    # source directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from old_crypto_util import displayid

    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Journalist, Submission, Reply, db_session, init_db

    # We need to be able to link replies to the Journalist that sent them.
    # Since this information was not recorded in 0.2.1, we arbitrarily
    # attribute all migrated replies to the first journalist account present
    # on this system. Since this information is not currently exposed in the
    # UI, this does not create a problem (for now).
    if len(Journalist.query.all()) == 0:
        print "!!! FATAL: You must create a journalist account before running this migration."
        print "    Run ./manage.py add_admin and try again."
        sys.exit(1)
    else:
        arbitrary_journalist = Journalist.query.all()[0]

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them; they were
        # deleted and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we
        # can set source.last_updated to the time of the most recently
        # uploaded submission in the store now.
        submissions = []
        replies = []
        for fn in os.listdir(source_dir):
            append_to = submissions
            if fn.startswith('reply-'):
                append_to = replies
            append_to.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))

        # Sort by submission time
        submissions.sort(key=itemgetter(1))
        replies.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(),
            # which might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, it's tricky to
        # figure out how to set this value. We can't distinguish between
        # sources who created an account but never submitted anything and
        # sources who had been active, but didn't have any stored submissions
        # or replies at the time of migration.
        #
        # After having explored the options, I think the best thing to do
        # here is set pending to True if there are no submissions or replies.
        # Sources who created an account but never submitted anything won't
        # create noise in the list, and sources who are active can probably
        # be expected to log back in relatively soon and so will
        # automatically reappear once they submit something new.
        if len(submissions + replies) == 0:
            migrated_source.pending = True
        else:
            migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions
        # for each source. This is not technically correct, but since we
        # can't know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions) + len(replies)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # Combine everything into one list, sorted by date, so we can
        # correctly set the interaction counts for each file.
        everything = submissions + replies
        everything.sort(key=itemgetter(1))
        for count, item in enumerate(everything):
            # Rename the file to fit the new file naming scheme used by 0.3
            fn = item[0]
            if fn.startswith('reply-'):
                new_fn = "{0}-{1}-reply.gpg".format(
                    count + 1, journalist_filename(source[1]))
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a database entry for this item
            db_entry = None
            if fn.startswith('reply-'):
                migrated_reply = Reply(arbitrary_journalist, migrated_source,
                                       new_fn)
                db_entry = migrated_reply
            else:
                migrated_submission = Submission(migrated_source, new_fn)
                # Assume that all submissions that are being migrated have
                # already been downloaded
                migrated_submission.downloaded = True
                db_entry = migrated_submission
            db_session.add(db_entry)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])
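# Quick illustration of the journalist_filename() helper defined above
# (input value hypothetical):
#   journalist_filename("Agreeable Octopus!")  ->  "agreeable_octopus"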
def save(self, sha256sum, message, file):
    engine = create_engine('sqlite:///telescam.db')
    Base.metadata.bind = engine
    DBSession = sessionmaker(bind=engine)
    session = DBSession()

    data = self.koodous_link_existing_analysis_json(sha256sum)
    koodous_url = self.koodous_link_existing_analysis(sha256sum)
    if data is None:
        logger.debug('Received empty json response at save from '
                     'koodous_link_existing_analysis_json')
        return False

    try:
        new_certificate = None
        try:
            new_certificate = session.query(Certificate).filter(
                Certificate.sha1 ==
                data['androguard']['certificate']['sha1']).first()
            logger.debug(
                "Checking if current certificate exists in the database")
        except KeyError:
            logger.debug("Koodous couldn't extract the certificate; "
                         "corrupted APK, using default certificate")
            new_certificate = session.query(Certificate).filter(
                Certificate.sha1 == '-').first()

        if new_certificate is None:
            logger.debug("Certificate didn't exist")
            new_certificate = Certificate(
                sha1=data['androguard']['certificate']['sha1'],
                not_before=data['androguard']['certificate']['not_before'],
                not_after=data['androguard']['certificate']['not_after'],
                subjectdn=data['androguard']['certificate']['subjectDN'],
                issuerdn=data['androguard']['certificate']['issuerDN'],
                serial=data['androguard']['certificate']['serial'])
            session.add(new_certificate)

        new_apk = session.query(APK).filter(
            APK.sha256 == data['sha256']).first()
        logger.debug("Checking if current apk exists in the database")
        if new_apk is None:
            logger.debug("apk didn't exist")
            # Save apk
            local_filename = self.FILES_DIR + message.document.file_id + '.apk'
            try:
                logger.debug("Saving to disk")
                mkdir_p(os.path.dirname(local_filename))
                with open(local_filename, 'wb') as new_file:
                    new_file.write(file)
            except Exception:
                logger.error('Failed to save apk to disk: %s' %
                             local_filename, exc_info=True)
                raise
            new_apk = APK(
                app_name=data['androguard']['app_name'],
                package_name=data['androguard']['package_name'],
                version_code=data['androguard']['version_code'],
                displayed_version=data['androguard']['displayed_version'],
                local_package=local_filename,
                koodous_url=koodous_url,
                sha256=data['sha256'],
                certificate=new_certificate)
            session.add(new_apk)

        fwd = message.forward_from
        new_submission = Submission(
            submitted_to_username=message.chat.username,
            submitted_to_title=message.chat.title,
            submitted_to_id=message.chat.id,
            forwarded_from_username=fwd.username if fwd is not None else None,
            forwarded_from_firstname=fwd.first_name if fwd is not None else None,
            forwarded_from_lastname=fwd.last_name if fwd is not None else None,
            forwarded_from_id=fwd.id if fwd is not None else None,
            submitted_by_username=message.from_user.username,
            submitted_by_firstname=message.from_user.first_name,
            submitted_by_lastname=message.from_user.last_name,
            submitted_by_id=message.from_user.id,
            message_text=message.text,
            filename=message.document.file_name,
            apk=new_apk)
        session.add(new_submission)
        logger.debug("Adding submission details to database")

        try:
            session.commit()
            logger.debug("Saved changes to database")
            return True
        except Exception:
            logger.error('Failed to save changes to the database',
                         exc_info=True)
            raise
    except Exception:
        # The original snippet is truncated here; assumed outer handler that
        # rolls back and reports failure.
        session.rollback()
        return False
def main():
    random.seed()

    bot_name = "training_output"

    # Insert the names of the subreddits
    training_subreddits = []

    # all submissions ordered by date
    all_submissions = list(
        db_Submission.select().where(
            (fn.Lower(db_Submission.subreddit).in_(
                [s.lower() for s in training_subreddits])) &
            (fn.Lower(db_Submission.author).not_in(
                [a.lower() for a in author_blacklist]))))

    # We'll shuffle all the submission records and split them into training
    # and evaluation lists in a 90/10 ratio. simpletransformers will use the
    # evaluation list to test the accuracy of the training.
    random.shuffle(all_submissions)
    split_point = int(len(all_submissions) * 0.9)
    training_submissions = all_submissions[:split_point]
    eval_submissions = all_submissions[split_point:]

    print(f'{len(training_submissions)} training submissions, '
          f'{len(eval_submissions)} evaluation submissions')

    # file name for the output text file
    date_string = datetime.today().strftime('%d%m%y_%H%M')

    counter = 0

    # use concurrent futures (multiprocessing) to speed up the output
    with concurrent.futures.ProcessPoolExecutor() as executor:
        filename = f'{bot_name}_{date_string}_training.txt'
        with open(filename, 'a', encoding='utf-8') as fd:
            for sub, output_text_gen_string in zip(
                    training_submissions,
                    executor.map(gather_comments_for_submission,
                                 training_submissions)):
                counter += 1
                if output_text_gen_string:
                    fd.write(f'{output_text_gen_string}' + '<|endoftext|>\n')
                print(f'subs counted: {counter}. '
                      f'{round(counter / len(all_submissions), 2)}')

        filename = f'{bot_name}_{date_string}_eval.txt'
        with open(filename, 'a', encoding='utf-8') as fd:
            for sub, output_text_gen_string in zip(
                    eval_submissions,
                    executor.map(gather_comments_for_submission,
                                 eval_submissions)):
                counter += 1
                if output_text_gen_string:
                    fd.write(f'{output_text_gen_string}' + '<|endoftext|>\n')
                print(f'subs counted: {counter}. '
                      f'{round(counter / len(all_submissions), 2)}')
def migrate_database(zf):
    print "* Migrating database..."

    extract_to_path(zf, "var/chroot/document/var/www/securedrop/db.sqlite",
                    "db.old.sqlite")
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to
    # fill in the rest by examining all of the filesystem designations in the
    # source directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from crypto_util import displayid

    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Submission, db_session, init_db

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")

    # Make sure current database is in a pristine state
    os.remove("/var/lib/securedrop/db.sqlite")
    init_db()

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them; they were
        # deleted and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we
        # can set source.last_updated to the time of the most recently
        # uploaded submission in the store now.
        submissions = []
        for fn in os.listdir(source_dir):
            submissions.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))

        # Sort by submission time
        submissions.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(),
            # which might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, set all
        # migrated sources from 0.2.1 to not be pending. Otherwise, we can't
        # distinguish between sources who created an account but never
        # submitted anything and sources who just didn't have any stored
        # submissions/replies at the time of migration. To avoid stopping
        # journalists from replying to previously known sources, we set all
        # migrated sources to not be pending so they will appear in the
        # document interface.
        migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions
        # for each source. This is not technically correct, but since we
        # can't know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # submissions are now sorted by date, so we can just loop over them
        # to infer the interaction counts
        for count, submission in enumerate(submissions):
            # TODO Possible concern: submission filenames. Should we migrate
            # them to the current naming scheme? What about the extensions
            # ("msg.gpg" or "doc.zip.gpg", used in `documents_messages_count`
            # among other places)?
            fn = submission[0]
            if fn.startswith('reply-'):
                new_fn = "{0}-reply.gpg".format(count + 1)
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a submission for this source
            migrated_submission = Submission(migrated_source, new_fn)
            # Assume that all submissions that are being migrated have
            # already been downloaded
            migrated_submission.downloaded = True
            db_session.add(migrated_submission)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])