Example #1
def submit_assignment(assignment_id):
    assignment = Assignment.query.filter_by(id=assignment_id).first()
    if not assignment:
        return json.dumps({
            'success': False,
            'error': 'Assignment not found'
        }), 404
    user_id = request.form.get('user_id')
    user = User.query.filter_by(id=user_id).first()
    if not user:
        return json.dumps({'success': False, 'error': 'User not found'}), 404
    # Check enrollment before uploading so a rejected request leaves no
    # orphaned object in S3
    course = Course.query.filter_by(id=assignment.course_id).first()
    if user not in course.students:
        return json.dumps({
            'success': False,
            'error': 'User does not have this assignment'
        }), 400
    file = request.files.get('content')
    if not file:
        return json.dumps({'success': False, 'error': 'No file provided'}), 400
    client = boto3.client('s3')
    bucket_name = 'cmsproject-bucket'
    file_name = user.name + '/' + file.filename
    client.put_object(Body=file, Bucket=bucket_name, Key=file_name)
    submission = Submission(
        content=client.generate_presigned_url('get_object',
                                              Params={
                                                  'Bucket': bucket_name,
                                                  'Key': file_name
                                              },
                                              ExpiresIn=3600))
    assignment.submissions.append(submission)
    db.session.add(submission)
    db.session.commit()
    return json.dumps({'success': True, 'data': submission.serialize()})
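
For context, a minimal sketch of a client call that would exercise this
endpoint. The form field names ('user_id', 'content') come from the code
above; the route and host are assumptions, since the example does not show
the URL rule.

import requests

# Hypothetical route; adjust to however submit_assignment() is registered
resp = requests.post(
    'http://localhost:5000/assignments/42/submit',
    data={'user_id': 7},
    files={'content': open('essay.pdf', 'rb')},
)
print(resp.status_code, resp.text)
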
Example #2
def submit():
    msg = request.form['msg']
    fh = request.files['fh']

    # Don't bother submitting anything if it was an "empty" submission. #878.
    if not (msg or fh):
        flash("You must enter a message or choose a file to submit.", "error")
        return redirect(url_for('lookup'))

    fnames = []
    journalist_filename = g.source.journalist_filename
    first_submission = g.source.interaction_count == 0

    if msg:
        g.source.interaction_count += 1
        fnames.append(
            store.save_message_submission(g.sid, g.source.interaction_count,
                                          journalist_filename, msg))
    if fh:
        g.source.interaction_count += 1
        fnames.append(
            store.save_file_submission(g.sid, g.source.interaction_count,
                                       journalist_filename, fh.filename,
                                       fh.stream))

    if first_submission:
        msg = render_template('first_submission_flashed_message.html')
        flash(Markup(msg), "success")

    else:
        if msg and not fh:
            things = 'message'
        elif not msg and fh:
            things = 'document'
        else:
            things = 'message and document'

        msg = render_template('next_submission_flashed_message.html',
                              things=things)
        flash(Markup(msg), "success")

    for fname in fnames:
        submission = Submission(g.source, fname)
        db_session.add(submission)

    if g.source.pending:
        g.source.pending = False

        # Generate a keypair now, if there's enough entropy (issue #303)
        with open('/proc/sys/kernel/random/entropy_avail') as f:
            entropy_avail = int(f.read())
        if entropy_avail >= 2400:
            async_genkey(g.sid, g.codename)

    g.source.last_updated = datetime.utcnow()
    db_session.commit()
    normalize_timestamps(g.sid)

    return redirect(url_for('lookup'))
Example #3
def submit():
    msg = request.form['msg']
    fh = request.files['fh']

    # Don't bother submitting anything if it was an "empty" submission. #878.
    if not (msg or fh):
        flash("You must enter a message or choose a file to submit.", "error")
        return redirect(url_for('lookup'))

    fnames = []
    journalist_filename = g.source.journalist_filename
    first_submission = g.source.interaction_count == 0

    if msg:
        g.source.interaction_count += 1
        fnames.append(
            store.save_message_submission(g.sid, g.source.interaction_count,
                                          journalist_filename, msg))
    if fh:
        g.source.interaction_count += 1
        fnames.append(
            store.save_file_submission(g.sid, g.source.interaction_count,
                                       journalist_filename, fh.filename,
                                       fh.stream))

    if first_submission:
        flash(
            "Thanks for submitting something to SecureDrop! Please check back later for replies.",
            "notification")
    else:
        if msg:
            flash("Thanks! We received your message.", "notification")
        if fh:
            flash(
                '{} "{}".'.format("Thanks! We received your document",
                                  fh.filename or '[unnamed]'), "notification")

    for fname in fnames:
        submission = Submission(g.source, fname)
        db_session.add(submission)

    if g.source.pending:
        g.source.pending = False

        # Generate a keypair now, if there's enough entropy (issue #303)
        with open('/proc/sys/kernel/random/entropy_avail') as f:
            entropy_avail = int(f.read())
        if entropy_avail >= 2400:
            async_genkey(g.sid, g.codename)

    g.source.last_updated = datetime.utcnow()
    db_session.commit()
    normalize_timestamps(g.sid)

    return redirect(url_for('lookup'))
Example #4
def write_to_database(q):

    counter = 0

    while True:
        json_filepath = q.get()

        with open(json_filepath, 'r') as f:
            data = json.load(f)

        for json_item in data['data']:

            if 'body' in json_item:
                # if 'body' is present then assume it's a comment

                db_record = db_Comment.get_or_none(
                    db_Comment.id == json_item['id'])

                if not db_record:

                    json_item['body'] = clean_text(json_item['body'])

                    db_record = db_Comment.create(**json_item)
                    print(f"comment {json_item['id']} written to database")
                    counter += 1

            elif 'selftext' in json_item:
                # if 'selftext' is present then assume it's a submission
                db_record = db_Submission.get_or_none(
                    db_Submission.id == json_item['id'])

                if not db_record:

                    json_item['selftext'] = clean_text(json_item['selftext'])

                    db_record = db_Submission.create(**json_item)
                    print(f"submission {json_item['id']} written to database")
                    counter += 1

        q.task_done()
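
A sketch of how this worker might be wired up, assuming the standard
library queue and threading modules: the q.get()/q.task_done() pairing in
write_to_database() matches a queue.Queue drained by a daemon thread.

import queue
import threading

q = queue.Queue()
threading.Thread(target=write_to_database, args=(q,), daemon=True).start()
q.put('dump_0001.json')  # hypothetical path to a file shaped like {"data": [...]}
q.join()                 # blocks until every queued file is processed
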
Example #5
def setup_test_docs(sid, files):
    filenames = [os.path.join(config.STORE_DIR, sid, file) for file in files]

    source = Source.query.filter(Source.filesystem_id == sid).one()

    for filename in filenames:
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        with open(filename, 'w') as fp:
            fp.write(str(uuid.uuid4()))

        # Add Submission to the db
        submission = Submission(source, os.path.basename(filename))
        db_session.add(submission)
        db_session.commit()

    return filenames
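
A usage sketch with made-up source id and filenames; each file is written
under config.STORE_DIR/<sid>/ and paired with a Submission row:

paths = setup_test_docs('abcd1234',
                        ['1-test-msg.gpg', '2-test-doc.gz.gpg'])
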
Example #6
def submit(uid, problem, lang, source, fn):
    # Validate parameters
    if lang not in ['cc', 'python', 'java']:
        # Detect the language from the file extension
        ext = os.path.splitext(fn)[1]
        if ext == '.py':
            lang = 'python'
        elif ext in ['.cc', '.c', '.cpp', '.cxx']:
            lang = 'cc'
        elif ext == '.java':
            lang = 'java'
        else:
            p = re.compile(r'\$\$[ ]*language:[ ]*(python|cc|java)[ ]*\$\$')
            m = p.search(source)
            if m: 
                lang = m.group(1)
            else:
                # TODO: detect by magic regexps
                return "Unable to detect language"
    if problem == -1:
        p = re.compile(r'\$\$[ ]*problem:[ ]*([0-9]+)[ ]*\$\$')
        m = p.search(source)
        if m:
            problem = int(m.group(1))
        else:
            p = re.compile(r'([0-9]+)')
            m = p.search(fn)
            if m:
                problem = int(m.group(1))
            else:
                return "Unable to detect problem number"
    if len(source) < 5:
        return "No source code submitted"
    # Send job to overlord
    try:
        code, msg = client.judge(problem, lang, source)

        session = Session()
        
        s = Submission()
        s.submitter_id = uid
        s.source = source
        s.problem_id = problem
        s.submitTime = datetime.utcnow()
        if int(code) == 12:
            s.jobid = int(msg)
            s.code = -1
            s.msg = ''
        else:
            s.msg = msg
            s.code = code
            s.jobid = None
            s.judgeTime = datetime.utcnow()

        session.add(s)
        session.commit()
        
        return ""
    except Exception as e:
        return str(e)
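
Both regular expressions above look for $$ ... $$ directives embedded in
the submitted source itself. A sketch of a submission that both fallbacks
would detect (uid, the problem number, and the judge backend behind
client.judge() are placeholders):

source = '''\
# $$ language: python $$
# $$ problem: 1042 $$
print(sum(map(int, input().split())))
'''
result = submit(uid=1, problem=-1, lang='', source=source, fn='solution.txt')
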
Example #7
    def submit():
        msg = request.form['msg']
        fh = request.files['fh']

        # Don't submit anything if it was an "empty" submission. #878
        if not (msg or fh):
            flash(
                gettext(
                    "You must enter a message or choose a file to submit."),
                "error")
            return redirect(url_for('main.lookup'))

        fnames = []
        journalist_filename = g.source.journalist_filename
        first_submission = g.source.interaction_count == 0

        if msg:
            g.source.interaction_count += 1
            fnames.append(
                store.save_message_submission(g.filesystem_id,
                                              g.source.interaction_count,
                                              journalist_filename, msg))
        if fh:
            g.source.interaction_count += 1
            fnames.append(
                store.save_file_submission(g.filesystem_id,
                                           g.source.interaction_count,
                                           journalist_filename, fh.filename,
                                           fh.stream))

        if first_submission:
            msg = render_template('first_submission_flashed_message.html')
            flash(Markup(msg), "success")

        else:
            if msg and not fh:
                html_contents = gettext('Thanks! We received your message.')
            elif not msg and fh:
                html_contents = gettext('Thanks! We received your document.')
            else:
                html_contents = gettext('Thanks! We received your message and '
                                        'document.')

            msg = render_template('next_submission_flashed_message.html',
                                  html_contents=html_contents)
            flash(Markup(msg), "success")

        for fname in fnames:
            submission = Submission(g.source, fname)
            db_session.add(submission)

        if g.source.pending:
            g.source.pending = False

            # Generate a keypair now, if there's enough entropy (issue #303)
            # (gpg reads 300 bytes from /dev/random)
            entropy_avail = get_entropy_estimate()
            if entropy_avail >= 2400:
                async_genkey(g.filesystem_id, g.codename)
                current_app.logger.info(
                    "generating key, entropy: {}".format(entropy_avail))
            else:
                current_app.logger.warning(
                    "skipping key generation. entropy: {}".format(
                        entropy_avail))

        g.source.last_updated = datetime.utcnow()
        db_session.commit()
        normalize_timestamps(g.filesystem_id)

        return redirect(url_for('main.lookup'))
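
Example #7 hides the entropy check behind get_entropy_estimate(). A
minimal sketch of that helper, assuming it simply wraps the procfs read
that Examples #2 and #3 perform inline:

def get_entropy_estimate():
    # The kernel's current estimate of available entropy, in bits
    with open('/proc/sys/kernel/random/entropy_avail') as f:
        return int(f.read())
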
Example #8
def migrate_database(backup):
    print "* Migrating database..."

    # Get the sources table from the 0.2.1 instance's db
    old_db = backup.getmember(
        "var/chroot/document/var/www/securedrop/db.sqlite")
    old_db.name = "db.old.sqlite"
    backup.extract(old_db)
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    conn.close()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from old_crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Journalist, Submission, Reply, db_session, init_db

    # We need to be able to link replies to the Journalist that sent
    # them. Since this information was not recorded in 0.2.1, we
    # arbitrarily say all replies were sent by an arbitrary journalist
    # that is present on this system. Since this information is not
    # currently exposed in the UI, this does not create a problem (for
    # now).
    if len(Journalist.query.all()) == 0:
        print "!!! FATAL: You must create a journalist account before running this migration."
        print "           Run ./manage.py add_admin and try again."
        sys.exit(1)
    else:
        arbitrary_journalist = Journalist.query.all()[0]

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        replies = []
        for fn in os.listdir(source_dir):
            append_to = submissions
            if fn.startswith('reply-'):
                append_to = replies
            append_to.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))

        # Sort by submission time
        submissions.sort(key=itemgetter(1))
        replies.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, it's tricky to
        # figure out how to set this value. We can't distinguish between sources
        # who created an account but never submitted anything and sources who
        # had been active, but didn't have any stored submissions or replies at
        # the time of migration.
        #
        # After having explored the options, I think the best thing to do here
        # is set pending to True if there are no submissions or replies. Sources
        # who created an account but never submitted anything won't create noise
        # in the list, and sources who are active can probably be expected to
        # log back in relatively soon and so will automatically reappear once
        # they submit something new.
        if len(submissions + replies) == 0:
            migrated_source.pending = True
        else:
            migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions) + len(replies)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # Combine everything into one list, sorted by date, so we can
        # correctly set the interaction counts for each file.
        everything = submissions + replies
        everything.sort(key=itemgetter(1))
        for count, item in enumerate(everything):
            # Rename the file to fit the new file naming scheme used by 0.3
            fn = item[0]

            if fn.startswith('reply-'):
                new_fn = "{0}-{1}-reply.gpg".format(
                    count + 1, journalist_filename(source[1]))
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a database entry for this item
            db_entry = None

            if fn.startswith('reply-'):
                migrated_reply = Reply(arbitrary_journalist, migrated_source,
                                       new_fn)
                db_entry = migrated_reply
            else:
                migrated_submission = Submission(migrated_source, new_fn)
                # Assume that all submissions that are being migrated
                # have already been downloaded
                migrated_submission.downloaded = True
                db_entry = migrated_submission

            db_session.add(db_entry)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])
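
The nested journalist_filename() helper is pure string filtering: it
lowercases the designation, maps spaces to underscores, and drops any
character outside a-z, 0-9, '-' and '_'. For example:

# journalist_filename("Dismal Throwaway 3rd") -> 'dismal_throwaway_3rd'
# journalist_filename("Ab/Cd Ef!")            -> 'abcd_ef'
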
Example #9
def migrate_database(backup):
    print "* Migrating database..."

    # Get the sources table from the 0.2.1 instance's db
    old_db = backup.getmember("var/chroot/document/var/www/securedrop/db.sqlite")
    old_db.name = "db.old.sqlite"
    backup.extract(old_db)
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    conn.close()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from old_crypto_util import displayid

    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Journalist, Submission, Reply, db_session, init_db

    # We need to be able to link replies to the Journalist that sent
    # them. Since this information was not recorded in 0.2.1, we
    # arbitrarily say all replies were sent by an arbitrary journalist
    # that is present on this system. Since this information is not
    # currently exposed in the UI, this does not create a problem (for
    # now).
    if len(Journalist.query.all()) == 0:
        print "!!! FATAL: You must create a journalist account before running this migration."
        print "           Run ./manage.py add_admin and try again."
        sys.exit(1)
    else:
        arbitrary_journalist = Journalist.query.all()[0]

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite", "/var/lib/securedrop/db.sqlite.bak")

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = "abcdefghijklmnopqrstuvwxyz1234567890-_"
        return "".join([c for c in s.lower().replace(" ", "_") if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        replies = []
        for fn in os.listdir(source_dir):
            append_to = submissions
            if fn.startswith("reply-"):
                append_to = replies
            append_to.append((fn, os.path.getmtime(os.path.join(source_dir, fn))))

        # Sort by submission time
        submissions.sort(key=itemgetter(1))
        replies.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, it's tricky to
        # figure out how to set this value. We can't distinguish between sources
        # who created an account but never submitted anything and sources who
        # had been active, but didn't have any stored submissions or replies at
        # the time of migration.
        #
        # After having explored the options, I think the best thing to do here
        # is set pending to True if there are no submissions or replies. Sources
        # who created an account but never submitted anything won't create noise
        # in the list, and sources who are active can probably be expected to
        # log back in relatively soon and so will automatically reappear once
        # they submit something new.
        if len(submissions + replies) == 0:
            migrated_source.pending = True
        else:
            migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions) + len(replies)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # Combine everything into one list, sorted by date, so we can
        # correctly set the interaction counts for each file.
        everything = submissions + replies
        everything.sort(key=itemgetter(1))
        for count, item in enumerate(everything):
            # Rename the file to fit the new file naming scheme used by 0.3
            fn = item[0]

            if fn.startswith("reply-"):
                new_fn = "{0}-{1}-reply.gpg".format(count + 1, journalist_filename(source[1]))
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]), "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg"
                )

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn), os.path.join(source_dir, new_fn))

            # Add a database entry for this item
            db_entry = None

            if fn.startswith("reply-"):
                migrated_reply = Reply(arbitrary_journalist, migrated_source, new_fn)
                db_entry = migrated_reply
            else:
                migrated_submission = Submission(migrated_source, new_fn)
                # Assume that all submissions that are being migrated
                # have already been downloaded
                migrated_submission.downloaded = True
                db_entry = migrated_submission

            db_session.add(db_entry)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(["chown", "www-data:www-data", "/var/lib/securedrop/db.sqlite"])
Example #10
    def save(self, sha256sum, message, file):
        engine = create_engine('sqlite:///telescam.db')
        Base.metadata.bind = engine
        DBSession = sessionmaker(bind=engine)
        session = DBSession()
        data = self.koodous_link_existing_analysis_json(sha256sum)
        koodous_url = self.koodous_link_existing_analysis(sha256sum)
        if data is None:
            logger.debug(
                'Received empty json response at save from koodous_link_existing_analysis_json'
            )
            return False
        try:
            new_certificate = None
            try:
                new_certificate = session.query(Certificate).filter(
                    Certificate.sha1 == data['androguard']['certificate']
                    ['sha1']).first()
                logger.debug(
                    "Checking if current certificate exists in the database")
            except KeyError:
                logger.debug(
                    "Koodous couldn't exctract the certificate, Corrupted APK, using default certificate"
                )
                new_certificate = session.query(Certificate).filter(
                    Certificate.sha1 == '-').first()
            if new_certificate is None:
                logger.debug("Certificate didn't exist")
                new_certificate = Certificate(
                    sha1=data['androguard']['certificate']['sha1'],
                    not_before=data['androguard']['certificate']['not_before'],
                    not_after=data['androguard']['certificate']['not_after'],
                    subjectdn=data['androguard']['certificate']['subjectDN'],
                    issuerdn=data['androguard']['certificate']['issuerDN'],
                    serial=data['androguard']['certificate']['serial'])
                session.add(new_certificate)

            new_apk = session.query(APK).filter(
                APK.sha256 == data['sha256']).first()
            logger.debug("Checking if current apk exists in the database")
            if new_apk is None:
                logger.debug("apk didn't exist")
                # Save apk
                local_filename = self.FILES_DIR + message.document.file_id + '.apk'
                try:
                    logger.debug("Saving to disk")
                    mkdir_p(os.path.dirname(local_filename))
                    with open(local_filename, 'wb') as new_file:
                        new_file.write(file)
                except Exception as e:
                    logger.error('Failed to save apk to disk: %s' %
                                 local_filename,
                                 exc_info=True)
                    raise
                new_apk = APK(
                    app_name=data['androguard']['app_name'],
                    package_name=data['androguard']['package_name'],
                    version_code=data['androguard']['version_code'],
                    displayed_version=data['androguard']['displayed_version'],
                    local_package=local_filename,
                    koodous_url=koodous_url,
                    sha256=data['sha256'],
                    certificate=new_certificate)
                session.add(new_apk)

            new_submission = Submission(
                submitted_to_username=message.chat.username,
                submitted_to_title=message.chat.title,
                submitted_to_id=message.chat.id,
                forwarded_from_username=message.forward_from.username
                if message.forward_from is not None else None,
                forwarded_from_firstname=message.forward_from.first_name
                if message.forward_from is not None else None,
                forwarded_from_lastname=message.forward_from.last_name
                if message.forward_from is not None else None,
                forwarded_from_id=message.forward_from.id
                if message.forward_from is not None else None,
                submitted_by_username=message.from_user.username,
                submitted_by_firstname=message.from_user.first_name,
                submitted_by_lastname=message.from_user.last_name,
                submitted_by_id=message.from_user.id,
                message_text=message.text,
                filename=message.document.file_name,
                apk=new_apk)
            session.add(new_submission)
            logger.debug("Adding submission details to database")
            try:
                session.commit()
                logger.debug("Saved changes to database")
                return True
            except Exception:
                logger.error('Failed to save changes to the database',
                             exc_info=True)
                raise
        except Exception:
            # Matching handler for the outer try: log and report failure
            logger.error('Failed to save the submission', exc_info=True)
            return False
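
Example #10 calls a mkdir_p() helper that the listing does not define. A
common sketch with the usual mkdir -p semantics, assuming it does nothing
beyond creating missing parent directories:

import os

def mkdir_p(path):
    # Create the directory and any missing parents; no error if it exists
    os.makedirs(path, exist_ok=True)
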
Example #11
def main():

    random.seed()

    bot_name = "training_output"

    # Insert the names of the subreddits
    training_subreddits = []

    # All submissions from the training subreddits, excluding blacklisted
    # authors
    all_submissions = list(
        db_Submission.select().where((fn.Lower(db_Submission.subreddit).in_(
            [s.lower() for s in training_subreddits]))
                                     & (fn.Lower(db_Submission.author).not_in(
                                         [a.lower()
                                          for a in author_blacklist]))))

    # We'll shuffle all the submission records and split them into a training and evaluation
    # lists in a 90/10 ratio. simpletransformers will use the evaluation to test the accuracy
    # of the training
    random.shuffle(all_submissions)

    split_point = int(len(all_submissions) * 0.9)
    training_submissions = all_submissions[:split_point]
    eval_submissions = all_submissions[split_point:]

    print(
        f'{len(training_submissions)} training submissions, {len(eval_submissions)} evaluation submissions'
    )

    # file name for the output text file
    date_string = datetime.today().strftime('%d%m%y_%H%M')
    counter = 0

    # use concurrent futures (multiprocessing) to speed up the output
    with concurrent.futures.ProcessPoolExecutor() as executor:
        filename = f'{bot_name}_{date_string}_training.txt'

        with open(filename, 'a', encoding='utf-8') as fd:
            for sub, output_text_gen_string in zip(
                    training_submissions,
                    executor.map(gather_comments_for_submission,
                                 training_submissions)):
                counter += 1
                if output_text_gen_string:
                    fd.write(f'{output_text_gen_string}' + '<|endoftext|>\n')
                print(
                    f'subs counted: {counter}. {round(counter/len(all_submissions), 2)}'
                )

        filename = f'{bot_name}_{date_string}_eval.txt'
        with open(filename, 'a', encoding='utf-8') as fd:
            for sub, output_text_gen_string in zip(
                    eval_submissions,
                    executor.map(gather_comments_for_submission,
                                 eval_submissions)):
                counter += 1
                if output_text_gen_string:
                    fd.write(f'{output_text_gen_string}' + '<|endoftext|>\n')
                print(
                    f'subs counted: {counter}. {round(counter/len(all_submissions), 2)}'
                )
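
The query in main() filters against an author_blacklist that the example
does not define; presumably a plain list of usernames kept at module
level, e.g.:

# Hypothetical; main() assumes something like this exists
author_blacklist = ['AutoModerator']
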
Example #12
def migrate_database(zf):
    print "* Migrating database..."

    extract_to_path(zf, "var/chroot/document/var/www/securedrop/db.sqlite", "db.old.sqlite")
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    conn.close()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Submission, db_session, init_db

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")
    # Make sure current database is in a pristine state
    os.remove("/var/lib/securedrop/db.sqlite")
    init_db()

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join([c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        for fn in os.listdir(source_dir):
            submissions.append((fn, os.path.getmtime(os.path.join(source_dir, fn))))
        # Sort by submission time
        submissions.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, set all migrated
        # sources from 0.2.1 to not be pending. Otherwise, we can't distinguish
        # between sources who created an account but never submitted anything
        # and sources who just didn't have any stored submissions/replies at
        # the time of migration. To avoid stopping journalists from replying to
        # previous known sources, we set all migrated sources to not be pending
        # so they will appear in the document interface.
        migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # submissions are now sorted by date, so we can just loop over them to
        # infer the interaction counts
        for count, submission in enumerate(submissions):
            # TODO Possible concern: submission filenames. Should we migrate
            # them to the current naming scheme? What about the extensions
            # ("msg.gpg" or "doc.zip.gpg", used in `documents_messages_count`
            # among other places)?

            fn = submission[0]

            if fn.startswith('reply-'):
                new_fn = "{0}-reply.gpg".format(count+1)
            else:
                new_fn = "{0}-{1}-{2}".format(count+1, journalist_filename(source[1]), "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a submission for this source
            migrated_submission = Submission(migrated_source, new_fn)
            # Assume that all submissions that are being migrated have already
            # been downloaded
            migrated_submission.downloaded = True
            db_session.add(migrated_submission)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])
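
Unlike Examples #8 and #9, this variant receives the backup as zf and
relies on an extract_to_path() helper that is not shown. A plausible
sketch, assuming zf is a zipfile.ZipFile:

def extract_to_path(zf, member, dest):
    # Copy a single archive member out of the backup to a path on disk
    with zf.open(member) as src, open(dest, 'wb') as dst:
        dst.write(src.read())
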
Example #13
def migrate_database(zf):
    print "* Migrating database..."

    extract_to_path(zf, "var/chroot/document/var/www/securedrop/db.sqlite",
                    "db.old.sqlite")
    conn = sqlite3.connect("db.old.sqlite")
    c = conn.cursor()
    sources = c.execute("SELECT * FROM sources").fetchall()
    conn.close()
    os.remove("db.old.sqlite")

    # Fill in the rest of the sources. Since sources were only added to the
    # database if their codename was changed by the journalist, we need to fill
    # in the rest by examining all of the filesystem designations in the source
    # directory and re-generating the codenames.
    #
    # Note: Must be called after /var/lib/securedrop/store is populated
    from crypto_util import displayid
    # Generate a list of the filesystem ids that have journalist designations
    # stored in the database, since they are already known and should not be
    # generated from the filesystem id
    already_processed = set([source[0] for source in sources])
    for fs_id in os.listdir("/var/lib/securedrop/store"):
        if fs_id in already_processed:
            continue
        sources.append((fs_id, displayid(fs_id)))

    # Import current application's config so we can easily populate the db
    sys.path.append("/var/www/securedrop")
    import config
    from db import Source, Submission, db_session, init_db

    # Back up current database just in case
    shutil.copy("/var/lib/securedrop/db.sqlite",
                "/var/lib/securedrop/db.sqlite.bak")
    # Make sure current database is in a pristine state
    os.remove("/var/lib/securedrop/db.sqlite")
    init_db()

    # Copied from db.py to compute filesystem-safe journalist filenames
    def journalist_filename(s):
        valid_chars = 'abcdefghijklmnopqrstuvwxyz1234567890-_'
        return ''.join(
            [c for c in s.lower().replace(' ', '_') if c in valid_chars])

    # Migrate rows to new database with SQLAlchemy ORM
    for source in sources:
        migrated_source = Source(source[0], source[1])
        source_dir = os.path.join("/var/lib/securedrop/store", source[0])

        # It appears that there was a bug in 0.2.1 where sources with changed
        # names were not always successfully removed from the database. Skip
        # any sources that didn't have files copied for them, they were deleted
        # and are in the database erroneously.
        if not os.path.isdir(source_dir):
            continue

        # Can infer "flagged" state by looking for _FLAG files in store
        if "_FLAG" in os.listdir(source_dir):
            # Mark the migrated source as flagged
            migrated_source.flagged = True
            # Delete the _FLAG file
            os.remove(os.path.join(source_dir, "_FLAG"))

        # Sort the submissions by the date of submission so we can infer the
        # correct interaction_count for the new filenames later, and so we can
        # set source.last_updated to the time of the most recently uploaded
        # submission in the store now.
        submissions = []
        for fn in os.listdir(source_dir):
            submissions.append(
                (fn, os.path.getmtime(os.path.join(source_dir, fn))))
        # Sort by submission time
        submissions.sort(key=itemgetter(1))

        if len(submissions) > 0:
            migrated_source.last_updated = datetime.utcfromtimestamp(
                submissions[-1][1])
        else:
            # The source will have the default .last_updated of utcnow(), which
            # might be a little confusing, but it's the best we can do.
            pass

        # Since the concept of "pending" is introduced in 0.3, set all migrated
        # sources from 0.2.1 to not be pending. Otherwise, we can't distinguish
        # between sources who created an account but never submitted anything
        # and sources who just didn't have any stored submissions/replies at
        # the time of migration. To avoid stopping journalists from replying to
        # previous known sources, we set all migrated sources to not be pending
        # so they will appear in the document interface.
        migrated_source.pending = False

        # Set source.interaction_count to the number of current submissions for
        # each source. This is not technically correct, but since we can't
        # know how many submissions have been deleted it will give us a
        # reasonable, monotonically increasing basis for future increments to
        # the interaction_count.
        migrated_source.interaction_count = len(submissions)

        # Add and commit the source to the db so they will have a primary key
        # assigned to use with the ForeignKey relationship with their
        # submissions
        db_session.add(migrated_source)
        db_session.commit()

        # submissions are now sorted by date, so we can just loop over them to
        # infer the interaction counts
        for count, submission in enumerate(submissions):
            # TODO Possible concern: submission filenames. Should we migrate
            # them to the current naming scheme? What about the extensions
            # ("msg.gpg" or "doc.zip.gpg", used in `documents_messages_count`
            # among other places)?

            fn = submission[0]

            if fn.startswith('reply-'):
                new_fn = "{0}-reply.gpg".format(count + 1)
            else:
                new_fn = "{0}-{1}-{2}".format(
                    count + 1, journalist_filename(source[1]),
                    "msg.gpg" if fn.endswith("msg.gpg") else "doc.zip.gpg")

            # Move to the new filename
            os.rename(os.path.join(source_dir, fn),
                      os.path.join(source_dir, new_fn))

            # Add a submission for this source
            migrated_submission = Submission(migrated_source, new_fn)
            # Assume that all submissions that are being migrated have already
            # been downloaded
            migrated_submission.downloaded = True
            db_session.add(migrated_submission)
            db_session.commit()

    # chown the database file to the securedrop user
    subprocess.call(
        ['chown', 'www-data:www-data', "/var/lib/securedrop/db.sqlite"])