def reindex_specific_data_type(data_type):
    FileProcessLock.lock()
    print "starting..."
    #this line will raise an error if something is wrong with the data type
    file_name_key = data_stream_to_s3_file_name_string(data_type)
    relevant_chunks = ChunksRegistry(data_type=data_type)
    relevant_indexed_files = [ chunk["chunk_path"] for chunk in relevant_chunks ]
    print "purging old data..."
    for chunk in relevant_chunks: chunk.remove()

    pool = ThreadPool(20)
    pool.map(s3_delete, relevant_indexed_files)

    print "pulling files to process..."
    files_lists = pool.map(s3_list_files, [str(s._id) for s in Studies()])
    for i, l in enumerate(files_lists):
        print str(datetime.now()), i + 1, "of", str(Studies.count()) + ",", len(l), "files"
        for fp in l:
            if fp[-4:] in PROCESSABLE_FILE_EXTENSIONS:
                # raw upload keys start with study_id/user_id/, extract both components
                FileToProcess.append_file_for_processing(
                        fp, ObjectId(fp.split("/", 1)[0]), fp.split("/", 2)[1])
    del files_lists
    pool.close()
    pool.terminate()
    print str(datetime.now()), "processing data..."
    FileProcessLock.unlock()
    process_file_chunks()
    print "Done."
def reindex_all_files_to_process():
    """ Totally removes the FilesToProcess DB, deletes all chunked files on s3,
    clears the chunksregistry, and then adds all relevent files on s3 to the
    files to process registry. """
    FileProcessLock.lock()
    print str(datetime.now()), "purging FilesToProcess:", FilesToProcess.count()
    FileToProcess.db().drop()
    print str(datetime.now()), "purging existing ChunksRegistry", ChunksRegistry.count()
    ChunkRegistry.db().drop()

    pool = ThreadPool(CONCURRENT_NETWORK_OPS * 2)

    print str(datetime.now()), "deleting older chunked data:",
    CHUNKED_DATA = s3_list_files(CHUNKS_FOLDER)
    print len(CHUNKED_DATA)
    pool.map(s3_delete, CHUNKED_DATA)
    del CHUNKED_DATA

    print str(datetime.now()), "pulling new files to process..."
    files_lists = pool.map(s3_list_files, [str(s._id) for s in Studies()])
    print "putting new files to process..."
    for i, l in enumerate(files_lists):
        print str(datetime.now()), i + 1, "of", str(Studies.count()) + ",", len(l), "files"
        for fp in l:
            if fp[-4:] in PROCESSABLE_FILE_EXTENSIONS:
                # raw upload keys start with study_id/user_id/, extract both components
                FileToProcess.append_file_for_processing(
                        fp, ObjectId(fp.split("/", 1)[0]), fp.split("/", 2)[1])
    del files_lists
    pool.close()
    pool.terminate()
    print str(datetime.now()), "processing data."
    FileProcessLock.unlock()
    process_file_chunks()

def edit_admin(admin_id):
    admin = Admin(admin_id)
    admin_is_current_user = (admin._id == session['admin_username'])
    current_studies = sorted(Studies(admins=admin._id),
                             key=lambda x: x.name.lower())
    return render_template('edit_admin.html',
                           admin=admin,
                           current_studies=current_studies,
                           all_studies=Studies.get_all_studies(),
                           allowed_studies=get_admins_allowed_studies(),
                           admin_is_current_user=admin_is_current_user,
                           system_admin=admin_is_system_admin())
Example 4
def render_edit_survey(survey_id=None):
    survey = Survey(survey_id)
    # check existence first: the study lookup below would raise on a missing survey
    if not survey:
        return abort(404)
    study = [
        study for study in Studies() if survey['_id'] in study['surveys']
    ][0]
    return render_template('edit_survey.html',
                           survey=survey,
                           study=study,
                           allowed_studies=get_admins_allowed_studies(),
                           system_admin=admin_is_system_admin())
Example 5
def get_studies():
    # reject requests with missing or invalid access credentials
    access_key = request.values["access_key"]
    access_secret = request.values["secret_key"]
    admin = Admin(access_key_id=access_key)
    if not admin:
        return abort(403)  # no admin exists with this access key
    if not admin.validate_access_credentials(access_secret):
        return abort(403)  # incorrect secret key
    return json.dumps({
        str(study._id): study.name
        for study in Studies(admins=str(admin._id))
    })
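
A hedged client-side sketch of calling this endpoint; the host and route below are assumptions, and the credentials are the admin's data-access keypair:

import json
import requests

# NOTE: the URL is hypothetical; substitute the deployment's host and route.
resp = requests.post("https://example.com/get-studies/v1",
                     data={"access_key": "...", "secret_key": "..."})
print json.loads(resp.text)  # {study_id: study_name, ...}
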
def manage_admins():
    admins = []
    for admin in Admins():
        admin_name = admin._id
        allowed_studies = ' | '.join(
            sorted(Studies(admins=admin._id, field='name'),
                   key=lambda x: x.lower()))
        admins.append((admin_name, allowed_studies))
    admins = sorted(admins, key=lambda s: s[0].lower())
    return render_template('manage_admins.html',
                           admins=admins,
                           allowed_studies=get_admins_allowed_studies(),
                           system_admin=admin_is_system_admin())
Example 7
def migrate_studies():
    d_study_list = []
    for m_study in MStudySet.iterator():
        with error_handler:
            # Create a Django Study object modeled off the Mongolia Study
            study_name = m_study['name']
            d_study = DStudy(
                name=study_name,
                encryption_key=m_study['encryption_key'],
                object_id=m_study['_id'],
                deleted=m_study['deleted'],
                is_test=m_study['is_test'],
            )
    
            # Validate the new Study object and add it to the bulk create list
            d_study.full_clean()
            d_study_list.append(d_study)
    
            # Get lists of Mongolia Surveys, Admins and StudyDeviceSettings attached to this Study
            m_survey_list = m_study['surveys']
            m_admin_list = m_study['admins']
            m_device_settings = m_study['device_settings']
            study_referents[study_name] = {
                'survey_list': m_survey_list,
                'admin_list': m_admin_list,
                'device_settings': m_device_settings,
            }

    # Bulk create the Django Studies
    DStudy.objects.bulk_create(d_study_list)

    # Create a reference from Mongolia Study IDs to Django Studies that doesn't require
    # any future database calls.
    for m_study in MStudySet.iterator():
        with error_handler:
            m_study_id = m_study['_id']
            d_study_id = DStudy.objects.filter(name=m_study['name']).values('pk', 'deleted').get()
            study_id_dict[m_study_id] = d_study_id
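
For orientation, a sketch of the lookup table this second pass builds; the ids and pks are hypothetical. The re-query is needed because bulk_create() is not guaranteed to populate primary keys on every database backend:

from bson import ObjectId

# study_id_dict maps each Mongolia study id to the new Django pk and deleted
# flag, matching the .values('pk', 'deleted').get() call above.
study_id_dict = {
    ObjectId("55f1fc22ad8c2c4e3ae8fcf6"): {"pk": 1, "deleted": False},
    ObjectId("5613f21b91dc4172eb33d1ce"): {"pk": 2, "deleted": True},
}
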
def create_study():
    if request.method == 'GET':
        return render_template('create_study.html',
                               studies=Studies.get_all_studies(),
                               allowed_studies=get_admins_allowed_studies(),
                               system_admin=admin_is_system_admin())
    name = request.form.get('name')
    encryption_key = request.form.get('encryption_key')
    try:
        study = Study.create_default_study(name, encryption_key)
        flash("Successfully created a new study.", 'success')
        copy_existing_study_if_asked_to(study)
        return redirect('/device_settings/' + str(study._id))
    except (InvalidEncryptionKeyError, StudyAlreadyExistsError) as e:
        flash(e.message, 'danger')
        return redirect('/create_study')
Example 9
def authenticate_and_call(*args, **kwargs):
    if not is_logged_in():  # check for regular login requirement
        return redirect("/")
    admin = Admin(session['admin_username'])
    if not admin["system_admin"]:
        # TODO: Low Priority. Josh. redirect to a URL, not a template file
        return abort(403)
    if 'study_id' in kwargs:
        # make an extra check in case authenticate_admin_study_access has
        # already converted the id
        study_id = kwargs['study_id']
        if not isinstance(study_id, ObjectId):
            study_id = ObjectId(study_id)
            kwargs['study_id'] = study_id
        if not Studies(_id=study_id):
            return redirect("/")
    return some_function(*args, **kwargs)
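
This wrapper reads like the inner closure of an authentication decorator; a minimal sketch of the assumed enclosing structure (the outer name is hypothetical):

import functools

def require_system_admin(some_function):
    # Hypothetical outer decorator: wraps a Flask view in the checks above.
    @functools.wraps(some_function)
    def authenticate_and_call(*args, **kwargs):
        # ... the checks shown above ...
        return some_function(*args, **kwargs)
    return authenticate_and_call
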
Example 10
def get_all_timings_files():
    # get users associated with studies
    study_users = {str(s._id): Users(study_id=s._id, field='_id')
                   for s in Studies()}
    all_user_timings = []
    for sid, users in study_users.items():  # construct s3 key prefixes
        all_user_timings.extend(
            [sid + "/" + u + "/" + "surveyTimings" for u in users])
    # use a thread pool to efficiently list all the s3 paths we will need
    pool = ThreadPool(len(all_user_timings))
    try:
        files_lists = pool.map(s3_list_files, all_user_timings)
    finally:
        pool.close()
        pool.terminate()

    files_list = []
    for l in files_lists:
        files_list.extend(l)
    # purge the occasional pre-multistudy file, and decode everything as utf-8
    return [f.decode("utf8") for f in files_list if f.count('/') == 4]
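
The trailing filter relies on the shape of post-multistudy keys; a sketch with hypothetical identifiers:

# A current surveyTimings key has five path components, i.e. exactly four slashes:
#   study_id/user_id/surveyTimings/survey_id/filename
key = "5873fe38644ad7557b168e43/user1234/surveyTimings/58a0f35c644ad76a0e9b7a21/1490225100000.csv"
print key.count('/') == 4  # True; older pre-multistudy keys lack the survey_id component
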
Example 11
def manage_studies():
    return render_template('manage_studies.html',
                           studies=Studies.get_all_studies(),
                           allowed_studies=get_admins_allowed_studies(),
                           system_admin=admin_is_system_admin())
Example 12
    def get_creation_arguments(cls, params, file_object):
        errors = []

        # ensure required parameters are present; falsey values are not allowed
        for key in PipelineUpload.REQUIREDS:
            if not params.get(key, None):
                errors.append('missing required parameter: "%s"' % key)

        # bailing out early here lets the code below assume all required parameters are present
        if errors:
            raise InvalidUploadParameterError("\n".join(errors))

        # validate study_id, guarding against malformed ObjectId strings
        study_id_object_id = None
        if not ObjectId.is_valid(params["study_id"]):
            errors.append('encountered invalid study_id: "%s"' % params["study_id"])
        else:
            study_id_object_id = ObjectId(params["study_id"])
            if not Studies(_id=study_id_object_id):
                errors.append('encountered invalid study_id: "%s"' % params["study_id"])

        if len(params['file_name']) > 256:
            errors.append(
                "encountered invalid file_name, file_names cannot be more than 256 characters"
            )

        if PipelineUploads.count(file_name=params['file_name']):
            errors.append('a file with the name "%s" already exists' %
                          params['file_name'])

        try:
            tags = json.loads(params["tags"])
            if not isinstance(tags, list):
                # must be json list, can't be json dict, number, or string.
                raise ValueError()
            if not tags:
                errors.append(
                    "you must provide at least one tag for your file.")
            tags = [str(_) for _ in tags]
        except ValueError:
            errors.append(
                "could not parse tags, ensure that your uploaded list of tags is a json compatible array."
            )

        if errors:
            raise InvalidUploadParameterError("\n".join(errors))

        creation_time = datetime.utcnow()
        file_hash = low_memory_chunk_hash(file_object.read())
        file_object.seek(0)

        s3_path = "%s/%s/%s/%s/%s" % (
            PIPELINE_FOLDER,
            params["study_id"],
            params["file_name"],
            creation_time.isoformat(),
            ''.join(
                random.choice(string.ascii_letters + string.digits)
                for i in range(32)),
            # todo: file_name?
        )

        return {
            "creation_time": creation_time,
            "s3_path": s3_path,
            "study_id": study_id_object_id,
            "tags": tags,
            "file_name": params["file_name"],
            "file_hash": file_hash,
        }
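
A hedged sketch of a params dict that should pass the validation above, assuming PipelineUpload.REQUIREDS covers at least these keys (its exact contents are defined elsewhere):

params = {
    "study_id": "5873fe38644ad7557b168e43",       # must name an existing study
    "file_name": "daily_summary_2017-06-01.csv",  # unique, at most 256 characters
    "tags": '["summary", "daily"]',               # a JSON array with at least one tag
}
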
Example 13
def get_admins_allowed_studies():
    """ Return a list of studies which the currently logged-in admin is autho-
    rized to view and edit """
    admin = Admin(session['admin_username'])
    return sorted(Studies(admins=admin._id), key=lambda x: x.name.lower())
Example 14
    print "migrate_upload_trackers..."
    migrate_upload_trackers()


if __name__ == '__main__':
    study_referents = {}
    study_id_dict = {}
    user_id_dict = {}
    survey_id_dict = {}
    
    orphaned_surveys = {}
    
    d_study_admin_list = []  # A list of study-researcher pairs
    d_study_survey_dict = {}  # A mapping of surveys to their associated studies
    d_study_settings_dict = {}  # A mapping of device settings to their associated studies

    CHUNK_SIZE = 10000
    
    # error_handler = ErrorHandler()
    error_handler = null_error_handler()
    
    print MStudySet.count(), MSurveySet.count(), MSettingsSet.count(), \
        MAdminSet.count(), MUserSet.count(), MChunkSet.count(), MUploadSet.count()
    with error_handler:
        run_all_migrations()
    print DStudy.objects.count(), DSurvey.objects.count(), DSettings.objects.count(), \
        DAdmin.objects.count(), DUser.objects.count(), DChunk.objects.count(), DUpload.objects.count()
    print "end:", datetime.now()
    
    error_handler.raise_errors()
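
null_error_handler is not shown here; a minimal sketch consistent with how it is used above (the naming and body are assumptions):

class NullErrorHandler(object):
    # Hypothetical no-op stand-in for ErrorHandler: exceptions propagate
    # immediately, and raise_errors() has nothing to re-raise.
    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        return False

    def raise_errors(self):
        pass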