Example #1
0
def study_validation(initialization: Initialization = Initialization()):
    """Run study validation on the local SQLite database, then ``auth_sync``.

    Args:
        initialization: Settings object. Its ``DB_LOCATION`` is opened with
            ``sqlite3.connect`` and the object itself is handed to
            ``create_sync_obj`` and ``auth_sync``.

    A ``sqlite3.IntegrityError`` raised while validating is printed together
    with the current timestamp; ``auth_sync`` still runs afterwards.
    """
    # Function-scope import so the block does not depend on the file header.
    from contextlib import closing

    print("Running study_validation")
    try:
        # Using a sqlite3 connection as a context manager only commits or
        # rolls back the transaction; ``closing`` is what releases the
        # connection once the validation pass is over.
        with closing(sqlite3.connect(
                initialization.DB_LOCATION)) as sqlite_connection:
            with sqlite_connection:
                sync_obj = create_sync_obj(initialization, sqlite_connection)
                sync_obj.perform_study_validation()
    except sqlite3.IntegrityError as e:
        print(time.time(), e)
    auth_sync(initialization)
 def authorize_for_study(self, email, study, study_name=None):
     """Insert a ``cbioportal:<ID>`` authority for ``email`` if none matches.

     Args:
         email: Address looked up in, and inserted into, ``authorities``.
         study: Study object whose ``get_study_name()`` is used in the log
             line. ``user_handler`` calls this method with only an email and
             a study name, so a plain identifier string is accepted here as
             well when ``study_name`` is omitted.
         study_name: cancer_study_identifier; it is upper-cased to build the
             authority value. Defaults to ``study``.
     """
     if study_name is None:
         # Two-argument call: ``study`` already is the identifier.
         study_name = study
     authority_id = study_name.upper()
     # NOTE(review): email and study_name are interpolated directly into the
     # SQL text below. If exec_sql_to_column_set accepts bound parameters the
     # way exec_sql does, this lookup should be parameterized - confirm.
     statement1 = "SELECT email FROM authorities WHERE email = '{}' AND authority LIKE '%{}%'".format(
         email, authority_id)
     results = self._cbio_sql.exec_sql_to_column_set(statement1)
     if not results:
         # Study objects expose get_study_name(); a bare identifier string
         # is shown as-is.
         name_getter = getattr(study, "get_study_name", None)
         display_name = name_getter() if callable(name_getter) else study
         print(
             "Authorizing email '{}' for study '{}' with cancer_study_identifier '{}'"
             .format(email, display_name, study_name))
         statement2 = "INSERT INTO authorities (email, authority) VALUES (%s, 'cbioportal:{}')".format(
             authority_id)
         self._cbio_sql.exec_sql(statement2, email)
Example #3
0
 def _run_update_orgs(self):
     print("Running orgs update...")
     orgs_in_db = {
         org.get_name()
         for org in self.OrganizationsAccess.list_all_orgs()
     }
     new_orgs = [
         org for org in set(self._sync.all_entries.keys()) - orgs_in_db
     ]
     for org_name in new_orgs:
         self.OrganizationsAccess.add_org(org_name)
Example #4
0
 def _run_update_dashboard_json(self):
     """Dump the two dashboard tables to JSON files in ``/dashboard/data``.

     ``top_level_dashboard`` goes to ``top_level.json`` and
     ``second_level_dashboard`` to ``second_level.json``; each file holds
     whatever ``self._sql.exec_sql_to_dict`` returns for ``SELECT *``.
     """
     print("Running update dashboard json...")
     output_dir = '/dashboard/data'
     exports = (
         ("top_level_dashboard", "top_level.json"),
         ("second_level_dashboard", "second_level.json"),
     )
     os.makedirs(output_dir, exist_ok=True)
     for table_name, file_name in exports:
         target_path = os.path.join(output_dir, file_name)
         with open(target_path, 'w') as out_file:
             # Query after the file is opened, as the dump is written
             # straight into it.
             rows = self._sql.exec_sql_to_dict(
                 'SELECT * FROM {}'.format(table_name))
             json.dump(rows, out_file)
Example #5
0
 def _run_update_studies(self):
     print("Running studies update...")
     for org_name, study_entries in self._sync.all_entries.items():
         org = self.OrganizationsAccess.get_org_by_name(org_name)
         studies_in_db = org.get_studies()
         incoming_study_names = set(study_entries.keys())
         for study in studies_in_db:
             if study.get_study_name() not in incoming_study_names:
                 study.mark_unavailable()
             elif not study.is_available():
                 study.mark_available()
         for study_name in incoming_study_names:
             if not org.study_name_exists(study_name):
                 self.StudyAccess.new_study(org, study_name, available=True)
Example #6
0
 def _run_study_version_validation(self):
     """Run the validator script on every study version awaiting validation.

     For each pending study version the files are symlinked into a fresh
     directory ``<self._study_link_dir>/<id>``, the script at
     ``self._validator_path`` is run on it with an HTML report under
     ``/dashboard/validation``, and the exit code, success flag, combined
     output and current timestamp are stored on the study version. Exit
     codes 0 and 3 count as success.
     """
     print("Running study version validation...")
     os.makedirs("/dashboard/validation", exist_ok=True)
     pending = self.StudyVersionAccess.get_study_versions_needing_validation()
     for study_version in pending:
         print("Validating study '{}' @ study_version_id '{}'".format(
             study_version.get_study().get_study_name(),
             study_version.get_id()))

         # Rebuild the staging directory from scratch.
         staging_dir = os.path.join(self._study_link_dir,
                                    str(study_version.get_id()))
         if os.path.exists(staging_dir):
             shutil.rmtree(staging_dir)
         for version_file in study_version.get_study_version_files():
             stored_file = self.FileAccess.get_file_from_study_version_file(
                 version_file)
             source_path = stored_file.get_path()
             relative_path = version_file.get_file_path()
             link_target = os.path.join(staging_dir, relative_path)
             print("{} -> {}".format(source_path, link_target))
             os.makedirs(os.path.dirname(link_target), exist_ok=True)
             os.symlink(source_path, link_target)

         cmd = "python {} -s {} -n -html {}.html".format(
             self._validator_path, staging_dir,
             os.path.join("/dashboard/validation",
                          str(study_version.get_id())))
         print("Running command '{}'".format(cmd))
         try:
             raw_output = subprocess.check_output(cmd,
                                                  shell=True,
                                                  stderr=subprocess.STDOUT)
         except subprocess.CalledProcessError as failure:
             # A non-zero exit still carries the validator's output.
             status_code = failure.returncode
             output = failure.output.decode('utf-8')
         else:
             status_code = 0
             output = raw_output.decode('utf-8')

         success = status_code in {0, 3}
         print("Command exited with code '{}', marking as a {}.".format(
             status_code, 'success' if success else 'failure'))
         study_version.add_study_version_validation(status_code, success,
                                                    output,
                                                    int(time.time()))
Example #7
0
 def _run_files_download(self):
     """Make sure every synced content hash has a verified local file on record.

     For each ``content_hash -> remote_path`` pair in
     ``self._sync.content_hash_to_remote_path`` the recorded file is kept
     when it exists on disk and still hashes to ``content_hash``. Otherwise
     the existing records for that hash are deleted, the file is downloaded
     into ``self._download_dir`` (unless a matching file is already there)
     and the new path is recorded.

     Raises:
         AssertionError: if a freshly downloaded file does not hash to the
             expected content hash.
     """
     print("Running files download...")
     for content_hash, remote_path in self._sync.content_hash_to_remote_path.items(
     ):
         file_from_db = self.FileAccess.get_file_by_content_hash(
             content_hash)
         needs_download = (
             file_from_db is None
             or not os.path.isfile(file_from_db.get_path())
             or content_hash != content_hasher(file_from_db.get_path()))
         if not needs_download:
             continue
         # Any record left for this hash is missing or stale at this point.
         self.FileAccess.delete_files_by_content_hash(content_hash)
         file_download_path = os.path.join(self._download_dir, content_hash)
         already_present = (
             os.path.isfile(file_download_path)
             and content_hash == content_hasher(file_download_path))
         if not already_present:
             print("Downloading file {} with content_hash {}".format(
                 remote_path, content_hash))
             self._sync.do_download(local_path=file_download_path,
                                    remote_path=remote_path)
             # Explicit raise instead of ``assert`` so the integrity check
             # still runs when Python is started with -O.
             if content_hash != content_hasher(file_download_path):
                 raise AssertionError(
                     "Downloaded file {} does not match content_hash {}".
                     format(remote_path, content_hash))
         self.FileAccess.insert_file_path_with_content_hash(
             content_hash, file_download_path)
Example #8
0
 def _run_update_study_versions(self):
     print("Running study versions update...")
     for org_name, study_entries in self._sync.all_entries.items():
         org = self.OrganizationsAccess.get_org_by_name(org_name)
         for study_name, path_entries in study_entries.items():
             study = org.get_study_by_name(study_name)
             aggregate_list = [
                 path.encode('utf-8') + content_hash.encode('utf-8')
                 for path, content_hash_entries in path_entries.items()
                 for content_hash in content_hash_entries.keys() if
                 os.path.basename(path) not in self.UNVERSIONED_FILE_NAMES
             ]
             aggregate_hash = hashlib.sha256(b''.join(
                 sorted(aggregate_list))).hexdigest()
             for path, content_hash_entries in filter(
                     lambda x: os.path.basename(x[0]) == "access.txt",
                     path_entries.items()):
                 for content_hash in content_hash_entries:
                     file = self.FileAccess.get_file_by_content_hash(
                         content_hash)
                     if is_valid_access_file(
                             file
                     ) and not self.StudyAccessAccess.study_access_exists(
                             study, file):
                         self.StudyAccessAccess.add_new_study_access(
                             study, file)
             if not self.StudyVersionAccess.study_version_exists(
                     study, aggregate_hash):
                 study_version = self.StudyVersionAccess.new_study_version(
                     study, aggregate_hash)
                 for path, content_hash_entries in path_entries.items():
                     for content_hash, server_modified in content_hash_entries.items(
                     ):
                         file = self.FileAccess.get_file_by_content_hash(
                             content_hash)
                         self.StudyVersionFileAccess.add_new_study_version_file(
                             study_version, file, path, server_modified)
 def _run_user_sync(self):
     """Sync users from the authorization Google Sheet, then the admins.

     Configuration is read from the environment: ``GCLOUD_CREDS`` (service
     account JSON), ``AUTH_SHEET_KEY``, ``AUTH_SHEET_WORKSHEET_NAME``,
     ``AUTH_SHEET_KEYMAP`` (JSON mapping with ``name``, ``email`` and
     ``enabled`` column keys), ``AUTH_SHEET_TRUEVAL`` and ``ADMIN_EMAILS``
     (comma separated).

     Every sheet record is passed to ``user_handler`` and its
     "Approved in portal" cell is set to the true value. Admin emails that
     did not appear in the sheet are handled as enabled users named after
     their email.
     """
     credentials = ServiceAccountCredentials.from_json_keyfile_dict(
         json.loads(os.environ['GCLOUD_CREDS']),
         [
             'https://spreadsheets.google.com/feeds',
             'https://www.googleapis.com/auth/drive',
         ])
     client = gspread.authorize(credentials)
     sheet = client.open_by_key(os.environ['AUTH_SHEET_KEY'])
     worksheet = sheet.worksheet(os.environ['AUTH_SHEET_WORKSHEET_NAME'])
     key_map = json.loads(os.environ['AUTH_SHEET_KEYMAP'])
     true_val = os.environ['AUTH_SHEET_TRUEVAL']
     user_records = worksheet.get_all_records()
     seen_emails = set()
     public_studies = self.get_public_studies()
     print("Found public studies {}".format(public_studies))
     approved_col = worksheet.find("Approved in portal").col

     for record in user_records:
         # The name may be spread over several columns.
         name_keys = key_map['name']
         if not isinstance(name_keys, list):
             name_keys = [name_keys]
         name = ' '.join([record[key] for key in name_keys])
         email = record[key_map['email']]
         enabled = record[key_map['enabled']] == true_val
         seen_emails.add(email)
         self.user_handler(email, name, enabled, public_studies)
         row_index = worksheet.find(email).row
         worksheet.update_cell(row_index, approved_col, true_val)

     admin_emails = set(os.environ['ADMIN_EMAILS'].split(','))
     for admin_email in admin_emails - seen_emails:
         self.user_handler(admin_email, admin_email, True, public_studies)
 def user_handler(self, email, name, enabled, public_studies):
     """Create or update one portal user and authorize it for public studies.

     Args:
         email: User email.
         name: Display name.
         enabled: Enabled flag given to the ``_User`` record.
         public_studies: Iterable of study names every enabled user is
             authorized for.
     """
     account = _User(email, name, enabled, self._cbio_sql)
     print("Checking for user {}, {}".format(name, email))
     if account._exists():
         if account._needs_updating():
             print(
                 "User with email {} does exist, but needs updating.  Updating with name = {} and enabled = {}"
                 .format(email, name, enabled))
             account._update()
     else:
         print("User {}, {} does not exist.  Creating with enabled = {}".
               format(name, email, enabled))
         account._add()
     if not account._is_enabled():
         return
     for public_study in public_studies:
         # NOTE(review): only email and study name are passed here; confirm
         # this matches the signature of authorize_for_study.
         self.authorize_for_study(account.email, public_study)
    def _run_auth_sync(self):
        """Authorize admins and access-file emails for every active study.

        For each study that has an active study version, the authorized set
        is the comma-separated ``ADMIN_EMAILS`` environment variable plus
        every stripped line of the study's most recent access file, if any.
        The study's meta file is parsed as ``key:value`` lines to obtain
        ``cancer_study_identifier``; studies without a meta file, with an
        empty one, or without that key are skipped. When
        ``self.disable_unauth`` is set, existing authorizations for the
        study are removed before the emails are authorized.
        """
        for top_level in self.TopLevelFolderAccess.list_all_orgs():
            for study in top_level.get_studies():
                study_version = self.StudyVersionAccess.get_active_study_version(
                    study)
                if study_version is None:
                    continue
                access_file = self.StudyAccessAccess.get_most_recent_access_file_for_study(
                    study)
                authorized_emails = set(
                    os.environ['ADMIN_EMAILS'].split(','))
                if access_file is not None:
                    for line in line_iter(access_file.get_contents()):
                        authorized_emails.add(line.strip())
                # A blank line (or an empty ADMIN_EMAILS entry) must not be
                # authorized as the empty email address.
                authorized_emails.discard('')

                meta_study_file = self.FileAccess.get_file_from_study_version_file(
                    self.FileAccess.
                    get_meta_study_version_file_from_study_version(
                        study_version))
                if meta_study_file is None:
                    continue
                meta_dict = {}
                for line in line_iter(meta_study_file.get_contents()):
                    # Split on the first colon only so values that contain
                    # a colon themselves are kept whole.
                    key, separator, value = line.partition(':')
                    meta_dict[key] = value if separator else None
                if not meta_dict:
                    continue
                cancer_study_name = meta_dict.get('cancer_study_identifier')
                if cancer_study_name is None:
                    # Skip this study instead of aborting the whole sync.
                    print(
                        "Meta study file for study '{}' at '{}' has no cancer_study_identifier, skipping."
                        .format(study.get_study_name(),
                                meta_study_file.get_path()))
                    continue
                cancer_study_name = cancer_study_name.strip()
                print(
                    "Found meta study file for study '{}' at '{}' with cancer_study_identifier as '{}'"
                    .format(study.get_study_name(),
                            meta_study_file.get_path(), cancer_study_name))
                if self.disable_unauth:
                    print("Removing all authorizations...")
                    self.unauthorize_all_for_study(cancer_study_name)
                for email in authorized_emails:
                    self.authorize_for_study(email, study, cancer_study_name)
Example #12
0
def auth_sync(initialization: Initialization = Initialization()):
    """Sync the local database, then push authorizations to cBioPortal.

    Args:
        initialization: Settings object providing ``DB_LOCATION`` (SQLite
            path) and ``CBIOPORTAL_DB_CONNECTION_INFO`` (keyword arguments
            for ``MySQLdb.connect``).

    Environment:
        DISABLE_USER_SYNC: ``"yes"`` turns the user sync off.
        DISABLE_UNAUTH: ``"yes"`` sets the ``disable_unauth`` flag passed to
            ``AuthorizationManager``.

    ``sqlite3.IntegrityError`` raised in either phase is printed with the
    current timestamp; every other exception propagates.
    """
    # Function-scope import so the block does not depend on the file header.
    from contextlib import closing

    print("Running auth_sync")
    try:
        # A sqlite3 connection's context manager only commits or rolls
        # back; ``closing`` is what actually closes the connection.
        with closing(sqlite3.connect(
                initialization.DB_LOCATION)) as sqlite_connection:
            with sqlite_connection:
                sync_obj = create_sync_obj(initialization, sqlite_connection)
                sync_obj.perform_db_sync()
    except sqlite3.IntegrityError as e:
        print(time.time(), e)
    try:
        cbio_con = MySQLdb.connect(
            **initialization.CBIOPORTAL_DB_CONNECTION_INFO)
        user_sync_enabled = os.getenv("DISABLE_USER_SYNC", "no") != "yes"
        disable_unauth = os.getenv("DISABLE_UNAUTH", "no") == "yes"
        with SQL_mysql(cbio_con) as cbioportal_sql, closing(
                sqlite3.connect(
                    initialization.DB_LOCATION)) as sqlite_connection:
            with sqlite_connection:
                auth_sync_obj = AuthorizationManager(
                    SQL_sqlite3(sqlite_connection), cbioportal_sql,
                    user_sync_enabled, disable_unauth)
                auth_sync_obj.run()
    except sqlite3.IntegrityError as e:
        print(time.time(), e)
 def run(self):
     """Run the authorization sync, then the user sync when it is enabled."""
     self._run_auth_sync()
     if not self.user_sync_enabled:
         print("User sync is disabled, skipping...")
         return
     self._run_user_sync()
Example #14
0
 def _run_local_db_init(self):
     print("Running schema setup...")
     with open(self._schema_sql_path) as schema_file:
         statements = schema_file.read().split(';')
     for statement in statements:
         self._sql.connection.execute(statement)
Example #15
0
    def _run_study_version_import(self):
        """Import every study version that still needs an import test.

        Repeats until ``get_study_versions_needing_import_test`` returns
        nothing. For each study version the files are symlinked into
        ``<self._study_link_dir>/<id>``, the importer script at
        ``self._cbioportalimporter_path`` is run with ``import-study``, the
        command and its output are appended to ``/dashboard/import/<id>.txt``,
        and the result is stored on the study version. Exit codes 0 and 3
        count as success; on failure a ``remove-study`` run is attempted
        using the study version's meta file.
        """
        print("Running study version import...")
        os.makedirs("/dashboard/import", exist_ok=True)
        study_versions_needing_import_test = self.StudyVersionAccess.get_study_versions_needing_import_test(
        )
        # The pending list is queried again after every pass (see the end of
        # the loop body), so the loop ends once that query returns nothing.
        while study_versions_needing_import_test:
            for study_version in study_versions_needing_import_test:
                print("Importing study '{}' @ study_version_id '{}'".format(
                    study_version.get_study().get_study_name(),
                    study_version.get_id()))
                # Rebuild the per-version link directory from scratch.
                study_version_tmp_path = os.path.join(
                    self._study_link_dir, str(study_version.get_id()))
                if os.path.exists(study_version_tmp_path):
                    shutil.rmtree(study_version_tmp_path)
                for study_version_file in study_version.get_study_version_files(
                ):
                    file_path = self.FileAccess.get_file_from_study_version_file(
                        study_version_file).get_path()
                    link_path = study_version_file.get_file_path()
                    full_link_path = os.path.join(study_version_tmp_path,
                                                  link_path)
                    print("{} -> {}".format(file_path, full_link_path))
                    os.makedirs(os.path.dirname(full_link_path), exist_ok=True)
                    os.symlink(file_path, full_link_path)
                cmd = "python {} --command import-study --study_directory {}".format(
                    self._cbioportalimporter_path, study_version_tmp_path)
                try:
                    # Clear the "currently loaded" flag on this version and on
                    # all versions of the study before the import starts.
                    study_version.set_currently_loaded(False)
                    self.StudyVersionAccess.set_all_study_versions_in_study_currently_loaded(
                        study_version.get_study(), False)
                    print("Running command '{}'".format(cmd))
                    p = subprocess.check_output(cmd,
                                                shell=True,
                                                stderr=subprocess.STDOUT)
                    status_code = 0
                    output = p.decode('utf-8')
                except subprocess.CalledProcessError as e:
                    # Non-zero exit: keep the code and the captured output.
                    status_code = e.returncode
                    output = e.output.decode('utf-8')
                if status_code in {0, 3}:
                    success = True
                    study_version.set_currently_loaded(True)
                else:
                    success = False
                print("Command exited with code '{}', marking as a {}.".format(
                    status_code, 'success' if success else 'failure'))
                # Append mode: earlier attempts for this version stay in the log.
                with open(
                        os.path.join("/dashboard/import",
                                     "{}.txt".format(study_version.get_id())),
                        'a') as wf:
                    wf.write("===================================\n")
                    wf.write(str(time.time()) + "\n")
                    wf.write(cmd + "\n")
                    wf.write(output)
                    wf.write("\n===================================\n")

                if not success:
                    try:
                        print("Removing study version as import failed")
                        meta_file = self.FileAccess.get_meta_study_version_file_from_study_version(
                            study_version)
                        if meta_file:
                            print(
                                "Found meta study file '{}'".format(meta_file))
                            meta_file_path = os.path.join(
                                study_version_tmp_path,
                                meta_file.get_file_path())
                            print("Meta study file complete path '{}'".format(
                                meta_file_path))
                            # cmd is rebound here; the import command was
                            # already written to the log file above.
                            cmd = 'python  {} --command remove-study --meta_filename="{}"'.format(
                                self._cbioportalimporter_path, meta_file_path)
                            print(
                                "Running command '{}' to remove the study version"
                                .format(cmd))
                            p = subprocess.check_output(
                                cmd, shell=True, stderr=subprocess.STDOUT)
                            print("Command output {}".format(
                                p.decode('utf-8')))
                    except subprocess.CalledProcessError as e:
                        # A failed removal is only reported; the import result
                        # below is still recorded.
                        print("ERROR: '{}'".format(e.output))
                # status_code/output recorded here are those of the import
                # command, not of the removal.
                study_version.add_study_version_import(status_code,
                                                       success, output,
                                                       int(time.time()))
            study_versions_needing_import_test = self.StudyVersionAccess.get_study_versions_needing_import_test(
            )