def study_validation(initialization: Initialization = Initialization()):
    """Run the study validation pass, then continue with ``auth_sync``.

    A sync object is created on a SQLite connection opened at
    ``initialization.DB_LOCATION`` and asked to perform study validation.
    A ``sqlite3.IntegrityError`` raised by that step is printed together
    with the current timestamp instead of propagating.

    Args:
        initialization: Settings object providing ``DB_LOCATION``; it is
            also forwarded unchanged to ``auth_sync``.
    """
    print("Running study_validation")
    try:
        with sqlite3.connect(initialization.DB_LOCATION) as db_connection:
            validator = create_sync_obj(initialization, db_connection)
            validator.perform_study_validation()
    except sqlite3.IntegrityError as integrity_error:
        print(time.time(), integrity_error)
    auth_sync(initialization)
def authorize_for_study(self, email, study, study_name=None):
    """Grant ``email`` access to a cBioPortal study unless it already has it.

    Args:
        email: Address to authorize.
        study: Study object exposing ``get_study_name()``; used only for the
            log line.  When ``study_name`` is omitted, this argument is taken
            to be the cancer study identifier itself, so two-argument calls
            of the form ``authorize_for_study(email, identifier)`` work.
        study_name: The study's ``cancer_study_identifier``.  It is
            upper-cased before being matched against, or stored in, the
            ``authorities`` table.
    """
    if study_name is None:
        # Two-argument call: the second positional argument is the identifier.
        study_name = study
    authority_name = study_name.upper()

    # NOTE(review): ``email`` is interpolated straight into this statement and
    # the LIKE pattern matches any authority *containing* the identifier (so
    # "ABC" also matches "cbioportal:ABCD").  Switching to placeholders needs
    # exec_sql_to_column_set to accept parameters, which is not visible here.
    statement1 = "SELECT email FROM authorities WHERE email = '{}' AND authority LIKE '%{}%'".format(
        email, authority_name)
    if self._cbio_sql.exec_sql_to_column_set(statement1):
        return

    name_getter = getattr(study, "get_study_name", None)
    display_name = name_getter() if callable(name_getter) else study
    print(
        "Authorizing email '{}' for study '{}' with cancer_study_identifier '{}'"
        .format(email, display_name, study_name))
    statement2 = "INSERT INTO authorities (email, authority) VALUES (%s, 'cbioportal:{}')".format(
        authority_name)
    self._cbio_sql.exec_sql(statement2, email)
def _run_update_orgs(self): print("Running orgs update...") orgs_in_db = { org.get_name() for org in self.OrganizationsAccess.list_all_orgs() } new_orgs = [ org for org in set(self._sync.all_entries.keys()) - orgs_in_db ] for org_name in new_orgs: self.OrganizationsAccess.add_org(org_name)
def _run_update_dashboard_json(self):
    """Dump each dashboard table to its JSON file under ``/dashboard/data``.

    The directory is created when missing.  For every (table, file) pair the
    target file is opened for writing and the result of
    ``SELECT * FROM <table>`` is serialized into it.
    """
    print("Running update dashboard json...")
    output_dir = '/dashboard/data'
    exports = (
        ("top_level_dashboard", "top_level.json"),
        ("second_level_dashboard", "second_level.json"),
    )
    os.makedirs(output_dir, exist_ok=True)
    for table_name, file_name in exports:
        target_path = os.path.join(output_dir, file_name)
        with open(target_path, 'w') as json_file:
            # Query after opening, so the file is truncated even if the query fails.
            rows = self._sql.exec_sql_to_dict(
                'SELECT * FROM {}'.format(table_name))
            json.dump(rows, json_file)
def _run_update_studies(self): print("Running studies update...") for org_name, study_entries in self._sync.all_entries.items(): org = self.OrganizationsAccess.get_org_by_name(org_name) studies_in_db = org.get_studies() incoming_study_names = set(study_entries.keys()) for study in studies_in_db: if study.get_study_name() not in incoming_study_names: study.mark_unavailable() elif not study.is_available(): study.mark_available() for study_name in incoming_study_names: if not org.study_name_exists(study_name): self.StudyAccess.new_study(org, study_name, available=True)
def _run_study_version_validation(self):
    """Run the validator script on every study version that needs validation.

    For each pending study version a fresh directory of symlinks is built
    under ``self._study_link_dir/<id>``, the validator is executed on it with
    an HTML report written to ``/dashboard/validation/<id>.html``, and the
    exit code, success flag, captured output and a timestamp are recorded on
    the study version.  Exit codes 0 and 3 are treated as success.
    """
    print("Running study version validation...")
    report_dir = "/dashboard/validation"
    os.makedirs(report_dir, exist_ok=True)
    pending_versions = self.StudyVersionAccess.get_study_versions_needing_validation()
    for study_version in pending_versions:
        print("Validating study '{}' @ study_version_id '{}'".format(
            study_version.get_study().get_study_name(),
            study_version.get_id()))

        # Rebuild the symlink tree from scratch for this version.
        link_root = os.path.join(self._study_link_dir,
                                 str(study_version.get_id()))
        if os.path.exists(link_root):
            shutil.rmtree(link_root)
        for version_file in study_version.get_study_version_files():
            stored_file = self.FileAccess.get_file_from_study_version_file(
                version_file)
            source_path = stored_file.get_path()
            link_target = os.path.join(link_root, version_file.get_file_path())
            print("{} -> {}".format(source_path, link_target))
            os.makedirs(os.path.dirname(link_target), exist_ok=True)
            os.symlink(source_path, link_target)

        try:
            cmd = "python {} -s {} -n -html {}.html".format(
                self._validator_path, link_root,
                os.path.join(report_dir, str(study_version.get_id())))
            print("Running command '{}'".format(cmd))
            raw_output = subprocess.check_output(cmd,
                                                 shell=True,
                                                 stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as failure:
            status_code = failure.returncode
            output = failure.output.decode('utf-8')
        else:
            status_code = 0
            output = raw_output.decode('utf-8')

        success = status_code in {0, 3}
        print("Command exited with code '{}', marking as a {}.".format(
            status_code, 'success' if success else 'failure'))
        study_version.add_study_version_validation(status_code, success,
                                                   output, int(time.time()))
def _run_files_download(self):
    """Make sure every file referenced by the sync exists locally and is recorded.

    For each ``content_hash -> remote_path`` pair the stored file record is
    accepted when its path exists on disk and still hashes to
    ``content_hash``.  Otherwise any stale records for that hash are deleted,
    the file is downloaded to ``self._download_dir/<content_hash>`` (unless a
    valid copy is already there) and a fresh record is inserted.

    Raises:
        AssertionError: If a freshly downloaded file does not hash to the
            expected ``content_hash``.
    """
    print("Running files download...")
    for content_hash, remote_path in self._sync.content_hash_to_remote_path.items():
        file_from_db = self.FileAccess.get_file_by_content_hash(content_hash)
        if file_from_db is not None:
            recorded_path = file_from_db.get_path()
            if (os.path.isfile(recorded_path)
                    and content_hash == content_hasher(recorded_path)):
                # Recorded copy is intact; nothing to do for this hash.
                continue

        # Missing or corrupt: drop stale records before re-registering.
        self.FileAccess.delete_files_by_content_hash(content_hash)
        file_download_path = os.path.join(self._download_dir, content_hash)
        already_present = (os.path.isfile(file_download_path) and
                           content_hash == content_hasher(file_download_path))
        if not already_present:
            print("Downloading file {} with content_hash {}".format(
                remote_path, content_hash))
            self._sync.do_download(local_path=file_download_path,
                                   remote_path=remote_path)
            # Explicit raise instead of ``assert`` so the integrity check
            # still runs when Python is started with -O.
            if content_hash != content_hasher(file_download_path):
                raise AssertionError(
                    "Downloaded file {} does not match content_hash {}".format(
                        file_download_path, content_hash))
        self.FileAccess.insert_file_path_with_content_hash(
            content_hash, file_download_path)
def _run_update_study_versions(self): print("Running study versions update...") for org_name, study_entries in self._sync.all_entries.items(): org = self.OrganizationsAccess.get_org_by_name(org_name) for study_name, path_entries in study_entries.items(): study = org.get_study_by_name(study_name) aggregate_list = [ path.encode('utf-8') + content_hash.encode('utf-8') for path, content_hash_entries in path_entries.items() for content_hash in content_hash_entries.keys() if os.path.basename(path) not in self.UNVERSIONED_FILE_NAMES ] aggregate_hash = hashlib.sha256(b''.join( sorted(aggregate_list))).hexdigest() for path, content_hash_entries in filter( lambda x: os.path.basename(x[0]) == "access.txt", path_entries.items()): for content_hash in content_hash_entries: file = self.FileAccess.get_file_by_content_hash( content_hash) if is_valid_access_file( file ) and not self.StudyAccessAccess.study_access_exists( study, file): self.StudyAccessAccess.add_new_study_access( study, file) if not self.StudyVersionAccess.study_version_exists( study, aggregate_hash): study_version = self.StudyVersionAccess.new_study_version( study, aggregate_hash) for path, content_hash_entries in path_entries.items(): for content_hash, server_modified in content_hash_entries.items( ): file = self.FileAccess.get_file_by_content_hash( content_hash) self.StudyVersionFileAccess.add_new_study_version_file( study_version, file, path, server_modified)
def _run_user_sync(self):
    """Synchronise portal users from the authorization Google Sheet.

    Credentials, sheet key, worksheet name, column key map and the "true"
    marker are all read from environment variables.  Every worksheet record
    is passed to ``self.user_handler`` and its row is then marked with the
    true value in the "Approved in portal" column.  Addresses listed in
    ``ADMIN_EMAILS`` that did not appear in the sheet are handled afterwards
    as enabled users whose name equals their email.
    """
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive',
    ]
    env = os.environ
    credentials = ServiceAccountCredentials.from_json_keyfile_dict(
        json.loads(env['GCLOUD_CREDS']), scope)
    client = gspread.authorize(credentials)
    worksheet = client.open_by_key(env['AUTH_SHEET_KEY']).worksheet(
        env['AUTH_SHEET_WORKSHEET_NAME'])
    key_map = json.loads(env['AUTH_SHEET_KEYMAP'])
    true_val = env['AUTH_SHEET_TRUEVAL']
    user_records = worksheet.get_all_records()
    seen_emails = set()
    public_studies = self.get_public_studies()
    print("Found public studies {}".format(public_studies))
    approved_col = worksheet.find("Approved in portal").col

    for record in user_records:
        # The name may be spread over several columns; normalise to a list.
        name_keys = key_map['name']
        if not isinstance(name_keys, list):
            name_keys = [name_keys]
        name = ' '.join([record[key] for key in name_keys])
        email = record[key_map['email']]
        enabled = record[key_map['enabled']] == true_val
        seen_emails.add(email)
        self.user_handler(email, name, enabled, public_studies)
        row_of_email = worksheet.find(email).row
        worksheet.update_cell(row_of_email, approved_col, true_val)

    admin_emails = set(env['ADMIN_EMAILS'].split(','))
    for admin_email in admin_emails - seen_emails:
        self.user_handler(admin_email, admin_email, True, public_studies)
def user_handler(self, email, name, enabled, public_studies):
    """Create or update one portal user and authorize them for public studies.

    Args:
        email: The user's email address.
        name: Display name of the user.
        enabled: Whether the user should be enabled.
        public_studies: Iterable of study names every enabled user is
            authorized for.
    """
    user = _User(email, name, enabled, self._cbio_sql)
    print("Checking for user {}, {}".format(name, email))
    if user._exists():
        if user._needs_updating():
            print(
                "User with email {} does exist, but needs updating. Updating with name = {} and enabled = {}"
                .format(email, name, enabled))
            user._update()
    else:
        print("User {}, {} does not exist. Creating with enabled = {}".
              format(name, email, enabled))
        user._add()

    if not user._is_enabled():
        return
    for study_name in public_studies:
        # NOTE(review): called with (email, study_name) only; confirm this
        # matches the signature of authorize_for_study.
        self.authorize_for_study(user.email, study_name)
def _run_auth_sync(self):
    """Authorize admins and access-file emails for every active study.

    For each study with an active study version, the set of authorized
    addresses is the ``ADMIN_EMAILS`` environment entries plus every line of
    the study's most recent access file.  Blank lines and empty entries are
    ignored so that no empty address is ever authorized.  The study's
    ``cancer_study_identifier`` is read from its meta study file; studies
    without a meta file or without an identifier are skipped.  When
    ``self.disable_unauth`` is set, existing authorizations for the study are
    removed before the addresses are (re)authorized.

    Raises:
        KeyError: If the ``ADMIN_EMAILS`` environment variable is not set
            and at least one study has an active version.
    """
    for top_level in self.TopLevelFolderAccess.list_all_orgs():
        for study in top_level.get_studies():
            study_version = self.StudyVersionAccess.get_active_study_version(
                study)
            if study_version is None:
                continue

            access_file = self.StudyAccessAccess.get_most_recent_access_file_for_study(
                study)
            # "a@x.org, b@x.org" and an empty variable must not produce
            # padded or empty addresses.
            authorized_emails = {
                admin.strip()
                for admin in os.environ['ADMIN_EMAILS'].split(',')
                if admin.strip()
            }
            if access_file is not None:
                for line in line_iter(access_file.get_contents()):
                    address = line.strip()
                    if address:
                        authorized_emails.add(address)

            meta_study_file = self.FileAccess.get_file_from_study_version_file(
                self.FileAccess.get_meta_study_version_file_from_study_version(
                    study_version))
            if meta_study_file is None:
                continue

            # "key: value" lines; a line without ':' maps the line to None.
            meta_dict = {}
            for line in line_iter(meta_study_file.get_contents()):
                parts = line.split(':')
                meta_dict[parts[0]] = parts[1] if len(parts) > 1 else None
            if not meta_dict:
                continue

            cancer_study_name = (meta_dict.get('cancer_study_identifier')
                                 or '').strip()
            if not cancer_study_name:
                print(
                    "Meta study file for study '{}' at '{}' has no cancer_study_identifier, skipping."
                    .format(study.get_study_name(),
                            meta_study_file.get_path()))
                continue
            print(
                "Found meta study file for study '{}' at '{}' with cancer_study_identifier as '{}'"
                .format(study.get_study_name(), meta_study_file.get_path(),
                        cancer_study_name))

            if self.disable_unauth:
                print("Removing all authorizations...")
                self.unauthorize_all_for_study(cancer_study_name)
            for email in authorized_emails:
                self.authorize_for_study(email, study, cancer_study_name)
def auth_sync(initialization: Initialization = Initialization()):
    """Run the database sync followed by the cBioPortal authorization sync.

    First a sync object is built on the local SQLite database and asked to
    perform the DB sync.  Then an ``AuthorizationManager`` is run against the
    cBioPortal MySQL database.  ``DISABLE_USER_SYNC=yes`` turns user sync
    off and ``DISABLE_UNAUTH=yes`` sets the manager's ``disable_unauth``
    flag.  A ``sqlite3.IntegrityError`` in either phase is printed with a
    timestamp instead of propagating.

    Args:
        initialization: Settings object providing ``DB_LOCATION`` and
            ``CBIOPORTAL_DB_CONNECTION_INFO``.
    """
    print("Running auth_sync")
    try:
        with sqlite3.connect(initialization.DB_LOCATION) as local_db:
            create_sync_obj(initialization, local_db).perform_db_sync()
    except sqlite3.IntegrityError as integrity_error:
        print(time.time(), integrity_error)

    try:
        cbio_con = MySQLdb.connect(
            **initialization.CBIOPORTAL_DB_CONNECTION_INFO)
        user_sync_enabled = os.getenv("DISABLE_USER_SYNC", "no") != "yes"
        disable_unauth = os.getenv("DISABLE_UNAUTH", "no") == "yes"
        with SQL_mysql(cbio_con) as cbioportal_sql, sqlite3.connect(
                initialization.DB_LOCATION) as local_db:
            manager = AuthorizationManager(SQL_sqlite3(local_db),
                                           cbioportal_sql,
                                           user_sync_enabled,
                                           disable_unauth)
            manager.run()
    except sqlite3.IntegrityError as integrity_error:
        print(time.time(), integrity_error)
def run(self):
    """Run the authorization sync, then the user sync when it is enabled."""
    self._run_auth_sync()
    if not self.user_sync_enabled:
        print("User sync is disabled, skipping...")
        return
    self._run_user_sync()
def _run_local_db_init(self): print("Running schema setup...") with open(self._schema_sql_path) as schema_file: statements = schema_file.read().split(';') for statement in statements: self._sql.connection.execute(statement)
def _run_study_version_import(self):
    """Import every study version that still needs an import test.

    The list of pending study versions is fetched, processed, and fetched
    again until it comes back empty.  For each study version a fresh symlink
    tree is built under ``self._study_link_dir/<id>``, the importer script is
    run on it, the command and its output are appended to
    ``/dashboard/import/<id>.txt``, and the result is recorded on the study
    version.  Exit codes 0 and 3 count as success.  After a failed import a
    ``remove-study`` command is attempted using the version's meta study
    file.
    """
    print("Running study version import...")
    os.makedirs("/dashboard/import", exist_ok=True)
    study_versions_needing_import_test = self.StudyVersionAccess.get_study_versions_needing_import_test(
    )
    # NOTE(review): the loop only ends once the query returns nothing;
    # presumably recording an import result removes the version from this
    # list -- confirm, otherwise this never terminates.
    while study_versions_needing_import_test:
        for study_version in study_versions_needing_import_test:
            print("Importing study '{}' @ study_version_id '{}'".format(
                study_version.get_study().get_study_name(),
                study_version.get_id()))
            # Start from an empty link directory for this study version.
            study_version_tmp_path = os.path.join(
                self._study_link_dir, str(study_version.get_id()))
            if os.path.exists(study_version_tmp_path):
                shutil.rmtree(study_version_tmp_path)
            # Symlink every stored file to its path inside the study layout.
            for study_version_file in study_version.get_study_version_files(
            ):
                file_path = self.FileAccess.get_file_from_study_version_file(
                    study_version_file).get_path()
                link_path = study_version_file.get_file_path()
                full_link_path = os.path.join(study_version_tmp_path,
                                              link_path)
                print("{} -> {}".format(file_path, full_link_path))
                os.makedirs(os.path.dirname(full_link_path), exist_ok=True)
                os.symlink(file_path, full_link_path)
            cmd = "python {} --command import-study --study_directory {}".format(
                self._cbioportalimporter_path, study_version_tmp_path)
            try:
                # Clear the "currently loaded" flag on this version and on all
                # versions of the study before the import is attempted.
                study_version.set_currently_loaded(False)
                self.StudyVersionAccess.set_all_study_versions_in_study_currently_loaded(
                    study_version.get_study(), False)
                print("Running command '{}'".format(cmd))
                p = subprocess.check_output(cmd,
                                            shell=True,
                                            stderr=subprocess.STDOUT)
                status_code = 0
                output = p.decode('utf-8')
            except subprocess.CalledProcessError as e:
                # Non-zero exit: keep the code and whatever was printed.
                status_code = e.returncode
                output = e.output.decode('utf-8')
            if status_code in {0, 3}:
                success = True
                study_version.set_currently_loaded(True)
            else:
                success = False
            print("Command exited with code '{}', marking as a {}.".format(
                status_code, 'success' if success else 'failure'))
            # Append (not overwrite) so earlier attempts stay in the log.
            with open(
                    os.path.join("/dashboard/import",
                                 "{}.txt".format(study_version.get_id())),
                    'a') as wf:
                wf.write("===================================\n")
                wf.write(str(time.time()) + "\n")
                wf.write(cmd + "\n")
                wf.write(output)
                wf.write("\n===================================\n")
            if not success:
                try:
                    print("Removing study version as import failed")
                    meta_file = self.FileAccess.get_meta_study_version_file_from_study_version(
                        study_version)
                    if meta_file:
                        print(
                            "Found meta study file '{}'".format(meta_file))
                        meta_file_path = os.path.join(
                            study_version_tmp_path,
                            meta_file.get_file_path())
                        print("Meta study file complete path '{}'".format(
                            meta_file_path))
                        # ``cmd`` is reused here; the import command has
                        # already been written to the log above.
                        cmd = 'python {} --command remove-study --meta_filename="{}"'.format(
                            self._cbioportalimporter_path, meta_file_path)
                        print(
                            "Running command '{}' to remove the study version"
                            .format(cmd))
                        p = subprocess.check_output(
                            cmd, shell=True, stderr=subprocess.STDOUT)
                        print("Command output {}".format(
                            p.decode('utf-8')))
                except subprocess.CalledProcessError as e:
                    # A failed removal is only reported, not raised.
                    print("ERROR: '{}'".format(e.output))
            # ``status_code`` and ``output`` are those of the import command,
            # not of the removal attempt.
            study_version.add_study_version_import(status_code, success,
                                                   output, int(time.time()))
        study_versions_needing_import_test = self.StudyVersionAccess.get_study_versions_needing_import_test(
        )