def __init__(self, property_file_name):
    """Load the properties file and build the helpers configured by it.

    Args:
        property_file_name: handed to ``PropHelper`` together with the
            list of property names that must be present.
    """
    required = [
        'neo4j.server',
        'neo4j.username',
        'neo4j.password',
        'db.host',
        'db.name',
        'db.username',
        'db.password',
        'user.mapping',
        'client.id',
        'client.secret',
        'old.ingest.upload.dir',
        'new.ingest.upload.dir',
        'uuid.api.url',
        'user.nexus.token',
    ]
    self.props = PropHelper(property_file_name, required_props=required)
    prop = self.props.get

    # Neo4j connection
    neo4j_server = prop('neo4j.server')
    neo4j_auth = (prop('neo4j.username'), prop('neo4j.password'))
    self.graph = Graph(neo4j_server, auth=neo4j_auth)

    # UUID worker: client credentials first, then the database settings
    self.uuid_wrker = UUIDWorker(
        prop('client.id'),
        prop('client.secret'),
        prop('db.host'),
        prop('db.name'),
        prop('db.username'),
        prop('db.password'),
    )

    # Upload directories (old and new locations)
    old_dir = prop('old.ingest.upload.dir')
    self.old_ingest_upload_dir = file_helper.ensureTrailingSlash(old_dir)
    new_dir = prop('new.ingest.upload.dir')
    self.new_ingest_upload_dir = file_helper.ensureTrailingSlash(new_dir)

    # The uuid-api endpoint used is "<uuid.api.url>/hmuuid"
    uuid_base = file_helper.ensureTrailingSlashURL(prop('uuid.api.url'))
    self.uuid_api_url = uuid_base + "hmuuid"
    self.user_token = prop('user.nexus.token')
def __init__(self, property_file_name):
    """Load settings from a properties file and read the dataset info TSV.

    Args:
        property_file_name: path to the properties file.

    Raises:
        Exception: if the properties file does not exist, or if the file
            named by the ``vand.dataset.info.tsv`` property is blank,
            missing, or does not end in ``.tsv``.
    """
    self.dataset_info = None
    self.dataset_info_tsv_path = None
    self.prop_file_name = property_file_name
    if not os.path.isfile(property_file_name):
        raise Exception("property file does not exist: " + property_file_name)

    # Open the properties file
    propMgr = Property()
    self.props = propMgr.load_property_files(property_file_name)

    self.data_root_path = file_helper.ensureTrailingSlash(self.get_prop('root.path.to.data'))
    self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("ingest.api.url"))
    self.nexus_token = self.get_prop("nexus.token").strip()
    self.entity_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("entity.api.url"))
    self.uuid_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("uuid.api.url"))

    self.dataset_info_tsv_path = self.get_prop("vand.dataset.info.tsv")
    if string_helper.isBlank(self.dataset_info_tsv_path) or not os.path.isfile(self.dataset_info_tsv_path):
        # str() keeps this message reachable if the property value is
        # None; plain concatenation would raise TypeError instead.
        raise Exception("dataset info file does not exist:" + str(self.dataset_info_tsv_path))
    if not self.dataset_info_tsv_path.endswith(".tsv"):
        raise Exception("dataset info file must be of type .tsv : " + self.dataset_info_tsv_path)

    # One plain dict per data row, keyed by the TSV header names.
    with open(self.dataset_info_tsv_path, newline='') as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        self.dataset_info = [dict(row) for row in reader]

    self.collections = {}
    self.meta_info = None
class ProvConst(object):
    """String constants for provenance types and attribute names."""

    # --- prov: node types ---
    PROV_ENTITY_TYPE = "prov:Entity"
    PROV_ACTIVITY_TYPE = "prov:Activity"
    PROV_AGENT_TYPE = "prov:Agent"
    PROV_COLLECTION_TYPE = "prov:Collection"
    PROV_ORGANIZATION_TYPE = "prov:Organization"
    PROV_PERSON_TYPE = "prov:Person"

    # --- prov: attribute names ---
    PROV_LABEL_ATTRIBUTE = "prov:label"
    PROV_TYPE_ATTRIBUTE = "prov:type"
    PROV_GENERATED_TIME_ATTRIBUTE = "prov:generatedAtTime"

    # --- hubmap: entity attribute names ---
    # the doi concept here might be a good alternative:
    # https://sparontologies.github.io/datacite/current/datacite.html
    HUBMAP_DOI_ATTRIBUTE = "hubmap:doi"
    HUBMAP_DISPLAY_DOI_ATTRIBUTE = "hubmap:displayDOI"
    HUBMAP_SPECIMEN_TYPE_ATTRIBUTE = "hubmap:specimenType"
    HUBMAP_DISPLAY_IDENTIFIER_ATTRIBUTE = "hubmap:displayIdentifier"
    HUBMAP_UUID_ATTRIBUTE = "hubmap:uuid"
    # HUBMAP_SOURCE_UUID_ATTRIBUTE = 'hubmap:sourceUUID'
    HUBMAP_METADATA_ATTRIBUTE = "hubmap:metadata"
    HUBMAP_MODIFIED_TIMESTAMP = "hubmap:modifiedTimestamp"

    # --- hubmap: group / user attribute names ---
    HUBMAP_PROV_GROUP_NAME = "hubmap:groupName"
    HUBMAP_PROV_GROUP_UUID = "hubmap:groupUUID"
    HUBMAP_PROV_USER_DISPLAY_NAME = "hubmap:userDisplayName"
    HUBMAP_PROV_USER_EMAIL = "hubmap:userEmail"
    HUBMAP_PROV_USER_UUID = "hubmap:userUUID"

    # Groups JSON file located in the same directory as this module.
    groupJsonFilename = (
        file_helper.ensureTrailingSlash(
            os.path.dirname(os.path.realpath(__file__))
        )
        + "hubmap-globus-groups.json"
    )
def __init__(self, property_file_name):
    """Load properties, set up log files and read the dataset id list.

    The id file is taken from the first command line argument when one is
    given, otherwise from the 'dataset.uuid.file' property. Two log files
    (errors and a record of actions) are created next to the id file.

    Args:
        property_file_name: passed to ``IngestProps`` with the list of
            required property names.

    Raises:
        ErrorMessage: if no id file is specified, the id file does not
            exist, or the auth token cannot be validated.
    """
    self.props = IngestProps(
        property_file_name,
        required_props=['nexus.token', 'ingest.api.url', 'search.api.url',
                        'uuid.api.url', 'dataset.uuid.file',
                        'globus.app.client.id', 'globus.app.client.secret'])

    if len(sys.argv) >= 2:
        self.id_file = sys.argv[1]
    else:
        self.id_file = self.props.get('dataset.uuid.file')
    if string_helper.isBlank(self.id_file):
        # Use the constructor argument: this initializer never assigns
        # self.prop_file_name, so reading it here would fail.
        raise ErrorMessage("ERROR: A list of dataset uuids must be specified in " + property_file_name + " as as property 'dataset.uuid.file' or as the first argument on the command line")
    if not os.path.isfile(self.id_file):
        raise ErrorMessage("ERROR: Input file " + self.id_file + " does not exist.")

    base_file_name = os.path.splitext(os.path.basename(self.id_file))[0]
    dir_path = file_helper.ensureTrailingSlash(os.path.dirname(self.id_file))

    # set up log files, first for errors, second to record all actions
    cur_time = time.strftime("%d-%m-%Y-%H-%M-%S")
    error_log_filename = dir_path + base_file_name + "-errors." + cur_time + ".log"
    self.error_logger = logging.getLogger('publish.datasets.err')
    self.error_logger.setLevel(logging.INFO)
    error_logFH = logging.FileHandler(error_log_filename)
    self.error_logger.addHandler(error_logFH)

    recording_log_filename = dir_path + base_file_name + "-rcding." + cur_time + ".log"
    self.recording_logger = logging.getLogger('publish.datasets.rcd')
    self.recording_logger.setLevel(logging.INFO)
    recording_logFH = logging.FileHandler(recording_log_filename)
    self.recording_logger.addHandler(recording_logFH)

    # initialize variables, get required values from property file
    self.dataset_info = None
    self.dataset_info_tsv_path = None
    self.token = self.props.get('nexus.token')
    self.search_api_url = file_helper.ensureTrailingSlashURL(self.props.get('search.api.url'))
    self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.props.get('ingest.api.url'))

    # initialize the auth helper and use it to get the
    # user information for the person running the script
    auth_helper = AuthHelper.create(self.props.get('globus.app.client.id'),
                                    self.props.get('globus.app.client.secret'))
    user_info = auth_helper.getUserInfo(self.token, getGroups=True)
    if isinstance(user_info, Response):
        raise ErrorMessage("error validating auth token: " + user_info.get_data(as_text=True))

    # Collect dataset ids: skip blank lines and lines starting with '#'.
    self.ds_ids = []
    with open(self.id_file, 'r') as id_f:
        for id_line in id_f:
            if string_helper.isBlank(id_line):
                continue
            tl = id_line.strip()
            if not tl.startswith('#'):
                self.ds_ids.append(tl)

    self.donors_to_reindex = []
    self.set_acl_commands = []
def remove_file(self, file_dir, file_uuid, files_info_list):
    """Delete the file with the given uuid from disk and from the list.

    The first entry of ``files_info_list`` whose 'file_uuid' equals
    ``file_uuid`` is looked up. Its file, located at
    ``<file_dir>/<file_uuid>/<filename>``, is removed, then the
    ``<file_uuid>`` directory, then the entry itself.

    Args:
        file_dir: directory that holds the per-file uuid directories.
        file_uuid: uuid of the file to remove.
        files_info_list: list of dicts with 'file_uuid' and 'filename'.

    Returns:
        The same ``files_info_list`` object, modified in place when a
        matching entry was found.
    """
    matches = (info for info in files_info_list
               if info['file_uuid'] == file_uuid)
    target = next(matches, None)
    if target is None:
        # Nothing to remove.
        return files_info_list

    # Remove from file system
    uuid_dir = file_helper.ensureTrailingSlash(file_dir) + target['file_uuid']
    os.remove(uuid_dir + os.sep + target['filename'])
    os.rmdir(uuid_dir)

    # Remove from the list
    files_info_list.remove(target)
    return files_info_list
def resolve_to_uuid(self, identifier):
    """Look up an identifier through the uuid service.

    Sends ``GET <uuid_url>/hmuuid/<identifier>`` with ``self.token`` as a
    bearer token.

    Args:
        identifier: the id to resolve; appended to the URL as given.

    Returns:
        The 'hmuuid' value of the first element of the JSON response, or
        None when the request does not return a 2xx status, the response
        is not a non-empty list, or that element has no 'hmuuid'.
    """
    # This is a URL, so use the URL variant of the trailing-slash helper
    # rather than the filesystem-path one.
    url = file_helper.ensureTrailingSlashURL(self.uuid_url) + "hmuuid/" + identifier
    headers = {'Authorization': 'Bearer ' + self.token}
    resp = requests.get(url, headers=headers)
    if resp.status_code < 200 or resp.status_code >= 300:
        return None

    id_info = resp.json()
    if not isinstance(id_info, list) or len(id_info) == 0:
        return None

    vals = id_info[0]
    if 'hmuuid' in vals:
        return vals['hmuuid']
    return None
def new_uuid(self, entity_type, generate_doi=False):
    """Request a new id from the uuid service.

    Sends ``POST <uuid_url>/hmuuid`` with a JSON body holding
    'entityType' and 'generateDOI', using ``self.token`` as a bearer
    token.

    Args:
        entity_type: value sent as 'entityType'.
        generate_doi: when truthy 'generateDOI' is sent as the string
            "true", otherwise as "false".

    Returns:
        The first element of the JSON response.

    Raises:
        ErrorMessage: if the service does not answer with a 2xx status.
    """
    # This is a URL, so use the URL variant of the trailing-slash helper
    # rather than the filesystem-path one.
    url = file_helper.ensureTrailingSlashURL(self.uuid_url) + "hmuuid"
    headers = {
        'Authorization': 'Bearer ' + self.token,
        'Content-Type': 'application/json'
    }
    # The flag is sent as a string, not as a JSON boolean.
    gen_doi = "true" if generate_doi else "false"
    resp = requests.post(url,
                         json={'entityType': entity_type, 'generateDOI': gen_doi},
                         headers=headers)
    if resp.status_code < 200 or resp.status_code >= 300:
        raise ErrorMessage("Unable to generate UUID " + str(resp.status_code) + ":" + str(resp.text))
    vals = resp.json()
    return vals[0]
def upload_validate(upload_uuid):
    """Start a mock validation run for an upload.

    Fetches the upload entity from the entity web service, locates its
    upload directory, reads ``mock_run.json`` from it and starts a
    background thread running ``__apply_mock_run`` with that
    configuration.

    Args:
        upload_uuid: uuid of the upload entity.

    Returns:
        A ``Response``: the entity service's text and status code when it
        answers with a status >= 300; status 500 when the upload
        directory or ``mock_run.json`` is missing; otherwise
        "Accepted" with status 202.
    """
    ingest_helper = IngestFileHelper(app.config)
    url = commons_file_helper.ensureTrailingSlashURL(
        app.config['ENTITY_WEBSERVICE_URL']) + 'entities/' + upload_uuid
    auth_headers = {
        'Authorization': request.headers["AUTHORIZATION"],
        'X-Hubmap-Application': 'ingest-api'
    }
    resp = requests.get(url, headers=auth_headers)
    if resp.status_code >= 300:
        return Response(resp.text, resp.status_code)

    upload = resp.json()
    prev_status = upload['status']
    upload_path = ingest_helper.get_upload_directory_absolute_path(
        None, upload['group_uuid'], upload_uuid)
    if not os.path.exists(upload_path):
        return Response(f"upload directory does not exist: {upload_path}", 500)

    mock_cfg_path = commons_file_helper.ensureTrailingSlash(upload_path) + "mock_run.json"
    if not os.path.exists(mock_cfg_path):
        # Report this as an error like the missing-directory case above;
        # without an explicit status the response would be a 200.
        return Response(
            f"mock configuration json file does not exist: {mock_cfg_path}", 500)

    # Example mock_run.json
    # {
    #   "mock_processing_time_seconds": 20,
    #   "new_status_message": "new message",
    #   "new_status": "Invalid"
    # }

    # read the mock_run json file into a dict
    with open(mock_cfg_path) as json_file:
        mock_run = json.load(json_file)

    # Apply the mock run in the background so the request returns at once.
    worker = threading.Thread(
        target=__apply_mock_run,
        args=[mock_run, upload_path, upload_uuid, auth_headers, prev_status])
    worker.start()
    return Response("Accepted", 202)
def __init__(self, upload_temp_dir, upload_dir, uuid_api_url):
    """Store the upload locations and the uuid-api URL.

    Args:
        upload_temp_dir: temporary upload directory; stored after being
            passed through ``file_helper.ensureTrailingSlash``.
        upload_dir: upload directory; stored the same way.
        uuid_api_url: stored exactly as given.
    """
    with_slash = file_helper.ensureTrailingSlash
    self.upload_temp_dir = with_slash(upload_temp_dir)
    self.upload_dir = with_slash(upload_dir)
    self.uuid_api_url = uuid_api_url
class AuthCache:
    """Class-level cache of token introspection results and group data.

    All state is kept in class attributes and every method is static, so
    the cache is shared by everything that uses this class.
    """

    # token -> {'info': ..., 'timestamp': ..., 'valid': ...}
    cache = {}
    userLock = threading.RLock()
    groupLock = threading.RLock()
    groupIdByName = None
    roleIdByName = None
    groupsById = {}
    rolesById = {}
    groupLastRefreshed = None
    # JSON files located in the same directory as this module.
    groupJsonFilename = file_helper.ensureTrailingSlash(os.path.dirname(os.path.realpath(__file__))) + 'hubmap-globus-groups.json'
    globusGroups = None
    roleJsonFilename = file_helper.ensureTrailingSlash(os.path.dirname(os.path.realpath(__file__))) + 'hubmap-globus-roles.json'
    procSecret = None
    admin_groups = None
    processUserFilename = file_helper.ensureTrailingSlash(os.path.dirname(os.path.realpath(__file__))) + 'hubmap-process-user.json'
    processUser = None

    @staticmethod
    def setProcessSecret(secret):
        """Set the process secret; only the first call has an effect."""
        if AuthCache.procSecret is None:
            AuthCache.procSecret = secret

    @staticmethod
    def setGlobusGroups(globusJson):
        """Store the result of ``identifyGroups(globusJson)`` as the groups."""
        AuthCache.globusGroups = identifyGroups(globusJson)

    @staticmethod
    def getHMGroups():
        """Return the groups set via setGlobusGroups, else load the JSON file.

        NOTE(review): groupLastRefreshed is never assigned inside this
        class, so unless it is set elsewhere the refresh branch always
        runs. If it were set and still fresh, this method would fall
        through and return None.
        """
        with AuthCache.groupLock:
            now = datetime.datetime.now()
            diff = None
            if AuthCache.groupLastRefreshed is not None:
                diff = now - AuthCache.groupLastRefreshed
            if diff is None or diff.days > 0 or diff.seconds > TOKEN_EXPIRATION:
                if AuthCache.globusGroups is not None:
                    return AuthCache.globusGroups
                with open(AuthCache.groupJsonFilename) as jsFile:
                    groups = json.load(jsFile)
                return identifyGroups(groups)

    @staticmethod
    def getHMGroupsById():
        """Return groupsById, calling getHMGroups first when it is empty."""
        if not AuthCache.groupsById:
            AuthCache.getHMGroups()
        return AuthCache.groupsById

    @staticmethod
    def getHMRoles():
        """Load roles from the roles JSON file, keyed by normalized name.

        Only roles with non-blank 'name', 'uuid' and 'displayname' are
        kept. Each kept role is also stored in ``rolesById`` under its
        uuid exactly as it appears in the file.

        NOTE(review): as in getHMGroups, None is returned implicitly if
        groupLastRefreshed is set and still fresh.
        """
        with AuthCache.groupLock:
            now = datetime.datetime.now()
            diff = None
            if AuthCache.groupLastRefreshed is not None:
                diff = now - AuthCache.groupLastRefreshed
            if diff is None or diff.days > 0 or diff.seconds > TOKEN_EXPIRATION:
                roleIdByName = {}
                with open(AuthCache.roleJsonFilename) as jsFile:
                    roles = json.load(jsFile)
                required = ('name', 'uuid', 'displayname')
                for role in roles:
                    if not all(key in role for key in required):
                        continue
                    if any(string_helper.isBlank(role[key]) for key in required):
                        continue
                    role_name = role['name'].lower().strip()
                    role_obj = {'name': role_name,
                                'uuid': role['uuid'].lower().strip(),
                                'displayname': role['displayname']}
                    roleIdByName[role_name] = role_obj
                    AuthCache.rolesById[role['uuid']] = role_obj
                return roleIdByName

    @staticmethod
    def getUserWithGroups(appKey, token):
        """Shorthand for ``getUserInfo(appKey, token, getGroups=True)``."""
        return AuthCache.getUserInfo(appKey, token, getGroups=True)

    @staticmethod
    def getUserInfo(appKey, token, getGroups=False):
        """Return the cached info for a token, refreshing it when needed.

        A record is (re)fetched when the token is not cached, when the
        cached record is older than TOKEN_EXPIRATION, or when groups are
        requested but the cached info has no "hmgroupids".

        Returns:
            The introspection info, or a ``Response`` describing the
            failure when the lookup did not succeed.
        """
        with AuthCache.userLock:
            if token in AuthCache.cache:
                now = datetime.datetime.now()
                diff = now - AuthCache.cache[token]['timestamp']
                if diff.days > 0 or diff.seconds > TOKEN_EXPIRATION:  # 15 minutes
                    AuthCache.cache[token] = AuthCache.__authRecord(appKey, token, getGroups)
            else:
                AuthCache.cache[token] = AuthCache.__authRecord(appKey, token, getGroups)

            if isinstance(AuthCache.cache[token]['info'], Response):
                return AuthCache.cache[token]['info']

            # The cached record may have been fetched without group info.
            if getGroups and "hmgroupids" not in AuthCache.cache[token]['info']:
                AuthCache.cache[token] = AuthCache.__authRecord(appKey, token, getGroups)
            return AuthCache.cache[token]['info']

    @staticmethod
    def __authRecord(appKey, token, getGroups=False):
        """Build a cache record: the info, a timestamp and a 'valid' flag."""
        rVal = {}
        now = datetime.datetime.now()
        info = AuthCache.__userInfo(appKey, token, getGroups)
        rVal['info'] = info
        rVal['timestamp'] = now
        if isinstance(rVal['info'], Response):
            rVal['valid'] = False
        elif 'active' not in info or info['active'] is None:
            rVal['valid'] = False
        else:
            rVal['valid'] = info['active']
        if rVal['valid'] and 'scope' in info and not string_helper.isBlank(info['scope']):
            # Whitespace-separated scope string -> list of lower-cased scopes.
            info['hmscopes'] = info['scope'].lower().strip().split()
        return rVal

    @staticmethod
    def __get_admin_groups():
        """Return the group list used for the process user, built once."""
        if AuthCache.admin_groups is None:
            # start with hubmap-read group
            admin_grps = ["5777527e-ec11-11e8-ab41-0af86edb4424", data_admin_group_uuid]
            all_groups = AuthCache.getHMGroups()
            # add all data provider groups
            for grp in all_groups.values():
                if 'data_provider' in grp and grp['data_provider']:
                    # NOTE(review): this appends the whole group record
                    # while the initial entries are uuid strings - confirm
                    # whether the group's uuid was intended.
                    admin_grps.append(grp)
            AuthCache.admin_groups = admin_grps
        return AuthCache.admin_groups

    # Group membership is read from the Globus Groups API only; the earlier
    # lookup through the deprecated Nexus API is no longer used.

    @staticmethod
    def __get_user_groups_via_groups_api(token):
        """Return the ids of the groups the token's user belongs to.

        The process secret maps to the admin groups without any remote
        call. Otherwise the Globus Groups API is queried.

        Returns:
            A list of group ids, or a ``Response`` with status 500 when
            the call fails or its body cannot be processed.
        """
        if token == AuthCache.procSecret:
            return AuthCache.__get_admin_groups()

        # GET /v2/groups/my_groups
        url = 'https://groups.api.globus.org/v2/groups/my_groups'
        headers = {
            'Authorization': 'Bearer ' + token
        }
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            return Response("Unable to get user groups\n" + response.text, 500)
        try:
            jsonResp = response.json()
            return [grp['id'] for grp in jsonResp if 'id' in grp]
        except Exception as e:
            return Response("unable to get groups from Groups api while gathering user groups\n" + str(e), 500)

    @staticmethod
    def __userInfo(applicationKey, authToken, getGroups=False):
        """Introspect a token with Globus Auth, optionally adding groups.

        The process secret maps to the user stored in the process-user
        JSON file, loaded on first use.

        Returns:
            The introspection JSON (with 'hmgroupids',
            'group_membership_ids' and 'hmroleids' added when
            ``getGroups`` is true), or a ``Response`` on failure: 500 when
            introspection or parsing fails, 401 when the token is not
            active, or whatever the group lookup returned.
        """
        if authToken == AuthCache.procSecret:
            if AuthCache.processUser is None:
                with open(AuthCache.processUserFilename) as jsFile:
                    AuthCache.processUser = json.load(jsFile)
            return AuthCache.processUser

        postHeaders = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Authorization': 'Basic ' + applicationKey
        }
        tdata = 'token=' + authToken
        url = 'https://auth.globus.org/v2/oauth2/token/introspect'
        response = requests.post(url, headers=postHeaders, data=tdata)
        if response.status_code != 200:
            return Response("Unable to introspect user from token", 500)
        try:
            jsonResp = response.json()
            if 'active' in jsonResp and jsonResp['active']:
                if getGroups:
                    if not AuthCache.groupsById:
                        AuthCache.getHMGroups()
                    if not AuthCache.rolesById:
                        AuthCache.getHMRoles()
                    groups = AuthCache.__get_user_groups_via_groups_api(authToken)
                    if isinstance(groups, Response):
                        return groups
                    # Membership ids are not split into groups and roles
                    # here: every id goes into grp_list (de-duplicated,
                    # order kept) and role_list stays empty.
                    grp_list = []
                    role_list = []
                    for group_uuid in groups:
                        if group_uuid not in grp_list:
                            grp_list.append(group_uuid)
                    jsonResp['hmgroupids'] = grp_list
                    jsonResp['group_membership_ids'] = grp_list
                    jsonResp['hmroleids'] = role_list
                return jsonResp
            else:
                return Response("Non-active login", 401)
        except Exception as e:
            print("ERROR!: " + str(e))
            return Response("Unable to parse json response on user introspect", 500)