def load_group_data(self):
    """Load group definitions from the groups JSON file and populate the
    name/uuid/tmc-prefix lookup dictionaries.

    A group is only indexed when it carries non-blank 'name', 'uuid' and
    'displayname' fields plus a 'generateuuid' field.
    """
    with open(AuthCache.groupJsonFilename) as fp:
        self.groups = json.load(fp)
    self.groups_by_tmc_prefix = {}
    required_fields = ('name', 'uuid', 'generateuuid', 'displayname')
    for entry in self.groups:
        # skip records missing any required field or with blank values
        if not all(field in entry for field in required_fields):
            continue
        if (string_helper.isBlank(entry['name'])
                or string_helper.isBlank(entry['uuid'])
                or string_helper.isBlank(entry['displayname'])):
            continue
        record = {
            'name': entry['name'].lower().strip(),
            'uuid': entry['uuid'].lower().strip(),
            'displayname': entry['displayname'],
            'generateuuid': entry['generateuuid'],
        }
        if 'tmc_prefix' in entry:
            record['tmc_prefix'] = entry['tmc_prefix']
            # index by upper-cased TMC prefix as well (uuid/displayname
            # already verified non-blank above)
            if (not string_helper.isBlank(entry['uuid'])
                    and not string_helper.isBlank(entry['displayname'])):
                self.groups_by_tmc_prefix[entry['tmc_prefix'].upper().strip()] = {
                    'uuid': entry['uuid'],
                    'displayname': entry['displayname'],
                    'tmc_prefix': entry['tmc_prefix'],
                }
        self.groupsByName[entry['name'].lower().strip()] = record
        self.groupsById[entry['uuid']] = record
def ingest_rui_info(self):
    """Import RUI location (CCFDATA) rows that do not yet have an entity uuid.

    A row is processed only when its parent_uuid is present and is not a
    'No UUID' placeholder; the created uuid is written back into the row.
    """
    for row in self.dataset_info:
        if row['type'].upper() != "CCFDATA":
            continue
        if not string_helper.isBlank(row['new_entity_uuid']):
            continue  # already ingested
        parent = row['parent_uuid']
        if string_helper.isBlank(parent) or parent.startswith('No UUID'):
            continue
        row['new_entity_uuid'] = self.import_rui_location(row)
def ingest_recordsets(self):
    """Create Dataset entities for DATASET rows lacking an entity uuid.

    Mirrors ingest_rui_info: requires a usable parent_uuid and records the
    newly created uuid back into the row.
    """
    for row in self.dataset_info:
        if row['type'].upper() != "DATASET":
            continue
        if not string_helper.isBlank(row['new_entity_uuid']):
            continue  # already created
        parent = row['parent_uuid']
        if string_helper.isBlank(parent) or parent.startswith('No UUID'):
            continue
        row['new_entity_uuid'] = self.create_dataset(row)
def __init__(self, property_file_name):
    """Initialize the dataset-publishing helper.

    Reads required properties, resolves the dataset uuid list file (first
    command-line argument overrides the 'dataset.uuid.file' property),
    sets up error/recording log files next to the input file, validates
    the auth token and loads the dataset id list.

    Raises ErrorMessage when the id file is missing or the token fails
    validation.
    """
    # BUG FIX: the original never assigned self.prop_file_name but used it
    # in the blank-id-file error message, raising AttributeError instead of
    # the intended ErrorMessage.
    self.prop_file_name = property_file_name
    self.props = IngestProps(property_file_name,
                             required_props=['nexus.token', 'ingest.api.url',
                                             'search.api.url', 'uuid.api.url',
                                             'dataset.uuid.file',
                                             'globus.app.client.id',
                                             'globus.app.client.secret'])
    # a command-line argument takes precedence over the property setting
    if len(sys.argv) >= 2:
        self.id_file = sys.argv[1]
    else:
        self.id_file = self.props.get('dataset.uuid.file')
    if string_helper.isBlank(self.id_file):
        # also fixed the doubled "as as" typo in the message
        raise ErrorMessage("ERROR: A list of dataset uuids must be specified in " +
                           self.prop_file_name +
                           " as a property 'dataset.uuid.file' or as the first argument on the command line")
    if not os.path.isfile(self.id_file):
        raise ErrorMessage("ERROR: Input file " + self.id_file + " does not exist.")
    base_file_name = os.path.splitext(os.path.basename(self.id_file))[0]
    dir_path = file_helper.ensureTrailingSlash(os.path.dirname(self.id_file))
    # set up log files, first for errors, second to record all actions
    cur_time = time.strftime("%d-%m-%Y-%H-%M-%S")
    error_log_filename = dir_path + base_file_name + "-errors." + cur_time + ".log"
    self.error_logger = logging.getLogger('publish.datasets.err')
    self.error_logger.setLevel(logging.INFO)
    error_logFH = logging.FileHandler(error_log_filename)
    self.error_logger.addHandler(error_logFH)
    recording_log_filename = dir_path + base_file_name + "-rcding." + cur_time + ".log"
    self.recording_logger = logging.getLogger('publish.datasets.rcd')
    self.recording_logger.setLevel(logging.INFO)
    recording_logFH = logging.FileHandler(recording_log_filename)
    self.recording_logger.addHandler(recording_logFH)
    # initialize variables, get required values from property file
    self.dataset_info = None
    self.dataset_info_tsv_path = None
    self.token = self.props.get('nexus.token')
    self.search_api_url = file_helper.ensureTrailingSlashURL(self.props.get('search.api.url'))
    self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.props.get('ingest.api.url'))
    # initialize the auth helper and use it to get the
    # user information for the person running the script
    auth_helper = AuthHelper.create(self.props.get('globus.app.client.id'),
                                    self.props.get('globus.app.client.secret'))
    user_info = auth_helper.getUserInfo(self.token, getGroups=True)
    if isinstance(user_info, Response):
        raise ErrorMessage("error validating auth token: " + user_info.get_data(as_text=True))
    # read the dataset id list, skipping blank lines and '#' comments
    # (use a context manager so the file is closed even on error)
    with open(self.id_file, 'r') as id_f:
        id_lines = id_f.readlines()
    self.ds_ids = []
    for id_line in id_lines:
        if not string_helper.isBlank(id_line):
            tl = id_line.strip()
            if not tl.startswith('#'):
                self.ds_ids.append(tl)
    self.donors_to_reindex = []
    self.set_acl_commands = []
def __init__(self, property_file_name):
    """Load configuration and the dataset info TSV for the ingest run.

    Raises Exception when the property file or the configured TSV file is
    missing, or when the info file is not a .tsv file.
    """
    self.dataset_info = None
    self.dataset_info_tsv_path = None
    self.prop_file_name = property_file_name
    if not os.path.isfile(property_file_name):
        raise Exception("property file does not exist: " + property_file_name)
    # Open the properties file
    prop_manager = Property()
    self.props = prop_manager.load_property_files(property_file_name)
    self.data_root_path = file_helper.ensureTrailingSlash(self.get_prop('root.path.to.data'))
    self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("ingest.api.url"))
    self.nexus_token = self.get_prop("nexus.token").strip()
    self.entity_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("entity.api.url"))
    self.uuid_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("uuid.api.url"))
    self.dataset_info_tsv_path = self.get_prop("vand.dataset.info.tsv")
    if string_helper.isBlank(self.dataset_info_tsv_path) or not os.path.isfile(self.dataset_info_tsv_path):
        raise Exception("dataset info file does not exist:" + self.dataset_info_tsv_path)
    if not self.dataset_info_tsv_path.endswith(".tsv"):
        raise Exception("dataset info file must be of type .tsv : " + self.dataset_info_tsv_path)
    # read every TSV row into a plain dict
    self.dataset_info = []
    with open(self.dataset_info_tsv_path, newline='') as tsvfile:
        for row in csv.DictReader(tsvfile, delimiter='\t'):
            self.dataset_info.append({column: row[column] for column in row})
    self.collections = {}
    self.meta_info = None
def get_prop(self, prop_name):
    """Return the value of a required property.

    Raises Exception when the property is absent from the loaded property
    file or present but blank.
    """
    if prop_name not in self.props:
        raise Exception("Required property " + prop_name + " not found in " + self.prop_file_name)
    value = self.props[prop_name]
    if string_helper.isBlank(value):
        raise Exception("Required property " + prop_name + " from " + self.prop_file_name + " is blank")
    return value
def test(self, lab_id):
    """Resolve a lab identifier to its hm_uuid (when already registered)
    or to its tmc_prefix from the globus groups info.

    Returns None for a blank id, an empty Response when the id cannot be
    resolved, the stored hm_uuid when found in hm_data_centers, otherwise
    the group's tmc_prefix.
    """
    if isBlank(lab_id):
        return None
    check_id = lab_id.strip().lower()
    with closing(self.uuid_db.getDBConnection()) as dbConn:
        with closing(dbConn.cursor()) as curs:
            # SECURITY FIX: parameterized query; the original concatenated
            # check_id directly into the SQL string (injection risk)
            curs.execute(
                "select hm_uuid from hm_data_centers where hm_uuid = %s or dc_uuid = %s",
                (check_id, check_id))
            result = curs.fetchone()
            if result is not None:
                return result[0]
            # Deprecate the use of Provenance, use the new globus_groups module - Zhou
            # (dropped the unused Provenance("a","b","c") construction)
            # Get the globus groups info based on the groups json file in commons package
            globus_groups_info = globus_groups.get_globus_groups_info()
            groups_by_tmc_prefix_dict = globus_groups_info['by_tmc_prefix']
            # BUG FIX: a missing key raises KeyError, which the original
            # 'except ValueError' never caught -- test membership instead
            if check_id not in groups_by_tmc_prefix_dict:
                return Response("")
            lab = groups_by_tmc_prefix_dict[check_id]
            if 'tmc_prefix' not in lab:
                return Response("")
            # NOTE(review): removed the stray curs.execute("") -- executing
            # an empty statement raises an error on most DB drivers
            return lab['tmc_prefix']
def __init__(self, property_file_name):
    """Initialize the collections ingest helper.

    Loads required properties, validates the collections TSV path, reads
    every TSV row into self.collection_info and opens a Neo4j connection.

    Raises Exception when the TSV file is missing or not a .tsv file.
    """
    self.props = IngestProps(property_file_name,
                             required_props=['nexus.token', 'neo4j.server',
                                             'neo4j.username', 'neo4j.password',
                                             'collections.input.file',
                                             'uuid.api.url'])
    self.uuid_helper = UUIDHelper(ingest_props=self.props)
    self.token = self.props.get('nexus.token')
    self.neo4j_server = self.props.get('neo4j.server')
    self.neo4j_user = self.props.get('neo4j.username')
    self.neo4j_password = self.props.get('neo4j.password')
    self.collections_tsv_path = self.props.get("collections.input.file")
    self.auth_helper = AuthHelper.instance()
    if string_helper.isBlank(self.collections_tsv_path) or not os.path.isfile(self.collections_tsv_path):
        # fixed message typo: "tsf" -> "tsv"
        raise Exception("collections tsv file does not exist:" + self.collections_tsv_path)
    if not self.collections_tsv_path.endswith(".tsv"):
        raise Exception("collections file must be of type .tsv : " + self.collections_tsv_path)
    # copy each DictReader row into a plain dict
    self.collection_info = []
    with open(self.collections_tsv_path, newline='') as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        for row in reader:
            self.collection_info.append(dict(row))
    self.graph = Graph(self.neo4j_server, auth=(self.neo4j_user, self.neo4j_password))
def getFileIdInfo(self, fid):
    """Look up file info (path, checksum, size, base_dir, ancestor) for a
    32-hex-character file uuid.

    Returns a JSON string on success, or a Response with 400 (bad format)
    / 404 (not found).
    """
    check_id = fid.strip()
    # SECURITY FIX: enforce the documented "32 digit hex only" format.
    # The original only checked the length, letting arbitrary 32-char
    # payloads reach a string-concatenated SQL statement.
    if (isBlank(check_id) or len(check_id) != 32
            or not all(c in "0123456789abcdefABCDEF" for c in check_id)):
        return Response("Invalid file id format. 32 digit hex only.", 400)
    # parameterized query instead of string concatenation
    sql = ("select hm_uuid, path, checksum, size, base_dir, ancestor_uuid"
           " from hm_files inner join hm_ancestors"
           " on hm_ancestors.descendant_uuid = hm_files.hm_uuid"
           " where hm_uuid = %s")
    with closing(self.hmdb.getDBConnection()) as dbConn:
        with closing(dbConn.cursor()) as curs:
            curs.execute(sql, (check_id,))
            results = [
                dict((curs.description[i][0], value)
                     for i, value in enumerate(row))
                for row in curs.fetchall()
            ]
    # a single truthiness test covers None and the empty list
    if not results:
        return Response("Could not find the target id: " + fid, 404)
    if 'hm_uuid' not in results[0] or results[0]['hm_uuid'] is None:
        return Response("Could not find the target id: " + fid, 404)
    rdict = self._convert_result_id_array(results, check_id)
    # drop optional fields that came back NULL rather than emitting nulls
    if 'checksum' in rdict and rdict['checksum'] is None:
        rdict.pop('checksum')
    if 'size' in rdict and rdict['size'] is None:
        rdict.pop('size')
    return json.dumps(rdict, indent=4, sort_keys=True, default=str)
def fix_ids(self):
    """Resolve parent display ids to uuids for CCFDATA/IMAGE/CLINICALDATA rows."""
    fixable_types = ('CCFDATA', 'IMAGE', 'CLINICALDATA')
    for row in self.dataset_info:
        if row['type'] not in fixable_types:
            continue
        if 'parent_display_id' in row and not string_helper.isBlank(row['parent_display_id']):
            row['parent_uuid'] = self.resolveToUUID(row['parent_display_id'])
def identifyGroups(groups):
    """Build a dict of well-formed group records keyed by lower-cased name.

    Each accepted record is also registered in AuthCache.groupsById by its
    raw uuid.  Groups missing name/uuid/generateuuid/displayname (or with
    blank values) are skipped.
    """
    groupIdByName = {}
    mandatory = ('name', 'uuid', 'generateuuid', 'displayname')
    for grp in groups:
        if not all(field in grp for field in mandatory):
            continue
        if (string_helper.isBlank(grp['name'])
                or string_helper.isBlank(grp['uuid'])
                or string_helper.isBlank(grp['displayname'])):
            continue
        record = {
            'name': grp['name'].lower().strip(),
            'uuid': grp['uuid'].lower().strip(),
            'displayname': grp['displayname'],
            'generateuuid': grp['generateuuid'],
        }
        # carry through optional fields when present
        for optional in ('tmc_prefix', 'data_provider', 'shortname'):
            if optional in grp:
                record[optional] = grp[optional]
        groupIdByName[grp['name'].lower().strip()] = record
        AuthCache.groupsById[grp['uuid']] = record
    return groupIdByName
def ingest_collections(self):
    """Create Collection entities for COLLECTION rows without a uuid and
    register every COLLECTION row in self.collections by parent_display_id."""
    for row in self.dataset_info:
        if row['type'].upper() != "COLLECTION":
            continue
        if string_helper.isBlank(row['new_entity_uuid']):
            created_uuid = self.create_collection(row['name'],
                                                  row['description'],
                                                  row['collection_key'])
            row['new_entity_uuid'] = created_uuid
            print("Collection created key:" + row['parent_display_id'] + "uuid:" + created_uuid)
        self.collections[row['parent_display_id']] = row
def check_io():
    """Pump the child process's stdout into the logger until EOF."""
    while True:
        line = process.stdout.readline().decode()
        if not line:
            break  # EOF: the pipe returned an empty string
        if not string_helper.isBlank(line):
            logger.log(logging.INFO, line.rstrip())
def getFirstNonComment(file):
    """Return the first non-blank, non-comment line of *file*, stripped.

    Lines starting with '#' (after stripping) are treated as comments.
    Returns "" when the file contains no such line.
    """
    found = ""
    with open(file) as fp:
        for raw_line in fp:
            if string_helper.isBlank(raw_line):
                continue
            candidate = raw_line.strip()
            if candidate.startswith('#'):
                continue
            found = candidate
            break
    return found
def init():
    """Initialize the module-level logger and UUIDWorker from app config.

    Logging failures are reported but non-fatal; configuration/worker
    failures are printed and logged, with a pointer to the log file.
    """
    global logger
    global worker
    try:
        logger = logging.getLogger('uuid.service')
        logger.setLevel(logging.INFO)
        logFH = logging.FileHandler(LOG_FILE_NAME)
        logger.addHandler(logFH)
        logger.info("started")
    except Exception as e:
        print("Error opening log file during startup")
        print(str(e))
    try:
        if 'APP_CLIENT_ID' not in app.config or isBlank(
                app.config['APP_CLIENT_ID']):
            raise Exception(
                "Required configuration parameter APP_CLIENT_ID not found in application configuration."
            )
        # BUG FIX: the original re-checked APP_CLIENT_ID here, so a blank
        # APP_CLIENT_SECRET slipped through validation undetected.
        if 'APP_CLIENT_SECRET' not in app.config or isBlank(
                app.config['APP_CLIENT_SECRET']):
            raise Exception(
                "Required configuration parameter APP_CLIENT_SECRET not found in application configuration."
            )
        cId = app.config['APP_CLIENT_ID']
        cSecret = app.config['APP_CLIENT_SECRET']
        dbHost = app.config['DB_HOST']
        dbName = app.config['DB_NAME']
        dbUsername = app.config['DB_USERNAME']
        dbPassword = app.config['DB_PASSWORD']
        worker = UUIDWorker(clientId=cId, clientSecret=cSecret, dbHost=dbHost,
                            dbName=dbName, dbUsername=dbUsername,
                            dbPassword=dbPassword)
        logger.info("initialized")
    except Exception as e:
        print("Error during startup.")
        print(str(e))
        logger.error(e, exc_info=True)
        print("Check the log file for further information: " + LOG_FILE_NAME)
def convert_to_globus_url(self, path_to_file):
    """Map an absolute path under the Globus mount to a Globus URL.

    Returns "" when the path is blank or does not live under
    self.globus_system_dir.  Spaces are percent-encoded for the URL.
    """
    if string_helper.isBlank(path_to_file):
        return ""
    trimmed = path_to_file.strip()
    if not trimmed.startswith(self.globus_system_dir):
        return ""
    relative_part = trimmed[len(self.globus_system_dir):]
    return self.globus_base_url + file_helper.ensureBeginningSlashURL(relative_part).replace(" ", "%20")
def get(self, prop_name, required=False):
    """Look up a property value.

    Returns None for a missing, non-required property.  When *required*
    is True, raises ErrorMessage if the property is missing or blank.
    """
    if prop_name not in self.props:
        if not required:
            return None
        raise ErrorMessage("Required property " + prop_name + " not found in " + self.prop_file_name)
    value = self.props[prop_name]
    if required and string_helper.isBlank(value):
        raise ErrorMessage("Required property " + prop_name + " from " + self.prop_file_name + " is blank")
    return self.props[prop_name]
def __resolve_lab_id(self, lab_id, user_id, user_email):
    """Resolve a lab id to {'hm_uuid', 'dc_code'}, registering the lab
    on first sight.

    Looks the id up in hm_data_centers; when absent, resolves it against
    the globus groups tmc-prefix table, mints a new LAB uuid and inserts
    the mapping.  Returns None for a blank id, or a 400 Response when the
    lab cannot be resolved or lacks a tmc_prefix.
    """
    if isBlank(lab_id):
        return None
    check_id = lab_id.strip().lower()
    r_val = {}
    with closing(self.hmdb.getDBConnection()) as dbConn:
        with closing(dbConn.cursor()) as curs:
            # SECURITY FIX: parameterized; the original concatenated the
            # caller-supplied id into the SQL string (injection risk)
            curs.execute(
                "select hm_uuid, dc_code from hm_data_centers where hm_uuid = %s or dc_uuid = %s",
                (check_id, check_id))
            result = curs.fetchone()
            if result is None:
                try:
                    # Deprecate the use of Provenance
                    #lab = self.prov_helper.get_group_by_identifier(check_id)
                    # Get the globus groups info based on the groups json file in commons package
                    globus_groups_info = globus_groups.get_globus_groups_info()
                    groups_by_tmc_prefix_dict = globus_groups_info['by_tmc_prefix']
                    if check_id not in groups_by_tmc_prefix_dict:
                        lab = {}
                    else:
                        lab = groups_by_tmc_prefix_dict[check_id]
                except ValueError:
                    return Response(
                        "A valid lab with specified id not found id:" + check_id, 400)
                if 'tmc_prefix' not in lab:
                    return Response(
                        "Lab with specified id:" + check_id +
                        " does not contain a tmc_prefix.", 400)
                # mint a single LAB uuid (no base id generation)
                uuid_json = self.newUUIDs([], "LAB", user_id, user_email, 1,
                                          gen_base_ids=False)
                uuid_info = json.loads(uuid_json)
                r_val['dc_code'] = lab['tmc_prefix']
                r_val['hm_uuid'] = uuid_info[0]['uuid']
                # SECURITY FIX: parameterized insert as well
                curs.execute(
                    "insert into hm_data_centers (HM_UUID, DC_UUID, DC_CODE) VALUES (%s,%s,%s)",
                    (r_val['hm_uuid'], check_id, r_val['dc_code']))
                dbConn.commit()
            else:
                r_val['dc_code'] = result[1]
                r_val['hm_uuid'] = result[0]
    return r_val
def get_collection_key(parent_dir):
    """Return the canonical (upper-cased, stripped) collection key for a
    parent directory, creating and caching the collection info on first use.

    Raises Exception for a blank directory name.
    """
    if string_helper.isBlank(parent_dir):
        raise Exception("Can't create a collection with an empty name.")
    coll_key = parent_dir.strip().upper()
    if coll_key not in collections:
        block_id = csv_data.get_block_id(parent_dir)
        # build the collection record from the description template
        collections[coll_key] = {
            'description': substitute_into_template("collection-description.txt", block_id, None),
            'name': get_dataset_name(block_id, 'collection'),
        }
    return coll_key
def check_fields(self, collection, row_num):
    """Validate required fields of a collection TSV row.

    Returns False immediately for a missing coll_num or a commented-out
    ('#') row; otherwise reports every missing/invalid field and returns
    True only when all checks pass.
    """
    all_good = True
    if 'coll_num' not in collection or string_helper.isBlank(collection['coll_num']):
        print("coll_num field missing in row " + str(row_num) + ". Will not import row.")
        return False
    if collection['coll_num'].strip().startswith('#'):
        print("collection " + collection['coll_num'] + " is commented out. Will not import")
        return False
    if 'title' not in collection or string_helper.isBlank(collection['title']):
        print("collection with coll_num " + collection['coll_num'] +
              " is missing the title field. Will not be imported.")
        all_good = False
    if 'description' not in collection or string_helper.isBlank(collection['description']):
        print("collection with coll_num " + collection['coll_num'] +
              " is missing the description field. Will not be imported.")
        all_good = False
    if 'creators' not in collection or string_helper.isBlank(collection['creators']):
        print("collection with coll_num " + collection['coll_num'] +
              " is missing the creators field. Will not be imported.")
        all_good = False
    # BUG FIX: guard the JSON check with elif; the original indexed
    # collection['creators'] unconditionally and raised KeyError when the
    # creators column was absent from the row.
    elif not self.is_valid_json(collection['creators']):
        print("collection with coll_num " + collection['coll_num'] +
              " does not contain valid json for the creators field. Will not be imported.")
        all_good = False
    if 'datasets' not in collection or string_helper.isBlank(collection['datasets']):
        print("collection with coll_num " + collection['coll_num'] +
              " is missing the datasets field. Will not be imported.")
        all_good = False
    return all_good
def getAuthorizationTokens(self, requestHeaders):
    """Extract authorization token(s) from the request headers.

    A 'Mauthorization' header (optionally prefixed 'MBearer') carries a
    JSON token bundle and takes precedence; an 'Authorization: Bearer'
    header carries a single token string.  Returns the decoded tokens or
    a 401 Response on any failure.
    """
    if 'Mauthorization' in requestHeaders:
        raw = requestHeaders['Mauthorization']
        if string_helper.isBlank(raw):
            return Response("Empty Mauthorization header", 401)
        raw = raw.strip()
        # strip the optional 7-character "MBearer" prefix
        jsonTokens = raw[7:].strip() if raw.upper().startswith("MBEARER") else raw
        try:
            return json.loads(jsonTokens)
        except Exception as e:
            print("ERROR!: " + str(e))
            return Response("Error decoding json included in Mauthorization header", 401)
    if 'Authorization' in requestHeaders:
        raw = requestHeaders['Authorization']
        if string_helper.isBlank(raw):
            return Response("Empty Authorization header", 401)
        raw = raw.strip()
        if len(raw) <= 7:
            return Response("Invalid Authorization header", 401)
        if not raw.upper().startswith("BEARER"):
            return Response("Bearer Authorization required", 401)
        token = raw[6:].strip()
        if string_helper.isBlank(token):
            return Response('Invalid Bearer Authorization', 401)
        return token
    return Response('No Authorization header', 401)
def __init__(self, clientId, clientSecret, dbHost, dbName, dbUsername, dbPassword):
    """Initialize the worker with Globus credentials and DB connection info.

    Raises Exception when the client id or secret is missing/blank.
    """
    if clientId is None or clientSecret is None or isBlank(
            clientId) or isBlank(clientSecret):
        raise Exception(
            "Globus client id and secret are required in AuthHelper")
    if not AuthHelper.isInitialized():
        self.authHelper = AuthHelper.create(clientId=clientId,
                                            clientSecret=clientSecret)
    else:
        # BUG FIX: the original called self.authHelper.instance() before
        # self.authHelper was ever assigned (AttributeError); fetch the
        # existing singleton from the class and store it.
        self.authHelper = AuthHelper.instance()
    #Open the config file
    self.logger = logging.getLogger('uuid.service')
    self.dbHost = dbHost
    self.dbName = dbName
    self.dbUsername = dbUsername
    self.dbPassword = dbPassword
    self.lock = threading.RLock()
    self.hmdb = DBConn(self.dbHost, self.dbUsername, self.dbPassword, self.dbName)
def load_dataset_info():
    """Read the ingest info TSV and index DATASET rows by new_entity_uuid.

    DATASET rows lacking a uuid are reported with a warning and skipped.
    """
    with open(ingest_info_file, newline='') as tsvfile:
        for row in csv.DictReader(tsvfile, delimiter='\t'):
            record = {column: row[column] for column in row}
            if record['type'] != 'DATASET':
                continue
            if string_helper.isBlank(record['new_entity_uuid']):
                print("WARNING: DATASET WITHOUT UUID: " + record['parent_display_id'])
            else:
                dataset_info_by_uuid[record['new_entity_uuid']] = record
def isValidHMId(hmid):
    """Return True when *hmid* is a well-formed HuBMAP id.

    Accepts component-prefixed ids outright; otherwise the stripped id must
    be either a 10-character display id (3 digits, 4 letters, 3 digits) or
    a 32-character hex uuid.
    """
    if isBlank(hmid):
        return False
    if startsWithComponentPrefix(hmid):
        return True
    stripped = stripHMid(hmid)
    size = len(stripped)
    if size not in (10, 32):
        return False
    stripped = stripped.upper()
    if size == 10:
        # display id layout: NNN AAAA NNN
        return (set(stripped[0:3]).issubset(HMID_NUM_CHARS)
                and set(stripped[3:7]).issubset(HMID_ALPHA_CHARS)
                and set(stripped[7:]).issubset(HMID_NUM_CHARS))
    # 32-character form must be pure hex
    return set(stripped).issubset(HEX_CHARS)
def __init__(self, clientId, clientSecret, globusGroups=None):
    """Construct the singleton AuthHelper.

    Raises Exception when an instance already exists or when the Globus
    client id/secret are missing or blank.  Registers optional globus
    groups and derives the process secret from the client secret.
    """
    global helperInstance
    if helperInstance is not None:
        raise Exception("An instance of singleton AuthHelper exists already. Use AuthHelper.instance() to retrieve it")
    credentials_missing = (clientId is None or clientSecret is None
                           or string_helper.isBlank(clientId)
                           or string_helper.isBlank(clientSecret))
    if credentials_missing:
        raise Exception("Globus client id and secret are required in AuthHelper")
    self.applicationClientId = clientId
    self.applicationClientSecret = clientSecret
    if globusGroups is not None:
        AuthCache.setGlobusGroups(globusGroups)
    # process secret: client secret reduced to alphanumerics only
    AuthCache.setProcessSecret(re.sub(r'[^a-zA-Z0-9]', '', clientSecret))
    if helperInstance is None:
        helperInstance = self
def create_dataset(self, ingest_row):
    """POST a new Dataset to the ingest API and return its uuid.

    The ingest service expects a payload of the shape:
    {
      "dataset_name": "Test Name",
      "dataset_description": "This is a test description",
      "dataset_collection_uuid": "ab93b3983acge938294857fe292429234",
      "source_uuids": ["..."],
      "data_types": ["PAS"],
      "creator_email": "...", "creator_name": "...",
      "group_uuid": "...", "group_name": "...",
      "contains_human_genomic_sequences": "no"
    }

    Raises (via raise_for_status) on non-2xx responses and Exception when
    no uuid comes back.
    """
    payload = {}
    # strip non-ascii characters from free-text fields
    payload['dataset_name'] = ingest_row['name'].encode(encoding='ascii', errors='ignore').decode('ascii')
    payload['dataset_description'] = ingest_row['description'].encode(encoding='ascii', errors='ignore').decode('ascii')
    if not ingest_row['collection_key'].startswith("NO_COLLECTION"):
        payload['dataset_collection_uuid'] = self.lookup_collection_uuid(ingest_row['collection_key'])
    payload['source_uuids'] = ingest_row['parent_uuid'].split('|')
    dtype = ingest_row['assay_type']
    # normalize the bare 'LC' assay label to its full name
    if not string_helper.isBlank(dtype) and dtype.upper() == 'LC':
        dtype = 'LC-MS-untargeted'
    payload['data_types'] = [dtype]
    payload['creator_email'] = ingest_row['creator_email']
    payload['creator_name'] = ingest_row['creator_name']
    payload['group_uuid'] = ingest_row['group_id']
    payload['group_name'] = ingest_row['group_name']
    payload['contains_human_genomic_sequences'] = 'no'
    url = file_helper.ensureTrailingSlashURL(self.ingest_api_url) + "datasets/ingest"
    heads = {'Authorization': 'Bearer ' + self.nexus_token,
             'Content-Type': 'application/json'}
    # NOTE(review): verify=False disables TLS certificate checking --
    # presumably intentional for an internal endpoint; confirm.
    resp = requests.post(url, headers=heads, data=json.dumps(payload), verify=False)
    if resp.status_code < 200 or resp.status_code >= 300:
        print("Unable to create RECORDSET for parent id:" + ingest_row['parent_display_id'] + " assay type:" + ingest_row['assay_type'], file=sys.stderr)
        resp.raise_for_status()
    val = resp.json()
    if val is None or 'uuid' not in val:
        raise Exception("No UUID returned on creation of DATASET parent id:" + ingest_row['parent_display_id'] + " assay type:" + ingest_row['assay_type'])
    print("Created Dataset ingest_id:" + ingest_row['ingest_id'] + " UUID:" + val['uuid'] + " parent id:" + ingest_row['parent_display_id'] + " assay type:" + ingest_row['assay_type'])
    return val['uuid']
def print_protocol_info(self, entity, entity_type, include_uuid=False, include_test_group=False):
    """Print one comma-separated summary row of an entity's protocol info.

    Skips entities with a blank provenance group and (unless
    include_test_group is set) entities owned by the test group.  Collects
    protocol DOIs/urls, protocol files and their Globus URLs from both the
    'protocols' list and the single 'protocol'/'protocol_file' attributes.
    """
    group_id = self.get_dict_attrib(entity, "provenance_group_uuid").strip().lower()
    if string_helper.isBlank(group_id):
        return
    if not include_test_group and self.test_group_id == group_id:
        return
    group_name = self.get_dict_attrib(entity, "provenance_group_name")
    display_doi = self.get_dict_attrib(entity, "display_doi")
    uuid = self.get_dict_attrib(entity, "uuid")
    display_id = self.get_dict_attrib(entity, "hubmap_identifier")
    protocols = self.get_dict_attrib(entity, "protocols")
    prot_files = []
    prot_globus_urls = []
    prot_dois = []
    if not string_helper.isBlank(protocols):
        # stored with single quotes; normalize to double quotes for json
        for prot in json.loads(protocols.replace("'", '"')):
            if "protocol_file" in prot and not string_helper.isBlank(prot['protocol_file']):
                prot_files.append(prot['protocol_file'])
                prot_globus_urls.append(self.convert_to_globus_url(prot['protocol_file']))
            if "protocol_url" in prot and not string_helper.isBlank(prot['protocol_url']):
                prot_dois.append(prot['protocol_url'])
    single_protocol = self.get_dict_attrib(entity, "protocol")
    if not string_helper.isBlank(single_protocol):
        prot_dois.append(single_protocol)
    single_file = self.get_dict_attrib(entity, "protocol_file")
    if not string_helper.isBlank(single_file):
        prot_files.append(single_file)
        prot_globus_urls.append(self.convert_to_globus_url(single_file))
    vals = [entity_type, group_name, display_id, display_doi]
    if include_uuid:
        vals.append(uuid)
    vals.append(string_helper.listToDelimited(prot_dois, delimitChar=" | "))
    vals.append(string_helper.listToDelimited(prot_globus_urls, delimitChar=" | "))
    vals.append(string_helper.listToDelimited(prot_files, delimitChar=" | "))
    print(string_helper.listToCommaSeparated(vals))
def write_tsv(row, datatype):
    """Write one entity row to <output_dir>/<uuid>-<datatype>.tsv.

    The hmuuid column is excluded from the output.  metadata_path and
    data_path values are rewritten relative to the dataset's submission
    directory under /hive/hubmap/lz when that base path can be resolved.
    """
    uuid = row['hmuuid']
    if string_helper.isBlank(uuid):
        print("ERROR: no uuid for row in datatype: " + datatype)
        return
    output_path = output_dir + "/" + row['hmuuid'] + "-" + datatype + ".tsv"
    with open(output_path, 'w') as writer:
        keys = []
        data = []
        base_submission_path = None
        if uuid not in dataset_info_by_uuid:
            print("WARNING: dataset information not found for " + uuid)
        else:
            base_submission_path = dataset_info_by_uuid[uuid]['local_path']
            if not base_submission_path.lower().startswith('/hive/hubmap/lz'):
                print("WARNING: dataset local path doesn't start with /hive/hubmap/lz: " + uuid)
            else:
                # keep only the portion below the landing zone root
                base_submission_path = base_submission_path[15:].lower()
        for key in row.keys():
            if key == 'hmuuid':
                continue
            val = row[key]
            keyl = key.lower()
            # BUG FIX: the original compared keyl for metadata_path but raw
            # key for data_path, so a 'DATA_PATH' column escaped rewriting.
            if keyl == 'metadata_path' or keyl == 'data_path':
                if base_submission_path is None:
                    # fixed typo: "base_submittion_path"
                    print("ERROR: no base_submission_path set on " + key + ": " + val)
                if val is None:
                    print("WARNING: no path found for " + key + " for type: " + datatype + " on uuid: " + uuid)
                elif not val.lower().startswith(base_submission_path):
                    print("WARNING: path " + key + ": " + val + " does not start with base path: " + base_submission_path)
                else:
                    val = val[len(base_submission_path):]
            keys.append(key)
            data.append(val)
        writer.write(string_helper.listToDelimited(keys, delimitChar='\t') + '\n')
        writer.write(string_helper.listToDelimited(data, delimitChar='\t') + '\n')
def __init__(self, file_name):
    """Load a TSV file into self.records (one dict per row) and
    self.header (column names taken from the first data row).

    Raises Exception for a blank filename and ErrorMessage when the file
    does not exist.
    """
    if string_helper.isBlank(file_name):
        raise Exception("ERROR: TSV Filename Not Found")
    if not os.path.isfile(file_name):
        raise ErrorMessage("TSV file " + file_name + " not found.")
    self.records = []
    self.header = []
    with open(file_name, newline='') as tsvfile:
        for row_num, row in enumerate(csv.DictReader(tsvfile, delimiter='\t')):
            if row_num == 0:
                # capture the column order from the first row
                self.header = list(row.keys())
            self.records.append({column: row[column] for column in row})
def __authRecord(appKey, token, getGroups=False):
    """Build a cache record for a token introspection result.

    Returns {'info', 'timestamp', 'valid'}; 'valid' is False when the
    lookup returned a Response or the token is not active.  For valid
    tokens with a scope, 'hmscopes' is added to the info dict as a list
    of lower-cased scope strings.
    """
    stamp = datetime.datetime.now()
    info = AuthCache.__userInfo(appKey, token, getGroups)
    record = {'info': info, 'timestamp': stamp}
    if isinstance(info, Response):
        record['valid'] = False
    elif 'active' not in info or info['active'] is None:
        record['valid'] = False
    else:
        record['valid'] = info['active']
    if record['valid'] and 'scope' in info and not string_helper.isBlank(info['scope']):
        # pre-split the lower-cased scopes for cheap membership checks later
        info['hmscopes'] = info['scope'].lower().strip().split()
    return record