예제 #1
0
 def load_group_data(self):
     """Load globus group records from the cached groups json file.

     Populates self.groups (raw json), self.groups_by_tmc_prefix,
     self.groupsByName and self.groupsById.  Records missing any of the
     required fields (name, uuid, generateuuid, displayname) or with a
     blank name/uuid/displayname are skipped.
     """
     with open(AuthCache.groupJsonFilename) as jsFile:
         self.groups = json.load(jsFile)
         self.groups_by_tmc_prefix = {}
         required = ('name', 'uuid', 'generateuuid', 'displayname')
         for group in self.groups:
             # skip records missing or blank on any required field
             if not all(field in group for field in required):
                 continue
             if (string_helper.isBlank(group['name'])
                     or string_helper.isBlank(group['uuid'])
                     or string_helper.isBlank(group['displayname'])):
                 continue
             group_obj = {
                 'name': group['name'].lower().strip(),
                 'uuid': group['uuid'].lower().strip(),
                 'displayname': group['displayname'],
                 'generateuuid': group['generateuuid']
             }
             if 'tmc_prefix' in group:
                 group_obj['tmc_prefix'] = group['tmc_prefix']
                 # uuid/displayname were already verified present and
                 # non-blank above, so the previous re-check was redundant
                 group_info = {
                     'uuid': group['uuid'],
                     'displayname': group['displayname'],
                     'tmc_prefix': group['tmc_prefix']
                 }
                 self.groups_by_tmc_prefix[
                     group['tmc_prefix'].upper().strip()] = group_info
             self.groupsByName[group['name'].lower().strip()] = group_obj
             self.groupsById[group['uuid']] = group_obj
예제 #2
0
 def ingest_rui_info(self):
     """Import a RUI location for each CCFDATA row that has a usable parent
     uuid but no new entity uuid yet, recording the created uuid on the row."""
     for row in self.dataset_info:
         if row['type'].upper() != "CCFDATA":
             continue
         if not string_helper.isBlank(row['new_entity_uuid']):
             continue
         parent = row['parent_uuid']
         if string_helper.isBlank(parent) or parent.startswith('No UUID'):
             continue
         row['new_entity_uuid'] = self.import_rui_location(row)
예제 #3
0
 def ingest_recordsets(self):
     """Create a dataset for each DATASET row that has a usable parent uuid
     but no new entity uuid yet, recording the created uuid on the row."""
     for row in self.dataset_info:
         if row['type'].upper() != "DATASET":
             continue
         if not string_helper.isBlank(row['new_entity_uuid']):
             continue
         parent = row['parent_uuid']
         if string_helper.isBlank(parent) or parent.startswith('No UUID'):
             continue
         row['new_entity_uuid'] = self.create_dataset(row)
    def __init__(self, property_file_name):
        """Set up the dataset publishing job from a properties file.

        Resolves the dataset uuid list file (first command line argument
        wins, falling back to the 'dataset.uuid.file' property), sets up
        error and recording log files beside it, validates the globus auth
        token, and loads the non-comment dataset ids into self.ds_ids.

        Raises ErrorMessage when no uuid file can be resolved, the file
        does not exist, or the auth token fails validation.
        """
        # keep the property file name for use in error messages;
        # BUG FIX: self.prop_file_name was referenced below without ever
        # being assigned, raising AttributeError on the error path
        self.prop_file_name = property_file_name
        self.props = IngestProps(property_file_name, required_props = ['nexus.token', 'ingest.api.url', 'search.api.url', 'uuid.api.url', 'dataset.uuid.file', 'globus.app.client.id', 'globus.app.client.secret'])
        if len(sys.argv) >= 2:
            self.id_file = sys.argv[1]
        else:
            self.id_file = self.props.get('dataset.uuid.file')
            if string_helper.isBlank(self.id_file):
                # BUG FIX: message previously read "as as property"
                raise ErrorMessage("ERROR: A list of dataset uuids must be specified in " + self.prop_file_name + " as property 'dataset.uuid.file' or as the first argument on the command line")
        if not os.path.isfile(self.id_file):
            raise ErrorMessage("ERROR: Input file " + self.id_file + " does not exist.")

        base_file_name = os.path.splitext(os.path.basename(self.id_file))[0]
        dir_path = file_helper.ensureTrailingSlash(os.path.dirname(self.id_file))

        #set up log files, first for errors, second to record all actions
        cur_time = time.strftime("%d-%m-%Y-%H-%M-%S")
        error_log_filename = dir_path + base_file_name + "-errors." + cur_time + ".log"
        self.error_logger = logging.getLogger('publish.datasets.err')
        self.error_logger.setLevel(logging.INFO)
        error_logFH = logging.FileHandler(error_log_filename)
        self.error_logger.addHandler(error_logFH)

        recording_log_filename = dir_path + base_file_name + "-rcding." + cur_time + ".log"
        self.recording_logger = logging.getLogger('publish.datasets.rcd')
        self.recording_logger.setLevel(logging.INFO)
        recording_logFH = logging.FileHandler(recording_log_filename)
        self.recording_logger.addHandler(recording_logFH)

        #initialize variables, get required values from property file
        self.dataset_info = None
        self.dataset_info_tsv_path = None

        self.token = self.props.get('nexus.token')
        self.search_api_url = file_helper.ensureTrailingSlashURL(self.props.get('search.api.url'))
        self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.props.get('ingest.api.url'))

        #initialize the auth helper and use it to get the
        #user information for the person running the script
        auth_helper = AuthHelper.create(self.props.get('globus.app.client.id'), self.props.get('globus.app.client.secret'))
        user_info = auth_helper.getUserInfo(self.token, getGroups = True)
        if isinstance(user_info, Response):
            raise ErrorMessage("error validating auth token: " + user_info.get_data(as_text=True))

        # read the id file via a context manager so it is always closed
        with open(self.id_file, 'r') as id_f:
            id_lines = id_f.readlines()

        # keep only non-blank, non-comment lines
        self.ds_ids = []
        for id_line in id_lines:
            if not string_helper.isBlank(id_line):
                tl = id_line.strip()
                if not tl.startswith('#'):
                    self.ds_ids.append(tl)

        self.donors_to_reindex = []
        self.set_acl_commands = []
예제 #5
0
 def __init__(self, property_file_name):
     """Load ingest configuration and the dataset info tsv.

     Reads the required api urls/token from the property file, validates
     the 'vand.dataset.info.tsv' path, and loads every tsv row (as a plain
     dict) into self.dataset_info.

     Raises Exception when the property file or tsv file is missing, or
     the tsv path does not end in .tsv.
     """
     self.dataset_info = None
     self.dataset_info_tsv_path = None
     self.prop_file_name = property_file_name
     if not os.path.isfile(property_file_name):
         raise Exception("property file does not exist: " + property_file_name)
     #Open the properties file
     propMgr = Property()
     self.props = propMgr.load_property_files(property_file_name)
     self.data_root_path = file_helper.ensureTrailingSlash(self.get_prop('root.path.to.data'))
     self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("ingest.api.url"))
     self.nexus_token = self.get_prop("nexus.token").strip()
     self.entity_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("entity.api.url"))
     self.uuid_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("uuid.api.url"))
     self.dataset_info_tsv_path = self.get_prop("vand.dataset.info.tsv")
     if string_helper.isBlank(self.dataset_info_tsv_path) or not os.path.isfile(self.dataset_info_tsv_path):
         raise Exception("dataset info file does not exist:" + self.dataset_info_tsv_path)
     if not self.dataset_info_tsv_path.endswith(".tsv"):
         raise Exception("dataset info file must be of type .tsv : " + self.dataset_info_tsv_path)
     # load all rows up front; dict(row) copies each csv row into a plain
     # dict (replaces the previous manual key-by-key copy loop)
     self.dataset_info = []
     with open(self.dataset_info_tsv_path, newline='') as tsvfile:
         reader = csv.DictReader(tsvfile, delimiter='\t')
         for row in reader:
             self.dataset_info.append(dict(row))

     self.collections = {}
     self.meta_info = None
예제 #6
0
 def get_prop(self, prop_name):
     """Return the value of required property prop_name.

     Raises Exception when the property is absent from self.props or its
     value is blank.
     """
     if prop_name not in self.props:
         raise Exception("Required property " + prop_name + " not found in " + self.prop_file_name)
     value = self.props[prop_name]
     if string_helper.isBlank(value):
         raise Exception("Required property " + prop_name + " from " + self.prop_file_name + " is blank")
     return value
예제 #7
0
    def test(self, lab_id):
        """Resolve a lab id to its stored hm_uuid, or to its globus group
        tmc_prefix when not yet in hm_data_centers.

        Returns None for blank input and an empty Response when the id
        cannot be resolved through the globus groups tmc_prefix index.
        """
        if isBlank(lab_id):
            return None
        check_id = lab_id.strip().lower()

        with closing(self.uuid_db.getDBConnection()) as dbConn:
            with closing(dbConn.cursor()) as curs:
                # SECURITY FIX: parameterized query (DB-API 2.0 'format'
                # paramstyle) instead of concatenating lab_id into the SQL
                curs.execute(
                    "select hm_uuid from hm_data_centers where hm_uuid = %s or dc_uuid = %s",
                    (check_id, check_id))
                result = curs.fetchone()
                if result is not None:
                    return result[0]
                try:
                    # Deprecate the use of Provenance, use the new globus_groups module - Zhou
                    # (removed the unused Provenance("a", "b", "c") local)

                    # Get the globus groups info based on the groups json file in commons package
                    globus_groups_info = globus_groups.get_globus_groups_info()
                    groups_by_tmc_prefix_dict = globus_groups_info['by_tmc_prefix']
                    lab = groups_by_tmc_prefix_dict[check_id]
                except ValueError:
                    return Response("")
                if not 'tmc_prefix' in lab:
                    return Response("")
                # NOTE(review): executing an empty statement looks like
                # leftover scaffolding -- kept to preserve behavior; confirm
                curs.execute("")
                return lab['tmc_prefix']
예제 #8
0
    def __init__(self, property_file_name):
        """Set up the collections import job.

        Reads required properties (token, neo4j connection info, input
        file, uuid api url), validates the collections tsv path, loads
        every tsv row (as a plain dict) into self.collection_info and
        opens the neo4j Graph connection.

        Raises Exception when the collections tsv is missing or is not a
        .tsv file.
        """
        self.props = IngestProps(property_file_name,
                                 required_props=[
                                     'nexus.token', 'neo4j.server',
                                     'neo4j.username', 'neo4j.password',
                                     'collections.input.file', 'uuid.api.url'
                                 ])
        self.uuid_helper = UUIDHelper(ingest_props=self.props)
        self.token = self.props.get('nexus.token')
        self.neo4j_server = self.props.get('neo4j.server')
        self.neo4j_user = self.props.get('neo4j.username')
        self.neo4j_password = self.props.get('neo4j.password')
        self.collections_tsv_path = self.props.get("collections.input.file")
        self.auth_helper = AuthHelper.instance()
        if string_helper.isBlank(
                self.collections_tsv_path) or not os.path.isfile(
                    self.collections_tsv_path):
            raise Exception("collections tsf file does not exist:" +
                            self.collections_tsv_path)
        if not self.collections_tsv_path.endswith(".tsv"):
            raise Exception("collections file must be of type .tsv : " +
                            self.collections_tsv_path)
        # load all rows up front; dict(row) copies each csv row into a plain
        # dict (replaces the previous manual key-by-key copy loop)
        self.collection_info = []
        with open(self.collections_tsv_path, newline='') as tsvfile:
            reader = csv.DictReader(tsvfile, delimiter='\t')
            for row in reader:
                self.collection_info.append(dict(row))

        self.graph = Graph(self.neo4j_server,
                           auth=(self.neo4j_user, self.neo4j_password))
예제 #9
0
    def getFileIdInfo(self, fid):
        """Return file info as a json string for a 32-character hex file uuid.

        Looks the uuid up in hm_files joined to hm_ancestors.  Returns a
        Response(400) for a malformed id and a Response(404) when no
        matching record is found.
        """
        check_id = fid.strip()
        # BUG FIX: the message promises "32 digit hex only" but only the
        # length was checked -- enforce the hex alphabet as well
        is_hex = all(c in "0123456789abcdefABCDEF" for c in check_id)
        if isBlank(check_id) or len(check_id) != 32 or not is_hex:
            return Response("Invalid file id format.  32 digit hex only.", 400)
        # SECURITY FIX: parameterized query (DB-API 2.0 'format' paramstyle)
        # instead of concatenating check_id into the SQL
        sql = ("select hm_uuid, path, checksum, size, base_dir, ancestor_uuid"
               " from hm_files inner join hm_ancestors"
               " on hm_ancestors.descendant_uuid = hm_files.hm_uuid"
               " where hm_uuid = %s")
        with closing(self.hmdb.getDBConnection()) as dbConn:
            with closing(dbConn.cursor()) as curs:
                curs.execute(sql, (check_id,))
                results = [
                    dict((curs.description[i][0], value)
                         for i, value in enumerate(row))
                    for row in curs.fetchall()
                ]

        # every "not found" shape collapses to the same 404
        if (not results or 'hm_uuid' not in results[0]
                or results[0]['hm_uuid'] is None):
            return Response("Could not find the target id: " + fid, 404)

        rdict = self._convert_result_id_array(results, check_id)
        # drop null checksum/size rather than emitting json nulls
        for optional in ('checksum', 'size'):
            if optional in rdict and rdict[optional] is None:
                rdict.pop(optional)
        return json.dumps(rdict, indent=4, sort_keys=True, default=str)
예제 #10
0
 def fix_ids(self):
     """Fill in parent_uuid for CCFDATA/IMAGE/CLINICALDATA rows by resolving
     their parent display id."""
     fixable_types = ('CCFDATA', 'IMAGE', 'CLINICALDATA')
     for row in self.dataset_info:
         if row['type'] not in fixable_types:
             continue
         if 'parent_display_id' not in row:
             continue
         display_id = row['parent_display_id']
         if not string_helper.isBlank(display_id):
             row['parent_uuid'] = self.resolveToUUID(display_id)
예제 #11
0
def identifyGroups(groups):
    """Build a name-keyed dict of cleaned group records from raw group data,
    also registering each record in AuthCache.groupsById.

    Records missing any required field (name, uuid, generateuuid,
    displayname) or with a blank name/uuid/displayname are skipped.
    """
    groupIdByName = {}
    required = ('name', 'uuid', 'generateuuid', 'displayname')
    for group in groups:
        if any(field not in group for field in required):
            continue
        if (string_helper.isBlank(group['name'])
                or string_helper.isBlank(group['uuid'])
                or string_helper.isBlank(group['displayname'])):
            continue
        group_obj = {
            'name': group['name'].lower().strip(),
            'uuid': group['uuid'].lower().strip(),
            'displayname': group['displayname'],
            'generateuuid': group['generateuuid'],
        }
        # optional attributes are copied through when present
        for optional in ('tmc_prefix', 'data_provider', 'shortname'):
            if optional in group:
                group_obj[optional] = group[optional]
        groupIdByName[group['name'].lower().strip()] = group_obj
        AuthCache.groupsById[group['uuid']] = group_obj
    return groupIdByName
예제 #12
0
 def ingest_collections(self):
     """Create a collection for each COLLECTION row lacking an entity uuid,
     then index every COLLECTION row by its parent_display_id."""
     for row in self.dataset_info:
         if row['type'].upper() != "COLLECTION":
             continue
         if string_helper.isBlank(row['new_entity_uuid']):
             col_uuid = self.create_collection(row['name'], row['description'], row['collection_key'])
             row['new_entity_uuid'] = col_uuid
             print("Collection created key:" + row['parent_display_id'] + "uuid:" + col_uuid)
         self.collections[row['parent_display_id']] = row
예제 #13
0
 def check_io():
     # Drain the child process stdout, mirroring each non-blank line
     # (right-stripped) into the logger at INFO level.  Stops at EOF.
     while True:
         line = process.stdout.readline().decode()
         if not line:
             break
         if not string_helper.isBlank(line):
             logger.log(logging.INFO, line.rstrip())
예제 #14
0
def getFirstNonComment(file):
    """Return the first stripped line of *file* that is non-blank and does
    not start with '#'; returns "" when no such line exists."""
    with open(file) as f:
        for line in f:
            if string_helper.isBlank(line):
                continue
            stripped = line.strip()
            if not stripped.startswith('#'):
                return stripped
    return ""
예제 #15
0
def init():
    """Module startup: configure the 'uuid.service' file logger and build
    the global UUIDWorker from the flask app configuration.

    Startup errors are printed (and logged when possible) rather than
    raised, so the problem is reported without crashing the import.
    """
    global logger
    global worker
    try:
        logger = logging.getLogger('uuid.service')
        logger.setLevel(logging.INFO)
        logFH = logging.FileHandler(LOG_FILE_NAME)
        logger.addHandler(logFH)
        logger.info("started")
    except Exception as e:
        print("Error opening log file during startup")
        print(str(e))

    try:
        if 'APP_CLIENT_ID' not in app.config or isBlank(
                app.config['APP_CLIENT_ID']):
            raise Exception(
                "Required configuration parameter APP_CLIENT_ID not found in application configuration."
            )
        # BUG FIX: this blank check previously re-tested APP_CLIENT_ID, so a
        # present-but-blank APP_CLIENT_SECRET slipped through validation
        if 'APP_CLIENT_SECRET' not in app.config or isBlank(
                app.config['APP_CLIENT_SECRET']):
            raise Exception(
                "Required configuration parameter APP_CLIENT_SECRET not found in application configuration."
            )
        cId = app.config['APP_CLIENT_ID']
        cSecret = app.config['APP_CLIENT_SECRET']
        dbHost = app.config['DB_HOST']
        dbName = app.config['DB_NAME']
        dbUsername = app.config['DB_USERNAME']
        dbPassword = app.config['DB_PASSWORD']
        worker = UUIDWorker(clientId=cId,
                            clientSecret=cSecret,
                            dbHost=dbHost,
                            dbName=dbName,
                            dbUsername=dbUsername,
                            dbPassword=dbPassword)
        logger.info("initialized")

    except Exception as e:
        print("Error during startup.")
        print(str(e))
        logger.error(e, exc_info=True)
        print("Check the log file for further information: " + LOG_FILE_NAME)
 def convert_to_globus_url(self, path_to_file):
     """Translate a filesystem path under the globus system directory into a
     globus URL (spaces escaped as %20).

     Returns "" for blank input or paths outside the globus system dir.
     """
     if string_helper.isBlank(path_to_file):
         return ""
     pfile = path_to_file.strip()
     if not pfile.startswith(self.globus_system_dir):
         return ""
     relative = pfile[len(self.globus_system_dir):]
     return (self.globus_base_url +
             file_helper.ensureBeginningSlashURL(relative).replace(" ", "%20"))
 def get(self, prop_name, required=False):
     """Return property prop_name from self.props.

     When the property is absent: returns None, unless required is True in
     which case ErrorMessage is raised.  A required property with a blank
     value also raises ErrorMessage.
     """
     if prop_name not in self.props:
         if not required:
             return None
         raise ErrorMessage("Required property " + prop_name +
                            " not found in " + self.prop_file_name)
     value = self.props[prop_name]
     if required and string_helper.isBlank(value):
         raise ErrorMessage("Required property " + prop_name + " from " +
                            self.prop_file_name + " is blank")
     return value
예제 #18
0
    def __resolve_lab_id(self, lab_id, user_id, user_email):
        """Resolve a lab/tmc id to {'hm_uuid': ..., 'dc_code': ...}.

        Looks the id up in hm_data_centers; when absent, resolves it through
        the globus groups tmc_prefix index, mints a new LAB uuid and inserts
        the new mapping.  Returns None for blank input and a Response(400)
        when the id cannot be resolved to a lab with a tmc_prefix.
        """
        if isBlank(lab_id):
            return None
        check_id = lab_id.strip().lower()
        r_val = {}
        with closing(self.hmdb.getDBConnection()) as dbConn:
            with closing(dbConn.cursor()) as curs:
                # SECURITY FIX: parameterized query (DB-API 2.0 'format'
                # paramstyle) instead of concatenating lab_id into the SQL
                curs.execute(
                    "select hm_uuid, dc_code from hm_data_centers"
                    " where hm_uuid = %s or dc_uuid = %s",
                    (check_id, check_id))
                result = curs.fetchone()
                if result is None:
                    try:
                        # Deprecate the use of Provenance
                        #lab = self.prov_helper.get_group_by_identifier(check_id)

                        # Get the globus groups info based on the groups json file in commons package
                        globus_groups_info = globus_groups.get_globus_groups_info()
                        groups_by_tmc_prefix_dict = globus_groups_info['by_tmc_prefix']
                        if not check_id in groups_by_tmc_prefix_dict:
                            lab = {}
                        else:
                            lab = groups_by_tmc_prefix_dict[check_id]
                    except ValueError:
                        return Response(
                            "A valid lab with specified id not found id:" +
                            check_id, 400)

                    if not 'tmc_prefix' in lab:
                        return Response(
                            "Lab with specified id:" + check_id +
                            " does not contain a tmc_prefix.", 400)

                    # mint exactly one LAB uuid without display/base ids
                    uuid_json = self.newUUIDs([],
                                              "LAB",
                                              user_id,
                                              user_email,
                                              1,
                                              gen_base_ids=False)
                    uuid_info = json.loads(uuid_json)
                    r_val['dc_code'] = lab['tmc_prefix']
                    r_val['hm_uuid'] = uuid_info[0]['uuid']
                    # SECURITY FIX: parameterized insert of the new mapping
                    curs.execute(
                        "insert into hm_data_centers (HM_UUID, DC_UUID, DC_CODE)"
                        " VALUES (%s, %s, %s)",
                        (r_val['hm_uuid'], check_id, r_val['dc_code']))
                    dbConn.commit()
                else:
                    r_val['dc_code'] = result[1]
                    r_val['hm_uuid'] = result[0]
        return r_val
def get_collection_key(parent_dir):
    """Return the canonical (stripped, upper-cased) collection key for
    parent_dir, creating and caching the collection's name/description in
    the module-level collections dict on first use.

    Raises Exception for a blank parent_dir.
    """
    if string_helper.isBlank(parent_dir):
        raise Exception("Can't create a collection with an empty name.")
    coll_key = parent_dir.strip().upper()
    if coll_key not in collections:
        block_id = csv_data.get_block_id(parent_dir)
        collections[coll_key] = {
            'description': substitute_into_template("collection-description.txt",
                                                    block_id, None),
            'name': get_dataset_name(block_id, 'collection'),
        }
    return coll_key
예제 #20
0
    def check_fields(self, collection, row_num):
        """Validate a collection tsv row, printing a message per problem.

        Returns False immediately when coll_num is missing/blank or the row
        is commented out; otherwise returns True only when title,
        description, creators (valid json) and datasets are all present and
        non-blank.
        """
        all_good = True
        if not 'coll_num' in collection or string_helper.isBlank(
                collection['coll_num']):
            print("coll_num field missing in row " + str(row_num) +
                  ". Will not import row.")
            return False
        if collection['coll_num'].strip().startswith('#'):
            print("collection " + collection['coll_num'] +
                  " is commented out.  Will not import")
            return False
        if not 'title' in collection or string_helper.isBlank(
                collection['title']):
            print("collection with coll_num " + collection['coll_num'] +
                  " is missing the title field.  Will not be imported.")
            all_good = False
        if not 'description' in collection or string_helper.isBlank(
                collection['description']):
            print("collection with coll_num " + collection['coll_num'] +
                  " is missing the description field.  Will not be imported.")
            all_good = False
        if not 'creators' in collection or string_helper.isBlank(
                collection['creators']):
            print("collection with coll_num " + collection['coll_num'] +
                  " is missing the creators field.  Will not be imported.")
            all_good = False
        # BUG FIX: collection['creators'] was previously json-validated
        # unconditionally, raising KeyError when the field was missing
        elif not self.is_valid_json(collection['creators']):
            print(
                "collection with coll_num " + collection['coll_num'] +
                " does not contain valid json for the creators field.  Will not be imported."
            )
            all_good = False
        if not 'datasets' in collection or string_helper.isBlank(
                collection['datasets']):
            print("collection with coll_num " + collection['coll_num'] +
                  " is missing the datasets field.  Will not be imported.")
            all_good = False

        return all_good
예제 #21
0
 def getAuthorizationTokens(self, requestHeaders):
     """Extract auth credentials from request headers.

     A 'Mauthorization' header (optionally prefixed with "MBEARER") takes
     precedence and is parsed as json, returning the decoded tokens.
     Otherwise an 'Authorization: Bearer <token>' header yields the bare
     token string.  Any missing or malformed header yields a 401 Response.
     """
     if 'Mauthorization' in requestHeaders:
         mauthHeader = requestHeaders['Mauthorization']
         if string_helper.isBlank(mauthHeader):
             return Response("Empty Mauthorization header", 401)
         mauthHeader = mauthHeader.strip()
         # strip an optional "MBEARER" prefix before decoding the payload
         jsonTokens = mauthHeader
         if mauthHeader.upper().startswith("MBEARER"):
             jsonTokens = mauthHeader[7:].strip()
         try:
             return json.loads(jsonTokens)
         except Exception as e:
             print("ERROR!: " + str(e))
             return Response("Error decoding json included in Mauthorization header", 401)

     if 'Authorization' in requestHeaders:
         authHeader = requestHeaders['Authorization']
         if string_helper.isBlank(authHeader):
             return Response("Empty Authorization header", 401)
         authHeader = authHeader.strip()
         if len(authHeader) <= 7:
             return Response("Invalid Authorization header", 401)
         if not authHeader.upper().startswith("BEARER"):
             return Response("Bearer Authorization required", 401)
         token = authHeader[6:].strip()
         if string_helper.isBlank(token):
             return Response('Invalid Bearer Authorization', 401)
         return token

     return Response('No Authorization header', 401)
예제 #22
0
    def __init__(self, clientId, clientSecret, dbHost, dbName, dbUsername,
                 dbPassword):
        """Create a worker bound to an AuthHelper singleton and a database.

        Raises Exception when the globus client id/secret are missing or
        blank.
        """
        if clientId is None or clientSecret is None or isBlank(
                clientId) or isBlank(clientSecret):
            raise Exception(
                "Globus client id and secret are required in AuthHelper")

        # BUG FIX: the else branch previously called
        # self.authHelper.instance() before self.authHelper was ever
        # assigned (AttributeError); fetch the singleton and assign it
        if not AuthHelper.isInitialized():
            self.authHelper = AuthHelper.create(clientId=clientId,
                                                clientSecret=clientSecret)
        else:
            self.authHelper = AuthHelper.instance()

        #Open the config file
        self.logger = logging.getLogger('uuid.service')

        self.dbHost = dbHost
        self.dbName = dbName
        self.dbUsername = dbUsername
        self.dbPassword = dbPassword
        self.lock = threading.RLock()
        self.hmdb = DBConn(self.dbHost, self.dbUsername, self.dbPassword,
                           self.dbName)
def load_dataset_info():
    """Read the ingest tsv and index DATASET rows by new_entity_uuid into
    the module-level dataset_info_by_uuid dict; rows without a uuid are
    reported with a warning."""
    with open(ingest_info_file, newline='') as tsvfile:
        for row in csv.DictReader(tsvfile, delimiter='\t'):
            info_row = dict(row)
            if info_row['type'] != 'DATASET':
                continue
            if string_helper.isBlank(info_row['new_entity_uuid']):
                print("WARNING: DATASET WITHOUT UUID: " +
                      info_row['parent_display_id'])
            else:
                dataset_info_by_uuid[info_row['new_entity_uuid']] = info_row
예제 #24
0
def isValidHMId(hmid):
    """Return True when hmid is a component-prefixed id, a 10-character
    display id (3 digits, 4 letters, 3 digits) or a 32-character hex uuid."""
    if isBlank(hmid):
        return False
    if startsWithComponentPrefix(hmid):
        return True
    tid = stripHMid(hmid).upper()
    length = len(tid)
    if length == 10:
        return (set(tid[0:3]).issubset(HMID_NUM_CHARS)
                and set(tid[3:7]).issubset(HMID_ALPHA_CHARS)
                and set(tid[7:]).issubset(HMID_NUM_CHARS))
    if length == 32:
        return set(tid).issubset(HEX_CHARS)
    return False
예제 #25
0
 def __init__(self, clientId, clientSecret, globusGroups=None):
     """Construct the singleton AuthHelper.

     Raises Exception when an instance already exists or when the globus
     client id/secret are missing/blank.  Registers optional globus groups
     and the sanitized process secret with AuthCache.
     """
     global helperInstance
     if helperInstance is not None:
         raise Exception("An instance of singleton AuthHelper exists already.  Use AuthHelper.instance() to retrieve it")

     missing_creds = (clientId is None or clientSecret is None
                      or string_helper.isBlank(clientId)
                      or string_helper.isBlank(clientSecret))
     if missing_creds:
         raise Exception("Globus client id and secret are required in AuthHelper")

     self.applicationClientId = clientId
     self.applicationClientSecret = clientSecret
     if globusGroups is not None:
         AuthCache.setGlobusGroups(globusGroups)
     # the process secret is the client secret stripped to alphanumerics
     AuthCache.setProcessSecret(re.sub(r'[^a-zA-Z0-9]', '', clientSecret))
     if helperInstance is None:
         helperInstance = self
예제 #26
0
 def create_dataset(self, ingest_row):
     '''
     Create a dataset through the ingest api ("datasets/ingest" endpoint)
     for one ingest tsv row and return the new dataset's uuid.

     Raises via resp.raise_for_status() on a non-2xx response and raises
     Exception when the response contains no uuid.

     Example of the request payload this method builds:
     {
         "dataset_name": "Test Name",
         "dataset_description": "This is a test description",
         "dataset_collection_uuid": "ab93b3983acge938294857fe292429234",
         "source_uuids": ["ea93b3983acge938294857fe292429234", "f343b3983acge938294857fe292429234", "cdeb3983acge938294857fe292429234"],
         "data_types": ["PAS"],
         "creator_email": "*****@*****.**",
         "creator_name": "Dataset Owner",
         "group_uuid": "193439-29392-2939243",
         "group_name": "HuBMAP-Test",
         "contains_human_genomic_sequences": "no"  
     }
     '''
     recd = {}
     # drop any non-ascii characters from the free-text name/description
     recd['dataset_name'] = ingest_row['name'].encode(encoding='ascii', errors='ignore').decode('ascii')
     recd['dataset_description'] = ingest_row['description'].encode(encoding='ascii', errors='ignore').decode('ascii')
     if not ingest_row['collection_key'].startswith("NO_COLLECTION"):
         recd['dataset_collection_uuid'] = self.lookup_collection_uuid(ingest_row['collection_key'])
     # multiple parent uuids are pipe-delimited in the tsv row
     source_uuids = ingest_row['parent_uuid'].split('|')
     recd['source_uuids'] = source_uuids
     data_type = []
     dtype = ingest_row['assay_type']
     # 'LC' in the tsv is expanded to the full assay name
     if not string_helper.isBlank(dtype) and dtype.upper() == 'LC':
         dtype = 'LC-MS-untargeted'
     data_type.append(dtype)
     recd['data_types'] = data_type
     recd['creator_email'] = ingest_row['creator_email']
     recd['creator_name'] = ingest_row['creator_name']
     recd['group_uuid'] = ingest_row['group_id']
     recd['group_name'] = ingest_row['group_name']
     recd['contains_human_genomic_sequences'] = 'no'
     
     url = file_helper.ensureTrailingSlashURL(self.ingest_api_url) + "datasets/ingest"
     heads = {'Authorization': 'Bearer ' + self.nexus_token, 'Content-Type': 'application/json'}
     recds = json.dumps(recd)
     # NOTE(review): verify=False disables TLS certificate verification --
     # confirm this is intentional for the target environment
     resp = requests.post(url, headers=heads, data=recds, verify=False)
     status_code = resp.status_code
     if status_code < 200 or status_code >= 300:
         print("Unable to create RECORDSET for parent id:" + ingest_row['parent_display_id'] + " assay type:" + ingest_row['assay_type'] , file=sys.stderr)
         resp.raise_for_status()
     val = resp.json()
     if val is None or not 'uuid' in val:
         raise Exception("No UUID returned on creation of DATASET parent id:" + ingest_row['parent_display_id'] + " assay type:" + ingest_row['assay_type'] )
     print("Created Dataset ingest_id:" + ingest_row['ingest_id'] + " UUID:" + val['uuid'] + " parent id:" + ingest_row['parent_display_id'] + " assay type:" + ingest_row['assay_type'] )
     return val['uuid']
    def print_protocol_info(self,
                            entity,
                            entity_type,
                            include_uuid=False,
                            include_test_group=False):
        """Print one comma-separated line of protocol information for entity.

        Skips entities whose provenance group id is blank and (unless
        include_test_group is True) entities belonging to
        self.test_group_id.  Protocol DOIs/urls and protocol files (with
        their globus urls) are collected from both the json 'protocols'
        attribute and the flat 'protocol' / 'protocol_file' attributes.
        Output columns: entity_type, group name, display id, display doi,
        [uuid when include_uuid,] dois, globus urls, files -- the three
        multi-valued columns are " | "-delimited.
        """
        group_id = self.get_dict_attrib(
            entity, "provenance_group_uuid").strip().lower()
        if string_helper.isBlank(group_id):
            return
        if not include_test_group:
            # suppress entities owned by the configured test group
            if self.test_group_id == group_id:
                return
        group_name = self.get_dict_attrib(entity, "provenance_group_name")
        display_doi = self.get_dict_attrib(entity, "display_doi")
        uuid = self.get_dict_attrib(entity, "uuid")
        display_id = self.get_dict_attrib(entity, "hubmap_identifier")
        protocols = self.get_dict_attrib(entity, "protocols")
        prot_files = []
        prot_globus_urls = []
        prot_dois = []
        if not string_helper.isBlank(protocols):
            # swap single quotes for double quotes so the stored
            # 'protocols' attribute parses as json
            prots = json.loads(protocols.replace("'", '"'))
            for prot in prots:
                if "protocol_file" in prot and not string_helper.isBlank(
                        prot['protocol_file']):
                    prot_files.append(prot['protocol_file'])
                    prot_globus_urls.append(
                        self.convert_to_globus_url(prot['protocol_file']))
                if "protocol_url" in prot and not string_helper.isBlank(
                        prot['protocol_url']):
                    prot_dois.append(prot['protocol_url'])
        # flat single-valued attributes are appended after the json ones
        protocol = self.get_dict_attrib(entity, "protocol")
        if not string_helper.isBlank(protocol):
            prot_dois.append(protocol)
        protocol_file = self.get_dict_attrib(entity, "protocol_file")
        if not string_helper.isBlank(protocol_file):
            prot_files.append(protocol_file)
            prot_globus_urls.append(self.convert_to_globus_url(protocol_file))

        # assemble the output row in column order
        vals = []
        vals.append(entity_type)
        vals.append(group_name)
        vals.append(display_id)
        vals.append(display_doi)
        if include_uuid:
            vals.append(uuid)
        vals.append(string_helper.listToDelimited(prot_dois,
                                                  delimitChar=" | "))
        vals.append(
            string_helper.listToDelimited(prot_globus_urls, delimitChar=" | "))
        vals.append(
            string_helper.listToDelimited(prot_files, delimitChar=" | "))
        print(string_helper.listToCommaSeparated(vals))
def write_tsv(row, datatype):
    """Write one metadata row to a per-dataset TSV file.

    Creates <output_dir>/<hmuuid>-<datatype>.tsv containing a header line
    and a single data line.  The 'hmuuid' column is dropped, and the
    metadata_path/data_path columns are rewritten relative to the dataset's
    submission directory (looked up in the module-level
    ``dataset_info_by_uuid``) when that base path is known.

    Relies on the module-level globals ``output_dir``,
    ``dataset_info_by_uuid`` and ``string_helper``.
    """
    uuid = row['hmuuid']
    if string_helper.isBlank(uuid):
        print("ERROR: no uuid for row in datatype: " + datatype)
        return

    output_path = output_dir + "/" + row['hmuuid'] + "-" + datatype + ".tsv"
    with open(output_path, 'w') as writer:
        keys = []
        data = []
        base_submission_path = None
        if not uuid in dataset_info_by_uuid:
            print("WARNING: dataset information not found for " + uuid)
        else:
            base_submission_path = dataset_info_by_uuid[uuid]['local_path']
            if not base_submission_path.lower().startswith('/hive/hubmap/lz'):
                print(
                    "WARNING: dataset local path doesn't start with /hive/hubmap/lz: "
                    + uuid)
            else:
                # drop the leading '/hive/hubmap/lz' (15 chars)
                base_submission_path = base_submission_path[15:].lower()

        for key in row.keys():
            if not key == 'hmuuid':
                val = row[key]
                keyl = key.lower()
                # BUG FIX: original tested `key == 'data_path'` (unlowered),
                # so a mixed-case 'Data_Path' column escaped path rewriting.
                if keyl == 'metadata_path' or keyl == 'data_path':
                    if base_submission_path is None:
                        print("ERROR: no base_submission_path set on " + key +
                              ": " + str(val))
                    if val is None:
                        print("WARNING: no path found for " + key +
                              " for type: " + datatype + " on uuid: " + uuid)
                    elif base_submission_path is not None:
                        if not val.lower().startswith(base_submission_path):
                            print("WARNING: path " + key + ": " + val +
                                  " does not start with base path: " +
                                  base_submission_path)
                        else:
                            # BUG FIX: original stripped the prefix
                            # unconditionally after this if/elif, crashing
                            # (TypeError) when val or base_submission_path
                            # was None and mangling paths that did not start
                            # with the base path.
                            val = val[len(base_submission_path):]
                keys.append(key)
                data.append(val)

        writer.write(
            string_helper.listToDelimited(keys, delimitChar='\t') + '\n')
        writer.write(
            string_helper.listToDelimited(data, delimitChar='\t') + '\n')
예제 #29
0
    def __init__(self, file_name):
        """Load a tab-separated file fully into memory.

        The file's first line is treated as the header.  Column names are
        stored, in order, in ``self.header``; every subsequent line becomes
        a dict (column name -> cell value) appended to ``self.records``.

        Raises:
            Exception: when ``file_name`` is blank.
            ErrorMessage: when ``file_name`` does not point to a file.
        """
        if string_helper.isBlank(file_name):
            raise Exception("ERROR: TSV Filename Not Found")
        if not os.path.isfile(file_name):
            raise ErrorMessage("TSV file " + file_name + " not found.")

        self.records = []
        self.header = []
        with open(file_name, newline='') as tsvfile:
            reader = csv.DictReader(tsvfile, delimiter='\t')
            # BUG FIX: the original copied the header out of the FIRST DATA
            # ROW, so a file containing a header line but no data rows ended
            # up with an empty self.header.  DictReader.fieldnames is the
            # header line itself (None for a completely empty file).
            self.header = list(reader.fieldnames or [])
            # Each DictReader row is keyed by the header's column names;
            # dict(row) makes an independent per-row copy.
            self.records = [dict(row) for row in reader]
예제 #30
0
    def __authRecord(appKey, token, getGroups=False):
        """Build a cacheable record for a token introspection lookup.

        Returns a dict with keys:
          'info'      - the payload from AuthCache.__userInfo (or an error
                        Response when the lookup failed),
          'timestamp' - when the lookup happened (naive local time —
                        NOTE(review): presumably fine since it is only
                        compared against other local timestamps; confirm),
          'valid'     - True only when the token is reported active.

        For a valid token carrying a non-blank 'scope' string, the scopes
        are also split into a lowercased list at info['hmscopes'].
        """
        info = AuthCache.__userInfo(appKey, token, getGroups)
        record = {'info': info, 'timestamp': datetime.datetime.now()}

        # A failed lookup comes back as a Response object; that, or a
        # missing/None 'active' flag, marks the token as invalid.
        if isinstance(info, Response):
            valid = False
        elif info.get('active') is None:
            valid = False
        else:
            valid = info['active']
        record['valid'] = valid

        if valid and 'scope' in info and not string_helper.isBlank(info['scope']):
            info['hmscopes'] = info['scope'].lower().strip().split()

        return record