Example no. 1
 def __init__(self, property_file_name):
     self.dataset_info = None
     self.dataset_info_tsv_path = None
     self.prop_file_name = property_file_name
     if not os.path.isfile(property_file_name):
         raise Exception("property file does not exist: " + property_file_name)
     #Open the properties file
     propMgr = Property()
     self.props = propMgr.load_property_files(property_file_name)
     self.data_root_path = file_helper.ensureTrailingSlash(self.get_prop('root.path.to.data'))
     self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("ingest.api.url"))
     self.nexus_token = self.get_prop("nexus.token").strip()
     self.entity_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("entity.api.url"))
     self.uuid_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("uuid.api.url"))
     self.dataset_info_tsv_path = self.get_prop("vand.dataset.info.tsv")
     if string_helper.isBlank(self.dataset_info_tsv_path) or not os.path.isfile(self.dataset_info_tsv_path):
         raise Exception("dataset info file does not exist:" + self.dataset_info_tsv_path)
     if not self.dataset_info_tsv_path.endswith(".tsv"):
         raise Exception("dataset info file must be of type .tsv : " + self.dataset_info_tsv_path)
     self.dataset_info = []
     with open(self.dataset_info_tsv_path, newline='') as tsvfile:
         reader = csv.DictReader(tsvfile, delimiter='\t')
         for row in reader:
             self.dataset_info.append(dict(row))
             
     self.collections = {}
     self.meta_info = None
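
For reference, the constructor above reads a Java-style properties file. A minimal sketch with invented values (the key names come from the get_prop calls above):

    root.path.to.data=/data/vand/
    ingest.api.url=https://ingest.api.example.org/
    entity.api.url=https://entity.api.example.org/
    uuid.api.url=https://uuid.api.example.org/
    nexus.token=REPLACE_WITH_TOKEN
    vand.dataset.info.tsv=/data/vand/dataset-info.tsv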
Example no. 2

    def __init__(self, property_file_name):
        self.props = IngestProps(property_file_name, required_props = ['nexus.token', 'ingest.api.url', 'search.api.url', 'uuid.api.url', 'dataset.uuid.file', 'globus.app.client.id', 'globus.app.client.secret'])
        if len(sys.argv) >= 2:
            self.id_file = sys.argv[1]
        else:
            self.id_file = self.props.get('dataset.uuid.file')
            if string_helper.isBlank(self.id_file):
                raise ErrorMessage("ERROR: A list of dataset uuids must be specified in " + self.prop_file_name + " as as property 'dataset.uuid.file' or as the first argument on the command line")
        if not os.path.isfile(self.id_file):
            raise ErrorMessage("ERROR: Input file " + self.id_file + " does not exist.")

        base_file_name = os.path.splitext(os.path.basename(self.id_file))[0] 
        dir_path = file_helper.ensureTrailingSlash(os.path.dirname(self.id_file))
        
        
        #set up log files, first for errors, second to record all actions
        cur_time = time.strftime("%d-%m-%Y-%H-%M-%S")
        error_log_filename = dir_path + base_file_name + "-errors." + cur_time + ".log"
        self.error_logger = logging.getLogger('publish.datasets.err')
        self.error_logger.setLevel(logging.INFO)
        error_logFH = logging.FileHandler(error_log_filename)
        self.error_logger.addHandler(error_logFH)
        
        recording_log_filename = dir_path + base_file_name + "-rcding." + cur_time + ".log" 
        self.recording_logger = logging.getLogger('publish.datasets.rcd')
        self.recording_logger.setLevel(logging.INFO)
        recording_logFH = logging.FileHandler(recording_log_filename)
        self.recording_logger.addHandler(recording_logFH)
        
        #initialize variables, get required values from property file
        self.dataset_info = None
        self.dataset_info_tsv_path = None

        self.token = self.props.get('nexus.token')
        self.search_api_url = file_helper.ensureTrailingSlashURL(self.props.get('search.api.url'))
        self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.props.get('ingest.api.url'))
        
        #initialize the auth helper and use it to get the
        #user information for the person running the script
        auth_helper = AuthHelper.create(self.props.get('globus.app.client.id'), self.props.get('globus.app.client.secret'))
        user_info = auth_helper.getUserInfo(self.token, getGroups = True)        
        if isinstance(user_info, Response):
            raise ErrorMessage("error validating auth token: " + user_info.get_data(as_text=True))
        
        with open(self.id_file, 'r') as id_f:
            id_lines = id_f.readlines()
        
        self.ds_ids = []
        for id_line in id_lines:
            if not string_helper.isBlank(id_line):
                tl = id_line.strip()
                if not tl.startswith('#'):
                    self.ds_ids.append(tl)

        self.donors_to_reindex = []
        self.set_acl_commands = []
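
The id file parsed above is plain text with one dataset uuid per line; the loop skips blank lines and lines beginning with '#'. A sketch with made-up ids:

    # datasets to publish
    ea93b3983acge938294857fe292429234
    f343b3983acge938294857fe292429234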
Example no. 3
 def __init__(self, property_file_name):
     self.props = PropHelper(
         property_file_name,
         required_props=[
             'neo4j.server', 'neo4j.username', 'neo4j.password', 'db.host',
             'db.name', 'db.username', 'db.password', 'user.mapping',
             'client.id', 'client.secret', 'old.ingest.upload.dir',
             'new.ingest.upload.dir', 'uuid.api.url', 'user.nexus.token'
         ])
     self.graph = Graph(self.props.get('neo4j.server'),
                        auth=(self.props.get('neo4j.username'),
                              self.props.get('neo4j.password')))
     self.uuid_wrker = UUIDWorker(self.props.get('client.id'),
                                  self.props.get('client.secret'),
                                  self.props.get('db.host'),
                                  self.props.get('db.name'),
                                  self.props.get('db.username'),
                                  self.props.get('db.password'))
     self.old_ingest_upload_dir = file_helper.ensureTrailingSlash(
         self.props.get('old.ingest.upload.dir'))
     self.new_ingest_upload_dir = file_helper.ensureTrailingSlash(
         self.props.get('new.ingest.upload.dir'))
     self.uuid_api_url = file_helper.ensureTrailingSlashURL(
         self.props.get('uuid.api.url')) + "hmuuid"
     self.user_token = self.props.get('user.nexus.token')
Example no. 4
 def __init__(self, property_file_name, old_ds_info_file_name,
              output_file_name):
     props = IngestProps(property_file_name,
                         required_props=[
                             'uuid.api.url', 'entity.api.url',
                             'ingest.api.url', 'nexus.token'
                         ])
     self.graph = Graph(props.get('neo4j.server'),
                        auth=(props.get('neo4j.username'),
                              props.get('neo4j.password')))
     with open(old_ds_info_file_name) as ds_info:
         self.old_dsets = json.load(ds_info)
     self.out_file_name = output_file_name
     self.uuid_url = file_helper.ensureTrailingSlashURL(
         props.get('uuid.api.url'))
     self.entity_api_url = file_helper.ensureTrailingSlashURL(
         props.get('entity.api.url'))
     self.ingest_api_url = file_helper.ensureTrailingSlashURL(
         props.get('ingest.api.url'))
     self.nexus_token = props.get('nexus.token')
     with open(self.out_file_name, 'a') as f:
         f.write("======newer\tolder\toldest\n")
Example no. 5
 def __init__(self, property_file_name, assay_row_key):
     #Open the properties file
     propMgr = Property()
     self.prop_file_name = property_file_name
     self.assay_row_key = assay_row_key
     if not os.path.isfile(self.prop_file_name):
         raise Exception("Required property file does not exist: " + self.prop_file_name)
     self.props = propMgr.load_property_files(self.prop_file_name)
     root_path = file_helper.ensureTrailingSlashURL(self.get_prop("root.path.to.data"))
     clinical_meta_path = self.join_dir(root_path, "metadata/clinical")
     assay_meta_path = self.join_dir(root_path, "metadata/assay")
     self.clinical_metadata = self.convert_clin_meta_to_dict(clinical_meta_path)
     self.assay_metadata = self.convert_assay_meta_to_dict(assay_meta_path)
Example no. 6
 def create_collection(self, name, description, collection_key):
     url = file_helper.ensureTrailingSlashURL(self.ingest_api_url) + "new-collection"
     heads = {'Authorization': 'Bearer ' + self.nexus_token, 'Content-Type': 'application/json'}
     dats = {'label': name, 'description':description}
     dataj = json.dumps(dats)
     resp = requests.post(url, headers=heads, data=dataj, verify=False)
     status_code = resp.status_code
     if status_code < 200 or status_code >= 300:
         print("Unable to create COLLECTION for " + collection_key, file=sys.stderr)
         resp.raise_for_status()
     val = resp.json()
     if 'uuid' not in val:
         raise Exception("No UUID returned on creation of COLLECTION " + collection_key)
     return val['uuid']
Example no. 7

    def relink_to_public(self, dataset_uuid):
        lnk_path = self.dataset_asset_directory_absolute_path(dataset_uuid)
        pub_path = file_helper.ensureTrailingSlashURL(
            self.appconfig['GLOBUS_PUBLIC_ENDPOINT_FILEPATH']) + dataset_uuid
        try:
            os.unlink(lnk_path)
        except OSError:
            print("Error unlinking " + lnk_path, file=sys.stderr)

        if os.path.exists(pub_path):
            file_helper.linkDir(pub_path, lnk_path)
Example no. 8
 def create_dataset(self, ingest_row):
     '''
     {
         "dataset_name": "Test Name",
         "dataset_description": "This is a test description",
         "dataset_collection_uuid": "ab93b3983acge938294857fe292429234",
         "source_uuids": ["ea93b3983acge938294857fe292429234", "f343b3983acge938294857fe292429234", "cdeb3983acge938294857fe292429234"],
         "data_types": ["PAS"],
         "creator_email": "*****@*****.**",
         "creator_name": "Dataset Owner",
         "group_uuid": "193439-29392-2939243",
         "group_name": "HuBMAP-Test",
         "contains_human_genomic_sequences": "no"  
     }
     '''
     recd = {}
     recd['dataset_name'] = ingest_row['name'].encode(encoding='ascii', errors='ignore').decode('ascii')
     recd['dataset_description'] = ingest_row['description'].encode(encoding='ascii', errors='ignore').decode('ascii')
     if not ingest_row['collection_key'].startswith("NO_COLLECTION"):
         recd['dataset_collection_uuid'] = self.lookup_collection_uuid(ingest_row['collection_key'])
     source_uuids = ingest_row['parent_uuid'].split('|')
     recd['source_uuids'] = source_uuids
     data_type = []
     dtype = ingest_row['assay_type']
     if not string_helper.isBlank(dtype) and dtype.upper() == 'LC':
         dtype = 'LC-MS-untargeted'
     data_type.append(dtype)
     recd['data_types'] = data_type
     recd['creator_email'] = ingest_row['creator_email']
     recd['creator_name'] = ingest_row['creator_name']
     recd['group_uuid'] = ingest_row['group_id']
     recd['group_name'] = ingest_row['group_name']
     recd['contains_human_genomic_sequences'] = 'no'
     
     url = file_helper.ensureTrailingSlashURL(self.ingest_api_url) + "datasets/ingest"
     heads = {'Authorization': 'Bearer ' + self.nexus_token, 'Content-Type': 'application/json'}
     recds = json.dumps(recd)
     resp = requests.post(url, headers=heads, data=recds, verify=False)
     status_code = resp.status_code
     if status_code < 200 or status_code >= 300:
         print("Unable to create RECORDSET for parent id:" + ingest_row['parent_display_id'] + " assay type:" + ingest_row['assay_type'], file=sys.stderr)
         resp.raise_for_status()
     val = resp.json()
     if val is None or 'uuid' not in val:
         raise Exception("No UUID returned on creation of DATASET parent id:" + ingest_row['parent_display_id'] + " assay type:" + ingest_row['assay_type'])
     print("Created Dataset ingest_id:" + ingest_row['ingest_id'] + " UUID:" + val['uuid'] + " parent id:" + ingest_row['parent_display_id'] + " assay type:" + ingest_row['assay_type'])
     return val['uuid']
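
For illustration, a hedged sketch of the ingest_row this method consumes, reusing the placeholder values from the docstring (parent_uuid is pipe-separated when a dataset has multiple parents; an 'LC' assay type is rewritten to 'LC-MS-untargeted'):

    ingest_row = {
        'ingest_id': 'INGEST-0001',          # placeholder id
        'name': 'Test Name',
        'description': 'This is a test description',
        'collection_key': 'NO_COLLECTION',   # anything else is resolved via lookup_collection_uuid
        'parent_uuid': 'ea93b3983acge938294857fe292429234|f343b3983acge938294857fe292429234',
        'parent_display_id': 'PARENT-0001',  # placeholder id
        'assay_type': 'PAS',
        'creator_email': '*****@*****.**',
        'creator_name': 'Dataset Owner',
        'group_id': '193439-29392-2939243',
        'group_name': 'HuBMAP-Test',
    }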
Example no. 9

    def __init__(self, prop_file_name):

        self._prop_file_name = prop_file_name

        #check to make sure we have a properties file
        if not os.path.isfile(prop_file_name):
            raise Exception("Property file " + prop_file_name +
                            " is required and does not exist.")

        #Open the properties file
        propMgr = Property()
        self.props = propMgr.load_property_files(prop_file_name)

        self.entity_api_url = file_helper.ensureTrailingSlashURL(
            self.get_prop("entity.api.url"))
        self.nexus_token = self.get_prop("nexus.token").strip()
        self.globus_system_dir = self.get_prop("globus.system.dir").strip()
        self.globus_base_url = file_helper.removeTrailingSlashURL(
            self.get_prop("globus.single.file.base.url").strip())
        self.test_group_id = self.get_prop("test.group.id").strip().lower()
        self.globus_app_base_url = self.get_prop("globus.app.base.url").strip()
Example no. 10
    def ingest_sample_metadata(self, metadata, uuid, sample_id, row_num):
        update_recd = {}
        update_recd['metadata'] = metadata

        header = {
            'Authorization': 'Bearer ' + self.token,
            'Content-Type': 'application/json'
        }
        url = file_helper.ensureTrailingSlashURL(
            self.entity_api_url) + "samples/" + uuid
        resp = requests.put(url, headers=header, data=json.dumps(update_recd))
        status_code = resp.status_code
        if status_code != 200:
            msg = "unable to update metadata on Sample info for id: " + sample_id + " row number " + str(
                row_num) + ".  "
            print(msg + " Check the log file " + self.log_filename)
            self.logger.error(msg)
            self.logger.error("Web service return code " + str(status_code))
            self.logger.error("Web service response " + resp.text)
            return False
        return True
Example no. 11
def upload_validate(upload_uuid):
    ingest_helper = IngestFileHelper(app.config)
    url = commons_file_helper.ensureTrailingSlashURL(
        app.config['ENTITY_WEBSERVICE_URL']) + 'entities/' + upload_uuid
    auth_headers = {
        'Authorization': request.headers["AUTHORIZATION"],
        'X-Hubmap-Application': 'ingest-api'
    }
    resp = requests.get(url, headers=auth_headers)
    if resp.status_code >= 300:
        return Response(resp.text, resp.status_code)
    upload = resp.json()
    prev_status = upload['status']
    upload_path = ingest_helper.get_upload_directory_absolute_path(
        None, upload['group_uuid'], upload_uuid)
    if not os.path.exists(upload_path):
        return Response(f"upload directory does not exist: {upload_path}", 500)
    mock_cfg_path = commons_file_helper.ensureTrailingSlash(
        upload_path) + "mock_run.json"
    if not os.path.exists(mock_cfg_path):
        return Response(
            f"mock configuration json file does not exist: {mock_cfg_path}",
            500)
    ''' Example mock_run.json
    {
      "mock_processing_time_seconds": 20,
      "new_status_message": "new message",
      "new_status": "Invalid"
    }
    '''
    #read the mock_run json file into a dict
    with open(mock_cfg_path) as json_file:
        mock_run = json.load(json_file)

    x = threading.Thread(
        target=__apply_mock_run,
        args=[mock_run, upload_path, upload_uuid, auth_headers, prev_status])
    x.start()

    return Response("Accepted", 202)
Example no. 12
 def import_rui_location(self, ingest_row):
     if 'local_path' in ingest_row and not string_helper.isBlank(ingest_row['local_path']) and os.path.isfile(ingest_row['local_path']):
         with open(ingest_row['local_path']) as file:
             rui_location = file.read().replace("\n", " ")
             
         recd = {}
         recd['rui_location'] = rui_location
         
         url = file_helper.ensureTrailingSlashURL(self.entity_api_url) + "entities/sample/" + ingest_row['parent_uuid']
         heads = {'Authorization': 'Bearer ' + self.nexus_token, 'Content-Type': 'application/json'}
         recds = json.dumps(recd)
         print("URL:" + url)
         print("DATA:" + recds)
         resp = requests.put(url, headers=heads, data=recds, verify=False)
         status_code = resp.status_code
         if status_code != 200:
             print("Unable to import location info for file: " + ingest_row['local_path'], file=sys.stderr)
             resp.raise_for_status()
         print("Imported location info ingest_id:" + ingest_row['ingest_id'] + " parent id:" + ingest_row['parent_display_id'])
         return "COMPLETE"
     else:
         return("FAIL:RUI FILE NOT FOUND")
Example no. 13
def __apply_mock_run(mock_run_data, upload_path, upload_uuid, auth_headers,
                     prev_status):
    try:
        wait_seconds = mock_run_data['mock_processing_time_seconds']
        update_rcd = {'status': prev_status}
        if 'new_status' in mock_run_data:
            update_rcd['status'] = mock_run_data['new_status']
        if 'new_status_message' in mock_run_data:
            update_rcd['validation_message'] = mock_run_data[
                'new_status_message']
        update_url = commons_file_helper.ensureTrailingSlashURL(
            app.config['ENTITY_WEBSERVICE_URL']) + 'entities/' + upload_uuid

        time.sleep(wait_seconds)
        resp = requests.put(update_url, json=update_rcd, headers=auth_headers)
        if resp.status_code >= 300:
            print(
                f"ERROR calling Upload update method received status: {resp.status_code} with message: {resp.text}"
            )

    except Exception:
        print(f"Exception while applying mock run for Upload: {upload_uuid}")
        traceback.print_exc()
Example no. 14

 def dataset_asset_directory_absolute_path(self, dataset_uuid):
     return file_helper.ensureTrailingSlashURL(
         self.appconfig['HUBMAP_WEBSERVICE_FILEPATH']) + dataset_uuid
Example no. 15

def get_prop(prop_name):
    if prop_name not in props:
        raise Exception("Required property " + prop_name + " not found in " +
                        prop_file_name)
    val = props[prop_name]
    if string_helper.isBlank(val):
        raise Exception("Required property " + prop_name + " from " +
                        prop_file_name + " is blank")
    return props[prop_name]


#check to make sure we have a properties file
if not os.path.isfile(prop_file_name):
    raise Exception("Property file " + prop_file_name +
                    " is required and does not exist.")

#Open the properties file
propMgr = Property()
props = propMgr.load_property_files(prop_file_name)
root_path = file_helper.ensureTrailingSlashURL(get_prop("root.path.to.data"))
uuid_api_url = file_helper.ensureTrailingSlashURL(get_prop("uuid.api.url"))
nexus_token = get_prop("nexus.token").strip()

dirs = os.listdir(root_path)
printHeader()
for name in dirs:
    if not name.startswith("LC "):
        handleMultDatasetDir(os.path.join(root_path, name))

    print(name)
Example no. 16

def convert_assay_sheet_to_csv(dir_path, xlsx_file_name, sheet_name,
                               data_type):
    xlsx_file = os.path.join(dir_path, xlsx_file_name).strip()
    if not os.path.isfile(xlsx_file):
        raise Exception("File does not exist: " + xlsx_file)
    if xlsx_file.endswith(".xlsx"):
        out_file = os.path.join(dir_path, data_type + ".csv")
    else:
        raise Exception("File not of type .xlsx: " + xlsx_file)
    read_file = pandas.read_excel(xlsx_file, sheet_name=sheet_name)
    read_file.to_csv(out_file, index=False, header=True)


#Open the properties file
propMgr = Property()
props = propMgr.load_property_files(prop_file_name)
root_path = file_helper.ensureTrailingSlashURL(get_prop("root.path.to.data"))
clinical_path = os.path.join(root_path, "metadata/clinical/")
check_dir(root_path)
assay_path = os.path.join(root_path, "metadata/assay/")
check_dir(clinical_path)
check_dir(assay_path)
for clin_file in os.listdir(clinical_path):
    file_path = os.path.join(clinical_path, clin_file)
    if os.path.isfile(file_path) and clin_file.endswith(".xlsx"):
        convert_clinical_metadata_file(clinical_path, clin_file)

for assay in assay_meta_sheet_names:
    path_to_file = os.path.join(assay_path, assay_meta_file_name)
    if not os.path.isfile(path_to_file):
        raise Exception("Required file does not exist: " + path_to_file)
    convert_assay_sheet_to_csv(assay_path, assay_meta_file_name,
                               assay, assay)  # trailing args assumed: the sheet name is reused as data_type
Example no. 17
    def create_derived_datastage(self, nexus_token, json_data):
        auth_header = {'Authorization': 'Bearer ' + nexus_token}
        app_header = {'X-Hubmap-Application': 'ingest-api'}

        source_dataset_uuids = json_data['source_dataset_uuids']
        source_uuids = []

        if isinstance(source_dataset_uuids, str):
            # Create a list from this string
            source_uuids = [source_dataset_uuids]
        elif isinstance(source_dataset_uuids, list):
            source_uuids = source_dataset_uuids
        else:
            raise TypeError(
                "json_data['source_dataset_uuids'] must either be a string or a list"
            )

        # All of the source datasets come from the same data provider
        # Get the group_uuid based on the first source dataset via entity-api
        first_source_uuid = source_uuids[0]
        get_url = file_helper.ensureTrailingSlashURL(
            self.confdata['ENTITY_WEBSERVICE_URL']
        ) + 'entities/' + first_source_uuid
        response = requests.get(get_url, headers=auth_header, verify=False)

        if response.status_code != 200:
            raise HTTPException(
                "Error retrieving source dataset " + first_source_uuid,
                response.status_code)

        first_source_dataset = response.json()

        # Create the derived dataset via entity-api
        # The entity-api validates that each provided source dataset uuid exists
        # The derived dataset will have the same group_uuid as the source datasets
        derived_dataset_to_post = {
            'title': json_data['derived_dataset_name'],
            'data_types': json_data['derived_dataset_types'],
            'direct_ancestor_uuids': source_uuids,
            'contains_human_genetic_sequences': False,
            'group_uuid': first_source_dataset['group_uuid']
        }

        post_url = file_helper.ensureTrailingSlashURL(
            self.confdata['ENTITY_WEBSERVICE_URL']) + 'entities/dataset'

        # Merge the auth_header and app_header for creating new Dataset
        response = requests.post(post_url,
                                 json=derived_dataset_to_post,
                                 headers={
                                     **auth_header,
                                     **app_header
                                 },
                                 verify=False)

        if response.status_code != 200:
            raise HTTPException(
                "Error creating derived dataset: " + response.text,
                response.status_code)

        derived_dataset = response.json()

        file_help = IngestFileHelper(self.confdata)
        sym_path = os.path.join(
            str(self.confdata['HUBMAP_WEBSERVICE_FILEPATH']),
            derived_dataset['uuid'])

        new_directory_path = file_help.get_dataset_directory_absolute_path(
            derived_dataset, derived_dataset['group_uuid'],
            derived_dataset['uuid'])
        new_path = IngestFileHelper.make_directory(new_directory_path,
                                                   sym_path)

        try:
            x = threading.Thread(target=file_help.set_dir_permissions,
                                 args=['consortium', new_path])
            x.start()
        except Exception as e:
            logger.error(e, exc_info=True)

        response_data = {
            'derived_dataset_uuid': derived_dataset['uuid'],
            'group_uuid': derived_dataset['group_uuid'],
            'group_display_name': derived_dataset['group_name'],
            'full_path': new_path
        }

        return response_data
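
The json_data payload consumed above needs only the three keys the method reads; a sketch with placeholder values (the data type value is invented):

    json_data = {
        'source_dataset_uuids': ['ea93b3983acge938294857fe292429234'],  # a single uuid string is also accepted
        'derived_dataset_name': 'Derived dataset title',
        'derived_dataset_types': ['image_pyramid'],  # placeholder type
    }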