def fetch_and_preprocess_irods_metadata_by_metadata(
        search_criteria, irods_zone, issues_dict, reference):
    """
    Fetch the iRODS metadata of every file matching the given criteria and check it.

    The raw metadata is retrieved with
    iRODSMetadataProvider.retrieve_raw_files_metadata_by_metadata, each result is converted with
    IrodsSeqFileMetadata.from_raw_metadata, and the outcome of check_metadata(reference) is appended
    to the entry of issues_dict for that file path.
    :param search_criteria: a dict formed of key = attr name, val = attr value. The operator is by default =.
    :param irods_zone: the irods zone where to search for the data matching the criteria given
    :param issues_dict: an existing dictionary of issues keyed by file path, updated in place. A path that
                        is not in the dictionary yet gets a new list, so a plain dict works as well as a
                        defaultdict(list).
    :param reference: passed unchanged to check_metadata for every file found
    :return: a dict of key: fpath, value: the metadata object built from the raw iRODS metadata for that path
    :raises SystemExit: with status 1 if retrieving the raw metadata raises any exception
    """
    irods_metadata_by_path = {}
    try:
        all_raw_metadata = iRODSMetadataProvider.retrieve_raw_files_metadata_by_metadata(
            search_criteria, irods_zone)
    except Exception as e:
        # A failed retrieval is fatal. The message goes to stderr so that it is
        # not mixed into whatever the program writes to stdout.
        sys.stderr.write("%s\n" % (e,))
        sys.exit(1)
    else:
        for raw_metadata in all_raw_metadata:
            file_metadata = IrodsSeqFileMetadata.from_raw_metadata(raw_metadata)
            check_results = file_metadata.check_metadata(reference)
            irods_metadata_by_path[raw_metadata.fpath] = file_metadata
            # setdefault avoids a KeyError when the caller passes a plain dict
            # that has no entry for this path yet.
            issues_dict.setdefault(raw_metadata.fpath, []).extend(check_results)
    return irods_metadata_by_path
 def fetch_and_preprocess_irods_metadata_by_path(irods_fpaths, issues_dict,
                                                 reference):
     """
     Fetch the iRODS metadata of each given file path and check it.

     For every path the raw metadata is fetched with
     iRODSMetadataProvider.fetch_raw_file_metadata_by_path, converted with
     IrodsSeqFileMetadata.from_raw_metadata, and the outcome of check_metadata(reference) is appended
     to the entry of issues_dict for that path.
     :param irods_fpaths: an iterable of iRODS file paths to fetch the metadata for
     :param issues_dict: an existing dictionary of issues keyed by file path, updated in place. A path that
                         is not in the dictionary yet gets a new list, so a plain dict works as well as a
                         defaultdict(list).
     :param reference: passed unchanged to check_metadata for every file
     :return: a defaultdict of key: fpath, value: the metadata object built from the raw iRODS metadata
     :raises SystemExit: with status 1 as soon as fetching the raw metadata of any path raises an exception
     """
     # NOTE(review): the values stored here are metadata objects, never lists; the
     # defaultdict(list) container is kept only so that callers which look up a
     # path that was not requested keep getting an empty list instead of a KeyError.
     irods_metadata_dict = defaultdict(list)
     for fpath in irods_fpaths:
         try:
             raw_metadata = iRODSMetadataProvider.fetch_raw_file_metadata_by_path(
                 fpath)
         except Exception as e:
             # The first failed fetch is fatal. The message goes to stderr so
             # that it is not mixed into whatever the program writes to stdout.
             sys.stderr.write("%s\n" % (e,))
             sys.exit(1)
         else:
             file_metadata = IrodsSeqFileMetadata.from_raw_metadata(
                 raw_metadata)
             check_results = file_metadata.check_metadata(reference)
             irods_metadata_dict[fpath] = file_metadata
             # setdefault avoids a KeyError when the caller passes a plain dict
             # that has no entry for this path yet.
             issues_dict.setdefault(fpath, []).extend(check_results)
     return irods_metadata_dict
Exemple #3
0
 def test_from_raw_metadata_only_replicas(self):
     """
     Raw metadata built with only a file path and replicas must convert to a
     seq metadata object whose samples and libraries hold empty identifier
     sets and whose checksum_in_meta is an empty set.
     """
     first_replica = baton_models.DataObjectReplica(number=1, checksum="123abc")
     second_replica = baton_models.DataObjectReplica(number=2, checksum="abc")
     raw = IrodsRawFileMetadata(
         fpath='/seq/123.bam',
         file_replicas=[first_replica, second_replica])

     converted = IrodsSeqFileMetadata.from_raw_metadata(raw)

     # The same empty identifier mapping is expected for both entity types.
     no_identifiers = {
         'name': set(),
         'accession_number': set(),
         'internal_id': set(),
     }
     self.assertEqual(converted.samples, no_identifiers)
     self.assertEqual(converted.libraries, no_identifiers)
     self.assertEqual(converted.checksum_in_meta, set())