def test_study_files_get_req(self):
    """Check the FASTQ payload for study 1 and the mismatch error path.

    Two FASTQ requests for study 1 / prep template 1 must return the same
    payload. Requesting prep template 1 together with a freshly created
    study must raise IncompetentQiitaDeveloperError.
    """
    artifact_label = ('Identification of the Microbiomes for '
                      'Cannabis Soils (1) - Raw data 1 (1)')
    expected = {
        'status': 'success',
        'message': '',
        'remaining': ['uploaded_file.txt'],
        'file_types': [('raw_barcodes', True, []),
                       ('raw_forward_seqs', True, []),
                       ('raw_reverse_seqs', False, [])],
        'num_prefixes': 1,
        'artifacts': [(1, artifact_label)],
    }

    # NOTE(review): both requests are textually identical here (the user id
    # appears redacted); presumably they were issued for two different
    # users -- confirm.
    first = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(first, expected)

    second = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(second, expected)

    # Create an additional study that has no relation to prep template 1
    study_info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "FCM",
        "study_description": "DESC",
        "study_abstract": "ABS",
        "emp_person_id": qdb.study.StudyPerson(2),
        "principal_investigator_id": qdb.study.StudyPerson(3),
        "lab_person_id": qdb.study.StudyPerson(1)
    }
    owner = qdb.user.User('*****@*****.**')
    new_study = qdb.study.Study.create(
        owner, "Some New Study to get files", study_info)

    # An unrelated study_id / prep_template_id pair must be rejected
    with self.assertRaises(IncompetentQiitaDeveloperError):
        study_files_get_req('*****@*****.**', new_study.id, 1, 'FASTQ')
def test_study_files_get_req(self):
    """Verify study_files_get_req output and its study/prep consistency check.

    The FASTQ request for study 1 and prep template 1 is issued twice and
    compared with the same expected dictionary. A new study is then created
    and used with prep template 1, which must raise
    IncompetentQiitaDeveloperError.
    """
    no_matched_files = [
        ('raw_barcodes', True, []),
        ('raw_forward_seqs', True, []),
        ('raw_reverse_seqs', False, []),
    ]
    raw_data_artifact = (1, 'Identification of the Microbiomes for '
                            'Cannabis Soils (1) - Raw data 1 (1)')
    expected_payload = {
        'status': 'success',
        'message': '',
        'remaining': ['uploaded_file.txt'],
        'file_types': no_matched_files,
        'num_prefixes': 1,
        'artifacts': [raw_data_artifact],
    }

    # NOTE(review): the two requests below read identically because the
    # user id looks redacted; presumably they targeted different users --
    # confirm.
    observed = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(observed, expected_payload)
    observed = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(observed, expected_payload)

    # Register a new study for the negative check below
    new_study_info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "FCM",
        "study_description": "DESC",
        "study_abstract": "ABS",
        "emp_person_id": qdb.study.StudyPerson(2),
        "principal_investigator_id": qdb.study.StudyPerson(3),
        "lab_person_id": qdb.study.StudyPerson(1)
    }
    creator = qdb.user.User('*****@*****.**')
    new_study = qdb.study.Study.create(
        creator, "Some New Study to get files", new_study_info)

    # The function must refuse a study_id and a prep_template_id that are
    # unrelated to each other
    with self.assertRaises(IncompetentQiitaDeveloperError):
        study_files_get_req('*****@*****.**', new_study.id, 1, 'FASTQ')
def test_study_files_get_req_multiple(self):
    """Check how uploaded files are assigned to run prefixes.

    A prep template with run prefixes ``test_1`` and ``test_2`` is created
    for study 1. Forward files, reverse files and finally one extra file
    are added to the study's upload folder, and the payload returned by
    study_files_get_req is verified after each step. The prep template is
    deleted at the end, even when an assertion fails.
    """
    study_id = 1

    # adding a new prep for testing
    PREP = qdb.metadata_template.prep_template.PrepTemplate
    prep_info_dict = {
        'SKB7.640196': {'run_prefix': 'test_1'},
        'SKB8.640193': {'run_prefix': 'test_2'}
    }
    prep_info = pd.DataFrame.from_dict(prep_info_dict, orient='index',
                                       dtype=str)
    pt = npt.assert_warns(qdb.exceptions.QiitaDBWarning, PREP.create,
                          prep_info, qdb.study.Study(study_id),
                          "Metagenomic")

    # getting the upload folder so we can test
    study_upload_dir = join(
        qdb.util.get_mountpoint("uploads")[0][1], str(study_id))

    def add_uploads(filenames):
        # Create empty files in the upload folder and register them in
        # self._clean_up_files.
        for name in filenames:
            fpt = join(study_upload_dir, name)
            # Default buffering: open(..., 'w', 0) requests unbuffered text
            # I/O, which raises ValueError on Python 3.
            open(fpt, 'w').close()
            self._clean_up_files.append(fpt)

    try:
        # adding just forward per sample FASTQ to the upload folder
        add_uploads(['test_1.R1.fastq.gz', 'test_2.R1.fastq.gz'])
        obs = study_files_get_req(
            '*****@*****.**', 1, pt.id, 'per_sample_FASTQ')
        exp = {
            'status': 'success',
            'num_prefixes': 2,
            'artifacts': [],
            'remaining': ['uploaded_file.txt'],
            'message': '',
            'file_types': [
                ('raw_forward_seqs', True,
                 ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
                ('raw_reverse_seqs', False, [])]
        }
        self.assertEqual(obs, exp)

        # let's add reverse
        add_uploads(['test_1.R2.fastq.gz', 'test_2.R2.fastq.gz'])
        obs = study_files_get_req(
            '*****@*****.**', 1, pt.id, 'per_sample_FASTQ')
        exp = {
            'status': 'success',
            'num_prefixes': 2,
            'artifacts': [],
            'remaining': ['uploaded_file.txt'],
            'message': '',
            'file_types': [
                ('raw_forward_seqs', True,
                 ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
                ('raw_reverse_seqs', False,
                 ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz'])]
        }
        self.assertEqual(obs, exp)

        # let's add an extra file that also matches the 'test_1' prefix;
        # with 3 matches its files are expected back in 'remaining'
        add_uploads(['test_1.R3.fastq.gz'])
        obs = study_files_get_req(
            '*****@*****.**', 1, pt.id, 'per_sample_FASTQ')
        exp = {
            'status': 'success',
            'num_prefixes': 2,
            'artifacts': [],
            'remaining': ['test_1.R1.fastq.gz', 'test_1.R2.fastq.gz',
                          'test_1.R3.fastq.gz', 'uploaded_file.txt'],
            'message': "Check these run_prefix:\n'test_1' has 3 matches.",
            'file_types': [
                ('raw_forward_seqs', True, ['test_2.R1.fastq.gz']),
                ('raw_reverse_seqs', False, ['test_2.R2.fastq.gz'])]
        }
        self.assertEqual(obs, exp)

        # now if we select FASTQ we have 3 columns so the extra file
        # should go to the 3rd column
        obs = study_files_get_req('*****@*****.**', 1, pt.id, 'FASTQ')
        exp = {
            'status': 'success',
            'num_prefixes': 2,
            'remaining': ['uploaded_file.txt'],
            'message': '',
            'artifacts': [(1, 'Identification of the Microbiomes for '
                              'Cannabis Soils (1) - Raw data 1 (1)')],
            'file_types': [
                ('raw_barcodes', True,
                 ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
                ('raw_forward_seqs', True,
                 ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz']),
                ('raw_reverse_seqs', False, ['test_1.R3.fastq.gz'])]
        }
        self.assertEqual(obs, exp)
    finally:
        # Remove the prep template even if an assertion above failed
        PREP.delete(pt.id)
def test_study_files_get_req(self):
    """Check study_files_get_req payloads for study 1 and for a new study.

    Covers FASTQ requests against study 1, FASTQ requests against a newly
    created study (no remaining files expected), and an SFF request after
    two ``.sff`` files are written to the new study's upload folder.
    """
    artifact_label = ('Identification of the Microbiomes for '
                      'Cannabis Soils (1) - Raw data 1 (1)')
    study_one_payload = {
        'status': 'success',
        'message': '',
        'remaining': ['uploaded_file.txt'],
        'file_types': [('raw_barcodes', True, []),
                       ('raw_forward_seqs', True, []),
                       ('raw_reverse_seqs', False, [])],
        'num_prefixes': 1,
        'artifacts': [(1, artifact_label)],
    }
    # Payloads for the new study are derived from the study 1 payload
    new_study_payload = dict(study_one_payload, remaining=[])
    new_study_no_artifacts = dict(new_study_payload, artifacts=[])

    # NOTE(review): these two requests are textually identical (the user id
    # appears redacted); presumably two different users -- confirm.
    observed = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(observed, study_one_payload)
    observed = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(observed, study_one_payload)

    study_info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "FCM",
        "study_description": "DESC",
        "study_abstract": "ABS",
        "emp_person_id": qdb.study.StudyPerson(2),
        "principal_investigator_id": qdb.study.StudyPerson(3),
        "lab_person_id": qdb.study.StudyPerson(1)
    }
    owner = qdb.user.User('*****@*****.**')
    new_study = qdb.study.Study.create(
        owner, "Some New Study to get files", [1], study_info)

    observed = study_files_get_req(
        '*****@*****.**', new_study.id, 1, 'FASTQ')
    self.assertEqual(observed, new_study_payload)

    # NOTE(review): same call text as above but no artifacts are expected;
    # presumably a different (redacted) user -- confirm.
    observed = study_files_get_req(
        '*****@*****.**', new_study.id, 1, 'FASTQ')
    self.assertEqual(observed, new_study_no_artifacts)

    # Create some 'sff' files
    uploads_root = qdb.util.get_mountpoint("uploads")[0][1]
    study_upload_dir = join(uploads_root, str(new_study.id))
    sff_paths = []
    for _ in range(2):
        handle, sff_path = mkstemp(suffix=".sff", dir=study_upload_dir)
        close(handle)
        with open(sff_path, 'w') as sff_file:
            sff_file.write('\n')
        sff_paths.append(sff_path)
    self._clean_up_files.extend(sff_paths)

    observed = study_files_get_req(
        '*****@*****.**', new_study.id, 1, 'SFF')
    sff_payload = {
        'status': 'success',
        'message': '',
        'remaining': [basename(path) for path in sorted(sff_paths)],
        'file_types': [('raw_sff', True, [])],
        'num_prefixes': 0,
        'artifacts': [],
    }
    self.assertEqual(observed, sff_payload)
def test_study_files_get_req(self):
    """Check FASTQ payloads of study_files_get_req for two studies.

    Study 1 is queried twice with the same expectation. A new study is then
    created and queried twice: no remaining files are expected for it, and
    the second query expects an empty artifacts list.
    """
    empty_file_types = [('raw_barcodes', True, []),
                        ('raw_forward_seqs', True, []),
                        ('raw_reverse_seqs', False, [])]
    raw_data = (1, 'Identification of the Microbiomes for '
                   'Cannabis Soils (1) - Raw data 1 (1)')

    expected_study_one = {
        'status': 'success',
        'message': '',
        'remaining': ['uploaded_file.txt'],
        'file_types': empty_file_types,
        'num_prefixes': 1,
        'artifacts': [raw_data],
    }
    # NOTE(review): the paired requests read identically because the user
    # id looks redacted; presumably different users -- confirm.
    result = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(result, expected_study_one)
    result = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(result, expected_study_one)

    study_info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "FCM",
        "study_description": "DESC",
        "study_abstract": "ABS",
        "emp_person_id": qdb.study.StudyPerson(2),
        "principal_investigator_id": qdb.study.StudyPerson(3),
        "lab_person_id": qdb.study.StudyPerson(1)
    }
    creator = qdb.user.User('*****@*****.**')
    new_study = qdb.study.Study.create(
        creator, "Some New Study", [1], study_info)

    # First request on the new study: the artifact is still listed
    result = study_files_get_req(
        '*****@*****.**', new_study.id, 1, 'FASTQ')
    expected_with_artifact = {
        'status': 'success',
        'message': '',
        'remaining': [],
        'file_types': empty_file_types,
        'num_prefixes': 1,
        'artifacts': [raw_data],
    }
    self.assertEqual(result, expected_with_artifact)

    # Second request on the new study: no artifacts expected
    result = study_files_get_req(
        '*****@*****.**', new_study.id, 1, 'FASTQ')
    expected_without_artifact = dict(expected_with_artifact, artifacts=[])
    self.assertEqual(result, expected_without_artifact)
def test_study_files_get_req_multiple(self):
    """Verify run-prefix matching of uploaded files step by step.

    Creates a prep template for study 1 with run prefixes ``test_1`` and
    ``test_2``, then adds forward files, reverse files and one extra
    ``test_1`` file to the upload folder, checking the payload of
    study_files_get_req after each addition. The prep template is always
    deleted before the test returns.
    """
    study_id = 1

    # adding a new prep for testing
    PREP = qdb.metadata_template.prep_template.PrepTemplate
    prep_info_dict = {
        'SKB7.640196': {'run_prefix': 'test_1'},
        'SKB8.640193': {'run_prefix': 'test_2'}
    }
    prep_info = pd.DataFrame.from_dict(prep_info_dict, orient='index',
                                       dtype=str)
    pt = npt.assert_warns(
        qdb.exceptions.QiitaDBWarning, PREP.create, prep_info,
        qdb.study.Study(study_id), "Metagenomic")

    # getting the upload folder so we can test
    study_upload_dir = join(
        qdb.util.get_mountpoint("uploads")[0][1], str(study_id))

    def touch_in_upload_dir(filenames):
        # Create each file empty in the upload folder and record it in
        # self._clean_up_files.
        for f in filenames:
            fpt = join(study_upload_dir, f)
            # No buffering argument: unbuffered (0) text-mode files are
            # rejected with ValueError on Python 3.
            open(fpt, 'w').close()
            self._clean_up_files.append(fpt)

    forward = ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']
    reverse = ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz']

    try:
        # adding just forward per sample FASTQ to the upload folder
        touch_in_upload_dir(['test_1.R1.fastq.gz', 'test_2.R1.fastq.gz'])
        obs = study_files_get_req(
            '*****@*****.**', 1, pt.id, 'per_sample_FASTQ')
        exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
               'remaining': ['uploaded_file.txt'], 'message': '',
               'file_types': [
                   ('raw_forward_seqs', True, forward),
                   ('raw_reverse_seqs', False, [])]}
        self.assertEqual(obs, exp)

        # let's add reverse
        touch_in_upload_dir(['test_1.R2.fastq.gz', 'test_2.R2.fastq.gz'])
        obs = study_files_get_req(
            '*****@*****.**', 1, pt.id, 'per_sample_FASTQ')
        exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
               'remaining': ['uploaded_file.txt'], 'message': '',
               'file_types': [
                   ('raw_forward_seqs', True, forward),
                   ('raw_reverse_seqs', False, reverse)]}
        self.assertEqual(obs, exp)

        # let's add an extra file that matches 'test_1' as well; the
        # prefix then has 3 matches and its files stay in 'remaining'
        touch_in_upload_dir(['test_1.R3.fastq.gz'])
        obs = study_files_get_req(
            '*****@*****.**', 1, pt.id, 'per_sample_FASTQ')
        exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
               'remaining': ['test_1.R1.fastq.gz', 'test_1.R2.fastq.gz',
                             'test_1.R3.fastq.gz', 'uploaded_file.txt'],
               'message': "Check these run_prefix:\n'test_1' has 3 "
                          "matches.",
               'file_types': [
                   ('raw_forward_seqs', True, ['test_2.R1.fastq.gz']),
                   ('raw_reverse_seqs', False, ['test_2.R2.fastq.gz'])]}
        self.assertEqual(obs, exp)

        # now if we select FASTQ we have 3 columns so the extra file
        # should go to the 3rd column
        obs = study_files_get_req('*****@*****.**', 1, pt.id, 'FASTQ')
        exp = {'status': 'success', 'num_prefixes': 2,
               'remaining': ['uploaded_file.txt'], 'message': '',
               'artifacts': [(1, 'Identification of the Microbiomes for '
                                 'Cannabis Soils (1) - Raw data 1 (1)')],
               'file_types': [
                   ('raw_barcodes', True, forward),
                   ('raw_forward_seqs', True, reverse),
                   ('raw_reverse_seqs', False, ['test_1.R3.fastq.gz'])]}
        self.assertEqual(obs, exp)
    finally:
        # Always drop the prep template created for this test
        PREP.delete(pt.id)
def test_study_files_get_req(self):
    """Exercise study_files_get_req with FASTQ and SFF requests.

    FASTQ payloads are checked for study 1 and for a newly created study.
    Two ``.sff`` files are then written to the new study's upload folder
    and the SFF payload is expected to list them, sorted, as remaining.
    """
    fastq_columns = [('raw_barcodes', True, []),
                     ('raw_forward_seqs', True, []),
                     ('raw_reverse_seqs', False, [])]
    raw_data_artifacts = [(1, 'Identification of the Microbiomes for '
                              'Cannabis Soils (1) - Raw data 1 (1)')]

    def fastq_payload(remaining, artifacts):
        # Build the expected FASTQ response; only these two fields vary
        return {'status': 'success', 'message': '',
                'remaining': remaining,
                'file_types': fastq_columns,
                'num_prefixes': 1,
                'artifacts': artifacts}

    # NOTE(review): identical call text twice (user id appears redacted);
    # presumably two different users -- confirm.
    got = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(
        got, fastq_payload(['uploaded_file.txt'], raw_data_artifacts))
    got = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(
        got, fastq_payload(['uploaded_file.txt'], raw_data_artifacts))

    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "FCM",
        "study_description": "DESC",
        "study_abstract": "ABS",
        "emp_person_id": qdb.study.StudyPerson(2),
        "principal_investigator_id": qdb.study.StudyPerson(3),
        "lab_person_id": qdb.study.StudyPerson(1)
    }
    study_owner = qdb.user.User('*****@*****.**')
    new_study = qdb.study.Study.create(
        study_owner, "Some New Study", [1], info)

    got = study_files_get_req('*****@*****.**', new_study.id, 1, 'FASTQ')
    self.assertEqual(got, fastq_payload([], raw_data_artifacts))

    # NOTE(review): same call text as above yet no artifacts are expected;
    # presumably a different (redacted) user -- confirm.
    got = study_files_get_req('*****@*****.**', new_study.id, 1, 'FASTQ')
    self.assertEqual(got, fastq_payload([], []))

    # Create some 'sff' files
    study_upload_dir = join(
        qdb.util.get_mountpoint("uploads")[0][1], str(new_study.id))
    created = []
    for _ in range(2):
        descriptor, path = mkstemp(suffix=".sff", dir=study_upload_dir)
        close(descriptor)
        with open(path, 'w') as out:
            out.write('\n')
        created.append(path)
    self._clean_up_files.extend(created)

    got = study_files_get_req('*****@*****.**', new_study.id, 1, 'SFF')
    remaining_names = [basename(p) for p in sorted(created)]
    self.assertEqual(got, {'status': 'success',
                           'message': '',
                           'remaining': remaining_names,
                           'file_types': [('raw_sff', True, [])],
                           'num_prefixes': 0,
                           'artifacts': []})
def test_study_files_get_req(self):
    """Compare FASTQ payloads of study_files_get_req with expected dicts.

    Four requests are made: two for study 1 and two for a study created
    inside the test. The new study expects no remaining files, and its
    second request expects no artifacts.
    """
    label = ('Identification of the Microbiomes for '
             'Cannabis Soils (1) - Raw data 1 (1)')
    columns = [('raw_barcodes', True, []),
               ('raw_forward_seqs', True, []),
               ('raw_reverse_seqs', False, [])]

    # Expected payloads, in the order the requests are issued below
    study_one = {'status': 'success', 'message': '',
                 'remaining': ['uploaded_file.txt'],
                 'file_types': columns,
                 'num_prefixes': 1,
                 'artifacts': [(1, label)]}
    fresh_with_artifact = {'status': 'success', 'message': '',
                           'remaining': [],
                           'file_types': columns,
                           'num_prefixes': 1,
                           'artifacts': [(1, label)]}
    fresh_without_artifact = {'status': 'success', 'message': '',
                              'remaining': [],
                              'file_types': columns,
                              'num_prefixes': 1,
                              'artifacts': []}

    # NOTE(review): each pair of requests is textually identical (the user
    # id appears redacted); presumably each pair used two different
    # users -- confirm.
    response = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(response, study_one)
    response = study_files_get_req('*****@*****.**', 1, 1, 'FASTQ')
    self.assertEqual(response, study_one)

    details = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "FCM",
        "study_description": "DESC",
        "study_abstract": "ABS",
        "emp_person_id": qdb.study.StudyPerson(2),
        "principal_investigator_id": qdb.study.StudyPerson(3),
        "lab_person_id": qdb.study.StudyPerson(1)
    }
    user = qdb.user.User('*****@*****.**')
    new_study = qdb.study.Study.create(user, "Some New Study", [1], details)

    response = study_files_get_req(
        '*****@*****.**', new_study.id, 1, 'FASTQ')
    self.assertEqual(response, fresh_with_artifact)
    response = study_files_get_req(
        '*****@*****.**', new_study.id, 1, 'FASTQ')
    self.assertEqual(response, fresh_without_artifact)