def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
    """_get_preprocess_fastq_cmd raises ValueError on bad per-sample FASTQ.

    A per_sample_FASTQ raw data is attached both a forward-reads file
    and a barcodes file; command generation must reject the barcodes
    file for this filetype.
    """
    metadata_dict = {
        'SKB8.640193': {'run_prefix': "sample1_failure", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'}}
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')

    # This part should fail
    fp1 = self.path_builder('sample1_failure.fastq')
    with open(fp1, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp1)
    fp2 = self.path_builder('sample1_failure.barcodes.fastq.gz')
    with open(fp2, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp2)

    forward_filepath_id = convert_to_id('raw_forward_seqs',
                                        'filepath_type')
    barcode_filepath_id = convert_to_id('raw_barcodes', 'filepath_type')
    fps = [(fp1, forward_filepath_id), (fp2, barcode_filepath_id)]
    filetype_id = get_filetypes()['per_sample_FASTQ']
    raw_data = RawData.create(filetype_id, [prep_template], fps)
    params = [p for p in list(PreprocessedIlluminaParams.iter())
              if p.name == 'per sample FASTQ defaults'][0]

    with self.assertRaises(ValueError):
        _get_preprocess_fastq_cmd(raw_data, prep_template, params)
def test_move_filepaths_to_upload_folder(self):
    """Files move from the raw data dir into the study's upload folder."""
    # setting up test, done here as this is the only test that uses these
    # files
    fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    study_id = 1
    rd = RawData.create(2, [Study(study_id)], [(seqs_fp, 1)])
    filepaths = rd.get_filepaths()
    # deleting reference so we can directly call
    # move_filepaths_to_upload_folder
    for fid, _, _ in filepaths:
        self.conn_handler.execute(
            "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s", (fid,))

    # moving filepaths
    move_filepaths_to_upload_folder(study_id, filepaths, self.conn_handler)

    # check that they do not exist in the old path but do in the new one
    path_for_removal = join(get_mountpoint("uploads")[0][1], str(study_id))
    for _, fp, _ in filepaths:
        self.assertFalse(exists(fp))
        # stored names carry an "<id>_" prefix; the move strips it
        new_fp = join(path_for_removal, basename(fp).split('_', 1)[1])
        self.assertTrue(exists(new_fp))
        self.files_to_remove.append(new_fp)
def test_move_filepaths_to_upload_folder(self):
    """Files move from the raw data dir into the study's upload folder.

    NOTE(review): near-identical duplicate of another
    test_move_filepaths_to_upload_folder in this SOURCE -- these look
    like two snapshots of the same test; confirm which file each
    belongs to.
    """
    # setting up test, done here as this is the only test that uses these
    # files
    fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    study_id = 1
    rd = RawData.create(2, [Study(study_id)], [(seqs_fp, 1)])
    filepaths = rd.get_filepaths()
    # deleting reference so we can directly call
    # move_filepaths_to_upload_folder
    for fid, _, _ in filepaths:
        self.conn_handler.execute(
            "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s",
            (fid, ))

    # moving filepaths
    move_filepaths_to_upload_folder(study_id, filepaths, self.conn_handler)

    # check that they do not exist in the old path but do in the new one
    path_for_removal = join(get_mountpoint("uploads")[0][1], str(study_id))
    for _, fp, _ in filepaths:
        self.assertFalse(exists(fp))
        new_fp = join(path_for_removal, basename(fp).split('_', 1)[1])
        self.assertTrue(exists(new_fp))
        self.files_to_remove.append(new_fp)
def test_clear_filepaths(self):
    """clear_filepaths removes every raw_filepath row for the raw data."""
    raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
    sql = ("SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
           "WHERE raw_data_id=%s)")
    # a freshly-created raw data starts with linked filepaths
    self.assertTrue(
        self.conn_handler.execute_fetchone(sql, (raw_data.id,))[0])
    raw_data.clear_filepaths()
    # after clearing, no raw_filepath rows remain for it
    self.assertFalse(
        self.conn_handler.execute_fetchone(sql, (raw_data.id,))[0])
def test_remove_filepath(self):
    """remove_filepath unlinks only the given file from the raw data."""
    rd = RawData.create(self.filetype, self.studies, self.filepaths)
    # DB-stored copies are prefixed with the raw data id ("3_")
    fp = join(self.db_test_raw_dir, "3_%s" % basename(self.seqs_fp))
    rd.remove_filepath(fp)
    # NOTE(review): filepath ids 17/18 are hard-coded against the test
    # database fixtures; brittle if fixtures change -- TODO confirm
    self.assertFalse(self.conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
        "WHERE filepath_id=17)")[0])
    self.assertTrue(self.conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
        "WHERE filepath_id=18)")[0])
def setUp(self):
    """Build sample metadata, a fresh RawData, and expected sample ids."""
    metadata_dict = {
        'SKB8.640193': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status_id': 1,
            'data_type_id': 2,
            'str_column': 'Value for sample 1'
        },
        'SKD8.640184': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status_id': 1,
            'data_type_id': 2,
            'str_column': 'Value for sample 2'
        },
        'SKB7.640196': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status_id': 1,
            'data_type_id': 2,
            'str_column': 'Value for sample 3'
        }
    }
    self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    self.test_raw_data = RawData(1)

    # minimal (one newline) fastq files so RawData.create has real paths
    fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(fd)
    filepaths = [(seqs_fp, 1), (barcodes_fp, 2)]
    with open(seqs_fp, "w") as f:
        f.write("\n")
    with open(barcodes_fp, "w") as f:
        f.write("\n")
    self.new_raw_data = RawData.create(2, filepaths, [Study(1)])
    db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
    # stored copies are prefixed with the new raw data id ("3_")
    db_seqs_fp = join(db_test_raw_dir, "3_%s" % basename(seqs_fp))
    db_barcodes_fp = join(db_test_raw_dir, "3_%s" % basename(barcodes_fp))
    self._clean_up_files = [db_seqs_fp, db_barcodes_fp]

    self.tester = PrepTemplate(1)
    self.exp_sample_ids = {
        'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
        'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
        'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
        'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
        'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
        'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
        'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
    }
def test_create(self):
    """Correctly creates all the rows in the DB for the raw data"""
    # Check that the returned object has the correct id
    exp_id = get_count("qiita.raw_data") + 1
    obs = RawData.create(self.filetype, self.prep_templates,
                         self.filepaths)
    self.assertEqual(obs.id, exp_id)

    # Check that the raw data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % exp_id)
    # raw_data_id, filetype, link_filepaths_status
    self.assertEqual(obs, [[exp_id, 2, 'idle']])

    # Check that the raw data has been correctly linked with the prep
    # templates
    sql = """SELECT prep_template_id
             FROM qiita.prep_template
             WHERE raw_data_id = %s
             ORDER BY prep_template_id"""
    obs = self.conn_handler.execute_fetchall(sql, (exp_id,))
    self.assertEqual(obs, [[self.pt1.id], [self.pt2.id]])

    # Check that the files have been copied to right location
    exp_seqs_fp = join(self.db_test_raw_dir,
                       "%d_%s" % (exp_id, basename(self.seqs_fp)))
    self.assertTrue(exists(exp_seqs_fp))
    self._clean_up_files.append(exp_seqs_fp)
    exp_bc_fp = join(self.db_test_raw_dir,
                     "%d_%s" % (exp_id, basename(self.barcodes_fp)))
    self.assertTrue(exists(exp_bc_fp))
    self._clean_up_files.append(exp_bc_fp)

    # Check that the filepaths have been correctly added to the DB;
    # the two new filepaths get the top two ids
    top_id = self.conn_handler.execute_fetchone(
        "SELECT count(1) FROM qiita.filepath")[0]
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
        "filepath_id=%d" % (top_id - 1, top_id))
    exp_seqs_fp = "%d_%s" % (exp_id, basename(self.seqs_fp))
    exp_bc_fp = "%d_%s" % (exp_id, basename(self.barcodes_fp))
    # filepath_id, path, filepath_type_id
    exp = [[top_id - 1, exp_seqs_fp, 1, '852952723', 1, 5],
           [top_id, exp_bc_fp, 2, '852952723', 1, 5]]
    self.assertEqual(obs, exp)

    # Check that the raw data have been correctly linked with the
    # filepaths
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % exp_id)
    # raw_data_id, filepath_id
    self.assertEqual(obs, [[exp_id, top_id - 1], [exp_id, top_id]])
def create_raw_data(self, study, user, callback):
    """Adds a (new) raw data to the study

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done

    Notes
    -----
    Always invokes ``callback`` with a 5-tuple
    ``(msg, msg_level, 'raw_data_tab', rd_id, None)``; ``rd_id`` is
    None whenever no raw data was created/attached.
    """
    msg = "Raw data successfully added"
    msg_level = "success"
    # BUGFIX: initialize rd_id so the callback below cannot raise
    # NameError on the "both specified" branch or when RawData.create
    # fails (previously rd_id was only bound on success paths).
    rd_id = None

    # Get the arguments needed to create a raw data object
    filetype = self.get_argument('filetype', None)
    previous_raw_data = self.get_argument('previous_raw_data', None)

    if filetype and previous_raw_data:
        # The user selected a filetype and an existing raw data
        msg = ("You can not specify both a new raw data and a previously "
               "used one")
        msg_level = "danger"
    elif filetype:
        # We are creating a new raw data object
        try:
            rd_id = RawData.create(filetype, [study]).id
        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError) as e:
            msg = html_error_message % (
                "creating a new raw data object for study:",
                str(study.id), str(e))
            msg_level = "danger"
    elif previous_raw_data:
        # Re-attach previously used raw data objects to this study
        previous_raw_data = previous_raw_data.split(',')
        raw_data = [RawData(rd) for rd in previous_raw_data]
        study.add_raw_data(raw_data)
        rd_id = raw_data[0].id
    else:
        # The user did not provide a filetype neither an existing raw data
        # If using the interface, we should never reach this if, but
        # better be safe than sorry
        msg = ("You should choose a filetype for a new raw data or "
               "choose a raw data previously used")
        msg_level = "danger"

    callback((msg, msg_level, 'raw_data_tab', rd_id, None))
def setUp(self):
    """Build sample metadata, a fresh RawData, and expected sample ids.

    NOTE(review): near-identical duplicate of another setUp in this
    SOURCE -- these look like two snapshots of the same fixture.
    """
    metadata_dict = {
        'SKD8.640184': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status_id': 1,
                        'data_type_id': 2,
                        'str_column': 'Value for sample 2'},
        'SKB8.640193': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status_id': 1,
                        'data_type_id': 2,
                        'str_column': 'Value for sample 1'},
        'SKB7.640196': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status_id': 1,
                        'data_type_id': 2,
                        'str_column': 'Value for sample 3'}
    }
    self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    self.test_raw_data = RawData(1)

    # minimal (one newline) fastq files so RawData.create has real paths
    fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(fd)
    filepaths = [(seqs_fp, 1), (barcodes_fp, 2)]
    with open(seqs_fp, "w") as f:
        f.write("\n")
    with open(barcodes_fp, "w") as f:
        f.write("\n")
    self.new_raw_data = RawData.create(2, filepaths, [Study(1)])
    db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
    # stored copies are prefixed with the new raw data id ("3_")
    db_seqs_fp = join(db_test_raw_dir, "3_%s" % basename(seqs_fp))
    db_barcodes_fp = join(db_test_raw_dir, "3_%s" % basename(barcodes_fp))
    self._clean_up_files = [db_seqs_fp, db_barcodes_fp]

    self.tester = PrepTemplate(1)
    self.exp_sample_ids = {'SKB1.640202', 'SKB2.640194', 'SKB3.640195',
                           'SKB4.640189', 'SKB5.640181', 'SKB6.640176',
                           'SKB7.640196', 'SKB8.640193', 'SKB9.640200',
                           'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
                           'SKD4.640185', 'SKD5.640186', 'SKD6.640190',
                           'SKD7.640191', 'SKD8.640184', 'SKD9.640182',
                           'SKM1.640183', 'SKM2.640199', 'SKM3.640197',
                           'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
                           'SKM7.640188', 'SKM8.640201', 'SKM9.640192'}
def test_create(self): """Correctly creates all the rows in the DB for the raw data""" # Check that the returned object has the correct id exp_id = 1 + self.conn_handler.execute_fetchone( "SELECT count(1) from qiita.raw_data")[0] obs = RawData.create(self.filetype, self.studies, self.filepaths) self.assertEqual(obs.id, exp_id) # Check that the raw data have been correctly added to the DB obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % exp_id) # raw_data_id, filetype, link_filepaths_status self.assertEqual(obs, [[exp_id, 2, 'idle']]) # Check that the raw data have been correctly linked with the study obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=%d" % exp_id) # study_id , raw_data_id self.assertEqual(obs, [[1, exp_id]]) # Check that the files have been copied to right location exp_seqs_fp = join(self.db_test_raw_dir, "%d_%s" % (exp_id, basename(self.seqs_fp))) self.assertTrue(exists(exp_seqs_fp)) self._clean_up_files.append(exp_seqs_fp) exp_bc_fp = join(self.db_test_raw_dir, "%d_%s" % (exp_id, basename(self.barcodes_fp))) self.assertTrue(exists(exp_bc_fp)) self._clean_up_files.append(exp_bc_fp) # Check that the filepaths have been correctly added to the DB top_id = self.conn_handler.execute_fetchone( "SELECT count(1) FROM qiita.filepath")[0] obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.filepath WHERE filepath_id=%d or " "filepath_id=%d" % (top_id - 1, top_id)) exp_seqs_fp = "%d_%s" % (exp_id, basename(self.seqs_fp)) exp_bc_fp = "%d_%s" % (exp_id, basename(self.barcodes_fp)) # filepath_id, path, filepath_type_id exp = [[top_id - 1, exp_seqs_fp, 1, '852952723', 1, 5], [top_id, exp_bc_fp, 2, '852952723', 1, 5]] self.assertEqual(obs, exp) # Check that the raw data have been correctly linked with the filepaths obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % exp_id) # raw_data_id, filepath_id 
self.assertEqual(obs, [[exp_id, top_id - 1], [exp_id, top_id]])
def test_remove_filepath(self):
    """remove_filepath unlinks one file, leaving the other attached."""
    # current row counts, used to compute the ids created below
    top_id = self.conn_handler.execute_fetchone(
        "SELECT count(1) FROM qiita.raw_filepath")[0]
    raw_id = self.conn_handler.execute_fetchone(
        "SELECT count(1) FROM qiita.raw_data")[0]
    rd = RawData.create(self.filetype, self.studies, self.filepaths)
    # stored copies are prefixed with the new raw data's id
    fp = join(self.db_test_raw_dir,
              "%d_%s" % (raw_id + 1, basename(self.seqs_fp)))
    rd.remove_filepath(fp)
    # NOTE(review): the checked ids are derived from the pre-create
    # raw_filepath count -- confirm top_id-1 / top_id-2 map to the
    # seqs / barcodes filepaths respectively
    self.assertFalse(self.conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
        "WHERE filepath_id=%d)" % (top_id - 1))[0])
    self.assertTrue(self.conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
        "WHERE filepath_id=%d)" % (top_id - 2))[0])
def test_status(self):
    """Raw data status is inferred from its processed data's status."""
    rd = RawData(1)
    s = Study(1)
    self.assertEqual(rd.status(s), 'private')

    # Since the status is inferred from the processed data, change the
    # status of the processed data so we can check how it changes in the
    # preprocessed data.
    # Renamed from `pd` to avoid shadowing the pandas alias `pd` used
    # elsewhere in this module.
    proc_data = ProcessedData(1)
    proc_data.status = 'public'
    self.assertEqual(rd.status(s), 'public')

    # Check that new raw data has sandbox as status since no
    # processed data exists for them
    rd = RawData.create(self.filetype, self.studies, self.filepaths)
    self.assertEqual(rd.status(s), 'sandbox')
def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
    """split_libraries_fastq command is built correctly for per-sample FASTQ."""
    metadata_dict = {
        'SKB8.640193': {'run_prefix': "sample1", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'},
        'SKD8.640184': {'run_prefix': "sample2", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'}}
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')

    # one plain and one gzipped forward-reads file, named to match the
    # samples' run_prefix values
    fp1 = self.path_builder('sample1.fastq')
    with open(fp1, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp1)
    fp2 = self.path_builder('sample2.fastq.gz')
    with open(fp2, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp2)
    filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
    fps = [(fp1, filepath_id), (fp2, filepath_id)]

    filetype_id = get_filetypes()['per_sample_FASTQ']
    raw_data = RawData.create(filetype_id, [prep_template], fps)
    params = [p for p in list(PreprocessedIlluminaParams.iter())
              if p.name == 'per sample FASTQ defaults'][0]

    obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(raw_data,
                                                        prep_template,
                                                        params)
    # the command lists the raw files sorted, comma separated
    raw_fps = ','.join([fp for _, fp, _ in
                        sorted(raw_data.get_filepaths())])
    exp_cmd = (
        "split_libraries_fastq.py --store_demultiplexed_fastq -i "
        "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
        "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
        "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
        "--sequence_max_n 0").format(raw_fps, obs_output_dir)
    self.assertEqual(obs_cmd, exp_cmd)
def test_delete(self):
    """RawData.delete unlinks prep templates, then removes the raw data."""
    rd = RawData.create(self.filetype, self.prep_templates,
                        self.filepaths)
    sql_pt = """SELECT prep_template_id
                FROM qiita.prep_template
                WHERE raw_data_id = %s
                ORDER BY prep_template_id"""
    obs = self.conn_handler.execute_fetchall(sql_pt, (rd.id,))
    self.assertEqual(obs, [[self.pt1.id], [self.pt2.id]])

    # This delete call will only unlink the raw data from the prep
    # template
    RawData.delete(rd.id, self.pt2.id)

    # Check that it successfully unlink the raw data from pt2
    obs = self.conn_handler.execute_fetchall(sql_pt, (rd.id,))
    self.assertEqual(obs, [[self.pt1.id]])
    self.assertEqual(self.pt2.raw_data, None)

    # If we try to remove the RawData now, it should raise an error
    # because it still has files attached to it
    with self.assertRaises(QiitaDBError):
        RawData.delete(rd.id, self.pt1.id)

    # Clear the files so we can actually remove the RawData
    study_id = rd.studies[0]
    path_for_removal = join(get_mountpoint("uploads")[0][1],
                            str(study_id))
    self._clean_up_files.extend([join(path_for_removal,
                                      basename(f).split('_', 1)[1])
                                 for _, f, _ in rd.get_filepaths()])
    rd.clear_filepaths()

    RawData.delete(rd.id, self.pt1.id)
    obs = self.conn_handler.execute_fetchall(sql_pt, (rd.id,))
    self.assertEqual(obs, [])

    # Check that all expected rows have been deleted
    sql = """SELECT EXISTS(
                SELECT * FROM qiita.raw_filepath
                WHERE raw_data_id = %s)"""
    self.assertFalse(self.conn_handler.execute_fetchone(sql, (rd.id,))[0])
    sql = """SELECT EXISTS(
                SELECT * FROM qiita.raw_data
                WHERE raw_data_id=%s)"""
    self.assertFalse(self.conn_handler.execute_fetchone(sql, (rd.id,))[0])
def test_create(self): """Correctly creates all the rows in the DB for the raw data""" # Check that the returned object has the correct id obs = RawData.create(self.filetype, self.studies, self.filepaths) self.assertEqual(obs.id, 3) # Check that the raw data have been correctly added to the DB obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_data WHERE raw_data_id=3") # raw_data_id, filetype, link_filepaths_status self.assertEqual(obs, [[3, 2, 'idle']]) # Check that the raw data have been correctly linked with the study obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=3") # study_id , raw_data_id self.assertEqual(obs, [[1, 3]]) # Check that the files have been copied to right location exp_seqs_fp = join(self.db_test_raw_dir, "3_%s" % basename(self.seqs_fp)) self.assertTrue(exists(exp_seqs_fp)) self._clean_up_files.append(exp_seqs_fp) exp_bc_fp = join(self.db_test_raw_dir, "3_%s" % basename(self.barcodes_fp)) self.assertTrue(exists(exp_bc_fp)) self._clean_up_files.append(exp_bc_fp) # Check that the filepaths have been correctly added to the DB obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.filepath WHERE filepath_id=17 or " "filepath_id=18") exp_seqs_fp = "3_%s" % basename(self.seqs_fp) exp_bc_fp = "3_%s" % basename(self.barcodes_fp) # filepath_id, path, filepath_type_id exp = [[17, exp_seqs_fp, 1, '852952723', 1, 5], [18, exp_bc_fp, 2, '852952723', 1, 5]] self.assertEqual(obs, exp) # Check that the raw data have been correctly linked with the filepaths obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=3") # raw_data_id, filepath_id self.assertEqual(obs, [[3, 17], [3, 18]])
def test_clear_filepaths(self):
    """clear_filepaths removes all raw_filepath rows for the raw data.

    This variant also registers the moved files (in the uploads mount)
    for cleanup before clearing.
    """
    rd = RawData.create(self.filetype, self.studies, self.filepaths)
    self.assertTrue(self.conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
        "WHERE raw_data_id=%s)", (rd.id,))[0])

    # add files to clean before cleaning the filepaths
    study_id = rd.studies[0]
    path_for_removal = join(get_mountpoint("uploads")[0][1],
                            str(study_id))
    self._clean_up_files = [join(path_for_removal,
                                 basename(f).split('_', 1)[1])
                            for _, f, _ in rd.get_filepaths()]

    # cleaning the filepaths
    rd.clear_filepaths()
    self.assertFalse(self.conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
        "WHERE raw_data_id=%s)", (rd.id,))[0])
def test_create(self): """Correctly creates all the rows in the DB for the raw data""" # Check that the returned object has the correct id obs = RawData.create(self.filetype, self.filepaths, self.studies) self.assertEqual(obs.id, 3) # Check that the raw data have been correctly added to the DB obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_data WHERE raw_data_id=3") # raw_data_id, filetype, submitted_to_insdc self.assertEqual(obs, [[3, 2]]) # Check that the raw data have been correctly linked with the study obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=3") # study_id , raw_data_id self.assertEqual(obs, [[1, 3]]) # Check that the files have been copied to right location exp_seqs_fp = join(self.db_test_raw_dir, "3_%s" % basename(self.seqs_fp)) self.assertTrue(exists(exp_seqs_fp)) self._clean_up_files.append(exp_seqs_fp) exp_bc_fp = join(self.db_test_raw_dir, "3_%s" % basename(self.barcodes_fp)) self.assertTrue(exists(exp_bc_fp)) self._clean_up_files.append(exp_bc_fp) # Check that the filepaths have been correctly added to the DB obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.filepath WHERE filepath_id=15 or " "filepath_id=16") # filepath_id, path, filepath_type_id exp = [[15, exp_seqs_fp, 1, '852952723', 1], [16, exp_bc_fp, 2, '852952723', 1]] self.assertEqual(obs, exp) # Check that the raw data have been correctly linked with the filepaths obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=3") # raw_data_id, filepath_id self.assertEqual(obs, [[3, 15], [3, 16]])
def setUp(self):
    """Create minimal fastq files and a RawData fixture to test against."""
    # Create a sample template file
    fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(fd)
    with open(seqs_fp, "w") as f:
        f.write("\n")
    with open(barcodes_fp, "w") as f:
        f.write("\n")
    self.pt_contents = PREP_TEMPLATE

    # magic number 2 is the filetype id; tuples are (path, type id)
    self.raw_data = RawData.create(
        2, [(seqs_fp, 1), (barcodes_fp, 2)], [Study(1)])

    # the copies under the raw_data mount carry an "<id>_" prefix
    join_f = partial(join, join(get_db_files_base_dir(), 'raw_data'))
    self.files_to_remove = [
        join_f("%s_%s" % (self.raw_data.id, basename(seqs_fp))),
        join_f("%s_%s" % (self.raw_data.id, basename(barcodes_fp)))]
def test_create_no_filepaths(self): """Correctly creates a raw data object with no filepaths attached""" # Check that the returned object has the correct id obs = RawData.create(self.filetype, self.studies) self.assertEqual(obs.id, 3) # Check that the raw data have been correctly added to the DB obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_data WHERE raw_data_id=3") # raw_data_id, filetype, link_filepaths_status self.assertEqual(obs, [[3, 2, 'idle']]) # Check that the raw data have been correctly linked with the study obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=3") # study_id , raw_data_id self.assertEqual(obs, [[1, 3]]) # Check that no files have been linked with the filepaths obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=3") self.assertEqual(obs, [])
def test_move_filepaths_to_upload_folder(self):
    """Files move from the raw data dir into the study's upload folder.

    This snapshot attaches the raw data via a prep template and calls
    move_filepaths_to_upload_folder without an explicit conn_handler.
    """
    # setting up test, done here as this is the only test that uses these
    # files
    fd, seqs_fp = mkstemp(suffix="_seqs.fastq")
    close(fd)
    st = Study(1)
    metadata_dict = {
        "SKB8.640193": {
            "center_name": "ANL",
            "primer": "GTGCCAGCMGCCGCGGTAA",
            "barcode": "GTCCGCAAGTTA",
            "run_prefix": "s_G1_L001_sequences",
            "platform": "ILLUMINA",
            "library_construction_protocol": "AAAA",
            "experiment_design_description": "BBBB",
        }
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient="index")
    pt = PrepTemplate.create(metadata, Study(1), "16S")
    rd = RawData.create(2, [pt], [(seqs_fp, 1)])
    filepaths = rd.get_filepaths()
    # deleting reference so we can directly call
    # move_filepaths_to_upload_folder
    for fid, _, _ in filepaths:
        self.conn_handler.execute(
            "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s", (fid,))

    # moving filepaths
    move_filepaths_to_upload_folder(st.id, filepaths)

    # check that they do not exist in the old path but do in the new one
    path_for_removal = join(get_mountpoint("uploads")[0][1], str(st.id))
    for _, fp, _ in filepaths:
        self.assertFalse(exists(fp))
        # stored names carry an "<id>_" prefix; the move strips it
        new_fp = join(path_for_removal, basename(fp).split("_", 1)[1])
        self.assertTrue(exists(new_fp))
        self.files_to_remove.append(new_fp)
def test_create_no_filepaths(self): """Correctly creates a raw data object with no filepaths attached""" # Check that the returned object has the correct id exp_id = 1 + self.conn_handler.execute_fetchone( "SELECT count(1) from qiita.raw_data")[0] obs = RawData.create(self.filetype, self.studies) self.assertEqual(obs.id, exp_id) # Check that the raw data have been correctly added to the DB obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % exp_id) # raw_data_id, filetype, link_filepaths_status self.assertEqual(obs, [[exp_id, 2, 'idle']]) # Check that the raw data have been correctly linked with the study obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=%d" % exp_id) # study_id , raw_data_id self.assertEqual(obs, [[1, exp_id]]) # Check that no files have been linked with the filepaths obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % exp_id) self.assertEqual(obs, [])
def post(self, study_id):
    """Handle every POST action of the study description page.

    Dispatches on which form arguments are present: add a sample
    template, change study status, add raw data, add a prep template,
    or update a prep template's investigation type. Always ends by
    re-rendering the page via display_template.
    """
    study_id = int(study_id)
    user = User(self.current_user)
    try:
        study = Study(study_id)
    except QiitaDBUnknownIDError:
        # Study not in database so fail nicely
        raise HTTPError(404, "Study %d does not exist" % study_id)
    else:
        check_access(User(self.current_user), study, raise_error=True)

    # vars to add sample template
    msg = ''
    msg_level = ''
    tab_to_display = ''
    sample_template = self.get_argument('sample_template', None)

    # vars to add raw data
    filetype = self.get_argument('filetype', None)
    previous_raw_data = self.get_argument('previous_raw_data', None)

    # vars to add prep template
    add_prep_template = self.get_argument('add_prep_template', None)
    raw_data_id = self.get_argument('raw_data_id', None)
    data_type_id = self.get_argument('data_type_id', None)

    # vars to change the study status
    make_public = self.get_argument('make_public', False)
    make_sandbox = self.get_argument('make_sandbox', False)
    approve_study = self.get_argument('approve_study', False)
    request_approval = self.get_argument('request_approval', False)

    investigation_type = self.get_argument('investigation-type', None)
    user_defined_investigation_type = self.get_argument(
        'user-defined-investigation-type', None)
    new_investigation_type = self.get_argument('new-investigation-type',
                                               None)

    # None Selected is the equivalent to the user not specifying the info
    # thus we should make the investigation_type None
    if investigation_type == "" or investigation_type == "None Selected":
        investigation_type = None

    # to update investigation type
    update_investigation_type = self.get_argument(
        'update_investigation_type', None)
    edit_investigation_type = self.get_argument('edit-investigation-type',
                                                None)
    edit_user_defined_investigation_type = self.get_argument(
        'edit-user-defined-investigation-type', None)
    edit_new_investigation_type = self.get_argument(
        'edit-new-investigation-type', None)

    # None Selected is the equivalent to the user not specifying the info
    # thus we should make the investigation_type None
    if edit_investigation_type == "" or \
            edit_investigation_type == "None Selected":
        edit_investigation_type = None

    msg_level = 'success'
    if sample_template:
        # processing sample templates
        _, base_fp = get_mountpoint("uploads")[0]
        fp_rsp = join(base_fp, str(study_id), sample_template)
        if not exists(fp_rsp):
            raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)
        try:
            # deleting previous uploads and inserting new one
            yield Task(self.remove_add_study_template, study.raw_data,
                       study_id, fp_rsp)
        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError) as e:
            msg = html_error_message % ('parsing the sample template:',
                                        basename(fp_rsp), str(e))
            self.display_template(study, msg, "danger")
            return
        msg = ("The sample template '%s' has been added" %
               sample_template)
        tab_to_display = ""
    elif request_approval:
        study.status = 'awaiting_approval'
        msg = "Study sent to admin for approval"
        tab_to_display = ""
    elif make_public:
        msg = ''
        study.status = 'public'
        msg = "Study set to public"
        tab_to_display = ""
    elif make_sandbox:
        msg = ''
        study.status = 'sandbox'
        msg = "Study reverted to sandbox"
        tab_to_display = ""
    elif approve_study:
        # make sure user is admin, then make full private study
        # NOTE(review): non-admins fall through silently with an empty
        # success message -- confirm this is intended
        if user.level == 'admin' or not qiita_config.require_approval:
            study.status = 'private'
            msg = "Study approved"
            tab_to_display = ""
    elif filetype or previous_raw_data:
        # adding blank raw data
        if filetype and previous_raw_data:
            # TODO: typo in user-facing message ("previouly"); left
            # untouched here as runtime strings are out of scope for a
            # documentation-only change
            msg = ("You can not specify both a new raw data and a "
                   "previouly used one")
        elif filetype:
            try:
                RawData.create(filetype, [study])
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError, KeyError,
                    CParserError) as e:
                msg = html_error_message % ("creating a new raw data "
                                            "object for study:",
                                            str(study.id), str(e))
                self.display_template(study, msg, "danger")
                return
            msg = ""
        else:
            # NOTE(review): unlike create_raw_data elsewhere in this
            # SOURCE, previous_raw_data is NOT split on ',' here, so
            # this iterates the raw string character by character --
            # looks like a bug; confirm against the form payload
            raw_data = [RawData(rd) for rd in previous_raw_data]
            study.add_raw_data(raw_data)
            msg = ""
        tab_to_display = ""
    elif add_prep_template and raw_data_id and data_type_id:
        # adding prep templates
        if investigation_type == 'Other' and \
                user_defined_investigation_type == 'New Type':
            investigation_type = new_investigation_type
            # this is a new user defined investigation type so store it
            ontology = Ontology(convert_to_id('ENA', 'ontology'))
            ontology.add_user_defined_term(investigation_type)
        elif investigation_type == 'Other' and \
                user_defined_investigation_type != 'New Type':
            investigation_type = user_defined_investigation_type

        raw_data_id = int(raw_data_id)
        _, base_path = get_mountpoint("uploads")[0]
        fp_rpt = join(base_path, str(study_id), add_prep_template)
        if not exists(fp_rpt):
            raise HTTPError(400, "This file doesn't exist: %s" % fp_rpt)
        try:
            # inserting prep templates
            yield Task(self.remove_add_prep_template, fp_rpt, raw_data_id,
                       study, data_type_id, investigation_type)
        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError,
                CParserError) as e:
            msg = html_error_message % ("parsing the prep template: ",
                                        basename(fp_rpt), str(e))
            self.display_template(study, msg, "danger", str(raw_data_id))
            return
        msg = "Your prep template was added"
        tab_to_display = str(raw_data_id)
    elif update_investigation_type:
        # updating the prep template investigation type
        pt = PrepTemplate(update_investigation_type)
        investigation_type = edit_investigation_type
        # figure out whether to add it as a user defined term or not
        if edit_investigation_type == 'Other' and \
                edit_user_defined_investigation_type == 'New Type':
            investigation_type = edit_new_investigation_type
            # this is a new user defined investigation type so store it
            ontology = Ontology(convert_to_id('ENA', 'ontology'))
            ontology.add_user_defined_term(investigation_type)
        elif investigation_type == 'Other' and \
                user_defined_investigation_type != 'New Type':
            investigation_type = edit_user_defined_investigation_type
        try:
            pt.investigation_type = investigation_type
        except QiitaDBColumnError as e:
            msg = html_error_message % (", invalid investigation type: ",
                                        investigation_type, str(e))
            self.display_template(study, msg, "danger", str(pt.raw_data))
            return
        msg = "The prep template has been updated!"
        tab_to_display = str(pt.raw_data)
    else:
        msg = ("Error, did you select a valid uploaded file or are "
               "passing the correct parameters?")
        msg_level = 'danger'
        tab_to_display = ""

    self.display_template(study, msg, msg_level, tab_to_display)
def create_raw_data(filetype, prep_template, filepaths):
    """Creates a new raw data attached to the given prep template.

    Needs to be dispatchable because it moves large files.

    Parameters
    ----------
    filetype : int or str
        The raw data filetype -- TODO confirm the expected type against
        RawData.create
    prep_template : PrepTemplate
        The prep template the raw data is attached to
    filepaths : iterable of (str, int)
        Presumably (path, filepath_type_id) tuples; verify against
        RawData.create
    """
    RawData.create(filetype, [prep_template], filepaths)
def setUp(self):
    """Create SFF raw data fixtures: plain, gzipped, and multi run-prefix."""
    self.db_dir = get_db_files_base_dir()

    # Create a SFF dataset: add prep template and a RawData
    study = Study(1)
    md_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'GTCCGCAAGTTA',
                        'run_prefix': "preprocess_test",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'},
        'SKD8.640184': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CGTAGAGCTCTC',
                        'run_prefix': "preprocess_test",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'},
        'SKB7.640196': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CCTCTGAGAGCT',
                        'run_prefix': "preprocess_test",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'}
    }
    md = pd.DataFrame.from_dict(md_dict, orient='index')
    self.sff_prep_template = PrepTemplate.create(md, study, "16S")

    tmp_dir = mkdtemp()
    self.path_builder = partial(join, tmp_dir)
    fp1 = self.path_builder('preprocess_test1.sff')
    with open(fp1, 'w') as f:
        f.write('\n')
    fp2 = self.path_builder('preprocess_test2.sff')
    with open(fp2, 'w') as f:
        f.write('\n')
    self.raw_sff_id = convert_to_id('raw_sff', 'filepath_type')
    fps = [(fp1, self.raw_sff_id), (fp2, self.raw_sff_id)]

    # Magic number 1: is the filetype id
    self.raw_data = RawData.create(1, [self.sff_prep_template], fps)

    # gzipped variant of the same dataset
    md = pd.DataFrame.from_dict(md_dict, orient='index')
    self.sff_prep_template_gz = PrepTemplate.create(md, study, "16S")
    fp1_gz = self.path_builder('preprocess_test1.sff.gz')
    # NOTE(review): gzip.open(..., 'w') is binary mode; writing a str
    # only works on Python 2 -- confirm before porting to Python 3
    with gzip.open(fp1_gz, 'w') as f:
        f.write('\n')
    fps = [(fp1_gz, self.raw_sff_id)]
    self.raw_data_gz = RawData.create(1, [self.sff_prep_template_gz],
                                      fps)

    # Create a SFF dataset with multiple run prefix:
    # add prep template and a RawData
    md_dict['SKD8.640184']['run_prefix'] = "new"
    md_rp = pd.DataFrame.from_dict(md_dict, orient='index')
    self.sff_prep_template_rp = PrepTemplate.create(md_rp, study, "16S")
    rp_fp1 = self.path_builder('preprocess_test1.sff')
    with open(rp_fp1, 'w') as f:
        f.write('\n')
    rp_fp2 = self.path_builder('preprocess_test2.sff')
    with open(rp_fp2, 'w') as f:
        f.write('\n')
    fps = [(rp_fp1, self.raw_sff_id), (rp_fp2, self.raw_sff_id)]
    # Magic number 1: is the filetype id
    self.raw_data_rp = RawData.create(1, [self.sff_prep_template_rp],
                                      fps)

    # Make sure that we clean up all created files
    self.files_to_remove = [fp1, fp2, rp_fp1, rp_fp2]
    self.dirs_to_remove = [tmp_dir]
    for pt in [self.sff_prep_template, self.sff_prep_template_rp]:
        for _, fp in pt.get_filepaths():
            self.files_to_remove.append(fp)
def test_create_error(self):
    """Creating raw data on an already-linked prep template raises."""
    linked_templates = [PrepTemplate(1)]
    with self.assertRaises(QiitaDBError):
        RawData.create(self.filetype, linked_templates, self.filepaths)
def test_is_preprocessed(self):
    """_is_preprocessed reflects whether preprocessed data exists."""
    # the fixture raw data (id=1) already has preprocessed data
    existing = RawData(1)
    self.assertTrue(existing._is_preprocessed())
    # a freshly created raw data has none yet
    fresh = RawData.create(self.filetype, self.prep_templates,
                           self.filepaths)
    self.assertFalse(fresh._is_preprocessed())