def test_get_process_target_gene_cmd(self):
    preprocessed_data = PreprocessedData(1)
    params = ProcessedSortmernaParams(1)

    obs_cmd, obs_output_dir = _get_process_target_gene_cmd(
        preprocessed_data, params)

    _, ref_dir = get_mountpoint('reference')[0]
    _, preprocessed_dir = get_mountpoint('preprocessed_data')[0]

    exp_cmd = ("pick_closed_reference_otus.py -i {}1_seqs.fna -r "
               "{}GreenGenes_13_8_97_otus.fasta -o {} -p placeholder -t "
               "{}GreenGenes_13_8_97_otu_taxonomy.txt".format(
                   preprocessed_dir, ref_dir, obs_output_dir, ref_dir))

    obs_tokens = obs_cmd.split()[::-1]
    exp_tokens = exp_cmd.split()[::-1]
    self.assertEqual(len(obs_tokens), len(exp_tokens))
    while obs_tokens:
        o_t = obs_tokens.pop()
        e_t = exp_tokens.pop()

        if o_t == '-p':
            # skip parameters file
            obs_tokens.pop()
            exp_tokens.pop()
        else:
            self.assertEqual(o_t, e_t)

def test_prep_template_filepaths_get_req(self):
    obs = prep_template_filepaths_get_req(1, '*****@*****.**')
    exp = {'status': 'success',
           'message': '',
           'filepaths': [
               (21, join(get_mountpoint('templates')[0][1],
                         '1_prep_1_qiime_19700101-000000.txt')),
               (20, join(get_mountpoint('templates')[0][1],
                         '1_prep_1_19700101-000000.txt')),
               (19, join(get_mountpoint('templates')[0][1],
                         '1_prep_1_qiime_19700101-000000.txt')),
               (18, join(get_mountpoint('templates')[0][1],
                         '1_prep_1_19700101-000000.txt'))]}
    self.assertEqual(obs, exp)

def test_prep_template_filepaths_get_req(self):
    obs = prep_template_filepaths_get_req(1, '*****@*****.**')
    exp = {'status': 'success',
           'message': '',
           'filepaths': [
               (19, join(get_mountpoint('templates')[0][1],
                         '1_prep_1_qiime_19700101-000000.txt')),
               (18, join(get_mountpoint('templates')[0][1],
                         '1_prep_1_19700101-000000.txt'))]}
    self.assertEqual(obs, exp)

def tearDown(self):
    with open(self.biom_fp, 'w') as f:
        f.write("")
    with open(self.map_fp, 'w') as f:
        f.write("")

    fp = join(get_mountpoint('analysis')[0][1], 'testfile.txt')
    if exists(fp):
        remove(fp)

    mp = get_mountpoint("processed_data")[0][1]
    study2fp = join(mp, "2_2_study_1001_closed_reference_otu_table.biom")
    if exists(study2fp):
        move(study2fp,
             join(mp, "2_study_1001_closed_reference_otu_table.biom"))

def get_filepaths(self, conn_handler=None):
    r"""Retrieves the list of (filepath_id, filepath)"""
    # Check that this function has been called from a subclass
    self._check_subclass()

    # Check if the connection handler has been provided. Create a new
    # one if not.
    conn_handler = conn_handler if conn_handler else SQLConnectionHandler()

    if self._table == 'required_sample_info':
        table = 'sample_template_filepath'
        column = 'study_id'
    elif self._table == 'common_prep_info':
        table = 'prep_template_filepath'
        column = 'prep_template_id'
    else:
        raise QiitaDBNotImplementedError(
            'get_filepath for %s' % self._table)

    try:
        filepath_ids = conn_handler.execute_fetchall(
            "SELECT filepath_id, filepath FROM qiita.filepath WHERE "
            "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
            "{1}=%s) ORDER BY filepath_id DESC".format(table, column),
            (self.id, ))
    except Exception as e:
        LogEntry.create('Runtime', str(e),
                        info={self.__class__.__name__: self.id})
        raise e

    _, fb = get_mountpoint('templates', conn_handler)[0]
    base_fp = partial(join, fb)

    return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]

def create_from_scratch(self, prep_template, study_id):
    raw_data_filetype = self.get_argument("filetype")
    barcodes_str = self.get_argument("barcodes")
    forward_reads_str = self.get_argument("forward")
    sff_str = self.get_argument("sff")
    fasta_str = self.get_argument("fasta")
    qual_str = self.get_argument("qual")
    reverse_reads_str = self.get_argument("reverse")

    def _split(x):
        return x.split(",") if x else []

    filepaths, fps = [], []
    fps.append((_split(barcodes_str), "raw_barcodes"))
    fps.append((_split(fasta_str), "raw_fasta"))
    fps.append((_split(qual_str), "raw_qual"))
    fps.append((_split(forward_reads_str), "raw_forward_seqs"))
    fps.append((_split(reverse_reads_str), "raw_reverse_seqs"))
    fps.append((_split(sff_str), "raw_sff"))

    # We need to retrieve the full path for all the files, as the
    # arguments only contain the file name. Since we don't know in which
    # mountpoint the data lives, we retrieve all of them and we loop
    # through all the files checking if they exist or not.
    for _, f in get_mountpoint("uploads", retrieve_all=True):
        f = join(f, str(study_id))
        for fp_set, filetype in fps:
            for t in fp_set:
                ft = join(f, t)
                if exists(ft):
                    filepaths.append((ft, filetype))

    return submit(self.current_user.id, create_raw_data, raw_data_filetype,
                  prep_template, filepaths)

def check_fp(study_id, filename):
    """Check whether an uploaded file exists

    Parameters
    ----------
    study_id : int
        The study the file was uploaded to
    filename : str
        Name of the uploaded file

    Returns
    -------
    dict
        {'status': status, 'message': msg, 'file': str}
        'file' contains the full filepath if status is success, otherwise
        it contains the filename
    """
    # Get the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    # Get the path of the sample template in the uploads folder
    fp_rsp = join(base_fp, str(study_id), filename)

    if not exists(fp_rsp):
        # The file does not exist, fail nicely
        return {'status': 'error',
                'message': 'file does not exist',
                'file': filename}
    return {'status': 'success',
            'message': '',
            'file': fp_rsp}

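# A minimal usage sketch (not part of the original module): check_fp only
# returns the absolute path when the file is present, so callers are expected
# to branch on 'status' before using 'file'. The study id and filename below
# are illustrative values.
result = check_fp(1, 'uploaded_file.txt')
if result['status'] == 'success':
    full_path = result['file']  # absolute path inside the uploads mountpoint
else:
    # on error, 'file' still holds the bare filename that was requested
    print('upload check failed: %s' % result['message'])
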
def post(self, study_id, prep_id):
    study = self.safe_get_study(study_id)
    if study is None:
        return

    prep_id = to_int(prep_id)
    try:
        p = PrepTemplate(prep_id)
    except QiitaDBUnknownIDError:
        self.fail('Preparation not found', 404)
        return

    if p.study_id != study.id:
        self.fail('Preparation ID not associated with the study', 409)
        return

    artifact_deets = json_decode(self.request.body)
    _, upload = get_mountpoint('uploads')[0]
    base = os.path.join(upload, study_id)
    filepaths = [(os.path.join(base, fp), fp_type)
                 for fp, fp_type in artifact_deets['filepaths']]

    try:
        art = Artifact.create(filepaths,
                              artifact_deets['artifact_type'],
                              artifact_deets['artifact_name'],
                              p)
    except QiitaError as e:
        self.fail(str(e), 406)
        return

    self.write({'id': art.id})
    self.set_status(201)
    self.finish()

def test_check_fp(self):
    obs = check_fp(1, 'uploaded_file.txt')
    _, base_fp = get_mountpoint("uploads")[0]
    exp = {'status': 'success',
           'message': '',
           'file': join(base_fp, '1', 'uploaded_file.txt')}
    self.assertEqual(obs, exp)

def setUp(self):
    uploads_path = get_mountpoint('uploads')[0][1]
    # Create prep test file to point at
    self.update_fp = join(uploads_path, '1', 'update.txt')
    with open(self.update_fp, 'w') as f:
        f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

    self._files_to_remove = [self.update_fp]

    # creating temporal files and artifact
    # NOTE: we don't need to remove the artifact created cause it's
    # used to test the delete functionality
    fd, fp = mkstemp(suffix='_seqs.fna')
    close(fd)
    with open(fp, 'w') as f:
        f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 "
                "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n"
                "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n")

    # 4 Demultiplexed
    filepaths_processed = [(fp, 4)]
    # 1 for default parameters and input data
    exp_params = Parameters.from_default_params(DefaultParameters(1),
                                                {'input_data': 1})
    self.artifact = Artifact.create(filepaths_processed, "Demultiplexed",
                                    parents=[Artifact(1)],
                                    processing_parameters=exp_params)

def setUp(self):
    fd, self.seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    fd, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(fd)
    self.filetype = 2
    self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)]
    _, self.db_test_raw_dir = get_mountpoint('raw_data')[0]

    with open(self.seqs_fp, "w") as f:
        f.write("\n")
    with open(self.barcodes_fp, "w") as f:
        f.write("\n")
    self._clean_up_files = []

    # Create some new PrepTemplates
    metadata_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'GTCCGCAAGTTA',
                        'run_prefix': "s_G1_L001_sequences",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'}}
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    self.pt1 = PrepTemplate.create(metadata, Study(1), "16S")
    self.pt2 = PrepTemplate.create(metadata, Study(1), "18S")
    self.prep_templates = [self.pt1, self.pt2]

def _common_purge_filpeaths_test(self):
    # Get all the filepaths so we can test if they've been removed or not
    sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
    fps = [join(get_mountpoint_path_by_id(dd_id), fp)
           for fp, dd_id in self.conn_handler.execute_fetchall(sql_fp)]

    # Make sure that the files exist - especially for Travis
    for fp in fps:
        if not exists(fp):
            with open(fp, "w") as f:
                f.write("\n")
            self.files_to_remove.append(fp)

    _, raw_data_mp = get_mountpoint("raw_data")[0]

    removed_fps = [join(raw_data_mp, "2_sequences_barcodes.fastq.gz"),
                   join(raw_data_mp, "2_sequences.fastq.gz")]

    for fp in removed_fps:
        with open(fp, "w") as f:
            f.write("\n")

    sql = """INSERT INTO qiita.filepath
                (filepath, filepath_type_id, checksum,
                 checksum_algorithm_id, data_directory_id)
             VALUES ('2_sequences_barcodes.fastq.gz', 3, '852952723', 1, 5),
                    ('2_sequences.fastq.gz', 1, '852952723', 1, 5)
             RETURNING filepath_id"""
    fp_ids = self.conn_handler.execute_fetchall(sql)

    fps = set(fps).difference(removed_fps)

    # Check that the files exist
    for fp in fps:
        self.assertTrue(exists(fp))
    for fp in removed_fps:
        self.assertTrue(exists(fp))

    exp_count = get_count("qiita.filepath") - 2

    purge_filepaths()

    obs_count = get_count("qiita.filepath")

    # Check that only 2 rows have been removed
    self.assertEqual(obs_count, exp_count)

    # Check that the 2 rows that have been removed are the correct ones
    sql = """SELECT EXISTS(
                SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
    obs = self.conn_handler.execute_fetchone(sql, (fp_ids[0][0],))[0]
    self.assertFalse(obs)
    obs = self.conn_handler.execute_fetchone(sql, (fp_ids[1][0],))[0]
    self.assertFalse(obs)

    # Check that the files have been successfully removed
    for fp in removed_fps:
        self.assertFalse(exists(fp))

    # Check that all the other files still exist
    for fp in fps:
        self.assertTrue(exists(fp))

def get_filepaths(self):
    r"""Retrieves the list of (filepath_id, filepath)"""
    # Check that this function has been called from a subclass
    self._check_subclass()

    # Create a new connection handler
    conn_handler = SQLConnectionHandler()

    try:
        filepath_ids = conn_handler.execute_fetchall(
            "SELECT filepath_id, filepath FROM qiita.filepath WHERE "
            "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
            "{1}=%s) ORDER BY filepath_id DESC".format(
                self._filepath_table, self._id_column),
            (self.id, ))
    except Exception as e:
        LogEntry.create('Runtime', str(e),
                        info={self.__class__.__name__: self.id})
        raise e

    _, fb = get_mountpoint('templates')[0]
    base_fp = partial(join, fb)

    return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]

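# A minimal usage sketch (not part of the original module), assuming a
# MetadataTemplate subclass such as SampleTemplate is available: the tuples
# come back ordered by filepath_id descending, so the most recently attached
# template file is listed first.
template = SampleTemplate(1)  # illustrative id
for fp_id, fp in template.get_filepaths():
    print(fp_id, fp)  # fp is already joined onto the 'templates' mountpoint
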
def test_move_upload_files_to_trash(self):
    test_filename = 'this_is_a_test_file.txt'

    # create file to move to trash
    fid, folder = get_mountpoint("uploads")[0]
    open(join(folder, '1', test_filename), 'w').write('test')

    exp = [(fid, 'this_is_a_test_file.txt'), (fid, 'uploaded_file.txt')]
    obs = get_files_from_uploads_folders("1")
    self.assertItemsEqual(obs, exp)

    # move file
    move_upload_files_to_trash(1, [(fid, test_filename)])
    exp = [(fid, 'uploaded_file.txt')]
    obs = get_files_from_uploads_folders("1")
    self.assertItemsEqual(obs, exp)

    # testing errors
    with self.assertRaises(QiitaDBError):
        move_upload_files_to_trash(2, [(fid, test_filename)])
    with self.assertRaises(QiitaDBError):
        move_upload_files_to_trash(1, [(10, test_filename)])
    with self.assertRaises(QiitaDBError):
        move_upload_files_to_trash(1, [(fid, test_filename)])

    # removing trash folder
    rmtree(join(folder, '1', 'trash'))

def setUp(self):
    fd, self.seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    fd, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(fd)
    self.filetype = 2
    self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)]
    self.studies = [Study(1)]
    _, self.db_test_raw_dir = get_mountpoint('raw_data')[0]

    with open(self.seqs_fp, "w") as f:
        f.write("\n")
    with open(self.barcodes_fp, "w") as f:
        f.write("\n")
    self._clean_up_files = []

    # Create a new study
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "portal_type_id": 3,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    Study.create(User("*****@*****.**"), "Test study 2", [1], info)

def test_post_valid(self):
    dontcare, uploads_dir = get_mountpoint('uploads')[0]
    foo_fp = os.path.join(uploads_dir, '1', 'foo.txt')
    bar_fp = os.path.join(uploads_dir, '1', 'bar.txt')
    with open(foo_fp, 'w') as fp:
        fp.write("@x\nATGC\n+\nHHHH\n")
    with open(bar_fp, 'w') as fp:
        fp.write("@x\nATGC\n+\nHHHH\n")

    prep = StringIO(EXP_PREP_TEMPLATE.format(1))
    prep_table = load_template_to_dataframe(prep)

    response = self.post('/api/v1/study/1/preparation?data_type=16S',
                         data=prep_table.T.to_dict(),
                         headers=self.headers, asjson=True)
    prepid = json_decode(response.body)['id']

    uri = '/api/v1/study/1/preparation/%d/artifact' % prepid
    # 1 -> fwd or rev sequences in fastq
    # 3 -> barcodes
    body = {'artifact_type': 'FASTQ',
            'filepaths': [['foo.txt', 1],
                          ['bar.txt', 'raw_barcodes']],
            'artifact_name': 'a name is a name'}

    response = self.post(uri, data=body, headers=self.headers, asjson=True)
    self.assertEqual(response.code, 201)
    obs = json_decode(response.body)['id']

    prep_instance = PrepTemplate(prepid)
    exp = prep_instance.artifact.id
    self.assertEqual(obs, exp)

def test_move_upload_files_to_trash(self):
    test_filename = "this_is_a_test_file.txt"

    # create file to move to trash
    fid, folder = get_mountpoint("uploads")[0]
    test_fp = join(folder, "1", test_filename)
    with open(test_fp, "w") as f:
        f.write("test")
    self.files_to_remove.append(test_fp)

    exp = [(fid, "this_is_a_test_file.txt"), (fid, "uploaded_file.txt")]
    obs = get_files_from_uploads_folders("1")
    self.assertItemsEqual(obs, exp)

    # move file
    move_upload_files_to_trash(1, [(fid, test_filename)])
    exp = [(fid, "uploaded_file.txt")]
    obs = get_files_from_uploads_folders("1")
    self.assertItemsEqual(obs, exp)

    # testing errors
    with self.assertRaises(QiitaDBError):
        move_upload_files_to_trash(2, [(fid, test_filename)])
    with self.assertRaises(QiitaDBError):
        move_upload_files_to_trash(1, [(10, test_filename)])
    with self.assertRaises(QiitaDBError):
        move_upload_files_to_trash(1, [(fid, test_filename)])

    # removing trash folder
    rmtree(join(folder, "1", "trash"))

def test_move_filepaths_to_upload_folder(self):
    # setting up test, done here as this is the only test that uses these
    # files
    fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    study_id = 1

    rd = RawData.create(2, [Study(study_id)], [(seqs_fp, 1)])
    filepaths = rd.get_filepaths()
    # deleting reference so we can directly call
    # move_filepaths_to_upload_folder
    for fid, _, _ in filepaths:
        self.conn_handler.execute(
            "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s", (fid, ))

    # moving filepaths
    move_filepaths_to_upload_folder(study_id, filepaths, self.conn_handler)

    # check that they do not exist in the old path but do in the new one
    path_for_removal = join(get_mountpoint("uploads")[0][1], str(study_id))
    for _, fp, _ in filepaths:
        self.assertFalse(exists(fp))
        new_fp = join(path_for_removal, basename(fp).split('_', 1)[1])
        self.assertTrue(exists(new_fp))

        self.files_to_remove.append(new_fp)

def test_build_mapping_file(self):
    new_id = get_count('qiita.filepath') + 1
    samples = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']}
    self.analysis._build_mapping_file(samples)
    obs = self.analysis.mapping_file
    self.assertEqual(obs, self.map_fp)

    base_dir = get_mountpoint('analysis')[0][1]
    obs = pd.read_csv(obs, sep='\t', infer_datetime_format=True,
                      parse_dates=True, index_col=False, comment='\t')
    exp = pd.read_csv(join(base_dir, '1_analysis_mapping_exp.txt'),
                      sep='\t', infer_datetime_format=True,
                      parse_dates=True, index_col=False, comment='\t')
    assert_frame_equal(obs, exp)

    sql = """SELECT * FROM qiita.filepath
             WHERE filepath=%s ORDER BY filepath_id"""
    obs = self.conn_handler.execute_fetchall(
        sql, ("%d_analysis_mapping.txt" % self.analysis.id,))
    exp = [[13, '1_analysis_mapping.txt', 9, '852952723', 1, 1],
           [new_id, '1_analysis_mapping.txt', 9, '1606265094', 1, 1]]
    self.assertEqual(obs, exp)

    sql = """SELECT * FROM qiita.analysis_filepath
             WHERE analysis_id=%s ORDER BY filepath_id"""
    obs = self.conn_handler.execute_fetchall(sql, (self.analysis.id,))
    exp = [[1L, 14L, 2L], [1L, 15L, None], [1L, new_id, None]]

def setUp(self):
    super(NewArtifactHandlerTests, self).setUp()
    tmp_dir = join(get_mountpoint('uploads')[0][1], '1')

    # Create prep test file to point at
    fd, prep_fp = mkstemp(dir=tmp_dir, suffix='.txt')
    close(fd)
    with open(prep_fp, 'w') as f:
        f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

    self.prep = npt.assert_warns(
        QiitaDBWarning, PrepTemplate.create,
        pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), "16S")

    fd, self.fwd_fp = mkstemp(dir=tmp_dir, suffix=".fastq")
    close(fd)
    with open(self.fwd_fp, 'w') as f:
        f.write("@seq\nTACGA\n+ABBBB\n")

    fd, self.barcodes_fp = mkstemp(dir=tmp_dir, suffix=".fastq")
    close(fd)
    with open(self.barcodes_fp, 'w') as f:
        f.write("@seq\nTACGA\n+ABBBB\n")

    self._files_to_remove = [prep_fp, self.fwd_fp, self.barcodes_fp]

def setUp(self):
    # Create a directory with the test split libraries output
    self.test_slo = mkdtemp(prefix='test_slo_')
    path_builder = partial(join, self.test_slo)
    fna_fp = path_builder('seqs.fna')
    fastq_fp = path_builder('seqs.fastq')
    log_fp = path_builder('split_library_log.txt')
    demux_fp = path_builder('seqs.demux')

    with open(fna_fp, 'w') as f:
        f.write(FASTA_SEQS)
    with open(fastq_fp, 'w') as f:
        f.write(FASTQ_SEQS)
    with open(log_fp, 'w') as f:
        f.write("Test log\n")
    generate_demux_file(self.test_slo)

    self._filepaths_to_remove = [fna_fp, fastq_fp, demux_fp, log_fp]
    self._dirpaths_to_remove = [self.test_slo]

    # Generate a directory with test split libraries output missing files
    self.missing_slo = mkdtemp(prefix='test_missing_')
    # point the path builder at the new directory so the partial output
    # actually lands in missing_slo (the original pointed at test_slo)
    path_builder = partial(join, self.missing_slo)
    fna_fp = path_builder('seqs.fna')
    fastq_fp = path_builder('seqs.fastq')

    with open(fna_fp, 'w') as f:
        f.write(FASTA_SEQS)
    with open(fastq_fp, 'w') as f:
        f.write(FASTQ_SEQS)

    self._filepaths_to_remove.append(fna_fp)
    self._filepaths_to_remove.append(fastq_fp)
    self._dirpaths_to_remove.append(self.missing_slo)

    # Create a study with no preprocessed data
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    self.no_ppd_study = Study.create(
        User('*****@*****.**'), "Test study", [1], info)

    # Get the directory where the preprocessed data is usually copied.
    _, self.db_ppd_dir = get_mountpoint('preprocessed_data')[0]

def update_prep_template(self, study, user, callback):
    """Update a prep template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done

    Raises
    ------
    HTTPError
        If the prep template file does not exist
    """
    # If we are on this function, the arguments "prep_template_id" and
    # "update_prep_template_file" must be defined. If not, let tornado
    # raise its error
    pt_id = int(self.get_argument('prep_template_id'))
    prep_template = self.get_argument('update_prep_template_file')

    # Define here the message and message level in case of success
    msg = "The prep template '%s' has been updated" % prep_template
    msg_level = "success"
    # Get the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    # Get the path of the prep template in the uploads folder
    fp = join(base_fp, str(study.id), prep_template)

    if not exists(fp):
        # The file does not exist, fail nicely
        # Using 400 because we want the user to get the error in the GUI
        raise HTTPError(400, "This file doesn't exist: %s" % fp)

    try:
        with warnings.catch_warnings(record=True) as warns:
            pt = PrepTemplate(pt_id)
            pt.update(load_template_to_dataframe(fp))
            remove(fp)

            # join all the warning messages into one. Note that this info
            # will be ignored if an exception is raised
            if warns:
                msg = '; '.join([str(w.message) for w in warns])
                msg_level = 'warning'
    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError,
            QiitaDBError) as e:
        # Some error occurred while processing the prep template
        # Show the error to the user so they can fix the template
        msg = html_error_message % ('updating the prep template:',
                                    basename(fp), str(e))
        msg = convert_text_html(msg)
        msg_level = "danger"

    callback((msg, msg_level, 'prep_template_tab', pt_id, None))

def setUp(self):
    uploads_path = get_mountpoint('uploads')[0][1]
    # Create prep test file to point at
    self.update_fp = join(uploads_path, '1', 'update.txt')
    with open(self.update_fp, 'w') as f:
        f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")
    self._files_to_remove = [self.update_fp]

def add_prep_template(self, study, user, callback):
    """Adds a prep template to the system

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done
    """
    msg = "Your prep template was added"
    msg_level = "success"

    # If we are on this function, the arguments "raw_data_id",
    # "prep_template" and "data_type_id" must be defined. If not,
    # let tornado raise its error
    raw_data_id = self.get_argument('raw_data_id')
    prep_template = self.get_argument('prep_template')
    data_type_id = self.get_argument('data_type_id')

    # These parameters are optional
    investigation_type = self.get_argument('investigation-type', None)
    user_defined_investigation_type = self.get_argument(
        'user-defined-investigation-type', None)
    new_investigation_type = self.get_argument('new-investigation-type',
                                               None)

    investigation_type = self._process_investigation_type(
        investigation_type, user_defined_investigation_type,
        new_investigation_type)

    # Make sure that the id is an integer
    raw_data_id = _to_int(raw_data_id)
    # Get the upload base directory
    _, base_path = get_mountpoint("uploads")[0]
    # Get the path to the prep template
    fp_rpt = join(base_path, str(study.id), prep_template)
    if not exists(fp_rpt):
        # The file does not exist, fail nicely
        raise HTTPError(400, "This file doesn't exist: %s" % fp_rpt)

    try:
        pt_id = self.remove_add_prep_template(fp_rpt, raw_data_id, study,
                                              data_type_id,
                                              investigation_type)
    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError,
            CParserError) as e:
        # Some error occurred while processing the prep template
        # Show the error to the user so they can fix the template
        msg = html_error_message % ("parsing the prep template: ",
                                    basename(fp_rpt), str(e))
        msg_level = "danger"
        # make sure pt_id is defined for the callback below
        pt_id = None

    callback((msg, msg_level, 'raw_data_tab', raw_data_id, pt_id))

def update_sample_template(self, study, user, callback):
    """Update a sample template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done

    Raises
    ------
    HTTPError
        If the sample template file does not exist
    """
    # If we are on this function, the argument "sample_template" must be
    # defined. If not, let tornado raise its error
    sample_template = self.get_argument('sample_template')

    # Define here the message and message level in case of success
    msg = "The sample template '%s' has been updated" % sample_template
    msg_level = "success"
    # Get the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    # Get the path of the sample template in the uploads folder
    fp_rsp = join(base_fp, str(study.id), sample_template)

    if not exists(fp_rsp):
        # The file does not exist, fail nicely
        raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)

    try:
        with warnings.catch_warnings(record=True) as warns:
            # deleting previous uploads and inserting new one
            st = SampleTemplate(study.id)
            df = load_template_to_dataframe(fp_rsp)
            st.extend(df)
            st.update(df)
            remove(fp_rsp)

            # join all the warning messages into one. Note that this info
            # will be ignored if an exception is raised
            if warns:
                msg = '\n'.join(set(str(w.message) for w in warns))
                msg_level = 'warning'
    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError,
            QiitaDBError) as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        msg = html_error_message % ('updating the sample template:',
                                    basename(fp_rsp), str(e))
        msg = convert_text_html(msg)
        msg_level = "danger"

    callback((msg, msg_level, None, None, None))

def post(self):
    # vars to add files to raw data
    study_id = self.get_argument('study_id')
    raw_data_id = self.get_argument('raw_data_id')
    barcodes_str = self.get_argument('barcodes')
    forward_reads_str = self.get_argument('forward')
    reverse_reads_str = self.get_argument('reverse', None)

    study_id = int(study_id)
    try:
        study = Study(study_id)
    except QiitaDBUnknownIDError:
        # Study not in database so fail nicely
        raise HTTPError(404, "Study %d does not exist" % study_id)
    else:
        check_access(User(self.current_user), study, raise_error=True)

    barcodes, forward_reads, reverse_reads = [], [], []
    for _, f in get_mountpoint("uploads", retrive_all=True):
        f = join(f, str(study_id))
        for t in barcodes_str.split(','):
            ft = join(f, t)
            if exists(ft):
                barcodes.append([ft, "raw_barcodes"])
        for t in forward_reads_str.split(','):
            ft = join(f, t)
            if exists(ft):
                forward_reads.append([ft, "raw_forward_seqs"])
        if reverse_reads_str:
            for t in reverse_reads_str.split(','):
                ft = join(f, t)
                if exists(ft):
                    reverse_reads.append([ft, "raw_reverse_seqs"])

    # this should never happen if following the GUI pipeline
    # but rather be safe than sorry
    if (len(barcodes) != len(forward_reads)
            or (barcodes and len(barcodes) != len(forward_reads))):
        raise HTTPError(404, "user %s tried to submit a wrong pair of "
                             "barcodes/forward/reverse reads"
                        % self.current_user)

    # join all files to send on single var
    filepaths = barcodes
    filepaths.extend(forward_reads)
    if reverse_reads:
        filepaths.extend(reverse_reads)

    job_id = submit(self.current_user, add_files_to_raw_data, raw_data_id,
                    filepaths)

    self.render('compute_wait.html', user=self.current_user, job_id=job_id,
                title='Adding files to your raw data',
                completion_redirect=(
                    '/study/description/%s?top_tab=raw_data_tab&sub_tab=%s'
                    % (study_id, raw_data_id)))

def tearDown(self):
    new_uploaded_files = get_files_from_uploads_folders(str(self.study.id))
    new_files = set(new_uploaded_files).difference(self.uploaded_files)
    path_builder = partial(join, get_mountpoint("uploads")[0][1], '1')
    for _, fp in new_files:
        self._clean_up_files.append(path_builder(fp))

    for f in self._clean_up_files:
        if exists(f):
            remove(f)

def setUp(self):
    super(TestPrepTemplateHandler, self).setUp()
    uploads_dp = get_mountpoint('uploads')[0][1]

    self.new_prep = join(uploads_dp, '1', 'new_template.txt')
    with open(self.new_prep, 'w') as f:
        f.write("sample_name\tnew_col\nSKD6.640190\tnew_value\n")

    self.broken_prep = join(uploads_dp, '1', 'broke_template.txt')
    with open(self.broken_prep, 'w') as f:
        f.write("sample_name\tbroke |col\nSKD6.640190\tnew_value\n")

def tearDown(self):
    if exists(self.update_fp):
        remove(self.update_fp)

    fp = join(get_mountpoint("uploads")[0][1], '1', 'uploaded_file.txt')
    if not exists(fp):
        with open(fp, 'w') as f:
            f.write('')

    r_client.flushdb()

def _common_purge_filpeaths_test(self):
    # Get all the filepaths so we can test if they've been removed or not
    sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
    fps = [join(get_mountpoint_path_by_id(dd_id), fp)
           for fp, dd_id in self.conn_handler.execute_fetchall(sql_fp)]

    # Make sure that the files exist - especially for Travis
    for fp in fps:
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write('\n')
            self.files_to_remove.append(fp)

    _, raw_data_mp = get_mountpoint('raw_data')[0]

    removed_fps = [join(raw_data_mp, '2_sequences_barcodes.fastq.gz'),
                   join(raw_data_mp, '2_sequences.fastq.gz')]

    fps = set(fps).difference(removed_fps)

    # Check that the files exist
    for fp in fps:
        self.assertTrue(exists(fp))
    for fp in removed_fps:
        self.assertTrue(exists(fp))

    exp_count = get_count("qiita.filepath") - 2

    purge_filepaths(self.conn_handler)

    obs_count = get_count("qiita.filepath")

    # Check that only 2 rows have been removed
    self.assertEqual(obs_count, exp_count)

    # Check that the 2 rows that have been removed are the correct ones
    sql = """SELECT EXISTS(
                SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
    obs = self.conn_handler.execute_fetchone(sql, (3,))[0]
    self.assertFalse(obs)
    obs = self.conn_handler.execute_fetchone(sql, (4,))[0]
    self.assertFalse(obs)

    # Check that the files have been successfully removed
    for fp in removed_fps:
        self.assertFalse(exists(fp))

    # Check that all the other files still exist
    for fp in fps:
        self.assertTrue(exists(fp))

def tearDown(self):
    for fp in self._files_to_remove:
        if exists(fp):
            remove(fp)

    # Replace file if removed as part of function testing
    uploads_path = get_mountpoint('uploads')[0][1]
    fp = join(uploads_path, '1', 'uploaded_file.txt')
    if not exists(fp):
        with open(fp, 'w') as f:
            f.write('')

def post(self, study_id):
    method = self.get_argument('remote-request-type')
    url = self.get_argument('inputURL')
    ssh_key = self.request.files['ssh-key'][0]['body']
    status = 'success'
    message = ''

    try:
        study = Study(int(study_id))
    except QiitaDBUnknownIDError:
        raise HTTPError(404, reason="Study %s does not exist" % study_id)
    check_access(self.current_user, study,
                 no_public=True, raise_error=True)

    _, upload_folder = get_mountpoint("uploads")[0]
    upload_folder = join(upload_folder, study_id)
    ssh_key_fp = join(upload_folder, '.key.txt')

    create_nested_path(upload_folder)

    with open(ssh_key_fp, 'wb') as f:
        f.write(ssh_key)
    chmod(ssh_key_fp, 0o600)

    qiita_plugin = Software.from_name_and_version('Qiita', 'alpha')
    if method == 'list':
        cmd = qiita_plugin.get_command('list_remote_files')
        params = Parameters.load(cmd, values_dict={
            'url': url, 'private_key': ssh_key_fp, 'study_id': study_id})
    elif method == 'transfer':
        cmd = qiita_plugin.get_command('download_remote_files')
        params = Parameters.load(cmd, values_dict={
            'url': url, 'private_key': ssh_key_fp,
            'destination': upload_folder})
    else:
        status = 'error'
        message = 'Not a valid method'

    if status == 'success':
        job = ProcessingJob.create(self.current_user, params, True)
        job.submit()
        r_client.set(UPLOAD_STUDY_FORMAT % study_id,
                     dumps({'job_id': job.id}))

    self.write({'status': status, 'message': message})

def test_insert_processed_data_target_gene(self):
    fd, fna_fp = mkstemp(suffix='_seqs.fna')
    close(fd)
    fd, qual_fp = mkstemp(suffix='_seqs.qual')
    close(fd)
    filepaths = [
        (fna_fp, convert_to_id('preprocessed_fasta', 'filepath_type')),
        (qual_fp, convert_to_id('preprocessed_fastq', 'filepath_type'))]

    preprocessed_data = PreprocessedData.create(
        Study(1), "preprocessed_sequence_illumina_params", 1,
        filepaths, data_type="18S")

    params = ProcessedSortmernaParams(1)

    pick_dir = mkdtemp()
    path_builder = partial(join, pick_dir)
    db_path_builder = partial(join, get_mountpoint('processed_data')[0][1])

    # Create a placeholder for the otu table
    with open(path_builder('otu_table.biom'), 'w') as f:
        f.write('\n')

    # Create a placeholder for the directory
    mkdir(path_builder('sortmerna_picked_otus'))

    # Create the log file
    fd, fp = mkstemp(dir=pick_dir, prefix='log_', suffix='.txt')
    close(fd)
    with open(fp, 'w') as f:
        f.write('\n')

    _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)

    new_id = get_count('qiita.processed_data')

    # Check that the files have been copied
    db_files = [db_path_builder("%s_otu_table.biom" % new_id),
                db_path_builder("%s_sortmerna_picked_otus" % new_id),
                db_path_builder("%s_%s" % (new_id, basename(fp)))]
    for fp in db_files:
        self.assertTrue(exists(fp))

    # Check that a new preprocessed data has been created
    self.assertTrue(self.conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.processed_data WHERE "
        "processed_data_id=%s)", (new_id, ))[0])

def test_sample_template_handler_post_request(self):
    # Test user doesn't have access
    with self.assertRaisesRegex(HTTPError,
                                'User does not have access to study'):
        sample_template_handler_post_request(
            1, User('*****@*****.**'), 'ignored')

    # Test study doesn't exist
    user = User('*****@*****.**')
    with self.assertRaisesRegex(HTTPError, 'Study does not exist'):
        sample_template_handler_post_request(1000000, user, 'ignored')

    # Test file doesn't exist
    with self.assertRaisesRegex(HTTPError, 'Filepath not found'):
        sample_template_handler_post_request(1, user, 'DoesNotExist.txt')

    # Test looks like mapping file and no data_type provided
    uploads_dir = join(get_mountpoint('uploads')[0][1], '1')
    fd, fp = mkstemp(suffix='.txt', dir=uploads_dir)
    self._clean_up_files.append(fp)
    close(fd)
    with open(fp, 'w') as f:
        f.write('#SampleID\tCol1\nSample1\tVal1')

    with self.assertRaisesRegex(
            HTTPError, 'Please, choose a data type if uploading a QIIME '
                       'mapping file'):
        sample_template_handler_post_request(1, user, fp)

    # Test success
    obs = sample_template_handler_post_request(
        1, user, 'uploaded_file.txt')
    self.assertCountEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_1')
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])

    # Test direct upload
    obs = sample_template_handler_post_request(
        1, user, fp, data_type='16S', direct_upload=True)
    self.assertCountEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_1')
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])

def tearDown(self):
    super(NewArtifactHandlerTests, self).tearDown()
    for fp in self._files_to_remove:
        if exists(fp):
            remove(fp)

    # Replace file if removed as part of function testing
    uploads_path = get_mountpoint('uploads')[0][1]
    fp = join(uploads_path, '1', 'uploaded_file.txt')
    if not exists(fp):
        with open(fp, 'w') as f:
            f.write('')

def test_artifact_get_req(self):
    obs = artifact_get_req('*****@*****.**', 1)
    exp = {'id': 1,
           'type': 'FASTQ',
           'study': 1,
           'data_type': '18S',
           'timestamp': datetime(2012, 10, 1, 9, 30, 27),
           'visibility': 'private',
           'can_submit_vamps': False,
           'can_submit_ebi': False,
           'processing_parameters': None,
           'ebi_run_accessions': None,
           'is_submitted_vamps': False,
           'parents': [],
           'filepaths': [
               (1, join(get_mountpoint('raw_data')[0][1],
                        '1_s_G1_L001_sequences.fastq.gz'),
                'raw_forward_seqs'),
               (2, join(get_mountpoint('raw_data')[0][1],
                        '1_s_G1_L001_sequences_barcodes.fastq.gz'),
                'raw_barcodes')]}
    self.assertEqual(obs, exp)

def tearDown(self):
    for fp in self._files_to_remove:
        if exists(fp):
            remove(fp)

    # Replace file if removed as part of function testing
    uploads_path = get_mountpoint('uploads')[0][1]
    fp = join(uploads_path, '1', 'uploaded_file.txt')
    if not exists(fp):
        with open(fp, 'w') as f:
            f.write('')

    r_client.flushdb()

def setUp(self):
    self.preprocessed_data = PreprocessedData(1)
    self.params_table = "processed_params_uclust"
    self.params_id = 1
    fd, self.biom_fp = mkstemp(suffix='_table.biom')
    close(fd)
    self.filepaths = [(self.biom_fp, 6)]
    self.date = datetime(2014, 5, 29, 12, 24, 51)
    _, self.db_test_pd_dir = get_mountpoint('processed_data')[0]

    with open(self.biom_fp, "w") as f:
        f.write("\n")
    self._clean_up_files = []

def setUp(self):
    self.name = "Fake Greengenes"
    self.version = "13_8"

    fd, self.seqs_fp = mkstemp(suffix="_seqs.fna")
    close(fd)
    fd, self.tax_fp = mkstemp(suffix="_tax.txt")
    close(fd)
    fd, self.tree_fp = mkstemp(suffix="_tree.tre")
    close(fd)

    _, self.db_dir = get_mountpoint('reference')[0]

    self._clean_up_files = []

def post(self):
    # vars to add files to raw data
    study_id = self.get_argument('study_id')
    raw_data_id = self.get_argument('raw_data_id')
    barcodes_str = self.get_argument('barcodes')
    forward_reads_str = self.get_argument('forward')
    sff_str = self.get_argument('sff')
    fasta_str = self.get_argument('fasta')
    qual_str = self.get_argument('qual')
    reverse_reads_str = self.get_argument('reverse')

    study_id = int(study_id)
    try:
        study = Study(study_id)
    except QiitaDBUnknownIDError:
        # Study not in database so fail nicely
        raise HTTPError(404, "Study %d does not exist" % study_id)
    else:
        check_access(self.current_user, study, raise_error=True)

    def _split(x):
        return x.split(',') if x else []

    filepaths, fps = [], []
    fps.append((_split(barcodes_str), 'raw_barcodes'))
    fps.append((_split(fasta_str), 'raw_fasta'))
    fps.append((_split(qual_str), 'raw_qual'))
    fps.append((_split(forward_reads_str), 'raw_forward_seqs'))
    fps.append((_split(reverse_reads_str), 'raw_reverse_seqs'))
    fps.append((_split(sff_str), 'raw_sff'))

    for _, f in get_mountpoint("uploads", retrieve_all=True):
        f = join(f, str(study_id))
        for fp_set, filetype in fps:
            for t in fp_set:
                ft = join(f, t)
                if exists(ft):
                    filepaths.append((ft, filetype))

    job_id = submit(self.current_user.id, add_files_to_raw_data,
                    raw_data_id, filepaths)

    self.render(
        'compute_wait.html', job_id=job_id,
        title='Adding files to your raw data',
        completion_redirect=(
            '/study/description/%s?top_tab=raw_data_tab&sub_tab=%s'
            % (study_id, raw_data_id)))

def test_add_file(self):
    fp = join(get_mountpoint('analysis')[0][1], 'testfile.txt')
    with open(fp, 'w') as f:
        f.write('testfile!')

    self.analysis._add_file('testfile.txt', 'plain_text', '18S')

    obs = self.conn_handler.execute_fetchall(
        'SELECT * FROM qiita.filepath WHERE filepath_id = 19')
    exp = [[19, 'testfile.txt', 9, '3675007573', 1, 1]]
    self.assertEqual(obs, exp)

    obs = self.conn_handler.execute_fetchall(
        'SELECT * FROM qiita.analysis_filepath WHERE filepath_id = 19')
    exp = [[1, 19, 2]]
    self.assertEqual(obs, exp)

def test_artifact_get_req(self):
    obs = artifact_get_req('*****@*****.**', 1)
    path_builder = partial(join, get_mountpoint('raw_data')[0][1])
    exp = {'id': 1,
           'type': 'FASTQ',
           'study': 1,
           'data_type': '18S',
           'timestamp': datetime(2012, 10, 1, 9, 30, 27),
           'visibility': 'private',
           'can_submit_vamps': False,
           'can_submit_ebi': False,
           'processing_parameters': None,
           'ebi_run_accessions': None,
           'is_submitted_vamps': False,
           'parents': [],
           'filepaths': [
               {'fp_id': 1,
                'fp': path_builder("1_s_G1_L001_sequences.fastq.gz"),
                'fp_type': "raw_forward_seqs",
                'checksum': '2125826711',
                'fp_size': 58},
               {'fp_id': 2,
                'fp': path_builder(
                    "1_s_G1_L001_sequences_barcodes.fastq.gz"),
                'fp_type': "raw_barcodes",
                'checksum': '2125826711',
                'fp_size': 58}]}
    self.assertEqual(obs, exp)

def get(self, path):
    user = self.current_user
    if user.level != 'admin':
        raise HTTPError(403, reason="%s doesn't have access to download "
                                    "uploaded files" % user.email)

    # [0] because it returns a list
    # [1] we only need the filepath
    filepath = get_mountpoint("uploads")[0][1][
        len(get_db_files_base_dir()):]
    relpath = join(filepath, path)

    self._write_nginx_placeholder_file(relpath)
    self.set_header('Content-Type', 'application/octet-stream')
    self.set_header('Content-Transfer-Encoding', 'binary')
    self.set_header('X-Accel-Redirect', '/protected/' + relpath)
    self._set_nginx_headers(basename(relpath))
    self.finish()