def test_delete_files(self):
    """Job.delete removes the job row, its filepath links and result file."""
    try:
        Job.delete(1)
        with self.assertRaises(QiitaDBUnknownIDError):
            Job(1)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id = 11 OR "
            "filepath_id = 15")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.analysis_job WHERE job_id = 1")
        self.assertEqual(obs, [])
        self.assertFalse(exists(join(get_db_files_base_dir(),
                                     "job/1_job_result.txt")))
    finally:
        # re-create the deleted fixture file so later tests still find it
        if not exists(join(get_db_files_base_dir(),
                           "job/1_job_result.txt")):
            with open(join(get_db_files_base_dir(),
                           "job/1_job_result.txt"), 'w') as f:
                f.write("job1result.txt")
def post(self, analysis_id):
    """Create one Job per selected command and launch the analysis run.

    Each element of the "commands" form argument has the shape
    "<data_type>#<command>".
    """
    command_args = self.get_arguments("commands")
    split = [x.split("#") for x in command_args]
    analysis = Analysis(analysis_id)
    commands = []
    # HARD CODED HACKY THING FOR DEMO, FIX Issue #164
    fp, mapping_file = mkstemp(suffix="_map_file.txt")
    close(fp)
    SampleTemplate(1).to_file(mapping_file)
    # map each data type to the first filepath of its processed data
    study_fps = {}
    for pd in Study(1).processed_data:
        processed = ProcessedData(pd)
        study_fps[processed.data_type] = processed.get_filepaths()[0][0]
    for data_type, command in split:
        opts = {
            "--otu_table_fp": study_fps[data_type],
            "--mapping_fp": mapping_file
        }
        if command == "Beta Diversity" and data_type in {'16S', '18S'}:
            opts["--tree_fp"] = join(get_db_files_base_dir(), "reference",
                                     "gg_97_otus_4feb2011.tre")
        elif command == "Beta Diversity":
            opts["--parameter_fp"] = join(get_db_files_base_dir(),
                                          "reference", "params_qiime.txt")
        Job.create(data_type, command, opts, analysis)
        commands.append("%s: %s" % (data_type, command))
    user = self.get_current_user()
    self.render("analysis_waiting.html", user=user, aid=analysis_id,
                aname=analysis.name, commands=commands)
    # fire off analysis run here
    # currently synch run so redirect done here. Will remove after demo
    run_analysis(user, analysis)
def test_delete_folders(self):
    """Job.delete removes the job row, filepath links and result folder."""
    try:
        Job.delete(2)
        with self.assertRaises(QiitaDBUnknownIDError):
            Job(2)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id = 12")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 2")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.analysis_job WHERE job_id = 2")
        self.assertEqual(obs, [])
        self.assertFalse(exists(join(get_db_files_base_dir(),
                                     "job/2_test_folder")))
    finally:
        # put the test data back
        basedir = get_db_files_base_dir()
        if not exists(join(basedir, "job/2_test_folder")):
            mkdir(join(basedir, "job", "2_test_folder"))
            mkdir(join(basedir, "job", "2_test_folder", "subdir"))
            with open(join(basedir, "job", "2_test_folder",
                           "testfile.txt"), 'w') as f:
                f.write("DATA")
            with open(join(basedir, "job", "2_test_folder",
                           "testres.htm"), 'w') as f:
                f.write("DATA")
            with open(join(basedir, "job", "2_test_folder", "subdir",
                           "subres.html"), 'w') as f:
                f.write("DATA")
def test_delete_files(self):
    """Job.delete removes the job row, its filepath links and result file."""
    try:
        Job.delete(1)
        with self.assertRaises(QiitaDBUnknownIDError):
            Job(1)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id = 11 OR "
            "filepath_id = 15")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.analysis_job WHERE job_id = 1")
        self.assertEqual(obs, [])
        self.assertFalse(
            exists(join(get_db_files_base_dir(), "job/1_job_result.txt")))
    finally:
        # re-create the deleted fixture file so later tests still find it
        if not exists(join(get_db_files_base_dir(),
                           "job/1_job_result.txt")):
            with open(
                    join(get_db_files_base_dir(), "job/1_job_result.txt"),
                    'w') as f:
                f.write("job1result.txt")
def test_purge_filepaths(self):
    """purge_filepaths only removes filepaths no longer linked to data."""
    # Add a new filepath to the database
    fd, fp = mkstemp()
    close(fd)
    fp_id = self.conn_handler.execute_fetchone(
        "INSERT INTO qiita.filepath "
        "(filepath, filepath_type_id, checksum, checksum_algorithm_id) "
        "VALUES (%s, %s, %s, %s) RETURNING filepath_id",
        (fp, 1, "", 1))[0]
    self.assertEqual(fp_id, 17)
    # Connect the just added filepath to a raw data
    self.conn_handler.execute(
        "INSERT INTO qiita.raw_filepath (raw_data_id, filepath_id) VALUES"
        "(%s, %s)", (1, 17))
    # Get the filepaths so we can test if they've been removed or not
    sql_fp = "SELECT filepath FROM qiita.filepath WHERE filepath_id=%s"
    fp1 = self.conn_handler.execute_fetchone(sql_fp, (1,))[0]
    fp1 = join(get_db_files_base_dir(), fp1)
    # Make sure that the file exists - specially for travis
    with open(fp1, 'w') as f:
        f.write('\n')
    fp17 = self.conn_handler.execute_fetchone(sql_fp, (17,))[0]
    fp17 = join(get_db_files_base_dir(), fp17)
    # Nothing should be removed while filepath 17 is still linked
    purge_filepaths(self.conn_handler)
    sql_ids = ("SELECT filepath_id FROM qiita.filepath ORDER BY "
               "filepath_id")
    obs = self.conn_handler.execute_fetchall(sql_ids)
    exp = [[1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12],
           [13], [14], [15], [17]]
    self.assertEqual(obs, exp)
    # Check that the files still exist
    self.assertTrue(exists(fp1))
    self.assertTrue(exists(fp17))
    # Unlink the filepath from the raw data
    self.conn_handler.execute(
        "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s", (17,))
    # Only filepath 17 should be removed now that it is unlinked
    purge_filepaths(self.conn_handler)
    obs = self.conn_handler.execute_fetchall(sql_ids)
    exp = [[1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12],
           [13], [14], [15]]
    self.assertEqual(obs, exp)
    # Check that only the file for the removed filepath has been removed
    self.assertTrue(exists(fp1))
    self.assertFalse(exists(fp17))
def test_build_files(self):
    """build_files completes without error; reset its outputs afterwards."""
    analysis_dir = join(get_db_files_base_dir(), "analysis")
    biom_fp = join(analysis_dir, "1_analysis_18S.biom")
    map_fp = join(analysis_dir, "1_analysis_mapping.txt")
    try:
        self.analysis.build_files()
    finally:
        # truncate the generated files back to empty placeholders
        for output_fp in (biom_fp, map_fp):
            with open(output_fp, 'w') as f:
                f.write("")
def test_insert_filepaths_string(self):
    """insert_filepaths copies the file into the DB dir and registers it."""
    fd, fp = mkstemp()
    close(fd)
    with open(fp, "w") as f:
        f.write("\n")
    self.files_to_remove.append(fp)
    exp_new_id = 1 + self.conn_handler.execute_fetchone(
        "SELECT count(1) FROM qiita.filepath")[0]
    obs = insert_filepaths([(fp, "raw_forward_seqs")], 1, "raw_data",
                           "filepath", self.conn_handler)
    self.assertEqual(obs, [exp_new_id])
    # Check that the files have been copied correctly
    exp_fp = join(get_db_files_base_dir(), "raw_data",
                  "1_%s" % basename(fp))
    self.assertTrue(exists(exp_fp))
    self.files_to_remove.append(exp_fp)
    # Check that the filepaths have been added to the DB. Use a
    # parameterized query (as the rest of the suite does) instead of
    # %-interpolating the id into the SQL string.
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=%s",
        (exp_new_id,))
    exp_fp = "1_%s" % basename(fp)
    exp = [[exp_new_id, exp_fp, 1, '852952723', 1, 5]]
    self.assertEqual(obs, exp)
def get(self, analysis_id):
    """Render the analysis results page, including any dropped samples."""
    # the id may carry a trailing path component; keep only the id part
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(self.current_user, analysis)
    # group (command, results) pairs by data type
    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append(
            (jobject.command[0], jobject.results))
    dropped = {}
    dropped_samples = analysis.dropped_samples
    if dropped_samples:
        for proc_data_id, samples in viewitems(dropped_samples):
            proc_data = ProcessedData(proc_data_id)
            key = "Data type %s, Study: %s" % (proc_data.data_type(),
                                               proc_data.study)
            dropped[key] = samples
    self.render("analysis_results.html", jobres=jobres,
                aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def test_set_options(self):
    """Setting options merges in the Job's auto-generated --output_dir."""
    job = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
    job.options = self.options
    # the Job adds its own --output_dir; mirror it in the expected dict
    self.options['--output_dir'] = join(
        get_db_files_base_dir(), 'job/4_alpha_rarefaction.py_output_dir')
    self.assertEqual(job.options, self.options)
def _list_dir_files_nginx(self, dirpath):
    """Generates a nginx list of files in the given dirpath for nginx

    Parameters
    ----------
    dirpath : str
        Path to the directory

    Returns
    -------
    list of (str, str, str)
        The path information needed by nginx for each file in the
        directory
    """
    basedir = get_db_files_base_dir()
    basedir_len = len(basedir) + 1
    entries = []
    for root, _, filenames in walk(dirpath):
        for filename in filenames:
            abspath = join(root, filename)
            # strip the base dir (and its trailing separator) if present
            relpath = (abspath[basedir_len:]
                       if abspath.startswith(basedir) else abspath)
            entries.append((abspath, relpath, relpath))
    return entries
def get(self, analysis_id):
    """Render the analysis results page and clear its cached messages."""
    user = self.current_user
    analysis_id = int(analysis_id)
    check_analysis_access(User(user), analysis_id)
    analysis = Analysis(analysis_id)
    # group (command, results) pairs by data type
    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append((jobject.command[0],
                                         jobject.results))
    dropped = {}
    for proc_data_id, samples in viewitems(analysis.dropped_samples):
        proc_data = ProcessedData(proc_data_id)
        key = "Data type %s, Study: %s" % (proc_data.data_type(),
                                           proc_data.study)
        dropped[key] = samples
    self.render("analysis_results.html", user=self.current_user,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
    # wipe out cached messages for this analysis
    r_server = Redis()
    key = '%s:messages' % self.current_user
    oldmessages = r_server.lrange(key, 0, -1)
    if oldmessages is not None:
        for message in oldmessages:
            if '"analysis": %d' % analysis_id in message:
                # NOTE(review): argument order matches redis-py < 3.0
                # (name, value, num); redis-py >= 3.0 expects
                # (name, count, value) -- confirm installed client version
                r_server.lrem(key, message, 1)
def validate_absolute_path(self, root, absolute_path): """Overrides StaticFileHandler's method to include authentication """ # Get the filename (or the base directory) of the result len_prefix = len(commonprefix([root, absolute_path])) base_requested_fp = absolute_path[len_prefix:].split(sep, 1)[0] current_user = self.current_user # If the user is an admin, then allow access if current_user.level == 'admin': return super(ResultsHandler, self).validate_absolute_path( root, absolute_path) # otherwise, we have to check if they have access to the requested # resource user_id = current_user.id accessible_filepaths = check_access_to_analysis_result( user_id, base_requested_fp) # Turn these filepath IDs into absolute paths db_files_base_dir = get_db_files_base_dir() relpaths = filepath_ids_to_rel_paths(accessible_filepaths) accessible_filepaths = {join(db_files_base_dir, relpath) for relpath in relpaths.values()} # check if the requested resource is a file (or is in a directory) that # the user has access to if join(root, base_requested_fp) in accessible_filepaths: return super(ResultsHandler, self).validate_absolute_path( root, absolute_path) else: raise QiitaPetAuthorizationError(user_id, absolute_path)
def test_build_biom_tables(self):
    """_build_biom_tables writes a biom table restricted to the samples."""
    biom_fp = join(get_db_files_base_dir(), "analysis",
                   "1_analysis_18S.biom")
    try:
        samples = {1: ['SKB8.640193', 'SKD8.640184', 'SKB7.640196']}
        self.analysis._build_biom_tables(samples, 100,
                                         conn_handler=self.conn_handler)
        obs = self.analysis.biom_tables
        self.assertEqual(obs, {'18S': biom_fp})
        table = load_table(biom_fp)
        obs = set(table.ids(axis='sample'))
        exp = {'SKB8.640193', 'SKD8.640184', 'SKB7.640196'}
        self.assertEqual(obs, exp)
        obs = table.metadata('SKB8.640193')
        exp = {
            'Study': 'Identification of the Microbiomes for Cannabis Soils',
            'Processed_id': 1
        }
        self.assertEqual(obs, exp)
    finally:
        # truncate the shared fixture biom file back to empty
        with open(biom_fp, 'w') as f:
            f.write("")
def get(self, analysis_id):
    """Render analysis results grouped by data type, with dropped samples."""
    # the id may carry a trailing path component; keep only the id part
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(self.current_user, analysis)
    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append((jobject.command[0],
                                         jobject.results))
    dropped_samples = analysis.dropped_samples
    dropped = defaultdict(list)
    for proc_data_id, samples in viewitems(dropped_samples):
        if not samples:
            continue
        proc_data = ProcessedData(proc_data_id)
        data_type = proc_data.data_type()
        study = proc_data.study
        dropped[data_type].append((Study(study).title, len(samples),
                                   ', '.join(samples)))
    self.render("analysis_results.html", analysis_id=analysis_id,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def _list_dir_files_nginx(self, dirpath):
    """Generates a nginx list of files in the given dirpath for nginx

    Parameters
    ----------
    dirpath : str
        Path to the directory

    Returns
    -------
    list of (str, str, str)
        The path information needed by nginx for each file in the
        directory
    """
    basedir = get_db_files_base_dir()
    basedir_len = len(basedir) + 1
    entries = []
    for root, _, filenames in walk(dirpath):
        for filename in filenames:
            abspath = join(root, filename)
            # strip the base dir (and its trailing separator) if present
            relpath = (abspath[basedir_len:]
                       if abspath.startswith(basedir) else abspath)
            entries.append((relpath, relpath, '-', str(getsize(abspath))))
    return entries
def test_set_options(self):
    """Setting options merges in the Job's auto-generated --output_dir."""
    new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
    new.options = self.options
    # the Job adds its own --output_dir; mirror it in the expected dict
    self.options['--output_dir'] = join(get_db_files_base_dir(),
                                        'job/4_alpha_rarefaction.'
                                        'py_output_dir')
    self.assertEqual(new.options, self.options)
def get(self, analysis_id):
    """Render analysis results; index pages display their directory name."""
    # the id may carry a trailing path component; keep only the id part
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(self.current_user, analysis)
    jobres = defaultdict(list)
    for jobject in analysis.jobs:
        results = []
        for res in jobject.results:
            name = basename(res)
            if name.startswith('index'):
                # show the containing directory's name for index pages
                name = basename(dirname(res)).replace('_', ' ')
            results.append((res, name))
        jobres[jobject.datatype].append((jobject.command[0], results))
    dropped_samples = analysis.dropped_samples
    dropped = defaultdict(list)
    for proc_data_id, samples in viewitems(dropped_samples):
        if not samples:
            continue
        proc_data = Artifact(proc_data_id)
        data_type = proc_data.data_type
        dropped[data_type].append((proc_data.study.title, len(samples),
                                   ', '.join(samples)))
    self.render("analysis_results.html", analysis_id=analysis_id,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def get(self, analysis_id):
    """Render the analysis results page and clear its cached messages."""
    user = self.current_user
    analysis_id = int(analysis_id)
    check_analysis_access(User(user), analysis_id)
    analysis = Analysis(analysis_id)
    # group (command, results) pairs by data type
    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append(
            (jobject.command[0], jobject.results))
    dropped = {}
    for proc_data_id, samples in viewitems(analysis.dropped_samples):
        proc_data = ProcessedData(proc_data_id)
        key = "Data type %s, Study: %s" % (proc_data.data_type(),
                                           proc_data.study)
        dropped[key] = samples
    self.render("analysis_results.html", user=self.current_user,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
    # wipe out cached messages for this analysis
    r_server = Redis()
    key = '%s:messages' % self.current_user
    oldmessages = r_server.lrange(key, 0, -1)
    if oldmessages is not None:
        for message in oldmessages:
            if '"analysis": %d' % analysis_id in message:
                # NOTE(review): argument order matches redis-py < 3.0
                # (name, value, num); redis-py >= 3.0 expects
                # (name, count, value) -- confirm installed client version
                r_server.lrem(key, message, 1)
def get(self, analysis_id):
    """Render analysis results; index pages display their directory name."""
    # the id may carry a trailing path component; keep only the id part
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(self.current_user, analysis)
    jobres = defaultdict(list)
    for jobject in analysis.jobs:
        results = []
        for res in jobject.results:
            name = basename(res)
            if name.startswith('index'):
                # show the containing directory's name for index pages
                name = basename(dirname(res)).replace('_', ' ')
            results.append((res, name))
        jobres[jobject.datatype].append((jobject.command[0], results))
    dropped_samples = analysis.dropped_samples
    dropped = defaultdict(list)
    for proc_data_id, samples in viewitems(dropped_samples):
        if not samples:
            continue
        proc_data = Artifact(proc_data_id)
        data_type = proc_data.data_type
        dropped[data_type].append(
            (proc_data.study.title, len(samples), ', '.join(samples)))
    self.render("analysis_results.html", analysis_id=analysis_id,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def test_add_results(self):
    """Attaching a plain-text result links its filepaths to the job."""
    result_fp = join(get_db_files_base_dir(), "job", "1_job_result.txt")
    self.job.add_results([(result_fp, "plain_text")])
    # make sure files attached to job properly
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
    self.assertEqual(obs, [[1, 11], [1, 15]])
def test_retrieve_options(self):
    """The options property returns the stored option dictionary."""
    expected = {
        '--otu_table_fp': 1,
        '--output_dir': join(
            get_db_files_base_dir(),
            'job/1_summarize_taxa_through_plots.py_output_dir'),
    }
    self.assertEqual(self.job.options, expected)
def setUp(self):
    """Build prep-template metadata, a new RawData and expected ids."""
    metadata_dict = {
        'SKB8.640193': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status_id': 1,
            'data_type_id': 2,
            'str_column': 'Value for sample 1'
        },
        'SKD8.640184': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status_id': 1,
            'data_type_id': 2,
            'str_column': 'Value for sample 2'
        },
        'SKB7.640196': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status_id': 1,
            'data_type_id': 2,
            'str_column': 'Value for sample 3'
        }
    }
    self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    self.test_raw_data = RawData(1)
    # create minimal seqs/barcodes fixture files for a new raw data
    fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(fd)
    filepaths = [(seqs_fp, 1), (barcodes_fp, 2)]
    with open(seqs_fp, "w") as f:
        f.write("\n")
    with open(barcodes_fp, "w") as f:
        f.write("\n")
    self.new_raw_data = RawData.create(2, filepaths, [Study(1)])
    # RawData.create copies the files into the DB dir with a "3_" prefix
    db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
    db_seqs_fp = join(db_test_raw_dir, "3_%s" % basename(seqs_fp))
    db_barcodes_fp = join(db_test_raw_dir, "3_%s" % basename(barcodes_fp))
    self._clean_up_files = [db_seqs_fp, db_barcodes_fp]
    self.tester = PrepTemplate(1)
    self.exp_sample_ids = {
        'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
        'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
        'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
        'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
        'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
        'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
        'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
    }
def test_add_results_dir(self):
    """Attaching a directory result links its filepath to the job."""
    # Create a test directory
    test_dir = join(get_db_files_base_dir(), "job", "2_test_folder")
    # add folder to job
    self.job.add_results([(test_dir, "directory")])
    # make sure files attached to job properly
    # NOTE(review): the query checks job_id = 1 (self.job) while the
    # folder fixture is named 2_test_folder -- confirm that is intended
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
    self.assertEqual(obs, [[1, 11], [1, 15]])
def test_build_files_job_comm_wrapper(self):
    """_build_analysis_files creates outputs; the comm wrapper errors out."""
    # basic setup needed for test
    job = Job(3)
    # create the files needed for job, testing _build_analysis_files
    analysis = Analysis(2)
    _build_analysis_files(analysis, 100)
    # register generated files for cleanup by tearDown
    self._del_files.append(join(get_db_files_base_dir(), "analysis",
                                "2_analysis_mapping.txt"))
    self._del_files.append(join(get_db_files_base_dir(), "analysis",
                                "2_analysis_18S.biom"))
    self.assertTrue(exists(join(get_db_files_base_dir(), "analysis",
                                "2_analysis_mapping.txt")))
    self.assertTrue(exists(join(get_db_files_base_dir(), "analysis",
                                "2_analysis_18S.biom")))
    self.assertEqual([3], analysis.jobs)
    _job_comm_wrapper("*****@*****.**", 2, job)
    self.assertEqual(job.status, "error")
def get(self, aid):
    """Render the results page for the given analysis id."""
    analysis = Analysis(aid)
    # group (command, results) pairs by data type
    results_by_dt = defaultdict(list)
    for job_id in analysis.jobs:
        job = Job(job_id)
        results_by_dt[job.datatype].append((job.command[0], job.results))
    self.render("analysis_results.html", user=self.get_current_user(),
                jobres=results_by_dt, aname=analysis.name,
                basefolder=get_db_files_base_dir())
def test_retrieve_dropped_samples(self):
    """dropped_samples reports the samples left out of the biom table."""
    biom_fp = join(get_db_files_base_dir(), "analysis",
                   "1_analysis_18S.biom")
    try:
        samples = {1: ['SKB8.640193', 'SKD8.640184', 'SKB7.640196']}
        self.analysis._build_biom_tables(samples, 100,
                                         conn_handler=self.conn_handler)
        exp = {1: {'SKM4.640180', 'SKM9.640192'}}
        self.assertEqual(self.analysis.dropped_samples, exp)
    finally:
        # truncate the shared fixture biom file back to empty
        with open(biom_fp, 'w') as f:
            f.write("")
def test_get_mountpoint_path_by_id(self):
    """get_mountpoint_path_by_id resolves ids, honoring active flags."""
    exp = join(get_db_files_base_dir(), "raw_data", "")
    obs = get_mountpoint_path_by_id(5)
    self.assertEqual(obs, exp)
    exp = join(get_db_files_base_dir(), "analysis", "")
    obs = get_mountpoint_path_by_id(1)
    self.assertEqual(obs, exp)
    exp = join(get_db_files_base_dir(), "job", "")
    obs = get_mountpoint_path_by_id(2)
    self.assertEqual(obs, exp)
    # inserting new ones so we can test that it retrieves these and
    # doesn't alter other ones
    self.conn_handler.execute(
        "UPDATE qiita.data_directory SET active=false WHERE "
        "data_directory_id=1")
    self.conn_handler.execute(
        "INSERT INTO qiita.data_directory (data_type, mountpoint, "
        "subdirectory, active) VALUES ('analysis', 'analysis', 'tmp', "
        "true), ('raw_data', 'raw_data', 'tmp', false)")
    # this should have been updated
    exp = join(get_db_files_base_dir(), "analysis", "tmp")
    obs = get_mountpoint_path_by_id(10)
    self.assertEqual(obs, exp)
    # these 2 shouldn't
    exp = join(get_db_files_base_dir(), "raw_data", "")
    obs = get_mountpoint_path_by_id(5)
    self.assertEqual(obs, exp)
    exp = join(get_db_files_base_dir(), "job", "")
    obs = get_mountpoint_path_by_id(2)
    self.assertEqual(obs, exp)
def setUp(self):
    """Create the processed-data fixture attributes and a temp biom file."""
    self._clean_up_files = []
    self.preprocessed_data = PreprocessedData(1)
    self.params_table = "processed_params_uclust"
    self.params_id = 1
    self.date = datetime(2014, 5, 29, 12, 24, 51)
    self.db_test_pd_dir = join(get_db_files_base_dir(), 'processed_data')
    biom_fd, self.biom_fp = mkstemp(suffix='_table.biom')
    close(biom_fd)
    with open(self.biom_fp, "w") as f:
        f.write("\n")
    self.filepaths = [(self.biom_fp, 6)]
def test_add_jobs_in_construct_job_graphs(self):
    """_construct_job_graph appends a new job with merged options."""
    analysis = Analysis(2)
    npt.assert_warns(QiitaDBWarning, analysis.build_files)
    RunAnalysis()._construct_job_graph(
        analysis, [('18S', 'Summarize Taxa')],
        comm_opts={'Summarize Taxa': {'opt1': 5}})
    self.assertEqual(analysis.jobs, [Job(3), Job(4)])
    job = Job(4)
    self.assertEqual(job.datatype, '18S')
    self.assertEqual(job.command,
                     ['Summarize Taxa', 'summarize_taxa_through_plots.py'])
    # user option opt1 is merged with the auto-generated file paths
    expopts = {
        '--mapping_fp': join(
            get_db_files_base_dir(), 'analysis/2_analysis_mapping.txt'),
        '--otu_table_fp': join(
            get_db_files_base_dir(),
            'analysis/2_analysis_dt-18S_r-1_c-3.biom'),
        '--output_dir': join(
            get_db_files_base_dir(), 'job',
            '4_summarize_taxa_through_plots.py_output_dir'),
        'opt1': 5}
    self.assertEqual(job.options, expopts)
def setUp(self):
    """Create reference fixture files (seqs, taxonomy, tree)."""
    self.name = "Fake GreenGenes"
    self.version = "13_8"
    self.db_dir = join(get_db_files_base_dir(), 'reference')
    self._clean_up_files = []
    # one temp file per reference component
    for attr, suffix in (('seqs_fp', '_seqs.fna'),
                         ('tax_fp', '_tax.txt'),
                         ('tree_fp', '_tree.tre')):
        fd, path = mkstemp(suffix=suffix)
        close(fd)
        setattr(self, attr, path)
def test_delete_folders(self):
    """Job.delete removes the job row, filepath links and result folder."""
    try:
        Job.delete(2)
        with self.assertRaises(QiitaDBUnknownIDError):
            Job(2)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id = 12")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 2")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.analysis_job WHERE job_id = 2")
        self.assertEqual(obs, [])
        self.assertFalse(
            exists(join(get_db_files_base_dir(), "job/2_test_folder")))
    finally:
        # put the test data back
        basedir = get_db_files_base_dir()
        if not exists(join(basedir, "job/2_test_folder")):
            mkdir(join(basedir, "job", "2_test_folder"))
            mkdir(join(basedir, "job", "2_test_folder", "subdir"))
            with open(
                    join(basedir, "job", "2_test_folder", "testfile.txt"),
                    'w') as f:
                f.write("DATA")
            with open(join(basedir, "job", "2_test_folder",
                           "testres.htm"), 'w') as f:
                f.write("DATA")
            with open(
                    join(basedir, "job", "2_test_folder", "subdir",
                         "subres.html"), 'w') as f:
                f.write("DATA")
def setUp(self):
    """Build prep-template metadata, a new RawData and expected ids."""
    metadata_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status_id': 1,
                        'data_type_id': 2,
                        'str_column': 'Value for sample 1'},
        'SKD8.640184': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status_id': 1,
                        'data_type_id': 2,
                        'str_column': 'Value for sample 2'},
        'SKB7.640196': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status_id': 1,
                        'data_type_id': 2,
                        'str_column': 'Value for sample 3'}
    }
    self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    self.test_raw_data = RawData(1)
    # create minimal seqs/barcodes fixture files for a new raw data
    fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(fd)
    filepaths = [(seqs_fp, 1), (barcodes_fp, 2)]
    with open(seqs_fp, "w") as f:
        f.write("\n")
    with open(barcodes_fp, "w") as f:
        f.write("\n")
    self.new_raw_data = RawData.create(2, filepaths, [Study(1)])
    # RawData.create copies the files into the DB dir with a "3_" prefix
    db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
    db_seqs_fp = join(db_test_raw_dir, "3_%s" % basename(seqs_fp))
    db_barcodes_fp = join(db_test_raw_dir, "3_%s" % basename(barcodes_fp))
    self._clean_up_files = [db_seqs_fp, db_barcodes_fp]
    self.tester = PrepTemplate(1)
    self.exp_sample_ids = {'SKB1.640202', 'SKB2.640194', 'SKB3.640195',
                           'SKB4.640189', 'SKB5.640181', 'SKB6.640176',
                           'SKB7.640196', 'SKB8.640193', 'SKB9.640200',
                           'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
                           'SKD4.640185', 'SKD5.640186', 'SKD6.640190',
                           'SKD7.640191', 'SKD8.640184', 'SKD9.640182',
                           'SKM1.640183', 'SKM2.640199', 'SKM3.640197',
                           'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
                           'SKM7.640188', 'SKM8.640201', 'SKM9.640192'}
def setUp(self):
    """Create raw-data fixture files (seqs + barcodes) and attributes."""
    seqs_fd, self.seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(seqs_fd)
    barcodes_fd, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(barcodes_fd)
    # both fixture files hold a single newline
    for path in (self.seqs_fp, self.barcodes_fp):
        with open(path, "w") as f:
            f.write("\n")
    self.filetype = 2
    self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)]
    self.studies = [Study(1)]
    self.db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
    self._clean_up_files = []
def setUp(self):
    """Create a temp dir holding two newline-only fixture files."""
    self.tmpdir = mkdtemp()
    self.files_to_remove = []
    for _ in range(2):
        fd, path = mkstemp(dir=self.tmpdir)
        close(fd)
        with open(path, "w") as f:
            f.write("\n")
        self.files_to_remove.append(path)
    self.file1, self.file2 = self.files_to_remove
    self.dirs_to_remove = [self.tmpdir]
    self.db_test_ppd_dir = join(get_db_files_base_dir(),
                                'preprocessed_data')
def test_add_jobs_in_construct_job_graphs(self):
    """_construct_job_graph appends a new job with merged options."""
    analysis = Analysis(2)
    RunAnalysis()._construct_job_graph(
        analysis, [('18S', 'Summarize Taxa')],
        comm_opts={'Summarize Taxa': {'opt1': 5}})
    self.assertEqual(analysis.jobs, [3, 4])
    job = Job(4)
    self.assertEqual(job.datatype, '18S')
    self.assertEqual(job.command,
                     ['Summarize Taxa', 'summarize_taxa_through_plots.py'])
    # user option opt1 is merged with the auto-generated output dir
    expopts = {
        '--output_dir': join(
            get_db_files_base_dir(), 'job',
            '4_summarize_taxa_through_plots.py_output_dir'),
        'opt1': 5}
    self.assertEqual(job.options, expopts)
def setUp(self):
    """Create two empty biom fixture files for processed-data tests."""
    fd, self.otu_table_fp = mkstemp(suffix='_otu_table.biom')
    close(fd)
    fd, self.otu_table_2_fp = mkstemp(suffix='_otu_table2.biom')
    close(fd)
    # both fixture files hold a single newline
    for path in (self.otu_table_fp, self.otu_table_2_fp):
        with open(path, "w") as f:
            f.write("\n")
    self.files_to_remove = [self.otu_table_fp, self.otu_table_2_fp]
    self.db_test_processed_data_dir = join(get_db_files_base_dir(),
                                           'processed_data')
def test_build_mapping_file(self):
    """_build_mapping_file writes a QIIME mapping file for the samples."""
    map_fp = join(get_db_files_base_dir(), "analysis",
                  "1_analysis_mapping.txt")
    try:
        samples = {1: ['SKB8.640193', 'SKD8.640184', 'SKB7.640196']}
        self.analysis._build_mapping_file(samples,
                                          conn_handler=self.conn_handler)
        obs = self.analysis.mapping_file
        self.assertEqual(obs, map_fp)
        with open(map_fp) as f:
            mapdata = f.readlines()
        # check some columns for correctness
        obs = [line.split('\t')[0] for line in mapdata]
        exp = ['#SampleID', 'SKB8.640193', 'SKD8.640184', 'SKB7.640196']
        self.assertEqual(obs, exp)
        obs = [line.split('\t')[1] for line in mapdata]
        exp = ['BarcodeSequence', 'AGCGCTCACATC', 'TGAGTGGTCTGT',
               'CGGCCTAAGTTC']
        self.assertEqual(obs, exp)
        obs = [line.split('\t')[2] for line in mapdata]
        exp = ['LinkerPrimerSequence', 'GTGCCAGCMGCCGCGGTAA',
               'GTGCCAGCMGCCGCGGTAA', 'GTGCCAGCMGCCGCGGTAA']
        self.assertEqual(obs, exp)
        obs = [line.split('\t')[19] for line in mapdata]
        exp = ['host_subject_id', '1001:M7', '1001:D9', '1001:M8']
        self.assertEqual(obs, exp)
        obs = [line.split('\t')[47] for line in mapdata]
        exp = ['tot_org_carb', '5.0', '4.32', '5.0']
        self.assertEqual(obs, exp)
        obs = [line.split('\t')[-1] for line in mapdata]
        exp = ['Description\n'] + ['Cannabis Soil Microbiome\n'] * 3
        self.assertEqual(obs, exp)
    finally:
        # truncate the shared fixture mapping file back to empty
        with open(map_fp, 'w') as f:
            f.write("")
def _list_artifact_files_nginx(self, artifact):
    """Generates a nginx list of files for the given artifact

    Parameters
    ----------
    artifact : qiita_db.artifact.Artifact
        The artifact to retrieve the files

    Returns
    -------
    list of (str, str, str)
        The path information needed by nginx for each file in the
        artifact
    """
    basedir = get_db_files_base_dir()
    basedir_len = len(basedir) + 1
    to_download = []
    for i, x in enumerate(artifact.filepaths):
        # ignore if tgz as they could create problems and the
        # raw data is in the folder
        if x['fp_type'] == 'tgz':
            continue
        if isdir(x['fp']):
            # If we have a directory, we actually need to list all the
            # files from the directory so NGINX can actually download all
            # of them
            to_download.extend(self._list_dir_files_nginx(x['fp']))
        elif x['fp'].startswith(basedir):
            spath = x['fp'][basedir_len:]
            to_download.append(
                (spath, spath, '-', str(x['fp_size'])))
        else:
            to_download.append(
                (x['fp'], x['fp'], '-', str(x['fp_size'])))
    for pt in artifact.prep_templates:
        # the latest prep template file is always the first [0] tuple and
        # we need the filepath [1]
        pt_fp = pt.get_filepaths()[0][1]
        if pt_fp is not None:
            spt_fp = pt_fp
            if pt_fp.startswith(basedir):
                spt_fp = pt_fp[basedir_len:]
            fname = 'mapping_files/%s_mapping_file.txt' % artifact.id
            to_download.append((spt_fp, fname, '-', str(getsize(pt_fp))))
    return to_download
def setUp(self):
    """Create fna/qual fixture files for preprocessed-data tests."""
    self.raw_data = RawData(1)
    self.study = Study(1)
    self.params_table = "preprocessed_sequence_illumina_params"
    self.params_id = 1
    self._clean_up_files = []
    self.db_test_ppd_dir = join(get_db_files_base_dir(),
                                'preprocessed_data')
    fna_fd, self.fna_fp = mkstemp(suffix='_seqs.fna')
    close(fna_fd)
    qual_fd, self.qual_fp = mkstemp(suffix='_seqs.qual')
    close(qual_fd)
    # both fixture files hold a single newline
    for path in (self.fna_fp, self.qual_fp):
        with open(path, "w") as f:
            f.write("\n")
    self.filepaths = [(self.fna_fp, 4), (self.qual_fp, 5)]
def test_get_mountpoint(self):
    """get_mountpoint returns active mountpoints, honoring active flags."""
    exp = [(5, join(get_db_files_base_dir(), 'raw_data', ''))]
    obs = get_mountpoint("raw_data")
    self.assertEqual(obs, exp)
    exp = [(1, join(get_db_files_base_dir(), 'analysis', ''))]
    obs = get_mountpoint("analysis")
    self.assertEqual(obs, exp)
    exp = [(2, join(get_db_files_base_dir(), 'job', ''))]
    obs = get_mountpoint("job")
    self.assertEqual(obs, exp)
    # inserting new ones so we can test that it retrieves these and
    # doesn't alter other ones
    self.conn_handler.execute(
        "UPDATE qiita.data_directory SET active=false WHERE "
        "data_directory_id=1")
    self.conn_handler.execute(
        "INSERT INTO qiita.data_directory (data_type, mountpoint, "
        "subdirectory, active) VALUES ('analysis', 'analysis', 'tmp', "
        "true), ('raw_data', 'raw_data', 'tmp', false)")
    # this should have been updated
    exp = [(10, join(get_db_files_base_dir(), 'analysis', 'tmp'))]
    obs = get_mountpoint("analysis")
    self.assertEqual(obs, exp)
    # these 2 shouldn't
    exp = [(5, join(get_db_files_base_dir(), 'raw_data', ''))]
    obs = get_mountpoint("raw_data")
    self.assertEqual(obs, exp)
    exp = [(2, join(get_db_files_base_dir(), 'job', ''))]
    obs = get_mountpoint("job")
    self.assertEqual(obs, exp)
    # testing multi returns
    exp = [(5, join(get_db_files_base_dir(), 'raw_data', '')),
           (11, join(get_db_files_base_dir(), 'raw_data', 'tmp'))]
    obs = get_mountpoint("raw_data", retrieve_all=True)
    self.assertEqual(obs, exp)
def get(self, path):
    """Serve an uploaded file via nginx X-Accel-Redirect; admin-only."""
    user = self.current_user
    if user.level != 'admin':
        raise HTTPError(403, reason="%s doesn't have access to download "
                        "uploaded files" % user.email)
    # [0] because it returns a list
    # [1] we only need the filepath
    filepath = get_mountpoint("uploads")[0][1][
        len(get_db_files_base_dir()):]
    relpath = join(filepath, path)
    self._write_nginx_placeholder_file(relpath)
    self.set_header('Content-Type', 'application/octet-stream')
    self.set_header('Content-Transfer-Encoding', 'binary')
    self.set_header('X-Accel-Redirect', '/protected/' + relpath)
    self._set_nginx_headers(basename(relpath))
    self.finish()
def _list_artifact_files_nginx(self, artifact):
    """Generates a nginx list of files for the given artifact

    Parameters
    ----------
    artifact : qiita_db.artifact.Artifact
        The artifact to retrieve the files

    Returns
    -------
    list of (str, str, str)
        The path information needed by nginx for each file in the
        artifact
    """
    basedir = get_db_files_base_dir()
    basedir_len = len(basedir) + 1
    to_download = []
    for i, x in enumerate(artifact.filepaths):
        # ignore if tgz as they could create problems and the
        # raw data is in the folder
        if x['fp_type'] == 'tgz':
            continue
        if isdir(x['fp']):
            # If we have a directory, we actually need to list all the
            # files from the directory so NGINX can actually download all
            # of them
            to_download.extend(self._list_dir_files_nginx(x['fp']))
        elif x['fp'].startswith(basedir):
            spath = x['fp'][basedir_len:]
            to_download.append(
                (spath, spath, str(x['checksum']), str(x['fp_size'])))
        else:
            to_download.append(
                (x['fp'], x['fp'], str(x['checksum']),
                 str(x['fp_size'])))
    for pt in artifact.prep_templates:
        qmf = pt.qiime_map_fp
        if qmf is not None:
            sqmf = qmf
            if qmf.startswith(basedir):
                sqmf = qmf[basedir_len:]
            fname = 'mapping_files/%s_mapping_file.txt' % artifact.id
            # '-' because the QIIME mapping file has no stored checksum
            to_download.append((sqmf, fname, '-', str(getsize(qmf))))
    return to_download
def _list_artifact_files_nginx(self, artifact):
    """Generates a nginx list of files for the given artifact

    Parameters
    ----------
    artifact : qiita_db.artifact.Artifact
        The artifact to retrieve the files

    Returns
    -------
    list of (str, str, str)
        The path information needed by nginx for each file in the
        artifact
    """
    basedir = get_db_files_base_dir()
    basedir_len = len(basedir) + 1
    to_download = []
    for i, (fid, path, data_type) in enumerate(artifact.filepaths):
        # ignore if tgz as they could create problems and the
        # raw data is in the folder
        if data_type == 'tgz':
            continue
        if isdir(path):
            # If we have a directory, we actually need to list all the
            # files from the directory so NGINX can actually download all
            # of them
            to_download.extend(self._list_dir_files_nginx(path))
        elif path.startswith(basedir):
            spath = path[basedir_len:]
            to_download.append((path, spath, spath))
        else:
            to_download.append((path, path, path))
    for pt in artifact.prep_templates:
        qmf = pt.qiime_map_fp
        if qmf is not None:
            sqmf = qmf
            if qmf.startswith(basedir):
                sqmf = qmf[basedir_len:]
            to_download.append(
                (qmf, sqmf,
                 'mapping_files/%s_mapping_file.txt' % artifact.id))
    return to_download
def setUp(self):
    """Create fna/qual fixture files plus EBI accession attributes."""
    self.raw_data = RawData(1)
    self.study = Study(1)
    self.params_table = "preprocessed_sequence_illumina_params"
    self.params_id = 1
    self.ebi_submission_accession = "EBI123456-A"
    self.ebi_study_accession = "EBI123456-B"
    self._clean_up_files = []
    self.db_test_ppd_dir = join(get_db_files_base_dir(),
                                'preprocessed_data')
    fna_fd, self.fna_fp = mkstemp(suffix='_seqs.fna')
    close(fna_fd)
    qual_fd, self.qual_fp = mkstemp(suffix='_seqs.qual')
    close(qual_fd)
    # both fixture files hold a single newline
    for path in (self.fna_fp, self.qual_fp):
        with open(path, "w") as f:
            f.write("\n")
    self.filepaths = [(self.fna_fp, 4), (self.qual_fp, 5)]
def test_failure_callback(self): """Make sure failure at file creation step doesn't hang everything""" # rename a needed file for creating the biom table base = get_db_files_base_dir() rename(join(base, "processed_data", "1_study_1001_closed_reference_otu_table.biom"), join(base, "processed_data", "1_study_1001.bak")) analysis = Analysis(2) group = get_id_from_user("*****@*****.**") try: app = RunAnalysis(moi_context=ctx_default, moi_parent_id=group) app(analysis, [], rarefaction_depth=100) self.assertEqual(analysis.status, 'error') for job_id in analysis.jobs: self.assertEqual(Job(job_id).status, 'error') finally: rename(join(base, "processed_data", "1_study_1001.bak"), join(base, "processed_data", "1_study_1001_closed_reference_otu_table.biom"))