Beispiel #1
0
    def _common_purge_filpeaths_test(self):
        """Check that purge_filepaths drops exactly the two inserted rows.

        Inserts two orphaned filepath rows (plus their on-disk files),
        runs purge_filepaths(), and verifies that only those two rows and
        files were removed while everything else survived.
        """
        # Snapshot every filepath registered in the DB so we can later
        # tell survivors apart from purged entries
        sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
        db_fps = []
        for name, dd_id in self.conn_handler.execute_fetchall(sql_fp):
            db_fps.append(join(get_mountpoint_path_by_id(dd_id), name))

        # Make sure that the files exist - specially for travis
        for path in db_fps:
            if exists(path):
                continue
            with open(path, "w") as fh:
                fh.write("\n")
            self.files_to_remove.append(path)

        _, raw_data_mp = get_mountpoint("raw_data")[0]

        removed_fps = [join(raw_data_mp, "2_sequences_barcodes.fastq.gz"),
                       join(raw_data_mp, "2_sequences.fastq.gz")]

        for path in removed_fps:
            with open(path, "w") as fh:
                fh.write("\n")

        # Register the two soon-to-be-purged filepaths in the DB
        sql = """INSERT INTO qiita.filepath
                    (filepath, filepath_type_id, checksum,
                     checksum_algorithm_id, data_directory_id)
                VALUES ('2_sequences_barcodes.fastq.gz', 3, '852952723', 1, 5),
                       ('2_sequences.fastq.gz', 1, '852952723', 1, 5)
                RETURNING filepath_id"""
        fp_ids = self.conn_handler.execute_fetchall(sql)

        kept_fps = set(db_fps).difference(removed_fps)

        # Sanity check: everything is on disk before purging
        for path in kept_fps:
            self.assertTrue(exists(path))
        for path in removed_fps:
            self.assertTrue(exists(path))

        exp_count = get_count("qiita.filepath") - 2

        purge_filepaths()

        # Exactly two rows should be gone
        self.assertEqual(get_count("qiita.filepath"), exp_count)

        # ... and they are precisely the two rows inserted above
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
        for fp_id in (fp_ids[0][0], fp_ids[1][0]):
            self.assertFalse(
                self.conn_handler.execute_fetchone(sql, (fp_id,))[0])

        # The purged files were deleted from disk...
        for path in removed_fps:
            self.assertFalse(exists(path))

        # ...while every other file is untouched
        for path in kept_fps:
            self.assertTrue(exists(path))
Beispiel #2
0
    def _common_purge_filpeaths_test(self):
        """Check that purge_filepaths removes only filepath rows 3 and 4.

        Relies on the test DB already containing filepath rows 3 and 4
        (the two raw_data fastq.gz entries); verifies they and their files
        are removed while all other rows/files survive.
        """
        # Snapshot every registered filepath so we can tell survivors apart
        sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
        all_fps = [join(get_mountpoint_path_by_id(dd_id), name)
                   for name, dd_id
                   in self.conn_handler.execute_fetchall(sql_fp)]

        # Make sure that the files exist - specially for travis
        for path in all_fps:
            if not exists(path):
                with open(path, 'w') as fh:
                    fh.write('\n')
                self.files_to_remove.append(path)

        raw_data_mp = get_mountpoint('raw_data')[0][1]

        removed_fps = [join(raw_data_mp, n)
                       for n in ('2_sequences_barcodes.fastq.gz',
                                 '2_sequences.fastq.gz')]

        surviving = set(all_fps) - set(removed_fps)

        # Everything should be on disk before purging
        for path in surviving:
            self.assertTrue(exists(path))
        for path in removed_fps:
            self.assertTrue(exists(path))

        exp_count = get_count("qiita.filepath") - 2

        purge_filepaths(self.conn_handler)

        # Check that only 2 rows have been removed
        self.assertEqual(get_count("qiita.filepath"), exp_count)

        # Rows 3 and 4 are the ones that must be gone
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
        for fp_id in (3, 4):
            self.assertFalse(
                self.conn_handler.execute_fetchone(sql, (fp_id, ))[0])

        # Their files were deleted from disk...
        for path in removed_fps:
            self.assertFalse(exists(path))

        # ...and every other file remains
        for path in surviving:
            self.assertTrue(exists(path))
Beispiel #3
0
    def test_get_mountpoint_path_by_id(self):
        """Check mountpoint paths are resolved correctly by data_directory_id."""
        # Pre-existing data directories resolve to their current paths
        checks = [(5, "raw_data", ""), (1, "analysis", ""), (2, "job", "")]
        for dd_id, mount, subdir in checks:
            exp = join(get_db_files_base_dir(), mount, subdir)
            self.assertEqual(get_mountpoint_path_by_id(dd_id), exp)

        # inserting new ones so we can test that it retrieves these and
        # doesn't alter other ones
        self.conn_handler.execute(
            "UPDATE qiita.data_directory SET active=false WHERE "
            "data_directory_id=1")
        self.conn_handler.execute(
            "INSERT INTO qiita.data_directory (data_type, mountpoint, "
            "subdirectory, active) VALUES ('analysis', 'analysis', 'tmp', "
            "true), ('raw_data', 'raw_data', 'tmp', false)")

        # The freshly inserted active analysis directory is now returned...
        self.assertEqual(get_mountpoint_path_by_id(10),
                         join(get_db_files_base_dir(), "analysis", "tmp"))

        # ...while the untouched directories keep their old paths
        for dd_id, mount, subdir in [(5, "raw_data", ""), (2, "job", "")]:
            self.assertEqual(get_mountpoint_path_by_id(dd_id),
                             join(get_db_files_base_dir(), mount, subdir))
Beispiel #4
0
    def _common_purge_filpeaths_test(self):
        """Shared check that purge_filepaths removes only filepaths 3 and 4.

        Assumes the test DB already holds filepath rows 3 and 4 pointing at
        the two raw_data fastq.gz files; everything else must survive.
        """
        sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
        rows = self.conn_handler.execute_fetchall(sql_fp)
        # Resolve each DB row to an absolute path on disk
        known_fps = []
        for fname, dd_id in rows:
            known_fps.append(join(get_mountpoint_path_by_id(dd_id), fname))

        # Make sure that the files exist - specially for travis
        for path in known_fps:
            if exists(path):
                continue
            with open(path, 'w') as fh:
                fh.write('\n')
            self.files_to_remove.append(path)

        _, raw_data_mp = get_mountpoint('raw_data')[0]

        removed_fps = [
            join(raw_data_mp, '2_sequences_barcodes.fastq.gz'),
            join(raw_data_mp, '2_sequences.fastq.gz')]

        kept_fps = set(known_fps).difference(removed_fps)

        # Check that the files exist before the purge
        for path in kept_fps:
            self.assertTrue(exists(path))
        for path in removed_fps:
            self.assertTrue(exists(path))

        exp_count = get_count("qiita.filepath") - 2

        purge_filepaths(self.conn_handler)

        obs_count = get_count("qiita.filepath")

        # Check that only 2 rows have been removed
        self.assertEqual(obs_count, exp_count)

        # Check that the 2 removed rows are exactly ids 3 and 4
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
        for fp_id in (3, 4):
            self.assertFalse(
                self.conn_handler.execute_fetchone(sql, (fp_id,))[0])

        # Check that the files have been successfully removed
        for path in removed_fps:
            self.assertFalse(exists(path))

        # Check that all the other files still exist
        for path in kept_fps:
            self.assertTrue(exists(path))
Beispiel #5
0
    def test_get_mountpoint_path_by_id(self):
        """Verify get_mountpoint_path_by_id for pre-existing and new rows."""
        def check(dd_id, mountpoint, subdirectory):
            # One expected-vs-observed comparison per data directory
            exp = join(get_db_files_base_dir(), mountpoint, subdirectory)
            self.assertEqual(get_mountpoint_path_by_id(dd_id), exp)

        check(5, 'raw_data', '')
        check(1, 'analysis', '')
        check(2, 'job', '')

        # inserting new ones so we can test that it retrieves these and
        # doesn't alter other ones
        self.conn_handler.execute(
            "UPDATE qiita.data_directory SET active=false WHERE "
            "data_directory_id=1")
        self.conn_handler.execute(
            "INSERT INTO qiita.data_directory (data_type, mountpoint, "
            "subdirectory, active) VALUES ('analysis', 'analysis', 'tmp', "
            "true), ('raw_data', 'raw_data', 'tmp', false)")

        # this should have been updated
        check(10, 'analysis', 'tmp')

        # these 2 shouldn't
        check(5, 'raw_data', '')
        check(2, 'job', '')
Beispiel #6
0
        # Fragment of a per-analysis loop (the `continue` below implies an
        # enclosing `for` over analyses — TODO confirm against full file).
        # Gather the filepaths of all job results attached to this analysis.
        sql = """SELECT filepath, data_directory_id
                 FROM qiita.analysis_job
                    JOIN qiita.job USING (job_id)
                    JOIN qiita.job_results_filepath USING (job_id)
                    JOIN qiita.filepath USING (filepath_id)
                 WHERE analysis_id = %s"""
        TRN.add(sql, [analysis_id])
        fps = fps.union([tuple(r) for r in TRN.execute_fetchindex()])

        # no filepaths in the analysis
        if not fps:
            continue

        # Bundle all the analysis files into one gzipped tarball, unless a
        # tarball was already produced by a previous run.
        tgz = join(analysis_mp, '%d_files.tgz' % analysis_id)
        if not exists(tgz):
            # Turn (filename, mountpoint_id) pairs into absolute paths
            full_fps = [join(get_mountpoint_path_by_id(mid), f)
                        for f, mid in fps]
            with taropen(tgz, "w:gz") as tar:
                for f in full_fps:
                    # Store only the basename so the archive has a flat layout
                    tar.add(f, arcname=basename(f))

        # Add the new tgz file to the analysis.
        fp_ids = insert_filepaths([(tgz, tgz_id)], analysis_id, 'analysis',
                                  move_files=False)
        sql = """INSERT INTO qiita.analysis_filepath
                    (analysis_id, filepath_id)
                 VALUES (%s, %s)"""
        sql_args = [[analysis_id, fp_id] for fp_id in fp_ids]
        TRN.add(sql, sql_args, many=True)
        TRN.execute()
Beispiel #7
0
                 FROM qiita.analysis_job
                    JOIN qiita.job USING (job_id)
                    JOIN qiita.job_results_filepath USING (job_id)
                    JOIN qiita.filepath USING (filepath_id)
                 WHERE analysis_id = %s"""
        TRN.add(sql, [analysis_id])
        # Accumulate (filepath, data_directory_id) tuples for this analysis
        fps = fps.union([tuple(r) for r in TRN.execute_fetchindex()])

        # no filepaths in the analysis
        if not fps:
            continue

        # Build a single gzipped tarball of the analysis files, unless a
        # previous run already produced it.
        tgz = join(analysis_mp, '%d_files.tgz' % analysis_id)
        if not exists(tgz):
            # Resolve (filename, mountpoint_id) pairs into absolute paths
            full_fps = [
                join(get_mountpoint_path_by_id(mid), f) for f, mid in fps
            ]
            with taropen(tgz, "w:gz") as tar:
                for f in full_fps:
                    # Flat archive layout: store basenames only
                    tar.add(f, arcname=basename(f))

        # Add the new tgz file to the analysis.
        fp_ids = insert_filepaths([(tgz, tgz_id)],
                                  analysis_id,
                                  'analysis',
                                  move_files=False)
        # Link every new filepath id to the analysis; the surrounding code
        # presumably calls TRN.execute() later — TODO confirm in full file.
        sql = """INSERT INTO qiita.analysis_filepath
                    (analysis_id, filepath_id)
                 VALUES (%s, %s)"""
        sql_args = [[analysis_id, fp_id] for fp_id in fp_ids]
        TRN.add(sql, sql_args, many=True)
Beispiel #8
0
        # Fragment of a per-analysis loop (the `continue` below implies an
        # enclosing `for` over analyses — TODO confirm against full file).
        # Collect the filepaths of all job results tied to this analysis.
        sql = """SELECT filepath, data_directory_id
                 FROM qiita.analysis_job
                    JOIN qiita.job USING (job_id)
                    JOIN qiita.job_results_filepath USING (job_id)
                    JOIN qiita.filepath USING (filepath_id)
                 WHERE analysis_id = %s"""
        TRN.add(sql, [analysis_id])
        fps = fps.union([tuple(r) for r in TRN.execute_fetchindex()])

        # no filepaths in the analysis
        if not fps:
            continue

        # Create one gzipped tarball with all the analysis files, skipping
        # the work if a previous run already left one behind.
        tgz = join(analysis_mp, '%d_files.tgz' % analysis_id)
        if not exists(tgz):
            # Expand (filename, mountpoint_id) pairs to absolute paths
            full_fps = [join(get_mountpoint_path_by_id(mid), f)
                        for f, mid in fps]
            with taropen(tgz, "w:gz") as tar:
                for f in full_fps:
                    # Archive entries keep only the basename (flat layout)
                    tar.add(f, arcname=basename(f))

        # Add the new tgz file to the analysis.
        # NOTE(review): this variant passes an extra "filepath" argument to
        # insert_filepaths, unlike the sibling fragments — verify signature.
        fp_ids = insert_filepaths([(tgz, tgz_id)], analysis_id, 'analysis',
                                  "filepath", move_files=False)
        sql = """INSERT INTO qiita.analysis_filepath
                    (analysis_id, filepath_id)
                 VALUES (%s, %s)"""
        sql_args = [[analysis_id, fp_id] for fp_id in fp_ids]
        TRN.add(sql, sql_args, many=True)
        TRN.execute()