def test_get_process_target_gene_cmd(self):
        preprocessed_data = PreprocessedData(1)
        params = ProcessedSortmernaParams(1)

        obs_cmd, obs_output_dir = _get_process_target_gene_cmd(
            preprocessed_data, params)

        _, ref_dir = get_mountpoint('reference')[0]
        _, preprocessed_dir = get_mountpoint('preprocessed_data')[0]

        exp_cmd = ("pick_closed_reference_otus.py -i {}1_seqs.fna -r "
                   "{}GreenGenes_13_8_97_otus.fasta -o {} -p placeholder -t "
                   "{}GreenGenes_13_8_97_otu_taxonomy.txt".format(
                       preprocessed_dir, ref_dir, obs_output_dir, ref_dir))

        obs_tokens = obs_cmd.split()[::-1]
        exp_tokens = exp_cmd.split()[::-1]
        self.assertEqual(len(obs_tokens), len(exp_tokens))
        while obs_tokens:
            o_t = obs_tokens.pop()
            e_t = exp_tokens.pop()
            if o_t == '-p':
                # skip parameters file
                obs_tokens.pop()
                exp_tokens.pop()
            else:
                self.assertEqual(o_t, e_t)
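The token-by-token walk above compares the observed and expected commands while skipping the value that follows `-p`, since the parameters file lands in a run-specific temporary path. A minimal sketch of the same idea as a reusable helper (the helper name is ours, not Qiita's):

def assert_cmds_equal_except_flag(test, obs_cmd, exp_cmd, flag='-p'):
    # Compare two command strings token by token, ignoring the value
    # that follows the given flag (e.g. a run-specific parameters file).
    obs_tokens, exp_tokens = obs_cmd.split(), exp_cmd.split()
    test.assertEqual(len(obs_tokens), len(exp_tokens))
    skip_next = False
    for o_t, e_t in zip(obs_tokens, exp_tokens):
        if skip_next:
            skip_next = False
            continue
        if o_t == flag:
            skip_next = True
        else:
            test.assertEqual(o_t, e_t)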
Example #3
    def test_prep_template_filepaths_get_req(self):
        obs = prep_template_filepaths_get_req(1, '*****@*****.**')
        exp = {'status': 'success',
               'message': '',
               'filepaths': [
                   (21, join(get_mountpoint('templates')[0][1],
                             '1_prep_1_qiime_19700101-000000.txt')),
                   (20, join(get_mountpoint('templates')[0][1],
                             '1_prep_1_19700101-000000.txt')),
                   (19, join(get_mountpoint('templates')[0][1],
                             '1_prep_1_qiime_19700101-000000.txt')),
                   (18, join(get_mountpoint('templates')[0][1],
                             '1_prep_1_19700101-000000.txt'))]}
        self.assertEqual(obs, exp)
Example #4
    def test_prep_template_filepaths_get_req(self):
        obs = prep_template_filepaths_get_req(1, '*****@*****.**')
        exp = {'status': 'success',
               'message': '',
               'filepaths': [
                   (19, join(get_mountpoint('templates')[0][1],
                             '1_prep_1_qiime_19700101-000000.txt')),
                   (18, join(get_mountpoint('templates')[0][1],
                             '1_prep_1_19700101-000000.txt'))]}
        self.assertEqual(obs, exp)
Example #5
    def tearDown(self):
        with open(self.biom_fp, 'w') as f:
            f.write("")
        with open(self.map_fp, 'w') as f:
            f.write("")

        fp = join(get_mountpoint('analysis')[0][1], 'testfile.txt')
        if exists(fp):
            remove(fp)

        mp = get_mountpoint("processed_data")[0][1]
        study2fp = join(mp, "2_2_study_1001_closed_reference_otu_table.biom")
        if exists(study2fp):
            move(study2fp,
                 join(mp, "2_study_1001_closed_reference_otu_table.biom"))
Example #6
    def tearDown(self):
        with open(self.biom_fp, 'w') as f:
            f.write("")
        with open(self.map_fp, 'w') as f:
            f.write("")

        fp = join(get_mountpoint('analysis')[0][1], 'testfile.txt')
        if exists(fp):
            remove(fp)

        mp = get_mountpoint("processed_data")[0][1]
        study2fp = join(mp, "2_2_study_1001_closed_reference_otu_table.biom")
        if exists(study2fp):
            move(study2fp,
                 join(mp, "2_study_1001_closed_reference_otu_table.biom"))
Example #7
    def get_filepaths(self, conn_handler=None):
        r"""Retrieves the list of (filepath_id, filepath)"""
        # Check that this function has been called from a subclass
        self._check_subclass()

        # Check if the connection handler has been provided. Create a new
        # one if not.
        conn_handler = conn_handler if conn_handler else SQLConnectionHandler()

        if self._table == 'required_sample_info':
            table = 'sample_template_filepath'
            column = 'study_id'
        elif self._table == 'common_prep_info':
            table = 'prep_template_filepath'
            column = 'prep_template_id'
        else:
            raise QiitaDBNotImplementedError(
                'get_filepath for %s' % self._table)

        try:
            filepath_ids = conn_handler.execute_fetchall(
                "SELECT filepath_id, filepath FROM qiita.filepath WHERE "
                "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
                "{1}=%s) ORDER BY filepath_id DESC".format(table, column),
                (self.id, ))
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            raise e

        _, fb = get_mountpoint('templates', conn_handler)[0]
        base_fp = partial(join, fb)

        return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]
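For orientation, a hedged usage sketch of the method above; `SampleTemplate` as the concrete subclass is an assumption based on the `required_sample_info` branch, and the path shown is illustrative:

# Illustrative sketch only; the returned paths depend on the local
# mountpoint configuration and the contents of the test database.
st = SampleTemplate(1)
filepaths = st.get_filepaths()
# -> [(fp_id, '/path/to/templates/1_19700101-000000.txt'), ...]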
Example #8
    def create_from_scratch(self, prep_template, study_id):
        raw_data_filetype = self.get_argument("filetype")
        barcodes_str = self.get_argument("barcodes")
        forward_reads_str = self.get_argument("forward")
        sff_str = self.get_argument("sff")
        fasta_str = self.get_argument("fasta")
        qual_str = self.get_argument("qual")
        reverse_reads_str = self.get_argument("reverse")

        def _split(x):
            return x.split(",") if x else []

        filepaths, fps = [], []
        fps.append((_split(barcodes_str), "raw_barcodes"))
        fps.append((_split(fasta_str), "raw_fasta"))
        fps.append((_split(qual_str), "raw_qual"))
        fps.append((_split(forward_reads_str), "raw_forward_seqs"))
        fps.append((_split(reverse_reads_str), "raw_reverse_seqs"))
        fps.append((_split(sff_str), "raw_sff"))

        # We need to retrieve the full path for all the files, as the
        # arguments only contain the file name. Since we don't know in which
        # mountpoint the data lives, we retrieve all of them and we loop
        # through all the files checking if they exist or not.
        for _, f in get_mountpoint("uploads", retrieve_all=True):
            f = join(f, str(study_id))
            for fp_set, filetype in fps:
                for t in fp_set:
                    ft = join(f, t)
                    if exists(ft):
                        filepaths.append((ft, filetype))

        return submit(self.current_user.id, create_raw_data,
                      raw_data_filetype, prep_template, filepaths)
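The loop above is the recurring pattern in these examples for resolving bare filenames against every upload mountpoint. A stripped-down, self-contained sketch of that resolution step (the function name is ours):

from os.path import exists, join

def resolve_upload(filename, study_id, mountpoints):
    # mountpoints: list of (name, base_path) pairs, as returned by
    # get_mountpoint("uploads", retrieve_all=True); returns the first
    # existing full path, or None if the file is in no mountpoint.
    for _, base in mountpoints:
        candidate = join(base, str(study_id), filename)
        if exists(candidate):
            return candidate
    return None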
Example #9
def check_fp(study_id, filename):
    """Check whether an uploaded file exists

    Parameters
    ----------
    study_id : int
        Study file uploaded to
    filename : str
        name of the uploaded file

    Returns
    -------
    dict
        {'status': status,
         'message': msg,
         'file': str}
        file contains full filepath if status is success, otherwise it contains
        the filename
    """
    # Get the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    # Get the path of the sample template in the uploads folder
    fp_rsp = join(base_fp, str(study_id), filename)

    if not exists(fp_rsp):
        # The file does not exist, fail nicely
        return {
            'status': 'error',
            'message': 'file does not exist',
            'file': filename
        }
    return {'status': 'success', 'message': '', 'file': fp_rsp}
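A quick illustration of the two return shapes documented above; the success path shown is hypothetical and depends on the configured uploads mountpoint:

check_fp(1, 'uploaded_file.txt')
# {'status': 'success', 'message': '',
#  'file': '/mnt/uploads/1/uploaded_file.txt'}  (path is illustrative)
check_fp(1, 'missing.txt')
# {'status': 'error', 'message': 'file does not exist',
#  'file': 'missing.txt'}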
Example #10
    def post(self, study_id, prep_id):
        study = self.safe_get_study(study_id)
        if study is None:
            return

        prep_id = to_int(prep_id)
        try:
            p = PrepTemplate(prep_id)
        except QiitaDBUnknownIDError:
            self.fail('Preparation not found', 404)
            return

        if p.study_id != study.id:
            self.fail('Preparation ID not associated with the study', 409)
            return

        artifact_deets = json_decode(self.request.body)
        _, upload = get_mountpoint('uploads')[0]
        base = os.path.join(upload, study_id)
        filepaths = [(os.path.join(base, fp), fp_type)
                     for fp, fp_type in artifact_deets['filepaths']]

        try:
            art = Artifact.create(filepaths,
                                  artifact_deets['artifact_type'],
                                  artifact_deets['artifact_name'],
                                  p)
        except QiitaError as e:
            self.fail(str(e), 406)
            return

        self.write({'id': art.id})
        self.set_status(201)
        self.finish()
Example #11
    def test_check_fp(self):
        obs = check_fp(1, 'uploaded_file.txt')
        _, base_fp = get_mountpoint("uploads")[0]
        exp = {'status': 'success',
               'message': '',
               'file': join(base_fp, '1', 'uploaded_file.txt')}
        self.assertEqual(obs, exp)
Example #12
    def setUp(self):
        uploads_path = get_mountpoint('uploads')[0][1]
        # Create prep test file to point at
        self.update_fp = join(uploads_path, '1', 'update.txt')
        with open(self.update_fp, 'w') as f:
            f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

        self._files_to_remove = [self.update_fp]

        # creating temporal files and artifact
        # NOTE: we don't need to remove the artifact created because it's
        # used to test the delete functionality
        fd, fp = mkstemp(suffix='_seqs.fna')
        close(fd)
        with open(fp, 'w') as f:
            f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 "
                    "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n"
                    "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n")
        # 4 Demultiplexed
        filepaths_processed = [(fp, 4)]
        # 1 for default parameters and input data
        exp_params = Parameters.from_default_params(DefaultParameters(1),
                                                    {'input_data': 1})
        self.artifact = Artifact.create(filepaths_processed, "Demultiplexed",
                                        parents=[Artifact(1)],
                                        processing_parameters=exp_params)
Example #13
    def setUp(self):
        fd, self.seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)
        fd, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq')
        close(fd)
        self.filetype = 2
        self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)]
        _, self.db_test_raw_dir = get_mountpoint('raw_data')[0]

        with open(self.seqs_fp, "w") as f:
            f.write("\n")
        with open(self.barcodes_fp, "w") as f:
            f.write("\n")
        self._clean_up_files = []

        # Create some new PrepTemplates
        metadata_dict = {
            'SKB8.640193': {'center_name': 'ANL',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'GTCCGCAAGTTA',
                            'run_prefix': "s_G1_L001_sequences",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAAA',
                            'experiment_design_description': 'BBBB'}}
        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        self.pt1 = PrepTemplate.create(metadata, Study(1), "16S")
        self.pt2 = PrepTemplate.create(metadata, Study(1), "18S")
        self.prep_templates = [self.pt1, self.pt2]
Example #14
    def _common_purge_filpeaths_test(self):
        # Get all the filepaths so we can test if they've been removed or not
        sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
        fps = [join(get_mountpoint_path_by_id(dd_id), fp)
               for fp, dd_id in self.conn_handler.execute_fetchall(sql_fp)]

        # Make sure that the files exist, especially for Travis
        for fp in fps:
            if not exists(fp):
                with open(fp, "w") as f:
                    f.write("\n")
                self.files_to_remove.append(fp)

        _, raw_data_mp = get_mountpoint("raw_data")[0]

        removed_fps = [join(raw_data_mp, "2_sequences_barcodes.fastq.gz"),
                       join(raw_data_mp, "2_sequences.fastq.gz")]

        for fp in removed_fps:
            with open(fp, "w") as f:
                f.write("\n")

        sql = """INSERT INTO qiita.filepath
                    (filepath, filepath_type_id, checksum,
                     checksum_algorithm_id, data_directory_id)
                VALUES ('2_sequences_barcodes.fastq.gz', 3, '852952723', 1, 5),
                       ('2_sequences.fastq.gz', 1, '852952723', 1, 5)
                RETURNING filepath_id"""
        fp_ids = self.conn_handler.execute_fetchall(sql)

        fps = set(fps).difference(removed_fps)

        # Check that the files exist
        for fp in fps:
            self.assertTrue(exists(fp))
        for fp in removed_fps:
            self.assertTrue(exists(fp))

        exp_count = get_count("qiita.filepath") - 2

        purge_filepaths()

        obs_count = get_count("qiita.filepath")

        # Check that only 2 rows have been removed
        self.assertEqual(obs_count, exp_count)

        # Check that the 2 rows that have been removed are the correct ones
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
        obs = self.conn_handler.execute_fetchone(sql, (fp_ids[0][0],))[0]
        self.assertFalse(obs)
        obs = self.conn_handler.execute_fetchone(sql, (fp_ids[1][0],))[0]
        self.assertFalse(obs)

        # Check that the files have been successfully removed
        for fp in removed_fps:
            self.assertFalse(exists(fp))

        # Check that all the other files still exist
        for fp in fps:
            self.assertTrue(exists(fp))
Example #15
    def get_filepaths(self):
        r"""Retrieves the list of (filepath_id, filepath)"""
        # Check that this function has been called from a subclass
        self._check_subclass()

        # Check if the connection handler has been provided. Create a new
        # one if not.
        conn_handler = SQLConnectionHandler()

        try:
            filepath_ids = conn_handler.execute_fetchall(
                "SELECT filepath_id, filepath FROM qiita.filepath WHERE "
                "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
                "{1}=%s) ORDER BY filepath_id DESC".format(
                    self._filepath_table, self._id_column),
                (self.id, ))
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            raise e

        _, fb = get_mountpoint('templates')[0]
        base_fp = partial(join, fb)

        return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]
Example #16
def check_fp(study_id, filename):
    """Check whether an uploaded file exists

    Parameters
    ----------
    study_id : int
        Study file uploaded to
    filename : str
        name of the uploaded file

    Returns
    -------
    dict
        {'status': status,
         'message': msg,
         'file': str}
        file contains full filepath if status is success, otherwise it contains
        the filename
    """
    # Get the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    # Get the path of the sample template in the uploads folder
    fp_rsp = join(base_fp, str(study_id), filename)

    if not exists(fp_rsp):
        # The file does not exist, fail nicely
        return {'status': 'error',
                'message': 'file does not exist',
                'file': filename}
    return {'status': 'success',
            'message': '',
            'file': fp_rsp}
Example #17
    def test_move_upload_files_to_trash(self):
        test_filename = 'this_is_a_test_file.txt'

        # create file to move to trash
        fid, folder = get_mountpoint("uploads")[0]
        with open(join(folder, '1', test_filename), 'w') as f:
            f.write('test')

        exp = [(fid, 'this_is_a_test_file.txt'), (fid, 'uploaded_file.txt')]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # move file
        move_upload_files_to_trash(1, [(fid, test_filename)])
        exp = [(fid, 'uploaded_file.txt')]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # testing errors
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(2, [(fid, test_filename)])
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(10, test_filename)])
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(fid, test_filename)])

        # removing trash folder
        rmtree(join(folder, '1', 'trash'))
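Judging from the cleanup step above, trashed uploads end up in a `trash` subfolder of the study's upload directory. A minimal sketch of that assumed layout (convention inferred from the test, not confirmed elsewhere):

from os.path import join

def trash_path(upload_folder, study_id, filename):
    # Assumed convention: uploads/<study_id>/trash/<filename>
    return join(upload_folder, str(study_id), 'trash', filename)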
Example #18
    def setUp(self):
        fd, self.seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)
        fd, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq')
        close(fd)
        self.filetype = 2
        self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)]
        self.studies = [Study(1)]
        _, self.db_test_raw_dir = get_mountpoint('raw_data')[0]

        with open(self.seqs_fp, "w") as f:
            f.write("\n")
        with open(self.barcodes_fp, "w") as f:
            f.write("\n")
        self._clean_up_files = []

        # Create a new study
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        Study.create(User("*****@*****.**"), "Test study 2", [1], info)
Example #19
    def test_post_valid(self):
        dontcare, uploads_dir = get_mountpoint('uploads')[0]
        foo_fp = os.path.join(uploads_dir, '1', 'foo.txt')
        bar_fp = os.path.join(uploads_dir, '1', 'bar.txt')
        with open(foo_fp, 'w') as fp:
            fp.write("@x\nATGC\n+\nHHHH\n")
        with open(bar_fp, 'w') as fp:
            fp.write("@x\nATGC\n+\nHHHH\n")

        prep = StringIO(EXP_PREP_TEMPLATE.format(1))
        prep_table = load_template_to_dataframe(prep)

        response = self.post('/api/v1/study/1/preparation?data_type=16S',
                             data=prep_table.T.to_dict(),
                             headers=self.headers, asjson=True)
        prepid = json_decode(response.body)['id']

        uri = '/api/v1/study/1/preparation/%d/artifact' % prepid
        # 1 -> fwd or rev sequences in fastq
        # 3 -> barcodes
        body = {'artifact_type': 'FASTQ',
                'filepaths': [['foo.txt', 1], ['bar.txt', 'raw_barcodes']],
                'artifact_name': 'a name is a name'}

        response = self.post(uri, data=body, headers=self.headers, asjson=True)
        self.assertEqual(response.code, 201)
        obs = json_decode(response.body)['id']

        prep_instance = PrepTemplate(prepid)
        exp = prep_instance.artifact.id
        self.assertEqual(obs, exp)
Example #20
    def test_post_valid(self):
        dontcare, uploads_dir = get_mountpoint('uploads')[0]
        foo_fp = os.path.join(uploads_dir, '1', 'foo.txt')
        bar_fp = os.path.join(uploads_dir, '1', 'bar.txt')
        with open(foo_fp, 'w') as fp:
            fp.write("@x\nATGC\n+\nHHHH\n")
        with open(bar_fp, 'w') as fp:
            fp.write("@x\nATGC\n+\nHHHH\n")

        prep = StringIO(EXP_PREP_TEMPLATE.format(1))
        prep_table = load_template_to_dataframe(prep)

        response = self.post('/api/v1/study/1/preparation?data_type=16S',
                             data=prep_table.T.to_dict(),
                             headers=self.headers,
                             asjson=True)
        prepid = json_decode(response.body)['id']

        uri = '/api/v1/study/1/preparation/%d/artifact' % prepid
        # 1 -> fwd or rev sequences in fastq
        # 3 -> barcodes
        body = {
            'artifact_type': 'FASTQ',
            'filepaths': [['foo.txt', 1], ['bar.txt', 'raw_barcodes']],
            'artifact_name': 'a name is a name'
        }

        response = self.post(uri, data=body, headers=self.headers, asjson=True)
        self.assertEqual(response.code, 201)
        obs = json_decode(response.body)['id']

        prep_instance = PrepTemplate(prepid)
        exp = prep_instance.artifact.id
        self.assertEqual(obs, exp)
Example #21
    def test_move_upload_files_to_trash(self):
        test_filename = "this_is_a_test_file.txt"

        # create file to move to trash
        fid, folder = get_mountpoint("uploads")[0]
        test_fp = join(folder, "1", test_filename)
        with open(test_fp, "w") as f:
            f.write("test")

        self.files_to_remove.append(test_fp)

        exp = [(fid, "this_is_a_test_file.txt"), (fid, "uploaded_file.txt")]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # move file
        move_upload_files_to_trash(1, [(fid, test_filename)])
        exp = [(fid, "uploaded_file.txt")]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # testing errors
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(2, [(fid, test_filename)])
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(10, test_filename)])
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(fid, test_filename)])

        # removing trash folder
        rmtree(join(folder, "1", "trash"))
Example #22
    def test_move_filepaths_to_upload_folder(self):
        # setting up test, done here as this is the only test that uses these
        # files
        fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)
        study_id = 1

        rd = RawData.create(2, [Study(study_id)], [(seqs_fp, 1)])
        filepaths = rd.get_filepaths()
        # deleting reference so we can directly call
        # move_filepaths_to_upload_folder
        for fid, _, _ in filepaths:
            self.conn_handler.execute(
                "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s", (fid, ))

        # moving filepaths
        move_filepaths_to_upload_folder(study_id, filepaths, self.conn_handler)

        # check that they do not exist in the old path but do in the new one
        path_for_removal = join(get_mountpoint("uploads")[0][1], str(study_id))
        for _, fp, _ in filepaths:
            self.assertFalse(exists(fp))
            new_fp = join(path_for_removal, basename(fp).split('_', 1)[1])
            self.assertTrue(exists(new_fp))

            self.files_to_remove.append(new_fp)
Example #23
    def test_build_mapping_file(self):
        new_id = get_count('qiita.filepath') + 1
        samples = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']}
        self.analysis._build_mapping_file(samples)
        obs = self.analysis.mapping_file
        self.assertEqual(obs, self.map_fp)

        base_dir = get_mountpoint('analysis')[0][1]
        obs = pd.read_csv(obs, sep='\t', infer_datetime_format=True,
                          parse_dates=True, index_col=False, comment='\t')
        exp = pd.read_csv(join(base_dir, '1_analysis_mapping_exp.txt'),
                          sep='\t', infer_datetime_format=True,
                          parse_dates=True, index_col=False, comment='\t')

        assert_frame_equal(obs, exp)

        sql = """SELECT * FROM qiita.filepath
                 WHERE filepath=%s ORDER BY filepath_id"""
        obs = self.conn_handler.execute_fetchall(
            sql, ("%d_analysis_mapping.txt" % self.analysis.id,))

        exp = [[13, '1_analysis_mapping.txt', 9, '852952723', 1, 1],
               [new_id, '1_analysis_mapping.txt', 9, '1606265094', 1, 1]]
        self.assertEqual(obs, exp)

        sql = """SELECT * FROM qiita.analysis_filepath
                 WHERE analysis_id=%s ORDER BY filepath_id"""
        obs = self.conn_handler.execute_fetchall(sql, (self.analysis.id,))
        exp = [[1L, 14L, 2L], [1L, 15L, None], [1L, new_id, None]]
        self.assertEqual(obs, exp)
Example #24
    def create_from_scratch(self, prep_template, study_id):
        raw_data_filetype = self.get_argument('filetype')
        barcodes_str = self.get_argument('barcodes')
        forward_reads_str = self.get_argument('forward')
        sff_str = self.get_argument('sff')
        fasta_str = self.get_argument('fasta')
        qual_str = self.get_argument('qual')
        reverse_reads_str = self.get_argument('reverse')

        def _split(x):
            return x.split(',') if x else []

        filepaths, fps = [], []
        fps.append((_split(barcodes_str), 'raw_barcodes'))
        fps.append((_split(fasta_str), 'raw_fasta'))
        fps.append((_split(qual_str), 'raw_qual'))
        fps.append((_split(forward_reads_str), 'raw_forward_seqs'))
        fps.append((_split(reverse_reads_str), 'raw_reverse_seqs'))
        fps.append((_split(sff_str), 'raw_sff'))

        # We need to retrieve the full path for all the files, as the
        # arguments only contain the file name. Since we don't know in which
        # mountpoint the data lives, we retrieve all of them and we loop
        # through all the files checking if they exist or not.
        for _, f in get_mountpoint("uploads", retrieve_all=True):
            f = join(f, str(study_id))
            for fp_set, filetype in fps:
                for t in fp_set:
                    ft = join(f, t)
                    if exists(ft):
                        filepaths.append((ft, filetype))

        return submit(self.current_user.id, create_raw_data, raw_data_filetype,
                      prep_template, filepaths)
Example #25
    def setUp(self):
        super(NewArtifactHandlerTests, self).setUp()
        tmp_dir = join(get_mountpoint('uploads')[0][1], '1')

        # Create prep test file to point at
        fd, prep_fp = mkstemp(dir=tmp_dir, suffix='.txt')
        close(fd)
        with open(prep_fp, 'w') as f:
            f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")
        self.prep = npt.assert_warns(
            QiitaDBWarning, PrepTemplate.create,
            pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}),
            Study(1), "16S")

        fd, self.fwd_fp = mkstemp(dir=tmp_dir, suffix=".fastq")
        close(fd)
        with open(self.fwd_fp, 'w') as f:
            f.write("@seq\nTACGA\n+ABBBB\n")

        fd, self.barcodes_fp = mkstemp(dir=tmp_dir, suffix=".fastq")
        close(fd)
        with open(self.barcodes_fp, 'w') as f:
            f.write("@seq\nTACGA\n+ABBBB\n")

        self._files_to_remove = [prep_fp, self.fwd_fp, self.barcodes_fp]
Example #26
    def setUp(self):
        fd, self.seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)
        fd, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq')
        close(fd)
        self.filetype = 2
        self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)]
        self.studies = [Study(1)]
        _, self.db_test_raw_dir = get_mountpoint('raw_data')[0]

        with open(self.seqs_fp, "w") as f:
            f.write("\n")
        with open(self.barcodes_fp, "w") as f:
            f.write("\n")
        self._clean_up_files = []

        # Create a new study
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        Study.create(User("*****@*****.**"), "Test study 2", [1], info)
Example #27
    def setUp(self):
        uploads_path = get_mountpoint('uploads')[0][1]
        # Create prep test file to point at
        self.update_fp = join(uploads_path, '1', 'update.txt')
        with open(self.update_fp, 'w') as f:
            f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

        self._files_to_remove = [self.update_fp]

        # creating temporal files and artifact
        # NOTE: we don't need to remove the artifact created because it's
        # used to test the delete functionality
        fd, fp = mkstemp(suffix='_seqs.fna')
        close(fd)
        with open(fp, 'w') as f:
            f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 "
                    "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n"
                    "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n")
        # 4 Demultiplexed
        filepaths_processed = [(fp, 4)]
        # 1 for default parameters and input data
        exp_params = Parameters.from_default_params(DefaultParameters(1),
                                                    {'input_data': 1})
        self.artifact = Artifact.create(filepaths_processed, "Demultiplexed",
                                        parents=[Artifact(1)],
                                        processing_parameters=exp_params)
Example #28
    def setUp(self):
        # Create a directory with the test split libraries output
        self.test_slo = mkdtemp(prefix='test_slo_')
        path_builder = partial(join, self.test_slo)
        fna_fp = path_builder('seqs.fna')
        fastq_fp = path_builder('seqs.fastq')
        log_fp = path_builder('split_library_log.txt')
        demux_fp = path_builder('seqs.demux')

        with open(fna_fp, 'w') as f:
            f.write(FASTA_SEQS)

        with open(fastq_fp, 'w') as f:
            f.write(FASTQ_SEQS)

        with open(log_fp, 'w') as f:
            f.write("Test log\n")

        generate_demux_file(self.test_slo)

        self._filepaths_to_remove = [fna_fp, fastq_fp, demux_fp, log_fp]
        self._dirpaths_to_remove = [self.test_slo]

        # Generate a directory with test split libraries output missing files
        self.missing_slo = mkdtemp(prefix='test_missing_')
        path_builder = partial(join, self.missing_slo)
        fna_fp = path_builder('seqs.fna')
        fastq_fp = path_builder('seqs.fastq')

        with open(fna_fp, 'w') as f:
            f.write(FASTA_SEQS)

        with open(fastq_fp, 'w') as f:
            f.write(FASTQ_SEQS)

        self._filepaths_to_remove.append(fna_fp)
        self._filepaths_to_remove.append(fastq_fp)
        self._dirpaths_to_remove.append(self.missing_slo)

        # Create a study with no preprocessed data
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        self.no_ppd_study = Study.create(
            User('*****@*****.**'), "Test study", [1], info)

        # Get the directory where the preprocessed data is usually copied.
        _, self.db_ppd_dir = get_mountpoint('preprocessed_data')[0]
Example #29
    def test_move_filepaths_to_upload_folder(self):
        # setting up test, done here as this is the only test that uses these
        # files
        fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)
        study_id = 1

        rd = RawData.create(2, [Study(study_id)], [(seqs_fp, 1)])
        filepaths = rd.get_filepaths()
        # deleting reference so we can directly call
        # move_filepaths_to_upload_folder
        for fid, _, _ in filepaths:
            self.conn_handler.execute(
                "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s", (fid,))

        # moving filepaths
        move_filepaths_to_upload_folder(study_id, filepaths, self.conn_handler)

        # check that they do not exist in the old path but do in the new one
        path_for_removal = join(get_mountpoint("uploads")[0][1], str(study_id))
        for _, fp, _ in filepaths:
            self.assertFalse(exists(fp))
            new_fp = join(path_for_removal, basename(fp).split('_', 1)[1])
            self.assertTrue(exists(new_fp))

            self.files_to_remove.append(new_fp)
Example #30
    def test_move_upload_files_to_trash(self):
        test_filename = 'this_is_a_test_file.txt'

        # create file to move to trash
        fid, folder = get_mountpoint("uploads")[0]
        with open(join(folder, '1', test_filename), 'w') as f:
            f.write('test')

        exp = [(fid, 'this_is_a_test_file.txt'), (fid, 'uploaded_file.txt')]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # move file
        move_upload_files_to_trash(1, [(fid, test_filename)])
        exp = [(fid, 'uploaded_file.txt')]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # testing errors
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(2, [(fid, test_filename)])
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(10, test_filename)])
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(fid, test_filename)])

        # removing trash folder
        rmtree(join(folder, '1', 'trash'))
Example #31
    def update_prep_template(self, study, user, callback):
        """Update a prep template from the POST method

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done

        Raises
        ------
        HTTPError
            If the prep template file does not exist
        """
        # If we are on this function, the arguments "prep_template_id" and
        # "update_prep_template_file" must be defined. If not, let tornado
        # raise its error
        pt_id = int(self.get_argument('prep_template_id'))
        prep_template = self.get_argument('update_prep_template_file')

        # Define here the message and message level in case of success
        msg = "The prep template '%s' has been updated" % prep_template
        msg_level = "success"
        # Get the uploads folder
        _, base_fp = get_mountpoint("uploads")[0]
        # Get the path of the prep template in the uploads folder
        fp = join(base_fp, str(study.id), prep_template)

        if not exists(fp):
            # The file does not exist, fail nicely
            # Using 400 because we want the user to get the error in the GUI
            raise HTTPError(400, "This file doesn't exist: %s" % fp)
        try:
            with warnings.catch_warnings(record=True) as warns:
                pt = PrepTemplate(pt_id)
                pt.update(load_template_to_dataframe(fp))
                remove(fp)

                # join all the warning messages into one. Note that this info
                # will be ignored if an exception is raised
                if warns:
                    msg = '; '.join([str(w.message) for w in warns])
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
            # Some error occurred while processing the sample template
            # Show the error to the user so they can fix the template
            msg = html_error_message % ('updating the prep template:',
                                        basename(fp), str(e))
            msg = convert_text_html(msg)
            msg_level = "danger"

        callback((msg, msg_level, 'prep_template_tab', pt_id, None))
Example #32
    def setUp(self):
        uploads_path = get_mountpoint('uploads')[0][1]
        # Create prep test file to point at
        self.update_fp = join(uploads_path, '1', 'update.txt')
        with open(self.update_fp, 'w') as f:
            f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

        self._files_to_remove = [self.update_fp]
Example #33
    def setUp(self):
        uploads_path = get_mountpoint('uploads')[0][1]
        # Create prep test file to point at
        self.update_fp = join(uploads_path, '1', 'update.txt')
        with open(self.update_fp, 'w') as f:
            f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

        self._files_to_remove = [self.update_fp]
Example #34
    def add_prep_template(self, study, user, callback):
        """Adds a prep template to the system

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done
        """
        msg = "Your prep template was added"
        msg_level = "success"

        # If we are on this function, the arguments "raw_data_id",
        # "prep_template" and "data_type_id" must be defined. If not,
        # let tornado raise its error
        raw_data_id = self.get_argument('raw_data_id')
        prep_template = self.get_argument('prep_template')
        data_type_id = self.get_argument('data_type_id')

        # These parameters are optional
        investigation_type = self.get_argument('investigation-type', None)
        user_defined_investigation_type = self.get_argument(
            'user-defined-investigation-type', None)
        new_investigation_type = self.get_argument('new-investigation-type',
                                                   None)

        investigation_type = self._process_investigation_type(
            investigation_type, user_defined_investigation_type,
            new_investigation_type)

        # Make sure that the id is an integer
        raw_data_id = _to_int(raw_data_id)
        # Get the upload base directory
        _, base_path = get_mountpoint("uploads")[0]
        # Get the path to the prep template
        fp_rpt = join(base_path, str(study.id), prep_template)
        if not exists(fp_rpt):
            # The file does not exist, fail nicely
            raise HTTPError(400, "This file doesn't exist: %s" % fp_rpt)

        # pt_id must exist even if the template fails to load, since the
        # callback below reports it in both cases
        pt_id = None
        try:
            pt_id = self.remove_add_prep_template(fp_rpt, raw_data_id, study,
                                                  data_type_id,
                                                  investigation_type)
        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError,
                CParserError) as e:
            # Some error occurred while processing the prep template
            # Show the error to the user so they can fix the template
            msg = html_error_message % ("parsing the prep template: ",
                                        basename(fp_rpt), str(e))
            msg_level = "danger"

        callback((msg, msg_level, 'raw_data_tab', raw_data_id, pt_id))
Example #35
    def update_sample_template(self, study, user, callback):
        """Update a sample template from the POST method

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done

        Raises
        ------
        HTTPError
            If the sample template file does not exist
        """
        # If we are on this function, the argument "sample_template" must be
        # defined. If not, let tornado raise its error
        sample_template = self.get_argument('sample_template')

        # Define here the message and message level in case of success
        msg = "The sample template '%s' has been updated" % sample_template
        msg_level = "success"
        # Get the uploads folder
        _, base_fp = get_mountpoint("uploads")[0]
        # Get the path of the sample template in the uploads folder
        fp_rsp = join(base_fp, str(study.id), sample_template)

        if not exists(fp_rsp):
            # The file does not exist, fail nicely
            raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)
        try:
            with warnings.catch_warnings(record=True) as warns:
                # deleting previous uploads and inserting new one
                st = SampleTemplate(study.id)
                df = load_template_to_dataframe(fp_rsp)
                st.extend(df)
                st.update(df)
                remove(fp_rsp)

                # join all the warning messages into one. Note that this info
                # will be ignored if an exception is raised
                if warns:
                    msg = '\n'.join(set(str(w.message) for w in warns))
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
            # Some error occurred while processing the sample template
            # Show the error to the user so they can fix the template
            msg = html_error_message % ('updating the sample template:',
                                        basename(fp_rsp), str(e))
            msg = convert_text_html(msg)
            msg_level = "danger"
        callback((msg, msg_level, None, None, None))
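The `catch_warnings(record=True)` pattern above records every warning raised during the update so they can be reported as a single message. A self-contained sketch of just that aggregation step:

import warnings

with warnings.catch_warnings(record=True) as warns:
    warnings.simplefilter('always')  # make sure duplicates are recorded
    warnings.warn('column added')
    warnings.warn('column added')
    warnings.warn('sample renamed')

msg = '\n'.join(set(str(w.message) for w in warns))
# e.g. "column added\nsample renamed" (set order is arbitrary)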
Example #36
    def update_sample_template(self, study, user, callback):
        """Update a sample template from the POST method

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done

        Raises
        ------
        HTTPError
            If the sample template file does not exist
        """
        # If we are on this function, the argument "sample_template" must be
        # defined. If not, let tornado raise its error
        sample_template = self.get_argument('sample_template')

        # Define here the message and message level in case of success
        msg = "The sample template '%s' has been updated" % sample_template
        msg_level = "success"
        # Get the uploads folder
        _, base_fp = get_mountpoint("uploads")[0]
        # Get the path of the sample template in the uploads folder
        fp_rsp = join(base_fp, str(study.id), sample_template)

        if not exists(fp_rsp):
            # The file does not exist, fail nicely
            raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)
        try:
            with warnings.catch_warnings(record=True) as warns:
                # deleting previous uploads and inserting new one
                st = SampleTemplate(study.id)
                df = load_template_to_dataframe(fp_rsp)
                st.extend(df)
                st.update(df)
                remove(fp_rsp)

                # join all the warning messages into one. Note that this info
                # will be ignored if an exception is raised
                if warns:
                    msg = '\n'.join(set(str(w.message) for w in warns))
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
            # Some error occurred while processing the sample template
            # Show the error to the user so they can fix the template
            msg = html_error_message % ('updating the sample template:',
                                        basename(fp_rsp), str(e))
            msg = convert_text_html(msg)
            msg_level = "danger"
        callback((msg, msg_level, None, None, None))
Example #37
    def post(self):

        # vars to add files to raw data
        study_id = self.get_argument('study_id')
        raw_data_id = self.get_argument('raw_data_id')
        barcodes_str = self.get_argument('barcodes')
        forward_reads_str = self.get_argument('forward')
        reverse_reads_str = self.get_argument('reverse', None)

        study_id = int(study_id)
        try:
            study = Study(study_id)
        except QiitaDBUnknownIDError:
            # Study not in database so fail nicely
            raise HTTPError(404, "Study %d does not exist" % study_id)
        else:
            check_access(User(self.current_user), study, raise_error=True)

        barcodes, forward_reads, reverse_reads = [], [], []
        for _, f in get_mountpoint("uploads", retrive_all=True):
            f = join(f, str(study_id))
            for t in barcodes_str.split(','):
                ft = join(f, t)
                if exists(ft):
                    barcodes.append([ft, "raw_barcodes"])
            for t in forward_reads_str.split(','):
                ft = join(f, t)
                if exists(ft):
                    forward_reads.append([ft, "raw_forward_seqs"])
            if reverse_reads_str:
                for t in reverse_reads_str.split(','):
                    ft = join(f, t)
                    if exists(ft):
                        reverse_reads.append([ft, "raw_reverse_seqs"])

        # this should never happen if following the GUI pipeline,
        # but better safe than sorry
        if (len(barcodes) != len(forward_reads)
                or (reverse_reads
                    and len(barcodes) != len(reverse_reads))):
            raise HTTPError(404, "user %s tried to submit a wrong pair of "
                                 "barcodes/forward/reverse reads" %
                                 self.current_user)

        # join all files to send on single var
        filepaths = barcodes
        filepaths.extend(forward_reads)
        if reverse_reads:
            filepaths.extend(reverse_reads)

        job_id = submit(self.current_user, add_files_to_raw_data, raw_data_id,
                        filepaths)

        self.render('compute_wait.html', user=self.current_user,
                    job_id=job_id, title='Adding files to your raw data',
                    completion_redirect=(
                        '/study/description/%s?top_tab=raw_data_tab&sub_tab=%s'
                        % (study_id, raw_data_id)))
Example #38
    def test_check_fp(self):
        obs = check_fp(1, 'uploaded_file.txt')
        _, base_fp = get_mountpoint("uploads")[0]
        exp = {'status': 'success',
               'message': '',
               'file': join(base_fp, '1', 'uploaded_file.txt')}
        self.assertEqual(obs, exp)
Example #39
    def tearDown(self):
        new_uploaded_files = get_files_from_uploads_folders(str(self.study.id))
        new_files = set(new_uploaded_files).difference(self.uploaded_files)
        path_builder = partial(join, get_mountpoint("uploads")[0][1], '1')
        for _, fp in new_files:
            self._clean_up_files.append(path_builder(fp))
        for f in self._clean_up_files:
            if exists(f):
                remove(f)
Example #40
    def setUp(self):
        super(TestPrepTemplateHandler, self).setUp()
        uploads_dp = get_mountpoint('uploads')[0][1]
        self.new_prep = join(uploads_dp, '1', 'new_template.txt')
        with open(self.new_prep, 'w') as f:
            f.write("sample_name\tnew_col\nSKD6.640190\tnew_value\n")

        self.broken_prep = join(uploads_dp, '1', 'broke_template.txt')
        with open(self.broken_prep, 'w') as f:
            f.write("sample_name\tbroke |col\nSKD6.640190\tnew_value\n")
Example #41
    def tearDown(self):
        if exists(self.update_fp):
            remove(self.update_fp)

        fp = join(get_mountpoint("uploads")[0][1], '1', 'uploaded_file.txt')
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write('')

        r_client.flushdb()
Example #42
    def tearDown(self):
        if exists(self.update_fp):
            remove(self.update_fp)

        fp = join(get_mountpoint("uploads")[0][1], '1', 'uploaded_file.txt')
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write('')

        r_client.flushdb()
Example #43
    def _common_purge_filpeaths_test(self):
        # Get all the filepaths so we can test if they've been removed or not
        sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
        fps = [
            join(get_mountpoint_path_by_id(dd_id), fp)
            for fp, dd_id in self.conn_handler.execute_fetchall(sql_fp)
        ]

        # Make sure that the files exist, especially for Travis
        for fp in fps:
            if not exists(fp):
                with open(fp, 'w') as f:
                    f.write('\n')
                self.files_to_remove.append(fp)

        _, raw_data_mp = get_mountpoint('raw_data')[0]

        removed_fps = [
            join(raw_data_mp, '2_sequences_barcodes.fastq.gz'),
            join(raw_data_mp, '2_sequences.fastq.gz')
        ]

        fps = set(fps).difference(removed_fps)

        # Check that the files exist
        for fp in fps:
            self.assertTrue(exists(fp))
        for fp in removed_fps:
            self.assertTrue(exists(fp))

        exp_count = get_count("qiita.filepath") - 2

        purge_filepaths(self.conn_handler)

        obs_count = get_count("qiita.filepath")

        # Check that only 2 rows have been removed
        self.assertEqual(obs_count, exp_count)

        # Check that the 2 rows that have been removed are the correct ones
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
        obs = self.conn_handler.execute_fetchone(sql, (3, ))[0]
        self.assertFalse(obs)
        obs = self.conn_handler.execute_fetchone(sql, (4, ))[0]
        self.assertFalse(obs)

        # Check that the files have been successfully removed
        for fp in removed_fps:
            self.assertFalse(exists(fp))

        # Check that all the other files still exist
        for fp in fps:
            self.assertTrue(exists(fp))
Example #44
    def tearDown(self):
        for fp in self._files_to_remove:
            if exists(fp):
                remove(fp)

        # Replace file if removed as part of function testing
        uploads_path = get_mountpoint('uploads')[0][1]
        fp = join(uploads_path, '1', 'uploaded_file.txt')
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write('')
Example #45
    def post(self, study_id):
        method = self.get_argument('remote-request-type')
        url = self.get_argument('inputURL')
        ssh_key = self.request.files['ssh-key'][0]['body']
        status = 'success'
        message = ''

        try:
            study = Study(int(study_id))
        except QiitaDBUnknownIDError:
            raise HTTPError(404, reason="Study %s does not exist" % study_id)
        check_access(self.current_user,
                     study,
                     no_public=True,
                     raise_error=True)

        _, upload_folder = get_mountpoint("uploads")[0]
        upload_folder = join(upload_folder, study_id)
        ssh_key_fp = join(upload_folder, '.key.txt')

        create_nested_path(upload_folder)

        with open(ssh_key_fp, 'wb') as f:
            f.write(ssh_key)
        chmod(ssh_key_fp, 0o600)

        qiita_plugin = Software.from_name_and_version('Qiita', 'alpha')
        if method == 'list':
            cmd = qiita_plugin.get_command('list_remote_files')
            params = Parameters.load(cmd,
                                     values_dict={
                                         'url': url,
                                         'private_key': ssh_key_fp,
                                         'study_id': study_id
                                     })
        elif method == 'transfer':
            cmd = qiita_plugin.get_command('download_remote_files')
            params = Parameters.load(cmd,
                                     values_dict={
                                         'url': url,
                                         'private_key': ssh_key_fp,
                                         'destination': upload_folder
                                     })
        else:
            status = 'error'
            message = 'Not a valid method'

        if status == 'success':
            job = ProcessingJob.create(self.current_user, params, True)
            job.submit()
            r_client.set(UPLOAD_STUDY_FORMAT % study_id,
                         dumps({'job_id': job.id}))

        self.write({'status': status, 'message': message})
Example #46
    def test_insert_processed_data_target_gene(self):
        fd, fna_fp = mkstemp(suffix='_seqs.fna')
        close(fd)
        fd, qual_fp = mkstemp(suffix='_seqs.qual')
        close(fd)
        filepaths = [
            (fna_fp, convert_to_id('preprocessed_fasta', 'filepath_type')),
            (qual_fp, convert_to_id('preprocessed_fastq', 'filepath_type'))
        ]

        preprocessed_data = PreprocessedData.create(
            Study(1),
            "preprocessed_sequence_illumina_params",
            1,
            filepaths,
            data_type="18S")

        params = ProcessedSortmernaParams(1)
        pick_dir = mkdtemp()
        path_builder = partial(join, pick_dir)
        db_path_builder = partial(join, get_mountpoint('processed_data')[0][1])

        # Create a placeholder for the otu table
        with open(path_builder('otu_table.biom'), 'w') as f:
            f.write('\n')

        # Create a placeholder for the directory
        mkdir(path_builder('sortmerna_picked_otus'))

        # Create the log file
        fd, fp = mkstemp(dir=pick_dir, prefix='log_', suffix='.txt')
        close(fd)
        with open(fp, 'w') as f:
            f.write('\n')

        _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)

        new_id = get_count('qiita.processed_data')

        # Check that the files have been copied
        db_files = [
            db_path_builder("%s_otu_table.biom" % new_id),
            db_path_builder("%s_sortmerna_picked_otus" % new_id),
            db_path_builder("%s_%s" % (new_id, basename(fp)))
        ]
        for fp in db_files:
            self.assertTrue(exists(fp))

        # Check that a new preprocessed data has been created
        self.assertTrue(
            self.conn_handler.execute_fetchone(
                "SELECT EXISTS(SELECT * FROM qiita.processed_data WHERE "
                "processed_data_id=%s)", (new_id, ))[0])
    def test_sample_template_handler_post_request(self):
        # Test user doesn't have access
        with self.assertRaisesRegex(HTTPError,
                                    'User does not have access to study'):
            sample_template_handler_post_request(1, User('*****@*****.**'),
                                                 'ignored')

        # Test study doesn't exist
        user = User('*****@*****.**')
        with self.assertRaisesRegex(HTTPError, 'Study does not exist'):
            sample_template_handler_post_request(1000000, user, 'ignored')

        # Test file doesn't exist
        with self.assertRaisesRegex(HTTPError, 'Filepath not found'):
            sample_template_handler_post_request(1, user, 'DoesNotExist.txt')

        # Test looks like mapping file and no data_type provided
        uploads_dir = join(get_mountpoint('uploads')[0][1], '1')
        fd, fp = mkstemp(suffix='.txt', dir=uploads_dir)
        self._clean_up_files.append(fp)
        close(fd)

        with open(fp, 'w') as f:
            f.write('#SampleID\tCol1\nSample1\tVal1')

        with self.assertRaisesRegex(
                HTTPError, 'Please, choose a data type if uploading a QIIME '
                'mapping file'):
            sample_template_handler_post_request(1, user, fp)

        # Test success
        obs = sample_template_handler_post_request(1, user,
                                                   'uploaded_file.txt')
        self.assertCountEqual(obs.keys(), ['job'])
        job_info = r_client.get('sample_template_1')
        self.assertIsNotNone(job_info)

        # Wait until the job is done
        wait_for_processing_job(loads(job_info)['job_id'])

        # Test direct upload
        obs = sample_template_handler_post_request(
            1, user, fp, data_type='16S', direct_upload=True)
        self.assertCountEqual(obs.keys(), ['job'])
        job_info = r_client.get('sample_template_1')
        self.assertIsNotNone(job_info)

        # Wait until the job is done
        wait_for_processing_job(loads(job_info)['job_id'])
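# `wait_for_processing_job` is a test helper; assuming it simply polls
# the job until it reaches a terminal state, it could look roughly like
# this (a sketch, not the actual implementation):
from time import sleep


def _wait_for_processing_job_sketch(job_id, timeout=60):
    """Block until the given ProcessingJob finishes or timeout expires."""
    job = ProcessingJob(job_id)
    waited = 0
    while job.status not in ('success', 'error') and waited < timeout:
        sleep(0.5)
        waited += 0.5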
Example #48
    def tearDown(self):
        super(NewArtifactHandlerTests, self).tearDown()

        for fp in self._files_to_remove:
            if exists(fp):
                remove(fp)

        # Replace file if removed as part of function testing
        uploads_path = get_mountpoint('uploads')[0][1]
        fp = join(uploads_path, '1', 'uploaded_file.txt')
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write('')
Example #49
    def test_artifact_get_req(self):
        obs = artifact_get_req('*****@*****.**', 1)
        exp = {'id': 1,
               'type': 'FASTQ',
               'study': 1,
               'data_type': '18S',
               'timestamp': datetime(2012, 10, 1, 9, 30, 27),
               'visibility': 'private',
               'can_submit_vamps': False,
               'can_submit_ebi': False,
               'processing_parameters': None,
               'ebi_run_accessions': None,
               'is_submitted_vamps': False,
               'parents': [],
               'filepaths': [
                   (1, join(get_mountpoint('raw_data')[0][1],
                            '1_s_G1_L001_sequences.fastq.gz'),
                    'raw_forward_seqs'),
                   (2, join(get_mountpoint('raw_data')[0][1],
                            '1_s_G1_L001_sequences_barcodes.fastq.gz'),
                    'raw_barcodes')]}
        self.assertEqual(obs, exp)
    def tearDown(self):
        for fp in self._files_to_remove:
            if exists(fp):
                remove(fp)

        # Replace file if removed as part of function testing
        uploads_path = get_mountpoint('uploads')[0][1]
        fp = join(uploads_path, '1', 'uploaded_file.txt')
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write('')

        r_client.flushdb()
Example #52
    def _common_purge_filepaths_test(self):
        # Get all the filepaths so we can test if they've been removed or not
        sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
        fps = [join(get_mountpoint_path_by_id(dd_id), fp) for fp, dd_id in
               self.conn_handler.execute_fetchall(sql_fp)]

        # Make sure that the files exist - especially on Travis CI
        for fp in fps:
            if not exists(fp):
                with open(fp, 'w') as f:
                    f.write('\n')
                self.files_to_remove.append(fp)

        _, raw_data_mp = get_mountpoint('raw_data')[0]

        removed_fps = [
            join(raw_data_mp, '2_sequences_barcodes.fastq.gz'),
            join(raw_data_mp, '2_sequences.fastq.gz')]

        fps = set(fps).difference(removed_fps)

        # Check that all the files, including the ones about to be
        # purged, exist before purging
        for fp in fps:
            self.assertTrue(exists(fp))
        for fp in removed_fps:
            self.assertTrue(exists(fp))

        exp_count = get_count("qiita.filepath") - 2

        purge_filepaths(self.conn_handler)

        obs_count = get_count("qiita.filepath")

        # Check that only 2 rows have been removed
        self.assertEqual(obs_count, exp_count)

        # Check that the 2 rows that have been removed are the correct ones
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
        obs = self.conn_handler.execute_fetchone(sql, (3,))[0]
        self.assertFalse(obs)
        obs = self.conn_handler.execute_fetchone(sql, (4,))[0]
        self.assertFalse(obs)

        # Check that the files have been successfully removed
        for fp in removed_fps:
            self.assertFalse(exists(fp))

        # Check that all the other files still exist
        for fp in fps:
            self.assertTrue(exists(fp))
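# A hedged sketch of the behavior this test exercises: `purge_filepaths`
# drops filepath rows that are no longer referenced and removes the
# corresponding files from disk. The SQL below is an assumption for
# illustration, not the actual query.
from os import remove
from os.path import exists, join


def _purge_filepaths_sketch(conn_handler):
    sql = """SELECT filepath_id, filepath, data_directory_id
             FROM qiita.filepath
             WHERE filepath_id NOT IN (
                 SELECT filepath_id FROM qiita.artifact_filepath)"""
    for fp_id, fp, dd_id in conn_handler.execute_fetchall(sql):
        fullpath = join(get_mountpoint_path_by_id(dd_id), fp)
        if exists(fullpath):
            remove(fullpath)
        conn_handler.execute(
            "DELETE FROM qiita.filepath WHERE filepath_id = %s", (fp_id,))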
Example #53
    def setUp(self):
        self.preprocessed_data = PreprocessedData(1)
        self.params_table = "processed_params_uclust"
        self.params_id = 1
        fd, self.biom_fp = mkstemp(suffix='_table.biom')
        close(fd)
        self.filepaths = [(self.biom_fp, 6)]
        self.date = datetime(2014, 5, 29, 12, 24, 51)
        _, self.db_test_pd_dir = get_mountpoint(
            'processed_data')[0]

        with open(self.biom_fp, "w") as f:
            f.write("\n")
        self._clean_up_files = []
Example #54
    def setUp(self):
        self.name = "Fake Greengenes"
        self.version = "13_8"

        fd, self.seqs_fp = mkstemp(suffix="_seqs.fna")
        close(fd)
        fd, self.tax_fp = mkstemp(suffix="_tax.txt")
        close(fd)
        fd, self.tree_fp = mkstemp(suffix="_tree.tre")
        close(fd)

        _, self.db_dir = get_mountpoint('reference')[0]

        self._clean_up_files = []
Example #55
    def post(self):

        # vars to add files to raw data
        study_id = self.get_argument('study_id')
        raw_data_id = self.get_argument('raw_data_id')
        barcodes_str = self.get_argument('barcodes')
        forward_reads_str = self.get_argument('forward')
        sff_str = self.get_argument('sff')
        fasta_str = self.get_argument('fasta')
        qual_str = self.get_argument('qual')
        reverse_reads_str = self.get_argument('reverse')

        study_id = int(study_id)
        try:
            study = Study(study_id)
        except QiitaDBUnknownIDError:
            # Study not in database so fail nicely
            raise HTTPError(404, "Study %d does not exist" % study_id)
        else:
            check_access(self.current_user, study, raise_error=True)

        def _split(x):
            return x.split(',') if x else []

        filepaths, fps = [], []
        fps.append((_split(barcodes_str), 'raw_barcodes'))
        fps.append((_split(fasta_str), 'raw_fasta'))
        fps.append((_split(qual_str), 'raw_qual'))
        fps.append((_split(forward_reads_str), 'raw_forward_seqs'))
        fps.append((_split(reverse_reads_str), 'raw_reverse_seqs'))
        fps.append((_split(sff_str), 'raw_sff'))

        for _, f in get_mountpoint("uploads", retrieve_all=True):
            f = join(f, str(study_id))
            for fp_set, filetype in fps:
                for t in fp_set:
                    ft = join(f, t)
                    if exists(ft):
                        filepaths.append((ft, filetype))

        job_id = submit(self.current_user.id, add_files_to_raw_data,
                        raw_data_id, filepaths)

        self.render(
            'compute_wait.html',
            job_id=job_id,
            title='Adding files to your raw data',
            completion_redirect=(
                '/study/description/%s?top_tab=raw_data_tab&sub_tab=%s' %
                (study_id, raw_data_id)))
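# For reference, the `_split` helper above turns each comma-separated
# form field into a list and treats empty input as "no files":
#
#     _split('a.fastq,b.fastq')  ->  ['a.fastq', 'b.fastq']
#     _split('')                 ->  []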
Example #56
    def test_add_file(self):
        fp = join(get_mountpoint('analysis')[0][1], 'testfile.txt')
        with open(fp, 'w') as f:
            f.write('testfile!')
        self.analysis._add_file('testfile.txt', 'plain_text', '18S')

        obs = self.conn_handler.execute_fetchall(
            'SELECT * FROM qiita.filepath WHERE filepath_id = 19')
        exp = [[19, 'testfile.txt', 9, '3675007573', 1, 1]]
        self.assertEqual(obs, exp)

        obs = self.conn_handler.execute_fetchall(
            'SELECT * FROM qiita.analysis_filepath WHERE filepath_id = 19')
        exp = [[1, 19, 2]]
        self.assertEqual(obs, exp)
Example #57
    def test_artifact_get_req(self):
        obs = artifact_get_req('*****@*****.**', 1)
        path_builder = partial(join, get_mountpoint('raw_data')[0][1])
        exp = {'id': 1,
               'type': 'FASTQ',
               'study': 1,
               'data_type': '18S',
               'timestamp': datetime(2012, 10, 1, 9, 30, 27),
               'visibility': 'private',
               'can_submit_vamps': False,
               'can_submit_ebi': False,
               'processing_parameters': None,
               'ebi_run_accessions': None,
               'is_submitted_vamps': False,
               'parents': [],
               'filepaths': [
                   {'fp_id': 1,
                    'fp': path_builder("1_s_G1_L001_sequences.fastq.gz"),
                    'fp_type': "raw_forward_seqs",
                    'checksum': '2125826711',
                    'fp_size': 58},
                   {'fp_id': 2,
                    'fp': path_builder(
                        "1_s_G1_L001_sequences_barcodes.fastq.gz"),
                    'fp_type': "raw_barcodes",
                    'checksum': '2125826711',
                    'fp_size': 58}]}
        self.assertEqual(obs, exp)
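# The checksum values compared above look like CRC32 digests; assuming
# they are, this hedged sketch shows how one could be recomputed for a
# file (illustrative only, not necessarily how qiita computes them):
from zlib import crc32


def _crc32_of(fp):
    """Return the CRC32 checksum of a file as a decimal string."""
    with open(fp, 'rb') as f:
        return str(crc32(f.read()) & 0xffffffff)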
Example #58
    def get(self, path):
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, reason="%s doesn't have access to download "
                            "uploaded files" % user.email)

        # get_mountpoint returns a list of (id, path) tuples; take the
        # path of the first one and strip the base dir so the result is
        # relative to what nginx serves
        filepath = get_mountpoint("uploads")[0][1][
            len(get_db_files_base_dir()):]
        relpath = join(filepath, path)

        self._write_nginx_placeholder_file(relpath)
        self.set_header('Content-Type', 'application/octet-stream')
        self.set_header('Content-Transfer-Encoding', 'binary')
        self.set_header('X-Accel-Redirect', '/protected/' + relpath)
        self._set_nginx_headers(basename(relpath))
        self.finish()
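# A hedged sketch of how the X-Accel-Redirect behavior above could be
# exercised from a Tornado handler test; the endpoint URL and the
# `TestHandlerBase` base class are assumptions for illustration.
class DownloadUploadTestsSketch(TestHandlerBase):
    def test_get(self):
        response = self.get('/download_upload/1/uploaded_file.txt')
        self.assertEqual(response.code, 200)
        self.assertTrue(response.headers['X-Accel-Redirect'].startswith(
            '/protected/'))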