Beispiel #1
0
    def test_get_filetypes(self):
        """get_filetypes returns the expected mapping for both key choices"""
        name_to_id = {
            "SFF": 1,
            "FASTA_Sanger": 2,
            "FASTQ": 3,
            "FASTA": 4,
            "per_sample_FASTQ": 5,
        }
        # Called without arguments: compared against the name -> id mapping
        self.assertEqual(get_filetypes(), name_to_id)

        # Called with key="filetype_id": compared against the inverted mapping
        id_to_name = dict(
            (ft_id, name) for name, ft_id in name_to_id.items())
        self.assertEqual(get_filetypes(key="filetype_id"), id_to_name)
Beispiel #2
0
    def test_get_filetypes(self):
        """get_filetypes returns the expected dict keyed by name or by id"""
        expected_by_name = {'FASTA': 1, 'FASTQ': 2, 'SPECTRA': 3}
        observed = get_filetypes()
        self.assertEqual(observed, expected_by_name)

        # Swap keys and values to get the expectation for key='filetype_id'
        expected_by_id = {}
        for name, ft_id in expected_by_name.items():
            expected_by_id[ft_id] = name
        observed = get_filetypes(key='filetype_id')
        self.assertEqual(observed, expected_by_id)
Beispiel #3
0
    def test_get_filetypes(self):
        """Compare get_filetypes output with the expected dictionaries"""
        forward = {'SFF': 1, 'FASTA-Sanger': 2, 'FASTQ': 3, 'FASTA': 4}
        self.assertEqual(get_filetypes(), forward)

        # The expectation for key='filetype_id' is the same mapping reversed
        reverse = dict(zip(forward.values(), forward.keys()))
        self.assertEqual(get_filetypes(key='filetype_id'), reverse)
Beispiel #4
0
    def test_get_filetypes(self):
        """get_filetypes matches the expected values with and without key"""
        ids_by_filetype = {
            'SFF': 1,
            'FASTA_Sanger': 2,
            'FASTQ': 3,
            'FASTA': 4,
            'per_sample_FASTQ': 5,
        }
        result = get_filetypes()
        self.assertEqual(result, ids_by_filetype)

        # With key='filetype_id' the expected dict maps id -> filetype
        filetype_by_id = {
            ft_id: filetype for filetype, ft_id in ids_by_filetype.items()}
        result = get_filetypes(key='filetype_id')
        self.assertEqual(result, filetype_by_id)
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
        """_get_preprocess_fastq_cmd raises ValueError for this raw data

        The raw data is created with the 'per_sample_FASTQ' filetype and has
        one 'raw_forward_seqs' file plus one 'raw_barcodes' file attached.
        """
        sample_metadata = {'run_prefix': "sample1_failure", 'primer': 'A',
                           'barcode': 'A', 'center_name': 'ANL',
                           'platform': 'ILLUMINA',
                           'library_construction_protocol': 'A',
                           'experiment_design_description': 'A'}
        md_template = pd.DataFrame.from_dict(
            {'SKB8.640193': sample_metadata}, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # This part should fail
        # Each file holds a single newline and is registered for removal
        created_fps = []
        for fname in ('sample1_failure.fastq',
                      'sample1_failure.barcodes.fastq.gz'):
            fp = self.path_builder(fname)
            with open(fp, 'w') as fh:
                fh.write('\n')
            self.files_to_remove.append(fp)
            created_fps.append(fp)

        fp_type_ids = [convert_to_id('raw_forward_seqs', 'filepath_type'),
                       convert_to_id('raw_barcodes', 'filepath_type')]
        fps = list(zip(created_fps, fp_type_ids))

        raw_data = RawData.create(
            get_filetypes()['per_sample_FASTQ'], [prep_template], fps)

        wanted_name = 'per sample FASTQ defaults'
        candidates = [p for p in list(PreprocessedIlluminaParams.iter())
                      if p.name == wanted_name]
        params = candidates[0]

        with self.assertRaises(ValueError):
            _get_preprocess_fastq_cmd(raw_data, prep_template, params)
Beispiel #6
0
    def get(self, study_id):
        """Render 'study_description.html' for the given study id

        The template receives the entries found in the path returned by
        ``get_study_fp`` (an empty list if that path does not exist) and the
        names derived from the ``get_filetypes`` keys starting with 'raw_'.
        """
        study_dir = get_study_fp(study_id)
        fs = list(listdir(study_dir)) if exists(study_dir) else []

        # Drop the leading 'raw' token and join the remaining tokens by spaces
        fts = []
        for ft_name in get_filetypes().keys():
            if ft_name.startswith('raw_'):
                fts.append(' '.join(ft_name.split('_')[1:]))

        self.render(
            'study_description.html',
            user=self.current_user,
            study_info=Study(study_id).info,
            study_id=study_id,
            files=fs,
            max_upoad_size=qiita_config.max_upoad_size,
            filetypes=fts)
Beispiel #7
0
    def render(self, study):
        """Render the raw data tab template for ``study``

        The template is given the ``get_filetypes`` items sorted by their
        second element, the sorted items of
        ``get_raw_data_from_other_studies`` for the current user, and one
        ``(id, filetype, raw data)`` tuple per raw data of the study.
        """
        current_user = self.current_user

        filetype_items = viewitems(get_filetypes())
        filetypes = sorted(filetype_items, key=itemgetter(1))

        rd_from_others = get_raw_data_from_other_studies(current_user, study)
        other_studies_rd = sorted(viewitems(rd_from_others))

        raw_data_info = []
        for rd in get_raw_data(study.raw_data()):
            raw_data_info.append((rd.id, rd.filetype, rd))

        return self.render_string(
            "study_description_templates/raw_data_tab.html",
            filetypes=filetypes,
            other_studies_rd=other_studies_rd,
            available_raw_data=raw_data_info,
            study=study)
Beispiel #8
0
    def render(self, study):
        """Build the context of the raw data tab and render its template

        Returns whatever ``self.render_string`` returns for
        "study_description_templates/raw_data_tab.html".
        """
        user = self.current_user

        # The dict entries are evaluated in the order they are written
        context = {
            # (filetype, value) pairs ordered by the value
            'filetypes': sorted(viewitems(get_filetypes()),
                                key=itemgetter(1)),
            'other_studies_rd': sorted(viewitems(
                get_raw_data_from_other_studies(user, study))),
            # One (id, filetype, raw data) tuple per raw data of the study
            'available_raw_data': [
                (raw.id, raw.filetype, raw)
                for raw in get_raw_data(study.raw_data())],
            'study': study,
        }

        return self.render_string(
            "study_description_templates/raw_data_tab.html", **context)
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
        """Check the command built for a per_sample_FASTQ raw data

        Two samples with run prefixes 'sample1' and 'sample2' are used, each
        with one 'raw_forward_seqs' file, and the command returned by
        _get_preprocess_fastq_cmd is compared against the expected string.
        """
        sample1_md = {'run_prefix': "sample1", 'primer': 'A',
                      'barcode': 'A', 'center_name': 'ANL',
                      'platform': 'ILLUMINA',
                      'instrument_model': 'Illumina MiSeq',
                      'library_construction_protocol': 'A',
                      'experiment_design_description': 'A'}
        sample2_md = {'run_prefix': "sample2", 'primer': 'A',
                      'barcode': 'A', 'center_name': 'ANL',
                      'platform': 'ILLUMINA',
                      'instrument_model': 'Illumina MiSeq',
                      'library_construction_protocol': 'A',
                      'experiment_design_description': 'A'}
        md_template = pd.DataFrame.from_dict(
            {'SKB8.640193': sample1_md, 'SKD8.640184': sample2_md},
            orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # Each file holds a single newline and is registered for removal
        seq_fps = []
        for fname in ('sample1.fastq', 'sample2.fastq.gz'):
            fp = self.path_builder(fname)
            with open(fp, 'w') as fh:
                fh.write('\n')
            self.files_to_remove.append(fp)
            seq_fps.append(fp)

        # Both files are attached with the same filepath type
        filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
        fps = [(fp, filepath_id) for fp in seq_fps]

        raw_data = RawData.create(
            get_filetypes()['per_sample_FASTQ'], [prep_template], fps)

        wanted_name = 'per sample FASTQ defaults'
        candidates = [p for p in list(PreprocessedIlluminaParams.iter())
                      if p.name == wanted_name]
        params = candidates[0]

        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        # Middle element of each sorted get_filepaths() entry, comma separated
        sorted_entries = sorted(raw_data.get_filepaths())
        raw_fps = ','.join([entry_fp for _, entry_fp, _ in sorted_entries])

        cmd_template = (
            "split_libraries_fastq.py --store_demultiplexed_fastq "
            "-i {} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} "
            "--barcode_type not-barcoded --max_bad_run_length 3 "
            "--max_barcode_errors 1.5 --min_per_read_length_fraction 0.75 "
            "--phred_quality_threshold 3 --sequence_max_n 0")
        exp_cmd = cmd_template.format(raw_fps, obs_output_dir)
        self.assertEqual(obs_cmd, exp_cmd)
Beispiel #10
0
    def render(self, study, full_access):
        """Render the raw data tab, listing the raw data that may be shown

        When ``full_access`` is falsy, only raw data whose
        ``status(study)`` equals 'public' is passed to the template. Each
        listed raw data is given as
        ``(id, filetype, raw data, STATUS_STYLER[status])``.
        """
        current_user = self.current_user

        filetype_items = viewitems(get_filetypes())
        filetypes = sorted(filetype_items, key=itemgetter(1))

        rd_from_others = get_raw_data_from_other_studies(current_user, study)
        other_studies_rd = sorted(viewitems(rd_from_others))

        raw_data_info = []
        for rd in get_raw_data(study.raw_data()):
            # The status is only consulted for filtering without full access
            if not (full_access or rd.status(study) == 'public'):
                continue
            raw_data_info.append(
                (rd.id, rd.filetype, rd, STATUS_STYLER[rd.status(study)]))

        return self.render_string(
            "study_description_templates/raw_data_tab.html",
            filetypes=filetypes,
            other_studies_rd=other_studies_rd,
            available_raw_data=raw_data_info,
            study=study,
            full_access=full_access)
Beispiel #11
0
 def test_get_filetypes_fail(self):
     """get_filetypes raises QiitaDBColumnError when key='invalid'"""
     # Callable form of assertRaises: the call is made by the assertion
     self.assertRaises(QiitaDBColumnError, get_filetypes, key='invalid')
Beispiel #12
0
 def test_get_filetypes_fail(self):
     """Passing key='invalid' to get_filetypes raises QiitaDBColumnError"""
     bad_kwargs = {'key': 'invalid'}
     self.assertRaises(QiitaDBColumnError, get_filetypes, **bad_kwargs)
Beispiel #13
0
    def render(self, study, prep_template, full_access, ena_terms,
               user_defined_terms):
        """Render the prep template info tab for ``prep_template``

        Parameters
        ----------
        study : object
            The study being displayed. Only ``study.id`` is read here; the
            object itself is also passed to the template.
        prep_template : object
            The prep template to describe. Its file paths, status, raw data
            id, preprocessed data, data type, preprocessing status and
            investigation type are read.
        full_access : object
            Not referenced in the body of this method.
        ena_terms : object
            Passed through unchanged to the template.
        user_defined_terms : object
            Passed through unchanged to the template.

        Returns
        -------
        The value returned by ``self.render_string`` for
        "study_description_templates/prep_template_info_tab.html".

        Raises
        ------
        NotImplementedError
            If the prep template has a raw data whose filetype is not one of
            'SFF', 'FASTA', 'FASTQ' or 'per_sample_FASTQ'.
        """
        user = self.current_user
        is_local_request = is_localhost(self.request.headers['host'])

        template_fps = []
        qiime_fps = []
        # Unfortunately, both the prep template and the qiime mapping files
        # have the sample type. The way to differentiate them is if we have
        # the substring 'qiime' in the basename
        for id_, fp in prep_template.get_filepaths():
            if 'qiime' in basename(fp):
                qiime_fps.append(
                    download_link_or_path(
                        is_local_request, fp, id_, 'Qiime mapping'))
            else:
                template_fps.append(
                    download_link_or_path(
                        is_local_request, fp, id_, 'Prep template'))

        # Since get_filepaths returns the paths sorted from newest to oldest,
        # the first in both list is the latest one
        # NOTE(review): indexing [0] raises IndexError if either list is
        # empty - confirm a prep template always has both kinds of file
        current_template_fp = template_fps[0]
        current_qiime_fp = qiime_fps[0]

        # Everything after the first entry is offered as an older version
        if len(template_fps) > 1:
            show_old_templates = True
            old_templates = template_fps[1:]
        else:
            show_old_templates = False
            old_templates = None

        if len(qiime_fps) > 1:
            show_old_qiime_fps = True
            old_qiime_fps = qiime_fps[1:]
        else:
            show_old_qiime_fps = False
            old_qiime_fps = None

        # (filetype, filetype id, fp_type_by_ft[filetype]) triples ordered by
        # the filetype id; fp_type_by_ft is defined outside this method
        filetypes = sorted(
            ((ft, ft_id, fp_type_by_ft[ft])
             for ft, ft_id in viewitems(get_filetypes())),
            key=itemgetter(1))
        # Keep only the second element of each pair returned by the helper
        files = [f for _, f in get_files_from_uploads_folders(str(study.id))]

        other_studies_rd = sorted(viewitems(
            _get_accessible_raw_data(user)))

        # A prep template can be modified if its status is sandbox
        is_editable = prep_template.status == 'sandbox'

        # Values used when the prep template has no raw data attached
        raw_data_id = prep_template.raw_data
        preprocess_options = []
        preprocessed_data = None
        show_preprocess_btn = True
        no_preprocess_msg = None
        if raw_data_id:
            rd = RawData(raw_data_id)
            rd_ft = rd.filetype

            # If the prep template has a raw data associated, it can be
            # preprocessed. Retrieve the pre-processing parameters
            if rd_ft in ('SFF', 'FASTA'):
                param_iter = Preprocessed454Params.iter()
            elif rd_ft == 'FASTQ':
                param_iter = [pip for pip in PreprocessedIlluminaParams.iter()
                              if pip.values['barcode_type'] != 'not-barcoded']
            elif rd_ft == 'per_sample_FASTQ':
                param_iter = [pip for pip in PreprocessedIlluminaParams.iter()
                              if pip.values['barcode_type'] == 'not-barcoded']
            else:
                raise NotImplementedError(
                    "Pre-processing of %s files currently not supported."
                    % rd_ft)

            # NOTE(review): this repeats the assignment made before the
            # ``if raw_data_id`` check
            preprocess_options = []
            for param in param_iter:
                # One "<b>name:</b> value" fragment per parameter value; the
                # generator is consumed by the join below
                text = ("<b>%s:</b> %s" % (k, v)
                        for k, v in viewitems(param.values))
                preprocess_options.append((param.id,
                                           param.name,
                                           '<br>'.join(text)))
            preprocessed_data = prep_template.preprocessed_data

            # Check if the template have all the required columns for
            # preprocessing
            raw_data_files = rd.get_filepaths()
            if len(raw_data_files) == 0:
                show_preprocess_btn = False
                no_preprocess_msg = (
                    "Preprocessing disabled because there are no files "
                    "linked with the Raw Data")
            else:
                # For data types outside TARGET_GENE_DATA_TYPES nothing is
                # checked and show_preprocess_btn keeps its value of True
                if prep_template.data_type() in TARGET_GENE_DATA_TYPES:
                    raw_forward_fps = [fp for _, fp, ftype in raw_data_files
                                       if ftype == 'raw_forward_seqs']
                    # More than one forward file selects the
                    # 'demultiplex_multiple' restriction instead of
                    # 'demultiplex'
                    key = ('demultiplex_multiple' if len(raw_forward_fps) > 1
                           else 'demultiplex')
                    missing_cols = prep_template.check_restrictions(
                        [PREP_TEMPLATE_COLUMNS_TARGET_GENE[key]])

                    # per_sample_FASTQ only needs 'run_prefix' to be present;
                    # any other filetype needs no missing column at all
                    if rd_ft == 'per_sample_FASTQ':
                        show_preprocess_btn = 'run_prefix' not in missing_cols
                    else:
                        show_preprocess_btn = len(missing_cols) == 0

                    no_preprocess_msg = None
                    if not show_preprocess_btn:
                        no_preprocess_msg = (
                            "Preprocessing disabled due to missing columns in "
                            "the prep template: %s" % ', '.join(missing_cols))

        preprocessing_status = prep_template.preprocessing_status

        return self.render_string(
            "study_description_templates/prep_template_info_tab.html",
            pt_id=prep_template.id,
            study_id=study.id,
            raw_data=raw_data_id,
            current_template_fp=current_template_fp,
            current_qiime_fp=current_qiime_fp,
            show_old_templates=show_old_templates,
            old_templates=old_templates,
            show_old_qiime_fps=show_old_qiime_fps,
            old_qiime_fps=old_qiime_fps,
            filetypes=filetypes,
            files=files,
            other_studies_rd=other_studies_rd,
            prep_template=prep_template,
            study=study,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            investigation_type=prep_template.investigation_type,
            is_editable=is_editable,
            preprocess_options=preprocess_options,
            preprocessed_data=preprocessed_data,
            preprocessing_status=preprocessing_status,
            show_preprocess_btn=show_preprocess_btn,
            no_preprocess_msg=no_preprocess_msg)
Beispiel #14
0
    def display_template(self, study, msg, msg_level, tab_to_display=""):
        """Gather the study description context and render the page

        Shared by callers so the context building is written only once.
        This is a generator: it yields ``Task`` objects for
        ``get_raw_data``, ``get_prep_templates`` and
        ``get_raw_data_from_other_studies`` and uses the values sent back.
        Several template values are pre-built as HTML ``<option>`` strings.
        """
        option_tmpl = '<option value="%s">%s</option>'

        # Check if the request came from a local source
        host = self.request.headers['host']
        is_local_request = 'localhost' in host or '127.0.0.1' in host

        # getting raw filepath_ types: strip the 'raw_' prefix and replace
        # the remaining underscores with spaces
        fts = []
        for fp_type in get_filepath_types():
            if not fp_type.startswith('raw_'):
                continue
            label = fp_type.split('_', 1)[1].replace('_', ' ')
            fts.append(option_tmpl % (label, label))

        user = User(self.current_user)
        # getting the RawData and its prep templates
        available_raw_data = yield Task(self.get_raw_data, study.raw_data())
        available_prep_templates = yield Task(self.get_prep_templates,
                                              available_raw_data)

        # True only if there is raw data and every raw data has files
        rd_has_files = [bool(rd.get_filepaths())
                        for rd in available_raw_data]
        raw_files = bool(available_raw_data) and all(rd_has_files)

        # True only if there are entries and every entry has a truthy value
        pt_present = [bool(pts)
                      for _, pts in viewitems(available_prep_templates)]
        prep_templates = bool(available_prep_templates) and all(pt_present)

        # other general vars, note that we create the select options here
        # so we do not have to loop several times over them in the template
        sorted_data_types = sorted(viewitems(get_data_types()),
                                   key=itemgetter(1))
        data_types = [option_tmpl % (dt_id, dt_name)
                      for dt_name, dt_id in sorted_data_types]

        sorted_filetypes = sorted(viewitems(get_filetypes()),
                                  key=itemgetter(1))
        filetypes = [option_tmpl % (ft_id, ft_name)
                     for ft_name, ft_id in sorted_filetypes]

        rd_from_others = yield Task(self.get_raw_data_from_other_studies,
                                    user, study)
        other_studies_rd = []
        for rd_id, rd_study in viewitems(rd_from_others):
            label = "id: %d, study: %s" % (rd_id, rd_study)
            other_studies_rd.append(option_tmpl % (rd_id, label))

        ontology = Ontology(convert_to_id('ENA', 'ontology'))

        # make "Other" show at the bottom of the drop down menu
        ena_terms = [option_tmpl % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # New Type is for users to add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']
        princ_inv = StudyPerson(study.info['principal_investigator_id'])
        pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))

        sample_templates = (SampleTemplate(study.id).get_filepaths()
                            if SampleTemplate.exists(study.id) else [])

        self.render(
            'study_description.html',
            user=self.current_user,
            study_title=study.title,
            study_info=study.info,
            study_id=study.id,
            filetypes=''.join(filetypes),
            user_level=user.level,
            data_types=''.join(data_types),
            available_raw_data=available_raw_data,
            available_prep_templates=available_prep_templates,
            ste=SampleTemplate.exists(study.id),
            study_status=study.status,
            filepath_types=''.join(fts),
            ena_terms=''.join(ena_terms),
            tab_to_display=tab_to_display,
            level=msg_level,
            message=msg,
            prep_templates=prep_templates,
            raw_files=raw_files,
            can_upload=check_access(user, study, no_public=True),
            other_studies_rd=''.join(other_studies_rd),
            user_defined_terms=user_defined_terms,
            files=get_files_from_uploads_folders(str(study.id)),
            is_public=study.status == 'public',
            pmids=", ".join([pubmed_linkifier([pmid])
                             for pmid in study.pmids]),
            principal_investigator=pi_link,
            is_local_request=is_local_request,
            sample_templates=sample_templates)