Example #1
0
    def test_move_upload_files_to_trash(self):
        """Check that an uploaded file can be moved to the trash folder.

        A scratch file is written into the ``1`` sub-folder of the first
        "uploads" mountpoint, moved to the trash, and the uploads listing is
        compared before and after the move. Invalid requests are expected to
        raise ``QiitaDBError``.
        """
        test_filename = 'this_is_a_test_file.txt'

        # create file to move to trash; the context manager guarantees the
        # content is flushed and the handle closed before the file is listed
        # and moved
        fid, folder = get_mountpoint("uploads")[0]
        with open(join(folder, '1', test_filename), 'w') as f:
            f.write('test')

        exp = [(fid, test_filename), (fid, 'uploaded_file.txt')]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # move file
        move_upload_files_to_trash(1, [(fid, test_filename)])
        exp = [(fid, 'uploaded_file.txt')]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # testing errors: every call below must raise QiitaDBError
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(2, [(fid, test_filename)])
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(10, test_filename)])
        # the file was already moved above, so moving it again must fail
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(fid, test_filename)])

        # removing trash folder
        rmtree(join(folder, '1', 'trash'))
Example #2
0
    def test_move_upload_files_to_trash(self):
        """Move a scratch upload to the trash and verify the listings.

        The scratch file is registered in ``self.files_to_remove`` and the
        trash folder created by the move is deleted at the end of the test.
        """
        fname = "this_is_a_test_file.txt"

        # Write the scratch file inside the "1" folder of the first
        # "uploads" mountpoint
        mp_id, mp_dir = get_mountpoint("uploads")[0]
        scratch_fp = join(mp_dir, "1", fname)
        with open(scratch_fp, "w") as handle:
            handle.write("test")

        self.files_to_remove.append(scratch_fp)

        # Before the move both files are listed
        self.assertItemsEqual(
            get_files_from_uploads_folders("1"),
            [(mp_id, "this_is_a_test_file.txt"), (mp_id, "uploaded_file.txt")])

        # After the move only the pre-existing file is listed
        move_upload_files_to_trash(1, [(mp_id, fname)])
        self.assertItemsEqual(
            get_files_from_uploads_folders("1"),
            [(mp_id, "uploaded_file.txt")])

        # Each (study, files) request below must raise QiitaDBError; the last
        # one repeats the move that was already performed above
        failing_requests = (
            (2, [(mp_id, fname)]),
            (1, [(10, fname)]),
            (1, [(mp_id, fname)]),
        )
        for study, files in failing_requests:
            with self.assertRaises(QiitaDBError):
                move_upload_files_to_trash(study, files)

        # Drop the trash folder created by the move
        rmtree(join(mp_dir, "1", "trash"))
Example #3
0
    def test_move_upload_files_to_trash(self):
        """Exercise ``move_upload_files_to_trash`` on a scratch upload.

        Writes a file into the ``1`` sub-folder of the first "uploads"
        mountpoint, checks the uploads listing before and after moving it to
        the trash, and checks that invalid requests raise ``QiitaDBError``.
        """
        test_filename = 'this_is_a_test_file.txt'

        # create file to move to trash; use a context manager so the handle
        # is closed (and the content flushed) before the file is moved
        fid, folder = get_mountpoint("uploads")[0]
        test_fp = join(folder, '1', test_filename)
        with open(test_fp, 'w') as f:
            f.write('test')

        exp = [(fid, test_filename), (fid, 'uploaded_file.txt')]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # move file
        move_upload_files_to_trash(1, [(fid, test_filename)])
        exp = [(fid, 'uploaded_file.txt')]
        obs = get_files_from_uploads_folders("1")
        self.assertItemsEqual(obs, exp)

        # testing errors: each of these requests is expected to be rejected
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(2, [(fid, test_filename)])
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(10, test_filename)])
        # same request as the successful move above, now repeated
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(1, [(fid, test_filename)])

        # removing trash folder
        rmtree(join(folder, '1', 'trash'))
Example #4
0
    def test_get_files_from_uploads_folders(self):
        """Check the uploads listing for a populated and an empty study."""
        # Study "1" is expected to list a single uploaded file
        self.assertEqual(get_files_from_uploads_folders("1"),
                         ['uploaded_file.txt'])

        # Study "2" is expected to list nothing
        self.assertEqual(get_files_from_uploads_folders("2"), [])
Example #5
0
    def test_get_files_from_uploads_folders(self):
        """Check the uploads listing for a populated and an empty study."""
        # Study "1": one uploaded file is expected; per the original note,
        # hidden files and folders are not part of the listing
        uploaded = get_files_from_uploads_folders("1")
        self.assertEqual(uploaded, [(7, 'uploaded_file.txt')])

        # Study "2": nothing has been uploaded
        uploaded = get_files_from_uploads_folders("2")
        self.assertEqual(uploaded, [])
Example #6
0
    def test_get_files_from_uploads_folders(self):
        """Compare the uploads listing of studies "1" and "2" to fixtures."""
        # (study id, expected listing): study "1" has one upload -- the
        # original note says hidden files and folders are ignored -- while
        # study "2" has none
        expected_by_study = (
            ("1", [(7, 'uploaded_file.txt')]),
            ("2", []),
        )
        for study, expected in expected_by_study:
            self.assertEqual(get_files_from_uploads_folders(study), expected)
Example #7
0
def new_prep_template_get_req(study_id):
    """Collect the information needed to populate the new prep info template

    Parameters
    ----------
    study_id : int
        The study id

    Returns
    -------
    dict
        A dictionary with the keys:
        'status': always the string 'success'
        'prep_files': the names of the files in the uploads folders of the
        study that end in '.txt' or '.tsv'
        'data_types': the sorted result of ``Study.all_data_types()``
        'ontology': the value returned by ``_get_ENA_ontology()``
    """
    # Keep only the file name of the (id, name) entries that look like
    # text/tab-separated files
    prep_files = []
    for _, fname in get_files_from_uploads_folders(study_id):
        if fname.endswith(('.txt', '.tsv')):
            prep_files.append(fname)

    response = {'status': 'success', 'prep_files': prep_files}
    response['data_types'] = sorted(Study.all_data_types())
    # All the ENA terms for the investigation type
    response['ontology'] = _get_ENA_ontology()
    return response
Example #8
0
def new_prep_template_get_req(study_id):
    """Gather the data shown when creating a new prep info template

    Parameters
    ----------
    study_id : int
        The study id

    Returns
    -------
    dict
        'status' is always 'success'; 'prep_files' holds the uploaded file
        names ending in '.txt' or '.tsv'; 'data_types' holds the sorted
        ``Study.all_data_types()``; 'ontology' holds the result of
        ``_get_ENA_ontology()``
    """
    # Each upload entry is a 3-element tuple; only the middle element (the
    # file name) is used here
    uploads = get_files_from_uploads_folders(study_id)
    wanted_suffixes = ('.txt', '.tsv')
    prep_files = [entry_name for _, entry_name, _ in uploads
                  if entry_name.endswith(wanted_suffixes)]

    available_types = sorted(Study.all_data_types())

    # ENA terms for the investigation type
    ena_ontology = _get_ENA_ontology()

    return {'status': 'success',
            'prep_files': prep_files,
            'data_types': available_types,
            'ontology': ena_ontology}
Example #9
0
    def get(self):
        """Render the sample template summary page for a study.

        The study is taken from the ``study_id`` request argument. When the
        summary request is not successful, a 404 is raised if its message
        contains 'does not exist' and a 403 if it contains
        'User does not have access to study'.
        """
        study_id = self.get_argument('study_id')

        # Names of the uploaded files ending in 'txt' or 'tsv'
        files = []
        for _, fname in get_files_from_uploads_folders(study_id):
            if fname.endswith(('txt', 'tsv')):
                files.append(fname)

        data_types = sorted(data_types_get_req()['data_types'])

        # Most recent version of the template, used to build the download link
        download = sample_template_filepaths_get_req(study_id,
                                                     self.current_user.id)
        if download['status'] == 'success':
            download_id = download['filepaths'][0][0]
        else:
            download_id = None

        stats = sample_template_summary_get_req(study_id, self.current_user.id)
        if stats['status'] != 'success':
            failure = stats['message']
            if 'does not exist' in failure:
                raise HTTPError(404, failure)
            if 'User does not have access to study' in failure:
                raise HTTPError(403, failure)

        stats.update(download_id=download_id,
                     files=files,
                     study_id=study_id,
                     data_types=data_types)
        # URL encode in case message has javascript-breaking characters in it
        stats['alert_message'] = url_escape(stats['alert_message'])
        self.render('study_ajax/sample_summary.html', **stats)
Example #10
0
    def get(self):
        """Send the formatted summary page of a study's sample template.

        Raises ``HTTPError`` 404 or 403 when the summary request fails with a
        'does not exist' or 'User does not have access to study' message.
        """
        study_id = self.get_argument('study_id')
        suffixes = ('txt', 'tsv')
        uploads = get_files_from_uploads_folders(study_id)
        files = [name for _, name in uploads if name.endswith(suffixes)]
        data_types = sorted(data_types_get_req()['data_types'])

        # The first filepath of a successful request identifies the most
        # recent version, which is the one offered for download
        download = sample_template_filepaths_get_req(
            study_id, self.current_user.id)
        download_id = None
        if download['status'] == 'success':
            download_id = download['filepaths'][0][0]

        stats = sample_template_summary_get_req(study_id, self.current_user.id)
        if stats['status'] != 'success':
            # Map the known failure messages to HTTP error codes
            if 'does not exist' in stats['message']:
                raise HTTPError(404, stats['message'])
            if 'User does not have access to study' in stats['message']:
                raise HTTPError(403, stats['message'])

        stats['data_types'] = data_types
        stats['study_id'] = study_id
        stats['files'] = files
        stats['download_id'] = download_id
        # URL encode in case message has javascript-breaking characters in it
        escaped_alert = url_escape(stats['alert_message'])
        stats['alert_message'] = escaped_alert
        self.render('study_ajax/sample_summary.html', **stats)
Example #11
0
    def render(self, study, full_access):
        """Render the prep template tab of the study description page.

        Passes to the template the uploaded 'txt'/'tsv' file names, the data
        types sorted by value, the available prep templates and the ENA
        ontology terms as ``<option>`` elements.
        """
        uploads = get_files_from_uploads_folders(str(study.id))
        files = [name for _, name in uploads if name.endswith(('txt', 'tsv'))]
        data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

        prep_templates_info = list(_template_generator(study, full_access))

        # Get all the ENA terms for the investigation type
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        # "Other" is left out of the sorted options and appended afterwards so
        # that it shows at the bottom of the drop down menu
        ena_terms = ['<option value="%s">%s</option>' % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # New Type is for users to add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        return self.render_string(
            "study_description_templates/prep_template_tab.html",
            files=files,
            data_types=data_types,
            available_prep_templates=prep_templates_info,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            study=study,
            full_access=full_access)
Example #12
0
    def render(self, study, full_access):
        """Build the HTML of the prep template tab for ``study``.

        The template receives the candidate files (uploaded names ending in
        'txt' or 'tsv'), the data types ordered by value, the prep templates
        produced by ``_template_generator`` and the ENA ontology terms.
        """
        files = []
        for _, fname in get_files_from_uploads_folders(str(study.id)):
            if fname.endswith(('txt', 'tsv')):
                files.append(fname)
        data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))
        prep_templates_info = list(_template_generator(study, full_access))

        # ENA terms for the investigation type
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        option = '<option value="%s">%s</option>'
        # Every term except "Other" in sorted order; "Other" goes last so it
        # shows at the bottom of the drop down menu
        regular_terms = [t for t in sorted(ontology.terms) if t != 'Other']
        ena_terms = [option % (t, t) for t in regular_terms]
        ena_terms.append('<option value="Other">Other</option>')

        # 'New Type' lets users add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        context = {
            'files': files,
            'data_types': data_types,
            'available_prep_templates': prep_templates_info,
            'ena_terms': ena_terms,
            'user_defined_terms': user_defined_terms,
            'study': study,
            'full_access': full_access,
        }
        return self.render_string(
            "study_description_templates/prep_template_tab.html", **context)
Example #13
0
def sample_template_overview_handler_get_request(study_id, user):
    """Collect the overview information of a study's sample template.

    Returns a dict with the keys 'exists', 'uploaded_files', 'data_types',
    'user_can_edit', 'job', 'download_id', 'old_files', 'num_samples' and
    'num_columns'. The template-specific values keep their defaults (None,
    empty list or 0) when the sample template does not exist; 'data_types'
    is only filled in that case.
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    exists = SampleTemplate.exists(study_id)

    # Always provided: the uploaded files that can be a sample template file
    # (names ending in 'txt' or 'tsv')
    uploaded_files = []
    for _, fname in get_files_from_uploads_folders(study_id):
        if fname.endswith(('txt', 'tsv')):
            uploaded_files.append(fname)

    # Always provided: the id of the job associated with the sample
    # information, if there is one
    job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    job = loads(job_info)['job_id'] if job_info else None

    # Defaults for the values that depend on the template existing or not
    data_types = []
    st_fp_id = None
    old_files = []
    num_samples = 0
    num_cols = 0
    if not exists:
        # The data types are needed in case the user uploads a QIIME
        # mapping file
        data_types = sorted(data_types_get_req()['data_types'])
    else:
        template = SampleTemplate(study_id)
        filepaths = template.get_filepaths()
        # The first entry is the current sample template file; only its id
        # is kept so the user can download it
        st_fp_id = filepaths.pop(0)[0]
        # The remaining entries are old files; only their basename is kept
        old_files = [basename(old_fp) for _, old_fp in filepaths]
        # Count the samples without materializing the keys in a list
        num_samples = sum(1 for _ in template.keys())
        num_cols = len(template.categories())

    user_can_edit = Study(study_id).can_edit(user)
    return {
        'exists': exists,
        'uploaded_files': uploaded_files,
        'data_types': data_types,
        'user_can_edit': user_can_edit,
        'job': job,
        'download_id': st_fp_id,
        'old_files': old_files,
        'num_samples': num_samples,
        'num_columns': num_cols
    }
Example #14
0
    def render(self, study):
        """Render the study information tab of the study description page.

        Gathers the study details, the candidate sample template files, the
        existing sample template filepaths and the EBI submission details,
        and passes them to the tab template.
        """
        study_info = study.info
        sid = study.id
        abstract = study_info['study_abstract']
        description = study_info['study_description']
        pmids = ", ".join([pubmed_linkifier([pmid]) for pmid in study.pmids])
        princ_inv = StudyPerson(study_info['principal_investigator_id'])
        pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))
        number_samples_promised = study_info['number_samples_promised']
        number_samples_collected = study_info['number_samples_collected']
        metadata_complete = study_info['metadata_complete']
        data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

        # Files from the uploads folder the user can choose as the sample
        # template of the study: only names ending with 'txt' or 'tsv'
        files = []
        for _, fname in get_files_from_uploads_folders(str(study.id)):
            if fname.endswith(('txt', 'tsv')):
                files.append(fname)

        # All the filepaths of the sample template, or an empty list when the
        # sample template does not exist
        sample_templates = []
        if SampleTemplate.exists(study.id):
            sample_templates = SampleTemplate(study.id).get_filepaths()

        # Check if the request came from a local source
        is_local_request = is_localhost(self.request.headers['host'])

        # The user can choose the sample template only if the study is
        # sandboxed or the current user is an admin
        if study.status == 'sandbox':
            show_select_sample = True
        else:
            show_select_sample = self.current_user.level == 'admin'

        # Ebi information; a non-empty accession is turned into a link
        ebi_status = study.ebi_submission_status
        ebi_accession = study.ebi_study_accession
        if ebi_accession:
            ebi_accession = EBI_LINKIFIER.format(ebi_accession)

        return self.render_string(
            "study_description_templates/study_information_tab.html",
            abstract=abstract,
            description=description,
            id=sid,
            pmids=pmids,
            principal_investigator=pi_link,
            number_samples_promised=number_samples_promised,
            number_samples_collected=number_samples_collected,
            metadata_complete=metadata_complete,
            show_select_sample=show_select_sample,
            files=files,
            study_id=study.id,
            sample_templates=sample_templates,
            is_local_request=is_local_request,
            data_types=data_types,
            ebi_status=ebi_status,
            ebi_accession=ebi_accession)
Example #15
0
 def tearDown(self):
     """Delete the files registered for clean up, plus any new uploads.

     Upload entries that are not in ``self.uploaded_files`` are resolved
     inside the '1' folder of the first "uploads" mountpoint and added to
     ``self._clean_up_files`` before the existing paths are removed.
     """
     current_uploads = get_files_from_uploads_folders(str(self.study.id))
     added = set(current_uploads).difference(self.uploaded_files)
     study_dir = join(get_mountpoint("uploads")[0][1], '1')
     self._clean_up_files.extend(join(study_dir, fp) for _, fp in added)
     # Only paths that still exist are removed
     for path in self._clean_up_files:
         if exists(path):
             remove(path)
Example #16
0
def sample_template_overview_handler_get_request(study_id, user):
    """Build the overview information of the sample template of a study.

    The returned dict always carries the uploaded candidate files, the
    associated job id (or None) and whether ``user`` can edit the study. The
    download id, old file names and sample/column counts are only filled
    when the sample template exists; otherwise the data types are provided.
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    # Check if the sample template exists
    exists = SampleTemplate.exists(study_id)

    # Uploaded files that can be a sample template file; each upload entry
    # is a 3-element tuple whose middle element is the file name
    candidate_suffixes = ('txt', 'tsv', 'xlsx')
    uploads = get_files_from_uploads_folders(study_id)
    files = [name for _, name, _ in uploads
             if name.endswith(candidate_suffixes)]

    # Job associated with the sample information, if any
    job = None
    stored_job = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    if stored_job:
        job = loads(stored_job)['job_id']

    overview = {'exists': exists,
                'uploaded_files': files,
                'data_types': [],
                'job': job,
                'download_id': None,
                'old_files': [],
                'num_samples': 0,
                'num_columns': 0}
    if exists:
        st = SampleTemplate(study_id)
        st_files = st.get_filepaths()
        # The current sample template file is the first one in the list and
        # only its id is of interest
        overview['download_id'] = st_files.pop(0)[0]
        # For the old filepaths only the basename is of interest
        overview['old_files'] = [basename(fp) for _, fp in st_files]
        # Space efficient count: avoids building a list of the keys
        overview['num_samples'] = sum(1 for _ in st.keys())
        overview['num_columns'] = len(st.categories())
    else:
        # The data types are needed in case the user uploads a QIIME
        # mapping file
        overview['data_types'] = sorted(data_types_get_req()['data_types'])

    overview['user_can_edit'] = Study(study_id).can_edit(user)
    return overview
Example #17
0
    def display_template(self, study_id, msg):
        """Render the upload page of a study for the current user.

        ``check_access`` is called with ``raise_error=True`` before anything
        is rendered. ``msg`` is accepted but not used in this body.
        """
        study_id = int(study_id)
        study = Study(study_id)
        user = self.current_user
        check_access(user, study, no_public=True, raise_error=True)

        # Values handed to the upload page template
        context = {}
        context['study_title'] = study.title
        context['study_info'] = study.info
        context['study_id'] = study_id
        context['is_admin'] = user.level == 'admin'
        context['extensions'] = ','.join(qiita_config.valid_upload_extension)
        context['max_upload_size'] = qiita_config.max_upload_size
        context['files'] = get_files_from_uploads_folders(str(study_id))
        self.render('upload.html', **context)
Example #18
0
    def display_template(self, study_id, msg):
        """Show ``upload.html`` for the given study.

        Access is verified through ``check_access`` (non-public, raising on
        failure). The ``msg`` argument is not read by this method.
        """
        study_id = int(study_id)
        study = Study(study_id)
        user = self.current_user
        check_access(user, study, no_public=True, raise_error=True)

        title = study.title
        info = study.info
        is_admin = user.level == 'admin'
        # Comma separated list of the extensions accepted for upload
        extensions = ','.join(qiita_config.valid_upload_extension)
        max_upload_size = qiita_config.max_upload_size
        uploaded = get_files_from_uploads_folders(str(study_id))

        self.render('upload.html',
                    study_title=title,
                    study_info=info,
                    study_id=study_id,
                    is_admin=is_admin,
                    extensions=extensions,
                    max_upload_size=max_upload_size,
                    files=uploaded)
Example #19
0
File: upload.py Project: jlab/qiita
    def display_template(self, study_id, msg):
        """Render the upload page, including the remote-upload job status.

        When a job is recorded for the study, the alert level/message and the
        remote url/files shown on the page depend on the job status. ``msg``
        is accepted but not used in this body.
        """
        study_id = int(study_id)
        study = Study(study_id)
        user = self.current_user
        check_access(user, study, no_public=True, raise_error=True)

        # Values used when no job is recorded for the study
        level, message = 'info', ''
        remote_url, remote_files = '', []

        stored_job = r_client.get(UPLOAD_STUDY_FORMAT % study_id)
        if stored_job:
            # Missing keys read as the empty string
            job_info = defaultdict(lambda: '', loads(stored_job))
            job = ProcessingJob(job_info['job_id'])
            job_status = job.status
            url = job.parameters.values['url']
            if job_status == 'success':
                remote_url = job_info['url']
                remote_files = job_info['files']
                level = job_info['alert_type']
                message = job_info['alert_msg'].replace('\n', '</br>')
            elif job_status == 'error':
                level = 'danger'
                message = job.log.msg.replace('\n', '</br>')
                # making errors nicer for users
                if 'No such file' in message:
                    message = 'URL not valid: <i>%s</i>, please review.' % url
            elif job.command.name == 'list_remote_files':
                # Any other status means the job is still being processed
                message = 'Retrieving remote files: listing %s' % url
            else:
                message = 'Retrieving remote files: download %s' % url

        self.render('upload.html',
                    study_title=study.title,
                    study_info=study.info,
                    study_id=study_id,
                    is_admin=user.level == 'admin',
                    extensions=','.join(qiita_config.valid_upload_extension),
                    max_upload_size=qiita_config.max_upload_size,
                    level=level,
                    message=message,
                    remote_url=remote_url,
                    remote_files=remote_files,
                    files=get_files_from_uploads_folders(str(study_id)))
Example #20
0
    def display_template(self, study_id, msg):
        """Show the upload page with the state of any remote-files job.

        The page gets an alert ``level``/``message`` plus ``remote_url`` and
        ``remote_files``, all derived from the job stored for the study (if
        any). The ``msg`` argument is not read by this method.
        """
        study_id = int(study_id)
        study = Study(study_id)
        user = self.current_user
        alert = {'level': 'info', 'message': '',
                 'remote_url': '', 'remote_files': []}
        check_access(user, study, no_public=True, raise_error=True)

        job_info = r_client.get(UPLOAD_STUDY_FORMAT % study_id)
        if job_info:
            # Keys missing from the stored information default to ''
            job_info = defaultdict(lambda: '', loads(job_info))
            job = ProcessingJob(job_info['job_id'])
            status = job.status
            finished = status in ('success', 'error')
            url = job.parameters.values['url']
            if not finished:
                if job.command.name == 'list_remote_files':
                    action = 'Retrieving remote files: listing %s'
                else:
                    action = 'Retrieving remote files: download %s'
                alert['message'] = action % url
            elif status == 'error':
                alert['level'] = 'danger'
                error_text = job.log.msg.replace('\n', '</br>')
                # making errors nicer for users
                if 'No such file' in error_text:
                    error_text = (
                        'URL not valid: <i>%s</i>, please review.' % url)
                alert['message'] = error_text
            else:
                # Successful job: everything comes from the stored information
                alert['remote_url'] = job_info['url']
                alert['remote_files'] = job_info['files']
                alert['level'] = job_info['alert_type']
                alert['message'] = job_info['alert_msg'].replace(
                    '\n', '</br>')

        self.render('upload.html',
                    study_title=study.title, study_info=study.info,
                    study_id=study_id, is_admin=user.level == 'admin',
                    extensions=','.join(qiita_config.valid_upload_extension),
                    max_upload_size=qiita_config.max_upload_size,
                    files=get_files_from_uploads_folders(str(study_id)),
                    **alert)
Example #21
0
    def render(self, study):
        """Render the study information tab for ``study``.

        Passes the study details, the names of all uploaded files and the
        filepaths of the existing sample template (if any) to the template.
        """
        study_info = study.info
        abstract = study_info['study_abstract']
        description = study_info['study_description']
        pmids = ", ".join([pubmed_linkifier([pmid]) for pmid in study.pmids])
        princ_inv = StudyPerson(study_info['principal_investigator_id'])
        pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))
        number_samples_promised = study_info['number_samples_promised']
        number_samples_collected = study_info['number_samples_collected']
        metadata_complete = study_info['metadata_complete']

        # Names of the files in the uploads folder, so the user can choose
        # the sample template of the study
        files = []
        for _, fname in get_files_from_uploads_folders(str(study.id)):
            files.append(fname)

        # Filepaths of the sample template, or an empty list when the sample
        # template does not exist
        sample_templates = []
        if SampleTemplate.exists(study.id):
            sample_templates = SampleTemplate(study.id).get_filepaths()

        # Check if the request came from a local source
        is_local_request = self._is_local()

        # The user can choose the sample template only if the study is
        # sandboxed or the current user is an admin
        if study.status == 'sandbox':
            show_select_sample = True
        else:
            show_select_sample = self.current_user.level == 'admin'

        return self.render_string(
            "study_description_templates/study_information_tab.html",
            abstract=abstract,
            description=description,
            pmids=pmids,
            principal_investigator=pi_link,
            number_samples_promised=number_samples_promised,
            number_samples_collected=number_samples_collected,
            metadata_complete=metadata_complete,
            show_select_sample=show_select_sample,
            files=files,
            study_id=study.id,
            sample_templates=sample_templates,
            is_local_request=is_local_request)
Example #22
0
    def setUp(self):
        """Create the temporary files and studies used by the tests.

        Two temporary fastq files are created and checksummed, a new study
        is created, the filepaths of ``RawData(1)`` are written to disk and
        the current uploads listing of study 1 is recorded.
        """
        # Temporary sequence and barcode files; mkstemp returns an open
        # descriptor that is closed right away
        temp_paths = []
        for suffix in ('_seqs.fastq', '_barcodes.fastq'):
            descriptor, path = mkstemp(suffix=suffix)
            close(descriptor)
            temp_paths.append(path)
        seqs_fp, barcodes_fp = temp_paths

        self.filepaths = [seqs_fp, barcodes_fp]
        # Each file holds its own path; checksums follow the sorted paths
        self.checksums = []
        for fp in sorted(self.filepaths):
            with open(fp, 'w') as handle:
                handle.write("%s\n" % fp)
            self.checksums.append(compute_checksum(fp))
        self.filepaths_types = ["raw_forward_seqs", "raw_barcodes"]
        self._clean_up_files = [seqs_fp, barcodes_fp]

        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        owner = User("*****@*****.**")
        self.new_study = Study.create(owner, "Update raw data test",
                                      efo=[1], info=info)
        self.study = Study(1)

        # The files for the RawData object attached to study 1 do not exist.
        # Create them so the tests can actually run, and register them for
        # clean up
        for _, raw_fp, _ in RawData(1).get_filepaths():
            with open(raw_fp, 'w') as handle:
                handle.write('\n')
            self._clean_up_files.append(raw_fp)

        # Snapshot of the uploads listing taken at set-up time
        self.uploaded_files = get_files_from_uploads_folders(
            str(self.study.id))
Example #23
0
    def get(self):
        """Write which uploaded files match the prep file's run prefixes.

        Reads the ``study_id``, ``prep_file`` and ``type`` request arguments
        and loads the prep file as a tab-separated table. When the table has
        a ``run_prefix`` column, every uploaded file whose name starts with
        any of its values goes to 'selected' and every other file goes to
        'remaining'; each file is reported exactly once. Without that column
        all files are reported as 'remaining'.
        """
        study_id = self.get_argument('study_id')
        prep_file = self.get_argument('prep_file')
        prep_type = self.get_argument('type')

        # TODO: Get file types for the artifact type
        # FILE TYPE IN POSITION 0 MUST BE DEFAULT FOR SELECTED
        file_types = supported_filepath_types(prep_type)

        _, base = get_mountpoint("uploads")[0]
        # The file name is the second element of every upload entry. Index it
        # instead of unpacking: the two branches used to unpack 2- and
        # 3-element tuples respectively, so one of them had to fail
        filenames = [entry[1]
                     for entry in get_files_from_uploads_folders(study_id)]
        # NOTE(review): study_id and prep_file come straight from the request
        # and are joined into a filesystem path -- confirm they are validated
        prep = pd.read_table(join(base, study_id, prep_file), sep='\t')

        selected = []
        not_selected = []
        per_prefix = 'run_prefix' in prep.columns
        if per_prefix:
            # Use run_prefix column of prep template to auto-select
            # per-prefix uploaded files if available. str.startswith accepts
            # a tuple, so a file is classified once against all the prefixes
            # instead of once per prefix
            prep_prefixes = tuple(set(prep['run_prefix']))
            for filename in filenames:
                if filename.startswith(prep_prefixes):
                    selected.append(filename)
                else:
                    not_selected.append(filename)
        else:
            not_selected = filenames

        # Write out if this prep template supports per-prefix files, as well
        # as the pre-selected and remaining files
        self.write({
            'per_prefix': per_prefix,
            'file_types': file_types,
            'selected': selected,
            'remaining': not_selected
        })
Example #24
0
    def get(self):
        """Write the uploaded files split by the prep file's run prefixes.

        Reads the ``study_id``, ``prep_file`` and ``type`` request arguments
        and loads the prep file as a tab-separated table. If it has a
        ``run_prefix`` column, files whose name starts with any of its values
        are written as 'selected' and the rest as 'remaining', each file
        appearing exactly once; otherwise every file is 'remaining'.
        """
        study_id = self.get_argument('study_id')
        prep_file = self.get_argument('prep_file')
        prep_type = self.get_argument('type')

        # TODO: Get file types for the artifact type
        # FILE TYPE IN POSITION 0 MUST BE DEFAULT FOR SELECTED
        file_types = supported_filepath_types(prep_type)

        selected = []
        not_selected = []
        _, base = get_mountpoint("uploads")[0]
        uploaded = get_files_from_uploads_folders(study_id)
        # NOTE(review): study_id and prep_file come straight from the request
        # and are joined into a filesystem path -- confirm they are validated
        prep = pd.read_table(join(base, study_id, prep_file), sep='\t')
        if 'run_prefix' in prep.columns:
            # Use run_prefix column of prep template to auto-select
            # per-prefix uploaded files if available.
            per_prefix = True
            # Test each file once against all the prefixes. Looping over the
            # prefixes and appending on every mismatch listed a file as
            # remaining once per non-matching prefix, even when another
            # prefix had selected it
            prep_prefixes = tuple(set(prep['run_prefix']))
            for _, filename in uploaded:
                if filename.startswith(prep_prefixes):
                    selected.append(filename)
                else:
                    not_selected.append(filename)
        else:
            per_prefix = False
            not_selected = [f for _, f in uploaded]

        # Write out if this prep template supports per-prefix files, as well
        # as the pre-selected and remaining files
        self.write({
            'per_prefix': per_prefix,
            'file_types': file_types,
            'selected': selected,
            'remaining': not_selected})
Example #25
0
    def display_template(self, study, msg, msg_level, tab_to_display=""):
        """Collects the study description page data and renders the template

        Parameters
        ----------
        study : object
            The study to display; its title, info, id, status, pmids and
            raw_data() are read here
        msg : str
            Passed to the template as `message`
        msg_level : str
            Passed to the template as `level`
        tab_to_display : str, optional
            Passed to the template as `tab_to_display`
        """
        option_fmt = '<option value="%s">%s</option>'

        # Flag requests whose host header points to the local machine
        host = self.request.headers['host']
        is_local_request = 'localhost' in host or '127.0.0.1' in host

        # Raw filepath types: drop the "raw_" prefix and use spaces instead
        # of underscores for the label
        fts = []
        for fp_type in get_filepath_types():
            if fp_type.startswith('raw_'):
                label = fp_type.split('_', 1)[1].replace('_', ' ')
                fts.append(option_fmt % (label, label))

        user = User(self.current_user)

        # The raw data of the study and their prep templates
        available_raw_data = yield Task(self.get_raw_data, study.raw_data())
        available_prep_templates = yield Task(self.get_prep_templates,
                                              available_raw_data)

        # raw_files holds only if there is raw data and all of it has files
        with_files = [bool(rd.get_filepaths()) for rd in available_raw_data]
        raw_files = bool(available_raw_data) and all(with_files)

        # prep_templates holds only if every entry has a non-empty value
        with_preps = [bool(val)
                      for _, val in viewitems(available_prep_templates)]
        prep_templates = bool(available_prep_templates) and all(with_preps)

        # The select options are created here so the template does not have
        # to loop over these collections several times
        data_types = [
            option_fmt % (dt_id, dt)
            for dt, dt_id in sorted(viewitems(get_data_types()),
                                    key=itemgetter(1))]
        filetypes = [
            option_fmt % (ft_id, ft)
            for ft, ft_id in sorted(viewitems(get_filetypes()),
                                    key=itemgetter(1))]

        other_rd = yield Task(self.get_raw_data_from_other_studies,
                              user, study)
        other_studies_rd = [
            option_fmt % (rd_id, "id: %d, study: %s" % (rd_id, title))
            for rd_id, title in viewitems(other_rd)]

        ontology = Ontology(convert_to_id('ENA', 'ontology'))

        # "Other" is left out of the sorted terms and added at the end so it
        # shows at the bottom of the drop down menu
        ena_terms = [option_fmt % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # New Type is for users to add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']
        princ_inv = StudyPerson(study.info['principal_investigator_id'])
        pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))

        sample_templates = (SampleTemplate(study.id).get_filepaths()
                            if SampleTemplate.exists(study.id) else [])

        self.render(
            'study_description.html',
            user=self.current_user,
            study_title=study.title,
            study_info=study.info,
            study_id=study.id,
            filetypes=''.join(filetypes),
            user_level=user.level,
            data_types=''.join(data_types),
            available_raw_data=available_raw_data,
            available_prep_templates=available_prep_templates,
            ste=SampleTemplate.exists(study.id),
            study_status=study.status,
            filepath_types=''.join(fts),
            ena_terms=''.join(ena_terms),
            tab_to_display=tab_to_display,
            level=msg_level,
            message=msg,
            prep_templates=prep_templates,
            raw_files=raw_files,
            can_upload=check_access(user, study, no_public=True),
            other_studies_rd=''.join(other_studies_rd),
            user_defined_terms=user_defined_terms,
            files=get_files_from_uploads_folders(str(study.id)),
            is_public=study.status == 'public',
            pmids=", ".join([pubmed_linkifier([pmid])
                             for pmid in study.pmids]),
            principal_investigator=pi_link,
            is_local_request=is_local_request,
            sample_templates=sample_templates)
Example #26
0
def prep_template_ajax_get_req(user_id, prep_id):
    """Returns the prep template information needed for the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, whether the request is successful or not
        - message: str, if the request is unsuccessful, a human readable error
        - name: str, the name of the prep template
        - files: list of str, the files available to update the prep template
        - download_prep: int or None, the filepath_id of the first prep file
        returned by the template, None if there is no such file
        - download_qiime: int or None, the filepath_id of the first qiime
        mapping file returned by the template, None if there is no such file
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: str, the investigation type of the template
        - ontology: str, dict of {str, list of str} containing the information
        of the ENA ontology
        - artifact_attached: bool, whether the template has an artifact
        attached
        - study_id: int, the study id of the template
        - editable: bool, whether the user can edit the study and the
        template is not currently being updated
        - data_type: the value returned by the template's `data_type()`
        - alert_type: str, the type of the alert to show, if any
        - alert_message: str, the message of the alert to show, if any
    """
    # Currently there is no name attribute, but it will be soon
    name = "Prep information %d" % prep_id
    pt = PrepTemplate(prep_id)
    artifact_attached = pt.artifact is not None
    study_id = pt.study_id
    files = [f for _, f in get_files_from_uploads_folders(study_id)
             if f.endswith(('.txt', '.tsv'))]

    # The call to list is needed because keys is an iterator
    num_samples = len(list(pt.keys()))
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    # Retrieve the information to download the prep template and QIIME
    # mapping file. See issue https://github.com/biocore/qiita/issues/1675
    # Only the first file of each kind is kept. Starting from None (instead
    # of indexing a list afterwards) avoids an IndexError when the template
    # lacks one of the two kinds of files.
    download_prep = None
    download_qiime = None
    for fp_id, fp in pt.get_filepaths():
        if 'qiime' in basename(fp):
            if download_qiime is None:
                download_qiime = fp_id
        elif download_prep is None:
            download_prep = fp_id

    ontology = _get_ENA_ontology()

    # NOTE(review): presumably r_client is a redis client and the stored
    # value is the id of the job updating the template - confirm
    job_id = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)
    if job_id:
        redis_info = loads(r_client.get(job_id))
        processing = redis_info['status_msg'] == 'Running'
        if processing:
            alert_type = 'info'
            alert_msg = 'This prep template is currently being updated'
        else:
            alert_type = redis_info['return']['status']
            alert_msg = redis_info['return']['message'].replace('\n', '</br>')
    else:
        processing = False
        alert_type = ''
        alert_msg = ''

    # The template cannot be edited while it is being updated
    editable = Study(study_id).can_edit(User(user_id)) and not processing

    return {'status': 'success',
            'message': '',
            'name': name,
            'files': files,
            'download_prep': download_prep,
            'download_qiime': download_qiime,
            'num_samples': num_samples,
            'num_columns': num_columns,
            'investigation_type': investigation_type,
            'ontology': ontology,
            'artifact_attached': artifact_attached,
            'study_id': study_id,
            'editable': editable,
            'data_type': pt.data_type(),
            'alert_type': alert_type,
            'alert_message': alert_msg}
Example #27
0
    def render(self, study, raw_data):
        """Renders the raw data editor tab for the given raw data

        Parameters
        ----------
        study : object
            The study being displayed; its id and status are read here
        raw_data : object
            The raw data shown in the tab

        Returns
        -------
        The result of rendering
        "study_description_templates/raw_data_editor_tab.html"
        """
        user = self.current_user
        study_status = study.status
        user_level = user.level
        raw_data_id = raw_data.id
        uploads = get_files_from_uploads_folders(str(study.id))
        files = [fname for _, fname in uploads]

        # Available prep template data types, sorted by their value
        data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

        # ENA terms for the investigation type; "Other" is appended last so
        # it shows at the bottom of the drop down menu
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        option_fmt = '<option value="%s">%s</option>'
        ena_terms = [option_fmt % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # New Type is for users to add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        # Existing prep templates of the raw data that belong to this study
        available_prep_templates = []
        for prep_id in sorted(raw_data.prep_templates):
            if not PrepTemplate.exists(prep_id):
                continue
            pt = PrepTemplate(prep_id)
            if study.id == pt.study_id:
                available_prep_templates.append(pt)

        # Filepath types: fixed for the known filetypes, otherwise all the
        # "raw_" filepath types without the prefix
        raw_data_filetype = raw_data.filetype
        fixed_fts = {
            'SFF': ['sff'],
            'FASTA': ['fasta', 'qual'],
            'FASTQ': ['barcodes', 'forward seqs', 'reverse seqs'],
        }
        if raw_data_filetype in fixed_fts:
            fts = fixed_fts[raw_data_filetype]
        else:
            fts = [fp_type.split('_', 1)[1].replace('_', ' ')
                   for fp_type in get_filepath_types()
                   if fp_type.startswith('raw_')]

        # The raw data can be edited (i.e. adding prep templates and files)
        # only if the study is sandboxed or the current user is an admin
        is_editable = study_status == 'sandbox' or user_level == 'admin'

        # Files linked with the raw data and status of the linking
        raw_data_files = raw_data.get_filepaths()
        link_status = raw_data.link_filepaths_status

        in_progress_msgs = {'linking': "Linking files...",
                            'unlinking': "Unlinking files..."}
        if link_status in in_progress_msgs:
            # While (un)linking, the link button is disabled and the unlink
            # button is hidden
            disable_link_btn = True
            show_unlink_btn = False
            link_msg = in_progress_msgs[link_status]
        else:
            disable_link_btn = False
            # No (un)linking in progress: the unlink button is shown if the
            # raw data is editable and it has files attached
            show_unlink_btn = is_editable and raw_data_files
            if link_status.startswith('failed'):
                link_msg = "Error (un)linking files: %s" % link_status
            else:
                link_msg = ""

        return self.render_string(
            "study_description_templates/raw_data_editor_tab.html",
            study_id=study.id,
            study_status=study_status,
            user_level=user_level,
            raw_data_id=raw_data_id,
            files=files,
            data_types=data_types,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            available_prep_templates=available_prep_templates,
            filepath_types=fts,
            is_editable=is_editable,
            show_unlink_btn=show_unlink_btn,
            link_msg=link_msg,
            raw_data_files=raw_data_files,
            raw_data_filetype=raw_data_filetype,
            disable_link_btn=disable_link_btn)
Example #28
0
    def render(self, study, raw_data, full_access):
        """Renders the raw data editor tab for the given raw data

        Parameters
        ----------
        study : object
            The study being displayed; its id and status are read here
        raw_data : object
            The raw data shown in the tab
        full_access : bool
            If false, only the prep templates whose status is 'public' are
            listed

        Returns
        -------
        The result of rendering
        "study_description_templates/raw_data_editor_tab.html"
        """
        user = self.current_user
        study_status = study.status
        user_level = user.level
        raw_data_id = raw_data.id
        uploads = get_files_from_uploads_folders(str(study.id))
        files = [fname for _, fname in uploads]

        # Available prep template data types, sorted by their value
        data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

        # ENA terms for the investigation type; "Other" is appended last so
        # it shows at the bottom of the drop down menu
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        option_fmt = '<option value="%s">%s</option>'
        ena_terms = [option_fmt % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # New Type is for users to add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        # Existing prep templates of the raw data that belong to this study
        # and that the requester is allowed to see
        available_prep_templates = []
        for prep_id in sorted(raw_data.prep_templates):
            if not PrepTemplate.exists(prep_id):
                continue
            pt = PrepTemplate(prep_id)
            if study.id != pt.study_id:
                continue
            if full_access or pt.status == 'public':
                available_prep_templates.append(pt)

        # Filepath types: fixed for the known filetypes, otherwise all the
        # "raw_" filepath types without the prefix
        raw_data_filetype = raw_data.filetype
        fixed_fts = {
            'SFF': ['sff'],
            'FASTA': ['fasta', 'qual'],
            'FASTQ': ['barcodes', 'forward seqs', 'reverse seqs'],
        }
        if raw_data_filetype in fixed_fts:
            fts = fixed_fts[raw_data_filetype]
        else:
            fts = [fp_type.split('_', 1)[1].replace('_', ' ')
                   for fp_type in get_filepath_types()
                   if fp_type.startswith('raw_')]

        # The raw data can be edited (i.e. adding prep templates and files)
        # only if the study is sandboxed or the current user is an admin
        is_editable = study_status == 'sandbox' or user_level == 'admin'

        # Files linked with the raw data and status of the linking
        raw_data_files = raw_data.get_filepaths()
        link_status = raw_data.link_filepaths_status

        in_progress_msgs = {'linking': "Linking files...",
                            'unlinking': "Unlinking files..."}
        if link_status in in_progress_msgs:
            # While (un)linking, the link button is disabled and the unlink
            # button is hidden
            disable_link_btn = True
            show_unlink_btn = False
            link_msg = in_progress_msgs[link_status]
        else:
            disable_link_btn = False
            # No (un)linking in progress: the unlink button is shown if the
            # raw data is editable and it has files attached
            show_unlink_btn = is_editable and raw_data_files
            if link_status.startswith('failed'):
                link_msg = "Error (un)linking files: %s" % link_status
            else:
                link_msg = ""

        return self.render_string(
            "study_description_templates/raw_data_editor_tab.html",
            study_id=study.id,
            study_status=study_status,
            user_level=user_level,
            raw_data_id=raw_data_id,
            files=files,
            data_types=data_types,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            available_prep_templates=available_prep_templates,
            filepath_types=fts,
            is_editable=is_editable,
            show_unlink_btn=show_unlink_btn,
            link_msg=link_msg,
            raw_data_files=raw_data_files,
            raw_data_filetype=raw_data_filetype,
            disable_link_btn=disable_link_btn)
Example #29
0
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files of the study categorized by artifact_type

    The files uploaded to the study are split, using the `run_prefix` column
    of the prep template when available, between the files pre-selected for
    the first supported filepath type and the files left uncategorized.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (int, str)}
        where 'status' is a string specifying whether the query is successful,
        'message' is a human-readable description of the error (optional),
        'remaining' is the sorted list of files that could not be
        categorized, 'file_types' is a list of the available filetypes, if it
        is required or not and the list of categorized files for the given
        artifact type, 'num_prefixes' is the number of different run prefix
        values in the given prep template and 'artifacts' is a list with the
        id and a label of each artifact offered to import files from.
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []

    uploaded = get_files_from_uploads_folders(study_id)
    pt = PrepTemplate(prep_template_id).to_dataframe()

    # The prefixes are only used if some supported type other than
    # 'raw_sff' is a raw type
    has_raw_type = any(ft.startswith('raw_') for ft, _ in supp_file_types
                       if ft != 'raw_sff')
    if has_raw_type and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        for _, fname in uploaded:
            matches = fname.startswith(prep_prefixes)
            (selected if matches else remaining).append(fname)
    else:
        num_prefixes = 0
        remaining = [fname for _, fname in uploaded]

    # At this point we can't do anything smart about selecting by default
    # the files for each type. The first entry of supp_file_types is assumed
    # to be the default one, so it receives the pre-selected files. This
    # works even without `run_prefix`, since selected is then still empty
    default_type = supp_file_types[0]
    file_types = [(default_type[0], default_type[1], selected)]
    file_types.extend((fp_type, req, [])
                      for fp_type, req in supp_file_types[1:])

    # Artifacts that the user has access to, in case that they want to
    # import the files from another artifact. The current study is always
    # included
    user = User(user_id)
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)

    artifact_options = []
    for owner, artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (owner.title, owner.id)
        artifact_options.extend(
            (a.id, "%s - %s (%d)" % (study_label, a.name, a.id))
            for a in artifacts)

    return {'status': 'success',
            'message': '',
            'remaining': sorted(remaining),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}
Example #30
0
def prep_template_ajax_get_req(user_id, prep_id):
    """Returns the prep template information needed for the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, whether the request is successful or not
        - message: str, if the request is unsuccessful, a human readable error
        - name: str, the name of the prep template
        - files: list of str, the files available to update the prep template
        - download_prep_id: int or None, the filepath_id of the first prep
        file returned by the template
        - download_qiime_id: int or None, the filepath_id of the first qiime
        mapping file returned by the template
        - other_filepaths: list of str, the basenames of the remaining prep
        files
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: str, the investigation type of the template
        - ontology: str, dict of {str, list of str} containing the information
        of the ENA ontology
        - artifact_attached: bool, whether the template has an artifact
        attached
        - study_id: int, the study id of the template
        - editable: bool, whether the user can edit the study and the
        template is not currently being updated
        - data_type: the value returned by the template's `data_type()`
        - alert_type: str, the type of the alert to show, if any
        - is_submitted_to_ebi: the template's `is_submitted_to_ebi` value
        - alert_message: str, the message of the alert to show, if any
    """
    pt = PrepTemplate(prep_id)
    name = pt.name

    # Defaults used when there is no job information for this template
    processing = False
    alert_type = ''
    alert_msg = ''
    stored_info = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)
    if stored_info:
        # Missing keys in the stored information read as the empty string
        job_info = defaultdict(str, loads(stored_info))
        job = ProcessingJob(job_info['job_id'])
        job_status = job.status
        if job_status == 'success':
            alert_type = job_info['alert_type']
            alert_msg = job_info['alert_msg'].replace('\n', '</br>')
        elif job_status == 'error':
            alert_type = 'danger'
            alert_msg = job.log.msg.replace('\n', '</br>')
        else:
            # Any other status means that the job has not finished yet
            processing = True
            alert_type = 'info'
            alert_msg = 'This prep template is currently being updated'

    artifact_attached = pt.artifact is not None
    study_id = pt.study_id
    files = [fname
             for _, fname, _ in get_files_from_uploads_folders(study_id)
             if fname.endswith(('.txt', '.tsv'))]

    # The call to list is needed because keys is an iterator
    num_samples = len(list(pt.keys()))
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    # The first qiime mapping file and the first prep file provide the
    # download ids; any further prep file is listed by basename and any
    # further qiime mapping file is ignored
    download_prep_id = None
    download_qiime_id = None
    other_filepaths = []
    for fp_id, fp in pt.get_filepaths():
        fname = basename(fp)
        is_qiime = 'qiime' in fname
        if is_qiime and download_qiime_id is None:
            download_qiime_id = fp_id
        elif not is_qiime and download_prep_id is None:
            download_prep_id = fp_id
        elif not is_qiime:
            other_filepaths.append(fname)

    ontology = _get_ENA_ontology()

    # The template cannot be edited while it is being updated
    editable = Study(study_id).can_edit(User(user_id)) and not processing

    return {'status': 'success',
            'message': '',
            'name': name,
            'files': files,
            'download_prep_id': download_prep_id,
            'download_qiime_id': download_qiime_id,
            'other_filepaths': other_filepaths,
            'num_samples': num_samples,
            'num_columns': num_columns,
            'investigation_type': investigation_type,
            'ontology': ontology,
            'artifact_attached': artifact_attached,
            'study_id': study_id,
            'editable': editable,
            'data_type': pt.data_type(),
            'alert_type': alert_type,
            'is_submitted_to_ebi': pt.is_submitted_to_ebi,
            'alert_message': alert_msg}
Example #31
0
    def render(self, study, prep_template, full_access, ena_terms,
               user_defined_terms):
        """Renders the prep template information tab

        Parameters
        ----------
        study : object
            The study being displayed; its id is read here
        prep_template : object
            The prep template shown in the tab
        full_access : bool
            Not used in this method
        ena_terms : object
            Passed to the template as `ena_terms`
        user_defined_terms : object
            Passed to the template as `user_defined_terms`

        Returns
        -------
        The result of rendering
        "study_description_templates/prep_template_info_tab.html"

        Raises
        ------
        NotImplementedError
            If the filetype of the attached raw data is not one of SFF,
            FASTA, FASTQ or per_sample_FASTQ
        """
        user = self.current_user
        is_local_request = is_localhost(self.request.headers['host'])

        # Both the prep template and the qiime mapping files are returned
        # together; they are told apart by the substring 'qiime' in the
        # basename
        template_fps = []
        qiime_fps = []
        for fp_id, fp in prep_template.get_filepaths():
            if 'qiime' in basename(fp):
                target, label = qiime_fps, 'Qiime mapping'
            else:
                target, label = template_fps, 'Prep template'
            target.append(
                download_link_or_path(is_local_request, fp, fp_id, label))

        # Since get_filepaths returns the paths sorted from newest to oldest,
        # the first in both lists is the latest one
        current_template_fp = template_fps[0]
        current_qiime_fp = qiime_fps[0]

        # Anything after the first entry is an old version
        show_old_templates = len(template_fps) > 1
        old_templates = template_fps[1:] if show_old_templates else None

        show_old_qiime_fps = len(qiime_fps) > 1
        old_qiime_fps = qiime_fps[1:] if show_old_qiime_fps else None

        # (filetype, filetype id, filepath types), sorted by filetype id
        filetypes = [(ft, ft_id, fp_type_by_ft[ft])
                     for ft, ft_id in viewitems(get_filetypes())]
        filetypes.sort(key=itemgetter(1))
        uploads = get_files_from_uploads_folders(str(study.id))
        files = [fname for _, fname in uploads]

        other_studies_rd = sorted(viewitems(
            _get_accessible_raw_data(user)))

        # A prep template can be modified if its status is sandbox
        is_editable = prep_template.status == 'sandbox'

        # Defaults used when the prep template has no raw data attached
        raw_data_id = prep_template.raw_data
        preprocess_options = []
        preprocessed_data = None
        show_preprocess_btn = True
        no_preprocess_msg = None
        if raw_data_id:
            rd = RawData(raw_data_id)
            rd_ft = rd.filetype

            # If the prep template has a raw data associated, it can be
            # preprocessed. Retrieve the pre-processing parameters
            if rd_ft in ('SFF', 'FASTA'):
                param_iter = Preprocessed454Params.iter()
            elif rd_ft in ('FASTQ', 'per_sample_FASTQ'):
                # per_sample_FASTQ only takes the 'not-barcoded' parameter
                # sets, FASTQ takes all the other ones
                want_not_barcoded = rd_ft == 'per_sample_FASTQ'
                param_iter = [
                    pip for pip in PreprocessedIlluminaParams.iter()
                    if ((pip.values['barcode_type'] == 'not-barcoded') ==
                        want_not_barcoded)]
            else:
                raise NotImplementedError(
                    "Pre-processing of %s files currently not supported."
                    % rd_ft)

            # (id, name, HTML description of the values) per parameter set
            preprocess_options = [
                (param.id,
                 param.name,
                 '<br>'.join("<b>%s:</b> %s" % (k, v)
                             for k, v in viewitems(param.values)))
                for param in param_iter]
            preprocessed_data = prep_template.preprocessed_data

            # Check if there are files to preprocess and if the template
            # has all the required columns for preprocessing
            raw_data_files = rd.get_filepaths()
            if len(raw_data_files) == 0:
                show_preprocess_btn = False
                no_preprocess_msg = (
                    "Preprocessing disabled because there are no files "
                    "linked with the Raw Data")
            elif prep_template.data_type() in TARGET_GENE_DATA_TYPES:
                # More than one forward seqs file selects the
                # 'demultiplex_multiple' restrictions
                num_forward = sum(1 for _, _, ftype in raw_data_files
                                  if ftype == 'raw_forward_seqs')
                key = ('demultiplex_multiple' if num_forward > 1
                       else 'demultiplex')
                missing_cols = prep_template.check_restrictions(
                    [PREP_TEMPLATE_COLUMNS_TARGET_GENE[key]])

                # per_sample_FASTQ only needs the run_prefix column
                if rd_ft == 'per_sample_FASTQ':
                    show_preprocess_btn = 'run_prefix' not in missing_cols
                else:
                    show_preprocess_btn = len(missing_cols) == 0

                if show_preprocess_btn:
                    no_preprocess_msg = None
                else:
                    no_preprocess_msg = (
                        "Preprocessing disabled due to missing columns in "
                        "the prep template: %s" % ', '.join(missing_cols))

        preprocessing_status = prep_template.preprocessing_status

        return self.render_string(
            "study_description_templates/prep_template_info_tab.html",
            pt_id=prep_template.id,
            study_id=study.id,
            raw_data=raw_data_id,
            current_template_fp=current_template_fp,
            current_qiime_fp=current_qiime_fp,
            show_old_templates=show_old_templates,
            old_templates=old_templates,
            show_old_qiime_fps=show_old_qiime_fps,
            old_qiime_fps=old_qiime_fps,
            filetypes=filetypes,
            files=files,
            other_studies_rd=other_studies_rd,
            prep_template=prep_template,
            study=study,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            investigation_type=prep_template.investigation_type,
            is_editable=is_editable,
            preprocess_options=preprocess_options,
            preprocessed_data=preprocessed_data,
            preprocessing_status=preprocessing_status,
            show_preprocess_btn=show_preprocess_btn,
            no_preprocess_msg=no_preprocess_msg)
Example #32
0
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to
    guess how those files should be added to the artifact of the given
    type. Uses information on the prep template to try to do a better guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (int, str)}
        where 'status' is a string specifying whether the query is successful,
        'message' is a human-readable description of the run prefixes that
        matched too many files (empty string if there are none),
        'remaining' is the sorted list of files that could not be categorized,
        'file_types' is a list of the available filetypes, if it is required
        or not and the list of categorized files for the given artifact type,
        'num_prefixes' is the number of different run prefix values in
        the given prep template and 'artifacts' is the list of
        (artifact id, label) pairs that the user can import files from.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template does not belong to the given study
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        # Report the prep id (not the study the prep belongs to) so the
        # message matches its wording
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (pt.id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt = pt.to_dataframe()
    ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                 if ft != 'raw_sff')
    if any(ftypes_if) and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
        # group files by prefix; a file claimed by a prefix is dropped from
        # the pool so a shorter prefix cannot claim it a second time
        sfiles = defaultdict(list)
        for p in prep_prefixes:
            unmatched = []
            for entry in uploaded:
                # each entry is a 3-tuple; only the filename (second
                # element) is used for the matching
                _, fname, _ = entry
                if fname.startswith(p):
                    sfiles[p].append(fname)
                else:
                    unmatched.append(entry)
            uploaded = unmatched
        # whatever is left did not match any run prefix
        remaining.extend(f for _, f, _ in uploaded)
        supp_file_types_len = len(supp_file_types)

        for k, v in sfiles.items():
            len_files = len(v)
            # if the number of files in the k group is larger than the
            # available columns add to the remaining group, if not put them in
            # the selected group
            if len_files > supp_file_types_len:
                remaining.extend(v)
                message.append("'%s' has %d matches." % (k, len_files))
            else:
                v.sort()
                selected.append(v)
    else:
        num_prefixes = 0
        remaining = [f for _, f, _ in uploaded]

    # get file_types, format: filetype, required, list of files
    # the i-th file of every selected group goes to the i-th file type
    file_types = [(t, req, [x[i] for x in selected if i + 1 <= len(x)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # they want to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for study, artifacts in user_artifacts.items():
        study_label = "%s (%d)" % (study.title, study.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = ('' if not message else '\n'.join(['Check these run_prefix:'] +
                                                message))

    return {
        'status': 'success',
        'message': message,
        'remaining': sorted(remaining),
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
Example #33
0
    def render(self, study, prep_template, full_access, ena_terms,
               user_defined_terms):
        """Render the prep template information tab

        Parameters
        ----------
        study : object
            The study shown; its ``id`` and ``ebi_study_accession`` are read
            here and the object is forwarded to the template
        prep_template : object
            The prep template whose files, status and attached artifact are
            displayed
        full_access : object
            Not used by this method
        ena_terms : object
            Forwarded untouched to the template
        user_defined_terms : object
            Forwarded untouched to the template

        Returns
        -------
        object
            Whatever ``self.render_string`` returns for
            ``prep_template_info_tab.html``

        Raises
        ------
        NotImplementedError
            If the artifact attached to the prep template is of a type other
            than SFF, FASTA, FASTQ or per_sample_FASTQ
        """
        user = self.current_user
        is_local_request = is_localhost(self.request.headers['host'])

        # The prep template and the qiime mapping files share the same
        # filepath listing; the only way to tell them apart is the substring
        # 'qiime' in the basename
        template_fps = []
        qiime_fps = []
        for fp_id, fp in prep_template.get_filepaths():
            if 'qiime' in basename(fp):
                bucket, label = qiime_fps, 'Qiime mapping'
            else:
                bucket, label = template_fps, 'Prep template'
            bucket.append(
                download_link_or_path(is_local_request, fp, fp_id, label))

        # get_filepaths returns the paths sorted from newest to oldest, so
        # the head of each list is the current file and the tail the history
        current_template_fp = template_fps[0]
        current_qiime_fp = qiime_fps[0]

        show_old_templates = len(template_fps) > 1
        old_templates = template_fps[1:] if show_old_templates else None

        show_old_qiime_fps = len(qiime_fps) > 1
        old_qiime_fps = qiime_fps[1:] if show_old_qiime_fps else None

        # (artifact type, id, filepath types) triplets ordered by id
        filetypes = [(ft, ft_id, fp_type_by_ft[ft])
                     for ft, ft_id in viewitems(get_artifact_types())]
        filetypes.sort(key=itemgetter(1))

        files = [fname
                 for _, fname in get_files_from_uploads_folders(str(study.id))]

        other_studies_rd = sorted(viewitems(_get_accessible_raw_data(user)))

        # Only sandboxed prep templates can be modified
        is_editable = prep_template.status == 'sandbox'

        # Values used when no raw data is attached to the prep template
        raw_data = prep_template.artifact
        preprocess_options = []
        preprocessed_data = None
        show_preprocess_btn = True
        no_preprocess_msg = None
        preprocessing_status = 'Not processed'
        preprocessing_status_msg = ""

        if raw_data:
            raw_data_ft = raw_data.artifact_type
            # A prep template with raw data can be preprocessed: retrieve the
            # available parameter sets. The command ids are hardcoded until
            # the interface is refactored
            if raw_data_ft in ('SFF', 'FASTA'):
                param_iter = Command(2).default_parameter_sets
            elif raw_data_ft == 'FASTQ':
                param_iter = [
                    pset for pset in Command(1).default_parameter_sets
                    if pset.values['barcode_type'] != 'not-barcoded']
            elif raw_data_ft == 'per_sample_FASTQ':
                param_iter = [
                    pset for pset in Command(1).default_parameter_sets
                    if pset.values['barcode_type'] == 'not-barcoded']
            else:
                raise NotImplementedError(
                    "Pre-processing of %s files currently not supported." %
                    raw_data_ft)

            # One (id, name, html description) entry per parameter set
            preprocess_options = []
            for pset in param_iter:
                description = '<br>'.join(
                    "<b>%s:</b> %s" % (k, v)
                    for k, v in viewitems(pset.values))
                preprocess_options.append((pset.id, pset.name, description))
            preprocessed_data = raw_data.children

            # Preprocessing needs files and, for target gene data, a set of
            # required columns in the prep template
            raw_data_files = raw_data.filepaths
            if len(raw_data_files) == 0:
                show_preprocess_btn = False
                no_preprocess_msg = (
                    "Preprocessing disabled because there are no files "
                    "linked with the Raw Data")
            elif prep_template.data_type() in TARGET_GENE_DATA_TYPES:
                # More than one forward reads file switches to the
                # multiple-files restriction set
                num_forward = sum(
                    1 for _, _, ftype in raw_data_files
                    if ftype == 'raw_forward_seqs')
                key = ('demultiplex_multiple' if num_forward > 1
                       else 'demultiplex')
                missing_cols = prep_template.check_restrictions(
                    [PREP_TEMPLATE_COLUMNS_TARGET_GENE[key]])

                # per_sample_FASTQ only needs run_prefix; anything else
                # needs every restricted column
                show_preprocess_btn = (
                    'run_prefix' not in missing_cols
                    if raw_data_ft == 'per_sample_FASTQ'
                    else len(missing_cols) == 0)

                if not show_preprocess_btn:
                    no_preprocess_msg = (
                        "Preprocessing disabled due to missing columns in "
                        "the prep template: %s" % ', '.join(missing_cols))

            # Check the processing status
            status_info = get_artifact_processing_status(raw_data)
            preprocessing_status, preprocessing_status_msg = status_info

        ebi_link = (EBI_LINKIFIER.format(study.ebi_study_accession)
                    if prep_template.is_submitted_to_ebi else None)

        return self.render_string(
            "study_description_templates/prep_template_info_tab.html",
            raw_data=raw_data,
            current_template_fp=current_template_fp,
            current_qiime_fp=current_qiime_fp,
            show_old_templates=show_old_templates,
            old_templates=old_templates,
            show_old_qiime_fps=show_old_qiime_fps,
            old_qiime_fps=old_qiime_fps,
            filetypes=filetypes,
            files=files,
            other_studies_rd=other_studies_rd,
            prep_template=prep_template,
            study=study,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            investigation_type=prep_template.investigation_type,
            is_editable=is_editable,
            preprocess_options=preprocess_options,
            preprocessed_data=preprocessed_data,
            preprocessing_status=preprocessing_status,
            preprocessing_status_message=preprocessing_status_msg,
            show_preprocess_btn=show_preprocess_btn,
            no_preprocess_msg=no_preprocess_msg,
            ebi_link=ebi_link)
Example #34
0
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to
    guess how those files should be added to the artifact of the given
    type. Uses information on the prep template to try to do a better guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (int, str)}
        where 'status' is a string specifying whether the query is successful,
        'message' is a human-readable description of the run prefixes that
        matched too many files (empty string if there are none),
        'remaining' is the sorted list of files that could not be categorized,
        'file_types' is a list of the available filetypes, if it is required
        or not and the list of categorized files for the given artifact type,
        'num_prefixes' is the number of different run prefix values in
        the given prep template and 'artifacts' is the list of
        (artifact id, label) pairs that the user can import files from.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template does not belong to the given study
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        # Report the prep id (not the study the prep belongs to) so the
        # message matches its wording
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (pt.id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt = pt.to_dataframe()
    ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                 if ft != 'raw_sff')
    if any(ftypes_if) and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
        # group files by prefix; a file claimed by a prefix is dropped from
        # the pool in the same pass so a shorter prefix cannot claim it again
        sfiles = defaultdict(list)
        for p in prep_prefixes:
            unmatched = []
            for entry in uploaded:
                _, fname = entry
                if fname.startswith(p):
                    sfiles[p].append(fname)
                else:
                    unmatched.append(entry)
            uploaded = unmatched
        # whatever is left did not match any run prefix
        remaining.extend(f for _, f in uploaded)
        supp_file_types_len = len(supp_file_types)

        for k, v in viewitems(sfiles):
            len_files = len(v)
            # if the number of files in the k group is larger than the
            # available columns add to the remaining group, if not put them in
            # the selected group
            if len_files > supp_file_types_len:
                remaining.extend(v)
                message.append("'%s' has %d matches." % (k, len_files))
            else:
                v.sort()
                selected.append(v)
    else:
        num_prefixes = 0
        remaining = [f for _, f in uploaded]

    # get file_types, format: filetype, required, list of files
    # the i-th file of every selected group goes to the i-th file type
    file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # they want to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for study, artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (study.title, study.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = ('' if not message
               else '\n'.join(['Check these run_prefix:'] + message))

    return {'status': 'success',
            'message': message,
            'remaining': sorted(remaining),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}
Example #35
0
def prep_template_ajax_get_req(user_id, prep_id):
    """Returns the prep template information needed for the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, always 'success'
        - message: str, always the empty string
        - name: str, the name of the prep template
        - files: list of str, the uploaded .txt/.tsv files of the study
        - download_prep: int, the filepath_id of the prep file
        - download_qiime: int, the filepath_id of the qiime mapping file
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: str, the investigation type of the template
        - ontology: the value returned by _get_ENA_ontology
        - artifact_attached: bool, whether the template has an artifact
        attached
        - study_id: int, the study id of the template
        - editable: bool, whether the user can edit the study and no update
        job is currently running
        - data_type: the data type of the template
        - alert_type: str, the status of the last update job, if any
        - is_submitted_to_ebi: whether the template was submitted to EBI
        - alert_message: str, the message of the last update job, if any
    """
    # Currently there is no name attribute, but it will be soon
    name = "Prep information %d" % prep_id
    pt = PrepTemplate(prep_id)

    # Values reported when no update job has been recorded for this prep
    processing = False
    alert_type = ''
    alert_msg = ''

    redis_key = PREP_TEMPLATE_KEY_FORMAT % prep_id
    job_info = r_client.get(redis_key)
    if job_info:
        job_info = loads(job_info)
        job_id = job_info['job_id']
        if not job_id:
            # The job already finished and its outcome was stored in place
            alert_type = job_info['status']
            alert_msg = job_info['message'].replace('\n', '</br>')
        else:
            redis_info = loads(r_client.get(job_id))
            job_status = redis_info['status_msg']
            processing = job_status == 'Running'
            if processing:
                alert_type = 'info'
                alert_msg = 'This prep template is currently being updated'
            else:
                job_return = redis_info['return']
                alert_type = job_return['status']
                alert_msg = job_return['message'].replace('\n', '</br>')
                if job_status == 'Success':
                    # Replace the job reference with its outcome so the job
                    # is not looked up again on later requests
                    payload = {'job_id': None,
                               'status': alert_type,
                               'message': alert_msg}
                    r_client.set(redis_key, dumps(payload))

    artifact_attached = pt.artifact is not None
    study_id = pt.study_id
    files = [fname for _, fname in get_files_from_uploads_folders(study_id)
             if fname.endswith(('.txt', '.tsv'))]

    # The call to list is needed because keys is an iterator
    num_samples = len(list(pt.keys()))
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    # Retrieve the information to download the prep template and QIIME
    # mapping file. See issue https://github.com/biocore/qiita/issues/1675
    prep_fp_ids = []
    qiime_fp_ids = []
    for fp_id, fp in pt.get_filepaths():
        target = qiime_fp_ids if 'qiime' in basename(fp) else prep_fp_ids
        target.append(fp_id)
    # Only the first filepath of each kind is offered for download
    download_prep = prep_fp_ids[0]
    download_qiime = qiime_fp_ids[0]

    ontology = _get_ENA_ontology()

    # Editing is blocked while an update job is running
    editable = Study(study_id).can_edit(User(user_id)) and not processing

    response = {'status': 'success',
                'message': '',
                'name': name,
                'files': files,
                'download_prep': download_prep,
                'download_qiime': download_qiime,
                'num_samples': num_samples,
                'num_columns': num_columns,
                'investigation_type': investigation_type,
                'ontology': ontology,
                'artifact_attached': artifact_attached,
                'study_id': study_id,
                'editable': editable,
                'data_type': pt.data_type(),
                'alert_type': alert_type,
                'is_submitted_to_ebi': pt.is_submitted_to_ebi,
                'alert_message': alert_msg}
    return response
Example #36
0
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    The files uploaded for the study are split in two groups: the ones whose
    name starts with any of the run_prefix values of the prep template and
    the rest. The first group is proposed for the first supported file type.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (int, str)}
        where 'status' is always 'success', 'message' is always the empty
        string, 'remaining' is the list of files that could not be
        categorized, 'file_types' is a list of the available filetypes, if
        it is required or not and the list of categorized files for the
        given artifact type, 'num_prefixes' is the number of different run
        prefix values in the given prep template and 'artifacts' is the
        list of (artifact id, label) pairs the user can import files from.
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []

    uploaded = get_files_from_uploads_folders(study_id)
    prep_df = PrepTemplate(prep_template_id).to_dataframe()

    num_prefixes = 0
    accepts_raw = any(ft.startswith('raw_') for ft, _ in supp_file_types)
    if accepts_raw and 'run_prefix' in prep_df.columns:
        # str.startswith accepts a tuple, so a single call checks every
        # run prefix
        prep_prefixes = tuple(set(prep_df['run_prefix']))
        num_prefixes = len(prep_prefixes)
        for _, fname in uploaded:
            bucket = (selected if fname.startswith(prep_prefixes)
                      else remaining)
            bucket.append(fname)
    else:
        remaining = [fname for _, fname in uploaded]

    # Nothing smart can be done about choosing a file type for each file:
    # every file type starts empty except the first one in supp_file_types,
    # which receives the files matched through 'run_prefix'. When there is
    # no 'run_prefix', selected is still the empty list
    other_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
    default_type = supp_file_types[0]
    file_types = [(default_type[0], default_type[1], selected)] + other_types

    # Create a list of artifacts that the user has access to, in case that
    # they want to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for st, st_artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (st.title, st.id)
        for artifact in st_artifacts:
            label = "%s - %s (%d)" % (study_label, artifact.name, artifact.id)
            artifact_options.append((artifact.id, label))

    return {
        'status': 'success',
        'message': '',
        'remaining': remaining,
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
Example #37
0
    def render(self, study):
        """Render the study information tab

        Parameters
        ----------
        study : object
            The study to display; its info, publications, status and EBI
            attributes are read here

        Returns
        -------
        object
            Whatever ``self.render_string`` returns for
            ``study_information_tab.html``
        """
        info = study.info
        st_id = study.id
        abstract = info['study_abstract']
        description = info['study_description']

        # Every publication contributes one link per identifier it has
        pub_links = []
        for doi, pmid in study.publications:
            if doi is not None:
                pub_links.append(doi_linkifier([doi]))
            if pmid is not None:
                pub_links.append(pubmed_linkifier([pmid]))
        publications = ", ".join(pub_links)

        pi = StudyPerson(info['principal_investigator_id'])
        pi_link = study_person_linkifier((pi.email, pi.name))
        number_samples_promised = info['number_samples_promised']
        number_samples_collected = info['number_samples_collected']
        metadata_complete = info['metadata_complete']

        # Data types ordered by the second element of each item
        data_types = list(viewitems(get_data_types()))
        data_types.sort(key=itemgetter(1))

        # Candidate sample templates: the files in the uploads folder of the
        # study whose name ends with 'txt' or 'tsv'
        uploads = get_files_from_uploads_folders(str(st_id))
        files = [fname for _, fname in uploads
                 if fname.endswith(('txt', 'tsv'))]

        # All the filepaths of the sample template, or an empty list when
        # the study does not have one yet
        sample_templates = (SampleTemplate(st_id).get_filepaths()
                            if SampleTemplate.exists(st_id) else [])

        # Check if the request came from a local source
        is_local_request = is_localhost(self.request.headers['host'])

        # The user can choose the sample template only if the study is
        # sandboxed or the current user is an admin
        show_select_sample = (study.status == 'sandbox' or
                              self.current_user.level == 'admin')

        # EBI information: the accession, when present, is turned into a link
        ebi_status = study.ebi_submission_status
        ebi_accession = study.ebi_study_accession
        if ebi_accession:
            ebi_accession = EBI_LINKIFIER.format(ebi_accession)

        return self.render_string(
            "study_description_templates/study_information_tab.html",
            abstract=abstract,
            description=description,
            id=st_id,
            publications=publications,
            principal_investigator=pi_link,
            number_samples_promised=number_samples_promised,
            number_samples_collected=number_samples_collected,
            metadata_complete=metadata_complete,
            show_select_sample=show_select_sample,
            files=files,
            study_id=st_id,
            sample_templates=sample_templates,
            is_local_request=is_local_request,
            data_types=data_types,
            ebi_status=ebi_status,
            ebi_accession=ebi_accession)