Code example #1 (score: 0)
File: download.py — Project: yotohoshi/qiita
    def get(self):
        """Serve a zipped study download (raw data or BIOM tables) via nginx.

        Validates the ``data``/``study_id``/``data_type`` query arguments,
        then emits an nginx mod_zip file list plus download headers for the
        study's publicly visible artifact files. Every validation failure
        raises HTTPError(422), which also skips the final ``self.finish()``.
        """
        data = self.get_argument("data", None)
        study_id = self.get_argument("study_id", None)
        data_type = self.get_argument("data_type", None)
        dtypes = get_data_types().keys()

        # --- argument validation (guard clauses) ---
        if data is None or study_id is None or data not in ('raw', 'biom'):
            raise HTTPError(422,
                            reason='You need to specify both data (the '
                            'data type you want to download - raw/biom) and '
                            'study_id')
        if data_type is not None and data_type not in dtypes:
            raise HTTPError(422,
                            reason='Not a valid data_type. Valid types '
                            'are: %s' % ', '.join(dtypes))

        study_id = int(study_id)
        try:
            study = Study(study_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(422, reason='Study does not exist')

        # --- access checks ---
        public_raw_download = study.public_raw_download
        if study.status != 'public':
            raise HTTPError(422,
                            reason='Study is not public. If this '
                            'is a mistake contact: '
                            '*****@*****.**')
        if data == 'raw' and not public_raw_download:
            raise HTTPError(422,
                            reason='No raw data access. If this '
                            'is a mistake contact: '
                            '*****@*****.**')

        # --- collect files from publicly visible artifacts only ---
        wanted_type = 'BIOM' if data == 'biom' else None
        to_download = []
        for artifact in study.artifacts(dtype=data_type,
                                        artifact_type=wanted_type):
            if artifact.visibility == 'public':
                to_download.extend(self._list_artifact_files_nginx(artifact))

        if not to_download:
            raise HTTPError(422,
                            reason='Nothing to download. If '
                            'this is a mistake contact: '
                            '*****@*****.**')

        self._write_nginx_file_list(to_download)
        zip_fn = 'study_%d_%s_%s.zip' % (
            study_id, data, datetime.now().strftime('%m%d%y-%H%M%S'))
        self._set_nginx_headers(zip_fn)
        self.finish()
Code example #2 (score: 0)
File: test_download.py — Project: lemetrec/qiita
    def test_download_raw_data(self):
        """Exercise raw-data/BIOM download endpoints and their access rules.

        The handler responds with an nginx mod_zip file list; the expected
        bodies below are regular expressions because template filenames
        embed a timestamp (matched with ``[0-9]*-[0-9]*``).
        """
        # it's possible that one of the tests is deleting the raw data
        # so we will make sure that the files exists so this test passes
        study = Study(1)
        all_files = [x['fp'] for a in study.artifacts()
                     for x in a.filepaths]
        for fp in all_files:
            if not exists(fp):
                with open(fp, 'w') as f:
                    f.write('')
        response = self.get('/download_raw_data/1')
        self.assertEqual(response.code, 200)

        exp = (
            '2125826711 58 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz '
            'raw_data/1_s_G1_L001_sequences.fastq.gz\n'
            '2125826711 58 /protected/raw_data/'
            '1_s_G1_L001_sequences_barcodes.fastq.gz '
            'raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n'
            '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt '
            'mapping_files/1_mapping_file.txt\n'
            '1756512010 1093210 /protected/BIOM/7/biom_table.biom '
            'BIOM/7/biom_table.biom\n'
            '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
            'mapping_files/7_mapping_file.txt\n')
        self.assertRegex(response.body.decode('ascii'), exp)

        # NOTE(review): study 200 presumably does not exist in the test DB,
        # hence the failure code — confirm against the route definition
        response = self.get('/download_study_bioms/200')
        self.assertEqual(response.code, 405)

        # changing user so we can test the failures
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 405)

        # now, let's make sure that when artifacts are public AND the
        # public_raw_download any user can download the files
        study.public_raw_download = True
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        # still refused: no artifact in the study is public yet
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 405)
        # 7 is an uploaded biom, which should now be available but as it's a
        # biom, only the prep info file will be retrieved
        Artifact(7).visibility = 'public'
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 200)
        exp = (
            '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
            'mapping_files/7_mapping_file.txt\n')
        self.assertRegex(response.body.decode('ascii'), exp)
Code example #3 (score: 0)
File: test_download.py — Project: antgonza/qiita
    def test_download_raw_data(self):
        """Verify raw/BIOM download endpoints and their access restrictions.

        Expected response bodies are regexes over the nginx mod_zip file
        list, since template filenames carry a run timestamp.
        """
        # it's possible that one of the tests is deleting the raw data
        # so we will make sure that the files exists so this test passes
        study = Study(1)
        all_files = [x['fp'] for a in study.artifacts()
                     for x in a.filepaths]
        for fp in all_files:
            if not exists(fp):
                with open(fp, 'w') as f:
                    f.write('')
        response = self.get('/download_raw_data/1')
        self.assertEqual(response.code, 200)

        exp = (
            '2125826711 58 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz '
            'raw_data/1_s_G1_L001_sequences.fastq.gz\n'
            '2125826711 58 /protected/raw_data/'
            '1_s_G1_L001_sequences_barcodes.fastq.gz '
            'raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n'
            '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt '
            'mapping_files/1_mapping_file.txt\n'
            '1756512010 1093210 /protected/BIOM/7/biom_table.biom '
            'BIOM/7/biom_table.biom\n'
            '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
            'mapping_files/7_mapping_file.txt\n')
        self.assertRegex(response.body.decode('ascii'), exp)

        # NOTE(review): study 200 presumably does not exist in the test DB,
        # hence the failure code — confirm against the route definition
        response = self.get('/download_study_bioms/200')
        self.assertEqual(response.code, 405)

        # changing user so we can test the failures
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 405)

        # now, let's make sure that when artifacts are public AND the
        # public_raw_download any user can download the files
        study.public_raw_download = True
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        # still refused: no artifact in the study is public yet
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 405)
        # 7 is an uploaded biom, which should now be available but as it's a
        # biom, only the prep info file will be retrieved
        Artifact(7).visibility = 'public'
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 200)
        exp = (
            '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
            'mapping_files/7_mapping_file.txt\n')
        self.assertRegex(response.body.decode('ascii'), exp)
Code example #4 (score: 0)
File: studies.py — Project: colinbrislawn/qiita
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Categorize the study's uploaded files for the given artifact type.

    Retrieves the files uploaded for the study and guesses how they should
    be attached to an artifact of the given type, using the prep template's
    ``run_prefix`` column (when present) to improve the guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (int, str)}
        where 'status' reports success, 'message' carries any error text,
        'remaining' lists files that could not be categorized, 'file_types'
        lists each available filetype with whether it is required and its
        categorized files, and 'num_prefixes' is the number of distinct
        run_prefix values in the prep template.
    """
    fp_types = supported_filepath_types(artifact_type)
    uploads = get_files_from_uploads_folders(study_id)
    prep_df = PrepTemplate(prep_template_id).to_dataframe()

    chosen = []
    leftover = []
    has_raw_type = any(t.startswith('raw_') for t, _ in fp_types)
    if has_raw_type and 'run_prefix' in prep_df.columns:
        prefixes = tuple(set(prep_df['run_prefix']))
        num_prefixes = len(prefixes)
        for _, fname in uploads:
            # str.startswith accepts a tuple of candidate prefixes
            target = chosen if fname.startswith(prefixes) else leftover
            target.append(fname)
    else:
        num_prefixes = 0
        leftover = [fname for _, fname in uploads]

    # The first supported filepath type is the default bucket for any file
    # matched via run_prefix (chosen stays empty when no prefix matching
    # happened); all remaining types start out with no files.
    default_type, default_required = fp_types[0][0], fp_types[0][1]
    file_types = [(default_type, default_required, chosen)]
    file_types += [(t, req, []) for t, req in fp_types[1:]]

    # Offer every artifact the user can access as a possible import source,
    # making sure this study's own artifacts are included.
    requester = User(user_id)
    accessible = requester.user_artifacts(artifact_type=artifact_type)
    this_study = Study(study_id)
    if this_study not in accessible:
        accessible[this_study] = this_study.artifacts(
            artifact_type=artifact_type)
    artifact_options = []
    for st, artifacts in viewitems(accessible):
        st_label = "%s (%d)" % (st.title, st.id)
        for art in artifacts:
            artifact_options.append(
                (art.id, "%s - %s (%d)" % (st_label, art.name, art.id)))

    return {
        'status': 'success',
        'message': '',
        'remaining': leftover,
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
Code example #5 (score: 0)
File: download.py — Project: mestaki/qiita
    def get(self):
        """Serve study/preparation downloads as an nginx mod_zip file list.

        Query arguments:
          data: one of raw/biom/sample_information/prep_information (required)
          study_id / prep_id: target identifiers (at least one required)
          data_type: optional filter; must be a known data type and is not
            allowed when requesting an information file

        Raises HTTPError (mostly 422; 404 for a non-public study) on invalid
        argument combinations, unknown ids, or insufficient access.
        """
        data = self.get_argument("data", None)
        study_id = self.get_argument("study_id",  None)
        prep_id = self.get_argument("prep_id",  None)
        data_type = self.get_argument("data_type",  None)
        dtypes = get_data_types().keys()

        templates = ['sample_information', 'prep_information']
        valid_data = ['raw', 'biom'] + templates

        to_download = []
        # guard: data is required, at least one id is required, and data
        # must be a recognized value
        if data is None or (study_id is None and prep_id is None) or \
                data not in valid_data:
            raise HTTPError(422, reason='You need to specify both data (the '
                            'data type you want to download - %s) and '
                            'study_id or prep_id' % '/'.join(valid_data))
        elif data_type is not None and data_type not in dtypes:
            raise HTTPError(422, reason='Not a valid data_type. Valid types '
                            'are: %s' % ', '.join(dtypes))
        # NOTE(review): given the first guard above (which already rejects
        # study_id and prep_id both None), this branch looks unreachable —
        # confirm before relying on it
        elif data in templates and prep_id is None and study_id is None:
            raise HTTPError(422, reason='If downloading a sample or '
                            'preparation file you need to define study_id or'
                            ' prep_id')
        elif data in templates:
            # information-file download: resolve the template object first
            if data_type is not None:
                raise HTTPError(422, reason='If requesting an information '
                                'file you cannot specify the data_type')
            elif prep_id is not None and data == 'prep_information':
                fname = 'preparation_information_%s' % prep_id
                prep_id = int(prep_id)
                try:
                    infofile = PrepTemplate(prep_id)
                except QiitaDBUnknownIDError:
                    raise HTTPError(
                        422, reason='Preparation information does not exist')
            elif study_id is not None and data == 'sample_information':
                fname = 'sample_information_%s' % study_id
                study_id = int(study_id)
                try:
                    infofile = SampleTemplate(study_id)
                except QiitaDBUnknownIDError:
                    raise HTTPError(
                        422, reason='Sample information does not exist')
            else:
                raise HTTPError(422, reason='Review your parameters, not a '
                                'valid combination')
            # most recent filepath for the template (sort='descending')
            x = retrieve_filepaths(
                infofile._filepath_table, infofile._id_column, infofile.id,
                sort='descending')[0]

            # strip the base dir (+1 for the path separator) so nginx paths
            # are relative
            basedir_len = len(get_db_files_base_dir()) + 1
            fp = x['fp'][basedir_len:]
            to_download.append((fp, fp, str(x['checksum']), str(x['fp_size'])))
            self._write_nginx_file_list(to_download)

            zip_fn = '%s_%s.zip' % (
                fname, datetime.now().strftime('%m%d%y-%H%M%S'))
            self._set_nginx_headers(zip_fn)
        else:
            # raw/biom artifact download for a whole study
            study_id = int(study_id)
            try:
                study = Study(study_id)
            except QiitaDBUnknownIDError:
                raise HTTPError(422, reason='Study does not exist')
            else:
                public_raw_download = study.public_raw_download
                # NOTE(review): 404 here vs 422 for the other failures —
                # possibly intentional, but worth confirming
                if study.status != 'public':
                    raise HTTPError(404, reason='Study is not public. If this '
                                    'is a mistake contact: '
                                    '*****@*****.**')
                elif data == 'raw' and not public_raw_download:
                    raise HTTPError(422, reason='No raw data access. If this '
                                    'is a mistake contact: '
                                    '*****@*****.**')
                else:
                    # raw data: only root artifacts (no parents)
                    artifacts = [a for a in study.artifacts(dtype=data_type)
                                 if not a.parents]
                    # bioms
                    if data == 'biom':
                        artifacts = study.artifacts(
                            dtype=data_type, artifact_type='BIOM')
                    for a in artifacts:
                        # only publicly visible artifacts are downloadable
                        if a.visibility != 'public':
                            continue
                        to_download.extend(self._list_artifact_files_nginx(a))

                if not to_download:
                    raise HTTPError(422, reason='Nothing to download. If '
                                    'this is a mistake contact: '
                                    '*****@*****.**')
                else:
                    self._write_nginx_file_list(to_download)

                    zip_fn = 'study_%d_%s_%s.zip' % (
                        study_id, data, datetime.now().strftime(
                            '%m%d%y-%H%M%S'))

                    self._set_nginx_headers(zip_fn)

        self.finish()
Code example #6 (score: 0)
def _build_study_info(user, study_proc=None, proc_samples=None):
    """Builds list of dicts for studies table, with all HTML formatted

    Parameters
    ----------
    user : User object
        logged in user
    study_proc : dict of lists, optional
        Dictionary keyed on study_id that lists all processed data associated
        with that study. Required if proc_samples given.
    proc_samples : dict of lists, optional
        Dictionary keyed on proc_data_id that lists all samples associated with
        that processed data. Required if study_proc given.

    Returns
    -------
    infolist: list of dict of lists and dicts
        study and processed data info for JSON serialiation for datatables
        Each dict in the list is a single study, and contains the text

    Raises
    ------
    IncompetentQiitaDeveloperError
        If only one of study_proc / proc_samples is given.

    Notes
    -----
    Both study_proc and proc_samples must be passed, or neither passed.
    """
    build_samples = False
    # Logic check to make sure both needed parts passed
    if study_proc is not None and proc_samples is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass proc_samples when study_proc given')
    elif proc_samples is not None and study_proc is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass study_proc when proc_samples given')
    elif study_proc is None:
        build_samples = True

    # get list of studies for table: the user's own, shared, and public ones
    study_set = user.user_studies.union(
        Study.get_by_status('public')).union(user.shared_studies)
    if study_proc is not None:
        study_set = study_set.intersection(study_proc)
    if not study_set:
        # No studies left so no need to continue
        return []

    cols = ['study_id', 'email', 'principal_investigator_id',
            'publication_doi', 'study_title', 'metadata_complete',
            'number_samples_collected', 'study_abstract']
    study_info = Study.get_info([s.id for s in study_set], cols)

    # get info for the studies
    infolist = []
    for info in study_info:
        # Convert DictCursor to proper dict
        info = dict(info)
        study = Study(info['study_id'])
        # Build the processed data info for the study if none passed
        if build_samples:
            proc_data_list = [ar for ar in study.artifacts()
                              if ar.artifact_type == 'BIOM']
            proc_samples = {}
            study_proc = {study.id: defaultdict(list)}
            for proc_data in proc_data_list:
                study_proc[study.id][proc_data.data_type].append(proc_data.id)
                # there is only one prep template for each processed data
                proc_samples[proc_data.id] = proc_data.prep_templates[0].keys()

        # FIX: use a fresh name instead of rebinding `study_info` (the
        # cursor being iterated). The old rebinding only worked because the
        # for-loop had already captured the iterator; it made the loop
        # fragile and confusing to read.
        single_info = _build_single_study_info(study, info, study_proc,
                                               proc_samples)
        infolist.append(single_info)

    return infolist
Code example #7 (score: 0)
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Categorize the study's uploaded files for the given artifact type.

    Retrieves the files uploaded for the study and guesses how they should
    be attached to an artifact of the given type, using the prep template's
    ``run_prefix`` column (when present) to improve the guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (int, str)}
        where 'status' reports success, 'message' carries any error text,
        'remaining' is the sorted list of files that could not be
        categorized, 'file_types' lists each available filetype with whether
        it is required and its categorized files, and 'num_prefixes' is the
        number of distinct run_prefix values in the prep template.
    """
    fp_types = supported_filepath_types(artifact_type)
    uploads = get_files_from_uploads_folders(study_id)
    prep_df = PrepTemplate(prep_template_id).to_dataframe()

    chosen = []
    leftover = []
    # raw_sff is excluded when deciding whether run_prefix matching applies
    prefix_applies = any(t.startswith('raw_') for t, _ in fp_types
                         if t != 'raw_sff')
    if prefix_applies and 'run_prefix' in prep_df.columns:
        prefixes = tuple(set(prep_df['run_prefix']))
        num_prefixes = len(prefixes)
        for _, fname in uploads:
            # str.startswith accepts a tuple of candidate prefixes
            bucket = chosen if fname.startswith(prefixes) else leftover
            bucket.append(fname)
    else:
        num_prefixes = 0
        leftover = [fname for _, fname in uploads]

    # The first supported filepath type is the default bucket for any file
    # matched via run_prefix (chosen stays empty when no matching happened);
    # every other type starts out with no files.
    default_type, default_required = fp_types[0][0], fp_types[0][1]
    file_types = [(default_type, default_required, chosen)]
    file_types += [(t, req, []) for t, req in fp_types[1:]]

    # Offer every artifact the user can access as a possible import source,
    # making sure this study's own artifacts are included.
    requester = User(user_id)
    accessible = requester.user_artifacts(artifact_type=artifact_type)
    this_study = Study(study_id)
    if this_study not in accessible:
        accessible[this_study] = this_study.artifacts(
            artifact_type=artifact_type)
    artifact_options = []
    for st, artifacts in viewitems(accessible):
        st_label = "%s (%d)" % (st.title, st.id)
        for art in artifacts:
            artifact_options.append(
                (art.id, "%s - %s (%d)" % (st_label, art.name, art.id)))

    return {'status': 'success',
            'message': '',
            'remaining': sorted(leftover),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}
Code example #8 (score: 0)
File: studies.py — Project: mestaki/qiita
def study_get_req(study_id, user_id):
    """Returns information available for the given study

    Parameters
    ----------
    study_id : int
        Study id to get prep template info for
    user_id : str
        User requesting the info

    Returns
    -------
    dict
        Data types information in the form
        {'status': status,
         'message': message,
         'study_info': dict of objects,
         'editable': bool}
        status can be success, warning, or error depending on result
        message has the warnings or errors
        study_info contains study information seperated by data type, in the
        form {col_name: value, ...} with value being a string, int, or list
        of strings or ints
    """
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error
    # Can only pass ids over API, so need to instantiate object
    study = Study(study_id)
    info = study.info

    # Split publications into DOIs and other (PubMed-style) identifiers
    info['publication_doi'] = []
    info['publication_pid'] = []
    for pub, is_doi in study.publications:
        key = 'publication_doi' if is_doi else 'publication_pid'
        info[key].append(pub)

    # Extra fields not included in the initial info pull
    info['study_id'] = study.id
    info['study_title'] = study.title
    info['shared_with'] = [s.id for s in study.shared_with]
    info['status'] = study.status
    info['ebi_study_accession'] = study.ebi_study_accession
    info['ebi_submission_status'] = study.ebi_submission_status
    info['public_raw_download'] = study.public_raw_download
    info['notes'] = study.notes

    # Flatten StudyPerson objects into plain dicts for display
    def _person_dict(person):
        return {'name': person.name,
                'email': person.email,
                'affiliation': person.affiliation}

    info['principal_investigator'] = _person_dict(
        info['principal_investigator'])
    if info['lab_person']:
        info['lab_person'] = _person_dict(info['lab_person'])

    samples = study.sample_template
    info['num_samples'] = 0 if samples is None else len(list(samples))
    info['owner'] = study.owner.id
    # Study.has_access no_public=True, will return True only if the user_id is
    # the owner of the study or if the study is shared with the user_id; this
    # with study.public_raw_download will define has_access_to_raw_data
    info['has_access_to_raw_data'] = study.has_access(
        User(user_id), True) or study.public_raw_download

    info['show_biom_download_button'] = any(
        a.artifact_type == 'BIOM' for a in study.artifacts())
    info['show_raw_download_button'] = any(
        pt.artifact is not None for pt in study.prep_templates())

    # getting study processing status from redis
    info['level'] = ''
    info['message'] = ''
    job_info = r_client.get(STUDY_KEY_FORMAT % study_id)
    if job_info:
        job_info = defaultdict(lambda: '', loads(job_info))
        job = ProcessingJob(job_info['job_id'])
        job_status = job.status
        if job_status not in ('success', 'error'):
            info['level'] = 'info'
            info['message'] = 'This study is currently being processed'
        elif job_status == 'error':
            info['level'] = 'danger'
            info['message'] = job.log.msg.replace('\n', '</br>')
        else:
            info['level'] = job_info['alert_type']
            info['message'] = job_info['alert_msg'].replace(
                '\n', '</br>')

    return {
        'status': 'success',
        'message': '',
        'study_info': info,
        'editable': study.can_edit(User(user_id))
    }
Code example #9 (score: 0)
File: studies.py — Project: mestaki/qiita
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to
    guess how those files should be added to the artifact of the given
    type. Uses information on the prep template to try to do a better guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (int, str)}
        where 'status' is a string specifying whether the query is successfull,
        'message' is a human-readable description of the error (optional),
        'remaining' is the list of files that could not be categorized,
        'file_types' is a list of the available filetypes, if it is required
        or not and the list of categorized files for the given artifact type
        and 'num_prefixes' is the number of different run prefix values in
        the given prep template.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template does not belong to the given study.
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (pt.study_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt = pt.to_dataframe()
    # raw_sff is excluded when deciding whether run_prefix matching applies
    ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                 if ft != 'raw_sff')
    if any(ftypes_if) and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
        # group files by prefix
        sfiles = defaultdict(list)
        for p in prep_prefixes:
            to_remove = []
            for x in uploaded:
                # each entry of `uploaded` is a 3-tuple; x[1] is the filename
                if x[1].startswith(p):
                    sfiles[p].append(x[1])
                    # BUGFIX: store the full uploaded tuple. The previous
                    # code appended (fid, f) 2-tuples which could never
                    # equal the 3-tuples in `uploaded`, so the filter below
                    # removed nothing and a file could be claimed by several
                    # prefixes (e.g. both '1002' and '100').
                    to_remove.append(x)
            uploaded = [x for x in uploaded if x not in to_remove]
        inuse = [y for x in sfiles.values() for y in x]
        remaining.extend([f for _, f, _ in uploaded if f not in inuse])
        supp_file_types_len = len(supp_file_types)

        for k, v in sfiles.items():
            len_files = len(v)
            # if the number of files in the k group is larger than the
            # available columns add to the remaining group, if not put them in
            # the selected group
            if len_files > supp_file_types_len:
                remaining.extend(v)
                message.append("'%s' has %d matches." % (k, len_files))
            else:
                v.sort()
                selected.append(v)
    else:
        num_prefixes = 0
        remaining = [f for _, f, _ in uploaded]

    # get file_types, format: filetype, required, list of files
    file_types = [(t, req, [x[i] for x in selected if i + 1 <= len(x)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # they want to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for study, artifacts in user_artifacts.items():
        study_label = "%s (%d)" % (study.title, study.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = ('' if not message else '\n'.join(['Check these run_prefix:'] +
                                                message))

    return {
        'status': 'success',
        'message': message,
        'remaining': sorted(remaining),
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
Code example #10 (score: 0)
def _build_study_info(user, study_proc=None, proc_samples=None):
    """Builds list of dicts for studies table, with all HTML formatted

    Parameters
    ----------
    user : User object
        logged in user
    study_proc : dict of lists, optional
        Dictionary keyed on study_id that lists all processed data associated
        with that study. Required if proc_samples given.
    proc_samples : dict of lists, optional
        Dictionary keyed on proc_data_id that lists all samples associated with
        that processed data. Required if study_proc given.

    Returns
    -------
    infolist: list of dict of lists and dicts
        study and processed data info for JSON serialiation for datatables
        Each dict in the list is a single study, and contains the text

    Raises
    ------
    IncompetentQiitaDeveloperError
        If exactly one of study_proc / proc_samples is given.

    Notes
    -----
    Both study_proc and proc_samples must be passed, or neither passed.
    """
    build_samples = False
    # Logic check to make sure both needed parts passed
    if study_proc is not None and proc_samples is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass proc_samples when study_proc given')
    elif proc_samples is not None and study_proc is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass study_proc when proc_samples given')
    elif study_proc is None:
        build_samples = True

    # get list of studies for table: user's own, public, and shared studies
    study_set = user.user_studies.union(
        Study.get_by_status('public')).union(user.shared_studies)
    if study_proc is not None:
        study_set = study_set.intersection(study_proc)
    if not study_set:
        # No studies left so no need to continue
        return []

    cols = ['study_id', 'email', 'principal_investigator_id',
            'publication_doi', 'study_title', 'metadata_complete',
            'number_samples_collected', 'study_abstract']
    # NOTE: use a distinct name for the query result; the original code
    # reassigned `study_info` inside the loop below, shadowing the sequence
    # being iterated and only working because the iterator was already bound.
    study_rows = Study.get_info([s.id for s in study_set], cols)

    # get info for the studies
    infolist = []
    for info in study_rows:
        # Convert DictCursor to proper dict
        info = dict(info)
        study = Study(info['study_id'])
        # Build the processed data info for the study if none passed
        if build_samples:
            proc_data_list = [ar for ar in study.artifacts()
                              if ar.artifact_type == 'BIOM']
            proc_samples = {}
            study_proc = {study.id: defaultdict(list)}
            for proc_data in proc_data_list:
                study_proc[study.id][proc_data.data_type].append(proc_data.id)
                # there is only one prep template for each processed data
                proc_samples[proc_data.id] = proc_data.prep_templates[0].keys()

        infolist.append(
            _build_single_study_info(study, info, study_proc, proc_samples))

    return infolist
コード例 #11
0
ファイル: studies.py プロジェクト: josenavas/QiiTa
def study_get_req(study_id, user_id):
    """Collect all study information visible to the requesting user

    Parameters
    ----------
    study_id : int
        Study id to get prep template info for
    user_id : str
        User requesting the info

    Returns
    -------
    dict
        {'status': str, 'message': str, 'study_info': dict,
         'editable': bool}
        status is 'success' unless access is denied; message carries any
        warning or error text; study_info holds the per-study fields
        (publications, people, counts, download flags, processing status).
    """
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error
    # Only ids travel over the API, so instantiate the objects here
    study = Study(study_id)
    requester = User(user_id)
    info = study.info

    # Split the publications into DOI and PubMed-id buckets
    info['publication_doi'] = []
    info['publication_pid'] = []
    for pub, is_doi in study.publications:
        bucket = 'publication_doi' if is_doi else 'publication_pid'
        info[bucket].append(pub)

    info.update({
        'study_id': study.id,
        'study_title': study.title,
        'shared_with': [s.id for s in study.shared_with],
        'status': study.status,
        'ebi_study_accession': study.ebi_study_accession,
        'ebi_submission_status': study.ebi_submission_status})

    # Flatten the StudyPerson objects into plain dicts for display
    pi = info['principal_investigator']
    info['principal_investigator'] = {
        'name': pi.name,
        'email': pi.email,
        'affiliation': pi.affiliation}

    lab_person = info['lab_person']
    if lab_person:
        info['lab_person'] = {
            'name': lab_person.name,
            'email': lab_person.email,
            'affiliation': lab_person.affiliation}

    samples = study.sample_template
    info['num_samples'] = len(list(samples)) if samples is not None else 0
    info['owner'] = study.owner.id
    # Study.has_access no_public=True, will return True only if the user_id is
    # the owner of the study or if the study is shared with the user_id
    info['has_access_to_raw_data'] = study.has_access(requester, True)

    info['show_biom_download_button'] = 'BIOM' in [
        a.artifact_type for a in study.artifacts()]
    info['show_raw_download_button'] = any(
        pt.artifact is not None for pt in study.prep_templates())

    # getting study processing status from redis
    info['level'] = ''
    info['message'] = ''
    job_info = r_client.get(STUDY_KEY_FORMAT % study_id)
    if job_info:
        job_info = defaultdict(lambda: '', loads(job_info))
        job = ProcessingJob(job_info['job_id'])
        job_status = job.status
        if job_status not in ('success', 'error'):
            # still running: surface an informational banner
            info['level'] = 'info'
            info['message'] = 'This study is currently being processed'
        elif job_status == 'error':
            info['level'] = 'danger'
            info['message'] = job.log.msg.replace('\n', '</br>')
        else:
            info['level'] = job_info['alert_type']
            info['message'] = job_info['alert_msg'].replace(
                '\n', '</br>')

    return {'status': 'success',
            'message': '',
            'study_info': info,
            'editable': study.can_edit(requester)}
コード例 #12
0
ファイル: studies.py プロジェクト: josenavas/QiiTa
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to
    guess on how those files should be added to the artifact of the given
    type. Uses information on the prep template to try to do a better guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int}
        where 'status' is a string specifying whether the query is successfull,
        'message' is a human-readable description of the error (optional),
        'remaining' is the list of files that could not be categorized,
        'file_types' is a list of the available filetypes, if it is required
        or not and the list of categorized files for the given artifact type
        and 'num_prefixes' is the number of different run prefix values in
        the given prep template.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template does not belong to the given study.
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (pt.study_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt = pt.to_dataframe()
    has_raw_types = any(ft.startswith('raw_')
                        for ft, _ in supp_file_types if ft != 'raw_sff')
    if has_raw_types and 'run_prefix' in pt.columns:
        prefixes = set(pt['run_prefix'])
        num_prefixes = len(prefixes)
        # match longest prefixes first so e.g. '1002' is not swallowed
        # by '100'
        sfiles = defaultdict(list)
        for prefix in sorted(prefixes, key=len, reverse=True):
            claimed = []
            for entry in uploaded:
                fname = entry[1]
                if fname.startswith(prefix):
                    sfiles[prefix].append(fname)
                    claimed.append(entry)
            uploaded = [e for e in uploaded if e not in claimed]
        grouped = [fname for group in sfiles.values() for fname in group]
        remaining.extend(f for _, f in uploaded if f not in grouped)
        max_per_group = len(supp_file_types)

        for prefix, files in viewitems(sfiles):
            # a group with more files than available filetype columns cannot
            # be auto-assigned; push it back to 'remaining' and warn
            if len(files) > max_per_group:
                remaining.extend(files)
                message.append("'%s' has %d matches." % (prefix, len(files)))
            else:
                files.sort()
                selected.append(files)
    else:
        num_prefixes = 0
        remaining = [f for _, f in uploaded]

    # get file_types, format: filetype, required, list of files
    file_types = [(ftype, required,
                   [group[idx] for group in selected if idx < len(group)])
                  for idx, (ftype, required) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # he wants to import the files from another artifact
    requester = User(user_id)
    artifact_options = []
    user_artifacts = requester.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for study, artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (study.title, study.id)
        artifact_options.extend(
            (a.id, "%s - %s (%d)" % (study_label, a.name, a.id))
            for a in artifacts)

    message = ('\n'.join(['Check these run_prefix:'] + message)
               if message else '')

    return {'status': 'success',
            'message': message,
            'remaining': sorted(remaining),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}
コード例 #13
0
ファイル: listing_handlers.py プロジェクト: mivamo1214/qiita
<<<<<<< HEAD
    cols = ['study_id', 'email', 'principal_investigator_id',
            'publication_doi', 'study_title', 'metadata_complete',
            'number_samples_collected', 'study_abstract']
    study_info = Study.get_info([s.id for s in study_set], cols)

    # get info for the studies
    infolist = []
    for info in study_info:
        # Convert DictCursor to proper dict
        info = dict(info)
        study = Study(info['study_id'])
        # Build the processed data info for the study if none passed
        if build_samples:
            proc_data_list = [ar for ar in study.artifacts()
                              if ar.artifact_type == 'BIOM']
            proc_samples = {}
            study_proc = {study.id: defaultdict(list)}
            for proc_data in proc_data_list:
                study_proc[study.id][proc_data.data_type].append(proc_data.id)
                # there is only one prep template for each processed data
                proc_samples[proc_data.id] = proc_data.prep_templates[0].keys()

        study_info = _build_single_study_info(study, info, study_proc,
                                              proc_samples)
        infolist.append(study_info)

    return infolist
=======
    return generate_study_list([s.id for s in study_set],
コード例 #14
0
ファイル: public.py プロジェクト: mestaki/qiita
    def get(self):
        """Render the public study page for a study or a single artifact.

        Query arguments
        ---------------
        study_id : int, optional
            Show all public artifacts of this study.
        artifact_id : int, optional
            Show only this artifact (its study is looked up).
        Exactly one of the two must be provided.

        Raises
        ------
        HTTPError
            422 if neither/both arguments are usable, the ids don't exist,
            the artifact is not public, or the study is not public.
        """
        study_id = self.get_argument("study_id", None)
        artifact_id = self.get_argument("artifact_id", None)

        # NOTE: raising HTTPError terminates the handler, so the original
        # `self.finish()` calls that followed each raise were unreachable
        # dead code and have been removed.
        if study_id is None and artifact_id is None:
            raise HTTPError(
                422, reason='You need to specify study_id or artifact_id')
        elif study_id is not None:
            try:
                study = Study(int(study_id))
            except QiitaDBUnknownIDError:
                raise HTTPError(422,
                                reason="Study %s doesn't exist" % study_id)
            artifact_ids = [
                a.id for a in study.artifacts() if a.visibility == 'public'
            ]
        else:
            try:
                artifact = Artifact(int(artifact_id))
            except QiitaDBUnknownIDError:
                raise HTTPError(422,
                                reason="Artifact %s doesn't exist" %
                                artifact_id)
            if artifact.visibility != 'public':
                raise HTTPError(422,
                                reason="Artifact %s is not public" %
                                artifact_id)

            study = artifact.study
            if study is None:
                raise HTTPError(422,
                                reason="Artifact %s doesn't belong to "
                                "a study" % artifact_id)
            artifact_ids = [artifact.id]

        if study.status != 'public':
            raise HTTPError(422, reason='Not a public study')

        study_info = study.info
        study_info['study_id'] = study.id
        study_info['study_title'] = study.title
        study_info['shared_with'] = [s.id for s in study.shared_with]
        study_info['status'] = study.status
        study_info['ebi_study_accession'] = study.ebi_study_accession
        study_info['ebi_submission_status'] = study.ebi_submission_status

        # Clean up StudyPerson objects to string for display
        email = '<a href="mailto:{email}">{name} ({affiliation})</a>'
        pi = study.info['principal_investigator']
        study_info['principal_investigator'] = email.format(
            **{
                'name': pi.name,
                'email': pi.email,
                'affiliation': pi.affiliation
            })

        study_info['owner'] = study.owner.id
        # Add needed info that is not part of the initial info pull
        study_info['publications'] = []
        for pub, is_doi in study.publications:
            if is_doi:
                study_info['publications'].append(pubmed_linkifier([pub]))
            else:
                study_info['publications'].append(doi_linkifier([pub]))
        study_info['publications'] = ', '.join(study_info['publications'])

        if study_info['ebi_study_accession']:
            links = ''.join([
                EBI_LINKIFIER.format(a)
                for a in study_info['ebi_study_accession'].split(',')
            ])
            study_info['ebi_study_accession'] = '%s (%s)' % (
                links, study_info['ebi_submission_status'])

        self.render("public.html",
                    study_info=study_info,
                    artifacts_info=get_artifacts_information(
                        artifact_ids, False))
コード例 #15
0
ファイル: studies.py プロジェクト: experimentAccount0/qiita
def study_get_req(study_id, user_id):
    """Gather the study information available to the requesting user

    Parameters
    ----------
    study_id : int
        Study id to get prep template info for
    user_id : str
        User requesting the info

    Returns
    -------
    dict
        {'status': str, 'message': str, 'study_info': dict,
         'editable': bool}
        status is 'success' unless access is denied; study_info holds
        publications, people, sample counts and download-button flags.
    """
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error
    # Only ids are passed over the API, so build the objects here
    study = Study(study_id)
    requester = User(user_id)
    study_info = study.info

    # Sort publications into DOI and PubMed-id lists
    study_info['publication_doi'] = []
    study_info['publication_pid'] = []
    for pub, is_doi in study.publications:
        key = 'publication_doi' if is_doi else 'publication_pid'
        study_info[key].append(pub)

    study_info.update({
        'study_id': study.id,
        'study_title': study.title,
        'shared_with': [s.id for s in study.shared_with],
        'status': study.status,
        'ebi_study_accession': study.ebi_study_accession,
        'ebi_submission_status': study.ebi_submission_status})

    # Reduce the StudyPerson objects to plain dicts for display
    pi = study_info['principal_investigator']
    study_info['principal_investigator'] = {
        'name': pi.name,
        'email': pi.email,
        'affiliation': pi.affiliation}

    lab_person = study_info['lab_person']
    if lab_person:
        study_info['lab_person'] = {
            'name': lab_person.name,
            'email': lab_person.email,
            'affiliation': lab_person.affiliation}

    samples = study.sample_template
    study_info['num_samples'] = len(list(samples)) if samples is not None \
        else 0
    study_info['owner'] = study.owner.id
    # Study.has_access no_public=True, will return True only if the user_id is
    # the owner of the study or if the study is shared with the user_id
    study_info['has_access_to_raw_data'] = study.has_access(requester, True)

    study_info['show_biom_download_button'] = 'BIOM' in [
        a.artifact_type for a in study.artifacts()]
    study_info['show_raw_download_button'] = any(
        pt.artifact is not None for pt in study.prep_templates())

    return {'status': 'success',
            'message': '',
            'study_info': study_info,
            'editable': study.can_edit(requester)}
コード例 #16
0
ファイル: download.py プロジェクト: experimentAccount0/qiita
    def get(self, study_id):
        """Serve a download manifest for all BIOM data of a study.

        Builds the list of files belonging to the study's BIOM artifacts
        (plus each artifact's QIIME mapping file) and writes one line per
        file; the X-Archive-Files header asks the fronting server to stream
        them as a single zip (presumably nginx mod_zip — the '- size path
        name' line format matches its manifest; confirm against deployment).

        Parameters
        ----------
        study_id : str
            The study id, captured from the URL.

        Raises
        ------
        HTTPError
            405 when the user has no access to the study.
        """
        study_id = int(study_id)
        # Check access to study
        study_info = study_get_req(study_id, self.current_user.id)

        if study_info['status'] != 'success':
            raise HTTPError(
                405,
                "%s: %s, %s" % (study_info['message'], self.current_user.email,
                                str(study_id)))

        study = Study(study_id)
        basedir = get_db_files_base_dir()
        basedir_len = len(basedir) + 1
        # loop over artifacts and retrieve those that we have access to
        to_download = []
        for a in study.artifacts():
            if a.artifact_type == 'BIOM':
                # fid/index of the filepath tuples are not needed here; the
                # original code used enumerate() with an unused index
                for _, path, data_type in a.filepaths:
                    # ignore if tgz as they could create problems and the
                    # raw data is in the folder
                    if data_type == 'tgz':
                        continue
                    if data_type == 'directory':
                        # If we have a directory, we actually need to list
                        # all the files from the directory so NGINX can
                        # actually download all of them
                        for dp, _, fps in walk(path):
                            for fname in fps:
                                fullpath = join(dp, fname)
                                spath = fullpath
                                if fullpath.startswith(basedir):
                                    spath = fullpath[basedir_len:]
                                to_download.append((fullpath, spath, spath))
                    elif path.startswith(basedir):
                        spath = path[basedir_len:]
                        to_download.append((path, spath, spath))
                    else:
                        # We are not aware of any case that can trigger this
                        # situation, but we wanted to be overly cautious
                        # There is no test for this line cause we don't know
                        # how to trigger it
                        to_download.append((path, path, path))

                for pt in a.prep_templates:
                    qmf = pt.qiime_map_fp
                    if qmf is not None:
                        sqmf = qmf
                        if qmf.startswith(basedir):
                            sqmf = qmf[basedir_len:]
                        to_download.append(
                            (qmf, sqmf,
                             'mapping_files/%s_mapping_file.txt' % a.id))

        # If we don't have nginx, write a file that indicates this
        all_files = '\n'.join([
            "- %s /protected/%s %s" % (getsize(fp), sfp, n)
            for fp, sfp, n in to_download
        ])
        self.write("%s\n" % all_files)

        zip_fn = 'study_%d_%s.zip' % (study_id,
                                      datetime.now().strftime('%m%d%y-%H%M%S'))

        self.set_header('Content-Description', 'File Transfer')
        self.set_header('Expires', '0')
        self.set_header('Cache-Control', 'no-cache')
        self.set_header('X-Archive-Files', 'zip')
        self.set_header('Content-Disposition',
                        'attachment; filename=%s' % zip_fn)
        self.finish()