Esempio n. 1
0
    def get(self):
        """Split a study's uploaded files by the prep file's run prefixes.

        Reads the ``study_id``, ``prep_file`` and ``type`` request arguments,
        loads the prep file from the "uploads" mountpoint and hands
        ``self.write`` a dict with the keys ``per_prefix``, ``file_types``,
        ``selected`` and ``remaining``.
        """
        study_id = self.get_argument('study_id')
        prep_file = self.get_argument('prep_file')
        prep_type = self.get_argument('type')

        # TODO: Get file types for the artifact type
        # FILE TYPE IN POSITION 0 MUST BE DEFAULT FOR SELECTED
        file_types = supported_filepath_types(prep_type)

        selected = []
        not_selected = []
        _, base = get_mountpoint("uploads")[0]
        uploaded = get_files_from_uploads_folders(study_id)
        # NOTE(review): prep_file is taken from the request and joined into
        # the path unchecked -- confirm it cannot escape the upload folder.
        prep = pd.read_table(join(base, study_id, prep_file), sep='\t')

        # The filename is the second field of every upload entry. Indexing
        # (rather than unpacking) keeps both branches below in agreement
        # regardless of how many extra fields an entry carries.
        filenames = [entry[1] for entry in uploaded]

        if 'run_prefix' in prep.columns:
            # Use run_prefix column of prep template to auto-select
            # per-prefix uploaded files if available.
            per_prefix = True
            prep_prefixes = tuple(set(prep['run_prefix']))
            for filename in filenames:
                # Test against all prefixes in one call so every file ends up
                # in exactly one of the two lists, exactly once.
                if filename.startswith(prep_prefixes):
                    selected.append(filename)
                else:
                    not_selected.append(filename)
        else:
            per_prefix = False
            not_selected = filenames

        # Write out whether this prep template supports per-prefix files, as
        # well as the pre-selected and the remaining files
        self.write({
            'per_prefix': per_prefix,
            'file_types': file_types,
            'selected': selected,
            'remaining': not_selected
        })
Esempio n. 2
0
    def get(self):
        """Split a study's uploaded files by the prep file's run prefixes.

        Reads the ``study_id``, ``prep_file`` and ``type`` request arguments,
        loads the prep file from the "uploads" mountpoint and hands
        ``self.write`` a dict with the keys ``per_prefix``, ``file_types``,
        ``selected`` and ``remaining``.
        """
        study_id = self.get_argument('study_id')
        prep_file = self.get_argument('prep_file')
        prep_type = self.get_argument('type')

        # TODO: Get file types for the artifact type
        # FILE TYPE IN POSITION 0 MUST BE DEFAULT FOR SELECTED
        file_types = supported_filepath_types(prep_type)

        selected = []
        not_selected = []
        _, base = get_mountpoint("uploads")[0]
        uploaded = get_files_from_uploads_folders(study_id)
        # NOTE(review): prep_file is taken from the request and joined into
        # the path unchecked -- confirm it cannot escape the upload folder.
        prep = pd.read_table(join(base, study_id, prep_file), sep='\t')
        if 'run_prefix' in prep.columns:
            # Use run_prefix column of prep template to auto-select
            # per-prefix uploaded files if available.
            per_prefix = True
            # str.startswith accepts a tuple, so a single call checks every
            # prefix; each file is then classified once instead of once per
            # prefix, and is still reported when there are no prefixes.
            prep_prefixes = tuple(set(prep['run_prefix']))
            for _, filename in uploaded:
                if filename.startswith(prep_prefixes):
                    selected.append(filename)
                else:
                    not_selected.append(filename)
        else:
            per_prefix = False
            not_selected = [f for _, f in uploaded]

        # Write out whether this prep template supports per-prefix files, as
        # well as the pre-selected and the remaining files
        self.write({
            'per_prefix': per_prefix,
            'file_types': file_types,
            'selected': selected,
            'remaining': not_selected})
Esempio n. 3
0
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Sort the study's uploaded files into candidates for a new artifact

    The uploaded files of the study are split into the ones whose name starts
    with one of the prep template's ``run_prefix`` values and the rest. The
    split is only attempted when the artifact type supports a ``raw_*``
    filepath type and the prep template has a ``run_prefix`` column.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (artifact id, str)}
        where 'status' is always 'success' and 'message' is always empty,
        'remaining' holds the files that could not be categorized,
        'file_types' lists, per supported filepath type, its name, whether it
        is required and the files pre-assigned to it, 'num_prefixes' is the
        number of distinct run prefix values in the prep template (0 when no
        prefix matching was done) and 'artifacts' holds one (id, label) pair
        per artifact of the given type that the user can import files from.
    """
    supp_file_types = supported_filepath_types(artifact_type)
    uploaded = get_files_from_uploads_folders(study_id)
    prep_df = PrepTemplate(prep_template_id).to_dataframe()

    selected = []
    remaining = []
    has_raw_type = any(ft.startswith('raw_') for ft, _ in supp_file_types)
    if has_raw_type and 'run_prefix' in prep_df.columns:
        prefixes = tuple(set(prep_df['run_prefix']))
        num_prefixes = len(prefixes)
        for _, fname in uploaded:
            bucket = selected if fname.startswith(prefixes) else remaining
            bucket.append(fname)
    else:
        num_prefixes = 0
        remaining.extend(fname for _, fname in uploaded)

    # Nothing smarter can be done here than treating the first supported
    # filepath type as the default one: it receives every prefix-matched
    # file (an empty list when no matching happened), all others start empty
    default_type = supp_file_types[0]
    file_types = [(default_type[0], default_type[1], selected)]
    file_types.extend(
        (fp_type, req, []) for fp_type, req in supp_file_types[1:])

    # Offer the artifacts the user has access to, so files can be imported
    # from another artifact; the requested study is always part of the offer
    user_artifacts = User(user_id).user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    artifact_options = []
    for owner, artifacts in viewitems(user_artifacts):
        label = "%s (%d)" % (owner.title, owner.id)
        artifact_options.extend(
            (a.id, "%s - %s (%d)" % (label, a.name, a.id)) for a in artifacts)

    return {
        'status': 'success',
        'message': '',
        'remaining': remaining,
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
Esempio n. 4
0
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Sort the study's uploaded files into candidates for a new artifact

    The uploaded files of the study are split into the ones whose name starts
    with one of the prep template's ``run_prefix`` values and the rest. The
    split is only attempted when the artifact type supports a ``raw_*``
    filepath type other than ``raw_sff`` and the prep template has a
    ``run_prefix`` column.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (artifact id, str)}
        where 'status' is always 'success' and 'message' is always empty,
        'remaining' holds, sorted, the files that could not be categorized,
        'file_types' lists, per supported filepath type, its name, whether it
        is required and the files pre-assigned to it, 'num_prefixes' is the
        number of distinct run prefix values in the prep template (0 when no
        prefix matching was done) and 'artifacts' holds one (id, label) pair
        per artifact of the given type that the user can import files from.
    """
    supp_file_types = supported_filepath_types(artifact_type)
    uploaded = get_files_from_uploads_folders(study_id)
    prep_df = PrepTemplate(prep_template_id).to_dataframe()

    # Prefix matching needs a raw_* filepath type; raw_sff does not count
    uses_prefixes = False
    for ft, _ in supp_file_types:
        if ft != 'raw_sff' and ft.startswith('raw_'):
            uses_prefixes = True
            break
    uses_prefixes = uses_prefixes and 'run_prefix' in prep_df.columns

    names = [fname for _, fname in uploaded]
    if uses_prefixes:
        prefixes = tuple(set(prep_df['run_prefix']))
        num_prefixes = len(prefixes)
        selected = [n for n in names if n.startswith(prefixes)]
        remaining = [n for n in names if not n.startswith(prefixes)]
    else:
        num_prefixes = 0
        selected = []
        remaining = names

    # Nothing smarter can be done here than treating the first supported
    # filepath type as the default one: it receives every prefix-matched
    # file (an empty list when no matching happened), all others start empty
    head = supp_file_types[0]
    file_types = [(head[0], head[1], selected)]
    for fp_type, req in supp_file_types[1:]:
        file_types.append((fp_type, req, []))

    # Offer the artifacts the user has access to, so files can be imported
    # from another artifact; the requested study is always part of the offer
    user_artifacts = User(user_id).user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    artifact_options = [
        (a.id, "%s - %s (%d)" % ("%s (%d)" % (owner.title, owner.id),
                                 a.name, a.id))
        for owner, artifacts in viewitems(user_artifacts)
        for a in artifacts]

    return {'status': 'success',
            'message': '',
            'remaining': sorted(remaining),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}
Esempio n. 5
0
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to guess
    how those files should be added to the artifact of the given type. When
    the artifact type supports a ``raw_*`` filepath type other than
    ``raw_sff`` and the prep template has a ``run_prefix`` column, every file
    is assigned to the longest run prefix its name starts with; the sorted
    files of each prefix are then spread, in order, over the supported
    filepath types.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (artifact id, str)}
        where 'status' is always 'success', 'message' is empty or lists the
        run prefixes that matched more files than there are filepath types,
        'remaining' is the sorted list of files that could not be
        categorized, 'file_types' is a list of the available filetypes, if it
        is required or not and the list of categorized files for the given
        artifact type, 'num_prefixes' is the number of different run prefix
        values in the given prep template and 'artifacts' holds one
        (id, label) pair per artifact of the given type that the user can
        import files from.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template does not belong to the given study
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    notes = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        # Report the prep id that was actually requested, not the id of the
        # study that owns it
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%s) doesn't belong to the study "
            "(%d)" % (prep_template_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt_df = pt.to_dataframe()
    has_prefixed_type = any(ft.startswith('raw_') for ft, _ in supp_file_types
                            if ft != 'raw_sff')
    if has_prefixed_type and 'run_prefix' in pt_df.columns:
        prep_prefixes = tuple(set(pt_df['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
        # group files by prefix; a file claimed by a prefix is taken out of
        # the pool so that a shorter, colliding prefix cannot claim it again
        sfiles = defaultdict(list)
        for p in prep_prefixes:
            unclaimed = []
            for entry in uploaded:
                # the filename is the second field of every upload entry
                fname = entry[1]
                if fname.startswith(p):
                    sfiles[p].append(fname)
                else:
                    unclaimed.append(entry)
            uploaded = unclaimed
        # whatever is still in the pool matched no prefix at all
        remaining.extend(entry[1] for entry in uploaded)
        supp_file_types_len = len(supp_file_types)

        for prefix, files in sfiles.items():
            len_files = len(files)
            # if the number of files in the prefix group is larger than the
            # available columns add to the remaining group, if not put them
            # in the selected group
            if len_files > supp_file_types_len:
                remaining.extend(files)
                notes.append("'%s' has %d matches." % (prefix, len_files))
            else:
                files.sort()
                selected.append(files)
    else:
        num_prefixes = 0
        remaining = [entry[1] for entry in uploaded]

    # get file_types, format: filetype, required, list of files; the i-th
    # file of every selected group goes to the i-th filepath type
    file_types = [(t, req, [group[i] for group in selected if i < len(group)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # they want to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for owner, artifacts in user_artifacts.items():
        study_label = "%s (%d)" % (owner.title, owner.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = '\n'.join(['Check these run_prefix:'] + notes) if notes else ''

    return {
        'status': 'success',
        'message': message,
        'remaining': sorted(remaining),
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
Esempio n. 6
0
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to guess
    how those files should be added to the artifact of the given type. When
    the artifact type supports a ``raw_*`` filepath type other than
    ``raw_sff`` and the prep template has a ``run_prefix`` column, every file
    is assigned to the longest run prefix its name starts with; the sorted
    files of each prefix are then spread, in order, over the supported
    filepath types.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int,
                            'artifacts': list of (artifact id, str)}
        where 'status' is always 'success', 'message' is empty or lists the
        run prefixes that matched more files than there are filepath types,
        'remaining' is the sorted list of files that could not be
        categorized, 'file_types' is a list of the available filetypes, if it
        is required or not and the list of categorized files for the given
        artifact type, 'num_prefixes' is the number of different run prefix
        values in the given prep template and 'artifacts' holds one
        (id, label) pair per artifact of the given type that the user can
        import files from.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template does not belong to the given study
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    notes = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        # Report the prep id that was actually requested, not the id of the
        # study that owns it
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%s) doesn't belong to the study "
            "(%d)" % (prep_template_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt_df = pt.to_dataframe()
    has_prefixed_type = any(ft.startswith('raw_') for ft, _ in supp_file_types
                            if ft != 'raw_sff')
    if has_prefixed_type and 'run_prefix' in pt_df.columns:
        prep_prefixes = tuple(set(pt_df['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
        # group files by prefix; a file claimed by a prefix is taken out of
        # the pool in the same pass, so a shorter, colliding prefix cannot
        # claim it again and no list-membership scans are needed
        sfiles = defaultdict(list)
        for p in prep_prefixes:
            unclaimed = []
            for entry in uploaded:
                # the filename is the second field of every upload entry
                fname = entry[1]
                if fname.startswith(p):
                    sfiles[p].append(fname)
                else:
                    unclaimed.append(entry)
            uploaded = unclaimed
        # whatever is still in the pool matched no prefix at all
        remaining.extend(entry[1] for entry in uploaded)
        supp_file_types_len = len(supp_file_types)

        for prefix, files in viewitems(sfiles):
            len_files = len(files)
            # if the number of files in the prefix group is larger than the
            # available columns add to the remaining group, if not put them
            # in the selected group
            if len_files > supp_file_types_len:
                remaining.extend(files)
                notes.append("'%s' has %d matches." % (prefix, len_files))
            else:
                files.sort()
                selected.append(files)
    else:
        num_prefixes = 0
        remaining = [f for _, f in uploaded]

    # get file_types, format: filetype, required, list of files; the i-th
    # file of every selected group goes to the i-th filepath type
    file_types = [(t, req, [group[i] for group in selected if i < len(group)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # they want to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for owner, artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (owner.title, owner.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = '\n'.join(['Check these run_prefix:'] + notes) if notes else ''

    return {'status': 'success',
            'message': message,
            'remaining': sorted(remaining),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}