Example 1
def audit_item_status(value, system):
    if 'status' not in value:
        return

    level = STATUS_LEVEL.get(value['status'], 1)
    if level == 0:
        return

    context = system['context']
    request = system['request']
    linked = set()
    for schema_path in context.type_info.schema_links:
        if schema_path in ['supercedes', 'step_run', 'other_processed_files']:
            continue
        linked.update(simple_path_ids(value, schema_path))

    for path in linked:
        linked_value = request.embed(path + '@@object')
        if 'status' not in linked_value:
            continue
        if linked_value['status'] == 'disabled':
            continue
        linked_level = STATUS_LEVEL.get(linked_value['status'], 50)
#        if linked_level == 0:
#            detail = '{} {} has {} subobject {}'.format(
#                value['status'], value['@id'], linked_value['status'], linked_value['@id'])
#            yield AuditFailure('mismatched status', detail, level='INTERNAL_ACTION')
        if linked_level < level:
            detail = '{} {} has {} subobject {}'.format(
                value['status'], value['@id'], linked_value['status'], linked_value['@id'])
            yield AuditFailure('mismatched status', detail, level='INTERNAL_ACTION')
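These audit snippets lean on two names that none of the examples define: the STATUS_LEVEL mapping and the simple_path_ids helper. A minimal sketch of what they might look like follows; the mapping values and the helper's exact behaviour are assumptions chosen only to be consistent with how the snippets use them, not the project's actual definitions.

# Illustrative sketch only -- hypothetical values and assumed behaviour.
# 0 means "ignore this object"; higher numbers mean "more public".
STATUS_LEVEL = {
    'deleted': 0,
    'replaced': 0,
    'in progress': 50,
    'released': 100,
}


def simple_path_ids(obj, path):
    # Assumed behaviour: yield every value reachable along a dot-separated
    # path, descending into lists at any level.
    if isinstance(path, str):
        path = path.split('.')
    if not path:
        yield obj
        return
    name, remaining = path[0], path[1:]
    if not isinstance(obj, dict) or name not in obj:
        return
    value = obj[name]
    if isinstance(value, list):
        for member in value:
            yield from simple_path_ids(member, remaining)
    else:
        yield from simple_path_ids(value, remaining)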
Example 2
def audit_item_status(value, system):
    if 'status' not in value:
        return

    level = STATUS_LEVEL.get(value['status'], 50)

    if level == 0:
        return

    if value['status'] in ['revoked', 'archived']:
        level += 50

    context = system['context']
    request = system['request']
    linked = set()

    for schema_path in context.type_info.schema_links:
        if schema_path in [
                'supersedes', 'step_run', 'derived_from', 'controlled_by',
                'possible_controls'
        ]:
            continue
        else:
            linked.update(simple_path_ids(value, schema_path))

    for path in linked:
        linked_value = request.embed(path + '@@object')
        if 'status' not in linked_value:
            continue
        if linked_value['status'] == 'disabled':
            continue
        if (  # Special case: a revoked file can have a deleted replicate (ticket #2938)
                'File' in value['@type'] and value['status'] == 'revoked'
                and 'Replicate' in linked_value['@type']
                and linked_value['status'] == 'deleted'):
            continue
        linked_level = STATUS_LEVEL.get(linked_value['status'], 50)
        if linked_value['status'] in ['revoked', 'archived']:
            linked_level += 50
        if linked_level == 0:
            detail = '{} {} has {} subobject {}'.format(
                value['status'], value['@id'], linked_value['status'],
                linked_value['@id'])
            yield AuditFailure('mismatched status',
                               detail,
                               level='INTERNAL_ACTION')
        elif linked_level < level:
            detail = '{} {} has {} subobject {}'.format(
                value['status'], value['@id'], linked_value['status'],
                linked_value['@id'])
            yield AuditFailure('mismatched status',
                               detail,
                               level='INTERNAL_ACTION')
def make_experiment_cell(paths, experiment):
    last = []
    for path in paths:
        cell_value = []
        for value in simple_path_ids(experiment, path):
            if str(value) not in cell_value:
                cell_value.append(str(value))
        if last and cell_value:
            last = [v + ' ' + cell_value[0] for v in last]
        else:
            last = cell_value
    return ', '.join(set(last))
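A usage sketch for make_experiment_cell, assuming the simple_path_ids stand-in sketched after Example 1 and an invented experiment fragment: every later path contributes only its first collected value, which is appended to each value gathered so far.

# Hypothetical data; real experiment objects embed far more fields.
experiment = {
    'replicates': [
        {'library': {'biosample': {'age': '8', 'age_units': 'week'}}},
        {'library': {'biosample': {'age': '10', 'age_units': 'week'}}},
    ],
}
cell = make_experiment_cell(
    ['replicates.library.biosample.age',
     'replicates.library.biosample.age_units'],
    experiment,
)
# Both ages pick up the first 'age_units' value; the final set() join leaves
# the order unspecified, e.g. '8 week, 10 week' or '10 week, 8 week'.
print(cell)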
Example 4
def audit_item_status(value, system):
    if 'status' not in value:
        return

    level = STATUS_LEVEL.get(value['status'], 50)

    if level == 0:
        return

    if value['status'] in ['revoked', 'archived']:
        level += 50

    context = system['context']
    request = system['request']
    linked = set()

    for schema_path in context.type_info.schema_links:
        if schema_path in ['supersedes',
                           'step_run',
                           'derived_from',
                           'controlled_by',
                           'possible_controls',
                           'elements']:
            continue
        else:
            linked.update(simple_path_ids(value, schema_path))

    for path in linked:
        linked_value = request.embed(path + '@@object')
        if 'status' not in linked_value:
            continue
        if linked_value['status'] == 'disabled':
            continue
        if (  # Special case: a revoked file can have a deleted replicate (ticket #2938)
            'File' in value['@type'] and
            value['status'] == 'revoked' and
            'Replicate' in linked_value['@type'] and
            linked_value['status'] == 'deleted'
        ):
            continue
        linked_level = STATUS_LEVEL.get(linked_value['status'], 50)
        if linked_value['status'] in ['revoked', 'archived']:
            linked_level += 50
        if linked_level == 0:
            detail = '{} {} has {} subobject {}'.format(
                value['status'], value['@id'], linked_value['status'], linked_value['@id'])
            yield AuditFailure('mismatched status', detail, level='INTERNAL_ACTION')
        elif linked_level < level:
            detail = '{} {} has {} subobject {}'.format(
                value['status'], value['@id'], linked_value['status'], linked_value['@id'])
            yield AuditFailure('mismatched status', detail, level='INTERNAL_ACTION')
Example 5
def file_matches_file_params(file_, positive_file_param_set):
    # Expects a file_param_set whose keys have had the 'files.' prefix
    # stripped (files.file_type -> file_type) and whose params with field
    # negation (e.g. file_type!=bigWig) have been filtered out. Param values
    # should be coerced to ints ('2' -> 2) or booleans ('true' -> True)
    # and collected into a set for comparison with the file's values.
    for field, set_of_param_values in positive_file_param_set.items():
        file_value = list(simple_path_ids(file_, field))
        if not file_value:
            return False
        if '*' in set_of_param_values:
            continue
        if not set_of_param_values.intersection(file_value):
            return False
    return True
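A usage sketch for file_matches_file_params with an invented positive_file_param_set shaped as the comment above describes (prefix stripped, negations removed, values coerced and collected into sets); the simple_path_ids stand-in sketched after Example 1 is assumed.

# Hypothetical params and file; '*' only requires the field to be present.
positive_file_param_set = {
    'file_format': {'bigWig'},
    'biological_replicates': {1, 2},
    'preferred_default': {True},
    'assembly': {'*'},
}
matching_file = {
    'file_format': 'bigWig',
    'biological_replicates': [2],
    'preferred_default': True,
    'assembly': 'GRCh38',
}
print(file_matches_file_params(matching_file, positive_file_param_set))  # True

# A file missing a requested field (or with no overlapping values) fails.
print(file_matches_file_params({'file_format': 'bigWig'}, positive_file_param_set))  # False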
Example 6
def make_cell(header_column, row, exp_data_row):
    temp = []
    for column in _tsv_mapping[header_column]:
        c_value = []
        for value in simple_path_ids(row, column):
            if str(value) not in c_value:
                c_value.append(str(value))
        if column == 'replicates.library.biosample.post_synchronization_time' and len(temp):
            if len(c_value):
                temp[0] = temp[0] + ' + ' + c_value[0]
        elif len(temp):
            if len(c_value):
                temp = [x + ' ' + c_value[0] for x in temp]
        else:
            temp = c_value
    exp_data_row.append(', '.join(list(set(temp))))
Example 8
def make_audit_cell(header_column, experiment_json, file_json):
    categories = []
    paths = []
    for column in _audit_mapping[header_column]:
        for value in simple_path_ids(experiment_json, column):
            if 'path' in column:
                paths.append(value)
            elif 'category' in column:
                categories.append(value)
    data = []
    for i, path in enumerate(paths):
        if '/files/' in path and file_json.get('title', '') not in path:
            # Skip file audits that don't belong to the file
            continue
        else:
            data.append(categories[i])
    return ', '.join(list(set(data)))
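A usage sketch for make_audit_cell. The _audit_mapping entry and the audit payload below are invented, but they follow what the code assumes: each mapped column yields parallel 'path' and 'category' values, and file-level audits are dropped unless the file's title appears in the audit path. The simple_path_ids stand-in sketched after Example 1 is assumed.

# Hypothetical mapping and data for illustration only.
_audit_mapping = {
    'Audit WARNING': ['audit.WARNING.path', 'audit.WARNING.category'],
}
experiment_json = {
    'audit': {
        'WARNING': [
            {'path': '/experiments/ENCSR000AAA/', 'category': 'missing documents'},
            {'path': '/files/ENCFF000AAA/', 'category': 'low read depth'},
            {'path': '/files/ENCFF000BBB/', 'category': 'low read depth'},
        ],
    },
}
file_json = {'title': 'ENCFF000AAA'}
# Keeps the experiment-level audit and the audit on ENCFF000AAA, drops the
# audit that points at a different file; the set() join leaves order unspecified.
print(make_audit_cell('Audit WARNING', experiment_json, file_json))
# e.g. 'missing documents, low read depth'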
Example 9
def make_audit_cell(header_column, experiment_json, file_json):
    categories = []
    paths = []
    for column in _audit_mapping[header_column]:
        for value in simple_path_ids(experiment_json, column):
            if 'path' in column:
                paths.append(value)
            elif 'category' in column:
                categories.append(value)
    data = []
    for i, path in enumerate(paths):
        if '/files/' in path and file_json.get('title', '') not in path:
            # Skip file audits that don't belong to the file
            continue
        else:
            data.append(categories[i])
    return ', '.join(list(set(data)))
def make_file_cell(paths, file_):
    # Quick return if one level deep.
    if len(paths) == 1 and '.' not in paths[0]:
        value = file_.get(paths[0], '')
        if isinstance(value, list):
            return ', '.join([str(v) for v in value])
        return value
    # Else crawl nested objects.
    last = []
    for path in paths:
        cell_value = []
        for value in simple_path_ids(file_, path):
            cell_value.append(str(value))
        if last and cell_value:
            last = [v + ' ' + cell_value[0] for v in last]
        else:
            last = cell_value
    return ', '.join(sorted(set(last)))
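A usage sketch for make_file_cell showing both branches (the quick return for a single shallow path and the crawl over nested paths), with an invented file fragment; the simple_path_ids stand-in sketched after Example 1 is assumed.

# Hypothetical file fragment.
file_ = {
    'derived_from': ['/files/ENCFF000AAA/', '/files/ENCFF000BBB/'],
    'replicate': {'biological_replicate_number': 1,
                  'technical_replicate_number': 1},
}
# Single shallow path: the quick-return branch joins the list directly.
print(make_file_cell(['derived_from'], file_))
# '/files/ENCFF000AAA/, /files/ENCFF000BBB/'

# Nested paths: the crawl branch appends later values to the first path's.
print(make_file_cell(
    ['replicate.biological_replicate_number',
     'replicate.technical_replicate_number'],
    file_,
))
# '1 1'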
def metadata_tsv(context, request):
    qs = QueryString(request)
    param_list = qs.group_values_by_key()
    if 'referrer' in param_list:
        search_path = '/{}/'.format(param_list.pop('referrer')[0])
    else:
        search_path = '/search/'
    type_param = param_list.get('type', [''])[0]
    cart_uuids = param_list.get('cart', [])

    # Only allow specific type= query-string values, or cart=.
    if not type_param and not cart_uuids:
        raise HTTPBadRequest(
            explanation='URL must include a "type" or "cart" parameter.')
    if not type_param.lower() in _allowed_types:
        raise HTTPBadRequest(explanation='"{}" not a valid type for metadata'.
                             format(type_param))

    # Handle special-case metadata.tsv generation.
    if type_param:
        if type_param.lower() == 'annotation':
            return _get_annotation_metadata(request, search_path, param_list)
        if type_param.lower() == 'publicationdata':
            return _get_publicationdata_metadata(request)

    param_list['field'] = []
    header = []
    file_attributes = []
    for prop in _tsv_mapping:
        if prop not in _excluded_columns:
            header.append(prop)
            if _tsv_mapping[prop][0].startswith('files'):
                file_attributes = file_attributes + [_tsv_mapping[prop][0]]
        param_list['field'] = param_list['field'] + _tsv_mapping[prop]

    # Handle metadata.tsv lines from cart-generated files.txt.
    if cart_uuids:
        # metadata.tsv line includes cart UUID, so load the specified cart and
        # get its "elements" property for a list of items to retrieve.
        cart_uuid = cart_uuids.pop()
        del param_list['cart']
        try:
            cart = request.embed(cart_uuid, '@@object')
        except KeyError:
            raise HTTPBadRequest(explanation='Specified cart does not exist.')
        else:
            if cart.get('elements'):
                param_list['@id'] = cart['elements']
    else:
        # If the metadata.tsv line includes a JSON payload, get its "elements"
        # property for a list of items to retrieve.
        try:
            elements = request.json.get('elements')
        except ValueError:
            pass
        else:
            param_list['@id'] = elements
    default_params = [
        ('field', 'audit'),
        ('limit', 'all'),
    ]
    field_params = [('field', p) for p in param_list.get('field', [])]
    at_id_params = [('@id', p) for p in param_list.get('@id', [])]
    qs.drop('limit')

    # Check for the "visualizable" and/or "raw" options in the query string for file filtering.
    visualizable_only = qs.is_param('option', 'visualizable')
    raw_only = qs.is_param('option', 'raw')
    qs.drop('option')

    qs.extend(default_params + field_params + at_id_params)
    path = '{}?{}'.format(search_path, str(qs))
    results = request.embed(quote(path), as_user=True)
    rows = []
    for experiment_json in results['@graph']:
        if experiment_json.get('files', []):
            exp_data_row = []
            for column in header:
                if not _tsv_mapping[column][0].startswith('files'):
                    make_cell(column, experiment_json, exp_data_row)

            f_attributes = [
                'files.title', 'files.file_type', 'files.file_format',
                'files.file_format_type', 'files.output_type', 'files.assembly'
            ]

            for f in experiment_json['files']:
                if not files_prop_param_list(f, param_list):
                    continue
                if visualizable_only and not is_file_visualizable(f):
                    continue
                if raw_only and f.get('assembly'):
                    # "raw" option only allows files w/o assembly.
                    continue
                if restricted_files_present(f):
                    continue
                if is_no_file_available(f):
                    continue
                f['href'] = request.host_url + f['href']
                f_row = []
                for attr in f_attributes:
                    f_row.append(f.get(attr[6:], ''))
                data_row = f_row + exp_data_row
                for prop in file_attributes:
                    if prop in f_attributes:
                        continue
                    path = prop[6:]
                    temp = []
                    for value in simple_path_ids(f, path):
                        temp.append(str(value))
                    if prop == 'files.replicate.rbns_protein_concentration':
                        if 'replicate' in f and 'rbns_protein_concentration_units' in f[
                                'replicate']:
                            temp[0] = temp[0] + ' ' + f['replicate'][
                                'rbns_protein_concentration_units']
                    if prop in ['files.paired_with', 'files.derived_from']:
                        # chop the path down to just the accession
                        if len(temp):
                            new_values = [t[7:-1] for t in temp]
                            temp = new_values
                    data = list(set(temp))
                    data.sort()
                    data_row.append(', '.join(data))
                audit_info = [
                    make_audit_cell(audit_type, experiment_json, f)
                    for audit_type in _audit_mapping
                ]
                data_row.extend(audit_info)
                rows.append(data_row)
    fout = io.StringIO()
    writer = csv.writer(fout, delimiter='\t', lineterminator='\n')
    header.extend([prop for prop in _audit_mapping])
    writer.writerow(header)
    writer.writerows(rows)
    return Response(content_type='text/tsv',
                    body=fout.getvalue(),
                    content_disposition='attachment;filename="%s"' %
                    'metadata.tsv')
Example 12
def audit_item_relations_status(value, system):
    if 'status' not in value:
        return

    level = STATUS_LEVEL.get(value['status'], 50)

    context = system['context']
    request = system['request']

    for schema_path in context.type_info.schema_links:
        if schema_path in ['supersedes']:
            for path in simple_path_ids(value, schema_path):
                linked_value = request.embed(path + '@@object')
                if 'status' not in linked_value:
                    continue
                else:
                    linked_level = STATUS_LEVEL.get(linked_value['status'], 50)
                    detail = \
                        '{} with status \'{}\' supersedes {} with status \'{}\''.format(
                            value['@id'],
                            value['status'],
                            linked_value['@id'],
                            linked_value['status']
                            )
                    if level == 100 and linked_level in [0, 50, 100]:
                        yield AuditFailure('mismatched status',
                                           detail,
                                           level='INTERNAL_ACTION')
                    elif level == 50 and linked_level in [0, 50]:
                        yield AuditFailure('mismatched status',
                                           detail,
                                           level='INTERNAL_ACTION')
                    elif level in [30, 40] and linked_level in [0, 50, 100]:
                        yield AuditFailure('mismatched status',
                                           detail,
                                           level='INTERNAL_ACTION')

        elif schema_path in [
                'derived_from', 'controlled_by', 'possible_controls'
        ]:
            message = 'has a possible control'
            if schema_path == 'derived_from':
                message = 'is derived from'
            elif schema_path == 'controlled_by':
                message = 'is controlled by'
            for path in simple_path_ids(value, schema_path):
                linked_value = request.embed(path + '@@object')
                if 'status' not in linked_value:
                    continue
                else:
                    linked_level = STATUS_LEVEL.get(linked_value['status'], 50)
                    if level > linked_level:
                        detail = \
                            '{} with status \'{}\' {} {} with status \'{}\''.format(
                                value['@id'],
                                value['status'],
                                message,
                                linked_value['@id'],
                                linked_value['status']
                                )
                        yield AuditFailure('mismatched status',
                                           detail,
                                           level='INTERNAL_ACTION')
Example 13
def audit_item_relations_status(value, system):
    if 'status' not in value:
        return

    level = STATUS_LEVEL.get(value['status'], 50)

    context = system['context']
    request = system['request']

    for schema_path in context.type_info.schema_links:
        if schema_path in ['supersedes']:
            for path in simple_path_ids(value, schema_path):
                linked_value = request.embed(path + '@@object')
                if 'status' not in linked_value:
                    continue
                else:
                    linked_level = STATUS_LEVEL.get(
                        linked_value['status'], 50)
                    detail = \
                        '{} with status \'{}\' supersedes {} with status \'{}\''.format(
                            value['@id'],
                            value['status'],
                            linked_value['@id'],
                            linked_value['status']
                            )
                    if level == 100 and linked_level in [0, 50, 100]:
                        yield AuditFailure(
                            'mismatched status',
                            detail,
                            level='INTERNAL_ACTION')
                    elif level == 50 and linked_level in [0, 50]:
                        yield AuditFailure(
                            'mismatched status',
                            detail,
                            level='INTERNAL_ACTION')
                    elif level in [30, 40] and linked_level in [0, 50, 100]:
                        yield AuditFailure(
                            'mismatched status',
                            detail,
                            level='INTERNAL_ACTION')

        elif schema_path in ['derived_from',
                             'controlled_by',
                             'possible_controls']:
            message = 'has a possible control'
            if schema_path == 'derived_from':
                message = 'is derived from'
            elif schema_path == 'controlled_by':
                message = 'is controlled by'
            for path in simple_path_ids(value, schema_path):
                linked_value = request.embed(path + '@@object')
                if 'status' not in linked_value:
                    continue
                else:
                    linked_level = STATUS_LEVEL.get(
                        linked_value['status'], 50)
                    if level > linked_level:
                        detail = \
                            '{} with status \'{}\' {} {} with status \'{}\''.format(
                                value['@id'],
                                value['status'],
                                message,
                                linked_value['@id'],
                                linked_value['status']
                                )
                        yield AuditFailure(
                            'mismatched status',
                            detail,
                            level='INTERNAL_ACTION')
Example 14
def metadata_tsv(context, request):
    param_list = parse_qs(request.matchdict['search_params'])
    if 'referrer' in param_list:
        search_path = '/{}/'.format(param_list.pop('referrer')[0])
    else:
        search_path = '/search/'
    param_list['field'] = []
    header = []
    file_attributes = []
    for prop in _tsv_mapping:
        header.append(prop)
        param_list['field'] = param_list['field'] + _tsv_mapping[prop]
        if _tsv_mapping[prop][0].startswith('files'):
            file_attributes = file_attributes + [_tsv_mapping[prop][0]]
    param_list['limit'] = ['all']
    path = '{}?{}'.format(search_path, urlencode(param_list, True))
    results = request.embed(path, as_user=True)
    rows = []
    for experiment_json in results['@graph']:
        if experiment_json['files']:
            exp_data_row = []
            for column in header:
                if not _tsv_mapping[column][0].startswith('files'):
                    make_cell(column, experiment_json, exp_data_row)

            f_attributes = [
                'files.title', 'files.file_type', 'files.output_type'
            ]

            for f in experiment_json['files']:
                if 'files.file_type' in param_list:
                    if f['file_type'] not in param_list['files.file_type']:
                        continue
                f['href'] = request.host_url + f['href']
                f_row = []
                for attr in f_attributes:
                    f_row.append(f[attr[6:]])
                data_row = f_row + exp_data_row
                for prop in file_attributes:
                    if prop in f_attributes:
                        continue
                    path = prop[6:]
                    temp = []
                    for value in simple_path_ids(f, path):
                        temp.append(str(value))
                    if prop == 'files.replicate.rbns_protein_concentration':
                        if 'replicate' in f and 'rbns_protein_concentration_units' in f[
                                'replicate']:
                            temp[0] = temp[0] + ' ' + f['replicate'][
                                'rbns_protein_concentration_units']
                    if prop == 'files.paired_with':
                        # chop the path down to just the accession
                        if len(temp):
                            new_values = [t[7:-1] for t in temp]
                            temp = new_values
                    data = list(set(temp))
                    data.sort()
                    data_row.append(', '.join(data))
                audit_info = [
                    make_audit_cell(audit_type, experiment_json, f)
                    for audit_type in _audit_mapping
                ]
                data_row.extend(audit_info)
                rows.append(data_row)
    fout = io.StringIO()
    writer = csv.writer(fout, delimiter='\t')
    header.extend([prop for prop in _audit_mapping])
    writer.writerow(header)
    writer.writerows(rows)
    return Response(content_type='text/tsv',
                    body=fout.getvalue(),
                    content_disposition='attachment;filename="%s"' %
                    'metadata.tsv')
Example 15
def metadata_tsv(context, request):
    param_list = parse_qs(request.matchdict['search_params'])
    if 'referrer' in param_list:
        search_path = '/{}/'.format(param_list.pop('referrer')[0])
    else:
        search_path = '/search/'
    type_param = param_list.get('type', [''])[0]
    if type_param and type_param.lower() == 'annotation':
        return _get_annotation_metadata(request, search_path, param_list)
    param_list['field'] = []
    header = []
    file_attributes = []
    for prop in _tsv_mapping:
        if prop not in _excluded_columns:
            header.append(prop)
            if _tsv_mapping[prop][0].startswith('files'):
                file_attributes = file_attributes + [_tsv_mapping[prop][0]]
        param_list['field'] = param_list['field'] + _tsv_mapping[prop]
        
    # Handle metadata.tsv lines from cart-generated files.txt.
    cart_uuids = param_list.get('cart', [])
    if cart_uuids:
        # metadata.tsv line includes cart UUID, so load the specified cart and
        # get its "elements" property for a list of items to retrieve.
        cart_uuid = cart_uuids.pop()
        del param_list['cart']
        try:
            cart = request.embed(cart_uuid, '@@object')
        except KeyError:
            pass
        else:
            if cart.get('elements'):
                param_list['@id'] = cart['elements']
    else:
        # If the metadata.tsv line includes a JSON payload, get its "elements"
        # property for a list of items to retrieve.
        try:
            elements = request.json.get('elements')
        except ValueError:
            pass
        else:
            param_list['@id'] = elements

    param_list['limit'] = ['all']
    path = '{}?{}'.format(search_path, urlencode(param_list, True))
    results = request.embed(path, as_user=True)
    rows = []
    for experiment_json in results['@graph']:
        if experiment_json.get('files', []):
            exp_data_row = []
            for column in header:
                if not _tsv_mapping[column][0].startswith('files'):
                    make_cell(column, experiment_json, exp_data_row)

            f_attributes = ['files.title', 'files.file_type',
                            'files.output_type']

            for f in experiment_json['files']:
                if 'files.file_type' in param_list:
                    if f['file_type'] not in param_list['files.file_type']:
                        continue
                if restricted_files_present(f):
                    continue
                if is_no_file_available(f):
                    continue
                f['href'] = request.host_url + f['href']
                f_row = []
                for attr in f_attributes:
                    f_row.append(f[attr[6:]])
                data_row = f_row + exp_data_row
                for prop in file_attributes:
                    if prop in f_attributes:
                        continue
                    path = prop[6:]
                    temp = []
                    for value in simple_path_ids(f, path):
                        temp.append(str(value))
                    if prop == 'files.replicate.rbns_protein_concentration':
                        if 'replicate' in f and 'rbns_protein_concentration_units' in f['replicate']:
                            temp[0] = temp[0] + ' ' + f['replicate']['rbns_protein_concentration_units']
                    if prop in ['files.paired_with', 'files.derived_from']:
                        # chop the path down to just the accession
                        if len(temp):
                            new_values = [t[7:-1] for t in temp]
                            temp = new_values
                    data = list(set(temp))
                    data.sort()
                    data_row.append(', '.join(data))
                audit_info = [make_audit_cell(audit_type, experiment_json, f) for audit_type in _audit_mapping]
                data_row.extend(audit_info)
                rows.append(data_row)
    fout = io.StringIO()
    writer = csv.writer(fout, delimiter='\t')
    header.extend([prop for prop in _audit_mapping])
    writer.writerow(header)
    writer.writerows(rows)
    return Response(
        content_type='text/tsv',
        body=fout.getvalue(),
        content_disposition='attachment;filename="%s"' % 'metadata.tsv'
    )
Example 16
def metadata_tsv(context, request):
    param_list = parse_qs(request.matchdict['search_params'])
    if 'referrer' in param_list:
        search_path = '/{}/'.format(param_list.pop('referrer')[0])
    else:
        search_path = '/search/'
    type_param = param_list.get('type', [''])[0]
    if type_param and type_param.lower() == 'annotation':
        return _get_annotation_metadata(request, search_path, param_list)
    param_list['field'] = []
    header = []
    file_attributes = []
    for prop in _tsv_mapping:
        if prop not in _excluded_columns:
            header.append(prop)
            if _tsv_mapping[prop][0].startswith('files'):
                file_attributes = file_attributes + [_tsv_mapping[prop][0]]
        param_list['field'] = param_list['field'] + _tsv_mapping[prop]
        
    # Handle metadata.tsv lines from cart-generated files.txt.
    cart_uuids = param_list.get('cart', [])
    if cart_uuids:
        # metadata.tsv line includes cart UUID, so load the specified cart and
        # get its "elements" property for a list of items to retrieve.
        cart_uuid = cart_uuids.pop()
        del param_list['cart']
        try:
            cart = request.embed(cart_uuid, '@@object')
        except KeyError:
            pass
        else:
            if cart.get('elements'):
                param_list['@id'] = cart['elements']
    else:
        # If the metadata.tsv line includes a JSON payload, get its "elements"
        # property for a list of items to retrieve.
        try:
            elements = request.json.get('elements')
        except ValueError:
            pass
        else:
            param_list['@id'] = elements

    param_list['limit'] = ['all']
    path = '{}?{}'.format(search_path, quote(urlencode(param_list, True)))
    results = request.embed(path, as_user=True)
    rows = []
    for experiment_json in results['@graph']:
        if experiment_json.get('files', []):
            exp_data_row = []
            for column in header:
                if not _tsv_mapping[column][0].startswith('files'):
                    make_cell(column, experiment_json, exp_data_row)

            f_attributes = ['files.title', 'files.file_type',
                            'files.output_type']

            for f in experiment_json['files']:
                # If we're looking for a file type but it doesn't match, ignore file
                if not files_prop_param_list(f, param_list):
                    continue
                if restricted_files_present(f):
                    continue
                if is_no_file_available(f):
                    continue
                f['href'] = request.host_url + f['href']
                f_row = []
                for attr in f_attributes:
                    f_row.append(f[attr[6:]])
                data_row = f_row + exp_data_row
                for prop in file_attributes:
                    if prop in f_attributes:
                        continue
                    path = prop[6:]
                    temp = []
                    for value in simple_path_ids(f, path):
                        temp.append(str(value))
                    if prop == 'files.replicate.rbns_protein_concentration':
                        if 'replicate' in f and 'rbns_protein_concentration_units' in f['replicate']:
                            temp[0] = temp[0] + ' ' + f['replicate']['rbns_protein_concentration_units']
                    if prop in ['files.paired_with', 'files.derived_from']:
                        # chop the path down to just the accession
                        if len(temp):
                            new_values = [t[7:-1] for t in temp]
                            temp = new_values
                    data = list(set(temp))
                    data.sort()
                    data_row.append(', '.join(data))
                audit_info = [make_audit_cell(audit_type, experiment_json, f) for audit_type in _audit_mapping]
                data_row.extend(audit_info)
                rows.append(data_row)
    fout = io.StringIO()
    writer = csv.writer(fout, delimiter='\t', lineterminator='\n')
    header.extend([prop for prop in _audit_mapping])
    writer.writerow(header)
    writer.writerows(rows)
    return Response(
        content_type='text/tsv',
        body=fout.getvalue(),
        content_disposition='attachment;filename="%s"' % 'metadata.tsv'
    )
Example 17
def audit_item_status(value, system):
    if 'status' not in value:
        return

    level = STATUS_LEVEL.get(value['status'], 50)

    if level == 0:
        return

    if value['status'] in ['revoked', 'archived']:
        level += 50

    context = system['context']
    request = system['request']
    linked = set()

    for schema_path in context.type_info.schema_links:
        if schema_path in [
                'supersedes', 'step_run', 'derived_from', 'controlled_by',
                'possible_controls', 'elements'
        ]:
            continue
        else:
            linked.update(simple_path_ids(value, schema_path))

    for path in linked:
        # Avoid pulling the full @@object frame into request._embedded_uuids.
        linked_value = request.embed(
            path +
            '@@filtered_object?include=@id&include=@type&include=uuid&include=status'
        )
        if 'status' not in linked_value:
            continue
        if linked_value['status'] == 'disabled':
            continue
        if (  # Special case: a revoked file can have a deleted replicate (ticket #2938)
                'File' in value['@type'] and value['status'] == 'revoked'
                and 'Replicate' in linked_value['@type']
                and linked_value['status'] == 'deleted'):
            continue
        linked_level = STATUS_LEVEL.get(linked_value['status'], 50)
        if linked_value['status'] in ['revoked', 'archived']:
            linked_level += 50
        if linked_level == 0:
            detail = ('{} {} {} has {} subobject {} {}'.format(
                value['status'].capitalize(),
                space_in_words(value['@type'][0]).lower(),
                audit_link(path_to_text(value['@id']), value['@id']),
                linked_value['status'],
                space_in_words(linked_value['@type'][0]).lower(),
                audit_link(path_to_text(linked_value['@id']),
                           linked_value['@id'])))
            yield AuditFailure('mismatched status',
                               detail,
                               level='INTERNAL_ACTION')
        elif linked_level < level:
            detail = ('{} {} {} has {} subobject {} {}'.format(
                value['status'].capitalize(),
                space_in_words(value['@type'][0]).lower(),
                audit_link(path_to_text(value['@id']), value['@id']),
                linked_value['status'],
                space_in_words(linked_value['@type'][0]).lower(),
                audit_link(path_to_text(linked_value['@id']),
                           linked_value['@id'])))
            yield AuditFailure('mismatched status',
                               detail,
                               level='INTERNAL_ACTION')
Example 18
def metadata_tsv(context, request):
    param_list = parse_qs(request.matchdict['search_params'])
    if 'referrer' in param_list:
        search_path = '/{}/'.format(param_list.pop('referrer')[0])
    else:
        search_path = '/search/'
    param_list['field'] = []
    header = []
    file_attributes = []
    for prop in _tsv_mapping:
        header.append(prop)
        param_list['field'] = param_list['field'] + _tsv_mapping[prop]
        if _tsv_mapping[prop][0].startswith('files'):
            file_attributes = file_attributes + [_tsv_mapping[prop][0]]
    param_list['limit'] = ['all']
    path = '{}?{}'.format(search_path, urlencode(param_list, True))
    results = request.embed(path, as_user=True)
    rows = []
    for experiment_json in results['@graph']:
        if experiment_json['files']:
            exp_data_row = []
            for column in header:
                if not _tsv_mapping[column][0].startswith('files'):
                    make_cell(column, experiment_json, exp_data_row)

            f_attributes = ['files.title', 'files.file_type',
                            'files.output_type']

            for f in experiment_json['files']:
                if 'files.file_type' in param_list:
                    if f['file_type'] not in param_list['files.file_type']:
                        continue
                f['href'] = request.host_url + f['href']
                f_row = []
                for attr in f_attributes:
                    f_row.append(f[attr[6:]])
                data_row = f_row + exp_data_row
                for prop in file_attributes:
                    if prop in f_attributes:
                        continue
                    path = prop[6:]
                    temp = []
                    for value in simple_path_ids(f, path):
                        temp.append(str(value))
                    if prop == 'files.replicate.rbns_protein_concentration':
                        if 'replicate' in f and 'rbns_protein_concentration_units' in f['replicate']:
                            temp[0] = temp[0] + ' ' + f['replicate']['rbns_protein_concentration_units']
                    if prop == 'files.paired_with':
                        # chop the path down to just the accession
                        if len(temp):
                            new_values = [t[7:-1] for t in temp]
                            temp = new_values
                    data = list(set(temp))
                    data.sort()
                    data_row.append(', '.join(data))
                audit_info = [make_audit_cell(audit_type, experiment_json, f) for audit_type in _audit_mapping]
                data_row.extend(audit_info)
                rows.append(data_row)
    fout = io.StringIO()
    writer = csv.writer(fout, delimiter='\t')
    header.extend([prop for prop in _audit_mapping])
    writer.writerow(header)
    writer.writerows(rows)
    return Response(
        content_type='text/tsv',
        body=fout.getvalue(),
        content_disposition='attachment;filename="%s"' % 'metadata.tsv'
    )
Example 19
def get_values_for_field(item, field):
    c_value = []
    for value in simple_path_ids(item, field):
        if str(value) not in c_value:
            c_value.append(str(value))
    return list(set(c_value))