def audit_pipeline_assay_term_names(value, system):
    '''Flag pipelines that do not specify any assay_term_names.

    Yields one ERROR-level AuditFailure ('missing assay_term_names') when the
    ``assay_term_names`` property is absent from ``value`` or is present but
    empty. An empty value names no assay, so it is treated the same as a
    missing property.

    ``value`` is the pipeline object (a mapping with at least ``@id``).
    ``system`` is part of the audit-function signature and is not used here.
    '''
    # A single truthiness test covers both "key missing" and "key empty";
    # the earlier condition tested the same membership twice.
    if not value.get('assay_term_names'):
        detail = 'Pipeline {} has no assay_term_names specified.'.format(
            audit_link(path_to_text(value['@id']), value['@id']))
        yield AuditFailure('missing assay_term_names', detail, level='ERROR')
Exemple #2
0
def audit_item_relations_status(value, system):
    '''Compare an item's status with the status of the items it links to.

    Statuses are converted to numeric levels through STATUS_LEVEL (a status
    that is not in the table counts as 50). For every schema link of the
    item's type:

    * ``supersedes`` - an INTERNAL_ACTION 'mismatched status' failure is
      yielded for specific (own level, linked level) combinations, listed
      in the code below.
    * ``derived_from`` / ``controlled_by`` / ``possible_controls`` - a
      failure is yielded when the item's level is strictly higher than the
      linked item's level.

    Items without a ``status`` yield nothing; linked objects without a
    ``status`` are skipped. ``system`` must provide ``context`` and
    ``request``.
    '''
    if 'status' not in value:
        return

    own_rank = STATUS_LEVEL.get(value['status'], 50)

    context = system['context']
    request = system['request']

    # Wording used in the failure detail for each "depends on" relation.
    relation_phrases = {
        'derived_from': 'is derived from',
        'controlled_by': 'is controlled by',
        'possible_controls': 'has a possible control',
    }

    for link_name in context.type_info.schema_links:
        if link_name == 'supersedes':
            for path in simple_path_ids(value, link_name):
                target = request.embed(path + '@@object')
                if 'status' not in target:
                    continue
                target_rank = STATUS_LEVEL.get(target['status'], 50)
                detail = "{} with status '{}' supersedes {} with status '{}'".format(
                    value['@id'],
                    value['status'],
                    target['@id'],
                    target['status'])
                # The three level combinations that count as a mismatch for
                # a superseding relationship.
                mismatched = (
                    (own_rank == 100 and target_rank in (0, 50, 100))
                    or (own_rank == 50 and target_rank in (0, 50))
                    or (own_rank in (30, 40) and target_rank in (0, 50, 100))
                )
                if mismatched:
                    yield AuditFailure(
                        'mismatched status',
                        detail,
                        level='INTERNAL_ACTION')

        elif link_name in relation_phrases:
            phrase = relation_phrases[link_name]
            for path in simple_path_ids(value, link_name):
                target = request.embed(path + '@@object')
                if 'status' not in target:
                    continue
                target_rank = STATUS_LEVEL.get(target['status'], 50)
                if own_rank > target_rank:
                    detail = "{} with status '{}' {} {} with status '{}'".format(
                        value['@id'],
                        value['status'],
                        phrase,
                        target['@id'],
                        target['status'])
                    yield AuditFailure(
                        'mismatched status',
                        detail,
                        level='INTERNAL_ACTION')
Exemple #3
0
def audit_file_controlled_by(value, system):
    '''
    A fastq in a ChIP-seq experiment should have a controlled_by

    Applies to fastq files whose dataset assay is ChIP-seq, RAMPAGE, CAGE or
    one of the knockdown / CRISPR RNA-seq assays, unless the dataset target
    is investigated as a control.

    Yields:
      * NOT_COMPLIANT 'missing controlled_by' when the list is absent or
        empty (and then stops).
      * ERROR 'inconsistent control' for the first control file whose
        biosample term_id, file_format or dataset (not in
        possible_controls) disagrees with this file (and then stops).
      * WARNING 'inconsistent control run_type' on run_type mismatch
        (not for RAMPAGE / CAGE).
      * WARNING 'inconsistent control read length' when read lengths differ
        by more than 2 (not for the knockdown / CRISPR RNA-seq assays); the
        audit stops after the first such warning.

    NOTE: when run_type or read_length is missing on either file, both the
    run_type and the read length comparisons are skipped for that control.
    ``system`` is not used.
    '''
    dataset = value['dataset']
    assay_name = dataset.get('assay_term_name')

    tss_assays = ('RAMPAGE', 'CAGE')
    knockdown_rna_assays = (
        'shRNA knockdown followed by RNA-seq',
        'siRNA knockdown followed by RNA-seq',
        'CRISPR genome editing followed by RNA-seq',
    )

    if assay_name not in ('ChIP-seq',) + tss_assays + knockdown_rna_assays:
        return

    if value['file_format'] != 'fastq':
        return

    # Control experiments themselves do not need controls.
    if 'target' in dataset and \
       'control' in dataset['target'].get('investigated_as', []):
        return

    if not value.get('controlled_by'):
        detail = ''.join([
            'controlled_by is a list of files that are used as ',
            'controls for a given experimental file. ',
            'Fastq files generated in a {} assay require the '.format(assay_name),
            'specification of control fastq file(s) in the controlled_by list. ',
            'Fastq file {} '.format(value['@id']),
            'is missing the requisite file specification in controlled_by list.',
        ])
        yield AuditFailure('missing controlled_by', detail, level='NOT_COMPLIANT')
        return

    possible_controls = dataset.get('possible_controls')
    own_ontology = dataset.get('biosample_ontology', {})
    biosample = own_ontology.get('term_id')
    biosample_term_name = own_ontology.get('term_name')
    run_type = value.get('run_type')
    read_length = value.get('read_length')

    for control in value['controlled_by']:
        control_dataset = control['dataset']
        control_bs = control_dataset.get('biosample_ontology', {}).get('term_id')
        control_run = control.get('run_type')
        control_length = control.get('read_length')

        if control_bs != biosample:
            detail = ''.join([
                'controlled_by is a list of files that are used as controls for a given file. ',
                'This experiment was performed using {}, but '.format(biosample_term_name),
                'file {} contains in controlled_by list a file '.format(value['@id']),
                '{} that belongs to experiment with different biosample {}.'.format(
                    control['@id'],
                    control_dataset.get('biosample_ontology', {}).get('term_name')),
            ])
            yield AuditFailure('inconsistent control', detail, level='ERROR')
            return

        if control['file_format'] != value['file_format']:
            detail = ''.join([
                'controlled_by is a list of files that are used as controls for a given file. ',
                'File {} with file_format {} contains in controlled_by list '.format(
                    value['@id'], value['file_format']),
                'a file {} with different file_format {}.'.format(
                    control['@id'], control['file_format']),
            ])
            yield AuditFailure('inconsistent control', detail, level='ERROR')
            return

        if possible_controls is None or control_dataset['@id'] not in possible_controls:
            detail = ''.join([
                'possible_controls is a list of experiment(s) that can serve as ',
                'analytical controls for a given experiment. ',
                'controlled_by is a list of files that are used as ',
                'controls for a given file. ',
                'File {} contains in controlled_by list a file {} '.format(
                    value['@id'], control['@id']),
                'that belongs to an experiment {} that '.format(control_dataset['@id']),
                'is not specified in possible_controls list of this experiment.',
            ])
            yield AuditFailure('inconsistent control', detail, level='ERROR')
            return

        # Both sequencing comparisons need both properties on both files.
        if run_type is None or control_run is None:
            continue
        if read_length is None or control_length is None:
            continue

        if run_type != control_run and assay_name not in tss_assays:
            detail = 'File {} is {} but its control file {} is {}'.format(
                value['@id'], run_type, control['@id'], control_run)
            yield AuditFailure('inconsistent control run_type',
                               detail, level='WARNING')

        lengths_differ = read_length != control_length and \
            abs(read_length - control_length) > 2
        if lengths_differ and assay_name not in knockdown_rna_assays:
            detail = 'File {} is {} but its control file {} is {}'.format(
                value['@id'], read_length, control['@id'], control_length)
            yield AuditFailure('inconsistent control read length',
                               detail, level='WARNING')
            return
Exemple #4
0
def check_control_read_depth_standards(value, read_depth, target_name,
                                       is_control_file, control_to_target,
                                       target_investigated_as,
                                       standards_version):
    '''Audit the read depth of a ChIP-seq control alignment file.

    Nothing is yielded unless ``is_control_file`` is exactly True. For a
    control file:

    * a WARNING 'inconsistent target of control experiment' is yielded (and
      the check stops) when ``target_name`` is neither Control-human nor
      Control-mouse;
    * nothing is yielded when ``control_to_target`` is the string 'empty';
    * otherwise ``read_depth`` is compared with thresholds chosen by what
      the controlled target is investigated as (broad histone mark, narrow
      histone mark, or anything else, which is worded as a transcription
      factor), yielding WARNING / NOT_COMPLIANT / ERROR failures.

    The upper bound of the WARNING band comes from
    ``pipelines_with_read_depth['ChIP-seq read mapping']``.
    ``standards_version`` is accepted but not used.
    '''
    depth_marks = pipelines_with_read_depth['ChIP-seq read mapping']

    # Only files treated as control alignments are checked.
    if is_control_file is not True:
        return

    if target_name not in ('Control-human', 'Control-mouse'):
        detail = ''.join([
            'Control alignment file {} '.format(value['@id']),
            'has a target {} that is neither '.format(target_name),
            'Control-human nor Control-mouse.',
        ])
        yield AuditFailure('inconsistent target of control experiment',
                           detail,
                           level='WARNING')
        return

    if control_to_target == 'empty':
        return

    # Select wording and thresholds for the kind of target being controlled:
    #   minimum - lower edge of the WARNING band
    #   floor   - below this the failure is an ERROR
    #   mark_key - which entry of depth_marks caps the WARNING band
    if 'broad histone mark' in target_investigated_as:
        target_clause = \
            'The minimum AMP-T2D standard for a control of ChIP-seq assays targeting broad ' + \
            'histone mark {} '.format(control_to_target)
        numbers_clause = \
            'is 40 million usable fragments, the recommended number of usable ' + \
            'fragments is > 45 million. (See /data-standards/chip-seq/ )'
        mark_key, minimum, floor = 'broad', 40000000, 5000000
        below_minimum_category = 'control insufficient read depth'
    elif 'narrow histone mark' in target_investigated_as:
        target_clause = \
            'The minimum AMP-T2D standard for a control of ChIP-seq assays targeting narrow ' + \
            'histone mark {} '.format(control_to_target)
        numbers_clause = \
            'is 10 million usable fragments, the recommended number of usable ' + \
            'fragments is > 20 million. (See /data-standards/chip-seq/ )'
        mark_key, minimum, floor = 'narrow', 10000000, 5000000
        below_minimum_category = 'control low read depth'
    else:
        target_clause = \
            'The minimum AMP-T2D standard for a control of ChIP-seq assays targeting ' + \
            '{} and investigated as a transcription factor '.format(control_to_target)
        numbers_clause = \
            'is 10 million usable fragments, the recommended number of usable ' + \
            'fragments is > 20 million. (See /data-standards/chip-seq/ )'
        mark_key, minimum, floor = 'narrow', 10000000, 3000000
        below_minimum_category = 'control low read depth'

    # The opening sentence mentions the assembly only when the file has one.
    if 'assembly' in value:
        opening = 'Control alignment file {} mapped to {} assembly has {} '.format(
            value['@id'],
            value['assembly'],
            read_depth)
    else:
        opening = 'Control alignment file {} has {} '.format(
            value['@id'],
            read_depth)

    detail = opening + 'usable fragments. ' + target_clause + numbers_clause

    if minimum <= read_depth < depth_marks[mark_key]:
        yield AuditFailure('control low read depth',
                           detail,
                           level='WARNING')
    elif floor <= read_depth < minimum:
        yield AuditFailure(below_minimum_category,
                           detail,
                           level='NOT_COMPLIANT')
    elif read_depth < floor:
        yield AuditFailure('control extremely low read depth',
                           detail,
                           level='ERROR')
Exemple #5
0
def audit_reference_epigenome_assay_types_requirments(value, system):
    '''Report required assays that a reference epigenome is missing.

    A WARNING 'missing IHEC required assay' is yielded for every required
    assay that does not occur in ``value['related_datasets']``; if there is
    no ``related_datasets`` property at all, a single such warning is
    yielded.

    Two requirement sets exist, selected by ``value['award']['rfa']``:

    * 'Roadmap': ChIP-seq (OBI:0000716) for Control, H3K27me3, H3K36me3,
      H3K4me1, H3K4me3 and H3K9me3, plus at least one of the methylation
      assay ids and at least one of the transcription assay ids.
    * anything else (IHEC): the same six ChIP-seq targets plus H3K27ac,
      plus OBI:0001863 (reported as WGBS) and OBI:0001271 (reported as
      RNA-seq).

    ChIP-seq datasets are matched by ``target['label']``; ChIP-seq datasets
    without a target are ignored. ``system`` is not used.
    '''
    category = 'missing IHEC required assay'
    detail_prefix = 'Reference Epigenome {} '.format(value['@id'])

    if 'related_datasets' not in value:
        detail = detail_prefix + \
            'has no related datasets. It lacks all of the IHEC required ' + \
            'assays.'
        yield AuditFailure(category, detail, level='WARNING')
        return

    chip_seq = 'OBI:0000716'

    # ChIP-seq targets required by both standards, in reporting order, with
    # the message fragment used when the target is missing.
    shared_chip_checks = [
        ('Control', 'is missing control ChIP-seq assay, '),
        ('H3K27me3', 'is missing H3K27me3 ChIP-seq assay, '),
        ('H3K36me3', 'is missing H3K36me3 ChIP-seq assay, '),
        ('H3K4me1', 'is missing H3K4me1 ChIP-seq assay, '),
        ('H3K4me3', 'is missing H3K4me3 ChIP-seq assay, '),
        ('H3K9me3', 'is missing H3K9me3 ChIP-seq assay, '),
    ]
    # Assay ids grouped as in the Roadmap "at least one of" checks below.
    roadmap_rna_ids = {'OBI:0001271', 'OBI:0001463'}
    roadmap_methylation_ids = {
        'OBI:0000693', 'OBI:0001861', 'OBI:0001863', 'OBI:0001862'}

    roadmap_flag = 'award' in value and value['award']['rfa'] == 'Roadmap'

    required = {(chip_seq, target) for target, _ in shared_chip_checks}
    if roadmap_flag:
        required |= roadmap_rna_ids | roadmap_methylation_ids
        project_detail = 'required according to standards of NIH ' + \
                         'Roadmap Minimal Reference Epigenome'
    else:
        required |= {(chip_seq, 'H3K27ac'), 'OBI:0001863', 'OBI:0001271'}
        project_detail = 'required according to standards of Minimal IHEC Reference Epigenome.'

    # Collect which of the required assays are actually present.
    found = set()
    for dataset in value['related_datasets']:
        term_id = dataset['assay_term_id']
        if term_id == chip_seq:
            if 'target' not in dataset:
                continue
            key = (term_id, dataset['target']['label'])
        else:
            key = term_id
        if key in required:
            found.add(key)

    for target, fragment in shared_chip_checks:
        if (chip_seq, target) not in found:
            yield AuditFailure(category,
                               detail_prefix + fragment + project_detail,
                               level='WARNING')

    if roadmap_flag:
        if not found & roadmap_methylation_ids:
            detail = detail_prefix + \
                'is missing MeDIP-seq, MRE-seq, RRBS, or MethylCap-seq assays. ' + \
                'At least one is ' + project_detail
            yield AuditFailure(category, detail, level='WARNING')
        if not found & roadmap_rna_ids:
            detail = detail_prefix + \
                'is missing RNA-seq or array based transcription assays. ' + \
                'At least one is ' + project_detail
            yield AuditFailure(category, detail, level='WARNING')
    else:
        ihec_only_checks = [
            ((chip_seq, 'H3K27ac'), 'is missing H3K27ac ChIP-seq assay, '),
            ('OBI:0001863', 'is missing WGBS assay, '),
            ('OBI:0001271', 'is missing RNA-seq assay, '),
        ]
        for key, fragment in ihec_only_checks:
            if key not in found:
                yield AuditFailure(category,
                                   detail_prefix + fragment + project_detail,
                                   level='WARNING')
Exemple #6
0
def audit_file_controlled_by(value, system):
    '''
    A fastq in a ChIP-seq experiment should have a controlled_by

    Applies to fastq files whose dataset assay is ChIP-seq, RAMPAGE, CAGE or
    one of the knockdown / CRISPR RNA-seq assays, unless the dataset target
    is investigated as a control. ``value`` is only read, never modified.

    Yields:
      * NOT_COMPLIANT 'missing controlled_by' when the list is absent or
        empty (and then stops).
      * For ChIP-seq and RAMPAGE only:
          - INTERNAL_ACTION 'missing paired_with in controlled_by' for each
            paired-ended control that has no paired_with, or whose mate is
            not reported as present among the paired-ended controls by
            check_presence;
          - ERROR 'inconsistent controlled_by replicates' when the controls
            come from more than one biological replicate.
      * ERROR 'inconsistent control' for the first control file whose
        biosample_term_id, file_format or dataset (not in
        possible_controls) disagrees with this file (and then stops).
      * WARNING 'inconsistent control run_type' on run_type mismatch
        (not for RAMPAGE / CAGE).
      * WARNING 'inconsistent control read length' when read lengths differ
        by more than 2 (not for the knockdown / CRISPR RNA-seq assays); the
        audit stops after the first such warning.

    NOTE: when run_type or read_length is missing on either file, both the
    run_type and the read length comparisons are skipped for that control.
    ``system`` is not used.
    '''
    dataset = value['dataset']
    assay_term_name = dataset.get('assay_term_name')

    if assay_term_name not in [
            'ChIP-seq', 'RAMPAGE', 'CAGE',
            'shRNA knockdown followed by RNA-seq',
            'siRNA knockdown followed by RNA-seq',
            'CRISPR genome editing followed by RNA-seq'
    ]:
        return

    if value['file_format'] not in ['fastq']:
        return

    # Control experiments themselves do not need controls.
    if 'target' in dataset and \
       'control' in dataset['target'].get('investigated_as', []):
        return

    # Read the list without writing a default back into the audited object:
    # ``value`` is shared with other audits and must not be altered here.
    controlled_by = value.get('controlled_by')

    if not controlled_by:
        detail = 'controlled_by is a list of files that are used as controls for a given experimental file. ' + \
                 'Fastq files generated in a {} assay require the '.format(
                     assay_term_name) + \
                 'specification of control fastq file(s) in the controlled_by list. ' + \
                 'Fastq file {} '.format(
                     value['@id']) + \
                 'is missing the requisite file specification in controlled_by list.'
        yield AuditFailure('missing controlled_by',
                           detail,
                           level='NOT_COMPLIANT')
        return

    if assay_term_name in ['ChIP-seq', 'RAMPAGE']:
        bio_rep_numbers = set()
        pe_files = []
        for control_file in controlled_by:
            if 'replicate' in control_file:
                bio_rep_numbers.add(
                    control_file['replicate']['biological_replicate_number'])
            if control_file.get('run_type') == 'paired-ended':
                pe_files.append(control_file)

        # Every paired-ended control must come with its mate.
        for pe_file in pe_files:
            if 'paired_with' not in pe_file:
                detail = 'Fastq file {} '.format(value['@id']) + \
                         'from experiment {} '.format(dataset['@id']) + \
                         'contains in controlled_by list PE fastq file ' + \
                         '{} with missing paired_with property.'.format(pe_file['@id'])
                yield AuditFailure('missing paired_with in controlled_by',
                                   detail,
                                   level='INTERNAL_ACTION')
            elif check_presence(pe_file['paired_with'], pe_files) is False:
                detail = 'Fastq file {} '.format(value['@id']) + \
                         'from experiment {} '.format(dataset['@id']) + \
                         'contains in controlled_by list PE fastq file ' + \
                         '{} which is paired to a file {} '.format(pe_file['@id'],
                                                                   pe_file['paired_with']['@id']) + \
                         'that is not included in the controlled_by list'
                yield AuditFailure('missing paired_with in controlled_by',
                                   detail,
                                   level='INTERNAL_ACTION')

        if len(bio_rep_numbers) > 1:
            # Sorted so the message does not depend on set iteration order.
            detail = 'Fastq file {} '.format(value['@id']) + \
                     'from experiment {} '.format(dataset['@id']) + \
                     'contains in controlled_by list fastq files ' + \
                     'from different biological replicates {}.'.format(sorted(bio_rep_numbers))
            yield AuditFailure('inconsistent controlled_by replicates',
                               detail,
                               level='ERROR')

    possible_controls = dataset.get('possible_controls')
    biosample = dataset.get('biosample_term_id')
    biosample_term_name = dataset.get('biosample_term_name')
    run_type = value.get('run_type', None)
    read_length = value.get('read_length', None)

    for ff in controlled_by:
        control_bs = ff['dataset'].get('biosample_term_id')
        control_run = ff.get('run_type', None)
        control_length = ff.get('read_length', None)

        if control_bs != biosample:
            detail = 'controlled_by is a list of files that are used as controls for a given file. ' + \
                     'This experiment was performed using {}, but '.format(biosample_term_name) + \
                     'file {} contains in controlled_by list a file '.format(value['@id']) + \
                     '{} that belongs to experiment with different biosample {}.'.format(
                         ff['@id'],
                         ff['dataset'].get('biosample_term_name'))
            yield AuditFailure('inconsistent control',
                               detail,
                               level='ERROR')
            return

        if ff['file_format'] != value['file_format']:
            detail = 'controlled_by is a list of files that are used as controls for a given file. ' + \
                     'File {} with file_format {} contains in controlled_by list '.format(
                         value['@id'],
                         value['file_format'],) + \
                     'a file {} with different file_format {}.'.format(
                         ff['@id'],
                         ff['file_format'])
            yield AuditFailure('inconsistent control',
                               detail,
                               level='ERROR')
            return

        if (possible_controls is None) or (ff['dataset']['@id']
                                           not in possible_controls):
            detail = 'possible_controls is a list of experiment(s) that can serve as ' + \
                     'analytical controls for a given experiment. ' + \
                     'controlled_by is a list of files that are used as ' + \
                     'controls for a given file. ' + \
                     'File {} contains in controlled_by list a file {} '.format(
                         value['@id'],
                         ff['@id']) + \
                     'that belongs to an experiment {} that '.format(ff['dataset']['@id']) + \
                     'is not specified in possible_controls list of this experiment.'

            yield AuditFailure('inconsistent control',
                               detail,
                               level='ERROR')
            return

        # Both sequencing comparisons need both properties on both files.
        if (run_type is None) or (control_run is None):
            continue

        if (read_length is None) or (control_length is None):
            continue

        if run_type != control_run and \
           assay_term_name not in ['RAMPAGE', 'CAGE']:
            detail = 'File {} is {} but its control file {} is {}'.format(
                value['@id'], run_type, ff['@id'], control_run)
            yield AuditFailure('inconsistent control run_type',
                               detail,
                               level='WARNING')

        if read_length != control_length and \
           abs(read_length - control_length) > 2 and \
           assay_term_name not in \
                ['shRNA knockdown followed by RNA-seq',
                 'siRNA knockdown followed by RNA-seq',
                 'CRISPR genome editing followed by RNA-seq']:

            detail = 'File {} is {} but its control file {} is {}'.format(
                value['@id'], read_length, ff['@id'], control_length)
            yield AuditFailure('inconsistent control read length',
                               detail,
                               level='WARNING')
            return
Exemple #7
0
def audit_file_chip_seq_control_read_depth(
    value,
    system,
    condition=rfa('ENCODE3', 'ENCODE2-Mouse', 'ENCODE2', 'ENCODE', 'AMP-T2D',
                  'Roadmap')):
    '''
    Audit the read depth of the control alignment behind a ChIP-seq bam.

    Only bam files from the '/labs/encode-processing-pipeline/' lab that are
    not deleted/replaced/revoked and are neither transcriptome nor unfiltered
    alignments are examined. Missing provenance (analysis step version,
    analysis step, pipelines, software versions) is reported as an
    INTERNAL_ACTION failure and ends the audit. For files produced by the
    'ChIP-seq read mapping' pipeline whose target is not a control, the
    failures produced by check_control_read_depth_standards() for the
    control bam are re-yielded.

    `system` and `condition` are not read inside the body.
    '''
    # Screening: anything outside the audited population is silently skipped.
    if (value['status'] in ('deleted', 'replaced', 'revoked')
            or value['file_format'] != 'bam'
            or value['output_type'] in ('transcriptome alignments',
                                        'unfiltered alignments')
            or value['lab'] != '/labs/encode-processing-pipeline/'):
        return

    if 'analysis_step_version' not in value:
        detail = ('AMP-T2D Processed alignment file {} has '
                  'no analysis step version'.format(value['@id']))
        yield AuditFailure('missing analysis step version',
                           detail,
                           level='INTERNAL_ACTION')
        return
    step_version = value['analysis_step_version']

    if 'analysis_step' not in step_version:
        detail = ('AMP-T2D Processed alignment file {} has '
                  'no analysis step in {}'.format(value['@id'],
                                                  step_version['@id']))
        yield AuditFailure('missing analysis step',
                           detail,
                           level='INTERNAL_ACTION')
        return
    step = step_version['analysis_step']

    if 'pipelines' not in step:
        detail = ('AMP-T2D Processed alignment file {} has '
                  'no pipelines in {}'.format(value['@id'], step['@id']))
        yield AuditFailure('missing pipelines in analysis step',
                           detail,
                           level='INTERNAL_ACTION')
        return

    if 'software_versions' not in step_version:
        detail = ('AMP-T2D Processed alignment file {} has '
                  'no software_versions in {}'.format(value['@id'],
                                                      step_version['@id']))
        yield AuditFailure('missing software versions',
                           detail,
                           level='INTERNAL_ACTION')
        return

    if step_version['software_versions'] == []:
        detail = ('AMP-T2D Processed alignment file {} has no '
                  'softwares listed in software_versions,'
                  ' under {}'.format(value['@id'], step_version['@id']))
        yield AuditFailure('missing software', detail, level='INTERNAL_ACTION')
        return

    # The file must come from the ChIP-seq mapping pipeline; a raw-mapping
    # pipeline anywhere in the list ends the audit immediately.
    is_chip_mapping = False
    for pipeline in step['pipelines']:
        title = pipeline['title']
        if title == 'ChIP-seq read mapping':
            is_chip_mapping = True
        if title == 'Raw mapping with no filtration':
            return
    if not is_chip_mapping:
        return

    quality_metrics = value.get('quality_metrics')
    if quality_metrics is None or quality_metrics == []:
        return

    derived_from_files = value.get('derived_from')
    if derived_from_files is None or derived_from_files == []:
        return

    # 'empty' is the placeholder handed on when no named target is embedded.
    target_name = 'empty'
    target_investigated_as = 'empty'
    if 'dataset' in value and 'target' in value['dataset']:
        target = value['dataset']['target']
        if 'name' in target:
            target_name = target['name']
            target_investigated_as = target['investigated_as']

    # Control experiments have no control of their own to check.
    if target_name in ('Control-human', 'Control-mouse'):
        return

    control_bam = get_control_bam(value, 'ChIP-seq read mapping')
    if control_bam is False:
        return

    control_depth = get_chip_seq_bam_read_depth(control_bam)
    control_target = get_target_name(control_bam)
    standards_version = extract_award_version(control_bam)
    if control_depth is False or control_target is False:
        return

    for failure in check_control_read_depth_standards(
            control_bam, control_depth, control_target, True,
            target_name, target_investigated_as, standards_version):
        yield failure
Exemple #8
0
def audit_item_status(value, system):
    '''
    Report linked objects whose status ranks below the status of this item.

    Status ranks come from STATUS_LEVEL (unknown statuses rank 50), and
    'revoked'/'archived' get a 50 point bonus on both sides of the
    comparison. A linked object is reported with a 'mismatched status'
    INTERNAL_ACTION failure when its rank is 0 or lower than this item's.
    Items that have no status, or whose own rank is 0, are not audited.
    '''
    if 'status' not in value:
        return

    level = STATUS_LEVEL.get(value['status'], 50)
    if level == 0:
        return
    if value['status'] in ['revoked', 'archived']:
        level += 50

    context = system['context']
    request = system['request']

    # Link properties that are deliberately left out of this comparison.
    ignored_links = ('supersedes', 'step_run', 'derived_from',
                     'controlled_by', 'possible_controls', 'elements')
    linked = set()
    for schema_path in context.type_info.schema_links:
        if schema_path not in ignored_links:
            linked.update(simple_path_ids(value, schema_path))

    # Avoid pulling the full @@object frame into request._embedded_uuids.
    frame = ('@@filtered_object'
             '?include=@id&include=@type&include=uuid&include=status')

    for path in linked:
        linked_value = request.embed(path + frame)
        if 'status' not in linked_value:
            continue
        linked_status = linked_value['status']
        if linked_status == 'disabled':
            continue
        # Special case: A revoked file can have a deleted replicate
        # ticket #2938
        if ('File' in value['@type'] and value['status'] == 'revoked'
                and 'Replicate' in linked_value['@type']
                and linked_status == 'deleted'):
            continue

        linked_level = STATUS_LEVEL.get(linked_status, 50)
        if linked_status in ['revoked', 'archived']:
            linked_level += 50

        # Rank 0 is always reported; any other rank only when it is lower
        # than the rank of the item being audited.
        if linked_level != 0 and linked_level >= level:
            continue

        detail = '{} {} {} has {} subobject {} {}'.format(
            value['status'].capitalize(),
            space_in_words(value['@type'][0]).lower(),
            audit_link(path_to_text(value['@id']), value['@id']),
            linked_value['status'],
            space_in_words(linked_value['@type'][0]).lower(),
            audit_link(path_to_text(linked_value['@id']),
                       linked_value['@id']))
        yield AuditFailure('mismatched status',
                           detail,
                           level='INTERNAL_ACTION')
Exemple #9
0
def audit_antibody_missing_characterizations(value, system):
    '''
    Check to see what characterizations are lacking for each antibody,
    for the cell lines we know about.

    Reads value['targets'], value['characterizations'] and
    value['lot_reviews'] and yields AuditFailure objects:
      - 'no characterizations submitted' (then stops),
      - 'no primary characterizations' / 'no secondary characterizations',
      - 'characterizations not reviewed' when the first lot review says so,
      - 'need compliant primaries' for each lot review still waiting on a
        compliant primary characterization,
      - 'need compliant secondary' when secondaries exist but none is
        compliant or exempt from standards.
    Antibodies whose first target is investigated as 'control' are skipped.
    `system` is not used.
    '''
    if value['targets'][0].get('investigated_as') in ['control']:
        return

    if not value['characterizations']:
        detail = '{} '.format(value['@id']) + \
            'does not have any supporting characterizations submitted.'
        yield AuditFailure('no characterizations submitted',
                           detail,
                           level='NOT_COMPLIANT')
        return

    primary_chars = []
    secondary_chars = []
    compliant_secondary = False
    for char in value['characterizations']:
        if 'primary_characterization_method' in char:
            primary_chars.append(char)
        if 'secondary_characterization_method' in char:
            secondary_chars.append(char)
            if char['status'] in ['compliant', 'exempt from standards']:
                compliant_secondary = True

    if not primary_chars:
        detail = '{} '.format(value['@id']) + \
            'does not have any primary characterizations submitted.'
        yield AuditFailure('no primary characterizations',
                           detail,
                           level='NOT_COMPLIANT')

    if not secondary_chars:
        detail = '{} '.format(value['@id']) + \
            'does not have any secondary characterizations submitted.'
        yield AuditFailure('no secondary characterizations',
                           detail,
                           level='NOT_COMPLIANT')

    if value['lot_reviews'][0]['detail'] in [
            'Characterizations not reviewed.'
    ]:
        detail = '{} has old characterizations that were not reviewed.'.format(
            value['@id'])
        yield AuditFailure('characterizations not reviewed',
                           detail,
                           level='WARNING')

    for lot_review in value['lot_reviews']:
        if lot_review['detail'] in [
                'Pending review of primary characterization.',
                'Primary characterization(s) in progress.',
                'Pending review of primary and secondary characterizations.',
                'Pending review of primary and awaiting submission of secondary characterization(s).',
                'Awaiting compliant primary and secondary characterizations.',
                'Awaiting a compliant primary characterization.',
                'Characterizations in progress.'
        ]:
            biosample = lot_review['biosample_term_name']
            if biosample == 'not specified':
                biosample = 'one or more cell types/tissues.'

            # Format the normalised text, not the raw term name, so that
            # 'not specified' is replaced by its readable wording.
            detail = '{} needs a compliant primary in {}'.format(
                value['@id'], biosample)
            yield AuditFailure('need compliant primaries',
                               detail,
                               level='NOT_COMPLIANT')

        if lot_review['detail'] is None and lot_review[
                'status'] == 'awaiting lab characterization':
            # The literals are joined by the parentheses before .format()
            # runs; a method call binds tighter than '+', so formatting only
            # the trailing piece would leave the '{}' placeholder unfilled.
            detail = ('{} needs a compliant primary characterization for '
                      'one or more cell types/tissues.'.format(value['@id']))
            yield AuditFailure('need compliant primaries',
                               detail,
                               level='NOT_COMPLIANT')

    if secondary_chars and not compliant_secondary:
        detail = '{} '.format(value['@id']) + \
            'needs a compliant secondary characterization.'
        yield AuditFailure('need compliant secondary',
                           detail,
                           level='NOT_COMPLIANT')
Exemple #10
0
def audit_biosample_gtex_children(value, system):
    '''
    GTEX children biosamples have to be properly registered.

    Applies to non-deleted/replaced/revoked biosamples whose donor accession
    is in gtexDonorsList and whose own accession is not in gtexParentsList.
    For those it checks:
    - source: present and carrying the expected uuid
    - part_of: present, pointing to a biosample in gtexParentsList that has
      the same biosample_term_id
    - aliases: present and containing an alias derived from the donor's
      'gtex:PT' alias (column A from plate-maps)
    Every problem is yielded as an INTERNAL_ACTION AuditFailure.
    '''
    if value['status'] in ['deleted', 'replaced', 'revoked']:
        return
    if 'donor' not in value:
        return
    # Only children of known GTEX donors are audited; parents are exempt.
    if value['donor']['accession'] not in gtexDonorsList or \
       value['accession'] in gtexParentsList:
        return

    # --- source ---
    if 'source' not in value:
        yield AuditFailure(
            'GTEX biosample missing source',
            'GTEX biosample {} has no source'.format(value['@id']),
            level='INTERNAL_ACTION')
    elif value['source']['uuid'] != 'f85ecd67-abf2-4a26-89c8-53a7273c8b0c':
        yield AuditFailure(
            'GTEX biosample incorrect source',
            'GTEX biosample {} has incorrect source {}'.format(
                value['@id'], value['source']['title']),
            level='INTERNAL_ACTION')

    # --- part_of ---
    if 'part_of' not in value:
        yield AuditFailure(
            'GTEX biosample missing part_of property',
            'GTEX child biosample {} is not asociated with any parent '
            'biosample'.format(value['@id']),
            level='INTERNAL_ACTION')
    else:
        parent = value['part_of']
        if parent['accession'] not in gtexParentsList:
            yield AuditFailure(
                'GTEX biosample invalid part_of property',
                'GTEX child biosample {} is asociated '
                'with biosample {} which is '
                'not a part of parent biosamples list'.format(
                    value['@id'], parent['@id']),
                level='INTERNAL_ACTION')
        elif value['biosample_term_id'] != parent['biosample_term_id']:
            yield AuditFailure(
                'GTEX biosample invalid part_of property',
                'GTEX child biosample {} is associated with '
                'biosample {} that has a different '
                'biosample_term_id {}'.format(
                    value['@id'], parent['@id'],
                    parent['biosample_term_id']),
                level='INTERNAL_ACTION')

    # --- aliases ---
    if 'aliases' not in value:
        yield AuditFailure(
            'GTEX biosample missing aliases',
            'GTEX biosample {} has no aliases'.format(value['@id']),
            level='INTERNAL_ACTION')
        return

    # The expected child alias prefix is rebuilt from the donor's 'gtex:PT'
    # alias; when several donor aliases match, the last one wins.
    expected_prefix = ''
    for donor_alias in value['donor']['aliases']:
        if donor_alias[0:7] == 'gtex:PT':
            expected_prefix = 'gtex:ENC-' + donor_alias[8:13]

    child_aliases = value['aliases']
    if not any(alias[0:14] == expected_prefix for alias in child_aliases):
        yield AuditFailure(
            'GTEX biosample missing aliases',
            'GTEX biosample {} aliases {} '
            'do not include an alias based on plate-map, '
            'column A identifier'.format(value['@id'], child_aliases),
            level='INTERNAL_ACTION')
Exemple #11
0
def audit_file_processed_derived_from(value, system):
    '''
    Check the derived_from list of a processed file.

    Files whose output_category is 'raw data' or 'reference' are skipped.
    Any other file with a missing or empty derived_from list yields
    'missing derived_from'. For bam files that are not 'replaced', every
    bam/fastq (or raw-read fasta/csfasta/csqual) entry in derived_from is
    examined: an entry from a different dataset yields
    'inconsistent derived_from', and if no such entry has a STATUS_LEVEL at
    least as high as the file's own, 'missing derived_from' is yielded.
    All failures are INTERNAL_ACTION. `system` is not used.
    '''
    preamble = ('derived_from is a list of files that were used to create '
                'a given file; for example, fastq file(s) will appear in '
                'the derived_from list of an alignments file. ')

    if value['output_category'] in ['raw data', 'reference']:
        return

    if 'derived_from' not in value or len(value['derived_from']) == 0:
        detail = (preamble +
                  'Processed file {} is missing the requisite file '
                  'specification in its derived_from list.').format(
                      audit_link(path_to_text(value['@id']), value['@id']))
        yield AuditFailure('missing derived_from',
                           detail,
                           level='INTERNAL_ACTION')
        return

    if value['file_format'] != 'bam':
        return
    # Ignore replaced BAMs because missing derived_from logic should be
    # applied to their replacements instead (ENCD-3595).
    if value['status'] == 'replaced':
        return

    usable_sources = 0
    for parent in value.get('derived_from'):
        is_read_source = (
            parent['file_format'] in ('bam', 'fastq')
            or (parent['file_format'] in ['fasta', 'csfasta', 'csqual']
                and parent['output_type'] == 'reads'
                and parent['output_category'] == 'raw data'))
        if not is_read_source:
            continue

        # Audit shouldn't trigger if status isn't registered in
        # STATUS_LEVEL dict.
        if (f_status_unknown := parent['status'] not in STATUS_LEVEL) or \
                value['status'] not in STATUS_LEVEL:
            return

        if STATUS_LEVEL[parent['status']] >= STATUS_LEVEL[value['status']]:
            usable_sources += 1

        if parent['dataset'] != value['dataset'].get('@id'):
            detail = (preamble +
                      'Alignments file {} from experiment {} '
                      'specifies a file {} from a different experiment {} '
                      'in its derived_from list.').format(
                          audit_link(path_to_text(value['@id']),
                                     value['@id']),
                          audit_link(path_to_text(value['dataset']['@id']),
                                     value['dataset']['@id']),
                          audit_link(path_to_text(parent['@id']),
                                     parent['@id']),
                          audit_link(path_to_text(parent['dataset']),
                                     parent['dataset']))
            yield AuditFailure('inconsistent derived_from',
                               detail,
                               level='INTERNAL_ACTION')

    if usable_sources == 0:
        detail = (preamble +
                  'Alignments file {} is missing the requisite '
                  'file specification in its derived_from list.').format(
                      audit_link(path_to_text(value['@id']), value['@id']))
        yield AuditFailure('missing derived_from',
                           detail,
                           level='INTERNAL_ACTION')
Exemple #12
0
def audit_paired_with(value, system):
    '''
    Consistency checks between a paired-end file and its paired_with mate.

    Nothing is audited when the file has no paired_end, has paired_end
    '1,2', or has no paired_with. Otherwise the following ERROR /
    INTERNAL_ACTION failures may be yielded:
    - 'paired with non-fastq': a fastq file paired with a non-fastq file
    - 'missing replicate': the mate has a replicate but this file has none
    - 'inconsistent paired_with': the two files are on different replicates
    - 'multiple paired_with': a paired end 1 file referenced by more than
      one file through the paired_with reverse link (ends the audit)
    - 'inconsistent read count': both read counts are set and differ
    If the mate carries no replicate, the audit stops after the fastq check.
    '''
    if ('paired_end' not in value
            or value['paired_end'] == '1,2'
            or 'paired_with' not in value):
        return

    mate = value['paired_with']
    mate_format = mate.get('file_format')

    if value.get('file_format') == 'fastq' and mate_format != 'fastq':
        detail = (
            'Both the files in a paired-end run must be fastq files. '
            'Fastq file {} is paired with file {}, which is a {} file.'
        ).format(
            audit_link(path_to_text(value['@id']), value['@id']),
            audit_link(path_to_text(mate['@id']), mate['@id']),
            mate_format)
        yield AuditFailure('paired with non-fastq', detail, level='ERROR')

    if 'replicate' not in mate:
        return

    if 'replicate' not in value:
        detail = 'File {} has paired_end = {}. It requires a replicate'.format(
            audit_link(path_to_text(value['@id']), value['@id']),
            value['paired_end'])
        yield AuditFailure('missing replicate',
                           detail,
                           level='INTERNAL_ACTION')
    elif value['replicate'].get('@id') != mate['replicate']:
        detail = (
            'File {} has replicate {}. '
            'It is paired_with file {} with replicate {}'
        ).format(
            audit_link(path_to_text(value['@id']), value['@id']),
            audit_link(path_to_text(value['replicate'].get('@id')),
                       value['replicate'].get('@id')),
            audit_link(path_to_text(mate['@id']), mate['@id']),
            audit_link(path_to_text(mate.get('replicate')),
                       mate.get('replicate')))
        yield AuditFailure('inconsistent paired_with', detail, level='ERROR')

    if value['paired_end'] == '1':
        # Reverse links: the files that name this file as their paired_with.
        referrers = system['context'].get_rev_links('paired_with')
        if len(referrers) > 1:
            detail = (
                'Paired end 1 file {} paired_with by multiple '
                'paired end 2 files: {!r}'
            ).format(audit_link(path_to_text(value['@id']), value['@id']),
                     referrers)
            yield AuditFailure('multiple paired_with', detail, level='ERROR')
            return

    own_reads = value.get('read_count')
    mate_reads = mate.get('read_count')

    # Only compare when both counts are set (and non-zero).
    if own_reads and mate_reads and own_reads != mate_reads:
        detail = (
            'File {} has {} reads. It is'
            ' paired_with file {} that has {} reads'
        ).format(
            audit_link(path_to_text(value['@id']), value['@id']),
            own_reads,
            audit_link(path_to_text(mate['@id']), mate['@id']),
            mate_reads)
        yield AuditFailure('inconsistent read count', detail, level='ERROR')
Exemple #13
0
def audit_reference_epigenome_assay_types_requirments(value, system):
    '''
    Yield a WARNING for every IHEC required assay missing from a
    reference epigenome.

    The required set is ChIP-seq against Control, H3K27me3, H3K36me3,
    H3K4me1, H3K4me3, H3K27ac and H3K9me3, plus whole-genome shotgun
    bisulfite sequencing and RNA-seq. ChIP-seq datasets are matched on
    assay['target']['label']; the others on assay_term_name alone. When
    value has no related_datasets a single failure is yielded instead.
    `system` is not used.
    '''
    if 'related_datasets' not in value:
        detail = ('Reference Epigenome {} has no related datasets. '
                  'It lacks all IHEC required assays.'.format(value['@id']))
        yield AuditFailure('missing IHEC required assay',
                           detail,
                           level='WARNING')
        return

    # (lookup key, wording used in the failure message), in report order.
    requirements = (
        (('ChIP-seq', 'Control'), 'control ChIP-seq'),
        (('ChIP-seq', 'H3K27me3'), 'H3K27me3 ChIP-seq'),
        (('ChIP-seq', 'H3K36me3'), 'H3K36me3 ChIP-seq'),
        (('ChIP-seq', 'H3K4me1'), 'H3K4me1 ChIP-seq'),
        (('ChIP-seq', 'H3K4me3'), 'H3K4me3 ChIP-seq'),
        (('ChIP-seq', 'H3K27ac'), 'H3K27ac ChIP-seq'),
        (('ChIP-seq', 'H3K9me3'), 'H3K9me3 ChIP-seq'),
        ('whole-genome shotgun bisulfite sequencing', 'WGBS'),
        ('RNA-seq', 'RNA-seq'),
    )

    # Start with everything outstanding and strike off what is found.
    outstanding = {key for key, _ in requirements}
    for assay in value['related_datasets']:
        assay_name = assay['assay_term_name']
        if assay_name == 'ChIP-seq':
            if 'target' in assay:
                outstanding.discard((assay_name, assay['target']['label']))
        else:
            outstanding.discard(assay_name)

    for key, wording in requirements:
        if key not in outstanding:
            continue
        detail = 'Reference Epigenome {} '.format(value['@id']) + \
                 'missing IHEC required {} assay.'.format(wording)
        yield AuditFailure('missing IHEC required assay',
                           detail,
                           level='WARNING')
Exemple #14
0
def audit_mismatched_properties(value, system):
    '''
    Warn when the experiments of a series disagree on a shared property.

    The related datasets (ignoring deleted/replaced/revoked ones) are
    grouped by assay title, biosample ontology, target, donor, biosample
    treatments and biosample genetic modifications. Donor, treatment and
    modification data come from replicates whose replicate, library and
    biosample are all in a non-excluded status. Whenever a property has
    more than one distinct group, a single WARNING is yielded that lists
    the experiments in each group. `system` is not used.
    '''
    excluded_statuses = ['deleted', 'replaced', 'revoked']
    assays = {}
    biosample_types = {}
    targets = {'no target': set()}
    donors = {}
    treatments = {'no treatment': set()}
    genetic_modifications = {'no genetic modification': set()}

    for dataset in value['related_datasets']:
        if dataset['status'] in excluded_statuses:
            continue

        assays.setdefault(dataset['assay_title'], set()).add(dataset['@id'])
        biosample_types.setdefault(
            dataset['biosample_ontology'], set()).add(dataset['@id'])

        target_key = dataset['target'] if 'target' in dataset else 'no target'
        targets.setdefault(target_key, set()).add(dataset['@id'])

        for replicate in dataset.get('replicates', []):
            # Every level of replicate -> library -> biosample must exist
            # and be in a usable status before the biosample is counted.
            if replicate['status'] in excluded_statuses:
                continue
            if 'library' not in replicate:
                continue
            library = replicate['library']
            if library['status'] in excluded_statuses:
                continue
            if 'biosample' not in library:
                continue
            biosample = library['biosample']
            if biosample['status'] in excluded_statuses:
                continue

            if 'donor' in biosample:
                donors.setdefault(
                    biosample['donor'], set()).add(dataset['@id'])

            if biosample['treatments']:
                treatment_key = ', '.join(sorted(
                    [treatment['treatment_term_name']
                     for treatment in biosample['treatments']]))
            else:
                treatment_key = 'no treatment'
            treatments.setdefault(treatment_key, set()).add(dataset['@id'])

            if biosample['genetic_modifications']:
                gm_key = ', '.join(sorted(biosample['genetic_modifications']))
            else:
                gm_key = 'no genetic modification'
            genetic_modifications.setdefault(
                gm_key, set()).add(dataset['@id'])

    # Remove unused keys.
    for groups, placeholder in ((targets, 'no target'),
                                (treatments, 'no treatment'),
                                (genetic_modifications,
                                 'no genetic modification')):
        if not groups[placeholder]:
            del groups[placeholder]

    # One row per audited property:
    # (groups, opening sentence, per-group sentence,
    #  whether the group key is rendered as a link, failure category)
    reports = (
        (assays,
         'Experiment series {} contains mismatched assays.',
         '{} Experiments {} are {} assays.',
         False,
         'Mismatched assays'),
        (biosample_types,
         'Experiment series {} contains experiments on mismatched biosamples.',
         '{} Biosamples of Experiments {} are {}.',
         True,
         'Mismatched biosamples'),
        (targets,
         'Experiment series {} contains experiments with mismatched targets.',
         '{} Experiments {} target {}.',
         True,
         'Mismatched targets'),
        (donors,
         'Experiment series {} contains experiments on biosamples from mismatched donors.',
         '{} Biosamples of Experiments {} are from donor {}.',
         True,
         'Mismatched donors'),
        (treatments,
         'Experiment series {} contains experiments on biosamples with mismatched treatments.',
         '{} Biosamples of Experiments {} were treated with {}.',
         False,
         'Mismatched biosample treatments'),
        # this text doesn't make sense for unmodified
        (genetic_modifications,
         'Experiment series {} contains experiments on biosamples with mismatched genetic modifications.',
         '{} Biosamples of Experiments {} were modified by {}.',
         False,
         'Mismatched genetic modifications'),
    )

    for groups, opening, per_group, as_link, category in reports:
        if len(groups) <= 1:
            continue
        detail = opening.format(
            audit_link(path_to_text(value['@id']), value['@id']))
        for key in groups:
            expt_list = generate_formatted_list_of_experiments(groups[key])
            shown = audit_link(path_to_text(key), key) if as_link else key
            detail = per_group.format(detail, expt_list, shown)
        yield AuditFailure(category, detail, level='WARNING')