Exemple #1
0
def audit_file_paired_ended_run_type(value, system):
    '''
    Flag reads files whose run_type is paired-ended but whose pairing
    metadata is incomplete.

    Only files with a file_format of fastq, fasta or csfasta and an
    output_type of "reads" are inspected. Files whose status is deleted,
    replaced, revoked or upload failed are skipped.

    Raises AuditFailure (DCC_ACTION) when paired_end is absent, or when
    paired_end equals 1 and the file carries no paired_with.
    '''
    ignored_statuses = ['deleted', 'replaced', 'revoked', 'upload failed']
    if value['status'] in ignored_statuses:
        return

    sequence_formats = ['fastq', 'fasta', 'csfasta']
    if value['file_format'] not in sequence_formats:
        return

    if value['output_type'] != 'reads':
        return
    if value.get('run_type') != 'paired-ended':
        return

    if 'paired_end' not in value:
        message = 'File {} has a paired-ended run_type but is missing its paired_end value'.format(
            value['@id'])
        raise AuditFailure('missing paired_end', message, level='DCC_ACTION')

    # NOTE(review): this compares against the integer 1, whereas
    # audit_paired_with compares paired_end against the string '1' - confirm
    # which representation the file items actually use.
    is_first_read = value['paired_end'] == 1
    if is_first_read and 'paired_with' not in value:
        message = 'File {} has a paired-ended run_type but is missing a paired_end=2 mate'.format(
            value['@id'])
        raise AuditFailure('missing mate pair', message, level='DCC_ACTION')
Exemple #2
0
def audit_item_schema(value, system):
    '''
    Validate an item's stored properties against its schema.

    The properties are taken from system['context'].properties (copied, so
    the context is not modified). When the stored schema_version differs
    from the type's current schema_version the copy is first upgraded with
    the registry's migrator (finalize=False).

    Yields:
        AuditFailure('upgrade failure', level='DCC_ACTION') if the upgrade
        raises anything other than RuntimeError (RuntimeError propagates),
        after which the audit stops; otherwise one
        AuditFailure('validation error[: <path>]', level='DCC_ACTION') per
        error returned by validate().
    '''
    context = system['context']
    registry = system['registry']
    if not context.schema:
        return

    properties = context.properties.copy()
    current_version = properties.get('schema_version', '')
    target_version = context.type_info.schema_version
    if target_version is not None and current_version != target_version:
        migrator = registry['migrator']
        try:
            properties = migrator.upgrade(
                context.item_type, properties, current_version, target_version,
                finalize=False, context=context, registry=registry)
        except RuntimeError:
            raise
        except Exception as e:
            detail = '%r upgrading from %r to %r' % (e, current_version, target_version)
            yield AuditFailure('upgrade failure', detail, level='DCC_ACTION')
            return

        properties['schema_version'] = target_version

    properties['uuid'] = str(context.uuid)
    _, errors = validate(context.schema, properties, properties)
    for error in errors:
        category = 'validation error'
        # str.join raises TypeError on non-string items; error paths
        # presumably follow the jsonschema convention of using integers for
        # array positions, so every element is stringified first.
        path = [str(part) for part in error.path]
        if path:
            category += ': ' + '/'.join(path)
        detail = 'Object {} has schema error {}'.format(value['@id'], error.message)
        yield AuditFailure(category, detail, level='DCC_ACTION')
Exemple #3
0
def audit_biosample_depleted_term_match(value, system):
    '''
    The depleted_in_term_name and depleted_in_term_id arrays
    should be concordant. This should be a calculated field.
    If one exists, the other should.  This should be handled in the schema.

    Deleted biosamples and biosamples without depleted_in_term_name are
    skipped.

    Raises AuditFailure (ERROR) when the two arrays differ in length, or
    when a name does not map (via term_mapping) to the id at the same
    position.
    '''
    if value['status'] == 'deleted':
        return

    if 'depleted_in_term_name' not in value:
        return

    term_names = value['depleted_in_term_name']
    # A missing id array is treated as empty, so it is reported as a length
    # mismatch (or passes when both are empty) instead of raising KeyError.
    term_ids = value.get('depleted_in_term_id', [])

    if len(term_names) != len(term_ids):
        detail = 'Biosample {} has a depleted_in_term_name array and depleted_in_term_id array of differing lengths'.format(
            value['@id'])
        raise AuditFailure('mismatched depleted_in_term length',
                           detail,
                           level='ERROR')

    for dep_term, dep_id in zip(term_names, term_ids):
        # A name unknown to term_mapping cannot match any id; report it as a
        # mismatch rather than crashing the audit with KeyError.
        if dep_term not in term_mapping or term_mapping[dep_term] != dep_id:
            detail = 'Biosample {} has a mismatch between {} and {}'.format(
                value['@id'], dep_term, dep_id)
            raise AuditFailure('mismatched depleted_in_term',
                               detail,
                               level='ERROR')
Exemple #4
0
def audit_paired_with(value, system):
    '''
    Check the paired_with links of a file that declares a paired_end.

    Deleted and replaced files, and files without paired_end, are skipped.
    A file whose paired_end is '1' must not be the paired_with target of
    more than one file (looked up through the context's reverse links);
    any other paired_end value requires the file itself to have a
    paired_with.

    Existing note kept from the original author: DISABLING until ticket
    1795 is implemented.
    '''
    if value['status'] in ['deleted', 'replaced']:
        return

    if 'paired_end' not in value:
        return

    if value['paired_end'] != '1':
        if 'paired_with' not in value:
            message = 'File {} has paired_end = {}. It requires a value for paired_with'.format(
                value['@id'], value['paired_end'])
            raise AuditFailure('missing paired_with', message, level='DCC_ACTION')
        return

    # paired_end == '1': inspect the files that point back at this one.
    mates = system['context'].get_rev_links('paired_with')
    if len(mates) > 1:
        message = 'Paired end 1 file {} paired_with by multiple paired end 2 files: {!r}'.format(
            value['@id'],
            mates,
        )
        raise AuditFailure('multiple paired_with', message, level='ERROR')
Exemple #5
0
def audit_experiment_replicated(value, system):
    '''
    Experiments that are released, release ready or ready for review are
    expected to have more than one distinct biological replicate number.

    Raises AuditFailure 'unreplicated experiment' with level DCC_ACTION for
    released experiments and level WARNING for the other two statuses.
    '''
    audited_statuses = ['released', 'release ready', 'ready for review']
    if value['status'] not in audited_statuses:
        return

    bio_rep_numbers = {
        replicate['biological_replicate_number']
        for replicate in value['replicates']
    }
    if len(bio_rep_numbers) > 1:
        return

    if value['status'] in ['released']:
        message = 'Experiment {} has only one biological replicate and is released. Check for proper annotation of this state in the metadata'.format(
            value['@id'])
        raise AuditFailure('unreplicated experiment',
                           message,
                           level='DCC_ACTION')

    if value['status'] in ['ready for review', 'release ready']:
        message = 'Experiment {} has only one biological replicate, more than one is typically expected before release'.format(
            value['@id'])
        raise AuditFailure('unreplicated experiment',
                           message,
                           level='WARNING')
Exemple #6
0
def audit_experiment_control(value, system):
    '''
    Certain assay types (ChIP-seq, ...) require possible controls with a matching biosample.
    Of course, controls do not require controls.

    Deleted and proposed experiments, experiments whose assay_term_name is
    not in controlRequiredAssayList, and experiments whose target is
    investigated as a control are skipped.

    Raises AuditFailure (NOT_COMPLIANT) when possible_controls is an empty
    list, and AuditFailure (ERROR) when a control's biosample_term_id
    differs from the experiment's.
    '''

    if value['status'] in ['deleted', 'proposed']:
        return

    # Controls are only required for the assays in controlRequiredAssayList
    if value.get('assay_term_name') not in controlRequiredAssayList:
        return

    # Control experiments themselves are not audited
    if 'target' in value and 'control' in value['target']['investigated_as']:
        return

    if value['possible_controls'] == []:
        detail = '{} experiments require a value in possible_control'.format(
            value['assay_term_name'])
        raise AuditFailure('missing possible_controls',
                           detail,
                           level='NOT_COMPLIANT')

    for control in value['possible_controls']:
        if control.get('biosample_term_id') != value.get('biosample_term_id'):
            # The biosample fields are read with .get() above, so the name is
            # read the same way: a missing name must not turn the intended
            # AuditFailure into a KeyError while building the message.
            detail = 'Control {} is for {} but experiment is done on {}'.format(
                control['@id'], control.get('biosample_term_name'),
                value.get('biosample_term_name'))
            raise AuditFailure('mismatched control', detail, level='ERROR')
Exemple #7
0
def audit_file_controlled_by(value, system):
    '''
    A fastq in a ChIP-seq experiment should have a controlled_by

    Applies to files whose dataset has an assay_term_name of ChIP-seq,
    RAMPAGE, CAGE or shRNA knockdown followed by RNA-seq, unless the
    dataset's target is investigated as a control. Deleted and replaced
    files are skipped.

    Raises AuditFailure:
        'missing controlled_by' (NOT_COMPLIANT) - fastq without controlled_by
        'mismatched controlled_by' (ERROR) - control file from a different
            biosample or with a different file_format
        'mismatched controlled_by' (DCC_ACTION) - control file whose dataset
            is not in the dataset's possible_controls
    '''

    if value['status'] in ['deleted', 'replaced']:
        return

    dataset = value['dataset']
    if dataset.get('assay_term_name') not in [
            'ChIP-seq', 'RAMPAGE', 'CAGE',
            'shRNA knockdown followed by RNA-seq'
    ]:
        return

    if 'target' in dataset and 'control' in dataset['target'].get(
            'investigated_as', []):
        return

    # Use a local default instead of writing an empty list back into value:
    # an audit must not modify the item it inspects.
    controlled_by = value.get('controlled_by', [])

    if (controlled_by == []) and (value['file_format'] in ['fastq']):
        detail = 'Fastq file {} from {} requires controlled_by'.format(
            value['@id'], dataset['assay_term_name'])
        raise AuditFailure('missing controlled_by',
                           detail,
                           level='NOT_COMPLIANT')

    possible_controls = dataset.get('possible_controls')
    biosample = dataset.get('biosample_term_id')

    for ff in controlled_by:
        control_bs = ff['dataset'].get('biosample_term_id')

        if control_bs != biosample:
            detail = 'File {} has a controlled_by file {} with conflicting biosample {}'.format(
                value['@id'], ff['@id'], control_bs)
            raise AuditFailure('mismatched controlled_by',
                               detail,
                               level='ERROR')

        if ff['file_format'] != value['file_format']:
            detail = 'File {} with file_format {} has a controlled_by file {} with file_format {}'.format(
                value['@id'], value['file_format'], ff['@id'],
                ff['file_format'])
            raise AuditFailure('mismatched controlled_by',
                               detail,
                               level='ERROR')

        # NOTE(review): membership is tested with the control dataset's @id;
        # this assumes possible_controls holds @id strings - confirm.
        if (possible_controls is None) or (ff['dataset']['@id']
                                           not in possible_controls):
            detail = 'File {} has a controlled_by file {} with a dataset {} that is not in possible_controls'.format(
                value['@id'], ff['@id'], ff['dataset']['@id'])
            raise AuditFailure('mismatched controlled_by',
                               detail,
                               level='DCC_ACTION')
Exemple #8
0
def audit_experiment_assay(value, system):
    '''
    Experiments should have assays with valid ontologies term ids and names that
    are a valid synonym.

    Deleted experiments are skipped. At most one failure is yielded:
        'missing assay information' (ERROR) - assay_term_id or
            assay_term_name is absent
        'NTR assay' (DCC_ACTION) - the id starts with 'NTR:'
        'assay_term_id not in ontology' (DCC_ACTION)
        'mismatched assay_term_name' (DCC_ACTION) - neither the name nor
            the name followed by ' assay' equals the ontology name or one
            of its synonyms
    '''
    if value['status'] == 'deleted':
        return

    # These two checks should be schema dependencies
    if 'assay_term_id' not in value:
        detail = 'Experiment {} is missing assay_term_id'.format(value['@id'])
        yield AuditFailure('missing assay information', detail, level='ERROR')
        return

    if 'assay_term_name' not in value:
        detail = 'Experiment {} is missing assay_term_name'.format(
            value['@id'])
        yield AuditFailure('missing assay information', detail, level='ERROR')
        return

    ontology = system['registry']['ontology']
    term_id = value['assay_term_id']
    term_name = value['assay_term_name']

    if term_id.startswith('NTR:'):
        detail = 'Assay_term_id is a New Term Request ({} - {})'.format(
            term_id, term_name)
        yield AuditFailure('NTR assay', detail, level='DCC_ACTION')
        return

    if term_id not in ontology:
        detail = 'Assay_term_id {} is not found in cached version of ontology'.format(
            term_id)
        # Report the explanatory message; passing the bare term_id here
        # discarded the detail that was just built.
        yield AuditFailure('assay_term_id not in ontology',
                           detail,
                           level='DCC_ACTION')
        return

    ontology_term_name = ontology[term_id]['name']
    synonyms = ontology[term_id]['synonyms']
    # The ontology may list the term with an " assay" suffix, so the name is
    # accepted both as given and with that suffix appended.
    modified_term_name = term_name + ' assay'
    name_matches = (ontology_term_name == term_name
                    or term_name in synonyms)
    modified_name_matches = (ontology_term_name == modified_term_name
                             or modified_term_name in synonyms)
    if not name_matches and not modified_name_matches:
        detail = 'Experiment has a mismatch between assay_term_name "{}" and assay_term_id "{}"'.format(
            term_name,
            term_id,
        )
        yield AuditFailure('mismatched assay_term_name',
                           detail,
                           level='DCC_ACTION')
Exemple #9
0
def audit_file_format_specifications(value, system):
    '''
    Every document listed in file_format_specifications must have a
    document_type of "file format specification".

    Raises AuditFailure 'wrong document_type' (ERROR) for the first
    document of another type. Files without the property pass.
    '''
    documents = value.get('file_format_specifications', [])
    for document in documents:
        if document['document_type'] == "file format specification":
            continue
        message = 'File {} has document {} not of type file format specification'.format(
            value['@id'], document['@id'])
        raise AuditFailure('wrong document_type', message, level='ERROR')
def audit_antibody_characterization_unique_reviews(value, system):
    '''
    Make sure primary characterizations have unique lane, biosample_term_id and
    organism combinations for characterization reviews

    Characterizations that are deleted, not submitted for review by lab, in
    progress or not reviewed, and secondary characterizations, are skipped.

    Raises AuditFailure 'duplicate lane review' (ERROR) on the first
    repeated (lane, biosample_term_id, organism) combination.
    '''
    if (value['status'] in [
            'deleted', 'not submitted for review by lab', 'in progress',
            'not reviewed'
    ]):
        return

    if 'secondary_characterization_method' in value:
        return

    seen_reviews = set()
    # A characterization without reviews has nothing to compare.
    for review in value.get('characterization_reviews', []):
        lane = review['lane']
        term_id = review['biosample_term_id']
        organism = review['organism']
        # A tuple keeps each field in its own position. An unordered set
        # would treat reviews as equal whenever the same values appear in
        # different fields, producing false duplicates.
        review_key = (lane, term_id, organism)
        if review_key in seen_reviews:
            detail = 'Characterization_review.lane {} is a duplicate review for {} - {}'.format(
                lane, term_id, organism)
            raise AuditFailure('duplicate lane review', detail, level='ERROR')
        seen_reviews.add(review_key)
Exemple #11
0
def audit_experiment_spikeins(value, system):
    '''
    RNA-seq experiments whose libraries have a size_range of ">200" should
    state which spike-ins were used.

    Deleted and replaced experiments and non RNA-seq assays are skipped, as
    are replicates without a library. Yields one AuditFailure
    'missing spikeins_used' (NOT_COMPLIANT) per replicate whose library has
    that size_range and a spikeins_used that is absent or an empty list.
    '''
    if value['status'] in ['deleted', 'replaced']:
        return

    if value.get('assay_term_name') != 'RNA-seq':
        return

    for replicate in value['replicates']:
        library = replicate.get('library')
        if library is None:
            continue
        if library.get('size_range') != '>200':
            continue

        spikeins = library.get('spikeins_used')
        lacks_spikeins = (spikeins is None) or (spikeins == [])
        if lacks_spikeins:
            message = 'Library {} is in an RNA-seq experiment and has size_range >200. It requires a value for spikeins_used'.format(
                library['@id'])
            yield AuditFailure('missing spikeins_used',
                               message,
                               level='NOT_COMPLIANT')
def audit_antibody_characterization_status(value, system):
    '''
    Make sure the lane_status matches
    the characterization status

    Secondary characterizations are skipped. A characterization that is
    deleted, not submitted for review by lab, in progress or not reviewed
    must not have characterization_reviews. For every other status:
      - lanes must be 'pending dcc review' exactly when the characterization
        is 'pending dcc review';
      - a characterization with a 'compliant' lane must itself be
        'compliant'.

    Raises AuditFailure 'unexpected characterization_reviews' (WARNING) or
    'mismatched lane status' (WARNING for the pending check, DCC_ACTION for
    the compliant check).
    '''
    if 'secondary_characterization_method' in value:
        return

    if (value['status'] in [
            "deleted", "not submitted for review by lab", 'in progress',
            'not reviewed'
    ]):
        # With any of these statuses there should be no reviews at all.
        if 'characterization_reviews' in value:
            detail = 'Antibody_characterization.status of {} is incompatible with having a value for characterization_reviews'.format(
                value['status'])
            raise AuditFailure('unexpected characterization_reviews',
                               detail,
                               level='WARNING')
        return

    # Check each lane_status in characterization_reviews for a match.
    is_pending = value['status'] == 'pending dcc review'
    has_compliant_lane = False
    for lane in value['characterization_reviews']:
        lane_is_pending = lane['lane_status'] == 'pending dcc review'
        if is_pending != lane_is_pending:
            # Report the characterization's actual status: the mismatch also
            # fires when only the lane is pending, in which case the
            # characterization is not 'pending dcc review'.
            detail = 'A lane.status of {} is incompatible with antibody_characterization.status of {}'.format(
                lane['lane_status'], value['status'])
            raise AuditFailure('mismatched lane status',
                               detail,
                               level='WARNING')

        if lane['lane_status'] == 'compliant':
            has_compliant_lane = True

    if has_compliant_lane and value['status'] != 'compliant':
        # The offending lane status is 'compliant' by construction; the loop
        # variable would only describe whichever lane happened to be last.
        detail = 'A lane.status of {} is incompatible with antibody_characterization status of {}'.format(
            'compliant', value['status'])
        raise AuditFailure('mismatched lane status',
                           detail,
                           level='DCC_ACTION')
Exemple #13
0
def audit_biosample_transfection_type(value, system):
    '''
    A non-deleted biosample that lists rnais or constructs must also state
    a transfection_type.

    Raises AuditFailure 'missing transfection_type' (ERROR); rnais is
    checked before constructs.
    '''
    if value['status'] == 'deleted':
        return

    lacks_transfection_type = 'transfection_type' not in value
    checks = (
        ('rnais',
         'Biosample {} with a value for RNAi requires transfection_type'),
        ('constructs',
         'Biosample {} with a value for construct requires transfection_type'),
    )
    for key, template in checks:
        if (value[key]) and lacks_transfection_type:
            message = template.format(value['@id'])
            raise AuditFailure('missing transfection_type', message, level='ERROR')
Exemple #14
0
def audit_biosample_donor(value, system):
    '''
    Check a biosample's donor.

    Deleted biosamples are skipped. A biosample without a donor passes only
    when it has pooled_from entries. Otherwise the donor's organism, and
    the organism of the donor's mutated_gene when present, must match the
    biosample's organism, and the mutated_gene must not be investigated as
    a tag, control, recombinant protein or modification.

    Raises AuditFailure (ERROR) with category 'missing donor',
    'mismatched organism', 'mismatched mutated_gene organism' or
    'invalid donor mutated_gene'.
    '''
    if value['status'] in ['deleted']:
        return

    if 'donor' not in value:
        if value['pooled_from']:
            return
        message = 'Biosample {} requires a donor'.format(value['@id'])
        raise AuditFailure('missing donor', message, level='ERROR')

    donor = value['donor']
    sample_species = value['organism']['name']
    donor_species = donor['organism']['name']
    if sample_species != donor_species:
        message = 'Biosample {} is organism {}, yet its donor {} is organism {}. Biosamples require a donor of the same species'.format(
            value['@id'], sample_species, donor['@id'], donor_species)
        raise AuditFailure('mismatched organism', message, level='ERROR')

    if 'mutated_gene' not in donor:
        return

    gene = donor['mutated_gene']
    gene_species = gene['organism']['name']
    if sample_species != gene_species:
        message = 'Biosample {} is organism {}, but its donor {} mutated_gene is in {}. Donor mutated_gene should be of the same species as the donor and biosample'.format(
            value['@id'], sample_species, donor['@id'], gene_species)
        raise AuditFailure('mismatched mutated_gene organism',
                           message,
                           level='ERROR')

    disallowed_roles = [
        'histone modification', 'tag', 'control',
        'recombinant protein', 'nucleotide modification',
        'other post-translational modification'
    ]
    for role in gene['investigated_as']:
        if role not in disallowed_roles:
            continue
        message = 'Donor {} has an invalid mutated_gene {}. Donor mutated_genes should not be tags, controls, recombinant proteins or modifications'.format(
            donor['@id'], gene['name'])
        raise AuditFailure('invalid donor mutated_gene',
                           message,
                           level='ERROR')
Exemple #15
0
def audit_file_size(value, system):
    '''
    Files that are not deleted, replaced or uploading must record a
    file_size; otherwise AuditFailure 'missing file_size' (DCC_ACTION) is
    raised.
    '''
    exempt_statuses = ['deleted', 'replaced', 'uploading']
    if value['status'] in exempt_statuses:
        return

    if 'file_size' in value:
        return

    message = 'File {} requires a value for file_size'.format(value['@id'])
    raise AuditFailure('missing file_size', message, level='DCC_ACTION')
Exemple #16
0
def audit_experiment_release_date(value, system):
    '''
    A released experiment must carry a date_released; otherwise
    AuditFailure 'missing date_released' (DCC_ACTION) is raised.
    This should eventually go to schema.
    '''
    if value['status'] != 'released':
        return
    if 'date_released' in value:
        return

    message = 'Experiment {} is released and requires a value in date_released'.format(
        value['@id'])
    raise AuditFailure('missing date_released', message, level='DCC_ACTION')
Exemple #17
0
def audit_experiment_ChIP_control(value, system):
    '''
    Check the possible_controls of a ChIP-seq experiment.

    Skipped: deleted, proposed, preliminary, replaced and revoked
    experiments, non ChIP-seq assays, experiments whose own target is
    investigated as a control, and experiments without possible_controls.

    Raises AuditFailure 'invalid possible_control' (ERROR) when a control
    has no target investigated as a control, and 'missing input control'
    (NOT_COMPLIANT) when every control's first replicate has an antibody.
    '''
    skipped_statuses = [
        'deleted', 'proposed', 'preliminary', 'replaced', 'revoked'
    ]
    if value['status'] in skipped_statuses:
        return

    # Controls are currently only required for ChIP-seq
    if value.get('assay_term_name') != 'ChIP-seq':
        return

    # Control experiments themselves are not audited
    if 'target' in value and 'control' in value['target']['investigated_as']:
        return

    controls = value['possible_controls']
    if not controls:
        return

    antibody_control_count = 0
    for ctl in controls:
        if ('target' not in ctl) or (
                'control' not in ctl['target']['investigated_as']):
            message = 'Experiment {} is ChIP-seq but its control {} is not linked to a target with investigated.as = control'.format(
                value['@id'], ctl['@id'])
            raise AuditFailure('invalid possible_control',
                               message,
                               level='ERROR')

        replicates = ctl['replicates']
        if replicates and 'antibody' in replicates[0]:
            antibody_control_count += 1

    if antibody_control_count != len(controls):
        return

    # Every control counted as an antibody (mock IP) control, so no input
    # control is present. The binding group agreed that all ChIP-seqs should
    # have one. The message names the last control that was iterated.
    message = 'Experiment {} is ChIP-seq and requires at least one input control, as agreed upon by the binding group. {} is not an input control'.format(
        value['@id'], ctl['@id'])
    raise AuditFailure('missing input control',
                       message,
                       level='NOT_COMPLIANT')
Exemple #18
0
def audit_biosample_term(value, system):
    '''
    Biosample_term_id and biosample_term_name
    and biosample_type should all be present.
    This should be handled by schemas.
    Biosample_term_id should be in the ontology.
    Biosample_term_name should match biosample_term_id.

    Deleted biosamples and biosamples without biosample_term_id are
    skipped.

    Raises AuditFailure (DCC_ACTION) with category 'NTR biosample',
    'term_id not in ontology' or 'mismatched biosample_term'.
    '''

    if value['status'] in ['deleted']:
        return

    if 'biosample_term_id' not in value:
        return

    ontology = system['registry']['ontology']
    term_id = value['biosample_term_id']
    term_name = value.get('biosample_term_name')

    if term_id.startswith('NTR:'):
        detail = 'Biosample {} has a New Term Request {} - {}'.format(
            value['@id'], term_id, term_name)
        raise AuditFailure('NTR biosample', detail, level='DCC_ACTION')

    if term_id not in ontology:
        detail = 'Biosample {} has biosample_term_id of {} which is not in ontology'.format(
            value['@id'], term_id)
        # Report the explanatory message; passing the bare term_id here
        # discarded the detail that was just built.
        raise AuditFailure('term_id not in ontology',
                           detail,
                           level='DCC_ACTION')

    ontology_term = ontology[term_id]
    if ontology_term['name'] != term_name and term_name not in ontology_term[
            'synonyms']:
        detail = 'Biosample {} has a mismatch between biosample_term_id "{}" and biosample_term_name "{}"'.format(
            value['@id'],
            term_id,
            term_name,
        )
        raise AuditFailure('mismatched biosample_term',
                           detail,
                           level='DCC_ACTION')
Exemple #19
0
def audit_references_for_publication(value, system):
    '''
    Datasets of type publication are expected to list references. Those
    without any are flagged with AuditFailure 'missing reference'
    (WARNING) so the reference can be added once the publication has been
    accepted. Deleted, replaced, revoked and preliminary datasets are
    skipped.
    '''
    exempt_statuses = ['deleted', 'replaced', 'revoked', 'preliminary']
    if value['status'] in exempt_statuses:
        return

    if value['dataset_type'] != 'publication':
        return
    if value['references']:
        return

    message = 'publication dataset missing a reference to a publication'
    raise AuditFailure('missing reference', message, level='WARNING')
def audit_antibody_characterization_review(value, system):
    '''
    Make sure that biosample terms are in ontology
    for each characterization_review.

    Characterizations that are not reviewed, not submitted for review by
    lab, deleted or in progress, and secondary characterizations, are
    skipped.

    Raises AuditFailure 'NTR biosample' (DCC_ACTION),
    'term_id not in ontology' (DCC_ACTION) or 'mismatched term_name'
    (ERROR) for the first offending review.
    '''
    if (value['status'] in [
            'not reviewed', 'not submitted for review by lab', 'deleted',
            'in progress'
    ]):
        return

    if 'secondary_characterization_method' in value:
        return

    reviews = value['characterization_reviews']
    if not reviews:
        return

    ontology = system['registry']['ontology']
    for review in reviews:
        term_id = review['biosample_term_id']
        term_name = review['biosample_term_name']

        if term_id.startswith('NTR:'):
            detail = 'Antibody_characterization {} contains a New Term Request {} - {}'.format(
                value['@id'], term_id, term_name)
            raise AuditFailure('NTR biosample', detail, level='DCC_ACTION')

        if term_id not in ontology:
            detail = 'Antibody characterization {} contains a biosample_term_id {} that is not in the ontology'.format(
                value['@id'], term_id)
            # Report the explanatory message; passing the bare term_id here
            # discarded the detail that was just built.
            raise AuditFailure('term_id not in ontology',
                               detail,
                               level='DCC_ACTION')

        ontology_term_name = ontology[term_id]['name']
        if ontology_term_name != term_name and term_name not in ontology[
                term_id]['synonyms']:
            detail = 'Antibody characterization {} has a mismatched term {} - {} expected {}'.format(
                value['@id'], term_id, term_name, ontology_term_name)
            raise AuditFailure('mismatched term_name',
                               detail,
                               level='ERROR')
Exemple #21
0
def audit_library_depleted_in(value, system):
    '''
    If there is a depleted_term_name or term_id,
    both should exist - should be handled by schema
    They should match each other.
    This should also be replaced by a calculated field

    Deleted libraries, and libraries where either array is missing or
    empty, are skipped.

    Yields AuditFailure (ERROR):
        'depleted_in length mismatch' - the arrays differ in length
        'invalid depleted_in_term_id' - an id equals nucleic_acid_term_id
        'mismatched depleted_in_term' - a name does not map (via
            moleculeDict) to the id at the same position
    '''

    if value['status'] in ['deleted']:
        return

    # Both arrays are optional; a missing one is treated like an empty one
    # instead of raising KeyError.
    term_names = value.get('depleted_in_term_name')
    term_ids = value.get('depleted_in_term_id')
    if not term_names or not term_ids:
        return

    if len(term_names) != len(term_ids):
        detail = 'Library {} has depleted_in_term_name array and depleted_in_term_id array of differing lengths'.format(
            value['@id'])
        yield AuditFailure('depleted_in length mismatch',
                           detail,
                           level='ERROR')

    for i, dep_id in enumerate(term_ids):
        if dep_id == value['nucleic_acid_term_id']:
            detail = 'Library {} of type {} cannot be depleted in {}'.format(
                value['@id'], value['nucleic_acid_term_id'], dep_id)
            yield AuditFailure('invalid depleted_in_term_id',
                               detail,
                               level='ERROR')

        if i >= len(term_names):
            # More ids than names: there is no name to compare at this
            # position. The length mismatch was already reported above.
            continue

        dep_name = term_names[i]
        # A name unknown to moleculeDict cannot match any id; report it as a
        # mismatch rather than crashing the audit with KeyError.
        if dep_name not in moleculeDict or moleculeDict[dep_name] != dep_id:
            detail = 'Library {} has mismatch between {} - {}'.format(
                value['@id'], dep_name, dep_id)
            yield AuditFailure('mismatched depleted_in_term',
                               detail,
                               level='ERROR')
Exemple #22
0
def audit_item_status(value, system):
    '''
    Compare an item's status level with the status levels of the objects it
    links to.

    Levels come from STATUS_LEVEL (50 when a status is not listed). Items
    without a status, or whose level is 0, are not audited. Links reached
    through the 'supercedes' and 'step_run' schema paths are ignored, as
    are linked objects that have no status or are 'disabled'.

    Yields AuditFailure 'mismatched status' with level ERROR when a linked
    object has level 0, and DCC_ACTION when its level is lower than the
    item's.
    '''
    if 'status' not in value:
        return

    own_level = STATUS_LEVEL.get(value['status'], 50)
    if own_level == 0:
        return

    context = system['context']
    request = system['request']

    linked_paths = set()
    for schema_path in context.type_info.schema_links:
        if schema_path not in ['supercedes', 'step_run']:
            linked_paths.update(simple_path_ids(value, schema_path))

    for linked_path in linked_paths:
        linked_value = request.embed(linked_path + '@@object')
        if 'status' not in linked_value:
            continue
        if linked_value['status'] == 'disabled':
            continue

        # Special case: A revoked file can have a deleted replicate ticket #2938
        if ('file' in value['@type']
                and value['status'] == 'revoked'
                and 'replicate' in linked_value['@type']
                and linked_value['status'] == 'deleted'):
            continue

        linked_level = STATUS_LEVEL.get(linked_value['status'], 50)
        if linked_level == 0:
            severity = 'ERROR'
        elif linked_level < own_level:
            severity = 'DCC_ACTION'
        else:
            continue

        message = '{} {} has {} subobject {}'.format(
            value['status'], value['@id'], linked_value['status'], linked_value['@id'])
        yield AuditFailure('mismatched status', message, level=severity)
Exemple #23
0
def audit_file_read_length(value, system):
    '''
    A file whose output_type is "reads" should state its read_length;
    otherwise AuditFailure 'missing read_length' (DCC_ACTION) is raised.
    Deleted and replaced files are skipped.
    '''
    if value['status'] in ['deleted', 'replaced']:
        return

    is_reads_file = value['output_type'] == 'reads'
    if not is_reads_file:
        return

    if 'read_length' in value:
        return

    message = 'Reads file {} missing read_length'.format(value['@id'])
    raise AuditFailure('missing read_length', message, level='DCC_ACTION')
Exemple #24
0
def audit_library_RNA_size_range(value, system):
    '''
    A library whose nucleic_acid_term_id is SO:0000356 or SO:0000871 (the
    ids this audit treats as RNA) must specify a size_range; otherwise
    AuditFailure 'missing size_range' (ERROR) is raised.
    Deleted libraries are skipped. This needs to accommodate the rfa.
    '''
    if value['status'] in ['deleted']:
        return

    rna_term_ids = ['SO:0000356', 'SO:0000871']
    is_rna_library = value['nucleic_acid_term_id'] in rna_term_ids

    if is_rna_library and 'size_range' not in value:
        message = 'RNA library {} requires a value for size_range'.format(
            value['@id'])
        raise AuditFailure('missing size_range', message, level='ERROR')
def audit_antibody_characterization_target(value, system):
    '''
    Check that the characterization's target agrees with the targets of the
    antibody it characterizes.

    For a target investigated as a recombinant protein, the antibody must
    have a target investigated as a tag, and the part of the target label
    before the first '-' must be one of the antibody's target labels.
    For any other target, its name must equal the name of one of the
    antibody's targets.

    Raises AuditFailure (ERROR) with category 'not tagged antibody',
    'mismatched tag target' or 'mismatched target'.
    '''
    antibody = value['characterizes']
    target = value['target']

    if 'recombinant protein' not in target['investigated_as']:
        matching_targets = [
            candidate for candidate in antibody['targets']
            if target['name'] == candidate.get('name')
        ]
        if not matching_targets:
            message = 'Target {} is not found in target list for antibody {}'.format(
                target['name'], antibody['@id'])
            raise AuditFailure('mismatched target', message, level='ERROR')
        return

    tag_prefix = target['label'].split('-')[0]
    antibody_labels = set()
    antibody_roles = set()
    for candidate in antibody['targets']:
        antibody_labels.add(candidate['label'])
        antibody_roles.update(candidate['investigated_as'])

    if 'tag' not in antibody_roles:
        message = 'Antibody {} is not for a tagged protein, yet target is investigated_as a recombinant protein'.format(
            antibody['@id'])
        raise AuditFailure('not tagged antibody', message, level='ERROR')

    if tag_prefix not in antibody_labels:
        message = '{} is not found in target list for antibody {}'.format(
            tag_prefix, antibody['@id'])
        raise AuditFailure('mismatched tag target',
                           message,
                           level='ERROR')
Exemple #26
0
def audit_rep_extra_items(value, system):
    '''
    A replicate should no longer carry platform, read_length or
    paired_ended. Should be in the schema.

    Raises AuditFailure 'replicate with <property>' (DCC_ACTION) for the
    first of those properties found on the replicate.
    '''
    retired_properties = ['platform', 'read_length', 'paired_ended']
    for property_name in retired_properties:
        if property_name not in value:
            continue

        message = 'Replicate {} has a item {}'.format(
            value['@id'],
            value[property_name],
        )
        category = 'replicate with {}'.format(property_name)
        raise AuditFailure(category, message, level='DCC_ACTION')
Exemple #27
0
def audit_antibody_lot_target(value, system):
    '''
    The target of every characterization of an antibody lot should appear
    in the lot's targets list.

    Lots that are not pursued or deleted, and lots without
    characterizations, are skipped. Yields one AuditFailure
    'mismatched target' (ERROR) per characterization whose target @id is
    not in value['targets'].
    '''
    if value['status'] in ['not pursued', 'deleted']:
        return

    characterizations = value['characterizations']
    if not characterizations:
        return

    for characterization in characterizations:
        target_id = characterization['target']['@id']
        if target_id in value['targets']:
            continue
        message = 'The antibody_lot {} has a characterization {} with target {}, which is not in the targets list'.format(
            value['@id'], characterization['@id'], characterization['target']['@id'])
        yield AuditFailure('mismatched target', message, level='ERROR')
Exemple #28
0
def audit_file_platform(value, system):
    '''
    A file whose file_format is one of raw_data_formats should specify a
    platform; otherwise AuditFailure 'missing platform' (ERROR) is raised.
    Deleted and replaced files are skipped. Should be in the schema.
    '''
    if value['status'] in ['deleted', 'replaced']:
        return

    is_raw_data = value['file_format'] in raw_data_formats
    if not is_raw_data:
        return

    if 'platform' in value:
        return

    message = 'Raw data file {} missing platform information'.format(
        value['@id'])
    raise AuditFailure('missing platform', message, level='ERROR')
Exemple #29
0
def audit_file_flowcells(value, system):
    '''
    A fastq file is expected to carry flowcell_details. When the property
    is absent or an empty list, AuditFailure 'missing flowcell_details'
    (WARNING) is raised. Deleted and replaced files and other file formats
    are skipped.
    '''
    if value['status'] in ['deleted', 'replaced']:
        return

    if value['file_format'] not in ['fastq']:
        return

    # An absent property is handled exactly like an empty list.
    if value.get('flowcell_details', []) == []:
        message = 'Fastq file {} is missing flowcell_details'.format(
            value['@id'])
        raise AuditFailure('missing flowcell_details', message, level='WARNING')
Exemple #30
0
def audit_experiment_description(value, system):
    '''
    Experiment descriptions should read like phrases. As a first
    approximation this audit only looks for the characters '=', ':', '!'
    and ';' and raises AuditFailure 'malformed description' (WARNING) when
    one occurs. Deleted experiments and experiments without a description
    are skipped.
    '''
    if value['status'] == 'deleted':
        return

    if 'description' not in value:
        return

    odd_characters = ['=', ':', '!', ';']
    for character in value['description']:
        if character not in odd_characters:
            continue
        message = 'Experiment {} has odd character(s) in the description'.format(
            value['@id'])
        raise AuditFailure('malformed description', message, level='WARNING')