def audit_biosample_depleted_term_match(value, system):
    '''
    Check that depleted_in_term_name and depleted_in_term_id are concordant.

    The two arrays are compared position by position: every name is looked
    up in term_mapping and must equal the id stored at the same index.
    Ideally this would be a calculated field enforced by the schema.

    Deleted biosamples and biosamples without depleted_in_term_name are
    skipped.  The system argument is not used.

    Raises:
        AuditFailure: level ERROR, when the two arrays differ in length or
            when a name does not map to the id at the same position.
    '''
    if value['status'] == 'deleted':
        return

    if 'depleted_in_term_name' not in value:
        return

    term_names = value['depleted_in_term_name']
    # A name array without an id array is reported as a length mismatch
    # instead of crashing the audit with a KeyError.
    term_ids = value.get('depleted_in_term_id', [])

    if len(term_names) != len(term_ids):
        detail = 'Biosample {} has a depleted_in_term_name array and depleted_in_term_id array of differing lengths'.format(
            value['@id'])
        raise AuditFailure('mismatched depleted_in_term length', detail, level='ERROR')

    for dep_term, dep_id in zip(term_names, term_ids):
        if term_mapping[dep_term] != dep_id:
            detail = 'Biosample {} has a mismatch between {} and {}'.format(
                value['@id'], dep_term, dep_id)
            raise AuditFailure('mismatched depleted_in_term', detail, level='ERROR')
def audit_experiment_replicated(value, system):
    '''
    Experiments in ready for review, release ready or released state should
    be replicated.  If not, wranglers should check with the lab as to why
    before release.

    An experiment counts as replicated when its replicates carry more than
    one distinct biological_replicate_number.  The system argument is not
    used.

    Raises:
        AuditFailure: level DCC_ACTION for a released experiment, level
            WARNING for one that is ready for review or release ready.
    '''
    if value['status'] not in ['released', 'release ready', 'ready for review']:
        return

    # Single cell isolation experiments are excluded from the replication
    # requirement.  .get() keeps the audit from crashing on experiments
    # that have no assay_term_name at all.
    if value.get('assay_term_name') == 'single cell isolation followed by RNA-seq':
        return

    bio_rep_numbers = {
        rep['biological_replicate_number'] for rep in value['replicates']
    }
    if len(bio_rep_numbers) > 1:
        return

    if value['status'] == 'released':
        detail = 'Experiment {} has only one biological replicate and is released. Check for proper annotation of this state in the metadata'.format(
            value['@id'])
        raise AuditFailure('unreplicated experiment', detail, level='DCC_ACTION')

    # Only 'ready for review' and 'release ready' can reach this point.
    detail = 'Experiment {} has only one biological replicate, more than one is typically expected before release'.format(
        value['@id'])
    raise AuditFailure('unreplicated experiment', detail, level='WARNING')
def audit_file_paired_ended_run_type(value, system):
    '''
    Catch paired-ended read files with incomplete pairing information.

    Some files were upgraded to run_type = paired-ended when the property
    migrated out of replicate, but lack the paired_end property saying
    which read they are.  This audit also catches a paired_end = 1 file
    that has no paired_with, i.e. its paired_end = 2 mate was never
    registered.

    Only fastq, fasta and csfasta files with output_type = reads are
    checked; deleted, replaced, revoked and upload failed files are
    skipped.  The system argument is not used.

    Raises:
        AuditFailure: level DCC_ACTION, for a missing paired_end or a
            missing mate.
    '''
    if value['status'] in ['deleted', 'replaced', 'revoked', 'upload failed']:
        return

    if value['file_format'] not in ['fastq', 'fasta', 'csfasta']:
        return

    if value['output_type'] != 'reads' or value.get('run_type') != 'paired-ended':
        return

    if 'paired_end' not in value:
        detail = 'File {} has a paired-ended run_type but is missing its paired_end value'.format(
            value['@id'])
        raise AuditFailure('missing paired_end', detail, level='DCC_ACTION')

    # audit_paired_with in this file compares paired_end to the string '1';
    # normalising through str() accepts both the string and integer forms.
    if str(value['paired_end']) == '1' and 'paired_with' not in value:
        detail = 'File {} has a paired-ended run_type but is missing a paired_end=2 mate'.format(
            value['@id'])
        raise AuditFailure('missing mate pair', detail, level='DCC_ACTION')
def audit_experiment_control(value, system):
    '''
    Certain assay types (those listed in controlRequiredAssayList) require
    possible controls with a matching biosample.  Of course, controls do
    not require controls.

    Deleted and proposed experiments are skipped.  The system argument is
    not used.

    Raises:
        AuditFailure: level NOT_COMPLIANT when possible_controls is empty,
            level ERROR when a control's biosample_term_id differs from
            the experiment's.
    '''
    if value['status'] in ['deleted', 'proposed']:
        return

    # Controls are only required for the assays in controlRequiredAssayList.
    if value.get('assay_term_name') not in controlRequiredAssayList:
        return

    # Control experiments themselves do not need controls.
    if 'target' in value and 'control' in value['target']['investigated_as']:
        return

    # A missing possible_controls property is treated like an empty one.
    possible_controls = value.get('possible_controls') or []
    if not possible_controls:
        detail = '{} experiments require a value in possible_control'.format(
            value['assay_term_name'])
        raise AuditFailure('missing possible_controls', detail, level='NOT_COMPLIANT')

    for control in possible_controls:
        if control.get('biosample_term_id') != value.get('biosample_term_id'):
            # .get() so a missing biosample_term_name cannot mask the real
            # audit failure with a KeyError while the message is built.
            detail = 'Control {} is for {} but experiment is done on {}'.format(
                control['@id'],
                control.get('biosample_term_name'),
                value.get('biosample_term_name'))
            raise AuditFailure('mismatched control', detail, level='ERROR')
def audit_experiment_replicates_biosample(value, system):
    '''
    Check how the biosamples are distributed over an experiment's replicates.

    Replicates sharing a biological_replicate_number (technical replicates)
    must use the same biosample, and different biological replicates must
    not share a biosample.  Replicates without a library or without a
    library biosample are ignored.  Deleted, replaced and revoked
    experiments are skipped.  The system argument is not used.

    Raises:
        AuditFailure: level DCC_ACTION, in either of the two cases above.
    '''
    if value['status'] in ['deleted', 'replaced', 'revoked']:
        return

    # biological_replicate_number -> accession of the first biosample seen
    biosample_by_bio_rep = {}
    # accessions already claimed by some biological replicate
    seen_accessions = set()

    for rep in value['replicates']:
        bio_rep_num = rep['biological_replicate_number']
        if 'library' not in rep or 'biosample' not in rep['library']:
            continue

        biosample = rep['library']['biosample']
        accession = biosample['accession']

        if bio_rep_num not in biosample_by_bio_rep:
            biosample_by_bio_rep[bio_rep_num] = accession
            if accession in seen_accessions:
                detail = 'Experiment {} has multiple biological replicates associated with the same biosample {}'.format(
                    value['@id'], biosample['@id'])
                raise AuditFailure(
                    'biological replicates with identical biosample',
                    detail, level='DCC_ACTION')
            seen_accessions.add(accession)
        elif accession != biosample_by_bio_rep[bio_rep_num]:
            detail = 'Experiment {} has technical replicates associated with the different biosamples'.format(
                value['@id'])
            raise AuditFailure(
                'technical replicates with not identical biosample',
                detail, level='DCC_ACTION')
def audit_library_depleted_in(value, system):
    '''
    Check the depleted_in_term_name / depleted_in_term_id pair of a library.

    If there is a depleted_in_term_name or depleted_in_term_id, both should
    exist (which should be handled by the schema) and they should match
    each other.  This should eventually be replaced by a calculated field.

    Deleted libraries, and libraries where either array is missing or
    empty, are skipped.  The system argument is not used.

    Yields:
        AuditFailure: level ERROR, when the arrays differ in length, when a
            depleted id equals the library's own nucleic_acid_term_id, or
            when a name does not map (through moleculeDict) to the id at
            the same position.
    '''
    if value['status'] in ['deleted']:
        return

    term_names = value.get('depleted_in_term_name')
    term_ids = value.get('depleted_in_term_id')
    if not term_names or not term_ids:
        return

    if len(term_names) != len(term_ids):
        detail = 'Library {} has depleted_in_term_name array and depleted_in_term_id array of differing lengths'.format(
            value['@id'])
        yield AuditFailure('depleted_in length mismatch', detail, level='ERROR')

    for index, term_id in enumerate(term_ids):
        if term_id == value['nucleic_acid_term_id']:
            detail = 'Library {} of type {} cannot be depleted in {}'.format(
                value['@id'], value['nucleic_acid_term_id'], term_id)
            yield AuditFailure('invalid depleted_in_term_id', detail, level='ERROR')

        # When the id array is longer than the name array there is no name
        # to compare against; the length mismatch was already reported, so
        # skip instead of raising IndexError.
        if index >= len(term_names):
            continue

        term_name = term_names[index]
        if moleculeDict[term_name] != term_id:
            detail = 'Library {} has mismatch between {} - {}'.format(
                value['@id'], term_name, term_id)
            yield AuditFailure('mismatched depleted_in_term', detail, level='ERROR')
def audit_file_controlled_by(value, system):
    '''
    A fastq in a ChIP-seq, RAMPAGE, CAGE or shRNA knockdown followed by
    RNA-seq experiment should have a controlled_by, and every controlled_by
    file should be consistent with the file it controls.

    Deleted, replaced and revoked files are skipped, as are files whose
    dataset is itself a control (target investigated_as control).  The
    system argument is not used, and value is not modified.

    Raises:
        AuditFailure: level NOT_COMPLIANT when a fastq has no
            controlled_by; level ERROR when a controlled_by file has a
            different biosample_term_id or file_format; level DCC_ACTION
            when its dataset is not among the dataset's possible_controls.
    '''
    if value['status'] in ['deleted', 'replaced', 'revoked']:
        return

    dataset = value['dataset']
    if dataset.get('assay_term_name') not in [
            'ChIP-seq', 'RAMPAGE', 'CAGE',
            'shRNA knockdown followed by RNA-seq']:
        return

    if 'target' in dataset and 'control' in dataset['target'].get('investigated_as', []):
        return

    # Read with a default instead of writing an empty list back into the
    # audited object, so the audit has no side effect on its input.
    controlled_by = value.get('controlled_by', [])

    if controlled_by == [] and value['file_format'] in ['fastq']:
        detail = 'Fastq file {} from {} requires controlled_by'.format(
            value['@id'], dataset['assay_term_name'])
        raise AuditFailure('missing controlled_by', detail, level='NOT_COMPLIANT')

    possible_controls = dataset.get('possible_controls')
    biosample = dataset.get('biosample_term_id')

    for ff in controlled_by:
        control_bs = ff['dataset'].get('biosample_term_id')

        if control_bs != biosample:
            detail = 'File {} has a controlled_by file {} with conflicting biosample {}'.format(
                value['@id'], ff['@id'], control_bs)
            raise AuditFailure('mismatched controlled_by', detail, level='ERROR')

        if ff['file_format'] != value['file_format']:
            detail = 'File {} with file_format {} has a controlled_by file {} with file_format {}'.format(
                value['@id'], value['file_format'], ff['@id'], ff['file_format'])
            raise AuditFailure('mismatched controlled_by', detail, level='ERROR')

        if (possible_controls is None) or (ff['dataset']['@id'] not in possible_controls):
            detail = 'File {} has a controlled_by file {} with a dataset {} that is not in possible_controls'.format(
                value['@id'], ff['@id'], ff['dataset']['@id'])
            raise AuditFailure('mismatched controlled_by', detail, level='DCC_ACTION')
def audit_experiment_assay(value, system):
    '''
    Experiments should have assays with valid ontology term ids and names
    that are a valid synonym.

    The ontology is read from system['registry']['ontology'].  A term name
    is accepted when it, or the name with ' assay' appended, equals the
    ontology term's name or is one of its synonyms.  Deleted experiments
    are skipped.  At most one failure is yielded per experiment.

    Yields:
        AuditFailure: level ERROR when assay_term_id or assay_term_name is
            missing (these should be schema dependencies); level
            DCC_ACTION for a New Term Request id, an id that is not in the
            ontology, or a name that does not match the id.
    '''
    if value['status'] == 'deleted':
        return

    if 'assay_term_id' not in value:
        detail = 'Experiment {} is missing assay_term_id'.format(value['@id'])
        yield AuditFailure('missing assay information', detail, level='ERROR')
        return

    if 'assay_term_name' not in value:
        detail = 'Experiment {} is missing assay_term_name'.format(
            value['@id'])
        yield AuditFailure('missing assay information', detail, level='ERROR')
        return

    ontology = system['registry']['ontology']
    term_id = value['assay_term_id']
    term_name = value['assay_term_name']

    if term_id.startswith('NTR:'):
        detail = 'Assay_term_id is a New Term Request ({} - {})'.format(
            term_id, term_name)
        yield AuditFailure('NTR assay', detail, level='DCC_ACTION')
        return

    if term_id not in ontology:
        detail = 'Assay_term_id {} is not found in cached version of ontology'.format(
            term_id)
        # Report the formatted detail message, not the bare term id.
        yield AuditFailure('assay_term_id not in ontology', detail, level='DCC_ACTION')
        return

    ontology_term_name = ontology[term_id]['name']
    synonyms = ontology[term_id]['synonyms']
    # Some ontology names carry an " assay" suffix the metadata omits.
    candidates = (term_name, term_name + ' assay')
    if not any(name == ontology_term_name or name in synonyms
               for name in candidates):
        detail = 'Experiment has a mismatch between assay_term_name "{}" and assay_term_id "{}"'.format(
            term_name,
            term_id,
        )
        yield AuditFailure('mismatched assay_term_name', detail, level='DCC_ACTION')
def audit_biosample_concordance(value, system):
    '''
    The biosample details of the experiment of a replicate and the
    library.biosample of a replicate need to match.

    biosample_type, biosample_term_name and biosample_term_id are compared
    in that order; each mismatch is reported separately.  Deleted and
    replaced replicates, and replicates without a library or without a
    library biosample, are skipped.  The system argument is not used.

    Yields:
        AuditFailure: level ERROR, one per mismatching property.
    '''
    if value.get('status') in ['deleted', 'replaced']:
        return

    if 'library' not in value:
        return

    if 'biosample' not in value['library']:
        return

    experiment = value['experiment']
    biosample = value['library']['biosample']
    exp = experiment['@id']
    bio = biosample['@id']

    for prop in ('biosample_type', 'biosample_term_name', 'biosample_term_id'):
        exp_value = experiment.get(prop)
        bs_value = biosample.get(prop)
        if bs_value != exp_value:
            # The message leads with the experiment and ends with the
            # biosample for every property, biosample_term_id included.
            detail = '{} has mismatched {}: {}, but {} in {}'.format(
                exp, prop, exp_value, bs_value, bio)
            yield AuditFailure('mismatched {}'.format(prop), detail, level='ERROR')
def audit_antibody_characterization_unique_reviews(value, system):
    '''
    Make sure primary characterizations have unique lane, biosample_term_id
    and organism combinations for characterization reviews.

    Characterizations that are deleted, not submitted for review by lab, in
    progress or not reviewed are skipped, as are secondary
    characterizations.  The system argument is not used.

    Raises:
        AuditFailure: level ERROR, on the first review whose combination
            was already seen.
    '''
    if value['status'] in [
            'deleted', 'not submitted for review by lab',
            'in progress', 'not reviewed']:
        return

    if 'secondary_characterization_method' in value:
        return

    seen_reviews = set()
    for review in value.get('characterization_reviews', []):
        lane = review['lane']
        term_id = review['biosample_term_id']
        organism = review['organism']

        # A tuple keeps each value tied to its field; an unordered set of
        # the three values would treat (a, b, c) and (b, a, c) as equal.
        review_key = (lane, term_id, organism)
        if review_key in seen_reviews:
            detail = 'Characterization_review.lane {} is a duplicate review for {} - {}'.format(
                lane, term_id, organism)
            raise AuditFailure('duplicate lane review', detail, level='ERROR')
        seen_reviews.add(review_key)
def audit_experiment_spikeins(value, system):
    '''
    Require spikeins_used on the long-RNA libraries of RNA-seq experiments.

    For every replicate library whose size_range is '>200', spikeins_used
    must be present and non-empty.  Experiments that are deleted or
    replaced, or whose assay_term_name is not RNA-seq, are skipped.  The
    spike-in datasets themselves are not inspected here.  The system
    argument is not used.

    Yields:
        AuditFailure: level NOT_COMPLIANT, one per offending library.
    '''
    if value['status'] in ('deleted', 'replaced') or value.get('assay_term_name') != 'RNA-seq':
        return

    libraries = (replicate.get('library') for replicate in value['replicates'])
    for library in libraries:
        # Only long-RNA libraries are subject to the requirement.
        if library is None or library.get('size_range') != '>200':
            continue

        spikeins = library.get('spikeins_used')
        if spikeins is None or spikeins == []:
            message = 'Library {} is in an RNA-seq experiment and has size_range >200. It requires a value for spikeins_used'.format(
                library['@id'])
            yield AuditFailure('missing spikeins_used', message, level='NOT_COMPLIANT')
def audit_file_format_specifications(value, system):
    '''
    Every document listed in file_format_specifications must have
    document_type "file format specification".

    Files without the property pass.  The system argument is not used.

    Raises:
        AuditFailure: level ERROR, on the first document of another type.
    '''
    expected_type = "file format specification"
    for specification in value.get('file_format_specifications', []):
        if specification['document_type'] == expected_type:
            continue
        message = 'File {} has document {} not of type file format specification'.format(
            value['@id'], specification['@id'])
        raise AuditFailure('wrong document_type', message, level='ERROR')
def audit_biosample_donor(value, system):
    '''
    A biosample should have a donor whose organism matches its own.

    A biosample without a donor passes only when pooled_from is non-empty.
    If the donor has a mutated_gene, that gene must be of the same organism
    as the biosample and must not be investigated as a tag, control,
    recombinant protein or modification.  Deleted biosamples are skipped.
    The system argument is not used.

    Raises:
        AuditFailure: level ERROR, for a missing donor, a mismatched donor
            organism, a mismatched mutated_gene organism or an invalid
            mutated_gene.
    '''
    if value['status'] in ['deleted']:
        return

    if 'donor' not in value:
        if value['pooled_from']:
            return
        message = 'Biosample {} requires a donor'.format(value['@id'])
        raise AuditFailure('missing donor', message, level='ERROR')

    donor = value['donor']
    if value['organism']['name'] != donor['organism']['name']:
        message = 'Biosample {} is organism {}, yet its donor {} is organism {}. Biosamples require a donor of the same species'.format(
            value['@id'],
            value['organism']['name'],
            donor['@id'],
            donor['organism']['name'])
        raise AuditFailure('mismatched organism', message, level='ERROR')

    if 'mutated_gene' not in donor:
        return

    gene = donor['mutated_gene']
    if value['organism']['name'] != gene['organism']['name']:
        message = 'Biosample {} is organism {}, but its donor {} mutated_gene is in {}. Donor mutated_gene should be of the same species as the donor and biosample'.format(
            value['@id'],
            value['organism']['name'],
            donor['@id'],
            gene['organism']['name'])
        raise AuditFailure('mismatched mutated_gene organism', message, level='ERROR')

    disallowed_roles = (
        'histone modification',
        'tag',
        'control',
        'recombinant protein',
        'nucleotide modification',
        'other post-translational modification',
    )
    if any(role in disallowed_roles for role in gene['investigated_as']):
        message = 'Donor {} has an invalid mutated_gene {}. Donor mutated_genes should not be tags, controls, recombinant proteins or modifications'.format(
            donor['@id'], gene['name'])
        raise AuditFailure('invalid donor mutated_gene', message, level='ERROR')
def audit_biosample_transfection_type(value, system):
    '''
    A biosample with rnais or constructs should have a transfection_type.

    rnais is checked before constructs.  Deleted biosamples are skipped.
    The system argument is not used.

    Raises:
        AuditFailure: level ERROR, when rnais or constructs is non-empty
            and transfection_type is absent.
    '''
    if value['status'] == 'deleted':
        return

    checks = (
        ('rnais', 'Biosample {} with a value for RNAi requires transfection_type'),
        ('constructs', 'Biosample {} with a value for construct requires transfection_type'),
    )
    lacks_transfection_type = 'transfection_type' not in value
    for field, template in checks:
        if value[field] and lacks_transfection_type:
            message = template.format(value['@id'])
            raise AuditFailure('missing transfection_type', message, level='ERROR')
def audit_file_size(value, system):
    '''
    A file should have a file_size.

    Deleted, replaced, uploading and revoked files are skipped.  The system
    argument is not used.

    Raises:
        AuditFailure: level DCC_ACTION, when file_size is absent.
    '''
    exempt_statuses = ('deleted', 'replaced', 'uploading', 'revoked')
    if value['status'] in exempt_statuses or 'file_size' in value:
        return

    message = 'File {} requires a value for file_size'.format(value['@id'])
    raise AuditFailure('missing file_size', message, level='DCC_ACTION')
def audit_antibody_characterization_status(value, system):
    '''
    Make sure the lane_status of each characterization review matches the
    characterization status.

    Secondary characterizations are skipped.  Characterizations that are
    deleted, not submitted for review by lab, in progress or not reviewed
    must not have characterization_reviews at all.  For the remaining
    statuses, a lane is pending dcc review if and only if the
    characterization is, and a compliant lane requires a compliant
    characterization.  The system argument is not used.

    Raises:
        AuditFailure: level WARNING for unexpected characterization_reviews
            or a pending/non-pending mismatch; level DCC_ACTION when a
            lane is compliant but the characterization is not.
    '''
    if 'secondary_characterization_method' in value:
        return

    status = value['status']
    if status in [
            'deleted', 'not submitted for review by lab',
            'in progress', 'not reviewed']:
        if 'characterization_reviews' in value:
            # With any of these statuses there should be no reviews.
            detail = 'Antibody_characterization.status of {} is incompatible with having a value for characterization_reviews'.format(
                status)
            raise AuditFailure('unexpected characterization_reviews', detail, level='WARNING')
        return

    # Check each lane_status for an appropriate match.
    is_pending = status == 'pending dcc review'
    has_compliant_lane = False
    for lane in value.get('characterization_reviews', []):
        lane_status = lane['lane_status']
        if (lane_status == 'pending dcc review') != is_pending:
            # Report the characterization's actual status; it is only
            # 'pending dcc review' in one of the two mismatch directions.
            detail = 'A lane.status of {} is incompatible with antibody_characterization.status of {}'.format(
                lane_status, status)
            raise AuditFailure('mismatched lane status', detail, level='WARNING')

        if lane_status == 'compliant':
            has_compliant_lane = True

    if has_compliant_lane and status != 'compliant':
        # The offending lane status is always 'compliant' here, whatever
        # the last lane visited by the loop happened to be.
        detail = 'A lane.status of {} is incompatible with antibody_characterization status of {}'.format(
            'compliant', status)
        raise AuditFailure('mismatched lane status', detail, level='DCC_ACTION')
def audit_experiment_release_date(value, system):
    '''
    Released experiments need a date_released.  This check should
    eventually move into the schema.  The system argument is not used.

    Raises:
        AuditFailure: level DCC_ACTION, when a released experiment has no
            date_released.
    '''
    if value['status'] != 'released':
        return
    if 'date_released' in value:
        return

    message = 'Experiment {} is released and requires a value in date_released'.format(
        value['@id'])
    raise AuditFailure('missing date_released', message, level='DCC_ACTION')
def audit_experiment_ChIP_control(value, system):
    '''
    Check the possible_controls of a ChIP-seq experiment.

    Every possible control must be linked to a target investigated as
    control.  A control whose first replicate has an antibody is counted
    as a mock IP control; if all possible controls are counted that way,
    the experiment lacks an input control.

    Deleted, proposed, preliminary, replaced and revoked experiments,
    non-ChIP-seq experiments, experiments that are themselves controls and
    experiments without possible_controls are skipped.  The system
    argument is not used.

    Raises:
        AuditFailure: level ERROR for a control without a control target;
            level NOT_COMPLIANT when no input control is present.
    '''
    skipped_statuses = (
        'deleted', 'proposed', 'preliminary', 'replaced', 'revoked')
    if value['status'] in skipped_statuses:
        return

    # Controls are currently only required for ChIP-seq.
    if value.get('assay_term_name') != 'ChIP-seq':
        return

    # Control experiments do not need controls of their own.
    if 'target' in value and 'control' in value['target']['investigated_as']:
        return

    controls = value['possible_controls']
    if not controls:
        return

    mock_ip_count = 0
    for control in controls:
        has_control_target = (
            'target' in control
            and 'control' in control['target']['investigated_as'])
        if not has_control_target:
            message = 'Experiment {} is ChIP-seq but its control {} is not linked to a target with investigated.as = control'.format(
                value['@id'], control['@id'])
            raise AuditFailure('invalid possible_control', message, level='ERROR')

        replicates = control['replicates']
        if replicates and 'antibody' in replicates[0]:
            mock_ip_count += 1

    # All possible controls are mock IP controls: the binding group agreed
    # that every ChIP-seq should have an input control.  The assay is
    # already known to be ChIP-seq here; the message names the last
    # control visited by the loop.
    if mock_ip_count == len(controls):
        message = 'Experiment {} is ChIP-seq and requires at least one input control, as agreed upon by the binding group. {} is not an input control'.format(
            value['@id'], control['@id'])
        raise AuditFailure('missing input control', message, level='NOT_COMPLIANT')
def audit_library_biosample(value, system):
    '''
    The library should be linked to a biosample.

    Deleted libraries are skipped.  The system argument is not used.

    Raises:
        AuditFailure: level ERROR, when the library has no biosample.
    '''
    if value['status'] in ('deleted',) or 'biosample' in value:
        return

    message = 'Library {} has no biosample'.format(value['@id'])
    raise AuditFailure('missing biosample', message, level='ERROR')
def audit_paired_with(value, system):
    '''
    A file with a paired_end needs a paired_with (this should be handled in
    the schema), and the paired_with file should be of the same replicate.

    For a paired_end '1' file, the reverse paired_with links are fetched
    from system['context'] and must not number more than one.  Deleted,
    replaced and revoked files, and files without paired_end, are skipped;
    the replicate checks are skipped when the paired_with file has no
    replicate.

    Raises:
        AuditFailure: level NOT_COMPLIANT for a missing paired_with; level
            DCC_ACTION for a missing replicate; level ERROR for mismatched
            replicates or multiple reverse paired_with links.
    '''
    if value['status'] in ('deleted', 'replaced', 'revoked'):
        return

    if 'paired_end' not in value:
        return

    if 'paired_with' not in value:
        message = 'File {} has paired_end = {}. It requires a paired file'.format(
            value['@id'], value['paired_end'])
        raise AuditFailure('missing paired_with', message, level='NOT_COMPLIANT')

    mate = value['paired_with']
    if 'replicate' not in mate:
        return

    if 'replicate' not in value:
        message = 'File {} has paired_end = {}. It requires a replicate'.format(
            value['@id'], value['paired_end'])
        raise AuditFailure('missing replicate', message, level='DCC_ACTION')

    if value['replicate'] != mate['replicate']:
        message = 'File {} has replicate {}. It is paired_with file {} with replicate {}'.format(
            value['@id'],
            value.get('replicate'),
            mate['@id'],
            mate.get('replicate'))
        raise AuditFailure('mismatched paired_with', message, level='ERROR')

    if value['paired_end'] != '1':
        return

    # Files that name this paired end 1 file as their paired_with.
    reverse_links = system['context'].get_rev_links('paired_with')
    if len(reverse_links) > 1:
        message = 'Paired end 1 file {} paired_with by multiple paired end 2 files: {!r}'.format(
            value['@id'],
            reverse_links,
        )
        raise AuditFailure('multiple paired_with', message, level='ERROR')
def audit_biosample_term(value, system):
    '''
    Biosample_term_id, biosample_term_name and biosample_type should all be
    present (this should be handled by schemas).  Biosample_term_id should
    be in the ontology and biosample_term_name should match it.

    The ontology is read from system['registry']['ontology'].  A name
    matches when it equals the ontology term's name or is one of its
    synonyms.  Deleted biosamples and biosamples without a
    biosample_term_id are skipped.

    Raises:
        AuditFailure: level DCC_ACTION, for a New Term Request id, an id
            that is not in the ontology, or a name that does not match
            the id.
    '''
    if value['status'] in ['deleted']:
        return

    if 'biosample_term_id' not in value:
        return

    ontology = system['registry']['ontology']
    term_id = value['biosample_term_id']
    term_name = value.get('biosample_term_name')

    if term_id.startswith('NTR:'):
        detail = 'Biosample {} has a New Term Request {} - {}'.format(
            value['@id'], term_id, term_name)
        raise AuditFailure('NTR biosample', detail, level='DCC_ACTION')

    if term_id not in ontology:
        detail = 'Biosample {} has biosample_term_id of {} which is not in ontology'.format(
            value['@id'], term_id)
        # Report the formatted detail message, not the bare term id.
        raise AuditFailure('term_id not in ontology', detail, level='DCC_ACTION')

    ontology_term = ontology[term_id]
    if ontology_term['name'] != term_name and term_name not in ontology_term['synonyms']:
        detail = 'Biosample {} has a mismatch between biosample_term_id "{}" and biosample_term_name "{}"'.format(
            value['@id'],
            term_id,
            term_name,
        )
        raise AuditFailure('mismatched biosample_term', detail, level='DCC_ACTION')
def audit_references_for_publication(value, system):
    '''
    Datasets of type publication should have references.  Those that do
    not are flagged so the reference can be added once the publication has
    been accepted.

    Deleted, replaced, revoked and preliminary datasets are skipped.  The
    system argument is not used.

    Raises:
        AuditFailure: level WARNING, when a publication dataset has no
            references.
    '''
    if value['status'] in ('deleted', 'replaced', 'revoked', 'preliminary'):
        return

    if value['dataset_type'] != 'publication':
        return

    if not value['references']:
        message = 'publication dataset missing a reference to a publication'
        raise AuditFailure('missing reference', message, level='WARNING')
def audit_antibody_characterization_review(value, system):
    '''
    Make sure that biosample terms are in the ontology for each
    characterization_review.

    The ontology is read from system['registry']['ontology'].  A name
    matches when it equals the ontology term's name or is one of its
    synonyms.  Characterizations that are not reviewed, not submitted for
    review by lab, deleted or in progress are skipped, as are secondary
    characterizations and characterizations with no reviews.

    Raises:
        AuditFailure: level DCC_ACTION for a New Term Request id or an id
            that is not in the ontology; level ERROR for a name that does
            not match the id.
    '''
    if value['status'] in [
            'not reviewed', 'not submitted for review by lab',
            'deleted', 'in progress']:
        return

    if 'secondary_characterization_method' in value:
        return

    reviews = value['characterization_reviews']
    if not reviews:
        return

    ontology = system['registry']['ontology']
    for review in reviews:
        term_id = review['biosample_term_id']
        term_name = review['biosample_term_name']

        if term_id.startswith('NTR:'):
            detail = 'Antibody_characterization {} contains a New Term Request {} - {}'.format(
                value['@id'], term_id, term_name)
            raise AuditFailure('NTR biosample', detail, level='DCC_ACTION')

        if term_id not in ontology:
            detail = 'Antibody characterization {} contains a biosample_term_id {} that is not in the ontology'.format(
                value['@id'], term_id)
            # Report the formatted detail message, not the bare term id.
            raise AuditFailure('term_id not in ontology', detail, level='DCC_ACTION')

        ontology_term_name = ontology[term_id]['name']
        if ontology_term_name != term_name and term_name not in ontology[term_id]['synonyms']:
            detail = 'Antibody characterization {} has a mismatched term {} - {} expected {}'.format(
                value['@id'], term_id, term_name, ontology_term_name)
            raise AuditFailure('mismatched term_name', detail, level='ERROR')
def audit_analysis_steps_closure(value, system):
    '''
    The analysis_steps list should include every parent named by any of
    its steps.

    The system argument is not used.

    Raises:
        AuditFailure: level ERROR, with the sorted, comma-separated missing
            parents as detail.
    '''
    steps = value['analysis_steps']
    listed_ids = set()
    referenced_parents = set()
    for step in steps:
        listed_ids.add(step['@id'])
        referenced_parents.update(step.get('parents', []))

    missing = referenced_parents - listed_ids
    if not missing:
        return

    message = ', '.join(sorted(missing))
    raise AuditFailure('incomplete analysis_steps', message, level='ERROR')
def audit_experiment_replicates_with_no_libraries(value, system):
    '''
    Every replicate of an experiment should have a library.

    Deleted, replaced and revoked experiments, and experiments without
    replicates, are skipped.  The system argument is not used.

    Yields:
        AuditFailure: level DCC_ACTION, one per replicate without a
            library.
    '''
    if value['status'] in ('deleted', 'replaced', 'revoked'):
        return

    replicates = value['replicates']
    if len(replicates) == 0:
        return

    for replicate in replicates:
        if 'library' in replicate:
            continue
        message = 'Experiment {} has a replicate {}, that has no library associated with'.format(
            value['@id'], replicate['@id'])
        yield AuditFailure('replicate with no library', message, level='DCC_ACTION')
def audit_library_RNA_size_range(value, system):
    '''
    An RNA library should have a size_range specified.  This needs to
    accommodate the rfa.

    A library counts as RNA when its nucleic_acid_term_id is SO:0000356 or
    SO:0000871.  Deleted libraries are skipped.  The system argument is not
    used.

    Raises:
        AuditFailure: level ERROR, when an RNA library has no size_range.
    '''
    if value['status'] in ['deleted']:
        return

    rna_term_ids = ('SO:0000356', 'SO:0000871')
    is_rna = value['nucleic_acid_term_id'] in rna_term_ids
    if is_rna and 'size_range' not in value:
        message = 'RNA library {} requires a value for size_range'.format(value['@id'])
        raise AuditFailure('missing size_range', message, level='ERROR')
def audit_file_read_length(value, system):
    '''
    Files with output_type reads should have a read_length.

    Deleted, replaced and revoked files are skipped.  The system argument
    is not used.

    Raises:
        AuditFailure: level ERROR, when a reads file has no read_length.
    '''
    if value['status'] in ('deleted', 'replaced', 'revoked'):
        return

    if value['output_type'] != 'reads' or 'read_length' in value:
        return

    message = 'Reads file {} missing read_length'.format(value['@id'])
    raise AuditFailure('missing read_length', message, level='ERROR')
def audit_antibody_characterization_target(value, system):
    '''
    Make sure that the target in a characterization matches a target of
    the antibody it characterizes.

    For a target investigated as a recombinant protein, the antibody must
    have a target investigated as a tag, and the part of the
    characterization target's label before the first '-' must be the label
    of one of the antibody's targets.  For any other target, its name must
    equal the name of one of the antibody's targets.  The system argument
    is not used.

    Raises:
        AuditFailure: level ERROR, for a non-tag antibody, a mismatched
            tag target or a mismatched target.
    '''
    antibody = value['characterizes']
    target = value['target']

    if 'recombinant protein' not in target['investigated_as']:
        name_found = any(
            target['name'] == candidate.get('name')
            for candidate in antibody['targets'])
        if not name_found:
            message = 'Target {} is not found in target list for antibody {}'.format(
                target['name'], antibody['@id'])
            raise AuditFailure('mismatched target', message, level='ERROR')
        return

    # Recombinant protein: compare on the tag part of the label.
    tag_prefix = target['label'].split('-')[0]
    antibody_labels = set()
    antibody_roles = set()
    for candidate in antibody['targets']:
        antibody_labels.add(candidate['label'])
        antibody_roles.update(candidate['investigated_as'])

    if 'tag' not in antibody_roles:
        message = 'Antibody {} is not for a tagged protein, yet target is investigated_as a recombinant protein'.format(
            antibody['@id'])
        raise AuditFailure('not tagged antibody', message, level='ERROR')

    if tag_prefix not in antibody_labels:
        message = '{} is not found in target list for antibody {}'.format(
            tag_prefix, antibody['@id'])
        raise AuditFailure('mismatched tag target', message, level='ERROR')
def audit_run_type(value, system):
    '''
    A fastq file or a fasta file needs to specify run_type.  This was
    attempted as a schema dependency and didn't happen.

    Deleted, replaced and revoked files are skipped.  The system argument
    is not used.

    Raises:
        AuditFailure: level NOT_COMPLIANT, when a fastq or fasta file has
            no run_type.
    '''
    if value['status'] in ('deleted', 'replaced', 'revoked'):
        return

    file_format = value['file_format']
    if file_format not in ('fastq', 'fasta') or 'run_type' in value:
        return

    message = 'File {} has file_format {}. It requires a value for run_type'.format(
        value['@id'], file_format)
    raise AuditFailure('missing run_type', message, level='NOT_COMPLIANT')
def audit_file_platform(value, system):
    '''
    A raw data file should have a platform specified.  This should be in
    the schema.

    A file counts as raw data when its file_format is in raw_data_formats.
    Deleted and replaced files are skipped.  The system argument is not
    used.

    Raises:
        AuditFailure: level ERROR, when a raw data file has no platform.
    '''
    if value['status'] in ('deleted', 'replaced'):
        return

    if value['file_format'] not in raw_data_formats:
        return

    if 'platform' in value:
        return

    message = 'Raw data file {} missing platform information'.format(
        value['@id'])
    raise AuditFailure('missing platform', message, level='ERROR')