def validate_wildtype_sequence(seq, as_type="any"):
    """
    Validate that `seq` is an acceptable wild type (reference) sequence.

    The sequence is upper-cased and then passed to `dna_bases_validator` and
    `amino_acids_validator`; a result other than ``None`` from a validator
    counts as a match for that alphabet.

    Parameters
    ----------
    seq :
        Sequence to validate. Must support ``.upper()`` once it has passed
        the ``is_null`` check.
    as_type : optional
        ``WildTypeSequence.SequenceType.DNA`` requires a DNA match and
        ``WildTypeSequence.SequenceType.PROTEIN`` requires a protein match.
        Any other value (including the default ``"any"`` and
        ``WildTypeSequence.SequenceType.INFER``) accepts either alphabet.

    Raises
    ------
    ValidationError
        If `seq` is null according to ``is_null`` or does not match the
        alphabet required by `as_type`.
    """
    # Explicitly check for these cases as they are also valid AA sequences.
    if is_null(seq):
        raise ValidationError(f"'{seq}' is not a valid wild type sequence.")

    seq = seq.upper()
    is_dna = dna_bases_validator(seq) is not None
    is_aa = amino_acids_validator(seq) is not None

    if as_type == WildTypeSequence.SequenceType.DNA:
        if not is_dna:
            raise ValidationError(
                f"'{seq}' is not a valid DNA reference sequence."
            )
    elif as_type == WildTypeSequence.SequenceType.PROTEIN:
        if not is_aa:
            raise ValidationError(
                f"'{seq}' is not a valid protein reference sequence."
            )
    elif not (is_dna or is_aa):
        # The previous condition `(as_type == "any" or <INFER>)` was always
        # truthy, so this branch has always been the fallback for every
        # other `as_type`. Keeping it as an explicit catch-all means an
        # unrecognised type is still validated rather than silently accepted.
        # NOTE(review): assumes SequenceType.INFER is a truthy value - confirm.
        raise ValidationError(
            f"'{seq}' is not a valid DNA or protein reference sequence."
        )
def validate_variant_json(data: Dict[str, Dict]) -> None:
    """
    Checks a given dictionary to ensure that it is suitable to be used
    as the `data` attribute in a :class:`Variant` instance.

    Parameters
    ----------
    data : dict
        Dictionary that must contain exactly the keys `variant_score_data`
        and `variant_count_data`, each mapping to a dict. The score data
        dict must contain `required_score_column`.

    Raises
    ------
    ValidationError
        If a required key is missing, a value is not a dict, the required
        score column is absent, or unexpected keys are present.
    """
    expected_keys = [variant_score_data, variant_count_data]
    for key in expected_keys:
        if key not in data:
            raise ValidationError(f"Missing the required key {key}")

    # Check the correct data types are given. This runs before the column
    # membership test so a non-container value (e.g. None) is reported as a
    # ValidationError instead of escaping as a TypeError.
    for key in expected_keys:
        if not isinstance(data[key], dict):
            type_ = type(data[key]).__name__
            raise ValidationError(
                f"Value for '{key}' must be a dict not {type_}."
            )

    if required_score_column not in data[variant_score_data]:
        raise ValidationError(
            f"Missing required column '{required_score_column}' in variant's score data."
        )

    extras = [k for k in data if k not in expected_keys]
    if extras:
        raise ValidationError(f"Encountered unexpected keys {extras}")
def validate_hgvs_string(
    value: Union[str, bytes],
    column: Optional[str] = None,
    splice_present: bool = False,
    targetseq: Optional[str] = None,
    relaxed_ordering: bool = False,
) -> Optional[str]:
    """
    Validate a single HGVS variant string for the given column.

    Parameters
    ----------
    value : str or bytes
        The variant string. Objects with a ``decode`` method are decoded
        before validation.
    column : str, optional
        Column the value belongs to: ``"nt"``/`hgvs_nt_column`,
        ``"splice"``/`hgvs_splice_column` or ``"p"``/`hgvs_pro_column`.
    splice_present : bool, optional
        When true, nucleotide variants must use the genomic ``g`` prefix;
        otherwise they must use a transcript prefix (``c`` or ``n``).
    targetseq : str, optional
        Passed through to ``Variant``.
    relaxed_ordering : bool, optional
        Passed through to ``Variant``.

    Returns
    -------
    str or None
        ``None`` when `value` is null according to ``is_null``, otherwise
        ``str(variant)`` for the parsed variant.

    Raises
    ------
    ValidationError
        If `value` is not a string, uses the retired ``_sy``/``_wt``
        shorthands, cannot be parsed, or has a prefix that is not allowed
        for `column`.
    ValueError
        If `column` is not one of the recognised columns. This is only
        checked after the value has been parsed successfully.
    """
    if is_null(value):
        return None

    if hasattr(value, "decode"):
        value = value.decode()
    if not isinstance(value, str):
        raise ValidationError(
            "Variant HGVS values input must be strings. "
            "'{}' has the type '{}'.".format(value, type(value).__name__)
        )

    lowered = value.lower()
    if lowered == "_sy":
        raise ValidationError(
            "_sy is no longer supported and should be replaced by p.(=)"
        )
    if lowered == "_wt":
        raise ValidationError(
            "_wt is no longer supported and should be replaced by (cgnp).="
        )

    try:
        variant = Variant(
            s=value, targetseq=targetseq, relaxed_ordering=relaxed_ordering
        )
    except MaveHgvsParseError as error:
        # Chain the parser error so the original traceback is preserved.
        raise ValidationError(f"{value}: {str(error)}") from error

    prefix = variant.prefix.lower()
    # Tuple membership / equality instead of `in "cn"`: the string form is a
    # substring test that would also accept "" or "cn".
    transcript_prefixes = ("c", "n")
    not_transcript_message = (
        f"'{value}' is not a transcript variant. The accepted "
        f"transcript variant prefixes are 'c.', 'n.'."
    )

    if column in ("nt", hgvs_nt_column):
        if splice_present:
            if prefix != "g":
                raise ValidationError(
                    f"'{value}' is not a genomic variant (prefix 'g.'). "
                    f"Nucleotide variants must be genomic if transcript "
                    f"variants are also defined."
                )
        elif prefix not in transcript_prefixes:
            raise ValidationError(not_transcript_message)
    elif column in ("splice", hgvs_splice_column):
        if prefix not in transcript_prefixes:
            raise ValidationError(not_transcript_message)
    elif column in ("p", hgvs_pro_column):
        if prefix != "p":
            raise ValidationError(
                f"'{value}' is not a protein variant. The accepted "
                f"protein variant prefix is 'p.'."
            )
    else:
        raise ValueError(
            "Unknown column '{}'. Expected nt, splice or p".format(column)
        )

    return str(variant)
def validate_mavedb_urn_experiment(urn):
    """
    Validate that `urn` matches the experiment or temporary URN pattern.

    Parameters
    ----------
    urn :
        Value passed to ``MAVEDB_EXPERIMENT_URN_RE.match`` and
        ``MAVEDB_TMP_URN_RE.match``.

    Raises
    ------
    ValidationError
        If neither pattern matches `urn`.
    """
    if MAVEDB_EXPERIMENT_URN_RE.match(urn) or MAVEDB_TMP_URN_RE.match(urn):
        return
    # Replaces the leftover "Error test" placeholder with a real message.
    raise ValidationError(f"{urn} is not a valid Experiment urn.")
def validate_columns_match(variant, scoreset) -> None:
    """
    Validate that a child matches parents defined columns to keep
    data in sync.

    Parameters
    ----------
    variant :
        Object exposing ``score_columns`` and ``count_columns``.
    scoreset :
        Parent object exposing ``score_columns`` and ``count_columns``.

    Raises
    ------
    ValidationError
        If the score or count columns differ between `variant` and
        `scoreset`, or if reading the columns raises ``KeyError``.
    """
    try:
        if variant.score_columns != scoreset.score_columns:
            raise ValidationError(
                f"Variant defines score columns '{variant.score_columns}' "
                f"but parent defines columns '{scoreset.score_columns}'."
            )
        if variant.count_columns != scoreset.count_columns:
            raise ValidationError(
                f"Variant defines count columns '{variant.count_columns}' "
                f"but parent defines columns '{scoreset.count_columns}'."
            )
    except KeyError as error:
        # NOTE(review): presumably the column attributes are properties that
        # index into an underlying dict - confirm against the models.
        raise ValidationError(f"Missing key {str(error)}") from error
def validate_interval_start_lteq_end(start, end):
    """
    Validate that an interval's start does not exceed its end.

    Parameters
    ----------
    start :
        Starting coordinate, or ``None`` when unspecified.
    end :
        Ending coordinate, or ``None`` when unspecified.

    Raises
    ------
    ValidationError
        If both coordinates are given and ``start > end``.
    """
    # Intervals may be underspecified, but will be ignored so skip validation.
    fully_specified = start is not None and end is not None
    if fully_specified and start > end:
        message = (
            "An interval's starting coordinate cannot be greater than "
            "the ending coordinate."
        )
        raise ValidationError(message)
def validate_sra_identifier(identifier):
    """
    Validate that `identifier` is accepted by one of the ``idutils`` checks
    for SRA, BioProject, GEO or ArrayExpress accessions.

    Parameters
    ----------
    identifier :
        Value passed to the ``idutils`` predicates.

    Raises
    ------
    ValidationError
        If none of the predicates accepts `identifier`.
    """
    if (
        idutils.is_sra(identifier)
        or idutils.is_bioproject(identifier)
        or idutils.is_geo(identifier)
        or idutils.is_arrayexpress_array(identifier)
        or idutils.is_arrayexpress_experiment(identifier)
    ):
        return
    # The closing quote after the identifier was missing in the message.
    raise ValidationError(
        f"'{identifier}' is not a valid SRA, GEO, ArrayExpress or BioProject "
        "accession."
    )
def validate_unique_intervals(intervals):
    """
    Validate that no two distinct intervals compare equal via ``equals``.

    Every ordered pair is examined. A pair is skipped when both members
    have the same non-``None`` ``pk`` or are the very same object.

    Parameters
    ----------
    intervals :
        Iterable of objects exposing ``pk`` and ``equals(other)``.

    Raises
    ------
    ValidationError
        If ``first.equals(second)`` is truthy for a pair that was not
        skipped.
    """
    for first in intervals:
        for second in intervals:
            same_primary_key = (
                first.pk is not None
                and second.pk is not None
                and first.pk == second.pk
            )
            if same_primary_key or first is second:
                continue
            if first.equals(second):
                raise ValidationError(
                    "You can not specify the same interval twice."
                )
def validate_chromosome(value):
    """
    Validate a chromosome identifier.

    Parameters
    ----------
    value :
        Chromosome identifier, or ``None`` when unspecified.

    Raises
    ------
    ValidationError
        If `value` is not ``None`` but ``is_null(value)`` is truthy.
    """
    # Intervals may be underspecified, but will be ignored so skip validation.
    if value is not None and is_null(value):
        raise ValidationError("Chromosome identifier must not be null.")
def validate_strand(value):
    """
    Validate that `value` is a recognised strand symbol.

    Parameters
    ----------
    value :
        Strand to check; only ``"+"`` and ``"-"`` are accepted.

    Raises
    ------
    ValidationError
        If `value` is neither ``"+"`` nor ``"-"``.
    """
    allowed_strands = ("+", "-")
    if value in allowed_strands:
        return
    raise ValidationError("GenomicInterval strand must be either '+' or '-'")
def validate_doi_identifier(identifier):
    """
    Validate that `identifier` is accepted by ``idutils.is_doi``.

    Parameters
    ----------
    identifier :
        Value to check.

    Raises
    ------
    ValidationError
        If ``idutils.is_doi(identifier)`` is falsy.
    """
    if idutils.is_doi(identifier):
        return
    message = f"'{identifier}' is not a valid DOI."
    raise ValidationError(message)
def validate_map_has_at_least_one_interval(reference_map):
    """
    Validate that a reference map has at least one interval.

    Parameters
    ----------
    reference_map :
        Object whose ``get_intervals()`` result exposes ``count()``.

    Raises
    ------
    ValidationError
        If ``reference_map.get_intervals().count()`` is falsy.
    """
    interval_total = reference_map.get_intervals().count()
    if interval_total:
        return
    raise ValidationError(
        "You must specify at least one interval for each reference map."
    )
def validate_genome_short_name(value):
    """
    Validate that a genome short name is not null.

    Parameters
    ----------
    value :
        Value passed to ``is_null``.

    Raises
    ------
    ValidationError
        If ``is_null(value)`` is truthy.
    """
    if not is_null(value):
        return
    raise ValidationError("Genome short name must not be null.")
def validate_genome_identifier(identifier):
    """
    Validate that `identifier` is accepted by ``idutils.is_genome``.

    Parameters
    ----------
    identifier :
        Value to check.

    Raises
    ------
    ValidationError
        If ``idutils.is_genome(identifier)`` is falsy.
    """
    if idutils.is_genome(identifier):
        return
    message = f"'{identifier}' is not a valid GenBank or RefSeq genome assembly."
    raise ValidationError(message)
def validate_mavedb_urn(urn):
    """
    Validate that `urn` matches ``MAVEDB_ANY_URN_RE``.

    Parameters
    ----------
    urn :
        Value passed to ``MAVEDB_ANY_URN_RE.match``.

    Raises
    ------
    ValidationError
        If the pattern does not match `urn`.
    """
    if MAVEDB_ANY_URN_RE.match(urn):
        return
    # Replaces the leftover "Error test" placeholder with a real message.
    raise ValidationError(f"{urn} is not a valid urn.")
def validate_refseq_identifier(identifier):
    """
    Validate that `identifier` is accepted by ``idutils.is_refseq``.

    Parameters
    ----------
    identifier :
        Value to check.

    Raises
    ------
    ValidationError
        If ``idutils.is_refseq(identifier)`` is falsy.
    """
    if idutils.is_refseq(identifier):
        return
    message = f"'{identifier}' is not a valid RefSeq accession."
    raise ValidationError(message)
def validate_uniprot_identifier(identifier):
    """
    Validate that `identifier` is accepted by ``idutils.is_uniprot``.

    Parameters
    ----------
    identifier :
        Value to check.

    Raises
    ------
    ValidationError
        If ``idutils.is_uniprot(identifier)`` is falsy.
    """
    if idutils.is_uniprot(identifier):
        return
    message = f"'{identifier}' is not a valid UniProt accession."
    raise ValidationError(message)
def validate_ensembl_identifier(identifier):
    """
    Validate that `identifier` is accepted by ``idutils.is_ensembl``.

    Parameters
    ----------
    identifier :
        Value to check.

    Raises
    ------
    ValidationError
        If ``idutils.is_ensembl(identifier)`` is falsy.
    """
    if idutils.is_ensembl(identifier):
        return
    message = f"'{identifier}' is not a valid Ensembl accession."
    raise ValidationError(message)
def validate_organism_name(value):
    """
    Validate that an organism (species) name is not null.

    Parameters
    ----------
    value :
        Value passed to ``is_null``.

    Raises
    ------
    ValidationError
        If ``is_null(value)`` is truthy.
    """
    if not is_null(value):
        return
    raise ValidationError("Species name must not be null.")
def validate_one_primary_map(reference_maps):
    """
    Validate that exactly one reference map is flagged as primary.

    Parameters
    ----------
    reference_maps :
        Iterable of objects exposing ``is_primary_reference_map()``.

    Raises
    ------
    ValidationError
        If the summed ``is_primary_reference_map()`` results do not equal
        one.
    """
    primary_total = 0
    for reference_map in reference_maps:
        primary_total += reference_map.is_primary_reference_map()
    if primary_total != 1:
        raise ValidationError("A target must have one primary reference map.")
def validate_reference_genome_has_one_external_identifier(referencegenome):
    """
    Validate that a reference genome has its ``genome_id`` set.

    Parameters
    ----------
    referencegenome :
        Object exposing a ``genome_id`` attribute.

    Raises
    ------
    ValidationError
        If ``referencegenome.genome_id`` is falsy.
    """
    # NOTE(review): the message speaks of "only one" identifier, yet the
    # check fires when `genome_id` is missing/falsy - confirm the wording.
    if not referencegenome.genome_id:
        # A space was missing between the concatenated literals, which
        # produced "referencegenome." in the message.
        raise ValidationError(
            "Only one external identifier can be specified for a reference "
            "genome."
        )
def validate_keyword(kw):
    """
    Validate that a keyword is a non-null string.

    Parameters
    ----------
    kw :
        Keyword to check.

    Raises
    ------
    ValidationError
        If ``is_null(kw)`` is truthy or `kw` is not a ``str``.
    """
    if not is_null(kw) and isinstance(kw, str):
        return
    message = f"'{kw}' not a valid keyword. Keywords must be valid strings."
    raise ValidationError(message)
def validate_map_has_unique_reference_genome(annotations):
    """
    Validate that every annotation names a different reference genome.

    Genome names are compared as lower-cased strings.

    Parameters
    ----------
    annotations :
        Sized iterable of objects exposing ``get_reference_genome_name()``.

    Raises
    ------
    ValidationError
        If there are fewer distinct genome names than annotations.
    """
    distinct_genomes = {
        str(annotation.get_reference_genome_name()).lower()
        for annotation in annotations
    }
    if len(distinct_genomes) >= len(annotations):
        return
    raise ValidationError(
        "Each reference map must specify a different reference genome."
    )
def validate_pubmed_identifier(identifier):
    """
    Validate that `identifier` is accepted by ``idutils.is_pmid``.

    Parameters
    ----------
    identifier :
        Value to check.

    Raises
    ------
    ValidationError
        If ``idutils.is_pmid(identifier)`` is falsy.
    """
    if idutils.is_pmid(identifier):
        return
    # The closing quote after the identifier was missing in the message.
    raise ValidationError(f"'{identifier}' is not a valid PubMed identifier.")
def validate_at_least_one_map(reference_maps):
    """
    Validate that at least one reference map was supplied.

    Parameters
    ----------
    reference_maps :
        Sized collection of reference maps.

    Raises
    ------
    ValidationError
        If ``len(reference_maps)`` is zero.
    """
    if len(reference_maps) == 0:
        raise ValidationError(
            "A target must have at least one reference map specified."
        )
def validate_mavedb_urn_scoreset(urn):
    """
    Validate that `urn` matches the score set or temporary URN pattern.

    Parameters
    ----------
    urn :
        Value passed to ``MAVEDB_SCORESET_URN_RE.match`` and
        ``MAVEDB_TMP_URN_RE.match``.

    Raises
    ------
    ValidationError
        If neither pattern matches `urn`.
    """
    if MAVEDB_SCORESET_URN_RE.match(urn) or MAVEDB_TMP_URN_RE.match(urn):
        return
    # Replaces the leftover "Error test" placeholder with a real message.
    raise ValidationError(f"{urn} is not a valid score set urn.")
def validate_gene_name(value):
    """
    Validate that a gene name is not null.

    Parameters
    ----------
    value :
        Value passed to ``is_null``.

    Raises
    ------
    ValidationError
        If ``is_null(value)`` is truthy.
    """
    if not is_null(value):
        return
    raise ValidationError("Gene name must not be null.")
def validate_mavedb_urn_variant(urn):
    """
    Validate that `urn` matches the variant or temporary URN pattern.

    Parameters
    ----------
    urn :
        Value passed to ``MAVEDB_VARIANT_URN_RE.match`` and
        ``MAVEDB_TMP_URN_RE.match``.

    Raises
    ------
    ValidationError
        If neither pattern matches `urn`.
    """
    if MAVEDB_VARIANT_URN_RE.match(urn) or MAVEDB_TMP_URN_RE.match(urn):
        return
    # Replaces the leftover "Error test" placeholder with a real message.
    raise ValidationError(f"{urn} is not a valid Variant urn.")