예제 #1
0
def validate_wildtype_sequence(seq, as_type="any"):
    """
    Validate that `seq` is an acceptable wild type (reference) sequence.

    The sequence is upper-cased and then checked with `dna_bases_validator`
    and `amino_acids_validator`; a validator result other than ``None`` is
    treated as a match.

    Parameters
    ----------
    seq : str
        The sequence to validate.
    as_type : optional
        ``WildTypeSequence.SequenceType.DNA`` requires a DNA sequence and
        ``WildTypeSequence.SequenceType.PROTEIN`` requires a protein
        sequence. Every other value (including the default ``"any"`` and
        ``WildTypeSequence.SequenceType.INFER``) accepts either.

    Raises
    ------
    ValidationError
        If `seq` is null or does not match the requested sequence type.
    """
    # NOTE(review): `WildTypeSequence` is expected to be available at module
    # scope; the original function-level import was commented out.

    # Explicitly check for these cases as they are also valid AA sequences.
    if is_null(seq):
        raise ValidationError(f"'{seq}' is not a valid wild type sequence.")

    seq = seq.upper()
    is_dna = dna_bases_validator(seq) is not None
    is_aa = amino_acids_validator(seq) is not None

    if as_type == WildTypeSequence.SequenceType.DNA and not is_dna:
        raise ValidationError(
            f"'{seq}' is not a valid DNA reference sequence.")
    elif as_type == WildTypeSequence.SequenceType.PROTEIN and not is_aa:
        raise ValidationError(
            f"'{seq}' is not a valid protein reference sequence.")
    elif not (is_dna or is_aa):
        # The previous condition here never compared `as_type` with INFER, so
        # it applied to every `as_type`. That behaviour is kept on purpose:
        # whatever the requested type, a sequence that is neither DNA nor
        # protein is rejected.
        raise ValidationError(
            f"'{seq}' is not a valid DNA or protein reference sequence.")
예제 #2
0
def validate_variant_json(data: Dict[str, Dict]) -> None:
    """
    Checks a given dictionary to ensure that it is suitable to be used
    as the `data` attribute in a :class:`Variant` instance.

    Parameters
    ----------
    data : dict
        Dictionary that must contain exactly the keys named by
        `variant_score_data` and `variant_count_data`, each mapping to a
        dict. The score data must contain `required_score_column`.

    Raises
    ------
    ValidationError
        If a required key or the required score column is missing, if
        unexpected keys are present, or if a value is not a dict.
    """
    expected_keys = [variant_score_data, variant_count_data]
    for key in expected_keys:
        if key not in data:
            raise ValidationError(f"Missing the required key {key}")

    if required_score_column not in data[variant_score_data]:
        raise ValidationError(
            f"Missing required column '{required_score_column}' in variant's score data."
        )

    extras = [k for k in data if k not in expected_keys]
    if extras:
        # The message must be an f-string so the offending keys are reported.
        raise ValidationError(f"Encountered unexpected keys {extras}")

    # Check the correct data types are given.
    for key in expected_keys:
        if not isinstance(data[key], dict):
            type_ = type(data[key]).__name__
            raise ValidationError(
                f"Value for '{key}' must be a dict not {type_}.")
예제 #3
0
def validate_hgvs_string(
    value: Union[str, bytes],
    column: Optional[str] = None,
    splice_present: bool = False,
    targetseq: Optional[str] = None,
    relaxed_ordering: bool = False,
) -> Optional[str]:
    """
    Validate an HGVS variant string for the given column.

    Parameters
    ----------
    value : str or bytes
        The variant string. Objects with a ``decode`` method are decoded
        first. Null values (per `is_null`) are accepted and yield ``None``.
    column : str, optional
        The column the value belongs to: ``"nt"``/`hgvs_nt_column`,
        ``"splice"``/`hgvs_splice_column` or ``"p"``/`hgvs_pro_column`.
    splice_present : bool
        When true, values in the nucleotide column must use the ``g`` prefix;
        otherwise they must use ``c`` or ``n``.
    targetseq : str, optional
        Passed through to `Variant`.
    relaxed_ordering : bool
        Passed through to `Variant`.

    Returns
    -------
    str or None
        ``str(variant)`` for the parsed variant, or ``None`` for null input.

    Raises
    ------
    ValidationError
        If the value is not a string, uses a retired ``_sy``/``_wt``
        shorthand, cannot be parsed, or has the wrong prefix for `column`.
    ValueError
        If `column` is not one of the recognised columns.
    """
    if is_null(value):
        return None

    if hasattr(value, "decode"):
        value = value.decode()
    if not isinstance(value, str):
        raise ValidationError("Variant HGVS values input must be strings. "
                              "'{}' has the type '{}'.".format(
                                  value,
                                  type(value).__name__))

    lowered = value.lower()
    if lowered == "_sy":
        raise ValidationError(
            "_sy is no longer supported and should be replaced by p.(=)")
    elif lowered == "_wt":
        raise ValidationError(
            "_wt is no longer supported and should be replaced by (cgnp).=")

    try:
        variant = Variant(s=value,
                          targetseq=targetseq,
                          relaxed_ordering=relaxed_ordering)
    except MaveHgvsParseError as error:
        # Chain the parser error so the original traceback is preserved.
        raise ValidationError(f"{value}: {str(error)}") from error

    # Prefixes are compared with tuple membership / equality. Substring
    # membership (`prefix not in "cn"`) would also accept "" and "cn".
    prefix = variant.prefix.lower()
    transcript_prefixes = ("c", "n")
    if column in ("nt", hgvs_nt_column):
        if splice_present:
            if prefix != "g":
                raise ValidationError(
                    f"'{value}' is not a genomic variant (prefix 'g.'). "
                    f"Nucleotide variants must be genomic if transcript "
                    f"variants are also defined.")
        else:
            if prefix not in transcript_prefixes:
                raise ValidationError(
                    f"'{value}' is not a transcript variant. The accepted "
                    f"transcript variant prefixes are 'c.', 'n.'.")
    elif column in ("splice", hgvs_splice_column):
        if prefix not in transcript_prefixes:
            raise ValidationError(
                f"'{value}' is not a transcript variant. The accepted "
                f"transcript variant prefixes are 'c.', 'n.'.")
    elif column in ("p", hgvs_pro_column):
        if prefix != "p":
            raise ValidationError(
                f"'{value}' is not a protein variant. The accepted "
                f"protein variant prefix is 'p.'.")
    else:
        raise ValueError(
            "Unknown column '{}'. Expected nt, splice or p".format(column))

    return str(variant)
예제 #4
0
def validate_mavedb_urn_experiment(urn):
    """
    Validate that `urn` is an experiment urn or a temporary urn.

    Raises
    ------
    ValidationError
        If `urn` matches neither `MAVEDB_EXPERIMENT_URN_RE` nor
        `MAVEDB_TMP_URN_RE`.
    """
    if not (MAVEDB_EXPERIMENT_URN_RE.match(urn)
            or MAVEDB_TMP_URN_RE.match(urn)):
        # Report the offending urn instead of a placeholder message.
        raise ValidationError(f"{urn} is not a valid Experiment urn.")
예제 #5
0
def validate_columns_match(variant, scoreset) -> None:
    """
    Validate that a child matches parents defined columns to keep
    data in sync.

    Parameters
    ----------
    variant
        Object exposing `score_columns` and `count_columns`.
    scoreset
        Parent object exposing `score_columns` and `count_columns`.

    Raises
    ------
    ValidationError
        If the score or count columns differ, or if reading the columns
        raises a ``KeyError``.
    """
    try:
        if variant.score_columns != scoreset.score_columns:
            raise ValidationError(
                f"Variant defines score columns '{variant.score_columns}' "
                f"but parent defines columns '{scoreset.score_columns}'.")
        if variant.count_columns != scoreset.count_columns:
            raise ValidationError(
                f"Variant defines count columns '{variant.count_columns}' "
                f"but parent defines columns '{scoreset.count_columns}'.")
    except KeyError as error:
        # NOTE(review): presumably raised by the column accessors when the
        # underlying data lacks a key -- confirm against the model.
        raise ValidationError(f"Missing key {str(error)}") from error
예제 #6
0
def validate_interval_start_lteq_end(start, end):
    """
    Ensure an interval's start coordinate does not exceed its end.

    Validation is skipped when either coordinate is ``None``: intervals may
    be underspecified, and those are ignored.

    Raises
    ------
    ValidationError
        If both coordinates are given and ``start > end``.
    """
    fully_specified = start is not None and end is not None
    if fully_specified and start > end:
        message = ("An interval's starting coordinate cannot be greater than "
                   "the ending coordinate.")
        raise ValidationError(message)
def validate_sra_identifier(identifier):
    """
    Validate that `identifier` is accepted by at least one of the `idutils`
    SRA, BioProject, GEO or ArrayExpress (array / experiment) checks.

    Raises
    ------
    ValidationError
        If none of the checks accepts `identifier`.
    """
    accepted = (idutils.is_sra(identifier)
                or idutils.is_bioproject(identifier)
                or idutils.is_geo(identifier)
                or idutils.is_arrayexpress_array(identifier)
                or idutils.is_arrayexpress_experiment(identifier))
    if not accepted:
        # The closing quote after the identifier was missing in the message.
        raise ValidationError(
            f"'{identifier}' is not a valid SRA, GEO, ArrayExpress or BioProject "
            "accession.")
예제 #8
0
def validate_unique_intervals(intervals):
    """
    Ensure no two distinct intervals in `intervals` are equal.

    Every ordered pair is compared with ``equals``. A pair is skipped when
    both members have the same non-``None`` ``pk`` or are the same object.

    Raises
    ------
    ValidationError
        If any remaining pair compares equal via ``equals``.
    """
    for left in intervals:
        for right in intervals:
            same_record = (
                left.pk is not None
                and right.pk is not None
                and left.pk == right.pk
            )
            if same_record or left is right:
                continue
            if left.equals(right):
                raise ValidationError(
                    "You can not specify the same interval twice.")
예제 #9
0
def validate_chromosome(value):
    """
    Ensure a chromosome identifier is not a null value.

    ``None`` is accepted without further checks: intervals may be
    underspecified, and those are ignored.

    Raises
    ------
    ValidationError
        If `value` is not ``None`` but `is_null` reports it as null.
    """
    if value is not None and is_null(value):
        raise ValidationError("Chromosome identifier must not be null.")
예제 #10
0
def validate_strand(value):
    """
    Ensure `value` is one of the two accepted strand symbols.

    Raises
    ------
    ValidationError
        If `value` is neither ``"+"`` nor ``"-"``.
    """
    accepted_strands = ("+", "-")
    if value in accepted_strands:
        return
    raise ValidationError("GenomicInterval strand must be either '+' or '-'")
예제 #11
0
def validate_doi_identifier(identifier):
    """
    Ensure `identifier` is accepted by ``idutils.is_doi``.

    Raises
    ------
    ValidationError
        If ``idutils.is_doi`` rejects `identifier`.
    """
    if idutils.is_doi(identifier):
        return
    raise ValidationError(f"'{identifier}' is not a valid DOI.")
예제 #12
0
def validate_map_has_at_least_one_interval(reference_map):
    """
    Ensure a reference map has at least one interval.

    The number of intervals is read from
    ``reference_map.get_intervals().count()``.

    Raises
    ------
    ValidationError
        If that count is falsy (zero).
    """
    interval_total = reference_map.get_intervals().count()
    if interval_total:
        return
    raise ValidationError(
        "You must specify at least one interval for each reference map.")
예제 #13
0
def validate_genome_short_name(value):
    """
    Ensure a genome short name is not a null value.

    Raises
    ------
    ValidationError
        If `is_null` reports `value` as null.
    """
    name_is_missing = is_null(value)
    if name_is_missing:
        raise ValidationError("Genome short name must not be null.")
예제 #14
0
def validate_genome_identifier(identifier):
    """
    Ensure `identifier` is accepted by ``idutils.is_genome``.

    Raises
    ------
    ValidationError
        If ``idutils.is_genome`` rejects `identifier`.
    """
    if idutils.is_genome(identifier):
        return
    message = (
        f"'{identifier}' is not a valid GenBank or RefSeq genome assembly."
    )
    raise ValidationError(message)
예제 #15
0
def validate_mavedb_urn(urn):
    """
    Validate that `urn` matches `MAVEDB_ANY_URN_RE`.

    Raises
    ------
    ValidationError
        If `urn` does not match the pattern.
    """
    if not MAVEDB_ANY_URN_RE.match(urn):
        # Report the offending urn instead of a placeholder message.
        raise ValidationError(f"{urn} is not a valid urn.")
예제 #16
0
def validate_refseq_identifier(identifier):
    """
    Ensure `identifier` is accepted by ``idutils.is_refseq``.

    Raises
    ------
    ValidationError
        If ``idutils.is_refseq`` rejects `identifier`.
    """
    if idutils.is_refseq(identifier):
        return
    raise ValidationError(f"'{identifier}' is not a valid RefSeq accession.")
예제 #17
0
def validate_uniprot_identifier(identifier):
    """
    Ensure `identifier` is accepted by ``idutils.is_uniprot``.

    Raises
    ------
    ValidationError
        If ``idutils.is_uniprot`` rejects `identifier`.
    """
    if idutils.is_uniprot(identifier):
        return
    raise ValidationError(f"'{identifier}' is not a valid UniProt accession.")
예제 #18
0
def validate_ensembl_identifier(identifier):
    """
    Ensure `identifier` is accepted by ``idutils.is_ensembl``.

    Raises
    ------
    ValidationError
        If ``idutils.is_ensembl`` rejects `identifier`.
    """
    if idutils.is_ensembl(identifier):
        return
    raise ValidationError(f"'{identifier}' is not a valid Ensembl accession.")
예제 #19
0
def validate_organism_name(value):
    """
    Ensure an organism name is not a null value.

    Raises
    ------
    ValidationError
        If `is_null` reports `value` as null.
    """
    name_is_missing = is_null(value)
    if name_is_missing:
        raise ValidationError("Species name must not be null.")
예제 #20
0
def validate_one_primary_map(reference_maps):
    """
    Ensure exactly one reference map is flagged as primary.

    The primary maps are counted by summing the results of
    ``is_primary_reference_map()`` over `reference_maps`.

    Raises
    ------
    ValidationError
        If that sum is not exactly one.
    """
    primaries = 0
    for reference_map in reference_maps:
        primaries += reference_map.is_primary_reference_map()
    if primaries != 1:
        raise ValidationError("A target must have one primary reference map.")
예제 #21
0
def validate_reference_genome_has_one_external_identifier(referencegenome):
    """
    Ensure a reference genome has its ``genome_id`` set.

    Raises
    ------
    ValidationError
        If ``referencegenome.genome_id`` is falsy.
    """
    # NOTE(review): the message talks about "only one" identifier while the
    # check fires when none is set -- confirm the intended wording.
    if not referencegenome.genome_id:
        # The separating space between "reference" and "genome" was missing.
        raise ValidationError(
            "Only one external identifier can be specified for a reference "
            "genome.")
예제 #22
0
def validate_keyword(kw):
    """
    Ensure a keyword is a non-null string.

    Raises
    ------
    ValidationError
        If `is_null` reports `kw` as null or `kw` is not a ``str``.
    """
    is_usable = not is_null(kw) and isinstance(kw, str)
    if is_usable:
        return
    raise ValidationError(
        f"'{kw}' not a valid keyword. Keywords must be valid strings.")
예제 #23
0
def validate_map_has_unique_reference_genome(annotations):
    """
    Ensure every annotation refers to a different reference genome.

    Genome names come from ``get_reference_genome_name()`` and are compared
    as lower-cased strings.

    Raises
    ------
    ValidationError
        If there are fewer distinct genome names than annotations.
    """
    distinct_names = {
        str(annotation.get_reference_genome_name()).lower()
        for annotation in annotations
    }
    has_duplicates = len(distinct_names) < len(annotations)
    if has_duplicates:
        raise ValidationError(
            "Each reference map must specify a different reference genome.")
예제 #24
0
def validate_pubmed_identifier(identifier):
    """
    Validate that `identifier` is accepted by ``idutils.is_pmid``.

    Raises
    ------
    ValidationError
        If ``idutils.is_pmid`` rejects `identifier`.
    """
    if not idutils.is_pmid(identifier):
        # The closing quote after the identifier was missing in the message.
        raise ValidationError(
            f"'{identifier}' is not a valid PubMed identifier.")
예제 #25
0
def validate_at_least_one_map(reference_maps):
    """
    Ensure at least one reference map was supplied.

    Raises
    ------
    ValidationError
        If ``len(reference_maps)`` is zero.
    """
    map_total = len(reference_maps)
    if map_total == 0:
        raise ValidationError(
            "A target must have at least one reference map specified.")
예제 #26
0
def validate_mavedb_urn_scoreset(urn):
    """
    Validate that `urn` is a score set urn or a temporary urn.

    Raises
    ------
    ValidationError
        If `urn` matches neither `MAVEDB_SCORESET_URN_RE` nor
        `MAVEDB_TMP_URN_RE`.
    """
    if not (MAVEDB_SCORESET_URN_RE.match(urn) or MAVEDB_TMP_URN_RE.match(urn)):
        # Report the offending urn instead of a placeholder message.
        raise ValidationError(f"{urn} is not a valid score set urn.")
예제 #27
0
def validate_gene_name(value):
    """
    Ensure a gene name is not a null value.

    Raises
    ------
    ValidationError
        If `is_null` reports `value` as null.
    """
    name_is_missing = is_null(value)
    if name_is_missing:
        raise ValidationError("Gene name must not be null.")
예제 #28
0
def validate_mavedb_urn_variant(urn):
    """
    Validate that `urn` is a variant urn or a temporary urn.

    Raises
    ------
    ValidationError
        If `urn` matches neither `MAVEDB_VARIANT_URN_RE` nor
        `MAVEDB_TMP_URN_RE`.
    """
    if not (MAVEDB_VARIANT_URN_RE.match(urn) or MAVEDB_TMP_URN_RE.match(urn)):
        # Report the offending urn instead of a placeholder message.
        raise ValidationError(f"{urn} is not a valid Variant urn.")