Ejemplo n.º 1
0
def validate_lantern_translation_outside_database(tile_library_host,
                                                  tile_library_path):
    """
        check that an HTTP GET of the given tile library host and path
        succeeds

        Raises TileLibraryValidationError keyed by 'tile_library_host' when
        building or sending the request fails, and keyed by
        'tile_library_int-tile_library_host' (carrying the response text)
        when the response status is not requests.codes.ok.
    """
    try:
        url = "http://%s%s" % (tile_library_host, tile_library_path)
        response = requests.get(url, timeout=1)
    except Exception as request_error:
        # Any failure while contacting the host is reported against the host.
        raise TileLibraryValidationError(
            {'tile_library_host': str(request_error)})
    request_failed = response.status_code != requests.codes.ok
    if request_failed:
        raise TileLibraryValidationError(
            {'tile_library_int-tile_library_host': response.text})
Ejemplo n.º 2
0
def validate_tile_position_int(tile_position_int):
    """
        check that tile_position_int is not negative and does not exceed the
        largest value the version, path and step hex digits can hold

        Raises TileLibraryValidationError keyed by 'tile_position_int'.
    """
    if tile_position_int < 0:
        raise TileLibraryValidationError(
            {'tile_position_int': "integer must be positive"})
    # The largest value of each field is a run of 'f' digits of its width.
    field_widths = (NUM_HEX_INDEXES_FOR_VERSION, NUM_HEX_INDEXES_FOR_PATH,
                    NUM_HEX_INDEXES_FOR_STEP)
    max_fields = tuple('f' * width for width in field_widths)
    upper_bound = int(''.join(max_fields), 16)
    if tile_position_int > upper_bound:
        raise TileLibraryValidationError({
            'tile_position_int':
            "tile position int must be smaller than or equal to '%s.%s.%s'" %
            max_fields
        })
Ejemplo n.º 3
0
def validate_tile_variant_int(tile_variant_int):
    """
        check that tile_variant_int is not negative and does not exceed the
        largest value the version, path, step and variant value hex digits
        can hold

        Raises TileLibraryValidationError keyed by 'tile_variant_int'.
    """
    if tile_variant_int < 0:
        raise TileLibraryValidationError(
            {'tile_variant_int': "integer must be positive"})
    # The largest value of each field is a run of 'f' digits of its width.
    field_widths = (NUM_HEX_INDEXES_FOR_VERSION, NUM_HEX_INDEXES_FOR_PATH,
                    NUM_HEX_INDEXES_FOR_STEP,
                    NUM_HEX_INDEXES_FOR_VARIANT_VALUE)
    max_fields = tuple('f' * width for width in field_widths)
    upper_bound = int(''.join(max_fields), 16)
    if tile_variant_int > upper_bound:
        raise TileLibraryValidationError({
            'tile_variant_int':
            "tile variant int must be smaller than or equal to '%s.%s.%s.%s'" %
            max_fields
        })
Ejemplo n.º 4
0
def validate_alternate_bases(tile_var_seq, alternate_bases, through_start,
                             through_end):
    """
        check the genome variant alternate bases are the bases in the variant

        tile_var_seq[through_start:through_end] is compared, ignoring case,
        with alternate_bases after '-' characters are stripped from both of
        its ends. through_start and through_end are also checked against 0,
        the length of tile_var_seq, and each other.

        Every problem found is collected and raised together in a single
        TileLibraryValidationError; nothing is returned on success.
    """
    errors = {}
    seq_length = len(tile_var_seq)
    if through_start < 0:
        errors['start'] = "start is less than 0"
    if through_start > seq_length:
        errors[
            'start'] = "start is larger than the length of the tile_variant_sequence"
    if through_end < 0:
        errors['end'] = "end is less than 0"
    if through_end > seq_length:
        errors[
            'end'] = "end is larger than the length of the tile_variant_sequence"
    if through_end < through_start:
        # This branch fires when end precedes start, so the message says so.
        errors['start-end'] = "end is smaller than start"
    variant_bases = tile_var_seq[through_start:through_end]
    if variant_bases.upper() != alternate_bases.strip('-').upper():
        errors[
            'genome_variant.alternate_bases'] = "Alternate bases (%s) do not match bases in tile variant (%s)" % (
                alternate_bases, variant_bases)
    if errors:
        raise TileLibraryValidationError(errors)
Ejemplo n.º 5
0
def validate_tile_variant_loci_encompass_genome_variant_loci(
        genome_var_start_int, genome_var_end_int, tile_var_start_int,
        tile_var_end_int, tile_var_is_at_start, tile_var_is_at_end):
    """
        check genome variant loci are within tile variant loci

        The acceptable window is the tile variant locus shrunk by TAG_LENGTH
        at each end, except at an end where tile_var_is_at_start or
        tile_var_is_at_end is set, which is left unshrunk. Both genome
        variant loci must lie inside that window.

        Raises TileLibraryValidationError with one entry per offending locus.
    """
    if tile_var_is_at_start:
        lowest_allowed = tile_var_start_int
        too_low_msg = "%s is before the locus"
    else:
        lowest_allowed = tile_var_start_int + TAG_LENGTH
        too_low_msg = "%s is in the start tag or before the locus"

    if tile_var_is_at_end:
        highest_allowed = tile_var_end_int
        too_high_msg = "%s is after the locus"
    else:
        highest_allowed = tile_var_end_int - TAG_LENGTH
        too_high_msg = "%s is in the end tag or after the locus"

    problems = {}
    loci_to_check = (
        ('genome_variant.start_int', "start_int", genome_var_start_int),
        ('genome_variant.end_int', "end_int", genome_var_end_int),
    )
    for error_key, field_name, locus in loci_to_check:
        if locus < lowest_allowed:
            problems[error_key] = too_low_msg % field_name
        # Checked second so that a "too high" message wins if both apply.
        if locus > highest_allowed:
            problems[error_key] = too_high_msg % field_name
    if problems:
        raise TileLibraryValidationError(problems)
Ejemplo n.º 6
0
def validate_reference_versus_alternate_bases(ref_bases, alt_bases):
    """
        check reference bases and alternate bases differ, ignoring case

        Raises TileLibraryValidationError keyed by
        'reference_bases-alternate_bases' when they are the same.
    """
    bases_are_identical = ref_bases.upper() == alt_bases.upper()
    if not bases_are_identical:
        return
    message = "Reference bases (%s) are the same as alternate bases (%s)" % (
        ref_bases, alt_bases)
    raise TileLibraryValidationError(
        {'reference_bases-alternate_bases': message})
Ejemplo n.º 7
0
def validate_lantern_translation(lantern_name, tile_variant_int):
    """
        check a lantern name and a tile variant int refer to the same tile
        position

        A lantern_name that does not match LANTERN_NAME_FORMAT_STRING is
        raised immediately as a TileLibraryValidationError, and any error
        raised by the helper calls below propagates unchanged. Spanning tile
        notation and path version, path and step mismatches are collected
        and raised together in a single TileLibraryValidationError.
    """
    VALIDATION_ERRORS = {}
    matching = re.match(LANTERN_NAME_FORMAT_STRING, lantern_name)
    if matching is None:
        # re.match returns None when the name does not fit the format; report
        # that as a validation error rather than failing on matching.group().
        raise TileLibraryValidationError({
            'lantern_name':
            "lantern_name does not match the expected lantern name format"
        })
    #Check that lantern_name doesn't have spanning tile notation
    if matching.group(2) is not None:
        VALIDATION_ERRORS[
            'lantern_name'] = "lantern_name cannot have spanning tile notation"
    #If these throw an error, I want it to propagate.
    tile_position_int = basic_fns.get_position_from_cgf_string(lantern_name)
    validate_tile_variant_int(tile_variant_int)
    tile_path_version, tile_path, tile_step = basic_fns.get_position_ints_from_position_int(
        tile_position_int)
    variant_path_version, variant_path, variant_step, variant_val = basic_fns.get_tile_variant_ints_from_tile_variant_int(
        tile_variant_int)
    if tile_path_version != variant_path_version:
        VALIDATION_ERRORS[
            'version_mismatch'] = "tile variant path version and tile position path version must be equal"
    if tile_path != variant_path:
        VALIDATION_ERRORS[
            'path_mismatch'] = "tile variant path and tile position path must be equal"
    if tile_step != variant_step:
        VALIDATION_ERRORS[
            'step_mismatch'] = "tile variant step and tile position step must be equal"
    if VALIDATION_ERRORS:
        raise TileLibraryValidationError(VALIDATION_ERRORS)
Ejemplo n.º 8
0
def validate_reference_bases(reference_seq, start, end, reference_bases):
    """
        check genome variant reference bases are the bases in the reference sequence

        reference_seq[start:end] is compared, ignoring case, with
        reference_bases after '-' characters are stripped from both of its
        ends. Raises TileLibraryValidationError keyed by 'reference_bases'
        when they differ.
    """
    bases_in_sequence = reference_seq[start:end]
    if bases_in_sequence.upper() != reference_bases.strip('-').upper():
        message = (
            "Reference bases (%s) do not match bases in reference tile variant (%s)"
            % (reference_bases, bases_in_sequence))
        raise TileLibraryValidationError({'reference_bases': message})
Ejemplo n.º 9
0
def validate_spanning_tile(tile_position_one, tile_position_two,
                           num_positions_spanned):
    """
        check two tile positions share a path version and a path, and that
        the distance between their steps equals num_positions_spanned - 1

        Errors from validating either position int propagate unchanged. The
        first failing check raises TileLibraryValidationError keyed by
        'spanning_tile_error'.
    """
    for tile_position in (tile_position_one, tile_position_two):
        validate_tile_position_int(tile_position)
    #If these throw an error, I want it to propagate.
    first_version, first_path, first_step = basic_fns.get_position_ints_from_position_int(
        tile_position_one)
    second_version, second_path, second_step = basic_fns.get_position_ints_from_position_int(
        tile_position_two)
    if first_version != second_version:
        reason = 'starting and ending tiles cross path versions'
    elif first_path != second_path:
        reason = 'starting and ending tiles cross paths'
    elif abs(second_step - first_step) != num_positions_spanned - 1:
        reason = 'number of steps spanned (from tile position integers and reported) do not match'
    else:
        return
    raise TileLibraryValidationError({'spanning_tile_error': reason})
Ejemplo n.º 10
0
def validate_tile_variant(tile_position_int, tile_variant_int, variant_value,
                          sequence, seq_length, seq_md5sum, start_tag, end_tag,
                          is_start_of_path, is_end_of_path):
    """
        check a tile variant is consistent with its tile position, its
        sequence metadata and its tags

        Errors from validating tile_position_int or tile_variant_int
        propagate unchanged. Every other problem (version/path/step/variant
        value mismatches, wrong seq_length, non-lowercase sequence, wrong
        seq_md5sum, sequence too short for its tags, sequence not starting
        or ending with the given tags) is collected and raised together in a
        single TileLibraryValidationError.
    """
    # Room for one tag per end, minus a tag for each path boundary flag set.
    acceptable_seq_length = TAG_LENGTH * 2
    if is_start_of_path:
        acceptable_seq_length -= TAG_LENGTH
    if is_end_of_path:
        acceptable_seq_length -= TAG_LENGTH
    VALIDATION_ERRORS = {}
    #If these throw an error, I want it to propagate.
    validate_tile_position_int(tile_position_int)
    validate_tile_variant_int(tile_variant_int)
    tile_path_version, tile_path, tile_step = basic_fns.get_position_ints_from_position_int(
        tile_position_int)
    variant_path_version, variant_path, variant_step, variant_val = basic_fns.get_tile_variant_ints_from_tile_variant_int(
        tile_variant_int)
    if tile_path_version != variant_path_version:
        VALIDATION_ERRORS[
            'version_mismatch'] = "tile variant path version and tile position path version must be equal"
    if tile_path != variant_path:
        VALIDATION_ERRORS[
            'path_mismatch'] = "tile variant path and tile position path must be equal"
    if tile_step != variant_step:
        VALIDATION_ERRORS[
            'step_mismatch'] = "tile variant step and tile position step must be equal"
    if variant_val != variant_value:
        VALIDATION_ERRORS[
            'variant_value_mismatch'] = "tile variant value and input variant value must be equal"
    if seq_length != len(sequence):
        VALIDATION_ERRORS[
            'length_mismatch'] = "length must be the length of the sequence"
    if sequence.lower() != sequence:
        VALIDATION_ERRORS['sequence'] = "Sequence must be entirely lowercase"
    # hashlib only accepts bytes on Python 3, so encode a text sequence first;
    # a sequence that is already bytes is hashed as-is.
    if isinstance(sequence, bytes):
        sequence_bytes = sequence
    else:
        sequence_bytes = sequence.encode('utf-8')
    digestor = hashlib.new('md5', sequence_bytes)
    if digestor.hexdigest() != seq_md5sum:
        VALIDATION_ERRORS[
            'md5sum-sequence'] = "md5sum is not actually md5sum of sequence"
    if len(sequence) < acceptable_seq_length:
        VALIDATION_ERRORS[
            'sequence_malformed'] = "Sequence is not long enough - the tags overlap"
    if sequence[:TAG_LENGTH] != start_tag:
        VALIDATION_ERRORS[
            'start_tag-sequence'] = "Sequence does not start with the given start tag"
    if sequence[-TAG_LENGTH:] != end_tag:
        VALIDATION_ERRORS[
            'end_tag-sequence'] = "Sequence does not end with the given end tag"
    if VALIDATION_ERRORS:
        raise TileLibraryValidationError(VALIDATION_ERRORS)
Ejemplo n.º 11
0
def validate_same_chromosome(locus_chrom_int, variant_chrom_int,
                             locus_chrom_name, variant_chrom_name):
    """
        check genome variant chromosome is the same as the locus chromosome

        Both the chromosome ints and the chromosome names must be equal.
        Raises TileLibraryValidationError with one entry per mismatch.
    """
    mismatches = {}
    ints_match = locus_chrom_int == variant_chrom_int
    names_match = locus_chrom_name == variant_chrom_name
    if not ints_match:
        mismatches['chromosome_int'] = (
            'Locus for tile variant is not in chromosome %i' %
            (variant_chrom_int))
    if not names_match:
        mismatches['alternate_chromosome_name'] = (
            "Locus for tile variant is not in chromosome %s" %
            (variant_chrom_name))
    if mismatches:
        raise TileLibraryValidationError(mismatches)
Ejemplo n.º 12
0
def validate_locus(chromosome_int, tile_position_int, TAG_LENGTH,
                   tile_sequence_length, begin_int, end_int):
    """
        check a locus is consistent with its chromosome and tile sequence

        The path taken from tile_position_int must fall in the range that
        CHR_PATH_LENGTHS gives for chromosome_int, end_int must be larger
        than begin_int, their difference must equal tile_sequence_length and
        must be at least twice TAG_LENGTH. Note that TAG_LENGTH here is the
        parameter passed by the caller.

        Raises TileLibraryValidationError with one entry per failed check.
    """
    problems = {}
    _, path, _ = basic_fns.get_position_ints_from_position_int(
        tile_position_int)
    chromosome_paths = range(CHR_PATH_LENGTHS[chromosome_int - 1],
                             CHR_PATH_LENGTHS[chromosome_int])
    if path not in chromosome_paths:
        problems[
            'chromosome_int-tile_position'] = "Path %i is not in chromosome %i, based on CHR_PATH_LENGTHS" % (
                path, chromosome_int)
    if end_int <= begin_int:
        problems[
            'malformed_locus'] = "end_int must be strictly larger than begin_int"
    locus_length = end_int - begin_int
    if tile_sequence_length != locus_length:
        problems[
            'tile_length_locus_mismatch'] = "Sequence length must be the same length specified by the loci"
    if locus_length < TAG_LENGTH * 2:
        problems[
            'short_locus'] = "the distance between begin_int and end_int must be greater than twice the TAG_LENGTH"
    if problems:
        raise TileLibraryValidationError(problems)
Ejemplo n.º 13
0
def validate_tile_position(tile_position_int, is_start_of_path, is_end_of_path,
                           start_tag, end_tag):
    """
        check a tile position's step, path boundary flags and tags agree

        Step 0 must coincide with is_start_of_path, each tag must pass
        validate_tag, and a tag must be empty exactly when the tile is at
        that end of the path.

        Raises TileLibraryValidationError with one entry per failed check.
    """
    problems = {}
    _, _, step = basic_fns.get_position_ints_from_position_int(
        tile_position_int)

    step_is_zero = step == 0
    if step_is_zero and not is_start_of_path:
        problems[
            'tile_position_int-is_start_of_path'] = "If step is 0, is_start_of_path should be True"
    elif is_start_of_path and not step_is_zero:
        problems[
            'tile_position_int-is_start_of_path'] = "If step is not 0, is_start_of_path should be False"

    # A bad tag is recorded under its own key rather than aborting the checks.
    for tag_key, tag in (('start_tag', start_tag), ('end_tag', end_tag)):
        try:
            validate_tag(tag)
        except TileLibraryValidationError as tag_error:
            problems[tag_key] = tag_error.value

    if is_start_of_path and start_tag != '':
        problems[
            'start_tag-is_start_of_path'] = "If is_start_of_path, start_tag should be empty"
    elif not is_start_of_path and start_tag == '':
        problems[
            'start_tag-is_start_of_path'] = "If not is_start_of_path, start_tag should not be empty"

    if is_end_of_path and end_tag != '':
        problems[
            'end_tag-is_end_of_path'] = "If is_end_of_path, end_tag should be empty"
    elif not is_end_of_path and end_tag == '':
        problems[
            'end_tag-is_end_of_path'] = "If not is_end_of_path, end_tag should not be empty"

    if problems:
        raise TileLibraryValidationError(problems)
Ejemplo n.º 14
0
def validate_tag(tag):
    """
        check a tag is lowercase and is either empty or TAG_LENGTH long

        Raises TileLibraryValidationError with a plain message string.
    """
    if tag != tag.lower():
        raise TileLibraryValidationError("Tag must be lowercase")
    tag_size = len(tag)
    if tag_size not in (TAG_LENGTH, 0):
        raise TileLibraryValidationError(
            "Tag length must be equal to the set TAG_LENGTH or must be empty")
Ejemplo n.º 15
0
def validate_json(text):
    """
        check text can be parsed by json.loads

        The parsed value is discarded. Raises TileLibraryValidationError
        when json.loads raises ValueError.
    """
    try:
        json.loads(text)
    except ValueError:
        raise TileLibraryValidationError("Expects json-formatted text")
    else:
        return None
Ejemplo n.º 16
0
def validate_num_spanning_tiles(num_spanning):
    """
        check num_spanning is at least 1

        Raises TileLibraryValidationError with a plain message string.
    """
    spans_too_few = num_spanning < 1
    if spans_too_few:
        raise TileLibraryValidationError(
            "num positions spanned must be greater than or equal to 1")