Exemple #1
0
def make_alt_1hot(ref_1hot, snp_seq_pos, ref_allele, alt_allele):
    """Return a one hot coding of the sequence carrying the alternative allele.

    The reference coding is copied with ``np.copy`` and only the copy is
    handed to the ``dna_io`` editing helpers.

    Args:
      ref_1hot: one hot coded reference sequence.
      snp_seq_pos: position of the variant within the sequence, as expected
        by the ``dna_io`` helpers. Indel edits are applied at
        ``snp_seq_pos + 1``, i.e. after the shared first nucleotide.
      ref_allele: reference allele string.
      alt_allele: alternative allele string.

    Returns:
      The copy of ``ref_1hot`` with the alternative allele applied. If the
      alleles of an indel do not share their first nucleotide (which
      includes one of them being empty), a warning is printed to stderr
      and the copy is returned unedited.
    """
    ref_n = len(ref_allele)
    alt_n = len(alt_allele)

    # work on a copy so the caller's reference coding is never edited
    alt_1hot = np.copy(ref_1hot)

    if alt_n == ref_n:
        # SNP (or any equal-length substitution)
        dna_io.hot1_set(alt_1hot, snp_seq_pos, alt_allele)
        return alt_1hot

    # Indels must share their first nucleotide. Compare one-character slices
    # rather than [0] so that an empty allele is reported through the warning
    # path below instead of raising IndexError. The lengths differ here, so
    # the two slices can never both be empty.
    first_nt_matches = ref_allele[:1] == alt_allele[:1]

    if ref_n > alt_n:
        # deletion
        if first_nt_matches:
            dna_io.hot1_delete(alt_1hot, snp_seq_pos + 1, ref_n - alt_n)
        else:
            print('WARNING: Deletion first nt does not match: %s %s' %
                  (ref_allele, alt_allele),
                  file=sys.stderr)

    else:
        # insertion
        if first_nt_matches:
            dna_io.hot1_insert(alt_1hot, snp_seq_pos + 1, alt_allele[1:])
        else:
            print('WARNING: Insertion first nt does not match: %s %s' %
                  (ref_allele, alt_allele),
                  file=sys.stderr)

    return alt_1hot
Exemple #2
0
def make_alt_1hot(ref_1hot, snp_seq_pos, ref_allele, alt_allele):
  """Return alternative allele one hot coding.

  The reference coding is copied with ``np.copy`` and only the copy is
  handed to the ``dna_io`` editing helpers.

  Args:
    ref_1hot: one hot coded reference sequence.
    snp_seq_pos: position of the variant within the sequence, as expected by
      the ``dna_io`` helpers. Indel edits are applied at ``snp_seq_pos + 1``,
      i.e. after the shared first nucleotide.
    ref_allele: reference allele string.
    alt_allele: alternative allele string.

  Returns:
    The copy of ``ref_1hot`` with the alternative allele applied.

  Raises:
    AssertionError: if the alleles of an indel do not start with the same
      nucleotide.
    IndexError: if the shorter allele of an indel is empty.
  """
  ref_n = len(ref_allele)
  alt_n = len(alt_allele)

  # copy reference
  alt_1hot = np.copy(ref_1hot)

  if alt_n == ref_n:
    # SNP
    dna_io.hot1_set(alt_1hot, snp_seq_pos, alt_allele)

  elif ref_n > alt_n:
    # deletion
    delete_len = ref_n - alt_n
    # Raised explicitly instead of using `assert` so the check still runs
    # under `python -O`; the exception type is kept as AssertionError so
    # existing callers that catch it keep working.
    if ref_allele[0] != alt_allele[0]:
      raise AssertionError(
          'Deletion first nt does not match: %s %s' % (ref_allele, alt_allele))
    dna_io.hot1_delete(alt_1hot, snp_seq_pos+1, delete_len)

  else:
    # insertion
    # Same explicit check as for deletions (see above).
    if ref_allele[0] != alt_allele[0]:
      raise AssertionError(
          'Insertion first nt does not match: %s %s' % (ref_allele, alt_allele))
    dna_io.hot1_insert(alt_1hot, snp_seq_pos+1, alt_allele[1:])

  return alt_1hot
Exemple #3
0
def alleles_1hot(gene_seq, seq_1hot, seq_snps):
    """Build one hot codings for a gene sequence and its SNP alleles.

    Entry 0 of the result is the reference coding: a copy of ``seq_1hot``
    that is patched wherever a SNP's reference allele disagrees with it.
    Entry ``i`` (``i >= 1``) is a copy of that reference with the first
    alternative allele of ``seq_snps[i - 1]`` applied.

    Args:
      gene_seq: object whose ``start`` attribute is subtracted from SNP
        positions to obtain sequence offsets.
      seq_1hot: one hot coded sequence; it is copied, not edited.
      seq_snps: iterable of SNP objects providing ``pos``, ``rsid``,
        ``ref_allele`` and ``alt_alleles``. It is iterated twice.

    Returns:
      ``np.array`` built from the list of codings, reference first.

    Raises:
      Exception: if a SNP's reference allele mismatches the sequence and the
        decoded stretch has a different length than the allele.
      AssertionError: if the alleles of an indel do not start with the same
        nucleotide.
    """

    def seq_offset(variant):
        # SNP position with respect to the sequence
        return variant.pos - 1 - gene_seq.start

    ref_coding = np.copy(seq_1hot)

    # first pass: make the reference coding agree with every reference allele
    for variant in seq_snps:
        offset = seq_offset(variant)
        ref_nt = variant.ref_allele

        # decode the stretch of reference that the allele is supposed to cover
        observed = dna_io.hot1_dna(ref_coding[offset:offset + len(ref_nt), :])
        mismatch = observed != ref_nt
        if not mismatch:
            continue

        print(
            'WARNING: %s - ref allele %s does not match reference genome %s; changing reference genome to match.'
            % (variant.rsid, ref_nt, observed),
            file=sys.stderr)

        # only same-length mismatches are patched; anything that would
        # change the length is rejected rather than guessed at
        if len(observed) != len(ref_nt):
            raise Exception(
                'ERROR: reference mismatch indels cannot yet be handled.')

        dna_io.hot1_set(ref_coding, offset, ref_nt)

    # second pass: one coding per SNP, derived from the reconciled reference
    codings = [ref_coding]
    for variant in seq_snps:
        offset = seq_offset(variant)

        alt_coding = np.copy(ref_coding)
        codings.append(alt_coding)

        ref_nt = variant.ref_allele
        alt_nt = variant.alt_alleles[0]
        length_change = len(alt_nt) - len(ref_nt)

        if length_change == 0:
            # SNP
            dna_io.hot1_set(alt_coding, offset, alt_nt)

        elif length_change < 0:
            # deletion: remove what follows the shared first nucleotide
            assert (ref_nt[0] == alt_nt[0])
            dna_io.hot1_delete(alt_coding, offset + 1, -length_change)

        else:
            # insertion: add what follows the shared first nucleotide
            assert (ref_nt[0] == alt_nt[0])
            dna_io.hot1_insert(alt_coding, offset + 1, alt_nt[1:])

    return np.array(codings)