def make_alt_1hot(ref_1hot, snp_seq_pos, ref_allele, alt_allele):
    """Return alternative allele one hot coding.

    Args:
        ref_1hot: one hot coded reference sequence. It is copied, never
            modified.
        snp_seq_pos: position of the variant within the sequence. Equal-length
            alleles are written at this position; indel edits are applied at
            ``snp_seq_pos + 1``, i.e. after the shared first nucleotide.
        ref_allele: reference allele string.
        alt_allele: alternative allele string.

    Returns:
        A copy of ``ref_1hot`` with the alternative allele applied. When the
        alleles of an indel do not share their first nucleotide, a warning is
        printed to stderr and the unmodified copy is returned.
    """
    ref_n = len(ref_allele)
    alt_n = len(alt_allele)

    # Work on a copy so the caller's reference coding stays untouched.
    alt_1hot = np.copy(ref_1hot)

    if alt_n == ref_n:
        # Equal lengths: plain substitution.
        dna_io.hot1_set(alt_1hot, snp_seq_pos, alt_allele)
        return alt_1hot

    # Compare one-character slices rather than [0] so that an empty allele is
    # reported as "first nt does not match" instead of raising IndexError.
    anchored = ref_allele[:1] == alt_allele[:1]

    if ref_n > alt_n:
        # Deletion.
        if anchored:
            dna_io.hot1_delete(alt_1hot, snp_seq_pos + 1, ref_n - alt_n)
        else:
            print('WARNING: Deletion first nt does not match: %s %s' %
                  (ref_allele, alt_allele), file=sys.stderr)
    else:
        # Insertion: everything after the shared first nucleotide is new.
        if anchored:
            dna_io.hot1_insert(alt_1hot, snp_seq_pos + 1, alt_allele[1:])
        else:
            print('WARNING: Insertion first nt does not match: %s %s' %
                  (ref_allele, alt_allele), file=sys.stderr)

    return alt_1hot
def make_alt_1hot(ref_1hot, snp_seq_pos, ref_allele, alt_allele):
    """Return alternative allele one hot coding.

    Args:
        ref_1hot: one hot coded reference sequence. It is copied, never
            modified.
        snp_seq_pos: position of the variant within the sequence. Equal-length
            alleles are written at this position; indel edits are applied at
            ``snp_seq_pos + 1``, i.e. after the shared first nucleotide.
        ref_allele: reference allele string.
        alt_allele: alternative allele string.

    Returns:
        A copy of ``ref_1hot`` with the alternative allele applied.

    Raises:
        AssertionError: if the alleles have different lengths (an indel) and
            do not start with the same nucleotide.
    """
    ref_n = len(ref_allele)
    alt_n = len(alt_allele)

    # Work on a copy so the caller's reference coding stays untouched.
    alt_1hot = np.copy(ref_1hot)

    if alt_n == ref_n:
        # Equal lengths: plain substitution.
        dna_io.hot1_set(alt_1hot, snp_seq_pos, alt_allele)
        return alt_1hot

    # Explicit raise instead of `assert`: assert statements are removed under
    # `python -O`, which would let a mismatched indel be applied silently.
    # AssertionError is kept so existing callers see the same exception type.
    if ref_allele[0] != alt_allele[0]:
        raise AssertionError(
            'Indel first nt does not match: %s %s' % (ref_allele, alt_allele))

    if ref_n > alt_n:
        # Deletion.
        dna_io.hot1_delete(alt_1hot, snp_seq_pos + 1, ref_n - alt_n)
    else:
        # Insertion: everything after the shared first nucleotide is new.
        dna_io.hot1_insert(alt_1hot, snp_seq_pos + 1, alt_allele[1:])

    return alt_1hot
def alleles_1hot(gene_seq, seq_1hot, seq_snps):
    """One hot code for gene sequence alleles.

    Builds one coding for the reference sequence followed by one coding per
    SNP, each carrying that SNP's first alternative allele.

    Args:
        gene_seq: object with a ``start`` attribute used to convert SNP
            positions into sequence indexes.
        seq_1hot: one hot coded sequence, indexed as ``[position, :]``. It is
            copied, never modified.
        seq_snps: iterable of SNP objects providing ``pos``, ``rsid``,
            ``ref_allele`` and ``alt_alleles``. Only ``alt_alleles[0]`` is
            used.

    Returns:
        ``np.array`` stacking the (possibly corrected) reference coding at
        index 0 and one alternative coding per SNP, in ``seq_snps`` order.

    Raises:
        Exception: if a SNP's reference allele disagrees with the sequence
            and the decoded sequence has a different length from the allele.
        AssertionError: if an indel's alleles do not start with the same
            nucleotide.
    """
    # The reference coding is edited in place below, so start from a copy.
    ref_1hot = np.copy(seq_1hot)

    # Pass 1: make the reference coding agree with every SNP's ref allele.
    for snp in seq_snps:
        # NOTE(review): presumably snp.pos is 1-based and gene_seq.start is
        # 0-based - confirm against the callers.
        snp_seq_pos = snp.pos - 1 - gene_seq.start
        ref_n = len(snp.ref_allele)

        seq_ref = dna_io.hot1_dna(ref_1hot[snp_seq_pos:snp_seq_pos + ref_n, :])
        if seq_ref == snp.ref_allele:
            continue

        print(
            'WARNING: %s - ref allele %s does not match reference genome %s; changing reference genome to match.'
            % (snp.rsid, snp.ref_allele, seq_ref),
            file=sys.stderr)

        # Only same-length mismatches are corrected; rewriting the reference
        # for indel-shaped mismatches is deliberately not attempted.
        if len(seq_ref) != ref_n:
            raise Exception(
                'ERROR: reference mismatch indels cannot yet be handled.')
        dna_io.hot1_set(ref_1hot, snp_seq_pos, snp.ref_allele)

    # Pass 2: one alternative coding per SNP, derived from the reference.
    aseqs_1hot = [ref_1hot]
    for snp in seq_snps:
        snp_seq_pos = snp.pos - 1 - gene_seq.start
        ref_allele = snp.ref_allele
        alt_allele = snp.alt_alleles[0]

        alt_1hot = np.copy(ref_1hot)

        if len(ref_allele) == len(alt_allele):
            # Equal lengths: plain substitution.
            dna_io.hot1_set(alt_1hot, snp_seq_pos, alt_allele)
        else:
            # Explicit raise instead of `assert`: assert statements are
            # removed under `python -O`, which would let a mismatched indel
            # be applied silently. AssertionError keeps the exception type.
            if ref_allele[0] != alt_allele[0]:
                raise AssertionError(
                    'Indel first nt does not match: %s %s' %
                    (ref_allele, alt_allele))

            if len(ref_allele) > len(alt_allele):
                # Deletion.
                delete_len = len(ref_allele) - len(alt_allele)
                dna_io.hot1_delete(alt_1hot, snp_seq_pos + 1, delete_len)
            else:
                # Insertion: everything after the shared first nt is new.
                dna_io.hot1_insert(alt_1hot, snp_seq_pos + 1, alt_allele[1:])

        aseqs_1hot.append(alt_1hot)

    return np.array(aseqs_1hot)