def get_inds(self, seq):
    """Return the combined "in-motif" indexes of ``seq`` for both LC types.

    Two ``motif_seq.LcSeq`` objects are built for ``seq`` with the same
    ``self.k``: one from ``self.lca`` (labelled ``'lca'``) and one from
    ``self.lce`` (labelled ``'lce'``).  Each one's ``_get_motif_indexes()``
    returns a pair; only the first element of each pair is used.

    Args:
        seq: The sequence handed to ``motif_seq.LcSeq``.

    Returns:
        The result of ``union`` between the first element of the 'lca' pair
        and the first element of the 'lce' pair (presumably sets of
        positions inside motifs -- confirm against ``LcSeq``).
    """
    lca_motifs = motif_seq.LcSeq(seq, self.k, self.lca, 'lca')
    lce_motifs = motif_seq.LcSeq(seq, self.k, self.lce, 'lce')

    # The second element of each pair is not needed here.
    lca_inside, _ = lca_motifs._get_motif_indexes()
    lce_inside, _ = lce_motifs._get_motif_indexes()

    return lca_inside.union(lce_inside)
def in_out_kappa(self):
    """Scatter-plot ``NCPR()`` against ``deltaForm()`` for the out-of-motif parts.

    Reads the tab-separated table at ``self.train_fpi``, keeps the rows whose
    ``y`` column equals 0, and for every entry of the ``Sequence`` column
    builds an 'lca' ``motif_seq.LcSeq``.  A sequence contributes a point only
    when it has more than 20 in-motif k-mers and the ``kappa.KappaKmers``
    object built from its out-of-motif k-mers and sequence reports
    ``FCR() > 0.1``.  The out-of-motif sequence and its delta are printed for
    each contributing sequence, and the figure is shown with
    ``plt.show()``.

    NOTE(review): a second method named ``in_out_kappa`` is defined later in
    this file; if both live in the same class, the later definition replaces
    this one -- confirm which is intended.

    Returns:
        None.
    """
    table = pd.read_csv(self.train_fpi, sep='\t', index_col=0)
    negatives = table[table['y'] == 0]

    deltas = []
    ncprs = []
    fcrs = []  # collected alongside the plotted values but not plotted

    for sequence in list(negatives['Sequence']):
        lc = motif_seq.LcSeq(sequence, self.k, self.lca, 'lca')
        _in_seq, out_seq = lc.seq_in_motif()
        in_kmer, out_kmer = lc.overlapping_kmer_in_motif()

        # Skip sequences with 20 or fewer in-motif k-mers.
        if not len(in_kmer) > 20:
            continue

        charges = kappa.KappaKmers(out_kmer, out_seq)
        if not charges.FCR() > 0.1:
            continue

        delta = charges.deltaForm()
        ncprs.append(charges.NCPR())
        print(out_seq)
        print(delta)
        deltas.append(delta)
        fcrs.append(charges.FCR())

    # Previously tried alternatives (kept for reference):
    #   plt.hist(net_charges); plt.ylim([0, 0.35]); plt.xlim([0, 0.4])
    plt.scatter(ncprs, deltas, alpha=0.5, color='grey')
    plt.ylim([0, 0.5])
    plt.xlim([-0.8, 0.8])
    plt.xlabel('Net charge per residue', size=14)
    plt.ylabel('Charge Asymmetry (Delta)', size=14)
    plt.title('Outside LC Motifs')
    plt.show()
def normal_charge_properties(self):
    """Print the amino-acid percentages of selected in-motif sequence.

    Reads the tab-separated table at ``self.train_fpi``, keeps the rows whose
    ``y`` column equals 0, and for every entry of the ``Sequence`` column
    builds an 'lca' ``motif_seq.LcSeq``.  The in-motif part of a sequence is
    selected only when all of the following hold, checked in this order:

    1. it has more than 20 in-motif k-mers;
    2. ``-0.1 < NCPR() < 0.1`` on the ``kappa.KappaKmers`` object built from
       the out-of-motif k-mers and out-of-motif sequence;
    3. ``deltaForm()`` of that object is below 0.1;
    4. ``norm_score.NormScore().lc_norm_score([seq])[0]`` is above 20;
    5. ``FCR()`` of that object is below 0.2.

    The selected in-motif parts are concatenated, wrapped in
    ``ProteinAnalysis`` and the result of ``get_amino_acids_percent()`` is
    printed.

    NOTE(review): if no sequence passes the filters, ``ProteinAnalysis`` is
    given an empty string -- confirm that this is handled acceptably.

    Returns:
        None.
    """
    table = pd.read_csv(self.train_fpi, sep='\t', index_col=0)
    negatives = table[table['y'] == 0]

    # Collect the pieces and join once instead of growing a string with +=.
    selected_in_seqs = []

    for seq in negatives['Sequence']:
        ms = motif_seq.LcSeq(seq, self.k, self.lca, 'lca')
        in_seq, out_seq = ms.seq_in_motif()
        in_kmer, out_kmer = ms.overlapping_kmer_in_motif()
        if not len(in_kmer) > 20:
            continue

        ka = kappa.KappaKmers(out_kmer, out_seq)
        delta = ka.deltaForm()

        # Conditions are written as ``not (a < b)`` so that a NaN value is
        # rejected exactly as the plain ``a < b`` test would reject it.
        if not (-0.1 < ka.NCPR() < 0.1):
            continue
        if not (delta < 0.1):
            continue

        # The score is only computed for sequences that passed the checks
        # above, matching the original evaluation order.
        ns = norm_score.NormScore()
        score = ns.lc_norm_score([seq])[0]
        if not (score > 20):
            continue

        if not (ka.FCR() < 0.2):
            continue

        selected_in_seqs.append(in_seq)

    all_seq_in = ''.join(selected_in_seqs)
    analysed_seq = ProteinAnalysis(all_seq_in)
    aa_perc = analysed_seq.get_amino_acids_percent()
    print(aa_perc)
def in_out_kappa(self):
    """Print ``get_kappa()`` for the in-motif and out-of-motif parts of each sequence.

    Reads the tab-separated table at ``self.train_fpi``, keeps the rows whose
    ``y`` column equals 0, and for every entry of the ``Sequence`` column
    builds an 'lca' ``motif_seq.LcSeq``.  The two parts returned by
    ``seq_in_motif()`` are each wrapped in ``SequenceParameters``; the
    in-motif kappa is printed first, then the out-of-motif kappa, then an
    empty line as a separator.

    NOTE(review): an earlier method in this file is also named
    ``in_out_kappa``; if both live in the same class, this later definition
    is the one that takes effect -- confirm which is intended.

    Returns:
        None.
    """
    table = pd.read_csv(self.train_fpi, sep='\t', index_col=0)
    negatives = table[table['y'] == 0]

    for sequence in list(negatives['Sequence']):
        lc = motif_seq.LcSeq(sequence, self.k, self.lca, 'lca')
        inside, outside = lc.seq_in_motif()

        inside_params = SequenceParameters(inside)
        print(inside_params.get_kappa())

        outside_params = SequenceParameters(outside)
        print(outside_params.get_kappa())

        print('')