def chi_goodness_yats(self):
    """Run a goodness-of-fit test with Yates' continuity correction.

    Each count in ``self.observed`` is moved towards the matching value in
    ``self.expected`` before ``power_divergence`` is called with ``ddof=0``
    and otherwise default settings.  The statistic and the p-value, both
    rounded to 5 decimals, are stored through ``add_result`` under
    ``"chi2_yats"`` and ``"p_yats"``.  Nothing is returned.
    """
    observed = np.asarray(self.observed)
    expected = np.asarray(self.expected)
    # Shift every observed count towards its expected value by at most 0.5.
    # A fixed 0.5 step would jump past the expected value whenever the gap
    # is smaller than 0.5, which can make the "corrected" statistic larger
    # than the uncorrected one.
    gap = expected - observed
    observed = observed + np.minimum(0.5, np.abs(gap)) * np.sign(gap)
    chi2_yats, p_yats = power_divergence(observed, expected, ddof=0)
    add_result(self, "chi2_yats", round(chi2_yats, 5))
    add_result(self, "p_yats", round(p_yats, 5))
def transcribed_sequence(self):
    """Record the transcription of every sequence under its own name.

    ``self.get_seq_names_and_contents()`` is called first (presumably this
    is what fills ``self.seq_names`` and ``self.seq_contents`` -- confirm
    in the class).  Each content string is wrapped in a ``Seq`` with the
    ``IUPAC.unambiguous_dna`` alphabet, transcribed, and stored as ``str``.
    """
    self.get_seq_names_and_contents()
    for label, content in zip(self.seq_names, self.seq_contents):
        rna = Seq(content, IUPAC.unambiguous_dna).transcribe()
        add_result(self, label, str(rna))
def reverse_sequence(self):
    """Record every sequence reversed (not complemented) under its name.

    ``self.get_seq_names_and_contents()`` is called first, then the pairs
    from ``self.seq_names`` / ``self.seq_contents`` are processed in order.
    """
    self.get_seq_names_and_contents()
    for label, content in zip(self.seq_names, self.seq_contents):
        dna = Seq(content, IUPAC.unambiguous_dna)
        # A step of -1 walks the sequence back to front.
        add_result(self, label, str(dna[::-1]))
def reverse_and_complement_sequences(self):
    """Record the reverse complement of every sequence under its name.

    ``self.get_seq_names_and_contents()`` is called first, then the pairs
    from ``self.seq_names`` / ``self.seq_contents`` are processed in order.
    """
    self.get_seq_names_and_contents()
    for label, content in zip(self.seq_names, self.seq_contents):
        flipped = Seq(content, IUPAC.unambiguous_dna).reverse_complement()
        add_result(self, label, str(flipped))
def calculate(self):
    """Compute the PIC value from the marker data and return the results.

    When ``self.data["amplified_marker"]`` is a ``float`` it is used
    directly as the frequency ``fi``.  Otherwise ``fi`` is the integer
    amplified count divided by the sum of the amplified and absence counts.
    The value ``1 - (fi**2 + (1 - fi)**2)`` is stored, rounded to 4
    decimals, under ``"PIC"``.

    Returns:
        ``self.results``.
    """
    amplified = self.data["amplified_marker"]
    if isinstance(amplified, float):
        # A float is already a frequency, so no counting formula is needed.
        fi = amplified
    else:
        present = int(amplified)
        absent = int(self.data["absence_marker"])
        fi = present / (present + absent)

    pic = 1 - (fi ** 2 + (1 - fi) ** 2)
    add_result(self, "PIC", round(pic, 4))
    return self.results
def correlation(self):
    """Derive contingency coefficients from the two chi-square variants.

    Takes the standard results from ``self.chi_square_standard()`` and the
    corrected ones from ``self.chi_square_yats()``.  With one degree of
    freedom each coefficient is ``sqrt(chi2 / field_sum)`` and the type is
    recorded as "Phi".  Otherwise the divisor is additionally multiplied by
    ``min(width, height) - 1`` and the type is recorded as "Crammer`s V".

    Returns:
        Tuple ``(chi2_standard, p_standard, dof, chi2_yats, p_yats,
        correlation_standard, correlation_yats)``.
    """
    total = float(self.data["field_sum"])
    chi2_standard, p_standard, dof, _expected = self.chi_square_standard()
    chi2_yats, p_yats = self.chi_square_yats()

    if dof == 1:
        divisor = total
        kind = "Phi"
    else:
        smaller_side = min(self.data["width"], self.data["height"])
        divisor = total * (smaller_side - 1)
        kind = "Crammer`s V"

    correlation_standard = sqrt(chi2_standard / divisor)
    correlation_yats = sqrt(chi2_yats / divisor)
    add_result(self, "coefficient of contingency type", kind)

    return (chi2_standard, p_standard, dof, chi2_yats, p_yats,
            correlation_standard, correlation_yats)
def raw_sequence(self):
    """Align the submitted sequences and record their consensus.

    ``create_seq_file`` turns ``self.data['sequences']`` into a file whose
    name is handed to ``MuscleCommandline``.  The command's standard output
    is parsed as a FASTA alignment, and a gap consensus is built with the
    threshold from ``self.data`` (default ``0.55``) and ``'N'`` as the
    ambiguous symbol.  The consensus string and its length are stored under
    ``"Consensus sequence"`` and ``"Sequence length"``.

    Returns:
        ``self.results``.
    """
    filename = create_seq_file(self.data['sequences'])
    try:
        threshold = self.data.get('threshold', 0.55)
        muscle = MuscleCommandline(input=filename)
        stdout, _stderr = muscle()
        align = AlignIO.read(StringIO(stdout), "fasta")
        summary_align = AlignInfo.SummaryInfo(align)
        consensus = summary_align.gap_consensus(threshold=threshold,
                                                ambiguous='N')
        add_result(self, "Consensus sequence", str(consensus))
        add_result(self, "Sequence length", len(consensus))
    finally:
        # Clean up even when the aligner or the parsing step fails, so a
        # failed request does not leave the input file behind.
        remove_temp_file(filename)
    return self.results
def genebank_seq(self):
    """Fetch two GenBank records, align them and record identity figures.

    For each of ``self.data['seq-name-1']`` and ``self.data['seq-name-2']``
    the record is fetched through ``Entrez.efetch`` from the "nucleotide"
    database.  The matching ``seq-content-*`` entry is replaced by the
    record's sequence, and the ``seq-name-*`` entry by the record id plus
    the first 25 characters of its description.  The sequence lengths,
    coverage, average identity and fragmental identity are then stored.

    Returns:
        ``self.results``.
    """
    Entrez.email = "*****@*****.**"

    def load_record(name_key, content_key):
        # Download one record and overwrite the two related data entries.
        with Entrez.efetch(db="nucleotide", rettype="gb", retmode="text",
                           id=self.data[name_key]) as handle:
            record = SeqIO.read(handle, "gb")
        label = str(record.id) + " " + str(record.description)[:25]
        self.data[content_key] = str(record.seq)
        self.data[name_key] = label
        return label

    seq_name_1 = load_record('seq-name-1', 'seq-content-1')
    seq_name_2 = load_record('seq-name-2', 'seq-content-2')

    alignment = self.get_alignments()
    add_result(self, f'Seq id. {seq_name_1} ... length [bp]',
               len(self.data['seq-content-1']))
    add_result(self, f'Seq id. {seq_name_2} ... length [bp]',
               len(self.data['seq-content-2']))

    # Every "|" in the alignment text is counted as one identical position.
    matches = alignment.count("|")
    summary = (
        ("Coverage [%]", self.get_coverage()),
        ("Average identity [%]", self.get_average_identity(matches)),
        ("Fragmental identity [%]", self.get_frag_identity(matches)),
    )
    for label, value in summary:
        add_result(self, label, value)
    return self.results
def raw_sequence(self):
    """Align the two pasted sequences and record identity figures.

    ``self.remove_newlines()`` runs first.  The names and contents are then
    read from ``self.data``, the alignment is obtained from
    ``self.get_alignments()``, and the sequence lengths, coverage, average
    identity and fragmental identity are stored.

    Returns:
        ``self.results``.
    """
    self.remove_newlines()
    data = self.data
    first_name, second_name = data['seq-name-1'], data['seq-name-2']
    first_content, second_content = data['seq-content-1'], data['seq-content-2']

    alignment = self.get_alignments()
    add_result(self, f'Sequence {first_name} length', len(first_content))
    add_result(self, f'Sequence {second_name} length', len(second_content))

    # Every "|" in the alignment text is counted as one identical position.
    matches = alignment.count("|")
    summary = (
        ("Coverage [%]", self.get_coverage()),
        ("Average identity [%]", self.get_average_identity(matches)),
        ("Fragmental identity [%]", self.get_frag_identity(matches)),
    )
    for label, value in summary:
        add_result(self, label, value)
    return self.results
def calculate(self):
    """Test the genotype counts against Hardy-Weinberg expectations.

    The observed and expected counts and the allele frequencies come from
    ``self.calculate_expected_observed()``.  A chi-square test compares
    observed with expected counts; the expected counts, ``p``, ``q``, the
    p-value and the statistic are stored rounded.  If any observed count is
    below 5, the values from ``self.calculate_yates_correction()`` are
    stored too and that p-value replaces the plain one for the verdict.
    The verdict compares the p-value with ``self.data["alfa"]``; on
    rejection ``fis = 1 - he / e_he`` is stored as well.

    Returns:
        ``self.results``.
    """
    alfa = self.data["alfa"]
    (ho, he, rho,
     e_ho, e_he, e_rho,
     p, q) = self.calculate_expected_observed()
    chi, pval = chisquare([ho, he, rho], f_exp=[e_ho, e_he, e_rho])

    # (label, value, number of decimals) in the order they are reported.
    report = (
        ('expected number of homozygotes', e_ho, 2),
        ('expected number of heterozygotes', e_he, 2),
        ('expected number of rare homozygotes', e_rho, 2),
        ('p', p, 5),
        ('q', q, 5),
        ('p-value', pval, 5),
        ('Chi-square value', chi, 5),
    )
    for label, value, digits in report:
        add_result(self, label, round(value, digits))

    if any(count < 5 for count in (ho, he, rho)):
        chi_yates, pval_yates = self.calculate_yates_correction()
        add_result(self, 'Yate`s chi-square value', round(chi_yates, 5))
        add_result(self, 'Yate`s p-value', round(pval_yates, 5))
        # From here on the verdict is based on the corrected p-value.
        pval = pval_yates

    if pval <= alfa:
        rejected = (
            "Distribution does not consistent with Hardy Weinberg's law at the level "
            "of significance: {}")
        add_result(self, 'status', rejected.format(alfa))
        fis = 1 - (he / e_he)
        add_result(self, 'fis', round(fis, 4))
    elif pval > alfa:
        accepted = "Distribution consistent with Hardy Weinberg's law at the level of significance: {}"
        add_result(self, 'status', accepted.format(alfa))

    return self.results
def calculate(self):
    """Store the ``"H"`` and ``"PIC"`` values and return the results.

    The values come from ``self.calculate_h()`` and
    ``self.calculate_pic()`` and are stored unchanged, in that order.

    Returns:
        ``self.results``.
    """
    h_value = self.calculate_h()
    add_result(self, "H", h_value)
    pic_value = self.calculate_pic()
    add_result(self, "PIC", pic_value)
    return self.results
def calculate(self):
    """Store everything produced by ``self.correlation()`` and return it.

    ``"dof"`` is stored as is; the chi-square statistics, p-values and
    correlation coefficients are rounded to 5 decimals.

    Returns:
        ``self.results``.
    """
    (chi2_standard, p_standard, dof,
     chi2_yats, p_yats,
     correlation_standard, correlation_yats) = self.correlation()

    add_result(self, "dof", dof)
    rounded_entries = (
        ("chi2_standard", chi2_standard),
        ("p_standard", p_standard),
        ("correlation_standard", correlation_standard),
        ("chi2_yats", chi2_yats),
        ("p_yats", p_yats),
        ("correlation_yats", correlation_yats),
    )
    for key, value in rounded_entries:
        add_result(self, key, round(value, 5))
    return self.results
def chi_goodness_standard(self):
    """Run the uncorrected chi-square goodness-of-fit test.

    ``self.observed`` is tested against ``self.expected`` with
    ``chisquare``.  The statistic and p-value, rounded to 5 decimals, are
    stored under ``"chi2_standard"`` and ``"p_standard"``, followed by
    ``"dof"`` as the number of observed entries minus one.  Nothing is
    returned.
    """
    statistic, p_value = chisquare(self.observed, f_exp=self.expected)
    add_result(self, "chi2_standard", round(statistic, 5))
    add_result(self, "p_standard", round(p_value, 5))
    degrees_of_freedom = len(self.observed) - 1
    add_result(self, "dof", degrees_of_freedom)