def chi_goodness_yats(self):
     """Run a chi-square goodness-of-fit test with Yates' continuity correction.

     Reads ``self.observed`` and ``self.expected``, moves every observed
     count towards its expected count by at most 0.5, runs
     ``power_divergence`` on the corrected counts and records the rounded
     statistic and p-value under ``"chi2_yats"`` and ``"p_yats"``.
     """
     observed = np.asarray(self.observed)
     expected = np.asarray(self.expected)

     # Cap the shift at |expected - observed|: an unconditional 0.5 would push
     # a cell that is already closer than 0.5 past its expected value and
     # inflate the statistic instead of shrinking it.
     difference = expected - observed
     shift = np.minimum(0.5, np.abs(difference))
     observed = observed + shift * np.sign(difference)

     # NOTE(review): recent SciPy releases check that the observed and expected
     # totals agree; with more than two categories the corrected totals can
     # differ -- confirm against the SciPy version this project pins.
     chi2_yats, p_yats = power_divergence(observed, expected, ddof=0)
     add_result(self, "chi2_yats", round(chi2_yats, 5))
     add_result(self, "p_yats", round(p_yats, 5))
    def transcribed_sequence(self):
        """Record the transcribed form of every loaded sequence.

        Calls ``get_seq_names_and_contents()`` first (presumably this fills
        ``self.seq_names`` and ``self.seq_contents`` -- confirm), then stores
        one result per sequence, keyed by the sequence name.
        """
        self.get_seq_names_and_contents()

        for name, content in zip(self.seq_names, self.seq_contents):
            rna = Seq(content, IUPAC.unambiguous_dna).transcribe()
            add_result(self, name, str(rna))
    def reverse_sequence(self):
        """Record every loaded sequence read back-to-front (no complementing).

        Calls ``get_seq_names_and_contents()`` first (presumably this fills
        ``self.seq_names`` and ``self.seq_contents`` -- confirm), then stores
        one result per sequence, keyed by the sequence name.
        """
        self.get_seq_names_and_contents()

        for name, content in zip(self.seq_names, self.seq_contents):
            dna = Seq(content, IUPAC.unambiguous_dna)
            # A slice with step -1 reverses the sequence.
            add_result(self, name, str(dna[::-1]))
    def reverse_and_complement_sequences(self):
        """Record the reverse complement of every loaded sequence.

        Calls ``get_seq_names_and_contents()`` first (presumably this fills
        ``self.seq_names`` and ``self.seq_contents`` -- confirm), then stores
        one result per sequence, keyed by the sequence name.
        """
        self.get_seq_names_and_contents()

        for name, content in zip(self.seq_names, self.seq_contents):
            rev_comp = Seq(content, IUPAC.unambiguous_dna).reverse_complement()
            add_result(self, name, str(rev_comp))
Beispiel #5
0
    def calculate(self):
        """Compute the PIC value for a single marker and record it.

        ``self.data["amplified_marker"]`` is used directly as the frequency
        when it is a float; otherwise it is treated as a count and the
        frequency is ``amplified / (amplified + absence)`` using
        ``self.data["absence_marker"]``.

        Returns:
            ``self.results`` after ``"PIC"`` (rounded to 4 places) was added.
        """
        amplified = self.data["amplified_marker"]

        if isinstance(amplified, float):
            # Already a frequency -- use it as given.
            fi = amplified
        else:
            present = int(amplified)
            fi = present / (present + int(self.data["absence_marker"]))

        squared_sum = fi**2 + ((1 - fi)**2)
        pic = 1 - squared_sum

        add_result(self, "PIC", round(pic, 4))
        return self.results
    def correlation(self):
        """Derive a contingency coefficient from the chi-square statistics.

        Runs ``chi_square_standard()`` and ``chi_square_yats()``. With one
        degree of freedom the coefficient is ``sqrt(chi2 / n)`` and is recorded
        as "Phi"; otherwise it is ``sqrt(chi2 / (n * (m - 1)))`` with
        ``m = min(width, height)``. ``n`` is ``self.data["field_sum"]``.

        Returns:
            Tuple ``(chi2_standard, p_standard, dof, chi2_yats, p_yats,
            correlation_standard, correlation_yats)``.
        """
        field_sum = float(self.data["field_sum"])
        chi2_standard, p_standard, dof, _ = self.chi_square_standard()
        chi2_yats, p_yats = self.chi_square_yats()

        if dof == 1:
            divisor = field_sum
            coefficient_name = "Phi"
        else:
            m = min(self.data["width"], self.data["height"])
            divisor = field_sum * (m - 1)
            coefficient_name = "Crammer`s V"

        correlation_standard = sqrt(chi2_standard / divisor)
        correlation_yats = sqrt(chi2_yats / divisor)
        add_result(self, "coefficient of contingency type", coefficient_name)

        return (chi2_standard, p_standard, dof, chi2_yats, p_yats,
                correlation_standard, correlation_yats)
    def raw_sequence(self):
        """Align the submitted sequences with MUSCLE and record their consensus.

        ``self.data['sequences']`` is handed to ``create_seq_file`` (presumably
        it writes them to a temporary file -- confirm), MUSCLE is run on that
        file, and a gap consensus is built from the FASTA output using
        ``self.data['threshold']`` (default 0.55) with ``'N'`` for ambiguous
        positions.

        Returns:
            ``self.results`` after "Consensus sequence" and "Sequence length"
            were added.
        """
        filename = create_seq_file(self.data['sequences'])
        try:
            threshold = self.data.get('threshold', 0.55)

            muscle = MuscleCommandline(input=filename)
            stdout, _ = muscle()
            align = AlignIO.read(StringIO(stdout), "fasta")

            summary_align = AlignInfo.SummaryInfo(align)
            consensus = summary_align.gap_consensus(threshold=threshold,
                                                    ambiguous='N')

            add_result(self, "Consensus sequence", str(consensus))
            add_result(self, "Sequence length", len(consensus))
        finally:
            # Clean up even when MUSCLE or parsing fails, so the file is not
            # left behind on error.
            remove_temp_file(filename)

        return self.results
    def genebank_seq(self):
        """Fetch two GenBank records via Entrez and record identity statistics.

        For each of the two ids in ``self.data['seq-name-1']`` and
        ``self.data['seq-name-2']`` the record is downloaded from the
        "nucleotide" database. The id is then overwritten with a display label
        (record id, a space, and the first 25 characters of the description),
        and the sequence is stored under the matching ``'seq-content-N'`` key.
        The alignment from ``get_alignments()`` then supplies the number of
        ``"|"`` characters passed to the identity calculations.

        Returns:
            ``self.results`` after the lengths, coverage and identity values
            were added.
        """
        Entrez.email = "*****@*****.**"

        key_pairs = (
            ('seq-name-1', 'seq-content-1'),
            ('seq-name-2', 'seq-content-2'),
        )
        labels = []
        for name_key, content_key in key_pairs:
            with Entrez.efetch(db="nucleotide",
                               rettype="gb",
                               retmode="text",
                               id=self.data[name_key]) as handle:
                record = SeqIO.read(handle, "gb")
                label = str(record.id) + " " + str(record.description)[:25]
                self.data[content_key] = str(record.seq)
                self.data[name_key] = label
            labels.append(label)
        seq_name_1, seq_name_2 = labels

        alignment = self.get_alignments()

        add_result(self, f'Seq id. {seq_name_1} ... length [bp]',
                   len(self.data['seq-content-1']))
        add_result(self, f'Seq id. {seq_name_2} ... length [bp]',
                   len(self.data['seq-content-2']))

        matches = alignment.count("|")

        # Compute every metric before recording any of them.
        metrics = (
            ("Coverage [%]", self.get_coverage()),
            ("Average identity [%]", self.get_average_identity(matches)),
            ("Fragmental identity [%]", self.get_frag_identity(matches)),
        )
        for label, value in metrics:
            add_result(self, label, value)

        return self.results
    def raw_sequence(self):
        """Record identity statistics for two user-supplied sequences.

        After ``remove_newlines()``, the names and contents are read from
        ``self.data`` (``'seq-name-1/2'`` and ``'seq-content-1/2'``). The
        alignment from ``get_alignments()`` then supplies the number of ``"|"``
        characters passed to the identity calculations.

        Returns:
            ``self.results`` after the lengths, coverage and identity values
            were added.
        """
        self.remove_newlines()

        # Snapshot names and contents before the alignment is computed.
        names = (self.data['seq-name-1'], self.data['seq-name-2'])
        contents = (self.data['seq-content-1'], self.data['seq-content-2'])

        alignment = self.get_alignments()

        for name, content in zip(names, contents):
            add_result(self, f'Sequence {name} length', len(content))

        matches = alignment.count("|")

        # Compute every metric before recording any of them.
        metrics = (
            ("Coverage [%]", self.get_coverage()),
            ("Average identity [%]", self.get_average_identity(matches)),
            ("Fragmental identity [%]", self.get_frag_identity(matches)),
        )
        for label, value in metrics:
            add_result(self, label, value)

        return self.results
    def calculate(self):
        """Test observed genotype counts against their expected counts.

        Observed and expected counts plus the values ``p`` and ``q`` come
        from ``calculate_expected_observed()``. A chi-square test compares
        them. If any observed count is below 5, the result of
        ``calculate_yates_correction()`` is recorded as well and its p-value
        replaces the plain one for the final decision against
        ``self.data["alfa"]``. When the test is significant, ``fis`` is
        recorded too.

        Returns:
            ``self.results`` after all values and the status were added.
        """
        alfa = self.data["alfa"]
        (ho, he, rho,
         e_ho, e_he, e_rho,
         p, q) = self.calculate_expected_observed()
        # NOTE(review): chisquare defaults to ddof=0 (k - 1 = 2 degrees of
        # freedom). If p/q are estimated from these same counts, the test
        # conventionally uses 1 degree of freedom (ddof=1) -- confirm.
        chi, pval = chisquare([ho, he, rho], f_exp=[e_ho, e_he, e_rho])

        summary = (
            ('expected number of homozygotes', e_ho, 2),
            ('expected number of heterozygotes', e_he, 2),
            ('expected number of rare homozygotes', e_rho, 2),
            ('p', p, 5),
            ('q', q, 5),
            ('p-value', pval, 5),
            ('Chi-square value', chi, 5),
        )
        for label, value, digits in summary:
            add_result(self, label, round(value, digits))

        if any(count < 5 for count in (ho, he, rho)):
            chi_yates, pval_yates = self.calculate_yates_correction()

            add_result(self, 'Yate`s chi-square value', round(chi_yates, 5))
            add_result(self, 'Yate`s p-value', round(pval_yates, 5))

            # The corrected p-value drives the decision below.
            pval = pval_yates

        if pval <= alfa:
            add_result(
                self, 'status',
                "Distribution does not consistent with Hardy Weinberg's law "
                "at the level of significance: {}".format(alfa))

            fis = 1 - (he / e_he)
            add_result(self, 'fis', round(fis, 4))
        elif pval > alfa:
            add_result(
                self, 'status',
                "Distribution consistent with Hardy Weinberg's law "
                "at the level of significance: {}".format(alfa))

        return self.results
 def calculate(self):
     """Record the values of ``calculate_h()`` and ``calculate_pic()``.

     Returns:
         ``self.results`` after "H" and "PIC" were added, in that order.
     """
     h_value = self.calculate_h()
     add_result(self, "H", h_value)

     pic_value = self.calculate_pic()
     add_result(self, "PIC", pic_value)

     return self.results
    def calculate(self):
        """Record the chi-square statistics and coefficients from ``correlation()``.

        ``dof`` is stored as returned; every other value is rounded to five
        decimal places.

        Returns:
            ``self.results`` after all seven entries were added.
        """
        (chi2_standard, p_standard, dof,
         chi2_yats, p_yats,
         correlation_standard, correlation_yats) = self.correlation()

        add_result(self, "dof", dof)

        rounded_entries = (
            ("chi2_standard", chi2_standard),
            ("p_standard", p_standard),
            ("correlation_standard", correlation_standard),
            ("chi2_yats", chi2_yats),
            ("p_yats", p_yats),
            ("correlation_yats", correlation_yats),
        )
        for key, value in rounded_entries:
            add_result(self, key, round(value, 5))

        return self.results
 def chi_goodness_standard(self):
     """Run an uncorrected chi-square goodness-of-fit test and record it.

     Compares ``self.observed`` with ``self.expected`` and records the
     rounded statistic and p-value, plus ``len(self.observed) - 1`` as the
     degrees of freedom.
     """
     statistic, p_value = chisquare(self.observed, f_exp=self.expected)

     add_result(self, "chi2_standard", round(statistic, 5))
     add_result(self, "p_standard", round(p_value, 5))

     degrees_of_freedom = len(self.observed) - 1
     add_result(self, "dof", degrees_of_freedom)