Esempio n. 1
0
def test_stats_motif_enrichment():
    """Check the per-motif counts and fold change from motif_enrichment."""
    # Three motifs, each scanned over 3 input regions and 5 control regions.
    # Every PWM gets its own copy of the same 4 x 2 matrix.
    pwms = [
        PositionWeightMatrix(values=[[1, 0], [0, 0], [0, 1], [1, 1]],
                             name=f'pwm{idx}')
        for idx in (1, 2, 3)
    ]

    # Outer index: motif; inner index: region; innermost: sites found.
    input_sites = [
        [[True], [True, True], []],
        [[], [], []],
        [[], [], [True]],
    ]
    control_sites = [
        [[True], [], [True, True, True], [], [True]],
        [[], [True], [], [True], []],
        [[], [], [], [], []],
    ]

    results = motif_enrichment(pwms=pwms,
                               motif_sites=input_sites,
                               motif_sites_control=control_sites)
    assert len(results) == 3

    # The asserted counts equal the number of regions whose site list is
    # non-empty for that motif.
    first = results[0]
    assert first.n_input == 2
    assert first.n_control == 3
    # (2 / 3 input regions) relative to (3 / 5 control regions).
    assert first.fold_change == pytest.approx(2 * 5 / 3 / 3)

    second = results[1]
    assert second.n_input == 0
    assert second.n_control == 2
    assert second.fold_change == 0

    # No fold change is asserted for the motif without control hits.
    third = results[2]
    assert third.n_input == 1
    assert third.n_control == 0
Esempio n. 2
0
def test_pwms_init(motif_root):
    """Exercise MotifPwms construction, file loading and missing-set lookup."""
    # A list mixing a PWM with a str and a bool must be rejected.
    with pytest.raises(ValueError):
        MotifPwms(pwms=[PositionWeightMatrix([[1], [2], [3], [4]]), 's', True])

    # Building from a single PWM keeps both the item and the set name.
    set_name = 'motif_set'
    single_pwm = PositionWeightMatrix([[1], [2], [3], [4]])
    collection = MotifPwms(pwms=[single_pwm], name=set_name)
    assert len(collection) == 1
    assert collection.name == set_name

    # Loading the bundled MotifScan-format fixture yields two PWMs.
    fixture_path = os.path.join(motif_root, 'test', 'test_pwms.motifscan')
    collection = MotifPwms(name=set_name)
    collection.read_motifscan_pwms(fixture_path)
    assert len(collection) == 2

    # Asking for a motif set that is not installed raises a dedicated error.
    with pytest.raises(MotifSetNotFoundError):
        load_built_pwms('uninstalled_motif_set', 'hg19')
Esempio n. 3
0
def test_pwm_raw_score():
    """Check the maximum and minimum raw scores of a 4 x 3 PWM."""
    matrix = [[1.35, 0.21, -5.23], [0.07, -0.21, 0.6],
              [2.15, 2.22, -0.84], [-2.64, -1.89, 5.47]]
    pwm = PositionWeightMatrix(matrix)

    # Column maxima: 2.15 + 2.22 + 5.47; column minima: -2.64 - 1.89 - 5.23.
    # Each attribute is read twice in a row -- presumably so that a cached
    # second access is also covered (TODO confirm against the PWM class).
    for _ in range(2):
        assert pwm.max_raw_score == pytest.approx(9.84)
    for _ in range(2):
        assert pwm.min_raw_score == pytest.approx(-9.76)
Esempio n. 4
0
def test_pwm_score():
    """Check PositionWeightMatrix.score on invalid and valid sequences."""
    matrix = [[1.35, 0.21, -5.23], [0.07, -0.21, 0.6],
              [2.15, 2.22, -0.84], [-2.64, -1.89, 5.47]]
    pwm = PositionWeightMatrix(matrix)

    # The matrix has 3 columns: an empty and a 4-letter sequence are rejected.
    for bad_seq in ("", "NNNN"):
        with pytest.raises(ValueError):
            pwm.score(bad_seq)

    # A sequence made only of N scores exactly zero.
    assert pwm.score("NNN") == 0

    # Expected values are consistent with (sum of matched entries) / 9.84,
    # taking rows as A, C, G, T and N as contributing 0, e.g.
    # "AGT" -> (1.35 + 2.22 + 5.47) / 9.84.
    expected_scores = (
        ("AGT", 0.918699),
        ("ANT", 0.693089),
        ("CTA", -0.716463),
    )
    for seq, expected in expected_scores:
        assert pwm.score(seq) == pytest.approx(expected)
Esempio n. 5
0
def test_pwms_write_motifscan_pwms(tmp_dir):
    """Check that write_motifscan_pwms creates the requested output file."""
    cutoffs = {'1e-2': 0.1, '1e-3': 0.4}
    pwm = PositionWeightMatrix([[1], [2], [3], [4]],
                               name='motif1',
                               matrix_id='id1',
                               cutoffs=cutoffs)
    collection = MotifPwms(pwms=[pwm], name='motif_set')

    out_path = os.path.join(tmp_dir, 'test_pwm.motifscan')
    collection.write_motifscan_pwms(out_path)

    # Only the existence of the file is verified, not its content.
    assert os.path.isfile(out_path)
Esempio n. 6
0
def test_set_cutoffs():
    """Check cutoffs derived by MotifPwms.set_cutoffs and its validation."""
    pwm = PositionWeightMatrix([[1], [2], [3], [4]])
    collection = MotifPwms(pwms=[pwm], name='motif_set')

    # One sequence of a million scores (1..1000000) for the single PWM.
    collection.set_cutoffs([range(1, 1000001)])

    expected_cutoffs = (
        ('1e-2', 990000),
        ('1e-3', 999000),
        ('1e-4', 999900),
        ('1e-5', 999990),
        ('1e-6', 999999),
    )
    assert len(pwm.cutoffs) == 5
    for p_value, cutoff in expected_cutoffs:
        assert pwm.cutoffs[p_value] == cutoff

    # Inputs that must be rejected: two score lists for one PWM, a flat
    # (non-nested) list, and a single very short score list -- presumably
    # too few scores to derive the cutoffs (TODO confirm).
    invalid_inputs = ([[1], [2]], [1, 2, 3], [[1, 2, 3]])
    for bad_scores in invalid_inputs:
        with pytest.raises(ValueError):
            collection.set_cutoffs(bad_scores)
Esempio n. 7
0
def test_scan_motif(genome_root):
    """Scan one PWM over one region under different Scanner settings."""
    genome = Genome(name='test', path=os.path.join(genome_root, 'test'))
    regions = [GenomicRegion(chrom='chr1', start=2, end=5)]

    # The PWM only carries cutoffs for p-values 1e-3 and 1e-4.
    pwms = MotifPwms()
    pwms.append(
        PositionWeightMatrix([[1, 0], [0, 1], [0, 0], [1, 0]],
                             cutoffs={'1e-3': 0.5, '1e-4': 1}))

    def make_scanner(p_value, **extra):
        # Shared Scanner configuration; only the p-value and optional extra
        # keyword arguments differ between the scenarios below.
        return Scanner(genome=genome,
                       regions=regions,
                       window_size=4,
                       p_value=p_value,
                       **extra)

    # Strict cutoff: one motif, one region, a single site.
    sites = make_scanner('1e-4').scan_motifs(pwms)
    print(sites)
    assert len(sites[0]) == 1
    assert len(sites[0][0]) == 1

    # Looser cutoff: three sites in the same region.
    sites = make_scanner('1e-3').scan_motifs(pwms)
    print(sites)
    assert len(sites[0][0]) == 3

    # The PWM has no cutoff for 1e-2, so scanning (not construction) fails.
    scanner = make_scanner('1e-2')
    with pytest.raises(ValueError):
        scanner.scan_motifs(pwms)

    # With remove_dup disabled, five sites are reported instead of three.
    sites = make_scanner('1e-3', remove_dup=False).scan_motifs(pwms)
    assert len(sites[0][0]) == 5
Esempio n. 8
0
    def read_motifscan_pwms(self, path):
        """Read PWMs in MotifScan format and add them to this collection.

        Every record is one header line, followed by four matrix lines and
        then at least one cutoff line. Blank lines are skipped. The header
        fields and the cutoff name/value are separated by tab characters.
        Parsed PWMs are only added to the collection once the whole file has
        been read successfully.

         MotifScan PWM Example:

            >MA0006.1   Ahr::Arnt   PWM
            A [-0.85815 -5.68647 -5.68647 -5.68647 -5.68647 -5.68647]
            C [ 0.48657 -5.32257  1.53966 -5.32257 -5.32257 -5.32257]
            G [-0.90016  1.53922 -5.32301  1.53922 -5.32301  1.58174]
            T [ 0.43981 -1.93828 -1.93828 -1.93828  1.21696 -5.68779]
            Cutoff_p1e-3	0.55403
            Cutoff_p1e-4	0.82985
            Cutoff_p1e-5	1.0

        Parameters
        ----------
        path : str
            The file path to read the MotifScan PWMs.

        Raises
        ------
        PwmsMotifScanFormatError
            If the file does not strictly follow the MotifScan PWMs format,
            including matrix or cutoff values that are not valid numbers and
            a last record that is incomplete.
        """
        logger.debug(f"Reading MotifScan PWMs from {path}")
        header_pattern = re.compile(r"^>(\S+)\t(\S+)\tPWM$")
        matrix_pattern = re.compile(r"^([ACGT]) \[(.+)\]$")
        cutoff_pattern = re.compile(r"^Cutoff_p(\S+)\t(\S+)")

        pwms = []
        line_num = 0  # stays 0 for an empty file

        # Fields of the record currently being parsed; reset on each header.
        matrix_id = None
        name = None
        values = []
        cutoffs = {}
        n_matrix = 0

        # expect_flag: 0=header, 1=matrix, 2=cutoff, 3=cutoff or header
        # 1 header line + 4 matrix line + at least 1 cutoff line
        expect_flag = 0
        with open(path, 'r') as fin:
            for line_num, line in enumerate(fin, start=1):
                line = line.strip()
                if not line:  # skip blank lines
                    continue

                m_header = header_pattern.match(line)
                m_matrix = matrix_pattern.match(line)
                m_cutoff = cutoff_pattern.match(line)

                if m_header:
                    if expect_flag not in (0, 3):
                        raise PwmsMotifScanFormatError(line_num, line)
                    if expect_flag == 3:
                        # The previous record is complete: save it before
                        # starting the new one.
                        pwms.append(
                            PositionWeightMatrix(values=values,
                                                 name=name,
                                                 matrix_id=matrix_id,
                                                 cutoffs=cutoffs))
                    matrix_id = m_header.group(1)
                    name = m_header.group(2)
                    n_matrix = 0
                    values = []
                    cutoffs = {}
                    expect_flag = 1  # found header, expect matrix line next
                elif m_matrix:
                    if expect_flag != 1:
                        raise PwmsMotifScanFormatError(line_num, line)
                    # `bases` is defined outside this method; it fixes the
                    # base expected on each of the four matrix rows
                    # (presumably A, C, G, T as in the example -- confirm).
                    if m_matrix.group(1) != bases[n_matrix]:
                        raise PwmsMotifScanFormatError(line_num, line)
                    try:
                        row = [float(v) for v in m_matrix.group(2).split()]
                    except (ValueError, TypeError) as err:
                        raise PwmsMotifScanFormatError(line_num,
                                                       line) from err
                    values.append(row)
                    n_matrix += 1
                    if n_matrix == 4:
                        # got 4 matrix line, expect cutoff line next
                        expect_flag = 2
                elif m_cutoff:
                    if expect_flag not in (2, 3):
                        raise PwmsMotifScanFormatError(line_num, line)
                    # A non-numeric cutoff is a format violation like any
                    # other, so report it as such instead of letting a bare
                    # ValueError escape.
                    try:
                        cutoff = float(m_cutoff.group(2))
                    except ValueError as err:
                        raise PwmsMotifScanFormatError(line_num,
                                                       line) from err
                    cutoffs[m_cutoff.group(1)] = cutoff
                    # got a cutoff, expect cutoff or header next
                    expect_flag = 3
                else:  # does not match any pattern
                    raise PwmsMotifScanFormatError(line_num, line)

        # check whether the last matrix is complete
        if expect_flag in (1, 2):
            raise PwmsMotifScanFormatError(line_num + 1, '')
        if expect_flag == 3:
            pwms.append(
                PositionWeightMatrix(values=values,
                                     name=name,
                                     matrix_id=matrix_id,
                                     cutoffs=cutoffs))
        self.extend(pwms)
        logger.debug(f"Found {len(pwms)} MotifScan PWMs")