def test_split_sample(testdir, mock_testclass):
    '''Check that split_sample sorts by size and sorts one file per bin.

    Bed.sort_bysize, Bed.sort and os.remove are replaced by mocks for the
    call. os.remove is always put back afterwards and the files whose
    removal was intercepted are deleted, even when an assertion fails.
    '''
    sample = 'POLR2A'
    bed = sample + '.bed'
    sort = sample + '-sort.bed'
    sort_copy = Path(__file__).parent.joinpath('sample-split.bed')
    copyfile(sort_copy, sort)
    Bed.sort_bysize = MagicMock(side_effect=create_file_sort)
    Bed.sort = MagicMock()
    binlength = 10
    binminlength = 100
    binmaxlength = 130
    os_remove = os.remove
    remove_mock = MagicMock()
    # Intercept removals; presumably this keeps the files handed to Bed.sort
    # readable for the content checks below.
    os.remove = remove_mock
    try:
        s.split_sample(sample, binlength, binminlength, binmaxlength)
        Bed.sort_bysize.assert_called_once_with(bed, ANY)
        Bed.sort.assert_any_call(ANY, sample + '-100-110.bed')
        Bed.sort.assert_any_call(ANY, sample + '-110-120.bed')
        Bed.sort.assert_any_call(ANY, sample + '-120-130.bed')
        with open(Bed.sort.call_args_list[0].args[0], 'r') as infile:
            assert infile.readline() == 'chr4\t800\t900\ttest4\t4\t+\n'
            assert infile.readline() == ''
        with open(Bed.sort.call_args_list[1].args[0], 'r') as infile:
            assert infile.readline() == 'chr8\t800\t910\ttest8\t4\t-\n'
            assert infile.readline() == ''
        with open(Bed.sort.call_args_list[2].args[0], 'r') as infile:
            assert infile.readline() == 'chr5\t100\t220\ttest5\t1\t-\n'
            assert infile.readline() == 'chr1\t100\t229\ttest1\t1\t+\n'
            assert infile.readline() == ''
    finally:
        # Restore the real os.remove first so later code never sees the mock,
        # then perform the removals that were intercepted.
        os.remove = os_remove
        for remove_args in remove_mock.call_args_list:
            os_remove(remove_args.args[0])
def test_split_samples(testdir, mock_testclass):
    '''split_samples called with only the samples file splits every sample.

    Each of the three samples is expected to be split with 10, 100, 500.
    '''
    samples_file = Path(__file__).parent / 'samples.txt'
    s.split_sample = MagicMock()
    s.split_samples(samples_file)
    for name in ('POLR2A', 'ASDURF', 'POLR1C'):
        s.split_sample.assert_any_call(name, 10, 100, 500)
def test_split_samples_second_parameters(testdir, mock_testclass):
    '''split_samples with 1 as second argument and explicit bin values.

    Only 'ASDURF' is expected to be split, with the bin values passed in.
    '''
    samples_file = Path(__file__).parent / 'samples.txt'
    s.split_sample = MagicMock()
    # binlength, binminlength, binmaxlength
    bin_values = (20, 200, 400)
    s.split_samples(samples_file, 1, *bin_values)
    s.split_sample.assert_called_once_with('ASDURF', *bin_values)
Exemple #4
0
def headers(samples, datasets):
    '''Build the statistics headers.

    The fixed columns are followed by the split suffixes found for every
    sample and, when given, every dataset, ordered with Split.splitkey.

    samples: iterable of sample names.
    datasets: iterable of dataset names; may be None or empty.

    Returns a tuple (all headers, split headers only).
    '''
    all_headers = ['Sample', 'Total reads', 'Mapped reads', 'Deduplicated reads']
    names = list(samples)
    if datasets:
        names.extend(datasets)
    splits_headers = set()
    for name in names:
        # Skip the name plus one separator character to keep only the suffix.
        # Each name uses its own length; datasets are no longer measured
        # against the last sample.
        splits_headers.update(split[len(name) + 1:] for split in Split.splits(name))
    splits_headers = sorted(splits_headers, key=Split.splitkey)
    all_headers.extend(splits_headers)
    return (all_headers, splits_headers)
def fit_gaussian(samples='samples.txt',
                 absolute=False,
                 components=False,
                 svg=False,
                 verbose=False,
                 center=None,
                 cmin=None,
                 cmax=None,
                 amp=None,
                 amin=None,
                 sigma=None,
                 smin=None,
                 suffix=None,
                 index=None):
    '''Fits gaussian curve to dyad coverage.

    Sample names are read with Parser.first(samples). Each sample is fitted
    with fit_gaussian_sample, then each of its splits as returned by
    sb.splits(sample), using the same options.

    index: when not None, only the sample at that position is processed.
    All other arguments are forwarded unchanged to fit_gaussian_sample.
    '''
    sample_names = Parser.first(samples)
    if index is not None:
        sample_names = [sample_names[index]]
    # Options shared by the sample and all of its splits.
    options = (absolute, components, svg, verbose, center, cmin, cmax, amp,
               amin, sigma, smin, suffix)
    for sample in sample_names:
        fit_gaussian_sample(sample, *options)
        for split in sb.splits(sample):
            fit_gaussian_sample(split, *options)
def test_split_samples_parameters(testdir, mock_testclass):
    '''split_samples forwards bin values given as keyword arguments.

    Every sample is expected to be split with the supplied values.
    '''
    samples_file = Path(__file__).parent / 'samples.txt'
    s.split_sample = MagicMock()
    bin_options = {'binlength': 20, 'binminlength': 200, 'binmaxlength': 400}
    s.split_samples(samples_file, **bin_options)
    for name in ('POLR2A', 'ASDURF', 'POLR1C'):
        s.split_sample.assert_any_call(name, 20, 200, 400)
Exemple #7
0
def center_annotations_sample_splits(sample,
                                     input_suffix='',
                                     output_suffix='-forcov'):
    '''Center annotations for a sample, then for each of its splits.

    The splits are those returned by sb.splits(sample); both suffixes are
    forwarded unchanged to center_annotations_sample.
    '''
    print('Center annotations on sample {}'.format(sample))
    suffixes = (input_suffix, output_suffix)
    center_annotations_sample(sample, *suffixes)
    for split_name in sb.splits(sample):
        center_annotations_sample(split_name, *suffixes)
Exemple #8
0
def dyad_coverage(samples,
                  genes='genes.txt',
                  selection=None,
                  absolute=False,
                  minp=-75,
                  maxp=75,
                  smoothing=None,
                  suffix=None,
                  index=None):
    '''Finds the distribution of distances between fragments and dyad.

    genes is read as a tab-separated file ('#' starts a comment). Rows whose
    seventh column equals -1 are dropped. When selection is given, only rows
    whose third column is among Parser.first(selection) are kept.

    Sample names are read with Parser.first(samples). Each sample is
    processed with dyad_coverage_sample, then each of its splits as returned
    by sb.splits(sample).

    index: when not None, only the sample at that position is processed.
    '''
    genes_info = pd.read_csv(genes, sep='\t', comment='#')
    genes_info = genes_info.loc[genes_info[genes_info.columns[6]] != -1]
    if selection:
        selection_genes = Parser.first(selection)
        genes_info = genes_info[genes_info[genes_info.columns[2]].isin(
            selection_genes)]
    sample_names = Parser.first(samples)
    if index is not None:
        sample_names = [sample_names[index]]
    # Options shared by the sample and all of its splits.
    options = (genes_info, absolute, minp, maxp, suffix, smoothing)
    for sample in sample_names:
        dyad_coverage_sample(sample, *options)
        for split in sb.splits(sample):
            dyad_coverage_sample(split, *options)
def sample_splits_prepgenomecov(sample):
    '''Run prepare_genome_coverage_sample on a sample, then on its splits.

    The splits are those returned by sb.splits(sample).
    '''
    print('Compute genome coverage on sample {}'.format(sample))
    prepare_genome_coverage_sample(sample)
    for split_name in sb.splits(sample):
        prepare_genome_coverage_sample(split_name)
def sample_splits_genome_coverage(sample,
                                  genome,
                                  scale=None,
                                  strand=None,
                                  input_suffix='',
                                  output_suffix='-cov',
                                  spike_suffix=None,
                                  control_suffix=None,
                                  genomecov_args=()):
    '''Compute genome coverage for a sample, then for each of its splits.

    The splits are those returned by Split.splits(sample). Every argument
    other than sample is forwarded unchanged to genome_coverage.
    '''
    print('Computing genome coverage on sample {}'.format(sample))
    forwarded = (genome, scale, strand, input_suffix, output_suffix,
                 spike_suffix, control_suffix, genomecov_args)
    genome_coverage(sample, *forwarded)
    for split_name in Split.splits(sample):
        genome_coverage(split_name, *forwarded)
def ignore_strand_sample_splits(sample,
                                input_suffix='',
                                output_suffix='-forcov'):
    '''Run ignore_strand_sample on a sample, then on each of its splits.

    The splits are those returned by sb.splits(sample); both suffixes are
    forwarded unchanged.
    '''
    print('Duplicate annotations on other strand on sample {}'.format(sample))
    suffixes = (input_suffix, output_suffix)
    ignore_strand_sample(sample, *suffixes)
    for split_name in sb.splits(sample):
        ignore_strand_sample(split_name, *suffixes)
def test_splits_2(testdir, mock_testclass):
    '''splits lists, in order, the four split files created for the sample.'''
    sample = 'POLR2A'
    expected = [
        sample + suffix
        for suffix in ('-100-150', '-200-250', '-300-350', '-400-450')
    ]
    for name in expected:
        Path(name + '.bed').touch()
    splits = s.splits(sample)
    for position, name in enumerate(expected):
        assert splits[position] == name
    assert len(splits) == 4
def test_splits(testdir, mock_testclass):
    '''splits lists, in order, four contiguous split files of the sample.'''
    sample = 'POLR2A'
    expected = [
        sample + suffix
        for suffix in ('-100-110', '-110-120', '-120-130', '-130-140')
    ]
    for name in expected:
        Path(name + '.bed').touch()
    splits = s.splits(sample)
    for position, name in enumerate(expected):
        assert splits[position] == name
    assert len(splits) == 4
Exemple #14
0
def dyadcov(samples, genes, minp, maxp, smoothing, index):
    '''Finds the distribution of distances between fragments and dyad.

    Logging is configured to write DEBUG and above to 'debug.log'. genes is
    read as a tab-separated file ('#' starts a comment) and rows whose
    seventh column equals -1 are dropped. Sample names are the first column
    of the tab-separated samples file.

    Each sample is processed with dyad_coverage_sample, then each of its
    splits as returned by sb.splits(sample).

    index: when not None, only the sample at that position is processed.
    '''
    logging.basicConfig(filename='debug.log',
                        level=logging.DEBUG,
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    genes_info = pd.read_csv(genes, sep='\t', comment='#')
    genes_info = genes_info.loc[genes_info[genes_info.columns[6]] != -1]
    sample_names = pd.read_csv(samples, header=None, sep='\t', comment='#')[0]
    if index is not None:
        sample_names = [sample_names[index]]
    for sample in sample_names:
        dyad_coverage_sample(sample, genes_info, minp, maxp, smoothing)
        for split in sb.splits(sample):
            dyad_coverage_sample(split, genes_info, minp, maxp, smoothing)
Exemple #15
0
def fitgaussian(samples, components, svg, verbose, center, cmin, cmax, amp,
                amin, sigma, smin, index):
    '''Fits gaussian curve to dyad coverage.

    Logging is configured to write DEBUG and above to 'debug.log'. Sample
    names are the first column of the tab-separated samples file ('#' starts
    a comment).

    Each sample is fitted with fitgaussian_sample, then each of its splits
    as returned by sb.splits(sample), using the same options.

    index: when not None, only the sample at that position is processed.
    '''
    logging.basicConfig(filename='debug.log',
                        level=logging.DEBUG,
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    sample_names = pd.read_csv(samples, header=None, sep='\t', comment='#')[0]
    if index is not None:
        sample_names = [sample_names[index]]
    # Options shared by the sample and all of its splits.
    options = (components, svg, verbose, center, cmin, cmax, amp, amin, sigma,
               smin)
    for sample in sample_names:
        fitgaussian_sample(sample, *options)
        for split in sb.splits(sample):
            fitgaussian_sample(split, *options)
def fit_double_gaussian(samples='samples.txt',
                        absolute=False,
                        components=False,
                        gaussian=False,
                        svg=False,
                        verbose=False,
                        center1=None,
                        cmin1=None,
                        cmax1=None,
                        amp1=None,
                        amin1=None,
                        sigma1=None,
                        smin1=None,
                        center2=None,
                        cmin2=None,
                        cmax2=None,
                        amp2=None,
                        amin2=None,
                        sigma2=None,
                        smin2=None,
                        suffix=None,
                        index=None):
    '''Fits double gaussian curve to dyad coverage.

    Sample names are read with Parser.first(samples). Each sample is fitted
    with fit_double_gaussian_sample, then each of its splits as returned by
    sb.splits(sample), using the same options.

    index: when not None, only the sample at that position is processed.
    All other arguments are forwarded unchanged to
    fit_double_gaussian_sample.
    '''
    sample_names = Parser.first(samples)
    if index is not None:
        sample_names = [sample_names[index]]
    # Options shared by the sample and all of its splits.
    options = (absolute, components, gaussian, svg, verbose,
               center1, cmin1, cmax1, amp1, amin1, sigma1, smin1,
               center2, cmin2, cmax2, amp2, amin2, sigma2, smin2,
               suffix)
    for sample in sample_names:
        fit_double_gaussian_sample(sample, *options)
        for split in sb.splits(sample):
            fit_double_gaussian_sample(split, *options)
def test_annotation_length_invalid(testdir, mock_testclass):
    '''annotation_length returns -1 for a line with only two columns.'''
    two_column_line = 'chr1\t300'
    assert s.annotation_length(two_column_line) == -1
def test_splitkey_invalid(testdir, mock_testclass):
    '''splitkey raises AttributeError for a name without a bin range.'''
    name_without_range = 'POLR2A'
    with pytest.raises(AttributeError):
        s.splitkey(name_without_range)
def test_splitkey_2(testdir, mock_testclass):
    '''splitkey returns 350 for 'POLR2A-350-680'.'''
    assert s.splitkey('POLR2A-350-680') == 350
def test_splitkey(testdir, mock_testclass):
    '''splitkey returns 120 for 'POLR2A-120-150'.'''
    assert s.splitkey('POLR2A-120-150') == 120
def test_splits_none(testdir, mock_testclass):
    '''splits returns nothing when the test created no split file.'''
    found = s.splits('POLR2A')
    assert len(found) == 0
def test_annotation_length(testdir, mock_testclass):
    '''annotation_length returns 150 for a line spanning 100 to 250.'''
    bed_line = 'chr1\t100\t250\ttest1'
    assert s.annotation_length(bed_line) == 150
def test_split_samples_second(testdir, mock_testclass):
    '''split_samples with 1 as second argument and no bin values.

    Only 'ASDURF' is expected to be split, with 10, 100, 500.
    '''
    samples_file = Path(__file__).parent / 'samples.txt'
    split_mock = MagicMock()
    s.split_sample = split_mock
    s.split_samples(samples_file, 1)
    split_mock.assert_called_once_with('ASDURF', 10, 100, 500)
def test_splitkey_noend(testdir, mock_testclass):
    '''splitkey raises AttributeError when the name has a single number.'''
    name_without_end = 'POLR2A-350'
    with pytest.raises(AttributeError):
        s.splitkey(name_without_end)
def test_annotation_length_2(testdir, mock_testclass):
    '''annotation_length returns 380 for a line spanning 300 to 680.'''
    bed_line = 'chr1\t300\t680\ttest1'
    assert s.annotation_length(bed_line) == 380