def setup(self):
    """Load the gdc.bam example as a BAM signal and announce the fixture.

    The signal object is stored on ``self.m``.  A ``[<timestamp>] <class
    name>`` banner is then printed and stdout is flushed.
    """
    bam_path = metaseq.example_filename('gdc.bam')
    self.m = metaseq.genomic_signal(bam_path, kind='bam')
    banner = '[%s] %s\n' % (datetime.datetime.now(), self.__class__.__name__)
    # The banner already ends with a newline, so print leaves a blank line
    # after it.
    print(banner)
    sys.stdout.flush()
def setup(self):
    """Create ``self.m`` from the gdc.bam example and print a banner line.

    The banner has the form ``[<timestamp>] <class name>`` followed by a
    newline; stdout is flushed after printing.
    """
    self.m = metaseq.genomic_signal(
        metaseq.example_filename('gdc.bam'),
        kind='bam',
    )
    stamp = datetime.datetime.now()
    owner = self.__class__.__name__
    # The formatted text carries its own trailing newline in addition to the
    # one print adds.
    print('[%s] %s\n' % (stamp, owner))
    sys.stdout.flush()
def test_errors():
    """Yield one check per call that is expected to raise a given error."""
    adapters = metaseq.filetype_adapters

    def check(error, callable_obj, args, kwargs):
        # Intentionally no docstring here, so the description nose derives
        # for each generated test stays as it was.
        assert_raises(error, callable_obj, *args, **kwargs)

    class X(adapters.BaseAdapter):
        # Adapter subclass whose file object is None.
        def make_fileobj(self):
            return None

    bed_path = metaseq.example_filename('gdc.bed')
    bigwig = adapters.BigWigAdapter(metaseq.example_filename('gdc.bigwig'))
    cases = (
        (ValueError, adapters.BaseAdapter, (bed_path,), {}),
        (NotImplementedError, bigwig.__getitem__, (0,), {}),
        (ValueError, X("").__getitem__, (0,), {}),
        # Disabled case, kept for reference:
        # (ValueError, gs['bam'].local_coverage, ['chr2L:1-5', 'chr2L:1-5'], dict(processes=PROCESSES)),
    )
    for case in cases:
        # Same 5-tuple as before: (check, error, callable_obj, args, kwargs)
        yield (check,) + case
def run_metaseq():
    """Time a single-bin ``array`` call over ``windows`` on the CTCF BAM.

    Progress and the elapsed seconds are printed to stdout.  Returns the
    resulting array flattened with ``ravel()``.
    """
    # set up a BamSignal object
    bam_path = metaseq.example_filename("wgEncodeUwTfbsK562CtcfStdAlnRep1.bam")
    signal = metaseq.genomic_signal(bam_path, kind="bam")

    # Tweak processes and chunksize as needed to balance CPUs and I/O.
    # The trick is to use a single bin.
    array_opts = dict(processes=6, chunksize=100, bins=1)

    # Python 2 print statement: the trailing comma suppresses the newline so
    # the completion message lands on the same line.
    print("metaseq starting..."),
    sys.stdout.flush()

    started = time.time()
    counts = signal.array(windows, **array_opts)
    elapsed = time.time() - started

    print("completed in %.2fs" % elapsed)
    sys.stdout.flush()
    return counts.ravel()
def run_metaseq():
    """Build a one-bin signal array over ``windows`` and report the timing.

    Prints a start message and the elapsed time to stdout, then returns the
    array flattened with ``ravel()``.
    """
    # set up a BamSignal object
    signal = metaseq.genomic_signal(
        metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'),
        kind='bam',
    )

    # Python 2 print statement; the trailing comma keeps the cursor on the
    # same line for the completion message.
    print('metaseq starting...'),
    sys.stdout.flush()
    begin = time.time()

    # Tweak processes and chunksize as needed to balance CPUs and I/O.
    n_processes, chunk = 6, 100
    # the trick is to use a single bin...
    result = signal.array(
        windows, processes=n_processes, chunksize=chunk, bins=1)

    finish = time.time()
    print('completed in %.2fs' % (finish - begin))
    sys.stdout.flush()
    return result.ravel()
import multiprocessing
from matplotlib import pyplot as plt
import matplotlib
import numpy as np
import metaseq
import pybedtools

# Use example data and generate some random features
gs = metaseq.genomic_signal(metaseq.example_filename('x.bam'), 'bam')
features = (
    pybedtools.BedTool()
    .window_maker(
        b=pybedtools.BedTool('chr2L 0 500000', from_string=True).fn,
        w=1000)
    .shuffle(seed=1, genome={'chr2L': (0, 5000000)})
)

# One synthetic gene name per feature, in feature order
genes = np.array(['gene_%s' % i for i, _ in enumerate(features)])

arr = gs.array(features, processes=multiprocessing.cpu_count(), bins=100)

# At this point, each item in `genes` corresponds to the same row in `arr`
ind, breaks = metaseq.plotutils.clustered_sortind(arr, k=5)

# Boundaries of clusters are provided in `breaks`.
# So the first cluster's original indices into `arr` are:
cluster_1_inds = ind[:breaks[0]]

# Which means the genes in the first cluster are:
""" Many of these tests use the minimal test/data/gdc.bed file which has just enough complexity to be useful in testing corner cases. When reading through the tests, it's useful to have that file open to understand what's happening. """ import os import metaseq import multiprocessing from metaseq.array_helpers import ArgumentError import numpy as np from nose.tools import assert_raises from nose.plugins.skip import SkipTest gs = {} for kind in ['bed', 'bam', 'bigbed', 'bigwig']: gs[kind] = metaseq.genomic_signal(metaseq.example_filename('gdc.%s' % kind), kind) PROCESSES = int(os.environ.get("METASEQ_PROCESSES", multiprocessing.cpu_count())) def test_tointerval(): assert metaseq.helpers.tointerval("chr2L:1-10[-]").strand == '-' assert metaseq.helpers.tointerval("chr2L:1-10[+]").strand == '+' assert metaseq.helpers.tointerval("chr2L:1-10").strand == '.' def test_local_count(): def check(kind, coord, expected, stranded): try: result = gs[kind].local_count(coord, stranded=stranded) except NotImplementedError: raise SkipTest("Incompatible bx-python version for bigBed")
def test_example_data_exists():
    """The x.bam and gdc.bam example files must be present on disk."""
    for basename in ('x.bam', 'gdc.bam'):
        assert os.path.exists(metaseq.example_filename(basename))
def setup(self):
    """Store a bigBed signal built from the gdc.bigbed example on ``self.m``."""
    bigbed_path = metaseq.example_filename('gdc.bigbed')
    self.m = metaseq.genomic_signal(bigbed_path, kind='bigbed')
Convenience function to close all mini-browser figures """ for fig in FIGS: plt.close(fig) # Choices for RUN_TYPE are: # * 'intron': all introns of all genes on the selected chromosomes # * 'TSS' : gene-level TSSs, +/- upstream and downstream bp # * 'peaks' : peaks from ENCODE; acts as a positive control on the numbers RUN_TYPE = 'TSS' try: chip = chipseq.Chipseq( ip_bam=metaseq.example_filename( 'wgEncodeHaibTfbsK562Atf3V0416101AlnRep1.bam' ), control_bam=metaseq.example_filename( 'wgEncodeHaibTfbsK562RxlchV0416101AlnRep1.bam' ), dbfn=metaseq.example_filename( 'Homo_sapiens.GRCh37.66.cleaned.gtf.db') ) except ValueError: raise ValueError("please use the download_data.py script in the " "data directory") if RUN_TYPE == "TSS": # Gets all genes on selected chroms, then applies the TSS modifier and # saves the results
def test_db():
    """Attaching no database, then the dmel example database, should work."""
    d.attach_db(None)
    db_path = metaseq.example_filename('dmel-all-r5.33-cleaned.gff.db')
    d.attach_db(db_path)
""" Many of these tests use the minimal test/data/gdc.bed file which has just enough complexity to be useful in testing corner cases. When reading through the tests, it's useful to have that file open to understand what's happening. """ import os import metaseq import multiprocessing from metaseq.array_helpers import ArgumentError import numpy as np from nose.tools import assert_raises from nose.plugins.skip import SkipTest gs = {} for kind in ['bed', 'bam', 'bigbed', 'bigwig']: gs[kind] = metaseq.genomic_signal( metaseq.example_filename('gdc.%s' % kind), kind) PROCESSES = int( os.environ.get("METASEQ_PROCESSES", multiprocessing.cpu_count())) def test_tointerval(): assert metaseq.helpers.tointerval("chr2L:1-10[-]").strand == '-' assert metaseq.helpers.tointerval("chr2L:1-10[+]").strand == '+' assert metaseq.helpers.tointerval("chr2L:1-10").strand == '.' def test_local_count(): def check(kind, coord, expected, stranded): try: result = gs[kind].local_count(coord, stranded=stranded)
module for testing the larger files (x.bam, x.bed.gz, etc) """ import multiprocessing import metaseq import pybedtools CPUS = multiprocessing.cpu_count() gs = {} for kind in ['bam', 'bigwig', 'bed', 'bigbed']: if kind == 'bed': ext = 'bed.gz' else: ext = kind gs[kind] = metaseq.genomic_signal( metaseq.example_filename('x.%s' % ext), kind) # generate the test features features = pybedtools.BedTool()\ .window_maker( b=pybedtools.BedTool('chr2L 0 500000', from_string=True).fn, w=1000)\ .shuffle(seed=1, genome={'chr2L': (0, 5000000)}) args = (features,) kwargs = dict(processes=CPUS, bins=100) bam_array = gs['bam'].array(*args, **kwargs) bed_array = gs['bed'].array(*args, **kwargs) bw_array = gs['bigwig'].array(*args, method='get_as_array', **kwargs)
def peak_panel(self, ax, feature): bedtool = pybedtools.BedTool(self.bed) features = bedtool.intersect([feature], u=True) track = Track(features) ax.add_collection(track) ax.axis('tight') return feature if __name__ == "__main__": import metaseq import gffutils import pybedtools G = gffutils.FeatureDB( metaseq.example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db')) ip = metaseq.genomic_signal( metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'), 'bam') inp = metaseq.genomic_signal( metaseq.example_filename('wgEncodeUwTfbsK562InputStdAlnRep1.bam'), 'bam') peaks = pybedtools.BedTool(metaseq.example_filename( 'wgEncodeUwTfbsK562CtcfStdPkRep1.narrowPeak.gz')) plotting_kwargs = [ dict(color='r', label='IP'), dict(color='k', linestyle=':', label='input')] local_coverage_kwargs = dict(fragment_size=200)
def peak_panel(self, ax, feature): bedtool = pybedtools.BedTool(self.bed) features = bedtool.intersect([feature], u=True) track = Track(features) ax.add_collection(track) ax.axis('tight') return feature if __name__ == "__main__": import metaseq import gffutils import pybedtools G = gffutils.FeatureDB( metaseq.example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db')) ip = metaseq.genomic_signal( metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'), 'bam') inp = metaseq.genomic_signal( metaseq.example_filename('wgEncodeUwTfbsK562InputStdAlnRep1.bam'), 'bam') peaks = pybedtools.BedTool( metaseq.example_filename( 'wgEncodeUwTfbsK562CtcfStdPkRep1.narrowPeak.gz')) plotting_kwargs = [ dict(color='r', label='IP'), dict(color='k', linestyle=':', label='input') ]
def setup(self):
    """Create the bigBed genomic signal for gdc.bigbed and keep it as ``self.m``."""
    example = metaseq.example_filename('gdc.bigbed')
    self.m = metaseq.genomic_signal(example, kind='bigbed')
def setup(self):
    """Build ``self.d``, a ResultsTable over the ex.deseq example.

    The dmel example gff database path is passed as the second argument.
    """
    example = metaseq.example_filename
    self.d = metaseq.ResultsTable(
        example('ex.deseq'),
        example('dmel-all-r5.33-cleaned.gff.db'),
    )
Diagnostic plots are generated at the end of the script. TODO: figure out what's causing the discrepancies (open vs closed intervals? Binning artifact? CIGAR operations?) """ import os import sys import time import numpy as np import metaseq import pybedtools from matplotlib import pyplot as plt bam_fn = metaseq.example_filename("wgEncodeUwTfbsK562CtcfStdAlnRep1.bam") if not os.path.exists(bam_fn): raise ValueError("Please run download_data.py in test/data dir to retrieve ENCODE " "data used for examples") # Construct 10kb windows, but subset to only use chr19 (to speed up the test) print "creating windows..." sys.stdout.flush() windows = pybedtools.BedTool().window_maker(genome="hg19", w=10000).filter(lambda x: x.chrom == "chr19").saveas() def run_bedtools(): # set up a BAM-based BedTool bt = pybedtools.BedTool(bam_fn)
if __name__ == "__main__":
    # Command-line driver: each argument names an example to run.
    import sys
    choices = ['xcorr', 'chipseq']

    # Slicing sys.argv never raises IndexError, so the old try/except could
    # not detect "no arguments".  An empty list is the real signal; show the
    # available choices in that case.
    examples = sys.argv[1:]
    if not examples:
        print('Choices are: %s' % (choices,))

    # Reject unknown example names before doing any work
    for ex in examples:
        if ex not in choices:
            raise ValueError('%s not in %s' % (ex, choices))

    if 'xcorr' in examples:
        ip = metaseq.genomic_signal(
            metaseq.example_filename(
                'wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'),
            'bam')

        NWINDOWS = 5000
        FRAGMENT_SIZE = 1
        WINDOWSIZE = 5000
        THRESH = FRAGMENT_SIZE / float(WINDOWSIZE) * 10

        lags, shift = estimate_shift(
            ip,
            nwindows=NWINDOWS,
            maxlag=500,
            thresh=THRESH,
            array_kwargs=dict(
                processes=8,
                chunksize=100,
                fragment_size=FRAGMENT_SIZE),
            verbose=True)

        # Plot the mean of `shift` over axis 0 against the lags and mark the
        # lag where that mean peaks.
        mean_shift = shift.mean(axis=0)
        plt.plot(lags, mean_shift)
        plt.axvline(
            lags[np.argmax(mean_shift)], linestyle='--', color='k')
from metaseq import results_table import metaseq import numpy as np fn = metaseq.example_filename('ex.deseq') d = results_table.ResultsTable(fn) def test_dataframe_access(): # different ways of accessing get the same data in memory assert d.id is d.data.id assert d['id'] is d.data.id def test_dataframe_subsetting(): assert all(d[:10].data == d.data[:10]) assert all(d.update(d.data[:10]).data == d.data[:10]) def test_copy(): e = d.copy() e.id = 'a' assert e.id[0] == 'a' assert d.id[0] != 'a' def smoke_tests(): #smoke test for repr print repr(d) def test_db(): # should work
"""
module for testing the larger files (x.bam, x.bed.gz, etc)
"""
import multiprocessing
import metaseq
import pybedtools

CPUS = multiprocessing.cpu_count()

# One genomic signal per supported kind; only the BED example is gzipped,
# so it is the only kind whose extension differs from its name.
gs = {}
for kind in ['bam', 'bigwig', 'bed', 'bigbed']:
    ext = 'bed.gz' if kind == 'bed' else kind
    gs[kind] = metaseq.genomic_signal(
        metaseq.example_filename('x.%s' % ext), kind)

# generate the test features
features = (
    pybedtools.BedTool()
    .window_maker(
        b=pybedtools.BedTool('chr2L 0 500000', from_string=True).fn,
        w=1000)
    .shuffle(seed=1, genome={'chr2L': (0, 5000000)})
)

# Shared call signature for the array() calls below
args = (features,)
kwargs = {'processes': CPUS, 'bins': 100}

bam_array = gs['bam'].array(*args, **kwargs)
bed_array = gs['bed'].array(*args, **kwargs)
if __name__ == "__main__":
    # Command-line driver: each argument names an example to run.
    import sys
    choices = ['xcorr', 'chipseq']

    # sys.argv[1:] is a slice and cannot raise IndexError, so the previous
    # except branch was dead code.  Check for an empty list instead and show
    # the available choices.
    examples = sys.argv[1:]
    if not examples:
        print('Choices are: %s' % (choices,))

    # Reject unknown example names before doing any work
    for ex in examples:
        if ex not in choices:
            raise ValueError('%s not in %s' % (ex, choices))

    if 'xcorr' in examples:
        ip = metaseq.genomic_signal(
            metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'),
            'bam')

        NWINDOWS = 5000
        FRAGMENT_SIZE = 1
        WINDOWSIZE = 5000
        THRESH = FRAGMENT_SIZE / float(WINDOWSIZE) * 10

        lags, shift = estimate_shift(ip,
                                     nwindows=NWINDOWS,
                                     maxlag=500,
                                     thresh=THRESH,
                                     array_kwargs=dict(
                                         processes=8,
                                         chunksize=100,
                                         fragment_size=FRAGMENT_SIZE),
                                     verbose=True)
from metaseq import results_table
import metaseq
import numpy as np

# Shared fixture: a ResultsTable over the ex.deseq example file
fn = metaseq.example_filename('ex.deseq')
d = results_table.ResultsTable(fn)


def test_dataframe_access():
    """Attribute and item access return the same in-memory column object."""
    # different ways of accessing get the same data in memory
    assert d.id is d.data.id
    assert d['id'] is d.data.id


def test_dataframe_subsetting():
    """Slicing or updating the table gives the same data as slicing ``d.data``."""
    # NOTE(review): assumes d.data is a pandas DataFrame -- confirm.
    # The previous all(frame_a == frame_b) iterated over the column labels of
    # the comparison frame, which are always truthy, so it could never fail.
    # DataFrame.equals compares the values and treats NaNs in the same
    # position as equal.
    assert d[:10].data.equals(d.data[:10])
    assert d.update(d.data[:10]).data.equals(d.data[:10])


def test_copy():
    """Assigning to a copy's column must not touch the original table."""
    e = d.copy()
    e.id = 'a'
    assert e.id[0] == 'a'
    assert d.id[0] != 'a'


def smoke_tests():
    """repr() of the table should not raise."""
    # smoke test for repr
    print(repr(d))
def peak_panel(self, ax, feature): bedtool = pybedtools.BedTool(self.bed) features = bedtool.intersect([feature], u=True) track = Track(features) ax.add_collection(track) # ax.axis('tight') return feature if __name__ == "__main__": import metaseq import gffutils import pybedtools G = gffutils.FeatureDB(metaseq.example_filename("Homo_sapiens.GRCh37.66.cleaned.gtf.db")) ip = metaseq.genomic_signal(metaseq.example_filename("wgEncodeUwTfbsK562CtcfStdAlnRep1.bam"), "bam") inp = metaseq.genomic_signal(metaseq.example_filename("wgEncodeUwTfbsK562InputStdAlnRep1.bam"), "bam") peaks = pybedtools.BedTool(metaseq.example_filename("wgEncodeUwTfbsK562CtcfStdPkRep1.narrowPeak.gz")) plotting_kwargs = [dict(color="r", label="IP"), dict(color="k", linestyle=":", label="input")] local_coverage_kwargs = dict(fragment_size=200) b = SignalMiniBrowser([ip, inp], plotting_kwargs=plotting_kwargs, local_coverage_kwargs=local_coverage_kwargs) g = GeneModelMiniBrowser([ip, inp], G, plotting_kwargs=plotting_kwargs, local_coverage_kwargs=local_coverage_kwargs) p = PeakMiniBrowser([ip, inp], peaks, plotting_kwargs=plotting_kwargs, local_coverage_kwargs=local_coverage_kwargs)
"""
Settings for the ctcf_peaks example script
"""
import gffutils
import metaseq

# NOTE(review): presumably distances in bp around each feature -- confirm
# against the script that consumes these settings.
UPSTREAM = DOWNSTREAM = 1000

BINS = 100
FRAGMENT_SIZE = 200

GENOME = 'hg19'
CHROMS = ['chr1', 'chr2']

# Path of the example gffutils database, and a FeatureDB opened on it
gtfdb = metaseq.example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db')
G = gffutils.FeatureDB(gtfdb)
Diagnostic plots are generated at the end of the script. TODO: figure out what's causing the discrepancies (open vs closed intervals? Binning artifact? CIGAR operations?) """ import os import sys import time import numpy as np import metaseq import pybedtools from matplotlib import pyplot as plt bam_fn = metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam') if not os.path.exists(bam_fn): raise ValueError( 'Please run download_data.py in test/data dir to retrieve ENCODE ' 'data used for examples') # Construct 10kb windows, but subset to only use chr19 (to speed up the test) print 'creating windows...' sys.stdout.flush() windows = pybedtools.BedTool()\ .window_maker(genome='hg19', w=10000)\ .filter(lambda x: x.chrom == 'chr19')\ .saveas()
return s def keys(self): return self.fn_dict.keys() def values(self): return [self._dict[key] for key in self.keys()] def items(self): return list((key, self._dict[key]) for key in self.keys()) if __name__ == "__main__": import metaseq from matplotlib import pyplot as plt db = metaseq.example_filename('dmel-all-r5.33-cleaned.gff.db') import_kwargs = dict(comment='#') d = DESeqResults( metaseq.example_filename('rrp6-s2-polyA.final.summary'), db=db, import_kwargs=import_kwargs, ) e = DESeqResults( metaseq.example_filename('rrp40-s2-polyA.final.summary'), db=db, import_kwargs=import_kwargs, ) d = d.align_with(e)
if (i == j) and hist_kwargs: ax.hist(xfunc(p.ix[i][val]), **hist_kwargs) else: scatter(p.ix[i][val], p.ix[j][val], ax=ax, xlab_prefix=i + " ", ylab_prefix=j + " ", **kwargs) axind += 1 if __name__ == "__main__": from metaseq import example_filename dbfn = example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db') db = gffutils.FeatureDB(dbfn) p = pandas.Panel({ 'uninduced_1': deseq_dataframe(example_filename('GSM847565_SL2585.table'), index_col='id', db=db), 'induced_1': deseq_dataframe(example_filename('GSM847566_SL2592.table'), index_col='id', db=db), 'uninduced_2': deseq_dataframe(example_filename('GSM847567_SL4337.table'), index_col='id', db=db),
if score != 0: fout.write('\t'.join([ feature.chrom, str(start), str(stop), str(score)]) + '\n') start = start + binsize this_batch = [] i = 0 fout.close() if __name__ == "__main__": import metaseq ip_bam = metaseq.genomic_signal( metaseq.example_filename( 'wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'), 'bam') control_bam = metaseq.genomic_signal( metaseq.example_filename( 'wgEncodeUwTfbsK562InputStdAlnRep1.bam'), 'bam') BINSIZE = 10 WINDOWSIZE = 10000 BINS = WINDOWSIZE / BINSIZE features = pybedtools.BedTool()\ .window_maker(genome='hg19', w=WINDOWSIZE)\ .filter(lambda x: x.chrom == 'chr19') result = compare( signal1=ip_bam, signal2=control_bam, features=features,
import numpy as np
import os
import metaseq

# Example chr17 BAM files for the IP and the input control
ip_filename = metaseq.helpers.example_filename(
    "wgEncodeHaibTfbsK562Atf3V0416101AlnRep1_chr17.bam")
input_filename = metaseq.helpers.example_filename(
    "wgEncodeHaibTfbsK562RxlchV0416101AlnRep1_chr17.bam")

ip_signal = metaseq.genomic_signal(ip_filename, "bam")
input_signal = metaseq.genomic_signal(input_filename, "bam")

# If you already have TSSs, skip this part.
import gffutils
db = gffutils.FeatureDB(
    metaseq.example_filename("Homo_sapiens.GRCh37.66_chr17.gtf.db"))

import pybedtools
from pybedtools.featurefuncs import TSS
from gffutils.helpers import asinterval


def tss_generator():
    """Yield ``TSS(...)`` of every transcript in ``db``, with 1000 up/downstream."""
    for tx in db.features_of_type("transcript"):
        interval = asinterval(tx)
        yield TSS(interval, upstream=1000, downstream=1000)


# Only build tsses.gtf when it is not already on disk; either way, the name
# `tsses` ends up bound to a BedTool created from the file path.
if not os.path.exists("tsses.gtf"):
    tsses = pybedtools.BedTool(tss_generator()).saveas("tsses.gtf")
tsses = pybedtools.BedTool("tsses.gtf")

from metaseq import persistence
ncols = len(p.items) axind = 1 for i in p.items: for j in p.items: ax = fig.add_subplot(nrows, ncols, axind) if (i == j) and hist_kwargs: ax.hist(xfunc(p.ix[i][val]), **hist_kwargs) else: scatter(p.ix[i][val], p.ix[j][val], ax=ax, xlab_prefix=i + " ", ylab_prefix=j + " ", **kwargs) axind += 1 if __name__ == "__main__": from metaseq import example_filename dbfn = example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db') db = gffutils.FeatureDB(dbfn) p = pandas.Panel( { 'uninduced_1': deseq_dataframe( example_filename('GSM847565_SL2585.table'), index_col='id', db=db), 'induced_1': deseq_dataframe( example_filename('GSM847566_SL2592.table'), index_col='id', db=db), 'uninduced_2': deseq_dataframe( example_filename('GSM847567_SL4337.table'), index_col='id', db=db),
import numpy as np
import os
import metaseq

# Example chr17 BAM files for the IP and the input control
ip_filename = metaseq.helpers.example_filename(
    'wgEncodeHaibTfbsK562Atf3V0416101AlnRep1_chr17.bam')
input_filename = metaseq.helpers.example_filename(
    'wgEncodeHaibTfbsK562RxlchV0416101AlnRep1_chr17.bam')

ip_signal = metaseq.genomic_signal(ip_filename, 'bam')
input_signal = metaseq.genomic_signal(input_filename, 'bam')

# If you already have TSSs, skip this part.
import gffutils
db = gffutils.FeatureDB(
    metaseq.example_filename('Homo_sapiens.GRCh37.66_chr17.gtf.db'))

import pybedtools
from pybedtools.featurefuncs import TSS
from gffutils.helpers import asinterval


def tss_generator():
    """Yield ``TSS(...)`` for each 'transcript' feature in ``db`` (1000 each side)."""
    for tx in db.features_of_type('transcript'):
        as_interval = asinterval(tx)
        yield TSS(as_interval, upstream=1000, downstream=1000)


# Generate tsses.gtf only if it is missing; `tsses` is then rebound to a
# BedTool created from the file path in both cases.
if not os.path.exists('tsses.gtf'):
    tsses = pybedtools.BedTool(tss_generator()).saveas('tsses.gtf')
tsses = pybedtools.BedTool('tsses.gtf')

from metaseq import persistence
Convenience function to close all mini-browser figures """ for fig in FIGS: plt.close(fig) # Choices for RUN_TYPE are: # * 'intron': all introns of all genes on the selected chromosomes # * 'TSS' : gene-level TSSs, +/- upstream and downstream bp # * 'peaks' : peaks from ENCODE; acts as a positive control on the numbers RUN_TYPE = 'TSS' try: chip = chipseq.Chipseq( ip_bam=metaseq.example_filename( 'wgEncodeHaibTfbsK562Atf3V0416101AlnRep1.bam'), control_bam=metaseq.example_filename( 'wgEncodeHaibTfbsK562RxlchV0416101AlnRep1.bam'), dbfn=metaseq.example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db')) except ValueError: raise ValueError("please use the download_data.py script in the " "data directory") if RUN_TYPE == "TSS": # Gets all genes on selected chroms, then applies the TSS modifier and # saves the results tss_fn = 'example_tsses.gtf' if not os.path.exists(tss_fn): features = pybedtools.BedTool(helpers.gene_generator())\ .filter(helpers.chromfilter)\ .each(helpers.TSS, upstream=settings.UPSTREAM,
if score != 0: fout.write('\t'.join( [feature.chrom, str(start), str(stop), str(score)]) + '\n') start = start + binsize this_batch = [] i = 0 fout.close() if __name__ == "__main__": import metaseq ip_bam = metaseq.genomic_signal( metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'), 'bam') control_bam = metaseq.genomic_signal( metaseq.example_filename('wgEncodeUwTfbsK562InputStdAlnRep1.bam'), 'bam') BINSIZE = 10 WINDOWSIZE = 10000 BINS = WINDOWSIZE / BINSIZE features = pybedtools.BedTool()\ .window_maker(genome='hg19', w=WINDOWSIZE)\ .filter(lambda x: x.chrom == 'chr19') result = compare(signal1=ip_bam, signal2=control_bam, features=features,