def __init__(self, *filenames, **kwargs):  # data_format=None, printer=None
    """Create a |TabixGenomeHash|

    Parameters
    ----------
    filenames : str or list of str
        Filename or list of filenames of `Tabix`_-compressed files

    data_format : str
        Format of tabix-compressed file(s). Choices are:
        `'GTF2'`, `'GFF3'`, `'BED'`, `'PSL'` (Default: `'GTF2'`)

    printer : file-like, optional
        Logger implementing a ``write()`` method (Default: |NullWriter|)

    Raises
    ------
    ValueError
        If `data_format` is not one of the supported formats
    """
    from pysam import Tabixfile

    # Accept either one-or-more positional filenames or a single list of them
    if len(filenames) == 1 and isinstance(filenames[0], list):
        filenames = filenames[0]

    self.filenames = list(multiopen(filenames))
    self.printer = kwargs.get("printer", NullWriter())
    data_format = kwargs.get("data_format", "GTF2")
    try:
        self._reader_class = TabixGenomeHash._READERS[data_format]
    except KeyError:
        # BUGFIX: was `except ValueError:` — a failed dict lookup raises
        # KeyError, so the original handler never executed and unsupported
        # formats escaped as a bare KeyError instead of this message.
        msg = "Supported file formats for TabixGenomeHash are: %s" % ", ".join(
            sorted(TabixGenomeHash._READERS.keys()))
        self.printer.write(msg)
        raise ValueError(msg)

    self.tabix_readers = [Tabixfile(X) for X in self.filenames]
def __init__(self, *filenames, **kwargs):  # data_format=None, printer=None
    """Create a |TabixGenomeHash|

    Parameters
    ----------
    filenames : str or list of str
        Filename or list of filenames of `Tabix`_-compressed files

    data_format : str
        Format of tabix-compressed file(s). Choices are:
        `'GTF2'`, `'GFF3'`, `'BED'`, `'PSL'` (Default: `'GTF2'`)

    printer : file-like, optional
        Logger implementing a ``write()`` method (Default: |NullWriter|)

    Raises
    ------
    ValueError
        If `data_format` is not one of the supported formats
    """
    from pysam import Tabixfile

    # Accept either one-or-more positional filenames or a single list of them
    if len(filenames) == 1 and isinstance(filenames[0], list):
        filenames = filenames[0]

    self.filenames = list(multiopen(filenames))
    self.printer = kwargs.get("printer", NullWriter())
    data_format = kwargs.get("data_format", "GTF2")
    try:
        self._reader_class = TabixGenomeHash._READERS[data_format]
    except KeyError:
        # BUGFIX: was `except ValueError:` — a failed dict lookup raises
        # KeyError, so the original handler never executed and unsupported
        # formats escaped as a bare KeyError instead of this message.
        msg = "Supported file formats for TabixGenomeHash are: %s" % ", ".join(
            sorted(TabixGenomeHash._READERS.keys()))
        self.printer.write(msg)
        raise ValueError(msg)

    self.tabix_readers = [Tabixfile(X) for X in self.filenames]
def __init__(self, *filenames, **kwargs):  # base_record_format="III", return_type=None, cache_depth=5
    """Create a |BigBedGenomeHash|

    Parameters
    ----------
    *filenames : str
        One or more filenames to open (NOT open filehandles)

    return_type : class implementing a :py:meth:`from_bed` method
        Class of object to return (Default: |SegmentChain|)
    """
    from plastid.readers.bigbed import BigBedReader

    return_type = kwargs.get("return_type", SegmentChain)
    resolved = list(multiopen(filenames))

    # BigBed inputs must be filenames; reject anything that is not a string
    for fn in resolved:
        if not isinstance(fn, str):
            raise ValueError("`filename` must be a 'str'. Found a '%s'." % type(fn))

    self.filenames = resolved
    self.bigbedreaders = [BigBedReader(fn, return_type=return_type) for fn in resolved]
def __init__(self, *streams, **kwargs):
    """
    AssembledFeatureReader(*streams, return_type=SegmentChain, add_three_for_stop=False, printer=None, tabix=False, **kwargs)

    Parameters
    ----------
    streams : file-like
        One or more fileneames or open filehandles of input data.

    return_type : |SegmentChain| or subclass, optional
        Type of feature to return from assembled subfeatures (Default: |SegmentChain|)

    add_three_for_stop : bool, optional
        Some annotation files exclude the stop codon from CDS annotations. If set to
        `True`, three nucleotides will be added to the threeprime end of each
        CDS annotation, **UNLESS** the annotated transcript contains explicit stop_codon
        feature. (Default: `False`)

    printer : file-like, optional
        Logger implementing a ``write()`` method. Default: |NullWriter|

    tabix : boolean, optional
        `streams` point to `tabix`_-compressed files or are open
        :class:`~pysam.ctabix.tabix_file_iterator` (Default: `False`)

    **kwargs
        Other keyword arguments used by specific parsers
    """
    streams = multiopen(streams, fn=open, kwargs=dict(mode="rb"))

    # Idiom fix: test the boolean directly instead of comparing `== True`
    if kwargs.get("tabix", False):
        self.stream = itertools.chain.from_iterable(
            _tabix_iteradaptor(X) for X in streams)
    else:
        self.stream = itertools.chain.from_iterable(streams)

    self.counter = 0
    self.printer = kwargs.get("printer", NullWriter())
    self.return_type = kwargs.get("return_type", SegmentChain)

    # Post-process each assembled feature: either extend CDS ends by the
    # stop codon, or pass the feature through unchanged
    add_three_for_stop = kwargs.get("add_three_for_stop", False)
    self._finalize = add_three_for_stop_codon if add_three_for_stop else (lambda x: x)

    self.metadata = {}
    self.rejected = []
def __init__(self, *streams, **kwargs):
    """
    AssembledFeatureReader(*streams, return_type=SegmentChain, add_three_for_stop=False, printer=None, tabix=False, **kwargs)

    Parameters
    ----------
    streams : file-like
        One or more fileneames or open filehandles of input data.

    return_type : |SegmentChain| or subclass, optional
        Type of feature to return from assembled subfeatures (Default: |SegmentChain|)

    add_three_for_stop : bool, optional
        Some annotation files exclude the stop codon from CDS annotations. If set to
        `True`, three nucleotides will be added to the threeprime end of each
        CDS annotation, **UNLESS** the annotated transcript contains explicit stop_codon
        feature. (Default: `False`)

    printer : file-like, optional
        Logger implementing a ``write()`` method. Default: |NullWriter|

    tabix : boolean, optional
        `streams` point to `tabix`_-compressed files or are open
        :class:`~pysam.ctabix.tabix_file_iterator` (Default: `False`)

    **kwargs
        Other keyword arguments used by specific parsers
    """
    streams = multiopen(streams, fn=open, kwargs=dict(mode="rb"))

    # Idiom fix: test the boolean directly instead of comparing `== True`
    if kwargs.get("tabix", False):
        self.stream = itertools.chain.from_iterable(
            _tabix_iteradaptor(X) for X in streams)
    else:
        self.stream = itertools.chain.from_iterable(streams)

    self.counter = 0
    self.printer = kwargs.get("printer", NullWriter())
    self.return_type = kwargs.get("return_type", SegmentChain)

    # Post-process each assembled feature: either extend CDS ends by the
    # stop codon, or pass the feature through unchanged
    add_three_for_stop = kwargs.get("add_three_for_stop", False)
    self._finalize = add_three_for_stop_codon if add_three_for_stop else (lambda x: x)

    self.metadata = {}
    self.rejected = []
def __init__(self, *filenames, **kwargs):  # base_record_format="III", return_type=None, cache_depth=5
    """Create a |BigBedGenomeHash|

    Parameters
    ----------
    *filenames : str
        One or more filenames to open (NOT open filehandles)

    return_type : class implementing a :py:meth:`from_bed` method
        Class of object to return (Default: |SegmentChain|)
    """
    from plastid.readers.bigbed import BigBedReader

    feature_class = kwargs.get("return_type", SegmentChain)
    names = list(multiopen(filenames))

    # Only string filenames are acceptable here — open filehandles are not
    for candidate in names:
        if isinstance(candidate, str):
            continue
        raise ValueError("`filename` must be a 'str'. Found a '%s'." % type(candidate))

    self.filenames = names
    self.bigbedreaders = [
        BigBedReader(name, return_type=feature_class) for name in names
    ]