Esempio n. 1
0
    def __init__(self, *filenames, **kwargs):
        """Create a |TabixGenomeHash|

        Parameters
        ----------
        filenames : str or list of str
            Filename or list of filenames of `Tabix`_-compressed files

        data_format : str
            Format of tabix-compressed file(s). Choices are:
            `'GTF2'`,`'GFF3'`,`'BED'`,`'PSL'` (Default: `GTF2`)

        printer : file-like, optional
            Logger implementing a ``write()`` method (Default: |NullWriter|)

        Raises
        ------
        ValueError
            If `data_format` is not one of the supported formats
        """
        # Deferred import so pysam is only required when this class is used
        from pysam import Tabixfile

        # Allow calling either as __init__(["a", "b"]) or __init__("a", "b")
        if len(filenames) == 1 and isinstance(filenames[0], list):
            filenames = filenames[0]

        self.filenames = list(multiopen(filenames))
        self.printer = kwargs.get("printer", NullWriter())
        data_format = kwargs.get("data_format", "GTF2")
        try:
            self._reader_class = TabixGenomeHash._READERS[data_format]
        except KeyError:
            # Dict lookup raises KeyError for unknown formats, not ValueError;
            # catching the right type ensures the helpful message is emitted.
            msg = "Supported file formats for TabixGenomeHash are: %s" % ", ".join(sorted(TabixGenomeHash._READERS.keys()))
            self.printer.write(msg)
            raise ValueError(msg)

        self.tabix_readers = [Tabixfile(X) for X in self.filenames]
Esempio n. 2
0
    def __init__(self, *filenames, **kwargs):
        """Create a |TabixGenomeHash|

        Parameters
        ----------
        filenames : str or list of str
            Filename or list of filenames of `Tabix`_-compressed files

        data_format : str
            Format of tabix-compressed file(s). Choices are:
            `'GTF2'`,`'GFF3'`,`'BED'`,`'PSL'` (Default: `GTF2`)

        printer : file-like, optional
            Logger implementing a ``write()`` method (Default: |NullWriter|)

        Raises
        ------
        ValueError
            If `data_format` is not one of the supported formats
        """
        # Deferred import so pysam is only required when this class is used
        from pysam import Tabixfile

        # Allow calling either as __init__(["a", "b"]) or __init__("a", "b")
        if len(filenames) == 1 and isinstance(filenames[0], list):
            filenames = filenames[0]

        self.filenames = list(multiopen(filenames))
        self.printer = kwargs.get("printer", NullWriter())
        data_format = kwargs.get("data_format", "GTF2")
        try:
            self._reader_class = TabixGenomeHash._READERS[data_format]
        except KeyError:
            # Dict lookup raises KeyError for unknown formats, not ValueError;
            # catching the right type ensures the helpful message is emitted.
            msg = "Supported file formats for TabixGenomeHash are: %s" % ", ".join(
                sorted(TabixGenomeHash._READERS.keys()))
            self.printer.write(msg)
            raise ValueError(msg)

        self.tabix_readers = [Tabixfile(X) for X in self.filenames]
Esempio n. 3
0
    def __init__(self, *filenames, **kwargs):
        """Create a |BigBedGenomeHash|

        Parameters
        ----------
        *filenames : str
            One or more filenames to open (NOT open filehandles)

        return_type : class implementing a :py:meth:`from_bed` method
            Class of object to return (Default: |SegmentChain|)

        Raises
        ------
        ValueError
            If any element of `filenames` is not a string
        """
        # Deferred import avoids a circular dependency at module load time
        from plastid.readers.bigbed import BigBedReader
        return_type = kwargs.get("return_type", SegmentChain)

        opened = list(multiopen(filenames))
        # Only filenames are accepted here, not open filehandles
        for fn in opened:
            if isinstance(fn, str):
                continue
            raise ValueError("`filename` must be a 'str'. Found a '%s'." %
                             type(fn))

        self.filenames = opened
        self.bigbedreaders = [
            BigBedReader(fn, return_type=return_type) for fn in opened
        ]
Esempio n. 4
0
    def __init__(self, *streams, **kwargs):
        """
        AssembledFeatureReader(*streams, return_type=SegmentChain, add_three_for_stop=False, printer=None, tabix=False, **kwargs)

        Parameters
        ----------
        streams : file-like
            One or more filenames or open filehandles of input data.

        return_type : |SegmentChain| or subclass, optional
            Type of feature to return from assembled subfeatures (Default: |SegmentChain|)

        add_three_for_stop : bool, optional
            Some annotation files exclude the stop codon from CDS annotations. If set to
            `True`, three nucleotides will be added to the threeprime end of each
            CDS annotation, **UNLESS** the annotated transcript contains explicit stop_codon
            feature. (Default: `False`)

        printer : file-like, optional
            Logger implementing a ``write()`` method. Default: |NullWriter|

        tabix : boolean, optional
            `streams` point to `tabix`_-compressed files or are open
            :class:`~pysam.ctabix.tabix_file_iterator` (Default: `False`)

        **kwargs
            Other keyword arguments used by specific parsers
        """
        # Open any filenames in binary mode; already-open handles pass through
        streams = multiopen(streams, fn=open, kwargs=dict(mode="rb"))

        # Truthiness test instead of the non-idiomatic `== True` comparison
        if kwargs.get("tabix", False):
            # Tabix rows need adapting back into plain text lines
            self.stream = itertools.chain.from_iterable(
                _tabix_iteradaptor(X) for X in streams)
        else:
            self.stream = itertools.chain.from_iterable(streams)

        # Number of records read so far
        self.counter = 0

        self.printer = kwargs.get("printer", NullWriter())

        self.return_type = kwargs.get("return_type", SegmentChain)
        add_three_for_stop = kwargs.get("add_three_for_stop", False)
        # Post-processing hook: extend CDS by 3 nt if requested, else identity
        self._finalize = add_three_for_stop_codon if add_three_for_stop else lambda x: x

        # Per-file metadata collected during parsing
        self.metadata = {}
        # Records that could not be assembled
        self.rejected = []
Esempio n. 5
0
    def __init__(self, *streams, **kwargs):
        """
        AssembledFeatureReader(*streams, return_type=SegmentChain, add_three_for_stop=False, printer=None, tabix=False, **kwargs)

        Parameters
        ----------
        streams : file-like
            One or more filenames or open filehandles of input data.

        return_type : |SegmentChain| or subclass, optional
            Type of feature to return from assembled subfeatures (Default: |SegmentChain|)

        add_three_for_stop : bool, optional
            Some annotation files exclude the stop codon from CDS annotations. If set to
            `True`, three nucleotides will be added to the threeprime end of each
            CDS annotation, **UNLESS** the annotated transcript contains explicit stop_codon
            feature. (Default: `False`)

        printer : file-like, optional
            Logger implementing a ``write()`` method. Default: |NullWriter|

        tabix : boolean, optional
            `streams` point to `tabix`_-compressed files or are open
            :class:`~pysam.ctabix.tabix_file_iterator` (Default: `False`)

        **kwargs
            Other keyword arguments used by specific parsers
        """
        # Open any filenames in binary mode; already-open handles pass through
        streams = multiopen(streams, fn=open, kwargs=dict(mode="rb"))

        # Truthiness test instead of the non-idiomatic `== True` comparison
        if kwargs.get("tabix", False):
            # Tabix rows need adapting back into plain text lines
            self.stream = itertools.chain.from_iterable(
                _tabix_iteradaptor(X) for X in streams)
        else:
            self.stream = itertools.chain.from_iterable(streams)

        # Number of records read so far
        self.counter = 0

        self.printer = kwargs.get("printer", NullWriter())

        self.return_type = kwargs.get("return_type", SegmentChain)
        add_three_for_stop = kwargs.get("add_three_for_stop", False)
        # Post-processing hook: extend CDS by 3 nt if requested, else identity
        self._finalize = add_three_for_stop_codon if add_three_for_stop else lambda x: x

        # Per-file metadata collected during parsing
        self.metadata = {}
        # Records that could not be assembled
        self.rejected = []
Esempio n. 6
0
    def __init__(self, *filenames, **kwargs):
        """Create a |BigBedGenomeHash|

        Parameters
        ----------
        *filenames : str
            One or more filenames to open (NOT open filehandles)

        return_type : class implementing a :py:meth:`from_bed` method
            Class of object to return (Default: |SegmentChain|)

        Raises
        ------
        ValueError
            If any element of `filenames` is not a string
        """
        # Deferred import avoids a circular dependency at module load time
        from plastid.readers.bigbed import BigBedReader
        return_type = kwargs.get("return_type", SegmentChain)

        names = list(multiopen(filenames))
        # Only filenames are accepted here, not open filehandles
        bad = [n for n in names if not isinstance(n, str)]
        if bad:
            raise ValueError("`filename` must be a 'str'. Found a '%s'." % type(bad[0]))

        self.filenames = names
        self.bigbedreaders = [BigBedReader(n, return_type=return_type) for n in names]