Exemple #1
0
    def __init__(self, design_filename, adapters):
        """.. rubric:: Constructor

        :param str design_filename: a CSV file that is compatible
            with our :class:`sequana.expdesign.ExpDesignAdapter`
        :param adapters: the type of adapters (PCRFree, Nextera,
            Rubicon, TruSeq, SMARTer, Small)
        :raises ValueError: if the design has no ``Sample_ID`` field, neither
            as a column nor as the name of its index.

        The files of adapters are stored in Sequana and accessible with the
        sequana_data function. So, for instance if adapters is set to Nextera,
        the following files are used to identify the adapters::

            sequana_data("adapters_Nextera_fwd.fa")
            sequana_data("adapters_Nextera_revcomp.fa")

        New adapters files can be added on request. Currently, Nextera and
        PCRFree are available. Rubicon and TruSeq will be added soon.
        """
        from sequana.expdesign import ExpDesignAdapter
        self.design = ExpDesignAdapter(design_filename)

        # Re-index only when Sample_ID is an actual column: calling
        # set_index("Sample_ID") on a frame whose index is already named
        # Sample_ID (and that has no such column) raises a KeyError.
        if "Sample_ID" in self.design.df.columns:
            self.design.df.set_index("Sample_ID", inplace=True)
        elif self.design.df.index.name != "Sample_ID":
            raise ValueError("Incorrect design file. Missing Sample_ID field")

        self.adapters = adapters

        # Forward and reverse-complement adapter files for the requested type.
        fwd_filename = sequana_data("adapters_%s_fwd.fa" % adapters)
        revcomp_filename = sequana_data("adapters_%s_revcomp.fa" % adapters)

        self._adapters_fwd = AdapterReader(fwd_filename)
        self._adapters_revc = AdapterReader(revcomp_filename)  # !!! revcomp
Exemple #2
0
def get_sequana_adapters(type_, direction):
    """Return path to a list of adapters in FASTA format

    :param type_: the adapter type; must be one of the names returned by
        :func:`_get_registered_adapters` (e.g. PCRFree, Rubicon, Nextera)
    :param direction: fwd, rev, revcomp
    :return: path to the adapter filename
    :raises ValueError: if *type_* is not a registered adapter type or if
        *direction* is not one of fwd, rev, revcomp

    The file is first searched with the ``adapters_`` prefix
    (``adapters_<type_>_<direction>.fa``); if that lookup fails, the
    un-prefixed name (``<type_>_<direction>.fa``) is used instead.

    """
    # search possible types
    registered = _get_registered_adapters()
    if type_ not in registered:
        logger.error("This adapter type (%s) is not valid" % type_)
        logger.error("choose one in %s types" % registered)
        raise ValueError("This adapter type (%s) is not valid" % type_)

    directions = ["fwd", "rev", "revcomp"]
    if direction not in directions:
        logger.error("This kind of tag (%s) is not valid" % direction)
        logger.error("choose one in %s " % directions)
        raise ValueError("This direction (%s) is not valid" % direction)

    try:
        prefixed = sequana_data("adapters_%s_%s.fa" % (type_, direction))
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit must propagate instead of being silently swallowed.
        return sequana_data("%s_%s.fa" % (type_, direction))

    logger.warning("Rename {} (remove the adapters_ prefix)".format(prefixed))
    return prefixed
Exemple #3
0
def _get_registered_adapters():
    """Return the set of adapter names found in ``data/adapters``.

    Only files whose name contains ``_fwd`` are considered. The adapter name
    is the file name without its optional ``adapters_`` prefix and without
    the ``_fwd.fa`` suffix.

    :return: set of adapter names (strings)
    """
    prefix = "adapters_"
    registered = set()
    for filename in sequana_data('*', 'data/adapters'):
        if "_fwd" not in filename:
            continue
        name = filename
        # Remove the literal prefix only. str.lstrip("adapters_") strips any
        # leading run of the characters a/d/p/t/e/r/s/_ and would therefore
        # also eat the first letters of names such as "smarter" or "test".
        if name.startswith(prefix):
            name = name[len(prefix):]
        registered.add(name.replace("_fwd.fa", ""))
    return registered
Exemple #4
0
    def __init__(self, design_filename, adapters):
        """.. rubric:: Constructor

        :param str design_filename: a CSV file that is compatible
            with our :class:`sequana.expdesign.ExpDesignAdapter`
        :param adapters: the type of adapters (PCRFree, Nextera,
            Rubicon, TruSeq, SMARTer, Small)
        :raises ValueError: if the design has no ``Sample_ID`` field, neither
            as a column nor as the name of its index.

        The files of adapters are stored in Sequana and accessible with the
        sequana_data function. So, for instance if adapters is set to Nextera,
        the following file is used to identify the adapters::

            sequana_data("adapters_Nextera_fwd.fa")

        If the prefixed file cannot be retrieved, the un-prefixed name
        (e.g. ``Nextera_fwd.fa``) is used instead.

        New adapters files can be added on request. See resources/data/adapters
        for the full list. You can also use::

            from sequana.adapters import _get_registered_adapters
            _get_registered_adapters()
        """
        from sequana.expdesign import ExpDesignAdapter
        self.design = ExpDesignAdapter(design_filename)

        # Re-index only when Sample_ID is an actual column: calling
        # set_index("Sample_ID") on a frame whose index is already named
        # Sample_ID (and that has no such column) raises a KeyError.
        if "Sample_ID" in self.design.df.columns:
            self.design.df.set_index("Sample_ID", inplace=True)
        elif self.design.df.index.name != "Sample_ID":
            raise ValueError("Incorrect design file. Missing Sample_ID field")

        self.adapters = adapters

        # Try the legacy "adapters_" prefixed name first, then the plain one.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
        try:
            file1 = sequana_data("adapters_%s_fwd.fa" % adapters)
            logger.warning("rename your file removing prefix adatper")
        except Exception:
            file1 = sequana_data("%s_fwd.fa" % adapters)

        try:
            file2 = sequana_data("adapters_%s_revcomp.fa" % adapters)
            logger.warning("rename your file removing prefix adatper")
        except Exception:
            file2 = sequana_data("%s_revcomp.fa" % adapters)

        self._adapters_fwd = AdapterReader(file1)
        self._adapters_revc = AdapterReader(file2)  # !!! revcomp
Exemple #5
0
    def __init__(self, design_filename, adapters):
        """.. rubric:: Constructor

        :param str design_filename: a CSV file that is compatible
            with our :class:`sequana.iem.IEM`
        :param adapters: the type of adapters (PCRFree, Nextera,
            Rubicon, TruSeq, SMARTer, Small)

        The files of adapters are stored in Sequana and accessible with the
        sequana_data function. So, for instance if adapters is set to Nextera,
        the following file is used to identify the adapters::

            sequana_data("adapters_Nextera_fwd.fa")

        If the prefixed file cannot be retrieved, the un-prefixed name
        (e.g. ``Nextera_fwd.fa``) is used instead.

        New adapters files can be added on request. See resources/data/adapters
        for the full list. You can also use::

            from sequana.adapters import _get_registered_adapters
            _get_registered_adapters()
        """
        from sequana.iem import IEM
        self.design = IEM(design_filename)

        self.adapters = adapters

        # Try the legacy "adapters_" prefixed name first, then the plain one.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
        try:
            file1 = sequana_data("adapters_%s_fwd.fa" % adapters)
            logger.warning("rename your file removing prefix adatper")
        except Exception:
            file1 = sequana_data("%s_fwd.fa" % adapters)

        try:
            file2 = sequana_data("adapters_%s_revcomp.fa" % adapters)
            logger.warning("rename your file removing prefix adatper")
        except Exception:
            file2 = sequana_data("%s_revcomp.fa" % adapters)

        self._adapters_fwd = AdapterReader(file1)
        self._adapters_revc = AdapterReader(file2)  # !!! revcomp
Exemple #6
0
def _get_registered_adapters():
    """Return the set of adapter names found in ``data/adapters``.

    Only files whose name starts with ``adapters`` are considered. The
    adapter name is the text between the ``adapters_`` prefix and the next
    underscore (e.g. ``adapters_Nextera_fwd.fa`` gives ``Nextera``).

    :return: set of adapter names (strings)
    """
    prefix = "adapters_"
    registered = set()
    for filename in sequana_data('*', 'data/adapters'):
        if not filename.startswith("adapters"):
            continue
        name = filename
        # Remove the literal prefix only. str.lstrip("adapters_") strips any
        # leading run of the characters a/d/p/t/e/r/s/_ and would therefore
        # also eat the first letters of names such as "smarter" or "test".
        if name.startswith(prefix):
            name = name[len(prefix):]
        registered.add(name.split("_", 1)[0])
    return registered