Beispiel #1
0
def test_duplicated_design():
    """Look up sample "VB-22" in the duplicated-index design file.

    The design ``test_expdesign_hiseq_duplicated_index.csv`` is loaded with
    the "Small" adapter kit, and the forward index-1 adapter returned for
    the sample is compared with its expected identifier and sequence.
    """
    design_path = sequana_data("test_expdesign_hiseq_duplicated_index.csv")
    finder = FindAdaptersFromDesign(design_path, "Small")
    adapters = finder.get_adapters_from_sample("VB-22")

    expected_identifier = "Small_Adapter_5|name:small5|seq:ACAGTG"
    expected_sequence = (
        "CAAGCAGAAGACGGCATACGAGATACAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA")

    assert adapters['index1']['fwd'].identifier == expected_identifier
    assert adapters['index1']['fwd'].sequence == expected_sequence
Beispiel #2
0
def test_wrong_design():
    """Ensure ``check()`` fails on ``test_expdesign_wrong.csv``.

    The design is loaded with the "NEXTFlex48_DNA" kit and ``check()`` is
    expected to raise. The failure branch lives in the ``else`` clause so
    that it cannot be swallowed by the exception handler: with the
    ``assert False`` inside the ``try`` body and a bare ``except``, the
    test passed whether or not ``check()`` raised.
    """
    design = sequana_data("test_expdesign_wrong.csv")
    ad = FindAdaptersFromDesign(design, "NEXTFlex48_DNA")
    try:
        ad.check()
    except (Exception, SystemExit):
        # Expected outcome. SystemExit is listed as well in case check()
        # aborts through sys.exit() -- TODO confirm which exception the
        # library actually raises and narrow this clause accordingly.
        pass
    else:
        raise AssertionError(
            "check() did not raise on test_expdesign_wrong.csv")
Beispiel #3
0
def test_wrong_design():
    """Ensure ``check()`` fails on ``test_expdesign_wrong.csv``.

    The design is loaded with the "PCRFree" kit and ``check()`` is expected
    to raise. The failure branch lives in the ``else`` clause so that it
    cannot be swallowed by the exception handler: with the ``assert False``
    inside the ``try`` body and a bare ``except``, the test passed whether
    or not ``check()`` raised.
    """
    design = sequana_data("test_expdesign_wrong.csv")
    ad = FindAdaptersFromDesign(design, "PCRFree")
    try:
        ad.check()
    except (Exception, SystemExit):
        # Expected outcome. SystemExit is listed as well in case check()
        # aborts through sys.exit() -- TODO confirm which exception the
        # library actually raises and narrow this clause accordingly.
        pass
    else:
        raise AssertionError(
            "check() did not raise on test_expdesign_wrong.csv")
Beispiel #4
0
def test_nextera():
    """Exercise the "Nextera" kit on two design files.

    First ``test_index_mapper.csv``: the first forward index-1 adapter of
    one sample is checked, the design is validated with ``check()`` and the
    adapters are saved to FASTA files, which are removed afterwards. Then
    ``test_expdesign_hiseq_doubleindex.csv`` is only validated.
    """
    sample = "C4405-M1-EC1"

    # simple indexing
    simple_design = sequana_data("test_index_mapper.csv")
    finder = FindAdaptersFromDesign(simple_design, "Nextera")
    adapters = finder.get_adapters_from_sample(sample)
    first_forward = adapters['index1']['fwd'][0]
    assert first_forward.identifier == 'Nextera_index_N703|name:N703|seq:AGGCAGAA'

    finder.check()  # all samples are used in get_adapters_from_sample
    finder.sample_names  # plain attribute access; the value is discarded

    # The two FASTA files written for the sample are deleted right away.
    fwd_fasta, rev_fasta = finder.save_adapters_to_fasta(sample)
    for fasta in (fwd_fasta, rev_fasta):
        os.remove(fasta)

    # double indexing
    double_design = sequana_data("test_expdesign_hiseq_doubleindex.csv")
    FindAdaptersFromDesign(double_design, "Nextera").check()
Beispiel #5
0
def test_all_adapters():
    """Run several adapter kits against the dummy design file.

    Each kit in the loop is used to fetch the adapters of sample
    "C1152-S2-EC1" and the result is handed to ``_check_content``. The
    Nextera result is then inspected in more detail.
    """
    sample = "C1152-S2-EC1"
    design = sequana_data("test_index_mapper.csv")

    # The bare string below is a no-op; it records adapter files that are
    # still to be covered.
    """TODO
adapters_NEBNext2_fwd.fa
adapters_NEBNext_fwd.fa
dapters_Small_fwd.fa
adapters_NEBNextOligos_fwd.fa
adapters_SMARTer_fwd.f
adapters_Nextera_2_fwd.fa
    """

    # Not in the loop: 'NEXTFlex96_DNA', 'TruSeqCD_DNA', 'TruSeqUD'
    kits = ('TruSeq', 'Nextera', 'NEXTFlex48_DNA', 'Small', 'NEBNext')
    for kit in kits:
        finder = FindAdaptersFromDesign(design, kit)
        _check_content(finder.get_adapters_from_sample(sample))

    # Nextera
    nextera = FindAdaptersFromDesign(design, "Nextera")
    adapters = nextera.get_adapters_from_sample(sample)
    assert "revc" in adapters['universal']
    assert "seq:TAAGGCGA" in adapters['index1']['fwd'][0].identifier
    for key in ("transposase_seq_1", "transposase_seq_2"):
        assert key in adapters.keys()
Beispiel #6
0
def _as_bitmask(*values):
    """Pack the truthiness of *values* into an integer.

    The first value is the most significant bit, so ``_as_bitmask(a, b, c)``
    equals ``4 * bool(a) + 2 * bool(b) + bool(c)``.
    """
    mask = 0
    for value in values:
        mask = (mask << 1) | int(bool(value))
    return mask


def main(args=None):
    """Mostly checking the options provided by the user and then call
    :func:`sequana_init` function to create the pre-filled config file +
    snakemake + README +runme.sh in a dedicated project directory.

    :param args: argument vector whose first item is the program name;
        a copy of ``sys.argv`` is used when None.
    :raises ValueError: if one of the file/directory options points to a
        path that does not exist, or if the adapter options are invalid.
    :raises SystemExit: through ``sys.exit(1)`` when the combination of
        options is not accepted or the pipeline name is unknown.
    """
    import sequana
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # No option at all: print a hint and stop.
    if len(args) == 1:
        sa = Tools()
        sa.purple("Welcome to Sequana standalone application")
        logger.critical("You must use --pipeline <valid pipeline name>\nuse "
                        "--show-pipelines or --help for more information. ")
        return

    options = user_options.parse_args(args[1:])

    sa = Tools(verbose=options.verbose)
    sa.purple("Welcome to Sequana standalone application")

    # Those options are mutually exclusive; the only accepted combination
    # is pipeline + get_config (2 + 1 = 3).
    # Bits: issue=32, version=16, info=8, show_pipelines=4, pipeline=2,
    # get_config=1
    flag = _as_bitmask(options.issue, options.version, options.info,
                       options.show_pipelines, options.pipeline,
                       options.get_config)
    if flag not in [1, 2, 4, 8, 16, 3, 32]:
        logger.critical("You must use one of --pipeline, --info, "
                        "--show-pipelines, --issue, --version, --get-config")
        sys.exit(1)

    # OPTIONS that gives info and exit
    if options.issue:
        onweb('https://github.com/sequana/sequana/issues')
        return

    if options.version:
        sa.purple("Sequana version %s" % sequana.version)
        return

    if options.show_pipelines:
        sa.purple("Valid pipeline names:")
        for this in sorted(valid_pipelines):
            m = Module(this)
            sa.green(" - " + this)
            print(textwrap(m.overview, indent=8))
        return

    if options.info:
        module = Module(options.info)
        module.onweb()
        return

    if options.pipeline:
        # check validity of the pipeline name
        if options.pipeline not in valid_pipelines:
            txt = "".join([" - %s\n" % this for this in valid_pipelines])
            logger.critical("%s not a valid pipeline name. Use of one:\n" %
                            options.pipeline + txt)
            sys.exit(1)

    # copy locally the request config file from a specific pipeline
    if flag == 3:  # --get-config and --pipeline used
        module = Module(options.pipeline)
        copy_config_from_sequana(module)
        return

    # pipeline should be defined by now. Let us start the real work here
    Module("dag").check("warning")
    Module(options.pipeline).check("warning")

    # Every file/directory given by the user must exist
    for path in (options.file1, options.file2, options.kraken,
                 options.input_directory):
        if path and not os.path.exists(path):
            raise ValueError("%s does not exist" % path)

    # check valid combo of arguments
    # Bits: pattern=16, input_directory=8, file1=4, file2=2, config=1
    #    0: none of those options, input_directory is set to the local dir
    #    1: config alone
    #    4: file1 alone
    #    6: file1 + file2 (4 + 2)
    #    8: input_directory alone
    #   16: pattern alone
    # file2 alone (2) is rejected: no branch below handles it, so accepting
    # it only led to a crash on the undefined input list.
    flag = _as_bitmask(options.pattern, options.input_directory,
                       options.file1, options.file2, options.config)
    if flag not in [0, 1, 4, 6, 8, 16]:
        logger.critical(help_input + "\n\nUse --help for more information")
        sys.exit(1)

    assert options.extension in ["fastq", "fq", "fastq.gz", "fq.gz", "bam"]

    # Note that we use abspath to make it more robust and easier to debug
    # If no options, we use input_directory and set it to "."
    data = None  # stays None when only a config file is provided
    if flag == 0 or options.input_directory:
        if flag == 0:
            options.input_directory = "."
        options.input_directory = os.path.abspath(options.input_directory)
        data = options.input_directory + os.sep + "*" + options.extension
        options.file1 = ""
        options.file2 = ""
        options.pattern = ""
        if options.verbose:
            logger.info("Looking for sample files matching %s" % data)
    elif options.pattern:
        options.pattern = os.path.abspath(options.pattern)
        data = os.path.abspath(options.pattern)
        options.input_directory = ""
        options.extension = ""
        options.file1 = ""
        options.file2 = ""
    elif options.config:
        # Nothing to prepare: there is no input file to look for.
        pass
    elif options.file1:
        data = [options.file1]
        options.file1 = os.path.abspath(options.file1)
        if options.file2:
            # Keep file1 in the list: both files have to be checked.
            data.append(options.file2)
            options.file2 = os.path.abspath(options.file2)
        options.input_directory = ""
        options.pattern = ""
        options.extension = ""

    # The factories are built only to validate the input files; their
    # instances are not used afterwards. With a config file alone there is
    # no input to validate (this path used to fail on the undefined
    # ``data``) -- NOTE(review): confirm sequana_init copes with that case.
    if data is not None:
        if options.extension == 'bam' or \
                options.pattern.endswith(('bam', 'bed')):
            FileFactory(data)
        else:
            FastQFactory(data,
                         read_tag=options.input_readtag,
                         verbose=options.verbose)

    if options.pipeline == 'quality_control' or options.pipeline == 'rnaseq':
        # check combo
        # Bits: no_adapters=16, design=8, adapters=4, adapter_fwd=2,
        # adapter_rev=1
        flag = _as_bitmask(options.no_adapters, options.design,
                           options.adapters, options.adapter_fwd,
                           options.adapter_rev)

        if flag not in [16, 12, 6, 4, 2, 3]:
            logger.critical(
                "You must use a design experimental file using --design"
                " and --adapters to indicate the type of adapters (PCRFree"
                " or Nextera), or provide the adapters directly as a "
                " string (or a file) using --adapter_fwd (AND --adapter_"
                "rev for paired-end data). A third way is to set --adapters"
                " to either Nextera, PCRFree, Rubicon or universal in which case "
                " all adapters will be used (slower). Finally, you may use "
                " --no-adapters for testing purpose or if you know there "
                " is no adapters")
            sys.exit(1)

        # flag 12 (design + adapters when wrong args provided)
        if options.design and options.adapters not in adapters_choice:
            raise ValueError(
                "When using --design, you must also "
                "provide the type of adapters using --adapters (set to "
                "one of %s )" % adapters_choice)

        if options.design and options.adapters:
            # flag 12 (design + adapters with correct args): load the design
            # and validate it. The allowed flags guarantee that a design
            # always comes with --adapters, so the former extra branch for
            # "design with a valid kit" could never run and was dropped, as
            # was a second, unused construction of the same object.
            from sequana import FindAdaptersFromDesign
            fa = FindAdaptersFromDesign(options.design, options.adapters)
            fa.check()
        elif options.no_adapters:
            options.adapter_fwd = "XXXX"
            options.adapter_rev = "XXXX"
        elif options.adapter_fwd is None:
            if options.adapters not in ["universal"] + adapters_choice:
                msg = "Incorrect adapter choice %s. " % options.adapters
                msg += "Correct values are :\n"
                for this in ['universal'] + adapters_choice:
                    msg += " - {}\n ".format(this)
                logger.error(msg)
                raise ValueError(msg)
            # flag 4
            if options.adapters == "universal":
                options.adapter_fwd = "GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGC"
                options.adapter_rev = "TCTAGCCTTCTCGCAGCACATCCCTTTCTCACATCTAGAGCCACCAGCGGCATAGTAA"
            else:
                # Let the pipeline handle the names
                options.adapter_fwd = options.adapters
                options.adapter_rev = options.adapters
        else:
            # flag 2/3
            if options.adapter_fwd:
                # Could be a string or a file. If a file, check the format
                if os.path.exists(options.adapter_fwd):
                    AdapterReader(options.adapter_fwd)
                    options.adapter_fwd = "file:%s" % options.adapter_fwd
            if options.adapter_rev:
                # Could be a string or a file. If a file, check the format
                if os.path.exists(options.adapter_rev):
                    AdapterReader(options.adapter_rev)
                    options.adapter_rev = "file:%s" % options.adapter_rev

    # If all options are valid, we can now create the tree structure
    sequana_init(options)
Beispiel #7
0
def test_wrong_sample():
    """Check that sample "C4405-M1-EC1" has no index-1 adapter for TruSeq.

    Both the ``fwd`` and ``revc`` entries of ``index1`` must be None.
    """
    finder = FindAdaptersFromDesign(
        sequana_data("test_index_mapper.csv"), "TruSeq")
    adapters = finder.get_adapters_from_sample("C4405-M1-EC1")

    index1 = adapters['index1']
    assert index1['fwd'] is None
    assert index1['revc'] is None
Beispiel #8
0
def test_nextflex48():
    """Exercise the "NEXTFlex48_DNA" kit on several design files.

    Covers: rejection of an unknown kit name, the three index layouts found
    in ``test_expdesign_hiseq.csv`` and two MiSeq design files.
    """
    design = sequana_data("test_index_mapper.csv")

    # An unknown kit name must be rejected. The failure is raised from the
    # ``else`` clause so that the handler cannot swallow it; the former
    # ``assert False`` inside the ``try`` body was caught by
    # ``except Exception`` and the check could never fail.
    try:
        FindAdaptersFromDesign(design, "error")
    except Exception:
        pass
    else:
        raise AssertionError('the adapter kit "error" should be rejected')

    # Other input from PCRFree NextFlex48: must load without error
    FindAdaptersFromDesign(design, "NEXTFlex48_DNA")

    # Test the index1/2_seq with 3 cases
    # index1 present only,
    # no index at all (None)
    # index1 and index2 present
    design1 = sequana_data("test_expdesign_hiseq.csv")
    ad1 = FindAdaptersFromDesign(design1, "NEXTFlex48_DNA")
    ad1.check()
    res1 = ad1.get_adapters_from_sample("553-iH2-1")
    res2 = ad1.get_adapters_from_sample("539-st2")
    res3 = ad1.get_adapters_from_sample("107-st2")

    assert res1['index1'][
        'fwd'].identifier == "NEXTFlex48_DNA|name:8|seq:TTAGGC"
    assert res1['index1']['fwd'].name == "8"
    assert res1['index1']['revc'].name == "8"

    assert list(res2.keys()) == ["universal"]

    assert res3['index1']['fwd'].name == "9"
    assert res3['index1']['revc'].name == "9"
    assert res3['index2']['fwd'].name == "10"
    assert res3['index2']['revc'].name == "10"

    # double indexing
    # This is a double indexing for PCRFree, which has not been tested
    # since it requires 16S adapters not yet in sequana. Disabled check:
    #
    #   design2 = sequana_data("test_expdesign_miseq_illumina2.csv")
    #   ad2 = FindAdaptersFromDesign(design2, "PCRFree")
    #   assert ad2.get_adapters_from_sample('M2')['index1']['fwd'].identifier == \
    #           'NextFlex_PCR_Free_adapter2|name:2|seq:TGACCA'
    #   assert ad2.get_adapters_from_sample('M2')['index2']['fwd'].identifier == \
    #           'NextFlex_PCR_Free_adapter13|name:13|seq:AGTCAA'

    design = sequana_data("test_expdesign_miseq_illumina.csv")
    ad = FindAdaptersFromDesign(design, "NEXTFlex48_DNA")
    res = ad.get_adapters_from_sample("CR81-L1236-P1")
    assert res['index1'][
        'fwd'].identifier == 'NEXTFlex48_DNA|name:1|seq:CGATGT'

    design1 = sequana_data("test_expdesign_miseq_illumina_1.csv")
    ad = FindAdaptersFromDesign(design1, "NEXTFlex48_DNA")
    ad.check()  # all sample names must be found
    res = ad.get_adapters_from_sample("CM-2685")['index1']['fwd']
    assert res.name == "3"
Beispiel #9
0
def test_all_adapters():
    """Check the adapters returned for one sample with each adapter kit.

    The dummy design ``test_index_mapper.csv`` and the sample
    "C1152-S2-EC1" are used for every kit. All kits must provide the
    ``universal`` entry with ``fwd`` and ``rev``; some kits get extra
    kit-specific checks.
    """
    sample = "C1152-S2-EC1"
    design = sequana_data("test_index_mapper.csv")

    def adapters_for(kit):
        # Adapters of the reference sample for the given kit
        return FindAdaptersFromDesign(design, kit).get_adapters_from_sample(
            sample)

    def assert_universal(adapters):
        # Checks shared by every kit
        assert "universal" in adapters.keys()
        assert "fwd" in adapters['universal']
        assert "rev" in adapters['universal']

    # Rubicon: no PolyA/PolyT entries
    rubicon = adapters_for("Rubicon")
    for absent in ("PolyA", "PolyT"):
        assert absent not in rubicon.keys()
    assert_universal(rubicon)

    # TruSeq: PolyA/PolyT entries are present
    truseq = adapters_for("TruSeq")
    for present in ("PolyA", "PolyT"):
        assert present in truseq.keys()
    assert_universal(truseq)

    # Nextera
    nextera = adapters_for("Nextera")
    first_forward = nextera['index1']['fwd'][0]
    assert first_forward.identifier == "Nextera_index_N701|name:N701|seq:TAAGGCGA"
    for present in ("transposase_seq_1", "transposase_seq_2"):
        assert present in nextera.keys()
    assert_universal(nextera)

    # PCRFree
    pcrfree = adapters_for("PCRFree")
    assert_universal(pcrfree)
    # Pin the exact forward universal adapter for PCRFree only; this was
    # added after a regression bug that involved res['universal'].
    universal_forward = pcrfree["universal"]["fwd"]
    assert universal_forward.identifier == "Universal_Adapter|name:universal"
    assert universal_forward.sequence == "AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"

    # NEBNext
    assert_universal(adapters_for("NEBNext"))

    # Small
    assert_universal(adapters_for("Small"))
Beispiel #10
0
def test_wrong_sample():
    """Check that sample "C4405-M1-EC1" has no index-1 adapter for Rubicon.

    Both the ``fwd`` and ``rev`` entries of ``index1`` must be None.
    """
    finder = FindAdaptersFromDesign(
        sequana_data("test_index_mapper.csv"), "Rubicon")
    adapters = finder.get_adapters_from_sample("C4405-M1-EC1")

    index1 = adapters['index1']
    assert index1['fwd'] is None
    assert index1['rev'] is None
Beispiel #11
0
def test_rubicon():
    """Run ``check()`` on the Rubicon design file; it must not raise."""
    rubicon_design = sequana_data("test_expdesign_rubicon.csv")
    finder = FindAdaptersFromDesign(rubicon_design, "Rubicon")
    finder.check()
Beispiel #12
0
def test_pcrfree():
    """Exercise the "PCRFree" kit on several design files.

    Covers: rejection of an unknown kit name, the three index layouts found
    in ``test_expdesign_hiseq.csv`` and two MiSeq design files.
    """
    design = sequana_data("test_index_mapper.csv")

    # An unknown kit name must be rejected. The failure is raised from the
    # ``else`` clause so that the handler cannot swallow it; the former
    # ``assert False`` inside the ``try`` body was caught by
    # ``except Exception`` and the check could never fail.
    try:
        FindAdaptersFromDesign(design, "error")
    except Exception:
        pass
    else:
        raise AssertionError('the adapter kit "error" should be rejected')

    # Other input from PCRFree: must load without error
    FindAdaptersFromDesign(design, "PCRFree")

    # Test the index1/2_seq with 3 cases
    # index1 present only,
    # no index at all (None)
    # index1 and index2 present
    design1 = sequana_data("test_expdesign_hiseq.csv")
    ad1 = FindAdaptersFromDesign(design1, "PCRFree")
    ad1.check()
    res1 = ad1.get_adapters_from_sample("553-iH2-1")
    res2 = ad1.get_adapters_from_sample("539-st2")
    res3 = ad1.get_adapters_from_sample("107-st2")

    assert res1['index1']['fwd'].identifier == "NextFlex_PCR_Free_adapter8|name:8|seq:TTAGGC"
    assert res1['index1']['fwd'].name == "8"
    assert res1['index1']['rev'].name == "8"

    assert list(res2.keys()) == ["universal"]

    assert res3['index1']['fwd'].name == "9"
    assert res3['index1']['rev'].name == "9"
    assert res3['index2']['fwd'].name == "10"
    assert res3['index2']['rev'].name == "10"

    # double indexing
    # This is a double indexing for PCRFree, which has not been tested
    # since it requires 16S adapters not yet in sequana. Disabled check:
    #
    #   design2 = sequana_data("test_expdesign_miseq_illumina2.csv")
    #   ad2 = FindAdaptersFromDesign(design2, "PCRFree")
    #   assert ad2.get_adapters_from_sample('M2')['index1']['fwd'].identifier == \
    #           'NextFlex_PCR_Free_adapter2|name:2|seq:TGACCA'
    #   assert ad2.get_adapters_from_sample('M2')['index2']['fwd'].identifier == \
    #           'NextFlex_PCR_Free_adapter13|name:13|seq:AGTCAA'

    design = sequana_data("test_expdesign_miseq_illumina.csv")
    ad = FindAdaptersFromDesign(design, "PCRFree")
    res = ad.get_adapters_from_sample("CR81-L1236-P1")
    assert res['index1']['fwd'].identifier == 'NextFlex_PCR_Free_adapter1|name:1|seq:CGATGT'

    design1 = sequana_data("test_expdesign_miseq_illumina_1.csv")
    ad = FindAdaptersFromDesign(design1, "PCRFree")
    ad.check()  # all sample names must be found
    res = ad.get_adapters_from_sample("CM-2685")['index1']['fwd']
    assert res.name == "3"