Esempio n. 1
0
def rapmap_index(gtf_file, ref_file, algorithm, data, out_dir):
    """Run the RapMap indexing command and return the index directory.

    Args:
        gtf_file: Not used by this function; kept for interface compatibility.
        ref_file: Not used by this function; kept for interface compatibility.
        algorithm: Index flavour without the "index" suffix, i.e. "pseudo"
            or "quasi".
        data: Sample dictionary, read through the ``dd`` accessors.
        out_dir: Base directory under which the index directory is placed.

    Returns:
        The index directory, either immediately when it already contains
        ``rapidx.jfhash`` or after the RapMap command has been run.

    Raises:
        AssertionError: If ``algorithm + "index"`` is not a supported type.
    """
    valid_indexes = ["pseudoindex", "quasiindex"]
    index_type = algorithm + "index"
    assert index_type in valid_indexes, \
        "RapMap only supports %s indices." % valid_indexes
    out_dir = os.path.join(out_dir, index_type, dd.get_genome_build(data))
    disambiguate = dd.get_disambiguate(data)
    if disambiguate:
        # Append the disambiguation entries so each combination of genomes
        # gets its own index directory.
        out_dir = "-".join([out_dir] + disambiguate)
    rapmap = config_utils.get_program("rapmap", dd.get_config(data))
    # use user supplied transcriptome FASTA file if it exists
    gtf_fa = dd.get_transcriptome_fasta(data)
    if gtf_fa:
        # NOTE(review): this nests index_type/genome_build a second time
        # below out_dir; kept unchanged -- confirm that this is intended.
        out_dir = os.path.join(out_dir, index_type, dd.get_genome_build(data))
    else:
        gtf_fa = sailfish.create_combined_fasta(data)
    # The marker file lives inside the index directory, so it has to be
    # joined as a path component rather than concatenated onto the name.
    if file_exists(os.path.join(out_dir, "rapidx.jfhash")):
        return out_dir
    files = dd.get_input_sequence_files(data)
    kmersize = sailfish.pick_kmersize(files[0])
    message = "Creating rapmap {index_type} for {gtf_fa} with {kmersize} bp kmers."
    with file_transaction(out_dir) as tx_out_dir:
        cmd = "{rapmap} {index_type} -k {kmersize} -i {tx_out_dir} -t {gtf_fa}"
        do.run(
            cmd.format(rapmap=rapmap, index_type=index_type,
                       kmersize=kmersize, tx_out_dir=tx_out_dir,
                       gtf_fa=gtf_fa),
            message.format(index_type=index_type, gtf_fa=gtf_fa,
                           kmersize=kmersize),
            None)
    return out_dir
Esempio n. 2
0
def rapmap_index(gtf_file, ref_file, algorithm, data, out_dir):
    """Create a RapMap index for the sample's transcriptome FASTA.

    Args:
        gtf_file: Not referenced in the body; retained for call compatibility.
        ref_file: Not referenced in the body; retained for call compatibility.
        algorithm: "pseudo" or "quasi"; "index" is appended to form the
            RapMap sub-command.
        data: Sample dictionary, queried via the ``dd`` accessors.
        out_dir: Parent directory for the index directory.

    Returns:
        Path of the index directory. If ``rapidx.jfhash`` is already present
        there, the directory is returned without running RapMap again.

    Raises:
        AssertionError: If the derived index type is not supported.
    """
    valid_indexes = ["pseudoindex", "quasiindex"]
    index_type = algorithm + "index"
    assert index_type in valid_indexes, \
        "RapMap only supports %s indices." % valid_indexes
    genome_build = dd.get_genome_build(data)
    out_dir = os.path.join(out_dir, index_type, genome_build)
    if dd.get_disambiguate(data):
        # Suffix the directory name with the disambiguation entries.
        out_dir = "-".join([out_dir] + dd.get_disambiguate(data))
    rapmap = config_utils.get_program("rapmap", dd.get_config(data))
    # use user supplied transcriptome FASTA file if it exists
    if dd.get_transcriptome_fasta(data):
        # NOTE(review): index_type/genome_build is appended to out_dir a
        # second time here; behavior preserved -- verify it is deliberate.
        out_dir = os.path.join(out_dir, index_type, dd.get_genome_build(data))
        gtf_fa = dd.get_transcriptome_fasta(data)
    else:
        gtf_fa = sailfish.create_combined_fasta(data)
    # Look for the marker file inside the index directory; plain string
    # concatenation would produce a sibling path that never exists.
    index_marker = os.path.join(out_dir, "rapidx.jfhash")
    if file_exists(index_marker):
        return out_dir
    files = dd.get_input_sequence_files(data)
    kmersize = sailfish.pick_kmersize(files[0])
    message = "Creating rapmap {index_type} for {gtf_fa} with {kmersize} bp kmers."
    with file_transaction(out_dir) as tx_out_dir:
        cmd = "{rapmap} {index_type} -k {kmersize} -i {tx_out_dir} -t {gtf_fa}"
        fields = {
            "rapmap": rapmap,
            "index_type": index_type,
            "kmersize": kmersize,
            "tx_out_dir": tx_out_dir,
            "gtf_fa": gtf_fa,
        }
        do.run(cmd.format(**fields), message.format(**fields), None)
    return out_dir
Esempio n. 3
0
def salmon_index(gtf_file, ref_file, data, out_dir):
    """Run ``salmon index`` and return the index directory.

    The k-mer size is the estimated read length of the first input file,
    reduced by one when even and capped at 31.

    Args:
        gtf_file: Not used by this function; kept for interface compatibility.
        ref_file: Not used by this function; kept for interface compatibility.
        data: Sample dictionary, read through the ``dd`` accessors.
        out_dir: Base directory under which the index directory is placed.

    Returns:
        The index directory, either immediately when it already contains
        ``versionInfo.json`` or after the Salmon command has been run.

    Raises:
        AssertionError: If the transcriptome FASTA file does not exist.
    """
    out_dir = os.path.join(out_dir, "index", sailfish.get_build_string(data))
    disambiguate = dd.get_disambiguate(data)
    if disambiguate:
        # Append the disambiguation entries so each combination of genomes
        # gets its own index directory.
        out_dir = "-".join([out_dir] + disambiguate)
    salmon = config_utils.get_program("salmon", dd.get_config(data))
    num_cores = dd.get_num_cores(data)
    # Prefer a user supplied transcriptome FASTA; otherwise build one.
    gtf_fa = dd.get_transcriptome_fasta(data)
    if not gtf_fa:
        gtf_fa = sailfish.create_combined_fasta(data, out_dir)
    assert file_exists(gtf_fa), "%s was not found, exiting." % gtf_fa
    out_file = os.path.join(out_dir, "versionInfo.json")
    if file_exists(out_file):
        return out_dir
    files = dd.get_input_sequence_files(data)
    readlength = bam.fastq.estimate_read_length(files[0])
    # Use an odd k no larger than 31.
    if readlength % 2 == 0:
        readlength -= 1
    kmersize = min(readlength, 31)
    with file_transaction(data, out_dir) as tx_out_dir:
        cmd = "{salmon} index -k {kmersize} -p {num_cores} -i {tx_out_dir} -t {gtf_fa}"
        message = "Creating Salmon index for {gtf_fa}."
        do.run(
            cmd.format(salmon=salmon, kmersize=kmersize, num_cores=num_cores,
                       tx_out_dir=tx_out_dir, gtf_fa=gtf_fa),
            message.format(gtf_fa=gtf_fa),
            None)
    return out_dir
Esempio n. 4
0
def rapmap_pseudoindex(gtf_file, ref_file, data, out_dir):
    """Run ``rapmap pseudoindex`` with k=31 and return the index directory.

    Args:
        gtf_file: Not used by this function; kept for interface compatibility.
        ref_file: Not used by this function; kept for interface compatibility.
        data: Sample dictionary, read through the ``dd`` accessors.
        out_dir: Base directory under which the index directory is placed.

    Returns:
        The index directory, either immediately when it already contains
        ``rapidx.jfhash`` or after the RapMap command has been run.
    """
    out_dir = os.path.join(out_dir, "pseudoindex", dd.get_genome_build(data))
    disambiguate = dd.get_disambiguate(data)
    if disambiguate:
        # Append the disambiguation entries so each combination of genomes
        # gets its own index directory.
        out_dir = "-".join([out_dir] + disambiguate)
    rapmap = config_utils.get_program("rapmap", dd.get_config(data))
    gtf_fa = sailfish._create_combined_fasta(data, out_dir)
    # The marker file lives inside the index directory, so it has to be
    # joined as a path component rather than concatenated onto the name.
    if file_exists(os.path.join(out_dir, "rapidx.jfhash")):
        return out_dir
    with file_transaction(out_dir) as tx_out_dir:
        cmd = "{rapmap} pseudoindex -k 31 -i {tx_out_dir} -t {gtf_fa}"
        message = "Creating rapmap pseudoindex for {gtf_fa}."
        do.run(
            cmd.format(rapmap=rapmap, tx_out_dir=tx_out_dir, gtf_fa=gtf_fa),
            message.format(gtf_fa=gtf_fa),
            None)
    return out_dir
Esempio n. 5
0
def salmon_index(gtf_file, ref_file, data, out_dir):
    """Run ``salmon index`` with k=31 and return the index directory.

    The command is run on every call; there is no check for an existing
    index.

    Args:
        gtf_file: Not used by this function; kept for interface compatibility.
        ref_file: Not used by this function; kept for interface compatibility.
        data: Sample dictionary, read through the ``dd`` accessors.
        out_dir: Base directory under which the index directory is placed.

    Returns:
        The index directory.
    """
    out_dir = os.path.join(out_dir, "index", dd.get_genome_build(data))
    disambiguate = dd.get_disambiguate(data)
    if disambiguate:
        # Append the disambiguation entries so each combination of genomes
        # gets its own index directory.
        out_dir = "-".join([out_dir] + disambiguate)
    salmon = config_utils.get_program("salmon", dd.get_config(data))
    num_cores = dd.get_num_cores(data)
    gtf_fa = sailfish._create_combined_fasta(data, out_dir)
    # TODO: add memoization here so an existing index is not rebuilt.
    with file_transaction(out_dir) as tx_out_dir:
        cmd = "{salmon} index -k 31 -p {num_cores} -i {tx_out_dir} -t {gtf_fa}"
        message = "Creating Salmon index for {gtf_fa}."
        do.run(
            cmd.format(salmon=salmon, num_cores=num_cores,
                       tx_out_dir=tx_out_dir, gtf_fa=gtf_fa),
            message.format(gtf_fa=gtf_fa),
            None)
    return out_dir
Esempio n. 6
0
def salmon_index(gtf_file, ref_file, data, out_dir):
    """Build a Salmon index (k=31) for the sample's combined FASTA.

    No existing-index check is performed, so the Salmon command runs each
    time this function is called.

    Args:
        gtf_file: Not referenced in the body; retained for call compatibility.
        ref_file: Not referenced in the body; retained for call compatibility.
        data: Sample dictionary, queried via the ``dd`` accessors.
        out_dir: Parent directory for the index directory.

    Returns:
        Path of the index directory.
    """
    out_dir = os.path.join(out_dir, "index", dd.get_genome_build(data))
    if dd.get_disambiguate(data):
        # Suffix the directory name with the disambiguation entries.
        out_dir = "-".join([out_dir] + dd.get_disambiguate(data))
    salmon = config_utils.get_program("salmon", dd.get_config(data))
    num_cores = dd.get_num_cores(data)
    gtf_fa = sailfish._create_combined_fasta(data, out_dir)
    # TODO: put memoization here (skip the build when the index exists).
    with file_transaction(out_dir) as tx_out_dir:
        cmd = "{salmon} index -k 31 -p {num_cores} -i {tx_out_dir} -t {gtf_fa}"
        message = "Creating Salmon index for {gtf_fa}."
        fields = {
            "salmon": salmon,
            "num_cores": num_cores,
            "tx_out_dir": tx_out_dir,
            "gtf_fa": gtf_fa,
        }
        do.run(cmd.format(**fields), message.format(**fields), None)
    return out_dir
Esempio n. 7
0
def rapmap_index(gtf_file, ref_file, algorithm, data, out_dir):
    """Run the RapMap indexing command with k=31 and return the index directory.

    Args:
        gtf_file: Not used by this function; kept for interface compatibility.
        ref_file: Not used by this function; kept for interface compatibility.
        algorithm: Index flavour without the "index" suffix, i.e. "pseudo"
            or "quasi".
        data: Sample dictionary, read through the ``dd`` accessors.
        out_dir: Base directory under which the index directory is placed.

    Returns:
        The index directory, either immediately when it already contains
        ``rapidx.jfhash`` or after the RapMap command has been run.

    Raises:
        AssertionError: If ``algorithm + "index"`` is not a supported type.
    """
    valid_indexes = ["pseudoindex", "quasiindex"]
    index_type = algorithm + "index"
    assert index_type in valid_indexes, \
        "RapMap only supports %s indices." % valid_indexes
    out_dir = os.path.join(out_dir, index_type, dd.get_genome_build(data))
    disambiguate = dd.get_disambiguate(data)
    if disambiguate:
        # Append the disambiguation entries so each combination of genomes
        # gets its own index directory.
        out_dir = "-".join([out_dir] + disambiguate)
    rapmap = config_utils.get_program("rapmap", dd.get_config(data))
    gtf_fa = sailfish.create_combined_fasta(data, out_dir)
    # The marker file lives inside the index directory, so it has to be
    # joined as a path component rather than concatenated onto the name.
    if file_exists(os.path.join(out_dir, "rapidx.jfhash")):
        return out_dir
    with file_transaction(out_dir) as tx_out_dir:
        cmd = "{rapmap} {index_type} -k 31 -i {tx_out_dir} -t {gtf_fa}"
        message = "Creating rapmap {index_type} for {gtf_fa}."
        do.run(
            cmd.format(rapmap=rapmap, index_type=index_type,
                       tx_out_dir=tx_out_dir, gtf_fa=gtf_fa),
            message.format(index_type=index_type, gtf_fa=gtf_fa),
            None)
    return out_dir
Esempio n. 8
0
def salmon_index(gtf_file, ref_file, data, out_dir):
    """Run ``salmon index`` with k=31 and return the index directory.

    Args:
        gtf_file: Not used by this function; kept for interface compatibility.
        ref_file: Not used by this function; kept for interface compatibility.
        data: Sample dictionary, read through the ``dd`` accessors.
        out_dir: Base directory under which the index directory is placed.

    Returns:
        The index directory, either immediately when it already contains
        ``versionInfo.json`` or after the Salmon command has been run.
    """
    out_dir = os.path.join(out_dir, "index", dd.get_genome_build(data))
    disambiguate = dd.get_disambiguate(data)
    if disambiguate:
        # Append the disambiguation entries so each combination of genomes
        # gets its own index directory.
        out_dir = "-".join([out_dir] + disambiguate)
    salmon = config_utils.get_program("salmon", dd.get_config(data))
    num_cores = dd.get_num_cores(data)
    # Prefer a user supplied transcriptome FASTA; otherwise build one.
    gtf_fa = dd.get_transcriptome_fasta(data)
    if not gtf_fa:
        gtf_fa = sailfish.create_combined_fasta(data, out_dir)
    out_file = os.path.join(out_dir, "versionInfo.json")
    if file_exists(out_file):
        return out_dir
    with file_transaction(out_dir) as tx_out_dir:
        cmd = "{salmon} index -k 31 -p {num_cores} -i {tx_out_dir} -t {gtf_fa}"
        message = "Creating Salmon index for {gtf_fa}."
        do.run(
            cmd.format(salmon=salmon, num_cores=num_cores,
                       tx_out_dir=tx_out_dir, gtf_fa=gtf_fa),
            message.format(gtf_fa=gtf_fa),
            None)
    return out_dir
Esempio n. 9
0
def salmon_index(gtf_file, ref_file, data, out_dir):
    """Build a Salmon index (k=31) unless one is already present.

    Args:
        gtf_file: Not referenced in the body; retained for call compatibility.
        ref_file: Not referenced in the body; retained for call compatibility.
        data: Sample dictionary, queried via the ``dd`` accessors.
        out_dir: Parent directory for the index directory.

    Returns:
        Path of the index directory. When ``versionInfo.json`` already
        exists inside it, the directory is returned without running Salmon.
    """
    out_dir = os.path.join(out_dir, "index", dd.get_genome_build(data))
    if dd.get_disambiguate(data):
        # Suffix the directory name with the disambiguation entries.
        out_dir = "-".join([out_dir] + dd.get_disambiguate(data))
    salmon = config_utils.get_program("salmon", dd.get_config(data))
    num_cores = dd.get_num_cores(data)
    # A user supplied transcriptome FASTA takes precedence over a
    # generated combined FASTA.
    if dd.get_transcriptome_fasta(data):
        gtf_fa = dd.get_transcriptome_fasta(data)
    else:
        gtf_fa = sailfish.create_combined_fasta(data, out_dir)
    version_marker = os.path.join(out_dir, "versionInfo.json")
    if file_exists(version_marker):
        return out_dir
    with file_transaction(out_dir) as tx_out_dir:
        cmd = "{salmon} index -k 31 -p {num_cores} -i {tx_out_dir} -t {gtf_fa}"
        message = "Creating Salmon index for {gtf_fa}."
        fields = {
            "salmon": salmon,
            "num_cores": num_cores,
            "tx_out_dir": tx_out_dir,
            "gtf_fa": gtf_fa,
        }
        do.run(cmd.format(**fields), message.format(**fields), None)
    return out_dir
Esempio n. 10
0
def kallisto_index(gtf_file, ref_file, data, out_dir):
    """Run ``kallisto index`` with k=31 and return the index file path.

    Args:
        gtf_file: Not used by this function; kept for interface compatibility.
        ref_file: Not used by this function; kept for interface compatibility.
        data: Sample dictionary, read through the ``dd`` accessors.
        out_dir: Base directory; the index is written below ``out_dir/index``.

    Returns:
        Path of the ``.idx`` file, either immediately when it already exists
        or after the Kallisto command has been run.
    """
    out_dir = os.path.join(out_dir, "index")
    out_stem = dd.get_genome_build(data)
    disambiguate = dd.get_disambiguate(data)
    if disambiguate:
        # Extend the stem itself so that both the index directory and the
        # index file name carry the disambiguation entries.
        out_stem = "-".join([out_stem] + disambiguate)
    index_dir = os.path.join(out_dir, out_stem)
    kallisto = config_utils.get_program("kallisto", dd.get_config(data))
    # Prefer a user supplied transcriptome FASTA; otherwise build one.
    gtf_fa = dd.get_transcriptome_fasta(data)
    if not gtf_fa:
        gtf_fa = sailfish.create_combined_fasta(data, index_dir)
    out_file = os.path.join(index_dir, out_stem + ".idx")
    if file_exists(out_file):
        return out_file
    with file_transaction(out_file) as tx_out_file:
        cmd = "{kallisto} index -k 31 -i {tx_out_file} {gtf_fa}"
        message = "Creating Kallisto index for {gtf_fa}."
        do.run(
            cmd.format(kallisto=kallisto, tx_out_file=tx_out_file,
                       gtf_fa=gtf_fa),
            message.format(gtf_fa=gtf_fa),
            None)
    return out_file