def main(config_file):
    """Run the RNA-seq alignment pipeline described by a YAML config file.

    Every directory listed under ``config["dir"]`` is created, ``*.fq`` and
    ``*.fastq`` inputs are located under ``config["dir"]["data"]``, and the
    stages named in ``config["run"]`` are executed in order through ``view``
    (a name that is not defined in this function).  ``stop_cluster()`` is
    called once every stage has finished.

    Args:
        config_file: path to the YAML configuration file.

    Raises:
        RuntimeError: if ``htseq-count`` or ``rseqc`` is requested before
            the ``tophat`` stage has produced the files they consume.
    """
    with open(config_file) as in_handle:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; consider yaml.safe_load if the config
        # can ever come from an untrusted source.
        config = yaml.load(in_handle)

    # make the needed directories; an explicit loop is used because map()
    # is lazy on Python 3 and would never call safe_makedir
    for directory in config["dir"].values():
        safe_makedir(directory)

    # specific for project
    input_dir = config["dir"]["data"]
    logger.info("Loading files from %s" % (input_dir))
    input_files = list(locate("*.fq", input_dir))
    input_files += list(locate("*.fastq", input_dir))
    logger.info("Input files: %s" % (input_files))

    results_dir = config["dir"]["results"]
    safe_makedir(results_dir)

    # make the stage repository
    repository = StageRepository(config)
    logger.info("Stages found: %s" % (repository.plugins))

    if config.get("test_pipeline", False):
        logger.info("Running a test pipeline on a subset of the reads.")
        results_dir = os.path.join(results_dir, "test_pipeline")
        config["dir"]["results"] = results_dir
        safe_makedir(results_dir)
        # a real list: it is logged and then consumed by several stages
        curr_files = [make_test(in_file, config) for in_file in input_files]
        logger.info("Converted %s to %s. " % (input_files, curr_files))
    else:
        curr_files = input_files
        logger.info("Running RNASeq alignment pipeline on %s." % (curr_files))

    # products of the tophat stage that later stages depend on
    tophat_outputs = bamfiles = final_bamfiles = None

    for stage in config["run"]:
        if stage == "fastqc":
            logger.info("Running fastqc on %s." % (curr_files))
            stage_runner = FastQC(config)
            view.map(stage_runner, curr_files)

        elif stage == "cutadapt":
            curr_files = combine_pairs(curr_files)
            logger.info("Running cutadapt on %s." % (curr_files))
            stage_runner = Cutadapt(config)
            curr_files = view.map(stage_runner, curr_files)

        elif stage == "tophat":
            logger.info("Running Tophat on %s." % (curr_files))
            tophat = Tophat(config)
            tophat_outputs = view.map(tophat, curr_files)
            bamfiles = view.map(sam.sam2bam, tophat_outputs)
            bamsort = view.map(sam.bamsort, bamfiles)
            view.map(sam.bamindex, bamsort)
            final_bamfiles = bamsort
            curr_files = tophat_outputs

        elif stage == "disambiguate":
            logger.info("Disambiguating %s." % (curr_files))
            disambiguate = repository[stage](config)
            view.map(disambiguate, curr_files)

        elif stage == "htseq-count":
            if bamfiles is None:
                raise RuntimeError('The "htseq-count" stage needs the BAM '
                                   'files made by the "tophat" stage; run '
                                   '"tophat" first.')
            logger.info("Running htseq-count on %s." % (bamfiles))
            name_sorted = view.map(sam.bam_name_sort, bamfiles)
            curr_files = view.map(sam.bam2sam, name_sorted)
            htseq_args = zip(*product(curr_files, [config], [stage]))
            htseq_outputs = view.map(htseq_count.run_with_config,
                                     *htseq_args)
            htseq_count.combine_counts(htseq_outputs)

        elif stage == "rnaseq_metrics":
            logger.info("Calculating RNASeq metrics on %s." % (curr_files))
            coverage = RNASeqMetrics(config)
            view.map(coverage, curr_files)

        elif stage == "rseqc":
            logger.info("Running rseqc on %s." % (curr_files))
            if final_bamfiles is None:
                raise RuntimeError('The "rseqc" stage needs the sorted BAM '
                                   'files made by the "tophat" stage; run '
                                   '"tophat" first.')
            # materialised because the arguments are unpacked several times;
            # a bare zip() is a one-shot iterator on Python 3
            rseq_args = list(zip(*product(final_bamfiles, [config])))
            view.map(rseqc.bam_stat, *rseq_args)
            view.map(rseqc.genebody_coverage, *rseq_args)
            view.map(rseqc.junction_annotation, *rseq_args)
            view.map(rseqc.junction_saturation, *rseq_args)
            RPKM_count_out = view.map(rseqc.RPKM_count, *rseq_args)
            view.map(rseqc.fix_RPKM_count_file, RPKM_count_out)
            # NOTE: annotating the fixed RPKM tables with biomart
            # (annotate.annotate_table_with_biomart on "gene_id" /
            # "ensembl_gene_id" / "human") is disabled.
            view.map(rseqc.RPKM_saturation, *rseq_args)
            curr_files = tophat_outputs

    # end gracefully
    stop_cluster()
def main(config_file):
    """Run the RNA-seq alignment pipeline described by a YAML config file.

    Every directory listed under ``config["dir"]`` is created, ``*.fq`` and
    ``*.fastq`` inputs are located under ``config["dir"]["data"]``, and the
    stages named in ``config["run"]`` are executed in order through ``view``
    (a name that is not defined in this function).  In this variant the
    tophat output is coordinate-sorted before conversion to BAM, and
    ``rseqc.genebody_coverage`` runs on downsampled BAM files.
    ``stop_cluster()`` is called once every stage has finished.

    Args:
        config_file: path to the YAML configuration file.

    Raises:
        RuntimeError: if ``htseq-count`` or ``rseqc`` is requested before
            the ``tophat`` stage has produced the files they consume.
    """
    with open(config_file) as in_handle:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; consider yaml.safe_load if the config
        # can ever come from an untrusted source.
        config = yaml.load(in_handle)

    # make the needed directories; an explicit loop is used because map()
    # is lazy on Python 3 and would never call safe_makedir
    for directory in config["dir"].values():
        safe_makedir(directory)

    # specific for project
    input_dir = config["dir"]["data"]
    logger.info("Loading files from %s" % (input_dir))
    input_files = list(locate("*.fq", input_dir))
    input_files += list(locate("*.fastq", input_dir))
    logger.info("Input files: %s" % (input_files))

    results_dir = config["dir"]["results"]
    safe_makedir(results_dir)

    # make the stage repository
    repository = StageRepository(config)
    logger.info("Stages found: %s" % (repository.plugins))

    if config.get("test_pipeline", False):
        logger.info("Running a test pipeline on a subset of the reads.")
        results_dir = os.path.join(results_dir, "test_pipeline")
        config["dir"]["results"] = results_dir
        safe_makedir(results_dir)
        # a real list: it is logged and then consumed by several stages
        curr_files = [make_test(in_file, config) for in_file in input_files]
        logger.info("Converted %s to %s. " % (input_files, curr_files))
    else:
        curr_files = input_files
        logger.info("Running RNASeq alignment pipeline on %s." % (curr_files))

    # products of the tophat stage that later stages depend on
    tophat_outputs = bamfiles = final_bamfiles = None

    for stage in config["run"]:
        if stage == "fastqc":
            logger.info("Running fastqc on %s." % (curr_files))
            stage_runner = FastQC(config)
            view.map(stage_runner, curr_files)

        elif stage == "cutadapt":
            curr_files = combine_pairs(curr_files)
            logger.info("Running cutadapt on %s." % (curr_files))
            stage_runner = Cutadapt(config)
            curr_files = view.map(stage_runner, curr_files)

        elif stage == "tophat":
            logger.info("Running Tophat on %s." % (curr_files))
            tophat = Tophat(config)
            tophat_outputs = view.map(tophat, curr_files)
            sortsam = view.map(sam.coordinate_sort_sam, tophat_outputs,
                               [config] * len(tophat_outputs))
            bamfiles = view.map(sam.sam2bam, sortsam)
            bamsort = view.map(sam.bamsort, bamfiles)
            view.map(sam.bamindex, bamsort)
            final_bamfiles = bamsort
            curr_files = tophat_outputs

        elif stage == "disambiguate":
            logger.info("Disambiguating %s." % (curr_files))
            disambiguate = repository[stage](config)
            view.map(disambiguate, curr_files)

        elif stage == "htseq-count":
            if bamfiles is None:
                raise RuntimeError('The "htseq-count" stage needs the BAM '
                                   'files made by the "tophat" stage; run '
                                   '"tophat" first.')
            logger.info("Running htseq-count on %s." % (bamfiles))
            name_sorted = view.map(sam.bam_name_sort, bamfiles)
            curr_files = view.map(sam.bam2sam, name_sorted)
            htseq_args = zip(*product(curr_files, [config], [stage]))
            htseq_outputs = view.map(htseq_count.run_with_config,
                                     *htseq_args)
            htseq_count.combine_counts(htseq_outputs)

        elif stage == "rnaseq_metrics":
            logger.info("Calculating RNASeq metrics on %s." % (curr_files))
            coverage = RNASeqMetrics(config)
            view.map(coverage, curr_files)

        elif stage == "rseqc":
            logger.info("Running rseqc on %s." % (curr_files))
            if final_bamfiles is None:
                raise RuntimeError('The "rseqc" stage needs the sorted BAM '
                                   'files made by the "tophat" stage; run '
                                   '"tophat" first.')
            # materialised because the arguments are unpacked several times;
            # a bare zip() is a one-shot iterator on Python 3
            rseq_args = list(zip(*product(final_bamfiles, [config])))
            view.map(rseqc.bam_stat, *rseq_args)
            # gene body coverage runs on downsampled BAM files
            # NOTE(review): 40000000 is presumably the number of reads to
            # keep -- confirm against sam.downsample_bam
            down_args = zip(*product(final_bamfiles, [40000000]))
            down_bam = view.map(sam.downsample_bam, *down_args)
            view.map(rseqc.genebody_coverage, down_bam,
                     [config] * len(down_bam))
            view.map(rseqc.junction_annotation, *rseq_args)
            view.map(rseqc.junction_saturation, *rseq_args)
            RPKM_count_out = view.map(rseqc.RPKM_count, *rseq_args)
            view.map(rseqc.fix_RPKM_count_file, RPKM_count_out)
            # NOTE: annotating the fixed RPKM tables with biomart
            # (annotate.annotate_table_with_biomart on "gene_id" /
            # "ensembl_gene_id" / "human") is disabled.
            view.map(rseqc.RPKM_saturation, *rseq_args)
            curr_files = tophat_outputs

    # end gracefully
    stop_cluster()
def main(config_file):
    """Run the Bowtie-based RNA-seq pipeline described by a YAML config file.

    Every directory listed under ``config["dir"]`` is created, ``*.fq`` and
    ``*.fastq`` inputs are located under ``config["input_dir"]``, and the
    stages named in ``config["run"]`` are executed in order through ``view``
    (a name that is not defined in this function).  ``stop_cluster()`` is
    called once every stage has finished.

    Args:
        config_file: path to the YAML configuration file.

    Raises:
        RuntimeError: if ``rseqc`` is requested before the ``bowtie`` stage
            has produced the sorted BAM files it consumes.
    """
    with open(config_file) as in_handle:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; consider yaml.safe_load if the config
        # can ever come from an untrusted source.
        config = yaml.load(in_handle)

    # make the needed directories; an explicit loop is used because map()
    # is lazy on Python 3 and would never call safe_makedir
    for directory in config["dir"].values():
        safe_makedir(directory)

    # specific for project
    input_dir = config["input_dir"]
    logger.info("Loading files from %s" % (input_dir))
    input_files = list(locate("*.fq", input_dir))
    input_files += list(locate("*.fastq", input_dir))
    logger.info("Input files: %s" % (input_files))

    results_dir = config["dir"]["results"]
    safe_makedir(results_dir)

    if config.get("test_pipeline", False):
        logger.info("Running a test pipeline on a subset of the reads.")
        results_dir = os.path.join(results_dir, "test_pipeline")
        config["dir"]["results"] = results_dir
        safe_makedir(results_dir)
        # a real list: it is logged and then consumed by several stages
        curr_files = [make_test(in_file, config) for in_file in input_files]
        logger.info("Converted %s to %s. " % (input_files, curr_files))
    else:
        curr_files = input_files
        logger.info("Running RNASeq alignment pipeline on %s." % (curr_files))

    # sorted BAM files from the bowtie stage, consumed by rseqc
    final_bamfiles = None

    for stage in config["run"]:
        if stage == "fastqc":
            logger.info("Running fastqc on %s." % (curr_files))
            stage_runner = FastQC(config)
            view.map(stage_runner, curr_files)

        elif stage == "cutadapt":
            curr_files = combine_pairs(curr_files)
            logger.info("Running cutadapt on %s." % (curr_files))
            stage_runner = Cutadapt(config)
            curr_files = view.map(stage_runner, curr_files)
            logger.info("Output of cutadapt: %s." % (curr_files))

        elif stage == "bowtie":
            logger.info("Running Bowtie on %s." % (curr_files))
            bowtie = Bowtie(config)
            bowtie_outputs = view.map(bowtie, curr_files)
            bamfiles = view.map(sam.sam2bam, bowtie_outputs)
            bamsort = view.map(sam.bamsort, bamfiles)
            view.map(sam.bamindex, bamsort)
            # remembered for rseqc, which previously read a name this
            # function never assigned
            final_bamfiles = bamsort

        elif stage == "htseq-count":
            logger.info("Running htseq-count on %s." % (curr_files))
            htseq_args = zip(*product(curr_files, [config], [stage]))
            htseq_outputs = view.map(htseq_count.run_with_config,
                                     *htseq_args)
            # same module as run_with_config above (was "htseq")
            htseq_count.combine_counts(htseq_outputs)

        elif stage == "rnaseq_metrics":
            logger.info("Calculating RNASeq metrics on %s." % (curr_files))
            coverage = RNASeqMetrics(config)
            view.map(coverage, curr_files)

        elif stage == "rseqc":
            logger.info("Running rseqc on %s." % (curr_files))
            if final_bamfiles is None:
                raise RuntimeError('The "rseqc" stage needs the sorted BAM '
                                   'files made by the "bowtie" stage; run '
                                   '"bowtie" first.')
            # materialised because the arguments are unpacked several times;
            # a bare zip() is a one-shot iterator on Python 3
            rseq_args = list(zip(*product(final_bamfiles, [config])))
            view.map(rseqc.bam_stat, *rseq_args)
            view.map(rseqc.genebody_coverage, *rseq_args)
            view.map(rseqc.junction_annotation, *rseq_args)
            view.map(rseqc.junction_saturation, *rseq_args)
            RPKM_count_out = view.map(rseqc.RPKM_count, *rseq_args)
            view.map(rseqc.fix_RPKM_count_file, RPKM_count_out)
            # NOTE: annotating the fixed RPKM tables with biomart
            # (annotate.annotate_table_with_biomart on "gene_id" /
            # "ensembl_gene_id" / "human") is disabled.
            view.map(rseqc.RPKM_saturation, *rseq_args)
            # curr_files is left untouched: this stage never rebinds it and
            # there is no tophat output in this pipeline to restore

    # end gracefully
    stop_cluster()
def main(config, view):
    """Run the RNA-seq pipeline stages listed in ``config["run"]``.

    Every directory listed under ``config["dir"]`` is created, ``*.fq`` and
    ``*.fastq`` inputs are located under ``config["dir"]["data"]``, and each
    stage is dispatched through ``view.map``.

    Args:
        config: already-loaded configuration mapping; ``config["dir"]`` and
            ``config["run"]`` are read, and ``config["dir"]["results"]`` is
            rewritten when ``test_pipeline`` is enabled.
        view: object whose ``map(func, *sequences)`` runs the work; the code
            calls ``len()`` on its results, so they must be sized.

    Raises:
        RuntimeError: if ``htseq-count`` is requested before the ``tophat``
            stage has produced the BAM files it consumes.
    """
    # make the needed directories; an explicit loop is used because map()
    # is lazy on Python 3 and would never call safe_makedir
    for directory in config["dir"].values():
        safe_makedir(directory)

    # specific for project
    input_dir = config["dir"]["data"]
    logger.info("Loading files from %s" % (input_dir))
    input_files = list(locate("*.fq", input_dir))
    input_files += list(locate("*.fastq", input_dir))
    logger.info("Input files: %s" % (input_files))

    results_dir = config["dir"]["results"]
    safe_makedir(results_dir)

    # make the stage repository
    repository = StageRepository(config)
    logger.info("Stages found: %s" % (repository.plugins))

    if config.get("test_pipeline", False):
        logger.info("Running a test pipeline on a subset of the reads.")
        results_dir = os.path.join(results_dir, "test_pipeline")
        config["dir"]["results"] = results_dir
        safe_makedir(results_dir)
        # a real list: it is logged and then consumed by several stages
        curr_files = [make_test(in_file, config) for in_file in input_files]
        logger.info("Converted %s to %s. " % (input_files, curr_files))
    else:
        curr_files = input_files
        logger.info("Running RNASeq alignment pipeline on %s." % (curr_files))

    # products of the tophat stage that later stages depend on
    tophat_outputs = bamfiles = final_bamfiles = None

    for stage in config["run"]:
        if stage == "fastqc":
            logger.info("Running fastqc on %s." % (curr_files))
            stage_runner = FastQC(config)
            view.map(stage_runner, curr_files)

        elif stage == "cutadapt":
            curr_files = combine_pairs(curr_files)
            logger.info("Running cutadapt on %s." % (curr_files))
            stage_runner = Cutadapt(config)
            curr_files = view.map(stage_runner, curr_files)

        elif stage == "tophat":
            logger.info("Running Tophat on %s." % (curr_files))
            tophat = Tophat(config)
            tophat_outputs = view.map(tophat, curr_files)
            sortsam = view.map(sam.coordinate_sort_sam, tophat_outputs,
                               [config] * len(tophat_outputs))
            bamfiles = view.map(sam.sam2bam, sortsam)
            bamsort = view.map(sam.bamsort, bamfiles)
            view.map(sam.bamindex, bamsort)
            final_bamfiles = bamsort
            curr_files = tophat_outputs

        elif stage == "disambiguate":
            logger.info("Disambiguating %s." % (curr_files))
            disambiguate = repository[stage](config)
            view.map(disambiguate, curr_files)

        elif stage == "htseq-count":
            if bamfiles is None:
                raise RuntimeError('The "htseq-count" stage needs the BAM '
                                   'files made by the "tophat" stage; run '
                                   '"tophat" first.')
            logger.info("Running htseq-count on %s." % (bamfiles))
            name_sorted = view.map(sam.bam_name_sort, bamfiles)
            curr_files = view.map(sam.bam2sam, name_sorted)
            htseq_args = zip(*product(curr_files, [config], [stage]))
            htseq_outputs = view.map(htseq_count.run_with_config,
                                     *htseq_args)
            htseq_count.combine_counts(htseq_outputs)

        elif stage == "rnaseq_metrics":
            logger.info("Calculating RNASeq metrics on %s." % (curr_files))
            coverage = RNASeqMetrics(config)
            view.map(coverage, curr_files)

        elif stage == "hard_clip":
            logger.info("Trimming from the beginning of reads on %s."
                        % (curr_files))
            hard_clipper = HardClipper(config)
            curr_files = view.map(hard_clipper, curr_files)

        elif stage == "rseqc":
            logger.info("Running rseqc on %s." % (curr_files))
            curr_files = view.map(sam.sam2bam, curr_files)
            # materialised because the arguments are unpacked several times;
            # a bare zip() is a one-shot iterator on Python 3
            rseq_args = list(zip(*product(curr_files, [config])))
            view.map(rseqc.bam_stat, *rseq_args)
            view.map(rseqc.genebody_coverage, *rseq_args)
            view.map(rseqc.junction_annotation, *rseq_args)
            view.map(sam.bamindex, curr_files)
            RPKM_count_out = view.map(rseqc.RPKM_count, *rseq_args)
            view.map(rseqc.fix_RPKM_count_file, RPKM_count_out)
            # NOTE(review): the visible source of this function stops here
            # inside an unterminated triple-quoted string, so any further
            # rseqc steps are not shown; the dangling opener was dropped.
def main(config_file):
    """Run the per-condition RNA-seq pipeline, then DESeq on the counts.

    Input files are grouped by condition; for each condition the stages in
    ``config["run"]`` are executed through ``view`` (a name that is not
    defined in this function) with ``config["dir"]["results"]`` pointed at a
    per-condition directory.  Afterwards the collected htseq-count outputs
    are grouped by cell type and, for every entry of the ``deseq`` stage's
    ``comparisons``, combined, passed to ``deseq.run`` and annotated.
    ``stop_cluster()`` is called at the end.

    Args:
        config_file: path to the YAML configuration file.

    Raises:
        RuntimeError: if ``rseqc`` is requested before the ``tophat`` stage
            has produced the sorted BAM files it consumes.
    """
    with open(config_file) as in_handle:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; consider yaml.safe_load if the config
        # can ever come from an untrusted source.
        config = yaml.load(in_handle)

    # make the needed directories; an explicit loop is used because map()
    # is lazy on Python 3 and would never call safe_makedir
    for directory in config["dir"].values():
        safe_makedir(directory)

    # specific for thesis pipeline
    # not used below; the lookup is kept so that a config without
    # "input_dirs" still fails here with KeyError
    input_dirs = config["input_dirs"]
    results_dir = config["dir"].get("results", "results")
    input_files = _find_input_files(config)
    conditions = _group_input_by_condition(input_files)
    logger.info("Input_files: %s" % (input_files))
    logger.info("Condition groups %s" % (conditions))

    htseq_outdict = {}
    # products of the tophat stage that rseqc depends on
    tophat_outputs = final_bamfiles = None

    for condition, curr_files in conditions.items():
        condition_dir = os.path.join(results_dir, condition)
        safe_makedir(condition_dir)
        config["dir"]["results"] = condition_dir

        for stage in config["run"]:
            if stage == "fastqc":
                _emit_stage_message(stage, curr_files)
                fastqc_config = _get_stage_config(config, stage)
                fastqc_args = zip(*product(curr_files, [fastqc_config],
                                           [config]))
                view.map(fastqc.run, *fastqc_args)

            elif stage == "cutadapt":
                _emit_stage_message(stage, curr_files)
                cutadapt_config = _get_stage_config(config, stage)
                cutadapt_args = zip(*product(curr_files, [cutadapt_config],
                                             [config]))
                cutadapt_outputs = view.map(cutadapt_tool.run,
                                            *cutadapt_args)
                curr_files = cutadapt_outputs
                logger.info("Fixing mate pair information.")
                pairs = combine_pairs(curr_files)
                first = [x[0] for x in pairs]
                second = [x[1] for x in pairs]
                logger.info("Forward: %s" % (first))
                logger.info("Reverse: %s" % (second))
                fixed = view.map(fastq.fix_mate_pairs_with_config,
                                 first, second, [config] * len(first))
                curr_files = list(flatten(fixed))

            elif stage == "sickle":
                _emit_stage_message(stage, curr_files)
                pairs = combine_pairs(curr_files)
                first = [x[0] for x in pairs]
                second = [x[1] for x in pairs]
                fixed = view.map(sickle.run_with_config,
                                 first, second, [config] * len(first))
                curr_files = list(flatten(fixed))

            elif stage == "tophat":
                _emit_stage_message(stage, curr_files)
                tophat_config = _get_stage_config(config, stage)
                pairs = combine_pairs(curr_files)
                first = [x[0] for x in pairs]
                second = [x[1] for x in pairs]
                logger.info("first %s" % (first))
                logger.info("second %s" % (second))
                npairs = len(first)
                tophat_outputs = view.map(tophat.run_with_config,
                                          first, second,
                                          [config["ref"]] * npairs,
                                          ["tophat"] * npairs,
                                          [config] * npairs)
                bamfiles = view.map(sam.sam2bam, tophat_outputs)
                bamsort = view.map(sam.bamsort, bamfiles)
                view.map(sam.bamindex, bamsort)
                final_bamfiles = bamsort
                curr_files = tophat_outputs

            elif stage == "htseq-count":
                _emit_stage_message(stage, curr_files)
                htseq_config = _get_stage_config(config, stage)
                htseq_args = zip(*product(curr_files, [config], [stage]))
                htseq_outputs = view.map(htseq_count.run_with_config,
                                         *htseq_args)
                htseq_outdict[condition] = htseq_outputs

            elif stage == "coverage":
                logger.info("Calculating RNASeq metrics on %s."
                            % (curr_files))
                nrun = len(curr_files)
                ref = prepare_ref_file(config["stage"][stage]["ref"], config)
                ribo = config["stage"][stage]["ribo"]
                picard = BroadRunner(config["program"]["picard"])
                out_dir = os.path.join(results_dir, stage)
                safe_makedir(out_dir)
                out_files = [
                    os.path.join(
                        out_dir,
                        replace_suffix(os.path.basename(x), "metrics"))
                    for x in curr_files
                ]
                out_files = view.map(picardrun.picard_rnaseq_metrics,
                                     [picard] * nrun, curr_files,
                                     [ref] * nrun, [ribo] * nrun, out_files)

            elif stage == "rseqc":
                _emit_stage_message(stage, curr_files)
                rseqc_config = _get_stage_config(config, stage)
                # fail before any rseqc work starts rather than part-way
                # through, at the first use of the tophat products
                if final_bamfiles is None:
                    raise RuntimeError('The "rseqc" stage needs the sorted '
                                       'BAM files made by the "tophat" '
                                       'stage; run "tophat" first.')
                # materialised because the arguments are unpacked several
                # times; a bare zip() is a one-shot iterator on Python 3
                rseq_args = list(zip(*product(curr_files, [config])))
                view.map(rseqc.bam_stat, *rseq_args)
                view.map(rseqc.genebody_coverage, *rseq_args)
                view.map(rseqc.junction_annotation, *rseq_args)
                view.map(rseqc.junction_saturation, *rseq_args)
                RPKM_args = zip(*product(final_bamfiles, [config]))
                RPKM_count_out = view.map(rseqc.RPKM_count, *RPKM_args)
                view.map(rseqc.fix_RPKM_count_file, RPKM_count_out)
                # NOTE: annotating the fixed RPKM tables with biomart
                # (annotate.annotate_table_with_biomart on "gene_id" /
                # "ensembl_gene_id" / "human") is disabled.
                view.map(rseqc.RPKM_saturation, *rseq_args)
                curr_files = tophat_outputs

    # combine htseq-count files and run deseq on them
    _, htseq_files = dict_to_vectors(htseq_outdict)
    deseq_config = _get_stage_config(config, "deseq")
    cell_types = _group_input_by_cell_type(htseq_files)
    for cell_type, files in cell_types.items():
        for comparison in deseq_config["comparisons"]:
            comparison_name = "_vs_".join(comparison)
            deseq_dir = os.path.join(results_dir, "deseq", cell_type,
                                     comparison_name)
            safe_makedir(deseq_dir)
            out_file = os.path.join(deseq_dir,
                                    comparison_name + ".counts.txt")
            # NOTE(review): the grouping does not depend on `comparison`, so
            # every comparison of a cell type runs on the same files --
            # confirm this is intended.
            files_by_condition = _group_input_by_condition(files)
            _emit_stage_message("deseq", files_by_condition)
            c, f = dict_to_vectors(files_by_condition)
            combined_out = htseq_count.combine_counts(f, None, out_file)
            deseq_out = os.path.join(deseq_dir, comparison_name)
            # report the condition vector that is actually handed to deseq
            logger.info("Running deseq on %s with conditions %s "
                        "and writing to %s" % (combined_out, c, deseq_out))
            deseq_out = view.map(deseq.run, [combined_out], [c], [deseq_out])
            annotate.annotate_table_with_biomart(deseq_out[0], "id",
                                                 "ensembl_gene_id", "human")

    # end gracefully
    stop_cluster()
def main(config_file):
    """Run the per-condition RNA-seq pipeline, then DESeq on the counts.

    Input files are grouped by condition; for each condition the stages in
    ``config["run"]`` are executed through ``view`` (a name that is not
    defined in this function) with ``config["dir"]["results"]`` pointed at a
    per-condition directory.  Afterwards the collected htseq-count outputs
    are grouped by cell type and, for every entry of the ``deseq`` stage's
    ``comparisons``, combined, passed to ``deseq.run`` and annotated.
    ``stop_cluster()`` is called at the end.

    Args:
        config_file: path to the YAML configuration file.

    Raises:
        RuntimeError: if ``rseqc`` is requested before the ``tophat`` stage
            has produced the sorted BAM files it consumes.
    """
    with open(config_file) as in_handle:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; consider yaml.safe_load if the config
        # can ever come from an untrusted source.
        config = yaml.load(in_handle)

    # make the needed directories; an explicit loop is used because map()
    # is lazy on Python 3 and would never call safe_makedir
    for directory in config["dir"].values():
        safe_makedir(directory)

    # specific for thesis pipeline
    # not used below; the lookup is kept so that a config without
    # "input_dirs" still fails here with KeyError
    input_dirs = config["input_dirs"]
    results_dir = config["dir"].get("results", "results")
    input_files = _find_input_files(config)
    conditions = _group_input_by_condition(input_files)
    logger.info("Input_files: %s" % (input_files))
    logger.info("Condition groups %s" % (conditions))

    htseq_outdict = {}
    # products of the tophat stage that rseqc depends on
    tophat_outputs = final_bamfiles = None

    for condition, curr_files in conditions.items():
        condition_dir = os.path.join(results_dir, condition)
        safe_makedir(condition_dir)
        config["dir"]["results"] = condition_dir

        for stage in config["run"]:
            if stage == "fastqc":
                _emit_stage_message(stage, curr_files)
                fastqc_config = _get_stage_config(config, stage)
                fastqc_args = zip(
                    *product(curr_files, [fastqc_config], [config]))
                view.map(fastqc.run, *fastqc_args)

            elif stage == "cutadapt":
                _emit_stage_message(stage, curr_files)
                cutadapt_config = _get_stage_config(config, stage)
                cutadapt_args = zip(
                    *product(curr_files, [cutadapt_config], [config]))
                cutadapt_outputs = view.map(cutadapt_tool.run,
                                            *cutadapt_args)
                curr_files = cutadapt_outputs
                logger.info("Fixing mate pair information.")
                pairs = combine_pairs(curr_files)
                first = [x[0] for x in pairs]
                second = [x[1] for x in pairs]
                logger.info("Forward: %s" % (first))
                logger.info("Reverse: %s" % (second))
                fixed = view.map(fastq.fix_mate_pairs_with_config,
                                 first, second, [config] * len(first))
                curr_files = list(flatten(fixed))

            elif stage == "sickle":
                _emit_stage_message(stage, curr_files)
                pairs = combine_pairs(curr_files)
                first = [x[0] for x in pairs]
                second = [x[1] for x in pairs]
                fixed = view.map(sickle.run_with_config,
                                 first, second, [config] * len(first))
                curr_files = list(flatten(fixed))

            elif stage == "tophat":
                _emit_stage_message(stage, curr_files)
                tophat_config = _get_stage_config(config, stage)
                pairs = combine_pairs(curr_files)
                first = [x[0] for x in pairs]
                second = [x[1] for x in pairs]
                logger.info("first %s" % (first))
                logger.info("second %s" % (second))
                npairs = len(first)
                tophat_outputs = view.map(tophat.run_with_config,
                                          first, second,
                                          [config["ref"]] * npairs,
                                          ["tophat"] * npairs,
                                          [config] * npairs)
                bamfiles = view.map(sam.sam2bam, tophat_outputs)
                bamsort = view.map(sam.bamsort, bamfiles)
                view.map(sam.bamindex, bamsort)
                final_bamfiles = bamsort
                curr_files = tophat_outputs

            elif stage == "htseq-count":
                _emit_stage_message(stage, curr_files)
                htseq_config = _get_stage_config(config, stage)
                htseq_args = zip(*product(curr_files, [config], [stage]))
                htseq_outputs = view.map(htseq_count.run_with_config,
                                         *htseq_args)
                htseq_outdict[condition] = htseq_outputs

            elif stage == "coverage":
                logger.info("Calculating RNASeq metrics on %s."
                            % (curr_files))
                nrun = len(curr_files)
                ref = prepare_ref_file(config["stage"][stage]["ref"], config)
                ribo = config["stage"][stage]["ribo"]
                picard = BroadRunner(config["program"]["picard"])
                out_dir = os.path.join(results_dir, stage)
                safe_makedir(out_dir)
                out_files = [
                    os.path.join(
                        out_dir,
                        replace_suffix(os.path.basename(x), "metrics"))
                    for x in curr_files
                ]
                out_files = view.map(picardrun.picard_rnaseq_metrics,
                                     [picard] * nrun, curr_files,
                                     [ref] * nrun, [ribo] * nrun, out_files)

            elif stage == "rseqc":
                _emit_stage_message(stage, curr_files)
                rseqc_config = _get_stage_config(config, stage)
                # fail before any rseqc work starts rather than part-way
                # through, at the first use of the tophat products
                if final_bamfiles is None:
                    raise RuntimeError('The "rseqc" stage needs the sorted '
                                       'BAM files made by the "tophat" '
                                       'stage; run "tophat" first.')
                # materialised because the arguments are unpacked several
                # times; a bare zip() is a one-shot iterator on Python 3
                rseq_args = list(zip(*product(curr_files, [config])))
                view.map(rseqc.bam_stat, *rseq_args)
                view.map(rseqc.genebody_coverage, *rseq_args)
                view.map(rseqc.junction_annotation, *rseq_args)
                view.map(rseqc.junction_saturation, *rseq_args)
                RPKM_args = zip(*product(final_bamfiles, [config]))
                RPKM_count_out = view.map(rseqc.RPKM_count, *RPKM_args)
                view.map(rseqc.fix_RPKM_count_file, RPKM_count_out)
                # NOTE: annotating the fixed RPKM tables with biomart
                # (annotate.annotate_table_with_biomart on "gene_id" /
                # "ensembl_gene_id" / "human") is disabled.
                view.map(rseqc.RPKM_saturation, *rseq_args)
                curr_files = tophat_outputs

    # combine htseq-count files and run deseq on them
    _, htseq_files = dict_to_vectors(htseq_outdict)
    deseq_config = _get_stage_config(config, "deseq")
    cell_types = _group_input_by_cell_type(htseq_files)
    for cell_type, files in cell_types.items():
        for comparison in deseq_config["comparisons"]:
            comparison_name = "_vs_".join(comparison)
            deseq_dir = os.path.join(results_dir, "deseq", cell_type,
                                     comparison_name)
            safe_makedir(deseq_dir)
            out_file = os.path.join(deseq_dir,
                                    comparison_name + ".counts.txt")
            # NOTE(review): the grouping does not depend on `comparison`, so
            # every comparison of a cell type runs on the same files --
            # confirm this is intended.
            files_by_condition = _group_input_by_condition(files)
            _emit_stage_message("deseq", files_by_condition)
            c, f = dict_to_vectors(files_by_condition)
            combined_out = htseq_count.combine_counts(f, None, out_file)
            deseq_out = os.path.join(deseq_dir, comparison_name)
            # report the condition vector that is actually handed to deseq
            logger.info("Running deseq on %s with conditions %s "
                        "and writing to %s" % (combined_out, c, deseq_out))
            deseq_out = view.map(deseq.run, [combined_out], [c], [deseq_out])
            annotate.annotate_table_with_biomart(deseq_out[0], "id",
                                                 "ensembl_gene_id", "human")

    # end gracefully
    stop_cluster()