def run_trinity(bam_files, output_dir, log_dir, num_cores, max_intron,
                jaccard_clip_flag):
    '''Run genome-guided Trinity once for every BAM file.

    For each BAM file the assembly is written to
    <output_dir>/trinity_<prefix>/ (prefix = BAM file name without its
    extension) and the produced Trinity-GG.fasta is renamed to
    Trinity_<prefix>.fasta. A BAM file whose renamed output already exists
    is skipped.

    Args:
        bam_files: iterable of BAM file paths.
        output_dir: directory under which per-BAM output folders are made.
        log_dir: directory holding <program_name>/trinity_<prefix>.log.
        num_cores: value passed to Trinity --CPU.
        max_intron: value passed to --genome_guided_max_intron.
        jaccard_clip_flag: text inserted verbatim right after the Trinity
            executable (presumably '--jaccard_clip' or '' -- confirm
            against the caller).

    Raises:
        OSError: from os.rename when Trinity did not produce
            Trinity-GG.fasta (same failure as before; it is now preceded
            by an explicit error entry in the log).

    Relies on names from module scope: import_config, this_dir,
    program_name, max_memory, logger_time and logger_txt.
    '''
    D_conf = import_config(this_dir)
    trinity_bin = D_conf['TRINITY_PATH']

    # Trinity --genome_guided_bam rnaseq_alignments.csorted.bam
    # --max_memory 50G --genome_guided_max_intron 2000 --CPU 6
    for bam_file in bam_files:
        prefix = os.path.splitext(os.path.basename(bam_file))[0]
        outdir = os.path.join(output_dir, 'trinity_{}'.format(prefix))
        new_output = os.path.join(outdir, 'Trinity_{}.fasta'.format(prefix))
        logger_time.debug('START: Trinity for {}'.format(prefix))

        if os.path.exists(new_output):
            logger_txt.debug(
                'Running Trinity has already been finished {}'.format(prefix))
            logger_time.debug('DONE : Trinity for {}'.format(prefix))
            continue

        log_file = os.path.join(
            log_dir, program_name, 'trinity_{}.log'.format(prefix))
        command = (
            '{} {} --genome_guided_bam {} --genome_guided_max_intron {} '
            '--max_memory {} --CPU {} --output {} > {} 2>&1'.format(
                trinity_bin, jaccard_clip_flag, bam_file, max_intron,
                max_memory, num_cores, outdir, log_file))
        logger_txt.debug('[Run] {}'.format(command))

        # Keep the exit status: previously it was dropped, so a failed run
        # was only noticed through an unexplained rename error.
        status = os.system(command)
        if status != 0:
            logger_txt.debug(
                '[ERROR] Trinity returned a non-zero exit status ({}) for '
                '{}. See {}'.format(status, prefix, log_file))

        # Rename the file
        trinity_output = os.path.join(outdir, 'Trinity-GG.fasta')
        if not os.path.exists(trinity_output):
            logger_txt.debug(
                '[ERROR] Trinity output not found: {}. See {}'.format(
                    trinity_output, log_file))
        # Raises OSError when the output is missing, exactly as before.
        os.rename(trinity_output, new_output)

        logger_time.debug('DONE : Trinity for {}'.format(prefix))
def check_busco_dataset(busco_dataset):
    '''Check that busco_dataset is listed by `busco --list-datasets`.

    The BUSCO executable is taken from the 'BUSCO_PATH' entry of the
    configuration returned by import_config(). Dataset names are collected
    from its standard output as every whitespace-free token ending in
    `_odb<digits>`.

    Args:
        busco_dataset: dataset name to validate (e.g. 'fungi_odb10').

    Exits the program through sys.exit() with an error message when the
    name is not in the list; otherwise prints a confirmation.
    '''
    d_conf = import_config()
    busco_bin = d_conf['BUSCO_PATH']

    proc = subprocess.Popen(
        [busco_bin, '--list-datasets'], stdout=subprocess.PIPE)
    # communicate() drains the pipe, closes it and reaps the child; a bare
    # proc.stdout.read() left the pipe open and the process un-waited.
    raw_stdout, _ = proc.communicate()
    output = raw_stdout.decode('utf-8')

    # Accept any OrthoDB release suffix (_odb10, _odb12, ...), not only
    # _odb10, so datasets of other BUSCO releases are recognised as well.
    busco_dbs = set(re.findall(r'\S+_odb\d+', output))

    if busco_dataset not in busco_dbs:
        sys.exit(
            '[ERROR] Invalid BUSCO DATASET: {}. Run busco --list-datasets to '
            'get a full list available datasets'.format(busco_dataset))
    print('BUSCO_DATASET is ok...')
def run_repeat_modeler(genome_assembly, output_dir, log_dir, num_cores):
    '''Build a RepeatModeler database and run RepeatModeler on it.

    The run is skipped when <output_dir>/*/consensi.fa.classified already
    exists. Otherwise the working directory is changed to output_dir (and
    not restored afterwards), BuildDatabase and RepeatModeler are executed
    through the shell, and their output is redirected to
    <log_dir>/<program_name>/build_database.log and repeat_modeler.log.

    Args:
        genome_assembly: path of the genome assembly; also used as the
            database name given to BuildDatabase / RepeatModeler.
        output_dir: directory in which the tools are run.
        log_dir: base directory of the log files.
        num_cores: value passed to RepeatModeler -pa.

    Exits with status 2 when no consensi.fa.classified file exists after
    the run.

    Relies on names from module scope: import_config, this_dir,
    program_name, glob, logger_time and logger_txt.
    '''
    D_conf = import_config(this_dir)
    builddatabase_bin = D_conf['BUILDDATABASE_PATH']
    repeatmodeler_bin = D_conf['REPEATMODELER_PATH']

    # Resolve every path before os.chdir(): relative paths would otherwise
    # point to different locations once the working directory changes,
    # which broke the log redirections and the final output check.
    genome_assembly = os.path.abspath(genome_assembly)
    output_dir = os.path.abspath(output_dir)
    log_dir = os.path.abspath(log_dir)

    # BuildDatabase -name Choanephora_cucurbitarum
    # ../Choanephora_cucurbitarum_assembly.fna
    # RepeatModeler -database Choanephora_cucurbitarum -pa 25

    # Get repeat model
    repeat_lib = os.path.join(output_dir, '*', 'consensi.fa.classified')

    if glob(repeat_lib):
        logger_txt.debug('Running RepeatModeler has already been finished')
    else:
        os.chdir(output_dir)
        logger_time.debug('START running RepeatModeler')

        log_file1 = os.path.join(log_dir, program_name, 'build_database.log')
        command1 = '{} -name {} {} > {} 2>&1'.format(
            builddatabase_bin, genome_assembly, genome_assembly, log_file1
        )
        logger_txt.debug('[Run] {}'.format(command1))
        os.system(command1)

        log_file2 = os.path.join(log_dir, program_name, 'repeat_modeler.log')
        command2 = '{} -database {} -pa {} > {} 2>&1'.format(
            repeatmodeler_bin, genome_assembly, num_cores, log_file2
        )
        logger_txt.debug('[Run] {}'.format(command2))
        os.system(command2)

        logger_time.debug('DONE running RepeatModeler')

    # Check if RepeatModeler is properly finished
    if not glob(repeat_lib):
        logger_txt.debug(
            '[ERROR] RepeatModeler has finished abnormally. There is no '
            'consensi.fa.classified file.'
        )
        sys.exit(2)
Input: FASTQ files and genome assembly Output: SAM and converted BAM file using SAMtools. Last updated: Jul 13, 2020 ''' import os import re import sys from argparse import ArgumentParser from import_config import import_config from set_logging import set_logging # Parameters D_CONF = import_config() # Main function def main(): '''Main function''' argparser_usage = ( 'run_hisat2.py -r <fastq1> <fastq2> <fastq3> ...' ' -o <output_dir> -l <log_dir> -f <ref_fasta> -c <num_cores>' ' -m <max_intron>' ) parser = ArgumentParser(usage=argparser_usage) parser.add_argument( '-r', '--read_files', nargs='+', required=True, help='Multiople read files in fastq format' )
from Bio.Alphabet import IUPAC import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt from collections import defaultdict from argparse import ArgumentParser from Bio.Alphabet import generic_dna # Get Logging this_path = os.path.realpath(__file__) this_dir = os.path.dirname(this_path) sys.path.append(this_dir) from import_config import import_config # Parameters D_conf = import_config(this_dir) # Main function def main(argv): argparse_usage = ( 'create_markdown.py -f <input_fasta> -g <input_gff3> ' '-t <trinity_assembly> -b <bam_file> -o <output_dir>' ) parser = ArgumentParser(usage=argparse_usage) parser.add_argument( '-f', '--input_fasta', nargs=1, required=True, help='Genome assembly file in FASTA format' ) parser.add_argument( '-g', '--input_gff3', nargs=1, required=True,
from export_files import export_files
from process_files import process_files
from import_config import import_config

# Interactive menu: ask until a recognised choice is entered, then run the
# matching action once and finish.
#
# Choices are compared as text so that non-numeric input re-prompts instead
# of crashing with ValueError (the previous loop called int() on the raw
# input). '4' has no menu entry and no action; it is still accepted because
# the original loop accepted it, and it simply ends the script.
VALID_OPTIONS = ('0', '1', '2', '3', '4')

opt = ''
while opt not in VALID_OPTIONS:
    print("[1] Import Configurations")
    print("[2] Process Files")
    print("[3] Export File")
    print("[0] Exit")
    # Prompt now names the choices actually shown above (was "1 to 4").
    opt = input("Choose 0 to 3: ").strip()

if opt == '1':
    im = import_config()
    im.import_config()
elif opt == '2':
    pr = process_files()
    pr.process_files()
elif opt == '3':
    ex = export_files()
    ex.export_file()
elif opt == '0':
    print("Goodbye!")
    # Same effect as exit(0) without depending on the site module.
    raise SystemExit(0)