예제 #1
0
def run_trinity(bam_files, output_dir, log_dir, num_cores, max_intron,
                jaccard_clip_flag):
    '''Run genome-guided Trinity once for every BAM file.

    For each BAM file, ``<output_dir>/trinity_<prefix>`` is passed to Trinity
    as ``--output`` (``<prefix>`` is the BAM base name without its
    extension). After the run, ``Trinity-GG.fasta`` in that directory is
    renamed to ``Trinity_<prefix>.fasta``. A BAM file whose renamed FASTA
    already exists is skipped.

    Args:
        bam_files: iterable of BAM file paths.
        output_dir: directory under which the per-BAM output dirs live.
        log_dir: Trinity's stdout/stderr are redirected to
            ``<log_dir>/<program_name>/trinity_<prefix>.log``.
        num_cores: value passed to ``--CPU``.
        max_intron: value passed to ``--genome_guided_max_intron``.
        jaccard_clip_flag: text inserted verbatim right after the Trinity
            executable on the command line.

    NOTE(review): ``max_memory``, ``this_dir``, ``program_name``,
    ``logger_time`` and ``logger_txt`` are not defined in this function;
    presumably they are module-level globals -- confirm.
    '''
    config = import_config(this_dir)
    trinity_exe = config['TRINITY_PATH']

    # Shell command template. The equivalent manual call looks like:
    #   Trinity --genome_guided_bam rnaseq_alignments.csorted.bam
    #   --max_memory 50G --genome_guided_max_intron 2000 --CPU 6
    command_template = (
        '{} {} --genome_guided_bam {} --genome_guided_max_intron {} '
        '--max_memory {} --CPU {} --output {} > {} 2>&1'
    )

    for bam_file in bam_files:
        bam_name = os.path.basename(bam_file)
        prefix = os.path.splitext(bam_name)[0]
        outdir = os.path.join(output_dir, 'trinity_{}'.format(prefix))
        renamed_fasta = os.path.join(
            outdir, 'Trinity_{}.fasta'.format(prefix))

        logger_time.debug('START: Trinity for {}'.format(prefix))
        if os.path.exists(renamed_fasta):
            # The per-sample FASTA is already there: nothing to run.
            logger_txt.debug(
                'Running Trinity has already been finished {}'.format(prefix))
        else:
            log_file = os.path.join(
                log_dir, program_name, 'trinity_{}.log'.format(prefix))
            command = command_template.format(
                trinity_exe, jaccard_clip_flag, bam_file, max_intron,
                max_memory, num_cores, outdir, log_file)
            logger_txt.debug('[Run] {}'.format(command))
            os.system(command)

            # Give the generic Trinity-GG.fasta a per-sample file name.
            os.rename(os.path.join(outdir, 'Trinity-GG.fasta'),
                      renamed_fasta)

        logger_time.debug('DONE : Trinity for {}'.format(prefix))
예제 #2
0
def check_busco_dataset(busco_dataset):
    '''Check that busco_dataset is a dataset name BUSCO knows about.

    Runs ``<BUSCO_PATH> --list-datasets`` (the path comes from
    import_config()), collects every whitespace-delimited token ending in
    ``_odb10`` from its standard output, and compares busco_dataset against
    that collection.

    Args:
        busco_dataset: dataset name to validate.

    Raises:
        SystemExit: via sys.exit() with an error message when busco_dataset
            is not among the names listed by BUSCO.
    '''
    d_conf = import_config()
    busco_bin = d_conf['BUSCO_PATH']
    proc = subprocess.Popen([busco_bin, '--list-datasets'],
                            stdout=subprocess.PIPE)
    # communicate() reads stdout to EOF, closes the pipe and waits for the
    # child, so no open file descriptor or unreaped process is left behind.
    stdout_data, _ = proc.communicate()
    output = stdout_data.decode('utf-8')
    busco_dbs = set(re.findall(r'\S+_odb10', output))
    if busco_dataset not in busco_dbs:
        sys.exit(
            '[ERROR] Invalid BUSCO DATASET: {}. Run busco --list-datasets to '
            'get a full list available datasets'.format(busco_dataset))
    print('BUSCO_DATASET is ok...')
def run_repeat_modeler(genome_assembly, output_dir, log_dir, num_cores):
    '''Run BuildDatabase and RepeatModeler unless a repeat library exists.

    The run counts as finished when
    ``<output_dir>/*/consensi.fa.classified`` matches at least one file.
    Otherwise the working directory is changed to output_dir (and is not
    changed back) and the two tools are run one after the other through the
    shell, each with stdout/stderr redirected to its own log file under
    ``<log_dir>/<program_name>/``. If the library is still missing
    afterwards, an error is logged and the process exits with status 2.

    Args:
        genome_assembly: used both as the database name and as the input
            file for BuildDatabase, and as ``-database`` for RepeatModeler.
        output_dir: directory the tools are run in.
        log_dir: parent directory of the log files.
        num_cores: value passed to RepeatModeler's ``-pa`` option.

    NOTE(review): ``this_dir``, ``program_name``, ``logger_time`` and
    ``logger_txt`` are not defined here; presumably module-level globals --
    confirm.
    '''
    config = import_config(this_dir)
    build_db_exe = config['BUILDDATABASE_PATH']
    modeler_exe = config['REPEATMODELER_PATH']

    # Manual equivalent of the two steps:
    #   BuildDatabase -name Choanephora_cucurbitarum
    #   ../Choanephora_cucurbitarum_assembly.fna
    #   RepeatModeler -database Choanephora_cucurbitarum -pa 25

    # Glob pattern for the classified consensus library.
    repeat_lib = os.path.join(output_dir, '*', 'consensi.fa.classified')

    if glob(repeat_lib):
        logger_txt.debug('Running RepeatModeler has already been finished')
    else:
        os.chdir(os.path.join(output_dir))
        logger_time.debug('START running RepeatModeler')

        # (log file name, command without the output redirection)
        steps = (
            ('build_database.log',
             '{} -name {} {}'.format(
                 build_db_exe, genome_assembly, genome_assembly)),
            ('repeat_modeler.log',
             '{} -database {} -pa {}'.format(
                 modeler_exe, genome_assembly, num_cores)),
        )
        for log_name, base_command in steps:
            log_path = os.path.join(log_dir, program_name, log_name)
            command = '{} > {} 2>&1'.format(base_command, log_path)
            logger_txt.debug('[Run] {}'.format(command))
            os.system(command)

        logger_time.debug('DONE  running RepeatModeler')

    # Check if RepeatModeler is properly finished
    if glob(repeat_lib):
        return
    logger_txt.debug(
        '[ERROR] RepeatModeler has finished abnormally. There is no '
        'consensi.fa.classified file.'
    )
    sys.exit(2)
예제 #4
0
Input: FASTQ files and genome assembly
Output: SAM and converted BAM file using SAMtools.
Last updated: Jul 13, 2020
'''

import os
import re
import sys
from argparse import ArgumentParser

from import_config import import_config
from set_logging import set_logging

# Parameters
# The configuration is loaded once, when this module is imported.
D_CONF = import_config()


# Main function
def main():
    '''Main function'''
    argparser_usage = (
        'run_hisat2.py -r <fastq1> <fastq2> <fastq3> ...'
        ' -o <output_dir> -l <log_dir> -f <ref_fasta> -c <num_cores>'
        ' -m <max_intron>'
    )
    parser = ArgumentParser(usage=argparser_usage)
    parser.add_argument(
        '-r', '--read_files', nargs='+', required=True,
        help='Multiople read files in fastq format'
    )
예제 #5
0
from Bio.Alphabet import IUPAC
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from collections import defaultdict
from argparse import ArgumentParser
from Bio.Alphabet import generic_dna

# Add the directory containing this script to sys.path before importing
# import_config (presumably because import_config.py lives next to this
# script -- confirm).
this_path = os.path.realpath(__file__)
this_dir = os.path.dirname(this_path)
sys.path.append(this_dir)
from import_config import import_config

# Parameters
# The configuration is loaded once, at import time, with this script's
# directory as the argument.
D_conf = import_config(this_dir)


# Main function
def main(argv):
    argparse_usage = (
        'create_markdown.py -f <input_fasta> -g <input_gff3> '
        '-t <trinity_assembly> -b <bam_file> -o <output_dir>'
    )
    parser = ArgumentParser(usage=argparse_usage)
    parser.add_argument(
        '-f', '--input_fasta', nargs=1, required=True,
        help='Genome assembly file in FASTA format'
    )
    parser.add_argument(
        '-g', '--input_gff3', nargs=1, required=True,
예제 #6
0
from export_files import export_files
from process_files import process_files
from import_config import import_config

# Interactive menu: keep prompting until one of the listed options is
# entered. The raw text is compared (instead of calling int() on it) so
# that non-numeric input re-prompts rather than raising ValueError, and only
# options that are actually handled below are accepted.
opt = ''
while opt not in ('0', '1', '2', '3'):
    print("[1] Import Configurations")
    print("[2] Process Files")
    print("[3] Export File")
    print("[0] Exit")
    # strip() so that input such as ' 1 ' still matches the checks below.
    opt = input("Choose 0 to 3: ").strip()

# Dispatch on the chosen option; each branch instantiates the imported
# object and calls its worker method.
if opt == '1':
    im = import_config()
    im.import_config()

elif opt == '2':
    pr = process_files()
    pr.process_files()

elif opt == '3':
    ex = export_files()
    ex.export_file()

elif opt == '0':
    print("Goodbye!")
    # Raise SystemExit directly: the exit() helper is installed by the site
    # module for interactive use and is not guaranteed to exist in scripts.
    raise SystemExit(0)