Exemple #1
0
def run(reads_file, reference_file, machine_name, output_path, output_suffix=''):
    """Build the reference index if it is missing, then align the reads.

    The option string handed to the aligner binary is chosen from
    machine_name (compared case-insensitively). Both the indexing and the
    alignment command are prefixed with basicdefines.measure_command() so
    that their measurements land in .memtime files under output_path.
    Progress messages are written to stderr.

    Args:
        reads_file: path of the reads, passed to the aligner with -d.
        reference_file: path of the reference, passed with -r; the index is
            looked for as <reference_file>.gmidx and <reference_file>.gmidxsec.
        machine_name: 'illumina', 'roche', 'pacbio', 'nanopore' or 'debug';
            any other value selects the default options.
        output_path: directory receiving the .sam and .memtime files.
        output_suffix: optional tag appended to the output file stem.

    Returns:
        Path of the SAM file the aligner was asked to write.
    """
    def log(text):
        # Every status line carries the same "[<mapper> wrapper] " prefix.
        sys.stderr.write('[%s wrapper] %s' % (MAPPER_NAME, text))

    thread_count = multiprocessing.cpu_count() / 2
    technology = machine_name.lower()

    if technology in ('illumina', 'roche', 'pacbio', 'nanopore'):
        # The named sequencing technologies currently share one option set.
        options = '-v 5 -t %d -B 0 -b 3' % thread_count
    elif technology == 'debug':
        options = '-B 0 -b 3 -F 0.05 -l 9 -A 12 -v 7 -y 31676 -n 1 -t 1'
    else:
        options = '-v 5 -t %d' % thread_count

    stem = MAPPER_NAME if output_suffix == '' else '%s-%s' % (MAPPER_NAME, output_suffix)

    # Computed as in the original wrapper; not used further below.
    reads_stem = os.path.splitext(os.path.basename(reads_file))[0]
    sam_file = '%s/%s.sam' % (output_path, stem)
    alignment_memtime = '%s/%s.memtime' % (output_path, stem)
    index_memtime = '%s/%s-index.memtime' % (output_path, stem)

    # Indexing step: only needed when one of the two index files is absent.
    index_ready = (os.path.exists(reference_file + '.gmidx')
                   and os.path.exists(reference_file + '.gmidxsec'))
    if index_ready:
        log('Reference index already exists. Continuing.\n')
        sys.stderr.flush()
    else:
        log('Generating index...\n')
        index_command = '%s %s/%s -I -r %s' % (
            basicdefines.measure_command(index_memtime), ALIGNER_PATH, BIN,
            reference_file)
        log('%s\n' % index_command)
        subprocess.call(index_command, shell=True)
        sys.stderr.write('\n\n')

    # Alignment step.
    log('Running %s...\n' % MAPPER_NAME)
    align_command = '%s %s/%s %s -r %s -d %s -o %s' % (
        basicdefines.measure_command(alignment_memtime), ALIGNER_PATH, BIN,
        options, reference_file, reads_file, sam_file)
    log('%s\n' % align_command)
    subprocess.call(align_command, shell=True)
    sys.stderr.write('\n\n')

    log('%s wrapper script finished processing.\n' % MAPPER_NAME)

    return sam_file
Exemple #2
0
def run(reads_file, reference_file, machine_name, output_path, output_suffix=''):
    """Index the reference if needed and align the reads into a SAM file.

    The aligner is invoked as "<BIN> index <reference>" when
    <reference_file>.bwt does not exist, and then as
    "<BIN> mem <options> <reference> <reads> > <sam>". Both commands are
    prefixed with basicdefines.measure_command() so that measurements are
    written to .memtime files under output_path. Progress goes to stderr.

    Args:
        reads_file: path of the reads file.
        reference_file: path of the reference sequence.
        machine_name: 'pacbio' adds "-x pacbio", 'nanopore' adds "-x ont2d"
            (case-insensitive); every other value uses only the thread option.
        output_path: directory receiving the .sam and .memtime files.
        output_suffix: optional tag appended to the output file stem.

    Returns:
        Path of the SAM file the alignment output was redirected to.
    """
    # Floor division keeps the count an integer on Python 3 as well (true
    # division would render "-t 2.0"); the lower bound avoids "-t 0" on a
    # single-core host.
    num_threads = max(1, multiprocessing.cpu_count() // 2)

    technology = machine_name.lower()
    if technology == 'pacbio':
        parameters = '-t %d -x pacbio' % num_threads
    elif technology == 'nanopore':
        parameters = '-t %d -x ont2d' % num_threads
    else:
        # 'illumina', 'roche', 'debug' and unknown values all use defaults.
        parameters = '-t %d' % num_threads

    if output_suffix != '':
        output_filename = '%s-%s' % (MAPPER_NAME, output_suffix)
    else:
        output_filename = MAPPER_NAME

    sam_file = '%s/%s.sam' % (output_path, output_filename)
    memtime_file = '%s/%s.memtime' % (output_path, output_filename)
    memtime_file_index = '%s/%s-index.memtime' % (output_path, output_filename)

    # Run the indexing process, and measure execution time and memory.
    if not os.path.exists(reference_file + '.bwt'):
        sys.stderr.write('[%s wrapper] Generating index...\n' % MAPPER_NAME)
        command = '%s %s/%s index %s' % (
            basicdefines.measure_command(memtime_file_index), ALIGNER_PATH,
            BIN, reference_file)
        sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
        subprocess.call(command, shell=True)
        sys.stderr.write('\n\n')
    else:
        sys.stderr.write('[%s wrapper] Reference index already exists. Continuing.\n' % MAPPER_NAME)
        sys.stderr.flush()

    # Run the alignment process, and measure execution time and memory.
    sys.stderr.write('[%s wrapper] Running %s...\n' % (MAPPER_NAME, MAPPER_NAME))
    command = '%s %s/%s mem %s %s %s > %s' % (
        basicdefines.measure_command(memtime_file), ALIGNER_PATH, BIN,
        parameters, reference_file, reads_file, sam_file)
    sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
    subprocess.call(command, shell=True)
    sys.stderr.write('\n\n')

    sys.stderr.write('[%s wrapper] %s wrapper script finished processing.\n' % (MAPPER_NAME, MAPPER_NAME))

    return sam_file
Exemple #3
0
def run(reads_file,
        reference_file,
        machine_name,
        output_path,
        output_suffix=''):
    """Create a config file for the reads and run the assembler with it.

    CONFIG_TEMPLATE_PATH is copied to <output_path>/sd2.config and one line
    naming the reads file is appended: "q=<reads>" for .fq/.fastq input,
    "p=<reads>" for .fa/.fasta input. The assembler is then started from
    inside output_path, wrapped in basicdefines.measure_command(), with its
    stdout/stderr redirected to ass.log / ass.err.

    Args:
        reads_file: path of the reads; the extension selects the config key.
        reference_file: unused by this wrapper.
        machine_name: unused by this wrapper.
        output_path: working directory for the assembler and its outputs.
        output_suffix: unused by this wrapper.

    Returns:
        None. For an unsupported reads extension an error is written to
        stderr and the assembler is not started.
    """
    config_path = os.path.join(output_path, 'sd2.config')
    shutil.copy(CONFIG_TEMPLATE_PATH, config_path)

    if reads_file.endswith(('.fq', '.fastq')):
        reads_entry = 'q=%s\n' % reads_file
    elif reads_file.endswith(('.fa', '.fasta')):
        reads_entry = 'p=%s\n' % reads_file
    else:
        sys.stderr.write('\n[%s wrapper] Unsupported file format (%s)!\n' %
                         (ASSEMBLER_NAME, reads_file))
        # Without a reads entry the config is incomplete, so launching the
        # assembler would only burn time before failing.
        return

    with open(config_path, 'a') as configfile:
        configfile.write(reads_entry)

    # Integer thread count on both Python 2 and 3, never below one.
    num_threads = max(1, multiprocessing.cpu_count() // 2)

    memtime_path = os.path.join(output_path, ASSEMBLER_NAME + '.memtime')
    command = 'cd %s; %s %s all -s %s -p %d -K 127 -R -o graph_prefix 1>ass.log 2>ass.err' % (
        output_path, basicdefines.measure_command(memtime_path), ASSEMBLER_BIN,
        config_path, num_threads)
    subprocess.call(command, shell=True)
Exemple #4
0
def run(reads_file,
        reference_file,
        machine_name,
        output_path,
        output_suffix=''):
    """Align the reads against the reference and return the SAM path.

    The thread count is read from the NUM_THREADS environment variable.
    The aligner command is prefixed with basicdefines.measure_command() so
    that measurements are written to a .memtime file under output_path, and
    its stdout is redirected into the SAM file. No separate indexing step
    is performed by this wrapper.

    Args:
        reads_file: path of the reads file.
        reference_file: path of the reference sequence.
        machine_name: selects extra options (case-insensitive): 'pacbio',
            'longindel' and 'longindel2' add their own flags; every other
            value, including 'nanopore', passes only the thread option.
        output_path: directory receiving the .sam and .memtime files.
        output_suffix: optional tag appended to the output file stem.

    Returns:
        Path of the SAM file the alignment output was redirected to.

    Raises:
        KeyError: if NUM_THREADS is not set in the environment.
    """
    num_threads = os.environ["NUM_THREADS"]
    technology = machine_name.lower()

    if technology == 'pacbio':
        parameters = '-t %s -x pacbio' % str(num_threads)

    elif technology == 'nanopore':
        # The original template had no conversion specifier
        # ('-ax map-ont' % ...), which raised TypeError on every nanopore
        # run. The "-ax map-ont" preset is already hard-coded in the command
        # line below, so only the thread option is needed here.
        parameters = '-t %s' % str(num_threads)

    elif technology == 'longindel':
        parameters = '-t %s -x ont2d -w 1200 -d 1200' % str(num_threads)

    elif technology == 'longindel2':
        parameters = '-t %s -x ont2d -w 5000 -d 5000' % str(num_threads)

    else:
        # 'illumina', 'roche', 'debug' and unknown values: thread option only.
        parameters = '-t %s' % str(num_threads)

    if output_suffix != '':
        output_filename = '%s-%s' % (MAPPER_NAME, output_suffix)
    else:
        output_filename = MAPPER_NAME

    sam_file = '%s/%s.sam' % (output_path, output_filename)
    memtime_file = '%s/%s.memtime' % (output_path, output_filename)

    # Run the alignment process, and measure execution time and memory.
    # NOTE(review): "-ax map-ont" is appended for every machine_name, after
    # any "-x ..." given in parameters -- confirm that this is intended.
    sys.stderr.write('[%s wrapper] Running %s...\n' %
                     (MAPPER_NAME, MAPPER_NAME))
    command = '%s %s/%s %s -ax map-ont  %s  %s > %s' % (
        basicdefines.measure_command(memtime_file), ALIGNER_PATH, BIN,
        parameters, reference_file, reads_file, sam_file)
    sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
    subprocess.call(command, shell=True)
    sys.stderr.write('\n\n')

    sys.stderr.write('[%s wrapper] %s wrapper script finished processing.\n' %
                     (MAPPER_NAME, MAPPER_NAME))

    return sam_file
Exemple #5
0
def run(reads_file, reference_file, machine_name, output_path, output_suffix=''):
    """Run the assembler on the reads, converting FASTQ to FASTA first.

    The assembler only accepts FASTA input. When reads_file ends in .fq or
    .fastq it is converted with FQ2FA_BIN into a sibling file with a .fa
    extension, and that file is used instead. The assembler command is
    prefixed with basicdefines.measure_command() so that measurements are
    written to <output_path>/<ASSEMBLER_NAME>.memtime.

    Args:
        reads_file: path of the reads (FASTA or FASTQ).
        reference_file: unused by this wrapper.
        machine_name: unused by this wrapper.
        output_path: directory passed to the assembler with -o.
        output_suffix: unused by this wrapper.

    Returns:
        None.
    """
    basename, ext = os.path.splitext(reads_file)
    if ext in ('.fq', '.fastq'):
        fasta_filename = basename + '.fa'
        command = '%s %s %s' % (FQ2FA_BIN, reads_file, fasta_filename)
        subprocess.call(command, shell=True)
        # Use the created FASTA file as the reads file from now on.
        reads_file = fasta_filename

    # Integer thread count on both Python 2 and 3, never below one.
    num_threads = max(1, multiprocessing.cpu_count() // 2)

    memtime_path = os.path.join(output_path, ASSEMBLER_NAME + '.memtime')
    command = '%s %s --num_threads %d -r %s -o %s' % (
        basicdefines.measure_command(memtime_path), ASSEMBLER_BIN,
        num_threads, reads_file, output_path)
    subprocess.call(command, shell=True)
Exemple #6
0
def run(reads_file, reference_file, machine_name, output_path, output_suffix=''):
    """Run the assembler with a genome-size estimate taken from the reference.

    The assembler is only started when machine_name is a member of
    basicdefines.TECH; otherwise a message is written to stderr and nothing
    is run. The same hard-coded parameters are used for all sequencers.
    The command is executed from inside output_path and prefixed with
    basicdefines.measure_command() so that measurements are written to
    <output_path>/<ASSEMBLER_NAME>.memtime.

    TODO: if FASTQ reads are given, convert them to FASTA first (the
          assembler reportedly runs only on FASTA).

    Args:
        reads_file: path of the reads, passed after the "f" keyword.
        reference_file: reference parsed with fastqparser.read_fastq() to
            derive the genome size.
        machine_name: must be listed in basicdefines.TECH.
        output_path: working directory for the assembler.
        output_suffix: unused by this wrapper.

    Returns:
        None.
    """
    if machine_name not in basicdefines.TECH:
        # '\\}' keeps the exact bytes the original (invalid) '\}' escape
        # produced, without the invalid-escape warning.
        sys.stderr.write('\\}\nInvalid machine_name parameter for assembler %s' % ASSEMBLER_NAME)
        sys.stderr.write('\nSkipping ....')
        return

    # Integer thread count on both Python 2 and 3, never below one.
    num_threads = max(1, multiprocessing.cpu_count() // 2)

    # Genome size is the length of element [1][0] of the parsed reference.
    # NOTE(review): presumably that is the first sequence in the file, so a
    # multi-sequence reference would be under-counted -- confirm against
    # fastqparser.read_fastq().
    reference_fastq = fastqparser.read_fastq(reference_file)
    genomesize = len(reference_fastq[1][0])

    memtime_path = os.path.join(output_path, ASSEMBLER_NAME + '.memtime')
    command = 'cd %s; %s %s -t %d k 21 GS %d f %s' % (
        output_path, basicdefines.measure_command(memtime_path),
        ASSEMBLER_BIN, num_threads, 10 * genomesize, reads_file)
    subprocess.call(command, shell=True)
Exemple #7
0
def run(reads_file,
        reference_file,
        machine_name,
        output_path,
        output_suffix=''):
    """Run the assembler under mpiexec and store its .memtime in output_path.

    The assembler creates output_path itself, so the folder does not exist
    while the measurement tool is running. The .memtime file is therefore
    written to the parent directory first and moved into output_path after
    the run.

    Args:
        reads_file: path of the reads, passed with -s.
        reference_file: unused by this wrapper.
        machine_name: unused by this wrapper.
        output_path: output folder created by the assembler (-o).
        output_suffix: unused by this wrapper.

    Returns:
        None.
    """
    # At least one MPI process: "mpiexec -n 0" would be produced on a
    # single-core host otherwise. Floor division keeps an int on Python 3.
    num_threads = max(1, multiprocessing.cpu_count() // 2)

    memtime_path = os.path.join(output_path, ASSEMBLER_NAME + '.memtime')

    parent_dir = os.path.dirname(output_path.rstrip('/'))
    temp_memtime_path = os.path.join(parent_dir, ASSEMBLER_NAME + '.memtime')

    command = '%s mpiexec -n %d %s -s %s -o %s' % (
        basicdefines.measure_command(temp_memtime_path), num_threads,
        ASSEMBLER_BIN, reads_file, output_path)
    subprocess.call(command, shell=True)

    # After the run, move the memtime file into the assembler output folder.
    shutil.move(temp_memtime_path, memtime_path)
Exemple #8
0
def run(reads_file, reference_file, machine_name, output_path, output_suffix=''):
    """Build the reference DB if needed, align the reads, convert MAF to SAM.

    Steps, each prefixed with basicdefines.measure_command() so that its
    measurements are written to a separate .memtime file under output_path:
      1. lastdb builds <reference_file>.db unless <reference_file>.db.suf
         already exists.
      2. The aligner writes its output, redirected, to a .maf file.
      3. maf-convert appends the SAM records to a .sam file that already
         holds the header produced by get_sam_header().
    Reads whose path ends in 'q' are first converted to FASTA with
    fastqparser.convert_to_fasta().

    Args:
        reads_file: path of the reads (FASTA, or FASTQ ending in 'q').
        reference_file: path of the reference sequence.
        machine_name: 'pacbio' and 'nanopore' (case-insensitive) select
            "-v -q 1 -r 1 -a 1 -b 1"; everything else uses "-v ".
        output_path: directory receiving .maf, .sam and .memtime files.
        output_suffix: optional tag appended to the output file stem.

    Returns:
        Path of the SAM file.
    """
    if machine_name.lower() in ('pacbio', 'nanopore'):
        parameters = '-v -q 1 -r 1 -a 1 -b 1'
    else:
        # 'illumina', 'roche', 'debug' and unknown values.
        parameters = '-v '

    if output_suffix != '':
        output_filename = '%s-%s' % (MAPPER_NAME, output_suffix)
    else:
        output_filename = MAPPER_NAME

    reads_fasta = reads_file
    maf_file = '%s/%s.maf' % (output_path, output_filename)
    sam_file = '%s/%s.sam' % (output_path, output_filename)
    memtime_file = '%s/%s.memtime' % (output_path, output_filename)
    memtime_file_index = '%s/%s-index.memtime' % (output_path, output_filename)
    memtime_file_maftosam = '%s/%s-maftosam.memtime' % (output_path, output_filename)
    reference_db_file = reference_file + '.db'

    # A path ending in 'q' is treated as FASTQ and converted to FASTA.
    # endswith() also copes with an empty string, where [-1] would raise.
    if reads_file.endswith('q'):
        sys.stderr.write('[%s wrapper] Converting FASTQ to FASTA...\n' % MAPPER_NAME)
        reads_fasta = reads_file[0:-1] + 'a'
        fastqparser.convert_to_fasta(reads_file, reads_fasta)
        sys.stderr.write('\n')

    # Run the indexing process, and measure execution time and memory.
    if not os.path.exists(reference_db_file + '.suf'):
        sys.stderr.write('[%s wrapper] Generating index...\n' % MAPPER_NAME)
        command = '%s %s/lastdb %s %s' % (
            basicdefines.measure_command(memtime_file_index), ALIGNER_PATH,
            reference_db_file, reference_file)
        sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
        subprocess.call(command, shell=True)
        sys.stderr.write('\n\n')
    else:
        sys.stderr.write('[%s wrapper] Reference DB already exists. Continuing.\n' % MAPPER_NAME)
        sys.stderr.flush()

    # Run the alignment process, and measure execution time and memory.
    sys.stderr.write('[%s wrapper] Running %s...\n' % (MAPPER_NAME, MAPPER_NAME))
    command = '%s %s/%s %s %s %s > %s' % (
        basicdefines.measure_command(memtime_file), ALIGNER_PATH, BIN,
        parameters, reference_db_file, reads_fasta, maf_file)
    sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
    subprocess.call(command, shell=True)
    sys.stderr.write('\n\n')

    # Convert MAF to SAM: write the header first, then let maf-convert
    # append the records (">>") to the same file.
    sys.stderr.write('[%s wrapper] Converting the output MAF to SAM file...\n' % MAPPER_NAME)
    with open(sam_file, 'w') as fp:
        fp.write(get_sam_header(reference_file))
    command = '%s %s/../scripts/maf-convert sam %s >> %s' % (
        basicdefines.measure_command(memtime_file_maftosam), ALIGNER_PATH,
        maf_file, sam_file)
    sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
    subprocess.call(command, shell=True)
    sys.stderr.write('\n\n')

    sys.stderr.write('[%s wrapper] %s wrapper script finished processing.\n' % (MAPPER_NAME, MAPPER_NAME))

    return sam_file
Exemple #9
0
def measure_command(measure_file):
    """Return the command prefix that records measurements in measure_file.

    Delegates to basicdefines.measure_command() when MODULE_BASICDEFINES is
    True and the path in CGMEMTIME_FILE exists; otherwise returns a
    /usr/bin/time invocation (with a trailing space) that writes its report
    to measure_file.
    """
    delegate = (MODULE_BASICDEFINES == True and os.path.exists(CGMEMTIME_FILE))
    if not delegate:
        # Fallback: /usr/bin/time cannot report CPU time, so -1.0 is written.
        return '/usr/bin/time --format "Command line: %%C\\nReal time: %%e s\\nCPU time: -1.0 s\\nUser time: %%U s\\nSystem time: %%S s\\nMaximum RSS: %%M kB\\nExit status: %%x" --quiet -o %s ' % measure_file
    return basicdefines.measure_command(measure_file)
def run(reads_file,
        reference_file,
        machine_name,
        output_path,
        output_suffix=''):
    """Index the reference when necessary, align the reads, return the SAM path.

    machine_name (case-insensitive) picks one of several predefined option
    sets; unknown names fall back to "-v 5 -t <threads>". The index is
    considered present when both <reference_file>.gmidx and
    <reference_file>.gmidxsec exist. The indexing and alignment commands
    are each prefixed with basicdefines.measure_command() so that their
    measurements are written to .memtime files under output_path.
    Progress messages are written to stderr.

    Args:
        reads_file: path of the reads, passed with -d.
        reference_file: path of the reference, passed with -r.
        machine_name: name of the option set to use.
        output_path: directory receiving the .sam and .memtime files.
        output_suffix: optional tag appended to the output file stem.

    Returns:
        Path of the SAM file the aligner was asked to write.
    """
    thread_count = multiprocessing.cpu_count() / 2

    # Option templates per mode; each takes the thread count as its only
    # substitution.
    option_templates = {
        'illumina': '-x illumina -v 5 -t %d -B 0',
        'roche': '-x illumina -v 5 -t %d -B 0',
        'pacbio': '-v 5 -t %d -B 0',
        'nanopore': '-v 5 -t %d -B 0',
        'nanoporecirc': '-v 5 -t %d -C -B 0',
        'myers': '-a myers -v 5 -t %d -B 0',
        'gotoh': '-a gotoh -v 5 -t %d -B 0',
        'anchor': '-a anchor -v 5 -t %d -B 0',
        'anchorcirc': '-a anchor -C -v 5 -t %d -B 0',
        'anchorgotoh': '-a anchorgotoh -v 5 -t %d -B 0',
        'metagen': '-v 5 -t %d -C -B 0 -Z',
        'metagenanchor': '-a anchor -v 5 -t %d -C -B 0 -Z',
    }
    options = option_templates.get(machine_name.lower(), '-v 5 -t %d') % thread_count

    stem = MAPPER_NAME
    if output_suffix != '':
        stem = '%s-%s' % (MAPPER_NAME, output_suffix)

    # Computed as in the original wrapper; not used further below.
    reads_stem = os.path.splitext(os.path.basename(reads_file))[0]
    sam_file = '%s/%s.sam' % (output_path, stem)
    alignment_memtime = '%s/%s.memtime' % (output_path, stem)
    index_memtime = '%s/%s-index.memtime' % (output_path, stem)

    tag = '[%s wrapper]' % MAPPER_NAME

    # Indexing step, skipped when both index files are already on disk.
    have_index = (os.path.exists(reference_file + '.gmidx')
                  and os.path.exists(reference_file + '.gmidxsec'))
    if have_index:
        sys.stderr.write(tag + ' Reference index already exists. Continuing.\n')
        sys.stderr.flush()
    else:
        sys.stderr.write(tag + ' Generating index...\n')
        index_command = '%s %s/%s -I -r %s' % (
            basicdefines.measure_command(index_memtime), ALIGNER_PATH, BIN,
            reference_file)
        sys.stderr.write('%s %s\n' % (tag, index_command))
        subprocess.call(index_command, shell=True)
        sys.stderr.write('\n\n')

    # Alignment step.
    sys.stderr.write('%s Running %s...\n' % (tag, MAPPER_NAME))
    align_command = '%s %s/%s %s -r %s -d %s -o %s' % (
        basicdefines.measure_command(alignment_memtime), ALIGNER_PATH, BIN,
        options, reference_file, reads_file, sam_file)
    sys.stderr.write('%s %s\n' % (tag, align_command))
    subprocess.call(align_command, shell=True)
    sys.stderr.write('\n\n')

    sys.stderr.write('%s %s wrapper script finished processing.\n' %
                     (tag, MAPPER_NAME))

    return sam_file
Exemple #11
0
def measure_command_wrapper(out_filename):
    """Return the command prefix that records measurements in out_filename.

    Uses basicdefines.measure_command() when USE_BASICDEFINES_ is True;
    otherwise returns a /usr/bin/time invocation (with a trailing space)
    whose report is written to out_filename.
    """
    if USE_BASICDEFINES_ != True:
        # Fallback: /usr/bin/time cannot report CPU time, so -1.0 is written.
        return '/usr/bin/time --format "Command line: %%C\\nReal time: %%e s\\nCPU time: -1.0 s\\nUser time: %%U s\\nSystem time: %%S s\\nMaximum RSS: %%M kB\\nExit status: %%x" --quiet -o %s ' % out_filename
    return basicdefines.measure_command(out_filename)
Exemple #12
0
def run(reads_file,
        reference_file,
        machine_name,
        output_path,
        output_suffix=''):
    """Run a seven-step assembly pipeline and write a summary .memtime file.

    Each step is a separate shell command executed from inside output_path
    (the working directory is not preserved across shell invocations, hence
    the repeated "cd") and prefixed with basicdefines.measure_command(), so
    step i writes <ASSEMBLER_NAME>_<i>.memtime. Afterwards the per-step
    files are parsed and combined into <ASSEMBLER_NAME>.memtime: times are
    summed, and the maximum RSS is the largest value of any step.

    The pipeline follows the sga-celegans example. Error correction and
    read filtering from that example are deliberately not run; filtering
    was observed to remove too much.

    TODO: parameters are hard-coded; some should be inferred from the reads
          file or set by the user.

    Args:
        reads_file: path of the reads file.
        reference_file: unused by this wrapper.
        machine_name: unused by this wrapper.
        output_path: working directory for all steps and outputs.
        output_suffix: unused by this wrapper.

    Returns:
        None.
    """
    # Integer thread count on both Python 2 and 3, never below one.
    num_threads = max(1, multiprocessing.cpu_count() // 2)

    # Sub-command and arguments of every pipeline step, in execution order.
    step_arguments = [
        # 1. Preprocess
        'preprocess -o sgaccs.fasta %s' % reads_file,
        # 3a. Index data
        'index -a ropebwt -t %d --no-reverse %s' % (num_threads, reads_file),
        # 3c. Merge simple, unbranched chains of vertices
        'fm-merge -m 30 -t %d -o merged.k21.fa %s' % (num_threads, reads_file),
        # 3d. Build an index of the merged sequences
        'index -d 1000000 -t %d merged.k21.fa' % num_threads,
        # 3e. Remove any substrings that were generated from the merge process
        'rmdup -t %d merged.k21.fa' % num_threads,
        # 3f. Compute the structure of the string graph
        'overlap -m 30 -t %d merged.k21.rmdup.fa' % num_threads,
        # 3g. Perform the contig assembly without bubble popping
        'assemble -m 30 -o assemble merged.k21.rmdup.asqg.gz',
    ]

    step_memtime_paths = []
    for step_number, arguments in enumerate(step_arguments, 1):
        step_memtime_path = os.path.join(
            output_path, '%s_%d.memtime' % (ASSEMBLER_NAME, step_number))
        step_memtime_paths.append(step_memtime_path)
        command = 'cd %s; %s %s %s' % (
            output_path, basicdefines.measure_command(step_memtime_path),
            ASSEMBLER_BIN, arguments)
        subprocess.call(command, shell=True)

    # Calculate the memtime summary and write it to a file.
    memtime_path = os.path.join(output_path, ASSEMBLER_NAME + '.memtime')
    real_time = 0.0
    cpu_time = 0.0
    user_time = 0.0
    system_time = 0.0
    max_rss = 0
    with open(memtime_path, 'w') as fmemtime:
        fmemtime.write('SGA summary .memtime file (%s)' % output_path)
        for step_memtime_path in step_memtime_paths:
            with open(step_memtime_path, 'r') as step_file:
                step_file.readline()  # skipping 1st line
                # The next five lines are read in order as real, CPU, user
                # and system time and max RSS; the value is the third
                # whitespace-separated token of each line.
                real_time += float(step_file.readline().split()[2])
                cpu_time += float(step_file.readline().split()[2])
                user_time += float(step_file.readline().split()[2])
                system_time += float(step_file.readline().split()[2])
                max_rss = max(max_rss, int(step_file.readline().split()[2]))

        # Writing to the summary .memtime file.
        fmemtime.write('\nReal time:  %.3f s' % real_time)
        fmemtime.write('\nCPU time:  %.3f s' % cpu_time)
        fmemtime.write('\nUser time:  %.3f s' % user_time)
        fmemtime.write('\nSystem time:  %.3f s' % system_time)
        # NOTE(review): the value is copied unscaled from the step files;
        # confirm that their unit really is MB.
        fmemtime.write('\nMaximum RSS: %d MB' % max_rss)
def measure_command_wrapper(out_filename):
    """Build the measurement prefix to put in front of a shell command.

    When USE_BASICDEFINES_ is True the prefix comes from
    basicdefines.measure_command(); otherwise a /usr/bin/time command line
    (ending in a space) is returned that writes its report to out_filename.
    """
    use_module = (USE_BASICDEFINES_ == True)
    if use_module:
        prefix = basicdefines.measure_command(out_filename)
    else:
        # /usr/bin/time has no CPU-time field, so a constant -1.0 is reported.
        prefix = '/usr/bin/time --format "Command line: %%C\\nReal time: %%e s\\nCPU time: -1.0 s\\nUser time: %%U s\\nSystem time: %%S s\\nMaximum RSS: %%M kB\\nExit status: %%x" --quiet -o %s ' % out_filename
    return prefix
def run(reads_file, reference_file, machine_name, output_path, output_suffix=''):
    """Build the BLAST database if needed, align the reads, and write a SAM file.

    Steps:
      1. makeblastdb creates <reference_file>-blastdb unless its .nsq file
         already exists.
      2. The aligner binary is run with "-task blastn" and tabular output
         ('-outfmt "6 <outfmt>"', where outfmt is a module-level name) into
         a .out file.
      3. filterblastout writes a filtered copy of that output, keeping one
         alignment per read (the one with the highest alignment score).
      4. convert_blast_to_sam() turns the filtered output into the SAM file.
    Steps 1 and 2 are prefixed with basicdefines.measure_command() so that
    their measurements are written to .memtime files under output_path.

    Args:
        reads_file: path of the query reads.
        reference_file: path of the reference the database is built from.
        machine_name: 'pacbio' and 'nanopore' (case-insensitive) add custom
            scoring options; every other value only sets the thread count.
        output_path: directory receiving .out, .sam and .memtime files.
        output_suffix: optional tag appended to the output file stem.

    Returns:
        Path of the SAM file.
    """
    # Floor division keeps the count an integer on Python 3 as well (true
    # division would render "-num_threads 2.0"); never below one thread.
    num_threads = max(1, multiprocessing.cpu_count() // 2)

    if machine_name.lower() in ('pacbio', 'nanopore'):
        # Scoring parameters taken from the paper "Oxford Nanopore
        # Sequencing and de novo Assembly of a Eukaryotic Genome",
        # Supplemental Notes and Figures:
        # http://biorxiv.org/content/biorxiv/suppl/2015/01/06/013490.DC1/013490-1.pdf
        # (the paper's additional "-evalue 1e-10" is not applied here).
        parameters = '-reward 5 -penalty -4 -gapopen 8 -gapextend 6 -dust no'
        parameters += ' -num_threads %d' % num_threads
    else:
        # 'illumina', 'roche', 'debug' and unknown values.
        parameters = '-num_threads %d' % num_threads

    if output_suffix != '':
        output_filename = '%s-%s' % (MAPPER_NAME, output_suffix)
    else:
        output_filename = MAPPER_NAME

    sam_file = '%s/%s.sam' % (output_path, output_filename)
    out_file = '%s/%s.out' % (output_path, output_filename)
    filtered_out_file = '%s/%s-filtered.out' % (output_path, output_filename)
    out_db_path = '%s-blastdb' % reference_file
    memtime_file = '%s/%s.memtime' % (output_path, output_filename)
    memtime_file_index = '%s/%s-index.memtime' % (output_path, output_filename)

    # Run the indexing process, and measure execution time and memory.
    if not os.path.exists(out_db_path + '.nsq'):
        sys.stderr.write('[%s wrapper] Generating index...\n' % MAPPER_NAME)
        command = '%s %s/makeblastdb -in %s -dbtype nucl -out %s' % (
            basicdefines.measure_command(memtime_file_index), ALIGNER_PATH,
            reference_file, out_db_path)
        sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
        # The call was commented out, so a missing database was announced
        # but never built and the alignment below had nothing to search.
        subprocess.call(command, shell=True)
        sys.stderr.write('\n\n')
    else:
        sys.stderr.write('[%s wrapper] Reference index already exists. Continuing.\n' % MAPPER_NAME)
        sys.stderr.flush()

    # Run the alignment process, and measure execution time and memory.
    sys.stderr.write('[%s wrapper] Running %s...\n' % (MAPPER_NAME, MAPPER_NAME))
    command = '%s %s/%s -task blastn -db %s -query %s -out %s %s -outfmt "6 %s"' % (
        basicdefines.measure_command(memtime_file), ALIGNER_PATH, BIN,
        out_db_path, reads_file, out_file, parameters, outfmt)
    sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
    subprocess.call(command, shell=True)
    sys.stderr.write('\n\n')

    # Filter the BLAST out file and extract only one alignment per read
    # (the one with highest alignment score).
    sys.stderr.write('[%s wrapper] Filtering BLAST output...\n' % MAPPER_NAME)
    command = '%s/filterblastout/bin/filterblastout %s > %s' % (SCRIPT_PATH, out_file, filtered_out_file)
    sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
    subprocess.call(command, shell=True)
    sys.stderr.write('\n\n')

    convert_blast_to_sam(reference_file, reads_file, filtered_out_file, sam_file)

    sys.stderr.write('[%s wrapper] %s wrapper script finished processing.\n' % (MAPPER_NAME, MAPPER_NAME))

    return sam_file
def run(reads_file,
        reference_file,
        machine_name,
        output_path,
        output_suffix=''):
    """Build the suffix array if needed, align the reads, return the output path.

    sawriter creates <reference_file>.blasrsa when that file does not
    exist; the aligner is then run with "-sa <reference_file>.blasrsa" and
    "-out <output file>". Both commands are prefixed with
    basicdefines.measure_command() so that measurements are written to
    .memtime files under output_path. Progress messages go to stderr.

    Args:
        reads_file: path of the reads file.
        reference_file: path of the reference sequence.
        machine_name: selects the option set (case-insensitive):
            'illumina'/'roche' add "-minMatch 7", 'longindel' adds
            "-clipping none", 'pacbiom4' uses "-m 4" instead of "-sam";
            everything else uses "-sam -bestn 1".
        output_path: directory receiving the output and .memtime files.
        output_suffix: optional tag appended to the output file stem.

    Returns:
        Path of the output file (named .sam for every mode, including
        'pacbiom4', which does not pass -sam).
    """
    # Floor division keeps the count an integer on Python 3 as well (true
    # division would render "-nproc 2.0"); never below one process.
    num_threads = max(1, multiprocessing.cpu_count() // 2)

    technology = machine_name.lower()
    if technology in ('illumina', 'roche'):
        parameters = '-nproc %d -sam -bestn 1 -minMatch 7' % num_threads
    elif technology == 'longindel':
        # -clipping [none|hard|subread|soft] (none)
        #     Use no/hard/subread/soft clipping for SAM output.
        parameters = '-nproc %d -sam -bestn 1 -clipping none' % num_threads
    elif technology == 'pacbiom4':
        parameters = '-nproc %d -bestn 1 -m 4' % num_threads
    else:
        # 'pacbio', 'nanopore', 'debug' and unknown values.
        parameters = '-nproc %d -sam -bestn 1' % num_threads

    if output_suffix != '':
        output_filename = '%s-%s' % (MAPPER_NAME, output_suffix)
    else:
        output_filename = MAPPER_NAME

    sam_file = '%s/%s.sam' % (output_path, output_filename)
    memtime_file = '%s/%s.memtime' % (output_path, output_filename)
    memtime_file_index = '%s/%s-index.memtime' % (output_path, output_filename)

    # Run the indexing process, and measure execution time and memory.
    if not os.path.exists(reference_file + '.blasrsa'):
        sys.stderr.write('[%s wrapper] Generating index...\n' % MAPPER_NAME)
        command = '%s %s/alignment/bin/sawriter %s.blasrsa %s' % (
            basicdefines.measure_command(memtime_file_index), ALIGNER_PATH,
            reference_file, reference_file)
        sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
        subprocess.call(command, shell=True)
        sys.stderr.write('\n\n')
    else:
        sys.stderr.write(
            '[%s wrapper] Reference index already exists. Continuing.\n' %
            MAPPER_NAME)
        sys.stderr.flush()

    # Run the alignment process, and measure execution time and memory.
    sys.stderr.write('[%s wrapper] Running %s...\n' %
                     (MAPPER_NAME, MAPPER_NAME))
    command = '%s %s/%s %s %s %s -sa %s.blasrsa -out %s' % (
        basicdefines.measure_command(memtime_file), ALIGNER_PATH, BIN,
        reads_file, reference_file, parameters, reference_file, sam_file)
    sys.stderr.write('[%s wrapper] %s\n' % (MAPPER_NAME, command))
    subprocess.call(command, shell=True)
    sys.stderr.write('\n\n')

    sys.stderr.write('[%s wrapper] %s wrapper script finished processing.\n' %
                     (MAPPER_NAME, MAPPER_NAME))

    return sam_file