Python runCommandPopenCommunicate Beispiele, utils.runCommandPopenCommunicate Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: trimmomatic.py Projekt: INNUENDOCON/INNUca

def trimmomatic(jar_path_trimmomatic, sampleName, trimmomatic_folder, threads, adaptersFasta, script_path, doNotSearchAdapters, fastq_files, maxReadsLength, doNotTrimCrops, crop, headCrop, leading, trailing, slidingWindow, minLength, nts2clip_based_ntsContent, jarMaxMemory, fastq_encoding):
    """
    Assemble a Trimmomatic paired-end ('PE') command line and run it.

    For every input fastq two output paths are derived inside
    trimmomatic_folder: '<basename>P.fastq.gz' and '<basename>U.fastq.gz',
    where <basename> is the file name stripped of its last two extensions.
    The JVM memory limit, the Phred encoding flag and the optional CROP,
    HEADCROP and ILLUMINACLIP steps are written into reserved (initially
    empty) slots of the command list. sampleName is not used here.

    When fastq_encoding is None the command is run with '-phred33' and, if
    that run is reported as failed, once more with '-phred64'.

    Returns
    -------
    run_successfully : bool
        Status reported by utils.runCommandPopenCommunicate for the last run.
    """
    # Positions of the optional arguments inside the command list below.
    memory_slot = 1
    encoding_slot = 7
    crop_slot = 10
    headcrop_slot = 11
    adapters_slot = 12

    trimmomatic_out_files = []
    for fastq in fastq_files:
        basename = os.path.splitext(os.path.splitext(os.path.basename(fastq))[0])[0]
        # Paired (P) and unpaired (U) output for each input file
        for suffix in ('P.fastq.gz', 'U.fastq.gz'):
            trimmomatic_out_files.append(os.path.join(trimmomatic_folder, str(basename + suffix)))

    # Run Trimmomatic
    # NOTE(review): the empty strings and the space-joined file lists assume the
    # helper re-tokenises the command - confirm in utils.
    command = ['java', '', '-jar', jar_path_trimmomatic, 'PE', '-threads', str(threads), '', ' '.join(fastq_files), ' '.join(trimmomatic_out_files), '', '', '', str('SLIDINGWINDOW:' + slidingWindow), str('LEADING:' + str(leading)), str('TRAILING:' + str(trailing)), str('MINLEN:' + str(minLength)), 'TOPHRED33']

    if str(jarMaxMemory) != 'off':
        # jarMaxMemory is multiplied by 1024 and passed to the JVM with an 'M' suffix
        command[memory_slot] = '-Xmx' + str(int(round(jarMaxMemory * 1024, 0))) + 'M'

    if not doNotTrimCrops:
        if maxReadsLength is None:
            print('Because FastQC did not run successfully, --trimCrop option will not be considered')
        elif crop is not None:
            crop = maxReadsLength - crop[0]
            command[crop_slot] = str('CROP:' + str(crop))
        elif nts2clip_based_ntsContent is not None:
            crop = nts2clip_based_ntsContent[1]
            print(str(crop) + ' nucleotides will be clipped at the end of reads')
            crop = maxReadsLength - crop
            command[crop_slot] = str('CROP:' + str(crop))

        if headCrop is not None:
            command[headcrop_slot] = str('HEADCROP:' + str(headCrop[0]))
        elif nts2clip_based_ntsContent is not None:
            headCrop = nts2clip_based_ntsContent[0]
            print(str(headCrop) + ' nucleotides will be clipped at the beginning of reads')
            command[headcrop_slot] = str('HEADCROP:' + str(headCrop))

    if not doNotSearchAdapters:
        if adaptersFasta is None:
            # No adapters file given: concatenate the ones shipped next to the script
            trimmomatic_adapters_folder = os.path.join(os.path.dirname(script_path), 'src', 'Trimmomatic-0.36', 'adapters')
            adapters_files = [os.path.join(trimmomatic_adapters_folder, adapters_name)
                              for adapters_name in ('Nextera_XT_INNUca.fasta', 'NexteraPE-PE.fa', 'TruSeq2-PE.fa', 'TruSeq3-PE-2.fa')]
            print('Removing adapters contamination using ' + str(adapters_files))
            adaptersFasta = concatenateFastaFiles(adapters_files, trimmomatic_folder, 'concatenated_adaptersFile.fasta')
        else:
            print('Removing adapters contamination using ' + adaptersFasta)
        command[adapters_slot] = 'ILLUMINACLIP:' + adaptersFasta + ':3:30:10:6:true'

    if fastq_encoding is None:
        # Encoding unknown: try Phred+33 first and fall back to Phred+64
        print('Trimmomatic fail! Trying run with Phred+33 enconding defined...')
        command[encoding_slot] = '-phred33'
        run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)
        if not run_successfully:
            print('Trimmomatic fail again! Trying run with Phred+64 enconding defined...')
            command[encoding_slot] = '-phred64'
            run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)
        return run_successfully

    # Any value other than 33 or 64 leaves the encoding slot empty
    if fastq_encoding == 33:
        command[encoding_slot] = '-phred33'
    elif fastq_encoding == 64:
        command[encoding_slot] = '-phred64'
    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    return run_successfully

Beispiel #2

0

Datei anzeigen

Datei: pilon.py Projekt: INNUENDOCON/INNUca

def indexSequenceBowtie2(referenceFile, threads):
    """
    Make sure a Bowtie2 index exists for referenceFile.

    If '<referenceFile>.1.bt2' is already a file nothing is run and True is
    returned; otherwise 'bowtie2-build' is run with referenceFile as both the
    input and the index prefix, and the helper's success flag is returned.
    """
    if os.path.isfile(str(referenceFile + '.1.bt2')):
        return True

    build_command = ['bowtie2-build', '--threads', str(threads), referenceFile, referenceFile]
    run_successfully, _, _ = utils.runCommandPopenCommunicate(build_command, False, None, True)
    return run_successfully

Beispiel #3

0

Datei anzeigen

Datei: pear.py Projekt: B-UMMI/INNUca

def compress_decompress(compressed_file, decompressed_file, compressed_True):
    """
    Compress or decompress a file and pickle the outcome.

    With compressed_True set, decompressed_file is gzip-compressed into
    compressed_file. Otherwise the compression type of compressed_file is
    looked up: when one is found the file is decompressed into
    decompressed_file and the result is checked with
    check_uncompression_fastq; when none is found compressed_file itself is
    checked and used as the decompressed file.

    Nothing is returned. [run_successfully, resulting file, length_sequence]
    is saved through utils.saveVariableToPickle in the directory of the
    decompressed file, named after its basename without the last extension.
    """
    run_successfully = False
    malformated_fastq = False
    length_sequence = None

    if compressed_True:
        compression_type = None
    else:
        compression_type = utils.compressionType(compressed_file)

    if compressed_True or compression_type is not None:
        # The '>' redirection is why the helper is called with True as second argument
        if compressed_True:
            command = ['gzip', '', '--stdout', '--keep', decompressed_file, '>', compressed_file]
        else:
            command = [compression_type[0], '--decompress', '--stdout', '--keep', compressed_file, '>', decompressed_file]

        run_successfully, _, _ = utils.runCommandPopenCommunicate(command, True, None, True)
        if run_successfully and not compressed_True:
            malformated_fastq, length_sequence = check_uncompression_fastq(decompressed_file)
    else:
        # Input is not compressed: validate it in place
        run_successfully = True
        malformated_fastq, length_sequence = check_uncompression_fastq(compressed_file)
        decompressed_file = compressed_file

    if malformated_fastq:
        run_successfully = False

    resulting_file = compressed_file if compressed_True else decompressed_file
    pickle_dir = os.path.dirname(decompressed_file)
    pickle_name = os.path.splitext(os.path.basename(decompressed_file))[0]
    utils.saveVariableToPickle([run_successfully, resulting_file, length_sequence], pickle_dir, pickle_name)

Beispiel #4

0

Datei anzeigen

Datei: pilon.py Projekt: INNUENDOCON/INNUca

def pilon(jar_path_pilon, assembly, bam_file, outdir, jarMaxMemory):
    """
    Run Pilon on an assembly using the alignments in bam_file.

    Returns (run_successfully, assembly_polished), where assembly_polished is
    the assembly path with its extension replaced by '.polished.fasta', or
    None when the run is reported as failed.
    """
    assembly_polished = os.path.splitext(assembly)[0] + '.polished.fasta'
    output_prefix = os.path.basename(os.path.splitext(assembly_polished)[0])

    # Empty unless a JVM memory limit is requested (jarMaxMemory * 1024, 'M' suffix)
    memory_option = ''
    if str(jarMaxMemory) != 'off':
        memory_option = '-Xmx' + str(int(round(jarMaxMemory * 1024, 0))) + 'M'

    command = ['java', memory_option, '-jar', jar_path_pilon, '--genome', assembly, '--frags', bam_file, '--outdir', outdir, '--output', output_prefix, '--changes', '--vcf']
    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    return run_successfully, (assembly_polished if run_successfully else None)

Beispiel #5

0

Datei anzeigen

Datei: pilon.py Projekt: INNUENDOCON/INNUca

def sortAlignment(alignment_file, output_file, sortByName_True, threads):
    """
    Sort an alignment file with 'samtools sort'.

    The output format is the lower-cased extension of output_file (without
    the dot); '-n' is added when sortByName_True is set.

    Returns (run_successfully, output_file); output_file is None when the run
    is reported as failed.
    """
    outFormat_string = os.path.splitext(output_file)[1][1:].lower()
    name_sort_option = '-n' if sortByName_True else ''

    command = ['samtools', 'sort', '-o', output_file, '-O', outFormat_string, name_sort_option, '-@', str(threads), alignment_file]
    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    if run_successfully:
        return run_successfully, output_file
    return run_successfully, None

Beispiel #6

0

Datei anzeigen

Datei: mlst.py Projekt: B-UMMI/INNUca

def runMlst(contigs, scheme, outdir, species_genus, mlst_scheme_genus):
    """
    Run 'mlst --novel' on contigs, write a report and compare the scheme found.

    The first line of the command output is split on tabs: field 1 (up to the
    first '_') is the scheme, field 2 the ST and the remaining fields the
    profile. They are written to '<outdir>/mlst_report.txt'. When the ST is
    '-' clean_novel_alleles is called, otherwise the novel alleles file is
    deleted if it exists.

    Returns
    -------
    run_successfully : bool
    pass_qc : bool
    failing : dict
        failing['sample'] is False or a message.
    warnings : dict
        Empty, or holding a message under 'sample'.
    """
    pass_qc = False
    failing = {'sample': False}
    warnings = {}

    novel_alleles = os.path.join(outdir, 'mlst_novel_alleles.fasta')

    run_successfully, stdout, _ = utils.runCommandPopenCommunicate(['mlst', '--novel', novel_alleles, contigs], False, None, True)

    if not run_successfully:
        failing['sample'] = 'Did not run'
    else:
        first_line_fields = stdout.splitlines()[0].split('\t')
        scheme_mlst = first_line_fields[1].split('_')[0]
        st = first_line_fields[2]
        profile = first_line_fields[3:]

        if st == '-':
            clean_novel_alleles(novel_alleles=novel_alleles, scheme_mlst=scheme_mlst, profile=profile)
        elif os.path.isfile(novel_alleles):
            os.remove(novel_alleles)

        report = 'MLST found ST ' + str(st) + ' from scheme ' + scheme_mlst
        print(report)
        with open(os.path.join(outdir, 'mlst_report.txt'), 'wt') as writer:
            writer.write('#scheme\n' + scheme_mlst + '\n#ST\n' + st + '\n')
            writer.write('#profile\n' + ' '.join(profile) + '\n')
            writer.flush()

        # Schemes are compared on the part before the first underscore
        if scheme_mlst.split('_', 1)[0] == scheme.split('_', 1)[0]:
            pass_qc = True
        elif scheme == 'unknown':
            pass_qc = True
            if scheme_mlst != '-':
                warnings['sample'] = 'Found {scheme_mlst} scheme for a species with unknown' \
                                     ' scheme'.format(scheme_mlst=scheme_mlst)
        elif species_genus == 'yersinia' and mlst_scheme_genus == 'yersinia':
            pass_qc = True
            warnings['sample'] = 'Found a Yersinia scheme ({scheme_mlst}), but it is different from what it was' \
                                 ' expected ({scheme})'.format(scheme_mlst=scheme_mlst, scheme=scheme)
        elif mlst_scheme_genus is not None and scheme_mlst == scheme == mlst_scheme_genus:
            pass_qc = True
        else:
            failing['sample'] = 'MLST scheme found ({scheme_mlst}) and provided ({scheme}) are not the' \
                                ' same'.format(scheme_mlst=scheme_mlst, scheme=scheme)
            print(failing['sample'])

    if len(warnings) > 0:
        print(warnings['sample'])

    return run_successfully, pass_qc, failing, warnings

Beispiel #7

0

Datei anzeigen

Datei: assembly_mapping.py Projekt: INNUENDOCON/INNUca

def get_bam_subset(alignment_file, sequences_2_keep, threads):
    """
    Extract the requested sequences from an alignment with 'samtools view'.

    The subset is written next to alignment_file, with the extension replaced
    by '.subset.bam'; '-F 4' is passed to samtools. Returns
    (run_successfully, bam_subset), with bam_subset set to None when the run
    is reported as failed.
    """
    subset_path = os.path.splitext(alignment_file)[0] + '.subset.bam'
    regions = ' '.join(sequences_2_keep)

    view_command = ['samtools', 'view', '-buh', '-F', '4', '-o', subset_path, '-@', str(threads), alignment_file, regions]
    run_successfully, _, _ = utils.runCommandPopenCommunicate(view_command, False, None, False)

    if run_successfully:
        return run_successfully, subset_path
    return run_successfully, None

Beispiel #8

0

Datei anzeigen

Datei: mlst.py Projekt: INNUENDOCON/INNUca

def getScheme(species):
    """
    Find the MLST scheme for a species.

    The first line printed by 'which mlst' is resolved to an absolute real
    path and given to get_species_scheme_map_version; the lower-cased species
    name, split on single spaces, is then looked up with
    parse_species_scheme_map.

    Returns (scheme, first word of the lower-cased species, genus_mlst_scheme).
    """
    # The helper's success flag is not checked; an empty output raises IndexError below
    _, which_stdout, _ = utils.runCommandPopenCommunicate(['which', 'mlst'], False, None, False)
    mlst_executable = which_stdout.splitlines()[0]
    mlst_folder = os.path.abspath(os.path.realpath(mlst_executable))

    mlst_db_path, species_scheme_map_new = get_species_scheme_map_version(mlst_folder)

    species_words = species.lower().split(' ')
    species_genus = species_words[0]
    scheme, genus_mlst_scheme = parse_species_scheme_map(species_words, mlst_db_path, species_scheme_map_new)

    print('\n' + 'MLST scheme found for {species}: {scheme}'.format(species=species, scheme=scheme))

    return scheme, species_genus, genus_mlst_scheme

Beispiel #9

0

Datei anzeigen

Datei: download.py Projekt: B-UMMI/getSeqENA

def gzip_files(file_2_compress, pickle_prefix, outdir):
    """
    Compress a file with 'gzip --best' and pickle the success flag.

    The output is '<file>.gz', where a trailing '.temp' extension of the input
    is dropped first. The input is removed after a successful run. The flag is
    saved in outdir under '<pickle_prefix>.<input basename>'. Returns nothing.
    """
    if file_2_compress.endswith('.temp'):
        out_file = os.path.splitext(file_2_compress)[0]
    else:
        out_file = file_2_compress
    gz_file = str(out_file + '.gz')

    # '>' redirection: the helper is called with True as its second argument
    run_successfully, _, _ = utils.runCommandPopenCommunicate(['gzip', '--stdout', '--best', file_2_compress, '>', gz_file], True, None, True)
    if run_successfully:
        os.remove(file_2_compress)

    pickle_name = str(pickle_prefix + '.' + os.path.basename(file_2_compress))
    utils.saveVariableToPickle(run_successfully, outdir, pickle_name)

Beispiel #10

0

Datei anzeigen

def index_fasta_samtools(fasta, region_None, region_outfile_none,
                         print_comand_True):
    """
    Run 'samtools faidx' on fasta, optionally for one region.

    When region_outfile_none is given the output is redirected to it with '>'
    and the helper's second (shell) argument is set to True.

    Returns (run_successfully, stdout).
    """
    region_argument = '' if region_None is None else region_None

    if region_outfile_none is None:
        redirection = ['', '']
        shell_true = False
    else:
        redirection = ['>', region_outfile_none]
        shell_true = True

    command = ['samtools', 'faidx', fasta, region_argument] + redirection
    run_successfully, stdout, _ = utils.runCommandPopenCommunicate(
        command, shell_true, None, print_comand_True)
    return run_successfully, stdout

Beispiel #11

0

Datei anzeigen

Datei: assembly_mapping.py Projekt: yemilawal/INNUca

def get_bam_subset(alignment_file, sequences_2_keep, threads):
    """
    Write the alignments of selected sequences to a new BAM file.

    'samtools view' is run with '-F 4' and the space-joined sequence names;
    the output path is alignment_file with its extension replaced by
    '.subset.bam'.

    Returns (run_successfully, bam_subset); bam_subset is None when the run is
    reported as failed.
    """
    alignment_prefix = os.path.splitext(alignment_file)[0]
    bam_subset = alignment_prefix + '.subset.bam'

    command = ['samtools', 'view', '-buh', '-F', '4']
    command += ['-o', bam_subset, '-@', str(threads)]
    command += [alignment_file, ' '.join(sequences_2_keep)]

    outcome = utils.runCommandPopenCommunicate(command, False, None, False)
    run_successfully, _, _ = outcome

    return run_successfully, (bam_subset if run_successfully else None)

Beispiel #12

0

Datei anzeigen

def runMlst(contigs, scheme, outdir, species_genus, mlst_scheme_genus):
    """
    Run 'mlst' on contigs, write a report and compare the scheme found.

    The first output line is split on tabs: field 1 (up to the first '_') is
    the scheme, field 2 the ST, the rest the profile. These are written to
    '<outdir>/mlst_report.txt'. A failed run is not treated as a QC failure:
    it sets a warning and pass_qc to True.

    Returns (run_successfully, pass_qc, failing, warnings); failing['sample']
    is False or a message, warnings is empty or holds a message under 'sample'.
    """
    pass_qc = False
    failing = {'sample': False}
    warnings = {}

    run_successfully, stdout, _ = utils.runCommandPopenCommunicate(
        ['mlst', contigs], False, None, True)

    if not run_successfully:
        warnings['sample'] = 'Did not run;'
        pass_qc = True
    else:
        columns = stdout.splitlines()[0].split('\t')
        scheme_mlst = columns[1].split('_')[0]
        st = columns[2]
        profile = columns[3:]

        report = 'MLST found ST ' + str(st) + ' from scheme ' + scheme_mlst
        print(report)
        report_path = os.path.join(outdir, 'mlst_report.txt')
        with open(report_path, 'wt') as writer:
            writer.write('#scheme\n' + scheme_mlst + '\n#ST\n' + st + '\n')
            writer.write('#profile\n' + ' '.join(profile) + '\n')
            writer.flush()

        # Only the part before the first underscore is compared
        if scheme_mlst.split('_', 1)[0] == scheme.split('_', 1)[0]:
            pass_qc = True
        elif scheme == 'unknown':
            pass_qc = True
            if scheme_mlst != '-':
                warnings['sample'] = 'Found {scheme_mlst} scheme for a species with unknown scheme'.format(
                    scheme_mlst=scheme_mlst)
        elif species_genus == 'yersinia' and mlst_scheme_genus == 'yersinia':
            pass_qc = True
            warnings['sample'] = 'Found a Yersinia scheme ({scheme_mlst}), but it is different from what it was expected({scheme})'.format(
                scheme_mlst=scheme_mlst, scheme=scheme)
        else:
            failing['sample'] = 'MLST scheme found (' + scheme_mlst + ') and provided (' + scheme + ') are not the same'
            print(failing['sample'])

    if len(warnings) > 0:
        print(warnings['sample'])

    return run_successfully, pass_qc, failing, warnings

Beispiel #13

0

Datei anzeigen

Datei: download.py Projekt: B-UMMI/getSeqENA

def download_with_aspera(aspera_file_path, aspera_key, outdir, pickle_prefix, sra, ena_id):
    """
    Download one file with Aspera ('ascp') and pickle the success flag.

    When sra is false, aspera_file_path is fetched as user 'era-fasp' with
    '-P33001'. When sra is true, the '.sra' file of ena_id is fetched from the
    'sra-instant/reads/ByRun' tree, using the first 3 and first 6 characters
    of ena_id as directory levels.

    The helper is given 3600 as its third argument (presumably a timeout in
    seconds - confirm in utils). The flag is saved in outdir under
    '<pickle_prefix>.<file name>' or '<pickle_prefix>.<ena_id>'. Returns nothing.
    """
    command = ['ascp', '-QT', '-l', '300m', '', '-i', aspera_key, '', outdir]
    if not sra:
        command[4] = '-P33001'
        command[7] = str('era-fasp@' + aspera_file_path)
        pickle = pickle_prefix + '.' + aspera_file_path.rsplit('/', 1)[1]
    else:
        # The user@host part had been replaced by an e-mail obfuscation
        # placeholder (containing a space), which is not a valid ascp source.
        # NOTE(review): restored to NCBI's public Aspera account/host - confirm
        # against the upstream project before relying on it.
        command[7] = 'anonftp@ftp.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/{a}/{b}/{c}/{c}.sra'.format(
            a=ena_id[:3], b=ena_id[:6], c=ena_id)
        pickle = pickle_prefix + '.' + ena_id

    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, 3600, True)

    utils.saveVariableToPickle(run_successfully, outdir, pickle)

Beispiel #14

0

Datei anzeigen

Datei: estimated_coverage.py Projekt: INNUENDOCON/INNUca

def countSequencedBases(fastq_file, outdir):
    """
    Count the bases in a compressed fastq file and pickle the result.

    The file is streamed through a grep pipeline and 'wc' twice, once with
    '--chars' and once with '--lines'; bases is the character count minus the
    line count. If only the first run succeeds, bases keeps the raw character
    count while the flag is False. Files whose compression type is not
    recognised are not processed (flag False, bases None).

    [run_successfully, bases] is saved in outdir under
    'estimate_coverage.<fastq basename>'. Returns nothing.
    """
    run_successfully = False
    bases = None

    # Determine compression type
    compression_type = utils.compressionType(fastq_file)
    if compression_type is not None:
        # Last element is the wc option, filled in before each run
        pipeline = [compression_type[1], '--keep', '--stdout', fastq_file, '|', 'grep', '--after-context=1', '"@"', '|', 'grep', '--invert-match', '"^--$"', '|', 'grep', '--invert-match', '"@"', '|', 'wc', '']

        # Number of characters
        pipeline[-1] = '--chars'
        run_successfully, stdout, _ = utils.runCommandPopenCommunicate(pipeline, True, None, False)
        if run_successfully:
            bases = int(stdout.splitlines()[0])

            # Number of lines
            pipeline[-1] = '--lines'
            run_successfully, stdout, _ = utils.runCommandPopenCommunicate(pipeline, True, None, False)
            if run_successfully:
                bases -= int(stdout.splitlines()[0])

    pickle_name = str('estimate_coverage.' + os.path.basename(fastq_file))
    utils.saveVariableToPickle([run_successfully, bases], outdir, pickle_name)

Beispiel #15

0

Datei anzeigen

def sortAlignment(alignment_file, output_file, sortByName_True, threads):
    """
    Sort alignment_file into output_file using 'samtools sort'.

    The '-O' format is taken from the extension of output_file (lower-cased,
    leading dot removed). With sortByName_True the '-n' option is added.

    Returns (run_successfully, output_file), where output_file becomes None
    if the run is reported as failed.
    """
    extension = os.path.splitext(output_file)[1]
    outFormat_string = extension[1:].lower()

    command = ['samtools', 'sort']
    command += ['-o', output_file, '-O', outFormat_string]
    command.append('-n' if sortByName_True else '')
    command += ['-@', str(threads), alignment_file]

    run_successfully, _, _ = utils.runCommandPopenCommunicate(
        command, False, None, True)

    sorted_file = output_file if run_successfully else None
    return run_successfully, sorted_file

Beispiel #16

0

Datei anzeigen

Datei: download.py Projekt: B-UMMI/getSeqENA

def download_with_wget(ftp_file_path, outdir, pickle_prefix, sra, ena_id):
    """
    Download one file with 'wget --tries=1' and pickle the success flag.

    When sra is false, ftp_file_path is downloaded into outdir under its own
    file name. When sra is true, the '.sra' file of ena_id is downloaded from
    the 'sra-instant/reads/ByRun' FTP tree to '<outdir>/<ena_id>.sra'.

    The flag is saved in outdir under '<pickle_prefix>.<file name>' or
    '<pickle_prefix>.<ena_id>'. Returns nothing.
    """
    if sra:
        url = 'ftp://ftp-trace.ncbi.nih.gov/sra/sra-instant/reads/ByRun/sra/{a}/{b}/{c}/{c}.sra'.format(
            a=ena_id[:3], b=ena_id[:6], c=ena_id)
        destination = os.path.join(outdir, ena_id + '.sra')
        pickle = pickle_prefix + '.' + ena_id
    else:
        url = ftp_file_path
        file_download = ftp_file_path.rsplit('/', 1)[1]
        destination = os.path.join(outdir, file_download)
        pickle = pickle_prefix + '.' + file_download

    command = ['wget', '--tries=1', url, '-O', destination]
    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, 3600, True)

    utils.saveVariableToPickle(run_successfully, outdir, pickle)

Beispiel #17

0

Datei anzeigen

def countSequencedBases(fastq_file, outdir):
    """
    Estimate the number of sequenced bases of a compressed fastq file.

    A decompress | grep | grep | grep | wc pipeline is run with 'wc --chars'
    and then with 'wc --lines'; the second run only happens if the first one
    succeeded. bases is chars minus lines when both succeed, the char count
    when only the first does, and None otherwise (including when the
    compression type is not recognised).

    [run_successfully, bases] is pickled in outdir as
    'estimate_coverage.<fastq basename>'. Returns nothing.
    """
    run_successfully = False
    bases = None

    # Determine compression type
    compression_type = utils.compressionType(fastq_file)
    if compression_type is not None:
        command = [compression_type[1], '--keep', '--stdout', fastq_file, '|', 'grep', '--after-context=1', '"@"', '|',
                   'grep', '--invert-match', '"^--$"', '|', 'grep', '--invert-match', '"@"', '|', 'wc', '']

        # Characters first, then lines; stop at the first failure
        counts = []
        for wc_option in ('--chars', '--lines'):
            command[18] = wc_option
            run_successfully, stdout, _ = utils.runCommandPopenCommunicate(command, True, None, False)
            if not run_successfully:
                break
            counts.append(int(stdout.splitlines()[0]))

        if len(counts) == 2:
            bases = counts[0] - counts[1]
        elif len(counts) == 1:
            bases = counts[0]

    utils.saveVariableToPickle([run_successfully, bases], outdir, str('estimate_coverage.' + os.path.basename(fastq_file)))

Beispiel #18

0

Datei anzeigen

Datei: pilon.py Projekt: abremges/INNUca

def pilon(jar_path_pilon, assembly, bam_file, outdir, jarMaxMemory):
    """
    Polish an assembly with Pilon.

    The Pilon jar is run through 'java' with '--changes' and '--vcf'; unless
    jarMaxMemory is 'off', '-Xmx<jarMaxMemory * 1024>M' is passed to the JVM.

    Returns (run_successfully, assembly_polished). assembly_polished is the
    assembly path with its extension replaced by '.polished.fasta', or None
    when the run is reported as failed.
    """
    assembly_polished = os.path.splitext(assembly)[0] + '.polished.fasta'

    command = ['java', '']
    command += ['-jar', jar_path_pilon]
    command += ['--genome', assembly, '--frags', bam_file]
    command += ['--outdir', outdir]
    command += ['--output', os.path.basename(os.path.splitext(assembly_polished)[0])]
    command += ['--changes', '--vcf']

    if str(jarMaxMemory) != 'off':
        megabytes = int(round(jarMaxMemory * 1024, 0))
        command[1] = '-Xmx' + str(megabytes) + 'M'

    run_successfully, _, _ = utils.runCommandPopenCommunicate(
        command, False, None, True)

    if run_successfully:
        return run_successfully, assembly_polished
    return run_successfully, None

Beispiel #19

0

Datei anzeigen

Datei: download.py Projekt: B-UMMI/getSeqENA

def sra_2_fastq(download_dir, ena_id):
    """
    Convert '<download_dir>/<ena_id>.sra' to fastq and rename read headers.

    'fastq-dump --split-files' is run on the SRA file; on success every
    non-hidden '*.fastq' file found in download_dir is passed to
    rename_header_sra in a pool of two worker processes.

    Returns
    -------
    run_successfully : bool
        False if fastq-dump is reported as failed, if a worker raised, or if
        any rename_header_sra call returned a false value.
    """
    # os.path.join instead of plain concatenation, so a download_dir without a
    # trailing separator still points inside the directory
    sra_file = os.path.join(download_dir, ena_id + '.sra')
    command = ['fastq-dump', '-I', '-O', download_dir, '--split-files', sra_file]
    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, 3600, True)
    if run_successfully:
        files = [os.path.join(download_dir, f) for f in os.listdir(download_dir)
                 if not f.startswith('.') and os.path.isfile(os.path.join(download_dir, f)) and f.endswith('.fastq')]

        pool = multiprocessing.Pool(processes=2)
        results = []
        try:
            p = pool.map_async(rename_header_sra, files, callback=results.extend)
            p.wait()
            # The callback is skipped when a worker raises, which would leave
            # results empty and make all(results) True
            workers_ok = p.successful()
        finally:
            # Release the worker processes
            pool.close()
            pool.join()

        run_successfully = workers_ok and all(results)

    return run_successfully

Beispiel #20

0

Datei anzeigen

def create_vcf(bam_file, sequence_to_analyse, outdir, counter, reference_file):
    """
    Produce an uncompressed VCF for one sequence with 'samtools mpileup'.

    The file is written to
    '<outdir>/samtools_mpileup.sequence_<counter>.vcf'.

    Returns (run_successfully, gene_vcf); gene_vcf is None when the run is
    reported as failed.
    """
    vcf_name = 'samtools_mpileup.sequence_' + str(counter) + '.vcf'
    gene_vcf = os.path.join(outdir, vcf_name)

    command = ['samtools', 'mpileup']
    # No filtering on orphans, BAQ, base quality or mapping quality
    command += ['--count-orphans', '--no-BAQ', '--min-BQ', '0', '--min-MQ', '0']
    command += ['--fasta-ref', reference_file, '--region', sequence_to_analyse]
    command += ['--output', gene_vcf, '--VCF', '--uncompressed']
    command += ['--output-tags', 'INFO/AD,AD,DP', bam_file]

    run_successfully, _, _ = utils.runCommandPopenCommunicate(
        command, False, None, False)

    return run_successfully, (gene_vcf if run_successfully else None)

Beispiel #21

0

Datei anzeigen

def getScheme(species):
    """
    Look up the MLST scheme that corresponds to a species name.

    The location of the 'mlst' executable (first output line of 'which mlst',
    resolved to an absolute real path) is handed to
    get_species_scheme_map_version; the lower-cased, space-split species name
    is then resolved with parse_species_scheme_map.

    Returns (scheme, genus, genus_mlst_scheme), genus being the first word of
    the lower-cased species name.
    """
    command = ['which', 'mlst']
    # The success flag is ignored; an empty output raises IndexError below
    _, stdout, _ = utils.runCommandPopenCommunicate(
        command, False, None, False)
    executable_path = os.path.realpath(stdout.splitlines()[0])
    mlst_folder = os.path.abspath(executable_path)

    mlst_db_path, species_scheme_map_new = get_species_scheme_map_version(
        mlst_folder)

    species_parts = species.lower().split(' ')
    genus = species_parts[0]
    scheme, genus_mlst_scheme = parse_species_scheme_map(
        species_parts, mlst_db_path, species_scheme_map_new)

    message = 'MLST scheme found for {species}: {scheme}'.format(
        species=species, scheme=scheme)
    print('\n' + message)

    return scheme, genus, genus_mlst_scheme

Beispiel #22

0

Datei anzeigen

def mapping_bowtie2(fastq_files, reference_file, outdir, keep_bam=False, threads=1):
    """
    Map reads against a reference fasta file with Bowtie2

    Parameters
    ----------
    fastq_files : list
        List of fastq files; a single file is passed with '-U', otherwise the
        first two are passed with '-1' and '-2'
    reference_file : str
        Path to the reference file (the assembly); it is indexed first
    outdir : str
        Path to the output directory, where 'alignment.sam' is written
    keep_bam : bool, default False
        When False, '--no-unal' is added to the Bowtie2 command
    threads : int, default 1
        Number of threads to be used

    Returns
    -------
    run_successfully : bool
        False when either the indexing or the mapping is reported as failed
    sam_file : str or None
        Path to the sam file, or None when run_successfully is False
    """

    sam_file = os.path.join(outdir, str('alignment.sam'))

    # Index reference file
    run_successfully = indexSequenceBowtie2(reference_file, threads)

    if run_successfully:
        if len(fastq_files) == 1:
            reads_option = '-U ' + fastq_files[0]
        else:
            reads_option = '-1 ' + fastq_files[0] + ' -2 ' + fastq_files[1]

        unaligned_option = '' if keep_bam else '--no-unal'

        command = ['bowtie2', '-q', '--very-sensitive-local', '--threads', str(threads), '-x', reference_file,
                   reads_option, unaligned_option, '-S', sam_file]
        run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    return run_successfully, (sam_file if run_successfully else None)

Beispiel #23

0

Datei anzeigen

Datei: pilon.py Projekt: B-UMMI/INNUca

def mapping_bowtie2(fastq_files, reference_file, outdir, keep_bam=False, threads=1):
    """
    Align reads to a reference fasta file using Bowtie2

    Bowtie2 is run with '--very-sensitive-local', '--fr', '-I 0' and
    '-X 2000'.

    Parameters
    ----------
    fastq_files : list
        Fastq files to map: one file is given with '-U', otherwise the first
        two are given with '-1' and '-2'
    reference_file : str
        Path to the reference file (the assembly), indexed before mapping
    outdir : str
        Directory in which 'alignment.sam' is written
    keep_bam : bool, default False
        If False, '--no-unal' is added to the command
    threads : int, default 1
        Number of threads to be used

    Returns
    -------
    run_successfully : bool
        False if indexing or mapping is reported as failed
    sam_file : str or None
        Path to the sam file on success, otherwise None
    """

    sam_file = os.path.join(outdir, str('alignment.sam'))

    # Index reference file; nothing else runs if that fails
    run_successfully = indexSequenceBowtie2(reference_file, threads)
    if not run_successfully:
        return run_successfully, None

    single_end = len(fastq_files) == 1
    if single_end:
        reads = '-U ' + fastq_files[0]
    else:
        reads = '-1 ' + fastq_files[0] + ' -2 ' + fastq_files[1]

    no_unaligned = '--no-unal'
    if keep_bam:
        no_unaligned = ''

    command = ['bowtie2', '-q', '--very-sensitive-local', '--threads', str(threads), '-x', reference_file]
    command += [reads, no_unaligned]
    command += ['--fr', '-I', '0', '-X', '2000', '-S', sam_file]

    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    if not run_successfully:
        sam_file = None

    return run_successfully, sam_file

Beispiel #24

0

Datei anzeigen

Datei: trimmomatic.py Projekt: INNUENDOCON/INNUca

def controlForZeroReads(fastq_files):
    """
    Check that the first fastq file holds at least one complete record.

    Only fastq_files[0] is inspected. It is decompressed to stdout and piped
    to 'head -n 4'; the result is True only when the compression type is
    recognised, the command is reported as successful and exactly four lines
    are returned.
    """
    first_fastq = fastq_files[0]

    compression_type = utils.compressionType(first_fastq)
    if compression_type is None:
        return False

    head_command = [compression_type[1], '--stdout', '--keep', first_fastq, '|', 'head', '-n', '4']
    run_successfully, stdout, _ = utils.runCommandPopenCommunicate(head_command, True, None, False)

    if not run_successfully:
        return False

    # Four lines make up one fastq record
    return len(stdout.splitlines()) == 4

Beispiel #25

0

Datei anzeigen

Datei: spades.py Projekt: INNUENDOWEB/INNUca

def spades(spades_folder, threads, fastq_files, notUseCareful, maxMemory, minCoverageAssembly, kmers, assembled_se_reads):
    """
    Run 'spades.py --only-assembler' on a pair of fastq files.

    '--careful' is added unless notUseCareful is set, '-k' when kmers is not
    empty (values joined with commas) and '-s' when assembled_se_reads is
    given.

    Returns (run_successfully, contigs), contigs being
    '<spades_folder>/contigs.fasta' whether or not the run succeeded.
    """
    contigs = os.path.join(spades_folder, 'contigs.fasta')

    forward_reads, reverse_reads = fastq_files[0], fastq_files[1]

    careful_option = '' if notUseCareful else '--careful'

    kmers_option = ''
    if len(kmers) > 0:
        kmers = ','.join(map(str, kmers))
        kmers_option = str('-k ' + kmers)

    single_reads_option = ''
    if assembled_se_reads is not None:
        single_reads_option = str('-s ' + assembled_se_reads)

    command = ['spades.py', careful_option, '--only-assembler', '--threads', str(threads), '--memory', str(maxMemory), '--cov-cutoff', str(minCoverageAssembly), kmers_option, '-1', forward_reads, '-2', reverse_reads, single_reads_option, '-o', spades_folder]

    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    return run_successfully, contigs

Beispiel #26

0

Datei anzeigen

Datei: assembly_mapping.py Projekt: B-UMMI/INNUca

def getting_mapping_statistics(alignment_file):
    """
    Run 'samtools flagstat' and parse its output into a dictionary.

    Every non-empty output line is split on the first three spaces into
    '<qc_passed> + <qc_failed> <description>'. The key is the description up
    to the first '(' with everything from its last space onwards removed and
    the remaining spaces replaced by underscores.

    Returns
    -------
    run_successfully : bool
    dict_mapping_statistics : dict
        {key: {'qc_passed': int, 'qc_failed': int}}; empty when the command
        is reported as failed.
    """
    command = ['samtools', 'flagstat', alignment_file]
    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, True, None, True)

    dict_mapping_statistics = {}
    if run_successfully:
        for line in stdout.splitlines():
            # Skip blank lines before touching them: the previous
            # line.splitlines()[0] raised IndexError on an empty line, so the
            # emptiness check that followed could never protect it.
            if len(line) == 0:
                continue

            line = line.split(' ', 3)
            description = line[3].split('(', 1)[0]
            # str.split always returns at least one element, so the former
            # 'len(field) == 0' branch never ran. The rsplit is kept for every
            # line so keys stay exactly as before: it strips the space before
            # '(' and, without a '(', drops the last word.
            field = description.rsplit(' ', 1)[0].replace(' ', '_')
            dict_mapping_statistics[field] = {'qc_passed': int(line[0]), 'qc_failed': int(line[2])}
    return run_successfully, dict_mapping_statistics

Beispiel #27

0

Datei anzeigen

Datei: pilon.py Projekt: INNUENDOCON/INNUca

def mappingBowtie2(fastq_files, referenceFile, threads, outdir):
    """
    Map reads against a reference with Bowtie2, leaving out unaligned reads.

    The reference is indexed first. A single fastq file is passed with '-U',
    otherwise the first two are passed with '-1' and '-2'. The alignment is
    written to '<outdir>/alignment.sam'.

    Returns (run_successfully, sam_file); sam_file is None when indexing or
    mapping is reported as failed.
    """
    sam_file = os.path.join(outdir, str('alignment.sam'))

    # Index reference file
    run_successfully = indexSequenceBowtie2(referenceFile, threads)

    if run_successfully:
        if len(fastq_files) == 1:
            reads = '-U ' + fastq_files[0]
        else:
            reads = '-1 ' + fastq_files[0] + ' -2 ' + fastq_files[1]

        # The reads used to be written over '--no-unal' (index 8) while the
        # empty slot reserved for them (index 7) stayed blank, so the option
        # was silently dropped. Build the list with both in place.
        command = ['bowtie2', '-q', '--very-sensitive-local', '--threads', str(threads), '-x', referenceFile, reads, '--no-unal', '-S', sam_file]
        run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)

    if not run_successfully:
        sam_file = None

    return run_successfully, sam_file

Beispiel #28

0

Datei anzeigen

Datei: mlst.py Projekt: INNUENDOCON/INNUca

def runMlst(contigs, scheme, outdir, species_genus, mlst_scheme_genus):
    """
    Run 'mlst' on contigs, report the result and check the scheme found.

    Scheme (field 1 up to the first '_'), ST (field 2) and profile (remaining
    fields) are read from the first tab-separated output line and written to
    '<outdir>/mlst_report.txt'. If the command is reported as failed, a
    warning is set and pass_qc is True.

    Returns (run_successfully, pass_qc, failing, warnings); failing['sample']
    is False or a message, warnings is empty or holds a message under 'sample'.
    """
    pass_qc = False
    failing = {'sample': False}
    warnings = {}

    run_successfully, stdout, _ = utils.runCommandPopenCommunicate(['mlst', contigs], False, None, True)

    if run_successfully:
        result_fields = stdout.splitlines()[0].split('\t')
        scheme_mlst = result_fields[1].split('_')[0]
        st = result_fields[2]
        profile = result_fields[3:]

        report = 'MLST found ST ' + str(st) + ' from scheme ' + scheme_mlst
        print(report)
        with open(os.path.join(outdir, 'mlst_report.txt'), 'wt') as writer:
            writer.write('#scheme\n' + scheme_mlst + '\n#ST\n' + st + '\n')
            writer.write('#profile\n' + ' '.join(profile) + '\n')
            writer.flush()

        same_scheme = scheme_mlst.split('_', 1)[0] == scheme.split('_', 1)[0]
        if same_scheme:
            pass_qc = True
        elif scheme == 'unknown':
            # Nothing was expected, so anything found is accepted
            pass_qc = True
            if scheme_mlst != '-':
                warnings['sample'] = 'Found {scheme_mlst} scheme for a species with unknown scheme'.format(scheme_mlst=scheme_mlst)
        elif species_genus == 'yersinia' and mlst_scheme_genus == 'yersinia':
            pass_qc = True
            warnings['sample'] = 'Found a Yersinia scheme ({scheme_mlst}), but it is different from what it was expected ({scheme})'.format(scheme_mlst=scheme_mlst, scheme=scheme)
        else:
            failing['sample'] = 'MLST scheme found (' + scheme_mlst + ') and provided (' + scheme + ') are not the same'
            print(failing['sample'])
    else:
        warnings['sample'] = 'Did not run;'
        pass_qc = True

    if len(warnings) > 0:
        print(warnings['sample'])

    return run_successfully, pass_qc, failing, warnings

Beispiel #29

0

Datei anzeigen

Datei: fastqc.py Projekt: B-UMMI/INNUca

def fastQC(fastqc_folder, threads, adaptersFasta, fastq_files):
    """Run FastQC on *fastq_files*, writing the results into *fastqc_folder*.

    A temporary directory is created inside *fastqc_folder* for the run and
    removed afterwards.  When *adaptersFasta* is given, an adapters file is
    prepared with ``adapters2fastQC`` and passed via ``--adapters``; that
    file is deleted once FastQC has finished.

    Returns the success flag reported by the command runner.
    """
    temporary_dir = os.path.join(fastqc_folder, 'temp.fastqc_temporary_dir', '')
    os.mkdir(temporary_dir)

    # Index 9 is an empty placeholder that may receive the --adapters option
    adapters_position = 9
    command = [
        'fastqc', '-o', fastqc_folder, '--extract', '--nogroup', '--format', 'fastq',
        '--threads', str(threads), '', '--dir', temporary_dir
    ] + fastq_files

    use_adapters = adaptersFasta is not None
    if use_adapters:
        adaptersTEMP = adapters2fastQC(fastqc_folder, adaptersFasta)
        print('Scanning for adapters contamination using ' + adaptersFasta)
        command[adapters_position] = '--adapters ' + adaptersTEMP

    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    # Clean up the temporary directory and the generated adapters file
    os.rmdir(temporary_dir)
    if use_adapters:
        os.remove(adaptersTEMP)

    return run_successfully

Beispiel #30

0

Datei anzeigen

def compute_consensus_sequence(reference_file, sequence_to_analyse,
                               compressed_vcf_file, outdir, sufix):
    """Build the consensus of one reference sequence with ``bcftools consensus``.

    ``index_fasta_samtools`` is first called with the path
    ``<outdir>/<sequence_to_analyse>.fasta``; if it succeeds, that file is
    used as the ``-f`` reference for ``bcftools consensus`` together with
    *compressed_vcf_file*, and the command output is parsed with
    ``parse_fasta_inMemory``.

    Returns ``(run_successfully, sequence_dict)``; ``sequence_dict`` is
    ``None`` whenever one of the two steps fails.  *sufix* is not used.
    """
    gene_fasta = os.path.join(outdir, str(sequence_to_analyse + '.fasta'))

    run_successfully, stdout = index_fasta_samtools(reference_file, sequence_to_analyse, gene_fasta, False)
    if not run_successfully:
        return run_successfully, None

    consensus_command = ['bcftools', 'consensus', '-f', gene_fasta, compressed_vcf_file]
    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(consensus_command, False, None, False)

    sequence_dict = parse_fasta_inMemory(stdout) if run_successfully else None
    return run_successfully, sequence_dict

Beispiel #31

0

Datei anzeigen

def fastQintegrity(fastq, outdir):
    """Decompress *fastq* as an integrity check and record the outcome.

    The file is decompressed into *outdir* (named after *fastq* without its
    last extension) using the tool given by ``utils.compressionType``.  On
    success, ``run_guess_encoding_single_thread`` is run on the decompressed
    file.  The temporary file is removed and the list
    ``[run_successfully, encoding, min_reads_length, max_reads_length]`` is
    stored with ``utils.saveVariableToPickle`` under the fastq basename.
    Nothing is returned.
    """
    fastq_name = os.path.basename(fastq)
    temporary_output_file = os.path.join(outdir, os.path.splitext(fastq_name)[0])

    run_successfully = False
    encoding = min_reads_length = max_reads_length = None

    compression_type = utils.compressionType(fastq)
    if compression_type is not None:
        # The command contains a '>' redirect, so it is run with the second argument set to True
        decompress_command = [compression_type[1], '--stdout', '--keep', fastq, '>', temporary_output_file]
        run_successfully, _, _ = utils.runCommandPopenCommunicate(decompress_command, True, None, False)

        if run_successfully:
            encoding, min_reads_length, max_reads_length = run_guess_encoding_single_thread(temporary_output_file, None, outdir)

    if os.path.isfile(temporary_output_file):
        os.remove(temporary_output_file)

    results = [run_successfully, encoding, min_reads_length, max_reads_length]
    utils.saveVariableToPickle(results, outdir, fastq_name)

Beispiel #32

0

Datei anzeigen

def controlForZeroReads(fastq_files):
    """Tell whether the first fastq file holds at least one complete record.

    The first four lines of the decompressed file are read through
    ``head -n 4``.  Returns ``True`` only when the command succeeds and
    exactly four lines come back; returns ``False`` otherwise, including
    when ``utils.compressionType`` returns ``None``.
    """
    fastq = fastq_files[0]

    compression_type = utils.compressionType(fastq)
    if compression_type is None:
        return False

    head_command = [compression_type[1], '--stdout', '--keep', fastq, '|', 'head', '-n', '4']
    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(head_command, True, None, False)

    return bool(run_successfully) and len(stdout.splitlines()) == 4

Beispiel #33

0

Datei anzeigen

Datei: assembly_mapping.py Projekt: yemilawal/INNUca

def getting_mapping_statistics(alignment_file):
    """Collect the counts reported by ``samtools flagstat`` for *alignment_file*.

    Every non-blank output line is expected to look like
    ``<passed> + <failed> <description>``.  The description is turned into a
    dictionary key: anything from the first ``(`` onwards is discarded, then
    the text after the last remaining space is dropped, and spaces become
    underscores.

    Returns ``(run_successfully, dict_mapping_statistics)`` where each value
    is ``{'qc_passed': int, 'qc_failed': int}``.  The dictionary is empty
    when the command fails.
    """
    command = ['samtools', 'flagstat', alignment_file]
    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(
        command, True, None, True)

    dict_mapping_statistics = {}
    if run_successfully:
        for line in stdout.splitlines():
            # Skip blank lines up front.  The previous code indexed
            # ``line.splitlines()[0]`` before testing the length, which
            # raises IndexError for an empty line.
            if not line.strip():
                continue

            parts = line.split(' ', 3)
            description = parts[3].split('(', 1)[0]
            # NOTE: for descriptions with a parenthesised tail this only
            # strips the trailing space ('in total ' -> 'in_total'); for
            # descriptions without one it drops the last word whenever there
            # is more than one ('paired in sequencing' -> 'paired_in').  This
            # matches the keys produced so far, which callers may rely on.
            field = description.rsplit(' ', 1)[0].replace(' ', '_')
            dict_mapping_statistics[field] = {
                'qc_passed': int(parts[0]),
                'qc_failed': int(parts[2])
            }
    return run_successfully, dict_mapping_statistics

Beispiel #34

0

Datei anzeigen

Datei: pilon.py Projekt: abremges/INNUca

def mappingBowtie2(fastq_files, referenceFile, threads, outdir):
    """Map reads against *referenceFile* with Bowtie 2.

    The reference is indexed first with ``indexSequenceBowtie2``; if that
    succeeds, ``bowtie2`` is run in ``--very-sensitive-local`` mode and the
    alignment is written to ``<outdir>/alignment.sam``.  A single entry in
    *fastq_files* is mapped as unpaired reads (``-U``); otherwise the first
    two entries are used as mates (``-1`` / ``-2``).

    Returns ``(run_successfully, sam_file)``; ``sam_file`` is ``None`` when
    indexing or mapping fails.
    """
    sam_file = os.path.join(outdir, str('alignment.sam'))

    # Index reference file
    run_successfully = indexSequenceBowtie2(referenceFile, threads)

    if run_successfully:
        # Index 7 is the empty placeholder reserved for the reads option
        reads_position = 7
        command = [
            'bowtie2', '-q', '--very-sensitive-local', '--threads',
            str(threads), '-x', referenceFile, '', '-S', sam_file
        ]
        # The reads option used to be written to index 8, which replaced
        # '-S' and left the SAM path as a bare positional argument.  The
        # single-end branch also concatenated the list itself instead of
        # its only element, raising TypeError.
        if len(fastq_files) == 1:
            command[reads_position] = '-U ' + fastq_files[0]
        else:
            command[reads_position] = '-1 ' + fastq_files[0] + ' -2 ' + fastq_files[1]
        run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(
            command, False, None, True)

    if not run_successfully:
        sam_file = None

    return run_successfully, sam_file

Beispiel #35

0

Datei anzeigen

def controlForZeroReads(fastq_files):
	"""Tell whether the first fastq file holds at least one complete record.

	``gunzip`` or ``bunzip2`` is chosen from the value returned by
	``utils.compressionType`` ('gz' or 'bz2').  The first four decompressed
	lines are read through ``head -n 4``.  Returns ``True`` only when the
	command succeeds and exactly four lines come back; any other file type
	yields ``False`` without running anything.
	"""
	fastq = fastq_files[0]

	filetype = utils.compressionType(fastq)
	if filetype == 'gz':
		decompressor = 'gunzip'
	elif filetype == 'bz2':
		decompressor = 'bunzip2'
	else:
		return False

	head_command = [decompressor, '--stdout', '--keep', fastq, '|', 'head', '-n', '4']
	run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(head_command, True, None, False)

	return bool(run_successfully) and len(stdout.splitlines()) == 4

Beispiel #36

0

Datei anzeigen

Datei: spades.py Projekt: abremges/INNUca

def spades(spades_folder, threads, fastq_files, notUseCareful, maxMemory,
           minCoverageAssembly, kmers):
    """Assemble paired-end reads with ``spades.py --only-assembler``.

    ``--careful`` is added unless *notUseCareful* is true, and ``-k`` is
    added with the comma-joined *kmers* when that sequence is not empty.
    The first two entries of *fastq_files* are passed as ``-1`` and ``-2``.

    Returns ``(run_successfully, contigs)`` where ``contigs`` is the path
    ``<spades_folder>/contigs.fasta``, whether or not the run succeeded.
    """
    contigs = os.path.join(spades_folder, 'contigs.fasta')

    # Indices 1 and 9 are empty placeholders for the optional arguments
    careful_position, kmers_position = 1, 9
    command = [
        'spades.py', '', '--only-assembler',
        '--threads', str(threads),
        '--memory', str(maxMemory),
        '--cov-cutoff', str(minCoverageAssembly),
        '',
        '-1', fastq_files[0],
        '-2', fastq_files[1],
        '-o', spades_folder
    ]

    if not notUseCareful:
        command[careful_position] = '--careful'

    if len(kmers) > 0:
        kmers_list = ','.join(map(str, kmers))
        command[kmers_position] = str('-k ' + kmers_list)

    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    return run_successfully, contigs

Beispiel #37

0

Datei anzeigen

def fastQintegrity(fastq, outdir):
    """Decompress *fastq* as an integrity check and record the outcome.

    ``gunzip`` or ``bunzip2`` is chosen from the value returned by
    ``utils.compressionType`` ('gz' or 'bz2'); for any other value nothing
    is run and the outcome stays ``False``.  The decompressed output goes to
    a temporary file in *outdir*, which is removed afterwards.  The success
    flag is stored with ``utils.saveVariableToPickle`` under the fastq
    basename.  Nothing is returned.
    """
    fastq_name = os.path.basename(fastq)
    temporary_output_file = os.path.join(outdir, os.path.splitext(fastq_name)[0])

    filetype = utils.compressionType(fastq)
    if filetype == 'gz':
        decompressor = 'gunzip'
    elif filetype == 'bz2':
        decompressor = 'bunzip2'
    else:
        decompressor = ''

    run_successfully = False
    if decompressor != '':
        decompress_command = [decompressor, '--stdout', '--keep', fastq, '>', temporary_output_file]
        run_successfully, _, _ = utils.runCommandPopenCommunicate(decompress_command, True, None, False)

    if os.path.isfile(temporary_output_file):
        os.remove(temporary_output_file)

    utils.saveVariableToPickle(run_successfully, outdir, fastq_name)

Beispiel #38

0

Datei anzeigen

Datei: download.py Projekt: bfrgoncalves/ReMatCh

def alignmentToFastq(alignment_file, outdir, threads, pair_end_type):
	"""Convert an alignment file into fastq file(s) with ``samtools fastq``.

	The alignment is first passed to ``sortAlignment``, writing to
	``<alignment basename>.temp.bam``.  Depending on *pair_end_type*
	(compared case-insensitively) the reads are written to
	``<basename>_1.fq`` / ``<basename>_2.fq`` ('paired') or to
	``<basename>.fq`` ('single').  For any other value the command is still
	run without an output option and no files are reported.  The temporary
	BAM file is removed at the end.  *outdir* is not used.

	Returns ``(run_successfully, outfiles)``; ``outfiles`` is ``None``
	unless the conversion succeeded for a recognised *pair_end_type*.
	"""
	fastq_basename = os.path.splitext(alignment_file)[0]
	outfiles = None
	bamFile = fastq_basename + '.temp.bam'
	# sort cram
	run_successfully, bamFile = sortAlignment(alignment_file, bamFile, True, threads)
	if run_successfully:
		# Normalise once.  'single' used to be compared case-sensitively when
		# building the command but case-insensitively when reporting the
		# outputs, so e.g. 'Single' reported a file that was never written.
		library_type = pair_end_type.lower()

		expected_files = None
		command = ['samtools', 'fastq', '', bamFile]
		if library_type == 'paired':
			expected_files = [str(fastq_basename + '_1.fq'), str(fastq_basename + '_2.fq')]
			command[2] = '-1 ' + expected_files[0] + ' -2 ' + expected_files[1]
		elif library_type == 'single':
			expected_files = [str(fastq_basename + '.fq')]
			command[2] = '-0 ' + expected_files[0]

		run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
		if run_successfully:
			outfiles = expected_files

	# NOTE(review): what sortAlignment returns as the path on failure is not
	# visible here; guard against None so that cleanup cannot raise.
	if bamFile is not None and os.path.isfile(bamFile):
		os.remove(bamFile)

	return run_successfully, outfiles

Beispiel #39

0

Datei anzeigen

Datei: pilon.py Projekt: INNUENDOCON/INNUca

def indexAlignment(alignment_file):
    """Run ``samtools index`` on *alignment_file* and return the success flag."""
    index_command = ['samtools', 'index', alignment_file]
    run_successfully, _, _ = utils.runCommandPopenCommunicate(index_command, False, None, True)
    return run_successfully

Beispiel #40

0

Datei anzeigen

Datei: mlst.py Projekt: abremges/INNUca

def getBlastPath():
    """Print the output of ``which blastn`` below a short heading.

    Nothing is returned; the success flag of the command is ignored.
    """
    print('\n' + 'The following blastn will be used')
    _, which_output, _ = utils.runCommandPopenCommunicate(['which', 'blastn'], False, None, True)
    print(which_output)

Beispiel #41

0

Datei anzeigen

Datei: assembly_mapping.py Projekt: B-UMMI/INNUca

def compute_genome_coverage_data(alignment_file, sequence_to_analyse, outdir, counter):
    """Write ``samtools depth -a`` output for one region to a tab file.

    The output is redirected to
    ``<outdir>/samtools_depth.sequence_<counter>.tab``.

    Returns ``(run_successfully, genome_coverage_data_file)``; the path is
    returned whether or not the command succeeded.
    """
    output_name = 'samtools_depth.sequence_' + str(counter) + '.tab'
    genome_coverage_data_file = os.path.join(outdir, output_name)

    # The command contains a '>' redirect, so it is run with the second argument set to True
    depth_command = [
        'samtools', 'depth', '-a',
        '-r', sequence_to_analyse,
        alignment_file,
        '>', genome_coverage_data_file
    ]
    run_successfully, _, _ = utils.runCommandPopenCommunicate(depth_command, True, None, False)
    return run_successfully, genome_coverage_data_file

Beispiel #42

0

Datei anzeigen

Datei: download.py Projekt: B-UMMI/getSeqENA

def curl_installed():
    """Return the success flag of running ``which curl``."""
    run_successfully, _, _ = utils.runCommandPopenCommunicate(['which', 'curl'], False, None, False)
    return run_successfully

Beispiel #43

0

Datei anzeigen

def runMlst(contigs, scheme, outdir, species_genus, mlst_scheme_genus):
    """Run ``mlst --novel`` on *contigs* and compare the scheme with *scheme*.

    Only the first line of the command output is used.  It is split on tabs:
    the second column gives the scheme (text before the first underscore),
    the third the ST and the remaining columns the allelic profile.  The
    novel-alleles fasta written by the tool is passed to
    ``clean_novel_alleles`` when the ST is ``'-'`` and deleted otherwise.
    A short report is printed and saved as ``mlst_report.txt`` in *outdir*.

    Returns ``(run_successfully, pass_qc, failing, warnings)`` where
    ``failing`` and ``warnings`` are dictionaries keyed by ``'sample'``.
    When the command does not run successfully, ``failing['sample']`` is
    set to 'Did not run' and the sample does not pass QC.
    """
    pass_qc = False
    failing = {'sample': False}
    warnings = {}

    novel_alleles = os.path.join(outdir, 'mlst_novel_alleles.fasta')

    mlst_command = ['mlst', '--novel', novel_alleles, contigs]
    run_successfully, stdout, _ = utils.runCommandPopenCommunicate(mlst_command, False, None, True)

    if not run_successfully:
        failing['sample'] = 'Did not run'
    else:
        # Split the first output line once and pick the columns from it
        fields = stdout.splitlines()[0].split('\t')
        scheme_mlst = fields[1].split('_')[0]
        st = fields[2]
        profile = fields[3:]

        if os.path.isfile(novel_alleles):
            if st == '-':
                clean_novel_alleles(novel_alleles=novel_alleles,
                                    scheme_mlst=scheme_mlst,
                                    profile=profile)
            else:
                os.remove(novel_alleles)

        print('MLST found ST ' + str(st) + ' from scheme ' + scheme_mlst)

        report_lines = ['#scheme', scheme_mlst, '#ST', st, '#profile', ' '.join(profile)]
        with open(os.path.join(outdir, 'mlst_report.txt'), 'wt') as writer:
            for report_line in report_lines:
                writer.write(report_line + '\n')

        if scheme_mlst.split('_', 1)[0] == scheme.split('_', 1)[0]:
            pass_qc = True
        elif scheme == 'unknown':
            # Nothing was expected, so any outcome passes; only warn when a scheme was actually found
            pass_qc = True
            if scheme_mlst != '-':
                warnings['sample'] = 'Found {scheme_mlst} scheme for a species with unknown scheme'.format(
                    scheme_mlst=scheme_mlst)
        elif scheme_mlst == '-':
            # A scheme was expected (scheme is not 'unknown' here) but none was found
            pass_qc = True
            warnings['sample'] = 'Could not find a scheme for a species with known scheme ({})'.format(scheme)
        elif species_genus == 'yersinia' and mlst_scheme_genus == 'yersinia':
            pass_qc = True
            warnings['sample'] = 'Found a Yersinia scheme ({scheme_mlst}), but it is different from what it was expected ({scheme})'.format(
                scheme_mlst=scheme_mlst, scheme=scheme)
        elif mlst_scheme_genus is not None and scheme_mlst == scheme == mlst_scheme_genus:
            pass_qc = True
        else:
            failing['sample'] = 'MLST scheme found ({scheme_mlst}) and provided ({scheme}) are not the same'.format(
                scheme_mlst=scheme_mlst, scheme=scheme)
            print(failing['sample'])

    if warnings:
        print(warnings['sample'])

    return run_successfully, pass_qc, failing, warnings

Beispiel #44

0

Datei anzeigen

Datei: download.py Projekt: bfrgoncalves/ReMatCh

def downloadWithFtp(ftp_file_path, outdir, pickle_prefix):
	"""Download *ftp_file_path* into *outdir* with ``wget`` and record the outcome.

	The local file keeps the name after the last '/' of *ftp_file_path*.
	The success flag is stored with ``utils.saveVariableToPickle`` under
	``<pickle_prefix>.<file name>``.  Nothing is returned.
	"""
	file_download = ftp_file_path.rsplit('/', 1)[1]
	destination = os.path.join(outdir, file_download)

	# 3600 is passed as the third argument of the runner (presumably a timeout in seconds; confirm against utils)
	wget_command = ['wget', ftp_file_path, '-O', destination]
	run_successfully, _, _ = utils.runCommandPopenCommunicate(wget_command, False, 3600, True)

	pickle_name = str(pickle_prefix + '.' + file_download)
	utils.saveVariableToPickle(run_successfully, outdir, pickle_name)

Beispiel #45

0

Datei anzeigen

Datei: download.py Projekt: bfrgoncalves/ReMatCh

def downloadWithAspera(aspera_file_path, asperaKey, outdir, pickle_prefix):
	"""Download a file into *outdir* with ``ascp`` and record the outcome.

	The source is ``era-fasp@<aspera_file_path>`` and *asperaKey* is passed
	with ``-i``.  The success flag is stored with
	``utils.saveVariableToPickle`` under ``<pickle_prefix>.<file name>``,
	where the file name is the text after the last '/' of
	*aspera_file_path*.  Nothing is returned.
	"""
	source = str('era-fasp@' + aspera_file_path)

	# 3600 is passed as the third argument of the runner (presumably a timeout in seconds; confirm against utils)
	ascp_command = ['ascp', '-QT', '-l', '300m', '-i', asperaKey, source, outdir]
	run_successfully, _, _ = utils.runCommandPopenCommunicate(ascp_command, False, 3600, True)

	file_download = aspera_file_path.rsplit('/', 1)[1]
	pickle_name = str(pickle_prefix + '.' + file_download)
	utils.saveVariableToPickle(run_successfully, outdir, pickle_name)

Beispiel #46

0

Datei anzeigen

Datei: mlst.py Projekt: B-UMMI/INNUca

def getBlastPath():
    """Print the output of ``which blastn`` below a short heading.

    Nothing is returned; the success flag of the command is ignored.
    """
    heading = '\n' + 'The following blastn will be used'
    print(heading)
    _, which_output, _ = utils.runCommandPopenCommunicate(['which', 'blastn'], False, None, True)
    print(which_output)

Beispiel #47

0

Datei anzeigen

Datei: pear.py Projekt: yemilawal/INNUca

def run_pear(decompressed_reads_list, sample_name, threads, outdir, fastq_encoding, trimmomatic_run_successfully, minimum_overlap_reads):
    """Run PEAR on a pair of fastq files and evaluate the overlap QC.

    The first two entries of *decompressed_reads_list* are used as forward
    and reverse reads, and ``<outdir>/<sample_name>`` as the output prefix.
    The command output is saved to ``<sample_name>.pear_out.txt`` in
    *outdir*.  After a successful run the PEAR output files are gzipped
    with ``compress_decompress`` and the uncompressed copies removed.

    Returns ``(run_successfully, pass_qc, failing, assembled_se_reads,
    unassembled_pe_reads, assembled_reads, unassembled_reads,
    discarded_reads)``.  ``pass_qc`` is ``True`` only when fewer than 75% of
    the assembled + unassembled reads were assembled.
    """
    pass_qc = False
    # NOTE(review): stays empty (no 'sample' key) when PEAR does not run successfully
    failing = {}

    # Index 12 (the empty string after '--phred-base') is a placeholder filled in below
    command = ['pear', '--forward-fastq', decompressed_reads_list[0], '--reverse-fastq', decompressed_reads_list[1], '--output', os.path.join(outdir, sample_name), '--p-value', str(1.0), '--min-assembly-length', str(minimum_overlap_reads), '--phred-base', '', '--cap', str(0), '--threads', str(threads), '--memory', str(str(threads) + 'G'), '--keep-original']

    if trimmomatic_run_successfully:
        # NOTE(review): this is the only call that passes True as the second argument; the others pass False
        command[12] = '33'
        run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, True, None, True)
    else:
        if fastq_encoding is not None:
            # Uses the second element of fastq_encoding as the Phred base
            command[12] = str(fastq_encoding[1])
            run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
        else:
            # No encoding known: try Phred+33 first, then Phred+64 if that fails.
            # NOTE(review): the first message is printed before any PEAR run has been attempted in this branch.
            print 'Pear fail! Trying run with Phred+33 enconding defined...'
            command[12] = '33'
            run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
            if not run_successfully:
                print 'Pear fail again! Trying run with Phred+64 enconding defined...'
                command[12] = '64'
                run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)

    # Save the output of the last PEAR run, written item by item while iterating over stdout
    with open(os.path.join(outdir, str(sample_name + '.pear_out.txt')), 'wt') as writer:
        for line in stdout:
            writer.write(line)

    unassembled_pe_reads, assembled_se_reads = None, None
    assembled_reads, unassembled_reads, discarded_reads = None, None, None
    if run_successfully:
        assembled_reads, unassembled_reads, discarded_reads = parse_pearOutput_getAssembled(stdout)
        unassembled_pe_reads_uncompressed, assembled_se_reads_uncompressed = get_pear_output(outdir, sample_name)
        if assembled_reads == 0:
            assembled_se_reads = None
        else:
            # Gzip the assembled reads; run_successfully is overwritten by the lookup of the resulting file
            compress_decompress(str(assembled_se_reads_uncompressed + '.gz'), assembled_se_reads_uncompressed, True)
            run_successfully, assembled_se_reads = get_compressed_decompressed_reads(outdir)
            assembled_se_reads = assembled_se_reads[0] if run_successfully else assembled_se_reads

        if unassembled_reads == 0:
            unassembled_pe_reads = None
        else:
            if run_successfully:
                # Gzip the unassembled files in a worker pool.
                # NOTE(review): the apply_async results are never retrieved, so an exception raised
                # inside a worker would not be reported here.
                pool = multiprocessing.Pool(processes=threads)
                for fastq in unassembled_pe_reads_uncompressed:
                    pool.apply_async(compress_decompress, args=(str(fastq + '.gz'), fastq, True,))
                pool.close()
                pool.join()

                run_successfully, unassembled_pe_reads = get_compressed_decompressed_reads(outdir)

        # The uncompressed PEAR outputs are removed whether or not they were compressed above
        os.remove(assembled_se_reads_uncompressed)
        for fastq in unassembled_pe_reads_uncompressed:
            os.remove(fastq)

        # QC: pass only when fewer than 75% of the reads overlapped.
        # NOTE(review): raises ZeroDivisionError when both counts are 0; assumes the counts are numeric (confirm).
        # This check runs even if run_successfully was reset to a false value by the compression steps above.
        if float(assembled_reads) / float(assembled_reads + unassembled_reads) < 0.75:
            pass_qc = True
            failing['sample'] = False
        else:
            failing['sample'] = 'Number of overlapping reads is >= 75% of total reads'
            # Prints the whole dictionary, not just the message
            print failing

    return run_successfully, pass_qc, failing, assembled_se_reads, unassembled_pe_reads, assembled_reads, unassembled_reads, discarded_reads

Beispiel #48

0

Datei anzeigen

def indexAlignment(alignment_file):
    """Run ``samtools index`` on *alignment_file* and return the success flag."""
    index_command = ['samtools', 'index', alignment_file]
    run_successfully, _, _ = utils.runCommandPopenCommunicate(index_command, False, None, True)
    return run_successfully

Beispiel #49

0

Datei anzeigen

def trimmomatic(jar_path_trimmomatic, sampleName, trimmomatic_folder, threads,
                adaptersFasta, script_path, doNotSearchAdapters, fastq_files,
                maxReadsLength, doNotTrimCrops, crop, headCrop, leading,
                trailing, slidingWindow, minLength, nts2clip_based_ntsContent,
                jarMaxMemory, fastq_encoding):
    """Run Trimmomatic in paired-end mode on *fastq_files*.

    For every input file a paired ('P.fastq.gz') and an unpaired
    ('U.fastq.gz') output name is built inside *trimmomatic_folder*.
    Optional steps are slotted into placeholder positions of the command:

    * ``-Xmx`` (from *jarMaxMemory* * 1024, in M) unless *jarMaxMemory* is
      'off';
    * ``CROP`` / ``HEADCROP`` unless *doNotTrimCrops* is set, taken from
      *crop* / *headCrop* or, failing that, from
      *nts2clip_based_ntsContent*;
    * ``ILLUMINACLIP`` unless *doNotSearchAdapters* is set, using
      *adaptersFasta* or a concatenation of the adapter files found under
      ``src/Trimmomatic-0.36/adapters`` next to *script_path*.

    If *fastq_encoding* is given, a first run is made with it (``-phred33``
    or ``-phred64`` when it equals 33 or 64).  If there was no run or it
    failed, Phred+33 and then Phred+64 are tried.  *sampleName* is not used.

    Returns the success flag of the last run.
    """
    # Positions of the empty placeholders inside the command list
    memory_position = 1
    phred_position = 7
    crop_position = 10
    headcrop_position = 11
    adapters_position = 12

    trimmomatic_out_files = []
    for fastq in fastq_files:
        # Strip two extensions (e.g. '.fastq.gz') from the file name
        stem = os.path.splitext(os.path.splitext(os.path.basename(fastq))[0])[0]
        for suffix in ('P.fastq.gz', 'U.fastq.gz'):
            trimmomatic_out_files.append(os.path.join(trimmomatic_folder, str(stem + suffix)))

    # Run Trimmomatic
    command = [
        'java', '', '-jar', jar_path_trimmomatic, 'PE',
        '-threads', str(threads),
        '',
        ' '.join(fastq_files),
        ' '.join(trimmomatic_out_files),
        '', '', '',
        str('SLIDINGWINDOW:' + slidingWindow),
        str('LEADING:' + str(leading)),
        str('TRAILING:' + str(trailing)),
        str('MINLEN:' + str(minLength)),
        'TOPHRED33'
    ]

    if str(jarMaxMemory) != 'off':
        command[memory_position] = '-Xmx' + str(int(round(jarMaxMemory * 1024, 0))) + 'M'

    if not doNotTrimCrops:
        # CROP needs the maximum read length to turn "bases to clip" into "bases to keep"
        if maxReadsLength is None:
            print('Because FastQC did not run successfully, --trimCrop option will not be considered')
        elif crop is not None:
            crop = maxReadsLength - crop[0]
            command[crop_position] = str('CROP:' + str(crop))
        elif nts2clip_based_ntsContent is not None:
            nts_to_clip_end = nts2clip_based_ntsContent[1]
            print(str(nts_to_clip_end) + ' nucleotides will be clipped at the end of reads')
            command[crop_position] = str('CROP:' + str(maxReadsLength - nts_to_clip_end))

        if headCrop is not None:
            command[headcrop_position] = str('HEADCROP:' + str(headCrop[0]))
        elif nts2clip_based_ntsContent is not None:
            nts_to_clip_start = nts2clip_based_ntsContent[0]
            print(str(nts_to_clip_start) + ' nucleotides will be clipped at the beginning of reads')
            command[headcrop_position] = str('HEADCROP:' + str(nts_to_clip_start))

    if not doNotSearchAdapters:
        if adaptersFasta is None:
            trimmomatic_adapters_folder = os.path.join(
                os.path.dirname(script_path), 'src', 'Trimmomatic-0.36', 'adapters')
            adapters_names = ('Nextera_XT_INNUca.fasta', 'TruSeq2-PE.fa', 'TruSeq3-PE-2.fa')
            adapters_files = [os.path.join(trimmomatic_adapters_folder, adapters_name)
                              for adapters_name in adapters_names]
            print('Removing adapters contamination using ' + str(adapters_files))
            adaptersFasta = concatenateFastaFiles(
                adapters_files, trimmomatic_folder, 'concatenated_adaptersFile.fasta')
        else:
            print('Removing adapters contamination using ' + adaptersFasta)
        command[adapters_position] = 'ILLUMINACLIP:' + adaptersFasta + ':3:30:10:6:true'

    run_successfully = False
    if fastq_encoding is not None:
        if fastq_encoding == 33:
            command[phred_position] = '-phred33'
        elif fastq_encoding == 64:
            command[phred_position] = '-phred64'
        run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    # Fall back to explicit encodings, stopping at the first successful run
    fallbacks = (
        ('Trying to run Trimmomatic with Phred+33 enconding defined...', '-phred33'),
        ('Trimmomatic fail again! Trying to run with Phred+64 enconding defined...', '-phred64'),
    )
    for message, phred_option in fallbacks:
        if run_successfully:
            break
        print(message)
        command[phred_position] = phred_option
        run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    return run_successfully