Ejemplo n.º 1
0
def run_kallisto(k_index,
                 query_file_paths,
                 output_dir,
                 kallisto_executable='kallisto',
                 b=30,
                 t=4):
    '''
    Run `kallisto quant` once for every pair of read files.

    Each pair gets its own results directory, named after the first file
    of the pair (`<basename>_kallisto`) and placed inside the
    'kallisto_results' directory obtained from if_not_dir_make.

    Arguments:
        k_index: path to the kallisto index (passed to `-i`).
        query_file_paths: iterable of two-item pairs of read file paths.
        output_dir: parent directory handed to if_not_dir_make.
        kallisto_executable: name or path of the kallisto program.
        b: value passed to `-b` (bootstrap samples in the kallisto manual).
        t: value passed to `-t` (threads in the kallisto manual).

    Returns a list holding one results directory path per pair, in input
    order. The exit status of kallisto is not inspected.
    '''
    results_root = if_not_dir_make(output_dir, 'kallisto_results')
    result_dirs = []
    for first_reads, second_reads in query_file_paths:
        # The results directory is keyed on the first file of the pair.
        pair_dir = os.path.join(results_root,
                                os.path.basename(first_reads) + '_kallisto')
        result_dirs.append(pair_dir)
        argv = (kallisto_executable, 'quant', '-i', k_index, '-o', pair_dir,
                '-b', b, '-t', t, first_reads, second_reads)
        # subprocess needs strings; b and t are usually ints.
        subprocess.call(list(map(str, argv)))
    return result_dirs
Ejemplo n.º 2
0
def search_BTI(BTI, query_files, output_dir, log, threads=4):
    '''
    Align paired-end reads against a bowtie2 index and convert every
    alignment to fasta.

    For each (read_a, read_b) pair bowtie2 writes a SAM file named after
    read_a into the 'bowtie_results' directory obtained from
    if_not_dir_make. The SAM file is then passed to convert_sam_to_fasta,
    and a comparison line built by make_read_comparison_string is written
    to the log and printed.

    Arguments:
        BTI: path (prefix) of the bowtie2 index, passed to `-x`.
        query_files: iterable of two-item pairs of read file paths.
        output_dir: parent directory handed to if_not_dir_make.
        log: open, writable file-like object that receives one line per pair.
        threads: number of bowtie2 threads, passed to `--threads`.

    Returns a list with the value returned by convert_sam_to_fasta for
    every pair, in input order. The exit status of bowtie2 is not inspected.
    '''
    cond_dict = default_conditions()  # used to look up each sample's condition
    output_dir = if_not_dir_make(output_dir, 'bowtie_results')

    output_files = []
    for read_a, read_b in query_files:
        sam_name = os.path.basename(read_a) + '.sam'
        output_file = os.path.join(output_dir, sam_name)

        sample, date = test_for_condition(cond_dict, read_a)
        # NOTE(review): presumably raises KeyError for a sample missing
        # from DONOR_DICT -- confirm against its definition.
        donor = DONOR_DICT[sample]

        print('Running new Bowtie Search')
        # Honour the caller's thread count instead of a hard-coded '4'.
        # The --no-* flags keep unaligned, discordant and unpaired
        # alignments out of the results.
        cmd = ['bowtie2', '-x', BTI, '-1', read_a, '-2', read_b,
               '-S', output_file, '--threads', str(threads),
               '--no-discordant', '--no-unal', '--no-mixed']
        subprocess.call(cmd)
        fasta_file = convert_sam_to_fasta(output_file)
        output_files.append(fasta_file)
        log_string = make_read_comparison_string(read_a, fasta_file, donor,
                                                 date)
        log.write(log_string + '\n')
        print(log_string)

    return output_files
Ejemplo n.º 3
0
def assemble_with_spades(input_file, output_dir, threads=4,
                         spades_executable='spades'):
    '''
    Assemble a single reads file with SPAdes in assembler-only mode.

    The file is handed to SPAdes through `--12` with the fixed k-mer list
    55,77,99,127, and SPAdes writes into the 'SPAdes' directory obtained
    from if_not_dir_make.

    Arguments:
        input_file: path of the reads file passed to `--12`.
        output_dir: parent directory handed to if_not_dir_make.
        threads: value passed to `-t`.
        spades_executable: name or path of the SPAdes program.

    Returns the expected path of 'contigs.fasta' inside the SPAdes output
    directory. Neither the exit status nor the file's existence is checked.
    '''
    assembly_dir = if_not_dir_make(output_dir, 'SPAdes')
    argv = (spades_executable, '--12', input_file, '-k', '55,77,99,127',
            '-t', threads, '--only-assembler', '-o', assembly_dir)
    # threads is usually an int, so stringify the whole command line.
    subprocess.call(list(map(str, argv)))
    contigs_path = os.path.join(assembly_dir, 'contigs.fasta')
    return contigs_path
Ejemplo n.º 4
0
def build_bowtie_index(input_file, output_dir, index_name='MP_BTI'):
    '''
    Build a bowtie2 index from a fasta file with `bowtie2-build`.

    The index files share the prefix `<index_name>` and live in a directory
    that is also called `<index_name>`, obtained from if_not_dir_make.

    Arguments:
        input_file: path of the reference fasta file.
        output_dir: parent directory handed to if_not_dir_make.
        index_name: name used for both the index directory and the prefix.

    Returns the index prefix path to use when querying the index. The exit
    status of bowtie2-build is not inspected.
    '''
    index_dir = if_not_dir_make(output_dir, index_name)
    index_prefix = os.path.join(index_dir, index_name)

    print('Building Bowtie Index')
    subprocess.call(['bowtie2-build', input_file, index_prefix])

    return index_prefix
Ejemplo n.º 5
0
def make_kalisto_index(trans_file,
                       output_dir,
                       kallisto_executable='kallisto',
                       kalliso_index_name='mp_kalisto_index.idx'):
    '''
    Create a kallisto index from a transcriptome fasta file.

    Runs `kallisto index ... --make-unique` through subprocess and places
    the index inside the 'Kallisto_Index' directory obtained from
    if_not_dir_make.

    Arguments:
        trans_file: path of the transcriptome fasta file.
        output_dir: parent directory handed to if_not_dir_make.
        kallisto_executable: name or path of the kallisto program.
        kalliso_index_name: file name of the index to create.

    Returns the path of the index file as a string. The exit status of
    kallisto is not inspected.
    '''
    index_dir = if_not_dir_make(output_dir, 'Kallisto_Index')
    index_path = os.path.join(index_dir, kalliso_index_name)
    subprocess.call([kallisto_executable, 'index', '-i', index_path,
                     trans_file, '--make-unique'])
    return index_path
Ejemplo n.º 6
0
def run_sleuth(sleuth_table,
               output_dir,
               log,
               results_file_name='Sleuth_results.txt'):
    '''
    Run the sleuth_R.R script on a sleuth table and log its results.

    The script is launched with Rscript and is looked up relative to the
    current working directory. Its output file goes into the
    'sleuth_results' directory obtained from if_not_dir_make, and the
    results path is then passed to write_results_to_log together with log.

    Arguments:
        sleuth_table: path of the sleuth table, passed to `-f`.
        output_dir: parent directory handed to if_not_dir_make.
        log: log object forwarded to write_results_to_log.
        results_file_name: file name for the sleuth results.

    Returns the path of the sleuth results file. The exit status of
    Rscript is not inspected.
    '''
    results_path = os.path.join(
        if_not_dir_make(output_dir, 'sleuth_results'), results_file_name)
    subprocess.call(
        ['Rscript', 'sleuth_R.R', '-f', sleuth_table, '-o', results_path])

    write_results_to_log(results_path, log)

    return results_path
Ejemplo n.º 7
0
def get_args():
    '''
    Parse the command line options and fill in fallback values.

    Every option takes a value and none is converted, so a value given on
    the command line is a string while the defaults for -t, -local and
    -test are ints.

    After parsing:
        * a 'miniProject_Ethan_Holleman' directory under the current
          working directory is obtained from if_not_dir_make;
        * -o and -l fall back to that directory when they are empty;
        * -test set to 1 points -q at './test_data/SRA_to_FASTQ'.

    Returns the argparse namespace.
    '''
    option_table = (
        ('-o', dict(default='./miniProject_Ethan_Holleman',
                    help='Output directory to write files to')),
        ('-i', dict(help='Input directory if files already \
                                  downloaded or for test data')),
        ('-k', dict(help='Path to kallisto index if already created')),
        ('-r', dict(help='Path to kallisto ressults if already created')),
        ('-q', dict(help='Path to coverted fastq files if already created')),
        ('-b', dict(help='Path bowtie2 index if already created')),
        ('-s', dict(help='Path to bowtie results if already ran')),
        ('-f', dict(help='Path to big fasta file')),
        ('-a', dict(help='Path to assembled genome if exists')),
        ('-t', dict(default=2, help='Number of threads')),
        ('-l', dict(help='path to write log file to')),
        ('-local', dict(default=0,
                        help='If set to 1 runs BLAST search locally')),
        ('-test', dict(
            default=0,
            help='Run the program in with test data. You will still need to set -local 1 to run BLAST locally.')),
    )

    parser = argparse.ArgumentParser(description='Process args for rm_verify')
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    args = parser.parse_args()
    project_dir = if_not_dir_make(os.getcwd(), 'miniProject_Ethan_Holleman')
    # Output and log locations fall back to the project directory.
    for dest in ('o', 'l'):
        if not getattr(args, dest):
            setattr(args, dest, project_dir)
    # -test is the int 1 only if set programmatically; the CLI gives '1'.
    if args.test in (1, '1'):
        args.q = './test_data/SRA_to_FASTQ'

    return args
Ejemplo n.º 8
0
def run_blast(seq_object, output_dir, dir_name='BLAST_results'):
    '''
    BLAST a sequence against the NCBI 'nr' database over the network,
    restricted to Herpesviridae, and save the XML response to disk.

    The search is a megablast `blastn` query with an e-value cutoff of
    1e-200 that requests at most 10 hits and 10 alignments. The response
    is written to `<dir_name>.xml` inside the directory obtained from
    if_not_dir_make(output_dir, dir_name).

    Arguments:
        seq_object: the query; it is converted with str() before sending.
        output_dir: parent directory handed to if_not_dir_make.
        dir_name: name of both the results directory and the xml file stem.

    Returns the path of the XML file, which can be opened and parsed with
    NCBIXML.read().
    '''
    blast_path = if_not_dir_make(output_dir, dir_name)
    xml_path = os.path.join(blast_path, dir_name + '.xml')
    result_handle = NCBIWWW.qblast('blastn',
                                   'nr',
                                   str(seq_object),
                                   entrez_query='Herpesviridae[ORGN]',
                                   hitlist_size=10,
                                   expect=1e-200,
                                   megablast=True,
                                   alignments=10)
    # Drain and close the response handle so it is released even if
    # reading fails.
    try:
        xml_text = result_handle.read()
    finally:
        result_handle.close()

    with open(xml_path, "w") as out_handle:
        print('writing results')
        out_handle.write(xml_text)

    return xml_path
Ejemplo n.º 9
0
def local_blast(seq, local_BDB, output_dir, dir_name='BLAST_results'):
    '''
    Run blastn (BLAST+) locally against an existing BLAST database.

    The query is written as a one-record fasta file
    ('BLAST_query.fasta') inside the directory obtained from
    if_not_dir_make(output_dir, dir_name). blastn is then run with
    `-outfmt 5` (XML) and writes `<dir_name>.xml` in the same directory.
    This function does not unpack or build the database; local_BDB must
    already be usable by blastn.

    Arguments:
        seq: the query sequence; converted with str() before writing so a
            sequence object is accepted as well as a plain string.
        local_BDB: database name or path passed to `-db`.
        output_dir: parent directory handed to if_not_dir_make.
        dir_name: name of both the results directory and the xml file stem.

    Returns the path of the XML results file. The exit status of blastn
    is not inspected.
    '''
    blast_path = if_not_dir_make(output_dir, dir_name)
    xml_path = os.path.join(blast_path, dir_name + '.xml')
    query_path = os.path.join(blast_path, 'BLAST_query.fasta')
    print('writing query')
    print(query_path)
    with open(query_path, 'w') as qp:
        qp.write('>Query Sequence\n')
        # str() mirrors run_blast; file.write rejects non-str objects.
        qp.write(str(seq))
    cmd = [
        'blastn', '-db', local_BDB, '-query', query_path, '-outfmt', '5',
        '-out', xml_path
    ]
    subprocess.call(cmd)

    return xml_path