Example #1
0
def cleanup():
    '''
    Removes the "nucmer.error" file from the Py_code folder, if present
      (Mac only).
      nucmer or mummer, which is run as part of quast, creates an error file
      in the Py_code folder. That seems to be a problem only for the Mac,
      not Linux.
    If the file does not exist, nothing happens.
    return: None
    '''

    # full path to the error file; the file name must be appended exactly
    #   once, otherwise the path never matches and the file is never deleted
    error_file = BASE_PATH + config.get_DO_PATHS()['PY_dir'] + 'nucmer.error'

    if os.path.exists(error_file):
        os.remove(error_file)
Example #2
0
Created on Thu Jul  5 16:07:55 2018

@authors: 
    Wolfgang Haas, Pascal Lapierre, and Kimberlee A. Musser
    Wadsworth Center, New York State Department of Health
    120 New Scotland Ave., Albany, New York 12208
    [email protected]
    
last update: 24 September 2020                                
"""

import pydot
from heapq import heappop, heappush
import config

# folder settings, read once from the shared configuration
_DO_PATHS = config.get_DO_PATHS()
BASE_PATH = _DO_PATHS['BASE_PATH']
TEMP_dir = _DO_PATHS['TEMP_dir']

##### generate the minimum spanning tree from distance data ###################


def make_graph(lo_concat_pairwise_diffs):
    '''
    Turns a list of [(G1, G2, V1),(G2, G1, V1),...] tuples into a graph, which 
      is in this case a dictionary of dictionaries. G1 and G2 are the Genomes
      (isolates), V1 (Value) is the number of either SNPs or mutation events.
    param: list lo_concat_pairwise_diffs = list of (G1, G2, V1) and its 
           inverse, (G2, G1, V1), which are both needed by prim_mst()
    return: a graph (a dict of dict), input for prim_mst()
    '''
Example #3
0
@authors: 
    Wolfgang Haas, Pascal Lapierre, and Kimberlee A. Musser
    Wadsworth Center, New York State Department of Health
    120 New Scotland Ave., Albany, New York 12208
    [email protected]
    
last update: 24 September 2020                                
"""

import os
import shutil
import re
import config
import toolshed

# folder settings, read once from the shared configuration
_DO_PATHS = config.get_DO_PATHS()
BASE_PATH = _DO_PATHS['BASE_PATH']
TEMP_dir = _DO_PATHS['TEMP_dir']
OUTPUT_dir = _DO_PATHS['OUTPUT_dir']
GENOMES_dir = _DO_PATHS['GENOMES_dir']

# (image, working directory) pairs configured for the external tools
_DO_IMAGES = config.get_DO_IMAGES()
Parsnp_image, Parsnp_WorkingDir = _DO_IMAGES['Parsnp']
NU_image, NU_WorkingDir = _DO_IMAGES['Newick_utils']

##### housekeeping ############################################################


def sort_input(lo_phylo_tree_data):
    ''' 
    Converts a list of (sp_abbr, isolate, work_dir, ref_name), collected by 
      pipeline_master.py, into a dict of pipeline/ref/ : [(work_dir, isolate)] 
      items.
Example #4
0
                        
@authors: 
    Wolfgang Haas, Pascal Lapierre, and Kimberlee A. Musser
    Wadsworth Center, New York State Department of Health
    120 New Scotland Ave., Albany, New York 12208
    [email protected]
    
last update: 24 September 2020                                
"""

import numpy as np
import config
import toolshed

# folder settings, read once from the shared configuration
_DO_PATHS = config.get_DO_PATHS()
BASE_PATH = _DO_PATHS['BASE_PATH']
OUTPUT_dir = _DO_PATHS['OUTPUT_dir']

# (image, working directory) pair configured for Kraken
Kraken_image, Kraken_WorkingDir = config.get_DO_IMAGES()['Kraken']

##### running Kraken ##########################################################


def run_Kraken(work_dir):
    ''' 
    Runs Minikraken to classify contigs by species. Output is a number for the
      classification and kmer counts, which needs to be translated into
      human-readable form.
    param: str work_dir = isolate-specific folder, e.g.: 'WH200812_001259/'
    return: ReturnCode, StdOut, StdErr
    output: 'kraken_out.txt' file
Example #5
0
@authors: 
    Wolfgang Haas, Pascal Lapierre, and Kimberlee A. Musser
    Wadsworth Center, New York State Department of Health
    120 New Scotland Ave., Albany, New York 12208
    [email protected]
    
last update: 24 September 2020
"""

import LegioCluster_main
import config
import os
import sys

# set the paths to various folders (the configuration is read only once)
_DO_PATHS = config.get_DO_PATHS()
BASE_PATH = _DO_PATHS['BASE_PATH']
INPUT_dir = _DO_PATHS['INPUT_dir']
VCF_dir = _DO_PATHS['VCF_dir']
GENOMES_dir = _DO_PATHS['GENOMES_dir']
READS_dir = _DO_PATHS['READS_dir']
TEMP_dir = _DO_PATHS['TEMP_dir']
OUTPUT_dir = _DO_PATHS['OUTPUT_dir']
REF_dir = _DO_PATHS['REF_dir']

##### various checks ##########################################################

# this is purely a check to see that these modules are available
import matplotlib
import numpy
import pydot
Example #6
0
        (also <reference_SNP_cons.txt> if not already present)

@authors: 
    Wolfgang Haas, Pascal Lapierre, and Kimberlee A. Musser
    Wadsworth Center, New York State Department of Health
    120 New Scotland Ave., Albany, New York 12208
    [email protected]
    
last update: 24 September 2020                                
"""

import csv
import os
import config

# folder settings, read once from the shared configuration
_DO_PATHS = config.get_DO_PATHS()
BASE_PATH = _DO_PATHS['BASE_PATH']
TEMP_dir = _DO_PATHS['TEMP_dir']
REF_dir = _DO_PATHS['REF_dir']
VCF_dir = _DO_PATHS['VCF_dir']

##### reading the reference fasta file ########################################


def read_ref_file(SS_dir, ref_fa_file):
    ''' 
    Reads a fasta file with the sequence of the reference genome (consisting 
      of one or more contigs) and returns a list of headers and sequences.
    param: str SS_dir = species-specific directory, e.g.: 'Lpn/'
    param: str ref_fa_file = name of a reference strain's FASTA file
    return: list lo_contigs = list of headers and sequences, e.g.:
        [[NODE_1_length_6526_cov_26.4, 'ACTTGTACTAATTGGCTGATTGTTGACATAA...'],
Example #7
0
import stat

from numpy import median


# dict of abbreviations : [species name, genome length]
DO_SPECIES = config.get_DO_SPECIES()

# list of species set up to run the pipeline
LO_PIPELINES = config.get_LO_SP_ABBR()

# folder where all files are located: the current working directory, with a
#   trailing '/' added
# alternative definitions, kept for reference:
#BASE_PATH   = config.get_DO_PATHS()['BASE_PATH']
#BASE_PATH   = os.path.dirname(os.getcwd())    # parent folder to current folder
BASE_PATH = os.getcwd() + '/'

# folder names, read once from the shared configuration
_DO_PATHS = config.get_DO_PATHS()
GENOMES_dir = _DO_PATHS['GENOMES_dir']  # 'Genomes/'
PY_dir = _DO_PATHS['PY_dir']            # 'Py_code/'
REF_dir = _DO_PATHS['REF_dir']          # 'References/'
VCF_dir = _DO_PATHS['VCF_dir']          # 'VCF_files/'
READS_dir = _DO_PATHS['READS_dir']      # 'reads/'







##### 1. user input ###########################################################


def get_species_name():