def get_args():
    """Parse the command line and publish the results as module globals.

    Sets ``fi``, ``properties_file``, ``prop``, ``misc`` and ``genome_name``,
    then echoes the properties file path and the genome name.
    """
    global fi, prop, misc, properties_file, genome_name

    fi = fileutils()

    # Describe the command-line interface
    cli = argparse.ArgumentParser(description='Script downloading genome files')
    cli.add_argument(
        '-p', '--properties_file',
        required=True,
        type=str,
        help='''Please provide the properties file, 
                               which including workdir''')
    cli.add_argument(
        '-g', '--genome_name',
        required=True,
        type=str,
        help='''Please provide the genome name
                               which is provided by genome_list.txt''')
    opts = cli.parse_args()

    # The properties file is checked before it is loaded
    fi.check_exist(opts.properties_file)
    properties_file = opts.properties_file
    prop = properties(properties_file)

    # NOTE: this rebinds the global name ``misc`` to an instance of itself
    misc = misc()
    available_genomes = prop.get_attrib("available_genomes")
    misc.check_genome_avl(available_genomes, opts.genome_name)

    genome_name = opts.genome_name
    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
Ejemplo n.º 2
0
def get_args():
    """Parse the command line and publish the results as module globals.

    Sets ``fi``, ``properties_file``, ``prop`` and ``prefix``, then echoes the
    properties file path and the prefix.
    """
    global prop, properties_file, prefix, fi

    fi = fileutils()

    # Describe the command-line interface
    summary = '''Script creates multiQC html file using fastqc, 
                                                    bcftools, snpEff, QUAST, and QualiMap 
                                                    output files'''
    cli = argparse.ArgumentParser(description=summary)
    cli.add_argument(
        '-p', '--properties_file',
        required=True,
        type=str,
        help='Please provide the properties file, which including the paths of workdir')
    cli.add_argument(
        '-pre', '--prefix',
        required=True,
        type=str,
        help='Please provide the prefix for the output file')

    # The properties file is checked before it is loaded
    opts = cli.parse_args()
    fi.check_exist(opts.properties_file)

    properties_file = opts.properties_file
    prop = properties(properties_file)
    prefix = opts.prefix
    print("properties_file:", str(properties_file))
    print("prefix:", prefix)
Ejemplo n.º 3
0
def get_args():
    """Parse and validate the command line, storing results in module globals.

    Sets ``properties_file``, ``prop``, ``genome``, ``vcf_file_pattern`` and
    ``prefix``. ``misc.my_exit`` is called with an error message when the
    genome name is neither listed in the "available_genomes" file nor equal
    to ``cryptosporidium_hominis``, or when the vcf pattern does not end with
    ``.vcf``.
    """
    global properties_file
    global genome
    global prefix
    global vcf_file_pattern
    global prop

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        '''Script invests genes under selection pressure within species through dNdS. 
        Species can be chosen from genome_list.txt''')
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help=
        '''Please provide the genome name, only with those obtained from genome_list.txt''',
        required=True)
    parser.add_argument(
        '-f',
        '--vcf_file_pattern',
        type=str,
        help="Please provide snp vcf files' pattern with full file path",
        required=True)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)

    # Read the genome list through a context manager so the handle is closed
    with open(prop.get_attrib("available_genomes")) as genomes_fh:
        available_genomes = {line.rstrip() for line in genomes_fh}
    if (args.genome_name not in available_genomes
            and args.genome_name != "cryptosporidium_hominis"):
        misc.my_exit("{} is not available, please try another genome".format(
            args.genome_name))

    # Anchor the check and escape the dot: the pattern must END with ".vcf"
    if not re.search(r"\.vcf$", args.vcf_file_pattern):
        misc.my_exit("vcf_file_pattern need to end up with .vcf")
    genome = args.genome_name
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix

    # Single-argument print calls give the same output on Python 2 and 3
    print("properties_file: {}".format(properties_file))
    print("genome: {}".format(genome))
    print("vcf_file_pattern: {}".format(vcf_file_pattern))
    print("prefix: {}".format(prefix))
def get_args():
    """Parse and validate the command line, storing results in module globals.

    Sets ``fi``, ``prop``, ``properties_file``, ``genome_name``,
    ``bam_file_pattern``, ``bam_files``, ``mapping_file``, ``prefix`` and
    ``bam_key_pattern``. ``misc.my_exit`` is called with an error message when
    the genome name is not listed in the "available_genomes" file, when the
    bam pattern does not end with ``.bam``, or when a matched bam file name
    does not contain a run ID matching ``bam_key_pattern``.
    """
    global fi
    global prop
    global properties_file
    global genome_name
    global bam_file_pattern
    global bam_files
    global mapping_file
    global prefix
    global bam_key_pattern

    # Assign description to the help doc
    parser = argparse.ArgumentParser(description='Script build all individual chromosome multiple alignment for recombination')
    parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.',
                        required=True)
    parser.add_argument('-g', '--genome_name', type=str, help='''Please provide the genome name available 
                                                                 in genome_list.txt only''',
                        required=True)
    parser.add_argument('-bp', '--bam_file_pattern', type=str, help='''Please provide the bam files' pattern 
                                                                       with the full path, ending with .bam, with runID 
                                                                       in the bam file name''',
                        required=True)
    parser.add_argument('-m', '--mapping_file', type=str, help='''Please provide the mapping file path, containing one 
                                                                  column of the runID and the other column is the expression
                                                                  displayed in the multiple alignment file description line''',
                        required=False)
    parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)

    # Read the genome list through a context manager so the handle is closed
    with open(prop.get_attrib("available_genomes")) as genomes_fh:
        available_genomes = {line.rstrip() for line in genomes_fh}
    if args.genome_name not in available_genomes:
        misc.my_exit("{} is not available, please try another genome".format(args.genome_name))

    # Escape the dot so only a literal ".bam" suffix is accepted
    if not re.search(r"\.bam$", args.bam_file_pattern):
        misc.my_exit("bam_file_pattern need to end up with .bam")
    bam_file_pattern = args.bam_file_pattern
    bam_files = glob.glob(bam_file_pattern)

    # Raw string: "\d" in a normal string literal is an invalid escape sequence
    bam_key_pattern = r"[A-Z]RR\d{6,}"
    for bam_file in bam_files:
        if not re.search(bam_key_pattern, bam_file):
            misc.my_exit("There is no runID in the bam file {}".format(bam_file))
    fi.check_files_exist(bam_files)

    # define variables
    genome_name = args.genome_name
    mapping_file = args.mapping_file
    prefix = args.prefix

    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
    print("bam_file_pattern:", bam_file_pattern)
    print("mapping_file:", mapping_file)
    print("prefix:", prefix)
Ejemplo n.º 5
0
 def __init__(self, properties_file, genome_name, genome_fasta, bam_file,
              prefix, if_anno, subdir):
     """Store the constructor arguments and build the helper objects.

     Every argument is kept on the instance under its own name; ``prop`` is
     built from ``properties_file`` and ``fi`` is a fresh ``fileutils``.
     """
     # Arguments stored as given
     self.properties_file = properties_file
     self.genome_name, self.genome_fasta = genome_name, genome_fasta
     self.bam_file, self.prefix = bam_file, prefix
     self.if_anno, self.subdir = if_anno, subdir
     # Helper objects
     self.prop = properties(properties_file)
     self.fi = fileutils()
Ejemplo n.º 6
0
def get_args():
    """Parse and validate the command line, storing results in module globals.

    Sets ``properties_file``, ``prop``, ``genome_name``, ``vcf_file_pattern``,
    ``go_file``, ``mapping_file`` and ``prefix``. ``MISC.my_exit`` is called
    with an error message when the genome name is not listed in the
    "available_genomes" file or when the vcf pattern does not end with
    ``.vcf``. Relies on the module-level ``FI`` and ``MISC`` helpers.
    """
    global properties_file
    global genome_name
    global prefix
    global vcf_file_pattern
    global go_file
    global mapping_file
    global prop

    # Assign description to the help doc
    parser = argparse.ArgumentParser(description='''Script invests genes under selection pressure within 
                                                    species through dNdS. It creates variation annotation 
                                                    file for each SNP vcf file, and gene variation annotation 
                                                    summary file based on all vcf files by using snpEff''')
    parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.',
                        required=True)
    parser.add_argument('-g', '--genome_name', type=str, help='''Please provide the genome name, only with those obtained from genome_list.txt''',
                        required=True)
    parser.add_argument('-vp', '--vcf_file_pattern', type=str, help="Please provide snp vcf files' pattern with full file path",
                        required=True)
    parser.add_argument('-go', '--go_file', type=str, help="Please provide the full path of the gene ontology file",
                        required=True)
    parser.add_argument('-m', '--mapping_file', type=str, help='''Please provide the mapping file path, which contains one column of 
                                                                read_ID from vcf file and one column of its corresponding sample_name''',
                        required=False)
    parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    FI.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)

    # Read the genome list through a context manager so the handle is closed
    with open(prop.get_attrib("available_genomes")) as genomes_fh:
        available_genomes = {line.rstrip() for line in genomes_fh}
    if args.genome_name not in available_genomes:
        MISC.my_exit("{} is not available, please try another genome".format(args.genome_name))

    # Escape the dot so only a literal ".vcf" suffix is accepted
    if not re.search(r"\.vcf$", args.vcf_file_pattern):
        MISC.my_exit("vcf_file_pattern need to end up with .vcf")
    FI.check_exist(args.go_file)
    go_file = args.go_file
    genome_name = args.genome_name
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix
    mapping_file = args.mapping_file

    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
    print("vcf_file_pattern:", vcf_file_pattern)
    print("go_file:", go_file)
    print("mapping_file:", mapping_file)
    print("prefix:", prefix)
def get_args():
    """Parse the command line and publish the results as module globals.

    Sets ``properties_file``, ``g_names_str``, ``fi``, ``prop`` and
    ``min_homo``; ``min_homo`` falls back to 4 when ``-min`` is not given.
    """
    global properties_file
    global g_names_str
    global fi
    global prop
    global min_homo

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        'Script invests genes under selection pressure among multiple species through dNdS'
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_names',
        type=str,
        help='''Please provide the genome names, seperating by "," 
        with the format of XXX, YYY, ZZZ''',
        required=True)
    parser.add_argument(
        '-min',
        '--min_homo',
        type=int,
        help=
        '''Please provide the minimum poteintial homologue numbers in one group, 
        if not defined, 4 will be used as the default''',
        required=False)
    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)

    # define variables
    properties_file = args.properties_file
    prop = properties(properties_file)
    g_names_str = args.genome_names
    min_homo = 4 if args.min_homo is None else args.min_homo

    # Single-argument print calls give the same output on Python 2 and 3
    print("properties_file: {}".format(properties_file))
    print("gnames: {}".format(g_names_str))
    print("min_homo: {}".format(min_homo))
def get_args():
    """Parse the command line and load the fasta/bam map file.

    Sets the module globals ``fi``, ``prop``, ``properties_file``,
    ``map_fpath``, ``map_dict`` and ``prefix``. ``map_dict`` maps each bam
    file path (second column of the map file) to its genome fasta path (first
    column). The remaining names declared global here are not assigned by
    this function.
    """
    global fi
    global prop
    global properties_file
    global genome_fasta
    global bam_file_pattern
    global bam_request_pattern
    global bam_files
    global map_fpath
    global map_dict
    global prefix

    # Assign description to the help doc
    parser = argparse.ArgumentParser(description='''Script creating relocation files for multiple bam files from 
                                    various genomes and automatically open the GUI''')
    parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True)
    parser.add_argument('-m', '--map_file', type=str, help='''Please provide the map file, 
                        in which the first column is the full path of the genome fasta file and the second column is
                        the full path of the bam file and the bam files need to ended with .bam''', required=True)
    parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_files_exist([args.properties_file, args.map_file])

    # define variables
    properties_file = args.properties_file
    prop = properties(properties_file)
    map_fpath = args.map_file
    map_dict = {}
    # Context manager guarantees the map file handle is closed
    with open(map_fpath, "r") as fh_map:
        for line in fh_map:
            fields = line.rstrip().split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            (fasta_fpath, bam_fpath) = getVar(fields, [0, 1])
            fi.check_files_exist([fasta_fpath, bam_fpath])
            map_dict[bam_fpath] = fasta_fpath
    prefix = args.prefix

    # Single-argument print calls give the same output on Python 2 and 3
    print("properties_file: {}".format(properties_file))
    print("map_file: {}".format(map_fpath))
    print("prefix: {}".format(prefix))
Ejemplo n.º 9
0
def get_args():
    """Parse and validate the command line, storing results in module globals.

    Sets ``fi``, ``prop``, ``properties_file``, ``genome_name``,
    ``vcf_file_pattern``, ``vcf_files``, ``mapping_file``, ``image_title``
    and ``prefix``. ``misc.my_exit`` is called with an error message when the
    genome name is neither listed in the "available_genomes" file nor equal
    to ``cryptosporidium_hominis``, or when the vcf pattern does not end with
    ``.vcf``.
    """
    global fi
    global prop
    global properties_file
    global genome_name
    global vcf_file_pattern
    global vcf_files
    global mapping_file
    global image_title
    global prefix

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        'Script build phylogenetic tree and dendragram for the defined group of vcf files from the same genome'
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help=
        '''Please provide the genome name, only with those obtained from genome_list.txt''',
        required=True)
    parser.add_argument(
        '-v',
        '--vcf_file_pattern',
        type=str,
        help='''Please provide the vcf files' pattern with the full path,
                                                                  vcf files must ended with ".vcf" ''',
        required=True)
    parser.add_argument(
        '-m',
        '--mapping_file',
        type=str,
        help=
        '''Please provide the mapping file path, which contains one column of 
                                                                read_ID from vcf file and one column of its corresponding label on the tree branch,
                                                                otherwise, the read_ID will be labeled on the tree branch''',
        required=False)
    parser.add_argument('-t',
                        '--title',
                        type=str,
                        help='''Please provide the title of the image''',
                        required=True)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)

    # Read the genome list through a context manager so the handle is closed
    with open(prop.get_attrib("available_genomes")) as genomes_fh:
        available_genomes = {line.rstrip() for line in genomes_fh}
    if (args.genome_name not in available_genomes
            and args.genome_name != "cryptosporidium_hominis"):
        misc.my_exit("{} is not available, please try another genome".format(
            args.genome_name))

    # Escape the dot so only a literal ".vcf" suffix is accepted
    if not re.search(r"\.vcf$", args.vcf_file_pattern):
        misc.my_exit("vcf_file_pattern need to end up with .vcf")
    vcf_file_pattern = args.vcf_file_pattern
    vcf_files = glob.glob(vcf_file_pattern)
    fi.check_files_exist(vcf_files)

    # define variables
    genome_name = args.genome_name
    mapping_file = args.mapping_file
    image_title = args.title
    prefix = args.prefix

    # Single-argument print calls give the same output on Python 2 and 3
    print("properties_file: {}".format(properties_file))
    print("genome_name: {}".format(genome_name))
    print("vcf_file_pattern: {}".format(vcf_file_pattern))
    print("mapping_file: {}".format(mapping_file))
    print("title: {}".format(image_title))
    print("prefix: {}".format(prefix))
def get_args():
    """Parse and validate the command line, storing results in module globals.

    Sets ``properties_file``, ``prop``, ``genome_name``, ``vcf_file_pattern``,
    ``vcf_files``, ``vcf_request_pattern``, ``mapping_file`` and ``prefix``.
    ``MISC.my_exit`` is called when the genome name is not listed in the
    "available_genomes" file; ``sys.exit`` is called when the vcf pattern
    matches no files or a matched file does not satisfy
    ``vcf_request_pattern``. Relies on the module-level ``FI`` and ``MISC``
    helpers.
    """
    global properties_file
    global genome_name
    global vcf_file_pattern
    global mapping_file
    global prefix
    global prop
    global vcf_files
    global vcf_request_pattern

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description='''Script creates the genome Short Tandem Repeat (STR) 
                                                    variation summary file based on all vcf files and 
                                                    multiple alignment files for all repeat regions'''
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument('-g',
                        '--genome_name',
                        type=str,
                        help='''Please provide the genome name available in 
                                                                 genome_list.txt only or cryptosporidium_hominis''',
                        required=False)
    parser.add_argument('-vp',
                        '--vcf_file_pattern',
                        type=str,
                        help='''Please provide the paired vcf files' pattern 
                        with the full path, must end with .vcf''',
                        required=True)
    parser.add_argument('-m',
                        '--mapping_file',
                        type=str,
                        help='''Please provide the mapping file path, which 
                                                                  contains one column of read_ID from vcf file 
                                                                  and one column of its corresponding sample_name''',
                        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    FI.check_exist(args.properties_file)
    # The properties file is loaded once here; it was previously parsed twice
    properties_file = args.properties_file
    prop = properties(properties_file)

    # Read the genome list through a context manager so the handle is closed
    with open(prop.get_attrib("available_genomes")) as genomes_fh:
        available_genomes = {line.rstrip() for line in genomes_fh}
    if args.genome_name not in available_genomes:
        MISC.my_exit("{} is not available, please try another genome".format(
            args.genome_name))

    vcf_files = glob.glob(args.vcf_file_pattern)
    if not vcf_files:
        sys.exit("ERROR: no vcf files provided")
    vcf_request_pattern = "^.*/?(.*?).vcf$"
    for vcf_file in vcf_files:
        if not re.search(vcf_request_pattern, vcf_file):
            sys.exit("vcf_file not end with .vcf")
    FI.check_files_exist(vcf_files)

    # define variables
    genome_name = args.genome_name
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix
    mapping_file = args.mapping_file

    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
    print("vcf_file_pattern:", vcf_file_pattern)
    print("mapping_file:", mapping_file)
    print("prefix:", prefix)
Ejemplo n.º 11
0
def get_args():
    """Parse and validate the command line, storing results in module globals.

    Sets ``properties_file``, ``prop``, ``genome_name``, ``fastq1``,
    ``fastq2``, ``prefix_ori``, ``runID`` and ``mapping_file``. Relies on the
    module-level ``FI`` and ``MISC`` helpers for the checks.
    """
    global properties_file, prop, genome_name
    global fastq1, fastq2, prefix_ori, runID, mapping_file

    # Describe the command-line interface
    summary = '''Script assembles short reads by using spades and 
                                                    provides statistics summary based on the result assemblies 
                                                    by using QUAST'''
    cli = argparse.ArgumentParser(description=summary)
    cli.add_argument(
        '-p', '--properties_file',
        required=True,
        type=str,
        help='Please provide the properties file')
    cli.add_argument(
        '-g', '--genome_name',
        required=True,
        type=str,
        help='''Please provide the genome name
                                                                 which need to be in genome_list.txt''')
    cli.add_argument(
        '-fq1', '--fastq1',
        required=True,
        type=str,
        help='Please provide the forward fastq file')
    cli.add_argument(
        '-fq2', '--fastq2',
        required=False,
        type=str,
        help='Please provide the reverse fastq file')
    cli.add_argument(
        '-pre', '--prefix',
        required=True,
        type=str,
        help='Please provide the prefix for the output file')
    cli.add_argument(
        '-m', '--mapping_file',
        required=False,
        type=str,
        help='''if map file was provided, whose first column is runID
                                                                  and the second column is sample name, seperated by "\t",
                                                                  sample name will be included in the output files' names''')

    # Validate the mandatory inputs first, then the optional ones
    opts = cli.parse_args()
    FI.check_files_exist([opts.properties_file, opts.fastq1])
    properties_file = opts.properties_file
    prop = properties(properties_file)
    available_genomes = prop.get_attrib("available_genomes")
    MISC.check_genome_avl(available_genomes, opts.genome_name)
    runID = MISC.get_runID(opts.fastq1)
    if opts.fastq2 is not None:
        FI.check_exist(opts.fastq2)
        # Return value is not used; the call is kept for whatever it checks
        MISC.get_runID(opts.fastq2)
    if opts.mapping_file is not None:
        FI.check_exist(opts.mapping_file)

    # Publish the parsed values
    genome_name = opts.genome_name
    fastq1, fastq2 = opts.fastq1, opts.fastq2
    prefix_ori = opts.prefix
    mapping_file = opts.mapping_file
    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
    print("fastq1:", fastq1)
    print("fastq2:", fastq2)
    print("prefix:", prefix_ori)
    print("mapping_file:", mapping_file)
Ejemplo n.º 12
0
def post_process():
    """Copy each file listed in ``out_pdf_files`` into ``outdir``."""
    print("post_processing...")
    copy_to_dest = FI.copy_file_to_destdir
    for produced_file in out_pdf_files:
        copy_to_dest(produced_file, outdir)


if __name__ == '__main__':
    # Names assigned here are already module globals, so no ``global``
    # declarations are needed at this level.
    FI = fileutils()
    MISC = misc()

    get_args()
    prop = properties(PROPERTIES_FILE)

    def getVar(searchList, ind):
        """Return the items of ``searchList`` at the positions in ``ind``."""
        return [searchList[i] for i in ind]

    print("\n", "Properties attributes:")
    print(prop.__dict__)

    # Pipeline: initiation, main execution, then post-execution
    initiate()
    execute()
    post_process()

    print(os.path.realpath(__file__) + " DONE")
def get_args():
    """Parse and validate the command line, storing results in module globals.

    Sets ``properties_file``, ``prop``, ``genome``, ``vcf_file_pattern`` and
    ``prefix``. When the genome-list flag is given (``-genome_list`` or
    ``--genome_list``) the genome list is printed and the process exits with
    status 0 without requiring any other option. Exits with status 1 when
    the genome name is not in the list returned by ``get_gene_list_str``.
    """
    global properties_file
    global genome
    global prefix
    global vcf_file_pattern
    global prop

    # Listing mode can be requested with either spelling of the flag; in that
    # mode none of the other options (including -p) should be mandatory.
    listing_only = any(arg in ('-genome_list', '--genome_list')
                       for arg in sys.argv[1:])
    inputs_required = not listing_only

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        '''Script invests genes under selection pressure within species through dNdS. 
        Species can be chosen from -genome_list, which including 17 genomes. They are the common genomes of protists parasite and existing in snpEff'''
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=inputs_required)
    parser.add_argument('-genome_list',
                        '--genome_list',
                        help="This will display the genome name list",
                        action="store_true")
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help=
        '''Please provide the genome name, only with those obtained from -genome_list''',
        required=inputs_required)
    parser.add_argument(
        '-f',
        '--vcf_file_pattern',
        type=str,
        help="Please provide snp vcf files' pattern with full file path",
        required=inputs_required)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=inputs_required)

    # check args
    args = parser.parse_args()
    if args.genome_list:
        print(get_gene_list_str())
        sys.exit(0)
    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)
    genome = args.genome_name
    if genome not in get_gene_list_str().split("\n"):
        print("ERROR: genome_name {} not in the list of -genome_list".format(
            genome))
        sys.exit(1)
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix

    # Single-argument print calls give the same output on Python 2 and 3
    print("properties_file: {}".format(properties_file))
    print("genome: {}".format(genome))
    print("vcf_file_pattern: {}".format(vcf_file_pattern))
    print("prefix: {}".format(prefix))
Ejemplo n.º 14
0
    print "stage 6 has started!"

def execute_stage7():
    """Announce the start of stage 7 on standard output."""
    # Parenthesised single-argument print works on Python 2 and Python 3
    print("stage 7 has started!")

def execute_stage8():
    """Announce the start of stage 8 on standard output."""
    # Parenthesised single-argument print works on Python 2 and Python 3
    print("stage 8 has started!")




if __name__ == '__main__':
    # Entry point: parse arguments, load the properties, then run every stage.
    get_args()
    # Assignments at module level are already global; no declaration needed
    prop = properties(properties_file)
    # Same output as the Python 2 statement `print "\n", "Properties
    # attributes:"`, which writes no space after a newline
    print("\nProperties attributes:")
    print(prop.__dict__)

    initiate()
    execute_stage1()
    execute_stage2()
    execute_stage3()
    execute_stage4()
    execute_stage5()
    execute_stage6()
    execute_stage7()
    execute_stage8()

    print(assemblers)
    print(fastq1)
def get_args():
    """Parse and validate the command line, storing results in module globals.

    Sets ``properties_file``, ``prop``, ``genome``, ``gff``,
    ``vcf_file_pattern`` and ``prefix``. ``gff`` is the file given with
    ``-gff`` when present, otherwise the ``<genome>_gff`` attribute of the
    properties file. Exits with status 1 when the genome name is neither
    ``ch`` nor ``cp``.
    """
    global properties_file
    global genome
    global gff
    global prefix
    global vcf_file_pattern
    global prop

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        'Script invests genes under selection pressure within species through dNdS'
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument('-g',
                        '--genome_name',
                        type=str,
                        help='''Please provide the genome name, 
                        only "ch" for "C. hominis" or "cp" for "C. parvum" can be used''',
                        required=True)
    parser.add_argument('-gff',
                        '--genome_gff_file',
                        type=str,
                        help='''Please provide the genome gff file, 
                        only C. hominis or C. parvum gff file can be used''',
                        required=False)
    parser.add_argument(
        '-f',
        '--vcf_file_pattern',
        type=str,
        help="Please provide vcf files' pattern with full file path",
        required=True)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    if args.genome_gff_file is not None:
        fi.check_exist(args.genome_gff_file)

    # define variables
    properties_file = args.properties_file
    prop = properties(properties_file)
    if args.genome_name not in ('ch', 'cp'):
        print("only 'ch' or 'cp' can be used as the genome name")
        sys.exit(1)
    genome = args.genome_name
    if args.genome_gff_file is None:
        gff = prop.get_attrib(genome + "_gff")
    else:
        # Previously left unassigned, so a user-supplied gff file was ignored
        # and the print below raised NameError
        gff = args.genome_gff_file
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix

    # Single-argument print calls give the same output on Python 2 and 3
    print("properties_file: {}".format(properties_file))
    print("genome: {}".format(genome))
    print("genome_gff: {}".format(gff))
    print("vcf_file_pattern: {}".format(vcf_file_pattern))
    print("prefix: {}".format(prefix))
Ejemplo n.º 16
0
def get_args():
    """Parse and validate the command line, storing results in module globals.

    Sets ``prop``, ``properties_file``, ``genome_name``, ``gvcf_files`` (the
    paths matched by the ``-gv`` glob pattern), ``gvcf_files_str`` (those
    paths joined with single spaces), ``prefix`` and ``if_filter``. Relies on
    the module-level ``FI`` and ``MISC`` helpers for the checks.
    """
    global prop
    global properties_file
    global genome_name
    global gvcf_files
    global gvcf_files_str
    global prefix
    global if_filter

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description='''Script merging gvcf files, seperating into 
                                                  SNP and INDEL,and then filtering if requested'''
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='''Please provide the properties file, 
                                                                     which including the paths of workdir''',
                        required=True)
    parser.add_argument('-g',
                        '--genome_name',
                        type=str,
                        help='''Please provide the genome name
                                                                 which is provided by genome_list.txt''',
                        required=True)
    parser.add_argument('-gv',
                        '--gvcf_files',
                        type=str,
                        help='Please provide gvcf files',
                        required=True)
    parser.add_argument('-f',
                        '--if_filter',
                        default=False,
                        action="store_true",
                        help='whether to filter SNP and INDEL seperately',
                        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file',
                        required=True)

    args = parser.parse_args()
    # check args
    FI.check_exist(args.properties_file)
    # Expand the glob once so the checked files and the stored list agree
    gvcf_files = glob.glob(args.gvcf_files)
    FI.check_files_exist(gvcf_files)
    properties_file = args.properties_file
    prop = properties(properties_file)
    MISC.check_genome_avl(prop.get_attrib("available_genomes"),
                          args.genome_name)

    # define variables
    genome_name = args.genome_name
    gvcf_files_str = " ".join(gvcf_files)
    prefix = args.prefix
    if_filter = args.if_filter

    # print args
    print("properties_file:", str(properties_file))
    print("genome_name:", genome_name)
    print("gvcf_files:", gvcf_files_str)
    print("if_filter:", if_filter)
    print("prefix:", prefix)
Ejemplo n.º 17
0
def get_args():
    """Parse the dN/dS script's command line and publish the settings as globals.

    The arguments are read from ``sys.argv`` with argparse.  The properties
    file and every optional input file that was actually supplied are passed
    to ``fileutils.check_exist`` (presumably aborting when a path is missing).
    Afterwards the module-level globals declared below are filled in:

    * ``genome1`` / ``genome2`` fall back to "ch" / "cp".
    * CDS files missing from the command line are looked up in the properties
      file under ``<genome>_cds_fna`` and ``<genome>_cds_faa``.
    * ``map_file`` falls back to the *string* "None" and both filters to the
      string "0"; these sentinels are kept unchanged since the globals are
      presumably consumed elsewhere in the script.

    The chosen settings are echoed to stdout.  Each ``print`` call receives a
    single pre-formatted string, which produces the same output on Python 2
    and Python 3.
    """
    global properties_file
    global cds_fna1
    global cds_faa1
    global cds_fna2
    global cds_faa2
    global genome1
    global genome2
    global map_file
    global filter_eval
    global filter_identity
    global prefix
    global fi
    global makeblastdb_sw
    global blastn_sw
    global prop

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        'Script invests genes under selection pressure between two species through dNdS'
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g1',
        '--genome_name1',
        type=str,
        help='''Please provide the first genome name, otherwise, 
                        "ch" for "C. hominis" will be used''',
        required=False)
    parser.add_argument(
        '-g2',
        '--genome_name2',
        type=str,
        help='''Please provide the second genome name, otherwise, 
                        "cp" for "C. parvum" will be used''',
        required=False)
    parser.add_argument(
        '-fn1',
        '--cds_fna1',
        type=str,
        help=
        'Please provide the first cds fna file, otherwise, ch fna file will be used.',
        required=False)
    parser.add_argument(
        '-fn2',
        '--cds_fna2',
        type=str,
        help=
        'Please provide the second cds fna file, otherwise, cp fna file will be used.',
        required=False)
    parser.add_argument(
        '-fa1',
        '--cds_faa1',
        type=str,
        help=
        'Please provide the first cds faa file, otherwise, ch faa file will be used.',
        required=False)
    parser.add_argument(
        '-fa2',
        '--cds_faa2',
        type=str,
        help=
        'Please provide the second cds faa file, otherwise, cp faa file will be used.',
        required=False)
    parser.add_argument(
        '-m',
        '--map',
        type=str,
        help='''Please provide the file for mapping the chromosome accessions, 
                                                      one pair in each line and separated by tab, 
                                                      otherwise, no chromosome information will be provided in the output file''',
        required=False)
    parser.add_argument(
        '-fi',
        '--filter_identity',
        type=str,
        help='the identity percentage for filtering the blast hits.',
        required=False)
    parser.add_argument('-fe',
                        '--filter_eval',
                        type=str,
                        help='the eval for filtering the blast hits.',
                        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args: only files that were actually given can be checked
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    for opt_arg_fpath in (args.cds_fna1, args.cds_fna2, args.cds_faa1,
                          args.cds_faa2, args.map):
        if opt_arg_fpath is not None:
            fi.check_exist(opt_arg_fpath)

    # define variables
    makeblastdb_sw = "makeblastdb"
    blastn_sw = "blastn"

    properties_file = args.properties_file
    prop = properties(properties_file)

    genome1 = args.genome_name1 if args.genome_name1 is not None else "ch"
    genome2 = args.genome_name2 if args.genome_name2 is not None else "cp"

    # The properties file is only consulted for files missing on the CLI.
    cds_fna1 = (args.cds_fna1 if args.cds_fna1 is not None else
                prop.get_attrib(genome1 + "_cds_fna"))
    cds_faa1 = (args.cds_faa1 if args.cds_faa1 is not None else
                prop.get_attrib(genome1 + "_cds_faa"))
    cds_fna2 = (args.cds_fna2 if args.cds_fna2 is not None else
                prop.get_attrib(genome2 + "_cds_fna"))
    cds_faa2 = (args.cds_faa2 if args.cds_faa2 is not None else
                prop.get_attrib(genome2 + "_cds_faa"))

    # String sentinels ("None", "0") are intentional; see the docstring.
    map_file = args.map if args.map is not None else "None"
    filter_eval = args.filter_eval if args.filter_eval is not None else "0"
    filter_identity = (args.filter_identity
                       if args.filter_identity is not None else "0")
    prefix = args.prefix

    summary = (
        ("properties_file", properties_file),
        ("genome1", genome1),
        ("genome2", genome2),
        ("cds_fna1", cds_fna1),
        ("cds_faa1", cds_faa1),
        ("cds_fna2", cds_fna2),
        ("cds_faa2", cds_faa2),
        ("filter_eval", filter_eval),
        ("filter_identity_perc", filter_identity),
        ("prefix", prefix),
    )
    for label, value in summary:
        print("{}: {}".format(label, value))
def get_args():
    """Parse the bam-relocation script's command line into module globals.

    Validation performed, in order:

    * the properties file is passed to ``fileutils.check_exist``;
    * ``--genome_name``, when given, must be "ch" or "cp";
    * ``--genome_fasta``, when given, is passed to ``check_exist``;
    * every file matched by ``--bam_file_pattern`` must end in ``.bam`` and is
      passed to ``check_files_exist``.

    When ``--genome_fasta`` is omitted, ``--genome_name`` becomes mandatory and
    the fasta path is read from the properties file key
    ``<genome_name>_fasta``.

    ``genome_name`` is always assigned (``None`` when the option was omitted),
    so the summary print below cannot hit an unassigned global.  The selected
    settings are echoed to stdout using single-argument ``print`` calls, which
    behave the same on Python 2 and Python 3.
    """
    global fi
    global prop
    global properties_file
    global genome_name
    global genome_fasta
    global bam_file_pattern
    global bam_request_pattern
    global bam_files
    global prefix

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        '''Script creating relocation files for multiple bam files from the same genome 
                                    and automatically open the GUI.''')
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help='''if "C. hominis" or "C. parvum" will be used as the genome, 
                                                                 please provide "ch" for "C. hominis" or "cp" for "C. parvum"''',
        required=False)
    parser.add_argument(
        '-f',
        '--genome_fasta',
        type=str,
        help='''Please provide the directory for the genome fasta file, 
                                                                 if "ch" or "cp" is not the genome name.''',
        required=False)
    parser.add_argument(
        '-b',
        '--bam_file_pattern',
        type=str,
        help='''Please provide the bam files' pattern with the full path''',
        required=True)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    if args.genome_name is not None and args.genome_name not in ("ch", "cp"):
        print("genome name need to be ch or cp")
        sys.exit(1)
    if args.genome_fasta is not None:
        fi.check_exist(args.genome_fasta)
    bam_file_pattern = args.bam_file_pattern
    bam_files = glob.glob(bam_file_pattern)
    # The dot is escaped so that only a literal ".bam" suffix is accepted
    # (an unescaped "." would also let names such as "xbam" through).
    bam_request_pattern = r"^.*/?(.*?)\.bam$"
    for bam_file in bam_files:
        if not re.search(bam_request_pattern, bam_file):
            print("bam_file not ended with .bam")
            sys.exit(1)
    fi.check_files_exist(bam_files)

    # define variables
    properties_file = args.properties_file
    prop = properties(properties_file)
    genome_name = args.genome_name
    if args.genome_fasta is not None:
        genome_fasta = args.genome_fasta
    elif genome_name is None:
        print("If no genome_fasta provided, genome name must be provided as ch or cp.")
        sys.exit(1)
    else:
        genome_fasta = prop.get_attrib(genome_name + "_fasta")
    prefix = args.prefix

    summary = (
        ("properties_file", properties_file),
        ("genome_name", genome_name),
        ("genome_fasta", genome_fasta),
        ("bam_file_pattern", bam_file_pattern),
        ("prefix", prefix),
    )
    for label, value in summary:
        print("{}: {}".format(label, value))
Ejemplo n.º 19
0
def get_args():
    """Read the SNP/INDEL calling options from the command line.

    The parsed values are stored in the module-level globals declared below.
    Before that, the properties file, the bam file and (when supplied) the
    mapping file are handed to ``FI.check_exist``, the genome name is checked
    with ``MISC.check_genome_avl`` against the ``available_genomes`` property,
    and ``runID`` is derived from the bam file name by ``MISC.get_runID``.
    A summary of the settings is printed at the end.
    """
    global prop, properties_file, genome_name, bam_file
    global prefix_ori, if_filter, mapping_file, runID

    cli = argparse.ArgumentParser(
        description='''Script for getting filtered or unfiltered 
                                                    SNP and INDEL vcf and gvcf files from bam 
                                                    files using gatk, and then create statistics 
                                                    summary by using bcf_tools'''
    )
    cli.add_argument('-p', '--properties_file', type=str, required=True,
                     help='Please provide the properties file')
    cli.add_argument('-g', '--genome_name', type=str, required=True,
                     help='''Please provide the genome name
                                                                 which is provided by genome_list.txt''')
    cli.add_argument('-bam', '--bam_file', type=str, required=True,
                     help='Please provide one bam file')
    cli.add_argument('-f', '--if_filter', action="store_true", default=False,
                     required=False,
                     help='whether to filter SNP and INDEL')
    cli.add_argument('-pre', '--prefix', type=str, required=True,
                     help='Please provide the prefix for the output file')
    cli.add_argument('-m', '--mapping_file', type=str, required=False,
                     help='''if map file was provided, sample name will be 
                        included in the output file name''')
    opts = cli.parse_args()

    # Input validation; the helpers are called in a fixed order
    # (properties file, bam file, genome name, run id, mapping file).
    FI.check_exist(opts.properties_file)
    FI.check_exist(opts.bam_file)
    properties_file = opts.properties_file
    prop = properties(properties_file)
    MISC.check_genome_avl(prop.get_attrib("available_genomes"),
                          opts.genome_name)
    runID = MISC.get_runID(opts.bam_file)
    if opts.mapping_file is not None:
        FI.check_exist(opts.mapping_file)

    # Publish the remaining options.
    genome_name, bam_file = opts.genome_name, opts.bam_file
    prefix_ori, if_filter = opts.prefix, opts.if_filter
    mapping_file = opts.mapping_file

    # Echo the effective settings.
    for label, value in (
            ("properties_file:", str(properties_file)),
            ("genome_name:", genome_name),
            ("bam_file:", bam_file),
            ("if_filter:", if_filter),
            ("prefix:", prefix_ori),
            ("mapping_file:", mapping_file),
    ):
        print(label, value)
Ejemplo n.º 20
0
def get_args():
    """Read the plotting script's options from the command line.

    Fills the module-level globals declared below.  The properties file, the
    gvcf file and the genotype file go through ``FI.check_exist``; the files
    matched by the ``--vcf_files`` glob go through ``FI.check_files_exist``;
    the genome name is checked with ``MISC.check_genome_avl``.  The script
    exits when the glob matches nothing.  ``vcf_files_str`` holds the absolute
    paths of all matched vcf files, each followed by a single space.
    """
    global PROP, PROPERTIES_FILE
    global genome_name, vcf_files, vcf_files_str
    global gvcf_file, genotype_file, prefix

    cli = argparse.ArgumentParser(
        description=
        '''Script for creating images: phylogeny tree, PCA, heatmap, upset, 
                                                 and SNP distribution on chromosome for vcf files'''
    )
    cli.add_argument(
        '-p', '--properties_file', type=str, required=True,
        help='''Please provide the properties file, which including 
                                                                     the paths of samtools and bcftools and workdir''')
    cli.add_argument('-g', '--genome_name', type=str, required=True,
                     help='''Please provide the genome name
                                                                 which is provided by genome_list.txt''')
    cli.add_argument('-v', '--vcf_files', type=str, required=True,
                     help='Please provide one or multiple vcf files')
    cli.add_argument('-gv', '--gvcf_file', type=str, required=True,
                     help='Please provide one  concat gvcf file')
    cli.add_argument(
        '-gt', '--genotype_file', type=str, required=True,
        help=
        '''The file need to contain 3 cols: runID, isolate name, and genotype''')
    cli.add_argument('-pre', '--prefix', type=str, required=True,
                     help='Please provide the prefix for the output file')

    opts = cli.parse_args()

    # Validate the inputs.
    FI.check_exist(opts.properties_file)
    PROPERTIES_FILE = opts.properties_file
    PROP = properties(PROPERTIES_FILE)
    MISC.check_genome_avl(PROP.get_attrib("available_genomes"),
                          opts.genome_name)
    FI.check_files_exist(glob.glob(opts.vcf_files))
    FI.check_exist(opts.gvcf_file)
    FI.check_exist(opts.genotype_file)

    genome_name = opts.genome_name
    vcf_files = glob.glob(opts.vcf_files)
    if not vcf_files:
        sys.exit("Input vcf files not exist")

    # get_runID's return value is not used here; it is called once per file.
    abs_paths = []
    for vcf_path in vcf_files:
        MISC.get_runID(vcf_path)
        abs_paths.append(os.path.abspath(vcf_path) + " ")
    vcf_files_str = "".join(abs_paths)

    gvcf_file = opts.gvcf_file
    genotype_file = opts.genotype_file
    prefix = opts.prefix

    # Echo the effective settings.
    for label, value in (
            ("properties_file:", PROPERTIES_FILE),
            ("genome name:", genome_name),
            ("vcf_files:", vcf_files_str),
            ("gvcf_file:", gvcf_file),
            ("genotype_file:", genotype_file),
            ("prefix:", prefix),
    ):
        print(label, value)
Ejemplo n.º 21
0
def get_args():
    """Parse the basic NGS-analysis script's command line into module globals.

    Checks performed, in order:

    * the properties file is passed to ``fi.check_exist``;
    * the genome name must appear as a line of the file named by the
      ``available_genomes`` property, otherwise ``misc.my_exit`` is called;
    * ``--fastq1`` (and ``--fastq2`` when given) is passed to
      ``fi.check_exist`` and to ``check_fastq_postfix``, whose result supplies
      the ``fastq*_key`` / ``fastq*_postfix`` globals.

    ``fi``, ``misc`` and ``check_fastq_postfix`` are taken from module scope.
    ``fastq2_key`` / ``fastq2_postfix`` are only assigned when ``--fastq2`` is
    given, and ``min_homo`` is declared global but not assigned here.
    ``qc_sw`` falls back to "trim_galore".

    The genome list is read inside a ``with`` block so the file handle is
    closed, and the properties file is parsed only once.  The summary is
    printed with single-argument ``print`` calls, which behave the same on
    Python 2 and Python 3.
    """
    global properties_file
    global g_name_str
    global prop
    global min_homo
    global in_fastq1
    global in_fastq2
    global fastq1_key
    global fastq2_key
    global fastq1_postfix
    global fastq2_postfix
    global qc_sw
    global if_dedupQ
    global if_dedupM
    global prefix

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        '''Script provides basic analyses for the next generation sequences, including fastq file quality control, 
                                                    assembly by using spades, reference_mapping, and SNP calling using samtools 
                                                    for the genome available in the genome_list'''
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help=
        '''Please provide the genome name which is provided by genome_list.txt''',
        required=True)
    parser.add_argument('-qc_sw',
                        '--qc_software',
                        type=str,
                        help='''Please provide the quality control software, 
                        otherwise the default trim_galore will be used''',
                        required=False)
    parser.add_argument('-deQ',
                        '--dedupQ',
                        action='store_true',
                        help='if remove all the exact read duplications',
                        default=False)
    parser.add_argument('-fq1',
                        '--fastq1',
                        type=str,
                        help='Please provide the forward fastq file',
                        required=True)
    parser.add_argument('-fq2',
                        '--fastq2',
                        type=str,
                        help='Please provide the reverse fastq file',
                        required=False)
    parser.add_argument(
        '-deM',
        '--dedupM',
        action='store_true',
        help='if true, remove duplications after mapping using samtools',
        default=False)

    # check args
    args = parser.parse_args()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)
    # Read the list up front so the handle is closed before anything can exit.
    with open(prop.get_attrib("available_genomes")) as genome_list:
        available_genomes = set(line.rstrip() for line in genome_list)
    if args.genome_name not in available_genomes:
        misc.my_exit("{} is not available, please try another genome".format(
            args.genome_name))
    fi.check_exist(args.fastq1)
    (fastq1_key, fastq1_postfix) = check_fastq_postfix(args.fastq1)
    if args.fastq2 is not None:
        fi.check_exist(args.fastq2)
        (fastq2_key, fastq2_postfix) = check_fastq_postfix(args.fastq2)

    # define variables (properties_file / prop were already set above)
    prefix = args.prefix
    g_name_str = args.genome_name
    in_fastq1 = args.fastq1
    in_fastq2 = args.fastq2
    qc_sw = args.qc_software if args.qc_software is not None else "trim_galore"
    if_dedupQ = args.dedupQ
    if_dedupM = args.dedupM

    print("properties_file: {}".format(properties_file))
    print("prefix: {}".format(prefix))
    print("gname: {}".format(g_name_str))
    print("fastq1: {}".format(in_fastq1))
    print("fastq2: {}".format(in_fastq2))
    print("qc_software: {}".format(qc_sw))
    print("assembly_software:spades")
    print("dedupQ: {}".format(if_dedupQ))
    print("dedupM: {}".format(if_dedupM))
Ejemplo n.º 22
0
def get_args():
    """Parse the fastq quality-control script's command line into module globals.

    The properties file and ``--fastq1`` are passed to
    ``FI.check_files_exist``; ``--fastq2``, when given, is passed to
    ``FI.check_exist``.  ``runID`` is taken from ``MISC.get_runID`` applied to
    the first fastq file; the same helper is also called on the second fastq
    file, but its result is not stored.  The effective settings are printed at
    the end.

    The ``--mapping_file`` help text spells the column separator as an escaped
    ``\\t``: a real tab character inside the help string would be collapsed to
    a single space by argparse's help formatter and shown as ``" "``.
    """
    global properties_file
    global prop
    global fastq1
    global fastq2
    global prefix
    global if_dedup
    global runID
    global mapping_file

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        '''Script provides quality control on fastq files using trim_galore 
                                                  and remove duplicated reads using clumpify. The quality of the original
                                                  and filtered reads are monitored by fastQC and summarized by multiQC'''
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument('-fq1',
                        '--fastq1',
                        type=str,
                        help='Please provide the first fastq file.',
                        required=True)
    parser.add_argument('-fq2',
                        '--fastq2',
                        type=str,
                        help='Please provide the second fastq file.',
                        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file',
                        required=True)
    parser.add_argument('-de',
                        '--dedup',
                        action='store_true',
                        help='if remove all the exact read duplications',
                        default=False)
    parser.add_argument(
        '-m',
        '--mapping_file',
        type=str,
        help='''if map file was provided, whose first column is runID
                                                                  and the second column is sample name, seperated by "\\t",
                                                                  sample name will be included in fastQC and multiQC
                                                                  output file names''',
        required=False)

    # check args
    args = parser.parse_args()
    FI.check_files_exist([args.properties_file, args.fastq1])
    runID = MISC.get_runID(args.fastq1)
    if args.fastq2 is not None:
        FI.check_exist(args.fastq2)
        # Return value intentionally unused; runID comes from fastq1.
        MISC.get_runID(args.fastq2)

    # define variables
    properties_file = args.properties_file
    prop = properties(properties_file)
    fastq1 = args.fastq1
    fastq2 = args.fastq2
    prefix = args.prefix
    if_dedup = args.dedup
    mapping_file = args.mapping_file

    print("properties_file:", properties_file)
    print("fastq1:", fastq1)
    print("fastq2:", fastq2)
    print("prefix:", prefix)
    print("dedup:", if_dedup)
    print("mapping_file:", mapping_file)
Ejemplo n.º 23
0
def get_args():
    """Read the read-mapping script's options from the command line.

    Fills the module-level globals declared below.  The properties file and
    the first fastq file go through ``FI.check_files_exist``; the second fastq
    file and the mapping file go through ``FI.check_exist`` when supplied.
    The genome name is checked with ``MISC.check_genome_avl`` and ``runID`` is
    obtained from ``MISC.get_runID`` on the first fastq file.  The script
    exits unless the mapping tool is "bwa" or "bowtie2".  A summary of the
    settings is printed at the end.
    """
    global properties_file, prop, mapping_tool, genome_name
    global fastq1, fastq2, platform, dna_library
    global if_dedup, if_recom, prefix_ori, mapping_file, runID

    cli = argparse.ArgumentParser(
        description='''Script mapping short reads to reference genomes
                                                    using BWA or bowtie2, with the following statistics 
                                                    summary files created by fastQC, qualiMap, and multipleQC'''
    )
    cli.add_argument('-p', '--properties_file', type=str, required=True,
                     help='Please provide the properties file')
    cli.add_argument('-t', '--mapping_tool', type=str, required=True,
                     help='Please choose mapping tool, bwa or bowtie2')
    cli.add_argument('-g', '--genome_name', type=str, required=True,
                     help='''Please provide the genome name
                                                                 which need to be in genome_list.txt''')
    cli.add_argument(
        '-fq1', '--fastq1', type=str, required=True,
        help='''Please provide the paired forward fastq file or 
                                                              the single fastq file, containing runID''')
    cli.add_argument('-fq2', '--fastq2', type=str, required=False,
                     help='''Please provide the paired reverse fastq file, 
                                                              containing runID''')
    cli.add_argument('-f', '--platform', type=str, required=True,
                     help='Please provide the plat_form')
    cli.add_argument('-l', '--dna_library', type=str, required=True,
                     help='Please provide the DNA library')
    cli.add_argument(
        '-pre', '--prefix', type=str, required=True,
        help='Please provide the subdir for holding the output file')
    cli.add_argument(
        '-de', '--dedup', action='store_true', default=False,
        help='if true, remove duplications after mapping using samtools')
    cli.add_argument(
        '-recom', '--recombination', action='store_true', default=False,
        help='''if true, mapping with bowtie2 with be use local 
                                                                               alignment, which is for recombination analysis''')
    cli.add_argument('-m', '--mapping_file', type=str, required=False,
                     help='''if map file was provided, sample name will be 
                                                                  included in the output file name''')
    opts = cli.parse_args()

    # Input validation.
    FI.check_files_exist([opts.properties_file, opts.fastq1])
    properties_file = opts.properties_file
    prop = properties(properties_file)
    MISC.check_genome_avl(prop.get_attrib("available_genomes"),
                          opts.genome_name)
    runID = MISC.get_runID(opts.fastq1)
    if opts.fastq2 is not None:
        FI.check_exist(opts.fastq2)
        # Result unused; runID is taken from fastq1.
        MISC.get_runID(opts.fastq2)
    if opts.mapping_file is not None:
        FI.check_exist(opts.mapping_file)
    if opts.mapping_tool not in ("bwa", "bowtie2"):
        sys.exit("only bwa and bowtie2 are available for the mapping_tool")

    # Publish the remaining options.
    mapping_tool, genome_name = opts.mapping_tool, opts.genome_name
    fastq1, fastq2 = opts.fastq1, opts.fastq2
    platform, dna_library = opts.platform, opts.dna_library
    prefix_ori = opts.prefix
    if_dedup, if_recom = opts.dedup, opts.recombination
    mapping_file = opts.mapping_file

    # Echo the effective settings.
    for label, value in (
            ("properties_file:", properties_file),
            ("genome_name:", genome_name),
            ("mapping_tool:", mapping_tool),
            ("fastq1:", fastq1),
            ("fastq2:", fastq2),
            ("platform:", platform),
            ("dna_library:", dna_library),
            ("prefix:", prefix_ori),
            ("dedup:", if_dedup),
            ("recombination:", if_recom),
            ("mapping_file:", mapping_file),
    ):
        print(label, value)