def get_args():
    """Parse the command-line options and publish them as module globals.

    Sets the globals ``fi``, ``prop``, ``misc``, ``properties_file`` and
    ``genome_name``, then echoes the two user-supplied values.

    NOTE(review): ``fi.check_exist`` and ``misc.check_genome_avl`` are defined
    elsewhere; they presumably abort when the check fails - confirm.
    """
    global fi, prop, misc, properties_file, genome_name

    fi = fileutils()

    # Describe the command-line interface
    cli = argparse.ArgumentParser(
        description='Script downloading genome files')
    cli.add_argument(
        '-p',
        '--properties_file',
        required=True,
        type=str,
        help='''Please provide the properties file, which including workdir''')
    cli.add_argument(
        '-g',
        '--genome_name',
        required=True,
        type=str,
        help='''Please provide the genome name which is provided by genome_list.txt''')
    opts = cli.parse_args()

    # The properties file is checked before it is loaded
    fi.check_exist(opts.properties_file)
    properties_file = opts.properties_file
    prop = properties(properties_file)

    # From here on the global name ``misc`` refers to the object returned by
    # misc(), no longer to the callable itself.
    misc = misc()
    misc.check_genome_avl(prop.get_attrib("available_genomes"),
                          opts.genome_name)
    genome_name = opts.genome_name

    # Echo the effective settings
    for label, value in (("properties_file:", properties_file),
                         ("genome_name:", genome_name)):
        print(label, value)
def get_args():
    """Parse the command-line options and publish them as module globals.

    Sets the globals ``fi``, ``prop``, ``properties_file`` and ``prefix``,
    then echoes the properties file path and the prefix.

    NOTE(review): ``fi.check_exist`` is defined elsewhere; it presumably
    aborts when the file is missing - confirm.
    """
    global prop, properties_file, prefix, fi

    fi = fileutils()

    # Describe the command-line interface
    cli = argparse.ArgumentParser(
        description='''Script creates multiQC html file using fastqc, bcftools, snpEff, QUAST, and QualiMap output files''')
    cli.add_argument(
        '-p',
        '--properties_file',
        required=True,
        type=str,
        help='Please provide the properties file, which including the paths of workdir')
    cli.add_argument(
        '-pre',
        '--prefix',
        required=True,
        type=str,
        help='Please provide the prefix for the output file')
    opts = cli.parse_args()

    # The properties file is checked before it is loaded
    fi.check_exist(opts.properties_file)

    properties_file = opts.properties_file
    prop = properties(properties_file)
    prefix = opts.prefix

    # Echo the effective settings
    for label, value in (("properties_file:", str(properties_file)),
                         ("prefix:", prefix)):
        print(label, value)
def get_args(): global properties_file global genome global prefix global vcf_file_pattern global prop # Assign description to the help doc parser = argparse.ArgumentParser( description= '''Script invests genes under selection pressure within species through dNdS. Species can be chosen from genome_list.txt''') parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g', '--genome_name', type=str, help= '''Please provide the genome name, only with those obtained from genome_list.txt''', required=True) parser.add_argument( '-f', '--vcf_file_pattern', type=str, help="Please provide snp vcf files' pattern with full file path", required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) properties_file = args.properties_file prop = properties(properties_file) if args.genome_name not in ( line.rstrip() for line in open(prop.get_attrib("available_genomes")).readlines() ) and args.genome_name != "cryptosporidium_hominis": misc.my_exit("{} is not available, please try another genome".format( args.genome_name)) if not re.search(".vcf", args.vcf_file_pattern): misc.my_exit("vcf_file_pattern need to end up with .vcf") genome = args.genome_name vcf_file_pattern = args.vcf_file_pattern prefix = args.prefix print "properties_file:", properties_file print "genome:", genome print "vcf_file_pattern:", vcf_file_pattern print "prefix:", prefix
def get_args():
    """Parse the command-line options and publish them as module globals.

    Sets the globals ``fi``, ``prop``, ``properties_file``, ``genome_name``,
    ``bam_file_pattern``, ``bam_files``, ``bam_key_pattern``,
    ``mapping_file`` and ``prefix`` and echoes the user-supplied values.

    The genome name must be listed in the file named by the
    ``available_genomes`` property, the bam pattern must end with ``.bam``
    and every matched bam file name must contain a run ID matching
    ``bam_key_pattern``.

    NOTE(review): ``fi.check_exist``, ``fi.check_files_exist`` and
    ``misc.my_exit`` are defined elsewhere; they presumably terminate the
    program on failure - confirm.
    """
    global fi
    global prop
    global properties_file
    global genome_name
    global bam_file_pattern
    global bam_files
    global mapping_file
    global prefix
    global bam_key_pattern
    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description='Script build all individual chromosome multiple alignment for recombination')
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help='''Please provide the genome name available in genome_list.txt only''',
        required=True)
    parser.add_argument(
        '-bp',
        '--bam_file_pattern',
        type=str,
        help='''Please provide the bam files' pattern with the full path, ending with .bam, with runID in the bam file name''',
        required=True)
    parser.add_argument(
        '-m',
        '--mapping_file',
        type=str,
        help='''Please provide the mapping file path, containing one column of the runID and the other column is the expression displayed in the multiple alignment file description line''',
        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)
    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)
    # Read the genome list inside a context manager so the handle is closed
    with open(prop.get_attrib("available_genomes")) as genome_list:
        available_genomes = set(line.rstrip() for line in genome_list)
    if args.genome_name not in available_genomes:
        misc.my_exit("{} is not available, please try another genome".format(
            args.genome_name))
    # Escaped dot: only a literal ".bam" suffix is accepted
    if not re.search(r"\.bam$", args.bam_file_pattern):
        misc.my_exit("bam_file_pattern need to end up with .bam")
    bam_file_pattern = args.bam_file_pattern
    bam_files = glob.glob(bam_file_pattern)
    # Raw string: same pattern text, without relying on an invalid "\d"
    # string escape
    bam_key_pattern = r"[A-Z]RR\d{6,}"
    for bam_file in bam_files:
        if not re.search(bam_key_pattern, bam_file):
            misc.my_exit(
                "There is no runID in the bam file {}".format(bam_file))
    fi.check_files_exist(bam_files)
    # define variables
    genome_name = args.genome_name
    mapping_file = args.mapping_file
    prefix = args.prefix
    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
    print("bam_file_pattern:", bam_file_pattern)
    print("mapping_file:", mapping_file)
    print("prefix:", prefix)
def __init__(self, properties_file, genome_name, genome_fasta, bam_file,
             prefix, if_anno, subdir):
    """Store the constructor arguments and create the helper objects.

    Every argument is kept on the instance under its own name. In addition
    ``self.prop`` is built from ``properties_file`` via ``properties`` and
    ``self.fi`` is a fresh ``fileutils`` object.
    """
    self.properties_file = properties_file
    # Properties object built from the properties file path
    self.prop = properties(properties_file)
    # Remaining settings are stored unchanged (assigned left to right)
    (self.genome_name, self.genome_fasta, self.bam_file, self.prefix,
     self.if_anno, self.subdir) = (genome_name, genome_fasta, bam_file,
                                   prefix, if_anno, subdir)
    self.fi = fileutils()
def get_args():
    """Parse the command-line options and publish them as module globals.

    Sets the globals ``properties_file``, ``prop``, ``genome_name``,
    ``vcf_file_pattern``, ``go_file``, ``mapping_file`` and ``prefix`` and
    echoes the user-supplied values.

    The genome name must be listed in the file named by the
    ``available_genomes`` property and the vcf pattern must end with
    ``.vcf``.

    NOTE(review): ``FI.check_exist`` and ``MISC.my_exit`` are defined
    elsewhere; they presumably terminate the program on failure - confirm.
    """
    global properties_file
    global genome_name
    global prefix
    global vcf_file_pattern
    global go_file
    global mapping_file
    global prop
    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description='''Script invests genes under selection pressure within species through dNdS. It creates variation annotation file for each SNP vcf file, and gene variation annotation summary file based on all vcf files by using snpEff''')
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help='''Please provide the genome name, only with those obtained from genome_list.txt''',
        required=True)
    parser.add_argument(
        '-vp',
        '--vcf_file_pattern',
        type=str,
        help="Please provide snp vcf files' pattern with full file path",
        required=True)
    parser.add_argument(
        '-go',
        '--go_file',
        type=str,
        help="Please provide the full path of the gene ontology file",
        required=True)
    parser.add_argument(
        '-m',
        '--mapping_file',
        type=str,
        help='''Please provide the mapping file path, which contains one column of read_ID from vcf file and one column of its corresponding sample_name''',
        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)
    # check args
    args = parser.parse_args()
    FI.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)
    # Read the genome list inside a context manager so the handle is closed
    with open(prop.get_attrib("available_genomes")) as genome_list:
        available_genomes = set(line.rstrip() for line in genome_list)
    if args.genome_name not in available_genomes:
        MISC.my_exit("{} is not available, please try another genome".format(
            args.genome_name))
    # Escaped dot: only a literal ".vcf" suffix is accepted
    if not re.search(r"\.vcf$", args.vcf_file_pattern):
        MISC.my_exit("vcf_file_pattern need to end up with .vcf")
    FI.check_exist(args.go_file)
    go_file = args.go_file
    genome_name = args.genome_name
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix
    mapping_file = args.mapping_file
    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
    print("vcf_file_pattern:", vcf_file_pattern)
    print("go_file:", go_file)
    print("mapping_file:", mapping_file)
    print("prefix:", prefix)
def get_args(): global properties_file global g_names_str global fi global prop global min_homo # Assign description to the help doc parser = argparse.ArgumentParser( description= 'Script invests genes under selection pressure among multiple species through dNdS' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g', '--genome_names', type=str, help='''Please provide the genome names, seperating by "," with the format of XXX, YYY, ZZZ''', required=True) parser.add_argument( '-min', '--min_homo', type=int, help= '''Please provide the minimum poteintial homologue numbers in one group, if not defined, 4 will be used as the default''', required=False) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) # define variables properties_file = args.properties_file prop = properties(properties_file) g_names_str = args.genome_names if args.min_homo is None: min_homo = 4 else: min_homo = args.min_homo print "properties_file:", properties_file print "gnames:", g_names_str print "min_homo:", str(min_homo)
def get_args(): global fi global prop global properties_file global genome_fasta global bam_file_pattern global bam_request_pattern global bam_files global map_fpath global map_dict global prefix # Assign description to the help doc parser = argparse.ArgumentParser(description='''Script creating relocation files for multiple bam files from various genomes and automatically open the GUI''') parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument('-m', '--map_file', type=str, help='''Please provide the map file, in which the first column is the full path of the genome fasta file and the second column is the full path of the bam file and the bam files need to ended with .bam''', required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi=fileutils() fi.check_files_exist([args.properties_file,args.map_file]) # define variables properties_file=args.properties_file prop=properties(properties_file) map_fpath=args.map_file fh_map=open(map_fpath, "r") map_dict={} for line in fh_map: line=line.rstrip() (fasta_fpath,bam_fpath)=getVar(line.split(),[0,1]) fi.check_files_exist([fasta_fpath,bam_fpath]) map_dict[bam_fpath]=fasta_fpath prefix=args.prefix print "properties_file:",properties_file print "map_file:",map_fpath print "prefix:",prefix
def get_args(): global fi global prop global properties_file global genome_name global vcf_file_pattern global vcf_files global mapping_file global image_title global prefix # Assign description to the help doc parser = argparse.ArgumentParser( description= 'Script build phylogenetic tree and dendragram for the defined group of vcf files from the same genome' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g', '--genome_name', type=str, help= '''Please provide the genome name, only with those obtained from genome_list.txt''', required=True) parser.add_argument( '-v', '--vcf_file_pattern', type=str, help='''Please provide the vcf files' pattern with the full path, vcf files must ended with ".vcf" ''', required=True) parser.add_argument( '-m', '--mapping_file', type=str, help= '''Please provide the mapping file path, which contains one column of read_ID from vcf file and one column of its corresponding label on the tree branch, otherwise, the read_ID will be labeled on the tree branch''', required=False) parser.add_argument('-t', '--title', type=str, help='''Please provide the title of the image''', required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) properties_file = args.properties_file prop = properties(properties_file) if args.genome_name not in ( line.rstrip() for line in open(prop.get_attrib("available_genomes")).readlines() ) and args.genome_name != "cryptosporidium_hominis": misc.my_exit("{} is not available, please try another genome".format( args.genome_name)) if not re.search(".vcf$", args.vcf_file_pattern): misc.my_exit("vcf_file_pattern need to end up with .vcf") vcf_file_pattern = args.vcf_file_pattern vcf_files = glob.glob(vcf_file_pattern) fi.check_files_exist(vcf_files) # define 
variables genome_name = args.genome_name mapping_file = args.mapping_file image_title = args.title prefix = args.prefix print "properties_file:", properties_file print "genome_name:", genome_name print "vcf_file_pattern:", vcf_file_pattern print "mapping_file:", mapping_file print "title:", image_title print "prefix:", prefix
def get_args():
    """Parse the command-line options and publish them as module globals.

    Sets the globals ``properties_file``, ``prop``, ``genome_name``,
    ``vcf_file_pattern``, ``vcf_files``, ``vcf_request_pattern``,
    ``mapping_file`` and ``prefix`` and echoes the user-supplied values.

    Exits through ``sys.exit`` when the vcf pattern matches no file or a
    matched file does not satisfy ``vcf_request_pattern``.

    NOTE(review): ``FI.check_exist``, ``FI.check_files_exist`` and
    ``MISC.my_exit`` are defined elsewhere; they presumably terminate the
    program on failure - confirm.
    """
    global properties_file
    global genome_name
    global vcf_file_pattern
    global mapping_file
    global prefix
    global prop
    global vcf_files
    global vcf_request_pattern
    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description='''Script creates the genome Short Tandem Repeat (STR) variation summary file based on all vcf files and multiple alignment files for all repeat regions''')
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help='''Please provide the genome name available in genome_list.txt only or cryptosporidium_hominis''',
        required=False)
    parser.add_argument(
        '-vp',
        '--vcf_file_pattern',
        type=str,
        help='''Please provide the paired vcf files' pattern with the full path, must end with .vcf''',
        required=True)
    parser.add_argument(
        '-m',
        '--mapping_file',
        type=str,
        help='''Please provide the mapping file path, which contains one column of read_ID from vcf file and one column of its corresponding sample_name''',
        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)
    # check args
    args = parser.parse_args()
    FI.check_exist(args.properties_file)
    # The properties file is loaded once; these values are reused below
    properties_file = args.properties_file
    prop = properties(properties_file)
    # Read the genome list inside a context manager so the handle is closed
    with open(prop.get_attrib("available_genomes")) as genome_list:
        available_genomes = set(line.rstrip() for line in genome_list)
    # NOTE(review): -g is optional, yet an omitted value (None) is rejected
    # here, and the help text mentions cryptosporidium_hominis which this
    # check does not special-case - confirm the intended behaviour.
    if args.genome_name not in available_genomes:
        MISC.my_exit("{} is not available, please try another genome".format(
            args.genome_name))
    vcf_files = glob.glob(args.vcf_file_pattern)
    if not vcf_files:
        sys.exit("ERROR: no vcf files provided")
    vcf_request_pattern = "^.*/?(.*?).vcf$"
    for vcf_file in vcf_files:
        if not re.search(vcf_request_pattern, vcf_file):
            sys.exit("vcf_file not end with .vcf")
    FI.check_files_exist(vcf_files)
    # define variables
    genome_name = args.genome_name
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix
    mapping_file = args.mapping_file
    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
    print("vcf_file_pattern:", vcf_file_pattern)
    print("mapping_file:", mapping_file)
    print("prefix:", prefix)
def get_args():
    """Parse the command-line options and publish them as module globals.

    Sets the globals ``properties_file``, ``prop``, ``genome_name``,
    ``fastq1``, ``fastq2``, ``prefix_ori``, ``runID`` and ``mapping_file``
    and echoes the user-supplied values. ``runID`` is whatever
    ``MISC.get_runID`` returns for the forward fastq file.

    NOTE(review): the ``FI`` / ``MISC`` helpers are defined elsewhere; the
    checks presumably abort on failure - confirm.
    """
    global properties_file, prop, genome_name, fastq1, fastq2
    global prefix_ori, runID, mapping_file

    # Describe the command-line interface
    cli = argparse.ArgumentParser(
        description='''Script assembles short reads by using spades and provides statistics summary based on the result assemblies by using QUAST''')
    cli.add_argument(
        '-p',
        '--properties_file',
        required=True,
        type=str,
        help='Please provide the properties file')
    cli.add_argument(
        '-g',
        '--genome_name',
        required=True,
        type=str,
        help='''Please provide the genome name which need to be in genome_list.txt''')
    cli.add_argument(
        '-fq1',
        '--fastq1',
        required=True,
        type=str,
        help='Please provide the forward fastq file')
    cli.add_argument(
        '-fq2',
        '--fastq2',
        required=False,
        type=str,
        help='Please provide the reverse fastq file')
    cli.add_argument(
        '-pre',
        '--prefix',
        required=True,
        type=str,
        help='Please provide the prefix for the output file')
    cli.add_argument(
        '-m',
        '--mapping_file',
        required=False,
        type=str,
        help='''if map file was provided, whose first column is runID and the second column is sample name, seperated by "\t", sample name will be included in the output files' names''')
    opts = cli.parse_args()

    # Mandatory inputs first, then the genome name
    FI.check_files_exist([opts.properties_file, opts.fastq1])
    properties_file = opts.properties_file
    prop = properties(properties_file)
    MISC.check_genome_avl(prop.get_attrib("available_genomes"),
                          opts.genome_name)
    runID = MISC.get_runID(opts.fastq1)

    # Optional inputs are only checked when supplied
    reverse_given = opts.fastq2 is not None
    if reverse_given:
        FI.check_exist(opts.fastq2)
        # Result is discarded; the call is made for the reverse file as well
        MISC.get_runID(opts.fastq2)
    mapping_given = opts.mapping_file is not None
    if mapping_given:
        FI.check_exist(opts.mapping_file)

    genome_name = opts.genome_name
    fastq1 = opts.fastq1
    fastq2 = opts.fastq2
    prefix_ori = opts.prefix
    mapping_file = opts.mapping_file

    # Echo the effective settings
    for label, value in (("properties_file:", properties_file),
                         ("genome_name:", genome_name),
                         ("fastq1:", fastq1),
                         ("fastq2:", fastq2),
                         ("prefix:", prefix_ori),
                         ("mapping_file:", mapping_file)):
        print(label, value)
def post_process():
    """Hand every entry of ``out_pdf_files`` to ``FI.copy_file_to_destdir``.

    ``out_pdf_files`` and ``outdir`` are module globals set elsewhere.
    """
    print("post_processing...")
    for produced_file in out_pdf_files:
        FI.copy_file_to_destdir(produced_file, outdir)


if __name__ == '__main__':
    # Shared helper objects used by the functions of this script
    FI = fileutils()
    MISC = misc()

    get_args()
    prop = properties(PROPERTIES_FILE)

    def getVar(searchList, ind):
        """Return the elements of searchList at the positions in ind."""
        return [searchList[i] for i in ind]

    print("\n", "Properties attributes:")
    print(prop.__dict__)

    # Pipeline: initiation, main work, post-execution step
    initiate()
    execute()
    post_process()

    script_path = os.path.realpath(__file__)
    print(script_path + " DONE")
def get_args(): global properties_file global genome global prefix global vcf_file_pattern global prop # Assign description to the help doc parser = argparse.ArgumentParser( description= '''Script invests genes under selection pressure within species through dNdS. Species can be chosen from -genome_list, which including 17 genomes. They are the common genomes of protists parasite and existing in snpEff''' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument('-genome_list', '--genome_list', help="This will display the genome name list", action="store_true") parser.add_argument( '-g', '--genome_name', type=str, help= '''Please provide the genome name, only with those obtained from -genome_list''', required='-genome_list' not in sys.argv) parser.add_argument( '-f', '--vcf_file_pattern', type=str, help="Please provide snp vcf files' pattern with full file path", required='-genome_list' not in sys.argv) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required='-genome_list' not in sys.argv) # check args args = parser.parse_args() if args.genome_list: print get_gene_list_str() sys.exit(0) fi = fileutils() fi.check_exist(args.properties_file) properties_file = args.properties_file prop = properties(properties_file) genome = args.genome_name if genome not in get_gene_list_str().split("\n"): print "ERROR: genome_name {} not in the list of -genome_list".format( genome) sys.exit(1) vcf_file_pattern = args.vcf_file_pattern prefix = args.prefix print "properties_file:", properties_file print "genome:", genome print "vcf_file_pattern:", vcf_file_pattern print "prefix:", prefix
print "stage 6 has started!" def execute_stage7(): print "stage 7 has started!" def execute_stage8(): print "stage 8 has started!" if __name__ == '__main__': get_args() global prop prop=properties(properties_file) print "\n","Properties attributes:" print prop.__dict__ initiate() execute_stage1() execute_stage2() execute_stage3() execute_stage4() execute_stage5() execute_stage6() execute_stage7() execute_stage8() print assemblers print fastq1
def get_args(): global properties_file global genome global gff global prefix global vcf_file_pattern global prop # Assign description to the help doc parser = argparse.ArgumentParser( description= 'Script invests genes under selection pressure within species through dNdS' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument('-g', '--genome_name', type=str, help='''Please provide the genome name, only "ch" for "C. hominis" or "cp" for "C. parvum" can be used''', required=True) parser.add_argument('-gff', '--genome_gff_file', type=str, help='''Please provide the genome gff file, only C. hominis or C. parvum gff file can be used''', required=False) parser.add_argument( '-f', '--vcf_file_pattern', type=str, help="Please provide vcf files' pattern with full file path", required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) if args.genome_gff_file is not None: fi.check_exist(args.genome_gff_file) # define variables properties_file = args.properties_file prop = properties(properties_file) if args.genome_name != 'ch' and args.genome_name != 'cp': print "only 'ch' or 'cp' can be used as the genome name" sys.exit(1) else: genome = args.genome_name if args.genome_gff_file is None: gff = prop.get_attrib(genome + "_gff") vcf_file_pattern = args.vcf_file_pattern prefix = args.prefix print "properties_file:", properties_file print "genome:", genome print "genome_gff:", gff print "vcf_file_pattern:", vcf_file_pattern print "prefix:", prefix
def get_args():
    """Parse the command-line options and publish them as module globals.

    Sets the globals ``prop``, ``properties_file``, ``genome_name``,
    ``gvcf_files`` (the paths matched by the ``-gv`` glob pattern),
    ``gvcf_files_str`` (those paths joined by single spaces), ``prefix`` and
    ``if_filter`` and echoes the values.

    NOTE(review): the ``FI`` / ``MISC`` helpers are defined elsewhere; the
    checks presumably abort on failure - confirm.
    """
    global prop
    global properties_file
    global genome_name
    global gvcf_files
    global gvcf_files_str
    global prefix
    global if_filter
    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description='''Script merging gvcf files, seperating into SNP and INDEL,and then filtering if requested''')
    parser.add_argument(
        '-p',
        '--properties_file',
        type=str,
        help='''Please provide the properties file, which including the paths of workdir''',
        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help='''Please provide the genome name which is provided by genome_list.txt''',
        required=True)
    parser.add_argument('-gv',
                        '--gvcf_files',
                        type=str,
                        help='Please provide gvcf files',
                        required=True)
    parser.add_argument('-f',
                        '--if_filter',
                        default=False,
                        action="store_true",
                        help='whether to filter SNP and INDEL seperately',
                        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file',
                        required=True)
    args = parser.parse_args()
    # check args
    FI.check_exist(args.properties_file)
    # Expand the pattern once so the validated list is the list that is used
    gvcf_files = glob.glob(args.gvcf_files)
    FI.check_files_exist(gvcf_files)
    properties_file = args.properties_file
    prop = properties(properties_file)
    MISC.check_genome_avl(prop.get_attrib("available_genomes"),
                          args.genome_name)
    # define variables
    genome_name = args.genome_name
    gvcf_files_str = " ".join(gvcf_files)
    prefix = args.prefix
    if_filter = args.if_filter
    # print args
    print("properties_file:", str(properties_file))
    print("genome_name:", genome_name)
    print("gvcf_files:", gvcf_files_str)
    print("if_filter:", if_filter)
    print("prefix:", prefix)
def get_args(): global properties_file global cds_fna1 global cds_faa1 global cds_fna2 global cds_faa2 global genome1 global genome2 global map_file global filter_eval global filter_identity global prefix global fi global makeblastdb_sw global blastn_sw global prop # Assign description to the help doc parser = argparse.ArgumentParser( description= 'Script invests genes under selection pressure between two species through dNdS' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g1', '--genome_name1', type=str, help='''Please provide the first genome name, otherwise, "ch" for "C. hominis" will be used''', required=False) parser.add_argument( '-g2', '--genome_name2', type=str, help='''Please provide the second genome name, otherwise, "cp" for "C. parvum" will be used''', required=False) parser.add_argument( '-fn1', '--cds_fna1', type=str, help= 'Please provide the first cds fna file, otherwise, ch fna file will be used.', required=False) parser.add_argument( '-fn2', '--cds_fna2', type=str, help= 'Please provide the second cds fna file, otherwise, cp fna file will be used.', required=False) parser.add_argument( '-fa1', '--cds_faa1', type=str, help= 'Please provide the first cds faa file, otherwise, ch faa file will be used.', required=False) parser.add_argument( '-fa2', '--cds_faa2', type=str, help= 'Please provide the second cds faa file, otherwise, cp faa file will be used.', required=False) parser.add_argument( '-m', '--map', type=str, help='''Please provide the file for mapping the chromosome accessions, one pair in each line and separated by tab, otherwise, no chromosome information will be provided in the output file''', required=False) parser.add_argument( '-fi', '--filter_identity', type=str, help='the identity percentage for filtering the blast hits.', required=False) parser.add_argument('-fe', '--filter_eval', type=str, help='the eval for filtering the blast hits.', 
required=False) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) for opt_arg_fpath in (args.cds_fna1, args.cds_fna2, args.cds_faa1, args.cds_faa2, args.map): if opt_arg_fpath is not None: fi.check_exist(opt_arg_fpath) # define variables makeblastdb_sw = "makeblastdb" blastn_sw = "blastn" default_gname1 = "ch" default_gname2 = "cp" map_file = "None" filter_eval = "0" filter_identity = "0" filter_length = "0" properties_file = args.properties_file prop = properties(properties_file) if args.genome_name1 is not None: genome1 = args.genome_name1 else: genome1 = default_gname1 if args.genome_name2 is not None: genome2 = args.genome_name2 else: genome2 = default_gname2 if args.cds_fna1 is not None: cds_fna1 = args.cds_fna1 else: cds_fna1 = prop.get_attrib(genome1 + "_cds_fna") if args.cds_faa1 is not None: cds_faa1 = args.cds_faa1 else: cds_faa1 = prop.get_attrib(genome1 + "_cds_faa") if args.cds_fna2 is not None: cds_fna2 = args.cds_fna2 else: cds_fna2 = prop.get_attrib(genome2 + "_cds_fna") if args.cds_faa2 is not None: cds_faa2 = args.cds_faa2 else: cds_faa2 = prop.get_attrib(genome2 + "_cds_faa") if args.map is not None: map_file = args.map if args.filter_eval is not None: filter_eval = args.filter_eval if args.filter_identity is not None: filter_identity = args.filter_identity prefix = args.prefix print "properties_file:", properties_file print "genome1:", genome1 print "genome2:", genome2 print "cds_fna1:", cds_fna1 print "cds_faa1:", cds_faa1 print "cds_fna2:", cds_fna2 print "cds_faa2:", cds_faa2 print "filter_eval:", filter_eval print "filter_identity_perc:", filter_identity print "prefix:", prefix
def get_args(): global fi global prop global properties_file global genome_name global genome_fasta global bam_file_pattern global bam_request_pattern global bam_files global prefix # Assign description to the help doc parser = argparse.ArgumentParser( description= '''Script creating relocation files for multiple bam files from the same genome and automatically open the GUI.''') parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g', '--genome_name', type=str, help='''if "C. hominis" or "C. parvum" will be used as the genome, please provide "ch" for "C. hominis" or "cp" for "C. parvum"''', required=False) parser.add_argument( '-f', '--genome_fasta', type=str, help='''Please provide the directory for the genome fasta file, if "ch" or "cp" is not the genome name.''', required=False) parser.add_argument( '-b', '--bam_file_pattern', type=str, help='''Please provide the bam files' pattern with the full path''', required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) if args.genome_name is not None and not args.genome_name == "ch" and not args.genome_name == "cp": print "genome name need to be ch or cp" sys.exit(1) if args.genome_fasta is not None: fi.check_exist(args.genome_fasta) bam_file_pattern = args.bam_file_pattern bam_files = glob.glob(bam_file_pattern) bam_request_pattern = "^.*/?(.*?).bam$" for bam_file in bam_files: if not re.search(bam_request_pattern, bam_file): print "bam_file not ended with .bam" sys.exit(1) fi.check_files_exist(bam_files) # define variables properties_file = args.properties_file prop = properties(properties_file) if args.genome_name is not None: genome_name = args.genome_name if args.genome_fasta is not None: genome_fasta = args.genome_fasta else: if args.genome_name is None: print 
"If no genome_fasta provided, genome name must be provided as ch or cp." sys.exit(1) else: genome_fasta = prop.get_attrib(genome_name + "_fasta") prefix = args.prefix print "properties_file:", properties_file print "genome_name:", genome_name print "genome_fasta:", genome_fasta print "bam_file_pattern:", bam_file_pattern print "prefix:", prefix
def get_args():
    """Parse the command-line options and publish them as module globals.

    Sets the globals ``prop``, ``properties_file``, ``genome_name``,
    ``bam_file``, ``prefix_ori``, ``if_filter``, ``mapping_file`` and
    ``runID`` and echoes the user-supplied values. ``runID`` is whatever
    ``MISC.get_runID`` returns for the bam file.

    NOTE(review): the ``FI`` / ``MISC`` helpers are defined elsewhere; the
    checks presumably abort on failure - confirm.
    """
    global prop, properties_file, genome_name, bam_file
    global prefix_ori, if_filter, mapping_file, runID

    # Describe the command-line interface
    cli = argparse.ArgumentParser(
        description='''Script for getting filtered or unfiltered SNP and INDEL vcf and gvcf files from bam files using gatk, and then create statistics summary by using bcf_tools''')
    cli.add_argument(
        '-p',
        '--properties_file',
        required=True,
        type=str,
        help='Please provide the properties file')
    cli.add_argument(
        '-g',
        '--genome_name',
        required=True,
        type=str,
        help='''Please provide the genome name which is provided by genome_list.txt''')
    cli.add_argument(
        '-bam',
        '--bam_file',
        required=True,
        type=str,
        help='Please provide one bam file')
    cli.add_argument(
        '-f',
        '--if_filter',
        required=False,
        action="store_true",
        default=False,
        help='whether to filter SNP and INDEL')
    cli.add_argument(
        '-pre',
        '--prefix',
        required=True,
        type=str,
        help='Please provide the prefix for the output file')
    cli.add_argument(
        '-m',
        '--mapping_file',
        required=False,
        type=str,
        help='''if map file was provided, sample name will be included in the output file name''')
    opts = cli.parse_args()

    # Mandatory inputs first, then the genome name and the run ID
    for required_path in (opts.properties_file, opts.bam_file):
        FI.check_exist(required_path)
    properties_file = opts.properties_file
    prop = properties(properties_file)
    MISC.check_genome_avl(prop.get_attrib("available_genomes"),
                          opts.genome_name)
    runID = MISC.get_runID(opts.bam_file)

    # The mapping file is only checked when supplied
    mapping_file = opts.mapping_file
    if mapping_file is not None:
        FI.check_exist(mapping_file)

    genome_name = opts.genome_name
    bam_file = opts.bam_file
    prefix_ori = opts.prefix
    if_filter = opts.if_filter

    # Echo the effective settings
    for label, value in (("properties_file:", str(properties_file)),
                         ("genome_name:", genome_name),
                         ("bam_file:", bam_file),
                         ("if_filter:", if_filter),
                         ("prefix:", prefix_ori),
                         ("mapping_file:", mapping_file)):
        print(label, value)
def get_args():
    """Parse and validate the command line of the vcf plotting script.

    Takes no parameters and returns nothing. The results are published as
    module-level globals:

    * ``PROPERTIES_FILE`` / ``PROP`` -- the properties file path and the
      ``properties`` object built from it.
    * ``genome_name``, ``gvcf_file``, ``genotype_file``, ``prefix`` -- the
      corresponding command-line values, unchanged.
    * ``vcf_files`` -- the list produced by expanding ``--vcf_files`` with
      ``glob.glob``.
    * ``vcf_files_str`` -- the absolute path of every entry of ``vcf_files``,
      each followed by a single space (so the string ends with a space).

    The ``--vcf_files`` pattern is expanded exactly once, so the list that is
    validated is the same list that is stored. If it matches nothing the
    script stops via ``sys.exit("Input vcf files not exist")`` before any
    per-file check runs.

    Existence and genome checks are delegated to ``FI`` and ``MISC``; how
    they report failures is defined outside this function.
    """
    global PROP, PROPERTIES_FILE, genome_name, vcf_files
    global vcf_files_str, gvcf_file, genotype_file, prefix

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=("Script for creating images: phylogeny tree, PCA, "
                     "heatmap, upset, and SNP distribution on chromosome "
                     "for vcf files"))
    parser.add_argument(
        '-p', '--properties_file',
        type=str,
        help=("Please provide the properties file, which including the "
              "paths of samtools and bcftools and workdir"),
        required=True)
    parser.add_argument(
        '-g', '--genome_name',
        type=str,
        help=("Please provide the genome name which is provided by "
              "genome_list.txt"),
        required=True)
    parser.add_argument(
        '-v', '--vcf_files',
        type=str,
        help='Please provide one or multiple vcf files',
        required=True)
    parser.add_argument(
        '-gv', '--gvcf_file',
        type=str,
        help='Please provide one concat gvcf file',
        required=True)
    parser.add_argument(
        '-gt', '--genotype_file',
        type=str,
        help=("The file need to contain 3 cols: runID, isolate name, and "
              "genotype"),
        required=True)
    parser.add_argument(
        '-pre', '--prefix',
        type=str,
        help='Please provide the prefix for the output file',
        required=True)
    args = parser.parse_args()

    # check args
    FI.check_exist(args.properties_file)
    PROPERTIES_FILE = args.properties_file
    PROP = properties(PROPERTIES_FILE)
    MISC.check_genome_avl(PROP.get_attrib("available_genomes"),
                          args.genome_name)

    # Expand the pattern a single time and reject an empty match up front,
    # instead of handing an empty list to the existence check first.
    vcf_files = glob.glob(args.vcf_files)
    if not vcf_files:
        sys.exit("Input vcf files not exist")
    FI.check_files_exist(vcf_files)
    FI.check_exist(args.gvcf_file)
    FI.check_exist(args.genotype_file)

    genome_name = args.genome_name

    absolute_paths = []
    for vcf_file in vcf_files:
        # Return value is unused; the call is kept for whatever checking
        # MISC.get_runID performs on the file name (TODO confirm).
        MISC.get_runID(vcf_file)
        absolute_paths.append(os.path.abspath(vcf_file))
    # Every path is followed by one space, including the last one, which is
    # the exact format the string always had.
    vcf_files_str = "".join(path + " " for path in absolute_paths)

    gvcf_file = args.gvcf_file
    genotype_file = args.genotype_file
    prefix = args.prefix

    # print args
    print("properties_file:", PROPERTIES_FILE)
    print("genome name:", genome_name)
    print("vcf_files:", vcf_files_str)
    print("gvcf_file:", gvcf_file)
    print("genotype_file:", genotype_file)
    print("prefix:", prefix)
def get_args():
    """Parse and validate the command line of the NGS analysis pipeline.

    Takes no parameters and returns nothing. The results are published as
    module-level globals:

    * ``properties_file`` / ``prop`` -- the properties file path and the
      ``properties`` object built from it.
    * ``prefix``, ``g_name_str``, ``in_fastq1``, ``in_fastq2`` -- the values
      of ``--prefix``, ``--genome_name``, ``--fastq1`` and ``--fastq2``.
    * ``fastq1_key`` / ``fastq1_postfix`` -- the pair returned by
      ``check_fastq_postfix`` for the forward fastq file.
    * ``fastq2_key`` / ``fastq2_postfix`` -- the same for the reverse fastq
      file; only assigned when ``--fastq2`` is given.
    * ``qc_sw`` -- ``--qc_software``, or ``"trim_galore"`` when omitted.
    * ``if_dedupQ`` / ``if_dedupM`` -- the two de-duplication switches.

    ``min_homo`` is declared global here but is not assigned by this
    function.

    The genome name must appear as a line of the file named by the
    ``available_genomes`` property; otherwise ``misc.my_exit`` is called.
    File existence checks are delegated to ``fi.check_exist``.
    """
    global properties_file, g_name_str, prop, min_homo
    global in_fastq1, in_fastq2
    global fastq1_key, fastq2_key, fastq1_postfix, fastq2_postfix
    global qc_sw, if_dedupQ, if_dedupM, prefix

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=("Script provides basic analyses for the next "
                     "generation sequences, including fastq file quality "
                     "control, assembly by using spades, reference_mapping, "
                     "and SNP calling using samtools for the genome "
                     "available in the genome_list"))
    parser.add_argument(
        '-p', '--properties_file',
        type=str,
        help='Please provide the properties file.',
        required=True)
    parser.add_argument(
        '-pre', '--prefix',
        type=str,
        help='Please provide the prefix for the output file.',
        required=True)
    parser.add_argument(
        '-g', '--genome_name',
        type=str,
        help=("Please provide the genome name which is provided by "
              "genome_list.txt"),
        required=True)
    parser.add_argument(
        '-qc_sw', '--qc_software',
        type=str,
        help=("Please provide the quality control software, otherwise the "
              "default trim_galore will be used"),
        required=False)
    parser.add_argument(
        '-deQ', '--dedupQ',
        action='store_true',
        help='if remove all the exact read duplications',
        default=False)
    parser.add_argument(
        '-fq1', '--fastq1',
        type=str,
        help='Please provide the forward fastq file',
        required=True)
    parser.add_argument(
        '-fq2', '--fastq2',
        type=str,
        help='Please provide the reverse fastq file',
        required=False)
    parser.add_argument(
        '-deM', '--dedupM',
        action='store_true',
        help='if true, remove duplications after mapping using samtools',
        default=False)

    # check args
    args = parser.parse_args()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)

    # Read the genome list inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(prop.get_attrib("available_genomes")) as genome_list:
        available_genomes = {line.rstrip() for line in genome_list}
    if args.genome_name not in available_genomes:
        misc.my_exit("{} is not available, please try another genome".format(
            args.genome_name))

    fi.check_exist(args.fastq1)
    (fastq1_key, fastq1_postfix) = check_fastq_postfix(args.fastq1)
    if args.fastq2 is not None:
        fi.check_exist(args.fastq2)
        (fastq2_key, fastq2_postfix) = check_fastq_postfix(args.fastq2)

    # define variables (properties_file and prop are already set above)
    prefix = args.prefix
    g_name_str = args.genome_name
    in_fastq1 = args.fastq1
    in_fastq2 = args.fastq2
    qc_sw = "trim_galore" if args.qc_software is None else args.qc_software
    if_dedupQ = args.dedupQ
    if_dedupM = args.dedupM

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("properties_file: {}".format(properties_file))
    print("prefix: {}".format(prefix))
    print("gname: {}".format(g_name_str))
    print("fastq1: {}".format(in_fastq1))
    print("fastq2: {}".format(in_fastq2))
    print("qc_software: {}".format(qc_sw))
    print("assembly_software:spades")
    print("dedupQ: {}".format(if_dedupQ))
    print("dedupM: {}".format(if_dedupM))
def get_args():
    """Read the command line of the fastq quality-control script.

    Takes no parameters and returns nothing. The parsed values are published
    as module-level globals: ``properties_file``, ``prop``, ``fastq1``,
    ``fastq2``, ``prefix``, ``if_dedup``, ``runID`` and ``mapping_file``.

    The properties file and the first fastq file are passed together to
    ``FI.check_files_exist``; the optional second fastq file goes through
    ``FI.check_exist``. ``runID`` is the value ``MISC.get_runID`` returns for
    the first fastq file; the same helper is also called on the second fastq
    file, with its result discarded. The mapping file is stored as given,
    without any check in this function.
    """
    global properties_file, prop, fastq1, fastq2
    global prefix, if_dedup, runID, mapping_file

    # Describe the command-line interface
    cli = argparse.ArgumentParser(
        description=("Script provides quality control on fastq files using "
                     "trim_galore and remove duplicated reads using "
                     "clumpify. The quality of the original and filtered "
                     "reads are monitored by fastQC and summarized by "
                     "multiQC"))
    cli.add_argument(
        '-p', '--properties_file',
        required=True,
        type=str,
        help='Please provide the properties file.')
    cli.add_argument(
        '-fq1', '--fastq1',
        required=True,
        type=str,
        help='Please provide the first fastq file.')
    cli.add_argument(
        '-fq2', '--fastq2',
        required=False,
        type=str,
        help='Please provide the second fastq file.')
    cli.add_argument(
        '-pre', '--prefix',
        required=True,
        type=str,
        help='Please provide the prefix for the output file')
    cli.add_argument(
        '-de', '--dedup',
        default=False,
        action='store_true',
        help='if remove all the exact read duplications')
    cli.add_argument(
        '-m', '--mapping_file',
        required=False,
        type=str,
        help=('if map file was provided, whose first column is runID and '
              'the second column is sample name, seperated by "\t", sample '
              'name will be included in fastQC and multiQC output file '
              'names'))

    # Validate the inputs
    opts = cli.parse_args()
    FI.check_files_exist([opts.properties_file, opts.fastq1])
    runID = MISC.get_runID(opts.fastq1)
    if opts.fastq2 is not None:
        FI.check_exist(opts.fastq2)
        MISC.get_runID(opts.fastq2)

    # Publish the options as globals
    properties_file = opts.properties_file
    prop = properties(properties_file)
    fastq1, fastq2 = opts.fastq1, opts.fastq2
    prefix = opts.prefix
    if_dedup = opts.dedup
    mapping_file = opts.mapping_file

    # Echo the effective settings
    summary = (
        ("properties_file:", properties_file),
        ("fastq1:", fastq1),
        ("fastq2:", fastq2),
        ("prefix:", prefix),
        ("dedup:", if_dedup),
        ("mapping_file:", mapping_file),
    )
    for label, value in summary:
        print(label, value)
def get_args():
    """Read the command line of the short-read mapping script.

    Takes no parameters and returns nothing. The parsed values are published
    as module-level globals: ``properties_file``, ``prop``, ``mapping_tool``,
    ``genome_name``, ``fastq1``, ``fastq2``, ``platform``, ``dna_library``,
    ``if_dedup``, ``if_recom``, ``prefix_ori``, ``mapping_file`` and
    ``runID``.

    Checks run in this order: existence of the properties file and the first
    fastq file (``FI.check_files_exist``), genome availability
    (``MISC.check_genome_avl``), ``MISC.get_runID`` on the first fastq file
    (its result becomes ``runID``), the optional second fastq file, the
    optional mapping file, and finally the mapping tool, which must be
    ``"bwa"`` or ``"bowtie2"`` or the script stops through ``sys.exit``.
    """
    global properties_file, prop, mapping_tool, genome_name
    global fastq1, fastq2, platform, dna_library
    global if_dedup, if_recom, prefix_ori, mapping_file, runID

    # Describe the command-line interface
    cli = argparse.ArgumentParser(
        description=("Script mapping short reads to reference genomes using "
                     "BWA or bowtie2, with the following statistics summary "
                     "files created by fastQC, qualiMap, and multipleQC"))
    cli.add_argument(
        '-p', '--properties_file',
        required=True,
        type=str,
        help='Please provide the properties file')
    cli.add_argument(
        '-t', '--mapping_tool',
        required=True,
        type=str,
        help='Please choose mapping tool, bwa or bowtie2')
    cli.add_argument(
        '-g', '--genome_name',
        required=True,
        type=str,
        help=("Please provide the genome name which need to be in "
              "genome_list.txt"))
    cli.add_argument(
        '-fq1', '--fastq1',
        required=True,
        type=str,
        help=("Please provide the paired forward fastq file or the single "
              "fastq file, containing runID"))
    cli.add_argument(
        '-fq2', '--fastq2',
        required=False,
        type=str,
        help=("Please provide the paired reverse fastq file, containing "
              "runID"))
    cli.add_argument(
        '-f', '--platform',
        required=True,
        type=str,
        help='Please provide the plat_form')
    cli.add_argument(
        '-l', '--dna_library',
        required=True,
        type=str,
        help='Please provide the DNA library')
    cli.add_argument(
        '-pre', '--prefix',
        required=True,
        type=str,
        help='Please provide the subdir for holding the output file')
    cli.add_argument(
        '-de', '--dedup',
        default=False,
        action='store_true',
        help='if true, remove duplications after mapping using samtools')
    cli.add_argument(
        '-recom', '--recombination',
        default=False,
        action='store_true',
        help=("if true, mapping with bowtie2 with be use local alignment, "
              "which is for recombination analysis"))
    cli.add_argument(
        '-m', '--mapping_file',
        required=False,
        type=str,
        help=("if map file was provided, sample name will be included in "
              "the output file name"))
    opts = cli.parse_args()

    # Validate the inputs
    FI.check_files_exist([opts.properties_file, opts.fastq1])
    properties_file = opts.properties_file
    prop = properties(properties_file)
    MISC.check_genome_avl(prop.get_attrib("available_genomes"),
                          opts.genome_name)
    runID = MISC.get_runID(opts.fastq1)
    if opts.fastq2 is not None:
        FI.check_exist(opts.fastq2)
        MISC.get_runID(opts.fastq2)
    if opts.mapping_file is not None:
        FI.check_exist(opts.mapping_file)
    if opts.mapping_tool not in ("bwa", "bowtie2"):
        sys.exit("only bwa and bowtie2 are available for the mapping_tool")

    # Publish the options as globals
    mapping_tool = opts.mapping_tool
    genome_name = opts.genome_name
    fastq1, fastq2 = opts.fastq1, opts.fastq2
    platform = opts.platform
    dna_library = opts.dna_library
    prefix_ori = opts.prefix
    if_dedup = opts.dedup
    if_recom = opts.recombination
    mapping_file = opts.mapping_file

    # Echo the effective settings
    summary = (
        ("properties_file:", properties_file),
        ("genome_name:", genome_name),
        ("mapping_tool:", mapping_tool),
        ("fastq1:", fastq1),
        ("fastq2:", fastq2),
        ("platform:", platform),
        ("dna_library:", dna_library),
        ("prefix:", prefix_ori),
        ("dedup:", if_dedup),
        ("recombination:", if_recom),
        ("mapping_file:", mapping_file),
    )
    for label, value in summary:
        print(label, value)