def get_args():
    """Parse the command line and publish the results as module globals.

    Both ``-p/--properties_file`` and ``-g/--genome_name`` are required.
    The properties file is checked with ``fileutils.check_exist`` and then
    loaded through ``properties``; the genome name is passed to
    ``misc.check_genome_avl`` together with the ``available_genomes``
    attribute of the loaded properties.

    Globals written: ``fi``, ``prop``, ``misc``, ``properties_file``,
    ``genome_name``.
    """
    global fi, prop, misc, properties_file, genome_name

    fi = fileutils()

    # Describe the command-line interface.
    cli = argparse.ArgumentParser(
        description='Script downloading genome files')
    cli.add_argument(
        '-p', '--properties_file', type=str, required=True,
        help='''Please provide the properties file, 
                               which including workdir''')
    cli.add_argument(
        '-g', '--genome_name', type=str, required=True,
        help='''Please provide the genome name
                               which is provided by genome_list.txt''')
    parsed = cli.parse_args()

    # The properties file must exist before it is loaded.
    fi.check_exist(parsed.properties_file)
    properties_file = parsed.properties_file
    prop = properties(properties_file)

    # NOTE(review): this rebinds the global name ``misc`` from the callable
    # to the object it returns, so a second call to get_args() would call
    # that object instead -- confirm this is intended.
    misc = misc()
    misc.check_genome_avl(prop.get_attrib("available_genomes"),
                          parsed.genome_name)

    # Publish the remaining setting and echo what will be used.
    genome_name = parsed.genome_name
    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
Ejemplo n.º 2
0
def get_args():
    """Parse the command line for the multiQC report script.

    Both ``-p/--properties_file`` and ``-pre/--prefix`` are required.  The
    properties file is checked with ``fileutils.check_exist`` and then
    loaded through ``properties``.

    Globals written: ``fi``, ``prop``, ``properties_file``, ``prefix``.
    """
    global fi, prop, properties_file, prefix

    fi = fileutils()

    # Describe the command-line interface.
    cli = argparse.ArgumentParser(
        description='''Script creates multiQC html file using fastqc, 
                                                    bcftools, snpEff, QUAST, and QualiMap 
                                                    output files''')
    cli.add_argument(
        '-p', '--properties_file', type=str, required=True,
        help=
        'Please provide the properties file, which including the paths of workdir'
    )
    cli.add_argument(
        '-pre', '--prefix', type=str, required=True,
        help='Please provide the prefix for the output file')

    # Parse, then make sure the properties file is really there.
    parsed = cli.parse_args()
    fi.check_exist(parsed.properties_file)

    # Publish the settings and echo them.
    properties_file = parsed.properties_file
    prop = properties(properties_file)
    prefix = parsed.prefix
    print("properties_file:", str(properties_file))
    print("prefix:", prefix)
Ejemplo n.º 3
0
def get_args():
    global properties_file
    global genome
    global prefix
    global vcf_file_pattern
    global prop

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        '''Script invests genes under selection pressure within species through dNdS. 
        Species can be chosen from genome_list.txt''')
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help=
        '''Please provide the genome name, only with those obtained from genome_list.txt''',
        required=True)
    parser.add_argument(
        '-f',
        '--vcf_file_pattern',
        type=str,
        help="Please provide snp vcf files' pattern with full file path",
        required=True)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)
    if args.genome_name not in (
            line.rstrip()
            for line in open(prop.get_attrib("available_genomes")).readlines()
    ) and args.genome_name != "cryptosporidium_hominis":
        misc.my_exit("{} is not available, please try another genome".format(
            args.genome_name))
    if not re.search(".vcf", args.vcf_file_pattern):
        misc.my_exit("vcf_file_pattern need to end up with .vcf")
    genome = args.genome_name
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix

    print "properties_file:", properties_file
    print "genome:", genome
    print "vcf_file_pattern:", vcf_file_pattern
    print "prefix:", prefix
def get_args():
    """Parse and validate the command line for the chromosome-alignment run.

    Required options: ``-p/--properties_file``, ``-g/--genome_name``,
    ``-bp/--bam_file_pattern`` and ``-pre/--prefix``; ``-m/--mapping_file``
    is optional.

    Validation performed:
      * the properties file is checked with ``fileutils.check_exist``;
      * the genome name must appear (one per line) in the file named by the
        ``available_genomes`` property;
      * the BAM pattern must end with ``.bam``;
      * every file matched by the pattern must contain a run ID matching
        ``bam_key_pattern`` and is checked with
        ``fileutils.check_files_exist``.

    ``misc.my_exit`` is called with an explanatory message on rejection.

    Globals written: ``fi``, ``prop``, ``properties_file``, ``genome_name``,
    ``bam_file_pattern``, ``bam_files``, ``mapping_file``, ``prefix``,
    ``bam_key_pattern``.
    """
    global fi
    global prop
    global properties_file
    global genome_name
    global bam_file_pattern
    global bam_files
    global mapping_file
    global prefix
    global bam_key_pattern

    # Assign description to the help doc
    parser = argparse.ArgumentParser(description='Script build all individual chromosome multiple alignment for recombination')
    parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', 
                        required=True)
    parser.add_argument('-g', '--genome_name', type=str, help='''Please provide the genome name available 
                                                                 in genome_list.txt only''', 
                        required=True)
    parser.add_argument('-bp', '--bam_file_pattern', type=str, help='''Please provide the bam files' pattern 
                                                                       with the full path, ending with .bam, with runID 
                                                                       in the bam file name''', 
                        required=True)
    parser.add_argument('-m', '--mapping_file', type=str, help='''Please provide the mapping file path, containing one 
                                                                  column of the runID and the other column is the expression
                                                                  displayed in the multiple alignment file description line''', 
                        required=False)
    parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', 
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)

    # Read the genome list inside a ``with`` block so the handle is closed.
    with open(prop.get_attrib("available_genomes")) as genome_list:
        available_genomes = set(line.rstrip() for line in genome_list)
    if args.genome_name not in available_genomes:
        misc.my_exit("{} is not available, please try another genome".format(args.genome_name))

    # Escape the dot so that e.g. "reads_bam" is no longer accepted.
    if not re.search(r"\.bam$", args.bam_file_pattern):
        misc.my_exit("bam_file_pattern need to end up with .bam")
    bam_file_pattern = args.bam_file_pattern
    bam_files = glob.glob(bam_file_pattern)

    # Raw string: same pattern text as before, without relying on "\d"
    # surviving as an unrecognised string escape.
    bam_key_pattern = r"[A-Z]RR\d{6,}"
    for bam_file in bam_files:
        if not re.search(bam_key_pattern, bam_file):
            misc.my_exit("There is no runID in the bam file {}".format(bam_file))
    fi.check_files_exist(bam_files)

    # define variables
    genome_name = args.genome_name
    mapping_file = args.mapping_file
    prefix = args.prefix

    print ("properties_file:",properties_file)
    print ("genome_name:",genome_name)
    print ("bam_file_pattern:",bam_file_pattern)
    print ("mapping_file:",mapping_file)
    print ("prefix:",prefix)
Ejemplo n.º 5
0
def initiate():
    """Create the per-stage processing directories under ``prop.workdir``.

    One directory is created per analysis stage by calling
    ``fileutils.create_processing_dir`` on ``prop.workdir + "/" + <stage>``,
    in the order listed below.
    """
    fi = fileutils()
    # The previous unused local ``dir`` (which also shadowed the builtin)
    # has been dropped; the directory list is now data instead of seven
    # copy-pasted calls.
    stage_dirs = (
        "quality",
        "assembly",
        "reference-mapping",
        "SNPs",
        "structural-recombination",
        "hyper-variable-analysis",
        "cluster-analysis",
    )
    for stage_dir in stage_dirs:
        fi.create_processing_dir(prop.workdir + "/" + stage_dir)
Ejemplo n.º 6
0
 def __init__(self, properties_file, genome_name, genome_fasta, bam_file,
              prefix, if_anno, subdir):
     """Store the run parameters and build the helper objects.

     Every argument is kept unchanged on the instance under the same
     name.  In addition ``self.prop`` is created by calling
     ``properties(properties_file)`` and ``self.fi`` by calling
     ``fileutils()``.
     """
     # Properties: remember the path and load it right away.
     self.properties_file = properties_file
     self.prop = properties(properties_file)

     # Plain run parameters, stored as given.
     self.genome_name, self.genome_fasta = genome_name, genome_fasta
     self.bam_file, self.prefix = bam_file, prefix
     self.if_anno, self.subdir = if_anno, subdir

     # File helper used by the other methods.
     self.fi = fileutils()
Ejemplo n.º 7
0
 def post_process(self):
     """Create the quality in/out directories and copy the read files.

     ``<workdir>/quality/in/`` and ``<workdir>/quality/out/`` are created
     with ``fileutils.create_processing_dir``.  The two input files
     (``self.fq1``, ``self.fq2``) and three files derived from their names
     (``_val_1.fq``, ``_val_2.fq``, ``_trimming_report.txt``) are then
     copied into the *in* directory.
     """
     fi = fileutils()
     indir = self.prop.workdir + "/quality/in/"
     outdir = self.prop.workdir + "/quality/out/"
     for directory in (indir, outdir):
         fi.create_processing_dir(directory)

     # Raw inputs first ...
     for raw_fastq in (self.fq1, self.fq2):
         fi.copy_src_into_dest(raw_fastq, indir)

     # ... then the files named after them.
     # NOTE(review): these also go to ``indir``; ``outdir`` is created but
     # receives nothing here -- confirm that is intended.
     derived_files = (
         self.fq1 + "_val_1.fq",
         self.fq2 + "_val_2.fq",
         self.fq1 + "_trimming_report.txt",
     )
     for derived in derived_files:
         fi.copy_src_into_dest(derived, indir)
def get_args():
    global properties_file
    global g_names_str
    global fi
    global prop
    global min_homo

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        'Script invests genes under selection pressure among multiple species through dNdS'
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_names',
        type=str,
        help='''Please provide the genome names, seperating by "," 
        with the format of XXX, YYY, ZZZ''',
        required=True)
    parser.add_argument(
        '-min',
        '--min_homo',
        type=int,
        help=
        '''Please provide the minimum poteintial homologue numbers in one group, 
        if not defined, 4 will be used as the default''',
        required=False)
    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)

    # define variables
    properties_file = args.properties_file
    prop = properties(properties_file)
    g_names_str = args.genome_names
    if args.min_homo is None:
        min_homo = 4
    else:
        min_homo = args.min_homo
    print "properties_file:", properties_file
    print "gnames:", g_names_str
    print "min_homo:", str(min_homo)
def get_args():  
    global fi  
    global prop
    global properties_file
    global genome_fasta
    global bam_file_pattern
    global bam_request_pattern
    global bam_files
    global map_fpath
    global map_dict
    global prefix
                       
    # Assign description to the help doc
    parser = argparse.ArgumentParser(description='''Script creating relocation files for multiple bam files from 
                                    various genomes and automatically open the GUI''')
    parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True)
    parser.add_argument('-m', '--map_file', type=str, help='''Please provide the map file, 
                        in which the first column is the full path of the genome fasta file and the second column is
                        the full path of the bam file and the bam files need to ended with .bam''', required=True)   
    parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True)  

    # check args
    args = parser.parse_args()
    fi=fileutils()
    fi.check_files_exist([args.properties_file,args.map_file])   
    
    # define variables         
    properties_file=args.properties_file    
    prop=properties(properties_file)
    map_fpath=args.map_file
    fh_map=open(map_fpath, "r")
    map_dict={}
    for line in fh_map:
        line=line.rstrip()        
        (fasta_fpath,bam_fpath)=getVar(line.split(),[0,1])
        fi.check_files_exist([fasta_fpath,bam_fpath])
        map_dict[bam_fpath]=fasta_fpath
    prefix=args.prefix   
    
    print "properties_file:",properties_file
    print "map_file:",map_fpath
    print "prefix:",prefix
Ejemplo n.º 10
0
def initiate():
    """Set up the reference-mapping work area and stage the input reads.

    Derives the working directory (``prop.workdir`` + "/reference_mapping")
    and its ``in``, ``out`` and ``qc/<prefix_ori>`` sub-directories,
    creates all four through ``FI.create_processing_dir`` and copies
    ``fastq1`` (and ``fastq2`` when it is not None) into the ``in``
    directory.

    Globals written: ``indir``, ``outdir``, ``workdir``, ``subdir``,
    ``qcdir``.
    """
    global indir, outdir, workdir, subdir, qcdir

    print("initiating...")

    subdir = "reference_mapping"
    workdir = prop.workdir + "/" + subdir
    # NOTE(review): this local helper is never used below (everything goes
    # through the module-level ``FI``); the call is kept as-is.
    fi = fileutils()
    indir = workdir + "/in/"
    outdir = workdir + "/out/"
    qcdir = workdir + "/qc/" + prefix_ori

    for directory in (workdir, indir, outdir, qcdir):
        FI.create_processing_dir(directory)

    # The first read file is always staged; the mate only when given.
    FI.copy_file_to_destdir(fastq1, indir)
    if fastq2 is None:
        return
    FI.copy_file_to_destdir(fastq2, indir)
Ejemplo n.º 11
0
def get_args():
    global properties_file
    global cds_fna1
    global cds_faa1
    global cds_fna2
    global cds_faa2
    global genome1
    global genome2
    global map_file
    global filter_eval
    global filter_identity
    global prefix
    global fi
    global makeblastdb_sw
    global blastn_sw
    global prop

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        'Script invests genes under selection pressure between two species through dNdS'
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g1',
        '--genome_name1',
        type=str,
        help='''Please provide the first genome name, otherwise, 
                        "ch" for "C. hominis" will be used''',
        required=False)
    parser.add_argument(
        '-g2',
        '--genome_name2',
        type=str,
        help='''Please provide the second genome name, otherwise, 
                        "cp" for "C. parvum" will be used''',
        required=False)
    parser.add_argument(
        '-fn1',
        '--cds_fna1',
        type=str,
        help=
        'Please provide the first cds fna file, otherwise, ch fna file will be used.',
        required=False)
    parser.add_argument(
        '-fn2',
        '--cds_fna2',
        type=str,
        help=
        'Please provide the second cds fna file, otherwise, cp fna file will be used.',
        required=False)
    parser.add_argument(
        '-fa1',
        '--cds_faa1',
        type=str,
        help=
        'Please provide the first cds faa file, otherwise, ch faa file will be used.',
        required=False)
    parser.add_argument(
        '-fa2',
        '--cds_faa2',
        type=str,
        help=
        'Please provide the second cds faa file, otherwise, cp faa file will be used.',
        required=False)
    parser.add_argument(
        '-m',
        '--map',
        type=str,
        help='''Please provide the file for mapping the chromosome accessions, 
                                                      one pair in each line and separated by tab, 
                                                      otherwise, no chromosome information will be provided in the output file''',
        required=False)
    parser.add_argument(
        '-fi',
        '--filter_identity',
        type=str,
        help='the identity percentage for filtering the blast hits.',
        required=False)
    parser.add_argument('-fe',
                        '--filter_eval',
                        type=str,
                        help='the eval for filtering the blast hits.',
                        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    for opt_arg_fpath in (args.cds_fna1, args.cds_fna2, args.cds_faa1,
                          args.cds_faa2, args.map):
        if opt_arg_fpath is not None:
            fi.check_exist(opt_arg_fpath)

    # define variables
    makeblastdb_sw = "makeblastdb"
    blastn_sw = "blastn"
    default_gname1 = "ch"
    default_gname2 = "cp"
    map_file = "None"
    filter_eval = "0"
    filter_identity = "0"
    filter_length = "0"

    properties_file = args.properties_file
    prop = properties(properties_file)
    if args.genome_name1 is not None:
        genome1 = args.genome_name1
    else:
        genome1 = default_gname1
    if args.genome_name2 is not None:
        genome2 = args.genome_name2
    else:
        genome2 = default_gname2
    if args.cds_fna1 is not None:
        cds_fna1 = args.cds_fna1
    else:
        cds_fna1 = prop.get_attrib(genome1 + "_cds_fna")
    if args.cds_faa1 is not None:
        cds_faa1 = args.cds_faa1
    else:
        cds_faa1 = prop.get_attrib(genome1 + "_cds_faa")
    if args.cds_fna2 is not None:
        cds_fna2 = args.cds_fna2
    else:
        cds_fna2 = prop.get_attrib(genome2 + "_cds_fna")
    if args.cds_faa2 is not None:
        cds_faa2 = args.cds_faa2
    else:
        cds_faa2 = prop.get_attrib(genome2 + "_cds_faa")
    if args.map is not None:
        map_file = args.map
    if args.filter_eval is not None:
        filter_eval = args.filter_eval
    if args.filter_identity is not None:
        filter_identity = args.filter_identity
    prefix = args.prefix

    print "properties_file:", properties_file
    print "genome1:", genome1
    print "genome2:", genome2
    print "cds_fna1:", cds_fna1
    print "cds_faa1:", cds_faa1
    print "cds_fna2:", cds_fna2
    print "cds_faa2:", cds_faa2
    print "filter_eval:", filter_eval
    print "filter_identity_perc:", filter_identity
    print "prefix:", prefix
Ejemplo n.º 12
0
    for tg_out_file in tg_out_files:
        FI.copy_file_add_prefix(tg_out_file, outdir, prefix + "_")
    for fastqc_out_file in fastqc_out_files:
        FI.copy_file_to_destdir(fastqc_out_file, qcdir)
        FI.copy_file_to_destdir(fastqc_out_file.replace(".zip", ".html"),
                                qcdir)
    command("cp -p {}.html {}".format(multiqc_out_prefix, qcdir)).run_comm(0)
    if if_dedup:
        for deduped_fq in deduped_fqs:
            FI.copy_file_add_prefix(deduped_fq, outdir, prefix + "_")


if __name__ == '__main__':
    # Script entry point.  Names assigned here are already module-level,
    # so no ``global`` declarations are needed.
    FI = fileutils()
    MISC = misc()

    # Read the command line into the module-level settings.
    get_args()

    print("\n", "Properties attributes:\n", prop.__dict__)

    # Stage 1: prepare directories and inputs.
    initiate()

    # Stage 2: run the main work.
    execute()

    # Stage 3: collect the results.
    post_process()
Ejemplo n.º 13
0
def get_args():
    global properties_file
    global fastq1
    global fastq2
    global qc_sw
    global rm_dup_sw
    global prefix
    global fi
    global if_dedup

    fi = fileutils()
    default_qc_sw = "trim_galore"
    rm_dup_sw = "clumpify"

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description='Script assembles short reads based on some criteria')
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument('-fq1',
                        '--fastq1',
                        type=str,
                        help='Please provide the first fastq file.',
                        required=True)
    parser.add_argument('-fq2',
                        '--fastq2',
                        type=str,
                        help='Please provide the second fastq file.',
                        required=False)
    parser.add_argument('-qc_sw',
                        '--qc_software',
                        type=str,
                        help='''Please provide the quality control software, 
        otherwise the default trim_galore will be used''',
                        required=False)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file',
                        required=True)
    parser.add_argument('-de',
                        '--dedup',
                        action='store_true',
                        help='if remove all the exact read duplications',
                        default=False)
    # check args
    args = parser.parse_args()
    fi.check_files_exist([args.properties_file, args.fastq1])
    if args.fastq2 is not None:
        fi.check_exist(args.fastq2)

    # define variables
    properties_file = args.properties_file
    fastq1 = args.fastq1
    fastq2 = args.fastq2
    qc_sw = args.qc_software
    if qc_sw is None:
        qc_sw = default_qc_sw
    prefix = args.prefix
    if_dedup = args.dedup

    print "properties_file:", properties_file
    print "fastq1:", fastq1
    print "fastq2:", fastq2
    print "qc_software:", qc_sw
    print "prefix:", prefix
    print "dedup:", if_dedup
Ejemplo n.º 14
0
def get_args():
    global fi
    global prop
    global properties_file
    global genome_name
    global vcf_file_pattern
    global vcf_files
    global mapping_file
    global image_title
    global prefix

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        'Script build phylogenetic tree and dendragram for the defined group of vcf files from the same genome'
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help=
        '''Please provide the genome name, only with those obtained from genome_list.txt''',
        required=True)
    parser.add_argument(
        '-v',
        '--vcf_file_pattern',
        type=str,
        help='''Please provide the vcf files' pattern with the full path,
                                                                  vcf files must ended with ".vcf" ''',
        required=True)
    parser.add_argument(
        '-m',
        '--mapping_file',
        type=str,
        help=
        '''Please provide the mapping file path, which contains one column of 
                                                                read_ID from vcf file and one column of its corresponding label on the tree branch,
                                                                otherwise, the read_ID will be labeled on the tree branch''',
        required=False)
    parser.add_argument('-t',
                        '--title',
                        type=str,
                        help='''Please provide the title of the image''',
                        required=True)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)
    if args.genome_name not in (
            line.rstrip()
            for line in open(prop.get_attrib("available_genomes")).readlines()
    ) and args.genome_name != "cryptosporidium_hominis":
        misc.my_exit("{} is not available, please try another genome".format(
            args.genome_name))
    if not re.search(".vcf$", args.vcf_file_pattern):
        misc.my_exit("vcf_file_pattern need to end up with .vcf")
    vcf_file_pattern = args.vcf_file_pattern
    vcf_files = glob.glob(vcf_file_pattern)
    fi.check_files_exist(vcf_files)

    # define variables
    genome_name = args.genome_name
    mapping_file = args.mapping_file
    image_title = args.title
    prefix = args.prefix

    print "properties_file:", properties_file
    print "genome_name:", genome_name
    print "vcf_file_pattern:", vcf_file_pattern
    print "mapping_file:", mapping_file
    print "title:", image_title
    print "prefix:", prefix
Ejemplo n.º 15
0
    run_snpEff()


def post_process():
    print "post_processing..."
    fi.copy_file("snpEff_summary.html",
                 "{}/{}_snpEff_summary.html".format(outdir, prefix))
    fi.copy_file("snpEff_genes.txt",
                 "{}/{}_snpEff_genes.txt".format(outdir, prefix))
    for ann_vcf_fpath in ann_vcf_fpaths:
        fi.copy_file_to_destdir(ann_vcf_fpath, outdir)
    #fi.copy_file_to_destdir(ann_csv_fname,outdir)


if __name__ == '__main__':
    getVar = lambda searchList, ind: [searchList[i] for i in ind]
    fi = fileutils()
    misc = misc()
    get_args()
    print "\n", "Properties attributes:"
    print prop.__dict__

    #run_blast the initiation code
    initiate()

    #execute the main part of the program
    execute()

    #post execution code
    post_process()
Ejemplo n.º 16
0
def get_args():
    """Parse and validate the command line for the SNP-calling script.

    Required options: ``-p/--properties_file``, ``-g/--genome_name``
    (``ch`` or ``cp`` only), ``-bam/--bam_files`` (one or more) and
    ``-pre/--prefix``.  Optional: ``-r/--ref_fasta``, ``-qual/--QUAL_filter``,
    ``-dp/--DP_filter`` and the ``-c/--classify`` flag.

    Validation performed:
      * the properties file and, when given, the reference fasta are
        checked with ``fileutils.check_exist``;
      * the bam files are checked with ``fileutils.check_files_exist``;
      * a genome name other than ``ch``/``cp`` prints an error and exits
        with status 1.

    When no reference fasta is given, ``ref_fasta`` becomes
    ``default_ref_fasta_root + '_' + genome_name``.  ``bam_files_str`` is
    the absolute path of every bam file, each followed by one space.
    ``filter_dict`` receives the keys ``"QUAL"`` / ``"DP"`` only for the
    filters that were supplied.

    Globals written: ``fi``, ``properties_file``, ``genome_name``,
    ``ref_fasta``, ``bam_files``, ``bam_files_str``, ``prefix``,
    ``filter_dict``, ``default_ref_fasta_root``, ``if_classify``.
    """
    global fi
    global properties_file
    global genome_name
    global ref_fasta
    global bam_files
    global bam_files_str
    global prefix
    global filter_dict
    global default_ref_fasta_root
    global if_classify
    fi = fileutils()
    default_ref_fasta_root = 'ena_ref_fasta'
    filter_dict = {}

    # Assign description to the help doc
    parser = argparse.ArgumentParser(description='Script for getting SNP from bam files using samtools and bcftools')
    parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file, which including the paths of samtools and bcftools and workdir', required=True)
    parser.add_argument('-g', '--genome_name', type=str, help='''Please provide the genome name you are mapping to, only \'ch\' or \'cp\' permitted, 
                                                                \'ch\' stands for \'Cryptosporidium hominis\' and 
                                                                \'cp\' stands for \'Cryptosporidium parvum\'''', required=True)
    parser.add_argument('-r', '--ref_fasta', type=str, help='Please provide reference genome fasta file', required=False)
    parser.add_argument('-bam', '--bam_files', type=str, nargs='+', help='Please provide one or multiple bam files', required=True)
    parser.add_argument('-qual', '--QUAL_filter', type=str, help='To filter QUAL(phred-scaled quality score) in vcf file, please provide minimum QUAL value', required=False)
    parser.add_argument('-dp', '--DP_filter', type=str, help='To filter DP("Raw read depth") in vcf file, please provide the minimum DP value', required=False)
    parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file', required=True)
    parser.add_argument('-c', '--classify', action='store_true',help='set classifying the result VCF file into coding and non-coding file to true', default=False)

    args = parser.parse_args()

    # check args
    fi.check_exist(args.properties_file)
    fi.check_files_exist(args.bam_files)
    # The previous test, ``x is not "ch" and not "cp"``, was always False
    # (``not "cp"`` is False), so invalid genome names slipped through.
    if args.genome_name not in ("ch", "cp"):
        print "ERROR: Please provide the genome name you are mapping to, only \'ch\' or \'cp\' permitted"
        sys.exit(1)
    if args.ref_fasta is not None:
        fi.check_exist(args.ref_fasta)

    # define variables
    properties_file = args.properties_file
    genome_name = args.genome_name
    if args.ref_fasta is None:
        ref_fasta = default_ref_fasta_root + '_' + genome_name
    else:
        ref_fasta = args.ref_fasta
    bam_files = args.bam_files
    # Each path keeps its trailing space, exactly as the old += loop did.
    bam_files_str = "".join(
        os.path.abspath(bam_file) + " " for bam_file in bam_files)
    if args.QUAL_filter is not None:
        filter_dict["QUAL"] = args.QUAL_filter
    if args.DP_filter is not None:
        filter_dict["DP"] = args.DP_filter
    prefix = args.prefix
    if_classify = args.classify

    # print args
    print "properties_file:",str(properties_file)
    print "refrence_genome fasta file:",ref_fasta
    print "bam_files:",bam_files_str
    print "QUAL_filter:",args.QUAL_filter
    print "DP_filter:",args.DP_filter
    print "prefix:",prefix
    print "if_classify:",if_classify
def get_args():
    global fi
    global prop
    global properties_file
    global genome_name
    global genome_fasta
    global bam_file_pattern
    global bam_request_pattern
    global bam_files
    global prefix

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        '''Script creating relocation files for multiple bam files from the same genome 
                                    and automatically open the GUI.''')
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help='''if "C. hominis" or "C. parvum" will be used as the genome, 
                                                                 please provide "ch" for "C. hominis" or "cp" for "C. parvum"''',
        required=False)
    parser.add_argument(
        '-f',
        '--genome_fasta',
        type=str,
        help='''Please provide the directory for the genome fasta file, 
                                                                 if "ch" or "cp" is not the genome name.''',
        required=False)
    parser.add_argument(
        '-b',
        '--bam_file_pattern',
        type=str,
        help='''Please provide the bam files' pattern with the full path''',
        required=True)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    if args.genome_name is not None and not args.genome_name == "ch" and not args.genome_name == "cp":
        print "genome name need to be ch or cp"
        sys.exit(1)
    if args.genome_fasta is not None:
        fi.check_exist(args.genome_fasta)
    bam_file_pattern = args.bam_file_pattern
    bam_files = glob.glob(bam_file_pattern)
    bam_request_pattern = "^.*/?(.*?).bam$"
    for bam_file in bam_files:
        if not re.search(bam_request_pattern, bam_file):
            print "bam_file not ended with .bam"
            sys.exit(1)
    fi.check_files_exist(bam_files)

    # define variables
    properties_file = args.properties_file
    prop = properties(properties_file)
    if args.genome_name is not None:
        genome_name = args.genome_name
    if args.genome_fasta is not None:
        genome_fasta = args.genome_fasta
    else:
        if args.genome_name is None:
            print "If no genome_fasta provided, genome name must be provided as ch or cp."
            sys.exit(1)
        else:
            genome_fasta = prop.get_attrib(genome_name + "_fasta")
    prefix = args.prefix

    print "properties_file:", properties_file
    print "genome_name:", genome_name
    print "genome_fasta:", genome_fasta
    print "bam_file_pattern:", bam_file_pattern
    print "prefix:", prefix
def get_args():
    """Parse the command line of the dN/dS selection-pressure script.

    On success the module-level globals ``properties_file``, ``prop``,
    ``genome``, ``vcf_file_pattern`` and ``prefix`` are set and echoed to
    stdout.

    When the genome-list flag (``-genome_list`` or ``--genome_list``) is
    given, the string returned by ``get_gene_list_str()`` is printed and the
    process exits with status 0; no other option is needed in that mode.

    Exits with status 2 (through ``parser.error``) when a mandatory option is
    missing, and with status 1 when the genome name is not one of the lines
    returned by ``get_gene_list_str()``.
    """
    global properties_file
    global genome
    global prefix
    global vcf_file_pattern
    global prop

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        '''Script invests genes under selection pressure within species through dNdS. 
        Species can be chosen from -genome_list, which including 17 genomes. They are the common genomes of protists parasite and existing in snpEff'''
    )
    # None of the options is marked required at the argparse level: whether
    # they are mandatory depends on the genome-list flag, which is only known
    # reliably after parsing (sniffing sys.argv misses "--genome_list").
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.')
    parser.add_argument('-genome_list',
                        '--genome_list',
                        help="This will display the genome name list",
                        action="store_true")
    parser.add_argument(
        '-g',
        '--genome_name',
        type=str,
        help=
        '''Please provide the genome name, only with those obtained from -genome_list'''
    )
    parser.add_argument(
        '-f',
        '--vcf_file_pattern',
        type=str,
        help="Please provide snp vcf files' pattern with full file path")
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.')

    # check args
    args = parser.parse_args()
    if args.genome_list:
        # Listing mode: print the available genomes and stop, without
        # demanding a properties file or any other option.
        print(get_gene_list_str())
        sys.exit(0)

    # Normal mode: every remaining option is mandatory.
    mandatory = (
        ('properties_file', '-p/--properties_file'),
        ('genome_name', '-g/--genome_name'),
        ('vcf_file_pattern', '-f/--vcf_file_pattern'),
        ('prefix', '-pre/--prefix'),
    )
    missing = [flag for dest, flag in mandatory
               if getattr(args, dest) is None]
    if missing:
        # parser.error prints the usage line and exits with status 2,
        # the same status argparse uses for a missing required option.
        parser.error("the following arguments are required: " +
                     ", ".join(missing))

    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)
    genome = args.genome_name
    # get_gene_list_str() yields one genome name per line.
    if genome not in get_gene_list_str().split("\n"):
        print("ERROR: genome_name {} not in the list of -genome_list".format(
            genome))
        sys.exit(1)
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix

    # Single-argument print(...) produces the same text under Python 2 and 3.
    print("properties_file: {}".format(properties_file))
    print("genome: {}".format(genome))
    print("vcf_file_pattern: {}".format(vcf_file_pattern))
    print("prefix: {}".format(prefix))
def get_args():
    """Parse the command line of the C. hominis / C. parvum dN/dS script.

    On success the module-level globals ``properties_file``, ``prop``,
    ``genome``, ``gff``, ``vcf_file_pattern`` and ``prefix`` are set and
    echoed to stdout.

    ``gff`` is the file given with ``-gff`` when one is supplied; otherwise it
    is looked up in the properties object under the key ``<genome>_gff``.

    Exits with status 2 (argparse) when a required option is missing and with
    status 1 when the genome name is neither ``ch`` nor ``cp``.
    """
    global properties_file
    global genome
    global gff
    global prefix
    global vcf_file_pattern
    global prop

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description=
        'Script invests genes under selection pressure within species through dNdS'
    )
    parser.add_argument('-p',
                        '--properties_file',
                        type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument('-g',
                        '--genome_name',
                        type=str,
                        help='''Please provide the genome name, 
                        only "ch" for "C. hominis" or "cp" for "C. parvum" can be used''',
                        required=True)
    parser.add_argument('-gff',
                        '--genome_gff_file',
                        type=str,
                        help='''Please provide the genome gff file, 
                        only C. hominis or C. parvum gff file can be used''',
                        required=False)
    parser.add_argument(
        '-f',
        '--vcf_file_pattern',
        type=str,
        help="Please provide vcf files' pattern with full file path",
        required=True)
    parser.add_argument('-pre',
                        '--prefix',
                        type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    # NOTE(review): check_exist presumably aborts on a missing path - confirm
    # against fileutils.
    fi.check_exist(args.properties_file)
    if args.genome_gff_file is not None:
        fi.check_exist(args.genome_gff_file)

    # define variables
    properties_file = args.properties_file
    prop = properties(properties_file)
    if args.genome_name not in ('ch', 'cp'):
        print("only 'ch' or 'cp' can be used as the genome name")
        sys.exit(1)
    genome = args.genome_name
    # An explicit -gff file takes precedence; previously it was validated and
    # then dropped, leaving ``gff`` unassigned for the print below.
    if args.genome_gff_file is not None:
        gff = args.genome_gff_file
    else:
        gff = prop.get_attrib(genome + "_gff")
    vcf_file_pattern = args.vcf_file_pattern
    prefix = args.prefix

    # Single-argument print(...) produces the same text under Python 2 and 3.
    print("properties_file: {}".format(properties_file))
    print("genome: {}".format(genome))
    print("genome_gff: {}".format(gff))
    print("vcf_file_pattern: {}".format(vcf_file_pattern))
    print("prefix: {}".format(prefix))