def upload(args):
    '''
    %prog upload imgdir projid

    - imgdir: Path to directory of the images to be uploaded
    - projid: Zooniverse project id (4 - 5 digit number)

    DESC: Uploads images from the image directory to the zooniverse
    project. If there is no manifest, one will be generated.
    '''
    from schnablelab.Zooniverse.Zootils import upload as load

    p = OptionParser(upload.__doc__)
    p.add_option('-s', '--subject', default=False,
                 help='Designate a subject set id.')
    p.add_option('-q', '--quiet', action='store_true', default=False,
                 help='Silences output when uploading images to zooniverse.')
    p.add_option('-x', '--extension', default=False,
                 help='Specify the extension of the image files to be uploaded.')
    '''
    p.add_option('-c', '--convert', action='store_true', default=False,
                 help="Compress and convert files to jpg for faster load times"
                      + " on zooniverse.\n"
                      + " Command: magick -strip -interlace Plane -quality 85%"
                      + " -format jpg <img_directory>/<filename>.png")
    '''
    opts, args = p.parse_args(args)
    if len(args) != 2:
        p.print_help()
        exit(False)
    imgdir, projid = args
    load(imgdir, projid, opts)
    return True

def sam2bam(args):
    """
    %prog in_dir out_dir
        in_dir: sam files folder
        out_dir: bam files folder

    convert sam to bam using samtools/0.1.
    """
    p = OptionParser(sam2bam.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    in_dir, out_dir, = args
    out_path = Path(out_dir)
    if not out_path.exists():
        sys.exit('%s does not exist...' % out_dir)
    dir_path = Path(in_dir)
    sams = dir_path.glob('*.sam')
    for sam in sams:
        prf = sam.name.split('.sam')[0]
        bam = prf + '.bam'
        bam_path = out_path / bam
        cmd = 'samtools view -bS %s > %s' % (sam, bam_path)
        header = Slurm_header % (100, 15000, prf, prf, prf)
        header += 'ml samtools/0.1\n'
        header += cmd
        with open('%s.sam2bam.slurm' % prf, 'w') as f:
            f.write(header)

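# NOTE: `Slurm_header` is imported from elsewhere in the package and not shown
# here. A minimal sketch of its assumed shape -- five %s slots for walltime
# (hours), memory, job name, error file, and output file -- for illustration:
#
# Slurm_header = '''#!/bin/sh
# #SBATCH --time=%s:00:00
# #SBATCH --mem-per-cpu=%s
# #SBATCH --job-name=%s
# #SBATCH --error=./%s.err
# #SBATCH --output=./%s.out
#
# '''
# `Slurm_gpu_header` (used below) presumably adds gpu partition/gres lines.
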
def PredictSlurmGPU(args):
    """
    %prog model_name npyPattern("CM*.npy") job_n

    generate prediction GPU jobs for all npy files
    """
    p = OptionParser(PredictSlurmGPU.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mn, npy_pattern, jobn, = args
    if opts.prefix == 'myjob':
        print('specify job name prefix!')
        sys.exit()
    npys = glob(npy_pattern)
    print(len(npys))
    grps = cutlist(npys, int(jobn))
    for gn, grp in grps:
        st, ed = gn.split('-')
        ed = int(ed) + 1
        gn = '%s-%s' % (st, ed)
        cmd = "python -m schnablelab.CNN.Predict Predict %s '%s' %s\n" % (
            mn, npy_pattern, gn)
        opt = '%s.%s' % (opts.prefix, gn)
        header = Slurm_gpu_header % (opts.time, opts.memory, opt, opt, opt)
        header += "ml anaconda\nsource activate MCY\n"
        header += cmd
        with open('%s.gpu.slurm' % opt, 'w') as f:
            f.write(header)
        print('%s.gpu.slurm prediction GPU job file generated!' % opt)

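# NOTE: `cutlist` comes from the package utilities. Based on how it is consumed
# above (and in `divide` below), it is assumed to split a list into n roughly
# equal chunks, yielding ('start-end', chunk) pairs with inclusive indices.
# A hypothetical stand-in:
def cutlist_sketch(lst, n):
    """Assumed behavior of cutlist: yield ('start-end', chunk) over n chunks."""
    import numpy as np
    start = 0
    for chunk in np.array_split(np.asarray(lst), n):
        yield '%s-%s' % (start, start + len(chunk) - 1), chunk
        start += len(chunk)
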
def divide(args):
    '''
    %prog divide input_dir output_dir_prefix
    '''
    p = OptionParser(divide.__doc__)
    p.add_option('--pattern', default='*.jpg',
                 help='file name pattern')
    p.add_option('--nimgs_per_folder', type='int', default=700,
                 help='~ number of images (<1000) in each smaller folder')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    input_dir, out_prefix, = args
    df = GenDataFrameFromPath(Path(input_dir), pattern=opts.pattern)
    n_folders = math.ceil(df.shape[0] / opts.nimgs_per_folder)
    print('%s will be divided into %s datasets' % (df.shape[0], n_folders))
    n = 0
    for _, grp in cutlist(df['fnpath'].values, n_folders):
        n += 1
        output_folder = Path('%s_%s' % (out_prefix, n))
        print(output_folder, grp.shape[0])
        if not output_folder.exists():
            output_folder.mkdir()
        for i in grp:
            copyfile(i, output_folder / i.name)

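# Example (hypothetical paths): `divide raw_imgs batch --nimgs_per_folder 500`
# copies raw_imgs/*.jpg into batch_1, batch_2, ... with roughly 500 images each.
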
def gentesting(args):
    """
    %prog source_imgs_dir source_imgs_csv training_imgs_csv testing_imgs_per_cls output_dir

    create the balanced testing dataset for each class
    """
    p = OptionParser(gentesting.__doc__)
    p.add_option('--header', default=None,
                 help='specify if the source csv file has a header')
    p.add_option('--comma_sep', default=True,
                 help='specify if the csv file is comma separated')
    p.add_option('--groupby_col', default=1,
                 help='specify the groupby column. 0: 1st column; 1: 2nd column')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    source_dir, source_csv, training_csv, ipc, testing_dir = args
    # ipc: number of images per class.

    # read the source csv file
    if opts.header and opts.comma_sep:  # comma separated, no header row
        df0 = pd.read_csv(source_csv, header=None)
    elif (not opts.header) and opts.comma_sep:  # comma separated, with header row
        df0 = pd.read_csv(source_csv)
    elif not (opts.header and opts.comma_sep):  # tab/space separated, with header row
        df0 = pd.read_csv(source_csv, delim_whitespace=True)
    else:
        print('keke... implement this option first!')
    print('shape of source csv %s: %s' % (source_csv, df0.shape))
    # TODO: the balanced sampling into testing_dir is not implemented below
    # this point.

def Trim(args):
    """
    %prog Trim dir

    quality control on raw fq.gz using Trimmomatic
    """
    p = OptionParser(Trim.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    allfiles = [i for i in os.listdir(mydir) if i.endswith('.fq.gz')]
    print('Total %s fastq.gz files' % len(allfiles))
    for i in allfiles:
        sm = i.split('.')[0]
        cmd1 = 'java -jar $TM_HOME/trimmomatic.jar SE %s %s CROP:185 SLIDINGWINDOW:4:15 MINLEN:30\n' % (
            i, sm + '.trimed.fq')
        cmd2 = 'gzip %s\n' % (sm + '.trimed.fq')
        header = Slurm_header % (opts.time, opts.memory, sm, sm, sm)
        header += cmd1
        header += cmd2
        jobfile = '%s.trim.slurm' % sm
        f = open(jobfile, 'w')
        f.write(header)
        f.close()
    print('slurm files *.trim.slurm have been created, you can sbatch your job files.')

def SNPsCall(args):
    """
    %prog SNPsCall ref info

    call SNPs using freebayes, one job per region listed in the info file
    """
    p = OptionParser(SNPsCall.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    ref, info, = args
    allfiles = [i for i in os.listdir('.') if i.endswith('sorted.bam')]
    print('Total %s sorted.bam files' % len(allfiles))
    f1 = open('bamfiles.fb.list', 'w')
    for i in allfiles:
        f1.write(i + '\n')
    f1.close()
    f2 = open(info)
    chrlist = [i.rstrip() for i in f2]
    for seq in chrlist:
        cmd = '/work/schnablelab/cmiao/SorghumGWAS/scripts/freebayes/bin/freebayes -r %s -f %s -C 1 -L bamfiles.fb.list > %s\n' % (
            seq, ref, '_'.join(seq.split(':')) + '.vcf')
        header = Slurm_header % (opts.time, opts.memory, seq, seq, seq)
        header += cmd
        jobfile = '%s.fb.slurm' % ('_'.join(seq.split(':')))
        f = open(jobfile, 'w')
        f.write(header)
        f.close()
    print('slurm files *.fb.slurm have been created, you can sbatch your job files.')

def Sam2Bam(args):
    """
    %prog Sam2Bam dir

    Convert sam to bam format
    """
    p = OptionParser(Sam2Bam.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    allfiles = [i for i in os.listdir(mydir) if i.endswith('sam')]
    print('Total %s sam files' % len(allfiles))
    for i in allfiles:
        SM = i.split('.')[0]
        output = '%s.bam' % SM
        cmd = 'samtools view -bS %s > %s\n' % (i, output)
        header = Slurm_header % (opts.time, opts.memory, SM, SM, SM)
        header += 'module load samtools/0.1\n'
        header += cmd
        jobfile = '%s.sam2bam.slurm' % SM
        f = open(jobfile, 'w')
        f.write(header)
        f.close()
    print('slurm files *.sam2bam.slurm have been created, you can sbatch your job files.')

def SortHmp(args):
    """
    %prog SortHmp hmp

    Sort hmp in the weird way TASSEL expects...
    """
    p = OptionParser(SortHmp.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, = args
    prefix = hmp.replace('.hmp', '')
    out_prefix = hmp.replace('.hmp', '') + '.sorted'
    cmd = 'run_pipeline.pl -Xms16g -Xmx18g -SortGenotypeFilePlugin -inputFile %s -outputFile %s -fileType Hapmap\n' % (
        hmp, out_prefix)
    cmd1 = 'mv %s %s\n' % (out_prefix + '.hmp.txt', out_prefix + '.hmp')
    h = Slurm_header
    h += 'module load java/1.8\n'
    h += 'module load tassel/5.2\n'
    header = h % (opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    header += cmd
    header += cmd1
    f = open('%s.Sort.slurm' % prefix, 'w')
    f.write(header)
    f.close()
    print('slurm file %s.Sort.slurm has been created, you can sbatch your job file.' % prefix)

def ped2bed(args):
    """
    %prog ped_prefix

    Convert plink ped to binary bed format using Plink
    """
    p = OptionParser(ped2bed.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    ped_prefix, = args
    cmd = 'plink --noweb --file %s --make-bed --out %s\n' % (ped_prefix, ped_prefix)
    print('run cmd on local:\n%s' % cmd)
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    header += 'ml plink\n'
    header += cmd
    f = open('%s.ped2bed.slurm' % ped_prefix, 'w')
    f.write(header)
    f.close()
    print('Job file has been created. You can submit: sbatch -p jclarke %s.ped2bed.slurm' % ped_prefix)

def hmp2ped(args):
    """
    %prog hmp

    Convert hmp to plink ped format using Tassel
    """
    p = OptionParser(hmp2ped.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, = args
    prefix = '.'.join(hmp.split('.')[0:-1])
    cmd = 'run_pipeline.pl -Xms512m -Xmx38G -fork1 -h %s -export -exportType Plink\n' % hmp
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    header += 'ml java/1.8\n'
    header += 'ml tassel/5.2\n'
    header += cmd
    f = open('%s.hmp2ped.slurm' % prefix, 'w')
    f.write(header)
    f.close()
    print('Job file has been created. You can submit: sbatch -p jclarke %s.hmp2ped.slurm' % prefix)

def hmp2MVP(args):
    """
    %prog hmp2MVP hmp MVP_prefix

    Convert hmp genotypic data to MVP datasets (*.numeric and *.map).
    """
    p = OptionParser(hmp2MVP.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, mvp_pre = args
    f1 = open(hmp)
    f1.readline()  # skip the hmp header line
    f2 = open(mvp_pre + '.numeric', 'w')
    f3 = open(mvp_pre + '.map', 'w')
    f3.write('SNP\tChrom\tBP\n')
    for i in f1:
        j = i.split()
        rs = j[0]
        ref, alt = j[1].split('/')[0], j[1].split('/')[1]
        newNUMs = judge(ref, alt, j[11:])
        newline = '\t'.join(newNUMs) + '\n'
        f2.write(newline)
        chro, pos = j[2], j[3]
        f3.write('%s\t%s\t%s\n' % (rs, chro, pos))
    f1.close()
    f2.close()
    f3.close()

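# NOTE: `judge` is defined elsewhere in the module. From its call site above it
# is assumed to recode each hmp genotype call numerically against ref/alt
# (e.g. ref homozygote -> 0, heterozygote -> 1, alt homozygote -> 2).
# A hypothetical single-letter-call version:
def judge_sketch(ref, alt, calls):
    """Assumed behavior of judge: encode hmp calls as '0'/'1'/'2' strings."""
    codes = []
    for c in calls:
        if c == ref:
            codes.append('0')
        elif c == alt:
            codes.append('2')
        else:  # heterozygous or ambiguous IUPAC code
            codes.append('1')
    return codes
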
def combineHmp(args):
    """
    %prog combineHmp N pattern output

    combine split hmp (1-based) files into a single one.
    Pattern example: hmp321_agpv4_chr%s.hmp
    """
    p = OptionParser(combineHmp.__doc__)
    p.add_option('--header', default='yes', choices=('yes', 'no'),
                 help='choose whether to keep the header or not')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    N, hmp_pattern, new_f, = args
    N = int(N)
    f = open(new_f, 'w')
    fn1 = open(hmp_pattern % 1)
    print(1)
    if opts.header == 'yes':
        for i in fn1:
            f.write(i)
    else:
        fn1.readline()  # drop the header line
        for i in fn1:
            f.write(i)
    fn1.close()
    for i in range(2, N + 1):
        print(i)
        fn = open(hmp_pattern % i)
        fn.readline()  # headers of the remaining chunks are always dropped
        for j in fn:
            f.write(j)
        fn.close()
    f.close()

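# Example: `combineHmp 10 hmp321_agpv4_chr%s.hmp combined.hmp` concatenates
# chr1..chr10, keeping only the first file's header when --header is 'yes'.
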
def trim_single(args):
    """
    %prog trim in_dir out_dir

    quality control on the single end reads
    """
    p = OptionParser(trim_single.__doc__)
    p.add_option('--pattern', default='*_Unpaired.fastq',
                 help='filename pattern for all single end reads')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    in_dir, out_dir, = args
    out_path = Path(out_dir)
    if not out_path.exists():
        sys.exit('output dir %s does not exist...' % out_dir)
    fns = glob('%s/%s' % (in_dir, opts.pattern))
    for fn in fns:
        fn_path = Path(fn)
        prf = '_'.join(fn_path.name.split('_')[0:-1]) + '.SE'
        print(prf)
        fn_out = fn_path.name.replace('Unpaired.fastq', 'trim.Unpaired.fastq')
        cmd = 'java -jar $TM_HOME/trimmomatic.jar SE -phred33 %s %s TRAILING:20 SLIDINGWINDOW:4:20 MINLEN:40' % (
            fn, str(out_path / fn_out))
        header = Slurm_header % (10, 10000, prf, prf, prf)
        header += 'ml trimmomatic\n'
        header += cmd
        with open('%s.trim.slurm' % prf, 'w') as f:
            f.write(header)

def fastqc(args):
    """
    %prog fastqc in_dir out_dir
        in_dir: the dir where fastq files are located
        out_dir: the dir saving fastqc reports

    generate slurm files for fastqc jobs
    """
    p = OptionParser(fastqc.__doc__)
    p.add_option('--pattern', default='*.fastq',
                 help='the pattern of fastq files, quotation needed')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    in_dir, out_dir, = args
    out_path = Path(out_dir)
    if not out_path.exists():
        sys.exit('%s does not exist...' % out_dir)
    dir_path = Path(in_dir)
    fqs = dir_path.glob(opts.pattern)
    for fq in fqs:
        prf = '.'.join(fq.name.split('.')[0:-1])
        print(prf)
        cmd = 'fastqc %s -o %s' % (str(fq), out_dir)
        header = Slurm_header % (10, 10000, prf, prf, prf)
        header += 'ml fastqc\n'
        header += cmd
        with open('%s.fastqc.slurm' % prf, 'w') as f:
            f.write(header)

def keras_cnn(args):
    """
    %prog train_dir val_dir num_category model_name_prefix

    Run vgg model
    """
    p = OptionParser(keras_cnn.__doc__)
    p.add_option('--epoch', default=500,
                 help='number of epochs')
    p.add_option('--lr_n', default=1, type='int',
                 help='train model with different learning rates. if n=1: set lr to 0.001. '
                      'if n>1: try n different lrs from 1e-2 to 1e-5')
    p.set_slurm_opts(gpu=True)
    opts, args = p.parse_args(args)
    if len(args) != 4:
        sys.exit(not p.print_help())
    train_dir, val_dir, numC, mnp = args  # mnp: model name prefix
    out_fns = fns(mnp, n=opts.lr_n)
    for i in range(int(opts.lr_n)):
        cmd = 'python -m schnablelab.CNN.keras_vgg %s %s %s %s %s %s' % (
            train_dir, val_dir, numC, out_fns.lrs[i], opts.epoch, out_fns.model_name[i])
        SlurmHeader = Slurm_gpu_header % (opts.time, opts.memory, out_fns.model_name[i],
                                          out_fns.model_name[i], out_fns.model_name[i])
        SlurmHeader += 'module load anaconda\nsource activate MCY\n'
        SlurmHeader += cmd
        f = open('%s.slurm' % out_fns.model_name[i], 'w')
        f.write(SlurmHeader)
        f.close()
        print('slurm file %s.slurm has been created, you can sbatch your job file.' % out_fns.model_name[i])

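# NOTE: `fns` is a package helper. From its usage above (out_fns.lrs[i] and
# out_fns.model_name[i]) it is assumed to pair candidate learning rates with
# model names. A hypothetical stand-in matching the --lr_n help text:
def fns_sketch(prefix, n=1):
    """Assumed behavior of fns: n learning rates from 1e-2 to 1e-5 plus names."""
    from collections import namedtuple
    import numpy as np
    LrFns = namedtuple('LrFns', ['lrs', 'model_name'])
    lrs = [0.001] if int(n) == 1 else list(np.logspace(-2, -5, int(n)))
    return LrFns(lrs, ['%s_lr%g' % (prefix, lr) for lr in lrs])
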
def plot(args):
    """
    %prog plot gwas_out result_prefix

    plot MVP results using the MVP.Report function.
    https://github.com/XiaoleiLiuBio/MVP
    """
    p = OptionParser(plot.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    gwasfn, op, = args  # op: output prefix
    cmds = '''
library('MVP')
myData = read.csv('%s')
MVP.Report(myData, plot.type='m', col=c("dodgerblue4", "deepskyblue"),
    LOG10=TRUE, ylim=NULL, threshold=8.9e-8, threshold.col='grey',
    chr.den.col=NULL, file='png', memo='MLM', dpi=300)
''' % gwasfn
    f1 = open('%s.plot.R' % op, 'w')
    f1.write(cmds)
    f1.close()
    f2 = open('%s.plot.slurm' % op, 'w')
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    header += 'module load R\n'
    header += 'R CMD BATCH %s.plot.R\n' % op
    f2.write(header)
    f2.close()
    print('%s.plot.R and %s.plot.slurm have been created.' % (op, op))

def genPCA(args):
    """
    %prog genPCA hmp N

    Generate first N PCs using tassel
    """
    p = OptionParser(genPCA.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, N, = args
    out_prefix = hmp.replace('.hmp', '')
    cmd = 'run_pipeline.pl -Xms28g -Xmx29g -fork1 -h %s -PrincipalComponentsPlugin -ncomponents %s -covariance true -endPlugin -export %s_%sPCA -runfork1\n' % (
        hmp, N, out_prefix, N)
    h = Slurm_header
    h += 'ml java/1.8\n'
    h += 'ml tassel/5.2\n'
    header = h % (opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    header += cmd
    f = open('%s.PCA%s.slurm' % (out_prefix, N), 'w')
    f.write(header)
    f.close()
    print('slurm file %s.PCA%s.slurm has been created, you can sbatch your job file.' % (out_prefix, N))

def IndexBam(args):
    """
    %prog IndexBam dir

    create the index for bam files
    """
    p = OptionParser(IndexBam.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    allfiles = [i for i in os.listdir(mydir) if i.endswith('sorted.bam')]
    print('Total %s sorted.bam files' % len(allfiles))
    for i in allfiles:
        SM = i.split('.')[0]
        cmd = 'samtools index %s\n' % i
        header = Slurm_header % (opts.time, opts.memory, SM, SM, SM)
        header += 'module load samtools/0.1\n'
        header += cmd
        jobfile = '%s.idx.slurm' % SM
        f = open(jobfile, 'w')
        f.write(header)
        f.close()
    print('slurm files *.idx.slurm have been created, you can sbatch your job files.')

def reorgnzGemmaKinship(args):
    """
    %prog reorgnzGemmaKinship GEMMAkinship hmp

    Reorganize the kinship result from GEMMA so it can be used in other
    software, like GAPIT. The hmp file only provides the order of the
    sample names.
    """
    p = OptionParser(reorgnzGemmaKinship.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    gemmaKin, hmpfile, = args
    f = open(hmpfile)
    SMs = f.readline().split()[11:]  # sample names start at column 12 of the hmp header
    f.close()
    f1 = open(gemmaKin)
    f2 = open('GAPIT.' + gemmaKin, 'w')
    for i, j in zip(SMs, f1):
        newline = i + '\t' + j
        f2.write(newline)
    f1.close()
    f2.close()
    print("Finished! Kinship matrix file for GAPIT 'GAPIT.%s' has been generated." % gemmaKin)

def CombineRep(args):
    """
    %prog CombineRep dir

    combine all fq.gz files for the same sample
    """
    p = OptionParser(CombineRep.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    fqs = [i for i in os.listdir(mydir) if i.endswith('fq.gz')]
    fqs = sorted(fqs, key=lambda x: int(x.split('.')[0].split('_')[0].split('R')[0]))
    SMs = [x.split('.')[0].split('_')[0].split('R')[0] for x in fqs]
    mydf = pd.DataFrame(dict(zip(['SM', 'FNs'], [SMs, fqs])))
    mygrpdf = mydf.groupby('SM').agg(['count', lambda x: ' '.join(x)])
    f = open('combine_fqs.sh', 'w')
    for sm in mygrpdf.index:
        n, fns = mygrpdf.loc[sm, :]
        cmd = 'cat %s > %s.cbd.fq.gz\n' % (fns, sm)
        f.write(cmd)
    f.close()
    cmd1 = 'chmod +x combine_fqs.sh\n'
    cmd2 = './combine_fqs.sh\n'
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    header += cmd1
    header += cmd2
    f = open('CombineFQs.slurm', 'w')
    f.write(header)
    f.close()
    print('slurm file CombineFQs.slurm has been created, you can sbatch your job file.')

def hmp2vcf(args):
    """
    %prog hmp2vcf hmp

    convert hmp to vcf format using tassel
    """
    p = OptionParser(hmp2vcf.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmpfile, = args
    prefix = '.'.join(hmpfile.split('.')[0:-1])
    cmd = 'run_pipeline.pl -Xms512m -Xmx10G -fork1 -h %s -export -exportType VCF\n' % hmpfile
    print(cmd)
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    header += 'ml tassel/5.2\n'
    header += cmd
    f = open('%s.hmp2vcf.slurm' % prefix, 'w')
    f.write(header)
    f.close()
    print('slurm file %s.hmp2vcf.slurm has been created, you can sbatch your job file.' % prefix)

def extract_info(args):
    """
    %prog log_file output_fn

    extract testing and prediction results from the dpp log file
    """
    p = OptionParser(extract_info.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    logfile, opp, = args
    f0 = open(logfile)
    all_lines = f0.readlines()
    test_idx, predict_idx, hist_idx = 0, 0, 0
    for i, j in enumerate(all_lines):
        if 'All test labels:' in j:
            test_idx = i
        if 'All predictions:' in j:
            predict_idx = i
        if 'Histogram of ' in j:
            hist_idx = i
    test_lines = all_lines[test_idx + 1:predict_idx]
    ground_truth = extract_num(test_lines)
    #print(len(ground_truth), '\n', ground_truth)
    predict_lines = all_lines[predict_idx + 1:hist_idx]
    prediction = extract_num(predict_lines)
    #print(len(prediction), '\n', prediction)
    df = pd.DataFrame(dict(zip(['groundtruth', 'prediction'], [ground_truth, prediction])))
    df.to_csv(opp, index=False, sep='\t')
    print('Done! check %s' % opp)

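# NOTE: `extract_num` is defined elsewhere. Given the slices of log lines it
# receives above, it is assumed to pull every numeric token out of the
# captured lines. A hypothetical regex-based version:
def extract_num_sketch(lines):
    """Assumed behavior of extract_num: collect all numbers in the lines."""
    import re
    nums = []
    for line in lines:
        nums.extend(float(x) for x in
                    re.findall(r'[-+]?\d*\.\d+|[-+]?\d+', line))
    return nums
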
def IndePvalue(args):
    """
    %prog IndePvalue plink_bed_prefix output

    calculate the number of independent SNPs (Me) and the Bonferroni p-value
    """
    p = OptionParser(IndePvalue.__doc__)
    p.set_slurm_opts(jn=True)
    p.add_option('--cutoff', default='0.05', choices=('0.01', '0.05'),
                 help='choose the p-value cutoff for the calculation of the Bonferroni p-value')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    bed, output = args
    mem = int(opts.memory) // 1000 - 2  # leave ~2 GB of headroom for the JVM
    cmd = 'java -Xmx%sg -jar %s --noweb --effect-number --plink-binary %s --genome --out %s' % (
        mem, GEC, bed, output)
    h = Slurm_header
    h += 'module load java/1.8\n'
    header = h % (opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    header += cmd
    f = open('%s.Me_SNP.slurm' % output, 'w')
    f.write(header)
    f.close()
    print('slurm file %s.Me_SNP.slurm has been created, you can sbatch your job file.' % output)

def export(args):
    '''
    %prog export proj_id outfile

    - proj_id: The project id of the zooniverse project

    DESC: Fetches an export from the specified zooniverse project id.
    '''
    from schnablelab.Zooniverse.Zootils import export as exp

    p = OptionParser(export.__doc__)
    p.add_option('-t', '--type', default='classifications',
                 help='Specify the type of export')
    opts, args = p.parse_args(args)
    if len(args) != 2:
        exit(not p.print_help())
    projid, outfile = args
    exp(projid, outfile, opts)
    return True

def Info(args):
    '''
    %prog Info project_folder

    Show summary of images under project_folder
    '''
    p = OptionParser(Info.__doc__)
    p.add_option('--item_idx', default='1,2,3',
                 help='the indices of the sample name, date, and time in each image directory name')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    project_folder, = args
    sm_idx, date_idx, time_idx = [int(i) for i in opts.item_idx.split(',')]
    prj = ParseProject(project_folder, sm_idx, date_idx, time_idx)
    print('Summary of samples:')
    for i, j in prj.sm_counts.items():
        print(i, j)
    print('Summary of dates:')
    for i, j in prj.date_counts.items():
        print(i, j)
    print('Angles for RGB images:')
    for angle in prj.df.loc[0, 'fnpath'].glob('Vis_*'):
        print(angle.name)

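# NOTE: `ParseProject` lives elsewhere in the package. From its usage above it
# is assumed to index the image directories and tally the underscore-delimited
# name fields by position. A hypothetical sketch:
class ParseProjectSketch:
    """Assumed behavior of ParseProject: tally samples/dates from dir names."""
    def __init__(self, folder, sm_idx, date_idx, time_idx):
        self.df = GenDataFrameFromPath(Path(folder), pattern='*')
        parts = self.df['fnpath'].apply(lambda p: p.name.split('_'))
        self.sm_counts = parts.str[sm_idx].value_counts()
        self.date_counts = parts.str[date_idx].value_counts()
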
def cMLM(args):
    """
    %prog cMLM pheno(with header, tab delimited) geno_prefix(GM and GD prefix) PCA Kinship

    Run automated GAPIT compressed mixed linear model
    """
    p = OptionParser(cMLM.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    pheno, geno_prefix, PCA, Kinship = args
    mem = '.'.join(pheno.split('.')[0:-1])  # file name prefix derived from the phenotype file
    f1 = open('%s.cMLM.R' % mem, 'w')
    #print(Gapit_header)
    gapit_cmd = Gapit_header % (pheno, geno_prefix, geno_prefix, PCA, Kinship, mem)
    f1.write(gapit_cmd)
    f2 = open('%s.cMLM.slurm' % mem, 'w')
    h = Slurm_header
    h += 'module load R/3.3\n'
    header = h % (opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    f2.write(header)
    cmd = 'R CMD BATCH %s.cMLM.R\n' % mem
    f2.write(cmd)
    f1.close()
    f2.close()
    print('R script %s.cMLM.R and slurm file %s.cMLM.slurm have been created, you can sbatch your job file.' % (mem, mem))

def cpu(args):
    """
    %prog

    request a cpu node from hcc.
    """
    p = OptionParser(cpu.__doc__)
    p.add_option("--partition", default="jclarke", choices=('batch', 'jclarke'),
                 help="which partition? [default: %default]")
    p.add_option("--memory", default="10240",
                 help="specify how much memory [default: %default]")
    p.add_option("--time", default='20',
                 help="specify the time (hour) [default: %default]")
    opts, args = p.parse_args(args)
    if len(args) == 0:
        print('add --help to see options.\n')
        cmd = 'srun --partition=%s --mem-per-cpu=%s --ntasks-per-node=6 --nodes=1 --time=%s:0:0 --pty $SHELL\n' % (
            opts.partition, opts.memory, opts.time)
        print(cmd)
        #call(cmd, shell=True)
    else:
        sys.exit(not p.print_help())

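# Example: `cpu --memory 20480 --time 8` only prints the srun command; paste it
# into a login-node shell to actually start the interactive session, e.g.
#   srun --partition=jclarke --mem-per-cpu=20480 --ntasks-per-node=6 --nodes=1 --time=8:0:0 --pty $SHELL
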
def Imgs2Arrs(args):
    '''
    %prog hyp_dir(filepath of hyperspectral image data)

    Returns: numpy array object with shape [x, y, z].
        x,y dims correspond to pixel coordinates for each image
        z dim corresponds to the hyperspectral image wavelength.
    '''
    import cv2
    p = OptionParser(Imgs2Arrs.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    imgs = [i for i in os.listdir(mydir) if i.endswith('png')]
    sorted_imgs = sorted(imgs, key=lambda x: int(x.split('_')[0]))
    all_arrs = []
    for i in sorted_imgs[2:]:
        print(i)
        #img = cv2.imread('%s/%s' % (mydir, i), cv2.IMREAD_GRAYSCALE)
        img = np.array(Image.open('%s/%s' % (mydir, i)).convert('L'))
        print(img.shape)
        all_arrs.append(img)
    arrs = np.stack(all_arrs, axis=2)
    np.save('%s.npy' % mydir, arrs)

def gpu(args):
    """
    %prog

    request a gpu node from hcc.
    """
    p = OptionParser(gpu.__doc__)
    p.add_option("--memory", default="12000",
                 help="specify how much memory [default: %default]")
    p.add_option("--time", default='20',
                 help="specify the time (hour) [default: %default]")
    p.add_option("--model", default='gpu_k40',
                 choices=('gpu_p100', 'gpu_k20', 'gpu_k40'),
                 help="specify gpu model, p100: 16gb, k40: 12gb, k20: 5gb [default: %default]")
    opts, args = p.parse_args(args)
    if len(args) == 0:
        print('add --help to see options.\n')
        cmd = 'srun --partition=schnablelab --gres=gpu --constraint=%s --mem-per-cpu=%s --ntasks-per-node=1 --nodes=1 --time=%s:0:0 --pty $SHELL\n' % (
            opts.model, opts.memory, opts.time)
        print(cmd)
        #call(cmd, shell=True)
    else:
        sys.exit(not p.print_help())
