Example #1
def sam2bam(args):
    """
    %prog in_dir out_dir
        in_dir: sam files folder
        out_dir: bam files folder

    convert sam to bam using samtools/0.1.
    """
    p = OptionParser(sam2bam.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    in_dir, out_dir, = args

    out_path = Path(out_dir)
    if not out_path.exists():
        sys.exit('%s does not exist...' % out_dir)
    dir_path = Path(in_dir)
    sams = dir_path.glob('*.sam')
    for sam in sams:
        prf = sam.name.split('.sam')[0]
        bam = prf + '.bam'
        bam_path = out_path / bam
        cmd = 'samtools view -bS %s > %s' % (sam, bam_path)
        header = Slurm_header % (100, 15000, prf, prf, prf)
        header += 'ml samtools/0.1\n'
        header += cmd
        with open('%s.sam2bam.slurm' % prf, 'w') as f:
            f.write(header)
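Nearly every example below interpolates a shared Slurm_header template with five values (time, memory, and a job-name prefix reused for stdout/stderr). The template itself lives elsewhere in the schnablelab repo; a minimal sketch of what it plausibly looks like, given how the examples fill it:

# a minimal sketch, assuming the real template has exactly the five slots
# the examples fill: time (hours), memory (Mb), job name, stdout, stderr
Slurm_header = '''#!/bin/bash
#SBATCH --time=%s:00:00
#SBATCH --mem-per-cpu=%s
#SBATCH --job-name=%s
#SBATCH --output=%s.out
#SBATCH --error=%s.err

'''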
Example #2
def divide(args):
    '''
    %prog divide input_dir output_dir_prefix
    '''
    p = OptionParser(divide.__doc__)
    p.add_option('--pattern', default='*.jpg',
                 help='file name pattern')
    p.add_option('--nimgs_per_folder', type='int', default=700,
                 help='~ number of images (<1000) in each smaller folder')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    input_dir, out_prefix, = args

    df = GenDataFrameFromPath(Path(input_dir), pattern=opts.pattern)
    n_folders = math.ceil(df.shape[0]/opts.nimgs_per_folder)
    print('%s will be divided into %s datasets' % (df.shape[0], n_folders))
    n = 0
    for _, grp in cutlist(df['fnpath'].values, n_folders):
        n += 1
        output_folder = Path('%s_%s'%(out_prefix,n))
        print(output_folder, grp.shape[0])
        if not output_folder.exists():
            output_folder.mkdir()
        for i in grp:
            copyfile(i, output_folder/i.name)
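divide above and PredictSlurmGPU below both lean on a cutlist helper from the repo. Judging from how its output is consumed (grp.shape[0] here, gn.split('-') in Example #3), it splits a sequence into n roughly even numpy chunks and yields ('start-end', chunk) pairs; a sketch under those assumptions:

import numpy as np

def cutlist(lst, n):
    # hypothetical reconstruction: yield ('start-end', chunk) pairs with an
    # inclusive end index, matching how PredictSlurmGPU re-derives the range
    start = 0
    for chunk in np.array_split(np.asarray(lst), n):
        end = start + len(chunk) - 1
        yield '%s-%s' % (start, end), chunk
        start = end + 1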
Example #3
def PredictSlurmGPU(args):
    """
    %prog model_name npyPattern("CM*.npy") job_n
    generate prediction GPU jobs for all npy files
    """
    p = OptionParser(PredictSlurmGPU.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mn, npy_pattern, jobn, = args
    if opts.prefix == 'myjob':
        print('specify job name prefix!')
        sys.exit()

    npys = glob(npy_pattern)
    print('%s npy files found' % len(npys))
    grps = cutlist(npys, int(jobn))
    for gn, grp in grps:
        st, ed = gn.split('-')
        ed = int(ed) + 1
        gn = '%s-%s' % (st, ed)
        cmd = "python -m schnablelab.CNN.Predict Predict %s '%s' %s\n" % (
            mn, npy_pattern, gn)
        opt = '%s.%s' % (opts.prefix, gn)
        header = Slurm_gpu_header % (opts.time, opts.memory, opt, opt, opt)
        header += "ml anaconda\nsource activate MCY\n"
        header += cmd
        with open('%s.gpu.slurm' % opt, 'w') as f:
            f.write(header)
        print('%s.gpu.slurm prediction GPU job file generated!' % opt)
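Most examples call p.set_slurm_opts(...) before parsing. Its definition is not shown here, but from the opts.time, opts.memory and opts.prefix reads that follow (and the 'myjob' sentinel checked above), it presumably registers the standard Slurm options; a hedged sketch, not the repo's actual code:

def set_slurm_opts(self, jn=False, array=False, gpu=False):
    # plausible reconstruction: register the options every example reads
    # back as opts.time, opts.memory and opts.prefix; the jn/array/gpu
    # flags presumably just tweak defaults in the real implementation
    self.add_option('--time', default='10', help='walltime in hours')
    self.add_option('--memory', default='15000', help='memory in Mb')
    self.add_option('--prefix', default='myjob', help='job/file name prefix')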
Example #4
def Trim(args):
    """
    %prog Trim dir
    quality control on raw fq.gz using Trimmomatic
    """
    p = OptionParser(Trim.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    allfiles = [i for i in os.listdir(mydir) if i.endswith('.fq.gz')]
    print('Total %s fq.gz files' % len(allfiles))
    for i in allfiles:
        sm = i.split('.')[0]
        cmd1 = 'java -jar $TM_HOME/trimmomatic.jar SE %s %s.trimed.fq CROP:185 SLIDINGWINDOW:4:15 MINLEN:30\n' % (
            i, sm)
        cmd2 = 'gzip %s.trimed.fq\n' % sm
        header = Slurm_header % (opts.time, opts.memory, sm, sm, sm)
        header += cmd1
        header += cmd2
        jobfile = '%s.trimc.slurm' % sm
        f = open(jobfile, 'w')
        f.write(header)
        f.close()
    print(
        'slurm files *.trimc.slurm have been created, you can sbatch your job files.'
    )
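Each of these generators only writes the .slurm files; submission is a separate step. A small loop, assuming sbatch is on PATH, that submits everything Trim just produced:

from glob import glob
from subprocess import run

for job in glob('*.trimc.slurm'):
    run(['sbatch', job])  # submit each generated job file to Slurm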
Example #5
def gentesting(args):
    """
    %prog source_imgs_dir source_imgs_csv training_imgs_csv testing_imgs_per_cls output_dir

    create the balanced testing dataset for each class
    """
    p = OptionParser(gentesting.__doc__)
    p.add_option('--header',
                 default=None,
                 help='specify if the source csv file has a header')
    p.add_option('--comma_sep',
                 default=True,
                 help='specify if the csv file is comma separated')
    p.add_option('--groupby_col',
                 default=1,
                 help='specify the groupby column. 0: 1st column; 1: 2nd column')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    source_dir, source_csv, training_csv, ipc, testing_dir = args  # ipc: number of images per class.

    # read the source csv file
    if opts.header and opts.comma_sep:  # with header, comma separated
        df0 = pd.read_csv(source_csv)
    elif (not opts.header) and opts.comma_sep:  # without header, comma separated
        df0 = pd.read_csv(source_csv, header=None)
    elif opts.header and (not opts.comma_sep):  # with header, tab/space separated
        df0 = pd.read_csv(source_csv, delim_whitespace=True)
    else:
        sys.exit('this header/separator combination is not implemented yet!')
    print('shape of source csv %s: %s' % (source_csv, df0.shape))
Example #6
def Sam2Bam(args):
    """
    %prog Sam2Bam dir
    Convert sam to bam format
    """
    p = OptionParser(Sam2Bam.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    allfiles = [i for i in os.listdir(mydir) if i.endswith('sam')]
    print('Total %s sam files' % len(allfiles))
    for i in allfiles:
        SM = i.split('.')[0]
        output = '%s.bam' % SM
        cmd = 'samtools view -bS %s > %s\n' % (i, output)
        header = Slurm_header % (opts.time, opts.memory, SM, SM, SM)
        header += 'module load samtools/0.1\n'
        header += cmd
        jobfile = '%s.sam2bam.slurm' % SM
        f = open(jobfile, 'w')
        f.write(header)
        f.close()
    print(
        'slurm files *.sam2bam.slurm have been created, you can sbatch your job files.'
    )
Example #7
def SNPsCall(args):
    """
    %prog SNPsCall ref info
    call SNPs with freebayes, one job per region listed in the info file
    """
    p = OptionParser(SNPsCall.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    ref, info, = args
    allfiles = [i for i in os.listdir('.') if i.endswith('sorted.bam')]
    print('Total %s sorted.bam files' % len(allfiles))
    f1 = open('bamfiles.fb.list', 'w')
    for i in allfiles:
        f1.write(i + '\n')
    f1.close()

    f2 = open(info)
    chrlist = [i.rstrip() for i in f2]
    for seq in chrlist:
        cmd = '/work/schnablelab/cmiao/SorghumGWAS/scripts/freebayes/bin/freebayes -r %s -f %s -C 1 -L bamfiles.fb.list > %s\n' % (
            seq, ref, "_".join(seq.split(':')) + '.vcf')
        header = Slurm_header % (opts.time, opts.memory, seq, seq, seq)
        header += cmd
        jobfile = '%s.fb.slurm' % ("_".join(seq.split(':')))
        f = open(jobfile, 'w')
        f.write(header)
        f.close()
    print(
        'slurm files *.fb.slurm have been created, you can sbatch your job files.'
    )
Example #8
def ped2bed(args):
    """
    %prog ped_prefix

    Convert plink ped to binary bed format using Plink
    """
    p = OptionParser(ped2bed.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    ped_prefix, = args
    cmd = 'plink --noweb --file %s --make-bed --out %s\n' % (ped_prefix,
                                                             ped_prefix)
    print('run cmd on local:\n%s' % cmd)
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix,
                             opts.prefix)
    header += 'ml plink\n'
    header += cmd
    f = open('%s.ped2bed.slurm' % ped_prefix, 'w')
    f.write(header)
    f.close()
    print(
        'Job file has been created. You can submit: sbatch -p jclarke %s.ped2bed.slurm'
        % ped_prefix)
Example #9
def SortHmp(args):
    """
    %prog SortHmp hmp

    Sort hmp in the weird way TASSEL requires...
    """
    p = OptionParser(SortHmp.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, = args
    prefix = hmp.replace('.hmp', '')
    out_prefix = hmp.replace('.hmp', '') + '.sorted'
    cmd = 'run_pipeline.pl -Xms16g -Xmx18g -SortGenotypeFilePlugin -inputFile %s -outputFile %s -fileType Hapmap\n' % (
        hmp, out_prefix)
    cmd1 = 'mv %s %s' % (out_prefix + '.hmp.txt', out_prefix + '.hmp')

    h = Slurm_header
    h += 'module load java/1.8\n'
    h += 'module load  tassel/5.2\n'
    header = h % (opts.time, opts.memory, opts.prefix, opts.prefix,
                  opts.prefix)
    header += cmd
    header += cmd1
    f = open('%s.Sort.slurm' % prefix, 'w')
    f.write(header)
    f.close()
    print(
        'slurm file %s.Sort.slurm has been created, you can sbatch your job file.'
        % prefix)
Example #10
def hmp2MVP(args):
    """
    %prog hmp2MVP hmp MVP_prefix

    Convert hmp genotypic data to MVP datasets (*.numeric and *.map).
    """
    p = OptionParser(hmp2MVP.__doc__)
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())

    hmp, mvp_pre = args
    f1 = open(hmp)
    f1.readline()
    f2 = open(mvp_pre + '.numeric', 'w')
    f3 = open(mvp_pre + '.map', 'w')
    f3.write('SNP\tChrom\tBP\n')
    for i in f1:
        j = i.split()
        rs = j[0]
        ref, alt = j[1].split('/')[0], j[1].split('/')[1]
        newNUMs = judge(ref, alt, j[11:])
        newline = '\t'.join(newNUMs) + '\n'
        f2.write(newline)
        chro, pos = j[2], j[3]
        f3.write('%s\t%s\t%s\n' % (rs, chro, pos))
    f1.close()
    f2.close()
    f3.close()
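hmp2MVP delegates the genotype-to-number conversion to a judge() helper defined elsewhere in the repo. A hypothetical stand-in consistent with how it is called (ref allele, alt allele, the per-sample calls from column 12 on; returns strings so the caller can '\t'.join them):

def judge(ref, alt, genotypes):
    # hypothetical encoding: 0 = homozygous ref, 2 = homozygous alt,
    # 1 = heterozygous or anything else (missing, ambiguous)
    nums = []
    for g in genotypes:
        if g == ref * 2:
            nums.append('0')
        elif g == alt * 2:
            nums.append('2')
        else:
            nums.append('1')
    return nums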
Example #11
def hmp2ped(args):
    """
    %prog hmp

    Convert hmp to plink ped format using Tassel
    """
    p = OptionParser(hmp2ped.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, = args
    prefix = '.'.join(hmp.split('.')[0:-1])
    cmd = 'run_pipeline.pl -Xms512m -Xmx38G -fork1 -h %s -export -exportType Plink\n' % hmp
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix,
                             opts.prefix)
    header += 'ml java/1.8\n'
    header += 'ml tassel/5.2\n'
    header += cmd
    f = open('%s.hmp2ped.slurm' % prefix, 'w')
    f.write(header)
    f.close()
    print(
        'Job file has been created. You can submit: sbatch -p jclarke %s.hmp2ped.slurm'
        % prefix)
Example #12
def combineHmp(args):
    """
    %prog combineHmp N pattern output
    combine split hmp (1-based) files into a single one. Pattern example: hmp321_agpv4_chr%s.hmp
    """

    p = OptionParser(combineHmp.__doc__)
    p.add_option('--header', default='yes', choices=('yes', 'no'),
                 help='whether to keep the header line from the first file')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    N, hmp_pattern, new_f, = args
    N = int(N)

    f = open(new_f, 'w')

    fn1 = open(hmp_pattern % 1)
    print(1)
    if opts.header == 'yes':
        for i in fn1:
            f.write(i)
    else:
        fn1.readline()
        for i in fn1:
            f.write(i)
    fn1.close()
    for i in range(2, N + 1):
        print(i)
        fn = open(hmp_pattern % i)
        fn.readline()
        for j in fn:
            f.write(j)
        fn.close()
    f.close()
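A hypothetical invocation matching the docstring: merge ten per-chromosome files named by the 1-based pattern into one hmp, keeping the header from the first file.

# equivalent to: %prog combineHmp 10 hmp321_agpv4_chr%s.hmp combined.hmp
combineHmp(['10', 'hmp321_agpv4_chr%s.hmp', 'combined.hmp'])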
Example #13
def trim_single(args):
    """
    %prog trim in_dir out_dir
    quality control on the single end reads
    """
    p = OptionParser(trim_single.__doc__)
    p.add_option('--pattern',
                 default='*_Unpaired.fastq',
                 help='filename pattern for all single end reads')
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())
    in_dir, out_dir, = args
    out_path = Path(out_dir)
    if not out_path.exists():
        sys.exit('output dir %s does not exist...' % out_dir)
    fns = glob('%s/%s' % (in_dir, opts.pattern))
    for fn in fns:
        fn_path = Path(fn)
        prf = '_'.join(fn_path.name.split('_')[0:-1]) + '.SE'
        print(prf)
        fn_out = fn_path.name.replace('Unpaired.fastq', 'trim.Unpaired.fastq')
        cmd = 'java -jar $TM_HOME/trimmomatic.jar SE -phred33 %s %s TRAILING:20 SLIDINGWINDOW:4:20 MINLEN:40' % (
            fn, str(out_path / fn_out))
        header = Slurm_header % (10, 10000, prf, prf, prf)
        header += 'ml trimmomatic\n'
        header += cmd
        with open('%s.trim.slurm' % (prf), 'w') as f:
            f.write(header)
Example #14
def fastqc(args):
    """
    %prog fastqc in_dir out_dir
        in_dir: the dir where fastq files are located
        out_dir: the dir saving fastqc reports

    generate slurm files for fastqc jobs
    """
    p = OptionParser(fastqc.__doc__)
    p.add_option("--pattern",
                 default='*.fastq',
                 help="the pattern of fastq files, qutation needed")
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    in_dir, out_dir, = args

    out_path = Path(out_dir)
    if not out_path.exists():
        sys.exit('%s does not exist...' % out_dir)
    dir_path = Path(in_dir)
    fqs = dir_path.glob(opts.pattern)
    for fq in fqs:
        prf = '.'.join(fq.name.split('.')[0:-1])
        print(prf)
        cmd = 'fastqc %s -o %s' % (str(fq), out_dir)
        header = Slurm_header % (10, 10000, prf, prf, prf)
        header += 'ml fastqc\n'
        header += cmd
        with open('%s.fastqc.slurm' % (prf), 'w') as f:
            f.write(header)
Example #15
def keras_cnn(args):
    """
    %prog train_dir val_dir num_category model_name_prefix
    
    Run vgg model
    """
    p = OptionParser(keras_cnn.__doc__)
    p.add_option('--epoch', default=500, help='number of epochs')
    p.add_option('--lr_n', default=1, type='int',
        help='train models with different learning rates. if n=1: set lr to 0.001. if n>1: try n different lrs from 1e-2 to 1e-5')
    p.set_slurm_opts(gpu=True)
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    train_dir, val_dir, numC, mnp = args #mnp:model name prefix
    out_fns = fns(mnp, n=opts.lr_n)
    for i in range(int(opts.lr_n)):
        cmd = 'python -m schnablelab.CNN.keras_vgg %s %s %s %s %s %s'%(train_dir, val_dir, numC, out_fns.lrs[i], opts.epoch, out_fns.model_name[i]) 
        SlurmHeader = Slurm_gpu_header%(opts.time, opts.memory, out_fns.model_name[i], out_fns.model_name[i], out_fns.model_name[i])
        SlurmHeader += 'module load anaconda\nsource activate MCY\n'
        SlurmHeader += cmd
        f = open('%s.slurm'%out_fns.model_name[i], 'w')
        f.write(SlurmHeader)
        f.close()
        print('slurm file %s.slurm has been created, you can sbatch your job file.'%out_fns.model_name[i])
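keras_cnn relies on a fns() helper (not shown) that pairs each learning rate with a model name. A sketch consistent with the --lr_n help text (0.001 when n=1, otherwise n rates spread from 1e-2 to 1e-5):

from collections import namedtuple
import numpy as np

def fns(prefix, n=1):
    # hypothetical reconstruction of the repo's fns() helper
    lrs = [0.001] if n == 1 else list(np.logspace(-2, -5, n))
    names = ['%s_lr%s' % (prefix, lr) for lr in lrs]
    return namedtuple('OutFns', 'lrs model_name')(lrs, names)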
Example #16
def genPCA(args):
    """
    %prog genPCA hmp N

    Generate first N PCs using tassel
    """
    p = OptionParser(genPCA.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, N, = args
    out_prefix = hmp.replace('.hmp', '')
    cmd = 'run_pipeline.pl -Xms28g -Xmx29g -fork1 -h %s -PrincipalComponentsPlugin -ncomponents %s -covariance true -endPlugin -export %s_%sPCA -runfork1\n' % (
        hmp, N, out_prefix, N)

    h = Slurm_header
    h += 'ml java/1.8\n'
    h += 'ml tassel/5.2\n'
    header = h % (opts.time, opts.memory, opts.prefix, opts.prefix,
                  opts.prefix)
    header += cmd
    f = open('%s.PCA%s.slurm' % (out_prefix, N), 'w')
    f.write(header)
    f.close()
    print(
        'slurm file %s.PCA%s.slurm has been created, you can sbatch your job file.'
        % (out_prefix, N))
Example #17
def plot(args):
    """
    %prog plot gwas_out result_prefix

    plot MVP results using the MVP.Report function.
    https://github.com/XiaoleiLiuBio/MVP
    """
    p = OptionParser(plot.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    gwasfn, op, = args  # op: output prefix
    cmds = '''
    library('MVP')
    myData = read.csv('%s')
    MVP.Report(myData, plot.type='m', col=c("dodgerblue4","deepskyblue"), LOG10=TRUE, ylim=NULL, threshold=8.9e-8, threshold.col='grey', chr.den.col=NULL, file='png', memo='MLM', dpi=300)
    ''' % gwasfn
    f1 = open('%s.plot.R' % op, 'w')
    f1.write(cmds)
    f1.close()
    f2 = open('%s.plot.slurm' % op, 'w')
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix,
                             opts.prefix)
    header += 'module load R\n'
    header += 'R CMD BATCH %s.plot.R\n' % op
    f2.write(header)
    f2.close()
    print('%s.plot.R and %s.plot.slurm have been created.' % (op, op))
Example #18
def reorgnzGemmaKinship(args):
    """
    %prog reorgnzGemmaKinship GEMMAkinship hmp

    Reorganize kinship result from GEMMA so it can be used in other software, like GAPIT.
    The hmp file only provides the order of the sample names.
    """
    p = OptionParser(reorgnzGemmaKinship.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    gemmaKin, hmpfile, = args

    f = open(hmpfile)
    SMs = f.readline().split()[11:]
    f.close()
    f1 = open(gemmaKin)
    f2 = open('GAPIT.' + gemmaKin, 'w')
    for i, j in zip(SMs, f1):
        newline = i + '\t' + j
        f2.write(newline)
    f1.close()
    f2.close()
    print(
        "Finished! Kinship matrix file 'GAPIT.%s' for GAPIT has been generated."
        % gemmaKin)
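A tiny illustration of the pairing above, with made-up numbers: GEMMA writes a bare numeric matrix, and the loop prepends the sample name (taken from the hmp header order) to each row.

SMs = ['SM1', 'SM2']                           # hypothetical sample order from the hmp header
gemma_rows = ['0.98\t0.12\n', '0.12\t1.01\n']  # hypothetical GEMMA kinship rows
for name, row in zip(SMs, gemma_rows):
    print(name + '\t' + row, end='')           # SM1  0.98  0.12 / SM2  0.12  1.01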
Example #19
def IndexBam(args):
    """
    %prog IndexBam dir
    create the index for bam files
    """
    p = OptionParser(IndexBam.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    allfiles = [i for i in os.listdir(mydir) if i.endswith('sorted.bam')]
    print('Total %s sorted.bam files' % len(allfiles))
    for i in allfiles:
        SM = i.split('.')[0]
        cmd = 'samtools index %s\n' % i
        header = Slurm_header % (opts.time, opts.memory, SM, SM, SM)
        header += 'module load samtools/0.1\n'
        header += cmd
        jobfile = '%s.idx.slurm' % SM
        f = open(jobfile, 'w')
        f.write(header)
        f.close()
    print(
        'slurm files *.idx.slurm have been created, you can sbatch your job files.'
    )
Example #20
def hmp2vcf(args):
    """
    %prog hmp2vcf hmp
    convert hmp to vcf format using tassel
    """
    p = OptionParser(hmp2vcf.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmpfile, = args
    prefix = '.'.join(hmpfile.split('.')[0:-1])
    cmd = 'run_pipeline.pl -Xms512m -Xmx10G -fork1 -h %s -export -exportType VCF\n' % (
        hmpfile)
    print(cmd)
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix,
                             opts.prefix)
    header += 'ml tassel/5.2\n'
    header += cmd
    f = open('%s.hmp2vcf.slurm' % prefix, 'w')
    f.write(header)
    f.close()
    print(
        'slurm file %s.hmp2vcf.slurm has been created, you can sbatch your job file.'
        % prefix)
Example #21
def CombineRep(args):
    """
    %prog CombineRep dir
    combine all fq.gz files for the same sample
    """
    p = OptionParser(CombineRep.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    fqs = [i for i in os.listdir(mydir) if i.endswith('fq.gz')]
    fqs = sorted(
        fqs, key=lambda x: int(x.split('.')[0].split('_')[0].split('R')[0]))
    SMs = [x.split('.')[0].split('_')[0].split('R')[0] for x in fqs]
    mydf = pd.DataFrame(dict(zip(['SM', 'FNs'], [SMs, fqs])))
    mygrpdf = mydf.groupby('SM').agg(['count', lambda x: ' '.join(x)])
    f = open('combine_fqs.sh', 'w')
    for sm in mygrpdf.index:
        n, fns = mygrpdf.loc[sm, :]
        cmd = 'cat %s > %s.cbd.fq.gz\n' % (fns, sm)
        f.write(cmd)
    f.close()
    cmd1 = 'chmod +x combine_fqs.sh\n'
    cmd2 = './combine_fqs.sh\n'
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix,
                             opts.prefix)
    header += cmd1
    header += cmd2
    f = open('CombineFQs.slurm', 'w')
    f.write(header)
    f.close()
    print(
        'slurm file CombineFQs.slurm has been created, you can sbatch your job file.'
    )
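The groupby/agg step is the heart of CombineRep; a small worked example with hypothetical file names showing what each row of mygrpdf holds:

import pandas as pd

fqs = ['1R1.fq.gz', '1R2.fq.gz', '2R1.fq.gz']  # hypothetical replicate files
SMs = [x.split('.')[0].split('_')[0].split('R')[0] for x in fqs]  # ['1', '1', '2']
mydf = pd.DataFrame(dict(zip(['SM', 'FNs'], [SMs, fqs])))
mygrpdf = mydf.groupby('SM').agg(['count', lambda x: ' '.join(x)])
# sample '1' -> count 2, names '1R1.fq.gz 1R2.fq.gz', so the script line becomes:
# cat 1R1.fq.gz 1R2.fq.gz > 1.cbd.fq.gz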
Example #22
def IndePvalue(args):
    """
    %prog IndePvalue plink_bed_prefix output

    calculate the number of independent SNPs (Me) and the bonferroni pvalue
    """
    p = OptionParser(IndePvalue.__doc__)
    p.set_slurm_opts(jn=True)
    p.add_option(
        '--cutoff',
        default='0.05',
        choices=('0.01', '0.05'),
        help='choose the pvalue cutoff for the calculation of bonferroni pvalue'
    )
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())

    bed, output = args
    mem = int(opts.memory) // 1000 - 2  # java heap in Gb, leaving headroom
    cmd = 'java -Xmx%sg -jar %s --noweb --effect-number --plink-binary %s --genome --out %s' % (
        mem, GEC, bed, output)
    h = Slurm_header
    h += 'module load java/1.8\n'
    header = h % (opts.time, opts.memory, opts.prefix, opts.prefix,
                  opts.prefix)
    header += cmd
    f = open('%s.Me_SNP.slurm' % output, 'w')
    f.write(header)
    f.close()
    print(
        'slurm file %s.Me_SNP.slurm has been created, you can sbatch your job file.'
        % output)
Example #23
def extract_info(args):
    """
    %prog log_file output_fn
    
    extract testing and prediction results from dpp log file
    """
    p = OptionParser(extract_info.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    logfile, opp, = args

    f0 = open(logfile)
    all_lines = f0.readlines()
    test_idx, predict_idx, hist_idx = 0, 0, 0
    for i, j in enumerate(all_lines):
        if 'All test labels:' in j:
            test_idx = i
        if 'All predictions:' in j:
            predict_idx = i
        if 'Histogram of ' in j:
            hist_idx = i

    test_lines = all_lines[test_idx + 1:predict_idx]
    ground_truth = extract_num(test_lines)
    #print(len(ground_truth), '\n', ground_truth)

    predict_lines = all_lines[predict_idx + 1:hist_idx]
    prediction = extract_num(predict_lines)
    #print(len(prediction), '\n', prediction)

    df = pd.DataFrame(
        dict(zip(['groundtruth', 'prediction'], [ground_truth, prediction])))
    df.to_csv(opp, index=False, sep='\t')
    print('Done! check %s' % opp)
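extract_info calls an extract_num() helper defined elsewhere in the module. A plausible sketch, assuming the dpp log prints labels and predictions as numeric tokens spread over the captured lines:

import re

def extract_num(lines):
    # hypothetical helper: pull every numeric token from the log lines
    nums = []
    for line in lines:
        nums.extend(float(x) for x in re.findall(r'-?\d+\.?\d*', line))
    return nums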
Example #24
def Info(args):
    '''
    %prog Info project_folder

    Show summary of images under project_folder
    '''
    p = OptionParser(Info.__doc__)
    p.add_option(
        '--item_idx',
        default='1,2,3',
        help=
        'the index of sample name, date, and time in each image directory name'
    )
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())
    project_folder, = args

    sm_idx, date_idx, time_idx = [int(i) for i in opts.item_idx.split(',')]
    prj = ParseProject(project_folder, sm_idx, date_idx, time_idx)
    print('Summary of samples:')
    for i, j in prj.sm_counts.items():
        print(i, j)
    print('Summary of dates:')
    for i, j in prj.date_counts.items():
        print(i, j)
    print('Angles for RGB images:')
    for angle in prj.df.loc[0, 'fnpath'].glob('Vis_*'):
        print(angle.name)
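Info depends on a ParseProject class that is not shown. From the attributes used above (sm_counts, date_counts, and a df with an fnpath column), a rough sketch of what it presumably does:

from collections import Counter
from pathlib import Path
import pandas as pd

class ParseProject:
    # plausible reconstruction: tabulate samples and dates from the
    # '_'-separated image directory names under the project folder
    def __init__(self, folder, sm_idx, date_idx, time_idx):
        dirs = [d for d in Path(folder).iterdir() if d.is_dir()]
        parts = [d.name.split('_') for d in dirs]
        self.sm_counts = Counter(p[sm_idx] for p in parts)
        self.date_counts = Counter(p[date_idx] for p in parts)
        self.df = pd.DataFrame({'fnpath': dirs})  # time_idx unused in this sketch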
Example #25
def export(args):
    '''
    %prog export proj_id outfile

    - proj_id: The project id of the zooniverse project

    DESC: Fetches an export from the specified zooniverse project id.
    '''

    from schnablelab.Zooniverse.Zootils import export as exp

    p = OptionParser(export.__doc__)
    p.add_option('-t', '--type', default='classifications',
                 help='Specify the type of export')

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    projid, outfile = args

    exp(projid, outfile, opts)

    return True
Example #26
def cpu(args):
    """
    %prog  
    request a cpu node from hcc.
    """
    p = OptionParser(cpu.__doc__)
    p.add_option("--partition",
                 default="jclarke",
                 choices=('batch', 'jclarke'),
                 help="which partition? [default: %default]")
    p.add_option("--memory",
                 default="10240",
                 help="specify the how much memory [default: %default]")
    p.add_option("--time",
                 default='20',
                 help="specify the time (hour) [default: %default]")
    opts, args = p.parse_args(args)
    if len(args) == 0:
        print('add --help to see options.\n')
        cmd = 'srun --partition=%s --mem-per-cpu=%s --ntasks-per-node=6 --nodes=1 --time=%s:0:0 --pty $SHELL\n' % (
            opts.partition, opts.memory, opts.time)
        print(cmd)
        #call(cmd, shell=True)
    else:
        sys.exit(not p.print_help())
Example #27
def cMLM(args):
    """
    %prog cMLM pheno(with header, tab delimited) geno_prefix(GM and GD prefix) PCA Kinship
    
    Run automated GAPIT compressed mixed linear model
    """
    p = OptionParser(cMLM.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())
    
    pheno, geno_prefix, PCA, Kinship = args
    mem = '.'.join(pheno.split('.')[0:-1])
    f1 = open('%s.cMLM.R'%mem, 'w')
    #print(Gapit_header)
    gapit_cmd = Gapit_header%(pheno,geno_prefix,geno_prefix,PCA,Kinship,mem)
    f1.write(gapit_cmd)
    
    f2 = open('%s.cMLM.slurm'%mem, 'w')
    h = Slurm_header
    h += 'module load R/3.3\n'
    header = h%(opts.time, opts.memory, opts.prefix, opts.prefix, opts.prefix)
    f2.write(header)
    cmd = 'R CMD BATCH %s.cMLM.R\n'%mem
    f2.write(cmd)
    f1.close()
    f2.close()
    print('R script %s.cMLM.R and slurm file %s.cMLM.slurm have been created, you can sbatch your job file.' % (mem, mem))
Example #28
def gpu(args):
    """
    %prog
    request a gpu node from hcc.
    """
    p = OptionParser(gpu.__doc__)
    p.add_option("--memory",
                 default="12000",
                 help="specify the how much memory [default: %default]")
    p.add_option("--time",
                 default='20',
                 help="specify the time (hour) [default: %default]")
    p.add_option(
        "--model",
        default='gpu_k40',
        choices=('gpu_p100', 'gpu_k20', 'gpu_k40'),
        help="specify gpu model, p100:16gb, k40:12gb, k20:5gb [default: %default]")
    opts, args = p.parse_args(args)
    if len(args) == 0:
        print('add --help to see options.\n')
        cmd = 'srun --partition=schnablelab --gres=gpu --constraint=%s --mem-per-cpu=%s --ntasks-per-node=1 --nodes=1 --time=%s:0:0 --pty $SHELL\n' % (
            opts.model, opts.memory, opts.time)
        print(cmd)
        #call(cmd, shell=True)
    else:
        sys.exit(not p.print_help())
Example #29
def Imgs2Arrs(args):
    '''
    %prog hyp_dir(filepath of hyperspectral image data) 
    Returns: numpy array object with shape [x*y, z].
        x,y dims correspond to pixel coordinates for each image
        z dim corresponds to hyperspectral image wavelength.
    '''
    import cv2

    p = OptionParser(Imgs2Arrs.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    imgs = [i for i in os.listdir(mydir) if i.endswith('png')]
    sorted_imgs = sorted(imgs, key=lambda x: int(x.split('_')[0]))
    all_arrs = []
    for i in sorted_imgs[2:]:
        print(i)
        #img = cv2.imread('%s/%s'%(mydir, i), cv2.IMREAD_GRAYSCALE)
        img = np.array(Image.open('%s/%s' % (mydir, i)).convert('L'))
        print(img.shape)
        all_arrs.append(img)
    arrs = np.stack(all_arrs, axis=2)
    np.save('%s.npy' % mydir, arrs)
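The docstring promises a [x*y, z] array, but the function saves the stacked [x, y, z] cube; flattening to the documented shape is one reshape away (file name here is hypothetical, following the '%s.npy' % mydir pattern above):

import numpy as np

arrs = np.load('hyp_dir.npy')            # stacked cube, shape (x, y, z)
flat = arrs.reshape(-1, arrs.shape[2])   # (x*y, z): one spectrum per pixel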
Example #30
def sortbam(args):
    """
    %prog in_dir out_dir
        in_dir: bam files folder
        out_dir: sorted bam files folder

    sort bam files using samtools/0.1 sort function.
    """
    p = OptionParser(sortbam.__doc__)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    in_dir, out_dir, = args

    out_path = Path(out_dir)
    if not out_path.exists():
        sys.exit('%s does not exist...' % out_dir)
    dir_path = Path(in_dir)
    bams = dir_path.glob('*.bam')
    for bam in bams:
        prf = bam.name.split('.bam')[0]
        sort_bam = prf + '.sorted'
        sort_bam_path = out_path / sort_bam
        cmd = 'samtools sort %s %s' % (bam, sort_bam_path)
        header = Slurm_header % (100, 15000, prf, prf, prf)
        header += 'ml samtools/0.1\n'
        header += cmd
        with open('%s.sortbam.slurm' % prf, 'w') as f:
            f.write(header)