Ejemplo n.º 1
0
# Read the user-level picopili configuration and the cluster settings.
# clust_conf is kept because log file names use clust_conf['log_task_id'].
conf_file = '/'.join([os.environ['HOME'], 'picopili.conf'])
configs = read_conf(conf_file)
cluster = configs['cluster']
clust_conf = read_clust_conf()

# executables looked up through find_exec (keys 'i2loc' / 'shloc')
impute_ex = find_exec('impute2', key='i2loc')
shapeit_ex = find_exec('shapeit', key='shloc')

# the chunking script is addressed by absolute path, relative to the
# directory that holds the current script
rp_bin = os.path.dirname(os.path.realpath(__file__))
chunker_ex = '/'.join([rp_bin, 'chunk_snps.py'])
test_exec(chunker_ex, 'picopili chunking script')

if args.ref_dir is not None:
    # the reference directory has to exist before it is used as a prefix
    assert os.path.isdir(args.ref_dir), "Failed to find imputation reference directory %s" % args.ref_dir

    # prepend the reference directory to every reference file argument
    for _ref_arg in ('ref_maps', 'ref_haps', 'ref_legs', 'ref_samps'):
        setattr(args, _ref_arg, str(args.ref_dir) + '/' + getattr(args, _ref_arg))


# TODO: here
# .hg19.ch.fl.bim for chunking
# imp. references
Ejemplo n.º 2
0
smartpcax = find_exec('smartpca', key='eloc')

# An executable argument counts as unspecified when it is the None object
# or the literal string "None"; in that case look it up with find_exec.
if args.rscript_ex in (None, "None"):
    args.rscript_ex = find_exec("Rscript", key='rscloc')

if args.primus_ex in (None, "None"):
    args.primus_ex = find_exec("run_PRIMUS.pl", key='priloc')

# absolute path of the PCA plotting script, built from the directory
# containing the current script
rp_bin = os.path.dirname(os.path.realpath(__file__))
Rplotpcax = '/'.join([str(rp_bin), 'plot_pca.Rscript'])

# test executables (same order as before: PRIMUS, Plink, smartpca, Rscript)
for _exe, _label in (
        (args.primus_ex, 'PRIMUS'),
        (plinkx, 'Plink'),
        (smartpcax, 'Eigensoft smartpca'),
        (args.rscript_ex, 'Rscript'),
):
    test_exec(_exe, _label)

# start banner
for _banner_line in ('\n', '############', 'Begin!', '############'):
    print(_banner_line)

####################################
# Compute maximum unrelated set
# a) run PRIMUS
# b) verify ran successfully
####################################
Ejemplo n.º 3
0
#############

### read config
conf_file = '/'.join([os.environ['HOME'], 'picopili.conf'])
configs = read_conf(conf_file)
analyst = configs['init']

# locate plink
plinkx = find_exec('plink', key='p2loc')

if not args.skip_platform:
    # the platform-guessing script is addressed relative to the directory
    # containing the current script
    rp_bin = os.path.dirname(os.path.realpath(__file__))
    plague_ex = '/'.join([rp_bin, 'plague_pico.pl'])
    test_exec(plague_ex, 'Platform guessing script')
# TODO: verify plague works properly across platforms (primary concern is Compress::Zlib loading)

# --bfile has to be a bare file stem; any '/' means a path was given
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"


# start banner
for _banner_line in ('\n', '############', 'Begin!', '############'):
    print(_banner_line)

#############
qcdir = ''.join(['qc_', str(args.out)])
print('\n...Setting up working directory (./%s)...' % qcdir)
#############
Ejemplo n.º 4
0
configs = read_conf(conf_file)

# os.path.join adds the separator only when the configured directory does not
# already end in one, so 'p2loc' works with or without a trailing slash
plinkx = os.path.join(configs['p2loc'], "plink")

# get directory containing current script
# (hack to help find ld region text file)
rp_bin = os.path.dirname(os.path.realpath(__file__))
rp_dir = os.path.dirname(rp_bin)

#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# plink
test_exec(plinkx, 'Plink')

# ld region file, if needed
# try in rp_dir/lib/ in addition to cwd
if args.extra_ld_regions is not None and args.extra_ld_regions != "None":
    # build the fallback location once instead of on every use
    _ld_lib_dir = str(rp_dir + '/lib/')
    _ld_lib_file = _ld_lib_dir + args.extra_ld_regions

    if os.path.isfile(args.extra_ld_regions):
        print("LD region file found: %s" % args.extra_ld_regions)
    elif os.path.isfile(_ld_lib_file):
        # from here on the argument holds the path under rp_dir/lib/
        args.extra_ld_regions = _ld_lib_file
        print("LD region file found: %s" % args.extra_ld_regions)
    else:
        raise IOError("LD region file %s not found in current directory or %s." % (args.extra_ld_regions, _ld_lib_dir))



print('\n')
Ejemplo n.º 5
0
#############

# get variables from path as needed
# - Rscript (if unspecified)
# - IBD plotting script
# - PCA plotting script (optional)
if args.rscript_ex is None or args.rscript_ex == "None":
    args.rscript_ex = find_from_path('Rscript', 'Rscript')

Rplotibdx = find_from_path('plot_reap_ibd.Rscript', 'IBD plotting script')

if plot_pca:
    Rplotpcax = find_from_path('plot_pca.Rscript', 'PCA plotting script')

# verify executables
test_exec(plinkx, 'Plink')
test_exec(args.rscript_ex, 'Rscript')
test_exec(args.admixture_ex, 'ADMIXTURE')
test_exec(args.reap_ex, 'REAP')

# pca file
if plot_pca:
    assert os.path.isfile(
        args.plot_admix_pca
    ), "PCA file does not exist (%r)" % args.plot_admix_pca
    # The message is about --plot-admix-pca, so that argument is the one
    # tested here; --target-bfile gets its own check further down.
    assert '/' not in args.plot_admix_pca, "--plot-admix-pca must specify only a file, not a path"

# verify bfiles are files, not paths
assert '/' not in args.unrel_bfile, "--unrel-bfile must specify only a file stem, not a path"
assert '/' not in args.target_bfile, "--target-bfile must specify only a file stem, not a path"
Ejemplo n.º 6
0
print('--out ' + str(args.out))
print('--format ' + str(args.format))
print('--min-rel ' + str(args.min_rel))
print('--max-gens ' + str(args.max_gens))
print(' ')

# verify input files exist
assert os.path.isfile(
    args.input_ibd
), "IBD/relatedness file does not exist (%r)" % args.input_ibd
# '%' binds tighter than '+', so the file name is parenthesised to keep the
# '.fam' suffix inside the formatted message
assert os.path.isfile(
    str(args.bfile) + '.fam'
), "Plink fam file does not exist (%s)" % (str(args.bfile) + '.fam')

# test executables
test_exec(args.primus_ex, 'PRIMUS')
test_exec(args.findped_ex, 'PRIMUS pedigree matching script')
print(' ')

# unzip relatedness file if needed
if args.input_ibd.endswith('.gz'):
    ibd_txtfile = str(args.input_ibd) + '.txt'
    print('Unzipping IBD relatedness file to %s' % ibd_txtfile)
    # the with-block closes the output file even when gunzip exits non-zero
    # and check_call raises CalledProcessError
    with open(ibd_txtfile, 'w') as ibd_out:
        subprocess.check_call(['gunzip', '-c', str(args.input_ibd)],
                              stdout=ibd_out)
else:
    ibd_txtfile = str(args.input_ibd)
assert os.path.isfile(
Ejemplo n.º 7
0
##############
#print '\n...Reading ricopili config file...'
##############
#
#### read plink loc from config
#conf_file = os.environ['HOME']+"/ricopili.conf"
#configs = read_conf(conf_file)

#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# both executables are tested unconditionally
test_exec(args.rplink_ex, 'Plink')
#if not args.rserve_active:
test_exec(args.r_ex, 'R')
# TODO: find a way to test Rserve available?

# The GEE R script is looked up in the directory of the current script;
# the no-covariate variant is chosen when --covar was not given.
rp_bin = os.path.dirname(os.path.realpath(__file__))
R_gee = rp_bin + ('/gee_logit_nocov.R' if args.covar is None else '/gee_logit_covar.R')
assert os.path.isfile(R_gee), 'Failed to find R GEE script %s' % str(R_gee)

# --bfile has to be a bare file stem; any '/' means a path was given
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"
Ejemplo n.º 8
0
# print settings
print('Using settings:')
print('--input-ibd ' + str(args.input_ibd))
print('--bfile ' + str(args.bfile))
print('--out ' + str(args.out))
print('--format ' + str(args.format))
print('--min-rel ' + str(args.min_rel))
print('--max-gens ' + str(args.max_gens))
print(' ')

# verify input files exist
assert os.path.isfile(args.input_ibd), "IBD/relatedness file does not exist (%r)" % args.input_ibd
# '%' binds tighter than '+', so the file name is parenthesised to keep the
# '.fam' suffix inside the formatted message
assert os.path.isfile(str(args.bfile) + '.fam'), "Plink fam file does not exist (%s)" % (str(args.bfile) + '.fam')

# test executables
test_exec(args.primus_ex, 'PRIMUS')
test_exec(args.findped_ex, 'PRIMUS pedigree matching script')
print(' ')

# unzip relatedness file if needed
if args.input_ibd.endswith('.gz'):
    ibd_txtfile = str(args.input_ibd) + '.txt'
    print('Unzipping IBD relatedness file to %s' % ibd_txtfile)
    # the with-block closes the output file even when gunzip exits non-zero
    # and check_call raises CalledProcessError
    with open(ibd_txtfile, 'w') as ibd_out:
        subprocess.check_call(['gunzip', '-c', str(args.input_ibd)], stdout=ibd_out)
else:
    ibd_txtfile = str(args.input_ibd)

# the plain-text IBD file must exist whether it was extracted or given as-is
assert os.path.isfile(ibd_txtfile), "Failed to extract IBD/relatedness file (%r)" % args.input_ibd
Ejemplo n.º 9
0
configs = read_conf(conf_file)

# os.path.join adds the separator only when the configured directory does not
# already end in one, so 'p2loc' works with or without a trailing slash
plinkx = os.path.join(configs['p2loc'], "plink")

# get directory containing current script
# (hack to help find ld region text file)
rp_bin = os.path.dirname(os.path.realpath(__file__))
rp_dir = os.path.dirname(rp_bin)

#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# plink
test_exec(plinkx, 'Plink')

# ld region file, if needed
# try in rp_dir/lib/ in addition to cwd
if args.extra_ld_regions is not None and args.extra_ld_regions != "None":
    # build the fallback location once instead of on every use
    _ld_lib_dir = str(rp_dir + '/lib/')
    _ld_lib_file = _ld_lib_dir + args.extra_ld_regions

    if os.path.isfile(args.extra_ld_regions):
        print("LD region file found: %s" % args.extra_ld_regions)
    elif os.path.isfile(_ld_lib_file):
        # from here on the argument holds the path under rp_dir/lib/
        args.extra_ld_regions = _ld_lib_file
        print("LD region file found: %s" % args.extra_ld_regions)
    else:
        raise IOError(
            "LD region file %s not found in current directory or %s." %
            (args.extra_ld_regions, _ld_lib_dir))

print('\n')
Ejemplo n.º 10
0
##############
#print '\n...Reading ricopili config file...'
##############
#
#### read plink loc from config
#conf_file = os.environ['HOME']+"/ricopili.conf"
#configs = read_conf(conf_file)


#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# verify executables
test_exec(args.rplink_ex, 'Plink')

# --bfile has to be a bare file stem; any '/' means a path was given
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"

# each optional input file that was supplied (is not None) must exist
for _given_path, _file_desc in (
        (args.keep, "ID inclusion file"),
        (args.remove, "ID exclusion file"),
        (args.extract, "SNP inclusion file"),
        (args.exclude, "SNP exclusion file"),
        (args.pheno, "Phenotype file"),
):
    if _given_path is not None:
        assert os.path.isfile(_given_path), "%s does not exist (%r)" % (_file_desc, _given_path)
Ejemplo n.º 11
0
analyst = configs['init']

if not args.skip_platform:
    # the platform-guessing script is addressed relative to the directory
    # containing the current script
    rp_bin = os.path.dirname(os.path.realpath(__file__))
    plague_ex = '/'.join([rp_bin, 'plague.pl'])


#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# plink is always tested; the platform script only when platform
# guessing has not been skipped
test_exec(plinkx, 'Plink')
if not args.skip_platform:
    test_exec(plague_ex, 'Platform guessing script')
# TODO: verify plague works properly across platforms (primary concern is Compress::Zlib loading)

# --bfile has to be a bare file stem; any '/' means a path was given
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"


# start banner
for _banner_line in ('\n', '############', 'Begin!', '############'):
    print(_banner_line)

#############
qcdir = ''.join(['qc_', str(args.out)])
Ejemplo n.º 12
0
# An executable argument counts as unspecified when it is the None object
# or the literal string "None".
if args.rplink_ex in (None, "None"):
    args.rplink_ex = find_exec('plink', key='rplloc')

# R: first try find_from_path ...
if args.r_ex in (None, "None"):
    args.r_ex = find_from_path('R', 'R')

# ... and if that still left it unspecified, try find_exec
if args.r_ex in (None, "None"):
    args.r_ex = find_exec('R', key='rloc')

if args.rserve_ex in (None, "None"):
    args.rserve_ex = find_exec('Rserve', key='rservloc')

# verify executables (same order as before: Plink, R, Rserve)
for _exe, _label in (
        (args.rplink_ex, 'Plink'),
        (args.r_ex, 'R'),
        (args.rserve_ex, 'Rserve'),
):
    test_exec(_exe, _label)

# The GEE R script is looked up in the directory of the current script;
# the no-covariate variant is chosen when --covar was not given.
rp_bin = os.path.dirname(os.path.realpath(__file__))
R_gee = rp_bin + ('/gee_logit_nocov.R' if args.covar is None else '/gee_logit_covar.R')
assert os.path.isfile(R_gee), 'Failed to find R GEE script %s' % str(R_gee)

# --bfile has to be a bare file stem; any '/' means a path was given
assert '/' not in args.bfile, "--bfile must specify only a file stem, not a path"
Ejemplo n.º 13
0

#############
print('\n...Checking dependencies...')
# check exists, executable
#############

# Rscript is looked up with find_from_path only when it was not supplied
# (None object or the literal string "None")
if args.rscript_ex in (None, "None"):
    args.rscript_ex = find_from_path("Rscript", 'Rscript')

Rplotpcax = find_from_path("plot_pca.Rscript", 'PCA plotting script')


# test executables (same order as before: PRIMUS, Plink, smartpca, Rscript)
for _exe, _label in (
        (args.primus_ex, 'PRIMUS'),
        (plinkx, 'Plink'),
        (smartpcax, 'Eigensoft smartpca'),
        (args.rscript_ex, 'Rscript'),
):
    test_exec(_exe, _label)



# start banner
for _banner_line in ('\n', '############', 'Begin!', '############'):
    print(_banner_line)

####################################
# Compute maximum unrelated set
# a) run PRIMUS
# b) verify ran successfully
Ejemplo n.º 14
0
#############

# get variables from path as needed
# - Rscript (if unspecified)
# - IBD plotting script
# - PCA plotting script (optional)
if args.rscript_ex is None or args.rscript_ex == "None":
    args.rscript_ex = find_from_path('Rscript', 'Rscript')

Rplotibdx = find_from_path('plot_reap_ibd.Rscript', 'IBD plotting script')

if plot_pca:
    Rplotpcax = find_from_path('plot_pca.Rscript', 'PCA plotting script')

# verify executables
test_exec(plinkx, 'Plink')
test_exec(args.rscript_ex, 'Rscript')
test_exec(args.admixture_ex, 'ADMIXTURE')
test_exec(args.reap_ex, 'REAP')

# pca file
if plot_pca:
    assert os.path.isfile(args.plot_admix_pca), "PCA file does not exist (%r)" % args.plot_admix_pca
    # The message is about --plot-admix-pca, so that argument is the one
    # tested here; --target-bfile gets its own check further down.
    assert '/' not in args.plot_admix_pca, "--plot-admix-pca must specify only a file, not a path"

# verify bfiles are files, not paths
assert '/' not in args.unrel_bfile, "--unrel-bfile must specify only a file stem, not a path"
assert '/' not in args.target_bfile, "--target-bfile must specify only a file stem, not a path"