dumphead = chunks_in.readline() for line in chunks_in: (chrom, start, end, chname) = line.split() chunks[str(chname)] = [str(chrom), int(start), int(end)] # verify output file exists ch_imp = imp_dir + '/' + str(outdot) + '.imp.' + str(chname) + '.gz' # verify completed successfully # - based on expected output of concordance table on last line ch_sum = imp_dir + '/' + str(outdot) + '.imp.' + str(chname) + '_summary' # record failed chunks if not os.path.isfile(ch_imp): mis_chunks[str(chname)] = [str(chrom), int(start), int(end)] elif '[0.9-1.0]' not in file_tail(ch_sum, n=1): mis_chunks[str(chname)] = [str(chrom), int(start), int(end)] chunks_in.close() ############### # if there are missing chunks, restart their imputation and resub agg script ############### if len(mis_chunks) > 0: nummiss = len(mis_chunks) print 'Missing results for %d imputation chunks. Preparing to resubmit...' % nummiss # check if already tried with this number of missing chunks os.chdir(imp_dir) tmp_chunk_file_name = 'tmp_missing_' + str(nummiss) + '_chunks.' + str(
elif args.model == 'logistic': ch_out = 'logis.'+str(outdot)+'.'+str(chname)+'.assoc.logistic' out_len = 12 elif args.model == 'linear': ch_out = 'linear.'+str(outdot)+'.'+str(chname)+'.assoc.linear' out_len = 12 # record chunks with no/partial/broken output if not os.path.isfile(ch_out): print 'Output not found for %s' % str(ch_out) mis_chunks[str(chname)] = [str(chrom), int(start), int(end)] elif file_len(ch_out) < file_len(str(outdot)+'.snps.'+str(chname)+'.txt'): print 'Output file %s is incomplete' % str(ch_out) mis_chunks[str(chname)] = [str(chrom), int(start), int(end)] else: ft = file_tail(ch_out) if len(ft.split()) != out_len: print 'Last line of output file %s is incomplete' % str(ch_out) mis_chunks[str(chname)] = [str(chrom), int(start), int(end)] chunks_in.close() ############### # if there are missing chunks, restart their gwas and resub agg script ############### if len(mis_chunks) > 0: nummiss = len(mis_chunks) print '\nMissing results for %d GWAS jobs. Preparing to resubmit...' % nummiss # just missing chunks for task array
ch_out = 'dfam.'+str(outdot)+'.'+str(chname)+'.dfam' out_len = 8 elif args.model == 'gmmat': ch_out = 'gmmat_score.'+str(outdot)+'.'+str(chname)+'.R.txt' out_len = 11 elif args.model == 'gmmat-fam': ch_out = 'gmmatfam_score.'+str(outdot)+'.'+str(chname)+'.R.txt' out_len = 11 # record chunks with no/partial/broken output if not os.path.isfile(ch_out): mis_chunks[str(chname)] = [str(chrom), int(start), int(end)] elif file_len(ch_out) != file_len(str(outdot)+'.snps.'+str(chname)+'.txt'): mis_chunks[str(chname)] = [str(chrom), int(start), int(end)] else: ft = file_tail(ch_out) if len(ft.split()) != out_len: mis_chunks[str(chname)] = [str(chrom), int(start), int(end)] chunks_in.close() ############### # if there are missing chunks, restart their gwas and resub agg script ############### if len(mis_chunks) > 0: nummiss = len(mis_chunks) print 'Missing results for %d GWAS jobs. Preparing to resubmit...' % nummiss # just missing chunks for task array # fail if already tried
# Scan the chunk list and record every chunk whose imputation output is
# absent or does not look finished. Each row is split on whitespace into
# chrom / start / end / chunk name; all chunks go into `chunks`, failed ones
# additionally into `mis_chunks`.

# path prefix shared by every per-chunk file; invariant across the loop
ch_prefix = imp_dir + '/' + str(outdot) + '.imp.'

try:
    # first line is read and discarded (presumably a header row)
    dumphead = chunks_in.readline()

    for line in chunks_in:
        fields = line.split()
        if not fields:
            # blank row (e.g. trailing newline at end of file): nothing to
            # parse, and the 4-way unpack below would raise ValueError
            continue
        (chrom, start, end, chname) = fields

        chunk_id = str(chname)
        chunks[chunk_id] = [str(chrom), int(start), int(end)]

        # verify output file exists
        ch_imp = ch_prefix + chunk_id + '.gz'

        # verify completed successfully
        # - based on expected output of concordance table on last line
        ch_sum = ch_prefix + chunk_id + '_summary'

        # record failed chunks: output missing, summary missing, or summary
        # not ending with the expected '[0.9-1.0]' concordance row.
        # The summary's existence is checked explicitly because file_tail is
        # defined elsewhere and its behavior on a missing path is not
        # visible here; `or` short-circuits so it only sees existing files.
        if (not os.path.isfile(ch_imp)
                or not os.path.isfile(ch_sum)
                or '[0.9-1.0]' not in file_tail(ch_sum, n=1)):
            # fresh list so chunks and mis_chunks never share an entry
            mis_chunks[chunk_id] = list(chunks[chunk_id])
finally:
    # release the handle even if a malformed row aborts the scan
    chunks_in.close()

###############
# if there are missing chunks, restart their imputation and resub agg script
###############
if mis_chunks:
    nummiss = len(mis_chunks)
    # single-argument parenthesized form: prints the same text under the
    # Python 2 print statement and also parses as a Python 3 call
    print('Missing results for %d imputation chunks. Preparing to resubmit...' % nummiss)

    # check if already tried with this number of missing chunks