def create_pon(vlist):
    """Print a panel-of-normals table built from a list of VCF files.

    Each line of *vlist* names a VCF file whose name contains a
    ``<digits-digits>_<digits-digits>`` pair; the second member of the pair is
    used as the normal-sample ID.  Every non-header record whose FILTER column
    (7th tab-separated field) contains ``HighVafNormal`` or
    ``HighAltCountNormal`` is keyed by chr/pos/ref/alt, and that key is counted
    at most once per normal-sample ID.

    A header line followed by one ``chr pos ref alt ct`` row per key is written
    to stdout; progress messages go to stderr.

    :param vlist: path to a text file with one VCF path per line
    :raises AttributeError: if a file name does not contain the ID pair
    """
    banned_filt = ('HighVafNormal', 'HighAltCountNormal')
    banned_tup = {}  # variant key -> number of distinct normals flagging it
    norm_flag = {}   # normal ID -> {variant key: 1} already counted for it
    sys.stdout.write('chr\tpos\tref\talt\tct\n')
    with open(vlist) as vcf_list:
        for fn in vcf_list:
            # fn still carries its newline here, which terminates the log line
            sys.stderr.write(date_time() + 'Processing ' + fn)
            fn = fn.rstrip('\n')
            bnids = re.search(r'(\d+-\d+)_(\d+-\d+)', fn)
            norm = bnids.group(2)
            with open(fn) as vcf:
                seen = norm_flag.setdefault(norm, {})
                for line in vcf:
                    if line[0] == '#':
                        continue
                    info = line.rstrip('\n').split('\t')
                    states = info[6].split(';')
                    if not any(state in banned_filt for state in states):
                        continue
                    cur = '\t'.join((info[0], info[1], info[3], info[4]))
                    if cur not in banned_tup:
                        banned_tup[cur] = 1
                        seen[cur] = 1
                    elif cur not in seen:
                        # same variant, first time seen for this normal
                        seen[cur] = 1
                        banned_tup[cur] += 1
    sys.stderr.write(date_time() + 'Outputting results\n')
    for tup in banned_tup:
        sys.stdout.write(tup + '\t' + str(banned_tup[tup]) + '\n')
def flagstats(samtools_tool, sample):
    """Start ``samtools flagstat`` for the sorted and the rmdup-sorted BAM.

    For ``<sample>.srt.bam`` and then ``<sample>.rmdup.srt.bam`` the command is
    logged to stderr and launched through the shell with its output redirected
    to ``<bam>.flagstats``.  The processes are started but not waited on.
    """
    for bam in (sample + ".srt.bam", sample + ".rmdup.srt.bam"):
        flagstats_cmd = samtools_tool + " flagstat " + bam + " > " + bam + ".flagstats"
        sys.stderr.write(date_time() + flagstats_cmd + "\n")
        Popen(flagstats_cmd, shell=True, stdin=None, stdout=None, stderr=None, close_fds=True)
def wait_until_status(status, status_verb, cinder_id, timeout):
    """Poll ``cinder show`` every 30 seconds until the volume reaches *status*.

    :param status: status string to wait for
    :param status_verb: verb used in log and error messages
    :param cinder_id: ID passed to ``cinder show``
    :param timeout: maximum number of seconds to wait (converted with int())
    :raises Exception: when the accumulated sleep time exceeds *timeout*
    """
    interval = 30
    nap_cmd = 'sleep ' + str(interval) + 's'
    waited = 0
    while True:
        sys.stderr.write(date_time() + ': Sleeping ' + str(interval) + 's.\n')
        subprocess.call(nap_cmd, shell=True)
        waited += interval
        if waited > int(timeout):
            # TODO I think we should delete the VM somehow, wait until ACTIVE
            raise Exception('FATAL ERROR: cinder still ' + status_verb + 'ing as timeout of ' + str(timeout)
                            + 's was reached. Increase timeout and try again.\n')
        sys.stderr.write(date_time() + ': Checking success of cinder ' + status_verb + '. ' + str(waited)
                         + ' seconds have passed.\n')
        show_cmd = '. /home/ubuntu/.novarc; ' + 'cinder show ' + cinder_id
        show_output = subprocess.check_output(show_cmd, shell=True)
        if get_cinder_show_attr(show_output, 'status') == status:
            break
def update_couchdb(fn, config_file):
    """PUT every JSON file listed in *fn* into CouchDB as a new document.

    Connection settings come from ``parse_config(config_file)``.  For each
    listed file a fresh UUID is requested from ``<server>/_uuids`` and the file
    is uploaded to ``<server>/<db>/<uuid>`` with curl.

    The previous failure check compared the response text to the integer 1 and
    could never trigger.  The response is now parsed as JSON and treated as a
    failure when it is not a JSON object or carries an ``error`` key.

    :param fn: path to a text file with one JSON document path per line
    :param config_file: configuration file understood by ``parse_config``
    :return: 0 when every document was stored; exits with status 1 otherwise
    """
    import json

    (server, user, password, db, http_proxy, https_proxy, no_proxy) = parse_config(config_file)
    set_proxy(http_proxy, https_proxy, no_proxy)
    with open(fn, 'r') as fh:
        for obj in fh:
            obj = obj.rstrip('\n')
            # Get uuid command
            get_uuid = 'curl -X GET ' + server + '/_uuids -k;'
            sys.stderr.write(date_time() + get_uuid + '\n')
            uuid_out = check_output(get_uuid, shell=True)
            # typical response: {"uuids":["24ec4b43cfe304ff4709e76f7400074d"]}
            # so the first quoted word is the key and the second is the uuid
            m = re.findall(r'"(\w+)"', uuid_out)
            uuid = m[1]
            couch_cmd = ('curl -X PUT -d @' + obj + ' "' + server + '/' + db + '/' + uuid
                         + '" -H "Content-Type: application/json" -k -u "' + user + ':' + password + '"')
            sys.stderr.write(date_time() + couch_cmd + '\n')
            # get response
            result = check_output(couch_cmd, shell=True)
            result = result.rstrip('\n')
            sys.stderr.write(obj + '\t' + result + '\n')
            try:
                response = json.loads(result)
            except ValueError:
                response = None
            if not isinstance(response, dict) or 'error' in response:
                sys.stderr.write(date_time() + 'Database update failed for qc stats. Check connection\n')
                exit(1)
    return 0
def download_from_swift(cont, obj, lane_list):
    """Download per-lane QC stats from swift and print them as one table.

    Each line of *lane_list* is ``bid<TAB>seqtype<TAB>lane_csv`` where the
    lanes are separated by ``', '``.  For every lane the object
    ``<obj>/<bid>/QC/<bid>_<lane>.qc_stats.txt`` is downloaded, and its first
    two lines are read as header and data row.  The last header seen and all
    data rows are written to stdout.

    :return: 0 on success; exits with status 1 if a download fails
    """
    src_cmd = ". /home/ubuntu/.novarc;"
    head = ''
    data = []
    with open(lane_list, 'r') as lanes:
        for line in lanes:
            (bid, seqtype, lane_csv) = line.rstrip('\n').split('\t')
            for lane in lane_csv.split(', '):
                cur = obj + '/' + bid + '/QC/' + bid + '_' + lane + '.qc_stats.txt'
                swift_cmd = src_cmd + "swift download " + cont + " --skip-identical --prefix " + cur
                sys.stderr.write(date_time() + swift_cmd + "\n")
                try:
                    check_output(swift_cmd, shell=True, stderr=subprocess.PIPE)
                except (subprocess.CalledProcessError, OSError):
                    # narrowed from a bare except so Ctrl-C is not reported
                    # as a failed download
                    sys.stderr.write(date_time() + "Download of " + obj + " from " + cont + " failed\n")
                    exit(1)
                with open(cur, 'r') as stat:
                    head = next(stat)
                    data.append(next(stat))
    sys.stdout.write(head)
    for datum in data:
        sys.stdout.write(datum)
    return 0
def job_manager(cmd_list, max_t):
    """Run shell commands with at most *max_t* of them active at once.

    The first ``min(max_t, len(cmd_list))`` commands are started immediately.
    Every 30 seconds each slot is polled; a slot whose command exited with 0
    is refilled with the next pending command.  Any non-zero exit code aborts
    the run: jobs still running are killed and the process exits with status 1.
    (Previously only exit code 1 was recognised, so a job ending with any other
    non-zero code was never counted and the loop never finished.)

    :param cmd_list: list of shell command strings
    :param max_t: maximum number of concurrent jobs (converted with int())
    :return: 0 once all commands have completed successfully
    :raises ValueError: if there are commands to run but *max_t* is below 1
    """
    x = len(cmd_list)
    # cur position in command list
    cur = 0
    # completed
    comp = 0
    # initialize process list
    p = {}
    sys.stderr.write(date_time() + 'Initializing run\n')
    n = min(int(max_t), x)
    if x > 0 and n < 1:
        # with no slots nothing would ever complete and the loop would spin
        raise ValueError('max_t must be at least 1 to run any jobs')
    for i in range(n):
        p[i] = {
            'job': subprocess.Popen(cmd_list[i], shell=True),
            'cmd': cmd_list[i],
            'status': 'Running',
        }
        sys.stderr.write(cmd_list[i] + '\n')
        cur += 1
    s = 0
    j = 30
    m = 30
    sleep_cmd = 'sleep ' + str(j) + 's'
    while comp < x:
        if s % m == 0:
            sys.stderr.write(date_time() + 'Checking job statuses. ' + str(comp) + ' of ' + str(x)
                             + ' completed. ' + str(s) + ' seconds have passed\n')
        for i in range(n):
            check = p[i]['job'].poll()
            if check is None:
                # still running
                continue
            if check != 0:
                sys.stderr.write(date_time() + 'Job returned an error while running ' + p[i]['cmd']
                                 + ' aborting!\n')
                for k in range(n):
                    # only signal jobs that have not finished yet
                    if p[k]['job'].poll() is None:
                        p[k]['job'].kill()
                        sys.stderr.write('Killing job ' + str(k) + '\n')
                exit(1)
            if p[i]['status'] != '0':
                comp += 1
                p[i]['status'] = '0'
                if cur < x:
                    try:
                        p[i]['job'] = subprocess.Popen(cmd_list[cur], shell=True)
                        p[i]['cmd'] = cmd_list[cur]
                        p[i]['status'] = 'Running'
                        cur += 1
                    except (OSError, ValueError):
                        sys.stderr.write(date_time() + "Tried to queue command " + p[i]['cmd'] + '\n was '
                                         + str(cur) + ' in command list, ' + str(i) + ' in queue list\n')
                        exit(1)
        if comp >= x:
            # everything finished; skip the final sleep
            break
        s += j
        subprocess.call(sleep_cmd, shell=True)
    sys.stderr.write(date_time() + str(comp) + ' jobs completed\n')
    return 0
def download_from_swift(cont, obj):
    """Download every object under prefix *obj* from swift container *cont*.

    ``/home/ubuntu/.novarc`` is sourced first and ``--skip-identical`` is
    passed to swift.  The command is logged to stderr.

    :return: 0 on success; exits with status 1 if the command fails
    """
    src_cmd = ". /home/ubuntu/.novarc;"
    swift_cmd = src_cmd + "swift download " + cont + " --skip-identical --prefix " + obj
    sys.stderr.write(date_time() + swift_cmd + "\n")
    try:
        check_output(swift_cmd, shell=True, stderr=subprocess.PIPE)
    except (subprocess.CalledProcessError, OSError):
        # narrowed from a bare except so Ctrl-C is not reported as a failure
        sys.stderr.write(date_time() + "Download of " + obj + " from " + cont + " failed\n")
        exit(1)
    return 0
def upload_to_swift(cont, obj):
    """Upload the current directory to swift container *cont* as *obj*.

    ``/home/ubuntu/.novarc`` is sourced first.  ``--skip-identical`` is passed
    and the segment size (``-S``) is set to 1 GiB.  The command is logged to
    stderr.

    :return: 0 on success; exits with status 1 if the command fails
    """
    ONE_GB = 1073741824
    src_cmd = ". /home/ubuntu/.novarc;"
    swift_cmd = (src_cmd + "swift upload " + cont + " ./ --skip-identical --object-name " + obj
                 + " -S " + str(ONE_GB))
    sys.stderr.write(date_time() + swift_cmd + "\n")
    try:
        check_output(swift_cmd, shell=True, stderr=subprocess.PIPE)
    except (subprocess.CalledProcessError, OSError):
        # narrowed from a bare except so Ctrl-C is not reported as a failure
        sys.stderr.write(date_time() + "Upload of " + obj + " to " + cont + " failed\n")
        exit(1)
    return 0
def job_manager(cmd_list, max_t):
    """Run shell commands with at most *max_t* of them active at once.

    The first ``min(max_t, len(cmd_list))`` commands are started immediately.
    Every 30 seconds each slot is polled; a slot whose command exited with 0
    is refilled with the next pending command.  Any non-zero exit code aborts
    the run: jobs still running are killed and the process exits with status 1.
    (Previously only exit code 1 was recognised, so a job ending with any other
    non-zero code was never counted and the loop never finished.)

    :param cmd_list: list of shell command strings
    :param max_t: maximum number of concurrent jobs (converted with int())
    :return: 0 once all commands have completed successfully
    :raises ValueError: if there are commands to run but *max_t* is below 1
    """
    x = len(cmd_list)
    # cur position in command list
    cur = 0
    # completed
    comp = 0
    # initialize process list
    p = {}
    sys.stderr.write(date_time() + 'Initializing run\n')
    n = min(int(max_t), x)
    if x > 0 and n < 1:
        # with no slots nothing would ever complete and the loop would spin
        raise ValueError('max_t must be at least 1 to run any jobs')
    for i in range(n):
        p[i] = {
            'job': subprocess.Popen(cmd_list[i], shell=True),
            'cmd': cmd_list[i],
            'status': 'Running',
        }
        sys.stderr.write(cmd_list[i] + '\n')
        cur += 1
    s = 0
    j = 30
    m = 30
    sleep_cmd = 'sleep ' + str(j) + 's'
    while comp < x:
        if s % m == 0:
            sys.stderr.write(date_time() + 'Checking job statuses. ' + str(comp) + ' of ' + str(x)
                             + ' completed. ' + str(s) + ' seconds have passed\n')
        for i in range(n):
            check = p[i]['job'].poll()
            if check is None:
                # still running
                continue
            if check != 0:
                sys.stderr.write(date_time() + 'Job returned an error while running ' + p[i]['cmd']
                                 + ' aborting!\n')
                for k in range(n):
                    # only signal jobs that have not finished yet
                    if p[k]['job'].poll() is None:
                        p[k]['job'].kill()
                        sys.stderr.write('Killing job ' + str(k) + '\n')
                exit(1)
            if p[i]['status'] != '0':
                comp += 1
                p[i]['status'] = '0'
                if cur < x:
                    try:
                        p[i]['job'] = subprocess.Popen(cmd_list[cur], shell=True)
                        p[i]['cmd'] = cmd_list[cur]
                        p[i]['status'] = 'Running'
                        cur += 1
                    except (OSError, ValueError):
                        sys.stderr.write(date_time() + "Tried to queue command " + p[i]['cmd'] + '\n was '
                                         + str(cur) + ' in command list, ' + str(i) + ' in queue list\n')
                        exit(1)
        if comp >= x:
            # everything finished; skip the final sleep
            break
        s += j
        subprocess.call(sleep_cmd, shell=True)
    sys.stderr.write(date_time() + str(comp) + ' jobs completed\n')
    return 0
def bid_swift_list(cont, obj, blist):
    """Run ``swift list`` for every ID in *blist* under ``<obj>/<id>``.

    ``/home/ubuntu/.novarc`` is sourced before each command.  The listing goes
    to the inherited stdout; each command is logged to stderr.  A failed
    listing is logged and the loop continues.

    :param cont: swift container name
    :param obj: object prefix inside the container
    :param blist: path to a text file with one ID per line
    :return: always 0
    """
    src_cmd = ". /home/ubuntu/.novarc;"
    with open(blist, 'r') as fh:
        for bid in fh:
            # drop the newline so it does not end up inside the command and log
            bid = bid.rstrip('\n')
            swift_cmd = src_cmd + "swift list " + cont + " --prefix " + obj + "/" + bid
            sys.stderr.write(date_time() + swift_cmd + "\n")
            try:
                status = call(swift_cmd, shell=True)
            except OSError:
                status = 1
            # call() reports failure through its return code, not an exception
            if status != 0:
                sys.stderr.write(date_time() + "Lising of " + bid + ' of ' + obj + " from " + cont + " failed\n")
    return 0
def bid_swift_list(cont, obj, blist, novarc):
    """Run ``swift list`` for every ID in *blist* under ``<obj>/<id>/``.

    ``source_novarc(novarc)`` is called once up front.  The listing goes to the
    inherited stdout; each command is logged to stderr.

    :param cont: swift container name
    :param obj: object prefix inside the container
    :param blist: path to a text file with one ID per line
    :param novarc: passed to ``source_novarc``
    :return: 0 when every listing succeeded, 1 as soon as one fails
    """
    source_novarc(novarc)
    with open(blist, 'r') as fh:
        for bid in fh:
            bid = bid.rstrip('\n')
            swift_cmd = "swift list " + cont + " --prefix " + obj + "/" + bid + "/"
            sys.stderr.write(date_time() + swift_cmd + "\n")
            try:
                status = call(swift_cmd, shell=True)
            except OSError:
                status = 1
            # call() reports failure through its return code, not an exception
            if status != 0:
                sys.stderr.write(date_time() + "Lising of " + bid + ' of ' + obj + " from " + cont + " failed\n")
                return 1
    return 0
def setup_vm(bid, image, flavor, key, timeout):
    """Boot a nova VM named ``vm_pipe_<bid>`` and copy ``.novarc`` onto it.

    After ``nova boot`` the VM is polled with ``nova show`` every 30 seconds
    until its status is ``ACTIVE``.  Then the function pauses 60 seconds, adds
    the VM's host key to ``~/.ssh/known_hosts`` and rsyncs
    ``/home/ubuntu/.novarc`` to the VM.

    ``nova boot`` now sources ``.novarc`` like the other nova/cinder commands
    in this function.

    :param bid: sample-set identifier used in the VM name
    :param image: image name passed to ``--image``
    :param flavor: flavor passed to ``--flavor``
    :param key: key pair name passed to ``--key-name``
    :param timeout: maximum number of seconds to wait for boot
    :return: ``[vm_id, vm_ip]``
    :raises Exception: on boot timeout or when the VM status becomes ERROR
    """
    sys.stderr.write(date_time() + ': Starting VM QC for sample set ' + str(bid) + '.\n')
    # Source .novarc command
    src_cmd = '. /home/ubuntu/.novarc; '
    # Build nova boot command
    vm_name = 'vm_pipe_' + str(bid)
    nova_boot_cmd = (src_cmd + 'nova boot ' + vm_name + ' --image ' + image + ' --flavor ' + str(flavor)
                     + ' --key-name ' + key)
    sys.stderr.write(date_time() + ': Booting up VM.\n' + nova_boot_cmd + '\n')
    nova_boot_cmd_output = subprocess.check_output(nova_boot_cmd, shell=True)
    # Get ID of VM in the event another has the same display name
    vm_id = get_nova_show_attr(nova_boot_cmd_output, 'id')
    # Check status of VM every 30 seconds until finished spawning
    sleep_time = 30
    sleep_cmd = 'sleep ' + str(sleep_time) + 's'
    elapsed_time = 0
    vm_still_booting = True
    nova_show_cmd = src_cmd + 'nova show ' + vm_id
    while vm_still_booting:
        sys.stderr.write(date_time() + ': Sleeping ' + str(sleep_time) + 's.\n')
        subprocess.call(sleep_cmd, shell=True)
        elapsed_time += sleep_time
        if elapsed_time > int(timeout):
            # TODO I think we should delete the VM somehow, wait until ACTIVE
            raise Exception('FATAL ERROR: VM still booting as timeout of ' + str(timeout)
                            + 's was reached. Increase timeout and try again.\n')
        sys.stderr.write(date_time() + ': Checking success of VM boot. ' + str(elapsed_time)
                         + ' seconds have passed.\n')
        nova_show_cmd_output = subprocess.check_output(nova_show_cmd, shell=True)
        vm_status = get_nova_show_attr(nova_show_cmd_output, 'status')
        if vm_status == 'ACTIVE':
            vm_still_booting = False
            vm_ip = get_nova_show_attr(nova_show_cmd_output, 'private_network')
        if vm_status == 'ERROR':
            raise Exception('FATAL ERROR: VM boot produced ERROR for ' + vm_name
                            + '. Check connection settings and try again.\n')
    # VM has now booted up, transfer .novarc to new VM
    sys.stderr.write(date_time() + ': VM booted!\n')
    sleep_cmd = 'sleep 60s'
    sys.stderr.write(date_time() + ': Pausing 60s to give VM a chance to initialize.\n')
    subprocess.call(sleep_cmd, shell=True)
    # TODO should we have a more robust check?
    rsync_nova_var_cmd = ('ssh-keyscan ' + vm_ip + ' >> ~/.ssh/known_hosts;rsync /home/ubuntu/.novarc ubuntu@'
                          + vm_ip + ':/home/ubuntu')
    sys.stderr.write(date_time() + ': Copying openstack variables to VM\n' + rsync_nova_var_cmd + '\n')
    subprocess.call(rsync_nova_var_cmd, shell=True)
    sys.stderr.write(date_time() + ': VM setup for ' + vm_name + ' with IP address ' + vm_ip + ' with ID '
                     + vm_id + ' was successful.\n')
    # Return VM information
    return [vm_id, vm_ip]
def metalfox_pipe(config_file, sample_pairs, ref_mnt):
    """Build one metalfox shell job per sample pair and hand them to job_manager.

    Each line of *sample_pairs* is tab-separated; field 0 names the analysis
    and field 1 the sample whose BAM is looked up with ``swift list``.  Every
    job sources ``~/.novarc``, unsets the proxy variables, downloads the two
    files returned by the listing plus the ``.out.keep`` file, and runs
    metalfox with its output redirected to ``<field0>.foxog_scored_added.out``.

    :param config_file: configuration file understood by ``parse_config``
    :param sample_pairs: path to the tab-separated pair list
    :param ref_mnt: directory prefixed to the map reference from the config
    """
    (metalfox_tool, cont, obj, map_ref, max_t, ram) = parse_config(config_file)
    map_ref = ref_mnt + '/' + map_ref
    preamble = '. ~/.novarc;' + 'unset http_proxy; unset https_proxy;'
    job_list = []
    pairs = open(sample_pairs, 'r')
    for sn in pairs:
        info = sn.rstrip('\n').split('\t')
        sys.stderr.write('Getting bam file name for ' + info[1] + '\n')
        get_bam_name = ('swift list ' + cont + ' --prefix ' + obj + '/' + info[1] + '/BAM/' + info[1]
                        + ' | grep .rmdup.srt.ba* ')
        bam = subprocess.check_output(get_bam_name, shell=True).split('\n')
        fetch = 'swift download --skip-identical ' + cont + ' '
        dl_bam = fetch + bam[1] + ';' + fetch + bam[0] + ';'
        mut_out = 'ANALYSIS/' + info[0] + '/OUTPUT/' + info[0] + '.out.keep'
        dl_out = 'swift download ' + cont + ' ' + mut_out + ';'
        # .bai/.bam extension not always clear
        bam_file = bam[1] if bam[1][-3:] == 'bam' else bam[0]
        run_metal = (metalfox_tool + ' -f1 ' + mut_out + ' -f3 ' + bam_file + ' -m ' + map_ref + ' > '
                     + info[0] + '.foxog_scored_added.out;')
        # built but currently not appended to the job
        cleanup = 'rm ' + ' '.join((bam[0], bam[1], mut_out)) + ';'
        job_list.append(preamble + dl_bam + dl_out + run_metal)  # + cleanup)
    pairs.close()
    sys.stderr.write(date_time() + 'Queueing jobs\n')
    job_manager(job_list, max_t)
def fastqc(fastqc_tool, sample, end1, end2, t):
    """Launch FastQC on a read pair in the background and check for early failure.

    The command writes its reports to ``QC/`` and is logged to
    ``<log_dir><sample>.fastqc.log``, where ``log_dir`` is ``LOGS/`` if that
    directory exists and ``./`` otherwise.  After 20 seconds the process is
    polled; if it has already exited with any non-zero status the error is
    logged and the program exits with status 1.  (Previously only exit
    status 1 was treated as an error.)

    :param t: thread count as a string (concatenated into the command)
    :return: 0 if FastQC is still running or finished cleanly after 20 seconds
    """
    # casual logging - look for a LOGS directory, otherwise assume current dir
    log_dir = 'LOGS/' if os.path.isdir('LOGS') else './'
    loc = log_dir + sample + '.fastqc.log'
    fastqc_cmd = fastqc_tool + ' -t ' + t + ' -o QC/ ' + end1 + ' ' + end2
    log(loc, date_time() + fastqc_cmd + "\n")
    f = Popen(fastqc_cmd, shell=True, stdin=None, stdout=None, stderr=None, close_fds=True)
    # check after 20 seconds whether the process is still good - shouldn't take
    # too long to ascertain whether phred score didn't fit
    call('sleep 20s', shell=True)
    status = f.poll()
    if status is not None and status != 0:
        log(loc, date_time() + 'fastqc returned an error. Check your inputs and try again!\n')
        exit(1)
    return 0
def delete_from_swift_list(cont, fn, l):
    """Delete every object listed in *fn* from swift container *cont*.

    Lines that are empty or start with a non-word character are skipped as a
    safety measure.  Proxy variables are unset and ``/home/ubuntu/.novarc`` is
    sourced before each ``swift delete``; stdout and stderr of the command are
    appended to ``dl_log.txt``.

    :param cont: swift container name
    :param fn: path to a text file with one object name per line
    :param l: ``'y'`` to pass ``--leave-segments`` to ``swift delete``
    :return: always 0
    """
    src_cmd = ". /home/ubuntu/.novarc;"
    deproxy = 'unset http_proxy; unset https_proxy;'
    segments_flag = "--leave-segments " if l == 'y' else ""
    with open(fn, 'r') as fh:
        for obj in fh:
            obj = obj.rstrip('\n')
            if obj == '' or re.match(r'\W+', obj):
                sys.stderr.write(date_time() + 'Object ' + obj + ' looks malformed, skipping for safety reasons!\n')
                continue
            swift_cmd = (deproxy + src_cmd + "swift delete " + segments_flag + cont + " " + obj
                         + " >> dl_log.txt 2>> dl_log.txt")
            sys.stderr.write(date_time() + swift_cmd + "\n")
            call(swift_cmd, shell=True)
    return 0
def __check(self, url):
    """Sniff *url* again if it has no SNIFF_LAST value or that value is stale.

    The interval is the url's first SNIFF_EVERY value, defaulting to
    SECONDS_IN_DAY.  When a sniff is due, SNIFF_LAST is replaced with the
    current date_time() before sniffing.
    """
    interval = int(self.get_first_value(url, SNIFF_EVERY, str(SECONDS_IN_DAY)))
    # TODO: take the oldest value
    previous = self.get_first_value(url, SNIFF_LAST, None)
    if previous and not (time() - parse_date_time(previous) > interval):
        # sniffed recently enough; nothing to do
        return
    self.remove(url, SNIFF_LAST, None)
    self.add(url, SNIFF_LAST, literal(date_time()))
    self.sniff(url)
def check_manifest(manifest, body):
    """
    check if a body is the same object described by the manifest

    Each manifest segment's ``bytes`` are read from *body* in order and their
    md5 is compared with the segment's ``hash``.  Segments are hashed in 1 MiB
    chunks so a large segment is never held in memory at once.  Data remaining
    in *body* after the last segment also counts as a mismatch.

    :param manifest: the raw body of the manifest from swift
    :param body: a file like object to check against the manifest
    :return: True when every segment matches and no data is left over
    """
    chunk_size = 1024 * 1024
    sys.stderr.write(date_time() + 'Checking manifest\n')
    manifest = json.loads(manifest)
    for segment in manifest:
        sys.stderr.write(date_time() + segment['name'] + '\n')
        hasher = md5()
        remaining = segment['bytes']
        while remaining > 0:
            chunk = body.read(min(chunk_size, remaining))
            if not chunk:
                # body ended before the segment did; the digest will not match
                break
            hasher.update(chunk)
            remaining -= len(chunk)
        digest = hasher.hexdigest()
        sys.stderr.write(date_time() + '%s ?= %s' % (digest, segment['hash'] + '\n'))
        if digest != segment['hash']:
            sys.stderr.write('Not the same\n')
            return False
    if body.read(1):
        # body is longer than the object the manifest describes
        sys.stderr.write('Not the same\n')
        return False
    sys.stderr.write('The same\n')
    return True
def bwa_mem_pe(bwa_tool, RGRP, bwa_ref, end1, end2, samtools_tool, samtools_ref, sample, log_dir):
    """Align a read pair with ``bwa mem`` (8 threads) and write ``<sample>.bam``.

    bwa output is piped through ``samtools view -bT``; output of the whole
    pipeline is redirected to ``<log_dir><sample>.bwa.pe.log``.  The command is
    also passed to ``log`` with a timestamp.

    :param RGRP: read-group string passed to ``-R``
    :return: 0 on success; exits with status 1 if the command fails
    """
    loc = log_dir + sample + ".bwa.pe.log"
    bwa_cmd = ("(" + bwa_tool + " mem -t 8 -R \"" + RGRP + "\" -v 2 " + bwa_ref + " " + end1 + " " + end2
               + " | " + samtools_tool + " view -bT " + samtools_ref + " - > " + sample + ".bam) > "
               + loc + " 2>&1")
    log(loc, date_time() + bwa_cmd + "\n")
    try:
        subprocess.check_output(bwa_cmd, shell=True)
    except (subprocess.CalledProcessError, OSError):
        # narrowed from a bare except; record why the run stops
        log(loc, date_time() + 'bwa mem failed for sample ' + sample + '\n')
        exit(1)
    return 0
def bwt2_pe(bwt_tool, bwt_ref, end1, end2, samtools_tool, samtools_ref, sample, t, log_dir):
    """Align a read pair with bowtie2 and write ``<sample>.bam``.

    bowtie2 output is piped through ``samtools view -bT``; output of the whole
    pipeline is redirected to ``<log_dir><sample>.bwt.pe.log``.  The command is
    also passed to ``log`` with a timestamp.

    ``call`` does not raise on a non-zero exit status, so the status is checked
    explicitly.  Previously a failed alignment still returned 0.

    :param t: thread count as a string (concatenated into the command)
    :return: 0 on success; exits with status 1 if the command fails
    """
    loc = log_dir + sample + ".bwt.pe.log"
    bwt_cmd = ("(" + bwt_tool + " --fr -p " + t + " -I 0 -X 500 -x " + bwt_ref + " -1 " + end1 + " -2 " + end2
               + " | " + samtools_tool + " view -bT " + samtools_ref + " - > " + sample + ".bam) > "
               + loc + " 2>&1")
    log(loc, date_time() + bwt_cmd + "\n")
    try:
        status = call(bwt_cmd, shell=True)
    except OSError:
        status = 1
    if status != 0:
        log(loc, date_time() + 'bowtie2 failed for sample ' + sample + '\n')
        exit(1)
    return 0
def download_from_swift_list(cont, fn):
    """Download every object listed in *fn* from swift container *cont*.

    Proxy variables are unset and ``/home/ubuntu/.novarc`` is sourced before
    each ``swift download --skip-identical``; stdout of the command is appended
    to ``dl_log.txt``.

    The trailing newline of each line is now removed.  Left in place it split
    the shell command in two, so the ``>> dl_log.txt`` redirect was not applied
    to the download.  Blank lines are skipped because an empty object name
    would make swift download the entire container.

    :param cont: swift container name
    :param fn: path to a text file with one object name per line
    :return: always 0
    """
    src_cmd = ". /home/ubuntu/.novarc;"
    deproxy = 'unset http_proxy; unset https_proxy;'
    with open(fn, 'r') as fh:
        for obj in fh:
            obj = obj.rstrip('\n')
            if not obj.strip():
                continue
            swift_cmd = deproxy + src_cmd + "swift download " + cont + " --skip-identical " + obj + " >> dl_log.txt"
            sys.stderr.write(date_time() + swift_cmd + "\n")
            call(swift_cmd, shell=True)
    return 0
def _align_summary_mate_fields(fh):
    """Read one per-mate section of align_summary.txt; return its four fields."""
    next(fh)  # section heading line
    start = re.search(r'(\d+)\n$', next(fh)).group(1)
    mapped = re.search(r'\(\s*(\S+) of input\)\n', next(fh)).group(1)
    multi = re.search(r'\(\s*(\S+)\).*\((\d+) have >20\)\n', next(fh))
    return [start, mapped, multi.group(1), multi.group(2)]


def align_stats(sample):
    """Convert a sample's alignment summary into a one-row tab-separated table.

    Reads ``<sample>/align_summary.txt`` and the 8th line of
    ``<sample>_subset.insert_metrics.hist`` (fields 5 and 6, truncated to
    integers) and writes a header line plus one data row to
    ``<sample>.align.txt``.  A timestamped note is passed to ``log`` for
    ``<log_dir><sample>.aln.log``, where ``log_dir`` is ``LOGS/`` if that
    directory exists and ``./`` otherwise.

    All files are now closed explicitly.  The original ended with ``fo.close``
    without parentheses, which never closed the output file.

    :return: always 0
    :raises AttributeError: if a summary line does not match its pattern
    :raises StopIteration: if an input file has fewer lines than expected
    """
    # casual logging - look for a LOGS directory, otherwise assume current dir
    log_dir = 'LOGS/' if os.path.isdir('LOGS') else './'
    loc = log_dir + sample + '.aln.log'
    log(loc, date_time() + "Converting to table summary format\n")
    header = ('Sample\tMean insert size estimate(10k reads)\tStd dev read insert size estimate(10 k reads)'
              '\tStarting left reads\t% mapped\tmultimapped(mm)\tgt 20 mm\tStarting right reads\t% mapped'
              '\t% mm\tgt 20 mm\tOverall map rate\tAligned pairs\t% mm\t% discordant\t% condordant\n')
    with open(sample + '/' + 'align_summary.txt', 'r') as fh, open(sample + '.align.txt', 'w') as fo:
        fo.write(header + sample + '\t')
        with open(sample + '_subset.insert_metrics.hist') as fi:
            # skip the first seven lines; the eighth holds the metrics
            for _ in range(7):
                next(fi)
            stat = next(fi).split('\t')
        fields = [str(int(float(stat[4]))), str(int(float(stat[5])))]
        fields.extend(_align_summary_mate_fields(fh))  # left reads
        fields.extend(_align_summary_mate_fields(fh))  # right reads
        fields.append(re.search(r'\s*(^\S+)', next(fh)).group(1))  # overall map rate
        next(fh)
        fields.append(re.search(r'(\d+)\n$', next(fh)).group(1))  # aligned pairs
        fields.append(re.search(r'\(\s*(\S+)\) have', next(fh)).group(1))
        fields.append(re.search(r'\(\s*(\S+)\) are', next(fh)).group(1))
        fields.append(re.search(r'^\s*(\S+)', next(fh)).group(1))
        fo.write('\t'.join(fields) + '\n')
    return 0
def attach_cinder(snapshot_id, vm_id, bid, cinder_size, vm_ip, timeout, mount_sh_path):
    """Create a cinder volume from a snapshot, attach it to a VM and mount it.

    The volume is named ``refs_<bid>``.  After ``cinder create`` the function
    waits (via ``wait_until_status``) for status ``available``, attaches the
    volume with ``nova volume-attach``, waits for ``in-use``, then pipes
    *mount_sh_path* to ``sh -s`` on the VM over ssh with the volume name as
    argument.

    The mount command now reuses the volume name built with ``str(bid)``, so a
    non-string *bid* no longer fails there after the volume has been created.

    :param snapshot_id: cinder snapshot ID to create the volume from
    :param vm_id: ID of the VM to attach to
    :param bid: identifier used in the volume name
    :param cinder_size: volume size passed to ``cinder create``
    :param vm_ip: address used for the ssh connection
    :param timeout: seconds passed to each ``wait_until_status`` call
    :param mount_sh_path: local path of the mount script
    """
    cinder_name = 'refs_' + str(bid)
    sys.stderr.write(date_time() + ': Creating cinder volume ' + cinder_name + ' using snapshot ID '
                     + snapshot_id + ' to VM with ID ' + vm_id + '\n')
    # Source .novarc command
    src_cmd = '. /home/ubuntu/.novarc; '
    # Build cinder create command
    cinder_create_cmd = (src_cmd + 'cinder create ' + str(cinder_size) + ' --snapshot-id ' + snapshot_id
                         + ' --display-name ' + cinder_name)
    sys.stderr.write(cinder_create_cmd + '\n')
    cinder_create_output = subprocess.check_output(cinder_create_cmd, shell=True)
    # Get cinder id
    cinder_id = get_cinder_show_attr(cinder_create_output, 'id')
    # Check status of cinder every 30 seconds until finished spawning
    wait_until_status('available', 'boot', cinder_id, timeout)
    # Cinder is now booted, attach to VM
    sys.stderr.write(date_time() + ': Cinder create for ' + cinder_name + ' with ID ' + cinder_id
                     + ' was successful. Attaching to VM.\n')
    volume_attach_cmd = src_cmd + 'nova volume-attach ' + vm_id + ' ' + cinder_id
    sys.stderr.write(volume_attach_cmd + '\n')
    subprocess.call(volume_attach_cmd, shell=True)
    # Make sure cinder attaches
    wait_until_status('in-use', 'attach', cinder_id, timeout)
    # Set mount point in VM
    sys.stderr.write(date_time() + ': Mounting volume in VM.\n')
    mount_cmd = ('ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@' + vm_ip
                 + ' \"sh -s\" < ' + mount_sh_path + ' \"' + cinder_name + '\" exit;')
    sys.stderr.write(mount_cmd + '\n')
    subprocess.call(mount_cmd, shell=True)
    sys.stderr.write(date_time() + ': Cinder successfully mounted.\n')
def star(STAR, sam, genome, end1, end2, sample, log_dir, th):
    """Align a gzipped read pair with STAR, producing an unsorted BAM.

    Output files are prefixed with *sample*; STAR's stderr is appended to
    ``<log_dir><sample>.star.log`` and the command is passed to ``log`` with a
    timestamp.  The command's exit status is not checked.

    :param sam: unused by this function
    :param th: thread count as a string (concatenated into the command)
    :return: always 0
    """
    loc = log_dir + sample + ".star.log"
    star_cmd = (STAR + " --runMode alignReads --outFileNamePrefix " + sample + " --runThreadN " + th
                + " --genomeDir " + genome + " --readFilesIn " + end1 + " " + end2
                + " --readFilesCommand zcat --outSAMtype BAM Unsorted --outFilterType BySJout"
                + " --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1"
                + " --outFilterMismatchNmax 8 --alignIntronMin 20 --alignIntronMax 1000000"
                + " --alignMatesGapMax 1000000 2>> " + loc)
    log(loc, date_time() + star_cmd + "\n")
    call(star_cmd, shell=True)
    return 0
def add_comment(self, comment, nick):
    """Attach a serialized comment to this item and record a statement about it.

    One statement links ``self.uri`` to the comment.  Six more, on a freshly
    generated URI, record its type, subject, predicate and object together
    with the nick and the current time.
    """
    comment = self._serialize_comment(nick, comment)
    state = self.make_statement
    state(self.uri, COMMENT, literal(comment))

    statement_uri = self.store.generate_uri()
    state(statement_uri, TYPE, STATEMENT)
    state(statement_uri, SUBJECT, self.uri)
    state(statement_uri, PREDICATE, COMMENT)
    state(statement_uri, OBJECT, literal(comment))
    state(statement_uri, CHUMP_WHO, literal(nick))
    state(statement_uri, CHUMP_TIME, literal(date_time(time.time())))
def get_fqc_stats(bnids, cont, obj, novarc):
    """Download the FastQC html files for the IDs listed in *bnids*.

    A shell pipeline lists ``<obj>/<id>/QC`` for every ID, keeps names
    containing ``html`` but not ``report``, and saves them to
    ``<cont>_fqc.txt``.  After ``setup_dirs`` is called on that list, every
    path is downloaded to ``FASTQC/<id>/QC/<file name>``, where ``<id>`` is the
    second ``/``-separated component of the path.

    :param bnids: path to a text file with one ID per line
    :param cont: swift container name
    :param obj: object prefix inside the container
    :param novarc: passed to ``source_novarc``
    :return: always 0
    """
    source_novarc(novarc)
    flist = cont + '_fqc.txt'
    get_list_cmd = ('cat ' + bnids + ' | xargs -IBN swift list ' + cont + ' --prefix ' + obj
                    + '/BN/QC | grep html | grep -v report > ' + flist)
    sys.stderr.write(date_time() + get_list_cmd + '\n')
    subprocess.call(get_list_cmd, shell=True)
    sys.stderr.write(date_time() + 'Setting up dirs\n')
    setup_dirs(flist)
    with open(flist) as listing:
        for path in listing:
            path = path.rstrip('\n')
            bnid = path.split('/')[1]
            fn = os.path.basename(path)
            dl_cmd = 'swift download ' + cont + ' ' + path + ' --output FASTQC/' + bnid + '/QC/' + fn
            sys.stderr.write(date_time() + dl_cmd + '\n')
            subprocess.call(dl_cmd, shell=True)
    sys.stderr.write('Process complete!\n')
    return 0
def cleanup(cid, vid, bid, vip):
    """Unmount, detach and delete a cinder volume, then delete its VM.

    Steps, each run through the shell without checking the exit status:
    run the unmount script on the VM over ssh, ``nova volume-detach``,
    sleep 30 seconds, ``cinder delete``, ``nova delete``.

    :param cid: cinder volume ID
    :param vid: VM ID
    :param bid: identifier used to build the volume name ``REFS_<bid>``
    :param vip: VM address used for the ssh connection
    """
    volume_name = "REFS_" + bid
    # need build variables to call nova successfully
    novarc = '. /home/ubuntu/.novarc;'

    def run(shell_cmd):
        subprocess.call(shell_cmd, shell=True)

    sys.stderr.write(date_time() + "Unmounting " + cid + " from vm with ID " + vid + "\n")
    ssh_unmount = ("ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@" + vip
                   + " \"sh -s\" < /home/ubuntu/TOOLS/Scripts/utility/unmount.sh \"" + volume_name + "\"")
    sys.stderr.write(date_time() + ssh_unmount + "\n")
    run(ssh_unmount)

    detach = novarc + "nova volume-detach " + vid + " " + cid
    sys.stderr.write(date_time() + detach + "\n")
    run(detach)
    run('sleep 30s')

    drop_volume = novarc + "cinder delete " + cid
    sys.stderr.write(date_time() + "Deleting cinder volume " + volume_name + "with id " + cid + "\n")
    run(drop_volume)

    drop_vm = novarc + "nova delete " + vid
    sys.stderr.write(date_time() + "Deleting vm with id " + vid + "\n")
    run(drop_vm)
def bid_swift_list(container, obj, bid_list_file_path):
    """Build and log a ``swift list`` command for every ID in a list file.

    For each line of *bid_list_file_path* the command
    ``. /home/ubuntu/.novarc; swift list <container> --prefix <obj>/<id>/``
    is written to stderr.

    :param container: swift container name
    :param obj: object prefix inside the container
    :param bid_list_file_path: path to a text file with one ID per line
    """
    # The '; ' separator is required; without it the sourced path and the
    # swift command fused into '.novarcswift list ...'.
    src_cmd = '. /home/ubuntu/.novarc; '
    with open(bid_list_file_path, 'r') as bid_list_file:
        for bid in bid_list_file:
            bid = bid.rstrip('\n')
            sys.stderr.write(date_time() + ': Executing swift list.\n')
            swift_list_cmd = (src_cmd + 'swift list ' + container + ' --prefix ' + obj + '/' + bid + '/')
            sys.stderr.write(swift_list_cmd + '\n')
            # NOTE(review): the command is only logged here, never run --
            # confirm whether execution was intended.
def main():
    """Check a local file against the segments of a swift manifest object.

    Command line: ``prog.py <filename> <container> <manifest> <openstack
    variable file>``.  The variable file is expected to contain lines such as
    ``export OS_USERNAME=xxx``; each is copied into ``os.environ``.  The
    storage URL and auth token are parsed from ``swift stat -v``, the manifest
    is fetched with ``multipart-manifest=get`` and handed to
    ``check_manifest`` together with the opened local file.

    :return: a usage string on wrong argument count; 0 if the file matches the
        manifest; 1 if it does not or the swift stat output cannot be parsed
    """
    try:
        _prog, filename, container, manifest, var = sys.argv
    except ValueError:
        return "usage: prog.py <filename> <container> <manifest> <openstack variable file>"
    # Expected variable file layout:
    #   export OS_TENANT_NAME=xxx
    #   export OS_USERNAME=xxx
    #   export OS_PASSWORD=xxx
    #   export OS_AUTH_URL="xxx"
    with open(var, 'r') as fh:
        for line in fh:
            info = line.split()
            if len(info) < 2 or '=' not in info[1]:
                # blank line, comment, or anything that is not "export NAME=value"
                continue
            # split once so values that contain '=' are kept whole
            name, value = info[1].split('=', 1)
            os.environ[name] = value
    # url, token = client.get_auth(os.environ['OS_AUTH_URL'], os.environ['OS_USERNAME'], os.environ['OS_PASSWORD'])
    # using client to get token and url doesn't seem to work, doing it the stupid way
    src_cmd = '. ' + var + ';'
    deproxy = 'unset http_proxy; unset https_proxy;'
    swift_cmd = deproxy + src_cmd + "swift stat -v " + container + " " + manifest
    sys.stderr.write(date_time() + swift_cmd + "\n")
    stat = check_output(swift_cmd, shell=True)
    header = re.search(r'URL: (\S+)\s+Auth Token: (\S+)\s+', stat)
    if header is None:
        sys.stderr.write(date_time() + 'Could not find URL and Auth Token in swift stat output\n')
        return 1
    url = header.group(1)
    token = header.group(2)
    # subtract container and manifest from url to get the storage url
    url = url.replace('/' + container + '/' + manifest, '')
    sys.stderr.write(date_time() + 'URL: ' + url + ' token: ' + token + '\n')
    headers, body = client.get_object(url, token, container, manifest, query_string='multipart-manifest=get')
    sys.stderr.write(date_time() + 'Object information recieved\n')
    # binary mode: the content is hashed byte for byte
    with open(filename, 'rb') as f:
        is_valid = check_manifest(body, f)
    return 0 if is_valid else 1
def cutadapter(sample, end1, end2, config_file):
    """Trim adapters from a read pair with cutadapt.

    Tool path, minimum length, adapters and trim lengths come from
    ``parse_config(config_file)``.  Inputs are read from ``../<end1>`` and
    ``../<end2>`` and trimmed reads are written to *end1* and *end2*.  The
    command's stdout and stderr are appended to
    ``<log_dir><sample>.cutadapt.log``, where ``log_dir`` is ``LOGS/`` if that
    directory exists and ``./`` otherwise.  The exit status is not checked.

    :return: always 0
    """
    # casual logging - look for a LOGS directory, otherwise assume current dir
    log_dir = 'LOGS/' if os.path.isdir('LOGS') else './'
    loc = log_dir + sample + '.cutadapt.log'
    (cutadapt_tool, minlen, r1adapt, r2adapt, r1trim, r2trim) = parse_config(config_file)
    pieces = [
        cutadapt_tool,
        '-m', minlen,
        '-a', r1adapt,
        '-A', r2adapt,
        '-u', r1trim,
        '-U', r2trim,
        '-o', end1,
        '-p', end2,
        '../' + end1,
        '../' + end2,
        '>>', loc,
        '2>>', loc,
    ]
    cutadapt_cmd = ' '.join(pieces)
    log(loc, date_time() + cutadapt_cmd + "\n")
    call(cutadapt_cmd, shell=True)
    return 0
def add_sniffed(self, href, label, sniffed_from): if not self.ignore(href): uri = resource(href) label = literal(label) if not self.exists(uri, None, None): print "ADDED:", href, label self.add(uri, LABEL, label) self.add(uri, TYPE, SNIFFED) timestamp = literal(date_time()) self.add(uri, SNIFFED_ON, timestamp) self.add(uri, SNIFFED_FROM, resource(sniffed_from))
def cufflinks(cufflinks_tool, ens_ref, genome, sample, log_dir, t):
    """Run cufflinks on ``<sample>/accepted_hits.bam`` with fr-firststrand.

    Results are written to the *sample* directory; stderr of the command is
    appended to ``<log_dir><sample>.cufflinks.log`` and the command is passed
    to ``log`` with a timestamp.

    :param ens_ref: annotation passed to ``-g``
    :param genome: reference passed to ``-b``
    :param t: thread count as a string (concatenated into the command)
    :return: 0 on success; exits with status 1 if the command fails
    """
    loc = log_dir + sample + ".cufflinks.log"
    # An earlier variant of this command used --library-type fr-secondstrand.
    cufflinks_cmd = (cufflinks_tool + " " + sample + "/accepted_hits.bam -g " + ens_ref + " -p " + t
                     + " --library-type fr-firststrand -b " + genome
                     + " -u --upper-quartile-norm --pre-mrna-fraction -o " + sample + " 2>> " + loc)
    log(loc, date_time() + cufflinks_cmd + "\n")
    try:
        subprocess.check_output(cufflinks_cmd, shell=True)
    except (subprocess.CalledProcessError, OSError):
        # narrowed from a bare except; record why the run stops
        log(loc, date_time() + 'cufflinks failed for sample ' + sample + '\n')
        exit(1)
    return 0
def novosort_sort_se(novosort, sample, log_dir, threads, ram):
    """Sort and index ``<sample>.bam`` with novosort into ``<sample>.srt.bam``.

    A ``novosort_tmp`` directory is created for the run and removed afterwards.
    Output of the command is redirected to
    ``<log_dir><sample>.novosort.sort.se.log``.

    :param threads: thread count as a string
    :param ram: memory in GB as a string (``G`` is appended)
    :return: the exit status of the novosort shell command; exits with status 1
        if the command could not be launched at all
    """
    loc = log_dir + sample + ".novosort.sort.se.log"
    novosort_sort_se_cmd = ('mkdir novosort_tmp;' + novosort + " --threads " + threads + " --ram " + ram
                            + "G --tmpdir novosort_tmp --output " + sample + ".srt.bam --index " + sample
                            + ".bam > " + loc + " 2>&1")
    log(loc, date_time() + novosort_sort_se_cmd + "\n")
    try:
        f = subprocess.call(novosort_sort_se_cmd, shell=True)
        subprocess.call('rm -rf novosort_tmp', shell=True)
    except OSError:
        log(loc, 'novosort sort failed for sample ' + sample + '\n')
        exit(1)
    if f != 0:
        # the status is still returned so the caller decides what to do
        log(loc, 'novosort sort failed for sample ' + sample + '\n')
    return f
def novosort_merge_pe(config_file, sample_list, wait):
    """Merge the BAM files of each listed sample with novosort.

    For every sample in *sample_list*, ``list_bam`` supplies the BAM list, the
    index list and their count.  With more than one BAM they are merged into
    ``<sample>.merged.bam``.  With exactly one, the BAM and its index are
    copied to ``<sample>.merged.final.bam`` / ``.bai``.  With none, an error
    is logged and the program exits (previously an IndexError).

    :param config_file: configuration file understood by ``parse_config``
    :param sample_list: path to a text file with one sample name per line
    :param wait: passed through to ``list_bam``
    :return: 0 on success; exits with status 1 on failure
    """
    (novosort, cont, obj) = parse_config(config_file)
    with open(sample_list, 'r') as fh:
        for sample in fh:
            sample = sample.rstrip('\n')
            (bam_list, bai_list, n) = list_bam(cont, obj, sample, wait)
            if n > 1:
                bam_string = ",".join(bam_list)
                novosort_merge_pe_cmd = (novosort + " --threads 8 --ram 28G --assumesorted --output " + sample
                                         + '.merged.bam --index --tmpdir ./TMP ' + bam_string)
                sys.stderr.write(date_time() + novosort_merge_pe_cmd + "\n")
                try:
                    subprocess.check_output(novosort_merge_pe_cmd, shell=True)
                except (subprocess.CalledProcessError, OSError):
                    sys.stderr.write(date_time() + 'novosort failed for sample ' + sample + '\n')
                    exit(1)
            elif n == 1:
                rename_bam = ('cp ' + bam_list[0] + ' ' + sample + '.merged.final.bam;cp ' + bai_list[0] + ' '
                              + sample + '.merged.final.bai')
                sys.stderr.write(date_time() + rename_bam + ' Only one associated bam file, renaming\n')
                subprocess.call(rename_bam, shell=True)
            else:
                sys.stderr.write(date_time() + 'No bam files found for sample ' + sample + '\n')
                exit(1)
    sys.stderr.write(date_time() + 'Merge process complete\n')
    return 0
def tophat(tophat_tool, tx, bwt2_ref, end1, end2, x, s, sample, log_dir, th):
    """Align a read pair with tophat (fr-firststrand) into the *sample* dir.

    ``--phred64-quals`` is added when the second ``_``-separated field of
    *sample* is an integer below 150409.  stderr of the command is appended to
    ``<log_dir><sample>.tophat.log``; the exit status is not checked.

    :param x: mate inner distance as a string
    :param s: mate standard deviation as a string
    :param th: thread count as a string
    :return: always 0
    """
    loc = log_dir + sample + ".tophat.log"
    # fix to determine which phred score to use using HGAC date assigned 150409 and greater phred33, else hpred 64
    epoch = 150409
    fields = sample.split('_')
    shared_tail = (" --mate-inner-dist " + x + " --mate-std-dev " + s + " --num-threads " + th
                   + " --library-type fr-firststrand --transcriptome-index " + tx + " -o " + sample + " "
                   + bwt2_ref + " " + end1 + " " + end2 + " 2>> " + loc)
    head = tophat_tool + " --no-coverage-search"
    if len(fields) >= 2 and RepresentsInt(fields[1]) == True and int(fields[1]) < epoch:
        tophat_cmd = head + " --phred64-quals" + shared_tail
    else:
        tophat_cmd = head + shared_tail
    log(loc, date_time() + tophat_cmd + "\n")
    call(tophat_cmd, shell=True)
    return 0
def cov_hole_matrix(hlist, aflag, low=30):
    """Print a region-by-sample matrix of coverage values below *low*.

    Each line of *hlist* names a ``.hist`` file; the sample name is the file's
    base name without ``.hist``.  Lines are read until one starting with
    ``all`` is reached.  For each line whose coverage is below *low*, the value
    is stored under its region.  Cells with no stored value are filled with
    *low* in the output.

    :param hlist: path to a text file with one hist file path per line
    :param aflag: ``'n'`` when the bed file was not annotated; coverage is then
        taken from column 4 and the region is ``chr:start-end``.  Otherwise
        coverage is column 5 and the region is column 4.
    :param low: coverage threshold and fill value (default 30)
    """
    cov_dict = {}
    slist = []
    # coverage column is 4th (0-based) unless bedfile wasn't annotated
    c = 3 if aflag == 'n' else 4
    with open(hlist) as hist_list:
        for floc in hist_list:
            floc = floc.rstrip('\n')
            samp = os.path.basename(floc).replace('.hist', '')
            sys.stderr.write(date_time() + 'Processing file ' + floc + ' sample name ' + samp + '\n')
            slist.append(samp)
            with open(floc) as fh:
                for line in fh:
                    if line[0:3] == 'all':
                        # per-region rows are over once the summary rows start
                        break
                    info = line.rstrip('\n').split('\t')
                    if int(info[c]) >= low:
                        continue
                    if aflag == 'n':
                        reg = info[0] + ':' + info[1] + '-' + info[2]
                    else:
                        reg = info[3]
                    cov_dict.setdefault(reg, {})[samp] = info[c]
    sys.stderr.write(date_time() + 'Outputting matrix\n')
    sys.stdout.write('Sample/Region\t' + '\t'.join(slist) + '\n')
    fill = str(low)
    for region in cov_dict:
        row = [region]
        for samp in slist:
            row.append(cov_dict[region].get(samp, fill))
        sys.stdout.write('\t'.join(row) + '\n')
def setup_vm(bid, image, flavor, key, timeout):
    """Boot a nova VM named ``vm_pipe_<bid>`` and copy ``.novarc`` onto it.

    After ``nova boot`` the VM is polled with ``nova show`` every 30 seconds
    until its status is ``ACTIVE``.  Then the function pauses 60 seconds, adds
    the VM's host key to ``~/.ssh/known_hosts`` and rsyncs
    ``/home/ubuntu/.novarc`` to the VM.

    ``nova boot`` now sources ``.novarc`` like the other nova/cinder commands
    in this function.

    :param bid: sample-set identifier used in the VM name
    :param image: image name passed to ``--image``
    :param flavor: flavor passed to ``--flavor``
    :param key: key pair name passed to ``--key-name``
    :param timeout: maximum number of seconds to wait for boot
    :return: ``[vm_id, vm_ip]``
    :raises Exception: on boot timeout or when the VM status becomes ERROR
    """
    sys.stderr.write(date_time() + ': Starting VM QC for sample set ' + str(bid) + '.\n')
    # Source .novarc command
    src_cmd = '. /home/ubuntu/.novarc; '
    # Build nova boot command
    vm_name = 'vm_pipe_' + str(bid)
    nova_boot_cmd = (src_cmd + 'nova boot ' + vm_name + ' --image ' + image + ' --flavor ' + str(flavor)
                     + ' --key-name ' + key)
    sys.stderr.write(date_time() + ': Booting up VM.\n' + nova_boot_cmd + '\n')
    nova_boot_cmd_output = subprocess.check_output(nova_boot_cmd, shell=True)
    # Get ID of VM in the event another has the same display name
    vm_id = get_nova_show_attr(nova_boot_cmd_output, 'id')
    # Check status of VM every 30 seconds until finished spawning
    sleep_time = 30
    sleep_cmd = 'sleep ' + str(sleep_time) + 's'
    elapsed_time = 0
    vm_still_booting = True
    nova_show_cmd = src_cmd + 'nova show ' + vm_id
    while vm_still_booting:
        sys.stderr.write(date_time() + ': Sleeping ' + str(sleep_time) + 's.\n')
        subprocess.call(sleep_cmd, shell=True)
        elapsed_time += sleep_time
        if elapsed_time > int(timeout):
            # TODO I think we should delete the VM somehow, wait until ACTIVE
            raise Exception('FATAL ERROR: VM still booting as timeout of ' + str(timeout)
                            + 's was reached. Increase timeout and try again.\n')
        sys.stderr.write(date_time() + ': Checking success of VM boot. ' + str(elapsed_time)
                         + ' seconds have passed.\n')
        nova_show_cmd_output = subprocess.check_output(nova_show_cmd, shell=True)
        vm_status = get_nova_show_attr(nova_show_cmd_output, 'status')
        if vm_status == 'ACTIVE':
            vm_still_booting = False
            vm_ip = get_nova_show_attr(nova_show_cmd_output, 'private_network')
        if vm_status == 'ERROR':
            raise Exception('FATAL ERROR: VM boot produced ERROR for ' + vm_name
                            + '. Check connection settings and try again.\n')
    # VM has now booted up, transfer .novarc to new VM
    sys.stderr.write(date_time() + ': VM booted!\n')
    sleep_cmd = 'sleep 60s'
    sys.stderr.write(date_time() + ': Pausing 60s to give VM a chance to initialize.\n')
    subprocess.call(sleep_cmd, shell=True)
    # TODO should we have a more robust check?
    rsync_nova_var_cmd = ('ssh-keyscan ' + vm_ip + ' >> ~/.ssh/known_hosts;rsync /home/ubuntu/.novarc ubuntu@'
                          + vm_ip + ':/home/ubuntu')
    sys.stderr.write(date_time() + ': Copying openstack variables to VM\n' + rsync_nova_var_cmd + '\n')
    subprocess.call(rsync_nova_var_cmd, shell=True)
    sys.stderr.write(date_time() + ': VM setup for ' + vm_name + ' with IP address ' + vm_ip + ' with ID '
                     + vm_id + ' was successful.\n')
    # Return VM information
    return [vm_id, vm_ip]
#!/usr/bin/python import sys sys.path.append('/Users/Miguel/Documents/Scripts/white_lab/RNAseq/utility') from date_time import date_time flist = open(sys.argv[1], 'r') data = {} bids = [] for fn in flist: fn = fn.rstrip('\n') sys.stderr.write(date_time() + 'Processing file ' + fn + '\n') parts = fn.split('.') bid = parts[2] bids.append(bid) cur = open(fn, 'r') head = next(cur) for line in cur: line = line.rstrip('\n') datum = line.split('\t') # will only bother outputting transcripts with values > 0 tx = datum[4] val = datum[9] if tx == '-': tx = datum[6] if float(val) > 0: if tx not in data: data[tx] = {} data[tx][bid] = val sys.stderr.write(date_time() + 'Completed processing file ' + fn + '\n')