Esempio n. 1
0
def create_pon(vlist):
    """Count, per variant, how many distinct normals carry a banned filter.

    ``vlist`` is a text file with one VCF path per line. Each path must
    contain two dash-separated numeric IDs joined by an underscore
    (``<digits>-<digits>_<digits>-<digits>``); the second ID is used as the
    normal sample identifier.  For every non-header VCF record whose FILTER
    column (7th field) contains ``HighVafNormal`` or ``HighAltCountNormal``,
    the variant (chr, pos, ref, alt) is counted once per distinct normal.

    A header line followed by one ``chr\\tpos\\tref\\talt\\tct`` row per
    variant is written to stdout; progress messages go to stderr.

    :param vlist: path to the file listing VCF paths
    :raises ValueError: if a listed path does not contain the ID pattern
    """
    banned_filt = frozenset(('HighVafNormal', 'HighAltCountNormal'))
    # variant key -> number of distinct normals flagged with a banned filter
    banned_tup = {}
    # normal id -> {variant key: 1} for variants already counted for it
    norm_flag = {}
    sys.stdout.write('chr\tpos\tref\talt\tct\n')

    with open(vlist) as vcf_list:
        for fn in vcf_list:
            fn = fn.rstrip('\n')
            if not fn:
                # tolerate blank lines (e.g. a trailing newline) in the list
                continue
            sys.stderr.write(date_time() + 'Processing ' + fn + '\n')
            bnids = re.search(r'(\d+-\d+)_(\d+-\d+)', fn)
            if bnids is None:
                raise ValueError('Cannot find sample ID pair in file name ' + fn)
            seen = norm_flag.setdefault(bnids.group(2), {})
            with open(fn) as vcf:
                for line in vcf:
                    if line[0] == '#':
                        continue
                    info = line.rstrip('\n').split('\t')
                    filt = info[6].split(';')
                    if not any(state in banned_filt for state in filt):
                        continue
                    cur = '\t'.join((info[0], info[1], info[3], info[4]))
                    # count each variant at most once per normal
                    if cur not in seen:
                        seen[cur] = 1
                        banned_tup[cur] = banned_tup.get(cur, 0) + 1
    sys.stderr.write(date_time() + 'Outputting results\n')
    for tup in banned_tup:
        sys.stdout.write(tup + '\t' + str(banned_tup[tup]) + '\n')
Esempio n. 2
0
def flagstats(samtools_tool,sample):
    """Launch ``samtools flagstat`` on the sorted and the rmdup-sorted BAM.

    For ``<sample>.srt.bam`` and ``<sample>.rmdup.srt.bam`` a shell command is
    logged to stderr and started with Popen; its output is redirected to
    ``<bam>.flagstats``.  The processes are not waited on here, so their exit
    statuses are not checked.
    """
    for bam_suffix in (".srt.bam", ".rmdup.srt.bam"):
        bam_path = sample + bam_suffix
        stat_cmd = samtools_tool + " flagstat " + bam_path + " > " + bam_path + ".flagstats"
        sys.stderr.write(date_time() + stat_cmd + "\n")
        Popen(stat_cmd, shell=True, stdin=None, stdout=None, stderr=None, close_fds=True)
Esempio n. 3
0
def wait_until_status(status, status_verb, cinder_id, timeout):
    ''' Poll ``cinder show`` every 30 seconds until ``status`` is reported.

    status      -- value of the 'status' attribute to wait for
    status_verb -- verb used in log/error messages (e.g. 'boot', 'attach')
    cinder_id   -- ID passed to ``cinder show``
    timeout     -- maximum number of seconds to wait (anything int() accepts)

    Raises Exception once the elapsed time exceeds ``timeout``.
    '''
    novarc_prefix = '. /home/ubuntu/.novarc; '
    poll_interval = 30
    nap_cmd = 'sleep ' + str(poll_interval) + 's'
    waited = 0
    while True:
        sys.stderr.write(date_time() + ': Sleeping ' + str(poll_interval) + 's.\n')
        subprocess.call(nap_cmd, shell=True)
        waited += poll_interval
        # TODO I think we should delete the VM somehow, wait until ACTIVE
        if waited > int(timeout):
            raise Exception('FATAL ERROR: cinder still ' + status_verb
                            + 'ing as timeout of ' + str(timeout)
                            + 's was reached. Increase timeout and try again.\n')
        sys.stderr.write(date_time() + ': Checking success of cinder '
                         + status_verb + '. ' + str(waited)
                         + ' seconds have passed.\n')
        show_cmd = novarc_prefix + 'cinder show ' + cinder_id
        show_output = subprocess.check_output(show_cmd, shell=True)
        if get_cinder_show_attr(show_output, 'status') == status:
            return
Esempio n. 4
0
def update_couchdb(fn, config_file):
    """Upload each JSON file listed in ``fn`` to CouchDB as a new document.

    Connection settings come from ``parse_config(config_file)``.  For every
    path in ``fn`` a fresh UUID is requested from ``<server>/_uuids`` and the
    file is PUT to ``<server>/<db>/<uuid>`` using curl.  If curl fails, or the
    server answers with an error object, a message is written to stderr and
    the process exits with status 1.

    :param fn: path to a text file with one JSON file path per line
    :param config_file: configuration file understood by ``parse_config``
    :return: 0 when every document was uploaded
    """
    from subprocess import CalledProcessError

    (server, user, password, db, http_proxy, https_proxy, no_proxy) = parse_config(config_file)
    set_proxy(http_proxy, https_proxy, no_proxy)
    fail_msg = 'Database update failed for qc stats.  Check connection\n'

    with open(fn, 'r') as fh:
        for obj in fh:
            obj = obj.rstrip('\n')
            if not obj:
                # nothing to upload for a blank line
                continue
            # Get uuid command
            get_uuid = 'curl -X GET ' + server + '/_uuids -k;'
            sys.stderr.write(date_time() + get_uuid + '\n')
            uuid_out = check_output(get_uuid, shell=True)
            # typical response: {"uuids":["24ec4b43cfe304ff4709e76f7400074d"]}
            # match 0 is the key "uuids", match 1 is the identifier itself
            m = re.findall(r'"(\w+)"', uuid_out)
            uuid = m[1]
            # NOTE(review): the logged command includes the password in clear text
            couch_cmd = 'curl -X PUT -d @' + obj + ' "' + server + '/' + db + '/' + uuid \
                        + '" -H "Content-Type: application/json" -k -u "' + user + ':' + password + '"'
            sys.stderr.write(date_time() + couch_cmd + '\n')
            try:
                result = check_output(couch_cmd, shell=True)
            except CalledProcessError:
                # curl itself exited non-zero (e.g. could not connect)
                sys.stderr.write(date_time() + fail_msg)
                sys.exit(1)
            result = result.rstrip('\n')
            sys.stderr.write(obj + '\t' + result + '\n')
            # CouchDB reports failures as a JSON object with an "error" member
            if re.search(r'"error"\s*:', result):
                sys.stderr.write(date_time() + fail_msg)
                sys.exit(1)
    return 0
Esempio n. 5
0
def download_from_swift(cont,obj,lane_list):
    """Download per-lane QC stats from swift and print them as one table.

    ``lane_list`` has tab-separated lines ``bid<TAB>seqtype<TAB>lane_csv``
    where ``lane_csv`` is a ``', '``-separated list of lanes.  For each lane
    the object ``<obj>/<bid>/QC/<bid>_<lane>.qc_stats.txt`` is downloaded from
    container ``cont``; its first line is taken as the header and its second
    line as the data row.  The last header seen and all data rows are written
    to stdout.  A failed download exits the process with status 1.

    :return: 0 on success
    """
    src_cmd=". /home/ubuntu/.novarc;"
    head=''
    data=[]
    with open(lane_list, 'r') as lanes:
        for line in lanes:
            (bid,seqtype,lane_csv)=line.rstrip('\n').split('\t')
            for lane in lane_csv.split(', '):
                cur=obj + '/' + bid + '/QC/' + bid + '_' + lane + '.qc_stats.txt'
                swift_cmd=src_cmd + "swift download " + cont + " --skip-identical --prefix " + cur 
                sys.stderr.write(date_time() + swift_cmd + "\n")
                try:
                    check_output(swift_cmd,shell=True,stderr=subprocess.PIPE)
                except (subprocess.CalledProcessError, OSError):
                    sys.stderr.write(date_time() + "Download of " + obj + " from " + cont + " failed\n")
                    sys.exit(1)
                # swift writes the object to a local path equal to its name
                with open(cur,'r') as stat:
                    head=next(stat)
                    data.append(next(stat))
    sys.stdout.write(head)
    sys.stdout.writelines(data)
    return 0
Esempio n. 6
0
def job_manager(cmd_list, max_t):
    """Run shell commands concurrently, at most ``max_t`` at a time.

    The first ``max_t`` commands are started immediately.  Every 30 seconds
    the slots are polled; a slot whose job exited with status 0 is counted as
    complete and, while commands remain, refilled with the next one.  If any
    job exits with a non-zero status the jobs still running are killed and
    the process exits with status 1.

    :param cmd_list: list of shell command strings (run with shell=True)
    :param max_t: maximum number of simultaneous jobs (anything int() accepts)
    :return: 0 once every command has completed successfully
    """
    x = len(cmd_list)
    # cur position in command list
    cur = 0
    # completed
    comp = 0
    # slot index -> {'job': Popen, 'cmd': command string, 'status': 'Running' or '0'}
    p = {}
    sys.stderr.write(date_time() + 'Initializing run\n')
    # NOTE(review): max_t < 1 with a non-empty cmd_list starts nothing and
    # this function then never returns (unchanged from the original).
    n = min(int(max_t), x)
    for i in range(n):
        p[i] = {
            'job': subprocess.Popen(cmd_list[i], shell=True),
            'cmd': cmd_list[i],
            'status': 'Running',
        }
        sys.stderr.write(cmd_list[i] + '\n')
        cur += 1
    # s: seconds elapsed, j: sleep length, m: polling period
    s = 0
    j = 30
    m = 30
    sleep_cmd = 'sleep ' + str(j) + 's'
    while comp < x:
        if s % m == 0:
            sys.stderr.write(date_time() + 'Checking job statuses. ' +
                             str(comp) + ' of ' + str(x) + ' completed. ' +
                             str(s) + ' seconds have passed\n')
            for i in range(n):
                check = p[i]['job'].poll()
                if check is None:
                    # still running
                    continue
                if check != 0:
                    # any non-zero status is a failure, not only status 1;
                    # otherwise such a job is never counted and we wait forever
                    sys.stderr.write(date_time() +
                                     'Job returned an error while running ' +
                                     p[i]['cmd'] + '  aborting!\n')
                    for k in range(n):
                        # only signal jobs that have not already exited
                        if p[k]['job'].poll() is None:
                            p[k]['job'].kill()
                            sys.stderr.write('Killing job ' + str(k) + '\n')
                    sys.exit(1)
                if p[i]['status'] != '0':
                    # first time this slot's job is seen finished
                    comp += 1
                    p[i]['status'] = '0'
                    if comp <= (x - n):
                        # commands remain: reuse the slot for the next one
                        try:
                            p[i]['job'] = subprocess.Popen(cmd_list[cur],
                                                           shell=True)
                            p[i]['cmd'] = cmd_list[cur]
                            p[i]['status'] = 'Running'
                            cur += 1
                        except (IndexError, OSError):
                            sys.stderr.write(date_time() +
                                             "Tried to queue command " +
                                             p[i]['cmd'] + '\n was ' +
                                             str(cur) + ' in command list, ' +
                                             str(i) + ' in queue list\n')
                            sys.exit(1)
        s += j
        if comp < x:
            # nothing left to wait for once everything has completed
            subprocess.call(sleep_cmd, shell=True)
    sys.stderr.write(date_time() + str(comp) + ' jobs completed\n')
    return 0
Esempio n. 7
0
def download_from_swift(cont,obj):
    """Download every object under prefix ``obj`` from swift container ``cont``.

    The command is logged to stderr and run through the shell after sourcing
    /home/ubuntu/.novarc.  If it cannot be run or exits non-zero, a message is
    written to stderr and the process exits with status 1.

    :return: 0 on success
    """
    src_cmd=". /home/ubuntu/.novarc;"
    swift_cmd=src_cmd + "swift download " + cont + " --skip-identical --prefix " + obj
    sys.stderr.write(date_time() + swift_cmd + "\n")
    try:
        check_output(swift_cmd,shell=True,stderr=subprocess.PIPE)
    except (subprocess.CalledProcessError, OSError):
        # catch only command failures so Ctrl-C is not reported as a failed download
        sys.stderr.write(date_time() + "Download of " + obj + " from " + cont + " failed\n")
        sys.exit(1)
    return 0
Esempio n. 8
0
def upload_to_swift(cont,obj):
    """Upload the current directory to swift container ``cont`` as ``obj``.

    Runs ``swift upload <cont> ./ --skip-identical --object-name <obj>`` with
    a segment size (``-S``) of 1 GiB after sourcing /home/ubuntu/.novarc.  If
    the command cannot be run or exits non-zero, a message is written to
    stderr and the process exits with status 1.

    :return: 0 on success
    """
    ONE_GB = 1073741824
    src_cmd=". /home/ubuntu/.novarc;"
    swift_cmd=src_cmd + "swift upload " + cont + " ./ --skip-identical --object-name " + obj + " -S " + str(ONE_GB)
    sys.stderr.write(date_time() + swift_cmd + "\n")
    try:
        check_output(swift_cmd,shell=True,stderr=subprocess.PIPE)
    except (subprocess.CalledProcessError, OSError):
        # catch only command failures so Ctrl-C is not reported as a failed upload
        sys.stderr.write(date_time() + "Upload of " + obj + " to " + cont +  " failed\n")
        sys.exit(1)
    return 0
Esempio n. 9
0
def job_manager(cmd_list, max_t):
    """Run shell commands concurrently, at most ``max_t`` at a time.

    The first ``max_t`` commands are started immediately.  Every 30 seconds
    the slots are polled; a slot whose job exited with status 0 is counted as
    complete and, while commands remain, refilled with the next one.  If any
    job exits with a non-zero status the jobs still running are killed and
    the process exits with status 1.

    :param cmd_list: list of shell command strings (run with shell=True)
    :param max_t: maximum number of simultaneous jobs (anything int() accepts)
    :return: 0 once every command has completed successfully
    """
    x = len(cmd_list)
    # cur position in command list
    cur = 0
    # completed
    comp = 0
    # slot index -> {'job': Popen, 'cmd': command string, 'status': 'Running' or '0'}
    p = {}
    sys.stderr.write(date_time() + 'Initializing run\n')
    # NOTE(review): max_t < 1 with a non-empty cmd_list starts nothing and
    # this function then never returns (unchanged from the original).
    n = min(int(max_t), x)
    for i in range(n):
        p[i] = {
            'job': subprocess.Popen(cmd_list[i], shell=True),
            'cmd': cmd_list[i],
            'status': 'Running',
        }
        sys.stderr.write(cmd_list[i] + '\n')
        cur += 1
    # s: seconds elapsed, j: sleep length, m: polling period
    s = 0
    j = 30
    m = 30
    sleep_cmd = 'sleep ' + str(j) + 's'
    while comp < x:
        if s % m == 0:
            sys.stderr.write(
                date_time() + 'Checking job statuses. ' + str(comp) + ' of ' + str(x) + ' completed. ' + str(
                    s) + ' seconds have passed\n')
            for i in range(n):
                check = p[i]['job'].poll()
                if check is None:
                    # still running
                    continue
                if check != 0:
                    # any non-zero status is a failure, not only status 1;
                    # otherwise such a job is never counted and we wait forever
                    sys.stderr.write(
                        date_time() + 'Job returned an error while running ' + p[i]['cmd'] + '  aborting!\n')
                    for k in range(n):
                        # only signal jobs that have not already exited
                        if p[k]['job'].poll() is None:
                            p[k]['job'].kill()
                            sys.stderr.write('Killing job ' + str(k) + '\n')
                    sys.exit(1)
                if p[i]['status'] != '0':
                    # first time this slot's job is seen finished
                    comp += 1
                    p[i]['status'] = '0'
                    if comp <= (x - n):
                        # commands remain: reuse the slot for the next one
                        try:
                            p[i]['job'] = subprocess.Popen(cmd_list[cur], shell=True)
                            p[i]['cmd'] = cmd_list[cur]
                            p[i]['status'] = 'Running'
                            cur += 1
                        except (IndexError, OSError):
                            sys.stderr.write(date_time() + "Tried to queue command " + p[i]['cmd'] + '\n was ' + str(
                                cur) + ' in command list, ' + str(i) + ' in queue list\n')
                            sys.exit(1)
        s += j
        if comp < x:
            # nothing left to wait for once everything has completed
            subprocess.call(sleep_cmd, shell=True)
    sys.stderr.write(date_time() + str(comp) + ' jobs completed\n')
    return 0
Esempio n. 10
0
def bid_swift_list(cont,obj,blist):
    """Run ``swift list`` for each sample ID listed in ``blist``.

    For every non-empty line ``bid`` of ``blist`` the objects of container
    ``cont`` under prefix ``<obj>/<bid>`` are listed; the listing goes to this
    process's stdout.  A command that cannot be started is reported on stderr
    and the loop continues.

    :return: 0
    """
    src_cmd=". /home/ubuntu/.novarc;"
    with open(blist,'r') as fh:
        for bid in fh:
            # drop the line terminator so it does not end up inside the command
            bid=bid.rstrip('\n')
            if not bid:
                # a blank line would otherwise list everything under obj/
                continue
            swift_cmd=src_cmd + "swift list " + cont + " --prefix " + obj + "/" + bid 
            sys.stderr.write(date_time() + swift_cmd + "\n")
            try:
                call(swift_cmd,shell=True)
            except OSError:
                sys.stderr.write(date_time() + "Lising of " + bid + ' of ' + obj + " from " + cont + " failed\n")

    return 0
def bid_swift_list(cont, obj, blist, novarc):
    """Run ``swift list`` for each sample ID listed in ``blist``.

    ``source_novarc(novarc)`` is called first.  For every line ``bid`` of
    ``blist`` the objects of container ``cont`` under prefix
    ``<obj>/<bid>/`` are listed; the listing goes to this process's stdout.

    :return: 0 when every listing command could be started, 1 as soon as one
             could not
    """
    source_novarc(novarc)
    with open(blist, 'r') as fh:
        for bid in fh:
            bid = bid.rstrip('\n')
            swift_cmd = "swift list " + cont + " --prefix " + obj + "/" + bid + "/"
            sys.stderr.write(date_time() + swift_cmd + "\n")
            try:
                call(swift_cmd, shell=True)
            except OSError:
                # call() only raises when the command cannot be launched
                sys.stderr.write(date_time() + "Lising of " + bid + ' of ' + obj + " from " + cont + " failed\n")
                return 1

    return 0
Esempio n. 12
0
def setup_vm(bid, image, flavor, key, timeout):
    """Boot a nova VM named ``vm_pipe_<bid>`` and copy .novarc onto it.

    The VM is booted with ``nova boot``, then ``nova show`` is polled every
    30 seconds until its status is ACTIVE.  After a further 60 second pause
    the VM's host key is appended to ~/.ssh/known_hosts and
    /home/ubuntu/.novarc is rsynced to it.

    :param bid: sample set identifier used to build the VM name
    :param image: value for ``--image``
    :param flavor: value for ``--flavor``
    :param key: value for ``--key-name``
    :param timeout: maximum seconds to wait for ACTIVE
    :return: ``[vm_id, vm_ip]``
    :raises Exception: on timeout or when the VM status becomes ERROR
    """
    sys.stderr.write(date_time() + ': Starting VM QC for sample set ' + str(bid) + '.\n')

    # Prefix that sources .novarc before a nova call
    novarc_prefix = '. /home/ubuntu/.novarc; '

    # Boot the VM
    vm_name = 'vm_pipe_' + str(bid)
    boot_cmd = ('nova boot ' + vm_name + ' --image ' + image + ' --flavor '
                + str(flavor) + ' --key-name ' + key)
    sys.stderr.write(date_time() + ': Booting up VM.\n' + boot_cmd + '\n')
    boot_output = subprocess.check_output(boot_cmd, shell=True)

    # Work with the ID rather than the display name, which may not be unique
    vm_id = get_nova_show_attr(boot_output, 'id')

    # Poll every 30 seconds until the VM has finished spawning
    poll_interval = 30
    nap_cmd = 'sleep ' + str(poll_interval) + 's'
    waited = 0
    show_cmd = novarc_prefix + 'nova show ' + vm_id

    while True:
        sys.stderr.write(date_time() + ': Sleeping ' + str(poll_interval) + 's.\n')
        subprocess.call(nap_cmd, shell=True)
        waited += poll_interval
        # TODO I think we should delete the VM somehow, wait until ACTIVE
        if waited > int(timeout):
            raise Exception('FATAL ERROR: VM still booting as timeout of ' + str(timeout)
                            + 's was reached. Increase timeout and try again.\n')
        sys.stderr.write(date_time() + ': Checking success of VM boot. '
                         + str(waited) + ' seconds have passed.\n')
        show_output = subprocess.check_output(show_cmd, shell=True)
        vm_status = get_nova_show_attr(show_output, 'status')
        if vm_status == 'ACTIVE':
            vm_ip = get_nova_show_attr(show_output, 'private_network')
            break
        if vm_status == 'ERROR':
            raise Exception('FATAL ERROR: VM boot produced ERROR for ' + vm_name
                            + '. Check connection settings and try again.\n')

    # VM is up: give it time to initialize, then transfer .novarc
    sys.stderr.write(date_time() + ': VM booted!\n')
    settle_cmd = 'sleep 60s'
    sys.stderr.write(date_time() + ': Pausing 60s to give VM a chance to initialize.\n')
    subprocess.call(settle_cmd, shell=True)  # TODO should we have a more robust check?
    copy_novarc_cmd = ('ssh-keyscan ' + vm_ip + ' >> ~/.ssh/known_hosts;'
                       + 'rsync /home/ubuntu/.novarc ubuntu@' + vm_ip + ':/home/ubuntu')
    sys.stderr.write(date_time() + ': Copying openstack variables to VM\n'
                     + copy_novarc_cmd + '\n')
    subprocess.call(copy_novarc_cmd, shell=True)
    sys.stderr.write(date_time() + ': VM setup for ' + vm_name + ' with IP address '
                     + vm_ip + ' with ID ' + vm_id + ' was successful.\n')

    return [vm_id, vm_ip]
Esempio n. 13
0
def metalfox_pipe(config_file, sample_pairs, ref_mnt):
    """Build one metalfox shell job per sample pair and run them in parallel.

    ``sample_pairs`` has tab-separated lines; column 0 names the analysis
    whose ``.out.keep`` file is scored and column 1 names the sample whose
    rmdup BAM (and index) is downloaded from swift.  Each job sources
    ~/.novarc, unsets the proxies, downloads the inputs and runs the tool,
    writing ``<column 0>.foxog_scored_added.out``.  The jobs are handed to
    ``job_manager`` with the thread limit from the config file.

    :param config_file: configuration file understood by ``parse_config``
    :param sample_pairs: path to the tab-separated pair list
    :param ref_mnt: directory prepended to the mappability reference path
    """
    (metalfox_tool, cont, obj, map_ref, max_t, ram) = parse_config(config_file)
    map_ref = ref_mnt + '/' + map_ref
    src_cmd = '. ~/.novarc;'
    deproxy = 'unset http_proxy; unset https_proxy;'
    job_list = []
    with open(sample_pairs, 'r') as pairs:
        for sn in pairs:
            info = sn.rstrip('\n').split('\t')
            sys.stderr.write('Getting bam file name for ' + info[1] + '\n')
            get_bam_name = 'swift list ' + cont + ' --prefix ' + obj + '/' + info[1] + '/BAM/' + info[1] \
                           + ' | grep .rmdup.srt.ba* '
            # expected to yield two names (bam and its index), one per line
            bam = subprocess.check_output(get_bam_name, shell=True).split('\n')
            dl_bam = 'swift download --skip-identical ' + cont + ' ' + bam[1] + ';swift download --skip-identical ' \
                     + cont + ' ' + bam[0] + ';'
            mut_out = 'ANALYSIS/' + info[0] + '/OUTPUT/' + info[0] + '.out.keep'
            dl_out = 'swift download ' + cont + ' ' + mut_out + ';'
            # .bai/.bam extension not always clear: use whichever name ends in 'bam'
            bam_file = bam[1] if bam[1][-3:] == 'bam' else bam[0]
            run_metal = metalfox_tool + ' -f1 ' + mut_out + ' -f3 ' + bam_file + ' -m ' + map_ref + ' > ' \
                        + info[0] + '.foxog_scored_added.out;'
            # Removal of the downloaded inputs (rm bam[0] bam[1] mut_out) is
            # intentionally not appended to the job.
            job_list.append(src_cmd + deproxy + dl_bam + dl_out + run_metal)
    sys.stderr.write(date_time() + 'Queueing jobs\n')
    job_manager(job_list, max_t)
Esempio n. 14
0
def fastqc(fastqc_tool,sample,end1,end2,t):
    """Start fastqc on a read pair in the background and check it early.

    The command is logged to ``<log dir>/<sample>.fastqc.log`` (``LOGS/`` if
    that directory exists, otherwise the current directory) and launched
    without waiting for completion; results go to ``QC/``.  After 20 seconds
    the process is polled once: if it has already exited with a non-zero
    status the error is logged and the process exits with status 1.

    :param t: thread count, as a string
    :return: 0 if no early failure was seen
    """
    # casual logging - look for a LOGS directory, otherwise assume current dir
    log_dir='./'
    if os.path.isdir('LOGS'):
        log_dir='LOGS/'
    loc=log_dir + sample + '.fastqc.log'
    fastqc_cmd=fastqc_tool + ' -t ' + t + ' -o QC/ ' + end1 + ' ' + end2
    log(loc,date_time() + fastqc_cmd + "\n")
    f=Popen(fastqc_cmd,shell=True,stdin=None,stdout=None,stderr=None,close_fds=True)
    # check after 20 seconds whether the process is still good - shouldn't take too long to ascertain whether phred score didn't fit
    call('sleep 20s', shell=True)

    status=f.poll()
    # None means still running; any non-zero exit status is a failure
    if status is not None and status != 0:
        log(loc,date_time() + 'fastqc returned an error.  Check your inputs and try again!\n')
        exit(1)
    return 0
Esempio n. 15
0
def delete_from_swift_list(cont,fn,l):
    """Delete from swift container ``cont`` every object named in file ``fn``.

    Each line of ``fn`` is one object name.  Names that are empty or begin
    with a non-word character are skipped with a warning so that a malformed
    entry cannot widen the delete.  Command output and errors are appended to
    ``dl_log.txt``.

    :param l: ``'y'`` to pass ``--leave-segments`` to ``swift delete``
    :return: 0
    """
    src_cmd=". /home/ubuntu/.novarc;"
    deproxy='unset http_proxy; unset https_proxy;'
    delete_opts=" --leave-segments" if l == 'y' else ""
    with open(fn,'r') as fh:
        for obj in fh:
            obj = obj.rstrip('\n')
            if obj == '' or re.match(r'\W+',obj):
                sys.stderr.write(date_time() + 'Object ' + obj + ' looks malformed, skipping for safety reasons!\n' )
                continue
            swift_cmd=deproxy + src_cmd + "swift delete" + delete_opts + " " + cont + " " + obj + " >> dl_log.txt 2>> dl_log.txt"
            sys.stderr.write(date_time() + swift_cmd + "\n")
            call(swift_cmd,shell=True)
    return 0
Esempio n. 16
0
 def __check(self, url):
     """Sniff ``url`` if it was never sniffed or its interval has elapsed.

     The interval comes from the SNIFF_EVERY value stored for ``url``
     (default SECONDS_IN_DAY).  When a sniff is due, the stored SNIFF_LAST
     value is replaced with the current date/time before sniffing.
     """
     interval = int(self.get_first_value(url, SNIFF_EVERY, str(SECONDS_IN_DAY)))
     # TODO: take the oldest value
     last_sniff = self.get_first_value(url, SNIFF_LAST, None)
     due = not last_sniff or time() - parse_date_time(last_sniff) > interval
     if not due:
         return
     self.remove(url, SNIFF_LAST, None)
     self.add(url, SNIFF_LAST, literal(date_time()))
     self.sniff(url)
Esempio n. 17
0
def check_manifest(manifest, body):
    """
    Verify that a file matches the segments described by a swift manifest.

    The manifest is parsed as JSON and iterated as a sequence of segments,
    each with ``name``, ``bytes`` and ``hash`` entries.  For each segment the
    next ``bytes`` bytes are read from ``body`` and their MD5 hex digest is
    compared with ``hash``; progress is written to stderr.

    :param manifest: the raw body of the manifest from swift
    :param body: a file like object to check against the manifest
    :return: True if every segment matches, False at the first mismatch
    """
    sys.stderr.write(date_time() + 'Checking manifest\n')
    for part in json.loads(manifest):
        sys.stderr.write(date_time() + part['name'] + '\n')
        digest = md5(body.read(part['bytes'])).hexdigest()
        sys.stderr.write(date_time() + '%s ?= %s' % (digest, part['hash'] + '\n'))
        if digest != part['hash']:
            sys.stderr.write('Not the same\n')
            return False
    sys.stderr.write('The same\n')
    return True
Esempio n. 18
0
def bwa_mem_pe(bwa_tool,RGRP,bwa_ref,end1,end2,samtools_tool,samtools_ref,sample,log_dir):
    """Align a read pair with ``bwa mem`` and write ``<sample>.bam``.

    bwa output is piped through ``samtools view -bT`` into ``<sample>.bam``;
    the pipeline's remaining output is redirected to
    ``<log_dir><sample>.bwa.pe.log``.  bwa runs with 8 threads and read group
    string ``RGRP``.  If the pipeline cannot be run or exits non-zero, the
    failure is logged and the process exits with status 1.

    :return: 0 on success
    """
    loc=log_dir + sample + ".bwa.pe.log"
    bwa_cmd="(" + bwa_tool + " mem -t 8 -R \"" + RGRP + "\" -v 2 " + bwa_ref + " " + end1 + " " + end2 + " | " + samtools_tool + " view -bT " + samtools_ref + " - > " + sample + ".bam) > " + loc + " 2>&1"
    log(loc,date_time() + bwa_cmd + "\n")
    try:
        subprocess.check_output(bwa_cmd,shell=True)
    except (subprocess.CalledProcessError, OSError):
        # record why we are exiting instead of failing silently
        log(loc,date_time() + 'bwa mem failed for sample ' + sample + '\n')
        exit(1)
    return 0
Esempio n. 19
0
def bwt2_pe(bwt_tool,bwt_ref,end1,end2,samtools_tool,samtools_ref,sample,t,log_dir):
    """Align a read pair with the given aligner and write ``<sample>.bam``.

    Aligner output is piped through ``samtools view -bT`` into
    ``<sample>.bam``; the pipeline's remaining output is redirected to
    ``<log_dir><sample>.bwt.pe.log``.  If the pipeline cannot be started or
    exits non-zero, the failure is logged and the process exits with
    status 1.

    :param t: thread count, as a string
    :return: 0 on success
    """
    loc=log_dir + sample + ".bwt.pe.log"
    bwt_cmd="(" + bwt_tool + " --fr -p " + t + " -I 0 -X 500 -x " + bwt_ref + " -1 " + end1 + " -2 " + end2 + " | " + samtools_tool + " view -bT " + samtools_ref + " - > " + sample + ".bam) > " + loc + " 2>&1"
    log(loc,date_time() + bwt_cmd + "\n")
    try:
        status=call(bwt_cmd,shell=True)
    except OSError:
        status=1
    # call() reports failure through its return value rather than by raising,
    # so the exit status has to be inspected explicitly
    if status != 0:
        log(loc,date_time() + 'Alignment failed for sample ' + sample + '\n')
        exit(1)
    return 0
def download_from_swift_list(cont,fn):
    """Download from swift container ``cont`` every object named in file ``fn``.

    Each non-empty line of ``fn`` is one object name.  The command's standard
    output is appended to ``dl_log.txt``.

    :return: 0
    """
    src_cmd=". /home/ubuntu/.novarc;"
    deproxy='unset http_proxy; unset https_proxy;'
    with open(fn,'r') as fh:
        for obj in fh:
            # Strip the line terminator: left in place it splits the shell
            # command in two and detaches the redirect from swift download.
            obj=obj.rstrip('\n')
            if not obj:
                # with no object name swift would download the whole container
                continue
            swift_cmd=deproxy + src_cmd + "swift download " + cont + " --skip-identical " + obj + " >> dl_log.txt"
            sys.stderr.write(date_time() + swift_cmd + "\n")
            call(swift_cmd,shell=True)
    return 0
Esempio n. 21
0
def align_stats(sample):
    """Convert a sample's alignment summary into a one-row table.

    Reads ``<sample>/align_summary.txt`` and the 8th line of
    ``<sample>_subset.insert_metrics.hist`` and writes a header line plus one
    tab-separated row to ``<sample>.align.txt``.  The summary is parsed
    strictly line by line, so the order of the reads below must match the
    layout of that file.

    :return: 0
    :raises AttributeError: if a summary line does not match its pattern
    """
    # casual logging - look for a LOGS directory, otherwise assume current dir
    log_dir='./'
    if os.path.isdir('LOGS'):
        log_dir='LOGS/'
    loc=log_dir + sample + '.aln.log'
    log(loc,date_time() + "Converting to table summary format\n")
    with open(sample + '/' + 'align_summary.txt', 'r') as fh, open(sample + '.align.txt', 'w') as fo:
        fo.write('Sample\tMean insert size estimate(10k reads)\tStd dev read insert size estimate(10 k reads)\tStarting left reads\t% mapped\tmultimapped(mm)\tgt 20 mm\tStarting right reads\t% mapped\t% mm\tgt 20 mm\tOverall map rate\tAligned pairs\t% mm\t% discordant\t% condordant\n' + sample + '\t')

        # insert-size metrics: skip 7 lines, the 8th holds the values
        with open(sample + '_subset.insert_metrics.hist') as fi:
            for _ in range(7):
                next(fi)
            stats=next(fi)
        stat=stats.split('\t')
        # columns 4 and 5 are written truncated to whole numbers
        fo.write('\t'.join([str(int(float(stat[4]))),str(int(float(stat[5])))]))

        # left reads: skip a heading line, then input / mapped / multimapped
        next(fh)
        lstart=next(fh)
        m=re.search(r'(\d+)\n$',lstart)
        fo.write('\t' + m.group(1))
        pct=next(fh)
        m=re.search(r'\(\s*(\S+) of input\)\n',pct)
        fo.write('\t' + m.group(1))
        mm=next(fh)
        m=re.search(r'\(\s*(\S+)\).*\((\d+) have >20\)\n',mm)
        fo.write('\t' + m.group(1) + '\t' + m.group(2))

        # right reads: same layout as the left reads
        next(fh)
        rstart=next(fh)
        m=re.search(r'(\d+)\n$',rstart)
        fo.write('\t' + m.group(1))
        pct=next(fh)
        m=re.search(r'\(\s*(\S+) of input\)\n',pct)
        fo.write('\t' + m.group(1))
        mm=next(fh)
        m=re.search(r'\(\s*(\S+)\).*\((\d+) have >20\)\n',mm)
        fo.write('\t' + m.group(1) + '\t' + m.group(2))

        # overall mapping rate: first token of the line
        ovr=next(fh)
        m=re.search(r'\s*(^\S+)',ovr)
        fo.write('\t' + m.group(1))
        next(fh)

        # pair statistics: aligned / multimapped / discordant / concordant
        aln=next(fh)
        m=re.search(r'(\d+)\n$',aln)
        fo.write('\t' + m.group(1))
        mm=next(fh)
        m=re.search(r'\(\s*(\S+)\) have',mm)
        fo.write('\t' + m.group(1))
        dc=next(fh)
        m=re.search(r'\(\s*(\S+)\) are',dc)
        fo.write('\t' + m.group(1))
        cc=next(fh)
        m=re.search(r'^\s*(\S+)',cc)
        fo.write('\t' + m.group(1) + '\n')
    return 0
Esempio n. 22
0
def attach_cinder(snapshot_id, vm_id, bid, cinder_size, vm_ip, timeout,
                  mount_sh_path):
    """Create a cinder volume from a snapshot, attach it to a VM and mount it.

    The volume is named ``refs_<bid>``.  After ``cinder create`` this waits
    (via ``wait_until_status``) for status 'available', attaches the volume
    with ``nova volume-attach``, waits for 'in-use', and finally pipes the
    script at ``mount_sh_path`` to ``sh -s`` on the VM over ssh with the
    volume name as its argument.

    :param snapshot_id: value for ``--snapshot-id``
    :param vm_id: ID of the VM to attach to
    :param bid: sample set identifier used to build the volume name
    :param cinder_size: size passed to ``cinder create``
    :param vm_ip: address used for the ssh connection
    :param timeout: maximum seconds for each wait
    :param mount_sh_path: local path of the mount script
    """
    cinder_name = 'refs_' + str(bid)
    sys.stderr.write(date_time() + ': Creating cinder volume ' + cinder_name +
                     ' using snapshot ID ' + snapshot_id + ' to VM with ID ' +
                     vm_id + '\n')

    # Source .novarc command
    src_cmd = '. /home/ubuntu/.novarc; '

    # Build cinder create command
    cinder_create_cmd = (src_cmd + 'cinder create ' + str(cinder_size) +
                         ' --snapshot-id ' + snapshot_id + ' --display-name ' +
                         cinder_name)
    sys.stderr.write(cinder_create_cmd + '\n')
    cinder_create_output = subprocess.check_output(cinder_create_cmd,
                                                   shell=True)

    # Get cinder id
    cinder_id = get_cinder_show_attr(cinder_create_output, 'id')

    # Check status of cinder every 30 seconds until finished spawning
    wait_until_status('available', 'boot', cinder_id, timeout)

    # Cinder is now booted, attach to VM
    sys.stderr.write(date_time() + ': Cinder create for ' + cinder_name +
                     ' with ID ' + cinder_id +
                     ' was successful. Attaching to VM.\n')
    volume_attach_cmd = src_cmd + 'nova volume-attach ' + vm_id + ' ' + cinder_id
    sys.stderr.write(volume_attach_cmd + '\n')
    subprocess.call(volume_attach_cmd, shell=True)

    # Make sure cinder attaches
    wait_until_status('in-use', 'attach', cinder_id, timeout)

    # Set mount point in VM.  Reuse cinder_name so the script receives exactly
    # the name the volume was created with, even when bid is not a string.
    sys.stderr.write(date_time() + ': Mounting volume in VM.\n')
    mount_cmd = (
        'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@'
        + vm_ip + ' \"sh -s\" < ' + mount_sh_path + ' \"' + cinder_name +
        '\" exit;')
    sys.stderr.write(mount_cmd + '\n')
    subprocess.call(mount_cmd, shell=True)
    sys.stderr.write(date_time() + ': Cinder successfully mounted.\n')
Esempio n. 23
0
def star(STAR,sam,genome,end1,end2,sample,log_dir,th):
    """Align a gzipped read pair with STAR, producing an unsorted BAM.

    The command is logged to ``<log_dir><sample>.star.log`` and STAR's stderr
    is appended to the same file.  Output files are prefixed with ``sample``.
    The exit status of STAR is not checked.

    :param sam: not used by this function
    :param th: thread count, as a string
    :return: 0
    """
    loc=log_dir + sample + ".star.log"
    # NOTE(review): an earlier comment described choosing the phred encoding
    # from the sample's date field; no such option is passed to STAR here.
    star_cmd=(STAR + " --runMode alignReads --outFileNamePrefix " + sample
              + " --runThreadN " + th + " --genomeDir " + genome
              + " --readFilesIn " + end1 + " " + end2
              + " --readFilesCommand zcat --outSAMtype BAM Unsorted --outFilterType BySJout"
              + " --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1"
              + " --outFilterMismatchNmax 8 --alignIntronMin 20 --alignIntronMax 1000000"
              + " --alignMatesGapMax 1000000 2>> " + loc)

    log(loc,date_time() + star_cmd + "\n")
    call(star_cmd,shell=True)
    return 0
Esempio n. 24
0
 def add_comment(self, comment, nick):
     """Attach a comment to this item and record who made it and when.

     The serialized comment is stored as a COMMENT statement on ``self.uri``.
     A second resource with a freshly generated URI then describes that
     statement (TYPE/SUBJECT/PREDICATE/OBJECT) together with the author's
     nick and the current time.
     """
     serialized = self._serialize_comment(nick, comment)
     state = self.make_statement
     state(self.uri, COMMENT, literal(serialized))
     # statement-about-the-statement, carrying the author and timestamp
     meta_uri = self.store.generate_uri()
     state(meta_uri, TYPE, STATEMENT)
     state(meta_uri, SUBJECT, self.uri)
     state(meta_uri, PREDICATE, COMMENT)
     state(meta_uri, OBJECT, literal(serialized))
     state(meta_uri, CHUMP_WHO, literal(nick))
     state(meta_uri, CHUMP_TIME, literal(date_time(time.time())))
def get_fqc_stats(bnids, cont, obj, novarc):
    """Download the FastQC html files for every sample ID listed in ``bnids``.

    After ``source_novarc(novarc)``, the html objects (excluding names
    containing 'report') under ``<obj>/<id>/QC`` in container ``cont`` are
    listed into ``<cont>_fqc.txt``.  ``setup_dirs`` is called with that list
    and each object is then downloaded to ``FASTQC/<id>/QC/<file name>``,
    where ``<id>`` is the second path component of the object name.

    :param bnids: path to a file with one sample ID per line
    :return: 0
    """
    source_novarc(novarc)
    flist = cont + '_fqc.txt'
    get_list_cmd = 'cat ' + bnids + ' | xargs -IBN swift list ' + cont + ' --prefix ' + obj + '/BN/QC | grep  html | ' \
                                                                                            'grep -v report > ' + flist
    sys.stderr.write(date_time() + get_list_cmd + '\n')
    subprocess.call(get_list_cmd, shell=True)
    sys.stderr.write(date_time() + 'Setting up dirs\n')
    setup_dirs(flist)
    with open(flist) as object_list:
        for path in object_list:
            path = path.rstrip('\n')
            bnid = path.split('/')[1]
            fn = os.path.basename(path)
            dl_cmd = 'swift download ' + cont + ' ' + path + ' --output FASTQC/' + bnid + '/QC/' + fn
            sys.stderr.write(date_time() + dl_cmd + '\n')
            subprocess.call(dl_cmd, shell=True)
    sys.stderr.write('Process complete!\n')

    return 0
Esempio n. 26
0
def cleanup(cid,vid,bid,vip):
    """Unmount and delete a cinder volume, then delete its VM.

    Steps, each run through the shell without checking the exit status:
    run unmount.sh on the VM over ssh with ``REFS_<bid>`` as its argument,
    detach the volume with nova, wait 30 seconds, delete the volume with
    cinder, and delete the VM with nova.

    :param cid: cinder volume ID
    :param vid: VM ID
    :param bid: sample set identifier used to build the volume name
    :param vip: address used for the ssh connection
    """
    cname="REFS_" + bid
    sys.stderr.write(date_time() + "Unmounting " + cid + " from vm with ID " + vid + "\n")
    # need build variables to call nova successfully
    src_cmd='. /home/ubuntu/.novarc;'
    unmount_cmd="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@" + vip + " \"sh -s\" < /home/ubuntu/TOOLS/Scripts/utility/unmount.sh \"" + cname + "\""
    sys.stderr.write(date_time() + unmount_cmd + "\n")
    subprocess.call(unmount_cmd,shell=True)
    detach_vm=src_cmd+"nova volume-detach " + vid + " " + cid
    sys.stderr.write(date_time() + detach_vm + "\n")
    subprocess.call(detach_vm,shell=True)
    # pause after the detach request before deleting the volume
    sleep_cmd='sleep 30s'
    subprocess.call(sleep_cmd,shell=True)
    delete_vol=src_cmd+"cinder delete " + cid
    sys.stderr.write(date_time() + "Deleting cinder volume " + cname + " with id " + cid + "\n")
    subprocess.call(delete_vol,shell=True)
    delete_vm=src_cmd + "nova delete " + vid
    sys.stderr.write(date_time() + "Deleting vm with id " + vid + "\n")
    subprocess.call(delete_vm,shell=True)
Esempio n. 27
0
def bid_swift_list(container, obj, bid_list_file_path):
    """Build and log a ``swift list`` command for each listed sample ID.

    For every line ``bid`` of the file at ``bid_list_file_path`` the command
    listing ``container`` under prefix ``<obj>/<bid>/`` is written to stderr.

    NOTE(review): as visible here the command is only logged, never
    executed - confirm whether that is intended.
    """
    # the trailing '; ' separates sourcing .novarc from the swift command
    src_cmd = '. /home/ubuntu/.novarc; '
    with open(bid_list_file_path, 'r') as bid_list_file:
        for bid in bid_list_file:
            bid = bid.rstrip('\n')
            sys.stderr.write(date_time() + ': Executing swift list.\n')
            swift_list_cmd = (src_cmd + 'swift list '
                + container + ' --prefix '
                + obj + '/' + bid + '/')
            sys.stderr.write(swift_list_cmd + '\n')
Esempio n. 28
0
def main():
    """Check a local file against the segment manifest of a swift object.

    Command line: ``prog.py <filename> <container> <manifest> <openstack
    variable file>``.  The URL and auth token are taken from the output of
    ``swift stat -v``, the manifest is fetched with
    ``multipart-manifest=get`` and ``check_manifest`` compares it with the
    local file.

    :return: 0 if the file matches, 1 if it does not or the ``swift stat``
             output cannot be parsed, or a usage string when the argument
             count is wrong
    """
    try:
        _prog, filename, container, manifest, var = sys.argv
    except ValueError:
        return "usage: prog.py <filename> <container> <manifest> <openstack variable file>"
    # The variable file is expected to hold lines such as:
    #   export OS_TENANT_NAME=xxx
    #   export OS_USERNAME=xxx
    #   export OS_PASSWORD=xxx
    #   export OS_AUTH_URL="xxx"
    with open(var, 'r') as fh:
        for line in fh:
            info = line.split()
            # ignore blank lines and anything that is not "<word> NAME=VALUE"
            if len(info) < 2 or '=' not in info[1]:
                continue
            # split on the first '=' only so values may themselves contain '='
            name, value = info[1].split('=', 1)
            os.environ[name] = value
    #    url, token = client.get_auth(os.environ['OS_AUTH_URL'], os.environ['OS_USERNAME'], os.environ['OS_PASSWORD'])
    # using client to get token and url doesn't seem to work, doing it the stupid way
    src_cmd = '. ' + var + ';'
    deproxy = 'unset http_proxy; unset https_proxy;'
    swift_cmd = deproxy + src_cmd + "swift stat -v " + container + " " + manifest
    sys.stderr.write(date_time() + swift_cmd + "\n")
    stat = check_output(swift_cmd, shell=True)
    header = re.search(r'URL: (\S+)\s+Auth Token: (\S+)\s+', stat)
    if header is None:
        sys.stderr.write(date_time() + 'Could not find URL and auth token in swift stat output\n')
        return 1
    url = header.group(1)
    token = header.group(2)
    # subtract object and manifest from url
    url = url.replace('/' + container + '/' + manifest, '')
    sys.stderr.write(date_time() + 'URL: ' + url + ' token: ' + token + '\n')
    headers, body = client.get_object(url, token, container, manifest, query_string='multipart-manifest=get')
    sys.stderr.write(date_time() + 'Object information recieved\n')
    with open(filename) as f:
        is_valid = check_manifest(body, f)

    return 0 if is_valid else 1
Esempio n. 29
0
def cutadapter(sample,end1,end2,config_file):
    """Trim a read pair with cutadapt using settings from ``config_file``.

    Inputs are read from ``../<end1>`` and ``../<end2>`` and the trimmed reads
    are written to ``end1`` / ``end2`` in the current directory.  The command
    is logged to ``<log dir>/<sample>.cutadapt.log`` (``LOGS/`` if that
    directory exists, otherwise the current directory), and the tool's stdout
    and stderr are appended to the same file.

    :return: 0
    """
    # casual logging - prefer a LOGS directory, otherwise use the current dir
    log_dir = 'LOGS/' if os.path.isdir('LOGS') else './'
    loc = log_dir + sample + '.cutadapt.log'
    (tool, min_len, adapter_r1, adapter_r2, trim_r1, trim_r2) = parse_config(config_file)
    filters = ' -m ' + min_len + ' -a ' + adapter_r1 + ' -A ' + adapter_r2 + ' -u ' + trim_r1 + ' -U ' + trim_r2
    outputs = ' -o ' + end1 + ' -p ' + end2
    inputs = ' ../' + end1 + ' ../' + end2
    redirect = ' >> ' + loc + ' 2>> ' + loc
    cutadapt_cmd = tool + filters + outputs + inputs + redirect
    log(loc, date_time() + cutadapt_cmd + "\n")
    call(cutadapt_cmd, shell=True)
    return 0
Esempio n. 30
0
 def add_sniffed(self, href, label, sniffed_from):
     if not self.ignore(href):
         uri = resource(href)
         label = literal(label)
         if not self.exists(uri, None, None):
             print "ADDED:", href, label
             self.add(uri, LABEL, label)
             self.add(uri, TYPE, SNIFFED)
             timestamp = literal(date_time())
             self.add(uri, SNIFFED_ON, timestamp)
             self.add(uri, SNIFFED_FROM, resource(sniffed_from))
Esempio n. 31
0
def cufflinks(cufflinks_tool, ens_ref, genome, sample, log_dir, t):
    """Run cufflinks on ``<sample>/accepted_hits.bam``.

    Arguments are concatenated into a shell command, so all of them must be
    strings:

    cufflinks_tool -- path/name of the cufflinks executable
    ens_ref        -- value passed to ``-g``
    genome         -- value passed to ``-b``
    sample         -- sample name; used as input directory and ``-o`` target
    log_dir        -- directory prefix (with trailing separator) for the log
    t              -- value passed to ``-p``

    The command is recorded in ``<log_dir><sample>.cufflinks.log`` and the
    tool's stderr is appended to the same file.

    Returns 0 on success.  If the command cannot be started or exits with a
    non-zero status, the failure is written to the log and the process
    exits with status 1.
    """
    loc = log_dir + sample + ".cufflinks.log"
    # Library type is fr-firststrand (an earlier revision of this command
    # used fr-secondstrand).
    cufflinks_cmd = (cufflinks_tool + " " + sample + "/accepted_hits.bam -g "
                     + ens_ref + " -p " + t
                     + " --library-type fr-firststrand -b " + genome
                     + " -u --upper-quartile-norm --pre-mrna-fraction -o "
                     + sample + " 2>> " + loc)

    log(loc, date_time() + cufflinks_cmd + "\n")
    try:
        subprocess.check_output(cufflinks_cmd, shell=True)
    except (subprocess.CalledProcessError, OSError):
        # Only trap genuine command failures; leave Ctrl-C and programming
        # errors alone, and say why we are exiting instead of dying silently.
        log(loc, date_time() + 'cufflinks failed for sample ' + sample + '\n')
        exit(1)

    return 0
Esempio n. 32
0
def novosort_sort_se(novosort, sample, log_dir, threads, ram):
    """Sort and index ``<sample>.bam`` with novosort into ``<sample>.srt.bam``.

    All arguments are concatenated into a shell command and must be strings:

    novosort -- path/name of the novosort executable
    sample   -- sample name (input ``<sample>.bam``, output ``<sample>.srt.bam``)
    log_dir  -- directory prefix (with trailing separator) for the log file
    threads  -- value passed to ``--threads``
    ram      -- value passed to ``--ram``; a ``G`` suffix is appended

    A scratch directory ``novosort_tmp`` is created by the command and
    removed afterwards whether or not the sort succeeded.

    Returns the exit status of the shell command.  A non-zero status is
    noted in the log but still returned so the caller can decide what to do.
    If the command cannot be started at all the failure is logged and the
    process exits with status 1.
    """
    loc = log_dir + sample + ".novosort.sort.se.log"
    # NOTE(review): the command redirects with '>' into the same file that
    # log() writes to just below; if log() appends, the recorded command
    # line is overwritten when the shell opens the file - confirm intent.
    novosort_sort_se_cmd = ('mkdir novosort_tmp;' + novosort + " --threads "
                            + threads + " --ram " + ram
                            + "G --tmpdir novosort_tmp --output " + sample
                            + ".srt.bam --index  " + sample + ".bam > " + loc
                            + " 2>&1")
    log(loc, date_time() + novosort_sort_se_cmd + "\n")
    failure_msg = 'novosort sort failed for sample ' + sample + '\n'
    try:
        f = subprocess.call(novosort_sort_se_cmd, shell=True)
    except OSError:
        log(loc, failure_msg)
        exit(1)
    finally:
        # Always drop the scratch directory, including on the failure path.
        subprocess.call('rm -rf novosort_tmp', shell=True)
    if f != 0:
        # subprocess.call does not raise on a non-zero exit, so report it here.
        log(loc, failure_msg)
    return f
Esempio n. 33
0
def novosort_merge_pe(config_file, sample_list, wait):
    """Merge the bam files of every sample listed in ``sample_list``.

    config_file -- passed to ``parse_config``, which supplies the novosort
                   executable plus the two values forwarded to ``list_bam``
    sample_list -- path to a text file with one sample name per line
    wait        -- forwarded unchanged to ``list_bam``

    For each sample ``list_bam`` returns the bam files, their index files
    and a count.  With more than one bam, novosort merges them into
    ``<sample>.merged.bam``; otherwise the first bam and index are copied to
    ``<sample>.merged.final.bam`` / ``.bai``.

    Returns 0 when every sample has been handled.  If a novosort merge
    fails, the error is reported on stderr and the process exits with
    status 1.
    """
    # The context manager closes the sample list even when we exit early.
    with open(sample_list, 'r') as fh:
        (novosort, cont, obj) = parse_config(config_file)
        for sample in fh:
            sample = sample.rstrip('\n')
            (bam_list, bai_list, n) = list_bam(cont, obj, sample, wait)
            if n > 1:
                bam_string = ",".join(bam_list)
                novosort_merge_pe_cmd = (
                    novosort
                    + " --threads 8 --ram 28G --assumesorted --output "
                    + sample + '.merged.bam --index --tmpdir ./TMP '
                    + bam_string)
                sys.stderr.write(date_time() + novosort_merge_pe_cmd + "\n")
                try:
                    subprocess.check_output(novosort_merge_pe_cmd, shell=True)
                except (subprocess.CalledProcessError, OSError):
                    sys.stderr.write(date_time()
                                     + 'novosort failed for sample '
                                     + sample + '\n')
                    exit(1)
            else:
                # NOTE(review): assumes list_bam always yields at least one
                # bam/bai pair; an empty result would raise IndexError here.
                rename_bam = ('cp ' + bam_list[0] + ' ' + sample
                              + '.merged.final.bam;cp ' + bai_list[0] + ' '
                              + sample + '.merged.final.bai')
                sys.stderr.write(
                    date_time() + rename_bam
                    + ' Only one associated bam file, renaming\n')
                subprocess.call(rename_bam, shell=True)
    sys.stderr.write(date_time() + 'Merge process complete\n')
    return 0
Esempio n. 34
0
def tophat(tophat_tool, tx, bwt2_ref, end1, end2, x, s, sample, log_dir, th):
    """Align a read pair with tophat and log the command that was run.

    All arguments are concatenated into a shell command and must be strings:

    tophat_tool -- path/name of the tophat executable
    tx          -- value for ``--transcriptome-index``
    bwt2_ref    -- reference passed positionally before the read files
    end1, end2  -- the two read files
    x, s        -- values for ``--mate-inner-dist`` and ``--mate-std-dev``
    sample      -- sample name, also used as the ``-o`` output directory
    log_dir     -- directory prefix (with trailing separator) for the log
    th          -- value for ``--num-threads``

    The second ``_``-separated field of ``sample`` is treated as a date
    stamp: when it is an integer below 150409, ``--phred64-quals`` is added
    to the command; otherwise the flag is left out.

    Always returns 0; the exit status of tophat is not inspected.
    """
    loc = log_dir + sample + ".tophat.log"
    # samples dated before this stamp get the phred64 flag
    epoch = 150409
    fields = sample.split('_')
    uses_phred64 = (len(fields) >= 2
                    and RepresentsInt(fields[1]) == True
                    and int(fields[1]) < epoch)
    # the leading double space matches the spacing of the historical command
    quality_opt = "  --phred64-quals" if uses_phred64 else ""
    tophat_cmd = (tophat_tool + " --no-coverage-search" + quality_opt
                  + " --mate-inner-dist " + x + " --mate-std-dev " + s
                  + " --num-threads " + th
                  + " --library-type fr-firststrand --transcriptome-index "
                  + tx + " -o " + sample + " " + bwt2_ref + " " + end1 + " "
                  + end2 + " 2>> " + loc)
    log(loc, date_time() + tophat_cmd + "\n")
    call(tophat_cmd, shell=True)
    return 0
Esempio n. 35
0
def cov_hole_matrix(hlist, aflag, low=30):
    """Print a region-by-sample matrix of low-coverage values.

    hlist -- path to a text file listing one ``.hist`` file per line; the
             sample name is the file's base name with ``.hist`` removed
    aflag -- ``'n'`` when the bed file was not annotated: coverage is then
             read from column 3 and the region is written as
             ``chrom:start-end`` from columns 0-2; for any other value
             coverage is column 4 and the region name is column 3
    low   -- coverage threshold; only values strictly below it are kept,
             and it is also printed for samples with no low value in a
             region (default 30)

    Each hist file is read up to the first line starting with ``all``
    (presumably where the per-region records end - confirm against the
    tool that produces the files).

    The tab-separated matrix is written to stdout with a header row of
    sample names; progress messages go to stderr.  Returns None.
    """
    cov_dict = {}
    slist = []
    unannotated = aflag == 'n'
    # coverage column is 4th unless bedfile wasn't annotated
    c = 3 if unannotated else 4
    with open(hlist) as list_fh:
        for floc in list_fh:
            floc = floc.rstrip('\n')
            samp = os.path.basename(floc).replace('.hist', '')
            sys.stderr.write(date_time() + 'Processing file ' + floc +
                             ' sample name ' + samp + '\n')
            slist.append(samp)
            # 'with' closes the file whether or not an 'all' line is found
            with open(floc) as fh:
                for line in fh:
                    if line[0:3] == 'all':
                        break
                    info = line.rstrip('\n').split('\t')
                    if int(info[c]) < low:
                        if unannotated:
                            reg = info[0] + ':' + info[1] + '-' + info[2]
                        else:
                            reg = info[3]
                        cov_dict.setdefault(reg, {})[samp] = info[c]
    sys.stderr.write(date_time() + 'Outputting matrix\n')
    sys.stdout.write('Sample/Region\t' + '\t'.join(slist) + '\n')
    placeholder = '\t' + str(low)
    for region in cov_dict:
        sys.stdout.write(region)
        for samp in slist:
            if samp in cov_dict[region]:
                sys.stdout.write('\t' + cov_dict[region][samp])
            else:
                sys.stdout.write(placeholder)
        sys.stdout.write('\n')
Esempio n. 36
0
def setup_vm(bid, image, flavor, key, timeout):
    """Boot a nova VM for a sample set and copy ``.novarc`` onto it.

    bid     -- sample-set identifier; the VM is named ``vm_pipe_<bid>``
    image   -- value for ``nova boot --image``
    flavor  -- value for ``nova boot --flavor``
    key     -- value for ``nova boot --key-name``
    timeout -- maximum number of seconds to wait for the VM to become
               ACTIVE (anything ``int()`` accepts)

    After booting, ``nova show`` is polled every 30 seconds until the status
    is ACTIVE.  The function then waits a further 60 seconds, adds the VM to
    ``~/.ssh/known_hosts`` and rsyncs ``/home/ubuntu/.novarc`` to it.

    Returns ``[vm_id, vm_ip]``.

    Raises ValueError (from ``int``) if ``timeout`` is not a valid integer,
    before any VM is created; Exception if the VM reports status ERROR or is
    still not ACTIVE once ``timeout`` seconds have elapsed; and
    ``subprocess.CalledProcessError`` if a nova command fails.
    """
    # Local import so this block does not depend on the file's import list.
    import time

    # Validate the timeout before creating a VM that would otherwise be
    # left behind when the conversion fails inside the polling loop.
    timeout_s = int(timeout)

    sys.stderr.write(date_time() + ': Starting VM QC for sample set ' +
                     str(bid) + '.\n')

    # Source .novarc command
    src_cmd = '. /home/ubuntu/.novarc; '

    # Build nova boot command
    # NOTE(review): unlike 'nova show' below, this command is not prefixed
    # with src_cmd; it relies on the credentials already being in the
    # environment - confirm that is intended.
    vm_name = 'vm_pipe_' + str(bid)
    nova_boot_cmd = ('nova boot ' + vm_name + ' --image ' + image +
                     ' --flavor ' + str(flavor) + ' --key-name ' + key)
    sys.stderr.write(date_time() + ': Booting up VM.\n' + nova_boot_cmd + '\n')
    nova_boot_cmd_output = subprocess.check_output(nova_boot_cmd, shell=True)

    # Get ID of VM in the event another has the same display name
    vm_id = get_nova_show_attr(nova_boot_cmd_output, 'id')

    # Check status of VM every 30 seconds until finished spawning
    sleep_time = 30
    elapsed_time = 0
    nova_show_cmd = src_cmd + 'nova show ' + vm_id

    while True:
        sys.stderr.write(date_time() + ': Sleeping ' + str(sleep_time) +
                         's.\n')
        time.sleep(sleep_time)
        elapsed_time += sleep_time
        if elapsed_time > timeout_s:
            # TODO I think we should delete the VM somehow, wait until ACTIVE
            raise Exception('FATAL ERROR: VM still booting as timeout of ' +
                            str(timeout) +
                            's was reached. Increase timeout and try again.\n')
        sys.stderr.write(date_time() + ': Checking success of VM boot. ' +
                         str(elapsed_time) + ' seconds have passed.\n')
        nova_show_cmd_output = subprocess.check_output(nova_show_cmd,
                                                       shell=True)
        vm_status = get_nova_show_attr(nova_show_cmd_output, 'status')
        if vm_status == 'ACTIVE':
            vm_ip = get_nova_show_attr(nova_show_cmd_output, 'private_network')
            break
        if vm_status == 'ERROR':
            raise Exception('FATAL ERROR: VM boot produced ERROR for ' +
                            vm_name +
                            '. Check connection settings and try again.\n')

    # VM has now booted up, transfer .novarc to new VM
    sys.stderr.write(date_time() + ': VM booted!\n')
    sys.stderr.write(date_time() +
                     ': Pausing 60s to give VM a chance to initialize.\n')
    time.sleep(60)  # TODO should we have a more robust check?
    rsync_nova_var_cmd = (
        'ssh-keyscan ' + vm_ip +
        ' >> ~/.ssh/known_hosts;rsync /home/ubuntu/.novarc ubuntu@' + vm_ip +
        ':/home/ubuntu')
    sys.stderr.write(date_time() + ': Copying openstack variables to VM\n' +
                     rsync_nova_var_cmd + '\n')
    # The exit status of the keyscan/rsync step is not checked.
    subprocess.call(rsync_nova_var_cmd, shell=True)
    sys.stderr.write(date_time() + ': VM setup for ' + vm_name +
                     ' with IP address ' + vm_ip + ' with ID ' + vm_id +
                     ' was successful.\n')

    # Return VM information
    return [vm_id, vm_ip]
Esempio n. 37
0
#!/usr/bin/python

import sys

sys.path.append('/Users/Miguel/Documents/Scripts/white_lab/RNAseq/utility')
from date_time import date_time

flist = open(sys.argv[1], 'r')
data = {}
bids = []
for fn in flist:
    fn = fn.rstrip('\n')
    sys.stderr.write(date_time() + 'Processing file ' + fn + '\n')
    parts = fn.split('.')
    bid = parts[2]
    bids.append(bid)
    cur = open(fn, 'r')
    head = next(cur)
    for line in cur:
        line = line.rstrip('\n')
        datum = line.split('\t')
        # will only bother outputting transcripts with values > 0
        tx = datum[4]
        val = datum[9]
        if tx == '-':
            tx = datum[6]
        if float(val) > 0:
            if tx not in data:
                data[tx] = {}
            data[tx][bid] = val
    sys.stderr.write(date_time() + 'Completed processing file ' + fn + '\n')