예제 #1
0
파일: venn.py 프로젝트: cschu/ngslib
def main(argv):
    """Write an overlap report and a diagram for a group of transcript sets.

    Parameters:
        argv: list of command-line arguments. An optional leading
            '--USE_MOBILE' flag is followed by the paths of the pickled
            transcript-data files to compare.

    For each input file the sample is taken to be 'Ped' when the file's
    basename contains 'ped' (case-insensitive) and 'Col' otherwise. The set
    name is the basename with '_TRANSCRIPTDATA.pickled' and 'Sample_'
    removed.

    Outputs, where <names> is the set names joined by '+' (with
    '.mobile_only' appended when '--USE_MOBILE' was given):
        <names>.venn.txt  - set sizes, then the members of each processed set
        <names>.png       - diagram produced by generate_diagram()

    Raises:
        IndexError: if argv is empty.
    """
    use_all = True
    if argv[0] == '--USE_MOBILE':
        # The flag is forwarded to load_set() as use_all=False.
        # NOTE(review): presumably restricts the sets to mobile transcripts;
        # confirm against load_set().
        use_all = False
        argv = argv[1:]

    sets = []
    setnames = []
    for fn in argv:
        basename = os.path.basename(fn)
        sample = 'Ped' if 'ped' in basename.lower() else 'Col'

        # Close the pickle file as soon as the set has been built instead of
        # leaking one handle per input file.
        with open(fn, 'rb') as fi:
            set_ = load_set(load_transcript_data(fi, sample=sample),
                            use_all=use_all)

        setname = basename.replace('_TRANSCRIPTDATA.pickled', '')
        setname = setname.replace('Sample_', '')
        sets.append((setname, set_))
        setnames.append(setname)

    outfile = '+'.join(setnames)
    if not use_all:
        outfile += '.mobile_only'

    # The context manager guarantees the report is flushed and closed even
    # if process_sets() or generate_diagram() raises.
    with open(outfile + '.venn.txt', 'w') as fo:
        fo.write('#SETS = %i\n' % len(sets))
        # NOTE(review): '#\t' is used as the *separator*, so the first entry
        # has no leading '#'. This looks unintended but is kept as-is so the
        # report format does not change; confirm before altering.
        fo.write('#\t'.join(['%s: %i' % (name, len(members))
                             for name, members in sets]) + '\n')

        processed = process_sets(sets)
        generate_diagram(processed, outfile + '.png')

        # Summary table: one "<id>\t<size>" line per processed set.
        for id_, set_ in processed:
            fo.write('%s\t%s\n' % (id_, len(set_)))
        fo.write('\n')

        # Detailed listing: each set's summary line followed by its members.
        for id_, set_ in processed:
            fo.write('%s\t%s\n' % (id_, len(set_)))
            for item in set_:
                fo.write('%s\n' % item)
예제 #2
0
def main(argv):
    """Generate or summarise per-transcript results.

    Input EITHER (to generate per-transcript results from pileup counts;
    selected when more than three arguments are given)
    0. transcript/snp data
    1. sample name {Col,Ped}
    2. output prefix (output is <prefix>_<DATATYPE>.pickled)
    3-n. bam files
    OR (to summarise per-transcript results)
    0. name of a <arbitrary>_TRANSCRIPTDATA.pickled file
    1. sample name {Col,Ped}

    In both modes the transcript data is finally passed to write_data()
    together with the sample name and the prefix.
    """
    if len(argv) > 3:
        sys.stderr.write('%s: MODE1\n' % get_timestamp())
        # Close the input file once it has been parsed.
        with open(argv[0]) as fi:
            transcript_d, snp_d = read_transcript_data(fi)
        prefix = argv[2]
        read_checklist = set()
        for bam_fn in argv[3:]:
            sys.stderr.write('%s: Processing file %s...\n' % (get_timestamp(), bam_fn))
            process_pileups(pysam.Samfile(bam_fn, 'rb'), snp_d, read_checklist)

        # Write each pickle through a context manager so the data is flushed
        # and the handle closed before write_data() runs.
        dumps = (
            (snp_d, '_SNPDATA.pickled'),
            (transcript_d, '_TRANSCRIPTDATA.pickled'),
            (read_checklist, '_READCHECKLIST.pickled'),
        )
        for data, suffix in dumps:
            with open(prefix + suffix, 'wb') as fo:
                pickle.dump(data, fo)
    else:
        sys.stderr.write('%s: MODE2\n' % get_timestamp())
        sys.stderr.write('%s: Loading data from %s.\n' % (get_timestamp(), argv[0]))
        with open(argv[0], 'rb') as fi:
            transcript_d = load_transcript_data(fi, sample=argv[1])
        sys.stderr.write('%s: Finished loading %s.\n' % (get_timestamp(), argv[0]))

        # str.rstrip() removes a *set of characters*, not a suffix, so it can
        # eat into the real file name (e.g. 'SAMPLE_DATA.pickled' would lose
        # 'DATA' as well). Remove the exact suffix instead. For names of the
        # documented form '<arbitrary>_TRANSCRIPTDATA.pickled' the result is
        # unchanged, including the trailing underscore.
        suffix = 'TRANSCRIPTDATA.pickled'
        prefix = argv[0]
        if prefix.endswith(suffix):
            prefix = prefix[:-len(suffix)]

    write_data(transcript_d, sample=argv[1], prefix=prefix)