def main(argv):
    """Summarise a collection of transcript sets as a text report and a diagram.

    Arguments (argv):
        An optional leading '--USE_MOBILE' flag, followed by the paths of the
        transcript-data files to compare.  When the flag is present, the sets
        are loaded with use_all=False and '.mobile_only' is appended to the
        output name.

    For each input file the sample is taken to be 'Ped' if the file's
    basename contains 'ped' (case-insensitive), otherwise 'Col'.  The set
    name is the basename with '_TRANSCRIPTDATA.pickled' and 'Sample_'
    removed.

    Output:
        <name1>+<name2>+...[.mobile_only].venn.txt -- set sizes, followed by
            the size and the members of every entry returned by process_sets.
        <name1>+<name2>+...[.mobile_only].png -- written by generate_diagram.

    Raises IndexError if argv is empty.
    """
    use_all = True
    if argv[0] == '--USE_MOBILE':
        use_all = False
        argv = argv[1:]

    sets = []
    setnames = []
    for fn in argv:
        basename = os.path.basename(fn)
        sample = 'Ped' if 'ped' in basename.lower() else 'Col'
        # Close every input file as soon as its set has been built instead of
        # leaving the handle to the garbage collector.
        with open(fn, 'rb') as transcript_in:
            set_ = load_set(load_transcript_data(transcript_in, sample=sample),
                            use_all=use_all)
        setname = basename.replace('_TRANSCRIPTDATA.pickled', '').replace('Sample_', '')
        sets.append((setname, set_))
        setnames.append(setname)

    outfile = '+'.join(setnames)
    if not use_all:
        outfile += '.mobile_only'

    # The context manager guarantees the report is flushed and closed even if
    # process_sets or generate_diagram raises part-way through.
    with open(outfile + '.venn.txt', 'w') as fo:
        fo.write('#SETS = %i\n' % len(sets))
        # NOTE(review): '#\t' is used as the *separator* here, so the line does
        # not start with '#'.  This may have been meant as '#' + '\t'.join(...);
        # left unchanged so existing consumers of the file keep working.
        fo.write('#\t'.join(['%s: %i' % (s[0], len(s[1])) for s in sets]) + '\n')

        processed = process_sets(sets)
        generate_diagram(processed, outfile + '.png')

        # First section: one "<id>\t<size>" line per processed entry.
        for id_, set_ in processed:
            fo.write('%s\t%s\n' % (id_, len(set_)))
        fo.write('\n')

        # Second section: the same header line followed by every member.
        for id_, set_ in processed:
            fo.write('%s\t%s\n' % (id_, len(set_)))
            for item in set_:
                fo.write('%s\n' % item)
def main(argv):
    """Run the pileup-processing mode or the summary mode, depending on argv.

    MODE1 (more than three arguments): build per-transcript results from
    pileup counts.
        0.   transcript/snp data file
        1.   sample name {Col,Ped} (not used in this mode)
        2.   output prefix; results are written to <prefix>_SNPDATA.pickled,
             <prefix>_TRANSCRIPTDATA.pickled and <prefix>_READCHECKLIST.pickled
        3-n. one or more BAM files

    MODE2 (three arguments or fewer): summarise per-transcript results.
        0.   name of a <arbitrary>_TRANSCRIPTDATA.pickled file
        1.   sample name {Col,Ped}
        The output prefix handed to write_data is argv[0] with the trailing
        'TRANSCRIPTDATA.pickled' removed.

    Progress messages are written to stderr.  Raises IndexError if fewer
    than two arguments are given.
    """
    if len(argv) > 3:
        sys.stderr.write('%s: MODE1\n' % get_timestamp())
        with open(argv[0]) as transcript_in:
            transcript_d, snp_d = read_transcript_data(transcript_in)
        prefix = argv[2]
        read_checklist = set()

        for bam_fn in argv[3:]:
            sys.stderr.write('%s: Processing file %s...\n' % (get_timestamp(), bam_fn))
            bam = pysam.Samfile(bam_fn, 'rb')
            # try/finally rather than "with": not every pysam release makes
            # Samfile a context manager, but all of them provide close().
            try:
                process_pileups(bam, snp_d, read_checklist)
            finally:
                bam.close()

        # Write each result through a context manager so the pickle is fully
        # flushed and the handle released before the next one is opened.
        outputs = (
            ('_SNPDATA.pickled', snp_d),
            ('_TRANSCRIPTDATA.pickled', transcript_d),
            ('_READCHECKLIST.pickled', read_checklist),
        )
        for suffix, payload in outputs:
            with open(prefix + suffix, 'wb') as pickle_out:
                pickle.dump(payload, pickle_out)
    else:
        sys.stderr.write('%s: MODE2\n' % get_timestamp())
        sys.stderr.write('%s: Loading data from %s.\n' % (get_timestamp(), argv[0]))
        with open(argv[0], 'rb') as transcript_in:
            transcript_d = load_transcript_data(transcript_in, sample=argv[1])
        sys.stderr.write('%s: Finished loading %s.\n' % (get_timestamp(), argv[0]))

        # str.rstrip() removes any trailing run of the given *characters*, not
        # a suffix, so it can eat into the real prefix (for example
        # 'SampleA.TRANSCRIPTDATA.pickled' -> 'Sam').  Remove the exact suffix
        # instead; names that do not end with it are left untouched.
        suffix = 'TRANSCRIPTDATA.pickled'
        prefix = argv[0]
        if prefix.endswith(suffix):
            prefix = prefix[:-len(suffix)]

        write_data(transcript_d, sample=argv[1], prefix=prefix)