def main(argv=None):
    '''Merge per-sample read counts into one tab-separated table.

    Reads a tab-separated list file (-i/--in) whose first column is the path
    of an annotated-reads file and whose second column is the sample id.
    Each reads file is indexed with
    AnnotatedReads.index_annotatedreads_file(count_id='reads', subdic=False)
    and the per-annotation values are collected per sample.  The merged table
    (-o/--out) has a header row of 'id' followed by the sample ids, then one
    row per annotation; a sample without a value for an annotation gets the
    -n/--nan filler (default 0).

    argv -- list of command line arguments; defaults to sys.argv[1:].
    '''
    program_version = "v0.1"
    program_build_date = "%s" % __updated__

    program_version_string = '%%prog %s (%s)' % (program_version,
                                                 program_build_date)
    # optional - give further explanation about what the program does
    program_longdesc = ''
    program_license = (
        "Copyright 2014 Jake Yeung (Laboratory for Advanced Genome Analysis) "
        "Licensed under the Apache License 2.0\n"
        "http://www.apache.org/licenses/LICENSE-2.0")

    if argv is None:
        argv = sys.argv[1:]

    # setup option parser
    parser = OptionParser(version=program_version_string,
                          epilog=program_longdesc,
                          description=program_license)
    parser.add_option("-i", "--in", dest="infile",
                      help="set input path containing list of filepaths [default: %default]",
                      metavar="FILE")
    parser.add_option("-o", "--out", dest="outfile",
                      help="set output path for merged read counts table[default: %default]",
                      metavar="FILE")
    parser.add_option("-v", "--verbose", dest="verbose", action="count",
                      help="set verbosity level [default: %default]")
    parser.add_option("-n", "--nan", dest="nan", type="int",
                      help="Output nan if sample does not have info on an miRNA.",
                      metavar="INT")

    # set defaults
    parser.set_defaults(outfile="./out.txt", infile="./in.txt", nan=0)

    # process options
    (opts, args) = parser.parse_args(argv)

    # opts.verbose is None when -v was never given; normalise it once so the
    # comparisons below never compare None with an int.
    verbose = opts.verbose or 0

    if verbose > 0:
        print("verbosity level = %d" % verbose)
    if opts.infile:
        print("infile = %s" % opts.infile)
    if opts.outfile:
        print("outfile = %s" % opts.outfile)

    # MAIN BODY #

    # get list of files: column 0 is the reads file, column 1 the sample id
    filepaths = []
    sampids = []
    # binary mode: what the Python 2 csv module expects
    with open(opts.infile, 'rb') as infile:
        infile_reader = csv.reader(infile, delimiter='\t')
        for row in infile_reader:
            if not row:
                # blank line (e.g. trailing newline): nothing to read
                continue
            filepaths.append(row[0])
            sampids.append(row[1])

    # merged_dic maps annotation -> {sample id: value from that sample}
    merged_dic = {}
    for readsfile, sampid in zip(filepaths, sampids):
        readsobj = AnnotatedReads.AnnotatedReads(readsfile, stats_file=None)
        reads_dic = AnnotatedReads.index_annotatedreads_file(readsobj,
                                                             count_id='reads',
                                                             subdic=False)
        # Count explicitly so the reported number is the true number of
        # annotations and is defined (0) even when the sample has none.
        annot_count = 0
        for annot in reads_dic:
            merged_dic.setdefault(annot, {})[sampid] = reads_dic[annot]
            annot_count += 1
        if verbose > 0:
            print('Sample: %s added to merged_dic (%s mirna IDs)'
                  % (sampid, annot_count))

    # write to file
    with open(opts.outfile, 'wb') as outfile:
        outwriter = csv.writer(outfile, delimiter='\t')
        # header: mirna ID + samples
        outwriter.writerow(['id'] + sampids)
        # data: mirna ID + value from each sample, in header order
        writerowcount = 0
        for annot in merged_dic:
            per_sample = merged_dic[annot]
            # if the sample has a value for this miRNA use it,
            # otherwise fall back to the --nan filler
            writerow = [annot] + [per_sample.get(sampid, opts.nan)
                                  for sampid in sampids]
            outwriter.writerow(writerow)
            writerowcount += 1

    if verbose > 0:
        print('%s rows written to file: %s' % (writerowcount, opts.outfile))
def main(argv=None):
    '''Write a tumour-vs-benign fold-change table, one row per annotation.

    The tumour (-t/--tumour) and benign (-b/--benign) annotated-reads files
    are indexed with AnnotatedReads.index_annotatedreads_file using the count
    selected by -r/--reads_id (default "reads").  For every annotation in the
    benign index that also has that count in the tumour index, a row
    [annotation, benign sum, tumour sum, tumour / benign] is written to the
    tab-separated output file (-o/--out).  Annotations missing from the
    tumour index are skipped.

    argv -- list of command line arguments; defaults to sys.argv[1:].

    Raises ZeroDivisionError if a benign sum is zero, and KeyError if a
    benign entry lacks the selected count.
    '''
    program_version = "v0.1"
    program_build_date = "%s" % __updated__

    program_version_string = '%%prog %s (%s)' % (program_version,
                                                 program_build_date)
    # optional - give further explanation about what the program does
    program_longdesc = ''
    program_license = (
        "Copyright 2014 Jake Yeung (Laboratory for Advanced Genome Analysis) "
        "Licensed under the Apache License 2.0\n"
        "http://www.apache.org/licenses/LICENSE-2.0")

    if argv is None:
        argv = sys.argv[1:]

    # setup option parser
    parser = OptionParser(version=program_version_string,
                          epilog=program_longdesc,
                          description=program_license)
    parser.add_option("-t", "--tumour", dest="tumourfile",
                      help="set input path for tumour [default: %default]",
                      metavar="FILE")
    parser.add_option("-b", "--benign", dest="benignfile",
                      help="set input path for benign [default: %default]",
                      metavar="FILE")
    parser.add_option("-o", "--out", dest="outfile",
                      help="set output path [default: %default]",
                      metavar="FILE")
    parser.add_option("-v", "--verbose", dest="verbose", action="count",
                      help="set verbosity level [default: %default]")
    parser.add_option("-r", "--reads_id", dest="reads_id",
                      help="Extract raw reads or normalized reads (TPM)",
                      metavar="TPM | reads")

    # set defaults (only for options this script actually defines)
    parser.set_defaults(outfile="./out.txt", reads_id="reads")

    # process options
    (opts, args) = parser.parse_args(argv)

    # opts.verbose is None when -v was never given
    if opts.verbose:
        print("verbosity level = %d" % opts.verbose)
    # echo each option guarded by its own value
    if opts.tumourfile:
        print("tumourfile = %s" % opts.tumourfile)
    if opts.benignfile:
        print("benignfile = %s" % opts.benignfile)
    if opts.outfile:
        print("outfile = %s" % opts.outfile)
    if opts.reads_id:
        print("reads_id = %s" % opts.reads_id)

    # MAIN BODY #

    reads_id = opts.reads_id
    tumour_annotatedreads = AnnotatedReads.AnnotatedReads(opts.tumourfile)
    benign_annotatedreads = AnnotatedReads.AnnotatedReads(opts.benignfile)
    benign_dic = AnnotatedReads.index_annotatedreads_file(
        benign_annotatedreads, count_id=reads_id)
    tumour_dic = AnnotatedReads.index_annotatedreads_file(
        tumour_annotatedreads, count_id=reads_id)

    # The context manager closes the output file even if a lookup or the
    # division below raises part-way through.
    # binary mode: what the Python 2 csv module expects
    with open(opts.outfile, 'wb') as outfile:
        outwriter = csv.writer(outfile, delimiter='\t')

        # header: annotation, benign sum, tumour sum, fold change
        writeheader = ['mirna',
                       '%s_benign_sum' % reads_id,
                       '%s_tumour_sum' % reads_id,
                       '%s_sum_fold_change' % reads_id]
        outwriter.writerow(writeheader)

        for annot in benign_dic:
            try:
                tumour_annotatedreads_sum = tumour_dic[annot][reads_id]
            except KeyError:
                # annotation (or this count) absent from tumour: no row
                continue
            benign_annotatedreads_sum = benign_dic[annot][reads_id]
            # float() forces true division under Python 2
            fold_change = (float(tumour_annotatedreads_sum)
                           / benign_annotatedreads_sum)
            # write row, matching writeheader
            outwriter.writerow([annot,
                                benign_annotatedreads_sum,
                                tumour_annotatedreads_sum,
                                fold_change])