def main(argv=None):
    '''Merge per-sample annotated read counts into one tab-separated table.

    The input list file (-i) is tab-separated with one sample per line:
    column 1 is the path to an annotated reads file, column 2 is the sample
    ID. Each listed file is indexed through AnnotatedReads and the results
    are combined into a table with one row per annotation ID and one column
    per sample, written to the output path (-o). Samples that have no entry
    for an annotation get the value given by -n/--nan (default 0).

    argv -- list of command line arguments without the program name;
            defaults to sys.argv[1:] when None.
    '''

    program_version = "v0.1"
    program_build_date = "%s" % __updated__

    program_version_string = '%%prog %s (%s)' % (program_version, program_build_date)
    program_longdesc = ''  # optional - give further explanation about what the program does
    program_license = "Copyright 2014 Jake Yeung (Laboratory for Advanced Genome Analysis)                                            \
                Licensed under the Apache License 2.0\nhttp://www.apache.org/licenses/LICENSE-2.0"

    if argv is None:
        argv = sys.argv[1:]

    # setup option parser
    parser = OptionParser(version=program_version_string, epilog=program_longdesc, description=program_license)
    parser.add_option("-i", "--in", dest="infile", help="set input path containing list of filepaths [default: %default]", metavar="FILE")
    parser.add_option("-o", "--out", dest="outfile", help="set output path for merged read counts table[default: %default]", metavar="FILE")
    parser.add_option("-v", "--verbose", dest="verbose", action="count", help="set verbosity level [default: %default]")
    parser.add_option("-n", "--nan", dest="nan", type="int", help="Output nan if sample does not have info on an miRNA.", metavar="INT")
    # set defaults; verbose defaults to 0 so the "> 0" checks below always
    # compare integers instead of None
    parser.set_defaults(outfile="./out.txt", infile="./in.txt", nan=0, verbose=0)

    # process options
    (opts, args) = parser.parse_args(argv)

    if opts.verbose > 0:
        print("verbosity level = %d" % opts.verbose)
    if opts.infile:
        print("infile = %s" % opts.infile)
    if opts.outfile:
        print("outfile = %s" % opts.outfile)

    # MAIN BODY #
    # get list of files and their sample IDs
    filepaths = []
    sampids = []
    # binary mode: what the Python 2 csv module expects for its file objects
    with open(opts.infile, 'rb') as infile:
        for row in csv.reader(infile, delimiter='\t'):
            if not row:
                # blank line (e.g. a trailing newline in the list file)
                continue
            # each row is: file path <TAB> sample ID
            filepaths.append(row[0])
            sampids.append(row[1])

    # merged_dic[annotation ID][sample ID] -> value indexed for that sample
    merged_dic = {}
    for readsfile, sampid in zip(filepaths, sampids):
        readsobj = AnnotatedReads.AnnotatedReads(readsfile, stats_file=None)
        reads_dic = AnnotatedReads.index_annotatedreads_file(readsobj, count_id='reads', subdic=False)
        # count from 1 so the number reported below is the number of IDs,
        # and pre-set it so an empty sample reports 0 instead of failing
        annot_count = 0
        for annot_count, annot in enumerate(reads_dic, 1):
            merged_dic.setdefault(annot, {})[sampid] = reads_dic[annot]
        if opts.verbose > 0:
            print('Sample: %s added to merged_dic (%s mirna IDs)' % (sampid, annot_count))

    # write to file
    writerowcount = 0
    with open(opts.outfile, 'wb') as outfile:
        outwriter = csv.writer(outfile, delimiter='\t')
        # header: mirna ID + samples
        outwriter.writerow(['id'] + sampids)
        # data: mirna ID + reads from each sample, in header order
        for annot in merged_dic:
            sample_values = merged_dic[annot]
            # samples without reads for this miRNA get the --nan filler
            writerow = [annot] + [sample_values.get(sampid, opts.nan) for sampid in sampids]
            outwriter.writerow(writerow)
            writerowcount += 1
    if opts.verbose > 0:
        print('%s rows written to file: %s' % (writerowcount, opts.outfile))
def main(argv=None):
    '''Compare tumour and benign annotated read counts per annotation.

    Indexes the tumour (-t) and benign (-b) annotated reads files through
    AnnotatedReads using the count selected by -r/--reads_id (default
    "reads"), then writes a tab-separated table to the output path (-o)
    with, for every annotation present in both inputs: the annotation, the
    benign count, the tumour count and the tumour/benign fold change.
    Annotations missing from the tumour input are skipped.

    argv -- list of command line arguments without the program name;
            defaults to sys.argv[1:] when None.
    '''

    program_version = "v0.1"
    program_build_date = "%s" % __updated__

    program_version_string = '%%prog %s (%s)' % (program_version, program_build_date)
    program_longdesc = ''  # optional - give further explanation about what the program does
    program_license = "Copyright 2014 Jake Yeung (Laboratory for Advanced Genome Analysis)                                            \
                Licensed under the Apache License 2.0\nhttp://www.apache.org/licenses/LICENSE-2.0"

    if argv is None:
        argv = sys.argv[1:]

    # setup option parser
    parser = OptionParser(version=program_version_string, epilog=program_longdesc, description=program_license)
    parser.add_option("-t", "--tumour", dest="tumourfile", help="set input path for tumour [default: %default]", metavar="FILE")
    parser.add_option("-b", "--benign", dest="benignfile", help="set input path for benign [default: %default]", metavar="FILE")
    parser.add_option("-o", "--out", dest="outfile", help="set output path [default: %default]", metavar="FILE")
    parser.add_option("-v", "--verbose", dest="verbose", action="count", help="set verbosity level [default: %default]")
    parser.add_option("-r", "--reads_id", dest="reads_id", help="Extract raw reads or normalized reads (TPM)", metavar="TPM | reads")
    # set defaults; verbose defaults to 0 so the "> 0" check below always
    # compares integers instead of None
    parser.set_defaults(outfile="./out.txt", reads_id="reads", verbose=0)

    # process options
    (opts, args) = parser.parse_args(argv)

    # echo each option under its own name, only when it has a value
    if opts.verbose > 0:
        print("verbosity level = %d" % opts.verbose)
    if opts.tumourfile:
        print("tumourfile = %s" % opts.tumourfile)
    if opts.benignfile:
        print("benignfile = %s" % opts.benignfile)
    if opts.outfile:
        print("outfile = %s" % opts.outfile)
    if opts.reads_id:
        print("reads_id = %s" % opts.reads_id)

    # MAIN BODY #
    count_key = opts.reads_id

    tumour_annotatedreads = AnnotatedReads.AnnotatedReads(opts.tumourfile)
    benign_annotatedreads = AnnotatedReads.AnnotatedReads(opts.benignfile)

    benign_dic = AnnotatedReads.index_annotatedreads_file(benign_annotatedreads, count_id=count_key)
    tumour_dic = AnnotatedReads.index_annotatedreads_file(tumour_annotatedreads, count_id=count_key)

    # header columns: mirna (annotation), benign count, tumour count, fold change
    writeheader = ['mirna',
                   '%s_benign_sum' % count_key,
                   '%s_tumour_sum' % count_key,
                   '%s_sum_fold_change' % count_key]

    # "with" guarantees the output file is closed even if a row fails;
    # binary mode is what the Python 2 csv module expects
    with open(opts.outfile, 'wb') as outfile:
        outwriter = csv.writer(outfile, delimiter='\t')
        outwriter.writerow(writeheader)

        for annot in benign_dic:
            try:
                tumour_sum = tumour_dic[annot][count_key]
            except KeyError:
                # no tumour count for this annotation: nothing to compare
                continue
            benign_sum = benign_dic[annot][count_key]

            tumour_value = float(tumour_sum)
            if benign_sum == 0:
                # a zero benign count would raise ZeroDivisionError and
                # abort with a partial file; report the ratio as infinite,
                # or as nan when both counts are zero
                fold_change = float('inf') if tumour_value > 0 else float('nan')
            else:
                fold_change = tumour_value / benign_sum

            # row order matches writeheader
            outwriter.writerow([annot, benign_sum, tumour_sum, fold_change])