def main(args):
    """Draw a PCoA plot of an abundance table by driving ``plot_mg_pcoa.r``.

    The input table (BIOM JSON or tab-delimited text, from a file or stdin)
    is written to a temporary tab file, an optional per-metagenome group
    table is written to a second temporary file, and both are handed to the
    R function ``plot_mg_pcoa``.  Temporary files are removed on every exit
    path.

    Args:
        args: unused; options are read from ``sys.argv`` by
            ``parser.parse_args()``.

    Returns:
        0 on success, 1 when an option or the input data is invalid.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--plot", dest="plot", default=None, help="filename for output plot")
    parser.add_argument("--distance", dest="distance", default='bray-curtis', help="distance metric, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--metadata", dest="metadata", default=None, help="metadata field to color by, only for 'biom' input")
    parser.add_argument("--groups", dest="groups", default=None, help="list of groups in JSON or tabbed format - either as input string or filename")
    parser.add_argument("--group_pos", dest="group_pos", type=int, default=1, help="position of group to use, default is 1 (first)")
    parser.add_argument("--color_auto", dest="color_auto", type=int, default=0, help="auto-create colors based on like group names, default is use group name as color: 1=true, 0=false")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    # help text previously claimed a default of 6 while the real default is 10
    parser.add_argument("--height", dest="height", type=float, default=10, help="image height in inches, default is 10")
    parser.add_argument("--width", dest="width", type=float, default=10, help="image width in inches, default is 10")
    parser.add_argument("--dpi", dest="dpi", type=int, default=300, help="image DPI, default is 300")
    parser.add_argument("--three", dest="three", type=int, default=0, help="create 3-D PCoA, default is 2-D: 1=true, 0=false")
    parser.add_argument("--name", dest="name", type=int, default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument("--label", dest="label", type=int, default=0, help="label image rows, default is off: 1=true, 0=false")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    if opts.metadata:
        opts.color_auto = 1
    # only flags this parser actually defines; the old list also contained
    # 'reference', which made getattr() raise AttributeError on every run
    for o in ['color_auto', 'three', 'name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n"%o)
            return 1

    # get auth
    # NOTE(review): the returned token is not used below; the call is kept
    # in case get_auth_token() has side effects - confirm.
    get_auth_token(opts)

    tmp_in = 'tmp_'+random_str()+'.txt'
    tmp_group = None
    mg_list = []
    groups = []
    try:
        # parse inputs
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        indata = json.loads(indata)
                        # a real list: it is measured and iterated several times
                        mg_list = [col['id'] for col in indata['columns']]
                        biom_to_tab(indata, tmp_hdl, col_name=(opts.name == 1))
                        if opts.metadata:
                            groups = metadata_from_biom(indata, opts.metadata)
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    tmp_hdl.write(indata)
                    mg_list = indata.split('\n')[0].strip().split('\t')
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # get groups if not in BIOM metadata and option used
        if (len(groups) == 0) and opts.groups:
            # is it json ?
            ## example of 2 group sets in json format
            ## [ {"group1": ["mg_id_1", "mg_id_2"], "group2": ["mg_id_3", "mg_id_4", "mg_id_5"]},
            ##   {"group1": ["mg_id_1", "mg_id_2", "mg_id_3"], "group2": ["mg_id_4", "mg_id_5"]} ]
            try:
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        gdata = json.load(grp_hdl)
                else:
                    gdata = json.loads(opts.groups)
                if opts.group_pos > len(gdata):
                    sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                    return 1
                group_set = gdata[opts.group_pos-1]
                for m in mg_list:
                    found_g = None
                    for g, mgs in group_set.items():
                        if m in mgs:
                            found_g = g
                            break
                    if found_g:
                        groups.append(found_g)
                    else:
                        sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                        return 1
            # no - its tabbed
            except Exception:
                # discard anything collected before the JSON attempt failed
                groups = []
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        gtext = grp_hdl.read()
                else:
                    gtext = opts.groups
                grows, gcols, gdata = tab_to_matrix(gtext)
                if opts.group_pos > len(gdata[0]):
                    sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                    # previously fell through and crashed on the lookup below
                    return 1
                for m in mg_list:
                    try:
                        midx = gcols.index(m)
                        groups.append(gdata[midx][opts.group_pos-1])
                    except Exception:
                        sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                        return 1

        # print groups to file for R input
        if len(groups) == len(mg_list):
            tmp_group = 'tmp_'+random_str()+'.txt'
            with open(tmp_group, 'w') as hdl_group:
                hdl_group.write("\tgroup\n")
                for m, grp in zip(mg_list, groups):
                    # non-ASCII characters in group names are replaced by '?'
                    ascii_grp = ''.join([x if ord(x) < 128 else '?' for x in grp])
                    hdl_group.write("%s\t%s\n"%(m, ascii_grp))
        elif len(groups) > 0:
            sys.stderr.write("Warning: Not all metagenomes in a group\n")

        # build R cmd
        three = 'c(1,2,3)' if opts.three == 1 else 'c(1,2)'
        label = 'TRUE' if opts.label == 1 else 'FALSE'
        table = '"%s"'%tmp_group if tmp_group else 'NA'
        color = 'TRUE' if opts.color_auto == 1 else 'FALSE'
        r_cmd = """source("%s/plot_mg_pcoa.r")
suppressMessages( plot_mg_pcoa(
    table_in="%s",
    image_out="%s",
    plot_pcs=%s,
    dist_metric="%s",
    label_points=%s,
    color_table=%s,
    color_column=1,
    auto_colors=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))"""%(opts.rlib, tmp_in, opts.plot, three, opts.distance, label, table, color, opts.height, opts.width, opts.dpi)
        execute_r(r_cmd)
    finally:
        # cleanup, also on the error returns above
        for tmp_path in (tmp_in, tmp_group):
            if tmp_path and os.path.isfile(tmp_path):
                os.remove(tmp_path)

    return 0
def main(args):
    """Export a metagenome's research-object bundle into ``--dir``.

    Fetches the metagenome record and its research-object manifest from the
    API.  With ``--list`` the manifest's aggregates are only printed as a
    table.  Otherwise the MG-RAST pipeline repository is cloned into a
    temporary sub-directory of ``--dir``, the manifest and every aggregate
    are written below ``--dir``, and SHA-1 digests are recorded in
    ``manifest-sha1.txt`` (paths starting with ``data``) and
    ``tagmanifest-sha1.txt`` (everything else).

    Args:
        args: unused; options are read from ``sys.argv`` by
            ``parser.parse_args()``.

    Returns:
        0 on success, 1 on invalid options or when the git clone fails.
    """
    # function-scope import: subprocess is only needed for the git clone
    import subprocess

    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp.format(VERSION, RO_VERSION), epilog=posthelp%AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to export to")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files in manifest")

    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n"%opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info
    url = opts.url+'/metagenome/'+opts.metagenome
    mg = obj_from_url(url, auth=token)

    # get manifest
    url = opts.url+'/researchobject/manifest/'+opts.metagenome
    data = obj_from_url(url, auth=token)

    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([info["bundledAs"]["filename"], info["bundledAs"]["folder"], info["mediatype"]])
        pt.align = "l"
        print(pt)
        return 0

    def sha1_hex(text):
        # hashlib needs bytes; text that is already bytes is hashed as-is
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return hashlib.sha1(text).hexdigest()

    # get cwl files
    pipeline_dir = os.path.join(opts.dir, random_str(10))
    # argument list instead of a shell string: --dir is user input and must
    # not be interpreted by a shell
    clone_cmd = ["git", "clone", "https://github.com/MG-RAST/pipeline.git", pipeline_dir]
    if subprocess.call(clone_cmd) != 0:
        sys.stderr.write("ERROR: unable to clone pipeline repository\n")
        shutil.rmtree(pipeline_dir, ignore_errors=True)
        return 1

    try:
        # download manifest
        sha1s = []
        base = data["@context"][0]["@base"].strip('/')
        manifest_dir = os.path.join(opts.dir, base)
        if not os.path.isdir(manifest_dir):
            os.makedirs(manifest_dir)
        data_str = json.dumps(data)
        with open(os.path.join(manifest_dir, data["manifest"]), 'w') as man_hdl:
            man_hdl.write(data_str)
        sha1s.append([sha1_hex(data_str), os.path.join(base, data["manifest"])])

        # download aggregates
        for info in data["aggregates"]:
            filename = info["bundledAs"]["filename"]
            sys.stdout.write("Downloading %s ... "%(filename))
            folder = info["bundledAs"]["folder"].strip('/')
            folder_dir = os.path.join(opts.dir, folder)
            if not os.path.isdir(folder_dir):
                os.makedirs(folder_dir)
            dst = os.path.join(folder_dir, filename)
            if "githubusercontent" in info["uri"]:
                # served from the local clone: the path below "CWL" in the
                # uri is looked up inside pipeline_dir
                pos = info["uri"].find("CWL")
                src = os.path.join(pipeline_dir, info["uri"][pos:])
                with open(src, 'r') as src_hdl:
                    text = src_hdl.read()
                # strip relative directory prefixes from the copied file
                text = text.replace('../Inputs/', '').replace('../Tools/', '').replace('../Workflows/', '')
                if dst.endswith('job.yaml'):
                    text = edit_input(text, mg)
                with open(dst, 'w') as dst_hdl:
                    dst_hdl.write(text)
                sha1s.append([sha1_hex(text), os.path.join(folder, filename)])
            else:
                with open(dst, 'w') as fh:
                    s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
                sha1s.append([s1, os.path.join(folder, filename)])
            sys.stdout.write("Done\n")

        # output sha1
        sha1s.sort(key=lambda x: x[1])
        with open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w') as mansha1:
            with open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w') as tagsha1:
                for digest, rel_path in sha1s:
                    target = mansha1 if rel_path.startswith('data') else tagsha1
                    target.write("%s\t%s\n"%(digest, rel_path))
    finally:
        # cleanup the clone even when a download fails
        shutil.rmtree(pipeline_dir, ignore_errors=True)

    return 0
def main(args):
    """Normalize an abundance table and write the result.

    The input (BIOM JSON or tab-delimited text, from a file or stdin) is
    normalized either locally through the R function
    ``MGRAST_preprocessing`` (when an R lib path is available via ``--rlib``
    or ``KB_PERL_PATH``) or by POSTing the matrix to
    ``<url>/compute/normalize``.  For BIOM input the BIOM object is updated
    in place and written as JSON, optionally into ``--outdir`` as
    ``<output>.obj`` / ``<output>.type``; otherwise a tab table is written.

    Args:
        args: unused; options are read from ``sys.argv`` by
            ``parser.parse_args()``.

    Returns:
        0 on success, 1 on invalid options, unreadable input, or data whose
        maximum value is <= 1 (treated as already normalized).
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_argument("--outdir", dest="outdir", default=None, help="ouput is placed in dir as filenmae.obj, fielname.type, only for 'biom' input")
    parser.add_argument("--format", dest="format", default='biom', help="input / output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid format\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']

    biom = None
    rows = []
    cols = []
    data = []
    maxval = 0
    norm = None
    tmp_in = 'tmp_' + random_str() + '.txt'
    try:
        # parse inputs
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        biom = json.loads(indata)
                        if opts.rlib:
                            maxval = biom_to_tab(biom, tmp_hdl)
                        else:
                            rows, cols, data = biom_to_matrix(biom)
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    rows, cols, data = tab_to_matrix(indata)
                    # floatify it; real lists so the matrix survives the max
                    # check below and can be JSON-serialised afterwards
                    data = [[float(val) for val in row] for row in data]
                    if opts.rlib:
                        tmp_hdl.write(indata)
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # check values to see if already normalized, otherwise R fails badly
        data = list(data)
        if len(data) > 0:
            maxval = max(map(max, data))
        if maxval <= 1:
            sys.stderr.write("ERROR: data is already normalized.\n")
            return 1

        # retrieve data
        if opts.rlib:
            tmp_out = 'tmp_' + random_str() + '.txt'
            r_cmd = """source("%s/preprocessing.r")
suppressMessages( MGRAST_preprocessing(
    file_in="%s",
    file_out="%s"
))""" % (opts.rlib, tmp_in, tmp_out)
            try:
                execute_r(r_cmd)
                with open(tmp_out, 'r') as res_hdl:
                    nrows, ncols, ndata = tab_to_matrix(res_hdl.read())
            finally:
                if os.path.isfile(tmp_out):
                    os.remove(tmp_out)
            num_data = [[float(val) for val in row] for row in ndata]
            norm = {"columns": ncols, "rows": nrows, "data": num_data}
        else:
            post = {"columns": cols, "rows": rows, "data": data}
            norm = obj_from_url(opts.url + '/compute/normalize', data=json.dumps(post, separators=(',', ':')))
    finally:
        # the temp table is no longer needed, whatever happened above
        if os.path.isfile(tmp_in):
            os.remove(tmp_in)

    # output data
    to_stdout = (not opts.output) or (opts.output == '-')
    out_hdl = sys.stdout if to_stdout else open(opts.output, 'w')
    try:
        if biom and (opts.format == 'biom'):
            # may have rows removed
            kept_ids = set(norm['rows'])
            biom['rows'] = [r for r in biom['rows'] if r['id'] in kept_ids]
            biom['data'] = norm['data']
            biom['shape'][0] = len(biom['rows'])
            biom['id'] = biom['id'] + '_normalized'
            biom['matrix_type'] = 'dense'
            biom['matrix_element_type'] = 'float'
            matrix_type = None
            if biom['type'].startswith('Taxon'):
                matrix_type = "Communities.TaxonomicMatrix"
            elif biom['type'].startswith('Function'):
                matrix_type = "Communities.FunctionalMatrix"
            if opts.outdir and matrix_type:
                if not os.path.isdir(opts.outdir):
                    os.mkdir(opts.outdir)
                with open(os.path.join(opts.outdir, opts.output + '.obj'), 'w') as ohdl:
                    ohdl.write(json.dumps(biom) + "\n")
                with open(os.path.join(opts.outdir, opts.output + '.type'), 'w') as thdl:
                    thdl.write(matrix_type)
            else:
                out_hdl.write(json.dumps(biom) + "\n")
        else:
            out_hdl.write("\t%s\n" % "\t".join(norm['columns']))
            for i, d in enumerate(norm['data']):
                out_hdl.write("%s\t%s\n" % (norm['rows'][i], "\t".join(map(str, d))))
    finally:
        # never close sys.stdout; only flush it
        if to_stdout:
            out_hdl.flush()
        else:
            out_hdl.close()

    # drop the output file if nothing was written to it (--outdir case);
    # skipped for stdout, where os.stat('-') used to raise OSError
    if (not to_stdout) and os.stat(opts.output).st_size == 0:
        os.remove(opts.output)
    return 0
def main(args):
    """Run a group-wise statistical test on an abundance table via R.

    The input table (BIOM JSON or tab-delimited text) is written to a
    temporary file, each metagenome is assigned a group (from the BIOM
    columns' ``group`` key or from the ``--groups`` JSON), and the R function
    ``group_stats_plot`` is executed.  The header plus the first ``--top``
    lines of its output file are printed with ``safe_print``.  Temporary
    files are removed on every exit path.

    Args:
        args: unused; options are read from ``sys.argv`` by
            ``parser.parse_args()``.

    Returns:
        0 on success, 1 on invalid options, unreadable input, unparsable
        groups, or a metagenome that belongs to no group.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='text', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is text")
    parser.add_argument("--groups", dest="groups", default=None, help="groups in JSON format - either as input string or filename")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N most changing groups, default is 10")
    parser.add_argument("--stat_test", dest="stat_test", default='Kruskal-Wallis', help="supported statistical tests, one of: Kruskal-Wallis, t-test-paired, Wilcoxon-paired, t-test-unpaired, Mann-Whitney-unpaired-Wilcoxon, ANOVA-one-way, default is Kruskal-Wallis")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1

    tmp_in = 'tmp_'+random_str()+'.txt'
    tmp_out = 'tmp_'+random_str()+'.txt'
    mg_list = []
    groups = []
    try:
        # get inputs
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        indata = json.loads(indata)
                        biom_to_tab(indata, tmp_hdl)
                        mg_list = [col['id'] for col in indata['columns']]
                        # built eagerly: a lazy map() is always truthy and
                        # would hide a missing 'group' key until much later
                        try:
                            groups = [col['group'] for col in indata['columns']]
                        except (KeyError, TypeError):
                            groups = []
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    tmp_hdl.write(indata)
                    mg_list = indata.split('\n')[0].strip().split('\t')
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # get groups if not in BIOM
        if not groups:
            try:
                # opts.groups is None when --groups was not given; that ends
                # up in the except branch like any other unparsable value
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        grdata = json.load(grp_hdl)
                else:
                    grdata = json.loads(opts.groups)
            except Exception:
                sys.stderr.write("ERROR: unable to parse groups JSON\n")
                return 1
            for mg in mg_list:
                found_gr = None
                for gr in grdata.keys():
                    if mg in grdata[gr]:
                        found_gr = gr
                        break
                if found_gr:
                    groups.append(found_gr)
                else:
                    sys.stderr.write("ERROR: metagenome %s not in a group\n"%mg)
                    return 1

        # build R cmd
        group_str = 'c('+','.join(['"%s"'%grp for grp in groups])+')'
        r_cmd = """source("%s/group_stats_plot.r")
suppressMessages( group_stats_plot(
    file_in="%s",
    file_out="%s",
    figure_out=NULL,
    stat_test="%s",
    order_by=NULL,
    order_decreasing=FALSE,
    my_grouping=%s
))"""%(opts.rlib, tmp_in, tmp_out, opts.stat_test, group_str)
        execute_r(r_cmd)

        # output results
        with open(tmp_out, 'r') as res_hdl:
            results = res_hdl.readlines()
        # NOTE(review): readlines() keeps each line's trailing newline, so
        # joining with "\n" yields a blank line between rows - kept as-is,
        # confirm whether that is intended.
        output = "\n".join(results[0:opts.top+1])
    finally:
        # cleanup, also on the error returns above
        for tmp_path in (tmp_in, tmp_out):
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)

    safe_print(output)
    return 0
def main(args):
    """Normalize an abundance table and write the result (optparse variant).

    The input (BIOM JSON or tab-delimited text, from a file or stdin) is
    normalized either locally through the R function
    ``MGRAST_preprocessing`` (when an R lib path is available via ``--rlib``
    or ``KB_PERL_PATH``) or by POSTing the matrix to
    ``<url>/compute/normalize``.  For BIOM input the BIOM object is updated
    and written as JSON, optionally into ``--outdir`` as ``<output>.obj`` /
    ``<output>.type``; otherwise a tab table is written.

    Args:
        args: ignored on entry; the name is rebound to the positional
            arguments returned by ``parser.parse_args()``, which reads
            ``sys.argv``.

    Returns:
        0 on success, 1 on invalid options, unreadable input, or data whose
        maximum value is <= 1 (treated as already normalized).
    """
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--url", dest="url", default=API_URL, help="communities API url")
    parser.add_option("", "--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_option("", "--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_option("", "--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
    parser.add_option("", "--outdir", dest="outdir", default=None, help="ouput is placed in dir as filenmae.obj, fielname.type, only for 'biom' input")
    parser.add_option("", "--format", dest="format", default='biom', help="input / output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")

    # get inputs
    (opts, args) = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid format\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']

    biom = None
    rows = []
    cols = []
    data = []
    maxval = 0
    norm = None
    tmp_in = 'tmp_'+random_str()+'.txt'
    try:
        # parse inputs
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        biom = json.loads(indata)
                        if opts.rlib:
                            maxval = biom_to_tab(biom, tmp_hdl)
                        else:
                            rows, cols, data = biom_to_matrix(biom)
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    rows, cols, data = tab_to_matrix(indata)
                    # floatify it, as concrete lists so the matrix can be
                    # scanned for its maximum and still serialised later
                    data = [[float(val) for val in row] for row in data]
                    if opts.rlib:
                        tmp_hdl.write(indata)
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # check values to see if already normalized, otherwise R fails badly
        if len(data) > 0:
            maxval = max(map(max, data))
        if maxval <= 1:
            sys.stderr.write("ERROR: data is already normalized.\n")
            return 1

        # retrieve data
        if opts.rlib:
            tmp_out = 'tmp_'+random_str()+'.txt'
            r_cmd = """source("%s/preprocessing.r")
suppressMessages( MGRAST_preprocessing(
    file_in="%s",
    file_out="%s"
))"""%(opts.rlib, tmp_in, tmp_out)
            try:
                execute_r(r_cmd)
                with open(tmp_out, 'r') as res_hdl:
                    nrows, ncols, ndata = tab_to_matrix(res_hdl.read())
            finally:
                if os.path.isfile(tmp_out):
                    os.remove(tmp_out)
            num_data = [[float(val) for val in row] for row in ndata]
            norm = {"columns": ncols, "rows": nrows, "data": num_data}
        else:
            post = {"columns": cols, "rows": rows, "data": data}
            norm = obj_from_url(opts.url+'/compute/normalize', data=json.dumps(post, separators=(',',':')))
    finally:
        # remove the temp table on success and on every error return
        if os.path.isfile(tmp_in):
            os.remove(tmp_in)

    # output data
    to_stdout = (not opts.output) or (opts.output == '-')
    out_hdl = sys.stdout if to_stdout else open(opts.output, 'w')
    try:
        if biom and (opts.format == 'biom'):
            # may have rows removed
            kept_ids = set(norm['rows'])
            biom['rows'] = [r for r in biom['rows'] if r['id'] in kept_ids]
            biom['data'] = norm['data']
            biom['shape'][0] = len(biom['rows'])
            biom['id'] = biom['id']+'_normalized'
            biom['matrix_type'] = 'dense'
            biom['matrix_element_type'] = 'float'
            matrix_type = None
            if biom['type'].startswith('Taxon'):
                matrix_type = "Communities.TaxonomicMatrix"
            elif biom['type'].startswith('Function'):
                matrix_type = "Communities.FunctionalMatrix"
            if opts.outdir and matrix_type:
                if not os.path.isdir(opts.outdir):
                    os.mkdir(opts.outdir)
                with open(os.path.join(opts.outdir, opts.output+'.obj'), 'w') as ohdl:
                    ohdl.write(json.dumps(biom)+"\n")
                with open(os.path.join(opts.outdir, opts.output+'.type'), 'w') as thdl:
                    thdl.write(matrix_type)
            else:
                out_hdl.write(json.dumps(biom)+"\n")
        else:
            out_hdl.write("\t%s\n" %"\t".join(norm['columns']))
            for i, d in enumerate(norm['data']):
                out_hdl.write("%s\t%s\n" %(norm['rows'][i], "\t".join(map(str, d))))
    finally:
        # sys.stdout must stay open for the rest of the process
        if to_stdout:
            out_hdl.flush()
        else:
            out_hdl.close()

    # remove an output file that ended up empty (--outdir case); not done
    # for stdout, where os.stat('-') used to raise OSError
    if (not to_stdout) and os.stat(opts.output).st_size == 0:
        os.remove(opts.output)
    return 0
def main(args):
    """Draw a boxplot of an abundance table by driving ``plot_mg_boxplot.r``.

    The input table (BIOM JSON or tab-delimited text, from a file or stdin)
    is written to a temporary tab file which is passed to the R function
    ``plot_mg_boxplot``; the image is written to ``--plot``.  The temporary
    file is removed on every exit path.

    Args:
        args: ignored on entry; the name is rebound to the positional
            arguments returned by ``parser.parse_args()``, which reads
            ``sys.argv``.

    Returns:
        0 on success, 1 when an option or the input data is invalid.
    """
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_option("", "--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_option("", "--plot", dest="plot", default=None, help="filename for output plot")
    parser.add_option("", "--rlib", dest="rlib", default=None, help="R lib path")
    # help text previously advertised defaults (4 / 5) that did not match
    parser.add_option("", "--height", dest="height", type="float", default=8.5, help="image height in inches, default is 8.5")
    parser.add_option("", "--width", dest="width", type="float", default=11, help="image width in inches, default is 11")
    parser.add_option("", "--dpi", dest="dpi", type="int", default=300, help="image DPI, default is 300")
    parser.add_option("", "--name", dest="name", type="int", default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_option("", "--label", dest="label", type="int", default=0, help="label image rows, default is off: 1=true, 0=false")

    # get inputs
    (opts, args) = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    # only flags this parser defines; 'reference' was listed before and made
    # getattr() raise AttributeError
    for o in ['name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n"%o)
            return 1
    # (a stray "return 0" used to sit here, so no plot was ever produced)

    # get auth
    # NOTE(review): the returned token is not used below; the call is kept
    # in case get_auth_token() has side effects - confirm.
    get_auth_token(opts)

    tmp_in = 'tmp_'+random_str()+'.txt'
    try:
        # parse input for R
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        indata = json.loads(indata)
                        biom_to_tab(indata, tmp_hdl, col_name=(opts.name == 1))
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    tmp_hdl.write(indata)
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # build R cmd
        label = 'TRUE' if opts.label == 1 else 'FALSE'
        r_cmd = """source("%s/plot_mg_boxplot.r")
suppressMessages( plot_mg_boxplot(
    table_in="%s",
    image_out="%s",
    label_rows=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))"""%(opts.rlib, tmp_in, opts.plot, label, opts.height, opts.width, opts.dpi)
        execute_r(r_cmd)
    finally:
        # cleanup, also on the error returns above
        if os.path.isfile(tmp_in):
            os.remove(tmp_in)

    return 0
def main(args):
    """Run a group-wise statistical test on an abundance table via R.

    The input table (BIOM JSON or tab-delimited text) is written to a
    temporary file, each metagenome is assigned a group (from the BIOM
    columns' ``group`` key or from the ``--groups`` JSON), and the R function
    ``group_stats_plot`` is executed with ``order_decreasing=TRUE``.  The
    header plus the first ``--top`` lines of its output file are printed
    with ``safe_print``.  Temporary files are removed on every exit path.

    Args:
        args: unused; options are read from ``sys.argv`` by
            ``parser.parse_args()``.

    Returns:
        0 on success, 1 on invalid options, unreadable input, unparsable
        groups, or a metagenome that belongs to no group.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='text', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is text")
    parser.add_argument("--groups", dest="groups", default=None, help="groups in JSON format - either as input string or filename")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_argument("--top", dest="top", type=int, default=10, help="display only the top N most changing groups, default is 10")
    parser.add_argument("--stat_test", dest="stat_test", default='Kruskal-Wallis', help="supported statistical tests, one of: Kruskal-Wallis, t-test-paired, Wilcoxon-paired, t-test-unpaired, Mann-Whitney-unpaired-Wilcoxon, ANOVA-one-way, default is Kruskal-Wallis")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1

    tmp_in = 'tmp_' + random_str() + '.txt'
    tmp_out = 'tmp_' + random_str() + '.txt'
    mg_list = []
    groups = []
    try:
        # get inputs
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        indata = json.loads(indata)
                        biom_to_tab(indata, tmp_hdl)
                        mg_list = [col['id'] for col in indata['columns']]
                        # evaluated eagerly so a missing 'group' key is seen
                        # here and the --groups fallback below can kick in
                        try:
                            groups = [col['group'] for col in indata['columns']]
                        except (KeyError, TypeError):
                            groups = []
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    tmp_hdl.write(indata)
                    mg_list = indata.split('\n')[0].strip().split('\t')
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # get groups if not in BIOM
        if not groups:
            try:
                # a missing --groups (None) fails here and is reported like
                # any other unparsable value
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        grdata = json.load(grp_hdl)
                else:
                    grdata = json.loads(opts.groups)
            except Exception:
                sys.stderr.write("ERROR: unable to parse groups JSON\n")
                return 1
            for mg in mg_list:
                found_gr = None
                for gr in grdata.keys():
                    if mg in grdata[gr]:
                        found_gr = gr
                        break
                if found_gr:
                    groups.append(found_gr)
                else:
                    sys.stderr.write("ERROR: metagenome %s not in a group\n" % mg)
                    return 1

        # build R cmd
        group_str = 'c(' + ','.join(['"%s"' % grp for grp in groups]) + ')'
        r_cmd = """source("%s/group_stats_plot.r")
suppressMessages( group_stats_plot(
    file_in="%s",
    file_out="%s",
    figure_out=NULL,
    stat_test="%s",
    order_by=NULL,
    order_decreasing=TRUE,
    my_grouping=%s
))""" % (opts.rlib, tmp_in, tmp_out, opts.stat_test, group_str)
        execute_r(r_cmd)

        # output results
        with open(tmp_out, 'r') as res_hdl:
            results = res_hdl.readlines()
        # NOTE(review): readlines() keeps each line's trailing newline, so
        # joining with "\n" yields a blank line between rows - kept as-is,
        # confirm whether that is intended.
        output = "\n".join(results[0:opts.top + 1])
    finally:
        # cleanup, also on the error returns above
        for tmp_path in (tmp_in, tmp_out):
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)

    safe_print(output)
    return 0
def main(args):
    """Export a metagenome's research-object bundle into ``--dir``.

    Fetches the metagenome record and its research-object manifest from the
    API.  With ``--list`` the manifest's aggregates are only printed as a
    table.  Otherwise the MG-RAST pipeline repository is cloned into a
    temporary sub-directory of ``--dir``, the manifest and every aggregate
    are written below ``--dir``, and SHA-1 digests are recorded in
    ``manifest-sha1.txt`` (paths starting with ``data``) and
    ``tagmanifest-sha1.txt`` (everything else).

    Args:
        args: unused; options are read from ``sys.argv`` by
            ``parser.parse_args()``.

    Returns:
        0 on success, 1 on invalid options or when the git clone fails.
    """
    # function-scope import: subprocess is only needed for the git clone
    import subprocess

    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp.format(VERSION, RO_VERSION), epilog=posthelp % AUTH_LIST)
    parser.add_argument("--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_argument("--dir", dest="dir", default=".", help="directory to export to")
    parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files in manifest")

    # get inputs
    opts = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n" % opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info
    mg = obj_from_url(opts.url + '/metagenome/' + opts.metagenome, auth=token)

    # get manifest
    data = obj_from_url(opts.url + '/researchobject/manifest/' + opts.metagenome, auth=token)

    # just list
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            bundled = info["bundledAs"]
            pt.add_row([bundled["filename"], bundled["folder"], info["mediatype"]])
        pt.align = "l"
        print(pt)
        return 0

    def sha1_hex(text):
        # hashlib only accepts bytes; encode text, pass bytes through
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return hashlib.sha1(text).hexdigest()

    # get cwl files
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    # run git without a shell so the user-supplied --dir cannot inject
    # shell syntax, and stop early when the clone fails
    git_clone = ["git", "clone", "https://github.com/MG-RAST/pipeline.git", pipeline_dir]
    if subprocess.call(git_clone) != 0:
        sys.stderr.write("ERROR: unable to clone pipeline repository\n")
        shutil.rmtree(pipeline_dir, ignore_errors=True)
        return 1

    try:
        # download manifest
        sha1s = []
        base = data["@context"][0]["@base"].strip('/')
        manifest_dir = os.path.join(opts.dir, base)
        if not os.path.isdir(manifest_dir):
            os.makedirs(manifest_dir)
        data_str = json.dumps(data)
        with open(os.path.join(manifest_dir, data["manifest"]), 'w') as man_hdl:
            man_hdl.write(data_str)
        sha1s.append([sha1_hex(data_str), os.path.join(base, data["manifest"])])

        # download aggregates
        for info in data["aggregates"]:
            filename = info["bundledAs"]["filename"]
            sys.stdout.write("Downloading %s ... " % (filename))
            folder = info["bundledAs"]["folder"].strip('/')
            folder_dir = os.path.join(opts.dir, folder)
            if not os.path.isdir(folder_dir):
                os.makedirs(folder_dir)
            dst = os.path.join(folder_dir, filename)
            if "githubusercontent" in info["uri"]:
                # taken from the local clone: the uri's path from "CWL" on
                # is resolved inside pipeline_dir
                pos = info["uri"].find("CWL")
                src = os.path.join(pipeline_dir, info["uri"][pos:])
                with open(src, 'r') as src_hdl:
                    text = src_hdl.read()
                # strip relative directory prefixes from the copied file
                for prefix in ('../Inputs/', '../Tools/', '../Workflows/'):
                    text = text.replace(prefix, '')
                if dst.endswith('job.yaml'):
                    text = edit_input(text, mg)
                with open(dst, 'w') as dst_hdl:
                    dst_hdl.write(text)
                sha1s.append([sha1_hex(text), os.path.join(folder, filename)])
            else:
                with open(dst, 'w') as fh:
                    s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
                sha1s.append([s1, os.path.join(folder, filename)])
            sys.stdout.write("Done\n")

        # output sha1
        sha1s.sort(key=lambda x: x[1])
        with open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w') as mansha1:
            with open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w') as tagsha1:
                for s1 in sha1s:
                    if s1[1].startswith('data'):
                        mansha1.write("%s\t%s\n" % (s1[0], s1[1]))
                    else:
                        tagsha1.write("%s\t%s\n" % (s1[0], s1[1]))
    finally:
        # cleanup the clone even when a download fails
        shutil.rmtree(pipeline_dir, ignore_errors=True)

    return 0
def main(args):
    """Render a heatmap-dendrogram image from a BIOM or tabbed abundance table.

    The input (file or stdin) is converted to a temporary tab-delimited file
    and handed to the R function ``plot_mg_heatdend`` sourced from the
    ``--rlib`` directory; the image is written to ``--plot``.  The temporary
    file is removed on every exit path.

    Note: ``args`` is accepted for interface compatibility but is not read;
    options are parsed from ``sys.argv``.

    Returns 0 on success, 1 on invalid options or unreadable input.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--plot", dest="plot", default=None, help="filename for output plot")
    # NOTE(review): --cluster and --distance are parsed but not forwarded to
    # the R call below - confirm whether plot_mg_heatdend should receive them.
    parser.add_argument("--cluster", dest="cluster", default='ward', help="cluster function, one of: ward, single, complete, mcquitty, median, centroid, default is ward")
    parser.add_argument("--distance", dest="distance", default='bray-curtis', help="distance function, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_argument("--height", dest="height", type=float, default=10, help="image height in inches, default is 10")
    parser.add_argument("--width", dest="width", type=float, default=10, help="image width in inches, default is 10")
    parser.add_argument("--dpi", dest="dpi", type=int, default=300, help="image DPI, default is 300")
    parser.add_argument("--order", dest="order", type=int, default=0, help="order columns, default is off: 1=true, 0=false")
    parser.add_argument("--name", dest="name", type=int, default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument("--label", dest="label", type=int, default=0, help="label image rows, default is off: 1=true, 0=false")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    # fall back to the KB_PERL_PATH environment variable for the R lib path
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    # only validate flags this parser defines; reading an undefined option
    # from the namespace raises AttributeError
    for o in ['order', 'name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n" % o)
            return 1

    # get auth
    token = get_auth_token(opts)

    # parse input for R
    tmp_in = 'tmp_' + random_str() + '.txt'
    try:
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        indata = json.loads(indata)
                        biom_to_tab(indata, tmp_hdl, col_name=(opts.name == 1))
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    tmp_hdl.write(indata)
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # build R cmd
        order = 'TRUE' if opts.order == 1 else 'FALSE'
        label = 'TRUE' if opts.label == 1 else 'FALSE'
        r_cmd = """source("%s/plot_mg_heatdend.r")
suppressMessages( plot_mg_heatdend(
    table_in="%s",
    image_out="%s",
    order_columns=%s,
    label_rows=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))""" % (opts.rlib, tmp_in, opts.plot, order, label, opts.height, opts.width, opts.dpi)
        execute_r(r_cmd)
        return 0
    finally:
        # cleanup: runs on success and on every error return above
        if os.path.isfile(tmp_in):
            os.remove(tmp_in)
def main(args):
    """Render a PCoA plot from a BIOM or tabbed abundance table.

    The input (file or stdin) is converted to a temporary tab-delimited file.
    Point groups come from BIOM column metadata (``--metadata``) or, if that
    yields nothing, from ``--groups`` (JSON or tabbed, given inline or as a
    filename).  When every metagenome has a group, a second temporary file
    with the grouping is written and passed to R as the color table.  The R
    function ``plot_mg_pcoa`` sourced from ``--rlib`` draws the image to
    ``--plot``.  Temporary files are removed on every exit path.

    Note: ``args`` is accepted for interface compatibility but is not read;
    options are parsed from ``sys.argv``.

    Returns 0 on success, 1 on invalid options, unreadable input or an
    unusable group definition.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--user", dest="user", default=None, help="OAuth username")
    parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_argument("--token", dest="token", default=None, help="OAuth token")
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--plot", dest="plot", default=None, help="filename for output plot")
    parser.add_argument("--distance", dest="distance", default='bray-curtis', help="distance metric, one of: bray-curtis, euclidean, maximum, manhattan, canberra, minkowski, difference, default is bray-curtis")
    parser.add_argument("--metadata", dest="metadata", default=None, help="metadata field to color by, only for 'biom' input")
    parser.add_argument("--groups", dest="groups", default=None, help="list of groups in JSON or tabbed format - either as input string or filename")
    parser.add_argument("--group_pos", dest="group_pos", type=int, default=1, help="position of group to use, default is 1 (first)")
    parser.add_argument("--color_auto", dest="color_auto", type=int, default=0, help="auto-create colors based on like group names, default is use group name as color: 1=true, 0=false")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_argument("--height", dest="height", type=float, default=10, help="image height in inches, default is 10")
    parser.add_argument("--width", dest="width", type=float, default=10, help="image width in inches, default is 10")
    parser.add_argument("--dpi", dest="dpi", type=int, default=300, help="image DPI, default is 300")
    parser.add_argument("--three", dest="three", type=int, default=0, help="create 3-D PCoA, default is 2-D: 1=true, 0=false")
    parser.add_argument("--name", dest="name", type=int, default=0, help="label columns by name, default is by id: 1=true, 0=false")
    parser.add_argument("--label", dest="label", type=int, default=0, help="label image rows, default is off: 1=true, 0=false")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if not opts.plot:
        sys.stderr.write("ERROR: missing output filename\n")
        return 1
    # fall back to the KB_PERL_PATH environment variable for the R lib path
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    # coloring by a metadata field always uses auto-generated colors
    if opts.metadata:
        opts.color_auto = 1
    # only validate flags this parser defines; reading an undefined option
    # from the namespace raises AttributeError
    for o in ['color_auto', 'three', 'name', 'label']:
        if getattr(opts, o) not in [0, 1]:
            sys.stderr.write("ERROR: invalid value for '%s'\n" % o)
            return 1

    # get auth
    token = get_auth_token(opts)

    tmp_in = 'tmp_' + random_str() + '.txt'
    tmp_group = None
    try:
        # parse inputs
        mg_list = []
        groups = []
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        indata = json.loads(indata)
                        # a real list: it is measured with len() and iterated
                        # more than once below
                        mg_list = [col['id'] for col in indata['columns']]
                        biom_to_tab(indata, tmp_hdl, col_name=(opts.name == 1))
                        if opts.metadata:
                            groups = metadata_from_biom(indata, opts.metadata)
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    tmp_hdl.write(indata)
                    # first line of a tabbed table holds the column (metagenome) ids
                    mg_list = indata.split('\n')[0].strip().split('\t')
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # get groups if not in BIOM metadata and option used
        if (len(groups) == 0) and opts.groups:
            # is it json ?
            ## example of 2 group sets in json format
            ## [ {"group1": ["mg_id_1", "mg_id_2"], "group2": ["mg_id_3", "mg_id_4", "mg_id_5"]},
            ##   {"group1": ["mg_id_1", "mg_id_2", "mg_id_3"], "group2": ["mg_id_4", "mg_id_5"]} ]
            try:
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        gdata = json.load(grp_hdl)
                else:
                    gdata = json.loads(opts.groups)
                if opts.group_pos > len(gdata):
                    sys.stderr.write("ERROR: position (%d) of group is out of bounds\n" % opts.group_pos)
                    return 1
                for m in mg_list:
                    found_g = None
                    for g, mgs in gdata[opts.group_pos - 1].items():
                        if m in mgs:
                            found_g = g
                            break
                    if found_g:
                        groups.append(found_g)
                    else:
                        sys.stderr.write("ERROR: metagenome %s not in a group\n" % m)
                        return 1
            # no - its tabbed
            except Exception:
                # discard anything collected before the JSON attempt failed
                groups = []
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        gtext = grp_hdl.read()
                else:
                    gtext = opts.groups
                grows, gcols, gdata = tab_to_matrix(gtext)
                if opts.group_pos > len(gdata[0]):
                    sys.stderr.write("ERROR: position (%d) of group is out of bounds\n" % opts.group_pos)
                    return 1
                for m in mg_list:
                    try:
                        # NOTE(review): the id is looked up in the column labels
                        # but the result indexes rows of gdata - confirm against
                        # the layout tab_to_matrix returns.
                        midx = gcols.index(m)
                        groups.append(gdata[midx][opts.group_pos - 1])
                    except Exception:
                        sys.stderr.write("ERROR: metagenome %s not in a group\n" % m)
                        return 1

        # print groups to file for R input
        if len(groups) == len(mg_list):
            tmp_group = 'tmp_' + random_str() + '.txt'
            with open(tmp_group, 'w') as hdl_group:
                hdl_group.write("\tgroup\n")
                for i, m in enumerate(mg_list):
                    # replace non-ASCII characters in the group name with '?'
                    ascii_name = ''.join([x if ord(x) < 128 else '?' for x in groups[i]])
                    hdl_group.write("%s\t%s\n" % (m, ascii_name))
        elif len(groups) > 0:
            sys.stderr.write("Warning: Not all metagenomes in a group\n")

        # build R cmd
        three = 'c(1,2,3)' if opts.three == 1 else 'c(1,2)'
        label = 'TRUE' if opts.label == 1 else 'FALSE'
        table = '"%s"' % tmp_group if tmp_group else 'NA'
        color = 'TRUE' if opts.color_auto == 1 else 'FALSE'
        r_cmd = """source("%s/plot_mg_pcoa.r")
suppressMessages( plot_mg_pcoa(
    table_in="%s",
    image_out="%s",
    plot_pcs=%s,
    dist_metric="%s",
    label_points=%s,
    color_table=%s,
    color_column=1,
    auto_colors=%s,
    image_height_in=%.1f,
    image_width_in=%.1f,
    image_res_dpi=%d
))""" % (opts.rlib, tmp_in, opts.plot, three, opts.distance, label, table, color, opts.height, opts.width, opts.dpi)
        execute_r(r_cmd)
        return 0
    finally:
        # cleanup: runs on success and on every error return above
        for tmp_path in (tmp_in, tmp_group):
            if tmp_path and os.path.isfile(tmp_path):
                os.remove(tmp_path)
def main(args):
    """Run a group significance test on a BIOM or tabbed abundance table.

    The input (file or stdin) is converted to a temporary tab-delimited file.
    Each metagenome (column) must be assigned a group, taken from BIOM column
    metadata (``--metadata``) or from ``--groups`` (JSON or tabbed, given
    inline or as a filename).  The R function ``group_stats_plot`` sourced
    from ``--rlib`` performs the test and optionally draws ``--plot``.

    For BIOM input with ``--output biom`` the statistics returned by R are
    attached to each row as ``metadata['significance']``, the group is stored
    on each column, and the updated BIOM is printed as JSON; otherwise the
    raw tabbed result is printed.  Temporary files are removed on every exit
    path.

    Note: the ``args`` parameter is not read; options are parsed from
    ``sys.argv``.

    Returns 0 on success, 1 on invalid options, unreadable input, incomplete
    grouping or inconsistent results from R.
    """
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_option("", "--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_option("", "--output", dest="output", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_option("", "--plot", dest="plot", default=None, help="filename for output plot, optional")
    parser.add_option("", "--stat_test", dest="stat_test", default='Kruskal-Wallis', help="supported statistical tests, one of: Kruskal-Wallis, t-test-paired, Wilcoxon-paired, t-test-unpaired, Mann-Whitney-unpaired-Wilcoxon, ANOVA-one-way, default is Kruskal-Wallis")
    parser.add_option("", "--metadata", dest="metadata", default=None, help="metadata field to group by, only for 'biom' input")
    parser.add_option("", "--groups", dest="groups", default=None, help="list of groups in JSON or tabbed format - either as input string or filename")
    parser.add_option("", "--group_pos", dest="group_pos", type="int", default=1, help="position of group to use, default is 1 (first)")
    parser.add_option("", "--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_option("", "--order", dest="order", default=None, help="column number to order output by, default is last column")
    parser.add_option("", "--direction", dest="direction", default="desc", help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is desc")
    parser.add_option("", "--height", dest="height", type="float", default=6, help="image height in inches, default is 6")
    parser.add_option("", "--width", dest="width", type="float", default=6, help="image width in inches, default is 6")
    parser.add_option("", "--dpi", dest="dpi", type="int", default=300, help="image DPI, default is 300")

    # get inputs
    (opts, args) = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if opts.output not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid output format\n")
        return 1
    # fall back to the KB_PERL_PATH environment variable for the R lib path
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    if opts.direction not in ['asc', 'desc']:
        sys.stderr.write("ERROR: invalid order direction\n")
        return 1

    tmp_in = 'tmp_'+random_str()+'.txt'
    tmp_out = 'tmp_'+random_str()+'.txt'
    try:
        # parse inputs
        mg_list = []
        groups = []
        biom = None
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        biom = json.loads(indata)
                        # a real list: it is measured with len() and iterated below
                        mg_list = [col['id'] for col in biom['columns']]
                        biom_to_tab(biom, tmp_hdl)
                        if opts.metadata:
                            groups = metadata_from_biom(biom, opts.metadata)
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    tmp_hdl.write(indata)
                    # first line of a tabbed table holds the column (metagenome) ids
                    mg_list = indata.split('\n')[0].strip().split('\t')
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # get groups if not in BIOM metadata and option used
        if (len(groups) == 0) and opts.groups:
            # is it json ?
            ## example of 2 group sets in json format
            ## [ {"group1": ["mg_id_1", "mg_id_2"], "group2": ["mg_id_3", "mg_id_4", "mg_id_5"]},
            ##   {"group1": ["mg_id_1", "mg_id_2", "mg_id_3"], "group2": ["mg_id_4", "mg_id_5"]} ]
            try:
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        gdata = json.load(grp_hdl)
                else:
                    gdata = json.loads(opts.groups)
                if opts.group_pos > len(gdata):
                    sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                    return 1
                for m in mg_list:
                    found_g = None
                    for g, mgs in gdata[opts.group_pos-1].items():
                        if m in mgs:
                            found_g = g
                            break
                    if found_g:
                        groups.append(found_g)
                    else:
                        sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                        return 1
            # no - its tabbed
            except Exception:
                # discard anything collected before the JSON attempt failed
                groups = []
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        gtext = grp_hdl.read()
                else:
                    gtext = opts.groups
                grows, gcols, gdata = tab_to_matrix(gtext)
                if opts.group_pos > len(gdata[0]):
                    sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                    return 1
                for m in mg_list:
                    try:
                        # NOTE(review): the id is looked up in the column labels
                        # but the result indexes rows of gdata - confirm against
                        # the layout tab_to_matrix returns.
                        midx = gcols.index(m)
                        groups.append(gdata[midx][opts.group_pos-1])
                    except Exception:
                        sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                        return 1

        # validate groups
        if len(groups) != len(mg_list):
            sys.stderr.write("ERROR: Not all metagenomes in a group\n")
            return 1

        # build R cmd
        fig_out = '"%s"'%opts.plot if opts.plot else 'NULL'
        if opts.order is None:
            order_by = 'NULL'
        else:
            try:
                order_by = int(opts.order)
            except ValueError:
                sys.stderr.write("ERROR: invalid order column\n")
                return 1
        order_desc = 'TRUE' if opts.direction == 'desc' else 'FALSE'
        group_str = 'c('+','.join(['"%s"'%g for g in groups])+')'
        # argument order must follow the template: width first, then height
        r_cmd = """source("%s/group_stats_plot.r")
suppressMessages( group_stats_plot(
    file_in="%s",
    file_out="%s",
    figure_out=%s,
    figure_width_in=%.1f,
    figure_height_in=%.1f,
    figure_res_dpi=%d,
    stat_test="%s",
    order_by=%s,
    order_decreasing=%s,
    my_grouping=%s
))"""%(opts.rlib, tmp_in, tmp_out, fig_out, opts.width, opts.height, opts.dpi, opts.stat_test, order_by, order_desc, group_str)
        execute_r(r_cmd)

        # output results
        with open(tmp_out, 'r') as out_hdl:
            results = out_hdl.read()
    finally:
        # cleanup: runs on success and on every error return above
        for tmp_path in (tmp_in, tmp_out):
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)

    if biom and (opts.output == 'biom'):
        cnum = biom['shape'][1]
        rids = [r['id'] for r in biom['rows']]
        rrows, rcols, rdata = tab_to_matrix(results)
        if (len(rrows) != biom['shape'][0]) or (len(rcols[:cnum]) != biom['shape'][1]):
            sys.stderr.write("ERROR: significance test returned invalid results\n")
            return 1
        # columns past the original data columns carry the statistics
        stat_names = rcols[cnum:]
        # add stats to row data, re-order
        new_rows = []
        new_data = []
        for i, row in enumerate(rdata):
            # get row
            rindex = rids.index(rrows[i])
            robj = biom['rows'][rindex]
            if not robj['metadata']:
                robj['metadata'] = {'significance': []}
            else:
                robj['metadata']['significance'] = []
            # add stats
            for j, stat in enumerate(row[cnum:]):
                try:
                    stat = float(stat)
                except (TypeError, ValueError):
                    # non-numeric statistic is stored as None
                    stat = None
                robj['metadata']['significance'].append((stat_names[j], stat))
            new_rows.append(robj)
            new_data.append(row[:cnum])
        # update biom
        biom['id'] = biom['id']+'_sig'
        biom['rows'] = new_rows
        biom['data'] = new_data
        biom['matrix_type'] = 'dense'
        for i, g in enumerate(groups):
            biom['columns'][i]['group'] = g
        safe_print(json.dumps(biom)+'\n')
    else:
        safe_print(results)
    return 0
def main(args):
    """Run a group significance test on a BIOM or tabbed abundance table.

    The input (file or stdin) is converted to a temporary tab-delimited file.
    Each metagenome (column) must be assigned a group, taken from BIOM column
    metadata (``--metadata``) or from ``--groups`` (JSON or tabbed, given
    inline or as a filename).  The R function ``group_stats_plot`` sourced
    from ``--rlib`` performs the test and optionally draws ``--plot``.

    For BIOM input with ``--output biom`` the statistics returned by R are
    attached to each row as ``metadata['significance']``, the group is stored
    on each column, and the updated BIOM is printed as JSON; otherwise the
    raw tabbed result is printed.  Temporary files are removed on every exit
    path.

    Note: ``args`` is accepted for interface compatibility but is not read;
    options are parsed from ``sys.argv``.

    Returns 0 on success, 1 on invalid options, unreadable input, incomplete
    grouping or inconsistent results from R.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--input", dest="input", default='-', help="input: filename or stdin (-), default is stdin")
    parser.add_argument("--format", dest="format", default='biom', help="input format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--output", dest="output", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
    parser.add_argument("--plot", dest="plot", default=None, help="filename for output plot, optional")
    parser.add_argument("--stat_test", dest="stat_test", default='Kruskal-Wallis', help="supported statistical tests, one of: Kruskal-Wallis, t-test-paired, Wilcoxon-paired, t-test-unpaired, Mann-Whitney-unpaired-Wilcoxon, ANOVA-one-way, default is Kruskal-Wallis")
    parser.add_argument("--metadata", dest="metadata", default=None, help="metadata field to group by, only for 'biom' input")
    parser.add_argument("--groups", dest="groups", default=None, help="list of groups in JSON or tabbed format - either as input string or filename")
    parser.add_argument("--group_pos", dest="group_pos", type=int, default=1, help="position of group to use, default is 1 (first)")
    parser.add_argument("--rlib", dest="rlib", default=None, help="R lib path")
    parser.add_argument("--order", dest="order", default=None, help="column number to order output by, default is last column")
    parser.add_argument("--direction", dest="direction", default="desc", help="direction of order. 'asc' for ascending order, 'desc' for descending order, default is desc")
    parser.add_argument("--height", dest="height", type=float, default=6, help="image height in inches, default is 6")
    parser.add_argument("--width", dest="width", type=float, default=6, help="image width in inches, default is 6")
    parser.add_argument("--dpi", dest="dpi", type=int, default=300, help="image DPI, default is 300")

    # get inputs
    opts = parser.parse_args()
    if (opts.input != '-') and (not os.path.isfile(opts.input)):
        sys.stderr.write("ERROR: input data missing\n")
        return 1
    if opts.format not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid input format\n")
        return 1
    if opts.output not in ['text', 'biom']:
        sys.stderr.write("ERROR: invalid output format\n")
        return 1
    # fall back to the KB_PERL_PATH environment variable for the R lib path
    if (not opts.rlib) and ('KB_PERL_PATH' in os.environ):
        opts.rlib = os.environ['KB_PERL_PATH']
    if not opts.rlib:
        sys.stderr.write("ERROR: missing path to R libs\n")
        return 1
    if opts.direction not in ['asc', 'desc']:
        sys.stderr.write("ERROR: invalid order direction\n")
        return 1

    tmp_in = 'tmp_'+random_str()+'.txt'
    tmp_out = 'tmp_'+random_str()+'.txt'
    try:
        # parse inputs
        mg_list = []
        groups = []
        biom = None
        with open(tmp_in, 'w') as tmp_hdl:
            try:
                if opts.input == '-':
                    indata = sys.stdin.read()
                else:
                    with open(opts.input, 'r') as in_hdl:
                        indata = in_hdl.read()
                if opts.format == 'biom':
                    try:
                        biom = json.loads(indata)
                        # a real list: it is measured with len() and iterated
                        # below, which a lazy map object does not support
                        mg_list = [col['id'] for col in biom['columns']]
                        biom_to_tab(biom, tmp_hdl)
                        if opts.metadata:
                            groups = metadata_from_biom(biom, opts.metadata)
                    except Exception:
                        sys.stderr.write("ERROR: input BIOM data not correct format\n")
                        return 1
                else:
                    tmp_hdl.write(indata)
                    # first line of a tabbed table holds the column (metagenome) ids
                    mg_list = indata.split('\n')[0].strip().split('\t')
            except Exception:
                sys.stderr.write("ERROR: unable to load input data\n")
                return 1

        # get groups if not in BIOM metadata and option used
        if (len(groups) == 0) and opts.groups:
            # is it json ?
            ## example of 2 group sets in json format
            ## [ {"group1": ["mg_id_1", "mg_id_2"], "group2": ["mg_id_3", "mg_id_4", "mg_id_5"]},
            ##   {"group1": ["mg_id_1", "mg_id_2", "mg_id_3"], "group2": ["mg_id_4", "mg_id_5"]} ]
            try:
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        gdata = json.load(grp_hdl)
                else:
                    gdata = json.loads(opts.groups)
                if opts.group_pos > len(gdata):
                    sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                    return 1
                for m in mg_list:
                    found_g = None
                    for g, mgs in gdata[opts.group_pos-1].items():
                        if m in mgs:
                            found_g = g
                            break
                    if found_g:
                        groups.append(found_g)
                    else:
                        sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                        return 1
            # no - its tabbed
            except Exception:
                # discard anything collected before the JSON attempt failed
                groups = []
                if os.path.isfile(opts.groups):
                    with open(opts.groups, 'r') as grp_hdl:
                        gtext = grp_hdl.read()
                else:
                    gtext = opts.groups
                grows, gcols, gdata = tab_to_matrix(gtext)
                if opts.group_pos > len(gdata[0]):
                    sys.stderr.write("ERROR: position (%d) of group is out of bounds\n"%opts.group_pos)
                    return 1
                for m in mg_list:
                    try:
                        # NOTE(review): the id is looked up in the column labels
                        # but the result indexes rows of gdata - confirm against
                        # the layout tab_to_matrix returns.
                        midx = gcols.index(m)
                        groups.append(gdata[midx][opts.group_pos-1])
                    except Exception:
                        sys.stderr.write("ERROR: metagenome %s not in a group\n"%m)
                        return 1

        # validate groups
        if len(groups) != len(mg_list):
            sys.stderr.write("ERROR: Not all metagenomes in a group\n")
            return 1

        # build R cmd
        fig_out = '"%s"'%opts.plot if opts.plot else 'NULL'
        if opts.order is None:
            order_by = 'NULL'
        else:
            try:
                order_by = int(opts.order)
            except ValueError:
                sys.stderr.write("ERROR: invalid order column\n")
                return 1
        order_desc = 'TRUE' if opts.direction == 'desc' else 'FALSE'
        group_str = 'c('+','.join(['"%s"'%g for g in groups])+')'
        # argument order must follow the template: width first, then height
        r_cmd = """source("%s/group_stats_plot.r")
suppressMessages( group_stats_plot(
    file_in="%s",
    file_out="%s",
    figure_out=%s,
    figure_width_in=%.1f,
    figure_height_in=%.1f,
    figure_res_dpi=%d,
    stat_test="%s",
    order_by=%s,
    order_decreasing=%s,
    my_grouping=%s
))"""%(opts.rlib, tmp_in, tmp_out, fig_out, opts.width, opts.height, opts.dpi, opts.stat_test, order_by, order_desc, group_str)
        execute_r(r_cmd)

        # output results
        with open(tmp_out, 'r') as out_hdl:
            results = out_hdl.read()
    finally:
        # cleanup: runs on success and on every error return above
        for tmp_path in (tmp_in, tmp_out):
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)

    if biom and (opts.output == 'biom'):
        cnum = biom['shape'][1]
        rids = [r['id'] for r in biom['rows']]
        rrows, rcols, rdata = tab_to_matrix(results)
        if (len(rrows) != biom['shape'][0]) or (len(rcols[:cnum]) != biom['shape'][1]):
            sys.stderr.write("ERROR: significance test returned invalid results\n")
            return 1
        # columns past the original data columns carry the statistics
        stat_names = rcols[cnum:]
        # add stats to row data, re-order
        new_rows = []
        new_data = []
        for i, row in enumerate(rdata):
            # get row
            rindex = rids.index(rrows[i])
            robj = biom['rows'][rindex]
            if not robj['metadata']:
                robj['metadata'] = {'significance': []}
            else:
                robj['metadata']['significance'] = []
            # add stats
            for j, stat in enumerate(row[cnum:]):
                try:
                    stat = float(stat)
                except (TypeError, ValueError):
                    # non-numeric statistic is stored as None
                    stat = None
                robj['metadata']['significance'].append((stat_names[j], stat))
            new_rows.append(robj)
            new_data.append(row[:cnum])
        # update biom
        biom['id'] = biom['id']+'_sig'
        biom['rows'] = new_rows
        biom['data'] = new_data
        biom['matrix_type'] = 'dense'
        for i, g in enumerate(groups):
            biom['columns'][i]['group'] = g
        safe_print(json.dumps(biom)+'\n')
    else:
        safe_print(results)
    return 0