def test_convert_trait_table_entries(self):
    """convert_trait_table_entries applies label and value conversion fns

    Labels are passed through remove_spaces.  Values are first mapped
    through the ambiguous-value replacement table and then truncated to
    integers rendered as strings.
    """
    raw_rows = [
        ['organism 1', '0', '0.3', '15', '1', '6'],
        ['organism 2', '1', '1', '13', '-1', -1],
        ['organism 3', '2', '0', '12', '0.9', '5'],
    ]

    # Every ambiguous / missing marker is replaced with 0
    ambiguous_to_zero = {'-': 0, '-1': 0, -1: 0, 'NULL': 0, None: 0}

    def to_int_string(value):
        # Truncate toward zero, then give the integer back in string form
        return str(int(float(value)))

    # Order matters: ambiguous values are replaced before integer conversion
    value_converters = [
        make_translate_conversion_fn(ambiguous_to_zero),
        to_int_string,
    ]
    label_converters = [remove_spaces]

    converted = convert_trait_table_entries(
        raw_rows,
        value_conversion_fns=value_converters,
        label_conversion_fns=label_converters)
    obs = list(converted)

    exp = [
        ['organism_1', '0', '0', '15', '1', '6'],
        ['organism_2', '1', '1', '13', '0', '0'],
        ['organism_3', '2', '0', '12', '0', '5'],
    ]
    self.assertEqual(obs, exp)
def test_convert_trait_table_entries(self):
    """convert_trait_table_entries should run every label/value converter

    The label converter is remove_spaces; the value converters replace
    ambiguous markers with 0 and then coerce each value to an integer
    string.
    """
    input_rows = [
        ['organism 1', '0', '0.3', '15', '1', '6'],
        ['organism 2', '1', '1', '13', '-1', -1],
        ['organism 3', '2', '0', '12', '0.9', '5'],
    ]
    expected_rows = [
        ['organism_1', '0', '0', '15', '1', '6'],
        ['organism_2', '1', '1', '13', '0', '0'],
        ['organism_3', '2', '0', '12', '0', '5'],
    ]

    # Markers treated as ambiguous, all mapped to 0
    replacements = {'-': 0, '-1': 0, -1: 0, 'NULL': 0, None: 0}
    replace_ambiguous = make_translate_conversion_fn(replacements)

    def as_integer_string(raw):
        # float() first so that values such as '0.9' are accepted
        return str(int(float(raw)))

    # Replacement must come before the integer coercion
    value_fns = [replace_ambiguous, as_integer_string]
    label_fns = [remove_spaces]

    obs = list(
        convert_trait_table_entries(
            input_rows,
            value_conversion_fns=value_fns,
            label_conversion_fns=label_fns))

    self.assertEqual(obs, expected_rows)
def test_convert_trait_table_entries(self):
    """Labels and values are converted by the supplied conversion fns

    Checks convert_trait_table_entries with remove_spaces as the label
    converter and, for values, ambiguous-marker replacement followed by
    conversion to integer strings.
    """
    # Value converters are applied in list order
    value_pipeline = []

    # Step 1: swap ambiguous markers for 0
    zero_for_ambiguous = {"-": 0, "-1": 0, -1: 0, "NULL": 0, None: 0}
    value_pipeline.append(make_translate_conversion_fn(zero_for_ambiguous))

    # Step 2: integer (truncated) in string form
    def int_as_str(entry):
        return str(int(float(entry)))

    value_pipeline.append(int_as_str)

    label_pipeline = [remove_spaces]

    table_rows = [
        ["organism 1", "0", "0.3", "15", "1", "6"],
        ["organism 2", "1", "1", "13", "-1", -1],
        ["organism 3", "2", "0", "12", "0.9", "5"],
    ]

    result_iter = convert_trait_table_entries(
        table_rows,
        value_conversion_fns=value_pipeline,
        label_conversion_fns=label_pipeline,
    )
    obs = list(result_iter)

    exp = [
        ["organism_1", "0", "0", "15", "1", "6"],
        ["organism_2", "1", "1", "13", "0", "0"],
        ["organism_3", "2", "0", "12", "0", "5"],
    ]
    self.assertEqual(obs, exp)
def main(): """Generate test trees given parameters""" option_parser, opts, args =\ parse_command_line_parameters(**script_info) if opts.verbose: print "Loading trait table..." input_trait_table = open(opts.input_trait_table,"U") if opts.verbose: print "Loading tree..." #PicrustNode seems to run into very slow/memory intentsive perfromance... #tree = DndParser(open(opts.input_tree),constructor=PicrustNode) tree = DndParser(open(opts.input_tree)) if opts.verbose: print "Parsing trait table..." #Find which taxa are to be used in tests #(by default trait table taxa) trait_table_header,trait_table_fields = \ parse_trait_table(input_trait_table) if opts.verbose: print "Ensuring tree and trait table labels are formatted consistently..." label_conversion_fns = set_label_conversion_fns(verbose=opts.verbose) fix_tree_labels(tree,label_conversion_fns) trait_table_fields = convert_trait_table_entries(trait_table_fields,\ value_conversion_fns = [],\ label_conversion_fns = label_conversion_fns) trait_table_fields = [t for t in trait_table_fields] print "Number of trait table fields with single quotes:",\ len([t for t in trait_table_fields if "'" in t[0]]) if opts.verbose: print "Making output directory..." 
make_output_dir(opts.output_dir) if opts.limit_to_tips: included_tips = opts.limit_to_tips.split(",") if opts.verbose: print "Limiting test datasets to %i tips: %s" %(len(included_tips),included_tips) else: included_tips = False method_fns =\ {"exclude_tips_by_distance":\ make_distance_based_exclusion_fn,\ "randomize_tip_labels_by_distance":\ make_distance_based_tip_label_randomizer } test_fn_factory = method_fns[opts.method] if opts.verbose: print "Setting tree modification method to:", opts.method print "(%s)" % test_fn_factory.__doc__ modify_tree = True if opts.suppress_tree_modification: if opts.verbose: print "Suppressing modification of tree when making test datasets" modify_tree = False if opts.verbose: print "Starting generation of test datsets" test_datasets = \ yield_genome_test_data_by_distance(tree,trait_table_fields,\ test_fn_factory,min_dist = opts.min_dist,\ max_dist=opts.max_dist,increment=opts.dist_increment,\ modify_tree=modify_tree,limit_to_tips= included_tips,verbose = opts.verbose) if opts.verbose: print "Writing files for test datasets" for curr_dist,test_tree,tip_to_predict,\ expected_traits,test_trait_table_fields in test_datasets: if included_tips is not False: if tip_to_predict not in included_tips: if opts.verbose: print "Skipping tip %s: limiting to tip(s): %s" %(tip_to_predict,included_tips) continue #Make a safe version of tip to predict # So odd characters like | don't mess up OS safe_tip_to_predict = "'%s'"%tip_to_predict #Write tree base_name = "--".join(map(str,["test_tree",opts.method,curr_dist])) curr_filepath = write_tree(opts.output_dir,base_name,test_tree,safe_tip_to_predict) if opts.verbose: print "Wrote test tree to: %s" % curr_filepath #Write expected trait table base_name = "--".join(map(str,["exp_traits",opts.method,curr_dist,safe_tip_to_predict])) exp_trait_table_lines = [trait_table_header] exp_trait_table_lines.append("\t".join(expected_traits)+"\n") #print "Expected_trait_table_lines:",exp_trait_table_lines 
filename=os.path.join(opts.output_dir,base_name) if opts.verbose: print "Writing expected trait table to:", filename f=open(filename,"w") f.write("".join(exp_trait_table_lines)) f.close() #Output a transposed, BIOM format expectation table for comparison with predict_traits output #NOTE: this is a clumsy way of getting the translated trait table # but more elegant, direct methods (directly feeding data to biom's table_factory) # weren't working for me readily. In the future, we should streamline this process # Leaving as is for now since this code is mostly for developers so speed/elegence # are probably not essential here. #Let the hackishness begin #Reload the tab-delimited trait table header, fields = parse_trait_table(open(filename,"U")) fields = [f for f in fields] #converts generator to list #Transpose table for .BIOM format so that Observation ids are KOs transposed_header, transposed_trait_table_lines =\ transpose_trait_table_fields(fields,header,\ id_row_idx=0, input_header_delimiter="\t",output_delimiter="\t") #Eliminate newline in header trans_trait_table_lines = [transposed_header.strip()] trans_trait_table_lines.extend(["\t".join(r) for r in transposed_trait_table_lines]) trans_trait_table = '\n'.join(trans_trait_table_lines) #Write BIOM format expected trait table base_name = "--".join(map(str,["exp_biom_traits",opts.method,curr_dist,safe_tip_to_predict])) expected_biom_table = parse_table_to_biom(trans_trait_table.split('\n'),\ table_format = "tab-delimited") #print "Expected_trait_table_lines:",exp_trait_table_lines filename=os.path.join(opts.output_dir,base_name) if opts.verbose: print "Writing BIOM-format expected trait table to:", filename f=open(filename,"w") f.write(format_biom_table(expected_biom_table)) f.close() #Write test trait table test_trait_table_fields = test_trait_table_fields if expected_traits in test_trait_table_fields: test_trait_table_fields.remove(expected_traits) test_trait_table_lines = [trait_table_header] 
test_trait_table_lines.extend(["\t".join(r)+"\n" for r in test_trait_table_fields]) #print "Test_trait_table_lines:",test_trait_table_lines base_name = "--".join(map(str,["test_trait_table",opts.method,curr_dist,safe_tip_to_predict])) filename=os.path.join(opts.output_dir,base_name) if opts.verbose: print "Writing test trait table to:", filename f=open(filename,"w") f.write("".join(test_trait_table_lines)) f.close() if opts.verbose: print "Done generating test datasets"
def main(): """Generate test trees given parameters""" option_parser, opts, args =\ parse_command_line_parameters(**script_info) if opts.verbose: print "Loading trait table..." input_trait_table = open(opts.input_trait_table, "U") if opts.verbose: print "Loading tree..." #PicrustNode seems to run into very slow/memory intentsive perfromance... #tree = DndParser(open(opts.input_tree),constructor=PicrustNode) tree = DndParser(open(opts.input_tree)) if opts.verbose: print "Parsing trait table..." #Find which taxa are to be used in tests #(by default trait table taxa) trait_table_header,trait_table_fields = \ parse_trait_table(input_trait_table) if opts.verbose: print "Ensuring tree and trait table labels are formatted consistently..." label_conversion_fns = set_label_conversion_fns(verbose=opts.verbose) fix_tree_labels(tree, label_conversion_fns) trait_table_fields = convert_trait_table_entries(trait_table_fields,\ value_conversion_fns = [],\ label_conversion_fns = label_conversion_fns) trait_table_fields = [t for t in trait_table_fields] print "Number of trait table fields with single quotes:",\ len([t for t in trait_table_fields if "'" in t[0]]) if opts.verbose: print "Making output directory..." 
make_output_dir(opts.output_dir) if opts.limit_to_tips: included_tips = opts.limit_to_tips.split(",") if opts.verbose: print "Limiting test datasets to %i tips: %s" % ( len(included_tips), included_tips) else: included_tips = False method_fns =\ {"exclude_tips_by_distance":\ make_distance_based_exclusion_fn,\ "randomize_tip_labels_by_distance":\ make_distance_based_tip_label_randomizer } test_fn_factory = method_fns[opts.method] if opts.verbose: print "Setting tree modification method to:", opts.method print "(%s)" % test_fn_factory.__doc__ modify_tree = True if opts.suppress_tree_modification: if opts.verbose: print "Suppressing modification of tree when making test datasets" modify_tree = False if opts.verbose: print "Starting generation of test datsets" test_datasets = \ yield_genome_test_data_by_distance(tree,trait_table_fields,\ test_fn_factory,min_dist = opts.min_dist,\ max_dist=opts.max_dist,increment=opts.dist_increment,\ modify_tree=modify_tree,limit_to_tips= included_tips,verbose = opts.verbose) if opts.verbose: print "Writing files for test datasets" for curr_dist,test_tree,tip_to_predict,\ expected_traits,test_trait_table_fields in test_datasets: if included_tips is not False: if tip_to_predict not in included_tips: if opts.verbose: print "Skipping tip %s: limiting to tip(s): %s" % ( tip_to_predict, included_tips) continue #Make a safe version of tip to predict # So odd characters like | don't mess up OS safe_tip_to_predict = "'%s'" % tip_to_predict #Write tree base_name = "--".join(map(str, ["test_tree", opts.method, curr_dist])) curr_filepath = write_tree(opts.output_dir, base_name, test_tree, safe_tip_to_predict) if opts.verbose: print "Wrote test tree to: %s" % curr_filepath #Write expected trait table base_name = "--".join( map(str, ["exp_traits", opts.method, curr_dist, safe_tip_to_predict])) exp_trait_table_lines = [trait_table_header] exp_trait_table_lines.append("\t".join(expected_traits) + "\n") #print 
"Expected_trait_table_lines:",exp_trait_table_lines filename = os.path.join(opts.output_dir, base_name) if opts.verbose: print "Writing expected trait table to:", filename f = open(filename, "w") f.write("".join(exp_trait_table_lines)) f.close() #Output a transposed, BIOM format expectation table for comparison with predict_traits output #NOTE: this is a clumsy way of getting the translated trait table # but more elegant, direct methods (directly feeding data to biom's table_factory) # weren't working for me readily. In the future, we should streamline this process # Leaving as is for now since this code is mostly for developers so speed/elegence # are probably not essential here. #Let the hackishness begin #Reload the tab-delimited trait table header, fields = parse_trait_table(open(filename, "U")) fields = [f for f in fields] #converts generator to list #Transpose table for .BIOM format so that Observation ids are KOs transposed_header, transposed_trait_table_lines =\ transpose_trait_table_fields(fields,header,\ id_row_idx=0, input_header_delimiter="\t",output_delimiter="\t") #Eliminate newline in header trans_trait_table_lines = [transposed_header.strip()] trans_trait_table_lines.extend( ["\t".join(r) for r in transposed_trait_table_lines]) trans_trait_table = '\n'.join(trans_trait_table_lines) #Write BIOM format expected trait table base_name = "--".join( map(str, [ "exp_biom_traits", opts.method, curr_dist, safe_tip_to_predict ])) expected_biom_table = parse_table_to_biom(trans_trait_table.split('\n'),\ table_format = "tab-delimited") #print "Expected_trait_table_lines:",exp_trait_table_lines filename = os.path.join(opts.output_dir, base_name) if opts.verbose: print "Writing BIOM-format expected trait table to:", filename f = open(filename, "w") f.write(format_biom_table(expected_biom_table)) f.close() #Write test trait table test_trait_table_fields = test_trait_table_fields if expected_traits in test_trait_table_fields: 
test_trait_table_fields.remove(expected_traits) test_trait_table_lines = [trait_table_header] test_trait_table_lines.extend( ["\t".join(r) + "\n" for r in test_trait_table_fields]) #print "Test_trait_table_lines:",test_trait_table_lines base_name = "--".join( map(str, [ "test_trait_table", opts.method, curr_dist, safe_tip_to_predict ])) filename = os.path.join(opts.output_dir, base_name) if opts.verbose: print "Writing test trait table to:", filename f = open(filename, "w") f.write("".join(test_trait_table_lines)) f.close() if opts.verbose: print "Done generating test datasets"