def policy_enumerate(args, access_key, secret_key, token):
    """Enumerate and print the IAM permissions granted to the instance role.

    Builds a boto3 session from the supplied credentials, resolves the role
    name from the EC2 instance metadata service, lists both attached and
    inline role policies, and prints the (optionally filtered) results as a
    table.
    """
    aws_session = boto3.Session(aws_access_key_id=access_key,
                                aws_secret_access_key=secret_key,
                                aws_session_token=token)
    iam = aws_session.client('iam')
    iamres = aws_session.resource('iam')

    # The instance profile ARN comes from the EC2 metadata endpoint; the
    # role name is the segment after the '/'.
    meta_resp = requests.get('http://169.254.169.254/latest/meta-data/iam/info')
    role_arn = json.loads(meta_resp.text)['InstanceProfileArn']
    role = role_arn.split('/')[1]

    attached_resp = None
    inline_resp = None
    try:
        attached_resp = iam.list_attached_role_policies(RoleName=role)
        inline_resp = iam.list_role_policies(RoleName=role)
    except ClientError as error:
        common.exception(error, 'List role policy failed.')

    print('\nThe following permissions belong to the role {}: \n'.format(role))

    rows = []
    rows += attached_policy_enum(iam, iamres, attached_resp)
    rows += managed_policy_enum(iamres, inline_resp, role)
    common.print_table(
        filter_results(rows, args),
        ["Service", "Action", "Resource", "Effect", "Policy name"])
def search_in_mentors_db(column):
    """Search the mentors table by *column* and print the matching rows.

    The user-supplied search value is passed to the driver as a bound
    parameter instead of being concatenated into the SQL text, which closes
    the SQL-injection hole the original code had.  ``column`` itself must be
    a trusted, internally supplied column name — identifiers cannot be bound
    as parameters, so it is still interpolated into the statement.

    NOTE(review): assumes a DB-API driver using the ``%s`` paramstyle
    (psycopg2 / MySQLdb) — confirm against the module that creates ``cur``.
    """
    if column == "favourite_number":
        valid_user_input = common.input_verification(
            "Insert a number input to search by: ", "number")
        cur.execute(
            "SELECT * FROM mentors WHERE " + column + " = %s "
            "ORDER BY first_name ASC;",
            (valid_user_input,))
    else:
        valid_user_input = common.input_verification(
            "Insert text to search by: ", "string")
        cur.execute(
            "SELECT * FROM mentors WHERE lower(" + column + ") LIKE %s "
            "ORDER BY first_name ASC;",
            ('%' + valid_user_input + '%',))
    search_result = cur.fetchall()
    # Get rid of None elements to be able to print list with
    # common.print_table function.
    result_to_list = [list(element) for element in search_result]
    for element in result_to_list:
        if element[7] is None:
            element[7] = "Nothing"
    os.system('clear')
    MENTOR_DB_COL_TITLES = get_col_titles("mentors")
    common.print_table(result_to_list, MENTOR_DB_COL_TITLES)
def show_difftable_topo(difftable, attr1, attr2, usecolor=False):
    """Print each difftable entry next to ASCII topologies of both trees.

    Mismatching leaves are tagged with " ***" (and colored red when
    *usecolor* is set).  Finishes by logging the total euclidean distance.
    """
    if not difftable:
        return
    maxcolwidth = 80
    showtable = []
    total_dist = 0

    def _relabel(tree, attr, diff):
        # Rename leaves from the requested attribute and mark mismatches.
        for leaf in tree.iter_leaves():
            leaf.name = getattr(leaf, attr)
            if leaf.name in diff:
                leaf.name += " ***"
                if usecolor:
                    leaf.name = color(leaf.name, "red")

    def _clip(ascii_art):
        # Truncate too-wide topology strings, pretending they are scrolled
        # to the right margin.
        art_lines = ascii_art.split("\n")
        width = max(len(ln) for ln in art_lines)
        if width <= maxcolwidth:
            return ascii_art
        cut = width - maxcolwidth
        return '\n'.join(ln[cut + 1:] for ln in art_lines)

    for dist, side1, side2, diff, n1, n2 in sorted(difftable, reverse=True):
        total_dist += dist
        tree_a = Tree(n1.write(features=[attr1]))
        tree_b = Tree(n2.write(features=[attr2]))
        tree_a.ladderize()
        tree_b.ladderize()
        _relabel(tree_a, attr1, diff)
        _relabel(tree_b, attr2, diff)
        topo1 = _clip(tree_a.get_ascii(show_internal=False, compact=False))
        topo2 = _clip(tree_b.get_ascii(show_internal=False, compact=False))
        showtable.append([
            "%0.2g" % dist,
            "%d vs %d tips\n(%d diffs)" % (len(side1), len(side2), len(diff)),
            topo1, topo2])

    print_table(showtable,
                header=["Dist", "#diffs", "Tree1", "Tree2"],
                max_col_width=maxcolwidth,
                wrap_style="wrap",
                row_line=True)
    log.info("Total euclidean distance:\t%0.4f\tMismatching nodes:\t%d"
             % (total_dist, len(difftable)))
def show_difftable(difftable):
    """Render one summary row per difftable entry as a wrapped text table."""
    rows = [[dist, len(side1), len(side2), len(diff), sepstring(diff)]
            for dist, side1, side2, diff, _n1, _n2 in difftable]
    print_table(rows,
                header=["distance", "size1", "size2", "ndiffs", "diff"],
                max_col_width=80,
                wrap_style="wrap",
                row_line=True)
def show_difftable_topo(difftable, attr1, attr2, usecolor=False):
    """Show each difference entry together with ASCII drawings of both trees.

    Leaves present in the diff set are suffixed with " ***" and, when
    *usecolor* is on, highlighted in red.  Logs the accumulated euclidean
    distance at the end.
    """
    if not difftable:
        return
    col_limit = 80
    rows = []
    dist_sum = 0
    for entry in sorted(difftable, reverse=True):
        dist, side1, side2, diff, n1, n2 = entry
        dist_sum += dist
        tree_a = Tree(n1.write(features=[attr1]))
        tree_b = Tree(n2.write(features=[attr2]))
        tree_a.ladderize()
        tree_b.ladderize()
        # Relabel leaves from the requested attribute, marking mismatches.
        for tree, attr in ((tree_a, attr1), (tree_b, attr2)):
            for leaf in tree.iter_leaves():
                leaf.name = getattr(leaf, attr)
                if leaf.name in diff:
                    leaf.name += " ***"
                    if usecolor:
                        leaf.name = color(leaf.name, "red")
        drawings = []
        for tree in (tree_a, tree_b):
            art = tree.get_ascii(show_internal=False, compact=False)
            # This truncates too large topology strings pretending to be
            # scrolled to the right margin.
            art_lines = art.split("\n")
            width = max(len(ln) for ln in art_lines)
            if width > col_limit:
                cut = width - col_limit
                art = "\n".join(ln[cut + 1:] for ln in art_lines)
            drawings.append(art)
        rows.append([
            "%0.2g" % dist,
            "%d vs %d tips\n(%d diffs)" % (len(side1), len(side2), len(diff)),
            drawings[0],
            drawings[1],
        ])
    print_table(rows,
                header=["Dist", "#diffs", "Tree1", "Tree2"],
                max_col_width=col_limit,
                wrap_style="wrap",
                row_line=True)
    log.info("Total euclidean distance:\t%0.4f\tMismatching nodes:\t%d"
             % (dist_sum, len(difftable)))
def show_difftable(difftable):
    """Print one summary row per difftable entry (sizes, #diffs, diff set)."""
    entries = []
    for dist, side1, side2, diff, _unused1, _unused2 in difftable:
        entries.append(
            [dist, len(side1), len(side2), len(diff), sepstring(diff)])
    print_table(
        entries,
        header=["distance", "size1", "size2", "ndiffs", "diff"],
        max_col_width=80,
        wrap_style="wrap",
        row_line=True,
    )
def get_and_prints() -> Dict[str, int]:
    """Fetch assigned open issue counts per project, print a summary table,
    and return the mapping.

    Example return value: {'xxx': 1, 'yyy': 2, 'zzz': 3}
    """
    counts = get_assigned_open_issues_per_project()
    print('Total issues:', sum(counts.values()))
    print()
    # Rendered roughly as:
    #   PROJECT | Issues
    #   --------+-------
    #   xxx     | 1
    print_table(counts)
    return counts
def main():
    """Entry point: enumerate AWS resources for the selected service(s).

    With no service argument all supported services are scanned; an unknown
    service name aborts the program.
    """
    args = init()
    arn = ARN()
    supported = ['s3', 'dynamodb', 'sqs']
    if not args['service']:
        services = supported
    elif args['service'] in supported:
        services = [args['service']]
    else:
        print('Invalid service.')
        sys.exit()
    services.sort()
    rows = enum_resources(arn, services)
    print('\nAvailable resources: \n')
    common.print_table(rows, ["Service", "Region", "Name"])
def _system_list_table(arguments):
    """Print the systems table; the column set is driven by CLI options."""
    if arguments['--long']:
        cols = 'name ip user installer cleaner config monitor comment'.split(' ')
    elif arguments['--fields']:
        cols = arguments['--fields'].lower().split(',')
    elif arguments['--col-1']:
        cols = ['name']
    else:
        cols = ['name', 'user', 'comment']
    # Restrict to a single system when a <name> argument was given.
    where = {'name': arguments['<name>']} if arguments['<name>'] else {}
    records = db.list_table('systems', **where)
    data = ([rec[col] for col in cols] for rec in records)
    headers = [col.upper() for col in cols]
    common.print_table(headers, sorted(data, key=lambda row: row[0]))
def _task_list_table(arguments):
    """Print the tasks table; columns and row filtering come from CLI options."""
    if arguments['--col-1']:
        cols = ['name']
    elif arguments['--long']:
        cols = 'name parent schedule state command condition resources email log last'.split(' ')
    elif arguments['--fields']:
        cols = arguments['--fields'].lower().split(',')
    else:
        cols = 'name state schedule last'.split(' ')
    if arguments['--ancestor']:
        # Ancestor mode scans every task, then keeps only descendants.
        tasks = (rec for rec in db.list_table('tasks')
                 if _holdings_filter(Task(record=rec), arguments))
        tasks = (task for task in tasks
                 if _decendant_filter(task, arguments['--ancestor']))
    else:
        where = {'name': arguments['<name>']} if arguments['<name>'] else {}
        tasks = (rec for rec in db.list_table('tasks', **where)
                 if _holdings_filter(Task(record=rec), arguments))
    data = ([task[col] for col in cols] for task in tasks)
    common.print_table([col.upper() for col in cols], data)
# NOTE(review): legacy Python 2 module (`print x`, `print >> f` statements).
# Compares a set of source trees against a reference tree with ete's
# Tree.compare() (RF, normRF, treeko distance, edge compatibility) and prints
# one table row per source tree, either to stdout or to the -o report file.
# Leaf names can be remapped via --ref_tree_attr / --src_tree_attr, species
# can be extracted from gene-tree leaf names via --sp_regexp, and both trees
# can be re-rooted with --outgroup before the comparison.
# Left byte-identical: the control flow is long and order-dependent, so only
# this header was added.
def main(argv): parser = argparse.ArgumentParser( description=__DESCRIPTION__, formatter_class=argparse.RawDescriptionHelpFormatter) input_args = parser.add_argument_group("INPUT OPTIONS") input_args.add_argument("source_trees", metavar='source_trees', type=str, nargs="*", help='a list of source tree files') input_args.add_argument( "--source_file", dest="source_file", type=str, help="""path to a file containing many source trees, one per line""") input_args.add_argument("-r", dest="reftree", type=str, required=True, help="""Reference tree""") input_args.add_argument("--ref_tree_attr", dest="ref_tree_attr", type=str, default="name", help=("attribute in ref tree used as leaf name")) input_args.add_argument( "--src_tree_attr", dest="src_tree_attr", type=str, default="name", help=("attribute in source tree used as leaf name")) input_args.add_argument( "--min_support_ref", type=float, default=0.0, help=("min support for branches to be considered from the ref tree")) input_args.add_argument( "--min_support_src", type=float, default=0.0, help=( "min support for branches to be considered from the source tree")) output_args = parser.add_argument_group("OUTPUT OPTIONS") output_args.add_argument("-o", dest="output", type=str, help="""Path to the tab delimited report file""") opt_args = parser.add_argument_group("DISTANCE OPTIONS") opt_args.add_argument( "--outgroup", dest="outgroup", nargs="+", help= """outgroup used to root reference and source trees before distance computation""" ) opt_args.add_argument("--expand_polytomies", dest="polytomies", action="store_true", help="""expand politomies if necessary""") opt_args.add_argument("--unrooted", dest="unrooted", action="store_true", help="""compare trees as unrooted""") opt_args.add_argument( "--min_support", dest="min_support", type=float, default=0.0, help= ("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)" )) opt_args = 
parser.add_argument_group("PHYLOGENETICS OPTIONS") opt_args.add_argument( "--extract_species", action="store_true", help= "When used, leaf names in the reference and source trees are assumed to represent species." " If target trees are gene-trees whose species information is encoded as a part of the leaf sequence name," " it can be automatically extracted by providing a Perl regular expression that extract a " " valid species code (see --sp_regexp). Such information will be also used to detect duplication" " events. ") opt_args.add_argument( "--sp_regexp", type=str, help= ("Specifies a Perl regular expression to automatically extract species names" " from the name string in source trees. If not used, leaf names are assumed to represent species names." " Example: use this expression '[^_]+_(.+)' to extract HUMAN from the string 'P53_HUMAN'." )) opt_args.add_argument("--collateral", action='store_true', help=("")) args = parser.parse_args(argv) print __DESCRIPTION__ reftree = args.reftree if args.source_file and args.source_trees: print >> sys.stderr, 'The use of targets_file and targets at the same time is not supported.' 
sys.exit(1) if args.source_file: source_trees = tree_iterator(args.source_file) else: source_trees = args.source_trees ref_tree = Tree(reftree) if args.ref_tree_attr: for lf in ref_tree.iter_leaves(): lf._origname = lf.name if args.ref_tree_attr not in lf.features: print lf lf.name = getattr(lf, args.ref_tree_attr) if args.outgroup: if len(args.outgroup) > 1: out = ref_tree.get_common_ancestor(args.outgroup) else: out = ref_tree.search_nodes(name=args.outgroup[0])[0] ref_tree.set_outgroup(out) HEADER = ("source tree", 'ref tree', 'common\ntips', 'normRF', 'RF', 'maxRF', "%reftree", "%genetree", "subtrees", "treeko\ndist") if args.output: OUT = open(args.output, "w") print >> OUT, '# ' + ctime() print >> OUT, '# ' + ' '.join(sys.argv) print >> OUT, '#' + '\t'.join(HEADER) else: print '# ' + ctime() print '# ' + ' '.join(sys.argv) COL_WIDTHS = [20, 20] + [9] * 10 print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap') prev_tree = None ref_fname = os.path.basename(args.reftree) for counter, tfile in enumerate(source_trees): if args.source_file: seedid, tfile = tfile else: seedid = None if args.extract_species: if args.sp_regexp: SPMATCHER = re.compile(args.sp_regexp) get_sp_name = lambda x: re.search(SPMATCHER, x).groups()[0] else: get_sp_name = lambda x: x tt = PhyloTree(tfile, sp_naming_function=get_sp_name) else: tt = Tree(tfile) if args.src_tree_attr: for lf in tt.iter_leaves(): lf._origname = lf.name lf.name = getattr(lf, args.src_tree_attr) if args.outgroup: if len(args.outgroup) > 1: out = tt.get_common_ancestor(args.outgroup) else: out = tt.search_nodes(name=args.outgroup[0])[0] tt.set_outgroup(out) if args.source_trees: fname = os.path.basename(tfile) else: fname = '%05d' % counter r = tt.compare(ref_tree, ref_tree_attr=args.ref_tree_attr, source_tree_attr=args.src_tree_attr, min_support_ref=args.min_support_ref, min_support_source=args.min_support_src, unrooted=args.unrooted, has_duplications=args.extract_species) print_table([ map(istr, [ 
fname[-30:], ref_fname[-30:], r['effective_tree_size'], r['norm_rf'], r['rf'], r['max_rf'], r["source_edges_in_ref"], r["ref_edges_in_source"], r['source_subtrees'], r['treeko_dist'] ]) ], fix_col_width=COL_WIDTHS, wrap_style='cut') if args.output: OUT.close()
import hh_data_provider
import superjob_data_provider
import common
import os

if __name__ == '__main__':
    # HeadHunter requires no credentials; SuperJob reads its API version
    # and secret key from the environment (raises KeyError when unset).
    hh_data = hh_data_provider.get_salaries()
    common.print_table('HeadHunter Moscow', hh_data)

    sj_api_version = os.environ['SUPERJOB_API_VERSION']
    sj_secret_key = os.environ['SUPERJOB_SECRET_KEY']
    sj_data = superjob_data_provider.get_salaries(sj_api_version, sj_secret_key)
    common.print_table('SuperJob Moscow', sj_data)
def main():
    """List the existing Lambda functions, then create and run a new one."""
    lambda_client, role_arn = init()
    rows = list_functions(lambda_client)
    print('\nThe existing functions in Lambda:')
    print_table(rows, ['FunctionName', 'Runtime', 'Description'])
    create_run_function(lambda_client, role_arn)
from django.utils import six

# Script body: dump selected users as a separator-joined table.
op = OptionParser()
op.add_option('-s', '--separator', dest='separator', default=' ',
              metavar='CHAR', type=str,
              help="The separator between spaces")  # fixed typo: "seoarator"
options, args = op.parse_args()

# Python 2 workaround: when stdout is piped its encoding is None, which makes
# printing non-ASCII user data crash.  Reloading sys restores the (normally
# deleted) setdefaultencoding so UTF-8 can be forced.
if sys.stdout.encoding is None:
    reload_module(sys)
    sys.setdefaultencoding('utf-8')

# One row per selected user with name, study, date and address fields.
data = []
for u in args_to_users(args):
    data.append((
        u.first_name, u.last_name, u.studentNumber, u.institute, u.study,
        u.dateOfBirth, u.dateJoined, u.email, u.addr_street, u.addr_number,
        u.addr_zipCode, u.addr_city, u.telephone))
# Stringify every cell so print_table receives uniform text values.
data = [[six.text_type(x) for x in r] for r in data]
print_table(data, separator=options.separator)
# NOTE(review): legacy Python 2 module (`print target` statement).  Transfers
# node attributes from a set of source trees onto a matching reference tree:
# nodes are matched by their frozenset of leaf names, selected features are
# copied with add_feature(), the annotated tree is written to -o, and a
# per-feature transfer count table is printed.  Source trees are pruned to
# the reference leaf set and re-rooted using one of the reference root's two
# child clades as outgroup.  Left byte-identical apart from this header.
def main(argv): parser = argparse.ArgumentParser(description=__DESCRIPTION__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("-r", dest="reftree", type=str, required=True, help="""Reference tree""") parser.add_argument("source_trees", metavar='source_trees', type=str, nargs="+", help='A list of newick tree files used as a source for node annotations') parser.add_argument("--discard", dest="discard", type=str, nargs="+", default=[], help=("A list of attributes that should be ignored from source trees. " "Node dist, name and support values are always ignored unless they" " are explicitly passed as target features")) parser.add_argument("--features", dest="features", type=str, nargs="+", default = [], help=("A list of attributes that should be transferred from source trees.")) parser.add_argument("-o", dest="output", type=str, required=True, help=("output file name for the annotated tree")) args = parser.parse_args(argv) ref = Tree(args.reftree) TARGET_FEATURES = args.features DISCARD_FEATURES = args.discard + ["support", "name", "dist"] key2node = {} for node in ref.traverse(): nodekey = frozenset(node.get_leaf_names()) key2node[nodekey] = node out = ref.children[0].get_leaf_names() out2 = ref.children[1].get_leaf_names() transferred_features = defaultdict(int) for target in args.source_trees: print target tt = Tree(target) tt.prune(ref.get_leaf_names()) if len(out) > 1: try: tt.set_outgroup(tt.get_common_ancestor(out)) except ValueError: tt.set_outgroup(tt.get_common_ancestor(out2)) else: tt.set_outgroup(tt.search_nodes(name=out[0])[0]) for node in tt.traverse(): nodekey = frozenset([n.name for n in node.get_leaves()]) target_node = key2node.get(nodekey, None) if target_node: for f in node.features: if f in DISCARD_FEATURES and not TARGET_FEATURES: continue elif TARGET_FEATURES and f not in TARGET_FEATURES: continue else: transferred_features[f] += 1 target_node.add_feature(f, getattr(node, f)) ref.write(outfile=args.output, features=[], 
format_root_node=True) print print_table(transferred_features.items(), header=["feature name", "#nodes"])
# NOTE(review): legacy Python 2 module (`print >>`, `xrange`).  Compares
# target trees against a reference tree with robinson_foulds(); when
# --extract_species is set, gene trees are split into speciation subtrees
# (get_speciation_trees) and a size-weighted treeko distance is computed with
# numpy across subtrees.  Results (dups, subtrees, treeko, RF, normRF, edge
# compatibility percentages, tree sizes) are printed as table rows or written
# tab-delimited to the -o report file.  The body is long and heavily
# order-dependent, so it is left byte-identical apart from this header.
def main(argv): parser = argparse.ArgumentParser(description=__DESCRIPTION__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("target_trees", metavar='target_trees', type=str, nargs="*", help='a list of target tree files') parser.add_argument("--targets_file", dest="targets_file", type=str, help="""path to a file containing target trees, one per line""") parser.add_argument("-o", dest="output", type=str, help="""Path to the tab delimited report file""") parser.add_argument("-r", dest="reftree", type=str, required=True, help="""Reference tree""") parser.add_argument("--outgroup", dest="outgroup", nargs = "+", help="""outgroup used to root reference and target trees before distance computation""") parser.add_argument("--expand_polytomies", dest="polytomies", action = "store_true", help="""expand politomies if necessary""") parser.add_argument("--unrooted", dest="unrooted", action = "store_true", help="""compare trees as unrooted""") parser.add_argument("--min_support", dest="min_support", type=float, default=0.0, help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)")) parser.add_argument("--extract_species", dest="extract_species", action = "store_true", help="""When used, reference tree is assumed to contain species names, while target trees as expected to be gene trees. Species name will be extracted from gene tree nodes and treeko will be used if duplication events are found.""") parser.add_argument("--spname_delimiter", dest="spname_delimiter", type=str, default="_", help=("species code delimiter in node names")) parser.add_argument("--spname_field", dest="spname_field", type=int, default=-1, help=("position of the species code extracted from node names. 
-1 = last field")) parser.add_argument("--collateral", dest="collateral", action='store_true', help=("")) parser.add_argument("--ref_attr", dest="ref_attr", type=str, help=("attribute in ref tree used as leaf name")) parser.add_argument("--target_attr", dest="target_attr", type=str, help=("attribute in target tree used as leaf name")) args = parser.parse_args(argv) print __DESCRIPTION__ reftree = args.reftree if args.targets_file and args.target_trees: print >>sys.stderr, 'The use of targets_file and targets at the same time is not supported.' sys.exit(1) if args.targets_file: target_trees = tree_iterator(args.targets_file) else: target_trees = args.target_trees t = Tree(reftree) if args.ref_attr: for lf in t.iter_leaves(): lf._origname = lf.name if args.ref_attr not in lf.features: print lf lf.name = getattr(lf, args.ref_attr) if args.outgroup: if len(args.outgroup) > 1: out = t.get_common_ancestor(args.outgroup) else: out = t.search_nodes(name=args.outgroup[0])[0] t.set_outgroup(out) ref_names = set(t.get_leaf_names()) reftree_len = len(t) reftree_edges = (reftree_len*2)-2 ncollapsed_branches = len([n for n in t.traverse() if n.children and n.support < args.min_support]) #reftree_edges -= ncollapsed_branches #if ncollapsed_branches: # print '%d branches collapsed in reference tree' %ncollapsed_branches HEADER = ("target tree", 'dups', 'subtrees', 'used trees', 'treeko', "RF", "maxRF", 'normRF', "%reftree", "%genetree", "avgSize", "minSize", "common tips", "refSize", "targetSize") if args.output: OUT = open(args.output, "w") print >>OUT, '# ' + ctime() print >>OUT, '# ' + ' '.join(sys.argv) print >>OUT, '#'+'\t'.join(HEADER) else: print '# ' + ctime() print '# ' + ' '.join(sys.argv) COL_WIDTHS = [20, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap') prev_tree = None for counter, tfile in enumerate(target_trees): if args.targets_file: seedid, tfile = tfile else: seedid = None if args.extract_species: tt = 
PhyloTree(tfile, sp_naming_function = lambda name: name.split(args.spname_delimiter)[args.spname_field]) else: tt = Tree(tfile) if args.target_attr: for lf in tt.iter_leaves(): lf._origname = lf.name lf.name = getattr(lf, args.target_attr) if args.outgroup: if len(args.outgroup) > 1: out = tt.get_common_ancestor(args.outgroup) else: out = tt.search_nodes(name=args.outgroup[0])[0] tt.set_outgroup(out) if args.target_trees: fname = os.path.basename(tfile) else: fname = '%05d' %counter max_size, min_size, avg_size, common = -1, -1, -1, -1 total_rf, max_rf, norm_rf = -1, -1, -1 treeko_d = -1 ref_branches_in_target, target_branches_in_ref = -1, -1 target_tree_len = -1 used_subtrees = -1 if args.extract_species: orig_target_size = len(tt) ntrees, ndups, sp_trees = tt.get_speciation_trees(autodetect_duplications=True, newick_only=True) if ntrees < 1000: all_rf = [] ref_found = [] target_found = [] tree_sizes = [] all_max_rf = [] common_names = 0 for subtree_nw in sp_trees: if seedid and not args.collateral and (seedid not in subtree_nw): continue subtree = PhyloTree(subtree_nw, sp_naming_function = lambda name: name.split(args.spname_delimiter)[args.spname_field]) # only necessary if rf function is going to filter by support value. 
It slows downs the analysis, obviously if args.min_support: subtree_content = subtree.get_cached_content(store_attr='name') for n in subtree.traverse(): if n.children: n.support = tt.get_common_ancestor(subtree_content[n]).support rf, maxr, common, p1, p2, d1, d2 = t.robinson_foulds(subtree, expand_polytomies=args.polytomies, unrooted_trees=args.unrooted, attr_t2='species', min_support_t2=args.min_support) if maxr > 0 and p1 and p2: all_rf.append(rf) tree_sizes.append(len(common)) all_max_rf.append(maxr) common_names = max(common_names, len(common)) ref_found.append(float(len(p2 & p1)) / reftree_edges) p2bis = set([p for p in (p2-d2) if len(p[0])>1 and len(p[1])>1]) # valid edges in target not leaves if p2bis: incompatible_target_branches = float(len((p2-d2) - p1)) target_found.append(1 - (incompatible_target_branches / (len(p2-d2)))) # valid_target = p2-d2 # valid_ref = p1-d1 # ref_found.append(float(len(valid_target & valid_ref)) / reftree_edges) # p2bis = set([p for p in (p2-d2) if len(p[0])>1 and len(p[1])>1]) # if p2bis-d2: # incompatible_target_branches = float(len((p2-d2) - p1)) # target_found.append(1 - (incompatible_target_branches / (len(p2-d2)))) if all_rf: # Treeko speciation distance alld = [(all_rf[i]/float(all_max_rf[i])) for i in xrange(len(all_rf))] a = numpy.sum([alld[i] * tree_sizes[i] for i in xrange(len(all_rf))]) b = float(numpy.sum(tree_sizes)) treeko_d = a/b total_rf = numpy.mean(all_rf) norm_rf = numpy.mean([(all_rf[i]/float(all_max_rf[i])) for i in xrange(len(all_rf))]) max_rf = numpy.max(all_max_rf) ref_branches_in_target = numpy.mean(ref_found) target_branches_in_ref = numpy.mean(target_found) if target_found else -1 target_tree_len = numpy.mean(tree_sizes) used_subtrees = len(all_rf) else: target_tree_len = len(tt) ndups, ntrees, used_subtrees = 0, 1, 1 treeko_d = -1 total_rf, max_rf, common, p1, p2, d1, d2 = tt.robinson_foulds(t, expand_polytomies=args.polytomies, unrooted_trees=args.unrooted) common_names = len(common) if max_rf: 
norm_rf = total_rf / float(max_rf) if p1 and p2: sizes = [len(p) for p in p2 ^ p1] if sizes: avg_size = sum(sizes) / float(len(sizes)) max_size, min_size = max(sizes), min(sizes) else: max_size, min_size, avg_size = 0, 0, 0 ref_branches_in_target = float(len(p2 & p1)) / reftree_edges #if p2-d2: # incompatible_target_branches = float(len((p2-d2) - p1)) # target_found.append(1 - (incompatible_target_branches / (len(p2-d2)))) else: ref_branches_in_target = 0.0 target_branches_in_ref = 0.0 max_size, min_size, avg_size = -1, -1, -1 if args.output: print >>OUT, '\t'.join(map(str, (fname, ndups, ntrees, used_subtrees, treeko_d, total_rf, max_rf, norm_rf, ref_branches_in_target, target_branches_in_ref, avg_size, min_size, common_names, reftree_len, target_tree_len))) else: print_table([map(istr, (fname[-30:], ndups, ntrees, used_subtrees, treeko_d, total_rf, max_rf, norm_rf, '%0.4f' %ref_branches_in_target, '%0.4f' %target_branches_in_ref, avg_size, min_size, common_names, reftree_len, target_tree_len))], fix_col_width = COL_WIDTHS, wrap_style='cut') if args.output: OUT.close()
return run, created = Run.get_or_create(date=DT.date.today()) if not created: return False for project_name, issue_numbers in assigned_open_issues_per_project.items( ): project, _ = Project.get_or_create(name=project_name) IssueNumber.create(value=issue_numbers, run=run, project=project) db_create_backup() return True db.connect() db.create_tables([Run, Project, IssueNumber]) if __name__ == '__main__': projects = [p.name for p in Project.select()] print(f"Projects ({len(projects)}): {projects}\n") # Print last rows for run in Run.select().order_by(Run.id.desc()).limit(5): print(run, '\n') print_table(run.get_project_by_issue_numbers()) print('\n' + '-' * 100 + '\n')
# NOTE(review): legacy Python 2 module (`print`, `cPickle`, `print >>`).
# Analyzes target trees against the NCBI taxonomy: annotates leaves with
# taxid/species, splits by duplications (split_by_dups or
# get_speciation_trees), calls analyze_subtrees() to count broken
# clades/branches, optionally computes RF statistics against a --ref tree
# (size-weighted mean, median, std via numpy), and can show/render the tree
# with a TreeStyle or dump results via cPickle.  Writes pipe-separated rows
# per tree plus a final summary via print_table.  Left byte-identical apart
# from this header — the body is too order-dependent for a safe rewrite.
def main(argv): parser = argparse.ArgumentParser( description=__DESCRIPTION__, formatter_class=argparse.RawDescriptionHelpFormatter) # name or flags - Either a name or a list of option strings, e.g. foo or -f, --foo. # action - The basic type of action to be taken when this argument is encountered at the command line. (store, store_const, store_true, store_false, append, append_const, version) # nargs - The number of command-line arguments that should be consumed. (N, ? (one or default), * (all 1 or more), + (more than 1) ) # const - A constant value required by some action and nargs selections. # default - The value produced if the argument is absent from the command line. # type - The type to which the command-line argument should be converted. # choices - A container of the allowable values for the argument. # required - Whether or not the command-line option may be omitted (optionals only). # help - A brief description of what the argument does. # metavar - A name for the argument in usage messages. # dest - The name of the attribute to be added to the object returned by parse_args(). 
parser.add_argument("--show", dest="show_tree", action="store_true", help="""Display tree after the analysis.""") parser.add_argument("--render", dest="render", action="store_true", help="""Render tree.""") parser.add_argument("--dump", dest="dump", action="store_true", help="""Dump analysis""") parser.add_argument( "--explore", dest="explore", type=str, help="""Reads a previously analyzed tree and visualize it""") input_args = parser.add_mutually_exclusive_group() input_args.required = True input_args.add_argument("-t", "--tree", dest="target_tree", nargs="+", type=str, help="""Tree file in newick format""") input_args.add_argument("-tf", dest="tree_list_file", type=str, help="File with the list of tree files") parser.add_argument("--tax", dest="tax_info", type=str, help="If the taxid attribute is not set in the" " newick file for all leaf nodes, a tab file file" " with the translation of name and taxid can be" " provided with this option.") parser.add_argument( "--sp_delimiter", dest="sp_delimiter", type=str, help= "If taxid is part of the leaf name, delimiter used to split the string" ) parser.add_argument( "--sp_field", dest="sp_field", type=int, default=0, help="field position for taxid after splitting leaf names") parser.add_argument("--ref", dest="ref_tree", type=str, help="Uses ref tree to compute robinson foulds" " distances of the different subtrees") parser.add_argument("--rf-only", dest="rf_only", action="store_true", help="Skip ncbi consensus analysis") parser.add_argument( "--outgroup", dest="outgroup", type=str, nargs="+", help="A list of node names defining the trees outgroup") parser.add_argument("--is_sptree", dest="is_sptree", action="store_true", help="Assumes no duplication nodes in the tree") parser.add_argument("-o", dest="output", type=str, help="Writes result into a file") parser.add_argument("--tax2name", dest="tax2name", type=str, help="") parser.add_argument("--tax2track", dest="tax2track", type=str, help="") 
parser.add_argument("--dump_tax_info", dest="dump_tax_info", action="store_true", help="") args = parser.parse_args(argv) if args.sp_delimiter: GET_TAXID = lambda x: x.split(args.sp_delimiter)[args.sp_field] else: GET_TAXID = None reftree_name = os.path.basename(args.ref_tree) if args.ref_tree else "" if args.explore: print >> sys.stderr, "Reading tree from file:", args.explore t = cPickle.load(open(args.explore)) ts = TreeStyle() ts.force_topology = True ts.show_leaf_name = False ts.layout_fn = ncbi_layout ts.mode = "r" t.show(tree_style=ts) print >> sys.stderr, "dumping color config" cPickle.dump(name2color, open("ncbi_colors.pkl", "w")) sys.exit() if args.output: OUT = open(args.output, "w") else: OUT = sys.stdout print >> sys.stderr, "Dumping results into", OUT target_trees = [] if args.tree_list_file: target_trees = [line.strip() for line in open(args.tree_list_file)] if args.target_tree: target_trees += args.target_tree prev_tree = None if args.tax2name: tax2name = cPickle.load(open(args.tax2name)) else: tax2name = {} if args.tax2track: tax2track = cPickle.load(open(args.tax2track)) else: tax2track = {} print len(tax2track), len(tax2name) header = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees", "Broken clades", "Clade sizes", "RF (avg)", "RF (med)", "RF (std)", "RF (max)", "Shared tips") print >> OUT, '|'.join([h.ljust(15) for h in header]) if args.ref_tree: print >> sys.stderr, "Reading ref tree from", args.ref_tree reft = Tree(args.ref_tree, format=1) else: reft = None SHOW_TREE = False if args.show_tree or args.render: SHOW_TREE = True prev_broken = set() ENTRIES = [] ncbi.connect_database() for tfile in target_trees: #print tfile t = PhyloTree(tfile, sp_naming_function=None) if GET_TAXID: for n in t.iter_leaves(): n.name = GET_TAXID(n.name) if args.outgroup: if len(args.outgroup) == 1: out = t & args.outgroup[0] else: out = t.get_common_ancestor(args.outgroup) if set(out.get_leaf_names()) ^ set(args.outgroup): raise ValueError("Outgroup is not 
monophyletic") t.set_outgroup(out) t.ladderize() if prev_tree: tree_compare(t, prev_tree) prev_tree = t if args.tax_info: tax2name, tax2track = annotate_tree_with_taxa( t, args.tax_info, tax2name, tax2track) if args.dump_tax_info: cPickle.dump(tax2track, open("tax2track.pkl", "w")) cPickle.dump(tax2name, open("tax2name.pkl", "w")) print "Tax info written into pickle files" else: for n in t.iter_leaves(): spcode = n.name n.add_features(taxid=spcode) n.add_features(species=spcode) tax2name, tax2track = annotate_tree_with_taxa( t, None, tax2name, tax2track) # Split tree into species trees #subtrees = t.get_speciation_trees() if not args.rf_only: #print "Calculating tree subparts..." t1 = time.time() if not args.is_sptree: subtrees = t.split_by_dups() #print "Subparts:", len(subtrees), time.time()-t1 else: subtrees = [t] valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = analyze_subtrees( t, subtrees, show_tree=SHOW_TREE) #print valid_subtrees, broken_subtrees, ncbi_mistakes, total_rf else: subtrees = [] valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = 0, 0, 0, 0, 0, 0 ndups = 0 nsubtrees = len(subtrees) rf = 0 rf_max = 0 rf_std = 0 rf_med = 0 common_names = 0 max_size = 0 if reft and len(subtrees) == 1: rf = t.robinson_foulds(reft, attr_t1="realname") rf_max = rf[1] rf = rf[0] rf_med = rf elif reft: #print "Calculating avg RF..." 
nsubtrees, ndups, subtrees = t.get_speciation_trees( map_features=["taxid"]) #print len(subtrees), "Sub-Species-trees found" avg_rf = [] rf_max = 0.0 # reft.robinson_foulds(reft)[1] sum_size = 0.0 print nsubtrees, "subtrees", ndups, "duplications" for ii, subt in enumerate(subtrees): print "\r%d" % ii, sys.stdout.flush() try: partial_rf = subt.robinson_foulds(reft, attr_t1="taxid") except ValueError: pass else: sptree_size = len( set([n.taxid for n in subt.iter_leaves()])) sum_size += sptree_size avg_rf.append( (partial_rf[0] / float(partial_rf[1])) * sptree_size) common_names = len(partial_rf[3]) max_size = max(max_size, sptree_size) rf_max = max(rf_max, partial_rf[1]) #print partial_rf[:2] rf = numpy.sum(avg_rf) / float(sum_size) # Treeko dist rf_std = numpy.std(avg_rf) rf_med = numpy.median(avg_rf) sizes_info = "%0.1f/%0.1f +- %0.1f" % (numpy.mean(broken_sizes), numpy.median(broken_sizes), numpy.std(broken_sizes)) iter_values = [ os.path.basename(tfile), nsubtrees, ndups, broken_subtrees, ncbi_mistakes, broken_branches, sizes_info, rf, rf_med, rf_std, rf_max, common_names ] print >> OUT, '|'.join( map(lambda x: str(x).strip().ljust(15), iter_values)) fixed = sorted([n for n in prev_broken if n not in broken_clades]) new_problems = sorted(broken_clades - prev_broken) fixed_string = color(', '.join(fixed), "green") if fixed else "" problems_string = color(', '.join(new_problems), "red") if new_problems else "" OUT.write(" Fixed clades: %s\n" % fixed_string) if fixed else None OUT.write(" New broken: %s\n" % problems_string) if new_problems else None prev_broken = broken_clades ENTRIES.append([ os.path.basename(tfile), nsubtrees, ndups, broken_subtrees, ncbi_mistakes, broken_branches, sizes_info, fixed_string, problems_string ]) OUT.flush() if args.show_tree or args.render: ts = TreeStyle() ts.force_topology = True #ts.tree_width = 500 ts.show_leaf_name = False ts.layout_fn = ncbi_layout ts.mode = "r" t.dist = 0 if args.show_tree: #if args.hide_monophyletic: # 
tax2monophyletic = {} # n2content = t.get_node2content() # for node in t.traverse(): # term2count = defaultdict(int) # for leaf in n2content[node]: # if leaf.lineage: # for term in leaf.lineage: # term2count[term] += 1 # expected_size = len(n2content) # for term, count in term2count.iteritems(): # if count > 1 print "Showing tree..." t.show(tree_style=ts) else: t.render("img.svg", tree_style=ts, dpi=300) print "dumping color config" cPickle.dump(name2color, open("ncbi_colors.pkl", "w")) if args.dump: cPickle.dump(t, open("ncbi_analysis.pkl", "w")) print print HEADER = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees", "Broken clades", "Broken branches", "Clade sizes", "Fixed Groups", "New Broken Clades") print_table(ENTRIES, max_col_width=50, row_line=True, header=HEADER) if args.output: OUT.close()
def main(argv):
    """Check target trees against the NCBI taxonomy (and optionally a ref tree).

    Each target tree is loaded, optionally re-rooted, annotated with taxids,
    split into duplication-free subtrees and analyzed for clades that break
    the NCBI taxonomy.  If a reference tree is given, Robinson-Foulds based
    distances are also reported.  Per-tree result rows are streamed to the
    file given with -o (or stdout) and a summary table is printed at the end.
    """
    parser = argparse.ArgumentParser(description=__DESCRIPTION__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("--show", dest="show_tree",
                        action="store_true",
                        help="""Display tree after the analysis.""")

    parser.add_argument("--render", dest="render",
                        action="store_true",
                        help="""Render tree.""")

    parser.add_argument("--dump", dest="dump",
                        action="store_true",
                        help="""Dump analysis""")

    parser.add_argument("--explore", dest="explore",
                        type=str,
                        help="""Reads a previously analyzed tree and visualize it""")

    input_args = parser.add_mutually_exclusive_group()
    input_args.required = True
    input_args.add_argument("-t", "--tree", dest="target_tree",
                            nargs="+", type=str,
                            help="""Tree file in newick format""")

    input_args.add_argument("-tf", dest="tree_list_file",
                            type=str,
                            help="File with the list of tree files")

    parser.add_argument("--tax", dest="tax_info", type=str,
                        help="If the taxid attribute is not set in the"
                        " newick file for all leaf nodes, a tab file file"
                        " with the translation of name and taxid can be"
                        " provided with this option.")

    parser.add_argument("--sp_delimiter", dest="sp_delimiter", type=str,
                        help="If taxid is part of the leaf name, delimiter used to split the string")

    parser.add_argument("--sp_field", dest="sp_field", type=int, default=0,
                        help="field position for taxid after splitting leaf names")

    parser.add_argument("--ref", dest="ref_tree", type=str,
                        help="Uses ref tree to compute robinson foulds"
                        " distances of the different subtrees")

    parser.add_argument("--rf-only", dest="rf_only",
                        action="store_true",
                        help="Skip ncbi consensus analysis")

    parser.add_argument("--outgroup", dest="outgroup",
                        type=str, nargs="+",
                        help="A list of node names defining the trees outgroup")

    parser.add_argument("--is_sptree", dest="is_sptree",
                        action="store_true",
                        help="Assumes no duplication nodes in the tree")

    parser.add_argument("-o", dest="output", type=str,
                        help="Writes result into a file")

    parser.add_argument("--tax2name", dest="tax2name", type=str,
                        help="")

    parser.add_argument("--tax2track", dest="tax2track", type=str,
                        help="")

    parser.add_argument("--dump_tax_info", dest="dump_tax_info", action="store_true",
                        help="")

    args = parser.parse_args(argv)

    # Optional extractor pulling the taxid field out of each leaf name.
    if args.sp_delimiter:
        GET_TAXID = lambda x: x.split(args.sp_delimiter)[args.sp_field]
    else:
        GET_TAXID = None

    reftree_name = os.path.basename(args.ref_tree) if args.ref_tree else ""

    # --explore: only visualize a previously pickled analysis, then exit.
    if args.explore:
        print >>sys.stderr, "Reading tree from file:", args.explore
        t = cPickle.load(open(args.explore))
        ts = TreeStyle()
        ts.force_topology = True
        ts.show_leaf_name = False
        ts.layout_fn = ncbi_layout
        ts.mode = "r"
        t.show(tree_style=ts)
        print >>sys.stderr, "dumping color config"
        cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))
        sys.exit()

    if args.output:
        OUT = open(args.output, "w")
    else:
        OUT = sys.stdout

    print >>sys.stderr, "Dumping results into", OUT

    # Collect target tree paths from the -tf list file and/or -t arguments.
    target_trees = []
    if args.tree_list_file:
        target_trees = [line.strip() for line in open(args.tree_list_file)]
    if args.target_tree:
        target_trees += args.target_tree

    prev_tree = None

    # Pre-computed taxid->name / taxid->lineage tables, if provided.
    if args.tax2name:
        tax2name = cPickle.load(open(args.tax2name))
    else:
        tax2name = {}

    if args.tax2track:
        tax2track = cPickle.load(open(args.tax2track))
    else:
        tax2track = {}
    print len(tax2track), len(tax2name)

    header = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees",
              "Broken clades", "Clade sizes", "RF (avg)", "RF (med)",
              "RF (std)", "RF (max)", "Shared tips")
    print >>OUT, '|'.join([h.ljust(15) for h in header])

    if args.ref_tree:
        print >>sys.stderr, "Reading ref tree from", args.ref_tree
        reft = Tree(args.ref_tree, format=1)
    else:
        reft = None

    SHOW_TREE = False
    if args.show_tree or args.render:
        SHOW_TREE = True

    prev_broken = set()
    ENTRIES = []
    ncbi.connect_database()

    for tfile in target_trees:
        #print tfile
        t = PhyloTree(tfile, sp_naming_function=None)
        if GET_TAXID:
            for n in t.iter_leaves():
                n.name = GET_TAXID(n.name)

        if args.outgroup:
            if len(args.outgroup) == 1:
                out = t & args.outgroup[0]
            else:
                out = t.get_common_ancestor(args.outgroup)
                if set(out.get_leaf_names()) ^ set(args.outgroup):
                    raise ValueError("Outgroup is not monophyletic")
            t.set_outgroup(out)
        t.ladderize()

        if prev_tree:
            tree_compare(t, prev_tree)
        prev_tree = t

        if args.tax_info:
            tax2name, tax2track = annotate_tree_with_taxa(t, args.tax_info, tax2name, tax2track)
            if args.dump_tax_info:
                cPickle.dump(tax2track, open("tax2track.pkl", "w"))
                cPickle.dump(tax2name, open("tax2name.pkl", "w"))
                print "Tax info written into pickle files"
        else:
            # No translation table: leaf names are used directly as taxids.
            for n in t.iter_leaves():
                spcode = n.name
                n.add_features(taxid=spcode)
                n.add_features(species=spcode)
            tax2name, tax2track = annotate_tree_with_taxa(t, None, tax2name, tax2track)

        # Split tree into species trees
        #subtrees = t.get_speciation_trees()
        if not args.rf_only:
            #print "Calculating tree subparts..."
            t1 = time.time()
            if not args.is_sptree:
                subtrees = t.split_by_dups()
                #print "Subparts:", len(subtrees), time.time()-t1
            else:
                subtrees = [t]

            valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = analyze_subtrees(t, subtrees, show_tree=SHOW_TREE)
            #print valid_subtrees, broken_subtrees, ncbi_mistakes, total_rf
        else:
            subtrees = []
            # NOTE(review): 7 unpack targets but only 6 zeros on the right —
            # this line raises ValueError whenever --rf-only is used; confirm
            # and add the missing 0.
            valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = 0, 0, 0, 0, 0, 0

        ndups = 0
        nsubtrees = len(subtrees)

        rf = 0
        rf_max = 0
        rf_std = 0
        rf_med = 0
        common_names = 0
        max_size = 0
        if reft and len(subtrees) == 1:
            # Single duplication-free tree: plain RF against the reference.
            rf = t.robinson_foulds(reft, attr_t1="realname")
            rf_max = rf[1]
            rf = rf[0]
            rf_med = rf
        elif reft:
            #print "Calculating avg RF..."
            nsubtrees, ndups, subtrees = t.get_speciation_trees(map_features=["taxid"])
            #print len(subtrees), "Sub-Species-trees found"
            avg_rf = []
            rf_max = 0.0  # reft.robinson_foulds(reft)[1]
            sum_size = 0.0
            print nsubtrees, "subtrees", ndups, "duplications"
            for ii, subt in enumerate(subtrees):
                print "\r%d" % ii,
                sys.stdout.flush()
                try:
                    partial_rf = subt.robinson_foulds(reft, attr_t1="taxid")
                except ValueError:
                    # Subtree not comparable with the reference; skip it.
                    pass
                else:
                    sptree_size = len(set([n.taxid for n in subt.iter_leaves()]))
                    sum_size += sptree_size
                    # Size-weighted normalized RF contribution (treeko-style).
                    avg_rf.append((partial_rf[0] / float(partial_rf[1])) * sptree_size)
                    common_names = len(partial_rf[3])
                    max_size = max(max_size, sptree_size)
                    rf_max = max(rf_max, partial_rf[1])
                #print partial_rf[:2]
            rf = numpy.sum(avg_rf) / float(sum_size)  # Treeko dist
            rf_std = numpy.std(avg_rf)
            rf_med = numpy.median(avg_rf)

        sizes_info = "%0.1f/%0.1f +- %0.1f" % (numpy.mean(broken_sizes),
                                               numpy.median(broken_sizes),
                                               numpy.std(broken_sizes))
        iter_values = [os.path.basename(tfile), nsubtrees, ndups,
                       broken_subtrees, ncbi_mistakes, broken_branches,
                       sizes_info, rf, rf_med, rf_std, rf_max, common_names]
        print >>OUT, '|'.join(map(lambda x: str(x).strip().ljust(15), iter_values))

        # Delta report vs. the previous tree: clades fixed / newly broken.
        fixed = sorted([n for n in prev_broken if n not in broken_clades])
        new_problems = sorted(broken_clades - prev_broken)
        fixed_string = color(', '.join(fixed), "green") if fixed else ""
        problems_string = color(', '.join(new_problems), "red") if new_problems else ""
        OUT.write(" Fixed clades: %s\n" % fixed_string) if fixed else None
        OUT.write(" New broken: %s\n" % problems_string) if new_problems else None
        prev_broken = broken_clades
        ENTRIES.append([os.path.basename(tfile), nsubtrees, ndups,
                        broken_subtrees, ncbi_mistakes, broken_branches,
                        sizes_info, fixed_string, problems_string])
        OUT.flush()

        if args.show_tree or args.render:
            ts = TreeStyle()
            ts.force_topology = True
            #ts.tree_width = 500
            ts.show_leaf_name = False
            ts.layout_fn = ncbi_layout
            ts.mode = "r"
            t.dist = 0
            if args.show_tree:
                print "Showing tree..."
                t.show(tree_style=ts)
            else:
                t.render("img.svg", tree_style=ts, dpi=300)
                print "dumping color config"
                cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))

    if args.dump:
        # Note: pickles the last tree processed in the loop above.
        cPickle.dump(t, open("ncbi_analysis.pkl", "w"))

    print
    print
    HEADER = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees", "Broken clades",
              "Broken branches", "Clade sizes", "Fixed Groups", "New Broken Clades")
    print_table(ENTRIES, max_col_width=50, row_line=True, header=HEADER)
    if args.output:
        OUT.close()
def main(argv): parser = argparse.ArgumentParser( description=__DESCRIPTION__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("--tf", dest='target_trees_file', type=str, help='target_trees') parser.add_argument("-t", dest='target_trees', type=str, nargs="+", help='target_trees') parser.add_argument( "--unique", dest='unique', type=str, help= 'When used, all the provided trees are compared and unique topologies are dumped in the specified file.' ) parser.add_argument("--stats", dest='stats', type=str, help='Show general stats for the provided trees') parser.add_argument( "--distmatrix", dest='distmatrix', type=str, help='Dump a distance matrix (robinson foulds) among all topologies') args = parser.parse_args(argv) print __DESCRIPTION__ unique_topo = {} stats_table = [] for tfile in itertrees(args.target_trees, args.target_trees_file): t = Tree(tfile) if args.unique: tid = t.get_topology_id() if tid not in unique_topo: unique_topo[tid] = t if args.stats: most_distance_node, tree_length = t.get_farthest_leaf() supports = [] names = [] distances = [] leaves = 0 for n in t.traverse(): names.append(n.name) if n.up: supports.append(n.support) distances.append(n.dist) if n.is_leaf(): leaves += 1 min_support, max_support = min(supports), max(supports) mean_support, std_support = mean_std_dev(supports) min_dist, max_dist = min(distances), max(distances) mean_dist, std_dist = mean_std_dev(distances) stats_table.append([ str(t.children <= 2), leaves, tree_length, most_distance_node.name, min_support, max_support, mean_support, std_support, min_dist, max_dist, mean_dist, std_dist, ]) if stats_table: header = [ 'rooted', '#tips', 'tree length', 'most distant tip', 'min support', 'max support', 'min support', 'std support', 'max dist', 'min dist', 'mean dist', 'std dist' ] print_table(stats_table, header=header, max_col_width=12) if unique_topo: print '%d unique topologies found' % len(unique_topo) topos = unique_topo.values() open(args.unique + 
'.trees', 'w').write('\n'.join([topo.write(format=9) for topo in topos]) + '\n') import itertools for a, b in itertools.product(topos, topos): print a.diff(b, output='diffs_tab')
def main(argv):
    """Compare target (gene) trees against a reference (species) tree.

    For every target tree it reports RF/maxRF/normRF distances, the treeko
    distance (when --extract_species splits gene trees into speciation
    subtrees), and the fraction of compatible edges in each direction.
    Rows go to a tab-delimited file (-o) or to a console table.
    """
    parser = argparse.ArgumentParser(description=__DESCRIPTION__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("target_trees", metavar='target_trees', type=str, nargs="*",
                        help='a list of target tree files')
    parser.add_argument("--targets_file", dest="targets_file", type=str,
                        help="""path to a file containing target trees, one per line""")
    parser.add_argument("-o", dest="output", type=str,
                        help="""Path to the tab delimited report file""")
    parser.add_argument("-r", dest="reftree", type=str, required=True,
                        help="""Reference tree""")
    parser.add_argument("--outgroup", dest="outgroup", nargs="+",
                        help="""outgroup used to root reference and target trees before distance computation""")
    parser.add_argument("--expand_polytomies", dest="polytomies", action="store_true",
                        help="""expand politomies if necessary""")
    parser.add_argument("--unrooted", dest="unrooted", action="store_true",
                        help="""compare trees as unrooted""")
    parser.add_argument("--min_support", dest="min_support", type=float, default=0.0,
                        help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))
    parser.add_argument("--extract_species", dest="extract_species", action="store_true",
                        help="""When used, reference tree is assumed to contain species names, while target trees as expected to be gene trees. Species name will be extracted from gene tree nodes and treeko will be used if duplication events are found.""")
    parser.add_argument("--spname_delimiter", dest="spname_delimiter", type=str, default="_",
                        help=("species code delimiter in node names"))
    parser.add_argument("--spname_field", dest="spname_field", type=int, default=-1,
                        help=("position of the species code extracted from node names. -1 = last field"))
    parser.add_argument("--collateral", dest="collateral", action='store_true',
                        help=(""))
    parser.add_argument("--ref_attr", dest="ref_attr", type=str,
                        help=("attribute in ref tree used as leaf name"))
    parser.add_argument("--target_attr", dest="target_attr", type=str,
                        help=("attribute in target tree used as leaf name"))

    args = parser.parse_args(argv)
    print __DESCRIPTION__
    reftree = args.reftree
    if args.targets_file and args.target_trees:
        print >>sys.stderr, 'The use of targets_file and targets at the same time is not supported.'
        sys.exit(1)

    if args.targets_file:
        # tree_iterator yields (seedid, newick) pairs — see loop below.
        target_trees = tree_iterator(args.targets_file)
    else:
        target_trees = args.target_trees

    t = Tree(reftree)
    if args.ref_attr:
        # Re-label reference leaves from an alternative node attribute.
        for lf in t.iter_leaves():
            lf._origname = lf.name
            if args.ref_attr not in lf.features:
                print lf
            lf.name = getattr(lf, args.ref_attr)

    if args.outgroup:
        if len(args.outgroup) > 1:
            out = t.get_common_ancestor(args.outgroup)
        else:
            out = t.search_nodes(name=args.outgroup[0])[0]
        t.set_outgroup(out)

    ref_names = set(t.get_leaf_names())
    reftree_len = len(t)
    # Edge count used to normalize the %reftree compatibility figures.
    reftree_edges = (reftree_len * 2) - 2
    ncollapsed_branches = len([n for n in t.traverse() if n.children and n.support < args.min_support])
    #reftree_edges -= ncollapsed_branches
    #if ncollapsed_branches:
    #    print '%d branches collapsed in reference tree' %ncollapsed_branches

    HEADER = ("target tree", 'dups', 'subtrees', 'used trees', 'treeko', "RF", "maxRF", 'normRF',
              "%reftree", "%genetree", "avgSize", "minSize", "common tips", "refSize", "targetSize")
    if args.output:
        OUT = open(args.output, "w")
        print >>OUT, '# ' + ctime()
        print >>OUT, '# ' + ' '.join(sys.argv)
        print >>OUT, '#' + '\t'.join(HEADER)
    else:
        print '# ' + ctime()
        print '# ' + ' '.join(sys.argv)
        COL_WIDTHS = [20, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]
        print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

    prev_tree = None
    for counter, tfile in enumerate(target_trees):
        if args.targets_file:
            seedid, tfile = tfile
        else:
            seedid = None

        if args.extract_species:
            tt = PhyloTree(tfile, sp_naming_function=lambda name: name.split(args.spname_delimiter)[args.spname_field])
        else:
            tt = Tree(tfile)

        if args.target_attr:
            for lf in tt.iter_leaves():
                lf._origname = lf.name
                lf.name = getattr(lf, args.target_attr)

        if args.outgroup:
            if len(args.outgroup) > 1:
                out = tt.get_common_ancestor(args.outgroup)
            else:
                out = tt.search_nodes(name=args.outgroup[0])[0]
            tt.set_outgroup(out)

        if args.target_trees:
            fname = os.path.basename(tfile)
        else:
            fname = '%05d' % counter

        # Sentinel defaults reported when a metric cannot be computed.
        max_size, min_size, avg_size, common = -1, -1, -1, -1
        total_rf, max_rf, norm_rf = -1, -1, -1
        treeko_d = -1
        ref_branches_in_target, target_branches_in_ref = -1, -1
        target_tree_len = -1
        used_subtrees = -1

        if args.extract_species:
            # Treeko mode: split the gene tree at duplication nodes and
            # aggregate RF over the resulting speciation subtrees.
            orig_target_size = len(tt)
            ntrees, ndups, sp_trees = tt.get_speciation_trees(autodetect_duplications=True, newick_only=True)
            if ntrees < 1000:
                all_rf = []
                ref_found = []
                target_found = []
                tree_sizes = []
                all_max_rf = []
                common_names = 0
                for subtree_nw in sp_trees:
                    # Unless --collateral, only use subtrees holding the seed id.
                    if seedid and not args.collateral and (seedid not in subtree_nw):
                        continue
                    subtree = PhyloTree(subtree_nw, sp_naming_function=lambda name: name.split(args.spname_delimiter)[args.spname_field])

                    # only necessary if rf function is going to filter by
                    # support value. It slows downs the analysis, obviously
                    if args.min_support:
                        subtree_content = subtree.get_cached_content(store_attr='name')
                        for n in subtree.traverse():
                            if n.children:
                                n.support = tt.get_common_ancestor(subtree_content[n]).support

                    rf, maxr, common, p1, p2, d1, d2 = t.robinson_foulds(subtree, expand_polytomies=args.polytomies, unrooted_trees=args.unrooted, attr_t2='species', min_support_t2=args.min_support)

                    if maxr > 0 and p1 and p2:
                        all_rf.append(rf)
                        tree_sizes.append(len(common))
                        all_max_rf.append(maxr)
                        common_names = max(common_names, len(common))

                        ref_found.append(float(len(p2 & p1)) / reftree_edges)
                        p2bis = set([p for p in (p2 - d2) if len(p[0]) > 1 and len(p[1]) > 1])  # valid edges in target not leaves
                        if p2bis:
                            incompatible_target_branches = float(len((p2 - d2) - p1))
                            target_found.append(1 - (incompatible_target_branches / (len(p2 - d2))))

                if all_rf:
                    # Treeko speciation distance: size-weighted mean of the
                    # normalized RF over all used subtrees.
                    alld = [(all_rf[i] / float(all_max_rf[i])) for i in xrange(len(all_rf))]
                    a = numpy.sum([alld[i] * tree_sizes[i] for i in xrange(len(all_rf))])
                    b = float(numpy.sum(tree_sizes))
                    treeko_d = a / b
                    total_rf = numpy.mean(all_rf)
                    norm_rf = numpy.mean([(all_rf[i] / float(all_max_rf[i])) for i in xrange(len(all_rf))])
                    max_rf = numpy.max(all_max_rf)
                    ref_branches_in_target = numpy.mean(ref_found)
                    target_branches_in_ref = numpy.mean(target_found) if target_found else -1
                    target_tree_len = numpy.mean(tree_sizes)
                    used_subtrees = len(all_rf)
        else:
            # Plain mode: one direct RF comparison target vs. reference.
            target_tree_len = len(tt)
            ndups, ntrees, used_subtrees = 0, 1, 1
            treeko_d = -1
            total_rf, max_rf, common, p1, p2, d1, d2 = tt.robinson_foulds(t, expand_polytomies=args.polytomies, unrooted_trees=args.unrooted)
            common_names = len(common)
            if max_rf:
                norm_rf = total_rf / float(max_rf)
            if p1 and p2:
                # Sizes of partitions present in only one of the two trees.
                sizes = [len(p) for p in p2 ^ p1]
                if sizes:
                    avg_size = sum(sizes) / float(len(sizes))
                    max_size, min_size = max(sizes), min(sizes)
                else:
                    max_size, min_size, avg_size = 0, 0, 0
                ref_branches_in_target = float(len(p2 & p1)) / reftree_edges
            else:
                ref_branches_in_target = 0.0
                target_branches_in_ref = 0.0
                max_size, min_size, avg_size = -1, -1, -1

        if args.output:
            print >>OUT, '\t'.join(map(str, (fname, ndups, ntrees, used_subtrees, treeko_d,
                                             total_rf, max_rf, norm_rf,
                                             ref_branches_in_target, target_branches_in_ref,
                                             avg_size, min_size, common_names,
                                             reftree_len, target_tree_len)))
        else:
            print_table([map(istr, (fname[-30:], ndups, ntrees, used_subtrees, treeko_d,
                                    total_rf, max_rf, norm_rf,
                                    '%0.4f' % ref_branches_in_target,
                                    '%0.4f' % target_branches_in_ref,
                                    avg_size, min_size, common_names,
                                    reftree_len, target_tree_len))],
                        fix_col_width=COL_WIDTHS, wrap_style='cut')

    if args.output:
        OUT.close()
# vim: et:sta:bs=2:sw=4:
"""Print selected user records as a table, one row per user."""
import _import  # noqa: F401
import sys
from optparse import OptionParser

from common import args_to_users, print_table
from six.moves import reload_module
from django.utils import six

op = OptionParser()
# BUG FIX: help text had a typo ("seoarator") and a misleading description
# ("between spaces" -> "between fields").
op.add_option('-s', '--separator', dest='separator', default=' ',
              metavar='CHAR', type=str,
              help="The separator between fields")
options, args = op.parse_args()

# When stdout is not a terminal (e.g. piped), Python 2 cannot guess an
# encoding; force UTF-8 so unicode values do not crash print_table below.
if sys.stdout.encoding is None:
    reload_module(sys)
    sys.setdefaultencoding('utf-8')

data = [(u.first_name, u.last_name, u.studentNumber, u.institute, u.study,
         u.dateOfBirth, u.dateJoined, u.email, u.addr_street, u.addr_number,
         u.addr_zipCode, u.addr_city, u.telephone)
        for u in args_to_users(args)]
# Stringify every cell so print_table deals with uniform unicode rows.
data = [[six.text_type(x) for x in r] for r in data]
print_table(data, separator=options.separator)
def main(argv):
    """Compare a set of source trees against a reference tree.

    Relies on ete's TreeNode.compare() to report, per source tree, the
    normalized/absolute RF distances, edge-compatibility percentages and
    the treeko distance (when --extract_species is enabled).
    """
    parser = argparse.ArgumentParser(description=__DESCRIPTION__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    input_args = parser.add_argument_group("INPUT OPTIONS")
    input_args.add_argument("source_trees", metavar='source_trees', type=str, nargs="*",
                            help='a list of source tree files')
    input_args.add_argument("--source_file", dest="source_file", type=str,
                            help="""path to a file containing many source trees, one per line""")
    input_args.add_argument("-r", dest="reftree", type=str, required=True,
                            help="""Reference tree""")
    input_args.add_argument("--ref_tree_attr", dest="ref_tree_attr", type=str, default="name",
                            help=("attribute in ref tree used as leaf name"))
    input_args.add_argument("--src_tree_attr", dest="src_tree_attr", type=str, default="name",
                            help=("attribute in source tree used as leaf name"))
    input_args.add_argument("--min_support_ref", type=float, default=0.0,
                            help=("min support for branches to be considered from the ref tree"))
    input_args.add_argument("--min_support_src", type=float, default=0.0,
                            help=("min support for branches to be considered from the source tree"))

    output_args = parser.add_argument_group("OUTPUT OPTIONS")
    output_args.add_argument("-o", dest="output", type=str,
                             help="""Path to the tab delimited report file""")

    opt_args = parser.add_argument_group("DISTANCE OPTIONS")
    opt_args.add_argument("--outgroup", dest="outgroup", nargs="+",
                          help="""outgroup used to root reference and source trees before distance computation""")
    opt_args.add_argument("--expand_polytomies", dest="polytomies", action="store_true",
                          help="""expand politomies if necessary""")
    opt_args.add_argument("--unrooted", dest="unrooted", action="store_true",
                          help="""compare trees as unrooted""")
    opt_args.add_argument("--min_support", dest="min_support", type=float, default=0.0,
                          help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))

    opt_args = parser.add_argument_group("PHYLOGENETICS OPTIONS")
    opt_args.add_argument("--extract_species", action="store_true",
                          help="When used, leaf names in the reference and source trees are assumed to represent species."
                          " If target trees are gene-trees whose species information is encoded as a part of the leaf sequence name,"
                          " it can be automatically extracted by providing a Perl regular expression that extract a "
                          " valid species code (see --sp_regexp). Such information will be also used to detect duplication"
                          " events. ")
    opt_args.add_argument("--sp_regexp", type=str,
                          help=("Specifies a Perl regular expression to automatically extract species names"
                                " from the name string in source trees. If not used, leaf names are assumed to represent species names."
                                " Example: use this expression '[^_]+_(.+)' to extract HUMAN from the string 'P53_HUMAN'."))
    opt_args.add_argument("--collateral", action='store_true',
                          help=(""))

    args = parser.parse_args(argv)

    print __DESCRIPTION__
    reftree = args.reftree
    if args.source_file and args.source_trees:
        print >>sys.stderr, 'The use of targets_file and targets at the same time is not supported.'
        sys.exit(1)

    if args.source_file:
        # tree_iterator yields (seedid, newick) pairs — unpacked in the loop.
        source_trees = tree_iterator(args.source_file)
    else:
        source_trees = args.source_trees

    ref_tree = Tree(reftree)
    if args.ref_tree_attr:
        # Re-label reference leaves from an alternative node attribute.
        for lf in ref_tree.iter_leaves():
            lf._origname = lf.name
            if args.ref_tree_attr not in lf.features:
                print lf
            lf.name = getattr(lf, args.ref_tree_attr)

    if args.outgroup:
        if len(args.outgroup) > 1:
            out = ref_tree.get_common_ancestor(args.outgroup)
        else:
            out = ref_tree.search_nodes(name=args.outgroup[0])[0]
        ref_tree.set_outgroup(out)

    HEADER = ("source tree", 'ref tree', 'common\ntips', 'normRF', 'RF', 'maxRF',
              "%reftree", "%genetree", "subtrees", "treeko\ndist")
    if args.output:
        OUT = open(args.output, "w")
        print >>OUT, '# ' + ctime()
        print >>OUT, '# ' + ' '.join(sys.argv)
        print >>OUT, '#' + '\t'.join(HEADER)
    else:
        print '# ' + ctime()
        print '# ' + ' '.join(sys.argv)
        COL_WIDTHS = [20, 20] + [9] * 10
        print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

    prev_tree = None
    ref_fname = os.path.basename(args.reftree)
    for counter, tfile in enumerate(source_trees):
        if args.source_file:
            seedid, tfile = tfile
        else:
            seedid = None

        if args.extract_species:
            if args.sp_regexp:
                SPMATCHER = re.compile(args.sp_regexp)
                get_sp_name = lambda x: re.search(SPMATCHER, x).groups()[0]
            else:
                # No regexp: leaf names already are species names.
                get_sp_name = lambda x: x
            tt = PhyloTree(tfile, sp_naming_function=get_sp_name)
        else:
            tt = Tree(tfile)

        if args.src_tree_attr:
            for lf in tt.iter_leaves():
                lf._origname = lf.name
                lf.name = getattr(lf, args.src_tree_attr)

        if args.outgroup:
            if len(args.outgroup) > 1:
                out = tt.get_common_ancestor(args.outgroup)
            else:
                out = tt.search_nodes(name=args.outgroup[0])[0]
            tt.set_outgroup(out)

        if args.source_trees:
            fname = os.path.basename(tfile)
        else:
            fname = '%05d' % counter

        r = tt.compare(ref_tree,
                       ref_tree_attr=args.ref_tree_attr,
                       source_tree_attr=args.src_tree_attr,
                       min_support_ref=args.min_support_ref,
                       min_support_source=args.min_support_src,
                       unrooted=args.unrooted,
                       has_duplications=args.extract_species)

        # NOTE(review): COL_WIDTHS is only bound in the no -o branch above,
        # so this unconditional call raises NameError when -o is used, and
        # per-tree results are never written to OUT — confirm intent.
        print_table([map(istr, [fname[-30:], ref_fname[-30:],
                                r['effective_tree_size'], r['norm_rf'],
                                r['rf'], r['max_rf'],
                                r["source_edges_in_ref"], r["ref_edges_in_source"],
                                r['source_subtrees'], r['treeko_dist']])],
                    fix_col_width=COL_WIDTHS, wrap_style='cut')

    if args.output:
        OUT.close()
def main(argv): parser = argparse.ArgumentParser(description=__DESCRIPTION__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("--tf", dest="target_trees_file", type=str, help="target_trees") parser.add_argument("-t", dest="target_trees", type=str, nargs="+", help="target_trees") parser.add_argument( "--unique", dest="unique", type=str, help="When used, all the provided trees are compared and unique topologies are dumped in the specified file.", ) parser.add_argument("--stats", dest="stats", type=str, help="Show general stats for the provided trees") parser.add_argument( "--distmatrix", dest="distmatrix", type=str, help="Dump a distance matrix (robinson foulds) among all topologies", ) args = parser.parse_args(argv) print __DESCRIPTION__ unique_topo = {} stats_table = [] for tfile in itertrees(args.target_trees, args.target_trees_file): t = Tree(tfile) if args.unique: tid = t.get_topology_id() if tid not in unique_topo: unique_topo[tid] = t if args.stats: most_distance_node, tree_length = t.get_farthest_leaf() supports = [] names = [] distances = [] leaves = 0 for n in t.traverse(): names.append(n.name) if n.up: supports.append(n.support) distances.append(n.dist) if n.is_leaf(): leaves += 1 min_support, max_support = min(supports), max(supports) mean_support, std_support = mean_std_dev(supports) min_dist, max_dist = min(distances), max(distances) mean_dist, std_dist = mean_std_dev(distances) stats_table.append( [ str(t.children <= 2), leaves, tree_length, most_distance_node.name, min_support, max_support, mean_support, std_support, min_dist, max_dist, mean_dist, std_dist, ] ) if stats_table: header = [ "rooted", "#tips", "tree length", "most distant tip", "min support", "max support", "min support", "std support", "max dist", "min dist", "mean dist", "std dist", ] print_table(stats_table, header=header, max_col_width=12) if unique_topo: print "%d unique topologies found" % len(unique_topo) topos = unique_topo.values() open(args.unique + 
".trees", "w").write("\n".join([topo.write(format=9) for topo in topos]) + "\n") import itertools for a, b in itertools.product(topos, topos): print a.diff(b, output="diffs_tab")