def unzip_and_cast_to_cxoracle_types(data, cursor, types,
                                     type_lookup=type_lookup_oracle):
    """Unzips data and binds each column as a cursor variable

    data - a list or tuple of lists or tuples (one inner item per row)
    cursor - object providing arrayvar() and var(); presumably a cx_Oracle
        cursor -- confirm against callers
    types - one type code per column:
        'i'        values are converted with int()
        'f', 'bf'  values are converted with float()
        'c'        strings are joined with '@' into a single CLOB variable,
                   followed by an integer array of split offsets
        other      values are passed through unchanged
    type_lookup - mapping from type code to the type given to
        cursor.arrayvar()

    Returns a list of bind variables in column order. A 'c' column
    contributes two entries (the CLOB and its offsets array); every other
    column contributes one.
    """
    res = []
    for t, f in zip(types, unzip(data)):
        if t == 'i':
            tmp = [int(v) for v in f]
        elif t == 'f' or t == 'bf':
            tmp = [float(v) for v in f]
        elif t == 'c':
            # NOTE: CLOB columns are shipped as one '@'-joined blob plus an
            # array of offsets; clumsy, but it keeps it to a single CLOB bind.
            strings = list(f)
            clob = cursor.var(CLOB)
            all_strings = '@'.join(strings)

            # Each offset is the index just past a separator, and the final
            # offset is the total length. Offsets are derived from the string
            # lengths rather than by scanning for '@', so a string that
            # itself contains '@' no longer produces spurious split points.
            all_strings_splits = []
            position = 0
            for s in strings[:-1]:
                position += len(s) + 1
                all_strings_splits.append(position)
            all_strings_splits.append(len(all_strings))

            clob.setvalue(0, all_strings)
            res.append(clob)
            res.append(cursor.arrayvar(type_lookup['i'], all_strings_splits))
            continue
        else:
            tmp = f
        res.append(cursor.arrayvar(type_lookup[t], tmp))
    return res
def unzip_and_cast_to_cxoracle_types(data, cursor):
    """Unzip six-column row data and bind each column via cursor.arrayvar

    data - rows that unzip() splits into exactly six columns, in the order
        ids, acc, dec, coreset, seq, checksum
    cursor - object providing arrayvar()

    The coreset column is converted with int() and bound as
    cx_Oracle.NUMBER, checksum is bound as cx_Oracle.FIXED_CHAR, and the
    remaining columns are bound as cx_Oracle.STRING.

    Returns a 6-tuple of the bound variables in the same column order.
    """
    (id_col, acc_col, dec_col,
     coreset_col, seq_col, checksum_col) = unzip(data)

    id_var = cursor.arrayvar(cx_Oracle.STRING, id_col)
    acc_var = cursor.arrayvar(cx_Oracle.STRING, acc_col)
    dec_var = cursor.arrayvar(cx_Oracle.STRING, dec_col)
    # coreset is the only numeric column, so cast it before binding
    coreset_var = cursor.arrayvar(cx_Oracle.NUMBER,
                                  [int(v) for v in coreset_col])
    seq_var = cursor.arrayvar(cx_Oracle.STRING, seq_col)
    checksum_var = cursor.arrayvar(cx_Oracle.FIXED_CHAR, checksum_col)

    return (id_var, acc_var, dec_var, coreset_var, seq_var, checksum_var)
def unzip_and_cast_to_cxoracle_types(data, cursor, types):
    """Unzip row data and bind each column with the matching type

    data - a list or tuple of lists or tuples
    cursor - object providing arrayvar(); when it is a MockConnection the
        mock type table is used, otherwise the oracle one
    types - a list of 'i', 's', 'f' for int, string or float; any code
        other than 'i' or 'f' leaves the column values unchanged

    Returns a list with one bound variable per column.
    """
    lookup = (type_lookup_mock if isinstance(cursor, MockConnection)
              else type_lookup_oracle)

    bound = []
    for code, column in zip(types, unzip(data)):
        if code == 'i':
            values = [int(v) for v in column]
        elif code == 'f':
            values = [float(v) for v in column]
        else:
            # 's' and any unrecognised code: pass the values through as-is
            values = column
        bound.append(cursor.arrayvar(lookup[code], values))
    return bound
def name_node_score_fold(tree, score_f=fmeasure, tiebreak_f=min_tips,
                         verbose=False):
    """Score each (rank, name) on internal nodes and keep one node per name

    A single pass over tree.nontips(include_self=True) collects, for every
    rank and name, the nodes carrying that name together with their score.
    The collected table is then used to pick one node per name, which
    avoids repeated lookups in the tree.

    tree - object whose nontips() yields nodes with RankNames, ValidRelFreq
        and ConsensusRelFreq attributes
    score_f - called as score_f(precision, recall)
    tiebreak_f - called with a list of the top-scoring nodes (None in the
        slots of nodes that are not tied for the top score); returns the
        node to keep
    verbose - print a start message when true

    Side effects: sets node.RankNameScores on every visited node and sets
    node.RankNames[rank] to None on every node not selected for a name.
    Returns None.
    """
    if verbose:
        print("Starting name_node_score_fold...")

    n_ranks = len(RANK_ORDER)
    # rank index -> {name: [(node, score), ...]}
    candidates = dict((rank_idx, {}) for rank_idx in range(n_ranks))

    for node in tree.nontips(include_self=True):
        node.RankNameScores = [None] * n_ranks

        for rank, name in enumerate(node.RankNames):
            if name is None:
                continue

            # precision: share of the informative tips below this node that
            # carry the name
            precision = node.ValidRelFreq[rank][name]

            # recall: share of all tips in the tree with this name that sit
            # below this node
            recall = node.ConsensusRelFreq[rank][name]

            # store the score at its rank position so it can be inspected
            # later in other contexts
            score = score_f(precision, recall)
            node.RankNameScores[rank] = score

            candidates[rank].setdefault(name, []).append((node, score))

    # walk the collected table and keep only the best node for each name
    for rank, by_name in candidates.items():
        for scored_nodes in by_name.values():
            # ascending sort then reverse (not reverse=True): this fixes the
            # relative order of equally scored nodes handed to tiebreak_f
            ranked = sorted(scored_nodes, key=itemgetter(1))[::-1]
            nodes, scores = unzip(ranked)
            scores = array(scores)
            is_top = scores == scores[0]

            if sum(is_top) > 1:
                # several nodes share the top score: let tiebreak_f choose.
                # where() yields an index for tied nodes and None otherwise
                # (workaround for a shape mismatch)
                indices = where(is_top, range(len(nodes)), None)
                tie_nodes = [nodes[i] if i is not None else None
                             for i in indices]
                keeper = tiebreak_f(tie_nodes)

                for node, _ in ranked:
                    if node == keeper:
                        continue
                    node.RankNames[rank] = None
            else:
                # unique winner: clear the name everywhere else
                for node, _ in ranked[1:]:
                    node.RankNames[rank] = None