def unzip_and_cast_to_cxoracle_types(data, cursor, types,
                                     type_lookup=type_lookup_oracle):
    """Turn row-wise data into a list of per-column cursor variables

    data - a list or tuple of lists or tuples (rows); it is unzipped into
        columns before conversion

    cursor - object providing arrayvar(type, values) and var(type)

    types - one type code per column:
        'i'         values are passed through int()
        'f', 'bf'   values are passed through float()
        'c'         the column is packed into a single CLOB (see below)
        other       values are passed through unchanged

    type_lookup - mapping from type code to the type handed to arrayvar

    Returns a list of variables in column order. A 'c' column contributes
    two entries: the CLOB variable holding all fields joined with '@', then
    an array (typed as type_lookup['i']) of split offsets into that string.

    NOTE: the offsets are found by scanning the joined string for '@', so a
    field that itself contains '@' produces additional offsets.
    """
    bound = []
    for code, column in zip(types, unzip(data)):
        if code == 'c':
            # pack the whole column into one CLOB and pass the positions
            # just past each '@' separator, plus the total length
            lob = cursor.var(CLOB)
            joined = '@'.join(column)
            offsets = [pos + 1 for pos, ch in enumerate(joined) if ch == '@']
            offsets.append(len(joined))
            lob.setvalue(0, joined)
            bound.append(lob)
            bound.append(cursor.arrayvar(type_lookup['i'], offsets))
            continue

        if code == 'i':
            values = map(int, column)
        elif code in ('f', 'bf'):
            values = map(float, column)
        else:
            values = column
        bound.append(cursor.arrayvar(type_lookup[code], values))
    return bound
def unzip_and_cast_to_cxoracle_types(data, cursor):
    """Unzips six-column data and wraps each column in a cursor array variable

    data - rows that unzip into exactly six columns, in the order
        ids, acc, dec, coreset, seq, checksum

    cursor - object providing arrayvar(type, values)

    Returns a 6-tuple of array variables in that same column order. The
    coreset values are passed through int() and bound as cx_Oracle.NUMBER,
    checksum is bound as cx_Oracle.FIXED_CHAR, and the remaining columns are
    bound as cx_Oracle.STRING.
    """
    id_col, acc_col, dec_col, coreset_col, seq_col, checksum_col = unzip(data)
    return (
        cursor.arrayvar(cx_Oracle.STRING, id_col),
        cursor.arrayvar(cx_Oracle.STRING, acc_col),
        cursor.arrayvar(cx_Oracle.STRING, dec_col),
        cursor.arrayvar(cx_Oracle.NUMBER, map(int, coreset_col)),
        cursor.arrayvar(cx_Oracle.STRING, seq_col),
        cursor.arrayvar(cx_Oracle.FIXED_CHAR, checksum_col),
    )
def unzip_and_cast_to_cxoracle_types(data, cursor, types):
    """Turn row-wise data into a list of per-column cursor array variables

    data - a list or tuple of lists or tuples (rows); it is unzipped into
        columns before conversion

    cursor - object providing arrayvar(type, values); when it is a
        MockConnection the mock type table is used instead of the oracle one

    types - one type code per column: 'i' values are passed through int(),
        'f' values through float(), anything else (including 's') is passed
        through unchanged

    Returns a list with one array variable per column, in column order.
    """
    type_lookup = (type_lookup_mock if isinstance(cursor, MockConnection)
                   else type_lookup_oracle)

    # only numeric codes need a conversion; every other code passes through
    converters = {'i': int, 'f': float}

    out = []
    for code, column in zip(types, unzip(data)):
        convert = converters.get(code)
        values = column if convert is None else map(convert, column)
        out.append(cursor.arrayvar(type_lookup[code], values))
    return out
Example #4
0
def name_node_score_fold(tree, score_f=fmeasure, tiebreak_f=min_tips,
                         verbose=False):
    """Score each (rank, name) on internal nodes and keep one node per name

    A single pass over the non-tip nodes collects, for every rank and name,
    the list of (node, score) pairs. The winner for each name is then chosen
    from that collection rather than by searching the tree again.

    tree - object whose nontips(include_self=True) yields nodes carrying
        RankNames, ValidRelFreq and ConsensusRelFreq

    score_f - called as score_f(precision, recall) to produce a score

    tiebreak_f - called with a list of tied nodes (None at positions that
        are not part of the tie) and returns the node to keep

    verbose - when true, print a start message

    Side effects: every visited node gets a RankNameScores list (one slot
    per rank, None where the node has no name), and RankNames[rank] is set
    to None on every node that is not selected for its name at that rank.
    """
    if verbose:
        print("Starting name_node_score_fold...")

    n_ranks = len(RANK_ORDER)
    # rank index -> {name: [(node, score), ...]}
    candidates = dict((idx, {}) for idx in range(n_ranks))

    for node in tree.nontips(include_self=True):
        node.RankNameScores = [None] * n_ranks

        for rank, name in enumerate(node.RankNames):
            if name is not None:
                # precision: fraction of the informative tips below this
                # node that carry the name
                precision = node.ValidRelFreq[rank][name]

                # recall: tips below this node carrying the name relative
                # to all tips in the tree with the name
                recall = node.ConsensusRelFreq[rank][name]

                # keep the score on the node so it can be inspected later
                score = score_f(precision, recall)
                node.RankNameScores[rank] = score

                candidates[rank].setdefault(name, []).append((node, score))

    # pick the winning node for every name at every rank
    for rank, by_name in candidates.items():
        for pairs in by_name.values():
            # ascending sort, then reversed: highest score comes first
            ranked = sorted(pairs, key=itemgetter(1))[::-1]
            nodes, scores = unzip(ranked)
            scores = array(scores)

            is_top = scores == scores[0]
            if sum(is_top) > 1:
                # several nodes share the best score; build a list that
                # holds the tied nodes and None everywhere else
                indices = where(is_top, range(len(nodes)), None)
                tie_nodes = [nodes[i] if i is not None else None
                             for i in indices]
                node_to_keep = tiebreak_f(tie_nodes)
                for candidate, _ in ranked:
                    if not (candidate == node_to_keep):
                        candidate.RankNames[rank] = None
            else:
                # unique best score: clear the name on everything else
                for candidate, _ in ranked[1:]:
                    candidate.RankNames[rank] = None