Beispiel #1
0
def intf_PCOV(E):
    """Calculate the population covariance of two lists of values taken from
    v1 and v2. The two lists should be the same length. Use this when all of
    the members of the entire population are represented. SYM items are
    replaced by their symbol table values when they are defined there. If
    either list is empty or the lengths differ, the result is 0.
    [2.2 2.4 3.1 2.5 3.5 3.6 2.5 2.0 2.2 2.6 2.7 3.3]
    [76  89  83  79  91  95  82  69  66  75  80  88 ] pcov -> 3.53194444444
    """
    # Both stack levels must pass the inc.LST check; failing either one is an
    # input error.
    if not (inc.LST(E, 1) and inc.LST(E, 2)):
        print("Input Error: pcov")
        print(intf_PCOV.__doc__)
        return  # Without doing much of anything.

    def resolve(ob):
        # A SYM found in the symbol table is replaced by the stored value; a
        # SYM that is not defined there is passed through untouched.
        if ob.whatami == "SYM":
            return E.symtab[ob.val].val if ob.val in E.symtab else ob
        return ob.val

    v1 = E.The.StackPop().val  # A Python list of gg OBs.
    v2 = E.The.StackPop().val  # A Python list of gg OBs.
    if not v1 or not v2 or len(v1) != len(v2):  # Empty or mismatched input.
        E.The.StackPush(objectifier.StackOB_VAL(0))
        return
    numlist1 = [resolve(ob) for ob in v1]
    numlist2 = [resolve(ob) for ob in v2]
    # NOTE(review): the True flag presumably selects the population form of
    # covariance() -- confirm against that helper.
    out = objectifier.StackOB_VAL(covariance(numlist1, numlist2, True))
    E.The.StackPush(out)
Beispiel #2
0
def intf_TANIMOTO(E):
    """The `tanimoto` function takes a list of properties of
  item A from v2 and a list of properties of item B from v1 and
  calculates the Tanimoto similarity coefficient. The idea is to
  compute a measure of how similar these things A and B are based on
  their characteristics. For example, if you're an alien naturalist
  and you find a mystery animal with "horns", "tail", "beard", "red",
  and "hoof", and you want to see if it's more like a zebra ("tail",
  "stripes", "hoof", "mane") or a goat ("horns", "tail", "hoof",
  "beard") this might be helpful. In recommendation engines, this can
  be used to find people with similar properties. There is much
  confusion about the origin and use of this. It seems to be a
  generalization of the Jaccard Index applicable to sets, which is
  what this function assumes. The formula used here is `T=
  Ni/(Na+Nb-Ni)` where Ni is the number of items in both lists or the
  count of the intersection, Na is the number of items in list A, and
  Nb is the number of items in list B. Dissimilarity, a distance
  metric, can be achieved with `1 rot rot tanimoto -`. The properties
  can be specified as TXT or VAL or SYM. If they are SYM, they are
  resolved. If either list is empty the result is 0.
        [''horns'' ''tail'' ''beard'' ''red'' ''hoof''] |devil sto
        devil [''tail'' ''stripes'' ''hoof'' ''mane''] tanimoto -> 0.285714285714
        devil [''horns'' ''beard'' ''tail'' ''hoof''] tanimoto -> .8
    """
    # Both stack levels must pass the inc.LST check; failing either one is an
    # input error.
    if not (inc.LST(E, 1) and inc.LST(E, 2)):
        print("Input Error: tanimoto")
        print(intf_TANIMOTO.__doc__)
        return  # Without doing much of anything.

    def resolve(ob):
        # A SYM found in the symbol table is replaced by the stored value; a
        # SYM that is not defined there is passed through untouched.
        if ob.whatami == "SYM":
            return E.symtab[ob.val].val if ob.val in E.symtab else ob
        return ob.val

    props_b = E.The.StackPop().val  # A Python list of gg OBs.
    props_a = E.The.StackPop().val  # A Python list of gg OBs.
    if not props_a or not props_b:  # An input is empty list.
        E.The.StackPush(objectifier.StackOB_VAL(0))
        return
    items_a = [resolve(ob) for ob in props_a]
    items_b = [resolve(ob) for ob in props_b]

    # Ni is counted on the sets (duplicates collapse), while Na and Nb are
    # the raw list lengths, as the formula in the docstring states.
    shared = len(set(items_a) & set(items_b))
    # Both lists are non-empty and `shared` cannot exceed the shorter one, so
    # the denominator is always at least 1.
    den = len(items_a) + len(items_b) - shared
    E.The.StackPush(objectifier.StackOB_VAL(float(shared) / den))
Beispiel #3
0
def intf_WEIGHTEDMEAN(E):
    """The `weightedmean` function takes a list of numbers to be averaged
    from v2 and a list of weights to bias the result by from v1. The lists
    must be or resolve to VALs and they must be the same length. This is
    commonly used to calculate weighted grades. If a student gets grades of
    C, A, A, D, B (corresponding to [2 4 4 1 3]) in classes of 4, 3, 3, 4, 3
    credits respectively, the weighted average of these two lists will give a
    GPA adjusted for the "importance" based on credits. This could also be
    used to make recommendations based on how well rated an item is by a
    list of people (list v2) and how closely each of those people match the
    user's previous tastes (list v1, the weights). The result is 0 if either
    list is empty, the lengths differ, or the weights sum to zero.
        [2 4 4 1 3] [4 3 3 4 3] weightedmean -> 2.64705882353
        [2 4 4 1 3] mean -> 2.8
        [76  89  83  79  91  95  82  69  66  75  80  88 ]
        [2.2 2.4 3.1 2.5 3.5 3.6 2.5 2.0 2.2 2.6 2.7 3.3] weightedmean -> 82.3834355828
        [76  89  83  79  91  95  82  69  66  75  80  88 ] mean -> 81.0833333333
    """
    # Both stack levels must pass the inc.LST check; failing either one is an
    # input error.
    if not (inc.LST(E, 1) and inc.LST(E, 2)):
        print("Input Error: weightedmean")
        print(intf_WEIGHTEDMEAN.__doc__)
        return  # Without doing much of anything.

    def resolve(ob):
        # A SYM found in the symbol table is replaced by the stored value; a
        # SYM that is not defined there is passed through untouched.
        if ob.whatami == "SYM":
            return E.symtab[ob.val].val if ob.val in E.symtab else ob
        return ob.val

    weight_obs = E.The.StackPop().val  # Popped first: the weights.
    value_obs = E.The.StackPop().val  # Popped second: the values to average.
    out = objectifier.StackOB_VAL(0)
    if not value_obs or not weight_obs or len(value_obs) != len(weight_obs):
        E.The.StackPush(out)  # Empty or mismatched input.
        return
    values = [resolve(ob) for ob in value_obs]
    weights = [resolve(ob) for ob in weight_obs]
    num = sum(v * w for v, w in zip(values, weights))
    den = sum(weights)
    if den:  # Weights summing to zero leave the default 0 result.
        out = objectifier.StackOB_VAL(num / den)
    E.The.StackPush(out)
Beispiel #4
0
def intf_PSDEV(E):
    """Calculate the population standard deviation of a list of values. Use
    this when all of the members of the entire population are represented.
    This is the square root of the population variance. SYM items are
    replaced by their symbol table values when they are defined there. An
    empty list gives 0.
    [2.2 2.4 3.1 2.5 3.5 3.6 2.5 2.0 2.2 2.6 2.7 3.3] psdev -> 0.511262055006
    [76 89 83 79 91 95 82 69 66 75 80 88] psdev -> 8.41088910613
    """
    import math

    if not inc.LST(E, 1):
        print("Input Error: psdev")
        print(intf_PSDEV.__doc__)
        return  # Without doing much of anything.

    def resolve(ob):
        # A SYM found in the symbol table is replaced by the stored value; a
        # SYM that is not defined there is passed through untouched.
        if ob.whatami == "SYM":
            return E.symtab[ob.val].val if ob.val in E.symtab else ob
        return ob.val

    v1 = E.The.StackPop().val  # A Python list of gg OBs.
    if not v1:  # Input is empty list.
        E.The.StackPush(objectifier.StackOB_VAL(0))
        return
    numlist = [resolve(ob) for ob in v1]
    # NOTE(review): the True flag presumably selects the population form of
    # variance() -- confirm against that helper.
    out = objectifier.StackOB_VAL(math.sqrt(variance(numlist, True)))
    E.The.StackPush(out)
Beispiel #5
0
def intf_MAX(E):
    """Return the maximum value from a list at v1. SYM items are compared by
    their symbol table values; a SYM that is not defined there is ignored.
    The object pushed back is the original list member (so a SYM stays a
    SYM). When several items tie, the earliest one is returned. An empty
    list, or one with nothing comparable in it, produces no result.
    [2.2 2.4 3.1 2.5 3.5 3.6 2.5 2.0 2.2 2.6 2.7 3.3] max -> 3.6
    [76 89 83 79 91 95 82 69 66 75 80 88] max -> 95
    """
    if not inc.LST(E, 1):
        print("Input Error: max")
        print(intf_MAX.__doc__)
        return  # Without doing much of anything.
    v1 = E.The.StackPop().val  # A Python list of gg OBs.
    best = None  # The OB holding the largest value seen so far.
    best_val = None
    for ob in v1:
        if ob.whatami == "SYM":
            if ob.val not in E.symtab:
                # An undefined symbol has no value to compare; skipping it
                # avoids comparing against an unset or stale value.
                continue
            val = E.symtab[ob.val].val
        else:
            val = ob.val
        # Strict comparison keeps the first of any equal maxima.
        if best is None or val > best_val:
            best = ob
            best_val = val
    if best is not None:
        E.The.StackPush(best)
Beispiel #6
0
def intf_PCORR(E):
    """Calculate the population Pearson's correlation coefficient (rho)
    between two lists of values taken from v1 and v2. The two lists should be
    the same length. Use this when all of the members of the entire population
    are represented. This function is probably not needed since it does not
    seem to matter if population or sample statistics are used to calculate
    the Pearson's correlation coefficient. See `corr`. The result is 0 if
    either list is empty, the lengths differ, or either list has a standard
    deviation of zero.
    """
    import math

    # Both stack levels must pass the inc.LST check; failing either one is an
    # input error.
    if not (inc.LST(E, 1) and inc.LST(E, 2)):
        print("Input Error: pcorr")
        print(intf_PCORR.__doc__)
        return  # Without doing much of anything.

    def resolve(ob):
        # A SYM found in the symbol table is replaced by the stored value; a
        # SYM that is not defined there is passed through untouched.
        if ob.whatami == "SYM":
            return E.symtab[ob.val].val if ob.val in E.symtab else ob
        return ob.val

    v1 = E.The.StackPop().val  # A Python list of gg OBs.
    v2 = E.The.StackPop().val  # A Python list of gg OBs.
    if not v1 or not v2 or len(v1) != len(v2):  # Empty or mismatched input.
        E.The.StackPush(objectifier.StackOB_VAL(0))
        return
    numlist1 = [resolve(ob) for ob in v1]
    numlist2 = [resolve(ob) for ob in v2]
    # NOTE(review): the True flag presumably selects the population form of
    # variance()/covariance() -- confirm against those helpers.
    sd1 = math.sqrt(variance(numlist1, True))
    sd2 = math.sqrt(variance(numlist2, True))
    if not sd1 or not sd2:
        # A constant list would make the divisor zero; answer 0 and stop
        # here rather than going on to divide.
        E.The.StackPush(objectifier.StackOB_VAL(0))
        return
    out = objectifier.StackOB_VAL(
        covariance(numlist1, numlist2, True) / (sd1 * sd2))
    E.The.StackPush(out)
Beispiel #7
0
def intf_CORR(E):
    """Calculate the sample Pearson's correlation coefficient (r)
    between two lists of values taken from v1 and v2. The two lists should be
    the same length. Use this when some of the members of the entire population
    are not represented. The result is 0 if either list is empty, the lengths
    differ, or either list has a standard deviation of zero.
    [2.2 2.4 3.1 2.5 3.5 3.6 2.5 2.0 2.2 2.6 2.7 3.3]
    [76  89  83  79  91  95  82  69  66  75  80  88 ] corr -> 0.821350253246
    """
    import math

    # Both stack levels must pass the inc.LST check; failing either one is an
    # input error.
    if not (inc.LST(E, 1) and inc.LST(E, 2)):
        print("Input Error: corr")
        print(intf_CORR.__doc__)
        return  # Without doing much of anything.

    def resolve(ob):
        # A SYM found in the symbol table is replaced by the stored value; a
        # SYM that is not defined there is passed through untouched.
        if ob.whatami == "SYM":
            return E.symtab[ob.val].val if ob.val in E.symtab else ob
        return ob.val

    v1 = E.The.StackPop().val  # A Python list of gg OBs.
    v2 = E.The.StackPop().val  # A Python list of gg OBs.
    if not v1 or not v2 or len(v1) != len(v2):  # Empty or mismatched input.
        E.The.StackPush(objectifier.StackOB_VAL(0))
        return
    numlist1 = [resolve(ob) for ob in v1]
    numlist2 = [resolve(ob) for ob in v2]
    sd1 = math.sqrt(variance(numlist1))
    sd2 = math.sqrt(variance(numlist2))
    if not sd1 or not sd2:
        # A constant list would make the divisor zero; answer 0 and stop
        # here rather than going on to divide.
        E.The.StackPush(objectifier.StackOB_VAL(0))
        return
    out = objectifier.StackOB_VAL(covariance(numlist1, numlist2) / (sd1 * sd2))
    E.The.StackPush(out)
Beispiel #8
0
def intf_SHUFFLE(E):
    """Takes a list item from v1 and returns a list of the same objects
    in a random order. The popped list is reordered in place and then
    wrapped in a new LST object. For example, this will shuffle the
    entire stack:
        depth 2list shuffle wake !
    """
    if inc.LST(E, 1):
        items = E.The.StackPop().val  # The Python list to reorder.
        import random
        random.shuffle(items)  # In place; the same list object goes back.
        shuffled = objectifier.StackOB_LST(items)
        E.The.StackPush(shuffled)
    else:
        print("Input Error: shuffle")
        print(intf_SHUFFLE.__doc__)
Beispiel #9
0
def intf_ENTROPY(E):
    """Calculate the Shannon entropy for a list of items. This measures how
    surprising a random selection from the data is and can be used to gauge
    the amount of disorder in a set. Falling entropy during an iterative
    classification process could be a sign that the data is becoming better
    organized. Symbols that are defined get resolved; other data is used as
    is. The formula is `H(X)=-sum[i=1,n](p(x)*log2(p(x)))` where p(x) is the
    probability that X is in state x and n is the number of unique items in
    the original list. Because log base 2 is used, the output is measured in
    bits of entropy. This calculation is likely not correct for cryptographic
    keys and passphrases since the probabilities of a certain value are
    generally lower than this process assumes (by counting extant examples).
    An empty list gives 0.
        [2.2 2.4 3.1 2.5 3.5 3.6 2.5 2.0 2.2 2.6 2.7 3.3] entropy -> 3.25162916739
        [76 89 83 79 91 95 82 69 66 75 80 88] entropy -> 3.58496250072
        [''p'' ''a'' ''s'' ''s'' ''w'' ''o'' ''r'' ''d''] entropy -> 2.75
        [''3'' ''T'' ''^'' '','' ''d'' ''9'' ''9'' ''w''] entropy -> 2.75
        100000 range entropy -> 16.6096404744
    """
    if not inc.LST(E, 1):
        print("Input Error: entropy")
        print(intf_ENTROPY.__doc__)
        return  # Without doing much of anything.
    obs = E.The.StackPop().val  # A Python list of gg OBs.
    if not obs:  # Input is empty list.
        E.The.StackPush(objectifier.StackOB_VAL(0))
        return
    import math

    # Tally how many times each distinct (resolved) item occurs.
    tally = dict()
    for ob in obs:
        if ob.whatami != "SYM":
            key = ob.val
        elif ob.val in E.symtab:
            key = E.symtab[ob.val].val
        else:
            key = ob  # An undefined symbol is counted as itself.
        tally[key] = tally.get(key, 0) + 1

    size = len(obs)
    ln2 = math.log(2)  # Divisor converting natural log to log base 2.
    bits = 0
    for count in tally.values():
        share = float(count) / size  # p(x) for this distinct item.
        bits -= share * (math.log(share) / ln2)
    E.The.StackPush(objectifier.StackOB_VAL(bits))
Beispiel #10
0
def intf_SUM(E):
    """Add up a list of values and return the total. There is an alias
    `tot`. SYM items are replaced by their symbol table values when they are
    defined there. An empty list gives 0.
    [2.2 2.4 3.1 2.5 3.5 3.6 2.5 2.0 2.2 2.6 2.7 3.3] sum -> 32.6
    [76 89 83 79 91 95 82 69 66 75 80 88] tot -> 973
    """
    if inc.LST(E, 1):
        obs = E.The.StackPop().val  # A Python list of gg OBs.
        if obs:
            def as_number(ob):
                # Defined symbols yield their stored value; an undefined
                # symbol is passed through untouched.
                if ob.whatami != "SYM":
                    return ob.val
                if ob.val in E.symtab:
                    return E.symtab[ob.val].val
                return ob

            total = sum([as_number(ob) for ob in obs])
            result = objectifier.StackOB_VAL(total)
        else:  # Input is empty list.
            result = objectifier.StackOB_VAL(0)
        E.The.StackPush(result)
    else:
        print("Input Error: sum")
        print(intf_SUM.__doc__)
Beispiel #11
0
def intf_PVAR(E):
    """Calculate the population variance of a list of values. Use this when
    all of the members of the entire population are represented. SYM items
    are replaced by their symbol table values when they are defined there.
    An empty list gives 0.
    [2.2 2.4 3.1 2.5 3.5 3.6 2.5 2.0 2.2 2.6 2.7 3.3] pvar -> 0.261388888889
    [76 89 83 79 91 95 82 69 66 75 80 88] pvar -> 70.7430555556
    """
    if not inc.LST(E, 1):
        print("Input Error: pvar")
        print(intf_PVAR.__doc__)
        return  # Without doing much of anything.

    def resolve(ob):
        # A SYM found in the symbol table is replaced by the stored value; a
        # SYM that is not defined there is passed through untouched.
        if ob.whatami == "SYM":
            return E.symtab[ob.val].val if ob.val in E.symtab else ob
        return ob.val

    v1 = E.The.StackPop().val  # A Python list of gg OBs.
    if not v1:  # Input is empty list.
        E.The.StackPush(objectifier.StackOB_VAL(0))
        return
    numlist = [resolve(ob) for ob in v1]
    # NOTE(review): the True flag presumably selects the population form of
    # variance() -- confirm against that helper.
    out = objectifier.StackOB_VAL(variance(numlist, True))
    E.The.StackPush(out)
Beispiel #12
0
def intf_AVG(E):
    """Calculate the average (arithmetic mean) of a list of values. There
    is an alias `mean`. SYM items are replaced by their symbol table values
    when they are defined there. An empty list gives 0.
    [2.2 2.4 3.1 2.5 3.5 3.6 2.5 2.0 2.2 2.6 2.7 3.3] avg -> 2.71666666667
    [76 89 83 79 91 95 82 69 66 75 80 88] mean -> 81.0833333333
    """
    if inc.LST(E, 1):
        obs = E.The.StackPop().val  # A Python list of gg OBs.
        if obs:
            def as_number(ob):
                # Defined symbols yield their stored value; an undefined
                # symbol is passed through untouched.
                if ob.whatami != "SYM":
                    return ob.val
                if ob.val in E.symtab:
                    return E.symtab[ob.val].val
                return ob

            numbers = [as_number(ob) for ob in obs]
            result = objectifier.StackOB_VAL(mean(numbers))
        else:  # Input is empty list.
            result = objectifier.StackOB_VAL(0)
        E.The.StackPush(result)
    else:
        print("Input Error: avg")
        print(intf_AVG.__doc__)