Ejemplo n.º 1
0
def Phi(X, order, normalise=True, compute_sigs=True):
    """Return a signature, optionally rescaled level by level.

    Parameters
    ----------
    X : array-like
        A stream when ``compute_sigs`` is true, otherwise an already
        computed signature.
    order : int
        Truncation order of the signature.
    normalise : bool, optional
        When false the signature is returned as is.
    compute_sigs : bool, optional
        When true the signature is computed with ``tosig.stream2sig`` and
        the dimension is read from the second axis of ``X``. When false
        ``X`` is used directly and the dimension is taken to be 2.

    Returns
    -------
    numpy.ndarray
        The signature; when ``normalise`` is true each entry is multiplied
        by ``phi_x ** len(key)`` for its key.

    """
    if compute_sigs:
        dim, sig = np.shape(X)[1], tosig.stream2sig(np.array(X), order)
    else:
        dim, sig = 2, np.array(X)

    if normalise:
        # Squared norm before rescaling.
        print(np.linalg.norm(sig)**2)

        keys = get_keys(dim, order)
        scale = phi(tuple(sig), order, keys)
        weights = np.array([scale**len(key) for key in keys])
        rescaled = weights * sig

        # Squared norm after rescaling.
        print(np.linalg.norm(rescaled)**2)

        return rescaled

    return sig
def fit(collection, threshold, order=2):
    """Train a random forest on the signatures of the training streams.

    Parameters
    ----------
    collection : list
        Training set; each element exposes ``data`` and ``diagnosis``.
    threshold : array
        List of 3 points on the plane, indexed by diagnosis.
    order : int, optional
        Order of the signature.
        Default is 2.

    Returns
    -------
    RandomForestRegressor
        Trained model.

    """

    features, targets = [], []

    for participant in collection:
        # Feature vector: signature of the participant's stream.
        stream = np.array(participant.data)
        features.append(tosig.stream2sig(stream, order))

        # Target: the point on the plane assigned to the participant's
        # clinical group.
        targets.append(threshold[participant.diagnosis])

    # Random forest regression with out-of-bag scoring enabled.
    model = RandomForestRegressor(n_estimators=100, oob_score=True)
    model.fit(features, targets)

    return model
Ejemplo n.º 3
0
def check(collection, reg, threshold, order=2):
    """Score the model on an out-of-sample set.

    Args:
        collection (list): The out-of-sample set; each element exposes
            ``data`` and ``diagnosis``.

        reg (RandomForestRegressor): The trained random forest.

        threshold (list): List of 3 points on the plane.

        order (int): Order of the signature.

    Returns:
        float: Fraction of predictions that match the expected point.

    """

    features = []
    expected = []

    for sample in collection:
        # Model input: signature of the sample's stream.
        features.append(list(tosig.stream2sig(np.array(sample.data), order)))

        # Expected output for the sample's diagnosis.
        expected.append(f(sample.diagnosis, threshold=threshold))

    predictions = reg.predict(features)

    hits = 0
    for index, target in enumerate(expected):
        # Compare as sets, so coordinate order and repeats are ignored.
        if set(findMin(predictions[index], threshold)) == set(target):
            hits += 1

    return hits / float(len(features))
def test(collection, reg, threshold, order=2):
    """Measure the accuracy of the model on an out-of-sample set.

    Parameters
    ----------
    collection : list
        The out-of-sample set; each element exposes ``data`` and
        ``diagnosis``.
    reg : RandomForestRegressor
        Trained random forest.
    threshold : array
        List of 3 points on the plane, indexed by diagnosis.
    order : int, optional
        Order of the signature.
        Default is 2.

    Returns
    -------
    float
        Accuracy of the predictions.

    """

    features, targets = [], []

    for sample in collection:
        # Model input: signature of the sample's stream.
        features.append(list(tosig.stream2sig(np.array(sample.data), order)))

        # Expected output: the point assigned to the sample's diagnosis.
        targets.append(threshold[sample.diagnosis])

    raw_predictions = reg.predict(features)

    # Map every raw prediction to the point chosen by _findMin.
    snapped = np.array([_findMin(raw, threshold) for raw in raw_predictions])

    # 1.0 for every prediction equal to its target in all coordinates.
    hits = [1. if (guess == truth).all() else 0.
            for guess, truth in zip(snapped, targets)]

    return np.mean(hits)
def Phi_fix(X, order, normalise=True, compute_sigs=True, phi_x=1):
    """Rescale a signature with a given factor, then normalise the result.

    Parameters
    ----------
    X : array-like
        A stream when ``compute_sigs`` is true, otherwise an already
        computed signature.
    order : int
        Truncation order of the signature.
    normalise : bool, optional
        When false the signature is returned as is.
    compute_sigs : bool, optional
        When true the signature is computed with ``tosig.stream2sig`` and
        the dimension is read from the second axis of ``X``. When false
        ``X`` is used directly and the dimension is taken to be 2.
    phi_x : number, optional
        Factor applied first; the entry for a key is multiplied by
        ``phi_x ** len(key)``. Default is 1.

    Returns
    -------
    numpy.ndarray
        The signature, rescaled first with ``phi_x`` and then with the
        factor that ``phi`` returns for the rescaled signature.

    """
    if compute_sigs:
        dim, sig = np.shape(X)[1], tosig.stream2sig(np.array(X), order)
    else:
        dim, sig = 2, np.array(X)

    if normalise:
        keys = get_keys(dim, order)

        # First pass: apply the supplied factor.
        first_weights = np.array([phi_x ** len(key) for key in keys])
        rescaled = first_weights * sig

        # Second pass: apply the factor computed from the rescaled signature.
        second_factor = phi(tuple(rescaled), order, keys)
        second_weights = np.array([second_factor ** len(key) for key in keys])

        return second_weights * rescaled

    return sig
def fit(collection, order=2):
    """Train a random forest on stream signatures.

    Parameters
    ----------
    collection : list
        Training set; each element exposes ``data`` and ``nextDay``.
    order : int, optional
        Order of the signature.
        Default is 2.

    Returns
    -------
    RandomForestRegressor
        Trained model.

    """

    features, targets = [], []

    for sample in collection:
        # Feature vector: signature of the sample's stream.
        stream = np.array(sample.data)
        features.append(tosig.stream2sig(stream, order))

        # Target: the following observation with its first entry dropped.
        following = sample.nextDay
        targets.append(following[1:len(sample.nextDay)])

    # Random forest regression with 100 trees.
    model = RandomForestRegressor(n_estimators=100)
    model.fit(features, targets)

    return model
Ejemplo n.º 7
0
 def test_logsig(self):
     """Check the depth-3 signature of ``path1`` (leading term dropped) against ``logsig``."""
     # The earlier form, ``assertEqual(all(difference), 0)``, succeeded as
     # soon as one entry of the difference was exactly zero, so it could
     # pass on mismatching data. Every entry is compared here, with a
     # floating-point tolerance.
     computed = ts.stream2sig(np.array(path1), 3)[1:]
     self.assertTrue(np.allclose(computed, logsig))
Ejemplo n.º 8
0
 def test_linsig(self):
     """Check that ``ax.linsig(y[0], 4)`` agrees with the depth-4 signature of ``x``."""
     # The earlier form, ``assertEqual(all(difference), 0)``, succeeded as
     # soon as one entry of the difference was exactly zero, so it could
     # pass on mismatching data. Every entry is compared here, with a
     # floating-point tolerance.
     self.assertTrue(np.allclose(ax.linsig(y[0], 4), ts.stream2sig(x, 4)))
Ejemplo n.º 9
0
     [2, 0], [1, 0], [1, -1], [1, 0], [0, 0]]

# Hand-written 2-D test path, given as a list of [x, y] points.
b = [[0.0, 0], [1, 3], [0, 0], [1, 5], [2, 5], [1, 5], [0, 6], [1, 5], [0, 0]]

# Another hand-written 2-D test path.
c = [[0.0, 0], [1, 1], [3, 1], [2, 1], [1, 1], [2, 0]]

# Pruned versions of the paths. ax.fullprune receives the plain lists, since
# a, b and c are only rebound to NumPy arrays afterwards.
# NOTE(review): presumably fullprune removes retraced parts of a path -
# confirm against the ax module.
aa = ax.fullprune(a)
bb = ax.fullprune(b)
cc = ax.fullprune(c)
# Convert the paths and their pruned versions to arrays for stream2sig.
a = np.array(a)
b = np.array(b)
c = np.array(c)
aa = np.array(aa)
bb = np.array(bb)
cc = np.array(cc)
# Difference between the depth-3 signature of each path and that of its
# pruned version.
sigdiffa = ts.stream2sig(a, 3) - ts.stream2sig(aa, 3)
sigdiffb = ts.stream2sig(b, 3) - ts.stream2sig(bb, 3)
sigdiffc = ts.stream2sig(c, 3) - ts.stream2sig(cc, 3)

# NOTE(review): the arguments are presumably (number of steps, allowed step
# values, dimension) - confirm against ax.random_path.
y = ax.random_path(1, range(-10, 11), 3)

# Start of a path at the origin of 3-D space.
x = [[0.0, 0, 0]]
# xx is the same list object as x (no copy is made).
xx = x

# Append the first element of y, then rebind x to the array form of the
# resulting two-element list.
xx.append(y[0])
x = np.array(xx)

# Two further random paths, generated after y with the same step values and
# dimension arguments.
path1 = ax.random_path(5, range(-10, 11), 3)
path2 = ax.random_path(5, range(-10, 11), 3)
#print(path1)
#print(ax.concatenate(path1,path2))
Ejemplo n.º 10
0
 def signature(self, degree, log=False):
     """Return the signature of the concatenated drawing up to ``degree``.

     When ``log`` is true the log-signature (``ts.stream2logsig``) is
     returned instead of the signature (``ts.stream2sig``).
     """
     if log:
         return ts.stream2logsig(self.concat_drawing(), degree)
     return ts.stream2sig(self.concat_drawing(), degree)
Ejemplo n.º 11
0
def shuffle_test(l, a, dims, degs):
    """ Evaluates whether a product sum of shuffles is equal to the multiplication of evaluated pairs of signature elements

    stream2sig and stream2logsig functions included in the esig library return a list of values according to (a) the dimensions present in the data and (b) signature degrees.
    sigkeys and logsigkeys functions included in the esig library return keys which correspond to each of the signature elements produced with either stream2sig or stream2logsig functions.

    Properties of path signature are such that for any 2 keys, if one multiplies the values which they correspond to, the result of the multiplication is equal to a sum of
    values for all keys which are shuffles of the two keys. This function evaluates whether this property of path signature holds true for all ordered pairs of distinct
    signature elements whose keys, together, are shorter or as long as the longest keys of values included in the signature.

    Args:
        l (int): length of a random path to be produced
        a (list): list of modifiers applied at each step of the random path
        dims (int): number of dimensions each path step has
        degs (int): number of signature degrees to be used in the test

    Returns:
        int: 1 if the test has been successful

    Raises:
        ValueError: If the first signature element is not 1, or if the sum of values corresponding to key shuffles
            differs from the multiplication of the pair of values with corresponding keys by more than 0.00001

    Example:
        >>> shuffle_test(100,[-1,0,1],3,3)
        1

    """

    # Random path with 'l' steps, possible moves 'a' at each step and 'dims'
    # dimensions, and its signature truncated at degree 'degs'.
    test_path = np.array(ax.random_path(l, a, dims))
    signature = list(ts.stream2sig(test_path, degs))

    # The first signature element (the one for the empty key) must be 1.
    if signature[0] != 1:
        raise ValueError(
            "The first signature element has a value different from 1 in shuffle_test"
        )

    # Turn every key such as "(1,2)" into a list of letters ['1', '2'].
    # The empty key "()" is skipped, matching the removal of the first
    # signature element below.
    keys = []
    for raw_key in ts.sigkeys(dims, degs).split():
        letters = raw_key.strip('(').strip(')')
        if letters:
            keys.append(letters.split(','))

    del signature[0]

    # Pair every key with its signature value; the longest key comes last.
    entries = list(zip(keys, signature))
    max_key_length = len(keys[-1])

    # Keys are unique, so a dictionary keyed by their string form replaces a
    # scan over all entries for every shuffle. The string form is kept as the
    # comparison key because that is how shuffles were matched originally.
    value_by_key = {str(key): value for key, value in entries}

    for row_ind, (row_key, row_value) in enumerate(entries):
        for col_ind, (col_key, col_value) in enumerate(entries):
            if row_ind == col_ind:
                continue
            # Only pairs whose shuffles are still part of the signature.
            if len(row_key) + len(col_key) > max_key_length:
                continue

            # Sum of the signature values over all shuffles of the two keys.
            # ax.shuffles is handed copies of the keys.
            shuffle_sum = sum(
                value_by_key.get(str(shuffle), 0)
                for shuffle in ax.shuffles(list(row_key), list(col_key)))

            difference = shuffle_sum - row_value * col_value
            # Written so that a NaN difference is reported as a failure too.
            if not abs(difference) <= 0.00001:
                raise ValueError(
                    "Shuffle product identity failed for keys {} and {}: "
                    "difference {}".format(row_key, col_key, difference))
    return 1
Ejemplo n.º 12
0
def compare_reverse_test(l, a, dims, degs):
    """ Compare signature of a random path with its inverse

    If a path has steps [a,b,c], its inverse has steps [c,b,a]

    Args:
        l (int): length of the random path to be produced
        a (list): list of modifiers applied at each step of the random path
        dims (int): number of dimensions each path step has
        degs (int): number of signature degrees to be used in the test

    Returns:
        int: 1 if the test has been successful

    Raises:
        ValueError: If the signature of the inverse path is not actually computed for the inverse of the original path

    Example:
        >>> compare_reverse_test(100,[-1,0,1],2,4)
        1
    """

    path = np.array(ax.random_path(l, a, dims))

    # make a reverse path
    rev = np.array([list(step) for step in reversed(path)])
    out_sig = ts.stream2sig(path, degs)
    out_rev = ts.stream2sig(rev, degs)

    # Parse every key such as "(1,2)" into a tuple of letters ('1', '2').
    # Tuples keep multi-digit letters apart; joining the letters into one
    # string would make e.g. (1,11) and (11,1) indistinguishable once
    # dims >= 10. The empty key "()" becomes ('',), which has length 1.
    keys = [
        tuple(raw_key.strip('(').strip(')').split(','))
        for raw_key in ts.sigkeys(dims, degs).split()
    ]

    # For every key, the position of the key that is its reversal.
    position_of_reversed = {key[::-1]: ind for ind, key in enumerate(keys)}

    # Keys of odd length must have opposite signs in the two signatures and
    # keys of even length the same sign, so each combination below is
    # expected to vanish.
    out = []
    for ind, key in enumerate(keys):
        sign = -1 if len(key) % 2 == 0 else 1
        out.append(sign * out_sig[ind] + out_rev[position_of_reversed[key]])

    # The empty key counts as odd and both of its values are 1, so the first
    # combination must be exactly 2.
    if out[0] != 2:
        raise ValueError(
            "Unexpected value for the empty key in compare_reverse_test: "
            "{}".format(out[0]))
    for key, value in zip(keys[1:], out[1:]):
        # Written so that a NaN value is reported as a failure too.
        if not abs(value) <= 0.00001:
            raise ValueError(
                "Signature of the reversed path does not match for key {}: "
                "residual {}".format(key, value))
    return 1
Ejemplo n.º 13
0
def naive_SigKernel_esig(X, Y, depth):
    """Return the sum of term-by-term products of two truncated signatures.

    The signatures of ``X`` and ``Y`` are computed with ``sig.stream2sig``
    at the given ``depth``; the products are accumulated as ``float64``.
    """
    signatures = (sig.stream2sig(X, depth), sig.stream2sig(Y, depth))
    termwise = [left * right for left, right in zip(*signatures)]
    return np.sum(termwise, dtype=np.float64)
    def test(self, path, order=2, is_sig=False):
        """Tests the model against a particular participant.

        Parameters
        ----------
        path : str
            Path of the pickle file containing the streams
            of data from the participant.
        order : int, optional
            Order of the signature.
            Default is 2.
        is_sig : bool, optional
            Whether the test set files contain signatures.
            Default is false, in which case conversion to signatures will be carried out here.

        Returns
        -------
        numpy.ndarray
            3-dimensional vector indicating how often the participant
            has buckets that were classified in each clinical group.

        """

        # We load the pickle file of the participant. The context manager
        # closes the file even when unpickling fails.
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # this with files from a trusted source.
        with open(path, 'rb') as handle:
            collection = pickle.load(handle)

        # Each clinical group is assigned a point
        # on the plane, which was found using cross-validation.

        threshold = np.array([
            [1, 0],  # Borderline participants
            [0, 1],  # Healthy participants
            [-1 / np.sqrt(2), -1 / np.sqrt(2)]
        ])  # Bipolar participants

        # The same points as tuples, used to look up the index of a group.
        # Built once because it does not change between predictions.
        group_points = [tuple(point) for point in threshold.tolist()]

        # We construct the inputs and outputs to test the model
        x = []
        y = []

        for X in collection:
            # The input is the signature of the normalised path
            if is_sig:
                # If using synthetic data, the input is already a signature
                x.append(X.data)
            else:
                # If using the original data, we convert the normalised path into the signature here
                x.append(tosig.stream2sig(np.array(X.data), order))

            # Point of the clinical group of this element. It is not used
            # for the returned vector; the lookup is kept so that an
            # out-of-range diagnosis still raises here.
            y.append(threshold[X.diagnosis])

        # We find the predictions corresponding to the computed inputs
        predicted = self.reg.predict(x)

        # We find which group the predictions belong to, and
        # store how often the participant belongs to each group
        vector = np.zeros(3)
        for prediction in predicted:
            nearest = tuple(_findMin(prediction, threshold))
            vector[group_points.index(nearest)] += 1

        vector /= float(len(x))

        return vector
Ejemplo n.º 15
0
 def compute_signature(self, stream, depth):
     """Return ``tosig.stream2sig(stream, depth)``, the signature of ``stream`` truncated at ``depth``."""
     signature = tosig.stream2sig(stream, depth)
     return signature
 def embed(self, data):
     """Return the signature of ``data``'s concatenated drawing up to ``self.degree``.

     When ``self.log`` is true the log-signature (``ts.stream2logsig``) is
     returned instead of the signature (``ts.stream2sig``).
     """
     if self.log:
         return ts.stream2logsig(data.concat_drawing(), self.degree)
     return ts.stream2sig(data.concat_drawing(), self.degree)
Ejemplo n.º 17
0
def calculateSignature(path, order):
    """Return the signature of ``path[1]`` augmented with a time column.

    Parameters
    ----------
    path : sequence
        Only ``path[1]`` is used. It must be 2-dimensional (one row per
        observation), because a column is appended to it along axis 1.
    order : int
        Truncation order of the signature. Earlier versions ignored this
        argument and always used order 2.

    Returns
    -------
    numpy.ndarray
        The signature of the augmented stream without its first entry.

    """
    stream = path[1]

    # One time stamp 0, 1, 2, ... per observation, as a column vector.
    time = np.arange(len(stream)).reshape((-1, 1))
    augmented_stream = np.append(stream, time, axis=1)

    # Drop the first entry of the signature.
    return ts.stream2sig(augmented_stream, order)[1:]