def create_feat_mat_1(graph):
    CCs = list(nx_clustering(graph).values())

    DCs = list(nx_average_neighbor_degree(graph).values())

    degrees = [tup[1] for tup in graph.degree()]

    edge_wts = [tup[2] for tup in graph.edges.data('weight')]

    A_mat = nx_to_numpy_matrix(graph)
    svs = np_linalg_svd(A_mat, full_matrices=False, compute_uv=False)

    if len(svs) >= 3:
        sv1 = svs[0]
        sv2 = svs[1]
        sv3 = svs[2]
    elif len(svs) >= 2:
        sv1 = svs[0]
        sv2 = svs[1]
        sv3 = 0
    else:
        sv1 = svs[0]
        sv2 = sv3 = 0

    feat_mat = np_vstack(
        (nx_density(graph), nx_number_of_nodes(graph), max(degrees),
         np_mean(degrees), np_median(degrees), np_var(degrees), max(CCs),
         np_mean(CCs), np_var(CCs), np_mean(edge_wts), max(edge_wts),
         np_var(edge_wts), np_mean(DCs), np_var(DCs), max(DCs), sv1, sv2,
         sv3)).T

    return feat_mat
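The helpers in this listing use module-level aliases (np_*, nx_*) whose imports are not shown. A plausible preamble for Examples #1 and #10, assuming the aliases map to the NumPy/NetworkX functions their names suggest:

# Assumed import aliases for Examples #1 and #10 (not part of the original snippets).
import numpy as np
import networkx as nx

np_array, np_mean, np_median, np_var, np_vstack = np.array, np.mean, np.median, np.var, np.vstack
np_linalg_svd = np.linalg.svd

nx_clustering = nx.clustering
nx_average_neighbor_degree = nx.average_neighbor_degree
nx_density = nx.density
nx_number_of_nodes = nx.number_of_nodes
nx_to_numpy_matrix = nx.to_numpy_matrix  # replaced by nx.to_numpy_array in NetworkX >= 3.0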
Example #2
 def data_handing_after_handing(self,
                                data=None,
                                dt_max=0.013,
                                dt_min=0.006,
                                var_threshold=1000):
     '''
     Process the data after the rough OD calculation and return the real OD.
     :param data: the data to be processed; it contains only one key
     :param dt_max: upper-limit parameter of the confidence interval
     :param dt_min: lower-limit parameter of the confidence interval
     :param var_threshold: variance threshold
     :return: the real OD
     '''
     if data is None:
         data = {}
     # the static-state cutoff follows the variance threshold that was passed in
     static_cutoff = 1000 if var_threshold == 1000 else 500
     for i in data:
         var_tem = np_var(data[i])
         if var_tem <= static_cutoff:  # static state
             key = list(data.keys())[0]
             key = self.all_board_address.index(key)
             # print('key=', key)
             # print('ave=', ave)
             max_lim = ave[key] + ave[key] * dt_max  # upper limit of the confidence interval
             min_lim = ave[key] - ave[key] * dt_min  # lower limit of the confidence interval
             for k in data:
                 # rebuild the list instead of removing items while iterating over it
                 data[k] = [j for j in data[k] if min_lim <= j <= max_lim]
             return data
         else:  # state of agitation
             for k in data:
                 data[k] = [max(data[k])]
             return data
Example #3
def within_cluster_similarity_statistics(cluster):
    """ Calculate the sequence similarities within a cluster.

    Return the similarity matrix.
    """
    representations = cluster.seqs
    _representations = cluster.seqs_as_list()
    lenrep = len(_representations)

    similarities = np.ones((lenrep, lenrep, 3))
    for j in range(lenrep):
        for k in range(j + 1, lenrep):
            # calculate once
            sim = diff_sequences(_representations[j], _representations[k])
            # but fill both triangles of the matrix
            similarities[j, k, :] = [
                representations[j].id, representations[k].id, sim
            ]
            similarities[k, j, :] = [
                representations[k].id, representations[j].id, sim
            ]

    average_rep_sim = np_mean(similarities[:, :, 2])
    var_rep_sim = np_var(similarities[:, :, 2])

    return similarities, average_rep_sim, var_rep_sim
Example #4
def transfer_same_dist(test_list, train_list, com_comp, test_rem):
    # Move com_comp lines from train_list into test_list, sampling them so that
    # their lengths follow the size distribution of the remaining test lines.
    if len(test_rem) == 0:
        return test_list, train_list, com_comp

    sizes = [len(line) for line in test_rem]
    mean_test_size = np_mean(sizes)
    sd = sqrt(np_var(sizes))
    if sd != 0:
        test_rem_dist = norm_dist(mean_test_size, sd)
        p_dist = [test_rem_dist.pdf(len(line)) for line in train_list]
        norm_ct = sum(p_dist)
        if norm_ct != 0:
            p_dist = [val / norm_ct for val in p_dist]
        train_rem = rand_choice(train_list,
                                size=com_comp,
                                replace=False,
                                p=p_dist)
    else:
        train_rem = [
            line for line in train_list if len(line) == mean_test_size
        ][:com_comp]
    # numpy.random.choice returns an ndarray; convert it back to a list before
    # concatenating, otherwise "+" would attempt element-wise addition
    test_list = test_list + list(train_rem)
    for line in train_rem:
        train_list.remove(line)
    return test_list, train_list, com_comp
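transfer_same_dist likewise relies on aliases that are not shown. A minimal sketch of what they presumably resolve to, based only on the call signatures used above:

# Assumed aliases for transfer_same_dist (not part of the original snippet).
from math import sqrt
import numpy as np
from numpy.random import choice as rand_choice  # rand_choice(a, size=..., replace=..., p=...)
from scipy.stats import norm as norm_dist       # norm_dist(mean, sd).pdf(x)
np_mean, np_var = np.mean, np.var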
Example #5
File: main2.py Project: luk-f/SAX
def test(data=None,
         precision_bp=2000,
         nb_bp=3,
         taille_fenetre=10,
         breakp=None,
         abscisse=None):
    """Paramètres"""
    #donnees
    if data == None:
        data = [
            580.38, 581.86, 580.97, 580.8, 579.79, 580.39, 580.42, 580.82,
            581.4, 581.32, 581.44, 581.68, 581.17, 580.53, 580.01, 579.91,
            579.14, 579.16, 579.55, 579.67, 578.44, 578.24, 579.1, 579.09,
            579.35, 578.82, 579.32, 579.01, 579, 579.8, 579.83, 579.72, 579.89,
            580.01, 579.37, 578.69, 578.19, 578.67, 579.55, 578.92, 578.09,
            579.37, 580.13, 580.14, 579.51, 579.24, 578.66, 578.86, 578.05,
            577.79, 576.75, 576.75, 577.82, 578.64, 580.58, 579.48, 577.38,
            576.9, 576.94, 576.24, 576.84, 576.85, 576.9, 577.79, 578.18,
            577.51, 577.23, 578.42, 579.61, 579.05, 579.26, 579.22, 579.38,
            579.1, 577.95, 578.12, 579.75, 580.85, 580.41, 579.96, 579.61,
            578.76, 578.18, 577.21, 577.13, 579.1, 578.25, 577.91, 576.89,
            575.96, 576.8, 577.68, 578.38, 578.52, 579.74, 579.31, 579.89,
            579.96, 579.96, 579.96
        ]
    #value of the subdivision used to find the breakpoints
    #number of breakpoints > 0

    #Print the variance and mean of the data
    print("variance = ", np_var(data))
    print("standard deviation = ", np_var(data)**0.5)
    print("mean = ", np_mean(data))

    #Compute the integral of the fitted Gaussian
    mu = np_mean(data)
    sig = np_var(data)
    ecart = (max(data) - min(data))
    integral_g = quad(gaussian,
                      min(data) - ecart,
                      max(data) + ecart,
                      args=(mu, sig))
    print("gaussian integral", integral_g)
    print(mu, sig, ecart)

    #Call the SAX function
    vector_c, vector_c_fit = sax(data, taille_fenetre)
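The SAX helpers (sax, gaussian, breakpoints) are defined elsewhere in the luk-f/SAX project, but the remaining names look like standard-library aliases. A plausible preamble for this example and the longer variant in Example #9, under that assumption:

# Assumed standard-library aliases for Examples #5 and #9 (not shown in the snippets).
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from scipy.integrate import quad
np_mean, np_var, np_linspace = np.mean, np.var, np.linspace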
Example #6
def precision(PE):
    """ Calculate precision as the inverse variance of the updated prediction error.

    return updated precision and updated average_free_energy
    """
    with np.errstate(all='raise'):
        try:
            variance = np_var(PE)  # np_var(PE, ddof=1)  # mad(PE)
            variance = variance if variance > 0.00001 else 0.00001  # so log(var) should max at -5
            pi = np_log(1. / variance)  # should max at 5
            new_precision = 1 / (1 + np.exp(-(pi - 2.5))
                                 )  # should be about max. 1

            return new_precision  # , variance
        except Exception as e:
            raise Exception("RuntimeWarning in precision(PE):", str(e), "PE:",
                            PE) from e
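A minimal usage sketch for precision(), assuming PE is a 1-D array of prediction errors and that np_var/np_log alias numpy.var/numpy.log; the input values below are purely illustrative:

# Illustrative usage only; the np_var/np_log aliases are assumptions.
import numpy as np
np_var, np_log = np.var, np.log

prediction_errors = np.array([0.1, -0.2, 0.05, 0.3])
print(precision(prediction_errors))  # a value in (0, 1); a lower-variance PE gives a higher precision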
Example #7
 def clc_var_ave(self, file_dir=r'F:\test\----dif.txt', save_='yes'):
     '''
     Calculate the variance and mean of the raw OD readings of every board.
     If the variance is above the threshold (about 450), the fermenter is
     considered to be in the state of agitation.
     :param file_dir: path of the file the results are appended to
     :param save_: whether to save the results to the file ('yes' to save)
     '''
     global var, ave
     var = []
     ave = []
     for i in self.all_board_address:
         var.append(np_var(self.OD_raw_value_ON_OFF[i][:min_len]))
         ave.append(np_average(self.OD_raw_value_ON_OFF[i][:min_len]))
     if save_ == 'yes':
         with open(file_dir, 'a') as file:
             for i in var:
                 file.write(str(i) + '\t')
             file.write('\n')
             for i in ave:
                 file.write(str(i) + '\t')
             file.write('\n')
Example #8
def inter_cluster_similarity_statistics(cluster_a, cluster_b):
    """ Calculate the similarities between the two cluster's sequences.

    Return the similarity matrix, the mean similarity and its variance.
    """
    seqs_a = cluster_a.seqs_as_list()
    seqs_b = cluster_b.seqs_as_list()
    lenrep_a = len(cluster_a.seqs)
    lenrep_b = len(cluster_b.seqs)

    similarities = np.ones((lenrep_a, lenrep_b, 3))
    for j in range(lenrep_a):
        for k in range(lenrep_b):  # sadly have to compare all of them
            # calculate similarity between sequences
            sim = diff_sequences(seqs_a[j], seqs_b[k])
            similarities[j, k, :] = [
                cluster_a.seqs[j].id, cluster_b.seqs[k].id, sim
            ]

    average_cluster_sim = np_mean(similarities[:, :, 2])
    var_cluster_sim = np_var(similarities[:, :, 2])

    return similarities, average_cluster_sim, var_cluster_sim
Example #9
def test(data=None,
         precision_bp=2000,
         nb_bp=3,
         taille_fenetre=10,
         breakp=None,
         abscisse=None):
    """Paramètres"""
    #donnees
    if data == None:
        data = [
            580.38, 581.86, 580.97, 580.8, 579.79, 580.39, 580.42, 580.82,
            581.4, 581.32, 581.44, 581.68, 581.17, 580.53, 580.01, 579.91,
            579.14, 579.16, 579.55, 579.67, 578.44, 578.24, 579.1, 579.09,
            579.35, 578.82, 579.32, 579.01, 579, 579.8, 579.83, 579.72, 579.89,
            580.01, 579.37, 578.69, 578.19, 578.67, 579.55, 578.92, 578.09,
            579.37, 580.13, 580.14, 579.51, 579.24, 578.66, 578.86, 578.05,
            577.79, 576.75, 576.75, 577.82, 578.64, 580.58, 579.48, 577.38,
            576.9, 576.94, 576.24, 576.84, 576.85, 576.9, 577.79, 578.18,
            577.51, 577.23, 578.42, 579.61, 579.05, 579.26, 579.22, 579.38,
            579.1, 577.95, 578.12, 579.75, 580.85, 580.41, 579.96, 579.61,
            578.76, 578.18, 577.21, 577.13, 579.1, 578.25, 577.91, 576.89,
            575.96, 576.8, 577.68, 578.38, 578.52, 579.74, 579.31, 579.89,
            579.96, 579.96, 579.96
        ]
    #value of the subdivision used to find the breakpoints
    #number of breakpoints > 0

    fig = plt.figure(figsize=(15, 10))

    # TODO fix the axes here !!!
    years = mdates.YearLocator()
    yearsFmt = mdates.DateFormatter('%Y')
    months = mdates.MonthLocator()
    days = mdates.DayLocator()
    daysFmt = mdates.DateFormatter('%d')
    hours = mdates.HourLocator()
    minutes = mdates.MinuteLocator()

    gauss = fig.add_subplot(2, 1, 1)
    gauss.xaxis.set_major_locator(days)
    gauss.xaxis.set_major_formatter(daysFmt)
    gauss.xaxis.set_minor_locator(hours)
    #plt.ylabel('some numbers')

    #Print the variance and mean of the data
    print("variance = ", np_var(data))
    print("standard deviation = ", np_var(data)**0.5)
    print("mean = ", np_mean(data))

    #Compute the integral of the fitted Gaussian
    mu = np_mean(data)
    sig = np_var(data)
    ecart = (np.amax(data) - np.amin(data))
    integral_g = quad(gaussian,
                      np.amin(data) - ecart,
                      np.amax(data) + ecart,
                      args=(mu, sig))
    print("gaussian integral", integral_g)

    #Call the SAX function
    vector_c, vector_c_fit = sax(data, taille_fenetre)
    if abscisse is None:
        plt.plot(vector_c)
        plt.plot(data)
    else:
        plt.plot(abscisse, vector_c)
        plt.plot(abscisse, data)

    if breakp is None:
        breakp = breakpoints(integral_g, min(data), np_mean(data),
                             precision_bp, nb_bp, mu, sig)
    for bp in breakp:
        print("threshold: ", bp)
        plt.axhline(bp, c='grey')
        #print("seuil 2 : ",bp+2*(mu-bp))
        #plt.axhline(bp+2*(mu-bp))

    #Plot the Gaussian
    #fig.add_subplot(2,1,2)
    """
    x = np_linspace(min(data)-ecart,max(data)+ecart,100)
    plt.plot(x,gaussian(x,mu,sig))
    for bp in breakp:
        plt.axvline(bp)
    fig.add_subplot(2,1,1)
    """

    tab_classif = [0] * (len(breakp) + 1)
    print(tab_classif)
    """
    for val in vector_c:
        it = 0
        for var in breakp:
            if val < var:
                tab_classif[it] = tab_classif[it] + 1
                break
            it = it + 1
    """

    pos_x = 1
    for val in vector_c_fit:
        it = 0
        test = 0
        int_char = 0
        for var in breakp:
            if val < var:
                tab_classif[it] = tab_classif[it] + 1
                test = 1
                # binary conversion: str(bin(int_char))[2:]
                plt.annotate(
                    str(bin(int_char))[2:],
                    xy=(taille_fenetre * pos_x - taille_fenetre / 2, val),
                    xytext=(taille_fenetre * pos_x - taille_fenetre / 2,
                            val + 2),
                    arrowprops=dict(facecolor='white', shrink=0.05),
                )
                break
            it = it + 1
            int_char = int_char + 1
        if test == 0:
            tab_classif[it] = tab_classif[it] + 1
            # binary conversion: str(bin(int_char))[2:]
            plt.annotate(
                str(bin(int_char))[2:],
                xy=(taille_fenetre * pos_x - taille_fenetre / 2, val),
                xytext=(taille_fenetre * pos_x - taille_fenetre / 2, val + 2),
                arrowprops=dict(facecolor='white', shrink=0.05),
            )
        pos_x = pos_x + 1

    print(tab_classif)

    plt.show()
    return breakp
Example #10
def create_feat_mat(graph_list, n_feats):
    dens_pos = [nx_density(graph) for graph in graph_list]
    nodes_pos = [nx_number_of_nodes(graph) for graph in graph_list]

    # Clustering coefficient (CC) statistics - mean, max and variance; one big loop over all graphs is faster
    CC_mean = []
    CC_mean_append = CC_mean.append
    CC_max = []
    CC_max_append = CC_max.append
    CC_var = []
    CC_var_append = CC_var.append
    # Degree correlation - avg degree of the neighborhood     
    DC_mean = []
    DC_mean_append = DC_mean.append
    DC_max = []
    DC_max_append = DC_max.append
    DC_var = []
    DC_var_append = DC_var.append
    # Degree statistics
    degree_mean = []
    degree_mean_append = degree_mean.append
    degree_max = []
    degree_max_append = degree_max.append
    degree_median = []
    degree_median_append = degree_median.append
    degree_var = []
    degree_var_append = degree_var.append
    # Edge weight statistics 
    edge_wt_mean = []
    edge_wt_mean_append = edge_wt_mean.append
    edge_wt_max = []
    edge_wt_max_append = edge_wt_max.append
    edge_wt_var = []
    edge_wt_var_append = edge_wt_var.append
    # First 3 singular values 
    sv1 = []
    sv1_append = sv1.append
    sv2 = []
    sv2_append = sv2.append
    sv3 = []
    sv3_append = sv3.append
    for graph in graph_list:

        CCs = list(nx_clustering(graph).values())
        CC_max_append(max(CCs))
        CC_mean_append(np_mean(CCs))
        CC_var_append(np_var(CCs))

        DCs = list(nx_average_neighbor_degree(graph).values())
        DC_max_append(max(DCs))
        DC_mean_append(np_mean(DCs))
        DC_var_append(np_var(DCs))

        degrees = [tup[1] for tup in graph.degree()]
        degree_mean_append(np_mean(degrees))
        degree_median_append(np_median(degrees))
        degree_max_append(max(degrees))
        degree_var_append(np_var(degrees))

        edge_wts = [tup[2] for tup in graph.edges.data('weight')]
        edge_wt_mean_append(np_mean(edge_wts))
        edge_wt_var_append(np_var(edge_wts))
        edge_wt_max_append(max(edge_wts))

        A_mat = nx_to_numpy_matrix(graph)
        svs = np_linalg_svd(A_mat, full_matrices=False, compute_uv=False)

        if len(svs) >= 3:
            sv1_append(svs[0])
            sv2_append(svs[1])
            sv3_append(svs[2])
        elif len(svs) >= 2:
            sv1_append(svs[0])
            sv2_append(svs[1])
            sv3_append(0)
        else:
            sv1_append(svs[0])
            sv2_append(0)
            sv3_append(0)

    feat_mat = np_vstack((dens_pos, nodes_pos, degree_max, degree_mean, degree_median, degree_var, CC_max, CC_mean,
                          CC_var, edge_wt_mean, edge_wt_max, edge_wt_var, DC_mean, DC_var, DC_max, sv1, sv2, sv3)).T

    if n_feats == 1:
        feat_mat = np_array(dens_pos).reshape(-1, 1)

    return feat_mat
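A small usage sketch for create_feat_mat, assuming the aliased imports sketched after Example #1; the toy weighted graph is illustrative only:

# Illustrative usage only; relies on the assumed aliases shown after Example #1.
import networkx as nx

g = nx.Graph()
g.add_weighted_edges_from([(0, 1, 0.5), (1, 2, 0.8), (0, 2, 0.3)])

X = create_feat_mat([g], n_feats=18)
print(X.shape)  # (1, 18): one row of topological features per graph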