Example #1
    def init_sims(self, replace=False):
        """
        Precompute L2-normalized vectors.

        If `replace` is set, forget the original vectors and only keep the normalized
        ones, which saves lots of memory!

        Note that you **cannot continue training** after doing a replace. The model becomes
        effectively read-only: you can call `most_similar`, `similarity` etc., but not `train`.

        """
        if getattr(self, 'doctag_syn0norm', None) is None or replace:
            logger.info("precomputing L2-norms of doc weight vectors")
            if replace:
                for i in range(self.doctag_syn0.shape[0]):  # `xrange` in the original Python 2 code
                    self.doctag_syn0[i, :] /= sqrt((self.doctag_syn0[i, :] ** 2).sum(-1))
                self.doctag_syn0norm = self.doctag_syn0
            else:
                if self.mapfile_path:
                    self.doctag_syn0norm = np_memmap(
                        self.mapfile_path+'.doctag_syn0norm', dtype=REAL,
                        mode='w+', shape=self.doctag_syn0.shape)
                else:
                    self.doctag_syn0norm = empty(self.doctag_syn0.shape, dtype=REAL)
                np_divide(self.doctag_syn0, sqrt((self.doctag_syn0 ** 2).sum(-1))[..., newaxis], self.doctag_syn0norm)
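
For reference, a minimal, self-contained sketch of the same row-wise L2 normalization pattern on toy data (the array and names here are illustrative, not the gensim API):

import numpy as np

vecs = np.random.rand(5, 3).astype(np.float32)          # toy stand-in for doctag_syn0
norms = np.sqrt((vecs ** 2).sum(-1))[..., np.newaxis]   # per-row L2 norms, shape (5, 1)
vecs_norm = np.empty(vecs.shape, dtype=np.float32)
np.divide(vecs, norms, vecs_norm)                       # same out-argument pattern as above
assert np.allclose(np.sqrt((vecs_norm ** 2).sum(-1)), 1.0)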
Example #2
    def polaset2np_array(self, i_dataset):
        """Return a list of numpy 2D array containing the selected dataset

        i_dataset: index of the dataset table
        """
        integration = self.dw_io.get_integration(self.datasets[i_dataset].th)
        if not self.correction:
            data_l = self.dw_io.get_data(self.datasets[i_dataset].th)
        else:
            data_l = self.dw_io.get_cdata(self.datasets[i_dataset].th,
                                          self.cfileh)

        El = np_divide((data_l[0] + 0.001).transpose(),
                       integration).transpose()
        Er = np_divide((data_l[1] + 0.001).transpose(),
                       integration).transpose()
        stk_q = np_divide((data_l[2] + 0.001).transpose(),
                          integration).transpose()
        stk_u = np_divide((data_l[3] + 0.001).transpose(),
                          integration).transpose()
        stk_i = El + Er
        stk_v = El - Er
        stk_phi = 0.5 * np_arctan(div0(stk_u, stk_q))
        stk_p = stk_u * stk_u + stk_q * stk_q + stk_v * stk_v
        stk_p = np_sqrt(stk_p)
        stk_p = div0(stk_p, stk_i)
        stk_p[stk_p >= 100] = 0.0
        #stk_p   = np_array([ 0.0 if x >= 100 else x for x in stk_p ])

        return [El, Er, stk_q, stk_u]  #, stk_i, stk_v, stk_phi, stk_p]
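
`div0` is not defined in this snippet; a plausible reconstruction, assuming it means "element-wise division that yields 0 where the denominator is 0", could look like this (an assumption, not the project's actual helper):

import numpy as np

def div0(a, b):
    # Assumed behaviour: a / b element-wise, with 0.0 wherever b == 0,
    # and divide-by-zero warnings suppressed.
    with np.errstate(divide='ignore', invalid='ignore'):
        out = np.true_divide(a, b)
        out[~np.isfinite(out)] = 0.0   # replace inf/NaN from zero denominators
    return out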
Example #3
    def run(self):
        doc_model = self.doc_model
        docvecs = doc_model.docvecs
        n = len(docvecs)
        #p=len(doc_model.docvecs[0])
        #docvecs.init_sims()
        #self.doctag_syn0norm = docvecs.doctag_syn0norm
        self.syn0norm = empty(docvecs.doctag_syn0.shape, dtype='float32')
        np_divide(docvecs.doctag_syn0,
                  sqrt((docvecs.doctag_syn0**2).sum(-1))[..., None],
                  self.syn0norm)
        #nn=Parallel(n_jobs=6)(delayed(find_nn)(i,self.syn0norm) for i in range(n))
        nn = [find_nn(i, self.syn0norm) for i in range(n)]
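
`find_nn` is defined elsewhere in that project; since the rows of `syn0norm` are unit-length, a hypothetical implementation returning the index of the most cosine-similar other document might be:

import numpy as np

def find_nn(i, syn0norm):
    # Hypothetical sketch: with L2-normalized rows, a dot product is cosine
    # similarity; exclude row i itself before taking the argmax.
    sims = syn0norm @ syn0norm[i]
    sims[i] = -np.inf
    return int(np.argmax(sims))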
Example #4
    def dataset2np_array(self, i_dataset):
        """Return a numpy 2D array or a list of numpy 2D arrays containing the selected dataset
        (ALPHA)
        i_dataset: index of the dataset table
        """
        integration = self.dw_io.get_integration(self.datasets[i_dataset].th)
        if not self.correction:
            data_l = self.dw_io.get_data(self.datasets[i_dataset].th)
        else:
            data_l = self.dw_io.get_cdata(self.datasets[i_dataset].th,
                                          self.cfileh)

        if len(data_l) == 1:
            return np_divide(data_l[0].transpose(), integration).transpose()
        else:
            El = np_divide((data_l[0] + 0.001).transpose(),
                           integration).transpose()
            Er = np_divide((data_l[1] + 0.001).transpose(),
                           integration).transpose()
            return [El, Er]
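
The transpose/divide/transpose pattern in examples #2 and #4 divides each row of the data by the matching entry of `integration`; assuming `integration` is a 1-D array whose length equals the data's first axis, it is equivalent to a direct broadcast:

import numpy as np

X = np.arange(12, dtype=float).reshape(4, 3)   # toy data block
integration = np.array([1.0, 2.0, 4.0, 8.0])   # one value per row of X

a = np.divide(X.transpose(), integration).transpose()  # pattern used above
b = X / integration[:, np.newaxis]                     # direct broadcast
assert np.allclose(a, b)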
Example #5
    def distrib_nn_for_cdf(self, ntss_tmp, bool_print: bool = False):
        """
        Calculates the two indicators, average and standard deviation of the distances, necessary for the use of the CDF of the normal distribution.
        The computation of these indicators are described in `Scoring Message Stream Anomalies in Railway Communication Systems, L.Foulon et al., 2019, ICDMWorkshop <https://ieeexplore.ieee.org/abstract/document/8955558>`_.

        :param numpy.ndarray ntss_tmp: Reference sequences
        :param boolean bool_print: and True, Displays the nodes stats on the standard output

        :returns:
        :rtype: list(numpy.ndarray, numpy.array)
        """

        start_time = time_time()
        node_list, node_list_leaf, node_leaf_ndarray_mean = \
            self.get_list_nodes_and_barycentre()
        if bool_print:
            print("pretrait node --- %s seconds ---" %
                  (time_time() - start_time))
            stdout.flush()
            print(len(node_list), " nodes whose ", len(node_list_leaf),
                  " leafs in tree")
            stdout.flush()

        nb_leaf = len(node_list_leaf)

        cdf_mean = np_zeros((nb_leaf, len(ntss_tmp)))
        cdf_std = np_zeros(nb_leaf)
        nb_ts_by_node = np_zeros(nb_leaf, dtype=np_uint32)
        centroid_dist = np_square(cdist(node_leaf_ndarray_mean, ntss_tmp))

        for node in node_list_leaf:
            cdf_std[node.id_numpy_leaf] = np_mean(node.std)
            nb_ts_by_node[node.id_numpy_leaf] = node.get_nb_sequences()

        dist_list = np_array([np_zeros(i) for i in nb_ts_by_node],
                             dtype=object)

        # compute the squared distance between the barycentre and the
        # sequences of the same node
        # TODO: np.vectorize?
        for node_nn in node_list_leaf:
            dist_list[node_nn.id_numpy_leaf] = cdist(
                [node_nn.mean], node_nn.get_sequences())[0]
        dist_list = np_square(dist_list)
        """ TODO np.vectorize ?"""
        for num, node in enumerate(node_list_leaf):
            node_id = node.id_numpy_leaf

            centroid_dist_tmp = centroid_dist[node_id]
            centroid_dist_tmp = centroid_dist_tmp.reshape(
                centroid_dist_tmp.shape + (1, ))
            centroid_dist_tmp = np_repeat(centroid_dist_tmp,
                                          nb_ts_by_node[node_id],
                                          axis=1)

            cdf_mean_tmp = np_add(centroid_dist_tmp, dist_list[node_id])
            cdf_mean[node_id] = np_sum(cdf_mean_tmp, axis=1)

        del dist_list
        del cdf_mean_tmp
        del centroid_dist_tmp

        cdf_mean = np_divide(cdf_mean.T, nb_ts_by_node)
        cdf_mean = np_sqrt(cdf_mean)

        self.cdf_mean = cdf_mean
        self.cdf_std = cdf_std
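
How the two stored indicators would feed the normal CDF (illustrative only, with dummy values in place of the attributes set above; after the final transpose, `cdf_mean` has shape `(len(ntss_tmp), nb_leaf)`):

import numpy as np
from scipy.stats import norm

cdf_mean = np.array([[2.5, 4.0]])   # dummy, shape (n_sequences, n_leaves)
cdf_std = np.array([0.8, 1.2])      # dummy, shape (n_leaves,)
d = 3.0                             # hypothetical distance to score
p = norm.cdf(d, loc=cdf_mean[0, 0], scale=cdf_std[0])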
Example #6
def r5_dnn_image(target_dirname,
                 chandat_obj=None,
                 chandat_dnn_obj=None,
                 is_saving_chandat_image=True):
    LOGGER.info('{}: r5: Turning chandat into upsampled envelope...'.format(
        target_dirname))
    if chandat_obj is None:
        chandat_obj = loadmat(os_path_join(target_dirname, CHANDAT_FNAME))
    f0 = chandat_obj['f0']
    if chandat_dnn_obj is None:
        chandat_dnn_obj = loadmat(
            os_path_join(target_dirname, CHANDAT_DNN_FNAME))
    chandat_dnn = chandat_dnn_obj['chandat_dnn']
    beam_position_x = chandat_dnn_obj['beam_position_x']
    depth = chandat_dnn_obj['depth']
    if f0.ndim == 2:
        f0 = f0[0, 0]

    rf_data = chandat_dnn.sum(axis=1)
    del chandat_dnn, chandat_dnn_obj['chandat_dnn']
    # design a bandpass filter
    n = 4
    order = n // 2  # butter() expects an integer filter order
    # f0 is a NumPy scalar here, so list / scalar broadcasts to an ndarray
    critical_frequencies = [1e6, 9e6] / (4 * f0 / 2)
    b, a = butter(order, critical_frequencies,
                  btype='bandpass')  # Results are correct

    # chandat_dnn = chandat_dnn.astype(float, copy=False) # REVIEW: necessary?

    rf_data_filt = filtfilt(b,
                            a,
                            rf_data,
                            axis=0,
                            padtype='odd',
                            padlen=3 * (max(len(b), len(a)) - 1))  # Correct
    del a, b

    env = np_apply_along_axis(better_envelope, 0, rf_data_filt)
    # print('r5: env.shape =', env.shape)

    np_divide(env, env.max(), out=env)
    clip_to_eps(env)
    # np_clip(env, np_spacing(1), None, out=env)
    env_dB = np_zeros_like(env)
    np_log10(env, out=env_dB)
    np_multiply(env_dB, 20, out=env_dB)

    # Upscale lateral sampling
    up_scale = get_dict_from_file_json(
        os_path_join(
            target_dirname,
            TARGET_PARAMETERS_FNAME))[TARGET_PARAMETERS_KEY_SCALE_UPSAMPLE]
    up_scale_inverse = 1 / up_scale

    num_beams = env.shape[1]

    x = np_arange(1, num_beams + 1)

    new_x = np_arange(1, num_beams + up_scale_inverse, up_scale_inverse)

    # TODO: optimization: instead of doing this apply thing, can we pass in the
    #       whole `env` and specify axis?
    def curried_pchip(y):
        return pchip(x, y)(new_x)

    env_up = np_apply_along_axis(curried_pchip, 1, env)
    # print('r5: env_up.shape =', env_up.shape)
    del curried_pchip, new_x, x

    clip_to_eps(env_up)
    # np_clip(env_up, np_spacing(1), None, out=env_up)
    env_up_dB = np_zeros_like(env_up)
    np_log10(env_up, out=env_up_dB)
    np_multiply(env_up_dB, 20, out=env_up_dB)

    beam_position_x_up = np_linspace(beam_position_x.min(),
                                     beam_position_x.max(), env_up_dB.shape[1])  # pylint: disable=E1101, E1136
    del beam_position_x

    chandat_image_obj = {
        'rf_data': rf_data,
        'rf_data_filt': rf_data_filt,
        'env': env,
        'env_dB': env_dB,
        'envUp': env_up,
        'envUp_dB': env_up_dB,
        'beam_position_x_up': beam_position_x_up,
        'depth': depth,
    }

    if is_saving_chandat_image:
        chandat_image_path = os_path_join(target_dirname,
                                          CHANDAT_IMAGE_SAVE_FNAME)
        savemat(chandat_image_path, chandat_image_obj)

    LOGGER.info('{}: r5 Done'.format(target_dirname))
    return chandat_image_obj
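
`clip_to_eps` and `better_envelope` come from elsewhere in the project. The commented-out `np_clip` lines suggest what the first one does; the second is assumed here to be Hilbert-transform envelope detection (both are reconstructions, not the project's code):

import numpy as np
from scipy.signal import hilbert

def clip_to_eps(arr):
    # Mirrors the commented-out np_clip calls above: floor the array at
    # machine epsilon in place so the subsequent log10 never sees zeros.
    np.clip(arr, np.spacing(1), None, out=arr)

def better_envelope(signal_1d):
    # Assumption: envelope via the magnitude of the analytic signal.
    return np.abs(hilbert(signal_1d))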