Example #1
def enlarge(img, xTimes, yTimes):
    # TODO: not yet debugged
    if xTimes > 1:
        img = np_repeat(img, xTimes, axis=0)
    if yTimes > 1:
        img = np_repeat(img, yTimes, axis=1)
    return img
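
A minimal usage sketch, assuming `np_repeat` is the usual `from numpy import repeat as np_repeat` alias: repeating along axis 0 duplicates rows and along axis 1 duplicates columns, so each pixel grows into an xTimes-by-yTimes block.

import numpy as np

img = np.array([[1, 2],
                [3, 4]])
big = enlarge(img, 2, 3)  # same as np.repeat(np.repeat(img, 2, axis=0), 3, axis=1)
# big.shape == (4, 6); big[0] is [1, 1, 1, 2, 2, 2]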
Example #2
File: doc2vec.py  Project: nonva/gensim
    def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None,
                          learn_doctags=True, learn_words=True, learn_hidden=True,
                          word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None):
        """
        Update distributed memory model ("PV-DM") by training on a single document.

        Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. This
        method implements the DM model with a projection (input) layer that is
        either the sum or mean of the context vectors, depending on the model's
        `dm_mean` configuration field.  See `train_dm_concat()` for the DM model
        with a concatenated input layer.

        The document is provided as `doc_words`, a list of word tokens which are looked up
        in the model's vocab dictionary, and `doctag_indexes`, which provide indexes
        into the doctag_vectors array.

        Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to
        prevent learning-updates to those respective model weights, as if using the
        (partially-)frozen model to infer other compatible vectors.

        This is the non-optimized, Python version. If you have a C compiler, gensim
        will use the optimized version from doc2vec_inner instead.

        """
        if word_vectors is None:
            word_vectors = model.syn0
        if word_locks is None:
            word_locks = model.syn0_lockf
        if doctag_vectors is None:
            doctag_vectors = model.docvecs.doctag_syn0
        if doctag_locks is None:
            doctag_locks = model.docvecs.doctag_syn0_lockf

        word_vocabs = [model.vocab[w] for w in doc_words if w in model.vocab and
                       model.vocab[w].sample_int > model.random.randint(2**32)]
        doctag_sum = np_sum(doctag_vectors[doctag_indexes], axis=0)
        doctag_len = len(doctag_indexes)

        for pos, word in enumerate(word_vocabs):
            reduced_window = model.random.randint(model.window)  # `b` in the original doc2vec code
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
            word2_indexes = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
            l1 = np_sum(word_vectors[word2_indexes], axis=0) + doctag_sum  # 1 x layer1_size
            if word2_indexes and model.cbow_mean:
                l1 /= (len(word2_indexes) + doctag_len)
            neu1e = train_cbow_pair(model, word, word2_indexes, l1, alpha,
                                    learn_vectors=False, learn_hidden=learn_hidden)
            if word2_indexes and not model.cbow_mean:
                neu1e /= (len(word2_indexes) + doctag_len)
            if learn_doctags:
                doctag_vectors[doctag_indexes] += neu1e * \
                    np_repeat(doctag_locks[doctag_indexes], model.vector_size).reshape(-1, model.vector_size)
            if learn_words:
                word_vectors[word2_indexes] += neu1e * \
                    np_repeat(word_locks[word2_indexes], model.vector_size).reshape(-1, model.vector_size)

        return len(word_vocabs)
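
A simplified sketch (not part of the gensim API) of the `np_repeat(locks, vector_size).reshape(-1, vector_size)` pattern above: each per-vector lock factor is expanded into a full row so it can gate the update applied to the corresponding vector.

import numpy as np

vector_size = 4
locks = np.array([1.0, 0.0, 0.5])    # one lock factor per doctag/word vector
neu1e = np.ones(vector_size)         # error term for this training step
scale = np.repeat(locks, vector_size).reshape(-1, vector_size)
update = neu1e * scale               # shape (3, 4); the row with lock 0.0 gets no update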
Example #3
def fan_regular_pols(np_verts,
                     np_pols,
                     np_distances,
                     np_faces_id,
                     custom_normals,
                     index_offset=0,
                     use_custom_normals=False,
                     output_old_v_id=True,
                     output_old_face_id=True,
                     output_pols_groups=True):

    pols_number = np_pols.shape[0]
    pol_sides = np_pols.shape[1]
    v_pols = np_verts[np_pols]  #shape [num_pols, num_corners, 3]

    if np.any(np_distances != 0):
        if use_custom_normals:
            normals = custom_normals
        else:
            normals = np_faces_normals(v_pols)
        average = (np.sum(v_pols, axis=1) / pol_sides
                   + normals * np_distances[:, np_newaxis])  # shape [num_pols, 3]
    else:
        average = np.sum(v_pols, axis=1) / pol_sides

    idx_offset = len(np_verts) + index_offset
    new_idx = np_arange(idx_offset, pols_number + idx_offset)
    new_pols = np.zeros([pols_number, pol_sides, 3], dtype=int)
    new_pols[:, :, 0] = np_pols
    new_pols[:, :, 1] = np_roll(np_pols, -1, axis=1)
    new_pols[:, :, 2] = new_idx[:, np_newaxis]

    old_vert_id = np_pols[:, 0].tolist() if output_old_v_id else []

    if output_old_face_id:
        old_face_id = np_repeat(np_faces_id[:, np_newaxis], pol_sides,
                                axis=1).tolist()
    else:
        old_face_id = []

    if output_pols_groups:
        pols_groups = np_repeat(1, len(new_pols) * pol_sides).tolist()
    else:
        pols_groups = []

    return (
        average.tolist(),
        new_pols.reshape(-1, 3).tolist(),
        old_vert_id,
        old_face_id,
        pols_groups,
    )
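
A short sketch of the `np_repeat(np_faces_id[:, np_newaxis], pol_sides, axis=1)` call above: each original face id is repeated once per side, so every new fan polygon can be traced back to the face it was cut from.

import numpy as np

np_faces_id = np.array([10, 11, 12])
pol_sides = 4
old_face_id = np.repeat(np_faces_id[:, np.newaxis], pol_sides, axis=1)
# array([[10, 10, 10, 10],
#        [11, 11, 11, 11],
#        [12, 12, 12, 12]])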
Example #4
def numpy_full_list(array, desired_length):
    '''returns an array of the desired length by repeating the last item'''
    if not isinstance(array, ndarray):
        array = np_array(array)

    length_diff = desired_length - array.shape[0]

    if length_diff > 0:
        new_part = np_repeat(array[np_newaxis, -1], length_diff, axis=0)
        return np_concatenate((array, new_part))[:desired_length]
    return array[:desired_length]
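
A minimal usage sketch, assuming the `np_*` names are the usual numpy aliases (`np_array`, `np_repeat`, `np_concatenate`, `np_newaxis`, `ndarray`):

import numpy as np

a = np.array([1, 2, 3])
numpy_full_list(a, 5)  # -> array([1, 2, 3, 3, 3])  (last item repeated)
numpy_full_list(a, 2)  # -> array([1, 2])           (simply truncated)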
Example #5
def numpy_match_long_repeat(list_of_arrays):
    '''match the lengths of numpy arrays by repeating the last item of the shorter ones'''
    out = []
    maxl = 0
    for array in list_of_arrays:
        maxl = max(maxl, array.shape[0])
    for array in list_of_arrays:
        length_diff = maxl - array.shape[0]
        if length_diff > 0:
            new_part = np_repeat(array[np_newaxis, -1], length_diff, axis=0)
            array = np_concatenate((array, new_part))
        out.append(array)
    return out
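
A minimal usage sketch under the same alias assumptions: each shorter array is padded with copies of its last item until all arrays share the longest length.

import numpy as np

a = np.array([1, 2, 3, 4])
b = np.array([10, 20])
a2, b2 = numpy_match_long_repeat([a, b])
# a2 -> array([1, 2, 3, 4]); b2 -> array([10, 20, 20, 20])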
Example #6
def numpy_full_list_cycle(array, desired_length):
    '''returns an array of the desired length by cycling over the array'''

    length_diff = desired_length - array.shape[0]
    if length_diff > 0:
        if length_diff < array.shape[0]:
            return np_concatenate((array, array[:length_diff]))

        new_part = np_repeat(array, ceil(length_diff / array.shape[0]), axis=0)
        if len(array.shape) > 1:
            shape = (ceil(length_diff / array.shape[0]), 1)
        else:
            shape = ceil(length_diff / array.shape[0])
        new_part = np_tile(array, shape)
        return np_concatenate((array, new_part[:length_diff]))

    return array[:desired_length]
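
A minimal usage sketch: when the missing part is shorter than the array a plain slice is appended, otherwise whole tiled copies are appended and trimmed.

import numpy as np

a = np.array([1, 2, 3])
numpy_full_list_cycle(a, 5)  # -> array([1, 2, 3, 1, 2])
numpy_full_list_cycle(a, 8)  # -> array([1, 2, 3, 1, 2, 3, 1, 2])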
Example #7
def numpy_match_long_cycle(list_of_arrays):
    '''match the lengths of numpy arrays by cycling over the shorter ones'''
    out = []
    maxl = 0
    for array in list_of_arrays:
        maxl = max(maxl, array.shape[0])
    for array in list_of_arrays:
        length_diff = maxl - array.shape[0]
        if length_diff > 0:
            if length_diff < array.shape[0]:
                array = np_concatenate((array, array[:length_diff]))
            else:
                new_part = np_repeat(array, ceil(length_diff / array.shape[0]), axis=0)
                if len(array.shape) > 1:
                    shape = (ceil(length_diff / array.shape[0]), 1)
                else:
                    shape = ceil(length_diff / array.shape[0])
                new_part = np_tile(array, shape)
                array = np_concatenate((array, new_part[:length_diff]))
        out.append(array)
    return out
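
A minimal usage sketch: each shorter array is extended by cycling over its own values until it matches the longest one.

import numpy as np

a = np.array([1, 2, 3, 4, 5])
b = np.array([10, 20])
a2, b2 = numpy_match_long_cycle([a, b])
# a2 -> array([1, 2, 3, 4, 5]); b2 -> array([10, 20, 10, 20, 10])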
Example #8
    def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None,
                                 learn_doctags=True, learn_words=True, learn_hidden=True,
                                 word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None):
        """
        Update distributed memory model ("PV-DM") by training on a single document, using a
        concatenation of the context window word vectors (rather than a sum or average).

        Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`.

        The document is provided as `doc_words`, a list of word tokens which are looked up
        in the model's vocab dictionary, and `doctag_indexes`, which provide indexes
        into the doctag_vectors array.

        Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to
        prevent learning-updates to those respective model weights, as if using the
        (partially-)frozen model to infer other compatible vectors.

        This is the non-optimized, Python version. If you have a C compiler, gensim
        will use the optimized version from doc2vec_inner instead.

        """
        if word_vectors is None:
            word_vectors = model.syn0
        if word_locks is None:
            word_locks = model.syn0_lockf
        if doctag_vectors is None:
            doctag_vectors = model.docvecs.doctag_syn0
        if doctag_locks is None:
            doctag_locks = model.docvecs.doctag_syn0_lockf

        word_vocabs = [model.vocab[w] for w in doc_words if w in model.vocab and
                       model.vocab[w].sample_int > model.random.rand() * 2**32]
        doctag_len = len(doctag_indexes)
        if doctag_len != model.dm_tag_count:
            return 0  # skip doc without expected number of doctag(s) (TODO: warn/pad?)

        null_word = model.vocab['\0']
        pre_pad_count = model.window
        post_pad_count = model.window
        padded_document_indexes = (
            (pre_pad_count * [null_word.index])  # pre-padding
            + [word.index for word in word_vocabs if word is not None]  # elide out-of-Vocabulary words
            + (post_pad_count * [null_word.index])  # post-padding
        )

        for pos in range(pre_pad_count, len(padded_document_indexes) - post_pad_count):
            word_context_indexes = (
                padded_document_indexes[(pos - pre_pad_count): pos]  # preceding words
                + padded_document_indexes[(pos + 1):(pos + 1 + post_pad_count)]  # following words
            )
            word_context_len = len(word_context_indexes)
            predict_word = model.vocab[model.index2word[padded_document_indexes[pos]]]
            # numpy advanced-indexing copies; concatenate, flatten to 1d
            l1 = concatenate((doctag_vectors[doctag_indexes], word_vectors[word_context_indexes])).ravel()
            neu1e = train_cbow_pair(model, predict_word, None, l1, alpha,
                                    learn_hidden=learn_hidden, learn_vectors=False)

            # filter by locks and shape for addition to source vectors
            e_locks = concatenate((doctag_locks[doctag_indexes], word_locks[word_context_indexes]))
            neu1e_r = (neu1e.reshape(-1, model.vector_size)
                       * np_repeat(e_locks, model.vector_size).reshape(-1, model.vector_size))

            if learn_doctags:
                np_add.at(doctag_vectors, doctag_indexes, neu1e_r[:doctag_len])
            if learn_words:
                np_add.at(word_vectors, word_context_indexes, neu1e_r[doctag_len:])

        return len(padded_document_indexes) - pre_pad_count - post_pad_count
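
A small stand-alone sketch of the `np_add.at` pattern above (assuming `np_add` is `numpy.add`): unlike fancy-index `+=`, `np.add.at` accumulates every occurrence when an index appears more than once, which matters when the same word repeats inside the context window.

import numpy as np

vecs = np.zeros((3, 2))
idx = [0, 0, 2]               # index 0 appears twice
updates = np.ones((3, 2))
np.add.at(vecs, idx, updates)
# vecs[0] == [2., 2.]  (both updates applied), vecs[2] == [1., 1.]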
Example #9
    def distrib_nn_for_cdf(self, ntss_tmp, bool_print: bool = False):
        """
        Compute two indicators, the mean and the standard deviation of the distances, needed to use the CDF of the normal distribution.
        The computation of these indicators is described in `Scoring Message Stream Anomalies in Railway Communication Systems, L. Foulon et al., 2019, ICDM Workshop <https://ieeexplore.ieee.org/abstract/document/8955558>`_.

        :param numpy.ndarray ntss_tmp: reference sequences
        :param boolean bool_print: if True, print the node statistics on the standard output

        :returns:
        :rtype: list(numpy.ndarray, numpy.array)
        """

        start_time = time_time()
        node_list, node_list_leaf, node_leaf_ndarray_mean = \
            self.get_list_nodes_and_barycentre()
        if bool_print:
            print("pretrait node --- %s seconds ---" %
                  (time_time() - start_time))
            stdout.flush()
            print(len(node_list), " nodes whose ", len(node_list_leaf),
                  " leafs in tree")
            stdout.flush()

        nb_leaf = len(node_list_leaf)

        cdf_mean = np_zeros((nb_leaf, len(ntss_tmp)))
        cdf_std = np_zeros(nb_leaf)
        nb_ts_by_node = np_zeros(nb_leaf, dtype=np_uint32)
        centroid_dist = np_square(cdist(node_leaf_ndarray_mean, ntss_tmp))

        for num, node in enumerate(node_list_leaf):
            cdf_std[node.id_numpy_leaf] = np_mean(node.std)
            nb_ts_by_node[node.id_numpy_leaf] = node.get_nb_sequences()

        dist_list = np_array([np_zeros(i) for i in nb_ts_by_node],
                             dtype=object)

        # squared distance between the barycentre and the sequences (ts) of the same node
        # TODO: np.vectorize?
        for node_nn in node_list_leaf:
            dist_list[node_nn.id_numpy_leaf] = cdist(
                [node_nn.mean], node_nn.get_sequences())[0]
        dist_list = np_square(dist_list)
        """ TODO np.vectorize ?"""
        for num, node in enumerate(node_list_leaf):
            node_id = node.id_numpy_leaf

            centroid_dist_tmp = centroid_dist[node_id]
            centroid_dist_tmp = centroid_dist_tmp.reshape(
                centroid_dist_tmp.shape + (1, ))
            centroid_dist_tmp = np_repeat(centroid_dist_tmp,
                                          nb_ts_by_node[node_id],
                                          axis=1)

            cdf_mean_tmp = np_add(centroid_dist_tmp, dist_list[node_id])
            cdf_mean[node_id] = np_sum(cdf_mean_tmp, axis=1)

        del dist_list
        del cdf_mean_tmp
        del centroid_dist_tmp

        cdf_mean = np_divide(cdf_mean.T, nb_ts_by_node)
        cdf_mean = np_sqrt(cdf_mean)

        self.cdf_mean = cdf_mean
        self.cdf_std = cdf_std
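
A reduced, hypothetical sketch of the `np_repeat(..., nb_ts_by_node[node_id], axis=1)` step: the column of squared centroid distances is repeated once per sequence stored in the leaf so it can be added element-wise to the per-sequence distances before summing.

import numpy as np

centroid_dist_node = np.array([1.0, 4.0, 9.0])  # squared distance of each reference sequence to the leaf barycentre
node_seq_dist = np.array([0.5, 0.25])           # squared distance of each stored sequence to the barycentre

tmp = np.repeat(centroid_dist_node.reshape(-1, 1), len(node_seq_dist), axis=1)
cdf_mean_node = np.sum(tmp + node_seq_dist, axis=1)  # one aggregated value per reference sequence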
Example #10
def inset_regular_pols(np_verts,
                       np_pols,
                       np_distances,
                       np_inset_rate,
                       np_make_inners,
                       np_faces_id,
                       custom_normals,
                       matrices,
                       offset_mode='CENTER',
                       proportional=False,
                       concave_support=True,
                       index_offset=0,
                       use_custom_normals=False,
                       output_old_face_id=True,
                       output_old_v_id=True,
                       output_pols_groups=True):

    pols_number = np_pols.shape[0]
    pol_sides = np_pols.shape[1]
    v_pols = np_verts[np_pols]  #shape [num_pols, num_corners, 3]
    if offset_mode == 'SIDES':
        inner_points = sides_mode_inset(v_pols, np_inset_rate, np_distances,
                                        concave_support, proportional,
                                        use_custom_normals, custom_normals)
    elif offset_mode == 'MATRIX':
        inner_points = matrix_mode_inset(v_pols, matrices, use_custom_normals,
                                         custom_normals)
    else:
        if any(np_distances != 0):
            if use_custom_normals:
                normals = custom_normals
            else:
                normals = np_faces_normals(v_pols)
            average = np.sum(v_pols, axis=1) / pol_sides  # shape [num_pols, 3]
            inner_points = (average[:, np_newaxis, :]
                            + (v_pols - average[:, np_newaxis, :]) * np_inset_rate[:, np_newaxis, np_newaxis]
                            + normals[:, np_newaxis, :] * np_distances[:, np_newaxis, np_newaxis])
        else:
            average = np.sum(v_pols, axis=1) / pol_sides  #shape [num_pols, 3]
            inner_points = average[:, np_newaxis, :] + (
                v_pols - average[:, np_newaxis, :]
            ) * np_inset_rate[:, np_newaxis, np_newaxis]

    idx_offset = len(np_verts) + index_offset

    new_v_idx = np_arange(idx_offset,
                          pols_number * pol_sides + idx_offset).reshape(
                              pols_number, pol_sides)

    side_pols = np.zeros([pols_number, pol_sides, 4], dtype=int)
    side_pols[:, :, 0] = np_pols
    side_pols[:, :, 1] = np_roll(np_pols, -1, axis=1)
    side_pols[:, :, 2] = np_roll(new_v_idx, -1, axis=1)
    side_pols[:, :, 3] = new_v_idx

    side_faces = side_pols.reshape(-1, 4)

    new_insets = new_v_idx[np_make_inners]

    if pol_sides == 4:
        new_faces = np_concatenate([side_faces, new_insets]).tolist()
    else:
        new_faces = side_faces.tolist() + new_insets.tolist()

    old_v_id = np_pols.flatten().tolist() if output_old_v_id else []
    if output_old_face_id:
        side_ids = np.repeat(np_faces_id[:, np_newaxis], pol_sides, axis=1)
        inset_ids = np_faces_id[np_make_inners]
        old_face_id = np.concatenate((side_ids.flatten(), inset_ids)).tolist()
    else:
        old_face_id = []

    if output_pols_groups:
        pols_groups = np_repeat(
            [1, 2], [len(side_faces), len(new_insets)]).tolist()
    else:
        pols_groups = []

    return (inner_points.reshape(-1, 3).tolist(), new_faces,
            new_insets.tolist(), old_v_id, old_face_id, pols_groups)
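
A tiny sketch of the `np_repeat([1, 2], [...])` call used for `pols_groups`: given an array of counts, `numpy.repeat` repeats each element its own number of times, labelling the side faces 1 and the inset faces 2 in a single call.

import numpy as np

np.repeat([1, 2], [3, 2])
# -> array([1, 1, 1, 2, 2])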
Example #11
File: doc2vec.py  Project: polcar/gensim
    def train_document_dm(model,
                          doc_words,
                          doctag_indexes,
                          alpha,
                          work=None,
                          neu1=None,
                          learn_doctags=True,
                          learn_words=True,
                          learn_hidden=True,
                          word_vectors=None,
                          word_locks=None,
                          doctag_vectors=None,
                          doctag_locks=None):
        """
        Update distributed memory model ("PV-DM") by training on a single document.

        Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. This
        method implements the DM model with a projection (input) layer that is
        either the sum or mean of the context vectors, depending on the model's
        `dm_mean` configuration field.  See `train_dm_concat()` for the DM model
        with a concatenated input layer.

        The document is provided as `doc_words`, a list of word tokens which are looked up
        in the model's vocab dictionary, and `doctag_indexes`, which provide indexes
        into the doctag_vectors array.

        Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to
        prevent learning-updates to those respective model weights, as if using the
        (partially-)frozen model to infer other compatible vectors.

        This is the non-optimized, Python version. If you have a C compiler, gensim
        will use the optimized version from doc2vec_inner instead.

        """
        if word_vectors is None:
            word_vectors = model.syn0
        if word_locks is None:
            word_locks = model.syn0_lockf
        if doctag_vectors is None:
            doctag_vectors = model.docvecs.doctag_syn0
        if doctag_locks is None:
            doctag_locks = model.docvecs.doctag_syn0_lockf

        word_vocabs = [
            model.vocab[w] for w in doc_words if w in model.vocab
            and model.vocab[w].sample_int > model.random.randint(2**32)
        ]
        doctag_sum = np_sum(doctag_vectors[doctag_indexes], axis=0)
        doctag_len = len(doctag_indexes)

        for pos, word in enumerate(word_vocabs):
            reduced_window = model.random.randint(
                model.window)  # `b` in the original doc2vec code
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(
                word_vocabs[start:(pos + model.window + 1 - reduced_window)],
                start)
            word2_indexes = [
                word2.index for pos2, word2 in window_pos
                if (word2 is not None and pos2 != pos)
            ]
            l1 = np_sum(word_vectors[word2_indexes],
                        axis=0) + doctag_sum  # 1 x layer1_size
            if word2_indexes and model.cbow_mean:
                l1 /= (len(word2_indexes) + doctag_len)
            neu1e = train_cbow_pair(model,
                                    word,
                                    word2_indexes,
                                    l1,
                                    alpha,
                                    learn_vectors=False,
                                    learn_hidden=learn_hidden)
            if word2_indexes and not model.cbow_mean:
                neu1e /= (len(word2_indexes) + doctag_len)
            if learn_doctags:
                doctag_vectors[doctag_indexes] += neu1e * \
                    np_repeat(doctag_locks[doctag_indexes], model.vector_size).reshape(-1, model.vector_size)
            if learn_words:
                word_vectors[word2_indexes] += neu1e * \
                    np_repeat(word_locks[word2_indexes], model.vector_size).reshape(-1, model.vector_size)

        return len(word_vocabs)
Example #12
def mock_noise(scale, size):
    """Return `size` copies of the `scale` parameter."""
    return np_repeat(scale, size)
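
A minimal usage sketch, assuming `np_repeat` aliases `numpy.repeat`:

mock_noise(0.5, 3)
# -> array([0.5, 0.5, 0.5])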