def new_infer_vector(self, graph_wl_path, batch_size=1024, epochs=10):
    """
    Slightly newer, faster version of infer_vector.
    :param graph_wl_path: a .g2v<depth> graph document
    :param batch_size: the batch size for the training (default 1024)
    :param epochs: number of passes used to train the network to get the
        inferred vector; the paper suggests a small value such as 5
    """
    # make a new corpus that also contains the supplied file
    newCorpus = Corpus(corpus_dir=self.corpus.corpus_dir,
                       extension=self.corpus.extension)
    newCorpus.scan_and_load_corpus()
    newCorpus.add_file(graph_wl_path)
    new_graph_id = newCorpus._graph_name_to_id_map[graph_wl_path]

    (infer_graph, batch_inputs, batch_labels, inf_normalized_embeddings,
     inf_loss, optimizer, inf_graph_embeddings) = self.infer_initial(newCorpus)

    # use the new corpus to train the network for a bit and spit out the embeddings
    with tf.Session(graph=infer_graph,
                    config=tf.ConfigProto(allow_soft_placement=False)) as inf_sess:
        init = tf.global_variables_initializer()
        inf_sess.run(init)
        loss = 0
        for i in range(epochs):
            while not newCorpus.epoch_flag:
                # get the (target, context) wordID pairs
                batch_data, batch_labels_data = newCorpus.generate_batch_from_file(
                    batch_size)
                feed_dict = {
                    batch_inputs: batch_data,
                    batch_labels: batch_labels_data
                }
                _, loss_val = inf_sess.run([optimizer, inf_loss],
                                           feed_dict=feed_dict)
                loss += loss_val
            newCorpus.epoch_flag = False
            loss = 0
        final_embeddings = inf_normalized_embeddings.eval()
    return final_embeddings[new_graph_id]
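
# Illustrative sketch (not part of the original module): new_infer_vector above
# assumes that self.infer_initial(newCorpus) builds a fresh inference graph and
# returns the handles unpacked there. A standalone version of that contract,
# mirroring the ops used inline in infer_vector below, could look like the
# function here. The name and explicit arguments are assumptions made for
# illustration; it relies on the same module-level `tf` (TensorFlow 1.x) and
# `math` imports as the rest of the file.
def build_inference_graph(trained_embeddings, embedding_size, num_negsample,
                          learning_rate, num_subgraphs, subgraph_id_freqs):
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        batch_inputs = tf.placeholder(tf.int32, shape=[None])
        batch_labels = tf.placeholder(tf.int64, shape=[None, 1])

        # frozen embeddings for the known graphs plus one trainable row for the new graph
        old_graph_embeddings = tf.constant(trained_embeddings)
        new_embedding = tf.Variable(
            tf.random_uniform([1, embedding_size],
                              -0.5 / embedding_size, 0.5 / embedding_size))
        inf_graph_embeddings = tf.concat([old_graph_embeddings, new_embedding], 0)
        batch_graph_embeddings = tf.nn.embedding_lookup(inf_graph_embeddings,
                                                        batch_inputs)

        weights = tf.Variable(
            tf.truncated_normal([num_subgraphs, embedding_size],
                                stddev=1.0 / math.sqrt(embedding_size)))
        biases = tf.Variable(tf.zeros(num_subgraphs))

        # NCE loss with negative sampling from the subgraph frequency distribution
        inf_loss = tf.reduce_mean(
            tf.nn.nce_loss(
                weights=weights, biases=biases,
                labels=batch_labels, inputs=batch_graph_embeddings,
                num_sampled=num_negsample, num_classes=num_subgraphs,
                sampled_values=tf.nn.fixed_unigram_candidate_sampler(
                    true_classes=batch_labels, num_true=1,
                    num_sampled=num_negsample, unique=True,
                    range_max=num_subgraphs, distortion=0.75,
                    unigrams=subgraph_id_freqs)))

        global_step = tf.Variable(0, trainable=False)
        lr = tf.maximum(
            tf.train.exponential_decay(learning_rate, global_step,
                                       100000, 0.96, staircase=True),
            0.001)
        optimizer = tf.train.GradientDescentOptimizer(lr).minimize(
            inf_loss, global_step=global_step)

        norm = tf.sqrt(
            tf.reduce_mean(tf.square(inf_graph_embeddings), 1, keep_dims=True))
        inf_normalized_embeddings = inf_graph_embeddings / norm

    return (infer_graph, batch_inputs, batch_labels, inf_normalized_embeddings,
            inf_loss, optimizer, inf_graph_embeddings)
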
def infer_vector(self, graph_wl_path, batch_size=256, epochs=10):
    """
    Given a graph document file, infer its embedding based on the technique of
    Le and Mikolov (2014). We assume that this truly is an unseen example and
    not one already considered in the corpus.
    :param graph_wl_path: a .g2v<depth> graph document
    :param batch_size: the batch size for the training (default 256)
    :param epochs: number of passes used to train the network to get the
        inferred vector; the paper suggests a small value such as 5
    """
    # make a new corpus that also contains the supplied file
    newCorpus = Corpus(corpus_dir=self.corpus.corpus_dir,
                       extension=self.corpus.extension)
    newCorpus.scan_and_load_corpus()
    newCorpus.add_file(graph_wl_path)
    new_graph_id = newCorpus._graph_name_to_id_map[graph_wl_path]

    # infer_graph = tf.Graph()
    # with infer_graph.as_default():
    batch_inputs = tf.placeholder(tf.int32, shape=([None, ]))
    batch_labels = tf.placeholder(tf.int64, shape=([None, 1]))

    # the trained embeddings from earlier, kept fixed during inference
    old_graph_embeddings = tf.constant(self.graph_embeddings_for_normals)
    # a single new, randomly initialised row for the unseen graph
    new_embedding = tf.Variable(
        tf.random_uniform([1, self.embedding_size],
                          -0.5 / self.embedding_size,
                          0.5 / self.embedding_size))
    inf_graph_embeddings = tf.concat([old_graph_embeddings, new_embedding], 0)

    batch_graph_embeddings = tf.nn.embedding_lookup(inf_graph_embeddings,
                                                    batch_inputs)

    # hidden layer
    weights = tf.Variable(
        tf.truncated_normal([newCorpus.num_subgraphs, self.embedding_size],
                            stddev=1.0 / math.sqrt(self.embedding_size)))
    biases = tf.Variable(tf.zeros(newCorpus.num_subgraphs))

    # negative sampling
    inf_loss = tf.reduce_mean(
        tf.nn.nce_loss(
            weights=weights,
            biases=biases,
            labels=batch_labels,
            inputs=batch_graph_embeddings,
            num_sampled=self.num_negsample,
            num_classes=newCorpus.num_subgraphs,
            sampled_values=tf.nn.fixed_unigram_candidate_sampler(
                true_classes=batch_labels,
                num_true=1,
                num_sampled=self.num_negsample,
                unique=True,
                range_max=newCorpus.num_subgraphs,
                distortion=0.75,
                unigrams=newCorpus.subgraph_id_freq_map_as_list)
            # unigrams=self.corpus.subgraph_id_freq_map_as_list)  # original
        ))

    # the number of steps we have performed; we make sure not to train this
    global_step = tf.Variable(0, trainable=False)
    # exponential decay of the learning rate
    learning_rate = tf.train.exponential_decay(self.learning_rate,
                                               global_step,
                                               100000,
                                               0.96,
                                               staircase=True)
    # keep the learning rate at or above 0.001 to ensure at least minimal learning
    learning_rate = tf.maximum(learning_rate, 0.001)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        inf_loss, global_step=global_step)

    norm = tf.sqrt(
        tf.reduce_mean(tf.square(inf_graph_embeddings), 1, keep_dims=True))
    inf_normalized_embeddings = inf_graph_embeddings / norm

    # use the new corpus to train the network for a bit and spit out the embeddings
    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=False)) as inf_sess:
        init = tf.global_variables_initializer()
        inf_sess.run(init)
        loss = 0
        for i in range(epochs):
            while not newCorpus.epoch_flag:
                # get the (target, context) wordID pairs
                batch_data, batch_labels_data = newCorpus.generate_batch_from_file(
                    batch_size)
                feed_dict = {
                    batch_inputs: batch_data,
                    batch_labels: batch_labels_data
                }
                _, loss_val = inf_sess.run([optimizer, inf_loss],
                                           feed_dict=feed_dict)
                loss += loss_val
            newCorpus.epoch_flag = False
            loss = 0
        final_embeddings = inf_normalized_embeddings.eval()
    return final_embeddings[new_graph_id]
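
# Illustrative usage (not part of the original module). The enclosing class is
# assumed to be the already-trained graph2vec-style skipgram model; `model`,
# the file path, and the .g2v depth below are placeholders for illustration:
#
#   model = ...  # a trained instance of the enclosing class
#   vec = model.infer_vector("path/to/new_graph.g2v3",
#                            batch_size=256, epochs=10)
#   # `vec` is the row of the normalized embedding matrix that belongs to the
#   # newly added graph, with shape (embedding_size,).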