def __init__(self):
    """Create an empty corpus with code-switching bookkeeping structures."""
    Corpus.__init__(self)
    self.scslabels = []
    # One frequency counter per statistic tracked for code-switching.
    self.cslabel_count, self.cstype_count, self.lang_pair_count = (
        Counter(), Counter(), Counter())
    self.multilingual_sentences = set()
def __init__(self, path, dirname, datatype):
    """Wrap a corpus file at dirname/path at file level ('f').

    The stored datatype is derived from the file extension:
    '.p' -> 'tokens', '.xml' -> 'parse', anything else -> 'plaintext'.
    Fix: dropped the unused local imports (os, isfile, isdir).
    """
    from os.path import join
    self.path = join(dirname, path)
    kwargs = {'print_info': False, 'level': 'f', 'datatype': datatype}
    Corpus.__init__(self, self.path, **kwargs)
    # NOTE(review): the caller's datatype is passed to Corpus.__init__, but
    # self.datatype is then overwritten from the extension — confirm intended.
    if self.path.endswith('.p'):
        self.datatype = 'tokens'
    elif self.path.endswith('.xml'):
        self.datatype = 'parse'
    else:
        self.datatype = 'plaintext'
def __init__(self, path, dirname):
    """Wrap a corpus file at dirname/path at file level ('f').

    The stored datatype is derived from the file extension:
    '.p' -> 'tokens', '.xml' -> 'parse', anything else -> 'plaintext'.
    Fix: dropped the unused local imports (os, isfile, isdir).
    """
    from os.path import join
    self.path = join(dirname, path)
    kwargs = {'print_info': False, 'level': 'f'}
    Corpus.__init__(self, self.path, **kwargs)
    if self.path.endswith('.p'):
        self.datatype = 'tokens'
    elif self.path.endswith('.xml'):
        self.datatype = 'parse'
    else:
        self.datatype = 'plaintext'
def __init__(self, path, dirname, datatype):
    """Wrap a corpus file at dirname/path at file level ("f").

    The stored datatype is derived from the file extension:
    ".p" -> "tokens", ".xml" -> "parse", anything else -> "plaintext".
    Fix: dropped the unused local imports (os, isfile, isdir).
    """
    from os.path import join
    self.path = join(dirname, path)
    kwargs = {"print_info": False, "level": "f", "datatype": datatype}
    Corpus.__init__(self, self.path, **kwargs)
    # NOTE(review): the caller's datatype is passed to Corpus.__init__, but
    # self.datatype is then overwritten from the extension — confirm intended.
    if self.path.endswith(".p"):
        self.datatype = "tokens"
    elif self.path.endswith(".xml"):
        self.datatype = "parse"
    else:
        self.datatype = "plaintext"
def __init__(self, char_dictionary=(None, None), label_dictionary=(None, None)):
    """Reads in a corpus file and sets the corpus variables.

    Keyword arguments:
    char_dictionary -- A tuple of dictionaries for characters to indices
        and indices to characters
    label_dictionary -- A tuple of dictionaries for labels to indices
        and indices to labels
    """
    label2idx = {'<PAD>': 0, 'lang1': 1, 'lang2': 2, 'other': 3,
                 'ne': 4, 'ambiguous': 5, 'fw': 6, 'mixed': 7, 'unk': 8}
    # Bug fix: invert the *local* mapping. The old code read
    # self.label2idx, which does not exist until Corpus.__init__ runs,
    # so it raised AttributeError.
    idx2label = {i: l for l, i in label2idx.items()}
    # NOTE(review): char_dictionary is accepted but never forwarded to
    # Corpus.__init__ — confirm whether that is intentional.
    Corpus.__init__(self, label_dictionary=(label2idx, idx2label))
def __init__(self, articles):
    """Split articles into train/test pools and extract per-article features."""
    Corpus.__init__(self, articles)
    features = {}  # fix: defined even when `articles` is empty (was a NameError)
    for article in articles:
        # Get the feature values for the current article.
        features = get_features(article)
        if article.train:
            # Put the feature dict in the training pool and remember the article.
            self.train_feats.append(features)
            self.train_articles.append(article)
        else:
            self.test_feats.append(features)
            self.test_articles.append(article)
    # Taken from the last article's dict (assumes all articles share the
    # same feature keys — TODO confirm).
    self.feat_names = features.keys()
def __init__(self, dictionary=None, corpus=None, index_file=None, max_docs=None, **kwargs):
    """Build (or load from `index_file`) an Annoy KNN index over the corpus.

    Keyword arguments:
    dictionary -- gensim-style dictionary forwarded to Corpus.__init__
    corpus -- the underlying corpus forwarded to Corpus.__init__
    index_file -- if given, load a pre-built Annoy index instead of building
    max_docs -- cap on the number of documents kept (via clip_corpus)
    Fixes: dropped the redundant bare `return` at the end of __init__ and
    the single-use end_time local.
    """
    Corpus.__init__(self, dictionary=dictionary, corpus=corpus)
    self.clip_corpus(max_docs)
    # Set up for KNN: one Annoy dimension per dictionary entry.
    features = len(self.dictionary)
    self.index = AnnoyIndex(features)
    start_time = datetime.datetime.now()
    if index_file:
        self.index.load(index_file)
    else:
        # No prebuilt index: TF-IDF transform, then add every document vector.
        self.transform_corpus(models.TfidfModel)
        for i, vector in enumerate(self):
            self.index.add_item(
                i, list(sparse2full(vector, features).astype(float)))
        self.index.build(self.no_trees)
    # Record how long building/loading the index took.
    self.train_time = datetime.datetime.now() - start_time
def __init__(self, articles, taglist, kind="wordlist"):
    """Split articles into train/test pools and build bag features.

    Keyword arguments:
    taglist -- tags passed through to get_bag_feats
    kind -- 'wordlist' (word frequencies) or 'poslist' (POS frequencies
        with ditto tags removed)
    """
    Corpus.__init__(self, articles)
    for article in articles:
        if article.train:
            # Keep a list of all articles in the training set.
            self.train_articles.append(article)
        else:
            # Keep a list of all articles in the testing set.
            self.test_articles.append(article)
    features = {}
    for article in articles:
        if kind == 'wordlist':
            features = get_bag_feats(article.wrd_fql, taglist)
        elif kind == 'poslist':
            features = get_bag_feats(remove_ditto(article.pos_fql), taglist)
        else:
            # Fix: fail fast with a clear message; previously an unknown
            # kind surfaced as a NameError on `features` below.
            raise ValueError("unknown kind: %r" % (kind,))
        # Put the feature dict in either the testing or training pool.
        if article.train:
            self.train_feats.append(features)
        else:
            self.test_feats.append(features)
    self.feat_names = features.keys()
def __init__(self, articles, kind='bow'):
    """Split articles into train/test pools and build bag-of-x features."""
    Corpus.__init__(self, articles)
    for article in articles:
        if article.train:
            # Keep a list of all articles in the training set.
            self.train_articles.append(article)
        else:
            # Keep a list of all articles in the testing set.
            self.test_articles.append(article)
    # Vocabulary bags are built from the training split only.
    wrd_bag = make_bag([a.wrd_fql for a in self.train_articles])
    pos_bag = make_bag([a.pos_fql for a in self.train_articles])
    sem_bag = make_bag([a.sem_fql for a in self.train_articles])
    features = {}  # fix: defined even when `articles` is empty (was a NameError)
    for article in articles:
        features = get_bag_of_x(article, wrd_bag, pos_bag, sem_bag, kind)
        # Put the feature dict in either the testing or training pool.
        if article.train:
            self.train_feats.append(features)
        else:
            self.test_feats.append(features)
    self.feat_names = features.keys()
def __init__(self, path):
    """Wrap the corpus at *path* at sentence level ('s'), quietly."""
    self.path = path
    Corpus.__init__(self, self.path, print_info=False, level='s')
def __init__(self, path, datatype):
    """Wrap the corpus at *path* at sentence level ('s') with the given datatype."""
    self.path = path
    Corpus.__init__(self, self.path,
                    print_info=False, level='s', datatype=datatype)
def __init__(self, path):
    """Load the corpus at *path* together with its ground-truth labels.

    The labels are read from '<path>/!truth.txt'.
    """
    Corpus.__init__(self, path)
    self.path = path
    truth_file = self.path + "/!truth.txt"
    self.truth_dict = read_classification_from_file(truth_file)
def __init__(self, path_to_mails):
    # Thin wrapper: delegate all construction to the base Corpus.
    Corpus.__init__(self, path_to_mails)
def __init__(self,src_db,dst_db,dictfile=None,nltk_data_path=None):
    # Remember the source DB; the base Corpus is initialised on the
    # destination DB (dictfile / nltk_data_path are passed straight through).
    self.src_db = src_db
    Corpus.__init__(self,dst_db,dictfile,nltk_data_path)
def __init__(self,src_file,dst_db,dictfile=None):
    # Remember the source file; the base Corpus is initialised on the
    # destination DB (dictfile is passed straight through).
    self.src_file = src_file
    Corpus.__init__(self,dst_db,dictfile)
def __init__(self, path, datatype):
    """Wrap the corpus at *path* at sentence level ("s") with the given datatype."""
    self.path = path
    Corpus.__init__(self, self.path,
                    print_info=False, level="s", datatype=datatype)
def __init__(self, path, preprocess=None, max_len=None):
    """Load the Quora paraphrase corpus from *path*.

    Keyword arguments:
    preprocess -- optional preprocessing hook forwarded to Corpus
    max_len -- optional maximum sequence length forwarded to Corpus
    Bug fix: the caller's preprocess/max_len are now forwarded; the old
    code passed hard-coded None for both, silently ignoring them.
    """
    Corpus.__init__(self, 'quora', path,
                    preprocess=preprocess, max_len=max_len)
    self.load()
def __init__(self, path, preprocess=None, max_len=None):
    """Load the Microsoft paraphrase corpus from *path*.

    Keyword arguments:
    preprocess -- optional preprocessing hook forwarded to Corpus
    max_len -- optional maximum sequence length forwarded to Corpus
    Bug fix: the caller's preprocess/max_len are now forwarded; the old
    code passed hard-coded None for both, silently ignoring them.
    """
    Corpus.__init__(self, 'microsoft', path,
                    preprocess=preprocess, max_len=max_len)
    self.load()
def __init__(self, src_db, dst_db, dictfile=None, nltk_data_path=None):
    # Remember the source DB; the base Corpus is initialised on the
    # destination DB (dictfile / nltk_data_path are passed straight through).
    self.src_db = src_db
    Corpus.__init__(self, dst_db, dictfile, nltk_data_path)