Ejemplo n.º 1
0
    def train(self):
        """Build a token corpus from the indexed documents plus a list of
        common English words, then train the autocomplete models and the
        spellchecker's word frequencies on it.

        Side effects: sets ``self._trained`` and ``self._corpus_size``.
        """
        print('Training searcher...')
        start = datetime.now()

        tokens_corpus = []

        # create corpus from indexed docs
        doc_indexes = list(self._indexes_collection.find({}))

        # extend corpus with node body tokens
        for node in doc_indexes:
            tokens_corpus.extend(node["body"])

        # extend corpus with 10,000 of the most commonly used english words.
        # FIX: use a context manager so the file handle is closed even if
        # reading raises (the original leaked the handle on error).
        with open('resources/english-words.txt') as text_file:
            english_words = text_file.read().split('\n')
        tokens_corpus.extend(english_words)

        # perform training of our models
        models.train_models(" ".join(tokens_corpus))
        self._spellcheck.word_frequency.load_words(tokens_corpus)

        self._trained = True
        self._corpus_size = len(doc_indexes)
        print('Finished training in ' + (str(datetime.now() - start)) + 's')
Ejemplo n.º 2
0
    def train(self):
        """Train one phrase model per category and persist each to disk.

        For category id 0 no category filter is applied (the full phrase
        query is used). Categories that cannot be resolved are logged and
        skipped.

        Returns:
            dict: ``self._mapping`` of category id -> saved model path.
        """
        for cat_id in self.categories:

            query = SELECT_PHRASE_QUERY

            # FIX: the original used `cat_id is not 0`, which compares
            # identity rather than value (and is a SyntaxWarning on 3.8+);
            # compare with != instead.
            if cat_id != 0:
                filter_part = SELECT_PHRASE_CATEGORY_FILTER.format(
                    cat_id=cat_id)
                query += filter_part
            cat_details = self._get_category_details(cat_id)

            if cat_details is None:
                logger.error('No category id {}'.format(cat_id))
                continue

            cat_code = cat_details.get('category_code')
            cat_name = cat_details.get('category')
            # e.g. "12_SOME_CODE.pkl"; spaces removed, dashes -> underscores
            fname = '{}_{}.pkl'.format(cat_id,
                                       cat_code).replace(" ",
                                                         "").replace("-", "_")
            # renamed from `dir` to avoid shadowing the builtin;
            # exist_ok avoids the check-then-create race of the original
            out_dir = os.path.join(base_dir, self.save_dir)
            os.makedirs(out_dir, exist_ok=True)

            path = os.path.join(out_dir, fname)
            logger.info("Generating corpus for {}".format(cat_name))
            corpus = self._generate_corpus(query)
            logger.info("Training model for {}".format(cat_name))
            train_models(corpus, model_name=path)
            self._mapping[cat_id] = path
            logger.info("Saved model to {}".format(path))

        return self._mapping
Ejemplo n.º 3
0
def train_bigtxt():
    """unnecessary helper function for training against
    default corpus data (big.txt)"""

    bigtxtpath = os.path.join(os.path.dirname(__file__), 'bigNepali.txt')
    with open(bigtxtpath, 'r') as bigtxtfile:
        # FIX: read() already returns str — the redundant str() wrapper
        # from the original was dropped.
        models.train_models(bigtxtfile.read())
Ejemplo n.º 4
0
    async def completing(letters):
        """Retrain the models on all saved user events, then return
        current-word predictions for *letters* via
        ``autocomplete.predict_currword``.
        """
        words = await Complete.fetch_all_saved_user_events()
        # FIX: the original indexed with range(len(...)) and built the
        # string with quadratic `+=`; a single join produces the exact
        # same text (leading ' ' plus ' ' before each event).
        text = ' ' + ''.join(' ' + str(pos['event']) for pos in words)

        models.train_models(text)
        return autocomplete.predict_currword(letters)
    def load_autocomplete_markov_chain(self):
        """Concatenate every file matching ``bigData/*`` — normalizing each
        line to be newline-terminated — and train the models on the result.
        """
        # FIX: accumulate chunks and join once; the original's repeated
        # `bigString += line` is quadratic on large corpora.
        chunks = []
        for file_name in glob('bigData/*'):
            with open(file_name, 'r') as infile:
                for line in infile:
                    # only a file's final line can lack a trailing newline
                    # when iterating line-by-line; add one for uniformity
                    if line[-1] != '\n':
                        line += '\n'
                    chunks.append(line)

        train_models(''.join(chunks))
Ejemplo n.º 6
0
import autocomplete
from autocomplete import models

# Read a year's worth of subject lines, collapse them into one string by
# stripping newlines, and train the autocomplete models at import time.
# NOTE(review): `file` shadows a name and the path is hard-coded relative
# to the working directory — confirm this script is always run from the
# project root.
with open('one_year_subject_lines.txt', 'r') as file:
    data = file.read().replace('\n', '')
models.train_models(data)

def trainModel(text):
    """Thin wrapper: train the autocomplete models on *text*."""
    models.train_models(text)
Ejemplo n.º 8
0

def getAllEntities():
    """Fetch every article's ``entities.entity`` field from the database.

    Returns:
        list: article documents, or ``[]`` when the query fails.
        (FIX: the original implicitly returned None on error, which
        crashes any downstream iteration over the result.)
    """
    try:
        return list(db.articles.find({}, {"entities.entity": 1}))
    except Exception as e:
        # best-effort: report the failure and fall back to an empty result
        print(e)
        return []


# snapshot of all article entity documents, fetched once at import time
entityList = getAllEntities()


def getAllEntitiesAsString():
    """Flatten every entity name in ``entityList`` into one string.

    Each entity is followed by a single space (so the result has a
    trailing space, matching the original output exactly).
    """
    # FIX: join once instead of the original's quadratic `string +=` loop
    return "".join(entity['entity'] + " "
                   for article in entityList
                   for entity in article['entities'])


# Build the training corpus from all stored entities and train the
# autocomplete models at import time; a failure is reported but not fatal.
trainingString = getAllEntitiesAsString()

try:
    models.train_models(trainingString)
    print("Training Completed")
except Exception as e:
    print(e)
 def trainModel(self, string):
     """Train the autocomplete models on *string* (e.g. city names
     loaded from a file)."""
     # FIX: corrected "autcomplete" -> "autocomplete" in the status message
     print(
         'Training autocomplete tool to look for cities from file specified...'
     )
     models.train_models(string)
	def trainModel(self, string):
		"""Train the autocomplete models on *string* (e.g. city names
		loaded from a file)."""
		# FIX: corrected "autcomplete" -> "autocomplete" in the status message
		print('Training autocomplete tool to look for cities from file specified...')
		models.train_models(string)