def __init__(self):
    self.user_mng = UsersManager()
    self.products_mng = ProductsManager()
    self.pm_mng = PrivateMessageManager()

    products = utilities.load_json(products_path)
    users = utilities.load_json(users_path)
    credit_cards = utilities.load_json(credit_cards_path)
    private_messages = utilities.load_json(pm_path)
    categories = utilities.load_json(categories_path)

    # utilities.load_json appears to return 0 when a file can't be loaded,
    # hence the guards before each import loop.
    if categories != 0:
        for category in categories:
            self.products_mng.categories_mng.import_category(category)
    if products != 0:
        for product in products:
            self.products_mng.import_product(product)
    if users != 0:
        for user in users:
            self.user_mng.import_user(user)
    if credit_cards != 0:
        for credit_card in credit_cards:
            self.user_mng.import_credit_card(credit_card)
    if private_messages != 0:
        for private_message in private_messages:
            self.pm_mng.import_private_message(private_message)

    UbidManager.total += 1

def main():
    sys.stdout.write('Loading countries.json... ')
    countries = load_json(json_file)
    sys.stdout.write(Fore.GREEN + 'DONE\n')

    sys.stdout.write('Creating TimeZone, Language, and Currency objects... ')
    time_zones = extract_unique_time_zones(countries)
    create_TimeZone_objects(time_zones)
    languages = load_json('./json/languages.json')
    create_Language_objects(languages)
    currencies = extract_unique_currencies(countries)
    currency_name = load_json('./json/currencies.json')
    currency_name['BOV'] = 'Bolivian Mvdol'
    currency_name['SSP'] = 'South Sudanese Pound'
    currency_name['CHE'] = 'WIR Euro'
    currency_name['CHW'] = 'WIR Franc'
    currency_name['USN'] = 'United States dollar (next day)'
    currency_name['USS'] = 'United States dollar (same day)'
    currency_name['UYI'] = 'Uruguay Peso en Unidades Indexadas'
    create_Currency_objects(currencies, currency_name)
    sys.stdout.write(Fore.GREEN + 'OK\n')

    # the Language, Currency and TimeZone objects must be present
    # in the database before we create the Country objects!
    sys.stdout.write('Creating Country objects... ')
    create_Country_objects(countries)
    sys.stdout.write(Fore.GREEN + 'OK\n')

def process():
    data = load_json(PATH_STEP2_CLEAN)
    kFold = MyKFold(10, shuffle=True)
    print("Generating sentences using MLE language models.")
    for train_tweets, _ in kFold(data):
        models = init_models(3, MLE)
        train_sents = compress(train_tweets)
        # Pad the sentences for each model, returning a list of (ngram, vocab) pairs
        ngrams = padded_multiple_models(3, train_sents)
        # Train the models on their ngrams
        fit_multiple_models(models, ngrams)
        for i in range(3):
            print(f"\nGenerating {Ngrams(i)} sentences:")
            for j in range(10):  # generate 10 sentences for each ngram order
                new_sentence = []
                word = models[i].generate(text_seed=['<s>'])
                new_sentence.append(word)
                while word != '</s>':
                    word = models[i].generate(text_seed=[word])
                    new_sentence.append(word)
                print(f"#{j}: [{' '.join(new_sentence)}]")
        break  # only the first fold is used for generation

def __init__(self, hawking, bot, *args, **kwargs):
    self.hawking = hawking
    self.bot = bot

    self.questions = []
    self.is_mid_question_refresh = False
    self.last_question_refresh_time = time.time()

    ## Load config data
    self.submission_top_time = CONFIG_OPTIONS.get("stupid_question_top_time", "month")
    self.submission_count = CONFIG_OPTIONS.get("stupid_question_submission_count", 500)
    self.refresh_time_seconds = CONFIG_OPTIONS.get("stupid_question_refresh_time_seconds", 21600)

    ## Load module specific configs from 'stupid_questions.json' located in modules folder
    modules_folder_name = CONFIG_OPTIONS.get("modules_folder", "modules")
    config = utilities.load_json(os.path.sep.join([utilities.get_root_path(), modules_folder_name, "stupid_questions.json"]))
    reddit_client_id = config.get("reddit_client_id")
    reddit_secret = config.get("reddit_secret")
    subreddits = CONFIG_OPTIONS.get("stupid_question_subreddits", ["NoStupidQuestions"])

    try:
        self.reddit = Reddit(client_id=reddit_client_id, client_secret=reddit_secret, user_agent=self.REDDIT_USER_AGENT)
        ## Use a multireddit to pull random post from any of the chosen subreddits
        self.subreddit = self.reddit.subreddit("+".join(subreddits))
    except Exception:
        logger.exception("Unable to create reddit/subreddit instance")

    self.bot.loop.create_task(self.load_questions())

def __init__(self, hawking, bot, *args, **kwargs):
    self.hawking = hawking
    self.bot = bot

    ## Load module specific configs from 'stupid_questions.json' located in modules folder
    modules_folder_name = CONFIG_OPTIONS.get("modules_folder", "modules")
    config = utilities.load_json(
        os.path.sep.join([
            utilities.get_root_path(),
            modules_folder_name,
            "stupid_questions.json"
        ]))
    reddit_client_id = config.get("reddit_client_id")
    reddit_secret = config.get("reddit_secret")
    subreddits = CONFIG_OPTIONS.get("stupid_question_subreddits", ["NoStupidQuestions"])

    try:
        self.reddit = Reddit(client_id=reddit_client_id, client_secret=reddit_secret, user_agent=self.REDDIT_USER_AGENT)
        ## Use a multireddit to pull random post from any of the chosen subreddits
        self.subreddit = self.reddit.subreddit("+".join(subreddits))
    except Exception as e:
        utilities.debug_log("Unable to create reddit/subreddit instance,", e, debug_level=1)

def run(self):
    ## Keep bot going despite any misc service errors
    try:
        self.bot.run(utilities.load_json(self.token_file_path)[self.TOKEN_KEY])
    except Exception as e:
        utilities.debug_print("Critical exception when running bot", e, debug_level=0)
        time.sleep(1)
        self.run()

def main():
    sys.stdout.write('Loading albums.json... ')
    albums = load_json(json_file)
    sys.stdout.write(Fore.GREEN + 'DONE\n')

    sys.stdout.write('Creating Album objects... ')
    create_Album_objects(albums)
    sys.stdout.write(Fore.GREEN + 'OK\n')

def get_response():
    global response
    if response is None:
        response = utilities.load_json(REQUEST_URL)
        if response["statusCode"] != "OK":
            raise Exception("Code: " + response["statusCode"] + ", Message: " + response["statusMessage"])
    return response

def main():
    sys.stdout.write('Loading recordings.json... ')
    recordings = load_json('./json/recordings.json')
    sys.stdout.write(Fore.GREEN + 'DONE\n')

    sys.stdout.write('Creating Recording objects... ')
    for recording in recordings:
        create_Recording_object(recording)
    sys.stdout.write(Fore.GREEN + 'OK\n')

def main():
    rates = load_json('./json/rates.json')
    for currency_pair, usd_rate in rates.items():
        # Strip the three-character base-currency prefix to get the ISO code
        currency = currency_pair[3:]
        try:
            Currency_object = models.Currency.objects.get(iso_code=currency)
        except models.Currency.DoesNotExist:
            continue
        Currency_object.usd_rate = usd_rate
        Currency_object.save()

async def _experiment_details(
        experiment_id: str = Path(default='latest', title="ID of experiment")):
    if experiment_id == 'latest':
        experiment_id = max(os.listdir(config.EXPERIMENTS_DIR))
    experiment_dir = os.path.join(config.EXPERIMENTS_DIR, experiment_id)
    args = utils.load_json(
        filepath=os.path.join(experiment_dir, 'config.json'))
    classes = data.LabelEncoder.load(
        fp=os.path.join(experiment_dir, 'y_tokenizer.json')).classes
    performance = utils.load_json(
        filepath=os.path.join(experiment_dir, 'performance.json'))
    response = {
        'message': HTTPStatus.OK.phrase,
        'status-code': HTTPStatus.OK,
        'data': {
            "classes": classes,
            "args": args,
            "performance": performance
        }
    }
    config.logger.info(json.dumps(response, indent=2))
    return response

def run(self):
    '''Starts the bot up'''

    ## So ideally there would be some flavor of atexit.register or signal.signal command to gracefully shut the bot
    ## down upon SIGTERM or SIGINT. However that doesn't seem to be possible at the moment. Discord.py's got most of
    ## the functionality built into the base close() method that fires on SIGINT and SIGTERM, but the bot never ends
    ## up getting properly disconnected from the voice channels that it's connected to. I end up having to wait for
    ## a time out. Otherwise the bot will be in a weird state upon starting back up, and attempting to speak in one
    ## of the channels that it was previously in. Fortunately this bad state will self-recover in a minute or so,
    ## but it's still unpleasant. A temporary fix is to bump up the RestartSec= property in the service config to be
    ## long enough to allow for the bot to be forcefully disconnected

    logger.info('Starting up the bot.')
    self.bot.run(utilities.load_json(self.token_file_path)["token"])

def predict(experiment_id, text):
    """Predict the class for a text using a trained model from an experiment."""
    # Get experiment config
    experiment_dir = os.path.join(config.EXPERIMENTS_DIR, experiment_id)
    experiment_config = utilities.load_json(
        os.path.join(experiment_dir, 'config.json'))
    args = Namespace(**experiment_config)

    # Preprocess
    texts = [text]
    X_tokenizer = data.Tokenizer.load(
        fp=os.path.join(experiment_dir, 'X_tokenizer.json'))
    y_tokenizer = data.LabelEncoder.load(
        fp=os.path.join(experiment_dir, 'y_tokenizer.json'))
    preprocessed_texts = data.preprocess_texts(
        texts, lower=args.lower, filters=args.filters)

    # Create dataset
    X_infer = np.array(X_tokenizer.texts_to_sequences(preprocessed_texts))
    y_filler = np.array([0] * len(X_infer))
    infer_set = data.TextDataset(
        X=X_infer, y=y_filler, batch_size=args.batch_size,
        max_filter_size=max(args.filter_sizes))

    # Load model
    model = models.TextCNN(
        embedding_dim=args.embedding_dim,
        vocab_size=len(X_tokenizer),
        num_filters=args.num_filters,
        filter_sizes=args.filter_sizes,
        hidden_dim=args.hidden_dim,
        dropout_p=args.dropout_p,
        num_classes=len(y_tokenizer.classes))
    model.load_state_dict(torch.load(os.path.join(experiment_dir, 'model.h5')))
    device = torch.device('cuda' if (
        torch.cuda.is_available() and args.cuda) else 'cpu')
    model = model.to(device)

    # Predict
    results = []
    y_prob, conv_outputs = predict_step(
        model=model, dataset=infer_set,
        filter_sizes=args.filter_sizes, device=device)
    for index in range(len(X_infer)):
        results.append({
            'raw_input': texts[index],
            'preprocessed_input': X_tokenizer.sequences_to_texts([X_infer[index]])[0],
            'probabilities': get_probability_distribution(y_prob[index], y_tokenizer.classes),
            'top_n_grams': get_top_n_grams(
                tokens=preprocessed_texts[index].split(' '),
                conv_outputs={k: v[index] for k, v in conv_outputs.items()},
                filter_sizes=args.filter_sizes)})

    return results

def continuous_save_twitter():
    raw_json_file = PATH_STEP1_RAW
    raw_json_file = os.path.abspath(os.path.join('.', raw_json_file))
    raw_texts = []
    if os.path.exists(raw_json_file) and os.path.isfile(raw_json_file):
        raw_texts = load_json(raw_json_file)
    count = len(raw_texts)
    if count > 0:
        print(f"Preloaded {count} texts.")
    while count < 10000:
        new_texts = fetch_twitter(100)
        raw_texts.extend(new_texts)
        count = len(raw_texts)
        print(f"Outputting {count} texts...")
        save_json(raw_texts, raw_json_file, ensure_ascii=False)
        time.sleep(2)

def process():
    # 1. Load the data into memory
    data = load_json(PATH_STEP2_CLEAN)

    # 2. Run 10-fold cross-validation
    kFold = MyKFold(10, False)
    print("Starting 10-Fold CV training/test")
    means = np.zeros((10, 3))
    for idx, (train_tweets, test_tweets) in enumerate(kFold(data)):
        print(f"Fold {idx}:")
        models = init_models(3, KneserNeyInterpolated)
        train_sents = compress(train_tweets)
        test_sents = compress(test_tweets)
        # Pad the sentences for each model, returning a list of (ngram, vocab) pairs
        ngrams = padded_multiple_models(3, train_sents)
        # Train the models on their ngrams
        fit_multiple_models(models, ngrams)
        test_ngrams = padded_multiple_models(3, test_sents)
        for n in range(3):
            temp = [models[n].perplexity(i)
                    for i in tqdm(test_ngrams[n][0],
                                  desc=f"perplexity of {Ngrams(n)}",
                                  total=len(test_sents))]
            means[idx, n] = np.mean(temp)
        print(f"run {idx}, unigram: {means[idx, 0]}, bigram: {means[idx, 1]}, trigram: {means[idx, 2]}")

    final_means = np.mean(means, axis=0)
    print("Final mean of 10-Fold CV:")
    print(f"unigram: {final_means[0]}, bigram: {final_means[1]}, trigram: {final_means[2]}")

if __name__ == '__main__':
    # Arguments
    parser = ArgumentParser()
    parser.add_argument('--experiment-id', type=str, default="latest",
                        help="name of the model to load")
    parser.add_argument('--text', type=str, required=True,
                        help="text to predict")
    args = parser.parse_args()

    # Load model config
    if args.experiment_id == 'latest':
        args.experiment_id = max(os.listdir(config.EXPERIMENTS_DIR))
    experiment_dir = os.path.join(config.EXPERIMENTS_DIR, args.experiment_id)
    experiment_config = utilities.load_json(
        os.path.join(experiment_dir, 'config.json'))
    args = Namespace(**{**args.__dict__, **Namespace(**experiment_config).__dict__})
    config.logger.info(f"→ Using {args.experiment_id}")

    # Preprocess
    texts = [args.text]
    X_tokenizer = data.Tokenizer.load(
        fp=os.path.join(experiment_dir, 'X_tokenizer.json'))
    y_tokenizer = data.LabelEncoder.load(
        fp=os.path.join(experiment_dir, 'y_tokenizer.json'))
    preprocessed_texts = data.preprocess_texts(
        texts, lower=args.lower, filters=args.filters)

    # Create dataset
    X_infer = np.array(X_tokenizer.texts_to_sequences(preprocessed_texts))
    y_filler = np.array([0] * len(X_infer))

def predict(experiment_id, text):
    """Predict the class for a text using a trained model from an experiment."""
    # Get experiment config
    experiment_dir = os.path.join(config.EXPERIMENTS_DIR, experiment_id)
    experiment_config = utilities.load_json(
        os.path.join(experiment_dir, 'config.json'))
    args = Namespace(**experiment_config)

    # Tokenizers
    texts = [text]
    with open(os.path.join(experiment_dir, 'X_tokenizer.json'), 'r') as fp:
        X_tokenizer = tokenizer_from_json(json.load(fp))
    y_tokenizer = LabelEncoder()
    y_tokenizer.classes_ = np.load(
        os.path.join(experiment_dir, 'y_tokenizer.npy'), allow_pickle=True)

    # Create dataset generator
    X_infer = np.array(X_tokenizer.texts_to_sequences(texts))
    preprocessed_texts = X_tokenizer.sequences_to_texts(X_infer)
    y_filler = np.array([0] * len(X_infer))
    inference_generator = data.DataGenerator(
        X=X_infer, y=y_filler, batch_size=args.batch_size,
        max_filter_size=max(args.filter_sizes))

    # Load model
    model = models.TextCNN(
        embedding_dim=args.embedding_dim,
        vocab_size=len(X_tokenizer.word_index) + 1,
        num_filters=args.num_filters,
        filter_sizes=args.filter_sizes,
        hidden_dim=args.hidden_dim,
        dropout_p=args.dropout_p,
        num_classes=len(y_tokenizer.classes_))
    model.summary(input_shape=(10,))  # build it
    model_path = os.path.join(experiment_dir, 'model/cp.ckpt')
    model.load_weights(model_path)

    # Conv output model
    conv_outputs_model = models.ConvOutputsModel(
        vocab_size=len(X_tokenizer.word_index) + 1,
        embedding_dim=args.embedding_dim,
        filter_sizes=args.filter_sizes,
        num_filters=args.num_filters)
    conv_outputs_model.summary(input_shape=(10,))  # build it

    # Set weights
    conv_outputs_model.layers[0].set_weights(model.layers[0].get_weights())
    conv_layer_start_num = 1
    for layer_num in range(conv_layer_start_num,
                           conv_layer_start_num + len(args.filter_sizes)):
        conv_outputs_model.layers[layer_num].set_weights(
            model.layers[layer_num].get_weights())

    # Predict
    results = []
    y_prob = model.predict(x=inference_generator, verbose=1)
    conv_outputs = conv_outputs_model.predict(x=inference_generator, verbose=1)
    for index in range(len(X_infer)):
        results.append({
            'raw_input': texts[index],
            'preprocessed_input': preprocessed_texts[index],
            'probabilities': get_probability_distribution(y_prob[index], y_tokenizer.classes_),
            'top_n_grams': get_top_n_grams(
                tokens=preprocessed_texts[index].split(' '),
                conv_outputs=conv_outputs,
                filter_sizes=args.filter_sizes)
        })

    return results

def process():
    data = load_json(PATH_STEP1_RAW)
    data = list(map(extract_text, data))
    data = list(map(sentence_segment, data))
    data = list(map(word_tokenize_sentpad, data))
    save_json(data, PATH_STEP2_CLEAN, ensure_ascii=False)

def get_json(self, url):
    return load_json(url)

import os
import logging
import logging.config

import utilities as utils

# Directories
BASE_DIR = os.getcwd()  # project root
LOGS_DIR = os.path.join(BASE_DIR, 'logs')
EXPERIMENTS_DIR = os.path.join(BASE_DIR, 'experiments')
TENSORBOARD_DIR = os.path.join(BASE_DIR, 'tensorboard')

# Create dirs
utils.create_dirs(LOGS_DIR)
utils.create_dirs(EXPERIMENTS_DIR)
utils.create_dirs(TENSORBOARD_DIR)

# Loggers
log_config = utils.load_json(filepath=os.path.join(BASE_DIR, 'logging.json'))
logging.config.dictConfig(log_config)
logger = logging.getLogger('logger')

def get_json(self, url):
    return load_json(url, {"X-Mashape-Authorization": AUTH_KEY})