def go(self, game, location_name):
    location = game.get_location()
    locations = utils.build_index(location.find_item_by_attribute("room"))
    location_names = sorted(locations)

    if location_name:
        if len(location_name) == 1 and location_name in location_names:
            # using the alias - so look up name
            loc = locations[location_name]
            location_name = loc["name"]
        self._go(game, location_name, location)
    else:
        # try auto routing - if room has single exit
        # commented out - always go to first location in list of routes - as per UI
        # if len(location_names) > 2:  # locations are indexed, so will have alias in it too.
        #     utils.print_message("I don't understand where to go - Try: go <place>")
        #     return
        if len(location_names) < 1:
            # the solution must be in this room
            utils.print_message(
                "There doesn't appear to be anywhere I can go to at the moment."
            )
            return

        # default to the first listed route
        location_name = location_names[0]
        if len(location_name) == 1 and location_name in location_names:
            # using the alias - so look up name
            loc = locations[location_name]
            location_name = loc["name"]
        self._go(game, location_name, location)
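# Sketch of the room index that `go` above appears to assume `utils.build_index`
# returns (an assumption for illustration, not the project's actual
# implementation): each room is keyed both by its full name and by a
# one-character alias, which is why a single-letter location_name is resolved
# back to loc["name"] before routing.
def build_room_index_sketch(rooms):
    index = {}
    for room in rooms:
        index[room["name"]] = room               # full-name lookup
        index[room["name"][0].lower()] = room    # one-letter alias lookup
    return index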
    # existing output file for this timeframe: extend the stored series
    current_dict = json.load(f)
else:
    # first candle for this timeframe: initialise the aggregate structure
    current_dict = {
        "openTimestamp": int(candle["openDate"].timestamp()),
        "timeframe": tf,
        "openDate": candle["openDate"].strftime('%Y-%m-%d %H:%M:%S'),
        "open": [],
        "high": [],
        "low": [],
        "close": [],
        "volume": [],
        # "openDate": [],
        # "closeDate": []
    }

current_dict["open"].append(candle["open"])
current_dict["high"].append(candle["high"])
current_dict["low"].append(candle["low"])
current_dict["close"].append(candle["close"])
current_dict["volume"].append(candle["volume"])
# Not needed since we have the openTimestamp of the first candle and we know
# the timeframe; kept in comments for debugging
# current_dict["openDate"].append(str(candle["openDate"]))
# current_dict["closeDate"].append(str(candle["closeDate"]))

store_dict(tf, current_out_file, current_dict)

build_index(args.outfolder)
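# Assumption-level sketch (not part of the snippet above): because only the
# first candle's openTimestamp and the timeframe are stored, per-candle open
# dates can be rebuilt, provided the candles are contiguous with no gaps.
# The `timeframe_step` argument is hypothetical; the repository's `timedeltas`
# mapping presumably supplies the step for a given timeframe string.
from datetime import datetime, timedelta, timezone


def rebuild_open_dates(current_dict, timeframe_step: timedelta):
    """Recompute each candle's open datetime from the stored openTimestamp."""
    first_open = datetime.fromtimestamp(current_dict["openTimestamp"], tz=timezone.utc)
    return [first_open + i * timeframe_step
            for i in range(len(current_dict["open"]))]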
import argparse
from datetime import datetime, timezone, timedelta
import json
import os

from utils import timedeltas, build_index


def parse_cl_args():
    parser = argparse.ArgumentParser(
        description='Build index files for the scraped database')
    parser.add_argument(
        'path',
        help='Path to a directory containing the scraped database '
             '(folders 1d, 1h, etc.)')
    return parser.parse_args()


args = parse_cl_args()
build_index(args.path)
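# Example invocation (the script filename is hypothetical; the positional
# `path` argument is defined by parse_cl_args above):
#
#   python build_index_cli.py /data/scraped-db
#
# build_index is then expected to index the timeframe folders (1d, 1h, ...)
# found under that path.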
def c_index(args):
    """
    Build index.
    """
    build_index()
def main(load=False):
    # Init hps
    hps = init_hps()

    criterion = nn.CrossEntropyLoss()

    torch.manual_seed(0)

    # Read file
    if load:
        print("Loading file", data_file, "for testing")
    else:
        print("Using file", data_file, "for training")

    lines = utils.read_file(data_file)

    global data_file_size
    data_file_size = len(lines)

    start = time.time()
    unique_words, vocab_size, n = utils.create_unique_words(lines)

    print("vocab_size", vocab_size)
    print("Constructing unique words took:", (time.time() - start))

    # Construct dataloaders: 60/20/20 split, with any rounding remainder
    # assigned to the validation set
    dataset = utils.ReadLines(data_file)

    print("data set length:", len(dataset))

    train_set_len = int(len(dataset) * 0.6)
    test_set_len = int(len(dataset) * 0.2)
    validation_set_len = int(len(dataset) * 0.2)

    while train_set_len + test_set_len + validation_set_len != len(dataset):
        validation_set_len += 1

    train_set, test_set, validation_set = torch.utils.data.random_split(
        dataset, [train_set_len, test_set_len, validation_set_len])

    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=hps.batch_size,
                                               num_workers=8,
                                               shuffle=True,
                                               collate_fn=collate_fn)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=hps.batch_size,
                                              num_workers=8,
                                              shuffle=True,
                                              collate_fn=collate_fn)
    validation_loader = torch.utils.data.DataLoader(dataset=validation_set,
                                                    batch_size=hps.batch_size,
                                                    num_workers=8,
                                                    shuffle=True,
                                                    collate_fn=collate_fn)

    # Init model
    if not load:
        # Build the vocabulary index, train fastText embeddings, then train the LSTM
        word_to_idx, idx_to_word = utils.build_index(unique_words)
        mapper = SentenceMapper(lines, word_to_idx, idx_to_word)

        vocab_info = {
            'idx_to_word': idx_to_word,
            'word_to_idx': word_to_idx,
            'vocab_size': vocab_size
        }

        with open(
                vocab_info_save_path(data_file_size, hps.lstm_h_dim,
                                     hps.embedding_dim), 'wb') as f:
            pickle.dump(vocab_info, f, protocol=pickle.HIGHEST_PROTOCOL)

        embedding = fasttext.train_unsupervised(data_file,
                                                model='cbow',
                                                dim=hps.embedding_dim)
        embedding.save_model(
            embedding_model_save_path(data_file_size, hps.lstm_h_dim,
                                      hps.embedding_dim))

        print("Training...")
        model = LSTM(hps, vocab_size)
        train_model(hps, idx_to_word, model, train_loader, validation_loader,
                    mapper, embedding)
    else:
        # Restore the saved vocabulary, embeddings, and model weights for evaluation
        with open(vocab_info_load_path, 'rb') as f:
            vocab_info = pickle.load(f, encoding='utf-8')

        idx_to_word = vocab_info['idx_to_word']
        word_to_idx = vocab_info['word_to_idx']
        vocab_size = vocab_info['vocab_size']

        mapper = SentenceMapper(lines, word_to_idx, idx_to_word)
        embedding = fasttext.load_model(
            embedding_model_save_path(data_file_size, hps.lstm_h_dim,
                                      hps.embedding_dim))

        print("Loading model...")
        model = LSTM(hps, vocab_size)
        model = nn.DataParallel(model).to(device)
        model.load_state_dict(torch.load(model_load_path, map_location=device))

        model.to(device)
        model.eval()

        counter = 0
        perplexities = []

        for _, (data, N) in enumerate(test_loader):
            padded_data = mapper.pad_sentences(data, N)
            og_inputs, targets = utils.inputs_and_targets_from_sequences(
                padded_data)

            inputs = mapper.map_sentences_to_padded_embedding(
                og_inputs,
                embedding=embedding,
                embedding_size=hps.embedding_dim,
                N=N)
            targets = mapper.map_words_to_indices(targets, N=N)

            if cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()

            outputs = model(inputs)

            # Per-batch perplexity = exp(cross-entropy loss)
            loss = criterion(outputs.permute(0, 2, 1), targets)
            perplexities.append(np.exp(loss.detach().cpu().numpy()))

            # Greedy (top-1) prediction for the first sentence in the batch
            topk = F.softmax(outputs, dim=2)[0, :, :]
            topk = torch.topk(topk, 1, dim=1)[1].squeeze(1)
            # print(topk.shape)

            outputs = F.softmax(outputs, dim=2)[0, :, :].detach().cpu().numpy()

            # Sample the next word at each position from the softmax distribution
            outs = []
            idxs = np.array(list(range(vocab_size)))
            for i in range(outputs.shape[0]):
                outs.append(np.random.choice(idxs, p=np.array(outputs[i, :])))

            output = torch.tensor(outs)

            input_sequence = og_inputs[0, :]
            predicted_sequence = [
                idx_to_word[c] for c in topk.detach().cpu().numpy()
            ]
            sampled_sequence = [
                idx_to_word[c] for c in output.detach().cpu().numpy()
            ]

            print('\nInput sequence')
            print(input_sequence)
            print('\nPredicted sequence:')
            print(predicted_sequence)
            print('\nSampled sequence:')
            print(sampled_sequence)

            prev_word = ""
            for i in range(1, len(predicted_sequence)):
                words = input_sequence[:i]
                predicted_next_word = predicted_sequence[i - 1]
                sampled_next_word = sampled_sequence[i - 1]

                if sampled_next_word == '</s>' and (
                        prev_word == '</s>' or input_sequence[i] == '</s>'):
                    break

                prev_word = sampled_next_word

                print(
                    " ".join(list(words)),
                    "[" + predicted_next_word + "|" + sampled_next_word + "]")

            print("Moving on to next prediction....\n")

        print(perplexities)
        mean_perplexity = np.mean(perplexities)
        print(f'Perplexity: {mean_perplexity}')

        with open(
                perplexity_test_save_path(data_file_size, hps.lstm_h_dim,
                                          hps.embedding_dim), 'a') as f:
            f.write(str(mean_perplexity) + "\n")

    return vocab_size, hps
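# Assumed entry point (not shown in the excerpt above): main() trains by
# default and evaluates a previously saved model when load=True.
if __name__ == "__main__":
    vocab_size, hps = main(load=False)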