def go(self, game, location_name):
    location = game.get_location()
    locations = utils.build_index(location.find_item_by_attribute("room"))
    location_names = sorted(locations)

    if location_name:
        if len(location_name) == 1 and location_name in location_names:
            # using the alias - so look up name
            loc = locations[location_name]
            location_name = loc["name"]
        self._go(game, location_name, location)
    else:
        # try auto routing - if room has single exit
        # commented out - always go to first location in list of routes - as per UI
        # if len(location_names) > 2:  # locations are indexed, so will have alias in it too.
        #     utils.print_message("I don't understand where to go - Try: go <place>")
        #     return
        if len(location_names) < 1:
            # the solution must be in this room
            utils.print_message(
                "There doesn't appear to be anywhere I can go to at the moment."
            )
            return

        # default to the first listed route
        location_name = location_names[0]
        if len(location_name) == 1 and location_name in location_names:
            # using the alias - so look up name
            loc = locations[location_name]
            location_name = loc["name"]
        self._go(game, location_name, location)
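# Sketch of the room index that `go` above appears to assume `utils.build_index`
# returns (an assumption for illustration, not the project's actual
# implementation): each room is keyed both by its full name and by a
# one-character alias, which is why a single-letter location_name is resolved
# back to loc["name"] before routing.
def build_room_index_sketch(rooms):
    index = {}
    for room in rooms:
        index[room["name"]] = room               # full-name lookup
        index[room["name"][0].lower()] = room    # one-letter alias lookup
    return index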
    # existing output file for this timeframe: extend the stored series
    current_dict = json.load(f)
else:
    # first candle for this timeframe: initialise the aggregate structure
    current_dict = {
        "openTimestamp": int(candle["openDate"].timestamp()),
        "timeframe": tf,
        "openDate": candle["openDate"].strftime('%Y-%m-%d %H:%M:%S'),
        "open": [],
        "high": [],
        "low": [],
        "close": [],
        "volume": [],
        # "openDate": [],
        # "closeDate": []
    }

current_dict["open"].append(candle["open"])
current_dict["high"].append(candle["high"])
current_dict["low"].append(candle["low"])
current_dict["close"].append(candle["close"])
current_dict["volume"].append(candle["volume"])
# Not needed since we have the openTimestamp of the first candle and we know
# the timeframe; kept in comments for debugging
# current_dict["openDate"].append(str(candle["openDate"]))
# current_dict["closeDate"].append(str(candle["closeDate"]))

store_dict(tf, current_out_file, current_dict)

build_index(args.outfolder)
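# Assumption-level sketch (not part of the snippet above): because only the
# first candle's openTimestamp and the timeframe are stored, per-candle open
# dates can be rebuilt, provided the candles are contiguous with no gaps.
# The `timeframe_step` argument is hypothetical; the repository's `timedeltas`
# mapping presumably supplies the step for a given timeframe string.
from datetime import datetime, timedelta, timezone


def rebuild_open_dates(current_dict, timeframe_step: timedelta):
    """Recompute each candle's open datetime from the stored openTimestamp."""
    first_open = datetime.fromtimestamp(current_dict["openTimestamp"], tz=timezone.utc)
    return [first_open + i * timeframe_step
            for i in range(len(current_dict["open"]))]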
import argparse
from datetime import datetime, timezone, timedelta
import json
import os

from utils import timedeltas, build_index


def parse_cl_args():
    parser = argparse.ArgumentParser(
        description='Build index files for the scraped database')
    parser.add_argument(
        'path',
        help='Path to a directory containing the scraped database '
             '(folders 1d, 1h, etc.)')
    return parser.parse_args()


args = parse_cl_args()
build_index(args.path)
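# Example invocation (the script filename is hypothetical; the positional
# `path` argument is defined by parse_cl_args above):
#
#   python build_index_cli.py /data/scraped-db
#
# build_index is then expected to index the timeframe folders (1d, 1h, ...)
# found under that path.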
def c_index(args):
    """
    Build index.
    """
    build_index()
def main(load=False):
    # Init hps
    hps = init_hps()

    criterion = nn.CrossEntropyLoss()

    torch.manual_seed(0)

    # Read file
    if load:
        print("Loading file", data_file, "for testing")
    else:
        print("Using file", data_file, "for training")

    lines = utils.read_file(data_file)

    global data_file_size
    data_file_size = len(lines)

    start = time.time()
    unique_words, vocab_size, n = utils.create_unique_words(lines)

    print("vocab_size", vocab_size)
    print("Constructing unique words took:", (time.time() - start))

    # Construct dataloaders: 60/20/20 split, with any rounding remainder
    # assigned to the validation set
    dataset = utils.ReadLines(data_file)

    print("data set length:", len(dataset))

    train_set_len = int(len(dataset) * 0.6)
    test_set_len = int(len(dataset) * 0.2)
    validation_set_len = int(len(dataset) * 0.2)

    while train_set_len + test_set_len + validation_set_len != len(dataset):
        validation_set_len += 1

    train_set, test_set, validation_set = torch.utils.data.random_split(
        dataset, [train_set_len, test_set_len, validation_set_len])

    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=hps.batch_size,
                                               num_workers=8,
                                               shuffle=True,
                                               collate_fn=collate_fn)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=hps.batch_size,
                                              num_workers=8,
                                              shuffle=True,
                                              collate_fn=collate_fn)
    validation_loader = torch.utils.data.DataLoader(dataset=validation_set,
                                                    batch_size=hps.batch_size,
                                                    num_workers=8,
                                                    shuffle=True,
                                                    collate_fn=collate_fn)

    # Init model
    if not load:
        # Build the vocabulary index, train fastText embeddings, then train the LSTM
        word_to_idx, idx_to_word = utils.build_index(unique_words)
        mapper = SentenceMapper(lines, word_to_idx, idx_to_word)

        vocab_info = {
            'idx_to_word': idx_to_word,
            'word_to_idx': word_to_idx,
            'vocab_size': vocab_size
        }

        with open(
                vocab_info_save_path(data_file_size, hps.lstm_h_dim,
                                     hps.embedding_dim), 'wb') as f:
            pickle.dump(vocab_info, f, protocol=pickle.HIGHEST_PROTOCOL)

        embedding = fasttext.train_unsupervised(data_file,
                                                model='cbow',
                                                dim=hps.embedding_dim)
        embedding.save_model(
            embedding_model_save_path(data_file_size, hps.lstm_h_dim,
                                      hps.embedding_dim))

        print("Training...")
        model = LSTM(hps, vocab_size)
        train_model(hps, idx_to_word, model, train_loader, validation_loader,
                    mapper, embedding)
    else:
        # Restore the saved vocabulary, embeddings, and model weights for evaluation
        with open(vocab_info_load_path, 'rb') as f:
            vocab_info = pickle.load(f, encoding='utf-8')

        idx_to_word = vocab_info['idx_to_word']
        word_to_idx = vocab_info['word_to_idx']
        vocab_size = vocab_info['vocab_size']

        mapper = SentenceMapper(lines, word_to_idx, idx_to_word)
        embedding = fasttext.load_model(
            embedding_model_save_path(data_file_size, hps.lstm_h_dim,
                                      hps.embedding_dim))

        print("Loading model...")
        model = LSTM(hps, vocab_size)
        model = nn.DataParallel(model).to(device)
        model.load_state_dict(torch.load(model_load_path, map_location=device))

        model.to(device)
        model.eval()

        counter = 0
        perplexities = []

        for _, (data, N) in enumerate(test_loader):
            padded_data = mapper.pad_sentences(data, N)
            og_inputs, targets = utils.inputs_and_targets_from_sequences(
                padded_data)

            inputs = mapper.map_sentences_to_padded_embedding(
                og_inputs,
                embedding=embedding,
                embedding_size=hps.embedding_dim,
                N=N)
            targets = mapper.map_words_to_indices(targets, N=N)

            if cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()

            outputs = model(inputs)

            # Per-batch perplexity = exp(cross-entropy loss)
            loss = criterion(outputs.permute(0, 2, 1), targets)
            perplexities.append(np.exp(loss.detach().cpu().numpy()))

            # Greedy (top-1) prediction for the first sentence in the batch
            topk = F.softmax(outputs, dim=2)[0, :, :]
            topk = torch.topk(topk, 1, dim=1)[1].squeeze(1)
            # print(topk.shape)

            outputs = F.softmax(outputs, dim=2)[0, :, :].detach().cpu().numpy()

            # Sample the next word at each position from the softmax distribution
            outs = []
            idxs = np.array(list(range(vocab_size)))
            for i in range(outputs.shape[0]):
                outs.append(np.random.choice(idxs, p=np.array(outputs[i, :])))

            output = torch.tensor(outs)

            input_sequence = og_inputs[0, :]
            predicted_sequence = [
                idx_to_word[c] for c in topk.detach().cpu().numpy()
            ]
            sampled_sequence = [
                idx_to_word[c] for c in output.detach().cpu().numpy()
            ]

            print('\nInput sequence')
            print(input_sequence)
            print('\nPredicted sequence:')
            print(predicted_sequence)
            print('\nSampled sequence:')
            print(sampled_sequence)

            prev_word = ""
            for i in range(1, len(predicted_sequence)):
                words = input_sequence[:i]
                predicted_next_word = predicted_sequence[i - 1]
                sampled_next_word = sampled_sequence[i - 1]

                if sampled_next_word == '</s>' and (
                        prev_word == '</s>' or input_sequence[i] == '</s>'):
                    break

                prev_word = sampled_next_word

                print(
                    " ".join(list(words)),
                    "[" + predicted_next_word + "|" + sampled_next_word + "]")

            print("Moving on to next prediction....\n")

        print(perplexities)
        mean_perplexity = np.mean(perplexities)
        print(f'Perplexity: {mean_perplexity}')

        with open(
                perplexity_test_save_path(data_file_size, hps.lstm_h_dim,
                                          hps.embedding_dim), 'a') as f:
            f.write(str(mean_perplexity) + "\n")

    return vocab_size, hps
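# Assumed entry point (not shown in the excerpt above): main() trains by
# default and evaluates a previously saved model when load=True.
if __name__ == "__main__":
    vocab_size, hps = main(load=False)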