Example #1
def parse_query(q, data):
    """
    Description: Get the knowledge base key from a query with a stock ticker
    Parameters: (String) Query as entered by the user, (Dict) data from load_data() output
    Output: (Array) Array containing inputs to model.predict, (Array) Labels for potential answers
    """

    # Remove punctuation, lowercase, split on whitespace
    translator = str.maketrans(string.punctuation,
                               ' ' * len(string.punctuation))
    raw_query = q.translate(translator).lower().split()

    # Get kb_key
    kb_key = get_kb_key(q)

    # Ensure raw_query is at most 13 words
    if len(raw_query) > 13:
        raw_query = raw_query[:13]

    # Tokenize query: known words -> vocab id, OOV -> 1, zero-pad to 13
    query = [data['vocab2id'].get(word, 1) for word in raw_query]
    query.extend([0] * (13 - len(raw_query)))
    query = [query]

    # Memory & Candidate Answer Labels
    ans_cands = build_ans_cands(data['kb'][kb_key],
                                data['entity2id'],
                                data['entityType2id'],
                                data['relation2id'],
                                data['vocab2id'])
    memory = [ans_cands[:-1]]
    cand_labels = [ans_cands[-1]]

    # Vectorize Data
    query, query_words, _, memory = vectorize_data(
        query, [[]], memory,
        max_query_size=data['opt']['query_size'],
        max_query_markup_size=data['opt']['query_markup_size'],
        max_ans_bow_size=data['opt']['ans_bow_size'],
        vocab2id=data['vocab2id'])

    return ([memory, query, query_words, [raw_query], [[]],
             [len(raw_query)]], cand_labels)
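The tokenization step above follows a fixed-width scheme: punctuation is replaced by spaces, each known word maps to its vocab2id index, out-of-vocabulary words map to 1, and the result is zero-padded to 13 tokens. Here is a minimal self-contained sketch of that scheme; the toy vocabulary and query are made up for illustration:

import string

# Toy vocabulary; the real one is data['vocab2id'] from load_data()
vocab2id = {'price': 7, 'of': 3, 'aapl': 42}

def tokenize_fixed_width(q, vocab2id, max_len=13):
    # Replace punctuation with spaces, lowercase, split on whitespace
    translator = str.maketrans(string.punctuation,
                               ' ' * len(string.punctuation))
    words = q.translate(translator).lower().split()[:max_len]
    # Known words -> vocab index, OOV -> 1, then zero-pad to max_len
    ids = [vocab2id.get(w, 1) for w in words]
    ids.extend([0] * (max_len - len(words)))
    return ids

print(tokenize_fixed_width('Price of AAPL?', vocab2id))
# [7, 3, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]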
Example #2
    ctx_stopwords = {
        'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you',
        "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself',
        'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her',
        'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them',
        'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom',
        'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was',
        'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do',
        'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or',
        'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with',
        'about', 'against', 'between', 'into', 'through', 'during', 'before',
        'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out',
        'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once',
        'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both',
        'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor',
        'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't',
        'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now',
        'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't",
        'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn',
        "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma',
        'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan',
        "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't",
        'won', "won't", 'wouldn', "wouldn't"
    }

    train_queries, train_raw_queries, train_query_mentions, train_memories, _, train_gold_ans_inds, _ = train_vec
    train_queries, train_query_words, train_query_lengths, train_memories = vectorize_data(
        train_queries, train_query_mentions, train_memories,
        max_query_size=opt['query_size'],
        max_query_markup_size=opt['query_markup_size'],
        max_mem_size=opt['mem_size'],
        max_ans_bow_size=opt['ans_bow_size'],
        max_ans_path_bow_size=opt['ans_path_bow_size'],
        vocab2id=vocab2id)

    valid_queries, valid_raw_queries, valid_query_mentions, valid_memories, valid_cand_labels, valid_gold_ans_inds, valid_gold_ans_labels = valid_vec
    valid_queries, valid_query_words, valid_query_lengths, valid_memories = vectorize_data(
        valid_queries, valid_query_mentions, valid_memories,
        max_query_size=opt['query_size'],
        max_query_markup_size=opt['query_markup_size'],
        max_mem_size=opt['mem_size'],
        max_ans_bow_size=opt['ans_bow_size'],
        max_ans_path_bow_size=opt['ans_path_bow_size'],
        vocab2id=vocab2id)

    start = timeit.default_timer()
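Every vectorize_data call in these examples reads its size limits from an opt dict loaded from a YAML config. For reference, here is a hypothetical opt containing just the keys the calls above actually touch; the values are illustrative, not the project's real configuration:

# Hypothetical stand-in for the YAML config; values are illustrative only
opt = {
    'query_size': 13,          # -> max_query_size
    'query_markup_size': 13,   # -> max_query_markup_size
    'mem_size': 100,           # -> max_mem_size
    'ans_bow_size': 10,        # -> max_ans_bow_size
    'ans_path_bow_size': 10,   # -> max_ans_path_bow_size
}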
Example #3
    freebase = load_ndjson(os.path.join(cfg['raw_data_dir'],
                                        'freebase_full.json'),
                           return_type='dict')
    test_data = load_ndjson(
        os.path.join(cfg['raw_data_dir'], 'test_seed_2_smart.json'))
    data_vec = build_data(test_data,
                          freebase,
                          entity2id,
                          entityType2id,
                          relation2id,
                          vocab2id,
                          pred_seed_ents=pred_seed_ents)

    queries, raw_queries, query_mentions, memories, cand_labels, _, gold_ans_labels = data_vec
    queries, query_words, query_lengths, memories_vec = vectorize_data(
        queries, query_mentions, memories,
        max_query_size=bamnet_opt['query_size'],
        max_query_markup_size=bamnet_opt['query_markup_size'],
        max_ans_bow_size=bamnet_opt['ans_bow_size'],
        vocab2id=vocab2id)

    model = BAMnetAgent(bamnet_opt, ctx_stopwords, vocab2id)
    pred = model.predict(
        [memories_vec, queries, query_words, raw_queries, query_mentions,
         query_lengths],
        cand_labels,
        batch_size=bamnet_opt['test_batch_size'],
        margin=2)

    print('\nPredictions')
    for margin in bamnet_opt['test_margin']:
        print('\nMargin: {}'.format(margin))
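The loop over bamnet_opt['test_margin'] suggests predictions are reported at several margins, while the predict call itself fixes margin=2. BAMnet's actual margin logic is not shown in this snippet, but the general pattern of keeping candidates whose score falls within a margin of the best one looks like this hypothetical sketch (candidates, scores, and margins are all made up):

# Hypothetical (candidate, score) pairs for one query, best first
scored = [('AAPL', 0.9), ('MSFT', 0.4), ('GOOG', 0.1)]

for margin in [0.3, 0.6]:  # stand-in for bamnet_opt['test_margin']
    top_score = scored[0][1]
    # Keep candidates whose score is within `margin` of the best score
    kept = [cand for cand, score in scored if top_score - score <= margin]
    print('Margin {}: {}'.format(margin, kept))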
Example #4
def train_model(config_path='question-answering/config/bamnet_webq.yml'):
    """
    Description: Train a BAMnet model with knowledge base and questions in /data
    Parameters: (String) Relative path to config file
    Output: (1 .md File) BAMnet model weights. Use for question_answering.py
    """

    with open(config_path, "r") as setting:
        opt = yaml.safe_load(setting)

    # Load data
    train_vec = load_json(os.path.join(opt['data_dir'], opt['train_data']))
    valid_vec = load_json(os.path.join(opt['data_dir'], opt['valid_data']))

    vocab2id = load_json(os.path.join(opt['data_dir'], 'vocab2id.json'))

    ctx_stopwords = {
        'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you',
        "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself',
        'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her',
        'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them',
        'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom',
        'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was',
        'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do',
        'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or',
        'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with',
        'about', 'against', 'between', 'into', 'through', 'during', 'before',
        'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out',
        'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once',
        'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both',
        'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor',
        'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't',
        'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now',
        'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't",
        'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn',
        "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma',
        'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan',
        "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't",
        'won', "won't", 'wouldn', "wouldn't"
    }

    # Vectorize data
    train_queries, train_raw_queries, train_query_mentions, train_memories, _, train_gold_ans_inds, _ = train_vec
    train_queries, train_query_words, train_query_lengths, train_memories = build_utils.vectorize_data(
        train_queries, train_query_mentions, train_memories,
        max_query_size=opt['query_size'],
        max_query_markup_size=opt['query_markup_size'],
        max_mem_size=opt['mem_size'],
        max_ans_bow_size=opt['ans_bow_size'],
        max_ans_path_bow_size=opt['ans_path_bow_size'],
        vocab2id=vocab2id)

    valid_queries, valid_raw_queries, valid_query_mentions, valid_memories, valid_cand_labels, valid_gold_ans_inds, valid_gold_ans_labels = valid_vec
    valid_queries, valid_query_words, valid_query_lengths, valid_memories = build_utils.vectorize_data(
        valid_queries, valid_query_mentions, valid_memories,
        max_query_size=opt['query_size'],
        max_query_markup_size=opt['query_markup_size'],
        max_mem_size=opt['mem_size'],
        max_ans_bow_size=opt['ans_bow_size'],
        max_ans_path_bow_size=opt['ans_path_bow_size'],
        vocab2id=vocab2id)

    start = timeit.default_timer()

    model = BAMnetAgent(opt, ctx_stopwords, vocab2id)
    model.train(
        [train_memories, train_queries, train_query_words, train_raw_queries,
         train_query_mentions, train_query_lengths],
        train_gold_ans_inds,
        [valid_memories, valid_queries, valid_query_words, valid_raw_queries,
         valid_query_mentions, valid_query_lengths],
        valid_gold_ans_inds, valid_cand_labels, valid_gold_ans_labels)

    print('Runtime: %ss' % (timeit.default_timer() - start))
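
The scaffolding around training, loading the YAML config and timing the run with timeit.default_timer, can be tried in isolation. A minimal sketch, with an inline YAML string standing in for bamnet_webq.yml and a dummy workload standing in for model.train:

import timeit
import yaml

# Inline stand-in for question-answering/config/bamnet_webq.yml
raw_cfg = """
data_dir: data
query_size: 13
"""
opt = yaml.safe_load(raw_cfg)
print(opt['query_size'])  # 13

start = timeit.default_timer()
_ = sum(i * i for i in range(10 ** 6))  # dummy workload in place of model.train(...)
print('Runtime: %ss' % (timeit.default_timer() - start))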