def build_parallel_corpus():
    """Build aligned question/answer term-count matrices and write them to disk.

    Pulls up to 5000 question/answer pairs from the ``quora`` database,
    vectorizes both sides with the pickled ``CountVectorizer`` stored in the
    on-disk index, drops rows and columns that are all-zero on either side
    (so the two matrices stay aligned), and writes the results as
    ``a.mtx`` (answers) and ``q.mtx`` (questions) in Matrix Market format.

    Returns:
        None.  Output is written to ``a.mtx`` and ``q.mtx`` in the current
        working directory.
    """
    con = psycopg2.connect(database='quora', user='******')
    try:
        cur = con.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, cur)

        # we actually only need to cPickle the CountVectorizer object
        # so we can load the stop words and other preprocessing opt.
        index_directory = '/home/kyrre/michaeljackson'
        idx = Index.read_index(index_directory)

        query = """SELECT DISTINCT(Question.qid),
                          concat(main, ' ', info) question,
                          A.contents answers
                   FROM Question
                   JOIN (SELECT string_agg(content, ' ') as contents, qid
                         FROM Answer
                         GROUP BY qid) A
                   ON Question.qid = A.qid
                   LIMIT 5000;
                """

        cur.execute(query)
        a = idx.count_vect.transform(SQL_generator(cur, 'answers'))

        # the generator exhausts the cursor, so re-run the query for the
        # question side
        cur.execute(query)
        q = idx.count_vect.transform(SQL_generator(cur, 'question'))
    finally:
        # always release the connection, even if vectorization fails
        con.close()

    def od(x):
        # collapse a (possibly matrix-typed) result to a flat ndarray
        return np.squeeze(np.asarray(x))

    # drop rows (documents) that are empty on either side, keeping the
    # two matrices row-aligned
    asum = od(a.sum(axis=1))
    qsum = od(q.sum(axis=1))
    nnz_indices = np.intersect1d(od(np.argwhere(asum != 0)),
                                 od(np.argwhere(qsum != 0)))
    a = a[nnz_indices, :]
    q = q[nnz_indices, :]

    # drop columns (vocabulary terms) that never occur on either side
    asum = od(a.sum(axis=0))
    qsum = od(q.sum(axis=0))
    nnz_cols_indices = np.intersect1d(od(np.argwhere(asum != 0)),
                                      od(np.argwhere(qsum != 0)))
    a = a[:, nnz_cols_indices]
    q = q[:, nnz_cols_indices]

    assert a.shape == q.shape

    assert_sorted_indices(a)
    assert_sorted_indices(q)

    # pass the target filename directly: mmwrite opens, writes and closes
    # the file itself (the old open(..., 'w') handles were never closed)
    scipy.io.mmwrite('a.mtx', a)
    scipy.io.mmwrite('q.mtx', q)
def build_parallel_corpus():
    """Return a :class:`Bunch` holding the parallel question/answer corpus.

    Pulls every question/answer pair from the ``quora`` database, featurizes
    both sides with the pickled ``CountVectorizer`` stored in the on-disk
    index, and collects the sparse (data, indices) pairs keyed by question
    id.  Pairs where either side featurizes to nothing are skipped so the
    two mappings stay aligned.

    NOTE(review): this module appears to define ``build_parallel_corpus``
    twice; whichever ``def`` runs last wins.  Consider renaming one of them.

    Returns:
        Bunch with attributes:
            questions  -- OrderedDict mapping qid -> [data, indices]
            answers    -- OrderedDict mapping qid -> [data, indices]
            count_vect -- the CountVectorizer used for featurization
    """
    con = psycopg2.connect(database='quora', user='******')
    try:
        cur = con.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, cur)

        # we actually only need to cPickle the CountVectorizer object
        # so we can load the stop words and other preprocessing opt.
        index_directory = '/home/kyrre/michaeljackson'
        idx = Index.read_index(index_directory)

        query = """SELECT DISTINCT(Question.qid),
                          concat(main, ' ', info) question,
                          A.contents answers
                   from Question
                   JOIN (SELECT string_agg(content, ' ') as contents, qid
                         FROM Answer
                         GROUP BY qid) A
                   ON Question.qid = A.qid;
                """
        cur.execute(query)

        # parallel corpora, keyed by question id; insertion order preserved
        questions = OrderedDict()
        answers = OrderedDict()

        for record in cur:
            qdata, qindices = idx.count_vect.featurize(record['question'])
            adata, aindices = idx.count_vect.featurize(record['answers'])

            # skip "empty" entries so questions and answers stay aligned
            if adata.size == 0 or qdata.size == 0:
                continue

            questions[record['qid']] = [qdata, qindices]
            answers[record['qid']] = [adata, aindices]
    finally:
        # always release the connection, even if featurization fails
        con.close()

    return Bunch(questions=questions,
                 answers=answers,
                 count_vect=idx.count_vect)