Python StandardFormat.items Examples

Programming Language: Python

Namespace/Package Name: TRECTopics

Class/Type: StandardFormat

Method/Function: items

Examples at hotexamples.com: 2

Python StandardFormat.items - 2 examples found. These are the top-rated real-world Python examples of TRECTopics.StandardFormat.items, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

has_key(2)

items(2)

read(1)

Frequently Used Methods

has_key (2)

items (2)

read (1)

Example #1

Show file

File: WindowExtractor.py Project: DrDub/window_shopper

def exe_extract_windows(argv):
    """Extract candidate windows for every judged document of every topic.

    ``argv`` must unpack into exactly four values, in this order:
    ``topic_path``, ``judge_path``, ``text_db_path``, ``windows_db_path``.

    For each topic that has an entry in the judgment file, every judged
    document accepted by ``is_cluewebB`` is looked up in the text database,
    matched against the processed topic with ``match_window``, and the
    texts of the resulting windows are stored newline-joined and UTF-8
    encoded in the windows database under the document number.

    Both databases are closed when the function exits, including when an
    error interrupts the extraction.
    """
    topic_path, judge_path, text_db_path, windows_db_path = argv
    text_db = bsddb.hashopen(text_db_path)
    try:
        # NOTE(review): flag 'w' opens the database read-write; confirm the
        # windows database is expected to exist before this runs.
        window_db = bsddb.hashopen(windows_db_path, 'w')
        try:
            judge_file = QRelFile(judge_path)
            topics = StandardFormat().read(topic_path)
            topic_chain = TextChain([
                TextTokenizer(word_tokenize),
                TextStopRemover('data/stoplist.dft'),
                TextStemmer(EnglishStemmer()),
                TextTokenNormalizer(),
            ])
            # Sentences go through the same pipeline minus stop-word removal.
            sentence_chain = TextChain([
                TextTokenizer(word_tokenize),
                TextStemmer(EnglishStemmer()),
                TextTokenNormalizer(),
            ])
            for topic_id, topic_str in topics.items():
                # Progress output; flushed so it shows up during long runs.
                print(topic_id)
                sys.stdout.flush()
                # Check for judgments first so unjudged topics are not
                # run through the topic chain for nothing.
                if not judge_file.has_key(topic_id):
                    continue
                topic = TextPiece(topic_str)
                topic_chain.work(topic)
                for docno in judge_file[topic_id].keys():
                    if not is_cluewebB(docno):
                        continue
                    doc_text = text_db[docno]
                    window_candidates = match_window(
                        topic, doc_text, sentence_chain)
                    text = '\n'.join(
                        piece.text for piece in window_candidates)
                    window_db[docno] = text.encode('utf8')
        finally:
            window_db.close()
    finally:
        text_db.close()

Example #2

Show file

File: TextExtractor.py Project: jinghe/window_shopper

def exe_extract_words(article_path, topic_path, out_word_path):
    """Write the sorted vocabulary of all topics and articles to a file.

    Every topic string read from ``topic_path`` and the text of every
    document read from ``article_path`` is passed through
    ``complete_text_work``; the tokens of the results are collected into a
    single set. The distinct tokens are then written to ``out_word_path``
    in sorted order, one per line.
    """
    term_set = set()

    topics = StandardFormat().read(topic_path)
    for _topic_id, topic_string in topics.items():
        term_set.update(complete_text_work(topic_string).tokens)

    reader = TRECReader()
    reader.open(article_path)
    # The loop stops at the first falsy value returned by reader.next().
    doc = reader.next()
    while doc:
        # Progress output: document ID and vocabulary size so far.
        print('%s %s' % (doc.ID, len(term_set)))
        term_set.update(complete_text_work(doc.text).tokens)
        doc = reader.next()

    print('writing.....')
    # The context manager closes the file even if a write fails; an explicit
    # writelines call replaces map(), which was used only for side effects.
    with open(out_word_path, 'w') as word_list_file:
        word_list_file.writelines('%s\n' % word for word in sorted(term_set))