def search(db, dictionary_path, search_terms, table, fromDanish):
    """Look up the search terms in the dictionary and fetch the matching entries.

    Only entries matched by every search term are returned.

    db = database connection; its ``execute`` must accept qmark ("?")
         placeholders, as ``sqlite3`` connections do
    dictionary_path = dictionary path without extension; '.dat' is appended
                      before it is handed to ``parse_entry``
    search_terms = list of search terms, each used as an SQL LIKE pattern
    table = name of the database table in which the terms are looked up
            (interpolated into the SQL text, so it must come from trusted
            code, never from user input)
    fromDanish = lookup direction; truthy selects the tables with suffix 1,
                 falsy the tables with suffix 2

    Returns a list with the raw entry data of every matching row in the
    entries table. The list is empty when nothing matches or when
    search_terms is empty.
    """
    direction = 1 if fromDanish else 2

    # Find the ids of the matching entries. The term is bound as a parameter
    # so that quotes in it can neither break nor alter the query.
    lookup_sql = 'select * from %s%i where word_ like ?' % (table, direction)
    entry_ids = None
    for term in search_terms:
        rows = list(db.execute(lookup_sql, (term,)))
        term_entry_ids = [row[0] for row in rows]
        if entry_ids is None:
            # First term: keep the ids in row order.
            entry_ids = term_entry_ids
        else:
            # Later terms narrow the result to ids matched by all terms.
            entry_ids = set(term_entry_ids) & set(entry_ids)

    # No terms at all, or no entry matched every term.
    if not entry_ids:
        return []

    # Fetch the entry data.
    entries_sql = 'select * from entries%i where id_ = ?' % direction
    raw_entries = []
    for entry_id in entry_ids:
        rows = list(db.execute(entries_sql, (entry_id,)))
        for _, _entry_type, _link_id, offset, nbyte in rows:
            raw_entry = parse_entry(dictionary_path + '.dat', entry_id,
                                    offset, nbyte)
            raw_entries.append(raw_entry)
    return raw_entries
def search(db, dictionary_path, search_terms, table, fromDanish):
    """Look up the search terms in the dictionary and fetch the matching entries.

    Only entries matched by every search term are returned.

    db = database connection; its ``execute`` must accept qmark ("?")
         placeholders, as ``sqlite3`` connections do
    dictionary_path = dictionary path without extension; '.dat' is appended
                      before it is handed to ``parse_entry``
    search_terms = list of search terms, each used as an SQL LIKE pattern
    table = name of the database table in which the terms are looked up
            (interpolated into the SQL text, so it must come from trusted
            code, never from user input)
    fromDanish = lookup direction; truthy selects the tables with suffix 1,
                 falsy the tables with suffix 2

    Returns a list with the raw entry data of every matching row in the
    entries table. The list is empty when nothing matches or when
    search_terms is empty.
    """
    direction = 1 if fromDanish else 2

    # Find the ids of the matching entries. The term is bound as a parameter
    # so that quotes in it can neither break nor alter the query.
    lookup_sql = 'select * from %s%i where word_ like ?' % (table, direction)
    entry_ids = None
    for term in search_terms:
        rows = list(db.execute(lookup_sql, (term,)))
        term_entry_ids = [row[0] for row in rows]
        if entry_ids is None:
            # First term: keep the ids in row order.
            entry_ids = term_entry_ids
        else:
            # Later terms narrow the result to ids matched by all terms.
            entry_ids = set(term_entry_ids) & set(entry_ids)

    # No terms at all, or no entry matched every term.
    if not entry_ids:
        return []

    # Fetch the entry data.
    entries_sql = 'select * from entries%i where id_ = ?' % direction
    raw_entries = []
    for entry_id in entry_ids:
        rows = list(db.execute(entries_sql, (entry_id,)))
        for _, _entry_type, _link_id, offset, nbyte in rows:
            raw_entry = parse_entry(dictionary_path + '.dat', entry_id,
                                    offset, nbyte)
            raw_entries.append(raw_entry)
    return raw_entries
def getRawEntryText(self, dat_file, entries):
    """Return one text field per entry, read from the dictionary data file.

    dat_file = data file handed on to ``self.extractFromFile``
    entries = iterable of (entry_id, offset, nbyte) triples

    For each triple the bytes described by offset/nbyte are extracted, parsed
    with ``groparser.parse_entry`` and split on NUL characters; the
    second-to-last piece is collected. The result list follows the order of
    ``entries``.
    """
    def text_of(entry_id, offset, nbyte):
        # Extract first, then parse, then pick the second-to-last NUL field.
        chunk = self.extractFromFile(dat_file, offset, nbyte)
        parsed = groparser.parse_entry(chunk, entry_id, offset, nbyte)
        return parsed.split('\0')[-2]

    return [text_of(entry_id, offset, nbyte)
            for entry_id, offset, nbyte in entries]