def get(self, source_id, source_lang, target_lang):
    """Look up the target segment id mapped to ``source_id`` in MongoDB.

    The collection name is derived from both languages through
    ``TMUtils.lang2es_index`` and ``TMUtils.es_index2mapdb``.

    :param source_id: id of the source segment to look up
    :param source_lang: language of the source segment
    :param target_lang: language of the wanted target segment
    :return: the ``target_id`` field of the first matching document,
             or ``None`` when no document matches
    """
    src_index = TMUtils.lang2es_index(source_lang)
    tgt_index = TMUtils.lang2es_index(target_lang)
    m_index = TMUtils.es_index2mapdb(src_index, tgt_index)
    # find_one() replaces find() + Cursor.count() + cursor[0]: one query
    # instead of several, and it does not rely on Cursor.count(), which
    # recent PyMongo releases no longer provide.
    doc = self.mongo_db[m_index].find_one({'source_id': source_id})
    if doc is None:
        return None
    return doc['target_id']
def add_segment(self, segment):
    """Write the mapping document of one segment into MongoDB.

    The document keyed by ``segment.source_id`` is updated with the fields
    produced by ``self._segment2doc``; it is inserted when it is missing.

    :param segment: object exposing ``source_lang``, ``target_lang`` and
                    ``source_id``
    :return: whatever ``update_one`` returns
    """
    source_index = TMUtils.lang2es_index(segment.source_lang)
    target_index = TMUtils.lang2es_index(segment.target_lang)
    collection = self.mongo_db[TMUtils.es_index2mapdb(source_index, target_index)]
    # TODO: do not update if creation date is older than existing one
    selector = {'source_id': segment.source_id}
    changes = {'$set': self._segment2doc(segment)}
    # upsert=True inserts the document when the selector matches nothing
    return collection.update_one(selector, changes, upsert=True)
def add_segments(self, segments):
    """Store the mapping documents of a batch of segments.

    The database name is derived from the languages of the FIRST segment
    only, so every segment of the batch ends up in that database.

    :param segments: indexable sequence of segments; an empty or ``None``
                     value is a no-op
    :return: the result of the database ``update`` call, or ``None`` when
             there is nothing to add
    """
    if not segments:
        return
    first = segments[0]
    m_index = TMUtils.es_index2mapdb(
        TMUtils.lang2es_index(first.source_lang),
        TMUtils.lang2es_index(first.target_lang))
    try:
        db = self.server[m_index]
    except Exception:
        # Lookup failed - presumably the database does not exist yet, so
        # create it.  ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        db = self.server.create(m_index)
    return db.update([self._segment2doc(s) for s in segments])
def get(self, source_id, source_lang, target_lang):
    """Look up the target segment id mapped to ``source_id`` in SQL.

    :param source_id: id of the source segment to look up
    :param source_lang: language of the source segment
    :param target_lang: language of the wanted target segment
    :return: the ``target_id`` of the first matching row, or ``None`` when
             no row matches
    :raises Exception: when no table is registered in ``self.tables`` for
                       the language pair
    """
    tname = TMUtils.es_index2mapdb(TMUtils.lang2es_index(source_lang),
                                   TMUtils.lang2es_index(target_lang))
    if tname not in self.tables:
        raise Exception("Language pair : {} - {} doesn't exist".format(
            source_lang, target_lang))
    # TODO: implement bidirectional query
    t = self.tables[tname]
    # Columns of a Table object are reached through ``t.c``; attribute
    # access directly on the table does not resolve column names.
    res = self.conn.execute(t.select().where(t.c.source_id == source_id))
    row = res.fetchone() if res else None
    # fetchone() yields None when nothing matches - do not index into it
    if row is None:
        return None
    return row.target_id
def get(self, lang, id):
    """Fetch the stored source of one document from Elasticsearch.

    :param lang: language used to derive the index name
    :param id: id of the document to fetch
    :return: the ``_source`` entry of the fetched document, or ``None``
             when the index does not exist or the lookup result is empty
    """
    es_index = TMUtils.lang2es_index(lang)
    if self.index_exists(es_index):
        document = self.es.get(index=es_index, id=id)
        return document['_source'] if document else None
    return None
def scan(self, lang, filter = None):
    """Iterate over the hits of a scan on the index of ``lang``.

    Yields nothing when the index does not exist.

    :param lang: language used to derive the index name
    :param filter: optional filter handed over to ``TMDbQuery``
    :return: generator over the hits produced by ``TMDbQuery.scan``
    """
    es_index = TMUtils.lang2es_index(lang)
    if self.index_exists(es_index):
        db_query = TMDbQuery(es=self.es, index=es_index, filter=filter)
        # Hits are passed through untouched
        for document in db_query.scan():
            yield document
def add_segments(self, segments):
    """Update the mapping documents of a batch of segments in one bulk call.

    The collection is chosen from the languages of the FIRST segment only;
    every update of the batch is sent to that collection.

    :param segments: iterable of segments; when it is empty nothing is
                     executed
    :return: the result of ``bulk.execute()``, the ``details`` of the
             ``BulkWriteError`` when the bulk write fails, or ``None`` for
             an empty batch
    """
    bulk = None
    for segment in segments:
        if bulk is None:
            # Lazily create the bulk operation from the first segment
            m_index = TMUtils.es_index2mapdb(
                TMUtils.lang2es_index(segment.source_lang),
                TMUtils.lang2es_index(segment.target_lang))
            bulk = self.mongo_db[m_index].initialize_unordered_bulk_op()
        # NOTE(review): no upsert is requested here, so a segment without an
        # existing document appears not to be inserted - confirm intended.
        bulk.find({'source_id': segment.source_id}) \
            .update_one({'$set': self._segment2doc(segment)})
    if bulk is None:
        # Empty batch: there is no bulk operation to execute
        return None
    try:
        result = bulk.execute()
    except BulkWriteError as bwe:
        result = bwe.details
        logging.error(bwe.details)
    return result
def mget(self, ids_lang):
    """Fetch several documents with a single Elasticsearch ``mget`` call.

    :param ids_lang: iterable of ``(lang, id)`` pairs
    :return: ``[]`` when ``ids_lang`` is empty, ``None`` when the response
             is empty, otherwise one entry per returned doc holding its
             ``_source`` (``None`` for docs without one)
    """
    if not ids_lang:
        return []
    docs = []
    for lang, doc_id in ids_lang:
        docs.append({'_index': TMUtils.lang2es_index(lang), '_id': doc_id})
    response = self.es.mget(body={'docs': docs})
    if not response:
        return None
    return [entry.get('_source', None) for entry in response['docs']]
def scan_pivot(self, pivot_lang, langs):
    """Yield the ids of pivot-language documents matching ``langs``.

    One ``match`` query on ``target_language`` is chained onto the search
    for every entry of ``langs``.  Yields nothing when the pivot index does
    not exist.

    :param pivot_lang: language used to derive the index to scan
    :param langs: iterable of languages to match against
    :return: generator over ``meta.id`` of every scanned result
    """
    pivot_index = TMUtils.lang2es_index(pivot_lang)
    if self.index_exists(pivot_index):
        request = Search(using=self.es, index=pivot_index)
        for language in langs:
            request = request.query('match', target_language=language)
        for found in request.scan():
            yield found.meta.id
def _get_index(self, source_lang, target_lang, create_missing=False):
    """Resolve the map index of a language pair, in either direction.

    :param source_lang: source language of the pair
    :param target_lang: target language of the pair
    :param create_missing: when true, create the direct index if neither
                           the direct nor the reverse one exists
    :return: ``(index_name, is_reversed)``; ``(None, None)`` when no index
             exists and ``create_missing`` is false
    """
    src_index = TMUtils.lang2es_index(source_lang)
    tgt_index = TMUtils.lang2es_index(target_lang)
    m_index = TMUtils.es_index2mapdb(src_index, tgt_index)
    if self.es.indices.exists(index=m_index):
        return m_index, False
    # Try reverse index
    r_index = TMUtils.es_index2mapdb(tgt_index, src_index)
    # Found reverse index - use it
    if self.es.indices.exists(index=r_index):
        return r_index, True
    if not create_missing:
        return None, None
    # Neither direct, nor reverse index exist - create a direct one
    try:
        self.es.indices.create(index=m_index)
    except Exception as e:
        # Creation stays best-effort (the direct index name is returned
        # regardless), but the failure is no longer swallowed silently.
        logging.warning("Failed to create index %s: %s", m_index, e)
    self.refresh_lang_graph()
    return m_index, False
def add_segment(self, segment, ftype):
    """Index one side of a segment into Elasticsearch.

    ``ftype`` is the attribute prefix selecting the side: the document id
    is read from ``<ftype>_id`` and the language from ``<ftype>_language``.

    :param segment: segment carrying the prefixed attributes
    :param ftype: attribute prefix of the side to index
    :return: the id under which the document was indexed
    """
    segment_id = getattr(segment, ftype + '_id')
    index_name = TMUtils.lang2es_index(getattr(segment, ftype + '_language'))
    # The response of the index call is not needed by callers
    self.es.index(index=index_name,
                  doc_type=self.DOC_TYPE,
                  id=segment_id,
                  body=self._segment2doc(segment, ftype))
    return segment_id
def _segment2table(self, segment, suffix=None):
    """Return the mapping table of the segment's language pair.

    Tables are cached in ``self.tables`` by name; a missing one is defined,
    cached and created (``checkfirst=True``) on first use.

    :param segment: segment providing ``source_lang`` and ``target_lang``
    :param suffix: optional string appended to the table name
    :return: the table registered under the resulting name
    """
    src_index = TMUtils.lang2es_index(segment.source_lang)
    tgt_index = TMUtils.lang2es_index(segment.target_lang)
    tname = TMUtils.es_index2mapdb(src_index, tgt_index)
    if suffix:
        tname = tname + suffix
    if tname in self.tables:
        return self.tables[tname]

    metadata = MetaData()
    table = Table(
        tname, metadata,
        Column('id', Integer, primary_key=True),
        Column('source_id', GUID, index=True),
        Column('target_id', GUID, index=True),
        Column('creation_date', TIMESTAMP),
        Column('change_date', TIMESTAMP),
        mysql_engine='InnoDB',
        mysql_charset='utf8')
    # Register first, then bind and create - same order as before
    self.tables[tname] = table
    metadata.bind = self.conn
    table.create(checkfirst=True)
    return table
def mquery(self, lang, limit, q_list, filter=None):
    """Run a list of queries against the index of ``lang``.

    Yields nothing when the index does not exist.

    :param lang: language used to derive the index name
    :param limit: limit handed over to ``TMDbQuery``
    :param q_list: queries handed over to ``TMDbQuery`` as ``q``
    :param filter: optional filter handed over to ``TMDbQuery``
    :return: generator over the responses, one per item produced by the
             query object
    """
    es_index = TMUtils.lang2es_index(lang)
    if self.index_exists(es_index):
        # Query source ES for the text
        multi_query = TMDbQuery(es=self.es, index=es_index, q=q_list,
                                filter=filter, limit=limit)
        for response, _unused_q in multi_query():
            yield response
def query(self, lang, qstring, filter = None):
    """Query the index of ``lang`` and iterate over the individual hits.

    Yields nothing when the index does not exist.

    :param lang: language used to derive the index name
    :param qstring: query handed over to ``TMDbQuery`` as ``q``
    :param filter: optional filter handed over to ``TMDbQuery``
    :return: generator over ``(hit, q)`` pairs, ``q`` being the second item
             the query object produced along with the response
    """
    es_index = TMUtils.lang2es_index(lang)
    if self.index_exists(es_index):
        # Query source ES for the text
        db_query = TMDbQuery(es=self.es, index=es_index, q=qstring,
                             filter=filter)
        for response, matched_q in db_query():
            for document in response:
                yield document, matched_q
def delete(self, lang, ids):
    """Delete documents from the index of ``lang`` with one bulk request.

    :param lang: language used to derive the index name
    :param ids: iterable of document ids to delete
    :return: the result of ``helpers.bulk``; when the bulk call raises, the
             exception is logged as a warning and its string form returned
    """
    es_index = TMUtils.lang2es_index(lang)
    actions = []
    for doc_id in ids:
        actions.append({'_op_type': 'delete',
                        '_id': doc_id,
                        '_index': es_index,
                        '_type': self.DOC_TYPE})
    # Bulk delete
    try:
        return helpers.bulk(self.es, actions)
    except Exception as err:
        logging.warning(err)
        return str(err)
def _segment2es_bulk(self, segments, ftype, op_type, f_action):
    """Send one side of a batch of segments to Elasticsearch in bulk.

    ``ftype`` is the attribute prefix selecting the side: ids are read from
    ``<ftype>_id`` and languages from ``<ftype>_language``.  Only the first
    segment seen for a given id is included in the batch.

    :param segments: iterable of segments
    :param ftype: attribute prefix of the side to send
    :param op_type: value stored as ``_op_type`` of every action
    :param f_action: callable ``(segment, ftype)`` building ``_source``
    :return: the result of ``helpers.bulk``
    """
    actions = []
    added_ids = set()
    # Loop-invariant attribute names
    id_attr = ftype + '_id'
    lang_attr = ftype + '_language'
    for segment in segments:
        segment_id = getattr(segment, id_attr)
        if segment_id in added_ids:
            continue  # avoid duplicates in the same batch
        added_ids.add(segment_id)
        actions.append({
            '_id': segment_id,
            '_index': TMUtils.lang2es_index(getattr(segment, lang_attr)),
            '_type': self.DOC_TYPE,
            '_op_type': op_type,
            '_source': f_action(segment, ftype),
        })
    # Bulk insert.  Lazy %-style arguments: the (potentially large) action
    # list is only rendered when INFO logging is actually enabled.
    logging.info("Bulk upsert: %s", actions)
    s_result = helpers.bulk(self.es, actions)
    self.refresh()  # refresh list of indexes (could have been created during insert)
    return s_result
def get(self, source_id, source_lang, target_lang):
    """Look up the target segment id mapped to ``source_id``.

    The document is fetched by ``source_id.hex`` from the database named
    after the language pair.

    :param source_id: id of the source segment; its ``hex`` attribute is
                      used as document key
    :param source_lang: language of the source segment
    :param target_lang: language of the wanted target segment
    :return: ``uuid.UUID`` built from the document's ``target_id``, or
             ``None`` when no document is found
    """
    src_index = TMUtils.lang2es_index(source_lang)
    tgt_index = TMUtils.lang2es_index(target_lang)
    database = self.server[TMUtils.es_index2mapdb(src_index, tgt_index)]
    mapping = database.get(source_id.hex)
    if not mapping:
        return None
    return uuid.UUID(mapping['target_id'])
def add_segment(self, segment):
    """Store the mapping document of a single segment.

    Mirrors the batch variant ``add_segments``: when the database of the
    language pair cannot be opened it is created instead of failing.

    :param segment: segment providing ``source_lang`` and ``target_lang``
    :return: the result of the database ``update`` call
    """
    m_index = TMUtils.es_index2mapdb(
        TMUtils.lang2es_index(segment.source_lang),
        TMUtils.lang2es_index(segment.target_lang))
    try:
        db = self.server[m_index]
    except Exception:
        # Lookup failed - presumably the database does not exist yet
        db = self.server.create(m_index)
    return db.update([self._segment2doc(segment)])