def create_document_to_score():
    """Build a document with two chunks that each carry two scored matches.

    Values assigned by this builder::

        doc: '1'
        |- chunk: '2'
        |   |- match: (id: '4', parent_id: '40', length: 4, score.value: 4)
        |   |- match: (id: '5', parent_id: '50', length: 5, score.value: 5)
        |- chunk: '3'
            |- match: (id: '6', parent_id: '60', length: 6, score.value: 6)
            |- match: (id: '7', parent_id: '70', length: 7, score.value: 7)

    Every match's score references the id of the chunk it belongs to, and
    every match is tagged with ``price`` (read back from its score value)
    and ``discount`` (``DISCOUNT_VAL``).

    :return: the populated root ``Document``
    """
    root = Document()
    root.id = '1'

    for chunk_number in range(2, 4):
        chunk = Document()
        chunk.id = str(chunk_number)

        for offset in range(2):
            # Match ids are derived from the chunk: 2 * chunk + {0, 1}.
            match_number = 2 * chunk_number + offset

            match = Document()
            match.id = str(match_number)
            match.parent_id = str(10 * match_number)
            match.length = match_number
            # to be used by MaxRanker and MinRanker
            match.score = NamedScore(value=match_number, ref_id=chunk.id)
            # Read the price back from the score so both hold the same value.
            match.tags['price'] = match.score.value
            match.tags['discount'] = DISCOUNT_VAL

            # Append only once the match is fully populated.
            chunk.matches.append(match)

        # Likewise, attach the chunk only after all its matches are in place.
        root.chunks.append(chunk)

    return root
def create_chunk_chunk_matches_to_score():
    """Build a three-level document whose deepest chunks carry scored matches.

    Values assigned by this builder (before any adjustment the ``chunks`` /
    ``matches`` containers may apply on ``append``)::

        doc: (id: '100', granularity: 0)
        |- chunk: (id: '101', parent_id: doc.id, granularity: doc's + 1)
            |- chunk: (id: '10', parent_id: '1', granularity: chunk's + 1)
            |   |- match: (id: '12', parent_id: '1', score.value: 2, length: 4)
            |   |- match: (id: '13', parent_id: '1', score.value: 3, length: 4)
            |- chunk: (id: '20', parent_id: '2', granularity: chunk's + 1)
                |- match: (id: '24', parent_id: '2', score.value: 4, length: 4)
                |- match: (id: '25', parent_id: '2', score.value: 5, length: 4)

    Each match id is ``10 * parent + score``, and each match's score
    references the id of the nested chunk that holds it.

    :return: ``Document(doc)`` built from the populated root document
    """
    matches_per_chunk = 2

    root = Document()
    root.id = '100'
    root.granularity = 0

    middle = Document()
    middle.id = '101'
    middle.parent_id = root.id
    middle.granularity = root.granularity + 1

    for parent_number in (1, 2):
        leaf = Document()
        leaf.id = str(parent_number * 10)
        leaf.parent_id = str(parent_number)
        leaf.granularity = middle.granularity + 1

        # Scores for this leaf start at twice its parent number.
        first_score = parent_number * 2
        for score in range(first_score, first_score + matches_per_chunk):
            hit = Document()
            hit.parent_id = str(parent_number)
            hit.score = NamedScore(value=score, ref_id=leaf.id)
            hit.id = str(10 * parent_number + score)
            hit.length = 4
            # Append only once the match is fully populated.
            leaf.matches.append(hit)

        # Attach the leaf after its matches have been added.
        middle.chunks.append(leaf)

    # The middle chunk joins the root last, once its whole subtree is built.
    root.chunks.append(middle)
    return Document(root)
def create_document_to_score_same_depth_level():
    """Build a document whose four scored matches hang directly off the root.

    Values assigned by this builder::

        doc: 1
        |- match: (id: 2, parent_id: 20, score.value: 30, weight: 3)
        |- match: (id: 3, parent_id: 20, score.value: 40, weight: 4)
        |- match: (id: 4, parent_id: 30, score.value: 20, weight: 2)
        |- match: (id: 5, parent_id: 30, score.value: 10, weight: 1)

    Ids are assigned as integers here; any conversion is left to
    ``Document``. Every match's score references the root document's id.

    :return: the populated root ``Document``
    """
    # One row per match: (id, parent_id, score value, weight).
    match_rows = (
        (2, 20, 30, 3),
        (3, 20, 40, 4),
        (4, 30, 20, 2),
        (5, 30, 10, 1),
    )

    root = Document()
    root.id = 1

    for row in match_rows:
        ident, parent, score_value, weight_value = row

        hit = Document()
        hit.id = ident
        hit.parent_id = parent
        hit.weight = weight_value
        hit.score = NamedScore(value=score_value, ref_id=root.id)
        # Append only once the match is fully populated.
        root.matches.append(hit)

    return root