def test_single_add(self):
    """
    Run to_fix_point() for one specific bug (961592).

    Pulls the bug_id and dependson of that bug from the source index (only
    if it has a dependson field) and hands the query result to
    to_fix_point() together with a query interface on the destination index.
    """
    src_index = Index(self.settings.source)
    src_query = FromES(src_index)
    dst_index = Index(self.settings.destination)
    dst_query = FromES(dst_index)

    # FILTER: JUST THE ONE BUG, AND ONLY IF IT DEPENDS ON SOMETHING
    only_this_bug = {"term": {"bug_id": 961592}}
    has_dependson = {"exists": {"field": "dependson"}}

    request = {
        "from": self.settings.source.alias,
        "select": ["bug_id", "dependson"],
        "where": {"and": [only_this_bug, has_dependson]}
    }
    found = src_query.query(request)

    to_fix_point(self.settings, dst_query, found)
def _max_bug_id_request(index_name):
    """Return a fresh query asking `index_name` for the maximum bug_id."""
    return {
        "from": index_name,
        "select": {"name": "max_bug_id", "value": "bug_id", "aggregate": "max"}
    }


def full_etl(settings):
    """
    Copy bug dependency information from the source index to the destination.

    Steps:
      1. Ensure the destination index exists (created from SCHEMA if needed).
      2. Pick the bug_id range to scan: from 0, or - when
         settings.incremental is set - from 1000 below the destination's
         current maximum bug_id, up to the source's maximum bug_id + 1.
      3. Walk that range in blocks of 10000 bug ids, pulling every bug with
         a `dependson` or `blocked` field and passing it to to_fix_point().
      4. Pull the bugs whose modified_ts falls in the last 7 days and pass
         them to to_fix_point() as well.

    :param settings: configuration with `source` and `destination` index
        settings (each with `alias` and/or `index`) and an `incremental` flag
    """
    schema = convert.json2value(convert.value2json(SCHEMA), leaves=True)
    Cluster(settings.destination).get_or_create_index(settings=settings.destination, schema=schema, limit_replicas=True)
    destq = FromES(settings.destination)

    if settings.incremental:
        dest_name = coalesce(settings.destination.alias, settings.destination.index)
        min_bug_id = destq.query(_max_bug_id_request(dest_name))
        # REWIND A LITTLE, SO BUGS NEAR THE PREVIOUS HIGH-WATER MARK ARE REDONE
        min_bug_id = int(MAX(min_bug_id - 1000, 0))
    else:
        min_bug_id = 0

    sourceq = FromES(settings.source)
    # RESOLVE THE SOURCE NAME ONCE, SO EVERY QUERY BELOW FALLS BACK TO THE
    # INDEX NAME WHEN NO ALIAS IS CONFIGURED (PREVIOUSLY ONLY THE max QUERY DID)
    source_name = coalesce(settings.source.alias, settings.source.index)

    # NOTE(review): the +1 happens before coalesce(); presumably arithmetic on
    # a missing result stays missing, so an empty source yields 0 - confirm
    max_bug_id = sourceq.query(_max_bug_id_request(source_name)) + 1
    max_bug_id = int(coalesce(max_bug_id, 0))

    # FIRST, GET ALL MISSING BUGS
    for s, e in qb.reverse(list(qb.intervals(min_bug_id, max_bug_id, 10000))):
        with Timer("pull {{start}}..{{end}} from ES", {"start": s, "end": e}):
            children = sourceq.query({
                "from": source_name,
                "select": ["bug_id", "dependson", "blocked", "modified_ts", "expires_on"],
                "where": {"and": [
                    {"range": {"bug_id": {"gte": s, "lt": e}}},
                    {"or": [
                        {"exists": "dependson"},
                        {"exists": "blocked"}
                    ]}
                ]},
                "limit": 10000
            })

        with Timer("fixpoint work"):
            to_fix_point(settings, destq, children.data)

    # PROCESS RECENT CHANGES
    with Timer("pull recent dependancies from ES"):
        one_week_ago = convert.datetime2milli(datetime.utcnow() - timedelta(days=7))
        children = sourceq.query({
            "from": source_name,
            "select": ["bug_id", "dependson", "blocked"],
            "where": {"and": [
                {"range": {"modified_ts": {"gte": one_week_ago}}},
                {"or": [
                    {"exists": "dependson"},
                    {"exists": "blocked"}
                ]}
            ]},
            "limit": 100000
        })

    to_fix_point(settings, destq, children.data)