import requests


def extract_from_datazilla_using_id(es, settings, transformer):

    existing_ids = get_existing_ids(es, settings, transformer.pushlog.keys())
    max_existing_id = nvl(MAX(existing_ids), settings.production.min)
    #holes ARE IDS BELOW THE MAX THAT WERE NEVER LOADED; missing_ids ALSO
    #INCLUDES THE NEXT BATCH OF IDS BEYOND THE MAX
    holes = set(range(settings.production.min, max_existing_id)) - existing_ids
    missing_ids = set(range(settings.production.min, max_existing_id + nvl(settings.production.step, NUM_PER_BATCH))) - existing_ids

    Log.note("Number missing: {{num}}", {"num": len(missing_ids)})
    Log.note("Number in holes: {{num}}", {"num": len(holes)})
    #BULK LOADING IS FASTER WITH AUTOMATIC INDEX REFRESH TURNED OFF
    es.set_refresh_interval(-1)
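    #A refresh_interval OF -1 TURNS OFF ES'S PERIODIC INDEX REFRESH FOR THE
    #DURATION OF THE LOAD; PRESUMABLY A POSITIVE INTERVAL IS RESTORED ONCE
    #LOADING COMPLETES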

    #SCANNING THE LOCAL FILE IS FASTER THAN RE-PULLING OVER THE NETWORK
    if (len(holes) > 10000 or settings.args.scan_file or settings.args.restart) and File(settings.param.output_file).exists:
        #ASYNC PUSH TO ES IN BATCHES (batch_size, DEFAULT 100)
        with Timer("Scan file for missing ids"):
            with ThreadedQueue(es, size=nvl(es.settings.batch_size, 100)) as json_for_es:
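                #ThreadedQueue IS ASSUMED TO BUFFER RECORDS AND BULK-INDEX THEM
                #TO es FROM A SEPARATE THREAD; EXITING THE with BLOCK FLUSHES
                #WHATEVER REMAINS IN THE BUFFER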
                num = 0
                for line in File(settings.param.output_file):
                    id = None  #DEFINED BEFORE try SO THE except CLAUSE CAN ALWAYS REFERENCE IT
                    try:
                        if not line.strip():
                            continue
                        col = line.split("\t")
                        id = int(col[0])
                        if id < settings.production.min:
                            continue
                        if id in existing_ids:
                            continue

                        if num > settings.production.step:
                            return  #STOP AFTER ONE BATCH; THE NEXT RUN CONTINUES FROM HERE
                        num += 1

                        with Profiler("decode and transform"):
                            data = CNV.JSON2object(col[-1])
                            if data.test_run_id:
                                with Profiler("transform"):
                                    data = transformer.transform(id, data)
                                json_for_es.extend({"value": d} for d in data)
                                Log.note("Added {{id}} from file", {"id": id})

                                existing_ids.add(id)
                            else:
                                Log.note("Skipped {{id}} from file (no test_run_id)", {"id": id})
                                num -= 1

                    except Exception as e:
                        Log.warning("Bad line id={{id}} ({{length}} bytes):\n\t{{prefix}}", {
                            "id": id,
                            "length": len(CNV.object2JSON(line)),
                            "prefix": CNV.object2JSON(line)[0:130]
                        }, e)
        missing_ids = missing_ids - existing_ids
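

# get_existing_ids() is referenced above but not defined in this fragment. A
# minimal sketch of what it is assumed to do: ask ES for the distinct ids
# already indexed so the scan can skip them. The query shape, facet size, and
# field name ("id") are all assumptions, not the project's confirmed code.
def get_existing_ids_sketch(es):
    result = es.search({
        "query": {"match_all": {}},
        "size": 0,
        "facets": {"ids": {"terms": {"field": "id", "size": 200000}}}
    })
    return set(t.term for t in result.facets.ids.terms)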


def arrays_add(id, path, r):
    #RECURSIVELY RECORD THE LOCATION AND LENGTH OF EVERY ARRAY OF NUMBERS IN r
    try:
        if isinstance(r, dict):
            for k, v in list(r.items()):
                arrays_add(id, path + "[" + k + "]", v)
        elif isinstance(r, list):
            try:
                values = [float(v) for v in r]
                arrays.append([id, path, len(values), 1])
            except Exception:
                #NOT A PURE ARRAY OF NUMBERS; DESCEND INTO EACH ELEMENT
                for i, v in enumerate(r):
                    arrays_add(id, path + "[" + str(i) + "]", v)
    except Exception as e:
        Log.warning("Cannot summarize: {{json}}", {"json": CNV.object2JSON(r)}, e)


def file2db(settings, records_for_db):
    #LOAD TEST METADATA FROM THE TAB-DELIMITED FILE INTO THE DATABASE QUEUE
    #(FUNCTION NAME, SIGNATURE, AND LOOP HEADER ARE ASSUMED; records_for_db IS
    #TAKEN TO BE A BATCHED DB INSERT QUEUE)
    added = set()
    for line in File(settings.param.output_file):
        try:
            if not line.strip():
                continue
            col = line.split("\t")
            id = int(col[0])
            if id in added:
                continue
            added.add(id)

            data = CNV.JSON2object(col[1])
            records_for_db.add({
                "id": nvl(data.test_run_id, id),
                "branch": data.json_blob.test_build.branch,
                "name": data.json_blob.test_build.name,
                "version": data.json_blob.test_build.version,
                "suite": data.json_blob.testrun.suite,
                "revision": data.json_blob.test_build.revision,
                "date": data.json_blob.testrun.date
            })
            Log.note("Added {{id}} from file", {"id": data.test_run_id})
        except Exception as e:
            Log.warning("Bad line ({{length}} bytes):\n\t{{prefix}}", {
                "length": len(CNV.object2JSON(line)),
                "prefix": CNV.object2JSON(line)[0:130]
            }, e)
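

# A sketch of the b2g_tests table the records above appear destined for
# (main() below drops it before a reload); column names come from the fields
# placed on records_for_db, but the types are assumptions.
B2G_TESTS_SCHEMA = """
    CREATE TABLE b2g_tests (
        id       INTEGER NOT NULL PRIMARY KEY,
        branch   VARCHAR(100),
        name     VARCHAR(100),
        version  VARCHAR(100),
        suite    VARCHAR(200),
        revision VARCHAR(80),
        date     DOUBLE
    )
"""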



def main():
    try:
        settings = startup.read_settings(filename="file2db_settings.json")
        Log.start(settings.debug)


        with DB(settings.db) as db:
            db.execute("""
                DROP TABLE IF EXISTS b2g_tests
            """)
        with Timer("read {{id}} from DZ", {"id": id}):
            content = requests.get(url, timeout=nvl(settings.production.timeout, 30)).content
    except Exception, e:
        Log.warning("Failure to read from {{url}}", {"url": url}, e)
        return False

    try:
        if content.startswith("Id not found"):
            Log.note("{{id}} not found {{url}}", {"id": id, "url": url})
            #CONTINUE THE SCAN ONLY WHILE THERE ARE KNOWN IDS FURTHER ALONG
            return id < max_id

        data = CNV.JSON2object(content.decode('utf-8'))
        content = CNV.object2JSON(data)  #ENSURE content HAS NO crlf

        if data.test_run_id:
            Log.println("Add {{id}} for revision {{revision}} ({{bytes}} bytes)", {
                "id": id,
                "revision": data.json_blob.test_build.revision,
                "bytes": len(content)
            })
            with Profiler("transform"):
                result = transformer.transform(id, data)

            if result:
                Log.println("{{num}} records to add", {
                    "num": len(result)
                })
                es_sink.extend({"value": d} for d in result)