def compileNumeric2Term(edge):
    """
    Compile a numeric (or count) edge into an MVEL term expression.

    The generated expression maps the edge's value to an integer partition
    index, floor((value - ref) / interval), where ref is the domain's min,
    or the domain's max when only a max is given, or 0 when neither is
    given.  Values that fail the bounds test map to the number of
    partitions, which int2Partition translates back to the domain's NULL.

    :param edge: edge with `script`, `value` and a `domain` exposing `type`,
        `partitions`, `interval`, `min`, `max`, `NULL` and `getPartByKey`
    :return: Data with `toTerm` (dict holding the MVEL "head" and "body")
        and `fromTerm` (function from partition index back to a part)
    """
    if edge.script:
        Log.error("edge script not supported yet")

    if edge.domain.type != "numeric" and edge.domain.type != "count":
        Log.error("can only translate numeric domains")

    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if is_variable_name(value):
        # A BARE VARIABLE NAME IS READ FROM THE DOCUMENT
        value = "doc[\"" + value + "\"].value"

    # EVERY BRANCH DIVIDES BY THE SAME INTERVAL
    interval = value2MVEL(edge.domain.interval)

    # NOTE(review): THESE ARE TRUTHINESS TESTS, SO A min OR max OF 0 LOOKS
    # LIKE IT IS TREATED AS ABSENT - CONFIRM THAT IS INTENDED
    if not edge.domain.max:
        if not edge.domain.min:
            ref = 0
            # FIX: ORIGINAL EMITTED "Math.floor(v)/i)", WHICH HAS UNBALANCED PARENTHESES
            partition2int = "Math.floor((" + value + ")/" + interval + ")"
            nullTest = "false"
        else:
            ref = value2MVEL(edge.domain.min)
            partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
            nullTest = "" + value + "<" + ref
    elif not edge.domain.min:
        ref = value2MVEL(edge.domain.max)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
        nullTest = "" + value + ">=" + ref
    else:
        top = value2MVEL(edge.domain.max)
        ref = value2MVEL(edge.domain.min)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
        nullTest = "(" + value + "<" + ref + ") or (" + value + ">=" + top + ")"

    # FIX: numPartitions IS AN int; IT MUST BE CONVERTED BEFORE CONCATENATION,
    # OTHERWISE THIS LINE RAISES TypeError
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"
    offset = convert.value2int(ref)

    def int2Partition(value):
        # THE INDEX EQUAL TO THE NUMBER OF PARTITIONS IS THE NULL MARKER EMITTED ABOVE
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey((value * edge.domain.interval) + offset)

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def compileNumeric2Term(edge):
    """
    Translate a numeric (or count) edge into an MVEL term expression.

    The MVEL "body" computes an integer partition index as
    floor((value - ref) / interval).  ref is the domain's min when one is
    given, otherwise the domain's max when one is given, otherwise 0.
    When the bounds test is true the expression yields the number of
    partitions instead; int2Partition maps that index to the domain's NULL.

    :param edge: edge with `script`, `value` and a `domain` exposing `type`,
        `partitions`, `interval`, `min`, `max`, `NULL` and `getPartByKey`
    :return: Data with `toTerm` (dict holding the MVEL "head" and "body")
        and `fromTerm` (function from partition index back to a part)
    """
    if edge.script:
        Log.error("edge script not supported yet")

    if edge.domain.type != "numeric" and edge.domain.type != "count":
        Log.error("can only translate numeric domains")

    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if isKeyword(value):
        # A KEYWORD IS READ FROM THE DOCUMENT
        value = "doc[\"" + value + "\"].value"

    # THE SAME INTERVAL IS THE DIVISOR IN EVERY BRANCH
    interval = value2MVEL(edge.domain.interval)

    # NOTE(review): min/max ARE TESTED FOR TRUTHINESS, SO A BOUND OF 0
    # APPEARS TO BE HANDLED AS IF MISSING - VERIFY AGAINST CALLERS
    if not edge.domain.max:
        if not edge.domain.min:
            ref = 0
            # FIX: ORIGINAL PRODUCED "Math.floor(v)/i)" - ONE ")" TOO MANY
            partition2int = "Math.floor((" + value + ")/" + interval + ")"
            nullTest = "false"
        else:
            ref = value2MVEL(edge.domain.min)
            partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
            nullTest = "" + value + "<" + ref
    elif not edge.domain.min:
        ref = value2MVEL(edge.domain.max)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
        nullTest = "" + value + ">=" + ref
    else:
        top = value2MVEL(edge.domain.max)
        ref = value2MVEL(edge.domain.min)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
        nullTest = "(" + value + "<" + ref + ") or (" + value + ">=" + top + ")"

    # FIX: CONCATENATING THE int FROM len() TO A STRING RAISES TypeError;
    # CONVERT IT EXPLICITLY
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"
    offset = convert.value2int(ref)

    def int2Partition(value):
        # INDEX numPartitions IS WHAT THE MVEL EMITS WHEN nullTest IS TRUE
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey((value * edge.domain.interval) + offset)

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def main(settings):
    """
    Run one replication pass from the source index to the destination index.

    The start point is the earlier of two times: the time stored in the
    `settings.param.last_replication_time` file (epoch zero when the file
    does not exist), and one hour before the value returned by
    get_last_updated(destination).  After replicate() returns, the time
    captured at the start of this call is written back to the file in
    milliseconds.

    :param settings: configuration with `source`, `destination` and
        `param.last_replication_time`
    """
    # CAPTURED BEFORE ANY WORK SO THE NEXT RUN STARTS FROM HERE
    started_at = datetime.utcnow()
    marker_file = File(settings.param.last_replication_time)

    # CONNECT TO BOTH ENDS
    source = Index(settings.source)
    cluster = Cluster(settings.destination)
    destination = cluster.get_or_create_index(settings.destination)

    # WORK OUT WHERE TO RESUME FROM
    if marker_file.exists:
        stored_millis = convert.value2int(marker_file.read())
        file_time = convert.milli2datetime(stored_millis)
    else:
        file_time = None
    es_time = get_last_updated(destination) - timedelta(hours=1)
    epoch = convert.milli2datetime(0)
    since = MIN(coalesce(file_time, epoch), es_time)
    Log.note("updating records with modified_ts>={{last_updated}}", {"last_updated": since})

    pending = get_pending(source, since)
    with ThreadedQueue(destination, batch_size=1000) as sink:
        replicate(source, sink, pending, since)

    # REMEMBER WHEN THIS PASS STARTED
    started_millis = convert.datetime2milli(started_at)
    marker_file.write(unicode(started_millis))