def datetime(value):
    """
    Render `value` as a "%Y-%m-%d %H:%M:%S" formatted string.

    `value` may already be a date/datetime instance, in which case it is
    formatted as-is.  Otherwise it is compared against 10000000000: smaller
    values are converted with `_convert.unix2datetime`, everything else with
    `_convert.milli2datetime`.
    """
    if not _convert:
        # the converter module is loaded lazily on first use
        _late_import()

    if not isinstance(value, (date, builtin_datetime)):
        # magnitude decides which converter handles the number
        if value < 10000000000:
            to_datetime = _convert.unix2datetime
        else:
            to_datetime = _convert.milli2datetime
        value = to_datetime(value)

    return _convert.datetime2string(value, "%Y-%m-%d %H:%M:%S")
def unix(value):
    """
    Render `value` as the string form of `_convert.datetime2unix(...)`.

    Date/datetime instances are passed straight through.  Any other value is
    first turned into a datetime: values below 10000000000 go through
    `_convert.unix2datetime`, the rest through `_convert.milli2datetime`.
    """
    if not _convert:
        # the converter module is loaded lazily on first use
        _late_import()

    if isinstance(value, (date, builtin_datetime)):
        moment = value
    elif value < 10000000000:
        moment = _convert.unix2datetime(value)
    else:
        moment = _convert.milli2datetime(value)

    unix_time = _convert.datetime2unix(moment)
    return str(unix_time)
def get_last_updated(es):
    """
    Return the largest `modified_ts` found in `es`, as a datetime.

    A statistical facet over `modified_ts` is requested (no documents are
    returned, `size` is 0), restricted to records with `modified_ts` at or
    after `far_back`.

    :param es: index object exposing `search(query)`
    :return: `convert.milli2datetime(max modified_ts)`; or
             `convert.milli2datetime(0)` when the facet counts zero records
             or when the lookup fails for any reason (best-effort: the
             failure is logged as a warning, never raised)
    """
    try:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"range": {
                    "modified_ts": {"gte": convert.datetime2milli(far_back)}
                }}
            }},
            "from": 0,
            "size": 0,
            "sort": [],
            "facets": {"modified_ts": {"statistical": {"field": "modified_ts"}}}
        })

        stats = results.facets.modified_ts
        if stats.count == 0:
            # nothing matched: report the start of time
            return convert.milli2datetime(0)
        return convert.milli2datetime(stats.max)
    except Exception as e:
        # Keep the original fallback value, but make the failure visible:
        # silently returning the start of time hides a broken lookup.
        Log.warning("Can not get last updated time, assuming no records exist", {}, e)
        return convert.milli2datetime(0)
def main(settings):
    """
    Replicate records from the source index to the destination index.

    The starting point is the smaller of two times: the time stored in the
    `settings.param.last_replication_time` file (zero time if the file does
    not exist), and the destination's last update (per `get_last_updated`)
    minus one hour.  After replication, the time captured at the start of
    this run is written back to that file.

    :param settings: object providing `source`, `destination` and
                     `param.last_replication_time`
    """
    # captured before any other work; this is what gets recorded at the end
    run_started = datetime.utcnow()
    time_file = File(settings.param.last_replication_time)

    # SYNCH WITH source ES INDEX
    source = Index(settings.source)
    cluster = Cluster(settings.destination)
    destination = cluster.get_or_create_index(settings.destination)

    # GET LAST UPDATED
    if time_file.exists:
        recorded_millis = convert.value2int(time_file.read())
        from_file = convert.milli2datetime(recorded_millis)
    else:
        from_file = None
    from_es = get_last_updated(destination) - timedelta(hours=1)
    from_file_or_zero = coalesce(from_file, convert.milli2datetime(0))
    last_updated = MIN(from_file_or_zero, from_es)

    Log.note(
        "updating records with modified_ts>={{last_updated}}",
        {"last_updated": last_updated}
    )

    pending = get_pending(source, last_updated)
    with ThreadedQueue(destination, batch_size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # RECORD LAST UPDATED
    finished_marker = convert.datetime2milli(run_started)
    time_file.write(unicode(finished_marker))
def _insert_changesets(db, changesets):
    """Strip bulky fields from `changesets` and insert them into the `changesets` table, one `qb.groupby(size=100)` group at a time."""
    for _, docs in qb.groupby(changesets, size=100):
        for doc in docs:
            # per-file details are not stored; description is capped at 16000 characters
            doc.file_changes = None
            doc.file_adds = None
            doc.file_dels = None
            doc.description = doc.description[0:16000]

        db.insert_list("changesets", docs)
        db.flush()


def update_repo(repo, settings):
    """
    Pull `repo` and copy its changesets into the `changesets` database table.

    Two passes are made:
      1. by date: changesets from one day after the newest stored date
         onward, and changesets up to the oldest stored date
      2. by revision: every range reported by `find_holes` between 0 and
         the largest stored revision + 1 (presumably the revision numbers
         missing from the table - confirm against `find_holes`)

    Any exception is logged as a warning and not re-raised.

    :param repo: repository description; `repo.name` selects the stored rows
    :param settings: object providing `database` connection settings
    """
    with MySQL(settings.database) as db:
        try:
            pull_repo(repo)

            # GET LATEST DATE
            existing_range = db.query("""
                SELECT
                    max(`date`) `max`,
                    min(`date`) `min`,
                    min(revision) min_rev,
                    max(revision) max_rev
                FROM
                    changesets
                WHERE
                    repos={{repos}}
            """, {"repos": repo.name})[0]

            ranges = wrap([
                {"min": coalesce(existing_range.max, convert.milli2datetime(0)) + timedelta(days=1)},
                {"max": existing_range.min}
            ])

            for r in ranges:
                _insert_changesets(db, get_changesets(date_range=r, repo=repo))

            missing_revisions = find_holes(
                db,
                "changesets",
                "revision",
                {"min": 0, "max": existing_range.max_rev + 1},
                {"term": {"repos": repo.name}}
            )
            for _range in missing_revisions:
                _insert_changesets(db, get_changesets(revision_range=_range, repo=repo))
        except Exception as e:
            Log.warning("Failure to pull from {{repos.name}}", {"repos": repo}, e)
def _insert_changesets(db, changesets):
    """Strip bulky fields from `changesets` and insert them into the `changesets` table, one `qb.groupby(size=100)` group at a time."""
    for _, docs in qb.groupby(changesets, size=100):
        for doc in docs:
            # per-file details are not stored; description is capped at 16000 characters
            doc.file_changes = None
            doc.file_adds = None
            doc.file_dels = None
            doc.description = doc.description[0:16000]

        db.insert_list("changesets", docs)
        db.flush()


def update_repo(repo, settings):
    """
    Pull `repo` and copy its changesets into the `changesets` database table.

    Two passes are made:
      1. by date: changesets from one day after the newest stored date
         onward, and changesets up to the oldest stored date
      2. by revision: every range reported by `find_holes` between 0 and
         the largest stored revision + 1 (presumably the revision numbers
         missing from the table - confirm against `find_holes`)

    Any exception is logged as a warning and not re-raised.

    :param repo: repository description; `repo.name` selects the stored rows
    :param settings: object providing `database` connection settings
    """
    with MySQL(settings.database) as db:
        try:
            pull_repo(repo)

            # GET LATEST DATE
            existing_range = db.query(
                """
                SELECT
                    max(`date`) `max`,
                    min(`date`) `min`,
                    min(revision) min_rev,
                    max(revision) max_rev
                FROM
                    changesets
                WHERE
                    repos={{repos}}
                """,
                {"repos": repo.name})[0]

            ranges = wrap([{
                "min": coalesce(existing_range.max, convert.milli2datetime(0)) + timedelta(days=1)
            }, {
                "max": existing_range.min
            }])

            for r in ranges:
                _insert_changesets(db, get_changesets(date_range=r, repo=repo))

            missing_revisions = find_holes(db, "changesets", "revision", {
                "min": 0,
                "max": existing_range.max_rev + 1
            }, {"term": {
                "repos": repo.name
            }})
            for _range in missing_revisions:
                _insert_changesets(db, get_changesets(revision_range=_range, repo=repo))
        except Exception as e:
            Log.warning("Failure to pull from {{repos.name}}", {"repos": repo}, e)