Example #1
0
def datetime(value):
    """Render *value* as a "%Y-%m-%d %H:%M:%S" string.

    *value* may already be a date/datetime; otherwise numbers below
    10e9 are treated as unix seconds and larger ones as milliseconds.
    """
    if not _convert:
        _late_import()

    if isinstance(value, (date, builtin_datetime)):
        moment = value
    elif value < 10000000000:  # heuristic: small numbers are seconds
        moment = _convert.unix2datetime(value)
    else:
        moment = _convert.milli2datetime(value)

    return _convert.datetime2string(moment, "%Y-%m-%d %H:%M:%S")
Example #2
0
def unix(value):
    """Return *value* as a unix-seconds string.

    Accepts a date/datetime, a unix timestamp in seconds (< 10e9),
    or a timestamp in milliseconds.
    """
    if not _convert:
        _late_import()

    if not isinstance(value, (date, builtin_datetime)):
        # numeric input: decide seconds vs milliseconds by magnitude
        if value < 10000000000:
            value = _convert.unix2datetime(value)
        else:
            value = _convert.milli2datetime(value)

    return str(_convert.datetime2unix(value))
Example #3
0
def unix(value):
    """Convert *value* (date/datetime, unix seconds, or milliseconds)
    to a unix-seconds string."""
    if not _convert:
        _late_import()

    if isinstance(value, (date, builtin_datetime)):
        as_datetime = value
    elif value < 10000000000:  # below ~year 2286 in seconds => seconds
        as_datetime = _convert.unix2datetime(value)
    else:
        as_datetime = _convert.milli2datetime(value)

    return str(_convert.datetime2unix(as_datetime))
Example #4
0
def datetime(value):
    """Format *value* as "%Y-%m-%d %H:%M:%S".

    Numeric values below 10e9 are interpreted as unix seconds,
    anything larger as milliseconds; date/datetime objects pass
    through untouched.
    """
    if not _convert:
        _late_import()

    if not isinstance(value, (date, builtin_datetime)):
        # pick the converter by magnitude, then normalize to datetime
        to_datetime = _convert.unix2datetime if value < 10000000000 else _convert.milli2datetime
        value = to_datetime(value)

    return _convert.datetime2string(value, "%Y-%m-%d %H:%M:%S")
def get_last_updated(es):
    """Return the newest modified_ts in *es* as a datetime.

    Runs a facet-only search (size=0) over documents whose modified_ts
    is at least the module-level `far_back` horizon, and reads the max
    from the statistical facet.

    Falls back to the epoch (convert.milli2datetime(0)) when no
    documents match or when the search fails for any reason, so the
    caller simply re-scans from the beginning of time.
    """
    try:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {
                    "range": {
                        "modified_ts": {"gte": convert.datetime2milli(far_back)}
                    }}
            }},
            "from": 0,
            "size": 0,  # facet-only: we never need the hits themselves
            "sort": [],
            "facets": {"modified_ts": {"statistical": {"field": "modified_ts"}}}
        })

        if results.facets.modified_ts.count == 0:
            return convert.milli2datetime(0)
        return convert.milli2datetime(results.facets.modified_ts.max)
    except Exception:
        # Deliberate best-effort: an unreachable or empty index means
        # "sync everything from epoch" rather than aborting the run.
        # (Was Python-2-only `except Exception, e` with `e` unused.)
        return convert.milli2datetime(0)
def main(settings):
    """Replicate records modified since the last run from the source ES
    index to the destination index, then persist the new high-water mark."""
    run_started = datetime.utcnow()
    time_file = File(settings.param.last_replication_time)

    # SYNCH WITH source ES INDEX
    source = Index(settings.source)
    destination = Cluster(settings.destination).get_or_create_index(settings.destination)

    # GET LAST UPDATED: take the older of the file-recorded timestamp and
    # the destination's own latest record (minus a 1h overlap window), so
    # no modification is ever skipped
    if time_file.exists:
        from_file = convert.milli2datetime(convert.value2int(time_file.read()))
    else:
        from_file = None
    from_es = get_last_updated(destination) - timedelta(hours=1)
    last_updated = MIN(coalesce(from_file, convert.milli2datetime(0)), from_es)
    Log.note("updating records with modified_ts>={{last_updated}}", {"last_updated": last_updated})

    pending = get_pending(source, last_updated)
    with ThreadedQueue(destination, batch_size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # RECORD LAST UPDATED (the time this run began, not when it finished)
    time_file.write(unicode(convert.datetime2milli(run_started)))
def update_repo(repo, settings):
    """Pull *repo* and load the changesets missing from the `changesets`
    table: first extend coverage beyond the stored [min, max] date range,
    then back-fill holes in the revision sequence.

    Failures are logged as warnings rather than raised, so one bad repo
    does not abort a multi-repo sweep.
    """
    with MySQL(settings.database) as db:
        try:
            pull_repo(repo)

            # GET LATEST DATE
            existing_range = db.query("""
                        SELECT
                            max(`date`) `max`,
                            min(`date`) `min`,
                            min(revision) min_rev,
                            max(revision) max_rev
                        FROM
                            changesets
                        WHERE
                            repos={{repos}}
                    """, {"repos": repo.name})[0]

            # Two open-ended ranges: everything after the newest stored
            # changeset, and everything before the oldest one
            ranges = wrap([
                {"min": coalesce(existing_range.max, convert.milli2datetime(0)) + timedelta(days=1)},
                {"max": existing_range.min}
            ])

            for r in ranges:
                _insert_changesets(db, qb.groupby(get_changesets(date_range=r, repo=repo), size=100))

            missing_revisions = find_holes(db, "changesets", "revision", {"min": 0, "max": existing_range.max_rev + 1}, {"term": {"repos": repo.name}})
            for _range in missing_revisions:
                _insert_changesets(db, qb.groupby(get_changesets(revision_range=_range, repo=repo), size=100))
        except Exception as e:
            # `as e` replaces Python-2-only `except Exception, e`
            Log.warning("Failure to pull from {{repos.name}}", {"repos": repo}, e)


def _insert_changesets(db, grouped_docs):
    # Strip the bulky per-file change lists and truncate descriptions
    # before batch-inserting each group of changesets.
    for g, docs in grouped_docs:
        for doc in docs:
            doc.file_changes = None
            doc.file_adds = None
            doc.file_dels = None
            doc.description = doc.description[0:16000]

        db.insert_list("changesets", docs)
        db.flush()
Example #8
0
def update_repo(repo, settings):
    """Pull *repo* and load the changesets missing from the `changesets`
    table: first cover the dates outside the stored [min, max] range,
    then back-fill holes in the revision sequence.

    Any failure is logged as a warning rather than raised, so one bad
    repo does not abort a multi-repo sweep.
    """
    with MySQL(settings.database) as db:
        try:
            pull_repo(repo)

            # GET LATEST DATE
            existing_range = db.query(
                """
                        SELECT
                            max(`date`) `max`,
                            min(`date`) `min`,
                            min(revision) min_rev,
                            max(revision) max_rev
                        FROM
                            changesets
                        WHERE
                            repos={{repos}}
                    """, {"repos": repo.name})[0]

            # Everything newer than the newest stored changeset, and
            # everything older than the oldest one
            ranges = wrap([{
                "min":
                coalesce(existing_range.max, convert.milli2datetime(0)) +
                timedelta(days=1)
            }, {
                "max": existing_range.min
            }])

            for r in ranges:
                for g, docs in qb.groupby(get_changesets(date_range=r,
                                                         repo=repo),
                                          size=100):
                    for doc in docs:
                        # drop bulky per-file details; cap description size
                        doc.file_changes = None
                        doc.file_adds = None
                        doc.file_dels = None
                        doc.description = doc.description[0:16000]

                    db.insert_list("changesets", docs)
                    db.flush()

            missing_revisions = find_holes(db, "changesets", "revision", {
                "min": 0,
                "max": existing_range.max_rev + 1
            }, {"term": {
                "repos": repo.name
            }})
            for _range in missing_revisions:
                for g, docs in qb.groupby(get_changesets(revision_range=_range,
                                                         repo=repo),
                                          size=100):
                    for doc in docs:
                        # same scrubbing as the date-range pass above
                        doc.file_changes = None
                        doc.file_adds = None
                        doc.file_dels = None
                        doc.description = doc.description[0:16000]

                    db.insert_list("changesets", docs)
                    db.flush()

        except Exception as e:
            # `as e` replaces Python-2-only `except Exception, e`
            Log.warning("Failure to pull from {{repos.name}}", {"repos": repo},
                        e)