コード例 #1
0
def poll():
    """
    Poll every API in LINKEDIN_APIS for every organisation in LINKEDIN_ORGS
    and post the returned data points to the ingest API.

    A separate last-inserted marker is kept per (API, organisation) pair. Its
    key is the API type followed by the organisation name with spaces removed
    and upper-cased. The marker is read before polling (0 when none is
    stored), advanced to ``data_point['data']['timeRange']['end']`` for every
    data point the ingest API accepted, and written back afterwards.

    :return: True
    """
    client = authenticate()

    for org_name, org in LINKEDIN_ORGS.items():
        for api_type, api in LINKEDIN_APIS.items():
            doc_name = api_type + org_name.replace(" ", "").upper()

            last_inserted_timestamp = 0
            last_inserted_doc = PollerUtil.fetch_last_inserted_doc(doc_name)
            if last_inserted_doc:
                last_inserted_timestamp = int(last_inserted_doc)

            data_points = poll_daily_stats_data(client,
                                                org,
                                                org_name,
                                                api,
                                                last_inserted_timestamp)
            if not data_points:
                # Nothing new for this API: move on to the next one. A
                # `break` here would silently skip every remaining API type
                # for this organisation.
                continue

            for data_point in data_points:
                result = PollerUtil.post_to_ingest_api(data_point, api_type)
                # Only advance the marker past data points that were posted.
                if result is not None:
                    last_inserted_timestamp = data_point['data']['timeRange'][
                        'end']

            PollerUtil.upload_last_inserted_doc(last_inserted_timestamp,
                                                doc_name)

    return True
コード例 #2
0
def poll():
    """
    Poll every account in TWITTER_ACCOUNTS and post the returned data points
    to the ingest API.

    For each account the stored last-inserted id is fetched (1 when none is
    stored) and handed to poll_account_data together with the date seven days
    ago. The id is advanced to ``data_point['id']`` for every data point the
    ingest API accepted and is then written back under the account's entry in
    TWITTER_ACCOUNTS.

    :return: True
    """
    api = authenticate()
    week_ago = (datetime.now() - timedelta(days=7)).date()

    for account in TWITTER_ACCOUNTS:
        marker_key = TWITTER_ACCOUNTS[account]

        stored_marker = PollerUtil.fetch_last_inserted_doc(marker_key)
        newest_id = int(stored_marker) if stored_marker else 1

        for tweet in poll_account_data(api, account, newest_id, week_ago):
            posted = PollerUtil.post_to_ingest_api(tweet, TWITTER_ACCOUNT_TYPE)
            if posted is not None:
                newest_id = tweet['id']

        PollerUtil.upload_last_inserted_doc(newest_id, marker_key)

    return True
コード例 #3
0
def poll():
    """
    Fetch stats for every organisation in LINKEDIN_ORGS and post each data
    point to the ingest API as LINKEDIN_STATS_TYPE.
    """
    client = authenticate()

    for org_name, org in LINKEDIN_ORGS.items():
        for stats in poll_stats_data(client, org, org_name):
            PollerUtil.post_to_ingest_api(stats, LINKEDIN_STATS_TYPE)
コード例 #4
0
def post_commits(commits, repo):
    """
    Post the given commits and update the last-inserted marker for the repo.

    The marker is set to the id of the last commit for which the ingest API
    returned 200, and is stored under ``BITBUCKET_TYPE + repo["slug"]``.
    Nothing is stored when no commit was posted successfully.

    :param commits: A list of commits to post into ingest
    :param repo: the repo which the commits are associated with
    """
    last_inserted_id = None
    for commit in commits:
        result = PollerUtil.post_to_ingest_api(
            type=BITBUCKET_TYPE, data=commit)
        # Compare by value: `is 200` tests object identity, which only
        # happens to work through the interpreter's small-int caching.
        if result == 200:
            last_inserted_id = commit["id"]

    if last_inserted_id is not None:
        last_inserted_name = BITBUCKET_TYPE + repo["slug"]
        PollerUtil.upload_last_inserted_doc(
            last_inserted_doc=last_inserted_id, type=last_inserted_name)
コード例 #5
0
ファイル: yr_poller.py プロジェクト: knowit/Dataplattform-old
def poll():
    """
    Fetch YR data for the configured location and post every forecast to the
    ingest API.

    The location is read from the DATAPLATTFORM_YR_LOCATION environment
    variable, with a built-in default when it is unset. The stored
    last-inserted timestamp (0 when none is stored) is passed to get_yr_data,
    advanced to ``forecast["time_from"]`` for every forecast the ingest API
    accepted, and written back afterwards.

    :return: True
    """
    stored_marker = PollerUtil.fetch_last_inserted_doc(YR_TYPE)
    newest_timestamp = int(stored_marker) if stored_marker else 0

    location = os.getenv("DATAPLATTFORM_YR_LOCATION",
                         "Norway/Oslo/Oslo/Lakkegata")

    for forecast in get_yr_data(location, newest_timestamp):
        posted = PollerUtil.post_to_ingest_api(forecast, YR_TYPE)
        if posted is not None:
            newest_timestamp = forecast["time_from"]

    PollerUtil.upload_last_inserted_doc(newest_timestamp, YR_TYPE)
    return True
コード例 #6
0
def poll():
    """
    Fetch the UBW data and upload every entry that should_upload_ingest
    approves, given the current last-inserted marker.

    The marker starts as the stored value for UBW_TYPE and is replaced by the
    value returned from insert_new_ubw_data whenever that value is not None.
    The final marker is written back at the end.

    :return: True
    """
    marker = PollerUtil.fetch_last_inserted_doc(UBW_TYPE)

    for entry in fetch_ubw_data():
        if not should_upload_ingest(entry, marker):
            continue
        uploaded_marker = insert_new_ubw_data(entry)
        if uploaded_marker is not None:
            marker = uploaded_marker

    PollerUtil.upload_last_inserted_doc(marker, UBW_TYPE)
    return True
コード例 #7
0
def insert_new_ubw_data(doc):
    """
    Post a UBW document to the ingest API.

    :param doc: A ubw document.
    :return: ``doc["reg_period"]`` when the ingest API returned 200 (the
    caller uses it as the new last_inserted_doc), otherwise None.
    """
    status = PollerUtil.post_to_ingest_api(doc, UBW_TYPE)
    return doc["reg_period"] if status == 200 else None
コード例 #8
0
def poll():
    """
    Scrape the blog, post every doc that should_upload_ingest approves to the
    ingest API, and store the id of the first doc as the new marker.

    Posting stops at the first doc that should not be uploaded.
    NOTE(review): this presumably relies on create_docs returning the newest
    doc first - confirm against create_docs.

    :return: True, including when the scrape produced no docs.
    """
    # For this poller the stored marker is the id of the most recent doc.
    last_inserted_doc = PollerUtil.fetch_last_inserted_doc(KNOWITLABS_TYPE)

    html = get_html_from_blog()
    medium_data = get_medium_data_dict(html)
    docs = create_docs(medium_data)

    if not docs:
        # Nothing was scraped: there is nothing to post and no new marker,
        # and docs[0] below would raise IndexError.
        return True

    most_recent = docs[0]["id"]
    for doc in docs:
        if not should_upload_ingest(doc, last_inserted_doc):
            break
        PollerUtil.post_to_ingest_api(doc, KNOWITLABS_TYPE)

    # Skip the write when the marker did not change.
    if last_inserted_doc != most_recent:
        PollerUtil.upload_last_inserted_doc(most_recent, KNOWITLABS_TYPE)
    return True
コード例 #9
0
def poll():
    """
    Fetch stats for every organisation in LINKEDIN_ORGS, at most once per
    day.

    The stored marker for DOC_TYPE is parsed with DATETIME_FORMAT. When its
    date is today or later, nothing is polled. Otherwise every entry of each
    returned data point is posted to the ingest API, using the entry's key as
    the ingest type, and today's date is stored as the new marker.

    :return: False when polling was skipped, True otherwise.
    """
    today = datetime.now().date()

    stored_marker = PollerUtil.fetch_last_inserted_doc(DOC_TYPE)
    if stored_marker:
        marker_date = datetime.strptime(str(stored_marker),
                                        DATETIME_FORMAT).date()
        if marker_date >= today:
            return False

    client = authenticate()

    for org_name, org in LINKEDIN_ORGS.items():
        for data_point in poll_stats_data(client, org, org_name):
            for ingest_type, payload in data_point.items():
                PollerUtil.post_to_ingest_api(payload, ingest_type)

    PollerUtil.upload_last_inserted_doc(str(today), DOC_TYPE)
    return True
コード例 #10
0
def poll():
    """
    Run poll_search_data and post the returned data points to the ingest API.

    The stored last-inserted id for TWITTER_SEARCH_TYPE (1 when none is
    stored) is passed to poll_search_data. It is advanced to
    ``data_point['id']`` for every data point the ingest API accepted and is
    written back afterwards.

    :return: True
    """
    api = authenticate()

    stored_marker = PollerUtil.fetch_last_inserted_doc(TWITTER_SEARCH_TYPE)
    newest_id = int(stored_marker) if stored_marker else 1

    for tweet in poll_search_data(api, newest_id):
        posted = PollerUtil.post_to_ingest_api(tweet, TWITTER_SEARCH_TYPE)
        if posted is not None:
            newest_id = tweet['id']

    PollerUtil.upload_last_inserted_doc(newest_id, TWITTER_SEARCH_TYPE)
    return True
コード例 #11
0
def test_upload_and_fetch_last_inserted():
    """
    Check that a value stored with upload_last_inserted_doc is returned
    unchanged by fetch_last_inserted_doc, and that a second upload under the
    same type replaces the first.
    """
    table_name = POLLER_CONFIG["PollingStatusTableName"]
    os.environ["DATAPLATTFORM_POLLING_STATUS_TABLENAME"] = table_name

    marker_type = "TestingTestingType"

    # The second value overwrites the first under the same type.
    for expected in ("123", "12739jldfjlka"):
        PollerUtil.upload_last_inserted_doc(expected, marker_type)
        fetched = PollerUtil.fetch_last_inserted_doc(marker_type)
        assert fetched == expected
コード例 #12
0
def get_commits(repo):
    """
    Fetch the commits of the given repo that come after the stored
    last-inserted marker and prepare them for the ingest API.

    Each commit has its "committer" and "author" entries removed, gets the
    repo attached under "repo", and has both timestamps integer-divided by
    1000.

    :param repo: the repo to fetch commits for; its "slug" selects the marker
    :return: the commits sorted by "committerTimestamp", oldest first
    """
    marker = PollerUtil.fetch_last_inserted_doc("BitbucketType" + repo["slug"])
    query = {"since": marker}
    commits_url = f"{get_repo_url(repo)}/commits"
    fetched = get_all_pages(commits_url, params=query)

    for entry in fetched:
        for dropped_key in ("committer", "author"):
            del entry[dropped_key]
        entry["repo"] = repo
        # Bitbucket's timestamps carry three extra digits; drop them to get
        # unix timestamps.
        for stamp_key in ("authorTimestamp", "committerTimestamp"):
            entry[stamp_key] //= 1000

    # Oldest first, newest last.
    return sorted(fetched, key=lambda it: it["committerTimestamp"])
コード例 #13
0
def upload_last_inserted_doc(timestamp: str, data_type: str):
    """
    Store a timestamp as the last-inserted marker for a data type.

    The timestamp is first passed through format_string_containing_iso_date
    and the result is what gets stored.

    :param timestamp: string containing an ISO date
    :param data_type: the type the marker is stored under
    """
    formatted = format_string_containing_iso_date(iso_date_string=timestamp)
    PollerUtil.upload_last_inserted_doc(type=data_type,
                                        last_inserted_doc=formatted)