Exemple #1
0
def stars_mature(num_days):
    """Store per-day star/fork counts for mature repositories in status 'new'.

    Up to 100 repositories with ``mature`` set and status ``'new'`` are
    selected.  For each one a ``Job`` (created with ``batch=True``) is started
    that counts WatchEvent/ForkEvent rows grouped by year and day of year,
    beginning ``num_days`` days before now.  After every job has been started,
    the rows of each job are saved as ``RepoStars`` records, the repository
    status is set to ``'unknown'`` and the session is committed.

    :param num_days: number of days before now at which the counted period starts
    """
    since = (datetime.now() + timedelta(days=-num_days)).strftime('%Y-%m-%d')
    bq = bigquery.instance(app)
    sql_template = """
        SELECT
            COUNT(1) AS stars, YEAR(created_at) AS y, DAYOFYEAR(created_at) AS doy
        FROM
            TABLE_DATE_RANGE(
                githubarchive:day.events_, TIMESTAMP('{date_from}'), CURRENT_TIMESTAMP()
            )
        WHERE repo.id = {id} AND type IN ('WatchEvent', 'ForkEvent')
        GROUP BY y, doy
    """

    candidates = (
        Repo.query
        .filter(Repo.mature.is_(True))
        .filter(Repo.status == 'new')
        .limit(100)
    )

    # Phase 1: start one job per repository before reading any results.
    pending = []
    for repo in candidates:
        sql = sql_template.format(id=repo.id, date_from=since)
        job = Job(bq, sql, batch=True)
        job.execute()
        pending.append((job, repo))

    # Phase 2: persist the rows of each job, committing once per repository.
    for job, repo in pending:
        for row in results_of(job):
            record = RepoStars(repo_id=repo.id, stars=row[0], year=row[1], day=row[2])
            db.session.add(record)

        repo.status = 'unknown'

        db.session.commit()
def most_starred_sync(model_name: str, query: str):
    """Replace the contents of a report model with the results of ``query``.

    The model is looked up by name on ``report``.  All of its existing rows
    are deleted, then every result row is merged back in together with a
    ``Repo`` built from ``repo_info(row[1])``.  Rows for which ``repo_info``
    returns a falsy info value are skipped.  The session is committed once
    at the end.

    :param model_name: attribute name of the model class on ``report``
    :param query: query text handed to ``Job``
    """
    service = bigquery.instance(app)
    report_model = getattr(report, model_name)

    db.session.query(report_model).delete()

    # Keys copied unchanged from the repo info mapping onto the Repo record.
    copied_fields = (
        'id', 'name', 'language', 'full_name', 'description', 'html_url', 'homepage',
    )

    for row in job_results(Job(service, query)):
        info, _code = repo_info(row[1])
        if not info:
            continue

        entry_id, watchers = row[0], row[2]

        repo_fields = {key: info[key] for key in copied_fields}
        repo_fields['created_at'] = datetime.strptime(
            info['created_at'], '%Y-%m-%dT%H:%M:%SZ'
        )

        entry = report_model(id=entry_id, cnt_watch=watchers, repo=Repo(**repo_fields))
        db.session.merge(entry)

    db.session.commit()
def most_starred_sync(model_name: str, query: str):
    """Re-import the rows of a report model from the results of ``query``.

    Logs the import, deletes every existing row of the model found under
    ``model_name`` on ``report``, runs ``query`` as a ``Job`` and merges one
    model row (with its ``Repo``) per result row.  Result rows whose
    ``repo_info`` lookup yields a falsy info value are skipped.  A single
    commit happens after all rows were processed.

    :param model_name: attribute name of the model class on ``report``
    :param query: query text handed to ``Job``
    """
    app.logger.info('Importing repos of %s', model_name)

    target = getattr(report, model_name)
    bq = bigquery.instance(app)

    db.session.query(target).delete()

    job = Job(bq, query)
    job.execute()

    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
    for row in results_of(job):
        info, _code = repo_info(row[1])
        if not info:
            continue

        row_id = row[0]
        watch_count = row[2]
        repo = Repo(
            id=info['id'],
            name=info['name'],
            language=info['language'],
            full_name=info['full_name'],
            description=info['description'],
            html_url=info['html_url'],
            homepage=info['homepage'],
            created_at=datetime.strptime(info['created_at'], timestamp_format),
        )
        db.session.merge(target(id=row_id, cnt_watch=watch_count, repo=repo))

    db.session.commit()
Exemple #4
0
def stars_mature(num_days):
    """Store star/fork counts for mature repositories that still have status 'new'.

    Selects up to 100 such repositories, ordered by ``checked_at`` ascending,
    and starts one ``Job`` (created with ``batch=True``) per repository using
    the query from ``query_stars_by_repo`` for the period from ``num_days``
    days ago until now.  Afterwards the rows of each job are saved as
    ``RepoStars`` records, the repository status becomes ``'unknown'``, the
    session is committed and the status change is logged.

    :param num_days: number of days before now at which the queried period starts
    """
    service = bigquery.instance(app)

    candidates = (
        Repo.query
        .filter(Repo.mature.is_(True))
        .filter(Repo.status == 'new')
        .order_by(Repo.checked_at.asc())
        .limit(100)  # donations will increase this number
    )

    # Phase 1: start every job before reading any results.
    started = []
    for repo in candidates:
        sql = query_stars_by_repo(
            repo_id=repo.id,
            date_from=datetime.now() + timedelta(days=-num_days),
            date_to=datetime.now(),
        )

        job = Job(service, sql, batch=True)
        job.execute()

        started.append((job, repo))

    # Phase 2: persist results and update the status, one commit per repository.
    for job, repo in started:
        for row in results_of(job):
            record = RepoStars(repo_id=repo.id, stars=row[0], year=row[1], day=row[2])
            db.session.add(record)

        previous_status = repo.status
        repo.status = 'unknown'

        db.session.commit()

        app.logger.info(
            'Repository {0} got a new status {1} (was: {2})'.format(
                repo.id, repo.status, previous_status
            )
        )
Exemple #5
0
def stars_mature(num_days):
    """Store star/fork counts for mature repositories that still have status 'new'.

    Selects up to 40 such repositories, ordered by ``checked_at`` ascending,
    and starts one ``Job`` (created with ``batch=True``) per repository using
    the query from ``query_stars_by_repo`` for the period from ``num_days``
    days ago until now.  Afterwards the rows of each job are saved as
    ``RepoStars`` records, the repository status becomes ``'unknown'`` and the
    session is committed.

    :param num_days: number of days before now at which the queried period starts
    """
    service = bigquery.instance(app)

    candidates = (
        Repo.query
        .filter(Repo.mature.is_(True))
        .filter(Repo.status == 'new')
        .order_by(Repo.checked_at.asc())
        .limit(40)  # we are at the free plan
    )

    # Phase 1: start every job before reading any results.
    started = []
    for repo in candidates:
        sql = query_stars_by_repo(
            repo_id=repo.id,
            date_from=datetime.now() + timedelta(days=-num_days),
            date_to=datetime.now(),
        )

        job = Job(service, sql, batch=True)
        job.execute()

        started.append((job, repo))

    # Phase 2: persist results, one commit per repository.
    for job, repo in started:
        for row in results_of(job):
            record = RepoStars(repo_id=repo.id, stars=row[0], year=row[1], day=row[2])
            db.session.add(record)

        repo.status = 'unknown'

        db.session.commit()
            EXTRACT(MONTH FROM created_at) AS mon
        FROM
            `githubarchive.month.*`
        WHERE
            (_TABLE_SUFFIX BETWEEN '{date_from}' AND '{date_to}')
            AND repo.id = {id}
            AND type IN ('WatchEvent', 'ForkEvent')
        GROUP BY y, mon, doy
    """
    return query.format(
        id=repo_id, date_from=date_from.strftime('%Y%m'), date_to=date_to.strftime('%Y%m')
    )


# Run a star-count query for every repository that has never been reset
# (``last_reset_at`` is NULL) and has more than 1000 stargazers, most starred
# first.  The queried period runs from January of the current year up to the
# month before the current one.
now = datetime.now()
service = bigquery.instance(app)
results = Repo.query\
    .filter(Repo.last_reset_at.is_(None))\
    .filter(Repo.stargazers_count > 1000)\
    .order_by(Repo.stargazers_count.desc())\
    .yield_per(10)\
    .all()

# Both bounds depend only on ``now``, so they are computed once up front.
reset_from = datetime(year=now.year, month=1, day=1)
# First day of the previous month.  In January ``now.month - 1`` is 0, which
# ``datetime`` rejects with ValueError, so roll over to December of the
# previous year.  The range is then empty (``reset_from`` is later than
# ``reset_to``), which is an empty result rather than a crash.
if now.month == 1:
    reset_to = datetime(year=now.year - 1, month=12, day=1)
else:
    reset_to = datetime(year=now.year, month=now.month - 1, day=1)

for result in results:
    query = query_stars_by_repo(
        repo_id=result.id, date_from=reset_from, date_to=reset_to
    )

    job = Job(service, query)
    job.execute()
Exemple #7
0
            EXTRACT(MONTH FROM created_at) AS mon
        FROM
            `githubarchive.month.*`
        WHERE
            (_TABLE_SUFFIX BETWEEN '{date_from}' AND '{date_to}')
            AND repo.id = {id}
            AND type IN ('WatchEvent', 'ForkEvent')
        GROUP BY y, mon, doy
    """
    return query.format(id=repo_id,
                        date_from=date_from.strftime('%Y%m'),
                        date_to=date_to.strftime('%Y%m'))


# Build a star-count query for every repository that has never been reset
# (``last_reset_at`` is NULL) and has more than 1000 stargazers, most starred
# first.  The queried period runs from January of the current year up to the
# month before the current one.
now = datetime.now()
service = bigquery.instance(app)
results = Repo.query\
    .filter(Repo.last_reset_at.is_(None))\
    .filter(Repo.stargazers_count > 1000)\
    .order_by(Repo.stargazers_count.desc())\
    .yield_per(10)\
    .all()

# Both bounds depend only on ``now``, so they are computed once up front.
reset_from = datetime(year=now.year, month=1, day=1)
# First day of the previous month.  In January ``now.month - 1`` is 0, which
# ``datetime`` rejects with ValueError, so roll over to December of the
# previous year.  The range is then empty (``reset_from`` is later than
# ``reset_to``), which is an empty result rather than a crash.
if now.month == 1:
    reset_to = datetime(year=now.year - 1, month=12, day=1)
else:
    reset_to = datetime(year=now.year, month=now.month - 1, day=1)

for result in results:
    query = query_stars_by_repo(repo_id=result.id,
                                date_from=reset_from,
                                date_to=reset_to)