def stars_mature(num_days):
    """Collect daily star/fork counts for mature repos via batch BigQuery jobs.

    Args:
        num_days: how many days back from now to scan GitHub Archive events.

    Side effects: inserts one RepoStars row per (year, day-of-year) bucket
    for each processed repository, flips the repository status from 'new'
    to 'unknown', and commits the session once at the end.
    """
    # Lower bound of the legacy-SQL TABLE_DATE_RANGE scan.
    date_from = (datetime.now() - timedelta(days=num_days)).strftime('%Y-%m-%d')
    service = bigquery.instance(app)
    # Legacy BigQuery SQL: a table identifier containing ':' must be wrapped
    # in square brackets, otherwise TABLE_DATE_RANGE fails to parse it.
    query = """
        SELECT COUNT(1) AS stars, YEAR(created_at) AS y, DAYOFYEAR(created_at) AS doy
        FROM TABLE_DATE_RANGE(
            [githubarchive:day.events_],
            TIMESTAMP('{date_from}'),
            CURRENT_TIMESTAMP()
        )
        WHERE repo.id = {id} AND type IN ('WatchEvent', 'ForkEvent')
        GROUP BY y, doy
    """
    jobs = []
    repos = Repo.query.filter(Repo.mature.is_(True)).filter(Repo.status == 'new').limit(100)
    for repo in repos:
        # Batch priority is cheaper; results are harvested in the second pass.
        job = Job(service, query.format(id=repo.id, date_from=date_from), batch=True)
        job.execute()
        jobs.append((job, repo))
    for job, repo in jobs:
        for row in results_of(job):
            db.session.add(RepoStars(repo_id=repo.id, stars=row[0], year=row[1], day=row[2]))
        # Status is recomputed later from the freshly stored star data.
        repo.status = 'unknown'
    db.session.commit()
def most_starred_sync(model_name: str, query: str):
    """Rebuild the report table named ``model_name`` from a BigQuery query.

    Performs a full refresh: existing rows are deleted, then one row per
    repository returned by ``query`` is merged in, enriched with metadata
    fetched via ``repo_info``.
    """
    bq = bigquery.instance(app)
    report_model = getattr(report, model_name)
    # Full refresh: wipe the table before re-importing.
    db.session.query(report_model).delete()
    for record in job_results(Job(bq, query)):
        info, code = repo_info(record[1])
        if not info:
            # Repository gone or lookup failed — skip it.
            continue
        created = datetime.strptime(info['created_at'], '%Y-%m-%dT%H:%M:%SZ')
        repo = Repo(
            id=info['id'],
            name=info['name'],
            language=info['language'],
            full_name=info['full_name'],
            description=info['description'],
            html_url=info['html_url'],
            homepage=info['homepage'],
            created_at=created,
        )
        db.session.merge(report_model(id=record[0], cnt_watch=record[2], repo=repo))
    db.session.commit()
def most_starred_sync(model_name: str, query: str):
    """Re-import the rows of the report model named ``model_name``.

    Deletes all existing rows, runs ``query`` on BigQuery, and merges one
    row per returned repository together with its GitHub metadata.
    """
    app.logger.info('Importing repos of %s', model_name)
    target = getattr(report, model_name)
    bq = bigquery.instance(app)
    db.session.query(target).delete()
    sync_job = Job(bq, query)
    sync_job.execute()
    for row in results_of(sync_job):
        info, code = repo_info(row[1])
        # Rows whose repository metadata could not be fetched are skipped.
        if info:
            parsed_created = datetime.strptime(info['created_at'], '%Y-%m-%dT%H:%M:%SZ')
            linked_repo = Repo(
                id=info['id'],
                name=info['name'],
                language=info['language'],
                full_name=info['full_name'],
                description=info['description'],
                html_url=info['html_url'],
                homepage=info['homepage'],
                created_at=parsed_created,
            )
            db.session.merge(target(id=row[0], cnt_watch=row[2], repo=linked_repo))
    db.session.commit()
def stars_mature(num_days):
    """Refresh star statistics for mature repositories still marked 'new'.

    Launches one batch-priority BigQuery job per repository covering the
    last ``num_days`` days, stores the per-day counts as RepoStars rows,
    demotes each repository's status to 'unknown', and commits (and logs)
    after every repository.
    """
    bq = bigquery.instance(app)
    pending = []
    candidates = Repo.query\
        .filter(Repo.mature.is_(True))\
        .filter(Repo.status == 'new')\
        .order_by(Repo.checked_at.asc())\
        .limit(100)  # donations will increase this number
    for candidate in candidates:
        sql = query_stars_by_repo(
            repo_id=candidate.id,
            date_from=datetime.now() + timedelta(days=num_days * -1),
            date_to=datetime.now(),
        )
        bq_job = Job(bq, sql, batch=True)
        bq_job.execute()
        pending.append((bq_job, candidate))
    for bq_job, candidate in pending:
        for row in results_of(bq_job):
            db.session.add(
                RepoStars(repo_id=candidate.id, stars=row[0], year=row[1], day=row[2]))
        status_old = candidate.status
        candidate.status = 'unknown'
        db.session.commit()
        app.logger.info(
            'Repository {0} got a new status {1} (was: {2})'
            .format(candidate.id, candidate.status, status_old)
        )
def stars_mature(num_days):
    """Collect per-day star counts for up to 40 mature repos marked 'new'.

    Each repository gets its own batch-priority BigQuery job over the last
    ``num_days`` days; results are stored as RepoStars rows, the status is
    reset to 'unknown', and the session is committed once at the end.
    """
    bq = bigquery.instance(app)
    launched = []
    selection = Repo.query\
        .filter(Repo.mature.is_(True))\
        .filter(Repo.status == 'new')\
        .order_by(Repo.checked_at.asc())\
        .limit(40)  # we are at the free plan
    for repo in selection:
        sql = query_stars_by_repo(
            repo_id=repo.id,
            date_from=datetime.now() + timedelta(days=num_days * -1),
            date_to=datetime.now(),
        )
        bq_job = Job(bq, sql, batch=True)
        bq_job.execute()
        launched.append((bq_job, repo))
    for bq_job, repo in launched:
        for row in results_of(bq_job):
            db.session.add(
                RepoStars(repo_id=repo.id, stars=row[0], year=row[1], day=row[2]))
        repo.status = 'unknown'
    db.session.commit()
EXTRACT(MONTH FROM created_at) AS mon FROM `githubarchive.month.*` WHERE (_TABLE_SUFFIX BETWEEN '{date_from}' AND '{date_to}') AND repo.id = {id} AND type IN ('WatchEvent', 'ForkEvent') GROUP BY y, mon, doy """ return query.format( id=repo_id, date_from=date_from.strftime('%Y%m'), date_to=date_to.strftime('%Y%m') ) now = datetime.now() service = bigquery.instance(app) results = Repo.query\ .filter(Repo.last_reset_at.is_(None))\ .filter(Repo.stargazers_count > 1000)\ .order_by(Repo.stargazers_count.desc())\ .yield_per(10)\ .all() for result in results: query = query_stars_by_repo( repo_id=result.id, date_from=datetime(year=now.year, month=1, day=1), date_to=datetime(year=now.year, month=now.month-1, day=1) ) job = Job(service, query) job.execute()
EXTRACT(MONTH FROM created_at) AS mon FROM `githubarchive.month.*` WHERE (_TABLE_SUFFIX BETWEEN '{date_from}' AND '{date_to}') AND repo.id = {id} AND type IN ('WatchEvent', 'ForkEvent') GROUP BY y, mon, doy """ return query.format(id=repo_id, date_from=date_from.strftime('%Y%m'), date_to=date_to.strftime('%Y%m')) now = datetime.now() service = bigquery.instance(app) results = Repo.query\ .filter(Repo.last_reset_at.is_(None))\ .filter(Repo.stargazers_count > 1000)\ .order_by(Repo.stargazers_count.desc())\ .yield_per(10)\ .all() for result in results: query = query_stars_by_repo(repo_id=result.id, date_from=datetime(year=now.year, month=1, day=1), date_to=datetime(year=now.year, month=now.month - 1, day=1))