from datetime import datetime, timedelta

# app, db, report, bigquery, Job, results_of, repo_info and the models
# (Repo, RepoStars, RepoMean) are assumed to come from the surrounding
# Flask application.


def most_starred_sync(model_name: str, query: str):
    app.logger.info('Importing repos of %s', model_name)
    model = getattr(report, model_name)
    service = bigquery.instance(app)
    db.session.query(model).delete()
    job = Job(service, query)
    job.execute()
    for row in results_of(job):
        info, code = repo_info(row[1])
        if not info:
            continue
        db.session.merge(
            model(id=row[0],
                  cnt_watch=row[2],
                  repo=Repo(id=info['id'],
                            name=info['name'],
                            language=info['language'],
                            full_name=info['full_name'],
                            description=info['description'],
                            html_url=info['html_url'],
                            homepage=info['homepage'],
                            created_at=datetime.strptime(
                                info['created_at'],
                                '%Y-%m-%dT%H:%M:%SZ'))))
    db.session.commit()
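# repo_info() above returns an (info, status_code) pair for a repository's
# full name. A minimal sketch against the GitHub v3 REST API using requests;
# the project's real helper likely adds authentication, caching and
# rate-limit handling:
import requests


def repo_info(full_name):
    response = requests.get(
        'https://api.github.com/repos/{}'.format(full_name))
    if response.status_code != 200:
        # deleted, renamed or blocked repositories yield no info
        return None, response.status_code
    return response.json(), response.status_code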
def stars_mature(num_days):
    date_from = (datetime.now()
                 + timedelta(days=num_days * -1)).strftime('%Y-%m-%d')
    service = bigquery.instance(app)
    query = """
        SELECT COUNT(1) AS stars,
               YEAR(created_at) AS y,
               DAYOFYEAR(created_at) AS doy
        FROM TABLE_DATE_RANGE(
            [githubarchive:day.events_],
            TIMESTAMP('{date_from}'),
            CURRENT_TIMESTAMP()
        )
        WHERE repo.id = {id}
          AND type IN ('WatchEvent', 'ForkEvent')
        GROUP BY y, doy
    """
    jobs = []
    repos = Repo.query\
        .filter(Repo.mature.is_(True))\
        .filter(Repo.status == 'new')\
        .limit(100)
    for repo in repos:
        job = Job(service,
                  query.format(id=repo.id, date_from=date_from),
                  batch=True)
        job.execute()
        jobs.append((job, repo))
    for job, repo in jobs:
        for row in results_of(job):
            db.session.add(RepoStars(repo_id=repo.id, stars=row[0],
                                     year=row[1], day=row[2]))
        repo.status = 'unknown'
    db.session.commit()
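# results_of() is used throughout but never defined in this listing. A
# minimal sketch, assuming the legacy google-api-python-client BigQuery v2
# service and a Job object exposing service, project_id and job_id
# attributes (those names are assumptions, not necessarily the project's):
def results_of(job):
    """Yield each result row of a finished BigQuery job as a tuple."""
    page_token = None
    while True:
        response = job.service.jobs().getQueryResults(
            projectId=job.project_id,
            jobId=job.job_id,
            pageToken=page_token).execute()
        for row in response.get('rows', []):
            # the REST API wraps every cell as {'v': <value>}
            yield tuple(cell['v'] for cell in row['f'])
        page_token = response.get('pageToken')
        if not page_token:
            return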
def stars_mature(num_days):
    service = bigquery.instance(app)
    jobs = []
    repos = Repo.query\
        .filter(Repo.mature.is_(True))\
        .filter(Repo.status == 'new')\
        .order_by(Repo.checked_at.asc())\
        .limit(100)  # donations will increase this number
    for repo in repos:
        query = query_stars_by_repo(
            repo_id=repo.id,
            date_from=datetime.now() + timedelta(days=num_days * -1),
            date_to=datetime.now()
        )
        job = Job(service, query, batch=True)
        job.execute()
        jobs.append((job, repo))
    for job, repo in jobs:
        for row in results_of(job):
            db.session.add(RepoStars(repo_id=repo.id, stars=row[0],
                                     year=row[1], day=row[2]))
        status_old = repo.status
        repo.status = 'unknown'
        db.session.commit()
        app.logger.info(
            'Repository {0} got a new status {1} (was: {2})'
            .format(repo.id, repo.status, status_old)
        )
def stars_mature(num_days):
    service = bigquery.instance(app)
    jobs = []
    repos = Repo.query\
        .filter(Repo.mature.is_(True))\
        .filter(Repo.status == 'new')\
        .order_by(Repo.checked_at.asc())\
        .limit(40)  # we are at the free plan
    for repo in repos:
        query = query_stars_by_repo(
            repo_id=repo.id,
            date_from=datetime.now() + timedelta(days=num_days * -1),
            date_to=datetime.now())
        job = Job(service, query, batch=True)
        job.execute()
        jobs.append((job, repo))
    for job, repo in jobs:
        for row in results_of(job):
            db.session.add(RepoStars(repo_id=repo.id, stars=row[0],
                                     year=row[1], day=row[2]))
        repo.status = 'unknown'
    db.session.commit()
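# query_stars_by_repo() is called above but never defined in this listing.
# A sketch reconstructed from the inline query in the first stars_mature()
# version; the WEEK(created_at) column is an assumption, added because the
# reset task below reads a fourth column (row[3]) from each result row:
def query_stars_by_repo(repo_id, date_from, date_to):
    return """
        SELECT COUNT(1) AS stars,
               YEAR(created_at) AS y,
               DAYOFYEAR(created_at) AS doy,
               WEEK(created_at) AS w
        FROM TABLE_DATE_RANGE(
            [githubarchive:day.events_],
            TIMESTAMP('{date_from}'),
            TIMESTAMP('{date_to}')
        )
        WHERE repo.id = {id}
          AND type IN ('WatchEvent', 'ForkEvent')
        GROUP BY y, doy, w
    """.format(id=repo_id,
               date_from=date_from.strftime('%Y-%m-%d'),
               date_to=date_to.strftime('%Y-%m-%d'))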
now = datetime.now()
service = bigquery.instance(app)
results = Repo.query\
    .filter(Repo.last_reset_at.is_(None))\
    .filter(Repo.stargazers_count > 1000)\
    .order_by(Repo.stargazers_count.desc())\
    .yield_per(10)\
    .all()
for result in results:
    query = query_stars_by_repo(
        repo_id=result.id,
        date_from=datetime(year=now.year, month=1, day=1),
        # first day of the previous month; computed via replace() because
        # plain now.month - 1 arithmetic would fail in January
        date_to=(datetime(year=now.year, month=now.month, day=1)
                 - timedelta(days=1)).replace(day=1)
    )
    job = Job(service, query)
    job.execute()
    cnt = 0
    lst = {}
    for row in results_of(job):
        key = '{} {}'.format(row[1], row[3])
        lst[key] = lst.get(key, ()) + ((int(row[2]), int(row[0])),)
        db.session.merge(RepoStars(repo_id=result.id, stars=row[0],
                                   year=row[1], day=row[2]))
        cnt += 1
    db.session.query(RepoMean).filter(RepoMean.repo_id == result.id).delete()
    db.session.commit()
    for key in lst.keys():
        # plausible continuation of the truncated loop, assuming RepoMean
        # stores a mean star count per period (the period and value column
        # names are guesses):
        pairs = lst[key]
        mean = sum(stars for _, stars in pairs) / len(pairs)
        db.session.add(RepoMean(repo_id=result.id, period=key, value=mean))
    result.last_reset_at = now  # assumed: mark the repo as processed
    db.session.commit()
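# Shape of the grouping built above, with made-up numbers: each key is
# 'year <period>' and maps to (day_of_year, stars) pairs, e.g.
#
#   {'2015 42': ((295, 12), (296, 8), (297, 15))}
#
# so the stored mean for that period would be (12 + 8 + 15) / 3.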