def metadata_refresh(num_days):
    """Refresh stored metadata of repositories that are due for a check.

    A repository is due when it has never been checked (``checked_at`` is
    NULL) or when its last check is at least ``num_days`` days old. At most
    300 repositories are handled per call.

    For every repository ``api.repo_info`` is queried:

    * no details and a 4xx code: the "worth" is decreased by 1;
    * no details and any other code: the repository is skipped untouched;
    * details present: ``checked_at`` is set to now and the tracked
      attributes are synchronised with the returned values.

    :param num_days: minimum age, in days, of the previous check
    :return: number of repositories taken from the batch during this call
    """
    tracked_keys = (
        'description', 'forks_count', 'homepage', 'language', 'open_issues_count', 'size',
        'stargazers_count', 'subscribers_count'
    )
    checked_before = datetime.now() - timedelta(days=num_days)
    # NOTE(review): the session is committed while the yield_per() result is
    # still being consumed - confirm the DB driver in use tolerates that.
    repos = Repo.query\
        .filter(Repo.checked_at.is_(None) | (Repo.checked_at <= checked_before))\
        .yield_per(25)\
        .limit(300)  # GitHub allows only 3000 calls per day within a token

    processed = 0
    for repo in repos:
        processed += 1
        details, code = api.repo_info(repo.full_name)
        if not details:
            if 400 <= code < 500:
                repo.worth -= 1
                db.session.commit()
                log.info(
                    '{0} is not found, the "worth" has been decreased by 1'
                    .format(repo.full_name)
                )
            continue

        repo.checked_at = datetime.now()
        for key in tracked_keys:
            if getattr(repo, key) != details[key]:
                setattr(repo, key, details[key])
        db.session.commit()

    # Counted while iterating: calling ``repos.count()`` here would re-run the
    # query, and rows whose ``checked_at`` was just updated may no longer
    # match it, so the result would not describe the batch just handled.
    return processed
def metadata_refresh(num_days):
    """Refresh stored metadata of repositories that are due for a check.

    A repository is due when it has never been checked (``checked_at`` is
    NULL) or when its last check is at least ``num_days`` days old. At most
    300 repositories are handled per call.

    For every repository ``api.repo_info`` is queried:

    * no details and a 4xx code: the "worth" is decreased by 1;
    * no details and any other code: the repository is skipped untouched;
    * details present: ``checked_at`` is set to now and the tracked
      attributes are synchronised with the returned values.

    :param num_days: minimum age, in days, of the previous check
    :return: number of repositories taken from the batch during this call
    """
    tracked_keys = (
        'description', 'forks_count', 'homepage', 'language', 'open_issues_count', 'size',
        'stargazers_count', 'subscribers_count'
    )
    checked_before = datetime.now() - timedelta(days=num_days)
    # NOTE(review): the session is committed while the yield_per() result is
    # still being consumed - confirm the DB driver in use tolerates that.
    repos = Repo.query\
        .filter(Repo.checked_at.is_(None) | (Repo.checked_at <= checked_before))\
        .yield_per(25)\
        .limit(300)  # GitHub allows only 3000 calls per day within a token

    processed = 0
    for repo in repos:
        processed += 1
        details, code = api.repo_info(repo.full_name)
        if not details:
            if 400 <= code < 500:
                repo.worth -= 1
                db.session.commit()
                log.info(
                    '{0} is not found, the "worth" has been decreased by 1'
                    .format(repo.full_name)
                )
            continue

        repo.checked_at = datetime.now()
        for key in tracked_keys:
            if getattr(repo, key) != details[key]:
                setattr(repo, key, details[key])
        db.session.commit()

    # Counted while iterating: calling ``repos.count()`` here would re-run the
    # query, and rows whose ``checked_at`` was just updated may no longer
    # match it, so the result would not describe the batch just handled.
    return processed
def metadata_maturity(num_months):
    """Mark repositories as mature once they have enough RepoMean rows.

    Selects every repository whose ``mature`` flag is false and that has at
    least ``num_months`` related RepoMean rows, then sets ``mature`` to true
    for each of them, committing after every update.

    :param num_months: minimum number of RepoMean rows required
    :return: number of repositories marked as mature by this call
    """
    means_total = func.count(RepoMean.repo_id).label("cnt")
    repos = db.session.query(RepoMean.repo_id, means_total).join(Repo)\
        .filter(Repo.mature.is_(False))\
        .group_by(RepoMean.repo_id)\
        .having(means_total >= num_months)

    marked = 0
    for repo_id, means_cnt in repos:
        db.session.query(Repo).filter(Repo.id == repo_id).update({Repo.mature: True})
        db.session.commit()
        log.info('{0} marked as mature ({1})'.format(repo_id, means_cnt))
        marked += 1

    # Counted while iterating: ``repos.count()`` would re-run the query, and
    # the rows just flagged as mature no longer satisfy ``mature IS FALSE``,
    # so the repositories handled above would not be included in it.
    return marked
def metadata_maturity(num_months):
    """Mark repositories as mature once they have enough RepoMean rows.

    Selects every repository whose ``mature`` flag is false and that has at
    least ``num_months`` related RepoMean rows, then sets ``mature`` to true
    for each of them, committing after every update.

    :param num_months: minimum number of RepoMean rows required
    :return: number of repositories marked as mature by this call
    """
    means_total = func.count(RepoMean.repo_id).label("cnt")
    repos = db.session.query(RepoMean.repo_id, means_total).join(Repo)\
        .filter(Repo.mature.is_(False))\
        .group_by(RepoMean.repo_id)\
        .having(means_total >= num_months)

    marked = 0
    for repo_id, means_cnt in repos:
        db.session.query(Repo).filter(Repo.id == repo_id).update({Repo.mature: True})
        db.session.commit()
        log.info('{0} marked as mature ({1})'.format(repo_id, means_cnt))
        marked += 1

    # Counted while iterating: ``repos.count()`` would re-run the query, and
    # the rows just flagged as mature no longer satisfy ``mature IS FALSE``,
    # so the repositories handled above would not be included in it.
    return marked
def status_refresh(num_days):
    """Reset repositories whose 'promising'/'hopeless' status has aged out.

    Every repository holding one of those statuses with a
    ``status_updated_at`` at least ``num_days`` days old gets its RepoStars
    rows deleted and its "worth" adjusted: +1 for 'promising', -1 otherwise.
    The status then becomes 'new' if the worth is greater than -1, and
    'deleted' if it is not.

    :param num_days: minimum age, in days, of the last status update
    """
    expired_at = datetime.now() + timedelta(days=num_days * -1)
    outdated = Repo.query.filter(
        Repo.status.in_(('promising', 'hopeless')),
        Repo.status_updated_at <= expired_at
    )

    for repo in outdated:
        # collected stars are dropped before the status is reset
        RepoStars.query.filter(RepoStars.repo_id == repo.id).delete()

        if repo.status == 'promising':
            repo.worth += 1
            log.info(
                'The "worth" value for {0} has been increased by 1'.format(repo.full_name)
            )
        else:
            repo.worth += -1

        if repo.worth > -1:
            repo.status = 'new'
        else:
            repo.status = 'deleted'

        db.session.commit()
def metadata_trend(num_days):
    """Decrease the "worth" of repositories whose latest mean value dropped.

    For each repository with RepoMean rows created within the last
    ``num_days`` days, the two most recent values are handed to
    ``is_worth_decreased``; when it answers positively the repository's
    worth is decremented by 1.

    :param num_days: size, in days, of the look-back window
    """
    since = datetime.now() + timedelta(days=num_days * -1)

    # GROUP_CONCAT(value ORDER BY created_at DESC), cut to its first two entries
    values_newest_first = RepoMean.value.op('ORDER BY')(expression.desc(RepoMean.created_at))
    latest_pair = func.substring_index(func.group_concat(values_newest_first), ',', 2)

    rows = db.session.query(RepoMean.repo_id, latest_pair)\
        .filter(RepoMean.created_at >= since)\
        .group_by(RepoMean.repo_id)\
        .all()

    for repo_id, joined in rows:
        if ',' not in joined:
            # a single value in the window: nothing to compare it against
            continue

        curr, prev = [float(part) for part in joined.split(',')]
        if not is_worth_decreased(curr, prev):
            continue

        log.info(
            'Mean value of {0} is {1}, previous was {2}. The "worth" has been decreased by 1'
            .format(repo_id, curr, prev)
        )
        db.session.query(Repo)\
            .filter(Repo.id == repo_id)\
            .update({Repo.worth: Repo.worth - 1})
        db.session.commit()
def metadata_trend(num_days):
    """Decrease the "worth" of repositories whose latest mean value dropped.

    For each repository with RepoMean rows created within the last
    ``num_days`` days, the two most recent values are handed to
    ``is_worth_decreased``; when it answers positively the repository's
    worth is decremented by 1.

    :param num_days: size, in days, of the look-back window
    """
    since = datetime.now() + timedelta(days=num_days * -1)

    # GROUP_CONCAT(value ORDER BY created_at DESC), cut to its first two entries
    values_newest_first = RepoMean.value.op('ORDER BY')(expression.desc(RepoMean.created_at))
    latest_pair = func.substring_index(func.group_concat(values_newest_first), ',', 2)

    rows = db.session.query(RepoMean.repo_id, latest_pair)\
        .filter(RepoMean.created_at >= since)\
        .group_by(RepoMean.repo_id)\
        .all()

    for repo_id, joined in rows:
        if ',' not in joined:
            # a single value in the window: nothing to compare it against
            continue

        curr, prev = [float(part) for part in joined.split(',')]
        if not is_worth_decreased(curr, prev):
            continue

        log.info(
            'Mean value of {0} is {1}, previous was {2}. The "worth" has been decreased by 1'
            .format(repo_id, curr, prev)
        )
        db.session.query(Repo)\
            .filter(Repo.id == repo_id)\
            .update({Repo.worth: Repo.worth - 1})
        db.session.commit()
def metadata_refresh(num_days):
    """Refresh stored metadata of non-deleted repositories due for a check.

    A repository is due when its status is not 'deleted' and it has either
    never been checked (``checked_at`` is NULL) or was last checked at least
    ``num_days`` days ago. At most 300 repositories are handled per call.

    For every repository ``api.repo_info`` is queried:

    * no details and a 4xx code: the "worth" is decreased by 1;
    * no details and any other code: the repository is skipped untouched;
    * details present: ``checked_at`` is set to now, the tracked attributes
      and the license key are synchronised with the returned values.

    :param num_days: minimum age, in days, of the previous check
    :return: number of repositories taken from the batch during this call
    """
    tracked_keys = (
        'description', 'forks_count', 'homepage', 'language', 'open_issues_count', 'size',
        'stargazers_count', 'subscribers_count'
    )
    checked_before = datetime.now() - timedelta(days=num_days)
    repos = Repo.query\
        .filter(
            (Repo.status != 'deleted') &
            (Repo.checked_at.is_(None) | (Repo.checked_at <= checked_before))
        )\
        .limit(300)  # GitHub allows only 3000 calls per day within a token

    processed = 0
    for repo in repos:
        processed += 1
        details, code = api.repo_info(repo.full_name)
        if not details:
            if 400 <= code < 500:
                repo.worth -= 1
                db.session.commit()
                log.info(
                    '{0} is not found (code={1}), the "worth" has been decreased by 1'
                    .format(repo.full_name, code)
                )
            continue

        repo.checked_at = datetime.now()
        for key in tracked_keys:
            if getattr(repo, key) != details[key]:
                setattr(repo, key, details[key])

        if 'license' in details and details['license'] is not None:
            # named ``license_key`` so the ``license`` builtin is not shadowed
            license_key = details['license'].get('key', 'unlicense')
            if license_key != repo.license:
                repo.license = license_key

        db.session.commit()
        log.info('Repository {0}({1}) has been updated'.format(repo.id, repo.full_name))

    # Counted while iterating: calling ``repos.count()`` here would re-run the
    # query, and rows whose ``checked_at`` was just updated may no longer
    # match it, so the result would not describe the batch just handled.
    return processed
def status_detect(num_days, num_segments):
    """Turn the 'unknown' status of repositories into 'promising' or 'hopeless'.

    For each repository with the 'unknown' status, up to ``num_days`` of its
    earliest RepoStars rows (ordered by day, ascending) are passed to
    ``repo_mean``. A value below 1, or the absence of any rows, makes the
    repository 'hopeless'; otherwise it becomes 'promising'. The value is
    stored as a RepoMean dated today.

    :param num_days: maximum number of RepoStars rows taken per repository;
        also forwarded to ``repo_mean``
    :param num_segments: forwarded to ``repo_mean``
    """
    for repo in Repo.query.filter(Repo.status == 'unknown'):
        stars = db.session.query(RepoStars.day, RepoStars.stars)\
            .filter(RepoStars.repo_id == repo.id)\
            .order_by(expression.asc(RepoStars.day))\
            .limit(num_days)\
            .all()

        if stars:
            val = repo_mean(stars, num_days, num_segments, last_known_mean(repo.id))
        else:
            # no stars collected: treated as a zero mean
            val = 0

        previous_status = repo.status
        if val < 1:
            repo.status = 'hopeless'
        else:
            repo.status = 'promising'

        log.info(
            'Repository status of {0}({1}) has been changed to {2} (was: {3})'
            .format(repo.id, repo.full_name, repo.status, previous_status)
        )

        created_at = datetime.today().strftime('%Y-%m-%d')
        db.session.merge(RepoMean(repo=repo, value=val, created_at=created_at))
        db.session.commit()
from gitmostwanted.app import log, db
from gitmostwanted.models.repo import Repo, RepoMean
from gitmostwanted.tasks.repo_metadata import is_worth_decreased

# Recalculates the "worth" of every mature repository by replaying its
# RepoMean history, oldest first: the first value only seeds the state
# (worth starts at 3), each later value moves the worth by -1 or +1
# depending on ``is_worth_decreased``.

# repo id -> {'prev': last seen mean value, 'worth': recalculated worth}
cache = {}

results = db.session.query(Repo, RepoMean) \
    .filter((Repo.id == RepoMean.repo_id) & Repo.mature.is_(True)) \
    .order_by(RepoMean.created_at.asc()) \
    .yield_per(100) \
    .all()

for repo, mean in results:
    state = cache.get(repo.id)
    if state is None:
        # first mean of this repository: nothing to compare with yet
        cache[repo.id] = {'prev': mean.value, 'worth': 3}
        continue

    prev, curr = state['prev'], mean.value
    state['prev'] = curr
    if is_worth_decreased(curr, prev):
        state['worth'] += -1
    else:
        state['worth'] += 1

    log.info(
        '#{0}: prev value is {1}, next value is {2} > worth is: {3} (now {4})'
        .format(repo.id, prev, curr, state['worth'], repo.worth)
    )
    repo.worth = state['worth']

db.session.commit()
from gitmostwanted.app import log, db from gitmostwanted.models.repo import Repo, RepoMean from gitmostwanted.tasks.repo_metadata import is_worth_decreased import sys cache = {} query = db.session.query(Repo, RepoMean)\ .filter(Repo.id == RepoMean.repo_id)\ .order_by(RepoMean.created_at.asc())\ .yield_per(100) if sys.argv[0] and sys.argv[0].isdigit(): log.info('#{0} is used as the repository id'.format(sys.argv[0])) query = query.filter(Repo.id == int(sys.argv[0])) results = query.all() for result in results: repo, mean = result if repo.id not in cache: cache[repo.id] = {'prev': mean.value, 'worth': 3, 'mature': 0} continue prev, curr = cache[repo.id]['prev'], mean.value cache[repo.id]['mature'] += 1 cache[repo.id]['prev'] = curr cache[repo.id]['worth'] += -1 if is_worth_decreased(curr, prev) else 1 log.info( '#{0}: prev value is {1}, next value is {2} > worth is: {3} (now {4}); mature: {5}' .format(repo.id, prev, curr, cache[repo.id]['worth'], repo.worth,
from gitmostwanted.app import log, db from gitmostwanted.models.repo import Repo, RepoMean from gitmostwanted.tasks.repo_metadata import is_worth_decreased import sys cache = {} query = db.session.query(Repo, RepoMean)\ .filter(Repo.id == RepoMean.repo_id)\ .order_by(RepoMean.created_at.asc())\ .yield_per(100) if sys.argv[0] and sys.argv[0].isdigit(): log.info('#{0} is used as the repository id'.format(sys.argv[0])) query = query.filter(Repo.id == int(sys.argv[0])) results = query.all() for result in results: repo, mean = result if repo.id not in cache: cache[repo.id] = {'prev': mean.value, 'worth': 3, 'mature': 0} continue prev, curr = cache[repo.id]['prev'], mean.value cache[repo.id]['mature'] += 1 cache[repo.id]['prev'] = curr cache[repo.id]['worth'] += -1 if is_worth_decreased(curr, prev) else 1 log.info( '#{0}: prev value is {1}, next value is {2} > worth is: {3} (now {4}); mature: {5}'
def metadata_erase():
    """Delete repositories regarded as worthless.

    A repository is removed when its ``worth`` is below -5 and its
    ``worth_max`` does not exceed three times the configured
    ``REPOSITORY_WORTH_DEFAULT``. The number of deleted rows is logged
    before the transaction is committed.
    """
    worth_max_limit = app.config['REPOSITORY_WORTH_DEFAULT'] * 3
    worthless = Repo.query.filter(Repo.worth < -5, Repo.worth_max <= worth_max_limit)

    removed = worthless.delete()
    log.info('{0} repositories has been removed (worthless)'.format(removed))
    db.session.commit()