Example #1
0
def run(name, **kwargs):
    """Run a map/reduce job over the ``repositories`` collection.

    The output collection name passed to ``map_reduce`` is ``summary-<name>``.

    Keyword arguments:
        map:      passed to ``load_js``; the result is the map argument (required).
        reduce:   passed to ``load_js``; the result is the reduce argument (required).
        finalize: optional; when given it is passed to ``load_js`` and the
                  result is forwarded as the ``finalize`` keyword.

    Raises:
        KeyError: if ``map`` or ``reduce`` is missing from ``kwargs``.
    """
    # Only forward ``finalize`` when the caller actually supplied one.
    if "finalize" in kwargs:
        extra_options = {"finalize": load_js(kwargs["finalize"])}
    else:
        extra_options = {}

    map_reduce = mongo.db().repositories.map_reduce
    map_source = load_js(kwargs["map"])
    reduce_source = load_js(kwargs["reduce"])
    output_name = "summary-%s" % name

    map_reduce(map_source, reduce_source, output_name, **extra_options)
Example #2
0
def remove_undefined(collection_name):
    """Drop the ``'undefined'`` key from every document's ``value`` mapping.

    Operates on the ``summary-<collection_name>`` collection; only documents
    that actually contain the key are modified and saved back.
    """
    summary = mongo.db()['summary-%s' % collection_name]

    for document in summary.find():
        value = document['value']
        if 'undefined' not in value:
            continue

        del value['undefined']
        summary.save(document)
Example #3
0
import json
from pycheckup import mongo


# Module-level database handle obtained from mongo.db(); stats() queries it.
db = mongo.db()


def json_encoder(obj):
    """Fallback serializer intended for ``json.dumps(..., default=json_encoder)``.

    Any object exposing an ``isoformat`` attribute (e.g. ``datetime.date`` or
    ``datetime.datetime``) is encoded as the string returned by
    ``obj.isoformat()``.

    Raises:
        TypeError: if ``obj`` has no ``isoformat`` attribute.
    """
    if hasattr(obj, "isoformat"):
        return obj.isoformat()

    # Call-style raise: the old ``raise TypeError, "..."`` form is a syntax
    # error on Python 3, while this spelling works on Python 2 and 3 alike.
    raise TypeError("Could not encode %s" % type(obj))


def stats(user, repo):
    """Return the repository document for ``user/repo`` with JSON-encoded probes.

    The document is looked up by ``_id`` (``"<user>/<repo>"``) in
    ``db.repositories``. Its ``commits``, ``line_count``, ``pep8``,
    ``pyflakes`` and ``swearing`` entries are each replaced by their JSON
    string form, using ``json_encoder`` as the fallback serializer.

    NOTE(review): the lookup result is used without checking that a document
    was found -- confirm callers only pass known repositories.
    """
    document_id = "%s/%s" % (user, repo)
    doc = db.repositories.find_one({"_id": document_id})

    for field in ("commits", "line_count", "pep8", "pyflakes", "swearing"):
        doc[field] = json.dumps(doc[field], default=json_encoder)

    return doc
Example #4
0
import math
from django.core.cache import cache
from pycheckup import mongo


# The `repositories` attribute of the database handle returned by mongo.db().
collection = mongo.db().repositories


# Known distribution names. Each entry maps the name to a 3-tuple of
# (document field, data attribute, max).
TYPES = {
    'line_count': ('line_count', 'total', 6000000),
    'open_issues': ('popularity', 'open_issues', 400),
    'forks': ('popularity', 'forks', 800),
    'watchers': ('popularity', 'watchers', 4500),
    'num_collaborators': ('popularity', 'num_collaborators', 300),
    'swearing': ('swearing', 'total', 6000),
    'pep8': ('pep8', 'total', 600000),
    'pyflakes': ('pyflakes', 'total', 15000),
}


def distribution(name):
    """Look up the distribution data for one of the names listed in ``TYPES``.

    A value already stored in the cache under ``distribution-<name>`` is
    returned as-is; otherwise the latest data is fetched with ``get_latest``
    using the document field and data attribute registered for ``name``.

    Raises:
        ValueError: if ``name`` is not a key of ``TYPES``.

    NOTE(review): this excerpt ends right after ``data`` is fetched; whatever
    processing, caching and return value follow is not visible here.
    """
    if name not in TYPES:
        raise ValueError

    # Serve from the cache when a previous call already stored a result.
    cache_key = 'distribution-%s' % name
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    # TYPES[name] is (document field, data attribute, max); only the first
    # two are needed for the lookup.
    data = get_latest(TYPES[name][0], TYPES[name][1])
Example #5
0
def bootstrap_repo(user, repo_name):
    """Clone ``user/repo_name``, run every probe over its history and save it.

    Steps, in order:

    1. Clone the repository and load its commits.
    2. Create an empty document and run the one-time probes (license, readme,
       setup_py, tabs_or_spaces) plus the popularity probe.
    3. Starting at today's UTC midnight and stepping back one week at a time
       for 31 iterations, pick the first entry of ``repo.commits`` dated on
       or before the working date, check it out if it is not already the
       current revision, and run the weekly probes (commits, line_count,
       pep8, pyflakes, swearing). Weeks with no such commit are skipped.
    4. Reverse the weekly result lists, save the document into the
       ``repositories`` collection and clean up the clone.

    If the repository has no commits, the document is flagged with
    ``doc['empty'] = True`` and the weekly probes are not run.

    ``repo.cleanup()`` runs even when a probe, checkout or save raises; the
    exception still propagates to the caller.
    """
    logger.info('Bootstraping %s/%s' % (user, repo_name))

    repo = GitRepo(user, repo_name)
    repo.clone()

    # Everything after a successful clone is guarded so the clone is always
    # handed to repo.cleanup(), instead of being left behind on an error.
    try:
        repo.load_commits()

        doc = document.empty(user, repo_name)

        working_date = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
        one_week = timedelta(days=7)

        # one-time probes
        license.run(repo, doc)
        readme.run(repo, doc)
        setup_py.run(repo, doc)
        tabs_or_spaces.run(repo, doc)

        popularity.run(repo, doc, working_date)

        current_rev = None

        if len(repo.commits) == 0:
            doc['empty'] = True
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print('No commits in time period, skipping...')
        else:
            # range(31) means the script will grab 31 weeks of history
            for _ in range(31):
                # NOTE(review): taking the first match assumes repo.commits is
                # ordered newest-first -- confirm against GitRepo.load_commits.
                for c in repo.commits:
                    if c['date'] <= working_date:
                        # Only check it out if we need to
                        if c['rev'] != current_rev:
                            logger.info('Checking out %s (%s)' % (c['rev'], c['date']))
                            repo.checkout(c['rev'])

                        current_rev = c['rev']

                        # weekly probes
                        commits.run(repo, doc, working_date)
                        line_count.run(repo, doc, working_date)
                        pep8.run(repo, doc, working_date)
                        pyflakes.run(repo, doc, working_date)
                        swearing.run(repo, doc, working_date)
                        break

                working_date -= one_week

        # Weeks were visited from the most recent date backwards; flip each
        # weekly list in place before saving.
        for field in ('commits', 'swearing', 'line_count', 'pep8', 'pyflakes'):
            doc[field].reverse()

        repo_collection = mongo.db().repositories
        repo_collection.save(doc)
    finally:
        repo.cleanup()

    print('Finished %s/%s' % (user, repo_name))