Example #1
def main():
    # Begin by parsing options and ensuring existence of input and output paths.
    quickSetup()
    parser = get_parser()
    options = parser.parse_args()
    output_path = options.output_path[0]
    pathlog = log.fields(output_path=output_path)
    if os.path.isdir(output_path):
        pathlog.info("directory OK")
    elif os.path.exists(output_path):
        pathlog.error("is NOT a directory")
        sys.exit(1)
    else:
        pathlog.error("does NOT exist")
        sys.exit(1)
    repos = {}
    all_repos_valid = True
    for repo_path in options.repos:
        repolog = log.fields(repo_path=repo_path)
        try:
            repo = git.Repo(repo_path)
        except git.InvalidGitRepositoryError:
            repolog.error("is NOT a repository")
            all_repos_valid = False
        except git.NoSuchPathError:
            repolog.error("does NOT exist")
            all_repos_valid = False
        else:
            repolog.info("repository OK")
            repos[repo_path] = repo
    if not all_repos_valid:
        sys.exit(1)
    process_all_repos(repos)
Example #2
def write_page(path, filename, **kwargs):
    if 'index' not in kwargs:
        kwargs['index_link'] = '<p><a href="index.html">Back to stats home page</a></p>'
    else:
        kwargs['index_link'] = ''
    if 'head_title' not in kwargs:
        kwargs['head_title'] = kwargs['title']
    kwargs['timestamp'] = unicode(datetime.now())
    filename = os.path.join(path, filename)
    log.fields(filename=filename).info('writing')
    with open(filename, 'wb') as f:
        f.write(PAGE_TEMPLATE.format(**kwargs))
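PAGE_TEMPLATE is not shown above, so the following is only a minimal sketch of a template and call that would satisfy write_page: the template text, the 'body' field, and the path and file name are assumptions, while 'title', 'head_title', 'index_link', and 'timestamp' are the placeholders the function itself fills in.
# Hypothetical template; only the placeholders below are implied by write_page.
PAGE_TEMPLATE = u'''<html>
<head><title>{head_title}</title></head>
<body>
<h1>{title}</h1>
{index_link}
{body}
<p>Generated at {timestamp}</p>
</body>
</html>'''

# 'index' is not passed, so the "Back to stats home page" link is included.
write_page('/tmp/stats', 'authors.html', title='Authors', body='<p>Example body.</p>')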
Example #3
def pickle_all(filename, log=log):
    log.fields(filename=filename).info('pickling')
    structure = dict(
        authors_map=authors_map,
        review_locks=review_locks,
        snippet_locks=snippet_locks,
        latest_commits=latest_commits,
        repo_infos_by_path=repo_infos_by_path,
        repo_infos_by_name=repo_infos_by_name,
    )
    with open(filename, 'wb') as f:
        pickle.dump(structure, f, -1)
Example #4
def store_grid_entry(session, grid_spec):
    """
    Add a grid spec to the database and return the grid's unique ID.

    Parameters
    ----------
    session : sqlalchemy.orm.session.Session
    grid_spec : dict

    Returns
    -------
    hash_id : str

    """
    llog = log.fields(secret=grid_spec['secret'])
    llog.debug('storing grid')

    table = models.SecretGrid if grid_spec['secret'] else models.PublicGrid
    new_grid = table(**grid_spec)
    session.add(new_grid)
    session.flush()
    hash_id = encode_grid_id(new_grid.id, grid_spec['secret'])

    llog.fields(grid_id=new_grid.id, hash_id=hash_id).debug('grid stored')

    return hash_id
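A minimal usage sketch, assuming an open SQLAlchemy session and that the grid model accepts the fields shown; everything in example_spec other than 'secret' is hypothetical.
example_spec = {'secret': False, 'title': 'Demo grid'}   # 'title' is an assumed column
hash_id = store_grid_entry(session, example_spec)
session.commit()  # store_grid_entry only flushes; committing is left to the caller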
Example #5
def get_grid_entry(session, hash_id, secret=False):
    """
    Get a specific grid entry.

    Parameters
    ----------
    session : sqlalchemy.orm.session.Session
    hash_id : str
    secret : bool, optional
        Whether this is a secret grid.

    Returns
    -------
    grid_spec : models.SecretGrid or models.PublicGrid
        The matching grid row, or None if no matching grid was found.

    """
    grid_id = decode_hash_id(hash_id, secret)
    llog = log.fields(grid_id=grid_id, hash_id=hash_id, secret=secret)
    if not grid_id:
        # couldn't do the conversion from hash to database ID
        llog.debug('cannot decrypt hash')
        return

    llog.debug('pulling grid from database')
    table = models.SecretGrid if secret else models.PublicGrid
    grid_spec = session.query(table).filter(table.id == grid_id).one_or_none()

    return grid_spec
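The models module is not shown. A minimal sketch of what models.PublicGrid might look like, assuming SQLAlchemy declarative models with an integer primary key; every name here except id and secret is hypothetical.
from sqlalchemy import Boolean, Column, Integer, Text
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class PublicGrid(Base):
    __tablename__ = 'public_grids'           # hypothetical table name
    id = Column(Integer, primary_key=True)   # consumed by encode_grid_id / decode_hash_id
    secret = Column(Boolean, default=False)
    title = Column(Text)                     # hypothetical grid field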
Example #6
def process_all_repos():
    """Loop through events in all repositories."""
    for repo_path, repo_info in repo_infos_by_path.iteritems():
        repo = repo_info.repo
        repo_name = repo_info.name
        repolog = log.fields(repo_name=repo_name)
        repolog.info('processing')
        last_locks = {}
        prev_latest_commit = latest_commits.get(repo_name, None)
        if prev_latest_commit is not None:
            repolog.fields(prev_latest_commit=prev_latest_commit).info()
        else:
            repolog.info('new repo')
        latest_commit = repo.commit('master').hexsha
        commits = []
        # Find applicable commits.
        for commit in repo.iter_commits(latest_commit):
            if commit.hexsha == prev_latest_commit:
                # Reached commit we stopped at last time.
                break
            else:
                commits.append(commit)
        # Process them starting with eldest first.
        for commit in reversed(commits):
            email = normalize_email(commit.author.email)
            update_authors_map(email, commit)
            last_locks = update_locks(email, repo_name, commit, last_locks)
            update_snippets(email, repo_info, commit)
        # Store latest commit for next time.
        latest_commits[repo_name] = latest_commit
        repolog.fields(latest_commit=latest_commit).info()
Example #7
def unpickle_all(filename, log=log):
    log = log.fields(filename=filename)
    if os.path.isfile(filename):
        log.info('unpickling')
        with open(filename, 'rb') as f:
            globals().update(pickle.load(f))
    else:
        log.info('notfound')
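pickle_all and unpickle_all form a persistence pair: one dumps the module-level accumulators to a file with the highest pickle protocol, the other loads them back into the module globals. A minimal round-trip sketch, assuming those globals are already defined; the filename is arbitrary.
pickle_all('stats.pickle')    # save current state at the end of a run
unpickle_all('stats.pickle')  # restore it at the start of the next run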
Example #8
def process_all_repos(repos):
    """Loop through events in all repositories."""
    for repo_path, repo in repos.iteritems():
        repo_name = os.path.split(repo_path)[-1]
        repolog = log.fields(repo_name=repo_name)
        repolog.info("processing")
        last_locks = {}
        for commit in reversed(list(repo.iter_commits("master"))):
            update_authors_map(commit)
            last_locks = update_locks(repo_name, commit, last_locks)
Example #9
def get_grid_entry(hash_id, secret=False):
    """
    Get a specific grid entry.

    Parameters
    ----------
    hash_id : str
    secret : bool, optional
        Whether this is a secret grid.

    Returns
    -------
    grid_spec : dict
        Will be None if no matching grid was found.

    """
    grid_id = decode_hash_id(hash_id, secret)
    llog = log.fields(grid_id=grid_id, hash_id=hash_id, secret=secret)
    if not grid_id:
        # couldn't do the conversion from hash to database ID
        llog.debug('cannot decrypt hash')
        return

    llog.debug('looking for grid')

    mc = get_memcached()
    mc_key = str((grid_id, secret))
    if mc_key in mc:
        llog.debug('pulling grid from memcached')
        return mc[mc_key]

    llog.debug('pulling grid from database')
    table = get_table(secret)
    grid_spec = table.find_one(id=grid_id)

    if grid_spec:
        llog.debug('grid found')
        grid_spec = desqlize_grid_entry(grid_spec)
        mc[mc_key] = grid_spec

    else:
        llog.debug('grid not found')
        return

    return grid_spec
Example #10
def store_grid_entry(grid_spec):
    """
    Add a grid spec to the database and return the grid's unique ID.

    Parameters
    ----------
    grid_spec : dict

    Returns
    -------
    hash_id : str

    """
    grid_entry = sqlize_grid_spec(grid_spec)

    llog = log.fields(secret=grid_entry['secret'])
    llog.debug('storing grid')
    table = get_table(grid_entry['secret'])
    grid_id = table.insert(grid_entry)
    llog.fields(grid_id=grid_id).debug('grid stored')

    return encode_grid_id(grid_id, grid_entry['secret'])
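get_table is not shown; the table.insert and table.find_one calls match the API of the dataset library, so a minimal sketch under that assumption might look like the following. The database URL and table names are hypothetical.
import os

import dataset

# Hypothetical connection; dataset creates tables and columns on first insert.
db = dataset.connect(os.environ.get('DATABASE_URL', 'sqlite:///grids.db'))

def get_table(secret):
    return db['secret_grids'] if secret else db['public_grids']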
Example #11
def process_all_authors():
    """Loop through authors to calculate author-specific stats."""
    for author_info in authors_map.itervalues():
        author_log = log.fields(author_name=sorted(author_info.names)[0])
        author_log.info('processing')
        # Reset accumulators.
        author_info.total_actions = 0
        author_info.total_transcriptions = 0
        author_info.time_spent = 0
        author_info.time_spent_transcribing = 0
        author_info.total_bytes_transcribed = 0
        # Process snippets.
        for repo_name, snippet_map in author_info.snippets.iteritems():
            for starting_point, snippet_actions in snippet_map.iteritems():
                author_info.total_actions += len(snippet_actions)
                # Determine if the snippet action was a transcription.
                repo_info = repo_infos_by_name[repo_name]
                first_repo_snippet = repo_info.snippets.get(starting_point)
                if first_repo_snippet:
                    first_repo_snippet_action = first_repo_snippet[0]
                    if first_repo_snippet_action in snippet_actions:
                        author_info.total_transcriptions += 1
        # Process locks.
        for repo_name, locks_map in author_info.locks_created.iteritems():
            for lock in locks_map.itervalues():
                if lock.created_by == lock.destroyed_by:
                    duration = lock.destroyed_at - lock.created_at
                    author_info.time_spent += duration
                    # Determine if the lock was associated with a transcription, vs. an edit.
                    repo_info = repo_infos_by_name[repo_name]
                    if lock.starting_point in repo_info.snippets:
                        first_repo_snippet_action = repo_info.snippets[lock.starting_point][0]
                        if first_repo_snippet_action.saved == lock.destroyed_at:
                            author_info.time_spent_transcribing += duration
                            author_info.total_bytes_transcribed += first_repo_snippet_action.bytes
        if author_info.total_transcriptions and author_info.time_spent_transcribing:
            # Words-per-minute uses the usual convention of 5 characters per word,
            # with time spent measured in seconds (hence the division by 60.0).
            author_info.average_time_per_transcription = (
                author_info.time_spent_transcribing / author_info.total_transcriptions)
            author_info.average_wpm = (
                (author_info.total_bytes_transcribed / 5.0)
                / (author_info.time_spent_transcribing / 60.0))
Example #12
def get_memcached():
    host = os.environ.get('MC_PORT', '127.0.0.1').replace('tcp://', '')
    log.fields(mc_host=host).debug('connecting to memcached')
    return pylibmc.Client([host], binary=True)
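A small usage sketch for the client returned above, using standard pylibmc calls; the key, value, and expiry are arbitrary.
mc = get_memcached()
mc.set('example-key', {'hello': 'world'}, time=300)  # expire after 5 minutes
cached = mc.get('example-key')                       # None if missing or expired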
Example #13
def main():
    # Parse options and ensure the input and output paths exist.
    quickSetup()
    parser = get_parser()
    options = parser.parse_args()
    if options.pickle is not None:
        options.pickle = options.pickle[0]  # list -> string
        unpickle_all(options.pickle)
    else:
        # Do nothing; keep initial state.
        pass
    if options.email_map:
        for email_mapping in options.email_map:
            email_from, email_to = email_mapping.split(':')
            email_maps[email_from] = email_to
            log.fields(email_from=email_from, email_to=email_to).info('email mapping')
    if options.email_ignore:
        email_ignores.update(options.email_ignore)
    output_path = options.output_path[0]
    pathlog = log.fields(output_path=output_path)
    if os.path.isdir(output_path):
        pathlog.info('directory OK')
    elif os.path.exists(output_path):
        pathlog.error('is NOT a directory')
        sys.exit(1)
    else:
        pathlog.error('does NOT exist')
        sys.exit(1)
    all_repos_valid = True
    for repo_path in options.repos:
        repolog = log.fields(repo_path=repo_path)
        try:
            repo = git.Repo(repo_path)
        except git.InvalidGitRepositoryError:
            repolog.error('is NOT a repository')
            all_repos_valid = False
        except git.NoSuchPathError:
            repolog.error('does NOT exist')
            all_repos_valid = False
        else:
            repolog.info('repository OK')
            tree = repo.tree('master')
            transcription_json = load(tree['transcription.json'].data_stream)
            repo_name = os.path.split(repo_path)[-1]
            git_repos_by_name[repo_name] = repo
            if repo_path not in repo_infos_by_path:
                # Create new RepoInfo structure.
                repo_infos_by_path[repo_path] = repo_infos_by_name[repo_name] = RepoInfo(
                    name=repo_name,
                    transcription=transcription_json,
                    authors=set(),
                    snippets={},
                )
            else:
                # Keep existing RepoInfo structure.
                pass
    if not all_repos_valid:
        sys.exit(1)
    process_all_repos()
    process_all_authors()
    create_all_output(output_path)
    if options.pickle is not None:
        pickle_all(options.pickle)
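get_parser is not shown. The following is a hypothetical argparse sketch that would satisfy the attributes main() reads (options.output_path[0], options.repos, options.pickle, options.email_map, options.email_ignore); the flag spellings and help text are assumptions.
import argparse

def get_parser():
    parser = argparse.ArgumentParser(description='Generate transcription statistics.')
    parser.add_argument('--pickle', nargs=1, default=None,
                        help='pickle file used to persist state between runs')
    parser.add_argument('--email-map', action='append', default=[], metavar='FROM:TO',
                        help='map one author email address to another')
    parser.add_argument('--email-ignore', action='append', default=[], metavar='EMAIL',
                        help='author email addresses to ignore')
    parser.add_argument('output_path', nargs=1,
                        help='directory that receives the generated pages')
    parser.add_argument('repos', nargs='*',
                        help='paths to the git repositories to process')
    return parser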