Code example #1
    def update_statistics(self):
        """
        Generates the statistics page.

        Should be done every time the entries change.
        """
        if not self.entries:
            print('entries not yet loaded')
            return

        # start the page
        statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

        # total number
        number_entries = len(self.entries)
        rel = lambda x: x / number_entries * 100  # conversion to percent

        statistics += 'analyzed {} entries on {}\n\n'.format(
            number_entries,
            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        # State (beta, mature, inactive)
        statistics += '## State\n\n'

        number_state_beta = sum(1 for x in self.entries
                                if 'beta' in x['State'])
        number_state_mature = sum(1 for x in self.entries
                                  if 'mature' in x['State'])
        number_inactive = sum(1 for x in self.entries if osg.is_inactive(x))
        statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(
            number_state_mature, rel(number_state_mature), number_state_beta,
            rel(number_state_beta), number_inactive, rel(number_inactive))

        if number_inactive > 0:
            entries_inactive = [(x['Title'], osg.extract_inactive_year(x))
                                for x in self.entries if osg.is_inactive(x)]
            entries_inactive.sort(
                key=lambda x: str.casefold(x[0]))  # first sort by name
            entries_inactive.sort(
                key=lambda x: x[1], reverse=True
            )  # then sort by inactive year (more recently first)
            entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
            statistics += '##### Inactive State\n\n' + ', '.join(
                entries_inactive) + '\n\n'

        # Language
        statistics += '## Code Languages\n\n'
        field = 'Code language'

        # get all languages together
        languages = []
        for entry in self.entries:
            languages.extend(entry[field])
        languages = [x.value for x in languages]

        unique_languages = set(languages)
        unique_languages = [(l, languages.count(l) / len(languages))
                            for l in unique_languages]
        unique_languages.sort(
            key=lambda x: str.casefold(x[0]))  # first sort by name

        # print languages to console
        print('\nLanguages\n')
        print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                        for x in unique_languages))

        unique_languages.sort(
            key=lambda x: x[1],
            reverse=True)  # then sort by occurrence (highest occurrence first)
        unique_languages = [
            '- {} ({:.1f}%)\n'.format(x[0], x[1] * 100)
            for x in unique_languages
        ]
        statistics += '##### Language frequency\n\n' + ''.join(
            unique_languages) + '\n'

        # Licenses
        statistics += '## Code licenses\n\n'
        field = 'Code license'

        # get all licenses together
        licenses = []
        for entry in self.entries:
            licenses.extend(entry[field])
        licenses = [x.value for x in licenses]

        unique_licenses = set(licenses)
        unique_licenses = [(l, licenses.count(l) / len(licenses))
                           for l in unique_licenses]
        unique_licenses.sort(
            key=lambda x: str.casefold(x[0]))  # first sort by name

        # print licenses to console
        print('\nLicenses\n')
        print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                        for x in unique_licenses))

        unique_licenses.sort(
            key=lambda x: x[1],
            reverse=True)  # then sort by occurrence (highest occurrence first)
        unique_licenses = [
            '- {} ({:.1f}%)\n'.format(x[0], x[1] * 100)
            for x in unique_licenses
        ]
        statistics += '##### Licenses frequency\n\n' + ''.join(
            unique_licenses) + '\n'

        # Keywords
        statistics += '## Keywords\n\n'
        field = 'Keyword'

        # get all keywords together
        keywords = []
        for entry in self.entries:
            keywords.extend(entry[field])
        keywords = [x.value for x in keywords]

        # reduce those starting with "multiplayer"
        keywords = [
            x if not x.startswith('multiplayer') else 'multiplayer'
            for x in keywords
        ]

        unique_keywords = set(keywords)
        unique_keywords = [(l, keywords.count(l) / len(keywords))
                           for l in unique_keywords]
        unique_keywords.sort(
            key=lambda x: str.casefold(x[0]))  # first sort by name

        # print keywords to console
        print('\nKeywords\n')
        print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                        for x in unique_keywords))

        unique_keywords.sort(
            key=lambda x: x[1],
            reverse=True)  # then sort by occurrence (highest occurrence first)
        unique_keywords = [
            '- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_keywords
        ]
        statistics += '##### Keywords frequency\n\n' + '\n'.join(
            unique_keywords) + '\n\n'

        # no download or play field
        statistics += '## Entries without download or play fields\n\n'

        entries = []
        for entry in self.entries:
            if 'Download' not in entry and 'Play' not in entry:
                entries.append(entry['Title'])
        entries.sort(key=str.casefold)
        statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

        # code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
        popular_code_repositories = ('github.com', 'gitlab.com',
                                     'bitbucket.org', 'code.sf.net',
                                     'code.launchpad.net')
        statistics += '## Entries with a code repository not on a popular site\n\n'

        entries = []
        field = 'Code repository'
        for entry in self.entries:
            popular = False
            for repo in entry[field]:
                for popular_repo in popular_code_repositories:
                    if popular_repo in repo.value:
                        popular = True
                        break
            # if there were repositories, but none popular, add them to the list
            if not popular:
                entries.append(entry['Title'])
                # print(info[field])
        entries.sort(key=str.casefold)
        statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

        # Code dependencies
        statistics += '## Code dependencies\n\n'
        field = 'Code dependency'

        # get all code dependencies together
        code_dependencies = []
        entries_with_code_dependency = 0
        for entry in self.entries:
            if field in entry:
                code_dependencies.extend(entry[field])
                entries_with_code_dependency += 1
        code_dependencies = [x.value for x in code_dependencies]
        statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(
            entries_with_code_dependency, rel(entries_with_code_dependency))

        unique_code_dependencies = set(code_dependencies)
        unique_code_dependencies = [
            (l, code_dependencies.count(l) / len(code_dependencies))
            for l in unique_code_dependencies
        ]
        unique_code_dependencies.sort(
            key=lambda x: str.casefold(x[0]))  # first sort by name

        # print code dependencies to console
        print('\nCode dependencies\n')
        print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                        for x in unique_code_dependencies))

        unique_code_dependencies.sort(
            key=lambda x: x[1],
            reverse=True)  # then sort by occurrence (highest occurrence first)
        unique_code_dependencies = [
            '- {} ({:.1f}%)'.format(x[0], x[1] * 100)
            for x in unique_code_dependencies
        ]
        statistics += '##### Code dependencies frequency\n\n' + '\n'.join(
            unique_code_dependencies) + '\n\n'

        # Build systems:
        statistics += '## Build systems\n\n'
        field = 'Build system'

        # get all build systems together
        build_systems = []
        entries_with_build_system = 0
        for entry in self.entries:
            if field in entry['Building']:
                build_systems.extend(entry['Building'][field])
                entries_with_build_system += 1
        build_systems = [x.value for x in build_systems]

        # count entries, not values; an entry can list several build systems,
        # so len(build_systems) could exceed 100%
        statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(
            rel(entries_with_build_system))

        unique_build_systems = set(build_systems)
        unique_build_systems = [(l,
                                 build_systems.count(l) / len(build_systems))
                                for l in unique_build_systems]
        unique_build_systems.sort(
            key=lambda x: str.casefold(x[0]))  # first sort by name

        # print build systems to console
        print('\nBuild systems\n')
        print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100)
                        for x in unique_build_systems))

        unique_build_systems.sort(
            key=lambda x: x[1],
            reverse=True)  # then sort by occurrence (highest occurrence first)
        unique_build_systems = [
            '- {} ({:.1f}%)'.format(x[0], x[1] * 100)
            for x in unique_build_systems
        ]
        statistics += '##### Build systems frequency ({})\n\n'.format(
            len(build_systems)) + '\n'.join(unique_build_systems) + '\n\n'

        # C, C++ projects without build system information (the field lives under 'Building')
        c_cpp_project_without_build_system = []
        for entry in self.entries:
            if field not in entry['Building'] and (
                    'C' in entry['Code language']
                    or 'C++' in entry['Code language']):
                c_cpp_project_without_build_system.append(entry['Title'])
        c_cpp_project_without_build_system.sort(key=str.casefold)
        statistics += '##### C and C++ projects without build system information ({})\n\n'.format(
            len(c_cpp_project_without_build_system)) + ', '.join(
                c_cpp_project_without_build_system) + '\n\n'

        # C, C++ projects with build system information but without CMake as build system
        c_cpp_project_not_cmake = []
        for entry in self.entries:
            building = entry['Building']
            if field in building and 'CMake' not in building[field] and (
                    'C' in entry['Code language']
                    or 'C++' in entry['Code language']):
                c_cpp_project_not_cmake.append(entry['Title'])
        c_cpp_project_not_cmake.sort(key=str.casefold)
        statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(
            len(c_cpp_project_not_cmake)) + ', '.join(
                c_cpp_project_not_cmake) + '\n\n'

        # Platform
        statistics += '## Platform\n\n'
        field = 'Platform'

        # get all platforms together
        platforms = []
        entries_with_platform = 0
        for entry in self.entries:
            if field in entry:
                platforms.extend(entry[field])
                entries_with_platform += 1
        platforms = [x.value for x in platforms]

        # count entries, not values; an entry can list several platforms
        statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(
            rel(entries_with_platform))

        unique_platforms = set(platforms)
        unique_platforms = [(l, platforms.count(l) / len(platforms))
                            for l in unique_platforms]
        unique_platforms.sort(
            key=lambda x: str.casefold(x[0]))  # first sort by name
        unique_platforms.sort(
            key=lambda x: x[1],
            reverse=True)  # then sort by occurrence (highest occurrence first)
        unique_platforms = [
            '- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_platforms
        ]
        statistics += '##### Platforms frequency\n\n' + '\n'.join(
            unique_platforms) + '\n\n'

        # write to statistics file
        utils.write_text(c.statistics_file, statistics)

        print('statistics updated')
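
The frequency tables above are built with set() plus list.count(), which rescans the list once per unique value. A minimal sketch of the same tally with collections.Counter from the standard library, which needs a single pass (the language list is made-up sample data):

import collections

languages = ['C++', 'C', 'C++', 'Python', 'C++', 'C']

counter = collections.Counter(languages)
total = sum(counter.values())

# sort by name first, then by frequency; Python's stable sort keeps the
# alphabetical order among equal counts
frequencies = sorted(counter.items(), key=lambda x: str.casefold(x[0]))
frequencies.sort(key=lambda x: x[1], reverse=True)

for name, count in frequencies:
    print('- {} ({:.1f}%)'.format(name, count / total * 100))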
Code example #2
    def update_repos(self):
        """
        Exports the primary repos to JSON for the local repository update.
        """
        if not self.entries:
            print('entries not yet loaded')
            return

        primary_repos = {'git': [], 'svn': [], 'hg': []}
        unconsumed_entries = []

        # for every entry, keep only the known repositories: the first one plus all others marked with @add
        for entry in self.entries:
            repos = entry.get('Code repository', [])
            # keep the first and all others containing @add
            if not repos:
                continue
            repos = [repos[0]] + [x for x in repos[1:] if "@add" in x]
            for repo in repos:
                consumed = False
                repo = repo.split(' ')[0].strip()
                url = osg.git_repo(repo)
                if url:
                    primary_repos['git'].append(url)
                    consumed = True
                    continue
                url = osg.svn_repo(repo)
                if url:
                    primary_repos['svn'].append(url)
                    consumed = True
                    continue
                url = osg.hg_repo(repo)
                if url:
                    primary_repos['hg'].append(url)
                    consumed = True
                    continue

                if not consumed:
                    unconsumed_entries.append([entry['Title'], repo])
                    print('Entry "{}" unconsumed repo: {}'.format(
                        entry['File'], repo))

        # sort them alphabetically (and remove duplicates)
        for k, v in primary_repos.items():
            primary_repos[k] = sorted(set(v))

        # statistics of gits
        git_repos = primary_repos['git']
        print('{} Git repositories'.format(len(git_repos)))
        for domain in ('repo.or.cz', 'anongit.kde.org', 'bitbucket.org',
                       'git.code.sf.net', 'git.savannah', 'git.tuxfamily',
                       'github.com', 'gitlab.com', 'gitlab.com/osgames',
                       'gitlab.gnome.org'):
            print('{} on {}'.format(
                sum(1 if domain in x else 0 for x in git_repos), domain))

        # write them to code/git
        json_path = os.path.join(c.root_path, 'code', 'archives.json')
        text = json.dumps(primary_repos, indent=1)
        utils.write_text(json_path, text)

        print('Repositories updated')
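
For orientation, the archives.json written at the end would look roughly like this with indent=1 (the URLs are placeholders, not real data):

{
 "git": [
  "https://github.com/example/some-game.git"
 ],
 "svn": [
  "https://svn.example.org/some-game/trunk"
 ],
 "hg": []
}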
Code example #3
    def update_readme_tocs(self):
        """
        Recounts entries in sub categories and writes them to the readme.
        Also updates the _toc files in the categories directories.

        Note: The Readme must have a specific structure at the beginning, starting with "# Open Source Games" and ending
        with "A collection.."

        Needs to be performed regularly.
        """

        # completely delete content of toc path
        for file in os.listdir(c.tocs_path):
            os.remove(os.path.join(c.tocs_path, file))

        # read readme
        readme_file = os.path.join(c.root_path, 'README.md')
        readme_text = utils.read_text(readme_file)

        # compile regex for identifying the building blocks in the readme
        regex = re.compile(
            r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)",
            re.DOTALL)

        # apply regex
        matches = regex.findall(readme_text)
        if len(matches) != 1:
            raise RuntimeError('readme file has invalid structure')
        matches = matches[0]
        start = matches[0]
        end = matches[2]

        tocs_text = ''

        # split into games, tools, frameworks, libraries
        games = [
            x for x in self.entries if not any(
                [y in x['Keyword'] for y in ('tool', 'framework', 'library')])
        ]
        tools = [x for x in self.entries if 'tool' in x['Keyword']]
        frameworks = [x for x in self.entries if 'framework' in x['Keyword']]
        libraries = [x for x in self.entries if 'library' in x['Keyword']]

        # create games, tools, frameworks, libraries tocs
        title = 'Games'
        file = '_games.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(
            title, file, title, len(games))
        create_toc(title, file, games)

        title = 'Tools'
        file = '_tools.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(
            title, file, title, len(tools))
        create_toc(title, file, tools)

        title = 'Frameworks'
        file = '_frameworks.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(
            title, file, title, len(frameworks))
        create_toc(title, file, frameworks)

        title = 'Libraries'
        file = '_libraries.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({})\n'.format(
            title, file, title, len(libraries))
        create_toc(title, file, libraries)

        # create by category
        categories_text = []
        for keyword in c.recommended_keywords:
            filtered = [x for x in self.entries if keyword in x['Keyword']]
            title = keyword.capitalize()
            name = keyword.replace(' ', '-')
            file = '_{}.md'.format(name)
            categories_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(
                title, file, name, len(filtered)))
            create_toc(title, file, filtered)
        categories_text.sort()
        tocs_text += '\nBy category: {}\n'.format(', '.join(categories_text))

        # create by platform
        platforms_text = []
        for platform in c.valid_platforms:
            filtered = [
                x for x in self.entries if platform in x.get('Platform', [])
            ]
            title = platform
            name = platform.lower()
            file = '_{}.md'.format(name)
            platforms_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(
                title, file, name, len(filtered)))
            create_toc(title, file, filtered)
        tocs_text += '\nBy platform: {}\n'.format(', '.join(platforms_text))

        # insert new text in the middle (the \n before the second comment is necessary, otherwise Markdown displays it as part of the bullet list)
        text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end

        # write to readme
        utils.write_text(readme_file, text)

        print('Readme and TOCs updated')
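
The regex splits the readme into the text before the autogenerated block, the block itself, and the text after it. A self-contained sketch of that splitting on a toy readme, assuming the same comment markers:

import re

readme_text = ('# Open Source Games\n'
               '[comment]: # (start of autogenerated content, do not edit)\n'
               'old tocs\n'
               '[comment]: # (end of autogenerated content)\n'
               'A collection..\n')

regex = re.compile(
    r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)",
    re.DOTALL)
matches = regex.findall(readme_text)
assert len(matches) == 1
start, _, end = matches[0]
print(repr(start))  # '# Open Source Games\n'
print(repr(end))    # '\nA collection..\n'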
Code example #4
                    'https://git') and not repo.endswith('.git'):
                # we have them with .git on github/gitlab
                repo += '.git'
            entry += '- Code repository: {}\n'.format(repo)

            # code language (mandatory on our side)
            entry += '- Code language: {}\n'.format(', '.join(lang))

            # code license
            entry += '- Code license: {}\n'.format(', '.join(
                osgc_entry['license']))

            # code dependencies (if existing)
            if 'framework' in osgc_entry:
                frameworks = osgc_entry['framework']
                if isinstance(frameworks, str):
                    frameworks = [frameworks]
                entry += '- Code dependencies: {}\n'.format(
                    ', '.join(frameworks))

            # write info (if existing)
            if 'info' in osgc_entry:
                entry += '\n{}\n'.format(osgc_entry['info'])

            # write ## Building
            entry += '\n## Building\n'

            # finally write to file
            utils.write_text(target_file, entry)
            newly_created_entries += 1
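
The fragment above (it starts mid-function) appends .git to GitHub/GitLab repository URLs before writing the entry. A standalone sketch of that normalization rule as a hypothetical helper:

def normalize_repo_url(repo):
    # hypothetical helper mirroring the convention above: GitHub/GitLab
    # repositories are stored with a trailing .git
    if (repo.startswith('https://github.com/')
            or repo.startswith('https://gitlab.com/')) and not repo.endswith('.git'):
        repo += '.git'
    return repo

print(normalize_repo_url('https://github.com/example/some-game'))
# https://github.com/example/some-game.git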
Code example #5
def sourceforge_import():
    """

    :return:
    """
    files = json.loads(utils.read_text(sf_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))
    all_developers_changed = False

    # exceptions propagate and end the execution, but the finally block below saves the progress made so far
    try:
        # loop over each entry
        for index, file in enumerate(files):
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            developers = entry.get('Developer', [])
            prefix = 'https://sourceforge.net/projects/'
            urls = [
                x.value for x in entry['Home'] if x.startswith(prefix)
            ]

            entry_changed = False

            for url in urls:
                print('  sf project {}'.format(url))

                if not url.endswith('/'):
                    print('error: sf project does not end with slash')
                    url += '/'

                # members
                url_members = 'https://sourceforge.net/p/' + url[
                    len(prefix):] + '_members/'
                response = requests.get(url_members)
                if response.status_code != 200:
                    print('error: url {} not accessible, status {}'.format(
                        url_members, response.status_code))
                    raise RuntimeError()
                soup = BeautifulSoup(response.text, 'html.parser')
                authors = soup.find(
                    'div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [
                    author[1].a['href'] for author in authors
                    if len(author) == 3
                ]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url_author = 'https://sourceforge.net' + author if not author.startswith(
                        'http') else author
                    response = requests.get(url_author)
                    if response.status_code != 200 and author not in (
                            '/u/favorito/', ):
                        print('error: url {} not accessible, status {}'.format(
                            url_author, response.status_code))
                        raise RuntimeError()
                    url_author = response.url  # could be different now
                    if 'auth/?return_to' in url_author or response.status_code != 200:
                        # for some reason authorisation is forbidden or page was not available (happens for example for /u/kantaros)
                        author_name = author[3:-1]
                        nickname = author_name
                    else:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        author_name = SF_alias_list.get(
                            author_name,
                            author_name)  # replace by alias if possible
                        nickname = soup.find(
                            'dl',
                            class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    nickname += '@SF'  # our indication of the platform to search for
                    author_name = author_name.strip(
                    )  # names can still have white spaces before or after

                    if author_name in SF_ignore_list:
                        continue

                    # look author up in entry developers
                    if author_name not in developers:
                        print('   dev "{}" added to entry {}'.format(
                            author_name, file))
                        entry['Developer'] = entry.get('Developer', []) + [
                            osg_parse.ValueWithComment(author_name)
                        ]
                        entry_changed = True
                        developers = entry.get('Developer', [])

                    # look author and SF nickname up in developers data base
                    if author_name in all_developers:
                        dev = all_developers[author_name]
                        if nickname not in dev.get('Contact', []):
                            print(
                                ' existing dev "{}" added nickname ({}) to developer database'
                                .format(author_name, nickname))
                            # check that name has not already @SF contact
                            if any(
                                    x.endswith('@SF')
                                    for x in dev.get('Contact', [])):
                                print('warning: already SF contact')
                            all_developers[author_name]['Contact'] = dev.get(
                                'Contact', []) + [nickname]
                            all_developers_changed = True
                    else:
                        print('   dev "{}" ({}) added to developer database'.
                              format(author_name, nickname))
                        all_developers[author_name] = {
                            'Name': author_name,
                            'Contact': [nickname],
                            'Games': [entry['Title']]
                        }
                        all_developers_changed = True

            if entry_changed:
                # save entry
                osg.write_entry(entry)
                print('  entry updated')
    finally:
        # shorten file list
        utils.write_text(sf_entries_file, json.dumps(files[index:], indent=1))

        # save the current entry (it may hold changes not yet written if an exception interrupted the loop)
        osg.write_entry(entry)
        print(' entry updated')

        # maybe save all developers
        if all_developers_changed:
            # save all developers
            osg.write_developers(all_developers)
            print('developers database updated')
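
The member scraping assumes the SourceForge members page contains a table with three cells per row, the second one holding the profile link. A minimal sketch of that extraction on canned HTML (the markup is illustrative, not the real SourceForge page):

from bs4 import BeautifulSoup

html = '''<div id="content_base"><table>
<tr><td>1</td><td><a href="/u/alice/">Alice</a></td><td>admin</td></tr>
<tr><td>2</td><td><a href="/u/bob/">Bob</a></td><td>developer</td></tr>
</table></div>'''

soup = BeautifulSoup(html, 'html.parser')
rows = soup.find('div', id='content_base').find('table').find_all('tr')
cells = [row.find_all('td') for row in rows]
profiles = [c[1].a['href'] for c in cells if len(c) == 3]
print(profiles)  # ['/u/alice/', '/u/bob/']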
Code example #6
    def update_html(self):
        """
        Parses all entries, collects interesting info and stores it in a json file suitable for displaying
        with a dynamic table in a browser.
        """
        if not self.entries:
            print('entries not yet loaded')
            return

        # make database out of it
        db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keyword', 'Source']}

        entries = []
        for info in self.entries:

            # game & description
            entry = ['{} (<a href="{}">home</a>, <a href="{}">entry</a>)'.format(info['Title'], info['Home'][0],
                                                                                 r'https://github.com/Trilarion/opensourcegames/blob/master/entries/' +
                                                                                 info['File']),
                     textwrap.shorten(info.get('Note', ''), width=60, placeholder='..')]

            # download
            field = 'Download'
            if field in info and info[field]:
                entry.append('<a href="{}">Link</a>'.format(info[field][0]))
            else:
                entry.append('')

            # state (field state is essential)
            entry.append('{} / {}'.format(info['State'][0],
                                          'inactive since {}'.format(osg.extract_inactive_year(info)) if osg.is_inactive(info) else 'active'))

            # keywords
            keywords = info['Keyword']
            keywords = [x.value for x in keywords]
            entry.append(', '.join(keywords))

            # source
            text = []
            field = 'Code repository'
            if field in info and info[field]:
                text.append('<a href="{}">Source</a>'.format(info[field][0].value))
            languages = info['Code language']
            languages = [x.value for x in languages]
            text.append(', '.join(languages))
            licenses = info['Code license']
            licenses = [x.value for x in licenses]
            text.append(', '.join(licenses))
            entry.append(' - '.join(text))

            # append to entries
            entries.append(entry)

        # sort entries by game name
        entries.sort(key=lambda x: str.casefold(x[0]))

        db['data'] = entries

        # output
        text = json.dumps(db, indent=1)
        utils.write_text(c.json_db_file, text)

        print('HTML updated')
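
The written file pairs a headings list with a data list of rows, which is the shape a dynamic table script can consume directly. An illustrative excerpt of the output (all values made up):

{
 "headings": ["Game", "Description", "Download", "State", "Keyword", "Source"],
 "data": [
  ["Example Game (<a href=\"...\">home</a>, <a href=\"...\">entry</a>)",
   "A short description..",
   "<a href=\"...\">Link</a>",
   "beta / active",
   "strategy, open content",
   "<a href=\"...\">Source</a> - C++ - GPL-3.0"]
 ]
}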
Code example #7
def test():
    # loop over entries
    developers = ''
    try:
        i = 0
        # active = False
        for entry in entries:

            # if entry['Name'] == 'Aleph One':
            #    active = True
            # if not active:
            #    continue

            # for testing purposes
            i += 1
            if i > 40:
                break

            # print
            entry_name = '{} - {}'.format(entry['file'], entry['Name'])
            print(entry_name)
            content = ''

            entry_developer = entry.get('developer', [])

            # parse home
            home = entry['home']
            # sourceforge project site
            prefix = 'https://sourceforge.net/projects/'
            url = [x for x in home if x.startswith(prefix)]
            if len(url) == 1:
                url = url[0]
                print(' sourceforge project site: {}'.format(url))
                url = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
                response = requests.get(url)
                soup = BeautifulSoup(response.text, 'html.parser')
                authors = soup.find(
                    'div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [
                    author[1].a['href'] for author in authors
                    if len(author) == 3
                ]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url = 'https://sourceforge.net' + author if not author.startswith(
                        'http') else author
                    response = requests.get(url)
                    url = response.url  # could be different now
                    if 'auth/?return_to' in url:
                        # for some reason authorisation is forbidden
                        author_name = author
                        nickname = author
                    else:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        author_name = SF_alias_list.get(
                            author_name,
                            author_name)  # replace by alias if possible
                        nickname = soup.find(
                            'dl',
                            class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    dev = developer_info_lookup(author_name)
                    in_devs = dev and 'contact' in dev and nickname + '@SF' in dev[
                        'contact']
                    in_entry = author_name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {} : {}@SF'.format(author_name, nickname)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            # parse source repository
            repos = entry.get('code repository', [])

            # Github
            urls = [x for x in repos if x.startswith('https://github.com/')]
            for url in urls:
                print(' github repo: {}'.format(url))
                github_info = osg_github.retrieve_repo_info(url)
                for contributor in github_info['contributors']:
                    name = contributor.name
                    dev = developer_info_lookup(name)
                    in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev[
                        'contact']
                    in_entry = name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {}: {}@GH'.format(name, contributor.login)
                    if contributor.blog:
                        content += ' url: {}'.format(contributor.blog)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            if content:
                developers += '{}\n\n{}\n'.format(entry_name, content)

    except RuntimeError:
        raise
    finally:
        # store developer info
        utils.write_text(
            os.path.join(c.root_path, 'collected_developer_info.txt'),
            developers)
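
test() wraps the whole crawl in try/finally so that everything collected so far is written out even when an exception aborts the loop. The pattern in isolation (the print stands in for utils.write_text):

collected = ''
try:
    for item in ('a', 'b', 'c'):
        if item == 'c':
            raise RuntimeError('simulated network error')
        collected += item + '\n'
finally:
    # always persist partial results, even on error
    print('saving:', repr(collected))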
Code example #8
def export_json(infos):
    """
    Parses all entries, collects interesting info and stores it in a json file suitable for displaying
    with a dynamic table in a browser.
    """

    print('export to json for web display')

    # make database out of it
    db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keywords', 'Source']}

    entries = []
    for info in infos:

        # game & description
        entry = ['{} (<a href="{}">home</a>, <a href="{}">entry</a>)'.format(info['name'], info['home'][0],
            r'https://github.com/Trilarion/opensourcegames/blob/master/entries/' + info['file']),
            textwrap.shorten(info['description'], width=60, placeholder='..')]

        # download
        field = 'download'
        if field in info and info[field]:
            entry.append('<a href="{}">Link</a>'.format(info[field][0]))
        else:
            entry.append('')

        # state (field state is essential)
        entry.append('{} / {}'.format(info['state'][0], 'inactive since {}'.format(info['inactive']) if 'inactive' in info else 'active'))

        # keywords
        field = 'keywords'
        if field in info and info[field]:
            entry.append(', '.join(info[field]))
        else:
            entry.append('')

        # source
        text = []
        field = 'code repository'
        if field in info and info[field]:
            text.append('<a href="{}">Source</a>'.format(info[field][0]))
        field = 'code language'
        if field in info and info[field]:
            text.append(', '.join(info[field]))
        field = 'code license'
        if field in info and info[field]:
            text.append(info[field][0])
        entry.append(' - '.join(text))

        # append to entries
        entries.append(entry)

    # sort entries by game name
    entries.sort(key=lambda x: str.casefold(x[0]))

    db['data'] = entries

    # output
    json_path = os.path.join(c.entries_path, os.path.pardir, 'docs', 'data.json')
    text = json.dumps(db, indent=1)
    utils.write_text(json_path, text)
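
The description column is truncated with textwrap.shorten, which collapses whitespace and cuts at a word boundary so the placeholder still fits within the width:

import textwrap

description = 'A very long description of an open source game that would not fit'
print(textwrap.shorten(description, width=60, placeholder='..'))
# A very long description of an open source game that would..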
Code example #9
def export_primary_code_repositories_json(infos):
    """

    """

    print('export to json for local repository update')

    primary_repos = {'git': [], 'svn': [], 'hg': [], 'bzr': []}
    unconsumed_entries = []

    # for every entry filter those that are known git repositories (add additional repositories)
    field = 'code repository-raw'
    for info in infos:
        # if field 'Code repository' is available
        if field in info:
            consumed = False
            repos = info[field]
            if repos:
                # split at comma
                repos = repos.split(',')
                # keep the first and all others containing "(+)"
                additional_repos = [x for x in repos[1:] if "(+)" in x]
                repos = repos[0:1]
                repos.extend(additional_repos)
                for repo in repos:
                    # remove parenthesis and strip of white spaces
                    repo = re.sub(r'\([^)]*\)', '', repo)
                    repo = repo.strip()
                    url = git_repo(repo)
                    if url:
                        primary_repos['git'].append(url)
                        consumed = True
                        continue
                    url = svn_repo(repo)
                    if url:
                        primary_repos['svn'].append(url)
                        consumed = True
                        continue
                    url = hg_repo(repo)
                    if url:
                        primary_repos['hg'].append(url)
                        consumed = True
                        continue
                    url = bzr_repo(repo)
                    if url:
                        primary_repos['bzr'].append(url)
                        consumed = True
                        continue

            if not consumed:
                unconsumed_entries.append([info['name'], info[field]])
                # print('Entry "{}" unconsumed repo: {}'.format(info['name'], info[field]))

    # sort them alphabetically (and remove duplicates)
    for k, v in primary_repos.items():
        primary_repos[k] = sorted(set(v))

    # write them to tools/git
    json_path = os.path.join(c.root_path, 'tools', 'archives.json')
    text = json.dumps(primary_repos, indent=1)
    utils.write_text(json_path, text)
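
The cleanup step removes parenthesized annotations, such as the "(+)" marker, from repository fields with a small regex; in isolation:

import re

repo = 'https://svn.example.org/some-game/trunk (+) (for releases)'
repo = re.sub(r'\([^)]*\)', '', repo).strip()
print(repo)  # https://svn.example.org/some-game/trunk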
Code example #10
def fix_entries():
    """
    Fixes the keywords, code dependencies, build systems, .. entries, mostly by automatically sorting them.
    """

    keyword_synonyms = {'RTS': ('real time', 'strategy'), 'realtime': 'real time'}  # note: currently unused

    # TODO also sort other fields, only read once and then do all, move to separate file

    print('fix entries')

    # keywords
    regex = re.compile(r"(.*)- Keywords:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in osg.entry_iterator():

        # match with regex
        matches = regex.findall(content)
        if len(matches) != 1:
            raise RuntimeError('Could not find keywords in entry "{}"'.format(entry))

        match = matches[0]

        # get elements out, split, strip, delete duplicates
        elements = match[1].split(',')
        elements = [x.strip() for x in elements]
        elements = list(set(elements))

        # get category out (exactly one recommended keyword is expected)
        category = None
        for keyword in osg.recommended_keywords:
            if keyword in elements:
                elements.remove(keyword)
                category = keyword
                break
        if category is None:
            raise RuntimeError('No recommended keyword found in entry "{}"'.format(entry))

        # special treatments here
        elements = [x if x != 'TBS' and x != 'TB' else 'turn based' for x in elements]
        elements = [x if x != 'RTS' else 'real time' for x in elements]
        elements = [x if x != 'MMO' else 'massive multiplayer online' for x in elements]
        elements = [x if x != 'SP' else 'singleplayer' for x in elements]
        elements = [x if x != 'MP' else 'multiplayer' for x in elements]
        elements = [x if x != 'engine' else 'game engine' for x in elements]
        elements = [x if x != 'rpg' else 'role playing' for x in elements]
        elements = [x if x != 'turn based' else 'turn-based' for x in elements]
        for keyword in ('browser', 'misc', 'tools'):
            if keyword in elements:
                elements.remove(keyword)

        # sort
        elements.sort(key=str.casefold)

        # add category
        elements.insert(0, category)

        keywords = '- Keywords: {}'.format(', '.join(elements))

        new_content = match[0] + keywords + match[2]

        if new_content != content:
            # write again
            utils.write_text(entry_path, new_content)

    # code dependencies
    regex = re.compile(r"(.*)- Code dependencies:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in osg.entry_iterator():
        # match with regex
        matches = regex.findall(content)

        if not matches:
            # no code dependencies given
            continue

        match = matches[0]

        # get code dependencies out, split, strip, delete duplicates
        elements = match[1].split(',')
        elements = [x.strip() for x in elements]
        elements = list(set(elements))

        # special treatments here
        elements = [x if x != 'Blender' else 'Blender game engine' for x in elements]
        elements = [x if x.lower() != 'libgdx' else 'libGDX' for x in elements]
        elements = [x if x != 'SDL 2' else 'SDL2' for x in elements]
        elements = [x if x.lower() != "ren'py" else "Ren'Py" for x in elements]

        # sort
        elements.sort(key=str.casefold)

        code_dependencies = '- Code dependencies: {}'.format(', '.join(elements))

        new_content = match[0] + code_dependencies + match[2]

        if new_content != content:
            # write again
            utils.write_text(entry_path, new_content)

    # build systems
    regex = re.compile(r"(.*)- Build system:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in osg.entry_iterator():
        # match with regex
        matches = regex.findall(content)

        if not matches:
            # no build system given
            continue

        match = matches[0]

        # get build systems out, split, strip, delete duplicates
        elements = match[1].split(',')
        elements = [x.strip() for x in elements]
        elements = list(set(elements))

        # special treatments here (currently none)

        # sort
        elements.sort(key=str.casefold)

        build_system = '- Build system: {}'.format(', '.join(elements))

        new_content = match[0] + build_system + match[2]

        if new_content != content:
            # write again
            utils.write_text(entry_path, new_content)
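
fix_entries applies the keyword synonyms one chained list comprehension at a time. A table-driven sketch of the same normalization that keeps all synonyms in one mapping (abbreviations taken from the function above, otherwise illustrative):

KEYWORD_SYNONYMS = {
    'TBS': 'turn-based',
    'TB': 'turn-based',
    'RTS': 'real time',
    'MMO': 'massive multiplayer online',
    'SP': 'singleplayer',
    'MP': 'multiplayer',
    'engine': 'game engine',
    'rpg': 'role playing',
}

def normalize_keywords(elements):
    # replace abbreviations, remove duplicates, sort case-insensitively
    return sorted({KEYWORD_SYNONYMS.get(x, x) for x in elements}, key=str.casefold)

print(normalize_keywords(['RTS', 'MP', 'strategy']))
# ['multiplayer', 'real time', 'strategy']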
Code example #11
def update_statistics(infos):
    """
    Generates the statistics page.

    Should be done every time the entries change.
    """

    print('update statistics')

    # start the page
    statistics_file = os.path.join(c.root_path, 'statistics.md')
    statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

    # total number
    number_entries = len(infos)
    rel = lambda x: x / number_entries * 100 # conversion to percent

    statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # State (beta, mature, inactive)
    statistics += '## State\n\n'

    number_state_beta = sum(1 for x in infos if 'beta' in x['state'])
    number_state_mature = sum(1 for x in infos if 'mature' in x['state'])
    number_inactive = sum(1 for x in infos if 'inactive' in x)
    statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))

    if number_inactive > 0:
        entries_inactive = [(x['name'], x['inactive']) for x in infos if 'inactive' in x]
        entries_inactive.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
        entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first)
        entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
        statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'

    # Language
    statistics += '## Code Languages\n\n'
    field = 'code language'

    # those without language tag
    # TODO the language tag is now an essential field, this cannot happen anymore
    # number_no_language = sum(1 for x in infos if field not in x)
    # if number_no_language > 0:
    #     statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
    #     entries_no_language = [x['name'] for x in infos if field not in x]
    #     entries_no_language.sort()
    #     statistics += ', '.join(entries_no_language) + '\n\n'

    # get all languages together
    languages = []
    for info in infos:
        if field in info:
            languages.extend(info[field])

    unique_languages = set(languages)
    unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
    unique_languages.sort(key=lambda x: str.casefold(x[0])) # first sort by name
    unique_languages.sort(key=lambda x: x[1], reverse=True) # then sort by occurrence (highest occurrence first)
    unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_languages]
    statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'

    # Licenses
    statistics += '## Code licenses\n\n'
    field = 'code license'

    # those without license
    number_no_license = sum(1 for x in infos if field not in x)
    if number_no_license > 0:
        statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
        entries_no_license = [x['name'] for x in infos if field not in x]
        entries_no_license.sort()
        statistics += ', '.join(entries_no_license) + '\n\n'

    # get all licenses together
    licenses = []
    for info in infos:
        if field in info:
            licenses.extend(info[field])

    unique_licenses = set(licenses)
    unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
    unique_licenses.sort(key=lambda x: str.casefold(x[0])) # first sort by name
    unique_licenses.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
    unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_licenses]
    statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'

    # Keywords
    statistics += '## Keywords\n\n'
    field = 'keywords'

    # get all keywords together
    keywords = []
    for info in infos:
        if field in info:
            keywords.extend(info[field])
    # ignore those starting with "inspired by"
    keywords = [x for x in keywords if not x.startswith('inspired by ')]

    unique_keywords = set(keywords)
    unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
    unique_keywords.sort(key=lambda x: str.casefold(x[0])) # first sort by name
    unique_keywords.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
    unique_keywords = ['- {} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_keywords]
    statistics += '##### Keywords frequency\n\n' + '\n'.join(unique_keywords) + '\n\n'

    # no download or play field
    statistics += '## Entries without download or play fields\n\n'

    entries = []
    for info in infos:
        if 'download' not in info and 'play' not in info:
            entries.append(info['name'])
    entries.sort(key=str.casefold)
    statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

    # code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
    popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org', 'code.sf.net', 'code.launchpad.net')
    statistics += '## Entries with a code repository not on a popular site\n\n'

    entries = []
    field = 'code repository'
    for info in infos:
        if field in info:
            popular = False
            for repo in info[field]:
                for popular_repo in popular_code_repositories:
                    if popular_repo in repo:
                        popular = True
                        break
            # if there were repositories, but none popular, add them to the list
            if not popular:
                entries.append(info['name'])
                # print(info[field])
    entries.sort(key=str.casefold)
    statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

    # Code dependencies
    statistics += '## Code dependencies\n\n'
    field = 'code dependencies'

    # get all code dependencies together
    code_dependencies = []
    entries_with_code_dependency = 0
    for info in infos:
        if field in info:
            code_dependencies.extend(info[field])
            entries_with_code_dependency += 1
    statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(entries_with_code_dependency, rel(entries_with_code_dependency))

    unique_code_dependencies = set(code_dependencies)
    unique_code_dependencies = [(l, code_dependencies.count(l) / len(code_dependencies)) for l in unique_code_dependencies]
    unique_code_dependencies.sort(key=lambda x: str.casefold(x[0])) # first sort by name
    unique_code_dependencies.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
    unique_code_dependencies = ['- {} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_code_dependencies]
    statistics += '##### Code dependencies frequency\n\n' + '\n'.join(unique_code_dependencies) + '\n\n'

    # Build systems:
    statistics += '## Build systems\n\n'
    field = 'build system'

    # get all build systems together
    build_systems = []
    entries_with_build_system = 0
    for info in infos:
        if field in info:
            build_systems.extend(info[field])
            entries_with_build_system += 1

    # count entries, not values; an entry can list several build systems
    statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(rel(entries_with_build_system))

    unique_build_systems = set(build_systems)
    unique_build_systems = [(l, build_systems.count(l) / len(build_systems)) for l in unique_build_systems]
    unique_build_systems.sort(key=lambda x: str.casefold(x[0])) # first sort by name
    unique_build_systems.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
    unique_build_systems = ['- {} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_build_systems]
    statistics += '##### Build systems frequency ({})\n\n'.format(len(build_systems)) + '\n'.join(unique_build_systems) + '\n\n'

    # C, C++ projects without build system information
    c_cpp_project_without_build_system = []
    for info in infos:
        if field not in info and ('C' in info['code language'] or 'C++' in info['code language']):
            c_cpp_project_without_build_system.append(info['name'])
    c_cpp_project_without_build_system.sort(key=str.casefold)
    statistics += '##### C and C++ projects without build system information ({})\n\n'.format(len(c_cpp_project_without_build_system)) + ', '.join(c_cpp_project_without_build_system) + '\n\n'

    # C, C++ projects with build system information but without CMake as build system
    c_cpp_project_not_cmake = []
    for info in infos:
        if field in info and 'CMake' in info[field] and ('C' in info['code language'] or 'C++' in info['code language']):
            c_cpp_project_not_cmake.append(info['name'])
    c_cpp_project_not_cmake.sort(key=str.casefold)
    statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(len(c_cpp_project_not_cmake)) + ', '.join(c_cpp_project_not_cmake) + '\n\n'

    # Platform
    statistics += '## Platform\n\n'
    field = 'platform'

    # get all platforms together
    platforms = []
    entries_with_platform = 0
    for info in infos:
        if field in info:
            platforms.extend(info[field])
            entries_with_platform += 1

    # count entries, not values; an entry can list several platforms
    statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(rel(entries_with_platform))

    unique_platforms = set(platforms)
    unique_platforms = [(l, platforms.count(l) / len(platforms)) for l in unique_platforms]
    unique_platforms.sort(key=lambda x: str.casefold(x[0])) # first sort by name
    unique_platforms.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
    unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_platforms]
    statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'

    # write to statistics file
    utils.write_text(statistics_file, statistics)
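
Both versions of update_statistics sort the frequency tables twice, first by name and then by count. That relies on Python's sort being stable: entries with equal counts keep the alphabetical order established by the first pass.

items = [('Python', 3), ('c', 3), ('Lua', 1), ('C++', 5)]
items.sort(key=lambda x: str.casefold(x[0]))  # c, C++, Lua, Python
items.sort(key=lambda x: x[1], reverse=True)  # by count; ties stay alphabetical
print(items)  # [('C++', 5), ('c', 3), ('Python', 3), ('Lua', 1)]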
Code example #12
def parse_lgw_content():

    # paths
    import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')

    # iterate over all imported files
    files = os.listdir(import_path)
    entries = []
    for file in files:
        if file.startswith('_lgw'):
            continue

        text = utils.read_text(os.path.join(import_path, file))

        # parse the html
        soup = BeautifulSoup(text, 'html.parser')
        title = soup.h1.get_text()
        print(title)
        entry = {'name': title}

        # get all external links
        ignored_external_links = ('libregamewiki.org', 'freegamedev.net', 'freegamer.blogspot.com', 'opengameart.org', 'gnu.org', 'creativecommons.org', 'freesound.org', 'freecode.com', 'freenode.net')
        links = [(x['href'], x.get_text()) for x in soup.find_all('a', href=True)]
        links = [x for x in links if x[0].startswith('http') and not any([y in x[0] for y in ignored_external_links])]
        entry['external links'] = links

        # get meta description
        description = soup.find('meta', attrs={"name":"description"})
        entry['description'] = description['content']

        # parse gameinfobox
        infos = soup.find('div', class_='gameinfobox')
        if not infos:
            print(' no gameinfobox')
        else:
            infos = infos.find_all('tr')
            for x in infos:
                if x.th and x.td:
                    # row with header
                    key = x.th.get_text()
                    content = x.td.get_text()
                    content = content.split(',')
                    content = [x.strip() for x in content]
                    entry[key] = content
                if not x.th and x.td:
                    # row without header: contribute section
                    items = x.find_all('li')
                    items = [(item.a.string, item.a['href']) for item in items if item.a]
                    for key, content in items:
                        entry[key] = content

        # parse "for available as package in"
        tables = soup.find_all('table', class_='wikitable')
        tables = [table for table in tables if table.caption and table.caption.string.startswith('Available as package')]
        if len(tables) > 0:
            if len(tables) > 1:
                raise RuntimeError()
            table = tables[0]
            packages = table.find_all('tr')
            packages = [x.td.a['href'] for x in packages]
            entry['linux-packages'] = packages

        # categories
        categories = soup.find_all('div', id='mw-normal-catlinks')
        if not categories:
            print(' no categories')
            categories = []
        else:
            if len(categories) > 1:
                raise RuntimeError()
            categories = categories[0]
            categories = categories.find_all('li')
            categories = [x.a.string for x in categories]
            if 'Games' not in categories:
                print(' "Games" not in categories')
            else:
                categories.remove('Games') # should be there
            # strip games at the end
            phrase = ' games'
            categories = [x[:-len(phrase)] if x.endswith(phrase) else x for x in categories]
            ignored_categories = ['Articles lacking reference', 'Stubs']
            categories = [x for x in categories if x not in ignored_categories]
        entry['categories'] = categories

        entries.append(entry)


    # save entries
    text = json.dumps(entries, indent=1)
    utils.write_text(entries_file, text)
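
The category cleanup strips a trailing ' games' by slicing; on Python 3.9 or later the same step reads more directly with str.removesuffix:

categories = ['Strategy games', 'Puzzle games', 'Stubs']
categories = [x.removesuffix(' games') for x in categories]  # Python 3.9+
print(categories)  # ['Strategy', 'Puzzle', 'Stubs']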
コード例 #13
0
def clean_lgw_content():

    # paths
    import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')
    cleaned_entries_file = os.path.join(import_path, '_lgw.cleaned.json')

    # load entries
    text = utils.read_text(entries_file)
    entries = json.loads(text)

    # rename keys
    key_replacements = (('developer', ('Developer', 'Developers')), ('code license', ('Code license', 'Code licenses')), ('engine', ('Engine', 'Engines')), ('genre', ('Genre', 'Genres')),
                        ('library', ('Library', 'Libraries')), ('assets license', ('Media license', 'Media licenses')), ('code language', ('P. language', 'P. languages')), ('home', ('Homepage',)),
                        ('platform', ('Platforms', )), ('tracker', ('Bug/Feature Tracker', )), ('repo', ('Source Code', )), ('forum', ('Forum', )), ('chat', ('Chat', )), ('origin', ('Origin', )),
                        ('dev home', ('Development Project', )), ('last active', ('Release date', )))
    for index, entry in enumerate(entries):
        for new_key, old_keys in key_replacements:
            for key in old_keys:
                if key in entry:
                    entry[new_key] = entry[key]
                    del entry[key]
                    break
        entries[index] = entry

    # ignore keys
    ignored_keys = ('origin', 'Latest\xa0release')
    for index, entry in enumerate(entries):
        for key in ignored_keys:
            if key in entry:
                del entry[key]
        entries[index] = entry

    # check for unique field names
    unique_fields = set()
    for entry in entries:
        unique_fields.update(entry.keys())
    print('unique lgw fields: {}'.format(sorted(list(unique_fields))))

    # which fields are mandatory
    mandatory_fields = unique_fields.copy()
    for entry in entries:
        remove_fields = [field for field in mandatory_fields if field not in entry]
        mandatory_fields -= set(remove_fields)
    print('mandatory lgw fields: {}'.format(sorted(list(mandatory_fields))))

    # content replacements
    entries = remove_parenthized_content(entries, ('assets license', 'code language', 'code license', 'engine', 'genre', 'last active', 'library'))
    entries = remove_prefix_suffix(entries, ('code license', 'assets license'), ('"', 'GNU', ), ('"', '[3]', '[2]', '[1]', 'only'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPL', ('General Public License', ))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPLv2', ('GPL v2', 'GPL version 2.0', 'GPL 2.0', 'General Public License v2', 'GPL version 2', 'Gplv2', 'GPL 2'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPLv2+', ('GPL v2 or later', 'GPL 2+', 'GPL v2+', 'GPL version 2 or later'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPLv3', ('GPL v3', 'GNU GPL v3', 'GPL 3'))
    entries = replace_content(entries, ('code license', 'assets license'), 'GPLv3+', ('GPL v3+', 'GPL v.3 or later', 'GPL v3 or later'))
    entries = replace_content(entries, ('code license', 'assets license'), 'Public domain', ('public domain', 'Public Domain'))
    entries = replace_content(entries, ('code license', 'assets license'), 'zlib', ('zlib/libpng license', 'Zlib License'))
    entries = replace_content(entries, ('code license', 'assets license'), 'BSD', ('Original BSD License', ))
    entries = replace_content(entries, ('code license', 'assets license'), 'CC-BY-SA-3.0', ('Creative Commons Attribution-ShareAlike 3.0 Unported License', 'CC-BY-SA 3.0', 'CC BY-SA 3.0'))
    entries = replace_content(entries, ('code license', 'assets license'), 'CC-BY-SA', ('CC BY-SA',))
    entries = replace_content(entries, ('code license', 'assets license'), 'MIT', ('MIT License', 'MIT"'))
    entries = replace_content(entries, 'platform', 'macOS', ('Mac', ))
    entries = remove_prefix_suffix(entries, ('code language', 'developer'), (), ('[3]', '[2]', '[1]'))
    entries = ignore_content(entries, 'code language', ('HTML5', 'HTML', 'English', 'XML', 'WML'))
    entries = replace_content(entries, 'code language', 'Lua', ('lua', 'LUA'))
    entries = remove_prefix_suffix(entries, 'genre', (), ('game', 'games'))
    entries = lower_case_content(entries, 'genre')
    entries = replace_content(entries, 'genre', 'platform', ('platformer', ))
    entries = replace_content(entries, 'genre', 'role playing', ('rpg', ))
    entries = replace_content(entries, 'genre', 'first person, shooter', ('fps', ))
    entries = replace_content(entries, 'genre', 'real time, strategy', ('rts',))
    entries = replace_content(entries, 'genre', 'turn based, strategy', ('tbs',))
    entries = ignore_content(entries, 'categories', ('GPL', 'C++', 'C', 'ECMAScript', 'Python', 'Java', 'CC BY-SA', 'Lua', 'LGPL', 'CC-BY', 'BSD', 'MIT', 'Qt', 'SDL', 'OpenGL', 'Pygame', 'PD', 'GLUT', 'Haskell', 'Allegro', 'Ruby', 'Zlib/libpng', 'OpenAL', 'Perl', 'Free Pascal', 'LÖVE', 'HTML5', 'Id Tech 1'))
    entries = replace_content(entries, 'library', 'pygame', ('Pygame', ))
    entries = replace_content(entries, 'library', 'Qt', ('QT', ))
    entries = ignore_content(entries, 'library', ('C++', 'Lua', 'Mozilla Firefox'))
    entries = ignore_nonnumbers(entries, 'last active')
    entries = ignore_content(entries, 'last active', ('2019', ))
    entries = ignore_content(entries, 'platform', ('DOS', ))

    # list statistics for every unique field, except free-text and link fields
    fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
    for field in fields:
        content = [entry[field] for entry in entries if field in entry]
        # flatten
        flat_content = []
        for c in content:
            if isinstance(c, list):
                flat_content.extend(c)
            else:
                flat_content.append(c)
        statistics = utils.unique_elements_and_occurrences(flat_content)
        print('\n{}: {}'.format(field, ', '.join(statistics)))

    # save entries
    text = json.dumps(entries, indent=1)
    utils.write_text(cleaned_entries_file, text)
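The cleaning helpers called above (replace_content, ignore_content, remove_prefix_suffix and friends) are defined elsewhere in the module. As a rough idea of their shape, a minimal sketch of replace_content, assuming each field holds either a string or a list of strings, could be:

def replace_content(entries, fields, replacement, originals):
    # Sketch only: canonicalizes field values, e.g. 'GPL v2' -> 'GPLv2'.
    if isinstance(fields, str):
        fields = (fields,)
    for entry in entries:
        for field in fields:
            if field not in entry:
                continue
            values = entry[field]
            if isinstance(values, str):
                entry[field] = replacement if values in originals else values
            else:
                entry[field] = [replacement if v in originals else v for v in values]
    return entries

The other helpers presumably follow the same pattern: take the entries list, transform one or more fields, and return the list so the calls can be chained.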
Code Example #14
def github_import():
    """

    :return:
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gh_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))

    # exceptions still abort the run, but the finally block below saves the progress first
    try:
        # loop over each entry
        for index, file in enumerate(files):
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            code_repositories = entry['Code repository']
            repos = [
                x.value for x in code_repositories if x.value.startswith(prefix)
            ]
            if repos:
                repos[0] += ' @add'  # always process the primary repository
            repos = [x for x in repos if '@add' in x]
            repos = [x.split(' ')[0] for x in repos]
            repos = [x for x in repos if x not in ignored_repos]
            for repo in repos:
                print('  GH repo {}'.format(repo))

                info = osg_github.retrieve_repo_info(
                    repo, private_properties['github-token'])

                new_comments = []
                # is archived
                if info['archived']:
                    if not osg.is_inactive(entry):
                        print(
                            'warning: repo is archived but not inactive state??'
                        )
                    # add archive to repo comment
                    new_comments.append('@archived')

                # add created comment
                new_comments.append('@created {}'.format(info['created'].year))

                # add stars
                new_comments.append('@stars {}'.format(info['stars']))

                # add forks
                new_comments.append('@forks {}'.format(info['forks']))

                # find the repository item this info belongs to and update its comment
                for r in code_repositories:
                    if r.value.startswith(repo):
                        break
                comments = r.comment
                if comments:
                    comments = comments.split(',')
                    comments = [c.strip() for c in comments]
                    comments = [c for c in comments
                                if not c.startswith('@')]  # delete old ones
                    comments += new_comments
                else:
                    comments = new_comments
                r.comment = ', '.join(comments)

                # language in languages
                language = info['language']
                language = language_aliases.get(language, language)
                if language and language not in entry[
                        'Code language'] and language not in ignored_languages:
                    entry['Code language'].append(
                        osg_parse.ValueWithComment(language))
                    print('  added to languages: {}'.format(language))

                # contributors
                for contributor in info['contributors']:
                    if contributor.type != 'User':
                        continue
                    if contributor.contributions < 4:
                        continue
                    # contributor.login/name/blog
                    name = contributor.name
                    if not name:
                        name = contributor.login
                    name = name_aliases.get(name, name)
                    nickname = '{}@GH'.format(contributor.login)
                    blog = contributor.blog
                    if blog:
                        blog = blog_alias[blog] if blog in blog_alias else blog
                        if not blog.startswith('http'):
                            blog = 'https://' + blog
                        if blog in ignored_blogs:
                            blog = None

                    # look up author in entry developers
                    if name not in entry.get('Developer', []):
                        print('   dev "{}" added to entry {}'.format(
                            name, file))
                        entry['Developer'] = entry.get('Developer', []) + [
                            osg_parse.ValueWithComment(name)
                        ]

                    # look up author in developers data base
                    if name in all_developers:
                        dev = all_developers[name]
                        if nickname not in dev.get('Contact', []):
                            print(
                                ' existing dev "{}" added nickname ({}) to developer database'
                                .format(name, nickname))
                            # check that name has not already @GH contact
                            if any(
                                    x.endswith('@GH')
                                    for x in dev.get('Contact', [])):
                                print('warning: already GH contact')
                            dev['Contact'] = dev.get('Contact',
                                                     []) + [nickname]
                        if blog and blog not in dev.get('Home', []):
                            dev['Home'] = dev.get('Home', []) + [blog]
                        # TODO add to games entries!
                    else:
                        print('   dev "{}" ({}) added to developer database'.
                              format(name, nickname))
                        all_developers[name] = {
                            'Name': name,
                            'Contact': [nickname],
                            'Games': [entry['Title']]
                        }
                        if blog:
                            all_developers[name]['Home'] = [blog]

            entry['Code repository'] = code_repositories
            osg.write_entry(entry)
    finally:
        # shorten the file list so a rerun resumes at the entry that was being processed
        utils.write_text(gh_entries_file, json.dumps(files[index:], indent=1))

        osg.write_developers(all_developers)
        print('developers database updated')
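github_import relies on several module-level names (prefix, ignored_repos, language_aliases and so on) that are defined outside this excerpt. Illustrative placeholder values, stated purely as assumptions, might be:

# Assumed module-level constants; the project's real values live elsewhere.
prefix = 'https://github.com/'      # only GitHub repositories are processed here
ignored_repos = ()                  # repositories to skip entirely
ignored_languages = ('Shell', 'Makefile')
language_aliases = {}               # canonical names for GitHub's language labels
name_aliases = {}                   # canonical spellings of contributor names
blog_alias = {}                     # canonical URLs for contributor blogs
ignored_blogs = ()                  # blog URLs that should not be recorded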
Code Example #15
def export_primary_code_repositories_json(infos):
    """

    """

    print('export to json for local repository update')

    primary_repos = {'git': [], 'svn': [], 'hg': []}
    unconsumed_entries = []

    # for every entry filter those that are known git repositories (add additional repositories)
    field = 'code repository-raw'
    for info in infos:
        # if field 'Code repository' is available
        if field in info:
            consumed = False
            repos = info[field]
            if repos:
                # split at comma
                repos = repos.split(',')
                # keep the first and all others containing "(+)"
                additional_repos = [x for x in repos[1:] if "(+)" in x]
                repos = repos[0:1]
                repos.extend(additional_repos)
                for repo in repos:
                    # remove parentheses and strip whitespace
                    repo = re.sub(r'\([^)]*\)', '', repo)
                    repo = repo.strip()
                    url = git_repo(repo)
                    if url:
                        primary_repos['git'].append(url)
                        consumed = True
                        continue
                    url = svn_repo(repo)
                    if url:
                        primary_repos['svn'].append(url)
                        consumed = True
                        continue
                    url = hg_repo(repo)
                    if url:
                        primary_repos['hg'].append(url)
                        consumed = True
                        continue

            if not consumed:
                unconsumed_entries.append([info['name'], info[field]])
                # print output
                if 'code repository' in info:
                    print('Entry "{}" unconsumed repo: {}'.format(info['name'], info[field]))

    # sort them alphabetically (and remove duplicates)
    for k, v in primary_repos.items():
        primary_repos[k] = sorted(set(v))

    # statistics of gits
    git_repos = primary_repos['git']
    print('{} Git repositories'.format(len(git_repos)))
    for domain in ('repo.or.cz', 'anongit.kde.org', 'bitbucket.org', 'git.code.sf.net', 'git.savannah', 'git.tuxfamily', 'github.com', 'gitlab.com', 'gitlab.com/osgames', 'gitlab.gnome.org'):
        print('{} on {}'.format(sum(1 if domain in x else 0 for x in git_repos), domain))

    # write them to code/git
    json_path = os.path.join(c.root_path, 'code', 'archives.json')
    text = json.dumps(primary_repos, indent=1)
    utils.write_text(json_path, text)
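The classifiers git_repo, svn_repo and hg_repo are not part of this excerpt. A plausible minimal sketch of git_repo, under the assumption that Git repositories either end in .git or live on a known forge, could be:

def git_repo(repo):
    # Sketch only: returns a normalized clone URL if repo looks like Git, else None.
    if repo.endswith('.git'):
        return repo
    if any(host in repo for host in ('github.com', 'gitlab.com')):
        return repo + '.git'
    return None

svn_repo and hg_repo would follow the same contract: URL in, normalized URL or None out, which is exactly how the loop above consumes them.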
Code Example #16
            for url in urls:
                print(' github repo: {}'.format(url))
                github_info = osg_github.retrieve_repo_info(url)
                for contributor in github_info['contributors']:
                    name = contributor.name
                    dev = developer_info_lookup(name)
                    in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
                    in_entry = name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {}: {}@GH'.format(name, contributor.login)
                    if contributor.blog:
                        content += ' url: {}'.format(contributor.blog)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            if content:
                developers += '{}\n\n{}\n'.format(entry_name, content)

    except RuntimeError:
        raise
    finally:
        # store developer info
        utils.write_text(
            os.path.join(c.root_path, 'collected_developer_info.txt'),
            developers)
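The last fragment calls developer_info_lookup, which is also not shown. A minimal sketch, assuming an in-memory list of developer dicts loaded elsewhere, might be:

developers = []  # assumed: developer records loaded from the developer database

def developer_info_lookup(name):
    # Sketch only: returns the first developer record with a matching name, or None.
    for dev in developers:
        if dev.get('name') == name:
            return dev
    return None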