Example #1
def read_entry(file):
    """
    Reads a single entry
    :param file: the entry file (without path)
    :return: the entry
    """

    # setup parser and transformer
    grammar_file = os.path.join(c.code_path, 'grammar_entries.lark')
    grammar = utils.read_text(grammar_file)
    parse = osg_parse.create(grammar, osg_parse.EntryTransformer)

    # read entry file
    content = utils.read_text(os.path.join(c.entries_path, file))
    if not content.endswith('\n'):
        content += '\n'

    # parse and transform entry content
    try:
        entry = parse(content)
        entry = [
            ('File', file),
        ] + entry  # add file information to the beginning
        entry = check_and_process_entry(entry)
    except Exception as e:
        print('{} - {}'.format(file, e))
        raise RuntimeError(e)

    return entry
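
A minimal sketch of how read_entry might be driven over the entries directory; the underscore and directory skips are assumptions borrowed from the other examples here.

# hypothetical driver loop, assuming c.entries_path as in the other examples
for file in sorted(os.listdir(c.entries_path)):
    entry_path = os.path.join(c.entries_path, file)
    if file.startswith('_') or os.path.isdir(entry_path):
        continue  # assumption: underscore files and sub directories are not entries
    entry = read_entry(file)
    print('read {}'.format(file))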
Example #2
    def clean_backlog(self):
        """

        :return:
        """
        if not self.entries:
            print('entries not yet loaded')
            return
        # get urls from entries
        included_urls = osg.all_urls(self.entries)
        included_urls = list(included_urls.keys())  # only need the URLs here

        # get urls from rejected file
        text = utils.read_text(c.rejected_file)
        regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
        matches = regex.findall(text)
        rejected_urls = []
        for match in matches:
            urls = match.split(',')
            urls = [x.strip() for x in urls]
            rejected_urls.extend(urls)
        included_urls.extend(rejected_urls)

        # for URLs that only exist as a web archive version, also add the original URL
        more_urls = []
        for url in included_urls:
            if url.startswith('https://web.archive.org/web'):
                # print(url) # sometimes the http is missing in archive links (would need proper parsing)
                url = url[url.index('http', 5):]
                more_urls.append(url)
        included_urls.extend(more_urls)

        # now we strip the urls
        stripped_urls = [utils.strip_url(x) for x in included_urls]
        stripped_urls = set(stripped_urls)  # removes duplicates and makes the membership test below fast

        # read backlog and get urls from there
        text = utils.read_text(c.backlog_file)
        text = text.split('\n')

        # remove backlog lines whose stripped URL is already in stripped_urls
        text = [x for x in text if utils.strip_url(x) not in stripped_urls]

        # remove duplicates and sort
        text = sorted(list(set(text)), key=str.casefold)
        print('backlog contains {} items'.format(len(text)))

        # join and save again
        text = '\n'.join(text)
        utils.write_text(c.backlog_file, text)

        print('backlog cleaned')
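
utils.strip_url itself is not shown in these examples; judging from how the stripped URLs are compared above, it plausibly does something like the following (an assumption, not the real implementation).

def strip_url(url):
    # hypothetical sketch: reduce a URL to a comparable core by dropping scheme, "www." and trailing slash
    for prefix in ('https://', 'http://'):
        if url.startswith(prefix):
            url = url[len(prefix):]
    if url.startswith('www.'):
        url = url[len('www.'):]
    return url.rstrip('/')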
Example #3
def read_and_parse(content_file, grammar_file, transformer):
    """

    :param content_file:
    :param grammar_file:
    :param transformer:
    :return:
    """
    content = utils.read_text(content_file)
    grammar = utils.read_text(grammar_file)
    parser = lark.Lark(grammar, debug=False)
    tree = parser.parse(content)
    return transformer.transform(tree)
Example #4
def read_and_parse(content_file: str, grammar_file: str, Transformer: lark.Transformer):
    """
    Reads a content file and a grammar file and parses the content with the grammar following by
    transforming the parsed output and returning the transformed result.
    :param content_file:
    :param grammar_file:
    :param transformer:
    :return:
    """
    grammar = utils.read_text(grammar_file)
    parse = create(grammar, Transformer)

    content = utils.read_text(content_file)
    return parse(content)
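
The create helper used here is not shown; based on the lark calls in the first variant above, it plausibly looks like this (a sketch, not the actual implementation).

def create(grammar: str, Transformer: type):
    # assumed helper: build the Lark parser once and return a callable that parses and transforms
    parser = lark.Lark(grammar, debug=False)
    transformer = Transformer()

    def parse(content: str):
        tree = parser.parse(content)
        return transformer.transform(tree)

    return parse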
Example #5
def read_screenshots_overview():
    """

    :return:
    """
    # read screenshots readme and parse
    overview = {}
    text = utils.read_text(c.screenshots_file)
    for entry in text.split('\n# ')[1:]:  # skip first paragraph
        lines = entry.split('\n')  # split into lines
        name = lines[0]
        if name not in overview:
            overview[name] = {}
        lines = [line for line in lines[1:]
                 if line]  # include only non-empty lines
        # for every screenshot
        for line in lines:
            values = line.split(' ')  # split into values
            values = [value for value in values if value]
            id = int(values[0])  # id (must be there)
            width = int(values[1])  # width can be 0, will be updated
            height = int(values[2])  # height can be 0, will be updated
            if len(values) > 3:  # optionally a URL
                url = values[3]
            else:
                url = None
            overview[name][id] = [width, height, url]
    return overview
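
For orientation, a hypothetical excerpt of the screenshots overview file that this parser expects (name and values invented):

    # Some Game

    01 640 480 https://example.org/screenshots/some-game-01.png
    02 0 0

That is, one "# Name" section per entry, followed by one line per screenshot containing id, width, height and an optional URL.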
Example #6
def sort_text_file(file, name):
    """
    Reads a text file, splits it into lines, removes duplicates, sorts them and writes the result back.
    """
    text = utils.read_text(file)
    text = text.split('\n')
    text = sorted(list(set(text)), key=str.casefold)
    print('{} contains {} items'.format(name, len(text)))
    text = '\n'.join(text)
    utils.write_text(file, text)
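
A hypothetical call, reusing the backlog path that appears in a later example:

sort_text_file(os.path.join(c.root_path, 'code', 'backlog.txt'), 'backlog')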
Example #7
def github_starring_synchronization():
    """
    Determines which Github repositories I haven't starred yet.
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gh_entries_file))

    # loop over each entry and collect list of repos
    all_repos = []
    for index, file in enumerate(files):
        # read entry
        entry = osg.read_entry(file)

        # get repos
        code_repositories = entry.get('Code repository', [])
        repos = [x for x in code_repositories if x.startswith(prefix)]
        repos[0] += ' @add'  # make sure the first repository is always processed
        repos = [x for x in repos if '@add' in x]
        repos = [x.split(' ')[0] for x in repos]
        repos = [x for x in repos if x not in ignored_repos]
        all_repos.extend(repos)
    all_repos = set(all_repos)
    print('found {} Github repos'.format(len(all_repos)))

    # get my Github user
    user = osg_github.get_user(private_properties['github-name'],
                               token=private_properties['github-token'])

    # get starred repos
    starred = user.get_starred()
    starred = [repo.clone_url for repo in starred]
    starred = set(starred)
    print('starred {} Github repos'.format(len(starred)))

    # and now the difference
    unstarred = all_repos - starred
    print('not yet starred {} repos'.format(len(unstarred)))
    print(', '.join(unstarred))
Example #8
def gitlab_starring_synchronization():
    """
    Determines which Gitlab repositories I haven't starred yet.
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gl_entries_file))

    # loop over each entry and collect list of repos
    all_repos = []
    for index, file in enumerate(files):
        # read entry
        entry = osg.read_entry(file)

        # get repos
        code_repositories = entry.get('Code repository', [])
        repos = [x for x in code_repositories if x.startswith(prefix)]
        repos[0] += ' @add'
        repos = [x for x in repos if '@add' in x]
        repos = [x.split(' ')[0] for x in repos]
        repos = [x for x in repos if x not in ignored_repos]
        all_repos.extend(repos)
    all_repos = set(all_repos)
    print('found {} Gitlab repos'.format(len(all_repos)))
Example #9
def clean_backlog(stripped_game_urls):

    # read backlog and split
    file = os.path.join(c.root_path, 'tools', 'backlog.txt')
    text = utils.read_text(file)
    text = text.split('\n')

    # remove those that are in stripped_game_urls
    text = [x for x in text if utils.strip_url(x) not in stripped_game_urls]

    # remove duplicates and sort
    text = sorted(list(set(text)), key=str.casefold)
    print('backlog contains {} items'.format(len(text)))

    # join and save again
    text = '\n'.join(text)
    utils.write_text(file, text)
Example #10
    def check_template_leftovers(self):
        """
        Checks for template leftovers.
        Should be run only occasionally.
        """
        # load template and get all lines
        text = utils.read_text(os.path.join(c.root_path, 'template.md'))
        text = text.split('\n')
        check_strings = [x for x in text if x and not x.startswith('##')]

        # iterate over all entries
        for _, entry_path, content in osg.entry_iterator():

            for check_string in check_strings:
                if content.find(check_string) >= 0:
                    print('{}: found {}'.format(os.path.basename(entry_path), check_string))
        print('checked for template leftovers')
Example #11
def entry_iterator():
    """

    """

    # get all entries (ignore everything starting with underscore)
    entries = os.listdir(c.entries_path)

    # iterate over all entries
    for entry in entries:
        entry_path = os.path.join(c.entries_path, entry)

        # ignore directories ("tocs" for example)
        if os.path.isdir(entry_path):
            continue

        # read entry
        content = utils.read_text(entry_path)

        # yield
        yield entry, entry_path, content
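
A small usage sketch of the generator above; the TODO check is just an invented example.

for name, path, content in entry_iterator():
    if 'TODO' in content:
        print('{} still contains a TODO'.format(name))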
Example #12
def check_validity_backlog():
    import requests

    # read backlog and split
    file = os.path.join(c.root_path, 'code', 'backlog.txt')
    text = utils.read_text(file)
    urls = text.split('\n')
    urls = [x.split(' ')[0] for x in urls]

    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}
    for url in urls:
        try:
            r = requests.get(url, headers=headers, timeout=5)
        except Exception as e:
            print('{} gave error: {}'.format(url, e))
        else:
            if r.status_code != requests.codes.ok:
                print('{} returned status code: {}'.format(url, r.status_code))

            if r.is_redirect or r.history:
                print('{} redirected to {}, {}'.format(url, r.url, r.history))
Example #13
def read_entries():
    """
    Parses all entries and assembles interesting info about them.
    """

    # setup parser and transformer
    grammar_file = os.path.join(c.code_path, 'grammar_entries.lark')
    grammar = utils.read_text(grammar_file)
    parse = osg_parse.create(grammar, osg_parse.EntryTransformer)

    # a database of all important infos about the entries
    entries = []

    # iterate over all entries
    exception_happened = None
    for file, _, content in entry_iterator():

        if not content.endswith('\n'):
            content += '\n'

        # parse and transform entry content
        try:
            entry = parse(content)
            entry = [
                ('File', file),
            ] + entry  # add file information to the beginning
            entry = check_and_process_entry(entry)
        except Exception as e:
            print('{} - {}'.format(file, e))
            exception_happened = e  # just store last one
            continue

        # add to list
        entries.append(entry)
    if exception_happened:
        print('error(s) while reading entries')
        raise exception_happened

    return entries
Example #14
def read_rejected_file():
    """
    Reads the list of rejected games information.
    Uses very simple parsing.
    :return: List of dictionaries with keys: Title, URLs, Description
    """
    text = u.read_text(c.rejected_file)
    rejected = []
    for line in text.split('\n'):
        # print(line)
        matches = matcher.findall(line)[0]  # we know there will be exactly one match on every line
        name = matches[0].strip()
        links = matches[1].split(',')
        links = [link.strip() for link in links]
        description = matches[2].strip()
        rejected.append({
            'Title': name,
            'URLs': links,
            'Description': description
        })
    return rejected
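
The module-level matcher is not part of this example; since every line yields a name, a comma-separated URL list in parentheses and a description, it is presumably something along these lines (an assumption):

matcher = re.compile(r"^(.+?)\s*\((http.+?)\)\s*(.*)$")  # hypothetical definition of the matcher used above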
Example #15
def write_screenshots_overview(overview):
    """

    :param overview:
    :return:
    """
    # get preamble
    text = utils.read_text(c.screenshots_file)
    text = text.split('\n# ')[0] + '\n'

    # write out each entry sorted by name
    for name in sorted(overview.keys()):
        a = overview[name]
        t = '# {}\n\n'.format(name)
        # write out each line sorted by id
        for id in sorted(a.keys()):
            ai = a[id]
            if ai[-1] is None:
                ai = ai[:-1]
            t += ' '.join(['{:02d}'.format(id)] + [str(x) for x in ai]) + '\n'
        t += '\n'
        text += t

    utils.write_text(c.screenshots_file, text)
Example #16
def parse_lgw_content():

    # paths
    import_path = os.path.join(constants.root_path, 'code', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')

    # iterate over all imported files
    files = os.listdir(import_path)
    entries = []
    for file in files:
        if file.startswith('_lgw'):
            continue

        text = utils.read_text(os.path.join(import_path, file))

        # parse the html
        soup = BeautifulSoup(text, 'html.parser')
        title = soup.h1.get_text()
        print(title)
        entry = {'name': title}

        # get all external links
        ignored_external_links = ('libregamewiki.org', 'freegamedev.net',
                                  'freegamer.blogspot.com', 'opengameart.org',
                                  'gnu.org', 'creativecommons.org',
                                  'freesound.org', 'freecode.com',
                                  'freenode.net')
        links = [(x['href'], x.get_text())
                 for x in soup.find_all('a', href=True)]
        links = [
            x for x in links if x[0].startswith('http')
            and not any([y in x[0] for y in ignored_external_links])
        ]
        entry['external links'] = links

        # get meta description
        description = soup.find('meta', attrs={"name": "description"})
        entry['description'] = description['content']

        # parse gameinfobox
        infos = soup.find('div', class_='gameinfobox')
        if not infos:
            print(' no gameinfobox')
        else:
            infos = infos.find_all('tr')
            for x in infos:
                if x.th and x.td:
                    # row with header
                    key = x.th.get_text()
                    content = x.td.get_text()
                    content = content.split(',')
                    content = [x.strip() for x in content]
                    entry[key] = content
                if not x.th and x.td:
                    # row without header: contribute section
                    x = x.find_all('li')
                    x = [(x.a.string, x.a['href']) for x in x if x.a]
                    for key, content in x:
                        entry[key] = content

        # parse "for available as package in"
        tables = soup.find_all('table', class_='wikitable')
        tables = [
            table for table in tables if table.caption
            and table.caption.string.startswith('Available as package')
        ]
        if len(tables) > 0:
            if len(tables) > 1:
                raise RuntimeError()
            table = tables[0]
            packages = table.find_all('tr')
            packages = [x.td.a['href'] for x in packages]
            entry['linux-packages'] = packages

        # categories
        categories = soup.find_all('div', id='mw-normal-catlinks')
        if not categories:
            print(' no categories')
            categories = []
        else:
            if len(categories) > 1:
                raise RuntimeError()
            categories = categories[0]
            categories = categories.find_all('li')
            categories = [x.a.string for x in categories]
            if 'Games' not in categories:
                print(' "Games" not in categories')
            else:
                categories.remove('Games')  # should be there
            # strip games at the end
            phrase = ' games'
            categories = [
                x[:-len(phrase)] if x.endswith(phrase) else x
                for x in categories
            ]
            ignored_categories = ['Articles lacking reference', 'Stubs']
            categories = [x for x in categories if x not in ignored_categories]
        entry['categories'] = categories

        entries.append(entry)

    # save entries
    text = json.dumps(entries, indent=1)
    utils.write_text(entries_file, text)
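
For orientation, the dictionary produced per game might look roughly like this (all values invented; the infobox keys depend on the wiki page):

# {
#     'name': 'Some Game',
#     'description': 'A libre real-time strategy game.',
#     'external links': [('https://some-game.example.org', 'Official site')],
#     'Code license': ['GPLv2'],
#     'linux-packages': ['https://packages.debian.org/some-game'],
#     'categories': ['Strategy']
# }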
Example #17
def clean_lgw_content():

    # paths
    import_path = os.path.join(constants.root_path, 'code', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')
    cleaned_entries_file = os.path.join(import_path, '_lgw.cleaned.json')

    # load entries
    text = utils.read_text(entries_file)
    entries = json.loads(text)

    # rename keys
    key_replacements = (
        ('developer', ('Developer', 'Developers')),
        ('code license', ('Code license', 'Code licenses')),
        ('engine', ('Engine', 'Engines')),
        ('genre', ('Genre', 'Genres')),
        ('library', ('Library', 'Libraries')),
        ('assets license', ('Media license', 'Media licenses')),
        ('code language', ('P. language', 'P. languages')),
        ('home', ('Homepage',)),
        ('platform', ('Platforms',)),
        ('tracker', ('Bug/Feature Tracker',)),
        ('repo', ('Source Code',)),
        ('forum', ('Forum',)),
        ('chat', ('Chat',)),
        ('origin', ('Origin',)),
        ('dev home', ('Development Project',)),
        ('last active', ('Release date',)))
    for index, entry in enumerate(entries):
        for new_key, old_keys in key_replacements:
            for key in old_keys:
                if key in entry:
                    entry[new_key] = entry[key]
                    del entry[key]
                    break
        entries[index] = entry

    # ignore keys
    ignored_keys = ('origin', 'Latest\xa0release')
    for index, entry in enumerate(entries):
        for key in ignored_keys:
            if key in entry:
                del entry[key]
        entries[index] = entry

    # check for unique field names
    unique_fields = set()
    for entry in entries:
        unique_fields.update(entry.keys())
    print('unique lgw fields: {}'.format(sorted(list(unique_fields))))

    # which fields are mandatory
    mandatory_fields = unique_fields.copy()
    for entry in entries:
        remove_fields = [
            field for field in mandatory_fields if field not in entry
        ]
        mandatory_fields -= set(remove_fields)
    print('mandatory lgw fields: {}'.format(sorted(list(mandatory_fields))))

    # statistics before
    print('field contents before')
    fields = sorted(
        list(unique_fields -
             set(('description', 'external links', 'dev home', 'forum', 'home',
                  'linux-packages', 'developer', 'chat', 'tracker',
                  'Latest release', 'name', 'repo', 'Release date',
                  'categories'))))
    for field in fields:
        content = [entry[field] for entry in entries if field in entry]
        # flatten
        flat_content = []
        for c in content:
            if isinstance(c, list):
                flat_content.extend(c)
            else:
                flat_content.append(c)
        statistics = utils.unique_elements_and_occurrences(flat_content)
        print('{}: {}'.format(field, ', '.join(statistics)))

    # content replacements
    entries = remove_parenthized_content(
        entries, ('assets license', 'code language', 'code license', 'engine',
                  'genre', 'last active', 'library'))
    entries = remove_prefix_suffix(entries, ('code license', 'assets license'),
                                   (
                                       '"',
                                       'GNU',
                                   ), ('"', '[3]', '[2]', '[1]', 'only'))
    entries = replace_content(entries, ('code license', 'assets license'),
                              'GPL', ('General Public License', ))
    entries = replace_content(
        entries, ('code license', 'assets license'), 'GPL-2.0',
        ('GPLv2', ))  # for LGW GPLv2 would be the correct writing
    entries = replace_content(
        entries, ('code license', 'assets license'), 'GPL-2',
        ('GPLv2', 'GPL v2', 'GPL version 2.0', 'GPL 2.0',
         'General Public License v2', 'GPL version 2', 'Gplv2', 'GPL 2'))
    entries = replace_content(
        entries, ('code license', 'assets license'), 'GPL-2',
        ('GPL v2 or later', 'GPL 2+', 'GPL v2+', 'GPL version 2 or later'))
    entries = replace_content(
        entries, ('code license', 'assets license'), 'GPL-3.0',
        ('GPLv3', ))  # for LGW GPLv3 would be the correct writing
    entries = replace_content(entries, ('code license', 'assets license'),
                              'GPL-3', ('GPL v3', 'GNU GPL v3', 'GPL 3'))
    entries = replace_content(
        entries, ('code license', 'assets license'), 'GPL-3',
        ('GPL v3+', 'GPL v.3 or later', 'GPL v3 or later'))
    entries = replace_content(entries, ('code license', 'assets license'),
                              'Public domain',
                              ('public domain', 'Public Domain'))
    entries = replace_content(entries, ('code license', 'assets license'),
                              'zlib', ('zlib/libpng license', 'Zlib License'))
    entries = replace_content(entries, ('code license', 'assets license'),
                              'BSD', ('Original BSD License', ))
    entries = replace_content(
        entries, ('code license', 'assets license'), 'CC-BY-SA-3.0',
        ('Creative Commons Attribution-ShareAlike 3.0 Unported License',
         'CC-BY-SA 3.0', 'CC BY-SA 3.0'))
    entries = replace_content(entries, ('code license', 'assets license'),
                              'CC-BY-SA', ('CC BY-SA', ))
    entries = replace_content(entries, ('code license', 'assets license'),
                              'MIT', ('MIT License', 'MIT"'))
    entries = replace_content(entries, 'platform', 'macOS', ('Mac', ))
    entries = remove_prefix_suffix(entries, ('code language', 'developer'), (),
                                   ('[3]', '[2]', '[1]'))
    entries = ignore_content(entries, 'code language',
                             ('HTML5', 'HTML', 'English', 'XML', 'WML'))
    entries = replace_content(entries, 'code language', 'Lua', ('lua', 'LUA'))
    entries = remove_prefix_suffix(entries, 'genre', (), ('game', 'games'))
    entries = lower_case_content(entries, 'genre')
    entries = replace_content(entries, 'genre', 'platform', ('platformer', ))
    entries = replace_content(entries, 'genre', 'role playing', ('rpg', ))
    entries = replace_content(entries, 'genre', 'first person, shooter',
                              ('fps', ))
    entries = replace_content(entries, 'genre', 'real time, strategy',
                              ('rts', ))
    entries = replace_content(entries, 'genre', 'turn based, strategy',
                              ('tbs', ))
    entries = ignore_content(
        entries, 'categories',
        ('GPL', 'C++', 'C', 'ECMAScript', 'Python', 'Java', 'CC BY-SA', 'Lua',
         'LGPL', 'CC-BY', 'BSD', 'MIT', 'Qt', 'SDL', 'OpenGL', 'Pygame', 'PD',
         'GLUT', 'Haskell', 'Allegro', 'Ruby', 'Zlib/libpng', 'OpenAL', 'Perl',
         'Free Pascal', 'LÖVE', 'HTML5', 'Id Tech 1'))
    entries = replace_content(entries, 'library', 'pygame', ('Pygame', ))
    entries = replace_content(entries, 'library', 'Qt', ('QT', ))
    entries = ignore_content(entries, 'library',
                             ('C++', 'Lua', 'Mozilla Firefox'))
    entries = ignore_nonnumbers(entries, 'last active')
    entries = ignore_content(entries, 'last active', ('2019', ))
    entries = ignore_content(entries, 'platform', ('DOS', ))

    # list for every unique field
    print('\nfield contents after')
    fields = sorted(
        list(unique_fields -
             set(('description', 'external links', 'dev home', 'forum', 'home',
                  'linux-packages', 'developer', 'chat', 'tracker',
                  'Latest release', 'name', 'repo', 'Release date',
                  'categories'))))
    for field in fields:
        content = [entry[field] for entry in entries if field in entry]
        # flatten
        flat_content = []
        for c in content:
            if isinstance(c, list):
                flat_content.extend(c)
            else:
                flat_content.append(c)
        statistics = utils.unique_elements_and_occurrences(flat_content)
        print('{}: {}'.format(field, ', '.join(statistics)))

    # save entries
    text = json.dumps(entries, indent=1)
    utils.write_text(cleaned_entries_file, text)
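
The helpers replace_content, remove_prefix_suffix, ignore_content and friends are defined elsewhere; judging from the call sites above, replace_content plausibly works like this (a sketch under that assumption):

def replace_content(entries, fields, replacement, old_values):
    # hypothetical helper: wherever one of the given fields contains a value from old_values, substitute replacement
    if isinstance(fields, str):
        fields = (fields,)
    for entry in entries:
        for field in fields:
            if field in entry:
                entry[field] = [replacement if value in old_values else value for value in entry[field]]
    return entries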
Example #18
def gitlab_import():
    """
    Imports various information (like creation year, stars, forks and languages) from Gitlab repositories and updates the entries.
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gl_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))

    # any exception aborts the loop, but the finally block below still saves the progress made so far
    try:
        # loop over each entry
        for index, file in enumerate(files):
            print(' process {} ({})'.format(file, index))

            # read entry
            entry = osg.read_entry(file)
            code_repositories = entry['Code repository']
            repos = [x for x in code_repositories if x.startswith(prefix)]
            repos[0] += ' @add'
            repos = [x for x in repos if '@add' in x]
            repos = [x.split(' ')[0] for x in repos]
            repos = [x for x in repos if x not in ignored_repos]
            for repo in repos:
                print('  GL repo {}'.format(repo))

                info = osg_gitlab.retrieve_repo_info(repo)

                new_comments = []

                # add created comment
                new_comments.append('@created {}'.format(info['created'].year))

                # add stars
                new_comments.append('@stars {}'.format(info['stars']))

                # add forks
                new_comments.append('@forks {}'.format(info['forks']))

                # search for repository
                for r in code_repositories:
                    if r.startswith(repo):
                        break

                # update comment
                comments = r.comment
                if comments:
                    comments = comments.split(',')
                    comments = [c.strip() for c in comments]
                    comments = [c for c in comments
                                if not c.startswith('@')]  # delete old ones
                    comments += new_comments
                else:
                    comments = new_comments
                r.comment = ', '.join(comments)

                # language in languages
                for language, usage in info['languages'].items():
                    if language in c.known_languages and usage > 5 and language not in entry[
                            'Code language']:
                        entry['Code language'].append(language)
                        print('  added to languages: {}'.format(language))

            entry['Code repository'] = code_repositories
            osg.write_entry(entry)
    except:
        raise
    finally:
        # shorten file list
        utils.write_text(gl_entries_file, json.dumps(files[index:], indent=1))

        # osg.write_developers(all_developers)
        print('developers database updated')
    for x in x:
        p += ' {} {} missing\n'.format(k, x)
    return p


if __name__ == "__main__":

    similarity_threshold = 0.8
    maximal_newly_created_entries = 40

    # paths
    import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
    lgw_entries_file = os.path.join(import_path, '_lgw.cleaned.json')

    # import lgw import
    text = utils.read_text(lgw_entries_file)
    lgw_entries = json.loads(text)

    # perform name replacements
    lgw_entries = [x for x in lgw_entries if x['name'] not in ignored_names]
    for index, lgw_entry in enumerate(lgw_entries):
        if lgw_entry['name'] in name_replacements:
            lgw_entry['name'] = name_replacements[lgw_entry['name']]
        if 'code language' in lgw_entry:
            languages = lgw_entry['code language']
            h = []
            for l in languages:
                for g in ('/', 'and'):
                    if g in l:
                        l = l.split(g)
                        l = [x.strip() for x in l]
"""

# TODO do not add if already 3, but print warning instead

import os
import requests
from io import BytesIO
from PIL import Image
from utils import utils as u, constants as c, osg as osg

if __name__ == "__main__":
    # paths
    root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))

    # read content of screenshots_bzt.txt
    info = u.read_text(os.path.join(root_path, 'code', 'synchronization', 'screenshots_bzt.txt'))
    info = info.split('\n') # split on line end
    info = [entry.split('\t') for entry in info] # split on tabs
    info = [[entry[0].strip(), entry[-1].strip()] for entry in info] # only keep first and last (in case multiple tabs were used)

    # read our screenshots
    screenshots = osg.read_screenshots_overview()

    # iterate over all new info
    for entry in info:
        name = entry[0]
        print('work on {}'.format(name))
        url = entry[1]

        # is contained?
        our_screenshots = screenshots.get(name, {})
Example #21
    def update_readme_tocs(self):
        """
        Recounts entries in sub categories and writes them to the readme.
        Also updates the _toc files in the categories directories.

        Note: The Readme must have a specific structure at the beginning, starting with "# Open Source Games" and ending
        on "A collection.."

        Needs to be performed regularly.
        """

        # completely delete content of toc path
        for file in os.listdir(c.tocs_path):
            os.remove(os.path.join(c.tocs_path, file))

        # read readme
        readme_file = os.path.join(c.root_path, 'README.md')
        readme_text = utils.read_text(readme_file)

        # compile regex for identifying the building blocks in the readme
        regex = re.compile(
            r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)",
            re.DOTALL)

        # apply regex
        matches = regex.findall(readme_text)
        if len(matches) != 1:
            raise RuntimeError('readme file has invalid structure')
        matches = matches[0]
        start = matches[0]
        end = matches[2]

        tocs_text = ''

        # split into games, tools, frameworks, libraries
        games = [
            x for x in self.entries if not any(
                [y in x['Keyword'] for y in ('tool', 'framework', 'library')])
        ]
        tools = [x for x in self.entries if 'tool' in x['Keyword']]
        frameworks = [x for x in self.entries if 'framework' in x['Keyword']]
        libraries = [x for x in self.entries if 'library' in x['Keyword']]

        # create games, tools, frameworks, libraries tocs
        title = 'Games'
        file = '_games.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(
            title, file, title, len(games))
        create_toc(title, file, games)

        title = 'Tools'
        file = '_tools.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(
            title, file, title, len(tools))
        create_toc(title, file, tools)

        title = 'Frameworks'
        file = '_frameworks.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(
            title, file, title, len(frameworks))
        create_toc(title, file, frameworks)

        title = 'Libraries'
        file = '_libraries.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({})\n'.format(
            title, file, title, len(libraries))
        create_toc(title, file, libraries)

        # create by category
        categories_text = []
        for keyword in c.recommended_keywords:
            filtered = [x for x in self.entries if keyword in x['Keyword']]
            title = keyword.capitalize()
            name = keyword.replace(' ', '-')
            file = '_{}.md'.format(name)
            categories_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(
                title, file, name, len(filtered)))
            create_toc(title, file, filtered)
        categories_text.sort()
        tocs_text += '\nBy category: {}\n'.format(', '.join(categories_text))

        # create by platform
        platforms_text = []
        for platform in c.valid_platforms:
            filtered = [
                x for x in self.entries if platform in x.get('Platform', [])
            ]
            title = platform
            name = platform.lower()
            file = '_{}.md'.format(name)
            platforms_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(
                title, file, name, len(filtered)))
            create_toc(title, file, filtered)
        tocs_text += '\nBy platform: {}\n'.format(', '.join(platforms_text))

        # insert new text in the middle (the \n before the second comment is necessary, otherwise Markdown displays it as part of the bullet list)
        text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end

        # write to readme
        utils.write_text(readme_file, text)

        print('Readme and TOCs updated')
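
create_toc itself is not part of this example; a minimal sketch of what it presumably does, with the link format being an assumption:

def create_toc(title, file, entries):
    # hypothetical helper: write one toc file that links every entry of the group
    text = '# {} ({})\n\n'.format(title, len(entries))
    rows = ['- [{}](../{})'.format(entry['Title'], entry['File'])
            for entry in sorted(entries, key=lambda e: str.casefold(str(e['Title'])))]
    text += '\n'.join(rows) + '\n'
    utils.write_text(os.path.join(c.tocs_path, file), text)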
Example #22
def github_import():
    """

    :return:
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gh_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))

    # any exception aborts the loop, but the finally block below still saves the progress made so far
    try:
        # loop over each entry
        for index, file in enumerate(files):
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            code_repositories = entry['Code repository']
            repos = [
                x.value for x in code_repositories if x.startswith(prefix)
            ]
            repos[0] += ' @add'
            repos = [x for x in repos if '@add' in x]
            repos = [x.split(' ')[0] for x in repos]
            repos = [x for x in repos if x not in ignored_repos]
            for repo in repos:
                print('  GH repo {}'.format(repo))

                info = osg_github.retrieve_repo_info(
                    repo, private_properties['github-token'])

                new_comments = []
                # is archived
                if info['archived']:
                    if not osg.is_inactive(entry):
                        print(
                            'warning: repo is archived but not inactive state??'
                        )
                    # add archive to repo comment
                    new_comments.append('@archived')

                # add created comment
                new_comments.append('@created {}'.format(info['created'].year))

                # add stars
                new_comments.append('@stars {}'.format(info['stars']))

                # add forks
                new_comments.append('@forks {}'.format(info['forks']))

                # update comment
                for r in code_repositories:
                    if r.value.startswith(repo):
                        break
                comments = r.comment
                if comments:
                    comments = comments.split(',')
                    comments = [c.strip() for c in comments]
                    comments = [c for c in comments
                                if not c.startswith('@')]  # delete old ones
                    comments += new_comments
                else:
                    comments = new_comments
                r.comment = ', '.join(comments)

                # language in languages
                language = info['language']
                language = language_aliases.get(language, language)
                if language and language not in entry[
                        'Code language'] and language not in ignored_languages:
                    entry['Code language'].append(
                        osg_parse.ValueWithComment(language))
                    print('  added to languages: {}'.format(language))

                # contributors
                for contributor in info['contributors']:
                    if contributor.type != 'User':
                        continue
                    if contributor.contributions < 4:
                        continue
                    # contributor.login/name/blog
                    name = contributor.name
                    if not name:
                        name = contributor.login
                    name = name_aliases.get(name, name)
                    nickname = '{}@GH'.format(contributor.login)
                    blog = contributor.blog
                    if blog:
                        blog = blog_alias[blog] if blog in blog_alias else blog
                        if not blog.startswith('http'):
                            blog = 'https://' + blog
                        if blog in ignored_blogs:
                            blog = None

                    # look up author in entry developers
                    if name not in entry.get('Developer', []):
                        print('   dev "{}" added to entry {}'.format(
                            name, file))
                        entry['Developer'] = entry.get('Developer', []) + [
                            osg_parse.ValueWithComment(name)
                        ]

                    # look up author in developers data base
                    if name in all_developers:
                        dev = all_developers[name]
                        if not nickname in dev.get('Contact', []):
                            print(
                                ' existing dev "{}" added nickname ({}) to developer database'
                                .format(name, nickname))
                            # check that name has not already @GH contact
                            if any(
                                    x.endswith('@GH')
                                    for x in dev.get('Contact', [])):
                                print('warning: already GH contact')
                            dev['Contact'] = dev.get('Contact',
                                                     []) + [nickname]
                        if blog and blog not in dev.get('Home', []):
                            dev['Home'] = dev.get('Home', []) + [blog]
                        # TODO add to games entries!
                    else:
                        print('   dev "{}" ({}) added to developer database'.
                              format(name, nickname))
                        all_developers[name] = {
                            'Name': name,
                            'Contact': [nickname],
                            'Games': [entry['Title']]
                        }
                        if blog:
                            all_developers[name]['Home'] = [blog]

            entry['Code repository'] = code_repositories
            osg.write_entry(entry)
    except:
        raise
    finally:
        # shorten file list
        utils.write_text(gh_entries_file, json.dumps(files[index:], indent=1))

        osg.write_developers(all_developers)
        print('developers database updated')
def sourceforge_import():
    """

    :return:
    """
    files = json.loads(utils.read_text(sf_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))
    all_developers_changed = False

    # any exception aborts the loop, but the finally block below still saves the progress made so far
    try:
        # loop over each entry
        for index, file in enumerate(files):
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            developers = entry.get('Developer', [])
            urls = [
                x.value for x in entry['Home']
                if x.startswith('https://sourceforge.net/projects/')
            ]

            entry_changed = False

            for url in urls:
                print('  sf project {}'.format(url))

                if not url.endswith('/'):
                    print('error: sf project does not end with slash')
                    url += '/'

                # members
                url_members = 'https://sourceforge.net/p/' + url[
                    len(prefix):] + '_members/'
                response = requests.get(url_members)
                if response.status_code != 200:
                    print('error: url {} not accessible, status {}'.format(
                        url_members, response.status_code))
                    raise RuntimeError()
                soup = BeautifulSoup(response.text, 'html.parser')
                authors = soup.find(
                    'div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [
                    author[1].a['href'] for author in authors
                    if len(author) == 3
                ]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url_author = 'https://sourceforge.net' + author if not author.startswith(
                        'http') else author
                    response = requests.get(url_author)
                    if response.status_code != 200 and author not in (
                            '/u/favorito/', ):
                        print('error: url {} not accessible, status {}'.format(
                            url_author, response.status_code))
                        raise RuntimeError()
                    url_author = response.url  # could be different now
                    if 'auth/?return_to' in url_author or response.status_code != 200:
                        # for some reason authorisation is forbidden or page was not available (happens for example for /u/kantaros)
                        author_name = author[3:-1]
                        nickname = author_name
                    else:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        author_name = SF_alias_list.get(
                            author_name,
                            author_name)  # replace by alias if possible
                        nickname = soup.find(
                            'dl',
                            class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    nickname += '@SF'  # our indication of the platform to search for
                    author_name = author_name.strip()  # names can still have white spaces before or after

                    if author_name in SF_ignore_list:
                        continue

                    # look author up in entry developers
                    if author_name not in developers:
                        print('   dev "{}" added to entry {}'.format(
                            author_name, file))
                        entry['Developer'] = entry.get('Developer', []) + [
                            osg_parse.ValueWithComment(author_name)
                        ]
                        entry_changed = True
                        developers = entry.get('Developer', [])

                    # look author and SF nickname up in developers data base
                    if author_name in all_developers:
                        dev = all_developers[author_name]
                        if not nickname in dev.get('Contact', []):
                            print(
                                ' existing dev "{}" added nickname ({}) to developer database'
                                .format(author_name, nickname))
                            # check that name has not already @SF contact
                            if any(
                                    x.endswith('@SF')
                                    for x in dev.get('Contact', [])):
                                print('warning: already SF contact')
                            all_developers[author_name]['Contact'] = dev.get(
                                'Contact', []) + [nickname]
                            all_developers_changed = True
                    else:
                        print('   dev "{}" ({}) added to developer database'.
                              format(author_name, nickname))
                        all_developers[author_name] = {
                            'Name': author_name,
                            'Contact': [nickname],
                            'Games': [entry['Title']]
                        }
                        all_developers_changed = True

            if entry_changed:
                # save entry
                osg.write_entry(entry)
                print('  entry updated')
    except:
        raise
    finally:
        # shorten file list
        utils.write_text(sf_entries_file, json.dumps(files[index:], indent=1))

        # save entry
        osg.write_entry(entry)
        print(' entry updated')

        # maybe save all developers
        if all_developers_changed:
            # save all developers
            osg.write_developers(all_developers)
            print('developers database updated')


if __name__ == "__main__":

    # check_validity_backlog()

    # backlog
    game_urls = osg.extract_links()
    text = utils.read_text(os.path.join(c.root_path, 'tools', 'rejected.txt'))
    regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
    matches = regex.findall(text)
    rejected_urls = []
    for match in matches:
        urls = match.split(',')
        urls = [x.strip() for x in urls]
        rejected_urls.extend(urls)
    game_urls.extend(rejected_urls)
    more_urls = []
    for url in game_urls:
        if url.startswith('https://web.archive.org/web'):
            url = url[url.index('http', 5):]
            more_urls.append(url)
    game_urls.extend(more_urls)
    stripped_game_urls = [utils.strip_url(x) for x in game_urls]
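
Presumably the script then hands these stripped URLs to a clean_backlog helper like the one shown earlier; a sketch of that final step, assuming such a function is in scope:

    clean_backlog(set(stripped_game_urls))  # assumption: a set keeps the membership tests fast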