Exemple #1
0
def gitlab_import():
    """
    Import repository information from Gitlab (creation year, stars, forks, languages) into entries.

    The entry files to process are read as a JSON list from ``gl_entries_file``. For every entry the code
    repositories starting with ``prefix`` are looked up via ``osg_gitlab.retrieve_repo_info``, the comment of
    the matching repository is refreshed with ``@created``/``@stars``/``@forks`` tags and languages that are
    known and have a usage above 5 are added to the entry, which is then written back.

    Whatever happens, the list of files that are not yet fully processed is written back to
    ``gl_entries_file`` so that an interrupted run can be resumed. Exceptions are not swallowed: they end
    the run and propagate to the caller after the file list has been saved.
    """
    files = json.loads(utils.read_text(gl_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))

    # index of the first file that still has to be processed (the file that failed, if any)
    first_pending = 0
    try:
        # loop over each entry
        for index, file in enumerate(files):
            first_pending = index
            print(' process {} ({})'.format(file, index))

            # read entry
            entry = osg.read_entry(file)
            code_repositories = entry['Code repository']

            # work on the plain repository urls (same approach as github_import)
            repos = [x.value for x in code_repositories if x.startswith(prefix)]
            if repos:
                # the first matching repository is always processed
                repos[0] += ' @add'
            repos = [x for x in repos if '@add' in x]
            repos = [x.split(' ')[0] for x in repos]
            repos = [x for x in repos if x not in ignored_repos]

            for repo in repos:
                print('  GH repo {}'.format(repo))

                info = osg_gitlab.retrieve_repo_info(repo)

                # tags that replace all previous '@...' tags of the repository comment
                new_comments = [
                    '@created {}'.format(info['created'].year),
                    '@stars {}'.format(info['stars']),
                    '@forks {}'.format(info['forks']),
                ]

                # search for repository (always found, repo was derived from one of these values)
                repo_entry = next(r for r in code_repositories if r.value.startswith(repo))

                # update comment: keep manual remarks, drop old tags, append fresh tags
                kept_comments = []
                if repo_entry.comment:
                    kept_comments = [part.strip() for part in repo_entry.comment.split(',')]
                    kept_comments = [part for part in kept_comments if not part.startswith('@')]
                repo_entry.comment = ', '.join(kept_comments + new_comments)

                # language in languages
                # NOTE(review): github_import wraps new languages in osg_parse.ValueWithComment, here the
                # plain name is appended - confirm which form osg.write_entry expects
                for language, usage in info['languages'].items():
                    if (language in c.known_languages and usage > 5
                            and language not in entry['Code language']):
                        entry['Code language'].append(language)
                        print('  added to languages: {}'.format(language))

            entry['Code repository'] = code_repositories
            osg.write_entry(entry)

        # everything went through, nothing is left to do
        first_pending = len(files)
    finally:
        # shorten file list
        utils.write_text(gl_entries_file, json.dumps(files[first_pending:], indent=1))

        # NOTE(review): the developers database is not written here (osg.write_developers is disabled),
        # the message below is kept unchanged for now
        print('developers database updated')
def sourceforge_import():
    """
    Import the members of SourceForge projects as developers of entries and into the developer database.

    The entry files to process are read as a JSON list from ``sf_entries_file``. For every 'Home' url of an
    entry that is a SourceForge project page, the project's ``_members/`` page is downloaded and every
    listed member's profile page is visited to obtain name and nickname. Members missing in the entry are
    added as 'Developer'; the nickname (suffixed with ``@SF``) is added as 'Contact' to the developer
    database, creating a new developer record if necessary.

    Whatever happens, the list of files that are not yet fully processed is written back to
    ``sf_entries_file``, the entry read last is saved and a modified developer database is written.
    Exceptions are not swallowed: they end the run and propagate after this clean-up.

    :raises RuntimeError: if a members page or an author page is not accessible
    """
    # prefix of project pages, used to select the urls and to cut the project part out of them
    sf_prefix = 'https://sourceforge.net/projects/'

    files = json.loads(utils.read_text(sf_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))
    all_developers_changed = False

    # index of the first file that still has to be processed (the file that failed, if any)
    first_pending = 0
    # entry read last, None as long as no entry could be read
    entry = None
    try:
        # loop over each entry
        for index, file in enumerate(files):
            first_pending = index
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            developers = entry.get('Developer', [])
            urls = [x.value for x in entry['Home'] if x.startswith(sf_prefix)]

            entry_changed = False

            for url in urls:
                print('  sf project {}'.format(url))

                if not url.endswith('/'):
                    print('error: sf project does not end with slash')
                    url += '/'

                # members
                url_members = 'https://sourceforge.net/p/' + url[len(sf_prefix):] + '_members/'
                response = requests.get(url_members)
                if response.status_code != 200:
                    print('error: url {} not accessible, status {}'.format(
                        url_members, response.status_code))
                    raise RuntimeError()
                soup = BeautifulSoup(response.text, 'html.parser')
                rows = soup.find('div', id='content_base').find('table').find_all('tr')
                cells_per_row = [row.find_all('td') for row in rows]
                # only rows with exactly three cells are used, the second cell links to the profile
                authors = [cells[1].a['href'] for cells in cells_per_row if len(cells) == 3]

                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    if author.startswith('http'):
                        url_author = author
                    else:
                        url_author = 'https://sourceforge.net' + author
                    response = requests.get(url_author)
                    if response.status_code != 200 and author not in ('/u/favorito/', ):
                        print('error: url {} not accessible, status {}'.format(
                            url_author, response.status_code))
                        raise RuntimeError()
                    url_author = response.url  # could be different now
                    if 'auth/?return_to' in url_author or response.status_code != 200:
                        # for some reason authorisation is forbidden or page was not available
                        # (happens for example for /u/kantaros)
                        # fall back to the link itself, cutting the first three and the last character
                        author_name = author[3:-1]
                        nickname = author_name
                    else:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        # replace by alias if possible
                        author_name = SF_alias_list.get(author_name, author_name)
                        nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    nickname += '@SF'  # our indication of the platform to search for
                    # names can still have white spaces before or after
                    author_name = author_name.strip()

                    if author_name in SF_ignore_list:
                        continue

                    # look author up in entry developers
                    if author_name not in developers:
                        print('   dev "{}" added to entry {}'.format(author_name, file))
                        entry['Developer'] = entry.get('Developer', []) + [
                            osg_parse.ValueWithComment(author_name)
                        ]
                        entry_changed = True
                        developers = entry.get('Developer', [])

                    # look author and SF nickname up in developers data base
                    if author_name in all_developers:
                        dev = all_developers[author_name]
                        if nickname not in dev.get('Contact', []):
                            print(
                                ' existing dev "{}" added nickname ({}) to developer database'
                                .format(author_name, nickname))
                            # check that name has not already @SF contact
                            if any(x.endswith('@SF') for x in dev.get('Contact', [])):
                                print('warning: already SF contact')
                            dev['Contact'] = dev.get('Contact', []) + [nickname]
                            all_developers_changed = True
                    else:
                        print('   dev "{}" ({}) added to developer database'.
                              format(author_name, nickname))
                        all_developers[author_name] = {
                            'Name': author_name,
                            'Contact': [nickname],
                            'Games': [entry['Title']]
                        }
                        all_developers_changed = True

            if entry_changed:
                # save entry
                osg.write_entry(entry)
                print('  entry updated')

        # everything went through, nothing is left to do
        first_pending = len(files)
    finally:
        # shorten file list
        utils.write_text(sf_entries_file, json.dumps(files[first_pending:], indent=1))

        # save the entry read last (keeps changes made before an interruption)
        if entry is not None:
            osg.write_entry(entry)
            print(' entry updated')

        # maybe save all developers
        if all_developers_changed:
            # save all developers
            osg.write_developers(all_developers)
            print('developers database updated')
            if 'framework' in osgc_entry:
                osgc_frameworks = osgc_entry['framework']
                if type(osgc_frameworks) == str:
                    osgc_frameworks = [osgc_frameworks]
                entry['Code dependency'] = osgc_frameworks

            # write info (if existing)
            if 'info' in osgc_entry:
                entry['Note'] = osgc_entry['info']

            # add empty building
            entry['Building'] = {}

            # finally write to file
            print(entry)
            osg.write_entry(entry)
            newly_created_entries += 1

    # save updated screenshots if they could have changed
    if download_missing_screenshots:
        osg.write_screenshots_overview(screenshots)

    # now iterate over our entries and test if we can add anything to them
    print('entries that could be added to them:')
    for our_entry in our_entries:
        our_name = our_entry['Title']

        # only if contains Inspiration and not "tool", "framework" or "library"
        our_keywords = our_entry['Keyword']
        if not 'Inspiration' in our_entry:
            continue
Exemple #4
0
def github_import():
    """
    Import repository information and contributors from GitHub into entries and the developer database.

    The entry files to process are read as a JSON list from ``gh_entries_file``. For every entry the code
    repositories starting with ``prefix`` are looked up via ``osg_github.retrieve_repo_info`` (using the
    'github-token' of the private properties). The comment of the matching repository is refreshed with
    ``@archived``/``@created``/``@stars``/``@forks`` tags, the repository language is added to the entry
    and contributors of type 'User' with at least 4 contributions are added as developers of the entry and
    to the developer database (nickname suffixed with ``@GH`` as 'Contact', blog as 'Home').

    Whatever happens, the list of files that are not yet fully processed is written back to
    ``gh_entries_file`` and the developer database is saved. Exceptions are not swallowed: they end the run
    and propagate to the caller after this clean-up.
    """
    private_properties = json.loads(utils.read_text(c.private_properties_file))

    files = json.loads(utils.read_text(gh_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))

    # index of the first file that still has to be processed (the file that failed, if any)
    first_pending = 0
    try:
        # loop over each entry
        for index, file in enumerate(files):
            first_pending = index
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            code_repositories = entry['Code repository']
            repos = [x.value for x in code_repositories if x.startswith(prefix)]
            if repos:
                # the first matching repository is always processed
                repos[0] += ' @add'
            repos = [x for x in repos if '@add' in x]
            repos = [x.split(' ')[0] for x in repos]
            repos = [x for x in repos if x not in ignored_repos]

            for repo in repos:
                print('  GH repo {}'.format(repo))

                info = osg_github.retrieve_repo_info(repo, private_properties['github-token'])

                # tags that replace all previous '@...' tags of the repository comment
                new_comments = []
                # is archived
                if info['archived']:
                    if not osg.is_inactive(entry):
                        print('warning: repo is archived but not inactive state??')
                    # add archive to repo comment
                    new_comments.append('@archived')
                new_comments.append('@created {}'.format(info['created'].year))
                new_comments.append('@stars {}'.format(info['stars']))
                new_comments.append('@forks {}'.format(info['forks']))

                # search for repository (always found, repo was derived from one of these values)
                repo_entry = next(r for r in code_repositories if r.value.startswith(repo))

                # update comment: keep manual remarks, drop old tags, append fresh tags
                kept_comments = []
                if repo_entry.comment:
                    kept_comments = [part.strip() for part in repo_entry.comment.split(',')]
                    kept_comments = [part for part in kept_comments if not part.startswith('@')]
                repo_entry.comment = ', '.join(kept_comments + new_comments)

                # language in languages
                language = info['language']
                language = language_aliases.get(language, language)
                if (language and language not in entry['Code language']
                        and language not in ignored_languages):
                    entry['Code language'].append(osg_parse.ValueWithComment(language))
                    print('  added to languages: {}'.format(language))

                # contributors
                for contributor in info['contributors']:
                    if contributor.type != 'User':
                        continue
                    if contributor.contributions < 4:
                        continue
                    # contributor.login/name/blog
                    name = contributor.name or contributor.login
                    name = name_aliases.get(name, name)
                    nickname = '{}@GH'.format(contributor.login)
                    blog = contributor.blog
                    if blog:
                        blog = blog_alias.get(blog, blog)
                        if not blog.startswith('http'):
                            blog = 'https://' + blog
                        if blog in ignored_blogs:
                            blog = None

                    # look up author in entry developers
                    if name not in entry.get('Developer', []):
                        print('   dev "{}" added to entry {}'.format(name, file))
                        entry['Developer'] = entry.get('Developer', []) + [
                            osg_parse.ValueWithComment(name)
                        ]

                    # look up author in developers data base
                    if name in all_developers:
                        dev = all_developers[name]
                        if nickname not in dev.get('Contact', []):
                            print(
                                ' existing dev "{}" added nickname ({}) to developer database'
                                .format(name, nickname))
                            # check that name has not already @GH contact
                            if any(x.endswith('@GH') for x in dev.get('Contact', [])):
                                print('warning: already GH contact')
                            dev['Contact'] = dev.get('Contact', []) + [nickname]
                        if blog and blog not in dev.get('Home', []):
                            dev['Home'] = dev.get('Home', []) + [blog]
                        # TODO add to games entries!
                    else:
                        print('   dev "{}" ({}) added to developer database'.
                              format(name, nickname))
                        all_developers[name] = {
                            'Name': name,
                            'Contact': [nickname],
                            'Games': [entry['Title']]
                        }
                        if blog:
                            all_developers[name]['Home'] = [blog]

            entry['Code repository'] = code_repositories
            osg.write_entry(entry)

        # everything went through, nothing is left to do
        first_pending = len(files)
    finally:
        # shorten file list
        utils.write_text(gh_entries_file, json.dumps(files[first_pending:], indent=1))

        osg.write_developers(all_developers)
        print('developers database updated')