class MyRemoteCallbacks(pygit2.RemoteCallbacks):
    """pygit2 remote callbacks plus helpers to list, clone and delete Bitbucket repos.

    All settings (Bitbucket credentials, SSH key locations and the download
    location) are read from a ``Config`` instance created in ``__init__``.
    """

    def __init__(self):
        # Let the pygit2 base class set up whatever internal state it needs.
        super().__init__()
        self.config = Config()
        self.username = self.config.BITBUCKET_USERNAME
        self.app_password = self.config.BITBUCKET_APP_PASSWORD
        self.owner = self.config.BITBUCKET_OWNER
        self.client = Client(self.username, self.app_password, self.owner)

    def credentials(self, url, username_from_url, allowed_types):
        """Return the credential object matching the types the remote allows.

        Returns a ``pygit2.Username`` when a bare username is requested, a
        ``pygit2.Keypair`` when SSH key authentication is allowed, and
        ``None`` otherwise.
        """
        if allowed_types & pygit2.credentials.GIT_CREDTYPE_USERNAME:
            return pygit2.Username("git")
        if allowed_types & pygit2.credentials.GIT_CREDTYPE_SSH_KEY:
            # NOTE(review): the private key setting is also passed as the
            # passphrase argument — looks unintended, confirm before changing.
            return pygit2.Keypair(
                username_from_url,
                self.config.SSH_PUB_KEY,
                self.config.SSH_PRI_KEY,
                self.config.SSH_PRI_KEY,
            )
        return None

    def scan_repos(self):
        """Collect ``(slug, ssh_url, language)`` for every repository page.

        Each page is requested exactly once and is processed *before* the
        ``next`` link is checked, so the final page (or the only page) is
        included in the result.

        Returns:
            list of ``(repo_name, ssh_url, language)`` tuples.
        """
        repos = []
        page = 1
        while True:
            response = self.client.get_repositories({"page": page})
            for entry in response.get('values', []):
                ssh_url = next(
                    (link['href'] for link in entry['links']['clone']
                     if link['name'] == "ssh"),
                    None,
                )
                if ssh_url is None:
                    # Without an ssh link there is nothing we can clone.
                    logging.warning('Repo %s has no ssh clone link, skipping',
                                    entry['slug'])
                    continue
                repos.append((entry['slug'], ssh_url, entry.get('language')))
            if not response.get('next'):
                break
            # Continue from the page number the API reports for this response.
            page = int(response.get('page', page)) + 1
        return repos

    def clone_repository(self, repo: str, url: str, lang: str = None):
        """Clone ``url`` over ssh into ``PATRONUS_DOWNLOAD_LOCATION + repo``.

        Failures are logged and swallowed so one bad repository does not
        abort a batch run. ``lang`` is accepted for call compatibility with
        the tuples produced by ``scan_repos`` but is not used.
        """
        destination = "%s%s" % (self.config.PATRONUS_DOWNLOAD_LOCATION, repo)
        try:
            # NOTE(review): a new callbacks object (and with it a new
            # Config/Client) is built per clone; passing `self` would probably
            # work as well — confirm.
            pygit2.clone_repository(
                "ssh://%s" % (url.replace('org:', 'org/')),
                destination,
                callbacks=MyRemoteCallbacks(),
            )
        except Exception as err:
            logging.error('Error while cloning repo %s to %s: %s',
                          repo, destination, err)
        else:
            logging.info('Successfully cloned repo %s to %s', repo, destination)
        return

    def clone_wrapper(self, args):
        """Unpack one ``(repo, url, lang)`` tuple for ``clone_repository``."""
        return self.clone_repository(*args)

    def clone_all_repository(self, repos: list):
        """Clone every entry of ``repos`` using one worker process per CPU.

        ``repos`` is an iterable of ``(repo, url, lang)`` tuples.
        """
        pool = Pool(processes=multiprocessing.cpu_count())
        try:
            pool.map(self.clone_wrapper, repos)
        finally:
            # Always shut the workers down, even when map() raises.
            pool.close()
            pool.join()
        return

    def clean_all_repos(self, repos: list):
        """Delete the local checkout of every repo in ``repos`` if it exists.

        Only index 0 (the repo name) of each entry is used.
        """
        for repo in repos:
            target = '%s%s' % (self.config.PATRONUS_DOWNLOAD_LOCATION, repo[0])
            if not os.path.exists(target):
                continue
            try:
                shutil.rmtree(target)
            except OSError as err:
                logging.error("Error deleting repo %s from %s: %s",
                              repo[0], target, err)
            else:
                logging.info("Deleted repo %s from %s", repo[0], target)
def getURLs(username, password, owner, redo=False):
    """
    Return the Bitbucket repositories that still need to be cloned.

    Unless ``redo`` is set, ``checked_URL.csv`` (columns ``URL`` and
    ``last_updated_time``) is read, and a repository is only returned when
    its URL is not listed there or when its recorded time is older than the
    ``updated_on`` time reported by Bitbucket. This function only reads the
    CSV; it does not write it.

    Parameters:
        username: email address to login to bitbucket
        password: password to login to bitbucket
        owner: owner of the bitbucket repos
        redo: if True, return every repository, disregarding
              checked_URL.csv (the file is not read at all)

    Returns:
        URLs: dict mapping clone URL (with the user handle stripped) to a
              tuple ``(updated_on, name)``: index 0 is the string date of
              the last update, index 1 is the string name of the repo
    """
    # URL -> recorded last-updated time. Stays empty when redo is True so
    # every repository counts as unchecked. If a URL occurs several times in
    # the CSV, the first row is the one that counts.
    checked = {}
    if not redo:
        # change to "checked_URL_empty.csv" if you want to start from fresh
        df = pd.read_csv("checked_URL.csv")
        for known_url, known_time in zip(df['URL'], df['last_updated_time']):
            checked.setdefault(known_url, known_time)

    client = Client(str(username), str(password), str(owner))
    # alt soln. here: https://thepythoncoding.blogspot.com/2019/06/python-script-to-clone-all-repositories.html?fbclid=IwAR0a-cI-EI9cA1cgQGkiXCY9R6-5SrJq_NItKurEQ59eSVnzGCVpmKtWs7g

    # compile repo list: keep requesting pages until one comes back empty
    repos = []
    pg = 1
    while True:
        batch = client.get_repositories(
            params={'pagelen': 100, 'page': pg})['values']
        if not batch:
            break
        repos.extend(batch)
        pg += 1

    # get URLs
    URLs = {}
    for repo in repos:
        raw_URL = repo['links']['clone'][0]['href']

        # remove user handle from url (the text between '//' and
        # 'bitbucket.org'); leave the URL untouched if either marker is missing
        scheme_end = raw_URL.find('//')
        host_start = raw_URL.find('bitbucket.org')
        if scheme_end != -1 and host_start >= scheme_end + 2:
            URL = raw_URL[:scheme_end + 2] + raw_URL[host_start:]
        else:
            URL = raw_URL

        upd_time = repo['updated_on']
        name = repo['name']

        if URL in checked:
            # Already checked before: only redo it if it changed since then.
            if parseTime(checked[URL]) < parseTime(upd_time):
                URLs[URL] = (upd_time, name)
        else:
            URLs[URL] = (upd_time, name)
    return URLs
def main():
    """Clone, or fetch and pull, every repository of the configured account.

    User, password, token and hub type come from the command line arguments,
    falling back to the configuration file. Repositories are placed under
    ``<output>/<Hub>/<Language>/<repo name>``. A temporary askpass helper
    script that prints the password is created for git and removed again
    before the process exits.

    Exits with status 1 on missing credentials, failed authentication or an
    unknown hub type, and with status 0 otherwise.
    """
    sys.path.append('../')
    sys.path.append('/app')
    import shlex
    import tempfile

    import git
    import github
    from bitbucket.client import Client
    from bitbucket.exceptions import NotAuthenticatedError
    from git.exc import GitCommandError
    from ghcopy.utils import error_handler
    from ghcopy.config import translate as _, logger, cmd_args, config_args

    def repo_work(clone_url, language):
        """
        Clone the repository if it is not on disk yet, else fetch and pull it.

        :param clone_url: clone URL of the repository
        :param language: language reported for the repository; None or an
            empty string is treated as 'Undefined'
        :type language: str
        :return: None
        """
        # A missing language must not make the repository unclonable.
        language = language or 'Undefined'
        try:
            out_dir = '%s/%s/%s' % (cmd_args.output,
                                    hub[:1].upper() + hub[1:].lower(),
                                    language[:1].upper() + language[1:].lower())
            os.makedirs(out_dir, 0o755, True)
            repo_dir = '%s/%s' % (out_dir, clone_url.split('/')[-1].split('.')[0])
            if not os.path.isdir(repo_dir):
                logger.info('%s \'%s\'' % (_('cloning into'), repo_dir))
                git.Repo.clone_from(
                    clone_url, repo_dir,
                    multi_options=['--config credential.%s.username=%s' % (clone_url, user),
                                   '--config core.askPass=%s' % pass_file_name])
            else:
                rp = git.Repo(repo_dir)
                # The context manager writes the values and releases the
                # config file lock before any remote operation starts.
                with rp.config_writer() as config:
                    config.set_value('credential "%s"' % clone_url, 'username', user)
                    config.set_value('core', 'askpass', pass_file_name)
                for remote in rp.remotes:
                    logger.info('%s \'%s\'' % (_('fetching'), repo_dir))
                    remote.fetch()
                    logger.info('%s \'%s\'' % (_('pulling current branch'), repo_dir))
                    remote.pull()
        except GitCommandError as e:
            errors = e.stderr[3:-2].split('\n')
            for error in errors:
                logger.error(error)
            return
        except Exception as e:
            error_handler(logger, e, _('unexpected exception'), debug_info=True)
            return

    user = cmd_args.user if cmd_args.user else config_args.get('user', None)
    password = cmd_args.password if cmd_args.password else config_args.get('password', None)
    token = cmd_args.token if cmd_args.token else config_args.get('token', None)
    hub = cmd_args.hub if cmd_args.hub else config_args.get('type', 'github')
    if (not user or not password) and not token:
        logger.error('%s' % _('parameters \'user\' and \'password\' or \'token\' are required'))
        sys.exit(1)

    if hub == 'github':
        password = '' if token else password

    # mkstemp creates the file atomically, with an unpredictable name and
    # owner-only permissions, so other local users cannot read the password
    # or pre-create the path.
    pass_fd, pass_file_name = tempfile.mkstemp(prefix='ghcopy-askpass-')
    try:
        with os.fdopen(pass_fd, 'w') as pass_file:
            # shlex.quote keeps quotes, '$' or backticks in the password from
            # being interpreted by the shell; printf avoids echo's option
            # parsing for passwords that start with '-'.
            pass_file.write("#!/bin/bash\n\nprintf '%s\\n' "
                            + shlex.quote(password or '') + "\n")
        # git has to execute the helper; only the owner needs access.
        os.chmod(pass_file_name, 0o700)

        os.makedirs(cmd_args.output, 0o755, True)
        if hub == 'github':
            logger.info('github %s, %s: %s' % (_('copying started'), _('logging level'),
                                               cmd_args.log_level))
            user = token if token else user
            g = github.Github(user, password)
            for repo in g.get_user().get_repos():
                repo_work(repo.clone_url, repo.language)
        elif hub == 'bitbucket':
            page = 1
            repos = 0
            size = 1
            logger.info('bitbucket %s, %s: %s' % (_('copying started'), _('logging level'),
                                                  cmd_args.log_level))
            client = Client(user, password)
            while repos < size:
                response = client.get_repositories(params={'page': page})
                size = response['size']
                values = response['values']
                if not values:
                    # An empty page while repos < size would loop forever.
                    break
                for repo in values:
                    repos += 1
                    repo_work(repo['links']['clone'][0]['href'],
                              repo.get('language', 'Undefined'))
                page += 1
        else:
            logger.error('%s' % _('incorrect repository type'))
            sys.exit(1)
        logger.info('%s %s' % (hub, _('copying ended')))
    except NotAuthenticatedError:
        logger.error('%s' % _('incorrect user name, password or token'))
        sys.exit(1)
    except github.BadCredentialsException:
        logger.error('%s' % _('incorrect user name, password or token'))
        sys.exit(1)
    finally:
        # Runs on every path, including sys.exit(), so the password file
        # never outlives the process.
        os.unlink(pass_file_name)
    sys.exit(0)