class Pullit:
    """Scan GitHub repositories for leaked credentials using metadata rules."""

    # Pullit constructor
    def __init__(self):
        """Prepare the batch queue and the keyword/repository helpers."""
        self.repos = []                      # pending batch of repos to scan
        self.keywords = Keywords()
        self.repositories = Repositories()

    # Find credentials
    @staticmethod
    def find(repo):
        """Clone *repo*, run every metadata matcher over it, then delete the clone.

        NOTE(review): ``all()`` queues raw repo objects while ``search()`` queues
        ``repo.full_name`` strings; the ``%s`` formatting below works for both
        only if the repo object's str() is its full name — confirm upstream.
        """
        # todo
        # Database.done(repo)

        # Clone the repo
        Git.clone(repo, "https://github.com/%s.git" % repo)

        # Run the metadata on the repo
        for metadata in Metadata.get():
            if metadata['type'] == 'contents':
                File(repo).find_by_content(metadata)
            elif metadata['type'] == 'extension':
                File(repo).find_by_extension(metadata)
            elif metadata['type'] == 'filename':
                File(repo).find_by_name(metadata)

        # todo
        # Database.checked(repo)

        # Delete the repo
        Git.delete(repo)

    def _flush(self, pool):
        """Scan the queued repos on the pool and empty the queue (no-op when empty)."""
        if self.repos:
            # partial(self.find) with no bound arguments was a pointless wrapper;
            # the bound method maps directly.
            pool.map(self.find, self.repos)
            self.repos.clear()

    # Run on all repos
    def all(self, pool):
        """Scan every repository, dispatching in batches of Threads.get()."""
        for repo in self.repositories.all():
            self.repos.append(repo)
            if len(self.repos) >= Threads.get():
                self._flush(pool)
        # BUG FIX: a final partial batch (fewer than Threads.get() repos)
        # was previously never dispatched, so trailing repos went unscanned.
        self._flush(pool)

    # Run on searched repos
    def search(self, pool):
        """Scan repositories matching each configured keyword, in batches."""
        for keyword in self.keywords.get():
            for repo in self.repositories.search(keyword):
                self.repos.append(repo.full_name)
                if len(self.repos) >= Threads.get():
                    self._flush(pool)
        # BUG FIX: flush the leftover partial batch (see all()).
        self._flush(pool)

    # Run Pullit
    def main(self):
        """Entry point: scan keyword matches when keywords exist, else all repos."""
        # Pool connections to speed up our job
        with Pool(processes=Threads.get()) as pool:
            if self.keywords.get():
                self.search(pool)
            else:
                self.all(pool)
class Indexit:
    """Clone GitHub repositories by numeric id and index their file contents."""

    # Indexit constructor
    def __init__(self):
        """Prepare the GitHub repository, git and file helpers used while indexing."""
        self.repositories = Repositories()
        self.git = Git()
        self.files = Files()

    # Get the repo
    def run(self, repo_id):
        """Clone the repository with *repo_id*, store its files, and mark it indexed.

        Skips repositories that are already indexed. The database connection is
        always closed, whatever path is taken.
        """
        # initiate database
        database = Database.connect().get_connection()
        try:
            # Don't run if already indexed
            # BUG FIX: this early return previously leaked the connection —
            # database.close() was only reached on the full success path.
            if Database.has_been_indexed(database, repo_id):
                return

            # Get the repo information
            uri = "https://api.github.com/repositories/%d" % repo_id
            repository = self.repositories.get(uri)

            # Clone the repo
            if 'html_url' in repository:
                clone = self.git.clone(repository['full_name'], repository['html_url'], repo_id)
                if clone:
                    # Run through files and store contents
                    files = self.files.contents(clone)

                    # Save files to database
                    Database.save(database, files)

                    # Add to indexed
                    Database.indexed(database, repo_id)

                # Delete repo in temp folder
                # NOTE(review): delete/print ran after close in the original;
                # they do not touch the connection, so ordering is preserved
                # in effect.
                self.repositories.delete(repository['full_name'])

                print('Indexed ', uri)
        finally:
            # close the database on every path (fixes the leak noted above)
            database.close()

    # Index threading
    def main(self):
        """Walk repository ids with a process pool, indexing each in parallel."""
        # Pool connections to speed up our job
        with Pool(processes=Threads().total()) as pool:
            pool.map(self.run, range(10000000))
class Pullit:
    """Scan all known repositories for leaked credentials (streaming variant)."""

    # Pullit constructor
    def __init__(self):
        """Keep the GitHub repositories helper used to stream repos to scan."""
        self.repositories = Repositories()

    # Find credentials
    @staticmethod
    def find(repo):
        """Clone *repo*, run every metadata matcher over it, then remove the clone."""
        # Clone the repo
        Git.clone(repo.full_name, "https://github.com/%s.git" % repo.full_name)

        # Run the metadata on the repo
        for metadata in Metadata.get():
            if metadata['type'] == 'contents':
                File(repo).find_by_content(metadata['match'])
            elif metadata['type'] == 'extension':
                File(repo).find_by_extension(metadata['match'])
            elif metadata['type'] == 'filename':
                File(repo).find_by_name(metadata['match'])

        # Delete the repo
        Git.delete(repo.full_name)

    # Run pullit
    def main(self):
        """Scan repositories in batches of Threads.get() on a process pool."""
        # Pool connections to speed up our job
        with Pool(processes=Threads.get()) as pool:
            repos = []
            for repo in self.repositories.get():
                repos.append(repo)
                if len(repos) >= Threads.get():
                    # partial(self.find) bound nothing; map the method directly
                    pool.map(self.find, repos)
                    repos.clear()
            # BUG FIX: a final partial batch (fewer than Threads.get() repos)
            # was previously dropped without ever being scanned.
            if repos:
                pool.map(self.find, repos)
def __init__(self):
    """Wire up GitHub access, the database connection and event subscriptions."""
    # Repository id currently being processed; starts empty until set elsewhere.
    self.repo_id = ''
    self.github = Repositories()
    self.connection = Database().get()
    # NOTE(review): Events.listen is called with the Events class itself as the
    # first argument (unbound-style call) — presumably the listener registry is
    # class-level; confirm against the Events implementation.
    Events.listen(Events, 'checked-repo', self.checked)
    # All three "found" events route to the same notify handler.
    Events.listen(Events, 'regex-found', self.notify)
    Events.listen(Events, 'extension-found', self.notify)
    Events.listen(Events, 'filename-found', self.notify)
def __init__(self):
    """Set up the repository source, the configured keywords and an empty batch queue."""
    self.keywords = Keywords()
    self.repositories = Repositories()
    # Batch of repos waiting to be dispatched for scanning.
    self.repos = []
def __init__(self):
    """Keep a handle to the GitHub repositories helper."""
    self.repositories = Repositories()
def __init__(self):
    """Create the git, file and repository helpers used during indexing."""
    self.git = Git()
    self.files = Files()
    self.repositories = Repositories()