def find(repo):
    """Clone ``repo``, run every metadata rule against it, then delete the clone.

    Args:
        repo: Object exposing a ``full_name`` attribute. The value is passed
            to ``Git.clone``/``Git.delete`` and interpolated into
            ``https://github.com/<full_name>.git`` (presumably the GitHub
            ``owner/name`` slug -- confirm against the caller).

    Metadata entries whose ``'type'`` is not ``'contents'``, ``'extension'``
    or ``'filename'`` are skipped. Any exception raised while scanning
    propagates to the caller, but only after the clone has been deleted.
    """
    # Clone the repo. Kept outside the try block so that a failed clone does
    # not trigger a delete of something that was never created.
    Git.clone(repo.full_name, "https://github.com/%s.git" % repo.full_name)

    try:
        # Run the metadata on the repo
        for metadata in Metadata.get():
            kind = metadata['type']
            if kind == 'contents':
                File(repo).find_by_content(metadata['match'])
            elif kind == 'extension':
                File(repo).find_by_extension(metadata['match'])
            elif kind == 'filename':
                File(repo).find_by_name(metadata['match'])
    finally:
        # Delete the repo even when a scan step raised, so clones do not
        # pile up on disk.
        Git.delete(repo.full_name)
def find(repo):
    """Clone ``repo``, run every metadata rule against it, then delete the clone.

    Args:
        repo: Value passed straight to ``Git.clone``/``Git.delete`` and
            interpolated into ``https://github.com/<repo>.git`` (presumably
            the ``owner/name`` string -- confirm against the caller).

    Each metadata entry is handed unchanged to the ``File`` finder that
    matches its ``'type'`` (``'contents'``, ``'extension'`` or
    ``'filename'``); entries of any other type are skipped. Any exception
    raised while scanning propagates to the caller, but only after the clone
    has been deleted.
    """
    # TODO: Database.done(repo)

    # Clone the repo. Kept outside the try block so that a failed clone does
    # not trigger a delete of something that was never created.
    Git.clone(repo, "https://github.com/%s.git" % repo)

    try:
        # Run the metadata on the repo
        for metadata in Metadata.get():
            kind = metadata['type']
            if kind == 'contents':
                File(repo).find_by_content(metadata)
            elif kind == 'extension':
                File(repo).find_by_extension(metadata)
            elif kind == 'filename':
                File(repo).find_by_name(metadata)

        # TODO: Database.checked(repo)
    finally:
        # Delete the repo even when a scan step raised, so clones do not
        # pile up on disk.
        Git.delete(repo)
class Indexit:
    """Index repositories by numeric id: look up, clone, store file contents."""

    # Indexit constructor
    def __init__(self):
        """Create the repository, git and file helpers used by ``run``."""
        self.repositories = Repositories()
        self.git = Git()
        self.files = Files()

    # Get the repo
    def run(self, repo_id):
        """Index the single repository identified by ``repo_id``.

        Does nothing when ``Database.has_been_indexed`` reports the id as
        already handled, when the looked-up repository has no ``'html_url'``
        key, or when ``self.git.clone`` returns a falsy value.

        Args:
            repo_id: Integer repository id; formatted with ``%d`` into
                ``https://api.github.com/repositories/<id>``.

        NOTE(review): the original nesting of the statements after
        ``Database.save`` was lost; this assumes they all ran only after a
        successful clone -- confirm against the upstream file.
        """
        # Initiate database
        database = Database.connect().get_connection()
        try:
            # Don't run if already indexed
            if Database.has_been_indexed(database, repo_id):
                return

            # Get the repo information
            uri = "https://api.github.com/repositories/%d" % repo_id
            repository = self.repositories.get(uri)

            # Nothing to clone without a repository URL
            if 'html_url' not in repository:
                return

            # Clone the repo
            clone = self.git.clone(repository['full_name'], repository['html_url'], repo_id)
            if not clone:
                return

            try:
                # Run through files and store contents
                files = self.files.contents(clone)

                # Save files to database
                Database.save(database, files)

                # Add to indexed
                Database.indexed(database, repo_id)
            finally:
                # Delete repo in temp folder, even if reading or saving failed
                self.repositories.delete(repository['full_name'])
        finally:
            # Close the database on every exit path, including the early
            # returns above and any exception.
            database.close()

        print('Indexed ', uri)

    # Index threading
    def main(self, repo_id_limit=10000000):
        """Run ``run`` for every id in ``range(repo_id_limit)`` through a pool.

        Args:
            repo_id_limit: Exclusive upper bound of the ids to visit. The
                default keeps the previously hard-coded value.
        """
        # Pool connections to speed up our job
        with Pool(processes=Threads().total()) as pool:
            pool.map(self.run, range(repo_id_limit))
def __init__(self):
    """Set up the three helper objects this instance works with.

    Each helper is constructed with no arguments, in the order
    repositories, git, files.
    """
    # Repository helper
    self.repositories = Repositories()

    # Git helper
    self.git = Git()

    # File helper
    self.files = Files()