Exemple #1
0
    def find(repo):
        """Clone a GitHub repository, run every metadata matcher on it, then delete it.

        Each entry returned by ``Metadata.get()`` is dispatched on its
        ``'type'`` key (``'contents'``, ``'extension'`` or ``'filename'``) and
        its ``'match'`` value is handed to the corresponding ``File`` finder.
        Entries with any other type are ignored.

        Args:
            repo: Repository object whose ``full_name`` attribute identifies
                the clone and is used to build the GitHub clone URL
                (presumably ``"owner/name"`` -- confirm against the caller).
        """
        # Clone the repo
        Git.clone(repo.full_name, "https://github.com/%s.git" % repo.full_name)

        try:
            # Run the metadata on the repo
            for metadata in Metadata.get():
                kind = metadata['type']
                if kind == 'contents':
                    File(repo).find_by_content(metadata['match'])
                elif kind == 'extension':
                    File(repo).find_by_extension(metadata['match'])
                elif kind == 'filename':
                    File(repo).find_by_name(metadata['match'])
        finally:
            # Delete the repo even when a matcher raises, so a failed scan
            # does not leave the clone behind.
            Git.delete(repo.full_name)
Exemple #2
0
    def find(repo):
        """Clone a GitHub repository, run every metadata matcher on it, then delete it.

        Each entry returned by ``Metadata.get()`` is dispatched on its
        ``'type'`` key (``'contents'``, ``'extension'`` or ``'filename'``) and
        the whole entry is handed to the corresponding ``File`` finder.
        Entries with any other type are ignored.

        Args:
            repo: Repository name; it is interpolated into
                ``https://github.com/<repo>.git`` and also used as the clone
                identifier (presumably ``"owner/name"`` -- confirm against
                the caller).
        """
        # TODO: Database.done(repo)

        # Clone the repo
        Git.clone(repo, "https://github.com/%s.git" % repo)

        try:
            # Run the metadata on the repo
            for metadata in Metadata.get():
                kind = metadata['type']
                if kind == 'contents':
                    File(repo).find_by_content(metadata)
                elif kind == 'extension':
                    File(repo).find_by_extension(metadata)
                elif kind == 'filename':
                    File(repo).find_by_name(metadata)

            # TODO: Database.checked(repo)
        finally:
            # Delete the repo even when a matcher raises, so a failed scan
            # does not leave the clone behind.
            Git.delete(repo)
Exemple #3
0
class Indexit:
    """Index GitHub repositories by id: clone, store file contents, mark as indexed."""

    def __init__(self):
        """Create the repository, git and file helpers used by ``run``."""
        self.repositories = Repositories()
        self.git = Git()
        self.files = Files()

    def run(self, repo_id):
        """Index a single repository unless it has already been indexed.

        Opens a database connection, looks the repository up through the
        GitHub API URL for ``repo_id``, clones it, saves the contents of its
        files and records the id as indexed. The connection is closed and the
        repository's temp folder is deleted on every exit path.

        Args:
            repo_id: Numeric GitHub repository id (formatted with ``%d``).
        """
        # Initiate the database connection
        database = Database.connect().get_connection()

        try:
            # Don't run if already indexed
            if Database.has_been_indexed(database, repo_id):
                return

            # Get the repo information
            uri = "https://api.github.com/repositories/%d" % repo_id
            repository = self.repositories.get(uri)

            # Nothing to clone when the lookup returned no 'html_url'
            if 'html_url' not in repository:
                return

            try:
                # Clone the repo
                clone = self.git.clone(repository['full_name'],
                                       repository['html_url'], repo_id)

                if clone:
                    # Run through files and store contents
                    files = self.files.contents(clone)

                    # Save files to database
                    Database.save(database, files)

                    # Add to indexed
                    Database.indexed(database, repo_id)
            finally:
                # Delete repo in temp folder, even if cloning or saving failed
                self.repositories.delete(repository['full_name'])

            # NOTE(review): this is also printed when the clone was falsy and
            # nothing was stored -- confirm whether that is intended.
            print('Indexed ', uri)
        finally:
            # Close the database on every path (already indexed, missing
            # 'html_url', failed clone, or an exception) so connections
            # are not leaked across the many ids processed by ``main``.
            database.close()

    def main(self):
        """Run ``run`` for every id in ``range(10000000)`` on a worker pool.

        The pool size comes from ``Threads().total()``.
        """
        # Pool connections to speed up our job
        with Pool(processes=Threads().total()) as pool:
            pool.map(self.run, range(10000000))
Exemple #4
0
 def __init__(self):
     """Create the repository, git and file helpers held by this instance."""
     # Constructed left to right, in the same order as the attributes.
     self.repositories, self.git, self.files = Repositories(), Git(), Files()