Example #1
 def __init__(self,
              repo_location,
              repo_command,
              repo_type='hg',
              date_range=''):
     if not os.path.exists(repo_location):
         raise ChurnDriverError("Repo Location does not exist: %s" %
                                repo_location)
     if not repo_command:
         raise ChurnDriverError("Repo Command cannot be empty")
     self._repo_location = os.path.abspath(repo_location)
     self._repo_type = repo_type
     self._cmd = repo_command
     self._dp = DiffParser(self._repo_type)
     self._ch = ChurnHash()
     self._backend = SQLiteBackend()
     self._daterange = date_range
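
A minimal construction sketch, assuming ChurnDriver and ChurnDriverError are importable; the repo path, command, and date range below are placeholders, not values taken from the project:

# Hypothetical usage; every argument here is a placeholder.
try:
    driver = ChurnDriver('/path/to/repo', 'hg log -p',
                         repo_type='hg', date_range='2012-01-01..2012-06-30')
except ChurnDriverError as e:
    print("Bad driver configuration: %s" % e)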
Example #2
import os
import shlex
import subprocess
import time

# Project-local imports; the exact module paths are assumptions (churnhash.py
# is mentioned in a comment below, the rest are guesses):
# from churnhash import ChurnHash
# from diffparser import DiffParser
# from sqlitebackend import SQLiteBackend
# from streamreader import StreamReader
# from errors import ChurnDriverError


class ChurnDriver(object):
    def __init__(self,
                 repo_location,
                 repo_command,
                 repo_type='hg',
                 date_range=''):
        if not os.path.exists(repo_location):
            raise ChurnDriverError("Repo Location does not exist: %s" %
                                   repo_location)
        if not repo_command:
            raise ChurnDriverError("Repo Command cannot be empty")
        self._repo_location = os.path.abspath(repo_location)
        self._repo_type = repo_type
        self._cmd = repo_command
        self._dp = DiffParser(self._repo_type)
        self._ch = ChurnHash()
        self._backend = SQLiteBackend()
        self._daterange = date_range

    def run(self):
        args = shlex.split(self._cmd)
        # universal_newlines=True so the pipe yields str, not bytes, under
        # Python 3 (assumes StreamReader and DiffParser expect text)
        p = subprocess.Popen(args,
                             cwd=self._repo_location,
                             stdout=subprocess.PIPE,
                             universal_newlines=True)

        sr = StreamReader(p.stdout)

        now = time.time()
        # Exit after 20 seconds with no new output (timer resets on each read)
        count = 0
        while time.time() - now < 20:
            if sr.is_empty():
                print('.', end='', flush=True)
                time.sleep(1)
                continue

            lines = sr.readline(0.5)
            count += 1
            if count % 100 == 0:
                print "Read %d lines" % count
            # Got a line of output, reset timer
            now = time.time()

            if lines:
                diffs = self._dp.parse(lines.split('\n'))
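                # Assumed shape of parse()'s return value, inferred from the
                # loop below (changeset id, timestamp, and path are illustrative):
                #   {'abc123': {'user': 'alice',
                #               'timestamp': '2012-05-01',
                #               'src/foo.py': 12}}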
                if diffs:
                    while len(diffs):
                        d = diffs.popitem()

                        # popitem() yields (changeset, info) tuples where info
                        # is the embedded dict. It's not yet clear whether the
                        # useful metric is aggregated or per-file, so we compute
                        # and store both. If the database ends up doing the
                        # aggregation for us, churnhash.py becomes redundant.
                        chgset = d[0]
                        user = d[1]['user']
                        timestamp = d[1]['timestamp']
                        for k in d[1].keys():
                            if k not in ('user', 'timestamp'):
                                # Then it's a file name with a churn value
                                self._ch.add_file_path(k, d[1][k])
                                # Add non-aggregated values to our backend
                                if self._backend:
                                    self._backend.add_single_file_value(
                                        chgset, user, timestamp, k, d[1][k])
        p.wait()

        # TODO: Now we save to some backend - or perhaps just wire this into churnhash directly
        # For now, we pull this back and return it
        if self._backend:
            h = self._ch.get_hash()
            for i in h:
                self._backend.store_churn_hash(i, h[i]['file'],
                                               self._daterange,
                                               h[i]['lines_changed'])

        return self._ch
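
The polling loop in run() leans on StreamReader's non-blocking interface: is_empty() and a readline() that takes a timeout. The real class lives elsewhere in this project; the sketch below is only a guess at that contract, built on a daemon thread draining the pipe into a queue.

import queue
import threading


class StreamReader(object):
    """Sketch of the assumed interface: wraps a pipe so reads never block."""

    def __init__(self, stream):
        self._queue = queue.Queue()
        # A daemon thread drains the pipe so the caller can poll at its own pace
        t = threading.Thread(target=self._drain, args=(stream,))
        t.daemon = True
        t.start()

    def _drain(self, stream):
        # Relies on the pipe being opened in text mode (universal_newlines=True)
        for line in iter(stream.readline, ''):
            self._queue.put(line)

    def is_empty(self):
        return self._queue.empty()

    def readline(self, timeout):
        # Block for at most `timeout` seconds; None signals "nothing yet"
        try:
            return self._queue.get(timeout=timeout)
        except queue.Empty:
            return None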
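
Putting the pieces together, a hedged end-to-end sketch; the repo path and command are placeholders, and the shape of the returned hash is inferred from the store_churn_hash() call above:

# Hypothetical run; adjust the path and command for your repository.
driver = ChurnDriver('/path/to/repo', 'hg log -p')
churn = driver.run()

# get_hash() appears to map a key to {'file': ..., 'lines_changed': ...}
h = churn.get_hash()
for key in h:
    print("%s: %d lines changed" % (h[key]['file'], h[key]['lines_changed']))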