def get_patch_for_commit(self):
    """Run ``show`` for ``self.rev`` and store the captured diff in ``self.data``.

    A DIFF watch is registered on ``self.repo`` so that everything the
    ``show`` command emits is collected in an in-memory buffer.  If the
    command raises ``CommandError`` or ``CommandRunningError`` it is
    retried up to three more times before giving up.

    Returns:
        The decoded patch text, or ``None`` when every attempt failed.
    """
    def diff_line(data, io):
        io.write(data)

    buffer = BytesIO()
    wid = self.repo.add_watch(DIFF, diff_line, buffer)

    done = False
    failed = False
    retries = 3

    try:
        while not done and not failed:
            try:
                self.repo.show(self.repo_uri, self.rev)
                self.data = to_utf8(buffer.getvalue().strip()).decode("utf-8")
                done = True
            except (CommandError, CommandRunningError) as e:
                if retries > 0:
                    printerr("Error running show command: %s, trying again",
                             (str(e),))
                    retries -= 1
                    # Throw away whatever the failed attempt wrote.  Rewinding
                    # alone is not enough: a shorter output on the next try
                    # would leave stale bytes from this one at the tail.
                    buffer.seek(0)
                    buffer.truncate()
                else:
                    failed = True
                    printerr("Error running show command: %s, FAILED",
                             (str(e),))
                    self.data = None
    finally:
        # Always unregister the watch, even if an unexpected exception
        # escapes the retry loop.
        self.repo.remove_watch(DIFF, wid)

    return self.data
def get_patch_for_commit(self):
    """Capture the output of ``show`` for ``self.rev`` into ``self.data``.

    The output is collected through a DIFF watch registered on
    ``self.repo``.  ``CommandError`` / ``CommandRunningError`` trigger up
    to three retries; once those are used up ``self.data`` is set to
    ``None``.

    Returns:
        ``self.data`` -- the decoded patch text, or ``None`` on failure.
    """
    def diff_line(data, io):
        io.write(data)

    buffer = BytesIO()
    wid = self.repo.add_watch(DIFF, diff_line, buffer)
    retries_left = 3

    try:
        while True:
            try:
                self.repo.show(self.repo_uri, self.rev)
                self.data = to_utf8(buffer.getvalue().strip()).decode("utf-8")
                break
            except (CommandError, CommandRunningError) as e:
                if retries_left <= 0:
                    printerr("Error running show command: %s, FAILED",
                             (str(e),))
                    self.data = None
                    break
                printerr("Error running show command: %s, trying again",
                         (str(e),))
                retries_left -= 1
                # Reset the buffer completely; seek(0) on its own would keep
                # the tail of the failed attempt if the retry writes less.
                buffer.seek(0)
                buffer.truncate()
    finally:
        # The watch must not outlive this call, whatever happens above.
        self.repo.remove_watch(DIFF, wid)

    return self.data
def __init__(self, id, type, file_id):
    """Set up the record's id, type and owning file id.

    When ``id`` is ``None`` the current value of
    ``DBFileType.id_counter`` is used and the counter is advanced by one;
    otherwise the supplied ``id`` is stored unchanged.  ``type`` is passed
    through ``to_utf8`` before being stored.
    """
    if id is not None:
        self.id = id
    else:
        # No explicit id: take the next one from the class-wide counter.
        self.id = DBFileType.id_counter
        DBFileType.id_counter += 1

    self.type = to_utf8(type)
    self.file_id = file_id
def get_file_contents(self):
    """Returns contents of the file, stripped of whitespace at either end.

    Returns ``None`` when the contents cannot be converted.
    """
    # An encode will fail if the source code can't be converted to
    # utf-8, i.e. it's not already unicode, or latin-1, or something
    # obvious. This almost always means that the file isn't source
    # code at all.
    # TODO: this should really raise a "not source" exception,
    # but returning None is fine for now.
    # NOTE(review): other call sites in this file call .decode("utf-8")
    # on the result of to_utf8(); if to_utf8() returns bytes, .encode()
    # here raises AttributeError on Python 3 and the handler below hides
    # it as None -- confirm which is intended.
    try:
        return to_utf8(self._file_contents).encode("utf-8").strip()
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        return None
def get_patches(self, repo, repo_uri, repo_id, db, cursor):
    """Yield ``(commit_id, file_id, patch, rev)`` for a repository's patches.

    Rows come from joining ``patches`` with ``scmlog`` for ``repo_id``,
    skipping rows whose patch is NULL.  Each patch is passed through
    ``to_utf8`` before being yielded.  ``repo`` and ``repo_uri`` are not
    used by this method.
    """
    batched_cursor = ICursor(cursor, self.INTERVAL_SIZE)

    # Get the patches from this repository
    query = """select p.commit_id, p.file_id, p.patch, s.rev from patches p, scmlog s where p.commit_id = s.id and s.repository_id = ? and p.patch is not NULL"""
    batched_cursor.execute(statement(query, db.place_holder), (repo_id,))

    while True:
        rows = batched_cursor.fetchmany()
        if not rows:
            # An empty (falsy) batch means the result set is exhausted.
            break
        for commit_id, file_id, patch_content, rev in rows:
            yield (commit_id, file_id, to_utf8(patch_content), rev)
def get_patches(self, repo, repo_uri, repo_id, db, cursor):
    """Generate one tuple per stored patch of the repository ``repo_id``.

    Each item is ``(commit_id, file_id, patch, rev)`` where ``patch`` has
    been passed through ``to_utf8``.  Only rows with a non-NULL patch are
    selected.  Results are pulled batch by batch via ``fetchmany()`` on an
    ``ICursor`` wrapping ``cursor``.  ``repo`` and ``repo_uri`` are unused.
    """
    icursor = ICursor(cursor, self.INTERVAL_SIZE)

    # Get the patches from this repository
    query = """select p.commit_id, p.file_id, p.patch, s.rev from patches p, scmlog s where p.commit_id = s.id and s.repository_id = ? and p.patch is not NULL"""
    sql = statement(query, db.place_holder)
    icursor.execute(sql, (repo_id,))

    batch = icursor.fetchmany()
    while batch:
        for row in batch:
            commit_id, file_id, patch_content, rev = row
            yield (commit_id, file_id, to_utf8(patch_content), rev)
        # Fetch the next batch; a falsy result ends the loop.
        batch = icursor.fetchmany()
def _iterate_lexer_output(iterator):
    """Group lexer tokens into lines.

    ``iterator`` yields ``(ttype, value)`` pairs.  The result is a list of
    lines; each line is a list of ``{"token": ..., "value": ...}`` dicts.
    A token whose value spans several lines is split at every newline.

    Note: a line is only added to the result when a newline is seen, so
    any text after the last newline is not part of the returned list.
    """
    lines = []
    current = []
    for ttype, value in iterator:
        pieces = value.split("\n")
        last_index = len(pieces) - 1
        for index, piece in enumerate(pieces):
            entry = {
                "token": str(ttype),
                "value": to_utf8(piece).decode("utf-8"),
            }
            # Empty fragments are kept only when they are the first piece
            # of a token; later empty pieces are just newline artefacts.
            if index == 0 or entry["value"] != '':
                current.append(entry)
            # Every piece except the last one was followed by a newline,
            # which closes the current line.
            if index < last_index:
                lines.append(current)
                current = []
    return lines
def _iterate_lexer_output(iterator):
    """Turn a stream of ``(ttype, value)`` tokens into a list of lines.

    Every returned line is a list of dicts with the keys ``"token"``
    (``str(ttype)``) and ``"value"`` (the decoded text fragment).  Token
    values containing newlines are split so that each fragment lands on
    its own line.

    Note: lines are flushed on newline only; trailing text without a
    terminating newline is not included in the result.
    """
    finished_lines = []
    line_buffer = []

    for ttype, value in iterator:
        fragments = value.split("\n")
        newline_count = len(fragments) - 1

        for position, fragment in enumerate(fragments):
            entry = dict(token=str(ttype),
                         value=to_utf8(fragment).decode("utf-8"))

            is_first = position == 0
            if is_first or entry["value"] != '':
                line_buffer.append(entry)

            # All fragments but the final one end at a newline.
            if position != newline_count:
                finished_lines.append(line_buffer)
                line_buffer = []

    return finished_lines
def _get_file_content(repo, uri, rev):
    """Reads the content of a file and revision from a given repository.

    A CAT watch is registered on ``repo`` to collect the output of
    ``repo.cat(uri, rev)``; the collected bytes are decoded as UTF-8 and
    passed through ``_convert_linebreaks``.

    Returns:
        The file content as text, or ``None`` if running the command or
        converting its output raised an exception.
    """
    def write_line(data, io):
        io.write(data)

    buffer = BytesIO()
    wid = repo.add_watch(CAT, write_line, buffer)
    try:
        repo.cat(uri, rev)
        file_content = to_utf8(buffer.getvalue()).decode("utf-8")
        # Make sure the content uses consistent line breaks.
        file_content = _convert_linebreaks(file_content)
    except Exception as e:
        printerr("[get_line_types] Error running show command: %s, FAILED",
                 (str(e),))
        file_content = None
    finally:
        # Unregister the watch even when something other than Exception
        # (e.g. KeyboardInterrupt) propagates out of the try block.
        repo.remove_watch(CAT, wid)
    return file_content
def __process_finished_jobs(self, job_pool, write_cursor, db):
    """Insert a ``DBPatch`` row for every finished job in ``job_pool``.

    Jobs are taken from ``job_pool.get_next_done()`` until it returns
    ``None``.  For each one a ``DBPatch`` is built from the job's
    ``commit_id`` and ``data`` and inserted through ``execute_statement``;
    ``ExtensionRunError`` is passed as the exception type to use on
    failure.
    """
    # The commit ID here is what the documentation calls scmlog_id: the
    # docs advocate tablename_id for references, but the source code
    # refers to these as commit IDs.
    job = job_pool.get_next_done()
    while job is not None:
        patch = DBPatch(None, job.commit_id, job.data)
        insert_sql = statement(DBPatch.__insert__, self.db.place_holder)
        row = (patch.id, patch.commit_id,
               to_utf8(patch.patch).decode("utf-8"))
        execute_statement(insert_sql, row, write_cursor, db,
                          "Couldn't insert, duplicate patch?",
                          exception=ExtensionRunError)
        # Subsequent polls pass 0 -- presumably a timeout so the loop does
        # not block once the queue is drained; confirm against job_pool.
        job = job_pool.get_next_done(0)
def _get_file_content(repo, uri, rev):
    """Reads the content of a file and revision from a given repository.

    The output of ``repo.cat(uri, rev)`` is captured through a CAT watch,
    decoded as UTF-8 and run through ``_convert_linebreaks``.

    Returns:
        The resulting text, or ``None`` when any step raised an exception
        (the error is reported through ``printerr``).
    """
    def write_line(data, io):
        io.write(data)

    sink = BytesIO()
    wid = repo.add_watch(CAT, write_line, sink)

    file_content = None
    try:
        repo.cat(uri, rev)
        decoded = to_utf8(sink.getvalue()).decode("utf-8")
        # Make sure we have the same kind of line breaks throughout.
        file_content = _convert_linebreaks(decoded)
    except Exception as e:
        printerr("[get_line_types] Error running show command: %s, FAILED",
                 (str(e),))
    finally:
        # Guarantee the watch is removed, including when an exception that
        # is not caught above (e.g. KeyboardInterrupt) is propagating.
        repo.remove_watch(CAT, wid)

    return file_content
def __str__(self):
    """Return a one-line description with the patch id, commit id and data."""
    template = "<Patch ID: %s, commit_id: %s, data: %s>"
    values = (str(self.id), str(self.commit_id),
              to_utf8(self.patch).decode("utf-8"))
    return template % values
def line(self, line):
    """Count one more line for the author of ``line``.

    The author name is taken from ``line.author``, passed through
    ``to_utf8`` and decoded; ``self.authors`` maps that name to the number
    of lines seen so far.
    """
    author_name = to_utf8(line.author).decode("utf-8")
    # Authors not seen before start from zero.
    self.authors[author_name] = self.authors.get(author_name, 0) + 1
def __str__(self):
    """Return a one-line description with the patch id, commit id and data."""
    parts = [
        "<Patch ID: ", str(self.id),
        ", commit_id: ", str(self.commit_id),
        ", data: ", to_utf8(self.patch).decode("utf-8"),
        ">",
    ]
    return "".join(parts)