def transform(self, bugs):
    """Turn raw bug dicts into a DataFrame of extracted features and text.

    For each bug, every callable in ``self.feature_extractors`` is invoked
    with the bug, the number of bugs already seen from the same creator
    during this call, and ``author_ids``. Results are stored in a ``data``
    dict keyed by the extractor's class name:

    * ``None`` results are skipped;
    * list results produce one ``"<ClassName>-<item>": "True"`` entry per item;
    * bool results are stored as their string form;
    * anything else is stored unchanged.

    The summary and comment texts are then passed through every function in
    ``self.cleanup_functions``. The cleaned text is only used for the
    returned rows; the ``summary`` and comment ``text`` values of the input
    bugs are left untouched.

    Args:
        bugs: Iterable of bug dicts providing at least the ``id``,
            ``creator``, ``summary`` and ``comments`` keys, where
            ``comments`` is a list of dicts with a ``text`` key.

    Returns:
        A ``pandas.DataFrame`` with one row per bug and the columns
        ``data``, ``title``, ``first_comment`` and ``comments``.
        ``first_comment`` is an empty string for a bug without comments.
    """
    results = []
    reporter_experience_map = defaultdict(int)
    # get_author_ids() is only called when a non-empty commit map is set.
    author_ids = get_author_ids() if self.commit_map else None

    for bug in bugs:
        # Read the id before a possible rollback replaces the bug object.
        bug_id = bug["id"]

        if self.rollback:
            bug = bug_snapshot.rollback(bug, self.rollback_when)

        if self.commit_map is not None:
            # Attach the commits so the feature extractors can read them.
            bug["commits"] = self.commit_map.get(bug_id, [])

        data = {}
        for f in self.feature_extractors:
            res = f(
                bug,
                reporter_experience=reporter_experience_map[bug["creator"]],
                author_ids=author_ids,
            )

            if res is None:
                continue

            name = f.__class__.__name__

            if isinstance(res, list):
                for item in res:
                    data[name + "-" + item] = "True"
                continue

            if isinstance(res, bool):
                res = str(res)

            data[name] = res

        # Count this bug only after its own features have been extracted.
        reporter_experience_map[bug["creator"]] += 1

        # TODO: Try simply using all possible fields instead of extracting features manually.

        # Clean copies of the text instead of overwriting the bug's fields.
        summary = bug["summary"]
        comments = [c["text"] for c in bug["comments"]]
        for cleanup_function in self.cleanup_functions:
            summary = cleanup_function(summary)
            comments = [cleanup_function(text) for text in comments]

        results.append(
            {
                "data": data,
                "title": summary,
                # A bug may have no comments at all.
                "first_comment": comments[0] if comments else "",
                "comments": " ".join(comments),
            }
        )

    return pd.DataFrame(results)
def transform(self, bugs):
    """Build a DataFrame of per-bug features and cleaned text fields.

    Each callable in ``self.feature_extractors`` is called as
    ``f(bug, reporter_experience=..., author_ids=...)``, where
    ``reporter_experience`` is the number of earlier bugs in ``bugs`` filed
    by the same creator. The extractor's class name is the key under which
    its result is recorded in the row's ``data`` dict: list results expand
    to ``"<ClassName>-<item>": "True"`` entries, bools are stringified,
    ``None`` is ignored and other values are kept unchanged.

    Text cleanup (``self.cleanup_functions``) is applied to copies of the
    summary and comment texts, so the input bugs keep their original text.

    Args:
        bugs: Iterable of bug dicts with ``id``, ``creator``, ``summary``
            and ``comments`` (list of dicts with a ``text`` key).

    Returns:
        ``pandas.DataFrame`` with the columns ``data``, ``title``,
        ``first_comment`` (empty string when the bug has no comments) and
        ``comments`` (all comment texts joined with a single space).
    """
    rows = []
    reporter_experience_map = defaultdict(int)
    # Author ids are only needed (and fetched) when commits are available.
    author_ids = get_author_ids() if self.commit_map else None

    for bug in bugs:
        # Keep the id from the original bug; rollback may return a new dict.
        bug_id = bug["id"]

        if self.rollback:
            bug = bug_snapshot.rollback(bug, self.rollback_when)

        if self.commit_map is not None:
            # Extractors read the commits from the bug itself.
            bug["commits"] = self.commit_map.get(bug_id, [])

        creator = bug["creator"]
        data = {}
        for extractor in self.feature_extractors:
            res = extractor(
                bug,
                reporter_experience=reporter_experience_map[creator],
                author_ids=author_ids,
            )
            if res is None:
                continue

            extractor_name = extractor.__class__.__name__
            if isinstance(res, list):
                for item in res:
                    data[extractor_name + "-" + item] = "True"
            elif isinstance(res, bool):
                data[extractor_name] = str(res)
            else:
                data[extractor_name] = res

        # The current bug counts as experience only for later bugs.
        reporter_experience_map[bug["creator"]] += 1

        # TODO: Try simply using all possible fields instead of extracting features manually.

        # Work on local copies so the caller's bug text is not overwritten.
        title = bug["summary"]
        comment_texts = [c["text"] for c in bug["comments"]]
        for cleanup_function in self.cleanup_functions:
            title = cleanup_function(title)
            comment_texts = [cleanup_function(text) for text in comment_texts]

        rows.append(
            {
                "data": data,
                "title": title,
                # Guard against bugs that have no comments.
                "first_comment": comment_texts[0] if comment_texts else "",
                "comments": " ".join(comment_texts),
            }
        )

    return pd.DataFrame(rows)
def transform(self, bugs):
    """Extract features and cleaned text from bugs into a DataFrame.

    Every callable in ``self.feature_extractors`` receives the bug, the
    count of bugs previously seen from the same creator in this call
    (``reporter_experience``) and ``author_ids``. Its result is stored in
    the row's ``data`` dict under the extractor's class name; list results
    become ``'<ClassName>-<item>': 'True'`` entries, bool results are
    converted to strings and ``None`` results are dropped.

    The functions in ``self.cleanup_functions`` are applied to copies of
    the summary and of each comment text; the input bugs' ``summary`` and
    comment ``text`` values are not modified.

    Args:
        bugs: Iterable of bug dicts with the keys ``id``, ``creator``,
            ``summary`` and ``comments`` (a list of dicts with ``text``).

    Returns:
        ``pandas.DataFrame`` with columns ``data``, ``title``,
        ``first_comment`` and ``comments``. ``first_comment`` is ``''``
        when a bug has no comments.
    """
    results = []
    reporter_experience_map = defaultdict(int)
    # Only look up author ids when there is a non-empty commit map.
    author_ids = get_author_ids() if self.commit_map else None

    for bug in bugs:
        # The id is read up front because rollback may replace ``bug``.
        bug_id = bug['id']

        if self.rollback:
            bug = bug_snapshot.rollback(bug, self.rollback_when)

        if self.commit_map is not None:
            # Expose the bug's commits to the feature extractors.
            bug['commits'] = self.commit_map.get(bug_id, [])

        data = {}
        for f in self.feature_extractors:
            res = f(
                bug,
                reporter_experience=reporter_experience_map[bug['creator']],
                author_ids=author_ids,
            )

            if res is None:
                continue

            feature_name = f.__class__.__name__

            if isinstance(res, list):
                for item in res:
                    data[feature_name + '-' + item] = 'True'
                continue

            if isinstance(res, bool):
                res = str(res)

            data[feature_name] = res

        # Incremented after extraction so a bug never counts towards itself.
        reporter_experience_map[bug['creator']] += 1

        # TODO: Try simply using all possible fields instead of extracting features manually.

        # Clean local copies rather than mutating the bug passed in.
        summary = bug['summary']
        comments = [c['text'] for c in bug['comments']]
        for cleanup_function in self.cleanup_functions:
            summary = cleanup_function(summary)
            comments = [cleanup_function(text) for text in comments]

        result = {
            'data': data,
            'title': summary,
            # Bugs without comments get an empty first comment.
            'first_comment': comments[0] if comments else '',
            'comments': ' '.join(comments),
        }

        results.append(result)

    return pd.DataFrame(results)
def apply_transform(bug):
    """Extract the features and cleaned text of a single bug.

    This function relies on names from its enclosing scope: ``self``,
    ``already_rollbacked`` (a set of bug ids), ``reporter_experience_map``
    (creator -> number of bugs seen so far) and ``author_ids``.

    The bug is rolled back via ``bug_snapshot.rollback`` at most once per
    id, its commits are attached when ``self.commit_map`` is set, and each
    feature extractor is run on it. List results become
    ``"<ClassName>-<item>": "True"`` entries, bool results are stringified
    and ``None`` results are skipped.

    Args:
        bug: Bug dict with ``id``, ``creator``, ``summary`` and
            ``comments`` (list of dicts with a ``text`` key).

    Returns:
        Dict with the keys ``data``, ``title``, ``first_comment`` and
        ``comments``. ``first_comment`` is ``""`` when the bug has no
        comments. The bug's own ``summary`` and comment texts are left
        unmodified; cleanup is applied to copies.
    """
    bug_id = bug["id"]

    # Roll each bug back only once, however often it is transformed.
    if self.rollback and bug_id not in already_rollbacked:
        bug = bug_snapshot.rollback(bug, self.rollback_when)
        already_rollbacked.add(bug_id)

    if self.commit_map is not None:
        # Extractors read the commits from the bug itself.
        bug["commits"] = self.commit_map.get(bug_id, [])

    data = {}
    for feature_extractor in self.feature_extractors:
        res = feature_extractor(
            bug,
            reporter_experience=reporter_experience_map[bug["creator"]],
            author_ids=author_ids,
        )

        if res is None:
            continue

        feature_extractor_name = feature_extractor.__class__.__name__

        if isinstance(res, list):
            for item in res:
                data[f"{feature_extractor_name}-{item}"] = "True"
            continue

        if isinstance(res, bool):
            res = str(res)

        data[feature_extractor_name] = res

    # Count this bug only after its own features were extracted.
    reporter_experience_map[bug["creator"]] += 1

    # TODO: Try simply using all possible fields instead of extracting features manually.

    # Clean copies of the text so the bug dict itself is not overwritten.
    summary = bug["summary"]
    comments = [c["text"] for c in bug["comments"]]
    for cleanup_function in self.cleanup_functions:
        summary = cleanup_function(summary)
        comments = [cleanup_function(text) for text in comments]

    return {
        "data": data,
        "title": summary,
        # A bug may have no comments at all.
        "first_comment": comments[0] if comments else "",
        "comments": " ".join(comments),
    }
def transform(self, bugs):
    """Turn raw bug dicts into a DataFrame of features and cleaned text.

    Each callable in ``self.feature_extractors`` is called with the bug.
    Its result is stored in the row's ``data`` dict under the extractor's
    class name: list results become ``'<ClassName>-<item>': 'True'``
    entries, bool results are stringified, ``None`` results are skipped
    and any other value is stored unchanged.

    The functions in ``self.cleanup_functions`` are applied to copies of
    the summary and comment texts, so the input bugs keep their original
    text.

    Args:
        bugs: Iterable of bug dicts with ``id``, ``summary`` and
            ``comments`` (list of dicts with a ``text`` key).

    Returns:
        ``pandas.DataFrame`` with the columns ``data``, ``title``,
        ``first_comment`` and ``comments``, plus ``commits`` when
        ``self.commit_messages_map`` is not ``None`` (``''`` for bugs that
        are missing from the map). ``first_comment`` is ``''`` for bugs
        without comments.
    """
    results = []

    for bug in bugs:
        # Read the id before a possible rollback replaces the bug object.
        bug_id = bug['id']

        if self.rollback:
            bug = bug_snapshot.rollback(bug, self.rollback_when)

        data = {}
        for f in self.feature_extractors:
            res = f(bug)

            if res is None:
                continue

            feature_name = f.__class__.__name__

            if isinstance(res, list):
                for item in res:
                    data[feature_name + '-' + item] = 'True'
                continue

            if isinstance(res, bool):
                res = str(res)

            data[feature_name] = res

        # TODO: Try simply using all possible fields instead of extracting features manually.

        # Clean local copies rather than overwriting the bug's own fields.
        summary = bug['summary']
        comments = [c['text'] for c in bug['comments']]
        for cleanup_function in self.cleanup_functions:
            summary = cleanup_function(summary)
            comments = [cleanup_function(text) for text in comments]

        result = {
            'data': data,
            'title': summary,
            # Bugs without comments get an empty first comment.
            'first_comment': comments[0] if comments else '',
            'comments': ' '.join(comments),
        }

        if self.commit_messages_map is not None:
            result['commits'] = self.commit_messages_map.get(bug_id, '')

        results.append(result)

    return pd.DataFrame(results)
def apply_transform(bug):
    """Extract features from a single bug or from a couple of bugs.

    This function relies on names from its enclosing scope: ``self``,
    ``already_rollbacked`` (a set of bug ids), ``reporter_experience_map``
    (creator -> number of bugs seen so far) and ``author_ids``.

    A tuple argument is treated as a couple of bugs and is only passed to
    extractors that are instances of ``couple_bug_feature``; any other
    argument is treated as one bug dict and is only passed to extractors
    that are instances of ``single_bug_feature``. The feature name is the
    extractor's ``name`` attribute when it has one, otherwise its class
    name. List results become ``"<item> in <name>": "True"`` entries, bool
    results are stringified and ``None`` results are skipped.

    Args:
        bug: A bug dict (``id``, ``creator``, ``summary``, ``comments``) or
            a tuple whose first two items are such dicts.

    Returns:
        For a couple: ``{"data": data}``. For a single bug: a dict with the
        keys ``data``, ``title``, ``first_comment`` (``""`` when the bug has
        no comments) and ``comments``. The bug's own ``summary`` and comment
        texts are not modified; cleanup is applied to copies.
    """
    is_couple = isinstance(bug, tuple)

    if not is_couple:
        bug_id = bug["id"]
        # Roll each bug back only once, however often it is transformed.
        if self.rollback and bug_id not in already_rollbacked:
            bug = bug_snapshot.rollback(bug, self.rollback_when)
            already_rollbacked.add(bug_id)
    elif self.rollback:
        # Tuples do not support item assignment, so the rolled back bugs
        # are collected in a list and the couple is rebuilt afterwards.
        members = list(bug)
        for position in (0, 1):
            member_id = members[position]["id"]
            if member_id not in already_rollbacked:
                members[position] = bug_snapshot.rollback(
                    members[position], self.rollback_when
                )
                already_rollbacked.add(member_id)
        bug = tuple(members)

    data = {}
    for feature_extractor in self.feature_extractors:
        res = None
        if isinstance(feature_extractor, single_bug_feature) and not is_couple:
            res = feature_extractor(
                bug,
                reporter_experience=reporter_experience_map[bug["creator"]],
                author_ids=author_ids,
            )
        elif isinstance(feature_extractor, couple_bug_feature) and is_couple:
            res = feature_extractor(bug)

        if res is None:
            continue

        if hasattr(feature_extractor, "name"):
            feature_extractor_name = feature_extractor.name
        else:
            feature_extractor_name = feature_extractor.__class__.__name__

        if isinstance(res, list):
            for item in res:
                data[f"{item} in {feature_extractor_name}"] = "True"
            continue

        if isinstance(res, bool):
            res = str(res)

        data[feature_extractor_name] = res

    if is_couple:
        reporter_experience_map[bug[0]["creator"]] += 1
        reporter_experience_map[bug[1]["creator"]] += 1
        return {"data": data}

    reporter_experience_map[bug["creator"]] += 1

    # TODO: Try simply using all possible fields instead of extracting features manually.

    # Clean copies of the text so the bug dict itself is not overwritten.
    summary = bug["summary"]
    comments = [c["text"] for c in bug["comments"]]
    for cleanup_function in self.cleanup_functions:
        summary = cleanup_function(summary)
        comments = [cleanup_function(text) for text in comments]

    return {
        "data": data,
        "title": summary,
        # A bug may have no comments at all.
        "first_comment": comments[0] if comments else "",
        "comments": " ".join(comments),
    }
def test_bug_snapshot():
    """Call ``rollback`` on every bug returned by ``bugzilla.get_bugs()``.

    The id of each bug and its position in the iteration are printed on
    separate lines before the rollback call.
    """
    for position, current_bug in enumerate(bugzilla.get_bugs()):
        # One value per line: the bug id first, then its index.
        print(current_bug["id"], position, sep="\n")
        rollback(current_bug, None, False)
def apply_transform(bug):
    """Extract features from a single bug or from a couple of bugs.

    This function relies on names from its enclosing scope: ``self``,
    ``already_rollbacked`` (a set of bug ids), ``reporter_experience_map``
    (creator -> number of bugs seen so far) and ``author_ids``.

    A tuple argument is treated as a couple of bugs and is only passed to
    extractors that are instances of ``couple_bug_feature``; any other
    argument is treated as one bug dict and is only passed to extractors
    that are instances of ``single_bug_feature``. The feature name is the
    extractor's ``name`` attribute when it has one, otherwise its class
    name. List and set results become ``"<item> in <name>": True`` entries
    (with interned keys), ``None`` results are skipped and any other value
    is stored unchanged.

    Args:
        bug: A bug dict (``id``, ``creator``, ``summary``, ``comments``) or
            a tuple whose first two items are such dicts.

    Returns:
        For a couple: ``{"data": data}``. For a single bug: a dict with the
        keys ``data``, ``title``, ``first_comment`` (``""`` when the bug has
        no comments) and ``comments`` (cleaned comment texts joined with a
        single space).
    """
    is_couple = isinstance(bug, tuple)

    if not is_couple:
        bug_id = bug["id"]
        # Roll each bug back only once, however often it is transformed.
        if self.rollback and bug_id not in already_rollbacked:
            bug = bug_snapshot.rollback(bug, self.rollback_when)
            already_rollbacked.add(bug_id)
    elif self.rollback:
        # Tuples do not support item assignment, so the rolled back bugs
        # are collected in a list and the couple is rebuilt afterwards.
        members = list(bug)
        for position in (0, 1):
            member_id = members[position]["id"]
            if member_id not in already_rollbacked:
                members[position] = bug_snapshot.rollback(
                    members[position], self.rollback_when
                )
                already_rollbacked.add(member_id)
        bug = tuple(members)

    data = {}
    for feature_extractor in self.feature_extractors:
        res = None
        if isinstance(feature_extractor, single_bug_feature) and not is_couple:
            res = feature_extractor(
                bug,
                reporter_experience=reporter_experience_map[bug["creator"]],
                author_ids=author_ids,
            )
        elif isinstance(feature_extractor, couple_bug_feature) and is_couple:
            res = feature_extractor(bug)

        if res is None:
            continue

        if hasattr(feature_extractor, "name"):
            feature_extractor_name = feature_extractor.name
        else:
            feature_extractor_name = feature_extractor.__class__.__name__

        if isinstance(res, (list, set)):
            for item in res:
                # The same keys recur for many bugs; interning shares them.
                data[sys.intern(f"{item} in {feature_extractor_name}")] = True
            continue

        data[feature_extractor_name] = res

    if is_couple:
        reporter_experience_map[bug[0]["creator"]] += 1
        reporter_experience_map[bug[1]["creator"]] += 1
        return {"data": data}

    reporter_experience_map[bug["creator"]] += 1

    # Cleanup runs on local copies; the bug dict itself is left unchanged.
    summary = bug["summary"]
    comments = [c["text"] for c in bug["comments"]]
    for cleanup_function in self.cleanup_functions:
        summary = cleanup_function(summary)
        comments = [cleanup_function(comment) for comment in comments]

    return {
        "data": data,
        "title": summary,
        "first_comment": comments[0] if comments else "",
        "comments": " ".join(comments),
    }
def test_bug_snapshot():
    """Run ``rollback`` over all bugs from ``bugzilla.get_bugs()``.

    For progress tracking, the id of each bug is printed, followed by the
    bug's index in the iteration.
    """
    all_bugs = bugzilla.get_bugs()
    for index, entry in enumerate(all_bugs):
        entry_id = entry["id"]
        print(entry_id)
        print(index)
        rollback(entry, None, False)
def test_bug_snapshot():
    """Call ``rollback`` with ``do_assert=True`` on every available bug.

    The bugs come from ``bugzilla.get_bugs()``. Each bug's id and its index
    in the iteration are printed on separate lines before the rollback
    call.
    """
    for position, current_bug in enumerate(bugzilla.get_bugs()):
        # One value per line: the bug id first, then its index.
        print(current_bug["id"], position, sep="\n")
        rollback(current_bug, do_assert=True)