def add_shared_text(self, shared_text: SharedText) -> None:
    """Register a new SharedText in the graph.

    The text must not already be present (asserted). SOURCE/SINK kinds are
    additionally mirrored into the legacy sources/sinks tables.
    """
    assert (
        shared_text.id.local_id not in self._shared_texts
    ), "Shared text already exists"

    kind = shared_text.kind
    local_id = shared_text.id.local_id

    # Remove this block when we finally remove sources/sinks table
    # (T30720232). For now, the corresponding source/sink entries
    # still need to be present.
    if kind == SharedTextKind.SOURCE:
        # New source added, make sure we keep track of it
        record = Source.Record(id=DBID(), name=shared_text.contents)
        self._sources[record.id.local_id] = record
        self._shared_text_to_source[local_id] = record
    elif kind == SharedTextKind.SINK:
        # New sink added, make sure we keep track of it
        record = Sink.Record(id=DBID(), name=shared_text.contents)
        self._sinks[record.id.local_id] = record
        self._shared_text_to_sink[local_id] = record

    self._shared_texts[local_id] = shared_text

    # Allow look-up of SharedTexts by name and kind (to optimize
    # get_shared_text which is called when parsing each issue instance).
    self._shared_text_lookup[kind][shared_text.contents] = local_id
def run(self, iters: DictEntries, summary: Summary) -> Tuple[TraceGraph, Summary]:
    """Populate a fresh TraceGraph from parsed entries.

    Seeds bookkeeping slots in the summary, creates an (incomplete) run,
    then generates one issue instance per parsed issue entry. Returns the
    built graph together with the (mutated) summary.
    """
    self.summary = summary

    # Pre-seed the summary's bookkeeping slots. NOTE(review): the two
    # *_entries defaultdicts are replaced below with the parsed iterators,
    # but _create_empty_run runs in between, so the seeding is kept as-is.
    initial_slots = {
        "extracted_features": {},  # Dict[int, Any]
        "precondition_entries": defaultdict(list),  # Dict[Tuple[str, str], Any]
        "postcondition_entries": defaultdict(list),  # Dict[Tuple[str, str], Any]
        "missing_preconditions": set(),  # Set[Tuple[str, str]]
        "missing_postconditions": set(),  # Set[Tuple[str, str]]
        "bad_preconditions": set(),  # Set[Tuple[str, str, int]]
    }
    for key, value in initial_slots.items():
        self.summary[key] = value

    self.graph = TraceGraph()
    run = self._create_empty_run(status=RunStatus.INCOMPLETE)
    run.id = DBID()
    self.summary["run"] = run

    self.summary["precondition_entries"] = iters["preconditions"]
    self.summary["postcondition_entries"] = iters["postconditions"]

    callables = self._compute_callables_count(iters)

    log.info("Generating instances")
    for entry in iters["issues"]:
        self._generate_issue(run, entry, callables)

    return self.graph, self.summary
def _get_shared_text(self, kind, name):
    """Return the SharedText for (kind, name), creating it if absent.

    New records truncate the name to SHARED_TEXT_LENGTH and are registered
    in the graph before being returned.
    """
    existing = self.graph.get_shared_text(kind, name)
    if existing is not None:
        return existing

    created = SharedText.Record(
        id=DBID(),
        contents=name[:SHARED_TEXT_LENGTH],
        kind=kind,
    )
    self.graph.add_shared_text(created)
    return created
def _generate_trace_annotations(self, frame_id, features) -> None:
    """Attach a TraceFrameAnnotation to frame_id for every feature that
    carries an "extra_trace" payload; features without one are skipped."""
    extra_traces = (f["extra_trace"] for f in features if "extra_trace" in f)
    for annotation in extra_traces:
        pos = annotation["position"]
        record = TraceFrameAnnotation.Record(
            id=DBID(),
            trace_frame_id=frame_id,
            trace_frame_id2=frame_id,  # TODO: Deprecate id2 field
            location=SourceLocation(pos["line"], pos["start"], pos["end"]),
            message=annotation["msg"],
            link=annotation.get("link", None),
            trace_key=annotation.get("trace", None),
        )
        self.graph.add_trace_annotation(record)
def _generate_raw_precondition(
    self,
    run,
    filename,
    caller,
    caller_port,
    callee,
    callee_port,
    callee_location,
    titos,
    sinks,
    type_interval,
    features,
):
    """Create, register and return a PRECONDITION trace frame.

    Associates each (sink, depth) pair as a leaf of the frame and attaches
    any extra-trace annotations found in `features`.
    """
    lower, upper, preserves_type_context = self._get_interval(type_interval)

    location = SourceLocation(
        callee_location["line"],
        callee_location["start"],
        callee_location["end"],
    )
    frame = TraceFrame.Record(
        id=DBID(),
        kind=TraceKind.PRECONDITION,
        caller=caller,
        caller_port=caller_port,
        callee=callee,
        callee_port=callee_port,
        callee_location=location,
        filename=filename,
        titos=titos,
        run_id=run.id,
        preserves_type_context=preserves_type_context,
        type_interval_lower=lower,
        type_interval_upper=upper,
        migrated_id=None,
    )

    for sink_name, depth in sinks:
        leaf = self._get_shared_text(SharedTextKind.SINK, sink_name)
        self.graph.add_trace_frame_leaf_assoc(frame, leaf, depth)

    self.graph.add_trace_frame(frame)
    self._generate_trace_annotations(frame.id, features)
    return frame
def _generate_raw_postcondition(
    self,
    run,
    filename,
    caller,
    caller_port,
    callee,
    callee_port,
    callee_location,
    sources,
    type_interval,
):
    """Create, register and return a POSTCONDITION trace frame.

    Associates each (source, depth) pair as a leaf of the frame.
    Postcondition frames carry no titos (always []).
    """
    lower, upper, preserves_type_context = self._get_interval(type_interval)

    location = SourceLocation(
        callee_location["line"],
        callee_location["start"],
        callee_location["end"],
    )
    frame = TraceFrame.Record(
        id=DBID(),
        kind=TraceKind.POSTCONDITION,
        caller=caller,
        callee=callee,
        callee_location=location,
        filename=filename,
        run_id=run.id,
        caller_port=caller_port,
        callee_port=callee_port,
        preserves_type_context=preserves_type_context,
        type_interval_lower=lower,
        type_interval_upper=upper,
        migrated_id=None,
        titos=[],
    )

    for source_name, depth in sources:
        leaf = self._get_shared_text(SharedTextKind.SOURCE, source_name)
        self.graph.add_trace_frame_leaf_assoc(frame, leaf, depth)

    self.graph.add_trace_frame(frame)
    return frame
def _generate_issue(self, run, entry, callablesCount):
    """Insert the issue instance into a run. This includes creating
    (for new issues) or finding (for existing issues) Issue objects to
    associate with the instances.

    Also create sink entries and associate related issues.

    Args:
        run: the run record this instance belongs to (provides .id, .date).
        entry: parsed issue dict; keys used here include "preconditions",
            "postconditions", "features", "callable", "filename", "code",
            "message", "initial_sources", "final_sinks", and optionally
            "json", "fix_info", "extracted_features".
        callablesCount: mapping from callable name to its issue count,
            used to fill callable_count on the instance.
    """
    # One trace frame per pre/postcondition attached to this entry; all of
    # them get associated with the instance at the end.
    trace_frames = []
    for p in entry["preconditions"]:
        tf = self._generate_issue_precondition(run, entry, p)
        trace_frames.append(tf)
    for p in entry["postconditions"]:
        tf = self._generate_issue_postcondition(run, entry, p)
        trace_frames.append(tf)

    # Flatten feature entries into a set of feature strings.
    features = set()
    for f in entry["features"]:
        features.update(self._generate_issue_feature_contents(entry, f))

    # NOTE: `callable` shadows the builtin; kept for byte-compatibility.
    callable = entry["callable"]
    handle = self._get_issue_handle(entry)

    # TODO: record depth in issue_sink and issue_source assoc, but this can
    # be different per instance, so should be stored with the instance.
    initial_sources = {
        self._get_shared_text(SharedTextKind.SOURCE, s)
        for (s, _) in entry["initial_sources"]
    }
    final_sinks = {
        self._get_shared_text(SharedTextKind.SINK, s)
        for (s, _) in entry["final_sinks"]
    }

    # The raw entry JSON is compressed via the summary-provided callable
    # before being stored on the issue.
    issue = Issue.Record(
        id=IssueDBID(),
        code=entry["code"],
        handle=handle,
        callable=callable,
        filename=entry["filename"],
        status=IssueStatus.UNCATEGORIZED,
        first_seen=run.date,
        run_id=run.id,
        json=self.summary["compress"](
            json.dumps(entry.get("json", {})).encode("utf-8")
        ),
    )
    # Issue must be registered before the instance referencing issue.id.
    self.graph.add_issue(issue)

    # Optional fix info: record created eagerly so its id can be threaded
    # into the instance; the assoc itself is added after the instance exists.
    fix_info = None
    fix_info_id = None
    if entry.get("fix_info") is not None:
        fix_info = IssueInstanceFixInfo.Record(
            id=DBID(),
            fix_info=json.dumps(entry["fix_info"]),
        )
        fix_info_id = fix_info.id

    message = self._get_shared_text(SharedTextKind.MESSAGE, entry["message"])

    instance = IssueInstance.Record(
        id=DBID(),
        issue_id=issue.id,
        location=self.get_location(entry),
        filename=entry["filename"],
        run_id=run.id,
        fix_info_id=fix_info_id,
        message_id=message.id,
        rank=0,
        # Sources are reached via postconditions, sinks via preconditions.
        min_trace_length_to_sources=self._get_minimum_trace_length(
            entry["postconditions"]
        ),
        min_trace_length_to_sinks=self._get_minimum_trace_length(
            entry["preconditions"]
        ),
        callable_count=callablesCount[issue.callable],
    )

    # Wire up all associations: leaves, fix info, trace frames, features.
    for sink in final_sinks:
        self.graph.add_issue_instance_shared_text_assoc(instance, sink)
    for source in initial_sources:
        self.graph.add_issue_instance_shared_text_assoc(instance, source)
    if fix_info is not None:
        self.graph.add_issue_instance_fix_info(instance, fix_info)
    for trace_frame in trace_frames:
        self.graph.add_issue_instance_trace_frame_assoc(instance, trace_frame)
    for feature in features:
        # Features are interned as shared texts before association.
        feature = self._get_shared_text(SharedTextKind.FEATURE, feature)
        self.graph.add_issue_instance_shared_text_assoc(instance, feature)
    self.graph.add_issue_instance(instance)

    if "extracted_features" in entry:
        self.summary["extracted_features"][instance.id.local_id] = entry[
            "extracted_features"
        ]