def test_diff_to_json(self):
    j1 = diff_to_json(File("tests/resources/diff1.patch").read())
    j2 = diff_to_json(File("tests/resources/diff2.patch").read())
    e1 = File("tests/resources/diff1.json").read_json(flexible=False, leaves=False)
    e2 = File("tests/resources/diff2.json").read_json(flexible=False, leaves=False)
    self.assertEqual(j1, e1)
    self.assertEqual(j2, e2)
def test_small_changeset_to_json(self):
    small_patch_file = File("tests/resources/small.patch")
    j1 = diff_to_json(small_patch_file.read_bytes().decode("utf8", "replace"))
    expected = File("tests/resources/small.json").read_json(flexible=False, leaves=False)
    self.assertEqual(j1, expected)
def test_big_changeset_to_json(self):
    big_patch_file = File("tests/resources/big.patch")
    # RE-FETCH THE FIXTURE WITH:
    # big_patch_file.write_bytes(http.get("https://hg.mozilla.org/mozilla-central/raw-rev/e5693cea1ec944ca077c7a46c5f127c828a90f1b").content)

    # SANITY CHECK: THE "replace" ERROR HANDLER LEAVES VALID BYTES UNTOUCHED
    self.assertEqual(b'\r'.decode('utf8', 'replace'), u'\r')

    j1 = diff_to_json(big_patch_file.read_bytes().decode("utf8", "replace"))
    expected = File("tests/resources/big.json").read_json(flexible=False, leaves=False)
    self.assertEqual(j1, expected)
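# The tests above decode patch bytes with the "replace" error handler. A minimal
# standalone illustration of why (plain Python, no project imports): bytes that
# are not valid UTF-8 become U+FFFD instead of raising UnicodeDecodeError, so a
# patch with mixed or broken encodings can still be parsed.
assert b"caf\xe9".decode("utf8", "replace") == "caf\ufffd"  # LONE 0xE9 IS NOT VALID UTF-8
assert b"\r".decode("utf8", "replace") == "\r"              # VALID BYTES PASS THROUGH UNCHANGED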
def inner(changeset_id): if self.es.cluster.version.startswith("1.7."): query = { "query": {"filtered": { "query": {"match_all": {}}, "filter": {"and": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]} }}, "size": 1 } else: query = { "query": {"bool": {"must": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]}}, "size": 1 } try: # ALWAYS TRY ES FIRST with self.es_locker: response = self.es.search(query) json_diff = response.hits.hits[0]._source.changeset.diff if json_diff: return json_diff except Exception as e: pass url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id}) if DEBUG: Log.note("get unified diff from {{url}}", url=url) try: response = http.get(url) diff = response.content.decode("utf8", "replace") json_diff = diff_to_json(diff) num_changes = _count(c for f in json_diff for c in f.changes) if json_diff: if num_changes < MAX_DIFF_SIZE: return json_diff elif revision.changeset.description.startswith("merge "): return None # IGNORE THE MERGE CHANGESETS else: Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes) for file in json_diff: file.changes = None return json_diff except Exception as e: Log.warning("could not get unified diff", cause=e)
def inner(changeset_id): if self.es.cluster.version.startswith("1.7."): query = { "query": {"filtered": { "query": {"match_all": {}}, "filter": {"and": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]} }}, "size": 1 } else: query = { "query": {"bool": {"must": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]}}, "size": 1 } try: # ALWAYS TRY ES FIRST with self.es_locker: response = self.es.search(query) json_diff = response.hits.hits[0]._source.changeset.diff if json_diff: return json_diff except Exception as e: pass url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id}) DEBUG and Log.note("get unified diff from {{url}}", url=url) try: response = http.get(url) diff = response.content.decode("utf8") json_diff = diff_to_json(diff) num_changes = _count(c for f in json_diff for c in f.changes) if json_diff: if revision.changeset.description.startswith("merge "): return None # IGNORE THE MERGE CHANGESETS elif num_changes < MAX_DIFF_SIZE: return json_diff else: Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes) for file in json_diff: file.changes = None return json_diff except Exception as e: Log.warning("could not get unified diff from {{url}}", url=url, cause=e)
def inner(changeset_id):
    # ALWAYS TRY ES FIRST
    json_diff = _get_changeset_from_es(self.repo, changeset_id).changeset.diff
    if json_diff:
        return json_diff

    url = URL(revision.branch.url) / "raw-rev" / changeset_id
    DEBUG and Log.note("get unified diff from {{url}}", url=url)
    try:
        response = http.get(url)
        try:
            diff = response.content.decode("utf8")
        except Exception:
            # SOME PATCHES ARE NOT VALID UTF-8; latin-1 ACCEPTS ANY BYTE SEQUENCE
            diff = response.content.decode("latin1")
        # File("tests/resources/big.patch").write_bytes(response.content)
        json_diff = diff_to_json(diff)
        num_changes = _count(c for f in json_diff for c in f.changes)
        if json_diff:
            if IGNORE_MERGE_DIFFS and revision.changeset.description.startswith("merge "):
                return None  # IGNORE THE MERGE CHANGESETS
            elif num_changes < MAX_DIFF_SIZE:
                return json_diff
            else:
                Log.warning(
                    "Revision at {{url}} has a diff with {{num}} changes, ignored",
                    url=url,
                    num=num_changes,
                )
                for file in json_diff:
                    file.changes = None
                return json_diff
    except Exception as e:
        Log.warning("could not get unified diff from {{url}}", url=url, cause=e)
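# The nested decode above works because latin-1 maps each of the 256 byte values
# to a code point, so it can never raise. A hedged sketch of the same fallback as
# a reusable helper (the name `decode_permissive` is hypothetical, not part of
# this codebase):
def decode_permissive(content):
    # PREFER STRICT UTF-8; FALL BACK TO latin-1, WHICH ALWAYS SUCCEEDS
    try:
        return content.decode("utf8")
    except UnicodeDecodeError:
        return content.decode("latin1")

assert decode_permissive(b"caf\xc3\xa9") == "caf\xe9"  # VALID UTF-8
assert decode_permissive(b"caf\xe9") == "caf\xe9"      # latin-1 FALLBACK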
def test_big_changeset_to_json(self):
    j1 = diff_to_json(File("tests/resources/big.patch").read())
    expected = File("tests/resources/big.json").read_json(flexible=False, leaves=False)
    self.assertEqual(j1, expected)