def __init__(self, conn=None, tuid_service=None, kwargs=None):
    """
    Prepare the changeset log: storage, collaborators, work queues and the
    background worker threads.

    :param conn: database connection to reuse; when falsy, a new `sql.Sql`
                 is opened on `kwargs.database.name`
    :param tuid_service: TUID service to reuse; when falsy, a new
                         `TUIDService` is built around this instance
    :param kwargs: configuration object; `database.name`, `hg_cache` and
                   `tuid` are read from it

    Any exception raised during setup is reported through `Log.warning`
    and is NOT re-raised.
    """
    try:
        self.config = kwargs
        self.conn = conn or sql.Sql(self.config.database.name)

        # THE hg CACHE IS OPTIONAL; Null STANDS IN WHEN IT IS NOT CONFIGURED
        if self.config.hg_cache:
            self.hg_cache = HgMozillaOrg(
                kwargs=self.config.hg_cache,
                use_cache=True
            )
        else:
            self.hg_cache = Null

        if tuid_service:
            self.tuid_service = tuid_service
        else:
            self.tuid_service = tuid.service.TUIDService(
                database=None,
                hg=None,
                kwargs=self.config,
                conn=self.conn,
                clogger=self
            )

        self.rev_locker = Lock()
        self.working_locker = Lock()

        self.init_db()
        highest_plus_one = self.conn.get_one(
            "SELECT max(revnum)+1 FROM csetLog"
        )[0]
        # coalesce() SUPPLIES 1 WHEN THE QUERY YIELDS NO VALUE
        self.next_revnum = coalesce(highest_plus_one, 1)

        self.csets_todo_backwards = Queue(
            name="Clogger.csets_todo_backwards"
        )
        self.deletions_todo = Queue(name="Clogger.deletions_todo")
        self.maintenance_signal = Signal(name="Clogger.maintenance_signal")

        # FROM HERE ON, self.config IS THE tuid SUB-SECTION ONLY
        self.config = self.config.tuid

        self.disable_backfilling = False
        self.disable_tipfilling = False
        self.disable_deletion = False
        self.disable_maintenance = False

        # Make sure we are filled before allowing queries
        cset_count = self.conn.get_one(
            "SELECT count(revnum) FROM csetLog"
        )[0]
        if cset_count < MINIMUM_PERMANENT_CSETS:
            Log.note(
                "Filling in csets to hold {{minim}} csets.",
                minim=MINIMUM_PERMANENT_CSETS
            )
            with self.conn.transaction() as t:
                # START FROM THE OLDEST LOGGED REVISION, OR 'tip' IF NONE
                oldest_rev = t.query(
                    "SELECT min(revnum), revision FROM csetLog"
                ).data[0][1] or 'tip'
            self._fill_in_range(
                MINIMUM_PERMANENT_CSETS - cset_count,
                oldest_rev,
                timestamp=False
            )

        Log.note(
            "Table is filled with atleast {{minim}} entries. Starting workers...",
            minim=MINIMUM_PERMANENT_CSETS
        )

        Thread.run('clogger-tip', self.fill_forward_continuous)
        Thread.run('clogger-backfill', self.fill_backward_with_list)
        Thread.run('clogger-maintenance', self.csetLog_maintenance)
        Thread.run('clogger-deleter', self.csetLog_deleter)

        Log.note("Started clogger workers.")
    except Exception as err:
        Log.warning("Cannot setup clogger: {{cause}}", cause=str(err))
def main():
    """
    Read the settings and populate the module-level `config` and `hg`
    globals, then start logging.

    Any exception is handed to `Log.error` with the message
    "Problem with etl".
    """
    global config, hg

    try:
        config = startup.read_settings()
        constants.set(config.constants)
        hg = HgMozillaOrg(config)
        Log.start(config.debug)
    except Exception as cause:
        Log.error("Problem with etl", cause)
def main():
    """
    Walk the ancestry of a fixed seed changeset and log every changeset
    that sets a new minimum for (changed lines / files in the diff).

    Only changesets touching more than MIN_FILES files are scored.
    Progress is emitted with `Log.note`; any exception is handed to
    `Log.error`, and logging is always stopped on the way out.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)
        hg = HgMozillaOrg(settings)

        seed = "97160a734959"
        todo = Queue()
        todo.add(seed)
        # MERGES MAKE THE SAME ANCESTOR REACHABLE ALONG MANY PATHS; REMEMBER
        # WHAT HAS BEEN QUEUED SO EACH CHANGESET IS FETCHED AND DIFFED ONCE
        queued = {seed}

        least = 100000
        while todo:
            next_ = todo.pop()
            curr = hg.get_revision(
                wrap({
                    "changeset": {"id": next_},
                    "branch": {"name": BRANCH}
                })
            )
            if len(curr.changeset.files) > MIN_FILES:
                diff = hg._get_json_diff_from_hg(curr)
                num_files = len(diff)
                # AN EMPTY DIFF HAS NO SCORE; SKIP IT RATHER THAN DIVIDE BY
                # ZERO AND ABORT THE WHOLE SCAN
                if num_files:
                    num_changes = sum(len(d.changes) for d in diff)
                    score = num_changes / num_files
                    if score < least:
                        least = score
                        Log.note(
                            "smallest = {{rev}}, num_lines={{num}}, num_files={{files}}",
                            rev=curr.changeset.id,
                            num=num_changes,
                            files=num_files
                        )

            for parent in listwrap(curr.parents):
                if parent not in queued:
                    queued.add(parent)
                    todo.add(parent)
    except Exception as e:
        Log.error("Problem with scna", e)
    finally:
        Log.stop()
def main():
    """
    Read the settings, populate the module-level `config` and `hg`
    globals, then run `_parse_diff` on one hard-coded mozilla-central
    changeset.

    Any exception is handed to `Log.error` with the message
    "Problem with etl".
    """
    global config, hg

    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        hg = HgMozillaOrg(config)

        sample_revision = Data(
            changeset={"id": "2d9d0bebb5c6"},
            branch={"url": "https://hg.mozilla.org/mozilla-central"}
        )
        # THE RESULT IS NOT USED FURTHER IN THIS FUNCTION
        parsed = _parse_diff(sample_revision)
    except Exception as cause:
        Log.error("Problem with etl", cause)
def setUp(self):
    """Build an `HgMozillaOrg` from `TestHg.config` and keep it on `self.hg`."""
    settings = TestHg.config
    self.hg = HgMozillaOrg(settings)
class TestHg(FuzzyTestCase):
    """
    Tests for `HgMozillaOrg` lookups and for the diff parsers.

    Settings are read once per class from tests/config.json.
    """

    config = Null

    @classmethod
    def setUpClass(cls):
        """Load settings, apply constants and start logging."""
        try:
            settings = startup.read_settings(filename="tests/config.json")
            cls.config = settings
            constants.set(settings.constants)
            Log.start(settings.debug)
        except Exception as cause:
            Log.error("Problem with etl", cause)

    @classmethod
    def tearDownClass(cls):
        """Stop logging."""
        Log.stop()

    def setUp(self):
        """Build the `HgMozillaOrg` under test from the class settings."""
        self.hg = HgMozillaOrg(TestHg.config)

    def _central_branch(self):
        """Return the first known branch named mozilla-central with locale en-US."""
        candidates = [
            b
            for b in self.hg.branches
            if b.name == "mozilla-central" and b.locale == "en-US"
        ]
        return candidates[0]

    def _wait_for_todo(self):
        """Poll once a second until `self.hg.todo.queue` is empty."""
        while len(self.hg.todo.queue) > 0:
            Till(seconds=1).wait()

    def test_get_push1(self):
        """`_get_push` returns the expected push for b6b8e616de32."""
        branch = self._central_branch()
        test = self.hg._get_push(branch, "b6b8e616de32")
        expected = {"date": 1503659542, "user": "******", "id": 32390}
        self.assertEqual(test, expected)
        self._wait_for_todo()

    def test_get_rev_with_backout(self):
        """`get_revision` reports which changeset backed out de7aa6b08234."""
        branch = self._central_branch()
        query = wrap({
            "branch": branch,
            "changeset": {"id": "de7aa6b08234"}
        })
        test = self.hg.get_revision(query)
        expected = {
            "changeset": {
                "backedoutby": "f384789a29dcfd514d25d4a16a97ec5309612d78"
            }
        }
        self.assertEqual(test, expected)
        self._wait_for_todo()

    def test_get_prefix_space(self):
        """The checked line of the diff keeps its leading space."""
        branch = self._central_branch()
        query = wrap({
            "branch": branch,
            "changeset": {"id": "de7aa6b08234"}
        })
        test = self.hg.get_revision(query, None, True)
        content = test.changeset.diff[1].changes[0].new.content
        self.assertTrue(content.startswith(" "))

    def test_diff_to_json(self):
        """`diff_to_json` reproduces the stored JSON for both sample patches."""
        actual1 = diff_to_json(File("tests/resources/diff1.patch").read())
        actual2 = diff_to_json(File("tests/resources/diff2.patch").read())

        expected1 = File("tests/resources/diff1.json").read_json(
            flexible=False, leaves=False
        )
        expected2 = File("tests/resources/diff2.json").read_json(
            flexible=False, leaves=False
        )

        self.assertEqual(actual1, expected1)
        self.assertEqual(actual2, expected2)

    def test_big_changeset_to_json(self):
        """`diff_to_json` reproduces the stored JSON for the big patch."""
        big_patch_file = File("tests/resources/big.patch")
        # big_patch_file.write_bytes(http.get("https://hg.mozilla.org/mozilla-central/raw-rev/e5693cea1ec944ca077c7a46c5f127c828a90f1b").content)

        # DECODING WITH 'replace' MUST LEAVE A CARRIAGE RETURN UNTOUCHED
        self.assertEqual(b'\r'.decode('utf8', 'replace'), u'\r')

        patch_text = big_patch_file.read_bytes().decode("utf8", "replace")
        actual = diff_to_json(patch_text)
        expected = File("tests/resources/big.json").read_json(
            flexible=False, leaves=False
        )
        self.assertEqual(actual, expected)

    def test_small_changeset_to_json(self):
        """`diff_to_json` reproduces the stored JSON for the small patch."""
        small_patch_file = File("tests/resources/small.patch")
        patch_text = small_patch_file.read_bytes().decode("utf8", "replace")
        actual = diff_to_json(patch_text)
        expected = File("tests/resources/small.json").read_json(
            flexible=False, leaves=False
        )
        self.assertEqual(actual, expected)

    def test_changeset_to_json(self):
        """The diff returned by `get_revision` matches the stored big JSON."""
        query = wrap({
            "branch": {
                "name": "mozilla-central",
                "url": "https://hg.mozilla.org/mozilla-central"
            },
            "changeset": {"id": "e5693cea1ec944ca0"}
        })
        revision = self.hg.get_revision(
            query,
            None,  # Locale
            True  # get_diff
        )
        expected = File("tests/resources/big.json").read_json(
            flexible=False, leaves=False
        )
        self.assertEqual(revision.changeset.diff, expected)

    def test_coverage_parser(self):
        """Run `diff_to_moves` on a downloaded raw diff and log the file names."""
        response = http.get(
            'https://hg.mozilla.org/mozilla-central/raw-rev/14dc6342ec5'
        )
        diff = response.content.decode('utf8')
        moves = diff_to_moves(diff)
        # WHEN THE NEW SIDE IS 'dev/null', LOG THE OLD NAME INSTEAD
        touched = [
            m.old.name if m.new.name == 'dev/null' else m.new.name
            for m in moves
        ]
        Log.note("{{files}}", files=touched)
def __init__(self, conn=None, tuid_service=None, start_workers=True,
             new_table=False, kwargs=None):
    """
    Prepare the changeset log: storage, collaborators, work queues and,
    optionally, the background workers.

    :param conn: database connection to reuse; when falsy, a new `sql.Sql`
                 is opened on `kwargs.database.name`
    :param tuid_service: TUID service to reuse; when falsy, a new
                         `TUIDService` is built around this instance
    :param start_workers: when truthy, `self.start_workers()` is called
                          once the table has been filled
    :param new_table: when truthy, the csetLog table is dropped before
                      `self.init_db()` runs
    :param kwargs: configuration object; `database.name`, `hg_cache` and
                   `tuid` are read from it

    Any exception raised during setup is reported through `Log.warning`
    and is NOT re-raised.
    """
    try:
        self.config = kwargs
        self.conn = conn or sql.Sql(self.config.database.name)

        # THE hg CACHE IS OPTIONAL; Null STANDS IN WHEN IT IS NOT CONFIGURED
        if self.config.hg_cache:
            self.hg_cache = HgMozillaOrg(
                kwargs=self.config.hg_cache,
                use_cache=True
            )
        else:
            self.hg_cache = Null

        if tuid_service:
            self.tuid_service = tuid_service
        else:
            self.tuid_service = tuid.service.TUIDService(
                kwargs=self.config.tuid,
                conn=self.conn,
                clogger=self
            )

        self.rev_locker = Lock()
        self.working_locker = Lock()

        if new_table:
            with self.conn.transaction() as t:
                t.execute("DROP TABLE IF EXISTS csetLog")

        self.init_db()
        highest_plus_one = self.conn.get_one(
            "SELECT max(revnum)+1 FROM csetLog"
        )[0]
        # coalesce() SUPPLIES 1 WHEN THE QUERY YIELDS NO VALUE
        self.next_revnum = coalesce(highest_plus_one, 1)

        self.csets_todo_backwards = Queue(
            name="Clogger.csets_todo_backwards"
        )
        self.deletions_todo = Queue(name="Clogger.deletions_todo")
        self.maintenance_signal = Signal(name="Clogger.maintenance_signal")

        # NARROW self.config TO THE tuid SUB-SECTION WHEN THERE IS ONE
        if 'tuid' in self.config:
            self.config = self.config.tuid

        self.disable_backfilling = False
        self.disable_tipfilling = False
        self.disable_deletion = False
        self.disable_maintenance = False

        self.backfill_thread = None
        self.tipfill_thread = None
        self.deletion_thread = None
        self.maintenance_thread = None

        # Make sure we are filled before allowing queries
        cset_count = self.conn.get_one(
            "SELECT count(revnum) FROM csetLog"
        )[0]
        if cset_count < MINIMUM_PERMANENT_CSETS:
            Log.note(
                "Filling in csets to hold {{minim}} csets.",
                minim=MINIMUM_PERMANENT_CSETS
            )
            with self.conn.transaction() as t:
                # START FROM THE OLDEST LOGGED REVISION, OR 'tip' IF NONE
                oldest_rev = t.query(
                    "SELECT min(revnum), revision FROM csetLog"
                ).data[0][1] or 'tip'
            self._fill_in_range(
                MINIMUM_PERMANENT_CSETS - cset_count,
                oldest_rev,
                timestamp=False
            )

        Log.note(
            "Table is filled with atleast {{minim}} entries.",
            minim=MINIMUM_PERMANENT_CSETS
        )

        if start_workers:
            self.start_workers()
    except Exception as err:
        Log.warning("Cannot setup clogger: {{cause}}", cause=str(err))
def setUp(self):
    """Build an `HgMozillaOrg` from `TestParsing.config` and keep it on `self.hg`."""
    settings = TestParsing.config
    self.hg = HgMozillaOrg(settings)
class TestParsing(FuzzyTestCase):
    """
    Tests for diff parsing and for moving line coverage between file
    revisions with the matrices from `parse_diff_to_matrix`.
    """

    @classmethod
    def setUpClass(cls):
        """Load settings, apply constants and start logging."""
        try:
            settings = startup.read_settings()
            cls.config = settings
            constants.set(settings.constants)
            Log.start(settings.debug)
        except Exception as cause:
            Log.error("Problem with etl", cause)

    @classmethod
    def tearDownClass(cls):
        """Stop logging."""
        Log.stop()

    def setUp(self):
        """Build the `HgMozillaOrg` under test from the class settings."""
        self.hg = HgMozillaOrg(TestParsing.config)

    def _get_test_data(self):
        """
        Load three versions of the example file and the two change
        matrices between them.

        :return: (file1, c1, file2, c2, file3) where each file is a list
                 of lines
        """
        file1 = File("tests/resources/example_file_v1.py").read().split('\n')
        file2 = File("tests/resources/example_file_v2.py").read().split('\n')
        file3 = File("tests/resources/example_file_v3.py").read().split('\n')

        matrices1 = parse_diff_to_matrix(
            diff=File("tests/resources/diff1.patch").read(),
            new_source_code=file2
        )
        c1 = matrices1["/tests/resources/example_file.py"]

        matrices2 = parse_diff_to_matrix(
            diff=File("tests/resources/diff2.patch").read(),
            new_source_code=file3
        )
        c2 = matrices2["/tests/resources/example_file.py"]

        # file1 -> c1 -> file2 -> c2 -> file3
        return file1, c1, file2, c2, file3

    def test_parse(self):
        """Coverage of revision 2 maps back to revision 1 and forward to revision 3."""
        _, c1, _, c2, _ = self._get_test_data()

        coverage2 = np.matrix([1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0], dtype=int)
        coverage1 = coverage2 * c1.T
        coverage3 = coverage2 * c2

        self.assertEqual(coverage1.tolist(), [[1, 1, 0, 1, 0, 1, 1, 1, 0, 0]])
        self.assertEqual(coverage2.tolist(),
                         [[1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0]])
        self.assertEqual(coverage3.tolist(), [[1, 1, 0, 1, 1, 0, 0, 0, 0]])

    def test_diff_to_json(self):
        """`diff_to_json` reproduces the stored JSON for both sample patches."""
        actual1 = diff_to_json(File("tests/resources/diff1.patch").read())
        actual2 = diff_to_json(File("tests/resources/diff2.patch").read())

        expected1 = File("tests/resources/diff1.json").read_json(
            flexible=False, leaves=False
        )
        expected2 = File("tests/resources/diff2.json").read_json(
            flexible=False, leaves=False
        )

        self.assertEqual(actual1, expected1)
        self.assertEqual(actual2, expected2)

    def test_big_changeset_to_json(self):
        """`diff_to_json` reproduces the stored JSON for the big patch."""
        actual = diff_to_json(File("tests/resources/big.patch").read())
        expected = File("tests/resources/big.json").read_json(
            flexible=False, leaves=False
        )
        self.assertEqual(actual, expected)

    def test_changeset_to_json(self):
        """The diff returned by `get_revision` matches the stored big JSON."""
        query = wrap({
            "branch": {
                "name": "mozilla-central",
                "url": "https://hg.mozilla.org/mozilla-central"
            },
            "changeset": {"id": "e5693cea1ec944ca0"}
        })
        revision = self.hg.get_revision(
            query,
            None,  # Locale
            True  # get_diff
        )
        expected = File("tests/resources/big.json").read_json(
            flexible=False, leaves=False
        )
        self.assertEqual(revision.changeset.diff, expected)

    def test_net_new_lines(self):
        """Net new lines of revision 2 are derived from the column sums of c1."""
        _, c1, _, _, _ = self._get_test_data()

        # NET NEW LINES CAN BE EXTRACTED FROM A CHANGESET 1==New line, 0==Old line
        # MAYBE THIS SHOULD BE A FUNCTION CALLED net_new_lines(changeset)
        column_sums = np.sum(c1, 0)
        net_new_lines = (-column_sums) + 1
        self.assertEqual(net_new_lines.tolist(),
                         [0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0])

    def test_net_new_percent(self):
        """Half of the net new lines of revision 2 are covered."""
        _, c1, _, _, _ = self._get_test_data()

        # MAYBE THIS SHOULD BE A FUNCTION CALLED net_new_lines(changeset)
        column_sums = np.sum(c1, 0)
        net_new_lines2 = (-column_sums) + 1
        num_net_new_lines = np.sum(net_new_lines2)

        coverage2 = np.array([1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0], dtype=int)

        # MULTIPLY net_new_lines WITH THE coverage2 VECTOR TO GET INTERSECTION
        # sum THE INTERSECTION TO GET A COUNT
        # MAYBE WE SHOULD BE USING BOOLEAN ARRAYS EVERYWHERE
        covered_new_lines = coverage2 * net_new_lines2
        num_net_new_lines_covered = np.sum(covered_new_lines)

        net_new_percent = num_net_new_lines_covered / num_net_new_lines
        self.assertEqual(net_new_percent, 0.5)

    def test_percent_using_future_coverage(self):
        """Full coverage of revision 3 implies full coverage of revision 2's new lines."""
        _, c1, _, c2, _ = self._get_test_data()

        # WE ARE GIVEN FUTURE COVERAGE
        coverage3 = np.matrix([1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int)

        # CALCULATE THE COVERAGE FOR REVISION 2
        coverage2 = coverage3 * c2.T

        # THE NET NEW LINES FOR REVISION 2
        # THE MATRIX WILL ALLOW TRANSPOSES
        net_new_lines2 = np.matrix((-np.sum(c1, 0)) + 1)

        num_net_new_lines_covered = np.sum(coverage2 * net_new_lines2.T)
        total_net_new_lines = np.sum(net_new_lines2)
        net_new_percent = num_net_new_lines_covered / total_net_new_lines
        self.assertEqual(net_new_percent, 1)