Ejemplo n.º 1
0
    def __init__(self, conn=None, tuid_service=None, kwargs=None):
        """
        Prepare the changeset log and launch the background workers.

        :param conn: database connection to reuse; when falsy a new
            ``sql.Sql`` is opened on ``kwargs.database.name``
        :param tuid_service: TUID service to reuse; when falsy a new
            ``TUIDService`` is built on the same connection
        :param kwargs: settings object; ``database.name``, ``hg_cache`` and
            ``tuid`` are read from it

        Every exception raised during setup is caught and handed to
        ``Log.warning``; this method contains no ``raise`` of its own.
        """
        try:
            self.config = kwargs

            self.conn = conn or sql.Sql(self.config.database.name)

            if self.config.hg_cache:
                self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache,
                                             use_cache=True)
            else:
                self.hg_cache = Null

            if tuid_service:
                self.tuid_service = tuid_service
            else:
                # The service is given the full settings here; self.config is
                # only narrowed to its tuid section further down.
                self.tuid_service = tuid.service.TUIDService(
                    database=None,
                    hg=None,
                    kwargs=self.config,
                    conn=self.conn,
                    clogger=self,
                )

            self.rev_locker = Lock()
            self.working_locker = Lock()

            self.init_db()
            # coalesce() supplies 1 when the query yields no value
            max_row = self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")
            self.next_revnum = coalesce(max_row[0], 1)

            self.csets_todo_backwards = Queue(
                name="Clogger.csets_todo_backwards",
            )
            self.deletions_todo = Queue(name="Clogger.deletions_todo")
            self.maintenance_signal = Signal(name="Clogger.maintenance_signal")
            self.config = self.config.tuid

            self.disable_backfilling = False
            self.disable_tipfilling = False
            self.disable_deletion = False
            self.disable_maintenance = False

            # The table must hold the minimum number of changesets before
            # any worker is started.
            count_row = self.conn.get_one("SELECT count(revnum) FROM csetLog")
            cset_count = count_row[0]
            if cset_count < MINIMUM_PERMANENT_CSETS:
                Log.note(
                    "Filling in csets to hold {{minim}} csets.",
                    minim=MINIMUM_PERMANENT_CSETS,
                )
                with self.conn.transaction() as txn:
                    result = txn.query(
                        "SELECT min(revnum), revision FROM csetLog")
                    stored_rev = result.data[0][1]
                # Start from 'tip' when the table has no revision to offer
                start_rev = stored_rev or 'tip'
                missing = MINIMUM_PERMANENT_CSETS - cset_count
                self._fill_in_range(missing, start_rev, timestamp=False)

            Log.note(
                "Table is filled with atleast {{minim}} entries. Starting workers...",
                minim=MINIMUM_PERMANENT_CSETS,
            )

            Thread.run('clogger-tip', self.fill_forward_continuous)
            Thread.run('clogger-backfill', self.fill_backward_with_list)
            Thread.run('clogger-maintenance', self.csetLog_maintenance)
            Thread.run('clogger-deleter', self.csetLog_deleter)

            Log.note("Started clogger workers.")
        except Exception as err:
            Log.warning("Cannot setup clogger: {{cause}}", cause=str(err))
Ejemplo n.º 2
0
def main():
    """
    Load the settings, apply the configured constants, create the
    module-level ``hg`` client and then start logging.

    Any exception raised along the way is passed to ``Log.error`` together
    with the message "Problem with etl".
    """
    global config, hg
    try:
        settings = startup.read_settings()
        config = settings
        constants.set(settings.constants)
        hg = HgMozillaOrg(settings)
        Log.start(settings.debug)
    except Exception as cause:
        Log.error("Problem with etl", cause)
Ejemplo n.º 3
0
def main():
    """
    Walk the changeset graph backwards from a fixed starting revision and
    log every new lowest changes-per-file score that is found.

    Only revisions touching more than ``MIN_FILES`` files are scored. The
    score is the total number of changes in the revision's diff divided by
    the number of entries in that diff. A revision whose diff is empty is
    not scored (it would otherwise divide by zero and abort the scan), but
    its parents are still queued.

    Any exception is passed to ``Log.error``; ``Log.stop`` always runs.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        hg = HgMozillaOrg(settings)
        todo = Queue()
        todo.add("97160a734959")
        least = 100000

        while todo:
            next_ = todo.pop()
            curr = hg.get_revision(
                wrap({
                    "changeset": {
                        "id": next_
                    },
                    "branch": {
                        "name": BRANCH
                    }
                }))
            if len(curr.changeset.files) > MIN_FILES:
                diff = hg._get_json_diff_from_hg(curr)
                num_files = len(diff)
                # Guard: an empty diff has no defined score, skip it
                # instead of raising ZeroDivisionError.
                if num_files:
                    num_changes = sum(len(d.changes) for d in diff)
                    score = num_changes / num_files
                    if score < least:
                        least = score
                        Log.note(
                            "smallest = {{rev}}, num_lines={{num}}, num_files={{files}}",
                            rev=curr.changeset.id,
                            num=num_changes,
                            files=num_files)
            # Parents are queued whether or not this revision was scored
            todo.extend(listwrap(curr.parents))

    except Exception as e:
        Log.error("Problem with scna", e)
    finally:
        Log.stop()
Ejemplo n.º 4
0
def main():
    """
    Load the settings, start logging, create the module-level ``hg``
    client and run ``_parse_diff`` on one hard-coded mozilla-central
    changeset.

    Any exception raised along the way is passed to ``Log.error`` together
    with the message "Problem with etl".
    """
    global config, hg
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        hg = HgMozillaOrg(config)

        target = Data(
            changeset={"id": "2d9d0bebb5c6"},
            branch={"url": "https://hg.mozilla.org/mozilla-central"},
        )
        # The result is bound but not used any further in this function
        parsed = _parse_diff(target)
    except Exception as cause:
        Log.error("Problem with etl", cause)
Ejemplo n.º 5
0
 def setUp(self):
     """Create ``self.hg`` from the settings stored on ``TestHg``."""
     settings = TestHg.config
     self.hg = HgMozillaOrg(settings)
Ejemplo n.º 6
0
class TestHg(FuzzyTestCase):
    """
    Tests for the ``HgMozillaOrg`` client and the diff conversion helpers.

    Settings are read once per class from ``tests/config.json``; every test
    gets a fresh ``HgMozillaOrg`` instance in ``self.hg``.
    """
    config = Null

    @classmethod
    def setUpClass(cls):
        """Read the test settings, apply constants and start logging."""
        try:
            settings = startup.read_settings(filename="tests/config.json")
            cls.config = settings
            constants.set(settings.constants)
            Log.start(settings.debug)
        except Exception as cause:
            Log.error("Problem with etl", cause)

    @classmethod
    def tearDownClass(cls):
        """Stop logging once all tests of the class have run."""
        Log.stop()

    def setUp(self):
        """Create ``self.hg`` from the class-level settings."""
        settings = TestHg.config
        self.hg = HgMozillaOrg(settings)

    def _central_branch(self):
        # First branch named "mozilla-central" with locale "en-US";
        # indexing raises IndexError when there is none.
        matches = [
            branch for branch in self.hg.branches
            if branch.name == "mozilla-central" and branch.locale == "en-US"
        ]
        return matches[0]

    def _drain_todo(self):
        # Poll once per second until the client's todo queue is empty
        while len(self.hg.todo.queue):
            Till(seconds=1).wait()

    def test_get_push1(self):
        # Push details for one known mozilla-central changeset
        branch = self._central_branch()
        push = self.hg._get_push(branch, "b6b8e616de32")
        self.assertEqual(push, {
            "date": 1503659542,
            "user": "******",
            "id": 32390
        })
        self._drain_todo()

    def test_get_rev_with_backout(self):
        # The revision must report the changeset that backed it out
        branch = self._central_branch()
        query = wrap({
            "branch": branch,
            "changeset": {
                "id": "de7aa6b08234"
            }
        })
        revision = self.hg.get_revision(query)
        self.assertEqual(revision, {
            "changeset": {
                "backedoutby": "f384789a29dcfd514d25d4a16a97ec5309612d78"
            }
        })
        self._drain_todo()

    def test_get_prefix_space(self):
        # Leading spaces of a changed line must be kept in the diff content
        branch = self._central_branch()
        query = wrap({
            "branch": branch,
            "changeset": {
                "id": "de7aa6b08234"
            }
        })
        revision = self.hg.get_revision(query, None, True)

        content = revision.changeset.diff[1].changes[0].new.content
        self.assertTrue(content.startswith("    "))

    def test_diff_to_json(self):
        # Two small patches are compared against their stored JSON form
        actual1 = diff_to_json(File("tests/resources/diff1.patch").read())
        actual2 = diff_to_json(File("tests/resources/diff2.patch").read())

        expected1 = File("tests/resources/diff1.json").read_json(
            flexible=False, leaves=False)
        expected2 = File("tests/resources/diff2.json").read_json(
            flexible=False, leaves=False)

        self.assertEqual(actual1, expected1)
        self.assertEqual(actual2, expected2)

    def test_big_changeset_to_json(self):
        patch = File("tests/resources/big.patch")
        # The disabled line below writes the raw changeset into big.patch:
        # patch.write_bytes(http.get("https://hg.mozilla.org/mozilla-central/raw-rev/e5693cea1ec944ca077c7a46c5f127c828a90f1b").content)

        # Sanity check: decoding with 'replace' leaves a carriage return intact
        self.assertEqual(b'\r'.decode('utf8', 'replace'), u'\r')

        raw = patch.read_bytes()
        actual = diff_to_json(raw.decode("utf8", "replace"))
        expected = File("tests/resources/big.json").read_json(
            flexible=False, leaves=False)
        self.assertEqual(actual, expected)

    def test_small_changeset_to_json(self):
        patch = File("tests/resources/small.patch")

        raw = patch.read_bytes()
        actual = diff_to_json(raw.decode("utf8", "replace"))
        expected = File("tests/resources/small.json").read_json(
            flexible=False, leaves=False)
        self.assertEqual(actual, expected)

    def test_changeset_to_json(self):
        # The diff fetched through get_revision must equal the stored JSON
        query = wrap({
            "branch": {
                "name": "mozilla-central",
                "url": "https://hg.mozilla.org/mozilla-central"
            },
            "changeset": {
                "id": "e5693cea1ec944ca0"
            }
        })
        locale = None
        get_diff = True
        revision = self.hg.get_revision(query, locale, get_diff)

        expected = File("tests/resources/big.json").read_json(
            flexible=False, leaves=False)
        self.assertEqual(revision.changeset.diff, expected)

    def test_coverage_parser(self):
        # Logs the file name of each move; makes no assertion
        response = http.get(
            'https://hg.mozilla.org/mozilla-central/raw-rev/14dc6342ec5')
        diff = response.content.decode('utf8')
        moves = diff_to_moves(diff)

        names = []
        for move in moves:
            if move.new.name == 'dev/null':
                # Fall back to the old name when the new name is dev/null
                names.append(move.old.name)
            else:
                names.append(move.new.name)
        Log.note("{{files}}", files=names)
Ejemplo n.º 7
0
    def __init__(self,
                 conn=None,
                 tuid_service=None,
                 start_workers=True,
                 new_table=False,
                 kwargs=None):
        """
        Prepare the changeset log and optionally start the workers.

        :param conn: database connection to reuse; when falsy a new
            ``sql.Sql`` is opened on ``kwargs.database.name``
        :param tuid_service: TUID service to reuse; when falsy a new
            ``TUIDService`` is built from ``kwargs.tuid`` on the same
            connection
        :param start_workers: when truthy, ``self.start_workers()`` is
            called once the table is filled
        :param new_table: when truthy, the ``csetLog`` table is dropped
            before ``init_db`` runs
        :param kwargs: settings object; ``database.name``, ``hg_cache`` and
            ``tuid`` are read from it

        Every exception raised during setup is caught and handed to
        ``Log.warning``; this method contains no ``raise`` of its own.
        """
        try:
            self.config = kwargs
            self.conn = conn or sql.Sql(self.config.database.name)

            if self.config.hg_cache:
                self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache,
                                             use_cache=True)
            else:
                self.hg_cache = Null

            if tuid_service:
                self.tuid_service = tuid_service
            else:
                self.tuid_service = tuid.service.TUIDService(
                    kwargs=self.config.tuid,
                    conn=self.conn,
                    clogger=self,
                )

            self.rev_locker = Lock()
            self.working_locker = Lock()

            if new_table:
                with self.conn.transaction() as txn:
                    txn.execute("DROP TABLE IF EXISTS csetLog")

            self.init_db()
            # coalesce() supplies 1 when the query yields no value
            max_row = self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")
            self.next_revnum = coalesce(max_row[0], 1)

            self.csets_todo_backwards = Queue(
                name="Clogger.csets_todo_backwards",
            )
            self.deletions_todo = Queue(name="Clogger.deletions_todo")
            self.maintenance_signal = Signal(name="Clogger.maintenance_signal")

            # Narrow the settings to the tuid section when there is one
            if 'tuid' in self.config:
                self.config = self.config.tuid

            self.disable_backfilling = False
            self.disable_tipfilling = False
            self.disable_deletion = False
            self.disable_maintenance = False

            self.backfill_thread = None
            self.tipfill_thread = None
            self.deletion_thread = None
            self.maintenance_thread = None

            # The table must hold the minimum number of changesets before
            # any worker is started.
            count_row = self.conn.get_one("SELECT count(revnum) FROM csetLog")
            cset_count = count_row[0]
            if cset_count < MINIMUM_PERMANENT_CSETS:
                Log.note(
                    "Filling in csets to hold {{minim}} csets.",
                    minim=MINIMUM_PERMANENT_CSETS,
                )
                with self.conn.transaction() as txn:
                    result = txn.query(
                        "SELECT min(revnum), revision FROM csetLog")
                    stored_rev = result.data[0][1]
                # Start from 'tip' when the table has no revision to offer
                start_rev = stored_rev or 'tip'
                missing = MINIMUM_PERMANENT_CSETS - cset_count
                self._fill_in_range(missing, start_rev, timestamp=False)

            Log.note(
                "Table is filled with atleast {{minim}} entries.",
                minim=MINIMUM_PERMANENT_CSETS,
            )

            if start_workers:
                self.start_workers()
        except Exception as err:
            Log.warning("Cannot setup clogger: {{cause}}", cause=str(err))
Ejemplo n.º 8
0
 def setUp(self):
     """Create ``self.hg`` from the settings stored on ``TestParsing``."""
     settings = TestParsing.config
     self.hg = HgMozillaOrg(settings)
Ejemplo n.º 9
0
class TestParsing(FuzzyTestCase):
    """
    Tests for turning diffs into JSON and into line-mapping matrices, and
    for carrying coverage vectors across revisions with those matrices.
    """

    @classmethod
    def setUpClass(cls):
        """Read the settings, apply constants and start logging."""
        try:
            settings = startup.read_settings()
            cls.config = settings
            constants.set(settings.constants)
            Log.start(settings.debug)
        except Exception as cause:
            Log.error("Problem with etl", cause)

    @classmethod
    def tearDownClass(cls):
        """Stop logging once all tests of the class have run."""
        Log.stop()

    def setUp(self):
        """Create ``self.hg`` from the class-level settings."""
        settings = TestParsing.config
        self.hg = HgMozillaOrg(settings)

    @staticmethod
    def _net_new_lines(change):
        # Net new lines of a changeset matrix: 1 == new line, 0 == old line
        return (-np.sum(change, 0)) + 1

    def _get_test_data(self):
        """
        Return ``(file1, c1, file2, c2, file3)``: three versions of the
        example file as lists of lines, plus the two matrices parsed from
        the patches between them (file1 -> c1 -> file2 -> c2 -> file3).
        """
        file1, file2, file3 = [
            File(path).read().split('\n')
            for path in (
                "tests/resources/example_file_v1.py",
                "tests/resources/example_file_v2.py",
                "tests/resources/example_file_v3.py",
            )
        ]
        target = "/tests/resources/example_file.py"

        patch1 = File("tests/resources/diff1.patch").read()
        c1 = parse_diff_to_matrix(diff=patch1, new_source_code=file2)[target]

        patch2 = File("tests/resources/diff2.patch").read()
        c2 = parse_diff_to_matrix(diff=patch2, new_source_code=file3)[target]

        return file1, c1, file2, c2, file3

    def test_parse(self):
        # Coverage of revision 2 is carried back to revision 1 (through
        # c1 transposed) and forward to revision 3 (through c2)
        _, c1, _, c2, _ = self._get_test_data()

        covered = [1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0]
        coverage2 = np.matrix(covered, dtype=int)

        coverage1 = coverage2 * c1.T
        coverage3 = coverage2 * c2

        self.assertEqual(coverage1.tolist(), [[1, 1, 0, 1, 0, 1, 1, 1, 0, 0]])
        self.assertEqual(coverage2.tolist(), [covered])
        self.assertEqual(coverage3.tolist(), [[1, 1, 0, 1, 1, 0, 0, 0, 0]])

    def test_diff_to_json(self):
        # Two small patches are compared against their stored JSON form
        actual1 = diff_to_json(File("tests/resources/diff1.patch").read())
        actual2 = diff_to_json(File("tests/resources/diff2.patch").read())

        expected1 = File("tests/resources/diff1.json").read_json(
            flexible=False, leaves=False)
        expected2 = File("tests/resources/diff2.json").read_json(
            flexible=False, leaves=False)

        self.assertEqual(actual1, expected1)
        self.assertEqual(actual2, expected2)

    def test_big_changeset_to_json(self):
        patch_text = File("tests/resources/big.patch").read()
        actual = diff_to_json(patch_text)
        expected = File("tests/resources/big.json").read_json(
            flexible=False, leaves=False)
        self.assertEqual(actual, expected)

    def test_changeset_to_json(self):
        # The diff fetched through get_revision must equal the stored JSON
        query = wrap({
            "branch": {
                "name": "mozilla-central",
                "url": "https://hg.mozilla.org/mozilla-central"
            },
            "changeset": {
                "id": "e5693cea1ec944ca0"
            }
        })
        locale = None
        get_diff = True
        revision = self.hg.get_revision(query, locale, get_diff)

        expected = File("tests/resources/big.json").read_json(
            flexible=False, leaves=False)
        self.assertEqual(revision.changeset.diff, expected)

    def test_net_new_lines(self):
        # Net new lines can be extracted from a changeset matrix
        c1 = self._get_test_data()[1]

        net_new = self._net_new_lines(c1)

        self.assertEqual(net_new.tolist(),
                         [0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0])

    def test_net_new_percent(self):
        c1 = self._get_test_data()[1]

        net_new_lines2 = self._net_new_lines(c1)
        total_net_new = np.sum(net_new_lines2)

        coverage2 = np.array([1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0], dtype=int)
        # Multiplying the net-new-lines vector by coverage2 gives their
        # intersection; summing the intersection gives a count.
        # Boolean arrays might be a better fit everywhere.
        covered_net_new = np.sum(coverage2 * net_new_lines2)

        self.assertEqual(covered_net_new / total_net_new, 0.5)

    def test_percent_using_future_coverage(self):
        _, c1, _, c2, _ = self._get_test_data()

        # Coverage is only known for the later revision 3
        coverage3 = np.matrix([1] * 9, dtype=int)
        # Derive the coverage for revision 2 from it
        coverage2 = coverage3 * c2.T
        # Net new lines of revision 2; wrapped in a matrix so it can be
        # transposed
        net_new_lines2 = np.matrix(self._net_new_lines(c1))
        covered_net_new = np.sum(coverage2 * net_new_lines2.T)

        percent = covered_net_new / np.sum(net_new_lines2)
        self.assertEqual(percent, 1)