def test_case_as_url(self):
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    test_dict = {
        'definition': 'unit-test',
        'case': 'unit-test',
        'level': '1.3.4.1',
        'result': 'pass'
    }
    pattern = '[-_a-zA-Z0-9.\\(\\)]+'
    matches = re.search(pattern, test_dict['case'])
    self.assertIsNotNone(matches)  # passes
    self.assertEqual(matches.group(0), test_dict['case'])
    suite, _ = TestSuite.objects.get_or_create(
        name=test_dict["definition"], job=job)
    self.assertIsNotNone(
        reverse('lava.results.testcase',
                args=[job.id, suite.name, test_dict['case']]))
    self.assertTrue(map_scanned_results(test_dict, job, None))
    # now break the reverse pattern
    test_dict['case'] = 'unit test'  # whitespace in the case name
    matches = re.search(pattern, test_dict['case'])
    self.assertIsNotNone(matches)
    self.assertRaises(NoReverseMatch, reverse, 'lava.results.testcase',
                      args=[job.id, suite.name, test_dict['case']])
    self.assertFalse(map_scanned_results(test_dict, job, None))
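# Why test_case_as_url compares matches.group(0) to the full case name:
# re.search() is not a whole-string validator, it returns the first (possibly
# partial) match. A minimal sketch with plain stdlib re, nothing LAVA-specific:
#
#     >>> import re
#     >>> re.search('[-_a-zA-Z0-9.\\(\\)]+', 'unit test').group(0)
#     'unit'
#
# so a case name containing whitespace still produces a match object, but only
# up to the space, which is why the test pairs assertIsNotNone(matches) with
# the group(0) equality check and the NoReverseMatch assertion.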
def test_level_input(self):
    user = self.factory.make_user()
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), user)
    suite = TestSuite.objects.create(
        job=job,
        name='test-suite'
    )
    suite.save()
    result_sample = """
results:
    lava-test-shell: !!python/object/apply:collections.OrderedDict
      - - [ping-test, fail]
    power_off: !!python/object/apply:collections.OrderedDict
      - - [status, Complete]
        - [level, 5.1]
"""
    scanned = yaml.load(result_sample)
    ret = map_scanned_results(scanned_dict=scanned, job=job)
    self.assertTrue(ret)
    for testcase in TestCase.objects.filter(suite=suite):
        if testcase.name == 'power_off':
            self.assertTrue(type(testcase.metadata) in [str, unicode])
            self.assertTrue(type(testcase.action_data) == OrderedDict)
            self.assertEqual(testcase.action_data['status'], 'Complete')
            self.assertEqual(testcase.action_data['level'], 5.1)
            self.assertEqual(testcase.action_level, '5.1')
            self.assertEqual(testcase.result, TestCase.RESULT_UNKNOWN)
    self.factory.cleanup()
def test_set(self):
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), self.user)
    result_samples = [
        {"case": "linux-INLINE-lscpu", "definition": "smoke-tests-basic",
         "result": "fail", "set": "listing"},
        {"case": "linux-INLINE-lspci", "definition": "smoke-tests-basic",
         "result": "fail", "set": "listing"}
    ]
    suite = TestSuite.objects.create(
        job=job,
        name='test-suite'
    )
    suite.save()
    self.assertEqual('/results/%s/test-suite' % job.id,
                     suite.get_absolute_url())
    for sample in result_samples:
        ret = map_scanned_results(results=sample, job=job, meta_filename=None)
        self.assertTrue(ret)
    self.assertEqual(2, TestCase.objects.count())
    val = URLValidator()
    for testcase in TestCase.objects.filter(suite=suite):
        self.assertEqual(testcase.suite, suite)
        self.assertIsNotNone(testcase.name)
        self.assertIsNotNone(testcase.result)
        self.assertIsNone(testcase.metadata)
        self.assertEqual(testcase.result, TestCase.RESULT_PASS)
        self.assertEqual(testcase.test_set.name, 'listing')
        self.assertTrue(testcase.name.startswith('linux-INLINE-'))
        val('http://localhost/%s' % testcase.get_absolute_url())
    self.factory.cleanup()
def test_level_input(self):
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    suite = TestSuite.objects.create(job=job, name="lava")
    suite.save()
    ret = map_scanned_results(
        results={
            "case": "test-overlay",
            "definition": "lava",
            "duration": 0.01159811019897461,
            "level": "1.3.3.2",
            "result": "pass",
        },
        job=job,
        starttc=None,
        endtc=None,
        meta_filename=None,
    )
    self.assertTrue(ret)
    ret.save()
    self.assertEqual(1, TestCase.objects.filter(suite=suite).count())
    testcase = TestCase.objects.get(suite=suite)
    self.assertTrue(isinstance(testcase.metadata, string_types))
    self.assertEqual(testcase.result, TestCase.RESULT_PASS)
    self.factory.cleanup()
def test_metastore(self):
    field = TestCase._meta.get_field('metadata')
    level = '1.3.5.1'
    # artificially inflate results to represent a set of kernel messages
    results = {
        'definition': 'lava',
        'case': 'unit-test',
        # list of numbers, generates a much longer YAML string than just the count
        'extra': range(int(field.max_length / 2)),
        'result': 'pass'
    }
    stub = "%s-%s-%s.yaml" % (results['definition'], results['case'], level)
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), self.user)
    meta_filename = os.path.join(job.output_dir, 'metadata', stub)
    filename = "%s/job-%s/pipeline/%s/%s-%s.yaml" % (
        job.output_dir, job.id, level.split('.')[0], level,
        results['definition'])
    mkdir(os.path.dirname(filename))
    if os.path.exists(meta_filename):
        # isolate from other unit tests
        os.unlink(meta_filename)
    self.assertEqual(meta_filename, create_metadata_store(results, job, level))
    self.assertTrue(map_scanned_results(results, job, meta_filename))
    self.assertEqual(TestCase.objects.filter(name='unit-test').count(), 1)
    test_data = yaml.load(
        TestCase.objects.filter(name='unit-test')[0].metadata,
        Loader=yaml.CLoader)
    self.assertEqual(test_data['extra'], meta_filename)
    self.assertTrue(os.path.exists(meta_filename))
    with open(test_data['extra'], 'r') as extra_file:
        data = yaml.load(extra_file, Loader=yaml.CLoader)
    self.assertIsNotNone(data)
    os.unlink(meta_filename)
    shutil.rmtree(job.output_dir)
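# The behaviour exercised above, sketched as a standalone, hypothetical helper
# (an illustration of the pattern, not LAVA's actual implementation): when the
# serialised metadata would overflow the model field, the oversized key is
# spilled to a side file and replaced by that file's path, which is what the
# test_data['extra'] == meta_filename assertion checks.
#
#     import yaml
#
#     def spill_oversized(results, max_length, side_file):
#         # replace 'extra' with a path when the YAML dump would not fit
#         if len(yaml.dump(results)) > max_length:
#             with open(side_file, 'w') as handle:
#                 yaml.dump({'extra': results['extra']}, handle)
#             results['extra'] = side_file
#         return results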
def test_case_as_url(self):
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    test_dict = {
        "definition": "unit-test",
        "case": "unit-test",
        "level": "1.3.4.1",
        "result": "pass",
    }
    pattern = "[-_a-zA-Z0-9.\\(\\)]+"
    matches = re.search(pattern, test_dict["case"])
    self.assertIsNotNone(matches)  # passes
    self.assertEqual(matches.group(0), test_dict["case"])
    suite, _ = TestSuite.objects.get_or_create(
        name=test_dict["definition"], job=job)
    case, _ = TestCase.objects.get_or_create(
        suite=suite, name=test_dict["case"], result=TestCase.RESULT_PASS)
    self.assertIsNotNone(reverse("lava.results.testcase", args=[case.id]))
    self.assertIsNotNone(
        reverse("lava.results.testcase", args=[job.id, suite.name, case.id]))
    self.assertIsNotNone(map_scanned_results(test_dict, job, {}, None))
    # now break the reverse pattern
    test_dict["case"] = "unit test"  # whitespace in the case name
    matches = re.search(pattern, test_dict["case"])
    self.assertIsNotNone(matches)
    self.assertRaises(
        NoReverseMatch,
        reverse,
        "lava.results.testcase",
        args=[job.id, suite.name, test_dict["case"]],
    )
def test_bad_input(self):
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    # each sample is missing one of the mandatory case/definition/result keys
    result_samples = [
        {"definition": "lava", "result": "pass"},
        {"case": "test-runscript-overlay", "result": "pass"},
        {"case": "test-runscript-overlay", "definition": "lava"},
        {},
    ]
    for sample in result_samples:
        ret = map_scanned_results(results=sample, job=job,
                                  markers={}, meta_filename=None)
        self.assertFalse(ret)
    self.factory.cleanup()
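# For contrast, a minimal sketch of a result dictionary these versions of
# map_scanned_results() do accept (the shape is taken from the samples used
# elsewhere in these tests): all three of case, definition and result must be
# present.
#
#     sample = {
#         "case": "test-overlay",
#         "definition": "lava",
#         "result": "pass",
#     }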
def test_set(self):
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), self.user)
    result_samples = [
        {"case": "linux-INLINE-lscpu", "definition": "smoke-tests-basic",
         "result": "fail", "set": "listing"},
        {"case": "linux-INLINE-lspci", "definition": "smoke-tests-basic",
         "result": "fail", "set": "listing"}
    ]
    suite = TestSuite.objects.create(
        job=job,
        name='test-suite'
    )
    suite.save()
    self.assertEqual('/results/%s/test-suite' % job.id,
                     suite.get_absolute_url())
    for sample in result_samples:
        ret = map_scanned_results(results=sample, job=job)
        self.assertTrue(ret)
    self.assertEqual(2, TestCase.objects.count())
    val = URLValidator()
    for testcase in TestCase.objects.filter(suite=suite):
        self.assertEqual(testcase.suite, suite)
        self.assertIsNotNone(testcase.name)
        self.assertIsNotNone(testcase.result)
        self.assertIsNone(testcase.metadata)
        self.assertEqual(testcase.result, TestCase.RESULT_PASS)
        self.assertEqual(testcase.test_set.name, 'listing')
        self.assertTrue(testcase.name.startswith('linux-INLINE-'))
        val('http://localhost/%s' % testcase.get_absolute_url())
    self.factory.cleanup()
def test_case_as_url(self):
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), self.user)
    test_dict = {
        'definition': 'unit-test',
        'case': 'unit-test',
        'level': '1.3.4.1',
        'result': 'pass'
    }
    pattern = '[-_a-zA-Z0-9.\\(\\)]+'
    matches = re.search(pattern, test_dict['case'])
    self.assertIsNotNone(matches)  # passes
    self.assertEqual(matches.group(0), test_dict['case'])
    suite, _ = TestSuite.objects.get_or_create(
        name=test_dict["definition"], job=job)
    case, _ = TestCase.objects.get_or_create(suite=suite,
                                             name=test_dict['case'],
                                             result=TestCase.RESULT_PASS)
    self.assertIsNotNone(reverse('lava.results.testcase', args=[case.id]))
    self.assertIsNotNone(reverse('lava.results.testcase',
                                 args=[job.id, suite.name, case.id]))
    self.assertIsNotNone(map_scanned_results(test_dict, job, None))
    # now break the reverse pattern
    test_dict['case'] = 'unit test'  # whitespace in the case name
    matches = re.search(pattern, test_dict['case'])
    self.assertIsNotNone(matches)
    self.assertRaises(NoReverseMatch, reverse, 'lava.results.testcase',
                      args=[job.id, suite.name, test_dict['case']])
def test_bad_input(self):
    user = self.factory.make_user()
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), user)
    # missing {'results'} key
    result_sample = """
lava-test-shell: !!python/object/apply:collections.OrderedDict
  - - [ping-test, fail]
    - [realpath-check, fail]
    - [ntpdate-check, pass]
    - [curl-ftp, pass]
    - [tar-tgz, pass]
    - [remove-tgz, pass]
"""
    scanned = yaml.load(result_sample)
    suite = TestSuite.objects.create(
        job=job,
        name='test-suite'
    )
    suite.save()
    self.assertEqual('/results/%s/test-suite' % job.id,
                     suite.get_absolute_url())
    ret = map_scanned_results(scanned_dict=scanned, job=job)
    self.assertFalse(ret)
    result_sample = """
results:
    lava-test-shell: !!python/object/apply:collections.OrderedDict
      - - [ping-test, fail]
    power_off: !!python/object/apply:collections.OrderedDict
      - - [status, Complete]
"""
    scanned = yaml.load(result_sample)
    self.assertEqual('/results/%s/test-suite' % job.id,
                     suite.get_absolute_url())
    ret = map_scanned_results(scanned_dict=scanned, job=job)
    self.assertTrue(ret)
    for testcase in TestCase.objects.filter(suite=suite):
        if testcase.name == 'power_off':
            self.assertTrue(type(testcase.metadata) in [str, unicode])
            self.assertTrue(type(testcase.action_data) == OrderedDict)
            self.assertEqual(testcase.action_data['status'], 'Complete')
            self.assertEqual(testcase.result, TestCase.RESULT_UNKNOWN)
        elif testcase.name == 'ping-test':
            self.assertIsNone(testcase.metadata)
            self.assertEqual(testcase.result, TestCase.RESULT_FAIL)
        else:
            self.fail("Unrecognised testcase name")
    self.factory.cleanup()
def test_decimal_yaml_dump(self):
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    test_dict = {
        "definition": "unit-test",
        "case": "unit-test",
        "measurement": decimal.Decimal(1234.5),
        "result": "pass",
    }
    test_case = map_scanned_results(test_dict, job, {}, None)
    self.assertEqual(yaml_load(test_case.metadata)["measurement"], "1234.5")
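# Why the measurement round-trips as exactly '1234.5' (stdlib behaviour,
# nothing LAVA-specific): the float literal 1234.5 is exactly representable in
# binary, so the Decimal built from it carries no excess digits:
#
#     >>> import decimal
#     >>> str(decimal.Decimal(1234.5))
#     '1234.5'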
def test_pipelinestore(self):
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    result_samples = [
        {
            "case": "test-runscript-overlay",
            "definition": "lava",
            "duration": 1.8733930587768555,
            "level": "1.3.3.4",
            "result": "pass",
        },
        {
            "case": "apply-overlay-guest",
            "definition": "lava",
            "duration": 46.395780086517334,
            "level": "1.4",
            "result": "pass",
        },
        {
            "case": "smoke-tests-basic",
            "definition": "lava",
            "uuid": "44148c2f-3c7d-4143-889e-dd4a77084e07",
            "result": "fail",
        },
        {
            "case": "linux-INLINE-lscpu",
            "definition": "smoke-tests-basic",
            "result": "pass",
        },
        {
            "case": "smoke-tests-basic",
            "definition": "lava",
            "duration": "2.61",
            "uuid": "44148c2f-3c7d-4143-889e-dd4a77084e07",
            "result": "pass",
        },
    ]
    for sample in result_samples:
        ret = map_scanned_results(results=sample, job=job,
                                  starttc=None, endtc=None,
                                  meta_filename=None)
        self.assertTrue(ret)
        ret.save()
    # the duplicate smoke-tests-basic is allowed here as the lava test suite
    # supports multiples
    self.assertEqual(5, TestCase.objects.filter(suite__job=job).count())
    val = URLValidator()
    for testcase in TestCase.objects.all():
        self.assertIsNotNone(testcase.name)
        self.assertIsNotNone(testcase.result)
        if testcase.test_set:
            val("http://localhost/%s" % testcase.get_absolute_url())
    self.factory.cleanup()
def test_pipelinestore(self):
    user = self.factory.make_user()
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), user)
    result_sample = {
        'results': {
            'test-runscript-overlay': OrderedDict([
                ('success', 'c66c77b2-bc32-4cec-bc6d-477712da7eb6'),
                ('filename', '/tmp/tmp9ICoFn/lava-device/tests/2_singlenode-advanced/run.sh')]),
            'test-install-overlay': OrderedDict([
                ('success', 'c66c77b2-bc32-4cec-bc6d-477712da7eb6')]),
            'power_off': OrderedDict([('status', 'Complete')]),
            'test-overlay': OrderedDict([
                ('success', 'c66c77b2-bc32-4cec-bc6d-477712da7eb6')]),
            'git-repo-action': OrderedDict([
                ('success', '6dd3121dc7f2855d710e83fe39c217392e4fb2b4')]),
            'lava-test-shell': OrderedDict([
                ('linux-linaro-ubuntu-pwd', 'pass'),
                ('linux-linaro-ubuntu-uname', 'pass'),
                ('linux-linaro-ubuntu-vmstat', 'pass'),
                ('linux-linaro-ubuntu-ifconfig', 'pass'),
                ('linux-linaro-ubuntu-lscpu', 'pass'),
                ('linux-linaro-ubuntu-lsb_release', 'fail'),
                ('linux-linaro-ubuntu-netstat', 'pass'),
                ('linux-linaro-ubuntu-ifconfig-dump', 'pass'),
                ('linux-linaro-ubuntu-route-dump-a', 'pass'),
                ('linux-linaro-ubuntu-route-ifconfig-up-lo', 'pass'),
                ('linux-linaro-ubuntu-route-dump-b', 'pass'),
                ('linux-linaro-ubuntu-route-ifconfig-up', 'pass'),
                ('ping-test', 'fail'),
                ('realpath-check', 'fail'),
                ('ntpdate-check', 'pass'),
                ('curl-ftp', 'pass'),
                ('tar-tgz', 'pass'),
                ('remove-tgz', 'pass')])
        }
    }
    ret = map_scanned_results(scanned_dict=result_sample, job=job)
    self.assertTrue(ret)
    self.assertIsNot([], TestCase.objects.all())
    val = URLValidator()
    for testcase in TestCase.objects.all():
        self.assertIsNotNone(testcase.name)
        self.assertIsNotNone(testcase.result)
        if testcase.test_set:
            val('http://localhost/%s' % testcase.get_absolute_url())
    self.assertIsNotNone(TestCase.objects.filter(name='ping-test'))
    self.factory.cleanup()
def test_decimal_yaml_dump(self):
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    test_dict = {
        'definition': 'unit-test',
        'case': 'unit-test',
        'measurement': decimal.Decimal(1234.5),
        'result': 'pass',
    }
    test_case = map_scanned_results(test_dict, job, {}, None)
    metadata_yaml_ref = "{case: unit-test, definition: unit-test, measurement: '1234.5', result: pass}"
    self.assertEqual(metadata_yaml_ref, test_case.metadata.strip())
def test_length(self):
    field = TestCase._meta.get_field('metadata')
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    # artificially inflate the dict to represent a failed parser
    test_dict = {
        'definition': 'unit-test',
        'case': 'unit-test',
        # list of numbers, generates a much longer YAML string than just the count
        'result': range(int(field.max_length / 2))
    }
    self.assertGreater(len(yaml.dump(test_dict)), field.max_length)
    self.assertFalse(map_scanned_results(test_dict, job))
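# Rough sizing sketch behind the assertGreater above (plain PyYAML, assuming
# the Python 2 era of this code where range() yields a list): every integer in
# the dump costs at least three characters, so max_length / 2 integers always
# overflow a field of max_length characters.
#
#     >>> import yaml
#     >>> len(yaml.dump({'result': list(range(512))})) > 1024
#     True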
def test_set(self):
    user = self.factory.make_user()
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), user)
    result_sample = """
results:
    lava-test-shell: !!python/object/apply:collections.OrderedDict
      - - [ping-test, fail]
        - - set-name
          - !!python/object/apply:collections.OrderedDict
            - - [linux-linaro-foo, pass]
              - [linux-linaro-ubuntu-uname, pass]
              - [linux-linaro-ubuntu-vmstat, pass]
              - [linux-linaro-ubuntu-ifconfig, pass]
              - [linux-linaro-ubuntu-lscpu, pass]
              - [linux-linaro-ubuntu-lsb_release, pass]
              - [linux-linaro-ubuntu-netstat, pass]
              - [linux-linaro-ubuntu-ifconfig-dump, pass]
              - [linux-linaro-ubuntu-route-dump-a, pass]
              - [linux-linaro-ubuntu-route-ifconfig-up-lo, pass]
              - [linux-linaro-ubuntu-route-dump-b, pass]
              - [linux-linaro-ubuntu-route-ifconfig-up, pass]
        - [realpath-check, fail]
        - [ntpdate-check, pass]
        - [curl-ftp, pass]
        - [tar-tgz, pass]
        - [remove-tgz, pass]
"""
    scanned = yaml.load(result_sample)
    suite = TestSuite.objects.create(
        job=job,
        name='test-suite'
    )
    suite.save()
    self.assertEqual('/results/%s/test-suite' % job.id,
                     suite.get_absolute_url())
    ret = map_scanned_results(scanned_dict=scanned, job=job)
    self.assertTrue(ret)
    self.assertIsNot([], TestCase.objects.all())
    val = URLValidator()
    for testcase in TestCase.objects.filter(suite=suite):
        self.assertEqual(testcase.suite, suite)
        self.assertIsNotNone(testcase.name)
        self.assertIsNotNone(testcase.result)
        self.assertIsNone(testcase.metadata)
        self.assertNotEqual(testcase.result, TestCase.RESULT_UNKNOWN)
        if testcase.test_set:
            self.assertEqual(testcase.test_set.name, 'set-name')
            self.assertTrue(testcase.name.startswith('linux-linaro'))
        val('http://localhost/%s' % testcase.get_absolute_url())
    self.factory.cleanup()
def test_bad_input(self):
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), self.user)
    # each sample is missing one of the mandatory case/definition/result keys
    result_samples = [
        {"definition": "lava", "result": "pass"},
        {"case": "test-runscript-overlay", "result": "pass"},
        {"case": "test-runscript-overlay", "definition": "lava"},
        {}
    ]
    for sample in result_samples:
        ret = map_scanned_results(results=sample, job=job)
        self.assertFalse(ret)
    self.factory.cleanup()
def test_metastore(self):
    field = TestCase._meta.get_field("metadata")
    level = "1.3.5.1"
    # artificially inflate results to represent a set of kernel messages
    results = {
        "definition": "lava",
        "case": "unit-test",
        "level": level,
        # list of numbers, generates a much longer YAML string than just the count
        "extra": range(int(field.max_length / 2)),
        "result": "pass",
    }
    stub = "%s-%s-%s.yaml" % (results["definition"], results["case"], level)
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    meta_filename = os.path.join(job.output_dir, "metadata", stub)
    filename = "%s/job-%s/pipeline/%s/%s-%s.yaml" % (
        job.output_dir,
        job.id,
        level.split(".")[0],
        level,
        results["definition"],
    )
    mkdir(os.path.dirname(filename))
    if os.path.exists(meta_filename):
        # isolate from other unit tests
        os.unlink(meta_filename)
    self.assertEqual(meta_filename, create_metadata_store(results, job))
    ret = map_scanned_results(results, job, {}, meta_filename)
    self.assertIsNotNone(ret)
    ret.save()
    self.assertEqual(TestCase.objects.filter(name="unit-test").count(), 1)
    test_data = yaml.load(  # nosec - unit test
        TestCase.objects.filter(name="unit-test")[0].metadata,
        Loader=yaml.CLoader)
    self.assertEqual(test_data["extra"], meta_filename)
    self.assertTrue(os.path.exists(meta_filename))
    with open(test_data["extra"], "r") as extra_file:
        data = yaml.load(extra_file, Loader=yaml.CLoader)  # nosec - unit test
    self.assertIsNotNone(data)
    os.unlink(meta_filename)
    shutil.rmtree(job.output_dir)
def test_level_input(self):
    job = TestJob.from_yaml_and_user(
        self.factory.make_job_yaml(), self.user)
    suite = TestSuite.objects.create(
        job=job,
        name='lava'
    )
    suite.save()
    ret = map_scanned_results(
        results={"case": "test-overlay",
                 "definition": "lava",
                 "duration": 0.01159811019897461,
                 "level": "1.3.3.2",
                 "result": "pass"},
        job=job)
    self.assertTrue(ret)
    self.assertEqual(1, TestCase.objects.filter(suite=suite).count())
    testcase = TestCase.objects.get(suite=suite)
    self.assertTrue(type(testcase.metadata) in [str, unicode])
    self.assertEqual(testcase.result, TestCase.RESULT_PASS)
    self.factory.cleanup()
def test_pipelinestore(self):
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    result_samples = [
        {"case": "test-runscript-overlay", "definition": "lava",
         "duration": 1.8733930587768555, "level": "1.3.3.4",
         "result": "pass"},
        {"case": "apply-overlay-guest", "definition": "lava",
         "duration": 46.395780086517334, "level": "1.4",
         "result": "pass"},
        {"case": "smoke-tests-basic", "definition": "lava",
         "uuid": "44148c2f-3c7d-4143-889e-dd4a77084e07",
         "result": "fail"},
        {"case": "linux-INLINE-lscpu", "definition": "smoke-tests-basic",
         "result": "pass"},
        {"case": "smoke-tests-basic", "definition": "lava",
         "duration": "2.61",
         "uuid": "44148c2f-3c7d-4143-889e-dd4a77084e07",
         "result": "pass"}
    ]
    for sample in result_samples:
        ret = map_scanned_results(results=sample, job=job)
        self.assertTrue(ret)
    self.assertEqual(4, TestCase.objects.count())
    val = URLValidator()
    for testcase in TestCase.objects.all():
        self.assertIsNotNone(testcase.name)
        self.assertIsNotNone(testcase.result)
        if testcase.test_set:
            val('http://localhost/%s' % testcase.get_absolute_url())
    self.factory.cleanup()
def test_metastore(self):
    field = TestCase._meta.get_field('metadata')
    level = '1.3.5.1'
    # artificially inflate results to represent a set of kernel messages
    results = {
        'definition': 'lava',
        'case': 'unit-test',
        'level': level,
        # list of numbers, generates a much longer YAML string than just the count
        'extra': range(int(field.max_length / 2)),
        'result': 'pass'
    }
    stub = "%s-%s-%s.yaml" % (results['definition'], results['case'], level)
    job = TestJob.from_yaml_and_user(self.factory.make_job_yaml(), self.user)
    meta_filename = os.path.join(job.output_dir, 'metadata', stub)
    filename = "%s/job-%s/pipeline/%s/%s-%s.yaml" % (
        job.output_dir, job.id, level.split('.')[0], level,
        results['definition'])
    mkdir(os.path.dirname(filename))
    if os.path.exists(meta_filename):
        # isolate from other unit tests
        os.unlink(meta_filename)
    self.assertEqual(meta_filename, create_metadata_store(results, job))
    ret = map_scanned_results(results, job, {}, meta_filename)
    self.assertIsNotNone(ret)
    ret.save()
    self.assertEqual(TestCase.objects.filter(name='unit-test').count(), 1)
    test_data = yaml.load(
        TestCase.objects.filter(name='unit-test')[0].metadata,
        Loader=yaml.CLoader)
    self.assertEqual(test_data['extra'], meta_filename)
    self.assertTrue(os.path.exists(meta_filename))
    with open(test_data['extra'], 'r') as extra_file:
        data = yaml.load(extra_file, Loader=yaml.CLoader)
    self.assertIsNotNone(data)
    os.unlink(meta_filename)
    shutil.rmtree(job.output_dir)
def logging_socket(self):
    msg = self.log_socket.recv_multipart()
    try:
        (job_id, message) = (u(m) for m in msg)  # pylint: disable=unbalanced-tuple-unpacking
    except ValueError:
        # do not let a bad message stop the master.
        self.logger.error("[POLL] failed to parse log message, skipping: %s",
                          msg)
        return

    try:
        scanned = yaml.load(message, Loader=yaml.CLoader)
    except yaml.YAMLError:
        self.logger.error("[%s] data are not valid YAML, dropping", job_id)
        return

    # Look for "results" level
    try:
        message_lvl = scanned["lvl"]
        message_msg = scanned["msg"]
    except TypeError:
        self.logger.error("[%s] not a dictionary, dropping", job_id)
        return
    except KeyError:
        self.logger.error(
            "[%s] invalid log line, missing \"lvl\" or \"msg\" keys: %s",
            job_id, message)
        return

    # Find the handler (if available)
    if job_id not in self.jobs:
        # Query the database for the job
        try:
            job = TestJob.objects.get(id=job_id)
        except TestJob.DoesNotExist:
            self.logger.error("[%s] unknown job id", job_id)
            return
        self.logger.info("[%s] receiving logs from a new job", job_id)
        # Create the sub directories (if needed)
        mkdir(job.output_dir)
        self.jobs[job_id] = JobHandler(job)

    if message_lvl == "results":
        try:
            job = TestJob.objects.get(pk=job_id)
        except TestJob.DoesNotExist:
            self.logger.error("[%s] unknown job id", job_id)
            return
        meta_filename = create_metadata_store(message_msg, job)
        new_test_case = map_scanned_results(results=message_msg, job=job,
                                            meta_filename=meta_filename)
        if new_test_case is None:
            self.logger.warning(
                "[%s] unable to map scanned results: %s", job_id, message)
        else:
            self.test_cases.append(new_test_case)

        # Look for lava.job result
        if message_msg.get("definition") == "lava" and message_msg.get("case") == "job":
            # Flush cached test cases
            self.flush_test_cases()

            if message_msg.get("result") == "pass":
                health = TestJob.HEALTH_COMPLETE
                health_msg = "Complete"
            else:
                health = TestJob.HEALTH_INCOMPLETE
                health_msg = "Incomplete"
            self.logger.info("[%s] job status: %s", job_id, health_msg)

            infrastructure_error = (message_msg.get("error_type") in
                                    ["Bug", "Configuration", "Infrastructure"])
            if infrastructure_error:
                self.logger.info("[%s] Infrastructure error", job_id)

            # Update status.
            with transaction.atomic():
                # TODO: find a way to lock actual_device
                job = TestJob.objects.select_for_update() \
                                     .get(id=job_id)
                job.go_state_finished(health, infrastructure_error)
                job.save()

    # Mark the file handler as used
    self.jobs[job_id].last_usage = time.time()
    # n.b. logging here would produce a log entry for every message in
    # every job.
    # The format is a list of dictionaries
    message = "- %s" % message
    # Write data
    self.jobs[job_id].write(message)
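# Shape of the messages handled above, sketched with plain PyYAML (the values
# are illustrative, not from a real job): each multipart message carries the
# job id plus a YAML document with "lvl" and "msg" keys; for results, "msg"
# holds the case dictionary that map_scanned_results() consumes.
#
#     >>> import yaml
#     >>> message = yaml.dump({
#     ...     "lvl": "results",
#     ...     "msg": {"definition": "lava", "case": "job", "result": "pass"},
#     ... })
#     >>> scanned = yaml.load(message, Loader=yaml.SafeLoader)
#     >>> scanned["lvl"], scanned["msg"]["case"]
#     ('results', 'job')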
def handle(self, *args, **options):
    # FIXME: this function is getting much too long and complex.
    del logging.root.handlers[:]
    del logging.root.filters[:]

    # Create the logger
    FORMAT = '%(asctime)-15s %(levelname)s %(message)s'  # pylint: disable=invalid-name
    logging.basicConfig(format=FORMAT,
                        filename='/var/log/lava-server/lava-master.log')
    self.logger = logging.getLogger('dispatcher-master')

    if options['level'] == 'ERROR':
        self.logger.setLevel(logging.ERROR)
    elif options['level'] == 'WARN':
        self.logger.setLevel(logging.WARN)
    elif options['level'] == 'INFO':
        self.logger.setLevel(logging.INFO)
    else:
        self.logger.setLevel(logging.DEBUG)

    # Create the sockets
    context = zmq.Context()
    pull_socket = context.socket(zmq.PULL)
    pull_socket.bind(options['log_socket'])
    controler = context.socket(zmq.ROUTER)
    controler.bind(options['master_socket'])

    # List of logs
    logs = {}
    # List of known dispatchers. At startup do not load this from the
    # database. This will help to know if the slave has restarted or not.
    dispatchers = {}
    # Last access to the database for new jobs and cancellations
    last_db_access = 0

    # Poll on the sockets (only one for the moment). This allows us to
    # have a nice timeout along with polling.
    poller = zmq.Poller()
    poller.register(pull_socket, zmq.POLLIN)
    poller.register(controler, zmq.POLLIN)

    # Mask signals and create a pipe that will receive a bit for each
    # signal received. Poll the pipe along with the zmq socket so that we
    # can only be interrupted while reading data.
    (pipe_r, pipe_w) = os.pipe()
    flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0)
    fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags | os.O_NONBLOCK)
    signal.set_wakeup_fd(pipe_w)
    signal.signal(signal.SIGINT, lambda x, y: None)
    signal.signal(signal.SIGTERM, lambda x, y: None)
    signal.signal(signal.SIGQUIT, lambda x, y: None)
    poller.register(pipe_r, zmq.POLLIN)

    self.logger.info("[INIT] LAVA dispatcher-master has started.")

    while True:
        try:
            # TODO: Fix the timeout computation
            # Wait for data or a timeout
            sockets = dict(poller.poll(TIMEOUT * 1000))
        except zmq.error.ZMQError:
            continue

        if sockets.get(pipe_r) == zmq.POLLIN:
            self.logger.info("[POLL] Received a signal, leaving")
            break

        # Logging socket
        if sockets.get(pull_socket) == zmq.POLLIN:
            msg = pull_socket.recv_multipart()
            (job_id, level, name, message) = msg

            try:
                scanned = yaml.load(message)
            except yaml.YAMLError:
                self.logger.error("Failed to scan: %s", message)
                scanned = None
            # the results logger wraps the OrderedDict in a dict called
            # results, for identification. YAML then puts that into a list
            # of one item for each call to log.results.
            if type(scanned) is list and len(scanned) == 1:
                if type(scanned[0]) is dict and 'results' in scanned[0]:
                    job = TestJob.objects.get(id=job_id)
                    ret = map_scanned_results(scanned_dict=scanned[0],
                                              job=job)
                    if not ret:
                        self.logger.warning(
                            "[%s] Unable to map scanned results: %s" % (
                                job_id, yaml.dump(scanned[0])))

            # Clear filename
            if '/' in level or '/' in name:
                self.logger.error(
                    "[%s] Wrong level or name received, dropping the message",
                    job_id)
                continue
            filename = "%s/job-%s/pipeline/%s/%s-%s.log" % (
                options['output_dir'], job_id, level.split('.')[0],
                level, name)

            # Find the handler (if available)
            f_handler = None
            if job_id in logs:
                if filename != logs[job_id].filename:
                    # Close the old file handler
                    logs[job_id].close()
                    path = os.path.join('/tmp', 'lava-dispatcher', 'jobs',
                                        job_id, filename)
                    mkdir(os.path.dirname(path))
                    logs[job_id] = FileHandler(filename, path)
            else:
                self.logger.info("[%s] Receiving logs from a new job",
                                 job_id)
                path = os.path.join('/tmp', 'lava-dispatcher', 'jobs',
                                    job_id, filename)
                mkdir(os.path.dirname(path))
                logs[job_id] = FileHandler(filename, path)

            # Mark the file handler as used
            # TODO: try to use a more pythonic way
            logs[job_id].last_usage = time.time()

            # n.b. logging here would produce a log entry for every
            # message in every job.

            # Write data
            f_handler = logs[job_id].fd
            f_handler.write(message)
            f_handler.write('\n')
            f_handler.flush()

            # FIXME: to be removed when the web UI knows how to deal with
            # pipeline logs
            filename = os.path.join(options['output_dir'],
                                    "job-%s" % job_id,
                                    'output.txt')
            with open(filename, 'a+') as f_out:
                f_out.write(message)
                f_out.write('\n')

        # Garbage collect file handlers
        now = time.time()
        for job_id in logs.keys():
            if now - logs[job_id].last_usage > FD_TIMEOUT:
                self.logger.info("[%s] Collecting file handler '%s'",
                                 job_id, logs[job_id].filename)
                logs[job_id].close()
                del logs[job_id]

        # Command socket
        if sockets.get(controler) == zmq.POLLIN:
            msg = controler.recv_multipart()
            self.logger.debug("[CC] Receiving: %s", msg)

            # 1: the hostname (see ZMQ documentation)
            hostname = msg[0]
            # 2: the action
            action = msg[1]

            # Handle the actions
            if action == 'HELLO':
                self.logger.info("%s => HELLO", hostname)
                controler.send_multipart([hostname, 'HELLO_OK'])
                # If the dispatcher is known and sent a HELLO, that means
                # the slave has restarted
                if hostname in dispatchers:
                    self.logger.warning("Dispatcher <%s> has RESTARTED",
                                        hostname)
                else:
                    self.logger.warning("New dispatcher <%s>", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname,
                                                            online=True)

                self._cancel_slave_dispatcher_jobs(hostname)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            elif action == "HELLO_RETRY":
                self.logger.info("%s => HELLO_RETRY", hostname)
                controler.send_multipart([hostname, "HELLO_OK"])

                if hostname in dispatchers:
                    # Assume the HELLO command was received, and the
                    # action succeeded.
                    self.logger.warning(
                        "Dispatcher <%s> was not confirmed", hostname)
                else:
                    # No dispatcher, treat it as a normal HELLO message.
                    self.logger.warning("New dispatcher <%s>", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname,
                                                            online=True)

                    self._cancel_slave_dispatcher_jobs(hostname)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            elif action == 'PING':
                self.logger.debug("%s => PING", hostname)
                # Send back a signal
                controler.send_multipart([hostname, 'PONG'])

                if hostname not in dispatchers:
                    # The server crashed: send a STATUS message
                    self.logger.warning(
                        "Unknown dispatcher <%s> (server crashed)", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname,
                                                            online=True)
                    send_status(hostname, controler, self.logger)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            elif action == 'END':
                status = TestJob.COMPLETE
                try:
                    job_id = int(msg[2])
                    job_status = int(msg[3])
                except (IndexError, ValueError):
                    self.logger.error("Invalid message from <%s> '%s'",
                                      hostname, msg)
                    continue
                if job_status:
                    self.logger.info("[%d] %s => END with error %d",
                                     job_id, hostname, job_status)
                    status = TestJob.INCOMPLETE
                else:
                    self.logger.info("[%d] %s => END", job_id, hostname)

                try:
                    with transaction.atomic():
                        job = TestJob.objects.select_for_update() \
                                             .get(id=job_id)
                        if job.status == TestJob.CANCELING:
                            cancel_job(job)
                        else:
                            end_job(job, job_status=status)
                except TestJob.DoesNotExist:
                    self.logger.error("[%d] Unknown job", job_id)

                # ACK even if the job is unknown to let the dispatcher
                # forget about it
                controler.send_multipart([hostname, 'END_OK', str(job_id)])

                if hostname not in dispatchers:
                    # The server crashed: send a STATUS message
                    self.logger.warning(
                        "Unknown dispatcher <%s> (server crashed)", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname,
                                                            online=True)
                    send_status(hostname, controler, self.logger)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            elif action == 'START_OK':
                try:
                    job_id = int(msg[2])
                except (IndexError, ValueError):
                    self.logger.error("Invalid message from <%s> '%s'",
                                      hostname, msg)
                    continue
                self.logger.info("[%d] %s => START_OK", job_id, hostname)
                try:
                    with transaction.atomic():
                        job = TestJob.objects.select_for_update() \
                                             .get(id=job_id)
                        start_job(job)
                except TestJob.DoesNotExist:
                    self.logger.error("[%d] Unknown job", job_id)

                if hostname not in dispatchers:
                    # The server crashed: send a STATUS message
                    self.logger.warning(
                        "Unknown dispatcher <%s> (server crashed)", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname,
                                                            online=True)
                    send_status(hostname, controler, self.logger)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            else:
                self.logger.error("<%s> sent unknown action=%s, args=(%s)",
                                  hostname, action, msg[1:])

        # Check dispatchers status
        now = time.time()
        for hostname in dispatchers.keys():
            dispatcher = dispatchers[hostname]
            if dispatcher.online and now - dispatcher.last_msg > DISPATCHER_TIMEOUT:
                self.logger.error("Dispatcher <%s> goes OFFLINE", hostname)
                dispatchers[hostname].online = False
                # TODO: DB: mark the dispatcher as offline and attached
                # devices

        # Limit accesses to the database. This will also limit the rate of
        # CANCEL and START messages
        if now - last_db_access > DB_LIMIT:
            last_db_access = now
            # Dispatch jobs
            # TODO: make this atomic
            not_allocated = 0
            # only pick up pipeline jobs with devices in Reserved state
            for job in TestJob.objects.filter(
                    status=TestJob.SUBMITTED,
                    is_pipeline=True,
                    actual_device__isnull=False).order_by(
                        '-health_check', '-priority', 'submit_time',
                        'target_group', 'id'):
                if job.dynamic_connection:
                    # A secondary connection must be made from a dispatcher
                    # local to the host device to allow for local firewalls
                    # etc. So the secondary connection is started on the
                    # remote worker of the "nominated" host.
                    # FIXME:
                    worker_host = job.lookup_worker
                    self.logger.info("[%d] START => %s (connection)",
                                     job.id, worker_host.hostname)
                else:
                    device = select_device(job)
                    if not device:
                        continue
                    # selecting device can change the job
                    job = TestJob.objects.get(id=job.id)
                    self.logger.info("[%d] Assigning %s device",
                                     job.id, device)
                    if job.actual_device is None:
                        device = job.requested_device
                        # Launch the job
                        create_job(job, device)
                        self.logger.info("[%d] START => %s (%s)",
                                         job.id,
                                         device.worker_host.hostname,
                                         device.hostname)
                        worker_host = device.worker_host
                    else:
                        device = job.actual_device
                        self.logger.info(
                            "[%d] START => %s (%s) (retrying)",
                            job.id, device.worker_host.hostname,
                            device.hostname)
                        worker_host = device.worker_host
                try:
                    # Load job definition to get the variables for template
                    # rendering
                    job_def = yaml.load(job.definition)
                    job_ctx = job_def.get('context', {})

                    # Load device configuration
                    device_configuration = None \
                        if job.dynamic_connection else device.load_device_configuration(job_ctx)

                    if job.is_multinode:
                        for group_job in job.sub_jobs_list:
                            if group_job.dynamic_connection:
                                # to get this far, the rest of the
                                # multinode group must also be ready so
                                # start the dynamic connections
                                # FIXME: rationalise and streamline
                                controler.send_multipart([
                                    str(worker_host.hostname),
                                    'START',
                                    str(group_job.id),
                                    str(group_job.definition),
                                    str(device_configuration),
                                    str(open(options['env'], 'r').read())
                                ])

                    controler.send_multipart([
                        str(worker_host.hostname),
                        'START',
                        str(job.id),
                        str(job.definition),
                        str(device_configuration),
                        get_env_string(options['env']),
                        get_env_string(options['env_dut'])
                    ])

                except (jinja2.TemplateError, IOError, yaml.YAMLError) as exc:
                    if isinstance(exc, jinja2.TemplateNotFound):
                        self.logger.error("Template not found: '%s'",
                                          exc.message)
                        msg = "Infrastructure error: Template not found: '%s'" % \
                              exc.message
                    elif isinstance(exc, jinja2.TemplateSyntaxError):
                        self.logger.error(
                            "Template syntax error in '%s', line %d: %s",
                            exc.name, exc.lineno, exc.message)
                        msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                              (exc.name, exc.lineno, exc.message)
                    elif isinstance(exc, IOError):
                        self.logger.error("Unable to read '%s': %s",
                                          options['env'], exc.strerror)
                        msg = "Infrastructure error: cannot open '%s': %s" % \
                              (options['env'], exc.strerror)
                    elif isinstance(exc, yaml.YAMLError):
                        self.logger.error(
                            "Unable to parse job definition: %s", exc)
                        msg = "Infrastructure error: cannot parse job definition: %s" % \
                              exc
                    else:
                        self.logger.exception(exc)
                        msg = "Infrastructure error: %s" % exc.message

                    self.logger.error("[%d] INCOMPLETE job", job.id)
                    job.status = TestJob.INCOMPLETE
                    if job.dynamic_connection:
                        job.failure_comment = msg
                        job.save()
                    else:
                        new_status = Device.IDLE
                        device.state_transition_to(new_status, message=msg,
                                                   job=job)
                        device.status = new_status
                        device.current_job = None
                        job.failure_comment = msg
                        job.save()
                        device.save()

            if not_allocated > 0:
                self.logger.info("%d jobs not allocated yet", not_allocated)

            # Handle canceling jobs
            for job in TestJob.objects.filter(status=TestJob.CANCELING,
                                              is_pipeline=True):
                worker_host = job.lookup_worker if job.dynamic_connection else job.actual_device.worker_host
                if not worker_host:
                    self.logger.warning("[%d] Invalid worker information" % job.id)
                    # shouldn't happen
                    fail_job(job, 'invalid worker information',
                             TestJob.CANCELED)
                    continue
                self.logger.info("[%d] CANCEL => %s", job.id,
                                 worker_host.hostname)
                controler.send_multipart([str(worker_host.hostname),
                                          'CANCEL', str(job.id)])

    # Closing sockets and dropping messages.
    self.logger.info("Closing the socket and dropping messages")
    controler.close(linger=0)
    pull_socket.close(linger=0)
    context.term()
def handle(self, *args, **options): # FIXME: this function is getting much too long and complex. del logging.root.handlers[:] del logging.root.filters[:] # Create the logger FORMAT = '%(asctime)-15s %(levelname)s %(message)s' # pylint: disable=invalid-name logging.basicConfig(format=FORMAT, filename='/var/log/lava-server/lava-master.log') self.logger = logging.getLogger('dispatcher-master') if options['level'] == 'ERROR': self.logger.setLevel(logging.ERROR) elif options['level'] == 'WARN': self.logger.setLevel(logging.WARN) elif options['level'] == 'INFO': self.logger.setLevel(logging.INFO) else: self.logger.setLevel(logging.DEBUG) # Create the sockets context = zmq.Context() pull_socket = context.socket(zmq.PULL) pull_socket.bind(options['log_socket']) controler = context.socket(zmq.ROUTER) controler.bind(options['master_socket']) # List of logs logs = {} # List of known dispatchers. At startup do not load this from the # database. This will help to know if the slave as restarted or not. dispatchers = {} # Last access to the database for new jobs and cancelations last_db_access = 0 # Poll on the sockets (only one for the moment). This allow to have a # nice timeout along with polling. poller = zmq.Poller() poller.register(pull_socket, zmq.POLLIN) poller.register(controler, zmq.POLLIN) # Mask signals and create a pipe that will receive a bit for each # signal received. Poll the pipe along with the zmq socket so that we # can only be interupted while reading data. (pipe_r, pipe_w) = os.pipe() flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0) fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags | os.O_NONBLOCK) signal.set_wakeup_fd(pipe_w) signal.signal(signal.SIGINT, lambda x, y: None) signal.signal(signal.SIGTERM, lambda x, y: None) signal.signal(signal.SIGQUIT, lambda x, y: None) poller.register(pipe_r, zmq.POLLIN) if os.path.exists('/etc/lava-server/worker.conf'): self.logger.error("FAIL: lava-master must not be run on a remote worker!") controler.close(linger=0) pull_socket.close(linger=0) context.term() sys.exit(2) self.logger.info("[INIT] LAVA dispatcher-master has started.") while True: try: # TODO: Fix the timeout computation # Wait for data or a timeout sockets = dict(poller.poll(TIMEOUT * 1000)) except zmq.error.ZMQError: continue if sockets.get(pipe_r) == zmq.POLLIN: self.logger.info("[POLL] Received a signal, leaving") break # Logging socket if sockets.get(pull_socket) == zmq.POLLIN: msg = pull_socket.recv_multipart() try: (job_id, level, name, message) = msg except ValueError: # do not let a bad message stop the master. self.logger.error("Failed to parse log message, skipping: %s", msg) continue try: scanned = yaml.load(message) except yaml.YAMLError: # failure to scan is not an error here, it just means the message is not a result scanned = None # the results logger wraps the OrderedDict in a dict called results, for identification, # YAML then puts that into a list of one item for each call to log.results. 
if type(scanned) is list and len(scanned) == 1: if type(scanned[0]) is dict and 'results' in scanned[0]: job = TestJob.objects.get(id=job_id) ret = map_scanned_results(scanned_dict=scanned[0], job=job) if not ret: self.logger.warning("[%s] Unable to map scanned results: %s" % (job_id, yaml.dump(scanned[0]))) # Clear filename if '/' in level or '/' in name: self.logger.error("[%s] Wrong level or name received, dropping the message", job_id) continue filename = "%s/job-%s/pipeline/%s/%s-%s.log" % (options['output_dir'], job_id, level.split('.')[0], level, name) # Find the handler (if available) f_handler = None if job_id in logs: if filename != logs[job_id].filename: # Close the old file handler logs[job_id].close() path = os.path.join('/tmp', 'lava-dispatcher', 'jobs', job_id, filename) mkdir(os.path.dirname(path)) logs[job_id] = FileHandler(filename, path) else: self.logger.info("[%s] Receiving logs from a new job", job_id) path = os.path.join('/tmp', 'lava-dispatcher', 'jobs', job_id, filename) mkdir(os.path.dirname(path)) logs[job_id] = FileHandler(filename, path) # Mark the file handler as used # TODO: try to use a more pythonnic way logs[job_id].last_usage = time.time() # n.b. logging here would produce a log entry for every message in every job. # Write data f_handler = logs[job_id].fd f_handler.write(message) f_handler.write('\n') f_handler.flush() # FIXME: to be removed when the web UI knows how to deal with pipeline logs filename = os.path.join(options['output_dir'], "job-%s" % job_id, 'output.txt') with open(filename, 'a+') as f_out: f_out.write(message) f_out.write('\n') # Garbage collect file handlers now = time.time() for job_id in logs.keys(): if now - logs[job_id].last_usage > FD_TIMEOUT: self.logger.info("[%s] Closing log file", job_id) logs[job_id].close() del logs[job_id] # Command socket if sockets.get(controler) == zmq.POLLIN: msg = controler.recv_multipart() self.logger.debug("[CC] Receiving: %s", msg) # 1: the hostname (see ZMQ documentation) hostname = msg[0] # 2: the action action = msg[1] # Handle the actions if action == 'HELLO': self.logger.info("%s => %s", hostname, action) controler.send_multipart([hostname, 'HELLO_OK']) # If the dispatcher is known and sent an HELLO, means that # the slave has restarted if hostname in dispatchers: self.logger.warning("Dispatcher <%s> has RESTARTED", hostname) else: self.logger.warning("New dispatcher <%s>", hostname) dispatchers[hostname] = SlaveDispatcher(hostname, online=True) # FIXME: slaves need to be allowed to restart cleanly without affecting jobs # as well as handling unexpected crashes. self._cancel_slave_dispatcher_jobs(hostname) # Mark the dispatcher as alive dispatchers[hostname].alive() elif action == "HELLO_RETRY": self.logger.info("%s => HELLO_RETRY", hostname) controler.send_multipart([hostname, "HELLO_OK"]) if hostname in dispatchers: # Assume the HELLO command was received, and the # action succeeded. self.logger.warning( "Dispatcher <%s> was not confirmed", hostname) else: # No dispatcher, treat it as a normal HELLO message. 
self.logger.warning("New dispatcher <%s>", hostname) dispatchers[hostname] = SlaveDispatcher( hostname, online=True) self._cancel_slave_dispatcher_jobs(hostname) # Mark the dispatcher as alive dispatchers[hostname].alive() elif action == 'PING': self.logger.debug("%s => PING", hostname) # Send back a signal controler.send_multipart([hostname, 'PONG']) if hostname not in dispatchers: # The server crashed: send a STATUS message self.logger.warning("Unknown dispatcher <%s> (server crashed)", hostname) dispatchers[hostname] = SlaveDispatcher(hostname, online=True) send_status(hostname, controler, self.logger) # Mark the dispatcher as alive dispatchers[hostname].alive() elif action == "ERROR": try: job_id = int(msg[2]) error_msg = str(msg[3]) except (IndexError, ValueError): self.logger.error("Invalid message from <%s> '%s'", hostname, msg[:50]) continue self.logger.error("[%d] Error: %s", job_id, error_msg) # Mark the dispatcher as alive dispatchers[hostname].alive() elif action == 'END': status = TestJob.COMPLETE try: job_id = int(msg[2]) job_status = int(msg[3]) except (IndexError, ValueError): self.logger.error("Invalid message from <%s> '%s'", hostname, msg) continue if job_status: self.logger.info("[%d] %s => END with error %d", job_id, hostname, job_status) status = TestJob.INCOMPLETE else: self.logger.info("[%d] %s => END", job_id, hostname) try: with transaction.atomic(): job = TestJob.objects.select_for_update() \ .get(id=job_id) if job.status == TestJob.CANCELING: cancel_job(job) else: end_job(job, job_status=status) except TestJob.DoesNotExist: self.logger.error("[%d] Unknown job", job_id) # ACK even if the job is unknown to let the dispatcher # forget about it controler.send_multipart([hostname, 'END_OK', str(job_id)]) if hostname not in dispatchers: # The server crashed: send a STATUS message self.logger.warning("Unknown dispatcher <%s> (server crashed)", hostname) dispatchers[hostname] = SlaveDispatcher(hostname, online=True) send_status(hostname, controler, self.logger) # Mark the dispatcher as alive dispatchers[hostname].alive() elif action == 'START_OK': try: job_id = int(msg[2]) except (IndexError, ValueError): self.logger.error("Invalid message from <%s> '%s'", hostname, msg) continue self.logger.info("[%d] %s => START_OK", job_id, hostname) try: with transaction.atomic(): job = TestJob.objects.select_for_update() \ .get(id=job_id) start_job(job) except TestJob.DoesNotExist: self.logger.error("[%d] Unknown job", job_id) if hostname not in dispatchers: # The server crashed: send a STATUS message self.logger.warning("Unknown dispatcher <%s> (server crashed)", hostname) dispatchers[hostname] = SlaveDispatcher(hostname, online=True) send_status(hostname, controler, self.logger) # Mark the dispatcher as alive dispatchers[hostname].alive() else: self.logger.error("<%s> sent unknown action=%s, args=(%s)", hostname, action, msg[1:]) # Check dispatchers status now = time.time() for hostname in dispatchers.keys(): dispatcher = dispatchers[hostname] if dispatcher.online and now - dispatcher.last_msg > DISPATCHER_TIMEOUT: self.logger.error("Dispatcher <%s> goes OFFLINE", hostname) dispatchers[hostname].online = False # TODO: DB: mark the dispatcher as offline and attached # devices # Limit accesses to the database. 
# This will also limit the rate of
# CANCEL and START messages
if now - last_db_access > DB_LIMIT:
    last_db_access = now

    # Dispatch jobs
    # TODO: make this atomic
    not_allocated = 0
    # only pick up pipeline jobs with devices in Reserved state
    for job in TestJob.objects.filter(
            status=TestJob.SUBMITTED,
            is_pipeline=True,
            actual_device__isnull=False).order_by(
                '-health_check', '-priority', 'submit_time',
                'target_group', 'id'):
        if job.dynamic_connection:
            # A secondary connection must be made from a dispatcher
            # local to the host device to allow for local firewalls
            # etc. So the secondary connection is started on the
            # remote worker of the "nominated" host.
            # FIXME:
            worker_host = job.lookup_worker
            self.logger.info("[%d] START => %s (connection)",
                             job.id, worker_host.hostname)
        else:
            device = select_device(job, dispatchers)
            if not device:
                not_allocated += 1
                continue
            # selecting a device can change the job
            job = TestJob.objects.get(id=job.id)
            self.logger.info("[%d] Assigning %s device", job.id, device)
            if job.actual_device is None:
                device = job.requested_device
                if not device.worker_host:
                    msg = "Infrastructure error: Invalid worker information"
                    self.logger.error("[%d] %s", job.id, msg)
                    fail_job(job, msg, TestJob.INCOMPLETE)
                    continue
                # Launch the job
                create_job(job, device)
                self.logger.info("[%d] START => %s (%s)",
                                 job.id, device.worker_host.hostname,
                                 device.hostname)
            else:
                device = job.actual_device
                if not device.worker_host:
                    msg = "Infrastructure error: Invalid worker information"
                    self.logger.error("[%d] %s", job.id, msg)
                    fail_job(job, msg, TestJob.INCOMPLETE)
                    continue
                self.logger.info("[%d] START => %s (%s) (retrying)",
                                 job.id, device.worker_host.hostname,
                                 device.hostname)
            worker_host = device.worker_host

        try:
            # Load the job definition to get the variables for
            # template rendering
            job_def = yaml.load(job.definition)
            job_ctx = job_def.get('context', {})

            # Load the device configuration
            device_configuration = (
                None if job.dynamic_connection
                else device.load_device_configuration(job_ctx))

            if job.is_multinode:
                for group_job in job.sub_jobs_list:
                    if group_job.dynamic_connection:
                        # to get this far, the rest of the multinode
                        # group must also be ready, so start the
                        # dynamic connections
                        # FIXME: rationalise and streamline
                        controler.send_multipart(
                            [str(worker_host.hostname), 'START',
                             str(group_job.id),
                             self.export_definition(group_job),
                             str(device_configuration),
                             str(open(options['env'], 'r').read())])

            controler.send_multipart(
                [str(worker_host.hostname), 'START', str(job.id),
                 self.export_definition(job),
                 str(device_configuration),
                 get_env_string(options['env']),
                 get_env_string(options['env_dut'])])
        except (jinja2.TemplateError, IOError, yaml.YAMLError) as exc:
            if isinstance(exc, jinja2.TemplateNotFound):
                self.logger.error("Template not found: '%s'", exc.message)
                msg = "Infrastructure error: Template not found: '%s'" % \
                      exc.message
            elif isinstance(exc, jinja2.TemplateSyntaxError):
                self.logger.error("Template syntax error in '%s', line %d: %s",
                                  exc.name, exc.lineno, exc.message)
                msg = "Infrastructure error: Template syntax error " \
                      "in '%s', line %d: %s" % (exc.name, exc.lineno, exc.message)
            elif isinstance(exc, IOError):
                self.logger.error("Unable to read '%s': %s",
                                  options['env'], exc.strerror)
                msg = "Infrastructure error: cannot open '%s': %s" % \
                      (options['env'], exc.strerror)
            elif isinstance(exc, yaml.YAMLError):
                self.logger.error("Unable to parse job definition: %s", exc)
                msg = "Infrastructure error: cannot parse job definition: %s" % \
                      exc
            else:
                self.logger.exception(exc)
                msg = "Infrastructure error: %s" % exc.message

            self.logger.error("[%d] INCOMPLETE job", job.id)
            job.status = TestJob.INCOMPLETE
            if job.dynamic_connection:
                job.failure_comment = msg
                job.save()
            else:
                new_status = Device.IDLE
                device.state_transition_to(new_status, message=msg, job=job)
                device.status = new_status
                device.current_job = None
                job.failure_comment = msg
                job.save()
                device.save()

    if not_allocated > 0:
        self.logger.info("%d jobs not allocated yet", not_allocated)

    # Handle canceling jobs
    for job in TestJob.objects.filter(status=TestJob.CANCELING,
                                      is_pipeline=True):
        worker_host = job.lookup_worker if job.dynamic_connection \
            else job.actual_device.worker_host
        if not worker_host:
            # shouldn't happen
            self.logger.warning("[%d] Invalid worker information", job.id)
            fail_job(job, 'invalid worker information', TestJob.CANCELED)
            continue
        self.logger.info("[%d] CANCEL => %s", job.id, worker_host.hostname)
        controler.send_multipart([str(worker_host.hostname),
                                  'CANCEL', str(job.id)])

# Closing sockets and dropping messages.
self.logger.info("Closing the socket and dropping messages")
controler.close(linger=0)
pull_socket.close(linger=0)
context.term()
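# The dispatch loop above drives remote workers with multipart ZMQ
# frames ('START'/'CANCEL' plus the job payload). A minimal sketch of
# the matching receive loop on the worker side, assuming a DEALER
# socket whose identity is the worker hostname (so the master's ROUTER
# can address it by hostname); the endpoint URL and the handlers
# run_pipeline_job() and cancel_job() are illustrative assumptions,
# not the actual lava-slave implementation.
import zmq

def run_pipeline_job(job_id, definition, device_config):
    # placeholder: hand the job over to a dispatcher process
    print("starting job %s" % job_id)

def cancel_job(job_id):
    # placeholder: signal the running dispatcher process
    print("cancelling job %s" % job_id)

def worker_loop(hostname, master_url="tcp://lava-master:5556"):
    context = zmq.Context()
    socket = context.socket(zmq.DEALER)
    socket.setsockopt(zmq.IDENTITY, hostname.encode())
    socket.connect(master_url)
    while True:
        msg = socket.recv_multipart()
        action = msg[0]
        if action == b'START':
            # frames mirror the send_multipart() calls above: job id,
            # exported definition, device configuration, env strings
            run_pipeline_job(msg[1], msg[2], msg[3])
        elif action == b'CANCEL':
            cancel_job(msg[1])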
def logging_socket(self, options):
    msg = self.pull_socket.recv_multipart()
    try:
        (job_id, level, name, message) = msg  # pylint: disable=unbalanced-tuple-unpacking
    except ValueError:
        # do not let a bad message stop the master.
        self.logger.error("Failed to parse log message, skipping: %s", msg)
        return

    try:
        scanned = yaml.load(message, Loader=yaml.CLoader)
    except yaml.YAMLError:
        self.logger.error("[%s] data are not valid YAML, dropping", job_id)
        return

    # Look for "results" level
    try:
        message_lvl = scanned["lvl"]
        message_msg = scanned["msg"]
    except KeyError:
        self.logger.error(
            "[%s] Invalid log line, missing \"lvl\" or \"msg\" keys: %s",
            job_id, message)
        return

    # Reject levels or names that would escape the output directory
    if '/' in level or '/' in name:
        self.logger.error(
            "[%s] Wrong level or name received, dropping the message",
            job_id)
        return

    # Find the handler (if available)
    if job_id in self.jobs:
        if level != self.jobs[job_id].current_level:
            # Close the old file handler and open one for the new level
            self.jobs[job_id].sub_log.close()
            filename = os.path.join(self.jobs[job_id].output_dir,
                                    "pipeline", level.split('.')[0],
                                    "%s-%s.yaml" % (level, name))
            mkdir(os.path.dirname(filename))
            self.jobs[job_id].current_level = level
            self.jobs[job_id].sub_log = open(filename, 'a+')
    else:
        # Query the database for the job
        try:
            job = TestJob.objects.get(id=job_id)
        except TestJob.DoesNotExist:
            self.logger.error("[%s] Unknown job id", job_id)
            return

        self.logger.info("[%s] Receiving logs from a new job", job_id)
        filename = os.path.join(job.output_dir,
                                "pipeline", level.split('.')[0],
                                "%s-%s.yaml" % (level, name))
        # Create the sub directories (if needed)
        mkdir(os.path.dirname(filename))
        self.jobs[job_id] = JobHandler(job, level, filename)

    if message_lvl == "results":
        try:
            job = TestJob.objects.get(pk=job_id)
        except TestJob.DoesNotExist:
            self.logger.error("[%s] Unknown job id", job_id)
            return
        meta_filename = create_metadata_store(message_msg, job, level)
        ret = map_scanned_results(results=message_msg, job=job,
                                  meta_filename=meta_filename)
        if not ret:
            self.logger.warning("[%s] Unable to map scanned results: %s",
                                job_id, message)

    # Mark the file handler as used
    self.jobs[job_id].last_usage = time.time()

    # n.b. logging here would produce a log entry for every message in
    # every job. The format is a list of dictionaries.
    message = "- %s" % message

    # Write data
    self.jobs[job_id].write(message)
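# The logging_socket() above expects a JobHandler keeping one open file
# per action level under <output_dir>/pipeline/. A minimal sketch of
# such a handler, derived only from the attributes used above
# (output_dir, current_level, sub_log, last_usage, write()); the real
# class may carry more state.
import time

class JobHandler(object):
    def __init__(self, job, level, filename):
        self.output_dir = job.output_dir
        self.current_level = level
        self.sub_log = open(filename, 'a+')
        self.last_usage = time.time()

    def write(self, message):
        # one YAML list entry per log line
        self.sub_log.write(message)
        self.sub_log.write('\n')
        self.sub_log.flush()

    def close(self):
        self.sub_log.close()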
def logging_socket(self):
    msg = self.log_socket.recv_multipart()
    try:
        (job_id, message) = (u(m) for m in msg)  # pylint: disable=unbalanced-tuple-unpacking
    except ValueError:
        # do not let a bad message stop the master.
        self.logger.error("[POLL] failed to parse log message, skipping: %s", msg)
        return

    try:
        scanned = yaml.load(message, Loader=yaml.CLoader)
    except yaml.YAMLError:
        self.logger.error("[%s] data are not valid YAML, dropping", job_id)
        return

    # Look for "results" level
    try:
        message_lvl = scanned["lvl"]
        message_msg = scanned["msg"]
    except TypeError:
        self.logger.error("[%s] not a dictionary, dropping", job_id)
        return
    except KeyError:
        self.logger.error(
            "[%s] invalid log line, missing \"lvl\" or \"msg\" keys: %s",
            job_id, message)
        return

    # Find the handler (if available)
    if job_id not in self.jobs:
        # Query the database for the job
        try:
            job = TestJob.objects.get(id=job_id)
        except TestJob.DoesNotExist:
            self.logger.error("[%s] unknown job id", job_id)
            return

        self.logger.info("[%s] receiving logs from a new job", job_id)
        # Create the sub directories (if needed)
        mkdir(job.output_dir)
        self.jobs[job_id] = JobHandler(job)

    # For 'event', send an event and log as 'debug'
    if message_lvl == 'event':
        self.logger.debug("[%s] event: %s", job_id, message_msg)
        send_event(".event", "lavaserver", {"message": message_msg,
                                            "job": job_id})
        message_lvl = "debug"
    # For 'marker', save in the database and log as 'debug'
    elif message_lvl == 'marker':
        # TODO: save on the file system in case of lava-logs restart
        m_type = message_msg.get("type")
        case = message_msg.get("case")
        if m_type is None or case is None:
            self.logger.error("[%s] invalid marker: %s", job_id, message_msg)
            return
        self.jobs[job_id].markers.setdefault(case, {})[m_type] = \
            self.jobs[job_id].line_count()
        # This is in fact the previous line
        self.jobs[job_id].markers[case][m_type] -= 1
        self.logger.debug("[%s] marker: %s line: %s", job_id, message_msg,
                          self.jobs[job_id].markers[case][m_type])
        return

    # Mark the file handler as used
    self.jobs[job_id].last_usage = time.time()

    # The format is a list of dictionaries
    self.jobs[job_id].write("- %s" % message)

    if message_lvl == "results":
        try:
            job = TestJob.objects.get(pk=job_id)
        except TestJob.DoesNotExist:
            self.logger.error("[%s] unknown job id", job_id)
            return
        meta_filename = create_metadata_store(message_msg, job)
        new_test_case = map_scanned_results(
            results=message_msg, job=job,
            markers=self.jobs[job_id].markers,
            meta_filename=meta_filename)
        if new_test_case is None:
            self.logger.warning(
                "[%s] unable to map scanned results: %s", job_id, message)
        else:
            self.test_cases.append(new_test_case)

        # Look for lava.job result
        if message_msg.get("definition") == "lava" and \
                message_msg.get("case") == "job":
            # Flush cached test cases
            self.flush_test_cases()

            if message_msg.get("result") == "pass":
                health = TestJob.HEALTH_COMPLETE
                health_msg = "Complete"
            else:
                health = TestJob.HEALTH_INCOMPLETE
                health_msg = "Incomplete"
            self.logger.info("[%s] job status: %s", job_id, health_msg)

            infrastructure_error = (message_msg.get("error_type") in
                                    ["Bug", "Configuration", "Infrastructure"])
            if infrastructure_error:
                self.logger.info("[%s] Infrastructure error", job_id)

            # Update status.
            with transaction.atomic():
                # TODO: find a way to lock actual_device
                job = TestJob.objects.select_for_update() \
                                     .get(id=job_id)
                job.go_state_finished(health, infrastructure_error)
                job.save()
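# In this version map_scanned_results() returns an unsaved TestCase so
# lava-logs can batch database writes. A minimal sketch of the
# flush_test_cases() method called above, assuming Django's
# bulk_create() with a one-by-one fallback; the real method may handle
# errors differently.
from django.db import DatabaseError

def flush_test_cases(self):
    if not self.test_cases:
        return
    try:
        # one INSERT for the whole batch
        TestCase.objects.bulk_create(self.test_cases)
    except DatabaseError:
        # fall back to individual saves so one bad row does not
        # drop the whole batch
        for case in self.test_cases:
            try:
                case.save()
            except DatabaseError:
                self.logger.error("Unable to save a test case: %s", case)
    self.test_cases = []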
def logging_socket(self, options):
    msg = self.pull_socket.recv_multipart()
    try:
        (job_id, level, name, message) = msg  # pylint: disable=unbalanced-tuple-unpacking
    except ValueError:
        # do not let a bad message stop the master.
        self.logger.error("Failed to parse log message, skipping: %s", msg)
        return

    try:
        scanned = yaml.load(message)
    except yaml.YAMLError:
        self.logger.error("[%s] data are not valid YAML, dropping", job_id)
        return

    # Look for "results" level
    try:
        message_lvl = scanned["lvl"]
        message_msg = scanned["msg"]
    except KeyError:
        self.logger.error(
            "[%s] Invalid log line, missing \"lvl\" or \"msg\" keys: %s",
            job_id, message)
        return

    # Reject levels or names that would escape the output directory
    if '/' in level or '/' in name:
        self.logger.error(
            "[%s] Wrong level or name received, dropping the message",
            job_id)
        return

    filename = "%s/job-%s/pipeline/%s/%s-%s.yaml" % (options['output_dir'],
                                                     job_id,
                                                     level.split('.')[0],
                                                     level, name)

    # Find the handler (if available)
    if job_id in self.logs:
        if filename != self.logs[job_id].filename:
            # Close the old file handler
            self.logs[job_id].close()
            mkdir(os.path.dirname(filename))
            self.logs[job_id] = FileHandler(filename)
    else:
        self.logger.info("[%s] Receiving logs from a new job", job_id)
        mkdir(os.path.dirname(filename))
        self.logs[job_id] = FileHandler(filename)

    if message_lvl == "results":
        try:
            job = TestJob.objects.get(pk=job_id)
        except TestJob.DoesNotExist:
            self.logger.error("[%s] Unknown job id", job_id)
            return
        meta_filename = create_metadata_store(message_msg, job, level)
        ret = map_scanned_results(results=message_msg, job=job,
                                  meta_filename=meta_filename)
        if not ret:
            self.logger.warning(
                "[%s] Unable to map scanned results: %s", job_id, message)

    # Mark the file handler as used
    # TODO: try to use a more pythonic way
    self.logs[job_id].last_usage = time.time()

    # n.b. logging here would produce a log entry for every message in
    # every job. The format is a list of dictionaries.
    message = "- %s" % message

    # Write data
    f_handler = self.logs[job_id].fd
    f_handler.write(message)
    f_handler.write('\n')
    f_handler.flush()

    # TODO: keep the file handler to avoid calling open for each line
    filename = os.path.join(options['output_dir'],
                            "job-%s" % job_id, 'output.yaml')
    with open(filename, 'a+') as f_out:
        f_out.write(message)
        f_out.write('\n')
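# This version and the next keep per-job log files behind a small
# FileHandler wrapper. A minimal sketch, derived only from the
# attributes used above (filename, fd, last_usage, close()); the real
# class may differ.
import time

class FileHandler(object):
    def __init__(self, filename):
        self.filename = filename
        self.fd = open(filename, 'a+')
        self.last_usage = time.time()

    def close(self):
        self.fd.close()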
def logging_socket(self, options):
    msg = self.pull_socket.recv_multipart()
    try:
        (job_id, level, name, message) = msg
    except ValueError:
        # do not let a bad message stop the master.
        self.logger.error("Failed to parse log message, skipping: %s", msg)
        return

    try:
        scanned = yaml.load(message)
    except yaml.YAMLError:
        self.logger.error("[%s] data are not valid YAML, dropping", job_id)
        return

    # Look for "results" level
    try:
        message_lvl = scanned["lvl"]
        message_msg = scanned["msg"]
    except KeyError:
        self.logger.error(
            "[%s] Invalid log line, missing \"lvl\" or \"msg\" keys: %s",
            job_id, message)
        return

    if message_lvl == "results":
        try:
            job = TestJob.objects.get(pk=job_id)
        except TestJob.DoesNotExist:
            self.logger.error("[%s] Unknown job id", job_id)
            return
        ret = map_scanned_results(results=message_msg, job=job)
        if not ret:
            self.logger.warning("[%s] Unable to map scanned results: %s",
                                job_id, message)

    # Reject levels or names that would escape the output directory
    if '/' in level or '/' in name:
        self.logger.error(
            "[%s] Wrong level or name received, dropping the message",
            job_id)
        return

    filename = "%s/job-%s/pipeline/%s/%s-%s.yaml" % (options['output_dir'],
                                                     job_id,
                                                     level.split('.')[0],
                                                     level, name)

    # Find the handler (if available)
    if job_id in self.logs:
        if filename != self.logs[job_id].filename:
            # Close the old file handler
            self.logs[job_id].close()
            mkdir(os.path.dirname(filename))
            self.logs[job_id] = FileHandler(filename)
    else:
        self.logger.info("[%s] Receiving logs from a new job", job_id)
        mkdir(os.path.dirname(filename))
        self.logs[job_id] = FileHandler(filename)

    # Mark the file handler as used
    # TODO: try to use a more pythonic way
    self.logs[job_id].last_usage = time.time()

    # n.b. logging here would produce a log entry for every message in
    # every job. The format is a list of dictionaries.
    message = "- %s" % message

    # Write data
    f_handler = self.logs[job_id].fd
    f_handler.write(message)
    f_handler.write('\n')
    f_handler.flush()

    # TODO: keep the file handler to avoid calling open for each line
    filename = os.path.join(options['output_dir'],
                            "job-%s" % job_id, 'output.yaml')
    with open(filename, 'a+') as f_out:
        f_out.write(message)
        f_out.write('\n')
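# Prefixing every line with "- " turns the log file into one growing
# YAML list, so the file stays parseable after any number of appends.
# A small self-contained round-trip illustrating the framing used
# above; the sample messages are invented for the example.
import yaml

lines = ['{"lvl": "info", "msg": "booting the device"}',
         '{"lvl": "results", "msg": {"case": "job", "result": "pass"}}']
document = "\n".join("- %s" % line for line in lines)

entries = yaml.safe_load(document)
assert entries[0]["lvl"] == "info"
assert entries[1]["msg"]["result"] == "pass"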