def test_overflow_with_spark(self):
    """Check that merged co-occurrence values stay within [0, MAX_INT32].

    The first input model is rewritten so that its first stored value becomes
    ``MAX_INT32 - <original value>``; the inputs are then merged with
    ``merge_coocc`` and every stored value of the output matrix is checked
    against both bounds.
    """
    with tempfile.TemporaryDirectory(
            prefix="merge-coocc-entry-test") as models_dir:
        self.copy_models(models.COOCC, models_dir, COPIES_NUMBER)
        # NOTE(review): the second argument presumably selects the Spark
        # code path - confirm against get_args().
        args = get_args(models_dir, False)
        first_model_path = args.input[0]

        # Push the first stored value of one input close to the int32 limit.
        tampered = Cooccurrences().load(first_model_path)
        tampered.matrix.data[0] = MAX_INT32 - tampered.matrix.data[0]
        tampered.save(first_model_path)

        merge_coocc(args)

        merged_values = Cooccurrences().load(args.output).matrix.data
        self.assertTrue(numpy.all(merged_values <= MAX_INT32))
        self.assertTrue(numpy.all(merged_values >= 0))
def test_load_and_check(self):
    """Exercise ``load_and_check`` on two corrupted versions of the first input.

    Scenario 1: the first stored value of the first model is set to ``-1``;
    ``load_and_check`` is then expected to produce exactly 2 items.

    Scenario 2: the stored values of the first model are converted to
    ``uint32`` and the first one is set to ``MAX_INT32 + 1``; the first item
    produced by ``load_and_check`` must contain no value above ``MAX_INT32``.
    """
    with tempfile.TemporaryDirectory(
            prefix="merge-coocc-entry-test") as models_dir:
        self.copy_models(models.COOCC, models_dir, COPIES_NUMBER)
        args = get_args(models_dir, True)
        first_model_path = args.input[0]

        # Scenario 1: a negative stored value in the first model.
        corrupted = Cooccurrences().load(first_model_path)
        corrupted.matrix.data[0] = -1
        corrupted.save(first_model_path)
        test_log = logging.getLogger("test")
        checked = list(load_and_check(args.input, test_log))
        self.assertEqual(len(checked), 2)

        # Scenario 2: a stored value just above the int32 maximum.
        corrupted = Cooccurrences().load(first_model_path)
        # Presumably the unsigned cast is what lets MAX_INT32 + 1 be stored
        # without wrapping - verify against the model's on-disk dtype.
        corrupted.matrix.data = numpy.uint32(corrupted.matrix.data)
        corrupted.matrix.data[0] = MAX_INT32 + 1
        corrupted.save(first_model_path)
        # Only the first produced item is inspected.
        for _, coocc in load_and_check(args.input, test_log):
            self.assertTrue(numpy.all(coocc.matrix.data <= MAX_INT32))
            break