Example #1
0
    def test_employment_type(self):
        """Check the average per-value count when sampling by 'employmentType'.

        Draws ``self.sample_size`` items ``self.num_loops`` times, collects the
        element at index 1 of every sampled item (presumably the value looked
        up through ``keys`` -- confirm against JobSampler), and asserts that
        the mean number of occurrences per distinct value equals
        ``num_loops * sample_size / len(self.employment_type)``.
        """
        sampler = JobSampler(
            job_posting_generator=self.fake_corpus_train,
            keys='employmentType',
        )

        # One flat list of sampled values across every round of sampling.
        sampled_values = [
            item[1]
            for _ in range(self.num_loops)
            for item in sampler.sample(self.sample_size)
        ]

        occurrences = dict(Counter(sampled_values))
        observed_mean = np.mean(np.array(list(occurrences.values())))
        expected_mean = self.num_loops * self.sample_size / len(self.employment_type)

        # The two sides only match when the number of distinct sampled values
        # equals len(self.employment_type).
        assert observed_mean == expected_mean
Example #2
0
    def test_soc(self):
        """Check the average per-code count when sampling with default keys.

        Draws ``self.sample_size`` items ``self.num_loops`` times, reads
        ``'onet_soc_code'`` from the element at index 0 of every sampled item,
        and asserts that the mean number of occurrences per distinct code
        equals ``num_loops * sample_size / self.occ_num``.
        """
        sampler = JobSampler(job_posting_generator=self.fake_corpus_train)

        soc_codes = []
        for _ in range(self.num_loops):
            for item in sampler.sample(self.sample_size):
                soc_codes.append(item[0]['onet_soc_code'])

        occurrences = dict(Counter(soc_codes))
        per_code_counts = np.array(list(occurrences.values()))
        observed_mean = np.mean(per_code_counts)
        expected_mean = self.num_loops * self.sample_size / self.occ_num

        # Equality holds only when the number of distinct codes seen equals
        # self.occ_num.
        assert observed_mean == expected_mean
Example #3
0
    def test_state(self):
        """Check the average per-value count when sampling by a nested key path.

        The sampler is built with ``keys=['jobLocation', 'address',
        'addressRegion']``. The test draws ``self.sample_size`` items
        ``self.num_loops`` times, collects the element at index 1 of every
        sampled item (presumably the region found via the key path -- confirm
        against JobSampler), and asserts that the mean number of occurrences
        per distinct value equals ``num_loops * sample_size / len(self.states)``.
        """
        key_path = ['jobLocation', 'address', 'addressRegion']
        sampler = JobSampler(job_posting_generator=self.fake_corpus_train, keys=key_path)

        regions = []
        for _ in range(self.num_loops):
            batch = sampler.sample(self.sample_size)
            regions.extend([item[1] for item in batch])

        occurrences = dict(Counter(regions))
        observed_mean = np.mean(np.array(list(occurrences.values())))
        expected_mean = self.num_loops * self.sample_size / len(self.states)

        # Equality holds only when the number of distinct values seen equals
        # len(self.states).
        assert observed_mean == expected_mean
Example #4
0
    def test_major_group(self):
        """Check that weighted sampling reproduces the '13' to '11' weight ratio.

        For each of ``self.num_loops`` rounds, the test samples
        ``self.sample_size`` items, takes the first two characters of the
        element at index 1 of every item, and records the ratio of '13'
        occurrences to '11' occurrences. The observed ratios are then binned
        into unit-wide intervals over [0, 5], and the expected weight ratio
        must lie within the most populated interval (edges inclusive).
        """
        expected_ratio = self.weights['13'] / self.weights['11']

        sampler = JobSampler(
            job_posting_generator=self.fake_corpus_train,
            weights=self.weights,
            major_group=True,
        )

        observed_ratios = []
        for _ in range(self.num_loops):
            prefixes = [item[1][:2] for item in sampler.sample(self.sample_size)]
            # Plain dict on purpose: a group missing from a round raises
            # KeyError rather than silently counting as zero.
            tally = dict(Counter(prefixes))
            observed_ratios.append(tally['13'] / tally['11'])

        frequencies, edges = np.histogram(observed_ratios, bins=[0, 1, 2, 3, 4, 5])

        # The weight ratio (2.0 for the fixture weights, per the original note;
        # confirm in the fixture setup) should fall inside the interval holding
        # the most observations once all sampling rounds are done.
        peak = np.argmax(frequencies)
        assert edges[peak] <= expected_ratio <= edges[peak + 1]