예제 #1
0
    def test_employment_type(self):
        """Sample repeatedly keyed on ``employmentType`` and check the mean count.

        Draws ``self.sample_size`` items ``self.num_loops`` times, tallies the
        element at index 1 of every sampled item, and asserts that the mean
        tally per distinct value equals the total number of requested draws
        divided by ``len(self.employment_type)``.
        """
        sampler = JobSampler(job_posting_generator=self.fake_corpus_train, keys='employmentType')

        observed = []
        for _ in range(self.num_loops):
            # Index 1 of each sampled item holds the value being tallied
            # (presumably the employment type selected via ``keys`` -- confirm
            # against JobSampler.sample).
            observed += [item[1] for item in sampler.sample(self.sample_size)]

        tally = Counter(observed)
        expected_mean = self.num_loops * self.sample_size / len(self.employment_type)
        assert np.mean(list(tally.values())) == expected_mean
예제 #2
0
    def test_state(self):
        """Sample keyed on the nested address region and check the mean count.

        The sampler is configured with the key path
        ``['jobLocation', 'address', 'addressRegion']``. Over
        ``self.num_loops`` rounds of ``self.sample_size`` draws, the element
        at index 1 of each sampled item is tallied, and the mean tally per
        distinct value must equal the total number of requested draws divided
        by ``len(self.states)``.
        """
        region_path = ['jobLocation', 'address', 'addressRegion']
        sampler = JobSampler(job_posting_generator=self.fake_corpus_train, keys=region_path)

        # One flat list over all rounds; a new sample is requested per round.
        regions = [
            item[1]
            for _ in range(self.num_loops)
            for item in sampler.sample(self.sample_size)
        ]

        per_region = Counter(regions)
        expected_mean = self.num_loops * self.sample_size / len(self.states)
        assert np.mean(list(per_region.values())) == expected_mean
예제 #3
0
    def test_soc(self):
        """Sample with default settings and check the mean count per SOC code.

        Over ``self.num_loops`` rounds of ``self.sample_size`` draws, the
        ``'onet_soc_code'`` field of the element at index 0 of each sampled
        item is tallied. The mean tally per distinct code must equal the total
        number of requested draws divided by ``self.occ_num``.
        """
        sampler = JobSampler(job_posting_generator=self.fake_corpus_train)

        soc_codes = []
        for _ in range(self.num_loops):
            drawn = sampler.sample(self.sample_size)
            # Index 0 of each sampled item is the mapping holding the SOC code.
            soc_codes.extend(item[0]['onet_soc_code'] for item in drawn)

        per_code = Counter(soc_codes)
        mean_count = np.array(list(per_code.values())).mean()
        assert mean_count == self.num_loops * self.sample_size / self.occ_num
예제 #4
0
    def test_major_group(self):
        """Check that weighted sampling reproduces the '13' : '11' weight ratio.

        Postings are first filtered down to those whose major group (the first
        two characters of ``onet_soc_code``) is ``'11'`` or ``'13'``. The
        sampler is then iterated ``self.num_loops`` times with
        ``self.weights`` keyed on major group, and the ratio of ``'13'`` to
        ``'11'`` counts is recorded for each round. The ratios are binned into
        unit-width bins over [0, 5]; the test passes when the expected weight
        ratio lies within the most populated bin (both edges inclusive).

        Raises:
            KeyError: if a round contains no ``'13'`` or no ``'11'`` postings.
        """
        def major_group(job):
            # The major group is the two-character prefix of the SOC code.
            return job['onet_soc_code'][:2]

        def in_target_groups(job):
            return major_group(job) in ('11', '13')

        expected_ratio = self.weights['13'] / self.weights['11']

        filtered_postings = JobPostingFilterer(
            self.fake_corpus_train,
            [in_target_groups],
        )

        sampler = JobSampler(
            job_posting_generator=filtered_postings,
            k=self.sample_size,
            weights=self.weights,
            key=major_group,
        )

        ratios = []
        for _ in range(self.num_loops):
            # Kept as a plain dict on purpose: a group missing from the sample
            # raises KeyError instead of silently counting as zero.
            counts = dict(Counter(major_group(job) for job in sampler))
            ratios.append(counts['13'] / counts['11'])

        bin_counts, bin_edges = np.histogram(ratios, bins=[0, 1, 2, 3, 4, 5])

        # The expected ratio (2.0 according to the original note -- confirm
        # against the fixture weights) should fall inside the fullest
        # histogram bin after self.num_loops sampling rounds.
        fullest = np.argmax(bin_counts)
        assert bin_edges[fullest] <= expected_ratio <= bin_edges[fullest + 1]
예제 #5
0
    def test_major_group(self):
        """Check that ``major_group=True`` sampling reproduces the weight ratio.

        For each of ``self.num_loops`` rounds, ``self.sample_size`` items are
        drawn, the first two characters of the element at index 1 of every
        item are tallied, and the ratio of ``'13'`` to ``'11'`` counts is
        recorded. The ratios are binned into unit-width bins over [0, 5]; the
        expected weight ratio must lie within the most populated bin (both
        edges inclusive).
        """
        expected_ratio = self.weights['13'] / self.weights['11']

        sampler = JobSampler(
            job_posting_generator=self.fake_corpus_train,
            weights=self.weights,
            major_group=True,
        )

        ratios = []
        for _ in range(self.num_loops):
            groups = [item[1][:2] for item in sampler.sample(self.sample_size)]
            tally = dict(Counter(groups))
            ratios.append(tally['13'] / tally['11'])

        bin_counts, bin_edges = np.histogram(ratios, bins=[0, 1, 2, 3, 4, 5])

        # The weight ratio (2.0 according to the original note) is expected to
        # land in the fullest histogram bin once all rounds are collected.
        fullest = np.argmax(bin_counts)
        assert bin_edges[fullest] <= expected_ratio <= bin_edges[fullest + 1]
예제 #6
0
    def test_state(self):
        """Iterate a ``k``-sized sampler repeatedly and check the mean count.

        The sampler is built with ``k=self.sample_size`` and iterated
        ``self.num_loops`` times. The ``jobLocation -> address ->
        addressRegion`` value of every yielded posting is tallied, and the
        mean tally per distinct value must equal the total number of requested
        draws divided by ``len(self.states)``.
        """
        def state_of(job):
            return safe_get(job, 'jobLocation', 'address', 'addressRegion')

        sampler = JobSampler(
            job_posting_generator=self.fake_corpus_train,
            k=self.sample_size,
        )

        states = []
        for _ in range(self.num_loops):
            # Each pass re-iterates the sampler (presumably producing a fresh
            # sample of k postings -- confirm against JobSampler.__iter__).
            states.extend(state_of(job) for job in sampler)

        per_state = Counter(states)
        expected_mean = self.num_loops * self.sample_size / len(self.states)
        assert np.mean(list(per_state.values())) == expected_mean