Beispiel #1
0
 def parse_line(self, line):
     """Parse one tab-separated mapping line into a Bunch.

     The line layout is: wn_id <TAB> wn_key <TAB> ... <TAB> definition.
     The entity fields parsed from the key are merged into the result.

     :param line: A single tab-separated record line.
     :return: Bunch with the parsed entity fields plus wn_id, wn_key and
              definition.
     """
     fields = line.strip().split('\t')
     entity = self.parse_entity(fields[1])
     return Bunch(entity,
                  wn_id=fields[0],
                  wn_key=fields[1],
                  definition=fields[-1])
def draw_items_from_pool(draw,
                         which,
                         min_size,
                         max_size,
                         get_id_column_name=lambda x: x + '_id'):
    """
    Draw a list of NeoBunch containing example items from one of the pools of
    objects.

    NeoBunch is a subclass of dict where the items can be accessed like
    attributes and can be used both like an object and like a dict.

    :param draw: Callable for drawing examples from strategies.
    :param which: The name of the object pool to sample from.
    :param min_size: The minimum number of items to draw.
    :param max_size: The maximum number of items to draw.
    :param get_id_column_name: Callable mapping the pool name to the name of
                               the generated id column.

    :return: List of dicts containing data for requested example items.
    """
    sample_strategy = st.sampled_from(fake_object_pools[which])
    drawn = draw(
        non_empty_lists(sample_strategy, min_size=min_size,
                        max_size=max_size))
    id_column = get_id_column_name(which)
    result = []
    # Generated ids emulate an autoincrement column starting from 1.
    for row_id, item in enumerate(drawn, start=1):
        result.append(Bunch({id_column: row_id}, **item))
    return result
Beispiel #3
0
 def _read_data_files(wndb_path,
                      lexnames,
                      pos_files=('noun', 'verb', 'adj', 'adv')):
     """Parse WordNet ``data.<pos>`` files into a dict of synset entries.

     :param wndb_path: Directory containing the WordNet database files.
     :param lexnames: Mapping from lexicographer-file number (as string)
                      to lexicographer name.
     :param pos_files: Suffixes of the ``data.*`` files to read.  A tuple
                      default replaces the original mutable list default.
     :return: Dict mapping '<pos>-<offset>' to a Bunch describing the
              synset (offset, lexname, synset_type, words, pointers,
              gloss).
     """
     data = {}
     for pos_file in pos_files:
         data_path = os.path.join(wndb_path, 'data.' + pos_file)
         with open(data_path) as ifp:
             for line in ifp:
                 # License/header lines in WNDB files start with a space.
                 if line.startswith(' '):
                     continue
                 part = line.strip().split()
                 # Word count field is two-digit hexadecimal.
                 _num_words = int(part[3], 16)
                 _num_pointers = int(part[4 + _num_words * 2])
                 # _pp: index of the first pointer field
                 # (4 header fields + word/lex_id pairs + pointer count).
                 _pp = 4 + _num_words * 2 + 1
                 # Gloss text follows the '|' separator token.
                 _gloss_p = part.index('|') + 1
                 data_entry = Bunch(
                     offset=part[0],
                     lexname=lexnames[part[1]],
                     synset_type=part[2],
                     words=DBWordNetParser._parse_words(
                         DBWordNetParser._chunklist(part[4:_pp - 1])),
                     pointers=DBWordNetParser._parse_pointers(
                         DBWordNetParser._chunklist(
                             part[_pp:_pp + _num_pointers * 4], 4)),
                     gloss=' '.join(part[_gloss_p:]))
                 data['{}-{}'.format(pos_file, part[0])] = data_entry
     return data
def lists_of_employees(draw, locations, job_titles, companies, min_size,
                       max_size):
    """
    Returns a strategy which generates a list of NeoBunch containing data
    describing an employee instances.

    NeoBunch is a subclass of dict where the items can be accessed like
    attributes and can be used both like an object and like a dict.

    :param draw: Callable for drawing examples from strategies.
    :param locations: The available locations drawn by hypothesis.
    :param job_titles: The available job titles drawn by hypothesis.
    :param companies: The available companies drawn by hypothesis.
    :param min_size: The minimum number of employee instances to generate.
    :param max_size: The maximum number of employee instances to generate.

    :return: Strategy for generating a ``dict`` with employee data.
    """
    employee_strategy = employees(locations, job_titles, companies)
    drawn = draw(
        non_empty_lists(employee_strategy,
                        min_size=min_size,
                        max_size=max_size))
    # employee_id emulates an autoincrement column starting from 1.
    return [
        Bunch(employee_id=row_id, **record)
        for row_id, record in enumerate(drawn, start=1)
    ]
def employees(draw, locations, job_titles, companies):
    """
    Returns a strategy which generates a NeoBunch containing data describing
    an employee instance.

    NeoBunch is a subclass of dict where the items can be accessed like
    attributes and can be used both like an object and like a dict.

    :param draw: Callable for drawing examples from strategies.
    :param locations: The available locations drawn by hypothesis.
    :param job_titles: The available job titles drawn by hypothesis.
    :param companies: The available companies drawn by hypothesis.

    :return: Strategy for generating a ``dict`` with employee data.
    """
    def sample_name(pool_name):
        # Pick one fake object from the named pool and return its name field.
        choice = draw(st.sampled_from(fake_object_pools[pool_name]))
        return choice[pool_name]

    def sample_id(pool):
        # Foreign keys are 1-based indices into the given pool.
        return draw(st.integers(min_value=1, max_value=len(pool)))

    return Bunch(
        first_name=sample_name('first_name'),
        last_name=sample_name('last_name'),
        date_of_birth=draw(dates_of_birth()),
        location_id=sample_id(locations),
        job_title_id=sample_id(job_titles),
        company_id=sample_id(companies),
    )
Beispiel #6
0
 def _parse_pointers(tups):
     """Convert WordNet pointer 4-tuples into Bunch objects.

     Each tuple is (pointer_symbol, offset, pos, source_target), where
     source_target is four hex digits: two for source, two for target.

     :param tups: Iterable of 4-element pointer tuples.
     :return: List of Bunch objects with pointer, offset, source, target.
     """
     return [
         Bunch(pointer=fields[0],
               offset='{}-{}'.format(
                   DBWordNetParser._POS_FILE_MAP_[fields[2]], fields[1]),
               source=fields[3][:2],
               target=fields[3][2:])
         for fields in tups
     ]
Beispiel #7
0
 def data_info(self, data):
     """Summarize a 4-D batch shape and the centered removal window.

     :param data: Array-like with a ``.shape`` of (n, c, w, h).
     :return: Bunch with the four dimensions plus the left/right bounds
              (l, r) of the window of half-width ``self.remove_size``
              around the horizontal center.
     """
     batch, channels, width, height = data.shape
     mid = width // 2
     left = mid - self.remove_size
     right = mid + self.remove_size
     return Bunch(n=batch,
                  c=channels,
                  w=width,
                  h=height,
                  l=left,
                  r=right,
                  center=mid,
                  rs=self.remove_size)
Beispiel #8
0
def perform_integration_test(data, page, page_name, get_expected,
                             compare_functions, **parameters):
    """
    Perform an integration test to verify that a query works end to end
    (load page -> set params -> show results).

    :param data: Employee test data
    :param page: The capybara page object
    :param page_name: The name of the page to load
    :param get_expected: Callable for retrieving the expected result
    :param compare_functions: iterable containing functions to use to validate the results
                              comparing the actual with the expected.
    :param **parameters: Keyword parameters which should be passed to get_expected. Also
                         defines the forms which will be filled with generated data.

    :return: None; raises AssertionError when actual and expected differ.
    """
    # Force the app onto an in-memory SQLite database for the test.
    def get_url():
        return 'sqlite:///:memory:'

    # Import the hypothesis-generated test data instead of real data.
    def data_importer(session):
        return import_data(data, session)

    with patch('employee_insights.database.get_url', get_url), \
         patch('employee_insights.database.import_data', data_importer):

        # Only navigate if we are not already on the target page.
        if page.current_path != '/' + page_name:
            page.visit(page_name)

        # Fill each named form field with the generated parameter value.
        for name, value in parameters.items():
            page.fill_in(name, value=value)

        if len(parameters):
            page.click_button("Go")

        # Give the page a moment to render the query results.
        time.sleep(0.1)

        actual_result = page.html
        actual_result = (Bunch(x) for x in table2dicts(actual_result))
        expected_result = list(get_expected(data, **parameters))

        if expected_result:
            # zip_longest so a row-count mismatch surfaces as a failed
            # comparison (the shorter side yields None).
            for actual, expected in zip_longest(actual_result,
                                                expected_result):
                for compare_function in compare_functions:
                    if not compare_function(actual, expected):
                        # Re-assert equality so pytest shows a useful diff.
                        assert actual == expected
        else:
            # No expected rows: the page should display its empty message.
            expected = 'No rows'
            actual = page.find('#results').text
            assert expected in actual
Beispiel #9
0
 def get_entries(self, idx_entries):
     """Expand index entries into full per-synset entries.

     For every synset offset referenced by each index entry, look up the
     parsed data entry and combine both into one Bunch.

     :param idx_entries: Iterable of index-entry Bunches (from the
                         ``index.*`` files).
     :return: List of Bunch objects with lemma, pos, synset_type,
              sense_number, gloss and the synset's word list.
     """
     results = []
     for idx_entry in idx_entries:
         for offset in idx_entry.synset_offsets:
             data_entry = self.data[offset]
             # Sense numbers are keyed by lemma-type-offset.
             key = '{}-{}-{}'.format(idx_entry.lemma,
                                     data_entry.synset_type,
                                     data_entry.offset)
             results.append(
                 Bunch(lemma=idx_entry.lemma,
                       pos=idx_entry.pos,
                       synset_type=data_entry.synset_type,
                       sense_number=self.sense_numbers[key],
                       gloss=data_entry.gloss,
                       words=[w.word for w in data_entry.words]))
     return results
Beispiel #10
0
 def read_index_files(wndb_path, pos_files=('noun', 'verb', 'adj', 'adv')):
     """Parse WordNet ``index.<pos>`` files into per-lemma index entries.

     :param wndb_path: Directory containing the WordNet database files.
     :param pos_files: Suffixes of the ``index.*`` files to read.  A tuple
                      default replaces the original mutable list default.
     :return: Dict mapping lemma to a list of Bunch index entries (one per
              POS file the lemma occurs in).
     """
     entries = {}
     for pos_file in pos_files:
         idx_path = os.path.join(wndb_path, 'index.' + pos_file)
         with open(idx_path) as ifp:
             for line in ifp:
                 # License/header lines in WNDB files start with a space.
                 if line.startswith(' '):
                     continue
                 part = line.strip().split()
                 # part[3] is the pointer-symbol count; symbols follow the
                 # first four fields, so _ap is the index just past them.
                 _ap = 4 + int(part[3])
                 lemma = part[0]
                 idx_entry = Bunch(lemma=lemma,
                                   pos=part[1],
                                   pointers=part[4:_ap],
                                   num_tagsenes=int(part[_ap + 1]),
                                   synset_offsets=[
                                       '{}-{}'.format(pos_file, _o)
                                       for _o in part[_ap + 2:]
                                   ])
                 # setdefault replaces the get-then-assign pair.
                 entries.setdefault(lemma, []).append(idx_entry)
     return entries
Beispiel #11
0
 def parse_entity(self, e):
     """Parse an entity key into word, POS and sense id.

     The first two characters are a prefix that is stripped; the rest is
     underscore-separated as <word parts...>_<pos>_<sense_id>, where the
     word itself may contain underscores (rejoined with spaces).

     :param e: Entity key string.
     :return: Bunch with word, pos and sense_id.
     """
     tokens = e[2:].split('_')
     return Bunch(word=' '.join(tokens[:-2]),
                  pos=tokens[-2],
                  sense_id=tokens[-1])
Beispiel #12
0
 def _parse_words(tups):
     """Convert (word, lex_id) chunks into word Bunches.

     :param tups: Iterable of sequences whose first two elements are the
                  word and its lexicographer id.
     :return: List of Bunch objects with word and lex_id.
     """
     return [Bunch(word=pair[0], lex_id=pair[1]) for pair in tups]
Beispiel #13
0
 def parse_synset_name(self, name):
     """Parse a dotted synset name such as 'dog.n.01'.

     :param name: Synset name of the form <lemma>.<pos>.<sense>.
     :return: Bunch with pos, integer sense_id, and the full name as
              wn_id.
     """
     pieces = name.split('.')
     pos, sense = pieces[-2], int(pieces[-1])
     return Bunch(pos=pos, sense_id=sense, wn_id=name)
Beispiel #14
0
            data_path = os.path.join(wndb_path, 'data.' + pos_file)
            with open(data_path) as ifp:
                for line in ifp:
                    if line.startswith(' '):
                        continue
                    part = line.strip().split()
                    _num_words = int(part[3], 16)
                    _num_pointers = int(part[4 + _num_words * 2])
                    _pp = 4 + _num_words * 2 + 1
                    _gloss_p = part.index('|') + 1
                    data_entry = Bunch(
                        offset=part[0],
                        lexname=lexnames[part[1]],
                        synset_type=part[2],
                        words=DBWordNetParser._parse_words(
                            DBWordNetParser._chunklist(part[4:_pp - 1])),
                        pointers=DBWordNetParser._parse_pointers(
                            DBWordNetParser._chunklist(
                                part[_pp:_pp + _num_pointers * 4], 4)),
                        gloss=' '.join(part[_gloss_p:]))
                    data['{}-{}'.format(pos_file, part[0])] = data_entry
        return data


if __name__ == '__main__':
    # Smoke test: parse a local WordNet database copy and print the
    # entries for the lemma 'tests'.
    options = Bunch(wndb_dir='data/wndb')
    wn_parser = DBWordNetParser(options)
    index_entries, inflected = wn_parser.get_idx_entries('tests')
    for entry in wn_parser.get_entries(index_entries):
        print(entry)
Beispiel #15
0
def get_expected(employee_data, location, min_percentage):
    """
    Calculate the percentage of employees of a company at a particular location,
    filtered by a minimum percentage of employees using python.

    :param employee_data: Hypothesis generated employee database source data.
    :param location: The location to get employee percentage for. This is a / separated list of
                     any of the following combinations:

                      - continent/country/state/city
                      - continent/country/state
                      - continent/country
                      - continent

    :param min_percentage: The minimum percentage of employees a location should
                           have to be included in the output.

    :return: Generator which when iterated yields an instance ``Expected`` which represents the
             expected result from get_employees_percentage_by_location.
    """
    # Ordered most-specific first; the unpacking below consumes them in
    # reverse (continent first), matching the location string's order.
    location_fields = ['city', 'state', 'country', 'continent']

    # zip_longest pads missing components with None, so e.g. a bare
    # 'continent' input leaves country/state/city as None.
    continent, country, state, city = (x for x, y in itertools.zip_longest(
        location.split('/'), location_fields))

    params = Bunch(continent=continent,
                   country=country,
                   state=state,
                   city=city)

    def get_location(employee):
        # location_id is a 1-based index into the locations pool.
        return employee_data.locations[employee.location_id - 1]

    def are_all_attrs_equal(lhs, rhs, attrs):
        return all(getattr(lhs, attr) == getattr(rhs, attr) for attr in attrs)

    def get_location_and_where(n):
        """
        Get the location to include in the result and a where function to
        filter the company employees for inclusion when calculating the
        percentage at a location.
        """
        # n selects specificity: 0 -> down to city, 3 -> continent only.
        fields = location_fields[n:]
        filter_func = partial(are_all_attrs_equal, params, attrs=fields)
        # First stored location matching the requested parameters; used
        # only to format the result's location string.
        employee_location = next(filter(filter_func, employee_data.locations))

        location = '/'.join(
            getattr(employee_location, x) for x in reversed(fields))

        def where(employee):
            return are_all_attrs_equal(get_location(employee), params, fields)

        return location, where

    for company, company_employees in get_company_employees(employee_data):

        # Match at the most specific level the caller supplied.
        if city:
            location, where = get_location_and_where(0)
        elif state:
            location, where = get_location_and_where(1)
        elif country:
            location, where = get_location_and_where(2)
        else:
            location, where = get_location_and_where(3)

        n_employees_in_location = count(filter(where, company_employees))
        # float() keeps the division true division on Python 2 as well.
        n_employees = float(len(company_employees))
        percentage = n_employees_in_location / n_employees * 100

        # Chained comparison: percentage > 0 AND percentage > min_percentage.
        # NOTE(review): the `0 <` guard only matters for negative
        # min_percentage values — confirm strict `>` (not `>=`) is intended.
        if 0 < percentage > min_percentage:
            yield Expected(company.company_id, company.company_name, location,
                           percentage)