Example #1
import collections
import csv
from os import path

import migration_helpers


def main(fhs_folder, csv_output, progress=_print_progress):
    """Extract the salaries information from FHS and bucketize them.

    In order to avoid issues about jobseekers being counted several times, we
    only consider salaries for jobseekers that are still unemployed at the time
    the FHS was sampled.

    Args:
        fhs_folder: path of the root folder of the FHS files.
        csv_output: path to the file to write to.
    """
    # TODO: Factor this code out and share it with fhs_job_frequency.

    # Check that the output file is writable before starting the long process
    # of collecting data.
    with open(csv_output, 'w'):
        pass

    de_rows = migration_helpers.flatten_iterator(
        path.join(fhs_folder, '*/de_*.csv'))

    # Rough estimate of the total number of rows in the FHS "de" table.
    total = 7000001

    job_seeker_counts = collections.defaultdict(int)
    counted = 0
    for de_dict in de_rows:
        counted += 1
        if counted % 10000 == 0 and progress:
            progress(counted, total)
        # Discard historical job requests; only keep the ones that are still
        # open.
        if de_dict[_END_DATE_FIELD]:
            continue
        job_seeker_counts[job_seeker_criteria(de_dict)] += 1

    if progress:
        progress(total, total)

    with open(csv_output, 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow((
            'code_rome',
            'departement_id',
            'salary_unit',
            'salary_low',
            'salary_high',
            'count'))
        for key, count in job_seeker_counts.items():
            writer.writerow(key + (count,))
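
This snippet relies on module-level helpers that the listing does not show: _END_DATE_FIELD (the name of the end-of-unemployment-period column in the "de" table), the _print_progress callback and job_seeker_criteria. As a rough idea of their shape only, here is a minimal sketch; the bodies and the field names in it are assumptions, not the project's actual code.

import sys


def _print_progress(counted, total):
    # ASSUMPTION: a plausible progress callback matching the calls above; the
    # real implementation is not shown in this listing.
    sys.stdout.write('\r{} rows processed out of ~{}'.format(counted, total))
    sys.stdout.flush()


def job_seeker_criteria(de_dict):
    # ASSUMPTION: builds the bucket key that gets written as a CSV row above,
    # in the order (code_rome, departement_id, salary_unit, salary_low,
    # salary_high). The de_dict field names below are hypothetical.
    return (
        de_dict['code_rome'],
        de_dict['departement_id'],
        de_dict['salary_unit'],
        de_dict['salary_low'],
        de_dict['salary_high'])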
Example #2
import collections
import json
from os import path

import migration_helpers
import numpy
import pandas


def main(fhs_folder, json_output, progress=_print_progress):
    """Extract the job OGR codes from the FHS and count them.

    Args:
        fhs_folder: path of the root folder of the FHS files.
        json_output: path to the file to write to.
    """
    # Check that the output file is writable before starting the long process
    # of collecting data.
    with open(json_output, 'w'):
        pass

    de_rows = migration_helpers.flatten_iterator(
        path.join(fhs_folder, '*/de_*.csv'))

    # Rough estimate of the total number of rows in the FHS "de" table.
    total = 7000001

    # If we need to do this often, we could replace this code with a simple
    # MapReduce to use multiple threads or multiple machines.
    job_counts = collections.defaultdict(int)
    counted = 0
    for de_dict in de_rows:
        counted += 1
        if counted % 1000 == 0 and progress:
            progress(counted, total)
        # Discard historical job requests; only keep the ones that are still
        # open.
        if de_dict[_END_DATE_FIELD]:
            continue
        job_code = de_dict[_JOB_CODE_FIELD]
        job_counts[job_code] += 1

    if progress:
        progress(total, total)

    job_count_series = pandas.Series(job_counts)
    # Add random Gaussian noise so that the numbers do not reveal the raw
    # data.
    job_count_series = job_count_series.add(
        numpy.random.normal(scale=5, size=len(job_count_series)))
    # Round to integers to hide the fact that we added noise.
    job_count_series = job_count_series.round().astype(int)
    # Keep strictly positive values only.
    job_count_series = job_count_series[job_count_series > 0]
    # Return to dict format and get rid of the numpy int64 values (TODO: find
    # a cleaner way to do this).
    job_counts = json.loads(job_count_series.to_json())

    with open(json_output, 'w') as output_file:
        json.dump(job_counts, output_file, sort_keys=True, indent=2)
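
The TODO above asks for a cleaner way to get rid of the numpy int64 values than a JSON round-trip. One possible alternative, sketched here, is to rebuild the dict while casting each count to a plain Python int:

# Possible alternative to the json.loads(to_json()) round-trip: cast each
# numpy int64 count to a plain Python int while rebuilding the dict.
job_counts = {
    job_code: int(count)
    for job_code, count in job_count_series.items()}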
Example #3
def _table_iterator(table):
    # Nested helper: fhs_folder comes from the enclosing function's scope.
    # Iterates over the rows of all CSV files for the given FHS table.
    return PeekIterator(
        migration_helpers.flatten_iterator(
            path.join(fhs_folder, '*/{}.csv'.format(table))))
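
PeekIterator and migration_helpers.flatten_iterator are project helpers that the listing does not define. Judging only from how they are used, flatten_iterator chains the rows of every CSV file matching a glob pattern, and PeekIterator wraps an iterator so that the next element can be inspected without consuming it. A minimal sketch under those assumptions, not the project's actual implementations:

import csv
import glob


def flatten_iterator(files_pattern):
    # ASSUMPTION: yields each row of every CSV file matching the glob pattern
    # as a dict (the callers above access rows by field name).
    for file_name in sorted(glob.glob(files_pattern)):
        with open(file_name) as csv_file:
            for row in csv.DictReader(csv_file):
                yield row


class PeekIterator(object):
    # ASSUMPTION: a minimal peekable iterator; the project's actual class may
    # expose a different interface.

    def __init__(self, iterator):
        self._iterator = iter(iterator)
        self._buffer = []

    def peek(self):
        # Return the next item without consuming it.
        if not self._buffer:
            self._buffer.append(next(self._iterator))
        return self._buffer[0]

    def __iter__(self):
        return self

    def __next__(self):
        if self._buffer:
            return self._buffer.pop(0)
        return next(self._iterator)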