Example #1
def extract_offers_per_cities(offers_file, colnames, min_creation_date, data_folder='data'):
    """Extract the interesting cities in terms of number of offers for each job group.

    Args:
        offers_file: path of cvs file with offers.
        colnames: the names of the columns in the offer file.
        min_creation_date: the date from which we consider the offers.
    """
    required_fields = {
        _CITY_CODE_FIELD, _JOB_GROUP_CODE_FIELD, _LATITUDE_CODE_FIELD,
        _LONGITUDE_CODE_FIELD, _CITY_NAME_CODE_FIELD}

    offers_rows = job_offers.iterate(offers_file, colnames, required_fields)

    city_data = _list_hiring_cities(offers_rows, min_creation_date, data_folder)

    # Compute, for each job group, the minimum number of offers a city must
    # have to be kept.
    job_group_threshold = collections.defaultdict(float)
    for job_group, offers in city_data.offers_per_job_group.items():
        job_group_threshold[job_group] = math.pow(offers, 0.6) / 40

    job_group_to_kept_cities = collections.defaultdict(list)
    for job_group, city_ids in city_data.job_group_to_city_ids.items():
        kept_cities = []
        for city_id, offer_count in city_ids.items():
            if offer_count > job_group_threshold[job_group]:
                kept_cities.append({'city': city_data.city_info[city_id], 'offers': offer_count})

        job_group_to_kept_cities[job_group] = sorted(
            kept_cities, key=lambda k: k['offers'], reverse=True)

    return [
        {'_id': job_group_id, 'hiringCities': job_group_weighted_cities}
        for job_group_id, job_group_weighted_cities in job_group_to_kept_cities.items()]
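To get a sense of the cutoff that math.pow(offers, 0.6) / 40 produces, here is a quick stand-alone sanity check (plain arithmetic, independent of the module above):

import math

# A job group with 10,000 offers only keeps cities with more than
# ~6.3 offers (10000**0.6 / 40), while a group with 100 offers keeps
# almost every hiring city (cutoff ~0.4).
for total_offers in (100, 10000):
    print(total_offers, math.pow(total_offers, 0.6) / 40)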
def trim_job_offers_csv(in_csv,
                        colnames_txt,
                        out_csv,
                        min_creation_date='',
                        fields=_DEFAULT_FIELDS):
    """Trim job offers CSV.

    Args:
        in_csv: the path of the CSV file containing all job offers in the Pôle
            Emploi format (using latin-1 encoding, | separators, etc).
        colnames_txt: the TXT file containing the list of column names.
        out_csv: the path where to store the output CSV file.
        fields: the list of fields to keep, separated by commas.
    """
    fieldnames = fields.split(',')
    all_job_offers = job_offers.iterate(in_csv,
                                        colnames_txt,
                                        required_fields=set(fieldnames +
                                                            ['creation_date']))
    with open(out_csv, 'w') as out_file:
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()
        for job_offer in all_job_offers:
            # Skip offers created before the cutoff date (string comparison).
            if job_offer.creation_date < min_creation_date:
                continue
            writer.writerow(
                {field: getattr(job_offer, field)
                 for field in fieldnames})
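A minimal usage sketch for the function above; the paths are placeholders, and since creation dates are compared as strings, ISO-formatted dates are assumed:

# Hypothetical call; the paths below are not part of the original module.
trim_job_offers_csv(
    'data/job_offers.csv',
    'data/column_names.txt',
    'data/trimmed_offers.csv',
    min_creation_date='2015-01-01')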
 def test_missing_required_fields(self):
     """Test missing required field."""
     offers = job_offers.iterate(path.join(self.testdata_folder,
                                           'job_offers.csv'),
                                 path.join(self.testdata_folder,
                                           'column_names.txt'),
                                 required_fields={'foobar'})
     self.assertRaises(ValueError, next, offers)
 def test_basic(self):
     """Test basic usage."""
     offers = list(
         job_offers.iterate(
             path.join(self.testdata_folder, 'job_offers.csv'),
             path.join(self.testdata_folder, 'column_names.txt')))
     # Golden values.
     self.assertEqual(8, len(offers))
     self.assertEqual('000053Q', offers[0].id_offre)
     self.assertEqual('Contrat travail', offers[1].contract_nature_name)
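Taken together, the two tests above pin down the job_offers.iterate contract; here is a hedged sketch of calling it directly (the testdata paths are assumptions mirroring the tests):

from os import path

# Each yielded row exposes the CSV columns as attributes; asking for a
# required field that is not in the column list raises ValueError on the
# first next() call.
offers = job_offers.iterate(
    path.join('testdata', 'job_offers.csv'),
    path.join('testdata', 'column_names.txt'))
print(next(offers).id_offre)  # '000053Q' in the golden file above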
def csv2dicts(job_offers_csv, colnames_txt, last_year='2015'):
    """Import the changes of # of job offers per job group and dept in MongoDB.

    Args:
        job_offers_csv: Path of the csv containing the data.
        colnames_txt: Path to a file containing the name of the CSV's columns.
        last_year: The year to consider to compute the metrics.
    Returns:
        Evolution data as a LocalJobStats JSON-proto compatible dict.
    """
    counter = _EvolutionCounter(int(last_year))
    for job_offer in job_offers.iterate(
            job_offers_csv, colnames_txt, _REQUIRED_FIELDS):
        counter.collect(job_offer)
    return list(counter.get_proto_dicts())
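A hedged sketch of pushing the result of the function above into MongoDB with pymongo; the connection string, database and collection names are placeholders, not part of the original code:

import pymongo

client = pymongo.MongoClient('mongodb://localhost:27017/')
stats = csv2dicts('data/job_offers.csv', 'data/column_names.txt',
                  last_year='2015')
if stats:
    # insert_many rejects an empty list, hence the guard.
    client.test.local_job_stats.insert_many(stats)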
def csv2dicts(job_offers_csv, colnames_txt):
    """Import the requirement from job offers grouped by Job Group in MongoDB.

    Args:
        job_offers_csv: Path of the csv containing the data.
        colnames_txt: Path to a file containing the name of the CSV's columns.
    Returns:
        Requirements as a JobRequirements JSON-proto compatible dict.
    """
    job_groups = collections.defaultdict(_RequirementsCollector)
    for job_offer in job_offers.iterate(
            job_offers_csv, colnames_txt, _REQUIRED_FIELDS):
        job_groups[job_offer.rome_profession_card_code].collect(job_offer)
    return [
        dict(job_groups[job_group_id].get_proto_dict(), _id=job_group_id)
        for job_group_id in sorted(job_groups)]
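For illustration, the returned list is sorted by job group ID and each entry merges the collector's fields with an _id key; a hypothetical peek (paths are placeholders, and the exact requirement fields depend on _RequirementsCollector.get_proto_dict):

requirements = csv2dicts('data/job_offers.csv', 'data/column_names.txt')
for entry in requirements[:3]:
    print(entry['_id'])  # ROME job group codes, in sorted order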