Esempio n. 1
0
def get_office_as_es_doc(office):
    """
    Build the dict describing `office` for indexation in ElasticSearch.

    `office` may be either an `Office` or an `OfficeAdminAdd` instance.

    The identification, score, contact, headcount and flag entries are always
    present. `locations` is only added when both coordinates are truthy, and
    each pair of per-ROME score/boost entries is only added when the matching
    scores value is truthy.
    """
    # The `headcount` field of an `OfficeAdminAdd` instance has a `code` attribute,
    # otherwise the field itself is the value.
    raw_headcount = office.headcount.code if hasattr(office.headcount, 'code') else office.headcount

    # Anything that cannot be converted to an integer is indexed as 0.
    try:
        headcount = int(raw_headcount)
    except (ValueError, TypeError):
        headcount = 0

    # Cleanup exotic characters in the free-text fields.
    sanitize = encoding_util.sanitize_string
    clean_name = sanitize(office.office_name)
    clean_email = sanitize(office.email)
    clean_website = sanitize(office.website)

    doc = {
        'naf': office.naf,
        'siret': office.siret,
        'score': office.score,
        'score_alternance': office.score_alternance,
        'headcount': headcount,
        'name': clean_name,
        'email': clean_email,
        'tel': office.tel,
        'website': clean_website,
        'department': office.departement,
    }

    # Flags are stored as integers in the index.
    for flag in ('flag_alternance', 'flag_junior', 'flag_senior', 'flag_handicap', 'flag_pmsmp'):
        doc[flag] = int(getattr(office, flag))

    if office.y and office.x:
        # Use an array to allow multiple locations per document, see https://goo.gl/fdTaEM
        # Multiple locations may be added later via the admin UI.
        location = {'lat': office.y, 'lon': office.x}
        doc['locations'] = [location]

    rome_data = get_scores_by_rome_and_boosted_romes(office)
    scores_by_rome, scores_alternance_by_rome, boosted_romes, boosted_alternance_romes = rome_data

    # Boosted ROMEs are only indexed together with their scores.
    if scores_by_rome:
        doc['scores_by_rome'] = scores_by_rome
        doc['boosted_romes'] = boosted_romes
    if scores_alternance_by_rome:
        doc['scores_alternance_by_rome'] = scores_alternance_by_rome
        doc['boosted_alternance_romes'] = boosted_alternance_romes

    return doc
Esempio n. 2
0
 def name(self):
     """
     Return the sanitized display name.

     The upper-cased `office_name` is used when it is truthy, otherwise the
     upper-cased `company_name`; when both are falsy the literal 'sans nom'
     is used as is.
     """
     if self.office_name:
         return encoding_util.sanitize_string(self.office_name.upper())
     if self.company_name:
         return encoding_util.sanitize_string(self.company_name.upper())
     # No usable name at all: fall back to the placeholder.
     return encoding_util.sanitize_string('sans nom')
Esempio n. 3
0
def get_fields_from_csv_line(line, delimiter='|'):
    """
    Split a raw CSV line on `delimiter` and return the list of sanitized fields.

    A field whose sanitized value is exactly 'NULL' is returned as an empty string.
    """
    # get rid of invisible space characters (\xc2) if present
    cleaned_line = line.strip().replace('\xc2', '')

    fields = []
    for raw_field in cleaned_line.split(delimiter):
        value = encoding_util.sanitize_string(raw_field)
        # The extracted CSV files can contain either '' OR 'NULL' for null values:
        # normalize 'NULL' to an empty string.
        if value == 'NULL':
            value = ''
        fields.append(value)

    return fields
Esempio n. 4
0
def get_fields_from_csv_line(line):
    """
    Split a raw CSV line into its sanitized fields.

    The line is stripped, at most one enclosing quote character (single or
    double) is removed from each end, and the remainder is split on the
    special delimiter character before each field is sanitized.

    A blank line, or a line reduced to a single quote character, no longer
    raises `IndexError`: it is split like any other line, which yields a
    single field built from the empty string.
    """
    # get rid of invisible space characters (\xc2) if present
    line = line.strip().replace('\xc2', '')

    # ignore enclosing quotes if present; startswith/endswith accept a tuple
    # and simply return False on an empty string, unlike indexing line[0].
    quotes = ("'", '"')
    if line.startswith(quotes):
        line = line[1:]
    if line.endswith(quotes):
        line = line[:-1]

    # split using delimiter special character \xa5
    return [encoding_util.sanitize_string(f) for f in line.split('\xa5')]
Esempio n. 5
0
def get_fields_from_csv_line(line, delimiter='|'):
    """
    Split a raw CSV line on `delimiter` and return the list of sanitized fields.
    """
    # get rid of invisible space characters (\xc2) if present
    cleaned_line = line.strip().replace('\xc2', '')

    sanitized_fields = []
    for raw_field in cleaned_line.split(delimiter):
        sanitized_fields.append(encoding_util.sanitize_string(raw_field))
    return sanitized_fields