def get_office_as_es_doc(office):
    """
    Build the dictionary used to index an office in ElasticSearch.

    `office` may be either an `Office` or an `OfficeAdminAdd` instance.
    The returned dict always carries the identity, contact, score and flag
    fields; `locations` and the per-ROME score fields are only added when
    the corresponding data is present.
    """
    # On an `OfficeAdminAdd` instance the headcount is an object exposing a
    # `code` attribute; otherwise the raw value is used directly.
    raw_headcount = office.headcount
    raw_headcount = getattr(raw_headcount, 'code', raw_headcount)
    try:
        headcount = int(raw_headcount)
    except (ValueError, TypeError):
        # Anything that cannot be read as an integer is indexed as 0.
        headcount = 0

    # Free-text fields go through `encoding_util.sanitize_string` before
    # being indexed.
    sanitize = encoding_util.sanitize_string
    clean_name = sanitize(office.office_name)
    clean_email = sanitize(office.email)
    clean_website = sanitize(office.website)

    doc = {
        'naf': office.naf,
        'siret': office.siret,
        'score': office.score,
        'score_alternance': office.score_alternance,
        'headcount': headcount,
        'name': clean_name,
        'email': clean_email,
        'tel': office.tel,
        'website': clean_website,
        'department': office.departement,
    }

    # Flags are stored as integers in the index.
    for flag_name in (
        'flag_alternance',
        'flag_junior',
        'flag_senior',
        'flag_handicap',
        'flag_pmsmp',
    ):
        doc[flag_name] = int(getattr(office, flag_name))

    if office.y and office.x:
        # Use an array to allow multiple locations per document,
        # see https://goo.gl/fdTaEM
        # Multiple locations may be added later via the admin UI.
        location = {'lat': office.y, 'lon': office.x}
        doc['locations'] = [location]

    rome_data = get_scores_by_rome_and_boosted_romes(office)
    rome_scores, alternance_scores, boosted, alternance_boosted = rome_data

    # Boosted ROME lists are only indexed alongside a non-empty score mapping.
    if rome_scores:
        doc.update(scores_by_rome=rome_scores, boosted_romes=boosted)

    if alternance_scores:
        doc.update(
            scores_alternance_by_rome=alternance_scores,
            boosted_alternance_romes=alternance_boosted,
        )

    return doc
def name(self):
    """
    Return the sanitized display name.

    The office name is preferred, then the company name, both upper-cased.
    When neither is set, the literal placeholder 'sans nom' is used as is.
    """
    label = self.office_name or self.company_name
    display = label.upper() if label else 'sans nom'
    return encoding_util.sanitize_string(display)
def get_fields_from_csv_line(line, delimiter='|'):
    """
    Split one CSV line into a list of sanitized field values.

    The line is stripped, split on `delimiter`, and each field is passed
    through `encoding_util.sanitize_string`. Fields whose sanitized value is
    exactly 'NULL' are returned as empty strings.
    """
    # Drop surrounding whitespace and any invisible \xc2 characters.
    cleaned_line = line.strip().replace('\xc2', '')

    fields = []
    for raw_field in cleaned_line.split(delimiter):
        value = encoding_util.sanitize_string(raw_field)
        # Extracted CSV files may encode null values as either '' or 'NULL';
        # normalise both to the empty string.
        fields.append('' if value == 'NULL' else value)
    return fields
def get_fields_from_csv_line(line):
    """
    Split one CSV line into a list of sanitized field values.

    The line is stripped, one enclosing quote character (single or double)
    is removed from each end if present, and the remainder is split on the
    special delimiter character used by these files. Each field is passed
    through `encoding_util.sanitize_string`.

    An empty or whitespace-only line no longer raises `IndexError`; it
    yields a single field built from the empty string.
    """
    quote_chars = ("'", '"')

    # Drop surrounding whitespace and any invisible \xc2 characters.
    line = line.strip().replace('\xc2', '')

    # Ignore enclosing quotes if present. `startswith`/`endswith` are used
    # instead of indexing so that an empty line (or a line consisting of a
    # single quote character) is handled without raising.
    if line.startswith(quote_chars):
        line = line[1:]
    if line.endswith(quote_chars):
        line = line[:-1]

    # Split using the delimiter special character \xa5.
    return [encoding_util.sanitize_string(f) for f in line.split('\xa5')]
def get_fields_from_csv_line(line, delimiter='|'):
    """
    Split one CSV line into a list of sanitized field values.

    The line is stripped, split on `delimiter`, and each resulting field is
    passed through `encoding_util.sanitize_string`.
    """
    # Drop surrounding whitespace and any invisible \xc2 characters.
    cleaned_line = line.strip().replace('\xc2', '')
    raw_fields = cleaned_line.split(delimiter)
    return list(map(encoding_util.sanitize_string, raw_fields))