def insert_into_importer_cycle_infos(file_name, months_time):
    logger.info(
        f"\n Start : Insert data into importer_cycle_infos from file {file_name}"
    )

    # Insert into importer cycle infos

    # TODO : Check that the prediction start date and end date match these ones
    execution_date = get_date_from_file_name(file_name)
    prediction_start_date = execution_date + relativedelta(
        months=+1) + relativedelta(day=1)  # First day of next month
    prediction_end_date = prediction_start_date + relativedelta(
        months=+months_time)
    importer_cycle_infos = PerfImporterCycleInfos(
        execution_date=execution_date,
        prediction_start_date=prediction_start_date,
        prediction_end_date=prediction_end_date,
        file_name=file_name,
        computed=False,
        on_google_sheets=False)
    db_session.add(importer_cycle_infos)
    db_session.commit()
    importer_cycle_infos = PerfImporterCycleInfos.query.filter(
        PerfImporterCycleInfos.file_name == file_name).first()

    logger.info(f"id = {importer_cycle_infos._id}")
    logger.info(f"execution_date = {execution_date}")
    logger.info(f"prediction_start_date = {prediction_start_date}")
    logger.info(f"prediction_end_date = {prediction_end_date}")
    logger.info(f"file_name = {file_name}")
    logger.info("insertion into importer_cycle_infos OK")

    return importer_cycle_infos
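
The prediction window above is pure dateutil.relativedelta arithmetic: the start date snaps to the first day of the month following the execution date, and the end date lands months_time months later. A minimal standalone sketch of that computation, with a hypothetical execution date:

from datetime import datetime
from dateutil.relativedelta import relativedelta

execution_date = datetime(2020, 9, 10)  # hypothetical date parsed from a file name
months_time = 6

# First day of the month following the execution date.
prediction_start_date = execution_date + relativedelta(months=+1) + relativedelta(day=1)
# End of the prediction window, months_time months later.
prediction_end_date = prediction_start_date + relativedelta(months=+months_time)

print(prediction_start_date)  # 2020-10-01 00:00:00
print(prediction_end_date)    # 2021-04-01 00:00:00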
Example #2
def set_importer_cycle_infos_google_sheets_boolean(importer_cycle_infos_id):
    for ici_id in importer_cycle_infos_id:
        ici = PerfImporterCycleInfos.query.filter(
            PerfImporterCycleInfos._id == ici_id).first()
        ici.on_google_sheets = True
        db_session.add(ici)
        db_session.commit()
    def setUp(self):
        """
        Populate the DB with data required for these tests to work.
        """
        super(UserAccountTest, self).setUp()

        self.user = User(email='*****@*****.**',
                         gender='male',
                         first_name='John',
                         last_name='Doe')
        db_session.add(self.user)
        db_session.flush()

        self.office1 = Office(
            departement='57',
            siret='00000000000001',
            company_name='1',
            headcount='5',
            city_code='57070',
            zipcode='57070',
            naf='4646Z',
            score=90,
            x=6.166667,
            y=49.133333,
        )
        self.office2 = Office(
            departement='57',
            siret='00000000000002',
            company_name='1',
            headcount='5',
            city_code='57070',
            zipcode='57070',
            naf='4646Z',
            score=90,
            x=6.166667,
            y=49.133333,
        )
        db_session.add_all([self.office1, self.office2])
        db_session.flush()

        self.user_social_auth = UserSocialAuth(
            provider=PEAMOpenIdConnect.name,
            extra_data={'id_token': 'fake'},
            user_id=self.user.id,
        )
        self.fav1 = UserFavoriteOffice(user_id=self.user.id,
                                       office_siret=self.office1.siret)
        self.fav2 = UserFavoriteOffice(user_id=self.user.id,
                                       office_siret=self.office2.siret)
        db_session.add_all([self.user_social_auth, self.fav1, self.fav2])
        db_session.flush()

        db_session.commit()

        self.assertEqual(db_session.query(User).count(), 1)
        self.assertEqual(db_session.query(Office).count(), 2)
        self.assertEqual(db_session.query(UserFavoriteOffice).count(), 2)
        self.assertEqual(db_session.query(UserSocialAuth).count(), 1)
Example #4
    def test_office_admin_add(self):
        form = {
            "siret": "78548035101646",
            "company_name": "SUPERMARCHES MATCH",
            "office_name": "SUPERMARCHES MATCH",
            "naf": "4711D",
            "street_number": "45",
            "street_name": "AVENUE ANDRE MALRAUX",
            "city_code": "57463",
            "zipcode": "57000",
            "email": "*****@*****.**",
            "tel": "0387787878",
            "website": "http://www.supermarchesmatch.fr",
            "flag_alternance": 0,
            "flag_junior": 0,
            "flag_senior": 0,
            "flag_handicap": 0,
            "departement": "57",
            "headcount": "12",
            "score": 90,
            "score_alternance": 75,
            "x": 6.17952,
            "y": 49.1044,
            "reason": "Demande de mise en avant",
        }

        with self.test_request_context():
            # Create an admin user
            self.user = User(email='*****@*****.**', gender='male',
                             first_name='John', last_name='Doe', active=True,
                             is_admin=True)
            db_session.add(self.user)
            db_session.flush()

            user_social_auth = UserSocialAuth(
                provider=PEAMOpenIdConnect.name,
                extra_data={'id_token': 'fake'},
                user_id=self.user.id,
            )
            db_session.add(user_social_auth)
            db_session.commit()

            # Login as user admin
            self.user = db_session.query(User).filter_by(id=self.user.id).first()
            self.assertEqual(db_session.query(User).count(), 1)
            self.login(self.user)

            # Create OfficeAdminAdd
            self.assertEqual(0, OfficeAdminAdd.query.filter_by(id=1).count())
            self.app.post(url_for('officeadminadd.create_view'), data=form)
            self.assertEqual(1, OfficeAdminAdd.query.filter_by(id=1).count())

            # Delete OfficeAdminAdd
            self.app.post(url_for('officeadminadd.delete_view'), data={'id': 1})
            self.assertEqual(0, OfficeAdminAdd.query.filter_by(id=1).count())
Example #5
def cbs_delete_records():
    try:
        print('> Deleting CBS records...')
        sql = text("""
            delete from labonneboite.etablissements_third_party_update where reason = :reason;
        """)
        db_session.execute(sql, {'reason': REASON_KEY})
        db_session.commit()
        print('> Done')
    except Exception as err:
        print('> error executing request', err)
Example #6
 def add_favorite(cls, user, office):
     """
     Add a favorite to a user.
     Avoid as much as possible replication errors by ignoring duplicates.
     """
     statement = cls.__table__.insert().prefix_with("IGNORE").values(
         user_id=user.id,
         office_siret=office.siret,
     )
     db_session.execute(statement)
     db_session.commit()
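
prefix_with("IGNORE") is what turns the generated statement into MySQL's INSERT IGNORE, so duplicate favorites are silently skipped instead of raising. A rough sketch of what gets emitted, using a toy table in place of cls.__table__:

from sqlalchemy import Table, Column, Integer, String, MetaData
from sqlalchemy.dialects import mysql

metadata = MetaData()
# Toy table standing in for UserFavoriteOffice.__table__; real column types may differ.
favorites = Table(
    'user_favorite_offices', metadata,
    Column('user_id', Integer, primary_key=True),
    Column('office_siret', String(14), primary_key=True),
)

statement = favorites.insert().prefix_with("IGNORE").values(user_id=1, office_siret='00000000000001')
# Roughly: INSERT IGNORE INTO user_favorite_offices (user_id, office_siret) VALUES (%s, %s)
print(statement.compile(dialect=mysql.dialect()))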
Example #7
def cbs_insert_records():
    try:
        print('> Inserting CBS records...', file)
        sql = text("""
            LOAD DATA LOCAL INFILE '%s' into table etablissements_third_party_update FIELDS ENCLOSED BY '\"' TERMINATED BY ','  LINES TERMINATED BY '\\n' IGNORE 1 ROWS (@score,@siret) SET score_alternance=@score, sirets=@siret, reason='%s', date_created=NOW();
        """ % (file, REASON_KEY))
        db_session.execute(sql)
        db_session.commit()
        print('> Done')
    except Exception as err:
        print('> error executing request', err,
              '\n> Did you forget to set the env var `ENABLE_DB_INFILE=1`?')
Example #8
def load_csv_perf_division_per_rome(filename, delimiter=';'):
    for row in load_data.load_csv_file(filename, delimiter):
        perf_div_per_rome = PerfDivisionPerRome(_id=row[0],
                                                importer_cycle_infos_id=row[1],
                                                naf=row[3],
                                                rome=row[2],
                                                threshold_lbb=row[4],
                                                nb_bonne_boites_lbb=row[5],
                                                threshold_lba=row[6],
                                                nb_bonne_boites_lba=row[7])
        db_session.add(perf_div_per_rome)
        db_session.commit()
Example #9
 def get_geocode_from_csv(self, csv_api_path):
     logger.info("Parsing CSV sent back by API : {}".format(csv_api_path))
     df_geocodes = pd.read_csv(csv_api_path, dtype={'siret': str})
     for index, row in df_geocodes.iterrows():
         if not numpy.isnan(row.latitude):
             coordinates = [row.longitude, row.latitude]
             geolocation = Geolocation.get(row.full_address)
             # There should not already be an existing geolocation, but experience with this job
             # shows that sometimes the coordinates related to a siret are not updated even though
             # the geolocation was already added to the database.
             if geolocation:
                 logger.info("Geolocation already found")
                 GEOCODING_STATS['updatable_coordinates'] = GEOCODING_STATS.get(
                     'updatable_coordinates', 0) + 1
                 coordinates_updates.append(
                     [row.siret, coordinates])
             else:
                 logger.info("Geolocation not found")
                 geolocation = Geolocation(
                     full_address=row.full_address,
                     x=coordinates[0],
                     y=coordinates[1]
                 )
                 db_session.add(geolocation)
                 # As this method is run in parallel jobs, commit often so that each job
                 # sees the others' changes, and roll back in case of rare simultaneous
                 # changes on the same geolocation.
                 try:
                     db_session.commit()
                     # usually flush() is called as part of commit()
                     # however it is not the case in our project
                     # because autoflush=False
                     db_session.flush()
                     GEOCODING_STATS['flushes'] = GEOCODING_STATS.get(
                         'flushes', 0) + 1
                 except IntegrityError:
                     # happens when a job tries to insert an already existing full_address
                     # rollback needed otherwise db_session is left
                     # in a state unusable by the other parallel jobs
                     db_session.rollback()
                     GEOCODING_STATS['rollbacks'] = GEOCODING_STATS.get(
                         'rollbacks', 0) + 1
                 if coordinates:
                     GEOCODING_STATS['updatable_coordinates'] = GEOCODING_STATS.get(
                         'updatable_coordinates', 0) + 1
                     coordinates_updates.append(
                         [row.siret, coordinates])
         else:
             GEOCODING_STATS['coordinates_not_found'] = GEOCODING_STATS.get(
                 'coordinates_not_found', 0) + 1
Example #10
def remove_scam_emails():
    scam_emails = get_latest_scam_emails()

    for scam_emails_chunk in chunks(scam_emails, 100):
        query = Office.query.filter(Office.email.in_(scam_emails_chunk))
        office_count = query.count()
        if office_count:
            query.update({Office.email: ''}, synchronize_session="fetch")
            db_session.commit()
        logger.info(
            "Removed a chunk of %d scam emails from %d offices.",
            len(scam_emails_chunk),
            office_count,
        )
Example #11
 def get_or_create(cls, defaults=None, **kwargs):
     try:
         return db_session.query(cls).filter_by(**kwargs).one(), False
     except NoResultFound:
         if defaults:
             kwargs.update(defaults)
         instance = cls(**kwargs)
         try:
             db_session.add(instance)
             db_session.commit()
             return instance, True
         except IntegrityError:
             # A concurrent transaction inserted the same row first: fetch it
             # and report that this call did not create it.
             db_session.rollback()
             return db_session.query(cls).filter_by(**kwargs).one(), False
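
A hypothetical call site, to make the (instance, created) contract explicit; Geolocation is only an assumption here, any model using this mixin-style helper would do:

geolocation, created = Geolocation.get_or_create(
    defaults={'x': 2.3622, 'y': 48.8815},
    full_address='1 rue de la Paix 75002 Paris',  # hypothetical address
)
if created:
    logger.info("cached a new geolocation for %s", geolocation.full_address)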
Example #12
    def test_office_admin_remove(self):
        # Create officeAdminRemove
        form = {
            'siret': '01234567891234',
            'name': 'Test company',
            'reason': 'N/A',
            'initiative': 'office',
        }

        with self.test_request_context():
            # Create an admin user
            self.user = User(email='*****@*****.**',
                             gender='male',
                             first_name='John',
                             last_name='Doe',
                             active=True,
                             is_admin=True)
            db_session.add(self.user)
            db_session.flush()

            user_social_auth = UserSocialAuth(
                provider=PEAMOpenIdConnect.name,
                extra_data={'id_token': 'fake'},
                user_id=self.user.id,
            )
            db_session.add(user_social_auth)
            db_session.commit()

            # Login as user admin
            self.user = db_session.query(User).filter_by(
                id=self.user.id).first()
            self.assertEqual(db_session.query(User).count(), 1)
            self.login(self.user)

            # Create OfficeAdminRemove
            self.assertEqual(
                0,
                OfficeAdminRemove.query.filter_by(
                    siret='01234567891234').count())
            self.app.post(url_for('officeadminremove.create_view'), data=form)
            self.assertEqual(
                1,
                OfficeAdminRemove.query.filter_by(
                    siret='01234567891234').count())

            # Delete OfficeAdminRemove
            self.app.post(url_for('officeadminremove.delete_view'),
                          data={'id': 1})
            self.assertEqual(0,
                             OfficeAdminRemove.query.filter_by(id=1).count())
Example #13
def load_csv_perf_importer_cycle_infos(filename, delimiter=';'):
    for row in load_data.load_csv_file(filename, delimiter):
        perf_importer_cycle_info = PerfImporterCycleInfos(
            _id=row[0],
            execution_date=datetime.strptime(row[1], '%Y-%m-%d %H:%M:%S.%f'),
            prediction_start_date=datetime.strptime(row[2],
                                                    '%Y-%m-%d %H:%M:%S.%f'),
            prediction_end_date=datetime.strptime(row[3],
                                                  '%Y-%m-%d %H:%M:%S.%f'),
            file_name=row[4],
            computed=(row[5] == 'True'),
            on_google_sheets=(row[6] == 'True'))
        db_session.add(perf_importer_cycle_info)
        db_session.commit()
def logout(user_social_auth=None):
    """
    Log a user out.

    Param `user_social_auth`: a `UserSocialAuth` instance. `None` most of the time, except when a user
    is coming from the `user.account_delete` view. This param is intended to be passed when the view
    is called directly as a Python function, i.e. not with a `redirect()`.
    """
    if not current_user.is_authenticated:
        return redirect(url_for('root.home'))

    logged_with_peam = session.get(
        'social_auth_last_login_backend') == PEAMOpenIdConnect.name
    if logged_with_peam:
        if not user_social_auth:
            user_social_auth = get_user_social_auth(current_user.id)
        if user_social_auth:
            id_token = user_social_auth.extra_data['id_token']

    # Force delete PEAMU token.
    db_session.query(UserSocialAuth).filter_by(user_id=current_user.id).delete()
    db_session.commit()

    # Log the user out and destroy the LBB session.
    activity.log('deconnexion')
    logout_user()

    # Clean the session: drop Python Social Auth info because it isn't done by `logout_user`.
    if 'social_auth_last_login_backend' in session:
        # Some backends have a `backend-name_state` stored in session as required by e.g. Oauth2.
        social_auth_state_key = '%s_state' % session['social_auth_last_login_backend']
        if social_auth_state_key in session:
            session.pop(social_auth_state_key)
        session.pop('social_auth_last_login_backend')

    # Log the user out from PEAM and destroy the PEAM session.
    if logged_with_peam and user_social_auth:
        params = {
            'id_token_hint': id_token,
            'redirect_uri': url_for('auth.logout_from_peam_callback', _external=True),
        }
        peam_logout_url = '%s/compte/deconnexion?%s' % (
            settings.PEAM_AUTH_BASE_URL, urlencode(params))
        # After this redirect, the user will be redirected to the LBB website `logout_from_peam_callback` route.
        return redirect(peam_logout_url)

    return redirect(url_for('root.home'))
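
The PEAM logout redirect at the end is plain urlencode work. A standalone sketch with placeholder values (the real base URL comes from settings.PEAM_AUTH_BASE_URL and the token from the user's UserSocialAuth row):

from urllib.parse import urlencode

peam_auth_base_url = 'https://peam.example'  # placeholder for settings.PEAM_AUTH_BASE_URL
params = {
    'id_token_hint': 'fake-id-token',        # placeholder for user_social_auth.extra_data['id_token']
    'redirect_uri': 'https://lbb.example/logout-from-peam-callback',  # placeholder callback URL
}
peam_logout_url = '%s/compte/deconnexion?%s' % (peam_auth_base_url, urlencode(params))
print(peam_logout_url)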
Example #15
def remove_scam_emails():
    scam_emails = get_latest_scam_emails()
    for scam_emails_chunk in chunks(scam_emails, 100):
        query = Office.query.filter(Office.email.in_(scam_emails_chunk))
        office_count = query.count()
        if office_count:
            history = []
            for office in query.all():
                history.append(
                    HistoryBlacklist(email=office.email,
                                     datetime_removal=datetime.datetime.now()))
            db_session.add_all(history)
            query.update({Office.email: ''}, synchronize_session="fetch")
            db_session.commit()
        logger.info(
            "Removed a chunk of %d scam emails from %d offices.",
            len(scam_emails_chunk),
            office_count,
        )
    def test_get_user_social_auth(self):
        """
        Test the `get_user_social_auth()` function.
        """
        user = User(email='*****@*****.**', gender='male', first_name='John', last_name='Doe')
        db_session.add(user)
        db_session.flush()

        expected_user_social_auth = UserSocialAuth(provider=PEAMOpenIdConnect.name, extra_data=None, user_id=user.id)
        db_session.add(expected_user_social_auth)
        db_session.flush()

        db_session.commit()

        self.assertEqual(db_session.query(User).count(), 1)
        self.assertEqual(db_session.query(UserSocialAuth).count(), 1)

        user_social_auth = get_user_social_auth(user.id)
        self.assertEqual(user_social_auth.id, expected_user_social_auth.id)
    def test_logout(self):
        """
        Test that the session is cleaned after a logout.
        """

        user = User(email='*****@*****.**',
                    gender='male',
                    first_name='John',
                    last_name='Doe')
        db_session.add(user)
        db_session.flush()

        # This `UserSocialAuth` entry will be required later by the logout function.
        user_social_auth = UserSocialAuth(
            provider=PEAMOpenIdConnect.name,
            extra_data={'id_token': 'fake'},
            user_id=user.id,
        )
        db_session.add(user_social_auth)
        db_session.commit()

        with self.test_request_context():

            with self.app.session_transaction() as sess:
                # This should not be deleted by a login or logout.
                sess['this_should_not_be_deleted'] = 'foo'

            self.login(user)

            with self.app.session_transaction() as sess:
                self.assertIn('this_should_not_be_deleted', sess)
                self.assertIn('user_id', sess)
                self.assertIn('social_auth_last_login_backend', sess)
                self.assertIn('peam-openidconnect_state', sess)

            self.logout()

            with self.app.session_transaction() as sess:
                self.assertIn('this_should_not_be_deleted', sess)
                self.assertNotIn('user_id', sess)
                self.assertNotIn('social_auth_last_login_backend', sess)
                self.assertNotIn('peam-openidconnect_state', sess)
    def setUp(self, *args, **kwargs):
        super(AdminTest, self).setUp(*args, **kwargs)

        self.user = User(email='*****@*****.**',
                         gender='male',
                         first_name='John',
                         last_name='Doe')
        db_session.add(self.user)
        db_session.flush()

        # Required for `self.logout` to work which looks for the `extra_data` attribute.
        user_social_auth = UserSocialAuth(
            provider=PEAMOpenIdConnect.name,
            extra_data={'id_token': 'fake'},
            user_id=self.user.id,
        )
        db_session.add(user_social_auth)
        db_session.commit()

        self.assertEqual(db_session.query(User).count(), 1)
 def test_clean(self):
     """
     Test `OfficeAdminExtraGeoLocation.clean()`.
     """
     extra_geolocation = OfficeAdminExtraGeoLocation(
         siret="38524664000176",
         codes="75110\n\n\n\n\n\n\n57616",
         reason="Paris 10 + Metz Saint Julien",
     )
     db_session.add(extra_geolocation)
     db_session.commit()
     # The `clean()` method should have been called automatically.
     extra_geolocation = db_session.query(
         OfficeAdminExtraGeoLocation).first()
     # Multiple newlines should have been removed.
     self.assertEqual(extra_geolocation.codes, '57616\n75110')
     # Corresponding Lat/Lon coords should have been found and stored.
     self.assertEqual(
         extra_geolocation.geolocations,
         '[[49.135208952059884, 6.207906756168173], [48.8815994262695, 2.36229991912841]]'
     )
Example #20
def add_offices():
    """
    Add offices (complete the data provided by the importer).
    """
    for office_to_add in db_session.query(OfficeAdminAdd).all():

        office = Office.query.filter_by(siret=office_to_add.siret).first()

        # Only create a new office if it does not already exist.
        # This guarantees that the importer data will always have precedence.
        if not office:

            # The `headcount` field of an `OfficeAdminAdd` instance has a `code` attribute.
            if hasattr(office_to_add.headcount, 'code'):
                headcount = office_to_add.headcount.code
            else:
                headcount = office_to_add.headcount

            # Create the new office in DB.
            new_office = Office()
            # Use `inspect` because `Office` columns are named distinctly from attributes.
            for field_name in list(inspect(Office).columns.keys()):
                try:
                    value = getattr(office_to_add, field_name)
                except AttributeError:
                    # Some fields are not shared between `Office` and `OfficeAdminAdd`.
                    continue
                if field_name == 'headcount':
                    value = headcount
                setattr(new_office, field_name, value)
            db_session.add(new_office)
            db_session.commit()

            # Create the new office in ES.
            doc = get_office_as_es_doc(office_to_add)
            es.Elasticsearch().create(index=settings.ES_INDEX,
                                      doc_type=es.OFFICE_TYPE,
                                      id=office_to_add.siret,
                                      body=doc)
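
The copy loop relies on inspect(Office).columns.keys(), which yields the mapped attribute names rather than the raw database column names. A small sketch on a toy model (SQLAlchemy 1.4+ import path; the column names are made up):

from sqlalchemy import Column, Integer, String, inspect
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Shop(Base):  # toy model standing in for Office
    __tablename__ = 'shops'
    id = Column(Integer, primary_key=True)
    company_name = Column('raisonsociale', String(191))  # attribute name != column name
    headcount = Column('trancheeffectif', String(2))

# Keys are the attribute names, which is why inspect() is used in add_offices().
print(list(inspect(Shop).columns.keys()))
# ['id', 'company_name', 'headcount']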
Example #21
def load_csv_perf_prediction_and_effective_h(filename, delimiter=';'):
    for row in load_data.load_csv_file(filename, delimiter):
        perf_prediction_and_effective_hirings = PerfPredictionAndEffectiveHirings(
            _id=row[0],
            importer_cycle_infos_id=row[1],
            siret=row[2],
            naf=row[3],
            city_code=row[4],
            zipcode=row[5],
            departement=row[6],
            company_name=row[7],
            office_name=row[8],
            lbb_nb_predicted_hirings_score=row[9],
            lba_nb_predicted_hirings_score=row[10],
            lbb_nb_predicted_hirings=row[11],
            lba_nb_predicted_hirings=row[12],
            lbb_nb_effective_hirings=row[13],
            lba_nb_effective_hirings=row[14],
            is_a_bonne_boite=(row[15] == "True"),
            is_a_bonne_alternance=(row[16] == "True"))
        db_session.add(perf_prediction_and_effective_hirings)
        db_session.commit()
Example #22
def account_delete():
    """
    Ask for a confirmation, then delete the current user account and all of its information.
    """
    form = UserAccountDeleteForm(request.form)

    if request.method == 'POST' and form.validate():

        # Store the current `UserSocialAuth` instance in memory because it will be deleted
        # but it will also be needed later to properly logout the user from PEAM.
        user_social_auth = get_user_social_auth(current_user.id)

        # Now we can safely delete the current `UserSocialAuth` instance.
        # We have to delete it because it has a foreign key to the User table.
        # We don't need to deal with the other tables of Social Auth, see:
        # https://python-social-auth.readthedocs.io/en/latest/storage.html
        db_session.query(UserSocialAuth).filter_by(
            user_id=current_user.id).delete()

        # Delete the current user.
        # The user's favorites will be deleted at the same time because of the `ondelete='CASCADE'`
        # on the `user_id` field of the `UserFavoriteOffice` model.
        db_session.query(User).filter_by(id=current_user.id).delete()

        db_session.commit()

        message = "La suppression de votre compte a bien été effectuée."
        flash(message, 'warning')

        # Return the `logout` view directly. It allows us to pass the full
        # `user_social_auth` object as a parameter.
        return logout(user_social_auth=user_social_auth)

    context = {
        'form': form,
    }
    return render_template('user/account_delete.html', **context)
Example #23
        def fonction_with_history():
            # Get the job_name argument and remove the .py extension in the job name
            job = script_name.split('.')
            if job[1] == 'py':
                job_name = job[0]
            else:
                raise BadDecoratorUse

            # Check that the previous job is done to start this one
            # If the previous job is not done, it will raise an exception
            info_previous_job = get_previous_job_info(job_name)
            if info_previous_job['is_completed'] is False:
                print(
                    f"The previous job '{info_previous_job['name']}' is not done "
                )
                raise PreviousJobNotDone
            else:
                print(
                    f"The previous job '{info_previous_job['name']}' is done. We can run this one ! "
                )

            # Save the start of this job in the database
            start_date = datetime.now()
            history = HistoryImporterJobs(start_date=start_date,
                                          end_date=None,
                                          job_name=job_name,
                                          status=StatusJobExecution['start'],
                                          exception=None,
                                          trace_log=None)
            db_session.add(history)
            db_session.commit()

            # If the job completes successfully, save it with the 'done' status
            try:
                result = function_to_execute()
                history.end_date = datetime.now()
                history.status = StatusJobExecution['done']
                db_session.commit()
            # Otherwise, if an error occurred, save it in the DB with an 'error' status and re-raise
            except Exception as e:
                history.end_date = datetime.now()
                history.exception = type(e).__name__
                history.trace_log = traceback.format_exc()
                history.status = StatusJobExecution['error']
                db_session.commit()
                raise

            return result
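
fonction_with_history is only the inner wrapper; the enclosing decorator is not shown in this excerpt. A hedged sketch of the shape such a decorator would likely have (names here are assumptions, not the project's actual API):

def with_importer_job_history(script_name):        # hypothetical decorator name
    def decorator(function_to_execute):
        def fonction_with_history():
            ...  # body as shown above: check previous job, record start, run, record end/error
        return fonction_with_history
    return decorator

# Hypothetical usage on a job entry point:
# @with_importer_job_history(os.path.basename(__file__))
# def run_main():
#     ...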
Example #24
 def save(self, commit=True):
     db_session.add(self)
     if commit:
         db_session.commit()
     return self
Example #25
 def delete(self, commit=True):
     db_session.delete(self)
     return commit and db_session.commit()
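
Note that db_session.commit() returns None, so delete() returns None when it commits and False otherwise; callers should not rely on its return value. A hypothetical usage of both helpers, assuming they are mixed into the models:

office = Office(siret='00000000000001', company_name='1', departement='57')  # hypothetical instance
office.save()               # add + commit
office.company_name = '2'
office.save(commit=False)   # add only; the caller commits later
office.delete()             # delete + commit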
Example #26
    def find_coordinates_for_address(self):
        """
        finding coordinates for an address based on the BAN (base d'adresses nationale),
        an online governmental service.
        """
        coordinates = None
        # FIXME refer to settings.API_ADRESS_BASE_URL and make sure we don't
        # make real requests in unit tests
        BASE = "http://api-adresse.data.gouv.fr/search/?q="
        geocoding_request = "%s%s" % (BASE, self.full_address)
        geolocation = Geolocation.get(self.full_address)

        if geolocation:
            # coordinates were already queried and cached before
            coordinates = [geolocation.x, geolocation.y]
            GEOCODING_STATS['cache_hits'] = GEOCODING_STATS.get(
                'cache_hits', 0) + 1
        else:
            # coordinates need to be queried and cached
            response = session.get(geocoding_request)
            response.close()
            GEOCODING_STATS['cache_misses'] = GEOCODING_STATS.get(
                'cache_misses', 0) + 1
            if response.status_code == 200:
                try:
                    results = response.json()['features']
                    if len(results) >= 1:
                        coordinates = results[0]['geometry']['coordinates']
                        # let's cache the result for later computations
                        geolocation = Geolocation(
                            full_address=self.full_address,
                            x=coordinates[0],
                            y=coordinates[1])
                        db_session.add(geolocation)

                        # As this method is run in parallel jobs, commit often so that each job
                        # sees the others' changes, and roll back in case of rare simultaneous
                        # changes on the same geolocation.
                        try:
                            db_session.commit()
                            # usually flush() is called as part of commit()
                            # however it is not the case in our project
                            # because autoflush=False
                            db_session.flush()
                            GEOCODING_STATS['flushes'] = GEOCODING_STATS.get(
                                'flushes', 0) + 1
                        except IntegrityError:
                            # happens when a job tries to insert an already existing full_address
                            # rollback needed otherwise db_session is left
                            # in a state unusable by the other parallel jobs
                            db_session.rollback()
                            GEOCODING_STATS['rollbacks'] = GEOCODING_STATS.get(
                                'rollbacks', 0) + 1
                except ValueError:
                    logger.warning('ValueError in json-ing features result %s',
                                   response.text)

        if coordinates:
            if coordinates == self.initial_coordinates:
                GEOCODING_STATS['unchanged_coordinates'] = GEOCODING_STATS.get(
                    'unchanged_coordinates', 0) + 1
            else:
                GEOCODING_STATS['updatable_coordinates'] = GEOCODING_STATS.get(
                    'updatable_coordinates', 0) + 1
                self.updates.append([self.siret, coordinates])
        else:
            GEOCODING_STATS['coordinates_not_found'] = GEOCODING_STATS.get(
                'coordinates_not_found', 0) + 1
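
Stripped of the caching and stats bookkeeping, the BAN call above boils down to one HTTP request and a dig into features[0]['geometry']['coordinates']. A standalone sketch with the requests library and a made-up address (passing the address through params also takes care of the URL encoding the FIXME mentions):

import requests

full_address = "45 avenue Andre Malraux 57000 Metz"  # hypothetical address
response = requests.get("http://api-adresse.data.gouv.fr/search/", params={"q": full_address})
if response.status_code == 200:
    features = response.json()['features']
    if features:
        longitude, latitude = features[0]['geometry']['coordinates']  # BAN returns [lon, lat]
        print(longitude, latitude)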
def compute_effective_and_predicted_hirings():
    logger.info(f"\n Start : Computing effective hirings")

    importer_cycles_infos = PerfImporterCycleInfos.query.filter(
        PerfImporterCycleInfos.computed == False).all()
    importer_cycles_infos_to_compute = []
    for ici in importer_cycles_infos:
        if os.environ["LBB_ENV"] in ["development", "test"]:
            importer_cycles_infos_to_compute.append(ici)
            continue
        if ici.prediction_end_date < datetime.now():
            importer_cycles_infos_to_compute.append(ici)

    logger.info(
        f"Importer cycles infos which have not been computed yet : {[i.file_name for i in importer_cycles_infos_to_compute]}"
    )

    for ici in importer_cycles_infos_to_compute:
        perf_division_per_rome_dict = load_perf_division_per_rome_dict()

        naf_not_founds = set()
        nb_companies_with_naf_not_found = 0

        logger.info(
            f"Start computing for importer cycle infos : {ici._id} - {ici.file_name}"
        )

        engine = import_util.create_sqlalchemy_engine()
        ppaeh = PerfPredictionAndEffectiveHirings.query.filter(
            PerfPredictionAndEffectiveHirings.importer_cycle_infos_id ==
            ici._id)
        columns_companies = [
            "_id", "siret", "naf", "lbb_nb_predicted_hirings_score",
            "lba_nb_predicted_hirings_score"
        ]
        dict_df_companies = {}
        dict_ppaeh = {}
        for col in columns_companies:
            dict_df_companies[col] = []
        for perf in ppaeh:
            dict_ppaeh[perf._id] = perf
            for col in columns_companies:
                dict_df_companies[col].append(getattr(perf, col))
        del ppaeh
        df_companies_list = pd.DataFrame(data=dict_df_companies)

        logger.info(f"Nb offices to compute : {len(df_companies_list)}")

        query_hirings_lbb = f"SELECT siret, count(*) as lbb_nb_effective_hirings \
                FROM hirings\
                WHERE hiring_date >= '{ici.prediction_start_date}'\
                and hiring_date <= '{ici.prediction_end_date}'\
                and (contract_type={Hiring.CONTRACT_TYPE_CDD} or contract_type={Hiring.CONTRACT_TYPE_CDI})\
                GROUP BY siret;"

        df_hirings_lbb = pd.read_sql_query(query_hirings_lbb, engine)
        logger.info(
            f"Nb offices found in hirings for lbb : {len(df_hirings_lbb)}")

        query_hirings_lba = f"SELECT siret, count(*) as lba_nb_effective_hirings \
                FROM hirings\
                WHERE hiring_date >= '{ici.prediction_start_date}'\
                and hiring_date <= '{ici.prediction_end_date}'\
                and (contract_type={Hiring.CONTRACT_TYPE_APR} or contract_type={Hiring.CONTRACT_TYPE_CP})\
                GROUP BY siret;"

        df_hirings_lba = pd.read_sql_query(query_hirings_lba, engine)
        logger.info(
            f"Nb offices found in hirings for lba: {len(df_hirings_lba)}")

        engine.close()

        df_merge_hirings_tmp = pd.merge(df_companies_list,
                                        df_hirings_lbb,
                                        how='left',
                                        on="siret")
        df_merged = pd.merge(df_merge_hirings_tmp,
                             df_hirings_lba,
                             how='left',
                             on="siret")

        # Compute the predicted hirings from the score
        df_merged["lbb_nb_predicted_hirings"] = df_merged[
            "lbb_nb_predicted_hirings_score"].apply(
                lambda x: scoring_util.get_hirings_from_score(x))
        df_merged["lba_nb_predicted_hirings"] = df_merged[
            "lba_nb_predicted_hirings_score"].apply(
                lambda x: scoring_util.get_hirings_from_score(x))

        df_merged = df_merged.fillna(0)

        cols_we_want_to_keep = [
            "_id",
            "siret",
            "naf",
            "lbb_nb_effective_hirings",
            "lba_nb_effective_hirings",
            "lbb_nb_predicted_hirings",
            "lba_nb_predicted_hirings",
            "lbb_nb_predicted_hirings_score",
            "lba_nb_predicted_hirings_score",
        ]

        df_merged = df_merged[cols_we_want_to_keep]

        values_to_update = df_merged.values.tolist()
        count = 0

        updated_ppaeh = []
        for row in values_to_update:
            row_id = row[0]
            siret = row[1]
            naf = row[2]
            params = dict(
                zip([
                    "lbb_nb_effective_hirings", "lba_nb_effective_hirings",
                    "lbb_nb_predicted_hirings", "lba_nb_predicted_hirings"
                ], row[3:7]))
            lbb_nb_predicted_hirings_score = row[7]
            lba_nb_predicted_hirings_score = row[8]
            # Look up the in-memory PerfPredictionAndEffectiveHirings row matching this id.
            pred_effective_hirings = dict_ppaeh[row_id]
            updated_values = {"_id": row_id}
            for key, val in params.items():
                updated_values[key] = val
            is_a_bonne_boite = False
            is_a_bonne_alternance = False

            naf_present_in_mapping_rome_naf = naf in perf_division_per_rome_dict

            if naf_present_in_mapping_rome_naf:
                for rome_code, values in perf_division_per_rome_dict[
                        naf].items():
                    score_lbb = scoring_util.get_score_adjusted_to_rome_code_and_naf_code(
                        score=lbb_nb_predicted_hirings_score,
                        rome_code=rome_code,
                        naf_code=naf)
                    if score_lbb >= values["threshold_lbb"]:
                        perf_division_per_rome_dict[naf][rome_code][
                            "nb_bonne_boites_lbb"] += 1
                        is_a_bonne_boite = True

                    score_lba = scoring_util.get_score_adjusted_to_rome_code_and_naf_code(
                        score=lba_nb_predicted_hirings_score,
                        rome_code=rome_code,
                        naf_code=naf)
                    if score_lba >= values["threshold_lba"]:
                        perf_division_per_rome_dict[naf][rome_code][
                            "nb_bonne_boites_lba"] += 1
                        is_a_bonne_alternance = True
            else:
                naf_not_founds.add(naf)
                nb_companies_with_naf_not_found += 1
            pred_effective_hirings.is_a_bonne_boite = is_a_bonne_boite
            pred_effective_hirings.is_a_bonne_alternance = is_a_bonne_alternance
            updated_values["is_a_bonne_boite"] = is_a_bonne_boite
            updated_values["is_a_bonne_alternance"] = is_a_bonne_alternance

            updated_ppaeh.append(updated_values)
            count += 1
            # Commit in batches of 100 000 updated rows
            if len(updated_ppaeh) % 100000 == 0:
                logger.info(f"{count} companies have been treated")
                db_session.bulk_update_mappings(
                    PerfPredictionAndEffectiveHirings, updated_ppaeh)
                db_session.commit()
                updated_ppaeh = []

        # Commit for the remaining rows
        db_session.bulk_update_mappings(PerfPredictionAndEffectiveHirings,
                                        updated_ppaeh)
        db_session.commit()
        updated_ppaeh = []

        logger.info(
            f"Number of naf not found in the mapping rome naf for this importer cycle : {len(naf_not_founds)}"
        )
        logger.info(
            f"List of naf not found in the mapping rome naf for this importer cycle : {naf_not_founds}"
        )
        logger.info(
            f"Number of companies with naf not found in the mapping rome naf for this importer cycle : {nb_companies_with_naf_not_found}"
        )
        logger.info(f"Number of total companies : {count}")

        for naf_code, romes_list in perf_division_per_rome_dict.items():
            for rome_code, values in romes_list.items():
                division_per_rome = PerfDivisionPerRome(
                    importer_cycle_infos_id=ici._id,
                    naf=naf_code,
                    rome=rome_code,
                    threshold_lbb=values["threshold_lbb"],
                    threshold_lba=values["threshold_lba"],
                    nb_bonne_boites_lbb=values["nb_bonne_boites_lbb"],
                    nb_bonne_boites_lba=values["nb_bonne_boites_lba"],
                )
                db_session.add(division_per_rome)

        db_session.commit()

        ici.computed = True
        db_session.add(ici)
        db_session.commit()
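
The update loop above accumulates plain dicts keyed by _id and flushes them with bulk_update_mappings every 100 000 rows, plus a final flush for the remainder. A condensed sketch of that batching idiom (the compute_rows() generator is hypothetical):

BATCH_SIZE = 1000
pending = []
for row_id, computed_values in compute_rows():      # hypothetical: yields (_id, dict of columns)
    pending.append({"_id": row_id, **computed_values})
    if len(pending) >= BATCH_SIZE:
        db_session.bulk_update_mappings(PerfPredictionAndEffectiveHirings, pending)
        db_session.commit()
        pending = []

if pending:  # flush whatever is left
    db_session.bulk_update_mappings(PerfPredictionAndEffectiveHirings, pending)
    db_session.commit()

Checking len(pending) >= BATCH_SIZE is equivalent to the modulo test in the loop above, because the list is emptied after every flush, but it states the intent more directly.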
    def test_admin_access(self):
        """
        Test admin access permissions.
        """

        admin_urls = [
            self.url_for('admin.index'),
            self.url_for('users.index_view'),
            self.url_for('officeadminadd.index_view'),
            self.url_for('officeadminremove.index_view'),
            self.url_for('officeadminupdate.index_view'),
            self.url_for('officeadminextrageolocation.index_view'),
        ]

        with self.test_request_context():

            for url in admin_urls:

                # Access should be denied when a user is not logged in.
                db_session.query(User).update({
                    User.active: True,
                    User.is_admin: False
                })
                db_session.commit()
                self.user = db_session.query(User).filter_by(
                    id=self.user.id).first()
                self.assertTrue(self.user.active)
                self.assertFalse(self.user.is_admin)
                rv = self.app.get(url)
                self.assertEqual(rv.status_code, 404)

                self.login(self.user)

                # Access should be denied when a user is logged in but is not an admin.
                rv = self.app.get(url)
                self.assertEqual(rv.status_code, 404)

                # Access should be granted when a user is logged in and is admin.
                db_session.query(User).update({
                    User.active: True,
                    User.is_admin: True
                })
                db_session.commit()
                self.user = db_session.query(User).filter_by(
                    id=self.user.id).first()
                self.assertTrue(self.user.active)
                self.assertTrue(self.user.is_admin)
                rv = self.app.get(url)
                self.assertEqual(rv.status_code, 200)

                # Access should be denied when a user is not active.
                db_session.query(User).update({
                    User.active: False,
                    User.is_admin: True
                })
                db_session.commit()
                self.user = db_session.query(User).filter_by(
                    id=self.user.id).first()
                self.assertFalse(self.user.active)
                self.assertTrue(self.user.is_admin)
                rv = self.app.get(url)
                self.assertEqual(rv.status_code, 404)

                self.logout()
    def run_task(self):
        date_insertion = datetime.now()
        logger.info("extracting %s ", self.input_filename)
        # This pattern matches the date embedded in the file name,
        # e.g. 'lbb_xdpdpae_delta_201611102200.bz2' will match 20161110 (2016-11-10).
        date_pattern = r'.*_(\d\d\d\d\d\d\d\d)\d\d\d\d'  # keep only the date part, e.g. 20190910 = 10 September 2019
        date_match = re.match(date_pattern, self.input_filename)
        if date_match:
            date_part = date_match.groups()[0]
            self.last_historical_data_date_in_file = datetime.strptime(
                date_part, "%Y%m%d")
            logger.debug("identified last_historical_data_date_in_file=%s",
                         self.last_historical_data_date_in_file)
        else:
            raise Exception(
                "couldn't find a date pattern in filename. filename should be \
                like lbb_xdpdpae_delta_YYYYMMDDHHMM.csv")

        count = 0
        statements = []
        something_new = False
        query = """
            INSERT into %s(
                siret,
                hiring_date,
                contract_type,
                departement,
                contract_duration,
                iiann,
                tranche_age,
                handicap_label,
                duree_pec,
                date_insertion
                )
            values(%%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s)
        """ % settings.HIRING_TABLE
        imported_dpae = 0
        imported_dpae_distribution = {}
        not_imported_dpae = 0
        last_historical_data_date_in_db = db_session.query(func.max(Hiring.hiring_date)) \
                                        .filter(Hiring.contract_type.in_((Hiring.CONTRACT_TYPE_CDI,
                                                                          Hiring.CONTRACT_TYPE_CDD,
                                                                          Hiring.CONTRACT_TYPE_CTT))).first()[0]
        if last_historical_data_date_in_db is None:
            last_historical_data_date_in_db = DEFAULT_DATETIME_DPAE
        logger.info(
            "will now extract all dpae with hiring_date between %s and %s",
            last_historical_data_date_in_db,
            self.last_historical_data_date_in_file)

        with import_util.get_reader(self.input_filename) as myfile:
            con, cur = import_util.create_cursor()
            # FIXME detect column positions from header
            header_line = myfile.readline().strip()
            if b"siret" not in header_line:
                logger.debug(header_line)
                raise Exception("wrong header line")
            for line in myfile:
                line = line.decode()
                count += 1
                if not count % 100000:
                    logger.debug("reading line %i", count)
                    try:
                        try:
                            cur.executemany(query, statements)
                        except OperationalError:  # retry once in case of deadlock error
                            time.sleep(10)
                            cur.executemany(query, statements)
                        statements = []
                        con.commit()
                        something_new = True
                    except:
                        logger.error(
                            "error in executing statement into dpae table: %s",
                            sys.exc_info()[1])
                        statements = []
                        raise
                try:
                    siret, hiring_date, _, contract_type, departement, contract_duration, \
                    iiann, tranche_age, handicap_label, duree_pec = parse_dpae_line(line)
                except ValueError:
                    self.zipcode_errors += 1
                    continue
                except InvalidRowException:
                    logger.info("invalid_row met at row: %i", count)
                    self.invalid_row_errors += 1
                    continue

                dpae_should_be_imported = (
                    hiring_date > last_historical_data_date_in_db
                    and hiring_date <= self.last_historical_data_date_in_file
                    # For DPAE contracts we only keep all CDI, only long enough CDD (at least 31 days)
                    # and we ignore CTT.
                    and (contract_type == Hiring.CONTRACT_TYPE_CDI or
                         (contract_type == Hiring.CONTRACT_TYPE_CDD
                          and contract_duration is not None
                          and contract_duration > 31)))

                if dpae_should_be_imported:
                    statement = (siret, hiring_date, contract_type,
                                 departement, contract_duration, iiann,
                                 tranche_age, handicap_label, duree_pec,
                                 date_insertion)
                    statements.append(statement)
                    imported_dpae += 1

                    if hiring_date.year not in imported_dpae_distribution:
                        imported_dpae_distribution[hiring_date.year] = {}
                    if hiring_date.month not in imported_dpae_distribution[
                            hiring_date.year]:
                        imported_dpae_distribution[hiring_date.year][
                            hiring_date.month] = {}
                    if hiring_date.day not in imported_dpae_distribution[
                            hiring_date.year][hiring_date.month]:
                        imported_dpae_distribution[hiring_date.year][
                            hiring_date.month][hiring_date.day] = 0
                    imported_dpae_distribution[hiring_date.year][
                        hiring_date.month][hiring_date.day] += 1
                else:
                    not_imported_dpae += 1

        # run remaining statements
        try:
            cur.executemany(query, statements)
            something_new = True
        except:
            logger.error("error in executing statement into dpae table: %s",
                         sys.exc_info()[1])
            raise

        logger.info("processed %i dpae...", count)
        logger.info("imported dpae: %i", imported_dpae)
        logger.info("not imported dpae: %i", not_imported_dpae)
        logger.info("zipcode errors: %i", self.zipcode_errors)
        logger.info("invalid_row errors: %i", self.invalid_row_errors)
        if self.zipcode_errors > settings.MAXIMUM_ZIPCODE_ERRORS:
            raise IOError('too many zipcode errors')
        if self.invalid_row_errors > settings.MAXIMUM_INVALID_ROWS:
            raise IOError('too many invalid_row errors')
        logger.info("verifying good number of dpae imported.")
        query = "select count(*) from hirings h where hiring_date > %s and hiring_date <= %s and h.contract_type in (1,2,3)"
        cur.execute(query, [
            last_historical_data_date_in_db,
            self.last_historical_data_date_in_file
        ])
        res = cur.fetchone()
        if res[0] != imported_dpae:
            raise DoublonException(
                f"Too many DPAE ({res[0]}) in DB compared to DPAE file ({imported_dpae})."
            )
        logger.info("verifying number of DPAE: OK.")
        con.commit()
        cur.close()
        con.close()

        try:
            statistics = DpaeStatistics(
                last_import=datetime.now(),
                most_recent_data_date=self.last_historical_data_date_in_file,
                file_type=self.file_type)
            db_session.add(statistics)
            db_session.commit()
            logger.info("First way to insert DPAE statistics in DB : OK")
        except OperationalError:
            # For an obscure reason, inserting via the DpaeStatistics model does not work
            # on the bonaparte server, so we insert directly via an SQL query.
            # This job has been broken for more than a year; this is the only way we found to fix it:
            db_session.rollback()
            last_import_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            most_recent_date = self.last_historical_data_date_in_file.strftime(
                '%Y-%m-%d %H:%M:%S')
            query = f"insert into dpae_statistics (last_import, most_recent_data_date, file_type) values ('{last_import_date}','{most_recent_date}','{self.file_type}')"
            con, cur = import_util.create_cursor()
            cur.execute(query)
            con.commit()
            cur.close()
            con.close()
            logger.info("Second way to insert DPAE statistics in DB : OK")

        logger.info("finished importing dpae...")
        return something_new
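
The filename parsing at the top of this task can be checked in isolation; with the example name from the comment, the captured group is the 8-digit date and the trailing HHMM part is dropped:

import re
from datetime import datetime

date_pattern = r'.*_(\d\d\d\d\d\d\d\d)\d\d\d\d'
filename = 'lbb_xdpdpae_delta_201611102200.bz2'

match = re.match(date_pattern, filename)
assert match is not None
print(datetime.strptime(match.groups()[0], "%Y%m%d"))  # 2016-11-10 00:00:00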
    def run_task(self):
        date_insertion = datetime.now()
        logger.info("extracting %s ", self.input_filename)
        # this pattern matches the first date
        # e.g. '20200803ExtractApp'
        # will match 20200803
        date_string = self.input_filename.split('/')[-1][0:8] 
        try:
            self.last_historical_data_date_in_file = datetime.strptime(date_string, "%Y%m%d")
        except ValueError:
            raise Exception("couldn't find a date pattern in filename. filename should be \
                like 20200803ExtractApp.csv")

        count = 0
        statements = []
        something_new = False
        query = """
            INSERT into %s(
                siret,
                hiring_date,
                contract_type,
                departement,
                contract_duration,
                iiann,
                tranche_age,
                handicap_label,
                duree_pec,
                date_insertion
                )
            values(%%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s)
        """ % settings.HIRING_TABLE
        imported_alternance_contracts = 0
        imported_alternance_contracts_distribution = {}
        not_imported_alternance_contracts = 0

        last_historical_data_date_in_db = db_session.query(func.max(Hiring.hiring_date))\
                                                            .filter(Hiring.contract_type == self.contract_type).first()[0]

        logger.info("will now extract all alternance contracts with hiring_date between %s and %s",
                    last_historical_data_date_in_db, self.last_historical_data_date_in_file)

        with import_util.get_reader(self.input_filename) as myfile:
            con, cur = import_util.create_cursor()
            header_line = myfile.readline().strip()   # FIXME detect column positions from header
            
            if b"SIRET" not in header_line:
                logger.debug(header_line)
                raise Exception("wrong header line")

            for line in myfile:
                line = line.decode()
                count += 1
                if not count % 10000:
                    logger.debug("reading line %i", count)
                    try:
                        try:
                            cur.executemany(query, statements)
                        except OperationalError:  # retry once in case of deadlock error
                            time.sleep(10)
                            cur.executemany(query, statements)
                        statements = []
                        con.commit()
                        something_new = True
                    except:
                        logger.error("error in executing statement into hirings table: %s", sys.exc_info()[1])
                        statements = []
                        raise
                try:
                    siret, hiring_date, departement = parse_alternance_line(line)
                except InvalidRowException:
                    logger.info("invalid_row met at row: %i", count)
                    self.invalid_row_errors += 1
                    continue
                except InvalidSiretException:
                    error_message = traceback.format_exc()
                    logger.info("invalid siret met at row: %i", count)
                    logger.info(error_message)
                    self.invalid_siret_errors += 1
                    continue
                except InvalidZipCodeException:
                    logger.info("invalid zip code met at row: %i", count)
                    self.invalid_zipcode_errors += 1
                    continue
                
                # This filter is intentionally disabled: the source data contains many late
                # contract entries, so we have to insert ALL the contracts, whatever their date.
                #
                # alternance_contract_should_be_imported = (
                #     hiring_date > last_historical_data_date_in_db
                #     and hiring_date <= self.last_historical_data_date_in_file
                # )

                if hiring_date <= self.last_historical_data_date_in_file:
                    statement = (
                        siret,
                        hiring_date,
                        self.contract_type,
                        departement,
                        None, #contract_duration
                        None, #iiann
                        None, #tranche_age
                        None, #handicap_label
                        None,  #duree_pec
                        date_insertion

                    )
                    statements.append(statement)
                    imported_alternance_contracts += 1

                    if hiring_date.year not in imported_alternance_contracts_distribution:
                        imported_alternance_contracts_distribution[hiring_date.year] = {}
                    if hiring_date.month not in imported_alternance_contracts_distribution[hiring_date.year]:
                        imported_alternance_contracts_distribution[hiring_date.year][hiring_date.month] = {}
                    if hiring_date.day not in imported_alternance_contracts_distribution[hiring_date.year][hiring_date.month]:
                        imported_alternance_contracts_distribution[hiring_date.year][hiring_date.month][hiring_date.day] = 0
                    imported_alternance_contracts_distribution[hiring_date.year][hiring_date.month][hiring_date.day] += 1

        # run remaining statements
        try:
            cur.executemany(query, statements)
            something_new = True
        except:
            logger.error("error in executing statement into hirings table: %s", sys.exc_info()[1])
            raise

        logger.info(f"Types de contrats à importer : {self.contract_name}")
        logger.info(f"processed {count} lba_contracts...")
        logger.info(f"imported lba_contracts: {imported_alternance_contracts}")
        logger.info(f"not imported lba_contracts: {not_imported_alternance_contracts}")
        logger.info(f"zipcode errors: {self.invalid_zipcode_errors}")
        logger.info(f"invalid_row errors: {self.invalid_row_errors}")
        logger.info(f"invalid siret errors: {self.invalid_siret_errors}")
#        if self.zipcode_errors > settings.MAXIMUM_ZIPCODE_ERRORS:
#            raise IOError('too many zipcode errors')
#        if self.invalid_row_errors > settings.MAXIMUM_INVALID_ROWS:
#            raise IOError('too many invalid_row errors')

        con.commit()
        cur.close()
        con.close()

        try:
            statistics = DpaeStatistics(
                last_import=datetime.now(),
                most_recent_data_date=self.last_historical_data_date_in_file,
                file_type=self.file_type
            )
            db_session.add(statistics)
            db_session.commit()
            logger.info("First way to insert DPAE statistics in DB : OK")
        except OperationalError:
            # For an obscure reason, inserting via the DpaeStatistics model does not work
            # on the bonaparte server, so we insert directly via an SQL query.
            # This job has been broken for more than a year; this is the only way we found to fix it:
            db_session.rollback()
            last_import_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            most_recent_date = self.last_historical_data_date_in_file.strftime('%Y-%m-%d %H:%M:%S')
            query = f"insert into dpae_statistics (last_import, most_recent_data_date, file_type) values ('{last_import_date}','{most_recent_date}','{self.file_type}')"
            con, cur = import_util.create_cursor()
            cur.execute(query)
            con.commit()
            cur.close()
            con.close()
            logger.info("Second way to insert DPAE statistics in DB : OK")


        logger.info("finished importing dpae...")
        return something_new