Example #1
def deduplicate_users():
    # We import the app to initialize the social models
    import labonneboite.web.app  # pylint: disable=unused-import,unused-variable
    import sqlalchemy as sa  # needed below for sa.func.count
    from labonneboite.common.database import db_session
    from labonneboite.common.models import auth
    from labonneboite.common.models.user_favorite_offices import UserFavoriteOffice

    # Iterate on duplicated users
    for user in auth.User.query.group_by(auth.User.external_id).having(
            sa.func.count(auth.User.external_id) > 1):
        duplicate_user_ids = []
        favorite_count = 0
        # Create favorites, if necessary
        for duplicate_user in auth.User.query.filter(
                auth.User.external_id == user.external_id,
                auth.User.id != user.id):
            duplicate_user_ids.append(duplicate_user.id)
            for favorite in duplicate_user.favorite_offices:
                _, created = UserFavoriteOffice.get_or_create(
                    user_id=user.id, office_siret=favorite.office_siret)
                if created:
                    favorite_count += 1

        print(
            "Removing {} duplicates for user #{} ({} favorites added to original user)"
            .format(len(duplicate_user_ids), user.id, favorite_count))
        # Remove duplicate social user
        db_session.query(auth.UserSocialAuth).filter(
            auth.UserSocialAuth.user_id.in_(duplicate_user_ids)).delete(
                synchronize_session=False)
        # Remove duplicate user
        auth.User.query.filter(auth.User.id.in_(duplicate_user_ids)).delete(
            synchronize_session=False)
Example #2
    def test_delete_user_account(self):
        """
        Test the deletion of a user account.
        """

        url = self.url_for('user.account_delete')

        with self.test_request_context():

            self.login(self.user)

            # Display the account deletion confirmation page.
            rv = self.app.get(url)
            self.assertEqual(rv.status_code, 200)

            # Confirm account deletion.
            rv = self.app.post(url, data={'confirm_deletion': 1})
            # The user should be redirected to the PEAM logout endpoint.
            self.assertEqual(rv.status_code, 302)
            self.assertIn(settings.PEAM_AUTH_BASE_URL, rv.location)
            self.assertIn(self.user_social_auth.extra_data['id_token'],
                          rv.location)

            # The user and its info should have been deleted.
            self.assertEqual(db_session.query(User).count(), 0)
            self.assertEqual(db_session.query(UserFavoriteOffice).count(), 0)
            self.assertEqual(db_session.query(UserSocialAuth).count(), 0)

            # The user should now be anonymous and cannot access protected pages.
            rv = self.app.get(url)
            self.assertEqual(rv.status_code, 401)
Example #3
    def setUp(self):
        """
        Populate the DB with data required for these tests to work.
        """
        super(UserAccountTest, self).setUp()

        self.user = User(email='*****@*****.**',
                         gender='male',
                         first_name='John',
                         last_name='Doe')
        db_session.add(self.user)
        db_session.flush()

        self.office1 = Office(
            departement='57',
            siret='00000000000001',
            company_name='1',
            headcount='5',
            city_code='57070',
            zipcode='57070',
            naf='4646Z',
            score=90,
            x=6.166667,
            y=49.133333,
        )
        self.office2 = Office(
            departement='57',
            siret='00000000000002',
            company_name='1',
            headcount='5',
            city_code='57070',
            zipcode='57070',
            naf='4646Z',
            score=90,
            x=6.166667,
            y=49.133333,
        )
        db_session.add_all([self.office1, self.office2])
        db_session.flush()

        self.user_social_auth = UserSocialAuth(
            provider=PEAMOpenIdConnect.name,
            extra_data={'id_token': 'fake'},
            user_id=self.user.id,
        )
        self.fav1 = UserFavoriteOffice(user_id=self.user.id,
                                       office_siret=self.office1.siret)
        self.fav2 = UserFavoriteOffice(user_id=self.user.id,
                                       office_siret=self.office2.siret)
        db_session.add_all([self.user_social_auth, self.fav1, self.fav2])
        db_session.flush()

        db_session.commit()

        self.assertEqual(db_session.query(User).count(), 1)
        self.assertEqual(db_session.query(Office).count(), 2)
        self.assertEqual(db_session.query(UserFavoriteOffice).count(), 2)
        self.assertEqual(db_session.query(UserSocialAuth).count(), 1)
Example #4
    def test_office_admin_add(self):
        form = {
            "siret": "78548035101646",
            "company_name": "SUPERMARCHES MATCH",
            "office_name": "SUPERMARCHES MATCH",
            "naf": "4711D",
            "street_number": "45",
            "street_name": "AVENUE ANDRE MALRAUX",
            "city_code": "57463",
            "zipcode": "57000",
            "email": "*****@*****.**",
            "tel": "0387787878",
            "website": "http://www.supermarchesmatch.fr",
            "flag_alternance": 0,
            "flag_junior": 0,
            "flag_senior": 0,
            "flag_handicap": 0,
            "departement": "57",
            "headcount": "12",
            "score": 90,
            "score_alternance": 75,
            "x": 6.17952,
            "y": 49.1044,
            "reason": "Demande de mise en avant",
        }

        with self.test_request_context():
            # Create an admin user
            self.user = User(email='*****@*****.**', gender='male',
                             first_name='John', last_name='Doe', active=True,
                             is_admin=True)
            db_session.add(self.user)
            db_session.flush()

            user_social_auth = UserSocialAuth(
                provider=PEAMOpenIdConnect.name,
                extra_data={'id_token': 'fake'},
                user_id=self.user.id,
            )
            db_session.add(user_social_auth)
            db_session.commit()

            # Login as user admin
            self.user = db_session.query(User).filter_by(id=self.user.id).first()
            self.assertEqual(db_session.query(User).count(), 1)
            self.login(self.user)

            # Create OfficeAdminAdd
            self.assertEqual(0, OfficeAdminAdd.query.filter_by(id=1).count())
            self.app.post(url_for('officeadminadd.create_view'), data=form)
            self.assertEqual(1, OfficeAdminAdd.query.filter_by(id=1).count())

            # Delete OfficeAdminAdd
            self.app.post(url_for('officeadminadd.delete_view'), data={'id': 1})
            self.assertEqual(0, OfficeAdminAdd.query.filter_by(id=1).count())
Example #5
    def test_office_admin_remove(self):
        # Form data used to create an OfficeAdminRemove
        form = {
            'siret': '01234567891234',
            'name': 'Test company',
            'reason': 'N/A',
            'initiative': 'office',
        }

        with self.test_request_context():
            # Create an admin user
            self.user = User(email='*****@*****.**',
                             gender='male',
                             first_name='John',
                             last_name='Doe',
                             active=True,
                             is_admin=True)
            db_session.add(self.user)
            db_session.flush()

            user_social_auth = UserSocialAuth(
                provider=PEAMOpenIdConnect.name,
                extra_data={'id_token': 'fake'},
                user_id=self.user.id,
            )
            db_session.add(user_social_auth)
            db_session.commit()

            # Login as user admin
            self.user = db_session.query(User).filter_by(
                id=self.user.id).first()
            self.assertEqual(db_session.query(User).count(), 1)
            self.login(self.user)

            # Create OfficeAdminRemove
            self.assertEqual(
                0,
                OfficeAdminRemove.query.filter_by(
                    siret='01234567891234').count())
            self.app.post(url_for('officeadminremove.create_view'), data=form)
            self.assertEqual(
                1,
                OfficeAdminRemove.query.filter_by(
                    siret='01234567891234').count())

            # Delete OfficeAdminRemove
            self.app.post(url_for('officeadminremove.delete_view'),
                          data={'id': 1})
            self.assertEqual(0,
                             OfficeAdminRemove.query.filter_by(id=1).count())
Example #6
    def get_or_create(cls, defaults=None, **kwargs):
        try:
            return db_session.query(cls).filter_by(**kwargs).one(), False
        except NoResultFound:
            if defaults:
                kwargs.update(defaults)
            instance = cls(**kwargs)
            try:
                db_session.add(instance)
                db_session.commit()
                return instance, True
            except IntegrityError:
                # A concurrent transaction inserted the same row first:
                # roll back and return the row that won the race.
                db_session.rollback()
                return db_session.query(cls).filter_by(**kwargs).one(), True
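
For reference, a hedged usage sketch of the `(instance, created)` contract above, mirroring the call made in Example #1 (the literal `user_id` and siret are placeholders, not project data):

from labonneboite.common.models.user_favorite_offices import UserFavoriteOffice

# Idempotently ensure the favorite exists: a second identical call finds
# the existing row and returns it with created == False.
favorite, created = UserFavoriteOffice.get_or_create(
    user_id=1, office_siret="00000000000001")
if created:
    print("favorite added for user #1")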
Example #7
def get_user_social_auth(user_id, provider=PEAMOpenIdConnect.name):
    """
    Return the latest `UserSocialAuth` instance for the given `user_id` and `provider`.
    """
    return (db_session.query(UserSocialAuth).filter_by(
        user_id=user_id,
        provider=provider).order_by(desc(UserSocialAuth.id)).first())
Example #8
def remove_offices():
    """
    Remove offices (overload the data provided by the importer).
    """
    # When returning multiple rows, the SQLAlchemy Query class can only give them out as tuples.
    # We need to unpack them explicitly.
    offices_to_remove = [
        siret for (siret, ) in db_session.query(OfficeAdminRemove.siret).all()
    ]

    for siret in offices_to_remove:
        # Apply changes in ElasticSearch.
        try:
            es.Elasticsearch().delete(index=settings.ES_INDEX,
                                      doc_type=es.OFFICE_TYPE,
                                      id=siret)
        except TransportError as e:
            if e.status_code != 404:
                raise
        # Apply changes in DB.
        office = Office.query.filter_by(siret=siret).first()
        if office:
            try:
                office.delete()
            except OperationalError:  # retry once in case of deadlock error
                time.sleep(10)
                office.delete()
            # Delete the current PDF.
            pdf_util.delete_file(office)
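
The retry-once-on-deadlock pattern above recurs in several of these jobs (see also the `executemany` retries in the import tasks below). A hypothetical helper could factor it out; `run_with_deadlock_retry` is a sketch, not a name from the project, and it assumes the operation is safe to replay after a failure:

import time

from sqlalchemy.exc import OperationalError


def run_with_deadlock_retry(operation, delay=10):
    # Run `operation()`, retrying once after `delay` seconds if the
    # database raises OperationalError (e.g. a MySQL deadlock).
    try:
        return operation()
    except OperationalError:
        time.sleep(delay)
        return operation()

# e.g. run_with_deadlock_retry(office.delete) instead of the inline try/except.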
Example #9
def update_offices_geolocations():
    """
    Remove or add extra geolocations to offices.
    New geolocations are entered into the system through the `OfficeAdminExtraGeoLocation` table.
    """
    for extra_geolocation in db_session.query(
            OfficeAdminExtraGeoLocation).all():
        office = Office.query.filter_by(siret=extra_geolocation.siret).first()
        if office:
            locations = []
            if office.y and office.x:
                locations.append({'lat': office.y, 'lon': office.x})
            if not extra_geolocation.is_outdated():
                locations.extend(
                    extra_geolocation.geolocations_as_lat_lon_properties())
                office.has_multi_geolocations = True
            else:
                office.has_multi_geolocations = False
            # Apply changes in DB.
            office.save()
            # Apply changes in ElasticSearch.
            body = {'doc': {'locations': locations}}
            es.Elasticsearch().update(
                index=settings.ES_INDEX,
                doc_type=es.OFFICE_TYPE,
                id=office.siret,
                body=body,
                params={'ignore': 404},
            )
Example #10
def logout(user_social_auth=None):
    """
    Log a user out.

    Param `user_social_auth`: a `UserSocialAuth` instance. `None` most of the time, except when a user
    is coming from the `user.account_delete` view. This param is intended to be passed when the view
    is called directly as a Python function, i.e. not with a `redirect()`.
    """
    if not current_user.is_authenticated:
        return redirect(url_for('root.home'))

    logged_with_peam = session.get(
        'social_auth_last_login_backend') == PEAMOpenIdConnect.name
    if logged_with_peam:
        if not user_social_auth:
            user_social_auth = get_user_social_auth(current_user.id)
        if user_social_auth:
            id_token = user_social_auth.extra_data['id_token']

    # Force delete PEAMU token.
    db_session.query(UserSocialAuth).filter_by(user_id=current_user.id).delete()
    db_session.commit()

    # Log the user out and destroy the LBB session.
    activity.log('deconnexion')
    logout_user()

    # Clean the session: drop Python Social Auth info because it isn't done by `logout_user`.
    if 'social_auth_last_login_backend' in session:
        # Some backends have a `backend-name_state` stored in session as required by e.g. Oauth2.
        social_auth_state_key = '%s_state' % session['social_auth_last_login_backend']
        if social_auth_state_key in session:
            session.pop(social_auth_state_key)
        session.pop('social_auth_last_login_backend')

    # Log the user out from PEAM and destroy the PEAM session.
    if logged_with_peam and user_social_auth:
        params = {
            'id_token_hint': id_token,
            'redirect_uri': url_for('auth.logout_from_peam_callback', _external=True),
        }
        peam_logout_url = '%s/compte/deconnexion?%s' % (
            settings.PEAM_AUTH_BASE_URL, urlencode(params))
        # After this redirect, the user will be redirected to the LBB website `logout_from_peam_callback` route.
        return redirect(peam_logout_url)

    return redirect(url_for('root.home'))
Example #11
def create_offices_for_departement(departement):
    """
    Populate the `office` type in ElasticSearch with offices having given departement.
    """
    actions = []

    logger.info("STARTED indexing offices for departement=%s ...", departement)

    # For LBB we apply two thresholds to show an office:
    # 1) its global all-rome-included score should be at least SCORE_REDUCING_MINIMUM_THRESHOLD
    # 2) its score adapted to requested rome should be at least SCORE_FOR_ROME_MINIMUM
    # For LBA we only apply the second threshold (SCORE_ALTERNANCE_FOR_ROME_MINIMUM)
    # and no longer apply the all-rome-included score threshold, in order to include
    # more relevant smaller companies.
    all_offices = db_session.query(Office).filter(
        and_(
            Office.departement == departement,
            or_(
                Office.score >= importer_settings.SCORE_REDUCING_MINIMUM_THRESHOLD,
                # Fetching offices with score lower than SCORE_ALTERNANCE_FOR_ROME_MINIMUM
                # would be a waste of resources as score-for-rome will always be less than
                # or equal to the all-rome-included score.
                Office.score_alternance >= scoring_util.SCORE_ALTERNANCE_FOR_ROME_MINIMUM,
            ),
        )).all()

    for office in all_offices:
        st.increment_office_count()

        es_doc = get_office_as_es_doc(office)

        office_is_reachable = (
            'scores_by_rome' in es_doc or 'scores_alternance_by_rome' in es_doc
        )

        if office_is_reachable:
            st.increment_indexed_office_count()
            actions.append({
                '_op_type': 'index',
                '_index': settings.ES_INDEX,
                '_type': es.OFFICE_TYPE,
                '_id': office.siret,
                '_source': es_doc,
            })

    bulk_actions(actions)

    completed_jobs_counter.increment()

    logger.info(
        "COMPLETED indexing offices for departement=%s (%s of %s jobs completed)",
        departement,
        completed_jobs_counter.value,
        len(dpt.DEPARTEMENTS),
    )

    display_performance_stats(departement)
Example #12
    def user_favs_as_sirets(cls, user):
        """
        Returns the favorite offices of a user as a list of sirets.
        Useful to check if an office is already in the favorites of a user.
        """
        if user.is_anonymous:
            return []
        sirets = [fav.office_siret for fav in db_session.query(cls).filter_by(user_id=user.id)]
        return sirets
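
A hedged usage sketch of the helper above: marking search results that are already favorites. `current_user` comes from Flask-Login; `mark_favorites` and its `offices` argument are hypothetical names for illustration only:

from flask_login import current_user

from labonneboite.common.models.user_favorite_offices import UserFavoriteOffice


def mark_favorites(offices):
    # A set makes the per-office membership test O(1).
    favorite_sirets = set(UserFavoriteOffice.user_favs_as_sirets(current_user))
    for office in offices:
        office.is_favorite = office.siret in favorite_sirets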
Example #13
def get_user_social_auth(user_id):
    """
    Return the latest `UserSocialAuth` instance for the given `user_id`.
    """
    return (
        db_session.query(UserSocialAuth)
        .filter_by(user_id=user_id)
        .order_by(desc(UserSocialAuth.id))
        .first()
    )
Example #14
    def test_get_user_social_auth(self):
        """
        Test the `get_user_social_auth()` function.
        """
        user = User(email='*****@*****.**', gender='male', first_name='John', last_name='Doe')
        db_session.add(user)
        db_session.flush()

        expected_user_social_auth = UserSocialAuth(provider=PEAMOpenIdConnect.name, extra_data=None, user_id=user.id)
        db_session.add(expected_user_social_auth)
        db_session.flush()

        db_session.commit()

        self.assertEqual(db_session.query(User).count(), 1)
        self.assertEqual(db_session.query(UserSocialAuth).count(), 1)

        user_social_auth = get_user_social_auth(user.id)
        self.assertEqual(user_social_auth.id, expected_user_social_auth.id)
Example #15
def account_delete():
    """
    Ask for a confirmation, then delete the current user account and all of its information.
    """
    form = UserAccountDeleteForm(request.form)

    if request.method == 'POST' and form.validate():

        # Store the current `UserSocialAuth` instance in memory because it will be deleted
        # but it will also be needed later to properly logout the user from PEAM.
        user_social_auth = get_user_social_auth(current_user.id)

        # Now we can safely delete the current `UserSocialAuth` instance.
        # We have to delete it because it has a foreign key to the User table.
        # We don't need to deal with the other tables of Social Auth, see:
        # https://python-social-auth.readthedocs.io/en/latest/storage.html
        db_session.query(UserSocialAuth).filter_by(
            user_id=current_user.id).delete()

        # Delete the current user.
        # The user's favorites will be deleted at the same time because of the `ondelete='CASCADE'`
        # on the `user_id` field of the `UserFavoriteOffice` model.
        db_session.query(User).filter_by(id=current_user.id).delete()

        db_session.commit()

        message = "La suppression de votre compte a bien été effectuée."
        flash(message, 'warning')

        # Return the `logout` view directly. It allows us to pass the full
        # `user_social_auth` object as a parameter.
        return logout(user_social_auth=user_social_auth)

    context = {
        'form': form,
    }
    return render_template('user/account_delete.html', **context)
Example #16
    def user_favs_as_csv(cls, user):
        """
        Returns the favorite offices of a user as CSV text.
        """
        header_row = cls.as_csv_header_row()

        if user.is_anonymous:
            return header_row

        rows = [fav.as_csv_row() for fav in db_session.query(cls).filter_by(user_id=user.id)]
        csv_text = "%s\r\n%s" % (
            header_row,
            "\r\n".join(rows),
        )
        return csv_text
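
A hedged usage sketch: serving that CSV from a Flask view as a download. `download_favorites` and the attachment filename are assumptions for illustration, not project code:

from flask import Response
from flask_login import current_user

from labonneboite.common.models.user_favorite_offices import UserFavoriteOffice


def download_favorites():
    # Hypothetical view: stream the user's favorites as a CSV attachment.
    csv_text = UserFavoriteOffice.user_favs_as_csv(current_user)
    return Response(
        csv_text,
        mimetype='text/csv',
        headers={'Content-Disposition': 'attachment; filename="favorites.csv"'},
    )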
Example #17
    def setUp(self, *args, **kwargs):
        super(AdminTest, self).setUp(*args, **kwargs)

        self.user = User(email='*****@*****.**',
                         gender='male',
                         first_name='John',
                         last_name='Doe')
        db_session.add(self.user)
        db_session.flush()

        # Required for `self.logout` to work, which looks for the `extra_data` attribute.
        user_social_auth = UserSocialAuth(
            provider=PEAMOpenIdConnect.name,
            extra_data={'id_token': 'fake'},
            user_id=self.user.id,
        )
        db_session.add(user_social_auth)
        db_session.commit()

        self.assertEqual(db_session.query(User).count(), 1)
Example #18
    def test_clean(self):
        """
        Test `OfficeAdminExtraGeoLocation.clean()`.
        """
        extra_geolocation = OfficeAdminExtraGeoLocation(
            siret="38524664000176",
            codes="75110\n\n\n\n\n\n\n57616",
            reason="Paris 10 + Metz Saint Julien",
        )
        db_session.add(extra_geolocation)
        db_session.commit()
        # The `clean()` method should have been called automatically.
        extra_geolocation = db_session.query(OfficeAdminExtraGeoLocation).first()
        # Multiple newlines should have been removed.
        self.assertEqual(extra_geolocation.codes, '57616\n75110')
        # Corresponding lat/lon coordinates should have been found and stored.
        self.assertEqual(
            extra_geolocation.geolocations,
            '[[49.135208952059884, 6.207906756168173], [48.8815994262695, 2.36229991912841]]'
        )
Example #19
    def after_check(self):
        query = db_session.query(RawOffice.departement.distinct().label("departement"))
        departements = [row.departement for row in query.all()]

        if len(departements) != settings.DISTINCT_DEPARTEMENTS_HAVING_OFFICES:
            msg = "wrong number of departements : %s instead of expected %s" % (
                len(departements),
                settings.DISTINCT_DEPARTEMENTS_HAVING_OFFICES
            )
            raise Exception(msg)

        # FIXME parallelize for better performance
        for departement in departements:
            count = RawOffice.query.filter_by(departement=departement).count()
            logger.info("number of companies in departement %s : %i", departement, count)
            if not count >= settings.MINIMUM_OFFICES_TO_BE_EXTRACTED_PER_DEPARTEMENT:
                msg = "too few companies in departement : %s instead of expected %s" % (
                    count,
                    settings.MINIMUM_OFFICES_TO_BE_EXTRACTED_PER_DEPARTEMENT
                )
                raise Exception(msg)
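
The FIXME above asks for parallelization, but the per-departement round-trips could also be removed entirely with a single GROUP BY query. A sketch of that alternative (the `RawOffice` import path is an assumption):

from sqlalchemy import func

from labonneboite.common.database import db_session
from labonneboite.importer.models.computing import RawOffice  # assumed import path

# One query returning (departement, count) pairs instead of one COUNT(*) per departement.
rows = (
    db_session.query(RawOffice.departement, func.count().label("count"))
    .group_by(RawOffice.departement)
    .all()
)
counts = dict(rows)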
Example #20
def add_offices():
    """
    Add offices (complete the data provided by the importer).
    """
    for office_to_add in db_session.query(OfficeAdminAdd).all():

        office = Office.query.filter_by(siret=office_to_add.siret).first()

        # Only create a new office if it does not already exist.
        # This guarantees that the importer data will always have precedence.
        if not office:

            # The `headcount` field of an `OfficeAdminAdd` instance has a `code` attribute.
            if hasattr(office_to_add.headcount, 'code'):
                headcount = office_to_add.headcount.code
            else:
                headcount = office_to_add.headcount

            # Create the new office in DB.
            new_office = Office()
            # Use `inspect` because `Office` columns are named distinctly from attributes.
            for field_name in list(inspect(Office).columns.keys()):
                try:
                    value = getattr(office_to_add, field_name)
                except AttributeError:
                    # Some fields are not shared between `Office` and `OfficeAdminAdd`.
                    continue
                if field_name == 'headcount':
                    value = headcount
                setattr(new_office, field_name, value)
            db_session.add(new_office)
            db_session.commit()

            # Create the new office in ES.
            doc = get_office_as_es_doc(office_to_add)
            es.Elasticsearch().create(index=settings.ES_INDEX,
                                      doc_type=es.OFFICE_TYPE,
                                      id=office_to_add.siret,
                                      body=doc)
Example #21
    def run_task(self):
        date_insertion = datetime.now()
        logger.info("extracting %s ", self.input_filename)
        # this pattern matches the first date
        # e.g. '20200803ExtractApp'
        # will match 20200803
        date_string = self.input_filename.split('/')[-1][0:8] 
        try:
            self.last_historical_data_date_in_file = datetime.strptime(date_string, "%Y%m%d")
        except ValueError:
            raise Exception("couldn't find a date pattern in filename. filename should be \
                like 20200803ExtractApp.csv")

        count = 0
        statements = []
        something_new = False
        query = """
            INSERT into %s(
                siret,
                hiring_date,
                contract_type,
                departement,
                contract_duration,
                iiann,
                tranche_age,
                handicap_label,
                duree_pec,
                date_insertion
                )
            values(%%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s)
        """ % settings.HIRING_TABLE
        imported_alternance_contracts = 0
        imported_alternance_contracts_distribution = {}
        not_imported_alternance_contracts = 0

        last_historical_data_date_in_db = db_session.query(func.max(Hiring.hiring_date)) \
            .filter(Hiring.contract_type == self.contract_type).first()[0]

        logger.info("will now extract all alternance contracts with hiring_date between %s and %s",
                    last_historical_data_date_in_db, self.last_historical_data_date_in_file)

        with import_util.get_reader(self.input_filename) as myfile:
            con, cur = import_util.create_cursor()
            header_line = myfile.readline().strip()   # FIXME detect column positions from header
            
            if b"SIRET" not in header_line:
                logger.debug(header_line)
                raise Exception("wrong header line")

            for line in myfile:
                line = line.decode()
                count += 1
                if not count % 10000:
                    logger.debug("reading line %i", count)
                    try:
                        try:
                            cur.executemany(query, statements)
                        except OperationalError:  # retry once in case of deadlock error
                            time.sleep(10)
                            cur.executemany(query, statements)
                        statements = []
                        con.commit()
                        something_new = True
                    except Exception:
                        logger.error("error in executing statement into hirings table: %s", sys.exc_info()[1])
                        statements = []
                        raise
                try:
                    siret, hiring_date, departement = parse_alternance_line(line)
                except InvalidRowException:
                    logger.info("invalid_row met at row: %i", count)
                    self.invalid_row_errors += 1
                    continue
                except InvalidSiretException:
                    error_message = traceback.format_exc()
                    logger.info("invalid siret met at row: %i", count)
                    logger.info(error_message)
                    self.invalid_siret_errors += 1
                    continue
                except InvalidZipCodeException:
                    logger.info("invalid zip code met at row: %i", count)
                    self.invalid_zipcode_errors += 1
                    continue
                
                # This check is intentionally disabled: the data contains many
                # late contract inputs, so we have to insert ALL the contracts,
                # regardless of their dates.

                #  alternance_contract_should_be_imported = (
                #      hiring_date > last_historical_data_date_in_db 
                #      and hiring_date <= self.last_historical_data_date_in_file
                #)

                if hiring_date <= self.last_historical_data_date_in_file:
                    statement = (
                        siret,
                        hiring_date,
                        self.contract_type,
                        departement,
                        None,  # contract_duration
                        None,  # iiann
                        None,  # tranche_age
                        None,  # handicap_label
                        None,  # duree_pec
                        date_insertion,
                    )
                    statements.append(statement)
                    imported_alternance_contracts += 1

                    if hiring_date.year not in imported_alternance_contracts_distribution:
                        imported_alternance_contracts_distribution[hiring_date.year] = {}
                    if hiring_date.month not in imported_alternance_contracts_distribution[hiring_date.year]:
                        imported_alternance_contracts_distribution[hiring_date.year][hiring_date.month] = {}
                    if hiring_date.day not in imported_alternance_contracts_distribution[hiring_date.year][hiring_date.month]:
                        imported_alternance_contracts_distribution[hiring_date.year][hiring_date.month][hiring_date.day] = 0
                    imported_alternance_contracts_distribution[hiring_date.year][hiring_date.month][hiring_date.day] += 1

        # run remaining statements
        try:
            cur.executemany(query, statements)
            something_new = True
        except Exception:
            logger.error("error in executing statement into hirings table: %s", sys.exc_info()[1])
            raise

        logger.info(f"Types de contrats à importer : {self.contract_name}")
        logger.info(f"processed {count} lba_contracts...")
        logger.info(f"imported lba_contracts: {imported_alternance_contracts}")
        logger.info(f"not imported lba_contracts: {not_imported_alternance_contracts}")
        logger.info(f"zipcode errors: {self.invalid_zipcode_errors}")
        logger.info(f"invalid_row errors: {self.invalid_row_errors}")
        logger.info(f"invalid siret errors: {self.invalid_siret_errors}")
#        if self.zipcode_errors > settings.MAXIMUM_ZIPCODE_ERRORS:
#            raise IOError('too many zipcode errors')
#        if self.invalid_row_errors > settings.MAXIMUM_INVALID_ROWS:
#            raise IOError('too many invalid_row errors')

        con.commit()
        cur.close()
        con.close()

        try:
            statistics = DpaeStatistics(
                last_import=datetime.now(),
                most_recent_data_date=self.last_historical_data_date_in_file,
                file_type=self.file_type
            )
            db_session.add(statistics)
            db_session.commit()
            logger.info("First way to insert DPAE statistics in DB : OK")
        except OperationalError:
            # For an obscure reason, inserting via DpaeStatistics does not work on the
            # bonaparte server, so we insert directly with a raw SQL query.
            # This job had been broken for more than a year; this is the only known fix.
            db_session.rollback()
            last_import_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            most_recent_date = self.last_historical_data_date_in_file.strftime('%Y-%m-%d %H:%M:%S')
            query = f"insert into dpae_statistics (last_import, most_recent_data_date, file_type) values ('{last_import_date}','{most_recent_date}','{self.file_type}')"
            con, cur = import_util.create_cursor()
            cur.execute(query)
            con.commit()
            cur.close()
            con.close()
            logger.info("Second way to insert DPAE statistics in DB : OK")


        logger.info("finished importing dpae...")
        return something_new
Example #22
def update_offices():
    """
    Update offices (overload the data provided by the importer).
    """
    # Ideally it would be impossible to store duplicate entries for the same SIRET, but
    # since a single `OfficeAdminUpdate` row can hold multiple SIRETs, there is no longer
    # a uniqueness constraint on SIRET. There may therefore be several entries in
    # `OfficeAdminUpdate` for the same SIRET. We order the query by creation date ASC so
    # that the most recent changes are applied last and take priority over older ones.
    for office_to_update in db_session.query(OfficeAdminUpdate).order_by(
            asc(OfficeAdminUpdate.date_created)).all():

        for siret in OfficeAdminUpdate.as_list(office_to_update.sirets):

            office = Office.query.filter_by(siret=siret).first()

            if office:
                # Apply changes in DB.
                office.company_name = office_to_update.new_company_name or office.company_name
                office.office_name = office_to_update.new_office_name or office.office_name
                office.email = '' if office_to_update.remove_email else (
                    office_to_update.new_email or office.email)
                office.tel = '' if office_to_update.remove_phone else (
                    office_to_update.new_phone or office.tel)
                office.website = '' if office_to_update.remove_website else (
                    office_to_update.new_website or office.website)

                office.email_alternance = office_to_update.email_alternance
                office.phone_alternance = office_to_update.phone_alternance
                office.website_alternance = office_to_update.website_alternance

                # Note: scores of 0 are meaningful, hence the explicit `is not None` checks.
                office.score = office_to_update.score if office_to_update.score is not None else office.score
                office.score_alternance = office_to_update.score_alternance if office_to_update.score_alternance is not None else office.score_alternance
                office.social_network = office_to_update.social_network
                office.contact_mode = office_to_update.contact_mode
                office.save()

                # Apply changes in ElasticSearch.
                body = {
                    'doc': {
                        'email': office.email,
                        'phone': office.tel,
                        'website': office.website,
                        'flag_alternance': 1 if office.flag_alternance else 0
                    }
                }

                scores_by_rome, scores_alternance_by_rome, boosted_romes, boosted_alternance_romes = get_scores_by_rome_and_boosted_romes(
                    office, office_to_update)
                if scores_by_rome:
                    body['doc']['scores_by_rome'] = scores_by_rome
                    body['doc']['boosted_romes'] = boosted_romes
                if scores_alternance_by_rome:
                    body['doc']['scores_alternance_by_rome'] = scores_alternance_by_rome
                    body['doc']['boosted_alternance_romes'] = boosted_alternance_romes

                # The update API makes partial updates: existing `scalar` fields are overwritten,
                # but `objects` fields are merged together.
                # https://www.elastic.co/guide/en/elasticsearch/guide/1.x/partial-updates.html
                # However `scores_by_rome` and `boosted_romes` need to be overwritten because they
                # may change over time.
                # To do this, we perform 2 requests: the first one resets `scores_by_rome` and
                # `boosted_romes` and the second one populates them.
                delete_body = {
                    'doc': {
                        'scores_by_rome': None,
                        'boosted_romes': None,
                        'scores_alternance_by_rome': None,
                        'boosted_alternance_romes': None
                    }
                }

                # Unfortunately these cannot easily be bulked :-(
                # The reason is there is no way to tell bulk to ignore missing documents (404)
                # for a partial update. Tried it and failed it on Oct 2017 @vermeer.
                es.Elasticsearch().update(index=settings.ES_INDEX,
                                          doc_type=es.OFFICE_TYPE,
                                          id=siret,
                                          body=delete_body,
                                          params={'ignore': 404})
                es.Elasticsearch().update(index=settings.ES_INDEX,
                                          doc_type=es.OFFICE_TYPE,
                                          id=siret,
                                          body=body,
                                          params={'ignore': 404})

                # Delete the current PDF thus it will be regenerated at the next download attempt.
                pdf_util.delete_file(office)
Example #23
    def run_task(self):
        date_insertion = datetime.now()
        logger.info("extracting %s ", self.input_filename)
        # This pattern matches the first date in the filename:
        # e.g. 'lbb_xdpdpae_delta_201611102200.bz2' will match 20161110.
        # We keep only the YYYYMMDD date part, e.g. 20190910 = 10 September 2019.
        date_pattern = r'.*_(\d\d\d\d\d\d\d\d)\d\d\d\d'
        date_match = re.match(date_pattern, self.input_filename)
        if date_match:
            date_part = date_match.groups()[0]
            self.last_historical_data_date_in_file = datetime.strptime(
                date_part, "%Y%m%d")
            logger.debug("identified last_historical_data_date_in_file=%s",
                         self.last_historical_data_date_in_file)
        else:
            raise Exception(
                "couldn't find a date pattern in filename. filename should be "
                "like lbb_xdpdpae_delta_YYYYMMDDHHMM.csv")

        count = 0
        statements = []
        something_new = False
        query = """
            INSERT into %s(
                siret,
                hiring_date,
                contract_type,
                departement,
                contract_duration,
                iiann,
                tranche_age,
                handicap_label,
                duree_pec,
                date_insertion
                )
            values(%%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s)
        """ % settings.HIRING_TABLE
        imported_dpae = 0
        imported_dpae_distribution = {}
        not_imported_dpae = 0
        last_historical_data_date_in_db = db_session.query(func.max(Hiring.hiring_date)) \
                                        .filter(Hiring.contract_type.in_((Hiring.CONTRACT_TYPE_CDI,
                                                                          Hiring.CONTRACT_TYPE_CDD,
                                                                          Hiring.CONTRACT_TYPE_CTT))).first()[0]
        if last_historical_data_date_in_db is None:
            last_historical_data_date_in_db = DEFAULT_DATETIME_DPAE
        logger.info(
            "will now extract all dpae with hiring_date between %s and %s",
            last_historical_data_date_in_db,
            self.last_historical_data_date_in_file)

        with import_util.get_reader(self.input_filename) as myfile:
            con, cur = import_util.create_cursor()
            header_line = myfile.readline().strip()  # FIXME detect column positions from header
            if b"siret" not in header_line:
                logger.debug(header_line)
                raise Exception("wrong header line")
            for line in myfile:
                line = line.decode()
                count += 1
                if not count % 100000:
                    logger.debug("reading line %i", count)
                    try:
                        try:
                            cur.executemany(query, statements)
                        except OperationalError:  # retry once in case of deadlock error
                            time.sleep(10)
                            cur.executemany(query, statements)
                        statements = []
                        con.commit()
                        something_new = True
                    except Exception:
                        logger.error(
                            "error in executing statement into dpae table: %s",
                            sys.exc_info()[1])
                        statements = []
                        raise
                try:
                    siret, hiring_date, _, contract_type, departement, contract_duration, \
                    iiann, tranche_age, handicap_label, duree_pec = parse_dpae_line(line)
                except ValueError:
                    self.zipcode_errors += 1
                    continue
                except InvalidRowException:
                    logger.info("invalid_row met at row: %i", count)
                    self.invalid_row_errors += 1
                    continue

                dpae_should_be_imported = (
                    hiring_date > last_historical_data_date_in_db
                    and hiring_date <= self.last_historical_data_date_in_file
                    # For DPAE contracts we keep all CDI and only CDD longer
                    # than 31 days; we ignore CTT.
                    and (contract_type == Hiring.CONTRACT_TYPE_CDI or
                         (contract_type == Hiring.CONTRACT_TYPE_CDD
                          and contract_duration is not None
                          and contract_duration > 31)))

                if dpae_should_be_imported:
                    statement = (siret, hiring_date, contract_type,
                                 departement, contract_duration, iiann,
                                 tranche_age, handicap_label, duree_pec,
                                 date_insertion)
                    statements.append(statement)
                    imported_dpae += 1

                    if hiring_date.year not in imported_dpae_distribution:
                        imported_dpae_distribution[hiring_date.year] = {}
                    if hiring_date.month not in imported_dpae_distribution[
                            hiring_date.year]:
                        imported_dpae_distribution[hiring_date.year][
                            hiring_date.month] = {}
                    if hiring_date.day not in imported_dpae_distribution[
                            hiring_date.year][hiring_date.month]:
                        imported_dpae_distribution[hiring_date.year][
                            hiring_date.month][hiring_date.day] = 0
                    imported_dpae_distribution[hiring_date.year][
                        hiring_date.month][hiring_date.day] += 1
                else:
                    not_imported_dpae += 1

        # run remaining statements
        try:
            cur.executemany(query, statements)
            something_new = True
        except Exception:
            logger.error("error in executing statement into dpae table: %s",
                         sys.exc_info()[1])
            raise

        logger.info("processed %i dpae...", count)
        logger.info("imported dpae: %i", imported_dpae)
        logger.info("not imported dpae: %i", not_imported_dpae)
        logger.info("zipcode errors: %i", self.zipcode_errors)
        logger.info("invalid_row errors: %i", self.invalid_row_errors)
        if self.zipcode_errors > settings.MAXIMUM_ZIPCODE_ERRORS:
            raise IOError('too many zipcode errors')
        if self.invalid_row_errors > settings.MAXIMUM_INVALID_ROWS:
            raise IOError('too many invalid_row errors')
        logger.info("verifying good number of dpae imported.")
        query = "select count(*) from hirings h where hiring_date > %s and hiring_date <= %s and h.contract_type in (1,2,3)"
        cur.execute(query, [
            last_historical_data_date_in_db,
            self.last_historical_data_date_in_file
        ])
        res = cur.fetchone()
        if res[0] != imported_dpae:
            raise DoublonException(
                f"DPAE count mismatch: {res[0]} in DB vs {imported_dpae} imported from the file."
            )
        logger.info("verifying number of DPAE: OK.")
        con.commit()
        cur.close()
        con.close()

        try:
            statistics = DpaeStatistics(
                last_import=datetime.now(),
                most_recent_data_date=self.last_historical_data_date_in_file,
                file_type=self.file_type)
            db_session.add(statistics)
            db_session.commit()
            logger.info("First way to insert DPAE statistics in DB : OK")
        except OperationalError:
            # For an obscure reason, inserting via DpaeStatistics does not work on the
            # bonaparte server, so we insert directly with a raw SQL query.
            # This job had been broken for more than a year; this is the only known fix.
            db_session.rollback()
            last_import_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            most_recent_date = self.last_historical_data_date_in_file.strftime(
                '%Y-%m-%d %H:%M:%S')
            query = f"insert into dpae_statistics (last_import, most_recent_data_date, file_type) values ('{last_import_date}','{most_recent_date}','{self.file_type}')"
            con, cur = import_util.create_cursor()
            cur.execute(query)
            con.commit()
            cur.close()
            con.close()
            logger.info("Second way to insert DPAE statistics in DB : OK")

        logger.info("finished importing dpae...")
        return something_new
Example #24
    def test_admin_access(self):
        """
        Test admin access permissions.
        """

        admin_urls = [
            self.url_for('admin.index'),
            self.url_for('users.index_view'),
            self.url_for('officeadminadd.index_view'),
            self.url_for('officeadminremove.index_view'),
            self.url_for('officeadminupdate.index_view'),
            self.url_for('officeadminextrageolocation.index_view'),
        ]

        with self.test_request_context():

            for url in admin_urls:

                # Access should be denied when a user is not logged in.
                db_session.query(User).update({
                    User.active: True,
                    User.is_admin: False
                })
                db_session.commit()
                self.user = db_session.query(User).filter_by(
                    id=self.user.id).first()
                self.assertTrue(self.user.active)
                self.assertFalse(self.user.is_admin)
                rv = self.app.get(url)
                self.assertEqual(rv.status_code, 404)

                self.login(self.user)

                # Access should be denied when a user is logged in but is not an admin.
                rv = self.app.get(url)
                self.assertEqual(rv.status_code, 404)

                # Access should be granted when a user is logged in and is admin.
                db_session.query(User).update({
                    User.active: True,
                    User.is_admin: True
                })
                db_session.commit()
                self.user = db_session.query(User).filter_by(
                    id=self.user.id).first()
                self.assertTrue(self.user.active)
                self.assertTrue(self.user.is_admin)
                rv = self.app.get(url)
                self.assertEqual(rv.status_code, 200)

                # Access should be denied when a user is not active.
                db_session.query(User).update({
                    User.active: False,
                    User.is_admin: True
                })
                db_session.commit()
                self.user = db_session.query(User).filter_by(
                    id=self.user.id).first()
                self.assertFalse(self.user.active)
                self.assertTrue(self.user.is_admin)
                rv = self.app.get(url)
                self.assertEqual(rv.status_code, 404)

                self.logout()