def get_session_and_leave_open(*args, **kwargs):
    """
    Opens two database sessions, runs one query on each of them and returns
    without closing either session.

    Parameters
        *args, **kwargs : accepted but never read

    Returns
        None
    """
    # imports are kept local to the function, as in the original
    # (RunReport is imported but not used here)
    from wikimetrics.configurables import db
    from wikimetrics.models import ReportStore, RunReport

    first_session = db.get_session()
    second_session = db.get_session()

    # the second session is queried before the first one
    for open_session in (second_session, first_session):
        open_session.query(ReportStore).first()
 def test_db_session_always_fresh(self):
     """
     Attempts commits that may leave a wikimetrics session and a mediawiki
     session in a failed state, then checks that a newly requested session
     of each kind can still execute a query.
     """
     s = db.get_session()
     try:
         r = ReportStore()
         s.add(r)
         s.commit()
         r.id = None
         s.commit()
     except Exception:
         # any error raised above is ignored on purpose; catching Exception
         # instead of using a bare except lets KeyboardInterrupt and
         # SystemExit propagate
         pass

     # if the session is not cleaned up properly, this will throw an exception
     s = db.get_session()
     s.execute('select 1').fetchall()

     s = db.get_mw_session(mediawiki_project)
     try:
         u = MediawikiUser()
         s.add(u)
         s.commit()
         u.user_id = None
         s.commit()
     except Exception:
         # same as above: only ordinary errors are swallowed
         pass

     # if the session is not cleaned up properly, this will throw an exception
     s = db.get_mw_session(mediawiki_project)
     s.execute('select 1').fetchall()
Example #3
0
    def test_db_session_always_fresh(self):
        """
        Attempts commits that may leave a wikimetrics session and a mediawiki
        session in a failed state, then checks that a newly requested session
        of each kind can still execute a query.
        """
        s = db.get_session()
        try:
            r = ReportStore()
            s.add(r)
            s.commit()
            r.id = None
            s.commit()
        except Exception:
            # any error raised above is ignored on purpose; catching Exception
            # instead of using a bare except lets KeyboardInterrupt and
            # SystemExit propagate
            pass

        # if the session is not cleaned up properly, this will throw an exception
        s = db.get_session()
        s.execute('select 1').fetchall()

        s = db.get_mw_session(mediawiki_project)
        try:
            u = MediawikiUser()
            s.add(u)
            s.commit()
            u.user_id = None
            s.commit()
        except Exception:
            # same as above: only ordinary errors are swallowed
            pass

        # if the session is not cleaned up properly, this will throw an exception
        s = db.get_mw_session(mediawiki_project)
        s.execute('select 1').fetchall()
    def post_process(self, results):
        """
        If the report is public and this task went well,
        it will create a file on disk asynchronously.

        Parameters:
            results : data to write to disk, in this form:
                {'5cab8d55-da19-436f-b675-1d2a3fca3481':
                    {'Sum': {'pages_created': Decimal('0.0000')}}
                }

        Returns:
            None
        """

        if self.public is False:
            return

        # the session is acquired outside the try block so that a failure in
        # get_session() is not masked by a NameError raised from the finally
        session = db.get_session()
        try:
            db_report = session.query(ReportStore).get(self.persistent_id)
        finally:
            session.close()

        data = db_report.get_json_result(results)

        # code below schedules an async task on celery to write the file
        if self.recurrent_parent_id is not None:
            write_report_task.delay(self.recurrent_parent_id, self.created, data)
        else:
            # report is public and does not have a recurrent_parent_id, it's
            # the parent report, call the first run of the report
            self._run_child_report()
Example #5
0
def get_usernames_for_task_result(task_result):
    """
    Looks up the user names for the cohort referenced by a task result.

    Parameters
        task_result : the result dictionary from Celery
    Returns
         user_names : dictionary of user names (keyed by WikiUserKey)
                      empty if results are not detailed by user

    TODO: this function should move outside the controller,
          at the time of writing the function we are
          consolidating code that was duplicated
    """
    user_names = {}
    if Aggregation.IND in task_result:
        session = db.get_session()
        # cohort should be the same for all users, so it is read from the
        # first key only; cohort_id stays None when there are no keys
        cohort_id = None

        # iterating the mapping directly yields its keys, which is all that
        # is needed here (the per-user rows are not used)
        for wiki_user_key_str in task_result[Aggregation.IND]:
            cohort_id = WikiUserKey.fromstr(wiki_user_key_str).cohort_id
            break

        user_names = g.cohort_service.get_wikiusernames_for_cohort(cohort_id, session)

    return user_names
Example #6
0
def get_celery_task(result_key):
    """
    Resolves a report's unique identifier to its celery task and its
    database record.

    Parameters
        result_key  : The unique identifier found in the report database table
                        This parameter is required and should not be None

    Returns
        A tuple of the form (celery_task_object, database_report_object),
        or (None, None) when result_key is empty or no report matches it
    """
    if not result_key:
        return (None, None)

    try:
        db_session = db.get_session()
        matching_reports = db_session.query(ReportStore).filter(
            ReportStore.result_key == result_key
        )
        pj = matching_reports.one()
        return (Report.task.AsyncResult(pj.queue_result_key), pj)
    except NoResultFound:
        # read-only query, so there is nothing to roll back
        return (None, None)
Example #7
0
    def group_by_project(self):
        """
        Groups the cohort's user ids by project, mimicking the interface of
        itertools.groupby except that the grouped items are plain user ids
        instead of complete user records.

        Returns:
            iterable of tuples of the form:
                (project, <iterable_of_user_ids>)
            when the query finds no users, a one element list holding
            (self.default_project, None)

        This turns a project-heterogenous cohort into a set of
        project-homogenous cohorts, each of which can be analyzed
        using a single database connection.
        """
        session = db.get_session()
        id_and_project = session.query(
            WikiUserStore.mediawiki_userid, WikiUserStore.project
        )
        # rows are sorted by project because groupby only merges adjacent rows
        user_id_projects = (
            self.filter_wikiuser_query(id_and_project)
            .order_by(WikiUserStore.project)
            .all()
        )

        if len(user_id_projects) == 0:
            return [(self.default_project, None)]

        by_project = itertools.groupby(user_id_projects, key=itemgetter(1))
        return (
            (project or self.default_project, (row[0] for row in rows))
            for project, rows in by_project
        )
 def report_result(self, results, child_results=None):
     """
     Assigns a fresh unique identifier to this ReportNode, stores it on the
     persisted report, and returns a dictionary that maps the identifier to
     the given results.  Returning a dictionary lets ReportNode results be
     merged as the tree of ReportNodes is evaluated.

     Parameters
         results         : Anything that the ReportNode compiles in its finish step
         child_results   : The results from a child Report(s) if they should be
                           preserved.  ReportLeaf results and any ReportNode results
                           that are copied should not be preserved.

     Returns
         dictionary with this node's result_key -> results, updated with
         every entry of child_results
     """
     children = [] if child_results is None else child_results

     self.result_key = str(uuid4())

     session = db.get_session()
     try:
         stored_report = session.query(PersistentReport).get(self.persistent_id)
         stored_report.result_key = self.result_key
         session.add(stored_report)
         session.commit()
     finally:
         session.close()

     combined = {self.result_key: results}
     for child in children:
         combined.update(child)
     return combined
Example #9
0
 def __init__(self,
              user_id=None,
              status=celery.states.PENDING,
              name=None,
              result_key=None,
              children=None):
     """
     Sets up the job's attributes and stores a PersistentJob row for it.

     Parameters
         user_id     : accepted for interface compatibility (see note below)
         status      : initial status, defaults to celery.states.PENDING
         name        : name stored on the PersistentJob
         result_key  : kept on the instance only, not written to the database
         children    : list of child jobs; a new empty list when omitted
     """
     # NOTE(review): the user_id argument is never read; the id is always
     # taken from current_user when one is authenticated - confirm intent
     self.user_id = None
     try:
         if current_user.is_authenticated():
             self.user_id = current_user.id
     except RuntimeError:
         # nothing to worry about, just using current_user outside
         # of a web context.  This should only happen during testing
         pass

     self.status = status
     self.name = name
     self.result_key = result_key
     # a new list per instance: a shared [] default would be the same object
     # for every instance created without explicit children
     self.children = [] if children is None else children

     # create PersistentJob and store id
     # note that result_key is always empty at this stage
     pj = PersistentJob(user_id=self.user_id,
                        status=self.status,
                        name=self.name,
                        show_in_ui=self.show_in_ui)
     db_session = db.get_session()
     db_session.add(pj)
     db_session.commit()
     self.persistent_id = pj.id
 def report_result(self, results, child_results=None):
     """
     Creates a unique identifier for this ReportNode, and returns a one element
     dictionary with that identifier as the key and its results as the value.
     This allows ReportNode results to be merged as the tree of ReportNodes is
     evaluated.

     Parameters
         results         : Anything that the ReportNode compiles in its finish step
         child_results   : The results from a child Report(s) if they should be
                           preserved.  ReportLeaf results and any ReportNode results
                           that are copied should not be preserved.
                           Treated as an empty list when omitted.

     Returns
         dictionary with this node's result_key -> results, updated with
         every entry of child_results
     """
     # None replaces the former mutable [] default
     if child_results is None:
         child_results = []

     self.result_key = str(uuid4())
     db_session = db.get_session()
     try:
         pj = db_session.query(PersistentReport).get(self.persistent_id)
         pj.result_key = self.result_key
         db_session.add(pj)
         db_session.commit()
     finally:
         # close the session even when the query or the commit raises
         db_session.close()

     merged = {self.result_key: results}
     for child_result in child_results:
         merged.update(child_result)
     return merged
 def __init__(self,
              user_id=None,
              status=celery.states.PENDING,
              name=None,
              queue_result_key=None,
              children=None,
              parameters='{}'):
     """
     Sets up the report's attributes and stores a PersistentReport row for it.

     Parameters
         user_id             : owner of the report; when falsy, the id of the
                               authenticated current_user is used if available
         status              : initial status, defaults to celery.states.PENDING
         name                : name stored on the PersistentReport
         queue_result_key    : kept on the instance only at this stage
         children            : list of child reports; a new empty list when omitted
         parameters          : value stored in the PersistentReport's parameters
     """
     self.user_id = user_id
     if not self.user_id:
         try:
             if current_user.is_authenticated():
                 self.user_id = current_user.id
         except RuntimeError:
             # nothing to worry about, just using current_user outside
             # of a web context.  This should only happen during testing
             pass

     self.status = status
     self.name = name
     self.queue_result_key = queue_result_key
     # a new list per instance: a shared [] default would be the same object
     # for every instance created without explicit children
     self.children = [] if children is None else children

     # store report to database
     # note that queue_result_key is always empty at this stage
     pj = PersistentReport(user_id=self.user_id,
                           status=self.status,
                           name=self.name,
                           show_in_ui=self.show_in_ui,
                           parameters=parameters)
     db_session = db.get_session()
     try:
         db_session.add(pj)
         db_session.commit()
         self.persistent_id = pj.id
     finally:
         # close the session even when the commit raises
         db_session.close()
    def report_result(self, results, child_results=None):
        """
        NOTE: child_results is currently not used by callers.  This function
        still works as originally implemented, but child_results should go
        under evaluation.

        Assigns a fresh unique identifier to this ReportNode and returns a
        dictionary that maps the identifier to the given results, so that
        ReportNode results can be merged as the tree of ReportNodes is
        evaluated.  When self.store is truthy the identifier is also saved
        on the stored report.

        Parameters
            results         : Anything that the ReportNode compiles in its finish step
            child_results   : The results from a child Report(s) if they should be
                              preserved.  ReportLeaf results and any ReportNode results
                              that are copied should not be preserved.

        Returns
            dictionary with this node's result_key -> results, updated with
            every entry of child_results
        """
        children = [] if child_results is None else child_results

        self.result_key = str(uuid4())

        if self.store:
            session = db.get_session()
            stored_report = session.query(ReportStore).get(self.persistent_id)
            stored_report.result_key = self.result_key
            session.add(stored_report)
            session.commit()

        combined = {self.result_key: results}
        for child in children:
            combined.update(child)
        return combined
Example #13
0
    def update_reports(report_ids, owner_id, public=None, recurrent=None):
        """
        Bulk-updates reports, restricted to the ones that belong to owner_id.

        TODO: an Admin should be able to modify any report but it does not look we
        have an admin check (that kind of check should be cached)

        Parameters:
            report_ids  : list of ids of ReportStore objects to update
            owner_id    : the person purporting to own these reports
            public      : update all reports' public attribute to this, default is None
            recurrent   : update all reports' recurrent attribute to this, default is None

        Returns:
            True if the number of updated records matches the number of report_ids

        Raises:
            UnauthorizedReportAccessError if the number of updated records
            does not match the number of report_ids
        """
        db_session = db.get_session()

        # only the attributes that were actually passed get updated
        requested = (('public', public), ('recurrent', recurrent))
        values = dict(
            (column, setting) for column, setting in requested if setting is not None
        )

        owned_by_owner = and_(
            ReportStore.id.in_(report_ids),
            ReportStore.user_id == owner_id,
        )
        statement = ReportStore.__table__.update().values(**values).where(owned_by_owner)
        update = db_session.execute(statement)
        # NOTE(review): the commit happens before the row count is checked, so
        # the rows that did match stay updated even when the error is raised
        db_session.commit()

        if not update or update.rowcount != len(report_ids):
            raise UnauthorizedReportAccessError(
                'Unauthorized access to report by {0}'.format(owner_id))
        return True
    def run(self):
        """
        Initializes the cohort and any parameters not known at init time for
        this ReportNode, then initializes and calls its super class' run method.

        Returns:
            whatever the super class' run method returns

        Raises:
            KeyError if required parameters are missing
            Exception if the cohort is missing or has not been validated
        """
        cohort_store_object = cohort_service.fetch_by_id(self.cohort_id)

        # Guard first: only a stored, validated cohort may be processed
        if cohort_store_object is None or not cohort_store_object.validated:
            # This should never happen, unless it's a test where
            # RunProgramMetricsReport is being directly instantiated.
            task_logger.error("Cohort not validated")
            # Clean up cohort anyway
            cohort_service.delete_owner_cohort(None, self.cohort_id)
            raise Exception("Cohort not validated")

        self.cohort = cohort_service.convert(cohort_store_object)
        validate_report = ValidateProgramMetricsReport(
            self.cohort, db.get_session(), user_id=self.user_id)
        self.cohort.size = validate_report.unique_users

        cohort_parameters = {
            'id': self.cohort.id,
            'name': self.cohort.name,
            'size': self.cohort.size,
        }
        metric_parameters = {
            'name': 'ProgramGlobalMetrics',
            'end_date': self.end_date,
        }
        self.parameters = {
            'name': 'Program Global Metrics Report',
            'cohort': cohort_parameters,
            'user_id': self.user_id,
            'metric': metric_parameters,
        }

        # the parent initializer runs here, once the parameters are known
        super(RunProgramMetricsReport, self).__init__(
            name=self.parameters['name'],
            user_id=self.user_id,
            parameters=self.parameters,
            public=False,
            recurrent=False,
            recurrent_parent_id=self.recurrent_parent_id,
            created=None,
            store=True,
            persistent_id=self.persistent_id,
        )

        if not validate_report.valid():
            # the validation report becomes the only child
            self.children = [validate_report]
        else:
            self.children = [
                self.get_active_editors_report(),
                self.get_new_editors_report(),
                self.get_pages_created_report(),
                self.get_pages_edited_report(),
                self.get_bytes_added_report(),
            ]
        return super(RunProgramMetricsReport, self).run()
 def run(self):
     """
     Executed when the instance is added into a Report node hierarchy.
     Marks the stored report as failed and returns a failure message for
     an invalid configuration.  None of these failures should happen unless
     the user tries to hack the system.

     Returns
         dictionary of the form {'FAILURE': <message>}; the message is
         'False' when no specific problem was recorded
     """
     self.set_status(celery.states.STARTED, task_id=current_task.request.id)

     session = db.get_session()
     try:
         from wikimetrics.models.storage import ReportStore
         failed_report = session.query(ReportStore).get(self.persistent_id)
         failed_name = '{0} - {1} (failed validation)'.format(
             self.metric_label,
             self.cohort_name,
         )
         failed_report.name = failed_name
         failed_report.status = celery.states.FAILURE
         session.commit()
     finally:
         session.close()

     if self.metric_valid:
         message = ''
     else:
         message = '{0} was incorrectly configured\n'.format(
             self.metric_label,
         )
     return {'FAILURE': message or 'False'}
def get_celery_task(result_key):
    """
    Resolves a report's unique identifier to its celery task and its
    database record.  The session used for the lookup is always closed.

    Parameters
        result_key  : The unique identifier found in the report database table
                        This parameter is required and should not be None

    Returns
        A tuple of the form (celery_task_object, database_report_object),
        or (None, None) when result_key is empty or no report matches it
    """
    if not result_key:
        return (None, None)

    try:
        session = db.get_session()
        try:
            lookup = session.query(ReportStore).filter(
                ReportStore.result_key == result_key
            )
            pj = lookup.one()
            celery_task = Report.task.AsyncResult(pj.queue_result_key)
        finally:
            session.close()
    except NoResultFound:
        return (None, None)
    else:
        return (celery_task, pj)
 def login_for_testing_only():
     """
     Logs in a fixed user, looked up by email, but only when the app runs
     with DEBUG enabled.

     Returns
         '' after logging the user in, None when DEBUG is off
     """
     if not app.config['DEBUG']:
         return None

     session = db.get_session()
     test_user = session.query(UserStore).filter_by(email='*****@*****.**').one()
     test_user.login(session)
     login_user(test_user)
     return ''
def cohort_detail(name_or_id):
    """
    Looks a cohort up by numeric id or by name and returns it as JSON,
    together with wikiusers, tags and validation status.

    The JSON object is of the form:
    {id: 2, name: 'Berlin Beekeeping Society', description: '', wikiusers: [
        {mediawiki_username: '******', mediawiki_userid: 5, project: 'dewiki'},
        {mediawiki_username: '******', mediawiki_userid: 6, project: 'dewiki'},
    ]}

    At most 200 wikiusers are requested when the full_detail request argument
    is 'true', otherwise at most 3.

    Returns a (message, 401) tuple when access is not allowed and a
    (message, 404) tuple when the cohort cannot be found.
    """
    full_detail = request.args.get('full_detail', 0)

    cohort = None
    db_session = db.get_session()
    try:
        # all-digit values are treated as ids, everything else as a name
        if str(name_or_id).isdigit():
            kargs = {'by_id': int(name_or_id)}
        else:
            kargs = {'by_name': name_or_id}
        cohort = g.cohort_service.get_for_display(db_session, current_user.id, **kargs)
    except Unauthorized:
        return 'You are not allowed to access this Cohort', 401
    except NoResultFound:
        return 'Could not find this Cohort', 404
    finally:
        db_session.close()

    if full_detail == 'true':
        limit = 200
    else:
        limit = 3

    # each step takes the previous step's output
    detail = populate_cohort_wikiusers(cohort, limit)
    detail = populate_cohort_tags(detail, cohort.id)
    detail = populate_cohort_validation_status(detail)
    return json_response(detail)
    def group_by_project(self):
        """
        mimics the interface of itertools.groupby, with the
        exception that the grouped items are simply user_ids
        rather than complete user records

        Returns:
            generator of tuples of the form:
                (project, <iterable_of_user_ids>)
            a falsy project is replaced by self.default_project

        this is useful for turning a project-heterogenous cohort
        into a set of project-homogenous cohorts, which can be
        analyzed using a single database connection
        """

        db_session = db.get_session()
        try:
            user_id_projects = db_session\
                .query(WikiUser.mediawiki_userid, WikiUser.project)\
                .join(CohortWikiUser)\
                .filter(CohortWikiUser.cohort_id == self.id)\
                .order_by(WikiUser.project)\
                .all()
        finally:
            # close the session even when the query raises
            db_session.close()
        # TODO: push this logic into sqlalchemy.  The solution
        # includes subquery(), but I can't seem to get anything working
        # rows are sorted by project because groupby only merges adjacent rows
        groups = itertools.groupby(user_id_projects, key=itemgetter(1))

        for project, users in groups:
            yield project or self.default_project, (r[0] for r in users)
def delete_cohort(cohort_id):
    """
    Deletes a cohort and all its associated links when the requester owns it,
    or only removes the cohort from the requester's list when the requester
    is a viewer and other users are attached to the cohort.

    Returns a JSON redirect to the cohorts index on success and a JSON error
    when the cohort can not be deleted or a DatabaseError occurs.
    """
    session = db.get_session()
    try:
        owner_and_viewers = num_users(session, cohort_id)
        role = get_role(session, cohort_id)

        # Owner wants to delete, with or without other viewers; the cohort
        # is removed from the other viewers' lists too
        owner_deletes = owner_and_viewers >= 1 and role == CohortUserRole.OWNER
        # Viewer wants to drop the cohort from their own list only
        viewer_deletes = (
            not owner_deletes
            and owner_and_viewers > 1
            and role == CohortUserRole.VIEWER
        )

        # None of the cases fit.
        if not (owner_deletes or viewer_deletes):
            session.rollback()
            return json_error('This Cohort can not be deleted.')

        if owner_deletes:
            delete_owner_cohort(session, cohort_id)
        else:
            delete_viewer_cohort(session, cohort_id)
        session.commit()
        return json_redirect(url_for('cohorts_index'))
    except DatabaseError as e:
        session.rollback()
        return json_error(e.message)
    finally:
        session.close()
    def report_result(self, results, child_results=None):
        """
        NOTE: child_results is currently not used by callers.  This function
        still works as originally implemented, but child_results should go
        under evaluation.

        Generates a new unique identifier for this ReportNode and hands back a
        dictionary keyed by that identifier with the results as the value, so
        that ReportNode results can be merged as the tree of ReportNodes is
        evaluated.  The identifier is written to the stored report only when
        self.store is truthy.

        Parameters
            results         : Anything that the ReportNode compiles in its finish step
            child_results   : The results from a child Report(s) if they should be
                              preserved.  ReportLeaf results and any ReportNode results
                              that are copied should not be preserved.

        Returns
            dictionary with this node's result_key -> results, updated with
            every entry of child_results
        """
        extra_results = child_results
        if extra_results is None:
            extra_results = []

        new_key = str(uuid4())
        self.result_key = new_key

        if self.store:
            session = db.get_session()
            report_row = session.query(ReportStore).get(self.persistent_id)
            report_row.result_key = self.result_key
            session.add(report_row)
            session.commit()

        output = {self.result_key: results}
        for extra in extra_results:
            output.update(extra)
        return output
def delete_cohort(cohort_id):
    """
    Deletes a cohort and all its associated links when the requester owns it,
    or only removes the link between current_user and the cohort when the
    requester is a viewer and other users are attached to the cohort.

    Returns a JSON redirect to the cohorts index on success and a JSON error
    when the cohort can not be deleted or a DatabaseError occurs.
    """
    session = db.get_session()
    try:
        owner_and_viewers = num_users(session, cohort_id)
        role = get_role(session, cohort_id)

        # Owner wants to delete, with or without other viewers; the cohort
        # is removed from the other viewers' lists too
        as_owner = owner_and_viewers >= 1 and role == CohortUserRole.OWNER
        # Viewer wants to drop the cohort from their own list only
        as_viewer = (
            not as_owner
            and owner_and_viewers > 1
            and role == CohortUserRole.VIEWER
        )

        # None of the cases fit.
        if not (as_owner or as_viewer):
            session.rollback()
            return json_error('This Cohort can not be deleted.')

        if as_owner:
            g.cohort_service.delete_owner_cohort(session, cohort_id)
        else:
            g.cohort_service.delete_viewer_cohort(session, current_user.id, cohort_id)
        session.commit()
        return json_redirect(url_for('cohorts_index'))
    except DatabaseError as e:
        session.rollback()
        return json_error(e.message)
def cohort_detail(name_or_id):
    """
    Looks a cohort up by numeric id or by name and returns its attributes,
    plus its tags and validation status, as JSON.

    The JSON object is of the form:
    {id: 2, name: 'Berlin Beekeeping Society', description: '', wikiusers: [
        {mediawiki_username: '******', mediawiki_userid: 5, project: 'dewiki'},
        {mediawiki_username: '******', mediawiki_userid: 6, project: 'dewiki'},
    ]}

    Returns a (message, 401) tuple when access is not allowed and a
    (message, 404) tuple when the cohort cannot be found.
    """
    cohort = None
    db_session = db.get_session()
    try:
        # all-digit values are treated as ids, everything else as a name
        if str(name_or_id).isdigit():
            lookup = {'by_id': int(name_or_id)}
        else:
            lookup = {'by_name': name_or_id}
        cohort = g.cohort_service.get_for_display(db_session, current_user.id, **lookup)

        # the cohort's own attribute dictionary is extended in place
        cohort_dict = cohort.__dict__
        cohort_dict['tags'] = populate_cohort_tags(cohort.id, db_session)

        validation = populate_cohort_validation_status(
            cohort, db_session, cohort.size)
        cohort_dict['validation'] = validation

    # no rollback needed, only queries were issued
    except Unauthorized:
        return 'You are not allowed to access this Cohort', 401
    except NoResultFound:
        return 'Could not find this Cohort', 404

    return json_response(cohort_dict)
Example #24
0
 def group_by_project(self):
     """
     Groups the cohort's user ids by project, mimicking the interface of
     itertools.groupby except that the grouped items are plain user ids
     instead of complete user records.

     Returns:
         iterable of tuples of the form:
             (project, <iterable_of_user_ids>)
         a falsy project is replaced by self.default_project

     This turns a project-heterogenous cohort into a set of
     project-homogenous cohorts, each of which can be analyzed
     using a single database connection.
     """
     session = db.get_session()
     try:
         id_and_project = session.query(WikiUser.mediawiki_userid, WikiUser.project)
         # rows are sorted by project because groupby only merges adjacent rows
         user_id_projects = (
             self.filter_wikiuser_query(id_and_project)
             .order_by(WikiUser.project)
             .all()
         )
     finally:
         session.close()

     # TODO: push this logic into sqlalchemy.  The solution
     # includes subquery(), but I can't seem to get anything working
     by_project = itertools.groupby(user_id_projects, key=itemgetter(1))

     return (
         (project or self.default_project, (row[0] for row in rows))
         for project, rows in by_project
     )
 def add(task_type, task_id, message, traceback):
     """
     Records an error for a task: updates the TaskErrorStore entry found for
     (task_type, task_id), or creates one when none is found.

     Parameters
         task_type   : passed to TaskErrorStore.get / create
         task_id     : passed to TaskErrorStore.get / create
         message     : error message to store
         traceback   : traceback to store
     """
     session = db.get_session()
     previous = TaskErrorStore.get(session, task_type, task_id)
     if not previous:
         TaskErrorStore.create(session, task_type, task_id, message, traceback)
         return
     TaskErrorStore.update(session, previous, message, traceback)
    def set_public_report_state(report_id, owner_id, file_manager, public=True, data=''):
        """
        Internal method that sets a report's public/private status.
        Making a previously public report private removes its file from disk.
        Making a report public saves the report data to disk.

        Validation that user can update this report has already happened before
        we reach this method.

        The UI calls this method on a per report basis
        but updates can be done for a set of reports.

        TODO: This method should not have http level code and
            should be part of an API,
            not be on the controller.
        TODO: We should not open & close a session here, I think session should be
            open/closed at the beginning/end of the request
            using flask request scoped functions

        Parameters:
            report_id   : id of PersistentReport to update
            owner_id    : the User purporting to own this report
            file_manager: PublicReportFileManager to manage io interactions
            public      : True | False if True data must be present
            data        : String, report data to write out to filepath

        Returns:
            Nothing

        Throws:
            Exception if there are issues making the report public or private

        A private report has public=False
        """
        # NOTE: update_reports checks ownership and raises an exception if needed
        PersistentReport.update_reports([report_id], owner_id, public=public)

        # good no exception
        # bound before the try so the finally clause can tell whether a
        # session was actually obtained (avoids a NameError masking the
        # original error when get_session() itself fails)
        db_session = None
        try:
            db_session = db.get_session()
            path = file_manager.get_public_report_path(report_id)
            if public:
                file_manager.write_data(path, data)

            else:
                file_manager.remove_file(path)

        except (PublicReportIOError, SQLAlchemyError) as e:
            app.logger.exception(str(e))
            # if there was an IO error rollback prior changes
            # this issues a new query as now our session scope and
            # transaction scope are now the same
            PersistentReport.update_reports([report_id], owner_id, public=not public)
            # bare raise keeps the original traceback
            raise

        finally:
            if db_session is not None:
                db_session.close()
def cohorts_index():
    """
    Renders the cohorts page ('cohorts.html').

    Every tag returned by the tag service is serialized to JSON and passed
    to the template under the name 'tags'.
    """
    db_session = db.get_session()
    all_tags = g.tag_service.get_all_tags(db_session)
    tags_json = json.dumps(all_tags)
    return render_template('cohorts.html', tags=tags_json)
 def add(task_type, task_id, message, traceback):
     """
     Stores an error for a task: updates the record found by
     TaskErrorStore.get when there is one, creates a new record otherwise.

     Parameters
         task_type   : forwarded to TaskErrorStore.get / create
         task_id     : forwarded to TaskErrorStore.get / create
         message     : error message to store
         traceback   : traceback to store
     """
     db_session = db.get_session()
     previous = TaskErrorStore.get(db_session, task_type, task_id)
     if not previous:
         # nothing recorded yet for this task
         TaskErrorStore.create(db_session, task_type, task_id, message,
                               traceback)
         return
     TaskErrorStore.update(db_session, previous, message, traceback)
Example #29
0
 def login_for_testing_only():
     """
     Logs in a fixed user looked up by email, but only when the app is
     configured with DEBUG.

     Returns
         An empty string after logging the user in; None when DEBUG is off.
     """
     if not app.config['DEBUG']:
         return None

     db_session = db.get_session()
     test_user_query = db_session.query(UserStore).filter_by(
         email='*****@*****.**')
     # one() raises unless exactly one matching user exists
     user = test_user_query.one()
     user.login(db_session)
     login_user(user)
     return ''
    def __init__(self,
                 user_id=None,
                 status=celery.states.PENDING,
                 name=None,
                 queue_result_key=None,
                 children=None,
                 public=False,
                 parameters={},
                 recurrent=False,
                 recurrent_parent_id=None,
                 created=None,
                 store=False,
                 persistent_id=None):
        """
        Initializes the report node and, when asked to, stores it in the
        database as a ReportStore row.

        Parameters
            user_id             : id of the owning user; when falsy, falls back
                                  to current_user.id if current_user is
                                  authenticated
            status              : initial status
            name                : display name; the stored row falls back to
                                  str(self) when this is falsy
            queue_result_key    : kept on the instance, not written on creation
            children            : child nodes, a new empty list when None
            public              : kept on the instance and written to the row
            parameters          : stringified and utf-8 encoded into the row;
                                  only read, never mutated, so the shared
                                  default dict is harmless here
            recurrent           : written to the stored row
            recurrent_parent_id : written to the stored row
            created             : creation time for the stored row, defaults
                                  to datetime.now()
            store               : when True and persistent_id is None, a new
                                  ReportStore row is added and committed
            persistent_id       : id of an already stored report; when given,
                                  nothing is written

        Raises
            Any exception raised while saving the row, after rolling back
        """
        if children is None:
            children = []
        self.user_id = user_id
        if not self.user_id:
            try:
                if current_user.is_authenticated():
                    self.user_id = current_user.id
            except RuntimeError:
                # nothing to worry about, just using current_user outside
                # of a web context.  This should only happen during testing
                pass

        self.status = status
        self.name = name
        self.queue_result_key = queue_result_key
        self.children = children
        self.public = public
        self.store = store
        self.persistent_id = persistent_id
        # only filled in below, from the stored row
        self.created = None

        if self.store is True and self.persistent_id is None:
            # store report to database
            # note that queue_result_key is always empty at this stage
            pj = ReportStore(user_id=self.user_id,
                             status=self.status,
                             show_in_ui=self.show_in_ui,
                             parameters=stringify(parameters).encode('utf-8'),
                             public=self.public,
                             recurrent=recurrent,
                             recurrent_parent_id=recurrent_parent_id,
                             created=created or datetime.now())
            # get the session before entering the try block: if this call
            # fails there is nothing to roll back, and the handler below must
            # not replace the real error with an unbound-name error
            session = db.get_session()
            try:
                session.add(pj)
                session.commit()
                # the id only exists after the first commit
                self.persistent_id = pj.id
                self.created = pj.created
                pj.name = self.name or str(self)
                session.commit()
            except:
                # deliberately catches everything: roll back, then re-raise
                session.rollback()
                raise
Example #31
0
    def fetch(self, cohort):
        """
        Loads the CohortStore row for a logical cohort.  No permission checks
        are performed.

        Parameters
            cohort  : a logical Cohort object, only its id is used

        Returns
            The result of looking up cohort.id on the CohortStore query
        """
        cohort_query = db.get_session().query(CohortStore)
        return cohort_query.get(cohort.id)
    def __init__(self,
                 user_id=None,
                 status=celery.states.PENDING,
                 name=None,
                 queue_result_key=None,
                 children=None,
                 public=False,
                 parameters={},
                 recurrent=False,
                 recurrent_parent_id=None,
                 created=None,
                 store=False,
                 persistent_id=None):
        """
        Initializes the report node and, when asked to, stores it in the
        database as a ReportStore row.

        Parameters
            user_id             : id of the owning user; when falsy, falls back
                                  to current_user.id if current_user is
                                  authenticated
            status              : initial status
            name                : display name; the stored row falls back to
                                  str(self) when this is falsy
            queue_result_key    : kept on the instance, not written on creation
            children            : child nodes, a new empty list when None
            public              : kept on the instance and written to the row
            parameters          : stringified and utf-8 encoded into the row;
                                  only read, never mutated, so the shared
                                  default dict is harmless here
            recurrent           : written to the stored row
            recurrent_parent_id : written to the stored row
            created             : creation time for the stored row, defaults
                                  to datetime.now()
            store               : when True and persistent_id is None, a new
                                  ReportStore row is added and committed
            persistent_id       : id of an already stored report; when given,
                                  nothing is written

        Raises
            Any exception raised while saving the row, after rolling back
        """

        if children is None:
            children = []
        self.user_id = user_id
        if not self.user_id:
            try:
                if current_user.is_authenticated():
                    self.user_id = current_user.id
            except RuntimeError:
                # nothing to worry about, just using current_user outside
                # of a web context.  This should only happen during testing
                pass

        self.status = status
        self.name = name
        self.queue_result_key = queue_result_key
        self.children = children
        self.public = public
        self.store = store
        self.persistent_id = persistent_id
        # only filled in below, from the stored row
        self.created = None

        if self.store is True and self.persistent_id is None:
            # store report to database
            # note that queue_result_key is always empty at this stage
            pj = ReportStore(user_id=self.user_id,
                             status=self.status,
                             show_in_ui=self.show_in_ui,
                             parameters=stringify(parameters).encode('utf-8'),
                             public=self.public,
                             recurrent=recurrent,
                             recurrent_parent_id=recurrent_parent_id,
                             created=created or datetime.now())
            # get the session before entering the try block: if this call
            # fails there is nothing to roll back, and the handler below must
            # not replace the real error with an unbound-name error
            session = db.get_session()
            try:
                session.add(pj)
                session.commit()
                # the id only exists after the first commit
                self.persistent_id = pj.id
                self.created = pj.created
                pj.name = self.name or str(self)
                session.commit()
            except:
                # deliberately catches everything: roll back, then re-raise
                session.rollback()
                raise
    def fetch(self, cohort):
        """
        Loads the CohortStore row for a logical cohort.  No permission checks
        are performed.

        Parameters
            cohort  : a logical Cohort object, only its id is used

        Returns
            The result of looking up cohort.id on the CohortStore query
        """
        cohort_query = db.get_session().query(CohortStore)
        return cohort_query.get(cohort.id)
def get_cohort_by_name(name):
    """
    Gets a cohort by name, without checking access or worrying about duplicates

    Parameters
        name    : the cohort name to match exactly

    Returns
        The first CohortStore with that name, or None if there is none.
        The session used for the lookup is closed before returning.
    """
    # acquire the session before the try block: if get_session() itself fails
    # there is nothing to close, and the finally clause must not replace the
    # real error with an UnboundLocalError
    db_session = db.get_session()
    try:
        return db_session.query(CohortStore).filter(CohortStore.name == name).first()
    finally:
        db_session.close()
    def from_upload(cls, cohort_upload, owner_user_id, session=None):
        """
        Create a new cohort and validate a list of uploaded users for it

        Parameters:
            cohort_upload   : the cohort upload form, parsed by WTForms
            owner_user_id   : the Wikimetrics user id that is uploading
            session         : database session to use; when not given, one is
                              obtained from db.get_session()

        Returns:
            An instance of ValidateCohort, or None if saving the cohort, its
            owner or its records failed (the error is logged and the session
            rolled back)
        """
        cohort_description = cohort_upload.description.data
        if cohort_description is not None:
            cohort_description = cohort_description.encode('utf-8')

        cohort = CohortStore(
            name=cohort_upload.name.data,
            description=cohort_description,
            default_project=cohort_upload.project.data,
            enabled=True,
            public=False,
            validated=False,
            # the form delivers this flag as the string 'True'
            validate_as_user_ids=cohort_upload.validate_as_user_ids.data == 'True',
        )
        # the centralauth field is optional on the form
        centralauth = getattr(cohort_upload, 'centralauth', None)
        if centralauth is not None and centralauth.data is True:
            cohort.class_name = 'CentralAuthCohort'
        session = session or db.get_session()
        try:
            session.add(cohort)
            # commit first so that cohort.id is available below
            session.commit()

            cohort_user = CohortUserStore(
                user_id=owner_user_id,
                cohort_id=cohort.id,
                role=CohortUserRole.OWNER
            )
            session.add(cohort_user)
            session.commit()

            # bulk insert of the uploaded records, not yet validated
            session.execute(
                WikiUserStore.__table__.insert(), [
                    {
                        'raw_id_or_name'    : record['raw_id_or_name'],
                        'project'           : record['project'],
                        'valid'             : None,
                        'reason_invalid'    : '',
                        'validating_cohort' : cohort.id,
                    } for record in cohort_upload.records
                ]
            )
            session.commit()
            return cls(cohort)
        except Exception as e:
            # "except X as e" works on Python 2.6+ and Python 3, unlike the
            # comma form
            session.rollback()
            app.logger.error(str(e))
            return None
 def run(self):
     """
     Records the id of the current task on the cohort identified by
     self.cohort_id, commits, and then validates the cohort's records.
     The session is closed when done, whether or not validation succeeds.
     """
     db_session = db.get_session()
     try:
         cohort = db_session.query(Cohort).get(self.cohort_id)
         task_id = current_task.request.id
         cohort.validation_queue_key = task_id
         db_session.commit()
         self.validate_records(db_session, cohort)
     finally:
         db_session.close()
def auth_meta_mw(resp):
    """
    Callback for meta.wikimedia.org to send us authentication results.
    This is responsible for fetching existing users or creating new ones.
    If a new user is created, they get the default role of GUEST and
    the username and id reported by the OAuth provider.

    Parameters
        resp    : the OAuth response, None when permissions were not granted

    Returns
        A redirect to the 'next' url stored in the session (or the home page)
        on success, a redirect to the login page on failure, or a plain error
        string when several users share the same meta_mw_id.
    """
    if resp is None:
        flash('You need to grant the app permissions in order to login.', 'error')
        return redirect(url_for('login'))

    session['access_token'] = (
        resp['oauth_token'],
        resp['oauth_token_secret']
    )

    try:
        identify_token_encoded = meta_mw.post(
            app.config['META_MW_BASE_URL'] + app.config['META_MW_IDENTIFY_URI'],
        ).data
        identify_token = process_mw_jwt(identify_token_encoded)

        username = identify_token['username']
        userid = identify_token['sub']

        db_session = db.get_session()
        # a single finally clause closes the session on every way out of this
        # block, including an exception raised by user.login()
        try:
            try:
                user = db_session.query(User).filter_by(meta_mw_id=userid).one()

            except NoResultFound:
                # first login: create the user with the default role
                user = User(
                    username=username,
                    meta_mw_id=userid,
                    role=UserRole.GUEST,
                )
                db_session.add(user)
                db_session.commit()

            except MultipleResultsFound:
                return 'Multiple users found with your id!!! Contact Administrator'

            user.login(db_session)
            if login_user(user):
                user.detach_from(db_session)
                redirect_to = session.get('next') or url_for('home_index')
                redirect_to = urllib2.unquote(redirect_to)
                return redirect(redirect_to)
            # NOTE(review): when login_user() returns a falsy value this
            # function falls through and returns None, as it did before
        finally:
            db_session.close()

    except Exception as e:
        # "except X as e" works on Python 2.6+ and Python 3, unlike the
        # comma form
        flash('Access to this application was revoked. Please re-login!')
        app.logger.exception(str(e))
        return redirect(url_for('login'))
def validate_cohort_name_allowed():
    """
    Checks whether the cohort name given in the 'name' request argument is
    still free.  Remote call is set up in static/js/cohortUpload.js.

    Returns
        JSON true when the cohort service finds no cohort with that name,
        JSON false otherwise
    """
    requested_name = request.args.get('name')
    db_session = db.get_session()
    existing = g.cohort_service.get_cohort_by_name(db_session, requested_name)
    return json.dumps(existing is None)
Example #39
0
def auth_meta_mw():
    """
    Handles the authentication callback from meta.wikimedia.org.

    Completes the OAuth handshake with the request token kept in the session,
    asks the handshaker for the user's identity, and then either loads the
    matching UserStore or creates one with the GUEST role.  On any failure
    the user is sent back to the login page with an error message.

    Returns
        A redirect to the 'next' url stored in the session (or the home
        page), or a redirect to the login page on failure
    """
    try:
        handshaker = make_handshaker_mw()
        stored_token = session['request_token']
        request_token = RequestToken(key=stored_token[0],
                                     secret=stored_token[1])
        access_token = handshaker.complete(request_token, request.query_string)
        session['access_token'] = access_token

        identity = handshaker.identify(access_token)
        mw_username = identity['username']
        mw_user_id = identity['sub']

        db_session = db.get_session()
        try:
            user_query = db_session.query(UserStore).filter_by(
                meta_mw_id=mw_user_id)
            user = user_query.one()

        except NoResultFound:
            # unknown user: create one, undoing the insert if it fails
            try:
                user = UserStore(
                    username=mw_username,
                    meta_mw_id=mw_user_id,
                    role=UserRole.GUEST,
                )
                db_session.add(user)
                db_session.commit()
            except:
                db_session.rollback()
                raise

        except MultipleResultsFound:
            flash('Multiple users found with your id!!! Contact Administrator',
                  'error')
            return redirect(url_for('login'))

        user.login(db_session)
        if login_user(user):
            user.detach_from(db_session)
            # the request token is only dropped after a successful login
            del session['request_token']

    except Exception:
        flash('You need to grant the app permissions in order to login.',
              'error')
        app.logger.exception(traceback.format_exc())
        return redirect(url_for('login'))

    next_url = session.get('next')
    if not next_url:
        next_url = url_for('home_index')
    return redirect(urllib2.unquote(next_url))
def cohort_tag_list(cohort_id):
    """
    Returns a JSON array with the sorted names of the tags linked to the
    cohort with the given id.
    """
    db_session = db.get_session()

    tag_query = db_session.query(TagStore.name)
    tag_query = tag_query.filter(CohortTagStore.cohort_id == cohort_id)
    tag_query = tag_query.filter(CohortTagStore.tag_id == TagStore.id)

    # each result row is a tuple even though a single column is selected,
    # so take its first element
    names = sorted(row[0] for row in tag_query.all())
    return json.dumps(names)
def cohort_tag_list(cohort_id):
    """
    Returns a JSON array with the sorted names of the tags linked to the
    cohort with the given id.
    """
    db_session = db.get_session()

    tag_query = db_session.query(TagStore.name)
    tag_query = tag_query.filter(CohortTagStore.cohort_id == cohort_id)
    tag_query = tag_query.filter(CohortTagStore.tag_id == TagStore.id)

    # each result row is a tuple even though a single column is selected,
    # so take its first element
    names = sorted(row[0] for row in tag_query.all())
    return json.dumps(names)
def delete_tag(cohort_id, tag_id):
    """
    Removes the link between a cohort and a tag, then returns a JSON
    response carrying the refreshed list of all tags.

    Parameters
        cohort_id   : id of the cohort to untag
        tag_id      : id of the tag to remove from it
    """
    db_session = db.get_session()

    link_query = db_session.query(CohortTagStore)
    link_query = link_query.filter(CohortTagStore.cohort_id == cohort_id)
    link_query = link_query.filter(CohortTagStore.tag_id == tag_id)
    link_query.delete()
    db_session.commit()

    all_tags = g.tag_service.get_all_tags(db_session)
    tags_json = json.dumps(all_tags)
    return json_response(message='success', tagsAutocompleteList=tags_json)
 def __iter__(self):
     """
     Returns a generator over the mediawiki_userid of every WikiUser joined
     to this cohort through CohortWikiUser.
     """
     db_session = db.get_session()
     try:
         tuples_with_ids = db_session\
             .query(WikiUser.mediawiki_userid)\
             .join(CohortWikiUser)\
             .filter(CohortWikiUser.cohort_id == self.id)\
             .all()
     finally:
         # close the session even when the query raises
         db_session.close()
     # rows are one-element tuples, unwrap the id
     return (t[0] for t in tuples_with_ids)
def logout():
    """
    Logs out the user.

    Clears the access token kept in the web session, lets a UserStore user
    record its own logout, ends the login session and redirects to the
    home page.
    """
    session['access_token'] = None
    db_session = db.get_session()
    # NOTE(review): assumes current_user is Flask-Login's proxy object.
    # type() of such a proxy is the proxy class itself, so an exact type
    # comparison against UserStore can never match; isinstance() looks
    # through the proxy (and also accepts UserStore subclasses).
    if isinstance(current_user, UserStore):
        current_user.logout(db_session)
    logout_user()
    return redirect(url_for('home_index'))
Example #45
0
def logout():
    """
    Logs out the user.

    Clears the access token kept in the web session, lets a UserStore user
    record its own logout, ends the login session and redirects to the
    home page.
    """
    session['access_token'] = None
    db_session = db.get_session()
    # NOTE(review): assumes current_user is Flask-Login's proxy object.
    # type() of such a proxy is the proxy class itself, so an exact type
    # comparison against UserStore can never match; isinstance() looks
    # through the proxy (and also accepts UserStore subclasses).
    if isinstance(current_user, UserStore):
        current_user.logout(db_session)
    logout_user()
    return redirect(url_for('home_index'))
Example #46
0
 def __iter__(self):
     """
     Returns a generator over the mediawiki_userid of every row matched by
     this cohort's filter_wikiuser_query.  The session is closed before the
     generator is handed back.
     """
     db_session = db.get_session()
     try:
         filtered = self.filter_wikiuser_query(
             db_session.query(WikiUser.mediawiki_userid))
         rows = filtered.all()
     finally:
         db_session.close()
     return (row.mediawiki_userid for row in rows)
 def setUp(self):
     # ----------------------------------------------------------------
     # Prepare the sessions and start from a clean database.
     # Warning: this DESTROYS ALL DATA
     # ----------------------------------------------------------------
     self.session = db.get_session()

     # make sure the mediawiki tables exist; existing ones are left alone
     mw_engine = db.get_mw_engine(mediawiki_project)
     mw_metadata = db.MediawikiBase.metadata
     mw_metadata.create_all(mw_engine, checkfirst=True)

     # mediawiki_project is a global defined on this file
     self.mwSession = db.get_mw_session(mediawiki_project)

     # reuse the teardown logic to do the cleaning
     DatabaseTest.tearDown(self)
Example #48
0
    def from_upload(cls, cohort_upload, owner_user_id, session=None):
        """
        Create a new cohort and validate a list of uploaded users for it

        Parameters:
            cohort_upload   : the cohort upload form, parsed by WTForms
            owner_user_id   : the Wikimetrics user id that is uploading
            session         : database session to use; when not given, one is
                              obtained from db.get_session()

        Returns:
            An instance of ValidateCohort, or None if saving the cohort, its
            owner or its records failed (the error is logged and the session
            rolled back)
        """
        cohort_description = cohort_upload.description.data
        if cohort_description is not None:
            cohort_description = cohort_description.encode('utf-8')

        cohort = CohortStore(
            name=cohort_upload.name.data,
            description=cohort_description,
            default_project=cohort_upload.project.data,
            enabled=True,
            public=False,
            validated=False,
            # the form delivers this flag as the string 'True'
            validate_as_user_ids=cohort_upload.validate_as_user_ids.data ==
            'True',
        )
        # the centralauth field is optional on the form
        centralauth = getattr(cohort_upload, 'centralauth', None)
        if centralauth is not None and centralauth.data is True:
            cohort.class_name = 'CentralAuthCohort'
        session = session or db.get_session()
        try:
            session.add(cohort)
            # commit first so that cohort.id is available below
            session.commit()

            cohort_user = CohortUserStore(user_id=owner_user_id,
                                          cohort_id=cohort.id,
                                          role=CohortUserRole.OWNER)
            session.add(cohort_user)
            session.commit()

            # bulk insert of the uploaded records, not yet validated
            session.execute(WikiUserStore.__table__.insert(),
                            [{
                                'raw_id_or_name': record['raw_id_or_name'],
                                'project': record['project'],
                                'valid': None,
                                'reason_invalid': '',
                                'validating_cohort': cohort.id,
                            } for record in cohort_upload.records])
            session.commit()
            return cls(cohort)
        except Exception as e:
            # "except X as e" works on Python 2.6+ and Python 3, unlike the
            # comma form
            session.rollback()
            app.logger.error(str(e))
            return None
Example #49
0
    def __iter__(self):
        """
        Returns a generator over the mediawiki_userid of every row matched by
        this cohort's filter_wikiuser_query.
        TODO: remove this method, it only makes sense for single-project cohorts and
              Cohort display that will be removed soon.

        """
        db_session = db.get_session()
        filtered = self.filter_wikiuser_query(
            db_session.query(WikiUserStore.mediawiki_userid)
        )
        rows = filtered.all()
        return (row.mediawiki_userid for row in rows)
Example #50
0
    def parse_request(self, desired_responses):
        """
        Builds one AggregateReport child per requested cohort / metric pair
        and names this report after the metrics and cohorts involved.

        Parameters
            desired_responses   : iterable of dictionaries, each with a
                                  'cohort' dictionary (its 'id' is used), a
                                  'metric' dictionary (metric configuration,
                                  including 'name' and the output options)
                                  and a 'name'

        Side effects
            Sets self.name and self.children

        Raises
            Exception if a metric does not validate; the cohort lookup raises
            unless exactly one enabled cohort with that id is accessible to
            self.user_id as OWNER or VIEWER
        """
        children = []
        metric_names = []
        cohort_names = []
        allowed_roles = [CohortUserRole.OWNER, CohortUserRole.VIEWER]
        for cohort_metric_dict in desired_responses:

            # get cohort
            cohort_dict = cohort_metric_dict['cohort']
            db_session = db.get_session()
            try:
                cohort = db_session.query(Cohort)\
                    .join(CohortUser)\
                    .join(User)\
                    .filter(User.id == self.user_id)\
                    .filter(Cohort.id == cohort_dict['id'])\
                    .filter(Cohort.enabled)\
                    .filter(CohortUser.role.in_(allowed_roles))\
                    .one()
            finally:
                # one() raises when the cohort is missing or not accessible;
                # the session must be closed in that case as well
                db_session.close()

            # construct metric
            metric_dict = cohort_metric_dict['metric']
            class_name = metric_dict['name']
            metric_class = metric_classes[class_name]
            metric = metric_class(**metric_dict)
            # TODO: don't think csrf can work here, but see if there's another way
            metric.fake_csrf()
            if metric.validate():
                # construct and start RunReport
                output_child = AggregateReport(
                    cohort,
                    metric,
                    individual=metric_dict['individualResults'],
                    aggregate=metric_dict['aggregateResults'],
                    aggregate_sum=metric_dict['aggregateSum'],
                    aggregate_average=metric_dict['aggregateAverage'],
                    aggregate_std_deviation=metric_dict[
                        'aggregateStandardDeviation'],
                    name=cohort_metric_dict['name'],
                    user_id=self.user_id,
                )
                children.append(output_child)
                metric_names.append(metric.label)
                cohort_names.append(cohort.name)
            else:
                raise Exception('{0} was incorrectly configured'.format(
                    metric.label))

        metric_names = deduplicate(metric_names)
        cohort_names = deduplicate(cohort_names)

        self.name = ', '.join(metric_names) + ' for ' + ', '.join(cohort_names)
        self.children = children
Example #51
0
def auth_google(resp):
    """
    Callback for Google to send us authentication results.
    This is responsible for fetching existing users or creating new ones.
    If a new user is created, they get the default role of GUEST and
    the email and id reported by Google.

    Parameters
        resp    : the OAuth response; may be None, in which case the 'code'
                  request argument is used as the access token

    Returns
        A redirect to the 'next' url stored in the session (or the home page)
        on success, a redirect to the login page otherwise, or a plain error
        string when several users share the same google_id.
    """
    if resp is None and request.args.get('error') == 'access_denied':
        flash('You need to grant the app permissions in order to login.',
              'error')
        return redirect(url_for('login'))

    # resp can still be None here (no response, but a different error or no
    # error at all); subscripting None would raise a TypeError, so only read
    # the token from a real response and otherwise fall back to the code
    access_token = None
    if resp is not None:
        access_token = resp['access_token']
    access_token = access_token or request.args.get('code')
    if access_token:
        session['access_token'] = access_token, ''
        r = requests.get(app.config['GOOGLE_USERINFO_URI'],
                         headers={'Authorization': 'OAuth ' + access_token})
        if r.ok:
            userinfo = json.loads(r.text)
            email = userinfo['email']
            # not called "id", to avoid shadowing the builtin
            google_user_id = userinfo['id']

            db_session = db.get_session()
            user = None
            try:
                user = db_session.query(UserStore).filter_by(
                    google_id=google_user_id).one()

            except NoResultFound:
                # unknown user: create one, undoing the insert if it fails
                try:
                    user = UserStore(
                        email=email,
                        google_id=google_user_id,
                        role=UserRole.GUEST,
                    )
                    db_session.add(user)
                    db_session.commit()
                except:
                    db_session.rollback()
                    raise

            except MultipleResultsFound:
                return 'Multiple users found with your id!!! Contact Administrator'

            user.login(db_session)
            if login_user(user):
                user.detach_from(db_session)
                redirect_to = session.get('next') or url_for('home_index')
                redirect_to = urllib2.unquote(redirect_to)
                return redirect(redirect_to)

    flash('Was not allowed to authenticate you with Google.', 'error')
    return redirect(url_for('login'))
 def set_status(self, status, task_id=None):
     """
     helper function for updating database status after celery
     task has been started

     Parameters
         status  : the new status to store on the PersistentReport
         task_id : when truthy, stored as the report's queue_result_key
     """
     db_session = db.get_session()
     try:
         pj = db_session.query(PersistentReport).get(self.persistent_id)
         pj.status = status
         if task_id:
             pj.queue_result_key = task_id
         db_session.add(pj)
         db_session.commit()
     finally:
         # close the session even when the lookup or the commit fails
         db_session.close()
Example #53
0
 def update_status(self):
     """
     Refreshes self.status from the celery task identified by
     self.queue_result_key and commits the change.  Does nothing when there
     is no result key or the status is already one of celery's READY_STATES.
     """
     # if we don't have the result key leave as is
     if self.queue_result_key and self.status not in (
             celery.states.READY_STATES):
         # TODO: inline import.  Can't import up above because of circular reference
         from wikimetrics.models.report_nodes import Report
         celery_task = Report.task.AsyncResult(self.queue_result_key)
         self.status = celery_task.status
         # commit through the session this object already belongs to, if any;
         # otherwise attach it to a session obtained from db
         existing_session = Session.object_session(self)
         if not existing_session:
             existing_session = db.get_session()
             existing_session.add(self)
         existing_session.commit()