def delete_entries_from_project(self, project):
    """Remove every webhook row belonging to *project* and refresh its cache."""
    delete_sql = text('''
        DELETE FROM webhook WHERE project_id=:project_id;
        ''')
    self.db.session.execute(delete_sql, dict(project_id=project.id))
    self.db.session.commit()
    clean_project(project.id)
def update_priority(self, project_id, priority, filters):
    """Set priority_0 on every task of the project that matches *filters*.

    The requested priority is clamped to the [0.0, 1.0] range before use.
    """
    clamped = max(0.0, min(1.0, priority))
    conditions, params = get_task_filters(filters)
    # Select the filtered task ids in a CTE, then update them in one pass.
    sql = text('''
        WITH to_update AS (
            SELECT task.id as id,
            coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
            priority_0, task.created
            FROM task LEFT OUTER JOIN
            (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
            MAX(finish_time) as ft
            FROM task_run WHERE project_id=:project_id
            GROUP BY task_id) AS log_counts
            ON task.id=log_counts.task_id
            WHERE task.project_id=:project_id {}
        )
        UPDATE task SET priority_0=:priority
        WHERE project_id=:project_id AND task.id in (
            SELECT id FROM to_update);
        '''.format(conditions))
    self.db.session.execute(
        sql, dict(priority=clamped, project_id=project_id, **params))
    self.db.session.commit()
    cached_projects.clean_project(project_id)
def update_tasks_redundancy(self, project, n_answer):
    """Update the n_answer of every non-broken task from a project and
    recompute their state. Uses raw SQL for performance."""
    payload = dict(n_answers=n_answer, project_id=project.id)
    reset_sql = text('''
        UPDATE task SET n_answers=:n_answers,
        state='ongoing' WHERE project_id=:project_id AND is_broken=FALSE''')
    self.db.session.execute(reset_sql, payload)
    # Re-flag as completed every task that already reached the new
    # redundancy with its existing task runs.
    complete_sql = text('''
        WITH project_tasks AS (
        SELECT task.id, task.n_answers,
        COUNT(task_run.id) AS n_task_runs, task.state
        FROM task, task_run
        WHERE task_run.task_id=task.id
        AND task.project_id=:project_id
        AND task.is_broken=FALSE
        GROUP BY task.id)
        UPDATE task SET state='completed'
        FROM project_tasks
        WHERE (project_tasks.n_task_runs >=:n_answers)
        AND project_tasks.id=task.id
        ''')
    self.db.session.execute(complete_sql, payload)
    self.db.session.commit()
    cached_projects.clean_project(project.id)
def create_tasks(self, task_repo, project_id, **form_data):
    """Create tasks from a remote source using an importer object,
    avoiding the creation of repeated tasks.

    Returns a localized message stating how many tasks were imported.
    """
    from pybossa.cache import projects as cached_projects
    from pybossa.model.task import Task
    importer_id = form_data.get('type')
    empty = True
    n = 0
    importer = self._create_importer_for(importer_id)
    for task_data in importer.tasks(**form_data):
        task = Task(project_id=project_id)
        # Plain loop instead of a side-effect list comprehension;
        # items() works on both Python 2 and 3 (iteritems() is 2-only).
        for k, v in task_data.items():
            setattr(task, k, v)
        # Skip tasks whose info already exists for this project.
        found = task_repo.get_task_by(project_id=project_id, info=task.info)
        if found is None:
            task_repo.save(task)
            n += 1
            empty = False
    if empty:
        msg = gettext('It looks like there were no new records to import')
        return msg
    msg = str(n) + " " + gettext('new tasks were imported successfully')
    if n == 1:
        msg = str(n) + " " + gettext('new task was imported successfully')
    cached_projects.clean_project(project_id)
    return msg
def update_tasks_redundancy(self, project, n_answer):
    """Update the n_answer of every task from a project and recompute
    their state. Uses raw SQL for performance."""
    payload = dict(n_answers=n_answer, project_id=project.id)
    self.db.session.execute(text('''
        UPDATE task SET n_answers=:n_answers,
        state='ongoing' WHERE project_id=:project_id'''), payload)
    # Mark as completed every task that already has enough task runs
    # under the new redundancy value.
    self.db.session.execute(text('''
        WITH project_tasks AS (
        SELECT task.id, task.n_answers,
        COUNT(task_run.id) AS n_task_runs, task.state
        FROM task, task_run
        WHERE task_run.task_id=task.id
        AND task.project_id=:project_id
        GROUP BY task.id)
        UPDATE task SET state='completed'
        FROM project_tasks
        WHERE (project_tasks.n_task_runs >=:n_answers)
        and project_tasks.id=task.id
        '''), payload)
    self.db.session.commit()
    cached_projects.clean_project(project.id)
def delete(self, blogpost):
    """Delete *blogpost* from the database and invalidate its project cache."""
    self._validate_can_be('deleted', blogpost)
    project_id = blogpost.project_id
    stored = (self.db.session.query(Blogpost)
              .filter(Blogpost.id == blogpost.id)
              .first())
    self.db.session.delete(stored)
    self.db.session.commit()
    clean_project(project_id)
def update_tasks_redundancy(self, project, n_answers, filters=None):
    """
    Update the n_answer of every task from a project and their state.
    Use raw SQL for performance. Mark tasks as exported = False for tasks
    with curr redundancy < new redundancy, with state as completed and
    were marked as exported = True.

    Returns a message describing the tasks that were NOT updated.
    """
    from pybossa.jobs import check_and_send_task_notifications
    # Reject redundancy values outside the configured bounds.
    if n_answers < self.MIN_REDUNDANCY or n_answers > self.MAX_REDUNDANCY:
        raise ValueError("Invalid redundancy value: {}".format(n_answers))
    filters = filters or {}
    # Window within which tasks carrying file-upload URLs may still have
    # their redundancy changed (interval literal consumed by Postgres).
    task_expiration = '{} day'.format(self.rdancy_upd_exp)
    conditions, params = get_task_filters(filters)
    # Summary of tasks excluded from the update, returned to the caller.
    tasks_not_updated = self._get_redundancy_update_msg(
        project, n_answers, conditions, params, task_expiration)
    self.update_task_exported_status(
        project.id, n_answers, conditions, params, task_expiration)
    # all_tasks_with_orig_filter: tasks matching the caller's filters.
    # tasks_with_file_urls: the subset whose info JSON has *__upload_url keys.
    # tasks_excl_file_urls: the rest, always updated.
    # File-URL tasks are updated only while still 'ongoing' and created
    # within the expiration window.
    sql = text('''
        WITH all_tasks_with_orig_filter AS (
            SELECT task.id as id,
            coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
            priority_0, task.created
            FROM task LEFT OUTER JOIN
            (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
            MAX(finish_time) as ft
            FROM task_run WHERE project_id=:project_id
            GROUP BY task_id) AS log_counts
            ON task.id=log_counts.task_id
            WHERE task.project_id=:project_id {}
        ),
        tasks_with_file_urls AS (
            SELECT t.id as id FROM task t
            WHERE t.id IN (SELECT id from all_tasks_with_orig_filter)
            AND jsonb_typeof(t.info) = 'object'
            AND EXISTS(SELECT TRUE FROM jsonb_object_keys(t.info) AS key
            WHERE key ILIKE '%\_\_upload\_url%')
        ),
        tasks_excl_file_urls AS (
            SELECT id FROM all_tasks_with_orig_filter
            WHERE id NOT IN (SELECT id FROM tasks_with_file_urls)
        )
        UPDATE task SET n_answers=:n_answers,
        state='ongoing' WHERE project_id=:project_id
        AND ((id IN (SELECT id from tasks_excl_file_urls))
        OR (id IN (SELECT id from tasks_with_file_urls)
        AND state='ongoing'
        AND TO_DATE(created, 'YYYY-MM-DD\THH24:MI:SS.US') >=
        NOW() - :task_expiration ::INTERVAL));'''
        .format(conditions))
    self.db.session.execute(sql, dict(n_answers=n_answers,
                                      project_id=project.id,
                                      task_expiration=task_expiration,
                                      **params))
    # Recompute completed/ongoing states under the new redundancy.
    self.update_task_state(project.id)
    self.db.session.commit()
    cached_projects.clean_project(project.id)
    check_and_send_task_notifications(project.id)
    return tasks_not_updated
def delete(self, blogpost):
    """Remove *blogpost* after validation and refresh the project cache."""
    self._validate_can_be('deleted', blogpost)
    project_id = blogpost.project_id
    query = self.db.session.query(Blogpost)
    blog = query.filter(Blogpost.id == blogpost.id).first()
    self.db.session.delete(blog)
    self.db.session.commit()
    clean_project(project_id)
def delete_taskruns_from_project(self, project):
    """Delete every task run of *project*, refresh its cache and drop
    any exported zip files."""
    self.db.session.execute(
        text('''
        DELETE FROM task_run WHERE project_id=:project_id;
        '''),
        dict(project_id=project.id))
    self.db.session.commit()
    cached_projects.clean_project(project.id)
    self._delete_zip_files_from_store(project)
def update_tasks_price(self, project, price):
    """Set the price of every task of *project* and mark them ongoing.
    Uses raw SQL for performance."""
    update_sql = text('''
        UPDATE task SET price=:price,
        state='ongoing' WHERE project_id=:project_id''')
    payload = dict(price=price, project_id=project.id)
    self.db.session.execute(update_sql, payload)
    self.db.session.commit()
    cached_projects.clean_project(project.id)
def delete_all(self, elements):
    """Delete every element in *elements*, commit, refresh the project
    cache and drop exported zip files.

    Assumes all elements belong to the same project; a no-op on an
    empty/None input.
    """
    if not elements:
        return
    for element in elements:
        self._delete(element)
    project = elements[0].project
    self.db.session.commit()
    # Use the first element's project id instead of the loop variable
    # leaking out of the for-loop; equivalent when all elements share a
    # project, and clearer about intent.
    cached_projects.clean_project(elements[0].project_id)
    self._delete_zip_files_from_store(project)
def update(self, blogpost):
    """Merge pending changes to *blogpost* into the DB and commit;
    integrity failures are rolled back and re-raised as DBIntegrityError."""
    self._validate_can_be('updated', blogpost)
    try:
        self.db.session.merge(blogpost)
        self.db.session.commit()
        clean_project(blogpost.project_id)
    except IntegrityError as e:
        self.db.session.rollback()
        raise DBIntegrityError(e)
def update(self, element):
    """Persist modifications to *element*; translate integrity errors
    into DBIntegrityError after rolling back."""
    self._validate_can_be(self.UPDATE_ACTION, element)
    try:
        self.db.session.merge(element)
        self.db.session.commit()
        cached_projects.clean_project(element.project_id)
    except IntegrityError as e:
        self.db.session.rollback()
        raise DBIntegrityError(e)
def update(self, element):
    """Commit an update to *element* and refresh its project cache;
    wrap DB integrity failures."""
    self._validate_can_be("updated", element)
    try:
        self.db.session.merge(element)
        self.db.session.commit()
        cached_projects.clean_project(element.project_id)
    except IntegrityError as e:
        self.db.session.rollback()
        raise DBIntegrityError(e)
def save(self, element):
    """Add *element* to the session and commit, refreshing the project
    cache; roll back and wrap integrity failures."""
    self._validate_can_be('saved', element)
    try:
        self.db.session.add(element)
        self.db.session.commit()
        cached_projects.clean_project(element.project_id)
    except IntegrityError as e:
        self.db.session.rollback()
        raise DBIntegrityError(e)
def create_tasks(self, task_repo, project, importer=None, **form_data):
    """Create tasks from a remote source using an importer object,
    avoiding the creation of repeated tasks.

    Returns a header-validation report when headers mismatch, otherwise
    an ImportReport summarizing the import.
    """
    from pybossa.model.task import Task
    from pybossa.cache import projects as cached_projects
    n = 0
    importer = importer or self._create_importer_for(**form_data)
    tasks = importer.tasks()
    header_report = self._validate_headers(importer, project, **form_data)
    if header_report:
        return header_report
    msg = ''
    validator = TaskImportValidator(get_enrichment_output_fields(project))
    n_answers = project.get_default_n_answers()
    try:
        for task_data in tasks:
            self.upload_private_data(task_data, project.id)
            task = Task(project_id=project.id, n_answers=n_answers)
            # items() works on Python 2 and 3 (iteritems() is 2-only);
            # a plain loop replaces the side-effect list comprehension.
            for k, v in task_data.items():
                setattr(task, k, v)
            gold_answers = task_data.pop('gold_answers', None)
            set_gold_answers(task, gold_answers)
            found = task_repo.find_duplicate(project_id=project.id,
                                             info=task.info)
            if found is not None:
                continue
            if not validator.validate(task):
                continue
            try:
                task_repo.save(task, clean_project=False)
                # Count only after a successful save so failed saves are
                # not reported as imported.
                n += 1
            except Exception as e:
                current_app.logger.exception(msg)
                validator.add_error(str(e))
    finally:
        cached_projects.clean_project(project.id)
    if form_data.get('type') == 'localCSV':
        csv_filename = form_data.get('csv_filename')
        delete_import_csv_file(csv_filename)
    metadata = importer.import_metadata()
    if n == 0:
        msg = gettext('It looks like there were no new records to import. ')
    elif n == 1:
        msg = str(n) + " " + gettext('new task was imported successfully ')
    else:
        msg = str(n) + " " + gettext('new tasks were imported successfully ')
    msg += str(validator)
    return ImportReport(message=msg, metadata=metadata, total=n)
def delete_taskruns_from_project(self, project):
    """Delete all task runs of *project* and reset task export state.

    Every task returns to ongoing/unexported, except gold (calibration)
    tasks which are re-marked as exported.
    """
    reset_sql = text('''
        DELETE FROM task_run WHERE project_id=:project_id;
        UPDATE task SET state='ongoing', exported=false
        WHERE project_id=:project_id;
        UPDATE task SET exported=true
        WHERE project_id=:project_id AND calibration=1
        ''')
    self.db.session.execute(reset_sql, dict(project_id=project.id))
    self.db.session.commit()
    cached_projects.clean_project(project.id)
    self._delete_zip_files_from_store(project)
def save(self, element, clean_project=True):
    """Add *element* and commit; optionally skip the project-cache
    refresh (bulk importers pass clean_project=False)."""
    self._validate_can_be(self.SAVE_ACTION, element)
    try:
        self.db.session.add(element)
        self.db.session.commit()
        if clean_project:
            cached_projects.clean_project(element.project_id)
    except IntegrityError as e:
        self.db.session.rollback()
        raise DBIntegrityError(e)
def delete_valid_from_project(self, project):
    """Delete only tasks that have no results associated."""
    stmt = text('''
        DELETE FROM task WHERE task.project_id=:project_id
        AND task.id NOT IN
        (SELECT task_id FROM result
        WHERE result.project_id=:project_id GROUP BY result.task_id);
        ''')
    self.db.session.execute(stmt, dict(project_id=project.id))
    self.db.session.commit()
    cached_projects.clean_project(project.id)
    self._delete_zip_files_from_store(project)
def delete_all(self, elements):
    """Delete every task in *elements*, commit, refresh the project
    cache and drop exported zip files.

    No-op on an empty/None input. Assumes all elements belong to the
    same project.
    """
    if not elements:
        return
    # Debug `print` statements (Python-2-only syntax) removed.
    for element in elements:
        self._delete(element)
    project = elements[0].project
    self.db.session.commit()
    cached_projects.clean_project(elements[0].project_id)
    self._delete_zip_files_from_store(project)
def delete_valid_from_project(self, project, force_reset=False, filters=None):
    """Delete tasks from *project*.

    Without ``force_reset`` only tasks with no associated results are
    removed. With ``force_reset`` every task matching *filters* is
    removed together with its results and task runs, inside a single
    transaction on the bulk-delete session.
    """
    if not force_reset:
        # Delete only tasks that have no results associated.
        params = {}
        sql = text('''
            DELETE FROM task WHERE task.project_id=:project_id
            AND task.id NOT IN
            (SELECT task_id FROM result
            WHERE result.project_id=:project_id GROUP BY result.task_id);
            ''')
    else:
        # Force reset: remove all results.
        filters = filters or {}
        conditions, params = get_task_filters(filters)
        # bulkdel db conn is with db user having session_replication_role;
        # when bulkdel is not configured, make an explicit sql query to set
        # session replication role to replica.
        sql_session_repl = ''
        # `x not in y` is the idiomatic form of `not x in y`.
        if 'bulkdel' not in current_app.config.get('SQLALCHEMY_BINDS'):
            sql_session_repl = 'SET session_replication_role TO replica;'
        # Stage the filtered task ids in a temp table, then cascade the
        # delete through result -> task_run -> task.
        sql = text('''
            BEGIN;
            {}
            CREATE TEMP TABLE to_delete ON COMMIT DROP AS (
                SELECT task.id as id,
                coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
                priority_0, task.created
                FROM task LEFT OUTER JOIN
                (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                MAX(finish_time) as ft
                FROM task_run WHERE project_id=:project_id
                GROUP BY task_id) AS log_counts
                ON task.id=log_counts.task_id
                WHERE task.project_id=:project_id {}
            );
            DELETE FROM result WHERE project_id=:project_id
            AND task_id in (SELECT id FROM to_delete);
            DELETE FROM task_run WHERE project_id=:project_id
            AND task_id in (SELECT id FROM to_delete);
            DELETE FROM task WHERE task.project_id=:project_id
            AND id in (SELECT id FROM to_delete);
            COMMIT;
            '''.format(sql_session_repl, conditions))
    self.db.bulkdel_session.execute(sql, dict(project_id=project.id, **params))
    self.db.bulkdel_session.commit()
    cached_projects.clean_project(project.id)
    self._delete_zip_files_from_store(project)
def update(self, element):
    """Merge *element* into the session and commit, refreshing the
    project cache; roll back and wrap integrity failures.

    Leftover debug `print` statements (Python-2-only syntax) removed.
    """
    self._validate_can_be('updated', element)
    try:
        self.db.session.merge(element)
        self.db.session.commit()
        cached_projects.clean_project(element.project_id)
    except IntegrityError as e:
        self.db.session.rollback()
        raise DBIntegrityError(e)
def delete_task_by_id(self, project_id, task_id):
    """Delete one task together with its result and task runs, then
    refresh the project cache and trigger task notifications."""
    from pybossa.jobs import check_and_send_task_notifications
    args = dict(project_id=project_id, task_id=task_id)
    # Child rows first (result, task_run), parent row (task) last.
    statements = (
        '''
        DELETE FROM result WHERE project_id=:project_id
        AND task_id=:task_id;''',
        '''
        DELETE FROM task_run WHERE project_id=:project_id
        AND task_id=:task_id;''',
        '''
        DELETE FROM task WHERE project_id=:project_id
        AND id=:task_id;''',
    )
    for stmt in statements:
        self.db.session.execute(text(stmt), args)
    self.db.session.commit()
    cached_projects.clean_project(project_id)
    check_and_send_task_notifications(project_id)
def flush_task_runs(project_short_name, confirmed):
    """Delete every task run of a project (after confirmation) and reset
    its tasks to the 'ongoing' state.

    Only admins and the project owner may do this; any other caller —
    and any unknown *confirmed* value — gets a 404.
    """
    project = cached_projects.get_project(project_short_name)
    # Guard clause instead of wrapping the whole body in if/else.
    if not (current_user.admin or project.owner_id == current_user.id):
        abort(404)
    if confirmed == "confirmed":
        associated_task_runs = TaskRun.query.filter_by(project_id=project.id).all()
        for task_run in associated_task_runs:
            db.session.delete(task_run)  # dead `pass` removed
        db.session.commit()
        # Iterate over all tasks associated with the project, and mark them
        # as 'ongoing'. Some tasks might be marked as 'completed' if enough
        # task_runs were done.
        associated_tasks = Task.query.filter_by(project_id=project.id).all()
        for task in associated_tasks:
            if task.state != u"ongoing":
                task.state = u"ongoing"
        db.session.commit()
        # Reset project data in the cache
        cached_projects.clean_project(project.id)
        # Note: The cache will hold the old data about the users who
        # contributed to the tasks associated with this project till the
        # User Cache Timeout. Querying the list of contributors and
        # updating their caches individually would be very expensive,
        # hence we avoid it for the time being.
        flash('All Task Runs associated with this project have been successfully deleted.', 'success')
        return redirect(url_for('project.task_settings',
                                short_name=project_short_name))
    elif confirmed == "unconfirmed":
        # Obtain data required by the project profile renderer
        (project, owner, n_tasks, n_task_runs, overall_progress,
         last_activity, n_results) = projects_view.project_by_shortname(
            project_short_name)
        return render_template(
            'geotagx/projects/delete_task_run_confirmation.html',
            project=project,
            owner=owner,
            n_tasks=n_tasks,
            n_task_runs=n_task_runs,
            overall_progress=overall_progress,
            last_activity=last_activity,
            n_results=n_results,
            n_completed_tasks=cached_projects.n_completed_tasks(project.id),
            n_volunteers=cached_projects.n_volunteers(project.id))
    else:
        abort(404)
def delete_valid_from_project(self, project, force_reset=False, filters=None):
    # Delete tasks for *project*. Without force_reset only tasks with no
    # results are removed; with force_reset every task matching *filters*
    # is removed together with its results and task runs.
    if not force_reset:
        """Delete only tasks that have no results associated."""
        params = {}
        sql = text('''
            DELETE FROM task WHERE task.project_id=:project_id
            AND task.id NOT IN
            (SELECT task_id FROM result
            WHERE result.project_id=:project_id GROUP BY result.task_id);
            ''')
    else:
        """force reset, remove all results."""
        filters = filters or {}
        conditions, params = get_task_filters(filters)
        # Stage the filtered task ids in a temp table, then cascade the
        # delete through result -> task_run -> task in one transaction.
        sql = text('''
            BEGIN;
            CREATE TEMP TABLE to_delete ON COMMIT DROP AS (
                SELECT task.id as id,
                coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
                priority_0, task.created
                FROM task LEFT OUTER JOIN
                (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                MAX(finish_time) as ft
                FROM task_run WHERE project_id=:project_id
                GROUP BY task_id) AS log_counts
                ON task.id=log_counts.task_id
                WHERE task.project_id=:project_id {}
            );
            DELETE FROM result WHERE project_id=:project_id
            AND task_id in (SELECT id FROM to_delete);
            DELETE FROM task_run WHERE project_id=:project_id
            AND task_id in (SELECT id FROM to_delete);
            DELETE FROM task WHERE task.project_id=:project_id
            AND id in (SELECT id FROM to_delete);
            COMMIT;
            '''.format(conditions))
    self.db.session.execute(sql, dict(project_id=project.id, **params))
    self.db.session.commit()
    cached_projects.clean_project(project.id)
    self._delete_zip_files_from_store(project)
def update_tasks_redundancy(self, project, n_answers, filters=None):
    """
    Update the n_answer of every task from a project and their state.
    Use raw SQL for performance. Mark tasks as exported = False for tasks
    with curr redundancy < new redundancy, with state as completed and
    were marked as exported = True.
    """
    filters = filters or {}
    conditions, params = get_task_filters(filters)
    # Reject redundancy values outside the configured bounds.
    if n_answers < self.MIN_REDUNDANCY or n_answers > self.MAX_REDUNDANCY:
        raise ValueError("Invalid redundancy value: {}".format(n_answers))
    self.update_task_exported_status(project.id, n_answers, conditions, params)
    # Select the filtered task ids in a CTE, then set their new
    # redundancy and reset them to 'ongoing' in one statement.
    sql = text('''
        WITH to_update AS (
            SELECT task.id as id,
            coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
            priority_0, task.created
            FROM task LEFT OUTER JOIN
            (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
            MAX(finish_time) as ft
            FROM task_run WHERE project_id=:project_id
            GROUP BY task_id) AS log_counts
            ON task.id=log_counts.task_id
            WHERE task.project_id=:project_id {}
        )
        UPDATE task SET n_answers=:n_answers,
        state='ongoing' WHERE project_id=:project_id
        AND task.id in (SELECT id from to_update);'''
        .format(conditions))
    self.db.session.execute(sql, dict(n_answers=n_answers,
                                      project_id=project.id,
                                      **params))
    # Recompute completed/ongoing states under the new redundancy.
    self.update_task_state(project.id, n_answers)
    self.db.session.commit()
    cached_projects.clean_project(project.id)
def create_tasks(self, task_repo, project, **form_data):
    """Create tasks from a remote source using an importer object,
    avoiding the creation of repeated tasks.

    Returns an ImportReport describing the outcome.
    """
    from pybossa.model.task import Task
    from pybossa.cache import projects as cached_projects
    n = 0
    importer = self._create_importer_for(**form_data)
    tasks = importer.tasks()
    import_headers = importer.headers()
    mismatch_headers = []
    msg = ''
    # Validate that the imported columns cover every header the task
    # presenter expects; otherwise fail with a descriptive report.
    if import_headers:
        if not project:
            msg = gettext('Could not load project info')
        else:
            task_presenter_headers = project.get_presenter_headers()
            mismatch_headers = [header for header in task_presenter_headers
                                if header not in import_headers]
            if mismatch_headers:
                msg = 'Imported columns do not match task presenter code. '
                additional_msg = 'Mismatched columns: {}'.format(
                    (', '.join(mismatch_headers))[:80])
                current_app.logger.error(msg)
                current_app.logger.error(', '.join(mismatch_headers))
                msg += additional_msg
    if msg:
        # Failed validation
        current_app.logger.error(msg)
        return ImportReport(message=msg, metadata=None, total=0)
    validator = TaskImportValidator()
    n_answers = project.get_default_n_answers()
    try:
        for task_data in tasks:
            self.upload_private_data(task_data, project.id)
            task = Task(project_id=project.id, n_answers=n_answers)
            # items() works on Python 2 and 3 (iteritems() is 2-only);
            # a plain loop replaces the side-effect list comprehension.
            for k, v in task_data.items():
                setattr(task, k, v)
            gold_answers = task_data.pop('gold_answers', None)
            set_gold_answers(task, gold_answers)
            found = task_repo.find_duplicate(project_id=project.id,
                                             info=task.info)
            if found is None:
                if validator.validate(task):
                    try:
                        task_repo.save(task, clean_project=False)
                        # Count only after a successful save so failed
                        # saves are not reported as imported.
                        n += 1
                    except Exception as e:
                        current_app.logger.exception(msg)
                        validator.add_error(str(e))
    finally:
        cached_projects.clean_project(project.id)
    if form_data.get('type') == 'localCSV':
        csv_filename = form_data.get('csv_filename')
        delete_import_csv_file(csv_filename)
    metadata = importer.import_metadata()
    if n == 0:
        msg = gettext('It looks like there were no new records to import. ')
    elif n == 1:
        msg = str(n) + " " + gettext('new task was imported successfully ')
    else:
        msg = str(n) + " " + gettext('new tasks were imported successfully ')
    msg += str(validator)
    return ImportReport(message=msg, metadata=metadata, total=n)
def _refresh_cache(self, task):
    """Invalidate the cached data of the project *task* belongs to."""
    project_id = task.project_id
    cached_projects.clean_project(project_id)
def delete_bulk_tasks(data):
    """Delete tasks in bulk from project.

    *data* is a dict carrying project_id/name, the requesting user, the
    project coowners, the force_reset flag and optional task filters.
    After deleting, the project cache is cleaned and a notification mail
    is sent to the requester and coowners.
    """
    from sqlalchemy.sql import text
    from pybossa.core import db
    import pybossa.cache.projects as cached_projects
    from pybossa.cache.task_browse_helpers import get_task_filters

    project_id = data['project_id']
    project_name = data['project_name']
    curr_user = data['curr_user']
    coowners = data['coowners']
    current_user_fullname = data['current_user_fullname']
    force_reset = data['force_reset']
    params = {}
    # lock tasks for given project with SELECT FOR UPDATE
    # create temp table with all tasks to be deleted
    # during transaction, disable constraints check with session_replication_role
    # delete rows from child tables first and then from parent
    if not force_reset:
        """Delete only tasks that have no results associated."""
        sql = text('''
            BEGIN;
            SELECT task_id FROM counter WHERE project_id=:project_id FOR UPDATE;
            SELECT task_id FROM task_run WHERE project_id=:project_id FOR UPDATE;
            SELECT id FROM task WHERE project_id=:project_id FOR UPDATE;
            SET session_replication_role TO replica;
            CREATE TEMP TABLE to_delete ON COMMIT DROP AS (
                SELECT task.id as id FROM task
                WHERE project_id=:project_id
                AND task.id NOT IN
                (SELECT task_id FROM result
                WHERE result.project_id=:project_id GROUP BY result.task_id)
            );
            DELETE FROM counter WHERE project_id=:project_id
            AND task_id IN (SELECT id FROM to_delete);
            DELETE FROM task_run WHERE project_id=:project_id
            AND task_id IN (SELECT id FROM to_delete);
            DELETE FROM task WHERE project_id=:project_id
            AND id IN (SELECT id FROM to_delete);
            COMMIT;
            ''')
        msg = ("Tasks and taskruns with no associated results have been "
               "deleted from project {0} by {1}"
               .format(project_name, current_user_fullname))
    else:
        # force_reset: also delete results for tasks matching the filters.
        args = data.get('filters', {})
        conditions, params = get_task_filters(args)
        sql = text('''
            BEGIN;
            SELECT task_id FROM counter WHERE project_id=:project_id FOR UPDATE;
            SELECT task_id FROM result WHERE project_id=:project_id FOR UPDATE;
            SELECT task_id FROM task_run WHERE project_id=:project_id FOR UPDATE;
            SELECT id FROM task WHERE project_id=:project_id FOR UPDATE;
            SET session_replication_role TO replica;
            CREATE TEMP TABLE to_delete ON COMMIT DROP AS (
                SELECT task.id as id,
                coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
                priority_0, task.created
                FROM task LEFT OUTER JOIN
                (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                MAX(finish_time) as ft
                FROM task_run WHERE project_id=:project_id
                GROUP BY task_id) AS log_counts
                ON task.id=log_counts.task_id
                WHERE task.project_id=:project_id {}
            );
            DELETE FROM counter WHERE project_id=:project_id
            AND task_id IN (SELECT id FROM to_delete);
            DELETE FROM result WHERE project_id=:project_id
            AND task_id in (SELECT id FROM to_delete);
            DELETE FROM task_run WHERE project_id=:project_id
            AND task_id in (SELECT id FROM to_delete);
            DELETE FROM task WHERE task.project_id=:project_id
            AND id in (SELECT id FROM to_delete);
            COMMIT;
            '''.format(conditions))
        msg = ("Tasks, taskruns and results associated have been "
               "deleted from project {0} as requested by {1}"
               .format(project_name, current_user_fullname))
    db.session.execute(sql, dict(project_id=project_id, **params))
    cached_projects.clean_project(project_id)
    # Notify the requester and all coowners about the deletion.
    subject = 'Tasks deletion from %s' % project_name
    body = 'Hello,\n\n' + msg + '\n\nThe %s team.'\
        % current_app.config.get('BRAND')
    recipients = [curr_user]
    for user in coowners:
        recipients.append(user.email_addr)
    mail_dict = dict(recipients=recipients, subject=subject, body=body)
    send_mail(mail_dict)
def delete(self, element):
    """Delete *element*, commit, refresh the project cache and drop any
    exported zip files."""
    self._delete(element)
    owning_project = element.project
    self.db.session.commit()
    cached_projects.clean_project(element.project_id)
    self._delete_zip_files_from_store(owning_project)