def post(self):
    """Handle HTTP POST: schedule a workflow run for one faculty member.

    Reads two query-string arguments from the current request:

    * ``run`` -- key into ``TASKLIST`` selecting the task list to execute.
    * ``faculty`` -- identifier passed to ``Faculty.safe_get``.

    Aborts with HTTP 400 when either argument is missing/empty or when
    ``run`` is not a key of ``TASKLIST``.

    :return: ``200`` once the workflow has been queued.
    """
    to_run = request.args.get('run')
    faculty = request.args.get('faculty')

    if not to_run or not faculty:
        abort(400)

    try:
        task_list = TASKLIST[to_run]
    except KeyError:
        # Unknown task-list name is a client error. Only the lookup failure
        # is caught so unrelated errors are not masked as a 400.
        # NOTE(review): assumes TASKLIST is a mapping keyed by str -- confirm.
        abort(400)

    workflow = Workflow(task_list, Faculty.safe_get(faculty))
    # Queued asynchronously; the request does not wait for the workflow.
    run_workflow.apply_async((workflow,), countdown=1)

    return 200
def bulk_create_faculty(json_data, write=True):
    """Takes in a list of JSON objects, and loads them into elasticsearch.

    Every entry is handed to ``DataIngester.create_faculty``. Afterwards a
    single ``DataIngester.INITIAL_PAGE_SCRAPE`` workflow is queued and the
    number of ingested entries is printed.

    :param json_data: Sequence of JSON objects, one per faculty member.
    :param bool write: Forwarded unchanged to ``DataIngester.create_faculty``.
    :exception TypeError: If the json_data is not a sequence object. The
        expected type is a List.
    """
    if not isinstance(json_data, Sequence):
        # The placeholder must be filled in explicitly; passing the type as a
        # second positional argument would leave a literal "{}" in the message.
        raise TypeError(
            "Expected a Sequence, but got a {}".format(type(json_data)))

    for faculty_member in json_data:
        DataIngester.create_faculty(faculty_member, write)

    # TODO: This should be running in the create_faculty, once we fix the workflow.
    # NOTE(review): queued once after the loop, not once per member -- confirm.
    workflow = Workflow(DataIngester.INITIAL_PAGE_SCRAPE)
    run_workflow.apply_async((workflow, ), countdown=5)

    # json_data is a Sequence, so its length equals the number of entries
    # processed by the loop above.
    print("Ingested {} faculty members".format(len(json_data)))
def create_faculty(json_data, write=True):
    """Creates an instance of Faculty from a JSON representation.

    The data is loaded through ``FacultySchema``; on success the faculty is
    optionally saved and a ``DataIngester.INITIAL_PAGE_SCRAPE`` workflow is
    queued for ``faculty.name``.

    :param dict json_data: Dictionary representation of the JSON data.
    :param bool write: Boolean switch that will enable writing to elastic.
    :exception DataIngestionException: If ``FacultySchema.load`` raises a
        ``ValidationError``; the original error is attached as the cause.
    """
    schema = FacultySchema()

    try:
        faculty = schema.load(json_data)
    except ValidationError as err:
        # Chain explicitly so the validation error is reported as the direct
        # cause of the ingestion failure.
        raise DataIngestionException(
            "Missing one of the required fields of the schema. {}".format(
                err.messages)) from err

    if write:
        faculty.save()

    # NOTE(review): the workflow is queued regardless of ``write`` -- confirm
    # this matches the intended behaviour for unsaved faculty.
    workflow = Workflow(DataIngester.INITIAL_PAGE_SCRAPE, faculty.name)
    run_workflow.apply_async((workflow, ), countdown=5)