Exemple #1
0
    def post(self):
        """Handle HTTP POST: schedule a workflow run for one faculty member.

        Reads two query-string arguments from the request:

        * ``run`` -- key into ``TASKLIST`` selecting the task list to run.
        * ``faculty`` -- value handed to ``Faculty.safe_get``.

        Aborts with HTTP 400 when either argument is missing or empty, or
        when ``run`` is not a key of ``TASKLIST``.

        :return: ``200`` after the workflow has been submitted.
        """
        to_run = request.args.get('run')
        faculty = request.args.get('faculty')

        if not to_run or not faculty:
            abort(400)

        try:
            task_list = TASKLIST[to_run]
        except KeyError:
            # Unknown task-list name is a client error. Only the lookup
            # failure is caught so unrelated errors are not masked as 400s.
            abort(400)

        workflow = Workflow(task_list, Faculty.safe_get(faculty))
        # Submitted asynchronously with countdown=1 instead of run inline.
        run_workflow.apply_async((workflow,), countdown=1)

        return 200
Exemple #2
0
    def bulk_create_faculty(json_data, write=True):
        """Takes in a list of JSON objects, and loads them into elasticsearch.

        Each element is passed to ``DataIngester.create_faculty``; afterwards
        a single ``DataIngester.INITIAL_PAGE_SCRAPE`` workflow is submitted
        and the number of ingested members is printed.

        :param json_data: Sequence of JSON-like faculty records. The expected
            type is a List.
        :param bool write: Forwarded unchanged to
            ``DataIngester.create_faculty``.
        :exception TypeError: If the json_data is not a sequence object.
        """
        if not isinstance(json_data, Sequence):
            # Format the message explicitly: passing the type as a second
            # exception argument would leave the "{}" placeholder unfilled.
            raise TypeError(
                "Expected a Sequence, but got a {}".format(type(json_data)))

        count = 0  # stays 0 when json_data is empty
        for count, faculty_member in enumerate(json_data, start=1):
            DataIngester.create_faculty(faculty_member, write)

        # TODO: This should be running in the create_faculty, once we fix the workflow.
        workflow = Workflow(DataIngester.INITIAL_PAGE_SCRAPE)
        run_workflow.apply_async((workflow, ), countdown=5)
        print("Ingested {} faculty members".format(count))
    def create_faculty(json_data, write=True):
        """Creates an instance of Faculty from a JSON representation.

        The data is loaded through ``FacultySchema``; when ``write`` is true
        the loaded object is saved. A ``DataIngester.INITIAL_PAGE_SCRAPE``
        workflow for ``faculty.name`` is then submitted asynchronously.

        :param dict json_data: Dictionary representation of the JSON data.
        :param bool write: Boolean switch that will enable writing to elastic.
        :exception DataIngestionException: If schema loading raises a
            ``ValidationError``; the original error is attached as the cause.
        """
        schema = FacultySchema()

        try:
            faculty = schema.load(json_data)
        except ValidationError as err:
            # Chain explicitly so the validation failure is recorded as the
            # direct cause of the ingestion error in tracebacks.
            raise DataIngestionException(
                "Missing one of the required fields of the schema. {}".format(
                    err.messages)) from err

        if write:
            faculty.save()

        workflow = Workflow(DataIngester.INITIAL_PAGE_SCRAPE, faculty.name)
        run_workflow.apply_async((workflow, ), countdown=5)