def extract_speakers_from_txt_file(self, file_object_path, committee_id, meeting_id):
    """Yield one record per speaker found in a stored protocol text file.

    The object at ``file_object_path`` is read from the ``"committees"``
    bucket through ``self.s3``, decoded with ``.decode()`` and parsed with
    ``CommitteeMeetingProtocol.get_from_text``.

    Each yielded dict has the keys ``committee_id``, ``meeting_id`` and
    ``name`` (the speaker entry exactly as the protocol exposes it).
    Nothing is yielded when the protocol's ``speakers`` attribute is ``None``.
    """
    raw_content = object_storage.read(self.s3, "committees", file_object_path)

    # The speakers are captured inside the context manager and consumed
    # only after it has exited.
    with CommitteeMeetingProtocol.get_from_text(raw_content.decode()) as parsed:
        speaker_names = parsed.speakers

    if speaker_names is None:
        return

    for speaker_name in speaker_names:
        yield dict(
            committee_id=committee_id,
            meeting_id=meeting_id,
            name=speaker_name,
        )
def get_pipeline_schema(pipeline_spec, pipeline_id, timeout=30):
    """Load the table schema JSON document for ``pipeline_id``.

    The schema is looked up as ``table-schemas/<pipeline_id>.json`` inside
    the bucket named by ``pipeline_spec``. If that object exists in object
    storage it is read from there; otherwise it is downloaded over HTTPS
    from ``minio.oknesset.org``.

    Args:
        pipeline_spec: Used as-is as the bucket name.
        pipeline_id: Pipeline identifier used to build the schema object name.
        timeout: Seconds passed to ``requests.get`` for the remote fallback,
            so an unresponsive server cannot block the caller forever.

    Returns:
        The parsed JSON schema.

    Raises:
        requests.HTTPError: If the remote fallback answers with an error
            status (via ``raise_for_status``).
        requests.Timeout: If the remote fallback exceeds ``timeout``.
    """
    bucket = pipeline_spec
    # Every pipeline id maps onto the same path pattern, so no id needs
    # special-casing here.
    object_name = "table-schemas/{}.json".format(pipeline_id)

    s3 = object_storage.get_s3()
    if object_storage.exists(s3, bucket, object_name):
        return json.loads(object_storage.read(s3, bucket, object_name))

    logging.warning("Missing local table schema, trying from remote")
    url = "https://minio.oknesset.org/{}/{}".format(bucket, object_name)
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    return res.json()
def get_pipeline_schema(pipeline_spec, pipeline_id, timeout=30):
    """Return the parsed table schema for the given pipeline.

    The schema object name is ``table-schemas/<pipeline_id>.json`` and the
    bucket is ``pipeline_spec``. Object storage is tried first; when the
    object is not there, the same bucket/object path is fetched from
    ``https://minio.oknesset.org``.

    Args:
        pipeline_spec: Bucket name that holds the schema object.
        pipeline_id: Pipeline identifier interpolated into the object name.
        timeout: Timeout in seconds for the remote HTTP request. Without a
            timeout ``requests`` may wait indefinitely on a stalled server.

    Returns:
        The schema decoded from JSON.

    Raises:
        requests.HTTPError: When the remote request returns an error status.
        requests.Timeout: When the remote request exceeds ``timeout``.
    """
    bucket = pipeline_spec
    # One path pattern covers all pipeline ids; a per-id branch would build
    # exactly the same string.
    object_name = "table-schemas/{}.json".format(pipeline_id)

    s3 = object_storage.get_s3()
    if object_storage.exists(s3, bucket, object_name):
        return json.loads(object_storage.read(s3, bucket, object_name))

    logging.warning("Missing local table schema, trying from remote")
    url = "https://minio.oknesset.org/{}/{}".format(bucket, object_name)
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    return res.json()
Exemple #4
0
    def extract_speakers_from_txt_file(self, file_object_path, committee_id,
                                       meeting_id):
        """Generate speaker rows for one committee meeting protocol.

        Reads ``file_object_path`` from the ``"committees"`` bucket using
        ``self.s3``, decodes it, and parses it with
        ``CommitteeMeetingProtocol.get_from_text``.

        Yields dicts with the keys ``committee_id``, ``meeting_id`` and
        ``name``, one per entry in the protocol's ``speakers``. Yields
        nothing when ``speakers`` is ``None``.
        """
        protocol_text = object_storage.read(
            self.s3, "committees", file_object_path
        ).decode()

        with CommitteeMeetingProtocol.get_from_text(protocol_text) as parsed:
            found_speakers = parsed.speakers

        if found_speakers is None:
            return

        # Fields shared by every row of this meeting.
        meeting_fields = {
            "committee_id": committee_id,
            "meeting_id": meeting_id,
        }
        for one_speaker in found_speakers:
            yield dict(meeting_fields, name=one_speaker)
    def extract_attendees_from_txt_file(self, file_object_path, committee_id, meeting_id):
        """Generate attendee rows for one committee meeting protocol.

        Reads ``file_object_path`` from the ``"committees"`` bucket using
        ``self.s3``, decodes it, and parses it with
        ``CommitteeMeetingProtocol.get_from_text``. The protocol's
        ``attendees`` is treated as a mapping from a role key to an iterable
        of attendees.

        Yields dicts with the keys ``committee_id``, ``meeting_id``, ``name``,
        ``role`` and ``additional_information``:

        * under the ``"invitees"`` key each attendee is itself a mapping;
          ``name`` comes from its ``"name"`` entry and
          ``additional_information`` from its optional ``"role"`` entry
          (empty string when absent);
        * under any other key the attendee value is used directly as
          ``name`` and ``additional_information`` is an empty string.

        Yields nothing when ``attendees`` is ``None``.
        """
        raw_content = object_storage.read(self.s3, "committees", file_object_path)

        with CommitteeMeetingProtocol.get_from_text(raw_content.decode()) as parsed:
            attendees_by_role = parsed.attendees

        if attendees_by_role is None:
            return

        for role, members in attendees_by_role.items():
            invitee_group = role == "invitees"
            for member in members:
                if invitee_group:
                    # Invitees carry structured data rather than a bare name.
                    member_name = member["name"]
                    extra_info = member.get("role", "")
                else:
                    member_name = member
                    extra_info = ""

                yield {
                    "committee_id": committee_id,
                    "meeting_id": meeting_id,
                    "name": member_name,
                    "role": role,
                    "additional_information": extra_info,
                }