Exemple #1
0
def redownload_protocol(committee_meeting):
    """Fetch the protocol of ``committee_meeting`` again and store it.

    Meetings whose committee type is ``'plenum'`` are handed over to
    ``download_for_existing_meeting``. For any other committee type the
    protocol is loaded from the meeting's ``src_url``; its text and the time
    of the refresh are written onto the meeting, which is then saved.
    """
    if committee_meeting.committee.type == 'plenum':
        download_for_existing_meeting(committee_meeting)
        return

    protocol_source = CommitteeMeetingProtocol.get_from_url(
        committee_meeting.src_url)
    with protocol_source as protocol:
        committee_meeting.protocol_text = protocol.text
        committee_meeting.protocol_text_update_date = datetime.now()
        committee_meeting.save()
Exemple #2
0
 def redownload_protocol(self):
     """Download this meeting's protocol again and persist the new text.

     Plenum meetings go through ``download_for_existing_meeting``; all other
     meetings read the protocol from ``self.src_url``, update
     ``protocol_text`` / ``protocol_text_update_date`` and call ``save()``.
     """
     is_plenum = self.committee.type == 'plenum'
     if is_plenum:
         # TODO: calling into a management command like this is an
         # antipattern; a common service should be extracted and used.
         from plenum.management.commands.parse_plenum_protocols_subcommands.download import (
             download_for_existing_meeting,
         )
         download_for_existing_meeting(self)
         return

     protocol_source = KnessetDataCommitteeMeetingProtocol.get_from_url(self.src_url)
     with protocol_source as protocol:
         self.protocol_text = protocol.text
         self.protocol_text_update_date = datetime.now()
         self.save()
Exemple #3
0
 def redownload_protocol(self):
     """Download this meeting's protocol again and persist the new text.

     Plenum meetings are delegated to ``download_for_existing_meeting``.
     For every other committee type the protocol is read from
     ``self.src_url``; ``protocol_text`` and ``protocol_text_update_date``
     are updated and the object is saved.

     Raises:
         AntiwordException: logged (message, traceback and the exception's
             ``output`` attribute) and then re-raised to the caller.
     """
     if self.committee.type == 'plenum':
         # TODO: Using a management command this way is an antipattern, a
         # common service should be extracted and used.
         from plenum.management.commands.parse_plenum_protocols_subcommands.download import \
             download_for_existing_meeting
         download_for_existing_meeting(self)
     else:
         try:
             with KnessetDataCommitteeMeetingProtocol.get_from_url(
                     self.src_url) as protocol:
                 self.protocol_text = protocol.text
                 self.protocol_text_update_date = datetime.now()
                 self.save()
         except AntiwordException as e:
             logger.error(e.message,
                          exc_info=True,
                          extra={'output': e.output})
             # Bare ``raise`` re-raises the active exception with its
             # original traceback; ``raise e`` would restart the traceback
             # at this line on Python 2.
             raise
Exemple #4
0
 def redownload_protocol(self):
     """Download this meeting's protocol again and persist the new text.

     Plenum meetings are delegated to ``download_for_existing_meeting``.
     For every other committee type the protocol is read from
     ``self.src_url``; ``protocol_text`` and ``protocol_text_update_date``
     are updated and the object is saved.

     Raises:
         AntiwordException: logged (message, traceback and the exception's
             ``output`` attribute) and then re-raised to the caller.
     """
     if self.committee.type == 'plenum':
         # TODO: Using a management command this way is an antipattern, a
         # common service should be extracted and used.
         from plenum.management.commands.parse_plenum_protocols_subcommands.download import \
             download_for_existing_meeting
         download_for_existing_meeting(self)
     else:
         try:
             with KnessetDataCommitteeMeetingProtocol.get_from_url(self.src_url) as protocol:
                 self.protocol_text = protocol.text
                 self.protocol_text_update_date = datetime.now()
                 self.save()
         # ``except X as e`` is valid on Python 2.6+ and Python 3, whereas the
         # comma form is a SyntaxError on Python 3.
         except AntiwordException as e:
             logger.error(
                 e.message,
                 exc_info=True,
                 extra={
                     'output': e.output
                 }
             )
             # Bare ``raise`` keeps the original traceback; ``raise e`` would
             # restart it at this line on Python 2.
             raise
def get_resource():
    """Yield every row of ``download_rows`` after trying to parse its protocol.

    For each row the source document path and the parsed output path are
    derived from the row's ids. While the ``files_limit`` allows it, the
    protocol is read either from the local download directory
    (``download_from_path``) or from ``download_from_remote_storage`` and
    passed to ``parse_protocol``. Rows that were handled without an error
    are updated with ``protocol_extension`` / ``parsed_filename`` and
    yielded; rows that raised are logged, annotated with an ``error``
    message and appended to ``errors`` instead of being yielded.
    """
    for position, row in enumerate(download_rows):
        logging.info("{} / {}".format(position, len(download_rows)))
        try:
            # Source document: files/<group>/<d0>/<d1>/<document id>.<type>
            group_dir = str(row["GroupTypeID"])
            document_id = str(row["DocumentCommitteeSessionID"])
            original_filename = os.path.join(
                "files", group_dir, document_id[0], document_id[1],
                document_id + "." + row["ApplicationDesc"])
            ext = os.path.splitext(original_filename)[1].lower()

            # Parsed output: files/<s0>/<s1>/<session id>.<csv|txt>
            session_id = str(row["CommitteeSessionID"])
            output_filename = "files/{}/{}/{}.{}".format(
                session_id[0], session_id[1], session_id,
                "csv" if parse_type == "parts" else "txt")

            may_parse = not files_limit or stats["parsed files"] < files_limit
            if may_parse:
                if download_from_path:
                    download_filename = (
                        "../data/committees/download_document_committee_session/"
                        + original_filename)
                    if not os.path.exists(download_filename):
                        logging.warning("missing download_filename {}".format(download_filename))
                    else:
                        with open(download_filename, "rb") as source_file, \
                                CommitteeMeetingProtocol.get_from_file(source_file) as protocol:
                            parse_protocol(output_filename, protocol)
                elif download_from_remote_storage:
                    remote_url = download_from_remote_storage + original_filename
                    with CommitteeMeetingProtocol.get_from_url(remote_url) as protocol:
                        parse_protocol(output_filename, protocol)
                else:
                    raise Exception("no valid download option")

            row.update(protocol_extension=ext,
                       parsed_filename=output_filename)
            yield row
        except Exception:
            # A bug in knesset-data-python prevents getting the error message
            # from the exception, so only the session id is reported.
            # TODO: fix this bug
            error_message = "failed to parse CommitteeSessionID {}".format(row["CommitteeSessionID"])
            logging.exception(error_message)
            row.update(error=error_message)
            errors.append(row)
class CommitteeMeeting(BaseKnessetDataServiceFunctionObject):
    """Committee meeting entries returned by the CommitteeAgendaSearch function.

    ``ORDERED_FIELDS`` maps object field names to the source fields of the
    service; ``get`` queries the meetings of one committee in a date range.
    """

    # NOTE(review): every field name from "location " onwards ends with a
    # trailing space, and "topid_id" looks like a typo of "topic_id". They are
    # kept exactly as they are because they are runtime keys - confirm with
    # callers before renaming.
    ORDERED_FIELDS = [
        (
            "id",
            KnessetDataServiceSimpleField(
                'Committee_Agenda_id',
                'integer',
                "the primary key of committee meetings",
            ),
        ),
        (
            "committee_id",
            KnessetDataServiceSimpleField(
                'Committee_Agenda_committee_id',
                'integer',
                "id of the committee (linked to Committee object)",
            ),
        ),
        (
            "datetime",
            KnessetDataServiceSimpleField(
                'committee_agenda_date',
                'datetime',
                "date/time when the meeting started",
            ),
        ),
        (
            "title",
            KnessetDataServiceSimpleField(
                'title',
                'string',
                "title of the meeting",
            ),
        ),
        (
            "session_content",
            KnessetDataServiceSimpleField(
                'committee_agenda_session_content',
                'string',
                "seems like in some committee meetings, the title field is empty, "
                "in that case title can be taken from this field",
            ),
        ),
        (
            "url",
            KnessetDataServiceSimpleField(
                'url',
                'string',
                "url to download the protocol",
            ),
        ),
        # A CommitteeMeetingProtocol object which gives access to the data of
        # the protocol. Parsing the protocol requires heavy IO and processing,
        # so it is produced through a lambda field instead of a plain value;
        # meetings without a url get None.
        # See tests/test_meetings.py for a usage example.
        (
            "protocol",
            KnessetDataServiceLambdaField(
                lambda obj, entry: (
                    None if not obj.url
                    else CommitteeMeetingProtocol.get_from_url(
                        obj.url, proxies=obj._proxies)
                )
            ),
        ),
        (
            "location ",
            KnessetDataServiceSimpleField(
                'committee_location',
                'string',
                "this seems like a shorter name of the place where meeting took place",
            ),
        ),
        (
            "place ",
            KnessetDataServiceSimpleField(
                'Committee_Agenda_place',
                'string',
                "this looks like a longer field with the specific details "
                "of where the meeting took place",
            ),
        ),
        (
            "meeting_stop ",
            KnessetDataServiceSimpleField(
                'meeting_stop',
                'string',
                "date/time when the meeting ended - this is not always available, "
                "in some meetings it's empty",
            ),
        ),
    ] + list(
        # The following fields seem less interesting: each one is a simple
        # field built from the source field name alone, so they are generated
        # from (field name, source field name) pairs. A generator expression
        # is used so that no loop variable ends up as a class attribute.
        (field_name, KnessetDataServiceSimpleField(source_name))
        for field_name, source_name in (
            ("agenda_canceled ", 'Committee_Agenda_canceled'),
            ("agenda_sub ", 'Committee_agenda_sub'),
            ("agenda_associated ", 'Committee_agenda_associated'),
            ("agenda_associated_id ", 'Committee_agenda_associated_id'),
            ("agenda_special ", 'Committee_agenda_special'),
            ("agenda_invited1 ", 'Committee_agenda_invited1'),
            ("agenda_invite ", 'sd2committee_agenda_invite'),
            ("note ", 'Committee_agenda_note'),
            ("start_datetime ", 'StartDateTime'),
            ("topid_id ", 'Topic_ID'),
            ("creation_date ", 'Date_Creation'),
            ("streaming_url ", 'streaming_url'),
            ("meeting_start ", 'meeting_start'),
            ("is_paused ", 'meeting_is_paused'),
            ("date_order ", 'committee_date_order'),
            ("date ", 'committee_date'),
            ("day ", 'committee_day'),
            ("month ", 'committee_month'),
            ("material_id ", 'material_id'),
            ("material_committee_id ", 'material_comittee_id'),
            ("material_expiration_date ", 'material_expiration_date'),
            ("material_hour ", 'committee_material_hour'),
            ("old_url ", 'OldUrl'),
            ("background_page_link ", 'CommitteeBackgroundPageLink'),
            ("agenda_invited ", 'Committee_agenda_invited'),
        )
    )

    @classmethod
    def _get_url_base(cls):
        """Return the url of the CommitteeAgendaSearch service function."""
        return (
            "http://online.knesset.gov.il/WsinternetSps/KnessetDataService/"
            "CommitteeScheduleData.svc/CommitteeAgendaSearch"
        )

    @classmethod
    def get(cls, committee_id, from_date, to_date=None, proxies=None):
        """Query the meetings of a committee within a date range.

        :param committee_id: id of the committee, sent as ``CommitteeId``
        :param from_date: object with ``strftime`` (e.g. a datetime); only its
            date part is sent, as ``FromDate`` with a ``T00:00:00`` time
        :param to_date: optional, same handling as ``from_date``; the
            ``ToDate`` parameter is omitted when this is falsy
        :param proxies: forwarded unchanged to the parent class ``get``
        :return: whatever the parent class ``get`` returns for these params

        Example usage::

            from datetime import datetime
            # get all meetings of committee 1 from Jan 01, 2016
            CommitteeMeeting.get(1, datetime(2016, 1, 1))
            # get all meetings of committee 2 from Feb 01, 2015 to Feb 20, 2015
            CommitteeMeeting.get(2, datetime(2015, 2, 1), datetime(2015, 2, 20))
        """
        quoted_day_start = "'%sT00:00:00'"
        params = {"CommitteeId": "'%s'" % committee_id}
        params["FromDate"] = quoted_day_start % from_date.strftime('%Y-%m-%d')
        if to_date:
            params["ToDate"] = quoted_day_start % to_date.strftime('%Y-%m-%d')
        return super(CommitteeMeeting, cls).get(params, proxies=proxies)