Example n. 1
0
def parse_check_numbers(params, verbose=False):
    """
    Check whether a list holds valid interval info: exactly three
    non-negative base-10 integers. Exists only to keep read_timeline()
    short enough that Python won't throw warnings about it; there's no
    need to call this outside that function.
        :param params: is the list to check.
        :param verbose: is an optional flag.
            When true, extra parsing information is printed to the console. Defaults to false.
    """

    # exactly three entries are required
    if len(params) != 3:
        if verbose:
            print(magenta("\t\tinvalid parameter count:"), len(params))
        return False

    # every entry must parse as a non-negative base-10 integer
    for entry in params:
        try:
            value = int(entry, 10)
        except ValueError:
            # not an integer at all
            if verbose:
                print(red("\t\tinvalid integer > "), end='')
                print(add_quotes(entry))
            return False
        # int() happily parses negatives, but intervals must not be negative
        if value < 0:
            if verbose:
                print(red("\t\tinvalid integer range > "), end='')
                print(add_quotes(entry))
            return False
    return True
Example n. 2
0
def prompt(msg, minor=False, default_yes=False):
    """Ask a yes/no question on the console; return True for yes."""
    # The hint shows which answer is taken when the user just hits enter.
    hint = '[Yn]' if default_yes else '[yN]'

    marker = '-> ' if minor else '> '
    ansicolor.write_out(ansicolor.magenta('%s%s %s ' % (marker, msg, hint)))
    inp = raw_input()

    # With a yes-default anything not containing 'n' counts as yes;
    # otherwise an explicit 'y' is required.
    if default_yes:
        return 'n' not in inp
    return 'y' in inp
Example n. 3
0
def prompt(msg, minor=False, default_yes=False):
    """Prompt the user with a yes/no question and return the answer."""
    if default_yes:
        suffix = '[Yn]'
    else:
        suffix = '[yN]'

    # Minor prompts are prefixed '->', major ones '>'.
    lead = '->' if minor else '>'
    ansicolor.write_out(ansicolor.magenta('%s %s %s ' % (lead, msg, suffix)))
    answer = raw_input()

    # A yes-default is only overridden by an answer containing 'n';
    # a no-default needs an explicit 'y'.
    if default_yes:
        return not ('n' in answer)
    else:
        return 'y' in answer
Example n. 4
0
    def parse_opinion(self, response):
        """
        Parse one pre-law opinion page.

        Expects response.meta to carry 'op_data' (list-page data for this
        opinion) and 'law_item' (the pre-law the opinion belongs to).
        Creates/updates the Entity and Opinion records, parses the
        opinion's steps, and logs the result. Returns nothing.
        """
        op_data = response.meta['op_data']

        parl_id = LAW.PARL_ID.xt(response)

        description = LAW.DESCRIPTION.xt(response)
        docs = self.parse_docs(response)
        category = self.parse_category(response)
        keywords = self.parse_keywords(response)
        entity = OPINION.ENTITY.xt(response)
        # Prefer the title from the list page, falling back to the detail
        # title; the email falls back the other way around.
        # (Removed a no-op self-assignment of entity['title_detail'].)
        entity['title'] = op_data['title'] or entity['title_detail']
        entity['email'] = entity['email'] or op_data['email']

        entity_item, entity_created = Entity.objects.get_or_create(
            title=entity['title'],
            title_detail=entity['title_detail']
        )

        # Fill in contact details only where the stored record lacks them,
        # and persist the change (the original assigned but never saved).
        entity_dirty = False
        if entity['phone'] and not entity_item.phone:
            entity_item.phone = entity['phone']
            entity_dirty = True
        if entity['email'] and not entity_item.email:
            entity_item.email = entity['email']
            entity_dirty = True
        if entity_dirty:
            entity_item.save()

        opinion_item, created = Opinion.objects.get_or_create(
            parl_id=parl_id,
            defaults={
                'date': op_data['date'],
                'description': description,
                'source_link': response.url,
                'entity': entity_item,
                'prelaw': response.meta['law_item'],
                'category': category
            }
        )

        # Foreign Keys
        opinion_item.documents = docs
        opinion_item.keywords = keywords

        # parse_op_steps reads the opinion from response.meta; its return
        # value was never used, so it is no longer bound.
        response.meta['opinion'] = opinion_item
        self.parse_op_steps(response)

        # 'created' refers to the Opinion lookup above, not the Entity.
        entity_str = u"{} / {} / {} [{}]".format(
            green(entity_item.title_detail),
            entity_item.phone,
            entity_item.email,
            'new' if created else 'updated')

        log.msg(
            u"Opinion: {} by {}".format(
                magenta(opinion_item.parl_id),
                entity_str
            ))
Example n. 5
0
    def parse_opinion(self, response):
        """
        Parse one pre-law opinion page.

        Expects response.meta to carry 'op_data' (list-page data for this
        opinion) and 'law_item' (the pre-law the opinion belongs to).
        Creates/updates the Entity and Opinion records, parses the
        opinion's steps, and logs the result. Returns nothing.
        """
        op_data = response.meta['op_data']

        parl_id = LAW.PARL_ID.xt(response)

        description = LAW.DESCRIPTION.xt(response)
        docs = self.parse_docs(response)
        category = self.parse_category(response)
        keywords = self.parse_keywords(response)
        entity = OPINION.ENTITY.xt(response)
        # Prefer the title from the list page, falling back to the detail
        # title; the email falls back the other way around.
        # (Removed a no-op self-assignment of entity['title_detail'].)
        entity['title'] = op_data['title'] or entity['title_detail']
        entity['email'] = entity['email'] or op_data['email']

        entity_item, entity_created = Entity.objects.get_or_create(
            title=entity['title'],
            title_detail=entity['title_detail']
        )

        # Fill in contact details only where the stored record lacks them,
        # and persist the change (the original assigned but never saved).
        entity_dirty = False
        if entity['phone'] and not entity_item.phone:
            entity_item.phone = entity['phone']
            entity_dirty = True
        if entity['email'] and not entity_item.email:
            entity_item.email = entity['email']
            entity_dirty = True
        if entity_dirty:
            entity_item.save()

        opinion_item, created = Opinion.objects.get_or_create(
            parl_id=parl_id,
            defaults={
                'date': op_data['date'],
                'description': description,
                'source_link': response.url,
                'entity': entity_item,
                'prelaw': response.meta['law_item'],
                'category': category
            }
        )

        # Foreign Keys
        opinion_item.documents = docs
        opinion_item.keywords = keywords

        # parse_op_steps reads the opinion from response.meta; its return
        # value was never used, so it is no longer bound.
        response.meta['opinion'] = opinion_item
        self.parse_op_steps(response)

        # 'created' refers to the Opinion lookup above, not the Entity.
        entity_str = u"{} / {} / {} [{}]".format(
            green(entity_item.title_detail),
            entity_item.phone,
            entity_item.email,
            'new' if created else 'updated')

        log.msg(
            u"Opinion: {} by {}".format(
                magenta(opinion_item.parl_id),
                entity_str
            ))
Example n. 6
0
def send_request(url, headers, redirects, data=None):
    """
    Send an HTTP request and return the response.

    Issues a GET when data is None, otherwise a POST with data as the
    body. Exits the process on connection errors, timeouts, or any HTTP
    status other than 200/302.
        :param url: target URL.
        :param headers: dict of HTTP headers to send.
        :param redirects: whether to follow redirects.
        :param data: optional POST body; None means GET.
    """
    try:
        if data is None:
            r = requests.get(url,
                             headers=headers,
                             timeout=timeout_,
                             verify=False,
                             allow_redirects=redirects)
        else:
            r = requests.post(url,
                              headers=headers,
                              data=data,
                              timeout=timeout_,
                              verify=False,
                              allow_redirects=redirects)
    except requests.exceptions.ConnectionError:
        print "Error. Connection refused."
        sys.exit(1)
    except requests.exceptions.Timeout:
        # Fixed: the GET branch previously formatted the undefined name
        # 'timeout'; the module-level setting is 'timeout_'.
        print magenta(
            "[!]") + " Time of response exceeded {} seconds!".format(
                timeout_)
        sys.exit(1)

    if r.status_code != 200 and r.status_code != 302:
        print "Error with HTTP code", r.status_code
        print r.text
        sys.exit(-1)
    return r
Example n. 7
0
def f_verbose(value):
    if ("[X]" in value) or ("[+]" in value):
        f_save(value + '\n')

    col_cred = value.split('`')
    neutrino = ''

    for index, item in enumerate(col_cred):
        if index & 1:
            neutrino = neutrino + blue(item)
        else:
            neutrino += item
    if "[X]" in neutrino:
        print neutrino.replace("[X]", red("[X]"))
    elif "[+]" in neutrino:
        print neutrino.replace("[+]", yellow("[+]"))
    elif args.verbose:
        print neutrino.replace("[*]",
                               green("[*]")).replace("[!]", magenta("[!]"))

    return
Example n. 8
0
    def parse(self, response):
        """
        Parse one petition detail page.

        Creates or updates the Petition record together with its
        keywords, category, documents and creators, then builds and
        returns a list of follow-up scrapy Requests (opinion pages and,
        for signable petition types, the signature listing).
        """
        # Extract fields
        title = LAW.TITLE.xt(response)
        parl_id = LAW.PARL_ID.xt(response)
        ts = GENERIC.TIMESTAMP.xt(response)

        if not (u'BI' in parl_id or u'PET' in parl_id):
            # VBG have their parl_id only in the url
            parl_id = response.url.split('/')[-2]

        status = LAW.STATUS.xt(response)

        # URL segment -4 names the legislative period; 'BR' pages
        # (Bundesrat) carry no LLP.
        raw_llp = response.url.split('/')[-4]
        if raw_llp != u'BR':
            LLP = LegislativePeriod.objects.get(roman_numeral=raw_llp)
        else:
            LLP = None

        # Skip the page entirely when the stored timestamp is unchanged.
        if not self.IGNORE_TIMESTAMP and not self.has_changes(
                parl_id, LLP, response.url, ts):
            self.logger.info(
                green(u"Skipping Petition, no changes: {}".format(title)))
            return

        # save ids and stuff for internals
        if LLP not in self.idlist:
            self.idlist[LLP] = {}
        self.idlist[LLP][response.url] = [parl_id, LLP]

        # Extract foreign keys
        category = self.parse_category(response)
        description = LAW.DESCRIPTION.xt(response)

        signing_url, signable = PETITION.SIGNING.xt(response)

        signature_count = PETITION.SIGNATURE_COUNT.xt(response)

        # Parse reference
        reference = self.parse_reference(response)

        # Log our progress
        logtext = u"Scraping {} with id {}, LLP {} @ {}".format(
            red(title), magenta(u"[{}]".format(parl_id)), green(str(LLP)),
            blue(response.url))
        log.msg(logtext, level=log.INFO)

        # Create and save Petition
        petition_item, petition_item_created = Petition.objects.update_or_create(
            parl_id=parl_id,
            legislative_period=LLP,
            defaults={
                'title': title,
                'status': status,
                'source_link': response.url,
                'description': description,
                'signable': signable,
                'signing_url': signing_url,
                'signature_count': signature_count,
                'reference': reference,
                'ts': ts
            })

        # NOTE(review): update_or_create already saved the row, so this
        # extra save() on the update path looks redundant — confirm.
        if not petition_item_created:
            petition_item.save()

        # Attach foreign keys
        petition_item.keywords = self.parse_keywords(response)
        petition_item.category = category
        petition_item.documents = self.parse_docs(response)

        petition_item.save()

        # Parse creators
        petition_creators = self.parse_creators(response)

        for petition_creator in petition_creators:
            petition_creator.created_petitions.add(petition_item)

        callback_requests = []

        # is the tab 'Parlamentarisches Verfahren available?'
        if response.xpath('//h2[@id="tab-ParlamentarischesVerfahren"]'):
            response.meta['petition_item'] = petition_item
            self.parse_parliament_steps(response)

        # Parse opinions
        opinions = PETITION.OPINIONS.xt(response)

        if opinions:
            for op in opinions:
                # NOTE(review): opinions already in the DB are skipped,
                # assuming they never change after publication — confirm.
                if Opinion.objects.filter(parl_id=op['parl_id']).exists():
                    continue
                post_req = scrapy.Request("{}/{}".format(BASE_HOST, op['url']),
                                          callback=self.parse_opinion,
                                          dont_filter=True)
                post_req.meta['petition_item'] = petition_item
                post_req.meta['op_data'] = op

                callback_requests.append(post_req)

        # Only BI or PET (but not PET-BR) have online signatures
        # NOTE(review): 'and' binds tighter than 'or', so this reads as
        # BI or (PET and not PET-BR); that matches the comment's intent
        # only if PET-BR ids never contain 'BI' — confirm.
        if u'BI' in parl_id or u'PET' in parl_id and not u'PET-BR' in parl_id:
            signatures_base_url = '{}/PAKT/VHG/{}/{}/{}/filter.psp?xdocumentUri=/PAKT/VHG/{}/{}/{}/'\
                'index.shtml&GP_CODE={}&ITYP={}&INR={}&FBEZ=BI_001&R_1000=ALLE&STEP=&pageNumber='

            # parl_id is presumably of the form '(<number>/<type>)':
            # strip the outer chars, then split into number and type.
            raw_parl_id = petition_item.parl_id[1:-1].split('/')
            petition_type = raw_parl_id[1]
            petition_number = int(raw_parl_id[0])
            url_parl_id = '{}_{}'.format(petition_type, petition_number)

            signatures_url = signatures_base_url.format(
                BASE_HOST, LLP.roman_numeral, petition_type, url_parl_id,
                LLP.roman_numeral, petition_type, url_parl_id,
                LLP.roman_numeral, petition_type, petition_number)

            post_req = scrapy.Request(signatures_url,
                                      callback=self.parse_signatures,
                                      dont_filter=True)

            post_req.meta['petition_item'] = petition_item

            callback_requests.append(post_req)

        log.msg(green("Open Callback requests: {}".format(
            len(callback_requests))),
                level=log.INFO)

        return callback_requests
Example n. 9
0
    def parse(self, response):
        """
        Parse one comittee detail page.

        Creates or updates the Comittee record, its meetings with their
        agenda topics, and the laws referenced from the page, then links
        the collected laws to the comittee. Returns nothing.
        """
        # Parse
        parl_id = COMITTEE.url_to_parlid(response.url)[1]
        ts = GENERIC.TIMESTAMP.xt(response)
        llp = COMITTEE.LLP.xt(response)
        name = COMITTEE.NAME.xt(response)

        if llp is not None:
            nrbr = 'Nationalrat'
            legislative_period = LegislativePeriod.objects.get(
                roman_numeral=llp)
            # NR comittees are "active" if they are in the current LLP
            active = (
                legislative_period == LegislativePeriod.objects.get_current())
        else:
            nrbr = 'Bundesrat'
            legislative_period = None
            # BR comittees are active if they are not "aufgelöst"
            active = COMITTEE.ACTIVE.xt(response)

        # main-comittee parl_id starts with the number 1
        # sub-comittees parl_id start  with the number 2
        if not parl_id.startswith(u'(1/'):
            try:
                parent_parl_id = u'(1/{}'.format(parl_id.split('/')[1])
                parent_comitee = Comittee.objects.get(
                    parl_id=parent_parl_id, legislative_period=legislative_period)
            except Comittee.DoesNotExist:
                parent_comitee = None
        else:
            parent_comitee = None

        # Skip the page entirely when the stored timestamp is unchanged.
        if not self.IGNORE_TIMESTAMP and not self.has_changes(parl_id, legislative_period, nrbr, response.url, ts):
            self.logger.info(
                green(u"Skipping Comittee, no changes: {}".format(
                    name)))
            return

        # Log our progress
        # (fixed: the original formatted the undefined name 'LLP'; the
        # local variable in this method is lowercase 'llp')
        logtext = u"Scraping {} with id {}, LLP {} @ {}".format(
            red(name),
            magenta(u"[{}]".format(parl_id)),
            green(unicode(llp)),
            blue(response.url)
        )
        log.msg(logtext, level=log.INFO)

        description = COMITTEE.DESCRIPTION.xt(response)

        comittee_data = {
            'description': description,
            'name': name,
            'source_link': response.url,
            'parent_comittee': parent_comitee,
            'active': active,
            'ts': ts
        }

        try:
            comittee_item, created_comittee = Comittee.objects.update_or_create(
                parl_id=parl_id,
                legislative_period=legislative_period,
                nrbr=nrbr,
                defaults=comittee_data
            )
        except Exception:
            # Narrowed from a bare 'except:' so SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            log.msg(
                u"Could not update/create Comittee {}".format(name),
                level=log.ERROR)
            return

        meetings = COMITTEE.MEETINGS.xt(response)

        # Laws collected from meeting topics and from the law listings,
        # linked to the comittee at the end in one bulk add().
        comittee_laws = []

        for meeting in meetings:
            agenda_data = meeting['agenda']
            if agenda_data is not None:
                agenda_item, agenda_created = Document.objects.get_or_create(
                    **agenda_data)
            else:
                agenda_item = None

            meeting_data = {
                'agenda': agenda_item
            }

            # Log our progress
            logtext = u"Scraping meeting no. {} of {} on {}".format(
                red(meeting['number']),
                magenta(name),
                green(str(meeting['date'].date())),
            )
            log.msg(logtext, level=log.INFO)

            meeting_item, meeting_created = ComitteeMeeting.objects.update_or_create(
                number=meeting['number'],
                date=meeting['date'],
                comittee=comittee_item,
                defaults=meeting_data
            )

            for topic in meeting['topics']:
                if topic['law'] is not None:
                    law = topic['law']
                    law_item = self.parse_law(law)
                    if law_item is not None:
                        comittee_laws.append(law_item)
                else:
                    law_item = None

                agenda_topic_data = {
                    'comment': topic['comment'],
                    'law': law_item,
                }

                agenda_topic_item, agenda_topic_created = ComitteeAgendaTopic.objects.update_or_create(
                    number=topic['number'],
                    meeting=meeting_item,
                    text=topic['text'],
                    defaults=agenda_topic_data,
                )

        # parse Verhandlungsgegenstaende and Veroeffentlichungen
        laws_and_reports = COMITTEE.LAWS.xt(response)

        for law in laws_and_reports:
            # Log our progress
            logtext = u"Adding law with id {}, LLP {} to {}".format(
                magenta(u"[{}]".format(law['parl_id'])),
                green(law['llp']),
                blue(name)
            )
            log.msg(logtext, level=log.INFO)

            law_item = self.parse_law(law)
            if law_item is not None:
                comittee_laws.append(law_item)

        comittee_item.laws.add(*comittee_laws)
        comittee_item.save()
Example n. 10
0
def suggest(msg, minor=False):
    """Print a magenta suggestion line; minor ones get a '->' marker."""
    marker = '->' if minor else '>'
    ansicolor.write_out(ansicolor.magenta('%s %s\n' % (marker, msg)))
Example n. 11
0
    def parse(self, response):
        """
        Parse one pre-law detail page.

        Creates or updates the Law record with its keywords, category and
        documents, then returns follow-up scrapy Requests for any opinion
        pages not yet in the database.
        """
        # Extract fields
        ts = GENERIC.TIMESTAMP.xt(response)
        title = LAW.TITLE.xt(response)
        parl_id = LAW.PARL_ID.xt(response)
        # URL segment -4 names the legislative period.
        LLP = LegislativePeriod.objects.get(
            roman_numeral=response.url.split('/')[-4])

        # Skip the page entirely when the stored timestamp is unchanged.
        if not self.IGNORE_TIMESTAMP and not self.has_changes(parl_id, LLP, response.url, ts):
            self.logger.info(
                green(u"Skipping Law, no changes: {}".format(
                    title)))
            return

        # save ids and stuff for internals
        if LLP not in self.idlist:
            self.idlist[LLP] = {}
        self.idlist[LLP][response.url] = [parl_id, LLP]

        # Extract foreign keys
        category = self.parse_category(response)
        description = PRELAW.DESCRIPTION.xt(response)

        # Log our progress
        logtext = u"Scraping {} with id {}, LLP {} @ {}".format(
            red(title),
            magenta(u"[{}]".format(parl_id)),
            green(str(LLP)),
            blue(response.url)
        )
        log.msg(logtext, level=log.INFO)

        # Create and save Law
        pre_law_data = {
            'title': title,
            'description': description,
            'ts': ts
        }
        # NOTE(review): source_link is part of the lookup here, unlike the
        # sibling spider where it sits in defaults — confirm intended.
        law_item, created = Law.objects.get_or_create(
            parl_id=parl_id,
            source_link=response.url,
            legislative_period=LLP,
            defaults=pre_law_data)

        # NOTE(review): get_or_create does not apply defaults to existing
        # rows; this save() writes the unchanged instance back — confirm.
        if not created:
            law_item.save()

        # Attach foreign keys
        law_item.keywords = self.parse_keywords(response)
        law_item.category = category
        law_item.documents = self.parse_docs(response)

        law_item.save()

        # Parse opinions
        opinions = PRELAW.OPINIONS.xt(response)

        callback_requests = []

        # is the tab 'Parlamentarisches Verfahren available?'
        if opinions:
            skipped_ops = 0
            for op in opinions:
                # Opinions already in the DB are not re-requested.
                if Opinion.objects.filter(parl_id=op['parl_id']).exists():
                    skipped_ops += 1
                    continue
                post_req = scrapy.Request("{}/{}".format(BASE_HOST, op['url']),
                                          callback=self.parse_opinion,
                                          dont_filter=True)
                post_req.meta['law_item'] = law_item
                post_req.meta['op_data'] = op

                callback_requests.append(post_req)

            log.msg(green("Open/Skipped Callback requests: {}/{}".format(
                len(callback_requests), skipped_ops)), level=log.INFO)

        return callback_requests
Example n. 12
0
    def parse(self, response):
        """
        Parse one pre-law detail page.

        Creates or updates the Law record with its keywords, category and
        documents, then returns follow-up scrapy Requests for any opinion
        pages not yet in the database.
        """
        # Extract fields
        ts = GENERIC.TIMESTAMP.xt(response)
        title = LAW.TITLE.xt(response)
        parl_id = LAW.PARL_ID.xt(response)
        # URL segment -4 names the legislative period.
        LLP = LegislativePeriod.objects.get(
            roman_numeral=response.url.split('/')[-4])

        # Skip the page entirely when the stored timestamp is unchanged.
        if not self.IGNORE_TIMESTAMP and not self.has_changes(
                parl_id, LLP, response.url, ts):
            self.logger.info(
                green(u"Skipping Law, no changes: {}".format(title)))
            return

        # save ids and stuff for internals
        if LLP not in self.idlist:
            self.idlist[LLP] = {}
        self.idlist[LLP][response.url] = [parl_id, LLP]

        # Extract foreign keys
        category = self.parse_category(response)
        description = PRELAW.DESCRIPTION.xt(response)

        # Log our progress

        logtext = u"Scraping {} with id {}, LLP {} @ {}".format(
            red(title), magenta(u"[{}]".format(parl_id)), green(unicode(LLP)),
            blue(response.url))
        log.msg(logtext, level=log.INFO)

        # Create and save Law
        pre_law_data = {
            'title': title,
            'description': description,
            'source_link': response.url,
            'ts': ts
        }
        law_item, created = Law.objects.get_or_create(parl_id=parl_id,
                                                      legislative_period=LLP,
                                                      defaults=pre_law_data)

        # NOTE(review): get_or_create does not apply defaults to existing
        # rows; this save() writes the unchanged instance back — confirm.
        if not created:
            law_item.save()

        # Attach foreign keys
        law_item.keywords = self.parse_keywords(response)
        law_item.category = category
        law_item.documents = self.parse_docs(response)

        law_item.save()

        # Parse opinions
        opinions = PRELAW.OPINIONS.xt(response)

        callback_requests = []

        # is the tab 'Parlamentarisches Verfahren available?'
        if opinions:
            skipped_ops = 0
            for op in opinions:
                # Opinions already in the DB are not re-requested.
                if Opinion.objects.filter(parl_id=op['parl_id']).exists():
                    skipped_ops += 1
                    continue
                post_req = scrapy.Request("{}/{}".format(BASE_HOST, op['url']),
                                          callback=self.parse_opinion,
                                          dont_filter=True)
                post_req.meta['law_item'] = law_item
                post_req.meta['op_data'] = op

                callback_requests.append(post_req)

            log.msg(green("Open/Skipped Callback requests: {}/{}".format(
                len(callback_requests), skipped_ops)),
                    level=log.INFO)

        return callback_requests
Example n. 13
0
    def parse(self, response):
        """
        Parse one petition detail page.

        Creates or updates the Petition record together with its
        keywords, category, documents and creators, then builds and
        returns a list of follow-up scrapy Requests (opinion pages and,
        for signable petition types, the signature listing).
        """
        # Extract fields
        title = LAW.TITLE.xt(response)
        parl_id = LAW.PARL_ID.xt(response)
        ts = GENERIC.TIMESTAMP.xt(response)

        if not (u'BI' in parl_id or u'PET' in parl_id):
            # VBG have their parl_id only in the url
            parl_id = response.url.split('/')[-2]

        status = LAW.STATUS.xt(response)

        # URL segment -4 names the legislative period; 'BR' pages
        # (Bundesrat) carry no LLP.
        raw_llp = response.url.split('/')[-4]
        if raw_llp != u'BR':
            LLP = LegislativePeriod.objects.get(
                roman_numeral=raw_llp)
        else:
            LLP = None

        # Skip the page entirely when the stored timestamp is unchanged.
        if not self.IGNORE_TIMESTAMP and not self.has_changes(parl_id, LLP, response.url, ts):
            self.logger.info(
                green(u"Skipping Petition, no changes: {}".format(
                    title)))
            return

        # save ids and stuff for internals
        if LLP not in self.idlist:
            self.idlist[LLP] = {}
        self.idlist[LLP][response.url] = [parl_id, LLP]

        # Extract foreign keys
        category = self.parse_category(response)
        description = LAW.DESCRIPTION.xt(response)

        signing_url, signable = PETITION.SIGNING.xt(response)

        signature_count = PETITION.SIGNATURE_COUNT.xt(response)

        # Parse reference
        reference = self.parse_reference(response)

        # Log our progress
        logtext = u"Scraping {} with id {}, LLP {} @ {}".format(
            red(title),
            magenta(u"[{}]".format(parl_id)),
            green(str(LLP)),
            blue(response.url)
        )
        log.msg(logtext, level=log.INFO)

        # Create and save Petition
        petition_item, petition_item_created = Petition.objects.update_or_create(
            parl_id=parl_id,
            legislative_period=LLP,
            defaults={
                'title': title,
                'status': status,
                'source_link': response.url,
                'description': description,
                'signable': signable,
                'signing_url': signing_url,
                'signature_count': signature_count,
                'reference': reference,
                'ts': ts
            }
        )

        # NOTE(review): update_or_create already saved the row, so this
        # extra save() on the update path looks redundant — confirm.
        if not petition_item_created:
            petition_item.save()

        # Attach foreign keys
        petition_item.keywords = self.parse_keywords(response)
        petition_item.category = category
        petition_item.documents = self.parse_docs(response)

        petition_item.save()

        # Parse creators
        petition_creators = self.parse_creators(response)

        for petition_creator in petition_creators:
            petition_creator.created_petitions.add(petition_item)

        callback_requests = []

        # is the tab 'Parlamentarisches Verfahren available?'
        if response.xpath('//h2[@id="tab-ParlamentarischesVerfahren"]'):
            response.meta['petition_item'] = petition_item
            self.parse_parliament_steps(response)

        # Parse opinions
        opinions = PETITION.OPINIONS.xt(response)

        if opinions:
            for op in opinions:
                # NOTE(review): opinions already in the DB are skipped,
                # assuming they never change after publication — confirm.
                if Opinion.objects.filter(parl_id=op['parl_id']).exists():
                    continue
                post_req = scrapy.Request("{}/{}".format(BASE_HOST, op['url']),
                                          callback=self.parse_opinion,
                                          dont_filter=True)
                post_req.meta['petition_item'] = petition_item
                post_req.meta['op_data'] = op

                callback_requests.append(post_req)

        # Only BI or PET (but not PET-BR) have online signatures
        # NOTE(review): 'and' binds tighter than 'or', so this reads as
        # BI or (PET and not PET-BR); that matches the comment's intent
        # only if PET-BR ids never contain 'BI' — confirm.
        if u'BI' in parl_id or u'PET' in parl_id and not u'PET-BR' in parl_id:
            signatures_base_url = '{}/PAKT/VHG/{}/{}/{}/filter.psp?xdocumentUri=/PAKT/VHG/{}/{}/{}/'\
                'index.shtml&GP_CODE={}&ITYP={}&INR={}&FBEZ=BI_001&R_1000=ALLE&STEP=&pageNumber='

            # parl_id is presumably of the form '(<number>/<type>)':
            # strip the outer chars, then split into number and type.
            raw_parl_id = petition_item.parl_id[1:-1].split('/')
            petition_type = raw_parl_id[1]
            petition_number = int(raw_parl_id[0])
            url_parl_id = '{}_{}'.format(petition_type, petition_number)

            signatures_url = signatures_base_url.format(BASE_HOST, LLP.roman_numeral, petition_type, url_parl_id,
                                                        LLP.roman_numeral, petition_type, url_parl_id,
                                                        LLP.roman_numeral, petition_type, petition_number)

            post_req = scrapy.Request(signatures_url,
                                      callback=self.parse_signatures,
                                      dont_filter=True)

            post_req.meta['petition_item'] = petition_item

            callback_requests.append(post_req)

        log.msg(green("Open Callback requests: {}".format(
            len(callback_requests))), level=log.INFO)

        return callback_requests
Example n. 14
0
    def parse(self, response):
        """
        Parse one comittee detail page.

        Creates or updates the Comittee record, its meetings with their
        agenda topics, and the laws referenced from the page, then links
        the collected laws to the comittee. Returns nothing.
        """
        # Parse
        parl_id = COMITTEE.url_to_parlid(response.url)[1]
        ts = GENERIC.TIMESTAMP.xt(response)
        LLP = COMITTEE.LLP.xt(response)
        name = COMITTEE.NAME.xt(response)

        if LLP is not None:
            nrbr = 'Nationalrat'
            legislative_period = LegislativePeriod.objects.get(
                roman_numeral=LLP)
            # NR comittees are always "active", only BR comittees are either active or inactive
            active = True
        else:
            nrbr = 'Bundesrat'
            legislative_period = None
            # BR comittees are active if they are not "aufgelöst"
            active = COMITTEE.ACTIVE.xt(response)

        # main-comittee parl_id starts with the number 1
        # sub-comittees parl_id start  with the number 2
        if not parl_id.startswith(u'(1/'):
            try:
                parent_parl_id = u'(1/{}'.format(parl_id.split('/')[1])
                parent_comitee = Comittee.objects.get(
                    parl_id=parent_parl_id, legislative_period=legislative_period)
            except Comittee.DoesNotExist:
                parent_comitee = None
        else:
            parent_comitee = None

        # Skip the page entirely when the stored timestamp is unchanged.
        if not self.IGNORE_TIMESTAMP and not self.has_changes(parl_id, legislative_period, nrbr, response.url, ts):
            self.logger.info(
                green(u"Skipping Comittee, no changes: {}".format(
                    name)))
            return

        # Log our progress
        logtext = u"Scraping {} with id {}, LLP {} @ {}".format(
            red(name),
            magenta(u"[{}]".format(parl_id)),
            green(unicode(LLP)),
            blue(response.url)
        )
        log.msg(logtext, level=log.INFO)

        description = COMITTEE.DESCRIPTION.xt(response)

        # 'active' belongs in the defaults, not the lookup: with it in the
        # lookup kwargs, a comittee whose active flag flipped would be
        # re-created instead of updated.
        comittee_data = {
            'description': description,
            'name': name,
            'source_link': response.url,
            'parent_comittee': parent_comitee,
            'active': active,
            'ts': ts
        }

        try:
            comittee_item, created_comittee = Comittee.objects.update_or_create(
                parl_id=parl_id,
                legislative_period=legislative_period,
                nrbr=nrbr,
                defaults=comittee_data
            )
        except Exception:
            # Narrowed from a bare 'except:' so SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            log.msg(
                u"Could not update/create Comittee {}".format(name),
                level=log.ERROR)
            return

        meetings = COMITTEE.MEETINGS.xt(response)

        # Laws collected from meeting topics and from the law listings,
        # linked to the comittee at the end in one bulk add().
        comittee_laws = []

        for meeting in meetings:
            agenda_data = meeting['agenda']
            if agenda_data is not None:
                agenda_item, agenda_created = Document.objects.get_or_create(
                    **agenda_data)
            else:
                agenda_item = None

            meeting_data = {
                'agenda': agenda_item
            }

            # Log our progress
            logtext = u"Scraping meeting no. {} of {} on {}".format(
                red(meeting['number']),
                magenta(name),
                green(str(meeting['date'].date())),
            )
            log.msg(logtext, level=log.INFO)

            meeting_item, meeting_created = ComitteeMeeting.objects.update_or_create(
                number=meeting['number'],
                date=meeting['date'],
                comittee=comittee_item,
                defaults=meeting_data
            )

            for topic in meeting['topics']:
                if topic['law'] is not None:
                    law = topic['law']
                    law_item = self.parse_law(law)
                    if law_item is not None:
                        comittee_laws.append(law_item)
                else:
                    law_item = None

                agenda_topic_data = {
                    'comment': topic['comment'],
                    'law': law_item,
                }

                agenda_topic_item, agenda_topic_created = ComitteeAgendaTopic.objects.update_or_create(
                    number=topic['number'],
                    meeting=meeting_item,
                    text=topic['text'],
                    defaults=agenda_topic_data,
                )

        # parse Verhandlungsgegenstaende and Veroeffentlichungen
        laws_and_reports = COMITTEE.LAWS.xt(response)

        for law in laws_and_reports:
            # Log our progress
            logtext = u"Adding law with id {}, LLP {} to {}".format(
                magenta(u"[{}]".format(law['parl_id'])),
                green(law['llp']),
                blue(name)
            )
            log.msg(logtext, level=log.INFO)

            law_item = self.parse_law(law)
            if law_item is not None:
                comittee_laws.append(law_item)

        comittee_item.laws.add(*comittee_laws)
        comittee_item.save()
Example n. 15
0
def f_verbose(value):
    if args.verbose:
        print value.replace("[X]", red("[X]")).replace("[*]", green("[*]")).replace("[!]", magenta("[!]"))\
            .replace("safe", blue("safe"))
    return
Example n. 16
0
def suggest(msg, minor=False):
    """Write a magenta suggestion line to the console."""
    # Minor suggestions are prefixed '->', major ones '>'.
    if minor:
        line = '-> %s\n' % msg
    else:
        line = '> %s\n' % msg
    ansicolor.write_out(ansicolor.magenta(line))