def get_context_data(self, *args, **kwargs):
    """Build the template context for a committee meeting detail page.

    Adds a title/description, a per-speaker-header color map, per-part
    body lengths (JSON, used by the client for pagination display), and
    the attendance list (with presence stats for non-plenum committees).
    """
    context = super(MeetingDetailView, self).get_context_data(*args, **kwargs)
    cm = context['object']
    colors = {}
    # Distinct (header, mk id) pairs of everyone who spoke in this meeting.
    speakers = cm.parts.order_by('speaker__mk').values_list('header', 'speaker__mk').distinct()
    n = speakers.count()
    for (i, (p, mk)) in enumerate(speakers):
        # Spread hues evenly across speakers; MKs get a more saturated
        # color (0.5) than non-MK speakers (0.3).
        (r, g, b) = colorsys.hsv_to_rgb(float(i) / n, 0.5 if mk else 0.3, 255)
        colors[p] = 'rgb(%i, %i, %i)' % (r, g, b)
    context['title'] = _('%(committee)s meeting on %(date)s') % {'committee': cm.committee.name, 'date': cm.date_string}
    context['description'] = _('%(committee)s meeting on %(date)s on topic %(topic)s') \
        % {'committee': cm.committee.name, 'date': cm.date_string, 'topic': cm.topics}
    # Strip double quotes so the description is safe inside HTML attributes.
    context['description'] = clean_string(context['description']).replace('"', '')
    page = self.request.GET.get('page', None)
    if page:
        context['description'] += _(' page %(page)s') % {'page': page}
    context['colors'] = colors
    # Map each protocol part's id to its body length (serialized for JS).
    parts_lengths = {}
    for part in cm.parts.all():
        parts_lengths[part.id] = len(part.body)
    context['parts_lengths'] = json.dumps(parts_lengths)
    context['paginate_by'] = models.COMMITTEE_PROTOCOL_PAGINATE_BY
    if cm.committee.type == 'plenum':
        # Plenum meetings: show a plain attendance list, no presence stats.
        context['members'] = cm.mks_attended.order_by('name')
        context['hide_member_presence'] = True
    else:
        # Get meeting members with presence calculation.
        meeting_members_ids = set(m.id for m in cm.mks_attended.all())
        context['members'] = cm.committee.members_by_presence(ids=meeting_members_ids)
        context['hide_member_presence'] = False
    return context
def parse_pdf_text(filename=None, url=None):
    """Parse extracted Knesset bills PDF text into law records (Python 2).

    Reads ``filename`` (default ``'tmp.txt'``), pulls bill/law titles out
    of the RTL-mangled extracted text, un-reverses digit runs, and collects
    original proposal ids and a publication date.

    :param filename: path to the extracted text file; defaults to 'tmp.txt'.
    :param url: source URL, used only for logging.
    :return: list of dicts with keys 'title', 'date' and optionally
             'references'/'original_ids', or None if the text is unreadable.
    """
    logger.debug('parse_pdf_text filename=%s url=%s' % (str(filename), str(url)))
    if not filename:
        filename = 'tmp.txt'
    # Use a context manager so the handle is always closed
    # (the original leaked the open file).
    with open(filename, 'rt') as f:
        content = f.read()
    d = None
    result = []
    m = re.search('עמוד(.*?)מתפרסמת בזה', content, re.UNICODE | re.DOTALL)
    if not m:  # couldn't read this file
        logger.warn("can't read this file")
        return None
    m = clean_string(m.group(1).decode('utf8'))
    m2 = re.findall('^(הצעת חוק.*?) \. '.decode('utf8'), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m3 = re.findall('^(חוק.*?) \. '.decode('utf8'), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m2.extend(m3)
    for title in m2:
        law = {}
        title = title.replace('\n', ' ')
        s = re.search(r'[^\d]\d{2,3}[^\d]', title + ' ', re.UNICODE)  # find numbers of 2-3 digits
        if s:
            (a, b) = s.span()
            # PDF extraction reverses digit runs; flip them back in place.
            title = title[:a + 1] + title[b - 2:a:-1] + title[b - 1:]  # reverse them
        law['title'] = title
        result.append(law)
    count = 0  # count how many bills we found the original_ids for so far
    lines = content.split('\n')
    for line in lines:
        m = re.search('(\d{4,4})[\.|\s](\d+)[\.|\s](\d+)', line)
        if m:
            # Date digits also come out reversed from the extraction.
            d = date(int(m.group(1)[::-1]), int(m.group(2)[::-1]), int(m.group(3)[::-1]))
        m = re.search(
            '[הצעת|הצעות] חוק מס.*?\d+/\d+.*?[הועברה|הועברו]'.decode('utf8'),
            line.decode('utf8'), re.UNICODE)
        if m:
            try:
                result[count]['references'] = line
                m2 = re.findall('\d+/\d+', line.decode('utf8'), re.UNICODE)  # find IDs of original proposals
                result[count]['original_ids'] = [a[::-1] for a in m2]
                count += 1
            except IndexError:
                # More reference lines than parsed titles; log and keep going.
                logger.exception(
                    u'parse knesset pdf exception with content {0}'.format(
                        content))
    for l in result:
        l['date'] = d
    return result
def parse_pdf_text(filename=None, url=None):
    """Parse extracted Knesset bills PDF text into law records (Python 2).

    Reads ``filename`` (default ``"tmp.txt"``), extracts bill/law titles
    from the RTL-mangled text, un-reverses digit runs, and collects
    original proposal ids (``\\w+/\\d+/\\d+`` form) and a publication date.

    :param filename: path to the extracted text file; defaults to "tmp.txt".
    :param url: source URL, used only for logging.
    :return: list of dicts with keys "title", "date" and optionally
             "references"/"original_ids", or None if the text is unreadable.
    """
    logger.debug("parse_pdf_text filename=%s url=%s" % (str(filename), str(url)))
    if not filename:
        filename = "tmp.txt"
    # Context manager guarantees the handle is closed
    # (the original leaked the open file).
    with open(filename, "rt") as f:
        content = f.read()
    d = None
    result = []
    m = re.search("עמוד(.*?)מתפרסמת בזה", content, re.UNICODE | re.DOTALL)
    if not m:  # couldn't read this file
        logger.warn("can't read this file")
        return None
    m = clean_string(m.group(1).decode("utf8"))
    m2 = re.findall("^(הצעת חוק.*?) \. ".decode("utf8"), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m3 = re.findall("^(חוק.*?) \. ".decode("utf8"), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m2.extend(m3)
    for title in m2:
        law = {}
        title = title.replace("\n", " ")
        s = re.search(r"[^\d]\d{2,3}[^\d]", title + " ", re.UNICODE)  # find numbers of 2-3 digits
        if s:
            (a, b) = s.span()
            # PDF extraction reverses digit runs; flip them back in place.
            title = title[: a + 1] + title[b - 2 : a : -1] + title[b - 1 :]  # reverse them
        law["title"] = title
        result.append(law)
    count = 0  # count how many bills we found the original_ids for so far
    lines = content.split("\n")
    for line in lines:
        m = re.search("(\d{4,4})[\.|\s](\d+)[\.|\s](\d+)", line)
        if m:
            # Date digits also come out reversed from the extraction.
            d = date(int(m.group(1)[::-1]), int(m.group(2)[::-1]), int(m.group(3)[::-1]))
        m = re.search("הצעת חוק מס.*?\w+/\d+/\d+.*?[הועברה|הועברו]".decode("utf8"), line.decode("utf8"), re.UNICODE)
        if m:
            try:
                result[count]["references"] = line
                m2 = re.findall("\w+/\d+/\d+", line.decode("utf8"), re.UNICODE)  # find IDs of original proposals
                # Reverse the digits of each id while keeping its first char.
                result[count]["original_ids"] = [a[-1:0:-1] + a[0] for a in m2]  # reverse
                count += 1
            except IndexError:
                # More reference lines than parsed titles; log the full
                # traceback and the content for diagnosis, then keep going.
                exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
                logger.error(
                    "%s", "".join(traceback.format_exception(exceptionType, exceptionValue, exceptionTraceback))
                )
                logger.error(
                    "count=%d, len(result)=%d, content = \n%s\n--- end of content"
                    % (count, len(result), content.decode("utf8"))
                )
    for l in result:
        l["date"] = d
    return result
def parse_pdf_text(filename=None, url=None):
    """Parse extracted Knesset bills PDF text into law records (Python 2).

    Reads ``filename`` (default ``'tmp.txt'``), extracts bill/law titles
    from the RTL-mangled text, un-reverses digit runs, and collects
    original proposal ids (``\\d+/\\d+`` form) and a publication date.

    :param filename: path to the extracted text file; defaults to 'tmp.txt'.
    :param url: source URL, used only for logging.
    :return: list of dicts with keys 'title', 'date' and optionally
             'references'/'original_ids', or None if the text is unreadable.
    """
    logger.debug('parse_pdf_text filename=%s url=%s' % (str(filename), str(url)))
    if not filename:
        filename = 'tmp.txt'
    # Context manager guarantees the handle is closed
    # (the original leaked the open file).
    with open(filename, 'rt') as f:
        content = f.read()
    d = None
    result = []
    m = re.search('עמוד(.*?)מתפרסמת בזה', content, re.UNICODE | re.DOTALL)
    if not m:  # couldn't read this file
        logger.warn("can't read this file")
        return None
    m = clean_string(m.group(1).decode('utf8'))
    m2 = re.findall('^(הצעת חוק.*?) \. '.decode('utf8'), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m3 = re.findall('^(חוק.*?) \. '.decode('utf8'), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m2.extend(m3)
    for title in m2:
        law = {}
        title = title.replace('\n', ' ')
        s = re.search(r'[^\d]\d{2,3}[^\d]', title + ' ', re.UNICODE)  # find numbers of 2-3 digits
        if s:
            (a, b) = s.span()
            # PDF extraction reverses digit runs; flip them back in place.
            title = title[:a + 1] + title[b - 2:a:-1] + title[b - 1:]  # reverse them
        law['title'] = title
        result.append(law)
    count = 0  # count how many bills we found the original_ids for so far
    lines = content.split('\n')
    for line in lines:
        m = re.search('(\d{4,4})[\.|\s](\d+)[\.|\s](\d+)', line)
        if m:
            # Date digits also come out reversed from the extraction.
            d = date(int(m.group(1)[::-1]), int(m.group(2)[::-1]), int(m.group(3)[::-1]))
        m = re.search('[הצעת|הצעות] חוק מס.*?\d+/\d+.*?[הועברה|הועברו]'.decode('utf8'),
                      line.decode('utf8'), re.UNICODE)
        if m:
            try:
                result[count]['references'] = line
                m2 = re.findall('\d+/\d+', line.decode('utf8'), re.UNICODE)  # find IDs of original proposals
                result[count]['original_ids'] = [a[::-1] for a in m2]
                count += 1
            except IndexError:
                # More reference lines than parsed titles; log the full
                # traceback and the content for diagnosis, then keep going.
                exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
                logger.error("%s", ''.join(traceback.format_exception(exceptionType, exceptionValue, exceptionTraceback)))
                logger.error('count=%d, len(result)=%d, content = \n%s\n--- end of content' % \
                             (count, len(result), content.decode('utf8')))
    for l in result:
        l['date'] = d
    return result
def parse_pdf_text(filename=None, url=None):
    """Parse extracted Knesset bills PDF text into law records (Python 2).

    Reads ``filename`` (default ``'tmp.txt'``), extracts bill/law titles
    from the RTL-mangled text, un-reverses digit runs, and collects
    original proposal ids (``\\w+/\\d+/\\d+`` form) and a publication date.

    :param filename: path to the extracted text file; defaults to 'tmp.txt'.
    :param url: source URL, unused here (kept for interface compatibility).
    :return: list of dicts with keys 'title', 'date' and optionally
             'references'/'original_ids', or None if the text is unreadable.
    """
    if not filename:
        filename = 'tmp.txt'
    # Context manager guarantees the handle is closed
    # (the original leaked the open file).
    with open(filename, 'rt') as f:
        content = f.read()
    d = None
    result = []
    m = re.search('עמוד(.*?)מתפרסמת בזה', content, re.UNICODE | re.DOTALL)
    if not m:
        # Marker text not found: the file is unreadable/garbled. The
        # original crashed here with AttributeError on m.group(1).
        return None
    m = clean_string(m.group(1).decode('utf8'))
    m2 = re.findall('^(הצעת חוק.*?) . '.decode('utf8'), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m3 = re.findall('^(חוק.*?) . '.decode('utf8'), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m2.extend(m3)
    for title in m2:
        law = {}
        title = title.replace('\n', ' ')
        s = re.search(r'[^\d]\d{2,3}[^\d]', title + ' ', re.UNICODE)  # find numbers of 2-3 digits
        if s:
            (a, b) = s.span()
            # PDF extraction reverses digit runs; flip them back in place.
            title = title[:a + 1] + title[b - 2:a:-1] + title[b - 1:]  # reverse them
        law['title'] = title
        result.append(law)
    count = 0  # count how many bills we found the original_ids for so far
    lines = content.split('\n')
    for line in lines:
        m = re.search('(\d{4,4})[\.|\s](\d+)[\.|\s](\d+)', line)
        if m:
            # Date digits also come out reversed from the extraction.
            d = date(int(m.group(1)[::-1]), int(m.group(2)[::-1]), int(m.group(3)[::-1]))
        m = re.search(
            'הצעת חוק מס.*?\w+/\d+/\d+.*?[הועברה|הועברו]'.decode('utf8'),
            line.decode('utf8'), re.UNICODE)
        if m:
            result[count]['references'] = line
            m2 = re.findall('\w+/\d+/\d+', line.decode('utf8'), re.UNICODE)  # find IDs of original proposals
            # Reverse the digits of each id while keeping its first char.
            result[count]['original_ids'] = [a[-1:0:-1] + a[0] for a in m2]  # reverse
            count += 1
    for l in result:
        l['date'] = d
    return result
def parse_pdf_text(filename=None, url=None):
    """Parse extracted Knesset bills PDF text into law records (Python 2).

    Reads ``filename`` (default ``'tmp.txt'``), extracts bill/law titles
    from the RTL-mangled text, un-reverses digit runs, and collects
    original proposal ids (``\\w+/\\d+/\\d+`` form) and a publication date.

    :param filename: path to the extracted text file; defaults to 'tmp.txt'.
    :param url: source URL, unused here (kept for interface compatibility).
    :return: list of dicts with keys 'title', 'date' and optionally
             'references'/'original_ids', or None if the text is unreadable.
    """
    if not filename:
        filename = 'tmp.txt'
    # Context manager guarantees the handle is closed
    # (the original leaked the open file).
    with open(filename, 'rt') as f:
        content = f.read()
    d = None
    result = []
    m = re.search('עמוד(.*?)מתפרסמת בזה', content, re.UNICODE | re.DOTALL)
    if not m:
        # Marker text not found: the file is unreadable/garbled. The
        # original crashed here with AttributeError on m.group(1).
        return None
    m = clean_string(m.group(1).decode('utf8'))
    m2 = re.findall('^(הצעת חוק.*?) . '.decode('utf8'), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m3 = re.findall('^(חוק.*?) . '.decode('utf8'), m, re.UNICODE | re.DOTALL | re.MULTILINE)
    m2.extend(m3)
    for title in m2:
        law = {}
        title = title.replace('\n', ' ')
        s = re.search(r'[^\d]\d{2,3}[^\d]', title + ' ', re.UNICODE)  # find numbers of 2-3 digits
        if s:
            (a, b) = s.span()
            # PDF extraction reverses digit runs; flip them back in place.
            title = title[:a + 1] + title[b - 2:a:-1] + title[b - 1:]  # reverse them
        law['title'] = title
        result.append(law)
    count = 0  # count how many bills we found the original_ids for so far
    lines = content.split('\n')
    for line in lines:
        m = re.search('(\d{4,4})[\.|\s](\d+)[\.|\s](\d+)', line)
        if m:
            # Date digits also come out reversed from the extraction.
            d = date(int(m.group(1)[::-1]), int(m.group(2)[::-1]), int(m.group(3)[::-1]))
        m = re.search('הצעת חוק מס.*?\w+/\d+/\d+.*?[הועברה|הועברו]'.decode('utf8'),
                      line.decode('utf8'), re.UNICODE)
        if m:
            result[count]['references'] = line
            m2 = re.findall('\w+/\d+/\d+', line.decode('utf8'), re.UNICODE)  # find IDs of original proposals
            # Reverse the digits of each id while keeping its first char.
            result[count]['original_ids'] = [a[-1:0:-1] + a[0] for a in m2]  # reverse
            count += 1
    for l in result:
        l['date'] = d
    return result
def get_context_data(self, *args, **kwargs):
    """Build the template context for a plenum meeting detail page.

    Adds a title/description, a per-speaker-header color map, and
    per-block body lengths (JSON, used by the client for pagination
    display).
    """
    context = super(MeetingDetailView, self).get_context_data(*args, **kwargs)
    meeting = context["object"]
    transcript = meeting.transcript
    colors = {}
    # Distinct (header, mk id) pairs of everyone who spoke in this meeting.
    speakers = transcript.blocks.order_by("speaker__mk").values_list("header", "speaker__mk").distinct()
    n = speakers.count()
    for (i, (p, mk)) in enumerate(speakers):
        # Spread hues evenly across speakers; MKs get a more saturated
        # color (0.5) than non-MK speakers (0.3).
        (r, g, b) = colorsys.hsv_to_rgb(float(i) / n, 0.5 if mk else 0.3, 255)
        colors[p] = "rgb(%i, %i, %i)" % (r, g, b)
    context["title"] = _("Plenum meeting on %(date)s") % {"date": meeting.date}
    context["description"] = meeting.title
    # Strip double quotes so the description is safe inside HTML attributes.
    context["description"] = clean_string(context["description"]).replace('"', "")
    page = self.request.GET.get("page", None)
    if page:
        context["description"] += _(" page %(page)s") % {"page": page}
    context["colors"] = colors
    # Map each transcript block's id to its body length (serialized for JS).
    parts_lengths = {}
    for block in transcript.blocks.all():
        parts_lengths[block.id] = len(block.body)
    context["parts_lengths"] = json.dumps(parts_lengths)
    context["paginate_by"] = TRANSCRIPT_PAGINATE_BY
    return context