def check_entry(self, first_line_checked, entry_lines):
    """Run the style checks for a single ChangeLog entry.

    Three independent checks are performed, each reporting through
    ``self.handle_style_error``:

    * the entry mentions a bug number, or is marked "Unreviewed", or is a
      build fix (contains both "build" and "fix");
    * every file change description line ("* path: text") has whitespace
      after a colon;
    * no "No new tests. (OOPS!)" placeholder line is left in the entry.

    Args:
        first_line_checked: Line number of the first element of
            ``entry_lines``; used to compute reported line numbers.
        entry_lines: The lines of the entry. Nothing is checked when empty.
    """
    if not entry_lines:
        return

    # One qualifying line anywhere in the entry is enough.
    has_bug_reference = any(
        parse_bug_id_from_changelog(line)
        or searchIgnorecase("Unreviewed", line)
        or (searchIgnorecase("build", line) and searchIgnorecase("fix", line))
        for line in entry_lines
    )
    if not has_bug_reference:
        self.handle_style_error(first_line_checked,
                                "changelog/bugnumber", 5,
                                "ChangeLog entry has no bug number")

    # check file change descriptions for style violations
    for line_no, line in enumerate(entry_lines, first_line_checked):
        # filter file change descriptions
        if not match(r'\s*\*\s', line):
            continue
        if search(r':\s*$', line) or search(r':\s', line):
            continue
        self.handle_style_error(line_no,
                                "changelog/filechangedescriptionwhitespace", 5,
                                "Need whitespace between colon and description")

    # check for a lingering "No new tests. (OOPS!)" left over from prepare-changeLog.
    # Kept as a separate pass so errors are reported in the same order as before.
    for line_no, line in enumerate(entry_lines, first_line_checked):
        if match(r'\s*No new tests. \(OOPS!\)$', line):
            self.handle_style_error(line_no,
                                    "changelog/nonewtests", 5,
                                    "You should remove the 'No new tests' and either add and list tests, or explain why no new tests were possible.")
def parse_weekly_data(filename):
    """Print one CSV row of weekly test figures extracted from a PDF report.

    The printed row is
    ``year,week,tot_tests,positivity_rate,tot_antigen_tests,antigen_positivity_rate,filename``.
    Fields whose section is not found in the text are printed empty.

    Args:
        filename: Path of the PDF, passed to ``c.pdf_to_text``.

    Raises:
        AssertionError: If a located table row is not terminated by a newline.
    """
    txt = c.pdf_to_text(filename)
    # Join digits separated by a single whitespace ('1 234' -> '1234').
    txt = re.sub(r'(\d)\s(\d)', r'\1\2', txt)

    year = c.search(r'Stand:\s\d+\.\d+\.(\d{4})', txt)
    week = int(c.search(r'Liechtenstein - Woche (\d+) ', txt))

    def row_fields(marker, start):
        """Return (position, fields) for the line starting at the next *marker*."""
        pos = txt.find(marker, start)
        end = txt.find('\n', pos)
        assert end > pos
        return pos, re.sub(r'\s+', r' ', txt[pos:end]).split(' ')

    tot_tests = ''
    tot_antigen_tests = ''
    section_pos = txt.find('Gemeldete Tests')
    if section_pos > 0:
        # The count is the second-to-last column of the row.
        section_pos, fields = row_fields('PCR', section_pos)
        tot_tests = c.txt_to_int(fields[-2])

        # Antigen tests
        section_pos, fields = row_fields('Antigen-Schnelltests', section_pos)
        tot_antigen_tests = c.txt_to_int(fields[-2])

    positivity_rate = ''
    antigen_positivity_rate = ''
    section_pos = txt.find('\nPositivit')
    if section_pos == -1:
        section_pos = txt.find('\nAnteil positiver Tests')
    if section_pos > 0:
        # The rate is the last column of the row, written with a '%' sign.
        section_pos, fields = row_fields('PCR', section_pos)
        positivity_rate = c.txt_to_float(fields[-1].replace('%', ''))

        # Antigen tests
        section_pos, fields = row_fields('Antigen-Schnelltest', section_pos)
        antigen_positivity_rate = fields[-1]
        try:
            # Convert the antigen value itself; the previous code converted
            # the already-parsed PCR rate, which always failed silently.
            antigen_positivity_rate = c.txt_to_float(
                antigen_positivity_rate.replace('%', ''))
        except (TypeError, ValueError):
            # Best effort: keep the raw cell text when it is not numeric.
            pass

    print(
        f'{year},{week},{tot_tests},{positivity_rate},{tot_antigen_tests},{antigen_positivity_rate},{filename}'
    )
def parse_canton_data(canton, filename):
    """Print one CSV row with a canton's weekly test count and positivity rate.

    The printed row is ``year,week,number_of_tests,positivity_rate,filename``.
    Nothing is printed when the per-canton table cannot be located.

    Args:
        canton: Canton abbreviation as it appears in the table (e.g. ``AG``);
            it is interpolated into the row regex as-is.
        filename: Path of the PDF, passed to ``c.pdf_to_text``.
    """
    txt = c.pdf_to_text(filename)

    # Example of the report title the week number is taken from:
    #   "Situationsbericht zur epidemiologischen Lage in der Schweiz und im
    #    Fürstentum Liechtenstein - Woche 28 (06.-12.07.2020)"
    year = c.search(r'Stand:\s\d+\.\d+\.(\d{4})', txt)
    week = int(c.search(r'Liechtenstein - Woche (\d+)', txt))

    # Example table rows (canton, tests of previous week then current week):
    #   AG 5478 3588 808 529 1.3 1.8
    #   AI 96 55 595 341 0.0 0.0
    #   AR 391 249 708 451 0.5 1.2
    #   BE 6924 4652 669 449 0.4 0.9

    # The table caption changed between report editions; take the first match.
    start = -1
    for caption in ('Anzahl PCR-Tests in der Schweiz',
                    'Anzahl durchgeführte PCR-Tests in der Schweiz',
                    'Anzahl durchgeführte Tests in der Schweiz',
                    'Anzahl gemeldeter Tests, Anzahl Tests pro'):
        start = txt.find(caption)
        if start != -1:
            break
    if start > 0:
        # The table body begins at the first canton row.
        start = txt.find(r' AG ', start)
    else:
        start = 0

    # Likewise, several different texts may follow the table.
    end = txt.find('Tabelle 4. Durchgeführte Tests nach Kalenderwoche', start)
    if end == -1:
        end = txt.find('Die Altersverteilung der', start)
    if end == -1:
        end = txt.find('Die Anzahl durchgeführter Tests', start)
        if end >= 0:
            end -= 1
    if end == -1:
        end = txt.find('Gemeldete Tests nach Alter und Geschlecht', start)

    if not end > start > 0:
        return

    tests_table = txt[start:end]
    # the numbers are sometimes separated with spaces for >1k values
    tests_table = re.sub(r'(\d+)\s(\d+)', r'\1\2', tests_table)

    number_of_tests = c.txt_to_int(
        c.search(r'(\n\s+)?{}\s+\d+\s+(\d+)'.format(canton), tests_table, index=2))
    positivity_rate = c.txt_to_float(
        c.search(r'(\n\s+)?{}\s+.*\s([0-9]+\.[0-9]+)\n'.format(canton), tests_table, index=2))
    print(f'{year},{week},{number_of_tests},{positivity_rate},{filename}')
def _check_one_space_cmds(self, line_number, line_content):
    """Check commands that must have exactly one space before "(".

    For every command in ``self.ONE_SPACE_CMDS`` the lowercase check is run
    first; then a 'whitespace/parentheses' error is reported when the
    lowercase command is followed directly by "(" or by two or more spaces
    and "(".

    Args:
        line_number: Line number used when reporting errors.
        line_content: Text of the line being checked.
    """
    # check command like "IF (" or "if(" or "if (" or "If ()"
    for t in self.ONE_SPACE_CMDS:
        self._check_non_lowercase_cmd(line_number, line_content, t)
        lowered = t.lower()
        # Raw strings keep the regex escapes from being read as string escapes.
        if search(r'(^|\ +)' + lowered + r'(\(|\ \ +\()', line_content):
            msg = 'One space between command "' + lowered + '" and its parentheses, should be "' + t + ' ("'
            self._handle_style_error(line_number, 'whitespace/parentheses', 5, msg)
def _check_no_space_cmds(self, line_number, line_content):
    """Check commands that must not have a space before "(".

    For every command in ``self.NO_SPACE_CMDS`` the lowercase check is run
    first; then a 'whitespace/parentheses' error is reported when the
    lowercase command is followed by one or more spaces and "(".

    Args:
        line_number: Line number used when reporting errors.
        line_content: Text of the line being checked.
    """
    # check command like "SET (" or "Set("
    for t in self.NO_SPACE_CMDS:
        self._check_non_lowercase_cmd(line_number, line_content, t)
        lowered = t.lower()
        # Raw strings keep the regex escapes from being read as string escapes.
        if search(r'(^|\ +)' + lowered + r'\ +\(', line_content):
            msg = 'No space between command "' + lowered + '" and its parentheses, should be "' + t + '("'
            self._handle_style_error(line_number, 'whitespace/parentheses', 5, msg)
def parse_weekly_data(filename):
    """Print one CSV row ``week,tot_tests,positivity_rate,filename`` for a PDF.

    Values that cannot be located in the text are printed as empty fields.
    A parsed value of zero is printed as ``0`` rather than dropped.

    Args:
        filename: Path of the PDF, passed to ``c.pdf_to_text``.

    Raises:
        AssertionError: If a located row is not terminated by a newline.
    """
    txt = c.pdf_to_text(filename)
    week = c.search(r'Liechtenstein - Woche (\d+) ', txt)

    tot_tests = None
    pcr_pos = txt.find('PCR-Tests')
    if pcr_pos > 0:
        # The figures are on the line after the one containing 'PCR-Tests'.
        pcr_pos = txt.find('\n', pcr_pos) + 1
        pcr_end_pos = txt.find('\n', pcr_pos)
        assert pcr_end_pos > pcr_pos
        line = txt[pcr_pos:pcr_end_pos]
        # Join digit groups ('1 234' -> '1234'), then collapse whitespace runs.
        line = re.sub(r'(\d)\s(\d)', r'\1\2', line)
        line = re.sub(r'\s+', r' ', line)
        # The count is the second-to-last column.
        tot_tests = c.txt_to_int(line.split(' ')[-2])

    positivity_rate = None
    positivity_pos = txt.find('\nPositivit')
    if positivity_pos == -1:
        positivity_pos = txt.find('\nAnteil positive Tests')
    if positivity_pos > 0:
        # Skip the newline the search pattern starts with.
        positivity_pos += 1
        positivity_end_pos = txt.find('\n', positivity_pos)
        assert positivity_end_pos > positivity_pos
        line = txt[positivity_pos:positivity_end_pos]
        line = re.sub(r'\s+', r' ', line)
        # The rate is the last column, written with a '%' sign.
        positivity_rate = line.split(' ')[-1]
        positivity_rate = c.txt_to_float(positivity_rate.replace('%', ''))

    # Only a missing value becomes an empty field; `value or ''` would also
    # blank out a genuine 0 / 0.0.
    print('{},{},{},{}'.format(
        week,
        '' if tot_tests is None else tot_tests,
        '' if positivity_rate is None else positivity_rate,
        filename))
def dashboard(env, headers):
    """Render the dashboard page for the logged-in user.

    Appends a ``Location`` header to *headers* in both cases: ``/dashboard``
    when a user is logged in, ``/`` otherwise.

    Args:
        env: Request environment, passed to ``_get_cookies`` and
            ``_get_post_data``.
        headers: List of ``(name, value)`` tuples; mutated in place.

    Returns:
        The output of ``ui.dashboard_page`` for a logged-in user, or ``''``
        when nobody is logged in.
    """
    cookies = _get_cookies(env)
    user_id = auth.is_logined(cookies)
    if user_id is None:
        headers.append(('Location', '/'))
        return ''

    headers.append(('Location', '/dashboard'))
    post_data = _get_post_data(env)

    search_list = None
    if 'search' in post_data:
        request = post_data['search'][0]
        search_list = ui.search_page({'results': common.search(request)})

    # NOTE(review): conditions are built by string formatting; confirm that
    # db.select escapes them or that the interpolated values are trusted.
    user_cond = 'user_id={}'.format(user_id)
    users_cmps = db.select('cmps', user_cond)
    user_answers = db.select('user_answers', user_cond)

    solved_cmps = []
    # Seeded with None so that a question row whose column 2 is None is skipped.
    used_tasks = {None}
    for user_answer in user_answers:
        task_id = db.select('questions', 'id={}'.format(user_answer[2]))[0][2]
        if task_id in used_tasks:
            continue
        used_tasks.add(task_id)
        # Reuse task_id instead of running the same 'questions' query again.
        solved_cmps += db.select('cmps', 'id={}'.format(task_id))

    return ui.dashboard_page({
        'user_id': user_id,
        'search_page': search_list,
        'users_cmps': users_cmps,
        'solved_cmps': solved_cmps,
        'is_op': auth.is_op(user_id)
    })
def _process_line(self, line_number, line_content):
    """Run the per-line style checks on one line.

    Comment lines (optional leading spaces followed by "#") are ignored.
    Tabs are expanded to 4 columns for every check except the indentation
    check, which receives the original text.

    Args:
        line_number: Line number used when reporting errors.
        line_content: Raw text of the line.
    """
    if match(r'(^|\ +)#', line_content):
        # ignore comment line
        return

    expanded = line_content.expandtabs(4)

    # check command like message( "testing")
    if search(r'\(\ +', expanded):
        self._handle_style_error(line_number, 'whitespace/parentheses', 5,
                                 'No space after "("')

    # check command like message("testing" )
    # A line holding only an indented ")" is allowed.
    if search(r'\ +\)', expanded) and not search(r'^\ +\)$', expanded):
        self._handle_style_error(line_number, 'whitespace/parentheses', 5,
                                 'No space before ")"')

    self._check_trailing_whitespace(line_number, expanded)
    self._check_no_space_cmds(line_number, expanded)
    self._check_one_space_cmds(line_number, expanded)
    self._check_indent(line_number, line_content)
def get_tests(soup):
    """Extract test totals and positivity shares from the parsed page.

    Finds the ``h3`` heading matching "Tests and share of positive tests",
    reads the status date from the following paragraph and the figures from
    the following table.

    Args:
        soup: Parsed document offering ``find``; the heading found must offer
            ``find_next``.

    Returns:
        Tuple ``(date, total_tests, positivity_rate, total_antigen_tests,
        antigen_positivity_rate)``; figures not present in the table are ``''``.
    """
    heading = soup.find('h3', string=re.compile(r'Tests and share of positive tests'))
    status_paragraph = heading.find_next('p')
    date = parse_date(
        c.search(r'Status: (\d+\.\d+\.20\d{2}, \d{2}\.\d{2})h', status_paragraph.text))

    def first_value(row):
        """Return the text of the first data cell of *row*."""
        return row.find_all('td')[0].text

    total_tests = total_antigen_tests = ''
    positivity_rate = antigen_positivity_rate = ''

    for row in heading.find_next('table').find_all('tr'):
        label = row.find_all('th')[0].text
        if c.search(r'^(PCR tests)', label):
            total_tests = strip_number(first_value(row))
        if c.search(r'^(Rapid antigen tests)', label):
            total_antigen_tests = strip_number(first_value(row))
        if c.search(r'^(Share of positive PCR tests)', label):
            # Normalise the decimal comma to a dot.
            positivity_rate = c.search(r'(\d+.*)%', first_value(row)).replace(',', '.')
        if c.search(r'^(Share of positive rapid antigen tests)', label):
            antigen_positivity_rate = c.search(r'(\d+.*)%', first_value(row)).replace(',', '.')

    return date, total_tests, positivity_rate, total_antigen_tests, antigen_positivity_rate
def parse_data(filename):
    """Print one CSV row of test and contact-tracing figures from a PDF.

    The printed row is
    ``date,tot_tests,positivity_rate,isolated,quarantined,quarantined_travel,filename``.
    Values that cannot be found are printed as empty fields; a parsed zero
    is printed as ``0``.

    Args:
        filename: Path of the PDF, passed to ``c.pdf_to_text``.
    """
    txt = c.pdf_to_text(filename)

    # The timestamp is either one "Stand <date time> Uhr" phrase or separate
    # "Stand: <date>" and "Zeit: <time>" fields.
    date_time = c.search(r'Stand (\d.*) Uhr', txt)
    if date_time is None:
        date = c.search(r'Stand\: (\d{2}\.\d{2}\.20\d{2})', txt)
        time = c.search(r'Zeit: (\d+:\d{2})', txt)
        if date is not None and time is not None:
            date_time = '{} {}'.format(date, time)
    date = c.parse_date(date_time)

    tot_tests = parse_pcr_tot_tests(txt)

    # Try the known wordings of the positivity rate in turn.
    positivity_rate = c.txt_to_float(
        c.search(r'Bei (\d+)% dieser Tests fiel das Resultat positiv aus', txt))
    if positivity_rate is None:
        positivity_rate = c.txt_to_float(
            c.search(r'Positivit.tsrate( \*+| \(%\)|\*+)?\s+(\d\.?\d?)[%\s]', txt, index=2))
    if positivity_rate is None:
        positivity_rate = c.txt_to_float(
            c.search(r'Anteil positive Tests \(%\)(\d)?\s+(\d\.?\d?)[%\s]', txt, index=2))

    isolated = c.txt_to_int(
        c.search(
            r'(\d+)\s+(F.lle|Personen aufgrund einer laborbest.tigten COVID-19 Erkrankung)? in\sIsolation',
            txt, index=1))
    quarantined = c.txt_to_int(
        c.search(
            r'(\d+)\s?(in|Kontaktpersonen\sin\s.rztlich\sverordneter)? Quarant.ne',
            txt))
    quarantined_travel = None

    if isolated is None or quarantined is None:
        # Fall back to the "Total" row that follows the Contact Tracing heading.
        pos = txt.find('Contact Tracing')
        if pos > 0:
            pcr = re.compile(
                r'Total\s?(\*+|\(%\))?\s+(\d+\s?\d+|\d+)\s+(\d+\s?\d+|\d+)\s+(\d+ ?\d+|\d+)?\n'
            )
            res = pcr.search(txt, pos)
            if res is not None:
                isolated = c.txt_to_int(res[2])
                quarantined = c.txt_to_int(res[3])
                # The third column is optional in the pattern, so the group
                # may be None.
                travel = res[4]
                if travel is not None:
                    quarantined_travel = c.txt_to_int(travel.strip())

    def csv_field(value):
        """Render None as an empty field while keeping zero values."""
        return '' if value is None else value

    print('{},{},{},{},{},{},{}'.format(
        date,
        csv_field(tot_tests),
        csv_field(positivity_rate),
        csv_field(isolated),
        csv_field(quarantined),
        csv_field(quarantined_travel),
        filename))
def get_isolated_quarantined(soup):
    """Extract the contact-tracing figures from the parsed page.

    Finds the ``h3`` heading matching "Contact tracing", reads the status
    date from the following paragraph and the counts from the following
    table.

    Args:
        soup: Parsed document offering ``find``; the heading found must offer
            ``find_next``.

    Returns:
        Tuple ``(date, isolated, quarantined, travel_quarantined)``; counts
        not present in the table are ``''``.
    """
    heading = soup.find('h3', string=re.compile(r'Contact tracing'))
    status_paragraph = heading.find_next('p')
    date = parse_date(
        c.search(r'Status: (\d+\.\d+\.20\d{2}, \d{2}\.\d{2})h', status_paragraph.text))

    def first_value(row):
        """Return the cleaned number from the first data cell of *row*."""
        return strip_number(row.find_all('td')[0].text)

    isolated = quarantined = travel_quarantined = ''

    for row in heading.find_next('table').find_all('tr'):
        label = row.find_all('th')[0].text
        if c.search(r'^(In isolation)', label):
            isolated = first_value(row)
        if c.search(r'^(In quarantine)', label):
            quarantined = first_value(row)
        if c.search(r'^(Additionally in quarantine)', label):
            travel_quarantined = first_value(row)

    return date, isolated, quarantined, travel_quarantined
def _check_list_order(self, lines):
    """Check ordering, duplicates and blank-line grouping of list items.

    A list starts on a line matching ``set(NAME`` or
    ``list(APPEND|REMOVE_ITEM NAME`` and ends on a line ending with ")".
    Inside a list, each item is compared with the previous one. Lines
    containing a ``${...}`` reference are skipped entirely, and lists whose
    name contains ``INCLUDE_DIRECTORIES`` are not checked.

    Args:
        lines: The lines of the file, in order.
    """
    # None: outside a list; '': inside a list before its first item;
    # otherwise: the previous item of the current list.
    last_line = None
    empty_lines_count = 0

    # Number every line, including skipped ones, so that reported line
    # numbers stay aligned with the file.
    for line_number, line in enumerate(lines, 1):
        if search(r'\$\{.*\}', line):
            continue
        line = line.strip()

        if last_line is None:
            matched = match(r'(set\(|list\((APPEND|REMOVE_ITEM) )(?P<name>\w+)(?P<item>\s+\w+)?$', line)
            if matched:
                # FIXME: Add handling for include directories.
                if 'INCLUDE_DIRECTORIES' in matched.group('name'):
                    continue
                empty_lines_count = 0
                last_line = ''
                if matched.group('item'):
                    msg = 'First listitem "%s" should be in a new line.' % matched.group('item').strip()
                    self._handle_style_error(line_number, 'list/parentheses', 5, msg)
            continue

        matched = match(r'(?P<item>.+)?\)$', line)
        if matched:
            # Closing parenthesis: the list ends here.
            last_line = None
            if matched.group('item'):
                msg = 'The parentheses after the last listitem "%s" should be in a new line.' % matched.group('item').strip()
                self._handle_style_error(line_number, 'list/parentheses', 5, msg)
            continue

        if line == '':
            empty_lines_count += 1
            continue

        last_line_path = self._list_item_path(last_line)
        line_path = self._list_item_path(line)

        if line == last_line:
            msg = 'The item "%s" should be added only once to the list.' % line
            self._handle_style_error(line_number, 'list/duplicate', 5, msg)
        elif line_path < last_line_path or (line_path == last_line_path and line < last_line):
            msg = 'Alphabetical sorting problem. "%s" should be before "%s".' % (line, last_line)
            self._handle_style_error(line_number, 'list/order', 5, msg)
        elif last_line != '':
            if line_path != last_line_path:
                # Items with different paths are separated by one blank line.
                if empty_lines_count != 1:
                    msg = 'There should be exactly one empty line instead of %d between "%s" and "%s".' % (empty_lines_count, last_line, line)
                    self._handle_style_error(line_number, 'list/emptyline', 5, msg)
            elif empty_lines_count != 0:
                msg = 'There should be no empty line between "%s" and "%s".' % (last_line, line)
                self._handle_style_error(line_number, 'list/emptyline', 5, msg)

        last_line = line
        empty_lines_count = 0
def parse_pcr_tot_tests(txt):
    """Return the total number of tests found in *txt*, or None.

    The running-text wording ("insgesamt auf ... <number>.") is tried first.
    If that yields nothing, the line following the first occurrence of
    "Tests" is examined for a "Total" / "Totale Anzahl" row or a
    "Total durchgeführte Tests" row.

    Args:
        txt: Text extracted from the report.

    Returns:
        The value produced by ``c.txt_to_int`` for the matched number, or
        None when no pattern matches.
    """
    total = c.txt_to_int(
        c.search(r'insgesamt auf( .ber| rund| mehr als)? ([\d\s.]+)\.', txt, index=2))
    tests_at = txt.find('Tests')
    if total is not None or tests_at <= 0:
        return total

    # extract the line with Total / Totale Anzahl
    row_start = txt.find('\n', tests_at) + 1
    row_end = txt.find('\n', row_start)
    # replace whitespace between numbers '937 488' -> '937488'
    row = re.sub(r'(\d)\s(\d)', r'\1\2', txt[row_start:row_end])

    # match the value
    hit = re.match(r'(Totale Anzahl|Total)\s+\+?(\d+)\s', row)
    if hit is not None:
        return c.txt_to_int(hit[2])

    hit = re.search(r'Total durchgef.hrte Tests\s+(\d+)\s+\+?\d+\s', row)
    if hit is not None:
        total = c.txt_to_int(hit[1])
    return total
def _check_non_lowercase_cmd(self, line_number, line_content, cmd):
    """Report a command that is not written in lowercase.

    A 'command/lowercase' error is reported when *cmd* followed by "("
    is found case-insensitively but its lowercase spelling is not.

    Args:
        line_number: Line number used when reporting the error.
        line_content: Text of the line being checked.
        cmd: Command name to look for.
    """
    lowered = cmd.lower()
    # Raw strings keep the regex escapes from being read as string escapes.
    any_case_found = searchIgnorecase(r'(^|\ +)' + cmd + r'\ *\(', line_content)
    if any_case_found and not search(r'(^|\ +)' + lowered + r'\ *\(', line_content):
        msg = 'Use lowercase command "' + lowered + '"'
        self._handle_style_error(line_number, 'command/lowercase', 5, msg)
# Search over the iteration count of an SGD classifier via the project's
# common.search helper (presumably a grid search; confirm in common).
from sklearn.linear_model import SGDClassifier
import common

# NOTE(review): newer scikit-learn releases renamed 'n_iter' to 'max_iter'
# and loss='log' to 'log_loss'; confirm the version this script targets.
_sgd_estimator = SGDClassifier(loss='log', n_jobs=-1)
_sgd_param_grid = {'n_iter': [100, 120, 140, 160]}

gsearches = common.search(_sgd_estimator, _sgd_param_grid)
# Search over the 'degree' parameter of an SVC via the project's
# common.search helper; the meaning of the third argument is defined there.
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import common
import random

# NOTE(review): scikit-learn documents 'degree' as used only by the 'poly'
# kernel, while SVC() defaults to 'rbf'; confirm this grid is intended.
_svc_estimator = SVC()
_svc_param_grid = {'degree': [2, 3]}

gsearchs = common.search(_svc_estimator, _svc_param_grid, range(1, 11))