def get_championship(*args):
    """ Returns the chosen Championship and Season ID """
    # A championship id may be passed directly; otherwise fetch the list of
    # popular championships from SofaScore and prompt the user for a choice.
    if args:
        champ_id = convert_to_int(args[0])
    else:
        print("Load the list of championships...")
        # List All the most popular championships
        championships = get(
            "https://api.sofascore.com/api/v1/config/unique-tournaments/EN/football",
            headers=HEADERS,
            proxies=PROXY)
        # Any HTTP error is treated as fatal.
        if championships.status_code >= 400:
            raise AssertionError
        championships = championships.json()
        # Show a 1-based numbered menu of championships with country flags.
        print("\n".join([
            f"{index + 1}. {championship['name']} - {championship['category']['flag']}"
            for index, championship in enumerate(
                championships["uniqueTournaments"])
        ]))
        print("\nWhich championship do you want to bet on ?")
        champ_id = convert_to_int(input("=> "))
        # Find the chosen championship id
        # NOTE(review): if the input fails to parse, convert_to_int may return
        # None and the comparison below would raise TypeError — confirm
        # convert_to_int's failure behavior.
        if len(championships["uniqueTournaments"]) >= champ_id:
            champ_id = championships["uniqueTournaments"][champ_id - 1]["id"]
        else:
            raise AssertionError
    seasons = get(
        f"https://api.sofascore.com/api/v1/unique-tournament/{champ_id}/seasons",
        headers=HEADERS,
        proxies=PROXY)
    # Find the actual season id for the chosen championship
    if seasons.status_code >= 400:
        raise AssertionError
    # First entry is presumably the current season — verify API ordering.
    return champ_id, seasons.json()["seasons"][0]["id"]
def generate_decoder_input_target(transcripts,
                                  dataset_number,
                                  word_level=False,
                                  test=False,
                                  partitions=32):
    """ Wrapper for the _generate_input_target_data method.

    Dispatches to word-level or character-level input/target generation.

    NOTE(review): despite the original ":return: 3D numpy Array, 3D numpy
    Array" docstring, this wrapper returns nothing — the generators write
    their output elsewhere.
    """
    # Both branches previously built the identical character-to-index map;
    # hoisting it removes the duplication without changing behavior.
    char_to_int = convert_to_int(sorted(settings.CHARACTER_SET))
    if word_level:
        # Word level encoding
        generate_word_level_input_target_data(transcripts=transcripts,
                                              num_partition=dataset_number,
                                              char_to_int=char_to_int,
                                              partitions=partitions,
                                              test=test)
    else:
        # Character level encoding
        generate_character_level_input_target_data(
            transcripts=transcripts,
            num_partition=dataset_number,
            char_to_int=char_to_int,
            partitions=partitions,
            test=test)
def test_convert_to_int():
    """convert_to_int parses clean ints and returns None on anything else."""
    # Inputs that must round-trip to an int.
    parseable = (("23", 23), ("0", 0), ("-23", -23),
                 (245, 245), (-245, -245), (0, 0))
    for raw, expected in parseable:
        assert utils.convert_to_int(raw) == expected
    # Inputs that cannot be represented as an int.
    for raw in ("43.53", "25sn", "s45", "string"):
        assert utils.convert_to_int(raw) is None
def _process_housing_location(self):
    """ Receives a housing location from the HousingLocation table and parses it editing the different fields """
    # Split "01-ABC-2" style strings into segments on '-' and whitespace.
    self._location_segments = self._housing_location.housing_location.replace(
        "-", " ").split()
    if self._location_segments == [] or convert_to_int(
            self._location_segments[0], None) is None:
        # Location did not start with a number so no further parsing
        if self._housing_location.housing_location == "":
            self._housing_location.housing_location = "UNKNOWN"
        return
    # First numeric segment is the division.
    self._housing_location.division = self._location_segments[0]
    if len(
            self._location_segments
    ) == 1:  # Execute only if the housing information is a single division number
        return  # ex: '01-'
    self._set_day_release()
    # Dispatch on the division number; each branch delegates to a
    # division-specific parser.
    location_start = convert_to_int(self._location_segments[0], -1)
    if location_start in [
            2, 8, 9, 11, 14
    ] or (location_start == 1
          and "ABO" in self._housing_location.housing_location):
        self._set_sub_division(self._location_segments[1],
                               self._location_segments[2:])
        return
    elif location_start == 3:
        # Division 3 only has a sub-division when the location mentions "AX";
        # otherwise falls through to the generic handling below.
        if "AX" in self._housing_location.housing_location:
            self._set_sub_division(self._location_segments[2],
                                   self._location_segments[3:])
            return
    elif location_start in [5, 6, 10]:
        self._set_location_05_06_10_values()
        return
    elif location_start == 15:
        self._set_location_15_values()
        return
    elif location_start == 16:
        # Division 16 carries no further location detail.
        return
    elif location_start == 17:
        self._set_location_17_values()
        return
    elif location_start == 4:
        self._set_location_04_values()
    # Generic fallback (divisions 3-without-AX, 4, and any other number):
    # join segments 1-2 as the sub-division key.
    self._set_sub_division(
        join_with_space_and_convert_spaces(self._location_segments[1:3], ""),
        self._location_segments[3:])
def read_rider_info(rider_soup, riders, teams):
    """Extract one rider's result row from its BeautifulSoup node.

    Registers the rider in `riders` (keyed by FullName) on first sight and
    returns (time_lag, position, rider_id); returns (None, None, None) when
    the position cell does not parse to an int.
    """
    #try:
    position = convert_to_int(rider_soup.find('span').find('span').text)
    if position is None:
        return (None, None, None)
    time_lag = read_time_result(rider_soup)
    #read the rider's info
    info = rider_soup.find_all('a')
    team_id = read_team_info(info, teams)
    # NOTE(review): assumes the first <a> contains [surname-tag, name-text]
    # children in that order — confirm against the scraped page markup.
    name = process_string(info[0].contents[1])
    surname = process_string(info[0].contents[0].contents[0])
    rider_link = info[0].get('href')
    key = FullName(name=name, surname=surname)
    if key not in riders:
        # Ids are assigned sequentially starting at 1.
        riders[key] = {
            'name': name,
            'surname': surname,
            'link': rider_link,
            'team_id': team_id,
            'id': len(riders) + 1
        }
    return (time_lag, position, riders[key]['id'])
def predict_sequence_test(self, audio_input):
    """Run the full model on `audio_input` with a fixed teacher-forced
    transcript and print the decoded sentence.

    The forced transcript is one-hot encoded per character and fed to the
    decoder alongside the audio features.
    """
    char_to_int = convert_to_int(sorted(settings.CHARACTER_SET))
    int_to_char = convert_int_to_char(char_to_int)
    # Fixed Buckwalter-transliterated sentence used as the forced target
    # ('\t' = start token, '\n' = end token).
    t_force = "\tmsA' Alxyr >wlA hw Almwqf tm tSHyHh\n"
    encoded_transcript = []
    # The original loop used enumerate() but never used the index.
    for character in t_force:
        encoded_character = [0] * len(settings.CHARACTER_SET)
        encoded_character[char_to_int[character]] = 1
        encoded_transcript.append(encoded_character)
    decoder_input = np.array([encoded_transcript])
    print(decoder_input.shape)
    output = self.model.predict([audio_input, decoder_input])
    print(output.shape)
    # Greedy decode: argmax each timestep, then join once instead of
    # concatenating strings in a loop.
    sentence = "".join(int_to_char[np.argmax(character)]
                       for character in output[0])
    print(sentence)
def get_mandatory(this_item):
    """Return True when the item's 'mandatory' field parses to a
    non-negative integer, False otherwise."""
    return convert_to_int(this_item["mandatory"]) >= 0
def _store_bail_info(self):
    """Store the inmate's bail amount (int dollars) or bail status string.

    Bond: If the value is an integer, it's a dollar amount. Otherwise,
    it's a status, e.g. "* NO BOND *".
    """
    # Fetch the raw value once instead of calling the accessor twice
    # (presumably a pure getter — confirm _inmate_details has no side
    # effects per call).
    raw_bail = self._inmate_details.bail_amount()
    self._inmate.bail_amount = convert_to_int(raw_bail.replace(',', ''), None)
    if self._inmate.bail_amount is None:
        # Not numeric: keep it as a status, stripped of decoration.
        self._inmate.bail_status = raw_bail.replace('*', '').strip()
    else:
        self._inmate.bail_status = None
def _process_housing_location(self):
    """ Receives a housing location from the HousingLocation table and parses it editing the different fields """
    # Split "01-ABC-2" style strings into segments on '-' and whitespace.
    self._location_segments = self._housing_location.housing_location.replace("-", " ").split()
    if self._location_segments == [] or convert_to_int(self._location_segments[0], None) is None:
        # Location did not start with a number so no further parsing
        if self._housing_location.housing_location == "":
            self._housing_location.housing_location = "UNKNOWN"
        return
    # First numeric segment is the division.
    self._housing_location.division = self._location_segments[0]
    if len(self._location_segments) == 1:  # Execute only if the housing information is a single division number
        return  # ex: '01-'
    self._set_day_release()
    # Dispatch on the division number; each branch delegates to a
    # division-specific parser.
    location_start = convert_to_int(self._location_segments[0], -1)
    if location_start in [2, 8, 9, 11, 14] or (location_start == 1 and "ABO" in self._housing_location.housing_location):
        self._set_sub_division(self._location_segments[1], self._location_segments[2:])
        return
    elif location_start == 3:
        # Division 3 only has a sub-division when the location mentions "AX";
        # otherwise falls through to the generic handling below.
        if "AX" in self._housing_location.housing_location:
            self._set_sub_division(self._location_segments[2], self._location_segments[3:])
            return
    elif location_start in [5, 6, 10]:
        self._set_location_05_06_10_values()
        return
    elif location_start == 15:
        self._set_location_15_values()
        return
    elif location_start == 16:
        # Division 16 carries no further location detail.
        return
    elif location_start == 17:
        self._set_location_17_values()
        return
    elif location_start == 4:
        self._set_location_04_values()
    # Generic fallback (divisions 3-without-AX, 4, and any other number):
    # join segments 1-2 as the sub-division key.
    self._set_sub_division(join_with_space_and_convert_spaces(self._location_segments[1:3], ""), self._location_segments[3:])
def __init__(self, elem_lst):
    """Populate fields from one record (a list of string columns)."""
    self.cons_no = elem_lst[0]  # consumer number
    # Fee-control consumer status; an empty field is normalized to "null"
    # (previously assigned then conditionally overwritten).
    self.cons_status = elem_lst[3] if elem_lst[3] else "null"
    self.org_no = elem_lst[1]  # power-supply organization code
    # Fee-control flag (the redundant `= None` pre-assignment was removed —
    # it was immediately overwritten).
    self.rca_flag = utils.convert_to_int(elem_lst[2])
def __init__(self, game) -> None:
    """Build a match record from one SofaScore event dict."""
    self.match_id = convert_to_int(game["id"])
    self.league_name = str(game["tournament"]["name"])
    self.hometeam_name = str(game["homeTeam"]["name"])
    self.awayteam_name = str(game["awayTeam"]["name"])
    # startTimestamp is a Unix epoch; gmtime renders it in UTC.
    self.game_start = strftime("%Y-%m-%d %H:%M:%S",
                               gmtime(game["startTimestamp"]))
    # Goal minute lists, filled in as the match is processed.
    self.home_team_goal = []
    self.visitor_team_goal = []
    # Mapping of scoreline -> probability, computed later.
    self.score_probability = {}
    self.result_proba = Results()
    # Odds lookup keyed by this match id.
    self.odds = Odds(self.match_id)
def test_multicharacter_conversion(self):
    """convert_to_int unpacks little-endian byte strings for both the
    multi-byte int method and the single-byte bool method."""
    int_cases = (
        (b'\x00\x00\x00\x00', 0),
        (b'\x01\x01\x01\x01', 16843009),
        (b'\xff\xff\xff\x7f', 2147483647),
    )
    for value, expected in int_cases:
        actual, = utils.convert_to_int(value, method_name)
        self.assertEqual(actual, expected)
    bool_cases = (
        (b'\x00', 0),
        (b'\x01', 1),
    )
    for value, expected in bool_cases:
        actual, = utils.convert_to_int(value, bool_method_name)
        self.assertEqual(actual, expected)
def __init__(self, elem_lst): self.cons_no = elem_lst[0] #用户编号 self.org_no = elem_lst[1] #供电单位代码 self.charge_ym = None #收费年月 self.charge_ym = utils.convert_to_date_YM('12_A_PAY_FLOW.TSV', elem_lst[2]) self.charge_date = None #收费日期 self.charge_date = utils.convert_to_datetime('12_A_PAY_FLOW.TSV', elem_lst[3]) self.pay_mode = utils.convert_to_int(elem_lst[4]) #缴费方式
def main_province():
    """Aggregate the municipal dataset to province level and export one CSV
    per date, plus 'latest' and full-history exports."""
    df_extended = _main_municipality()

    # FIX: selecting grouped columns with a bare tuple after groupby
    # (["Aantal", "AantalCumulatief"] without an outer list) was deprecated
    # in pandas 0.25 and later removed; use list-based selection.
    df_extended = df_extended \
        .fillna({"Gemeentenaam": "", "Provincienaam": ""}) \
        .groupby(["Datum", "Provincienaam", "Provinciecode", "Type"])[["Aantal", "AantalCumulatief"]] \
        .apply(lambda x: x.sum(min_count=1)) \
        .reset_index()

    # df_extended['Opmerking'] = np.nan

    # Code -1 is the "unknown province" bucket; blank out its name.
    df_extended.loc[df_extended["Provinciecode"] == -1,
                    ["Provincienaam"]] = np.nan

    df_extended = convert_to_int(
        df_extended, ["Aantal", "AantalCumulatief", "Provinciecode"])

    # format the columns
    df_extended = df_extended[[
        "Datum",
        "Provincienaam",
        "Provinciecode",
        "Type",
        "Aantal",
        "AantalCumulatief",
        # "Opmerking"
    ]].sort_values(["Datum", "Provinciecode"])

    Path(DATA_FOLDER, "data-provincial").mkdir(exist_ok=True)

    dates = sorted(df_extended["Datum"].unique())

    # export by date
    for data_date in dates:
        export_date(df_extended, "data-provincial", "RIVM_NL_provincial",
                    data_date, str(data_date).replace("-", ""))
    # export latest
    export_date(df_extended,
                "data-provincial",
                "RIVM_NL_provincial",
                data_date=dates[-1],
                label="latest")
    # export all
    export_date(df_extended,
                "data-provincial",
                "RIVM_NL_provincial",
                data_date=None,
                label=None)
def decode_audio_sequence_character_based(self, audio_sequence): """ Decodes audio sequence into a transcript using encoder_model and decoder_model generated from training :param audio_sequence: 2D numpy array :param encoder_model: Model :param decoder_model: Model :param character_set: Dict :return: String """ # Getting converters char_to_int = convert_to_int(sorted(settings.CHARACTER_SET)) int_to_char = convert_int_to_char(char_to_int) # Returns the encoded audio_sequence states_value = self.encoder_model.predict(audio_sequence) states_value = [states_value] print("ENCODER PREDICTION DONE") num_decoder_tokens = len(char_to_int) target_sequence = np.zeros((1, 1, num_decoder_tokens)) # Populate the first character of target sequence with the start character. target_sequence[0, 0, char_to_int['\t']] = 1. print(target_sequence) stop_condition = False t_force = "\tmsA' Alxyr >wlA hw Almwqf tm tSHyHh\n" decoded_sentence = '' max_length = len(t_force) i = 0 while not stop_condition: output_tokens, h = self.decoder_model.predict( [target_sequence] + states_value) states_value = [h] #print("DECODER PREDICTION DONE khobz") sampled_token_index = np.argmax(output_tokens[0, -1, :]) sampled_char = int_to_char[sampled_token_index] #print(sampled_char) decoded_sentence += sampled_char if sampled_char == "\n" or len(decoded_sentence) > max_length : # End of transcription stop_condition = True else: # updating target sequence vector target_sequence = np.zeros((1, 1, num_decoder_tokens)) target_sequence[0, 0, char_to_int[t_force[i]]] = 1 i += 1 print(decoded_sentence) return decoded_sentence
def generate_decoder_input_target(character_set, transcripts):
    """ Wrapper for the _generate_input_target_data method. :return: 3D numpy Array, 3D numpy Array """
    # Build the char->index map and size the fixed-length window to the
    # longest transcript, then delegate generation to the helper.
    converter = convert_to_int(character_set)
    longest = get_longest_sample_size(transcripts)
    return _generate_fixed_size_input_target_data(
        transcripts=transcripts,
        char_to_int=converter,
        num_transcripts=len(transcripts),
        max_length=longest,
        num_distinct_chars=len(character_set))
def transform_api_datasets():
    """Load the RIVM API municipal snapshot and aggregate it per date and
    municipality, joined with the municipality reference table.

    :return: pandas.DataFrame with Datum/Gemeentecode keys and the summed
        Total_reported, Hospital_admission and Deceased columns.
    """
    df = pandas.read_csv(
        Path("raw_data", "rivm_api",
             "COVID-19_aantallen_gemeente_cumulatief-2020-05-21.csv"), ";")

    # Keep only the date part of the timestamp.
    df["Date_of_report"] = df["Date_of_report"].str[0:10]
    # Strip the "GM" prefix from municipality codes; unknown -> -1.
    df["Municipality_code"] = df["Municipality_code"].str[2:6].fillna(
        -1).astype(int)

    # FIX: selecting grouped columns with a bare tuple after groupby was
    # deprecated in pandas 0.25 and later removed; use list-based selection.
    df = df \
        .groupby(["Date_of_report", "Municipality_code"])[["Total_reported", "Hospital_admission", "Deceased"]] \
        .sum().reset_index().rename({"Date_of_report": "Datum", "Municipality_code": "Gemeentecode"}, axis=1) \
        .merge(DF_MUNICIPALITIES, on="Gemeentecode", how="left")

    df = convert_to_int(df, ["Provinciecode"])

    return df
def _get_test_vector(self, test):
    """Parse ktest-tool output for one KLEE test case into a TestVector.

    Pairs each 'name:' line (nondet method) with the following 'data:' line
    (its value); object numbers must be strictly increasing.
    """

    def _get_value(single_line):
        # Third colon-field holds the quoted value; strip the quote prefix
        # and the trailing quote.
        var_name = single_line.split(':')[2].strip()
        prefix_end = var_name.find("'")
        var_name = var_name[prefix_end + 1:-1]
        return var_name

    ktest_tool = [os.path.join(bin_dir, 'ktest-tool')]
    exec_output = utils.execute(ktest_tool + [test.origin],
                                err_to_output=False,
                                quiet=True)
    test_info = exec_output.stdout.split('\n')
    vector = utils.TestVector(test.name, test.origin)
    last_number = -1
    last_nondet_method = None
    last_value = None
    for line in [l for l in test_info if l.startswith('object')]:
        logging.debug("Looking at line: %s", line)
        if 'name:' in line:
            #assert len(line.split(':')) == 3
            var_number = int(self._get_var_number(line))
            assert var_number > last_number
            last_number = var_number
            var_name = _get_value(line)
            assert last_nondet_method is None, \
                "Last nondet method already or still assigned: %s" % last_nondet_method
            assert "'" not in var_name, \
                "Variable name contains \"'\": %s" % var_name
            last_nondet_method = utils.get_corresponding_method_name(
                var_name)
        elif 'data:' in line:
            #assert len(line.split(':')) == 3
            var_number = self._get_var_number(line)
            assert last_nondet_method is not None
            value = _get_value(line)
            # convert_to_int returns a 1-tuple; unpack its single element.
            value, = utils.convert_to_int(value, last_nondet_method)
            assert last_value is None
            last_value = str(value)
        # Emit a (value, method) pair once both halves have been seen.
        if last_nondet_method is not None and last_value is not None:
            vector.add(last_value, last_nondet_method)
            last_nondet_method = None
            last_value = None
    return vector
def decode_audio_sequence(audio_sequence, encoder_model, decoder_model,
                          character_set):
    """
    Decodes audio sequence into a transcript using encoder_model and
    decoder_model generated from training
    :param audio_sequence: 2D numpy array
    :param encoder_model: Model
    :param decoder_model: Model
    :param character_set: Dict
    :return: String
    """
    # Getting converters
    char_to_int = convert_to_int(character_set)
    int_to_char = convert_to_char(character_set)
    states_value = encoder_model.predict(audio_sequence)
    num_decoder_tokens = len(char_to_int)
    target_sequence = np.zeros((1, 1, num_decoder_tokens))
    # Populate the first character of target sequence with the start character.
    target_sequence[0, 0, char_to_int['\t']] = 1.
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_sequence] + states_value)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        # BUG FIX: map the sampled index back through int_to_char (the
        # original indexed char_to_int, the inverse mapping).
        sampled_char = int_to_char[sampled_token_index]
        decoded_sentence += sampled_char
        if sampled_char == "\n":
            # End of transcription
            stop_condition = True
        else:
            # BUG FIX: reset the target to a fresh one-hot of the sampled
            # character (the original accumulated 1s across iterations).
            target_sequence = np.zeros((1, 1, num_decoder_tokens))
            target_sequence[0, 0, sampled_token_index] = 1
            # BUG FIX: the original assigned to a misspelled 'states_values',
            # so the decoder state was never carried forward.
            states_value = [h, c]
    return decoded_sentence
def _get_test_vector(self, test):
    """Parse ktest-tool output for one KLEE test case into a TestVector.

    Pairs each 'name:' line (nondet method) with the following 'data:' line
    (its value); object numbers must be strictly increasing.
    """
    def _get_value(single_line):
        # Third colon-field holds the quoted value; strip the quote prefix
        # and the trailing quote.
        var_name = single_line.split(':')[2].strip()
        prefix_end = var_name.find("'")
        var_name = var_name[prefix_end+1:-1]
        return var_name

    ktest_tool = [os.path.join(bin_dir, 'ktest-tool')]
    exec_output = utils.execute(ktest_tool + [test.origin], err_to_output=False, quiet=True)
    test_info = exec_output.stdout.split('\n')
    vector = utils.TestVector(test.name, test.origin)
    last_number = -1
    last_nondet_method = None
    last_value = None
    for line in [l for l in test_info if l.startswith('object')]:
        logging.debug("Looking at line: %s", line)
        if 'name:' in line:
            #assert len(line.split(':')) == 3
            var_number = int(self._get_var_number(line))
            assert var_number > last_number
            last_number = var_number
            var_name = _get_value(line)
            assert last_nondet_method is None, \
                "Last nondet method already or still assigned: %s" % last_nondet_method
            assert "'" not in var_name, \
                "Variable name contains \"'\": %s" % var_name
            last_nondet_method = utils.get_corresponding_method_name(var_name)
        elif 'data:' in line:
            #assert len(line.split(':')) == 3
            var_number = self._get_var_number(line)
            assert last_nondet_method is not None
            value = _get_value(line)
            # convert_to_int returns a 1-tuple; unpack its single element.
            value, = utils.convert_to_int(value, last_nondet_method)
            assert last_value is None
            last_value = str(value)
        # Emit a (value, method) pair once both halves have been seen.
        if last_nondet_method is not None and last_value is not None:
            vector.add(last_value, last_nondet_method)
            last_nondet_method = None
            last_value = None
    return vector
def read_time_result(soup):
    """Return the rider's result time in seconds, parsed from the
    'time' span ("S", "M:S" or "H:M:S"); 0 when missing or malformed.
    """
    try:
        time = soup.find('span', class_='time').text
        hms = time.split(':')
        if len(hms) == 1:
            return convert_to_int(hms[0], 0)
        if len(hms) == 2:
            return 60 * convert_to_int(hms[0], 0) + convert_to_int(hms[1], 0)
        if len(hms) == 3:
            return 60 * 60 * convert_to_int(hms[0], 0) + 60 * convert_to_int(
                hms[1], 0) + convert_to_int(hms[2], 0)
        return 0
    # FIX: a bare `except:` also swallowed SystemExit/KeyboardInterrupt;
    # Exception still covers the expected AttributeError when the span
    # is missing (find() returns None).
    except Exception:
        return 0
def parse_court_location(location_string):
    """Parse a 4-line court location string such as:

        "Criminal C\nCriminal Courts Building, Room:506\n2650 South California Avenue Room: 506\nChicago, IL 60608"

    Lines are stripped of spurious whitespace. Returns a tuple of the
    cleaned-up location string and a dict with location_name, branch_name,
    room_number (int), address, city, state and zip_code (int). If the
    location is malformed, the original string and an empty dict are
    returned instead.
    """
    lines = strip_the_lines(location_string.splitlines())

    # Guard: anything other than exactly four lines is malformed.
    if len(lines) != 4:
        log.debug("Following Court location doesn't have right number of lines: %s" % location_string)
        return location_string, {}

    try:
        # Line 1: short location name.
        location_name = lines[0]

        # Line 2: "<branch>, Room:<number>" — indexing [1] raises IndexError
        # when the separator is missing, which we treat as malformed.
        branch_parts = lines[1].split(', Room:')
        branch_name = branch_parts[0].strip()
        room_number = convert_to_int(branch_parts[1], 0)

        # Line 3: street address; drop any trailing "Room:" fragment.
        address = lines[2].split('Room:')[0].strip()

        # Line 4: "City, ST ZIP", possibly using a non-breaking space.
        tokens = lines[3].replace(u'\xa0', u' ').split(' ')
        city = " ".join(tokens[0:-2]).replace(',', '').strip()
        state = tokens[-2].strip()
        zip_code = convert_to_int(tokens[-1], 60639)
    except IndexError:
        log.debug("Following Court location has unknown format: %s" % location_string)
        return location_string, {}

    return "\n".join(lines), {
        'location_name': location_name,
        'branch_name': branch_name,
        'room_number': room_number,
        'address': address,
        'city': city,
        'state': state,
        'zip_code': zip_code,
    }
def merge_postest():
    """Merge daily 'Meldingen' (reported positive-test) snapshots into one
    municipality-level CSV, preferring RIVM API data for dates it covers.
    """
    # NOTE(review): files listed here keep the value None — presumably
    # merge_df_days skips None frames; confirm against its implementation.
    df_frames = {
        "raw_data/peildatum-31-03-2020-14-00.csv": None,
        "raw_data/peildatum-04-04-2020-12-45.csv": None,
        "raw_data/peildatum-01-04-2020-13-58.csv": None,
        "raw_data/peildatum-02-04-2020-14-00.csv": None,
        "raw_data/peildatum-31-03-2020-19-20.csv": None,
        "raw_data/peildatum-03-04-2020-14-00.csv": None,
        "raw_data/peildatum-07-04-2020-13-55.csv": None,
        "raw_data/peildatum-05-04-2020-14-15.csv": None,
        "raw_data/peildatum-06-04-2020-13-50.csv": None,
    }

    # files not in the list above
    for file in Path('raw_data').glob('peildatum*.csv'):
        if str(file) not in df_frames.keys():
            print(f"Parse file {file}")
            df_frames[str(file)] = parse_format_v4(file, "Meldingen")

    result = merge_df_days(df_frames)

    result["Gemeentecode"] = result["id"].astype(int)
    result = result[["Datum", "Gemeentecode", "Aantal"]]
    result["Datum"] = result["Datum"].astype(str)

    # make combinations of the new items
    # (full Datum x Gemeentecode grid so missing days/municipalities
    # appear as NaN rows rather than being absent)
    combinations = itertools.product(result["Datum"].unique(), MUNICIPALITIES)
    df_base = pandas.DataFrame(combinations, columns=["Datum", "Gemeentecode"]).\
        merge(DF_MUNICIPALITIES, on="Gemeentecode", how="left").\
        merge(result, on=["Datum", "Gemeentecode"], how="left")

    # make combinations of the old items
    result_old = pandas.read_csv(Path("data", "rivm_corona_in_nl.csv"),
                                 usecols=["Datum", "Gemeentecode", "Aantal"])
    combinations = itertools.product(result_old["Datum"].unique(),
                                     MUNICIPALITIES)
    df_base_old = pandas.DataFrame(combinations, columns=["Datum", "Gemeentecode"]).\
        merge(DF_MUNICIPALITIES, on="Gemeentecode", how="left").\
        merge(result_old, on=["Datum", "Gemeentecode"], how="left")

    df = df_base.append(df_base_old).sort_values(["Datum", "Gemeentecode"])
    df = convert_to_int(df, ["Provinciecode", "Aantal"])

    # fill na
    cond = (df["Gemeentecode"] > 0) & df["Aantal"].isnull()
    df.loc[cond, "Aantal"] = 0

    # determine missing locations: difference between the national total
    # and the per-municipality sum goes to the unknown bucket (code -1)
    national = pandas.read_csv(Path("data", "rivm_NL_covid19_national.csv"))
    national = national[national["Type"] == "Totaal"]
    national["Aantal_nat"] = national["Aantal"].astype(int)
    national = national[["Datum", "Aantal_nat"]].set_index("Datum")

    diff = pandas.concat([national, count_values(df, "Gemeentecode")], axis=1)
    n_missing = (diff["Aantal_nat"] - diff["Aantal"]).dropna()

    for k, v in n_missing.items():
        df.loc[(df["Datum"] == k) & (df["Gemeentecode"] == -1), "Aantal"] = v

    # For dates the RIVM API covers, its data replaces the scraped frames.
    df_rivm_api = transform_api_datasets()[[
        "Datum", "Gemeentecode", "Gemeentenaam", "Provincienaam",
        "Provinciecode", "Total_reported"
    ]]
    df_rivm_api = df_rivm_api.rename({"Total_reported": "Aantal"}, axis=1)
    df_not_in_rivm_api = df[~df["Datum"].isin(df_rivm_api["Datum"].unique())]

    df = pandas.concat([df_rivm_api, df_not_in_rivm_api], axis=0)
    df.sort_values(["Datum", "Gemeentecode"], inplace=True)

    df.to_csv(Path("data", "rivm_NL_covid19_total_municipality.csv"),
              index=False)
def cache_ttl():
    """Cache time-to-live in minutes: CACHE_TTL env var when set and
    parseable, otherwise 12 hours (720 minutes)."""
    fallback = 60 * 12  # Time to Live in Cache: 12 minutes
    raw = os.environ.get('CACHE_TTL')
    if not raw:  # unset or empty string
        return fallback
    return convert_to_int(raw, fallback)
# Resolve project-relative paths for the stats CSVs and chart output.
BASE_DIR = Path(__file__).parent.parent.absolute()
STATS_PATH = f'{BASE_DIR}/stats'
IMG_CHARTS_PATH = f'{BASE_DIR}/charts/img'

# Raw locust stats rows as a list of dicts.
raw_stats_list = extract_csv(STATS_PATH)

# Convert general data to dict { <latency>: (<QPS>, <Response Time>) }
latency_stats = {
    "50%": [],
    "90%": [],
    "99%": [],
}
for percentile in latency_stats.keys():
    # Sort each series by rounded requests/s so the line chart's x axis
    # is monotone.
    latency_stats[percentile] = sorted([(round(float(
        item['Requests/s'])), convert_to_int(item[percentile]))
                                        for item in raw_stats_list],
                                       key=lambda el: el[0])

# Build line charts for each percentile
fig, axs = plt.subplots(3)
fig.suptitle('Load Testing')
# Invisible host axes used only for shared axis labels across subplots.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none',
                top=False,
                bottom=False,
                left=False,
                right=False)
plt.grid(False)
def decode_audio_sequence(self, audio_sequence):
    """Word-level greedy decoding of an audio sequence.

    Each decoder step emits one word as LONGEST_WORD_LENGTH character
    slots; decoding stops when the special word "EOS_" is produced.
    """
    # Getting converters
    char_to_int = convert_to_int(sorted(settings.CHARACTER_SET))
    int_to_char = convert_int_to_char(char_to_int)
    states_value = self.encoder_model(audio_sequence)
    print("ENCODER PREDICTION DONE")
    # creating first input_sequence for decoder
    target_sequence = np.zeros((1, 1, settings.WORD_TARGET_LENGTH),
                               dtype=np.float32)
    print(self.decoder_model.summary())
    # Seed the first word with "SOS_" one-hots; each character slot is
    # target_length wide (character set + 1 padding class).
    sos_characters = ["S", "O", "S", "_"]
    target_length = len(settings.CHARACTER_SET) + 1
    for i in range(0, 4):
        position = char_to_int[sos_characters[i]] + i * target_length
        target_sequence[0, 0, position] = 1
    # Remaining slots are filled with the padding class (last index).
    for i in range(4, settings.LONGEST_WORD_LENGTH):
        position = i * target_length + target_length - 1
        target_sequence[0, 0, position] = 1
    # print(target_sequence)
    stop_condition = False
    decoded_sentence = ""
    while not stop_condition:
        print("target sequence:")
        print(target_sequence)
        result = self.decoder_model.predict([target_sequence] +
                                            [states_value],
                                            steps=1)
        # One dense output per character slot; last element is the state.
        dense_outputs = []
        for i in range(0, settings.LONGEST_WORD_LENGTH):
            dense_outputs.append(result[i])
        h = result[-1]
        states_value = h
        print("DECODER PREDICTION DONE")
        # decoding values of each dense output
        decoded_word = ""
        for i in range(0, settings.LONGEST_WORD_LENGTH):
            sampled_token_index = np.argmax(dense_outputs[i][0, -1, :])
            if sampled_token_index == target_length - 1:
                # Padding class: contributes no character.
                sampled_char = ""
            else:
                sampled_char = int_to_char[sampled_token_index]
            decoded_word += sampled_char
        print("decoded_word is : " + decoded_word)
        corrected_word = correct_word(decoded_word)
        print("corrected_word is : " + corrected_word)
        print("corrected word in arabic is :" +
              buckwalter_to_arabic(corrected_word))
        decoded_sentence += decoded_word + " "
        if decoded_word == "EOS_":
            stop_condition = True
        else:
            # Re-encode the decoded word as the next decoder input.
            target_sequence = np.zeros((1, 1, settings.WORD_TARGET_LENGTH))
            i = 0
            for i, character in enumerate(decoded_word):
                position = char_to_int[character] + i * target_length
                target_sequence[0, 0, position] = 1
            if i < settings.LONGEST_WORD_LENGTH - 1:
                # NOTE(review): this padding loop uses `i * target_length`
                # rather than `j * target_length`, so every padding one-hot
                # lands in the same slot — looks like a bug; confirm against
                # the seeding loops above, which index by the slot counter.
                for j in range(i + 1, settings.LONGEST_WORD_LENGTH):
                    position = i * target_length + target_length - 1
                    target_sequence[0, 0, position] = 1
def parse_court_location(location_string):
    """
    Takes a location string of the form:

    "Criminal C\nCriminal Courts Building, Room:506\n2650 South California Avenue Room: 506\nChicago, IL 60608"

     The lines can contain spurious white-space at the beginning and end of
     the lines, these are stripped and returns two values, cleaned up
     version the input string and a dict of the form:

    {
        'location_name': 'Criminal C',
        'branch_name': 'Criminal Courts Building',
        'room_number': 506,
        'address': '2650 South California Avenue',
        'city': 'Chicago',
        'state': 'IL',
        'zip_code': 60608,
    }

    If location is malformed, then original location string is returned
    with an empty dict
    """
    lines = strip_the_lines(location_string.splitlines())
    if len(lines) == 4:
        try:
            # The first line is the location_name
            location_name = lines[0]

            # Second line must be split into room number and branch name
            # (a missing ', Room:' separator raises IndexError below).
            branch_line = lines[1].split(', Room:')
            branch_name = branch_line[0].strip()
            room_number = convert_to_int(branch_line[1], 0)

            # Third line has address - remove room number and store
            address = lines[2].split('Room:')[0].strip()

            # Fourth line has city, state and zip separated by spaces,
            # or a weird unicode space character
            city_state_zip = lines[3].replace(u'\xa0', u' ').split(' ')

            city = " ".join(city_state_zip[0:-2]).replace(',', '').strip()
            state = city_state_zip[-2].strip()
            zip_code = convert_to_int(city_state_zip[-1], 60639)

            d = {
                'location_name': location_name,
                'branch_name': branch_name,
                'room_number': room_number,
                'address': address,
                'city': city,
                'state': state,
                'zip_code': zip_code,
            }
            return "\n".join(lines), d
        except IndexError:
            log.debug("Following Court location has unknown format: %s" %
                      location_string)
            return location_string, {}
    else:
        log.debug(
            "Following Court location doesn't have right number of lines: %s"
            % location_string)
        return location_string, {}
def merge_dead():
    """Merge daily 'Overleden' (fatalities) snapshots into one
    municipality-level CSV with the unknown bucket backfilled from the
    national totals."""
    # NOTE(review): files listed here keep the value None — presumably
    # merge_df_days skips None frames; confirm against its implementation.
    df_frames = {
        "raw_data/peildatum-31-03-2020-14-00.csv": None,
        "raw_data/peildatum-31-03-2020-19-20.csv": None,
        "raw_data/peildatum-01-04-2020-13-58.csv": None,
        "raw_data/peildatum-02-04-2020-14-00.csv": None,
        "raw_data/peildatum-03-04-2020-14-00.csv": None,
        "raw_data/peildatum-04-04-2020-12-45.csv": None,
        "raw_data/peildatum-05-04-2020-14-15.csv": None,
        "raw_data/peildatum-06-04-2020-13-50.csv": None,
        "raw_data/peildatum-07-04-2020-13-55.csv": None,
        "raw_data/peildatum-08-04-2020-13-55.csv": None,
        "raw_data/peildatum-09-04-2020-13-50.csv": None,
        "raw_data/peildatum-10-04-2020-14-20.csv": None,
        "raw_data/peildatum-11-04-2020-14-00.csv": None,
        "raw_data/peildatum-12-04-2020-14-00.csv": None,
        "raw_data/peildatum-13-04-2020-14-00.csv": None,
        "raw_data/peildatum-14-04-2020-14-00.csv": None,
        "raw_data/peildatum-15-04-2020-14-00.csv": None,
        "raw_data/peildatum-16-04-2020-14-00.csv": None,
        "raw_data/peildatum-17-04-2020-14-00.csv": None,
        "raw_data/peildatum-17-04-2020-16-00.csv": None,
    }

    # files not in the list above
    for file in Path('raw_data').glob('peildatum*.csv'):
        if str(file) not in df_frames.keys():
            print(f"Parse file {file}")
            df_frames[str(file)] = parse_format_v4(file, "Overleden")

    result = merge_df_days(df_frames)

    result["Gemeentecode"] = result["id"].astype(int)
    result = result[["Datum", "Gemeentecode", "Aantal"]]
    result["Datum"] = result["Datum"].astype(str)

    # make combinations of the new items
    # (full Datum x Gemeentecode grid so missing days/municipalities
    # appear as NaN rows rather than being absent)
    combinations = itertools.product(result["Datum"].unique(), MUNICIPALITIES)
    df = pandas.DataFrame(combinations, columns=["Datum", "Gemeentecode"]).\
        merge(DF_MUNICIPALITIES, on="Gemeentecode", how="left").\
        merge(result, on=["Datum", "Gemeentecode"], how="left")

    df = convert_to_int(df, ["Provinciecode", "Aantal"])

    # fill na
    cond = (df["Gemeentecode"] > 0) & df["Aantal"].isnull()
    df.loc[cond, "Aantal"] = 0

    # determine missing locations: difference between the national total
    # and the per-municipality sum goes to the unknown bucket (code -1)
    national = pandas.read_csv(Path("data", "rivm_NL_covid19_national.csv"))
    national = national[national["Type"] == "Overleden"]
    national["Aantal_nat"] = national["Aantal"].astype(int)
    national = national[["Datum", "Aantal_nat"]].set_index("Datum")

    diff = pandas.concat([national, count_values(df, "Gemeentecode")], axis=1)
    n_missing = (diff["Aantal_nat"] - diff["Aantal"]).dropna()

    for k, v in n_missing.items():
        df.loc[(df["Datum"] == k) & (df["Gemeentecode"] == -1), "Aantal"] = v

    df.sort_values(["Datum", "Gemeentecode"], inplace=True)

    print(df.tail())

    df.to_csv(Path("data", "rivm_NL_covid19_fatalities_municipality.csv"),
              index=False)
def _parse_court_location(self):
    """
    Takes a location string of the form:

        "Criminal C\nCriminal Courts Building, Room:506\n2650 South California Avenue Room: 506\nChicago, IL 60608"

    The lines can contain spurious white-space at their beginning and end,
    multiple newline characters, and annoying ASCII characters; this is all
    normalized, no matter what.

    We return two values: a cleaned up version of the input string, and a
    dict of the following form...

        {
            'location_name': 'Criminal C',
            'branch_name': 'Criminal Courts Building',
            'room_number': 506,
            'address': '2650 South California Avenue',
            'city': 'Chicago',
            'state': 'IL',
            'zip_code': 60608,
        }

    If location string is something other than 4 lines long, or doesn't
    match our current parsing expectations (mostly based around where the
    characters "Room:" appear in the string), then the normalized location
    string is returned, as well an empty dict.

    Note that room_number and zip_code are stored as ints, not strings.
    """
    location_string = self._inmate_details.court_house_location()
    if location_string == "":
        return "", {}

    # Normalize whitespace, newlines (and weird unicode character).
    location_string = location_string.replace(u'\xa0', u' ')
    lines = strip_the_lines(location_string.splitlines())

    if len(lines) == 4:
        try:
            # First line is the shortened form of the branch name, usually.
            location_name = lines[0]

            # Second line must be split into room number and branch name
            # (a missing ', Room:' separator raises IndexError below).
            branch_line = lines[1].split(', Room:')
            branch_name = branch_line[0].strip()
            room_number = convert_to_int(branch_line[1], 0)

            # Remove room number and store the address.
            address = lines[2].split('Room:')[0].strip()

            # Fourth line has city, state and zip separated by spaces.
            city_state_zip = lines[3].split(' ')

            city = " ".join(city_state_zip[0:-2]).replace(',', '').strip()
            state = city_state_zip[-2].strip()
            zip_code = convert_to_int(city_state_zip[-1], 60639)

            d = {
                'location_name': location_name,
                'branch_name': branch_name,
                'room_number': room_number,
                'address': address,
                'city': city,
                'state': state,
                'zip_code': zip_code,
            }
            return "\n".join(lines), d
        except IndexError:
            self._debug("Following Court location has unknown format: %s" %
                        location_string)
            return "\n".join(lines), {}
    else:
        self._debug(
            "Following Court location doesn't have right number of lines: %s"
            % location_string)
        return "\n".join(lines), {}
# Per-percentile latency series and per-second failure series, both keyed
# by the column name in the raw soak-test CSV.
lat_vs_time = {
    "50%": [],
    "90%": [],
    "99%": [],
}
fails_vs_time = {
    "Failures/s": [],
}

# Convert raw data to dict { <latency>: (<Time range>, <Response Time>) }
for percentile in lat_vs_time.keys():
    # Sort by the time bucket so each chart's x axis is monotone.
    lat_vs_time[percentile] = sorted([
        (convert_to_int(item['Time']), convert_to_int(item[percentile]))
        for item in raw_soak_stats_list
    ],
                                     key=lambda el: el[0])

# Convert raw data to dict { <fails>: (<Time range>, <Failures/s>) }
for percentile in fails_vs_time.keys():
    fails_vs_time[percentile] = sorted([
        (convert_to_int(item['Time']), float(item['Failures/s']))
        for item in raw_soak_stats_list
    ],
                                       key=lambda el: el[0])

build_chart(lat_vs_time, 'Time Range',
def _parse_court_location(self):
    """
    Takes a location string of the form:

        "Criminal C\nCriminal Courts Building, Room:506\n2650 South California Avenue Room: 506\nChicago, IL 60608"

    The lines can contain spurious white-space at their beginning and end,
    multiple newline characters, and annoying ASCII characters; this is all
    normalized, no matter what.

    We return two values: a cleaned up version of the input string, and a
    dict of the following form...

        {
            'location_name': 'Criminal C',
            'branch_name': 'Criminal Courts Building',
            'room_number': 506,
            'address': '2650 South California Avenue',
            'city': 'Chicago',
            'state': 'IL',
            'zip_code': 60608,
        }

    If location string is something other than 4 lines long, or doesn't
    match our current parsing expectations (mostly based around where the
    characters "Room:" appear in the string), then the normalized location
    string is returned, as well an empty dict.

    Note that room_number and zip_code are stored as ints, not strings.
    """
    location_string = self._inmate_details.court_house_location()
    if location_string == "":
        return "", {}

    # Normalize whitespace, newlines (and weird unicode character).
    location_string = location_string.replace(u'\xa0', u' ')
    lines = strip_the_lines(location_string.splitlines())

    if len(lines) == 4:
        try:
            # First line is the shortened form of the branch name, usually.
            location_name = lines[0]

            # Second line must be split into room number and branch name
            # (a missing ', Room:' separator raises IndexError below).
            branch_line = lines[1].split(', Room:')
            branch_name = branch_line[0].strip()
            room_number = convert_to_int(branch_line[1], 0)

            # Remove room number and store the address.
            address = lines[2].split('Room:')[0].strip()

            # Fourth line has city, state and zip separated by spaces.
            city_state_zip = lines[3].split(' ')

            city = " ".join(city_state_zip[0:-2]).replace(',', '').strip()
            state = city_state_zip[-2].strip()
            zip_code = convert_to_int(city_state_zip[-1], 60639)

            d = {
                'location_name': location_name,
                'branch_name': branch_name,
                'room_number': room_number,
                'address': address,
                'city': city,
                'state': state,
                'zip_code': zip_code,
            }
            return "\n".join(lines), d
        except IndexError:
            self._debug("Following Court location has unknown format: %s" %
                        location_string)
            return "\n".join(lines), {}
    else:
        self._debug(
            "Following Court location doesn't have right number of lines: %s"
            % location_string)
        return "\n".join(lines), {}