Example #1
def get_championship(*args):
    """
    Returns the chosen Championship and Season ID
    """
    if args:
        champ_id = convert_to_int(args[0])
    else:
        print("Load the list of championships...")
        # List All the most popular championships
        championships = get(
            "https://api.sofascore.com/api/v1/config/unique-tournaments/EN/football",
            headers=HEADERS,
            proxies=PROXY)
        if championships.status_code >= 400:
            raise AssertionError
        championships = championships.json()
        print("\n".join([
            f"{index + 1}. {championship['name']} - {championship['category']['flag']}"
            for index, championship in enumerate(
                championships["uniqueTournaments"])
        ]))
        print("\nWhich championship do you want to bet on ?")
        champ_id = convert_to_int(input("=> "))
        # Find the chosen championship id
        if len(championships["uniqueTournaments"]) >= champ_id:
            champ_id = championships["uniqueTournaments"][champ_id - 1]["id"]
        else:
            raise AssertionError
    seasons = get(
        f"https://api.sofascore.com/api/v1/unique-tournament/{champ_id}/seasons",
        headers=HEADERS,
        proxies=PROXY)  # Find the actual season id for the chosen championship
    if seasons.status_code >= 400:
        raise AssertionError
    return champ_id, seasons.json()["seasons"][0]["id"]
Example #2
def generate_decoder_input_target(transcripts,
                                  dataset_number,
                                  word_level=False,
                                  test=False,
                                  partitions=32):
    """
    Dispatches to the word-level or character-level input/target generation helper.
    """

    if word_level:
        # Word level encoding
        character_set = settings.CHARACTER_SET
        char_to_int = convert_to_int(sorted(character_set))
        generate_word_level_input_target_data(transcripts=transcripts,
                                              num_partition=dataset_number,
                                              char_to_int=char_to_int,
                                              partitions=partitions,
                                              test=test)
    else:
        # Character level encoding
        character_set = settings.CHARACTER_SET
        char_to_int = convert_to_int(sorted(character_set))
        generate_character_level_input_target_data(
            transcripts=transcripts,
            num_partition=dataset_number,
            char_to_int=char_to_int,
            partitions=partitions,
            test=test)
Example #3
def test_convert_to_int():
    assert utils.convert_to_int("23") == 23
    assert utils.convert_to_int("0") == 0
    assert utils.convert_to_int("-23") == -23
    assert utils.convert_to_int(245) == 245
    assert utils.convert_to_int(-245) == -245
    assert utils.convert_to_int(0) == 0
    assert utils.convert_to_int("43.53") is None
    assert utils.convert_to_int("25sn") is None
    assert utils.convert_to_int("s45") is None
    assert utils.convert_to_int("string") is None
Example #4
    def _process_housing_location(self):
        """
        Receives a housing location from the HousingLocation table and parses it editing the different fields
        """
        self._location_segments = self._housing_location.housing_location.replace(
            "-", " ").split()
        if self._location_segments == [] or convert_to_int(
                self._location_segments[0], None) is None:
            # Location did not start with a number so no further parsing
            if self._housing_location.housing_location == "":
                self._housing_location.housing_location = "UNKNOWN"
            return

        self._housing_location.division = self._location_segments[0]
        if len(self._location_segments) == 1:
            # Execute only if the housing information is a single division number, ex: '01-'
            return

        self._set_day_release()

        location_start = convert_to_int(self._location_segments[0], -1)

        if location_start in [
                2, 8, 9, 11, 14
        ] or (location_start == 1
              and "ABO" in self._housing_location.housing_location):
            self._set_sub_division(self._location_segments[1],
                                   self._location_segments[2:])
            return
        elif location_start == 3:
            if "AX" in self._housing_location.housing_location:
                self._set_sub_division(self._location_segments[2],
                                       self._location_segments[3:])
                return
        elif location_start in [5, 6, 10]:
            self._set_location_05_06_10_values()
            return
        elif location_start == 15:
            self._set_location_15_values()
            return
        elif location_start == 16:
            return
        elif location_start == 17:
            self._set_location_17_values()
            return
        elif location_start == 4:
            self._set_location_04_values()

        self._set_sub_division(
            join_with_space_and_convert_spaces(self._location_segments[1:3],
                                               ""),
            self._location_segments[3:])
Example #5
def read_rider_info(rider_soup, riders, teams):
    #try:
    position = convert_to_int(rider_soup.find('span').find('span').text)
    if position is None:
        return (None, None, None)

    time_lag = read_time_result(rider_soup)

    #read the rider's info
    info = rider_soup.find_all('a')

    team_id = read_team_info(info, teams)

    name = process_string(info[0].contents[1])
    surname = process_string(info[0].contents[0].contents[0])
    rider_link = info[0].get('href')
    key = FullName(name=name, surname=surname)
    if key not in riders:
        riders[key] = {
            'name': name,
            'surname': surname,
            'link': rider_link,
            'team_id': team_id,
            'id': len(riders) + 1
        }

    return (time_lag, position, riders[key]['id'])
Example #6
    def predict_sequence_test(self, audio_input):
        char_to_int = convert_to_int(sorted(settings.CHARACTER_SET))
        int_to_char = convert_int_to_char(char_to_int)

        t_force = "\tmsA' Alxyr >wlA hw Almwqf tm tSHyHh\n"
        encoded_transcript = []
        for character in t_force:
            encoded_character = [0] * len(settings.CHARACTER_SET)
            position = char_to_int[character]
            encoded_character[position] = 1
            encoded_transcript.append(encoded_character)

        decoder_input = np.array([encoded_transcript])
        print(decoder_input.shape)

        output = self.model.predict([audio_input, decoder_input])
        print(output.shape)
        sentence = ""
        output = output[0]
        for character in output:
            position = np.argmax(character)
            character = int_to_char[position]
            sentence += character

        print(sentence)
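
The per-character one-hot loop in the example above can be expressed more compactly with numpy. The helper below is a sketch that is not part of the original project; it assumes char_to_int maps every character that appears in the transcript.

import numpy as np

def one_hot_encode(transcript, char_to_int):
    # Build a (len(transcript), vocabulary_size) one-hot matrix in one step.
    indices = [char_to_int[character] for character in transcript]
    encoded = np.zeros((len(transcript), len(char_to_int)), dtype=np.float32)
    encoded[np.arange(len(transcript)), indices] = 1.0
    return encoded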
Example #7
def get_mandatory(this_item):
    # Treat any non-negative integer as mandatory; guard against a failed
    # conversion, which would otherwise make the comparison raise.
    mandatory = convert_to_int(this_item["mandatory"])
    return mandatory is not None and mandatory >= 0
Example #8
    def _store_bail_info(self):
        # Bond: If the value is an integer, it's a dollar
        # amount. Otherwise, it's a status, e.g. "* NO BOND *".
        self._inmate.bail_amount = convert_to_int(self._inmate_details.bail_amount().replace(',', ''), None)
        if self._inmate.bail_amount is None:
            self._inmate.bail_status = self._inmate_details.bail_amount().replace('*', '').strip()
        else:
            self._inmate.bail_status = None
Example #9
    def _process_housing_location(self):
        """
        Receives a housing location from the HousingLocation table and parses it editing the different fields
        """
        self._location_segments = self._housing_location.housing_location.replace("-", " ").split()
        if self._location_segments == [] or convert_to_int(self._location_segments[0], None) is None:
            # Location did not start with a number so no further parsing
            if self._housing_location.housing_location == "":
                self._housing_location.housing_location = "UNKNOWN"
            return

        self._housing_location.division = self._location_segments[0]
        if len(self._location_segments) == 1:  # Execute only if the housing information is a single division number
            return                             # ex: '01-'

        self._set_day_release()

        location_start = convert_to_int(self._location_segments[0], -1)

        if location_start in [2, 8, 9, 11, 14] or (location_start == 1 and
                                                   "ABO" in self._housing_location.housing_location):
            self._set_sub_division(self._location_segments[1], self._location_segments[2:])
            return
        elif location_start == 3:
            if "AX" in self._housing_location.housing_location:
                self._set_sub_division(self._location_segments[2], self._location_segments[3:])
                return
        elif location_start in [5, 6, 10]:
            self._set_location_05_06_10_values()
            return
        elif location_start == 15:
            self._set_location_15_values()
            return
        elif location_start == 16:
            return
        elif location_start == 17:
            self._set_location_17_values()
            return
        elif location_start == 4:
            self._set_location_04_values()

        self._set_sub_division(join_with_space_and_convert_spaces(self._location_segments[1:3], ""),
                               self._location_segments[3:])
Example #10
    def __init__(self, elem_lst):
        self.cons_no = elem_lst[0]  # customer number

        self.cons_status = elem_lst[3]  # fee-control customer status
        if len(elem_lst[3]) == 0:
            self.cons_status = "null"

        self.org_no = elem_lst[1]  # power supply organization code

        self.rca_flag = None  # fee-control flag
        self.rca_flag = utils.convert_to_int(elem_lst[2])
Example #11
    def __init__(self, game) -> None:
        self.match_id = convert_to_int(game["id"])
        self.league_name = str(game["tournament"]["name"])
        self.hometeam_name = str(game["homeTeam"]["name"])
        self.awayteam_name = str(game["awayTeam"]["name"])
        self.game_start = strftime("%Y-%m-%d %H:%M:%S", gmtime(game["startTimestamp"]))
        self.home_team_goal = []
        self.visitor_team_goal = []
        self.score_probability = {}
        self.result_proba = Results()
        self.odds = Odds(self.match_id)
Example #12
    def test_multicharacter_conversion(self):
        value = b'\x00\x00\x00\x00'
        expected = 0
        actual, = utils.convert_to_int(value, method_name)
        self.assertEqual(actual, expected)
        value = b'\x01\x01\x01\x01'
        expected = 16843009
        actual, = utils.convert_to_int(value, method_name)
        self.assertEqual(actual, expected)
        value = b'\xff\xff\xff\x7f'
        expected = 2147483647
        actual, = utils.convert_to_int(value, method_name)
        self.assertEqual(actual, expected)

        value = b'\x00'
        expected = 0
        actual, = utils.convert_to_int(value, bool_method_name)
        self.assertEqual(actual, expected)
        value = b'\x01'
        expected = 1
        actual, = utils.convert_to_int(value, bool_method_name)
        self.assertEqual(actual, expected)
Example #13
    def test_multicharacter_conversion(self):
        value = b'\x00\x00\x00\x00'
        expected = 0
        actual, = utils.convert_to_int(value, method_name)
        self.assertEqual(actual, expected)
        value = b'\x01\x01\x01\x01'
        expected = 16843009
        actual, = utils.convert_to_int(value, method_name)
        self.assertEqual(actual, expected)
        value = b'\xff\xff\xff\x7f'
        expected = 2147483647
        actual, = utils.convert_to_int(value, method_name)
        self.assertEqual(actual, expected)

        value = b'\x00'
        expected = 0
        actual, = utils.convert_to_int(value, bool_method_name)
        self.assertEqual(actual, expected)
        value = b'\x01'
        expected = 1
        actual, = utils.convert_to_int(value, bool_method_name)
        self.assertEqual(actual, expected)
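
Examples #12, #13, #19 and #21 come from a project where utils.convert_to_int takes raw bytes plus a method name and yields a sequence that callers unpack with "actual, = ...". The sketch below reproduces the behaviour asserted in the tests above; the little-endian byte order fits the expected values, but the way the method name is interpreted here is purely an assumption.

def convert_to_int(value, method_name):
    # Interpret the raw bytes as a little-endian integer and return it as a
    # one-element tuple. The asserted values above hold for both signed and
    # unsigned reads, so the method name is only used as a rough signedness
    # hint (hypothetical dispatch, not the project's real logic).
    signed = 'bool' not in method_name
    return (int.from_bytes(value, byteorder='little', signed=signed),)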
Example #14
    def __init__(self, elem_lst):
        self.cons_no = elem_lst[0]  # customer number
        self.org_no = elem_lst[1]  # power supply organization code

        self.charge_ym = None  # billing year and month
        self.charge_ym = utils.convert_to_date_YM('12_A_PAY_FLOW.TSV',
                                                  elem_lst[2])

        self.charge_date = None  # billing date
        self.charge_date = utils.convert_to_datetime('12_A_PAY_FLOW.TSV',
                                                     elem_lst[3])

        self.pay_mode = utils.convert_to_int(elem_lst[4])  # payment method
Example #15
def main_province():

    df_extended = _main_municipality()

    df_extended = df_extended \
        .fillna({"Gemeentenaam": "", "Provincienaam": ""}) \
        .groupby(["Datum", "Provincienaam", "Provinciecode", "Type"])[["Aantal", "AantalCumulatief"]] \
        .apply(lambda x: x.sum(min_count=1)) \
        .reset_index()
    # df_extended['Opmerking'] = np.nan

    df_extended.loc[df_extended["Provinciecode"] == -1,
                    ["Provincienaam"]] = np.nan

    df_extended = convert_to_int(
        df_extended, ["Aantal", "AantalCumulatief", "Provinciecode"])

    # format the columns
    df_extended = df_extended[[
        "Datum",
        "Provincienaam",
        "Provinciecode",
        "Type",
        "Aantal",
        "AantalCumulatief",
        # "Opmerking"
    ]].sort_values(["Datum", "Provinciecode"])

    Path(DATA_FOLDER, "data-provincial").mkdir(exist_ok=True)
    dates = sorted(df_extended["Datum"].unique())

    # export by date
    for data_date in dates:

        export_date(df_extended, "data-provincial", "RIVM_NL_provincial",
                    data_date,
                    str(data_date).replace("-", ""))

    # export latest
    export_date(df_extended,
                "data-provincial",
                "RIVM_NL_provincial",
                data_date=dates[-1],
                label="latest")

    # export all
    export_date(df_extended,
                "data-provincial",
                "RIVM_NL_provincial",
                data_date=None,
                label=None)
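
In the RIVM scripts (Examples #15, #18, #24 and #29) convert_to_int instead takes a DataFrame and a list of column names. A plausible stand-in, assuming a pandas version with nullable integers and not necessarily the project's own implementation, is:

import pandas as pd

def convert_to_int(df, columns):
    # Cast the listed columns to pandas' nullable Int64 dtype so that rows
    # with missing counts survive the conversion instead of raising.
    for column in columns:
        df[column] = df[column].astype("Int64")
    return df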
Example #16
    def decode_audio_sequence_character_based(self, audio_sequence):
        """
        Decodes an audio sequence into a transcript using the encoder_model and
        decoder_model generated during training.
        :param audio_sequence: 2D numpy array
        :return: String
        """
        # Getting converters
        char_to_int = convert_to_int(sorted(settings.CHARACTER_SET))
        int_to_char = convert_int_to_char(char_to_int)

        # Returns the encoded audio_sequence
        states_value = self.encoder_model.predict(audio_sequence)
        states_value = [states_value]
        print("ENCODER PREDICTION DONE")
        num_decoder_tokens = len(char_to_int)
        target_sequence = np.zeros((1, 1, num_decoder_tokens))

        # Populate the first character of target sequence with the start character.
        target_sequence[0, 0, char_to_int['\t']] = 1.
        print(target_sequence)
        stop_condition = False
        t_force = "\tmsA' Alxyr >wlA hw Almwqf tm tSHyHh\n"
        decoded_sentence = ''
        max_length = len(t_force)
        i = 0
        while not stop_condition:
            output_tokens, h = self.decoder_model.predict(
                [target_sequence] + states_value)
            states_value = [h]
            #print("DECODER PREDICTION DONE khobz")
            sampled_token_index = np.argmax(output_tokens[0, -1, :])
            sampled_char = int_to_char[sampled_token_index]
            #print(sampled_char)
            decoded_sentence += sampled_char

            if sampled_char == "\n" or len(decoded_sentence) > max_length :
                # End of transcription
                stop_condition = True
            else:
                # updating target sequence vector
                target_sequence = np.zeros((1, 1, num_decoder_tokens))
                target_sequence[0, 0, char_to_int[t_force[i]]] = 1
                i += 1

        print(decoded_sentence)
        return decoded_sentence
Example #17
def generate_decoder_input_target(character_set, transcripts):
    """
    Wrapper for the _generate_input_target_data method.
    :return: 3D numpy Array, 3D numpy Array
    """
    char_to_int = convert_to_int(character_set)
    max_transcript_length = get_longest_sample_size(transcripts)
    decoder_input, decoder_target = _generate_fixed_size_input_target_data(
        transcripts=transcripts,
        char_to_int=char_to_int,
        num_transcripts=len(transcripts),
        max_length=max_transcript_length,
        num_distinct_chars=len(character_set))
    #decoder_input, decoder_target = _generate_input_target_data(transcripts,
    #                                                           char_to_int,
    #                                                            len(character_set))
    return decoder_input, decoder_target
Example #18
def transform_api_datasets():

    df = pandas.read_csv(
        Path("raw_data", "rivm_api",
             "COVID-19_aantallen_gemeente_cumulatief-2020-05-21.csv"), ";")

    df["Date_of_report"] = df["Date_of_report"].str[0:10]
    df["Municipality_code"] = df["Municipality_code"].str[2:6].fillna(
        -1).astype(int)

    df = df \
        .groupby(["Date_of_report", "Municipality_code"])["Total_reported", "Hospital_admission", "Deceased"] \
        .sum().reset_index().rename({"Date_of_report":"Datum","Municipality_code":"Gemeentecode"}, axis=1) \
        .merge(DF_MUNICIPALITIES, on="Gemeentecode", how="left")

    df = convert_to_int(df, ["Provinciecode"])

    return df
Example #19
File: klee.py Project: FArian/tbf
    def _get_test_vector(self, test):
        def _get_value(single_line):
            var_name = single_line.split(':')[2].strip()
            prefix_end = var_name.find("'")
            var_name = var_name[prefix_end + 1:-1]
            return var_name

        ktest_tool = [os.path.join(bin_dir, 'ktest-tool')]
        exec_output = utils.execute(ktest_tool + [test.origin],
                                    err_to_output=False,
                                    quiet=True)
        test_info = exec_output.stdout.split('\n')
        vector = utils.TestVector(test.name, test.origin)
        last_number = -1
        last_nondet_method = None
        last_value = None
        for line in [l for l in test_info if l.startswith('object')]:
            logging.debug("Looking at line: %s", line)
            if 'name:' in line:
                #assert len(line.split(':')) == 3
                var_number = int(self._get_var_number(line))
                assert var_number > last_number
                last_number = var_number
                var_name = _get_value(line)
                assert last_nondet_method is None, \
                        "Last nondet method already or still assigned: %s" % last_nondet_method
                assert "'" not in var_name, \
                        "Variable name contains \"'\": %s" % var_name
                last_nondet_method = utils.get_corresponding_method_name(
                    var_name)
            elif 'data:' in line:
                #assert len(line.split(':')) == 3
                var_number = self._get_var_number(line)
                assert last_nondet_method is not None
                value = _get_value(line)
                value, = utils.convert_to_int(value, last_nondet_method)
                assert last_value is None
                last_value = str(value)
            if last_nondet_method is not None and last_value is not None:
                vector.add(last_value, last_nondet_method)
                last_nondet_method = None
                last_value = None

        return vector
Example #20
def decode_audio_sequence(audio_sequence, encoder_model, decoder_model, character_set):
    """
    Decodes audio sequence into a transcript using encoder_model and decoder_model generated from training
    :param audio_sequence: 2D numpy array
    :param encoder_model: Model
    :param decoder_model: Model
    :param character_set: Dict
    :return: String
    """
    # Getting converters
    char_to_int = convert_to_int(character_set)
    int_to_char = convert_to_char(character_set)

    states_value = encoder_model.predict(audio_sequence)

    num_decoder_tokens = len(char_to_int)
    target_sequence = np.zeros((1, 1, num_decoder_tokens))

    # Populate the first character of target sequence with the start character.
    target_sequence[0, 0, char_to_int['\t']] = 1.

    stop_condition = False
    decoded_sentence = ''

    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_sequence] + states_value)

        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = int_to_char[sampled_token_index]
        decoded_sentence += sampled_char

        if sampled_char == "\n":
            # End of transcription
            stop_condition = True
        else:
            # reset the target sequence and set the sampled character for the next step
            target_sequence = np.zeros((1, 1, num_decoder_tokens))
            target_sequence[0, 0, sampled_token_index] = 1.

        states_value = [h, c]

    return decoded_sentence
Example #21
File: klee.py Project: FArian/tbf
    def _get_test_vector(self, test):
        def _get_value(single_line):
            var_name = single_line.split(':')[2].strip()
            prefix_end = var_name.find("'")
            var_name = var_name[prefix_end+1:-1]
            return var_name

        ktest_tool = [os.path.join(bin_dir, 'ktest-tool')]
        exec_output = utils.execute(ktest_tool + [test.origin], err_to_output=False, quiet=True)
        test_info = exec_output.stdout.split('\n')
        vector = utils.TestVector(test.name, test.origin)
        last_number = -1
        last_nondet_method = None
        last_value = None
        for line in [l for l in test_info if l.startswith('object')]:
            logging.debug("Looking at line: %s", line)
            if 'name:' in line:
                #assert len(line.split(':')) == 3
                var_number = int(self._get_var_number(line))
                assert var_number > last_number
                last_number = var_number
                var_name = _get_value(line)
                assert last_nondet_method is None, \
                        "Last nondet method already or still assigned: %s" % last_nondet_method
                assert "'" not in var_name, \
                        "Variable name contains \"'\": %s" % var_name
                last_nondet_method = utils.get_corresponding_method_name(var_name)
            elif 'data:' in line:
                #assert len(line.split(':')) == 3
                var_number = self._get_var_number(line)
                assert last_nondet_method is not None
                value = _get_value(line)
                value, = utils.convert_to_int(value, last_nondet_method)
                assert last_value is None
                last_value = str(value)
            if last_nondet_method is not None and last_value is not None:
                vector.add(last_value, last_nondet_method)
                last_nondet_method = None
                last_value = None

        return vector
Example #22
def read_time_result(soup):
    try:
        time = soup.find('span', class_='time').text
        hms = time.split(':')

        if len(hms) == 1:
            return convert_to_int(hms[0], 0)
        if len(hms) == 2:
            return 60 * convert_to_int(hms[0], 0) + convert_to_int(hms[1], 0)
        if len(hms) == 3:
            return 60 * 60 * convert_to_int(hms[0], 0) + 60 * convert_to_int(
                hms[1], 0) + convert_to_int(hms[2], 0)

        return 0
    except Exception:  # missing or malformed time element
        return 0
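
A quick sanity check of read_time_result, assuming BeautifulSoup is available and the rider markup contains a span with class "time" (the tag below is made up for illustration):

from bs4 import BeautifulSoup

soup = BeautifulSoup('<span class="time">1:23:45</span>', 'html.parser')
assert read_time_result(soup) == 1 * 3600 + 23 * 60 + 45  # 5025 seconds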
Example #23
def parse_court_location(location_string):
    """
    Takes a location string of the form:

    "Criminal C\nCriminal Courts Building, Room:506\n2650 South California Avenue Room: 506\nChicago, IL 60608"

    The lines can contain spurious white-space at their beginning and end; these are stripped.

    Returns two values: a cleaned-up version of the input string and a dict of the form:
    {
        'location_name': 'Criminal C',
        'branch_name': 'Criminal Courts Building',
        'room_number': 506,
        'address': '2650 South California Avenue',
        'city': 'Chicago',
        'state': 'IL',
        'zip_code': 60608,
    }

    If the location is malformed, the original location string is returned along with an empty dict.
    """

    lines = strip_the_lines(location_string.splitlines())
    if len(lines) == 4:
        try:
            # The first line is the location_name
            location_name = lines[0]

            # Second line must be split into room number and branch name
            branch_line = lines[1].split(', Room:')
            branch_name = branch_line[0].strip()
            room_number = convert_to_int(branch_line[1], 0)

            # Third line has address - remove room number and store
            address = lines[2].split('Room:')[0].strip()

            # Fourth line has city, state and zip separated by spaces,
            # or a weird unicode space character
            city_state_zip = lines[3].replace(u'\xa0', u' ').split(' ')

            city = " ".join(city_state_zip[0:-2]).replace(',', '').strip()
            state = city_state_zip[-2].strip()
            zip_code = convert_to_int(city_state_zip[-1], 60639)

            d = {
                'location_name': location_name,
                'branch_name': branch_name,
                'room_number': room_number,
                'address': address,
                'city': city,
                'state': state,
                'zip_code': zip_code,
            }
            return "\n".join(lines), d

        except IndexError:
            log.debug("Following Court location has unknown format: %s" % location_string)
            return location_string, {}

    else:
        log.debug("Following Court location doesn't have right number of lines: %s" % location_string)
        return location_string, {}
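
A hypothetical usage of parse_court_location, assuming its helpers (strip_the_lines, convert_to_int, log) are importable alongside it; the expected values match the docstring above:

raw = ("Criminal C\nCriminal Courts Building, Room:506\n"
       "2650 South California Avenue Room: 506\nChicago, IL 60608")
cleaned, parsed = parse_court_location(raw)
assert parsed["room_number"] == 506
assert parsed["zip_code"] == 60608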
Example #24
def merge_postest():

    df_frames = {
        "raw_data/peildatum-31-03-2020-14-00.csv": None,
        "raw_data/peildatum-04-04-2020-12-45.csv": None,
        "raw_data/peildatum-01-04-2020-13-58.csv": None,
        "raw_data/peildatum-02-04-2020-14-00.csv": None,
        "raw_data/peildatum-31-03-2020-19-20.csv": None,
        "raw_data/peildatum-03-04-2020-14-00.csv": None,
        "raw_data/peildatum-07-04-2020-13-55.csv": None,
        "raw_data/peildatum-05-04-2020-14-15.csv": None,
        "raw_data/peildatum-06-04-2020-13-50.csv": None,
    }

    # files not in the list above
    for file in Path('raw_data').glob('peildatum*.csv'):
        if str(file) not in df_frames.keys():
            print(f"Parse file {file}")
            df_frames[str(file)] = parse_format_v4(file, "Meldingen")

    result = merge_df_days(df_frames)
    result["Gemeentecode"] = result["id"].astype(int)
    result = result[["Datum", "Gemeentecode", "Aantal"]]
    result["Datum"] = result["Datum"].astype(str)

    # make combinations of the new items
    combinations = itertools.product(result["Datum"].unique(), MUNICIPALITIES)
    df_base = pandas.DataFrame(combinations, columns=["Datum", "Gemeentecode"]).\
        merge(DF_MUNICIPALITIES, on="Gemeentecode", how="left").\
        merge(result, on=["Datum", "Gemeentecode"], how="left")

    # make combinations of the old items

    result_old = pandas.read_csv(Path("data", "rivm_corona_in_nl.csv"),
                                 usecols=["Datum", "Gemeentecode", "Aantal"])
    combinations = itertools.product(result_old["Datum"].unique(),
                                     MUNICIPALITIES)
    df_base_old = pandas.DataFrame(combinations, columns=["Datum", "Gemeentecode"]).\
        merge(DF_MUNICIPALITIES, on="Gemeentecode", how="left").\
        merge(result_old, on=["Datum", "Gemeentecode"], how="left")

    df = df_base.append(df_base_old).sort_values(["Datum", "Gemeentecode"])

    df = convert_to_int(df, ["Provinciecode", "Aantal"])

    # fill na
    cond = (df["Gemeentecode"] > 0) & df["Aantal"].isnull()
    df.loc[cond, "Aantal"] = 0

    # determine missing locations
    national = pandas.read_csv(Path("data", "rivm_NL_covid19_national.csv"))
    national = national[national["Type"] == "Totaal"]
    national["Aantal_nat"] = national["Aantal"].astype(int)
    national = national[["Datum", "Aantal_nat"]].set_index("Datum")

    diff = pandas.concat([national, count_values(df, "Gemeentecode")], axis=1)
    n_missing = (diff["Aantal_nat"] - diff["Aantal"]).dropna()

    for k, v in n_missing.items():
        df.loc[(df["Datum"] == k) & (df["Gemeentecode"] == -1), "Aantal"] = v

    df_rivm_api = transform_api_datasets()[[
        "Datum", "Gemeentecode", "Gemeentenaam", "Provincienaam",
        "Provinciecode", "Total_reported"
    ]]
    df_rivm_api = df_rivm_api.rename({"Total_reported": "Aantal"}, axis=1)
    df_not_in_rivm_api = df[~df["Datum"].isin(df_rivm_api["Datum"].unique())]
    df = pandas.concat([df_rivm_api, df_not_in_rivm_api], axis=0)

    df.sort_values(["Datum", "Gemeentecode"], inplace=True)

    df.to_csv(Path("data", "rivm_NL_covid19_total_municipality.csv"),
              index=False)
Example #25
def cache_ttl():
    default_ttl = 60 * 12  # Time to Live in Cache: 12 minutes
    the_cache_ttl = os.environ.get('CACHE_TTL')
    return convert_to_int(the_cache_ttl, default_ttl) if the_cache_ttl else default_ttl
Example #26
    BASE_DIR = Path(__file__).parent.parent.absolute()
    STATS_PATH = f'{BASE_DIR}/stats'
    IMG_CHARTS_PATH = f'{BASE_DIR}/charts/img'

    raw_stats_list = extract_csv(STATS_PATH)

    # Convert general data to dict { <latency>: (<QPS>, <Response Time>) }
    latency_stats = {
        "50%": [],
        "90%": [],
        "99%": [],
    }

    for percentile in latency_stats.keys():
        latency_stats[percentile] = sorted([(round(float(
            item['Requests/s'])), convert_to_int(item[percentile]))
                                            for item in raw_stats_list],
                                           key=lambda el: el[0])

    # Build line charts for each percentile
    fig, axs = plt.subplots(3)
    fig.suptitle('Load Testing')

    fig.add_subplot(111, frameon=False)
    plt.tick_params(labelcolor='none',
                    top=False,
                    bottom=False,
                    left=False,
                    right=False)
    plt.grid(False)
Example #27
    def decode_audio_sequence(self, audio_sequence):

        # Getting converters
        char_to_int = convert_to_int(sorted(settings.CHARACTER_SET))
        int_to_char = convert_int_to_char(char_to_int)

        states_value = self.encoder_model(audio_sequence)
        print("ENCODER PREDICTION DONE")

        # creating first input_sequence for decoder

        target_sequence = np.zeros((1, 1, settings.WORD_TARGET_LENGTH),
                                   dtype=np.float32)
        print(self.decoder_model.summary())
        sos_characters = ["S", "O", "S", "_"]
        target_length = len(settings.CHARACTER_SET) + 1
        for i in range(0, 4):
            position = char_to_int[sos_characters[i]] + i * target_length
            target_sequence[0, 0, position] = 1

        for i in range(4, settings.LONGEST_WORD_LENGTH):
            position = i * target_length + target_length - 1
            target_sequence[0, 0, position] = 1

        # print(target_sequence)
        stop_condition = False

        decoded_sentence = ""
        while not stop_condition:
            print("target sequence:")
            print(target_sequence)
            result = self.decoder_model.predict([target_sequence] +
                                                [states_value],
                                                steps=1)

            dense_outputs = []
            for i in range(0, settings.LONGEST_WORD_LENGTH):
                dense_outputs.append(result[i])

            h = result[-1]
            states_value = h
            print("DECODER PREDICTION DONE")

            # decoding values of each dense output
            decoded_word = ""
            for i in range(0, settings.LONGEST_WORD_LENGTH):
                sampled_token_index = np.argmax(dense_outputs[i][0, -1, :])
                if sampled_token_index == target_length - 1:
                    sampled_char = ""
                else:
                    sampled_char = int_to_char[sampled_token_index]
                decoded_word += sampled_char

            print("decoded_word is : " + decoded_word)
            corrected_word = correct_word(decoded_word)
            print("corrected_word is : " + corrected_word)
            print("corrected word in arabic is :" +
                  buckwalter_to_arabic(corrected_word))
            decoded_sentence += decoded_word + " "

            if decoded_word == "EOS_":
                stop_condition = True
            else:
                target_sequence = np.zeros((1, 1, settings.WORD_TARGET_LENGTH))
                i = 0
                for i, character in enumerate(decoded_word):
                    position = char_to_int[character] + i * target_length
                    target_sequence[0, 0, position] = 1

                if i < settings.LONGEST_WORD_LENGTH - 1:
                    for j in range(i + 1, settings.LONGEST_WORD_LENGTH):
                        position = j * target_length + target_length - 1
                        target_sequence[0, 0, position] = 1
Example #28
def parse_court_location(location_string):
    """
    Takes a location string of the form:

    "Criminal C\nCriminal Courts Building, Room:506\n2650 South California Avenue Room: 506\nChicago, IL 60608"

    The lines can contain spurious white-space at their beginning and end; these are stripped.

    Returns two values: a cleaned-up version of the input string and a dict of the form:
    {
        'location_name': 'Criminal C',
        'branch_name': 'Criminal Courts Building',
        'room_number': 506,
        'address': '2650 South California Avenue',
        'city': 'Chicago',
        'state': 'IL',
        'zip_code': 60608,
    }

    If the location is malformed, the original location string is returned along with an empty dict.
    """

    lines = strip_the_lines(location_string.splitlines())
    if len(lines) == 4:
        try:
            # The first line is the location_name
            location_name = lines[0]

            # Second line must be split into room number and branch name
            branch_line = lines[1].split(', Room:')
            branch_name = branch_line[0].strip()
            room_number = convert_to_int(branch_line[1], 0)

            # Third line has address - remove room number and store
            address = lines[2].split('Room:')[0].strip()

            # Fourth line has city, state and zip separated by spaces,
            # or a weird unicode space character
            city_state_zip = lines[3].replace(u'\xa0', u' ').split(' ')

            city = " ".join(city_state_zip[0:-2]).replace(',', '').strip()
            state = city_state_zip[-2].strip()
            zip_code = convert_to_int(city_state_zip[-1], 60639)

            d = {
                'location_name': location_name,
                'branch_name': branch_name,
                'room_number': room_number,
                'address': address,
                'city': city,
                'state': state,
                'zip_code': zip_code,
            }
            return "\n".join(lines), d

        except IndexError:
            log.debug("Following Court location has unknown format: %s" %
                      location_string)
            return location_string, {}

    else:
        log.debug(
            "Following Court location doesn't have right number of lines: %s" %
            location_string)
        return location_string, {}
Example #29
def merge_dead():

    df_frames = {
        "raw_data/peildatum-31-03-2020-14-00.csv": None,
        "raw_data/peildatum-31-03-2020-19-20.csv": None,
        "raw_data/peildatum-01-04-2020-13-58.csv": None,
        "raw_data/peildatum-02-04-2020-14-00.csv": None,
        "raw_data/peildatum-03-04-2020-14-00.csv": None,
        "raw_data/peildatum-04-04-2020-12-45.csv": None,
        "raw_data/peildatum-05-04-2020-14-15.csv": None,
        "raw_data/peildatum-06-04-2020-13-50.csv": None,
        "raw_data/peildatum-07-04-2020-13-55.csv": None,
        "raw_data/peildatum-08-04-2020-13-55.csv": None,
        "raw_data/peildatum-09-04-2020-13-50.csv": None,
        "raw_data/peildatum-10-04-2020-14-20.csv": None,
        "raw_data/peildatum-11-04-2020-14-00.csv": None,
        "raw_data/peildatum-12-04-2020-14-00.csv": None,
        "raw_data/peildatum-13-04-2020-14-00.csv": None,
        "raw_data/peildatum-14-04-2020-14-00.csv": None,
        "raw_data/peildatum-15-04-2020-14-00.csv": None,
        "raw_data/peildatum-16-04-2020-14-00.csv": None,
        "raw_data/peildatum-17-04-2020-14-00.csv": None,
        "raw_data/peildatum-17-04-2020-16-00.csv": None,
    }

    # files not in the list above
    for file in Path('raw_data').glob('peildatum*.csv'):
        if str(file) not in df_frames.keys():
            print(f"Parse file {file}")
            df_frames[str(file)] = parse_format_v4(file, "Overleden")

    result = merge_df_days(df_frames)
    result["Gemeentecode"] = result["id"].astype(int)
    result = result[["Datum", "Gemeentecode", "Aantal"]]
    result["Datum"] = result["Datum"].astype(str)

    # make combinations of the new items
    combinations = itertools.product(result["Datum"].unique(), MUNICIPALITIES)

    df = pandas.DataFrame(combinations, columns=["Datum", "Gemeentecode"]).\
        merge(DF_MUNICIPALITIES, on="Gemeentecode", how="left").\
        merge(result, on=["Datum", "Gemeentecode"], how="left")

    df = convert_to_int(df, ["Provinciecode", "Aantal"])

    # fill na
    cond = (df["Gemeentecode"] > 0) & df["Aantal"].isnull()
    df.loc[cond, "Aantal"] = 0

    # determine missing locations
    national = pandas.read_csv(Path("data", "rivm_NL_covid19_national.csv"))
    national = national[national["Type"] == "Overleden"]
    national["Aantal_nat"] = national["Aantal"].astype(int)
    national = national[["Datum", "Aantal_nat"]].set_index("Datum")

    diff = pandas.concat([national, count_values(df, "Gemeentecode")], axis=1)
    n_missing = (diff["Aantal_nat"] - diff["Aantal"]).dropna()

    for k, v in n_missing.items():
        df.loc[(df["Datum"] == k) & (df["Gemeentecode"] == -1), "Aantal"] = v

    df.sort_values(["Datum", "Gemeentecode"], inplace=True)

    print(df.tail())
    df.to_csv(Path("data", "rivm_NL_covid19_fatalities_municipality.csv"),
              index=False)
Example #30
def cache_ttl():
    default_ttl = 60 * 12  # Time to Live in Cache: 12 minutes
    the_cache_ttl = os.environ.get('CACHE_TTL')
    return convert_to_int(the_cache_ttl, default_ttl) if the_cache_ttl else default_ttl
Example #31
    def _parse_court_location(self):
        """
        Takes a location string of the form:

        "Criminal C\nCriminal Courts Building, Room:506\n2650 South California Avenue Room: 506\nChicago, IL 60608"

        The lines can contain spurious white-space at their beginning and end,
        multiple newline characters, and stray non-ASCII characters; this is all
        normalized. We return two values: a cleaned-up version of the input
        string, and a dict of the following form:

        {
            'location_name': 'Criminal C',
            'branch_name': 'Criminal Courts Building',
            'room_number': 506,
            'address': '2650 South California Avenue',
            'city': 'Chicago',
            'state': 'IL',
            'zip_code': 60608,
        }

        If the location string is not exactly 4 lines long, or doesn't match our
        current parsing expectations (mostly based on where the characters "Room:"
        appear in the string), then the normalized location string is returned,
        along with an empty dict.

        Note that room_number and zip_code are stored as ints, not strings.
        """

        location_string = self._inmate_details.court_house_location()

        if location_string == "":
            return "", {}

        # Normalize whitespace, newlines (and weird unicode character).
        location_string = location_string.replace(u'\xa0', u' ')
        lines = strip_the_lines(location_string.splitlines())

        if len(lines) == 4:
            try:
                # First line is the shortened form of the branch name, usually.
                location_name = lines[0]

                # Second line must be split into room number and branch name.
                branch_line = lines[1].split(', Room:')
                branch_name = branch_line[0].strip()
                room_number = convert_to_int(branch_line[1], 0)

                # Remove room number and store the address.
                address = lines[2].split('Room:')[0].strip()

                # Fourth line has city, state and zip separated by spaces.
                city_state_zip = lines[3].split(' ')

                city = " ".join(city_state_zip[0:-2]).replace(',', '').strip()
                state = city_state_zip[-2].strip()
                zip_code = convert_to_int(city_state_zip[-1], 60639)

                d = {
                    'location_name': location_name,
                    'branch_name': branch_name,
                    'room_number': room_number,
                    'address': address,
                    'city': city,
                    'state': state,
                    'zip_code': zip_code,
                }
                return "\n".join(lines), d

            except IndexError:
                self._debug("Following Court location has unknown format: %s" %
                            location_string)
                return "\n".join(lines), {}

        else:
            self._debug(
                "Following Court location doesn't have right number of lines: %s"
                % location_string)
            return "\n".join(lines), {}
Example #32
    lat_vs_time = {
        "50%": [],
        "90%": [],
        "99%": [],
    }

    fails_vs_time = {
        "Failures/s": [],
    }


    # Convert raw data to dict { <latency>: (<Time range>, <Response Time>) }
    for percentile in lat_vs_time.keys():
        lat_vs_time[percentile] = sorted([
            (convert_to_int(item['Time']),
             convert_to_int(item[percentile]))
            for item in raw_soak_stats_list
        ], key=lambda el: el[0])

    # Convert raw data to dict { <fails>: (<Time range>, <Failures/s>) }
    for percentile in fails_vs_time.keys():
        fails_vs_time[percentile] = sorted([
            (convert_to_int(item['Time']),
             float(item['Failures/s']))
            for item in raw_soak_stats_list
        ], key=lambda el: el[0])


    build_chart(lat_vs_time,
                'Time Range',
Example #33
    def _parse_court_location(self):

        """
        Takes a location string of the form:

        "Criminal C\nCriminal Courts Building, Room:506\n2650 South California Avenue Room: 506\nChicago, IL 60608"

        The lines can contain spurious white-space at their beginning and end,
        multiple newline characters, and stray non-ASCII characters; this is all
        normalized. We return two values: a cleaned-up version of the input
        string, and a dict of the following form:

        {
            'location_name': 'Criminal C',
            'branch_name': 'Criminal Courts Building',
            'room_number': 506,
            'address': '2650 South California Avenue',
            'city': 'Chicago',
            'state': 'IL',
            'zip_code': 60608,
        }

        If the location string is not exactly 4 lines long, or doesn't match our
        current parsing expectations (mostly based on where the characters "Room:"
        appear in the string), then the normalized location string is returned,
        along with an empty dict.

        Note that room_number and zip_code are stored as ints, not strings.
        """

        location_string = self._inmate_details.court_house_location()

        if location_string == "":
            return "", {}

        # Normalize whitespace, newlines (and weird unicode character).
        location_string = location_string.replace(u'\xa0', u' ')
        lines = strip_the_lines(location_string.splitlines())

        if len(lines) == 4:
            try:
                # First line is the shortened form of the branch name, usually.
                location_name = lines[0]

                # Second line must be split into room number and branch name.
                branch_line = lines[1].split(', Room:')
                branch_name = branch_line[0].strip()
                room_number = convert_to_int(branch_line[1], 0)

                # Remove room number and store the address.
                address = lines[2].split('Room:')[0].strip()

                # Fourth line has city, state and zip separated by spaces.
                city_state_zip = lines[3].split(' ')

                city = " ".join(city_state_zip[0:-2]).replace(',', '').strip()
                state = city_state_zip[-2].strip()
                zip_code = convert_to_int(city_state_zip[-1], 60639)

                d = {
                    'location_name': location_name,
                    'branch_name': branch_name,
                    'room_number': room_number,
                    'address': address,
                    'city': city,
                    'state': state,
                    'zip_code': zip_code,
                }
                return "\n".join(lines), d

            except IndexError:
                self._debug("Following Court location has unknown format: %s" % location_string)
                return "\n".join(lines), {}

        else:
            self._debug("Following Court location doesn't have right number of lines: %s" % location_string)
            return "\n".join(lines), {}