Ejemplo n.º 1
0
def main_process_data(
    paths,
    gsheets_api,
    google_spreadsheet_vax_id: str,
    skip_complete: list = None,
    skip_monotonic: dict = None,
    skip_anomaly: dict = None,
):
    """Process manual and automated vaccination data and export the results.

    Manual data is read from the Google Spreadsheet, automated data from the
    per-country CSV files listed by the spreadsheet. Every location that is
    not skipped is processed with ``process_location``, written to its own
    CSV, and finally all processed locations are concatenated, sorted by
    ``location`` and ``date`` and written to ``paths.tmp_vax_all``.

    Parameters
    ----------
    paths
        Object providing the input/output locations used here
        (``tmp_vax_out``, ``tmp_vax_out_dir``, ``pub_vax_loc``,
        ``tmp_vax_all``, ``tmp_met_all``).
    gsheets_api
        Credentials/API handle forwarded to ``VaccinationGSheet``.
    google_spreadsheet_vax_id : str
        Identifier of the spreadsheet, forwarded to ``VaccinationGSheet``.
    skip_complete : list, optional
        Lower-cased location names to skip entirely. ``None`` (the default)
        means no location is skipped.
    skip_monotonic : dict, optional
        Maps a location name to the value passed to ``process_location`` as
        its monotonic-check skip argument. Missing locations get ``[]``.
    skip_anomaly : dict, optional
        Maps a location name to the value passed to ``process_location`` as
        its anomaly-check skip argument. Missing locations get ``[]``.

    Raises
    ------
    DataError
        If a location has data both in the spreadsheet and in
        ``<tmp_vax_out_dir>/main_data``.
    ValueError
        If a dataframe has no ``location`` column.
    """
    # Normalise the optional arguments. Previously a missing `skip_complete`
    # stayed `None` and the membership test below raised TypeError; the dict
    # arguments used shared mutable `{}` defaults.
    skip_complete = [] if skip_complete is None else skip_complete
    skip_monotonic = {} if skip_monotonic is None else skip_monotonic
    skip_anomaly = {} if skip_anomaly is None else skip_anomaly

    print("-- Processing data... --")
    # Get data from sheets
    logger.info("Getting data from Google Spreadsheet...")
    gsheet = VaccinationGSheet(gsheets_api, google_spreadsheet_vax_id)
    df_manual_list = gsheet.df_list()

    # Get automated-country data
    logger.info("Getting data from output...")
    automated = gsheet.automated_countries
    filepaths_auto = [paths.tmp_vax_out(country) for country in automated]
    df_auto_list = [read_csv(filepath) for filepath in filepaths_auto]

    # Concatenate
    vax = df_manual_list + df_auto_list

    # Check that no location is present in both manual and automated data.
    # Automated locations are taken from the file names found on disk.
    manual_locations = {df.location[0] for df in df_manual_list}
    auto_files = os.listdir(os.path.join(paths.tmp_vax_out_dir, "main_data"))
    auto_locations = {name.replace(".csv", "") for name in auto_files}
    common_locations = auto_locations.intersection(manual_locations)
    if common_locations:
        raise DataError(
            f"The following locations have data in both output/main_data and GSheet: {common_locations}"
        )

    # vax = [v for v in vax if v.location.iloc[0] == "Pakistan"]  # DEBUG
    # Process locations
    def _process_location(df):
        # Per-location overrides; locations without an entry skip nothing.
        location = df.loc[0, "location"]
        monotonic_check_skip = skip_monotonic.get(location, [])
        anomaly_check_skip = skip_anomaly.get(location, [])
        return process_location(df, monotonic_check_skip, anomaly_check_skip)

    logger.info("Processing and exporting data...")
    vax_valid = []
    for df in vax:
        if "location" not in df:
            raise ValueError(f"Column `location` missing. df: {df.tail(5)}")
        country = df.loc[0, "location"]
        if country.lower() in skip_complete:
            logger.info(f"{country}: SKIPPED 🚧")
            continue
        df = _process_location(df)
        vax_valid.append(df)
        # Export
        df.to_csv(paths.pub_vax_loc(country), index=False)
        logger.info(f"{country}: SUCCESS ✅")
    df = pd.concat(vax_valid).sort_values(by=["location", "date"])
    df.to_csv(paths.tmp_vax_all, index=False)
    gsheet.metadata.to_csv(paths.tmp_met_all, index=False)
    logger.info("Exported ✅")
    print_eoe()
Ejemplo n.º 2
0
    def check_symmetry_hcc(self):
        """Verify that ``Hcc``, flattened to two dimensions, is symmetric.

        ``self.Hcc`` is reshaped to ``self.nF * self.nR`` rows (the column
        count is inferred) and handed to ``is_symmetric``.

        Raises
        ------
        DataError
            If ``is_symmetric`` reports that the reshaped matrix is not
            symmetric.
        """
        hcc = self.Hcc
        flattened = hcc.reshape((self.nF * self.nR, -1))
        if is_symmetric(flattened):
            return
        raise DataError("The Hcc Matrix should be symmetric at the top level!")
Ejemplo n.º 3
0
 def __init__(self, df: DataFrame) -> None:
     """Store a non-empty dataframe and fetch a quote from ``QuoteService``.

     Raises
     ------
     DataError
         If ``df`` has no rows.
     """
     # Reject empty input before touching any instance state.
     if not (df.shape[0] > 0):
         raise DataError('Dataframe has to have some data in it')
     self.df = df
     self.appointment_list = []

     logger.debug(': about: Quote Service started')
     quote_service = QuoteService()
     # NOTE(review): a double-underscore attribute accessed inside a class
     # body is name-mangled with the *enclosing* class's name -- confirm this
     # lookup actually resolves on QuoteService.
     random_quote = quote_service.__get_random_quote()
     self.quote_service_quote = random_quote
Ejemplo n.º 4
0
    def _apply(self, func, **kwargs):
        """
        Rolling statistical measure using supplied function. Designed to be
        used with passed-in Cython array-based functions.

        Parameters
        ----------
        func : str/callable to apply
            A string is looked up by name in ``window_aggregations`` and
            wrapped so that it is called with this object's ``com``,
            ``adjust``, ``ignore_na`` and ``min_periods`` settings.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        y : same type as input argument

        Raises
        ------
        DataError
            If a block's values cannot be prepared and the object is not a
            DataFrame.
        ValueError
            If ``func`` is a string that names no function in
            ``window_aggregations``.
        """
        blocks, obj = self._create_blocks()

        results = []
        exclude = []
        # Blocks that contribute an entry to ``results``, in the same order.
        # Collecting them here replaces ``del block_list[i]`` with ``i`` taken
        # from ``enumerate(blocks)``: once one block had been deleted the
        # remaining indices were shifted, so a second failing block removed
        # the wrong entry or raised IndexError.
        kept_blocks = []
        for b in blocks:
            try:
                values = self._prep_values(b.values)

            except (TypeError, NotImplementedError) as err:
                if isinstance(obj, ABCDataFrame):
                    # Drop this block and remember its columns as excluded.
                    exclude.extend(b.columns)
                    continue
                raise DataError("No numeric types to aggregate") from err

            kept_blocks.append(b)

            if values.size == 0:
                results.append(values.copy())
                continue

            # if we have a string function name, wrap it
            # (after the first wrap ``func`` is a callable, so this runs once)
            if isinstance(func, str):
                cfunc = getattr(window_aggregations, func, None)
                if cfunc is None:
                    raise ValueError(
                        f"we do not support this function in window_aggregations.{func}"
                    )

                def func(arg):
                    return cfunc(
                        arg,
                        self.com,
                        int(self.adjust),
                        int(self.ignore_na),
                        int(self.min_periods),
                    )

            results.append(np.apply_along_axis(func, self.axis, values))

        return self._wrap_results(results, kept_blocks, obj, exclude)
Ejemplo n.º 5
0
    def _apply(self, func):
        """
        Rolling statistical measure using supplied function. Designed to be
        used with passed-in Cython array-based functions.

        Parameters
        ----------
        func : callable to apply
            Applied along ``self.axis`` of each block's prepared values via
            ``np.apply_along_axis``.

        Returns
        -------
        y : same type as input argument

        Raises
        ------
        DataError
            If a block's values cannot be prepared and the object is not a
            DataFrame.
        """
        blocks, obj = self._create_blocks(self._selected_obj)

        results = []
        exclude = []
        # Blocks that contribute an entry to ``results``, in the same order.
        # Collecting them here replaces ``del block_list[i]`` with ``i`` taken
        # from ``enumerate(blocks)``: once one block had been deleted the
        # remaining indices were shifted, so a second failing block removed
        # the wrong entry or raised IndexError.
        kept_blocks = []
        for b in blocks:
            try:
                values = self._prep_values(b.values)

            except (TypeError, NotImplementedError) as err:
                if isinstance(obj, ABCDataFrame):
                    # Drop this block and remember its columns as excluded.
                    exclude.extend(b.columns)
                    continue
                raise DataError("No numeric types to aggregate") from err

            kept_blocks.append(b)

            if values.size == 0:
                results.append(values.copy())
                continue

            results.append(np.apply_along_axis(func, self.axis, values))

        return self._wrap_results(results, kept_blocks, obj, exclude)
Ejemplo n.º 6
0
        # ability_list = [None]*30
        # for i, ability in enumerate(fighter['Abilities']):
        #     if ability:
        #         ability_list[i] = ability

        for ability in ability_list:
            ability_data.append(ability)

        ability_lens.append(len(ability_list))
        # ability_data.append(ability_list)
        # print(skill_match_data)
        # print(ability_match_data)
        # input()

    if len(skill_match_data) != 113:
        raise DataError('len(skill_match_data) not 113, instead',
                        len(skill_match_data))
    skill_data.append(skill_match_data)
    winner_data.append(match['winner'])

# Turn the accumulated skill rows into a NumPy array and report its shape.
skill_data = np.array(skill_data)
print(skill_data.shape)
print('start building ability array')
# Build a DataFrame from the collected ability entries.
ability_data = pd.DataFrame(ability_data)
# print(ability_data[0])

# Stride used below when computing column indices for each of the 8 groups
# (index = n * col_len + offset); presumably the number of columns per
# ability -- TODO confirm.
col_len = 14

col_transformer = ColumnTransformer([
    ('Passthrough', 'passthrough', [0] + [(n * col_len) + x for n in range(8)
                                          for x in (1, 3, 4)]),
    ('Sign', OrdinalEncoder(), [(n * col_len) + 2 for n in range(8)]),