Example #1
    def population_project(self, year_length=None, method=None, growth_rate=None):
        """
        Continue the population projection so that present values converge

        Parameters
        ----------
        year_length : int, default None
                      Number of years over which to continue the projection
        method : str, default None
                 Projection method; must be 'stable' or 'exp_growth'
        growth_rate : float, default None
                      Annual growth rate, required when method is 'exp_growth'
        """

        if "pop" not in self.columns:
            raise Exception("pop is not a column of cohort")
        if year_length is None:
            raise Exception("a duration in years should be provided")
        if method is None:
            raise Exception("a method should be specified")
        years = self.index_sets["year"]
        first_year = min(years)
        last_year = max(years)

        # Nothing to project if the requested horizon is already covered.
        if (first_year + year_length) > last_year:
            new_last_year = first_year + year_length
        else:
            return

        if method == "stable":
            last_pop = self.xs(last_year, level="year", axis=0)
            pop = DataFrame(self["pop"])
            years = range(last_year + 1, new_last_year + 1)
            list_df = [last_pop] * len(years)

            pop = concat(list_df, keys=years, names=["year"])
            pop = pop.reorder_levels(["age", "sex", "year"], axis=0)
            combined = self.combine_first(pop)
            self.__init__(data=combined, columns=["pop"])

        if method == "exp_growth":
            if growth_rate is None:
                raise Exception("a growth rate must be provided for the method")

            last_pop = self.xs(last_year, level="year", axis=0)
            pop = DataFrame(self["pop"])
            years = range(last_year + 1, new_last_year + 1)
            list_df = [last_pop] * len(years)

            pop = concat(list_df, keys=years, names=["year"])
            pop = pop.reorder_levels(["age", "sex", "year"], axis=0)
            pop = Cohorts(pop)
            pop.gen_grth(growth_rate)
            pop["pop"] *= pop["grth"]
            del pop["grth"]

            combined = self.combine_first(pop)
            self.__init__(data=combined, columns=["pop"])
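A minimal standalone sketch of the index manipulation behind the 'stable' branch; the Cohorts class is project-specific, so plain pandas objects and toy values stand in here:

from pandas import DataFrame, MultiIndex, concat

# Toy population table indexed by (age, sex, year).
idx = MultiIndex.from_product([[0, 1], ["m", "f"], [2000]], names=["age", "sex", "year"])
pop = DataFrame({"pop": [10.0, 11.0, 12.0, 13.0]}, index=idx)

# Repeat the last observed year forward, as the 'stable' branch does.
last = pop.xs(2000, level="year", axis=0)
years = range(2001, 2003)
extended = concat([last] * len(years), keys=years, names=["year"])
# concat puts 'year' first; restore the original (age, sex, year) order.
extended = extended.reorder_levels(["age", "sex", "year"], axis=0)
print(pop.combine_first(extended))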
Example #2
File: io.py Project: ALGe9/owls
def read_data_file(fn, skiplines=1, maxlines=False):
    """  Read a foam data file and return its column names, a DataFrame
         of the data following the header, and per-column hashes
    """

    # TODO check if sorting the index gives any performance benefits
    # print "opening file {}".format(fn)
    if not os.path.exists(fn):
        print("Can not open file " + fn)
        return None
    try:
        with open(fn, encoding="utf-8") as f:
            field = fn.split("/")[-1]
            content = f.readlines()
            content.append("bla")
            start, num_entries = if_header_skip(content)
            entries = len(content[start].split())
            is_a_vector = True if entries > 1 else False
            end = start + num_entries
            if is_a_vector:
                data = list(map(lambda x: re.sub(r"[0-9]*\(|\)", "", x).split(), content[start:end:skiplines]))
                loc, names = evaluate_names(fn, entries)
                df = DataFrame(data=data, columns=names)
                if loc:
                    df["Loc"] = loc
                else:
                    df["Loc"] = range(len(df))
                df.set_index("Loc", append=True, inplace=True)
                df.index.names = ["Id", "Loc"]
                df = df.reorder_levels(["Loc", "Id"])
                df = df.astype(float)
                hashes = {}
                for row in df.columns:
                    hashes.update({row: hash_series(df[row])})
                return names, df, hashes
            else:
                data = [np.float32(x) for x in content[start:end:skiplines]]
                entries = 1
                df = DataFrame(data=data, columns=[field])
                df["Loc"] = "Field"
                df.set_index("Loc", append=True, inplace=True)
                df.index.names = ["Id", "Loc"]
                df = df.reorder_levels(["Loc", "Id"])
                hashes = {field: int(hashlib.md5(str(data).encode("utf-8")).hexdigest(), 16)}
                return field, df, hashes
    except Exception as e:
        if DEBUG:
            print("Error processing datafile " + fn)
            print(e)
        return None
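The scalar branch's index construction, as a minimal standalone sketch with made-up values:

from pandas import DataFrame

df = DataFrame({"p": [0.1, 0.2, 0.3]})  # stand-in for a scalar foam field
df["Loc"] = "Field"
df.set_index("Loc", append=True, inplace=True)  # index is now (default, Loc)
df.index.names = ["Id", "Loc"]
df = df.reorder_levels(["Loc", "Id"])  # make Loc the outer level
print(df)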
Example #3
    def test_pandas_extend_index(self):
        d1 = DataFrame(data=[2, 4, 6, 8], columns=["A"], index=[1, 2, 3, 4])
        d1.index.name = "first"

        d1["second"] = "default"
        d1.set_index(["second"], append=True, inplace=True)
        self.assertEqual(d1.index.names, ["first", "second"])

        d1 = d1.reorder_levels(["second", "first"])
        self.assertEqual(d1.index.names, ["second", "first"])
Example #4
def aggregate_chunks(mod_features_df, modality):
    """ Collapse per-chunk feature columns into a count-weighted mean and
        tag the result with the modality as an extra index level
    """
    without_info_df = mod_features_df.query('field != "info"')
    # Replicate the ("info", "count") row so the per-chunk counts align
    # row-wise with every feature row.
    cnt_df = DataFrame(
        [list(mod_features_df.loc[("info", "count"), :].values)] * len(without_info_df), index=without_info_df.index
    )
    # Count-weighted mean across chunks (the columns).
    agg_df = without_info_df * cnt_df
    agg_df = DataFrame(agg_df.sum(axis=1) / cnt_df.sum(axis=1), index=without_info_df.index)
    agg_df["modality"] = modality
    agg_df.set_index("modality", append=True, inplace=True)
    agg_df = agg_df.reorder_levels(["modality", "field", "feature"])
    return agg_df
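A toy invocation of the function above, under the assumption that chunks are columns and the index carries (field, feature) with an ("info", "count") row of per-chunk sample counts:

from pandas import DataFrame, MultiIndex

idx = MultiIndex.from_tuples(
    [("info", "count"), ("acc", "mean"), ("acc", "std")],
    names=["field", "feature"],
)
chunks = DataFrame([[10, 30], [1.0, 2.0], [0.1, 0.3]], index=idx, columns=[0, 1])
agg = aggregate_chunks(chunks, "accelerometer")
# ("acc", "mean") becomes (10 * 1.0 + 30 * 2.0) / 40 = 1.75
print(agg)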
Example #5
File: io.py Project: ALGe9/owls
def import_foam_folder(path, search, files, skiplines=1, maxlines=0, skiptimes=1, exclude=None):
    """ returns a Dataframe for every file in fileList """
    # import StringIO
    from pandas import concat

    fileList = find_datafiles(path, search=search, files=files, exclude=exclude)
    if not fileList:
        print("no files found")
        return
    p_bar = ProgressBar(n_tot=sum([len(l) for l in fileList.values()]))
    df = DataFrame()
    # df.index = MultiIndex.from_tuples(zip([],[]),names=['Loc',0])

    origins = Origins()
    els = list(fileList.items())[::skiptimes]
    for fullpath, files in els:
        time = strip_time(fullpath, path)
        df_tmp = DataFrame()
        for fn in files:
            # ret = read_table(StringIO.StringIO(foam_to_csv(fn)))
            ret = read_data_file(fn, skiplines, maxlines)
            p_bar.next()
            if not ret:
                continue
            field_names, x, hashes = ret
            loc = x.index.values[-1][0]
            if df_tmp.empty:
                df_tmp = x
            else:
                try:
                    # use combine_first for all df at an existing Loc or
                    # if no Loc is specified (Eul or Lag fields)
                    if x.index.levels[0][0] in df_tmp.index.levels[0]:
                        df_tmp = df_tmp.combine_first(x)
                        # df_tmp = concat([df_tmp, x], axis=1)
                    else:
                        df_tmp = concat([df_tmp, x])
                except Exception as e:
                    print(x)
                    print(e)
            field_names = field_names if isinstance(field_names, list) else [field_names]
            for field in field_names:
                origins.insert(time, loc, field, fn, hashes[field])
        df_tmp["Time"] = time
        if df.empty:
            df = df_tmp
        else:
            df = concat([df, df_tmp])  # DataFrame.append was removed in pandas 2.0
    df.set_index("Time", append=True, inplace=True)
    df = df.reorder_levels(["Time", "Loc", "Id"])
    p_bar.done()
    return origins, df
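The closing index manipulation, as a minimal standalone sketch with made-up data:

from pandas import DataFrame, MultiIndex

# Mimic the accumulated frame: index (Loc, Id), plus a Time column.
idx = MultiIndex.from_product([["Field"], [0, 1]], names=["Loc", "Id"])
df = DataFrame({"u": [0.1, 0.2]}, index=idx)
df["Time"] = 0.5

df.set_index("Time", append=True, inplace=True)  # index: (Loc, Id, Time)
df = df.reorder_levels(["Time", "Loc", "Id"])    # promote Time to the outer level
print(df)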
Example #6
    def test_reorder_levels(self):
        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
            names=["L0", "L1", "L2"],
        )
        df = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=index)

        # no change, position
        result = df.reorder_levels([0, 1, 2])
        assert_frame_equal(df, result)

        # no change, labels
        result = df.reorder_levels(["L0", "L1", "L2"])
        assert_frame_equal(df, result)

        # rotate, position
        result = df.reorder_levels([1, 2, 0])
        e_idx = MultiIndex(
            levels=[["one", "two", "three"], [0, 1], ["bar"]],
            codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]],
            names=["L1", "L2", "L0"],
        )
        expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx)
        assert_frame_equal(result, expected)

        result = df.reorder_levels([0, 0, 0])
        e_idx = MultiIndex(
            levels=[["bar"], ["bar"], ["bar"]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]],
            names=["L0", "L0", "L0"],
        )
        expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx)
        assert_frame_equal(result, expected)

        result = df.reorder_levels(["L0", "L0", "L0"])
        assert_frame_equal(result, expected)
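For the common two-level case, swaplevel is shorthand for the same reordering; a minimal sketch:

import numpy as np
from pandas import DataFrame, MultiIndex

idx = MultiIndex.from_product([["bar"], ["one", "two"]], names=["L0", "L1"])
df = DataFrame({"A": np.arange(2)}, index=idx)

r1 = df.reorder_levels(["L1", "L0"])  # explicit order
r2 = df.swaplevel("L0", "L1")         # equivalent for exactly two levels
assert r1.index.equals(r2.index)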
Example #7
File: io.py Project: ALGe9/owls
def import_logs(folder, search, keys):
    """
        Parse solver logs found in folder into a DataFrame.

        keys maps a search pattern to the column names for the values it
        captures, e.g.

            keys = {"ExecutionTime": ["ExecTime", "ClockTime"]}

        returns a (dict, DataFrame) pair; the frame is indexed by
        (Loc, Time, Id) with one column per key:

            Loc  Time   KeyName1  KeyName2
            1    0.1    ...       ...
                 0.2    ...       ...
            2    ...
    """

    def find_start(log):
        """ Fast forward through file till 'Starting time loop' """
        for i, line in enumerate(log):
            if "Starting time loop" in line:
                return i

    def extract(line, keys):
        """
            returns key and values as list
                "ExecutionTime":[0,1]
        """
        import re

        for key, col_names in keys.items():
            if re.search(key, line):
                return (
                    col_names,
                    list(map(float, filter(lambda x: x, re.findall(r"[0-9]+[.]?[0-9]*[e]?[\-]?[0-9]*", line)))),
                )
        return None, None

    fold, dirs, files = next(os.walk(folder))
    logs = [fold + "/" + log for log in files if search in log]
    p_bar = ProgressBar(n_tot=len(logs))
    # Let's make sure that we find time steps in the log
    keys.update({"^Time = ": ["Time"]})

    frames = []
    for log_number, log_name in enumerate(logs):
        with open(log_name, encoding="utf-8") as log:
            f = log.readlines()
            start = find_start(f)
            dataDict = defaultdict(list)
            df = DataFrame()
            for line in f[start:-1]:
                col_names, values = extract(line, keys)
                if not col_names:
                    continue
                if col_names[0] == "Time":
                    # a new time step has begun
                    # flush datadict and concat to df
                    # Very slow but, so far the solution
                    # to keep subiterations attached to correct time
                    # FIXME: still needs handling of different length dictionaries
                    df = concat([df, DataFrame(dataDict)])
                    dataDict = defaultdict(list)
                for i, col in enumerate(col_names):
                    dataDict[col].append(values[i])
            # flush the values collected for the final time step
            df = concat([df, DataFrame(dataDict)])
        p_bar.next()
        try:
            df.index = range(len(df))
            df.index.names = ["Id"]
            df["Loc"] = log_number
            df.set_index("Time", append=True, inplace=True)
            df.set_index("Loc", append=True, inplace=True)
            df = df.reorder_levels(["Loc", "Time", "Id"])
            frames.append(df)
        except Exception as e:
            print(log_name)
            print("failed to process")
            print(e)
            return {}, None
    p_bar.done()
    return {}, concat(frames) if frames else DataFrame()
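A condensed sketch of the extraction step with a hypothetical mini-log; the per-time-step flushing is elided for brevity:

import re
from collections import defaultdict
from pandas import DataFrame

keys = {"^Time = ": ["Time"], "ExecutionTime": ["ExecTime", "ClockTime"]}
log = [
    "Time = 0.1",
    "ExecutionTime = 0.42 s  ClockTime = 1 s",
    "Time = 0.2",
    "ExecutionTime = 0.84 s  ClockTime = 2 s",
]

number = re.compile(r"[0-9]+[.]?[0-9]*[e]?[\-]?[0-9]*")
rows = defaultdict(list)
for line in log:
    for key, col_names in keys.items():
        if re.search(key, line):
            for col, value in zip(col_names, map(float, number.findall(line))):
                rows[col].append(value)
print(DataFrame(rows))  # columns: Time, ExecTime, ClockTime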