def apply_sequencing(self,
                         d=0.1,
                         n=40,
                         distance=False,
                         fixed=False,
                         dwelling=False,
                         enumerator=False,
                         tuned=False,
                         enumerator_dist=False):
        """Left-merge the requested sequence tables onto ``self.data``.

        Every enabled flag contributes one table; the tables are merged onto
        ``self.data`` one after another (``how="left"``) on
        ``[self.ward_col, self.dwelling_col]`` and the result is stored in
        ``self.df``. When no flag is enabled, ``self.df`` is ``self.data``
        itself (no copy is made).

        Args:
            d: Value forwarded to ``self.get_dwellings_dist_seq`` (for
                ``distance``) and to ``sequences.get_dist_seq`` (for
                ``enumerator_dist``).
            n: Value forwarded to ``self.get_dwellings_fixed_seq`` (for
                ``fixed``).
            distance: Add the table from ``self.get_dwellings_dist_seq(d)``,
                or ``tuned`` when one is supplied.
            fixed: Add the table from ``self.get_dwellings_fixed_seq(n)``.
            dwelling: Add the table from
                ``self.get_dwellings_dwellings_seq()``.
            enumerator: Add the table from ``self.get_enum_seq()``.
            tuned: Optional precomputed table used instead of
                ``self.get_dwellings_dist_seq(d)`` when ``distance`` is set.
                A ``pandas.DataFrame`` is always used as given; any other
                value is used only if it is truthy.
            enumerator_dist: Add the ``enum_dist_*`` columns computed per
                ``(ward, "enum_seq")`` group, then drop merged rows whose
                ``self.pagenum`` column is missing.

        Returns:
            None. The merged frame is written to ``self.df``.
        """
        merge_keys = [self.ward_col, self.dwelling_col]
        sequences_dfs = []

        if distance:
            # A DataFrame has no unambiguous truth value, so the former
            # ``tuned if tuned else ...`` raised ValueError whenever a frame
            # was actually passed. Check the type before falling back to
            # plain truthiness for every other kind of value.
            use_tuned = isinstance(tuned, pd.DataFrame) or bool(tuned)
            sequences_dfs.append(
                tuned if use_tuned else self.get_dwellings_dist_seq(d))

        if fixed:
            sequences_dfs.append(self.get_dwellings_fixed_seq(n))

        if dwelling:
            sequences_dfs.append(self.get_dwellings_dwellings_seq())

        if enumerator:
            sequences_dfs.append(self.get_enum_seq())

        if enumerator_dist:
            enum_keys = [self.ward_col, "enum_seq"]
            enum_dist = self.get_enum_seq(all=True)
            # NOTE(review): this drops rows in place on whatever object
            # get_enum_seq returned -- confirm that frame is not shared.
            enum_dist.dropna(subset=[self.block_col], inplace=True)
            enum_dist = enum_dist.groupby(enum_keys, as_index=False).apply(
                lambda x: sequences.col_for_seq(
                    x, X=self.x_col, Y=self.y_col))
            enum_dist = enum_dist.groupby(enum_keys, as_index=False).apply(
                lambda x: sequences.get_dist_seq(x, d))

            # Rename so these columns cannot clash with the ones produced by
            # the plain distance sequence when both flags are enabled.
            enum_dist = enum_dist.rename(columns={
                "sequence_id": "enum_dist_id",
                "sequence_order_enum": "enum_dist_order",
                "dist": "enum_dist",
                "sequence_len": "enum_sequence_len",
            })
            sequences_dfs.append(enum_dist.loc[:, [
                self.ward_col, self.dwelling_col, "enum_dist_id",
                "enum_dist_order", "enum_dist", "enum_sequence_len"
            ]].copy())

        # Fold every collected table onto the base data, in the order the
        # flags were processed above.
        merged = self.data
        for seq_df in sequences_dfs:
            merged = pd.merge(merged, seq_df, how="left", on=merge_keys)
        self.df = merged

        # NOTE: filling sequence ids within a ward (self.fill_within) is
        # intentionally not applied here for either the distance or the
        # enumerator-distance sequence.
        if enumerator_dist:
            self.df = self.df.dropna(subset=[self.pagenum])
# Example no. 2
# 0
def sequence_datasets(census_1850, census_1880):
    """Build dwelling-level and record-level sequence tables for both censuses.

    The 1850 records are first restricted to rows with a
    ``CENSUS_DWELLING_NUM``. One row per ``(WARD_NUM, CENSUS_DWELLING_NUM)``
    with a ``CD_ADDRESS`` forms the 1850 dwelling table; one row per
    ``CENSUS_ADDRESS`` forms the 1880 dwelling table. Both dwelling tables
    are passed through ``sequences.col_for_seq``, ``sequences.get_dist_seq``
    and a per-``sequence_id`` ``sequences.sequence_order``, then joined back
    onto the full records with ``dataprocessing.dwellings_to_all``.

    Args:
        census_1850: 1850 census records (presumably a DataFrame with the
            ``WARD_NUM``, ``CENSUS_DWELLING_NUM``, ``CD_ADDRESS``, ``CD_X``
            and ``CD_Y`` columns used below -- confirm against the caller).
        census_1880: 1880 census records (presumably a DataFrame with the
            ``CENSUS_ADDRESS``, ``POINT_X`` and ``POINT_Y`` columns used
            below -- confirm against the caller).

    Returns:
        A list ``[dwellings_1850, dwellings_1880, census_1850_model,
        census_1880_model]``.
    """
    ward_dwelling = ("WARD_NUM", "CENSUS_DWELLING_NUM")
    d = 0.15  # second argument handed to sequences.get_dist_seq

    census_1850 = census_1850.dropna(subset=["CENSUS_DWELLING_NUM"]).copy()

    # Collapse the person-level records to one row per dwelling / address.
    first_per_dwelling = census_1850.groupby(
        list(ward_dwelling), as_index=False).first()
    dwellings_1850 = first_per_dwelling.dropna(subset=["CD_ADDRESS"]).copy()
    unique_addr_1880 = census_1880.drop_duplicates(subset=["CENSUS_ADDRESS"])
    dwellings_1880 = unique_addr_1880.reset_index(drop=True).copy()

    # Return values are deliberately ignored, exactly as before; presumably
    # col_for_seq works on the frame it is given -- TODO confirm.
    sequences.col_for_seq(dwellings_1850, "CD_X", "CD_Y")
    sequences.col_for_seq(dwellings_1880, "POINT_X", "POINT_Y")

    # Only the element at index 2 of get_dist_seq's result is kept.
    dwellings_1850 = sequences.get_dist_seq(dwellings_1850, d)[2]
    dwellings_1880 = sequences.get_dist_seq(dwellings_1880, d)[2]

    by_seq_1850 = dwellings_1850.groupby("sequence_id")
    dwellings_1850 = by_seq_1850.apply(sequences.sequence_order)
    by_seq_1880 = dwellings_1880.groupby("sequence_id")
    dwellings_1880 = by_seq_1880.apply(sequences.sequence_order)

    # NOTE(review): the original author flagged this second de-duplication
    # pass as not fully understood; it is kept unchanged.
    dwellings_1850 = dwellings_1850.groupby(
        list(ward_dwelling), as_index=False).first()
    dwellings_1880 = dwellings_1880.drop_duplicates(
        subset=["CENSUS_ADDRESS"]).reset_index(drop=True).copy()

    cols_1880 = [
        "CENSUS_MATCH_ADDR", "sequence_id", "sequence_order", "num_between"
    ]
    census_1880_model = dataprocessing.dwellings_to_all(
        census_1880, dwellings_1880, cols_1880, ["CENSUS_MATCH_ADDR"])

    cols_1850 = list(ward_dwelling) + [
        "sequence_id", "sequence_order", "num_between", "sequence_order_enum"
    ]
    census_1850_model = dataprocessing.dwellings_to_all(
        census_1850, dwellings_1850, cols_1850, list(ward_dwelling))

    # Same call order as before: dwelling tables first, then record tables.
    street_house_jobs = (
        (dwellings_1880, "CENSUS_ADDRESS"),
        (dwellings_1850, "CD_ADDRESS"),
        (census_1880_model, "CENSUS_ADDRESS"),
        (census_1850_model, "CD_ADDRESS"),
    )
    for frame, address_col in street_house_jobs:
        dataprocessing.create_street_house(frame, address_col)

    return [
        dwellings_1850, dwellings_1880, census_1850_model, census_1880_model
    ]
# Example no. 3
# 0
 def get_dwellings_dist_seq(self, d=0.1):
     """Return the per-ward distance-sequence columns for the dwellings.

     Takes the frame from ``self.get_dwellings()``, drops (in place) the
     rows with no ``self.block_col`` value, and then, ward by ward, applies
     ``sequences.col_for_seq`` followed by ``sequences.get_dist_seq``.

     Args:
         d: Second argument passed to ``sequences.get_dist_seq``.

     Returns:
         A copy restricted to the ward and dwelling columns plus
         ``sequence_id``, ``num_between``, ``sequence_order_enum``,
         ``dist`` and ``sequence_len``.
     """

     def add_seq_cols(ward_df):
         return sequences.col_for_seq(ward_df, X=self.x_col, Y=self.y_col)

     def dist_seq(ward_df):
         return sequences.get_dist_seq(ward_df, d)

     frame = self.get_dwellings()
     # In-place: this also affects the object get_dwellings() handed back.
     frame.dropna(subset=[self.block_col], inplace=True)

     per_ward = frame.groupby(self.ward_col, as_index=False).apply(add_seq_cols)
     per_ward = per_ward.groupby(self.ward_col, as_index=False).apply(dist_seq)

     wanted = [
         self.ward_col, self.dwelling_col, "sequence_id", "num_between",
         "sequence_order_enum", "dist", "sequence_len"
     ]
     return per_ward.loc[:, wanted].copy()